fluent-plugin-elasticsearch 2.10.3 → 2.10.4

This diff represents the content of publicly available package versions as released to their respective public registries. It is provided for informational purposes only.
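The substantive source changes in this release are small: a new require 'fluent/error', the ConnectionFailure exception class now inheriting from Fluent::UnrecoverableError instead of StandardError, and a new configure-time guard that rejects a negative max_retry_putting_template. The first file below (the filter registered as elasticsearch_genid) is listed in full but is identical in both versions; the second file (the output plugin registered as elasticsearch) carries the actual changes.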
@@ -1,25 +1,25 @@
- require 'securerandom'
- require 'base64'
- require 'fluent/plugin/filter'
-
- module Fluent::Plugin
-   class ElasticsearchGenidFilter < Filter
-     Fluent::Plugin.register_filter('elasticsearch_genid', self)
-
-     config_param :hash_id_key, :string, :default => '_hash'
-
-     def initialize
-       super
-     end
-
-     def configure(conf)
-       super
-     end
-
-     def filter(tag, time, record)
-       record[@hash_id_key] = Base64.strict_encode64(SecureRandom.uuid)
-       record
-     end
-
-   end
- end
+ require 'securerandom'
+ require 'base64'
+ require 'fluent/plugin/filter'
+
+ module Fluent::Plugin
+   class ElasticsearchGenidFilter < Filter
+     Fluent::Plugin.register_filter('elasticsearch_genid', self)
+
+     config_param :hash_id_key, :string, :default => '_hash'
+
+     def initialize
+       super
+     end
+
+     def configure(conf)
+       super
+     end
+
+     def filter(tag, time, record)
+       record[@hash_id_key] = Base64.strict_encode64(SecureRandom.uuid)
+       record
+     end
+
+   end
+ end
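For context, the filter above (unchanged in this release) stamps every record with a random Base64-encoded UUID under hash_id_key (default _hash); pointing the output plugin's id_key at the same key lets Elasticsearch de-duplicate records that get retried. A minimal, hypothetical Fluentd configuration (the tag pattern my.logs is illustrative):

    <filter my.logs>
      @type elasticsearch_genid
      hash_id_key _hash    # record key to write the generated id into; '_hash' is the default
    </filter>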
@@ -1,538 +1,540 @@
- # encoding: UTF-8
- require 'date'
- require 'excon'
- require 'elasticsearch'
- require 'json'
- require 'uri'
- begin
-   require 'strptime'
- rescue LoadError
- end
-
- require 'fluent/plugin/output'
- require 'fluent/event'
- require_relative 'elasticsearch_constants'
- require_relative 'elasticsearch_error_handler'
- require_relative 'elasticsearch_index_template'
-
- module Fluent::Plugin
-   class ElasticsearchOutput < Output
-     class ConnectionFailure < StandardError; end
-
-     # RetryStreamError privides a stream to be
-     # put back in the pipeline for cases where a bulk request
-     # failed (e.g some records succeed while others failed)
-     class RetryStreamError < StandardError
-       attr_reader :retry_stream
-       def initialize(retry_stream)
-         @retry_stream = retry_stream
-       end
-     end
-
-     helpers :event_emitter, :compat_parameters, :record_accessor
-
-     Fluent::Plugin.register_output('elasticsearch', self)
-
-     DEFAULT_BUFFER_TYPE = "memory"
-     DEFAULT_ELASTICSEARCH_VERSION = 5 # For compatibility.
-     DEFAULT_TYPE_NAME_ES_7x = "_doc".freeze
-     DEFAULT_TYPE_NAME = "fluentd".freeze
-
-     config_param :host, :string, :default => 'localhost'
-     config_param :port, :integer, :default => 9200
-     config_param :user, :string, :default => nil
-     config_param :password, :string, :default => nil, :secret => true
-     config_param :path, :string, :default => nil
-     config_param :scheme, :string, :default => 'http'
-     config_param :hosts, :string, :default => nil
-     config_param :target_index_key, :string, :default => nil
-     config_param :target_type_key, :string, :default => nil,
-                  :deprecated => <<EOC
- Elasticsearch 7.x or above will ignore this config. Please use fixed type_name instead.
- EOC
-     config_param :time_key_format, :string, :default => nil
-     config_param :time_precision, :integer, :default => 9
-     config_param :include_timestamp, :bool, :default => false
-     config_param :logstash_format, :bool, :default => false
-     config_param :logstash_prefix, :string, :default => "logstash"
-     config_param :logstash_prefix_separator, :string, :default => '-'
-     config_param :logstash_dateformat, :string, :default => "%Y.%m.%d"
-     config_param :utc_index, :bool, :default => true
-     config_param :type_name, :string, :default => DEFAULT_TYPE_NAME
-     config_param :index_name, :string, :default => "fluentd"
-     config_param :id_key, :string, :default => nil
-     config_param :write_operation, :string, :default => "index"
-     config_param :parent_key, :string, :default => nil
-     config_param :routing_key, :string, :default => nil
-     config_param :request_timeout, :time, :default => 5
-     config_param :reload_connections, :bool, :default => true
-     config_param :reload_on_failure, :bool, :default => false
-     config_param :retry_tag, :string, :default=>nil
-     config_param :resurrect_after, :time, :default => 60
-     config_param :time_key, :string, :default => nil
-     config_param :time_key_exclude_timestamp, :bool, :default => false
-     config_param :ssl_verify , :bool, :default => true
-     config_param :client_key, :string, :default => nil
-     config_param :client_cert, :string, :default => nil
-     config_param :client_key_pass, :string, :default => nil
-     config_param :ca_file, :string, :default => nil
-     config_param :ssl_version, :enum, list: [:SSLv23, :TLSv1, :TLSv1_1, :TLSv1_2], :default => :TLSv1
-     config_param :remove_keys, :string, :default => nil
-     config_param :remove_keys_on_update, :string, :default => ""
-     config_param :remove_keys_on_update_key, :string, :default => nil
-     config_param :flatten_hashes, :bool, :default => false
-     config_param :flatten_hashes_separator, :string, :default => "_"
-     config_param :template_name, :string, :default => nil
-     config_param :template_file, :string, :default => nil
-     config_param :template_overwrite, :bool, :default => false
-     config_param :templates, :hash, :default => nil
-     config_param :max_retry_putting_template, :integer, :default => 10
-     config_param :include_tag_key, :bool, :default => false
-     config_param :tag_key, :string, :default => 'tag'
-     config_param :time_parse_error_tag, :string, :default => 'Fluent::ElasticsearchOutput::TimeParser.error'
-     config_param :reconnect_on_error, :bool, :default => false
-     config_param :pipeline, :string, :default => nil
-     config_param :with_transporter_log, :bool, :default => false
-     config_param :content_type, :enum, list: [:"application/json", :"application/x-ndjson"], :default => :"application/json",
-                  :deprecated => <<EOC
- elasticsearch gem v6.0.2 starts to use correct Content-Type. Please upgrade elasticserach gem and stop to use this option.
- see: https://github.com/elastic/elasticsearch-ruby/pull/514
- EOC
-
-     config_section :buffer do
-       config_set_default :@type, DEFAULT_BUFFER_TYPE
-       config_set_default :chunk_keys, ['tag']
-       config_set_default :timekey_use_utc, true
-     end
-
-     include Fluent::ElasticsearchIndexTemplate
-     include Fluent::Plugin::ElasticsearchConstants
-
-     def initialize
-       super
-     end
-
-     def configure(conf)
-       compat_parameters_convert(conf, :buffer)
-
-       super
-       raise Fluent::ConfigError, "'tag' in chunk_keys is required." if not @chunk_key_tag
-
-       @time_parser = create_time_parser
-
-       if @remove_keys
-         @remove_keys = @remove_keys.split(/\s*,\s*/)
-       end
-
-       if @target_index_key && @target_index_key.is_a?(String)
-         @target_index_key = @target_index_key.split '.'
-       end
-
-       if @target_type_key && @target_type_key.is_a?(String)
-         @target_type_key = @target_type_key.split '.'
-       end
-
-       if @remove_keys_on_update && @remove_keys_on_update.is_a?(String)
-         @remove_keys_on_update = @remove_keys_on_update.split ','
-       end
-
-       if @template_name && @template_file
-         retry_install(@max_retry_putting_template) do
-           template_install(@template_name, @template_file, @template_overwrite)
-         end
-       elsif @templates
-         retry_install(@max_retry_putting_template) do
-           templates_hash_install(@templates, @template_overwrite)
-         end
-       end
-
-       # Consider missing the prefix of "$." in nested key specifiers.
-       @id_key = convert_compat_id_key(@id_key) if @id_key
-       @parent_key = convert_compat_id_key(@parent_key) if @parent_key
-       @routing_key = convert_compat_id_key(@routing_key) if @routing_key
-
-       @meta_config_map = create_meta_config_map
-
-       begin
-         require 'oj'
-         @dump_proc = Oj.method(:dump)
-       rescue LoadError
-         @dump_proc = Yajl.method(:dump)
-       end
-
-       if @user && m = @user.match(/%{(?<user>.*)}/)
-         @user = URI.encode_www_form_component(m["user"])
-       end
-       if @password && m = @password.match(/%{(?<password>.*)}/)
-         @password = URI.encode_www_form_component(m["password"])
-       end
-
-       if @hash_config
-         raise Fluent::ConfigError, "@hash_config.hash_id_key and id_key must be equal." unless @hash_config.hash_id_key == @id_key
-       end
-       @transport_logger = nil
-       if @with_transporter_log
-         @transport_logger = log
-         log_level = conf['@log_level'] || conf['log_level']
-         log.warn "Consider to specify log_level with @log_level." unless log_level
-       end
-
-       @last_seen_major_version = detect_es_major_version rescue DEFAULT_ELASTICSEARCH_VERSION
-       if @last_seen_major_version == 6 && @type_name != DEFAULT_TYPE_NAME_ES_7x
-         log.info "Detected ES 6.x: ES 7.x will only accept `_doc` in type_name."
-       end
-       if @last_seen_major_version >= 7 && @type_name != DEFAULT_TYPE_NAME_ES_7x
-         log.warn "Detected ES 7.x or above: `_doc` will be used as the document `_type`."
-         @type_name = '_doc'.freeze
-       end
-     end
-
-     def detect_es_major_version
-       @_es_info ||= client.info
-       @_es_info["version"]["number"].to_i
-     end
-
-     def convert_compat_id_key(key)
-       if key.include?('.') && !key.start_with?('$[')
-         key = "$.#{key}" unless key.start_with?('$.')
-       end
-       key
-     end
-
-     def create_meta_config_map
-       result = []
-       result << [record_accessor_create(@id_key), '_id'] if @id_key
-       result << [record_accessor_create(@parent_key), '_parent'] if @parent_key
-       result << [record_accessor_create(@routing_key), '_routing'] if @routing_key
-       result
-     end
-
-     # once fluent v0.14 is released we might be able to use
-     # Fluent::Parser::TimeParser, but it doesn't quite do what we want - if gives
-     # [sec,nsec] where as we want something we can call `strftime` on...
-     def create_time_parser
-       if @time_key_format
-         begin
-           # Strptime doesn't support all formats, but for those it does it's
-           # blazingly fast.
-           strptime = Strptime.new(@time_key_format)
-           Proc.new { |value| strptime.exec(value).to_datetime }
-         rescue
-           # Can happen if Strptime doesn't recognize the format; or
-           # if strptime couldn't be required (because it's not installed -- it's
-           # ruby 2 only)
-           Proc.new { |value| DateTime.strptime(value, @time_key_format) }
-         end
-       else
-         Proc.new { |value| DateTime.parse(value) }
-       end
-     end
-
-     def parse_time(value, event_time, tag)
-       @time_parser.call(value)
-     rescue => e
-       router.emit_error_event(@time_parse_error_tag, Fluent::Engine.now, {'tag' => tag, 'time' => event_time, 'format' => @time_key_format, 'value' => value}, e)
-       return Time.at(event_time).to_datetime
-     end
-
-     def client
-       @_es ||= begin
-         excon_options = { client_key: @client_key, client_cert: @client_cert, client_key_pass: @client_key_pass }
-         adapter_conf = lambda {|f| f.adapter :excon, excon_options }
-         transport = Elasticsearch::Transport::Transport::HTTP::Faraday.new(get_connection_options.merge(
-           options: {
-             reload_connections: @reload_connections,
-             reload_on_failure: @reload_on_failure,
-             resurrect_after: @resurrect_after,
-             retry_on_failure: 5,
-             logger: @transport_logger,
-             transport_options: {
-               headers: { 'Content-Type' => @content_type.to_s },
-               request: { timeout: @request_timeout },
-               ssl: { verify: @ssl_verify, ca_file: @ca_file, version: @ssl_version }
-             },
-             http: {
-               user: @user,
-               password: @password
-             }
-           }), &adapter_conf)
-         es = Elasticsearch::Client.new transport: transport
-
-         begin
-           raise ConnectionFailure, "Can not reach Elasticsearch cluster (#{connection_options_description})!" unless es.ping
-         rescue *es.transport.host_unreachable_exceptions => e
-           raise ConnectionFailure, "Can not reach Elasticsearch cluster (#{connection_options_description})! #{e.message}"
-         end
-
-         log.info "Connection opened to Elasticsearch cluster => #{connection_options_description}"
-         es
-       end
-     end
-
-     def get_escaped_userinfo(host_str)
-       if m = host_str.match(/(?<scheme>.*)%{(?<user>.*)}:%{(?<password>.*)}(?<path>@.*)/)
-         m["scheme"] +
-           URI.encode_www_form_component(m["user"]) +
-           ':' +
-           URI.encode_www_form_component(m["password"]) +
-           m["path"]
-       else
-         host_str
-       end
-     end
-
-     def get_connection_options
-       raise "`password` must be present if `user` is present" if @user && !@password
-
-       hosts = if @hosts
-         @hosts.split(',').map do |host_str|
-           # Support legacy hosts format host:port,host:port,host:port...
-           if host_str.match(%r{^[^:]+(\:\d+)?$})
-             {
-               host: host_str.split(':')[0],
-               port: (host_str.split(':')[1] || @port).to_i,
-               scheme: @scheme
-             }
-           else
-             # New hosts format expects URLs such as http://logs.foo.com,https://john:pass@logs2.foo.com/elastic
-             uri = URI(get_escaped_userinfo(host_str))
-             %w(user password path).inject(host: uri.host, port: uri.port, scheme: uri.scheme) do |hash, key|
-               hash[key.to_sym] = uri.public_send(key) unless uri.public_send(key).nil? || uri.public_send(key) == ''
-               hash
-             end
-           end
-         end.compact
-       else
-         [{host: @host, port: @port, scheme: @scheme}]
-       end.each do |host|
-         host.merge!(user: @user, password: @password) if !host[:user] && @user
-         host.merge!(path: @path) if !host[:path] && @path
-       end
-
-       {
-         hosts: hosts
-       }
-     end
-
-     def connection_options_description
-       get_connection_options[:hosts].map do |host_info|
-         attributes = host_info.dup
-         attributes[:password] = 'obfuscated' if attributes.has_key?(:password)
-         attributes.inspect
-       end.join(', ')
-     end
-
-     def append_record_to_messages(op, meta, header, record, msgs)
-       case op
-       when UPDATE_OP, UPSERT_OP
-         if meta.has_key?(ID_FIELD)
-           header[UPDATE_OP] = meta
-           msgs << @dump_proc.call(header) << BODY_DELIMITER
-           msgs << @dump_proc.call(update_body(record, op)) << BODY_DELIMITER
-         end
-       when CREATE_OP
-         if meta.has_key?(ID_FIELD)
-           header[CREATE_OP] = meta
-           msgs << @dump_proc.call(header) << BODY_DELIMITER
-           msgs << @dump_proc.call(record) << BODY_DELIMITER
-         end
-       when INDEX_OP
-         header[INDEX_OP] = meta
-         msgs << @dump_proc.call(header) << BODY_DELIMITER
-         msgs << @dump_proc.call(record) << BODY_DELIMITER
-       end
-     end
-
-     def update_body(record, op)
-       update = remove_keys(record)
-       body = {"doc".freeze => update}
-       if op == UPSERT_OP
-         if update == record
-           body["doc_as_upsert".freeze] = true
-         else
-           body[UPSERT_OP] = record
-         end
-       end
-       body
-     end
-
-     def remove_keys(record)
-       keys = record[@remove_keys_on_update_key] || @remove_keys_on_update || []
-       record.delete(@remove_keys_on_update_key)
-       return record unless keys.any?
-       record = record.dup
-       keys.each { |key| record.delete(key) }
-       record
-     end
-
-     def flatten_record(record, prefix=[])
-       ret = {}
-       if record.is_a? Hash
-         record.each { |key, value|
-           ret.merge! flatten_record(value, prefix + [key.to_s])
-         }
-       elsif record.is_a? Array
-         # Don't mess with arrays, leave them unprocessed
-         ret.merge!({prefix.join(@flatten_hashes_separator) => record})
-       else
-         return {prefix.join(@flatten_hashes_separator) => record}
-       end
-       ret
-     end
-
-     def expand_placeholders(metadata)
-       logstash_prefix = extract_placeholders(@logstash_prefix, metadata)
-       index_name = extract_placeholders(@index_name, metadata)
-       type_name = extract_placeholders(@type_name, metadata)
-       return logstash_prefix, index_name, type_name
-     end
-
-     def multi_workers_ready?
-       true
-     end
-
-     def write(chunk)
-       bulk_message_count = 0
-       bulk_message = ''
-       header = {}
-       meta = {}
-
-       tag = chunk.metadata.tag
-       extracted_values = expand_placeholders(chunk.metadata)
-       @last_seen_major_version = detect_es_major_version rescue DEFAULT_ELASTICSEARCH_VERSION
-
-       chunk.msgpack_each do |time, record|
-         next unless record.is_a? Hash
-         begin
-           process_message(tag, meta, header, time, record, bulk_message, extracted_values)
-           bulk_message_count += 1
-         rescue => e
-           router.emit_error_event(tag, time, record, e)
-         end
-       end
-
-       send_bulk(bulk_message, tag, chunk, bulk_message_count, extracted_values) unless bulk_message.empty?
-       bulk_message.clear
-     end
-
-     def process_message(tag, meta, header, time, record, bulk_message, extracted_values)
-       logstash_prefix, index_name, type_name = extracted_values
-
-       if @flatten_hashes
-         record = flatten_record(record)
-       end
-
-       if @hash_config
-         record = generate_hash_id_key(record)
-       end
-
-       dt = nil
-       if @logstash_format || @include_timestamp
-         if record.has_key?(TIMESTAMP_FIELD)
-           rts = record[TIMESTAMP_FIELD]
-           dt = parse_time(rts, time, tag)
-         elsif record.has_key?(@time_key)
-           rts = record[@time_key]
-           dt = parse_time(rts, time, tag)
-           record[TIMESTAMP_FIELD] = dt.iso8601(@time_precision) unless @time_key_exclude_timestamp
-         else
-           dt = Time.at(time).to_datetime
-           record[TIMESTAMP_FIELD] = dt.iso8601(@time_precision)
-         end
-       end
-
-       target_index_parent, target_index_child_key = @target_index_key ? get_parent_of(record, @target_index_key) : nil
-       if target_index_parent && target_index_parent[target_index_child_key]
-         target_index = target_index_parent.delete(target_index_child_key)
-       elsif @logstash_format
-         dt = dt.new_offset(0) if @utc_index
-         target_index = "#{logstash_prefix}#{@logstash_prefix_separator}#{dt.strftime(@logstash_dateformat)}"
-       else
-         target_index = index_name
-       end
-
-       # Change target_index to lower-case since Elasticsearch doesn't
-       # allow upper-case characters in index names.
-       target_index = target_index.downcase
-       if @include_tag_key
-         record[@tag_key] = tag
-       end
-
-       target_type_parent, target_type_child_key = @target_type_key ? get_parent_of(record, @target_type_key) : nil
-       if target_type_parent && target_type_parent[target_type_child_key]
-         target_type = target_type_parent.delete(target_type_child_key)
-         if @last_seen_major_version == 6
-           log.warn "Detected ES 6.x: `@type_name` will be used as the document `_type`."
-           target_type = type_name
-         elsif @last_seen_major_version >= 7
-           log.warn "Detected ES 7.x or above: `_doc` will be used as the document `_type`."
-           target_type = '_doc'.freeze
-         end
-       else
-         if @last_seen_major_version >= 7 && target_type != DEFAULT_TYPE_NAME_ES_7x
-           log.warn "Detected ES 7.x or above: `_doc` will be used as the document `_type`."
-           target_type = '_doc'.freeze
-         else
-           target_type = type_name
-         end
-       end
-
-       meta.clear
-       meta["_index".freeze] = target_index
-       meta["_type".freeze] = target_type
-
-       if @pipeline
-         meta["pipeline".freeze] = @pipeline
-       end
-
-       @meta_config_map.each do |record_accessor, meta_key|
-         if raw_value = record_accessor.call(record)
-           meta[meta_key] = raw_value
-         end
-       end
-
-       if @remove_keys
-         @remove_keys.each { |key| record.delete(key) }
-       end
-
-       append_record_to_messages(@write_operation, meta, header, record, bulk_message)
-     end
-
-     # returns [parent, child_key] of child described by path array in record's tree
-     # returns [nil, child_key] if path doesnt exist in record
-     def get_parent_of(record, path)
-       parent_object = path[0..-2].reduce(record) { |a, e| a.is_a?(Hash) ? a[e] : nil }
-       [parent_object, path[-1]]
-     end
-
-     # send_bulk given a specific bulk request, the original tag,
-     # chunk, and bulk_message_count
-     def send_bulk(data, tag, chunk, bulk_message_count, extracted_values)
-       retries = 0
-       begin
-         response = client.bulk body: data
-         if response['errors']
-           error = Fluent::Plugin::ElasticsearchErrorHandler.new(self)
-           error.handle_error(response, tag, chunk, bulk_message_count, extracted_values)
-         end
-       rescue RetryStreamError => e
-         emit_tag = @retry_tag ? @retry_tag : tag
-         router.emit_stream(emit_tag, e.retry_stream)
-       rescue *client.transport.host_unreachable_exceptions => e
-         if retries < 2
-           retries += 1
-           @_es = nil
-           @_es_info = nil
-           log.warn "Could not push logs to Elasticsearch, resetting connection and trying again. #{e.message}"
-           sleep 2**retries
-           retry
-         end
-         raise ConnectionFailure, "Could not push logs to Elasticsearch after #{retries} retries. #{e.message}"
-       rescue Exception
-         @_es = nil if @reconnect_on_error
-         @_es_info = nil if @reconnect_on_error
-         raise
-       end
-     end
-   end
- end
+ # encoding: UTF-8
+ require 'date'
+ require 'excon'
+ require 'elasticsearch'
+ require 'json'
+ require 'uri'
+ begin
+   require 'strptime'
+ rescue LoadError
+ end
+
+ require 'fluent/plugin/output'
+ require 'fluent/event'
+ require 'fluent/error'
+ require_relative 'elasticsearch_constants'
+ require_relative 'elasticsearch_error_handler'
+ require_relative 'elasticsearch_index_template'
+
+ module Fluent::Plugin
+   class ElasticsearchOutput < Output
+     class ConnectionFailure < Fluent::UnrecoverableError; end
+
+     # RetryStreamError privides a stream to be
+     # put back in the pipeline for cases where a bulk request
+     # failed (e.g some records succeed while others failed)
+     class RetryStreamError < StandardError
+       attr_reader :retry_stream
+       def initialize(retry_stream)
+         @retry_stream = retry_stream
+       end
+     end
+
+     helpers :event_emitter, :compat_parameters, :record_accessor
+
+     Fluent::Plugin.register_output('elasticsearch', self)
+
+     DEFAULT_BUFFER_TYPE = "memory"
+     DEFAULT_ELASTICSEARCH_VERSION = 5 # For compatibility.
+     DEFAULT_TYPE_NAME_ES_7x = "_doc".freeze
+     DEFAULT_TYPE_NAME = "fluentd".freeze
+
+     config_param :host, :string, :default => 'localhost'
+     config_param :port, :integer, :default => 9200
+     config_param :user, :string, :default => nil
+     config_param :password, :string, :default => nil, :secret => true
+     config_param :path, :string, :default => nil
+     config_param :scheme, :string, :default => 'http'
+     config_param :hosts, :string, :default => nil
+     config_param :target_index_key, :string, :default => nil
+     config_param :target_type_key, :string, :default => nil,
+                  :deprecated => <<EOC
+ Elasticsearch 7.x or above will ignore this config. Please use fixed type_name instead.
+ EOC
+     config_param :time_key_format, :string, :default => nil
+     config_param :time_precision, :integer, :default => 9
+     config_param :include_timestamp, :bool, :default => false
+     config_param :logstash_format, :bool, :default => false
+     config_param :logstash_prefix, :string, :default => "logstash"
+     config_param :logstash_prefix_separator, :string, :default => '-'
+     config_param :logstash_dateformat, :string, :default => "%Y.%m.%d"
+     config_param :utc_index, :bool, :default => true
+     config_param :type_name, :string, :default => DEFAULT_TYPE_NAME
+     config_param :index_name, :string, :default => "fluentd"
+     config_param :id_key, :string, :default => nil
+     config_param :write_operation, :string, :default => "index"
+     config_param :parent_key, :string, :default => nil
+     config_param :routing_key, :string, :default => nil
+     config_param :request_timeout, :time, :default => 5
+     config_param :reload_connections, :bool, :default => true
+     config_param :reload_on_failure, :bool, :default => false
+     config_param :retry_tag, :string, :default=>nil
+     config_param :resurrect_after, :time, :default => 60
+     config_param :time_key, :string, :default => nil
+     config_param :time_key_exclude_timestamp, :bool, :default => false
+     config_param :ssl_verify , :bool, :default => true
+     config_param :client_key, :string, :default => nil
+     config_param :client_cert, :string, :default => nil
+     config_param :client_key_pass, :string, :default => nil
+     config_param :ca_file, :string, :default => nil
+     config_param :ssl_version, :enum, list: [:SSLv23, :TLSv1, :TLSv1_1, :TLSv1_2], :default => :TLSv1
+     config_param :remove_keys, :string, :default => nil
+     config_param :remove_keys_on_update, :string, :default => ""
+     config_param :remove_keys_on_update_key, :string, :default => nil
+     config_param :flatten_hashes, :bool, :default => false
+     config_param :flatten_hashes_separator, :string, :default => "_"
+     config_param :template_name, :string, :default => nil
+     config_param :template_file, :string, :default => nil
+     config_param :template_overwrite, :bool, :default => false
+     config_param :templates, :hash, :default => nil
+     config_param :max_retry_putting_template, :integer, :default => 10
+     config_param :include_tag_key, :bool, :default => false
+     config_param :tag_key, :string, :default => 'tag'
+     config_param :time_parse_error_tag, :string, :default => 'Fluent::ElasticsearchOutput::TimeParser.error'
+     config_param :reconnect_on_error, :bool, :default => false
+     config_param :pipeline, :string, :default => nil
+     config_param :with_transporter_log, :bool, :default => false
+     config_param :content_type, :enum, list: [:"application/json", :"application/x-ndjson"], :default => :"application/json",
+                  :deprecated => <<EOC
+ elasticsearch gem v6.0.2 starts to use correct Content-Type. Please upgrade elasticserach gem and stop to use this option.
+ see: https://github.com/elastic/elasticsearch-ruby/pull/514
+ EOC
+
+     config_section :buffer do
+       config_set_default :@type, DEFAULT_BUFFER_TYPE
+       config_set_default :chunk_keys, ['tag']
+       config_set_default :timekey_use_utc, true
+     end
+
+     include Fluent::ElasticsearchIndexTemplate
+     include Fluent::Plugin::ElasticsearchConstants
+
+     def initialize
+       super
+     end
+
+     def configure(conf)
+       compat_parameters_convert(conf, :buffer)
+
+       super
+       raise Fluent::ConfigError, "'tag' in chunk_keys is required." if not @chunk_key_tag
+
+       @time_parser = create_time_parser
+
+       if @remove_keys
+         @remove_keys = @remove_keys.split(/\s*,\s*/)
+       end
+
+       if @target_index_key && @target_index_key.is_a?(String)
+         @target_index_key = @target_index_key.split '.'
+       end
+
+       if @target_type_key && @target_type_key.is_a?(String)
+         @target_type_key = @target_type_key.split '.'
+       end
+
+       if @remove_keys_on_update && @remove_keys_on_update.is_a?(String)
+         @remove_keys_on_update = @remove_keys_on_update.split ','
+       end
+
+       raise Fluent::ConfigError, "'max_retry_putting_template' must be positive number." if @max_retry_putting_template < 0
+       if @template_name && @template_file
+         retry_install(@max_retry_putting_template) do
+           template_install(@template_name, @template_file, @template_overwrite)
+         end
+       elsif @templates
+         retry_install(@max_retry_putting_template) do
+           templates_hash_install(@templates, @template_overwrite)
+         end
+       end
+
+       # Consider missing the prefix of "$." in nested key specifiers.
+       @id_key = convert_compat_id_key(@id_key) if @id_key
+       @parent_key = convert_compat_id_key(@parent_key) if @parent_key
+       @routing_key = convert_compat_id_key(@routing_key) if @routing_key
+
+       @meta_config_map = create_meta_config_map
+
+       begin
+         require 'oj'
+         @dump_proc = Oj.method(:dump)
+       rescue LoadError
+         @dump_proc = Yajl.method(:dump)
+       end
+
+       if @user && m = @user.match(/%{(?<user>.*)}/)
+         @user = URI.encode_www_form_component(m["user"])
+       end
+       if @password && m = @password.match(/%{(?<password>.*)}/)
+         @password = URI.encode_www_form_component(m["password"])
+       end
+
+       if @hash_config
+         raise Fluent::ConfigError, "@hash_config.hash_id_key and id_key must be equal." unless @hash_config.hash_id_key == @id_key
+       end
+       @transport_logger = nil
+       if @with_transporter_log
+         @transport_logger = log
+         log_level = conf['@log_level'] || conf['log_level']
+         log.warn "Consider to specify log_level with @log_level." unless log_level
+       end
+
+       @last_seen_major_version = detect_es_major_version rescue DEFAULT_ELASTICSEARCH_VERSION
+       if @last_seen_major_version == 6 && @type_name != DEFAULT_TYPE_NAME_ES_7x
+         log.info "Detected ES 6.x: ES 7.x will only accept `_doc` in type_name."
+       end
+       if @last_seen_major_version >= 7 && @type_name != DEFAULT_TYPE_NAME_ES_7x
+         log.warn "Detected ES 7.x or above: `_doc` will be used as the document `_type`."
+         @type_name = '_doc'.freeze
+       end
+     end
+
+     def detect_es_major_version
+       @_es_info ||= client.info
+       @_es_info["version"]["number"].to_i
+     end
+
+     def convert_compat_id_key(key)
+       if key.include?('.') && !key.start_with?('$[')
+         key = "$.#{key}" unless key.start_with?('$.')
+       end
+       key
+     end
+
+     def create_meta_config_map
+       result = []
+       result << [record_accessor_create(@id_key), '_id'] if @id_key
+       result << [record_accessor_create(@parent_key), '_parent'] if @parent_key
+       result << [record_accessor_create(@routing_key), '_routing'] if @routing_key
+       result
+     end
+
+     # once fluent v0.14 is released we might be able to use
+     # Fluent::Parser::TimeParser, but it doesn't quite do what we want - if gives
+     # [sec,nsec] where as we want something we can call `strftime` on...
+     def create_time_parser
+       if @time_key_format
+         begin
+           # Strptime doesn't support all formats, but for those it does it's
+           # blazingly fast.
+           strptime = Strptime.new(@time_key_format)
+           Proc.new { |value| strptime.exec(value).to_datetime }
+         rescue
+           # Can happen if Strptime doesn't recognize the format; or
+           # if strptime couldn't be required (because it's not installed -- it's
+           # ruby 2 only)
+           Proc.new { |value| DateTime.strptime(value, @time_key_format) }
+         end
+       else
+         Proc.new { |value| DateTime.parse(value) }
+       end
+     end
+
+     def parse_time(value, event_time, tag)
+       @time_parser.call(value)
+     rescue => e
+       router.emit_error_event(@time_parse_error_tag, Fluent::Engine.now, {'tag' => tag, 'time' => event_time, 'format' => @time_key_format, 'value' => value}, e)
+       return Time.at(event_time).to_datetime
+     end
+
+     def client
+       @_es ||= begin
+         excon_options = { client_key: @client_key, client_cert: @client_cert, client_key_pass: @client_key_pass }
+         adapter_conf = lambda {|f| f.adapter :excon, excon_options }
+         transport = Elasticsearch::Transport::Transport::HTTP::Faraday.new(get_connection_options.merge(
+           options: {
+             reload_connections: @reload_connections,
+             reload_on_failure: @reload_on_failure,
+             resurrect_after: @resurrect_after,
+             retry_on_failure: 5,
+             logger: @transport_logger,
+             transport_options: {
+               headers: { 'Content-Type' => @content_type.to_s },
+               request: { timeout: @request_timeout },
+               ssl: { verify: @ssl_verify, ca_file: @ca_file, version: @ssl_version }
+             },
+             http: {
+               user: @user,
+               password: @password
+             }
+           }), &adapter_conf)
+         es = Elasticsearch::Client.new transport: transport
+
+         begin
+           raise ConnectionFailure, "Can not reach Elasticsearch cluster (#{connection_options_description})!" unless es.ping
+         rescue *es.transport.host_unreachable_exceptions => e
+           raise ConnectionFailure, "Can not reach Elasticsearch cluster (#{connection_options_description})! #{e.message}"
+         end
+
+         log.info "Connection opened to Elasticsearch cluster => #{connection_options_description}"
+         es
+       end
+     end
+
+     def get_escaped_userinfo(host_str)
+       if m = host_str.match(/(?<scheme>.*)%{(?<user>.*)}:%{(?<password>.*)}(?<path>@.*)/)
+         m["scheme"] +
+           URI.encode_www_form_component(m["user"]) +
+           ':' +
+           URI.encode_www_form_component(m["password"]) +
+           m["path"]
+       else
+         host_str
+       end
+     end
+
+     def get_connection_options
+       raise "`password` must be present if `user` is present" if @user && !@password
+
+       hosts = if @hosts
+         @hosts.split(',').map do |host_str|
+           # Support legacy hosts format host:port,host:port,host:port...
+           if host_str.match(%r{^[^:]+(\:\d+)?$})
+             {
+               host: host_str.split(':')[0],
+               port: (host_str.split(':')[1] || @port).to_i,
+               scheme: @scheme
+             }
+           else
+             # New hosts format expects URLs such as http://logs.foo.com,https://john:pass@logs2.foo.com/elastic
+             uri = URI(get_escaped_userinfo(host_str))
+             %w(user password path).inject(host: uri.host, port: uri.port, scheme: uri.scheme) do |hash, key|
+               hash[key.to_sym] = uri.public_send(key) unless uri.public_send(key).nil? || uri.public_send(key) == ''
+               hash
+             end
+           end
+         end.compact
+       else
+         [{host: @host, port: @port, scheme: @scheme}]
+       end.each do |host|
+         host.merge!(user: @user, password: @password) if !host[:user] && @user
+         host.merge!(path: @path) if !host[:path] && @path
+       end
+
+       {
+         hosts: hosts
+       }
+     end
+
+     def connection_options_description
+       get_connection_options[:hosts].map do |host_info|
+         attributes = host_info.dup
+         attributes[:password] = 'obfuscated' if attributes.has_key?(:password)
+         attributes.inspect
+       end.join(', ')
+     end
+
+     def append_record_to_messages(op, meta, header, record, msgs)
+       case op
+       when UPDATE_OP, UPSERT_OP
+         if meta.has_key?(ID_FIELD)
+           header[UPDATE_OP] = meta
+           msgs << @dump_proc.call(header) << BODY_DELIMITER
+           msgs << @dump_proc.call(update_body(record, op)) << BODY_DELIMITER
+         end
+       when CREATE_OP
+         if meta.has_key?(ID_FIELD)
+           header[CREATE_OP] = meta
+           msgs << @dump_proc.call(header) << BODY_DELIMITER
+           msgs << @dump_proc.call(record) << BODY_DELIMITER
+         end
+       when INDEX_OP
+         header[INDEX_OP] = meta
+         msgs << @dump_proc.call(header) << BODY_DELIMITER
+         msgs << @dump_proc.call(record) << BODY_DELIMITER
+       end
+     end
+
+     def update_body(record, op)
+       update = remove_keys(record)
+       body = {"doc".freeze => update}
+       if op == UPSERT_OP
+         if update == record
+           body["doc_as_upsert".freeze] = true
+         else
+           body[UPSERT_OP] = record
+         end
+       end
+       body
+     end
+
+     def remove_keys(record)
+       keys = record[@remove_keys_on_update_key] || @remove_keys_on_update || []
+       record.delete(@remove_keys_on_update_key)
+       return record unless keys.any?
+       record = record.dup
+       keys.each { |key| record.delete(key) }
+       record
+     end
+
+     def flatten_record(record, prefix=[])
+       ret = {}
+       if record.is_a? Hash
+         record.each { |key, value|
+           ret.merge! flatten_record(value, prefix + [key.to_s])
+         }
+       elsif record.is_a? Array
+         # Don't mess with arrays, leave them unprocessed
+         ret.merge!({prefix.join(@flatten_hashes_separator) => record})
+       else
+         return {prefix.join(@flatten_hashes_separator) => record}
+       end
+       ret
+     end
+
+     def expand_placeholders(metadata)
+       logstash_prefix = extract_placeholders(@logstash_prefix, metadata)
+       index_name = extract_placeholders(@index_name, metadata)
+       type_name = extract_placeholders(@type_name, metadata)
+       return logstash_prefix, index_name, type_name
+     end
+
+     def multi_workers_ready?
+       true
+     end
+
+     def write(chunk)
+       bulk_message_count = 0
+       bulk_message = ''
+       header = {}
+       meta = {}
+
+       tag = chunk.metadata.tag
+       extracted_values = expand_placeholders(chunk.metadata)
+       @last_seen_major_version = detect_es_major_version rescue DEFAULT_ELASTICSEARCH_VERSION
+
+       chunk.msgpack_each do |time, record|
+         next unless record.is_a? Hash
+         begin
+           process_message(tag, meta, header, time, record, bulk_message, extracted_values)
+           bulk_message_count += 1
+         rescue => e
+           router.emit_error_event(tag, time, record, e)
+         end
+       end
+
+       send_bulk(bulk_message, tag, chunk, bulk_message_count, extracted_values) unless bulk_message.empty?
+       bulk_message.clear
+     end
+
+     def process_message(tag, meta, header, time, record, bulk_message, extracted_values)
+       logstash_prefix, index_name, type_name = extracted_values
+
+       if @flatten_hashes
+         record = flatten_record(record)
+       end
+
+       if @hash_config
+         record = generate_hash_id_key(record)
+       end
+
+       dt = nil
+       if @logstash_format || @include_timestamp
+         if record.has_key?(TIMESTAMP_FIELD)
+           rts = record[TIMESTAMP_FIELD]
+           dt = parse_time(rts, time, tag)
+         elsif record.has_key?(@time_key)
+           rts = record[@time_key]
+           dt = parse_time(rts, time, tag)
+           record[TIMESTAMP_FIELD] = dt.iso8601(@time_precision) unless @time_key_exclude_timestamp
+         else
+           dt = Time.at(time).to_datetime
+           record[TIMESTAMP_FIELD] = dt.iso8601(@time_precision)
+         end
+       end
+
+       target_index_parent, target_index_child_key = @target_index_key ? get_parent_of(record, @target_index_key) : nil
+       if target_index_parent && target_index_parent[target_index_child_key]
+         target_index = target_index_parent.delete(target_index_child_key)
+       elsif @logstash_format
+         dt = dt.new_offset(0) if @utc_index
+         target_index = "#{logstash_prefix}#{@logstash_prefix_separator}#{dt.strftime(@logstash_dateformat)}"
+       else
+         target_index = index_name
+       end
+
+       # Change target_index to lower-case since Elasticsearch doesn't
+       # allow upper-case characters in index names.
+       target_index = target_index.downcase
+       if @include_tag_key
+         record[@tag_key] = tag
+       end
+
+       target_type_parent, target_type_child_key = @target_type_key ? get_parent_of(record, @target_type_key) : nil
+       if target_type_parent && target_type_parent[target_type_child_key]
+         target_type = target_type_parent.delete(target_type_child_key)
+         if @last_seen_major_version == 6
+           log.warn "Detected ES 6.x: `@type_name` will be used as the document `_type`."
+           target_type = type_name
+         elsif @last_seen_major_version >= 7
+           log.warn "Detected ES 7.x or above: `_doc` will be used as the document `_type`."
+           target_type = '_doc'.freeze
+         end
+       else
+         if @last_seen_major_version >= 7 && target_type != DEFAULT_TYPE_NAME_ES_7x
+           log.warn "Detected ES 7.x or above: `_doc` will be used as the document `_type`."
+           target_type = '_doc'.freeze
+         else
+           target_type = type_name
+         end
+       end
+
+       meta.clear
+       meta["_index".freeze] = target_index
+       meta["_type".freeze] = target_type
+
+       if @pipeline
+         meta["pipeline".freeze] = @pipeline
+       end
+
+       @meta_config_map.each do |record_accessor, meta_key|
+         if raw_value = record_accessor.call(record)
+           meta[meta_key] = raw_value
+         end
+       end
+
+       if @remove_keys
+         @remove_keys.each { |key| record.delete(key) }
+       end
+
+       append_record_to_messages(@write_operation, meta, header, record, bulk_message)
+     end
+
+     # returns [parent, child_key] of child described by path array in record's tree
+     # returns [nil, child_key] if path doesnt exist in record
+     def get_parent_of(record, path)
+       parent_object = path[0..-2].reduce(record) { |a, e| a.is_a?(Hash) ? a[e] : nil }
+       [parent_object, path[-1]]
+     end
+
+     # send_bulk given a specific bulk request, the original tag,
+     # chunk, and bulk_message_count
+     def send_bulk(data, tag, chunk, bulk_message_count, extracted_values)
+       retries = 0
+       begin
+         response = client.bulk body: data
+         if response['errors']
+           error = Fluent::Plugin::ElasticsearchErrorHandler.new(self)
+           error.handle_error(response, tag, chunk, bulk_message_count, extracted_values)
+         end
+       rescue RetryStreamError => e
+         emit_tag = @retry_tag ? @retry_tag : tag
+         router.emit_stream(emit_tag, e.retry_stream)
+       rescue *client.transport.host_unreachable_exceptions => e
+         if retries < 2
+           retries += 1
+           @_es = nil
+           @_es_info = nil
+           log.warn "Could not push logs to Elasticsearch, resetting connection and trying again. #{e.message}"
+           sleep 2**retries
+           retry
+         end
+         raise ConnectionFailure, "Could not push logs to Elasticsearch after #{retries} retries. #{e.message}"
+       rescue Exception
+         @_es = nil if @reconnect_on_error
+         @_es_info = nil if @reconnect_on_error
+         raise
+       end
+     end
+   end
+ end
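Two practical notes on the changes above, hedged on standard Fluentd v1 behavior: because ConnectionFailure now derives from Fluent::UnrecoverableError (hence the new require 'fluent/error'), a chunk that still cannot reach the cluster after the plugin's own in-process retries should no longer be retried indefinitely on the buffer's backoff schedule; Fluentd generally hands such chunks to a <secondary> output when one is configured, otherwise it discards them with an error log. Separately, configure now rejects a negative max_retry_putting_template with a Fluent::ConfigError at startup. A minimal configuration sketch touching the affected options (host, paths, and names are illustrative, not taken from this diff):

    <match es.**>
      @type elasticsearch
      host localhost
      port 9200
      template_name my_template                       # hypothetical template name
      template_file /etc/td-agent/my_template.json    # hypothetical path
      max_retry_putting_template 10                   # must not be negative as of 2.10.4
      retry_tag es.retry                              # records from partially failed bulks are re-emitted under this tag
      <secondary>
        @type file
        path /var/log/td-agent/es-failed-chunks       # hypothetical path; receives chunks abandoned as unrecoverable
      </secondary>
    </match>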