fluent-plugin-elasticsearch 2.10.2 → 2.10.3

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
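In practical terms, 2.10.3 touches only the output plugin: it adds a max_retry_putting_template parameter (default 10) and wraps index-template installation in a retry_install helper, so a template PUT that fails at startup is retried instead of aborting configure on the first error. The elasticsearch_genid filter also appears in the diff below, but its content is identical in both versions.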
@@ -1,25 +1,25 @@
 require 'securerandom'
 require 'base64'
 require 'fluent/plugin/filter'
 
 module Fluent::Plugin
   class ElasticsearchGenidFilter < Filter
     Fluent::Plugin.register_filter('elasticsearch_genid', self)
 
     config_param :hash_id_key, :string, :default => '_hash'
 
     def initialize
       super
     end
 
     def configure(conf)
       super
     end
 
     def filter(tag, time, record)
       record[@hash_id_key] = Base64.strict_encode64(SecureRandom.uuid)
       record
     end
 
   end
 end
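(The filter above is byte-for-byte identical in 2.10.2 and 2.10.3; the registry diff re-lists unchanged files in full, so it is shown once here as context.) For orientation: the filter stamps each record with a random Base64-encoded UUID under hash_id_key (default _hash), which the output plugin can pick up as the document _id. A minimal sketch of the generated value in plain Ruby, with a hypothetical input record:

    require 'securerandom'
    require 'base64'

    # The same expression the filter uses: a random UUID, Base64-encoded.
    record = { "message" => "hello" }  # hypothetical input record
    record["_hash"] = Base64.strict_encode64(SecureRandom.uuid)
    puts record["_hash"]  # e.g. "MzVjZDg3NTktNDgzMC00..."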
@@ -1,533 +1,538 @@
 # encoding: UTF-8
 require 'date'
 require 'excon'
 require 'elasticsearch'
 require 'json'
 require 'uri'
 begin
   require 'strptime'
 rescue LoadError
 end
 
 require 'fluent/plugin/output'
 require 'fluent/event'
 require_relative 'elasticsearch_constants'
 require_relative 'elasticsearch_error_handler'
 require_relative 'elasticsearch_index_template'
 
 module Fluent::Plugin
   class ElasticsearchOutput < Output
     class ConnectionFailure < StandardError; end
 
     # RetryStreamError provides a stream to be
     # put back in the pipeline for cases where a bulk request
     # failed (e.g. some records succeeded while others failed)
     class RetryStreamError < StandardError
       attr_reader :retry_stream
       def initialize(retry_stream)
         @retry_stream = retry_stream
       end
     end
 
     helpers :event_emitter, :compat_parameters, :record_accessor
 
     Fluent::Plugin.register_output('elasticsearch', self)
 
     DEFAULT_BUFFER_TYPE = "memory"
     DEFAULT_ELASTICSEARCH_VERSION = 5 # For compatibility.
     DEFAULT_TYPE_NAME_ES_7x = "_doc".freeze
     DEFAULT_TYPE_NAME = "fluentd".freeze
 
     config_param :host, :string, :default => 'localhost'
     config_param :port, :integer, :default => 9200
     config_param :user, :string, :default => nil
     config_param :password, :string, :default => nil, :secret => true
     config_param :path, :string, :default => nil
     config_param :scheme, :string, :default => 'http'
     config_param :hosts, :string, :default => nil
     config_param :target_index_key, :string, :default => nil
     config_param :target_type_key, :string, :default => nil,
                  :deprecated => <<EOC
 Elasticsearch 7.x or above will ignore this config. Please use fixed type_name instead.
 EOC
     config_param :time_key_format, :string, :default => nil
     config_param :time_precision, :integer, :default => 9
     config_param :include_timestamp, :bool, :default => false
     config_param :logstash_format, :bool, :default => false
     config_param :logstash_prefix, :string, :default => "logstash"
     config_param :logstash_prefix_separator, :string, :default => '-'
     config_param :logstash_dateformat, :string, :default => "%Y.%m.%d"
     config_param :utc_index, :bool, :default => true
     config_param :type_name, :string, :default => DEFAULT_TYPE_NAME
     config_param :index_name, :string, :default => "fluentd"
     config_param :id_key, :string, :default => nil
     config_param :write_operation, :string, :default => "index"
     config_param :parent_key, :string, :default => nil
     config_param :routing_key, :string, :default => nil
     config_param :request_timeout, :time, :default => 5
     config_param :reload_connections, :bool, :default => true
     config_param :reload_on_failure, :bool, :default => false
     config_param :retry_tag, :string, :default=>nil
     config_param :resurrect_after, :time, :default => 60
     config_param :time_key, :string, :default => nil
     config_param :time_key_exclude_timestamp, :bool, :default => false
     config_param :ssl_verify , :bool, :default => true
     config_param :client_key, :string, :default => nil
     config_param :client_cert, :string, :default => nil
     config_param :client_key_pass, :string, :default => nil
     config_param :ca_file, :string, :default => nil
     config_param :ssl_version, :enum, list: [:SSLv23, :TLSv1, :TLSv1_1, :TLSv1_2], :default => :TLSv1
     config_param :remove_keys, :string, :default => nil
     config_param :remove_keys_on_update, :string, :default => ""
     config_param :remove_keys_on_update_key, :string, :default => nil
     config_param :flatten_hashes, :bool, :default => false
     config_param :flatten_hashes_separator, :string, :default => "_"
     config_param :template_name, :string, :default => nil
     config_param :template_file, :string, :default => nil
     config_param :template_overwrite, :bool, :default => false
     config_param :templates, :hash, :default => nil
+    config_param :max_retry_putting_template, :integer, :default => 10
     config_param :include_tag_key, :bool, :default => false
     config_param :tag_key, :string, :default => 'tag'
     config_param :time_parse_error_tag, :string, :default => 'Fluent::ElasticsearchOutput::TimeParser.error'
     config_param :reconnect_on_error, :bool, :default => false
     config_param :pipeline, :string, :default => nil
     config_param :with_transporter_log, :bool, :default => false
     config_param :content_type, :enum, list: [:"application/json", :"application/x-ndjson"], :default => :"application/json",
                  :deprecated => <<EOC
 elasticsearch gem v6.0.2 starts to use the correct Content-Type. Please upgrade the elasticsearch gem and stop using this option.
 see: https://github.com/elastic/elasticsearch-ruby/pull/514
 EOC
 
     config_section :buffer do
       config_set_default :@type, DEFAULT_BUFFER_TYPE
       config_set_default :chunk_keys, ['tag']
       config_set_default :timekey_use_utc, true
     end
 
     include Fluent::ElasticsearchIndexTemplate
     include Fluent::Plugin::ElasticsearchConstants
 
     def initialize
       super
     end
 
     def configure(conf)
       compat_parameters_convert(conf, :buffer)
 
       super
       raise Fluent::ConfigError, "'tag' in chunk_keys is required." if not @chunk_key_tag
 
       @time_parser = create_time_parser
 
       if @remove_keys
         @remove_keys = @remove_keys.split(/\s*,\s*/)
       end
 
       if @target_index_key && @target_index_key.is_a?(String)
         @target_index_key = @target_index_key.split '.'
       end
 
       if @target_type_key && @target_type_key.is_a?(String)
         @target_type_key = @target_type_key.split '.'
       end
 
       if @remove_keys_on_update && @remove_keys_on_update.is_a?(String)
         @remove_keys_on_update = @remove_keys_on_update.split ','
       end
 
       if @template_name && @template_file
-        template_install(@template_name, @template_file, @template_overwrite)
+        retry_install(@max_retry_putting_template) do
+          template_install(@template_name, @template_file, @template_overwrite)
+        end
       elsif @templates
-        templates_hash_install(@templates, @template_overwrite)
+        retry_install(@max_retry_putting_template) do
+          templates_hash_install(@templates, @template_overwrite)
+        end
       end
 
       # Allow for a missing "$." prefix in nested key specifiers.
       @id_key = convert_compat_id_key(@id_key) if @id_key
       @parent_key = convert_compat_id_key(@parent_key) if @parent_key
       @routing_key = convert_compat_id_key(@routing_key) if @routing_key
 
       @meta_config_map = create_meta_config_map
 
       begin
         require 'oj'
         @dump_proc = Oj.method(:dump)
       rescue LoadError
         @dump_proc = Yajl.method(:dump)
       end
 
       if @user && m = @user.match(/%{(?<user>.*)}/)
         @user = URI.encode_www_form_component(m["user"])
       end
       if @password && m = @password.match(/%{(?<password>.*)}/)
         @password = URI.encode_www_form_component(m["password"])
       end
 
       if @hash_config
         raise Fluent::ConfigError, "@hash_config.hash_id_key and id_key must be equal." unless @hash_config.hash_id_key == @id_key
       end
       @transport_logger = nil
       if @with_transporter_log
         @transport_logger = log
         log_level = conf['@log_level'] || conf['log_level']
         log.warn "Consider to specify log_level with @log_level." unless log_level
       end
 
       @last_seen_major_version = detect_es_major_version rescue DEFAULT_ELASTICSEARCH_VERSION
       if @last_seen_major_version == 6 && @type_name != DEFAULT_TYPE_NAME_ES_7x
         log.info "Detected ES 6.x: ES 7.x will only accept `_doc` in type_name."
       end
       if @last_seen_major_version >= 7 && @type_name != DEFAULT_TYPE_NAME_ES_7x
         log.warn "Detected ES 7.x or above: `_doc` will be used as the document `_type`."
         @type_name = '_doc'.freeze
       end
     end
 
     def detect_es_major_version
       @_es_info ||= client.info
       @_es_info["version"]["number"].to_i
     end
 
     def convert_compat_id_key(key)
       if key.include?('.') && !key.start_with?('$[')
         key = "$.#{key}" unless key.start_with?('$.')
       end
       key
     end
 
     def create_meta_config_map
       result = []
       result << [record_accessor_create(@id_key), '_id'] if @id_key
       result << [record_accessor_create(@parent_key), '_parent'] if @parent_key
       result << [record_accessor_create(@routing_key), '_routing'] if @routing_key
       result
     end
 
     # once fluent v0.14 is released we might be able to use
     # Fluent::Parser::TimeParser, but it doesn't quite do what we want - it gives
     # [sec,nsec] whereas we want something we can call `strftime` on...
     def create_time_parser
       if @time_key_format
         begin
           # Strptime doesn't support all formats, but for those it does it's
           # blazingly fast.
           strptime = Strptime.new(@time_key_format)
           Proc.new { |value| strptime.exec(value).to_datetime }
         rescue
           # Can happen if Strptime doesn't recognize the format; or
           # if strptime couldn't be required (because it's not installed -- it's
           # ruby 2 only)
           Proc.new { |value| DateTime.strptime(value, @time_key_format) }
         end
       else
         Proc.new { |value| DateTime.parse(value) }
       end
     end
 
     def parse_time(value, event_time, tag)
       @time_parser.call(value)
     rescue => e
       router.emit_error_event(@time_parse_error_tag, Fluent::Engine.now, {'tag' => tag, 'time' => event_time, 'format' => @time_key_format, 'value' => value}, e)
       return Time.at(event_time).to_datetime
     end
 
     def client
       @_es ||= begin
         excon_options = { client_key: @client_key, client_cert: @client_cert, client_key_pass: @client_key_pass }
         adapter_conf = lambda {|f| f.adapter :excon, excon_options }
         transport = Elasticsearch::Transport::Transport::HTTP::Faraday.new(get_connection_options.merge(
           options: {
             reload_connections: @reload_connections,
             reload_on_failure: @reload_on_failure,
             resurrect_after: @resurrect_after,
             retry_on_failure: 5,
             logger: @transport_logger,
             transport_options: {
               headers: { 'Content-Type' => @content_type.to_s },
               request: { timeout: @request_timeout },
               ssl: { verify: @ssl_verify, ca_file: @ca_file, version: @ssl_version }
             },
             http: {
               user: @user,
               password: @password
             }
           }), &adapter_conf)
         es = Elasticsearch::Client.new transport: transport
 
         begin
           raise ConnectionFailure, "Can not reach Elasticsearch cluster (#{connection_options_description})!" unless es.ping
         rescue *es.transport.host_unreachable_exceptions => e
           raise ConnectionFailure, "Can not reach Elasticsearch cluster (#{connection_options_description})! #{e.message}"
         end
 
         log.info "Connection opened to Elasticsearch cluster => #{connection_options_description}"
         es
       end
     end
 
     def get_escaped_userinfo(host_str)
       if m = host_str.match(/(?<scheme>.*)%{(?<user>.*)}:%{(?<password>.*)}(?<path>@.*)/)
         m["scheme"] +
           URI.encode_www_form_component(m["user"]) +
           ':' +
           URI.encode_www_form_component(m["password"]) +
           m["path"]
       else
         host_str
       end
     end
 
     def get_connection_options
       raise "`password` must be present if `user` is present" if @user && !@password
 
       hosts = if @hosts
                 @hosts.split(',').map do |host_str|
                   # Support legacy hosts format host:port,host:port,host:port...
                   if host_str.match(%r{^[^:]+(\:\d+)?$})
                     {
                       host: host_str.split(':')[0],
                       port: (host_str.split(':')[1] || @port).to_i,
                       scheme: @scheme
                     }
                   else
                     # New hosts format expects URLs such as http://logs.foo.com,https://john:pass@logs2.foo.com/elastic
                     uri = URI(get_escaped_userinfo(host_str))
                     %w(user password path).inject(host: uri.host, port: uri.port, scheme: uri.scheme) do |hash, key|
                       hash[key.to_sym] = uri.public_send(key) unless uri.public_send(key).nil? || uri.public_send(key) == ''
                       hash
                     end
                   end
                 end.compact
               else
                 [{host: @host, port: @port, scheme: @scheme}]
               end.each do |host|
         host.merge!(user: @user, password: @password) if !host[:user] && @user
         host.merge!(path: @path) if !host[:path] && @path
       end
 
       {
         hosts: hosts
       }
     end
 
     def connection_options_description
       get_connection_options[:hosts].map do |host_info|
         attributes = host_info.dup
         attributes[:password] = 'obfuscated' if attributes.has_key?(:password)
         attributes.inspect
       end.join(', ')
     end
 
     def append_record_to_messages(op, meta, header, record, msgs)
       case op
       when UPDATE_OP, UPSERT_OP
         if meta.has_key?(ID_FIELD)
           header[UPDATE_OP] = meta
           msgs << @dump_proc.call(header) << BODY_DELIMITER
           msgs << @dump_proc.call(update_body(record, op)) << BODY_DELIMITER
         end
       when CREATE_OP
         if meta.has_key?(ID_FIELD)
           header[CREATE_OP] = meta
           msgs << @dump_proc.call(header) << BODY_DELIMITER
           msgs << @dump_proc.call(record) << BODY_DELIMITER
         end
       when INDEX_OP
         header[INDEX_OP] = meta
         msgs << @dump_proc.call(header) << BODY_DELIMITER
         msgs << @dump_proc.call(record) << BODY_DELIMITER
       end
     end
 
     def update_body(record, op)
       update = remove_keys(record)
       body = {"doc".freeze => update}
       if op == UPSERT_OP
         if update == record
           body["doc_as_upsert".freeze] = true
         else
           body[UPSERT_OP] = record
         end
       end
       body
     end
 
     def remove_keys(record)
       keys = record[@remove_keys_on_update_key] || @remove_keys_on_update || []
       record.delete(@remove_keys_on_update_key)
       return record unless keys.any?
       record = record.dup
       keys.each { |key| record.delete(key) }
       record
     end
 
     def flatten_record(record, prefix=[])
       ret = {}
       if record.is_a? Hash
         record.each { |key, value|
           ret.merge! flatten_record(value, prefix + [key.to_s])
         }
       elsif record.is_a? Array
         # Don't mess with arrays, leave them unprocessed
         ret.merge!({prefix.join(@flatten_hashes_separator) => record})
       else
         return {prefix.join(@flatten_hashes_separator) => record}
       end
       ret
     end
 
     def expand_placeholders(metadata)
       logstash_prefix = extract_placeholders(@logstash_prefix, metadata)
       index_name = extract_placeholders(@index_name, metadata)
       type_name = extract_placeholders(@type_name, metadata)
       return logstash_prefix, index_name, type_name
     end
 
     def multi_workers_ready?
       true
     end
 
     def write(chunk)
       bulk_message_count = 0
       bulk_message = ''
       header = {}
       meta = {}
 
       tag = chunk.metadata.tag
       extracted_values = expand_placeholders(chunk.metadata)
       @last_seen_major_version = detect_es_major_version rescue DEFAULT_ELASTICSEARCH_VERSION
 
       chunk.msgpack_each do |time, record|
         next unless record.is_a? Hash
         begin
           process_message(tag, meta, header, time, record, bulk_message, extracted_values)
           bulk_message_count += 1
         rescue => e
           router.emit_error_event(tag, time, record, e)
         end
       end
 
       send_bulk(bulk_message, tag, chunk, bulk_message_count, extracted_values) unless bulk_message.empty?
       bulk_message.clear
     end
 
     def process_message(tag, meta, header, time, record, bulk_message, extracted_values)
       logstash_prefix, index_name, type_name = extracted_values
 
       if @flatten_hashes
         record = flatten_record(record)
       end
 
       if @hash_config
         record = generate_hash_id_key(record)
       end
 
       dt = nil
       if @logstash_format || @include_timestamp
         if record.has_key?(TIMESTAMP_FIELD)
           rts = record[TIMESTAMP_FIELD]
           dt = parse_time(rts, time, tag)
         elsif record.has_key?(@time_key)
           rts = record[@time_key]
           dt = parse_time(rts, time, tag)
           record[TIMESTAMP_FIELD] = dt.iso8601(@time_precision) unless @time_key_exclude_timestamp
         else
           dt = Time.at(time).to_datetime
           record[TIMESTAMP_FIELD] = dt.iso8601(@time_precision)
         end
       end
 
       target_index_parent, target_index_child_key = @target_index_key ? get_parent_of(record, @target_index_key) : nil
       if target_index_parent && target_index_parent[target_index_child_key]
         target_index = target_index_parent.delete(target_index_child_key)
       elsif @logstash_format
         dt = dt.new_offset(0) if @utc_index
         target_index = "#{logstash_prefix}#{@logstash_prefix_separator}#{dt.strftime(@logstash_dateformat)}"
       else
         target_index = index_name
       end
 
       # Change target_index to lower-case since Elasticsearch doesn't
       # allow upper-case characters in index names.
       target_index = target_index.downcase
       if @include_tag_key
         record[@tag_key] = tag
       end
 
       target_type_parent, target_type_child_key = @target_type_key ? get_parent_of(record, @target_type_key) : nil
       if target_type_parent && target_type_parent[target_type_child_key]
         target_type = target_type_parent.delete(target_type_child_key)
         if @last_seen_major_version == 6
           log.warn "Detected ES 6.x: `@type_name` will be used as the document `_type`."
           target_type = type_name
         elsif @last_seen_major_version >= 7
           log.warn "Detected ES 7.x or above: `_doc` will be used as the document `_type`."
           target_type = '_doc'.freeze
         end
       else
         if @last_seen_major_version >= 7 && target_type != DEFAULT_TYPE_NAME_ES_7x
           log.warn "Detected ES 7.x or above: `_doc` will be used as the document `_type`."
           target_type = '_doc'.freeze
         else
           target_type = type_name
         end
       end
 
       meta.clear
       meta["_index".freeze] = target_index
       meta["_type".freeze] = target_type
 
       if @pipeline
         meta["pipeline".freeze] = @pipeline
       end
 
       @meta_config_map.each do |record_accessor, meta_key|
         if raw_value = record_accessor.call(record)
           meta[meta_key] = raw_value
         end
       end
 
       if @remove_keys
         @remove_keys.each { |key| record.delete(key) }
       end
 
       append_record_to_messages(@write_operation, meta, header, record, bulk_message)
     end
 
     # returns [parent, child_key] of child described by path array in record's tree
     # returns [nil, child_key] if path doesn't exist in record
     def get_parent_of(record, path)
       parent_object = path[0..-2].reduce(record) { |a, e| a.is_a?(Hash) ? a[e] : nil }
       [parent_object, path[-1]]
     end
 
     # send_bulk: given a specific bulk request, the original tag,
     # chunk, and bulk_message_count
     def send_bulk(data, tag, chunk, bulk_message_count, extracted_values)
       retries = 0
       begin
         response = client.bulk body: data
         if response['errors']
           error = Fluent::Plugin::ElasticsearchErrorHandler.new(self)
           error.handle_error(response, tag, chunk, bulk_message_count, extracted_values)
         end
       rescue RetryStreamError => e
         emit_tag = @retry_tag ? @retry_tag : tag
         router.emit_stream(emit_tag, e.retry_stream)
       rescue *client.transport.host_unreachable_exceptions => e
         if retries < 2
           retries += 1
           @_es = nil
           @_es_info = nil
           log.warn "Could not push logs to Elasticsearch, resetting connection and trying again. #{e.message}"
           sleep 2**retries
           retry
         end
         raise ConnectionFailure, "Could not push logs to Elasticsearch after #{retries} retries. #{e.message}"
       rescue Exception
         @_es = nil if @reconnect_on_error
         @_es_info = nil if @reconnect_on_error
         raise
       end
     end
   end
 end
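The retry_install helper called from configure is defined in elasticsearch_index_template.rb, which is not included in this diff, so its exact behavior is not shown here. Judging from the send_bulk rescue loop above, it presumably yields to the block and retries with backoff up to max_retry_putting_template times. A hypothetical Ruby sketch of that shape (the rescued exception class and the backoff schedule are assumptions, not the plugin's actual code):

    # Hypothetical sketch only; the real helper lives in
    # elasticsearch_index_template.rb and is not part of this diff.
    def retry_install(max_retries)
      retries = 0
      begin
        yield
      rescue StandardError
        raise if retries >= max_retries  # give up and let configure fail
        retries += 1
        sleep 2**retries                 # exponential backoff, as in send_bulk
        retry
      end
    end

Whatever the exact implementation, the visible effect of the change is that in 2.10.2 the first failure during template installation propagated straight out of configure, while 2.10.3 retries up to max_retry_putting_template times before giving up.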