fluent-plugin-elasticsearch 2.10.2 → 2.10.3
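
This release touches two files in the gem (named per fluentd's plugin file-naming convention): lib/fluent/plugin/filter_elasticsearch_genid.rb, the `elasticsearch_genid` filter shown in the first hunk, and lib/fluent/plugin/out_elasticsearch.rb, the `elasticsearch` output shown in the second. The functional change is confined to the output plugin: index-template installation during `configure` is now retried, governed by a new `max_retry_putting_template` parameter (default: 10).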

@@ -1,25 +1,25 @@
 require 'securerandom'
 require 'base64'
 require 'fluent/plugin/filter'

 module Fluent::Plugin
   class ElasticsearchGenidFilter < Filter
     Fluent::Plugin.register_filter('elasticsearch_genid', self)

     config_param :hash_id_key, :string, :default => '_hash'

     def initialize
       super
     end

     def configure(conf)
       super
     end

     def filter(tag, time, record)
       record[@hash_id_key] = Base64.strict_encode64(SecureRandom.uuid)
       record
     end

   end
 end
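
Both sides of the hunk above are identical: 2.10.3 ships the genid filter unchanged. For reference, the standalone snippet below mirrors what its `filter` method does to each record, outside of fluentd; the sample record and printed output are illustrative only.

    require 'securerandom'
    require 'base64'

    # Mirror of ElasticsearchGenidFilter#filter: tag the record with a
    # random UUID, Base64-encoded, under the configured hash_id_key
    # (default '_hash'). Downstream, the elasticsearch output can use
    # this value as the document _id so retried bulk writes do not
    # produce duplicate documents.
    record = { "message" => "hello" }  # sample record, not plugin code
    record["_hash"] = Base64.strict_encode64(SecureRandom.uuid)
    p record  # => {"message"=>"hello", "_hash"=>"...48 Base64 chars..."}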
@@ -1,533 +1,538 @@
 # encoding: UTF-8
 require 'date'
 require 'excon'
 require 'elasticsearch'
 require 'json'
 require 'uri'
 begin
   require 'strptime'
 rescue LoadError
 end

 require 'fluent/plugin/output'
 require 'fluent/event'
 require_relative 'elasticsearch_constants'
 require_relative 'elasticsearch_error_handler'
 require_relative 'elasticsearch_index_template'

 module Fluent::Plugin
   class ElasticsearchOutput < Output
     class ConnectionFailure < StandardError; end

     # RetryStreamError privides a stream to be
     # put back in the pipeline for cases where a bulk request
     # failed (e.g some records succeed while others failed)
     class RetryStreamError < StandardError
       attr_reader :retry_stream
       def initialize(retry_stream)
         @retry_stream = retry_stream
       end
     end

     helpers :event_emitter, :compat_parameters, :record_accessor

     Fluent::Plugin.register_output('elasticsearch', self)

     DEFAULT_BUFFER_TYPE = "memory"
     DEFAULT_ELASTICSEARCH_VERSION = 5 # For compatibility.
     DEFAULT_TYPE_NAME_ES_7x = "_doc".freeze
     DEFAULT_TYPE_NAME = "fluentd".freeze

     config_param :host, :string, :default => 'localhost'
     config_param :port, :integer, :default => 9200
     config_param :user, :string, :default => nil
     config_param :password, :string, :default => nil, :secret => true
     config_param :path, :string, :default => nil
     config_param :scheme, :string, :default => 'http'
     config_param :hosts, :string, :default => nil
     config_param :target_index_key, :string, :default => nil
     config_param :target_type_key, :string, :default => nil,
                  :deprecated => <<EOC
 Elasticsearch 7.x or above will ignore this config. Please use fixed type_name instead.
 EOC
     config_param :time_key_format, :string, :default => nil
     config_param :time_precision, :integer, :default => 9
     config_param :include_timestamp, :bool, :default => false
     config_param :logstash_format, :bool, :default => false
     config_param :logstash_prefix, :string, :default => "logstash"
     config_param :logstash_prefix_separator, :string, :default => '-'
     config_param :logstash_dateformat, :string, :default => "%Y.%m.%d"
     config_param :utc_index, :bool, :default => true
     config_param :type_name, :string, :default => DEFAULT_TYPE_NAME
     config_param :index_name, :string, :default => "fluentd"
     config_param :id_key, :string, :default => nil
     config_param :write_operation, :string, :default => "index"
     config_param :parent_key, :string, :default => nil
     config_param :routing_key, :string, :default => nil
     config_param :request_timeout, :time, :default => 5
     config_param :reload_connections, :bool, :default => true
     config_param :reload_on_failure, :bool, :default => false
     config_param :retry_tag, :string, :default=>nil
     config_param :resurrect_after, :time, :default => 60
     config_param :time_key, :string, :default => nil
     config_param :time_key_exclude_timestamp, :bool, :default => false
     config_param :ssl_verify , :bool, :default => true
     config_param :client_key, :string, :default => nil
     config_param :client_cert, :string, :default => nil
     config_param :client_key_pass, :string, :default => nil
     config_param :ca_file, :string, :default => nil
     config_param :ssl_version, :enum, list: [:SSLv23, :TLSv1, :TLSv1_1, :TLSv1_2], :default => :TLSv1
     config_param :remove_keys, :string, :default => nil
     config_param :remove_keys_on_update, :string, :default => ""
     config_param :remove_keys_on_update_key, :string, :default => nil
     config_param :flatten_hashes, :bool, :default => false
     config_param :flatten_hashes_separator, :string, :default => "_"
     config_param :template_name, :string, :default => nil
     config_param :template_file, :string, :default => nil
     config_param :template_overwrite, :bool, :default => false
     config_param :templates, :hash, :default => nil
+    config_param :max_retry_putting_template, :integer, :default => 10
     config_param :include_tag_key, :bool, :default => false
     config_param :tag_key, :string, :default => 'tag'
     config_param :time_parse_error_tag, :string, :default => 'Fluent::ElasticsearchOutput::TimeParser.error'
     config_param :reconnect_on_error, :bool, :default => false
     config_param :pipeline, :string, :default => nil
     config_param :with_transporter_log, :bool, :default => false
     config_param :content_type, :enum, list: [:"application/json", :"application/x-ndjson"], :default => :"application/json",
                  :deprecated => <<EOC
 elasticsearch gem v6.0.2 starts to use correct Content-Type. Please upgrade elasticserach gem and stop to use this option.
 see: https://github.com/elastic/elasticsearch-ruby/pull/514
 EOC

     config_section :buffer do
       config_set_default :@type, DEFAULT_BUFFER_TYPE
       config_set_default :chunk_keys, ['tag']
       config_set_default :timekey_use_utc, true
     end

     include Fluent::ElasticsearchIndexTemplate
     include Fluent::Plugin::ElasticsearchConstants

     def initialize
       super
     end

     def configure(conf)
       compat_parameters_convert(conf, :buffer)

       super
       raise Fluent::ConfigError, "'tag' in chunk_keys is required." if not @chunk_key_tag

       @time_parser = create_time_parser

       if @remove_keys
         @remove_keys = @remove_keys.split(/\s*,\s*/)
       end

       if @target_index_key && @target_index_key.is_a?(String)
         @target_index_key = @target_index_key.split '.'
       end

       if @target_type_key && @target_type_key.is_a?(String)
         @target_type_key = @target_type_key.split '.'
       end

       if @remove_keys_on_update && @remove_keys_on_update.is_a?(String)
         @remove_keys_on_update = @remove_keys_on_update.split ','
       end

       if @template_name && @template_file
-        template_install(@template_name, @template_file, @template_overwrite)
+        retry_install(@max_retry_putting_template) do
+          template_install(@template_name, @template_file, @template_overwrite)
+        end
       elsif @templates
-        templates_hash_install(@templates, @template_overwrite)
+        retry_install(@max_retry_putting_template) do
+          templates_hash_install(@templates, @template_overwrite)
+        end
       end

       # Consider missing the prefix of "$." in nested key specifiers.
       @id_key = convert_compat_id_key(@id_key) if @id_key
       @parent_key = convert_compat_id_key(@parent_key) if @parent_key
       @routing_key = convert_compat_id_key(@routing_key) if @routing_key

       @meta_config_map = create_meta_config_map

       begin
         require 'oj'
         @dump_proc = Oj.method(:dump)
       rescue LoadError
         @dump_proc = Yajl.method(:dump)
       end

       if @user && m = @user.match(/%{(?<user>.*)}/)
         @user = URI.encode_www_form_component(m["user"])
       end
       if @password && m = @password.match(/%{(?<password>.*)}/)
         @password = URI.encode_www_form_component(m["password"])
       end

       if @hash_config
         raise Fluent::ConfigError, "@hash_config.hash_id_key and id_key must be equal." unless @hash_config.hash_id_key == @id_key
       end
       @transport_logger = nil
       if @with_transporter_log
         @transport_logger = log
         log_level = conf['@log_level'] || conf['log_level']
         log.warn "Consider to specify log_level with @log_level." unless log_level
       end

       @last_seen_major_version = detect_es_major_version rescue DEFAULT_ELASTICSEARCH_VERSION
       if @last_seen_major_version == 6 && @type_name != DEFAULT_TYPE_NAME_ES_7x
         log.info "Detected ES 6.x: ES 7.x will only accept `_doc` in type_name."
       end
       if @last_seen_major_version >= 7 && @type_name != DEFAULT_TYPE_NAME_ES_7x
         log.warn "Detected ES 7.x or above: `_doc` will be used as the document `_type`."
         @type_name = '_doc'.freeze
       end
     end

     def detect_es_major_version
       @_es_info ||= client.info
       @_es_info["version"]["number"].to_i
     end

     def convert_compat_id_key(key)
       if key.include?('.') && !key.start_with?('$[')
         key = "$.#{key}" unless key.start_with?('$.')
       end
       key
     end

     def create_meta_config_map
       result = []
       result << [record_accessor_create(@id_key), '_id'] if @id_key
       result << [record_accessor_create(@parent_key), '_parent'] if @parent_key
       result << [record_accessor_create(@routing_key), '_routing'] if @routing_key
       result
     end

     # once fluent v0.14 is released we might be able to use
     # Fluent::Parser::TimeParser, but it doesn't quite do what we want - if gives
     # [sec,nsec] where as we want something we can call `strftime` on...
     def create_time_parser
       if @time_key_format
         begin
           # Strptime doesn't support all formats, but for those it does it's
           # blazingly fast.
           strptime = Strptime.new(@time_key_format)
           Proc.new { |value| strptime.exec(value).to_datetime }
         rescue
           # Can happen if Strptime doesn't recognize the format; or
           # if strptime couldn't be required (because it's not installed -- it's
           # ruby 2 only)
           Proc.new { |value| DateTime.strptime(value, @time_key_format) }
         end
       else
         Proc.new { |value| DateTime.parse(value) }
       end
     end

     def parse_time(value, event_time, tag)
       @time_parser.call(value)
     rescue => e
       router.emit_error_event(@time_parse_error_tag, Fluent::Engine.now, {'tag' => tag, 'time' => event_time, 'format' => @time_key_format, 'value' => value}, e)
       return Time.at(event_time).to_datetime
     end

     def client
       @_es ||= begin
         excon_options = { client_key: @client_key, client_cert: @client_cert, client_key_pass: @client_key_pass }
         adapter_conf = lambda {|f| f.adapter :excon, excon_options }
         transport = Elasticsearch::Transport::Transport::HTTP::Faraday.new(get_connection_options.merge(
           options: {
             reload_connections: @reload_connections,
             reload_on_failure: @reload_on_failure,
             resurrect_after: @resurrect_after,
             retry_on_failure: 5,
             logger: @transport_logger,
             transport_options: {
               headers: { 'Content-Type' => @content_type.to_s },
               request: { timeout: @request_timeout },
               ssl: { verify: @ssl_verify, ca_file: @ca_file, version: @ssl_version }
             },
             http: {
               user: @user,
               password: @password
             }
           }), &adapter_conf)
         es = Elasticsearch::Client.new transport: transport

         begin
           raise ConnectionFailure, "Can not reach Elasticsearch cluster (#{connection_options_description})!" unless es.ping
         rescue *es.transport.host_unreachable_exceptions => e
           raise ConnectionFailure, "Can not reach Elasticsearch cluster (#{connection_options_description})! #{e.message}"
         end

         log.info "Connection opened to Elasticsearch cluster => #{connection_options_description}"
         es
       end
     end

     def get_escaped_userinfo(host_str)
       if m = host_str.match(/(?<scheme>.*)%{(?<user>.*)}:%{(?<password>.*)}(?<path>@.*)/)
         m["scheme"] +
           URI.encode_www_form_component(m["user"]) +
           ':' +
           URI.encode_www_form_component(m["password"]) +
           m["path"]
       else
         host_str
       end
     end

     def get_connection_options
       raise "`password` must be present if `user` is present" if @user && !@password

       hosts = if @hosts
         @hosts.split(',').map do |host_str|
           # Support legacy hosts format host:port,host:port,host:port...
           if host_str.match(%r{^[^:]+(\:\d+)?$})
             {
               host: host_str.split(':')[0],
               port: (host_str.split(':')[1] || @port).to_i,
               scheme: @scheme
             }
           else
             # New hosts format expects URLs such as http://logs.foo.com,https://john:pass@logs2.foo.com/elastic
             uri = URI(get_escaped_userinfo(host_str))
             %w(user password path).inject(host: uri.host, port: uri.port, scheme: uri.scheme) do |hash, key|
               hash[key.to_sym] = uri.public_send(key) unless uri.public_send(key).nil? || uri.public_send(key) == ''
               hash
             end
           end
         end.compact
       else
         [{host: @host, port: @port, scheme: @scheme}]
       end.each do |host|
         host.merge!(user: @user, password: @password) if !host[:user] && @user
         host.merge!(path: @path) if !host[:path] && @path
       end

       {
         hosts: hosts
       }
     end

     def connection_options_description
       get_connection_options[:hosts].map do |host_info|
         attributes = host_info.dup
         attributes[:password] = 'obfuscated' if attributes.has_key?(:password)
         attributes.inspect
       end.join(', ')
     end

     def append_record_to_messages(op, meta, header, record, msgs)
       case op
       when UPDATE_OP, UPSERT_OP
         if meta.has_key?(ID_FIELD)
           header[UPDATE_OP] = meta
           msgs << @dump_proc.call(header) << BODY_DELIMITER
           msgs << @dump_proc.call(update_body(record, op)) << BODY_DELIMITER
         end
       when CREATE_OP
         if meta.has_key?(ID_FIELD)
           header[CREATE_OP] = meta
           msgs << @dump_proc.call(header) << BODY_DELIMITER
           msgs << @dump_proc.call(record) << BODY_DELIMITER
         end
       when INDEX_OP
         header[INDEX_OP] = meta
         msgs << @dump_proc.call(header) << BODY_DELIMITER
         msgs << @dump_proc.call(record) << BODY_DELIMITER
       end
     end

     def update_body(record, op)
       update = remove_keys(record)
       body = {"doc".freeze => update}
       if op == UPSERT_OP
         if update == record
           body["doc_as_upsert".freeze] = true
         else
           body[UPSERT_OP] = record
         end
       end
       body
     end

     def remove_keys(record)
       keys = record[@remove_keys_on_update_key] || @remove_keys_on_update || []
       record.delete(@remove_keys_on_update_key)
       return record unless keys.any?
       record = record.dup
       keys.each { |key| record.delete(key) }
       record
     end

     def flatten_record(record, prefix=[])
       ret = {}
       if record.is_a? Hash
         record.each { |key, value|
           ret.merge! flatten_record(value, prefix + [key.to_s])
         }
       elsif record.is_a? Array
         # Don't mess with arrays, leave them unprocessed
         ret.merge!({prefix.join(@flatten_hashes_separator) => record})
       else
         return {prefix.join(@flatten_hashes_separator) => record}
       end
       ret
     end

     def expand_placeholders(metadata)
       logstash_prefix = extract_placeholders(@logstash_prefix, metadata)
       index_name = extract_placeholders(@index_name, metadata)
       type_name = extract_placeholders(@type_name, metadata)
       return logstash_prefix, index_name, type_name
     end

     def multi_workers_ready?
       true
     end

     def write(chunk)
       bulk_message_count = 0
       bulk_message = ''
       header = {}
       meta = {}

       tag = chunk.metadata.tag
       extracted_values = expand_placeholders(chunk.metadata)
       @last_seen_major_version = detect_es_major_version rescue DEFAULT_ELASTICSEARCH_VERSION

       chunk.msgpack_each do |time, record|
         next unless record.is_a? Hash
         begin
           process_message(tag, meta, header, time, record, bulk_message, extracted_values)
           bulk_message_count += 1
         rescue => e
           router.emit_error_event(tag, time, record, e)
         end
       end

       send_bulk(bulk_message, tag, chunk, bulk_message_count, extracted_values) unless bulk_message.empty?
       bulk_message.clear
     end

     def process_message(tag, meta, header, time, record, bulk_message, extracted_values)
       logstash_prefix, index_name, type_name = extracted_values

       if @flatten_hashes
         record = flatten_record(record)
       end

       if @hash_config
         record = generate_hash_id_key(record)
       end

       dt = nil
       if @logstash_format || @include_timestamp
         if record.has_key?(TIMESTAMP_FIELD)
           rts = record[TIMESTAMP_FIELD]
           dt = parse_time(rts, time, tag)
         elsif record.has_key?(@time_key)
           rts = record[@time_key]
           dt = parse_time(rts, time, tag)
           record[TIMESTAMP_FIELD] = dt.iso8601(@time_precision) unless @time_key_exclude_timestamp
         else
           dt = Time.at(time).to_datetime
           record[TIMESTAMP_FIELD] = dt.iso8601(@time_precision)
         end
       end

       target_index_parent, target_index_child_key = @target_index_key ? get_parent_of(record, @target_index_key) : nil
       if target_index_parent && target_index_parent[target_index_child_key]
         target_index = target_index_parent.delete(target_index_child_key)
       elsif @logstash_format
         dt = dt.new_offset(0) if @utc_index
         target_index = "#{logstash_prefix}#{@logstash_prefix_separator}#{dt.strftime(@logstash_dateformat)}"
       else
         target_index = index_name
       end

       # Change target_index to lower-case since Elasticsearch doesn't
       # allow upper-case characters in index names.
       target_index = target_index.downcase
       if @include_tag_key
         record[@tag_key] = tag
       end

       target_type_parent, target_type_child_key = @target_type_key ? get_parent_of(record, @target_type_key) : nil
       if target_type_parent && target_type_parent[target_type_child_key]
         target_type = target_type_parent.delete(target_type_child_key)
         if @last_seen_major_version == 6
           log.warn "Detected ES 6.x: `@type_name` will be used as the document `_type`."
           target_type = type_name
         elsif @last_seen_major_version >= 7
           log.warn "Detected ES 7.x or above: `_doc` will be used as the document `_type`."
           target_type = '_doc'.freeze
         end
       else
         if @last_seen_major_version >= 7 && target_type != DEFAULT_TYPE_NAME_ES_7x
           log.warn "Detected ES 7.x or above: `_doc` will be used as the document `_type`."
           target_type = '_doc'.freeze
         else
           target_type = type_name
         end
       end

       meta.clear
       meta["_index".freeze] = target_index
       meta["_type".freeze] = target_type

       if @pipeline
         meta["pipeline".freeze] = @pipeline
       end

       @meta_config_map.each do |record_accessor, meta_key|
         if raw_value = record_accessor.call(record)
           meta[meta_key] = raw_value
         end
       end

       if @remove_keys
         @remove_keys.each { |key| record.delete(key) }
       end

       append_record_to_messages(@write_operation, meta, header, record, bulk_message)
     end

     # returns [parent, child_key] of child described by path array in record's tree
     # returns [nil, child_key] if path doesnt exist in record
     def get_parent_of(record, path)
       parent_object = path[0..-2].reduce(record) { |a, e| a.is_a?(Hash) ? a[e] : nil }
       [parent_object, path[-1]]
     end

     # send_bulk given a specific bulk request, the original tag,
     # chunk, and bulk_message_count
     def send_bulk(data, tag, chunk, bulk_message_count, extracted_values)
       retries = 0
       begin
         response = client.bulk body: data
         if response['errors']
           error = Fluent::Plugin::ElasticsearchErrorHandler.new(self)
           error.handle_error(response, tag, chunk, bulk_message_count, extracted_values)
         end
       rescue RetryStreamError => e
         emit_tag = @retry_tag ? @retry_tag : tag
         router.emit_stream(emit_tag, e.retry_stream)
       rescue *client.transport.host_unreachable_exceptions => e
         if retries < 2
           retries += 1
           @_es = nil
           @_es_info = nil
           log.warn "Could not push logs to Elasticsearch, resetting connection and trying again. #{e.message}"
           sleep 2**retries
           retry
         end
         raise ConnectionFailure, "Could not push logs to Elasticsearch after #{retries} retries. #{e.message}"
       rescue Exception
         @_es = nil if @reconnect_on_error
         @_es_info = nil if @reconnect_on_error
         raise
       end
     end
   end
 end
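
The behavioral change sits in `configure`: both `template_install` and `templates_hash_install` are now invoked through `retry_install(@max_retry_putting_template)`, so a template PUT that fails while Elasticsearch is still coming up is retried up to `max_retry_putting_template` times (default: 10) instead of failing the plugin on the first attempt. Because the limit is an ordinary `config_param`, it can be tuned from the output's configuration section like any other option. `retry_install` itself is defined in elasticsearch_index_template.rb (mixed in via `include Fluent::ElasticsearchIndexTemplate`) and is not part of this diff; the sketch below is only an assumption of the retry-with-backoff shape such a helper takes, not the gem's actual implementation.

    # Hypothetical sketch of a retry_install-style helper; the real one
    # lives in elasticsearch_index_template.rb and may rescue narrower
    # exceptions and back off on a different schedule.
    def retry_install(max_retries)
      retries = 0
      begin
        yield # attempt the template PUT
      rescue => e
        raise e if retries >= max_retries
        retries += 1
        sleep 2**retries # exponential backoff before the next attempt
        retry
      end
    end

    # Mirrors the new configure branch:
    #   retry_install(@max_retry_putting_template) do
    #     template_install(@template_name, @template_file, @template_overwrite)
    #   end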