fluent-plugin-elasticsearch 2.10.3 → 2.10.4

lib/fluent/plugin/filter_elasticsearch_genid.rb:
@@ -1,25 +1,25 @@
 require 'securerandom'
 require 'base64'
 require 'fluent/plugin/filter'
 
 module Fluent::Plugin
   class ElasticsearchGenidFilter < Filter
     Fluent::Plugin.register_filter('elasticsearch_genid', self)
 
     config_param :hash_id_key, :string, :default => '_hash'
 
     def initialize
       super
     end
 
     def configure(conf)
       super
     end
 
     def filter(tag, time, record)
       record[@hash_id_key] = Base64.strict_encode64(SecureRandom.uuid)
       record
     end
 
   end
 end
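
The genid filter is listed with identical content on both sides of the hunk above, so it is effectively unchanged in 2.10.4. For reference, the filter registers itself as elasticsearch_genid and stores a Base64-encoded random UUID in each record under hash_id_key (default "_hash"). A minimal sketch of enabling it in a Fluentd configuration; the match pattern ** is illustrative, not part of the plugin:

  <filter **>
    @type elasticsearch_genid
    hash_id_key _hash    # record key that receives the generated id; "_hash" is the default
  </filter>

Records passing through this filter then carry a unique id that id_key in the elasticsearch output can reference, so that retried bulk writes do not create duplicate documents.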
lib/fluent/plugin/out_elasticsearch.rb:
@@ -1,538 +1,540 @@
 # encoding: UTF-8
 require 'date'
 require 'excon'
 require 'elasticsearch'
 require 'json'
 require 'uri'
 begin
   require 'strptime'
 rescue LoadError
 end
 
 require 'fluent/plugin/output'
 require 'fluent/event'
+require 'fluent/error'
 require_relative 'elasticsearch_constants'
 require_relative 'elasticsearch_error_handler'
 require_relative 'elasticsearch_index_template'
 
 module Fluent::Plugin
   class ElasticsearchOutput < Output
-    class ConnectionFailure < StandardError; end
+    class ConnectionFailure < Fluent::UnrecoverableError; end
 
     # RetryStreamError privides a stream to be
     # put back in the pipeline for cases where a bulk request
     # failed (e.g some records succeed while others failed)
     class RetryStreamError < StandardError
       attr_reader :retry_stream
       def initialize(retry_stream)
         @retry_stream = retry_stream
       end
     end
 
     helpers :event_emitter, :compat_parameters, :record_accessor
 
     Fluent::Plugin.register_output('elasticsearch', self)
 
     DEFAULT_BUFFER_TYPE = "memory"
     DEFAULT_ELASTICSEARCH_VERSION = 5 # For compatibility.
     DEFAULT_TYPE_NAME_ES_7x = "_doc".freeze
     DEFAULT_TYPE_NAME = "fluentd".freeze
 
     config_param :host, :string, :default => 'localhost'
     config_param :port, :integer, :default => 9200
     config_param :user, :string, :default => nil
     config_param :password, :string, :default => nil, :secret => true
     config_param :path, :string, :default => nil
     config_param :scheme, :string, :default => 'http'
     config_param :hosts, :string, :default => nil
     config_param :target_index_key, :string, :default => nil
     config_param :target_type_key, :string, :default => nil,
                  :deprecated => <<EOC
 Elasticsearch 7.x or above will ignore this config. Please use fixed type_name instead.
 EOC
     config_param :time_key_format, :string, :default => nil
     config_param :time_precision, :integer, :default => 9
     config_param :include_timestamp, :bool, :default => false
     config_param :logstash_format, :bool, :default => false
     config_param :logstash_prefix, :string, :default => "logstash"
     config_param :logstash_prefix_separator, :string, :default => '-'
     config_param :logstash_dateformat, :string, :default => "%Y.%m.%d"
     config_param :utc_index, :bool, :default => true
     config_param :type_name, :string, :default => DEFAULT_TYPE_NAME
     config_param :index_name, :string, :default => "fluentd"
     config_param :id_key, :string, :default => nil
     config_param :write_operation, :string, :default => "index"
     config_param :parent_key, :string, :default => nil
     config_param :routing_key, :string, :default => nil
     config_param :request_timeout, :time, :default => 5
     config_param :reload_connections, :bool, :default => true
     config_param :reload_on_failure, :bool, :default => false
     config_param :retry_tag, :string, :default=>nil
     config_param :resurrect_after, :time, :default => 60
     config_param :time_key, :string, :default => nil
     config_param :time_key_exclude_timestamp, :bool, :default => false
     config_param :ssl_verify , :bool, :default => true
     config_param :client_key, :string, :default => nil
     config_param :client_cert, :string, :default => nil
     config_param :client_key_pass, :string, :default => nil
     config_param :ca_file, :string, :default => nil
     config_param :ssl_version, :enum, list: [:SSLv23, :TLSv1, :TLSv1_1, :TLSv1_2], :default => :TLSv1
     config_param :remove_keys, :string, :default => nil
     config_param :remove_keys_on_update, :string, :default => ""
     config_param :remove_keys_on_update_key, :string, :default => nil
     config_param :flatten_hashes, :bool, :default => false
     config_param :flatten_hashes_separator, :string, :default => "_"
     config_param :template_name, :string, :default => nil
     config_param :template_file, :string, :default => nil
     config_param :template_overwrite, :bool, :default => false
     config_param :templates, :hash, :default => nil
     config_param :max_retry_putting_template, :integer, :default => 10
     config_param :include_tag_key, :bool, :default => false
     config_param :tag_key, :string, :default => 'tag'
     config_param :time_parse_error_tag, :string, :default => 'Fluent::ElasticsearchOutput::TimeParser.error'
     config_param :reconnect_on_error, :bool, :default => false
     config_param :pipeline, :string, :default => nil
     config_param :with_transporter_log, :bool, :default => false
     config_param :content_type, :enum, list: [:"application/json", :"application/x-ndjson"], :default => :"application/json",
                  :deprecated => <<EOC
 elasticsearch gem v6.0.2 starts to use correct Content-Type. Please upgrade elasticserach gem and stop to use this option.
 see: https://github.com/elastic/elasticsearch-ruby/pull/514
 EOC
 
     config_section :buffer do
       config_set_default :@type, DEFAULT_BUFFER_TYPE
       config_set_default :chunk_keys, ['tag']
       config_set_default :timekey_use_utc, true
     end
 
     include Fluent::ElasticsearchIndexTemplate
     include Fluent::Plugin::ElasticsearchConstants
 
     def initialize
       super
     end
 
     def configure(conf)
       compat_parameters_convert(conf, :buffer)
 
       super
       raise Fluent::ConfigError, "'tag' in chunk_keys is required." if not @chunk_key_tag
 
       @time_parser = create_time_parser
 
       if @remove_keys
         @remove_keys = @remove_keys.split(/\s*,\s*/)
       end
 
       if @target_index_key && @target_index_key.is_a?(String)
         @target_index_key = @target_index_key.split '.'
       end
 
       if @target_type_key && @target_type_key.is_a?(String)
         @target_type_key = @target_type_key.split '.'
       end
 
       if @remove_keys_on_update && @remove_keys_on_update.is_a?(String)
         @remove_keys_on_update = @remove_keys_on_update.split ','
       end
 
+      raise Fluent::ConfigError, "'max_retry_putting_template' must be positive number." if @max_retry_putting_template < 0
       if @template_name && @template_file
         retry_install(@max_retry_putting_template) do
           template_install(@template_name, @template_file, @template_overwrite)
         end
       elsif @templates
         retry_install(@max_retry_putting_template) do
           templates_hash_install(@templates, @template_overwrite)
         end
       end
 
       # Consider missing the prefix of "$." in nested key specifiers.
       @id_key = convert_compat_id_key(@id_key) if @id_key
       @parent_key = convert_compat_id_key(@parent_key) if @parent_key
       @routing_key = convert_compat_id_key(@routing_key) if @routing_key
 
       @meta_config_map = create_meta_config_map
 
       begin
         require 'oj'
         @dump_proc = Oj.method(:dump)
       rescue LoadError
         @dump_proc = Yajl.method(:dump)
       end
 
       if @user && m = @user.match(/%{(?<user>.*)}/)
         @user = URI.encode_www_form_component(m["user"])
       end
       if @password && m = @password.match(/%{(?<password>.*)}/)
         @password = URI.encode_www_form_component(m["password"])
       end
 
       if @hash_config
         raise Fluent::ConfigError, "@hash_config.hash_id_key and id_key must be equal." unless @hash_config.hash_id_key == @id_key
       end
       @transport_logger = nil
       if @with_transporter_log
         @transport_logger = log
         log_level = conf['@log_level'] || conf['log_level']
         log.warn "Consider to specify log_level with @log_level." unless log_level
       end
 
       @last_seen_major_version = detect_es_major_version rescue DEFAULT_ELASTICSEARCH_VERSION
       if @last_seen_major_version == 6 && @type_name != DEFAULT_TYPE_NAME_ES_7x
         log.info "Detected ES 6.x: ES 7.x will only accept `_doc` in type_name."
       end
       if @last_seen_major_version >= 7 && @type_name != DEFAULT_TYPE_NAME_ES_7x
         log.warn "Detected ES 7.x or above: `_doc` will be used as the document `_type`."
         @type_name = '_doc'.freeze
       end
     end
 
     def detect_es_major_version
       @_es_info ||= client.info
       @_es_info["version"]["number"].to_i
     end
 
     def convert_compat_id_key(key)
       if key.include?('.') && !key.start_with?('$[')
         key = "$.#{key}" unless key.start_with?('$.')
       end
       key
     end
 
     def create_meta_config_map
       result = []
       result << [record_accessor_create(@id_key), '_id'] if @id_key
       result << [record_accessor_create(@parent_key), '_parent'] if @parent_key
       result << [record_accessor_create(@routing_key), '_routing'] if @routing_key
       result
     end
 
     # once fluent v0.14 is released we might be able to use
     # Fluent::Parser::TimeParser, but it doesn't quite do what we want - if gives
     # [sec,nsec] where as we want something we can call `strftime` on...
     def create_time_parser
       if @time_key_format
         begin
           # Strptime doesn't support all formats, but for those it does it's
           # blazingly fast.
           strptime = Strptime.new(@time_key_format)
           Proc.new { |value| strptime.exec(value).to_datetime }
         rescue
           # Can happen if Strptime doesn't recognize the format; or
           # if strptime couldn't be required (because it's not installed -- it's
           # ruby 2 only)
           Proc.new { |value| DateTime.strptime(value, @time_key_format) }
         end
       else
         Proc.new { |value| DateTime.parse(value) }
       end
     end
 
     def parse_time(value, event_time, tag)
       @time_parser.call(value)
     rescue => e
       router.emit_error_event(@time_parse_error_tag, Fluent::Engine.now, {'tag' => tag, 'time' => event_time, 'format' => @time_key_format, 'value' => value}, e)
       return Time.at(event_time).to_datetime
     end
 
     def client
       @_es ||= begin
         excon_options = { client_key: @client_key, client_cert: @client_cert, client_key_pass: @client_key_pass }
         adapter_conf = lambda {|f| f.adapter :excon, excon_options }
         transport = Elasticsearch::Transport::Transport::HTTP::Faraday.new(get_connection_options.merge(
           options: {
             reload_connections: @reload_connections,
             reload_on_failure: @reload_on_failure,
             resurrect_after: @resurrect_after,
             retry_on_failure: 5,
             logger: @transport_logger,
             transport_options: {
               headers: { 'Content-Type' => @content_type.to_s },
               request: { timeout: @request_timeout },
               ssl: { verify: @ssl_verify, ca_file: @ca_file, version: @ssl_version }
             },
             http: {
               user: @user,
               password: @password
             }
           }), &adapter_conf)
         es = Elasticsearch::Client.new transport: transport
 
         begin
           raise ConnectionFailure, "Can not reach Elasticsearch cluster (#{connection_options_description})!" unless es.ping
         rescue *es.transport.host_unreachable_exceptions => e
           raise ConnectionFailure, "Can not reach Elasticsearch cluster (#{connection_options_description})! #{e.message}"
         end
 
         log.info "Connection opened to Elasticsearch cluster => #{connection_options_description}"
         es
       end
     end
 
     def get_escaped_userinfo(host_str)
       if m = host_str.match(/(?<scheme>.*)%{(?<user>.*)}:%{(?<password>.*)}(?<path>@.*)/)
         m["scheme"] +
           URI.encode_www_form_component(m["user"]) +
           ':' +
           URI.encode_www_form_component(m["password"]) +
           m["path"]
       else
         host_str
       end
     end
 
     def get_connection_options
       raise "`password` must be present if `user` is present" if @user && !@password
 
       hosts = if @hosts
         @hosts.split(',').map do |host_str|
           # Support legacy hosts format host:port,host:port,host:port...
           if host_str.match(%r{^[^:]+(\:\d+)?$})
             {
               host: host_str.split(':')[0],
               port: (host_str.split(':')[1] || @port).to_i,
               scheme: @scheme
             }
           else
             # New hosts format expects URLs such as http://logs.foo.com,https://john:pass@logs2.foo.com/elastic
             uri = URI(get_escaped_userinfo(host_str))
             %w(user password path).inject(host: uri.host, port: uri.port, scheme: uri.scheme) do |hash, key|
               hash[key.to_sym] = uri.public_send(key) unless uri.public_send(key).nil? || uri.public_send(key) == ''
               hash
             end
           end
         end.compact
       else
         [{host: @host, port: @port, scheme: @scheme}]
       end.each do |host|
         host.merge!(user: @user, password: @password) if !host[:user] && @user
         host.merge!(path: @path) if !host[:path] && @path
       end
 
       {
         hosts: hosts
       }
     end
 
     def connection_options_description
       get_connection_options[:hosts].map do |host_info|
         attributes = host_info.dup
         attributes[:password] = 'obfuscated' if attributes.has_key?(:password)
         attributes.inspect
       end.join(', ')
     end
 
     def append_record_to_messages(op, meta, header, record, msgs)
       case op
       when UPDATE_OP, UPSERT_OP
         if meta.has_key?(ID_FIELD)
           header[UPDATE_OP] = meta
           msgs << @dump_proc.call(header) << BODY_DELIMITER
           msgs << @dump_proc.call(update_body(record, op)) << BODY_DELIMITER
         end
       when CREATE_OP
         if meta.has_key?(ID_FIELD)
           header[CREATE_OP] = meta
           msgs << @dump_proc.call(header) << BODY_DELIMITER
           msgs << @dump_proc.call(record) << BODY_DELIMITER
         end
       when INDEX_OP
         header[INDEX_OP] = meta
         msgs << @dump_proc.call(header) << BODY_DELIMITER
         msgs << @dump_proc.call(record) << BODY_DELIMITER
       end
     end
 
     def update_body(record, op)
       update = remove_keys(record)
       body = {"doc".freeze => update}
       if op == UPSERT_OP
         if update == record
           body["doc_as_upsert".freeze] = true
         else
           body[UPSERT_OP] = record
         end
       end
       body
     end
 
     def remove_keys(record)
       keys = record[@remove_keys_on_update_key] || @remove_keys_on_update || []
       record.delete(@remove_keys_on_update_key)
       return record unless keys.any?
       record = record.dup
       keys.each { |key| record.delete(key) }
       record
     end
 
     def flatten_record(record, prefix=[])
       ret = {}
       if record.is_a? Hash
         record.each { |key, value|
           ret.merge! flatten_record(value, prefix + [key.to_s])
         }
       elsif record.is_a? Array
         # Don't mess with arrays, leave them unprocessed
         ret.merge!({prefix.join(@flatten_hashes_separator) => record})
       else
         return {prefix.join(@flatten_hashes_separator) => record}
       end
       ret
     end
 
     def expand_placeholders(metadata)
       logstash_prefix = extract_placeholders(@logstash_prefix, metadata)
       index_name = extract_placeholders(@index_name, metadata)
       type_name = extract_placeholders(@type_name, metadata)
       return logstash_prefix, index_name, type_name
     end
 
     def multi_workers_ready?
       true
     end
 
     def write(chunk)
       bulk_message_count = 0
       bulk_message = ''
       header = {}
       meta = {}
 
       tag = chunk.metadata.tag
       extracted_values = expand_placeholders(chunk.metadata)
       @last_seen_major_version = detect_es_major_version rescue DEFAULT_ELASTICSEARCH_VERSION
 
       chunk.msgpack_each do |time, record|
         next unless record.is_a? Hash
         begin
           process_message(tag, meta, header, time, record, bulk_message, extracted_values)
           bulk_message_count += 1
         rescue => e
           router.emit_error_event(tag, time, record, e)
         end
       end
 
       send_bulk(bulk_message, tag, chunk, bulk_message_count, extracted_values) unless bulk_message.empty?
       bulk_message.clear
     end
 
     def process_message(tag, meta, header, time, record, bulk_message, extracted_values)
       logstash_prefix, index_name, type_name = extracted_values
 
       if @flatten_hashes
         record = flatten_record(record)
       end
 
       if @hash_config
         record = generate_hash_id_key(record)
       end
 
       dt = nil
       if @logstash_format || @include_timestamp
         if record.has_key?(TIMESTAMP_FIELD)
           rts = record[TIMESTAMP_FIELD]
           dt = parse_time(rts, time, tag)
         elsif record.has_key?(@time_key)
           rts = record[@time_key]
           dt = parse_time(rts, time, tag)
           record[TIMESTAMP_FIELD] = dt.iso8601(@time_precision) unless @time_key_exclude_timestamp
         else
           dt = Time.at(time).to_datetime
           record[TIMESTAMP_FIELD] = dt.iso8601(@time_precision)
         end
       end
 
       target_index_parent, target_index_child_key = @target_index_key ? get_parent_of(record, @target_index_key) : nil
       if target_index_parent && target_index_parent[target_index_child_key]
         target_index = target_index_parent.delete(target_index_child_key)
       elsif @logstash_format
         dt = dt.new_offset(0) if @utc_index
         target_index = "#{logstash_prefix}#{@logstash_prefix_separator}#{dt.strftime(@logstash_dateformat)}"
       else
         target_index = index_name
       end
 
       # Change target_index to lower-case since Elasticsearch doesn't
       # allow upper-case characters in index names.
       target_index = target_index.downcase
       if @include_tag_key
         record[@tag_key] = tag
       end
 
       target_type_parent, target_type_child_key = @target_type_key ? get_parent_of(record, @target_type_key) : nil
       if target_type_parent && target_type_parent[target_type_child_key]
         target_type = target_type_parent.delete(target_type_child_key)
         if @last_seen_major_version == 6
           log.warn "Detected ES 6.x: `@type_name` will be used as the document `_type`."
           target_type = type_name
         elsif @last_seen_major_version >= 7
           log.warn "Detected ES 7.x or above: `_doc` will be used as the document `_type`."
           target_type = '_doc'.freeze
         end
       else
         if @last_seen_major_version >= 7 && target_type != DEFAULT_TYPE_NAME_ES_7x
           log.warn "Detected ES 7.x or above: `_doc` will be used as the document `_type`."
           target_type = '_doc'.freeze
         else
           target_type = type_name
         end
       end
 
       meta.clear
       meta["_index".freeze] = target_index
       meta["_type".freeze] = target_type
 
       if @pipeline
         meta["pipeline".freeze] = @pipeline
       end
 
       @meta_config_map.each do |record_accessor, meta_key|
         if raw_value = record_accessor.call(record)
           meta[meta_key] = raw_value
         end
       end
 
       if @remove_keys
         @remove_keys.each { |key| record.delete(key) }
       end
 
       append_record_to_messages(@write_operation, meta, header, record, bulk_message)
     end
 
     # returns [parent, child_key] of child described by path array in record's tree
     # returns [nil, child_key] if path doesnt exist in record
     def get_parent_of(record, path)
       parent_object = path[0..-2].reduce(record) { |a, e| a.is_a?(Hash) ? a[e] : nil }
       [parent_object, path[-1]]
     end
 
     # send_bulk given a specific bulk request, the original tag,
     # chunk, and bulk_message_count
     def send_bulk(data, tag, chunk, bulk_message_count, extracted_values)
       retries = 0
       begin
         response = client.bulk body: data
         if response['errors']
           error = Fluent::Plugin::ElasticsearchErrorHandler.new(self)
           error.handle_error(response, tag, chunk, bulk_message_count, extracted_values)
         end
       rescue RetryStreamError => e
         emit_tag = @retry_tag ? @retry_tag : tag
         router.emit_stream(emit_tag, e.retry_stream)
       rescue *client.transport.host_unreachable_exceptions => e
         if retries < 2
           retries += 1
           @_es = nil
           @_es_info = nil
           log.warn "Could not push logs to Elasticsearch, resetting connection and trying again. #{e.message}"
           sleep 2**retries
           retry
         end
         raise ConnectionFailure, "Could not push logs to Elasticsearch after #{retries} retries. #{e.message}"
       rescue Exception
         @_es = nil if @reconnect_on_error
         @_es_info = nil if @reconnect_on_error
         raise
       end
     end
   end
 end
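
Summarizing the out_elasticsearch.rb hunk, 2.10.4 makes three small changes: it requires 'fluent/error', it changes ConnectionFailure to inherit from Fluent::UnrecoverableError instead of StandardError (so a persistent connection failure is surfaced to Fluentd as unrecoverable rather than as an ordinary retryable error), and configure now rejects a negative max_retry_putting_template with a Fluent::ConfigError. A minimal sketch of a configuration that exercises the newly validated template options; the tag pattern, template name, and file path here are illustrative, not part of the plugin:

  <match my.logs.**>
    @type elasticsearch
    host localhost
    port 9200
    logstash_format true
    template_name my_template                   # hypothetical template name
    template_file /etc/td-agent/template.json   # hypothetical path to an index template JSON
    max_retry_putting_template 10               # 2.10.4 raises Fluent::ConfigError if this is negative
  </match>

With these settings the plugin installs the index template at startup, retrying the PUT up to max_retry_putting_template times before giving up.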