fluent-plugin-elasticsearch 1.9.4 → 5.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. checksums.yaml +5 -5
  2. data/.github/ISSUE_TEMPLATE/bug_report.md +37 -0
  3. data/.github/ISSUE_TEMPLATE/feature_request.md +24 -0
  4. data/.github/workflows/issue-auto-closer.yml +12 -0
  5. data/.github/workflows/linux.yml +26 -0
  6. data/.github/workflows/macos.yml +26 -0
  7. data/.github/workflows/windows.yml +26 -0
  8. data/.travis.yml +33 -6
  9. data/CONTRIBUTING.md +24 -0
  10. data/Gemfile +4 -1
  11. data/History.md +445 -1
  12. data/ISSUE_TEMPLATE.md +19 -0
  13. data/README.ElasticsearchGenID.md +116 -0
  14. data/README.ElasticsearchInput.md +293 -0
  15. data/README.Troubleshooting.md +692 -0
  16. data/README.md +1013 -38
  17. data/appveyor.yml +20 -0
  18. data/fluent-plugin-elasticsearch.gemspec +15 -9
  19. data/{Gemfile.v0.12 → gemfiles/Gemfile.elasticsearch.v6} +6 -5
  20. data/lib/fluent/log-ext.rb +38 -0
  21. data/lib/fluent/plugin/default-ilm-policy.json +14 -0
  22. data/lib/fluent/plugin/elasticsearch_constants.rb +13 -0
  23. data/lib/fluent/plugin/elasticsearch_error.rb +5 -0
  24. data/lib/fluent/plugin/elasticsearch_error_handler.rb +129 -0
  25. data/lib/fluent/plugin/elasticsearch_fallback_selector.rb +9 -0
  26. data/lib/fluent/plugin/elasticsearch_index_lifecycle_management.rb +67 -0
  27. data/lib/fluent/plugin/elasticsearch_index_template.rb +186 -12
  28. data/lib/fluent/plugin/elasticsearch_simple_sniffer.rb +10 -0
  29. data/lib/fluent/plugin/elasticsearch_tls.rb +70 -0
  30. data/lib/fluent/plugin/filter_elasticsearch_genid.rb +77 -0
  31. data/lib/fluent/plugin/in_elasticsearch.rb +325 -0
  32. data/lib/fluent/plugin/oj_serializer.rb +22 -0
  33. data/lib/fluent/plugin/out_elasticsearch.rb +1008 -267
  34. data/lib/fluent/plugin/out_elasticsearch_data_stream.rb +218 -0
  35. data/lib/fluent/plugin/out_elasticsearch_dynamic.rb +232 -214
  36. data/test/plugin/test_alias_template.json +9 -0
  37. data/test/plugin/test_elasticsearch_error_handler.rb +646 -0
  38. data/test/plugin/test_elasticsearch_fallback_selector.rb +74 -0
  39. data/test/plugin/test_elasticsearch_index_lifecycle_management.rb +66 -0
  40. data/test/plugin/test_elasticsearch_tls.rb +145 -0
  41. data/test/plugin/test_filter_elasticsearch_genid.rb +215 -0
  42. data/test/plugin/test_in_elasticsearch.rb +459 -0
  43. data/test/plugin/test_index_alias_template.json +11 -0
  44. data/test/plugin/test_index_template.json +25 -0
  45. data/test/plugin/test_oj_serializer.rb +19 -0
  46. data/test/plugin/test_out_elasticsearch.rb +5029 -387
  47. data/test/plugin/test_out_elasticsearch_data_stream.rb +337 -0
  48. data/test/plugin/test_out_elasticsearch_dynamic.rb +681 -208
  49. data/test/test_log-ext.rb +35 -0
  50. metadata +97 -19
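
Of the 50 files listed, the diff body below shows only the rewrite of data/lib/fluent/plugin/out_elasticsearch.rb (entry 33, +1008 -267); the second hunk's header names its old class, Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput. The structural change to keep in mind while reading it is the migration from fluentd's v0.12 output API to the v1 plugin API. A minimal sketch of that migration, reduced to the class declarations and entry points visible in the hunks (not a complete plugin; method bodies elided):

    # 1.9.4: v0.12-era API. The output subclasses ObjectBufferedOutput and
    # receives the tag alongside each buffered chunk.
    class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
      Fluent::Plugin.register_output('elasticsearch', self)

      def write_objects(tag, chunk)
        # build and send the bulk request for this chunk
      end
    end

    # 5.0.3: v1 plugin API. The output lives under Fluent::Plugin, declares the
    # plugin helpers it uses, and implements write(chunk); the tag now comes
    # from the chunk's metadata.
    module Fluent::Plugin
      class ElasticsearchOutput < Output
        helpers :event_emitter, :compat_parameters, :record_accessor, :timer
        Fluent::Plugin.register_output('elasticsearch', self)

        def write(chunk)
          tag = chunk.metadata.tag
          # build and send the bulk request(s) for this chunk
        end
      end
    end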
@@ -2,315 +2,930 @@
  require 'date'
  require 'excon'
  require 'elasticsearch'
+ begin
+ require 'elasticsearch/xpack'
+ rescue LoadError
+ end
  require 'json'
  require 'uri'
+ require 'base64'
  begin
  require 'strptime'
  rescue LoadError
  end
+ require 'resolv'

- require 'fluent/output'
+ require 'fluent/plugin/output'
+ require 'fluent/event'
+ require 'fluent/error'
+ require 'fluent/time'
+ require 'fluent/unique_id'
+ require 'fluent/log-ext'
+ require 'zlib'
+ require_relative 'elasticsearch_constants'
+ require_relative 'elasticsearch_error'
+ require_relative 'elasticsearch_error_handler'
  require_relative 'elasticsearch_index_template'
+ require_relative 'elasticsearch_index_lifecycle_management'
+ require_relative 'elasticsearch_tls'
+ require_relative 'elasticsearch_fallback_selector'
+ begin
+ require_relative 'oj_serializer'
+ rescue LoadError
+ end

- class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
- class ConnectionFailure < StandardError; end
-
- Fluent::Plugin.register_output('elasticsearch', self)
-
- config_param :host, :string, :default => 'localhost'
- config_param :port, :integer, :default => 9200
- config_param :user, :string, :default => nil
- config_param :password, :string, :default => nil, :secret => true
- config_param :path, :string, :default => nil
- config_param :scheme, :string, :default => 'http'
- config_param :hosts, :string, :default => nil
- config_param :target_index_key, :string, :default => nil
- config_param :target_type_key, :string, :default => nil
- config_param :time_key_format, :string, :default => nil
- config_param :logstash_format, :bool, :default => false
- config_param :logstash_prefix, :string, :default => "logstash"
- config_param :logstash_dateformat, :string, :default => "%Y.%m.%d"
- config_param :utc_index, :bool, :default => true
- config_param :type_name, :string, :default => "fluentd"
- config_param :index_name, :string, :default => "fluentd"
- config_param :id_key, :string, :default => nil
- config_param :write_operation, :string, :default => "index"
- config_param :parent_key, :string, :default => nil
- config_param :routing_key, :string, :default => nil
- config_param :request_timeout, :time, :default => 5
- config_param :reload_connections, :bool, :default => true
- config_param :reload_on_failure, :bool, :default => false
- config_param :resurrect_after, :time, :default => 60
- config_param :time_key, :string, :default => nil
- config_param :time_key_exclude_timestamp, :bool, :default => false
- config_param :ssl_verify , :bool, :default => true
- config_param :client_key, :string, :default => nil
- config_param :client_cert, :string, :default => nil
- config_param :client_key_pass, :string, :default => nil
- config_param :ca_file, :string, :default => nil
- config_param :remove_keys, :string, :default => nil
- config_param :remove_keys_on_update, :string, :default => ""
- config_param :remove_keys_on_update_key, :string, :default => nil
- config_param :flatten_hashes, :bool, :default => false
- config_param :flatten_hashes_separator, :string, :default => "_"
- config_param :template_name, :string, :default => nil
- config_param :template_file, :string, :default => nil
- config_param :templates, :hash, :default => nil
- config_param :include_tag_key, :bool, :default => false
- config_param :tag_key, :string, :default => 'tag'
- config_param :time_parse_error_tag, :string, :default => 'Fluent::ElasticsearchOutput::TimeParser.error'
- config_param :reconnect_on_error, :bool, :default => false
-
- include Fluent::ElasticsearchIndexTemplate
-
- def initialize
- super
- end
+ module Fluent::Plugin
+ class ElasticsearchOutput < Output
+ class RecoverableRequestFailure < StandardError; end
+ class UnrecoverableRequestFailure < Fluent::UnrecoverableError; end
+ class RetryStreamEmitFailure < StandardError; end

- def configure(conf)
- super
- @time_parser = create_time_parser
+ # MissingIdFieldError is raised for records that do not
+ # include the field for the unique record identifier
+ class MissingIdFieldError < StandardError; end

- if @remove_keys
- @remove_keys = @remove_keys.split(/\s*,\s*/)
+ # RetryStreamError privides a stream to be
+ # put back in the pipeline for cases where a bulk request
+ # failed (e.g some records succeed while others failed)
+ class RetryStreamError < StandardError
+ attr_reader :retry_stream
+ def initialize(retry_stream)
+ @retry_stream = retry_stream
+ end
  end

- if @target_index_key && @target_index_key.is_a?(String)
- @target_index_key = @target_index_key.split '.'
+ RequestInfo = Struct.new(:host, :index, :ilm_index, :ilm_alias)
+
+ attr_reader :alias_indexes
+ attr_reader :template_names
+ attr_reader :ssl_version_options
+ attr_reader :compressable_connection
+ attr_reader :api_key_header
+
+ helpers :event_emitter, :compat_parameters, :record_accessor, :timer
+
+ Fluent::Plugin.register_output('elasticsearch', self)
+
+ DEFAULT_BUFFER_TYPE = "memory"
+ DEFAULT_ELASTICSEARCH_VERSION = 5 # For compatibility.
+ DEFAULT_TYPE_NAME_ES_7x = "_doc".freeze
+ DEFAULT_TYPE_NAME = "fluentd".freeze
+ DEFAULT_RELOAD_AFTER = -1
+ TARGET_BULK_BYTES = 20 * 1024 * 1024
+ DEFAULT_POLICY_ID = "logstash-policy"
+
+ config_param :host, :string, :default => 'localhost'
+ config_param :port, :integer, :default => 9200
+ config_param :user, :string, :default => nil
+ config_param :password, :string, :default => nil, :secret => true
+ config_param :cloud_id, :string, :default => nil
+ config_param :cloud_auth, :string, :default => nil
+ config_param :path, :string, :default => nil
+ config_param :scheme, :enum, :list => [:https, :http], :default => :http
+ config_param :hosts, :string, :default => nil
+ config_param :target_index_key, :string, :default => nil
+ config_param :target_type_key, :string, :default => nil,
+ :deprecated => <<EOC
+ Elasticsearch 7.x or above will ignore this config. Please use fixed type_name instead.
+ EOC
+ config_param :time_key_format, :string, :default => nil
+ config_param :time_precision, :integer, :default => 9
+ config_param :include_timestamp, :bool, :default => false
+ config_param :logstash_format, :bool, :default => false
+ config_param :logstash_prefix, :string, :default => "logstash"
+ config_param :logstash_prefix_separator, :string, :default => '-'
+ config_param :logstash_dateformat, :string, :default => "%Y.%m.%d"
+ config_param :utc_index, :bool, :default => true
+ config_param :type_name, :string, :default => DEFAULT_TYPE_NAME
+ config_param :suppress_type_name, :bool, :default => false
+ config_param :index_name, :string, :default => "fluentd"
+ config_param :id_key, :string, :default => nil
+ config_param :write_operation, :string, :default => "index"
+ config_param :parent_key, :string, :default => nil
+ config_param :routing_key, :string, :default => nil
+ config_param :request_timeout, :time, :default => 5
+ config_param :reload_connections, :bool, :default => true
+ config_param :reload_on_failure, :bool, :default => false
+ config_param :retry_tag, :string, :default=>nil
+ config_param :resurrect_after, :time, :default => 60
+ config_param :time_key, :string, :default => nil
+ config_param :time_key_exclude_timestamp, :bool, :default => false
+ config_param :ssl_verify , :bool, :default => true
+ config_param :client_key, :string, :default => nil
+ config_param :client_cert, :string, :default => nil
+ config_param :client_key_pass, :string, :default => nil, :secret => true
+ config_param :ca_file, :string, :default => nil
+ config_param :remove_keys, :string, :default => nil
+ config_param :remove_keys_on_update, :string, :default => ""
+ config_param :remove_keys_on_update_key, :string, :default => nil
+ config_param :flatten_hashes, :bool, :default => false
+ config_param :flatten_hashes_separator, :string, :default => "_"
+ config_param :template_name, :string, :default => nil
+ config_param :template_file, :string, :default => nil
+ config_param :template_overwrite, :bool, :default => false
+ config_param :customize_template, :hash, :default => nil
+ config_param :rollover_index, :string, :default => false
+ config_param :index_date_pattern, :string, :default => "now/d"
+ config_param :index_separator, :string, :default => "-"
+ config_param :deflector_alias, :string, :default => nil
+ config_param :index_prefix, :string, :default => "logstash",
+ obsoleted: "This parameter shouldn't be used in 4.0.0 or later. Specify ILM target index with using `index_name' w/o `logstash_format' or 'logstash_prefix' w/ `logstash_format' instead."
+ config_param :application_name, :string, :default => "default"
+ config_param :templates, :hash, :default => nil
+ config_param :max_retry_putting_template, :integer, :default => 10
+ config_param :fail_on_putting_template_retry_exceed, :bool, :default => true
+ config_param :fail_on_detecting_es_version_retry_exceed, :bool, :default => true
+ config_param :max_retry_get_es_version, :integer, :default => 15
+ config_param :include_tag_key, :bool, :default => false
+ config_param :tag_key, :string, :default => 'tag'
+ config_param :time_parse_error_tag, :string, :default => 'Fluent::ElasticsearchOutput::TimeParser.error'
+ config_param :reconnect_on_error, :bool, :default => false
+ config_param :pipeline, :string, :default => nil
+ config_param :with_transporter_log, :bool, :default => false
+ config_param :emit_error_for_missing_id, :bool, :default => false
+ config_param :sniffer_class_name, :string, :default => nil
+ config_param :selector_class_name, :string, :default => nil
+ config_param :reload_after, :integer, :default => DEFAULT_RELOAD_AFTER
+ config_param :content_type, :enum, list: [:"application/json", :"application/x-ndjson"], :default => :"application/json",
+ :deprecated => <<EOC
+ elasticsearch gem v6.0.2 starts to use correct Content-Type. Please upgrade elasticserach gem and stop to use this option.
+ see: https://github.com/elastic/elasticsearch-ruby/pull/514
+ EOC
+ config_param :include_index_in_url, :bool, :default => false
+ config_param :http_backend, :enum, list: [:excon, :typhoeus], :default => :excon
+ config_param :http_backend_excon_nonblock, :bool, :default => true
+ config_param :validate_client_version, :bool, :default => false
+ config_param :prefer_oj_serializer, :bool, :default => false
+ config_param :unrecoverable_error_types, :array, :default => ["out_of_memory_error", "es_rejected_execution_exception"]
+ config_param :verify_es_version_at_startup, :bool, :default => true
+ config_param :default_elasticsearch_version, :integer, :default => DEFAULT_ELASTICSEARCH_VERSION
+ config_param :log_es_400_reason, :bool, :default => false
+ config_param :custom_headers, :hash, :default => {}
+ config_param :api_key, :string, :default => nil, :secret => true
+ config_param :suppress_doc_wrap, :bool, :default => false
+ config_param :ignore_exceptions, :array, :default => [], value_type: :string, :desc => "Ignorable exception list"
+ config_param :exception_backup, :bool, :default => true, :desc => "Chunk backup flag when ignore exception occured"
+ config_param :bulk_message_request_threshold, :size, :default => TARGET_BULK_BYTES
+ config_param :compression_level, :enum, list: [:no_compression, :best_speed, :best_compression, :default_compression], :default => :no_compression
+ config_param :enable_ilm, :bool, :default => false
+ config_param :ilm_policy_id, :string, :default => DEFAULT_POLICY_ID
+ config_param :ilm_policy, :hash, :default => {}
+ config_param :ilm_policies, :hash, :default => {}
+ config_param :ilm_policy_overwrite, :bool, :default => false
+ config_param :truncate_caches_interval, :time, :default => nil
+ config_param :use_legacy_template, :bool, :default => true
+ config_param :catch_transport_exception_on_retry, :bool, :default => true
+
+ config_section :metadata, param_name: :metainfo, multi: false do
+ config_param :include_chunk_id, :bool, :default => false
+ config_param :chunk_id_key, :string, :default => "chunk_id".freeze
  end

- if @target_type_key && @target_type_key.is_a?(String)
- @target_type_key = @target_type_key.split '.'
+ config_section :buffer do
+ config_set_default :@type, DEFAULT_BUFFER_TYPE
+ config_set_default :chunk_keys, ['tag']
+ config_set_default :timekey_use_utc, true
  end

- if @remove_keys_on_update && @remove_keys_on_update.is_a?(String)
- @remove_keys_on_update = @remove_keys_on_update.split ','
+ include Fluent::ElasticsearchIndexTemplate
+ include Fluent::Plugin::ElasticsearchConstants
+ include Fluent::Plugin::ElasticsearchIndexLifecycleManagement
+ include Fluent::Plugin::ElasticsearchTLS
+
+ def initialize
+ super
  end

- if @template_name && @template_file
- template_install(@template_name, @template_file)
- elsif @templates
- templates_hash_install (@templates)
+ def configure(conf)
+ compat_parameters_convert(conf, :buffer)
+
+ super
+ if placeholder_substitution_needed_for_template?
+ # nop.
+ elsif not @buffer_config.chunk_keys.include? "tag" and
+ not @buffer_config.chunk_keys.include? "_index"
+ raise Fluent::ConfigError, "'tag' or '_index' in chunk_keys is required."
+ end
+ @time_parser = create_time_parser
+ @backend_options = backend_options
+ @ssl_version_options = set_tls_minmax_version_config(@ssl_version, @ssl_max_version, @ssl_min_version)
+
+ if @remove_keys
+ @remove_keys = @remove_keys.split(/\s*,\s*/)
+ end
+
+ if @target_index_key && @target_index_key.is_a?(String)
+ @target_index_key = @target_index_key.split '.'
+ end
+
+ if @target_type_key && @target_type_key.is_a?(String)
+ @target_type_key = @target_type_key.split '.'
+ end
+
+ if @remove_keys_on_update && @remove_keys_on_update.is_a?(String)
+ @remove_keys_on_update = @remove_keys_on_update.split ','
+ end
+
+ @api_key_header = setup_api_key
+
+ raise Fluent::ConfigError, "'max_retry_putting_template' must be greater than or equal to zero." if @max_retry_putting_template < 0
+ raise Fluent::ConfigError, "'max_retry_get_es_version' must be greater than or equal to zero." if @max_retry_get_es_version < 0
+
+ # Dump log when using host placeholders and template features at same time.
+ valid_host_placeholder = placeholder?(:host_placeholder, @host)
+ if valid_host_placeholder && (@template_name && @template_file || @templates)
+ if @verify_es_version_at_startup
+ raise Fluent::ConfigError, "host placeholder, template installation, and verify Elasticsearch version at startup are exclusive feature at same time. Please specify verify_es_version_at_startup as `false` when host placeholder and template installation are enabled."
+ end
+ log.info "host placeholder and template installation makes your Elasticsearch cluster a bit slow down(beta)."
+ end
+
+ raise Fluent::ConfigError, "You can't specify ilm_policy and ilm_policies at the same time" unless @ilm_policy.empty? or @ilm_policies.empty?
+
+ unless @ilm_policy.empty?
+ @ilm_policies = { @ilm_policy_id => @ilm_policy }
+ end
+ @alias_indexes = []
+ @template_names = []
+ if !dry_run?
+ if @template_name && @template_file
+ if @enable_ilm
+ raise Fluent::ConfigError, "deflector_alias is prohibited to use with enable_ilm at same time." if @deflector_alias
+ end
+ if @ilm_policy.empty? && @ilm_policy_overwrite
+ raise Fluent::ConfigError, "ilm_policy_overwrite requires a non empty ilm_policy."
+ end
+ if @logstash_format || placeholder_substitution_needed_for_template?
+ class << self
+ alias_method :template_installation, :template_installation_actual
+ end
+ else
+ template_installation_actual(@deflector_alias ? @deflector_alias : @index_name, @template_name, @customize_template, @application_name, @index_name, @ilm_policy_id)
+ end
+ verify_ilm_working if @enable_ilm
+ end
+ if @templates
+ retry_operate(@max_retry_putting_template,
+ @fail_on_putting_template_retry_exceed,
+ @catch_transport_exception_on_retry) do
+ templates_hash_install(@templates, @template_overwrite)
+ end
+ end
+ end
+
+ @truncate_mutex = Mutex.new
+ if @truncate_caches_interval
+ timer_execute(:out_elasticsearch_truncate_caches, @truncate_caches_interval) do
+ log.info('Clean up the indices and template names cache')
+
+ @truncate_mutex.synchronize {
+ @alias_indexes.clear
+ @template_names.clear
+ }
+ end
+ end
+
+ @serializer_class = nil
+ begin
+ require 'oj'
+ @dump_proc = Oj.method(:dump)
+ if @prefer_oj_serializer
+ @serializer_class = Fluent::Plugin::Serializer::Oj
+ Elasticsearch::API.settings[:serializer] = Fluent::Plugin::Serializer::Oj
+ end
+ rescue LoadError
+ @dump_proc = Yajl.method(:dump)
+ end
+
+ raise Fluent::ConfigError, "`cloud_auth` must be present if `cloud_id` is present" if @cloud_id && @cloud_auth.nil?
+ raise Fluent::ConfigError, "`password` must be present if `user` is present" if @user && @password.nil?
+
+ if @cloud_auth
+ @user = @cloud_auth.split(':', -1)[0]
+ @password = @cloud_auth.split(':', -1)[1]
+ end
+
+ if @user && m = @user.match(/%{(?<user>.*)}/)
+ @user = URI.encode_www_form_component(m["user"])
+ end
+ if @password && m = @password.match(/%{(?<password>.*)}/)
+ @password = URI.encode_www_form_component(m["password"])
+ end
+
+ @transport_logger = nil
+ if @with_transporter_log
+ @transport_logger = log
+ log_level = conf['@log_level'] || conf['log_level']
+ log.warn "Consider to specify log_level with @log_level." unless log_level
+ end
+ # Specify @sniffer_class before calling #client.
+ # #detect_es_major_version uses #client.
+ @sniffer_class = nil
+ begin
+ @sniffer_class = Object.const_get(@sniffer_class_name) if @sniffer_class_name
+ rescue Exception => ex
+ raise Fluent::ConfigError, "Could not load sniffer class #{@sniffer_class_name}: #{ex}"
+ end
+
+ @selector_class = nil
+ begin
+ @selector_class = Object.const_get(@selector_class_name) if @selector_class_name
+ rescue Exception => ex
+ raise Fluent::ConfigError, "Could not load selector class #{@selector_class_name}: #{ex}"
+ end
+
+ @last_seen_major_version = if major_version = handle_last_seen_es_major_version
+ major_version
+ else
+ @default_elasticsearch_version
+ end
+ if @suppress_type_name && @last_seen_major_version >= 7
+ @type_name = nil
+ else
+ if @last_seen_major_version == 6 && @type_name != DEFAULT_TYPE_NAME_ES_7x
+ log.info "Detected ES 6.x: ES 7.x will only accept `_doc` in type_name."
+ end
+ if @last_seen_major_version == 7 && @type_name != DEFAULT_TYPE_NAME_ES_7x
+ log.warn "Detected ES 7.x: `_doc` will be used as the document `_type`."
+ @type_name = '_doc'.freeze
+ end
+ if @last_seen_major_version >= 8 && @type_name != DEFAULT_TYPE_NAME_ES_7x
+ log.debug "Detected ES 8.x or above: This parameter has no effect."
+ @type_name = nil
+ end
+ end
+
+ if @validate_client_version && !dry_run?
+ if @last_seen_major_version != client_library_version.to_i
+ raise Fluent::ConfigError, <<-EOC
+ Detected ES #{@last_seen_major_version} but you use ES client #{client_library_version}.
+ Please consider to use #{@last_seen_major_version}.x series ES client.
+ EOC
+ end
+ end
+
+ if @last_seen_major_version >= 6
+ case @ssl_version
+ when :SSLv23, :TLSv1, :TLSv1_1
+ if @scheme == :https
+ log.warn "Detected ES 6.x or above and enabled insecure security:
+ You might have to specify `ssl_version TLSv1_2` in configuration."
+ end
+ end
+ end
+
+ if @ssl_version && @scheme == :https
+ if !@http_backend_excon_nonblock
+ log.warn "TLS handshake will be stucked with block connection.
+ Consider to set `http_backend_excon_nonblock` as true"
+ end
+ end
+
+ # Consider missing the prefix of "$." in nested key specifiers.
+ @id_key = convert_compat_id_key(@id_key) if @id_key
+ @parent_key = convert_compat_id_key(@parent_key) if @parent_key
+ @routing_key = convert_compat_id_key(@routing_key) if @routing_key
+
+ @routing_key_name = configure_routing_key_name
+ @meta_config_map = create_meta_config_map
+ @current_config = nil
+ @compressable_connection = false
+
+ @ignore_exception_classes = @ignore_exceptions.map do |exception|
+ unless Object.const_defined?(exception)
+ log.warn "Cannot find class #{exception}. Will ignore it."
+
+ nil
+ else
+ Object.const_get(exception)
+ end
+ end.compact
+
+ if @bulk_message_request_threshold < 0
+ class << self
+ alias_method :split_request?, :split_request_size_uncheck?
+ end
+ else
+ class << self
+ alias_method :split_request?, :split_request_size_check?
+ end
+ end
+
+ if Gem::Version.create(::Elasticsearch::Transport::VERSION) < Gem::Version.create("7.2.0")
+ if compression
+ raise Fluent::ConfigError, <<-EOC
+ Cannot use compression with elasticsearch-transport plugin version < 7.2.0
+ Your elasticsearch-transport plugin version version is #{Elasticsearch::Transport::VERSION}.
+ Please consider to upgrade ES client.
+ EOC
+ end
+ end
  end

- @meta_config_map = create_meta_config_map
+ def setup_api_key
+ return {} unless @api_key

- begin
- require 'oj'
- @dump_proc = Oj.method(:dump)
- rescue LoadError
- @dump_proc = Yajl.method(:dump)
+ { "Authorization" => "ApiKey " + Base64.strict_encode64(@api_key) }
  end
- end

- def create_meta_config_map
- result = []
- result << [@id_key, '_id'] if @id_key
- result << [@parent_key, '_parent'] if @parent_key
- result << [@routing_key, '_routing'] if @routing_key
- result
- end
+ def dry_run?
+ if Fluent::Engine.respond_to?(:dry_run_mode)
+ Fluent::Engine.dry_run_mode
+ elsif Fluent::Engine.respond_to?(:supervisor_mode)
+ Fluent::Engine.supervisor_mode
+ end
+ end

- # once fluent v0.14 is released we might be able to use
- # Fluent::Parser::TimeParser, but it doesn't quite do what we want - if gives
- # [sec,nsec] where as we want something we can call `strftime` on...
- def create_time_parser
- if @time_key_format
- begin
- # Strptime doesn't support all formats, but for those it does it's
- # blazingly fast.
- strptime = Strptime.new(@time_key_format)
- Proc.new { |value| strptime.exec(value).to_datetime }
- rescue
- # Can happen if Strptime doesn't recognize the format; or
- # if strptime couldn't be required (because it's not installed -- it's
- # ruby 2 only)
- Proc.new { |value| DateTime.strptime(value, @time_key_format) }
- end
- else
- Proc.new { |value| DateTime.parse(value) }
+ def placeholder?(name, param)
+ placeholder_validities = []
+ placeholder_validators(name, param).each do |v|
+ begin
+ v.validate!
+ placeholder_validities << true
+ rescue Fluent::ConfigError => e
+ log.debug("'#{name} #{param}' is tested built-in placeholder(s) but there is no valid placeholder(s). error: #{e}")
+ placeholder_validities << false
+ end
+ end
+ placeholder_validities.include?(true)
  end
- end

- def parse_time(value, event_time, tag)
- @time_parser.call(value)
- rescue => e
- router.emit_error_event(@time_parse_error_tag, Fluent::Engine.now, {'tag' => tag, 'time' => event_time, 'format' => @time_key_format, 'value' => value}, e)
- return Time.at(event_time).to_datetime
- end
+ def compression
+ !(@compression_level == :no_compression)
+ end

- def client
- @_es ||= begin
- excon_options = { client_key: @client_key, client_cert: @client_cert, client_key_pass: @client_key_pass }
- adapter_conf = lambda {|f| f.adapter :excon, excon_options }
- transport = Elasticsearch::Transport::Transport::HTTP::Faraday.new(get_connection_options.merge(
- options: {
- reload_connections: @reload_connections,
- reload_on_failure: @reload_on_failure,
- resurrect_after: @resurrect_after,
- retry_on_failure: 5,
- transport_options: {
- headers: { 'Content-Type' => 'application/json' },
- request: { timeout: @request_timeout },
- ssl: { verify: @ssl_verify, ca_file: @ca_file }
- }
- }), &adapter_conf)
- es = Elasticsearch::Client.new transport: transport
+ def compression_strategy
+ case @compression_level
+ when :default_compression
+ Zlib::DEFAULT_COMPRESSION
+ when :best_compression
+ Zlib::BEST_COMPRESSION
+ when :best_speed
+ Zlib::BEST_SPEED
+ else
+ Zlib::NO_COMPRESSION
+ end
+ end

+ def backend_options
+ case @http_backend
+ when :excon
+ { client_key: @client_key, client_cert: @client_cert, client_key_pass: @client_key_pass, nonblock: @http_backend_excon_nonblock }
+ when :typhoeus
+ require 'typhoeus'
+ { sslkey: @client_key, sslcert: @client_cert, keypasswd: @client_key_pass }
+ end
+ rescue LoadError => ex
+ log.error_backtrace(ex.backtrace)
+ raise Fluent::ConfigError, "You must install #{@http_backend} gem. Exception: #{ex}"
+ end
+
+ def handle_last_seen_es_major_version
+ if @verify_es_version_at_startup && !dry_run?
+ retry_operate(@max_retry_get_es_version,
+ @fail_on_detecting_es_version_retry_exceed,
+ @catch_transport_exception_on_retry) do
+ detect_es_major_version
+ end
+ else
+ nil
+ end
+ end
+
+ def detect_es_major_version
+ @_es_info ||= client.info
  begin
- raise ConnectionFailure, "Can not reach Elasticsearch cluster (#{connection_options_description})!" unless es.ping
- rescue *es.transport.host_unreachable_exceptions => e
- raise ConnectionFailure, "Can not reach Elasticsearch cluster (#{connection_options_description})! #{e.message}"
+ unless version = @_es_info.dig("version", "number")
+ version = @default_elasticsearch_version
+ end
+ rescue NoMethodError => e
+ log.warn "#{@_es_info} can not dig version information. Assuming Elasticsearch #{@default_elasticsearch_version}", error: e
+ version = @default_elasticsearch_version
  end
+ version.to_i
+ end

- log.info "Connection opened to Elasticsearch cluster => #{connection_options_description}"
- es
+ def client_library_version
+ Elasticsearch::VERSION
  end
- end

- def get_connection_options
- raise "`password` must be present if `user` is present" if @user && !@password
-
- hosts = if @hosts
- @hosts.split(',').map do |host_str|
- # Support legacy hosts format host:port,host:port,host:port...
- if host_str.match(%r{^[^:]+(\:\d+)?$})
- {
- host: host_str.split(':')[0],
- port: (host_str.split(':')[1] || @port).to_i,
- scheme: @scheme
+ def configure_routing_key_name
+ if @last_seen_major_version >= 7
+ 'routing'
+ else
+ '_routing'
+ end
+ end
+
+ def convert_compat_id_key(key)
+ if key.include?('.') && !key.start_with?('$[')
+ key = "$.#{key}" unless key.start_with?('$.')
+ end
+ key
+ end
+
+ def create_meta_config_map
+ result = []
+ result << [record_accessor_create(@id_key), '_id'] if @id_key
+ result << [record_accessor_create(@parent_key), '_parent'] if @parent_key
+ result << [record_accessor_create(@routing_key), @routing_key_name] if @routing_key
+ result
+ end
+
+ # once fluent v0.14 is released we might be able to use
+ # Fluent::Parser::TimeParser, but it doesn't quite do what we want - if gives
+ # [sec,nsec] where as we want something we can call `strftime` on...
+ def create_time_parser
+ if @time_key_format
+ begin
+ # Strptime doesn't support all formats, but for those it does it's
+ # blazingly fast.
+ strptime = Strptime.new(@time_key_format)
+ Proc.new { |value|
+ value = convert_numeric_time_into_string(value, @time_key_format) if value.is_a?(Numeric)
+ strptime.exec(value).to_datetime
+ }
+ rescue
+ # Can happen if Strptime doesn't recognize the format; or
+ # if strptime couldn't be required (because it's not installed -- it's
+ # ruby 2 only)
+ Proc.new { |value|
+ value = convert_numeric_time_into_string(value, @time_key_format) if value.is_a?(Numeric)
+ DateTime.strptime(value, @time_key_format)
  }
- else
- # New hosts format expects URLs such as http://logs.foo.com,https://john:pass@logs2.foo.com/elastic
- uri = URI(host_str)
- %w(user password path).inject(host: uri.host, port: uri.port, scheme: uri.scheme) do |hash, key|
- hash[key.to_sym] = uri.public_send(key) unless uri.public_send(key).nil? || uri.public_send(key) == ''
- hash
- end
  end
- end.compact
- else
- [{host: @host, port: @port, scheme: @scheme}]
- end.each do |host|
- host.merge!(user: @user, password: @password) if !host[:user] && @user
- host.merge!(path: @path) if !host[:path] && @path
+ else
+ Proc.new { |value|
+ value = convert_numeric_time_into_string(value) if value.is_a?(Numeric)
+ DateTime.parse(value)
+ }
+ end
  end

- {
- hosts: hosts
- }
- end
+ def convert_numeric_time_into_string(numeric_time, time_key_format = "%Y-%m-%d %H:%M:%S.%N%z")
+ numeric_time_parser = Fluent::NumericTimeParser.new(:float)
+ Time.at(numeric_time_parser.parse(numeric_time).to_r).strftime(time_key_format)
+ end

- def connection_options_description
- get_connection_options[:hosts].map do |host_info|
- attributes = host_info.dup
- attributes[:password] = 'obfuscated' if attributes.has_key?(:password)
- attributes.inspect
- end.join(', ')
- end
+ def parse_time(value, event_time, tag)
+ @time_parser.call(value)
+ rescue => e
+ router.emit_error_event(@time_parse_error_tag, Fluent::Engine.now, {'tag' => tag, 'time' => event_time, 'format' => @time_key_format, 'value' => value}, e)
+ return Time.at(event_time).to_datetime
+ end

- BODY_DELIMITER = "\n".freeze
- UPDATE_OP = "update".freeze
- UPSERT_OP = "upsert".freeze
- CREATE_OP = "create".freeze
- INDEX_OP = "index".freeze
- ID_FIELD = "_id".freeze
- TIMESTAMP_FIELD = "@timestamp".freeze
-
- def append_record_to_messages(op, meta, header, record, msgs)
- case op
- when UPDATE_OP, UPSERT_OP
- if meta.has_key?(ID_FIELD)
- header[UPDATE_OP] = meta
- msgs << @dump_proc.call(header) << BODY_DELIMITER
- msgs << @dump_proc.call(update_body(record, op)) << BODY_DELIMITER
+ def cloud_client
+ Elasticsearch::Client.new(
+ cloud_id: @cloud_id,
+ user: @user,
+ password: @password
+ )
+ end
+
+ def client(host = nil, compress_connection = false)
+ return cloud_client if @cloud_id
+
+ # check here to see if we already have a client connection for the given host
+ connection_options = get_connection_options(host)
+
+ @_es = nil unless is_existing_connection(connection_options[:hosts])
+ @_es = nil unless @compressable_connection == compress_connection
+
+ @_es ||= begin
+ @compressable_connection = compress_connection
+ @current_config = connection_options[:hosts].clone
+ adapter_conf = lambda {|f| f.adapter @http_backend, @backend_options }
+ local_reload_connections = @reload_connections
+ if local_reload_connections && @reload_after > DEFAULT_RELOAD_AFTER
+ local_reload_connections = @reload_after
+ end
+
+ gzip_headers = if compress_connection
+ {'Content-Encoding' => 'gzip'}
+ else
+ {}
+ end
+ headers = { 'Content-Type' => @content_type.to_s }
+ .merge(@custom_headers)
+ .merge(@api_key_header)
+ .merge(gzip_headers)
+ ssl_options = { verify: @ssl_verify, ca_file: @ca_file}.merge(@ssl_version_options)
+
+ transport = Elasticsearch::Transport::Transport::HTTP::Faraday.new(connection_options.merge(
+ options: {
+ reload_connections: local_reload_connections,
+ reload_on_failure: @reload_on_failure,
+ resurrect_after: @resurrect_after,
+ logger: @transport_logger,
+ transport_options: {
+ headers: headers,
+ request: { timeout: @request_timeout },
+ ssl: ssl_options,
+ },
+ http: {
+ user: @user,
+ password: @password,
+ scheme: @scheme
+ },
+ sniffer_class: @sniffer_class,
+ serializer_class: @serializer_class,
+ selector_class: @selector_class,
+ compression: compress_connection,
+ }), &adapter_conf)
+ Elasticsearch::Client.new transport: transport
+ end
+ end
+
+ def get_escaped_userinfo(host_str)
+ if m = host_str.match(/(?<scheme>.*)%{(?<user>.*)}:%{(?<password>.*)}(?<path>@.*)/)
+ m["scheme"] +
+ URI.encode_www_form_component(m["user"]) +
+ ':' +
+ URI.encode_www_form_component(m["password"]) +
+ m["path"]
+ else
+ host_str
+ end
+ end
+
+ def get_connection_options(con_host=nil)
+
+ hosts = if con_host || @hosts
+ (con_host || @hosts).split(',').map do |host_str|
+ # Support legacy hosts format host:port,host:port,host:port...
+ if host_str.match(%r{^[^:]+(\:\d+)?$})
+ {
+ host: host_str.split(':')[0],
+ port: (host_str.split(':')[1] || @port).to_i,
+ scheme: @scheme.to_s
+ }
+ else
+ # New hosts format expects URLs such as http://logs.foo.com,https://john:pass@logs2.foo.com/elastic
+ uri = URI(get_escaped_userinfo(host_str))
+ %w(user password path).inject(host: uri.host, port: uri.port, scheme: uri.scheme) do |hash, key|
+ hash[key.to_sym] = uri.public_send(key) unless uri.public_send(key).nil? || uri.public_send(key) == ''
+ hash
+ end
+ end
+ end.compact
+ else
+ if Resolv::IPv6::Regex.match(@host)
+ [{host: "[#{@host}]", scheme: @scheme.to_s, port: @port}]
+ else
+ [{host: @host, port: @port, scheme: @scheme.to_s}]
+ end
+ end.each do |host|
+ host.merge!(user: @user, password: @password) if !host[:user] && @user
+ host.merge!(path: @path) if !host[:path] && @path
  end
- when CREATE_OP
- if meta.has_key?(ID_FIELD)
- header[CREATE_OP] = meta
+
+ {
+ hosts: hosts
+ }
+ end
+
+ def connection_options_description(con_host=nil)
+ get_connection_options(con_host)[:hosts].map do |host_info|
+ attributes = host_info.dup
+ attributes[:password] = 'obfuscated' if attributes.has_key?(:password)
+ attributes.inspect
+ end.join(', ')
+ end
+
+ # append_record_to_messages adds a record to the bulk message
+ # payload to be submitted to Elasticsearch. Records that do
+ # not include '_id' field are skipped when 'write_operation'
+ # is configured for 'create' or 'update'
+ #
+ # returns 'true' if record was appended to the bulk message
+ # and 'false' otherwise
+ def append_record_to_messages(op, meta, header, record, msgs)
+ case op
+ when UPDATE_OP, UPSERT_OP
+ if meta.has_key?(ID_FIELD)
+ header[UPDATE_OP] = meta
+ msgs << @dump_proc.call(header) << BODY_DELIMITER
+ msgs << @dump_proc.call(update_body(record, op)) << BODY_DELIMITER
+ return true
+ end
+ when CREATE_OP
+ if meta.has_key?(ID_FIELD)
+ header[CREATE_OP] = meta
+ msgs << @dump_proc.call(header) << BODY_DELIMITER
+ msgs << @dump_proc.call(record) << BODY_DELIMITER
+ return true
+ end
+ when INDEX_OP
+ header[INDEX_OP] = meta
  msgs << @dump_proc.call(header) << BODY_DELIMITER
  msgs << @dump_proc.call(record) << BODY_DELIMITER
+ return true
  end
- when INDEX_OP
- header[INDEX_OP] = meta
- msgs << @dump_proc.call(header) << BODY_DELIMITER
- msgs << @dump_proc.call(record) << BODY_DELIMITER
+ return false
+ end
+
+ def update_body(record, op)
+ update = remove_keys(record)
+ if @suppress_doc_wrap
+ return update
+ end
+ body = {"doc".freeze => update}
+ if op == UPSERT_OP
+ if update == record
+ body["doc_as_upsert".freeze] = true
+ else
+ body[UPSERT_OP] = record
+ end
+ end
+ body
  end
- end

- def update_body(record, op)
- update = remove_keys(record)
- body = {"doc".freeze => update}
- if op == UPSERT_OP
- if update == record
- body["doc_as_upsert".freeze] = true
+ def remove_keys(record)
+ keys = record[@remove_keys_on_update_key] || @remove_keys_on_update || []
+ record.delete(@remove_keys_on_update_key)
+ return record unless keys.any?
+ record = record.dup
+ keys.each { |key| record.delete(key) }
+ record
+ end
+
+ def flatten_record(record, prefix=[])
+ ret = {}
+ if record.is_a? Hash
+ record.each { |key, value|
+ ret.merge! flatten_record(value, prefix + [key.to_s])
+ }
+ elsif record.is_a? Array
+ # Don't mess with arrays, leave them unprocessed
+ ret.merge!({prefix.join(@flatten_hashes_separator) => record})
  else
- body[UPSERT_OP] = record
+ return {prefix.join(@flatten_hashes_separator) => record}
  end
+ ret
  end
- body
- end

- def remove_keys(record)
- keys = record[@remove_keys_on_update_key] || @remove_keys_on_update || []
- record.delete(@remove_keys_on_update_key)
- return record unless keys.any?
- record = record.dup
- keys.each { |key| record.delete(key) }
- record
- end
+ def expand_placeholders(chunk)
+ logstash_prefix = extract_placeholders(@logstash_prefix, chunk)
+ logstash_dateformat = extract_placeholders(@logstash_dateformat, chunk)
+ index_name = extract_placeholders(@index_name, chunk)
+ if @type_name
+ type_name = extract_placeholders(@type_name, chunk)
+ else
+ type_name = nil
+ end
+ if @template_name
+ template_name = extract_placeholders(@template_name, chunk)
+ else
+ template_name = nil
+ end
+ if @customize_template
+ customize_template = @customize_template.each_with_object({}) { |(key, value), hash| hash[key] = extract_placeholders(value, chunk) }
+ else
+ customize_template = nil
+ end
+ if @deflector_alias
+ deflector_alias = extract_placeholders(@deflector_alias, chunk)
+ else
+ deflector_alias = nil
+ end
+ if @application_name
+ application_name = extract_placeholders(@application_name, chunk)
+ else
+ application_name = nil
+ end
+ if @pipeline
+ pipeline = extract_placeholders(@pipeline, chunk)
+ else
+ pipeline = nil
+ end
+ if @ilm_policy_id
+ ilm_policy_id = extract_placeholders(@ilm_policy_id, chunk)
+ else
+ ilm_policy_id = nil
+ end
+ return logstash_prefix, logstash_dateformat, index_name, type_name, template_name, customize_template, deflector_alias, application_name, pipeline, ilm_policy_id
+ end

- def flatten_record(record, prefix=[])
- ret = {}
- if record.is_a? Hash
- record.each { |key, value|
- ret.merge! flatten_record(value, prefix + [key.to_s])
- }
- elsif record.is_a? Array
- # Don't mess with arrays, leave them unprocessed
- ret.merge!({prefix.join(@flatten_hashes_separator) => record})
- else
- return {prefix.join(@flatten_hashes_separator) => record}
+ def multi_workers_ready?
+ true
  end
- ret
- end

- def write_objects(tag, chunk)
- bulk_message = ''
- header = {}
- meta = {}
+ def inject_chunk_id_to_record_if_needed(record, chunk_id)
+ if @metainfo&.include_chunk_id
+ record[@metainfo.chunk_id_key] = chunk_id
+ record
+ else
+ record
+ end
+ end
+
+ def write(chunk)
+ bulk_message_count = Hash.new { |h,k| h[k] = 0 }
+ bulk_message = Hash.new { |h,k| h[k] = '' }
+ header = {}
+ meta = {}
+
+ tag = chunk.metadata.tag
+ chunk_id = dump_unique_id_hex(chunk.unique_id)
+ extracted_values = expand_placeholders(chunk)
+ host = if @hosts
+ extract_placeholders(@hosts, chunk)
+ else
+ extract_placeholders(@host, chunk)
+ end

- chunk.msgpack_each do |time, record|
- next unless record.is_a? Hash
+ chunk.msgpack_each do |time, record|
+ next unless record.is_a? Hash
+
+ record = inject_chunk_id_to_record_if_needed(record, chunk_id)
+
+ begin
+ meta, header, record = process_message(tag, meta, header, time, record, extracted_values)
+ info = if @include_index_in_url
+ RequestInfo.new(host, meta.delete("_index".freeze), meta["_index".freeze], meta.delete("_alias".freeze))
+ else
+ RequestInfo.new(host, nil, meta["_index".freeze], meta.delete("_alias".freeze))
+ end
+
+ if split_request?(bulk_message, info)
+ bulk_message.each do |info, msgs|
+ send_bulk(msgs, tag, chunk, bulk_message_count[info], extracted_values, info) unless msgs.empty?
+ msgs.clear
+ # Clear bulk_message_count for this info.
+ bulk_message_count[info] = 0;
+ next
+ end
+ end
+
+ if append_record_to_messages(@write_operation, meta, header, record, bulk_message[info])
+ bulk_message_count[info] += 1;
+ else
+ if @emit_error_for_missing_id
+ raise MissingIdFieldError, "Missing '_id' field. Write operation is #{@write_operation}"
+ else
+ log.on_debug { log.debug("Dropping record because its missing an '_id' field and write_operation is #{@write_operation}: #{record}") }
+ end
+ end
+ rescue => e
+ router.emit_error_event(tag, time, record, e)
+ end
+ end
+
+ bulk_message.each do |info, msgs|
+ send_bulk(msgs, tag, chunk, bulk_message_count[info], extracted_values, info) unless msgs.empty?
+ msgs.clear
+ end
+ end
+
+ def split_request?(bulk_message, info)
+ # For safety.
+ end
+
+ def split_request_size_check?(bulk_message, info)
+ bulk_message[info].size > @bulk_message_request_threshold
+ end
+
+ def split_request_size_uncheck?(bulk_message, info)
+ false
+ end
+
+ def process_message(tag, meta, header, time, record, extracted_values)
+ logstash_prefix, logstash_dateformat, index_name, type_name, _template_name, _customize_template, _deflector_alias, application_name, pipeline, _ilm_policy_id = extracted_values

  if @flatten_hashes
  record = flatten_record(record)
  end

- target_index_parent, target_index_child_key = @target_index_key ? get_parent_of(record, @target_index_key) : nil
- if target_index_parent && target_index_parent[target_index_child_key]
- target_index = target_index_parent.delete(target_index_child_key)
- elsif @logstash_format
+ dt = nil
+ if @logstash_format || @include_timestamp
  if record.has_key?(TIMESTAMP_FIELD)
  rts = record[TIMESTAMP_FIELD]
  dt = parse_time(rts, time, tag)
  elsif record.has_key?(@time_key)
  rts = record[@time_key]
  dt = parse_time(rts, time, tag)
- record[TIMESTAMP_FIELD] = rts unless @time_key_exclude_timestamp
+ record[TIMESTAMP_FIELD] = dt.iso8601(@time_precision) unless @time_key_exclude_timestamp
  else
  dt = Time.at(time).to_datetime
- record[TIMESTAMP_FIELD] = dt.to_s
+ record[TIMESTAMP_FIELD] = dt.iso8601(@time_precision)
  end
+ end
+
+ target_index_parent, target_index_child_key = @target_index_key ? get_parent_of(record, @target_index_key) : nil
+ if target_index_parent && target_index_parent[target_index_child_key]
+ target_index_alias = target_index = target_index_parent.delete(target_index_child_key)
+ elsif @logstash_format
  dt = dt.new_offset(0) if @utc_index
- target_index = "#{@logstash_prefix}-#{dt.strftime(@logstash_dateformat)}"
+ target_index = "#{logstash_prefix}#{@logstash_prefix_separator}#{dt.strftime(logstash_dateformat)}"
+ target_index_alias = "#{logstash_prefix}#{@logstash_prefix_separator}#{application_name}#{@logstash_prefix_separator}#{dt.strftime(logstash_dateformat)}"
  else
- target_index = @index_name
+ target_index_alias = target_index = index_name
  end

  # Change target_index to lower-case since Elasticsearch doesn't
  # allow upper-case characters in index names.
  target_index = target_index.downcase
+ target_index_alias = target_index_alias.downcase
  if @include_tag_key
  record[@tag_key] = tag
  end
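
(Aside, before the second hunk: the flatten_hashes code path above is easiest to follow with a concrete record. The following standalone sketch restates flatten_record from the hunk, with a separator argument standing in for @flatten_hashes_separator, whose default is "_"; the sample record is hypothetical.)

    def flatten_record(record, separator = "_", prefix = [])
      ret = {}
      if record.is_a? Hash
        record.each { |key, value| ret.merge! flatten_record(value, separator, prefix + [key.to_s]) }
      elsif record.is_a? Array
        # Arrays are left unprocessed, keyed by their flattened path.
        ret.merge!({prefix.join(separator) => record})
      else
        return {prefix.join(separator) => record}
      end
      ret
    end

    flatten_record({"user" => {"name" => "jane", "roles" => ["admin"]}})
    # => {"user_name" => "jane", "user_roles" => ["admin"]}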
@@ -318,55 +933,181 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
  target_type_parent, target_type_child_key = @target_type_key ? get_parent_of(record, @target_type_key) : nil
  if target_type_parent && target_type_parent[target_type_child_key]
  target_type = target_type_parent.delete(target_type_child_key)
+ if @last_seen_major_version == 6
+ log.warn "Detected ES 6.x: `@type_name` will be used as the document `_type`."
+ target_type = type_name
+ elsif @last_seen_major_version == 7
+ log.warn "Detected ES 7.x: `_doc` will be used as the document `_type`."
+ target_type = '_doc'.freeze
+ elsif @last_seen_major_version >=8
+ log.debug "Detected ES 8.x or above: document type will not be used."
+ target_type = nil
+ end
  else
- target_type = @type_name
+ if @suppress_type_name && @last_seen_major_version >= 7
+ target_type = nil
+ elsif @last_seen_major_version == 7 && @type_name != DEFAULT_TYPE_NAME_ES_7x
+ log.warn "Detected ES 7.x: `_doc` will be used as the document `_type`."
+ target_type = '_doc'.freeze
+ elsif @last_seen_major_version >= 8
+ log.debug "Detected ES 8.x or above: document type will not be used."
+ target_type = nil
+ else
+ target_type = type_name
+ end
  end

  meta.clear
  meta["_index".freeze] = target_index
- meta["_type".freeze] = target_type
+ meta["_type".freeze] = target_type unless @last_seen_major_version >= 8
+ meta["_alias".freeze] = target_index_alias

- @meta_config_map.each do |record_key, meta_key|
- meta[meta_key] = record[record_key] if record[record_key]
+ if @pipeline
+ meta["pipeline".freeze] = pipeline
+ end
+
+ @meta_config_map.each do |record_accessor, meta_key|
+ if raw_value = record_accessor.call(record)
+ meta[meta_key] = raw_value
+ end
  end

  if @remove_keys
  @remove_keys.each { |key| record.delete(key) }
  end

- append_record_to_messages(@write_operation, meta, header, record, bulk_message)
+ return [meta, header, record]
  end

- send_bulk(bulk_message) unless bulk_message.empty?
- bulk_message.clear
- end
+ # returns [parent, child_key] of child described by path array in record's tree
+ # returns [nil, child_key] if path doesnt exist in record
+ def get_parent_of(record, path)
+ parent_object = path[0..-2].reduce(record) { |a, e| a.is_a?(Hash) ? a[e] : nil }
+ [parent_object, path[-1]]
+ end

- # returns [parent, child_key] of child described by path array in record's tree
- # returns [nil, child_key] if path doesnt exist in record
- def get_parent_of(record, path)
- parent_object = path[0..-2].reduce(record) { |a, e| a.is_a?(Hash) ? a[e] : nil }
- [parent_object, path[-1]]
- end
+ # gzip compress data
+ def gzip(string)
+ wio = StringIO.new("w")
+ w_gz = Zlib::GzipWriter.new(wio, strategy = compression_strategy)
+ w_gz.write(string)
+ w_gz.close
+ wio.string
+ end
+
+ def placeholder_substitution_needed_for_template?
+ need_substitution = placeholder?(:host, @host.to_s) ||
+ placeholder?(:index_name, @index_name.to_s) ||
+ placeholder?(:template_name, @template_name.to_s) ||
+ @customize_template&.values&.any? { |value| placeholder?(:customize_template, value.to_s) } ||
+ placeholder?(:logstash_prefix, @logstash_prefix.to_s) ||
+ placeholder?(:logstash_dateformat, @logstash_dateformat.to_s) ||
+ placeholder?(:deflector_alias, @deflector_alias.to_s) ||
+ placeholder?(:application_name, @application_name.to_s) ||
+ placeholder?(:ilm_policy_id, @ilm_policy_id.to_s)
+ log.debug("Need substitution: #{need_substitution}")
+ need_substitution
+ end
+
+ def template_installation(deflector_alias, template_name, customize_template, application_name, ilm_policy_id, target_index, host)
+ # for safety.
+ end
+
+ def template_installation_actual(deflector_alias, template_name, customize_template, application_name, target_index, ilm_policy_id, host=nil)
+ if template_name && @template_file
+ if !@logstash_format && (deflector_alias.nil? || (@alias_indexes.include? deflector_alias)) && (@template_names.include? template_name)
+ if deflector_alias
+ log.debug("Index alias #{deflector_alias} and template #{template_name} already exist (cached)")
+ else
+ log.debug("Template #{template_name} already exists (cached)")
+ end
+ else
+ retry_operate(@max_retry_putting_template,
+ @fail_on_putting_template_retry_exceed,
+ @catch_transport_exception_on_retry) do
+ if customize_template
+ template_custom_install(template_name, @template_file, @template_overwrite, customize_template, @enable_ilm, deflector_alias, ilm_policy_id, host, target_index, @index_separator)
+ else
+ template_install(template_name, @template_file, @template_overwrite, @enable_ilm, deflector_alias, ilm_policy_id, host, target_index, @index_separator)
+ end
+ ilm_policy = @ilm_policies[ilm_policy_id] || {}
+ create_rollover_alias(target_index, @rollover_index, deflector_alias, application_name, @index_date_pattern, @index_separator, @enable_ilm, ilm_policy_id, ilm_policy, @ilm_policy_overwrite, host)
+ end
+ @alias_indexes << deflector_alias unless deflector_alias.nil?
+ @template_names << template_name
+ end
+ end
+ end
+
+ # send_bulk given a specific bulk request, the original tag,
+ # chunk, and bulk_message_count
+ def send_bulk(data, tag, chunk, bulk_message_count, extracted_values, info)
+ _logstash_prefix, _logstash_dateformat, index_name, _type_name, template_name, customize_template, deflector_alias, application_name, _pipeline, ilm_policy_id = extracted_values
+ if deflector_alias
+ template_installation(deflector_alias, template_name, customize_template, application_name, index_name, ilm_policy_id, info.host)
+ else
+ template_installation(info.ilm_index, template_name, customize_template, application_name, @logstash_format ? info.ilm_alias : index_name, ilm_policy_id, info.host)
+ end
+
+ begin
+
+ log.on_trace { log.trace "bulk request: #{data}" }
+
+ prepared_data = if compression
+ gzip(data)
+ else
+ data
+ end
+
+ response = client(info.host, compression).bulk body: prepared_data, index: info.index
+ log.on_trace { log.trace "bulk response: #{response}" }
+
+ if response['errors']
+ error = Fluent::Plugin::ElasticsearchErrorHandler.new(self)
+ error.handle_error(response, tag, chunk, bulk_message_count, extracted_values)
+ end
+ rescue RetryStreamError => e
+ log.trace "router.emit_stream for retry stream doing..."
+ emit_tag = @retry_tag ? @retry_tag : tag
+ # check capacity of buffer space
+ if retry_stream_retryable?
+ router.emit_stream(emit_tag, e.retry_stream)
+ else
+ raise RetryStreamEmitFailure, "buffer is full."
+ end
+ log.trace "router.emit_stream for retry stream done."
+ rescue => e
+ ignore = @ignore_exception_classes.any? { |clazz| e.class <= clazz }
+
+ log.warn "Exception ignored in tag #{tag}: #{e.class.name} #{e.message}" if ignore
+
+ @_es = nil if @reconnect_on_error
+ @_es_info = nil if @reconnect_on_error
+
+ raise UnrecoverableRequestFailure if ignore && @exception_backup
+
+ # FIXME: identify unrecoverable errors and raise UnrecoverableRequestFailure instead
+ raise RecoverableRequestFailure, "could not push logs to Elasticsearch cluster (#{connection_options_description(info.host)}): #{e.message}" unless ignore
+ end
+ end
+
+ def retry_stream_retryable?
+ @buffer.storable?
+ end
+
+ def is_existing_connection(host)
+ # check if the host provided match the current connection
+ return false if @_es.nil?
+ return false if @current_config.nil?
+ return false if host.length != @current_config.length
+
+ for i in 0...host.length
+ if !host[i][:host].eql? @current_config[i][:host] || host[i][:port] != @current_config[i][:port]
+ return false
+ end
+ end

- def send_bulk(data)
- retries = 0
- begin
- response = client.bulk body: data
- if response['errors']
- log.error "Could not push log to Elasticsearch: #{response}"
- end
- rescue *client.transport.host_unreachable_exceptions => e
- if retries < 2
- retries += 1
- @_es = nil
- log.warn "Could not push logs to Elasticsearch, resetting connection and trying again. #{e.message}"
- sleep 2**retries
- retry
- end
- raise ConnectionFailure, "Could not push logs to Elasticsearch after #{retries} retries. #{e.message}"
- rescue Exception
- @_es = nil if @reconnect_on_error
- raise
+ return true
  end
  end
  end
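
A note on the request-compression path introduced in this version range: when compression_level is anything other than no_compression, send_bulk gzips the bulk request body before posting it, and the client is built with a Content-Encoding: gzip header (both visible in the hunks above). A self-contained sketch of that path; the bulk body here is hypothetical, and Zlib::BEST_SPEED is the constant compression_strategy returns for compression_level best_speed:

    require 'zlib'
    require 'stringio'

    # Reduced from the gzip helper in the diff: compress a request body in memory.
    def gzip(string, level)
      wio = StringIO.new
      w_gz = Zlib::GzipWriter.new(wio, level)
      w_gz.write(string)
      w_gz.close
      wio.string
    end

    # Hypothetical two-line bulk body: action/metadata line, then the document.
    body = %({"index":{"_index":"fluentd"}}\n{"message":"hello"}\n)
    compressed = gzip(body, Zlib::BEST_SPEED)
    # compressed is what gets POSTed, with 'Content-Encoding: gzip' set on the request.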