fluent-plugin-elasticsearch 1.18.2 → 2.0.0.rc.1

@@ -3,13 +3,13 @@ $:.push File.expand_path('../lib', __FILE__)
 
 Gem::Specification.new do |s|
   s.name = 'fluent-plugin-elasticsearch'
-  s.version = '1.18.2'
+  s.version = '2.0.0.rc.1'
   s.authors = ['diogo', 'pitr']
   s.email = ['pitr.vern@gmail.com', 'me@diogoterror.com']
-  s.description = %q{Elasticsearch output plugin for Fluent event collector}
+  s.description = %q{ElasticSearch output plugin for Fluent event collector}
   s.summary = s.description
   s.homepage = 'https://github.com/uken/fluent-plugin-elasticsearch'
-  s.license = 'Apache-2.0'
+  s.license = 'MIT'
 
   s.files = `git ls-files`.split($/)
   s.executables = s.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
@@ -18,7 +18,7 @@ Gem::Specification.new do |s|
 
   s.required_ruby_version = Gem::Requirement.new(">= 2.0".freeze)
 
-  s.add_runtime_dependency 'fluentd', '>= 0.12.10'
+  s.add_runtime_dependency 'fluentd', '>= 0.14.8'
   s.add_runtime_dependency 'excon', '>= 0'
   s.add_runtime_dependency 'elasticsearch'
 
@@ -27,5 +27,5 @@ Gem::Specification.new do |s|
   s.add_development_dependency 'webmock', '~> 1'
   s.add_development_dependency 'test-unit', '~> 3.1.0'
   s.add_development_dependency 'minitest', '~> 5.8'
-  s.add_development_dependency 'flexmock', '~> 2.3.5'
+  s.add_development_dependency 'flexmock', '~> 2.0'
 end
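
The three hunks above are the gemspec. Besides the version bump and the description/license edits, the significant change is the runtime dependency: fluentd '>= 0.12.10' becomes fluentd '>= 0.14.8', so the 2.x line requires a v0.14 Fluentd core. A minimal Gemfile sketch for trying the release candidate (the source URL and exact pinning style are illustrative, not part of the diff):

  source 'https://rubygems.org'

  gem 'fluentd', '>= 0.14.8'                      # floor set by the 2.0.0.rc.1 gemspec
  gem 'fluent-plugin-elasticsearch', '2.0.0.rc.1' # prerelease versions must be pinned explicitly
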
@@ -19,12 +19,7 @@ module Fluent::ElasticsearchIndexTemplate
     client.indices.put_template(:name => name, :body => template)
   end
 
-  def template_install(name, template_file, overwrite)
-    if overwrite
-      template_put(name, get_template(template_file))
-      log.info("Template '#{name}' overwritten with #{template_file}.")
-      return
-    end
+  def template_install(name, template_file)
     if !template_exists?(name)
       template_put(name, get_template(template_file))
       log.info("Template configured, but no template installed. Installed '#{name}' from #{template_file}.")
@@ -33,9 +28,9 @@ module Fluent::ElasticsearchIndexTemplate
     end
   end
 
-  def templates_hash_install(templates, overwrite)
+  def templates_hash_install (templates)
     templates.each do |key, value|
-      template_install(key, value, overwrite)
+      template_install(key, value)
     end
   end
 
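The hunk above is the Fluent::ElasticsearchIndexTemplate mixin. template_install and templates_hash_install lose their overwrite argument, so on this branch an index template is only installed when no template of that name exists yet; the corresponding template_overwrite option disappears from the plugin configuration in the next file. A standalone Ruby sketch of the resulting behaviour (cluster address, template name, and file path are placeholders; the existence probe mirrors the mixin's template_exists?, which treats a 404 from get_template as "not installed"):

  require 'json'
  require 'elasticsearch'

  client = Elasticsearch::Client.new host: 'localhost', port: 9200
  name = 'fluentd-template'               # hypothetical template name
  file = '/etc/td-agent/es-template.json' # hypothetical template file

  begin
    client.indices.get_template(name: name) # raises NotFound when absent
  rescue Elasticsearch::Transport::Transport::Errors::NotFound
    # Only reached when no template is registered under this name; an
    # existing template is never overwritten on the 2.0.0.rc.1 code path.
    client.indices.put_template(name: name, body: JSON.parse(File.read(file)))
  end
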
@@ -9,488 +9,382 @@ begin
 rescue LoadError
 end
 
-require 'fluent/output'
-require 'fluent/event'
-require 'fluent/log-ext'
-require_relative 'elasticsearch_constants'
-require_relative 'elasticsearch_error_handler'
+require 'fluent/plugin/output'
 require_relative 'elasticsearch_index_template'
 
-class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
-  class ConnectionFailure < StandardError; end
+module Fluent::Plugin
+  class ElasticsearchOutput < Output
+    class ConnectionFailure < StandardError; end
+
+    helpers :event_emitter, :compat_parameters
+
+    Fluent::Plugin.register_output('elasticsearch', self)
+
+    DEFAULT_BUFFER_TYPE = "memory"
+
+    config_param :host, :string, :default => 'localhost'
+    config_param :port, :integer, :default => 9200
+    config_param :user, :string, :default => nil
+    config_param :password, :string, :default => nil, :secret => true
+    config_param :path, :string, :default => nil
+    config_param :scheme, :string, :default => 'http'
+    config_param :hosts, :string, :default => nil
+    config_param :target_index_key, :string, :default => nil
+    config_param :target_type_key, :string, :default => nil
+    config_param :time_key_format, :string, :default => nil
+    config_param :time_precision, :integer, :default => 9
+    config_param :logstash_format, :bool, :default => false
+    config_param :logstash_prefix, :string, :default => "logstash"
+    config_param :logstash_dateformat, :string, :default => "%Y.%m.%d"
+    config_param :utc_index, :bool, :default => true
+    config_param :type_name, :string, :default => "fluentd"
+    config_param :index_name, :string, :default => "fluentd"
+    config_param :id_key, :string, :default => nil
+    config_param :write_operation, :string, :default => "index"
+    config_param :parent_key, :string, :default => nil
+    config_param :routing_key, :string, :default => nil
+    config_param :request_timeout, :time, :default => 5
+    config_param :reload_connections, :bool, :default => true
+    config_param :reload_on_failure, :bool, :default => false
+    config_param :resurrect_after, :time, :default => 60
+    config_param :time_key, :string, :default => nil
+    config_param :time_key_exclude_timestamp, :bool, :default => false
+    config_param :ssl_verify , :bool, :default => true
+    config_param :client_key, :string, :default => nil
+    config_param :client_cert, :string, :default => nil
+    config_param :client_key_pass, :string, :default => nil
+    config_param :ca_file, :string, :default => nil
+    config_param :remove_keys, :string, :default => nil
+    config_param :remove_keys_on_update, :string, :default => ""
+    config_param :remove_keys_on_update_key, :string, :default => nil
+    config_param :flatten_hashes, :bool, :default => false
+    config_param :flatten_hashes_separator, :string, :default => "_"
+    config_param :template_name, :string, :default => nil
+    config_param :template_file, :string, :default => nil
+    config_param :templates, :hash, :default => nil
+    config_param :include_tag_key, :bool, :default => false
+    config_param :tag_key, :string, :default => 'tag'
+    config_param :time_parse_error_tag, :string, :default => 'Fluent::ElasticsearchOutput::TimeParser.error'
+    config_param :reconnect_on_error, :bool, :default => false
+
+    config_section :buffer do
+      config_set_default :@type, DEFAULT_BUFFER_TYPE
+      config_set_default :chunk_keys, ['tag']
+    end
 
-  # MissingIdFieldError is raised for records that do not
-  # include the field for the unique record identifier
-  class MissingIdFieldError < StandardError; end
+    include Fluent::ElasticsearchIndexTemplate
 
-  # RetryStreamError privides a stream to be
-  # put back in the pipeline for cases where a bulk request
-  # failed (e.g some records succeed while others failed)
-  class RetryStreamError < StandardError
-    attr_reader :retry_stream
-    def initialize(retry_stream)
-      @retry_stream = retry_stream
+    def initialize
+      super
     end
-  end
 
-  Fluent::Plugin.register_output('elasticsearch', self)
-
-  DEFAULT_RELOAD_AFTER = -1
-
-  config_param :host, :string, :default => 'localhost'
-  config_param :port, :integer, :default => 9200
-  config_param :user, :string, :default => nil
-  config_param :password, :string, :default => nil, :secret => true
-  config_param :path, :string, :default => nil
-  config_param :scheme, :enum, :list => [:https, :http], :default => :http
-  config_param :hosts, :string, :default => nil
-  config_param :target_index_key, :string, :default => nil
-  config_param :target_type_key, :string, :default => nil
-  config_param :time_key_format, :string, :default => nil
-  config_param :time_precision, :integer, :default => 0
-  config_param :include_timestamp, :bool, :default => false
-  config_param :logstash_format, :bool, :default => false
-  config_param :logstash_prefix, :string, :default => "logstash"
-  config_param :logstash_prefix_separator, :string, :default => '-'
-  config_param :logstash_dateformat, :string, :default => "%Y.%m.%d"
-  config_param :utc_index, :bool, :default => true
-  config_param :type_name, :string, :default => "fluentd"
-  config_param :index_name, :string, :default => "fluentd"
-  config_param :id_key, :string, :default => nil
-  config_param :write_operation, :string, :default => "index"
-  config_param :parent_key, :string, :default => nil
-  config_param :routing_key, :string, :default => nil
-  config_param :request_timeout, :time, :default => 5
-  config_param :reload_connections, :bool, :default => true
-  config_param :reload_on_failure, :bool, :default => false
-  config_param :retry_tag, :string, :default=>nil
-  config_param :resurrect_after, :time, :default => 60
-  config_param :time_key, :string, :default => nil
-  config_param :time_key_exclude_timestamp, :bool, :default => false
-  config_param :ssl_verify , :bool, :default => true
-  config_param :client_key, :string, :default => nil
-  config_param :client_cert, :string, :default => nil
-  config_param :client_key_pass, :string, :default => nil
-  config_param :ca_file, :string, :default => nil
-  config_param :ssl_version, :enum, list: [:SSLv23, :TLSv1, :TLSv1_1, :TLSv1_2], :default => :TLSv1
-  config_param :remove_keys, :string, :default => nil
-  config_param :remove_keys_on_update, :string, :default => ""
-  config_param :remove_keys_on_update_key, :string, :default => nil
-  config_param :flatten_hashes, :bool, :default => false
-  config_param :flatten_hashes_separator, :string, :default => "_"
-  config_param :template_name, :string, :default => nil
-  config_param :template_file, :string, :default => nil
-  config_param :template_overwrite, :bool, :default => false
-  config_param :templates, :hash, :default => nil
-  config_param :include_tag_key, :bool, :default => false
-  config_param :tag_key, :string, :default => 'tag'
-  config_param :time_parse_error_tag, :string, :default => 'Fluent::ElasticsearchOutput::TimeParser.error'
-  config_param :reconnect_on_error, :bool, :default => false
-  config_param :pipeline, :string, :default => nil
-  config_param :with_transporter_log, :bool, :default => false
-  config_param :emit_error_for_missing_id, :bool, :default => false
-  config_param :sniffer_class_name, :string, :default => nil
-  config_param :reload_after, :integer, :default => DEFAULT_RELOAD_AFTER
-  config_param :suppress_doc_wrap, :bool, :default => false
-
-  include Fluent::ElasticsearchIndexTemplate
-  include Fluent::ElasticsearchConstants
-
-  def initialize
-    super
-  end
+    def configure(conf)
+      compat_parameters_convert(conf, :buffer)
 
-  def configure(conf)
-    super
-    @time_parser = create_time_parser
+      super
+      raise Fluent::ConfigError, "'tag' in chunk_keys is required." if not @chunk_key_tag
 
-    if @remove_keys
-      @remove_keys = @remove_keys.split(/\s*,\s*/)
-    end
+      @time_parser = create_time_parser
 
-    if @target_index_key && @target_index_key.is_a?(String)
-      @target_index_key = @target_index_key.split '.'
-    end
+      if @remove_keys
+        @remove_keys = @remove_keys.split(/\s*,\s*/)
+      end
 
-    if @target_type_key && @target_type_key.is_a?(String)
-      @target_type_key = @target_type_key.split '.'
-    end
+      if @target_index_key && @target_index_key.is_a?(String)
+        @target_index_key = @target_index_key.split '.'
+      end
 
-    if @remove_keys_on_update && @remove_keys_on_update.is_a?(String)
-      @remove_keys_on_update = @remove_keys_on_update.split ','
-    end
+      if @target_type_key && @target_type_key.is_a?(String)
+        @target_type_key = @target_type_key.split '.'
+      end
 
-    if @template_name && @template_file
-      template_install(@template_name, @template_file, @template_overwrite)
-    elsif @templates
-      templates_hash_install(@templates, @template_overwrite)
-    end
+      if @remove_keys_on_update && @remove_keys_on_update.is_a?(String)
+        @remove_keys_on_update = @remove_keys_on_update.split ','
+      end
 
-    @meta_config_map = create_meta_config_map
+      if @template_name && @template_file
+        template_install(@template_name, @template_file)
+      elsif @templates
+        templates_hash_install (@templates)
+      end
 
-    begin
-      require 'oj'
-      @dump_proc = Oj.method(:dump)
-    rescue LoadError
-      @dump_proc = Yajl.method(:dump)
-    end
+      @meta_config_map = create_meta_config_map
 
-    if @user && m = @user.match(/%{(?<user>.*)}/)
-      @user = URI.encode_www_form_component(m["user"])
-    end
-    if @password && m = @password.match(/%{(?<password>.*)}/)
-      @password = URI.encode_www_form_component(m["password"])
+      begin
+        require 'oj'
+        @dump_proc = Oj.method(:dump)
+      rescue LoadError
+        @dump_proc = Yajl.method(:dump)
+      end
     end
 
-    if @hash_config
-      raise Fluent::ConfigError, "@hash_config.hash_id_key and id_key must be equal." unless @hash_config.hash_id_key == @id_key
+    def create_meta_config_map
+      result = []
+      result << [@id_key, '_id'] if @id_key
+      result << [@parent_key, '_parent'] if @parent_key
+      result << [@routing_key, '_routing'] if @routing_key
+      result
     end
 
-    @transport_logger = nil
-    if @with_transporter_log
-      @transport_logger = log
-      log_level = conf['@log_level'] || conf['log_level']
-      log.warn "Consider to specify log_level with @log_level." unless log_level
+    # once fluent v0.14 is released we might be able to use
+    # Fluent::Parser::TimeParser, but it doesn't quite do what we want - if gives
+    # [sec,nsec] where as we want something we can call `strftime` on...
+    def create_time_parser
+      if @time_key_format
+        begin
+          # Strptime doesn't support all formats, but for those it does it's
+          # blazingly fast.
+          strptime = Strptime.new(@time_key_format)
+          Proc.new { |value| strptime.exec(value).to_datetime }
+        rescue
+          # Can happen if Strptime doesn't recognize the format; or
+          # if strptime couldn't be required (because it's not installed -- it's
+          # ruby 2 only)
+          Proc.new { |value| DateTime.strptime(value, @time_key_format) }
+        end
+      else
+        Proc.new { |value| DateTime.parse(value) }
+      end
     end
 
-    @sniffer_class = nil
-    begin
-      @sniffer_class = Object.const_get(@sniffer_class_name) if @sniffer_class_name
-    rescue Exception => ex
-      raise Fluent::ConfigError, "Could not load sniffer class #{@sniffer_class_name}: #{ex}"
+    def parse_time(value, event_time, tag)
+      @time_parser.call(value)
+    rescue => e
+      router.emit_error_event(@time_parse_error_tag, Fluent::Engine.now, {'tag' => tag, 'time' => event_time, 'format' => @time_key_format, 'value' => value}, e)
+      return Time.at(event_time).to_datetime
     end
 
-  end
-
-  def create_meta_config_map
-    result = []
-    result << [@id_key, '_id'] if @id_key
-    result << [@parent_key, '_parent'] if @parent_key
-    result << [@routing_key, '_routing'] if @routing_key
-    result
-  end
+    def client
+      @_es ||= begin
+        excon_options = { client_key: @client_key, client_cert: @client_cert, client_key_pass: @client_key_pass }
+        adapter_conf = lambda {|f| f.adapter :excon, excon_options }
+        transport = Elasticsearch::Transport::Transport::HTTP::Faraday.new(get_connection_options.merge(
+          options: {
+            reload_connections: @reload_connections,
+            reload_on_failure: @reload_on_failure,
+            resurrect_after: @resurrect_after,
+            retry_on_failure: 5,
+            transport_options: {
+              headers: { 'Content-Type' => 'application/json' },
+              request: { timeout: @request_timeout },
+              ssl: { verify: @ssl_verify, ca_file: @ca_file }
+            }
+          }), &adapter_conf)
+        es = Elasticsearch::Client.new transport: transport
+
+        begin
+          raise ConnectionFailure, "Can not reach Elasticsearch cluster (#{connection_options_description})!" unless es.ping
+        rescue *es.transport.host_unreachable_exceptions => e
+          raise ConnectionFailure, "Can not reach Elasticsearch cluster (#{connection_options_description})! #{e.message}"
+        end
 
-  # once fluent v0.14 is released we might be able to use
-  # Fluent::Parser::TimeParser, but it doesn't quite do what we want - if gives
-  # [sec,nsec] where as we want something we can call `strftime` on...
-  def create_time_parser
-    if @time_key_format
-      begin
-        # Strptime doesn't support all formats, but for those it does it's
-        # blazingly fast.
-        strptime = Strptime.new(@time_key_format)
-        Proc.new { |value| strptime.exec(value).to_datetime }
-      rescue
-        # Can happen if Strptime doesn't recognize the format; or
-        # if strptime couldn't be required (because it's not installed -- it's
-        # ruby 2 only)
-        Proc.new { |value| DateTime.strptime(value, @time_key_format) }
+        log.info "Connection opened to Elasticsearch cluster => #{connection_options_description}"
+        es
       end
-    else
-      Proc.new { |value| DateTime.parse(value) }
     end
-  end
-
-  def parse_time(value, event_time, tag)
-    @time_parser.call(value)
-  rescue => e
-    router.emit_error_event(@time_parse_error_tag, Fluent::Engine.now, {'tag' => tag, 'time' => event_time, 'format' => @time_key_format, 'value' => value}, e)
-    return Time.at(event_time).to_datetime
-  end
-
-  def client
-    @_es ||= begin
-      excon_options = { client_key: @client_key, client_cert: @client_cert, client_key_pass: @client_key_pass }
-      adapter_conf = lambda {|f| f.adapter :excon, excon_options }
-      local_reload_connections = @reload_connections
-      if local_reload_connections && @reload_after > DEFAULT_RELOAD_AFTER
-        local_reload_connections = @reload_after
-      end
-      transport = Elasticsearch::Transport::Transport::HTTP::Faraday.new(get_connection_options.merge(
-        options: {
-          reload_connections: local_reload_connections,
-          reload_on_failure: @reload_on_failure,
-          resurrect_after: @resurrect_after,
-          retry_on_failure: 5,
-          logger: @transport_logger,
-          transport_options: {
-            headers: { 'Content-Type' => 'application/json' },
-            request: { timeout: @request_timeout },
-            ssl: { verify: @ssl_verify, ca_file: @ca_file, version: @ssl_version }
-          },
-          http: {
-            user: @user,
-            password: @password
-          },
-          sniffer_class: @sniffer_class,
-        }), &adapter_conf)
-      es = Elasticsearch::Client.new transport: transport
 
-      begin
-        raise ConnectionFailure, "Can not reach Elasticsearch cluster (#{connection_options_description})!" unless es.ping
-      rescue *es.transport.host_unreachable_exceptions => e
-        raise ConnectionFailure, "Can not reach Elasticsearch cluster (#{connection_options_description})! #{e.message}"
+    def get_connection_options
+      raise "`password` must be present if `user` is present" if @user && !@password
+
+      hosts = if @hosts
+        @hosts.split(',').map do |host_str|
+          # Support legacy hosts format host:port,host:port,host:port...
+          if host_str.match(%r{^[^:]+(\:\d+)?$})
+            {
+              host: host_str.split(':')[0],
+              port: (host_str.split(':')[1] || @port).to_i,
+              scheme: @scheme
+            }
+          else
+            # New hosts format expects URLs such as http://logs.foo.com,https://john:pass@logs2.foo.com/elastic
+            uri = URI(host_str)
+            %w(user password path).inject(host: uri.host, port: uri.port, scheme: uri.scheme) do |hash, key|
+              hash[key.to_sym] = uri.public_send(key) unless uri.public_send(key).nil? || uri.public_send(key) == ''
+              hash
+            end
+          end
+        end.compact
+      else
+        [{host: @host, port: @port, scheme: @scheme}]
+      end.each do |host|
+        host.merge!(user: @user, password: @password) if !host[:user] && @user
+        host.merge!(path: @path) if !host[:path] && @path
       end
 
-      log.info "Connection opened to Elasticsearch cluster => #{connection_options_description}"
-      es
+      {
+        hosts: hosts
+      }
     end
-  end
 
-  def get_escaped_userinfo(host_str)
-    if m = host_str.match(/(?<scheme>.*)%{(?<user>.*)}:%{(?<password>.*)}(?<path>@.*)/)
-      m["scheme"] +
-        URI.encode_www_form_component(m["user"]) +
-        ':' +
-        URI.encode_www_form_component(m["password"]) +
-        m["path"]
-    else
-      host_str
+    def connection_options_description
+      get_connection_options[:hosts].map do |host_info|
+        attributes = host_info.dup
+        attributes[:password] = 'obfuscated' if attributes.has_key?(:password)
+        attributes.inspect
+      end.join(', ')
     end
-  end
 
-  def get_connection_options
-    raise "`password` must be present if `user` is present" if @user && !@password
-
-    hosts = if @hosts
-      @hosts.split(',').map do |host_str|
-        # Support legacy hosts format host:port,host:port,host:port...
-        if host_str.match(%r{^[^:]+(\:\d+)?$})
-          {
-            host: host_str.split(':')[0],
-            port: (host_str.split(':')[1] || @port).to_i,
-            scheme: @scheme.to_s
-          }
-        else
-          # New hosts format expects URLs such as http://logs.foo.com,https://john:pass@logs2.foo.com/elastic
-          uri = URI(get_escaped_userinfo(host_str))
-          %w(user password path).inject(host: uri.host, port: uri.port, scheme: uri.scheme) do |hash, key|
-            hash[key.to_sym] = uri.public_send(key) unless uri.public_send(key).nil? || uri.public_send(key) == ''
-            hash
-          end
+    BODY_DELIMITER = "\n".freeze
+    UPDATE_OP = "update".freeze
+    UPSERT_OP = "upsert".freeze
+    CREATE_OP = "create".freeze
+    INDEX_OP = "index".freeze
+    ID_FIELD = "_id".freeze
+    TIMESTAMP_FIELD = "@timestamp".freeze
+
+    def append_record_to_messages(op, meta, header, record, msgs)
+      case op
+      when UPDATE_OP, UPSERT_OP
+        if meta.has_key?(ID_FIELD)
+          header[UPDATE_OP] = meta
+          msgs << @dump_proc.call(header) << BODY_DELIMITER
+          msgs << @dump_proc.call(update_body(record, op)) << BODY_DELIMITER
         end
-      end.compact
-    else
-      [{host: @host, port: @port, scheme: @scheme.to_s}]
-    end.each do |host|
-      host.merge!(user: @user, password: @password) if !host[:user] && @user
-      host.merge!(path: @path) if !host[:path] && @path
-    end
-
-    {
-      hosts: hosts
-    }
-  end
-
-  def connection_options_description
-    get_connection_options[:hosts].map do |host_info|
-      attributes = host_info.dup
-      attributes[:password] = 'obfuscated' if attributes.has_key?(:password)
-      attributes.inspect
-    end.join(', ')
-  end
-
-  # append_record_to_messages adds a record to the bulk message
-  # payload to be submitted to Elasticsearch. Records that do
-  # not include '_id' field are skipped when 'write_operation'
-  # is configured for 'create' or 'update'
-  #
-  # returns 'true' if record was appended to the bulk message
-  # and 'false' otherwise
-  def append_record_to_messages(op, meta, header, record, msgs)
-    case op
-    when UPDATE_OP, UPSERT_OP
-      if meta.has_key?(ID_FIELD)
-        header[UPDATE_OP] = meta
-        msgs << @dump_proc.call(header) << BODY_DELIMITER
-        msgs << @dump_proc.call(update_body(record, op)) << BODY_DELIMITER
-        return true
-      end
-    when CREATE_OP
-      if meta.has_key?(ID_FIELD)
-        header[CREATE_OP] = meta
+      when CREATE_OP
+        if meta.has_key?(ID_FIELD)
+          header[CREATE_OP] = meta
+          msgs << @dump_proc.call(header) << BODY_DELIMITER
+          msgs << @dump_proc.call(record) << BODY_DELIMITER
+        end
+      when INDEX_OP
+        header[INDEX_OP] = meta
         msgs << @dump_proc.call(header) << BODY_DELIMITER
         msgs << @dump_proc.call(record) << BODY_DELIMITER
-        return true
       end
-    when INDEX_OP
-      header[INDEX_OP] = meta
-      msgs << @dump_proc.call(header) << BODY_DELIMITER
-      msgs << @dump_proc.call(record) << BODY_DELIMITER
-      return true
     end
-    return false
-  end
-
-  def update_body(record, op)
-    update = remove_keys(record)
-    if @suppress_doc_wrap
-      return update
-    end
-    body = {"doc".freeze => update}
-    if op == UPSERT_OP
-      if update == record
-        body["doc_as_upsert".freeze] = true
-      else
-        body[UPSERT_OP] = record
-      end
-    end
-    body
-  end
-
-  def remove_keys(record)
-    keys = record[@remove_keys_on_update_key] || @remove_keys_on_update || []
-    record.delete(@remove_keys_on_update_key)
-    return record unless keys.any?
-    record = record.dup
-    keys.each { |key| record.delete(key) }
-    record
-  end
-
-  def flatten_record(record, prefix=[])
-    ret = {}
-    if record.is_a? Hash
-      record.each { |key, value|
-        ret.merge! flatten_record(value, prefix + [key.to_s])
-      }
-    elsif record.is_a? Array
-      # Don't mess with arrays, leave them unprocessed
-      ret.merge!({prefix.join(@flatten_hashes_separator) => record})
-    else
-      return {prefix.join(@flatten_hashes_separator) => record}
-    end
-    ret
-  end
 
-  def write_objects(tag, chunk)
-    bulk_message_count = 0
-    bulk_message = ''
-    header = {}
-    meta = {}
-    chunk.msgpack_each do |time, record|
-      next unless record.is_a? Hash
-      begin
-        if process_message(tag, meta, header, time, record, bulk_message)
-          bulk_message_count += 1
+    def update_body(record, op)
+      update = remove_keys(record)
+      body = {"doc".freeze => update}
+      if op == UPSERT_OP
+        if update == record
+          body["doc_as_upsert".freeze] = true
         else
-          if @emit_error_for_missing_id
-            raise MissingIdFieldError, "Missing '_id' field. Write operation is #{@write_operation}"
-          else
-            log.on_debug { log.debug("Dropping record because its missing an '_id' field and write_operation is #{@write_operation}: #{record}") }
-          end
+          body[UPSERT_OP] = record
         end
-      rescue=>e
-        router.emit_error_event(tag, time, record, e)
       end
+      body
     end
 
-    send_bulk(bulk_message, tag, chunk, bulk_message_count) unless bulk_message.empty?
-    bulk_message.clear
-  end
-
-  def process_message(tag, meta, header, time, record, bulk_message)
-    if @flatten_hashes
-      record = flatten_record(record)
-    end
-
-    if @hash_config
-      record = generate_hash_id_key(record)
+    def remove_keys(record)
+      keys = record[@remove_keys_on_update_key] || @remove_keys_on_update || []
+      record.delete(@remove_keys_on_update_key)
+      return record unless keys.any?
+      record = record.dup
+      keys.each { |key| record.delete(key) }
+      record
     end
 
-    dt = nil
-    if @logstash_format || @include_timestamp
-      if record.has_key?(TIMESTAMP_FIELD)
-        rts = record[TIMESTAMP_FIELD]
-        dt = parse_time(rts, time, tag)
-      elsif record.has_key?(@time_key)
-        rts = record[@time_key]
-        dt = parse_time(rts, time, tag)
-        record[TIMESTAMP_FIELD] = rts unless @time_key_exclude_timestamp
+    def flatten_record(record, prefix=[])
+      ret = {}
+      if record.is_a? Hash
+        record.each { |key, value|
+          ret.merge! flatten_record(value, prefix + [key.to_s])
+        }
+      elsif record.is_a? Array
+        # Don't mess with arrays, leave them unprocessed
+        ret.merge!({prefix.join(@flatten_hashes_separator) => record})
       else
-        dt = Time.at(time).to_datetime
-        record[TIMESTAMP_FIELD] = dt.iso8601(@time_precision)
+        return {prefix.join(@flatten_hashes_separator) => record}
       end
+      ret
     end
 
-    target_index_parent, target_index_child_key = @target_index_key ? get_parent_of(record, @target_index_key) : nil
-    if target_index_parent && target_index_parent[target_index_child_key]
-      target_index = target_index_parent.delete(target_index_child_key)
-    elsif @logstash_format
-      dt = dt.new_offset(0) if @utc_index
-      target_index = "#{@logstash_prefix}#{@logstash_prefix_separator}#{dt.strftime(@logstash_dateformat)}"
-    else
-      target_index = @index_name
-    end
-
-    # Change target_index to lower-case since Elasticsearch doesn't
-    # allow upper-case characters in index names.
-    target_index = target_index.downcase
-    if @include_tag_key
-      record[@tag_key] = tag
-    end
+    def write(chunk)
+      bulk_message = ''
+      header = {}
+      meta = {}
 
-    target_type_parent, target_type_child_key = @target_type_key ? get_parent_of(record, @target_type_key) : nil
-    if target_type_parent && target_type_parent[target_type_child_key]
-      target_type = target_type_parent.delete(target_type_child_key)
-    else
-      target_type = @type_name
-    end
+      tag = chunk.metadata.tag
 
-    meta.clear
-    meta["_index".freeze] = target_index
-    meta["_type".freeze] = target_type
+      chunk.msgpack_each do |time, record|
+        next unless record.is_a? Hash
 
-    if @pipeline
-      meta["pipeline".freeze] = @pipeline
-    end
+        if @flatten_hashes
+          record = flatten_record(record)
+        end
 
-    @meta_config_map.each do |record_key, meta_key|
-      meta[meta_key] = record[record_key] if record[record_key]
-    end
+        target_index_parent, target_index_child_key = @target_index_key ? get_parent_of(record, @target_index_key) : nil
+        if target_index_parent && target_index_parent[target_index_child_key]
+          target_index = target_index_parent.delete(target_index_child_key)
+        elsif @logstash_format
+          if record.has_key?(TIMESTAMP_FIELD)
+            rts = record[TIMESTAMP_FIELD]
+            dt = parse_time(rts, time, tag)
+          elsif record.has_key?(@time_key)
+            rts = record[@time_key]
+            dt = parse_time(rts, time, tag)
+            record[TIMESTAMP_FIELD] = rts unless @time_key_exclude_timestamp
+          else
+            dt = Time.at(time).to_datetime
+            record[TIMESTAMP_FIELD] = dt.iso8601(@time_precision)
+          end
+          dt = dt.new_offset(0) if @utc_index
+          target_index = "#{@logstash_prefix}-#{dt.strftime(@logstash_dateformat)}"
+        else
+          target_index = @index_name
+        end
 
-    if @remove_keys
-      @remove_keys.each { |key| record.delete(key) }
-    end
+        # Change target_index to lower-case since Elasticsearch doesn't
+        # allow upper-case characters in index names.
+        target_index = target_index.downcase
+        if @include_tag_key
+          record[@tag_key] = tag
+        end
 
-    append_record_to_messages(@write_operation, meta, header, record, bulk_message)
-  end
+        target_type_parent, target_type_child_key = @target_type_key ? get_parent_of(record, @target_type_key) : nil
+        if target_type_parent && target_type_parent[target_type_child_key]
+          target_type = target_type_parent.delete(target_type_child_key)
+        else
+          target_type = @type_name
+        end
 
-  # returns [parent, child_key] of child described by path array in record's tree
-  # returns [nil, child_key] if path doesnt exist in record
-  def get_parent_of(record, path)
-    parent_object = path[0..-2].reduce(record) { |a, e| a.is_a?(Hash) ? a[e] : nil }
-    [parent_object, path[-1]]
-  end
+        meta.clear
+        meta["_index".freeze] = target_index
+        meta["_type".freeze] = target_type
 
-  # send_bulk given a specific bulk request, the original tag,
-  # chunk, and bulk_message_count
-  def send_bulk(data, tag, chunk, bulk_message_count)
-    retries = 0
-    begin
+        @meta_config_map.each do |record_key, meta_key|
+          meta[meta_key] = record[record_key] if record[record_key]
+        end
 
-      log.on_trace { log.trace "bulk request: #{data}" }
-      response = client.bulk body: data
-      log.on_trace { log.trace "bulk response: #{response}" }
+        if @remove_keys
+          @remove_keys.each { |key| record.delete(key) }
+        end
 
-      if response['errors']
-        error = Fluent::ElasticsearchErrorHandler.new(self)
-        error.handle_error(response, tag, chunk, bulk_message_count)
+        append_record_to_messages(@write_operation, meta, header, record, bulk_message)
       end
-    rescue RetryStreamError => e
-      emit_tag = @retry_tag ? @retry_tag : tag
-      router.emit_stream(emit_tag, e.retry_stream)
-    rescue *client.transport.host_unreachable_exceptions => e
-      if retries < 2
-        retries += 1
-        @_es = nil
-        log.warn "Could not push logs to Elasticsearch, resetting connection and trying again. #{e.message}"
-        sleep 2**retries
-        retry
+
+      send_bulk(bulk_message) unless bulk_message.empty?
+      bulk_message.clear
+    end
+
+    # returns [parent, child_key] of child described by path array in record's tree
+    # returns [nil, child_key] if path doesnt exist in record
+    def get_parent_of(record, path)
+      parent_object = path[0..-2].reduce(record) { |a, e| a.is_a?(Hash) ? a[e] : nil }
+      [parent_object, path[-1]]
+    end
+
+    def send_bulk(data)
+      retries = 0
+      begin
+        response = client.bulk body: data
+        if response['errors']
+          log.error "Could not push log to Elasticsearch: #{response}"
+        end
+      rescue *client.transport.host_unreachable_exceptions => e
+        if retries < 2
+          retries += 1
+          @_es = nil
+          log.warn "Could not push logs to Elasticsearch, resetting connection and trying again. #{e.message}"
+          sleep 2**retries
+          retry
+        end
+        raise ConnectionFailure, "Could not push logs to Elasticsearch after #{retries} retries. #{e.message}"
+      rescue Exception
+        @_es = nil if @reconnect_on_error
+        raise
       end
-      raise ConnectionFailure, "Could not push logs to Elasticsearch after #{retries} retries. #{e.message}"
-    rescue Exception
-      @_es = nil if @reconnect_on_error
-      raise
     end
   end
 end
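
The final hunk is the output plugin itself, ported from the old Fluent::ObjectBufferedOutput base class to the fluentd v0.14 Fluent::Plugin::Output API: the class moves under the Fluent::Plugin namespace, buffering is declared via config_section :buffer with chunk_keys defaulting to ['tag'] (configure raises a ConfigError unless 'tag' is among the chunk keys), write(chunk) replaces write_objects(tag, chunk) and recovers the tag from chunk.metadata.tag, and the compat_parameters helper converts old v0.12-style flat buffer parameters. Several 1.18.2 features (template_overwrite, pipeline, sniffer_class_name, include_timestamp, the retry-stream/error-handler machinery, and others) have no counterpart in this first release candidate. A hedged configuration sketch for the rewritten plugin (the match pattern, host, and flush settings are placeholders; the <buffer tag> section corresponds to the config_section defaults above):

  <match app.**>
    @type elasticsearch
    host logs.example.com
    port 9200
    logstash_format true
    <buffer tag>
      @type memory        # DEFAULT_BUFFER_TYPE in the code above
      flush_interval 10s
    </buffer>
  </match>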