fluent-plugin-elasticsearch 1.9.4 → 5.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. checksums.yaml +5 -5
  2. data/.github/ISSUE_TEMPLATE/bug_report.md +37 -0
  3. data/.github/ISSUE_TEMPLATE/feature_request.md +24 -0
  4. data/.github/workflows/issue-auto-closer.yml +12 -0
  5. data/.github/workflows/linux.yml +26 -0
  6. data/.github/workflows/macos.yml +26 -0
  7. data/.github/workflows/windows.yml +26 -0
  8. data/.travis.yml +33 -6
  9. data/CONTRIBUTING.md +24 -0
  10. data/Gemfile +4 -1
  11. data/History.md +445 -1
  12. data/ISSUE_TEMPLATE.md +19 -0
  13. data/README.ElasticsearchGenID.md +116 -0
  14. data/README.ElasticsearchInput.md +293 -0
  15. data/README.Troubleshooting.md +692 -0
  16. data/README.md +1013 -38
  17. data/appveyor.yml +20 -0
  18. data/fluent-plugin-elasticsearch.gemspec +15 -9
  19. data/{Gemfile.v0.12 → gemfiles/Gemfile.elasticsearch.v6} +6 -5
  20. data/lib/fluent/log-ext.rb +38 -0
  21. data/lib/fluent/plugin/default-ilm-policy.json +14 -0
  22. data/lib/fluent/plugin/elasticsearch_constants.rb +13 -0
  23. data/lib/fluent/plugin/elasticsearch_error.rb +5 -0
  24. data/lib/fluent/plugin/elasticsearch_error_handler.rb +129 -0
  25. data/lib/fluent/plugin/elasticsearch_fallback_selector.rb +9 -0
  26. data/lib/fluent/plugin/elasticsearch_index_lifecycle_management.rb +67 -0
  27. data/lib/fluent/plugin/elasticsearch_index_template.rb +186 -12
  28. data/lib/fluent/plugin/elasticsearch_simple_sniffer.rb +10 -0
  29. data/lib/fluent/plugin/elasticsearch_tls.rb +70 -0
  30. data/lib/fluent/plugin/filter_elasticsearch_genid.rb +77 -0
  31. data/lib/fluent/plugin/in_elasticsearch.rb +325 -0
  32. data/lib/fluent/plugin/oj_serializer.rb +22 -0
  33. data/lib/fluent/plugin/out_elasticsearch.rb +1008 -267
  34. data/lib/fluent/plugin/out_elasticsearch_data_stream.rb +218 -0
  35. data/lib/fluent/plugin/out_elasticsearch_dynamic.rb +232 -214
  36. data/test/plugin/test_alias_template.json +9 -0
  37. data/test/plugin/test_elasticsearch_error_handler.rb +646 -0
  38. data/test/plugin/test_elasticsearch_fallback_selector.rb +74 -0
  39. data/test/plugin/test_elasticsearch_index_lifecycle_management.rb +66 -0
  40. data/test/plugin/test_elasticsearch_tls.rb +145 -0
  41. data/test/plugin/test_filter_elasticsearch_genid.rb +215 -0
  42. data/test/plugin/test_in_elasticsearch.rb +459 -0
  43. data/test/plugin/test_index_alias_template.json +11 -0
  44. data/test/plugin/test_index_template.json +25 -0
  45. data/test/plugin/test_oj_serializer.rb +19 -0
  46. data/test/plugin/test_out_elasticsearch.rb +5029 -387
  47. data/test/plugin/test_out_elasticsearch_data_stream.rb +337 -0
  48. data/test/plugin/test_out_elasticsearch_dynamic.rb +681 -208
  49. data/test/test_log-ext.rb +35 -0
  50. metadata +97 -19
@@ -0,0 +1,77 @@
require 'base64'
require 'digest'
require 'securerandom'

require 'fluent/plugin/filter'
4
+
5
+ module Fluent::Plugin
6
+ class ElasticsearchGenidFilter < Filter
7
+ Fluent::Plugin.register_filter('elasticsearch_genid', self)
8
+
9
+ config_param :hash_id_key, :string, :default => '_hash'
10
+ config_param :include_tag_in_seed, :bool, :default => false
11
+ config_param :include_time_in_seed, :bool, :default => false
12
+ config_param :use_record_as_seed, :bool, :default => false
13
+ config_param :use_entire_record, :bool, :default => false
14
+ config_param :record_keys, :array, :default => []
15
+ config_param :separator, :string, :default => '_'
16
+ config_param :hash_type, :enum, list: [:md5, :sha1, :sha256, :sha512], :default => :sha1
17
+
18
+ def initialize
19
+ super
20
+ end
21
+
22
+ def configure(conf)
23
+ super
24
+
25
+ if !@use_entire_record
26
+ if @record_keys.empty? && @use_record_as_seed
27
+ raise Fluent::ConfigError, "When using record as hash seed, users must specify `record_keys`."
28
+ end
29
+ end
30
+
31
+ if @use_record_as_seed
32
+ class << self
33
+ alias_method :filter, :filter_seed_as_record
34
+ end
35
+ else
36
+ class << self
37
+ alias_method :filter, :filter_simple
38
+ end
39
+ end
40
+ end
41
+
42
+ def filter(tag, time, record)
43
+ # for safety.
44
+ end
45
+
46
+ def filter_simple(tag, time, record)
47
+ record[@hash_id_key] = Base64.strict_encode64(SecureRandom.uuid)
48
+ record
49
+ end
50
+
51
+ def filter_seed_as_record(tag, time, record)
52
+ seed = ""
53
+ seed += tag + separator if @include_tag_in_seed
54
+ seed += time.to_s + separator if @include_time_in_seed
55
+ if @use_entire_record
56
+ record.each {|k,v| seed += "|#{k}|#{v}"}
57
+ else
58
+ seed += record_keys.map {|k| record[k]}.join(separator)
59
+ end
60
+ record[@hash_id_key] = Base64.strict_encode64(encode_hash(@hash_type, seed))
61
+ record
62
+ end
63
+
64
+ def encode_hash(type, seed)
65
+ case type
66
+ when :md5
67
+ Digest::MD5.digest(seed)
68
+ when :sha1
69
+ Digest::SHA1.digest(seed)
70
+ when :sha256
71
+ Digest::SHA256.digest(seed)
72
+ when :sha512
73
+ Digest::SHA512.digest(seed)
74
+ end
75
+ end
76
+ end
77
+ end
@@ -0,0 +1,325 @@
1
+ require 'elasticsearch'
2
+
3
+ require 'fluent/log-ext'
4
+ require 'fluent/plugin/input'
5
+ require_relative 'elasticsearch_constants'
6
+
7
module Fluent::Plugin
  # Input plugin that periodically queries an Elasticsearch index through the
  # scroll API (optionally sliced across several threads) and emits every
  # hit's `_source` document as a Fluentd event on @tag.
  class ElasticsearchInput < Input
    class UnrecoverableRequestFailure < Fluent::UnrecoverableError; end

    DEFAULT_RELOAD_AFTER = -1
    DEFAULT_STORAGE_TYPE = 'local'
    METADATA = "@metadata".freeze

    helpers :timer, :thread

    Fluent::Plugin.register_input('elasticsearch', self)

    config_param :tag, :string
    config_param :host, :string, :default => 'localhost'
    config_param :port, :integer, :default => 9200
    config_param :user, :string, :default => nil
    config_param :password, :string, :default => nil, :secret => true
    config_param :path, :string, :default => nil
    config_param :scheme, :enum, :list => [:https, :http], :default => :http
    config_param :hosts, :string, :default => nil
    config_param :index_name, :string, :default => "fluentd"
    config_param :parse_timestamp, :bool, :default => false
    config_param :timestamp_key_format, :string, :default => nil
    config_param :timestamp_parse_error_tag, :string, :default => 'elasticsearch_plugin.input.time.error'
    config_param :query, :hash, :default => {"sort" => [ "_doc" ]}
    config_param :scroll, :string, :default => "1m"
    config_param :size, :integer, :default => 1000
    config_param :num_slices, :integer, :default => 1
    # NOTE(review): declared as :size (bytes), not :time — "5m" would parse as
    # megabytes. Kept as-is for config compatibility; confirm intent upstream.
    config_param :interval, :size, :default => 5
    config_param :repeat, :bool, :default => true
    config_param :http_backend, :enum, list: [:excon, :typhoeus], :default => :excon
    config_param :request_timeout, :time, :default => 5
    config_param :reload_connections, :bool, :default => true
    config_param :reload_on_failure, :bool, :default => false
    config_param :resurrect_after, :time, :default => 60
    config_param :reload_after, :integer, :default => DEFAULT_RELOAD_AFTER
    config_param :ssl_verify, :bool, :default => true
    config_param :client_key, :string, :default => nil
    config_param :client_cert, :string, :default => nil
    config_param :client_key_pass, :string, :default => nil, :secret => true
    config_param :ca_file, :string, :default => nil
    config_param :ssl_version, :enum, list: [:SSLv23, :TLSv1, :TLSv1_1, :TLSv1_2], :default => :TLSv1_2
    config_param :with_transporter_log, :bool, :default => false
    config_param :sniffer_class_name, :string, :default => nil
    config_param :custom_headers, :hash, :default => {}
    config_param :docinfo_fields, :array, :default => ['_index', '_type', '_id']
    config_param :docinfo_target, :string, :default => METADATA
    config_param :docinfo, :bool, :default => false

    include Fluent::Plugin::ElasticsearchConstants

    def initialize
      super
    end

    # Validates credentials, resolves the optional sniffer class, and
    # precomputes the timestamp parser, HTTP backend options, and the base
    # search options/query used by every scroll request.
    def configure(conf)
      super

      @timestamp_parser = create_time_parser
      @backend_options = backend_options

      raise Fluent::ConfigError, "`password` must be present if `user` is present" if @user && @password.nil?

      # %{...} placeholders in user/password are URL-encoded so they can be
      # embedded safely in connection URLs.
      if @user && m = @user.match(/%{(?<user>.*)}/)
        @user = URI.encode_www_form_component(m["user"])
      end
      if @password && m = @password.match(/%{(?<password>.*)}/)
        @password = URI.encode_www_form_component(m["password"])
      end

      @transport_logger = nil
      if @with_transporter_log
        @transport_logger = log
        log_level = conf['@log_level'] || conf['log_level']
        log.warn "Consider to specify log_level with @log_level." unless log_level
      end
      @current_config = nil
      # Specify @sniffer_class before calling #client.
      @sniffer_class = nil
      begin
        @sniffer_class = Object.const_get(@sniffer_class_name) if @sniffer_class_name
      rescue Exception => ex
        # Any load/lookup failure here is a configuration problem.
        raise Fluent::ConfigError, "Could not load sniffer class #{@sniffer_class_name}: #{ex}"
      end

      @options = {
        :index => @index_name,
        :scroll => @scroll,
        :size => @size
      }
      @base_query = @query
    end

    # Client-certificate options in the shape expected by the selected HTTP
    # backend (excon or typhoeus).
    # @raise [Fluent::ConfigError] when the backend gem is not installed.
    def backend_options
      case @http_backend
      when :excon
        { client_key: @client_key, client_cert: @client_cert, client_key_pass: @client_key_pass }
      when :typhoeus
        require 'typhoeus'
        { sslkey: @client_key, sslcert: @client_cert, keypasswd: @client_key_pass }
      end
    rescue LoadError => ex
      log.error_backtrace(ex.backtrace)
      raise Fluent::ConfigError, "You must install #{@http_backend} gem. Exception: #{ex}"
    end

    # URL-encodes `%{user}:%{password}` placeholders inside a host URL so the
    # string parses as a valid URI; returns the input unchanged otherwise.
    def get_escaped_userinfo(host_str)
      if m = host_str.match(/(?<scheme>.*)%{(?<user>.*)}:%{(?<password>.*)}(?<path>@.*)/)
        m["scheme"] +
          URI.encode_www_form_component(m["user"]) +
          ':' +
          URI.encode_www_form_component(m["password"]) +
          m["path"]
      else
        host_str
      end
    end

    # Builds the `hosts:` array for the transport from either an explicit
    # host list (legacy "host:port,..." or full URLs) or the single
    # host/port/scheme parameters, folding in shared user/password/path.
    def get_connection_options(con_host=nil)

      hosts = if con_host || @hosts
                (con_host || @hosts).split(',').map do |host_str|
                  # Support legacy hosts format host:port,host:port,host:port...
                  if host_str.match(%r{^[^:]+(\:\d+)?$})
                    {
                      host: host_str.split(':')[0],
                      port: (host_str.split(':')[1] || @port).to_i,
                      scheme: @scheme.to_s
                    }
                  else
                    # New hosts format expects URLs such as http://logs.foo.com,https://john:pass@logs2.foo.com/elastic
                    uri = URI(get_escaped_userinfo(host_str))
                    %w(user password path).inject(host: uri.host, port: uri.port, scheme: uri.scheme) do |hash, key|
                      hash[key.to_sym] = uri.public_send(key) unless uri.public_send(key).nil? || uri.public_send(key) == ''
                      hash
                    end
                  end
                end.compact
              else
                [{host: @host, port: @port, scheme: @scheme.to_s}]
              end.each do |host|
        host.merge!(user: @user, password: @password) if !host[:user] && @user
        host.merge!(path: @path) if !host[:path] && @path
      end

      {
        hosts: hosts
      }
    end

    # Schedules #run on a timer; when @repeat is false the fetch happens once.
    def start
      super

      timer_execute(:in_elasticsearch_timer, @interval, repeat: @repeat, &method(:run))
    end

    # once fluent v0.14 is released we might be able to use
    # Fluent::Parser::TimeParser, but it doesn't quite do what we want - if gives
    # [sec,nsec] where as we want something we can call `strftime` on...
    #
    # Returns a Proc that parses a timestamp value into a Time. Prefers the
    # fast Strptime gem when it is available and understands the configured
    # format; otherwise falls back to DateTime.
    def create_time_parser
      if @timestamp_key_format
        begin
          # Strptime doesn't support all formats, but for those it does it's
          # blazingly fast.
          strptime = Strptime.new(@timestamp_key_format)
          Proc.new { |value|
            value = convert_numeric_time_into_string(value, @timestamp_key_format) if value.is_a?(Numeric)
            strptime.exec(value).to_time
          }
        rescue
          # Can happen if Strptime doesn't recognize the format; or
          # if strptime couldn't be required (because it's not installed -- it's
          # ruby 2 only)
          Proc.new { |value|
            value = convert_numeric_time_into_string(value, @timestamp_key_format) if value.is_a?(Numeric)
            DateTime.strptime(value, @timestamp_key_format).to_time
          }
        end
      else
        Proc.new { |value|
          value = convert_numeric_time_into_string(value) if value.is_a?(Numeric)
          DateTime.parse(value).to_time
        }
      end
    end

    # Renders a numeric (epoch) timestamp as a string in the given format so
    # the string-based parsers above can handle it.
    def convert_numeric_time_into_string(numeric_time, timestamp_key_format = "%Y-%m-%dT%H:%M:%S.%N%z")
      numeric_time_parser = Fluent::NumericTimeParser.new(:float)
      Time.at(numeric_time_parser.parse(numeric_time).to_r).strftime(timestamp_key_format)
    end

    # Parses +value+ with the configured parser; on failure emits an error
    # event on @timestamp_parse_error_tag and falls back to +event_time+.
    def parse_time(value, event_time, tag)
      @timestamp_parser.call(value)
    rescue => e
      router.emit_error_event(@timestamp_parse_error_tag, Fluent::Engine.now, {'tag' => tag, 'time' => event_time, 'format' => @timestamp_key_format, 'value' => value}, e)
      return Time.at(event_time).to_time
    end

    # Returns a (cached) Elasticsearch client for the given host string,
    # rebuilding it only when the resolved host set changed.
    def client(host = nil)
      # check here to see if we already have a client connection for the given host
      connection_options = get_connection_options(host)

      @_es = nil unless is_existing_connection(connection_options[:hosts])

      @_es ||= begin
        @current_config = connection_options[:hosts].clone
        adapter_conf = lambda {|f| f.adapter @http_backend, @backend_options }
        local_reload_connections = @reload_connections
        if local_reload_connections && @reload_after > DEFAULT_RELOAD_AFTER
          local_reload_connections = @reload_after
        end

        headers = { 'Content-Type' => "application/json" }.merge(@custom_headers)

        transport = Elasticsearch::Transport::Transport::HTTP::Faraday.new(
          connection_options.merge(
            options: {
              reload_connections: local_reload_connections,
              reload_on_failure: @reload_on_failure,
              resurrect_after: @resurrect_after,
              logger: @transport_logger,
              transport_options: {
                headers: headers,
                request: { timeout: @request_timeout },
                ssl: { verify: @ssl_verify, ca_file: @ca_file, version: @ssl_version }
              },
              http: {
                user: @user,
                password: @password
              },
              sniffer_class: @sniffer_class,
            }), &adapter_conf)
        Elasticsearch::Client.new transport: transport
      end
    end

    # True when +host+ (an array of host hashes) matches the host/port pairs
    # of the currently cached client connection, so the client can be reused.
    def is_existing_connection(host)
      # check if the host provided match the current connection
      return false if @_es.nil?
      return false if @current_config.nil?
      return false if host.length != @current_config.length

      for i in 0...host.length
        # Parentheses are required: without them the `||` expression is parsed
        # as part of the eql? argument and the port comparison is skipped.
        if !host[i][:host].eql?(@current_config[i][:host]) || host[i][:port] != @current_config[i][:port]
          return false
        end
      end

      return true
    end

    # Timer callback: runs a single scroll, or spawns one thread per slice
    # when sliced scrolling is configured.
    def run
      return run_slice if @num_slices <= 1

      log.warn("Large slice number is specified:(#{@num_slices}). Consider reducing num_slices") if @num_slices > 8

      @num_slices.times.map do |slice_id|
        thread_create(:"in_elasticsearch_thread_#{slice_id}") do
          run_slice(slice_id)
        end
      end
    end

    # Executes one full scroll (for one slice when +slice_id+ is given),
    # accumulates all hits into a MultiEventStream, emits it, and clears the
    # server-side scroll context.
    def run_slice(slice_id=nil)
      slice_query = @base_query
      slice_query = slice_query.merge('slice' => { 'id' => slice_id, 'max' => @num_slices}) unless slice_id.nil?
      # Yajl is provided by fluentd's own dependencies.
      result = client.search(@options.merge(:body => Yajl.dump(slice_query) ))
      es = Fluent::MultiEventStream.new

      result["hits"]["hits"].each {|hit| process_events(hit, es)}
      has_hits = result['hits']['hits'].any?
      scroll_id = result['_scroll_id']

      while has_hits && scroll_id
        result = process_next_scroll_request(es, scroll_id)
        has_hits = result['has_hits']
        scroll_id = result['_scroll_id']
      end

      router.emit_stream(@tag, es)
      client.clear_scroll(scroll_id: scroll_id) if scroll_id
    end

    # Fetches the next page of an open scroll.
    def process_scroll_request(scroll_id)
      client.scroll(:body => { :scroll_id => scroll_id }, :scroll => @scroll)
    end

    # Processes one scroll page into +es+ and reports whether more pages
    # remain along with the follow-up scroll id.
    def process_next_scroll_request(es, scroll_id)
      result = process_scroll_request(scroll_id)
      result['hits']['hits'].each { |hit| process_events(hit, es) }
      {'has_hits' => result['hits']['hits'].any?, '_scroll_id' => result['_scroll_id']}
    end

    # Converts one search hit into an event: optionally parses @timestamp,
    # optionally copies docinfo fields (_index/_type/_id by default) into the
    # docinfo_target sub-hash, then adds the `_source` document to +es+.
    def process_events(hit, es)
      event = hit["_source"]
      time = Fluent::Engine.now
      if @parse_timestamp
        if event.has_key?(TIMESTAMP_FIELD)
          rts = event[TIMESTAMP_FIELD]
          time = parse_time(rts, time, @tag)
        end
      end
      if @docinfo
        docinfo_target = event[@docinfo_target] || {}

        unless docinfo_target.is_a?(Hash)
          # Kernel#raise only accepts (class, message, backtrace); the details
          # are folded into the message instead of passed as extra arguments.
          # UnrecoverableError resolves to Fluent::UnrecoverableError.
          raise UnrecoverableError, "incompatible type for the docinfo_target=#{@docinfo_target} field in the `_source` document, expected a hash got: #{docinfo_target.class}, event: #{event}"
        end

        @docinfo_fields.each do |field|
          docinfo_target[field] = hit[field]
        end

        event[@docinfo_target] = docinfo_target
      end
      es.add(time, event)
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require 'oj'
2
+
3
module Fluent::Plugin
  module Serializer

    # JSON (de)serializer backed by the Oj gem, usable as a drop-in
    # replacement for the default serializer of elasticsearch-transport.
    class Oj
      include Elasticsearch::Transport::Transport::Serializer::Base

      # Parse a JSON string into Ruby objects via Oj.
      #
      def load(json, opts = {})
        ::Oj.load(json, opts)
      end

      # Render a Ruby object as a JSON string via Oj.
      #
      def dump(obj, opts = {})
        ::Oj.dump(obj, opts)
      end
    end
  end
end