fluent-plugin-elasticsearch-dext 5.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.editorconfig +9 -0
  4. data/.github/ISSUE_TEMPLATE/bug_report.md +37 -0
  5. data/.github/ISSUE_TEMPLATE/feature_request.md +24 -0
  6. data/.github/workflows/issue-auto-closer.yml +12 -0
  7. data/.github/workflows/linux.yml +26 -0
  8. data/.github/workflows/macos.yml +26 -0
  9. data/.github/workflows/windows.yml +26 -0
  10. data/.gitignore +18 -0
  11. data/.travis.yml +40 -0
  12. data/CONTRIBUTING.md +24 -0
  13. data/Gemfile +11 -0
  14. data/History.md +553 -0
  15. data/ISSUE_TEMPLATE.md +30 -0
  16. data/LICENSE.txt +201 -0
  17. data/PULL_REQUEST_TEMPLATE.md +10 -0
  18. data/README.ElasticsearchGenID.md +116 -0
  19. data/README.ElasticsearchInput.md +293 -0
  20. data/README.Troubleshooting.md +601 -0
  21. data/README.md +1467 -0
  22. data/Rakefile +11 -0
  23. data/appveyor.yml +20 -0
  24. data/fluent-plugin-elasticsearch.gemspec +35 -0
  25. data/gemfiles/Gemfile.elasticsearch.v6 +12 -0
  26. data/lib/fluent/log-ext.rb +38 -0
  27. data/lib/fluent/plugin/default-ilm-policy.json +14 -0
  28. data/lib/fluent/plugin/elasticsearch_constants.rb +13 -0
  29. data/lib/fluent/plugin/elasticsearch_error.rb +5 -0
  30. data/lib/fluent/plugin/elasticsearch_error_handler.rb +129 -0
  31. data/lib/fluent/plugin/elasticsearch_fallback_selector.rb +9 -0
  32. data/lib/fluent/plugin/elasticsearch_index_lifecycle_management.rb +67 -0
  33. data/lib/fluent/plugin/elasticsearch_index_template.rb +211 -0
  34. data/lib/fluent/plugin/elasticsearch_simple_sniffer.rb +10 -0
  35. data/lib/fluent/plugin/elasticsearch_tls.rb +70 -0
  36. data/lib/fluent/plugin/filter_elasticsearch_genid.rb +77 -0
  37. data/lib/fluent/plugin/in_elasticsearch.rb +325 -0
  38. data/lib/fluent/plugin/oj_serializer.rb +22 -0
  39. data/lib/fluent/plugin/out_elasticsearch.rb +1108 -0
  40. data/lib/fluent/plugin/out_elasticsearch_data_stream.rb +218 -0
  41. data/lib/fluent/plugin/out_elasticsearch_dynamic.rb +282 -0
  42. data/test/helper.rb +24 -0
  43. data/test/plugin/test_alias_template.json +9 -0
  44. data/test/plugin/test_elasticsearch_error_handler.rb +646 -0
  45. data/test/plugin/test_elasticsearch_fallback_selector.rb +74 -0
  46. data/test/plugin/test_elasticsearch_index_lifecycle_management.rb +66 -0
  47. data/test/plugin/test_elasticsearch_tls.rb +145 -0
  48. data/test/plugin/test_filter_elasticsearch_genid.rb +215 -0
  49. data/test/plugin/test_in_elasticsearch.rb +459 -0
  50. data/test/plugin/test_index_alias_template.json +11 -0
  51. data/test/plugin/test_index_template.json +25 -0
  52. data/test/plugin/test_oj_serializer.rb +19 -0
  53. data/test/plugin/test_out_elasticsearch.rb +5688 -0
  54. data/test/plugin/test_out_elasticsearch_data_stream.rb +337 -0
  55. data/test/plugin/test_out_elasticsearch_dynamic.rb +1134 -0
  56. data/test/plugin/test_template.json +23 -0
  57. data/test/test_log-ext.rb +35 -0
  58. metadata +236 -0
data/lib/fluent/plugin/out_elasticsearch_data_stream.rb
@@ -0,0 +1,218 @@
+ require_relative 'out_elasticsearch'
+
+ module Fluent::Plugin
+   class ElasticsearchOutputDataStream < ElasticsearchOutput
+
+     Fluent::Plugin.register_output('elasticsearch_data_stream', self)
+
+     helpers :event_emitter
+
+     config_param :data_stream_name, :string
+     # Elasticsearch 7.9 or later always supports the new style of index templates.
+     config_set_default :use_legacy_template, false
+
+     INVALID_START_CHARACTERS = ["-", "_", "+", "."]
+     INVALID_CHARACTERS = ["\\", "/", "*", "?", "\"", "<", ">", "|", " ", ",", "#", ":"]
+
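+     # Usage sketch (illustrative, not part of this file): a minimal Fluentd
+     # match section for this output; the tag pattern and stream name below
+     # are assumptions, not values shipped with the plugin.
+     #
+     #   <match logs.**>
+     #     @type elasticsearch_data_stream
+     #     data_stream_name logs-app-default
+     #   </match>
+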
+     def configure(conf)
+       super
+
+       begin
+         require 'elasticsearch/api'
+         require 'elasticsearch/xpack'
+       rescue LoadError
+         raise Fluent::ConfigError, "'elasticsearch/api' and 'elasticsearch/xpack' are required for <@elasticsearch_data_stream>."
+       end
+
+       # ref. https://www.elastic.co/guide/en/elasticsearch/reference/master/indices-create-data-stream.html
+       if placeholder?(:data_stream_name_placeholder, @data_stream_name)
+         @use_placeholder = true
+         @data_stream_names = []
+       else
+         validate_data_stream_name
+       end
+
+       @client = client
+       unless @use_placeholder
+         begin
+           @data_stream_names = [@data_stream_name]
+           create_ilm_policy(@data_stream_name)
+           create_index_template(@data_stream_name)
+           create_data_stream(@data_stream_name)
+         rescue => e
+           raise Fluent::ConfigError, "Failed to create data stream <#{@data_stream_name}>: #{e.message}"
+         end
+       end
+     end
+
+     def validate_data_stream_name
+       unless valid_data_stream_name?
+         unless start_with_valid_characters?
+           if not_dots?
+             raise Fluent::ConfigError, "'data_stream_name' must not start with #{INVALID_START_CHARACTERS.join(",")}: <#{@data_stream_name}>"
+           else
+             raise Fluent::ConfigError, "'data_stream_name' must not be . or ..: <#{@data_stream_name}>"
+           end
+         end
+         unless valid_characters?
+           raise Fluent::ConfigError, "'data_stream_name' must not contain invalid characters #{INVALID_CHARACTERS.join(",")}: <#{@data_stream_name}>"
+         end
+         unless lowercase_only?
+           raise Fluent::ConfigError, "'data_stream_name' must be lowercase only: <#{@data_stream_name}>"
+         end
+         if @data_stream_name.bytes.size > 255
+           raise Fluent::ConfigError, "'data_stream_name' must not be longer than 255 bytes: <#{@data_stream_name}>"
+         end
+       end
+     end
+
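+     # The create_* helpers below are idempotent: each returns early when the
+     # data stream already exists, so configure() and write() can call them
+     # unconditionally.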
+     def create_ilm_policy(name)
+       return if data_stream_exist?(name)
+       params = {
+         policy_id: "#{name}_policy",
+         body: File.read(File.join(File.dirname(__FILE__), "default-ilm-policy.json"))
+       }
+       retry_operate(@max_retry_putting_template,
+                     @fail_on_putting_template_retry_exceed,
+                     @catch_transport_exception_on_retry) do
+         @client.xpack.ilm.put_policy(params)
+       end
+     end
+
+     def create_index_template(name)
+       return if data_stream_exist?(name)
+       body = {
+         "index_patterns" => ["#{name}*"],
+         "data_stream" => {},
+         "template" => {
+           "settings" => {
+             "index.lifecycle.name" => "#{name}_policy"
+           }
+         }
+       }
+       params = {
+         name: name,
+         body: body
+       }
+       retry_operate(@max_retry_putting_template,
+                     @fail_on_putting_template_retry_exceed,
+                     @catch_transport_exception_on_retry) do
+         @client.indices.put_index_template(params)
+       end
+     end
+
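+     # Existence is probed via the GET _data_stream API; a NotFound error from
+     # the transport is taken to mean the stream still needs to be created.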
+     def data_stream_exist?(name)
+       params = {
+         "name": name
+       }
+       begin
+         response = @client.indices.get_data_stream(params)
+         return (not response.is_a?(Elasticsearch::Transport::Transport::Errors::NotFound))
+       rescue Elasticsearch::Transport::Transport::Errors::NotFound => e
+         log.info "Specified data stream does not exist. Will be created: <#{e}>"
+         return false
+       end
+     end
+
+     def create_data_stream(name)
+       return if data_stream_exist?(name)
+       params = {
+         "name": name
+       }
+       retry_operate(@max_retry_putting_template,
+                     @fail_on_putting_template_retry_exceed,
+                     @catch_transport_exception_on_retry) do
+         @client.indices.create_data_stream(params)
+       end
+     end
+
+     def valid_data_stream_name?
+       lowercase_only? and
+         valid_characters? and
+         start_with_valid_characters? and
+         not_dots? and
+         @data_stream_name.bytes.size <= 255
+     end
+
+     def lowercase_only?
+       @data_stream_name.downcase == @data_stream_name
+     end
+
+     def valid_characters?
+       INVALID_CHARACTERS.none? { |v| @data_stream_name.include?(v) }
+     end
+
+     def start_with_valid_characters?
+       INVALID_START_CHARACTERS.none? { |v| @data_stream_name.start_with?(v) }
+     end
+
+     def not_dots?
+       not (@data_stream_name == "." or @data_stream_name == "..")
+     end
+
+     def client_library_version
+       Elasticsearch::VERSION
+     end
+
+     def multi_workers_ready?
+       true
+     end
+
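+     # Elasticsearch data streams accept only the "create" bulk action and
+     # require an @timestamp field in every document; write() below therefore
+     # stamps each record and always uses CREATE_OP.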
+     def write(chunk)
+       data_stream_name = @data_stream_name
+       if @use_placeholder
+         data_stream_name = extract_placeholders(@data_stream_name, chunk)
+         unless @data_stream_names.include?(data_stream_name)
+           begin
+             create_ilm_policy(data_stream_name)
+             create_index_template(data_stream_name)
+             create_data_stream(data_stream_name)
+             @data_stream_names << data_stream_name
+           rescue => e
+             raise Fluent::ConfigError, "Failed to create data stream <#{data_stream_name}>: #{e.message}"
+           end
+         end
+       end
+
+       bulk_message = ""
+       headers = {
+         CREATE_OP => {}
+       }
+       tag = chunk.metadata.tag
+       chunk.msgpack_each do |time, record|
+         next unless record.is_a? Hash
+
+         begin
+           record.merge!({"@timestamp" => Time.at(time).iso8601(@time_precision)})
+           bulk_message = append_record_to_messages(CREATE_OP, {}, headers, record, bulk_message)
+         rescue => e
+           router.emit_error_event(tag, time, record, e)
+         end
+       end
+
+       params = {
+         index: data_stream_name,
+         body: bulk_message
+       }
+       begin
+         response = @client.bulk(params)
+         if response['errors']
+           log.error "Could not bulk insert to Data Stream: #{data_stream_name} #{response}"
+         end
+       rescue => e
+         log.error "Could not bulk insert to Data Stream: #{data_stream_name} #{e.message}"
+       end
+     end
+
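+     # Each event becomes two newline-delimited JSON lines in the bulk body:
+     # the "create" action line, then the document itself, as the Bulk API
+     # expects (BODY_DELIMITER is defined in the parent plugin).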
+     def append_record_to_messages(op, meta, header, record, msgs)
+       header[CREATE_OP] = meta
+       msgs << @dump_proc.call(header) << BODY_DELIMITER
+       msgs << @dump_proc.call(record) << BODY_DELIMITER
+       msgs
+     end
+
+     def retry_stream_retryable?
+       @buffer.storable?
+     end
+   end
+ end
data/lib/fluent/plugin/out_elasticsearch_dynamic.rb
@@ -0,0 +1,282 @@
+ # encoding: UTF-8
+ require_relative 'out_elasticsearch'
+
+ module Fluent::Plugin
+   class ElasticsearchOutputDynamic < ElasticsearchOutput
+
+     Fluent::Plugin.register_output('elasticsearch_dynamic', self)
+
+     helpers :event_emitter
+
+     config_param :delimiter, :string, :default => "."
+
+     DYNAMIC_PARAM_NAMES = %w[hosts host port include_timestamp logstash_format logstash_prefix logstash_dateformat time_key utc_index index_name tag_key type_name id_key parent_key routing_key write_operation]
+     DYNAMIC_PARAM_SYMBOLS = DYNAMIC_PARAM_NAMES.map { |n| "@#{n}".to_sym }
+
+     RequestInfo = Struct.new(:host, :index)
+
+     attr_reader :dynamic_config
+
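+     # Usage sketch (illustrative, not part of this file): dynamic parameters
+     # may embed Ruby expressions in ${...}; the values below are assumptions.
+     #
+     #   <match logs.**>
+     #     @type elasticsearch_dynamic
+     #     index_name logstash-${tag_parts[0]}
+     #     host ${record['host']}
+     #   </match>
+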
+     def configure(conf)
+       super
+
+       # expand all dynamic parameters once with an empty context so that
+       # values without ${...} expressions are cached up front
+       @dynamic_config = {}
+       DYNAMIC_PARAM_SYMBOLS.each_with_index { |var, i|
+         value = expand_param(self.instance_variable_get(var), nil, nil, nil)
+         key = DYNAMIC_PARAM_NAMES[i]
+         @dynamic_config[key] = value.to_s
+       }
+     end
+
+     def create_meta_config_map
+       {'id_key' => '_id', 'parent_key' => '_parent', 'routing_key' => @routing_key_name}
+     end
+
+     def client(host = nil, compress_connection = false)
+       # check here to see if we already have a client connection for the given host
+       connection_options = get_connection_options(host)
+
+       @_es = nil unless is_existing_connection(connection_options[:hosts])
+       @_es = nil unless @compressable_connection == compress_connection
+
+       @_es ||= begin
+         @compressable_connection = compress_connection
+         @current_config = connection_options[:hosts].clone
+         adapter_conf = lambda {|f| f.adapter @http_backend, @backend_options }
+         gzip_headers = if compress_connection
+                          {'Content-Encoding' => 'gzip'}
+                        else
+                          {}
+                        end
+         headers = { 'Content-Type' => @content_type.to_s }.merge(gzip_headers)
+         ssl_options = { verify: @ssl_verify, ca_file: @ca_file }.merge(@ssl_version_options)
+         transport = Elasticsearch::Transport::Transport::HTTP::Faraday.new(connection_options.merge(
+           options: {
+             reload_connections: @reload_connections,
+             reload_on_failure: @reload_on_failure,
+             resurrect_after: @resurrect_after,
+             logger: @transport_logger,
+             transport_options: {
+               headers: headers,
+               request: { timeout: @request_timeout },
+               ssl: ssl_options,
+             },
+             http: {
+               user: @user,
+               password: @password,
+               scheme: @scheme
+             },
+             compression: compress_connection,
+           }), &adapter_conf)
+         Elasticsearch::Client.new transport: transport
+       end
+     end
+
+     def get_connection_options(con_host)
+       raise "`password` must be present if `user` is present" if @user && !@password
+
+       hosts = if con_host || @hosts
+         (con_host || @hosts).split(',').map do |host_str|
+           # Support legacy hosts format host:port,host:port,host:port...
+           if host_str.match(%r{^[^:]+(\:\d+)?$})
+             {
+               host: host_str.split(':')[0],
+               port: (host_str.split(':')[1] || @port).to_i,
+               scheme: @scheme.to_s
+             }
+           else
+             # New hosts format expects URLs such as http://logs.foo.com,https://john:pass@logs2.foo.com/elastic
+             uri = URI(get_escaped_userinfo(host_str))
+             %w(user password path).inject(host: uri.host, port: uri.port, scheme: uri.scheme) do |hash, key|
+               hash[key.to_sym] = uri.public_send(key) unless uri.public_send(key).nil? || uri.public_send(key) == ''
+               hash
+             end
+           end
+         end.compact
+       else
+         [{host: @host, port: @port.to_i, scheme: @scheme.to_s}]
+       end.each do |host|
+         host.merge!(user: @user, password: @password) if !host[:user] && @user
+         host.merge!(path: @path) if !host[:path] && @path
+       end
+
+       {
+         hosts: hosts
+       }
+     end
+
+     def connection_options_description(host)
+       get_connection_options(host)[:hosts].map do |host_info|
+         attributes = host_info.dup
+         attributes[:password] = 'obfuscated' if attributes.has_key?(:password)
+         attributes.inspect
+       end.join(', ')
+     end
+
+     def multi_workers_ready?
+       true
+     end
+
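+     # Events are grouped per (host, index) pair via RequestInfo below, so a
+     # single chunk can fan out to several clusters or indices, one bulk
+     # request per group.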
+     def write(chunk)
+       bulk_message = Hash.new { |h,k| h[k] = '' }
+       dynamic_conf = @dynamic_config.clone
+
+       headers = {
+         UPDATE_OP => {},
+         UPSERT_OP => {},
+         CREATE_OP => {},
+         INDEX_OP => {}
+       }
+
+       tag = chunk.metadata.tag
+
+       chunk.msgpack_each do |time, record|
+         next unless record.is_a? Hash
+
+         if @flatten_hashes
+           record = flatten_record(record)
+         end
+
+         begin
+           # re-evaluate each dynamic parameter against this record; skip
+           # re-expansion when the stored value already equals the raw value
+           DYNAMIC_PARAM_SYMBOLS.each_with_index { |var, i|
+             k = DYNAMIC_PARAM_NAMES[i]
+             v = self.instance_variable_get(var)
+             if dynamic_conf[k] != v
+               value = expand_param(v, tag, time, record)
+               dynamic_conf[k] = value
+             end
+           }
+         rescue => e
+           # a misconfigured dynamic parameter routes the record to the error stream
+           router.emit_error_event(tag, time, record, e)
+           next
+         end
+
+         if eval_or_val(dynamic_conf['logstash_format']) || eval_or_val(dynamic_conf['include_timestamp'])
+           if record.has_key?("@timestamp")
+             time = Time.parse record["@timestamp"]
+           elsif record.has_key?(dynamic_conf['time_key'])
+             time = Time.parse record[dynamic_conf['time_key']]
+             record['@timestamp'] = record[dynamic_conf['time_key']] unless time_key_exclude_timestamp
+           else
+             record.merge!({"@timestamp" => Time.at(time).iso8601(@time_precision)})
+           end
+         end
+
+         if eval_or_val(dynamic_conf['logstash_format'])
+           if eval_or_val(dynamic_conf['utc_index'])
+             target_index = "#{dynamic_conf['logstash_prefix']}#{@logstash_prefix_separator}#{Time.at(time).getutc.strftime("#{dynamic_conf['logstash_dateformat']}")}"
+           else
+             target_index = "#{dynamic_conf['logstash_prefix']}#{@logstash_prefix_separator}#{Time.at(time).strftime("#{dynamic_conf['logstash_dateformat']}")}"
+           end
+         else
+           target_index = dynamic_conf['index_name']
+         end
+
+         # Change target_index to lower-case since Elasticsearch doesn't
+         # allow upper-case characters in index names.
+         target_index = target_index.downcase
+
+         if @include_tag_key
+           record.merge!(dynamic_conf['tag_key'] => tag)
+         end
+
+         if dynamic_conf['hosts']
+           host = dynamic_conf['hosts']
+         else
+           host = "#{dynamic_conf['host']}:#{dynamic_conf['port']}"
+         end
+
+         if @include_index_in_url
+           key = RequestInfo.new(host, target_index)
+           meta = {"_type" => dynamic_conf['type_name']}
+         else
+           key = RequestInfo.new(host, nil)
+           meta = {"_index" => target_index, "_type" => dynamic_conf['type_name']}
+         end
+
+         @meta_config_map.each_pair do |config_name, meta_key|
+           if dynamic_conf[config_name] && accessor = record_accessor_create(dynamic_conf[config_name])
+             if raw_value = accessor.call(record)
+               meta[meta_key] = raw_value
+             end
+           end
+         end
+
+         if @remove_keys
+           @remove_keys.each { |key| record.delete(key) }
+         end
+
+         write_op = dynamic_conf["write_operation"]
+         append_record_to_messages(write_op, meta, headers[write_op], record, bulk_message[key])
+       end
+
+       bulk_message.each do |info, msgs|
+         send_bulk(msgs, info.host, info.index) unless msgs.empty?
+         msgs.clear
+       end
+     end
+
+     def send_bulk(data, host, index)
+       begin
+         prepared_data = if compression
+                           gzip(data)
+                         else
+                           data
+                         end
+         response = client(host, compression).bulk body: prepared_data, index: index
+         if response['errors']
+           log.error "Could not push log to Elasticsearch: #{response}"
+         end
+       rescue => e
+         @_es = nil if @reconnect_on_error
+         # FIXME: identify unrecoverable errors and raise UnrecoverableRequestFailure instead
+         raise RecoverableRequestFailure, "could not push logs to Elasticsearch cluster (#{connection_options_description(host)}): #{e.message}"
+       end
+     end
+
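+     # NOTE: both eval_or_val and expand_param below evaluate strings with
+     # Ruby eval, so the Fluentd configuration must be trusted input; never
+     # feed user-controlled strings into ${...} expressions.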
+     def eval_or_val(var)
+       return var unless var.is_a?(String)
+       eval(var)
+     end
+
+     def expand_param(param, tag, time, record)
+       # check for '${ ... }'
+       #   yes => eval
+       #   no  => return param
+       return param if (param.to_s =~ /\${.+}/).nil?
+
+       # check for 'tag_parts[]'
+       # separated by a delimiter (default '.')
+       tag_parts = tag.split(@delimiter) unless (param =~ /tag_parts\[.+\]/).nil? || tag.nil?
+
+       # pull out the section between ${} then eval
+       inner = param.clone
+       while inner.match(/\${.+}/)
+         to_eval = inner.match(/\${(.+?)}/){$1}
+
+         if !(to_eval =~ /record\[.+\]/).nil? && record.nil?
+           return to_eval
+         elsif !(to_eval =~ /tag_parts\[.+\]/).nil? && tag_parts.nil?
+           return to_eval
+         elsif !(to_eval =~ /time/).nil? && time.nil?
+           return to_eval
+         else
+           inner.sub!(/\${.+?}/, eval( to_eval ))
+         end
+       end
+       inner
+     end
+
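+     # Worked example for expand_param above (illustrative): with the default
+     # "." delimiter, tag "app.web.error" and record {"user" => "alice"},
+     #   "logs-${tag_parts[1]}-${record['user']}"
+     # expands to "logs-web-alice".
+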
+     def is_valid_expand_param_type(param)
+       return false if [:@buffer_type].include?(param)
+       return self.instance_variable_get(param).is_a?(String)
+     end
+   end
+ end