fluent-plugin-elasticsearch 4.3.3 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,7 @@ $:.push File.expand_path('../lib', __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = 'fluent-plugin-elasticsearch'
6
- s.version = '4.3.3'
6
+ s.version = '5.0.0'
7
7
  s.authors = ['diogo', 'pitr', 'Hiroshi Hatake']
8
8
  s.email = ['pitr.vern@gmail.com', 'me@diogoterror.com', 'cosmo0920.wp@gmail.com']
9
9
  s.description = %q{Elasticsearch output plugin for Fluent event collector}
@@ -0,0 +1,215 @@
1
+ require_relative 'out_elasticsearch'
2
+
3
+ module Fluent::Plugin
4
+ class ElasticsearchOutputDataStream < ElasticsearchOutput
5
+
6
# Register this output under the 'elasticsearch_data_stream' plugin type.
Fluent::Plugin.register_output('elasticsearch_data_stream', self)

# Provides `router`, which #write uses to emit error events.
helpers :event_emitter

# Name of the target data stream; declared without a default.
config_param :data_stream_name, :string
# Elasticsearch 7.9 or later always supports the new style of index template.
config_set_default :use_legacy_template, false

# Prefixes a data stream name must not start with.
# NOTE: the constant name is misspelled ("CHRACTERS") but is referenced by the
# test suite, so it is kept as-is for compatibility.
INVALID_START_CHRACTERS = ["-", "_", "+", "."].freeze
# Characters that must not appear anywhere in a data stream name.
INVALID_CHARACTERS = ["\\", "/", "*", "?", "\"", "<", ">", "|", " ", ",", "#", ":"].freeze
16
+
17
# Configures the plugin and, when the data stream name is static, provisions it.
#
# Loads 'elasticsearch/xpack', then either validates @data_stream_name or, when
# the name contains placeholders, only records that fact (@use_placeholder) and
# starts with an empty list of known data streams. For a static name the ILM
# policy, index template and data stream are created immediately.
#
# @param conf the plugin configuration, forwarded to the parent class via super
# @raise [Fluent::ConfigError] if 'elasticsearch/xpack' cannot be loaded, if the
#   static name is invalid, or if any provisioning step raises
def configure(conf)
  super

  begin
    require 'elasticsearch/xpack'
  rescue LoadError
    raise Fluent::ConfigError, "'elasticsearch/xpack' is required for <@elasticsearch_data_stream>."
  end

  # ref. https://www.elastic.co/guide/en/elasticsearch/reference/master/indices-create-data-stream.html
  if placeholder?(:data_stream_name_placeholder, @data_stream_name)
    @use_placeholder = true
    @data_stream_names = []
  else
    @use_placeholder = false
    validate_data_stream_name
  end

  @client = client
  # With placeholders the concrete name is only known per chunk.
  return if @use_placeholder

  begin
    @data_stream_names = [@data_stream_name]
    create_ilm_policy(@data_stream_name)
    create_index_template(@data_stream_name)
    create_data_stream(@data_stream_name)
  rescue => e
    raise Fluent::ConfigError, "Failed to create data stream: <#{@data_stream_name}> #{e.message}"
  end
end
46
+
47
# Raises a descriptive Fluent::ConfigError when @data_stream_name is invalid.
#
# Checks run in a fixed order (start characters / dots, invalid characters,
# lowercase, length), so the first failing rule determines the message.
#
# @raise [Fluent::ConfigError] describing the first violated naming rule
# @return [nil] when the name is valid
def validate_data_stream_name
  return if valid_data_stream_name?

  unless start_with_valid_characters?
    # "." and ".." also fail the start-character check, but get their own message.
    unless not_dots?
      raise Fluent::ConfigError, "'data_stream_name' must not be . or ..: <#{@data_stream_name}>"
    end
    raise Fluent::ConfigError, "'data_stream_name' must not start with #{INVALID_START_CHRACTERS.join(",")}: <#{@data_stream_name}>"
  end

  unless valid_characters?
    raise Fluent::ConfigError, "'data_stream_name' must not contain invalid characters #{INVALID_CHARACTERS.join(",")}: <#{@data_stream_name}>"
  end

  unless lowercase_only?
    raise Fluent::ConfigError, "'data_stream_name' must be lowercase only: <#{@data_stream_name}>"
  end

  too_long = @data_stream_name.bytes.size > 255
  raise Fluent::ConfigError, "'data_stream_name' must not be longer than 255 bytes: <#{@data_stream_name}>" if too_long
end
67
+
68
# Puts the bundled default ILM policy under the id "<name>_policy".
#
# The policy body is read from default-ilm-policy.json located next to this
# file. The request runs inside retry_operate with the template retry settings.
#
# @param name [String] data stream name the policy id is derived from
def create_ilm_policy(name)
  policy_file = File.join(File.dirname(__FILE__), "default-ilm-policy.json")
  request = {
    policy_id: "#{name}_policy",
    body: File.read(policy_file)
  }
  retry_operate(@max_retry_putting_template,
                @fail_on_putting_template_retry_exceed,
                @catch_transport_exception_on_retry) do
    @client.xpack.ilm.put_policy(request)
  end
end
79
+
80
# Puts an index template named +name+ that matches "<name>*", enables data
# streams, and attaches the "<name>_policy" lifecycle policy.
#
# The request runs inside retry_operate with the template retry settings.
#
# @param name [String] data stream name; also used as the template name
def create_index_template(name)
  lifecycle_settings = { "index.lifecycle.name" => "#{name}_policy" }
  template_body = {
    "index_patterns" => ["#{name}*"],
    "data_stream" => {},
    "template" => { "settings" => lifecycle_settings }
  }
  request = { name: name, body: template_body }
  retry_operate(@max_retry_putting_template,
                @fail_on_putting_template_retry_exceed,
                @catch_transport_exception_on_retry) do
    @client.indices.put_index_template(request)
  end
end
100
+
101
# Reports whether the data stream +name+ can be fetched from Elasticsearch.
#
# A NotFound error raised by the client is logged at info level and treated as
# "does not exist"; any other error propagates to the caller.
#
# @param name [String] data stream name to look up
# @return [Boolean] false when the lookup raised (or returned) NotFound
def data_stream_exist?(name)
  lookup = { name: name }
  response = @client.indices.get_data_stream(lookup)
  !response.is_a?(Elasticsearch::Transport::Transport::Errors::NotFound)
rescue Elasticsearch::Transport::Transport::Errors::NotFound => e
  log.info "Specified data stream does not exist. Will be created: <#{e}>"
  false
end
113
+
114
# Creates the data stream +name+ unless data_stream_exist? reports it exists.
#
# The create request runs inside retry_operate with the template retry settings.
#
# @param name [String] data stream name
# @return [nil] when the data stream already exists
def create_data_stream(name)
  return if data_stream_exist?(name)

  request = { name: name }
  retry_operate(@max_retry_putting_template,
                @fail_on_putting_template_retry_exceed,
                @catch_transport_exception_on_retry) do
    @client.indices.create_data_stream(request)
  end
end
125
+
126
# True when @data_stream_name passes every naming rule: lowercase only, no
# invalid characters, a valid first character, not "." or "..", and at most
# 255 bytes long.
def valid_data_stream_name?
  lowercase_only? &&
    valid_characters? &&
    start_with_valid_characters? &&
    not_dots? &&
    @data_stream_name.bytes.size <= 255
end
133
+
134
# True when downcasing @data_stream_name leaves it unchanged.
def lowercase_only?
  name = @data_stream_name
  name == name.downcase
end
137
+
138
# True when @data_stream_name contains none of INVALID_CHARACTERS.
def valid_characters?
  INVALID_CHARACTERS.none? { |char| @data_stream_name.include?(char) }
end
141
+
142
# True when @data_stream_name does not begin with any of INVALID_START_CHRACTERS.
def start_with_valid_characters?
  !@data_stream_name.start_with?(*INVALID_START_CHRACTERS)
end
145
+
146
# True unless @data_stream_name is exactly "." or "..".
def not_dots?
  @data_stream_name != "." && @data_stream_name != ".."
end
149
+
150
# Returns the version constant of the loaded Elasticsearch client library.
def client_library_version
  Elasticsearch::VERSION
end
153
+
154
# Declares that this output may run under multiple workers.
def multi_workers_ready?
  true
end
157
+
158
# Sends every Hash record of +chunk+ to the target data stream in one bulk call.
#
# When the configured name contains placeholders, the concrete name is
# extracted per chunk, and the ILM policy, index template and data stream are
# created the first time a name is seen. Each record gets an "@timestamp"
# derived from the event time. Records that fail to serialize are routed to
# the error stream. Bulk failures are logged, not raised.
#
# @param chunk the buffer chunk to flush
# @raise [Fluent::ConfigError] if creating a placeholder-derived data stream fails
def write(chunk)
  data_stream_name = @data_stream_name
  if @use_placeholder
    data_stream_name = extract_placeholders(@data_stream_name, chunk)
    unless @data_stream_names.include?(data_stream_name)
      begin
        create_ilm_policy(data_stream_name)
        create_index_template(data_stream_name)
        create_data_stream(data_stream_name)
        # Remember the name only after all three steps succeeded.
        @data_stream_names << data_stream_name
      rescue => e
        raise Fluent::ConfigError, "Failed to create data stream: <#{data_stream_name}> #{e.message}"
      end
    end
  end

  bulk_message = ""
  headers = {
    CREATE_OP => {}
  }
  tag = chunk.metadata.tag
  chunk.msgpack_each do |time, record|
    next unless record.is_a? Hash

    begin
      record.merge!({"@timestamp" => Time.at(time).iso8601(@time_precision)})
      bulk_message = append_record_to_messages(CREATE_OP, {}, headers, record, bulk_message)
    rescue => e
      router.emit_error_event(tag, time, record, e)
    end
  end

  # Nothing was serialized (empty chunk or no Hash records): skip the request
  # instead of posting an empty bulk body.
  return if bulk_message.empty?

  params = {
    index: data_stream_name,
    body: bulk_message
  }
  begin
    response = @client.bulk(params)
    if response['errors']
      log.error "Could not bulk insert to Data Stream: #{data_stream_name} #{response}"
    end
  rescue => e
    log.error "Could not bulk insert to Data Stream: #{data_stream_name} #{e.message}"
  end
end
203
+
204
# Appends one bulk entry (action header line plus record line) to +msgs+.
#
# The header always uses the CREATE_OP key; the +op+ argument is accepted but
# not read. Both lines are serialized with @dump_proc and terminated with
# BODY_DELIMITER.
#
# @param op unused
# @param meta [Hash] metadata stored under header[CREATE_OP]
# @param header [Hash] action header, mutated in place
# @param record [Hash] the event record
# @param msgs [String] bulk payload buffer, appended to in place
# @return [String] +msgs+
def append_record_to_messages(op, meta, header, record, msgs)
  header[CREATE_OP] = meta
  [header, record].each do |part|
    msgs << @dump_proc.call(part) << BODY_DELIMITER
  end
  msgs
end
210
+
211
# Delegates to the buffer: true when @buffer reports it is storable.
def retry_stream_retryable?
  @buffer.storable?
end
214
+ end
215
+ end
@@ -0,0 +1,337 @@
1
+ require_relative '../helper'
2
+ require 'date'
3
+ require 'fluent/test/helpers'
4
+ require 'fluent/test/driver/output'
5
+ require 'flexmock/test_unit'
6
+ require 'fluent/plugin/out_elasticsearch_data_stream'
7
+
8
+ class ElasticsearchOutputDataStreamTest < Test::Unit::TestCase
9
+ include FlexMock::TestCase
10
+ include Fluent::Test::Helpers
11
+
12
+ attr_accessor :bulk_records
13
+
14
+ REQUIRED_ELASTIC_MESSAGE = "Elasticsearch 7.9.0 or later is needed."
15
+ ELASTIC_DATA_STREAM_TYPE = "elasticsearch_data_stream"
16
+
17
# Resets Fluentd's test environment, the cached driver, the bulk record
# counter, and the captured log lines before each test.
def setup
  Fluent::Test.setup
  @driver = nil
  @bulk_records = 0
  captured_logs = Fluent::Engine.log.out.logs
  captured_logs.slice!(0, captured_logs.length)
end
24
+
25
# Returns the memoized, configured output test driver for the data stream plugin.
#
# The first call fixes @es_version / @client_version; detect_es_major_version
# is redefined on the plugin class to return @es_version. Note that the driver
# itself is memoized, but `configure(conf)` is invoked on every call.
#
# @param conf the configuration to apply
# @param es_version [Integer] major version to report (first call only)
# @param client_version [String] client version to remember (first call only)
def driver(conf='', es_version=5, client_version="\"5.0\"")
  # For request stub to detect compatibility.
  @es_version ||= es_version
  @client_version ||= client_version
  plugin_class = Fluent::Plugin::ElasticsearchOutputDataStream
  plugin_class.module_eval(<<-CODE)
    def detect_es_major_version
      #{@es_version}
    end
  CODE
  @driver ||= Fluent::Test::Driver::Output.new(plugin_class) {
    # v0.12's test driver assumes a format definition. This simulates ObjectBufferedOutput format.
    if !defined?(Fluent::Plugin::Output)
      def format(tag, time, record)
        [time, record].to_msgpack
      end
    end
  }.configure(conf)
end
43
+
44
# Sample "get data stream" payload: one stream named 'my-data-stream' whose
# timestamp field is '@timestamp'. The top-level key is the symbol :data_streams.
def sample_data_stream
  stream = {
    'name' => 'my-data-stream',
    'timestamp_field' => { 'name' => '@timestamp' }
  }
  { data_streams: [stream] }
end
56
+
57
# Builds a minimal event record stamped with the current time in ISO 8601 form.
def sample_record
  timestamp = Time.now.iso8601
  {'@timestamp' => timestamp, 'message' => 'Sample record'}
end
60
+
61
# Canned response payloads used by the request stubs (all keys are symbols).
RESPONSE_ACKNOWLEDGED = { acknowledged: true }
DUPLICATED_DATA_STREAM_EXCEPTION = { error: {}, status: 400 }
NONEXISTENT_DATA_STREAM_EXCEPTION = { error: {}, status: 404 }
64
+
65
# Stubs PUT /_ilm/policy/<name>_policy on localhost:9200 to answer with status 200.
def stub_ilm_policy(name="foo")
  policy_url = "http://localhost:9200/_ilm/policy/#{name}_policy"
  stub_request(:put, policy_url).to_return(status: [200, RESPONSE_ACKNOWLEDGED])
end
68
+
69
# Stubs PUT /_index_template/<name> on localhost:9200 to answer with status 200.
def stub_index_template(name="foo")
  template_url = "http://localhost:9200/_index_template/#{name}"
  stub_request(:put, template_url).to_return(status: [200, RESPONSE_ACKNOWLEDGED])
end
72
+
73
# Stubs PUT /_data_stream/<name> (data stream creation) to answer with status 200.
def stub_data_stream(name="foo")
  stream_url = "http://localhost:9200/_data_stream/#{name}"
  stub_request(:put, stream_url).to_return(status: [200, RESPONSE_ACKNOWLEDGED])
end
76
+
77
# Stubs GET /_data_stream/<name> to answer with status 200, i.e. the data
# stream lookup succeeds.
def stub_existent_data_stream?(name="foo")
  stream_url = "http://localhost:9200/_data_stream/#{name}"
  stub_request(:get, stream_url).to_return(status: [200, RESPONSE_ACKNOWLEDGED])
end
80
+
81
# Stubs GET /_data_stream/<name> to answer with status 404, so the lookup
# fails the way it does for a data stream that has not been created yet.
#
# The previous stub answered 200, which made the lookup succeed and left the
# "data stream does not exist" path unexercised.
def stub_nonexistent_data_stream?(name="foo")
  stub_request(:get, "http://localhost:9200/_data_stream/#{name}").to_return(:status => [404, Elasticsearch::Transport::Transport::Errors::NotFound])
end
84
+
85
# Stubs POST /<name>/_bulk and adds the number of records in each matched
# request body to @bulk_records.
def stub_bulk_feed(name="foo")
  bulk_url = "http://localhost:9200/#{name}/_bulk"
  stub_request(:post, bulk_url).with do |request|
    # bulk data must be pair of OP and records
    # {"create": {}}\n
    # {"@timestamp": ...}
    line_count = request.body.split("\n").size
    @bulk_records += line_count / 2
  end
end
93
+
94
# Installs the stubs for an already-existing data stream +name+: ILM policy,
# index template, a successful data stream lookup, and data stream creation.
def stub_default(name="foo")
  stub_ilm_policy(name)
  stub_index_template(name)
  stub_existent_data_stream?(name)
  stub_data_stream(name)
end
100
+
101
# True when the installed elasticsearch-transport gem is version 7.9.0 or newer.
def data_stream_supported?
  installed = Gem::Version.create(::Elasticsearch::Transport::VERSION)
  required = Gem::Version.create("7.9.0")
  installed >= required
end
104
+
105
+ # ref. https://www.elastic.co/guide/en/elasticsearch/reference/master/indices-create-data-stream.html
106
# Verifies that invalid 'data_stream_name' values are rejected with the
# expected Fluent::ConfigError message.
class DataStreamNameTest < self

  def test_missing_data_stream_name
    conf = config_element(
      'ROOT', '', {
        '@type' => ELASTIC_DATA_STREAM_TYPE
      })
    assert_raise Fluent::ConfigError.new("'data_stream_name' parameter is required") do
      driver(conf).run
    end
  end

  def test_invalid_uppercase
    conf = config_element(
      'ROOT', '', {
        '@type' => ELASTIC_DATA_STREAM_TYPE,
        'data_stream_name' => 'TEST'
      })
    assert_raise Fluent::ConfigError.new("'data_stream_name' must be lowercase only: <TEST>") do
      driver(conf)
    end
  end

  data("backslash" => "\\",
       "slash" => "/",
       "asterisk" => "*",
       "question" => "?",
       "doublequote" => "\"",
       "lt" => "<",
       "gt" => ">",
       "bar" => "|",
       "space" => " ",
       "comma" => ",",
       "sharp" => "#",
       "colon" => ":")
  def test_invalid_characters(data)
    c, _ = data
    # The name is also uppercase, but the invalid-character rule is reported first.
    conf = config_element(
      'ROOT', '', {
        '@type' => ELASTIC_DATA_STREAM_TYPE,
        'data_stream_name' => "TEST#{c}"
      })
    label = Fluent::Plugin::ElasticsearchOutputDataStream::INVALID_CHARACTERS.join(',')
    assert_raise Fluent::ConfigError.new("'data_stream_name' must not contain invalid characters #{label}: <TEST#{c}>") do
      driver(conf)
    end
  end

  data("hyphen" => "-",
       "underscore" => "_",
       "plus" => "+",
       "period" => ".")
  def test_invalid_start_characters(data)
    c, _ = data
    conf = config_element(
      'ROOT', '', {
        '@type' => ELASTIC_DATA_STREAM_TYPE,
        'data_stream_name' => "#{c}TEST"
      })
    label = Fluent::Plugin::ElasticsearchOutputDataStream::INVALID_START_CHRACTERS.join(',')
    assert_raise Fluent::ConfigError.new("'data_stream_name' must not start with #{label}: <#{c}TEST>") do
      driver(conf)
    end
  end

  data("current" => ".",
       "parents" => "..")
  def test_invalid_dots(data)
    c, _ = data
    conf = config_element(
      'ROOT', '', {
        '@type' => ELASTIC_DATA_STREAM_TYPE,
        'data_stream_name' => "#{c}"
      })
    assert_raise Fluent::ConfigError.new("'data_stream_name' must not be . or ..: <#{c}>") do
      driver(conf)
    end
  end

  def test_invalid_length
    # One byte over the 255-byte limit.
    c = "a" * 256
    conf = config_element(
      'ROOT', '', {
        '@type' => ELASTIC_DATA_STREAM_TYPE,
        'data_stream_name' => "#{c}"
      })
    assert_raise Fluent::ConfigError.new("'data_stream_name' must not be longer than 255 bytes: <#{c}>") do
      driver(conf)
    end
  end
end
197
+
198
# Configuring with a static name against an existing data stream succeeds and
# exposes the configured name.
def test_datastream_configure
  omit REQUIRED_ELASTIC_MESSAGE unless data_stream_supported?

  stub_default
  settings = {
    '@type' => ELASTIC_DATA_STREAM_TYPE,
    'data_stream_name' => 'foo'
  }
  conf = config_element('ROOT', '', settings)
  assert_equal "foo", driver(conf).instance.data_stream_name
end
209
+
210
# Configuring with a static name succeeds when the "nonexistent data stream"
# lookup stub is installed together with the creation stubs.
def test_nonexistent_data_stream
  omit REQUIRED_ELASTIC_MESSAGE unless data_stream_supported?

  stub_ilm_policy
  stub_index_template
  stub_nonexistent_data_stream?
  stub_data_stream
  settings = {
    '@type' => ELASTIC_DATA_STREAM_TYPE,
    'data_stream_name' => 'foo'
  }
  conf = config_element('ROOT', '', settings)
  assert_equal "foo", driver(conf).instance.data_stream_name
end
224
+
225
# A ${tag} placeholder in the name is expanded from the event tag ('test'),
# and the record is bulk-inserted into the expanded data stream.
def test_placeholder
  omit REQUIRED_ELASTIC_MESSAGE unless data_stream_supported?

  expanded_name = "foo_test"
  stub_default(expanded_name)
  stub_bulk_feed(expanded_name)
  settings = {
    '@type' => ELASTIC_DATA_STREAM_TYPE,
    'data_stream_name' => 'foo_${tag}'
  }
  conf = config_element('ROOT', '', settings)
  driver(conf).run(default_tag: 'test') do
    driver.feed(sample_record)
  end
  assert_equal 1, @bulk_records
end
241
+
242
# A strftime placeholder in the name is expanded using a daily time key, and
# the record is bulk-inserted into today's data stream.
def test_time_placeholder
  omit REQUIRED_ELASTIC_MESSAGE unless data_stream_supported?

  now = Time.now
  expanded_name = "foo_#{now.strftime("%Y%m%d")}"
  stub_default(expanded_name)
  stub_bulk_feed(expanded_name)
  buffer_section = config_element('buffer', 'time', {
                                    'timekey' => '1d'
                                  }, [])
  settings = {
    '@type' => ELASTIC_DATA_STREAM_TYPE,
    'data_stream_name' => 'foo_%Y%m%d'
  }
  conf = config_element('ROOT', '', settings, [buffer_section])
  driver(conf).run(default_tag: 'test') do
    driver.feed(sample_record)
  end
  assert_equal 1, @bulk_records
end
262
+
263
# A ${key1} placeholder is expanded from each record's "key1" field; one
# record is fed per key and every one must reach its own data stream.
def test_custom_record_placeholder
  omit REQUIRED_ELASTIC_MESSAGE unless data_stream_supported?

  keys = ["bar", "baz"]
  keys.map { |key| "foo_#{key}" }.each do |expanded_name|
    stub_default(expanded_name)
    stub_bulk_feed(expanded_name)
  end
  buffer_section = config_element('buffer', 'tag,key1', {
                                    'timekey' => '1d'
                                  }, [])
  settings = {
    '@type' => ELASTIC_DATA_STREAM_TYPE,
    'data_stream_name' => 'foo_${key1}'
  }
  conf = config_element('ROOT', '', settings, [buffer_section])
  driver(conf).run(default_tag: 'test') do
    keys.each do |key|
      driver.feed(sample_record.merge({"key1" => key}))
    end
  end
  assert_equal keys.count, @bulk_records
end
288
+
289
# Feeding one record to a statically named data stream results in one bulk record.
def test_bulk_insert_feed
  omit REQUIRED_ELASTIC_MESSAGE unless data_stream_supported?

  stub_default
  stub_bulk_feed
  settings = {
    '@type' => ELASTIC_DATA_STREAM_TYPE,
    'data_stream_name' => 'foo'
  }
  conf = config_element('ROOT', '', settings)
  driver(conf).run(default_tag: 'test') do
    driver.feed(sample_record)
  end
  assert_equal 1, @bulk_records
end
304
+
305
# With max_retry_putting_template set to 3, a template lookup that always
# fails with a connection error is attempted 4 times before configure raises
# RetryableOperationExhaustedFailure.
def test_template_retry_install_fails
  omit REQUIRED_ELASTIC_MESSAGE unless data_stream_supported?

  template_file = File.join(File.dirname(__FILE__), 'test_index_template.json')

  config = %{
    host logs.google.com
    port 778
    scheme https
    data_stream_name foo
    user john
    password doe
    template_name logstash
    template_file #{template_file}
    max_retry_putting_template 3
  }

  connection_resets = 0
  # check if template exists
  template_url = "https://logs.google.com:778/_index_template/logstash"
  stub_request(:get, template_url)
    .with(basic_auth: ['john', 'doe']) do |_req|
    connection_resets += 1
    raise Faraday::ConnectionFailed, "Test message"
  end

  assert_raise(Fluent::Plugin::ElasticsearchError::RetryableOperationExhaustedFailure) do
    driver(config)
  end

  assert_equal(4, connection_resets)
end
337
+ end