fluent-plugin-elasticsearch 4.3.3 → 5.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -3,7 +3,7 @@ $:.push File.expand_path('../lib', __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = 'fluent-plugin-elasticsearch'
6
- s.version = '4.3.3'
6
+ s.version = '5.0.0'
7
7
  s.authors = ['diogo', 'pitr', 'Hiroshi Hatake']
8
8
  s.email = ['pitr.vern@gmail.com', 'me@diogoterror.com', 'cosmo0920.wp@gmail.com']
9
9
  s.description = %q{Elasticsearch output plugin for Fluent event collector}
@@ -0,0 +1,215 @@
1
+ require_relative 'out_elasticsearch'
2
+
3
module Fluent::Plugin
  # Fluentd output plugin registered as +elasticsearch_data_stream+.
  #
  # On top of ElasticsearchOutput it validates the configured data stream
  # name, creates an ILM policy, an index template and the data stream itself
  # (at configure time for static names, lazily in #write for names that
  # contain placeholders), and ships each chunk with a single bulk request
  # made of +create+ actions.
  class ElasticsearchOutputDataStream < ElasticsearchOutput

    Fluent::Plugin.register_output('elasticsearch_data_stream', self)

    helpers :event_emitter

    config_param :data_stream_name, :string
    # Elasticsearch 7.9 or later always support new style of index template.
    config_set_default :use_legacy_template, false

    # Characters a data stream name must not start with.
    INVALID_START_CHARACTERS = ["-", "_", "+", "."].freeze
    # Misspelled historical name, kept so existing references keep working.
    INVALID_START_CHRACTERS = INVALID_START_CHARACTERS
    # Characters a data stream name must not contain anywhere.
    INVALID_CHARACTERS = ["\\", "/", "*", "?", "\"", "<", ">", "|", " ", ",", "#", ":"].freeze
    # Upper bound, in bytes, enforced on the data stream name.
    MAX_DATA_STREAM_NAME_BYTES = 255

    # Configures the plugin and, for a placeholder-free name, creates the
    # data stream and its supporting resources right away.
    #
    # @param conf the Fluentd configuration element handed to the plugin
    # @raise [Fluent::ConfigError] when 'elasticsearch/xpack' cannot be
    #   loaded, the name is invalid, or the data stream cannot be set up
    def configure(conf)
      super

      begin
        require 'elasticsearch/xpack'
      rescue LoadError
        raise Fluent::ConfigError, "'elasticsearch/xpack' is required for <@elasticsearch_data_stream>."
      end

      # ref. https://www.elastic.co/guide/en/elasticsearch/reference/master/indices-create-data-stream.html
      if placeholder?(:data_stream_name_placeholder, @data_stream_name)
        # The concrete name is only known per chunk; streams are created in #write.
        @use_placeholder = true
        @data_stream_names = []
      else
        @use_placeholder = false
        validate_data_stream_name
      end

      @client = client
      return if @use_placeholder

      prepare_data_stream(@data_stream_name)
      @data_stream_names = [@data_stream_name]
    end

    # Checks @data_stream_name against the naming rules and raises on the
    # first violated rule. Checks run in this order: "." / "..", invalid
    # leading character, invalid character, uppercase, length.
    #
    # @raise [Fluent::ConfigError] describing the violated rule
    def validate_data_stream_name
      return if valid_data_stream_name?

      # "." and ".." also start with an invalid character, so they are
      # reported first to give the more specific message.
      unless not_dots?
        raise Fluent::ConfigError, "'data_stream_name' must not be . or ..: <#{@data_stream_name}>"
      end
      unless start_with_valid_characters?
        raise Fluent::ConfigError, "'data_stream_name' must not start with #{INVALID_START_CHARACTERS.join(",")}: <#{@data_stream_name}>"
      end
      unless valid_characters?
        raise Fluent::ConfigError, "'data_stream_name' must not contain invalid characters #{INVALID_CHARACTERS.join(",")}: <#{@data_stream_name}>"
      end
      unless lowercase_only?
        raise Fluent::ConfigError, "'data_stream_name' must be lowercase only: <#{@data_stream_name}>"
      end
      if @data_stream_name.bytesize > MAX_DATA_STREAM_NAME_BYTES
        raise Fluent::ConfigError, "'data_stream_name' must not be longer than #{MAX_DATA_STREAM_NAME_BYTES} bytes: <#{@data_stream_name}>"
      end
    end

    # Puts the ILM policy "<name>_policy", using the contents of
    # default-ilm-policy.json located next to this file as the request body.
    #
    # @param name [String] data stream name the policy id is derived from
    def create_ilm_policy(name)
      params = {
        policy_id: "#{name}_policy",
        body: File.read(File.join(File.dirname(__FILE__), "default-ilm-policy.json"))
      }
      retry_operate(@max_retry_putting_template,
                    @fail_on_putting_template_retry_exceed,
                    @catch_transport_exception_on_retry) do
        @client.xpack.ilm.put_policy(params)
      end
    end

    # Puts an index template called +name+ that matches "<name>*", enables
    # data streams and references the "<name>_policy" lifecycle policy.
    #
    # @param name [String] template name and index-pattern prefix
    def create_index_template(name)
      body = {
        "index_patterns" => ["#{name}*"],
        "data_stream" => {},
        "template" => {
          "settings" => {
            "index.lifecycle.name" => "#{name}_policy"
          }
        }
      }
      params = {
        name: name,
        body: body
      }
      retry_operate(@max_retry_putting_template,
                    @fail_on_putting_template_retry_exceed,
                    @catch_transport_exception_on_retry) do
        @client.indices.put_index_template(params)
      end
    end

    # Asks Elasticsearch whether the data stream +name+ exists.
    #
    # @param name [String] data stream name to look up
    # @return [Boolean] false when the lookup raises (or returns) NotFound,
    #   true otherwise
    def data_stream_exist?(name)
      params = {
        name: name
      }
      response = @client.indices.get_data_stream(params)
      !response.is_a?(Elasticsearch::Transport::Transport::Errors::NotFound)
    rescue Elasticsearch::Transport::Transport::Errors::NotFound => e
      log.info "Specified data stream does not exist. Will be created: <#{e}>"
      false
    end

    # Creates the data stream +name+ unless it already exists.
    #
    # @param name [String] data stream name to create
    def create_data_stream(name)
      return if data_stream_exist?(name)

      params = {
        name: name
      }
      retry_operate(@max_retry_putting_template,
                    @fail_on_putting_template_retry_exceed,
                    @catch_transport_exception_on_retry) do
        @client.indices.create_data_stream(params)
      end
    end

    # @return [Boolean] true when @data_stream_name satisfies every naming rule
    def valid_data_stream_name?
      lowercase_only? &&
        valid_characters? &&
        start_with_valid_characters? &&
        not_dots? &&
        @data_stream_name.bytesize <= MAX_DATA_STREAM_NAME_BYTES
    end

    # @return [Boolean] true when downcasing the name leaves it unchanged
    def lowercase_only?
      @data_stream_name.downcase == @data_stream_name
    end

    # @return [Boolean] true when the name contains none of INVALID_CHARACTERS
    def valid_characters?
      INVALID_CHARACTERS.none? { |c| @data_stream_name.include?(c) }
    end

    # @return [Boolean] true when the name starts with none of
    #   INVALID_START_CHARACTERS
    def start_with_valid_characters?
      !@data_stream_name.start_with?(*INVALID_START_CHARACTERS)
    end

    # @return [Boolean] true unless the name is exactly "." or ".."
    def not_dots?
      !(@data_stream_name == "." || @data_stream_name == "..")
    end

    # @return the version constant of the loaded Elasticsearch client library
    def client_library_version
      Elasticsearch::VERSION
    end

    # @return [Boolean] always true
    def multi_workers_ready?
      true
    end

    # Sends every Hash record of +chunk+ to the data stream as one bulk
    # request of +create+ actions.
    #
    # With a placeholder name, the name is resolved from the chunk and the
    # stream (plus ILM policy and index template) is created the first time
    # that name is seen. Records that fail to serialize are routed to the
    # error stream. Non-Hash records are skipped.
    #
    # @param chunk the buffer chunk to flush
    # @raise [Fluent::ConfigError] when a placeholder-resolved data stream
    #   cannot be set up
    # @raise [StandardError] re-raised when the bulk request itself fails
    def write(chunk)
      data_stream_name = @data_stream_name
      if @use_placeholder
        data_stream_name = extract_placeholders(@data_stream_name, chunk)
        unless @data_stream_names.include?(data_stream_name)
          prepare_data_stream(data_stream_name)
          @data_stream_names << data_stream_name
        end
      end

      bulk_message = ""
      headers = {
        CREATE_OP => {}
      }
      tag = chunk.metadata.tag
      chunk.msgpack_each do |time, record|
        next unless record.is_a? Hash

        begin
          record.merge!({"@timestamp" => Time.at(time).iso8601(@time_precision)})
          bulk_message = append_record_to_messages(CREATE_OP, {}, headers, record, bulk_message)
        rescue => e
          router.emit_error_event(tag, time, record, e)
        end
      end

      # Nothing was serialized (empty chunk, or only skipped/failed records):
      # do not issue a bulk request with an empty body.
      return if bulk_message.empty?

      params = {
        index: data_stream_name,
        body: bulk_message
      }
      begin
        response = @client.bulk(params)
        if response['errors']
          log.error "Could not bulk insert to Data Stream: #{data_stream_name} #{response}"
        end
      rescue => e
        log.error "Could not bulk insert to Data Stream: #{data_stream_name} #{e.message}"
        # Propagate so the chunk is not treated as successfully flushed.
        # NOTE(review): relies on Fluentd retrying a chunk whose #write raised — confirm.
        raise
      end
    end

    # Appends one bulk action line and one document line to +msgs+.
    #
    # Both lines are serialized before anything is appended, so a record
    # that fails to serialize cannot leave an orphan action line behind.
    #
    # @param op unused here; the action key is always CREATE_OP
    # @param meta [Hash] metadata stored under CREATE_OP in +header+
    # @param header [Hash] reusable action hash (mutated)
    # @param record [Hash] the document to index
    # @param msgs [String] bulk body accumulated so far (mutated)
    # @return [String] +msgs+
    def append_record_to_messages(op, meta, header, record, msgs)
      header[CREATE_OP] = meta
      action_line = @dump_proc.call(header)
      document_line = @dump_proc.call(record)
      msgs << action_line << BODY_DELIMITER
      msgs << document_line << BODY_DELIMITER
      msgs
    end

    # @return whatever @buffer.storable? reports
    def retry_stream_retryable?
      @buffer.storable?
    end

    private

    # Creates the ILM policy, the index template and the data stream for
    # +name+, wrapping any failure in a Fluent::ConfigError.
    #
    # @param name [String] concrete (placeholder-free) data stream name
    # @raise [Fluent::ConfigError] when any of the three steps raises
    def prepare_data_stream(name)
      create_ilm_policy(name)
      create_index_template(name)
      create_data_stream(name)
    rescue => e
      raise Fluent::ConfigError, "Failed to create data stream: <#{name}> #{e.message}"
    end
  end
end
@@ -0,0 +1,337 @@
1
+ require_relative '../helper'
2
+ require 'date'
3
+ require 'fluent/test/helpers'
4
+ require 'fluent/test/driver/output'
5
+ require 'flexmock/test_unit'
6
+ require 'fluent/plugin/out_elasticsearch_data_stream'
7
+
8
# Tests for the elasticsearch_data_stream output plugin: data stream name
# validation, configure-time resource creation, placeholder handling and
# bulk insertion. HTTP traffic is stubbed with stub_request.
class ElasticsearchOutputDataStreamTest < Test::Unit::TestCase
  include FlexMock::TestCase
  include Fluent::Test::Helpers

  # Number of documents seen by the stubbed _bulk endpoint.
  attr_accessor :bulk_records

  REQUIRED_ELASTIC_MESSAGE = "Elasticsearch 7.9.0 or later is needed."
  ELASTIC_DATA_STREAM_TYPE = "elasticsearch_data_stream"

  # Resets the Fluentd test environment, the captured logs and the bulk counter.
  def setup
    Fluent::Test.setup
    @driver = nil
    log = Fluent::Engine.log
    log.out.logs.slice!(0, log.out.logs.length)
    @bulk_records = 0
  end

  # Builds (once per test) and configures the output test driver.
  #
  # The first call fixes the stubbed Elasticsearch major version; later calls
  # return the memoized driver, configured again with +conf+.
  def driver(conf='', es_version=5, client_version="\"5.0\"")
    # For request stub to detect compatibility.
    @es_version ||= es_version
    @client_version ||= client_version
    Fluent::Plugin::ElasticsearchOutputDataStream.module_eval(<<-CODE)
      def detect_es_major_version
        #{@es_version}
      end
    CODE
    @driver ||= Fluent::Test::Driver::Output.new(Fluent::Plugin::ElasticsearchOutputDataStream) {
      # v0.12's test driver assume format definition. This simulates ObjectBufferedOutput format
      if !defined?(Fluent::Plugin::Output)
        def format(tag, time, record)
          [time, record].to_msgpack
        end
      end
    }.configure(conf)
  end

  # Example payload shaped like a data stream listing.
  def sample_data_stream
    {
      'data_streams': [
        {
          'name' => 'my-data-stream',
          'timestamp_field' => {
            'name' => '@timestamp'
          }
        }
      ]
    }
  end

  # A minimal record carrying a timestamp and a message.
  def sample_record
    {'@timestamp' => Time.now.iso8601, 'message' => 'Sample record'}
  end

  RESPONSE_ACKNOWLEDGED = {"acknowledged": true}
  DUPLICATED_DATA_STREAM_EXCEPTION = {"error": {}, "status": 400}
  NONEXISTENT_DATA_STREAM_EXCEPTION = {"error": {}, "status": 404}

  # Stubs the ILM policy PUT for "<name>_policy".
  def stub_ilm_policy(name="foo")
    stub_request(:put, "http://localhost:9200/_ilm/policy/#{name}_policy").to_return(:status => [200, RESPONSE_ACKNOWLEDGED])
  end

  # Stubs the index template PUT for +name+.
  def stub_index_template(name="foo")
    stub_request(:put, "http://localhost:9200/_index_template/#{name}").to_return(:status => [200, RESPONSE_ACKNOWLEDGED])
  end

  # Stubs the data stream creation PUT for +name+.
  def stub_data_stream(name="foo")
    stub_request(:put, "http://localhost:9200/_data_stream/#{name}").to_return(:status => [200, RESPONSE_ACKNOWLEDGED])
  end

  # Stubs the data stream lookup so that +name+ appears to exist.
  def stub_existent_data_stream?(name="foo")
    stub_request(:get, "http://localhost:9200/_data_stream/#{name}").to_return(:status => [200, RESPONSE_ACKNOWLEDGED])
  end

  # Stubs the data stream lookup so that +name+ appears to be missing.
  # A 404 status is required here: with a 200 the lookup succeeds and the
  # "does not exist, will be created" path is never taken.
  def stub_nonexistent_data_stream?(name="foo")
    stub_request(:get, "http://localhost:9200/_data_stream/#{name}").to_return(:status => [404, Elasticsearch::Transport::Transport::Errors::NotFound])
  end

  # Stubs the _bulk endpoint for +name+ and counts the documents it receives.
  def stub_bulk_feed(name="foo")
    stub_request(:post, "http://localhost:9200/#{name}/_bulk").with do |req|
      # bulk data must be pair of OP and records
      # {"create": {}}\n
      # {"@timestamp": ...}
      @bulk_records += req.body.split("\n").size / 2
    end
  end

  # Stubs everything configure needs when the data stream already exists.
  def stub_default(name="foo")
    stub_ilm_policy(name)
    stub_index_template(name)
    stub_existent_data_stream?(name)
    stub_data_stream(name)
  end

  # True when the installed elasticsearch-transport is 7.9.0 or later.
  def data_stream_supported?
    Gem::Version.create(::Elasticsearch::Transport::VERSION) >= Gem::Version.create("7.9.0")
  end

  # ref. https://www.elastic.co/guide/en/elasticsearch/reference/master/indices-create-data-stream.html
  class DataStreamNameTest < self

    def test_missing_data_stream_name
      conf = config_element(
        'ROOT', '', {
          '@type' => ELASTIC_DATA_STREAM_TYPE
        })
      assert_raise Fluent::ConfigError.new("'data_stream_name' parameter is required") do
        driver(conf).run
      end
    end

    def test_invalid_uppercase
      conf = config_element(
        'ROOT', '', {
          '@type' => ELASTIC_DATA_STREAM_TYPE,
          'data_stream_name' => 'TEST'
        })
      assert_raise Fluent::ConfigError.new("'data_stream_name' must be lowercase only: <TEST>") do
        driver(conf)
      end
    end

    data("backslash" => "\\",
         "slash" => "/",
         "asterisk" => "*",
         "question" => "?",
         "doublequote" => "\"",
         "lt" => "<",
         "gt" => ">",
         "bar" => "|",
         "space" => " ",
         "comma" => ",",
         "sharp" => "#",
         "colon" => ":")
    def test_invalid_characters(data)
      c, _ = data
      conf = config_element(
        'ROOT', '', {
          '@type' => ELASTIC_DATA_STREAM_TYPE,
          'data_stream_name' => "TEST#{c}"
        })
      label = Fluent::Plugin::ElasticsearchOutputDataStream::INVALID_CHARACTERS.join(',')
      assert_raise Fluent::ConfigError.new("'data_stream_name' must not contain invalid characters #{label}: <TEST#{c}>") do
        driver(conf)
      end
    end

    data("hyphen" => "-",
         "underscore" => "_",
         "plus" => "+",
         "period" => ".")
    def test_invalid_start_characters(data)
      c, _ = data
      conf = config_element(
        'ROOT', '', {
          '@type' => ELASTIC_DATA_STREAM_TYPE,
          'data_stream_name' => "#{c}TEST"
        })
      label = Fluent::Plugin::ElasticsearchOutputDataStream::INVALID_START_CHRACTERS.join(',')
      assert_raise Fluent::ConfigError.new("'data_stream_name' must not start with #{label}: <#{c}TEST>") do
        driver(conf)
      end
    end

    data("current" => ".",
         "parents" => "..")
    def test_invalid_dots(data)
      c, _ = data
      conf = config_element(
        'ROOT', '', {
          '@type' => ELASTIC_DATA_STREAM_TYPE,
          'data_stream_name' => "#{c}"
        })
      assert_raise Fluent::ConfigError.new("'data_stream_name' must not be . or ..: <#{c}>") do
        driver(conf)
      end
    end

    def test_invalid_length
      c = "a" * 256
      conf = config_element(
        'ROOT', '', {
          '@type' => ELASTIC_DATA_STREAM_TYPE,
          'data_stream_name' => "#{c}"
        })
      assert_raise Fluent::ConfigError.new("'data_stream_name' must not be longer than 255 bytes: <#{c}>") do
        driver(conf)
      end
    end
  end

  def test_datastream_configure
    omit REQUIRED_ELASTIC_MESSAGE unless data_stream_supported?

    stub_default
    conf = config_element(
      'ROOT', '', {
        '@type' => ELASTIC_DATA_STREAM_TYPE,
        'data_stream_name' => 'foo'
      })
    assert_equal "foo", driver(conf).instance.data_stream_name
  end

  def test_nonexistent_data_stream
    omit REQUIRED_ELASTIC_MESSAGE unless data_stream_supported?

    stub_ilm_policy
    stub_index_template
    stub_nonexistent_data_stream?
    stub_data_stream
    conf = config_element(
      'ROOT', '', {
        '@type' => ELASTIC_DATA_STREAM_TYPE,
        'data_stream_name' => 'foo'
      })
    assert_equal "foo", driver(conf).instance.data_stream_name
  end

  def test_placeholder
    omit REQUIRED_ELASTIC_MESSAGE unless data_stream_supported?

    name = "foo_test"
    stub_default(name)
    stub_bulk_feed(name)
    conf = config_element(
      'ROOT', '', {
        '@type' => ELASTIC_DATA_STREAM_TYPE,
        'data_stream_name' => 'foo_${tag}'
      })
    driver(conf).run(default_tag: 'test') do
      driver.feed(sample_record)
    end
    assert_equal 1, @bulk_records
  end

  def test_time_placeholder
    omit REQUIRED_ELASTIC_MESSAGE unless data_stream_supported?

    time = Time.now
    name = "foo_#{time.strftime("%Y%m%d")}"
    stub_default(name)
    stub_bulk_feed(name)
    conf = config_element(
      'ROOT', '', {
        '@type' => ELASTIC_DATA_STREAM_TYPE,
        'data_stream_name' => 'foo_%Y%m%d'
      }, [config_element('buffer', 'time', {
                           'timekey' => '1d'
                         }, [])]
    )
    driver(conf).run(default_tag: 'test') do
      driver.feed(sample_record)
    end
    assert_equal 1, @bulk_records
  end

  def test_custom_record_placeholder
    omit REQUIRED_ELASTIC_MESSAGE unless data_stream_supported?

    keys = ["bar", "baz"]
    keys.each do |key|
      name = "foo_#{key}"
      stub_default(name)
      stub_bulk_feed(name)
    end
    conf = config_element(
      'ROOT', '', {
        '@type' => ELASTIC_DATA_STREAM_TYPE,
        'data_stream_name' => 'foo_${key1}'
      }, [config_element('buffer', 'tag,key1', {
                           'timekey' => '1d'
                         }, [])]
    )
    driver(conf).run(default_tag: 'test') do
      keys.each do |key|
        record = sample_record.merge({"key1" => key})
        driver.feed(record)
      end
    end
    assert_equal keys.count, @bulk_records
  end

  def test_bulk_insert_feed
    omit REQUIRED_ELASTIC_MESSAGE unless data_stream_supported?

    stub_default
    stub_bulk_feed
    conf = config_element(
      'ROOT', '', {
        '@type' => ELASTIC_DATA_STREAM_TYPE,
        'data_stream_name' => 'foo'
      })
    driver(conf).run(default_tag: 'test') do
      driver.feed(sample_record)
    end
    assert_equal 1, @bulk_records
  end

  def test_template_retry_install_fails
    omit REQUIRED_ELASTIC_MESSAGE unless data_stream_supported?

    cwd = File.dirname(__FILE__)
    template_file = File.join(cwd, 'test_index_template.json')

    config = %{
      host logs.google.com
      port 778
      scheme https
      data_stream_name foo
      user john
      password doe
      template_name logstash
      template_file #{template_file}
      max_retry_putting_template 3
    }

    connection_resets = 0
    # check if template exists
    stub_request(:get, "https://logs.google.com:778/_index_template/logstash")
      .with(basic_auth: ['john', 'doe']) do |req|
      connection_resets += 1
      raise Faraday::ConnectionFailed, "Test message"
    end

    assert_raise(Fluent::Plugin::ElasticsearchError::RetryableOperationExhaustedFailure) do
      driver(config)
    end

    # One initial attempt plus max_retry_putting_template (3) retries.
    assert_equal(4, connection_resets)
  end
end