fluent-plugin-elasticsearch 1.16.2 → 1.17.0
- checksums.yaml +4 -4
- data/History.md +3 -0
- data/README.md +10 -0
- data/fluent-plugin-elasticsearch.gemspec +1 -1
- data/lib/fluent/plugin/elasticsearch_error_handler.rb +2 -2
- data/lib/fluent/plugin/out_elasticsearch.rb +25 -2
- data/test/plugin/test_elasticsearch_error_handler.rb +2 -1
- data/test/plugin/test_out_elasticsearch.rb +100 -4
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 18bf8262f87eae27a0ff3e7d5efc101984ae112e0b0b1f9fcc8c286b60f760ea
+  data.tar.gz: d7964fb843241c67fa1920de545bc2c0759f4d6b7f45fbd3c2b3f8f66d7a7797
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ffbd3297314e2f3a9ebecaa0009ac17f13d17fc97b057ac94e462b706e667f86cc21f3290e14f744ae7e71c905583cddd54303923a88e66510bbfa6ef70f3245
+  data.tar.gz: e814f94fb1110a3598136dc17ae846d80ff7e85c308a3f554d93c197dec94db4033a708502cce8c8c3257c34726ed509098f89f0f3fac96d4bb8aa2ff4cde524
data/History.md
CHANGED
data/README.md
CHANGED
@@ -15,6 +15,7 @@ Note: For Amazon Elasticsearch Service please consider using [fluent-plugin-aws-
 * [Usage](#usage)
   + [Index templates](#index-templates)
 * [Configuration](#configuration)
+  + [emit_error_for_missing_id](#emit_error_for_missing_id)
   + [hosts](#hosts)
   + [user, password, path, scheme, ssl_verify](#user-password-path-scheme-ssl_verify)
   + [logstash_format](#logstash_format)
@@ -87,6 +88,15 @@ This plugin creates Elasticsearch indices by merely writing to them. Consider us
 
 ## Configuration
 
+### emit_error_for_missing_id
+
+```
+emit_error_for_missing_id true
+```
+
+When `write_operation` is configured to anything other than `index`, setting this value to `true` will
+cause the plugin to `emit_error_event` for any records which do not include an `_id` field. The default (`false`)
+behavior is to silently drop such records.
+
 ### hosts
 
 ```
data/fluent-plugin-elasticsearch.gemspec
CHANGED
@@ -3,7 +3,7 @@ $:.push File.expand_path('../lib', __FILE__)
 
 Gem::Specification.new do |s|
   s.name = 'fluent-plugin-elasticsearch'
-  s.version = '1.16.2'
+  s.version = '1.17.0'
   s.authors = ['diogo', 'pitr']
   s.email = ['pitr.vern@gmail.com', 'me@diogoterror.com']
   s.description = %q{Elasticsearch output plugin for Fluent event collector}
data/lib/fluent/plugin/elasticsearch_error_handler.rb
CHANGED
@@ -14,7 +14,7 @@ class Fluent::ElasticsearchErrorHandler
 
   def handle_error(response, tag, chunk, bulk_message_count)
     items = response['items']
-    if items.nil? || !items.is_a?(Array)
+    if items.nil? || !items.is_a?(Array)
       raise ElasticsearchVersionMismatch, "The response format was unrecognized: #{response}"
     end
     if bulk_message_count != items.length
@@ -30,7 +30,7 @@ class Fluent::ElasticsearchErrorHandler
       begin
         # we need a deep copy for process_message to alter
        processrecord = Marshal.load(Marshal.dump(rawrecord))
-        @plugin.process_message(tag, meta, header, time, processrecord, bulk_message)
+        next unless @plugin.process_message(tag, meta, header, time, processrecord, bulk_message)
       rescue => e
         stats[:bad_chunk_record] += 1
         next
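This mirrors the write-path change below: `process_message` now returns a boolean, and records it declines to serialize are skipped with `next` rather than being re-appended, so the rebuilt retry bulk message only contains records that were actually accepted.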
data/lib/fluent/plugin/out_elasticsearch.rb
CHANGED
@@ -18,6 +18,10 @@ require_relative 'elasticsearch_index_template'
 class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
   class ConnectionFailure < StandardError; end
 
+  # MissingIdFieldError is raised for records that do not
+  # include the field for the unique record identifier
+  class MissingIdFieldError < StandardError; end
+
   # RetryStreamError provides a stream to be
   # put back in the pipeline for cases where a bulk request
   # failed (e.g. some records succeed while others failed)
@@ -81,6 +85,7 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
   config_param :reconnect_on_error, :bool, :default => false
   config_param :pipeline, :string, :default => nil
   config_param :with_transporter_log, :bool, :default => false
+  config_param :emit_error_for_missing_id, :bool, :default => false
 
   include Fluent::ElasticsearchIndexTemplate
   include Fluent::ElasticsearchConstants
@@ -267,6 +272,13 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
     end.join(', ')
   end
 
+  # append_record_to_messages adds a record to the bulk message
+  # payload to be submitted to Elasticsearch. Records that do
+  # not include '_id' field are skipped when 'write_operation'
+  # is configured for 'create' or 'update'
+  #
+  # returns 'true' if record was appended to the bulk message
+  # and 'false' otherwise
   def append_record_to_messages(op, meta, header, record, msgs)
     case op
     when UPDATE_OP, UPSERT_OP
@@ -274,18 +286,22 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
         header[UPDATE_OP] = meta
         msgs << @dump_proc.call(header) << BODY_DELIMITER
         msgs << @dump_proc.call(update_body(record, op)) << BODY_DELIMITER
+        return true
       end
     when CREATE_OP
       if meta.has_key?(ID_FIELD)
         header[CREATE_OP] = meta
         msgs << @dump_proc.call(header) << BODY_DELIMITER
         msgs << @dump_proc.call(record) << BODY_DELIMITER
+        return true
       end
     when INDEX_OP
       header[INDEX_OP] = meta
       msgs << @dump_proc.call(header) << BODY_DELIMITER
       msgs << @dump_proc.call(record) << BODY_DELIMITER
+      return true
     end
+    return false
   end
 
   def update_body(record, op)
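To make the boolean contract above concrete, here is a small self-contained Ruby sketch of the caller side. The names (`append_record`, `build_bulk`) are hypothetical stand-ins, not the plugin's internals; the point is that only accepted records are counted, keeping the bulk count in sync with the entries actually in the payload.

```ruby
# Sketch of the caller-side contract with hypothetical names.
# append_record returns true only when the record carries an id,
# mirroring the 'create'/'update' rule in append_record_to_messages.
def append_record(record, bulk_message)
  return false unless record.key?('_id') # skip records without an id
  bulk_message << record.to_s << "\n"    # stand-in for the real JSON dump
  true
end

def build_bulk(records)
  bulk_message = +""
  count = records.count { |r| append_record(r, bulk_message) }
  [bulk_message, count] # count matches the appended entries exactly
end

_body, n = build_bulk([{ '_id' => 'abc', 'msg' => 'ok' }, { 'msg' => 'no id' }])
puts n # => 1; the id-less record was skipped, not counted
```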
@@ -333,8 +349,15 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
     chunk.msgpack_each do |time, record|
       next unless record.is_a? Hash
       begin
-        process_message(tag, meta, header, time, record, bulk_message)
-        bulk_message_count += 1
+        if process_message(tag, meta, header, time, record, bulk_message)
+          bulk_message_count += 1
+        else
+          if @emit_error_for_missing_id
+            raise MissingIdFieldError, "Missing '_id' field. Write operation is #{@write_operation}"
+          else
+            log.on_debug { log.debug("Dropping record because its missing an '_id' field and write_operation is #{@write_operation}: #{record}") }
+          end
+        end
       rescue => e
         router.emit_error_event(tag, time, record, e)
       end
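Design note: raising `MissingIdFieldError` inside the existing `begin`/`rescue` lets an id-less record flow through the same `router.emit_error_event` path as any other per-record failure, so enabling `emit_error_for_missing_id` turns a silent drop into a routed error event. With the option off, the record is still dropped, but the drop is now visible at debug log level.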
data/test/plugin/test_elasticsearch_error_handler.rb
CHANGED
@@ -26,6 +26,7 @@ class TestElasticsearchErrorHandler < Test::Unit::TestCase
       if record.has_key?('raise') && record['raise']
         raise Exception('process_message')
       end
+      return true
     end
   end
 
@@ -58,7 +59,7 @@ class TestElasticsearchErrorHandler < Test::Unit::TestCase
   end
 
   def test_dlq_400_responses
-    records = [{time: 123, record: {"foo" => "bar"}}]
+    records = [{time: 123, record: {"foo" => "bar", '_id' => 'abc'}}]
     response = parse_response(%({
       "took" : 0,
       "errors" : true,
data/test/plugin/test_out_elasticsearch.rb
CHANGED
@@ -27,8 +27,8 @@ class ElasticsearchOutput < Test::Unit::TestCase
     }.configure(conf)
   end
 
-  def sample_record
-    {'age' => 26, 'request_id' => '42', 'parent_id' => 'parent', 'routing_id' => 'routing'}
+  def sample_record(content={})
+    {'age' => 26, 'request_id' => '42', 'parent_id' => 'parent', 'routing_id' => 'routing'}.merge(content)
   end
 
   def stub_elastic_ping(url="http://localhost:9200")
@@ -158,9 +158,9 @@ class ElasticsearchOutput < Test::Unit::TestCase
     stub_request(:post, url).to_return(lambda { |req| bodystr = make_response_body(req, 0, 500, error); body = JSON.parse(bodystr); body['items'][0]['unknown'] = body['items'][0].delete('create'); { :status => 200, :body => body.to_json, :headers => { 'Content-Type' => 'json' } } })
   end
 
-  def assert_logs_include(logs, msg)
+  def assert_logs_include(logs, msg, exp_matches=1)
     matches = logs.grep /#{msg}/
-    assert_equal(1, matches.length, "Logs do not contain '#{msg}' '#{logs}'")
+    assert_equal(exp_matches, matches.length, "Logs do not contain '#{msg}' '#{logs}'")
   end
 
   def test_configure
@@ -1381,6 +1381,102 @@ class ElasticsearchOutput < Test::Unit::TestCase
     driver.run
   end
 
+  def test_create_should_write_records_with_ids_and_skip_those_without
+    driver.configure("write_operation create\nid_key my_id\nlog_level debug")
+    log = driver.instance.router.emit_error_handler.log
+    stub_elastic_ping
+    stub_request(:post, 'http://localhost:9200/_bulk')
+      .to_return(lambda do |req|
+      { :status => 200,
+        :headers => { 'Content-Type' => 'json' },
+        :body => %({
+          "took" : 1,
+          "errors" : true,
+          "items" : [
+            {
+              "create" : {
+                "_index" : "foo",
+                "_type"  : "bar",
+                "_id" : "abc"
+              }
+            },
+            {
+              "create" : {
+                "_index" : "foo",
+                "_type"  : "bar",
+                "_id" : "xyz",
+                "status" : 500,
+                "error" : {
+                  "type" : "some unrecognized type",
+                  "reason":"some error to cause version mismatch"
+                }
+              }
+            }
+          ]
+        })
+      }
+    end)
+    sample_record1 = sample_record('my_id' => 'abc')
+    sample_record4 = sample_record('my_id' => 'xyz')
+    driver.emit(sample_record1, 1)
+    driver.emit(sample_record, 2)
+    driver.emit(sample_record, 3)
+    driver.emit(sample_record4, 4)
+
+    # one record succeeded while the other should be 'retried'
+    driver.expect_emit('test', 4, sample_record4)
+    driver.run
+    assert_logs_include(log.out.logs, /(Dropping record)/, 2)
+  end
+
+  def test_create_should_write_records_with_ids_and_emit_those_without
+    driver.configure("write_operation create\nid_key my_id\nemit_error_for_missing_id true")
+    log = driver.instance.router.emit_error_handler.log
+    stub_elastic_ping
+    stub_request(:post, 'http://localhost:9200/_bulk')
+      .to_return(lambda do |req|
+      { :status => 200,
+        :headers => { 'Content-Type' => 'json' },
+        :body => %({
+          "took" : 1,
+          "errors" : true,
+          "items" : [
+            {
+              "create" : {
+                "_index" : "foo",
+                "_type"  : "bar",
+                "_id" : "abc"
+              }
+            },
+            {
+              "create" : {
+                "_index" : "foo",
+                "_type"  : "bar",
+                "_id" : "xyz",
+                "status" : 500,
+                "error" : {
+                  "type" : "some unrecognized type",
+                  "reason":"some error to cause version mismatch"
+                }
+              }
+            }
+          ]
+        })
+      }
+    end)
+    sample_record1 = sample_record('my_id' => 'abc')
+    sample_record4 = sample_record('my_id' => 'xyz')
+    driver.emit(sample_record1, 1)
+    driver.emit(sample_record, 2)
+    driver.emit(sample_record, 3)
+    driver.emit(sample_record4, 4)
+
+    # one record succeeded while the other should be 'retried'
+    driver.expect_emit('test', 4, sample_record4)
+    driver.run
+    assert_logs_include(log.out.logs, /(Missing '_id' field)/, 2)
+  end
+
   def test_bulk_error
     stub_elastic_ping
     stub_request(:post, 'http://localhost:9200/_bulk')