fluent-plugin-elasticsearch 1.16.2 → 1.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c3921e6bfaf0cf8f2560814962f2476acd7bfa408cf1444ab33e799e445205b3
4
- data.tar.gz: 545e05cb56f2d13b531d8b2bf5dc4b9c59f3f5aee36c20944e1334c4e17ff228
3
+ metadata.gz: 18bf8262f87eae27a0ff3e7d5efc101984ae112e0b0b1f9fcc8c286b60f760ea
4
+ data.tar.gz: d7964fb843241c67fa1920de545bc2c0759f4d6b7f45fbd3c2b3f8f66d7a7797
5
5
  SHA512:
6
- metadata.gz: 0ab5a3c10bf1fce8e71776ed0b78019fb3e3ab7261493f212c1775e685490aead3e8f12b319d73fe10c3a889ec2f52d8b01316ee0b2bfc8b5df70c489afb1442
7
- data.tar.gz: 6ee96c25dc743588dd071098e680613f12d7004198fb58893823fbd897227bcd6c94c38af9ab0d9c2880ff0ac32d17775e0b99a4b76d0fe6214a526622d5fb1f
6
+ metadata.gz: ffbd3297314e2f3a9ebecaa0009ac17f13d17fc97b057ac94e462b706e667f86cc21f3290e14f744ae7e71c905583cddd54303923a88e66510bbfa6ef70f3245
7
+ data.tar.gz: e814f94fb1110a3598136dc17ae846d80ff7e85c308a3f554d93c197dec94db4033a708502cce8c8c3257c34726ed509098f89f0f3fac96d4bb8aa2ff4cde524
data/History.md CHANGED
@@ -2,6 +2,9 @@
2
2
 
3
3
  ### [Unreleased]
4
4
 
5
+ ### 1.17.0
6
+ - Fix #434 bulk count (#437)
7
+
5
8
  ### 1.16.2
6
9
  - add trace logging to send_bulk (#435)
7
10
 
data/README.md CHANGED
@@ -15,6 +15,7 @@ Note: For Amazon Elasticsearch Service please consider using [fluent-plugin-aws-
15
15
  * [Usage](#usage)
16
16
  + [Index templates](#index-templates)
17
17
  * [Configuration](#configuration)
18
+ + [emit_error_for_missing_id](#emit_error_for_missing_id)
18
19
  + [hosts](#hosts)
19
20
  + [user, password, path, scheme, ssl_verify](#user-password-path-scheme-ssl_verify)
20
21
  + [logstash_format](#logstash_format)
@@ -87,6 +88,15 @@ This plugin creates Elasticsearch indices by merely writing to them. Consider us
87
88
 
88
89
  ## Configuration
89
90
 
91
+ ### emit_error_for_missing_id
92
+
93
+ ```
94
+ emit_error_for_missing_id true
95
+ ```
96
+ When `write_operation` is configured to anything other than `index`, setting this value to `true` will
97
+ cause the plugin to `emit_error_event` for any records which do not include an `_id` field. The default (`false`)
98
+ behavior is to silently drop the records.
99
+
90
100
  ### hosts
91
101
 
92
102
  ```
@@ -3,7 +3,7 @@ $:.push File.expand_path('../lib', __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = 'fluent-plugin-elasticsearch'
6
- s.version = '1.16.2'
6
+ s.version = '1.17.0'
7
7
  s.authors = ['diogo', 'pitr']
8
8
  s.email = ['pitr.vern@gmail.com', 'me@diogoterror.com']
9
9
  s.description = %q{Elasticsearch output plugin for Fluent event collector}
@@ -14,7 +14,7 @@ class Fluent::ElasticsearchErrorHandler
14
14
 
15
15
  def handle_error(response, tag, chunk, bulk_message_count)
16
16
  items = response['items']
17
- if items.nil? || !items.is_a?(Array)
17
+ if items.nil? || !items.is_a?(Array)
18
18
  raise ElasticsearchVersionMismatch, "The response format was unrecognized: #{response}"
19
19
  end
20
20
  if bulk_message_count != items.length
@@ -30,7 +30,7 @@ class Fluent::ElasticsearchErrorHandler
30
30
  begin
31
31
  # we need a deep copy for process_message to alter
32
32
  processrecord = Marshal.load(Marshal.dump(rawrecord))
33
- @plugin.process_message(tag, meta, header, time, processrecord, bulk_message)
33
+ next unless @plugin.process_message(tag, meta, header, time, processrecord, bulk_message)
34
34
  rescue => e
35
35
  stats[:bad_chunk_record] += 1
36
36
  next
@@ -18,6 +18,10 @@ require_relative 'elasticsearch_index_template'
18
18
  class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
19
19
  class ConnectionFailure < StandardError; end
20
20
 
21
+ # MissingIdFieldError is raised for records that do not
22
+ # include the field for the unique record identifier
23
+ class MissingIdFieldError < StandardError; end
24
+
21
25
  # RetryStreamError provides a stream to be
22
26
  # put back in the pipeline for cases where a bulk request
23
27
  # failed (e.g some records succeed while others failed)
@@ -81,6 +85,7 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
81
85
  config_param :reconnect_on_error, :bool, :default => false
82
86
  config_param :pipeline, :string, :default => nil
83
87
  config_param :with_transporter_log, :bool, :default => false
88
+ config_param :emit_error_for_missing_id, :bool, :default => false
84
89
 
85
90
  include Fluent::ElasticsearchIndexTemplate
86
91
  include Fluent::ElasticsearchConstants
@@ -267,6 +272,13 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
267
272
  end.join(', ')
268
273
  end
269
274
 
275
+ # append_record_to_messages adds a record to the bulk message
276
+ # payload to be submitted to Elasticsearch. Records that do
277
+ # not include '_id' field are skipped when 'write_operation'
278
+ # is configured for 'create' or 'update'
279
+ #
280
+ # returns 'true' if record was appended to the bulk message
281
+ # and 'false' otherwise
270
282
  def append_record_to_messages(op, meta, header, record, msgs)
271
283
  case op
272
284
  when UPDATE_OP, UPSERT_OP
@@ -274,18 +286,22 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
274
286
  header[UPDATE_OP] = meta
275
287
  msgs << @dump_proc.call(header) << BODY_DELIMITER
276
288
  msgs << @dump_proc.call(update_body(record, op)) << BODY_DELIMITER
289
+ return true
277
290
  end
278
291
  when CREATE_OP
279
292
  if meta.has_key?(ID_FIELD)
280
293
  header[CREATE_OP] = meta
281
294
  msgs << @dump_proc.call(header) << BODY_DELIMITER
282
295
  msgs << @dump_proc.call(record) << BODY_DELIMITER
296
+ return true
283
297
  end
284
298
  when INDEX_OP
285
299
  header[INDEX_OP] = meta
286
300
  msgs << @dump_proc.call(header) << BODY_DELIMITER
287
301
  msgs << @dump_proc.call(record) << BODY_DELIMITER
302
+ return true
288
303
  end
304
+ return false
289
305
  end
290
306
 
291
307
  def update_body(record, op)
@@ -333,8 +349,15 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
333
349
  chunk.msgpack_each do |time, record|
334
350
  next unless record.is_a? Hash
335
351
  begin
336
- process_message(tag, meta, header, time, record, bulk_message)
337
- bulk_message_count += 1
352
+ if process_message(tag, meta, header, time, record, bulk_message)
353
+ bulk_message_count += 1
354
+ else
355
+ if @emit_error_for_missing_id
356
+ raise MissingIdFieldError, "Missing '_id' field. Write operation is #{@write_operation}"
357
+ else
358
+ log.on_debug { log.debug("Dropping record because its missing an '_id' field and write_operation is #{@write_operation}: #{record}") }
359
+ end
360
+ end
338
361
  rescue=>e
339
362
  router.emit_error_event(tag, time, record, e)
340
363
  end
@@ -26,6 +26,7 @@ class TestElasticsearchErrorHandler < Test::Unit::TestCase
26
26
  if record.has_key?('raise') && record['raise']
27
27
  raise Exception('process_message')
28
28
  end
29
+ return true
29
30
  end
30
31
  end
31
32
 
@@ -58,7 +59,7 @@ class TestElasticsearchErrorHandler < Test::Unit::TestCase
58
59
  end
59
60
 
60
61
  def test_dlq_400_responses
61
- records = [{time: 123, record: {"foo" => "bar"}}]
62
+ records = [{time: 123, record: {"foo" => "bar", '_id' => 'abc'}}]
62
63
  response = parse_response(%({
63
64
  "took" : 0,
64
65
  "errors" : true,
@@ -27,8 +27,8 @@ class ElasticsearchOutput < Test::Unit::TestCase
27
27
  }.configure(conf)
28
28
  end
29
29
 
30
- def sample_record
31
- {'age' => 26, 'request_id' => '42', 'parent_id' => 'parent', 'routing_id' => 'routing'}
30
+ def sample_record(content={})
31
+ {'age' => 26, 'request_id' => '42', 'parent_id' => 'parent', 'routing_id' => 'routing'}.merge(content)
32
32
  end
33
33
 
34
34
  def stub_elastic_ping(url="http://localhost:9200")
@@ -158,9 +158,9 @@ class ElasticsearchOutput < Test::Unit::TestCase
158
158
  stub_request(:post, url).to_return(lambda { |req| bodystr = make_response_body(req, 0, 500, error); body = JSON.parse(bodystr); body['items'][0]['unknown'] = body['items'][0].delete('create'); { :status => 200, :body => body.to_json, :headers => { 'Content-Type' => 'json' } } })
159
159
  end
160
160
 
161
- def assert_logs_include(logs, msg)
161
+ def assert_logs_include(logs, msg, exp_matches=1)
162
162
  matches = logs.grep /#{msg}/
163
- assert_equal(1, matches.length, "Logs do not contain '#{msg}' '#{logs}'")
163
+ assert_equal(exp_matches, matches.length, "Logs do not contain '#{msg}' '#{logs}'")
164
164
  end
165
165
 
166
166
  def test_configure
@@ -1381,6 +1381,102 @@ class ElasticsearchOutput < Test::Unit::TestCase
1381
1381
  driver.run
1382
1382
  end
1383
1383
 
1384
+ def test_create_should_write_records_with_ids_and_skip_those_without
1385
+ driver.configure("write_operation create\nid_key my_id\nlog_level debug")
1386
+ log = driver.instance.router.emit_error_handler.log
1387
+ stub_elastic_ping
1388
+ stub_request(:post, 'http://localhost:9200/_bulk')
1389
+ .to_return(lambda do |req|
1390
+ { :status => 200,
1391
+ :headers => { 'Content-Type' => 'json' },
1392
+ :body => %({
1393
+ "took" : 1,
1394
+ "errors" : true,
1395
+ "items" : [
1396
+ {
1397
+ "create" : {
1398
+ "_index" : "foo",
1399
+ "_type" : "bar",
1400
+ "_id" : "abc"
1401
+ }
1402
+ },
1403
+ {
1404
+ "create" : {
1405
+ "_index" : "foo",
1406
+ "_type" : "bar",
1407
+ "_id" : "xyz",
1408
+ "status" : 500,
1409
+ "error" : {
1410
+ "type" : "some unrecognized type",
1411
+ "reason":"some error to cause version mismatch"
1412
+ }
1413
+ }
1414
+ }
1415
+ ]
1416
+ })
1417
+ }
1418
+ end)
1419
+ sample_record1 = sample_record('my_id' => 'abc')
1420
+ sample_record4 = sample_record('my_id' => 'xyz')
1421
+ driver.emit(sample_record1, 1)
1422
+ driver.emit(sample_record, 2)
1423
+ driver.emit(sample_record, 3)
1424
+ driver.emit(sample_record4, 4)
1425
+
1426
+ # one record succeeded while the other should be 'retried'
1427
+ driver.expect_emit('test', 4, sample_record4)
1428
+ driver.run
1429
+ assert_logs_include(log.out.logs, /(Dropping record)/, 2)
1430
+ end
1431
+
1432
+ def test_create_should_write_records_with_ids_and_emit_those_without
1433
+ driver.configure("write_operation create\nid_key my_id\nemit_error_for_missing_id true")
1434
+ log = driver.instance.router.emit_error_handler.log
1435
+ stub_elastic_ping
1436
+ stub_request(:post, 'http://localhost:9200/_bulk')
1437
+ .to_return(lambda do |req|
1438
+ { :status => 200,
1439
+ :headers => { 'Content-Type' => 'json' },
1440
+ :body => %({
1441
+ "took" : 1,
1442
+ "errors" : true,
1443
+ "items" : [
1444
+ {
1445
+ "create" : {
1446
+ "_index" : "foo",
1447
+ "_type" : "bar",
1448
+ "_id" : "abc"
1449
+ }
1450
+ },
1451
+ {
1452
+ "create" : {
1453
+ "_index" : "foo",
1454
+ "_type" : "bar",
1455
+ "_id" : "xyz",
1456
+ "status" : 500,
1457
+ "error" : {
1458
+ "type" : "some unrecognized type",
1459
+ "reason":"some error to cause version mismatch"
1460
+ }
1461
+ }
1462
+ }
1463
+ ]
1464
+ })
1465
+ }
1466
+ end)
1467
+ sample_record1 = sample_record('my_id' => 'abc')
1468
+ sample_record4 = sample_record('my_id' => 'xyz')
1469
+ driver.emit(sample_record1, 1)
1470
+ driver.emit(sample_record, 2)
1471
+ driver.emit(sample_record, 3)
1472
+ driver.emit(sample_record4, 4)
1473
+
1474
+ # one record succeeded while the other should be 'retried'
1475
+ driver.expect_emit('test', 4, sample_record4)
1476
+ driver.run
1477
+ assert_logs_include(log.out.logs, /(Missing '_id' field)/, 2)
1478
+ end
1479
+
1384
1480
  def test_bulk_error
1385
1481
  stub_elastic_ping
1386
1482
  stub_request(:post, 'http://localhost:9200/_bulk')
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-elasticsearch
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.16.2
4
+ version: 1.17.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - diogo