fluent-plugin-elasticsearch 1.16.2 → 1.17.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c3921e6bfaf0cf8f2560814962f2476acd7bfa408cf1444ab33e799e445205b3
4
- data.tar.gz: 545e05cb56f2d13b531d8b2bf5dc4b9c59f3f5aee36c20944e1334c4e17ff228
3
+ metadata.gz: 18bf8262f87eae27a0ff3e7d5efc101984ae112e0b0b1f9fcc8c286b60f760ea
4
+ data.tar.gz: d7964fb843241c67fa1920de545bc2c0759f4d6b7f45fbd3c2b3f8f66d7a7797
5
5
  SHA512:
6
- metadata.gz: 0ab5a3c10bf1fce8e71776ed0b78019fb3e3ab7261493f212c1775e685490aead3e8f12b319d73fe10c3a889ec2f52d8b01316ee0b2bfc8b5df70c489afb1442
7
- data.tar.gz: 6ee96c25dc743588dd071098e680613f12d7004198fb58893823fbd897227bcd6c94c38af9ab0d9c2880ff0ac32d17775e0b99a4b76d0fe6214a526622d5fb1f
6
+ metadata.gz: ffbd3297314e2f3a9ebecaa0009ac17f13d17fc97b057ac94e462b706e667f86cc21f3290e14f744ae7e71c905583cddd54303923a88e66510bbfa6ef70f3245
7
+ data.tar.gz: e814f94fb1110a3598136dc17ae846d80ff7e85c308a3f554d93c197dec94db4033a708502cce8c8c3257c34726ed509098f89f0f3fac96d4bb8aa2ff4cde524
data/History.md CHANGED
@@ -2,6 +2,9 @@
2
2
 
3
3
  ### [Unreleased]
4
4
 
5
+ ### 1.17.0
6
+ - Fix #434 bulk count (#437)
7
+
5
8
  ### 1.16.2
6
9
  - add trace logging to send_bulk (#435)
7
10
 
data/README.md CHANGED
@@ -15,6 +15,7 @@ Note: For Amazon Elasticsearch Service please consider using [fluent-plugin-aws-
15
15
  * [Usage](#usage)
16
16
  + [Index templates](#index-templates)
17
17
  * [Configuration](#configuration)
18
+ + [emit_error_for_missing_id](#emit_error_for_missing_id)
18
19
  + [hosts](#hosts)
19
20
  + [user, password, path, scheme, ssl_verify](#user-password-path-scheme-ssl_verify)
20
21
  + [logstash_format](#logstash_format)
@@ -87,6 +88,15 @@ This plugin creates Elasticsearch indices by merely writing to them. Consider us
87
88
 
88
89
  ## Configuration
89
90
 
91
+ ### emit_error_for_missing_id
92
+
93
+ ```
94
+ emit_error_for_missing_id true
95
+ ```
96
+ When `write_operation` is configured to anything other than `index`, setting this value to `true` will
97
+ cause the plugin to `emit_error_event` of any records which do not include an `_id` field. The default (`false`)
98
+ behavior is to silently drop the records.
99
+
90
100
  ### hosts
91
101
 
92
102
  ```
@@ -3,7 +3,7 @@ $:.push File.expand_path('../lib', __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = 'fluent-plugin-elasticsearch'
6
- s.version = '1.16.2'
6
+ s.version = '1.17.0'
7
7
  s.authors = ['diogo', 'pitr']
8
8
  s.email = ['pitr.vern@gmail.com', 'me@diogoterror.com']
9
9
  s.description = %q{Elasticsearch output plugin for Fluent event collector}
@@ -14,7 +14,7 @@ class Fluent::ElasticsearchErrorHandler
14
14
 
15
15
  def handle_error(response, tag, chunk, bulk_message_count)
16
16
  items = response['items']
17
- if items.nil? || !items.is_a?(Array)
17
+ if items.nil? || !items.is_a?(Array)
18
18
  raise ElasticsearchVersionMismatch, "The response format was unrecognized: #{response}"
19
19
  end
20
20
  if bulk_message_count != items.length
@@ -30,7 +30,7 @@ class Fluent::ElasticsearchErrorHandler
30
30
  begin
31
31
  # we need a deep copy for process_message to alter
32
32
  processrecord = Marshal.load(Marshal.dump(rawrecord))
33
- @plugin.process_message(tag, meta, header, time, processrecord, bulk_message)
33
+ next unless @plugin.process_message(tag, meta, header, time, processrecord, bulk_message)
34
34
  rescue => e
35
35
  stats[:bad_chunk_record] += 1
36
36
  next
@@ -18,6 +18,10 @@ require_relative 'elasticsearch_index_template'
18
18
  class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
19
19
  class ConnectionFailure < StandardError; end
20
20
 
21
+ # MissingIdFieldError is raised for records that do not
22
+ # include the field for the unique record identifier
23
+ class MissingIdFieldError < StandardError; end
24
+
21
25
  # RetryStreamError provides a stream to be
22
26
  # put back in the pipeline for cases where a bulk request
23
27
  # failed (e.g some records succeed while others failed)
@@ -81,6 +85,7 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
81
85
  config_param :reconnect_on_error, :bool, :default => false
82
86
  config_param :pipeline, :string, :default => nil
83
87
  config_param :with_transporter_log, :bool, :default => false
88
+ config_param :emit_error_for_missing_id, :bool, :default => false
84
89
 
85
90
  include Fluent::ElasticsearchIndexTemplate
86
91
  include Fluent::ElasticsearchConstants
@@ -267,6 +272,13 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
267
272
  end.join(', ')
268
273
  end
269
274
 
275
+ # append_record_to_messages adds a record to the bulk message
276
+ # payload to be submitted to Elasticsearch. Records that do
277
+ # not include '_id' field are skipped when 'write_operation'
278
+ # is configured for 'create' or 'update'
279
+ #
280
+ # returns 'true' if record was appended to the bulk message
281
+ # and 'false' otherwise
270
282
  def append_record_to_messages(op, meta, header, record, msgs)
271
283
  case op
272
284
  when UPDATE_OP, UPSERT_OP
@@ -274,18 +286,22 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
274
286
  header[UPDATE_OP] = meta
275
287
  msgs << @dump_proc.call(header) << BODY_DELIMITER
276
288
  msgs << @dump_proc.call(update_body(record, op)) << BODY_DELIMITER
289
+ return true
277
290
  end
278
291
  when CREATE_OP
279
292
  if meta.has_key?(ID_FIELD)
280
293
  header[CREATE_OP] = meta
281
294
  msgs << @dump_proc.call(header) << BODY_DELIMITER
282
295
  msgs << @dump_proc.call(record) << BODY_DELIMITER
296
+ return true
283
297
  end
284
298
  when INDEX_OP
285
299
  header[INDEX_OP] = meta
286
300
  msgs << @dump_proc.call(header) << BODY_DELIMITER
287
301
  msgs << @dump_proc.call(record) << BODY_DELIMITER
302
+ return true
288
303
  end
304
+ return false
289
305
  end
290
306
 
291
307
  def update_body(record, op)
@@ -333,8 +349,15 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
333
349
  chunk.msgpack_each do |time, record|
334
350
  next unless record.is_a? Hash
335
351
  begin
336
- process_message(tag, meta, header, time, record, bulk_message)
337
- bulk_message_count += 1
352
+ if process_message(tag, meta, header, time, record, bulk_message)
353
+ bulk_message_count += 1
354
+ else
355
+ if @emit_error_for_missing_id
356
+ raise MissingIdFieldError, "Missing '_id' field. Write operation is #{@write_operation}"
357
+ else
358
+ log.on_debug { log.debug("Dropping record because its missing an '_id' field and write_operation is #{@write_operation}: #{record}") }
359
+ end
360
+ end
338
361
  rescue=>e
339
362
  router.emit_error_event(tag, time, record, e)
340
363
  end
@@ -26,6 +26,7 @@ class TestElasticsearchErrorHandler < Test::Unit::TestCase
26
26
  if record.has_key?('raise') && record['raise']
27
27
  raise Exception('process_message')
28
28
  end
29
+ return true
29
30
  end
30
31
  end
31
32
 
@@ -58,7 +59,7 @@ class TestElasticsearchErrorHandler < Test::Unit::TestCase
58
59
  end
59
60
 
60
61
  def test_dlq_400_responses
61
- records = [{time: 123, record: {"foo" => "bar"}}]
62
+ records = [{time: 123, record: {"foo" => "bar", '_id' => 'abc'}}]
62
63
  response = parse_response(%({
63
64
  "took" : 0,
64
65
  "errors" : true,
@@ -27,8 +27,8 @@ class ElasticsearchOutput < Test::Unit::TestCase
27
27
  }.configure(conf)
28
28
  end
29
29
 
30
- def sample_record
31
- {'age' => 26, 'request_id' => '42', 'parent_id' => 'parent', 'routing_id' => 'routing'}
30
+ def sample_record(content={})
31
+ {'age' => 26, 'request_id' => '42', 'parent_id' => 'parent', 'routing_id' => 'routing'}.merge(content)
32
32
  end
33
33
 
34
34
  def stub_elastic_ping(url="http://localhost:9200")
@@ -158,9 +158,9 @@ class ElasticsearchOutput < Test::Unit::TestCase
158
158
  stub_request(:post, url).to_return(lambda { |req| bodystr = make_response_body(req, 0, 500, error); body = JSON.parse(bodystr); body['items'][0]['unknown'] = body['items'][0].delete('create'); { :status => 200, :body => body.to_json, :headers => { 'Content-Type' => 'json' } } })
159
159
  end
160
160
 
161
- def assert_logs_include(logs, msg)
161
+ def assert_logs_include(logs, msg, exp_matches=1)
162
162
  matches = logs.grep /#{msg}/
163
- assert_equal(1, matches.length, "Logs do not contain '#{msg}' '#{logs}'")
163
+ assert_equal(exp_matches, matches.length, "Logs do not contain '#{msg}' '#{logs}'")
164
164
  end
165
165
 
166
166
  def test_configure
@@ -1381,6 +1381,102 @@ class ElasticsearchOutput < Test::Unit::TestCase
1381
1381
  driver.run
1382
1382
  end
1383
1383
 
1384
+ def test_create_should_write_records_with_ids_and_skip_those_without
1385
+ driver.configure("write_operation create\nid_key my_id\nlog_level debug")
1386
+ log = driver.instance.router.emit_error_handler.log
1387
+ stub_elastic_ping
1388
+ stub_request(:post, 'http://localhost:9200/_bulk')
1389
+ .to_return(lambda do |req|
1390
+ { :status => 200,
1391
+ :headers => { 'Content-Type' => 'json' },
1392
+ :body => %({
1393
+ "took" : 1,
1394
+ "errors" : true,
1395
+ "items" : [
1396
+ {
1397
+ "create" : {
1398
+ "_index" : "foo",
1399
+ "_type" : "bar",
1400
+ "_id" : "abc"
1401
+ }
1402
+ },
1403
+ {
1404
+ "create" : {
1405
+ "_index" : "foo",
1406
+ "_type" : "bar",
1407
+ "_id" : "xyz",
1408
+ "status" : 500,
1409
+ "error" : {
1410
+ "type" : "some unrecognized type",
1411
+ "reason":"some error to cause version mismatch"
1412
+ }
1413
+ }
1414
+ }
1415
+ ]
1416
+ })
1417
+ }
1418
+ end)
1419
+ sample_record1 = sample_record('my_id' => 'abc')
1420
+ sample_record4 = sample_record('my_id' => 'xyz')
1421
+ driver.emit(sample_record1, 1)
1422
+ driver.emit(sample_record, 2)
1423
+ driver.emit(sample_record, 3)
1424
+ driver.emit(sample_record4, 4)
1425
+
1426
+ # one record succeeded while the other should be 'retried'
1427
+ driver.expect_emit('test', 4, sample_record4)
1428
+ driver.run
1429
+ assert_logs_include(log.out.logs, /(Dropping record)/, 2)
1430
+ end
1431
+
1432
+ def test_create_should_write_records_with_ids_and_emit_those_without
1433
+ driver.configure("write_operation create\nid_key my_id\nemit_error_for_missing_id true")
1434
+ log = driver.instance.router.emit_error_handler.log
1435
+ stub_elastic_ping
1436
+ stub_request(:post, 'http://localhost:9200/_bulk')
1437
+ .to_return(lambda do |req|
1438
+ { :status => 200,
1439
+ :headers => { 'Content-Type' => 'json' },
1440
+ :body => %({
1441
+ "took" : 1,
1442
+ "errors" : true,
1443
+ "items" : [
1444
+ {
1445
+ "create" : {
1446
+ "_index" : "foo",
1447
+ "_type" : "bar",
1448
+ "_id" : "abc"
1449
+ }
1450
+ },
1451
+ {
1452
+ "create" : {
1453
+ "_index" : "foo",
1454
+ "_type" : "bar",
1455
+ "_id" : "xyz",
1456
+ "status" : 500,
1457
+ "error" : {
1458
+ "type" : "some unrecognized type",
1459
+ "reason":"some error to cause version mismatch"
1460
+ }
1461
+ }
1462
+ }
1463
+ ]
1464
+ })
1465
+ }
1466
+ end)
1467
+ sample_record1 = sample_record('my_id' => 'abc')
1468
+ sample_record4 = sample_record('my_id' => 'xyz')
1469
+ driver.emit(sample_record1, 1)
1470
+ driver.emit(sample_record, 2)
1471
+ driver.emit(sample_record, 3)
1472
+ driver.emit(sample_record4, 4)
1473
+
1474
+ # one record succeeded while the other should be 'retried'
1475
+ driver.expect_emit('test', 4, sample_record4)
1476
+ driver.run
1477
+ assert_logs_include(log.out.logs, /(Missing '_id' field)/, 2)
1478
+ end
1479
+
1384
1480
  def test_bulk_error
1385
1481
  stub_elastic_ping
1386
1482
  stub_request(:post, 'http://localhost:9200/_bulk')
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-elasticsearch
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.16.2
4
+ version: 1.17.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - diogo