fluent-plugin-elasticsearch 2.10.5 → 2.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2f9dd556dc8e7f51f181c44a3bcff4496bca22ee330c7a374654da3cc2772773
4
- data.tar.gz: 76cc551a4dd10d6f4d5f215d4a736db3b1f9cb9ab7a7999166a9a4345a55c209
3
+ metadata.gz: b98a52f6ea81dcfb963cb072468dfdbbfd0ad23202c9974d1e19032ad3059b56
4
+ data.tar.gz: 79ab8568a0d34dd71adb9c8b3e1016bdad3cabda102603a3da49cd19f9f15e1e
5
5
  SHA512:
6
- metadata.gz: 3221ab803aa9c873e9f9c8631a87899d8376adc8fa1a90627d3856eb929b0a304fa8850b0dd58d20e23b4a7020512ea4e16b9311f017199ef3e493b4fee9d4ab
7
- data.tar.gz: 7e0e90b03e16d63b74e1936f33f1d6586dcab00b78c68b47342ea6044e96609f2129e38629821d34a06241d7caafedf6e47825601ff39c2410140e8e1696ee4d
6
+ metadata.gz: 843a03b70c2e6da558da1674b877b4771bbfe55f8ce444cd2c53a279b47772413c680059a09b57e420638f4bf6b91549f680177ef4881f32d4258c02780de105
7
+ data.tar.gz: 321d22427a0f2c0770dd48f13ebcc710390a44b908329853cac4f4b7c69e6eda0d31591af507c58b99e71bacbd70f9a9fc80a793a4651336f4c21cf43172ede7
data/History.md CHANGED
@@ -2,6 +2,9 @@
2
2
 
3
3
  ### [Unreleased]
4
4
 
5
+ ### 2.11.0
6
+ - Uplift Merge pull request #437 from jcantrill/fix_bulk_count (#438)
7
+
5
8
  ### 2.10.5
6
9
  - Uplift Merge pull request #435 from jcantrill/add_trace_logging (#436)
7
10
 
data/README.md CHANGED
@@ -17,6 +17,7 @@ Current maintainers: @cosmo0920
17
17
  * [Usage](#usage)
18
18
  + [Index templates](#index-templates)
19
19
  * [Configuration](#configuration)
20
+ + [emit_error_for_missing_id](#emit_error_for_missing_id)
20
21
  + [hosts](#hosts)
21
22
  + [user, password, path, scheme, ssl_verify](#user-password-path-scheme-ssl_verify)
22
23
  + [logstash_format](#logstash_format)
@@ -104,6 +105,15 @@ This plugin creates Elasticsearch indices by merely writing to them. Consider us
104
105
 
105
106
  ## Configuration
106
107
 
108
+ ### emit_error_for_missing_id
109
+
110
+ ```
111
+ emit_error_for_missing_id true
112
+ ```
113
+ When `write_operation` is configured to anything other than `index`, setting this value to `true` will
114
+ cause the plugin to call `emit_error_event` for any records that do not include an `_id` field. The default (`false`)
115
+ behavior is to silently drop the records.
116
+
107
117
  ### hosts
108
118
 
109
119
  ```
@@ -3,7 +3,7 @@ $:.push File.expand_path('../lib', __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = 'fluent-plugin-elasticsearch'
6
- s.version = '2.10.5'
6
+ s.version = '2.11.0'
7
7
  s.authors = ['diogo', 'pitr']
8
8
  s.email = ['pitr.vern@gmail.com', 'me@diogoterror.com']
9
9
  s.description = %q{Elasticsearch output plugin for Fluent event collector}
@@ -32,7 +32,7 @@ class Fluent::Plugin::ElasticsearchErrorHandler
32
32
  begin
33
33
  # we need a deep copy for process_message to alter
34
34
  processrecord = Marshal.load(Marshal.dump(rawrecord))
35
- @plugin.process_message(tag, meta, header, time, processrecord, bulk_message, extracted_values)
35
+ next unless @plugin.process_message(tag, meta, header, time, processrecord, bulk_message, extracted_values)
36
36
  rescue => e
37
37
  stats[:bad_chunk_record] += 1
38
38
  next
@@ -20,6 +20,10 @@ module Fluent::Plugin
20
20
  class ElasticsearchOutput < Output
21
21
  class ConnectionFailure < Fluent::UnrecoverableError; end
22
22
 
23
+ # MissingIdFieldError is raised for records that do not
24
+ # include the field for the unique record identifier
25
+ class MissingIdFieldError < StandardError; end
26
+
23
27
  # RetryStreamError provides a stream to be
24
28
  # put back in the pipeline for cases where a bulk request
25
29
  # failed (e.g some records succeed while others failed)
@@ -94,6 +98,7 @@ EOC
94
98
  config_param :reconnect_on_error, :bool, :default => false
95
99
  config_param :pipeline, :string, :default => nil
96
100
  config_param :with_transporter_log, :bool, :default => false
101
+ config_param :emit_error_for_missing_id, :bool, :default => false
97
102
  config_param :content_type, :enum, list: [:"application/json", :"application/x-ndjson"], :default => :"application/json",
98
103
  :deprecated => <<EOC
99
104
  elasticsearch gem v6.0.2 starts to use correct Content-Type. Please upgrade elasticserach gem and stop to use this option.
@@ -324,6 +329,13 @@ EOC
324
329
  end.join(', ')
325
330
  end
326
331
 
332
+ # append_record_to_messages adds a record to the bulk message
333
+ # payload to be submitted to Elasticsearch. Records that do
334
+ # not include '_id' field are skipped when 'write_operation'
335
+ # is configured for 'create' or 'update'
336
+ #
337
+ # returns 'true' if record was appended to the bulk message
338
+ # and 'false' otherwise
327
339
  def append_record_to_messages(op, meta, header, record, msgs)
328
340
  case op
329
341
  when UPDATE_OP, UPSERT_OP
@@ -331,18 +343,22 @@ EOC
331
343
  header[UPDATE_OP] = meta
332
344
  msgs << @dump_proc.call(header) << BODY_DELIMITER
333
345
  msgs << @dump_proc.call(update_body(record, op)) << BODY_DELIMITER
346
+ return true
334
347
  end
335
348
  when CREATE_OP
336
349
  if meta.has_key?(ID_FIELD)
337
350
  header[CREATE_OP] = meta
338
351
  msgs << @dump_proc.call(header) << BODY_DELIMITER
339
352
  msgs << @dump_proc.call(record) << BODY_DELIMITER
353
+ return true
340
354
  end
341
355
  when INDEX_OP
342
356
  header[INDEX_OP] = meta
343
357
  msgs << @dump_proc.call(header) << BODY_DELIMITER
344
358
  msgs << @dump_proc.call(record) << BODY_DELIMITER
359
+ return true
345
360
  end
361
+ return false
346
362
  end
347
363
 
348
364
  def update_body(record, op)
@@ -406,8 +422,15 @@ EOC
406
422
  chunk.msgpack_each do |time, record|
407
423
  next unless record.is_a? Hash
408
424
  begin
409
- process_message(tag, meta, header, time, record, bulk_message, extracted_values)
410
- bulk_message_count += 1
425
+ if process_message(tag, meta, header, time, record, bulk_message, extracted_values)
426
+ bulk_message_count += 1
427
+ else
428
+ if @emit_error_for_missing_id
429
+ raise MissingIdFieldError, "Missing '_id' field. Write operation is #{@write_operation}"
430
+ else
431
+ log.on_debug { log.debug("Dropping record because its missing an '_id' field and write_operation is #{@write_operation}: #{record}") }
432
+ end
433
+ end
411
434
  rescue => e
412
435
  router.emit_error_event(tag, time, record, e)
413
436
  end
@@ -26,6 +26,7 @@ class TestElasticsearchErrorHandler < Test::Unit::TestCase
26
26
  if record.has_key?('raise') && record['raise']
27
27
  raise Exception('process_message')
28
28
  end
29
+ return true
29
30
  end
30
31
  end
31
32
 
@@ -54,7 +55,7 @@ class TestElasticsearchErrorHandler < Test::Unit::TestCase
54
55
  end
55
56
 
56
57
  def test_dlq_400_responses
57
- records = [{time: 123, record: {"foo" => "bar"}}]
58
+ records = [{time: 123, record: {"foo" => "bar", '_id' => 'abc'}}]
58
59
  response = parse_response(%({
59
60
  "took" : 0,
60
61
  "errors" : true,
@@ -41,8 +41,8 @@ class ElasticsearchOutput < Test::Unit::TestCase
41
41
  Fluent::Plugin::ElasticsearchOutput::DEFAULT_TYPE_NAME
42
42
  end
43
43
 
44
- def sample_record
45
- {'age' => 26, 'request_id' => '42', 'parent_id' => 'parent', 'routing_id' => 'routing'}
44
+ def sample_record(content={})
45
+ {'age' => 26, 'request_id' => '42', 'parent_id' => 'parent', 'routing_id' => 'routing'}.merge(content)
46
46
  end
47
47
 
48
48
  def nested_sample_record
@@ -178,9 +178,9 @@ class ElasticsearchOutput < Test::Unit::TestCase
178
178
  stub_request(:post, url).to_return(lambda { |req| bodystr = make_response_body(req, 0, 500, error); body = JSON.parse(bodystr); body['items'][0]['unknown'] = body['items'][0].delete('create'); { :status => 200, :body => body.to_json, :headers => { 'Content-Type' => 'json' } } })
179
179
  end
180
180
 
181
- def assert_logs_include(logs, msg)
181
+ def assert_logs_include(logs, msg, exp_matches=1)
182
182
  matches = logs.grep /#{msg}/
183
- assert_equal(1, matches.length, "Logs do not contain '#{msg}' '#{logs}'")
183
+ assert_equal(exp_matches, matches.length, "Logs do not contain '#{msg}' '#{logs}'")
184
184
  end
185
185
 
186
186
  def test_configure
@@ -1799,6 +1799,106 @@ class ElasticsearchOutput < Test::Unit::TestCase
1799
1799
  assert_equal [['retry', 1, sample_record]], driver.events
1800
1800
  end
1801
1801
 
1802
+ def test_create_should_write_records_with_ids_and_skip_those_without
1803
+ driver.configure("write_operation create\nid_key my_id\n@log_level debug")
1804
+ stub_elastic_ping
1805
+ stub_request(:post, 'http://localhost:9200/_bulk')
1806
+ .to_return(lambda do |req|
1807
+ { :status => 200,
1808
+ :headers => { 'Content-Type' => 'json' },
1809
+ :body => %({
1810
+ "took" : 1,
1811
+ "errors" : true,
1812
+ "items" : [
1813
+ {
1814
+ "create" : {
1815
+ "_index" : "foo",
1816
+ "_type" : "bar",
1817
+ "_id" : "abc"
1818
+ }
1819
+ },
1820
+ {
1821
+ "create" : {
1822
+ "_index" : "foo",
1823
+ "_type" : "bar",
1824
+ "_id" : "xyz",
1825
+ "status" : 500,
1826
+ "error" : {
1827
+ "type" : "some unrecognized type",
1828
+ "reason":"some error to cause version mismatch"
1829
+ }
1830
+ }
1831
+ }
1832
+ ]
1833
+ })
1834
+ }
1835
+ end)
1836
+ sample_record1 = sample_record('my_id' => 'abc')
1837
+ sample_record4 = sample_record('my_id' => 'xyz')
1838
+
1839
+ driver.run(default_tag: 'test') do
1840
+ driver.feed(1, sample_record1)
1841
+ driver.feed(2, sample_record)
1842
+ driver.feed(3, sample_record)
1843
+ driver.feed(4, sample_record4)
1844
+ end
1845
+
1846
+ logs = driver.logs
1847
+ # one record succeeded while the other should be 'retried'
1848
+ assert_equal [['test', 4, sample_record4]], driver.events
1849
+ assert_logs_include(logs, /(Dropping record)/, 2)
1850
+ end
1851
+
1852
+ def test_create_should_write_records_with_ids_and_emit_those_without
1853
+ driver.configure("write_operation create\nid_key my_id\nemit_error_for_missing_id true\n@log_level debug")
1854
+ stub_elastic_ping
1855
+ stub_request(:post, 'http://localhost:9200/_bulk')
1856
+ .to_return(lambda do |req|
1857
+ { :status => 200,
1858
+ :headers => { 'Content-Type' => 'json' },
1859
+ :body => %({
1860
+ "took" : 1,
1861
+ "errors" : true,
1862
+ "items" : [
1863
+ {
1864
+ "create" : {
1865
+ "_index" : "foo",
1866
+ "_type" : "bar",
1867
+ "_id" : "abc"
1868
+ }
1869
+ },
1870
+ {
1871
+ "create" : {
1872
+ "_index" : "foo",
1873
+ "_type" : "bar",
1874
+ "_id" : "xyz",
1875
+ "status" : 500,
1876
+ "error" : {
1877
+ "type" : "some unrecognized type",
1878
+ "reason":"some error to cause version mismatch"
1879
+ }
1880
+ }
1881
+ }
1882
+ ]
1883
+ })
1884
+ }
1885
+ end)
1886
+ sample_record1 = sample_record('my_id' => 'abc')
1887
+ sample_record4 = sample_record('my_id' => 'xyz')
1888
+
1889
+ driver.run(default_tag: 'test') do
1890
+ driver.feed(1, sample_record1)
1891
+ driver.feed(2, sample_record)
1892
+ driver.feed(3, sample_record)
1893
+ driver.feed(4, sample_record4)
1894
+ end
1895
+
1896
+ error_log = driver.error_events.map {|e| e.last.message }
1897
+ # one record succeeded while the other should be 'retried'
1898
+ assert_equal [['test', 4, sample_record4]], driver.events
1899
+ assert_logs_include(error_log, /(Missing '_id' field)/, 2)
1900
+ end
1901
+
1802
1902
  def test_bulk_error
1803
1903
  stub_elastic_ping
1804
1904
  stub_request(:post, 'http://localhost:9200/_bulk')
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-elasticsearch
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.10.5
4
+ version: 2.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - diogo