fluent-plugin-elasticsearch 1.16.2 → 1.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c3921e6bfaf0cf8f2560814962f2476acd7bfa408cf1444ab33e799e445205b3
4
- data.tar.gz: 545e05cb56f2d13b531d8b2bf5dc4b9c59f3f5aee36c20944e1334c4e17ff228
3
+ metadata.gz: 18bf8262f87eae27a0ff3e7d5efc101984ae112e0b0b1f9fcc8c286b60f760ea
4
+ data.tar.gz: d7964fb843241c67fa1920de545bc2c0759f4d6b7f45fbd3c2b3f8f66d7a7797
5
5
  SHA512:
6
- metadata.gz: 0ab5a3c10bf1fce8e71776ed0b78019fb3e3ab7261493f212c1775e685490aead3e8f12b319d73fe10c3a889ec2f52d8b01316ee0b2bfc8b5df70c489afb1442
7
- data.tar.gz: 6ee96c25dc743588dd071098e680613f12d7004198fb58893823fbd897227bcd6c94c38af9ab0d9c2880ff0ac32d17775e0b99a4b76d0fe6214a526622d5fb1f
6
+ metadata.gz: ffbd3297314e2f3a9ebecaa0009ac17f13d17fc97b057ac94e462b706e667f86cc21f3290e14f744ae7e71c905583cddd54303923a88e66510bbfa6ef70f3245
7
+ data.tar.gz: e814f94fb1110a3598136dc17ae846d80ff7e85c308a3f554d93c197dec94db4033a708502cce8c8c3257c34726ed509098f89f0f3fac96d4bb8aa2ff4cde524
data/History.md CHANGED
@@ -2,6 +2,9 @@
2
2
 
3
3
  ### [Unreleased]
4
4
 
5
+ ### 1.17.0
6
+ - Fix #434 bulk count (#437)
7
+
5
8
  ### 1.16.2
6
9
  - add trace logging to send_bulk (#435)
7
10
 
data/README.md CHANGED
@@ -15,6 +15,7 @@ Note: For Amazon Elasticsearch Service please consider using [fluent-plugin-aws-
15
15
  * [Usage](#usage)
16
16
  + [Index templates](#index-templates)
17
17
  * [Configuration](#configuration)
18
+ + [emit_error_for_missing_id](#emit_error_for_missing_id)
18
19
  + [hosts](#hosts)
19
20
  + [user, password, path, scheme, ssl_verify](#user-password-path-scheme-ssl_verify)
20
21
  + [logstash_format](#logstash_format)
@@ -87,6 +88,15 @@ This plugin creates Elasticsearch indices by merely writing to them. Consider us
87
88
 
88
89
  ## Configuration
89
90
 
91
+ ### emit_error_for_missing_id
92
+
93
+ ```
94
+ emit_error_for_missing_id true
95
+ ```
96
+ When `write_operation` is configured to anything other than `index`, setting this value to `true` will
97
+ cause the plugin to `emit_error_event` for any records which do not include an `_id` field. The default (`false`)
98
+ behavior is to silently drop the records.
99
+
90
100
  ### hosts
91
101
 
92
102
  ```
@@ -3,7 +3,7 @@ $:.push File.expand_path('../lib', __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = 'fluent-plugin-elasticsearch'
6
- s.version = '1.16.2'
6
+ s.version = '1.17.0'
7
7
  s.authors = ['diogo', 'pitr']
8
8
  s.email = ['pitr.vern@gmail.com', 'me@diogoterror.com']
9
9
  s.description = %q{Elasticsearch output plugin for Fluent event collector}
@@ -14,7 +14,7 @@ class Fluent::ElasticsearchErrorHandler
14
14
 
15
15
  def handle_error(response, tag, chunk, bulk_message_count)
16
16
  items = response['items']
17
- if items.nil? || !items.is_a?(Array)
17
+ if items.nil? || !items.is_a?(Array)
18
18
  raise ElasticsearchVersionMismatch, "The response format was unrecognized: #{response}"
19
19
  end
20
20
  if bulk_message_count != items.length
@@ -30,7 +30,7 @@ class Fluent::ElasticsearchErrorHandler
30
30
  begin
31
31
  # we need a deep copy for process_message to alter
32
32
  processrecord = Marshal.load(Marshal.dump(rawrecord))
33
- @plugin.process_message(tag, meta, header, time, processrecord, bulk_message)
33
+ next unless @plugin.process_message(tag, meta, header, time, processrecord, bulk_message)
34
34
  rescue => e
35
35
  stats[:bad_chunk_record] += 1
36
36
  next
@@ -18,6 +18,10 @@ require_relative 'elasticsearch_index_template'
18
18
  class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
19
19
  class ConnectionFailure < StandardError; end
20
20
 
21
+ # MissingIdFieldError is raised for records that do not
22
+ # include the field for the unique record identifier
23
+ class MissingIdFieldError < StandardError; end
24
+
21
25
  # RetryStreamError provides a stream to be
22
26
  # put back in the pipeline for cases where a bulk request
23
27
  # failed (e.g some records succeed while others failed)
@@ -81,6 +85,7 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
81
85
  config_param :reconnect_on_error, :bool, :default => false
82
86
  config_param :pipeline, :string, :default => nil
83
87
  config_param :with_transporter_log, :bool, :default => false
88
+ config_param :emit_error_for_missing_id, :bool, :default => false
84
89
 
85
90
  include Fluent::ElasticsearchIndexTemplate
86
91
  include Fluent::ElasticsearchConstants
@@ -267,6 +272,13 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
267
272
  end.join(', ')
268
273
  end
269
274
 
275
+ # append_record_to_messages adds a record to the bulk message
276
+ # payload to be submitted to Elasticsearch. Records that do
277
+ # not include '_id' field are skipped when 'write_operation'
278
+ # is configured for 'create' or 'update'
279
+ #
280
+ # returns 'true' if record was appended to the bulk message
281
+ # and 'false' otherwise
270
282
  def append_record_to_messages(op, meta, header, record, msgs)
271
283
  case op
272
284
  when UPDATE_OP, UPSERT_OP
@@ -274,18 +286,22 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
274
286
  header[UPDATE_OP] = meta
275
287
  msgs << @dump_proc.call(header) << BODY_DELIMITER
276
288
  msgs << @dump_proc.call(update_body(record, op)) << BODY_DELIMITER
289
+ return true
277
290
  end
278
291
  when CREATE_OP
279
292
  if meta.has_key?(ID_FIELD)
280
293
  header[CREATE_OP] = meta
281
294
  msgs << @dump_proc.call(header) << BODY_DELIMITER
282
295
  msgs << @dump_proc.call(record) << BODY_DELIMITER
296
+ return true
283
297
  end
284
298
  when INDEX_OP
285
299
  header[INDEX_OP] = meta
286
300
  msgs << @dump_proc.call(header) << BODY_DELIMITER
287
301
  msgs << @dump_proc.call(record) << BODY_DELIMITER
302
+ return true
288
303
  end
304
+ return false
289
305
  end
290
306
 
291
307
  def update_body(record, op)
@@ -333,8 +349,15 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
333
349
  chunk.msgpack_each do |time, record|
334
350
  next unless record.is_a? Hash
335
351
  begin
336
- process_message(tag, meta, header, time, record, bulk_message)
337
- bulk_message_count += 1
352
+ if process_message(tag, meta, header, time, record, bulk_message)
353
+ bulk_message_count += 1
354
+ else
355
+ if @emit_error_for_missing_id
356
+ raise MissingIdFieldError, "Missing '_id' field. Write operation is #{@write_operation}"
357
+ else
358
+ log.on_debug { log.debug("Dropping record because its missing an '_id' field and write_operation is #{@write_operation}: #{record}") }
359
+ end
360
+ end
338
361
  rescue=>e
339
362
  router.emit_error_event(tag, time, record, e)
340
363
  end
@@ -26,6 +26,7 @@ class TestElasticsearchErrorHandler < Test::Unit::TestCase
26
26
  if record.has_key?('raise') && record['raise']
27
27
  raise Exception('process_message')
28
28
  end
29
+ return true
29
30
  end
30
31
  end
31
32
 
@@ -58,7 +59,7 @@ class TestElasticsearchErrorHandler < Test::Unit::TestCase
58
59
  end
59
60
 
60
61
  def test_dlq_400_responses
61
- records = [{time: 123, record: {"foo" => "bar"}}]
62
+ records = [{time: 123, record: {"foo" => "bar", '_id' => 'abc'}}]
62
63
  response = parse_response(%({
63
64
  "took" : 0,
64
65
  "errors" : true,
@@ -27,8 +27,8 @@ class ElasticsearchOutput < Test::Unit::TestCase
27
27
  }.configure(conf)
28
28
  end
29
29
 
30
- def sample_record
31
- {'age' => 26, 'request_id' => '42', 'parent_id' => 'parent', 'routing_id' => 'routing'}
30
+ def sample_record(content={})
31
+ {'age' => 26, 'request_id' => '42', 'parent_id' => 'parent', 'routing_id' => 'routing'}.merge(content)
32
32
  end
33
33
 
34
34
  def stub_elastic_ping(url="http://localhost:9200")
@@ -158,9 +158,9 @@ class ElasticsearchOutput < Test::Unit::TestCase
158
158
  stub_request(:post, url).to_return(lambda { |req| bodystr = make_response_body(req, 0, 500, error); body = JSON.parse(bodystr); body['items'][0]['unknown'] = body['items'][0].delete('create'); { :status => 200, :body => body.to_json, :headers => { 'Content-Type' => 'json' } } })
159
159
  end
160
160
 
161
- def assert_logs_include(logs, msg)
161
+ def assert_logs_include(logs, msg, exp_matches=1)
162
162
  matches = logs.grep /#{msg}/
163
- assert_equal(1, matches.length, "Logs do not contain '#{msg}' '#{logs}'")
163
+ assert_equal(exp_matches, matches.length, "Logs do not contain '#{msg}' '#{logs}'")
164
164
  end
165
165
 
166
166
  def test_configure
@@ -1381,6 +1381,102 @@ class ElasticsearchOutput < Test::Unit::TestCase
1381
1381
  driver.run
1382
1382
  end
1383
1383
 
1384
+ def test_create_should_write_records_with_ids_and_skip_those_without
1385
+ driver.configure("write_operation create\nid_key my_id\nlog_level debug")
1386
+ log = driver.instance.router.emit_error_handler.log
1387
+ stub_elastic_ping
1388
+ stub_request(:post, 'http://localhost:9200/_bulk')
1389
+ .to_return(lambda do |req|
1390
+ { :status => 200,
1391
+ :headers => { 'Content-Type' => 'json' },
1392
+ :body => %({
1393
+ "took" : 1,
1394
+ "errors" : true,
1395
+ "items" : [
1396
+ {
1397
+ "create" : {
1398
+ "_index" : "foo",
1399
+ "_type" : "bar",
1400
+ "_id" : "abc"
1401
+ }
1402
+ },
1403
+ {
1404
+ "create" : {
1405
+ "_index" : "foo",
1406
+ "_type" : "bar",
1407
+ "_id" : "xyz",
1408
+ "status" : 500,
1409
+ "error" : {
1410
+ "type" : "some unrecognized type",
1411
+ "reason":"some error to cause version mismatch"
1412
+ }
1413
+ }
1414
+ }
1415
+ ]
1416
+ })
1417
+ }
1418
+ end)
1419
+ sample_record1 = sample_record('my_id' => 'abc')
1420
+ sample_record4 = sample_record('my_id' => 'xyz')
1421
+ driver.emit(sample_record1, 1)
1422
+ driver.emit(sample_record, 2)
1423
+ driver.emit(sample_record, 3)
1424
+ driver.emit(sample_record4, 4)
1425
+
1426
+ # one record succeeded while the other should be 'retried'
1427
+ driver.expect_emit('test', 4, sample_record4)
1428
+ driver.run
1429
+ assert_logs_include(log.out.logs, /(Dropping record)/, 2)
1430
+ end
1431
+
1432
+ def test_create_should_write_records_with_ids_and_emit_those_without
1433
+ driver.configure("write_operation create\nid_key my_id\nemit_error_for_missing_id true")
1434
+ log = driver.instance.router.emit_error_handler.log
1435
+ stub_elastic_ping
1436
+ stub_request(:post, 'http://localhost:9200/_bulk')
1437
+ .to_return(lambda do |req|
1438
+ { :status => 200,
1439
+ :headers => { 'Content-Type' => 'json' },
1440
+ :body => %({
1441
+ "took" : 1,
1442
+ "errors" : true,
1443
+ "items" : [
1444
+ {
1445
+ "create" : {
1446
+ "_index" : "foo",
1447
+ "_type" : "bar",
1448
+ "_id" : "abc"
1449
+ }
1450
+ },
1451
+ {
1452
+ "create" : {
1453
+ "_index" : "foo",
1454
+ "_type" : "bar",
1455
+ "_id" : "xyz",
1456
+ "status" : 500,
1457
+ "error" : {
1458
+ "type" : "some unrecognized type",
1459
+ "reason":"some error to cause version mismatch"
1460
+ }
1461
+ }
1462
+ }
1463
+ ]
1464
+ })
1465
+ }
1466
+ end)
1467
+ sample_record1 = sample_record('my_id' => 'abc')
1468
+ sample_record4 = sample_record('my_id' => 'xyz')
1469
+ driver.emit(sample_record1, 1)
1470
+ driver.emit(sample_record, 2)
1471
+ driver.emit(sample_record, 3)
1472
+ driver.emit(sample_record4, 4)
1473
+
1474
+ # one record succeeded while the other should be 'retried'
1475
+ driver.expect_emit('test', 4, sample_record4)
1476
+ driver.run
1477
+ assert_logs_include(log.out.logs, /(Missing '_id' field)/, 2)
1478
+ end
1479
+
1384
1480
  def test_bulk_error
1385
1481
  stub_elastic_ping
1386
1482
  stub_request(:post, 'http://localhost:9200/_bulk')
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-elasticsearch
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.16.2
4
+ version: 1.17.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - diogo