fluent-plugin-elasticsearch 1.10.3 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4ee62e58133f9ebcd78a6ebfa0eb8adb741ea207
4
- data.tar.gz: 848f36e2b35efe4013195a5b79eea21515973962
3
+ metadata.gz: 6d8cd27ced7ebf6f0c8d1a2026032b683af8eadd
4
+ data.tar.gz: 98591a5682b51f4ca97fb206620cec059c57ca74
5
5
  SHA512:
6
- metadata.gz: 5f5f61edcc15c83d546411455603554fc2e2ff7d72cc88b1ccdebed4d2b411ce5cfb5625552ec4536911ecfe9ad02660b3a1634e38dd424f67dd55cb0f75be7a
7
- data.tar.gz: 1508c853846a2142508e06a4e6c213377ede90cf5ae0c691ced885477787eb0e1df1a26c6338fe33d21e77e31dfc8c3ee740317b05e90af45bc7f6e43dae7459
6
+ metadata.gz: c9291c5f41c33732d5dc467e6171446670c1deca8ec79d2c01085e1aeda17bec50206573cede541766a0a6ddce2a566c7d8cc5d2531a410173b2ea07ad98c11e
7
+ data.tar.gz: c7c9c6badfddfd12190837ad504355d2758195c7318fef04b21ce67a0c6a051157a4b89aed074559f9f2696f7af9fc408d2f27ab07db0ae8c6e63214beee64bc
data/History.md CHANGED
@@ -4,6 +4,9 @@
4
4
  - Log ES response errors (#230)
5
5
  - Use latest elasticsearch-ruby (#240)
6
6
 
7
+ ### 1.11.0
8
+ - backport adding bulk errors handling (#324)
9
+
7
10
  ### 1.10.3
8
11
  - releasing generating hash id mechanism to avoid records duplication backport (#323)
9
12
 
@@ -3,7 +3,7 @@ $:.push File.expand_path('../lib', __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = 'fluent-plugin-elasticsearch'
6
- s.version = '1.10.3'
6
+ s.version = '1.11.0'
7
7
  s.authors = ['diogo', 'pitr']
8
8
  s.email = ['pitr.vern@gmail.com', 'me@diogoterror.com']
9
9
  s.description = %q{ElasticSearch output plugin for Fluent event collector}
@@ -0,0 +1,11 @@
1
+ module Fluent
2
+ module ElasticsearchConstants
3
+ BODY_DELIMITER = "\n".freeze
4
+ UPDATE_OP = "update".freeze
5
+ UPSERT_OP = "upsert".freeze
6
+ CREATE_OP = "create".freeze
7
+ INDEX_OP = "index".freeze
8
+ ID_FIELD = "_id".freeze
9
+ TIMESTAMP_FIELD = "@timestamp".freeze
10
+ end
11
+ end
@@ -0,0 +1,96 @@
1
+ require_relative 'elasticsearch_constants'
2
+
3
+ class Fluent::ElasticsearchErrorHandler
4
+ include Fluent::ElasticsearchConstants
5
+
6
+ attr_accessor :records, :bulk_message_count
7
+ class BulkIndexQueueFull < StandardError; end
8
+ class ElasticsearchOutOfMemory < StandardError; end
9
+ class ElasticsearchVersionMismatch < StandardError; end
10
+ class UnrecognizedElasticsearchError < StandardError; end
11
+ class ElasticsearchError < StandardError; end
12
+ def initialize(plugin, records = 0, bulk_message_count = 0)
13
+ @plugin = plugin
14
+ @records = records
15
+ @bulk_message_count = bulk_message_count
16
+ end
17
+
18
+ def handle_error(response)
19
+ errors = Hash.new(0)
20
+ errors_bad_resp = 0
21
+ errors_unrecognized = 0
22
+ successes = 0
23
+ duplicates = 0
24
+ bad_arguments = 0
25
+ response['items'].each do |item|
26
+ if item.has_key?(@plugin.write_operation)
27
+ write_operation = @plugin.write_operation
28
+ elsif INDEX_OP == @plugin.write_operation && item.has_key?(CREATE_OP)
29
+ write_operation = CREATE_OP
30
+ else
31
+ # When we don't have an expected ops field, something changed in the API
32
+ # expected return values (ES 2.x)
33
+ errors_bad_resp += 1
34
+ next
35
+ end
36
+ if item[write_operation].has_key?('status')
37
+ status = item[write_operation]['status']
38
+ else
39
+ # When we don't have a status field, something changed in the API
40
+ # expected return values (ES 2.x)
41
+ errors_bad_resp += 1
42
+ next
43
+ end
44
+ case
45
+ when CREATE_OP == write_operation && 409 == status
46
+ duplicates += 1
47
+ when 400 == status
48
+ bad_arguments += 1
49
+ @plugin.log.debug "Elasticsearch rejected document: #{item}"
50
+ when [429, 500].include?(status)
51
+ if item[write_operation].has_key?('error') && item[write_operation]['error'].has_key?('type')
52
+ type = item[write_operation]['error']['type']
53
+ else
54
+ # When we don't have a type field, something changed in the API
55
+ # expected return values (ES 2.x)
56
+ errors_bad_resp += 1
57
+ next
58
+ end
59
+ errors[type] += 1
60
+ when [200, 201].include?(status)
61
+ successes += 1
62
+ else
63
+ errors_unrecognized += 1
64
+ end
65
+ end
66
+ if errors_bad_resp > 0
67
+ msg = "Unable to parse error response from Elasticsearch, likely an API version mismatch #{response}"
68
+ @plugin.log.error msg
69
+ raise ElasticsearchVersionMismatch, msg
70
+ end
71
+ if bad_arguments > 0
72
+ @plugin.log.warn "Elasticsearch rejected #{bad_arguments} documents due to invalid field arguments"
73
+ end
74
+ if duplicates > 0
75
+ @plugin.log.info "Encountered #{duplicates} duplicate(s) of #{successes} indexing chunk, ignoring"
76
+ end
77
+ msg = "Indexed (op = #{@plugin.write_operation}) #{successes} successfully, #{duplicates} duplicate(s), #{bad_arguments} bad argument(s), #{errors_unrecognized} unrecognized error(s)"
78
+ errors.each_key do |key|
79
+ msg << ", #{errors[key]} #{key} error(s)"
80
+ end
81
+ @plugin.log.debug msg
82
+ if errors_unrecognized > 0
83
+ raise UnrecognizedElasticsearchError, "Unrecognized elasticsearch errors returned, retrying #{response}"
84
+ end
85
+ errors.each_key do |key|
86
+ case key
87
+ when 'out_of_memory_error'
88
+ raise ElasticsearchOutOfMemory, "Elasticsearch has exhausted its heap, retrying"
89
+ when 'es_rejected_execution_exception'
90
+ raise BulkIndexQueueFull, "Bulk index queue is full, retrying"
91
+ else
92
+ raise ElasticsearchError, "Elasticsearch errors returned, retrying #{response}"
93
+ end
94
+ end
95
+ end
96
+ end
@@ -10,6 +10,8 @@ rescue LoadError
10
10
  end
11
11
 
12
12
  require 'fluent/output'
13
+ require_relative 'elasticsearch_constants'
14
+ require_relative 'elasticsearch_error_handler'
13
15
  require_relative 'elasticsearch_index_template'
14
16
  require_relative 'generate_hash_id_support'
15
17
 
@@ -69,6 +71,7 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
69
71
 
70
72
  include Fluent::ElasticsearchIndexTemplate
71
73
  include Fluent::GenerateHashIdSupport
74
+ include Fluent::ElasticsearchConstants
72
75
 
73
76
  def initialize
74
77
  super
@@ -305,8 +308,10 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
305
308
  bulk_message = ''
306
309
  header = {}
307
310
  meta = {}
311
+ @error = Fluent::ElasticsearchErrorHandler.new(self)
308
312
 
309
313
  chunk.msgpack_each do |time, record|
314
+ @error.records += 1
310
315
  next unless record.is_a? Hash
311
316
 
312
317
  if @flatten_hashes
@@ -373,6 +378,7 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
373
378
  end
374
379
 
375
380
  append_record_to_messages(@write_operation, meta, header, record, bulk_message)
381
+ @error.bulk_message_count += 1
376
382
  end
377
383
 
378
384
  send_bulk(bulk_message) unless bulk_message.empty?
@@ -391,6 +397,7 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
391
397
  begin
392
398
  response = client.bulk body: data
393
399
  if response['errors']
400
+ @error.handle_error(response)
394
401
  log.error "Could not push log to Elasticsearch: #{response}"
395
402
  end
396
403
  rescue *client.transport.host_unreachable_exceptions => e
@@ -1,5 +1,6 @@
1
1
  require 'helper'
2
2
  require 'date'
3
+ require 'json'
3
4
  require 'flexmock/test_unit'
4
5
 
5
6
  class ElasticsearchOutput < Test::Unit::TestCase
@@ -56,6 +57,130 @@ class ElasticsearchOutput < Test::Unit::TestCase
56
57
  end
57
58
  end
58
59
 
60
+ def make_response_body(req, error_el = nil, error_status = nil, error = nil)
61
+ req_index_cmds = req.body.split("\n").map { |r| JSON.parse(r) }
62
+ items = []
63
+ count = 0
64
+ ids = 1
65
+ op = nil
66
+ index = nil
67
+ type = nil
68
+ id = nil
69
+ req_index_cmds.each do |cmd|
70
+ if count.even?
71
+ op = cmd.keys[0]
72
+ index = cmd[op]['_index']
73
+ type = cmd[op]['_type']
74
+ if cmd[op].has_key?('_id')
75
+ id = cmd[op]['_id']
76
+ else
77
+ # Note: this appears to be an undocumented feature of Elasticsearch
78
+ # https://www.elastic.co/guide/en/elasticsearch/reference/2.4/docs-bulk.html
79
+ # When you submit an "index" write_operation, with no "_id" field in the
80
+ # metadata header, Elasticsearch will turn this into a "create"
81
+ # operation in the response.
82
+ if "index" == op
83
+ op = "create"
84
+ end
85
+ id = ids
86
+ ids += 1
87
+ end
88
+ else
89
+ item = {
90
+ op => {
91
+ '_index' => index, '_type' => type, '_id' => id, '_version' => 1,
92
+ '_shards' => { 'total' => 1, 'successful' => 1, 'failed' => 0 },
93
+ 'status' => op == 'create' ? 201 : 200
94
+ }
95
+ }
96
+ items.push(item)
97
+ end
98
+ count += 1
99
+ end
100
+ if !error_el.nil? && !error_status.nil? && !error.nil?
101
+ op = items[error_el].keys[0]
102
+ items[error_el][op].delete('_version')
103
+ items[error_el][op].delete('_shards')
104
+ items[error_el][op]['error'] = error
105
+ items[error_el][op]['status'] = error_status
106
+ errors = true
107
+ else
108
+ errors = false
109
+ end
110
+ @index_cmds = items
111
+ body = { 'took' => 6, 'errors' => errors, 'items' => items }
112
+ return body.to_json
113
+ end
114
+
115
+ def stub_elastic_bad_argument(url="http://localhost:9200/_bulk")
116
+ error = {
117
+ "type" => "mapper_parsing_exception",
118
+ "reason" => "failed to parse [...]",
119
+ "caused_by" => {
120
+ "type" => "illegal_argument_exception",
121
+ "reason" => "Invalid format: \"...\""
122
+ }
123
+ }
124
+ stub_request(:post, url).to_return(lambda { |req| { :status => 200, :body => make_response_body(req, 1, 400, error), :headers => { 'Content-Type' => 'json' } } })
125
+ end
126
+
127
+ def stub_elastic_bulk_error(url="http://localhost:9200/_bulk")
128
+ error = {
129
+ "type" => "some-unrecognized-error",
130
+ "reason" => "some message printed here ...",
131
+ }
132
+ stub_request(:post, url).to_return(lambda { |req| { :status => 200, :body => make_response_body(req, 1, 500, error), :headers => { 'Content-Type' => 'json' } } })
133
+ end
134
+
135
+ def stub_elastic_bulk_rejected(url="http://localhost:9200/_bulk")
136
+ error = {
137
+ "type" => "es_rejected_execution_exception",
138
+ "reason" => "rejected execution of org.elasticsearch.transport.TransportService$4@1a34d37a on EsThreadPoolExecutor[bulk, queue capacity = 50, org.elasticsearch.common.util.concurrent.EsThreadPoolExecutor@312a2162[Running, pool size = 32, active threads = 32, queued tasks = 50, completed tasks = 327053]]"
139
+ }
140
+ stub_request(:post, url).to_return(lambda { |req| { :status => 200, :body => make_response_body(req, 1, 429, error), :headers => { 'Content-Type' => 'json' } } })
141
+ end
142
+
143
+ def stub_elastic_out_of_memory(url="http://localhost:9200/_bulk")
144
+ error = {
145
+ "type" => "out_of_memory_error",
146
+ "reason" => "Java heap space"
147
+ }
148
+ stub_request(:post, url).to_return(lambda { |req| { :status => 200, :body => make_response_body(req, 1, 500, error), :headers => { 'Content-Type' => 'json' } } })
149
+ end
150
+
151
+ def stub_elastic_unrecognized_error(url="http://localhost:9200/_bulk")
152
+ error = {
153
+ "type" => "some-other-type",
154
+ "reason" => "some-other-reason"
155
+ }
156
+ stub_request(:post, url).to_return(lambda { |req| { :status => 200, :body => make_response_body(req, 1, 504, error), :headers => { 'Content-Type' => 'json' } } })
157
+ end
158
+
159
+ def stub_elastic_version_mismatch(url="http://localhost:9200/_bulk")
160
+ error = {
161
+ "category" => "some-other-type",
162
+ "reason" => "some-other-reason"
163
+ }
164
+ stub_request(:post, url).to_return(lambda { |req| { :status => 200, :body => make_response_body(req, 1, 500, error), :headers => { 'Content-Type' => 'json' } } })
165
+ end
166
+
167
+ def stub_elastic_index_to_create(url="http://localhost:9200/_bulk")
168
+ error = {
169
+ "category" => "some-other-type",
170
+ "reason" => "some-other-reason",
171
+ "type" => "some-other-type"
172
+ }
173
+ stub_request(:post, url).to_return(lambda { |req| { :status => 200, :body => make_response_body(req, 0, 500, error), :headers => { 'Content-Type' => 'json' } } })
174
+ end
175
+
176
+ def stub_elastic_unexpected_response_op(url="http://localhost:9200/_bulk")
177
+ error = {
178
+ "category" => "some-other-type",
179
+ "reason" => "some-other-reason"
180
+ }
181
+ stub_request(:post, url).to_return(lambda { |req| bodystr = make_response_body(req, 0, 500, error); body = JSON.parse(bodystr); body['items'][0]['unknown'] = body['items'][0].delete('create'); { :status => 200, :body => body.to_json, :headers => { 'Content-Type' => 'json' } } })
182
+ end
183
+
59
184
  def test_configure
60
185
  config = %{
61
186
  host logs.google.com
@@ -1181,6 +1306,106 @@ class ElasticsearchOutput < Test::Unit::TestCase
1181
1306
  assert_equal(connection_resets, 1)
1182
1307
  end
1183
1308
 
1309
+ def test_bulk_bad_arguments
1310
+ log = driver.instance.router.emit_error_handler.log
1311
+ log.level = 'debug'
1312
+ driver = driver('@log_level debug')
1313
+
1314
+ stub_elastic_ping
1315
+ stub_elastic_bad_argument
1316
+
1317
+ driver.emit(sample_record)
1318
+ driver.emit(sample_record)
1319
+ driver.emit(sample_record)
1320
+ driver.run
1321
+
1322
+ matches = log.out.logs.grep /Elasticsearch rejected document:/
1323
+ assert_equal(1, matches.length, "Message 'Elasticsearch rejected document: ...' was not emitted")
1324
+ matches = log.out.logs.grep /documents due to invalid field arguments/
1325
+ assert_equal(1, matches.length, "Message 'Elasticsearch rejected # documents due to invalid field arguments ...' was not emitted")
1326
+ end
1327
+
1328
+ def test_bulk_error
1329
+ stub_elastic_ping
1330
+ stub_elastic_bulk_error
1331
+
1332
+ assert_raise(Fluent::ElasticsearchErrorHandler::ElasticsearchError) {
1333
+ driver.emit(sample_record)
1334
+ driver.emit(sample_record)
1335
+ driver.emit(sample_record)
1336
+ driver.run
1337
+ }
1338
+ end
1339
+
1340
+ def test_bulk_error_version_mismatch
1341
+ stub_elastic_ping
1342
+ stub_elastic_version_mismatch
1343
+
1344
+ assert_raise(Fluent::ElasticsearchErrorHandler::ElasticsearchVersionMismatch) {
1345
+ driver.emit(sample_record)
1346
+ driver.emit(sample_record)
1347
+ driver.emit(sample_record)
1348
+ driver.run
1349
+ }
1350
+ end
1351
+
1352
+ def test_bulk_error_unrecognized_error
1353
+ stub_elastic_ping
1354
+ stub_elastic_unrecognized_error
1355
+
1356
+ assert_raise(Fluent::ElasticsearchErrorHandler::UnrecognizedElasticsearchError) {
1357
+ driver.emit(sample_record)
1358
+ driver.emit(sample_record)
1359
+ driver.emit(sample_record)
1360
+ driver.run
1361
+ }
1362
+ end
1363
+
1364
+ def test_bulk_error_out_of_memory
1365
+ stub_elastic_ping
1366
+ stub_elastic_out_of_memory
1367
+
1368
+ assert_raise(Fluent::ElasticsearchErrorHandler::ElasticsearchOutOfMemory) {
1369
+ driver.emit(sample_record)
1370
+ driver.emit(sample_record)
1371
+ driver.emit(sample_record)
1372
+ driver.run
1373
+ }
1374
+ end
1375
+
1376
+ def test_bulk_error_queue_full
1377
+ stub_elastic_ping
1378
+ stub_elastic_bulk_rejected
1379
+
1380
+ assert_raise(Fluent::ElasticsearchErrorHandler::BulkIndexQueueFull) {
1381
+ driver.emit(sample_record)
1382
+ driver.emit(sample_record)
1383
+ driver.emit(sample_record)
1384
+ driver.run
1385
+ }
1386
+ end
1387
+
1388
+ def test_bulk_index_into_a_create
1389
+ stub_elastic_ping
1390
+ stub_elastic_index_to_create
1391
+
1392
+ assert_raise(Fluent::ElasticsearchErrorHandler::ElasticsearchError) {
1393
+ driver.emit(sample_record)
1394
+ driver.run
1395
+ }
1396
+ assert(index_cmds[0].has_key?("create"))
1397
+ end
1398
+
1399
+ def test_bulk_unexpected_response_op
1400
+ stub_elastic_ping
1401
+ stub_elastic_unexpected_response_op
1402
+
1403
+ assert_raise(Fluent::ElasticsearchErrorHandler::ElasticsearchVersionMismatch) {
1404
+ driver.emit(sample_record)
1405
+ driver.run
1406
+ }
1407
+ end
1408
+
1184
1409
  def test_update_should_not_write_if_theres_no_id
1185
1410
  driver.configure("write_operation update\n")
1186
1411
  stub_elastic_ping
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-elasticsearch
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.10.3
4
+ version: 1.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - diogo
@@ -144,6 +144,8 @@ files:
144
144
  - README.md
145
145
  - Rakefile
146
146
  - fluent-plugin-elasticsearch.gemspec
147
+ - lib/fluent/plugin/elasticsearch_constants.rb
148
+ - lib/fluent/plugin/elasticsearch_error_handler.rb
147
149
  - lib/fluent/plugin/elasticsearch_index_template.rb
148
150
  - lib/fluent/plugin/generate_hash_id_support.rb
149
151
  - lib/fluent/plugin/out_elasticsearch.rb