fluent-plugin-elasticsearch 1.10.3 → 1.11.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4ee62e58133f9ebcd78a6ebfa0eb8adb741ea207
4
- data.tar.gz: 848f36e2b35efe4013195a5b79eea21515973962
3
+ metadata.gz: 6d8cd27ced7ebf6f0c8d1a2026032b683af8eadd
4
+ data.tar.gz: 98591a5682b51f4ca97fb206620cec059c57ca74
5
5
  SHA512:
6
- metadata.gz: 5f5f61edcc15c83d546411455603554fc2e2ff7d72cc88b1ccdebed4d2b411ce5cfb5625552ec4536911ecfe9ad02660b3a1634e38dd424f67dd55cb0f75be7a
7
- data.tar.gz: 1508c853846a2142508e06a4e6c213377ede90cf5ae0c691ced885477787eb0e1df1a26c6338fe33d21e77e31dfc8c3ee740317b05e90af45bc7f6e43dae7459
6
+ metadata.gz: c9291c5f41c33732d5dc467e6171446670c1deca8ec79d2c01085e1aeda17bec50206573cede541766a0a6ddce2a566c7d8cc5d2531a410173b2ea07ad98c11e
7
+ data.tar.gz: c7c9c6badfddfd12190837ad504355d2758195c7318fef04b21ce67a0c6a051157a4b89aed074559f9f2696f7af9fc408d2f27ab07db0ae8c6e63214beee64bc
data/History.md CHANGED
@@ -4,6 +4,9 @@
4
4
  - Log ES response errors (#230)
5
5
  - Use latest elasticsearch-ruby (#240)
6
6
 
7
+ ### 1.11.0
8
+ - Backport bulk error handling (#324)
9
+
7
10
  ### 1.10.3
8
11
  - Backport the hash ID generation mechanism to avoid record duplication (#323)
9
12
 
@@ -3,7 +3,7 @@ $:.push File.expand_path('../lib', __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = 'fluent-plugin-elasticsearch'
6
- s.version = '1.10.3'
6
+ s.version = '1.11.0'
7
7
  s.authors = ['diogo', 'pitr']
8
8
  s.email = ['pitr.vern@gmail.com', 'me@diogoterror.com']
9
9
  s.description = %q{ElasticSearch output plugin for Fluent event collector}
@@ -0,0 +1,11 @@
1
module Fluent
  # String constants shared by the Elasticsearch output plugin and its bulk
  # error handler. Every value is frozen so it can be reused without
  # allocating a new String on each use.
  module ElasticsearchConstants
    # Separator placed between lines of a bulk request body.
    BODY_DELIMITER  = "\n".freeze

    # Names of the bulk write operations.
    UPDATE_OP       = 'update'.freeze
    UPSERT_OP       = 'upsert'.freeze
    CREATE_OP       = 'create'.freeze
    INDEX_OP        = 'index'.freeze

    # Field names used in bulk metadata and records.
    ID_FIELD        = '_id'.freeze
    TIMESTAMP_FIELD = '@timestamp'.freeze
  end
end
@@ -0,0 +1,96 @@
1
+ require_relative 'elasticsearch_constants'
2
+
3
# Interprets the per-item results of an Elasticsearch bulk response on behalf
# of an output plugin: tallies the outcomes, logs a summary through the
# plugin's logger and raises one of the nested error classes when the response
# contains failures other than duplicates or rejected (400) documents.
#
# The plugin object passed in must respond to +write_operation+ and +log+.
class Fluent::ElasticsearchErrorHandler
  include Fluent::ElasticsearchConstants

  # Counters maintained by the caller; this class only stores them.
  attr_accessor :records, :bulk_message_count

  class BulkIndexQueueFull < StandardError; end
  class ElasticsearchOutOfMemory < StandardError; end
  class ElasticsearchVersionMismatch < StandardError; end
  class UnrecognizedElasticsearchError < StandardError; end
  class ElasticsearchError < StandardError; end

  # @param plugin [Object] the output plugin (provides +write_operation+ and +log+)
  # @param records [Integer] initial value of the +records+ counter
  # @param bulk_message_count [Integer] initial value of the +bulk_message_count+ counter
  def initialize(plugin, records = 0, bulk_message_count = 0)
    @plugin = plugin
    @records = records
    @bulk_message_count = bulk_message_count
  end

  # Walks every entry of response['items'], classifies it by HTTP status and
  # raises if the bulk request needs attention.
  #
  # @param response [Hash] parsed bulk response; must contain an 'items' array
  # @raise [ElasticsearchVersionMismatch] an item lacks the expected operation
  #   key, a 'status' field, or (for 429/500) an 'error' hash with a 'type'
  # @raise [UnrecognizedElasticsearchError] an item has a status other than
  #   200, 201, 400, 429, 500 (or 409 on a create)
  # @raise [ElasticsearchOutOfMemory] error type 'out_of_memory_error'
  # @raise [BulkIndexQueueFull] error type 'es_rejected_execution_exception'
  # @raise [ElasticsearchError] any other 429/500 error type
  def handle_error(response)
    errors = Hash.new(0)
    errors_bad_resp = 0
    errors_unrecognized = 0
    successes = 0
    duplicates = 0
    bad_arguments = 0
    response['items'].each do |item|
      if item.has_key?(@plugin.write_operation)
        write_operation = @plugin.write_operation
      elsif INDEX_OP == @plugin.write_operation && item.has_key?(CREATE_OP)
        # An "index" request may be answered with a "create" result.
        write_operation = CREATE_OP
      else
        # When we don't have an expected ops field, something changed in the API
        # expected return values (ES 2.x)
        errors_bad_resp += 1
        next
      end
      if item[write_operation].has_key?('status')
        status = item[write_operation]['status']
      else
        # When we don't have a status field, something changed in the API
        # expected return values (ES 2.x)
        errors_bad_resp += 1
        next
      end
      case
      when CREATE_OP == write_operation && 409 == status
        duplicates += 1
      when 400 == status
        bad_arguments += 1
        @plugin.log.debug "Elasticsearch rejected document: #{item}"
      when [429, 500].include?(status)
        # `error` is not guaranteed to be a Hash (older Elasticsearch releases
        # reportedly return a plain string here). Treat anything that is not a
        # Hash carrying a 'type' as an unparseable response instead of failing
        # with NoMethodError on has_key?.
        error = item[write_operation]['error']
        if error.is_a?(Hash) && error.has_key?('type')
          type = error['type']
        else
          # When we don't have a type field, something changed in the API
          # expected return values (ES 2.x)
          errors_bad_resp += 1
          next
        end
        errors[type] += 1
      when [200, 201].include?(status)
        successes += 1
      else
        errors_unrecognized += 1
      end
    end
    if errors_bad_resp > 0
      msg = "Unable to parse error response from Elasticsearch, likely an API version mismatch #{response}"
      @plugin.log.error msg
      raise ElasticsearchVersionMismatch, msg
    end
    if bad_arguments > 0
      @plugin.log.warn "Elasticsearch rejected #{bad_arguments} documents due to invalid field arguments"
    end
    if duplicates > 0
      @plugin.log.info "Encountered #{duplicates} duplicate(s) of #{successes} indexing chunk, ignoring"
    end
    msg = "Indexed (op = #{@plugin.write_operation}) #{successes} successfully, #{duplicates} duplicate(s), #{bad_arguments} bad argument(s), #{errors_unrecognized} unrecognized error(s)"
    errors.each_key do |key|
      msg << ", #{errors[key]} #{key} error(s)"
    end
    @plugin.log.debug msg
    if errors_unrecognized > 0
      raise UnrecognizedElasticsearchError, "Unrecognized elasticsearch errors returned, retrying #{response}"
    end
    # Raises for the first recorded error type (hash insertion order).
    errors.each_key do |key|
      case key
      when 'out_of_memory_error'
        raise ElasticsearchOutOfMemory, "Elasticsearch has exhausted its heap, retrying"
      when 'es_rejected_execution_exception'
        raise BulkIndexQueueFull, "Bulk index queue is full, retrying"
      else
        raise ElasticsearchError, "Elasticsearch errors returned, retrying #{response}"
      end
    end
  end
end
@@ -10,6 +10,8 @@ rescue LoadError
10
10
  end
11
11
 
12
12
  require 'fluent/output'
13
+ require_relative 'elasticsearch_constants'
14
+ require_relative 'elasticsearch_error_handler'
13
15
  require_relative 'elasticsearch_index_template'
14
16
  require_relative 'generate_hash_id_support'
15
17
 
@@ -69,6 +71,7 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
69
71
 
70
72
  include Fluent::ElasticsearchIndexTemplate
71
73
  include Fluent::GenerateHashIdSupport
74
+ include Fluent::ElasticsearchConstants
72
75
 
73
76
  def initialize
74
77
  super
@@ -305,8 +308,10 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
305
308
  bulk_message = ''
306
309
  header = {}
307
310
  meta = {}
311
+ @error = Fluent::ElasticsearchErrorHandler.new(self)
308
312
 
309
313
  chunk.msgpack_each do |time, record|
314
+ @error.records += 1
310
315
  next unless record.is_a? Hash
311
316
 
312
317
  if @flatten_hashes
@@ -373,6 +378,7 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
373
378
  end
374
379
 
375
380
  append_record_to_messages(@write_operation, meta, header, record, bulk_message)
381
+ @error.bulk_message_count += 1
376
382
  end
377
383
 
378
384
  send_bulk(bulk_message) unless bulk_message.empty?
@@ -391,6 +397,7 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
391
397
  begin
392
398
  response = client.bulk body: data
393
399
  if response['errors']
400
+ @error.handle_error(response)
394
401
  log.error "Could not push log to Elasticsearch: #{response}"
395
402
  end
396
403
  rescue *client.transport.host_unreachable_exceptions => e
@@ -1,5 +1,6 @@
1
1
  require 'helper'
2
2
  require 'date'
3
+ require 'json'
3
4
  require 'flexmock/test_unit'
4
5
 
5
6
  class ElasticsearchOutput < Test::Unit::TestCase
@@ -56,6 +57,130 @@ class ElasticsearchOutput < Test::Unit::TestCase
56
57
  end
57
58
  end
58
59
 
60
# Builds the JSON body of a fake bulk response for the bulk request +req+.
#
# The request body is read as newline-separated JSON in (action header,
# document) pairs. Each complete pair yields one successful item. When
# +error_el+, +error_status+ and +error+ are all non-nil, the item at index
# +error_el+ is turned into a failure carrying that status and error.
# The generated items are also stored in @index_cmds.
#
# @return [String] JSON document with 'took', 'errors' and 'items'
def make_response_body(req, error_el = nil, error_status = nil, error = nil)
  parsed_lines = req.body.split("\n").map { |line| JSON.parse(line) }
  generated_id = 1
  items = []

  parsed_lines.each_slice(2) do |pair|
    header = pair[0]
    op = header.keys[0]
    meta = header[op]
    index = meta['_index']
    type = meta['_type']
    if meta.has_key?('_id')
      id = meta['_id']
    else
      # Note: this appears to be an undocumented feature of Elasticsearch
      # https://www.elastic.co/guide/en/elasticsearch/reference/2.4/docs-bulk.html
      # When you submit an "index" write_operation, with no "_id" field in the
      # metadata header, Elasticsearch will turn this into a "create"
      # operation in the response.
      op = "create" if "index" == op
      id = generated_id
      generated_id += 1
    end

    # A trailing header without a document line produces no item.
    next if pair.size < 2

    items << {
      op => {
        '_index' => index, '_type' => type, '_id' => id, '_version' => 1,
        '_shards' => { 'total' => 1, 'successful' => 1, 'failed' => 0 },
        'status' => (op == 'create' ? 201 : 200)
      }
    }
  end

  errors = [error_el, error_status, error].none?(&:nil?)
  if errors
    failed_item = items[error_el]
    failed_result = failed_item[failed_item.keys[0]]
    failed_result.delete('_version')
    failed_result.delete('_shards')
    failed_result['error'] = error
    failed_result['status'] = error_status
  end

  @index_cmds = items
  { 'took' => 6, 'errors' => errors, 'items' => items }.to_json
end
114
+
115
# Stubs POST +url+ with a bulk response whose item at index 1 is rejected
# with status 400 and a mapper_parsing_exception error.
def stub_elastic_bad_argument(url="http://localhost:9200/_bulk")
  parse_failure = {
    "type" => "mapper_parsing_exception",
    "reason" => "failed to parse [...]",
    "caused_by" => {
      "type" => "illegal_argument_exception",
      "reason" => "Invalid format: \"...\""
    }
  }
  responder = lambda do |req|
    {
      :status => 200,
      :body => make_response_body(req, 1, 400, parse_failure),
      :headers => { 'Content-Type' => 'json' }
    }
  end
  stub_request(:post, url).to_return(responder)
end
126
+
127
# Stubs POST +url+ with a bulk response whose item at index 1 fails with
# status 500 and an error type that has no dedicated exception class.
def stub_elastic_bulk_error(url="http://localhost:9200/_bulk")
  generic_failure = {
    "type" => "some-unrecognized-error",
    "reason" => "some message printed here ...",
  }
  responder = lambda do |req|
    {
      :status => 200,
      :body => make_response_body(req, 1, 500, generic_failure),
      :headers => { 'Content-Type' => 'json' }
    }
  end
  stub_request(:post, url).to_return(responder)
end
134
+
135
# Stubs POST +url+ with a bulk response whose item at index 1 is rejected
# with status 429 and an es_rejected_execution_exception error.
def stub_elastic_bulk_rejected(url="http://localhost:9200/_bulk")
  rejection = {
    "type" => "es_rejected_execution_exception",
    "reason" => "rejected execution of org.elasticsearch.transport.TransportService$4@1a34d37a on EsThreadPoolExecutor[bulk, queue capacity = 50, org.elasticsearch.common.util.concurrent.EsThreadPoolExecutor@312a2162[Running, pool size = 32, active threads = 32, queued tasks = 50, completed tasks = 327053]]"
  }
  responder = lambda do |req|
    {
      :status => 200,
      :body => make_response_body(req, 1, 429, rejection),
      :headers => { 'Content-Type' => 'json' }
    }
  end
  stub_request(:post, url).to_return(responder)
end
142
+
143
# Stubs POST +url+ with a bulk response whose item at index 1 fails with
# status 500 and an out_of_memory_error error.
def stub_elastic_out_of_memory(url="http://localhost:9200/_bulk")
  heap_failure = {
    "type" => "out_of_memory_error",
    "reason" => "Java heap space"
  }
  responder = lambda do |req|
    {
      :status => 200,
      :body => make_response_body(req, 1, 500, heap_failure),
      :headers => { 'Content-Type' => 'json' }
    }
  end
  stub_request(:post, url).to_return(responder)
end
150
+
151
# Stubs POST +url+ with a bulk response whose item at index 1 carries
# status 504, a status the error handler has no specific branch for.
def stub_elastic_unrecognized_error(url="http://localhost:9200/_bulk")
  odd_failure = {
    "type" => "some-other-type",
    "reason" => "some-other-reason"
  }
  responder = lambda do |req|
    {
      :status => 200,
      :body => make_response_body(req, 1, 504, odd_failure),
      :headers => { 'Content-Type' => 'json' }
    }
  end
  stub_request(:post, url).to_return(responder)
end
158
+
159
# Stubs POST +url+ with a bulk response whose item at index 1 has status 500
# and an error hash without a "type" key (it uses "category" instead).
def stub_elastic_version_mismatch(url="http://localhost:9200/_bulk")
  typeless_failure = {
    "category" => "some-other-type",
    "reason" => "some-other-reason"
  }
  responder = lambda do |req|
    {
      :status => 200,
      :body => make_response_body(req, 1, 500, typeless_failure),
      :headers => { 'Content-Type' => 'json' }
    }
  end
  stub_request(:post, url).to_return(responder)
end
166
+
167
# Stubs POST +url+ with a bulk response whose first item (index 0) fails
# with status 500 and an error hash that includes a "type".
def stub_elastic_index_to_create(url="http://localhost:9200/_bulk")
  typed_failure = {
    "category" => "some-other-type",
    "reason" => "some-other-reason",
    "type" => "some-other-type"
  }
  responder = lambda do |req|
    {
      :status => 200,
      :body => make_response_body(req, 0, 500, typed_failure),
      :headers => { 'Content-Type' => 'json' }
    }
  end
  stub_request(:post, url).to_return(responder)
end
175
+
176
# Stubs POST +url+ with a bulk response whose first item has its "create"
# key renamed to "unknown", so the item carries no expected operation key.
def stub_elastic_unexpected_response_op(url="http://localhost:9200/_bulk")
  error = {
    "category" => "some-other-type",
    "reason" => "some-other-reason"
  }
  responder = lambda do |req|
    body = JSON.parse(make_response_body(req, 0, 500, error))
    first_item = body['items'][0]
    # Move the result under a key the error handler does not recognise.
    first_item['unknown'] = first_item.delete('create')
    { :status => 200, :body => body.to_json, :headers => { 'Content-Type' => 'json' } }
  end
  stub_request(:post, url).to_return(responder)
end
183
+
59
184
  def test_configure
60
185
  config = %{
61
186
  host logs.google.com
@@ -1181,6 +1306,106 @@ class ElasticsearchOutput < Test::Unit::TestCase
1181
1306
  assert_equal(connection_resets, 1)
1182
1307
  end
1183
1308
 
1309
# Emits three records against a bulk stub that rejects one item with a 400
# and asserts that each of the two expected log messages appears exactly once.
def test_bulk_bad_arguments
  log = driver.instance.router.emit_error_handler.log
  log.level = 'debug'
  debug_driver = driver('@log_level debug')

  stub_elastic_ping
  stub_elastic_bad_argument

  3.times { debug_driver.emit(sample_record) }
  debug_driver.run

  rejected_doc_lines = log.out.logs.grep(/Elasticsearch rejected document:/)
  assert_equal(1, rejected_doc_lines.length, "Message 'Elasticsearch rejected document: ...' was not emitted")
  rejected_count_lines = log.out.logs.grep(/documents due to invalid field arguments/)
  assert_equal(1, rejected_count_lines.length, "Message 'Elasticsearch rejected # documents due to invalid field arguments ...' was not emitted")
end
1327
+
1328
# A 500 item with an error type that has no dedicated exception must raise
# the generic ElasticsearchError.
def test_bulk_error
  stub_elastic_ping
  stub_elastic_bulk_error

  assert_raise(Fluent::ElasticsearchErrorHandler::ElasticsearchError) do
    3.times { driver.emit(sample_record) }
    driver.run
  end
end
1339
+
1340
# A 500 item whose error hash has no "type" must raise
# ElasticsearchVersionMismatch.
def test_bulk_error_version_mismatch
  stub_elastic_ping
  stub_elastic_version_mismatch

  assert_raise(Fluent::ElasticsearchErrorHandler::ElasticsearchVersionMismatch) do
    3.times { driver.emit(sample_record) }
    driver.run
  end
end
1351
+
1352
# An item with a status the handler does not recognise (504 in the stub)
# must raise UnrecognizedElasticsearchError.
def test_bulk_error_unrecognized_error
  stub_elastic_ping
  stub_elastic_unrecognized_error

  assert_raise(Fluent::ElasticsearchErrorHandler::UnrecognizedElasticsearchError) do
    3.times { driver.emit(sample_record) }
    driver.run
  end
end
1363
+
1364
# A 500 item of type out_of_memory_error must raise ElasticsearchOutOfMemory.
def test_bulk_error_out_of_memory
  stub_elastic_ping
  stub_elastic_out_of_memory

  assert_raise(Fluent::ElasticsearchErrorHandler::ElasticsearchOutOfMemory) do
    3.times { driver.emit(sample_record) }
    driver.run
  end
end
1375
+
1376
# A 429 item of type es_rejected_execution_exception must raise
# BulkIndexQueueFull.
def test_bulk_error_queue_full
  stub_elastic_ping
  stub_elastic_bulk_rejected

  assert_raise(Fluent::ElasticsearchErrorHandler::BulkIndexQueueFull) do
    3.times { driver.emit(sample_record) }
    driver.run
  end
end
1387
+
1388
# With a single emitted record whose response item is reported under
# "create", the handler must still raise ElasticsearchError, and the first
# recorded response item must be keyed by "create".
def test_bulk_index_into_a_create
  stub_elastic_ping
  stub_elastic_index_to_create

  assert_raise(Fluent::ElasticsearchErrorHandler::ElasticsearchError) do
    driver.emit(sample_record)
    driver.run
  end
  first_item = index_cmds[0]
  assert(first_item.has_key?("create"))
end
1398
+
1399
# A response item keyed by an unexpected operation name must raise
# ElasticsearchVersionMismatch.
def test_bulk_unexpected_response_op
  stub_elastic_ping
  stub_elastic_unexpected_response_op

  assert_raise(Fluent::ElasticsearchErrorHandler::ElasticsearchVersionMismatch) do
    driver.emit(sample_record)
    driver.run
  end
end
1408
+
1184
1409
  def test_update_should_not_write_if_theres_no_id
1185
1410
  driver.configure("write_operation update\n")
1186
1411
  stub_elastic_ping
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-elasticsearch
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.10.3
4
+ version: 1.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - diogo
@@ -144,6 +144,8 @@ files:
144
144
  - README.md
145
145
  - Rakefile
146
146
  - fluent-plugin-elasticsearch.gemspec
147
+ - lib/fluent/plugin/elasticsearch_constants.rb
148
+ - lib/fluent/plugin/elasticsearch_error_handler.rb
147
149
  - lib/fluent/plugin/elasticsearch_index_template.rb
148
150
  - lib/fluent/plugin/generate_hash_id_support.rb
149
151
  - lib/fluent/plugin/out_elasticsearch.rb