logstash-output-kafka 5.1.10 → 5.1.11

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 1bcc9ef8d79cfeae2d45b85067cd5e918bbb48c9
-  data.tar.gz: 4600a285a1630607f8f0f473587bbc338803d0ed
+  metadata.gz: ab8c64c41a0113d3e77d4d74adac247424d6a7c5
+  data.tar.gz: ea85a76f270acd51915e1a1a7a80da0e83e9e82e
 SHA512:
-  metadata.gz: 0d58a1bf85115b8ed45395ea08b22adbd22e40cb3f645e9633de1d3b95fa6cbce7c106acbc9061ef82924cec53fa4c7ee7222204f4a4886ece75e7ef75e9b1d4
-  data.tar.gz: a65b714676d04380af86949dc60563d45d7182d72db86232bc700e1b10c328ed3ffa105a9ec5b9bbf1b30dfb65d773e08fd7f08c740531af8d354f6b26040a86
+  metadata.gz: 18927151780d0b0d085e5472b9d8d6a07601bd847163a3bd6578b9359227c73344d4f24ef1ed498d756e3fea8594860e3cafd0ddb15a2eb566b14ed3a340f890
+  data.tar.gz: 76f4b59eec073cefafe18db46fae720cdede9a234d39aab6da3acc4e6588921e089e076afc8a2cd4542ba81572322229fd9e9332a66d38ea8420e5f7a49899f4
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
+## 5.1.11
+  - Bugfix: Sends are now retried until successful. Previously, failed transmissions to Kafka
+    could have been lost by the KafkaProducer library. Now we verify transmission explicitly.
+    This changes the default 'retry' from 0 to retry-forever. It was a bug that we defaulted
+    to a retry count of 0.
+    https://github.com/logstash-plugins/logstash-output-kafka/pull/151
+
+  - Docs: Fix misleading info about the default codec
+
 ## 5.1.10
   - Doc fixes
 
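The practical effect of the bugfix above: leaving `retries` unset now means the plugin resends failed batches until Kafka accepts them, so at-least-once delivery toward Kafka no longer needs extra configuration. A minimal pipeline sketch (the broker address and topic below are placeholders, not taken from this diff):

```
output {
  kafka {
    bootstrap_servers => "localhost:9092"  # placeholder broker
    topic_id => "logs"                     # placeholder topic
    # retries => 3  # discouraged as of 5.1.11: a finite count can drop events
  }
}
```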
data/lib/logstash/outputs/kafka.rb CHANGED
@@ -111,9 +111,15 @@ class LogStash::Outputs::Kafka < LogStash::Outputs::Base
   # elapses the client will resend the request if necessary or fail the request if
   # retries are exhausted.
   config :request_timeout_ms, :validate => :string
-  # Setting a value greater than zero will cause the client to
-  # resend any record whose send fails with a potentially transient error.
-  config :retries, :validate => :number, :default => 0
+  # The default retry behavior is to retry until successful. To prevent data loss,
+  # the use of this setting is discouraged.
+  #
+  # If you choose to set `retries`, a value greater than zero will cause the
+  # client to only retry a fixed number of times. This will result in data loss
+  # if a transient error outlasts your retry count.
+  #
+  # A value less than zero is a configuration error.
+  config :retries, :validate => :number
   # The amount of time to wait before attempting to retry a failed produce request to a given topic partition.
   config :retry_backoff_ms, :validate => :number, :default => 100
   # The size of the TCP send buffer to use when sending data.
@@ -175,6 +181,17 @@ class LogStash::Outputs::Kafka < LogStash::Outputs::Base
 
   public
   def register
+    @thread_batch_map = Concurrent::Hash.new
+
+    if !@retries.nil?
+      if @retries < 0
+        raise ConfigurationError, "A negative retry count (#{@retries}) is not valid. Must be a value >= 0"
+      end
+
+      @logger.warn("Kafka output is configured with finite retry. This instructs Logstash to LOSE DATA after a set number of send attempts fails. If you do not want to lose data if Kafka is down, then you must remove the retry setting.", :retries => @retries)
+    end
+
+
     @producer = create_producer
     @codec.on_event do |event, data|
       begin
@@ -183,7 +200,7 @@ class LogStash::Outputs::Kafka < LogStash::Outputs::Base
         else
           record = org.apache.kafka.clients.producer.ProducerRecord.new(event.sprintf(@topic_id), event.sprintf(@message_key), data)
         end
-        @producer.send(record)
+        prepare(record)
       rescue LogStash::ShutdownSignal
         @logger.debug('Kafka producer got shutdown signal')
       rescue => e
@@ -191,14 +208,89 @@ class LogStash::Outputs::Kafka < LogStash::Outputs::Base
                       :exception => e)
       end
     end
-
   end # def register
 
-  def receive(event)
-    if event == LogStash::SHUTDOWN
-      return
+  def prepare(record)
+    # This output is threadsafe, so we need to keep a batch per thread.
+    @thread_batch_map[Thread.current].add(record)
+  end
+
+  def multi_receive(events)
+    t = Thread.current
+    if !@thread_batch_map.include?(t)
+      @thread_batch_map[t] = java.util.ArrayList.new(events.size)
+    end
+
+    events.each do |event|
+      break if event == LogStash::SHUTDOWN
+      @codec.encode(event)
+    end
+
+    batch = @thread_batch_map[t]
+    if batch.any?
+      retrying_send(batch)
+      batch.clear
     end
-    @codec.encode(event)
+  end
+
+  def retrying_send(batch)
+    remaining = @retries;
+
+    while batch.any?
+      if !remaining.nil?
+        if remaining < 0
+          # TODO(sissel): Offer to DLQ? Then again, if it's a transient fault,
+          # DLQing would make things worse (you dlq data that would be successful
+          # after the fault is repaired)
+          logger.info("Exhausted user-configured retry count when sending to Kafka. Dropping these events.",
+                      :max_retries => @retries, :drop_count => batch.count)
+          break
+        end
+
+        remaining -= 1
+      end
+
+      failures = []
+
+      futures = batch.collect do |record|
+        begin
+          # send() can throw an exception even before the future is created.
+          @producer.send(record)
+        rescue org.apache.kafka.common.errors.TimeoutException => e
+          failures << record
+          nil
+        rescue org.apache.kafka.common.errors.InterruptException => e
+          failures << record
+          nil
+        rescue org.apache.kafka.common.errors.SerializationException => e
+          # TODO(sissel): Retrying will fail because the data itself has a problem serializing.
+          # TODO(sissel): Let's add DLQ here.
+          failures << record
+          nil
+        end
+      end.compact
+
+      futures.each_with_index do |future, i|
+        begin
+          result = future.get()
+        rescue => e
+          # TODO(sissel): Add metric to count failures, possibly by exception type.
+          logger.debug? && logger.debug("KafkaProducer.send() failed: #{e}", :exception => e);
+          failures << batch[i]
+        end
+      end
+
+      # No failures? Cool. Let's move on.
+      break if failures.empty?
+
+      # Otherwise, retry with any failed transmissions
+      batch = failures
+      delay = 1.0 / @retry_backoff_ms
+      logger.info("Sending batch to Kafka failed. Will retry after a delay.", :batch_size => batch.size,
+                  :failures => failures.size, :sleep => delay);
+      sleep(delay)
+    end
+
   end
 
   def close
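The key mechanism behind the new `retrying_send` is that `KafkaProducer#send` is asynchronous: it returns a `java.util.concurrent.Future`, and a record accepted locally can still fail on the broker. Only `Future#get` surfaces that failure, which is why the plugin now collects the futures and checks each one instead of firing and forgetting. A minimal sketch of the idea (standalone, not plugin code; `producer` and `record` stand in for a live KafkaProducer and ProducerRecord):

```ruby
# send() only queues the record and returns immediately.
future = producer.send(record)
begin
  metadata = future.get  # blocks until the broker acks; raises on failure
rescue java.util.concurrent.ExecutionException => e
  # The broker-side send failed; the record must be resent (or dropped).
end
```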
@@ -222,8 +314,8 @@ class LogStash::Outputs::Kafka < LogStash::Outputs::Base
     props.put(kafka::MAX_REQUEST_SIZE_CONFIG, max_request_size.to_s)
     props.put(kafka::RECONNECT_BACKOFF_MS_CONFIG, reconnect_backoff_ms) unless reconnect_backoff_ms.nil?
     props.put(kafka::REQUEST_TIMEOUT_MS_CONFIG, request_timeout_ms) unless request_timeout_ms.nil?
-    props.put(kafka::RETRIES_CONFIG, retries.to_s)
-    props.put(kafka::RETRY_BACKOFF_MS_CONFIG, retry_backoff_ms.to_s)
+    props.put(kafka::RETRIES_CONFIG, retries.to_s) unless retries.nil?
+    props.put(kafka::RETRY_BACKOFF_MS_CONFIG, retry_backoff_ms.to_s)
     props.put(kafka::SEND_BUFFER_CONFIG, send_buffer_bytes.to_s)
     props.put(kafka::VALUE_SERIALIZER_CLASS_CONFIG, value_serializer)
 
data/logstash-output-kafka.gemspec CHANGED
@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
 
   s.name = 'logstash-output-kafka'
-  s.version = '5.1.10'
+  s.version = '5.1.11'
   s.licenses = ['Apache License (2.0)']
   s.summary = 'Output events to a Kafka topic. This uses the Kafka Producer API to write messages to a topic on the broker'
   s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
data/spec/integration/outputs/kafka_spec.rb CHANGED
@@ -157,7 +157,7 @@ describe "outputs/kafka", :integration => true do
   def load_kafka_data(config)
     kafka = LogStash::Outputs::Kafka.new(config)
     kafka.register
-    num_events.times do kafka.receive(event) end
+    kafka.multi_receive(num_events.times.collect { event })
     kafka.close
   end
 
data/spec/unit/outputs/kafka_spec.rb CHANGED
@@ -25,34 +25,118 @@ describe "outputs/kafka" do
   context 'when outputting messages' do
     it 'should send logstash event to kafka broker' do
       expect_any_instance_of(org.apache.kafka.clients.producer.KafkaProducer).to receive(:send)
-        .with(an_instance_of(org.apache.kafka.clients.producer.ProducerRecord))
+        .with(an_instance_of(org.apache.kafka.clients.producer.ProducerRecord)).and_call_original
       kafka = LogStash::Outputs::Kafka.new(simple_kafka_config)
       kafka.register
-      kafka.receive(event)
+      kafka.multi_receive([event])
     end
 
     it 'should support Event#sprintf placeholders in topic_id' do
       topic_field = 'topic_name'
       expect(org.apache.kafka.clients.producer.ProducerRecord).to receive(:new)
-        .with("my_topic", event.to_s)
-      expect_any_instance_of(org.apache.kafka.clients.producer.KafkaProducer).to receive(:send)
+        .with("my_topic", event.to_s).and_call_original
+      expect_any_instance_of(org.apache.kafka.clients.producer.KafkaProducer).to receive(:send).and_call_original
       kafka = LogStash::Outputs::Kafka.new({'topic_id' => "%{#{topic_field}}"})
       kafka.register
-      kafka.receive(event)
+      kafka.multi_receive([event])
     end
 
     it 'should support field referenced message_keys' do
       expect(org.apache.kafka.clients.producer.ProducerRecord).to receive(:new)
-        .with("test", "172.0.0.1", event.to_s)
-      expect_any_instance_of(org.apache.kafka.clients.producer.KafkaProducer).to receive(:send)
+        .with("test", "172.0.0.1", event.to_s).and_call_original
+      expect_any_instance_of(org.apache.kafka.clients.producer.KafkaProducer).to receive(:send).and_call_original
       kafka = LogStash::Outputs::Kafka.new(simple_kafka_config.merge({"message_key" => "%{host}"}))
       kafka.register
-      kafka.receive(event)
+      kafka.multi_receive([event])
     end
 
     it 'should raise config error when truststore location is not set and ssl is enabled' do
-      kafka = LogStash::Outputs::Kafka.new(simple_kafka_config.merge({"ssl" => "true"}))
+      kafka = LogStash::Outputs::Kafka.new(simple_kafka_config.merge("security_protocol" => "SSL"))
       expect { kafka.register }.to raise_error(LogStash::ConfigurationError, /ssl_truststore_location must be set when SSL is enabled/)
     end
   end
+
+  context "when KafkaProducer#send() raises an exception" do
+    let(:failcount) { (rand * 10).to_i }
+    let(:sendcount) { failcount + 1 }
+
+    let(:exception_classes) { [
+      org.apache.kafka.common.errors.TimeoutException,
+      org.apache.kafka.common.errors.InterruptException,
+      org.apache.kafka.common.errors.SerializationException
+    ] }
+
+    before do
+      count = 0
+      expect_any_instance_of(org.apache.kafka.clients.producer.KafkaProducer).to receive(:send)
+        .exactly(sendcount).times
+        .and_wrap_original do |m, *args|
+        if count < failcount # fail 'failcount' times in a row.
+          count += 1
+          # Pick an exception at random
+          raise exception_classes.shuffle.first.new("injected exception for testing")
+        else
+          m.call(*args) # call original
+        end
+      end
+    end
+
+    it "should retry until successful" do
+      kafka = LogStash::Outputs::Kafka.new(simple_kafka_config)
+      kafka.register
+      kafka.multi_receive([event])
+    end
+  end
+
+  context "when a send fails" do
+    context "and the default retries behavior is used" do
+      # Fail this many times and then finally succeed.
+      let(:failcount) { (rand * 10).to_i }
+
+      # Expect KafkaProducer.send() to get called again after every failure, plus the successful one.
+      let(:sendcount) { failcount + 1 }
+
+      it "should retry until successful" do
+        count = 0;
+
+        expect_any_instance_of(org.apache.kafka.clients.producer.KafkaProducer).to receive(:send)
+          .exactly(sendcount).times
+          .and_wrap_original do |m, *args|
+          if count < failcount
+            count += 1
+            # inject some failures.
+
+            # Return a custom Future that will raise an exception to simulate a Kafka send() problem.
+            future = java.util.concurrent.FutureTask.new { raise "Failed" }
+            future.run
+            future
+          else
+            m.call(*args)
+          end
+        end
+        kafka = LogStash::Outputs::Kafka.new(simple_kafka_config)
+        kafka.register
+        kafka.multi_receive([event])
+      end
+    end
+
+    context "and when retries is set by the user" do
+      let(:retries) { (rand * 10).to_i }
+      let(:max_sends) { retries + 1 }
+
+      it "should give up after retries are exhausted" do
+        expect_any_instance_of(org.apache.kafka.clients.producer.KafkaProducer).to receive(:send)
+          .at_most(max_sends).times
+          .and_wrap_original do |m, *args|
+          # Always fail.
+          future = java.util.concurrent.FutureTask.new { raise "Failed" }
+          future.run
+          future
+        end
+        kafka = LogStash::Outputs::Kafka.new(simple_kafka_config.merge("retries" => retries))
+        kafka.register
+        kafka.multi_receive([event])
+      end
+    end
+  end
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: logstash-output-kafka
 version: !ruby/object:Gem::Version
-  version: 5.1.10
+  version: 5.1.11
 platform: ruby
 authors:
 - Elasticsearch
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-08-21 00:00:00.000000000 Z
+date: 2017-10-09 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement