logstash-output-kafka 5.1.10 → 5.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1bcc9ef8d79cfeae2d45b85067cd5e918bbb48c9
4
- data.tar.gz: 4600a285a1630607f8f0f473587bbc338803d0ed
3
+ metadata.gz: ab8c64c41a0113d3e77d4d74adac247424d6a7c5
4
+ data.tar.gz: ea85a76f270acd51915e1a1a7a80da0e83e9e82e
5
5
  SHA512:
6
- metadata.gz: 0d58a1bf85115b8ed45395ea08b22adbd22e40cb3f645e9633de1d3b95fa6cbce7c106acbc9061ef82924cec53fa4c7ee7222204f4a4886ece75e7ef75e9b1d4
7
- data.tar.gz: a65b714676d04380af86949dc60563d45d7182d72db86232bc700e1b10c328ed3ffa105a9ec5b9bbf1b30dfb65d773e08fd7f08c740531af8d354f6b26040a86
6
+ metadata.gz: 18927151780d0b0d085e5472b9d8d6a07601bd847163a3bd6578b9359227c73344d4f24ef1ed498d756e3fea8594860e3cafd0ddb15a2eb566b14ed3a340f890
7
+ data.tar.gz: 76f4b59eec073cefafe18db46fae720cdede9a234d39aab6da3acc4e6588921e089e076afc8a2cd4542ba81572322229fd9e9332a66d38ea8420e5f7a49899f4
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
1
+ ## 5.1.11
2
+ - Bugfix: Sends are now retried until successful. Previously, failed transmissions to Kafka
3
+ could have been lost by the KafkaProducer library. Now we verify transmission explicitly.
4
+ This changes the default of the `retries` setting from 0 to retry-forever. It was a bug that we defaulted
5
+ to a retry count of 0.
6
+ https://github.com/logstash-plugins/logstash-output-kafka/pull/151
7
+
8
+ - Docs: Fix misleading info about the default codec
9
+
1
10
  ## 5.1.10
2
11
  - Doc fixes
3
12
 
@@ -111,9 +111,15 @@ class LogStash::Outputs::Kafka < LogStash::Outputs::Base
111
111
  # elapses the client will resend the request if necessary or fail the request if
112
112
  # retries are exhausted.
113
113
  config :request_timeout_ms, :validate => :string
114
- # Setting a value greater than zero will cause the client to
115
- # resend any record whose send fails with a potentially transient error.
116
- config :retries, :validate => :number, :default => 0
114
+ # The default retry behavior is to retry until successful. To prevent data loss,
115
+ # the use of this setting is discouraged.
116
+ #
117
+ # If you choose to set `retries`, a value greater than zero will cause the
118
+ # client to only retry a fixed number of times. This will result in data loss
119
+ # if a transient error outlasts your retry count.
120
+ #
121
+ # A value less than zero is a configuration error.
122
+ config :retries, :validate => :number
117
123
  # The amount of time to wait before attempting to retry a failed produce request to a given topic partition.
118
124
  config :retry_backoff_ms, :validate => :number, :default => 100
119
125
  # The size of the TCP send buffer to use when sending data.
@@ -175,6 +181,17 @@ class LogStash::Outputs::Kafka < LogStash::Outputs::Base
175
181
 
176
182
  public
177
183
  def register
184
+ @thread_batch_map = Concurrent::Hash.new
185
+
186
+ if !@retries.nil?
187
+ if @retries < 0
188
+ raise ConfigurationError, "A negative retry count (#{@retries}) is not valid. Must be a value >= 0"
189
+ end
190
+
191
+ @logger.warn("Kafka output is configured with finite retry. This instructs Logstash to LOSE DATA after a set number of send attempts fails. If you do not want to lose data if Kafka is down, then you must remove the retry setting.", :retries => @retries)
192
+ end
193
+
194
+
178
195
  @producer = create_producer
179
196
  @codec.on_event do |event, data|
180
197
  begin
@@ -183,7 +200,7 @@ class LogStash::Outputs::Kafka < LogStash::Outputs::Base
183
200
  else
184
201
  record = org.apache.kafka.clients.producer.ProducerRecord.new(event.sprintf(@topic_id), event.sprintf(@message_key), data)
185
202
  end
186
- @producer.send(record)
203
+ prepare(record)
187
204
  rescue LogStash::ShutdownSignal
188
205
  @logger.debug('Kafka producer got shutdown signal')
189
206
  rescue => e
@@ -191,14 +208,89 @@ class LogStash::Outputs::Kafka < LogStash::Outputs::Base
191
208
  :exception => e)
192
209
  end
193
210
  end
194
-
195
211
  end # def register
196
212
 
197
- def receive(event)
198
- if event == LogStash::SHUTDOWN
199
- return
213
# Queues an encoded Kafka record on the calling thread's pending batch.
#
# The output is threadsafe, so every thread accumulates into its own batch,
# looked up in @thread_batch_map by Thread.current.
def prepare(record)
  pending = @thread_batch_map[Thread.current]
  pending.add(record)
end
217
+
218
# Encodes a list of events and sends the resulting records to Kafka as one batch.
#
# Each calling thread owns a batch stored in @thread_batch_map; it is created
# on the thread's first call. Encoding stops at the first LogStash::SHUTDOWN
# marker. Whatever the batch holds afterwards is handed to #retrying_send and
# then cleared.
def multi_receive(events)
  current = Thread.current
  unless @thread_batch_map.include?(current)
    @thread_batch_map[current] = java.util.ArrayList.new(events.size)
  end

  events.each do |event|
    break if event == LogStash::SHUTDOWN
    @codec.encode(event)
  end

  pending = @thread_batch_map[current]
  return unless pending.any?

  retrying_send(pending)
  pending.clear
end
235
+
236
# Sends every record in +batch+ through @producer, resending the records that
# failed until none are left or the configured retry budget is used up.
#
# When @retries is nil there is no limit and the loop runs until every record
# has been sent successfully. Otherwise the initial attempt plus at most
# @retries further attempts are made; records still failing after that are
# dropped and the drop is logged.
#
# Between attempts the method sleeps for @retry_backoff_ms milliseconds.
#
# @param batch [#any?, #collect, #[]] the records to send
# @return [nil]
def retrying_send(batch)
  remaining = @retries

  while batch.any?
    unless remaining.nil?
      if remaining < 0
        # TODO(sissel): Offer to DLQ? Then again, if it's a transient fault,
        # DLQing would make things worse (you dlq data that would be successful
        # after the fault is repaired)
        logger.info("Exhausted user-configured retry count when sending to Kafka. Dropping these events.",
                    :max_retries => @retries, :drop_count => batch.count)
        break
      end

      remaining -= 1
    end

    failures = []

    # One entry per record, in batch order: the future returned by send(), or
    # nil when send() raised before a future was created.
    futures = batch.collect do |record|
      begin
        @producer.send(record)
      rescue org.apache.kafka.common.errors.TimeoutException,
             org.apache.kafka.common.errors.InterruptException,
             org.apache.kafka.common.errors.SerializationException
        # TODO(sissel): For SerializationException, retrying will fail because
        # the data itself has a problem serializing. Let's add DLQ here.
        failures << record
        nil
      end
    end

    futures.each_with_index do |future, i|
      # The nil placeholders are deliberately kept (not compacted away) so that
      # index i still identifies the record in batch that produced this future.
      # Records whose send() raised are already listed in failures.
      next if future.nil?

      begin
        future.get
      rescue => e
        # TODO(sissel): Add metric to count failures, possibly by exception type.
        logger.debug? && logger.debug("KafkaProducer.send() failed: #{e}", :exception => e)
        failures << batch[i]
      end
    end

    # No failures? Cool. Let's move on.
    break if failures.empty?

    # Otherwise, retry with any failed transmissions
    batch = failures
    # sleep takes seconds; the setting is expressed in milliseconds.
    delay = @retry_backoff_ms / 1000.0
    logger.info("Sending batch to Kafka failed. Will retry after a delay.", :batch_size => batch.size,
                :failures => failures.size, :sleep => delay)
    sleep(delay)
  end
end
203
295
 
204
296
  def close
@@ -222,8 +314,8 @@ class LogStash::Outputs::Kafka < LogStash::Outputs::Base
222
314
  props.put(kafka::MAX_REQUEST_SIZE_CONFIG, max_request_size.to_s)
223
315
  props.put(kafka::RECONNECT_BACKOFF_MS_CONFIG, reconnect_backoff_ms) unless reconnect_backoff_ms.nil?
224
316
  props.put(kafka::REQUEST_TIMEOUT_MS_CONFIG, request_timeout_ms) unless request_timeout_ms.nil?
225
- props.put(kafka::RETRIES_CONFIG, retries.to_s)
226
- props.put(kafka::RETRY_BACKOFF_MS_CONFIG, retry_backoff_ms.to_s)
317
+ props.put(kafka::RETRIES_CONFIG, retries.to_s) unless retries.nil?
318
+ props.put(kafka::RETRY_BACKOFF_MS_CONFIG, retry_backoff_ms.to_s)
227
319
  props.put(kafka::SEND_BUFFER_CONFIG, send_buffer_bytes.to_s)
228
320
  props.put(kafka::VALUE_SERIALIZER_CLASS_CONFIG, value_serializer)
229
321
 
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-output-kafka'
4
- s.version = '5.1.10'
4
+ s.version = '5.1.11'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = 'Output events to a Kafka topic. This uses the Kafka Producer API to write messages to a topic on the broker'
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -157,7 +157,7 @@ describe "outputs/kafka", :integration => true do
157
157
  def load_kafka_data(config)
158
158
  kafka = LogStash::Outputs::Kafka.new(config)
159
159
  kafka.register
160
- num_events.times do kafka.receive(event) end
160
+ kafka.multi_receive(num_events.times.collect { event })
161
161
  kafka.close
162
162
  end
163
163
 
@@ -25,34 +25,118 @@ describe "outputs/kafka" do
25
25
  context 'when outputting messages' do
26
26
  it 'should send logstash event to kafka broker' do
27
27
  expect_any_instance_of(org.apache.kafka.clients.producer.KafkaProducer).to receive(:send)
28
- .with(an_instance_of(org.apache.kafka.clients.producer.ProducerRecord))
28
+ .with(an_instance_of(org.apache.kafka.clients.producer.ProducerRecord)).and_call_original
29
29
  kafka = LogStash::Outputs::Kafka.new(simple_kafka_config)
30
30
  kafka.register
31
- kafka.receive(event)
31
+ kafka.multi_receive([event])
32
32
  end
33
33
 
34
34
  it 'should support Event#sprintf placeholders in topic_id' do
35
35
  topic_field = 'topic_name'
36
36
  expect(org.apache.kafka.clients.producer.ProducerRecord).to receive(:new)
37
- .with("my_topic", event.to_s)
38
- expect_any_instance_of(org.apache.kafka.clients.producer.KafkaProducer).to receive(:send)
37
+ .with("my_topic", event.to_s).and_call_original
38
+ expect_any_instance_of(org.apache.kafka.clients.producer.KafkaProducer).to receive(:send).and_call_original
39
39
  kafka = LogStash::Outputs::Kafka.new({'topic_id' => "%{#{topic_field}}"})
40
40
  kafka.register
41
- kafka.receive(event)
41
+ kafka.multi_receive([event])
42
42
  end
43
43
 
44
44
  it 'should support field referenced message_keys' do
45
45
  expect(org.apache.kafka.clients.producer.ProducerRecord).to receive(:new)
46
- .with("test", "172.0.0.1", event.to_s)
47
- expect_any_instance_of(org.apache.kafka.clients.producer.KafkaProducer).to receive(:send)
46
+ .with("test", "172.0.0.1", event.to_s).and_call_original
47
+ expect_any_instance_of(org.apache.kafka.clients.producer.KafkaProducer).to receive(:send).and_call_original
48
48
  kafka = LogStash::Outputs::Kafka.new(simple_kafka_config.merge({"message_key" => "%{host}"}))
49
49
  kafka.register
50
- kafka.receive(event)
50
+ kafka.multi_receive([event])
51
51
  end
52
52
 
53
53
  it 'should raise config error when truststore location is not set and ssl is enabled' do
54
- kafka = LogStash::Outputs::Kafka.new(simple_kafka_config.merge({"ssl" => "true"}))
54
+ kafka = LogStash::Outputs::Kafka.new(simple_kafka_config.merge("security_protocol" => "SSL"))
55
55
  expect { kafka.register }.to raise_error(LogStash::ConfigurationError, /ssl_truststore_location must be set when SSL is enabled/)
56
56
  end
57
57
  end
58
+
59
# Covers the case where KafkaProducer#send raises directly instead of
# returning a future.
context "when KafkaProducer#send() raises an exception" do
  # Number of consecutive injected failures: a random integer from 0 to 9.
  let(:failcount) { (rand * 10).to_i }
  # One send per injected failure, plus the final call that reaches the original method.
  let(:sendcount) { failcount + 1 }

  # Exception types the stub below may raise.
  let(:exception_classes) { [
    org.apache.kafka.common.errors.TimeoutException,
    org.apache.kafka.common.errors.InterruptException,
    org.apache.kafka.common.errors.SerializationException
  ] }

  before do
    count = 0
    # The expectation itself is the assertion: send must be called exactly
    # `sendcount` times on some KafkaProducer instance.
    expect_any_instance_of(org.apache.kafka.clients.producer.KafkaProducer).to receive(:send)
      .exactly(sendcount).times
      .and_wrap_original do |m, *args|
      if count < failcount # fail 'failcount' times in a row.
        count += 1
        # Pick an exception at random
        raise exception_classes.shuffle.first.new("injected exception for testing")
      else
        m.call(*args) # call original
      end
    end
  end

  # No explicit assertion here; the call-count expectation set up in `before`
  # is what verifies the retry behavior.
  it "should retry until successful" do
    kafka = LogStash::Outputs::Kafka.new(simple_kafka_config)
    kafka.register
    kafka.multi_receive([event])
  end
end
90
+
91
# Covers the case where send returns a future whose completion fails, as
# opposed to send raising directly.
context "when a send fails" do
  context "and the default retries behavior is used" do
    # Fail this many times and then finally succeed.
    let(:failcount) { (rand * 10).to_i }

    # Expect KafkaProducer.send() to get called again after every failure, plus the successful one.
    let(:sendcount) { failcount + 1 }

    it "should retry until successful" do
      count = 0;

      # The call-count expectation is the assertion for this example.
      expect_any_instance_of(org.apache.kafka.clients.producer.KafkaProducer).to receive(:send)
        .exactly(sendcount).times
        .and_wrap_original do |m, *args|
        if count < failcount
          count += 1
          # inject some failures.

          # Return a custom Future that will raise an exception to simulate a Kafka send() problem.
          future = java.util.concurrent.FutureTask.new { raise "Failed" }
          # Run the task now so the returned future is already completed (with the failure).
          future.run
          future
        else
          m.call(*args)
        end
      end
      # No "retries" setting is passed, so the plugin's default applies.
      kafka = LogStash::Outputs::Kafka.new(simple_kafka_config)
      kafka.register
      kafka.multi_receive([event])
    end
  end

  context "and when retries is set by the user" do
    # Configured retry count: a random integer from 0 to 9.
    let(:retries) { (rand * 10).to_i }
    # Upper bound on sends: the first attempt plus one per configured retry.
    let(:max_sends) { retries + 1 }

    it "should give up after retries are exhausted" do
      # Only an upper bound is asserted (at_most); the example passes as long
      # as send is not called more than `max_sends` times.
      expect_any_instance_of(org.apache.kafka.clients.producer.KafkaProducer).to receive(:send)
        .at_most(max_sends).times
        .and_wrap_original do |m, *args|
        # Always fail.
        future = java.util.concurrent.FutureTask.new { raise "Failed" }
        future.run
        future
      end
      kafka = LogStash::Outputs::Kafka.new(simple_kafka_config.merge("retries" => retries))
      kafka.register
      kafka.multi_receive([event])
    end
  end
end
58
142
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-output-kafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.1.10
4
+ version: 5.1.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elasticsearch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-08-21 00:00:00.000000000 Z
11
+ date: 2017-10-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement