logstash-integration-kafka 11.0.0-java → 11.1.0-java
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/docs/input-kafka.asciidoc +23 -0
- data/lib/logstash/inputs/kafka.rb +15 -3
- data/logstash-integration-kafka.gemspec +1 -1
- data/spec/integration/inputs/kafka_spec.rb +96 -0
- data/spec/unit/inputs/kafka_spec.rb +5 -5
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c8f85bfdadbbd496495603c82ed577db4c23a168db59e6d0034549de6ebb66d1
+  data.tar.gz: 0b3b0bc33d6e64eebcb9e757fede56f747625c864160844ed2a3c76d0b22a155
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: '09e8814f1697c1d38d478881b35102a1e9885c23cdb0b9b2d8860c7577d4b2a40eb580b535afff46273e7139fbbaf603b0ea0de1fbeb68644bcea95b79d9e470'
+  data.tar.gz: 1b0c7ee3ffbcb589268174753db91d0ba7272fc44c04e7ceb2e17d43f9fe2ec31ed4eee7450390adc0bada77c36fedaf14d46bf9b09372a6b55a6e76047a56e4
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,6 @@
+## 11.1.0
+  - Added config `group_instance_id` to use Kafka's consumer static membership feature [#135](https://github.com/logstash-plugins/logstash-integration-kafka/pull/135)
+
 ## 11.0.0
   - Changed Kafka client to 3.3.1, requires Logstash >= 8.3.0.
   - Deprecated `default` value for setting `client_dns_lookup` forcing to `use_all_dns_ips` when explicitly used [#130](https://github.com/logstash-plugins/logstash-integration-kafka/pull/130)
data/docs/input-kafka.asciidoc
CHANGED
@@ -113,6 +113,7 @@ See the https://kafka.apache.org/{kafka_client_doc}/documentation for more detai
 | <<plugins-{type}s-{plugin}-fetch_max_wait_ms>> |<<number,number>>|No
 | <<plugins-{type}s-{plugin}-fetch_min_bytes>> |<<number,number>>|No
 | <<plugins-{type}s-{plugin}-group_id>> |<<string,string>>|No
+| <<plugins-{type}s-{plugin}-group_instance_id>> |<<string,string>>|No
 | <<plugins-{type}s-{plugin}-heartbeat_interval_ms>> |<<number,number>>|No
 | <<plugins-{type}s-{plugin}-isolation_level>> |<<string,string>>|No
 | <<plugins-{type}s-{plugin}-jaas_path>> |a valid filesystem path|No
@@ -344,6 +345,28 @@ NOTE: In cases when multiple inputs are being used in a single pipeline, reading
 it's essential to set a different `group_id => ...` for each input. Setting a unique `client_id => ...`
 is also recommended.
 
+[id="plugins-{type}s-{plugin}-group_instance_id"]
+===== `group_instance_id`
+
+* Value type is <<string,string>>
+* There is no default value for this setting.
+
+The static membership identifier for this Logstash Kafka consumer. The static membership feature was introduced in
+https://cwiki.apache.org/confluence/display/KAFKA/KIP-345%3A+Introduce+static+membership+protocol+to+reduce+consumer+rebalances[KIP-345]
+and is exposed through the Kafka property `group.instance.id`.
+Its purpose is to avoid rebalances in situations in which a lot of data
+would have to be forwarded after a consumer goes offline.
+It mitigates cases where the service state is heavy and rebalancing one topic partition from instance
+A to B would cause a huge amount of data to be transferred.
+A client that goes offline/online frequently can avoid frequent and heavy rebalances by using this option.
+
+NOTE: The `group_instance_id` setting must be unique across all the clients belonging to the same <<plugins-{type}s-{plugin}-group_id>>.
+Otherwise, another client connecting with the same `group.instance.id` value would cause the oldest instance to be disconnected.
+You can set this value to information such as a hostname, an IP address, or anything else that uniquely identifies the client application.
+
+NOTE: When `consumer_threads` is greater than one, a suffix is appended to
+the `group_instance_id` to avoid collisions.
+
 [id="plugins-{type}s-{plugin}-heartbeat_interval_ms"]
 ===== `heartbeat_interval_ms`
 
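As a usage illustration of the documentation above, here is a minimal pipeline sketch; the broker address, topic, and instance id are hypothetical examples, not values taken from this release:

input {
  kafka {
    bootstrap_servers => "localhost:9092"
    topics => ["example-topic"]
    group_id => "logstash"
    consumer_threads => 2
    # hypothetical value; anything that uniquely identifies this Logstash node works
    group_instance_id => "logstash-node-1"
  }
}

With `consumer_threads => 2`, the two consumers would join the group with ids like `logstash-node-1-0` and `logstash-node-1-1`, per the NOTE above.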
data/lib/logstash/inputs/kafka.rb
CHANGED
@@ -124,6 +124,11 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
   # that happens to be made up of multiple processors. Messages in a topic will be distributed to all
   # Logstash instances with the same `group_id`
   config :group_id, :validate => :string, :default => "logstash"
+  # Set a static group instance id used in the static membership feature to avoid rebalancing when a
+  # consumer goes offline. If set and `consumer_threads` is greater than 1, then for the
+  # consumer created by each thread an artificial suffix is appended to the user-provided `group_instance_id`
+  # to avoid clashing.
+  config :group_instance_id, :validate => :string
   # The expected time between heartbeats to the consumer coordinator. Heartbeats are used to ensure
   # that the consumer's session stays active and to facilitate rebalancing when new
   # consumers join or leave the group. The value must be set lower than
@@ -136,7 +141,7 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
   # been aborted. Non-transactional messages will be returned unconditionally in either mode.
   config :isolation_level, :validate => ["read_uncommitted", "read_committed"], :default => "read_uncommitted" # Kafka default
   # Java Class used to deserialize the record's key
-  config :key_deserializer_class, :validate => :string, :default =>
+  config :key_deserializer_class, :validate => :string, :default => DEFAULT_DESERIALIZER_CLASS
   # The maximum delay between invocations of poll() when using consumer group management. This places
   # an upper bound on the amount of time that the consumer can be idle before fetching more records.
   # If poll() is not called before expiration of this timeout, then the consumer is considered failed and
@@ -287,7 +292,10 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
 
   public
   def run(logstash_queue)
-    @runner_consumers = consumer_threads.times.map
+    @runner_consumers = consumer_threads.times.map do |i|
+      thread_group_instance_id = consumer_threads > 1 && group_instance_id ? "#{group_instance_id}-#{i}" : group_instance_id
+      subscribe(create_consumer("#{client_id}-#{i}", thread_group_instance_id))
+    end
     @runner_threads = @runner_consumers.map.with_index { |consumer, i| thread_runner(logstash_queue, consumer,
                                                                                      "kafka-input-worker-#{client_id}-#{i}") }
     @runner_threads.each(&:start)
@@ -335,6 +343,9 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
   rescue org.apache.kafka.common.errors.WakeupException => e
     logger.debug("Wake up from poll", :kafka_error_message => e)
     raise e unless stop?
+  rescue org.apache.kafka.common.errors.FencedInstanceIdException => e
+    logger.error("Another consumer with same group.instance.id has connected", :original_error_message => e.message)
+    raise e unless stop?
   rescue => e
     logger.error("Unable to poll Kafka consumer",
                  :kafka_error_message => e,
@@ -389,7 +400,7 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
   end
 
   private
-  def create_consumer(client_id)
+  def create_consumer(client_id, group_instance_id)
     begin
       props = java.util.Properties.new
       kafka = org.apache.kafka.clients.consumer.ConsumerConfig
@@ -407,6 +418,7 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
       props.put(kafka::FETCH_MAX_WAIT_MS_CONFIG, fetch_max_wait_ms.to_s) unless fetch_max_wait_ms.nil?
       props.put(kafka::FETCH_MIN_BYTES_CONFIG, fetch_min_bytes.to_s) unless fetch_min_bytes.nil?
      props.put(kafka::GROUP_ID_CONFIG, group_id)
+      props.put(kafka::GROUP_INSTANCE_ID_CONFIG, group_instance_id) unless group_instance_id.nil?
       props.put(kafka::HEARTBEAT_INTERVAL_MS_CONFIG, heartbeat_interval_ms.to_s) unless heartbeat_interval_ms.nil?
       props.put(kafka::ISOLATION_LEVEL_CONFIG, isolation_level)
       props.put(kafka::KEY_DESERIALIZER_CLASS_CONFIG, key_deserializer_class)
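To make the new threading behavior in `run` concrete, here is a small self-contained Ruby sketch of the per-thread id derivation; the helper name `derive_instance_ids` is illustrative, not part of the plugin:

# Mirrors the ternary in run(): a single thread passes the id through
# unchanged; multiple threads get a "-<index>" suffix so each consumer
# presents a unique group.instance.id and the broker does not fence them.
def derive_instance_ids(group_instance_id, consumer_threads)
  consumer_threads.times.map do |i|
    consumer_threads > 1 && group_instance_id ? "#{group_instance_id}-#{i}" : group_instance_id
  end
end

derive_instance_ids("ls-node-1", 1)  # => ["ls-node-1"]
derive_instance_ids("ls-node-1", 3)  # => ["ls-node-1-0", "ls-node-1-1", "ls-node-1-2"]
derive_instance_ids(nil, 2)          # => [nil, nil] (static membership disabled)

If two consumers did present the same id, the broker would fence the older one, which is exactly what the new `FencedInstanceIdException` rescue above logs before shutting down.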
data/logstash-integration-kafka.gemspec
CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |s|
   s.name = 'logstash-integration-kafka'
-  s.version = '11.0.0'
+  s.version = '11.1.0'
   s.licenses = ['Apache-2.0']
   s.summary = "Integration with Kafka - input and output plugins"
   s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline "+
data/spec/integration/inputs/kafka_spec.rb
CHANGED
@@ -79,6 +79,7 @@ describe "inputs/kafka", :integration => true do
     producer = org.apache.kafka.clients.producer.KafkaProducer.new(props)
 
     producer.send(record)
+    producer.flush
     producer.close
   end
 
@@ -185,10 +186,105 @@ describe "inputs/kafka", :integration => true do
       end
     end
   end
+
+  context "static membership 'group.instance.id' setting" do
+    let(:base_config) do
+      {
+        "topics" => ["logstash_integration_static_membership_topic"],
+        "group_id" => "logstash",
+        "consumer_threads" => 1,
+        # this is needed because the worker thread could be executed a little after the producer sent the "up" message
+        "auto_offset_reset" => "earliest",
+        "group_instance_id" => "test_static_group_id"
+      }
+    end
+    let(:consumer_config) { base_config }
+    let(:logger) { double("logger") }
+    let(:queue) { java.util.concurrent.ArrayBlockingQueue.new(10) }
+    let(:kafka_input) { LogStash::Inputs::Kafka.new(consumer_config) }
+
+    before :each do
+      allow(LogStash::Inputs::Kafka).to receive(:logger).and_return(logger)
+      [:error, :warn, :info, :debug].each do |level|
+        allow(logger).to receive(level)
+      end
+
+      kafka_input.register
+    end
+
+    it "input plugin disconnects from the broker when another client with the same static membership id connects" do
+      expect(logger).to receive(:error).with("Another consumer with same group.instance.id has connected", anything)
+
+      input_worker = java.lang.Thread.new { kafka_input.run(queue) }
+      begin
+        input_worker.start
+        wait_kafka_input_is_ready("logstash_integration_static_membership_topic", queue)
+        saboteur_kafka_consumer = create_consumer_and_start_consuming("test_static_group_id")
+        saboteur_kafka_consumer.run # ask to be scheduled
+        saboteur_kafka_consumer.join
+
+        expect(saboteur_kafka_consumer.value).to eq("saboteur exited")
+      ensure
+        input_worker.join(30_000)
+      end
+    end
+
+    context "when the plugin is configured with multiple consumer threads" do
+      let(:consumer_config) { base_config.merge({"consumer_threads" => 2}) }
+
+      it "should avoid connecting with the same 'group.instance.id'" do
+        expect(logger).to_not receive(:error).with("Another consumer with same group.instance.id has connected", anything)
+
+        input_worker = java.lang.Thread.new { kafka_input.run(queue) }
+        begin
+          input_worker.start
+          wait_kafka_input_is_ready("logstash_integration_static_membership_topic", queue)
+        ensure
+          kafka_input.stop
+          input_worker.join(1_000)
+        end
+      end
+    end
+  end
+end
+
+# returns the consumer wrapped in a Ruby Thread
+def create_consumer_and_start_consuming(static_group_id)
+  props = java.util.Properties.new
+  kafka = org.apache.kafka.clients.consumer.ConsumerConfig
+  props.put(kafka::BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")
+  props.put(kafka::KEY_DESERIALIZER_CLASS_CONFIG, LogStash::Inputs::Kafka::DEFAULT_DESERIALIZER_CLASS)
+  props.put(kafka::VALUE_DESERIALIZER_CLASS_CONFIG, LogStash::Inputs::Kafka::DEFAULT_DESERIALIZER_CLASS)
+  props.put(kafka::GROUP_ID_CONFIG, "logstash")
+  props.put(kafka::GROUP_INSTANCE_ID_CONFIG, static_group_id)
+  consumer = org.apache.kafka.clients.consumer.KafkaConsumer.new(props)
+
+  Thread.new do
+    LogStash::Util::set_thread_name("integration_test_simple_consumer")
+    begin
+      consumer.subscribe(["logstash_integration_static_membership_topic"])
+      records = consumer.poll(java.time.Duration.ofSeconds(3))
+      "saboteur exited"
+    rescue => e
+      e # return the exception, reachable through thread.value
+    ensure
+      consumer.close
+    end
+  end
 end
 
 private
 
+def wait_kafka_input_is_ready(topic, queue)
+  # this is needed to give the kafka input time to be up and running
+  header = org.apache.kafka.common.header.internals.RecordHeader.new("name", "Ping Up".to_java_bytes)
+  record = org.apache.kafka.clients.producer.ProducerRecord.new(topic, 0, "key", "value", [header])
+  send_message(record)
+
+  # wait until the message is processed
+  message = queue.poll(1, java.util.concurrent.TimeUnit::MINUTES)
+  expect(message).to_not eq(nil)
+end
+
 def consume_messages(config, queue: Queue.new, timeout:, event_count:)
   kafka_input = LogStash::Inputs::Kafka.new(config)
   kafka_input.register
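For manual verification outside the spec suite, the saboteur helper above can be condensed into a standalone JRuby snippet. This is a sketch, assuming the Kafka client jars are on the classpath, a broker at localhost:9092, and a Logstash Kafka input already running with `group_instance_id => "test_static_group_id"`; the plugin's default deserializer class name is spelled out so the snippet does not need the plugin loaded:

require 'java'

props = java.util.Properties.new
kafka = org.apache.kafka.clients.consumer.ConsumerConfig
props.put(kafka::BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")
props.put(kafka::GROUP_ID_CONFIG, "logstash")
props.put(kafka::KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")
props.put(kafka::VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")
# joining with an instance id already held by a live member makes the broker
# fence the older member, which surfaces there as FencedInstanceIdException
props.put(kafka::GROUP_INSTANCE_ID_CONFIG, "test_static_group_id")

consumer = org.apache.kafka.clients.consumer.KafkaConsumer.new(props)
begin
  consumer.subscribe(["logstash_integration_static_membership_topic"])
  consumer.poll(java.time.Duration.ofSeconds(3))
ensure
  consumer.close
end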
data/spec/unit/inputs/kafka_spec.rb
CHANGED
@@ -297,7 +297,7 @@ describe LogStash::Inputs::Kafka do
       to receive(:new).with(hash_including('client.rack' => 'EU-R1')).
       and_return kafka_client = double('kafka-consumer')
 
-    expect( subject.send(:create_consumer, 'sample_client-0') ).to be kafka_client
+    expect( subject.send(:create_consumer, 'sample_client-0', 'group_instance_id') ).to be kafka_client
   end
 end
 
@@ -309,7 +309,7 @@ describe LogStash::Inputs::Kafka do
       to receive(:new).with(hash_including('session.timeout.ms' => '25000', 'max.poll.interval.ms' => '345000')).
       and_return kafka_client = double('kafka-consumer')
 
-    expect( subject.send(:create_consumer, 'sample_client-1') ).to be kafka_client
+    expect( subject.send(:create_consumer, 'sample_client-1', 'group_instance_id') ).to be kafka_client
   end
 end
 
@@ -321,7 +321,7 @@ describe LogStash::Inputs::Kafka do
       to receive(:new).with(hash_including('session.timeout.ms' => '25200', 'max.poll.interval.ms' => '123000')).
       and_return kafka_client = double('kafka-consumer')
 
-    expect( subject.send(:create_consumer, 'sample_client-2') ).to be kafka_client
+    expect( subject.send(:create_consumer, 'sample_client-2', 'group_instance_id') ).to be kafka_client
   end
 end
 
@@ -333,7 +333,7 @@ describe LogStash::Inputs::Kafka do
       to receive(:new).with(hash_including('enable.auto.commit' => 'false', 'check.crcs' => 'true')).
       and_return kafka_client = double('kafka-consumer')
 
-    expect( subject.send(:create_consumer, 'sample_client-3') ).to be kafka_client
+    expect( subject.send(:create_consumer, 'sample_client-3', 'group_instance_id') ).to be kafka_client
     expect( subject.enable_auto_commit ).to be false
   end
 end
@@ -346,7 +346,7 @@ describe LogStash::Inputs::Kafka do
       to receive(:new).with(hash_including('enable.auto.commit' => 'true', 'check.crcs' => 'false')).
       and_return kafka_client = double('kafka-consumer')
 
-    expect( subject.send(:create_consumer, 'sample_client-4') ).to be kafka_client
+    expect( subject.send(:create_consumer, 'sample_client-4', 'group_instance_id') ).to be kafka_client
     expect( subject.enable_auto_commit ).to be true
   end
 end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: logstash-integration-kafka
 version: !ruby/object:Gem::Version
-  version: 11.0.0
+  version: 11.1.0
 platform: java
 authors:
 - Elastic
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2023-01-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement