logstash-integration-kafka 11.0.0-java → 11.1.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/docs/input-kafka.asciidoc +23 -0
- data/lib/logstash/inputs/kafka.rb +15 -3
- data/logstash-integration-kafka.gemspec +1 -1
- data/spec/integration/inputs/kafka_spec.rb +96 -0
- data/spec/unit/inputs/kafka_spec.rb +5 -5
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c8f85bfdadbbd496495603c82ed577db4c23a168db59e6d0034549de6ebb66d1
|
4
|
+
data.tar.gz: 0b3b0bc33d6e64eebcb9e757fede56f747625c864160844ed2a3c76d0b22a155
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '09e8814f1697c1d38d478881b35102a1e9885c23cdb0b9b2d8860c7577d4b2a40eb580b535afff46273e7139fbbaf603b0ea0de1fbeb68644bcea95b79d9e470'
|
7
|
+
data.tar.gz: 1b0c7ee3ffbcb589268174753db91d0ba7272fc44c04e7ceb2e17d43f9fe2ec31ed4eee7450390adc0bada77c36fedaf14d46bf9b09372a6b55a6e76047a56e4
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
## 11.1.0
|
2
|
+
- Added config `group_instance_id` to use Kafka's consumer static membership feature [#135](https://github.com/logstash-plugins/logstash-integration-kafka/pull/135)
|
3
|
+
|
1
4
|
## 11.0.0
|
2
5
|
- Changed Kafka client to 3.3.1, requires Logstash >= 8.3.0.
|
3
6
|
- Deprecated `default` value for setting `client_dns_lookup` forcing to `use_all_dns_ips` when explicitly used [#130](https://github.com/logstash-plugins/logstash-integration-kafka/pull/130)
|
data/docs/input-kafka.asciidoc
CHANGED
@@ -113,6 +113,7 @@ See the https://kafka.apache.org/{kafka_client_doc}/documentation for more detai
|
|
113
113
|
| <<plugins-{type}s-{plugin}-fetch_max_wait_ms>> |<<number,number>>|No
|
114
114
|
| <<plugins-{type}s-{plugin}-fetch_min_bytes>> |<<number,number>>|No
|
115
115
|
| <<plugins-{type}s-{plugin}-group_id>> |<<string,string>>|No
|
116
|
+
| <<plugins-{type}s-{plugin}-group_instance_id>> |<<string,string>>|No
|
116
117
|
| <<plugins-{type}s-{plugin}-heartbeat_interval_ms>> |<<number,number>>|No
|
117
118
|
| <<plugins-{type}s-{plugin}-isolation_level>> |<<string,string>>|No
|
118
119
|
| <<plugins-{type}s-{plugin}-jaas_path>> |a valid filesystem path|No
|
@@ -344,6 +345,28 @@ NOTE: In cases when multiple inputs are being used in a single pipeline, reading
|
|
344
345
|
it's essential to set a different `group_id => ...` for each input. Setting a unique `client_id => ...`
|
345
346
|
is also recommended.
|
346
347
|
|
348
|
+
[id="plugins-{type}s-{plugin}-group_instance_id"]
|
349
|
+
===== `group_instance_id`
|
350
|
+
|
351
|
+
* Value type is <<string,string>>
|
352
|
+
* There is no default value for this setting.
|
353
|
+
|
354
|
+
The static membership identifier for this Logstash Kafka consumer. The static membership feature was introduced in
|
355
|
+
https://cwiki.apache.org/confluence/display/KAFKA/KIP-345%3A+Introduce+static+membership+protocol+to+reduce+consumer+rebalances[KIP-345],
|
356
|
+
available under Kafka property `group.instance.id`.
|
357
|
+
Its purpose is to avoid rebalances in situations in which a lot of data
|
358
|
+
has to be forwarded after a consumer goes offline.
|
359
|
+
This feature mitigates cases where the service state is heavy and the rebalance of one topic partition from instance
|
360
|
+
A to B would cause a huge amount of data to be transferred.
|
361
|
+
A client that goes offline/online frequently can avoid frequent and heavy rebalances by using this option.
|
362
|
+
|
363
|
+
NOTE: The `group_instance_id` setting must be unique across all the clients belonging to the same <<plugins-{type}s-{plugin}-group_id>>.
|
364
|
+
Otherwise, another client connecting with the same `group.instance.id` value would cause the oldest instance to be disconnected.
|
365
|
+
You can set this value to use information such as a hostname, an IP, or anything that uniquely identifies the client application.
|
366
|
+
|
367
|
+
NOTE: In cases when multiple threads are configured and `consumer_threads` is greater than one, a suffix is appended to
|
368
|
+
the `group_instance_id` to avoid collisions.
|
369
|
+
|
347
370
|
[id="plugins-{type}s-{plugin}-heartbeat_interval_ms"]
|
348
371
|
===== `heartbeat_interval_ms`
|
349
372
|
|
@@ -124,6 +124,11 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
|
|
124
124
|
# that happens to be made up of multiple processors. Messages in a topic will be distributed to all
|
125
125
|
# Logstash instances with the same `group_id`
|
126
126
|
config :group_id, :validate => :string, :default => "logstash"
|
127
|
+
# Set a static group instance id used in static membership feature to avoid rebalancing when a
|
128
|
+
# consumer goes offline. If set and `consumer_threads` is greater than 1 then for each
|
129
|
+
# consumer created by each thread an artificial suffix is appended to the user provided `group_instance_id`
|
130
|
+
# to avoid clashing.
|
131
|
+
config :group_instance_id, :validate => :string
|
127
132
|
# The expected time between heartbeats to the consumer coordinator. Heartbeats are used to ensure
|
128
133
|
# that the consumer's session stays active and to facilitate rebalancing when new
|
129
134
|
# consumers join or leave the group. The value must be set lower than
|
@@ -136,7 +141,7 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
|
|
136
141
|
# been aborted. Non-transactional messages will be returned unconditionally in either mode.
|
137
142
|
config :isolation_level, :validate => ["read_uncommitted", "read_committed"], :default => "read_uncommitted" # Kafka default
|
138
143
|
# Java Class used to deserialize the record's key
|
139
|
-
config :key_deserializer_class, :validate => :string, :default =>
|
144
|
+
config :key_deserializer_class, :validate => :string, :default => DEFAULT_DESERIALIZER_CLASS
|
140
145
|
# The maximum delay between invocations of poll() when using consumer group management. This places
|
141
146
|
# an upper bound on the amount of time that the consumer can be idle before fetching more records.
|
142
147
|
# If poll() is not called before expiration of this timeout, then the consumer is considered failed and
|
@@ -287,7 +292,10 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
|
|
287
292
|
|
288
293
|
public
|
289
294
|
def run(logstash_queue)
|
290
|
-
@runner_consumers = consumer_threads.times.map
|
295
|
+
@runner_consumers = consumer_threads.times.map do |i|
|
296
|
+
thread_group_instance_id = consumer_threads > 1 && group_instance_id ? "#{group_instance_id}-#{i}" : group_instance_id
|
297
|
+
subscribe(create_consumer("#{client_id}-#{i}", thread_group_instance_id))
|
298
|
+
end
|
291
299
|
@runner_threads = @runner_consumers.map.with_index { |consumer, i| thread_runner(logstash_queue, consumer,
|
292
300
|
"kafka-input-worker-#{client_id}-#{i}") }
|
293
301
|
@runner_threads.each(&:start)
|
@@ -335,6 +343,9 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
|
|
335
343
|
rescue org.apache.kafka.common.errors.WakeupException => e
|
336
344
|
logger.debug("Wake up from poll", :kafka_error_message => e)
|
337
345
|
raise e unless stop?
|
346
|
+
rescue org.apache.kafka.common.errors.FencedInstanceIdException => e
|
347
|
+
logger.error("Another consumer with same group.instance.id has connected", :original_error_message => e.message)
|
348
|
+
raise e unless stop?
|
338
349
|
rescue => e
|
339
350
|
logger.error("Unable to poll Kafka consumer",
|
340
351
|
:kafka_error_message => e,
|
@@ -389,7 +400,7 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
|
|
389
400
|
end
|
390
401
|
|
391
402
|
private
|
392
|
-
def create_consumer(client_id)
|
403
|
+
def create_consumer(client_id, group_instance_id)
|
393
404
|
begin
|
394
405
|
props = java.util.Properties.new
|
395
406
|
kafka = org.apache.kafka.clients.consumer.ConsumerConfig
|
@@ -407,6 +418,7 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
|
|
407
418
|
props.put(kafka::FETCH_MAX_WAIT_MS_CONFIG, fetch_max_wait_ms.to_s) unless fetch_max_wait_ms.nil?
|
408
419
|
props.put(kafka::FETCH_MIN_BYTES_CONFIG, fetch_min_bytes.to_s) unless fetch_min_bytes.nil?
|
409
420
|
props.put(kafka::GROUP_ID_CONFIG, group_id)
|
421
|
+
props.put(kafka::GROUP_INSTANCE_ID_CONFIG, group_instance_id) unless group_instance_id.nil?
|
410
422
|
props.put(kafka::HEARTBEAT_INTERVAL_MS_CONFIG, heartbeat_interval_ms.to_s) unless heartbeat_interval_ms.nil?
|
411
423
|
props.put(kafka::ISOLATION_LEVEL_CONFIG, isolation_level)
|
412
424
|
props.put(kafka::KEY_DESERIALIZER_CLASS_CONFIG, key_deserializer_class)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'logstash-integration-kafka'
|
3
|
-
s.version = '11.0.0'
|
3
|
+
s.version = '11.1.0'
|
4
4
|
s.licenses = ['Apache-2.0']
|
5
5
|
s.summary = "Integration with Kafka - input and output plugins"
|
6
6
|
s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline "+
|
@@ -79,6 +79,7 @@ describe "inputs/kafka", :integration => true do
|
|
79
79
|
producer = org.apache.kafka.clients.producer.KafkaProducer.new(props)
|
80
80
|
|
81
81
|
producer.send(record)
|
82
|
+
producer.flush
|
82
83
|
producer.close
|
83
84
|
end
|
84
85
|
|
@@ -185,10 +186,105 @@ describe "inputs/kafka", :integration => true do
|
|
185
186
|
end
|
186
187
|
end
|
187
188
|
end
|
189
|
+
|
190
|
+
context "static membership 'group.instance.id' setting" do
|
191
|
+
let(:base_config) do
|
192
|
+
{
|
193
|
+
"topics" => ["logstash_integration_static_membership_topic"],
|
194
|
+
"group_id" => "logstash",
|
195
|
+
"consumer_threads" => 1,
|
196
|
+
# this is needed because the worker thread could be executed a little after the producer has sent the "up" message
|
197
|
+
"auto_offset_reset" => "earliest",
|
198
|
+
"group_instance_id" => "test_static_group_id"
|
199
|
+
}
|
200
|
+
end
|
201
|
+
let(:consumer_config) { base_config }
|
202
|
+
let(:logger) { double("logger") }
|
203
|
+
let(:queue) { java.util.concurrent.ArrayBlockingQueue.new(10) }
|
204
|
+
let(:kafka_input) { LogStash::Inputs::Kafka.new(consumer_config) }
|
205
|
+
before :each do
|
206
|
+
allow(LogStash::Inputs::Kafka).to receive(:logger).and_return(logger)
|
207
|
+
[:error, :warn, :info, :debug].each do |level|
|
208
|
+
allow(logger).to receive(level)
|
209
|
+
end
|
210
|
+
|
211
|
+
kafka_input.register
|
212
|
+
end
|
213
|
+
|
214
|
+
it "input plugin disconnects from the broker when another client with same static membership connects" do
|
215
|
+
expect(logger).to receive(:error).with("Another consumer with same group.instance.id has connected", anything)
|
216
|
+
|
217
|
+
input_worker = java.lang.Thread.new { kafka_input.run(queue) }
|
218
|
+
begin
|
219
|
+
input_worker.start
|
220
|
+
wait_kafka_input_is_ready("logstash_integration_static_membership_topic", queue)
|
221
|
+
saboteur_kafka_consumer = create_consumer_and_start_consuming("test_static_group_id")
|
222
|
+
saboteur_kafka_consumer.run # ask to be scheduled
|
223
|
+
saboteur_kafka_consumer.join
|
224
|
+
|
225
|
+
expect(saboteur_kafka_consumer.value).to eq("saboteur exited")
|
226
|
+
ensure
|
227
|
+
input_worker.join(30_000)
|
228
|
+
end
|
229
|
+
end
|
230
|
+
|
231
|
+
context "when the plugin is configured with multiple consumer threads" do
|
232
|
+
let(:consumer_config) { base_config.merge({"consumer_threads" => 2}) }
|
233
|
+
|
234
|
+
it "should avoid to connect with same 'group.instance.id'" do
|
235
|
+
expect(logger).to_not receive(:error).with("Another consumer with same group.instance.id has connected", anything)
|
236
|
+
|
237
|
+
input_worker = java.lang.Thread.new { kafka_input.run(queue) }
|
238
|
+
begin
|
239
|
+
input_worker.start
|
240
|
+
wait_kafka_input_is_ready("logstash_integration_static_membership_topic", queue)
|
241
|
+
ensure
|
242
|
+
kafka_input.stop
|
243
|
+
input_worker.join(1_000)
|
244
|
+
end
|
245
|
+
end
|
246
|
+
end
|
247
|
+
end
|
248
|
+
end
|
249
|
+
|
250
|
+
# return consumer Ruby Thread
|
251
|
+
def create_consumer_and_start_consuming(static_group_id)
|
252
|
+
props = java.util.Properties.new
|
253
|
+
kafka = org.apache.kafka.clients.consumer.ConsumerConfig
|
254
|
+
props.put(kafka::BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")
|
255
|
+
props.put(kafka::KEY_DESERIALIZER_CLASS_CONFIG, LogStash::Inputs::Kafka::DEFAULT_DESERIALIZER_CLASS)
|
256
|
+
props.put(kafka::VALUE_DESERIALIZER_CLASS_CONFIG, LogStash::Inputs::Kafka::DEFAULT_DESERIALIZER_CLASS)
|
257
|
+
props.put(kafka::GROUP_ID_CONFIG, "logstash")
|
258
|
+
props.put(kafka::GROUP_INSTANCE_ID_CONFIG, static_group_id)
|
259
|
+
consumer = org.apache.kafka.clients.consumer.KafkaConsumer.new(props)
|
260
|
+
|
261
|
+
Thread.new do
|
262
|
+
LogStash::Util::set_thread_name("integration_test_simple_consumer")
|
263
|
+
begin
|
264
|
+
consumer.subscribe(["logstash_integration_static_membership_topic"])
|
265
|
+
records = consumer.poll(java.time.Duration.ofSeconds(3))
|
266
|
+
"saboteur exited"
|
267
|
+
rescue => e
|
268
|
+
e # return the exception reached in thread.value
|
269
|
+
ensure
|
270
|
+
consumer.close
|
271
|
+
end
|
272
|
+
end
|
188
273
|
end
|
189
274
|
|
190
275
|
private
|
191
276
|
|
277
|
+
def wait_kafka_input_is_ready(topic, queue)
|
278
|
+
# this is needed to give time to the kafka input to be up and running
|
279
|
+
header = org.apache.kafka.common.header.internals.RecordHeader.new("name", "Ping Up".to_java_bytes)
|
280
|
+
record = org.apache.kafka.clients.producer.ProducerRecord.new(topic, 0, "key", "value", [header])
|
281
|
+
send_message(record)
|
282
|
+
|
283
|
+
# Wait until the message is processed
|
284
|
+
message = queue.poll(1, java.util.concurrent.TimeUnit::MINUTES)
|
285
|
+
expect(message).to_not eq(nil)
|
286
|
+
end
|
287
|
+
|
192
288
|
def consume_messages(config, queue: Queue.new, timeout:, event_count:)
|
193
289
|
kafka_input = LogStash::Inputs::Kafka.new(config)
|
194
290
|
kafka_input.register
|
@@ -297,7 +297,7 @@ describe LogStash::Inputs::Kafka do
|
|
297
297
|
to receive(:new).with(hash_including('client.rack' => 'EU-R1')).
|
298
298
|
and_return kafka_client = double('kafka-consumer')
|
299
299
|
|
300
|
-
expect( subject.send(:create_consumer, 'sample_client-0') ).to be kafka_client
|
300
|
+
expect( subject.send(:create_consumer, 'sample_client-0', 'group_instance_id') ).to be kafka_client
|
301
301
|
end
|
302
302
|
end
|
303
303
|
|
@@ -309,7 +309,7 @@ describe LogStash::Inputs::Kafka do
|
|
309
309
|
to receive(:new).with(hash_including('session.timeout.ms' => '25000', 'max.poll.interval.ms' => '345000')).
|
310
310
|
and_return kafka_client = double('kafka-consumer')
|
311
311
|
|
312
|
-
expect( subject.send(:create_consumer, 'sample_client-1') ).to be kafka_client
|
312
|
+
expect( subject.send(:create_consumer, 'sample_client-1', 'group_instance_id') ).to be kafka_client
|
313
313
|
end
|
314
314
|
end
|
315
315
|
|
@@ -321,7 +321,7 @@ describe LogStash::Inputs::Kafka do
|
|
321
321
|
to receive(:new).with(hash_including('session.timeout.ms' => '25200', 'max.poll.interval.ms' => '123000')).
|
322
322
|
and_return kafka_client = double('kafka-consumer')
|
323
323
|
|
324
|
-
expect( subject.send(:create_consumer, 'sample_client-2') ).to be kafka_client
|
324
|
+
expect( subject.send(:create_consumer, 'sample_client-2', 'group_instance_id') ).to be kafka_client
|
325
325
|
end
|
326
326
|
end
|
327
327
|
|
@@ -333,7 +333,7 @@ describe LogStash::Inputs::Kafka do
|
|
333
333
|
to receive(:new).with(hash_including('enable.auto.commit' => 'false', 'check.crcs' => 'true')).
|
334
334
|
and_return kafka_client = double('kafka-consumer')
|
335
335
|
|
336
|
-
expect( subject.send(:create_consumer, 'sample_client-3') ).to be kafka_client
|
336
|
+
expect( subject.send(:create_consumer, 'sample_client-3', 'group_instance_id') ).to be kafka_client
|
337
337
|
expect( subject.enable_auto_commit ).to be false
|
338
338
|
end
|
339
339
|
end
|
@@ -346,7 +346,7 @@ describe LogStash::Inputs::Kafka do
|
|
346
346
|
to receive(:new).with(hash_including('enable.auto.commit' => 'true', 'check.crcs' => 'false')).
|
347
347
|
and_return kafka_client = double('kafka-consumer')
|
348
348
|
|
349
|
-
expect( subject.send(:create_consumer, 'sample_client-4') ).to be kafka_client
|
349
|
+
expect( subject.send(:create_consumer, 'sample_client-4', 'group_instance_id') ).to be kafka_client
|
350
350
|
expect( subject.enable_auto_commit ).to be true
|
351
351
|
end
|
352
352
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-integration-kafka
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 11.0.0
|
4
|
+
version: 11.1.0
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-01-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|