logstash-integration-kafka 11.0.0-java → 11.1.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: c0060f7684d8dd0787c1e106fdcc1b1b9673ec42cf2ffc01141dc5a2d9351967
-  data.tar.gz: a0d878319e3ffde777330f9a15c64c23e898f16f8cde797622a804e9f4cbf89c
+  metadata.gz: c8f85bfdadbbd496495603c82ed577db4c23a168db59e6d0034549de6ebb66d1
+  data.tar.gz: 0b3b0bc33d6e64eebcb9e757fede56f747625c864160844ed2a3c76d0b22a155
 SHA512:
-  metadata.gz: 721c864dff1a72f31cea49f7e3c674bdd827347b7b7a60c8ab1c2f5e88839205e3e94836cc4b664a55fd16107e0412b6885d15949c2ac1fd20798e82124fa502
-  data.tar.gz: 49823c88d015acbdaf7b91d0fe64a9975ff22c9c8691c79c9270bed33aa174d3ea4f770c841e074c64ed82a1b092814817a63447d62fa17c84f9e3666bf89203
+  metadata.gz: '09e8814f1697c1d38d478881b35102a1e9885c23cdb0b9b2d8860c7577d4b2a40eb580b535afff46273e7139fbbaf603b0ea0de1fbeb68644bcea95b79d9e470'
+  data.tar.gz: 1b0c7ee3ffbcb589268174753db91d0ba7272fc44c04e7ceb2e17d43f9fe2ec31ed4eee7450390adc0bada77c36fedaf14d46bf9b09372a6b55a6e76047a56e4
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
+## 11.1.0
+- Added config `group_instance_id` to use Kafka's consumer static membership feature [#135](https://github.com/logstash-plugins/logstash-integration-kafka/pull/135)
+
 ## 11.0.0
 - Changed Kafka client to 3.3.1; requires Logstash >= 8.3.0.
 - Deprecated `default` value for setting `client_dns_lookup`, forcing to `use_all_dns_ips` when explicitly used [#130](https://github.com/logstash-plugins/logstash-integration-kafka/pull/130)
data/docs/input-kafka.asciidoc CHANGED
@@ -113,6 +113,7 @@ See the https://kafka.apache.org/{kafka_client_doc}/documentation for more details
 | <<plugins-{type}s-{plugin}-fetch_max_wait_ms>> |<<number,number>>|No
 | <<plugins-{type}s-{plugin}-fetch_min_bytes>> |<<number,number>>|No
 | <<plugins-{type}s-{plugin}-group_id>> |<<string,string>>|No
+| <<plugins-{type}s-{plugin}-group_instance_id>> |<<string,string>>|No
 | <<plugins-{type}s-{plugin}-heartbeat_interval_ms>> |<<number,number>>|No
 | <<plugins-{type}s-{plugin}-isolation_level>> |<<string,string>>|No
 | <<plugins-{type}s-{plugin}-jaas_path>> |a valid filesystem path|No
@@ -344,6 +345,28 @@ NOTE: In cases when multiple inputs are being used in a single pipeline, reading
 it's essential to set a different `group_id => ...` for each input. Setting a unique `client_id => ...`
 is also recommended.
 
+[id="plugins-{type}s-{plugin}-group_instance_id"]
+===== `group_instance_id`
+
+* Value type is <<string,string>>
+* There is no default value for this setting.
+
+The static membership identifier for this Logstash Kafka consumer. The static membership feature was introduced in
+https://cwiki.apache.org/confluence/display/KAFKA/KIP-345%3A+Introduce+static+membership+protocol+to+reduce+consumer+rebalances[KIP-345]
+and is exposed through the Kafka property `group.instance.id`.
+Its purpose is to avoid rebalances in situations in which a lot of data
+would have to be forwarded after a consumer goes offline.
+It mitigates cases where the service state is heavy and rebalancing one topic partition from instance
+A to instance B would cause a huge amount of data to be transferred.
+A client that goes offline/online frequently can avoid frequent and heavy rebalances by using this option.
+
+NOTE: The `group_instance_id` setting must be unique across all the clients belonging to the same <<plugins-{type}s-{plugin}-group_id>>.
+Otherwise, another client connecting with the same `group.instance.id` value would cause the oldest instance to be disconnected.
+You can set this value to use information such as a hostname, an IP address, or anything else that uniquely identifies the client application.
+
+NOTE: When `consumer_threads` is greater than one, a per-thread suffix is appended to
+the `group_instance_id` to avoid collisions.
+
 [id="plugins-{type}s-{plugin}-heartbeat_interval_ms"]
 ===== `heartbeat_interval_ms`
 
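For illustration, a minimal pipeline sketch using the new setting documented above (the broker address, topic, and instance id values are hypothetical; each Logstash node must use its own unique `group_instance_id`):

    input {
      kafka {
        bootstrap_servers => "localhost:9092"
        topics            => ["example_topic"]
        group_id          => "logstash"
        group_instance_id => "logstash-node-1"  # must be unique per client in the group
        consumer_threads  => 2                  # per-thread suffixes are appended automatically
      }
    }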
data/lib/logstash/inputs/kafka.rb CHANGED
@@ -124,6 +124,11 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
   # that happens to be made up of multiple processors. Messages in a topic will be distributed to all
   # Logstash instances with the same `group_id`
   config :group_id, :validate => :string, :default => "logstash"
+  # Set a static group instance id used in the static membership feature to avoid rebalancing when a
+  # consumer goes offline. If set and `consumer_threads` is greater than 1, then for the consumer
+  # created by each thread an artificial suffix is appended to the user-provided `group_instance_id`
+  # to avoid clashing.
+  config :group_instance_id, :validate => :string
   # The expected time between heartbeats to the consumer coordinator. Heartbeats are used to ensure
   # that the consumer's session stays active and to facilitate rebalancing when new
   # consumers join or leave the group. The value must be set lower than
@@ -136,7 +141,7 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
   # been aborted. Non-transactional messages will be returned unconditionally in either mode.
   config :isolation_level, :validate => ["read_uncommitted", "read_committed"], :default => "read_uncommitted" # Kafka default
   # Java class used to deserialize the record's key
-  config :key_deserializer_class, :validate => :string, :default => "org.apache.kafka.common.serialization.StringDeserializer"
+  config :key_deserializer_class, :validate => :string, :default => DEFAULT_DESERIALIZER_CLASS
   # The maximum delay between invocations of poll() when using consumer group management. This places
   # an upper bound on the amount of time that the consumer can be idle before fetching more records.
   # If poll() is not called before expiration of this timeout, then the consumer is considered failed and
@@ -287,7 +292,10 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
 
   public
   def run(logstash_queue)
-    @runner_consumers = consumer_threads.times.map { |i| subscribe(create_consumer("#{client_id}-#{i}")) }
+    @runner_consumers = consumer_threads.times.map do |i|
+      thread_group_instance_id = consumer_threads > 1 && group_instance_id ? "#{group_instance_id}-#{i}" : group_instance_id
+      subscribe(create_consumer("#{client_id}-#{i}", thread_group_instance_id))
+    end
     @runner_threads = @runner_consumers.map.with_index { |consumer, i| thread_runner(logstash_queue, consumer,
                                                                                     "kafka-input-worker-#{client_id}-#{i}") }
     @runner_threads.each(&:start)
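As a sketch of the id derivation in the hunk above (the id value is hypothetical), a three-thread input yields one suffixed instance id per consumer, while a single-threaded input keeps the user-provided id unchanged:

    consumer_threads  = 3
    group_instance_id = "logstash-node-1"  # hypothetical user-provided value
    ids = consumer_threads.times.map do |i|
      consumer_threads > 1 && group_instance_id ? "#{group_instance_id}-#{i}" : group_instance_id
    end
    # ids == ["logstash-node-1-0", "logstash-node-1-1", "logstash-node-1-2"]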
@@ -335,6 +343,9 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
   rescue org.apache.kafka.common.errors.WakeupException => e
     logger.debug("Wake up from poll", :kafka_error_message => e)
     raise e unless stop?
+  rescue org.apache.kafka.common.errors.FencedInstanceIdException => e
+    logger.error("Another consumer with same group.instance.id has connected", :original_error_message => e.message)
+    raise e unless stop?
   rescue => e
     logger.error("Unable to poll Kafka consumer",
                  :kafka_error_message => e,
@@ -389,7 +400,7 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
   end
 
   private
-  def create_consumer(client_id)
+  def create_consumer(client_id, group_instance_id)
     begin
       props = java.util.Properties.new
       kafka = org.apache.kafka.clients.consumer.ConsumerConfig
@@ -407,6 +418,7 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
       props.put(kafka::FETCH_MAX_WAIT_MS_CONFIG, fetch_max_wait_ms.to_s) unless fetch_max_wait_ms.nil?
       props.put(kafka::FETCH_MIN_BYTES_CONFIG, fetch_min_bytes.to_s) unless fetch_min_bytes.nil?
       props.put(kafka::GROUP_ID_CONFIG, group_id)
+      props.put(kafka::GROUP_INSTANCE_ID_CONFIG, group_instance_id) unless group_instance_id.nil?
      props.put(kafka::HEARTBEAT_INTERVAL_MS_CONFIG, heartbeat_interval_ms.to_s) unless heartbeat_interval_ms.nil?
      props.put(kafka::ISOLATION_LEVEL_CONFIG, isolation_level)
      props.put(kafka::KEY_DESERIALIZER_CLASS_CONFIG, key_deserializer_class)
data/logstash-integration-kafka.gemspec CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |s|
   s.name = 'logstash-integration-kafka'
-  s.version = '11.0.0'
+  s.version = '11.1.0'
   s.licenses = ['Apache-2.0']
   s.summary = "Integration with Kafka - input and output plugins"
   s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline "+
data/spec/integration/inputs/kafka_spec.rb CHANGED
@@ -79,6 +79,7 @@ describe "inputs/kafka", :integration => true do
     producer = org.apache.kafka.clients.producer.KafkaProducer.new(props)
 
     producer.send(record)
+    producer.flush
     producer.close
   end
 
@@ -185,10 +186,105 @@ describe "inputs/kafka", :integration => true do
       end
     end
   end
+
+  context "static membership 'group.instance.id' setting" do
+    let(:base_config) do
+      {
+        "topics" => ["logstash_integration_static_membership_topic"],
+        "group_id" => "logstash",
+        "consumer_threads" => 1,
+        # this is needed because the worker thread could start a little after the producer has sent the "up" message
+        "auto_offset_reset" => "earliest",
+        "group_instance_id" => "test_static_group_id"
+      }
+    end
+    let(:consumer_config) { base_config }
+    let(:logger) { double("logger") }
+    let(:queue) { java.util.concurrent.ArrayBlockingQueue.new(10) }
+    let(:kafka_input) { LogStash::Inputs::Kafka.new(consumer_config) }
+    before :each do
+      allow(LogStash::Inputs::Kafka).to receive(:logger).and_return(logger)
+      [:error, :warn, :info, :debug].each do |level|
+        allow(logger).to receive(level)
+      end
+
+      kafka_input.register
+    end
+
+    it "input plugin disconnects from the broker when another client with the same static membership connects" do
+      expect(logger).to receive(:error).with("Another consumer with same group.instance.id has connected", anything)
+
+      input_worker = java.lang.Thread.new { kafka_input.run(queue) }
+      begin
+        input_worker.start
+        wait_kafka_input_is_ready("logstash_integration_static_membership_topic", queue)
+        saboteur_kafka_consumer = create_consumer_and_start_consuming("test_static_group_id")
+        saboteur_kafka_consumer.run # ask to be scheduled
+        saboteur_kafka_consumer.join
+
+        expect(saboteur_kafka_consumer.value).to eq("saboteur exited")
+      ensure
+        input_worker.join(30_000)
+      end
+    end
+
+    context "when the plugin is configured with multiple consumer threads" do
+      let(:consumer_config) { base_config.merge({"consumer_threads" => 2}) }
+
+      it "avoids connecting with the same 'group.instance.id'" do
+        expect(logger).to_not receive(:error).with("Another consumer with same group.instance.id has connected", anything)
+
+        input_worker = java.lang.Thread.new { kafka_input.run(queue) }
+        begin
+          input_worker.start
+          wait_kafka_input_is_ready("logstash_integration_static_membership_topic", queue)
+        ensure
+          kafka_input.stop
+          input_worker.join(1_000)
+        end
+      end
+    end
+  end
+end
+
+# returns the consumer's Ruby Thread
+def create_consumer_and_start_consuming(static_group_id)
+  props = java.util.Properties.new
+  kafka = org.apache.kafka.clients.consumer.ConsumerConfig
+  props.put(kafka::BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")
+  props.put(kafka::KEY_DESERIALIZER_CLASS_CONFIG, LogStash::Inputs::Kafka::DEFAULT_DESERIALIZER_CLASS)
+  props.put(kafka::VALUE_DESERIALIZER_CLASS_CONFIG, LogStash::Inputs::Kafka::DEFAULT_DESERIALIZER_CLASS)
+  props.put(kafka::GROUP_ID_CONFIG, "logstash")
+  props.put(kafka::GROUP_INSTANCE_ID_CONFIG, static_group_id)
+  consumer = org.apache.kafka.clients.consumer.KafkaConsumer.new(props)
+
+  Thread.new do
+    LogStash::Util::set_thread_name("integration_test_simple_consumer")
+    begin
+      consumer.subscribe(["logstash_integration_static_membership_topic"])
+      records = consumer.poll(java.time.Duration.ofSeconds(3))
+      "saboteur exited"
+    rescue => e
+      e # return the exception, surfaced via thread.value
+    ensure
+      consumer.close
+    end
+  end
 end
 
 private
 
+def wait_kafka_input_is_ready(topic, queue)
+  # give the kafka input time to be up and running
+  header = org.apache.kafka.common.header.internals.RecordHeader.new("name", "Ping Up".to_java_bytes)
+  record = org.apache.kafka.clients.producer.ProducerRecord.new(topic, 0, "key", "value", [header])
+  send_message(record)
+
+  # wait until the message is processed
+  message = queue.poll(1, java.util.concurrent.TimeUnit::MINUTES)
+  expect(message).to_not eq(nil)
+end
+
 def consume_messages(config, queue: Queue.new, timeout:, event_count:)
   kafka_input = LogStash::Inputs::Kafka.new(config)
   kafka_input.register
data/spec/unit/inputs/kafka_spec.rb CHANGED
@@ -297,7 +297,7 @@ describe LogStash::Inputs::Kafka do
       to receive(:new).with(hash_including('client.rack' => 'EU-R1')).
       and_return kafka_client = double('kafka-consumer')
 
-    expect( subject.send(:create_consumer, 'sample_client-0') ).to be kafka_client
+    expect( subject.send(:create_consumer, 'sample_client-0', 'group_instance_id') ).to be kafka_client
   end
 end
 
@@ -309,7 +309,7 @@ describe LogStash::Inputs::Kafka do
       to receive(:new).with(hash_including('session.timeout.ms' => '25000', 'max.poll.interval.ms' => '345000')).
       and_return kafka_client = double('kafka-consumer')
 
-    expect( subject.send(:create_consumer, 'sample_client-1') ).to be kafka_client
+    expect( subject.send(:create_consumer, 'sample_client-1', 'group_instance_id') ).to be kafka_client
   end
 end
 
@@ -321,7 +321,7 @@ describe LogStash::Inputs::Kafka do
       to receive(:new).with(hash_including('session.timeout.ms' => '25200', 'max.poll.interval.ms' => '123000')).
       and_return kafka_client = double('kafka-consumer')
 
-    expect( subject.send(:create_consumer, 'sample_client-2') ).to be kafka_client
+    expect( subject.send(:create_consumer, 'sample_client-2', 'group_instance_id') ).to be kafka_client
   end
 end
 
@@ -333,7 +333,7 @@ describe LogStash::Inputs::Kafka do
       to receive(:new).with(hash_including('enable.auto.commit' => 'false', 'check.crcs' => 'true')).
       and_return kafka_client = double('kafka-consumer')
 
-    expect( subject.send(:create_consumer, 'sample_client-3') ).to be kafka_client
+    expect( subject.send(:create_consumer, 'sample_client-3', 'group_instance_id') ).to be kafka_client
     expect( subject.enable_auto_commit ).to be false
   end
 end
@@ -346,7 +346,7 @@ describe LogStash::Inputs::Kafka do
       to receive(:new).with(hash_including('enable.auto.commit' => 'true', 'check.crcs' => 'false')).
       and_return kafka_client = double('kafka-consumer')
 
-    expect( subject.send(:create_consumer, 'sample_client-4') ).to be kafka_client
+    expect( subject.send(:create_consumer, 'sample_client-4', 'group_instance_id') ).to be kafka_client
     expect( subject.enable_auto_commit ).to be true
   end
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: logstash-integration-kafka
 version: !ruby/object:Gem::Version
-  version: 11.0.0
+  version: 11.1.0
 platform: java
 authors:
 - Elastic
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-12-16 00:00:00.000000000 Z
+date: 2023-01-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement