logstash-integration-kafka 11.0.0-java → 11.1.0-java

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: c0060f7684d8dd0787c1e106fdcc1b1b9673ec42cf2ffc01141dc5a2d9351967
- data.tar.gz: a0d878319e3ffde777330f9a15c64c23e898f16f8cde797622a804e9f4cbf89c
+ metadata.gz: c8f85bfdadbbd496495603c82ed577db4c23a168db59e6d0034549de6ebb66d1
+ data.tar.gz: 0b3b0bc33d6e64eebcb9e757fede56f747625c864160844ed2a3c76d0b22a155
  SHA512:
- metadata.gz: 721c864dff1a72f31cea49f7e3c674bdd827347b7b7a60c8ab1c2f5e88839205e3e94836cc4b664a55fd16107e0412b6885d15949c2ac1fd20798e82124fa502
- data.tar.gz: 49823c88d015acbdaf7b91d0fe64a9975ff22c9c8691c79c9270bed33aa174d3ea4f770c841e074c64ed82a1b092814817a63447d62fa17c84f9e3666bf89203
+ metadata.gz: '09e8814f1697c1d38d478881b35102a1e9885c23cdb0b9b2d8860c7577d4b2a40eb580b535afff46273e7139fbbaf603b0ea0de1fbeb68644bcea95b79d9e470'
+ data.tar.gz: 1b0c7ee3ffbcb589268174753db91d0ba7272fc44c04e7ceb2e17d43f9fe2ec31ed4eee7450390adc0bada77c36fedaf14d46bf9b09372a6b55a6e76047a56e4
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
+ ## 11.1.0
+ - Added config `group_instance_id` to use Kafka's consumer static membership feature [#135](https://github.com/logstash-plugins/logstash-integration-kafka/pull/135)
+
  ## 11.0.0
  - Changed Kafka client to 3.3.1; requires Logstash >= 8.3.0.
  - Deprecated the `default` value for the `client_dns_lookup` setting; when explicitly used, it is forced to `use_all_dns_ips` [#130](https://github.com/logstash-plugins/logstash-integration-kafka/pull/130)
data/docs/input-kafka.asciidoc CHANGED
@@ -113,6 +113,7 @@ See the https://kafka.apache.org/{kafka_client_doc}/documentation for more details
  | <<plugins-{type}s-{plugin}-fetch_max_wait_ms>> |<<number,number>>|No
  | <<plugins-{type}s-{plugin}-fetch_min_bytes>> |<<number,number>>|No
  | <<plugins-{type}s-{plugin}-group_id>> |<<string,string>>|No
+ | <<plugins-{type}s-{plugin}-group_instance_id>> |<<string,string>>|No
  | <<plugins-{type}s-{plugin}-heartbeat_interval_ms>> |<<number,number>>|No
  | <<plugins-{type}s-{plugin}-isolation_level>> |<<string,string>>|No
  | <<plugins-{type}s-{plugin}-jaas_path>> |a valid filesystem path|No
@@ -344,6 +345,28 @@ NOTE: In cases when multiple inputs are being used in a single pipeline, reading
  it's essential to set a different `group_id => ...` for each input. Setting a unique `client_id => ...`
  is also recommended.
 
+ [id="plugins-{type}s-{plugin}-group_instance_id"]
+ ===== `group_instance_id`
+
+ * Value type is <<string,string>>
+ * There is no default value for this setting.
+
+ The static membership identifier for this Logstash Kafka consumer. The static membership feature was introduced in
+ https://cwiki.apache.org/confluence/display/KAFKA/KIP-345%3A+Introduce+static+membership+protocol+to+reduce+consumer+rebalances[KIP-345]
+ and is exposed through the Kafka property `group.instance.id`.
+ Its purpose is to avoid rebalances in situations where a large amount of data
+ would have to be forwarded after a consumer goes offline.
+ It mitigates cases where the service state is heavy and moving one topic partition from instance
+ A to instance B would cause a huge amount of data to be transferred.
+ A client that goes offline/online frequently can avoid frequent and heavy rebalances by using this option.
+
+ NOTE: The `group_instance_id` setting must be unique across all clients belonging to the same <<plugins-{type}s-{plugin}-group_id>>.
+ Otherwise, another client connecting with the same `group.instance.id` value would cause the oldest instance to be disconnected.
+ You can set this value to information that uniquely identifies the client application, such as a hostname or an IP address.
+
+ NOTE: When `consumer_threads` is greater than one, a per-thread suffix is appended to
+ the `group_instance_id` to avoid collisions.
+
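For illustration, a minimal pipeline sketch using this setting; the broker address, topic, and instance id below are hypothetical values, not defaults from this diff:

```
input {
  kafka {
    bootstrap_servers => "localhost:9092"
    topics            => ["example_topic"]
    group_id          => "logstash"
    group_instance_id => "logstash-node-a"  # must be unique per client in the group
    consumer_threads  => 2                  # threads register as logstash-node-a-0 and logstash-node-a-1
  }
}
```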
  [id="plugins-{type}s-{plugin}-heartbeat_interval_ms"]
  ===== `heartbeat_interval_ms`
 
data/lib/logstash/inputs/kafka.rb CHANGED
@@ -124,6 +124,11 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
  # that happens to be made up of multiple processors. Messages in a topic will be distributed to all
  # Logstash instances with the same `group_id`
  config :group_id, :validate => :string, :default => "logstash"
+ # Set a static group instance id, used in the static membership feature to avoid rebalancing when a
+ # consumer goes offline. If set and `consumer_threads` is greater than 1, then for each
+ # consumer created by each thread an artificial suffix is appended to the user-provided `group_instance_id`
+ # to avoid clashing.
+ config :group_instance_id, :validate => :string
  # The expected time between heartbeats to the consumer coordinator. Heartbeats are used to ensure
  # that the consumer's session stays active and to facilitate rebalancing when new
  # consumers join or leave the group. The value must be set lower than
@@ -136,7 +141,7 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
  # been aborted. Non-transactional messages will be returned unconditionally in either mode.
  config :isolation_level, :validate => ["read_uncommitted", "read_committed"], :default => "read_uncommitted" # Kafka default
  # Java Class used to deserialize the record's key
- config :key_deserializer_class, :validate => :string, :default => "org.apache.kafka.common.serialization.StringDeserializer"
+ config :key_deserializer_class, :validate => :string, :default => DEFAULT_DESERIALIZER_CLASS
  # The maximum delay between invocations of poll() when using consumer group management. This places
  # an upper bound on the amount of time that the consumer can be idle before fetching more records.
  # If poll() is not called before expiration of this timeout, then the consumer is considered failed and
@@ -287,7 +292,10 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
 
  public
  def run(logstash_queue)
- @runner_consumers = consumer_threads.times.map { |i| subscribe(create_consumer("#{client_id}-#{i}")) }
+ @runner_consumers = consumer_threads.times.map do |i|
+   thread_group_instance_id = consumer_threads > 1 && group_instance_id ? "#{group_instance_id}-#{i}" : group_instance_id
+   subscribe(create_consumer("#{client_id}-#{i}", thread_group_instance_id))
+ end
  @runner_threads = @runner_consumers.map.with_index { |consumer, i| thread_runner(logstash_queue, consumer,
  "kafka-input-worker-#{client_id}-#{i}") }
  @runner_threads.each(&:start)
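As a standalone sketch of the per-thread id derivation added above (the id value is hypothetical):

```ruby
# Minimal sketch of the suffixing rule in run(); mirrors the diff above.
group_instance_id = "logstash-node-a"
consumer_threads  = 3

ids = consumer_threads.times.map do |i|
  # Suffix only when several threads would otherwise share one user-provided id;
  # a nil group_instance_id stays nil, which leaves static membership disabled.
  consumer_threads > 1 && group_instance_id ? "#{group_instance_id}-#{i}" : group_instance_id
end
ids # => ["logstash-node-a-0", "logstash-node-a-1", "logstash-node-a-2"]
```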
@@ -335,6 +343,9 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
  rescue org.apache.kafka.common.errors.WakeupException => e
  logger.debug("Wake up from poll", :kafka_error_message => e)
  raise e unless stop?
+ rescue org.apache.kafka.common.errors.FencedInstanceIdException => e
+ logger.error("Another consumer with same group.instance.id has connected", :original_error_message => e.message)
+ raise e unless stop?
  rescue => e
  logger.error("Unable to poll Kafka consumer",
  :kafka_error_message => e,
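The new rescue clause reflects Kafka's documented static-membership semantics: when a second consumer joins with the same `group.instance.id`, the broker fences the older member, whose next `poll` raises `FencedInstanceIdException` (the integration spec later in this diff exercises exactly this). A condensed JRuby sketch of that scenario, assuming a local broker, a hypothetical topic `t`, and illustrative timing:

```ruby
# Two consumers sharing one group.instance.id; the second join fences the first.
# Assumes a broker at localhost:9092 and the Kafka client on the classpath.
props = java.util.Properties.new
kafka = org.apache.kafka.clients.consumer.ConsumerConfig
props.put(kafka::BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")
props.put(kafka::KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")
props.put(kafka::VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")
props.put(kafka::GROUP_ID_CONFIG, "logstash")
props.put(kafka::GROUP_INSTANCE_ID_CONFIG, "node-a")

older = org.apache.kafka.clients.consumer.KafkaConsumer.new(props)
older.subscribe(["t"])
older.poll(java.time.Duration.ofSeconds(1))   # joins the group as node-a

newer = org.apache.kafka.clients.consumer.KafkaConsumer.new(props)
newer.subscribe(["t"])
newer.poll(java.time.Duration.ofSeconds(1))   # re-joins as node-a, fencing `older`

# Once the broker has processed the second join, the older consumer's
# next poll raises FencedInstanceIdException.
older.poll(java.time.Duration.ofSeconds(1))
```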
@@ -389,7 +400,7 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
  end
 
  private
- def create_consumer(client_id)
+ def create_consumer(client_id, group_instance_id)
  begin
  props = java.util.Properties.new
  kafka = org.apache.kafka.clients.consumer.ConsumerConfig
@@ -407,6 +418,7 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
  props.put(kafka::FETCH_MAX_WAIT_MS_CONFIG, fetch_max_wait_ms.to_s) unless fetch_max_wait_ms.nil?
  props.put(kafka::FETCH_MIN_BYTES_CONFIG, fetch_min_bytes.to_s) unless fetch_min_bytes.nil?
  props.put(kafka::GROUP_ID_CONFIG, group_id)
+ props.put(kafka::GROUP_INSTANCE_ID_CONFIG, group_instance_id) unless group_instance_id.nil?
  props.put(kafka::HEARTBEAT_INTERVAL_MS_CONFIG, heartbeat_interval_ms.to_s) unless heartbeat_interval_ms.nil?
  props.put(kafka::ISOLATION_LEVEL_CONFIG, isolation_level)
  props.put(kafka::KEY_DESERIALIZER_CLASS_CONFIG, key_deserializer_class)
data/logstash-integration-kafka.gemspec CHANGED
@@ -1,6 +1,6 @@
  Gem::Specification.new do |s|
  s.name = 'logstash-integration-kafka'
- s.version = '11.0.0'
+ s.version = '11.1.0'
  s.licenses = ['Apache-2.0']
  s.summary = "Integration with Kafka - input and output plugins"
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline "+
data/spec/integration/inputs/kafka_spec.rb CHANGED
@@ -79,6 +79,7 @@ describe "inputs/kafka", :integration => true do
  producer = org.apache.kafka.clients.producer.KafkaProducer.new(props)
 
  producer.send(record)
+ producer.flush
  producer.close
  end
 
@@ -185,10 +186,105 @@ describe "inputs/kafka", :integration => true do
  end
  end
  end
+
+ context "static membership 'group.instance.id' setting" do
+ let(:base_config) do
+ {
+ "topics" => ["logstash_integration_static_membership_topic"],
+ "group_id" => "logstash",
+ "consumer_threads" => 1,
+ # this is needed because the worker thread could be executed a little after the producer sent the "up" message
+ "auto_offset_reset" => "earliest",
+ "group_instance_id" => "test_static_group_id"
+ }
+ end
+ let(:consumer_config) { base_config }
+ let(:logger) { double("logger") }
+ let(:queue) { java.util.concurrent.ArrayBlockingQueue.new(10) }
+ let(:kafka_input) { LogStash::Inputs::Kafka.new(consumer_config) }
+ before :each do
+ allow(LogStash::Inputs::Kafka).to receive(:logger).and_return(logger)
+ [:error, :warn, :info, :debug].each do |level|
+ allow(logger).to receive(level)
+ end
+
+ kafka_input.register
+ end
+
+ it "input plugin disconnects from the broker when another client with the same static membership connects" do
+ expect(logger).to receive(:error).with("Another consumer with same group.instance.id has connected", anything)
+
+ input_worker = java.lang.Thread.new { kafka_input.run(queue) }
+ begin
+ input_worker.start
+ wait_kafka_input_is_ready("logstash_integration_static_membership_topic", queue)
+ saboteur_kafka_consumer = create_consumer_and_start_consuming("test_static_group_id")
+ saboteur_kafka_consumer.run # ask to be scheduled
+ saboteur_kafka_consumer.join
+
+ expect(saboteur_kafka_consumer.value).to eq("saboteur exited")
+ ensure
+ input_worker.join(30_000)
+ end
+ end
+
+ context "when the plugin is configured with multiple consumer threads" do
+ let(:consumer_config) { base_config.merge({"consumer_threads" => 2}) }
+
+ it "avoids connecting with the same 'group.instance.id'" do
+ expect(logger).to_not receive(:error).with("Another consumer with same group.instance.id has connected", anything)
+
+ input_worker = java.lang.Thread.new { kafka_input.run(queue) }
+ begin
+ input_worker.start
+ wait_kafka_input_is_ready("logstash_integration_static_membership_topic", queue)
+ ensure
+ kafka_input.stop
+ input_worker.join(1_000)
+ end
+ end
+ end
+ end
+ end
+
+ # returns the consumer's Ruby Thread
+ def create_consumer_and_start_consuming(static_group_id)
+ props = java.util.Properties.new
+ kafka = org.apache.kafka.clients.consumer.ConsumerConfig
+ props.put(kafka::BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")
+ props.put(kafka::KEY_DESERIALIZER_CLASS_CONFIG, LogStash::Inputs::Kafka::DEFAULT_DESERIALIZER_CLASS)
+ props.put(kafka::VALUE_DESERIALIZER_CLASS_CONFIG, LogStash::Inputs::Kafka::DEFAULT_DESERIALIZER_CLASS)
+ props.put(kafka::GROUP_ID_CONFIG, "logstash")
+ props.put(kafka::GROUP_INSTANCE_ID_CONFIG, static_group_id)
+ consumer = org.apache.kafka.clients.consumer.KafkaConsumer.new(props)
+
+ Thread.new do
+ LogStash::Util::set_thread_name("integration_test_simple_consumer")
+ begin
+ consumer.subscribe(["logstash_integration_static_membership_topic"])
+ records = consumer.poll(java.time.Duration.ofSeconds(3))
+ "saboteur exited"
+ rescue => e
+ e # return the exception, read via thread.value
+ ensure
+ consumer.close
+ end
+ end
+ end
  end
 
  private
 
+ def wait_kafka_input_is_ready(topic, queue)
+ # give the kafka input time to be up and running
+ header = org.apache.kafka.common.header.internals.RecordHeader.new("name", "Ping Up".to_java_bytes)
+ record = org.apache.kafka.clients.producer.ProducerRecord.new(topic, 0, "key", "value", [header])
+ send_message(record)
+
+ # wait until the message is processed
+ message = queue.poll(1, java.util.concurrent.TimeUnit::MINUTES)
+ expect(message).to_not eq(nil)
+ end
+
  def consume_messages(config, queue: Queue.new, timeout:, event_count:)
  kafka_input = LogStash::Inputs::Kafka.new(config)
  kafka_input.register
data/spec/unit/inputs/kafka_spec.rb CHANGED
@@ -297,7 +297,7 @@ describe LogStash::Inputs::Kafka do
  to receive(:new).with(hash_including('client.rack' => 'EU-R1')).
  and_return kafka_client = double('kafka-consumer')
 
- expect( subject.send(:create_consumer, 'sample_client-0') ).to be kafka_client
+ expect( subject.send(:create_consumer, 'sample_client-0', 'group_instance_id') ).to be kafka_client
  end
  end
 
@@ -309,7 +309,7 @@ describe LogStash::Inputs::Kafka do
  to receive(:new).with(hash_including('session.timeout.ms' => '25000', 'max.poll.interval.ms' => '345000')).
  and_return kafka_client = double('kafka-consumer')
 
- expect( subject.send(:create_consumer, 'sample_client-1') ).to be kafka_client
+ expect( subject.send(:create_consumer, 'sample_client-1', 'group_instance_id') ).to be kafka_client
  end
  end
 
@@ -321,7 +321,7 @@ describe LogStash::Inputs::Kafka do
  to receive(:new).with(hash_including('session.timeout.ms' => '25200', 'max.poll.interval.ms' => '123000')).
  and_return kafka_client = double('kafka-consumer')
 
- expect( subject.send(:create_consumer, 'sample_client-2') ).to be kafka_client
+ expect( subject.send(:create_consumer, 'sample_client-2', 'group_instance_id') ).to be kafka_client
  end
  end
 
@@ -333,7 +333,7 @@ describe LogStash::Inputs::Kafka do
  to receive(:new).with(hash_including('enable.auto.commit' => 'false', 'check.crcs' => 'true')).
  and_return kafka_client = double('kafka-consumer')
 
- expect( subject.send(:create_consumer, 'sample_client-3') ).to be kafka_client
+ expect( subject.send(:create_consumer, 'sample_client-3', 'group_instance_id') ).to be kafka_client
  expect( subject.enable_auto_commit ).to be false
  end
  end
@@ -346,7 +346,7 @@ describe LogStash::Inputs::Kafka do
  to receive(:new).with(hash_including('enable.auto.commit' => 'true', 'check.crcs' => 'false')).
  and_return kafka_client = double('kafka-consumer')
 
- expect( subject.send(:create_consumer, 'sample_client-4') ).to be kafka_client
+ expect( subject.send(:create_consumer, 'sample_client-4', 'group_instance_id') ).to be kafka_client
  expect( subject.enable_auto_commit ).to be true
  end
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: logstash-integration-kafka
  version: !ruby/object:Gem::Version
- version: 11.0.0
+ version: 11.1.0
  platform: java
  authors:
  - Elastic
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2022-12-16 00:00:00.000000000 Z
+ date: 2023-01-25 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement