karafka 2.2.14 → 2.3.0.alpha2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +38 -12
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +24 -0
  6. data/Gemfile.lock +16 -16
  7. data/README.md +0 -2
  8. data/SECURITY.md +23 -0
  9. data/bin/integrations +1 -1
  10. data/config/locales/errors.yml +7 -1
  11. data/config/locales/pro_errors.yml +22 -0
  12. data/docker-compose.yml +1 -1
  13. data/karafka.gemspec +2 -2
  14. data/lib/karafka/admin/acl.rb +287 -0
  15. data/lib/karafka/admin.rb +9 -13
  16. data/lib/karafka/app.rb +5 -3
  17. data/lib/karafka/base_consumer.rb +9 -1
  18. data/lib/karafka/cli/base.rb +1 -1
  19. data/lib/karafka/connection/client.rb +83 -76
  20. data/lib/karafka/connection/conductor.rb +28 -0
  21. data/lib/karafka/connection/listener.rb +159 -42
  22. data/lib/karafka/connection/listeners_batch.rb +5 -11
  23. data/lib/karafka/connection/manager.rb +72 -0
  24. data/lib/karafka/connection/messages_buffer.rb +12 -0
  25. data/lib/karafka/connection/proxy.rb +17 -0
  26. data/lib/karafka/connection/status.rb +75 -0
  27. data/lib/karafka/contracts/config.rb +14 -10
  28. data/lib/karafka/contracts/consumer_group.rb +9 -1
  29. data/lib/karafka/contracts/topic.rb +3 -1
  30. data/lib/karafka/errors.rb +17 -0
  31. data/lib/karafka/instrumentation/logger_listener.rb +3 -0
  32. data/lib/karafka/instrumentation/notifications.rb +13 -5
  33. data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +31 -28
  34. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +20 -1
  35. data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +15 -12
  36. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +39 -36
  37. data/lib/karafka/pro/base_consumer.rb +47 -0
  38. data/lib/karafka/pro/connection/manager.rb +269 -0
  39. data/lib/karafka/pro/connection/multiplexing/listener.rb +40 -0
  40. data/lib/karafka/pro/iterator/tpl_builder.rb +1 -1
  41. data/lib/karafka/pro/iterator.rb +1 -6
  42. data/lib/karafka/pro/loader.rb +14 -0
  43. data/lib/karafka/pro/processing/coordinator.rb +2 -1
  44. data/lib/karafka/pro/processing/executor.rb +37 -0
  45. data/lib/karafka/pro/processing/expansions_selector.rb +32 -0
  46. data/lib/karafka/pro/processing/jobs/periodic.rb +41 -0
  47. data/lib/karafka/pro/processing/jobs/periodic_non_blocking.rb +32 -0
  48. data/lib/karafka/pro/processing/jobs_builder.rb +14 -3
  49. data/lib/karafka/pro/processing/offset_metadata/consumer.rb +44 -0
  50. data/lib/karafka/pro/processing/offset_metadata/fetcher.rb +131 -0
  51. data/lib/karafka/pro/processing/offset_metadata/listener.rb +46 -0
  52. data/lib/karafka/pro/processing/schedulers/base.rb +39 -23
  53. data/lib/karafka/pro/processing/schedulers/default.rb +12 -14
  54. data/lib/karafka/pro/processing/strategies/default.rb +154 -1
  55. data/lib/karafka/pro/processing/strategies/dlq/default.rb +39 -0
  56. data/lib/karafka/pro/processing/strategies/vp/default.rb +65 -25
  57. data/lib/karafka/pro/processing/virtual_offset_manager.rb +41 -11
  58. data/lib/karafka/pro/routing/features/long_running_job/topic.rb +2 -0
  59. data/lib/karafka/pro/routing/features/multiplexing/config.rb +38 -0
  60. data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +114 -0
  61. data/lib/karafka/pro/routing/features/multiplexing/patches/contracts/consumer_group.rb +42 -0
  62. data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +38 -0
  63. data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +42 -0
  64. data/lib/karafka/pro/routing/features/multiplexing/subscription_groups_builder.rb +40 -0
  65. data/lib/karafka/pro/routing/features/multiplexing.rb +59 -0
  66. data/lib/karafka/pro/routing/features/non_blocking_job/topic.rb +32 -0
  67. data/lib/karafka/pro/routing/features/non_blocking_job.rb +37 -0
  68. data/lib/karafka/pro/routing/features/offset_metadata/config.rb +33 -0
  69. data/lib/karafka/pro/routing/features/offset_metadata/contracts/topic.rb +42 -0
  70. data/lib/karafka/pro/routing/features/offset_metadata/topic.rb +65 -0
  71. data/lib/karafka/pro/routing/features/offset_metadata.rb +40 -0
  72. data/lib/karafka/pro/routing/features/patterns/contracts/consumer_group.rb +4 -0
  73. data/lib/karafka/pro/routing/features/patterns/detector.rb +18 -10
  74. data/lib/karafka/pro/routing/features/periodic_job/config.rb +37 -0
  75. data/lib/karafka/pro/routing/features/periodic_job/contracts/topic.rb +44 -0
  76. data/lib/karafka/pro/routing/features/periodic_job/topic.rb +94 -0
  77. data/lib/karafka/pro/routing/features/periodic_job.rb +27 -0
  78. data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +1 -0
  79. data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
  80. data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +7 -2
  81. data/lib/karafka/process.rb +5 -3
  82. data/lib/karafka/processing/coordinator.rb +5 -1
  83. data/lib/karafka/processing/executor.rb +16 -10
  84. data/lib/karafka/processing/executors_buffer.rb +19 -4
  85. data/lib/karafka/processing/schedulers/default.rb +3 -2
  86. data/lib/karafka/processing/strategies/default.rb +6 -0
  87. data/lib/karafka/processing/strategies/dlq.rb +36 -0
  88. data/lib/karafka/routing/builder.rb +12 -2
  89. data/lib/karafka/routing/consumer_group.rb +5 -5
  90. data/lib/karafka/routing/features/base.rb +44 -8
  91. data/lib/karafka/routing/features/dead_letter_queue/config.rb +6 -1
  92. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
  93. data/lib/karafka/routing/features/dead_letter_queue/topic.rb +9 -2
  94. data/lib/karafka/routing/subscription_group.rb +2 -2
  95. data/lib/karafka/routing/subscription_groups_builder.rb +11 -2
  96. data/lib/karafka/routing/topic.rb +8 -10
  97. data/lib/karafka/runner.rb +13 -3
  98. data/lib/karafka/server.rb +5 -9
  99. data/lib/karafka/setup/config.rb +17 -0
  100. data/lib/karafka/status.rb +23 -14
  101. data/lib/karafka/templates/karafka.rb.erb +7 -0
  102. data/lib/karafka/time_trackers/partition_usage.rb +56 -0
  103. data/lib/karafka/version.rb +1 -1
  104. data.tar.gz.sig +0 -0
  105. metadata +42 -10
  106. metadata.gz.sig +0 -0
  107. data/lib/karafka/connection/consumer_group_coordinator.rb +0 -48
@@ -10,6 +10,10 @@ module Karafka
10
10
  class Client
11
11
  attr_reader :rebalance_manager
12
12
 
13
+ # @return [Karafka::Routing::SubscriptionGroup] subscription group to which this client
14
+ # belongs
15
+ attr_reader :subscription_group
16
+
13
17
  # @return [String] underlying consumer name
14
18
  # @note Consumer name may change in case we regenerate it
15
19
  attr_reader :name
@@ -20,16 +24,7 @@ module Karafka
20
24
  # How many times should we retry polling in case of a failure
21
25
  MAX_POLL_RETRIES = 20
22
26
 
23
- # 1 minute of max wait for the first rebalance before a forceful attempt
24
- # This applies only to a case when a short-lived Karafka instance with a client would be
25
- # closed before first rebalance. Mitigates a librdkafka bug.
26
- COOPERATIVE_STICKY_MAX_WAIT = 60_000
27
-
28
- # We want to make sure we never close several clients in the same moment to prevent
29
- # potential race conditions and other issues
30
- SHUTDOWN_MUTEX = Mutex.new
31
-
32
- private_constant :MAX_POLL_RETRIES, :SHUTDOWN_MUTEX, :COOPERATIVE_STICKY_MAX_WAIT
27
+ private_constant :MAX_POLL_RETRIES
33
28
 
34
29
  # Creates a new consumer instance.
35
30
  #
@@ -47,7 +42,6 @@ module Karafka
47
42
  @rebalance_manager = RebalanceManager.new(@subscription_group.id)
48
43
  @rebalance_callback = Instrumentation::Callbacks::Rebalance.new(@subscription_group)
49
44
  @events_poller = Helpers::IntervalRunner.new { events_poll }
50
- @kafka = build_consumer
51
45
  # There are few operations that can happen in parallel from the listener threads as well
52
46
  # as from the workers. They are not fully thread-safe because they may be composed out of
53
47
  # few calls to Kafka or out of few internal state changes. That is why we mutex them.
@@ -122,13 +116,19 @@ module Karafka
122
116
  # Stores offset for a given partition of a given topic based on the provided message.
123
117
  #
124
118
  # @param message [Karafka::Messages::Message]
125
- def store_offset(message)
126
- internal_store_offset(message)
119
+ # @param offset_metadata [String, nil] offset storage metadata or nil if none
120
+ def store_offset(message, offset_metadata = nil)
121
+ internal_store_offset(message, offset_metadata)
127
122
  end
128
123
 
129
124
  # @return [Boolean] true if our current assignment has been lost involuntarily.
130
125
  def assignment_lost?
131
- @kafka.assignment_lost?
126
+ kafka.assignment_lost?
127
+ end
128
+
129
+ # @return [Rdkafka::Consumer::TopicPartitionList] current active assignment
130
+ def assignment
131
+ kafka.assignment
132
132
  end
133
133
 
134
134
  # Commits the offset on a current consumer in a non-blocking or blocking way.
@@ -199,7 +199,7 @@ module Karafka
199
199
 
200
200
  @paused_tpls[topic][partition] = tpl
201
201
 
202
- @kafka.pause(tpl)
202
+ kafka.pause(tpl)
203
203
 
204
204
  # If offset is not provided, will pause where it finished.
205
205
  # This makes librdkafka not purge buffers and can provide significant network savings
@@ -240,43 +240,23 @@ module Karafka
240
240
  partition: partition
241
241
  )
242
242
 
243
- @kafka.resume(tpl)
243
+ kafka.resume(tpl)
244
244
  end
245
245
  end
246
246
 
247
247
  # Gracefully stops topic consumption.
248
- #
249
- # @note Stopping running consumers without a really important reason is not recommended
250
- # as until all the consumers are stopped, the server will keep running serving only
251
- # part of the messages
252
248
  def stop
253
- # This ensures, that we do not stop the underlying client until it passes the first
254
- # rebalance for cooperative-sticky. Otherwise librdkafka may crash
255
- #
256
- # We set a timeout just in case the rebalance would never happen or would last for an
257
- # extensive time period.
258
- #
259
- # @see https://github.com/confluentinc/librdkafka/issues/4312
249
+ # In case of cooperative-sticky, there is a bug in librdkafka that may hang it.
250
+ # To mitigate it we first need to unsubscribe so we will not receive any assignments and
251
+ # only then we should be good to go.
252
+ # @see https://github.com/confluentinc/librdkafka/issues/4527
260
253
  if @subscription_group.kafka[:'partition.assignment.strategy'] == 'cooperative-sticky'
261
- active_wait = false
262
-
263
- (COOPERATIVE_STICKY_MAX_WAIT / 100).times do
264
- # If we're past the first rebalance, no need to wait
265
- if @rebalance_manager.active?
266
- # We give it a bit of time because librdkafka has a tendency to do some-post
267
- # callback work that from its perspective is still under rebalance
268
- sleep(5) if active_wait
269
-
270
- break
271
- end
272
-
273
- active_wait = true
274
-
275
- # poll to trigger potential rebalances that could occur during stopping and to trigger
276
- # potential callbacks
277
- poll(100)
254
+ unsubscribe
278
255
 
256
+ until assignment.empty?
279
257
  sleep(0.1)
258
+
259
+ ping
280
260
  end
281
261
  end
282
262
 
@@ -285,21 +265,23 @@ module Karafka
285
265
 
286
266
  # Marks given message as consumed.
287
267
  #
288
- # @param [Karafka::Messages::Message] message that we want to mark as processed
268
+ # @param message [Karafka::Messages::Message] message that we want to mark as processed
269
+ # @param metadata [String, nil] offset storage metadata or nil if none
289
270
  # @return [Boolean] true if successful. False if we no longer own given partition
290
271
  # @note This method won't trigger automatic offsets commits, rather relying on the offset
291
272
  # check-pointing trigger that happens with each batch processed. It will however check the
292
273
  # `librdkafka` assignment ownership to increase accuracy for involuntary revocations.
293
- def mark_as_consumed(message)
294
- store_offset(message) && !assignment_lost?
274
+ def mark_as_consumed(message, metadata = nil)
275
+ store_offset(message, metadata) && !assignment_lost?
295
276
  end
296
277
 
297
278
  # Marks a given message as consumed and commits the offsets in a blocking way.
298
279
  #
299
- # @param [Karafka::Messages::Message] message that we want to mark as processed
280
+ # @param message [Karafka::Messages::Message] message that we want to mark as processed
281
+ # @param metadata [String, nil] offset storage metadata or nil if none
300
282
  # @return [Boolean] true if successful. False if we no longer own given partition
301
- def mark_as_consumed!(message)
302
- return false unless mark_as_consumed(message)
283
+ def mark_as_consumed!(message, metadata = nil)
284
+ return false unless mark_as_consumed(message, metadata)
303
285
 
304
286
  commit_offsets!
305
287
  end
@@ -316,7 +298,6 @@ module Karafka
316
298
  @events_poller.reset
317
299
  @closed = false
318
300
  @paused_tpls.clear
319
- @kafka = build_consumer
320
301
  end
321
302
  end
322
303
 
@@ -343,7 +324,27 @@ module Karafka
343
324
  # @note It is non-blocking when timeout 0 and will not wait if queue empty. It costs up to
344
325
  # 2ms when no callbacks are triggered.
345
326
  def events_poll(timeout = 0)
346
- @kafka.events_poll(timeout)
327
+ kafka.events_poll(timeout)
328
+ end
329
+
330
+ # Returns pointer to the consumer group metadata. It is used only in the context of
331
+ # exactly-once-semantics in transactions, this is why it is never remapped to Ruby
332
+ # @return [FFI::Pointer]
333
+ def consumer_group_metadata_pointer
334
+ kafka.consumer_group_metadata_pointer
335
+ end
336
+
337
+ # Return the current committed offset per partition for this consumer group.
338
+ # The offset field of each requested partition will either be set to stored offset or to
339
+ # -1001 in case there was no stored offset for that partition.
340
+ #
341
+ # @param tpl [Rdkafka::Consumer::TopicPartitionList] for which we want to get committed offsets
342
+ # @return [Rdkafka::Consumer::TopicPartitionList]
343
+ # @raise [Rdkafka::RdkafkaError] When getting the committed positions fails.
344
+ # @note This call is synchronous, so it is recommended to use it only on rebalances to
345
+ # get positions together with their offset metadata
346
+ def committed(tpl = nil)
347
+ Proxy.new(kafka).committed(tpl)
347
348
  end
348
349
 
349
350
  private
@@ -352,9 +353,10 @@ module Karafka
352
353
  #
353
354
  # Non thread-safe offset storing method
354
355
  # @param message [Karafka::Messages::Message]
356
+ # @param metadata [String, nil] offset storage metadata or nil if none
355
357
  # @return [Boolean] true if we could store the offset (if we still own the partition)
356
- def internal_store_offset(message)
357
- @kafka.store_offset(message)
358
+ def internal_store_offset(message, metadata)
359
+ kafka.store_offset(message, metadata)
358
360
  true
359
361
  rescue Rdkafka::RdkafkaError => e
360
362
  return false if e.code == :assignment_lost
@@ -370,7 +372,7 @@ module Karafka
370
372
  # even when no stored, because with sync commit, it refreshes the ownership state of the
371
373
  # consumer in a sync way.
372
374
  def internal_commit_offsets(async: true)
373
- @kafka.commit(nil, async)
375
+ kafka.commit(nil, async)
374
376
 
375
377
  true
376
378
  rescue Rdkafka::RdkafkaError => e
@@ -407,7 +409,7 @@ module Karafka
407
409
  message.partition => message.offset
408
410
  )
409
411
 
410
- proxy = Proxy.new(@kafka)
412
+ proxy = Proxy.new(kafka)
411
413
 
412
414
  # Now we can overwrite the seek message offset with our resolved offset and we can
413
415
  # then seek to the appropriate message
@@ -429,29 +431,29 @@ module Karafka
429
431
  # seeking and pausing
430
432
  return if message.offset == topic_partition_position(message.topic, message.partition)
431
433
 
432
- @kafka.seek(message)
434
+ kafka.seek(message)
433
435
  end
434
436
 
435
437
  # Commits the stored offsets in a sync way and closes the consumer.
436
438
  def close
437
- # Allow only one client to be closed at the same time
438
- SHUTDOWN_MUTEX.synchronize do
439
- # Once client is closed, we should not close it again
440
- # This could only happen in case of a race-condition when forceful shutdown happens
441
- # and triggers this from a different thread
442
- return if @closed
439
+ # Once client is closed, we should not close it again
440
+ # This could only happen in case of a race-condition when forceful shutdown happens
441
+ # and triggers this from a different thread
442
+ return if @closed
443
443
 
444
- @closed = true
444
+ @closed = true
445
445
 
446
- # Remove callbacks runners that were registered
447
- ::Karafka::Core::Instrumentation.statistics_callbacks.delete(@subscription_group.id)
448
- ::Karafka::Core::Instrumentation.error_callbacks.delete(@subscription_group.id)
446
+ return unless @kafka
449
447
 
450
- @kafka.close
451
- @buffer.clear
452
- # @note We do not clear rebalance manager here as we may still have revocation info
453
- # here that we want to consider valid prior to running another reconnection
454
- end
448
+ # Remove callbacks runners that were registered
449
+ ::Karafka::Core::Instrumentation.statistics_callbacks.delete(@subscription_group.id)
450
+ ::Karafka::Core::Instrumentation.error_callbacks.delete(@subscription_group.id)
451
+
452
+ kafka.close
453
+ @kafka = nil
454
+ @buffer.clear
455
+ # @note We do not clear rebalance manager here as we may still have revocation info
456
+ # here that we want to consider valid prior to running another reconnection
455
457
  end
456
458
 
457
459
  # Unsubscribes from all the subscriptions
@@ -459,7 +461,7 @@ module Karafka
459
461
  # @note We do not re-raise since this is supposed to be only used on close and can be safely
460
462
  # ignored. We do however want to instrument on it
461
463
  def unsubscribe
462
- @kafka.unsubscribe
464
+ kafka.unsubscribe
463
465
  rescue ::Rdkafka::RdkafkaError => e
464
466
  Karafka.monitor.instrument(
465
467
  'error.occurred',
@@ -473,7 +475,7 @@ module Karafka
473
475
  # @param partition [Integer]
474
476
  # @return [Rdkafka::Consumer::TopicPartitionList]
475
477
  def topic_partition_list(topic, partition)
476
- rdkafka_partition = @kafka
478
+ rdkafka_partition = kafka
477
479
  .assignment
478
480
  .to_h[topic]
479
481
  &.detect { |part| part.partition == partition }
@@ -492,7 +494,7 @@ module Karafka
492
494
  rd_partition = ::Rdkafka::Consumer::Partition.new(partition, nil, 0)
493
495
  tpl = ::Rdkafka::Consumer::TopicPartitionList.new(topic => [rd_partition])
494
496
 
495
- @kafka.position(tpl).to_h.fetch(topic).first.offset || -1
497
+ kafka.position(tpl).to_h.fetch(topic).first.offset || -1
496
498
  end
497
499
 
498
500
  # Performs a single poll operation and handles retries and errors
@@ -520,7 +522,7 @@ module Karafka
520
522
  # blocking events from being handled.
521
523
  poll_tick = timeout > @tick_interval ? @tick_interval : timeout
522
524
 
523
- result = @kafka.poll(poll_tick)
525
+ result = kafka.poll(poll_tick)
524
526
 
525
527
  # If we've got a message, we can return it
526
528
  return result if result
@@ -647,6 +649,11 @@ module Karafka
647
649
 
648
650
  @buffer.uniq!
649
651
  end
652
+
653
+ # @return [Rdkafka::Consumer] librdkafka consumer instance
654
+ def kafka
655
+ @kafka ||= build_consumer
656
+ end
650
657
  end
651
658
  end
652
659
  end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Connection
5
+ # Conductor is responsible for time orchestration of listeners manager.
6
+ # It blocks when manager is not needed as there were no state changes that could cause any
7
+ # listeners config changes and unblocks when things change or when certain time passed.
8
+ # The time based unblocking allows for building of complex managers that could be state aware
9
+ class Conductor
10
+ # @param max_interval [Integer] after how many milliseconds of doing nothing should we wake
11
+ # up the manager despite no state changes
12
+ def initialize(max_interval = 30_000)
13
+ @lock = RUBY_VERSION < '3.2' ? Processing::TimedQueue.new : Queue.new
14
+ @timeout = max_interval / 1_000.0
15
+ end
16
+
17
+ # Waits in a blocking way until it is time to manage listeners
18
+ def wait
19
+ @lock.pop(timeout: @timeout)
20
+ end
21
+
22
+ # Releases wait lock on state change
23
+ def signal
24
+ @lock << true
25
+ end
26
+ end
27
+ end
28
+ end