karafka 2.3.3 → 2.4.0.beta2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +12 -38
  4. data/CHANGELOG.md +59 -0
  5. data/Gemfile +6 -3
  6. data/Gemfile.lock +29 -27
  7. data/bin/integrations +1 -1
  8. data/config/locales/errors.yml +21 -2
  9. data/config/locales/pro_errors.yml +16 -1
  10. data/karafka.gemspec +4 -2
  11. data/lib/active_job/queue_adapters/karafka_adapter.rb +2 -0
  12. data/lib/karafka/admin/configs/config.rb +81 -0
  13. data/lib/karafka/admin/configs/resource.rb +88 -0
  14. data/lib/karafka/admin/configs.rb +103 -0
  15. data/lib/karafka/admin.rb +211 -90
  16. data/lib/karafka/base_consumer.rb +2 -2
  17. data/lib/karafka/cli/info.rb +9 -7
  18. data/lib/karafka/cli/server.rb +7 -7
  19. data/lib/karafka/cli/topics/align.rb +109 -0
  20. data/lib/karafka/cli/topics/base.rb +66 -0
  21. data/lib/karafka/cli/topics/create.rb +35 -0
  22. data/lib/karafka/cli/topics/delete.rb +30 -0
  23. data/lib/karafka/cli/topics/migrate.rb +31 -0
  24. data/lib/karafka/cli/topics/plan.rb +169 -0
  25. data/lib/karafka/cli/topics/repartition.rb +41 -0
  26. data/lib/karafka/cli/topics/reset.rb +18 -0
  27. data/lib/karafka/cli/topics.rb +13 -123
  28. data/lib/karafka/connection/client.rb +55 -37
  29. data/lib/karafka/connection/listener.rb +22 -17
  30. data/lib/karafka/connection/proxy.rb +93 -4
  31. data/lib/karafka/connection/status.rb +14 -2
  32. data/lib/karafka/constraints.rb +3 -3
  33. data/lib/karafka/contracts/config.rb +14 -1
  34. data/lib/karafka/contracts/topic.rb +1 -1
  35. data/lib/karafka/deserializers/headers.rb +15 -0
  36. data/lib/karafka/deserializers/key.rb +15 -0
  37. data/lib/karafka/deserializers/payload.rb +16 -0
  38. data/lib/karafka/embedded.rb +2 -0
  39. data/lib/karafka/helpers/async.rb +5 -2
  40. data/lib/karafka/helpers/colorize.rb +6 -0
  41. data/lib/karafka/instrumentation/callbacks/oauthbearer_token_refresh.rb +29 -0
  42. data/lib/karafka/instrumentation/logger_listener.rb +23 -3
  43. data/lib/karafka/instrumentation/notifications.rb +10 -0
  44. data/lib/karafka/instrumentation/vendors/appsignal/client.rb +16 -2
  45. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +20 -0
  46. data/lib/karafka/messages/batch_metadata.rb +1 -1
  47. data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
  48. data/lib/karafka/messages/builders/message.rb +10 -6
  49. data/lib/karafka/messages/message.rb +2 -1
  50. data/lib/karafka/messages/metadata.rb +20 -4
  51. data/lib/karafka/messages/parser.rb +1 -1
  52. data/lib/karafka/pro/base_consumer.rb +12 -23
  53. data/lib/karafka/pro/encryption/cipher.rb +7 -3
  54. data/lib/karafka/pro/encryption/contracts/config.rb +1 -0
  55. data/lib/karafka/pro/encryption/errors.rb +4 -1
  56. data/lib/karafka/pro/encryption/messages/middleware.rb +13 -11
  57. data/lib/karafka/pro/encryption/messages/parser.rb +22 -20
  58. data/lib/karafka/pro/encryption/setup/config.rb +5 -0
  59. data/lib/karafka/pro/iterator/expander.rb +2 -1
  60. data/lib/karafka/pro/iterator/tpl_builder.rb +38 -0
  61. data/lib/karafka/pro/iterator.rb +28 -2
  62. data/lib/karafka/pro/loader.rb +3 -0
  63. data/lib/karafka/pro/processing/coordinator.rb +15 -2
  64. data/lib/karafka/pro/processing/expansions_selector.rb +2 -0
  65. data/lib/karafka/pro/processing/jobs_queue.rb +122 -5
  66. data/lib/karafka/pro/processing/periodic_job/consumer.rb +67 -0
  67. data/lib/karafka/pro/processing/piping/consumer.rb +126 -0
  68. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
  69. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
  70. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom.rb +1 -1
  71. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +1 -1
  72. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
  73. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
  74. data/lib/karafka/pro/processing/strategies/aj/dlq_mom.rb +1 -1
  75. data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +1 -1
  76. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +2 -0
  77. data/lib/karafka/pro/processing/strategies/default.rb +5 -1
  78. data/lib/karafka/pro/processing/strategies/dlq/default.rb +21 -5
  79. data/lib/karafka/pro/processing/strategies/lrj/default.rb +2 -0
  80. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +2 -0
  81. data/lib/karafka/pro/processing/subscription_groups_coordinator.rb +52 -0
  82. data/lib/karafka/pro/routing/features/direct_assignments/config.rb +27 -0
  83. data/lib/karafka/pro/routing/features/direct_assignments/contracts/consumer_group.rb +53 -0
  84. data/lib/karafka/pro/routing/features/direct_assignments/contracts/topic.rb +108 -0
  85. data/lib/karafka/pro/routing/features/direct_assignments/subscription_group.rb +77 -0
  86. data/lib/karafka/pro/routing/features/direct_assignments/topic.rb +69 -0
  87. data/lib/karafka/pro/routing/features/direct_assignments.rb +25 -0
  88. data/lib/karafka/pro/routing/features/patterns/builder.rb +1 -1
  89. data/lib/karafka/pro/routing/features/swarm/contracts/routing.rb +76 -0
  90. data/lib/karafka/pro/routing/features/swarm/contracts/topic.rb +16 -5
  91. data/lib/karafka/pro/routing/features/swarm/topic.rb +25 -2
  92. data/lib/karafka/pro/routing/features/swarm.rb +11 -0
  93. data/lib/karafka/pro/swarm/liveness_listener.rb +20 -0
  94. data/lib/karafka/processing/coordinator.rb +17 -8
  95. data/lib/karafka/processing/coordinators_buffer.rb +5 -2
  96. data/lib/karafka/processing/executor.rb +6 -2
  97. data/lib/karafka/processing/executors_buffer.rb +5 -2
  98. data/lib/karafka/processing/jobs_queue.rb +9 -4
  99. data/lib/karafka/processing/strategies/aj_dlq_mom.rb +1 -1
  100. data/lib/karafka/processing/strategies/default.rb +7 -1
  101. data/lib/karafka/processing/strategies/dlq.rb +17 -2
  102. data/lib/karafka/processing/workers_batch.rb +4 -1
  103. data/lib/karafka/routing/builder.rb +6 -2
  104. data/lib/karafka/routing/consumer_group.rb +2 -1
  105. data/lib/karafka/routing/features/dead_letter_queue/config.rb +5 -0
  106. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +8 -0
  107. data/lib/karafka/routing/features/dead_letter_queue/topic.rb +10 -2
  108. data/lib/karafka/routing/features/deserializers/config.rb +18 -0
  109. data/lib/karafka/routing/features/deserializers/contracts/topic.rb +31 -0
  110. data/lib/karafka/routing/features/deserializers/topic.rb +51 -0
  111. data/lib/karafka/routing/features/deserializers.rb +11 -0
  112. data/lib/karafka/routing/proxy.rb +9 -14
  113. data/lib/karafka/routing/router.rb +11 -2
  114. data/lib/karafka/routing/subscription_group.rb +9 -1
  115. data/lib/karafka/routing/topic.rb +0 -1
  116. data/lib/karafka/runner.rb +1 -1
  117. data/lib/karafka/setup/config.rb +50 -9
  118. data/lib/karafka/status.rb +7 -8
  119. data/lib/karafka/swarm/supervisor.rb +16 -2
  120. data/lib/karafka/templates/karafka.rb.erb +28 -1
  121. data/lib/karafka/version.rb +1 -1
  122. data.tar.gz.sig +0 -0
  123. metadata +38 -12
  124. metadata.gz.sig +0 -0
  125. data/lib/karafka/routing/consumer_mapper.rb +0 -23
  126. data/lib/karafka/serialization/json/deserializer.rb +0 -19
  127. data/lib/karafka/time_trackers/partition_usage.rb +0 -56
@@ -30,8 +30,11 @@ module Karafka
30
30
  #
31
31
  # @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group
32
32
  # with all the configuration details needed for us to create a client
33
+ # @param batch_poll_breaker [Proc] proc that when evaluated to false will cause the batch
34
+ # poll loop to finish early. This improves the shutdown and dynamic multiplication as it
35
+ # allows us to early break on long polls.
33
36
  # @return [Karafka::Connection::Client]
34
- def initialize(subscription_group)
37
+ def initialize(subscription_group, batch_poll_breaker)
35
38
  @id = SecureRandom.hex(6)
36
39
  # Name is set when we build consumer
37
40
  @name = ''
@@ -41,7 +44,14 @@ module Karafka
41
44
  @tick_interval = ::Karafka::App.config.internal.tick_interval
42
45
  @rebalance_manager = RebalanceManager.new(@subscription_group.id)
43
46
  @rebalance_callback = Instrumentation::Callbacks::Rebalance.new(@subscription_group)
44
- @events_poller = Helpers::IntervalRunner.new { events_poll }
47
+
48
+ @interval_runner = Helpers::IntervalRunner.new do
49
+ events_poll
50
+ # events poller returns nil when not running often enough, hence we don't use the
51
+ # boolean to be explicit
52
+ batch_poll_breaker.call ? :run : :stop
53
+ end
54
+
45
55
  # There are few operations that can happen in parallel from the listener threads as well
46
56
  # as from the workers. They are not fully thread-safe because they may be composed out of
47
57
  # few calls to Kafka or out of few internal state changes. That is why we mutex them.
@@ -98,7 +108,8 @@ module Karafka
98
108
  break
99
109
  end
100
110
 
101
- @events_poller.call
111
+ # If we were signaled from the outside to break the loop, we should do so
112
+ break if @interval_runner.call == :stop
102
113
 
103
114
  # Track time spent on all of the processing and polling
104
115
  time_poll.checkpoint
@@ -295,7 +306,7 @@ module Karafka
295
306
  ) do
296
307
  close
297
308
 
298
- @events_poller.reset
309
+ @interval_runner.reset
299
310
  @closed = false
300
311
  @paused_tpls.clear
301
312
  end
@@ -344,7 +355,7 @@ module Karafka
344
355
  # @note It is recommended to use this only on rebalances to get positions with metadata
345
356
  # when working with metadata as this is synchronous
346
357
  def committed(tpl = nil)
347
- Proxy.new(kafka).committed(tpl)
358
+ @wrapped_kafka.committed(tpl)
348
359
  end
349
360
 
350
361
  private
@@ -356,13 +367,7 @@ module Karafka
356
367
  # @param metadata [String, nil] offset storage metadata or nil if none
357
368
  # @return [Boolean] true if we could store the offset (if we still own the partition)
358
369
  def internal_store_offset(message, metadata)
359
- kafka.store_offset(message, metadata)
360
- true
361
- rescue Rdkafka::RdkafkaError => e
362
- return false if e.code == :assignment_lost
363
- return false if e.code == :state
364
-
365
- raise e
370
+ @wrapped_kafka.store_offset(message, metadata)
366
371
  end
367
372
 
368
373
  # Non thread-safe message committing method
@@ -372,23 +377,7 @@ module Karafka
372
377
  # even when no stored, because with sync commit, it refreshes the ownership state of the
373
378
  # consumer in a sync way.
374
379
  def internal_commit_offsets(async: true)
375
- kafka.commit(nil, async)
376
-
377
- true
378
- rescue Rdkafka::RdkafkaError => e
379
- case e.code
380
- when :assignment_lost
381
- return false
382
- when :unknown_member_id
383
- return false
384
- when :no_offset
385
- return true
386
- when :coordinator_load_in_progress
387
- sleep(1)
388
- retry
389
- end
390
-
391
- raise e
380
+ @wrapped_kafka.commit_offsets(async: async)
392
381
  end
393
382
 
394
383
  # Non-mutexed seek that should be used only internally. Outside we expose `#seek` that is
@@ -409,12 +398,10 @@ module Karafka
409
398
  message.partition => message.offset
410
399
  )
411
400
 
412
- proxy = Proxy.new(kafka)
413
-
414
401
  # Now we can overwrite the seek message offset with our resolved offset and we can
415
402
  # then seek to the appropriate message
416
403
  # We set the timeout to 2_000 to make sure that remote clusters handle this well
417
- real_offsets = proxy.offsets_for_times(tpl)
404
+ real_offsets = @wrapped_kafka.offsets_for_times(tpl)
418
405
  detected_partition = real_offsets.to_h.dig(message.topic, message.partition)
419
406
 
420
407
  # There always needs to be an offset. In case we seek into the future, where there
@@ -445,12 +432,16 @@ module Karafka
445
432
 
446
433
  return unless @kafka
447
434
 
435
+ sg_id = @subscription_group.id
436
+
448
437
  # Remove callbacks runners that were registered
449
- ::Karafka::Core::Instrumentation.statistics_callbacks.delete(@subscription_group.id)
450
- ::Karafka::Core::Instrumentation.error_callbacks.delete(@subscription_group.id)
438
+ ::Karafka::Core::Instrumentation.statistics_callbacks.delete(sg_id)
439
+ ::Karafka::Core::Instrumentation.error_callbacks.delete(sg_id)
440
+ ::Karafka::Core::Instrumentation.oauthbearer_token_refresh_callbacks.delete(sg_id)
451
441
 
452
442
  kafka.close
453
443
  @kafka = nil
444
+ @wrapped_kafka = nil
454
445
  @buffer.clear
455
446
  # @note We do not clear rebalance manager here as we may still have revocation info
456
447
  # here that we want to consider valid prior to running another reconnection
@@ -571,6 +562,12 @@ module Karafka
571
562
 
572
563
  # No sense in retrying when no topic/partition and we're no longer running
573
564
  retryable = false unless Karafka::App.running?
565
+ # If we detect the end of partition which can happen if `enable.partition.eof` is set to
566
+ # true, we can just return nil fast. This will fast yield whatever set of messages we
567
+ # already have instead of waiting. This can be used for better latency control when we do
568
+ # not expect a lot of lag and want to quickly move to processing.
569
+ when :partition_eof
570
+ return nil
574
571
  end
575
572
 
576
573
  if early_report || !retryable
@@ -614,7 +611,8 @@ module Karafka
614
611
  # new messages. This allows us to report statistics while data is still being processed
615
612
  config.consumer_poll_set = false
616
613
 
617
- consumer = config.consumer
614
+ # Do not start native kafka so we can inject the oauth bearer callbacks if needed
615
+ consumer = config.consumer(native_kafka_auto_start: false)
618
616
  @name = consumer.name
619
617
 
620
618
  # Register statistics runner for this particular type of callbacks
@@ -637,9 +635,21 @@ module Karafka
637
635
  )
638
636
  )
639
637
 
638
+ ::Karafka::Core::Instrumentation.oauthbearer_token_refresh_callbacks.add(
639
+ @subscription_group.id,
640
+ Instrumentation::Callbacks::OauthbearerTokenRefresh.new(
641
+ consumer
642
+ )
643
+ )
644
+
640
645
  # Subscription needs to happen after we assigned the rebalance callbacks just in case of
641
646
  # a race condition
642
- consumer.subscribe(*@subscription_group.subscriptions)
647
+ subscriptions = @subscription_group.subscriptions
648
+ assignments = @subscription_group.assignments(consumer)
649
+
650
+ consumer.subscribe(*subscriptions) if subscriptions
651
+ consumer.assign(assignments) if assignments
652
+
643
653
  consumer
644
654
  end
645
655
 
@@ -659,7 +669,15 @@ module Karafka
659
669
 
660
670
  # @return [Rdkafka::Consumer] librdkafka consumer instance
661
671
  def kafka
662
- @kafka ||= build_consumer
672
+ return @kafka if @kafka
673
+
674
+ @kafka = build_consumer
675
+ @wrapped_kafka = Proxy.new(@kafka)
676
+ # We start it only after everything is configured so oauth or any other early-run client
677
+ # related operations can occur. Otherwise, if all kafka referencing setup would not be
678
+ # done, we could not intercept the invocations to kafka via client methods.
679
+ @kafka.start
680
+ @kafka
663
681
  end
664
682
  end
665
683
  end
@@ -36,7 +36,7 @@ module Karafka
36
36
  @subscription_group = subscription_group
37
37
  @jobs_queue = jobs_queue
38
38
  @coordinators = Processing::CoordinatorsBuffer.new(subscription_group.topics)
39
- @client = Client.new(@subscription_group)
39
+ @client = Client.new(@subscription_group, -> { running? })
40
40
  @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
41
41
  @jobs_builder = proc_config.jobs_builder
42
42
  @partitioner = proc_config.partitioner_class.new(subscription_group)
@@ -46,11 +46,18 @@ module Karafka
46
46
  # We can do this that way because we always first schedule jobs using messages before we
47
47
  # fetch another batch.
48
48
  @messages_buffer = MessagesBuffer.new(subscription_group)
49
- @usage_tracker = TimeTrackers::PartitionUsage.new
50
49
  @mutex = Mutex.new
51
50
  @status = Status.new
52
51
 
53
52
  @jobs_queue.register(@subscription_group.id)
53
+
54
+ # This makes sure that even if we tick more often than the interval time due to frequent
55
+ # unlocks from short-lived jobs or async queues synchronization, events handling and jobs
56
+ # scheduling still happens with the expected frequency
57
+ @interval_runner = Helpers::IntervalRunner.new do
58
+ @events_poller.call
59
+ @scheduler.on_manage
60
+ end
54
61
  end
55
62
 
56
63
  # Runs the main listener fetch loop.
@@ -103,7 +110,8 @@ module Karafka
103
110
  end
104
111
 
105
112
  @status.start!
106
- async_call
113
+
114
+ async_call("karafka.listener##{@subscription_group.id}")
107
115
  end
108
116
 
109
117
  # Stops the jobs queue, triggers shutdown on all the executors (sync), commits offsets and
@@ -277,15 +285,14 @@ module Karafka
277
285
 
278
286
  revoked_partitions.each do |topic, partitions|
279
287
  partitions.each do |partition|
280
- @usage_tracker.revoke(topic, partition)
281
288
  @coordinators.revoke(topic, partition)
282
289
 
283
290
  # There may be a case where we have lost partition of which data we have never
284
291
  # processed (if it was assigned and revoked really fast), thus we may not have it
285
292
  # here. In cases like this, we do not run a revocation job
286
293
  @executors.find_all(topic, partition).each do |executor|
287
- job = @jobs_builder.revoked(executor)
288
- jobs << job
294
+ executor.coordinator.increment(:revoked)
295
+ jobs << @jobs_builder.revoked(executor)
289
296
  end
290
297
 
291
298
  # We need to remove all the executors of a given topic partition that we have lost, so
@@ -308,6 +315,7 @@ module Karafka
308
315
  jobs = []
309
316
 
310
317
  @executors.each do |executor|
318
+ executor.coordinator.increment(:shutdown)
311
319
  job = @jobs_builder.shutdown(executor)
312
320
  jobs << job
313
321
  end
@@ -328,8 +336,6 @@ module Karafka
328
336
  idle_jobs = []
329
337
 
330
338
  @messages_buffer.each do |topic, partition, messages|
331
- @usage_tracker.track(topic, partition)
332
-
333
339
  coordinator = @coordinators.find_or_create(topic, partition)
334
340
  # Start work coordination for this topic partition
335
341
  coordinator.start(messages)
@@ -337,12 +343,13 @@ module Karafka
337
343
  # We do not increment coordinator for idle job because it's not a user related one
338
344
  # and it will not go through a standard lifecycle. Same applies to revoked and shutdown
339
345
  if messages.empty?
346
+ coordinator.increment(:idle)
340
347
  executor = @executors.find_or_create(topic, partition, 0, coordinator)
341
348
  idle_jobs << @jobs_builder.idle(executor)
342
349
  else
343
350
  @partitioner.call(topic, messages, coordinator) do |group_id, partition_messages|
351
+ coordinator.increment(:consume)
344
352
  executor = @executors.find_or_create(topic, partition, group_id, coordinator)
345
- coordinator.increment
346
353
  consume_jobs << @jobs_builder.consume(executor, partition_messages)
347
354
  end
348
355
  end
@@ -391,11 +398,11 @@ module Karafka
391
398
  interval = topic.periodic_job.interval
392
399
 
393
400
  partitions.each do |partition|
394
- # Skip if we were operating on a given topic partition recently
395
- next if @usage_tracker.active?(topic_name, partition, interval)
396
-
397
401
  coordinator = @coordinators.find_or_create(topic_name, partition)
398
402
 
403
+ # Skip if we were operating on a given topic partition recently
404
+ next if coordinator.active_within?(interval)
405
+
399
406
  # Do not tick if we do not want to tick during pauses
400
407
  next if coordinator.paused? && !topic.periodic_job.during_pause?
401
408
 
@@ -405,10 +412,8 @@ module Karafka
405
412
  # run (ok) but attempt 1 means, there was an error and we will retry
406
413
  next if coordinator.attempt.positive? && !topic.periodic_job.during_retry?
407
414
 
408
- # Track so we do not run periodic job again too soon
409
- @usage_tracker.track(topic_name, partition)
410
-
411
415
  @executors.find_all_or_create(topic_name, partition, coordinator).each do |executor|
416
+ coordinator.increment(:periodic)
412
417
  jobs << @jobs_builder.periodic(executor)
413
418
  end
414
419
  end
@@ -423,8 +428,7 @@ module Karafka
423
428
  # Waits for all the jobs from a given subscription group to finish before moving forward
424
429
  def wait
425
430
  @jobs_queue.wait(@subscription_group.id) do
426
- @events_poller.call
427
- @scheduler.on_manage
431
+ @interval_runner.call
428
432
  end
429
433
  end
430
434
 
@@ -462,6 +466,7 @@ module Karafka
462
466
  @events_poller.reset
463
467
  @client.reset
464
468
  @coordinators.reset
469
+ @interval_runner.reset
465
470
  @executors = Processing::ExecutorsBuffer.new(@client, @subscription_group)
466
471
  end
467
472
  end
@@ -11,12 +11,16 @@ module Karafka
11
11
  # it would be needed
12
12
  class Proxy < SimpleDelegator
13
13
  # Errors on which we want to retry
14
- RETRYABLE_ERRORS = %i[
14
+ # Includes temporary errors related to node not being (or not yet being) coordinator or a
15
+ # leader to a given set of partitions. Usually goes away after a retry
16
+ RETRYABLE_DEFAULT_ERRORS = %i[
15
17
  all_brokers_down
16
18
  timed_out
19
+ not_coordinator
20
+ not_leader_for_partition
17
21
  ].freeze
18
22
 
19
- private_constant :RETRYABLE_ERRORS
23
+ private_constant :RETRYABLE_DEFAULT_ERRORS
20
24
 
21
25
  attr_accessor :wrapped
22
26
 
@@ -42,6 +46,9 @@ module Karafka
42
46
  def query_watermark_offsets(topic, partition)
43
47
  l_config = @config.query_watermark_offsets
44
48
 
49
+ # For newly created topics or in cases where we're trying to get them but there is no
50
+ # leader, this can fail. It happens more often for new topics under KRaft, however we
51
+ # still want to make sure things operate as expected even then
45
52
  with_broker_errors_retry(
46
53
  # required to be in seconds, not ms
47
54
  wait_time: l_config.wait_time / 1_000.to_f,
@@ -85,6 +92,87 @@ module Karafka
85
92
  end
86
93
  end
87
94
 
95
+ # When we cannot store an offset, it means we no longer own the partition
96
+ #
97
+ # Non thread-safe offset storing method
98
+ # @param message [Karafka::Messages::Message]
99
+ # @param metadata [String, nil] offset storage metadata or nil if none
100
+ # @return [Boolean] true if we could store the offset (if we still own the partition)
101
+ def store_offset(message, metadata = nil)
102
+ @wrapped.store_offset(message, metadata)
103
+
104
+ true
105
+ rescue Rdkafka::RdkafkaError => e
106
+ return false if e.code == :assignment_lost
107
+ return false if e.code == :state
108
+
109
+ raise e
110
+ end
111
+
112
+ # Non thread-safe message committing method
113
+ # @param tpl [Rdkafka::Consumer::TopicPartitionList, nil] tpl or nil
114
+ # @param async [Boolean] should the commit happen async or sync (async by default)
115
+ # @return [Boolean] true if offset commit worked, false if we've lost the assignment
116
+ # @note We do **not** consider `no_offset` as any problem and we allow to commit offsets
117
+ # even when no stored, because with sync commit, it refreshes the ownership state of the
118
+ # consumer in a sync way.
119
+ def commit_offsets(tpl = nil, async: true)
120
+ c_config = @config.commit
121
+
122
+ with_broker_errors_retry(
123
+ wait_time: c_config.wait_time / 1_000.to_f,
124
+ max_attempts: c_config.max_attempts
125
+ ) do
126
+ @wrapped.commit(tpl, async)
127
+ end
128
+
129
+ true
130
+ rescue Rdkafka::RdkafkaError => e
131
+ case e.code
132
+ when :assignment_lost
133
+ return false
134
+ when :unknown_member_id
135
+ return false
136
+ when :no_offset
137
+ return true
138
+ when :coordinator_load_in_progress
139
+ sleep(1)
140
+ retry
141
+ end
142
+
143
+ raise e
144
+ end
145
+
146
+ # @param tpl [Rdkafka::Consumer::TopicPartitionList] list of topics and partitions for which
147
+ # we want to get the lag on the defined CG
148
+ # @return [Hash<String, Hash>] hash with topics and their partitions lags
149
+ def lag(tpl)
150
+ l_config = @config.committed
151
+
152
+ with_broker_errors_retry(
153
+ # required to be in seconds, not ms
154
+ wait_time: l_config.wait_time / 1_000.to_f,
155
+ max_attempts: l_config.max_attempts
156
+ ) do
157
+ @wrapped.lag(tpl, l_config.timeout)
158
+ end
159
+ end
160
+
161
+ # @param topic_name [String, nil] Name of the topic we're interested in or nil if we want to
162
+ # get info on all topics
163
+ # @return [Rdkafka::Metadata] rdkafka metadata object with the requested details
164
+ def metadata(topic_name = nil)
165
+ m_config = @config.metadata
166
+
167
+ with_broker_errors_retry(
168
+ # required to be in seconds, not ms
169
+ wait_time: m_config.wait_time / 1_000.to_f,
170
+ max_attempts: m_config.max_attempts
171
+ ) do
172
+ @wrapped.metadata(topic_name, m_config.timeout)
173
+ end
174
+ end
175
+
88
176
  private
89
177
 
90
178
  # Runs expected block of code with few retries on all_brokers_down
@@ -94,13 +182,14 @@ module Karafka
94
182
  # completely.
95
183
  # @param wait_time [Integer, Float] how many seconds should we wait. It uses `#sleep` of Ruby
96
184
  # so it needs time in seconds.
97
- def with_broker_errors_retry(max_attempts:, wait_time: 1)
185
+ # @param errors [Array<Symbol>] rdkafka errors we want to retry on
186
+ def with_broker_errors_retry(max_attempts:, wait_time: 1, errors: RETRYABLE_DEFAULT_ERRORS)
98
187
  attempt ||= 0
99
188
  attempt += 1
100
189
 
101
190
  yield
102
191
  rescue Rdkafka::RdkafkaError => e
103
- raise unless RETRYABLE_ERRORS.include?(e.code)
192
+ raise unless errors.include?(e.code)
104
193
 
105
194
  if attempt <= max_attempts
106
195
  sleep(wait_time)
@@ -5,6 +5,11 @@ module Karafka
5
5
  module Connection
6
6
  # Listener connection status representation
7
7
  class Status
8
+ include Helpers::ConfigImporter.new(
9
+ monitor: %i[monitor],
10
+ conductor: %i[internal connection conductor]
11
+ )
12
+
8
13
  # Available states and their transitions.
9
14
  STATES = {
10
15
  pending: :pending!,
@@ -26,7 +31,8 @@ module Karafka
26
31
  return if @status && STATES.keys.index(:#{state}) <= STATES.keys.index(@status)
27
32
 
28
33
  @status = :#{state}
29
- @conductor.signal
34
+ conductor.signal
35
+ monitor.instrument("connection.listener.#{state}", caller: self)
30
36
  end
31
37
  end
32
38
 
@@ -39,7 +45,6 @@ module Karafka
39
45
 
40
46
  def initialize
41
47
  @mutex = Mutex.new
42
- @conductor = Karafka::App.config.internal.connection.conductor
43
48
  pending!
44
49
  end
45
50
 
@@ -49,11 +54,18 @@ module Karafka
49
54
  def stop!
50
55
  if pending?
51
56
  @status = :stopping
57
+ conductor.signal
58
+ monitor.instrument('connection.listener.stopping', caller: self)
59
+
52
60
  stopped!
53
61
  elsif stopped?
54
62
  nil
63
+ elsif stopping?
64
+ nil
55
65
  else
56
66
  @status = :stopping
67
+ conductor.signal
68
+ monitor.instrument('connection.listener.stopping', caller: self)
57
69
  end
58
70
  end
59
71
 
@@ -15,13 +15,13 @@ module Karafka
15
15
  # Skip verification if web is not used at all
16
16
  return unless require_version('karafka/web')
17
17
 
18
- # All good if version higher than 0.8.1 because we expect 0.8.2 or higher
19
- return if version(Karafka::Web::VERSION) >= version('0.8.2')
18
+ # All good if version is 0.9.0.rc1 or higher, as this is the minimum version we expect
19
+ return if version(Karafka::Web::VERSION) >= version('0.9.0.rc1')
20
20
 
21
21
  # If older web-ui used, we cannot allow it
22
22
  raise(
23
23
  Errors::DependencyConstraintsError,
24
- 'karafka-web < 0.8.2 is not compatible with this karafka version'
24
+ 'karafka-web < 0.9.0 is not compatible with this karafka version'
25
25
  )
26
26
  end
27
27
 
@@ -25,13 +25,14 @@ module Karafka
25
25
 
26
26
  required(:client_id) { |val| val.is_a?(String) && Contracts::TOPIC_REGEXP.match?(val) }
27
27
  required(:concurrency) { |val| val.is_a?(Integer) && val.positive? }
28
- required(:consumer_mapper) { |val| !val.nil? }
29
28
  required(:consumer_persistence) { |val| [true, false].include?(val) }
30
29
  required(:pause_timeout) { |val| val.is_a?(Integer) && val.positive? }
31
30
  required(:pause_max_timeout) { |val| val.is_a?(Integer) && val.positive? }
32
31
  required(:pause_with_exponential_backoff) { |val| [true, false].include?(val) }
32
+ required(:strict_topics_namespacing) { |val| [true, false].include?(val) }
33
33
  required(:shutdown_timeout) { |val| val.is_a?(Integer) && val.positive? }
34
34
  required(:max_wait_time) { |val| val.is_a?(Integer) && val.positive? }
35
+ required(:group_id) { |val| val.is_a?(String) && Contracts::TOPIC_REGEXP.match?(val) }
35
36
  required(:kafka) { |val| val.is_a?(Hash) && !val.empty? }
36
37
 
37
38
  nested(:swarm) do
@@ -39,6 +40,12 @@ module Karafka
39
40
  required(:node) { |val| val == false || val.is_a?(Karafka::Swarm::Node) }
40
41
  end
41
42
 
43
+ nested(:oauth) do
44
+ required(:token_provider_listener) do |val|
45
+ val == false || val.respond_to?(:on_oauthbearer_token_refresh)
46
+ end
47
+ end
48
+
42
49
  nested(:admin) do
43
50
  # Can be empty because inherits values from the root kafka
44
51
  required(:kafka) { |val| val.is_a?(Hash) }
@@ -74,11 +81,17 @@ module Karafka
74
81
  required(:conductor) { |val| !val.nil? }
75
82
 
76
83
  nested(:proxy) do
84
+ nested(:commit) do
85
+ required(:max_attempts) { |val| val.is_a?(Integer) && val.positive? }
86
+ required(:wait_time) { |val| val.is_a?(Integer) && val.positive? }
87
+ end
88
+
77
89
  # All of them have the same requirements
78
90
  %i[
79
91
  query_watermark_offsets
80
92
  offsets_for_times
81
93
  committed
94
+ metadata
82
95
  ].each do |scope|
83
96
  nested(scope) do
84
97
  required(:timeout) { |val| val.is_a?(Integer) && val.positive? }
@@ -12,7 +12,7 @@ module Karafka
12
12
  ).fetch('en').fetch('validations').fetch('topic')
13
13
  end
14
14
 
15
- required(:deserializer) { |val| !val.nil? }
15
+ required(:deserializers) { |val| !val.nil? }
16
16
  required(:id) { |val| val.is_a?(String) && Contracts::TOPIC_REGEXP.match?(val) }
17
17
  required(:kafka) { |val| val.is_a?(Hash) && !val.empty? }
18
18
  required(:max_messages) { |val| val.is_a?(Integer) && val >= 1 }
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Deserializers
5
+ # Default message headers deserializer
6
+ class Headers
7
+ # @param metadata [Karafka::Messages::Metadata] metadata object from which we obtain the
8
+ # `#raw_headers`
9
+ # @return [Hash] expected message headers hash
10
+ def call(metadata)
11
+ metadata.raw_headers
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Deserializers
5
+ # Default message key deserializer
6
+ class Key
7
+ # @param metadata [Karafka::Messages::Metadata] metadata object from which we obtain the
8
+ # `#raw_key`
9
+ # @return [String, nil] expected message key in a string format or nil if no key
10
+ def call(metadata)
11
+ metadata.raw_key
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ # Module for all deserializers supported by default.
5
+ module Deserializers
6
+ # Default Karafka Json deserializer for loading JSON data in payload.
7
+ class Payload
8
+ # @param message [Karafka::Messages::Message] Message object that we want to deserialize
9
+ # @return [Hash, nil] hash with deserialized JSON data or nil when the raw payload is nil
10
+ def call(message)
11
+ # nil payload can be present for example for tombstone messages
12
+ message.raw_payload.nil? ? nil : ::JSON.parse(message.raw_payload)
13
+ end
14
+ end
15
+ end
16
+ end
@@ -8,6 +8,8 @@ module Karafka
8
8
  # so it won't interrupt other things running
9
9
  def start
10
10
  Thread.new do
11
+ Thread.current.name = 'karafka.embedded'
12
+
11
13
  Karafka::Process.tags.add(:execution_mode, 'embedded')
12
14
  Karafka::Server.start
13
15
  end
@@ -22,16 +22,19 @@ module Karafka
22
22
  def included(base)
23
23
  base.extend ::Forwardable
24
24
 
25
- base.def_delegators :@thread, :join, :terminate, :alive?
25
+ base.def_delegators :@thread, :join, :terminate, :alive?, :name
26
26
  end
27
27
  end
28
28
 
29
29
  # Runs the `#call` method in a new thread
30
- def async_call
30
+ # @param thread_name [String] name that we want to assign to the thread when we start it
31
+ def async_call(thread_name = '')
31
32
  MUTEX.synchronize do
32
33
  return if @thread&.alive?
33
34
 
34
35
  @thread = Thread.new do
36
+ Thread.current.name = thread_name
37
+
35
38
  Thread.current.abort_on_exception = true
36
39
 
37
40
  call
@@ -21,6 +21,12 @@ module Karafka
21
21
  def yellow(string)
22
22
  "\033[1;33m#{string}\033[0m"
23
23
  end
24
+
25
+ # @param string [String] string we want to have in grey
26
+ # @return [String] grey string
27
+ def grey(string)
28
+ "\e[38;5;244m#{string}\e[0m"
29
+ end
24
30
  end
25
31
  end
26
32
  end