karafka 2.3.4 → 2.4.0.beta1

Files changed (126)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +12 -38
  4. data/CHANGELOG.md +56 -2
  5. data/Gemfile +6 -3
  6. data/Gemfile.lock +25 -23
  7. data/bin/integrations +1 -1
  8. data/config/locales/errors.yml +21 -2
  9. data/config/locales/pro_errors.yml +16 -1
  10. data/karafka.gemspec +4 -2
  11. data/lib/active_job/queue_adapters/karafka_adapter.rb +2 -0
  12. data/lib/karafka/admin/configs/config.rb +81 -0
  13. data/lib/karafka/admin/configs/resource.rb +88 -0
  14. data/lib/karafka/admin/configs.rb +103 -0
  15. data/lib/karafka/admin.rb +201 -100
  16. data/lib/karafka/base_consumer.rb +2 -2
  17. data/lib/karafka/cli/info.rb +9 -7
  18. data/lib/karafka/cli/server.rb +7 -7
  19. data/lib/karafka/cli/topics/align.rb +109 -0
  20. data/lib/karafka/cli/topics/base.rb +66 -0
  21. data/lib/karafka/cli/topics/create.rb +35 -0
  22. data/lib/karafka/cli/topics/delete.rb +30 -0
  23. data/lib/karafka/cli/topics/migrate.rb +31 -0
  24. data/lib/karafka/cli/topics/plan.rb +169 -0
  25. data/lib/karafka/cli/topics/repartition.rb +41 -0
  26. data/lib/karafka/cli/topics/reset.rb +18 -0
  27. data/lib/karafka/cli/topics.rb +13 -123
  28. data/lib/karafka/connection/client.rb +55 -37
  29. data/lib/karafka/connection/listener.rb +22 -17
  30. data/lib/karafka/connection/proxy.rb +93 -4
  31. data/lib/karafka/connection/status.rb +14 -2
  32. data/lib/karafka/contracts/config.rb +14 -1
  33. data/lib/karafka/contracts/topic.rb +1 -1
  34. data/lib/karafka/deserializers/headers.rb +15 -0
  35. data/lib/karafka/deserializers/key.rb +15 -0
  36. data/lib/karafka/deserializers/payload.rb +16 -0
  37. data/lib/karafka/embedded.rb +2 -0
  38. data/lib/karafka/helpers/async.rb +5 -2
  39. data/lib/karafka/helpers/colorize.rb +6 -0
  40. data/lib/karafka/instrumentation/callbacks/oauthbearer_token_refresh.rb +29 -0
  41. data/lib/karafka/instrumentation/logger_listener.rb +23 -3
  42. data/lib/karafka/instrumentation/notifications.rb +10 -0
  43. data/lib/karafka/instrumentation/vendors/appsignal/client.rb +16 -2
  44. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +20 -0
  45. data/lib/karafka/messages/batch_metadata.rb +1 -1
  46. data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
  47. data/lib/karafka/messages/builders/message.rb +10 -6
  48. data/lib/karafka/messages/message.rb +2 -1
  49. data/lib/karafka/messages/metadata.rb +20 -4
  50. data/lib/karafka/messages/parser.rb +1 -1
  51. data/lib/karafka/pro/base_consumer.rb +12 -23
  52. data/lib/karafka/pro/encryption/cipher.rb +7 -3
  53. data/lib/karafka/pro/encryption/contracts/config.rb +1 -0
  54. data/lib/karafka/pro/encryption/errors.rb +4 -1
  55. data/lib/karafka/pro/encryption/messages/middleware.rb +13 -11
  56. data/lib/karafka/pro/encryption/messages/parser.rb +22 -20
  57. data/lib/karafka/pro/encryption/setup/config.rb +5 -0
  58. data/lib/karafka/pro/iterator/expander.rb +2 -1
  59. data/lib/karafka/pro/iterator/tpl_builder.rb +38 -0
  60. data/lib/karafka/pro/iterator.rb +28 -2
  61. data/lib/karafka/pro/loader.rb +3 -0
  62. data/lib/karafka/pro/processing/coordinator.rb +15 -2
  63. data/lib/karafka/pro/processing/expansions_selector.rb +2 -0
  64. data/lib/karafka/pro/processing/jobs_queue.rb +122 -5
  65. data/lib/karafka/pro/processing/periodic_job/consumer.rb +67 -0
  66. data/lib/karafka/pro/processing/piping/consumer.rb +126 -0
  67. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
  68. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
  69. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom.rb +1 -1
  70. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +1 -1
  71. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
  72. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
  73. data/lib/karafka/pro/processing/strategies/aj/dlq_mom.rb +1 -1
  74. data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +1 -1
  75. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +2 -0
  76. data/lib/karafka/pro/processing/strategies/default.rb +5 -1
  77. data/lib/karafka/pro/processing/strategies/dlq/default.rb +21 -5
  78. data/lib/karafka/pro/processing/strategies/lrj/default.rb +2 -0
  79. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +2 -0
  80. data/lib/karafka/pro/processing/subscription_groups_coordinator.rb +52 -0
  81. data/lib/karafka/pro/routing/features/direct_assignments/config.rb +27 -0
  82. data/lib/karafka/pro/routing/features/direct_assignments/contracts/consumer_group.rb +53 -0
  83. data/lib/karafka/pro/routing/features/direct_assignments/contracts/topic.rb +108 -0
  84. data/lib/karafka/pro/routing/features/direct_assignments/subscription_group.rb +77 -0
  85. data/lib/karafka/pro/routing/features/direct_assignments/topic.rb +69 -0
  86. data/lib/karafka/pro/routing/features/direct_assignments.rb +25 -0
  87. data/lib/karafka/pro/routing/features/patterns/builder.rb +1 -1
  88. data/lib/karafka/pro/routing/features/swarm/contracts/routing.rb +76 -0
  89. data/lib/karafka/pro/routing/features/swarm/contracts/topic.rb +16 -5
  90. data/lib/karafka/pro/routing/features/swarm/topic.rb +25 -2
  91. data/lib/karafka/pro/routing/features/swarm.rb +11 -0
  92. data/lib/karafka/pro/swarm/liveness_listener.rb +20 -0
  93. data/lib/karafka/processing/coordinator.rb +17 -8
  94. data/lib/karafka/processing/coordinators_buffer.rb +5 -2
  95. data/lib/karafka/processing/executor.rb +6 -2
  96. data/lib/karafka/processing/executors_buffer.rb +5 -2
  97. data/lib/karafka/processing/jobs_queue.rb +9 -4
  98. data/lib/karafka/processing/strategies/aj_dlq_mom.rb +1 -1
  99. data/lib/karafka/processing/strategies/default.rb +7 -1
  100. data/lib/karafka/processing/strategies/dlq.rb +17 -2
  101. data/lib/karafka/processing/workers_batch.rb +4 -1
  102. data/lib/karafka/routing/builder.rb +6 -2
  103. data/lib/karafka/routing/consumer_group.rb +2 -1
  104. data/lib/karafka/routing/features/dead_letter_queue/config.rb +5 -0
  105. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +8 -0
  106. data/lib/karafka/routing/features/dead_letter_queue/topic.rb +10 -2
  107. data/lib/karafka/routing/features/deserializers/config.rb +18 -0
  108. data/lib/karafka/routing/features/deserializers/contracts/topic.rb +31 -0
  109. data/lib/karafka/routing/features/deserializers/topic.rb +51 -0
  110. data/lib/karafka/routing/features/deserializers.rb +11 -0
  111. data/lib/karafka/routing/proxy.rb +9 -14
  112. data/lib/karafka/routing/router.rb +11 -2
  113. data/lib/karafka/routing/subscription_group.rb +9 -1
  114. data/lib/karafka/routing/topic.rb +0 -1
  115. data/lib/karafka/runner.rb +1 -1
  116. data/lib/karafka/setup/config.rb +50 -9
  117. data/lib/karafka/status.rb +7 -8
  118. data/lib/karafka/swarm/supervisor.rb +16 -2
  119. data/lib/karafka/templates/karafka.rb.erb +28 -1
  120. data/lib/karafka/version.rb +1 -1
  121. data.tar.gz.sig +0 -0
  122. metadata +38 -12
  123. metadata.gz.sig +0 -0
  124. data/lib/karafka/routing/consumer_mapper.rb +0 -23
  125. data/lib/karafka/serialization/json/deserializer.rb +0 -19
  126. data/lib/karafka/time_trackers/partition_usage.rb +0 -56
data/lib/karafka/connection/client.rb
@@ -30,8 +30,11 @@ module Karafka
  #
  # @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group
  # with all the configuration details needed for us to create a client
+ # @param batch_poll_breaker [Proc] proc that when evaluated to false will cause the batch
+ # poll loop to finish early. This improves the shutdown and dynamic multiplication as it
+ # allows us to early break on long polls.
  # @return [Karafka::Connection::Client]
- def initialize(subscription_group)
+ def initialize(subscription_group, batch_poll_breaker)
  @id = SecureRandom.hex(6)
  # Name is set when we build consumer
  @name = ''
@@ -41,7 +44,14 @@ module Karafka
  @tick_interval = ::Karafka::App.config.internal.tick_interval
  @rebalance_manager = RebalanceManager.new(@subscription_group.id)
  @rebalance_callback = Instrumentation::Callbacks::Rebalance.new(@subscription_group)
- @events_poller = Helpers::IntervalRunner.new { events_poll }
+
+ @interval_runner = Helpers::IntervalRunner.new do
+ events_poll
+ # events poller returns nil when not running often enough, hence we don't use the
+ # boolean to be explicit
+ batch_poll_breaker.call ? :run : :stop
+ end
+
  # There are few operations that can happen in parallel from the listener threads as well
  # as from the workers. They are not fully thread-safe because they may be composed out of
  # few calls to Kafka or out of few internal state changes. That is why we mutex them.
@@ -98,7 +108,8 @@ module Karafka
  break
  end

- @events_poller.call
+ # If we were signaled from the outside to break the loop, we should
+ break if @interval_runner.call == :stop

  # Track time spent on all of the processing and polling
  time_poll.checkpoint
@@ -295,7 +306,7 @@ module Karafka
  ) do
  close

- @events_poller.reset
+ @interval_runner.reset
  @closed = false
  @paused_tpls.clear
  end
@@ -344,7 +355,7 @@ module Karafka
  # @note It is recommended to use this only on rebalances to get positions with metadata
  # when working with metadata as this is synchronous
  def committed(tpl = nil)
- Proxy.new(kafka).committed(tpl)
+ @wrapped_kafka.committed(tpl)
  end

  private
@@ -356,13 +367,7 @@ module Karafka
  # @param metadata [String, nil] offset storage metadata or nil if none
  # @return [Boolean] true if we could store the offset (if we still own the partition)
  def internal_store_offset(message, metadata)
- kafka.store_offset(message, metadata)
- true
- rescue Rdkafka::RdkafkaError => e
- return false if e.code == :assignment_lost
- return false if e.code == :state
-
- raise e
+ @wrapped_kafka.store_offset(message, metadata)
  end

  # Non thread-safe message committing method
@@ -372,23 +377,7 @@ module Karafka
  # even when no stored, because with sync commit, it refreshes the ownership state of the
  # consumer in a sync way.
  def internal_commit_offsets(async: true)
- kafka.commit(nil, async)
-
- true
- rescue Rdkafka::RdkafkaError => e
- case e.code
- when :assignment_lost
- return false
- when :unknown_member_id
- return false
- when :no_offset
- return true
- when :coordinator_load_in_progress
- sleep(1)
- retry
- end
-
- raise e
+ @wrapped_kafka.commit_offsets(async: async)
  end

  # Non-mutexed seek that should be used only internally. Outside we expose `#seek` that is
@@ -409,12 +398,10 @@ module Karafka
  message.partition => message.offset
  )

- proxy = Proxy.new(kafka)
-
  # Now we can overwrite the seek message offset with our resolved offset and we can
  # then seek to the appropriate message
  # We set the timeout to 2_000 to make sure that remote clusters handle this well
- real_offsets = proxy.offsets_for_times(tpl)
+ real_offsets = @wrapped_kafka.offsets_for_times(tpl)
  detected_partition = real_offsets.to_h.dig(message.topic, message.partition)

  # There always needs to be an offset. In case we seek into the future, where there
@@ -445,12 +432,16 @@ module Karafka

  return unless @kafka

+ sg_id = @subscription_group.id
+
  # Remove callbacks runners that were registered
- ::Karafka::Core::Instrumentation.statistics_callbacks.delete(@subscription_group.id)
- ::Karafka::Core::Instrumentation.error_callbacks.delete(@subscription_group.id)
+ ::Karafka::Core::Instrumentation.statistics_callbacks.delete(sg_id)
+ ::Karafka::Core::Instrumentation.error_callbacks.delete(sg_id)
+ ::Karafka::Core::Instrumentation.oauthbearer_token_refresh_callbacks.delete(sg_id)

  kafka.close
  @kafka = nil
+ @wrapped_kafka = nil
  @buffer.clear
  # @note We do not clear rebalance manager here as we may still have revocation info
  # here that we want to consider valid prior to running another reconnection
@@ -571,6 +562,12 @@ module Karafka

  # No sense in retrying when no topic/partition and we're no longer running
  retryable = false unless Karafka::App.running?
+ # If we detect the end of partition which can happen if `enable.partition.eof` is set to
+ # true, we can just return nil fast. This will fast yield whatever set of messages we
+ # already have instead of waiting. This can be used for better latency control when we do
+ # not expect a lot of lag and want to quickly move to processing.
+ when :partition_eof
+ return nil
  end

  if early_report || !retryable
@@ -614,7 +611,8 @@ module Karafka
  # new messages. This allows us to report statistics while data is still being processed
  config.consumer_poll_set = false

- consumer = config.consumer
+ # Do not start native kafka so we can inject the oauth bearer callbacks if needed
+ consumer = config.consumer(native_kafka_auto_start: false)
  @name = consumer.name

  # Register statistics runner for this particular type of callbacks
@@ -637,9 +635,21 @@ module Karafka
  )
  )

+ ::Karafka::Core::Instrumentation.oauthbearer_token_refresh_callbacks.add(
+ @subscription_group.id,
+ Instrumentation::Callbacks::OauthbearerTokenRefresh.new(
+ consumer
+ )
+ )
+
  # Subscription needs to happen after we assigned the rebalance callbacks just in case of
  # a race condition
- consumer.subscribe(*@subscription_group.subscriptions)
+ subscriptions = @subscription_group.subscriptions
+ assignments = @subscription_group.assignments(consumer)
+
+ consumer.subscribe(*subscriptions) if subscriptions
+ consumer.assign(assignments) if assignments
+
  consumer
  end

@@ -659,7 +669,15 @@ module Karafka

  # @return [Rdkafka::Consumer] librdkafka consumer instance
  def kafka
- @kafka ||= build_consumer
+ return @kafka if @kafka
+
+ @kafka = build_consumer
+ @wrapped_kafka = Proxy.new(@kafka)
+ # We start it only after everything is configured so oauth or any other early-run client
+ # related operations can occur. Otherwise, if all kafka referencing setup would not be
+ # done, we could not intercept the invocations to kafka via client methods.
+ @kafka.start
+ @kafka
  end
  end
  end
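
The `batch_poll_breaker` proc introduced above lets the batch poll loop bail out between individual polls instead of blocking for the full remaining poll budget; the listener passes `-> { running? }` for this purpose (see the next file). A rough, self-contained illustration of the pattern in plain Ruby, not the Karafka implementation:

  # Illustrative only: a batch poller that consults a breaker proc between
  # polls so shutdown does not have to wait for the whole batch to fill up.
  class FakeBatchPoller
    def initialize(batch_poll_breaker)
      @breaker = batch_poll_breaker
    end

    # Polls up to max_messages times, but stops as soon as the breaker says stop,
    # mirroring `break if @interval_runner.call == :stop` in the real poll loop
    def poll_batch(max_messages)
      batch = []

      max_messages.times do
        break unless @breaker.call

        batch << :message # stand-in for a single rdkafka poll result
      end

      batch
    end
  end

  running = true
  poller = FakeBatchPoller.new(-> { running })
  puts poller.poll_batch(5).size # => 5
  running = false
  puts poller.poll_batch(5).size # => 0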
data/lib/karafka/connection/listener.rb
@@ -36,7 +36,7 @@ module Karafka
  @subscription_group = subscription_group
  @jobs_queue = jobs_queue
  @coordinators = Processing::CoordinatorsBuffer.new(subscription_group.topics)
- @client = Client.new(@subscription_group)
+ @client = Client.new(@subscription_group, -> { running? })
  @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
  @jobs_builder = proc_config.jobs_builder
  @partitioner = proc_config.partitioner_class.new(subscription_group)
@@ -46,11 +46,18 @@ module Karafka
  # We can do this that way because we always first schedule jobs using messages before we
  # fetch another batch.
  @messages_buffer = MessagesBuffer.new(subscription_group)
- @usage_tracker = TimeTrackers::PartitionUsage.new
  @mutex = Mutex.new
  @status = Status.new

  @jobs_queue.register(@subscription_group.id)
+
+ # This makes sure that even if we tick more often than the interval time due to frequent
+ # unlocks from short-lived jobs or async queues synchronization, events handling and jobs
+ # scheduling still happens with the expected frequency
+ @interval_runner = Helpers::IntervalRunner.new do
+ @events_poller.call
+ @scheduler.on_manage
+ end
  end

  # Runs the main listener fetch loop.
@@ -103,7 +110,8 @@ module Karafka
  end

  @status.start!
- async_call
+
+ async_call("karafka.listener##{@subscription_group.id}")
  end

  # Stops the jobs queue, triggers shutdown on all the executors (sync), commits offsets and
@@ -277,15 +285,14 @@ module Karafka

  revoked_partitions.each do |topic, partitions|
  partitions.each do |partition|
- @usage_tracker.revoke(topic, partition)
  @coordinators.revoke(topic, partition)

  # There may be a case where we have lost partition of which data we have never
  # processed (if it was assigned and revoked really fast), thus we may not have it
  # here. In cases like this, we do not run a revocation job
  @executors.find_all(topic, partition).each do |executor|
- job = @jobs_builder.revoked(executor)
- jobs << job
+ executor.coordinator.increment(:revoked)
+ jobs << @jobs_builder.revoked(executor)
  end

  # We need to remove all the executors of a given topic partition that we have lost, so
@@ -308,6 +315,7 @@ module Karafka
  jobs = []

  @executors.each do |executor|
+ executor.coordinator.increment(:shutdown)
  job = @jobs_builder.shutdown(executor)
  jobs << job
  end
@@ -328,8 +336,6 @@ module Karafka
  idle_jobs = []

  @messages_buffer.each do |topic, partition, messages|
- @usage_tracker.track(topic, partition)
-
  coordinator = @coordinators.find_or_create(topic, partition)
  # Start work coordination for this topic partition
  coordinator.start(messages)
@@ -337,12 +343,13 @@ module Karafka
  # We do not increment coordinator for idle job because it's not a user related one
  # and it will not go through a standard lifecycle. Same applies to revoked and shutdown
  if messages.empty?
+ coordinator.increment(:idle)
  executor = @executors.find_or_create(topic, partition, 0, coordinator)
  idle_jobs << @jobs_builder.idle(executor)
  else
  @partitioner.call(topic, messages, coordinator) do |group_id, partition_messages|
+ coordinator.increment(:consume)
  executor = @executors.find_or_create(topic, partition, group_id, coordinator)
- coordinator.increment
  consume_jobs << @jobs_builder.consume(executor, partition_messages)
  end
  end
@@ -391,11 +398,11 @@ module Karafka
  interval = topic.periodic_job.interval

  partitions.each do |partition|
- # Skip if we were operating on a given topic partition recently
- next if @usage_tracker.active?(topic_name, partition, interval)
-
  coordinator = @coordinators.find_or_create(topic_name, partition)

+ # Skip if we were operating on a given topic partition recently
+ next if coordinator.active_within?(interval)
+
  # Do not tick if we do not want to tick during pauses
  next if coordinator.paused? && !topic.periodic_job.during_pause?

@@ -405,10 +412,8 @@ module Karafka
  # run (ok) but attempt 1 means, there was an error and we will retry
  next if coordinator.attempt.positive? && !topic.periodic_job.during_retry?

- # Track so we do not run periodic job again too soon
- @usage_tracker.track(topic_name, partition)
-
  @executors.find_all_or_create(topic_name, partition, coordinator).each do |executor|
+ coordinator.increment(:periodic)
  jobs << @jobs_builder.periodic(executor)
  end
  end
@@ -423,8 +428,7 @@ module Karafka
  # Waits for all the jobs from a given subscription group to finish before moving forward
  def wait
  @jobs_queue.wait(@subscription_group.id) do
- @events_poller.call
- @scheduler.on_manage
+ @interval_runner.call
  end
  end

@@ -462,6 +466,7 @@ module Karafka
  @events_poller.reset
  @client.reset
  @coordinators.reset
+ @interval_runner.reset
  @executors = Processing::ExecutorsBuffer.new(@client, @subscription_group)
  end
  end
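
Both the client and the listener now funnel their periodic work through `Helpers::IntervalRunner`: the wrapped block may be invoked very frequently (on every unlock or tick) but only actually runs once per interval. A simplified stand-in showing that behaviour (illustrative only, not the Karafka class):

  # Illustrative only: runs the wrapped block at most once per interval_ms
  class SimpleIntervalRunner
    def initialize(interval_ms, &block)
      @interval_ms = interval_ms
      @block = block
      @last_run_at = 0.0
    end

    def call
      now = Process.clock_gettime(Process::CLOCK_MONOTONIC) * 1_000
      return if now - @last_run_at < @interval_ms

      @last_run_at = now
      @block.call
    end

    def reset
      @last_run_at = 0.0
    end
  end

  runs = 0
  runner = SimpleIntervalRunner.new(100) { runs += 1 }
  10.times { runner.call } # frequent calls within the window are skipped
  puts runs # => 1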
data/lib/karafka/connection/proxy.rb
@@ -11,12 +11,16 @@ module Karafka
  # it would be needed
  class Proxy < SimpleDelegator
  # Errors on which we want to retry
- RETRYABLE_ERRORS = %i[
+ # Includes temporary errors related to node not being (or not yet being) coordinator or a
+ # leader to a given set of partitions. Usually goes away after a retry
+ RETRYABLE_DEFAULT_ERRORS = %i[
  all_brokers_down
  timed_out
+ not_coordinator
+ not_leader_for_partition
  ].freeze

- private_constant :RETRYABLE_ERRORS
+ private_constant :RETRYABLE_DEFAULT_ERRORS

  attr_accessor :wrapped

@@ -42,6 +46,9 @@ module Karafka
  def query_watermark_offsets(topic, partition)
  l_config = @config.query_watermark_offsets

+ # For newly created topics or in cases where we're trying to get them but there is no
+ # leader, this can fail. It happens more often for new topics under KRaft, however we
+ # still want to make sure things operate as expected even then
  with_broker_errors_retry(
  # required to be in seconds, not ms
  wait_time: l_config.wait_time / 1_000.to_f,
@@ -85,6 +92,87 @@ module Karafka
  end
  end

+ # When we cannot store an offset, it means we no longer own the partition
+ #
+ # Non thread-safe offset storing method
+ # @param message [Karafka::Messages::Message]
+ # @param metadata [String, nil] offset storage metadata or nil if none
+ # @return [Boolean] true if we could store the offset (if we still own the partition)
+ def store_offset(message, metadata = nil)
+ @wrapped.store_offset(message, metadata)
+
+ true
+ rescue Rdkafka::RdkafkaError => e
+ return false if e.code == :assignment_lost
+ return false if e.code == :state
+
+ raise e
+ end
+
+ # Non thread-safe message committing method
+ # @param tpl [Rdkafka::Consumer::TopicPartitionList, nil] tpl or nil
+ # @param async [Boolean] should the commit happen async or sync (async by default)
+ # @return [Boolean] true if offset commit worked, false if we've lost the assignment
+ # @note We do **not** consider `no_offset` as any problem and we allow to commit offsets
+ # even when no stored, because with sync commit, it refreshes the ownership state of the
+ # consumer in a sync way.
+ def commit_offsets(tpl = nil, async: true)
+ c_config = @config.commit
+
+ with_broker_errors_retry(
+ wait_time: c_config.wait_time / 1_000.to_f,
+ max_attempts: c_config.max_attempts
+ ) do
+ @wrapped.commit(tpl, async)
+ end
+
+ true
+ rescue Rdkafka::RdkafkaError => e
+ case e.code
+ when :assignment_lost
+ return false
+ when :unknown_member_id
+ return false
+ when :no_offset
+ return true
+ when :coordinator_load_in_progress
+ sleep(1)
+ retry
+ end
+
+ raise e
+ end
+
+ # @param tpl [Rdkafka::Consumer::TopicPartitionList] list of topics and partitions for which
+ # we want to get the lag on the defined CG
+ # @return [Hash<String, Hash>] hash with topics and their partitions lags
+ def lag(tpl)
+ l_config = @config.committed
+
+ with_broker_errors_retry(
+ # required to be in seconds, not ms
+ wait_time: l_config.wait_time / 1_000.to_f,
+ max_attempts: l_config.max_attempts
+ ) do
+ @wrapped.lag(tpl, l_config.timeout)
+ end
+ end
+
+ # @param topic_name [String, nil] Name of the topic we're interested in or nil if we want to
+ # get info on all topics
+ # @return [Rdkafka::Metadata] rdkafka metadata object with the requested details
+ def metadata(topic_name = nil)
+ m_config = @config.metadata
+
+ with_broker_errors_retry(
+ # required to be in seconds, not ms
+ wait_time: m_config.wait_time / 1_000.to_f,
+ max_attempts: m_config.max_attempts
+ ) do
+ @wrapped.metadata(topic_name, m_config.timeout)
+ end
+ end
+
  private

  # Runs expected block of code with few retries on all_brokers_down
@@ -94,13 +182,14 @@ module Karafka
  # completely.
  # @param wait_time [Integer, Float] how many seconds should we wait. It uses `#sleep` of Ruby
  # so it needs time in seconds.
- def with_broker_errors_retry(max_attempts:, wait_time: 1)
+ # @param errors [Array<Symbol>] rdkafka errors we want to retry on
+ def with_broker_errors_retry(max_attempts:, wait_time: 1, errors: RETRYABLE_DEFAULT_ERRORS)
  attempt ||= 0
  attempt += 1

  yield
  rescue Rdkafka::RdkafkaError => e
- raise unless RETRYABLE_ERRORS.include?(e.code)
+ raise unless errors.include?(e.code)

  if attempt <= max_attempts
  sleep(wait_time)
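
The proxy now ships offset storage, commits, lag and metadata lookups through the same `with_broker_errors_retry` wrapper, and the retryable error list became a parameter. The general shape of such a retry helper, as a self-contained sketch with a fake error class (names are illustrative, not the Karafka implementation):

  class FakeBrokerError < StandardError
    attr_reader :code

    def initialize(code)
      @code = code
      super(code.to_s)
    end
  end

  RETRYABLE = %i[all_brokers_down timed_out not_coordinator not_leader_for_partition].freeze

  # Retries the block on listed error codes only, sleeping between attempts
  def with_retry(max_attempts:, wait_time: 0.01, errors: RETRYABLE)
    attempt = 0

    begin
      attempt += 1
      yield
    rescue FakeBrokerError => e
      raise unless errors.include?(e.code)
      raise if attempt > max_attempts

      sleep(wait_time)
      retry
    end
  end

  calls = 0

  with_retry(max_attempts: 3) do
    calls += 1
    raise FakeBrokerError, :not_coordinator if calls < 3
  end

  puts calls # => 3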
data/lib/karafka/connection/status.rb
@@ -5,6 +5,11 @@ module Karafka
  module Connection
  # Listener connection status representation
  class Status
+ include Helpers::ConfigImporter.new(
+ monitor: %i[monitor],
+ conductor: %i[internal connection conductor]
+ )
+
  # Available states and their transitions.
  STATES = {
  pending: :pending!,
@@ -26,7 +31,8 @@ module Karafka
  return if @status && STATES.keys.index(:#{state}) <= STATES.keys.index(@status)

  @status = :#{state}
- @conductor.signal
+ conductor.signal
+ monitor.instrument("connection.listener.#{state}", caller: self)
  end
  end

@@ -39,7 +45,6 @@ module Karafka

  def initialize
  @mutex = Mutex.new
- @conductor = Karafka::App.config.internal.connection.conductor
  pending!
  end

@@ -49,11 +54,18 @@ module Karafka
  def stop!
  if pending?
  @status = :stopping
+ conductor.signal
+ monitor.instrument('connection.listener.stopping', caller: self)
+
  stopped!
  elsif stopped?
  nil
+ elsif stopping?
+ nil
  else
  @status = :stopping
+ conductor.signal
+ monitor.instrument('connection.listener.stopping', caller: self)
  end
  end

data/lib/karafka/contracts/config.rb
@@ -25,13 +25,14 @@ module Karafka

  required(:client_id) { |val| val.is_a?(String) && Contracts::TOPIC_REGEXP.match?(val) }
  required(:concurrency) { |val| val.is_a?(Integer) && val.positive? }
- required(:consumer_mapper) { |val| !val.nil? }
  required(:consumer_persistence) { |val| [true, false].include?(val) }
  required(:pause_timeout) { |val| val.is_a?(Integer) && val.positive? }
  required(:pause_max_timeout) { |val| val.is_a?(Integer) && val.positive? }
  required(:pause_with_exponential_backoff) { |val| [true, false].include?(val) }
+ required(:strict_topics_namespacing) { |val| [true, false].include?(val) }
  required(:shutdown_timeout) { |val| val.is_a?(Integer) && val.positive? }
  required(:max_wait_time) { |val| val.is_a?(Integer) && val.positive? }
+ required(:group_id) { |val| val.is_a?(String) && Contracts::TOPIC_REGEXP.match?(val) }
  required(:kafka) { |val| val.is_a?(Hash) && !val.empty? }

  nested(:swarm) do
@@ -39,6 +40,12 @@ module Karafka
  required(:node) { |val| val == false || val.is_a?(Karafka::Swarm::Node) }
  end

+ nested(:oauth) do
+ required(:token_provider_listener) do |val|
+ val == false || val.respond_to?(:on_oauthbearer_token_refresh)
+ end
+ end
+
  nested(:admin) do
  # Can be empty because inherits values from the root kafka
  required(:kafka) { |val| val.is_a?(Hash) }
@@ -74,11 +81,17 @@ module Karafka
  required(:conductor) { |val| !val.nil? }

  nested(:proxy) do
+ nested(:commit) do
+ required(:max_attempts) { |val| val.is_a?(Integer) && val.positive? }
+ required(:wait_time) { |val| val.is_a?(Integer) && val.positive? }
+ end
+
  # All of them have the same requirements
  %i[
  query_watermark_offsets
  offsets_for_times
  committed
+ metadata
  ].each do |scope|
  nested(scope) do
  required(:timeout) { |val| val.is_a?(Integer) && val.positive? }
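
The root contract changes above drop `consumer_mapper`, add `group_id` and `strict_topics_namespacing`, and introduce an `oauth.token_provider_listener` slot. Assuming these keys map one-to-one onto `setup` options (key names taken from the contract, not verified against the final 2.4 docs), configuration might look roughly like:

  # Hedged sketch based on the keys validated above; consult the 2.4 upgrade
  # notes for the authoritative setting names.
  class KarafkaApp < Karafka::App
    setup do |config|
      config.client_id = 'my_app'
      # replaces the removed consumer_mapper-based group naming
      config.group_id = 'my_app_consumer_group'
      config.strict_topics_namespacing = false
      # object responding to #on_oauthbearer_token_refresh, or false to disable
      config.oauth.token_provider_listener = false
      config.kafka = { 'bootstrap.servers': '127.0.0.1:9092' }
    end
  end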
data/lib/karafka/contracts/topic.rb
@@ -12,7 +12,7 @@ module Karafka
  ).fetch('en').fetch('validations').fetch('topic')
  end

- required(:deserializer) { |val| !val.nil? }
+ required(:deserializers) { |val| !val.nil? }
  required(:id) { |val| val.is_a?(String) && Contracts::TOPIC_REGEXP.match?(val) }
  required(:kafka) { |val| val.is_a?(Hash) && !val.empty? }
  required(:max_messages) { |val| val.is_a?(Integer) && val >= 1 }
data/lib/karafka/deserializers/headers.rb (new file)
@@ -0,0 +1,15 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ module Deserializers
+ # Default message headers deserializer
+ class Headers
+ # @param metadata [Karafka::Messages::Metadata] metadata object from which we obtain the
+ # `#raw_headers`
+ # @return [Hash] expected message headers hash
+ def call(metadata)
+ metadata.raw_headers
+ end
+ end
+ end
+ end
data/lib/karafka/deserializers/key.rb (new file)
@@ -0,0 +1,15 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ module Deserializers
+ # Default message key deserializer
+ class Key
+ # @param metadata [Karafka::Messages::Metadata] metadata object from which we obtain the
+ # `#raw_key`
+ # @return [String, nil] expected message key in a string format or nil if no key
+ def call(metadata)
+ metadata.raw_key
+ end
+ end
+ end
+ end
data/lib/karafka/deserializers/payload.rb (new file)
@@ -0,0 +1,16 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ # Module for all supported by default deserializers.
+ module Deserializers
+ # Default Karafka Json deserializer for loading JSON data in payload.
+ class Payload
+ # @param message [Karafka::Messages::Message] Message object that we want to deserialize
+ # @return [Hash] hash with deserialized JSON data
+ def call(message)
+ # nil payload can be present for example for tombstone messages
+ message.raw_payload.nil? ? nil : ::JSON.parse(message.raw_payload)
+ end
+ end
+ end
+ end
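
Together with the topic contract switching from a single `deserializer` to a `deserializers` set, these defaults suggest that payload, key and headers each get their own deserializer following the `#call` interface shown above. A hedged sketch of a custom payload deserializer; the commented routing usage is an assumed DSL shape, so verify it against the 2.4 routing docs:

  require 'json'

  # Same #call(message) interface as the default Payload deserializer above
  class SymbolizedJsonDeserializer
    def call(message)
      return nil if message.raw_payload.nil?

      JSON.parse(message.raw_payload, symbolize_names: true)
    end
  end

  # Possible routing usage (assumed API shape):
  # topic :events do
  #   consumer EventsConsumer
  #   deserializers(payload: SymbolizedJsonDeserializer.new)
  # end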
data/lib/karafka/embedded.rb
@@ -8,6 +8,8 @@ module Karafka
  # so it won't interrupt other things running
  def start
  Thread.new do
+ Thread.current.name = 'karafka.embedded'
+
  Karafka::Process.tags.add(:execution_mode, 'embedded')
  Karafka::Server.start
  end
data/lib/karafka/helpers/async.rb
@@ -22,16 +22,19 @@ module Karafka
  def included(base)
  base.extend ::Forwardable

- base.def_delegators :@thread, :join, :terminate, :alive?
+ base.def_delegators :@thread, :join, :terminate, :alive?, :name
  end
  end

  # Runs the `#call` method in a new thread
- def async_call
+ # @param thread_name [String] name that we want to assign to the thread when we start it
+ def async_call(thread_name = '')
  MUTEX.synchronize do
  return if @thread&.alive?

  @thread = Thread.new do
+ Thread.current.name = thread_name
+
  Thread.current.abort_on_exception = true

  call
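
`async_call` now accepts a thread name, and the listener and embedded threads make use of it. Named threads are mainly a debugging aid; a plain-Ruby illustration of what that buys:

  # Illustrative only: named threads are easy to tell apart in thread dumps
  worker = Thread.new do
    Thread.current.name = 'karafka.listener#example_group'
    sleep(0.1)
  end

  sleep(0.01)
  puts Thread.list.map(&:name).compact.inspect # includes "karafka.listener#example_group"
  worker.join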
data/lib/karafka/helpers/colorize.rb
@@ -21,6 +21,12 @@ module Karafka
  def yellow(string)
  "\033[1;33m#{string}\033[0m"
  end
+
+ # @param string [String] string we want to have in grey
+ # @return [String] grey string
+ def grey(string)
+ "\e[38;5;244m#{string}\e[0m"
+ end
  end
  end
  end
data/lib/karafka/instrumentation/callbacks/oauthbearer_token_refresh.rb (new file)
@@ -0,0 +1,29 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ module Instrumentation
+ module Callbacks
+ # Callback that is triggered when oauth token needs to be refreshed.
+ class OauthbearerTokenRefresh
+ # @param bearer [Rdkafka::Consumer, Rdkafka::Admin] given rdkafka instance. It is needed as
+ # we need to have a reference to call `#oauthbearer_set_token` or
+ # `#oauthbearer_set_token_failure` upon the event.
+ def initialize(bearer)
+ @bearer = bearer
+ end
+
+ # @param _rd_config [Rdkafka::Config]
+ # @param bearer_name [String] name of the bearer for which we refresh
+ def call(_rd_config, bearer_name)
+ return unless @bearer.name == bearer_name
+
+ ::Karafka.monitor.instrument(
+ 'oauthbearer.token_refresh',
+ bearer: @bearer,
+ caller: self
+ )
+ end
+ end
+ end
+ end
+ end
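
This callback only instruments an `oauthbearer.token_refresh` event; actually providing the token is left to a listener such as the `oauth.token_provider_listener` validated earlier in this diff. A hedged sketch of such a listener follows: the `#on_oauthbearer_token_refresh` method name comes from the contract, while the `oauthbearer_set_token` keyword arguments are an assumption about rdkafka-ruby's API and should be checked against the rdkafka version in use.

  class OauthTokenProviderListener
    def on_oauthbearer_token_refresh(event)
      bearer = event[:bearer]

      token, lifetime_ms = fetch_token_from_idp # your own token acquisition logic

      # assumed rdkafka-ruby keyword API; verify against your rdkafka version
      bearer.oauthbearer_set_token(
        token: token,
        lifetime_ms: lifetime_ms,
        principal_name: 'karafka'
      )
    rescue StandardError => e
      # failure API mentioned in the callback docs above
      bearer.oauthbearer_set_token_failure(e.message)
    end

    private

    def fetch_token_from_idp
      # placeholder: return a JWT and its validity in milliseconds
      ['example.jwt.token', 60_000]
    end
  end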