karafka 2.3.4 → 2.4.0.beta1
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +12 -38
- data/CHANGELOG.md +56 -2
- data/Gemfile +6 -3
- data/Gemfile.lock +25 -23
- data/bin/integrations +1 -1
- data/config/locales/errors.yml +21 -2
- data/config/locales/pro_errors.yml +16 -1
- data/karafka.gemspec +4 -2
- data/lib/active_job/queue_adapters/karafka_adapter.rb +2 -0
- data/lib/karafka/admin/configs/config.rb +81 -0
- data/lib/karafka/admin/configs/resource.rb +88 -0
- data/lib/karafka/admin/configs.rb +103 -0
- data/lib/karafka/admin.rb +201 -100
- data/lib/karafka/base_consumer.rb +2 -2
- data/lib/karafka/cli/info.rb +9 -7
- data/lib/karafka/cli/server.rb +7 -7
- data/lib/karafka/cli/topics/align.rb +109 -0
- data/lib/karafka/cli/topics/base.rb +66 -0
- data/lib/karafka/cli/topics/create.rb +35 -0
- data/lib/karafka/cli/topics/delete.rb +30 -0
- data/lib/karafka/cli/topics/migrate.rb +31 -0
- data/lib/karafka/cli/topics/plan.rb +169 -0
- data/lib/karafka/cli/topics/repartition.rb +41 -0
- data/lib/karafka/cli/topics/reset.rb +18 -0
- data/lib/karafka/cli/topics.rb +13 -123
- data/lib/karafka/connection/client.rb +55 -37
- data/lib/karafka/connection/listener.rb +22 -17
- data/lib/karafka/connection/proxy.rb +93 -4
- data/lib/karafka/connection/status.rb +14 -2
- data/lib/karafka/contracts/config.rb +14 -1
- data/lib/karafka/contracts/topic.rb +1 -1
- data/lib/karafka/deserializers/headers.rb +15 -0
- data/lib/karafka/deserializers/key.rb +15 -0
- data/lib/karafka/deserializers/payload.rb +16 -0
- data/lib/karafka/embedded.rb +2 -0
- data/lib/karafka/helpers/async.rb +5 -2
- data/lib/karafka/helpers/colorize.rb +6 -0
- data/lib/karafka/instrumentation/callbacks/oauthbearer_token_refresh.rb +29 -0
- data/lib/karafka/instrumentation/logger_listener.rb +23 -3
- data/lib/karafka/instrumentation/notifications.rb +10 -0
- data/lib/karafka/instrumentation/vendors/appsignal/client.rb +16 -2
- data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +20 -0
- data/lib/karafka/messages/batch_metadata.rb +1 -1
- data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
- data/lib/karafka/messages/builders/message.rb +10 -6
- data/lib/karafka/messages/message.rb +2 -1
- data/lib/karafka/messages/metadata.rb +20 -4
- data/lib/karafka/messages/parser.rb +1 -1
- data/lib/karafka/pro/base_consumer.rb +12 -23
- data/lib/karafka/pro/encryption/cipher.rb +7 -3
- data/lib/karafka/pro/encryption/contracts/config.rb +1 -0
- data/lib/karafka/pro/encryption/errors.rb +4 -1
- data/lib/karafka/pro/encryption/messages/middleware.rb +13 -11
- data/lib/karafka/pro/encryption/messages/parser.rb +22 -20
- data/lib/karafka/pro/encryption/setup/config.rb +5 -0
- data/lib/karafka/pro/iterator/expander.rb +2 -1
- data/lib/karafka/pro/iterator/tpl_builder.rb +38 -0
- data/lib/karafka/pro/iterator.rb +28 -2
- data/lib/karafka/pro/loader.rb +3 -0
- data/lib/karafka/pro/processing/coordinator.rb +15 -2
- data/lib/karafka/pro/processing/expansions_selector.rb +2 -0
- data/lib/karafka/pro/processing/jobs_queue.rb +122 -5
- data/lib/karafka/pro/processing/periodic_job/consumer.rb +67 -0
- data/lib/karafka/pro/processing/piping/consumer.rb +126 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +2 -0
- data/lib/karafka/pro/processing/strategies/default.rb +5 -1
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +21 -5
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +2 -0
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +2 -0
- data/lib/karafka/pro/processing/subscription_groups_coordinator.rb +52 -0
- data/lib/karafka/pro/routing/features/direct_assignments/config.rb +27 -0
- data/lib/karafka/pro/routing/features/direct_assignments/contracts/consumer_group.rb +53 -0
- data/lib/karafka/pro/routing/features/direct_assignments/contracts/topic.rb +108 -0
- data/lib/karafka/pro/routing/features/direct_assignments/subscription_group.rb +77 -0
- data/lib/karafka/pro/routing/features/direct_assignments/topic.rb +69 -0
- data/lib/karafka/pro/routing/features/direct_assignments.rb +25 -0
- data/lib/karafka/pro/routing/features/patterns/builder.rb +1 -1
- data/lib/karafka/pro/routing/features/swarm/contracts/routing.rb +76 -0
- data/lib/karafka/pro/routing/features/swarm/contracts/topic.rb +16 -5
- data/lib/karafka/pro/routing/features/swarm/topic.rb +25 -2
- data/lib/karafka/pro/routing/features/swarm.rb +11 -0
- data/lib/karafka/pro/swarm/liveness_listener.rb +20 -0
- data/lib/karafka/processing/coordinator.rb +17 -8
- data/lib/karafka/processing/coordinators_buffer.rb +5 -2
- data/lib/karafka/processing/executor.rb +6 -2
- data/lib/karafka/processing/executors_buffer.rb +5 -2
- data/lib/karafka/processing/jobs_queue.rb +9 -4
- data/lib/karafka/processing/strategies/aj_dlq_mom.rb +1 -1
- data/lib/karafka/processing/strategies/default.rb +7 -1
- data/lib/karafka/processing/strategies/dlq.rb +17 -2
- data/lib/karafka/processing/workers_batch.rb +4 -1
- data/lib/karafka/routing/builder.rb +6 -2
- data/lib/karafka/routing/consumer_group.rb +2 -1
- data/lib/karafka/routing/features/dead_letter_queue/config.rb +5 -0
- data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +8 -0
- data/lib/karafka/routing/features/dead_letter_queue/topic.rb +10 -2
- data/lib/karafka/routing/features/deserializers/config.rb +18 -0
- data/lib/karafka/routing/features/deserializers/contracts/topic.rb +31 -0
- data/lib/karafka/routing/features/deserializers/topic.rb +51 -0
- data/lib/karafka/routing/features/deserializers.rb +11 -0
- data/lib/karafka/routing/proxy.rb +9 -14
- data/lib/karafka/routing/router.rb +11 -2
- data/lib/karafka/routing/subscription_group.rb +9 -1
- data/lib/karafka/routing/topic.rb +0 -1
- data/lib/karafka/runner.rb +1 -1
- data/lib/karafka/setup/config.rb +50 -9
- data/lib/karafka/status.rb +7 -8
- data/lib/karafka/swarm/supervisor.rb +16 -2
- data/lib/karafka/templates/karafka.rb.erb +28 -1
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +38 -12
- metadata.gz.sig +0 -0
- data/lib/karafka/routing/consumer_mapper.rb +0 -23
- data/lib/karafka/serialization/json/deserializer.rb +0 -19
- data/lib/karafka/time_trackers/partition_usage.rb +0 -56
data/lib/karafka/connection/client.rb
CHANGED

@@ -30,8 +30,11 @@ module Karafka
       #
       # @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group
       #   with all the configuration details needed for us to create a client
+      # @param batch_poll_breaker [Proc] proc that when evaluated to false will cause the batch
+      #   poll loop to finish early. This improves the shutdown and dynamic multiplication as it
+      #   allows us to early break on long polls.
       # @return [Karafka::Connection::Client]
-      def initialize(subscription_group)
+      def initialize(subscription_group, batch_poll_breaker)
         @id = SecureRandom.hex(6)
         # Name is set when we build consumer
         @name = ''
@@ -41,7 +44,14 @@ module Karafka
         @tick_interval = ::Karafka::App.config.internal.tick_interval
         @rebalance_manager = RebalanceManager.new(@subscription_group.id)
         @rebalance_callback = Instrumentation::Callbacks::Rebalance.new(@subscription_group)
-
+
+        @interval_runner = Helpers::IntervalRunner.new do
+          events_poll
+          # events poller returns nil when not running often enough, hence we don't use the
+          # boolean to be explicit
+          batch_poll_breaker.call ? :run : :stop
+        end
+
         # There are few operations that can happen in parallel from the listener threads as well
         # as from the workers. They are not fully thread-safe because they may be composed out of
         # few calls to Kafka or out of few internal state changes. That is why we mutex them.
@@ -98,7 +108,8 @@ module Karafka
             break
           end

-
+          # If we were signaled from the outside to break the loop, we should
+          break if @interval_runner.call == :stop

          # Track time spent on all of the processing and polling
          time_poll.checkpoint
@@ -295,7 +306,7 @@ module Karafka
        ) do
          close

-          @
+          @interval_runner.reset
          @closed = false
          @paused_tpls.clear
        end
@@ -344,7 +355,7 @@ module Karafka
      # @note It is recommended to use this only on rebalances to get positions with metadata
      #   when working with metadata as this is synchronous
      def committed(tpl = nil)
-
+        @wrapped_kafka.committed(tpl)
      end

      private
@@ -356,13 +367,7 @@ module Karafka
      # @param metadata [String, nil] offset storage metadata or nil if none
      # @return [Boolean] true if we could store the offset (if we still own the partition)
      def internal_store_offset(message, metadata)
-
-        true
-      rescue Rdkafka::RdkafkaError => e
-        return false if e.code == :assignment_lost
-        return false if e.code == :state
-
-        raise e
+        @wrapped_kafka.store_offset(message, metadata)
      end

      # Non thread-safe message committing method
@@ -372,23 +377,7 @@ module Karafka
      #   even when no stored, because with sync commit, it refreshes the ownership state of the
      #   consumer in a sync way.
      def internal_commit_offsets(async: true)
-
-
-        true
-      rescue Rdkafka::RdkafkaError => e
-        case e.code
-        when :assignment_lost
-          return false
-        when :unknown_member_id
-          return false
-        when :no_offset
-          return true
-        when :coordinator_load_in_progress
-          sleep(1)
-          retry
-        end
-
-        raise e
+        @wrapped_kafka.commit_offsets(async: async)
      end

      # Non-mutexed seek that should be used only internally. Outside we expose `#seek` that is
@@ -409,12 +398,10 @@ module Karafka
          message.partition => message.offset
        )

-        proxy = Proxy.new(kafka)
-
        # Now we can overwrite the seek message offset with our resolved offset and we can
        #   then seek to the appropriate message
        # We set the timeout to 2_000 to make sure that remote clusters handle this well
-        real_offsets =
+        real_offsets = @wrapped_kafka.offsets_for_times(tpl)
        detected_partition = real_offsets.to_h.dig(message.topic, message.partition)

        # There always needs to be an offset. In case we seek into the future, where there
@@ -445,12 +432,16 @@ module Karafka

        return unless @kafka

+        sg_id = @subscription_group.id
+
        # Remove callbacks runners that were registered
-        ::Karafka::Core::Instrumentation.statistics_callbacks.delete(
-        ::Karafka::Core::Instrumentation.error_callbacks.delete(
+        ::Karafka::Core::Instrumentation.statistics_callbacks.delete(sg_id)
+        ::Karafka::Core::Instrumentation.error_callbacks.delete(sg_id)
+        ::Karafka::Core::Instrumentation.oauthbearer_token_refresh_callbacks.delete(sg_id)

        kafka.close
        @kafka = nil
+        @wrapped_kafka = nil
        @buffer.clear
        # @note We do not clear rebalance manager here as we may still have revocation info
        #   here that we want to consider valid prior to running another reconnection
@@ -571,6 +562,12 @@ module Karafka

          # No sense in retrying when no topic/partition and we're no longer running
          retryable = false unless Karafka::App.running?
+        # If we detect the end of partition which can happen if `enable.partition.eof` is set to
+        #   true, we can just return nil fast. This will fast yield whatever set of messages we
+        #   already have instead of waiting. This can be used for better latency control when we do
+        #   not expect a lof of lag and want to quickly move to processing.
+        when :partition_eof
+          return nil
        end

        if early_report || !retryable
@@ -614,7 +611,8 @@ module Karafka
        #   new messages. This allows us to report statistics while data is still being processed
        config.consumer_poll_set = false

-
+        # Do not start native kafka so we can inject the oauth bearer callbacks if needed
+        consumer = config.consumer(native_kafka_auto_start: false)
        @name = consumer.name

        # Register statistics runner for this particular type of callbacks
@@ -637,9 +635,21 @@ module Karafka
          )
        )

+        ::Karafka::Core::Instrumentation.oauthbearer_token_refresh_callbacks.add(
+          @subscription_group.id,
+          Instrumentation::Callbacks::OauthbearerTokenRefresh.new(
+            consumer
+          )
+        )
+
        # Subscription needs to happen after we assigned the rebalance callbacks just in case of
        # a race condition
-
+        subscriptions = @subscription_group.subscriptions
+        assignments = @subscription_group.assignments(consumer)
+
+        consumer.subscribe(*subscriptions) if subscriptions
+        consumer.assign(assignments) if assignments
+
        consumer
      end

@@ -659,7 +669,15 @@ module Karafka

      # @return [Rdkafka::Consumer] librdkafka consumer instance
      def kafka
-        @kafka
+        return @kafka if @kafka
+
+        @kafka = build_consumer
+        @wrapped_kafka = Proxy.new(@kafka)
+        # We start it only after everything is configured so oauth or any other early-run client
+        #   related operations can occur. Otherwise, if all kafka referencing setup would not be
+        #   done, we could not intercept the invocations to kafka via client methods.
+        @kafka.start
+        @kafka
      end
    end
  end
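The `:partition_eof` early return above only takes effect when librdkafka is asked to emit partition EOF events. A minimal, hedged sketch of opting in via the standard `enable.partition.eof` librdkafka setting in a Karafka setup block (the broker address is a placeholder):

require 'karafka'

class KarafkaApp < Karafka::App
  setup do |config|
    config.kafka = {
      # Placeholder bootstrap server address
      'bootstrap.servers': '127.0.0.1:9092',
      # Standard librdkafka setting; when true, reaching the end of a partition surfaces
      # an EOF signal so polling can yield already-buffered messages instead of waiting
      'enable.partition.eof': true
    }
  end
end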
data/lib/karafka/connection/listener.rb
CHANGED

@@ -36,7 +36,7 @@ module Karafka
        @subscription_group = subscription_group
        @jobs_queue = jobs_queue
        @coordinators = Processing::CoordinatorsBuffer.new(subscription_group.topics)
-        @client = Client.new(@subscription_group)
+        @client = Client.new(@subscription_group, -> { running? })
        @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
        @jobs_builder = proc_config.jobs_builder
        @partitioner = proc_config.partitioner_class.new(subscription_group)
@@ -46,11 +46,18 @@ module Karafka
        # We can do this that way because we always first schedule jobs using messages before we
        #   fetch another batch.
        @messages_buffer = MessagesBuffer.new(subscription_group)
-        @usage_tracker = TimeTrackers::PartitionUsage.new
        @mutex = Mutex.new
        @status = Status.new

        @jobs_queue.register(@subscription_group.id)
+
+        # This makes sure that even if we tick more often than the interval time due to frequent
+        #   unlocks from short-lived jobs or async queues synchronization, events handling and jobs
+        #   scheduling still happens with the expected frequency
+        @interval_runner = Helpers::IntervalRunner.new do
+          @events_poller.call
+          @scheduler.on_manage
+        end
      end

      # Runs the main listener fetch loop.
@@ -103,7 +110,8 @@ module Karafka
        end

        @status.start!
-
+
+        async_call("karafka.listener##{@subscription_group.id}")
      end

      # Stops the jobs queue, triggers shutdown on all the executors (sync), commits offsets and
@@ -277,15 +285,14 @@ module Karafka

        revoked_partitions.each do |topic, partitions|
          partitions.each do |partition|
-            @usage_tracker.revoke(topic, partition)
            @coordinators.revoke(topic, partition)

            # There may be a case where we have lost partition of which data we have never
            #   processed (if it was assigned and revoked really fast), thus we may not have it
            #   here. In cases like this, we do not run a revocation job
            @executors.find_all(topic, partition).each do |executor|
-
-              jobs <<
+              executor.coordinator.increment(:revoked)
+              jobs << @jobs_builder.revoked(executor)
            end

            # We need to remove all the executors of a given topic partition that we have lost, so
@@ -308,6 +315,7 @@ module Karafka
        jobs = []

        @executors.each do |executor|
+          executor.coordinator.increment(:shutdown)
          job = @jobs_builder.shutdown(executor)
          jobs << job
        end
@@ -328,8 +336,6 @@ module Karafka
        idle_jobs = []

        @messages_buffer.each do |topic, partition, messages|
-          @usage_tracker.track(topic, partition)
-
          coordinator = @coordinators.find_or_create(topic, partition)
          # Start work coordination for this topic partition
          coordinator.start(messages)
@@ -337,12 +343,13 @@ module Karafka
          # We do not increment coordinator for idle job because it's not a user related one
          #   and it will not go through a standard lifecycle. Same applies to revoked and shutdown
          if messages.empty?
+            coordinator.increment(:idle)
            executor = @executors.find_or_create(topic, partition, 0, coordinator)
            idle_jobs << @jobs_builder.idle(executor)
          else
            @partitioner.call(topic, messages, coordinator) do |group_id, partition_messages|
+              coordinator.increment(:consume)
              executor = @executors.find_or_create(topic, partition, group_id, coordinator)
-              coordinator.increment
              consume_jobs << @jobs_builder.consume(executor, partition_messages)
            end
          end
@@ -391,11 +398,11 @@ module Karafka
          interval = topic.periodic_job.interval

          partitions.each do |partition|
-            # Skip if we were operating on a given topic partition recently
-            next if @usage_tracker.active?(topic_name, partition, interval)
-
            coordinator = @coordinators.find_or_create(topic_name, partition)

+            # Skip if we were operating on a given topic partition recently
+            next if coordinator.active_within?(interval)
+
            # Do not tick if we do not want to tick during pauses
            next if coordinator.paused? && !topic.periodic_job.during_pause?

@@ -405,10 +412,8 @@ module Karafka
            #   run (ok) but attempt 1 means, there was an error and we will retry
            next if coordinator.attempt.positive? && !topic.periodic_job.during_retry?

-            # Track so we do not run periodic job again too soon
-            @usage_tracker.track(topic_name, partition)
-
            @executors.find_all_or_create(topic_name, partition, coordinator).each do |executor|
+              coordinator.increment(:periodic)
              jobs << @jobs_builder.periodic(executor)
            end
          end
@@ -423,8 +428,7 @@ module Karafka
      # Waits for all the jobs from a given subscription group to finish before moving forward
      def wait
        @jobs_queue.wait(@subscription_group.id) do
-          @
-          @scheduler.on_manage
+          @interval_runner.call
        end
      end

@@ -462,6 +466,7 @@ module Karafka
        @events_poller.reset
        @client.reset
        @coordinators.reset
+        @interval_runner.reset
        @executors = Processing::ExecutorsBuffer.new(@client, @subscription_group)
      end
    end
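Both the client and the listener now lean on `Helpers::IntervalRunner` to throttle how often the wrapped work runs. The helper itself is not part of this diff; the snippet below is only a rough sketch of the semantics implied above (run the block at most once per interval, return nil when skipped, `#reset` the clock), not the gem's actual implementation:

# Rough sketch of interval-runner semantics (assumed, not Karafka's real class)
class IntervalRunnerSketch
  def initialize(interval_ms = 5_000, &block)
    @interval_ms = interval_ms
    @block = block
    @last_run_at = nil
  end

  # Runs the block only when the interval has elapsed; returns nil when skipped
  def call
    now = Process.clock_gettime(Process::CLOCK_MONOTONIC, :millisecond)
    return nil if @last_run_at && (now - @last_run_at) < @interval_ms

    @last_run_at = now
    @block.call
  end

  # Forgets the last execution time so the next #call runs immediately
  def reset
    @last_run_at = nil
  end
end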
data/lib/karafka/connection/proxy.rb
CHANGED

@@ -11,12 +11,16 @@ module Karafka
    #   it would be needed
    class Proxy < SimpleDelegator
      # Errors on which we want to retry
-
+      # Includes temporary errors related to node not being (or not yet being) coordinator or a
+      #   leader to a given set of partitions. Usually goes away after a retry
+      RETRYABLE_DEFAULT_ERRORS = %i[
        all_brokers_down
        timed_out
+        not_coordinator
+        not_leader_for_partition
      ].freeze

-      private_constant :
+      private_constant :RETRYABLE_DEFAULT_ERRORS

      attr_accessor :wrapped

@@ -42,6 +46,9 @@ module Karafka
      def query_watermark_offsets(topic, partition)
        l_config = @config.query_watermark_offsets

+        # For newly created topics or in cases where we're trying to get them but there is no
+        #   leader, this can fail. It happens more often for new topics under KRaft, however we
+        #   still want to make sure things operate as expected even then
        with_broker_errors_retry(
          # required to be in seconds, not ms
          wait_time: l_config.wait_time / 1_000.to_f,
@@ -85,6 +92,87 @@ module Karafka
        end
      end

+      # When we cannot store an offset, it means we no longer own the partition
+      #
+      # Non thread-safe offset storing method
+      # @param message [Karafka::Messages::Message]
+      # @param metadata [String, nil] offset storage metadata or nil if none
+      # @return [Boolean] true if we could store the offset (if we still own the partition)
+      def store_offset(message, metadata = nil)
+        @wrapped.store_offset(message, metadata)
+
+        true
+      rescue Rdkafka::RdkafkaError => e
+        return false if e.code == :assignment_lost
+        return false if e.code == :state
+
+        raise e
+      end
+
+      # Non thread-safe message committing method
+      # @param tpl [Rdkafka::Consumer::TopicPartitionList, nil] tpl or nil
+      # @param async [Boolean] should the commit happen async or sync (async by default)
+      # @return [Boolean] true if offset commit worked, false if we've lost the assignment
+      # @note We do **not** consider `no_offset` as any problem and we allow to commit offsets
+      #   even when no stored, because with sync commit, it refreshes the ownership state of the
+      #   consumer in a sync way.
+      def commit_offsets(tpl = nil, async: true)
+        c_config = @config.commit
+
+        with_broker_errors_retry(
+          wait_time: c_config.wait_time / 1_000.to_f,
+          max_attempts: c_config.max_attempts
+        ) do
+          @wrapped.commit(tpl, async)
+        end
+
+        true
+      rescue Rdkafka::RdkafkaError => e
+        case e.code
+        when :assignment_lost
+          return false
+        when :unknown_member_id
+          return false
+        when :no_offset
+          return true
+        when :coordinator_load_in_progress
+          sleep(1)
+          retry
+        end
+
+        raise e
+      end
+
+      # @param tpl [Rdkafka::Consumer::TopicPartitionList] list of topics and partitions for which
+      #   we want to get the lag on the defined CG
+      # @return [Hash<String, Hash>] hash with topics and their partitions lags
+      def lag(tpl)
+        l_config = @config.committed
+
+        with_broker_errors_retry(
+          # required to be in seconds, not ms
+          wait_time: l_config.wait_time / 1_000.to_f,
+          max_attempts: l_config.max_attempts
+        ) do
+          @wrapped.lag(tpl, l_config.timeout)
+        end
+      end
+
+      # @param topic_name [String, nil] Name of the topic we're interested in or nil if we want to
+      #   get info on all topics
+      # @return [Rdkafka::Metadata] rdkafka metadata object with the requested details
+      def metadata(topic_name = nil)
+        m_config = @config.metadata
+
+        with_broker_errors_retry(
+          # required to be in seconds, not ms
+          wait_time: m_config.wait_time / 1_000.to_f,
+          max_attempts: m_config.max_attempts
+        ) do
+          @wrapped.metadata(topic_name, m_config.timeout)
+        end
+      end
+
      private

      # Runs expected block of code with few retries on all_brokers_down
@@ -94,13 +182,14 @@ module Karafka
      #   completely.
      # @param wait_time [Integer, Float] how many seconds should we wait. It uses `#sleep` of Ruby
      #   so it needs time in seconds.
-
+      # @param errors [Array<Symbol>] rdkafka errors we want to retry on
+      def with_broker_errors_retry(max_attempts:, wait_time: 1, errors: RETRYABLE_DEFAULT_ERRORS)
        attempt ||= 0
        attempt += 1

        yield
      rescue Rdkafka::RdkafkaError => e
-        raise unless
+        raise unless errors.include?(e.code)

        if attempt <= max_attempts
          sleep(wait_time)
data/lib/karafka/connection/status.rb
CHANGED

@@ -5,6 +5,11 @@ module Karafka
  module Connection
    # Listener connection status representation
    class Status
+      include Helpers::ConfigImporter.new(
+        monitor: %i[monitor],
+        conductor: %i[internal connection conductor]
+      )
+
      # Available states and their transitions.
      STATES = {
        pending: :pending!,
@@ -26,7 +31,8 @@ module Karafka
            return if @status && STATES.keys.index(:#{state}) <= STATES.keys.index(@status)

            @status = :#{state}
-
+            conductor.signal
+            monitor.instrument("connection.listener.#{state}", caller: self)
          end
        end

@@ -39,7 +45,6 @@ module Karafka

      def initialize
        @mutex = Mutex.new
-        @conductor = Karafka::App.config.internal.connection.conductor
        pending!
      end

@@ -49,11 +54,18 @@ module Karafka
      def stop!
        if pending?
          @status = :stopping
+          conductor.signal
+          monitor.instrument('connection.listener.stopping', caller: self)
+
          stopped!
        elsif stopped?
          nil
+        elsif stopping?
+          nil
        else
          @status = :stopping
+          conductor.signal
+          monitor.instrument('connection.listener.stopping', caller: self)
        end
      end

data/lib/karafka/contracts/config.rb
CHANGED

@@ -25,13 +25,14 @@ module Karafka

      required(:client_id) { |val| val.is_a?(String) && Contracts::TOPIC_REGEXP.match?(val) }
      required(:concurrency) { |val| val.is_a?(Integer) && val.positive? }
-      required(:consumer_mapper) { |val| !val.nil? }
      required(:consumer_persistence) { |val| [true, false].include?(val) }
      required(:pause_timeout) { |val| val.is_a?(Integer) && val.positive? }
      required(:pause_max_timeout) { |val| val.is_a?(Integer) && val.positive? }
      required(:pause_with_exponential_backoff) { |val| [true, false].include?(val) }
+      required(:strict_topics_namespacing) { |val| [true, false].include?(val) }
      required(:shutdown_timeout) { |val| val.is_a?(Integer) && val.positive? }
      required(:max_wait_time) { |val| val.is_a?(Integer) && val.positive? }
+      required(:group_id) { |val| val.is_a?(String) && Contracts::TOPIC_REGEXP.match?(val) }
      required(:kafka) { |val| val.is_a?(Hash) && !val.empty? }

      nested(:swarm) do
@@ -39,6 +40,12 @@ module Karafka
        required(:node) { |val| val == false || val.is_a?(Karafka::Swarm::Node) }
      end

+      nested(:oauth) do
+        required(:token_provider_listener) do |val|
+          val == false || val.respond_to?(:on_oauthbearer_token_refresh)
+        end
+      end
+
      nested(:admin) do
        # Can be empty because inherits values from the root kafka
        required(:kafka) { |val| val.is_a?(Hash) }
@@ -74,11 +81,17 @@ module Karafka
          required(:conductor) { |val| !val.nil? }

          nested(:proxy) do
+            nested(:commit) do
+              required(:max_attempts) { |val| val.is_a?(Integer) && val.positive? }
+              required(:wait_time) { |val| val.is_a?(Integer) && val.positive? }
+            end
+
            # All of them have the same requirements
            %i[
              query_watermark_offsets
              offsets_for_times
              committed
+              metadata
            ].each do |scope|
              nested(scope) do
                required(:timeout) { |val| val.is_a?(Integer) && val.positive? }
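The new root `oauth` nesting means the validated setting lives under `config.oauth.token_provider_listener`, and the contract only requires `false` or an object responding to `#on_oauthbearer_token_refresh`. A hedged wiring sketch (the listener class itself is hypothetical):

require 'karafka'

# Hypothetical listener; only responding to #on_oauthbearer_token_refresh matters for the contract
class OauthTokenProviderListener
  def on_oauthbearer_token_refresh(_event)
    # token acquisition would happen here
  end
end

class KarafkaApp < Karafka::App
  setup do |config|
    config.kafka = { 'bootstrap.servers': '127.0.0.1:9092' }
    # Satisfies the nested(:oauth) validation added above
    config.oauth.token_provider_listener = OauthTokenProviderListener.new
  end
end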
data/lib/karafka/contracts/topic.rb
CHANGED

@@ -12,7 +12,7 @@ module Karafka
        ).fetch('en').fetch('validations').fetch('topic')
      end

-      required(:
+      required(:deserializers) { |val| !val.nil? }
      required(:id) { |val| val.is_a?(String) && Contracts::TOPIC_REGEXP.match?(val) }
      required(:kafka) { |val| val.is_a?(Hash) && !val.empty? }
      required(:max_messages) { |val| val.is_a?(Integer) && val >= 1 }
data/lib/karafka/deserializers/headers.rb
ADDED

@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Deserializers
+    # Default message headers deserializer
+    class Headers
+      # @param metadata [Karafka::Messages::Metadata] metadata object from which we obtain the
+      #   `#raw_headers`
+      # @return [Hash] expected message headers hash
+      def call(metadata)
+        metadata.raw_headers
+      end
+    end
+  end
+end
data/lib/karafka/deserializers/key.rb
ADDED

@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Deserializers
+    # Default message key deserializer
+    class Key
+      # @param metadata [Karafka::Messages::Metadata] metadata object from which we obtain the
+      #   `#raw_key`
+      # @return [String, nil] expected message key in a string format or nil if no key
+      def call(metadata)
+        metadata.raw_key
+      end
+    end
+  end
+end
data/lib/karafka/deserializers/payload.rb
ADDED

@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+module Karafka
+  # Module for all supported by default deserializers.
+  module Deserializers
+    # Default Karafka Json deserializer for loading JSON data in payload.
+    class Payload
+      # @param message [Karafka::Messages::Message] Message object that we want to deserialize
+      # @return [Hash] hash with deserialized JSON data
+      def call(message)
+        # nil payload can be present for example for tombstone messages
+        message.raw_payload.nil? ? nil : ::JSON.parse(message.raw_payload)
+      end
+    end
+  end
+end
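These three classes are the defaults behind the new per-topic `deserializers` routing setting validated in the topic contract above (and backed by the new `routing/features/deserializers` files in this release). A sketch of overriding them per topic; the keyword names mirror the new default trio and are assumptions here:

require 'karafka'

class EventsConsumer < Karafka::BaseConsumer
  def consume
    messages.each { |message| puts message.payload }
  end
end

class KarafkaApp < Karafka::App
  setup do |config|
    config.kafka = { 'bootstrap.servers': '127.0.0.1:9092' }
  end

  routes.draw do
    topic :events do
      consumer EventsConsumer
      # Assumed keyword names matching the new default deserializers
      deserializers(
        payload: Karafka::Deserializers::Payload.new,
        key: Karafka::Deserializers::Key.new,
        headers: Karafka::Deserializers::Headers.new
      )
    end
  end
end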
data/lib/karafka/embedded.rb
CHANGED
data/lib/karafka/helpers/async.rb
CHANGED

@@ -22,16 +22,19 @@ module Karafka
        def included(base)
          base.extend ::Forwardable

-          base.def_delegators :@thread, :join, :terminate, :alive
+          base.def_delegators :@thread, :join, :terminate, :alive?, :name
        end
      end

      # Runs the `#call` method in a new thread
-
+      # @param thread_name [String] name that we want to assign to the thread when we start it
+      def async_call(thread_name = '')
        MUTEX.synchronize do
          return if @thread&.alive?

          @thread = Thread.new do
+            Thread.current.name = thread_name
+
            Thread.current.abort_on_exception = true

            call
data/lib/karafka/helpers/colorize.rb
CHANGED

@@ -21,6 +21,12 @@ module Karafka
      def yellow(string)
        "\033[1;33m#{string}\033[0m"
      end
+
+      # @param string [String] string we want to have in grey
+      # @return [String] grey string
+      def grey(string)
+        "\e[38;5;244m#{string}\e[0m"
+      end
    end
  end
end
data/lib/karafka/instrumentation/callbacks/oauthbearer_token_refresh.rb
ADDED

@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Instrumentation
+    module Callbacks
+      # Callback that is triggered when oauth token needs to be refreshed.
+      class OauthbearerTokenRefresh
+        # @param bearer [Rdkafka::Consumer, Rdkafka::Admin] given rdkafka instance. It is needed as
+        #   we need to have a reference to call `#oauthbearer_set_token` or
+        #   `#oauthbearer_set_token_failure` upon the event.
+        def initialize(bearer)
+          @bearer = bearer
+        end
+
+        # @param _rd_config [Rdkafka::Config]
+        # @param bearer_name [String] name of the bearer for which we refresh
+        def call(_rd_config, bearer_name)
+          return unless @bearer.name == bearer_name
+
+          ::Karafka.monitor.instrument(
+            'oauthbearer.token_refresh',
+            bearer: @bearer,
+            caller: self
+          )
+        end
+      end
+    end
+  end
+end
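A possible counterpart on the application side: a monitor listener reacting to the 'oauthbearer.token_refresh' event instrumented above and handing a token back to the bearer. The `#oauthbearer_set_token` / `#oauthbearer_set_token_failure` calls are the ones referenced in the class comment; the exact keyword arguments and the token-fetching helper are assumptions:

# Hedged sketch of handling the instrumented refresh event
class OauthTokenRefreshListener
  def on_oauthbearer_token_refresh(event)
    bearer = event[:bearer]

    # fetch_oauth_token is a hypothetical helper returning [token_string, lifetime_ms] or nil
    token, lifetime_ms = fetch_oauth_token

    if token
      # Keyword arguments assumed to follow the rdkafka bindings mentioned in the callback docs
      bearer.oauthbearer_set_token(
        token: token,
        lifetime_ms: lifetime_ms,
        principal_name: 'karafka'
      )
    else
      bearer.oauthbearer_set_token_failure('OAuth token could not be obtained')
    end
  end
end

Karafka.monitor.subscribe(OauthTokenRefreshListener.new)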