karafka 2.2.11 → 2.2.13
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +2 -4
- data/CHANGELOG.md +12 -0
- data/Gemfile.lock +13 -13
- data/config/locales/errors.yml +3 -1
- data/docker-compose.yml +1 -1
- data/karafka.gemspec +2 -2
- data/lib/karafka/connection/client.rb +77 -11
- data/lib/karafka/connection/consumer_group_coordinator.rb +3 -3
- data/lib/karafka/connection/listener.rb +30 -7
- data/lib/karafka/connection/listeners_batch.rb +6 -1
- data/lib/karafka/contracts/config.rb +5 -1
- data/lib/karafka/helpers/interval_runner.rb +39 -0
- data/lib/karafka/instrumentation/notifications.rb +1 -0
- data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +1 -9
- data/lib/karafka/pro/loader.rb +2 -1
- data/lib/karafka/pro/processing/coordinator.rb +12 -6
- data/lib/karafka/pro/processing/jobs_queue.rb +109 -0
- data/lib/karafka/pro/processing/scheduler.rb +2 -3
- data/lib/karafka/pro/processing/strategies/default.rb +2 -0
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +9 -0
- data/lib/karafka/pro/processing/strategies/vp/default.rb +8 -4
- data/lib/karafka/processing/coordinator.rb +13 -7
- data/lib/karafka/processing/inline_insights/consumer.rb +2 -0
- data/lib/karafka/processing/jobs_queue.rb +41 -13
- data/lib/karafka/processing/scheduler.rb +19 -3
- data/lib/karafka/processing/strategies/default.rb +2 -0
- data/lib/karafka/processing/timed_queue.rb +62 -0
- data/lib/karafka/routing/builder.rb +32 -17
- data/lib/karafka/routing/subscription_group.rb +11 -6
- data/lib/karafka/runner.rb +1 -1
- data/lib/karafka/setup/config.rb +13 -1
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +0 -1
- data.tar.gz.sig +0 -0
- metadata +9 -6
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4056d72f0d37ac46c52597ebcfed87de031f9f250d57a64ec5c665d3423a3087
+  data.tar.gz: 95aeab42e351043873d548a5289e8355fe48fa7b7f27aaf1549a220c76eac9c1
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8e41da4dff00dc3cb9749874568a275cdad81b7a762182cee7ea497bfe373dd1b3f777dd40638d0c30ff13f50c5913cdcad175edcc8b9b36a3e26fb5658fc986
+  data.tar.gz: 738352dea20404d42a80340c2fc27359d54185565e8069f8245662e02d33c8630ce7922c3938b06b07e5587bd007342c65439229484ed529ae050e356872f150
checksums.yaml.gz.sig
CHANGED
@@ -1,4 +1,2 @@
(binary gem signature; the previous 4 lines of binary data were replaced with 2 new lines and are not reproducible as text)
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,17 @@
 # Karafka framework changelog
 
+## 2.2.13 (2023-11-17)
+- **[Feature]** Introduce low-level extended Scheduling API for granular control of schedulers and jobs execution [Pro].
+- [Improvement] Use separate lock for user-facing synchronization.
+- [Improvement] Instrument `consumer.before_enqueue`.
+- [Improvement] Limit usage of `concurrent-ruby` (plan to remove it as a dependency fully)
+- [Improvement] Provide `#synchronize` API same as in VPs for LRJs to allow for lifecycle events and consumption synchronization.
+
+## 2.2.12 (2023-11-09)
+- [Improvement] Rewrite the polling engine to update statistics and error callbacks despite longer non LRJ processing or long `max_wait_time` setups. This change provides stability to the statistics and background error emitting making them time-reliable.
+- [Improvement] Auto-update Inline Insights if new insights are present for all consumers and not only LRJ (OSS and Pro).
+- [Improvement] Alias `#insights` with `#inline_insights` and `#insights?` with `#inline_insights?`
+
 ## 2.2.11 (2023-11-03)
 - [Improvement] Allow marking as consumed in the user `#synchronize` block.
 - [Improvement] Make whole Pro VP marking as consumed concurrency safe for both async and sync scenarios.
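The `#synchronize` entries above refer to the user-facing lock that Pro consumers expose for Virtual Partitions and, as of 2.2.13, also for Long-Running Jobs. A minimal usage sketch follows; the consumer class and topic are illustrative and not taken from this gem's code:

class OrdersConsumer < ApplicationConsumer
  def consume
    messages.each do |message|
      # Heavy, potentially long-running work happens here...
    end

    # User-facing synchronization block: a safe place to mark work as consumed
    # while framework lifecycle operations may run concurrently
    synchronize do
      mark_as_consumed(messages.last)
    end
  end
end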
data/Gemfile.lock
CHANGED
@@ -1,18 +1,18 @@
 PATH
   remote: .
   specs:
-    karafka (2.2.11)
-      karafka-core (>= 2.2.
-      waterdrop (>= 2.6.
+    karafka (2.2.13)
+      karafka-core (>= 2.2.7, < 2.3.0)
+      waterdrop (>= 2.6.11, < 3.0.0)
       zeitwerk (~> 2.3)
 
 GEM
   remote: https://rubygems.org/
   specs:
-    activejob (7.1.
-      activesupport (= 7.1.
+    activejob (7.1.2)
+      activesupport (= 7.1.2)
       globalid (>= 0.3.6)
-    activesupport (7.1.
+    activesupport (7.1.2)
       base64
       bigdecimal
       concurrent-ruby (~> 1.0, >= 1.0.2)
@@ -22,14 +22,14 @@ GEM
       minitest (>= 5.1)
       mutex_m
       tzinfo (~> 2.0)
-    base64 (0.
+    base64 (0.2.0)
     bigdecimal (3.1.4)
     byebug (11.1.3)
     concurrent-ruby (1.2.2)
     connection_pool (2.4.1)
     diff-lcs (1.5.0)
     docile (1.4.0)
-    drb (2.
+    drb (2.2.0)
       ruby2_keywords
     erubi (1.12.0)
     factory_bot (6.3.0)
@@ -39,10 +39,10 @@ GEM
       activesupport (>= 6.1)
     i18n (1.14.1)
       concurrent-ruby (~> 1.0)
-    karafka-core (2.2.
+    karafka-core (2.2.7)
       concurrent-ruby (>= 1.1)
-      karafka-rdkafka (>= 0.13.
-    karafka-rdkafka (0.
+      karafka-rdkafka (>= 0.13.9, < 0.15.0)
+    karafka-rdkafka (0.14.0)
       ffi (~> 1.15)
       mini_portile2 (~> 2.6)
       rake (> 12)
@@ -54,10 +54,10 @@ GEM
       tilt (~> 2.0)
     mini_portile2 (2.8.5)
     minitest (5.20.0)
-    mutex_m (0.
+    mutex_m (0.2.0)
     rack (3.0.8)
     rake (13.1.0)
-    roda (3.
+    roda (3.74.0)
       rack
     rspec (3.12.0)
       rspec-core (~> 3.12.0)
data/config/locales/errors.yml
CHANGED
@@ -16,7 +16,8 @@ en:
       max_wait_time_format: needs to be an integer bigger than 0
       kafka_format: needs to be a filled hash
       internal.processing.jobs_builder_format: cannot be nil
-      internal.processing.
+      internal.processing.jobs_queue_class_format: cannot be nil
+      internal.processing.scheduler_class_format: cannot be nil
       internal.processing.coordinator_class_format: cannot be nil
       internal.processing.partitioner_class_format: cannot be nil
       internal.processing.strategy_selector_format: cannot be nil
@@ -26,6 +27,7 @@ en:
       internal.active_job.consumer_class: cannot be nil
       internal.status_format: needs to be present
       internal.process_format: needs to be present
+      internal.tick_interval_format: needs to be an integer bigger or equal to 1000
       internal.routing.builder_format: needs to be present
       internal.routing.subscription_groups_builder_format: needs to be present
       internal.connection.proxy.query_watermark_offsets.timeout_format: needs to be an integer bigger than 0
data/docker-compose.yml
CHANGED
data/karafka.gemspec
CHANGED
@@ -21,8 +21,8 @@ Gem::Specification.new do |spec|
     without having to focus on things that are not your business domain.
   DESC
 
-  spec.add_dependency 'karafka-core', '>= 2.2.
-  spec.add_dependency 'waterdrop', '>= 2.6.
+  spec.add_dependency 'karafka-core', '>= 2.2.7', '< 2.3.0'
+  spec.add_dependency 'waterdrop', '>= 2.6.11', '< 3.0.0'
   spec.add_dependency 'zeitwerk', '~> 2.3'
 
   if $PROGRAM_NAME.end_with?('gem')
data/lib/karafka/connection/client.rb
CHANGED
@@ -43,11 +43,13 @@ module Karafka
       @closed = false
       @subscription_group = subscription_group
       @buffer = RawMessagesBuffer.new
+      @tick_interval = ::Karafka::App.config.internal.tick_interval
       @rebalance_manager = RebalanceManager.new(@subscription_group.id)
       @rebalance_callback = Instrumentation::Callbacks::Rebalance.new(
         @subscription_group.id,
         @subscription_group.consumer_group.id
       )
+      @events_poller = Helpers::IntervalRunner.new { events_poll }
       @kafka = build_consumer
       # There are few operations that can happen in parallel from the listener threads as well
       # as from the workers. They are not fully thread-safe because they may be composed out of
@@ -64,6 +66,8 @@ module Karafka
 
     # Fetches messages within boundaries defined by the settings (time, size, topics, etc).
     #
+    # Also periodically runs the events polling to trigger events callbacks.
+    #
     # @return [Karafka::Connection::MessagesBuffer] messages buffer that holds messages per topic
     #   partition
     # @note This method should not be executed from many threads at the same time
@@ -73,38 +77,46 @@ module Karafka
       @buffer.clear
       @rebalance_manager.clear
 
+      events_poll
+
       loop do
         time_poll.start
 
         # Don't fetch more messages if we do not have any time left
         break if time_poll.exceeded?
-        # Don't fetch more messages if we've fetched max
+        # Don't fetch more messages if we've fetched max that we've wanted
         break if @buffer.size >= @subscription_group.max_messages
 
         # Fetch message within our time boundaries
-
+        response = poll(time_poll.remaining)
 
         # Put a message to the buffer if there is one
-        @buffer <<
+        @buffer << response if response && response != :tick_time
 
         # Upon polling rebalance manager might have been updated.
         # If partition revocation happens, we need to remove messages from revoked partitions
         # as well as ensure we do not have duplicated due to the offset reset for partitions
         # that we got assigned
+        #
         # We also do early break, so the information about rebalance is used as soon as possible
         if @rebalance_manager.changed?
+          # Since rebalances do not occur often, we can run events polling as well without
+          # any throttling
+          events_poll
           remove_revoked_and_duplicated_messages
           break
         end
 
+        @events_poller.call
+
         # Track time spent on all of the processing and polling
         time_poll.checkpoint
 
         # Finally once we've (potentially) removed revoked, etc, if no messages were returned
-        # we can break.
+        # and it was not an early poll exist, we can break.
         # Worth keeping in mind, that the rebalance manager might have been updated despite no
         # messages being returned during a poll
-        break unless
+        break unless response
       end
 
       @buffer
@@ -299,22 +311,38 @@ module Karafka
     def reset
       close
 
+      @events_poller.reset
       @closed = false
       @paused_tpls.clear
       @kafka = build_consumer
     end
 
-    # Runs a single poll ignoring all the potential errors
+    # Runs a single poll on the main queue and consumer queue ignoring all the potential errors
     # This is used as a keep-alive in the shutdown stage and any errors that happen here are
     # irrelevant from the shutdown process perspective
    #
-    # This is used only to trigger rebalance callbacks
+    # This is used only to trigger rebalance callbacks and other callbacks
     def ping
+      events_poll(100)
       poll(100)
     rescue Rdkafka::RdkafkaError
       nil
     end
 
+    # Triggers the rdkafka main queue events by consuming this queue. This is not the consumer
+    # consumption queue but the one with:
+    #   - error callbacks
+    #   - stats callbacks
+    #   - OAUTHBEARER token refresh callbacks
+    #
+    # @param timeout [Integer] number of milliseconds to wait on events or 0 not to wait.
+    #
+    # @note It is non-blocking when timeout 0 and will not wait if queue empty. It costs up to
+    #   2ms when no callbacks are triggered.
+    def events_poll(timeout = 0)
+      @kafka.events_poll(timeout)
+    end
+
     private
 
     # When we cannot store an offset, it means we no longer own the partition
@@ -464,18 +492,52 @@ module Karafka
       @kafka.position(tpl).to_h.fetch(topic).first.offset || -1
     end
 
-    # Performs a single poll operation and handles retries and
+    # Performs a single poll operation and handles retries and errors
+    #
+    # Keep in mind, that this timeout will be limited by a tick interval value, because we cannot
+    # block on a single poll longer than that. Otherwise our events polling would not be able to
+    # run frequently enough. This means, that even if you provide big value, it will not block
+    # for that long. This is anyhow compensated by the `#batch_poll` that can run for extended
+    # period of time but will run events polling frequently while waiting for the requested total
+    # time.
     #
-    # @param timeout [Integer] timeout for a single poll
-    # @return [Rdkafka::Consumer::Message, nil] fetched message
+    # @param timeout [Integer] timeout for a single poll.
+    # @return [Rdkafka::Consumer::Message, nil, Symbol] fetched message, nil if nothing polled
+    #   within the time we had or symbol indicating the early return reason
     def poll(timeout)
      time_poll ||= TimeTrackers::Poll.new(timeout)
 
      return nil if time_poll.exceeded?
 
      time_poll.start
+      remaining = time_poll.remaining
+
+      # We should not run a single poll longer than the tick frequency. Otherwise during a single
+      # `#batch_poll` we would not be able to run `#events_poll` often enough effectively
+      # blocking events from being handled.
+      poll_tick = timeout > @tick_interval ? @tick_interval : timeout
+
+      result = @kafka.poll(poll_tick)
+
+      # If we've got a message, we can return it
+      return result if result
+
+      time_poll.checkpoint
+
+      # We need to check if we have used all the allocated time as depending on the outcome, the
+      # batch loop behavior will differ. Using all time means, that we had nothing to do as no
+      # messages were present but if we did not exceed total time, it means we can still try
+      # polling again as we are withing user expected max wait time
+      used = remaining - time_poll.remaining
+
+      # In case we did not use enough time, it means that an internal event occured that means
+      # that something has changed without messages being published. For example a rebalance.
+      # In cases like this we finish early as well
+      return nil if used < poll_tick
 
-
+      # If we did not exceed total time allocated, it means that we finished because of the
+      # tick interval time limitations and not because time run out without any data
+      time_poll.exceeded? ? nil : :tick_time
     rescue ::Rdkafka::RdkafkaError => e
       early_report = false
 
@@ -535,6 +597,10 @@ module Karafka
       ::Rdkafka::Config.logger = ::Karafka::App.config.logger
       config = ::Rdkafka::Config.new(@subscription_group.kafka)
       config.consumer_rebalance_listener = @rebalance_callback
+      # We want to manage the events queue independently from the messages queue. Thanks to that
+      # we can ensure, that we get statistics and errors often enough even when not polling
+      # new messages. This allows us to report statistics while data is still being processed
+      config.consumer_poll_set = false
 
       consumer = config.consumer
       @name = consumer.name
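The changes above keep librdkafka's events queue (statistics, error and OAUTHBEARER callbacks) flowing even while messages are still being processed. Subscribing to those events is unchanged; a small sketch, assuming the standard `statistics.emitted` notification payload and the top-level `rxmsgs` librdkafka statistics field:

# Log the total number of consumed messages every time librdkafka emits statistics
Karafka.monitor.subscribe('statistics.emitted') do |event|
  stats = event[:statistics]

  Karafka.logger.info(
    "Stats for #{event[:consumer_group_id]}: rxmsgs=#{stats['rxmsgs']}"
  )
end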
data/lib/karafka/connection/consumer_group_coordinator.rb
CHANGED
@@ -16,7 +16,7 @@ module Karafka
     class ConsumerGroupCoordinator
       # @param group_size [Integer] number of separate subscription groups in a consumer group
       def initialize(group_size)
-        @
+        @shutdown_mutex = Mutex.new
         @group_size = group_size
         @finished = Set.new
       end
@@ -30,12 +30,12 @@ module Karafka
       # @return [Boolean] can we start shutdown on a given listener
       # @note If true, will also obtain a lock so no-one else will be closing the same time we do
       def shutdown?
-        finished? && @
+        finished? && @shutdown_mutex.try_lock
       end
 
       # Unlocks the shutdown lock
       def unlock
-        @
+        @shutdown_mutex.unlock if @shutdown_mutex.owned?
       end
 
       # Marks given listener as finished
data/lib/karafka/connection/listener.rb
CHANGED
@@ -14,11 +14,18 @@ module Karafka
       # @return [String] id of this listener
       attr_reader :id
 
+      # How long to wait in the initial events poll. Increases chances of having the initial events
+      # immediately available
+      INITIAL_EVENTS_POLL_TIMEOUT = 100
+
+      private_constant :INITIAL_EVENTS_POLL_TIMEOUT
+
       # @param consumer_group_coordinator [Karafka::Connection::ConsumerGroupCoordinator]
       # @param subscription_group [Karafka::Routing::SubscriptionGroup]
       # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
+      # @param scheduler [Karafka::Processing::Scheduler] scheduler we want to use
       # @return [Karafka::Connection::Listener] listener instance
-      def initialize(consumer_group_coordinator, subscription_group, jobs_queue)
+      def initialize(consumer_group_coordinator, subscription_group, jobs_queue, scheduler)
         proc_config = ::Karafka::App.config.internal.processing
 
         @id = SecureRandom.hex(6)
@@ -30,8 +37,8 @@ module Karafka
         @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
         @jobs_builder = proc_config.jobs_builder
         @partitioner = proc_config.partitioner_class.new(subscription_group)
-
-        @
+        @scheduler = scheduler
+        @events_poller = Helpers::IntervalRunner.new { @client.events_poll }
         # We keep one buffer for messages to preserve memory and not allocate extra objects
         # We can do this that way because we always first schedule jobs using messages before we
         # fetch another batch.
@@ -84,6 +91,15 @@ module Karafka
       # Kafka connections / Internet connection issues / Etc. Business logic problems should not
       # propagate this far.
       def fetch_loop
+        # Run the initial events fetch to improve chances of having metrics and initial callbacks
+        # triggers on start.
+        #
+        # In theory this may slow down the initial boot but we limit it up to 100ms, so it should
+        # not have a big initial impact. It may not be enough but Karafka does not give the boot
+        # warranties of statistics or other callbacks being immediately available, hence this is
+        # a fair trade-off
+        @client.events_poll(INITIAL_EVENTS_POLL_TIMEOUT)
+
         # Run the main loop as long as we are not stopping or moving into quiet mode
         until Karafka::App.done?
           Karafka.monitor.instrument(
@@ -227,7 +243,7 @@ module Karafka
           end
         end
 
-        @scheduler.schedule_revocation(
+        @scheduler.schedule_revocation(jobs)
       end
 
       # Enqueues the shutdown jobs for all the executors that exist in our subscription group
@@ -240,7 +256,7 @@ module Karafka
           jobs << job
         end
 
-        @scheduler.schedule_shutdown(
+        @scheduler.schedule_shutdown(jobs)
       end
 
       # Polls messages within the time and amount boundaries defined in the settings and then
@@ -282,12 +298,15 @@ module Karafka
 
         jobs.each(&:before_enqueue)
 
-        @scheduler.schedule_consumption(
+        @scheduler.schedule_consumption(jobs)
       end
 
       # Waits for all the jobs from a given subscription group to finish before moving forward
       def wait
-        @jobs_queue.wait(@subscription_group.id)
+        @jobs_queue.wait(@subscription_group.id) do
+          @events_poller.call
+          @scheduler.manage
+        end
       end
 
       # Waits without blocking the polling
@@ -303,6 +322,8 @@ module Karafka
       def wait_pinging(wait_until:, after_ping: -> {})
         until wait_until.call
           @client.ping
+          @scheduler.manage
+
           after_ping.call
           sleep(0.2)
         end
@@ -318,6 +339,8 @@ module Karafka
         # resetting.
         @jobs_queue.wait(@subscription_group.id)
         @jobs_queue.clear(@subscription_group.id)
+        @scheduler.clear(@subscription_group.id)
+        @events_poller.reset
         @client.reset
         @coordinators.reset
         @executors = Processing::ExecutorsBuffer.new(@client, @subscription_group)
data/lib/karafka/connection/listeners_batch.rb
CHANGED
@@ -11,6 +11,10 @@ module Karafka
       # @param jobs_queue [JobsQueue]
       # @return [ListenersBatch]
       def initialize(jobs_queue)
+        # We need one scheduler for all the listeners because in case of complex schedulers, they
+        # should be able to distribute work whenever any work is done in any of the listeners
+        scheduler = App.config.internal.processing.scheduler_class.new(jobs_queue)
+
         @coordinators = []
 
         @batch = App.subscription_groups.flat_map do |_consumer_group, subscription_groups|
@@ -24,7 +28,8 @@ module Karafka
           Connection::Listener.new(
             consumer_group_coordinator,
             subscription_group,
-            jobs_queue
+            jobs_queue,
+            scheduler
           )
         end
       end
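A single scheduler instance is now shared by all listeners, and its class comes from `internal.processing.scheduler_class` (validated by the config contract below). As a hedged illustration only: the internal namespace is not a stable public API, and the `LoggingScheduler` name is hypothetical, a custom scheduler could be plugged in roughly like this:

# Hypothetical scheduler that logs each consumption scheduling round before
# delegating to the default FIFO behaviour of the base scheduler
class LoggingScheduler < ::Karafka::Processing::Scheduler
  def schedule_consumption(jobs_array)
    Karafka.logger.debug("Scheduling #{jobs_array.size} consumption job(s)")
    super
  end
end

class KarafkaApp < Karafka::App
  setup do |config|
    config.kafka = { 'bootstrap.servers': '127.0.0.1:9092' }
    config.internal.processing.scheduler_class = LoggingScheduler
  end
end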
data/lib/karafka/contracts/config.rb
CHANGED
@@ -46,6 +46,9 @@ module Karafka
       nested(:internal) do
         required(:status) { |val| !val.nil? }
         required(:process) { |val| !val.nil? }
+        # In theory this could be less than a second, however this would impact the maximum time
+        # of a single consumer queue poll, hence we prevent it
+        required(:tick_interval) { |val| val.is_a?(Integer) && val >= 1_000 }
 
         nested(:connection) do
           nested(:proxy) do
@@ -70,7 +73,8 @@ module Karafka
 
         nested(:processing) do
           required(:jobs_builder) { |val| !val.nil? }
-          required(:
+          required(:jobs_queue_class) { |val| !val.nil? }
+          required(:scheduler_class) { |val| !val.nil? }
           required(:coordinator_class) { |val| !val.nil? }
           required(:partitioner_class) { |val| !val.nil? }
           required(:strategy_selector) { |val| !val.nil? }
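The new `tick_interval` lives under the internal settings and, per the contract above, must be an integer of at least 1000 ms. A sketch of raising it during setup; internal settings are not part of the public configuration API and usually should be left at their defaults:

class KarafkaApp < Karafka::App
  setup do |config|
    config.kafka = { 'bootstrap.servers': '127.0.0.1:9092' }

    # At most one events/statistics tick every 10 seconds; values below 1_000 are rejected
    # by the contract shown above
    config.internal.tick_interval = 10_000
  end
end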
data/lib/karafka/helpers/interval_runner.rb
ADDED
@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Helpers
+    # Object responsible for running given code with a given interval. It won't run given code
+    # more often than with a given interval.
+    #
+    # This allows us to execute certain code only once in a while.
+    #
+    # This can be used when we have code that could be invoked often due to it being in loops
+    # or other places but would only slow things down if would run with each tick.
+    class IntervalRunner
+      include Karafka::Core::Helpers::Time
+
+      # @param interval [Integer] interval in ms for running the provided code. Defaults to the
+      #   `internal.tick_interval` value
+      # @param block [Proc] block of code we want to run once in a while
+      def initialize(interval: ::Karafka::App.config.internal.tick_interval, &block)
+        @block = block
+        @interval = interval
+        @last_called_at = monotonic_now - @interval
+      end
+
+      # Runs the requested code if it was not executed previously recently
+      def call
+        return if monotonic_now - @last_called_at < @interval
+
+        @last_called_at = monotonic_now
+
+        @block.call
+      end
+
+      # Resets the runner, so next `#call` will run the underlying code
+      def reset
+        @last_called_at = monotonic_now - @interval
+      end
+    end
+  end
+end
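A usage sketch of the new helper, derived from the class above (the interval and block are arbitrary; inside Karafka the runner wraps `events_poll` and scheduler management calls):

# Runs the block at most once per 5 seconds, no matter how often #call is invoked
runner = Karafka::Helpers::IntervalRunner.new(interval: 5_000) do
  Karafka.logger.info('periodic housekeeping')
end

10.times do
  runner.call # executes the block only when 5_000 ms have passed since the previous run
  sleep(1)
end

runner.reset # forces the next #call to run the block immediately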
data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb
CHANGED
@@ -137,15 +137,7 @@ module Karafka
       def push_tags
         return unless Karafka.logger.respond_to?(:push_tags)
 
-
-        # to the older method for tags
-        tags = if client.respond_to?(:log_correlation)
-                 client.log_correlation
-               else
-                 client.active_correlation.to_s
-               end
-
-        Karafka.logger.push_tags(tags)
+        Karafka.logger.push_tags(client.log_correlation)
       end
 
       # Pops datadog's tags from the logger
data/lib/karafka/pro/loader.rb
CHANGED
@@ -84,7 +84,8 @@ module Karafka
 
         icfg.processing.coordinator_class = Processing::Coordinator
         icfg.processing.partitioner_class = Processing::Partitioner
-        icfg.processing.
+        icfg.processing.scheduler_class = Processing::Scheduler
+        icfg.processing.jobs_queue_class = Processing::JobsQueue
         icfg.processing.jobs_builder = Processing::JobsBuilder.new
         icfg.processing.strategy_selector = Processing::StrategySelector.new
 
data/lib/karafka/pro/processing/coordinator.rb
CHANGED
@@ -21,14 +21,20 @@ module Karafka
 
         def_delegators :@collapser, :collapsed?, :collapse_until!
 
-        attr_reader :filter, :virtual_offset_manager
+        attr_reader :filter, :virtual_offset_manager, :shared_mutex
 
         # @param args [Object] anything the base coordinator accepts
         def initialize(*args)
           super
 
           @executed = []
-          @
+          @flow_mutex = Mutex.new
+          # Lock for user code synchronization
+          # We do not want to mix coordinator lock with the user lock not to create cases where
+          # user imposed lock would lock the internal operations of Karafka
+          # This shared lock can be used by the end user as it is not used internally by the
+          # framework and can be used for user-facing locking
+          @shared_mutex = Mutex.new
           @collapser = Collapser.new
           @filter = FiltersApplier.new(self)
 
@@ -89,7 +95,7 @@ module Karafka
         # Runs synchronized code once for a collective of virtual partitions prior to work being
         # enqueued
         def on_enqueued
-          @
+          @flow_mutex.synchronize do
             return unless executable?(:on_enqueued)
 
             yield(@last_message)
@@ -98,7 +104,7 @@ module Karafka
 
         # Runs given code only once per all the coordinated jobs upon starting first of them
         def on_started
-          @
+          @flow_mutex.synchronize do
             return unless executable?(:on_started)
 
             yield(@last_message)
@@ -109,7 +115,7 @@ module Karafka
         # It runs once per all the coordinated jobs and should be used to run any type of post
         # jobs coordination processing execution
         def on_finished
-          @
+          @flow_mutex.synchronize do
             return unless finished?
             return unless executable?(:on_finished)
 
@@ -119,7 +125,7 @@ module Karafka
 
         # Runs once after a partition is revoked
         def on_revoked
-          @
+          @flow_mutex.synchronize do
             return unless executable?(:on_revoked)
 
             yield(@last_message)
|