karafka 2.2.11 → 2.2.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +2 -4
- data/CHANGELOG.md +12 -0
- data/Gemfile.lock +13 -13
- data/config/locales/errors.yml +3 -1
- data/docker-compose.yml +1 -1
- data/karafka.gemspec +2 -2
- data/lib/karafka/connection/client.rb +77 -11
- data/lib/karafka/connection/consumer_group_coordinator.rb +3 -3
- data/lib/karafka/connection/listener.rb +30 -7
- data/lib/karafka/connection/listeners_batch.rb +6 -1
- data/lib/karafka/contracts/config.rb +5 -1
- data/lib/karafka/helpers/interval_runner.rb +39 -0
- data/lib/karafka/instrumentation/notifications.rb +1 -0
- data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +1 -9
- data/lib/karafka/pro/loader.rb +2 -1
- data/lib/karafka/pro/processing/coordinator.rb +12 -6
- data/lib/karafka/pro/processing/jobs_queue.rb +109 -0
- data/lib/karafka/pro/processing/scheduler.rb +2 -3
- data/lib/karafka/pro/processing/strategies/default.rb +2 -0
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +9 -0
- data/lib/karafka/pro/processing/strategies/vp/default.rb +8 -4
- data/lib/karafka/processing/coordinator.rb +13 -7
- data/lib/karafka/processing/inline_insights/consumer.rb +2 -0
- data/lib/karafka/processing/jobs_queue.rb +41 -13
- data/lib/karafka/processing/scheduler.rb +19 -3
- data/lib/karafka/processing/strategies/default.rb +2 -0
- data/lib/karafka/processing/timed_queue.rb +62 -0
- data/lib/karafka/routing/builder.rb +32 -17
- data/lib/karafka/routing/subscription_group.rb +11 -6
- data/lib/karafka/runner.rb +1 -1
- data/lib/karafka/setup/config.rb +13 -1
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +0 -1
- data.tar.gz.sig +0 -0
- metadata +9 -6
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4056d72f0d37ac46c52597ebcfed87de031f9f250d57a64ec5c665d3423a3087
+  data.tar.gz: 95aeab42e351043873d548a5289e8355fe48fa7b7f27aaf1549a220c76eac9c1
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8e41da4dff00dc3cb9749874568a275cdad81b7a762182cee7ea497bfe373dd1b3f777dd40638d0c30ff13f50c5913cdcad175edcc8b9b36a3e26fb5658fc986
+  data.tar.gz: 738352dea20404d42a80340c2fc27359d54185565e8069f8245662e02d33c8630ce7922c3938b06b07e5587bd007342c65439229484ed529ae050e356872f150
checksums.yaml.gz.sig
CHANGED
@@ -1,4 +1,2 @@
(binary signature data: 4 lines of old signature bytes replaced with 2 lines of new signature bytes; not representable as text)
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,17 @@
 # Karafka framework changelog

+## 2.2.13 (2023-11-17)
+- **[Feature]** Introduce low-level extended Scheduling API for granular control of schedulers and jobs execution [Pro].
+- [Improvement] Use separate lock for user-facing synchronization.
+- [Improvement] Instrument `consumer.before_enqueue`.
+- [Improvement] Limit usage of `concurrent-ruby` (plan to remove it as a dependency fully)
+- [Improvement] Provide `#synchronize` API same as in VPs for LRJs to allow for lifecycle events and consumption synchronization.
+
+## 2.2.12 (2023-11-09)
+- [Improvement] Rewrite the polling engine to update statistics and error callbacks despite longer non LRJ processing or long `max_wait_time` setups. This change provides stability to the statistics and background error emitting making them time-reliable.
+- [Improvement] Auto-update Inline Insights if new insights are present for all consumers and not only LRJ (OSS and Pro).
+- [Improvement] Alias `#insights` with `#inline_insights` and `#insights?` with `#inline_insights?`
+
 ## 2.2.11 (2023-11-03)
 - [Improvement] Allow marking as consumed in the user `#synchronize` block.
 - [Improvement] Make whole Pro VP marking as consumed concurrency safe for both async and sync scenarios.
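Note: the `#synchronize` entries above refer to the user-facing lock exposed inside consumers. A minimal sketch of typical usage (the consumer class, topic handling and `store` call are illustrative, not part of this diff):

    # Illustrative VP/LRJ consumer using the user-facing #synchronize block.
    # The shared lock is separate from Karafka's internal coordination locks,
    # so marking as consumed inside the block is safe.
    class EventsConsumer < ApplicationConsumer
      def consume
        messages.each do |message|
          store(message.payload) # illustrative business logic

          synchronize do
            mark_as_consumed(message)
          end
        end
      end
    end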
data/Gemfile.lock
CHANGED
@@ -1,18 +1,18 @@
 PATH
   remote: .
   specs:
-    karafka (2.2.
-      karafka-core (>= 2.2.
-      waterdrop (>= 2.6.
+    karafka (2.2.13)
+      karafka-core (>= 2.2.7, < 2.3.0)
+      waterdrop (>= 2.6.11, < 3.0.0)
       zeitwerk (~> 2.3)

 GEM
   remote: https://rubygems.org/
   specs:
-    activejob (7.1.
-      activesupport (= 7.1.
+    activejob (7.1.2)
+      activesupport (= 7.1.2)
       globalid (>= 0.3.6)
-    activesupport (7.1.
+    activesupport (7.1.2)
       base64
       bigdecimal
       concurrent-ruby (~> 1.0, >= 1.0.2)
@@ -22,14 +22,14 @@ GEM
       minitest (>= 5.1)
       mutex_m
       tzinfo (~> 2.0)
-    base64 (0.
+    base64 (0.2.0)
     bigdecimal (3.1.4)
     byebug (11.1.3)
     concurrent-ruby (1.2.2)
     connection_pool (2.4.1)
     diff-lcs (1.5.0)
     docile (1.4.0)
-    drb (2.
+    drb (2.2.0)
       ruby2_keywords
     erubi (1.12.0)
     factory_bot (6.3.0)
@@ -39,10 +39,10 @@ GEM
       activesupport (>= 6.1)
     i18n (1.14.1)
       concurrent-ruby (~> 1.0)
-    karafka-core (2.2.
+    karafka-core (2.2.7)
       concurrent-ruby (>= 1.1)
-      karafka-rdkafka (>= 0.13.
-    karafka-rdkafka (0.
+      karafka-rdkafka (>= 0.13.9, < 0.15.0)
+    karafka-rdkafka (0.14.0)
       ffi (~> 1.15)
       mini_portile2 (~> 2.6)
       rake (> 12)
@@ -54,10 +54,10 @@ GEM
       tilt (~> 2.0)
     mini_portile2 (2.8.5)
     minitest (5.20.0)
-    mutex_m (0.
+    mutex_m (0.2.0)
     rack (3.0.8)
     rake (13.1.0)
-    roda (3.
+    roda (3.74.0)
       rack
     rspec (3.12.0)
       rspec-core (~> 3.12.0)
data/config/locales/errors.yml
CHANGED
@@ -16,7 +16,8 @@ en:
       max_wait_time_format: needs to be an integer bigger than 0
       kafka_format: needs to be a filled hash
       internal.processing.jobs_builder_format: cannot be nil
-      internal.processing.
+      internal.processing.jobs_queue_class_format: cannot be nil
+      internal.processing.scheduler_class_format: cannot be nil
       internal.processing.coordinator_class_format: cannot be nil
       internal.processing.partitioner_class_format: cannot be nil
       internal.processing.strategy_selector_format: cannot be nil
@@ -26,6 +27,7 @@ en:
       internal.active_job.consumer_class: cannot be nil
       internal.status_format: needs to be present
       internal.process_format: needs to be present
+      internal.tick_interval_format: needs to be an integer bigger or equal to 1000
       internal.routing.builder_format: needs to be present
       internal.routing.subscription_groups_builder_format: needs to be present
       internal.connection.proxy.query_watermark_offsets.timeout_format: needs to be an integer bigger than 0
data/docker-compose.yml
CHANGED
data/karafka.gemspec
CHANGED
@@ -21,8 +21,8 @@ Gem::Specification.new do |spec|
     without having to focus on things that are not your business domain.
   DESC

-  spec.add_dependency 'karafka-core', '>= 2.2.
-  spec.add_dependency 'waterdrop', '>= 2.6.
+  spec.add_dependency 'karafka-core', '>= 2.2.7', '< 2.3.0'
+  spec.add_dependency 'waterdrop', '>= 2.6.11', '< 3.0.0'
   spec.add_dependency 'zeitwerk', '~> 2.3'

   if $PROGRAM_NAME.end_with?('gem')
data/lib/karafka/connection/client.rb
CHANGED
@@ -43,11 +43,13 @@ module Karafka
         @closed = false
         @subscription_group = subscription_group
         @buffer = RawMessagesBuffer.new
+        @tick_interval = ::Karafka::App.config.internal.tick_interval
         @rebalance_manager = RebalanceManager.new(@subscription_group.id)
         @rebalance_callback = Instrumentation::Callbacks::Rebalance.new(
           @subscription_group.id,
           @subscription_group.consumer_group.id
         )
+        @events_poller = Helpers::IntervalRunner.new { events_poll }
         @kafka = build_consumer
         # There are few operations that can happen in parallel from the listener threads as well
         # as from the workers. They are not fully thread-safe because they may be composed out of
@@ -64,6 +66,8 @@ module Karafka

       # Fetches messages within boundaries defined by the settings (time, size, topics, etc).
       #
+      # Also periodically runs the events polling to trigger events callbacks.
+      #
       # @return [Karafka::Connection::MessagesBuffer] messages buffer that holds messages per topic
       # partition
       # @note This method should not be executed from many threads at the same time
@@ -73,38 +77,46 @@ module Karafka
         @buffer.clear
         @rebalance_manager.clear

+        events_poll
+
         loop do
           time_poll.start

           # Don't fetch more messages if we do not have any time left
           break if time_poll.exceeded?
-          # Don't fetch more messages if we've fetched max
+          # Don't fetch more messages if we've fetched max that we've wanted
           break if @buffer.size >= @subscription_group.max_messages

           # Fetch message within our time boundaries
-
+          response = poll(time_poll.remaining)

           # Put a message to the buffer if there is one
-          @buffer <<
+          @buffer << response if response && response != :tick_time

           # Upon polling rebalance manager might have been updated.
           # If partition revocation happens, we need to remove messages from revoked partitions
           # as well as ensure we do not have duplicated due to the offset reset for partitions
           # that we got assigned
+          #
           # We also do early break, so the information about rebalance is used as soon as possible
           if @rebalance_manager.changed?
+            # Since rebalances do not occur often, we can run events polling as well without
+            # any throttling
+            events_poll
             remove_revoked_and_duplicated_messages
             break
           end

+          @events_poller.call
+
           # Track time spent on all of the processing and polling
           time_poll.checkpoint

           # Finally once we've (potentially) removed revoked, etc, if no messages were returned
-          # we can break.
+          # and it was not an early poll exist, we can break.
           # Worth keeping in mind, that the rebalance manager might have been updated despite no
           # messages being returned during a poll
-          break unless
+          break unless response
         end

         @buffer
@@ -299,22 +311,38 @@ module Karafka
       def reset
         close

+        @events_poller.reset
         @closed = false
         @paused_tpls.clear
         @kafka = build_consumer
       end

-      # Runs a single poll ignoring all the potential errors
+      # Runs a single poll on the main queue and consumer queue ignoring all the potential errors
       # This is used as a keep-alive in the shutdown stage and any errors that happen here are
       # irrelevant from the shutdown process perspective
       #
-      # This is used only to trigger rebalance callbacks
+      # This is used only to trigger rebalance callbacks and other callbacks
       def ping
+        events_poll(100)
         poll(100)
       rescue Rdkafka::RdkafkaError
         nil
       end

+      # Triggers the rdkafka main queue events by consuming this queue. This is not the consumer
+      # consumption queue but the one with:
+      # - error callbacks
+      # - stats callbacks
+      # - OAUTHBEARER token refresh callbacks
+      #
+      # @param timeout [Integer] number of milliseconds to wait on events or 0 not to wait.
+      #
+      # @note It is non-blocking when timeout 0 and will not wait if queue empty. It costs up to
+      #   2ms when no callbacks are triggered.
+      def events_poll(timeout = 0)
+        @kafka.events_poll(timeout)
+      end
+
       private

       # When we cannot store an offset, it means we no longer own the partition
@@ -464,18 +492,52 @@ module Karafka
         @kafka.position(tpl).to_h.fetch(topic).first.offset || -1
       end

-      # Performs a single poll operation and handles retries and
+      # Performs a single poll operation and handles retries and errors
+      #
+      # Keep in mind, that this timeout will be limited by a tick interval value, because we cannot
+      # block on a single poll longer than that. Otherwise our events polling would not be able to
+      # run frequently enough. This means, that even if you provide big value, it will not block
+      # for that long. This is anyhow compensated by the `#batch_poll` that can run for extended
+      # period of time but will run events polling frequently while waiting for the requested total
+      # time.
       #
-      # @param timeout [Integer] timeout for a single poll
-      # @return [Rdkafka::Consumer::Message, nil] fetched message
+      # @param timeout [Integer] timeout for a single poll.
+      # @return [Rdkafka::Consumer::Message, nil, Symbol] fetched message, nil if nothing polled
+      #   within the time we had or symbol indicating the early return reason
       def poll(timeout)
         time_poll ||= TimeTrackers::Poll.new(timeout)

         return nil if time_poll.exceeded?

         time_poll.start
+        remaining = time_poll.remaining
+
+        # We should not run a single poll longer than the tick frequency. Otherwise during a single
+        # `#batch_poll` we would not be able to run `#events_poll` often enough effectively
+        # blocking events from being handled.
+        poll_tick = timeout > @tick_interval ? @tick_interval : timeout
+
+        result = @kafka.poll(poll_tick)
+
+        # If we've got a message, we can return it
+        return result if result
+
+        time_poll.checkpoint
+
+        # We need to check if we have used all the allocated time as depending on the outcome, the
+        # batch loop behavior will differ. Using all time means, that we had nothing to do as no
+        # messages were present but if we did not exceed total time, it means we can still try
+        # polling again as we are withing user expected max wait time
+        used = remaining - time_poll.remaining
+
+        # In case we did not use enough time, it means that an internal event occured that means
+        # that something has changed without messages being published. For example a rebalance.
+        # In cases like this we finish early as well
+        return nil if used < poll_tick

-
+        # If we did not exceed total time allocated, it means that we finished because of the
+        # tick interval time limitations and not because time run out without any data
+        time_poll.exceeded? ? nil : :tick_time
       rescue ::Rdkafka::RdkafkaError => e
         early_report = false
@@ -535,6 +597,10 @@ module Karafka
         ::Rdkafka::Config.logger = ::Karafka::App.config.logger
         config = ::Rdkafka::Config.new(@subscription_group.kafka)
         config.consumer_rebalance_listener = @rebalance_callback
+        # We want to manage the events queue independently from the messages queue. Thanks to that
+        # we can ensure, that we get statistics and errors often enough even when not polling
+        # new messages. This allows us to report statistics while data is still being processed
+        config.consumer_poll_set = false

         consumer = config.consumer
         @name = consumer.name
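Note: the decoupled events queue above is what keeps statistics and error callbacks time-reliable while messages are still being processed. Those callbacks surface through the regular Karafka notification events; a sketch of consuming them (handler bodies are illustrative):

    # 'statistics.emitted' and 'error.occurred' are standard Karafka notification events.
    Karafka.monitor.subscribe('statistics.emitted') do |event|
      # event[:statistics] is the raw librdkafka statistics hash
      Karafka.logger.info("Statistics received for #{event[:statistics]['name']}")
    end

    Karafka.monitor.subscribe('error.occurred') do |event|
      Karafka.logger.error("Background error: #{event[:error]}")
    end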
data/lib/karafka/connection/consumer_group_coordinator.rb
CHANGED
@@ -16,7 +16,7 @@ module Karafka
     class ConsumerGroupCoordinator
       # @param group_size [Integer] number of separate subscription groups in a consumer group
       def initialize(group_size)
-        @
+        @shutdown_mutex = Mutex.new
         @group_size = group_size
         @finished = Set.new
       end
@@ -30,12 +30,12 @@ module Karafka
       # @return [Boolean] can we start shutdown on a given listener
       # @note If true, will also obtain a lock so no-one else will be closing the same time we do
       def shutdown?
-        finished? && @
+        finished? && @shutdown_mutex.try_lock
       end

       # Unlocks the shutdown lock
       def unlock
-        @
+        @shutdown_mutex.unlock if @shutdown_mutex.owned?
       end

       # Marks given listener as finished
data/lib/karafka/connection/listener.rb
CHANGED
@@ -14,11 +14,18 @@ module Karafka
       # @return [String] id of this listener
       attr_reader :id

+      # How long to wait in the initial events poll. Increases chances of having the initial events
+      # immediately available
+      INITIAL_EVENTS_POLL_TIMEOUT = 100
+
+      private_constant :INITIAL_EVENTS_POLL_TIMEOUT
+
       # @param consumer_group_coordinator [Karafka::Connection::ConsumerGroupCoordinator]
       # @param subscription_group [Karafka::Routing::SubscriptionGroup]
       # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
+      # @param scheduler [Karafka::Processing::Scheduler] scheduler we want to use
       # @return [Karafka::Connection::Listener] listener instance
-      def initialize(consumer_group_coordinator, subscription_group, jobs_queue)
+      def initialize(consumer_group_coordinator, subscription_group, jobs_queue, scheduler)
         proc_config = ::Karafka::App.config.internal.processing

         @id = SecureRandom.hex(6)
@@ -30,8 +37,8 @@ module Karafka
         @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
         @jobs_builder = proc_config.jobs_builder
         @partitioner = proc_config.partitioner_class.new(subscription_group)
-
-        @
+        @scheduler = scheduler
+        @events_poller = Helpers::IntervalRunner.new { @client.events_poll }
         # We keep one buffer for messages to preserve memory and not allocate extra objects
         # We can do this that way because we always first schedule jobs using messages before we
         # fetch another batch.
@@ -84,6 +91,15 @@ module Karafka
       # Kafka connections / Internet connection issues / Etc. Business logic problems should not
       # propagate this far.
       def fetch_loop
+        # Run the initial events fetch to improve chances of having metrics and initial callbacks
+        # triggers on start.
+        #
+        # In theory this may slow down the initial boot but we limit it up to 100ms, so it should
+        # not have a big initial impact. It may not be enough but Karafka does not give the boot
+        # warranties of statistics or other callbacks being immediately available, hence this is
+        # a fair trade-off
+        @client.events_poll(INITIAL_EVENTS_POLL_TIMEOUT)
+
         # Run the main loop as long as we are not stopping or moving into quiet mode
         until Karafka::App.done?
           Karafka.monitor.instrument(
@@ -227,7 +243,7 @@ module Karafka
           end
         end

-        @scheduler.schedule_revocation(
+        @scheduler.schedule_revocation(jobs)
       end

       # Enqueues the shutdown jobs for all the executors that exist in our subscription group
@@ -240,7 +256,7 @@ module Karafka
           jobs << job
         end

-        @scheduler.schedule_shutdown(
+        @scheduler.schedule_shutdown(jobs)
       end

       # Polls messages within the time and amount boundaries defined in the settings and then
@@ -282,12 +298,15 @@ module Karafka

         jobs.each(&:before_enqueue)

-        @scheduler.schedule_consumption(
+        @scheduler.schedule_consumption(jobs)
       end

       # Waits for all the jobs from a given subscription group to finish before moving forward
       def wait
-        @jobs_queue.wait(@subscription_group.id)
+        @jobs_queue.wait(@subscription_group.id) do
+          @events_poller.call
+          @scheduler.manage
+        end
       end

       # Waits without blocking the polling
@@ -303,6 +322,8 @@ module Karafka
       def wait_pinging(wait_until:, after_ping: -> {})
         until wait_until.call
           @client.ping
+          @scheduler.manage
+
           after_ping.call
           sleep(0.2)
         end
@@ -318,6 +339,8 @@ module Karafka
         # resetting.
         @jobs_queue.wait(@subscription_group.id)
         @jobs_queue.clear(@subscription_group.id)
+        @scheduler.clear(@subscription_group.id)
+        @events_poller.reset
         @client.reset
         @coordinators.reset
         @executors = Processing::ExecutorsBuffer.new(@client, @subscription_group)
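Note: `#wait` now passes a block to the jobs queue so the listener can keep polling rdkafka events and managing the scheduler while it waits. Conceptually, the queue side looks roughly like this (a simplified sketch, not the actual `JobsQueue#wait` implementation):

    # Simplified sketch of a wait loop that periodically yields control back to the
    # caller, which is how @events_poller.call and @scheduler.manage get a chance to run.
    def wait(group_id)
      until empty?(group_id)   # illustrative predicate over pending jobs
        yield if block_given?  # listener housekeeping runs here
        sleep(0.01)            # the real queue blocks on a timed queue instead of busy-sleeping
      end
    end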
|
@@ -11,6 +11,10 @@ module Karafka
|
|
11
11
|
# @param jobs_queue [JobsQueue]
|
12
12
|
# @return [ListenersBatch]
|
13
13
|
def initialize(jobs_queue)
|
14
|
+
# We need one scheduler for all the listeners because in case of complex schedulers, they
|
15
|
+
# should be able to distribute work whenever any work is done in any of the listeners
|
16
|
+
scheduler = App.config.internal.processing.scheduler_class.new(jobs_queue)
|
17
|
+
|
14
18
|
@coordinators = []
|
15
19
|
|
16
20
|
@batch = App.subscription_groups.flat_map do |_consumer_group, subscription_groups|
|
@@ -24,7 +28,8 @@ module Karafka
|
|
24
28
|
Connection::Listener.new(
|
25
29
|
consumer_group_coordinator,
|
26
30
|
subscription_group,
|
27
|
-
jobs_queue
|
31
|
+
jobs_queue,
|
32
|
+
scheduler
|
28
33
|
)
|
29
34
|
end
|
30
35
|
end
|
data/lib/karafka/contracts/config.rb
CHANGED
@@ -46,6 +46,9 @@ module Karafka
     nested(:internal) do
       required(:status) { |val| !val.nil? }
       required(:process) { |val| !val.nil? }
+      # In theory this could be less than a second, however this would impact the maximum time
+      # of a single consumer queue poll, hence we prevent it
+      required(:tick_interval) { |val| val.is_a?(Integer) && val >= 1_000 }

       nested(:connection) do
         nested(:proxy) do
@@ -70,7 +73,8 @@ module Karafka

       nested(:processing) do
         required(:jobs_builder) { |val| !val.nil? }
-        required(:
+        required(:jobs_queue_class) { |val| !val.nil? }
+        required(:scheduler_class) { |val| !val.nil? }
         required(:coordinator_class) { |val| !val.nil? }
         required(:partitioner_class) { |val| !val.nil? }
         required(:strategy_selector) { |val| !val.nil? }
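Note: `tick_interval` is an internal setting guarded by the contract above. If it ever needs tuning, an override would look roughly like this (illustrative; app class, broker address and the 5_000 value are assumptions, and values below 1_000 ms are rejected by the validation):

    class KarafkaApp < Karafka::App
      setup do |config|
        config.kafka = { 'bootstrap.servers': '127.0.0.1:9092' }
        # The contract above requires an Integer >= 1_000 (milliseconds)
        config.internal.tick_interval = 5_000
      end
    end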
data/lib/karafka/helpers/interval_runner.rb
ADDED
@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Helpers
+    # Object responsible for running given code with a given interval. It won't run given code
+    # more often than with a given interval.
+    #
+    # This allows us to execute certain code only once in a while.
+    #
+    # This can be used when we have code that could be invoked often due to it being in loops
+    # or other places but would only slow things down if would run with each tick.
+    class IntervalRunner
+      include Karafka::Core::Helpers::Time
+
+      # @param interval [Integer] interval in ms for running the provided code. Defaults to the
+      #   `internal.tick_interval` value
+      # @param block [Proc] block of code we want to run once in a while
+      def initialize(interval: ::Karafka::App.config.internal.tick_interval, &block)
+        @block = block
+        @interval = interval
+        @last_called_at = monotonic_now - @interval
+      end
+
+      # Runs the requested code if it was not executed previously recently
+      def call
+        return if monotonic_now - @last_called_at < @interval
+
+        @last_called_at = monotonic_now
+
+        @block.call
+      end
+
+      # Resets the runner, so next `#call` will run the underlying code
+      def reset
+        @last_called_at = monotonic_now - @interval
+      end
+    end
+  end
+end
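Note: based on the class added above, usage looks like this (the interval, iteration count and logged message are illustrative):

    runner = Karafka::Helpers::IntervalRunner.new(interval: 5_000) do
      Karafka.logger.info('Heartbeat')
    end

    1_000.times do
      runner.call  # the block runs at most once every 5 seconds, no matter how often #call is hit
      sleep(0.01)
    end

    runner.reset   # the next #call will run the block immediately again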
data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb
CHANGED
@@ -137,15 +137,7 @@ module Karafka
         def push_tags
           return unless Karafka.logger.respond_to?(:push_tags)

-
-          # to the older method for tags
-          tags = if client.respond_to?(:log_correlation)
-                   client.log_correlation
-                 else
-                   client.active_correlation.to_s
-                 end
-
-          Karafka.logger.push_tags(tags)
+          Karafka.logger.push_tags(client.log_correlation)
         end

         # Pops datadog's tags from the logger
data/lib/karafka/pro/loader.rb
CHANGED
@@ -84,7 +84,8 @@ module Karafka

         icfg.processing.coordinator_class = Processing::Coordinator
         icfg.processing.partitioner_class = Processing::Partitioner
-        icfg.processing.
+        icfg.processing.scheduler_class = Processing::Scheduler
+        icfg.processing.jobs_queue_class = Processing::JobsQueue
         icfg.processing.jobs_builder = Processing::JobsBuilder.new
         icfg.processing.strategy_selector = Processing::StrategySelector.new

data/lib/karafka/pro/processing/coordinator.rb
CHANGED
@@ -21,14 +21,20 @@ module Karafka

         def_delegators :@collapser, :collapsed?, :collapse_until!

-        attr_reader :filter, :virtual_offset_manager
+        attr_reader :filter, :virtual_offset_manager, :shared_mutex

         # @param args [Object] anything the base coordinator accepts
         def initialize(*args)
           super

           @executed = []
-           @
+           @flow_mutex = Mutex.new
+           # Lock for user code synchronization
+           # We do not want to mix coordinator lock with the user lock not to create cases where
+           # user imposed lock would lock the internal operations of Karafka
+           # This shared lock can be used by the end user as it is not used internally by the
+           # framework and can be used for user-facing locking
+           @shared_mutex = Mutex.new
           @collapser = Collapser.new
           @filter = FiltersApplier.new(self)

@@ -89,7 +95,7 @@ module Karafka
         # Runs synchronized code once for a collective of virtual partitions prior to work being
         # enqueued
         def on_enqueued
-          @
+          @flow_mutex.synchronize do
             return unless executable?(:on_enqueued)

             yield(@last_message)
@@ -98,7 +104,7 @@ module Karafka

         # Runs given code only once per all the coordinated jobs upon starting first of them
         def on_started
-          @
+          @flow_mutex.synchronize do
             return unless executable?(:on_started)

             yield(@last_message)
@@ -109,7 +115,7 @@ module Karafka
         # It runs once per all the coordinated jobs and should be used to run any type of post
         # jobs coordination processing execution
         def on_finished
-          @
+          @flow_mutex.synchronize do
             return unless finished?
             return unless executable?(:on_finished)

@@ -119,7 +125,7 @@ module Karafka

         # Runs once after a partition is revoked
         def on_revoked
-          @
+          @flow_mutex.synchronize do
             return unless executable?(:on_revoked)

             yield(@last_message)
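Note: the `@shared_mutex` exposed above backs the user-facing `#synchronize` mentioned in the changelog, while `@flow_mutex` stays reserved for the internal lifecycle callbacks. A hypothetical sketch of the delegation (method placement and everything except `shared_mutex` are illustrative, not taken from this diff):

    # Hypothetical sketch: user-facing synchronization delegating to the coordinator's
    # shared mutex, so user locking never contends with the internal @flow_mutex that
    # guards Karafka's own flow callbacks.
    def synchronize(&block)
      coordinator.shared_mutex.synchronize(&block)
    end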
|