karafka 2.2.12 → 2.2.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +7 -0
- data/Gemfile.lock +9 -9
- data/config/locales/errors.yml +2 -1
- data/lib/karafka/connection/consumer_group_coordinator.rb +3 -3
- data/lib/karafka/connection/listener.rb +13 -7
- data/lib/karafka/connection/listeners_batch.rb +6 -1
- data/lib/karafka/contracts/config.rb +2 -1
- data/lib/karafka/instrumentation/notifications.rb +1 -0
- data/lib/karafka/pro/loader.rb +2 -1
- data/lib/karafka/pro/processing/coordinator.rb +12 -6
- data/lib/karafka/pro/processing/jobs_queue.rb +109 -0
- data/lib/karafka/pro/processing/scheduler.rb +2 -3
- data/lib/karafka/pro/processing/strategies/default.rb +2 -0
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +9 -0
- data/lib/karafka/pro/processing/strategies/vp/default.rb +8 -4
- data/lib/karafka/processing/coordinator.rb +13 -7
- data/lib/karafka/processing/jobs_queue.rb +28 -11
- data/lib/karafka/processing/scheduler.rb +19 -3
- data/lib/karafka/processing/strategies/default.rb +2 -0
- data/lib/karafka/routing/builder.rb +32 -17
- data/lib/karafka/routing/subscription_group.rb +11 -6
- data/lib/karafka/runner.rb +1 -1
- data/lib/karafka/setup/config.rb +2 -1
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +0 -1
- data.tar.gz.sig +0 -0
- metadata +3 -2
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4056d72f0d37ac46c52597ebcfed87de031f9f250d57a64ec5c665d3423a3087
+  data.tar.gz: 95aeab42e351043873d548a5289e8355fe48fa7b7f27aaf1549a220c76eac9c1
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8e41da4dff00dc3cb9749874568a275cdad81b7a762182cee7ea497bfe373dd1b3f777dd40638d0c30ff13f50c5913cdcad175edcc8b9b36a3e26fb5658fc986
+  data.tar.gz: 738352dea20404d42a80340c2fc27359d54185565e8069f8245662e02d33c8630ce7922c3938b06b07e5587bd007342c65439229484ed529ae050e356872f150
checksums.yaml.gz.sig
CHANGED
Binary file
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,12 @@
 # Karafka framework changelog
 
+## 2.2.13 (2023-11-17)
+- **[Feature]** Introduce low-level extended Scheduling API for granular control of schedulers and jobs execution [Pro].
+- [Improvement] Use separate lock for user-facing synchronization.
+- [Improvement] Instrument `consumer.before_enqueue`.
+- [Improvement] Limit usage of `concurrent-ruby` (plan to remove it as a dependency fully).
+- [Improvement] Provide `#synchronize` API same as in VPs for LRJs to allow for lifecycle events and consumption synchronization.
+
 ## 2.2.12 (2023-11-09)
 - [Improvement] Rewrite the polling engine to update statistics and error callbacks despite longer non LRJ processing or long `max_wait_time` setups. This change provides stability to the statistics and background error emitting making them time-reliable.
 - [Improvement] Auto-update Inline Insights if new insights are present for all consumers and not only LRJ (OSS and Pro).
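Usage note for the `#synchronize` entry above: a hedged sketch of how the API, now unified between Virtual Partitions and Long-Running Jobs, can look from a consumer. The topic, consumer class, and `process` helper are illustrative and assume a Pro setup with `long_running_job true` on the routed topic.

```ruby
class EventsConsumer < ApplicationConsumer
  def consume
    messages.each do |message|
      process(message) # hypothetical long-running work

      # With LRJ, lifecycle events such as revocation handling may run in
      # parallel to this work, so offset management goes into the shared
      # critical section provided by #synchronize
      synchronize do
        return if revoked?

        mark_as_consumed(message)
      end
    end
  end
end
```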
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    karafka (2.2.12)
+    karafka (2.2.13)
       karafka-core (>= 2.2.7, < 2.3.0)
       waterdrop (>= 2.6.11, < 3.0.0)
       zeitwerk (~> 2.3)
@@ -9,10 +9,10 @@ PATH
 GEM
   remote: https://rubygems.org/
   specs:
-    activejob (7.1.
-      activesupport (= 7.1.
+    activejob (7.1.2)
+      activesupport (= 7.1.2)
       globalid (>= 0.3.6)
-    activesupport (7.1.
+    activesupport (7.1.2)
       base64
       bigdecimal
       concurrent-ruby (~> 1.0, >= 1.0.2)
@@ -22,14 +22,14 @@ GEM
       minitest (>= 5.1)
       mutex_m
       tzinfo (~> 2.0)
-    base64 (0.
+    base64 (0.2.0)
     bigdecimal (3.1.4)
     byebug (11.1.3)
     concurrent-ruby (1.2.2)
     connection_pool (2.4.1)
     diff-lcs (1.5.0)
     docile (1.4.0)
-    drb (2.
+    drb (2.2.0)
       ruby2_keywords
     erubi (1.12.0)
     factory_bot (6.3.0)
@@ -42,7 +42,7 @@ GEM
     karafka-core (2.2.7)
       concurrent-ruby (>= 1.1)
       karafka-rdkafka (>= 0.13.9, < 0.15.0)
-    karafka-rdkafka (0.
+    karafka-rdkafka (0.14.0)
       ffi (~> 1.15)
       mini_portile2 (~> 2.6)
       rake (> 12)
@@ -54,10 +54,10 @@ GEM
       tilt (~> 2.0)
     mini_portile2 (2.8.5)
     minitest (5.20.0)
-    mutex_m (0.
+    mutex_m (0.2.0)
     rack (3.0.8)
     rake (13.1.0)
-    roda (3.
+    roda (3.74.0)
       rack
     rspec (3.12.0)
       rspec-core (~> 3.12.0)
data/config/locales/errors.yml
CHANGED
@@ -16,7 +16,8 @@ en:
     max_wait_time_format: needs to be an integer bigger than 0
     kafka_format: needs to be a filled hash
     internal.processing.jobs_builder_format: cannot be nil
-    internal.processing.scheduler_format: cannot be nil
+    internal.processing.jobs_queue_class_format: cannot be nil
+    internal.processing.scheduler_class_format: cannot be nil
     internal.processing.coordinator_class_format: cannot be nil
     internal.processing.partitioner_class_format: cannot be nil
     internal.processing.strategy_selector_format: cannot be nil
data/lib/karafka/connection/consumer_group_coordinator.rb
CHANGED
@@ -16,7 +16,7 @@ module Karafka
     class ConsumerGroupCoordinator
       # @param group_size [Integer] number of separate subscription groups in a consumer group
       def initialize(group_size)
-        @
+        @shutdown_mutex = Mutex.new
         @group_size = group_size
         @finished = Set.new
       end
@@ -30,12 +30,12 @@ module Karafka
       # @return [Boolean] can we start shutdown on a given listener
       # @note If true, will also obtain a lock so no-one else will be closing the same time we do
       def shutdown?
-        finished? && @
+        finished? && @shutdown_mutex.try_lock
       end
 
       # Unlocks the shutdown lock
       def unlock
-        @
+        @shutdown_mutex.unlock if @shutdown_mutex.owned?
       end
 
       # Marks given listener as finished
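The `try_lock` in `#shutdown?` above is what guarantees that only one listener performs the consumer group shutdown: the first caller that sees the group finished acquires the lock and gets `true`, every other caller gets `false`. A minimal standalone illustration of that gating in plain Ruby (not Karafka code):

```ruby
mutex = Mutex.new
winners = Queue.new

threads = 5.times.map do |i|
  Thread.new do
    # Mutex#try_lock never blocks: while the lock is held it returns true
    # for exactly one caller, so only that thread runs the one-time action
    winners << i if mutex.try_lock
  end
end

threads.each(&:join)
puts winners.size # => 1
```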
data/lib/karafka/connection/listener.rb
CHANGED
@@ -23,8 +23,9 @@ module Karafka
       # @param consumer_group_coordinator [Karafka::Connection::ConsumerGroupCoordinator]
       # @param subscription_group [Karafka::Routing::SubscriptionGroup]
       # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
+      # @param scheduler [Karafka::Processing::Scheduler] scheduler we want to use
       # @return [Karafka::Connection::Listener] listener instance
-      def initialize(consumer_group_coordinator, subscription_group, jobs_queue)
+      def initialize(consumer_group_coordinator, subscription_group, jobs_queue, scheduler)
         proc_config = ::Karafka::App.config.internal.processing
 
         @id = SecureRandom.hex(6)
@@ -36,8 +37,7 @@ module Karafka
         @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
         @jobs_builder = proc_config.jobs_builder
         @partitioner = proc_config.partitioner_class.new(subscription_group)
-
-        @scheduler = proc_config.scheduler
+        @scheduler = scheduler
         @events_poller = Helpers::IntervalRunner.new { @client.events_poll }
         # We keep one buffer for messages to preserve memory and not allocate extra objects
         # We can do this that way because we always first schedule jobs using messages before we
@@ -243,7 +243,7 @@ module Karafka
           end
         end
 
-        @scheduler.schedule_revocation(@jobs_queue, jobs)
+        @scheduler.schedule_revocation(jobs)
       end
 
       # Enqueues the shutdown jobs for all the executors that exist in our subscription group
@@ -256,7 +256,7 @@ module Karafka
           jobs << job
         end
 
-        @scheduler.schedule_shutdown(@jobs_queue, jobs)
+        @scheduler.schedule_shutdown(jobs)
       end
 
       # Polls messages within the time and amount boundaries defined in the settings and then
@@ -298,12 +298,15 @@ module Karafka
 
         jobs.each(&:before_enqueue)
 
-        @scheduler.schedule_consumption(@jobs_queue, jobs)
+        @scheduler.schedule_consumption(jobs)
       end
 
       # Waits for all the jobs from a given subscription group to finish before moving forward
       def wait
-        @jobs_queue.wait(@subscription_group.id)
+        @jobs_queue.wait(@subscription_group.id) do
+          @events_poller.call
+          @scheduler.manage
+        end
       end
 
       # Waits without blocking the polling
@@ -319,6 +322,8 @@ module Karafka
       def wait_pinging(wait_until:, after_ping: -> {})
         until wait_until.call
           @client.ping
+          @scheduler.manage
+
           after_ping.call
           sleep(0.2)
         end
@@ -334,6 +339,7 @@ module Karafka
         # resetting.
         @jobs_queue.wait(@subscription_group.id)
         @jobs_queue.clear(@subscription_group.id)
+        @scheduler.clear(@subscription_group.id)
         @events_poller.reset
         @client.reset
         @coordinators.reset
data/lib/karafka/connection/listeners_batch.rb
CHANGED
@@ -11,6 +11,10 @@ module Karafka
       # @param jobs_queue [JobsQueue]
       # @return [ListenersBatch]
       def initialize(jobs_queue)
+        # We need one scheduler for all the listeners because in case of complex schedulers, they
+        # should be able to distribute work whenever any work is done in any of the listeners
+        scheduler = App.config.internal.processing.scheduler_class.new(jobs_queue)
+
        @coordinators = []
 
        @batch = App.subscription_groups.flat_map do |_consumer_group, subscription_groups|
@@ -24,7 +28,8 @@ module Karafka
           Connection::Listener.new(
             consumer_group_coordinator,
             subscription_group,
-            jobs_queue
+            jobs_queue,
+            scheduler
           )
         end
       end
data/lib/karafka/contracts/config.rb
CHANGED
@@ -73,7 +73,8 @@ module Karafka
 
         nested(:processing) do
           required(:jobs_builder) { |val| !val.nil? }
-          required(:scheduler) { |val| !val.nil? }
+          required(:jobs_queue_class) { |val| !val.nil? }
+          required(:scheduler_class) { |val| !val.nil? }
           required(:coordinator_class) { |val| !val.nil? }
           required(:partitioner_class) { |val| !val.nil? }
           required(:strategy_selector) { |val| !val.nil? }
data/lib/karafka/pro/loader.rb
CHANGED
@@ -84,7 +84,8 @@ module Karafka
 
           icfg.processing.coordinator_class = Processing::Coordinator
           icfg.processing.partitioner_class = Processing::Partitioner
-          icfg.processing.scheduler = Processing::Scheduler.new
+          icfg.processing.scheduler_class = Processing::Scheduler
+          icfg.processing.jobs_queue_class = Processing::JobsQueue
           icfg.processing.jobs_builder = Processing::JobsBuilder.new
           icfg.processing.strategy_selector = Processing::StrategySelector.new
 
data/lib/karafka/pro/processing/coordinator.rb
CHANGED
@@ -21,14 +21,20 @@ module Karafka
 
         def_delegators :@collapser, :collapsed?, :collapse_until!
 
-        attr_reader :filter, :virtual_offset_manager
+        attr_reader :filter, :virtual_offset_manager, :shared_mutex
 
         # @param args [Object] anything the base coordinator accepts
         def initialize(*args)
           super
 
           @executed = []
-          @mutex = Mutex.new
+          @flow_mutex = Mutex.new
+          # Lock for user code synchronization
+          # We do not want to mix coordinator lock with the user lock not to create cases where
+          # user imposed lock would lock the internal operations of Karafka
+          # This shared lock can be used by the end user as it is not used internally by the
+          # framework and can be used for user-facing locking
+          @shared_mutex = Mutex.new
           @collapser = Collapser.new
           @filter = FiltersApplier.new(self)
 
@@ -89,7 +95,7 @@ module Karafka
         # Runs synchronized code once for a collective of virtual partitions prior to work being
         # enqueued
         def on_enqueued
-          @mutex.synchronize do
+          @flow_mutex.synchronize do
             return unless executable?(:on_enqueued)
 
             yield(@last_message)
@@ -98,7 +104,7 @@ module Karafka
 
         # Runs given code only once per all the coordinated jobs upon starting first of them
         def on_started
-          @mutex.synchronize do
+          @flow_mutex.synchronize do
             return unless executable?(:on_started)
 
             yield(@last_message)
@@ -109,7 +115,7 @@ module Karafka
         # It runs once per all the coordinated jobs and should be used to run any type of post
         # jobs coordination processing execution
         def on_finished
-          @mutex.synchronize do
+          @flow_mutex.synchronize do
             return unless finished?
             return unless executable?(:on_finished)
 
@@ -119,7 +125,7 @@ module Karafka
 
         # Runs once after a partition is revoked
         def on_revoked
-          @mutex.synchronize do
+          @flow_mutex.synchronize do
             return unless executable?(:on_revoked)
 
             yield(@last_message)
data/lib/karafka/pro/processing/jobs_queue.rb
ADDED
@@ -0,0 +1,109 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      # Enhanced processing queue that provides ability to build complex work-distribution
+      # schedulers dedicated to particular job types
+      #
+      # Aside from the OSS queue capabilities it allows for jobless locking for advanced schedulers
+      class JobsQueue < Karafka::Processing::JobsQueue
+        attr_accessor :in_processing
+
+        # @return [Karafka::Pro::Processing::JobsQueue]
+        def initialize
+          super
+
+          @in_waiting = Hash.new { |h, k| h[k] = [] }
+
+          @statistics[:waiting] = 0
+        end
+
+        # Method that allows us to lock queue on a given subscription group without enqueuing the
+        # job. This can be used when building complex schedulers that want to postpone enqueuing
+        # before certain conditions are met.
+        #
+        # @param job [Jobs::Base] job used for locking
+        def lock(job)
+          @mutex.synchronize do
+            group = @in_waiting[job.group_id]
+
+            # This should never happen. Same job should not be locked twice
+            raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.include?(job)
+
+            @statistics[:waiting] += 1
+
+            group << job
+          end
+        end
+
+        # Method for unlocking the given subscription group queue space that was locked with a
+        # given job that was **not** added to the queue but used via `#lock`.
+        #
+        # @param job [Jobs::Base] job that locked the queue
+        def unlock(job)
+          @mutex.synchronize do
+            @statistics[:waiting] -= 1
+
+            return if @in_waiting[job.group_id].delete(job)
+
+            # This should never happen. It means there was a job being unlocked that was never
+            # locked in the first place
+            raise(Errors::JobsQueueSynchronizationError, job.group_id)
+          end
+        end
+
+        # Clears the processing states for a provided group. Useful when a recovery happens and we
+        # need to clean up state but only for a given subscription group.
+        #
+        # @param group_id [String]
+        def clear(group_id)
+          @mutex.synchronize do
+            @in_processing[group_id].clear
+
+            @statistics[:waiting] -= @in_waiting[group_id].size
+            @in_waiting[group_id].clear
+
+            # We unlock it just in case it was blocked when clearing started
+            tick(group_id)
+          end
+        end
+
+        # @param group_id [String]
+        #
+        # @return [Boolean] tell us if we have anything in the processing (or for processing) from
+        #   a given group.
+        def empty?(group_id)
+          @mutex.synchronize do
+            @in_processing[group_id].empty? &&
+              @in_waiting[group_id].empty?
+          end
+        end
+
+        private
+
+        # @param group_id [String] id of the group in which jobs we're interested.
+        # @return [Boolean] should we keep waiting or not
+        # @note We do not wait for non-blocking jobs. Their flow should allow for `poll` running
+        #   as they may exceed `max.poll.interval`
+        def wait?(group_id)
+          !(
+            @in_processing[group_id].all?(&:non_blocking?) &&
+              @in_waiting[group_id].all?(&:non_blocking?)
+          )
+        end
+      end
+    end
+  end
+end
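To make the jobless locking above concrete, a hedged sketch of a custom Pro scheduler that postpones consumption jobs with `#lock` and releases them later from `#manage`. The class name, the saturation condition, and the buffering policy are illustrative, not part of this release, and thread-safety of the buffer is omitted for brevity.

```ruby
class ThrottlingScheduler < Karafka::Pro::Processing::Scheduler
  def initialize(queue)
    super
    @buffer = []
  end

  # Enqueue while workers are free; otherwise lock the queue without
  # enqueuing, so the listener #wait keeps waiting for this group
  def schedule_consumption(jobs_array)
    jobs_array.each do |job|
      if @queue.statistics[:busy] < Karafka::App.config.concurrency
        @queue << job
      else
        @queue.lock(job)
        @buffer << job
      end
    end
  end

  # Invoked regularly from the listener loops; releases postponed jobs
  def manage
    return if @buffer.empty?
    return unless @queue.statistics[:busy] < Karafka::App.config.concurrency

    job = @buffer.shift
    @queue.unlock(job)
    @queue << job
  end

  # Drop postponed jobs of a given group on recovery
  def clear(group_id)
    @buffer.delete_if { |job| job.group_id == group_id }
  end
end
```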
data/lib/karafka/pro/processing/scheduler.rb
CHANGED
@@ -27,10 +27,9 @@ module Karafka
       class Scheduler < ::Karafka::Processing::Scheduler
         # Schedules jobs in the LJF order for consumption
         #
-        # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
         # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
         #
-        def schedule_consumption(queue, jobs_array)
+        def schedule_consumption(jobs_array)
           perf_tracker = PerformanceTracker.instance
 
           ordered = []
@@ -47,7 +46,7 @@ module Karafka
           ordered.map!(&:first)
 
           ordered.each do |job|
-            queue << job
+            @queue << job
           end
         end
 
data/lib/karafka/pro/processing/strategies/lrj/default.rb
CHANGED
@@ -77,6 +77,15 @@ module Karafka
               revoked
             end
           end
+
+          # Allows for LRJ to synchronize its work. It may be needed because LRJ can run
+          # lifecycle events like revocation while the LRJ work is running and there may be a
+          # need for a critical section.
+          #
+          # @param block [Proc] block we want to run in a mutex to prevent race-conditions
+          def synchronize(&block)
+            coordinator.shared_mutex.synchronize(&block)
+          end
         end
       end
     end
data/lib/karafka/pro/processing/strategies/vp/default.rb
CHANGED
@@ -94,13 +94,15 @@ module Karafka
 
           # Allows for cross-virtual-partition consumers locks
           #
-          # This is not needed in the non-VP flows because there is always only one
-          # per partition at the same time, so no coordination is needed directly for
-          # end users
+          # This is not needed in the non-VP flows except LRJ because there is always only one
+          # consumer per partition at the same time, so no coordination is needed directly for
+          # the end users. With LRJ it is needed and provided in the `LRJ::Default` strategy,
+          # because lifecycle events on revocation can run in parallel to the LRJ job as it is
+          # non-blocking.
           #
           # @param block [Proc] block we want to run in a mutex to prevent race-conditions
           def synchronize(&block)
-            coordinator.synchronize(&block)
+            coordinator.shared_mutex.synchronize(&block)
           end
 
           private
@@ -111,6 +113,8 @@ module Karafka
           # @note This can be done without the mutex, because it happens from the same thread
           #   for all the work (listener thread)
           def handle_before_enqueue
+            super
+
             coordinator.virtual_offset_manager.register(
               messages.map(&:offset)
             )
data/lib/karafka/processing/coordinator.rb
CHANGED
@@ -162,11 +162,24 @@ module Karafka
         @manual_seek
       end
 
+      # @param consumer [Object] karafka consumer (normal or pro)
+      # @return [Karafka::Processing::Result] result object which we can use to indicate
+      #   consumption processing state.
+      def consumption(consumer)
+        @consumptions[consumer] ||= Processing::Result.new
+      end
+
       # Allows to run synchronized (locked) code that can operate only from a given thread
       #
       # @param block [Proc] code we want to run in the synchronized mode
+      #
       # @note We check if mutex is not owned already by the current thread so we won't end up with
       #   a deadlock in case user runs coordinated code from inside of his own lock
+      #
+      # @note This is internal and should **not** be used to synchronize user-facing code.
+      #   Otherwise user indirectly could cause deadlocks or prolonged locks by running his logic.
+      #   This can and should however be used for multi-thread strategy applications and other
+      #   internal operations locks.
       def synchronize(&block)
         if @mutex.owned?
           yield
@@ -174,13 +187,6 @@ module Karafka
           @mutex.synchronize(&block)
         end
       end
-
-      # @param consumer [Object] karafka consumer (normal or pro)
-      # @return [Karafka::Processing::Result] result object which we can use to indicate
-      #   consumption processing state.
-      def consumption(consumer)
-        @consumptions[consumer] ||= Processing::Result.new
-      end
     end
   end
 end
data/lib/karafka/processing/jobs_queue.rb
CHANGED
@@ -9,6 +9,9 @@ module Karafka
     # on this queue, that's why internally we keep track of processing per group.
     #
     # We work with the assumption, that partitions data is evenly distributed.
+    #
+    # @note This job queue also keeps track / understands number of busy workers. This is because
+    #   we use a single workers poll that can have granular scheduling.
     class JobsQueue
       # @return [Karafka::Processing::JobsQueue]
       def initialize
@@ -26,19 +29,14 @@ module Karafka
           h.compute_if_absent(k) { RUBY_VERSION < '3.2' ? TimedQueue.new : Queue.new }
         end
 
+        @concurrency = Karafka::App.config.concurrency
         @tick_interval = ::Karafka::App.config.internal.tick_interval
         @in_processing = Hash.new { |h, k| h[k] = [] }
+        @statistics = { busy: 0, enqueued: 0 }
 
         @mutex = Mutex.new
       end
 
-      # Returns number of jobs that are either enqueued or in processing (but not finished)
-      # @return [Integer] number of elements in the queue
-      # @note Using `#pop` won't decrease this number as only marking job as completed does this
-      def size
-        @in_processing.values.map(&:size).sum
-      end
-
       # Adds the job to the internal main queue, scheduling it for execution in a worker and marks
       # this job as in processing pipeline.
       #
@@ -55,6 +53,16 @@ module Karafka
 
           group << job
 
+          # Assume that moving to queue means being picked up immediately not to create stats
+          # race conditions because of pop overhead. If there are workers available, we assume
+          # work is going to be handled as we never reject enqueued jobs
+          if @statistics[:busy] < @concurrency
+            @statistics[:busy] += 1
+          else
+            # If system is fully loaded, it means this job is indeed enqueued
+            @statistics[:enqueued] += 1
+          end
+
           @queue << job
         end
       end
@@ -80,7 +88,16 @@ module Karafka
       # @param [Jobs::Base] job that was completed
       def complete(job)
        @mutex.synchronize do
+          # We finish one job and if there is another, we pick it up
+          if @statistics[:enqueued].positive?
+            @statistics[:enqueued] -= 1
+          # If no more enqueued jobs, we will be just less busy
+          else
+            @statistics[:busy] -= 1
+          end
+
           @in_processing[job.group_id].delete(job)
+
           tick(job.group_id)
         end
       end
@@ -141,10 +158,10 @@ module Karafka
       #
      # @return [Hash] hash with basic usage statistics of this queue.
      def statistics
-
-
-
-
+        # Ensures there are no race conditions when returning this data
+        @mutex.synchronize do
+          @statistics.dup.freeze
+        end
      end
 
      private
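A self-contained model of the busy/enqueued accounting introduced above, to show the arithmetic: a pushed job counts as `busy` while free workers remain and as `enqueued` once the worker pool is saturated, and completion drains `enqueued` first. Illustrative only, not Karafka code:

```ruby
class StatsModel
  attr_reader :stats

  def initialize(concurrency)
    @concurrency = concurrency
    @stats = { busy: 0, enqueued: 0 }
  end

  def push
    if @stats[:busy] < @concurrency
      @stats[:busy] += 1 # a free worker picks it up immediately
    else
      @stats[:enqueued] += 1 # saturated: the job really waits in the queue
    end
  end

  def complete
    if @stats[:enqueued].positive?
      @stats[:enqueued] -= 1 # a waiting job takes over the freed worker
    else
      @stats[:busy] -= 1
    end
  end
end

model = StatsModel.new(2)
4.times { model.push }
p model.stats # => {:busy=>2, :enqueued=>2}
model.complete
p model.stats # => {:busy=>2, :enqueued=>1}
```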
data/lib/karafka/processing/scheduler.rb
CHANGED
@@ -4,19 +4,35 @@ module Karafka
   module Processing
     # FIFO scheduler for messages coming from various topics and partitions
     class Scheduler
+      # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
+      def initialize(queue)
+        @queue = queue
+      end
+
       # Schedules jobs in the fifo order
       #
-      # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
       # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
-      def schedule_consumption(queue, jobs_array)
+      def schedule_consumption(jobs_array)
         jobs_array.each do |job|
-          queue << job
+          @queue << job
         end
       end
 
       # Both revocation and shutdown jobs can also run in fifo by default
       alias schedule_revocation schedule_consumption
       alias schedule_shutdown schedule_consumption
+
+      # This scheduler does not have anything to manage as it is a pass through and has no state
+      def manage
+        nil
+      end
+
+      # This scheduler does not need to be cleared because it is stateless
+      #
+      # @param _group_id [String] Subscription group id
+      def clear(_group_id)
+        nil
+      end
     end
   end
 end
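Given the reshaped contract above (the constructor now receives the jobs queue, and `#manage` / `#clear` are part of the scheduler interface), a hedged sketch of a custom scheduler. The priority topic name and the sorting rule are illustrative, and this assumes consumption jobs expose their `#messages` batch:

```ruby
class PriorityScheduler < Karafka::Processing::Scheduler
  PRIORITY_TOPIC = 'critical_events' # hypothetical topic name

  # Schedule jobs from the priority topic before all the others
  # @param jobs_array [Array<Karafka::Processing::Jobs::Base>]
  def schedule_consumption(jobs_array)
    ordered = jobs_array.sort_by do |job|
      job.messages.metadata.topic == PRIORITY_TOPIC ? 0 : 1
    end

    ordered.each { |job| @queue << job }
  end
end
```

Since `schedule_revocation` and `schedule_shutdown` were aliased in the parent at definition time, they keep the plain FIFO behavior even when `schedule_consumption` is overridden here.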
data/lib/karafka/routing/builder.rb
CHANGED
@@ -3,20 +3,25 @@
 module Karafka
   module Routing
     # Builder used as a DSL layer for building consumers and telling them which topics to consume
+    #
+    # @note We lock the access just in case this is used in patterns. The locks here do not have
+    #   any impact on routing usage unless being expanded, so no race conditions risks.
+    #
     # @example Build a simple (most common) route
     #   consumers do
     #     topic :new_videos do
     #       consumer NewVideosConsumer
     #     end
     #   end
-    class Builder < Concurrent::Array
+    class Builder < Array
       # Empty default per-topic config
       EMPTY_DEFAULTS = ->(_) {}.freeze
 
       private_constant :EMPTY_DEFAULTS
 
       def initialize
-        @draws = Concurrent::Array.new
+        @mutex = Mutex.new
+        @draws = []
         @defaults = EMPTY_DEFAULTS
         super
       end
@@ -34,21 +39,23 @@ module Karafka
       #     end
       #   end
       def draw(&block)
-        @draws << block
+        @mutex.synchronize do
+          @draws << block
 
-        instance_eval(&block)
+          instance_eval(&block)
 
-        each do |consumer_group|
-          # Validate consumer group settings
-          Contracts::ConsumerGroup.new.validate!(consumer_group.to_h)
+          each do |consumer_group|
+            # Validate consumer group settings
+            Contracts::ConsumerGroup.new.validate!(consumer_group.to_h)
 
-          # and then its topics settings
-          consumer_group.topics.each do |topic|
-            Contracts::Topic.new.validate!(topic.to_h)
-          end
+            # and then its topics settings
+            consumer_group.topics.each do |topic|
+              Contracts::Topic.new.validate!(topic.to_h)
+            end
 
-          # Initialize subscription groups after all the routing is done
-          consumer_group.subscription_groups
-        end
+            # Initialize subscription groups after all the routing is done
+            consumer_group.subscription_groups
+          end
+        end
       end
 
@@ -61,9 +68,11 @@ module Karafka
 
       # Clears the builder and the draws memory
       def clear
-        @defaults = EMPTY_DEFAULTS
-        @draws.clear
-        super
+        @mutex.synchronize do
+          @defaults = EMPTY_DEFAULTS
+          @draws.clear
+          super
+        end
       end
 
       # @param block [Proc] block with per-topic evaluated defaults
@@ -71,7 +80,13 @@ module Karafka
       def defaults(&block)
         return @defaults unless block
 
-        @defaults = block
+        if @mutex.owned?
+          @defaults = block
+        else
+          @mutex.synchronize do
+            @defaults = block
+          end
+        end
       end
 
       private
data/lib/karafka/routing/subscription_group.rb
CHANGED
@@ -10,19 +10,24 @@ module Karafka
     class SubscriptionGroup
       attr_reader :id, :name, :topics, :kafka, :consumer_group
 
-      #
-
+      # Lock for generating new ids safely
+      ID_MUTEX = Mutex.new
 
-      private_constant :
+      private_constant :ID_MUTEX
 
       class << self
         # Generates new subscription group id that will be used in case of anonymous subscription
         # groups
         # @return [String] hex(6) compatible reproducible id
         def id
-
-
-
+          ID_MUTEX.synchronize do
+            @group_counter ||= 0
+            @group_counter += 1
+
+            ::Digest::MD5.hexdigest(
+              @group_counter.to_s
+            )[0..11]
+          end
         end
       end
 
data/lib/karafka/runner.rb
CHANGED
@@ -8,7 +8,7 @@ module Karafka
     def call
       # Despite possibility of having several independent listeners, we aim to have one queue for
       # jobs across and one workers poll for that
-      jobs_queue = Processing::JobsQueue.new
+      jobs_queue = App.config.internal.processing.jobs_queue_class.new
 
       workers = Processing::WorkersBatch.new(jobs_queue)
       listeners = Connection::ListenersBatch.new(jobs_queue)
data/lib/karafka/setup/config.rb
CHANGED
@@ -209,8 +209,9 @@ module Karafka
       end
 
       setting :processing do
+        setting :jobs_queue_class, default: Processing::JobsQueue
         # option scheduler [Object] scheduler we will be using
-        setting :scheduler, default: Processing::Scheduler.new
+        setting :scheduler_class, default: Processing::Scheduler
         # option jobs_builder [Object] jobs builder we want to use
         setting :jobs_builder, default: Processing::JobsBuilder.new
         # option coordinator [Class] work coordinator we want to user for processing coordination
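Since both settings above now hold classes rather than instances (Karafka builds the queue itself and hands it to the scheduler constructor, as in `Connection::ListenersBatch` above), custom implementations can be swapped in during setup. A hedged sketch; `MyJobsQueue` and `MyScheduler` are hypothetical subclasses:

```ruby
class KarafkaApp < Karafka::App
  setup do |config|
    config.kafka = { 'bootstrap.servers': '127.0.0.1:9092' }

    # Classes, not instances: the scheduler will be instantiated with the
    # jobs queue by the framework itself
    config.internal.processing.jobs_queue_class = MyJobsQueue
    config.internal.processing.scheduler_class = MyScheduler
  end
end
```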
data/lib/karafka/version.rb
CHANGED
-  VERSION = '2.2.12'
+  VERSION = '2.2.13'

data/lib/karafka.rb
CHANGED
data.tar.gz.sig
CHANGED
Binary file
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: karafka
 version: !ruby/object:Gem::Version
-  version: 2.2.12
+  version: 2.2.13
 platform: ruby
 authors:
 - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
   AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
   msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
   -----END CERTIFICATE-----
-date: 2023-11-09 00:00:00.000000000 Z
+date: 2023-11-17 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: karafka-core
@@ -246,6 +246,7 @@ files:
 - lib/karafka/pro/processing/jobs/consume_non_blocking.rb
 - lib/karafka/pro/processing/jobs/revoked_non_blocking.rb
 - lib/karafka/pro/processing/jobs_builder.rb
+- lib/karafka/pro/processing/jobs_queue.rb
 - lib/karafka/pro/processing/partitioner.rb
 - lib/karafka/pro/processing/scheduler.rb
 - lib/karafka/pro/processing/strategies.rb
metadata.gz.sig
CHANGED
Binary file