karafka 2.2.12 → 2.2.13
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +7 -0
- data/Gemfile.lock +9 -9
- data/config/locales/errors.yml +2 -1
- data/lib/karafka/connection/consumer_group_coordinator.rb +3 -3
- data/lib/karafka/connection/listener.rb +13 -7
- data/lib/karafka/connection/listeners_batch.rb +6 -1
- data/lib/karafka/contracts/config.rb +2 -1
- data/lib/karafka/instrumentation/notifications.rb +1 -0
- data/lib/karafka/pro/loader.rb +2 -1
- data/lib/karafka/pro/processing/coordinator.rb +12 -6
- data/lib/karafka/pro/processing/jobs_queue.rb +109 -0
- data/lib/karafka/pro/processing/scheduler.rb +2 -3
- data/lib/karafka/pro/processing/strategies/default.rb +2 -0
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +9 -0
- data/lib/karafka/pro/processing/strategies/vp/default.rb +8 -4
- data/lib/karafka/processing/coordinator.rb +13 -7
- data/lib/karafka/processing/jobs_queue.rb +28 -11
- data/lib/karafka/processing/scheduler.rb +19 -3
- data/lib/karafka/processing/strategies/default.rb +2 -0
- data/lib/karafka/routing/builder.rb +32 -17
- data/lib/karafka/routing/subscription_group.rb +11 -6
- data/lib/karafka/runner.rb +1 -1
- data/lib/karafka/setup/config.rb +2 -1
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +0 -1
- data.tar.gz.sig +0 -0
- metadata +3 -2
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4056d72f0d37ac46c52597ebcfed87de031f9f250d57a64ec5c665d3423a3087
+  data.tar.gz: 95aeab42e351043873d548a5289e8355fe48fa7b7f27aaf1549a220c76eac9c1
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8e41da4dff00dc3cb9749874568a275cdad81b7a762182cee7ea497bfe373dd1b3f777dd40638d0c30ff13f50c5913cdcad175edcc8b9b36a3e26fb5658fc986
+  data.tar.gz: 738352dea20404d42a80340c2fc27359d54185565e8069f8245662e02d33c8630ce7922c3938b06b07e5587bd007342c65439229484ed529ae050e356872f150
checksums.yaml.gz.sig
CHANGED
Binary file
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,12 @@
 # Karafka framework changelog
 
+## 2.2.13 (2023-11-17)
+- **[Feature]** Introduce low-level extended Scheduling API for granular control of schedulers and jobs execution [Pro].
+- [Improvement] Use separate lock for user-facing synchronization.
+- [Improvement] Instrument `consumer.before_enqueue`.
+- [Improvement] Limit usage of `concurrent-ruby` (plan to remove it as a dependency fully).
+- [Improvement] Provide `#synchronize` API same as in VPs for LRJs to allow for lifecycle events and consumption synchronization.
+
 ## 2.2.12 (2023-11-09)
 - [Improvement] Rewrite the polling engine to update statistics and error callbacks despite longer non-LRJ processing or long `max_wait_time` setups. This change provides stability to the statistics and background error emitting, making them time-reliable.
 - [Improvement] Auto-update Inline Insights if new insights are present for all consumers and not only LRJ (OSS and Pro).
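The extended Scheduling API called out above comes down to two newly pluggable internals, `scheduler_class` and `jobs_queue_class` (see `setup/config.rb` and `contracts/config.rb` below), with the scheduler becoming a long-lived instance that holds the jobs queue. A hedged sketch of wiring a custom scheduler through these settings; `MyScheduler` is illustrative and not part of Karafka:

```ruby
# Sketch only: the base scheduler now takes the queue in its constructor
# (see processing/scheduler.rb below), so subclasses push into @queue
# instead of receiving a queue argument on every scheduling call
class MyScheduler < ::Karafka::Processing::Scheduler
  def schedule_consumption(jobs_array)
    # Plain FIFO pass-through, same behavior as the default scheduler
    jobs_array.each { |job| @queue << job }
  end
end

Karafka::App.setup do |config|
  # Internal setting introduced in this release
  config.internal.processing.scheduler_class = MyScheduler
end
```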
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    karafka (2.2.12)
+    karafka (2.2.13)
       karafka-core (>= 2.2.7, < 2.3.0)
       waterdrop (>= 2.6.11, < 3.0.0)
       zeitwerk (~> 2.3)
@@ -9,10 +9,10 @@ PATH
 GEM
   remote: https://rubygems.org/
   specs:
-    activejob (7.1.
-      activesupport (= 7.1.
+    activejob (7.1.2)
+      activesupport (= 7.1.2)
       globalid (>= 0.3.6)
-    activesupport (7.1.
+    activesupport (7.1.2)
       base64
       bigdecimal
       concurrent-ruby (~> 1.0, >= 1.0.2)
@@ -22,14 +22,14 @@ GEM
       minitest (>= 5.1)
       mutex_m
       tzinfo (~> 2.0)
-    base64 (0.
+    base64 (0.2.0)
     bigdecimal (3.1.4)
     byebug (11.1.3)
     concurrent-ruby (1.2.2)
     connection_pool (2.4.1)
     diff-lcs (1.5.0)
     docile (1.4.0)
-    drb (2.
+    drb (2.2.0)
       ruby2_keywords
     erubi (1.12.0)
     factory_bot (6.3.0)
@@ -42,7 +42,7 @@ GEM
     karafka-core (2.2.7)
      concurrent-ruby (>= 1.1)
      karafka-rdkafka (>= 0.13.9, < 0.15.0)
-    karafka-rdkafka (0.
+    karafka-rdkafka (0.14.0)
      ffi (~> 1.15)
      mini_portile2 (~> 2.6)
      rake (> 12)
@@ -54,10 +54,10 @@ GEM
      tilt (~> 2.0)
     mini_portile2 (2.8.5)
     minitest (5.20.0)
-    mutex_m (0.
+    mutex_m (0.2.0)
     rack (3.0.8)
     rake (13.1.0)
-    roda (3.
+    roda (3.74.0)
      rack
     rspec (3.12.0)
      rspec-core (~> 3.12.0)
data/config/locales/errors.yml
CHANGED
@@ -16,7 +16,8 @@ en:
     max_wait_time_format: needs to be an integer bigger than 0
     kafka_format: needs to be a filled hash
     internal.processing.jobs_builder_format: cannot be nil
-    internal.processing.
+    internal.processing.jobs_queue_class_format: cannot be nil
+    internal.processing.scheduler_class_format: cannot be nil
     internal.processing.coordinator_class_format: cannot be nil
     internal.processing.partitioner_class_format: cannot be nil
     internal.processing.strategy_selector_format: cannot be nil
data/lib/karafka/connection/consumer_group_coordinator.rb
CHANGED
@@ -16,7 +16,7 @@ module Karafka
     class ConsumerGroupCoordinator
       # @param group_size [Integer] number of separate subscription groups in a consumer group
       def initialize(group_size)
-        @
+        @shutdown_mutex = Mutex.new
         @group_size = group_size
         @finished = Set.new
       end
@@ -30,12 +30,12 @@ module Karafka
       # @return [Boolean] can we start shutdown on a given listener
       # @note If true, will also obtain a lock so no-one else will be closing the same time we do
       def shutdown?
-        finished? && @
+        finished? && @shutdown_mutex.try_lock
       end
 
       # Unlocks the shutdown lock
       def unlock
-        @
+        @shutdown_mutex.unlock if @shutdown_mutex.owned?
       end
 
       # Marks given listener as finished
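The renamed `@shutdown_mutex` leans on two stdlib `Mutex` behaviors: `#try_lock` returns `false` instead of blocking when the lock is already held, and `#owned?` guards the unlock so a listener that never obtained the lock cannot raise a `ThreadError`. A standalone illustration:

```ruby
mutex = Mutex.new

mutex.try_lock #=> true, the first caller wins the shutdown lock
mutex.try_lock #=> false, later callers are rejected without blocking

# Unlock only when this thread actually holds the lock
mutex.unlock if mutex.owned?
```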
data/lib/karafka/connection/listener.rb
CHANGED
@@ -23,8 +23,9 @@ module Karafka
       # @param consumer_group_coordinator [Karafka::Connection::ConsumerGroupCoordinator]
       # @param subscription_group [Karafka::Routing::SubscriptionGroup]
       # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
+      # @param scheduler [Karafka::Processing::Scheduler] scheduler we want to use
       # @return [Karafka::Connection::Listener] listener instance
-      def initialize(consumer_group_coordinator, subscription_group, jobs_queue)
+      def initialize(consumer_group_coordinator, subscription_group, jobs_queue, scheduler)
         proc_config = ::Karafka::App.config.internal.processing
 
         @id = SecureRandom.hex(6)
@@ -36,8 +37,7 @@ module Karafka
         @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
         @jobs_builder = proc_config.jobs_builder
         @partitioner = proc_config.partitioner_class.new(subscription_group)
-
-        @scheduler = proc_config.scheduler
+        @scheduler = scheduler
         @events_poller = Helpers::IntervalRunner.new { @client.events_poll }
         # We keep one buffer for messages to preserve memory and not allocate extra objects
         # We can do this that way because we always first schedule jobs using messages before we
@@ -243,7 +243,7 @@ module Karafka
           end
         end
 
-        @scheduler.schedule_revocation(
+        @scheduler.schedule_revocation(jobs)
       end
 
       # Enqueues the shutdown jobs for all the executors that exist in our subscription group
@@ -256,7 +256,7 @@ module Karafka
           jobs << job
         end
 
-        @scheduler.schedule_shutdown(
+        @scheduler.schedule_shutdown(jobs)
       end
 
       # Polls messages within the time and amount boundaries defined in the settings and then
@@ -298,12 +298,15 @@ module Karafka
 
         jobs.each(&:before_enqueue)
 
-        @scheduler.schedule_consumption(
+        @scheduler.schedule_consumption(jobs)
       end
 
       # Waits for all the jobs from a given subscription group to finish before moving forward
       def wait
-        @jobs_queue.wait(@subscription_group.id)
+        @jobs_queue.wait(@subscription_group.id) do
+          @events_poller.call
+          @scheduler.manage
+        end
       end
 
       # Waits without blocking the polling
@@ -319,6 +322,8 @@ module Karafka
       def wait_pinging(wait_until:, after_ping: -> {})
         until wait_until.call
           @client.ping
+          @scheduler.manage
+
           after_ping.call
           sleep(0.2)
         end
@@ -334,6 +339,7 @@ module Karafka
         # resetting.
         @jobs_queue.wait(@subscription_group.id)
         @jobs_queue.clear(@subscription_group.id)
+        @scheduler.clear(@subscription_group.id)
         @events_poller.reset
         @client.reset
         @coordinators.reset
data/lib/karafka/connection/listeners_batch.rb
CHANGED
@@ -11,6 +11,10 @@ module Karafka
       # @param jobs_queue [JobsQueue]
       # @return [ListenersBatch]
       def initialize(jobs_queue)
+        # We need one scheduler for all the listeners because in case of complex schedulers, they
+        # should be able to distribute work whenever any work is done in any of the listeners
+        scheduler = App.config.internal.processing.scheduler_class.new(jobs_queue)
+
         @coordinators = []
 
         @batch = App.subscription_groups.flat_map do |_consumer_group, subscription_groups|
@@ -24,7 +28,8 @@ module Karafka
           Connection::Listener.new(
             consumer_group_coordinator,
             subscription_group,
-            jobs_queue
+            jobs_queue,
+            scheduler
           )
         end
       end
data/lib/karafka/contracts/config.rb
CHANGED
@@ -73,7 +73,8 @@ module Karafka
 
         nested(:processing) do
           required(:jobs_builder) { |val| !val.nil? }
-          required(:
+          required(:jobs_queue_class) { |val| !val.nil? }
+          required(:scheduler_class) { |val| !val.nil? }
           required(:coordinator_class) { |val| !val.nil? }
           required(:partitioner_class) { |val| !val.nil? }
           required(:strategy_selector) { |val| !val.nil? }
data/lib/karafka/pro/loader.rb
CHANGED
@@ -84,7 +84,8 @@ module Karafka
 
         icfg.processing.coordinator_class = Processing::Coordinator
         icfg.processing.partitioner_class = Processing::Partitioner
-        icfg.processing.
+        icfg.processing.scheduler_class = Processing::Scheduler
+        icfg.processing.jobs_queue_class = Processing::JobsQueue
         icfg.processing.jobs_builder = Processing::JobsBuilder.new
         icfg.processing.strategy_selector = Processing::StrategySelector.new
data/lib/karafka/pro/processing/coordinator.rb
CHANGED
@@ -21,14 +21,20 @@ module Karafka
 
         def_delegators :@collapser, :collapsed?, :collapse_until!
 
-        attr_reader :filter, :virtual_offset_manager
+        attr_reader :filter, :virtual_offset_manager, :shared_mutex
 
         # @param args [Object] anything the base coordinator accepts
         def initialize(*args)
           super
 
           @executed = []
-          @
+          @flow_mutex = Mutex.new
+          # Lock for user code synchronization
+          # We do not want to mix the coordinator lock with the user lock, so that a user-imposed
+          # lock cannot block the internal operations of Karafka
+          # This shared lock is not used internally by the framework and is exposed for
+          # user-facing locking
+          @shared_mutex = Mutex.new
           @collapser = Collapser.new
           @filter = FiltersApplier.new(self)
 
@@ -89,7 +95,7 @@ module Karafka
         # Runs synchronized code once for a collective of virtual partitions prior to work being
         # enqueued
         def on_enqueued
-          @
+          @flow_mutex.synchronize do
             return unless executable?(:on_enqueued)
 
             yield(@last_message)
@@ -98,7 +104,7 @@ module Karafka
 
         # Runs given code only once per all the coordinated jobs upon starting first of them
         def on_started
-          @
+          @flow_mutex.synchronize do
             return unless executable?(:on_started)
 
             yield(@last_message)
@@ -109,7 +115,7 @@ module Karafka
         # It runs once per all the coordinated jobs and should be used to run any type of post
         # jobs coordination processing execution
         def on_finished
-          @
+          @flow_mutex.synchronize do
             return unless finished?
             return unless executable?(:on_finished)
 
@@ -119,7 +125,7 @@ module Karafka
 
         # Runs once after a partition is revoked
         def on_revoked
-          @
+          @flow_mutex.synchronize do
             return unless executable?(:on_revoked)
 
             yield(@last_message)
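Splitting the internal `@flow_mutex` from the user-facing `@shared_mutex` means code holding the shared lock can never stall the coordinator's own flow callbacks. A standalone illustration of the idea (timings simplified):

```ruby
flow_mutex = Mutex.new
shared_mutex = Mutex.new

# User code holds the shared lock for a long time...
user = Thread.new { shared_mutex.synchronize { sleep(0.5) } }
sleep(0.1)

# ...yet flow callbacks proceed because they rely on their own lock
flow_mutex.synchronize { puts 'flow is not blocked by the user lock' }

user.join
```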
data/lib/karafka/pro/processing/jobs_queue.rb
CHANGED
@@ -0,0 +1,109 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      # Enhanced processing queue that provides the ability to build complex work-distribution
+      # schedulers dedicated to particular job types
+      #
+      # Aside from the OSS queue capabilities it allows for jobless locking for advanced schedulers
+      class JobsQueue < Karafka::Processing::JobsQueue
+        attr_accessor :in_processing
+
+        # @return [Karafka::Pro::Processing::JobsQueue]
+        def initialize
+          super
+
+          @in_waiting = Hash.new { |h, k| h[k] = [] }
+
+          @statistics[:waiting] = 0
+        end
+
+        # Method that allows us to lock the queue on a given subscription group without enqueuing
+        # a job. This can be used when building complex schedulers that want to postpone enqueuing
+        # before certain conditions are met.
+        #
+        # @param job [Jobs::Base] job used for locking
+        def lock(job)
+          @mutex.synchronize do
+            group = @in_waiting[job.group_id]
+
+            # This should never happen. Same job should not be locked twice
+            raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.include?(job)
+
+            @statistics[:waiting] += 1
+
+            group << job
+          end
+        end
+
+        # Method for unlocking the given subscription group queue space that was locked with a
+        # given job that was **not** added to the queue but used via `#lock`.
+        #
+        # @param job [Jobs::Base] job that locked the queue
+        def unlock(job)
+          @mutex.synchronize do
+            @statistics[:waiting] -= 1
+
+            return if @in_waiting[job.group_id].delete(job)
+
+            # This should never happen. It means there was a job being unlocked that was never
+            # locked in the first place
+            raise(Errors::JobsQueueSynchronizationError, job.group_id)
+          end
+        end
+
+        # Clears the processing states for a provided group. Useful when a recovery happens and we
+        # need to clean up state but only for a given subscription group.
+        #
+        # @param group_id [String]
+        def clear(group_id)
+          @mutex.synchronize do
+            @in_processing[group_id].clear
+
+            @statistics[:waiting] -= @in_waiting[group_id].size
+            @in_waiting[group_id].clear
+
+            # We unlock it just in case it was blocked when clearing started
+            tick(group_id)
+          end
+        end
+
+        # @param group_id [String]
+        #
+        # @return [Boolean] tell us if we have anything in the processing (or for processing) from
+        #   a given group.
+        def empty?(group_id)
+          @mutex.synchronize do
+            @in_processing[group_id].empty? &&
+              @in_waiting[group_id].empty?
+          end
+        end
+
+        private
+
+        # @param group_id [String] id of the group in which jobs we're interested.
+        # @return [Boolean] should we keep waiting or not
+        # @note We do not wait for non-blocking jobs. Their flow should allow for `poll` running
+        #   as they may exceed `max.poll.interval`
+        def wait?(group_id)
+          !(
+            @in_processing[group_id].all?(&:non_blocking?) &&
+            @in_waiting[group_id].all?(&:non_blocking?)
+          )
+        end
+      end
+    end
+  end
+end
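The new `#lock`/`#unlock` pair lets an advanced scheduler keep the listener's `wait` loop blocked for a job it has deliberately not enqueued yet, releasing it later from `#manage`. A hedged sketch of such a scheduler; `DeferredScheduler` and its fixed delay are illustrative only:

```ruby
# Hypothetical Pro scheduler that postpones jobs for a second while keeping
# the queue locked, so the listener keeps waiting for them
class DeferredScheduler < ::Karafka::Pro::Processing::Scheduler
  def initialize(queue)
    super
    @pending = []
  end

  def schedule_consumption(jobs_array)
    jobs_array.each do |job|
      @pending << [job, Time.now + 1]
      # Blocks wait on this subscription group without enqueuing the job yet
      @queue.lock(job)
    end
  end

  # Ticked from the listener loops; releases jobs whose delay has elapsed.
  # A real implementation would need its own synchronization, since one
  # scheduler instance is shared across all listener threads.
  def manage
    @pending.delete_if do |job, ready_at|
      next false if Time.now < ready_at

      @queue.unlock(job)
      @queue << job
      true
    end
  end
end
```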
data/lib/karafka/pro/processing/scheduler.rb
CHANGED
@@ -27,10 +27,9 @@ module Karafka
       class Scheduler < ::Karafka::Processing::Scheduler
         # Schedules jobs in the LJF order for consumption
         #
-        # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
         # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
         #
-        def schedule_consumption(
+        def schedule_consumption(jobs_array)
           perf_tracker = PerformanceTracker.instance
 
           ordered = []
@@ -47,7 +46,7 @@ module Karafka
           ordered.map!(&:first)
 
           ordered.each do |job|
-            queue << job
+            @queue << job
           end
         end
data/lib/karafka/pro/processing/strategies/lrj/default.rb
CHANGED
@@ -77,6 +77,15 @@ module Karafka
               revoked
             end
           end
+
+          # Allows for LRJ to synchronize its work. It may be needed because LRJ can run
+          # lifecycle events like revocation while the LRJ work is running and there may be a
+          # need for a critical section.
+          #
+          # @param block [Proc] block we want to run in a mutex to prevent race-conditions
+          def synchronize(&block)
+            coordinator.shared_mutex.synchronize(&block)
+          end
         end
       end
     end
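From the consumer side this surfaces as the same `#synchronize` that VPs already had. A usage sketch for an LRJ topic; the consumer class and its bookkeeping are illustrative:

```ruby
class EventsConsumer < ApplicationConsumer
  def consume
    messages.each do |message|
      # Long-running work can overlap with lifecycle events like #revoked,
      # so shared state goes through the coordinator's shared mutex
      synchronize { processed << message.offset }
    end
  end

  def revoked
    # Runs in a critical section against the consumption above
    synchronize do
      Karafka.logger.info("Offsets seen before revocation: #{processed.size}")
      processed.clear
    end
  end

  private

  def processed
    @processed ||= []
  end
end
```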
data/lib/karafka/pro/processing/strategies/vp/default.rb
CHANGED
@@ -94,13 +94,15 @@ module Karafka
 
           # Allows for cross-virtual-partition consumers locks
           #
-          # This is not needed in the non-VP flows because there is always only one
-          # per partition at the same time, so no coordination is needed directly for
-          # end users
+          # This is not needed in the non-VP flows except LRJ because there is always only one
+          # consumer per partition at the same time, so no coordination is needed directly for
+          # the end users. With LRJ it is needed and provided in the `LRJ::Default` strategy,
+          # because lifecycle events on revocation can run in parallel to the LRJ job as it is
+          # non-blocking.
           #
           # @param block [Proc] block we want to run in a mutex to prevent race-conditions
           def synchronize(&block)
-            coordinator.synchronize(&block)
+            coordinator.shared_mutex.synchronize(&block)
           end
 
           private
@@ -111,6 +113,8 @@ module Karafka
           # @note This can be done without the mutex, because it happens from the same thread
           #   for all the work (listener thread)
           def handle_before_enqueue
+            super
+
             coordinator.virtual_offset_manager.register(
               messages.map(&:offset)
             )
data/lib/karafka/processing/coordinator.rb
CHANGED
@@ -162,11 +162,24 @@ module Karafka
         @manual_seek
       end
 
+      # @param consumer [Object] karafka consumer (normal or pro)
+      # @return [Karafka::Processing::Result] result object which we can use to indicate
+      #   consumption processing state.
+      def consumption(consumer)
+        @consumptions[consumer] ||= Processing::Result.new
+      end
+
       # Allows to run synchronized (locked) code that can operate only from a given thread
       #
       # @param block [Proc] code we want to run in the synchronized mode
+      #
       # @note We check if mutex is not owned already by the current thread so we won't end up with
       #   a deadlock in case user runs coordinated code from inside of his own lock
+      #
+      # @note This is internal and should **not** be used to synchronize user-facing code.
+      #   Otherwise the user could indirectly cause deadlocks or prolonged locks by running his
+      #   logic. This can and should however be used for multi-thread strategy applications and
+      #   other internal operations locks.
       def synchronize(&block)
         if @mutex.owned?
           yield
@@ -174,13 +187,6 @@ module Karafka
           @mutex.synchronize(&block)
         end
       end
-
-      # @param consumer [Object] karafka consumer (normal or pro)
-      # @return [Karafka::Processing::Result] result object which we can use to indicate
-      #   consumption processing state.
-      def consumption(consumer)
-        @consumptions[consumer] ||= Processing::Result.new
-      end
     end
   end
 end
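The `#owned?` check is what makes nested `synchronize` calls safe: re-entering `Mutex#synchronize` from the owning thread raises `ThreadError`, so the coordinator falls back to a plain `yield` when it already holds the lock. The pattern in isolation:

```ruby
MUTEX = Mutex.new

def synchronize(&block)
  if MUTEX.owned?
    # Already inside the critical section on this thread, run directly
    yield
  else
    MUTEX.synchronize(&block)
  end
end

# A naive nested MUTEX.synchronize here would raise ThreadError (deadlock);
# the owned? check lets the inner call run directly instead
synchronize { synchronize { puts 'no deadlock' } }
```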
data/lib/karafka/processing/jobs_queue.rb
CHANGED
@@ -9,6 +9,9 @@ module Karafka
     # on this queue, that's why internally we keep track of processing per group.
     #
     # We work with the assumption, that partitions data is evenly distributed.
+    #
+    # @note This job queue also keeps track / understands the number of busy workers. This is
+    #   because we use a single workers pool that can have granular scheduling.
     class JobsQueue
       # @return [Karafka::Processing::JobsQueue]
       def initialize
@@ -26,19 +29,14 @@ module Karafka
           h.compute_if_absent(k) { RUBY_VERSION < '3.2' ? TimedQueue.new : Queue.new }
         end
 
+        @concurrency = Karafka::App.config.concurrency
         @tick_interval = ::Karafka::App.config.internal.tick_interval
         @in_processing = Hash.new { |h, k| h[k] = [] }
+        @statistics = { busy: 0, enqueued: 0 }
 
         @mutex = Mutex.new
       end
 
-      # Returns number of jobs that are either enqueued or in processing (but not finished)
-      # @return [Integer] number of elements in the queue
-      # @note Using `#pop` won't decrease this number as only marking job as completed does this
-      def size
-        @in_processing.values.map(&:size).sum
-      end
-
       # Adds the job to the internal main queue, scheduling it for execution in a worker and marks
       # this job as in processing pipeline.
       #
@@ -55,6 +53,16 @@ module Karafka
 
         group << job
 
+        # Assume that moving to queue means being picked up immediately not to create stats
+        # race conditions because of pop overhead. If there are workers available, we assume
+        # work is going to be handled as we never reject enqueued jobs
+        if @statistics[:busy] < @concurrency
+          @statistics[:busy] += 1
+        else
+          # If the system is fully loaded, it means this job is indeed enqueued
+          @statistics[:enqueued] += 1
+        end
+
         @queue << job
       end
     end
@@ -80,7 +88,16 @@ module Karafka
     # @param [Jobs::Base] job that was completed
     def complete(job)
       @mutex.synchronize do
+        # We finish one job and if there is another, we pick it up
+        if @statistics[:enqueued].positive?
+          @statistics[:enqueued] -= 1
+        else
+          # If there are no more enqueued jobs, we will just be less busy
+          @statistics[:busy] -= 1
+        end
+
         @in_processing[job.group_id].delete(job)
+
         tick(job.group_id)
       end
     end
@@ -141,10 +158,10 @@ module Karafka
     #
     # @return [Hash] hash with basic usage statistics of this queue.
     def statistics
-
-
-
-
+      # Ensures there are no race conditions when returning this data
+      @mutex.synchronize do
+        @statistics.dup.freeze
+      end
     end
 
     private
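The busy/enqueued split can be traced with a small worked example: at `concurrency: 2`, pushing three jobs yields `busy: 2, enqueued: 1`, and completing a job first drains `enqueued` before lowering `busy`. A standalone model of just this counter logic (not Karafka code):

```ruby
class StatsModel
  attr_reader :statistics

  def initialize(concurrency)
    @concurrency = concurrency
    @statistics = { busy: 0, enqueued: 0 }
  end

  # Mirrors the accounting in JobsQueue#<<
  def push
    if @statistics[:busy] < @concurrency
      @statistics[:busy] += 1
    else
      @statistics[:enqueued] += 1
    end
  end

  # Mirrors the accounting in JobsQueue#complete
  def complete
    if @statistics[:enqueued].positive?
      @statistics[:enqueued] -= 1
    else
      @statistics[:busy] -= 1
    end
  end
end

stats = StatsModel.new(2)
3.times { stats.push }
stats.statistics #=> { busy: 2, enqueued: 1 }
stats.complete
stats.statistics #=> { busy: 2, enqueued: 0 }
```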
data/lib/karafka/processing/scheduler.rb
CHANGED
@@ -4,19 +4,35 @@ module Karafka
   module Processing
     # FIFO scheduler for messages coming from various topics and partitions
     class Scheduler
+      # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
+      def initialize(queue)
+        @queue = queue
+      end
+
       # Schedules jobs in the fifo order
       #
-      # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
       # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
-      def schedule_consumption(
+      def schedule_consumption(jobs_array)
         jobs_array.each do |job|
-          queue << job
+          @queue << job
         end
       end
 
       # Both revocation and shutdown jobs can also run in fifo by default
       alias schedule_revocation schedule_consumption
       alias schedule_shutdown schedule_consumption
+
+      # This scheduler does not have anything to manage as it is a pass-through and has no state
+      def manage
+        nil
+      end
+
+      # This scheduler does not need to be cleared because it is stateless
+      #
+      # @param _group_id [String] Subscription group id
+      def clear(_group_id)
+        nil
+      end
     end
   end
 end
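The call-site shape changes accordingly: the scheduler is built once with the queue and then receives only job arrays, with `#manage` and `#clear` as no-op hooks for stateful implementations. Inside a booted Karafka process this looks roughly like:

```ruby
queue = Karafka::App.config.internal.processing.jobs_queue_class.new
scheduler = Karafka::App.config.internal.processing.scheduler_class.new(queue)

# The listener hands over only the jobs; the queue was injected up front
scheduler.schedule_consumption([])
scheduler.manage            #=> nil, the FIFO scheduler is stateless
scheduler.clear('group-id') #=> nil, nothing to clear either
```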
data/lib/karafka/routing/builder.rb
CHANGED
@@ -3,20 +3,25 @@
 module Karafka
   module Routing
     # Builder used as a DSL layer for building consumers and telling them which topics to consume
+    #
+    # @note We lock the access just in case this is used in patterns. The locks here do not have
+    #   any impact on routing usage unless being expanded, so there are no race-condition risks.
+    #
     # @example Build a simple (most common) route
     #   consumers do
     #     topic :new_videos do
     #       consumer NewVideosConsumer
     #     end
     #   end
-    class Builder <
+    class Builder < Array
       # Empty default per-topic config
       EMPTY_DEFAULTS = ->(_) {}.freeze
 
       private_constant :EMPTY_DEFAULTS
 
       def initialize
-        @
+        @mutex = Mutex.new
+        @draws = []
         @defaults = EMPTY_DEFAULTS
         super
       end
@@ -34,21 +39,23 @@ module Karafka
       #     end
       #   end
       def draw(&block)
-        @
+        @mutex.synchronize do
+          @draws << block
 
-
+          instance_eval(&block)
 
-
-
-
+          each do |consumer_group|
+            # Validate consumer group settings
+            Contracts::ConsumerGroup.new.validate!(consumer_group.to_h)
 
-
-
-
-
+            # and then its topics settings
+            consumer_group.topics.each do |topic|
+              Contracts::Topic.new.validate!(topic.to_h)
+            end
 
-
-
+            # Initialize subscription groups after all the routing is done
+            consumer_group.subscription_groups
+          end
         end
       end
@@ -61,9 +68,11 @@ module Karafka
 
       # Clears the builder and the draws memory
       def clear
-        @
-
-
+        @mutex.synchronize do
+          @defaults = EMPTY_DEFAULTS
+          @draws.clear
+          super
+        end
       end
 
@@ -71,7 +80,13 @@ module Karafka
       # @param block [Proc] block with per-topic evaluated defaults
       def defaults(&block)
         return @defaults unless block
 
-        @
+        if @mutex.owned?
+          @defaults = block
+        else
+          @mutex.synchronize do
+            @defaults = block
+          end
+        end
       end
 
       private
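For reference, the `#draw` and `#defaults` entry points that the mutex now guards are used from the routing DSL like so (topic name and consumer class mirror the example in the class docs; the `manual_offset_management` default is illustrative):

```ruby
class KarafkaApp < Karafka::App
  routes.draw do
    defaults do
      # Evaluated for every topic unless overridden per topic
      manual_offset_management true
    end

    topic :new_videos do
      consumer NewVideosConsumer
    end
  end
end
```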
data/lib/karafka/routing/subscription_group.rb
CHANGED
@@ -10,19 +10,24 @@ module Karafka
     class SubscriptionGroup
       attr_reader :id, :name, :topics, :kafka, :consumer_group
 
-      #
-
+      # Lock for generating new ids safely
+      ID_MUTEX = Mutex.new
 
-      private_constant :
+      private_constant :ID_MUTEX
 
       class << self
         # Generates new subscription group id that will be used in case of anonymous subscription
         # groups
         # @return [String] hex(6) compatible reproducible id
         def id
-
-
-
+          ID_MUTEX.synchronize do
+            @group_counter ||= 0
+            @group_counter += 1
+
+            ::Digest::MD5.hexdigest(
+              @group_counter.to_s
+            )[0..11]
+          end
         end
       end
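The id helper trades randomness for reproducibility: hashing a monotonically growing counter with MD5 and taking the first 12 hex characters gives ids that are unique per process yet identical across restarts, matching the `hex(6)` shape used elsewhere. Standalone:

```ruby
require 'digest'

# Same 12-character shape as SecureRandom.hex(6), but deterministic
Digest::MD5.hexdigest('1')[0..11] #=> "c4ca4238a0b9"
```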
data/lib/karafka/runner.rb
CHANGED
@@ -8,7 +8,7 @@ module Karafka
     def call
       # Despite possibility of having several independent listeners, we aim to have one queue for
       # jobs across and one workers pool for that
-      jobs_queue =
+      jobs_queue = App.config.internal.processing.jobs_queue_class.new
 
       workers = Processing::WorkersBatch.new(jobs_queue)
       listeners = Connection::ListenersBatch.new(jobs_queue)
data/lib/karafka/setup/config.rb
CHANGED
@@ -209,8 +209,9 @@ module Karafka
     end
 
     setting :processing do
+      setting :jobs_queue_class, default: Processing::JobsQueue
       # option scheduler [Object] scheduler we will be using
-      setting :
+      setting :scheduler_class, default: Processing::Scheduler
       # option jobs_builder [Object] jobs builder we want to use
       setting :jobs_builder, default: Processing::JobsBuilder.new
       # option coordinator [Class] work coordinator we want to use for processing coordination
data/lib/karafka/version.rb
CHANGED
data/lib/karafka.rb
CHANGED
data.tar.gz.sig
CHANGED
Binary file
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: karafka
 version: !ruby/object:Gem::Version
-  version: 2.2.12
+  version: 2.2.13
 platform: ruby
 authors:
 - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
   AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
   msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
   -----END CERTIFICATE-----
-date: 2023-11-09 00:00:00.000000000 Z
+date: 2023-11-17 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: karafka-core
@@ -246,6 +246,7 @@ files:
 - lib/karafka/pro/processing/jobs/consume_non_blocking.rb
 - lib/karafka/pro/processing/jobs/revoked_non_blocking.rb
 - lib/karafka/pro/processing/jobs_builder.rb
+- lib/karafka/pro/processing/jobs_queue.rb
 - lib/karafka/pro/processing/partitioner.rb
 - lib/karafka/pro/processing/scheduler.rb
 - lib/karafka/pro/processing/strategies.rb
metadata.gz.sig
CHANGED
Binary file