karafka 2.2.12 → 2.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2ee648826503a1b1841a97e368ec2894a16eadc3509270d2a5dbbbe9ee703b3a
4
- data.tar.gz: 915285e224ab6dcaa4b2f75e2b6aa52f4f6eb0f613b8b06efe78e2ef4ff3f514
3
+ metadata.gz: 4056d72f0d37ac46c52597ebcfed87de031f9f250d57a64ec5c665d3423a3087
4
+ data.tar.gz: 95aeab42e351043873d548a5289e8355fe48fa7b7f27aaf1549a220c76eac9c1
5
5
  SHA512:
6
- metadata.gz: 1f7f109c533a98a46306be62a2172432f0d18af7003e401d3a894aa356bc2cae2622ba4c323bfdd230a66f0ae544a7cfb61ee0168b396e2e809e408a657eecb6
7
- data.tar.gz: d10de0ca361236c35bed27ca3c5db13e9e245805412f85c2d8d4e6a140fe088025403be7a65e1d97831613f02032bfe3fb2194c5ec7f6a880bc7ddc67a112813
6
+ metadata.gz: 8e41da4dff00dc3cb9749874568a275cdad81b7a762182cee7ea497bfe373dd1b3f777dd40638d0c30ff13f50c5913cdcad175edcc8b9b36a3e26fb5658fc986
7
+ data.tar.gz: 738352dea20404d42a80340c2fc27359d54185565e8069f8245662e02d33c8630ce7922c3938b06b07e5587bd007342c65439229484ed529ae050e356872f150
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # Karafka framework changelog
2
2
 
3
+ ## 2.2.13 (2023-11-17)
4
+ - **[Feature]** Introduce a low-level extended Scheduling API for granular control of schedulers and job execution [Pro].
5
+ - [Improvement] Use a separate lock for user-facing synchronization.
6
+ - [Improvement] Instrument `consumer.before_enqueue`.
7
+ - [Improvement] Limit usage of `concurrent-ruby` (with a plan to fully remove it as a dependency).
8
+ - [Improvement] Provide the same `#synchronize` API for LRJs as in VPs to allow for synchronization of lifecycle events and consumption.
9
+
3
10
  ## 2.2.12 (2023-11-09)
4
11
  - [Improvement] Rewrite the polling engine to update statistics and error callbacks despite longer non LRJ processing or long `max_wait_time` setups. This change provides stability to the statistics and background error emitting making them time-reliable.
5
12
  - [Improvement] Auto-update Inline Insights if new insights are present for all consumers and not only LRJ (OSS and Pro).
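The newly instrumented `consumer.before_enqueue` event can be observed like any other Karafka notification. A minimal sketch, assuming a hypothetical subscriber in an app initializer; the event id and the `:caller` payload key come from this release, the logging itself is illustrative:

```ruby
# Hypothetical subscriber; only the event id and the :caller payload are taken
# from this release, the debug logging is illustrative
Karafka.monitor.subscribe('consumer.before_enqueue') do |event|
  consumer = event[:caller]

  Karafka.logger.debug("before_enqueue: #{consumer.class} on #{consumer.topic.name}")
end
```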
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- karafka (2.2.12)
4
+ karafka (2.2.13)
5
5
  karafka-core (>= 2.2.7, < 2.3.0)
6
6
  waterdrop (>= 2.6.11, < 3.0.0)
7
7
  zeitwerk (~> 2.3)
@@ -9,10 +9,10 @@ PATH
9
9
  GEM
10
10
  remote: https://rubygems.org/
11
11
  specs:
12
- activejob (7.1.1)
13
- activesupport (= 7.1.1)
12
+ activejob (7.1.2)
13
+ activesupport (= 7.1.2)
14
14
  globalid (>= 0.3.6)
15
- activesupport (7.1.1)
15
+ activesupport (7.1.2)
16
16
  base64
17
17
  bigdecimal
18
18
  concurrent-ruby (~> 1.0, >= 1.0.2)
@@ -22,14 +22,14 @@ GEM
22
22
  minitest (>= 5.1)
23
23
  mutex_m
24
24
  tzinfo (~> 2.0)
25
- base64 (0.1.1)
25
+ base64 (0.2.0)
26
26
  bigdecimal (3.1.4)
27
27
  byebug (11.1.3)
28
28
  concurrent-ruby (1.2.2)
29
29
  connection_pool (2.4.1)
30
30
  diff-lcs (1.5.0)
31
31
  docile (1.4.0)
32
- drb (2.1.1)
32
+ drb (2.2.0)
33
33
  ruby2_keywords
34
34
  erubi (1.12.0)
35
35
  factory_bot (6.3.0)
@@ -42,7 +42,7 @@ GEM
42
42
  karafka-core (2.2.7)
43
43
  concurrent-ruby (>= 1.1)
44
44
  karafka-rdkafka (>= 0.13.9, < 0.15.0)
45
- karafka-rdkafka (0.13.9)
45
+ karafka-rdkafka (0.14.0)
46
46
  ffi (~> 1.15)
47
47
  mini_portile2 (~> 2.6)
48
48
  rake (> 12)
@@ -54,10 +54,10 @@ GEM
54
54
  tilt (~> 2.0)
55
55
  mini_portile2 (2.8.5)
56
56
  minitest (5.20.0)
57
- mutex_m (0.1.2)
57
+ mutex_m (0.2.0)
58
58
  rack (3.0.8)
59
59
  rake (13.1.0)
60
- roda (3.73.0)
60
+ roda (3.74.0)
61
61
  rack
62
62
  rspec (3.12.0)
63
63
  rspec-core (~> 3.12.0)
@@ -16,7 +16,8 @@ en:
16
16
  max_wait_time_format: needs to be an integer bigger than 0
17
17
  kafka_format: needs to be a filled hash
18
18
  internal.processing.jobs_builder_format: cannot be nil
19
- internal.processing.scheduler_format: cannot be nil
19
+ internal.processing.jobs_queue_class_format: cannot be nil
20
+ internal.processing.scheduler_class_format: cannot be nil
20
21
  internal.processing.coordinator_class_format: cannot be nil
21
22
  internal.processing.partitioner_class_format: cannot be nil
22
23
  internal.processing.strategy_selector_format: cannot be nil
@@ -16,7 +16,7 @@ module Karafka
16
16
  class ConsumerGroupCoordinator
17
17
  # @param group_size [Integer] number of separate subscription groups in a consumer group
18
18
  def initialize(group_size)
19
- @shutdown_lock = Mutex.new
19
+ @shutdown_mutex = Mutex.new
20
20
  @group_size = group_size
21
21
  @finished = Set.new
22
22
  end
@@ -30,12 +30,12 @@ module Karafka
30
30
  # @return [Boolean] can we start shutdown on a given listener
31
31
  # @note If true, will also obtain a lock so no-one else will be closing the same time we do
32
32
  def shutdown?
33
- finished? && @shutdown_lock.try_lock
33
+ finished? && @shutdown_mutex.try_lock
34
34
  end
35
35
 
36
36
  # Unlocks the shutdown lock
37
37
  def unlock
38
- @shutdown_lock.unlock if @shutdown_lock.owned?
38
+ @shutdown_mutex.unlock if @shutdown_mutex.owned?
39
39
  end
40
40
 
41
41
  # Marks given listener as finished
@@ -23,8 +23,9 @@ module Karafka
23
23
  # @param consumer_group_coordinator [Karafka::Connection::ConsumerGroupCoordinator]
24
24
  # @param subscription_group [Karafka::Routing::SubscriptionGroup]
25
25
  # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
26
+ # @param scheduler [Karafka::Processing::Scheduler] scheduler we want to use
26
27
  # @return [Karafka::Connection::Listener] listener instance
27
- def initialize(consumer_group_coordinator, subscription_group, jobs_queue)
28
+ def initialize(consumer_group_coordinator, subscription_group, jobs_queue, scheduler)
28
29
  proc_config = ::Karafka::App.config.internal.processing
29
30
 
30
31
  @id = SecureRandom.hex(6)
@@ -36,8 +37,7 @@ module Karafka
36
37
  @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
37
38
  @jobs_builder = proc_config.jobs_builder
38
39
  @partitioner = proc_config.partitioner_class.new(subscription_group)
39
- # We reference scheduler here as it is much faster than fetching this each time
40
- @scheduler = proc_config.scheduler
40
+ @scheduler = scheduler
41
41
  @events_poller = Helpers::IntervalRunner.new { @client.events_poll }
42
42
  # We keep one buffer for messages to preserve memory and not allocate extra objects
43
43
  # We can do this that way because we always first schedule jobs using messages before we
@@ -243,7 +243,7 @@ module Karafka
243
243
  end
244
244
  end
245
245
 
246
- @scheduler.schedule_revocation(@jobs_queue, jobs)
246
+ @scheduler.schedule_revocation(jobs)
247
247
  end
248
248
 
249
249
  # Enqueues the shutdown jobs for all the executors that exist in our subscription group
@@ -256,7 +256,7 @@ module Karafka
256
256
  jobs << job
257
257
  end
258
258
 
259
- @scheduler.schedule_shutdown(@jobs_queue, jobs)
259
+ @scheduler.schedule_shutdown(jobs)
260
260
  end
261
261
 
262
262
  # Polls messages within the time and amount boundaries defined in the settings and then
@@ -298,12 +298,15 @@ module Karafka
298
298
 
299
299
  jobs.each(&:before_enqueue)
300
300
 
301
- @scheduler.schedule_consumption(@jobs_queue, jobs)
301
+ @scheduler.schedule_consumption(jobs)
302
302
  end
303
303
 
304
304
  # Waits for all the jobs from a given subscription group to finish before moving forward
305
305
  def wait
306
- @jobs_queue.wait(@subscription_group.id) { @events_poller.call }
306
+ @jobs_queue.wait(@subscription_group.id) do
307
+ @events_poller.call
308
+ @scheduler.manage
309
+ end
307
310
  end
308
311
 
309
312
  # Waits without blocking the polling
@@ -319,6 +322,8 @@ module Karafka
319
322
  def wait_pinging(wait_until:, after_ping: -> {})
320
323
  until wait_until.call
321
324
  @client.ping
325
+ @scheduler.manage
326
+
322
327
  after_ping.call
323
328
  sleep(0.2)
324
329
  end
@@ -334,6 +339,7 @@ module Karafka
334
339
  # resetting.
335
340
  @jobs_queue.wait(@subscription_group.id)
336
341
  @jobs_queue.clear(@subscription_group.id)
342
+ @scheduler.clear(@subscription_group.id)
337
343
  @events_poller.reset
338
344
  @client.reset
339
345
  @coordinators.reset
@@ -11,6 +11,10 @@ module Karafka
11
11
  # @param jobs_queue [JobsQueue]
12
12
  # @return [ListenersBatch]
13
13
  def initialize(jobs_queue)
14
+ # We need one scheduler for all the listeners because in case of complex schedulers, they
15
+ # should be able to distribute work whenever any work is done in any of the listeners
16
+ scheduler = App.config.internal.processing.scheduler_class.new(jobs_queue)
17
+
14
18
  @coordinators = []
15
19
 
16
20
  @batch = App.subscription_groups.flat_map do |_consumer_group, subscription_groups|
@@ -24,7 +28,8 @@ module Karafka
24
28
  Connection::Listener.new(
25
29
  consumer_group_coordinator,
26
30
  subscription_group,
27
- jobs_queue
31
+ jobs_queue,
32
+ scheduler
28
33
  )
29
34
  end
30
35
  end
@@ -73,7 +73,8 @@ module Karafka
73
73
 
74
74
  nested(:processing) do
75
75
  required(:jobs_builder) { |val| !val.nil? }
76
- required(:scheduler) { |val| !val.nil? }
76
+ required(:jobs_queue_class) { |val| !val.nil? }
77
+ required(:scheduler_class) { |val| !val.nil? }
77
78
  required(:coordinator_class) { |val| !val.nil? }
78
79
  required(:partitioner_class) { |val| !val.nil? }
79
80
  required(:strategy_selector) { |val| !val.nil? }
@@ -43,6 +43,7 @@ module Karafka
43
43
  rebalance.partitions_revoke
44
44
  rebalance.partitions_revoked
45
45
 
46
+ consumer.before_enqueue
46
47
  consumer.consume
47
48
  consumer.consumed
48
49
  consumer.consuming.pause
@@ -84,7 +84,8 @@ module Karafka
84
84
 
85
85
  icfg.processing.coordinator_class = Processing::Coordinator
86
86
  icfg.processing.partitioner_class = Processing::Partitioner
87
- icfg.processing.scheduler = Processing::Scheduler.new
87
+ icfg.processing.scheduler_class = Processing::Scheduler
88
+ icfg.processing.jobs_queue_class = Processing::JobsQueue
88
89
  icfg.processing.jobs_builder = Processing::JobsBuilder.new
89
90
  icfg.processing.strategy_selector = Processing::StrategySelector.new
90
91
 
@@ -21,14 +21,20 @@ module Karafka
21
21
 
22
22
  def_delegators :@collapser, :collapsed?, :collapse_until!
23
23
 
24
- attr_reader :filter, :virtual_offset_manager
24
+ attr_reader :filter, :virtual_offset_manager, :shared_mutex
25
25
 
26
26
  # @param args [Object] anything the base coordinator accepts
27
27
  def initialize(*args)
28
28
  super
29
29
 
30
30
  @executed = []
31
- @flow_lock = Mutex.new
31
+ @flow_mutex = Mutex.new
32
+ # Lock for user code synchronization
33
+ # We do not want to mix the coordinator lock with the user lock, so that a
34
+ # user-imposed lock cannot block the internal operations of Karafka
35
+ # This shared lock is not used internally by the framework, so the end user
36
+ # can safely rely on it for user-facing locking
37
+ @shared_mutex = Mutex.new
32
38
  @collapser = Collapser.new
33
39
  @filter = FiltersApplier.new(self)
34
40
 
@@ -89,7 +95,7 @@ module Karafka
89
95
  # Runs synchronized code once for a collective of virtual partitions prior to work being
90
96
  # enqueued
91
97
  def on_enqueued
92
- @flow_lock.synchronize do
98
+ @flow_mutex.synchronize do
93
99
  return unless executable?(:on_enqueued)
94
100
 
95
101
  yield(@last_message)
@@ -98,7 +104,7 @@ module Karafka
98
104
 
99
105
  # Runs given code only once per all the coordinated jobs upon starting first of them
100
106
  def on_started
101
- @flow_lock.synchronize do
107
+ @flow_mutex.synchronize do
102
108
  return unless executable?(:on_started)
103
109
 
104
110
  yield(@last_message)
@@ -109,7 +115,7 @@ module Karafka
109
115
  # It runs once per all the coordinated jobs and should be used to run any type of post
110
116
  # jobs coordination processing execution
111
117
  def on_finished
112
- @flow_lock.synchronize do
118
+ @flow_mutex.synchronize do
113
119
  return unless finished?
114
120
  return unless executable?(:on_finished)
115
121
 
@@ -119,7 +125,7 @@ module Karafka
119
125
 
120
126
  # Runs once after a partition is revoked
121
127
  def on_revoked
122
- @flow_lock.synchronize do
128
+ @flow_mutex.synchronize do
123
129
  return unless executable?(:on_revoked)
124
130
 
125
131
  yield(@last_message)
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ # Enhanced processing queue that provides ability to build complex work-distribution
18
+ # schedulers dedicated to particular job types
19
+ #
20
+ # Aside from the OSS queue capabilities, it allows for jobless locking for advanced schedulers
21
+ class JobsQueue < Karafka::Processing::JobsQueue
22
+ attr_accessor :in_processing
23
+
24
+ # @return [Karafka::Pro::Processing::JobsQueue]
25
+ def initialize
26
+ super
27
+
28
+ @in_waiting = Hash.new { |h, k| h[k] = [] }
29
+
30
+ @statistics[:waiting] = 0
31
+ end
32
+
33
+ # Method that allows us to lock the queue on a given subscription group without enqueuing a
34
+ # job. This can be used when building complex schedulers that want to postpone enqueuing
35
+ # before certain conditions are met.
36
+ #
37
+ # @param job [Jobs::Base] job used for locking
38
+ def lock(job)
39
+ @mutex.synchronize do
40
+ group = @in_waiting[job.group_id]
41
+
42
+ # This should never happen. Same job should not be locked twice
43
+ raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.include?(job)
44
+
45
+ @statistics[:waiting] += 1
46
+
47
+ group << job
48
+ end
49
+ end
50
+
51
+ # Method for unlocking the given subscription group queue space that was locked with a
52
+ # given job that was **not** added to the queue but used via `#lock`.
53
+ #
54
+ # @param job [Jobs::Base] job that locked the queue
55
+ def unlock(job)
56
+ @mutex.synchronize do
57
+ @statistics[:waiting] -= 1
58
+
59
+ return if @in_waiting[job.group_id].delete(job)
60
+
61
+ # This should never happen. It means there was a job being unlocked that was never
62
+ # locked in the first place
63
+ raise(Errors::JobsQueueSynchronizationError, job.group_id)
64
+ end
65
+ end
66
+
67
+ # Clears the processing states for a provided group. Useful when a recovery happens and we
68
+ # need to clean up state but only for a given subscription group.
69
+ #
70
+ # @param group_id [String]
71
+ def clear(group_id)
72
+ @mutex.synchronize do
73
+ @in_processing[group_id].clear
74
+
75
+ @statistics[:waiting] -= @in_waiting[group_id].size
76
+ @in_waiting[group_id].clear
77
+
78
+ # We unlock it just in case it was blocked when clearing started
79
+ tick(group_id)
80
+ end
81
+ end
82
+
83
+ # @param group_id [String]
84
+ #
85
+ # @return [Boolean] tells us whether we have anything in processing (or waiting to be processed) from
86
+ # a given group.
87
+ def empty?(group_id)
88
+ @mutex.synchronize do
89
+ @in_processing[group_id].empty? &&
90
+ @in_waiting[group_id].empty?
91
+ end
92
+ end
93
+
94
+ private
95
+
96
+ # @param group_id [String] id of the group in which jobs we're interested.
97
+ # @return [Boolean] should we keep waiting or not
98
+ # @note We do not wait for non-blocking jobs. Their flow should allow for `poll` running
99
+ # as they may exceed `max.poll.interval`
100
+ def wait?(group_id)
101
+ !(
102
+ @in_processing[group_id].all?(&:non_blocking?) &&
103
+ @in_waiting[group_id].all?(&:non_blocking?)
104
+ )
105
+ end
106
+ end
107
+ end
108
+ end
109
+ end
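The `#lock`/`#unlock` pair above is the building block of the extended Scheduling API: a scheduler can hold a job back while keeping the listener waiting for it and enqueue it later from `#manage`. A rough sketch of that pattern with a hypothetical scheduler; only the queue and scheduler interfaces visible in this diff are assumed, while the class name, the postponement rule and the one-second delay are illustrative:

```ruby
# Hypothetical scheduler that postpones consumption jobs using the Pro
# JobsQueue locking shown above. NOTE: one scheduler instance is shared across
# listener threads, so a real implementation would need its own locking around
# @pending.
class DelayingScheduler < ::Karafka::Processing::Scheduler
  def initialize(queue)
    super
    @pending = []
  end

  # Park jobs instead of enqueuing them and lock the queue so the listener
  # keeps waiting for this subscription group
  def schedule_consumption(jobs_array)
    jobs_array.each do |job|
      @queue.lock(job)
      @pending << [job, Time.now]
    end
  end

  # Called from the listener wait loops; release jobs that waited long enough
  def manage
    @pending.delete_if do |job, parked_at|
      next false if Time.now - parked_at < 1

      @queue.unlock(job)
      @queue << job
      true
    end
  end

  # Drop local references for a recovered subscription group; the queue clears
  # its own waiting bookkeeping in its #clear
  def clear(group_id)
    @pending.delete_if { |job, _| job.group_id == group_id }
  end
end
```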
@@ -27,10 +27,9 @@ module Karafka
27
27
  class Scheduler < ::Karafka::Processing::Scheduler
28
28
  # Schedules jobs in the LJF order for consumption
29
29
  #
30
- # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
31
30
  # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
32
31
  #
33
- def schedule_consumption(queue, jobs_array)
32
+ def schedule_consumption(jobs_array)
34
33
  perf_tracker = PerformanceTracker.instance
35
34
 
36
35
  ordered = []
@@ -47,7 +46,7 @@ module Karafka
47
46
  ordered.map!(&:first)
48
47
 
49
48
  ordered.each do |job|
50
- queue << job
49
+ @queue << job
51
50
  end
52
51
  end
53
52
 
@@ -29,6 +29,8 @@ module Karafka
29
29
 
30
30
  # No actions needed for the standard flow here
31
31
  def handle_before_enqueue
32
+ Karafka.monitor.instrument('consumer.before_enqueue', caller: self)
33
+
32
34
  nil
33
35
  end
34
36
 
@@ -77,6 +77,15 @@ module Karafka
77
77
  revoked
78
78
  end
79
79
  end
80
+
81
+ # Allows for LRJ to synchronize its work. It may be needed because LRJ can run
82
+ # lifecycle events like revocation while the LRJ work is running and there may be a
83
+ # need for a critical section.
84
+ #
85
+ # @param block [Proc] block we want to run in a mutex to prevent race-conditions
86
+ def synchronize(&block)
87
+ coordinator.shared_mutex.synchronize(&block)
88
+ end
80
89
  end
81
90
  end
82
91
  end
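From the consumer's perspective, this gives LRJ code the same `#synchronize` guard that VPs already had. A minimal sketch of an LRJ consumer protecting state shared with revocation handling; the consumer class and its helper methods are assumptions, only `#synchronize`, `#revoked?` and `#mark_as_consumed` are framework API:

```ruby
# Hypothetical Long-Running Job consumer; #process and the revocation cleanup
# are illustrative placeholders
class ReportsConsumer < ApplicationConsumer
  def consume
    messages.each do |message|
      return if revoked?

      # Revocation lifecycle code can run in parallel to this long-running
      # work, so changes to shared state go through the shared mutex
      synchronize do
        process(message)
        mark_as_consumed(message)
      end
    end
  end

  def revoked
    synchronize { cleanup_partial_state }
  end

  private

  def process(message)
    # business logic placeholder
  end

  def cleanup_partial_state
    # release resources tied to the revoked partition
  end
end
```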
@@ -94,13 +94,15 @@ module Karafka
94
94
 
95
95
  # Allows for cross-virtual-partition consumers locks
96
96
  #
97
- # This is not needed in the non-VP flows because there is always only one consumer
98
- # per partition at the same time, so no coordination is needed directly for the
99
- # end users
97
+ # This is not needed in the non-VP flows except LRJ because there is always only one
98
+ # consumer per partition at the same time, so no coordination is needed directly for
99
+ # the end users. With LRJ it is needed and provided in the `LRJ::Default` strategy,
100
+ # because lifecycle events on revocation can run in parallel to the LRJ job as it is
101
+ # non-blocking.
100
102
  #
101
103
  # @param block [Proc] block we want to run in a mutex to prevent race-conditions
102
104
  def synchronize(&block)
103
- coordinator.synchronize(&block)
105
+ coordinator.shared_mutex.synchronize(&block)
104
106
  end
105
107
 
106
108
  private
@@ -111,6 +113,8 @@ module Karafka
111
113
  # @note This can be done without the mutex, because it happens from the same thread
112
114
  # for all the work (listener thread)
113
115
  def handle_before_enqueue
116
+ super
117
+
114
118
  coordinator.virtual_offset_manager.register(
115
119
  messages.map(&:offset)
116
120
  )
@@ -162,11 +162,24 @@ module Karafka
162
162
  @manual_seek
163
163
  end
164
164
 
165
+ # @param consumer [Object] karafka consumer (normal or pro)
166
+ # @return [Karafka::Processing::Result] result object which we can use to indicate
167
+ # consumption processing state.
168
+ def consumption(consumer)
169
+ @consumptions[consumer] ||= Processing::Result.new
170
+ end
171
+
165
172
  # Allows to run synchronized (locked) code that can operate only from a given thread
166
173
  #
167
174
  # @param block [Proc] code we want to run in the synchronized mode
175
+ #
168
176
  # @note We check if mutex is not owned already by the current thread so we won't end up with
169
177
  # a deadlock in case user runs coordinated code from inside of his own lock
178
+ #
179
+ # @note This is internal and should **not** be used to synchronize user-facing code.
180
+ # Otherwise the user could indirectly cause deadlocks or prolonged locks by running their logic.
181
+ # This can and should however be used for multi-thread strategy applications and other
182
+ # internal operations locks.
170
183
  def synchronize(&block)
171
184
  if @mutex.owned?
172
185
  yield
@@ -174,13 +187,6 @@ module Karafka
174
187
  @mutex.synchronize(&block)
175
188
  end
176
189
  end
177
-
178
- # @param consumer [Object] karafka consumer (normal or pro)
179
- # @return [Karafka::Processing::Result] result object which we can use to indicate
180
- # consumption processing state.
181
- def consumption(consumer)
182
- @consumptions[consumer] ||= Processing::Result.new
183
- end
184
190
  end
185
191
  end
186
192
  end
@@ -9,6 +9,9 @@ module Karafka
9
9
  # on this queue, that's why internally we keep track of processing per group.
10
10
  #
11
11
  # We work with the assumption, that partitions data is evenly distributed.
12
+ #
13
+ # @note This job queue also keeps track of the number of busy workers. This is because
14
+ # we use a single workers pool that can have granular scheduling.
12
15
  class JobsQueue
13
16
  # @return [Karafka::Processing::JobsQueue]
14
17
  def initialize
@@ -26,19 +29,14 @@ module Karafka
26
29
  h.compute_if_absent(k) { RUBY_VERSION < '3.2' ? TimedQueue.new : Queue.new }
27
30
  end
28
31
 
32
+ @concurrency = Karafka::App.config.concurrency
29
33
  @tick_interval = ::Karafka::App.config.internal.tick_interval
30
34
  @in_processing = Hash.new { |h, k| h[k] = [] }
35
+ @statistics = { busy: 0, enqueued: 0 }
31
36
 
32
37
  @mutex = Mutex.new
33
38
  end
34
39
 
35
- # Returns number of jobs that are either enqueued or in processing (but not finished)
36
- # @return [Integer] number of elements in the queue
37
- # @note Using `#pop` won't decrease this number as only marking job as completed does this
38
- def size
39
- @in_processing.values.map(&:size).sum
40
- end
41
-
42
40
  # Adds the job to the internal main queue, scheduling it for execution in a worker and marks
43
41
  # this job as in processing pipeline.
44
42
  #
@@ -55,6 +53,16 @@ module Karafka
55
53
 
56
54
  group << job
57
55
 
56
+ # Assume that moving to the queue means being picked up immediately, so we do not create
57
+ # stats race conditions because of pop overhead. If there are workers available, we assume
58
+ # the work is going to be handled, as we never reject enqueued jobs
59
+ if @statistics[:busy] < @concurrency
60
+ @statistics[:busy] += 1
61
+ else
62
+ # If system is fully loaded, it means this job is indeed enqueued
63
+ @statistics[:enqueued] += 1
64
+ end
65
+
58
66
  @queue << job
59
67
  end
60
68
  end
@@ -80,7 +88,16 @@ module Karafka
80
88
  # @param [Jobs::Base] job that was completed
81
89
  def complete(job)
82
90
  @mutex.synchronize do
91
+ # We finish one job and if there is another, we pick it up
92
+ if @statistics[:enqueued].positive?
93
+ @statistics[:enqueued] -= 1
94
+ # If no more enqueued jobs, we will be just less busy
95
+ else
96
+ @statistics[:busy] -= 1
97
+ end
98
+
83
99
  @in_processing[job.group_id].delete(job)
100
+
84
101
  tick(job.group_id)
85
102
  end
86
103
  end
@@ -141,10 +158,10 @@ module Karafka
141
158
  #
142
159
  # @return [Hash] hash with basic usage statistics of this queue.
143
160
  def statistics
144
- {
145
- busy: size - @queue.size,
146
- enqueued: @queue.size
147
- }.freeze
161
+ # Ensures there are no race conditions when returning this data
162
+ @mutex.synchronize do
163
+ @statistics.dup.freeze
164
+ end
148
165
  end
149
166
 
150
167
  private
@@ -4,19 +4,35 @@ module Karafka
4
4
  module Processing
5
5
  # FIFO scheduler for messages coming from various topics and partitions
6
6
  class Scheduler
7
+ # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
8
+ def initialize(queue)
9
+ @queue = queue
10
+ end
11
+
7
12
  # Schedules jobs in the fifo order
8
13
  #
9
- # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
10
14
  # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
11
- def schedule_consumption(queue, jobs_array)
15
+ def schedule_consumption(jobs_array)
12
16
  jobs_array.each do |job|
13
- queue << job
17
+ @queue << job
14
18
  end
15
19
  end
16
20
 
17
21
  # Both revocation and shutdown jobs can also run in fifo by default
18
22
  alias schedule_revocation schedule_consumption
19
23
  alias schedule_shutdown schedule_consumption
24
+
25
+ # This scheduler does not have anything to manage as it is a pass through and has no state
26
+ def manage
27
+ nil
28
+ end
29
+
30
+ # This scheduler does not need to be cleared because it is stateless
31
+ #
32
+ # @param _group_id [String] Subscription group id
33
+ def clear(_group_id)
34
+ nil
35
+ end
20
36
  end
21
37
  end
22
38
  end
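With schedulers now configured as classes and instantiated once with the jobs queue, swapping the FIFO behaviour is roughly a matter of subclassing and pointing the internal setting at the new class. A hedged sketch; `MyScheduler` and its reordering rule are assumptions, the setting name comes from this diff:

```ruby
# Hypothetical scheduler override; it inherits the queue reference and the
# no-op #manage / #clear from the base class shown above
class MyScheduler < ::Karafka::Processing::Scheduler
  def schedule_consumption(jobs_array)
    # Example ordering rule: newest jobs first instead of FIFO
    jobs_array.reverse_each { |job| @queue << job }
  end
end

class KarafkaApp < Karafka::App
  setup do |config|
    config.client_id = 'example_app'
    # Assumed wiring based on the new default introduced in this version
    config.internal.processing.scheduler_class = MyScheduler
  end
end
```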
@@ -78,6 +78,8 @@ module Karafka
78
78
 
79
79
  # No actions needed for the standard flow here
80
80
  def handle_before_enqueue
81
+ Karafka.monitor.instrument('consumer.before_enqueue', caller: self)
82
+
81
83
  nil
82
84
  end
83
85
 
@@ -3,20 +3,25 @@
3
3
  module Karafka
4
4
  module Routing
5
5
  # Builder used as a DSL layer for building consumers and telling them which topics to consume
6
+ #
7
+ # @note We lock access just in case this is used in patterns. The locks here do not have
8
+ # any impact on routing usage unless it is being expanded, so there is no race condition risk.
9
+ #
6
10
  # @example Build a simple (most common) route
7
11
  # consumers do
8
12
  # topic :new_videos do
9
13
  # consumer NewVideosConsumer
10
14
  # end
11
15
  # end
12
- class Builder < Concurrent::Array
16
+ class Builder < Array
13
17
  # Empty default per-topic config
14
18
  EMPTY_DEFAULTS = ->(_) {}.freeze
15
19
 
16
20
  private_constant :EMPTY_DEFAULTS
17
21
 
18
22
  def initialize
19
- @draws = Concurrent::Array.new
23
+ @mutex = Mutex.new
24
+ @draws = []
20
25
  @defaults = EMPTY_DEFAULTS
21
26
  super
22
27
  end
@@ -34,21 +39,23 @@ module Karafka
34
39
  # end
35
40
  # end
36
41
  def draw(&block)
37
- @draws << block
42
+ @mutex.synchronize do
43
+ @draws << block
38
44
 
39
- instance_eval(&block)
45
+ instance_eval(&block)
40
46
 
41
- each do |consumer_group|
42
- # Validate consumer group settings
43
- Contracts::ConsumerGroup.new.validate!(consumer_group.to_h)
47
+ each do |consumer_group|
48
+ # Validate consumer group settings
49
+ Contracts::ConsumerGroup.new.validate!(consumer_group.to_h)
44
50
 
45
- # and then its topics settings
46
- consumer_group.topics.each do |topic|
47
- Contracts::Topic.new.validate!(topic.to_h)
48
- end
51
+ # and then its topics settings
52
+ consumer_group.topics.each do |topic|
53
+ Contracts::Topic.new.validate!(topic.to_h)
54
+ end
49
55
 
50
- # Initialize subscription groups after all the routing is done
51
- consumer_group.subscription_groups
56
+ # Initialize subscription groups after all the routing is done
57
+ consumer_group.subscription_groups
58
+ end
52
59
  end
53
60
  end
54
61
 
@@ -61,9 +68,11 @@ module Karafka
61
68
 
62
69
  # Clears the builder and the draws memory
63
70
  def clear
64
- @defaults = EMPTY_DEFAULTS
65
- @draws.clear
66
- super
71
+ @mutex.synchronize do
72
+ @defaults = EMPTY_DEFAULTS
73
+ @draws.clear
74
+ super
75
+ end
67
76
  end
68
77
 
69
78
  # @param block [Proc] block with per-topic evaluated defaults
@@ -71,7 +80,13 @@ module Karafka
71
80
  def defaults(&block)
72
81
  return @defaults unless block
73
82
 
74
- @defaults = block
83
+ if @mutex.owned?
84
+ @defaults = block
85
+ else
86
+ @mutex.synchronize do
87
+ @defaults = block
88
+ end
89
+ end
75
90
  end
76
91
 
77
92
  private
@@ -10,19 +10,24 @@ module Karafka
10
10
  class SubscriptionGroup
11
11
  attr_reader :id, :name, :topics, :kafka, :consumer_group
12
12
 
13
- # Numeric for counting groups
14
- GROUP_COUNT = Concurrent::AtomicFixnum.new
13
+ # Lock for generating new ids safely
14
+ ID_MUTEX = Mutex.new
15
15
 
16
- private_constant :GROUP_COUNT
16
+ private_constant :ID_MUTEX
17
17
 
18
18
  class << self
19
19
  # Generates new subscription group id that will be used in case of anonymous subscription
20
20
  # groups
21
21
  # @return [String] hex(6) compatible reproducible id
22
22
  def id
23
- ::Digest::MD5.hexdigest(
24
- GROUP_COUNT.increment.to_s
25
- )[0..11]
23
+ ID_MUTEX.synchronize do
24
+ @group_counter ||= 0
25
+ @group_counter += 1
26
+
27
+ ::Digest::MD5.hexdigest(
28
+ @group_counter.to_s
29
+ )[0..11]
30
+ end
26
31
  end
27
32
  end
28
33
 
@@ -8,7 +8,7 @@ module Karafka
8
8
  def call
9
9
  # Despite possibility of having several independent listeners, we aim to have one queue for
10
10
  # jobs across and one workers poll for that
11
- jobs_queue = Processing::JobsQueue.new
11
+ jobs_queue = App.config.internal.processing.jobs_queue_class.new
12
12
 
13
13
  workers = Processing::WorkersBatch.new(jobs_queue)
14
14
  listeners = Connection::ListenersBatch.new(jobs_queue)
@@ -209,8 +209,9 @@ module Karafka
209
209
  end
210
210
 
211
211
  setting :processing do
212
+ setting :jobs_queue_class, default: Processing::JobsQueue
212
213
  # option scheduler [Object] scheduler we will be using
213
- setting :scheduler, default: Processing::Scheduler.new
214
+ setting :scheduler_class, default: Processing::Scheduler
214
215
  # option jobs_builder [Object] jobs builder we want to use
215
216
  setting :jobs_builder, default: Processing::JobsBuilder.new
216
217
  # option coordinator [Class] work coordinator we want to user for processing coordination
@@ -3,5 +3,5 @@
3
3
  # Main module namespace
4
4
  module Karafka
5
5
  # Current Karafka version
6
- VERSION = '2.2.12'
6
+ VERSION = '2.2.13'
7
7
  end
data/lib/karafka.rb CHANGED
@@ -16,7 +16,6 @@
16
16
  singleton
17
17
  digest
18
18
  zeitwerk
19
- concurrent/atomic/atomic_fixnum
20
19
  ].each(&method(:require))
21
20
 
22
21
  # Karafka framework main namespace
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: karafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.12
4
+ version: 2.2.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
35
35
  AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
36
36
  msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
37
37
  -----END CERTIFICATE-----
38
- date: 2023-11-09 00:00:00.000000000 Z
38
+ date: 2023-11-17 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: karafka-core
@@ -246,6 +246,7 @@ files:
246
246
  - lib/karafka/pro/processing/jobs/consume_non_blocking.rb
247
247
  - lib/karafka/pro/processing/jobs/revoked_non_blocking.rb
248
248
  - lib/karafka/pro/processing/jobs_builder.rb
249
+ - lib/karafka/pro/processing/jobs_queue.rb
249
250
  - lib/karafka/pro/processing/partitioner.rb
250
251
  - lib/karafka/pro/processing/scheduler.rb
251
252
  - lib/karafka/pro/processing/strategies.rb
metadata.gz.sig CHANGED
Binary file