karafka 2.2.12 → 2.2.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2ee648826503a1b1841a97e368ec2894a16eadc3509270d2a5dbbbe9ee703b3a
4
- data.tar.gz: 915285e224ab6dcaa4b2f75e2b6aa52f4f6eb0f613b8b06efe78e2ef4ff3f514
3
+ metadata.gz: 4056d72f0d37ac46c52597ebcfed87de031f9f250d57a64ec5c665d3423a3087
4
+ data.tar.gz: 95aeab42e351043873d548a5289e8355fe48fa7b7f27aaf1549a220c76eac9c1
5
5
  SHA512:
6
- metadata.gz: 1f7f109c533a98a46306be62a2172432f0d18af7003e401d3a894aa356bc2cae2622ba4c323bfdd230a66f0ae544a7cfb61ee0168b396e2e809e408a657eecb6
7
- data.tar.gz: d10de0ca361236c35bed27ca3c5db13e9e245805412f85c2d8d4e6a140fe088025403be7a65e1d97831613f02032bfe3fb2194c5ec7f6a880bc7ddc67a112813
6
+ metadata.gz: 8e41da4dff00dc3cb9749874568a275cdad81b7a762182cee7ea497bfe373dd1b3f777dd40638d0c30ff13f50c5913cdcad175edcc8b9b36a3e26fb5658fc986
7
+ data.tar.gz: 738352dea20404d42a80340c2fc27359d54185565e8069f8245662e02d33c8630ce7922c3938b06b07e5587bd007342c65439229484ed529ae050e356872f150
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # Karafka framework changelog
2
2
 
3
+ ## 2.2.13 (2023-11-17)
4
+ - **[Feature]** Introduce low-level extended Scheduling API for granular control of schedulers and jobs execution [Pro].
5
+ - [Improvement] Use separate lock for user-facing synchronization.
6
+ - [Improvement] Instrument `consumer.before_enqueue`.
7
+ - [Improvement] Limit usage of `concurrent-ruby` (plan to remove it as a dependency fully).
8
+ - [Improvement] Provide `#synchronize` API same as in VPs for LRJs to allow for lifecycle events and consumption synchronization.
9
+
3
10
  ## 2.2.12 (2023-11-09)
4
11
  - [Improvement] Rewrite the polling engine to update statistics and error callbacks despite longer non LRJ processing or long `max_wait_time` setups. This change provides stability to the statistics and background error emitting making them time-reliable.
5
12
  - [Improvement] Auto-update Inline Insights if new insights are present for all consumers and not only LRJ (OSS and Pro).
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- karafka (2.2.12)
4
+ karafka (2.2.13)
5
5
  karafka-core (>= 2.2.7, < 2.3.0)
6
6
  waterdrop (>= 2.6.11, < 3.0.0)
7
7
  zeitwerk (~> 2.3)
@@ -9,10 +9,10 @@ PATH
9
9
  GEM
10
10
  remote: https://rubygems.org/
11
11
  specs:
12
- activejob (7.1.1)
13
- activesupport (= 7.1.1)
12
+ activejob (7.1.2)
13
+ activesupport (= 7.1.2)
14
14
  globalid (>= 0.3.6)
15
- activesupport (7.1.1)
15
+ activesupport (7.1.2)
16
16
  base64
17
17
  bigdecimal
18
18
  concurrent-ruby (~> 1.0, >= 1.0.2)
@@ -22,14 +22,14 @@ GEM
22
22
  minitest (>= 5.1)
23
23
  mutex_m
24
24
  tzinfo (~> 2.0)
25
- base64 (0.1.1)
25
+ base64 (0.2.0)
26
26
  bigdecimal (3.1.4)
27
27
  byebug (11.1.3)
28
28
  concurrent-ruby (1.2.2)
29
29
  connection_pool (2.4.1)
30
30
  diff-lcs (1.5.0)
31
31
  docile (1.4.0)
32
- drb (2.1.1)
32
+ drb (2.2.0)
33
33
  ruby2_keywords
34
34
  erubi (1.12.0)
35
35
  factory_bot (6.3.0)
@@ -42,7 +42,7 @@ GEM
42
42
  karafka-core (2.2.7)
43
43
  concurrent-ruby (>= 1.1)
44
44
  karafka-rdkafka (>= 0.13.9, < 0.15.0)
45
- karafka-rdkafka (0.13.9)
45
+ karafka-rdkafka (0.14.0)
46
46
  ffi (~> 1.15)
47
47
  mini_portile2 (~> 2.6)
48
48
  rake (> 12)
@@ -54,10 +54,10 @@ GEM
54
54
  tilt (~> 2.0)
55
55
  mini_portile2 (2.8.5)
56
56
  minitest (5.20.0)
57
- mutex_m (0.1.2)
57
+ mutex_m (0.2.0)
58
58
  rack (3.0.8)
59
59
  rake (13.1.0)
60
- roda (3.73.0)
60
+ roda (3.74.0)
61
61
  rack
62
62
  rspec (3.12.0)
63
63
  rspec-core (~> 3.12.0)
@@ -16,7 +16,8 @@ en:
16
16
  max_wait_time_format: needs to be an integer bigger than 0
17
17
  kafka_format: needs to be a filled hash
18
18
  internal.processing.jobs_builder_format: cannot be nil
19
- internal.processing.scheduler_format: cannot be nil
19
+ internal.processing.jobs_queue_class_format: cannot be nil
20
+ internal.processing.scheduler_class_format: cannot be nil
20
21
  internal.processing.coordinator_class_format: cannot be nil
21
22
  internal.processing.partitioner_class_format: cannot be nil
22
23
  internal.processing.strategy_selector_format: cannot be nil
@@ -16,7 +16,7 @@ module Karafka
16
16
  class ConsumerGroupCoordinator
17
17
  # @param group_size [Integer] number of separate subscription groups in a consumer group
18
18
  def initialize(group_size)
19
- @shutdown_lock = Mutex.new
19
+ @shutdown_mutex = Mutex.new
20
20
  @group_size = group_size
21
21
  @finished = Set.new
22
22
  end
@@ -30,12 +30,12 @@ module Karafka
30
30
  # @return [Boolean] can we start shutdown on a given listener
31
31
  # @note If true, will also obtain a lock so no-one else will be closing the same time we do
32
32
  def shutdown?
33
- finished? && @shutdown_lock.try_lock
33
+ finished? && @shutdown_mutex.try_lock
34
34
  end
35
35
 
36
36
  # Unlocks the shutdown lock
37
37
  def unlock
38
- @shutdown_lock.unlock if @shutdown_lock.owned?
38
+ @shutdown_mutex.unlock if @shutdown_mutex.owned?
39
39
  end
40
40
 
41
41
  # Marks given listener as finished
@@ -23,8 +23,9 @@ module Karafka
23
23
  # @param consumer_group_coordinator [Karafka::Connection::ConsumerGroupCoordinator]
24
24
  # @param subscription_group [Karafka::Routing::SubscriptionGroup]
25
25
  # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
26
+ # @param scheduler [Karafka::Processing::Scheduler] scheduler we want to use
26
27
  # @return [Karafka::Connection::Listener] listener instance
27
- def initialize(consumer_group_coordinator, subscription_group, jobs_queue)
28
+ def initialize(consumer_group_coordinator, subscription_group, jobs_queue, scheduler)
28
29
  proc_config = ::Karafka::App.config.internal.processing
29
30
 
30
31
  @id = SecureRandom.hex(6)
@@ -36,8 +37,7 @@ module Karafka
36
37
  @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
37
38
  @jobs_builder = proc_config.jobs_builder
38
39
  @partitioner = proc_config.partitioner_class.new(subscription_group)
39
- # We reference scheduler here as it is much faster than fetching this each time
40
- @scheduler = proc_config.scheduler
40
+ @scheduler = scheduler
41
41
  @events_poller = Helpers::IntervalRunner.new { @client.events_poll }
42
42
  # We keep one buffer for messages to preserve memory and not allocate extra objects
43
43
  # We can do this that way because we always first schedule jobs using messages before we
@@ -243,7 +243,7 @@ module Karafka
243
243
  end
244
244
  end
245
245
 
246
- @scheduler.schedule_revocation(@jobs_queue, jobs)
246
+ @scheduler.schedule_revocation(jobs)
247
247
  end
248
248
 
249
249
  # Enqueues the shutdown jobs for all the executors that exist in our subscription group
@@ -256,7 +256,7 @@ module Karafka
256
256
  jobs << job
257
257
  end
258
258
 
259
- @scheduler.schedule_shutdown(@jobs_queue, jobs)
259
+ @scheduler.schedule_shutdown(jobs)
260
260
  end
261
261
 
262
262
  # Polls messages within the time and amount boundaries defined in the settings and then
@@ -298,12 +298,15 @@ module Karafka
298
298
 
299
299
  jobs.each(&:before_enqueue)
300
300
 
301
- @scheduler.schedule_consumption(@jobs_queue, jobs)
301
+ @scheduler.schedule_consumption(jobs)
302
302
  end
303
303
 
304
304
  # Waits for all the jobs from a given subscription group to finish before moving forward
305
305
  def wait
306
- @jobs_queue.wait(@subscription_group.id) { @events_poller.call }
306
+ @jobs_queue.wait(@subscription_group.id) do
307
+ @events_poller.call
308
+ @scheduler.manage
309
+ end
307
310
  end
308
311
 
309
312
  # Waits without blocking the polling
@@ -319,6 +322,8 @@ module Karafka
319
322
  def wait_pinging(wait_until:, after_ping: -> {})
320
323
  until wait_until.call
321
324
  @client.ping
325
+ @scheduler.manage
326
+
322
327
  after_ping.call
323
328
  sleep(0.2)
324
329
  end
@@ -334,6 +339,7 @@ module Karafka
334
339
  # resetting.
335
340
  @jobs_queue.wait(@subscription_group.id)
336
341
  @jobs_queue.clear(@subscription_group.id)
342
+ @scheduler.clear(@subscription_group.id)
337
343
  @events_poller.reset
338
344
  @client.reset
339
345
  @coordinators.reset
@@ -11,6 +11,10 @@ module Karafka
11
11
  # @param jobs_queue [JobsQueue]
12
12
  # @return [ListenersBatch]
13
13
  def initialize(jobs_queue)
14
+ # We need one scheduler for all the listeners because in case of complex schedulers, they
15
+ # should be able to distribute work whenever any work is done in any of the listeners
16
+ scheduler = App.config.internal.processing.scheduler_class.new(jobs_queue)
17
+
14
18
  @coordinators = []
15
19
 
16
20
  @batch = App.subscription_groups.flat_map do |_consumer_group, subscription_groups|
@@ -24,7 +28,8 @@ module Karafka
24
28
  Connection::Listener.new(
25
29
  consumer_group_coordinator,
26
30
  subscription_group,
27
- jobs_queue
31
+ jobs_queue,
32
+ scheduler
28
33
  )
29
34
  end
30
35
  end
@@ -73,7 +73,8 @@ module Karafka
73
73
 
74
74
  nested(:processing) do
75
75
  required(:jobs_builder) { |val| !val.nil? }
76
- required(:scheduler) { |val| !val.nil? }
76
+ required(:jobs_queue_class) { |val| !val.nil? }
77
+ required(:scheduler_class) { |val| !val.nil? }
77
78
  required(:coordinator_class) { |val| !val.nil? }
78
79
  required(:partitioner_class) { |val| !val.nil? }
79
80
  required(:strategy_selector) { |val| !val.nil? }
@@ -43,6 +43,7 @@ module Karafka
43
43
  rebalance.partitions_revoke
44
44
  rebalance.partitions_revoked
45
45
 
46
+ consumer.before_enqueue
46
47
  consumer.consume
47
48
  consumer.consumed
48
49
  consumer.consuming.pause
@@ -84,7 +84,8 @@ module Karafka
84
84
 
85
85
  icfg.processing.coordinator_class = Processing::Coordinator
86
86
  icfg.processing.partitioner_class = Processing::Partitioner
87
- icfg.processing.scheduler = Processing::Scheduler.new
87
+ icfg.processing.scheduler_class = Processing::Scheduler
88
+ icfg.processing.jobs_queue_class = Processing::JobsQueue
88
89
  icfg.processing.jobs_builder = Processing::JobsBuilder.new
89
90
  icfg.processing.strategy_selector = Processing::StrategySelector.new
90
91
 
@@ -21,14 +21,20 @@ module Karafka
21
21
 
22
22
  def_delegators :@collapser, :collapsed?, :collapse_until!
23
23
 
24
- attr_reader :filter, :virtual_offset_manager
24
+ attr_reader :filter, :virtual_offset_manager, :shared_mutex
25
25
 
26
26
  # @param args [Object] anything the base coordinator accepts
27
27
  def initialize(*args)
28
28
  super
29
29
 
30
30
  @executed = []
31
- @flow_lock = Mutex.new
31
+ @flow_mutex = Mutex.new
32
+ # Lock for user code synchronization
33
+ # We do not want to mix coordinator lock with the user lock not to create cases where
34
+ # user imposed lock would lock the internal operations of Karafka
35
+ # This shared lock can be used by the end user as it is not used internally by the
36
+ # framework and can be used for user-facing locking
37
+ @shared_mutex = Mutex.new
32
38
  @collapser = Collapser.new
33
39
  @filter = FiltersApplier.new(self)
34
40
 
@@ -89,7 +95,7 @@ module Karafka
89
95
  # Runs synchronized code once for a collective of virtual partitions prior to work being
90
96
  # enqueued
91
97
  def on_enqueued
92
- @flow_lock.synchronize do
98
+ @flow_mutex.synchronize do
93
99
  return unless executable?(:on_enqueued)
94
100
 
95
101
  yield(@last_message)
@@ -98,7 +104,7 @@ module Karafka
98
104
 
99
105
  # Runs given code only once per all the coordinated jobs upon starting first of them
100
106
  def on_started
101
- @flow_lock.synchronize do
107
+ @flow_mutex.synchronize do
102
108
  return unless executable?(:on_started)
103
109
 
104
110
  yield(@last_message)
@@ -109,7 +115,7 @@ module Karafka
109
115
  # It runs once per all the coordinated jobs and should be used to run any type of post
110
116
  # jobs coordination processing execution
111
117
  def on_finished
112
- @flow_lock.synchronize do
118
+ @flow_mutex.synchronize do
113
119
  return unless finished?
114
120
  return unless executable?(:on_finished)
115
121
 
@@ -119,7 +125,7 @@ module Karafka
119
125
 
120
126
  # Runs once after a partition is revoked
121
127
  def on_revoked
122
- @flow_lock.synchronize do
128
+ @flow_mutex.synchronize do
123
129
  return unless executable?(:on_revoked)
124
130
 
125
131
  yield(@last_message)
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ # Enhanced processing queue that provides ability to build complex work-distribution
18
+ # schedulers dedicated to particular job types
19
+ #
20
+ # Aside from the OSS queue capabilities it allows for jobless locking for advanced schedulers
21
+ class JobsQueue < Karafka::Processing::JobsQueue
22
+ attr_accessor :in_processing
23
+
24
+ # @return [Karafka::Pro::Processing::JobsQueue]
25
+ def initialize
26
+ super
27
+
28
+ @in_waiting = Hash.new { |h, k| h[k] = [] }
29
+
30
+ @statistics[:waiting] = 0
31
+ end
32
+
33
+ # Method that allows us to lock queue on a given subscription group without enqueuing a
34
+ # job. This can be used when building complex schedulers that want to postpone enqueuing
35
+ # before certain conditions are met.
36
+ #
37
+ # @param job [Jobs::Base] job used for locking
38
+ def lock(job)
39
+ @mutex.synchronize do
40
+ group = @in_waiting[job.group_id]
41
+
42
+ # This should never happen. Same job should not be locked twice
43
+ raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.include?(job)
44
+
45
+ @statistics[:waiting] += 1
46
+
47
+ group << job
48
+ end
49
+ end
50
+
51
+ # Method for unlocking the given subscription group queue space that was locked with a
52
+ # given job that was **not** added to the queue but used via `#lock`.
53
+ #
54
+ # @param job [Jobs::Base] job that locked the queue
55
+ def unlock(job)
56
+ @mutex.synchronize do
57
+ @statistics[:waiting] -= 1
58
+
59
+ return if @in_waiting[job.group_id].delete(job)
60
+
61
+ # This should never happen. It means there was a job being unlocked that was never
62
+ # locked in the first place
63
+ raise(Errors::JobsQueueSynchronizationError, job.group_id)
64
+ end
65
+ end
66
+
67
+ # Clears the processing states for a provided group. Useful when a recovery happens and we
68
+ # need to clean up state but only for a given subscription group.
69
+ #
70
+ # @param group_id [String]
71
+ def clear(group_id)
72
+ @mutex.synchronize do
73
+ @in_processing[group_id].clear
74
+
75
+ @statistics[:waiting] -= @in_waiting[group_id].size
76
+ @in_waiting[group_id].clear
77
+
78
+ # We unlock it just in case it was blocked when clearing started
79
+ tick(group_id)
80
+ end
81
+ end
82
+
83
+ # @param group_id [String]
84
+ #
85
+ # @return [Boolean] tell us if we have anything in the processing (or for processing) from
86
+ # a given group.
87
+ def empty?(group_id)
88
+ @mutex.synchronize do
89
+ @in_processing[group_id].empty? &&
90
+ @in_waiting[group_id].empty?
91
+ end
92
+ end
93
+
94
+ private
95
+
96
+ # @param group_id [String] id of the group in which jobs we're interested.
97
+ # @return [Boolean] should we keep waiting or not
98
+ # @note We do not wait for non-blocking jobs. Their flow should allow for `poll` running
99
+ # as they may exceed `max.poll.interval`
100
+ def wait?(group_id)
101
+ !(
102
+ @in_processing[group_id].all?(&:non_blocking?) &&
103
+ @in_waiting[group_id].all?(&:non_blocking?)
104
+ )
105
+ end
106
+ end
107
+ end
108
+ end
109
+ end
@@ -27,10 +27,9 @@ module Karafka
27
27
  class Scheduler < ::Karafka::Processing::Scheduler
28
28
  # Schedules jobs in the LJF order for consumption
29
29
  #
30
- # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
31
30
  # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
32
31
  #
33
- def schedule_consumption(queue, jobs_array)
32
+ def schedule_consumption(jobs_array)
34
33
  perf_tracker = PerformanceTracker.instance
35
34
 
36
35
  ordered = []
@@ -47,7 +46,7 @@ module Karafka
47
46
  ordered.map!(&:first)
48
47
 
49
48
  ordered.each do |job|
50
- queue << job
49
+ @queue << job
51
50
  end
52
51
  end
53
52
 
@@ -29,6 +29,8 @@ module Karafka
29
29
 
30
30
  # No actions needed for the standard flow here
31
31
  def handle_before_enqueue
32
+ Karafka.monitor.instrument('consumer.before_enqueue', caller: self)
33
+
32
34
  nil
33
35
  end
34
36
 
@@ -77,6 +77,15 @@ module Karafka
77
77
  revoked
78
78
  end
79
79
  end
80
+
81
+ # Allows for LRJ to synchronize its work. It may be needed because LRJ can run
82
+ # lifecycle events like revocation while the LRJ work is running and there may be a
83
+ # need for a critical section.
84
+ #
85
+ # @param block [Proc] block we want to run in a mutex to prevent race-conditions
86
+ def synchronize(&block)
87
+ coordinator.shared_mutex.synchronize(&block)
88
+ end
80
89
  end
81
90
  end
82
91
  end
@@ -94,13 +94,15 @@ module Karafka
94
94
 
95
95
  # Allows for cross-virtual-partition consumers locks
96
96
  #
97
- # This is not needed in the non-VP flows because there is always only one consumer
98
- # per partition at the same time, so no coordination is needed directly for the
99
- # end users
97
+ # This is not needed in the non-VP flows except LRJ because there is always only one
98
+ # consumer per partition at the same time, so no coordination is needed directly for
99
+ # the end users. With LRJ it is needed and provided in the `LRJ::Default` strategy,
100
+ # because lifecycle events on revocation can run in parallel to the LRJ job as it is
101
+ # non-blocking.
100
102
  #
101
103
  # @param block [Proc] block we want to run in a mutex to prevent race-conditions
102
104
  def synchronize(&block)
103
- coordinator.synchronize(&block)
105
+ coordinator.shared_mutex.synchronize(&block)
104
106
  end
105
107
 
106
108
  private
@@ -111,6 +113,8 @@ module Karafka
111
113
  # @note This can be done without the mutex, because it happens from the same thread
112
114
  # for all the work (listener thread)
113
115
  def handle_before_enqueue
116
+ super
117
+
114
118
  coordinator.virtual_offset_manager.register(
115
119
  messages.map(&:offset)
116
120
  )
@@ -162,11 +162,24 @@ module Karafka
162
162
  @manual_seek
163
163
  end
164
164
 
165
+ # @param consumer [Object] karafka consumer (normal or pro)
166
+ # @return [Karafka::Processing::Result] result object which we can use to indicate
167
+ # consumption processing state.
168
+ def consumption(consumer)
169
+ @consumptions[consumer] ||= Processing::Result.new
170
+ end
171
+
165
172
  # Allows to run synchronized (locked) code that can operate only from a given thread
166
173
  #
167
174
  # @param block [Proc] code we want to run in the synchronized mode
175
+ #
168
176
  # @note We check if mutex is not owned already by the current thread so we won't end up with
169
177
  # a deadlock in case user runs coordinated code from inside of his own lock
178
+ #
179
+ # @note This is internal and should **not** be used to synchronize user-facing code.
180
+ # Otherwise user indirectly could cause deadlocks or prolonged locks by running his logic.
181
+ # This can and should however be used for multi-thread strategy applications and other
182
+ # internal operations locks.
170
183
  def synchronize(&block)
171
184
  if @mutex.owned?
172
185
  yield
@@ -174,13 +187,6 @@ module Karafka
174
187
  @mutex.synchronize(&block)
175
188
  end
176
189
  end
177
-
178
- # @param consumer [Object] karafka consumer (normal or pro)
179
- # @return [Karafka::Processing::Result] result object which we can use to indicate
180
- # consumption processing state.
181
- def consumption(consumer)
182
- @consumptions[consumer] ||= Processing::Result.new
183
- end
184
190
  end
185
191
  end
186
192
  end
@@ -9,6 +9,9 @@ module Karafka
9
9
  # on this queue, that's why internally we keep track of processing per group.
10
10
  #
11
11
  # We work with the assumption, that partitions data is evenly distributed.
12
+ #
13
+ # @note This job queue also keeps track / understands number of busy workers. This is because
14
+ # we use a single workers pool that can have granular scheduling.
12
15
  class JobsQueue
13
16
  # @return [Karafka::Processing::JobsQueue]
14
17
  def initialize
@@ -26,19 +29,14 @@ module Karafka
26
29
  h.compute_if_absent(k) { RUBY_VERSION < '3.2' ? TimedQueue.new : Queue.new }
27
30
  end
28
31
 
32
+ @concurrency = Karafka::App.config.concurrency
29
33
  @tick_interval = ::Karafka::App.config.internal.tick_interval
30
34
  @in_processing = Hash.new { |h, k| h[k] = [] }
35
+ @statistics = { busy: 0, enqueued: 0 }
31
36
 
32
37
  @mutex = Mutex.new
33
38
  end
34
39
 
35
- # Returns number of jobs that are either enqueued or in processing (but not finished)
36
- # @return [Integer] number of elements in the queue
37
- # @note Using `#pop` won't decrease this number as only marking job as completed does this
38
- def size
39
- @in_processing.values.map(&:size).sum
40
- end
41
-
42
40
  # Adds the job to the internal main queue, scheduling it for execution in a worker and marks
43
41
  # this job as in processing pipeline.
44
42
  #
@@ -55,6 +53,16 @@ module Karafka
55
53
 
56
54
  group << job
57
55
 
56
+ # Assume that moving to queue means being picked up immediately not to create stats
57
+ # race conditions because of pop overhead. If there are workers available, we assume
58
+ # work is going to be handled as we never reject enqueued jobs
59
+ if @statistics[:busy] < @concurrency
60
+ @statistics[:busy] += 1
61
+ else
62
+ # If system is fully loaded, it means this job is indeed enqueued
63
+ @statistics[:enqueued] += 1
64
+ end
65
+
58
66
  @queue << job
59
67
  end
60
68
  end
@@ -80,7 +88,16 @@ module Karafka
80
88
  # @param [Jobs::Base] job that was completed
81
89
  def complete(job)
82
90
  @mutex.synchronize do
91
+ # We finish one job and if there is another, we pick it up
92
+ if @statistics[:enqueued].positive?
93
+ @statistics[:enqueued] -= 1
94
+ # If no more enqueued jobs, we will be just less busy
95
+ else
96
+ @statistics[:busy] -= 1
97
+ end
98
+
83
99
  @in_processing[job.group_id].delete(job)
100
+
84
101
  tick(job.group_id)
85
102
  end
86
103
  end
@@ -141,10 +158,10 @@ module Karafka
141
158
  #
142
159
  # @return [Hash] hash with basic usage statistics of this queue.
143
160
  def statistics
144
- {
145
- busy: size - @queue.size,
146
- enqueued: @queue.size
147
- }.freeze
161
+ # Ensures there are no race conditions when returning this data
162
+ @mutex.synchronize do
163
+ @statistics.dup.freeze
164
+ end
148
165
  end
149
166
 
150
167
  private
@@ -4,19 +4,35 @@ module Karafka
4
4
  module Processing
5
5
  # FIFO scheduler for messages coming from various topics and partitions
6
6
  class Scheduler
7
+ # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
8
+ def initialize(queue)
9
+ @queue = queue
10
+ end
11
+
7
12
  # Schedules jobs in the fifo order
8
13
  #
9
- # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
10
14
  # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
11
- def schedule_consumption(queue, jobs_array)
15
+ def schedule_consumption(jobs_array)
12
16
  jobs_array.each do |job|
13
- queue << job
17
+ @queue << job
14
18
  end
15
19
  end
16
20
 
17
21
  # Both revocation and shutdown jobs can also run in fifo by default
18
22
  alias schedule_revocation schedule_consumption
19
23
  alias schedule_shutdown schedule_consumption
24
+
25
+ # This scheduler does not have anything to manage as it is a pass through and has no state
26
+ def manage
27
+ nil
28
+ end
29
+
30
+ # This scheduler does not need to be cleared because it is stateless
31
+ #
32
+ # @param _group_id [String] Subscription group id
33
+ def clear(_group_id)
34
+ nil
35
+ end
20
36
  end
21
37
  end
22
38
  end
@@ -78,6 +78,8 @@ module Karafka
78
78
 
79
79
  # No actions needed for the standard flow here
80
80
  def handle_before_enqueue
81
+ Karafka.monitor.instrument('consumer.before_enqueue', caller: self)
82
+
81
83
  nil
82
84
  end
83
85
 
@@ -3,20 +3,25 @@
3
3
  module Karafka
4
4
  module Routing
5
5
  # Builder used as a DSL layer for building consumers and telling them which topics to consume
6
+ #
7
+ # @note We lock the access just in case this is used in patterns. The locks here do not have
8
+ # any impact on routing usage unless being expanded, so no race condition risks.
9
+ #
6
10
  # @example Build a simple (most common) route
7
11
  # consumers do
8
12
  # topic :new_videos do
9
13
  # consumer NewVideosConsumer
10
14
  # end
11
15
  # end
12
- class Builder < Concurrent::Array
16
+ class Builder < Array
13
17
  # Empty default per-topic config
14
18
  EMPTY_DEFAULTS = ->(_) {}.freeze
15
19
 
16
20
  private_constant :EMPTY_DEFAULTS
17
21
 
18
22
  def initialize
19
- @draws = Concurrent::Array.new
23
+ @mutex = Mutex.new
24
+ @draws = []
20
25
  @defaults = EMPTY_DEFAULTS
21
26
  super
22
27
  end
@@ -34,21 +39,23 @@ module Karafka
34
39
  # end
35
40
  # end
36
41
  def draw(&block)
37
- @draws << block
42
+ @mutex.synchronize do
43
+ @draws << block
38
44
 
39
- instance_eval(&block)
45
+ instance_eval(&block)
40
46
 
41
- each do |consumer_group|
42
- # Validate consumer group settings
43
- Contracts::ConsumerGroup.new.validate!(consumer_group.to_h)
47
+ each do |consumer_group|
48
+ # Validate consumer group settings
49
+ Contracts::ConsumerGroup.new.validate!(consumer_group.to_h)
44
50
 
45
- # and then its topics settings
46
- consumer_group.topics.each do |topic|
47
- Contracts::Topic.new.validate!(topic.to_h)
48
- end
51
+ # and then its topics settings
52
+ consumer_group.topics.each do |topic|
53
+ Contracts::Topic.new.validate!(topic.to_h)
54
+ end
49
55
 
50
- # Initialize subscription groups after all the routing is done
51
- consumer_group.subscription_groups
56
+ # Initialize subscription groups after all the routing is done
57
+ consumer_group.subscription_groups
58
+ end
52
59
  end
53
60
  end
54
61
 
@@ -61,9 +68,11 @@ module Karafka
61
68
 
62
69
  # Clears the builder and the draws memory
63
70
  def clear
64
- @defaults = EMPTY_DEFAULTS
65
- @draws.clear
66
- super
71
+ @mutex.synchronize do
72
+ @defaults = EMPTY_DEFAULTS
73
+ @draws.clear
74
+ super
75
+ end
67
76
  end
68
77
 
69
78
  # @param block [Proc] block with per-topic evaluated defaults
@@ -71,7 +80,13 @@ module Karafka
71
80
  def defaults(&block)
72
81
  return @defaults unless block
73
82
 
74
- @defaults = block
83
+ if @mutex.owned?
84
+ @defaults = block
85
+ else
86
+ @mutex.synchronize do
87
+ @defaults = block
88
+ end
89
+ end
75
90
  end
76
91
 
77
92
  private
@@ -10,19 +10,24 @@ module Karafka
10
10
  class SubscriptionGroup
11
11
  attr_reader :id, :name, :topics, :kafka, :consumer_group
12
12
 
13
- # Numeric for counting groups
14
- GROUP_COUNT = Concurrent::AtomicFixnum.new
13
+ # Lock for generating new ids safely
14
+ ID_MUTEX = Mutex.new
15
15
 
16
- private_constant :GROUP_COUNT
16
+ private_constant :ID_MUTEX
17
17
 
18
18
  class << self
19
19
  # Generates new subscription group id that will be used in case of anonymous subscription
20
20
  # groups
21
21
  # @return [String] hex(6) compatible reproducible id
22
22
  def id
23
- ::Digest::MD5.hexdigest(
24
- GROUP_COUNT.increment.to_s
25
- )[0..11]
23
+ ID_MUTEX.synchronize do
24
+ @group_counter ||= 0
25
+ @group_counter += 1
26
+
27
+ ::Digest::MD5.hexdigest(
28
+ @group_counter.to_s
29
+ )[0..11]
30
+ end
26
31
  end
27
32
  end
28
33
 
@@ -8,7 +8,7 @@ module Karafka
8
8
  def call
9
9
  # Despite possibility of having several independent listeners, we aim to have one queue for
10
10
# jobs across and one workers pool for that
11
- jobs_queue = Processing::JobsQueue.new
11
+ jobs_queue = App.config.internal.processing.jobs_queue_class.new
12
12
 
13
13
  workers = Processing::WorkersBatch.new(jobs_queue)
14
14
  listeners = Connection::ListenersBatch.new(jobs_queue)
@@ -209,8 +209,9 @@ module Karafka
209
209
  end
210
210
 
211
211
  setting :processing do
212
+ setting :jobs_queue_class, default: Processing::JobsQueue
212
213
  # option scheduler [Object] scheduler we will be using
213
- setting :scheduler, default: Processing::Scheduler.new
214
+ setting :scheduler_class, default: Processing::Scheduler
214
215
  # option jobs_builder [Object] jobs builder we want to use
215
216
  setting :jobs_builder, default: Processing::JobsBuilder.new
216
217
  # option coordinator [Class] work coordinator we want to user for processing coordination
@@ -3,5 +3,5 @@
3
3
  # Main module namespace
4
4
  module Karafka
5
5
  # Current Karafka version
6
- VERSION = '2.2.12'
6
+ VERSION = '2.2.13'
7
7
  end
data/lib/karafka.rb CHANGED
@@ -16,7 +16,6 @@
16
16
  singleton
17
17
  digest
18
18
  zeitwerk
19
- concurrent/atomic/atomic_fixnum
20
19
  ].each(&method(:require))
21
20
 
22
21
  # Karafka framework main namespace
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: karafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.12
4
+ version: 2.2.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
35
35
  AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
36
36
  msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
37
37
  -----END CERTIFICATE-----
38
- date: 2023-11-09 00:00:00.000000000 Z
38
+ date: 2023-11-17 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: karafka-core
@@ -246,6 +246,7 @@ files:
246
246
  - lib/karafka/pro/processing/jobs/consume_non_blocking.rb
247
247
  - lib/karafka/pro/processing/jobs/revoked_non_blocking.rb
248
248
  - lib/karafka/pro/processing/jobs_builder.rb
249
+ - lib/karafka/pro/processing/jobs_queue.rb
249
250
  - lib/karafka/pro/processing/partitioner.rb
250
251
  - lib/karafka/pro/processing/scheduler.rb
251
252
  - lib/karafka/pro/processing/strategies.rb
metadata.gz.sig CHANGED
Binary file