karafka 2.2.12 → 2.2.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2ee648826503a1b1841a97e368ec2894a16eadc3509270d2a5dbbbe9ee703b3a
4
- data.tar.gz: 915285e224ab6dcaa4b2f75e2b6aa52f4f6eb0f613b8b06efe78e2ef4ff3f514
3
+ metadata.gz: 4056d72f0d37ac46c52597ebcfed87de031f9f250d57a64ec5c665d3423a3087
4
+ data.tar.gz: 95aeab42e351043873d548a5289e8355fe48fa7b7f27aaf1549a220c76eac9c1
5
5
  SHA512:
6
- metadata.gz: 1f7f109c533a98a46306be62a2172432f0d18af7003e401d3a894aa356bc2cae2622ba4c323bfdd230a66f0ae544a7cfb61ee0168b396e2e809e408a657eecb6
7
- data.tar.gz: d10de0ca361236c35bed27ca3c5db13e9e245805412f85c2d8d4e6a140fe088025403be7a65e1d97831613f02032bfe3fb2194c5ec7f6a880bc7ddc67a112813
6
+ metadata.gz: 8e41da4dff00dc3cb9749874568a275cdad81b7a762182cee7ea497bfe373dd1b3f777dd40638d0c30ff13f50c5913cdcad175edcc8b9b36a3e26fb5658fc986
7
+ data.tar.gz: 738352dea20404d42a80340c2fc27359d54185565e8069f8245662e02d33c8630ce7922c3938b06b07e5587bd007342c65439229484ed529ae050e356872f150
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # Karafka framework changelog
2
2
 
3
+ ## 2.2.13 (2023-11-17)
4
+ - **[Feature]** Introduce low-level extended Scheduling API for granular control of schedulers and jobs execution [Pro].
5
+ - [Improvement] Use separate lock for user-facing synchronization.
6
+ - [Improvement] Instrument `consumer.before_enqueue`.
7
+ - [Improvement] Limit usage of `concurrent-ruby` (plan to remove it as a dependency fully).
8
+ - [Improvement] Provide `#synchronize` API same as in VPs for LRJs to allow for lifecycle events and consumption synchronization.
9
+
3
10
  ## 2.2.12 (2023-11-09)
4
11
  - [Improvement] Rewrite the polling engine to update statistics and error callbacks despite longer non LRJ processing or long `max_wait_time` setups. This change provides stability to the statistics and background error emitting making them time-reliable.
5
12
  - [Improvement] Auto-update Inline Insights if new insights are present for all consumers and not only LRJ (OSS and Pro).
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- karafka (2.2.12)
4
+ karafka (2.2.13)
5
5
  karafka-core (>= 2.2.7, < 2.3.0)
6
6
  waterdrop (>= 2.6.11, < 3.0.0)
7
7
  zeitwerk (~> 2.3)
@@ -9,10 +9,10 @@ PATH
9
9
  GEM
10
10
  remote: https://rubygems.org/
11
11
  specs:
12
- activejob (7.1.1)
13
- activesupport (= 7.1.1)
12
+ activejob (7.1.2)
13
+ activesupport (= 7.1.2)
14
14
  globalid (>= 0.3.6)
15
- activesupport (7.1.1)
15
+ activesupport (7.1.2)
16
16
  base64
17
17
  bigdecimal
18
18
  concurrent-ruby (~> 1.0, >= 1.0.2)
@@ -22,14 +22,14 @@ GEM
22
22
  minitest (>= 5.1)
23
23
  mutex_m
24
24
  tzinfo (~> 2.0)
25
- base64 (0.1.1)
25
+ base64 (0.2.0)
26
26
  bigdecimal (3.1.4)
27
27
  byebug (11.1.3)
28
28
  concurrent-ruby (1.2.2)
29
29
  connection_pool (2.4.1)
30
30
  diff-lcs (1.5.0)
31
31
  docile (1.4.0)
32
- drb (2.1.1)
32
+ drb (2.2.0)
33
33
  ruby2_keywords
34
34
  erubi (1.12.0)
35
35
  factory_bot (6.3.0)
@@ -42,7 +42,7 @@ GEM
42
42
  karafka-core (2.2.7)
43
43
  concurrent-ruby (>= 1.1)
44
44
  karafka-rdkafka (>= 0.13.9, < 0.15.0)
45
- karafka-rdkafka (0.13.9)
45
+ karafka-rdkafka (0.14.0)
46
46
  ffi (~> 1.15)
47
47
  mini_portile2 (~> 2.6)
48
48
  rake (> 12)
@@ -54,10 +54,10 @@ GEM
54
54
  tilt (~> 2.0)
55
55
  mini_portile2 (2.8.5)
56
56
  minitest (5.20.0)
57
- mutex_m (0.1.2)
57
+ mutex_m (0.2.0)
58
58
  rack (3.0.8)
59
59
  rake (13.1.0)
60
- roda (3.73.0)
60
+ roda (3.74.0)
61
61
  rack
62
62
  rspec (3.12.0)
63
63
  rspec-core (~> 3.12.0)
@@ -16,7 +16,8 @@ en:
16
16
  max_wait_time_format: needs to be an integer bigger than 0
17
17
  kafka_format: needs to be a filled hash
18
18
  internal.processing.jobs_builder_format: cannot be nil
19
- internal.processing.scheduler_format: cannot be nil
19
+ internal.processing.jobs_queue_class_format: cannot be nil
20
+ internal.processing.scheduler_class_format: cannot be nil
20
21
  internal.processing.coordinator_class_format: cannot be nil
21
22
  internal.processing.partitioner_class_format: cannot be nil
22
23
  internal.processing.strategy_selector_format: cannot be nil
@@ -16,7 +16,7 @@ module Karafka
16
16
  class ConsumerGroupCoordinator
17
17
  # @param group_size [Integer] number of separate subscription groups in a consumer group
18
18
  def initialize(group_size)
19
- @shutdown_lock = Mutex.new
19
+ @shutdown_mutex = Mutex.new
20
20
  @group_size = group_size
21
21
  @finished = Set.new
22
22
  end
@@ -30,12 +30,12 @@ module Karafka
30
30
  # @return [Boolean] can we start shutdown on a given listener
31
31
  # @note If true, will also obtain a lock so no-one else will be closing the same time we do
32
32
  def shutdown?
33
- finished? && @shutdown_lock.try_lock
33
+ finished? && @shutdown_mutex.try_lock
34
34
  end
35
35
 
36
36
  # Unlocks the shutdown lock
37
37
  def unlock
38
- @shutdown_lock.unlock if @shutdown_lock.owned?
38
+ @shutdown_mutex.unlock if @shutdown_mutex.owned?
39
39
  end
40
40
 
41
41
  # Marks given listener as finished
@@ -23,8 +23,9 @@ module Karafka
23
23
  # @param consumer_group_coordinator [Karafka::Connection::ConsumerGroupCoordinator]
24
24
  # @param subscription_group [Karafka::Routing::SubscriptionGroup]
25
25
  # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
26
+ # @param scheduler [Karafka::Processing::Scheduler] scheduler we want to use
26
27
  # @return [Karafka::Connection::Listener] listener instance
27
- def initialize(consumer_group_coordinator, subscription_group, jobs_queue)
28
+ def initialize(consumer_group_coordinator, subscription_group, jobs_queue, scheduler)
28
29
  proc_config = ::Karafka::App.config.internal.processing
29
30
 
30
31
  @id = SecureRandom.hex(6)
@@ -36,8 +37,7 @@ module Karafka
36
37
  @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
37
38
  @jobs_builder = proc_config.jobs_builder
38
39
  @partitioner = proc_config.partitioner_class.new(subscription_group)
39
- # We reference scheduler here as it is much faster than fetching this each time
40
- @scheduler = proc_config.scheduler
40
+ @scheduler = scheduler
41
41
  @events_poller = Helpers::IntervalRunner.new { @client.events_poll }
42
42
  # We keep one buffer for messages to preserve memory and not allocate extra objects
43
43
  # We can do this that way because we always first schedule jobs using messages before we
@@ -243,7 +243,7 @@ module Karafka
243
243
  end
244
244
  end
245
245
 
246
- @scheduler.schedule_revocation(@jobs_queue, jobs)
246
+ @scheduler.schedule_revocation(jobs)
247
247
  end
248
248
 
249
249
  # Enqueues the shutdown jobs for all the executors that exist in our subscription group
@@ -256,7 +256,7 @@ module Karafka
256
256
  jobs << job
257
257
  end
258
258
 
259
- @scheduler.schedule_shutdown(@jobs_queue, jobs)
259
+ @scheduler.schedule_shutdown(jobs)
260
260
  end
261
261
 
262
262
  # Polls messages within the time and amount boundaries defined in the settings and then
@@ -298,12 +298,15 @@ module Karafka
298
298
 
299
299
  jobs.each(&:before_enqueue)
300
300
 
301
- @scheduler.schedule_consumption(@jobs_queue, jobs)
301
+ @scheduler.schedule_consumption(jobs)
302
302
  end
303
303
 
304
304
  # Waits for all the jobs from a given subscription group to finish before moving forward
305
305
  def wait
306
- @jobs_queue.wait(@subscription_group.id) { @events_poller.call }
306
+ @jobs_queue.wait(@subscription_group.id) do
307
+ @events_poller.call
308
+ @scheduler.manage
309
+ end
307
310
  end
308
311
 
309
312
  # Waits without blocking the polling
@@ -319,6 +322,8 @@ module Karafka
319
322
  def wait_pinging(wait_until:, after_ping: -> {})
320
323
  until wait_until.call
321
324
  @client.ping
325
+ @scheduler.manage
326
+
322
327
  after_ping.call
323
328
  sleep(0.2)
324
329
  end
@@ -334,6 +339,7 @@ module Karafka
334
339
  # resetting.
335
340
  @jobs_queue.wait(@subscription_group.id)
336
341
  @jobs_queue.clear(@subscription_group.id)
342
+ @scheduler.clear(@subscription_group.id)
337
343
  @events_poller.reset
338
344
  @client.reset
339
345
  @coordinators.reset
@@ -11,6 +11,10 @@ module Karafka
11
11
  # @param jobs_queue [JobsQueue]
12
12
  # @return [ListenersBatch]
13
13
  def initialize(jobs_queue)
14
+ # We need one scheduler for all the listeners because in case of complex schedulers, they
15
+ # should be able to distribute work whenever any work is done in any of the listeners
16
+ scheduler = App.config.internal.processing.scheduler_class.new(jobs_queue)
17
+
14
18
  @coordinators = []
15
19
 
16
20
  @batch = App.subscription_groups.flat_map do |_consumer_group, subscription_groups|
@@ -24,7 +28,8 @@ module Karafka
24
28
  Connection::Listener.new(
25
29
  consumer_group_coordinator,
26
30
  subscription_group,
27
- jobs_queue
31
+ jobs_queue,
32
+ scheduler
28
33
  )
29
34
  end
30
35
  end
@@ -73,7 +73,8 @@ module Karafka
73
73
 
74
74
  nested(:processing) do
75
75
  required(:jobs_builder) { |val| !val.nil? }
76
- required(:scheduler) { |val| !val.nil? }
76
+ required(:jobs_queue_class) { |val| !val.nil? }
77
+ required(:scheduler_class) { |val| !val.nil? }
77
78
  required(:coordinator_class) { |val| !val.nil? }
78
79
  required(:partitioner_class) { |val| !val.nil? }
79
80
  required(:strategy_selector) { |val| !val.nil? }
@@ -43,6 +43,7 @@ module Karafka
43
43
  rebalance.partitions_revoke
44
44
  rebalance.partitions_revoked
45
45
 
46
+ consumer.before_enqueue
46
47
  consumer.consume
47
48
  consumer.consumed
48
49
  consumer.consuming.pause
@@ -84,7 +84,8 @@ module Karafka
84
84
 
85
85
  icfg.processing.coordinator_class = Processing::Coordinator
86
86
  icfg.processing.partitioner_class = Processing::Partitioner
87
- icfg.processing.scheduler = Processing::Scheduler.new
87
+ icfg.processing.scheduler_class = Processing::Scheduler
88
+ icfg.processing.jobs_queue_class = Processing::JobsQueue
88
89
  icfg.processing.jobs_builder = Processing::JobsBuilder.new
89
90
  icfg.processing.strategy_selector = Processing::StrategySelector.new
90
91
 
@@ -21,14 +21,20 @@ module Karafka
21
21
 
22
22
  def_delegators :@collapser, :collapsed?, :collapse_until!
23
23
 
24
- attr_reader :filter, :virtual_offset_manager
24
+ attr_reader :filter, :virtual_offset_manager, :shared_mutex
25
25
 
26
26
  # @param args [Object] anything the base coordinator accepts
27
27
  def initialize(*args)
28
28
  super
29
29
 
30
30
  @executed = []
31
- @flow_lock = Mutex.new
31
+ @flow_mutex = Mutex.new
32
+ # Lock for user code synchronization
33
+ # We do not want to mix coordinator lock with the user lock not to create cases where
34
+ # user imposed lock would lock the internal operations of Karafka
35
+ # This shared lock can be used by the end user as it is not used internally by the
36
+ # framework and can be used for user-facing locking
37
+ @shared_mutex = Mutex.new
32
38
  @collapser = Collapser.new
33
39
  @filter = FiltersApplier.new(self)
34
40
 
@@ -89,7 +95,7 @@ module Karafka
89
95
  # Runs synchronized code once for a collective of virtual partitions prior to work being
90
96
  # enqueued
91
97
  def on_enqueued
92
- @flow_lock.synchronize do
98
+ @flow_mutex.synchronize do
93
99
  return unless executable?(:on_enqueued)
94
100
 
95
101
  yield(@last_message)
@@ -98,7 +104,7 @@ module Karafka
98
104
 
99
105
  # Runs given code only once per all the coordinated jobs upon starting first of them
100
106
  def on_started
101
- @flow_lock.synchronize do
107
+ @flow_mutex.synchronize do
102
108
  return unless executable?(:on_started)
103
109
 
104
110
  yield(@last_message)
@@ -109,7 +115,7 @@ module Karafka
109
115
  # It runs once per all the coordinated jobs and should be used to run any type of post
110
116
  # jobs coordination processing execution
111
117
  def on_finished
112
- @flow_lock.synchronize do
118
+ @flow_mutex.synchronize do
113
119
  return unless finished?
114
120
  return unless executable?(:on_finished)
115
121
 
@@ -119,7 +125,7 @@ module Karafka
119
125
 
120
126
  # Runs once after a partition is revoked
121
127
  def on_revoked
122
- @flow_lock.synchronize do
128
+ @flow_mutex.synchronize do
123
129
  return unless executable?(:on_revoked)
124
130
 
125
131
  yield(@last_message)
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ # Enhanced processing queue that provides ability to build complex work-distribution
18
+ # schedulers dedicated to particular job types
19
+ #
20
+ # Aside from the OSS queue capabilities it allows for jobless locking for advanced schedulers
21
+ class JobsQueue < Karafka::Processing::JobsQueue
22
+ attr_accessor :in_processing
23
+
24
+ # @return [Karafka::Pro::Processing::JobsQueue]
25
+ def initialize
26
+ super
27
+
28
+ @in_waiting = Hash.new { |h, k| h[k] = [] }
29
+
30
+ @statistics[:waiting] = 0
31
+ end
32
+
33
+ # Method that allows us to lock queue on a given subscription group without enqueuing a
34
+ # job. This can be used when building complex schedulers that want to postpone enqueuing
35
+ # before certain conditions are met.
36
+ #
37
+ # @param job [Jobs::Base] job used for locking
38
+ def lock(job)
39
+ @mutex.synchronize do
40
+ group = @in_waiting[job.group_id]
41
+
42
+ # This should never happen. Same job should not be locked twice
43
+ raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.include?(job)
44
+
45
+ @statistics[:waiting] += 1
46
+
47
+ group << job
48
+ end
49
+ end
50
+
51
+ # Method for unlocking the given subscription group queue space that was locked with a
52
+ # given job that was **not** added to the queue but used via `#lock`.
53
+ #
54
+ # @param job [Jobs::Base] job that locked the queue
55
+ def unlock(job)
56
+ @mutex.synchronize do
57
+ @statistics[:waiting] -= 1
58
+
59
+ return if @in_waiting[job.group_id].delete(job)
60
+
61
+ # This should never happen. It means there was a job being unlocked that was never
62
+ # locked in the first place
63
+ raise(Errors::JobsQueueSynchronizationError, job.group_id)
64
+ end
65
+ end
66
+
67
+ # Clears the processing states for a provided group. Useful when a recovery happens and we
68
+ # need to clean up state but only for a given subscription group.
69
+ #
70
+ # @param group_id [String]
71
+ def clear(group_id)
72
+ @mutex.synchronize do
73
+ @in_processing[group_id].clear
74
+
75
+ @statistics[:waiting] -= @in_waiting[group_id].size
76
+ @in_waiting[group_id].clear
77
+
78
+ # We unlock it just in case it was blocked when clearing started
79
+ tick(group_id)
80
+ end
81
+ end
82
+
83
+ # @param group_id [String]
84
+ #
85
+ # @return [Boolean] tell us if we have anything in the processing (or for processing) from
86
+ # a given group.
87
+ def empty?(group_id)
88
+ @mutex.synchronize do
89
+ @in_processing[group_id].empty? &&
90
+ @in_waiting[group_id].empty?
91
+ end
92
+ end
93
+
94
+ private
95
+
96
+ # @param group_id [String] id of the group in which jobs we're interested.
97
+ # @return [Boolean] should we keep waiting or not
98
+ # @note We do not wait for non-blocking jobs. Their flow should allow for `poll` running
99
+ # as they may exceed `max.poll.interval`
100
+ def wait?(group_id)
101
+ !(
102
+ @in_processing[group_id].all?(&:non_blocking?) &&
103
+ @in_waiting[group_id].all?(&:non_blocking?)
104
+ )
105
+ end
106
+ end
107
+ end
108
+ end
109
+ end
@@ -27,10 +27,9 @@ module Karafka
27
27
  class Scheduler < ::Karafka::Processing::Scheduler
28
28
  # Schedules jobs in the LJF order for consumption
29
29
  #
30
- # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
31
30
  # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
32
31
  #
33
- def schedule_consumption(queue, jobs_array)
32
+ def schedule_consumption(jobs_array)
34
33
  perf_tracker = PerformanceTracker.instance
35
34
 
36
35
  ordered = []
@@ -47,7 +46,7 @@ module Karafka
47
46
  ordered.map!(&:first)
48
47
 
49
48
  ordered.each do |job|
50
- queue << job
49
+ @queue << job
51
50
  end
52
51
  end
53
52
 
@@ -29,6 +29,8 @@ module Karafka
29
29
 
30
30
  # No actions needed for the standard flow here
31
31
  def handle_before_enqueue
32
+ Karafka.monitor.instrument('consumer.before_enqueue', caller: self)
33
+
32
34
  nil
33
35
  end
34
36
 
@@ -77,6 +77,15 @@ module Karafka
77
77
  revoked
78
78
  end
79
79
  end
80
+
81
+ # Allows for LRJ to synchronize its work. It may be needed because LRJ can run
82
+ # lifecycle events like revocation while the LRJ work is running and there may be a
83
+ # need for a critical section.
84
+ #
85
+ # @param block [Proc] block we want to run in a mutex to prevent race-conditions
86
+ def synchronize(&block)
87
+ coordinator.shared_mutex.synchronize(&block)
88
+ end
80
89
  end
81
90
  end
82
91
  end
@@ -94,13 +94,15 @@ module Karafka
94
94
 
95
95
  # Allows for cross-virtual-partition consumers locks
96
96
  #
97
- # This is not needed in the non-VP flows because there is always only one consumer
98
- # per partition at the same time, so no coordination is needed directly for the
99
- # end users
97
+ # This is not needed in the non-VP flows except LRJ because there is always only one
98
+ # consumer per partition at the same time, so no coordination is needed directly for
99
+ # the end users. With LRJ it is needed and provided in the `LRJ::Default` strategy,
100
+ # because lifecycle events on revocation can run in parallel to the LRJ job as it is
101
+ # non-blocking.
100
102
  #
101
103
  # @param block [Proc] block we want to run in a mutex to prevent race-conditions
102
104
  def synchronize(&block)
103
- coordinator.synchronize(&block)
105
+ coordinator.shared_mutex.synchronize(&block)
104
106
  end
105
107
 
106
108
  private
@@ -111,6 +113,8 @@ module Karafka
111
113
  # @note This can be done without the mutex, because it happens from the same thread
112
114
  # for all the work (listener thread)
113
115
  def handle_before_enqueue
116
+ super
117
+
114
118
  coordinator.virtual_offset_manager.register(
115
119
  messages.map(&:offset)
116
120
  )
@@ -162,11 +162,24 @@ module Karafka
162
162
  @manual_seek
163
163
  end
164
164
 
165
+ # @param consumer [Object] karafka consumer (normal or pro)
166
+ # @return [Karafka::Processing::Result] result object which we can use to indicate
167
+ # consumption processing state.
168
+ def consumption(consumer)
169
+ @consumptions[consumer] ||= Processing::Result.new
170
+ end
171
+
165
172
  # Allows to run synchronized (locked) code that can operate only from a given thread
166
173
  #
167
174
  # @param block [Proc] code we want to run in the synchronized mode
175
+ #
168
176
  # @note We check if mutex is not owned already by the current thread so we won't end up with
169
177
  # a deadlock in case user runs coordinated code from inside of his own lock
178
+ #
179
+ # @note This is internal and should **not** be used to synchronize user-facing code.
180
+ # Otherwise user indirectly could cause deadlocks or prolonged locks by running his logic.
181
+ # This can and should however be used for multi-thread strategy applications and other
182
+ # internal operations locks.
170
183
  def synchronize(&block)
171
184
  if @mutex.owned?
172
185
  yield
@@ -174,13 +187,6 @@ module Karafka
174
187
  @mutex.synchronize(&block)
175
188
  end
176
189
  end
177
-
178
- # @param consumer [Object] karafka consumer (normal or pro)
179
- # @return [Karafka::Processing::Result] result object which we can use to indicate
180
- # consumption processing state.
181
- def consumption(consumer)
182
- @consumptions[consumer] ||= Processing::Result.new
183
- end
184
190
  end
185
191
  end
186
192
  end
@@ -9,6 +9,9 @@ module Karafka
9
9
  # on this queue, that's why internally we keep track of processing per group.
10
10
  #
11
11
  # We work with the assumption, that partitions data is evenly distributed.
12
+ #
13
+ # @note This job queue also keeps track / understands number of busy workers. This is because
14
+ # we use a single workers pool that can have granular scheduling.
12
15
  class JobsQueue
13
16
  # @return [Karafka::Processing::JobsQueue]
14
17
  def initialize
@@ -26,19 +29,14 @@ module Karafka
26
29
  h.compute_if_absent(k) { RUBY_VERSION < '3.2' ? TimedQueue.new : Queue.new }
27
30
  end
28
31
 
32
+ @concurrency = Karafka::App.config.concurrency
29
33
  @tick_interval = ::Karafka::App.config.internal.tick_interval
30
34
  @in_processing = Hash.new { |h, k| h[k] = [] }
35
+ @statistics = { busy: 0, enqueued: 0 }
31
36
 
32
37
  @mutex = Mutex.new
33
38
  end
34
39
 
35
- # Returns number of jobs that are either enqueued or in processing (but not finished)
36
- # @return [Integer] number of elements in the queue
37
- # @note Using `#pop` won't decrease this number as only marking job as completed does this
38
- def size
39
- @in_processing.values.map(&:size).sum
40
- end
41
-
42
40
  # Adds the job to the internal main queue, scheduling it for execution in a worker and marks
43
41
  # this job as in processing pipeline.
44
42
  #
@@ -55,6 +53,16 @@ module Karafka
55
53
 
56
54
  group << job
57
55
 
56
+ # Assume that moving to queue means being picked up immediately not to create stats
57
+ # race conditions because of pop overhead. If there are workers available, we assume
58
+ # work is going to be handled as we never reject enqueued jobs
59
+ if @statistics[:busy] < @concurrency
60
+ @statistics[:busy] += 1
61
+ else
62
+ # If system is fully loaded, it means this job is indeed enqueued
63
+ @statistics[:enqueued] += 1
64
+ end
65
+
58
66
  @queue << job
59
67
  end
60
68
  end
@@ -80,7 +88,16 @@ module Karafka
80
88
  # @param [Jobs::Base] job that was completed
81
89
  def complete(job)
82
90
  @mutex.synchronize do
91
+ # We finish one job and if there is another, we pick it up
92
+ if @statistics[:enqueued].positive?
93
+ @statistics[:enqueued] -= 1
94
+ # If no more enqueued jobs, we will be just less busy
95
+ else
96
+ @statistics[:busy] -= 1
97
+ end
98
+
83
99
  @in_processing[job.group_id].delete(job)
100
+
84
101
  tick(job.group_id)
85
102
  end
86
103
  end
@@ -141,10 +158,10 @@ module Karafka
141
158
  #
142
159
  # @return [Hash] hash with basic usage statistics of this queue.
143
160
  def statistics
144
- {
145
- busy: size - @queue.size,
146
- enqueued: @queue.size
147
- }.freeze
161
+ # Ensures there are no race conditions when returning this data
162
+ @mutex.synchronize do
163
+ @statistics.dup.freeze
164
+ end
148
165
  end
149
166
 
150
167
  private
@@ -4,19 +4,35 @@ module Karafka
4
4
  module Processing
5
5
  # FIFO scheduler for messages coming from various topics and partitions
6
6
  class Scheduler
7
+ # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
8
+ def initialize(queue)
9
+ @queue = queue
10
+ end
11
+
7
12
  # Schedules jobs in the fifo order
8
13
  #
9
- # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
10
14
  # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
11
- def schedule_consumption(queue, jobs_array)
15
+ def schedule_consumption(jobs_array)
12
16
  jobs_array.each do |job|
13
- queue << job
17
+ @queue << job
14
18
  end
15
19
  end
16
20
 
17
21
  # Both revocation and shutdown jobs can also run in fifo by default
18
22
  alias schedule_revocation schedule_consumption
19
23
  alias schedule_shutdown schedule_consumption
24
+
25
+ # This scheduler does not have anything to manage as it is a pass through and has no state
26
+ def manage
27
+ nil
28
+ end
29
+
30
+ # This scheduler does not need to be cleared because it is stateless
31
+ #
32
+ # @param _group_id [String] Subscription group id
33
+ def clear(_group_id)
34
+ nil
35
+ end
20
36
  end
21
37
  end
22
38
  end
@@ -78,6 +78,8 @@ module Karafka
78
78
 
79
79
  # No actions needed for the standard flow here
80
80
  def handle_before_enqueue
81
+ Karafka.monitor.instrument('consumer.before_enqueue', caller: self)
82
+
81
83
  nil
82
84
  end
83
85
 
@@ -3,20 +3,25 @@
3
3
  module Karafka
4
4
  module Routing
5
5
  # Builder used as a DSL layer for building consumers and telling them which topics to consume
6
+ #
7
+ # @note We lock the access just in case this is used in patterns. The locks here do not have
8
+ # any impact on routing usage unless being expanded, so no race condition risks.
9
+ #
6
10
  # @example Build a simple (most common) route
7
11
  # consumers do
8
12
  # topic :new_videos do
9
13
  # consumer NewVideosConsumer
10
14
  # end
11
15
  # end
12
- class Builder < Concurrent::Array
16
+ class Builder < Array
13
17
  # Empty default per-topic config
14
18
  EMPTY_DEFAULTS = ->(_) {}.freeze
15
19
 
16
20
  private_constant :EMPTY_DEFAULTS
17
21
 
18
22
  def initialize
19
- @draws = Concurrent::Array.new
23
+ @mutex = Mutex.new
24
+ @draws = []
20
25
  @defaults = EMPTY_DEFAULTS
21
26
  super
22
27
  end
@@ -34,21 +39,23 @@ module Karafka
34
39
  # end
35
40
  # end
36
41
  def draw(&block)
37
- @draws << block
42
+ @mutex.synchronize do
43
+ @draws << block
38
44
 
39
- instance_eval(&block)
45
+ instance_eval(&block)
40
46
 
41
- each do |consumer_group|
42
- # Validate consumer group settings
43
- Contracts::ConsumerGroup.new.validate!(consumer_group.to_h)
47
+ each do |consumer_group|
48
+ # Validate consumer group settings
49
+ Contracts::ConsumerGroup.new.validate!(consumer_group.to_h)
44
50
 
45
- # and then its topics settings
46
- consumer_group.topics.each do |topic|
47
- Contracts::Topic.new.validate!(topic.to_h)
48
- end
51
+ # and then its topics settings
52
+ consumer_group.topics.each do |topic|
53
+ Contracts::Topic.new.validate!(topic.to_h)
54
+ end
49
55
 
50
- # Initialize subscription groups after all the routing is done
51
- consumer_group.subscription_groups
56
+ # Initialize subscription groups after all the routing is done
57
+ consumer_group.subscription_groups
58
+ end
52
59
  end
53
60
  end
54
61
 
@@ -61,9 +68,11 @@ module Karafka
61
68
 
62
69
  # Clears the builder and the draws memory
63
70
  def clear
64
- @defaults = EMPTY_DEFAULTS
65
- @draws.clear
66
- super
71
+ @mutex.synchronize do
72
+ @defaults = EMPTY_DEFAULTS
73
+ @draws.clear
74
+ super
75
+ end
67
76
  end
68
77
 
69
78
  # @param block [Proc] block with per-topic evaluated defaults
@@ -71,7 +80,13 @@ module Karafka
71
80
  def defaults(&block)
72
81
  return @defaults unless block
73
82
 
74
- @defaults = block
83
+ if @mutex.owned?
84
+ @defaults = block
85
+ else
86
+ @mutex.synchronize do
87
+ @defaults = block
88
+ end
89
+ end
75
90
  end
76
91
 
77
92
  private
@@ -10,19 +10,24 @@ module Karafka
10
10
  class SubscriptionGroup
11
11
  attr_reader :id, :name, :topics, :kafka, :consumer_group
12
12
 
13
- # Numeric for counting groups
14
- GROUP_COUNT = Concurrent::AtomicFixnum.new
13
+ # Lock for generating new ids safely
14
+ ID_MUTEX = Mutex.new
15
15
 
16
- private_constant :GROUP_COUNT
16
+ private_constant :ID_MUTEX
17
17
 
18
18
  class << self
19
19
  # Generates new subscription group id that will be used in case of anonymous subscription
20
20
  # groups
21
21
  # @return [String] hex(6) compatible reproducible id
22
22
  def id
23
- ::Digest::MD5.hexdigest(
24
- GROUP_COUNT.increment.to_s
25
- )[0..11]
23
+ ID_MUTEX.synchronize do
24
+ @group_counter ||= 0
25
+ @group_counter += 1
26
+
27
+ ::Digest::MD5.hexdigest(
28
+ @group_counter.to_s
29
+ )[0..11]
30
+ end
26
31
  end
27
32
  end
28
33
 
@@ -8,7 +8,7 @@ module Karafka
8
8
  def call
9
9
  # Despite possibility of having several independent listeners, we aim to have one queue for
10
10
# jobs across and one workers pool for that
11
- jobs_queue = Processing::JobsQueue.new
11
+ jobs_queue = App.config.internal.processing.jobs_queue_class.new
12
12
 
13
13
  workers = Processing::WorkersBatch.new(jobs_queue)
14
14
  listeners = Connection::ListenersBatch.new(jobs_queue)
@@ -209,8 +209,9 @@ module Karafka
209
209
  end
210
210
 
211
211
  setting :processing do
212
+ setting :jobs_queue_class, default: Processing::JobsQueue
212
213
  # option scheduler [Object] scheduler we will be using
213
- setting :scheduler, default: Processing::Scheduler.new
214
+ setting :scheduler_class, default: Processing::Scheduler
214
215
  # option jobs_builder [Object] jobs builder we want to use
215
216
  setting :jobs_builder, default: Processing::JobsBuilder.new
216
217
  # option coordinator [Class] work coordinator we want to user for processing coordination
@@ -3,5 +3,5 @@
3
3
  # Main module namespace
4
4
  module Karafka
5
5
  # Current Karafka version
6
- VERSION = '2.2.12'
6
+ VERSION = '2.2.13'
7
7
  end
data/lib/karafka.rb CHANGED
@@ -16,7 +16,6 @@
16
16
  singleton
17
17
  digest
18
18
  zeitwerk
19
- concurrent/atomic/atomic_fixnum
20
19
  ].each(&method(:require))
21
20
 
22
21
  # Karafka framework main namespace
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: karafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.12
4
+ version: 2.2.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
35
35
  AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
36
36
  msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
37
37
  -----END CERTIFICATE-----
38
- date: 2023-11-09 00:00:00.000000000 Z
38
+ date: 2023-11-17 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: karafka-core
@@ -246,6 +246,7 @@ files:
246
246
  - lib/karafka/pro/processing/jobs/consume_non_blocking.rb
247
247
  - lib/karafka/pro/processing/jobs/revoked_non_blocking.rb
248
248
  - lib/karafka/pro/processing/jobs_builder.rb
249
+ - lib/karafka/pro/processing/jobs_queue.rb
249
250
  - lib/karafka/pro/processing/partitioner.rb
250
251
  - lib/karafka/pro/processing/scheduler.rb
251
252
  - lib/karafka/pro/processing/strategies.rb
metadata.gz.sig CHANGED
Binary file