karafka 2.2.12 → 2.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2ee648826503a1b1841a97e368ec2894a16eadc3509270d2a5dbbbe9ee703b3a
4
- data.tar.gz: 915285e224ab6dcaa4b2f75e2b6aa52f4f6eb0f613b8b06efe78e2ef4ff3f514
3
+ metadata.gz: 4056d72f0d37ac46c52597ebcfed87de031f9f250d57a64ec5c665d3423a3087
4
+ data.tar.gz: 95aeab42e351043873d548a5289e8355fe48fa7b7f27aaf1549a220c76eac9c1
5
5
  SHA512:
6
- metadata.gz: 1f7f109c533a98a46306be62a2172432f0d18af7003e401d3a894aa356bc2cae2622ba4c323bfdd230a66f0ae544a7cfb61ee0168b396e2e809e408a657eecb6
7
- data.tar.gz: d10de0ca361236c35bed27ca3c5db13e9e245805412f85c2d8d4e6a140fe088025403be7a65e1d97831613f02032bfe3fb2194c5ec7f6a880bc7ddc67a112813
6
+ metadata.gz: 8e41da4dff00dc3cb9749874568a275cdad81b7a762182cee7ea497bfe373dd1b3f777dd40638d0c30ff13f50c5913cdcad175edcc8b9b36a3e26fb5658fc986
7
+ data.tar.gz: 738352dea20404d42a80340c2fc27359d54185565e8069f8245662e02d33c8630ce7922c3938b06b07e5587bd007342c65439229484ed529ae050e356872f150
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # Karafka framework changelog
2
2
 
3
+ ## 2.2.13 (2023-11-17)
4
+ - **[Feature]** Introduce a low-level extended Scheduling API for granular control of schedulers and job execution [Pro].
5
+ - [Improvement] Use a separate lock for user-facing synchronization.
6
+ - [Improvement] Instrument `consumer.before_enqueue`.
7
+ - [Improvement] Limit usage of `concurrent-ruby` (with a plan to fully remove it as a dependency).
8
+ - [Improvement] Provide the same `#synchronize` API for LRJs as in VPs to allow for synchronization of lifecycle events and consumption.
9
+
3
10
  ## 2.2.12 (2023-11-09)
4
11
  - [Improvement] Rewrite the polling engine to update statistics and error callbacks despite longer non LRJ processing or long `max_wait_time` setups. This change provides stability to the statistics and background error emitting making them time-reliable.
5
12
  - [Improvement] Auto-update Inline Insights if new insights are present for all consumers and not only LRJ (OSS and Pro).
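The newly instrumented `consumer.before_enqueue` event can be observed like any other Karafka notification. A minimal sketch, assuming a hypothetical subscriber in an app initializer; the event id and the `:caller` payload key come from this release, the logging itself is illustrative:

```ruby
# Hypothetical subscriber; only the event id and the :caller payload are taken
# from this release, the debug logging is illustrative
Karafka.monitor.subscribe('consumer.before_enqueue') do |event|
  consumer = event[:caller]

  Karafka.logger.debug("before_enqueue: #{consumer.class} on #{consumer.topic.name}")
end
```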
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- karafka (2.2.12)
4
+ karafka (2.2.13)
5
5
  karafka-core (>= 2.2.7, < 2.3.0)
6
6
  waterdrop (>= 2.6.11, < 3.0.0)
7
7
  zeitwerk (~> 2.3)
@@ -9,10 +9,10 @@ PATH
9
9
  GEM
10
10
  remote: https://rubygems.org/
11
11
  specs:
12
- activejob (7.1.1)
13
- activesupport (= 7.1.1)
12
+ activejob (7.1.2)
13
+ activesupport (= 7.1.2)
14
14
  globalid (>= 0.3.6)
15
- activesupport (7.1.1)
15
+ activesupport (7.1.2)
16
16
  base64
17
17
  bigdecimal
18
18
  concurrent-ruby (~> 1.0, >= 1.0.2)
@@ -22,14 +22,14 @@ GEM
22
22
  minitest (>= 5.1)
23
23
  mutex_m
24
24
  tzinfo (~> 2.0)
25
- base64 (0.1.1)
25
+ base64 (0.2.0)
26
26
  bigdecimal (3.1.4)
27
27
  byebug (11.1.3)
28
28
  concurrent-ruby (1.2.2)
29
29
  connection_pool (2.4.1)
30
30
  diff-lcs (1.5.0)
31
31
  docile (1.4.0)
32
- drb (2.1.1)
32
+ drb (2.2.0)
33
33
  ruby2_keywords
34
34
  erubi (1.12.0)
35
35
  factory_bot (6.3.0)
@@ -42,7 +42,7 @@ GEM
42
42
  karafka-core (2.2.7)
43
43
  concurrent-ruby (>= 1.1)
44
44
  karafka-rdkafka (>= 0.13.9, < 0.15.0)
45
- karafka-rdkafka (0.13.9)
45
+ karafka-rdkafka (0.14.0)
46
46
  ffi (~> 1.15)
47
47
  mini_portile2 (~> 2.6)
48
48
  rake (> 12)
@@ -54,10 +54,10 @@ GEM
54
54
  tilt (~> 2.0)
55
55
  mini_portile2 (2.8.5)
56
56
  minitest (5.20.0)
57
- mutex_m (0.1.2)
57
+ mutex_m (0.2.0)
58
58
  rack (3.0.8)
59
59
  rake (13.1.0)
60
- roda (3.73.0)
60
+ roda (3.74.0)
61
61
  rack
62
62
  rspec (3.12.0)
63
63
  rspec-core (~> 3.12.0)
@@ -16,7 +16,8 @@ en:
16
16
  max_wait_time_format: needs to be an integer bigger than 0
17
17
  kafka_format: needs to be a filled hash
18
18
  internal.processing.jobs_builder_format: cannot be nil
19
- internal.processing.scheduler_format: cannot be nil
19
+ internal.processing.jobs_queue_class_format: cannot be nil
20
+ internal.processing.scheduler_class_format: cannot be nil
20
21
  internal.processing.coordinator_class_format: cannot be nil
21
22
  internal.processing.partitioner_class_format: cannot be nil
22
23
  internal.processing.strategy_selector_format: cannot be nil
@@ -16,7 +16,7 @@ module Karafka
16
16
  class ConsumerGroupCoordinator
17
17
  # @param group_size [Integer] number of separate subscription groups in a consumer group
18
18
  def initialize(group_size)
19
- @shutdown_lock = Mutex.new
19
+ @shutdown_mutex = Mutex.new
20
20
  @group_size = group_size
21
21
  @finished = Set.new
22
22
  end
@@ -30,12 +30,12 @@ module Karafka
30
30
  # @return [Boolean] can we start shutdown on a given listener
31
31
  # @note If true, will also obtain a lock so no-one else will be closing the same time we do
32
32
  def shutdown?
33
- finished? && @shutdown_lock.try_lock
33
+ finished? && @shutdown_mutex.try_lock
34
34
  end
35
35
 
36
36
  # Unlocks the shutdown lock
37
37
  def unlock
38
- @shutdown_lock.unlock if @shutdown_lock.owned?
38
+ @shutdown_mutex.unlock if @shutdown_mutex.owned?
39
39
  end
40
40
 
41
41
  # Marks given listener as finished
@@ -23,8 +23,9 @@ module Karafka
23
23
  # @param consumer_group_coordinator [Karafka::Connection::ConsumerGroupCoordinator]
24
24
  # @param subscription_group [Karafka::Routing::SubscriptionGroup]
25
25
  # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
26
+ # @param scheduler [Karafka::Processing::Scheduler] scheduler we want to use
26
27
  # @return [Karafka::Connection::Listener] listener instance
27
- def initialize(consumer_group_coordinator, subscription_group, jobs_queue)
28
+ def initialize(consumer_group_coordinator, subscription_group, jobs_queue, scheduler)
28
29
  proc_config = ::Karafka::App.config.internal.processing
29
30
 
30
31
  @id = SecureRandom.hex(6)
@@ -36,8 +37,7 @@ module Karafka
36
37
  @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
37
38
  @jobs_builder = proc_config.jobs_builder
38
39
  @partitioner = proc_config.partitioner_class.new(subscription_group)
39
- # We reference scheduler here as it is much faster than fetching this each time
40
- @scheduler = proc_config.scheduler
40
+ @scheduler = scheduler
41
41
  @events_poller = Helpers::IntervalRunner.new { @client.events_poll }
42
42
  # We keep one buffer for messages to preserve memory and not allocate extra objects
43
43
  # We can do this that way because we always first schedule jobs using messages before we
@@ -243,7 +243,7 @@ module Karafka
243
243
  end
244
244
  end
245
245
 
246
- @scheduler.schedule_revocation(@jobs_queue, jobs)
246
+ @scheduler.schedule_revocation(jobs)
247
247
  end
248
248
 
249
249
  # Enqueues the shutdown jobs for all the executors that exist in our subscription group
@@ -256,7 +256,7 @@ module Karafka
256
256
  jobs << job
257
257
  end
258
258
 
259
- @scheduler.schedule_shutdown(@jobs_queue, jobs)
259
+ @scheduler.schedule_shutdown(jobs)
260
260
  end
261
261
 
262
262
  # Polls messages within the time and amount boundaries defined in the settings and then
@@ -298,12 +298,15 @@ module Karafka
298
298
 
299
299
  jobs.each(&:before_enqueue)
300
300
 
301
- @scheduler.schedule_consumption(@jobs_queue, jobs)
301
+ @scheduler.schedule_consumption(jobs)
302
302
  end
303
303
 
304
304
  # Waits for all the jobs from a given subscription group to finish before moving forward
305
305
  def wait
306
- @jobs_queue.wait(@subscription_group.id) { @events_poller.call }
306
+ @jobs_queue.wait(@subscription_group.id) do
307
+ @events_poller.call
308
+ @scheduler.manage
309
+ end
307
310
  end
308
311
 
309
312
  # Waits without blocking the polling
@@ -319,6 +322,8 @@ module Karafka
319
322
  def wait_pinging(wait_until:, after_ping: -> {})
320
323
  until wait_until.call
321
324
  @client.ping
325
+ @scheduler.manage
326
+
322
327
  after_ping.call
323
328
  sleep(0.2)
324
329
  end
@@ -334,6 +339,7 @@ module Karafka
334
339
  # resetting.
335
340
  @jobs_queue.wait(@subscription_group.id)
336
341
  @jobs_queue.clear(@subscription_group.id)
342
+ @scheduler.clear(@subscription_group.id)
337
343
  @events_poller.reset
338
344
  @client.reset
339
345
  @coordinators.reset
@@ -11,6 +11,10 @@ module Karafka
11
11
  # @param jobs_queue [JobsQueue]
12
12
  # @return [ListenersBatch]
13
13
  def initialize(jobs_queue)
14
+ # We need one scheduler for all the listeners because in case of complex schedulers, they
15
+ # should be able to distribute work whenever any work is done in any of the listeners
16
+ scheduler = App.config.internal.processing.scheduler_class.new(jobs_queue)
17
+
14
18
  @coordinators = []
15
19
 
16
20
  @batch = App.subscription_groups.flat_map do |_consumer_group, subscription_groups|
@@ -24,7 +28,8 @@ module Karafka
24
28
  Connection::Listener.new(
25
29
  consumer_group_coordinator,
26
30
  subscription_group,
27
- jobs_queue
31
+ jobs_queue,
32
+ scheduler
28
33
  )
29
34
  end
30
35
  end
@@ -73,7 +73,8 @@ module Karafka
73
73
 
74
74
  nested(:processing) do
75
75
  required(:jobs_builder) { |val| !val.nil? }
76
- required(:scheduler) { |val| !val.nil? }
76
+ required(:jobs_queue_class) { |val| !val.nil? }
77
+ required(:scheduler_class) { |val| !val.nil? }
77
78
  required(:coordinator_class) { |val| !val.nil? }
78
79
  required(:partitioner_class) { |val| !val.nil? }
79
80
  required(:strategy_selector) { |val| !val.nil? }
@@ -43,6 +43,7 @@ module Karafka
43
43
  rebalance.partitions_revoke
44
44
  rebalance.partitions_revoked
45
45
 
46
+ consumer.before_enqueue
46
47
  consumer.consume
47
48
  consumer.consumed
48
49
  consumer.consuming.pause
@@ -84,7 +84,8 @@ module Karafka
84
84
 
85
85
  icfg.processing.coordinator_class = Processing::Coordinator
86
86
  icfg.processing.partitioner_class = Processing::Partitioner
87
- icfg.processing.scheduler = Processing::Scheduler.new
87
+ icfg.processing.scheduler_class = Processing::Scheduler
88
+ icfg.processing.jobs_queue_class = Processing::JobsQueue
88
89
  icfg.processing.jobs_builder = Processing::JobsBuilder.new
89
90
  icfg.processing.strategy_selector = Processing::StrategySelector.new
90
91
 
@@ -21,14 +21,20 @@ module Karafka
21
21
 
22
22
  def_delegators :@collapser, :collapsed?, :collapse_until!
23
23
 
24
- attr_reader :filter, :virtual_offset_manager
24
+ attr_reader :filter, :virtual_offset_manager, :shared_mutex
25
25
 
26
26
  # @param args [Object] anything the base coordinator accepts
27
27
  def initialize(*args)
28
28
  super
29
29
 
30
30
  @executed = []
31
- @flow_lock = Mutex.new
31
+ @flow_mutex = Mutex.new
32
+ # Lock for user code synchronization
33
+ # We do not want to mix the coordinator lock with the user lock, so that a
34
+ # user-imposed lock cannot block the internal operations of Karafka
35
+ # This shared lock is not used internally by the framework, so the end user
36
+ # can safely rely on it for user-facing locking
37
+ @shared_mutex = Mutex.new
32
38
  @collapser = Collapser.new
33
39
  @filter = FiltersApplier.new(self)
34
40
 
@@ -89,7 +95,7 @@ module Karafka
89
95
  # Runs synchronized code once for a collective of virtual partitions prior to work being
90
96
  # enqueued
91
97
  def on_enqueued
92
- @flow_lock.synchronize do
98
+ @flow_mutex.synchronize do
93
99
  return unless executable?(:on_enqueued)
94
100
 
95
101
  yield(@last_message)
@@ -98,7 +104,7 @@ module Karafka
98
104
 
99
105
  # Runs given code only once per all the coordinated jobs upon starting first of them
100
106
  def on_started
101
- @flow_lock.synchronize do
107
+ @flow_mutex.synchronize do
102
108
  return unless executable?(:on_started)
103
109
 
104
110
  yield(@last_message)
@@ -109,7 +115,7 @@ module Karafka
109
115
  # It runs once per all the coordinated jobs and should be used to run any type of post
110
116
  # jobs coordination processing execution
111
117
  def on_finished
112
- @flow_lock.synchronize do
118
+ @flow_mutex.synchronize do
113
119
  return unless finished?
114
120
  return unless executable?(:on_finished)
115
121
 
@@ -119,7 +125,7 @@ module Karafka
119
125
 
120
126
  # Runs once after a partition is revoked
121
127
  def on_revoked
122
- @flow_lock.synchronize do
128
+ @flow_mutex.synchronize do
123
129
  return unless executable?(:on_revoked)
124
130
 
125
131
  yield(@last_message)
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ # Enhanced processing queue that provides ability to build complex work-distribution
18
+ # schedulers dedicated to particular job types
19
+ #
20
+ # Aside from the OSS queue capabilities, it allows for jobless locking for advanced schedulers
21
+ class JobsQueue < Karafka::Processing::JobsQueue
22
+ attr_accessor :in_processing
23
+
24
+ # @return [Karafka::Pro::Processing::JobsQueue]
25
+ def initialize
26
+ super
27
+
28
+ @in_waiting = Hash.new { |h, k| h[k] = [] }
29
+
30
+ @statistics[:waiting] = 0
31
+ end
32
+
33
+ # Method that allows us to lock the queue on a given subscription group without enqueuing a
34
+ # job. This can be used when building complex schedulers that want to postpone enqueuing
35
+ # before certain conditions are met.
36
+ #
37
+ # @param job [Jobs::Base] job used for locking
38
+ def lock(job)
39
+ @mutex.synchronize do
40
+ group = @in_waiting[job.group_id]
41
+
42
+ # This should never happen. Same job should not be locked twice
43
+ raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.include?(job)
44
+
45
+ @statistics[:waiting] += 1
46
+
47
+ group << job
48
+ end
49
+ end
50
+
51
+ # Method for unlocking the given subscription group queue space that was locked with a
52
+ # given job that was **not** added to the queue but used via `#lock`.
53
+ #
54
+ # @param job [Jobs::Base] job that locked the queue
55
+ def unlock(job)
56
+ @mutex.synchronize do
57
+ @statistics[:waiting] -= 1
58
+
59
+ return if @in_waiting[job.group_id].delete(job)
60
+
61
+ # This should never happen. It means there was a job being unlocked that was never
62
+ # locked in the first place
63
+ raise(Errors::JobsQueueSynchronizationError, job.group_id)
64
+ end
65
+ end
66
+
67
+ # Clears the processing states for a provided group. Useful when a recovery happens and we
68
+ # need to clean up state but only for a given subscription group.
69
+ #
70
+ # @param group_id [String]
71
+ def clear(group_id)
72
+ @mutex.synchronize do
73
+ @in_processing[group_id].clear
74
+
75
+ @statistics[:waiting] -= @in_waiting[group_id].size
76
+ @in_waiting[group_id].clear
77
+
78
+ # We unlock it just in case it was blocked when clearing started
79
+ tick(group_id)
80
+ end
81
+ end
82
+
83
+ # @param group_id [String]
84
+ #
85
+ # @return [Boolean] tells us whether we have anything in processing (or waiting to be processed) from
86
+ # a given group.
87
+ def empty?(group_id)
88
+ @mutex.synchronize do
89
+ @in_processing[group_id].empty? &&
90
+ @in_waiting[group_id].empty?
91
+ end
92
+ end
93
+
94
+ private
95
+
96
+ # @param group_id [String] id of the group in which jobs we're interested.
97
+ # @return [Boolean] should we keep waiting or not
98
+ # @note We do not wait for non-blocking jobs. Their flow should allow for `poll` running
99
+ # as they may exceed `max.poll.interval`
100
+ def wait?(group_id)
101
+ !(
102
+ @in_processing[group_id].all?(&:non_blocking?) &&
103
+ @in_waiting[group_id].all?(&:non_blocking?)
104
+ )
105
+ end
106
+ end
107
+ end
108
+ end
109
+ end
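The `#lock`/`#unlock` pair above is the building block of the extended Scheduling API: a scheduler can hold a job back while keeping the listener waiting for it and enqueue it later from `#manage`. A rough sketch of that pattern with a hypothetical scheduler; only the queue and scheduler interfaces visible in this diff are assumed, while the class name, the postponement rule and the one-second delay are illustrative:

```ruby
# Hypothetical scheduler that postpones consumption jobs using the Pro
# JobsQueue locking shown above. NOTE: one scheduler instance is shared across
# listener threads, so a real implementation would need its own locking around
# @pending.
class DelayingScheduler < ::Karafka::Processing::Scheduler
  def initialize(queue)
    super
    @pending = []
  end

  # Park jobs instead of enqueuing them and lock the queue so the listener
  # keeps waiting for this subscription group
  def schedule_consumption(jobs_array)
    jobs_array.each do |job|
      @queue.lock(job)
      @pending << [job, Time.now]
    end
  end

  # Called from the listener wait loops; release jobs that waited long enough
  def manage
    @pending.delete_if do |job, parked_at|
      next false if Time.now - parked_at < 1

      @queue.unlock(job)
      @queue << job
      true
    end
  end

  # Drop local references for a recovered subscription group; the queue clears
  # its own waiting bookkeeping in its #clear
  def clear(group_id)
    @pending.delete_if { |job, _| job.group_id == group_id }
  end
end
```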
@@ -27,10 +27,9 @@ module Karafka
27
27
  class Scheduler < ::Karafka::Processing::Scheduler
28
28
  # Schedules jobs in the LJF order for consumption
29
29
  #
30
- # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
31
30
  # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
32
31
  #
33
- def schedule_consumption(queue, jobs_array)
32
+ def schedule_consumption(jobs_array)
34
33
  perf_tracker = PerformanceTracker.instance
35
34
 
36
35
  ordered = []
@@ -47,7 +46,7 @@ module Karafka
47
46
  ordered.map!(&:first)
48
47
 
49
48
  ordered.each do |job|
50
- queue << job
49
+ @queue << job
51
50
  end
52
51
  end
53
52
 
@@ -29,6 +29,8 @@ module Karafka
29
29
 
30
30
  # No actions needed for the standard flow here
31
31
  def handle_before_enqueue
32
+ Karafka.monitor.instrument('consumer.before_enqueue', caller: self)
33
+
32
34
  nil
33
35
  end
34
36
 
@@ -77,6 +77,15 @@ module Karafka
77
77
  revoked
78
78
  end
79
79
  end
80
+
81
+ # Allows for LRJ to synchronize its work. It may be needed because LRJ can run
82
+ # lifecycle events like revocation while the LRJ work is running and there may be a
83
+ # need for a critical section.
84
+ #
85
+ # @param block [Proc] block we want to run in a mutex to prevent race-conditions
86
+ def synchronize(&block)
87
+ coordinator.shared_mutex.synchronize(&block)
88
+ end
80
89
  end
81
90
  end
82
91
  end
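From the consumer's perspective, this gives LRJ code the same `#synchronize` guard that VPs already had. A minimal sketch of an LRJ consumer protecting state shared with revocation handling; the consumer class and its helper methods are assumptions, only `#synchronize`, `#revoked?` and `#mark_as_consumed` are framework API:

```ruby
# Hypothetical Long-Running Job consumer; #process and the revocation cleanup
# are illustrative placeholders
class ReportsConsumer < ApplicationConsumer
  def consume
    messages.each do |message|
      return if revoked?

      # Revocation lifecycle code can run in parallel to this long-running
      # work, so changes to shared state go through the shared mutex
      synchronize do
        process(message)
        mark_as_consumed(message)
      end
    end
  end

  def revoked
    synchronize { cleanup_partial_state }
  end

  private

  def process(message)
    # business logic placeholder
  end

  def cleanup_partial_state
    # release resources tied to the revoked partition
  end
end
```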
@@ -94,13 +94,15 @@ module Karafka
94
94
 
95
95
  # Allows for cross-virtual-partition consumers locks
96
96
  #
97
- # This is not needed in the non-VP flows because there is always only one consumer
98
- # per partition at the same time, so no coordination is needed directly for the
99
- # end users
97
+ # This is not needed in the non-VP flows except LRJ because there is always only one
98
+ # consumer per partition at the same time, so no coordination is needed directly for
99
+ # the end users. With LRJ it is needed and provided in the `LRJ::Default` strategy,
100
+ # because lifecycle events on revocation can run in parallel to the LRJ job as it is
101
+ # non-blocking.
100
102
  #
101
103
  # @param block [Proc] block we want to run in a mutex to prevent race-conditions
102
104
  def synchronize(&block)
103
- coordinator.synchronize(&block)
105
+ coordinator.shared_mutex.synchronize(&block)
104
106
  end
105
107
 
106
108
  private
@@ -111,6 +113,8 @@ module Karafka
111
113
  # @note This can be done without the mutex, because it happens from the same thread
112
114
  # for all the work (listener thread)
113
115
  def handle_before_enqueue
116
+ super
117
+
114
118
  coordinator.virtual_offset_manager.register(
115
119
  messages.map(&:offset)
116
120
  )
@@ -162,11 +162,24 @@ module Karafka
162
162
  @manual_seek
163
163
  end
164
164
 
165
+ # @param consumer [Object] karafka consumer (normal or pro)
166
+ # @return [Karafka::Processing::Result] result object which we can use to indicate
167
+ # consumption processing state.
168
+ def consumption(consumer)
169
+ @consumptions[consumer] ||= Processing::Result.new
170
+ end
171
+
165
172
  # Allows to run synchronized (locked) code that can operate only from a given thread
166
173
  #
167
174
  # @param block [Proc] code we want to run in the synchronized mode
175
+ #
168
176
  # @note We check if mutex is not owned already by the current thread so we won't end up with
169
177
  # a deadlock in case user runs coordinated code from inside of his own lock
178
+ #
179
+ # @note This is internal and should **not** be used to synchronize user-facing code.
180
+ # Otherwise the user could indirectly cause deadlocks or prolonged locks by running their logic.
181
+ # This can and should however be used for multi-thread strategy applications and other
182
+ # internal operations locks.
170
183
  def synchronize(&block)
171
184
  if @mutex.owned?
172
185
  yield
@@ -174,13 +187,6 @@ module Karafka
174
187
  @mutex.synchronize(&block)
175
188
  end
176
189
  end
177
-
178
- # @param consumer [Object] karafka consumer (normal or pro)
179
- # @return [Karafka::Processing::Result] result object which we can use to indicate
180
- # consumption processing state.
181
- def consumption(consumer)
182
- @consumptions[consumer] ||= Processing::Result.new
183
- end
184
190
  end
185
191
  end
186
192
  end
@@ -9,6 +9,9 @@ module Karafka
9
9
  # on this queue, that's why internally we keep track of processing per group.
10
10
  #
11
11
  # We work with the assumption, that partitions data is evenly distributed.
12
+ #
13
+ # @note This job queue also keeps track of the number of busy workers. This is because
14
+ # we use a single workers pool that can have granular scheduling.
12
15
  class JobsQueue
13
16
  # @return [Karafka::Processing::JobsQueue]
14
17
  def initialize
@@ -26,19 +29,14 @@ module Karafka
26
29
  h.compute_if_absent(k) { RUBY_VERSION < '3.2' ? TimedQueue.new : Queue.new }
27
30
  end
28
31
 
32
+ @concurrency = Karafka::App.config.concurrency
29
33
  @tick_interval = ::Karafka::App.config.internal.tick_interval
30
34
  @in_processing = Hash.new { |h, k| h[k] = [] }
35
+ @statistics = { busy: 0, enqueued: 0 }
31
36
 
32
37
  @mutex = Mutex.new
33
38
  end
34
39
 
35
- # Returns number of jobs that are either enqueued or in processing (but not finished)
36
- # @return [Integer] number of elements in the queue
37
- # @note Using `#pop` won't decrease this number as only marking job as completed does this
38
- def size
39
- @in_processing.values.map(&:size).sum
40
- end
41
-
42
40
  # Adds the job to the internal main queue, scheduling it for execution in a worker and marks
43
41
  # this job as in processing pipeline.
44
42
  #
@@ -55,6 +53,16 @@ module Karafka
55
53
 
56
54
  group << job
57
55
 
56
+ # Assume that moving to the queue means being picked up immediately, so we do not create
57
+ # stats race conditions because of pop overhead. If there are workers available, we assume
58
+ # the work is going to be handled, as we never reject enqueued jobs
59
+ if @statistics[:busy] < @concurrency
60
+ @statistics[:busy] += 1
61
+ else
62
+ # If system is fully loaded, it means this job is indeed enqueued
63
+ @statistics[:enqueued] += 1
64
+ end
65
+
58
66
  @queue << job
59
67
  end
60
68
  end
@@ -80,7 +88,16 @@ module Karafka
80
88
  # @param [Jobs::Base] job that was completed
81
89
  def complete(job)
82
90
  @mutex.synchronize do
91
+ # We finish one job and if there is another, we pick it up
92
+ if @statistics[:enqueued].positive?
93
+ @statistics[:enqueued] -= 1
94
+ # If no more enqueued jobs, we will be just less busy
95
+ else
96
+ @statistics[:busy] -= 1
97
+ end
98
+
83
99
  @in_processing[job.group_id].delete(job)
100
+
84
101
  tick(job.group_id)
85
102
  end
86
103
  end
@@ -141,10 +158,10 @@ module Karafka
141
158
  #
142
159
  # @return [Hash] hash with basic usage statistics of this queue.
143
160
  def statistics
144
- {
145
- busy: size - @queue.size,
146
- enqueued: @queue.size
147
- }.freeze
161
+ # Ensures there are no race conditions when returning this data
162
+ @mutex.synchronize do
163
+ @statistics.dup.freeze
164
+ end
148
165
  end
149
166
 
150
167
  private
@@ -4,19 +4,35 @@ module Karafka
4
4
  module Processing
5
5
  # FIFO scheduler for messages coming from various topics and partitions
6
6
  class Scheduler
7
+ # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
8
+ def initialize(queue)
9
+ @queue = queue
10
+ end
11
+
7
12
  # Schedules jobs in the fifo order
8
13
  #
9
- # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
10
14
  # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
11
- def schedule_consumption(queue, jobs_array)
15
+ def schedule_consumption(jobs_array)
12
16
  jobs_array.each do |job|
13
- queue << job
17
+ @queue << job
14
18
  end
15
19
  end
16
20
 
17
21
  # Both revocation and shutdown jobs can also run in fifo by default
18
22
  alias schedule_revocation schedule_consumption
19
23
  alias schedule_shutdown schedule_consumption
24
+
25
+ # This scheduler does not have anything to manage as it is a pass through and has no state
26
+ def manage
27
+ nil
28
+ end
29
+
30
+ # This scheduler does not need to be cleared because it is stateless
31
+ #
32
+ # @param _group_id [String] Subscription group id
33
+ def clear(_group_id)
34
+ nil
35
+ end
20
36
  end
21
37
  end
22
38
  end
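With schedulers now configured as classes and instantiated once with the jobs queue, swapping the FIFO behaviour is roughly a matter of subclassing and pointing the internal setting at the new class. A hedged sketch; `MyScheduler` and its reordering rule are assumptions, the setting name comes from this diff:

```ruby
# Hypothetical scheduler override; it inherits the queue reference and the
# no-op #manage / #clear from the base class shown above
class MyScheduler < ::Karafka::Processing::Scheduler
  def schedule_consumption(jobs_array)
    # Example ordering rule: newest jobs first instead of FIFO
    jobs_array.reverse_each { |job| @queue << job }
  end
end

class KarafkaApp < Karafka::App
  setup do |config|
    config.client_id = 'example_app'
    # Assumed wiring based on the new default introduced in this version
    config.internal.processing.scheduler_class = MyScheduler
  end
end
```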
@@ -78,6 +78,8 @@ module Karafka
78
78
 
79
79
  # No actions needed for the standard flow here
80
80
  def handle_before_enqueue
81
+ Karafka.monitor.instrument('consumer.before_enqueue', caller: self)
82
+
81
83
  nil
82
84
  end
83
85
 
@@ -3,20 +3,25 @@
3
3
  module Karafka
4
4
  module Routing
5
5
  # Builder used as a DSL layer for building consumers and telling them which topics to consume
6
+ #
7
+ # @note We lock access just in case this is used in patterns. The locks here do not have
8
+ # any impact on routing usage unless it is being expanded, so there is no race condition risk.
9
+ #
6
10
  # @example Build a simple (most common) route
7
11
  # consumers do
8
12
  # topic :new_videos do
9
13
  # consumer NewVideosConsumer
10
14
  # end
11
15
  # end
12
- class Builder < Concurrent::Array
16
+ class Builder < Array
13
17
  # Empty default per-topic config
14
18
  EMPTY_DEFAULTS = ->(_) {}.freeze
15
19
 
16
20
  private_constant :EMPTY_DEFAULTS
17
21
 
18
22
  def initialize
19
- @draws = Concurrent::Array.new
23
+ @mutex = Mutex.new
24
+ @draws = []
20
25
  @defaults = EMPTY_DEFAULTS
21
26
  super
22
27
  end
@@ -34,21 +39,23 @@ module Karafka
34
39
  # end
35
40
  # end
36
41
  def draw(&block)
37
- @draws << block
42
+ @mutex.synchronize do
43
+ @draws << block
38
44
 
39
- instance_eval(&block)
45
+ instance_eval(&block)
40
46
 
41
- each do |consumer_group|
42
- # Validate consumer group settings
43
- Contracts::ConsumerGroup.new.validate!(consumer_group.to_h)
47
+ each do |consumer_group|
48
+ # Validate consumer group settings
49
+ Contracts::ConsumerGroup.new.validate!(consumer_group.to_h)
44
50
 
45
- # and then its topics settings
46
- consumer_group.topics.each do |topic|
47
- Contracts::Topic.new.validate!(topic.to_h)
48
- end
51
+ # and then its topics settings
52
+ consumer_group.topics.each do |topic|
53
+ Contracts::Topic.new.validate!(topic.to_h)
54
+ end
49
55
 
50
- # Initialize subscription groups after all the routing is done
51
- consumer_group.subscription_groups
56
+ # Initialize subscription groups after all the routing is done
57
+ consumer_group.subscription_groups
58
+ end
52
59
  end
53
60
  end
54
61
 
@@ -61,9 +68,11 @@ module Karafka
61
68
 
62
69
  # Clears the builder and the draws memory
63
70
  def clear
64
- @defaults = EMPTY_DEFAULTS
65
- @draws.clear
66
- super
71
+ @mutex.synchronize do
72
+ @defaults = EMPTY_DEFAULTS
73
+ @draws.clear
74
+ super
75
+ end
67
76
  end
68
77
 
69
78
  # @param block [Proc] block with per-topic evaluated defaults
@@ -71,7 +80,13 @@ module Karafka
71
80
  def defaults(&block)
72
81
  return @defaults unless block
73
82
 
74
- @defaults = block
83
+ if @mutex.owned?
84
+ @defaults = block
85
+ else
86
+ @mutex.synchronize do
87
+ @defaults = block
88
+ end
89
+ end
75
90
  end
76
91
 
77
92
  private
@@ -10,19 +10,24 @@ module Karafka
10
10
  class SubscriptionGroup
11
11
  attr_reader :id, :name, :topics, :kafka, :consumer_group
12
12
 
13
- # Numeric for counting groups
14
- GROUP_COUNT = Concurrent::AtomicFixnum.new
13
+ # Lock for generating new ids safely
14
+ ID_MUTEX = Mutex.new
15
15
 
16
- private_constant :GROUP_COUNT
16
+ private_constant :ID_MUTEX
17
17
 
18
18
  class << self
19
19
  # Generates new subscription group id that will be used in case of anonymous subscription
20
20
  # groups
21
21
  # @return [String] hex(6) compatible reproducible id
22
22
  def id
23
- ::Digest::MD5.hexdigest(
24
- GROUP_COUNT.increment.to_s
25
- )[0..11]
23
+ ID_MUTEX.synchronize do
24
+ @group_counter ||= 0
25
+ @group_counter += 1
26
+
27
+ ::Digest::MD5.hexdigest(
28
+ @group_counter.to_s
29
+ )[0..11]
30
+ end
26
31
  end
27
32
  end
28
33
 
@@ -8,7 +8,7 @@ module Karafka
8
8
  def call
9
9
  # Despite possibility of having several independent listeners, we aim to have one queue for
10
10
  # jobs across and one workers poll for that
11
- jobs_queue = Processing::JobsQueue.new
11
+ jobs_queue = App.config.internal.processing.jobs_queue_class.new
12
12
 
13
13
  workers = Processing::WorkersBatch.new(jobs_queue)
14
14
  listeners = Connection::ListenersBatch.new(jobs_queue)
@@ -209,8 +209,9 @@ module Karafka
209
209
  end
210
210
 
211
211
  setting :processing do
212
+ setting :jobs_queue_class, default: Processing::JobsQueue
212
213
  # option scheduler [Object] scheduler we will be using
213
- setting :scheduler, default: Processing::Scheduler.new
214
+ setting :scheduler_class, default: Processing::Scheduler
214
215
  # option jobs_builder [Object] jobs builder we want to use
215
216
  setting :jobs_builder, default: Processing::JobsBuilder.new
216
217
  # option coordinator [Class] work coordinator we want to user for processing coordination
@@ -3,5 +3,5 @@
3
3
  # Main module namespace
4
4
  module Karafka
5
5
  # Current Karafka version
6
- VERSION = '2.2.12'
6
+ VERSION = '2.2.13'
7
7
  end
data/lib/karafka.rb CHANGED
@@ -16,7 +16,6 @@
16
16
  singleton
17
17
  digest
18
18
  zeitwerk
19
- concurrent/atomic/atomic_fixnum
20
19
  ].each(&method(:require))
21
20
 
22
21
  # Karafka framework main namespace
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: karafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.12
4
+ version: 2.2.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
35
35
  AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
36
36
  msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
37
37
  -----END CERTIFICATE-----
38
- date: 2023-11-09 00:00:00.000000000 Z
38
+ date: 2023-11-17 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: karafka-core
@@ -246,6 +246,7 @@ files:
246
246
  - lib/karafka/pro/processing/jobs/consume_non_blocking.rb
247
247
  - lib/karafka/pro/processing/jobs/revoked_non_blocking.rb
248
248
  - lib/karafka/pro/processing/jobs_builder.rb
249
+ - lib/karafka/pro/processing/jobs_queue.rb
249
250
  - lib/karafka/pro/processing/partitioner.rb
250
251
  - lib/karafka/pro/processing/scheduler.rb
251
252
  - lib/karafka/pro/processing/strategies.rb
metadata.gz.sig CHANGED
Binary file