karafka 2.0.0.alpha6 → 2.0.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 862df94b26c24809f82e07f71c39433b90ef08f68f053a004a87aa466b872dac
-   data.tar.gz: 2c533cbd6c271fe282f59c2030d9cc885555242bb8bc9316d0264a5ccfd694a0
+   metadata.gz: f108cb4288d0ed0510381f51c77d49e052b947f6180c9b9c0b06e0ac2b599894
+   data.tar.gz: 3d79066d0107c08f450ca9f4c3b5c4a39aae497836c80bf8380c65f1406b82c0
  SHA512:
-   metadata.gz: 8ce3720e535d65f121bcbfd957286cbc41404f2aff2751622183367862db55f8231c286d5bbede4a18649eafcd085952a7fc8e7569a74dbd3633c9db906114e5
-   data.tar.gz: e77819ccd2be263b02958fcee71ce9228c93c7dee62fabbea61711fbbdbffa997bb9a85a7f668cf8bb596b7a98a773ace560ad5075c46e70f20cd302b354b0fe
+   metadata.gz: 4aae257010c992c59ce4b01ead54ff2cfd4e8ccd8cbe6b52214b3cedf8f879690e0d577f2b41f44b1ab6888d7e27bbc92f3ba4a69e8b127687fb4c43bff51fbc
+   data.tar.gz: f65e425cb84152d20a055bdb9a94fd98280597cdf5e431337cb8604040534cacbfdd03efd6dc23b86c9ecf25721c860bd55ca75ad3f98e4c66136a88c1efc4e7
checksums.yaml.gz.sig CHANGED
Binary file
data/.ruby-version CHANGED
@@ -1 +1 @@
- 3.1.0
+ 3.1.2
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
  # Karafka framework changelog

+ ## 2.0.0-beta1 (2022-05-22)
+ - Update the jobs queue blocking engine and allow for non-blocking jobs execution
+ - Provide `#prepared` hook that always runs before the fetching loop is unblocked
+ - [Pro] Introduce performance tracker for scheduling optimizer
+ - Provide ability to pause (`#pause`) and resume (`#resume`) given partitions from the consumers
+ - Small integration specs refactoring + specs for pausing scenarios
+
  ## 2.0.0-alpha6 (2022-04-17)
  - Fix a bug, where upon missing boot file and Rails, railtie would fail with a generic exception (#818)
  - Fix an issue with parallel pristine specs colliding with each other during `bundle install` (#820)
@@ -26,12 +33,12 @@
  ## 2.0.0-alpha2 (2022-02-19)
  - Require `kafka` keys to be symbols
- - Added ActiveJob Pro adapter
+ - [Pro] Added ActiveJob Pro adapter
  - Small updates to the license and docs

  ## 2.0.0-alpha1 (2022-01-30)
  - Change license to `LGPL-3.0`
- - Introduce a Pro subscription
+ - [Pro] Introduce a Pro subscription
  - Switch from `ruby-kafka` to `librdkafka` as an underlying driver
  - Introduce fully automatic integration tests that go through the whole server lifecycle
  - Integrate WaterDrop tightly with autoconfiguration inheritance and an option to redefine it
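The three consumer-facing entries above (`#prepared`, `#pause`, `#resume`) combine roughly as follows. A minimal sketch, assuming a topic already routed to the consumer; `OrdersConsumer`, `overloaded?` and `process` are hypothetical names and the 10-second timeout is purely illustrative:

    # frozen_string_literal: true

    require 'karafka'

    # Hypothetical consumer exercising the flow-control API added in beta1
    class OrdersConsumer < Karafka::BaseConsumer
      def consume
        messages.each do |message|
          if overloaded?
            # Pause this topic partition at the current offset for 10 seconds;
            # without an explicit timeout, the exponential backoff strategy
            # defined for retries is used instead
            pause(message.offset, 10_000)
            return
          end

          process(message)
          mark_as_consumed(message)
        end
      end

      private

      # Placeholders for real application logic
      def overloaded?
        false
      end

      def process(message)
        puts message.raw_payload
      end
    end

A paused partition continues once the pause expires, and `#resume` (shown later in this diff) can lift it earlier.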
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
    remote: .
    specs:
-     karafka (2.0.0.alpha6)
+     karafka (2.0.0.beta1)
        dry-configurable (~> 0.13)
        dry-monitor (~> 0.5)
        dry-validation (~> 1.7)
@@ -13,10 +13,10 @@ PATH
  GEM
    remote: https://rubygems.org/
    specs:
-     activejob (7.0.2.3)
-       activesupport (= 7.0.2.3)
+     activejob (7.0.3)
+       activesupport (= 7.0.3)
        globalid (>= 0.3.6)
-     activesupport (7.0.2.3)
+     activesupport (7.0.3)
        concurrent-ruby (~> 1.0, >= 1.0.2)
        i18n (>= 1.6, < 2)
        minitest (>= 5.1)
@@ -25,7 +25,7 @@ GEM
      concurrent-ruby (1.1.10)
      diff-lcs (1.5.0)
      docile (1.4.0)
-     dry-configurable (0.14.0)
+     dry-configurable (0.15.0)
        concurrent-ruby (~> 1.0)
        dry-core (~> 0.6)
      dry-container (0.9.0)
@@ -121,4 +121,4 @@ DEPENDENCIES
    simplecov

  BUNDLED WITH
-    2.3.10
+    2.3.11
data/docker-compose.yml CHANGED
@@ -16,6 +16,7 @@ services:
        KAFKA_CREATE_TOPICS:
          "integrations_0_02:2:1,\
          integrations_1_02:2:1,\
+         integrations_2_02:2:1,\
          integrations_0_03:3:1,\
          integrations_1_03:3:1,\
          integrations_2_03:3:1,\
data/lib/karafka/base_consumer.rb CHANGED
@@ -10,8 +10,8 @@ module Karafka
      attr_accessor :messages
      # @return [Karafka::Connection::Client] kafka connection client
      attr_accessor :client
-     # @return [Karafka::TimeTrackers::Pause] current topic partition pause
-     attr_accessor :pause
+     # @return [Karafka::TimeTrackers::Pause] current topic partition pause tracker
+     attr_accessor :pause_tracker
      # @return [Waterdrop::Producer] producer instance
      attr_accessor :producer

@@ -24,7 +24,7 @@ module Karafka
      Karafka.monitor.instrument('consumer.consumed', caller: self) do
        consume

-       pause.reset
+       pause_tracker.reset

        # Mark as consumed only if manual offset management is not on
        return if topic.manual_offset_management
@@ -40,8 +40,8 @@ module Karafka
        caller: self,
        type: 'consumer.consume.error'
      )
-     client.pause(topic.name, messages.first.partition, @seek_offset || messages.first.offset)
-     pause.pause
+
+     pause(@seek_offset || messages.first.offset)
    end

    # Trigger method for running on shutdown.
@@ -76,8 +76,31 @@ module Karafka
      )
    end

+   # Can be used to run preparation code
+   #
+   # @private
+   # @note This should not be used by end users, as it is part of the lifecycle of things and
+   #   not part of the public API. It can act as a hook when creating non-blocking consumers
+   #   and doing other advanced stuff.
+   def on_prepared
+     Karafka.monitor.instrument('consumer.prepared', caller: self) do
+       prepared
+     end
+   rescue StandardError => e
+     Karafka.monitor.instrument(
+       'error.occurred',
+       error: e,
+       caller: self,
+       type: 'consumer.prepared.error'
+     )
+   end
+
    private

+   # Method that gets called in the blocking flow, allowing to set up any type of resources
+   # or to send additional commands to Kafka before the proper execution starts.
+   def prepared; end
+
    # Method that will perform business logic on data received from Kafka (it will consume
    # the data)
    # @note This method needs to be implemented in a subclass. We stub it here as a failover if
@@ -97,6 +120,10 @@ module Karafka
    # Marks message as consumed in an async way.
    #
    # @param message [Messages::Message] last successfully processed message.
+   # @note We keep track of this offset in case we would mark as consumed and get an error
+   #   when processing another message. In a case like this we do not pause on the message
+   #   we've already processed but rather on the next one. This applies to both sync and
+   #   async versions of this method.
    def mark_as_consumed(message)
      client.mark_as_consumed(message)
      @seek_offset = message.offset + 1
@@ -110,6 +137,32 @@ module Karafka
      @seek_offset = message.offset + 1
    end

+   # Pauses processing on a given offset for the current topic partition
+   #
+   # After the given partition is resumed, it will continue processing from the given offset
+   # @param offset [Integer] offset from which we want to restart the processing
+   # @param timeout [Integer, nil] how long in milliseconds we want to pause, or nil to use
+   #   the default exponential pausing strategy defined for retries
+   def pause(offset, timeout = nil)
+     client.pause(
+       messages.metadata.topic,
+       messages.metadata.partition,
+       offset
+     )
+
+     timeout ? pause_tracker.pause(timeout) : pause_tracker.pause
+   end
+
+   # Resumes processing of the current topic partition
+   def resume
+     client.resume(
+       messages.metadata.topic,
+       messages.metadata.partition
+     )
+
+     pause_tracker.expire
+   end
+
    # Seeks in the context of current topic and partition
    #
    # @param offset [Integer] offset where we want to seek
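As a usage sketch of the hook introduced above: `#on_prepared` is framework-internal, while the private `#prepared` is the piece meant to be redefined. `EventsConsumer` below is a hypothetical example, not part of this release:

    # Hypothetical consumer redefining the private #prepared hook
    class EventsConsumer < Karafka::BaseConsumer
      def consume
        messages.each { |message| puts message.raw_payload }
      end

      private

      # Runs in the blocking phase, before the fetching loop is unblocked,
      # so it is a good place for batch-scoped setup
      def prepared
        @batch_started_at = Time.now.utc
      end
    end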
data/lib/karafka/connection/listener.rb CHANGED
@@ -15,6 +15,8 @@ module Karafka
        @pauses_manager = PausesManager.new
        @client = Client.new(@subscription_group)
        @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
+       # We reference the scheduler here, as it is much faster than fetching it each time
+       @scheduler = ::Karafka::App.config.internal.scheduler
      end

      # Runs the main listener fetch loop.
@@ -66,9 +68,9 @@ module Karafka
        # distributing consuming jobs as upon revoking, we might get assigned to the same
        # partitions, thus getting their jobs. The revoking jobs need to finish before
        # appropriate consumers are taken down and re-created
-       wait(@subscription_group) if distribute_revoke_lost_partitions_jobs
+       wait(@subscription_group) if schedule_revoke_lost_partitions_jobs

-       distribute_partitions_jobs(messages_buffer)
+       schedule_partitions_jobs(messages_buffer)

        # We wait only on jobs from our subscription group. Other groups are independent.
        wait(@subscription_group)
@@ -103,15 +105,17 @@ module Karafka

      # Enqueues revoking jobs for partitions that were taken away from the running process.
      # @return [Boolean] was there anything to revoke
-     def distribute_revoke_lost_partitions_jobs
+     # @note We do not use the scheduler here, as those jobs are not meant to be
+     #   order-optimized in any way. Since they run only occasionally, it is irrelevant.
+     def schedule_revoke_lost_partitions_jobs
        revoked_partitions = @client.rebalance_manager.revoked_partitions

        return false if revoked_partitions.empty?

        revoked_partitions.each do |topic, partitions|
          partitions.each do |partition|
-           pause = @pauses_manager.fetch(topic, partition)
-           executor = @executors.fetch(topic, partition, pause)
+           pause_tracker = @pauses_manager.fetch(topic, partition)
+           executor = @executors.fetch(topic, partition, pause_tracker)
            @jobs_queue << Processing::Jobs::Revoked.new(executor)
          end
        end
@@ -122,8 +126,8 @@ module Karafka
      # Takes the messages per topic partition and enqueues processing jobs in threads.
      #
      # @param messages_buffer [Karafka::Connection::MessagesBuffer] buffer with messages
-     def distribute_partitions_jobs(messages_buffer)
-       messages_buffer.each do |topic, partition, messages|
+     def schedule_partitions_jobs(messages_buffer)
+       @scheduler.call(messages_buffer) do |topic, partition, messages|
          pause = @pauses_manager.fetch(topic, partition)

          next if pause.paused?
data/lib/karafka/connection/messages_buffer.rb CHANGED
@@ -10,6 +10,10 @@ module Karafka
      class MessagesBuffer
        attr_reader :size

+       extend Forwardable
+
+       def_delegators :@groups, :each
+
        # @return [Karafka::Connection::MessagesBuffer] buffer instance
        def initialize
          @size = 0
@@ -20,19 +24,6 @@ module Karafka
          end
        end

-       # Iterates over aggregated data providing messages per topic partition.
-       #
-       # @yieldparam [String] topic name
-       # @yieldparam [Integer] partition number
-       # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
-       def each
-         @groups.each do |topic, partitions|
-           partitions.each do |partition, messages|
-             yield(topic, partition, messages)
-           end
-         end
-       end
-
        # Adds a message to the buffer.
        #
        # @param message [Rdkafka::Consumer::Message] raw rdkafka message
data/lib/karafka/connection/pauses_manager.rb CHANGED
@@ -12,11 +12,11 @@ module Karafka
        end
      end

-     # Creates or fetches pause of a given topic partition.
+     # Creates or fetches the pause tracker of a given topic partition.
      #
      # @param topic [String] topic name
      # @param partition [Integer] partition number
-     # @return [Karafka::TimeTrackers::Pause] pause instance
+     # @return [Karafka::TimeTrackers::Pause] pause tracker instance
      def fetch(topic, partition)
        @pauses[topic][partition] ||= TimeTrackers::Pause.new(
          timeout: Karafka::App.config.pause_timeout,
data/lib/karafka/contracts/config.rb CHANGED
@@ -32,6 +32,7 @@ module Karafka
        required(:routing_builder)
        required(:status)
        required(:process)
+       required(:scheduler)
        required(:subscription_groups_builder)
      end
    end
data/lib/karafka/instrumentation/monitor.rb CHANGED
@@ -22,6 +22,7 @@ module Karafka
        app.stopping
        app.stopped

+       consumer.prepared
        consumer.consumed
        consumer.revoked
        consumer.shutdown
data/lib/karafka/pro/active_job/dispatcher.rb CHANGED
@@ -1,18 +1,18 @@
  # frozen_string_literal: true

- # This Karafka component is a Pro component.
- # All of the commercial components are present in the lib/karafka/pro directory of this repository
- # and their usage requires commercial license agreement.
- #
- # Karafka has also commercial-friendly license, commercial support and commercial components.
- #
- # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
- # your code to Maciej Mensfeld.
-
  module Karafka
    module Pro
      # Karafka Pro ActiveJob components
      module ActiveJob
+       # This Karafka component is a Pro component.
+       # All of the commercial components are present in the lib/karafka/pro directory of this
+       # repository and their usage requires commercial license agreement.
+       #
+       # Karafka has also commercial-friendly license, commercial support and commercial components.
+       #
+       # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+       # of your code to Maciej Mensfeld.
+
        # Pro dispatcher that sends the ActiveJob job to a proper topic based on the queue name
        # and that allows to inject additional options into the producer, effectively allowing for a
        # much better and more granular control over the dispatch and consumption process.
data/lib/karafka/pro/active_job/job_options_contract.rb CHANGED
@@ -1,17 +1,17 @@
  # frozen_string_literal: true

- # This Karafka component is a Pro component.
- # All of the commercial components are present in the lib/karafka/pro directory of this repository
- # and their usage requires commercial license agreement.
- #
- # Karafka has also commercial-friendly license, commercial support and commercial components.
- #
- # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
- # your code to Maciej Mensfeld.
-
  module Karafka
    module Pro
      module ActiveJob
+       # This Karafka component is a Pro component.
+       # All of the commercial components are present in the lib/karafka/pro directory of this
+       # repository and their usage requires commercial license agreement.
+       #
+       # Karafka has also commercial-friendly license, commercial support and commercial components.
+       #
+       # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+       # of your code to Maciej Mensfeld.
+
        # Contract for validating the options that can be altered with `#karafka_options` per job
        # class that works with Pro features.
        class JobOptionsContract < ::Karafka::ActiveJob::JobOptionsContract
data/lib/karafka/pro/loader.rb CHANGED
@@ -1,15 +1,16 @@
  # frozen_string_literal: true

- # This Karafka component is a Pro component.
- # All of the commercial components are present in the lib/karafka/pro directory of this repository
- # and their usage requires commercial license agreement.
- #
- # Karafka has also commercial-friendly license, commercial support and commercial components.
- #
- # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
- # your code to Maciej Mensfeld.
-
  module Karafka
    module Pro
+     # This Karafka component is a Pro component.
+     # All of the commercial components are present in the lib/karafka/pro directory of this
+     # repository and their usage requires commercial license agreement.
+     #
+     # Karafka has also commercial-friendly license, commercial support and commercial components.
+     #
+     # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+     # of your code to Maciej Mensfeld.
+
      # Loader requires and loads all the pro components only when they are needed
      class Loader
        class << self
@@ -17,11 +18,15 @@ module Karafka
          # @param config [Dry::Configurable::Config] whole app config that we can alter with pro
          #   components
          def setup(config)
+           require_relative 'performance_tracker'
            require_relative 'active_job/dispatcher'
            require_relative 'active_job/job_options_contract'

            config.internal.active_job.dispatcher = ActiveJob::Dispatcher.new
            config.internal.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
+
+           # Monitor time needed to process each message from a single partition
+           config.monitor.subscribe(PerformanceTracker.instance)
          end
        end
      end
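The `config.monitor.subscribe(PerformanceTracker.instance)` line relies on the monitor's listener convention: a `#on_<event name with dots as underscores>` method receives each published event. A sketch of a hand-rolled listener in the same style; `TimingListener` is hypothetical, and treating `event[:time]` as milliseconds is an assumption based on how the tracker below uses it:

    # Hypothetical listener following the PerformanceTracker subscription style
    class TimingListener
      # Maps to the 'consumer.consumed' event
      def on_consumer_consumed(event)
        metadata = event[:caller].messages.metadata

        # Unit of event[:time] (assumed ms) mirrors the tracker's usage
        puts "#{metadata.topic}/#{metadata.partition}: #{event[:time]} ms"
      end
    end

    Karafka.monitor.subscribe(TimingListener.new)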
data/lib/karafka/pro/performance_tracker.rb ADDED
@@ -0,0 +1,80 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Pro
+     # This Karafka component is a Pro component.
+     # All of the commercial components are present in the lib/karafka/pro directory of this
+     # repository and their usage requires commercial license agreement.
+     #
+     # Karafka has also commercial-friendly license, commercial support and commercial components.
+     #
+     # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+     # of your code to Maciej Mensfeld.
+
+     # Tracker used to keep track of performance metrics
+     # It provides insights that can be used to optimize the processing flow
+     class PerformanceTracker
+       include Singleton
+
+       # How many samples do we collect per topic partition
+       SAMPLES_COUNT = 200
+
+       private_constant :SAMPLES_COUNT
+
+       # Builds up a nested concurrent hash for data tracking
+       def initialize
+         @processing_times = Concurrent::Hash.new do |topics_hash, topic|
+           topics_hash[topic] = Concurrent::Hash.new do |partitions_hash, partition|
+             # This array does not have to be concurrent, because we always access single
+             # partition data via instrumentation that operates in a single thread via consumer
+             partitions_hash[partition] = []
+           end
+         end
+       end
+
+       # @param topic [String]
+       # @param partition [Integer]
+       # @return [Float] p95 processing time of a single message from a single topic partition
+       def processing_time_p95(topic, partition)
+         values = @processing_times[topic][partition]
+
+         return 0 if values.empty?
+         return values.first if values.size == 1
+
+         percentile(0.95, values)
+       end
+
+       # @private
+       # @param event [Dry::Events::Event] event details
+       # Tracks the time taken to process a single message of a given topic partition
+       def on_consumer_consumed(event)
+         consumer = event[:caller]
+         messages = consumer.messages
+         topic = messages.metadata.topic
+         partition = messages.metadata.partition
+
+         samples = @processing_times[topic][partition]
+         samples << event[:time] / messages.count
+
+         return unless samples.size > SAMPLES_COUNT
+
+         samples.shift
+       end
+
+       private
+
+       # Computes the requested percentile out of the provided values
+       # @param percentile [Float]
+       # @param values [Array<Numeric>] all the values based on which we should compute the
+       #   percentile
+       # @return [Float] computed percentile
+       def percentile(percentile, values)
+         values_sorted = values.sort
+
+         floor = (percentile * (values_sorted.length - 1) + 1).floor - 1
+         mod = (percentile * (values_sorted.length - 1) + 1).modulo(1)
+
+         values_sorted[floor] + (mod * (values_sorted[floor + 1] - values_sorted[floor]))
+       end
+     end
+   end
+ end
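The interpolation in `#percentile` is the standard linear method over the sorted samples. A standalone restatement with a worked value, independent of any Karafka API:

    # Same math as PerformanceTracker#percentile, extracted for illustration
    def percentile(fraction, values)
      sorted = values.sort
      rank = fraction * (sorted.length - 1) + 1

      floor = rank.floor - 1
      mod = rank.modulo(1)

      sorted[floor] + (mod * (sorted[floor + 1] - sorted[floor]))
    end

    # rank = 0.95 * 3 + 1 = 3.85, so p95 sits 85% of the way from 30 to 40
    puts percentile(0.95, [10, 20, 30, 40]) # => 38.5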
data/lib/karafka/processing/executor.rb CHANGED
@@ -4,10 +4,10 @@ module Karafka
    # Namespace that encapsulates all the logic related to processing data.
    module Processing
      # Executors:
-     # - run consumers code with provided messages batch (for `#call`) or run given teardown
-     #   operations when needed from separate threads.
-     # - they re-create consumer instances in case of partitions that were revoked
-     #   and assigned back.
+     # - run consumers code (for `#call`) or run given preparation / teardown operations when
+     #   needed from separate threads.
+     # - they re-create consumer instances in case of partitions that were revoked and assigned
+     #   back.
      #
      # @note Executors are not removed after partition is revoked. They are not that big and will
      #   be re-used in case of a re-claim
@@ -21,21 +21,21 @@ module Karafka
      # @param group_id [String] id of the subscription group to which the executor belongs
      # @param client [Karafka::Connection::Client] kafka client
      # @param topic [Karafka::Routing::Topic] topic for which this executor will run
-     # @param pause [Karafka::TimeTrackers::Pause] fetch pause object for crash pausing
-     def initialize(group_id, client, topic, pause)
+     # @param pause_tracker [Karafka::TimeTrackers::Pause] fetch pause tracker for pausing
+     def initialize(group_id, client, topic, pause_tracker)
        @id = SecureRandom.uuid
        @group_id = group_id
        @client = client
        @topic = topic
-       @pause = pause
+       @pause_tracker = pause_tracker
      end

-     # Runs consumer data processing against given batch and handles failures and errors.
+     # Builds the consumer instance and sets up all that is needed to run the user consumption logic
      #
      # @param messages [Array<Rdkafka::Consumer::Message>] raw rdkafka messages
      # @param received_at [Time] the moment we've received the batch (actually the moment we've
      #   enqueued it, but good enough)
-     def consume(messages, received_at)
+     def prepare(messages, received_at)
        # Recreate consumer with each batch if persistence is not enabled
        # We reload the consumers with each batch instead of relying on some external signals
        # when needed for consistency. That way devs may have it on or off and not in this
@@ -49,6 +49,11 @@ module Karafka
          received_at
        )

+       consumer.on_prepared
+     end
+
+     # Runs consumer data processing against given batch and handles failures and errors.
+     def consume
        # We run the consumer client logic...
        consumer.on_consume
      end
@@ -86,7 +91,7 @@ module Karafka
        consumer = @topic.consumer.new
        consumer.topic = @topic
        consumer.client = @client
-       consumer.pause = @pause
+       consumer.pause_tracker = @pause_tracker
        consumer.producer = ::Karafka::App.producer
        consumer
      end
data/lib/karafka/processing/jobs/base.rb CHANGED
@@ -5,6 +5,8 @@ module Karafka
      # Namespace for all the jobs that are supposed to run in workers.
      module Jobs
        # Base class for all the job types that are supposed to run in workers threads.
+       # Each job can have 3 main entry-points: `#prepare`, `#call` and `#teardown`.
+       # Only `#call` is required.
        class Base
          extend Forwardable

@@ -12,6 +14,20 @@ module Karafka
          def_delegators :executor, :id, :group_id

          attr_reader :executor
+
+         # When redefined, can run any code that should run before executing the proper code
+         def prepare; end
+
+         # When redefined, can run any code that should run after executing the proper code
+         def teardown; end
+
+         # @return [Boolean] is this a non-blocking job
+         # @note A blocking job is a job that will cause the job queue to wait until it is
+         #   finished before removing the lock on new jobs being added
+         # @note All the jobs are blocking by default
+         def non_blocking?
+           false
+         end
        end
      end
    end
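To make the contract concrete, here is a hypothetical job type flipping `#non_blocking?`; this release itself ships only blocking jobs, so the class below is a sketch rather than framework code:

    # Hypothetical non-blocking job built on the entry points above
    class LongRunningJob < Karafka::Processing::Jobs::Base
      # Blocking stage: runs before the jobs queue may be unlocked
      def prepare
        # e.g. set up state that must exist before polling resumes
      end

      # Because non_blocking? is true, the worker ticks the jobs queue
      # before this runs, letting the fetch loop continue meanwhile
      def call
        # potentially slow processing
      end

      # Cleanup stage: runs after #call
      def teardown; end

      def non_blocking?
        true
      end
    end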
data/lib/karafka/processing/jobs/consume.rb CHANGED
@@ -18,9 +18,14 @@ module Karafka
            super()
          end

-         # Runs the given executor.
+         # Runs the preparations on the executor
+         def prepare
+           executor.prepare(@messages, @created_at)
+         end
+
+         # Runs the given executor
          def call
-           executor.consume(@messages, @created_at)
+           executor.consume
          end
        end
      end
    end
data/lib/karafka/processing/jobs_queue.rb CHANGED
@@ -21,7 +21,7 @@ module Karafka
        # We cannot use a single semaphore, as it could potentially block in listeners that should
        # process with their data and also could unlock when a given group needs to remain locked
        @semaphores = Hash.new { |h, k| h[k] = Queue.new }
-       @in_processing = Hash.new { |h, k| h[k] = {} }
+       @in_processing = Hash.new { |h, k| h[k] = [] }
        @mutex = Mutex.new
      end

@@ -44,9 +44,9 @@ module Karafka
        @mutex.synchronize do
          group = @in_processing[job.group_id]

-         raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.key?(job.id)
+         raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.include?(job)

-         group[job.id] = true
+         group << job
        end

        @queue << job
@@ -60,14 +60,21 @@ module Karafka
          @queue.pop
        end

+       # Causes the wait lock to re-check the lock conditions and potentially unlock.
+       # @param group_id [String] id of the group we want to unlock for one tick
+       # @note This does not release the wait lock. It just causes a recheck of the conditions.
+       def tick(group_id)
+         @semaphores[group_id] << true
+       end
+
        # Marks a given job from a given group as completed. When there are no more jobs from a given
        # group to be executed, we won't wait.
        #
        # @param [Jobs::Base] job that was completed
        def complete(job)
          @mutex.synchronize do
-           @in_processing[job.group_id].delete(job.id)
-           @semaphores[job.group_id] << true
+           @in_processing[job.group_id].delete(job)
+           tick(job.group_id)
          end
        end

@@ -79,7 +86,7 @@ module Karafka
        @mutex.synchronize do
          @in_processing[group_id].clear
          # We unlock it just in case it was blocked when clearing started
-         @semaphores[group_id] << true
+         tick(group_id)
        end
      end

@@ -108,13 +115,15 @@ module Karafka
      # @param group_id [String] id of the group in which jobs we're interested.
      # @return [Boolean] should we keep waiting or not
      def wait?(group_id)
+       group = @in_processing[group_id]
+
        # If it is stopping, all the previous messages that are processed at the moment need to
        # finish. Otherwise we may risk closing the client and committing offsets afterwards
-       return false if Karafka::App.stopping? && @in_processing[group_id].empty?
+       return false if Karafka::App.stopping? && group.empty?
        return false if @queue.closed?
-       return false if @in_processing[group_id].empty?
+       return false if group.empty?

-       true
+       !group.all?(&:non_blocking?)
      end
    end
  end
data/lib/karafka/processing/worker.rb CHANGED
@@ -4,6 +4,18 @@ module Karafka
    module Processing
      # Workers are used to run jobs in separate threads.
      # Workers are the main processing units of the Karafka framework.
+     #
+     # Each job runs in three stages:
+     #   - prepare - here we can run any code that we would need to run blocking before we
+     #               allow the job to run fully async (non-blocking). This will always run in
+     #               a blocking way and can be used to make sure all the resources and external
+     #               dependencies are satisfied before going async.
+     #
+     #   - call - the actual processing logic that can run sync or async
+     #
+     #   - teardown - should include any code that we want to run after we executed the user
+     #                code. This can be used to unlock certain resources or do other things
+     #                that are not user code but need to run after the user code is executed.
      class Worker
        extend Forwardable

@@ -33,7 +45,18 @@ module Karafka
        job = @jobs_queue.pop

        if job
+         job.prepare
+
+         # If a job is marked as non-blocking, we can run a tick in the job queue and, if
+         # there are no other blocking factors, the job queue will be unlocked.
+         # If this does not run, everything stays blocking and the job queue won't let
+         # processing pass until done.
+         @jobs_queue.tick(job.group_id) if job.non_blocking?
+
          job.call
+
+         job.teardown
+
          true
        else
          false
data/lib/karafka/scheduler.rb ADDED
@@ -0,0 +1,21 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # FIFO scheduler for messages coming from various topics and partitions
+   class Scheduler
+     # Yields messages from partitions in FIFO order
+     #
+     # @param messages_buffer [Karafka::Connection::MessagesBuffer] messages buffer with data
+     #   from multiple topics and partitions
+     # @yieldparam [String] topic name
+     # @yieldparam [Integer] partition number
+     # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
+     def call(messages_buffer)
+       messages_buffer.each do |topic, partitions|
+         partitions.each do |partition, messages|
+           yield(topic, partition, messages)
+         end
+       end
+     end
+   end
+ end
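Since the listener resolves its scheduler from `config.internal.scheduler` (see the `setup/config.rb` hunk below), this looks like a swappable extension point. A sketch under that assumption; `SmallestBatchFirstScheduler` is hypothetical and merely reorders what the FIFO version would yield:

    # Hypothetical scheduler that yields the smallest batches first
    class SmallestBatchFirstScheduler
      # Same signature and yield contract as Karafka::Scheduler#call
      def call(messages_buffer)
        batches = []

        messages_buffer.each do |topic, partitions|
          partitions.each do |partition, messages|
            batches << [topic, partition, messages]
          end
        end

        batches
          .sort_by { |(_, _, messages)| messages.size }
          .each { |batch| yield(*batch) }
      end
    end

    # Assumed wiring via the new internal setting
    Karafka::App.setup do |config|
      config.internal.scheduler = SmallestBatchFirstScheduler.new
    end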
data/lib/karafka/setup/config.rb CHANGED
@@ -96,6 +96,8 @@ module Karafka
        # option subscription_groups_builder [Routing::SubscriptionGroupsBuilder] subscription
        #   group builder
        setting :subscription_groups_builder, default: Routing::SubscriptionGroupsBuilder.new
+       # option scheduler [Karafka::Scheduler] scheduler we will be using
+       setting :scheduler, default: Scheduler.new

        # Karafka components for ActiveJob
        setting :active_job do
data/lib/karafka/time_trackers/pause.rb CHANGED
@@ -41,9 +41,12 @@ module Karafka

      # Pauses the processing from now till the end of the interval (backoff or non-backoff)
      # and records the count.
-     def pause
+     # @param timeout [Integer] timeout value in milliseconds that overwrites the default one
+     # @note Providing this value can be useful when we explicitly want to pause for a certain
+     #   period of time, outside of any regular pausing logic
+     def pause(timeout = backoff_interval)
        @started_at = now
-       @ends_at = @started_at + backoff_interval
+       @ends_at = @started_at + timeout
        @count += 1
      end

@@ -53,6 +56,11 @@ module Karafka
        @ends_at = nil
      end

+     # Expires the pause, so it is considered expired right away
+     def expire
+       @ends_at = nil
+     end
+
      # @return [Boolean] are we paused from processing
      def paused?
        !@started_at.nil?
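Putting the tracker changes together: a short behavioral sketch. The `timeout:` keyword is visible in the `PausesManager` hunk earlier; `max_timeout:` and `exponential_backoff:` are assumed constructor keywords not shown in this diff:

    require 'karafka'

    pause = Karafka::TimeTrackers::Pause.new(
      timeout: 1_000,
      max_timeout: 10_000,      # assumed keyword, not visible in this diff
      exponential_backoff: true # assumed keyword, not visible in this diff
    )

    pause.pause          # uses the (possibly exponential) backoff interval
    pause.paused?        # => true
    pause.pause(30_000)  # explicit 30s pause, bypassing the backoff value
    pause.expire         # force-expires so the partition can resume at once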
data/lib/karafka/version.rb CHANGED
@@ -3,5 +3,5 @@
  # Main module namespace
  module Karafka
    # Current Karafka version
-   VERSION = '2.0.0.alpha6'
+   VERSION = '2.0.0.beta1'
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: karafka
  version: !ruby/object:Gem::Version
-   version: 2.0.0.alpha6
+   version: 2.0.0.beta1
  platform: ruby
  authors:
  - Maciej Mensfeld
@@ -34,7 +34,7 @@ cert_chain:
    R2P11bWoCtr70BsccVrN8jEhzwXngMyI2gVt750Y+dbTu1KgRqZKp/ECe7ZzPzXj
    pIy9vHxTANKYVyI4qj8OrFdEM5BQNu8oQpL0iQ==
    -----END CERTIFICATE-----
- date: 2022-04-17 00:00:00.000000000 Z
+ date: 2022-05-22 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: dry-configurable
@@ -228,6 +228,7 @@ files:
  - lib/karafka/pro/active_job/dispatcher.rb
  - lib/karafka/pro/active_job/job_options_contract.rb
  - lib/karafka/pro/loader.rb
+ - lib/karafka/pro/performance_tracker.rb
  - lib/karafka/process.rb
  - lib/karafka/processing/executor.rb
  - lib/karafka/processing/executors_buffer.rb
@@ -248,6 +249,7 @@ files:
  - lib/karafka/routing/subscription_groups_builder.rb
  - lib/karafka/routing/topic.rb
  - lib/karafka/runner.rb
+ - lib/karafka/scheduler.rb
  - lib/karafka/serialization/json/deserializer.rb
  - lib/karafka/server.rb
  - lib/karafka/setup/config.rb
@@ -282,7 +284,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
    version: 1.3.1
  requirements: []
- rubygems_version: 3.3.3
+ rubygems_version: 3.3.7
  signing_key:
  specification_version: 4
  summary: Ruby based framework for working with Apache Kafka
metadata.gz.sig CHANGED
Binary file