karafka 2.0.0.alpha6 → 2.0.0.beta1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 862df94b26c24809f82e07f71c39433b90ef08f68f053a004a87aa466b872dac
- data.tar.gz: 2c533cbd6c271fe282f59c2030d9cc885555242bb8bc9316d0264a5ccfd694a0
+ metadata.gz: f108cb4288d0ed0510381f51c77d49e052b947f6180c9b9c0b06e0ac2b599894
+ data.tar.gz: 3d79066d0107c08f450ca9f4c3b5c4a39aae497836c80bf8380c65f1406b82c0
  SHA512:
- metadata.gz: 8ce3720e535d65f121bcbfd957286cbc41404f2aff2751622183367862db55f8231c286d5bbede4a18649eafcd085952a7fc8e7569a74dbd3633c9db906114e5
- data.tar.gz: e77819ccd2be263b02958fcee71ce9228c93c7dee62fabbea61711fbbdbffa997bb9a85a7f668cf8bb596b7a98a773ace560ad5075c46e70f20cd302b354b0fe
+ metadata.gz: 4aae257010c992c59ce4b01ead54ff2cfd4e8ccd8cbe6b52214b3cedf8f879690e0d577f2b41f44b1ab6888d7e27bbc92f3ba4a69e8b127687fb4c43bff51fbc
+ data.tar.gz: f65e425cb84152d20a055bdb9a94fd98280597cdf5e431337cb8604040534cacbfdd03efd6dc23b86c9ecf25721c860bd55ca75ad3f98e4c66136a88c1efc4e7
checksums.yaml.gz.sig CHANGED
Binary file
data/.ruby-version CHANGED
@@ -1 +1 @@
- 3.1.0
+ 3.1.2
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
  # Karafka framework changelog

+ ## 2.0.0-beta1 (2022-05-22)
+ - Update the jobs queue blocking engine and allow for non-blocking jobs execution
+ - Provide `#prepared` hook that always runs before the fetching loop is unblocked
+ - [Pro] Introduce performance tracker for scheduling optimizer
+ - Provide ability to pause (`#pause`) and resume (`#resume`) given partitions from the consumers
+ - Small integration specs refactoring + specs for pausing scenarios
+
  ## 2.0.0-alpha6 (2022-04-17)
  - Fix a bug, where upon missing boot file and Rails, railtie would fail with a generic exception (#818)
  - Fix an issue with parallel pristine specs colliding with each other during `bundle install` (#820)
@@ -26,12 +33,12 @@

  ## 2.0.0-alpha2 (2022-02-19)
  - Require `kafka` keys to be symbols
- - Added ActiveJob Pro adapter
+ - [Pro] Added ActiveJob Pro adapter
  - Small updates to the license and docs

  ## 2.0.0-alpha1 (2022-01-30)
  - Change license to `LGPL-3.0`
- - Introduce a Pro subscription
+ - [Pro] Introduce a Pro subscription
  - Switch from `ruby-kafka` to `librdkafka` as an underlying driver
  - Introduce fully automatic integration tests that go through the whole server lifecycle
  - Integrate WaterDrop tightly with autoconfiguration inheritance and an option to redefine it
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
  remote: .
  specs:
- karafka (2.0.0.alpha6)
+ karafka (2.0.0.beta1)
  dry-configurable (~> 0.13)
  dry-monitor (~> 0.5)
  dry-validation (~> 1.7)
@@ -13,10 +13,10 @@ PATH
  GEM
  remote: https://rubygems.org/
  specs:
- activejob (7.0.2.3)
-   activesupport (= 7.0.2.3)
+ activejob (7.0.3)
+   activesupport (= 7.0.3)
    globalid (>= 0.3.6)
- activesupport (7.0.2.3)
+ activesupport (7.0.3)
    concurrent-ruby (~> 1.0, >= 1.0.2)
    i18n (>= 1.6, < 2)
    minitest (>= 5.1)
@@ -25,7 +25,7 @@ GEM
  concurrent-ruby (1.1.10)
  diff-lcs (1.5.0)
  docile (1.4.0)
- dry-configurable (0.14.0)
+ dry-configurable (0.15.0)
    concurrent-ruby (~> 1.0)
    dry-core (~> 0.6)
  dry-container (0.9.0)
@@ -121,4 +121,4 @@ DEPENDENCIES
  simplecov

  BUNDLED WITH
- 2.3.10
+ 2.3.11
data/docker-compose.yml CHANGED
@@ -16,6 +16,7 @@ services:
  KAFKA_CREATE_TOPICS:
  "integrations_0_02:2:1,\
  integrations_1_02:2:1,\
+ integrations_2_02:2:1,\
  integrations_0_03:3:1,\
  integrations_1_03:3:1,\
  integrations_2_03:3:1,\
@@ -10,8 +10,8 @@ module Karafka
  attr_accessor :messages
  # @return [Karafka::Connection::Client] kafka connection client
  attr_accessor :client
- # @return [Karafka::TimeTrackers::Pause] current topic partition pause
- attr_accessor :pause
+ # @return [Karafka::TimeTrackers::Pause] current topic partition pause tracker
+ attr_accessor :pause_tracker
  # @return [Waterdrop::Producer] producer instance
  attr_accessor :producer

@@ -24,7 +24,7 @@ module Karafka
  Karafka.monitor.instrument('consumer.consumed', caller: self) do
    consume

-   pause.reset
+   pause_tracker.reset

    # Mark as consumed only if manual offset management is not on
    return if topic.manual_offset_management
@@ -40,8 +40,8 @@ module Karafka
    caller: self,
    type: 'consumer.consume.error'
  )
- client.pause(topic.name, messages.first.partition, @seek_offset || messages.first.offset)
- pause.pause
+
+ pause(@seek_offset || messages.first.offset)
  end

  # Trigger method for running on shutdown.
@@ -76,8 +76,31 @@ module Karafka
  )
  end

+ # Can be used to run preparation code
+ #
+ # @private
+ # @note This should not be used by the end users as it is part of the lifecycle of things,
+ #   not part of the public api. This can act as a hook when creating non-blocking
+ #   consumers and doing other advanced stuff
+ def on_prepared
+   Karafka.monitor.instrument('consumer.prepared', caller: self) do
+     prepared
+   end
+ rescue StandardError => e
+   Karafka.monitor.instrument(
+     'error.occurred',
+     error: e,
+     caller: self,
+     type: 'consumer.prepared.error'
+   )
+ end
+
  private

+ # Method that gets called in the blocking flow allowing to setup any type of resources or to
+ # send additional commands to Kafka before the proper execution starts.
+ def prepared; end
+
  # Method that will perform business logic on data received from Kafka (it will consume
  # the data)
  # @note This method needs to be implemented in a subclass. We stub it here as a failover if
@@ -97,6 +120,10 @@ module Karafka
  # Marks message as consumed in an async way.
  #
  # @param message [Messages::Message] last successfully processed message.
+ # @note We keep track of this offset in case we mark as consumed and then get an error when
+ #   processing another message. In a case like this we do not pause on the message we've
+ #   already processed but rather on the next one. This applies to both sync and async
+ #   versions of this method.
  def mark_as_consumed(message)
    client.mark_as_consumed(message)
    @seek_offset = message.offset + 1
@@ -110,6 +137,32 @@ module Karafka
    @seek_offset = message.offset + 1
  end

+ # Pauses processing on a given offset for the current topic partition
+ #
+ # After given partition is resumed, it will continue processing from the given offset
+ # @param offset [Integer] offset from which we want to restart the processing
+ # @param timeout [Integer, nil] how long in milliseconds do we want to pause or nil to use the
+ #   default exponential pausing strategy defined for retries
+ def pause(offset, timeout = nil)
+   client.pause(
+     messages.metadata.topic,
+     messages.metadata.partition,
+     offset
+   )
+
+   timeout ? pause_tracker.pause(timeout) : pause_tracker.pause
+ end
+
+ # Resumes processing of the current topic partition
+ def resume
+   client.resume(
+     messages.metadata.topic,
+     messages.metadata.partition
+   )
+
+   pause_tracker.expire
+ end
+
  # Seeks in the context of current topic and partition
  #
  # @param offset [Integer] offset where we want to seek
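For context, here is a minimal consumer sketch showing how the new `#pause`, `#resume` and `#prepared` APIs fit together. The topic, the `overloaded?` predicate and the 5_000 ms timeout are hypothetical, not part of this release:

```ruby
# Hypothetical consumer built on the APIs added above; only #consume is required
class VisitsConsumer < Karafka::BaseConsumer
  def consume
    messages.each do |message|
      # Back off this partition; processing restarts from this message's offset
      # once the pause expires (or once #resume is called from elsewhere)
      return pause(message.offset, 5_000) if overloaded?

      puts message.payload
      mark_as_consumed(message)
    end
  end

  private

  # The new #prepared hook runs in the blocking phase, before the fetching
  # loop is unblocked
  def prepared
    @batch_started_at = Time.now
  end

  # Stand-in for any real backpressure check
  def overloaded?
    false
  end
end
```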
@@ -15,6 +15,8 @@ module Karafka
  @pauses_manager = PausesManager.new
  @client = Client.new(@subscription_group)
  @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
+ # We reference scheduler here as it is much faster than fetching this each time
+ @scheduler = ::Karafka::App.config.internal.scheduler
  end

  # Runs the main listener fetch loop.
@@ -66,9 +68,9 @@
  # distributing consuming jobs as upon revoking, we might get assigned to the same
  # partitions, thus getting their jobs. The revoking jobs need to finish before
  # appropriate consumers are taken down and re-created
- wait(@subscription_group) if distribute_revoke_lost_partitions_jobs
+ wait(@subscription_group) if schedule_revoke_lost_partitions_jobs

- distribute_partitions_jobs(messages_buffer)
+ schedule_partitions_jobs(messages_buffer)

  # We wait only on jobs from our subscription group. Other groups are independent.
  wait(@subscription_group)
@@ -103,15 +105,17 @@

  # Enqueues revoking jobs for partitions that were taken away from the running process.
  # @return [Boolean] was there anything to revoke
- def distribute_revoke_lost_partitions_jobs
+ # @note We do not use scheduler here as those jobs are not meant to be order optimized in
+ #   any way. Since they operate occasionally it is irrelevant.
+ def schedule_revoke_lost_partitions_jobs
  revoked_partitions = @client.rebalance_manager.revoked_partitions

  return false if revoked_partitions.empty?

  revoked_partitions.each do |topic, partitions|
    partitions.each do |partition|
-     pause = @pauses_manager.fetch(topic, partition)
-     executor = @executors.fetch(topic, partition, pause)
+     pause_tracker = @pauses_manager.fetch(topic, partition)
+     executor = @executors.fetch(topic, partition, pause_tracker)
      @jobs_queue << Processing::Jobs::Revoked.new(executor)
    end
  end
@@ -122,8 +126,8 @@
  # Takes the messages per topic partition and enqueues processing jobs in threads.
  #
  # @param messages_buffer [Karafka::Connection::MessagesBuffer] buffer with messages
- def distribute_partitions_jobs(messages_buffer)
-   messages_buffer.each do |topic, partition, messages|
+ def schedule_partitions_jobs(messages_buffer)
+   @scheduler.call(messages_buffer) do |topic, partition, messages|
    pause = @pauses_manager.fetch(topic, partition)

    next if pause.paused?
@@ -10,6 +10,10 @@ module Karafka
  class MessagesBuffer
  attr_reader :size

+ extend Forwardable
+
+ def_delegators :@groups, :each
+
  # @return [Karafka::Connection::MessagesBuffer] buffer instance
  def initialize
    @size = 0
@@ -20,19 +24,6 @@
  end
  end

- # Iterates over aggregated data providing messages per topic partition.
- #
- # @yieldparam [String] topic name
- # @yieldparam [Integer] partition number
- # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
- def each
-   @groups.each do |topic, partitions|
-     partitions.each do |partition, messages|
-       yield(topic, partition, messages)
-     end
-   end
- end
-
  # Adds a message to the buffer.
  #
  # @param message [Rdkafka::Consumer::Message] raw rdkafka message
@@ -12,11 +12,11 @@ module Karafka
  end
  end

- # Creates or fetches pause of a given topic partition.
+ # Creates or fetches pause tracker of a given topic partition.
  #
  # @param topic [String] topic name
  # @param partition [Integer] partition number
- # @return [Karafka::TimeTrackers::Pause] pause instance
+ # @return [Karafka::TimeTrackers::Pause] pause tracker instance
  def fetch(topic, partition)
    @pauses[topic][partition] ||= TimeTrackers::Pause.new(
      timeout: Karafka::App.config.pause_timeout,
@@ -32,6 +32,7 @@ module Karafka
  required(:routing_builder)
  required(:status)
  required(:process)
+ required(:scheduler)
  required(:subscription_groups_builder)
  end
  end
@@ -22,6 +22,7 @@ module Karafka
  app.stopping
  app.stopped

+ consumer.prepared
  consumer.consumed
  consumer.revoked
  consumer.shutdown
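Since `consumer.prepared` is now a registered event, instrumentation can hook into it. A short sketch (the log line is illustrative):

```ruby
# Subscribing to the newly registered consumer.prepared event; the payload
# carries the consumer instance under :caller, as instrumented in BaseConsumer
Karafka.monitor.subscribe('consumer.prepared') do |event|
  consumer = event[:caller]
  puts "prepared #{consumer.topic.name}/#{consumer.messages.metadata.partition}"
end
```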
@@ -1,18 +1,18 @@
  # frozen_string_literal: true

- # This Karafka component is a Pro component.
- # All of the commercial components are present in the lib/karafka/pro directory of this repository
- # and their usage requires commercial license agreement.
- #
- # Karafka has also commercial-friendly license, commercial support and commercial components.
- #
- # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
- # your code to Maciej Mensfeld.
-
  module Karafka
  module Pro
  # Karafka Pro ActiveJob components
  module ActiveJob
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+ # of your code to Maciej Mensfeld.
+
  # Pro dispatcher that sends the ActiveJob job to a proper topic based on the queue name
  # and that allows to inject additional options into the producer, effectively allowing for a
  # much better and more granular control over the dispatch and consumption process.
@@ -1,17 +1,17 @@
  # frozen_string_literal: true

- # This Karafka component is a Pro component.
- # All of the commercial components are present in the lib/karafka/pro directory of this repository
- # and their usage requires commercial license agreement.
- #
- # Karafka has also commercial-friendly license, commercial support and commercial components.
- #
- # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
- # your code to Maciej Mensfeld.
-
  module Karafka
  module Pro
  module ActiveJob
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+ # of your code to Maciej Mensfeld.
+
  # Contract for validating the options that can be altered with `#karafka_options` per job
  # class that works with Pro features.
  class JobOptionsContract < ::Karafka::ActiveJob::JobOptionsContract
@@ -1,15 +1,16 @@
  # frozen_string_literal: true

- # This Karafka component is a Pro component.
- # All of the commercial components are present in the lib/karafka/pro directory of this repository
- # and their usage requires commercial license agreement.
- #
- # Karafka has also commercial-friendly license, commercial support and commercial components.
- #
- # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
- # your code to Maciej Mensfeld.
  module Karafka
  module Pro
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+ # of your code to Maciej Mensfeld.
+
  # Loader requires and loads all the pro components only when they are needed
  class Loader
  class << self
@@ -17,11 +18,15 @@ module Karafka
  # @param config [Dry::Configurable::Config] whole app config that we can alter with pro
  #   components
  def setup(config)
+   require_relative 'performance_tracker'
    require_relative 'active_job/dispatcher'
    require_relative 'active_job/job_options_contract'

    config.internal.active_job.dispatcher = ActiveJob::Dispatcher.new
    config.internal.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
+
+   # Monitor time needed to process each message from a single partition
+   config.monitor.subscribe(PerformanceTracker.instance)
  end
  end
  end
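Once the loader has subscribed the tracker to the monitor, collected percentiles can be read back. A sketch with illustrative topic and partition values (the time unit follows whatever the monitor reports in `event[:time]`):

```ruby
# Reading back the p95 per-message processing time gathered by the tracker
tracker = Karafka::Pro::PerformanceTracker.instance
p tracker.processing_time_p95('visits', 0) # => 0 until samples are collected
```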
@@ -0,0 +1,80 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ module Pro
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright
+ # of your code to Maciej Mensfeld.
+
+ # Tracker used to keep track of performance metrics
+ # It provides insights that can be used to optimize processing flow
+ class PerformanceTracker
+ include Singleton
+
+ # How many samples do we collect per topic partition
+ SAMPLES_COUNT = 200
+
+ private_constant :SAMPLES_COUNT
+
+ # Builds up nested concurrent hash for data tracking
+ def initialize
+   @processing_times = Concurrent::Hash.new do |topics_hash, topic|
+     topics_hash[topic] = Concurrent::Hash.new do |partitions_hash, partition|
+       # This array does not have to be concurrent because we always access single partition
+       # data via instrumentation that operates in a single thread via consumer
+       partitions_hash[partition] = []
+     end
+   end
+ end
+
+ # @param topic [String]
+ # @param partition [Integer]
+ # @return [Float] p95 processing time of a single message from a single topic partition
+ def processing_time_p95(topic, partition)
+   values = @processing_times[topic][partition]
+
+   return 0 if values.empty?
+   return values.first if values.size == 1
+
+   percentile(0.95, values)
+ end
+
+ # @private
+ # @param event [Dry::Events::Event] event details
+ # Tracks time taken to process a single message of a given topic partition
+ def on_consumer_consumed(event)
+   consumer = event[:caller]
+   messages = consumer.messages
+   topic = messages.metadata.topic
+   partition = messages.metadata.partition
+
+   samples = @processing_times[topic][partition]
+   samples << event[:time] / messages.count
+
+   return unless samples.size > SAMPLES_COUNT
+
+   samples.shift
+ end
+
+ private
+
+ # Computes the requested percentile out of provided values
+ # @param percentile [Float]
+ # @param values [Array<Numeric>] all the values based on which we should compute the percentile
+ # @return [Float] computed percentile
+ def percentile(percentile, values)
+   values_sorted = values.sort
+
+   floor = (percentile * (values_sorted.length - 1) + 1).floor - 1
+   mod = (percentile * (values_sorted.length - 1) + 1).modulo(1)
+
+   values_sorted[floor] + (mod * (values_sorted[floor + 1] - values_sorted[floor]))
+ end
+ end
+ end
+ end
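A quick worked instance of the percentile formula above, using hypothetical sample values:

```ruby
# p95 over [10.0, 20.0, 30.0, 40.0]:
#   floor = (0.95 * 3 + 1).floor - 1  # => 2
#   mod   = (0.95 * 3 + 1).modulo(1)  # => 0.85
#   30.0 + 0.85 * (40.0 - 30.0)       # => 38.5
values = [40.0, 10.0, 30.0, 20.0].sort
floor = (0.95 * (values.length - 1) + 1).floor - 1
mod = (0.95 * (values.length - 1) + 1).modulo(1)
p values[floor] + (mod * (values[floor + 1] - values[floor])) # => 38.5
```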
@@ -4,10 +4,10 @@ module Karafka
  # Namespace that encapsulates all the logic related to processing data.
  module Processing
  # Executors:
- # - run consumers code with provided messages batch (for `#call`) or run given teardown
- #   operations when needed from separate threads.
- # - they re-create consumer instances in case of partitions that were revoked
- #   and assigned back.
+ # - run consumers code (for `#call`) or run given preparation / teardown operations when needed
+ #   from separate threads.
+ # - they re-create consumer instances in case of partitions that were revoked and assigned
+ #   back.
  #
  # @note Executors are not removed after partition is revoked. They are not that big and will
  #   be re-used in case of a re-claim
@@ -21,21 +21,21 @@ module Karafka
  # @param group_id [String] id of the subscription group to which the executor belongs
  # @param client [Karafka::Connection::Client] kafka client
  # @param topic [Karafka::Routing::Topic] topic for which this executor will run
- # @param pause [Karafka::TimeTrackers::Pause] fetch pause object for crash pausing
- def initialize(group_id, client, topic, pause)
+ # @param pause_tracker [Karafka::TimeTrackers::Pause] fetch pause tracker for pausing
+ def initialize(group_id, client, topic, pause_tracker)
  @id = SecureRandom.uuid
  @group_id = group_id
  @client = client
  @topic = topic
- @pause = pause
+ @pause_tracker = pause_tracker
  end

- # Runs consumer data processing against given batch and handles failures and errors.
+ # Builds the consumer instance and sets all that is needed to run the user consumption logic
  #
  # @param messages [Array<Rdkafka::Consumer::Message>] raw rdkafka messages
  # @param received_at [Time] the moment we've received the batch (actually the moment we've
  #   enqueued it, but good enough)
- def consume(messages, received_at)
+ def prepare(messages, received_at)
  # Recreate consumer with each batch if persistence is not enabled
  # We reload the consumers with each batch instead of relying on some external signals
  # when needed for consistency. That way devs may have it on or off and not in this
@@ -49,6 +49,11 @@ module Karafka
    received_at
  )

+ consumer.on_prepared
+ end
+
+ # Runs consumer data processing against given batch and handles failures and errors.
+ def consume
  # We run the consumer client logic...
  consumer.on_consume
  end
@@ -86,7 +91,7 @@ module Karafka
  consumer = @topic.consumer.new
  consumer.topic = @topic
  consumer.client = @client
- consumer.pause = @pause
+ consumer.pause_tracker = @pause_tracker
  consumer.producer = ::Karafka::App.producer
  consumer
  end
@@ -5,6 +5,8 @@ module Karafka
  # Namespace for all the jobs that are supposed to run in workers.
  module Jobs
  # Base class for all the job types that are supposed to run in worker threads.
+ # Each job can have 3 main entry-points: `#prepare`, `#call` and `#teardown`
+ # Only `#call` is required.
  class Base
  extend Forwardable

@@ -12,6 +14,20 @@ module Karafka
  def_delegators :executor, :id, :group_id

  attr_reader :executor
+
+ # When redefined can run any code that should run before executing the proper code
+ def prepare; end
+
+ # When redefined can run any code that should run after executing the proper code
+ def teardown; end
+
+ # @return [Boolean] is this a non-blocking job
+ # @note A blocking job is a job that will cause the job queue to wait until it is finished
+ #   before removing the lock on new jobs being added
+ # @note All the jobs are blocking by default
+ def non_blocking?
+   false
+ end
  end
  end
  end
@@ -18,9 +18,14 @@ module Karafka
  super()
  end

- # Runs the given executor.
+ # Runs the preparations on the executor
+ def prepare
+   executor.prepare(@messages, @created_at)
+ end
+
+ # Runs the given executor
  def call
-   executor.consume(@messages, @created_at)
+   executor.consume
  end
  end
  end
@@ -21,7 +21,7 @@ module Karafka
  # We cannot use a single semaphore as it could potentially block in listeners that should
  # process with their data and also could unlock when a given group needs to remain locked
  @semaphores = Hash.new { |h, k| h[k] = Queue.new }
- @in_processing = Hash.new { |h, k| h[k] = {} }
+ @in_processing = Hash.new { |h, k| h[k] = [] }
  @mutex = Mutex.new
  end

@@ -44,9 +44,9 @@
  @mutex.synchronize do
    group = @in_processing[job.group_id]

-   raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.key?(job.id)
+   raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.include?(job)

-   group[job.id] = true
+   group << job
  end

  @queue << job
@@ -60,14 +60,21 @@
  @queue.pop
  end

+ # Causes the wait lock to re-check the lock conditions and potentially unlock.
+ # @param group_id [String] id of the group we want to unlock for one tick
+ # @note This does not release the wait lock. It just causes a conditions recheck
+ def tick(group_id)
+   @semaphores[group_id] << true
+ end
+
  # Marks a given job from a given group as completed. When there are no more jobs from a given
  # group to be executed, we won't wait.
  #
  # @param [Jobs::Base] job that was completed
  def complete(job)
    @mutex.synchronize do
-     @in_processing[job.group_id].delete(job.id)
-     @semaphores[job.group_id] << true
+     @in_processing[job.group_id].delete(job)
+     tick(job.group_id)
    end
  end

@@ -79,7 +86,7 @@
  @mutex.synchronize do
    @in_processing[group_id].clear
    # We unlock it just in case it was blocked when clearing started
-   @semaphores[group_id] << true
+   tick(group_id)
  end
  end

@@ -108,13 +115,15 @@
  # @param group_id [String] id of the group in which jobs we're interested.
  # @return [Boolean] should we keep waiting or not
  def wait?(group_id)
+   group = @in_processing[group_id]
+
    # If it is stopping, all the previous messages that are processed at the moment need to
    # finish. Otherwise we may risk closing the client and committing offsets afterwards
-   return false if Karafka::App.stopping? && @in_processing[group_id].empty?
+   return false if Karafka::App.stopping? && group.empty?
    return false if @queue.closed?
-   return false if @in_processing[group_id].empty?
+   return false if group.empty?

-   true
+   !group.all?(&:non_blocking?)
  end
  end
  end
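A self-contained illustration of the final predicate above, with stand-in structs rather than real Karafka job classes: waiting continues as long as at least one enqueued job is blocking.

```ruby
# Stand-in jobs exposing only the non_blocking? flag
Job = Struct.new(:blocking) do
  def non_blocking?
    !blocking
  end
end

[[Job.new(true)], [Job.new(false)], [Job.new(true), Job.new(false)]].each do |group|
  # Mirrors `!group.all?(&:non_blocking?)` from JobsQueue#wait?
  puts "blocking: #{group.map(&:blocking).inspect} => wait? #{!group.all?(&:non_blocking?)}"
end
# blocking: [true]        => wait? true
# blocking: [false]       => wait? false
# blocking: [true, false] => wait? true
```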
@@ -4,6 +4,18 @@ module Karafka
  module Processing
  # Workers are used to run jobs in separate threads.
  # Workers are the main processing units of the Karafka framework.
+ #
+ # Each job runs in three stages:
+ # - prepare - here we can run any code that we would need to run blocking before we allow
+ #   the job to run fully async (non blocking). This will always run in a blocking
+ #   way and can be used to make sure all the resources and external dependencies
+ #   are satisfied before going async.
+ #
+ # - call - actual processing logic that can run sync or async
+ #
+ # - teardown - it should include any code that we want to run after we executed the user
+ #   code. This can be used to unlock certain resources or do other things that are
+ #   not user code but need to run after the user code is executed.
  class Worker
  extend Forwardable

@@ -33,7 +45,18 @@ module Karafka
  job = @jobs_queue.pop

  if job
+   job.prepare
+
+   # If a job is marked as non blocking, we can run a tick in the job queue and if there
+   # are no other blocking factors, the job queue will be unlocked.
+   # If this does not run, everything stays blocking and the job queue won't let
+   # other work pass until this job is done.
+   @jobs_queue.tick(job.group_id) if job.non_blocking?
+
    job.call
+
+   job.teardown
+
    true
  else
    false
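Given this loop, a non-blocking job only needs to flip the flag. A hypothetical sketch (this release ships no such subclass itself):

```ruby
# Hypothetical job: #prepare still runs in the blocking phase, but once the
# queue is ticked, the fetch loop may proceed while #call is still executing
module Karafka
  module Processing
    module Jobs
      class NonBlockingConsume < Consume
        # Flipping this releases the listener wait earlier (see JobsQueue#wait?)
        def non_blocking?
          true
        end
      end
    end
  end
end
```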
@@ -0,0 +1,21 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ # FIFO scheduler for messages coming from various topics and partitions
+ class Scheduler
+ # Yields messages from partitions in the fifo order
+ #
+ # @param messages_buffer [Karafka::Connection::MessagesBuffer] messages buffer with data from
+ #   multiple topics and partitions
+ # @yieldparam [String] topic name
+ # @yieldparam [Integer] partition number
+ # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
+ def call(messages_buffer)
+   messages_buffer.each do |topic, partitions|
+     partitions.each do |partition, messages|
+       yield(topic, partition, messages)
+     end
+   end
+ end
+ end
+ end
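Because the listener only invokes `#call(messages_buffer)` with a block, alternative orderings can be plugged in. A sketch of a hypothetical largest-batch-first scheduler honouring the same contract:

```ruby
# Hypothetical scheduler yielding the biggest batches first instead of FIFO.
# MessagesBuffer#each yields topic => partitions pairs (see the delegation above)
class LargestBatchFirstScheduler
  def call(messages_buffer)
    batches = []

    messages_buffer.each do |topic, partitions|
      partitions.each do |partition, messages|
        batches << [topic, partition, messages]
      end
    end

    # Start the longest-running partitions as early as possible
    batches.sort_by { |_, _, messages| -messages.size }.each do |batch|
      yield(*batch)
    end
  end
end
```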
@@ -96,6 +96,8 @@ module Karafka
  # option subscription_groups_builder [Routing::SubscriptionGroupsBuilder] subscription
  #   group builder
  setting :subscription_groups_builder, default: Routing::SubscriptionGroupsBuilder.new
+ # option scheduler [Class] scheduler we will be using
+ setting :scheduler, default: Scheduler.new

  # Karafka components for ActiveJob
  setting :active_job do
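A sketch of swapping the default scheduler in an app's setup block. The class name is hypothetical, and note that the setting lives under the non-public `internal` namespace (the listener reads `config.internal.scheduler`), so this may change between releases:

```ruby
class KarafkaApp < Karafka::App
  setup do |config|
    config.kafka = { 'bootstrap.servers': '127.0.0.1:9092' }
    # Wires in the custom scheduler sketched above
    config.internal.scheduler = LargestBatchFirstScheduler.new
  end
end
```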
@@ -41,9 +41,12 @@ module Karafka
  # Pauses the processing from now till the end of the interval (backoff or non-backoff)
  # and records the count.
- def pause
+ # @param timeout [Integer] timeout value in milliseconds that overwrites the default timeout
+ # @note Providing this value can be useful when we explicitly want to pause for a certain
+ #   period of time, outside of any regular pausing logic
+ def pause(timeout = backoff_interval)
  @started_at = now
- @ends_at = @started_at + backoff_interval
+ @ends_at = @started_at + timeout
  @count += 1
  end

@@ -53,6 +56,11 @@
  @ends_at = nil
  end

+ # Expires the pause, so the paused partition can be processed again
+ def expire
+   @ends_at = nil
+ end
+
  # @return [Boolean] are we paused from processing
  def paused?
    !@started_at.nil?
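A short usage sketch of the extended tracker. The constructor keywords are an assumption based on how the pauses manager builds trackers (only `timeout:` is visible in this diff); the values are illustrative:

```ruby
pause = Karafka::TimeTrackers::Pause.new(
  timeout: 1_000,     # assumed keywords mirroring the pause_* settings
  max_timeout: 10_000,
  exponential_backoff: true
)

pause.pause        # pause using the computed backoff interval
pause.pause(5_000) # new in beta1: explicit 5_000 ms pause
pause.expire       # new in beta1: force-expire so processing can resume at once
```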
@@ -3,5 +3,5 @@
  # Main module namespace
  module Karafka
  # Current Karafka version
- VERSION = '2.0.0.alpha6'
+ VERSION = '2.0.0.beta1'
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: karafka
  version: !ruby/object:Gem::Version
- version: 2.0.0.alpha6
+ version: 2.0.0.beta1
  platform: ruby
  authors:
  - Maciej Mensfeld
@@ -34,7 +34,7 @@ cert_chain:
  R2P11bWoCtr70BsccVrN8jEhzwXngMyI2gVt750Y+dbTu1KgRqZKp/ECe7ZzPzXj
  pIy9vHxTANKYVyI4qj8OrFdEM5BQNu8oQpL0iQ==
  -----END CERTIFICATE-----
- date: 2022-04-17 00:00:00.000000000 Z
+ date: 2022-05-22 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: dry-configurable
@@ -228,6 +228,7 @@ files:
  - lib/karafka/pro/active_job/dispatcher.rb
  - lib/karafka/pro/active_job/job_options_contract.rb
  - lib/karafka/pro/loader.rb
+ - lib/karafka/pro/performance_tracker.rb
  - lib/karafka/process.rb
  - lib/karafka/processing/executor.rb
  - lib/karafka/processing/executors_buffer.rb
@@ -248,6 +249,7 @@ files:
  - lib/karafka/routing/subscription_groups_builder.rb
  - lib/karafka/routing/topic.rb
  - lib/karafka/runner.rb
+ - lib/karafka/scheduler.rb
  - lib/karafka/serialization/json/deserializer.rb
  - lib/karafka/server.rb
  - lib/karafka/setup/config.rb
@@ -282,7 +284,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: 1.3.1
  requirements: []
- rubygems_version: 3.3.3
+ rubygems_version: 3.3.7
  signing_key:
  specification_version: 4
  summary: Ruby based framework for working with Apache Kafka
metadata.gz.sig CHANGED
Binary file