karafka 1.4.13 → 2.0.0

Files changed (170)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +3 -3
  3. data/.github/workflows/ci.yml +85 -30
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +268 -7
  6. data/CONTRIBUTING.md +10 -19
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +44 -87
  9. data/LICENSE +17 -0
  10. data/LICENSE-COMM +89 -0
  11. data/LICENSE-LGPL +165 -0
  12. data/README.md +44 -48
  13. data/bin/benchmarks +85 -0
  14. data/bin/create_token +22 -0
  15. data/bin/integrations +237 -0
  16. data/bin/karafka +4 -0
  17. data/bin/scenario +29 -0
  18. data/bin/stress_many +13 -0
  19. data/bin/stress_one +13 -0
  20. data/bin/wait_for_kafka +20 -0
  21. data/certs/karafka-pro.pem +11 -0
  22. data/config/errors.yml +55 -40
  23. data/docker-compose.yml +39 -3
  24. data/karafka.gemspec +11 -17
  25. data/lib/active_job/karafka.rb +21 -0
  26. data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
  27. data/lib/karafka/active_job/consumer.rb +26 -0
  28. data/lib/karafka/active_job/dispatcher.rb +38 -0
  29. data/lib/karafka/active_job/job_extensions.rb +34 -0
  30. data/lib/karafka/active_job/job_options_contract.rb +21 -0
  31. data/lib/karafka/active_job/routing/extensions.rb +31 -0
  32. data/lib/karafka/app.rb +15 -20
  33. data/lib/karafka/base_consumer.rb +181 -31
  34. data/lib/karafka/cli/base.rb +4 -4
  35. data/lib/karafka/cli/info.rb +43 -9
  36. data/lib/karafka/cli/install.rb +19 -10
  37. data/lib/karafka/cli/server.rb +17 -42
  38. data/lib/karafka/cli.rb +4 -11
  39. data/lib/karafka/connection/client.rb +385 -90
  40. data/lib/karafka/connection/listener.rb +246 -38
  41. data/lib/karafka/connection/listeners_batch.rb +24 -0
  42. data/lib/karafka/connection/messages_buffer.rb +84 -0
  43. data/lib/karafka/connection/pauses_manager.rb +46 -0
  44. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  45. data/lib/karafka/connection/rebalance_manager.rb +78 -0
  46. data/lib/karafka/contracts/base.rb +17 -0
  47. data/lib/karafka/contracts/config.rb +88 -11
  48. data/lib/karafka/contracts/consumer_group.rb +21 -189
  49. data/lib/karafka/contracts/consumer_group_topic.rb +34 -11
  50. data/lib/karafka/contracts/server_cli_options.rb +19 -18
  51. data/lib/karafka/contracts.rb +1 -1
  52. data/lib/karafka/env.rb +46 -0
  53. data/lib/karafka/errors.rb +21 -21
  54. data/lib/karafka/helpers/async.rb +33 -0
  55. data/lib/karafka/helpers/colorize.rb +20 -0
  56. data/lib/karafka/helpers/multi_delegator.rb +2 -2
  57. data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
  58. data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
  59. data/lib/karafka/instrumentation/logger_listener.rb +164 -0
  60. data/lib/karafka/instrumentation/monitor.rb +13 -61
  61. data/lib/karafka/instrumentation/notifications.rb +52 -0
  62. data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
  63. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
  64. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
  65. data/lib/karafka/instrumentation.rb +21 -0
  66. data/lib/karafka/licenser.rb +75 -0
  67. data/lib/karafka/messages/batch_metadata.rb +45 -0
  68. data/lib/karafka/messages/builders/batch_metadata.rb +40 -0
  69. data/lib/karafka/messages/builders/message.rb +39 -0
  70. data/lib/karafka/messages/builders/messages.rb +32 -0
  71. data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
  72. data/lib/karafka/messages/messages.rb +64 -0
  73. data/lib/karafka/{params → messages}/metadata.rb +4 -6
  74. data/lib/karafka/messages/seek.rb +9 -0
  75. data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
  76. data/lib/karafka/pro/active_job/consumer.rb +46 -0
  77. data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
  78. data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
  79. data/lib/karafka/pro/base_consumer.rb +82 -0
  80. data/lib/karafka/pro/contracts/base.rb +21 -0
  81. data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
  82. data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
  83. data/lib/karafka/pro/loader.rb +76 -0
  84. data/lib/karafka/pro/performance_tracker.rb +80 -0
  85. data/lib/karafka/pro/processing/coordinator.rb +72 -0
  86. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +37 -0
  87. data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
  88. data/lib/karafka/pro/processing/partitioner.rb +60 -0
  89. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  90. data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
  91. data/lib/karafka/pro/routing/topic_extensions.rb +38 -0
  92. data/lib/karafka/pro.rb +13 -0
  93. data/lib/karafka/process.rb +1 -0
  94. data/lib/karafka/processing/coordinator.rb +88 -0
  95. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  96. data/lib/karafka/processing/executor.rb +118 -0
  97. data/lib/karafka/processing/executors_buffer.rb +88 -0
  98. data/lib/karafka/processing/jobs/base.rb +51 -0
  99. data/lib/karafka/processing/jobs/consume.rb +42 -0
  100. data/lib/karafka/processing/jobs/revoked.rb +22 -0
  101. data/lib/karafka/processing/jobs/shutdown.rb +23 -0
  102. data/lib/karafka/processing/jobs_builder.rb +29 -0
  103. data/lib/karafka/processing/jobs_queue.rb +144 -0
  104. data/lib/karafka/processing/partitioner.rb +22 -0
  105. data/lib/karafka/processing/result.rb +29 -0
  106. data/lib/karafka/processing/scheduler.rb +22 -0
  107. data/lib/karafka/processing/worker.rb +88 -0
  108. data/lib/karafka/processing/workers_batch.rb +27 -0
  109. data/lib/karafka/railtie.rb +113 -0
  110. data/lib/karafka/routing/builder.rb +15 -24
  111. data/lib/karafka/routing/consumer_group.rb +11 -19
  112. data/lib/karafka/routing/consumer_mapper.rb +1 -2
  113. data/lib/karafka/routing/router.rb +1 -1
  114. data/lib/karafka/routing/subscription_group.rb +53 -0
  115. data/lib/karafka/routing/subscription_groups_builder.rb +53 -0
  116. data/lib/karafka/routing/topic.rb +61 -24
  117. data/lib/karafka/routing/topics.rb +38 -0
  118. data/lib/karafka/runner.rb +51 -0
  119. data/lib/karafka/serialization/json/deserializer.rb +6 -15
  120. data/lib/karafka/server.rb +67 -26
  121. data/lib/karafka/setup/config.rb +147 -175
  122. data/lib/karafka/status.rb +14 -5
  123. data/lib/karafka/templates/example_consumer.rb.erb +16 -0
  124. data/lib/karafka/templates/karafka.rb.erb +15 -51
  125. data/lib/karafka/time_trackers/base.rb +19 -0
  126. data/lib/karafka/time_trackers/pause.rb +92 -0
  127. data/lib/karafka/time_trackers/poll.rb +65 -0
  128. data/lib/karafka/version.rb +1 -1
  129. data/lib/karafka.rb +38 -17
  130. data.tar.gz.sig +0 -0
  131. metadata +118 -120
  132. metadata.gz.sig +0 -0
  133. data/MIT-LICENCE +0 -18
  134. data/lib/karafka/assignment_strategies/round_robin.rb +0 -13
  135. data/lib/karafka/attributes_map.rb +0 -63
  136. data/lib/karafka/backends/inline.rb +0 -16
  137. data/lib/karafka/base_responder.rb +0 -226
  138. data/lib/karafka/cli/flow.rb +0 -48
  139. data/lib/karafka/cli/missingno.rb +0 -19
  140. data/lib/karafka/code_reloader.rb +0 -67
  141. data/lib/karafka/connection/api_adapter.rb +0 -158
  142. data/lib/karafka/connection/batch_delegator.rb +0 -55
  143. data/lib/karafka/connection/builder.rb +0 -23
  144. data/lib/karafka/connection/message_delegator.rb +0 -36
  145. data/lib/karafka/consumers/batch_metadata.rb +0 -10
  146. data/lib/karafka/consumers/callbacks.rb +0 -71
  147. data/lib/karafka/consumers/includer.rb +0 -64
  148. data/lib/karafka/consumers/responders.rb +0 -24
  149. data/lib/karafka/consumers/single_params.rb +0 -15
  150. data/lib/karafka/contracts/responder_usage.rb +0 -54
  151. data/lib/karafka/fetcher.rb +0 -42
  152. data/lib/karafka/helpers/class_matcher.rb +0 -88
  153. data/lib/karafka/helpers/config_retriever.rb +0 -46
  154. data/lib/karafka/helpers/inflector.rb +0 -26
  155. data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
  156. data/lib/karafka/params/batch_metadata.rb +0 -26
  157. data/lib/karafka/params/builders/batch_metadata.rb +0 -30
  158. data/lib/karafka/params/builders/params.rb +0 -38
  159. data/lib/karafka/params/builders/params_batch.rb +0 -25
  160. data/lib/karafka/params/params_batch.rb +0 -60
  161. data/lib/karafka/patches/ruby_kafka.rb +0 -47
  162. data/lib/karafka/persistence/client.rb +0 -29
  163. data/lib/karafka/persistence/consumers.rb +0 -45
  164. data/lib/karafka/persistence/topics.rb +0 -48
  165. data/lib/karafka/responders/builder.rb +0 -36
  166. data/lib/karafka/responders/topic.rb +0 -55
  167. data/lib/karafka/routing/topic_mapper.rb +0 -53
  168. data/lib/karafka/serialization/json/serializer.rb +0 -31
  169. data/lib/karafka/setup/configurators/water_drop.rb +0 -36
  170. data/lib/karafka/templates/application_responder.rb.erb +0 -11
data/lib/karafka/processing/coordinators_buffer.rb
@@ -0,0 +1,54 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Processing
+     # Coordinators buffer used to build coordinators per topic partition
+     #
+     # It provides direct access to pauses for revocation
+     #
+     # @note This buffer operates only from the listener loop, thus we do not have to make it
+     #   thread-safe.
+     class CoordinatorsBuffer
+       def initialize
+         @pauses_manager = Connection::PausesManager.new
+         @coordinator_class = ::Karafka::App.config.internal.processing.coordinator_class
+         @coordinators = Hash.new { |h, k| h[k] = {} }
+       end
+
+       # @param topic [String] topic name
+       # @param partition [Integer] partition number
+       def find_or_create(topic, partition)
+         @coordinators[topic][partition] ||= @coordinator_class.new(
+           @pauses_manager.fetch(topic, partition)
+         )
+       end
+
+       # Resumes processing of partitions for which pause time has ended.
+       # @param block we want to run for resumed topic partitions
+       # @yieldparam [String] topic name
+       # @yieldparam [Integer] partition number
+       def resume(&block)
+         @pauses_manager.resume(&block)
+       end
+
+       # @param topic [String] topic name
+       # @param partition [Integer] partition number
+       def revoke(topic, partition)
+         return unless @coordinators[topic].key?(partition)
+
+         # The fact that we delete here does not change the fact that the executor still holds the
+         # reference to this coordinator. We delete it here, as we will no longer process any
+         # new stuff with it and we may need a new coordinator if we regain this partition, but the
+         # coordinator may still be in use
+         @coordinators[topic].delete(partition).revoke
+       end
+
+       # Clears coordinators and re-creates the pauses manager
+       # This should be used only for critical errors recovery
+       def reset
+         @pauses_manager = Connection::PausesManager.new
+         @coordinators.clear
+       end
+     end
+   end
+ end
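The buffer above leans on Hash default blocks to lazily build one coordinator per topic partition, and on the fact that deleting a hash entry does not invalidate references already handed out, which is why #revoke can delete an entry while an executor keeps working with the old coordinator. A minimal standalone sketch of that pattern, with plain Ruby objects standing in for the coordinator and pauses manager:

# Lazy per-(topic, partition) cache, as in CoordinatorsBuffer#initialize
buffer = Hash.new { |hash, topic| hash[topic] = {} }

# Lazily create one object per (topic, partition) pair, like #find_or_create
coordinator = (buffer['events'][0] ||= Object.new)

# Re-fetching the same pair returns the cached instance...
raise 'unexpected' unless buffer['events'][0].equal?(coordinator)

# ...while deleting it (as #revoke does) leaves existing references intact,
# so an in-flight executor could still finish with the old object
revoked = buffer['events'].delete(0)
revoked.equal?(coordinator) # => true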
data/lib/karafka/processing/executor.rb
@@ -0,0 +1,118 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Namespace that encapsulates all the logic related to processing data.
+   module Processing
+     # Executors:
+     # - run consumer code (for `#call`) or run given preparation / teardown operations when needed
+     #   from separate threads.
+     # - they re-create consumer instances in case of partitions that were revoked and assigned
+     #   back.
+     #
+     # @note Executors are not removed after partition is revoked. They are not that big and will
+     #   be re-used in case of a re-claim
+     class Executor
+       # @return [String] unique id that we use for state tracking
+       attr_reader :id
+
+       # @return [String] subscription group id to which a given executor belongs
+       attr_reader :group_id
+
+       # @return [Karafka::Messages::Messages] messages batch
+       attr_reader :messages
+
+       # Topic accessibility may be needed for the jobs builder to be able to build a proper job
+       # based on the topic settings defined by the end user
+       #
+       # @return [Karafka::Routing::Topic] topic of this executor
+       attr_reader :topic
+
+       # @param group_id [String] id of the subscription group to which the executor belongs
+       # @param client [Karafka::Connection::Client] kafka client
+       # @param topic [Karafka::Routing::Topic] topic for which this executor will run
+       def initialize(group_id, client, topic)
+         @id = SecureRandom.uuid
+         @group_id = group_id
+         @client = client
+         @topic = topic
+       end
+
+       # Builds the consumer instance, builds messages batch and sets all that is needed to run the
+       # user consumption logic
+       #
+       # @param messages [Array<Karafka::Messages::Message>]
+       # @param received_at [Time] the moment we've received the batch (actually the moment we've
+       #   enqueued it, but good enough)
+       # @param coordinator [Karafka::Processing::Coordinator] coordinator for processing management
+       def before_consume(messages, received_at, coordinator)
+         # Recreate consumer with each batch if persistence is not enabled
+         # We reload the consumers with each batch instead of relying on some external signals
+         # when needed for consistency. That way devs may have it on or off and not in this
+         # middle state, where re-creation of a consumer instance would occur only sometimes
+         @consumer = nil unless ::Karafka::App.config.consumer_persistence
+
+         consumer.coordinator = coordinator
+
+         # First we build messages batch...
+         consumer.messages = Messages::Builders::Messages.call(
+           messages,
+           @topic,
+           received_at
+         )
+
+         consumer.on_before_consume
+       end
+
+       # Runs consumer data processing against given batch and handles failures and errors.
+       def consume
+         # We run the consumer client logic...
+         consumer.on_consume
+       end
+
+       # Runs consumer after consumption code
+       def after_consume
+         consumer.on_after_consume
+       end
+
+       # Runs the controller `#revoked` method that should be triggered when a given consumer is
+       # no longer needed due to partitions reassignment.
+       #
+       # @note Clearing the consumer will ensure, that if we get the partition back, it will be
+       #   handled with a consumer with a clean state.
+       #
+       # @note We run it only when consumer was present, because presence indicates, that at least
+       #   a single message has been consumed.
+       #
+       # @note We do not reset the consumer but we indicate need for recreation instead, because
+       #   after the revocation, there still may be `#after_consume` running that needs a given
+       #   consumer instance.
+       def revoked
+         consumer.on_revoked if @consumer
+       end
+
+       # Runs the controller `#shutdown` method that should be triggered when a given consumer is
+       # no longer needed as we're closing the process.
+       #
+       # @note While we do not need to clear the consumer here, it's a good habit to clean after
+       #   work is done.
+       def shutdown
+         # There is a case, where the consumer no longer exists because it was revoked, in case like
+         # that we do not build a new instance and shutdown should not be triggered.
+         consumer.on_shutdown if @consumer
+       end
+
+       private
+
+       # @return [Object] cached consumer instance
+       def consumer
+         @consumer ||= begin
+           consumer = @topic.consumer_class.new
+           consumer.topic = @topic
+           consumer.client = @client
+           consumer.producer = ::Karafka::App.producer
+           consumer
+         end
+       end
+     end
+   end
+ end
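The `@consumer = nil unless ... consumer_persistence` line, combined with the memoized private #consumer method, is what decides whether a consumer instance survives across batches. A minimal standalone sketch of that toggle, using a hypothetical FakeExecutor with plain objects in place of real consumers:

# FakeExecutor is illustrative only; it mimics the persistence toggle pattern
class FakeExecutor
  def initialize(persistent:)
    @persistent = persistent
  end

  # Dropping the ivar before each batch (when persistence is off) forces
  # the next #consumer call to rebuild the instance
  def before_consume
    @consumer = nil unless @persistent
    consumer
  end

  private

  def consumer
    @consumer ||= Object.new
  end
end

persistent = FakeExecutor.new(persistent: true)
persistent.before_consume.equal?(persistent.before_consume) # => true (same instance reused)

ephemeral = FakeExecutor.new(persistent: false)
ephemeral.before_consume.equal?(ephemeral.before_consume)   # => false (rebuilt per batch)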
data/lib/karafka/processing/executors_buffer.rb
@@ -0,0 +1,88 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Processing
+     # Buffer for executors of a given subscription group. It wraps around the concept of building
+     # and caching them, so we can re-use them instead of creating new each time.
+     class ExecutorsBuffer
+       # @param client [Connection::Client]
+       # @param subscription_group [Routing::SubscriptionGroup]
+       # @return [ExecutorsBuffer]
+       def initialize(client, subscription_group)
+         @subscription_group = subscription_group
+         @client = client
+         # We need nested layers here to keep track of topics, partitions and processing groups
+         @buffer = Hash.new { |h, k| h[k] = Hash.new { |h2, k2| h2[k2] = {} } }
+       end
+
+       # Finds or creates an executor based on the provided details
+       #
+       # @param topic [String] topic name
+       # @param partition [Integer] partition number
+       # @param parallel_key [String] parallel group key
+       # @return [Executor] consumer executor
+       def find_or_create(topic, partition, parallel_key)
+         ktopic = find_topic(topic)
+
+         @buffer[ktopic][partition][parallel_key] ||= Executor.new(
+           @subscription_group.id,
+           @client,
+           ktopic
+         )
+       end
+
+       # Revokes executors of a given topic partition, so they won't be used anymore for incoming
+       # messages
+       #
+       # @param topic [String] topic name
+       # @param partition [Integer] partition number
+       def revoke(topic, partition)
+         ktopic = find_topic(topic)
+
+         @buffer[ktopic][partition].clear
+       end
+
+       # Finds all the executors available for a given topic partition
+       #
+       # @param topic [String] topic name
+       # @param partition [Integer] partition number
+       # @return [Array<Executor>] executors in use for this topic + partition
+       def find_all(topic, partition)
+         ktopic = find_topic(topic)
+
+         @buffer[ktopic][partition].values
+       end
+
+       # Iterates over all available executors and yields them together with topic and partition
+       # info
+       # @yieldparam [Routing::Topic] karafka routing topic object
+       # @yieldparam [Integer] partition number
+       # @yieldparam [Executor] given executor
+       def each
+         @buffer.each do |ktopic, partitions|
+           partitions.each do |partition, executors|
+             executors.each do |_parallel_key, executor|
+               # We skip the parallel key here as it does not serve any value when iterating
+               yield(ktopic, partition, executor)
+             end
+           end
+         end
+       end
+
+       # Clears the executors buffer. Useful for critical errors recovery.
+       def clear
+         @buffer.clear
+       end
+
+       private
+
+       # Finds topic based on its name
+       #
+       # @param topic [String] topic we're looking for
+       # @return [Karafka::Routing::Topic] topic we're interested in
+       def find_topic(topic)
+         @subscription_group.topics.find(topic) || raise(Errors::TopicNotFoundError, topic)
+       end
+     end
+   end
+ end
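The executors buffer nests hash defaults three levels deep (topic, then partition, then parallel key) so executors are built lazily and re-used, while #each flattens the structure back into (topic, partition, executor) triples. A standalone sketch of that layout, with strings standing in for executors:

# Same three-level default-hash layout as @buffer above
buffer = Hash.new { |h, k| h[k] = Hash.new { |h2, k2| h2[k2] = {} } }

buffer['events'][0]['default'] ||= 'executor-a'
buffer['events'][1]['default'] ||= 'executor-b'

# Iteration yields each executor with its topic and partition, skipping the
# parallel key, mirroring ExecutorsBuffer#each
buffer.each do |topic, partitions|
  partitions.each do |partition, executors|
    executors.each_value { |executor| puts [topic, partition, executor].inspect }
  end
end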
data/lib/karafka/processing/jobs/base.rb
@@ -0,0 +1,51 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Processing
+     # Namespace for all the jobs that are supposed to run in workers.
+     module Jobs
+       # Base class for all the job types that are supposed to run in worker threads.
+       # Each job can have 3 main entry-points: `#before_call`, `#call` and `#after_call`
+       # Only `#call` is required.
+       class Base
+         extend Forwardable
+
+         # @note Since one job always has one executor, we use its id and group id as the job's reference
+         def_delegators :executor, :id, :group_id
+
+         attr_reader :executor
+
+         # Creates a new job instance
+         def initialize
+           # All jobs are blocking by default and they can release the lock when blocking operations
+           # are done (if needed)
+           @non_blocking = false
+         end
+
+         # When redefined can run any code that should run before executing the proper code
+         def before_call; end
+
+         # The main entry-point of a job
+         def call
+           raise NotImplementedError, 'Please implement in a subclass'
+         end
+
+         # When redefined can run any code that should run after executing the proper code
+         def after_call; end
+
+         # @return [Boolean] is this a non-blocking job
+         #
+         # @note Blocking job is a job, that will cause the job queue to wait until it is finished
+         #   before removing the lock on new jobs being added
+         #
+         # @note All the jobs are blocking by default
+         #
+         # @note Job **needs** to mark itself as non-blocking only **after** it is done with all
+         #   the blocking things (pausing partition, etc).
+         def non_blocking?
+           @non_blocking
+         end
+       end
+     end
+   end
+ end
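To make the before_call / call / after_call contract and the non-blocking note concrete, here is a hypothetical subclass sketch (MaintenanceJob is illustrative only, not part of Karafka, and assumes the karafka 2.0 gem is on the load path); it flips itself to non-blocking only after its blocking preparations are done, as the note above requires:

class MaintenanceJob < Karafka::Processing::Jobs::Base
  def before_call
    # Blocking preparations (e.g. pausing a partition) would happen here...
    # ...and only then is it safe to mark the job as non-blocking
    @non_blocking = true
  end

  def call
    # Long-running work that no longer needs to block the jobs queue
  end
end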
data/lib/karafka/processing/jobs/consume.rb
@@ -0,0 +1,42 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Processing
+     module Jobs
+       # The main job type. It runs the executor that triggers given topic partition messages
+       # processing in an underlying consumer instance.
+       class Consume < Base
+         # @return [Array<Rdkafka::Consumer::Message>] array with messages
+         attr_reader :messages
+
+         # @param executor [Karafka::Processing::Executor] executor that is supposed to run a given
+         #   job
+         # @param messages [Karafka::Messages::Messages] karafka messages batch
+         # @param coordinator [Karafka::Processing::Coordinator] processing coordinator
+         # @return [Consume]
+         def initialize(executor, messages, coordinator)
+           @executor = executor
+           @messages = messages
+           @coordinator = coordinator
+           @created_at = Time.now
+           super()
+         end
+
+         # Runs the before consumption preparations on the executor
+         def before_call
+           executor.before_consume(@messages, @created_at, @coordinator)
+         end
+
+         # Runs the given executor
+         def call
+           executor.consume
+         end
+
+         # Runs any error handling and other post-consumption stuff on the executor
+         def after_call
+           executor.after_consume
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/processing/jobs/revoked.rb
@@ -0,0 +1,22 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Processing
+     module Jobs
+       # Job that runs the revoked operation when we lose a partition on a given consumer.
+       class Revoked < Base
+         # @param executor [Karafka::Processing::Executor] executor that is supposed to run the job
+         # @return [Revoked]
+         def initialize(executor)
+           @executor = executor
+           super()
+         end
+
+         # Runs the revoking job via an executor.
+         def call
+           executor.revoked
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/processing/jobs/shutdown.rb
@@ -0,0 +1,23 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Processing
+     module Jobs
+       # Job that runs on each active consumer upon process shutdown (one job per consumer).
+       class Shutdown < Base
+         # @param executor [Karafka::Processing::Executor] executor that is supposed to run a given
+         #   job on an active consumer
+         # @return [Shutdown]
+         def initialize(executor)
+           @executor = executor
+           super()
+         end
+
+         # Runs the shutdown job via an executor.
+         def call
+           executor.shutdown
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/processing/jobs_builder.rb
@@ -0,0 +1,29 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Processing
+     # Class responsible for deciding what type of job we should build to run a given command and
+     # for building a proper job for it.
+     class JobsBuilder
+       # @param executor [Karafka::Processing::Executor]
+       # @param messages [Karafka::Messages::Messages] messages batch to be consumed
+       # @param coordinator [Karafka::Processing::Coordinator]
+       # @return [Karafka::Processing::Jobs::Consume] consumption job
+       def consume(executor, messages, coordinator)
+         Jobs::Consume.new(executor, messages, coordinator)
+       end
+
+       # @param executor [Karafka::Processing::Executor]
+       # @return [Karafka::Processing::Jobs::Revoked] revocation job
+       def revoked(executor)
+         Jobs::Revoked.new(executor)
+       end
+
+       # @param executor [Karafka::Processing::Executor]
+       # @return [Karafka::Processing::Jobs::Shutdown] shutdown job
+       def shutdown(executor)
+         Jobs::Shutdown.new(executor)
+       end
+     end
+   end
+ end
data/lib/karafka/processing/jobs_queue.rb
@@ -0,0 +1,144 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Processing
+     # This is the key work component for Karafka jobs distribution. It provides API for running
+     # jobs in parallel while operating within more than one subscription group.
+     #
+     # We need to take into consideration the fact that more than one subscription group can
+     # operate on this queue, that's why internally we keep track of processing per group.
+     #
+     # We work with the assumption, that partitions data is evenly distributed.
+     class JobsQueue
+       # @return [Karafka::Processing::JobsQueue]
+       def initialize
+         @queue = Queue.new
+         # Those queues will act as semaphores internally. Since we need an indicator for waiting
+         # we could use Thread.pass but this is expensive. Instead we can just lock until any
+         # of the workers finishes their work and we can re-check. This means that in the worst
+         # case scenario, we will context switch 10 times per poll instead of getting this thread
+         # scheduled by Ruby hundreds of thousands of times per group.
+         # We cannot use a single semaphore as it could potentially block in listeners that should
+         # process their data and also could unlock when a given group needs to remain locked
+         @semaphores = Hash.new { |h, k| h[k] = Queue.new }
+         @in_processing = Hash.new { |h, k| h[k] = [] }
+         @mutex = Mutex.new
+       end
+
+       # Returns number of jobs that are either enqueued or in processing (but not finished)
+       # @return [Integer] number of elements in the queue
+       # @note Using `#pop` won't decrease this number as only marking job as completed does this
+       def size
+         @in_processing.values.map(&:size).sum
+       end
+
+       # Adds the job to the internal main queue, scheduling it for execution in a worker and marks
+       # this job as in processing pipeline.
+       #
+       # @param job [Jobs::Base] job that we want to run
+       def <<(job)
+         # We do not push the job if the queue is closed as it means that it would anyhow not be
+         # executed
+         return if @queue.closed?
+
+         @mutex.synchronize do
+           group = @in_processing[job.group_id]
+
+           raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.include?(job)
+
+           group << job
+         end
+
+         @queue << job
+       end
+
+       # @return [Jobs::Base, nil] waits for a job from the main queue and returns it once available
+       #   or returns nil if the queue has been stopped and there won't be anything more to process
+       #   ever.
+       # @note This command is blocking and will wait until any job is available on the main queue
+       def pop
+         @queue.pop
+       end
+
+       # Causes the wait lock to re-check the lock conditions and potential unlock.
+       # @param group_id [String] id of the group we want to unlock for one tick
+       # @note This does not release the wait lock. It just causes a conditions recheck
+       def tick(group_id)
+         @semaphores[group_id] << true
+       end
+
+       # Marks a given job from a given group as completed. When there are no more jobs from a given
+       # group to be executed, we won't wait.
+       #
+       # @param job [Jobs::Base] job that was completed
+       def complete(job)
+         @mutex.synchronize do
+           @in_processing[job.group_id].delete(job)
+           tick(job.group_id)
+         end
+       end
+
+       # Clears the processing states for a provided group. Useful when a recovery happens and we
+       # need to clean up state but only for a given subscription group.
+       #
+       # @param group_id [String]
+       def clear(group_id)
+         @mutex.synchronize do
+           @in_processing[group_id].clear
+           # We unlock it just in case it was blocked when clearing started
+           tick(group_id)
+         end
+       end
+
+       # Stops the whole processing queue.
+       def close
+         @mutex.synchronize do
+           return if @queue.closed?
+
+           @queue.close
+           @semaphores.values.each(&:close)
+         end
+       end
+
+       # @param group_id [String]
+       #
+       # @return [Boolean] tell us if we have anything in the processing (or for processing) from
+       #   a given group.
+       def empty?(group_id)
+         @in_processing[group_id].empty?
+       end
+
+       # Blocks when there are things in the queue in a given group and waits until all the blocking
+       # jobs from a given group are completed
+       #
+       # @param group_id [String] id of the group in which jobs we're interested.
+       # @note This method is blocking.
+       def wait(group_id)
+         # Go do other things while we cannot process and wait for anyone to finish their work
+         # and re-check the wait status
+         @semaphores[group_id].pop while wait?(group_id)
+       end
+
+       # - `processing` - number of jobs that are currently being processed (active work)
+       # - `enqueued` - number of jobs in the queue that are waiting to be picked up by a worker
+       #
+       # @return [Hash] hash with basic usage statistics of this queue.
+       def statistics
+         {
+           processing: size - @queue.size,
+           enqueued: @queue.size
+         }.freeze
+       end
+
+       private
+
+       # @param group_id [String] id of the group in which jobs we're interested.
+       # @return [Boolean] should we keep waiting or not
+       # @note We do not wait for non-blocking jobs. Their flow should allow for `poll` running
+       #   as they may exceed `max.poll.interval`
+       def wait?(group_id)
+         !@in_processing[group_id].all?(&:non_blocking?)
+       end
+     end
+   end
+ end
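The per-group semaphore queues are the interesting part here: #wait blocks on a plain Queue#pop instead of spinning, and every #complete (or explicit #tick) pushes a token so the waiter re-checks its condition. A standalone sketch of that mechanism for a single group, with lambdas standing in for jobs:

jobs      = Queue.new
semaphore = Queue.new
pending   = []
mutex     = Mutex.new

worker = Thread.new do
  # Worker loop equivalent: pop returns nil once the queue is closed and drained
  while (job = jobs.pop)
    job.call
    mutex.synchronize { pending.delete(job) }
    # The "tick": wake the waiting thread so it can re-check its condition
    semaphore << true
  end
end

# Enqueue three jobs for the group, tracking them as "in processing"
3.times do |i|
  job = -> { sleep(0.01 * i) }
  mutex.synchronize { pending << job }
  jobs << job
end

# Equivalent of #wait(group_id): block until nothing from this group is in flight
semaphore.pop until mutex.synchronize { pending.empty? }

jobs.close
worker.join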
data/lib/karafka/processing/partitioner.rb
@@ -0,0 +1,22 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Processing
+     # Basic partitioner for work division
+     # It does not divide any work.
+     class Partitioner
+       # @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group
+       def initialize(subscription_group)
+         @subscription_group = subscription_group
+       end
+
+       # @param _topic [String] topic name
+       # @param messages [Array<Karafka::Messages::Message>] karafka messages
+       # @yieldparam [Integer] group id
+       # @yieldparam [Array<Karafka::Messages::Message>] karafka messages
+       def call(_topic, messages)
+         yield(0, messages)
+       end
+     end
+   end
+ end
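The default partitioner above always yields the whole batch as group 0. To illustrate the same yield contract when work actually gets divided, here is a hypothetical key-based variant (KeyBasedPartitioner and the Struct-based message are illustrative stand-ins, not part of Karafka):

Message = Struct.new(:key, :payload)

class KeyBasedPartitioner
  # Splits messages into up to four groups by key and yields each group,
  # following the same (group_id, messages) yield contract as above
  def call(_topic, messages)
    messages.group_by { |message| message.key.hash % 4 }.each do |group_id, group|
      yield(group_id, group)
    end
  end
end

messages = [Message.new('a', 1), Message.new('b', 2), Message.new('a', 3)]
KeyBasedPartitioner.new.call('events', messages) { |id, group| puts "#{id}: #{group.map(&:payload)}" }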
data/lib/karafka/processing/result.rb
@@ -0,0 +1,29 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Processing
+     # A simple object that allows us to keep track of processing state.
+     # It allows indicating whether a given thing moved from success to failure or the other way
+     # around. Useful for tracking consumption state.
+     class Result
+       def initialize
+         @success = true
+       end
+
+       # @return [Boolean]
+       def success?
+         @success
+       end
+
+       # Marks state as successful
+       def success!
+         @success = true
+       end
+
+       # Marks state as failure
+       def failure!
+         @success = false
+       end
+     end
+   end
+ end
data/lib/karafka/processing/scheduler.rb
@@ -0,0 +1,22 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Processing
+     # FIFO scheduler for messages coming from various topics and partitions
+     class Scheduler
+       # Schedules jobs in FIFO order
+       #
+       # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
+       # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
+       def schedule_consumption(queue, jobs_array)
+         jobs_array.each do |job|
+           queue << job
+         end
+       end
+
+       # Both revocation and shutdown jobs can also run in FIFO order by default
+       alias schedule_revocation schedule_consumption
+       alias schedule_shutdown schedule_consumption
+     end
+   end
+ end