karafka 1.4.0 → 2.0.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (172) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +89 -18
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +365 -1
  6. data/CONTRIBUTING.md +10 -19
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +56 -112
  9. data/LICENSE +17 -0
  10. data/LICENSE-COMM +89 -0
  11. data/LICENSE-LGPL +165 -0
  12. data/README.md +61 -68
  13. data/bin/benchmarks +85 -0
  14. data/bin/create_token +22 -0
  15. data/bin/integrations +272 -0
  16. data/bin/karafka +10 -0
  17. data/bin/scenario +29 -0
  18. data/bin/stress_many +13 -0
  19. data/bin/stress_one +13 -0
  20. data/certs/cert_chain.pem +26 -0
  21. data/certs/karafka-pro.pem +11 -0
  22. data/config/errors.yml +59 -38
  23. data/docker-compose.yml +10 -3
  24. data/karafka.gemspec +18 -21
  25. data/lib/active_job/karafka.rb +21 -0
  26. data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
  27. data/lib/karafka/active_job/consumer.rb +26 -0
  28. data/lib/karafka/active_job/dispatcher.rb +38 -0
  29. data/lib/karafka/active_job/job_extensions.rb +34 -0
  30. data/lib/karafka/active_job/job_options_contract.rb +21 -0
  31. data/lib/karafka/active_job/routing/extensions.rb +33 -0
  32. data/lib/karafka/admin.rb +63 -0
  33. data/lib/karafka/app.rb +15 -20
  34. data/lib/karafka/base_consumer.rb +197 -31
  35. data/lib/karafka/cli/info.rb +44 -10
  36. data/lib/karafka/cli/install.rb +22 -12
  37. data/lib/karafka/cli/server.rb +17 -42
  38. data/lib/karafka/cli.rb +4 -3
  39. data/lib/karafka/connection/client.rb +379 -89
  40. data/lib/karafka/connection/listener.rb +250 -38
  41. data/lib/karafka/connection/listeners_batch.rb +24 -0
  42. data/lib/karafka/connection/messages_buffer.rb +84 -0
  43. data/lib/karafka/connection/pauses_manager.rb +46 -0
  44. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  45. data/lib/karafka/connection/rebalance_manager.rb +78 -0
  46. data/lib/karafka/contracts/base.rb +17 -0
  47. data/lib/karafka/contracts/config.rb +88 -11
  48. data/lib/karafka/contracts/consumer_group.rb +21 -184
  49. data/lib/karafka/contracts/consumer_group_topic.rb +35 -11
  50. data/lib/karafka/contracts/server_cli_options.rb +19 -18
  51. data/lib/karafka/contracts.rb +1 -1
  52. data/lib/karafka/env.rb +46 -0
  53. data/lib/karafka/errors.rb +21 -21
  54. data/lib/karafka/helpers/async.rb +33 -0
  55. data/lib/karafka/helpers/colorize.rb +20 -0
  56. data/lib/karafka/helpers/multi_delegator.rb +2 -2
  57. data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
  58. data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
  59. data/lib/karafka/instrumentation/logger.rb +6 -10
  60. data/lib/karafka/instrumentation/logger_listener.rb +174 -0
  61. data/lib/karafka/instrumentation/monitor.rb +13 -61
  62. data/lib/karafka/instrumentation/notifications.rb +53 -0
  63. data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
  64. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
  65. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
  66. data/lib/karafka/instrumentation.rb +21 -0
  67. data/lib/karafka/licenser.rb +75 -0
  68. data/lib/karafka/messages/batch_metadata.rb +45 -0
  69. data/lib/karafka/messages/builders/batch_metadata.rb +39 -0
  70. data/lib/karafka/messages/builders/message.rb +39 -0
  71. data/lib/karafka/messages/builders/messages.rb +34 -0
  72. data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
  73. data/lib/karafka/messages/messages.rb +64 -0
  74. data/lib/karafka/{params → messages}/metadata.rb +4 -6
  75. data/lib/karafka/messages/seek.rb +9 -0
  76. data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
  77. data/lib/karafka/pro/active_job/consumer.rb +46 -0
  78. data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
  79. data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
  80. data/lib/karafka/pro/base_consumer.rb +107 -0
  81. data/lib/karafka/pro/contracts/base.rb +21 -0
  82. data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
  83. data/lib/karafka/pro/contracts/consumer_group_topic.rb +69 -0
  84. data/lib/karafka/pro/loader.rb +76 -0
  85. data/lib/karafka/pro/performance_tracker.rb +80 -0
  86. data/lib/karafka/pro/processing/coordinator.rb +85 -0
  87. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +38 -0
  88. data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
  89. data/lib/karafka/pro/processing/partitioner.rb +58 -0
  90. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  91. data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
  92. data/lib/karafka/pro/routing/topic_extensions.rb +74 -0
  93. data/lib/karafka/pro.rb +13 -0
  94. data/lib/karafka/process.rb +1 -0
  95. data/lib/karafka/processing/coordinator.rb +103 -0
  96. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  97. data/lib/karafka/processing/executor.rb +126 -0
  98. data/lib/karafka/processing/executors_buffer.rb +88 -0
  99. data/lib/karafka/processing/jobs/base.rb +55 -0
  100. data/lib/karafka/processing/jobs/consume.rb +47 -0
  101. data/lib/karafka/processing/jobs/revoked.rb +22 -0
  102. data/lib/karafka/processing/jobs/shutdown.rb +23 -0
  103. data/lib/karafka/processing/jobs_builder.rb +29 -0
  104. data/lib/karafka/processing/jobs_queue.rb +144 -0
  105. data/lib/karafka/processing/partitioner.rb +22 -0
  106. data/lib/karafka/processing/result.rb +37 -0
  107. data/lib/karafka/processing/scheduler.rb +22 -0
  108. data/lib/karafka/processing/worker.rb +91 -0
  109. data/lib/karafka/processing/workers_batch.rb +27 -0
  110. data/lib/karafka/railtie.rb +127 -0
  111. data/lib/karafka/routing/builder.rb +26 -23
  112. data/lib/karafka/routing/consumer_group.rb +37 -17
  113. data/lib/karafka/routing/consumer_mapper.rb +1 -2
  114. data/lib/karafka/routing/proxy.rb +9 -16
  115. data/lib/karafka/routing/router.rb +1 -1
  116. data/lib/karafka/routing/subscription_group.rb +53 -0
  117. data/lib/karafka/routing/subscription_groups_builder.rb +54 -0
  118. data/lib/karafka/routing/topic.rb +65 -24
  119. data/lib/karafka/routing/topics.rb +38 -0
  120. data/lib/karafka/runner.rb +51 -0
  121. data/lib/karafka/serialization/json/deserializer.rb +6 -15
  122. data/lib/karafka/server.rb +67 -26
  123. data/lib/karafka/setup/config.rb +153 -175
  124. data/lib/karafka/status.rb +14 -5
  125. data/lib/karafka/templates/example_consumer.rb.erb +16 -0
  126. data/lib/karafka/templates/karafka.rb.erb +17 -55
  127. data/lib/karafka/time_trackers/base.rb +19 -0
  128. data/lib/karafka/time_trackers/pause.rb +92 -0
  129. data/lib/karafka/time_trackers/poll.rb +65 -0
  130. data/lib/karafka/version.rb +1 -1
  131. data/lib/karafka.rb +46 -16
  132. data.tar.gz.sig +0 -0
  133. metadata +145 -171
  134. metadata.gz.sig +0 -0
  135. data/.github/FUNDING.yml +0 -3
  136. data/MIT-LICENCE +0 -18
  137. data/certs/mensfeld.pem +0 -25
  138. data/lib/karafka/attributes_map.rb +0 -62
  139. data/lib/karafka/backends/inline.rb +0 -16
  140. data/lib/karafka/base_responder.rb +0 -226
  141. data/lib/karafka/cli/flow.rb +0 -48
  142. data/lib/karafka/code_reloader.rb +0 -67
  143. data/lib/karafka/connection/api_adapter.rb +0 -161
  144. data/lib/karafka/connection/batch_delegator.rb +0 -55
  145. data/lib/karafka/connection/builder.rb +0 -18
  146. data/lib/karafka/connection/message_delegator.rb +0 -36
  147. data/lib/karafka/consumers/batch_metadata.rb +0 -10
  148. data/lib/karafka/consumers/callbacks.rb +0 -71
  149. data/lib/karafka/consumers/includer.rb +0 -64
  150. data/lib/karafka/consumers/responders.rb +0 -24
  151. data/lib/karafka/consumers/single_params.rb +0 -15
  152. data/lib/karafka/contracts/responder_usage.rb +0 -54
  153. data/lib/karafka/fetcher.rb +0 -42
  154. data/lib/karafka/helpers/class_matcher.rb +0 -88
  155. data/lib/karafka/helpers/config_retriever.rb +0 -46
  156. data/lib/karafka/helpers/inflector.rb +0 -26
  157. data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
  158. data/lib/karafka/params/batch_metadata.rb +0 -26
  159. data/lib/karafka/params/builders/batch_metadata.rb +0 -30
  160. data/lib/karafka/params/builders/params.rb +0 -38
  161. data/lib/karafka/params/builders/params_batch.rb +0 -25
  162. data/lib/karafka/params/params_batch.rb +0 -60
  163. data/lib/karafka/patches/ruby_kafka.rb +0 -47
  164. data/lib/karafka/persistence/client.rb +0 -29
  165. data/lib/karafka/persistence/consumers.rb +0 -45
  166. data/lib/karafka/persistence/topics.rb +0 -48
  167. data/lib/karafka/responders/builder.rb +0 -36
  168. data/lib/karafka/responders/topic.rb +0 -55
  169. data/lib/karafka/routing/topic_mapper.rb +0 -53
  170. data/lib/karafka/serialization/json/serializer.rb +0 -31
  171. data/lib/karafka/setup/configurators/water_drop.rb +0 -36
  172. data/lib/karafka/templates/application_responder.rb.erb +0 -11
@@ -0,0 +1,58 @@
1
# frozen_string_literal: true

# This Karafka component is a Pro component.
# All of the commercial components are present in the lib/karafka/pro directory of this
# repository and their usage requires commercial license agreement.
#
# Karafka has also commercial-friendly license, commercial support and commercial components.
#
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
# your code to Maciej Mensfeld.

module Karafka
  module Pro
    module Processing
      # Pro partitioner that can distribute work based on the virtual partitioner settings
      class Partitioner < ::Karafka::Processing::Partitioner
        # @param topic [String] topic name
        # @param messages [Array<Karafka::Messages::Message>] karafka messages
        # @yieldparam [Integer] group id
        # @yieldparam [Array<Karafka::Messages::Message>] karafka messages
        def call(topic, messages)
          ktopic = @subscription_group.topics.find(topic)

          # We only partition work if we have a virtual partitioner and more than one thread to
          # process the data. With one thread it is not worth partitioning the work as the work
          # itself will be assigned to one thread (pointless work)
          unless ktopic.virtual_partitions? && ktopic.virtual_partitions.max_partitions > 1
            # When no virtual partitioner, works as a regular one
            return yield(0, messages)
          end

          # We need to reduce it to the number of threads, so the group_id is not a direct
          # effect of the end user action. Otherwise the persistence layer for consumers would
          # cache it forever and it would cause memory leaks
          partitioner = ktopic.virtual_partitions.partitioner
          groupings = messages.group_by { |message| partitioner.call(message) }.values

          # Reduce the number of virtual partitions to a size that matches the max_partitions
          # As mentioned above we cannot use the partitioning keys directly as it could cause
          # memory leaks
          #
          # The algorithm here is simple: we assume that the biggest group will be the most
          # costly to process, so we keep merging the smallest groups together until there are
          # at most max_partitions of them
          while groupings.size > ktopic.virtual_partitions.max_partitions
            groupings.sort_by! { |grouping| -grouping.size }

            # Offset order needs to be maintained for virtual partitions
            groupings << (groupings.pop + groupings.pop).sort_by!(&:offset)
          end

          groupings.each_with_index { |group, index| yield(index, group) }
        end
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
# frozen_string_literal: true

# This Karafka component is a Pro component.
# All of the commercial components are present in the lib/karafka/pro directory of this
# repository and their usage requires commercial license agreement.
#
# Karafka has also commercial-friendly license, commercial support and commercial components.
#
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
# your code to Maciej Mensfeld.

module Karafka
  module Pro
    module Processing
      # Optimizing scheduler that takes into consideration the execution time needed to process
      # messages from given topics partitions. It uses the non-preemptive LJF algorithm
      #
      # This scheduler is designed to optimize execution times on jobs that perform IO
      # operations, as when taking IO into consideration, it can achieve optimized parallel
      # processing.
      #
      # This scheduler can also work with virtual partitions.
      #
      # Aside from consumption jobs, other jobs do not run often, thus we can leave them with
      # the default FIFO scheduler from the default Karafka scheduler
      class Scheduler < ::Karafka::Processing::Scheduler
        # Schedules jobs in the LJF order for consumption
        #
        # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
        # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
        def schedule_consumption(queue, jobs_array)
          tracker = PerformanceTracker.instance

          # Cost of a job is estimated as the p95 processing time of a single message from its
          # topic partition multiplied by the size of the batch
          costed = jobs_array.map do |job|
            batch = job.messages
            sample = batch.first

            [job, tracker.processing_time_p95(sample.topic, sample.partition) * batch.size]
          end

          # Cheapest jobs at the front, then enqueue starting from the most expensive one (LJF)
          costed.sort_by!(&:last)
          costed.reverse_each { |job, _cost| queue << job }
        end
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
# frozen_string_literal: true

# This Karafka component is a Pro component.
# All of the commercial components are present in the lib/karafka/pro directory of this
# repository and their usage requires commercial license agreement.
#
# Karafka has also commercial-friendly license, commercial support and commercial components.
#
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
# your code to Maciej Mensfeld.

module Karafka
  module Pro
    # Pro routing components
    module Routing
      # Routing extensions for builder to be able to validate Pro components correct usage
      module BuilderExtensions
        # Runs the regular routing flow and then validates each of the consumer groups with the
        # pro contracts
        #
        # @param block [Proc] routing defining block
        def draw(&block)
          super

          each { |group| ::Karafka::Pro::Contracts::ConsumerGroup.new.validate!(group.to_h) }
        end
      end
    end
  end
end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ # Pro routing components
15
+ module Routing
16
+ # Routing extensions that allow to configure some extra PRO routing options
17
+ module TopicExtensions
18
+ # Internal representation of the virtual partitions settings and configuration
19
+ # This allows us to abstract away things in a nice manner
20
+ #
21
+ # For features with more options than just on/off we use this approach as it simplifies
22
+ # the code. We do not use it for all not to create unneeded complexity
23
+ VirtualPartitions = Struct.new(
24
+ :active,
25
+ :partitioner,
26
+ :max_partitions,
27
+ keyword_init: true
28
+ ) { alias_method :active?, :active }
29
+
30
+ class << self
31
+ # @param base [Class] class we extend
32
+ def prepended(base)
33
+ base.attr_accessor :long_running_job
34
+ end
35
+ end
36
+
37
+ # @param max_partitions [Integer] max number of virtual partitions that can come out of the
38
+ # single distribution flow. When set to more than the Karafka threading, will create
39
+ # more work than workers. When less, can ensure we have spare resources to process other
40
+ # things in parallel.
41
+ # @param partitioner [nil, #call] nil or callable partitioner
42
+ # @return [VirtualPartitions] method that allows to set the virtual partitions details
43
+ # during the routing configuration and then allows to retrieve it
44
+ def virtual_partitions(
45
+ max_partitions: Karafka::App.config.concurrency,
46
+ partitioner: nil
47
+ )
48
+ @virtual_partitions ||= VirtualPartitions.new(
49
+ active: !partitioner.nil?,
50
+ max_partitions: max_partitions,
51
+ partitioner: partitioner
52
+ )
53
+ end
54
+
55
+ # @return [Boolean] are virtual partitions enabled for given topic
56
+ def virtual_partitions?
57
+ virtual_partitions.active?
58
+ end
59
+
60
+ # @return [Boolean] is a given job on a topic a long-running one
61
+ def long_running_job?
62
+ @long_running_job || false
63
+ end
64
+
65
+ # @return [Hash] hash with topic details and the extensions details
66
+ def to_h
67
+ super.merge(
68
+ virtual_partitions: virtual_partitions.to_h
69
+ )
70
+ end
71
+ end
72
+ end
73
+ end
74
+ end
@@ -0,0 +1,13 @@
1
# frozen_string_literal: true

# This Karafka component is a Pro component.
# All of the commercial components are present in the lib/karafka/pro directory of this repository
# and their usage requires commercial license agreement.
#
# Karafka has also commercial-friendly license, commercial support and commercial components.
#
module Karafka
  # Namespace for pro components, licensed under the commercial license agreement.
  module Pro
  end
end
@@ -9,6 +9,7 @@ module Karafka
9
9
  SIGINT
10
10
  SIGQUIT
11
11
  SIGTERM
12
+ SIGTTIN
12
13
  ].freeze
13
14
 
14
15
  HANDLED_SIGNALS.each do |signal|
@@ -0,0 +1,103 @@
1
# frozen_string_literal: true

module Karafka
  module Processing
    # Basic coordinator that allows us to provide coordination objects into consumers.
    #
    # This is a wrapping layer to simplify management of work to be handled around consumption.
    #
    # @note This coordinator needs to be thread safe. Some operations are performed only in the
    #   listener thread, but we go with thread-safe by default for all not to worry about
    #   potential future mistakes.
    class Coordinator
      # @return [Karafka::TimeTrackers::Pause] pause tracker of this topic partition
      attr_reader :pause_tracker

      # @return [Integer, nil] offset we should seek back to on a rollback
      attr_reader :seek_offset

      # @param pause_tracker [Karafka::TimeTrackers::Pause] pause tracker for given topic
      #   partition
      def initialize(pause_tracker)
        @pause_tracker = pause_tracker
        @revoked = false
        @consumption_results = {}
        @active_jobs = 0
        @lock = Mutex.new
      end

      # Starts the coordinator for given consumption jobs
      #
      # @param messages [Array<Karafka::Messages::Message>] batch of message for which we are
      #   going to coordinate work. Not used with regular coordinator.
      def start(messages)
        @lock.synchronize do
          @active_jobs = 0

          # Consumption results need to be dropped here, otherwise we could end up storing
          # results of consumer instances we no longer control
          @consumption_results.clear

          # We set it on the first encounter and never again, because then the offset setting
          # should be up to the consumers logic (our or the end user)
          # Seek offset needs to be always initialized as for case where manual offset
          # management is turned on, we need to have reference to the first offset even in case
          # of running multiple batches without marking any messages as consumed. Rollback needs
          # to happen to the last place we know of or the last message + 1 that was marked
          @seek_offset ||= messages.first.offset
        end
      end

      # @param offset [Integer] message offset
      def seek_offset=(offset)
        @lock.synchronize { @seek_offset = offset }
      end

      # Increases number of jobs that we handle with this coordinator
      def increment
        @lock.synchronize { @active_jobs += 1 }
      end

      # Decrements number of jobs we handle at the moment
      def decrement
        @lock.synchronize do
          @active_jobs -= 1

          if @active_jobs.negative?
            # This should never happen. If it does, something is heavily out of sync. Please
            # reach out to us if you encounter this
            raise Karafka::Errors::InvalidCoordinatorState, 'Was zero before decrementation'
          end

          @active_jobs
        end
      end

      # @param consumer [Object] karafka consumer (normal or pro)
      # @return [Karafka::Processing::Result] result object which we can use to indicate
      #   consumption processing state.
      def consumption(consumer)
        @lock.synchronize { @consumption_results[consumer] ||= Processing::Result.new }
      end

      # Is all the consumption done and finished successfully for this coordinator
      def success?
        @lock.synchronize do
          @active_jobs.zero? && @consumption_results.values.all?(&:success?)
        end
      end

      # Marks given coordinator for processing group as revoked
      #
      # This is invoked in two places:
      #   - from the main listener loop when we detect revoked partitions
      #   - from the consumer in case checkpointing fails
      #
      # This means, we can end up having consumer being aware that it was revoked prior to the
      # listener loop dispatching the revocation job. It is ok, as effectively nothing will be
      # processed until revocation jobs are done.
      def revoke
        @lock.synchronize { @revoked = true }
      end

      # @return [Boolean] is the partition we are processing revoked or not
      def revoked?
        @revoked
      end
    end
  end
end
@@ -0,0 +1,54 @@
1
# frozen_string_literal: true

module Karafka
  module Processing
    # Coordinators builder used to build coordinators per topic partition
    #
    # It provides direct pauses access for revocation
    #
    # @note This buffer operates only from the listener loop, thus we do not have to make it
    #   thread-safe.
    class CoordinatorsBuffer
      def initialize
        @pauses_manager = Connection::PausesManager.new
        @coordinator_class = ::Karafka::App.config.internal.processing.coordinator_class
        @coordinators = Hash.new { |hash, topic| hash[topic] = {} }
      end

      # @param topic [String] topic name
      # @param partition [Integer] partition number
      # @return [Object] existing or newly built coordinator for given topic partition
      def find_or_create(topic, partition)
        @coordinators[topic][partition] ||= @coordinator_class.new(
          @pauses_manager.fetch(topic, partition)
        )
      end

      # Resumes processing of partitions for which pause time has ended.
      #
      # @param block we want to run for resumed topic partitions
      # @yieldparam [String] topic name
      # @yieldparam [Integer] partition number
      def resume(&block)
        @pauses_manager.resume(&block)
      end

      # @param topic [String] topic name
      # @param partition [Integer] partition number
      def revoke(topic, partition)
        partitions = @coordinators[topic]

        return unless partitions.key?(partition)

        # The fact that we delete here does not change the fact that the executor still holds
        # the reference to this coordinator. We delete it here, as we will no longer process any
        # new stuff with it and we may need a new coordinator if we regain this partition, but
        # the coordinator may still be in use
        partitions.delete(partition).revoke
      end

      # Clears coordinators and re-creates the pauses manager
      # This should be used only for critical errors recovery
      def reset
        @pauses_manager = Connection::PausesManager.new
        @coordinators.clear
      end
    end
  end
end
@@ -0,0 +1,126 @@
1
# frozen_string_literal: true

module Karafka
  # Namespace that encapsulates all the logic related to processing data.
  module Processing
    # Executors:
    # - run consumers code (for `#call`) or run given preparation / teardown operations when
    #   needed from separate threads.
    # - they re-create consumer instances in case of partitions that were revoked and assigned
    #   back.
    #
    # @note Executors are not removed after partition is revoked. They are not that big and will
    #   be re-used in case of a re-claim
    class Executor
      # @return [String] unique id that we use for state tracking
      attr_reader :id

      # @return [String] subscription group id to which a given executor belongs
      attr_reader :group_id

      # @return [Karafka::Messages::Messages] messages batch
      attr_reader :messages

      # Topic accessibility may be needed for the jobs builder to be able to build a proper job
      # based on the topic settings defined by the end user
      #
      # @return [Karafka::Routing::Topic] topic of this executor
      attr_reader :topic

      # @param group_id [String] id of the subscription group to which the executor belongs
      # @param client [Karafka::Connection::Client] kafka client
      # @param topic [Karafka::Routing::Topic] topic for which this executor will run
      def initialize(group_id, client, topic)
        @id = SecureRandom.uuid
        @group_id = group_id
        @client = client
        @topic = topic
      end

      # Allows us to prepare the consumer in the listener thread prior to the job being send to
      # the queue. It also allows to run some code that is time sensitive and cannot wait in the
      # queue as it could cause starvation.
      #
      # @param messages [Array<Karafka::Messages::Message>]
      # @param coordinator [Karafka::Processing::Coordinator] coordinator for processing
      #   management
      def before_enqueue(messages, coordinator)
        # the moment we've received the batch or actually the moment we've enqueued it, but
        # good enough
        @enqueued_at = Time.now

        # Recreate consumer with each batch if persistence is not enabled
        # We reload the consumers with each batch instead of relying on some external signals
        # when needed for consistency. That way devs may have it on or off and not in this
        # middle state, where re-creation of a consumer instance would occur only sometimes
        @consumer = nil unless ::Karafka::App.config.consumer_persistence

        consumer.coordinator = coordinator

        # First we build messages batch...
        consumer.messages = Messages::Builders::Messages.call(messages, @topic, @enqueued_at)

        consumer.on_before_enqueue
      end

      # Runs setup and warm-up code in the worker prior to running the consumption
      def before_consume
        consumer.on_before_consume
      end

      # Runs consumer data processing against given batch and handles failures and errors.
      def consume
        # We run the consumer client logic...
        consumer.on_consume
      end

      # Runs consumer after consumption code
      def after_consume
        consumer.on_after_consume
      end

      # Runs the controller `#revoked` method that should be triggered when a given consumer is
      # no longer needed due to partitions reassignment.
      #
      # @note Clearing the consumer will ensure, that if we get the partition back, it will be
      #   handled with a consumer with a clean state.
      #
      # @note We run it only when consumer was present, because presence indicates, that at
      #   least a single message has been consumed.
      #
      # @note We do not reset the consumer but we indicate need for recreation instead, because
      #   after the revocation, there still may be `#after_consume` running that needs a given
      #   consumer instance.
      def revoked
        consumer.on_revoked if @consumer
      end

      # Runs the controller `#shutdown` method that should be triggered when a given consumer is
      # no longer needed as we're closing the process.
      #
      # @note While we do not need to clear the consumer here, it's a good habit to clean after
      #   work is done.
      def shutdown
        # There is a case, where the consumer no longer exists because it was revoked, in case
        # like that we do not build a new instance and shutdown should not be triggered.
        consumer.on_shutdown if @consumer
      end

      private

      # @return [Object] cached consumer instance
      def consumer
        @consumer ||= @topic.consumer_class.new.tap do |instance|
          instance.topic = @topic
          instance.client = @client
          instance.producer = ::Karafka::App.producer
        end
      end
    end
  end
end
@@ -0,0 +1,88 @@
1
# frozen_string_literal: true

module Karafka
  module Processing
    # Buffer for executors of a given subscription group. It wraps around the concept of
    # building and caching them, so we can re-use them instead of creating new each time.
    class ExecutorsBuffer
      # @param client [Connection::Client]
      # @param subscription_group [Routing::SubscriptionGroup]
      # @return [ExecutorsBuffer]
      def initialize(client, subscription_group)
        @subscription_group = subscription_group
        @client = client
        # Two nested layers keep track of topics, partitions and processing groups
        @buffer = Hash.new do |topics, ktopic|
          topics[ktopic] = Hash.new { |partitions, partition| partitions[partition] = {} }
        end
      end

      # Finds or creates an executor based on the provided details
      #
      # @param topic [String] topic name
      # @param partition [Integer] partition number
      # @param parallel_key [String] parallel group key
      # @return [Executor] consumer executor
      def find_or_create(topic, partition, parallel_key)
        ktopic = find_topic(topic)

        @buffer[ktopic][partition][parallel_key] ||= Executor.new(
          @subscription_group.id,
          @client,
          ktopic
        )
      end

      # Revokes executors of a given topic partition, so they won't be used anymore for incoming
      # messages
      #
      # @param topic [String] topic name
      # @param partition [Integer] partition number
      def revoke(topic, partition)
        @buffer[find_topic(topic)][partition].clear
      end

      # Finds all the executors available for a given topic partition
      #
      # @param topic [String] topic name
      # @param partition [Integer] partition number
      # @return [Array<Executor>] executors in use for this topic + partition
      def find_all(topic, partition)
        @buffer[find_topic(topic)][partition].values
      end

      # Iterates over all available executors and yields them together with topic and partition
      # info
      #
      # @yieldparam [Routing::Topic] karafka routing topic object
      # @yieldparam [Integer] partition number
      # @yieldparam [Executor] given executor
      def each
        @buffer.each do |ktopic, by_partition|
          by_partition.each do |partition, executors|
            # The parallel key does not serve any value when iterating, hence each_value
            executors.each_value do |executor|
              yield(ktopic, partition, executor)
            end
          end
        end
      end

      # Clears the executors buffer. Useful for critical errors recovery.
      def clear
        @buffer.clear
      end

      private

      # Finds topic based on its name
      #
      # @param topic [String] topic we're looking for
      # @return [Karafka::Routing::Topic] topic we're interested in
      def find_topic(topic)
        @subscription_group.topics.find(topic) || raise(Errors::TopicNotFoundError, topic)
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
# frozen_string_literal: true

module Karafka
  module Processing
    # Namespace for all the jobs that are supposed to run in workers.
    module Jobs
      # Base class for all the job types that are supposed to run in worker threads.
      # Each job can have 3 main entry-points: `#before_call`, `#call` and `#after_call`
      # Only `#call` is required.
      class Base
        extend Forwardable

        # @note Since one job has always one executor, we use the jobs id and group id as
        #   reference
        def_delegators :executor, :id, :group_id

        # @return [Karafka::Processing::Executor, nil] executor assigned to this job
        attr_reader :executor

        # Creates a new job instance
        def initialize
          # All jobs are blocking by default and they can release the lock when blocking
          # operations are done (if needed)
          @non_blocking = false
        end

        # When redefined can run any code prior to the job being enqueued
        # @note This will run in the listener thread and not in the worker
        def before_enqueue; end

        # When redefined can run any code that should run before executing the proper code
        def before_call; end

        # The main entry-point of a job
        def call
          raise NotImplementedError, 'Please implement in a subclass'
        end

        # When redefined can run any code that should run after executing the proper code
        def after_call; end

        # @return [Boolean] is this a non-blocking job
        #
        # @note Blocking job is a job, that will cause the job queue to wait until it is
        #   finished before removing the lock on new jobs being added
        #
        # @note All the jobs are blocking by default
        #
        # @note Job **needs** to mark itself as non-blocking only **after** it is done with all
        #   the blocking things (pausing partition, etc).
        def non_blocking?
          @non_blocking
        end
      end
    end
  end
end