RubyGems - karafka - Versions diffs - 2.2.13 → 2.3.0.alpha1 - Mend

karafka 2.2.13 → 2.3.0.alpha1

Files changed (125) hide show

checksums.yaml +4 -4
checksums.yaml.gz.sig +0 -0
data/.github/workflows/ci.yml +38 -12
data/.ruby-version +1 -1
data/CHANGELOG.md +161 -125
data/Gemfile.lock +12 -12
data/README.md +0 -2
data/SECURITY.md +23 -0
data/config/locales/errors.yml +7 -1
data/config/locales/pro_errors.yml +22 -0
data/docker-compose.yml +3 -1
data/karafka.gemspec +2 -2
data/lib/karafka/admin/acl.rb +287 -0
data/lib/karafka/admin.rb +118 -16
data/lib/karafka/app.rb +12 -3
data/lib/karafka/base_consumer.rb +32 -31
data/lib/karafka/cli/base.rb +1 -1
data/lib/karafka/connection/client.rb +94 -84
data/lib/karafka/connection/conductor.rb +28 -0
data/lib/karafka/connection/listener.rb +165 -46
data/lib/karafka/connection/listeners_batch.rb +5 -11
data/lib/karafka/connection/manager.rb +72 -0
data/lib/karafka/connection/messages_buffer.rb +12 -0
data/lib/karafka/connection/proxy.rb +17 -0
data/lib/karafka/connection/status.rb +75 -0
data/lib/karafka/contracts/config.rb +14 -10
data/lib/karafka/contracts/consumer_group.rb +9 -1
data/lib/karafka/contracts/topic.rb +3 -1
data/lib/karafka/errors.rb +13 -0
data/lib/karafka/instrumentation/assignments_tracker.rb +96 -0
data/lib/karafka/instrumentation/callbacks/rebalance.rb +10 -7
data/lib/karafka/instrumentation/logger_listener.rb +3 -9
data/lib/karafka/instrumentation/notifications.rb +19 -9
data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +31 -28
data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +22 -3
data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +15 -12
data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +39 -36
data/lib/karafka/pro/base_consumer.rb +47 -0
data/lib/karafka/pro/connection/manager.rb +300 -0
data/lib/karafka/pro/connection/multiplexing/listener.rb +40 -0
data/lib/karafka/pro/instrumentation/performance_tracker.rb +85 -0
data/lib/karafka/pro/iterator/tpl_builder.rb +1 -1
data/lib/karafka/pro/iterator.rb +1 -6
data/lib/karafka/pro/loader.rb +16 -2
data/lib/karafka/pro/processing/coordinator.rb +2 -1
data/lib/karafka/pro/processing/executor.rb +37 -0
data/lib/karafka/pro/processing/expansions_selector.rb +32 -0
data/lib/karafka/pro/processing/jobs/periodic.rb +41 -0
data/lib/karafka/pro/processing/jobs/periodic_non_blocking.rb +32 -0
data/lib/karafka/pro/processing/jobs_builder.rb +14 -3
data/lib/karafka/pro/processing/offset_metadata/consumer.rb +44 -0
data/lib/karafka/pro/processing/offset_metadata/fetcher.rb +131 -0
data/lib/karafka/pro/processing/offset_metadata/listener.rb +46 -0
data/lib/karafka/pro/processing/schedulers/base.rb +143 -0
data/lib/karafka/pro/processing/schedulers/default.rb +107 -0
data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
data/lib/karafka/pro/processing/strategies/default.rb +136 -3
data/lib/karafka/pro/processing/strategies/dlq/default.rb +35 -0
data/lib/karafka/pro/processing/strategies/lrj/default.rb +1 -1
data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
data/lib/karafka/pro/processing/strategies/vp/default.rb +60 -26
data/lib/karafka/pro/processing/virtual_offset_manager.rb +41 -11
data/lib/karafka/pro/routing/features/long_running_job/topic.rb +2 -0
data/lib/karafka/pro/routing/features/multiplexing/config.rb +38 -0
data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +114 -0
data/lib/karafka/pro/routing/features/multiplexing/patches/contracts/consumer_group.rb +42 -0
data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +38 -0
data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +42 -0
data/lib/karafka/pro/routing/features/multiplexing/subscription_groups_builder.rb +40 -0
data/lib/karafka/pro/routing/features/multiplexing.rb +59 -0
data/lib/karafka/pro/routing/features/non_blocking_job/topic.rb +32 -0
data/lib/karafka/pro/routing/features/non_blocking_job.rb +37 -0
data/lib/karafka/pro/routing/features/offset_metadata/config.rb +33 -0
data/lib/karafka/pro/routing/features/offset_metadata/contracts/topic.rb +42 -0
data/lib/karafka/pro/routing/features/offset_metadata/topic.rb +65 -0
data/lib/karafka/pro/routing/features/offset_metadata.rb +40 -0
data/lib/karafka/pro/routing/features/patterns/contracts/consumer_group.rb +4 -0
data/lib/karafka/pro/routing/features/patterns/detector.rb +18 -10
data/lib/karafka/pro/routing/features/periodic_job/config.rb +37 -0
data/lib/karafka/pro/routing/features/periodic_job/contracts/topic.rb +44 -0
data/lib/karafka/pro/routing/features/periodic_job/topic.rb +94 -0
data/lib/karafka/pro/routing/features/periodic_job.rb +27 -0
data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +1 -0
data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +7 -2
data/lib/karafka/process.rb +5 -3
data/lib/karafka/processing/coordinator.rb +5 -1
data/lib/karafka/processing/executor.rb +43 -13
data/lib/karafka/processing/executors_buffer.rb +22 -7
data/lib/karafka/processing/jobs/base.rb +19 -2
data/lib/karafka/processing/jobs/consume.rb +3 -3
data/lib/karafka/processing/jobs/idle.rb +5 -0
data/lib/karafka/processing/jobs/revoked.rb +5 -0
data/lib/karafka/processing/jobs/shutdown.rb +5 -0
data/lib/karafka/processing/jobs_queue.rb +19 -8
data/lib/karafka/processing/schedulers/default.rb +42 -0
data/lib/karafka/processing/strategies/base.rb +13 -4
data/lib/karafka/processing/strategies/default.rb +23 -7
data/lib/karafka/processing/strategies/dlq.rb +36 -0
data/lib/karafka/processing/worker.rb +4 -1
data/lib/karafka/routing/builder.rb +12 -2
data/lib/karafka/routing/consumer_group.rb +5 -5
data/lib/karafka/routing/features/base.rb +44 -8
data/lib/karafka/routing/features/dead_letter_queue/config.rb +6 -1
data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
data/lib/karafka/routing/features/dead_letter_queue/topic.rb +9 -2
data/lib/karafka/routing/proxy.rb +4 -3
data/lib/karafka/routing/subscription_group.rb +2 -2
data/lib/karafka/routing/subscription_groups_builder.rb +11 -2
data/lib/karafka/routing/topic.rb +8 -10
data/lib/karafka/routing/topics.rb +1 -1
data/lib/karafka/runner.rb +13 -3
data/lib/karafka/server.rb +5 -9
data/lib/karafka/setup/config.rb +21 -1
data/lib/karafka/status.rb +23 -14
data/lib/karafka/templates/karafka.rb.erb +7 -0
data/lib/karafka/time_trackers/partition_usage.rb +56 -0
data/lib/karafka/version.rb +1 -1
data.tar.gz.sig +0 -0
metadata +47 -13
metadata.gz.sig +0 -0
data/lib/karafka/connection/consumer_group_coordinator.rb +0 -48
data/lib/karafka/pro/performance_tracker.rb +0 -84
data/lib/karafka/pro/processing/scheduler.rb +0 -74
data/lib/karafka/processing/scheduler.rb +0 -38

data/lib/karafka/processing/executor.rb CHANGED Viewed

@@ -11,6 +11,15 @@ module Karafka
     #
     # @note Executors are not removed after partition is revoked. They are not that big and will
     #   be re-used in case of a re-claim
+    #
+    # @note Since a given consumer can run various operations, the executor manages that and its
+    #   lifecycle. There are the following types of operations with appropriate before/after, etc:
+    #
+    #   - consume - primary operation related to running user consumption code
+    #   - idle - cleanup job that runs on idle runs where no messages would be passed to the end
+    #     user. This is used for complex flows with filters, etc
+    #   - revoked - runs after the partition was revoked
+    #   - shutdown - runs when process is going to shutdown
     class Executor
       extend Forwardable
@@ -39,11 +48,11 @@ module Karafka
       end
       # Allows us to prepare the consumer in the listener thread prior to the job being sent to
-      # the queue. It also allows to run some code that is time sensitive and cannot wait in the
+      # the scheduler. It also allows to run some code that is time sensitive and cannot wait in the
       # queue as it could cause starvation.
       #
       # @param messages [Array<Karafka::Messages::Message>]
-      def before_enqueue(messages)
+      def before_schedule_consume(messages)
         # Recreate consumer with each batch if persistence is not enabled
         # We reload the consumers with each batch instead of relying on some external signals
         # when needed for consistency. That way devs may have it on or off and not in this
@@ -60,7 +69,7 @@ module Karafka
           Time.now
         )
-        consumer.on_before_enqueue
+        consumer.on_before_schedule_consume
       end
       # Runs setup and warm-up code in the worker prior to running the consumption
@@ -79,23 +88,23 @@ module Karafka
         consumer.on_after_consume
       end
+      # Runs the code needed before idle work is scheduled
+      def before_schedule_idle
+        consumer.on_before_schedule_idle
+      end
       # Runs consumer idle operations
       # This may include house-keeping or other state management changes that can occur but that
       # do not mean there are any new messages available for the end user to process
       def idle
-        # Initializes the messages set in case idle operation would happen before any processing
-        # This prevents us from having no messages object at all as the messages object and
-        # its metadata may be used for statistics
-        consumer.messages ||= Messages::Builders::Messages.call(
-          [],
-          topic,
-          partition,
-          Time.now
-        )
         consumer.on_idle
       end
+      # Runs code needed before revoked job is scheduled
+      def before_schedule_revoked
+        consumer.on_before_schedule_revoked if @consumer
+      end
       # Runs the controller `#revoked` method that should be triggered when a given consumer is
       # no longer needed due to partitions reassignment.
       #
@@ -112,6 +121,11 @@ module Karafka
         consumer.on_revoked if @consumer
       end
+      # Runs code needed before shutdown job is scheduled
+      def before_schedule_shutdown
+        consumer.on_before_schedule_shutdown if @consumer
+      end
       # Runs the controller `#shutdown` method that should be triggered when a given consumer is
       # no longer needed as we're closing the process.
       #
@@ -146,10 +160,26 @@ module Karafka
           consumer.client = @client
           consumer.producer = ::Karafka::App.producer
           consumer.coordinator = @coordinator
+          # Since we have some message-less flows (idle, etc), we initialize consumer with empty
+          # messages set. In production we have persistent consumers, so this is not a performance
+          # overhead as this will happen only once per consumer lifetime
+          consumer.messages = empty_messages
           consumer
         end
       end
+      # Initializes the messages set in case given operation would happen before any processing
+      # This prevents us from having no messages object at all as the messages object and
+      # its metadata may be used for statistics
+      def empty_messages
+        Messages::Builders::Messages.call(
+          [],
+          topic,
+          partition,
+          Time.now
+        )
+      end
     end
   end
 end

data/lib/karafka/processing/executors_buffer.rb CHANGED Viewed

@@ -13,6 +13,7 @@ module Karafka
         @client = client
         # We need two layers here to keep track of topics, partitions and processing groups
         @buffer = Hash.new { |h, k| h[k] = Hash.new { |h2, k2| h2[k2] = {} } }
+        @executor_class = Karafka::App.config.internal.processing.executor_class
       end
       # Finds or creates an executor based on the provided details
@@ -21,15 +22,28 @@ module Karafka
       # @param partition [Integer] partition number
       # @param parallel_key [String] parallel group key
       # @param coordinator [Karafka::Processing::Coordinator]
-      # @return [Executor] consumer executor
+      # @return [Executor, Pro::Processing::Executor] consumer executor
       def find_or_create(topic, partition, parallel_key, coordinator)
-        @buffer[topic][partition][parallel_key] ||= Executor.new(
+        @buffer[topic][partition][parallel_key] ||= @executor_class.new(
           @subscription_group.id,
           @client,
           coordinator
         )
       end
+      # Finds all existing executors for given topic partition or creates one for it
+      # @param topic [String] topic name
+      # @param partition [Integer] partition number
+      # @param coordinator [Karafka::Processing::Coordinator]
+      # @return [Array<Executor, Pro::Processing::Executor>]
+      def find_all_or_create(topic, partition, coordinator)
+        existing = find_all(topic, partition)
+        return existing unless existing.empty?
+        [find_or_create(topic, partition, 0, coordinator)]
+      end
       # Revokes executors of a given topic partition, so they won't be used anymore for incoming
       # messages
       #
@@ -43,7 +57,8 @@ module Karafka
       #
       # @param topic [String] topic name
       # @param partition [Integer] partition number
-      # @return [Array<Executor>] executors in use for this topic + partition
+      # @return [Array<Executor, Pro::Processing::Executor>] executors in use for this
+      #   topic + partition
       def find_all(topic, partition)
         @buffer[topic][partition].values
       end
@@ -52,11 +67,11 @@ module Karafka
       # info
       # @yieldparam [Routing::Topic] karafka routing topic object
       # @yieldparam [Integer] partition number
-      # @yieldparam [Executor] given executor
+      # @yieldparam [Executor, Pro::Processing::Executor] given executor
       def each
-        @buffer.each do |_, partitions|
-          partitions.each do |_, executors|
-            executors.each do |_, executor|
+        @buffer.each_value do |partitions|
+          partitions.each_value do |executors|
+            executors.each_value do |executor|
               yield(executor)
             end
           end

data/lib/karafka/processing/jobs/base.rb CHANGED Viewed

@@ -20,11 +20,14 @@ module Karafka
           # All jobs are blocking by default and they can release the lock when blocking operations
           # are done (if needed)
           @non_blocking = false
+          @status = :pending
         end
-        # When redefined can run any code prior to the job being enqueued
+        # When redefined can run any code prior to the job being scheduled
         # @note This will run in the listener thread and not in the worker
-        def before_enqueue; end
+        def before_schedule
+          raise NotImplementedError, 'Please implement in a subclass'
+        end
         # When redefined can run any code that should run before executing the proper code
         def before_call; end
@@ -49,6 +52,20 @@ module Karafka
         def non_blocking?
           @non_blocking
         end
+        # @return [Boolean] was this job finished.
+        def finished?
+          @status == :finished
+        end
+        # Marks the job as finished. Used by the worker to indicate that this job is done.
+        #
+        # @note Since the scheduler knows exactly when it schedules jobs and when it keeps them
+        #   pending, we do not need advanced state tracking and the only information from the
+        #   "outside" is whether it was finished or not after it was scheduled for execution.
+        def finish!
+          @status = :finished
+        end
       end
     end
   end

data/lib/karafka/processing/jobs/consume.rb CHANGED Viewed

@@ -20,9 +20,9 @@ module Karafka
         end
         # Runs all the preparation code on the executor that needs to happen before the job is
-        # enqueued.
-        def before_enqueue
-          executor.before_enqueue(@messages)
+        # scheduled.
+        def before_schedule
+          executor.before_schedule_consume(@messages)
         end
         # Runs the before consumption preparations on the executor

data/lib/karafka/processing/jobs/idle.rb CHANGED Viewed

@@ -14,6 +14,11 @@ module Karafka
           super()
         end
+        # Runs code prior to scheduling this idle job
+        def before_schedule
+          executor.before_schedule_idle
+        end
         # Run the idle work via the executor
         def call
           executor.idle

data/lib/karafka/processing/jobs/revoked.rb CHANGED Viewed

@@ -12,6 +12,11 @@ module Karafka
           super()
         end
+        # Runs code prior to scheduling this revoked job
+        def before_schedule
+          executor.before_schedule_revoked
+        end
         # Runs the revoking job via an executor.
         def call
           executor.revoked

data/lib/karafka/processing/jobs/shutdown.rb CHANGED Viewed

@@ -13,6 +13,11 @@ module Karafka
           super()
         end
+        # Runs code prior to scheduling this shutdown job
+        def before_schedule
+          executor.before_schedule_shutdown
+        end
         # Runs the shutdown job via an executor.
         def call
           executor.shutdown

data/lib/karafka/processing/jobs_queue.rb CHANGED Viewed

@@ -23,12 +23,7 @@ module Karafka
         # scheduled by Ruby hundreds of thousands of times per group.
         # We cannot use a single semaphore as it could potentially block in listeners that should
         # process with their data and also could unlock when a given group needs to remain locked
-        @semaphores = Concurrent::Map.new do |h, k|
-          # Ruby prior to 3.2 did not have queue with a timeout on `#pop`, that is why for those
-          # versions we use our custom queue wrapper
-          h.compute_if_absent(k) { RUBY_VERSION < '3.2' ? TimedQueue.new : Queue.new }
-        end
+        @semaphores = {}
         @concurrency = Karafka::App.config.concurrency
         @tick_interval = ::Karafka::App.config.internal.tick_interval
         @in_processing = Hash.new { |h, k| h[k] = [] }
@@ -37,6 +32,22 @@ module Karafka
         @mutex = Mutex.new
       end
+      # Registers given subscription group id in the queue. It is needed so we do not dynamically
+      # create semaphore, hence avoiding potential race conditions
+      #
+      # @param group_id [String]
+      def register(group_id)
+        @mutex.synchronize do
+          # Ruby prior to 3.2 did not have queue with a timeout on `#pop`, that is why for those
+          # versions we use our custom queue wrapper
+          #
+          # Initializes this semaphore from the mutex, so it is never auto-created
+          # Since we always schedule a job before waiting using semaphores, there won't be any
+          # concurrency problems
+          @semaphores[group_id] = RUBY_VERSION < '3.2' ? TimedQueue.new : Queue.new
+        end
+      end
       # Adds the job to the internal main queue, scheduling it for execution in a worker and marks
       # this job as in processing pipeline.
       #
@@ -79,7 +90,7 @@ module Karafka
       # @param group_id [String] id of the group we want to unlock for one tick
       # @note This does not release the wait lock. It just causes a conditions recheck
       def tick(group_id)
-        @semaphores[group_id] << true
+        @semaphores.fetch(group_id) << true
       end
       # Marks a given job from a given group as completed. When there are no more jobs from a given
@@ -149,7 +160,7 @@ module Karafka
         while wait?(group_id)
           yield if block_given?
-          @semaphores[group_id].pop(timeout: @tick_interval / 1_000.0)
+          @semaphores.fetch(group_id).pop(timeout: @tick_interval / 1_000.0)
         end
       end

data/lib/karafka/processing/schedulers/default.rb ADDED Viewed

@@ -0,0 +1,42 @@
+# frozen_string_literal: true
+module Karafka
+  module Processing
+    # Namespace for Karafka OSS schedulers
+    module Schedulers
+      # FIFO scheduler for messages coming from various topics and partitions
+      class Default
+        # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
+        def initialize(queue)
+          @queue = queue
+        end
+        # Schedules jobs in the fifo order
+        #
+        # @param jobs_array [Array<Karafka::Processing::Jobs::Consume>] jobs we want to schedule
+        def on_schedule_consumption(jobs_array)
+          jobs_array.each do |job|
+            @queue << job
+          end
+        end
+        # Revocation, shutdown and idle jobs can also run in fifo by default
+        alias on_schedule_revocation on_schedule_consumption
+        alias on_schedule_shutdown on_schedule_consumption
+        alias on_schedule_idle on_schedule_consumption
+        # This scheduler does not have anything to manage as it is a pass through and has no state
+        def on_manage
+          nil
+        end
+        # This scheduler does not need to be cleared because it is stateless
+        #
+        # @param _group_id [String] Subscription group id
+        def on_clear(_group_id)
+          nil
+        end
+      end
+    end
+  end
+end

data/lib/karafka/processing/strategies/base.rb CHANGED Viewed

@@ -11,10 +11,19 @@ module Karafka
     module Strategies
       # Base strategy that should be included in each strategy, just to ensure the API
       module Base
-        # What should happen before jobs are enqueued
-        # @note This runs from the listener thread, not recommended to put anything slow here
-        def handle_before_enqueue
-          raise NotImplementedError, 'Implement in a subclass'
+        # Defines all the before schedule handlers for appropriate actions
+        %i[
+          consume
+          idle
+          revoked
+          shutdown
+        ].each do |action|
+          class_eval <<~RUBY, __FILE__, __LINE__ + 1
+            def handle_before_schedule_#{action}
+              # What should happen before scheduling this work
+              raise NotImplementedError, 'Implement in a subclass'
+            end
+          RUBY
         end
         # What should happen before we kick in the processing

data/lib/karafka/processing/strategies/default.rb CHANGED Viewed

@@ -13,6 +13,23 @@ module Karafka
         # Apply strategy for a non-feature based flow
         FEATURES = %i[].freeze
+        # By default on all "before schedule" we just run instrumentation, nothing more
+        %i[
+          consume
+          idle
+          revoked
+          shutdown
+        ].each do |action|
+          class_eval <<~RUBY, __FILE__, __LINE__ + 1
+            # No actions needed for the standard flow here
+            def handle_before_schedule_#{action}
+              Karafka.monitor.instrument('consumer.before_schedule_#{action}', caller: self)
+              nil
+            end
+          RUBY
+        end
         # Marks message as consumed in an async way.
         #
         # @param message [Messages::Message] last successfully processed message.
@@ -24,6 +41,9 @@ module Karafka
         #   already processed but rather at the next one. This applies to both sync and async
         #   versions of this method.
         def mark_as_consumed(message)
+          # Seek offset can be nil only when `#seek` was invoked with an offset reset request
+          # In such a case we ignore marking
+          return true if coordinator.seek_offset.nil?
           # Ignore earlier offsets than the one we already committed
           return true if coordinator.seek_offset > message.offset
           return false if revoked?
@@ -40,6 +60,9 @@ module Karafka
         # @return [Boolean] true if we were able to mark the offset, false otherwise.
         #   False indicates that we were not able and that we have lost the partition.
         def mark_as_consumed!(message)
+          # Seek offset can be nil only when `#seek` was invoked with an offset reset request
+          # In such a case we ignore marking
+          return true if coordinator.seek_offset.nil?
           # Ignore earlier offsets than the one we already committed
           return true if coordinator.seek_offset > message.offset
           return false if revoked?
@@ -76,13 +99,6 @@ module Karafka
           commit_offsets(async: false)
         end
-        # No actions needed for the standard flow here
-        def handle_before_enqueue
-          Karafka.monitor.instrument('consumer.before_enqueue', caller: self)
-          nil
-        end
         # Increment number of attempts
         def handle_before_consume
           coordinator.pause_tracker.increment

data/lib/karafka/processing/strategies/dlq.rb CHANGED Viewed

@@ -14,6 +14,42 @@ module Karafka
           dead_letter_queue
         ].freeze
+        # Override of the standard `#mark_as_consumed` in order to handle the pause tracker
+        # reset in case DLQ is marked as fully independent. When DLQ is marked independent,
+        # any offset marking causes the pause count tracker to reset. This is useful when
+        # the error is not due to the collective batch operations state but due to intermediate
+        # "crawling" errors that move with it
+        #
+        # @see `Strategies::Default#mark_as_consumed` for more details
+        # @param message [Messages::Message]
+        def mark_as_consumed(message)
+          # If we are not retrying pause count is already 0, no need to try to reset the state
+          return super unless retrying?
+          # If we do not use independent marking on DLQ, we just mark as consumed
+          return super unless topic.dead_letter_queue.independent?
+          # If we were not able to mark no need to reset
+          return false unless super
+          coordinator.pause_tracker.reset
+          true
+        end
+        # Override of the standard `#mark_as_consumed!`. Resets the pause tracker count in case
+        # DLQ was configured with the `independent` flag.
+        #
+        # @see `Strategies::Default#mark_as_consumed!` for more details
+        # @param message [Messages::Message]
+        def mark_as_consumed!(message)
+          return super unless retrying?
+          return super unless topic.dead_letter_queue.independent?
+          return false unless super
+          coordinator.pause_tracker.reset
+          true
+        end
         # When manual offset management is on, we do not mark anything as consumed automatically
         # and we rely on the user to figure things out
         def handle_after_consume

data/lib/karafka/processing/worker.rb CHANGED Viewed

@@ -83,7 +83,10 @@ module Karafka
         )
       ensure
         # job can be nil when the queue is being closed
-        @jobs_queue.complete(job) if job
+        if job
+          @jobs_queue.complete(job)
+          job.finish!
+        end
         # Always publish info, that we completed all the work despite its result
         Karafka.monitor.instrument('worker.completed', instrument_details)

data/lib/karafka/routing/builder.rb CHANGED Viewed

@@ -109,10 +109,20 @@ module Karafka
       # subscription group customization
       # @param subscription_group_name [String, Symbol] subscription group id. When not provided,
       #   a random uuid will be used
+      # @param args [Array] any extra arguments accepted by the subscription group builder
       # @param block [Proc] further topics definitions
-      def subscription_group(subscription_group_name = SubscriptionGroup.id, &block)
+      def subscription_group(
+        subscription_group_name = SubscriptionGroup.id,
+        **args,
+        &block
+      )
         consumer_group('app') do
-          target.public_send(:subscription_group=, subscription_group_name.to_s, &block)
+          target.public_send(
+            :subscription_group=,
+            subscription_group_name.to_s,
+            **args,
+            &block
+          )
         end
       end

data/lib/karafka/routing/consumer_group.rb CHANGED Viewed

@@ -14,7 +14,7 @@ module Karafka
       # It allows us to store the "current" subscription group defined in the routing
       # This subscription group id is then injected into topics, so we can compute the subscription
       # groups
-      attr_accessor :current_subscription_group_name
+      attr_accessor :current_subscription_group_details
       # @param name [String, Symbol] raw name of this consumer group. Raw means, that it does not
       #   yet have an application client_id namespace, this will be added here by default.
@@ -26,7 +26,7 @@ module Karafka
         @topics = Topics.new([])
         # Initialize the subscription group so there's always a value for it, since even if not
         # defined directly, a subscription group will be created
-        @current_subscription_group_name = SubscriptionGroup.id
+        @current_subscription_group_details = { name: SubscriptionGroup.id }
       end
       # @return [Boolean] true if this consumer group should be active in our current process
@@ -48,7 +48,7 @@ module Karafka
         built_topic = @topics.last
         # We overwrite it conditionally in case it was not set by the user inline in the topic
         # block definition
-        built_topic.subscription_group_name ||= current_subscription_group_name
+        built_topic.subscription_group_details ||= current_subscription_group_details
         built_topic
       end
@@ -59,13 +59,13 @@ module Karafka
       def subscription_group=(name = SubscriptionGroup.id, &block)
         # We cast it here so the routing supports symbol-based names; it is validated as a string
         # later on anyway
-        @current_subscription_group_name = name.to_s
+        @current_subscription_group_details = { name: name.to_s }
         Proxy.new(self, &block)
         # We need to reset the current subscription group after it is used, so it won't leak
         # outside to other topics that would be defined without a defined subscription group
-        @current_subscription_group_name = SubscriptionGroup.id
+        @current_subscription_group_details = { name: SubscriptionGroup.id }
       end
       # @return [Array<Routing::SubscriptionGroup>] all the subscription groups build based on

data/lib/karafka/routing/features/base.rb CHANGED Viewed

@@ -13,17 +13,46 @@ module Karafka
         class << self
           # Extends topic and builder with given feature API
           def activate
-            Topic.prepend(self::Topic) if const_defined?('Topic', false)
-            Topics.prepend(self::Topics) if const_defined?('Topics', false)
-            ConsumerGroup.prepend(self::ConsumerGroup) if const_defined?('ConsumerGroup', false)
-            Proxy.prepend(self::Proxy) if const_defined?('Proxy', false)
-            Builder.prepend(self::Builder) if const_defined?('Builder', false)
-            Builder.prepend(Base::Expander.new(self)) if const_defined?('Contracts', false)
+            if const_defined?('Topic', false)
+              Topic.prepend(self::Topic)
+            end
+            if const_defined?('Topics', false)
+              Topics.prepend(self::Topics)
+            end
+            if const_defined?('ConsumerGroup', false)
+              ConsumerGroup.prepend(self::ConsumerGroup)
+            end
+            if const_defined?('Proxy', false)
+              Proxy.prepend(self::Proxy)
+            end
+            if const_defined?('Builder', false)
+              Builder.prepend(self::Builder)
+            end
+            if const_defined?('Contracts', false)
+              Builder.prepend(Base::Expander.new(self))
+            end
+            if const_defined?('SubscriptionGroup', false)
+              SubscriptionGroup.prepend(self::SubscriptionGroup)
+            end
+            if const_defined?('SubscriptionGroupsBuilder', false)
+              SubscriptionGroupsBuilder.prepend(self::SubscriptionGroupsBuilder)
+            end
           end
-          # Loads all the features and activates them
+          # Loads all the features and activates them once
           def load_all
+            return if @loaded
             features.each(&:activate)
+            @loaded = true
           end
           # @param config [Karafka::Core::Configurable::Node] app config that we can alter with
@@ -41,11 +70,18 @@ module Karafka
           private
-          # @return [Array<Class>] all available routing features
+          # @return [Array<Class>] all available routing features that are direct descendants of
+          #   the features base. Using `#superclass` prevents us from accidentally
+          #   loading Pro components
           def features
             ObjectSpace
               .each_object(Class)
               .select { |klass| klass < self }
+              # Ensures that Pro components are only loaded when we operate in Pro mode. Since
+              # outside of specs Zeitwerk does not require them at all, they will not be loaded
+              # anyhow, but for specs this needs to be done as RSpec requires all files to be
+              # present
+              .reject { |klass| Karafka.pro? ? false : klass.superclass != self }
               .sort_by(&:to_s)
           end

data/lib/karafka/routing/features/dead_letter_queue/config.rb CHANGED Viewed

@@ -11,8 +11,13 @@ module Karafka
           :max_retries,
           # To what topic the skipped messages should be moved
           :topic,
+          # Should retries be handled collectively on a batch or independently per message
+          :independent,
           keyword_init: true
-        ) { alias_method :active?, :active }
+        ) do
+          alias_method :active?, :active
+          alias_method :independent?, :independent
+        end
       end
     end
   end

data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb CHANGED Viewed

@@ -18,6 +18,7 @@ module Karafka
             nested :dead_letter_queue do
               required(:active) { |val| [true, false].include?(val) }
+              required(:independent) { |val| [true, false].include?(val) }
               required(:max_retries) { |val| val.is_a?(Integer) && val >= 0 }
             end