karafka 2.0.41 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +2 -2
  4. data/CHANGELOG.md +20 -1
  5. data/Gemfile.lock +2 -1
  6. data/config/locales/errors.yml +10 -0
  7. data/config/locales/pro_errors.yml +0 -2
  8. data/lib/karafka/active_job/consumer.rb +16 -11
  9. data/lib/karafka/active_job/current_attributes/loading.rb +36 -0
  10. data/lib/karafka/active_job/current_attributes/persistence.rb +28 -0
  11. data/lib/karafka/active_job/current_attributes.rb +42 -0
  12. data/lib/karafka/active_job/dispatcher.rb +8 -2
  13. data/lib/karafka/connection/client.rb +1 -1
  14. data/lib/karafka/errors.rb +3 -0
  15. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +166 -0
  16. data/lib/karafka/pro/active_job/consumer.rb +1 -10
  17. data/lib/karafka/pro/active_job/dispatcher.rb +2 -2
  18. data/lib/karafka/pro/processing/coordinator.rb +20 -1
  19. data/lib/karafka/pro/processing/filters/virtual_limiter.rb +52 -0
  20. data/lib/karafka/pro/processing/filters_applier.rb +4 -0
  21. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +1 -1
  22. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +3 -1
  23. data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +2 -2
  24. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +2 -0
  25. data/lib/karafka/pro/processing/strategies/aj/mom_vp.rb +1 -1
  26. data/lib/karafka/pro/processing/strategies/dlq/ftr.rb +1 -1
  27. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +3 -6
  28. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom_vp.rb +43 -0
  29. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_vp.rb +1 -0
  30. data/lib/karafka/pro/processing/strategies/dlq/ftr_mom.rb +3 -7
  31. data/lib/karafka/pro/processing/strategies/dlq/ftr_mom_vp.rb +41 -0
  32. data/lib/karafka/pro/processing/strategies/dlq/ftr_vp.rb +1 -0
  33. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +3 -6
  34. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom_vp.rb +36 -0
  35. data/lib/karafka/pro/processing/strategies/dlq/lrj_vp.rb +1 -0
  36. data/lib/karafka/pro/processing/strategies/dlq/mom.rb +8 -7
  37. data/lib/karafka/pro/processing/strategies/dlq/mom_vp.rb +37 -0
  38. data/lib/karafka/pro/processing/strategies/lrj/default.rb +2 -0
  39. data/lib/karafka/pro/processing/strategies/lrj/ftr_mom_vp.rb +40 -0
  40. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +2 -0
  41. data/lib/karafka/pro/processing/strategies/lrj/mom_vp.rb +38 -0
  42. data/lib/karafka/pro/processing/strategies/mom/ftr_vp.rb +37 -0
  43. data/lib/karafka/pro/{base_consumer.rb → processing/strategies/mom/vp.rb} +17 -7
  44. data/lib/karafka/pro/processing/strategies/vp/default.rb +51 -0
  45. data/lib/karafka/pro/processing/virtual_offset_manager.rb +147 -0
  46. data/lib/karafka/pro/routing/features/virtual_partitions/contract.rb +0 -17
  47. data/lib/karafka/processing/strategies/default.rb +2 -0
  48. data/lib/karafka/processing/strategies/dlq_mom.rb +9 -7
  49. data/lib/karafka/version.rb +1 -1
  50. data/lib/karafka.rb +5 -0
  51. data.tar.gz.sig +0 -0
  52. metadata +16 -4
  53. metadata.gz.sig +0 -0
  54. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +0 -16
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9560b22fc8cfd59dcaeb6551bcc0b2d2ebfef2f162a12905fa3aefe0c9c5865e
4
- data.tar.gz: a5b7aba125288ec96cf3b862e72447bf467fe23f923c232fe1d3ff9c48b1fdb4
3
+ metadata.gz: b3d0a2f78b4bf7fa8f49527d48d2e877b95597566e07beabf0166a02259a936b
4
+ data.tar.gz: fc6054ad5f99bfe8a678c337167f93fc612dddfe88494f8891158dbd8610fb7f
5
5
  SHA512:
6
- metadata.gz: d6581af85f8900d2d5ce91b6f9ec8ed0e1f6be5f3e80c36315c44c8dc07c30949566e281f40feb1b54cc9bbca771ac2188637e916d19edcb2fe26c04aeb362e1
7
- data.tar.gz: e467612b3185b5ec764d387e72507b617bf49d702436da2021d467fb0c23630aa98151c9679444a34114317a8c52e3c37f0268c6a6bdb4564ffa1bab51993109
6
+ metadata.gz: 0fb1fa88ef76ce81e145797a1364ac36bea2b94c47e733856cfd5ec9b37d0d9e2e984a3e4ef7fc36d2ac34c448e490cfdea4e10fae886cd80fb289798e55d308
7
+ data.tar.gz: 68df2bc1edb9acccd45d32428b43fd5dee12b9333c6059c801aa4ac03b3b89c01e93e8ea4ebbb8021618447c1122c8c5df64afc86747d6f0deec0cf992237e82
checksums.yaml.gz.sig CHANGED
Binary file
@@ -62,7 +62,7 @@ jobs:
62
62
  run: \curl -sSL https://api.coditsu.io/run/ci | bash
63
63
 
64
64
  specs:
65
- timeout-minutes: 45
65
+ timeout-minutes: 30
66
66
  runs-on: ubuntu-latest
67
67
  needs: diffend
68
68
  strategy:
@@ -102,7 +102,7 @@ jobs:
102
102
  run: bin/rspecs
103
103
 
104
104
  integrations:
105
- timeout-minutes: 30
105
+ timeout-minutes: 45
106
106
  runs-on: ubuntu-latest
107
107
  needs: diffend
108
108
  strategy:
data/CHANGELOG.md CHANGED
@@ -1,5 +1,24 @@
1
1
  # Karafka framework changelog
2
2
 
3
+ ## 2.1.0 (2023-05-22)
4
+ - **[Feature]** Provide ability to use CurrentAttributes with ActiveJob's Karafka adapter.
5
+ - **[Feature]** Introduce collective Virtual Partitions offset management.
6
+ - **[Feature]** Use virtual offsets to filter out messages that would be re-processed upon retries.
7
+ - [Improvement] No longer break processing on failing parallel virtual partitions in ActiveJob because it is compensated by virtual marking.
8
+ - [Improvement] Always use Virtual offset management for Pro ActiveJobs.
9
+ - [Improvement] Do not attempt to mark offsets on already revoked partitions.
10
+ - [Improvement] Make sure, that VP components are not injected into non VP strategies.
11
+ - [Improvement] Improve complex strategies inheritance flow.
12
+ - [Improvement] Optimize offset management for DLQ + MoM feature combinations.
13
+ - [Change] Removed `Karafka::Pro::BaseConsumer` in favor of `Karafka::BaseConsumer`. (#1345)
14
+ - [Fix] Fix for `max_messages` and `max_wait_time` not having reference in errors.yml (#1443)
15
+
16
+ ### Upgrade notes
17
+
18
+ 1. Upgrade to Karafka `2.0.41` prior to upgrading to `2.1.0`.
19
+ 2. Replace `Karafka::Pro::BaseConsumer` references to `Karafka::BaseConsumer`.
20
 + 3. Replace `Karafka::Instrumentation::Vendors::Datadog::Listener` with `Karafka::Instrumentation::Vendors::Datadog::MetricsListener`.
21
+
3
22
  ## 2.0.41 (2023-04-19)
4
23
  - **[Feature]** Provide `Karafka::Pro::Iterator` for anonymous topic/partitions iterations and messages lookups (#1389 and #1427).
5
24
  - [Improvement] Optimize topic lookup for `read_topic` admin method usage.
@@ -60,7 +79,7 @@
60
79
 
61
80
  ## 2.0.35 (2023-03-13)
62
81
  - **[Feature]** Allow for defining topics config via the DSL and its automatic creation via CLI command.
63
- - **[Feature]** Allow for full topics reset and topics repartitioning via the CLI.
82
+ - **[Feature]** Allow for full topics reset and topics repartitioning via the CLI.
64
83
 
65
84
  ## 2.0.34 (2023-03-04)
66
85
  - [Improvement] Attach an `embedded` tag to Karafka processes started using the embedded API.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- karafka (2.0.41)
4
+ karafka (2.1.0)
5
5
  karafka-core (>= 2.0.12, < 3.0.0)
6
6
  thor (>= 0.20)
7
7
  waterdrop (>= 2.4.10, < 3.0.0)
@@ -78,6 +78,7 @@ GEM
78
78
  zeitwerk (2.6.7)
79
79
 
80
80
  PLATFORMS
81
+ arm64-darwin-21
81
82
  x86_64-linux
82
83
 
83
84
  DEPENDENCIES
@@ -15,6 +15,13 @@ en:
15
15
  shutdown_timeout_format: needs to be an integer bigger than 0
16
16
  max_wait_time_format: needs to be an integer bigger than 0
17
17
  kafka_format: needs to be a filled hash
18
+ internal.processing.jobs_builder_format: cannot be nil
19
+ internal.processing.scheduler: cannot be nil
20
+ internal.processing.coordinator_class: cannot be nil
21
+ internal.processing.partitioner_class: cannot be nil
22
+ internal.active_job.dispatcher: cannot be nil
23
+ internal.active_job.job_options_contract: cannot be nil
24
+ internal.active_job.consumer_class: cannot be nil
18
25
  internal.status_format: needs to be present
19
26
  internal.process_format: needs to be present
20
27
  internal.routing.builder_format: needs to be present
@@ -31,7 +38,10 @@ en:
31
38
  topics_missing: No topics to subscribe to
32
39
 
33
40
  topic:
41
+ kafka: needs to be a hash with kafka scope settings details
34
42
  missing: needs to be present
43
+ max_messages_format: 'needs to be an integer bigger than 0'
44
+ max_wait_time_format: 'needs to be an integer bigger than 0'
35
45
  name_format: 'needs to be a string with a Kafka accepted format'
36
46
  deserializer_format: needs to be present
37
47
  consumer_format: needs to be present
@@ -4,8 +4,6 @@ en:
4
4
  virtual_partitions.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
5
5
  virtual_partitions.max_partitions_format: needs to be equal or more than 1
6
6
 
7
- manual_offset_management_not_with_virtual_partitions: cannot be used together with Virtual Partitions
8
-
9
7
  long_running_job.active_format: needs to be either true or false
10
8
 
11
9
  dead_letter_queue_with_virtual_partitions: when using Dead Letter Queue with Virtual Partitions, at least one retry is required.
@@ -24,21 +24,26 @@ module Karafka
24
24
  #
25
25
  # @param job_message [Karafka::Messages::Message] message with active job
26
26
  def consume_job(job_message)
27
- # We technically speaking could set this as deserializer and reference it from the
28
- # message instead of using the `#raw_payload`. This is not done on purpose to simplify
29
- # the ActiveJob setup here
30
- job = ::ActiveSupport::JSON.decode(job_message.raw_payload)
27
+ with_deserialized_job(job_message) do |job|
28
+ tags.add(:job_class, job['job_class'])
31
29
 
32
- tags.add(:job_class, job['job_class'])
30
+ payload = { caller: self, job: job, message: job_message }
33
31
 
34
- payload = { caller: self, job: job, message: job_message }
35
-
36
- # We publish both to make it consistent with `consumer.x` events
37
- Karafka.monitor.instrument('active_job.consume', payload)
38
- Karafka.monitor.instrument('active_job.consumed', payload) do
39
- ::ActiveJob::Base.execute(job)
32
+ # We publish both to make it consistent with `consumer.x` events
33
+ Karafka.monitor.instrument('active_job.consume', payload)
34
+ Karafka.monitor.instrument('active_job.consumed', payload) do
35
+ ::ActiveJob::Base.execute(job)
36
+ end
40
37
  end
41
38
  end
39
+
40
+ # @param job_message [Karafka::Messages::Message] message with active job
41
+ def with_deserialized_job(job_message)
42
+ # We technically speaking could set this as deserializer and reference it from the
43
+ # message instead of using the `#raw_payload`. This is not done on purpose to simplify
44
+ # the ActiveJob setup here
45
+ yield ::ActiveSupport::JSON.decode(job_message.raw_payload)
46
+ end
42
47
  end
43
48
  end
44
49
  end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module ActiveJob
5
+ module CurrentAttributes
6
+ # Module expanding the job deserialization to extract current attributes and load them
7
+ # for the time of the job execution
8
+ module Loading
9
+ # @param job_message [Karafka::Messages::Message] message with active job
10
+ def with_deserialized_job(job_message)
11
+ super(job_message) do |job|
12
+ resetable = []
13
+
14
+ _cattr_klasses.each do |key, cattr_klass_str|
15
+ next unless job.key?(key)
16
+
17
+ attributes = job.delete(key)
18
+
19
+ cattr_klass = cattr_klass_str.constantize
20
+
21
+ attributes.each do |name, value|
22
+ cattr_klass.public_send("#{name}=", value)
23
+ end
24
+
25
+ resetable << cattr_klass
26
+ end
27
+
28
+ yield(job)
29
+
30
+ resetable.each(&:reset)
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module ActiveJob
5
+ module CurrentAttributes
6
+ # Module adding the current attributes persistence into the ActiveJob jobs
7
+ module Persistence
8
+ # Alters the job serialization to inject the current attributes into the json before we
9
+ # send it to Kafka
10
+ #
11
+ # @param job [ActiveJob::Base] job
12
+ def serialize_job(job)
13
+ json = super(job)
14
+
15
+ _cattr_klasses.each do |key, cattr_klass_str|
16
+ next if json.key?(key)
17
+
18
+ attrs = cattr_klass_str.constantize.attributes
19
+
20
+ json[key] = attrs unless attrs.empty?
21
+ end
22
+
23
+ json
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'active_support/current_attributes'
4
+ require_relative 'current_attributes/loading'
5
+ require_relative 'current_attributes/persistence'
6
+
7
+ # This code is based on Sidekiqs approach to persisting current attributes
8
+ # @see https://github.com/sidekiq/sidekiq/blob/main/lib/sidekiq/middleware/current_attributes.rb
9
+ module Karafka
10
+ module ActiveJob
11
+ # Module that allows to persist current attributes on Karafka jobs
12
+ module CurrentAttributes
13
+ # Allows for persistence of given current attributes via AJ + Karafka
14
+ #
15
+ # @param klasses [Array<String, Class>] classes or names of the current attributes classes
16
+ def persist(*klasses)
17
+ # Support for providing multiple classes
18
+ klasses = Array(klasses).flatten
19
+
20
+ [Dispatcher, Consumer]
21
+ .reject { |expandable| expandable.respond_to?(:_cattr_klasses) }
22
+ .each { |expandable| expandable.class_attribute :_cattr_klasses, default: {} }
23
+
24
+ # Do not double inject in case of running persist multiple times
25
+ Dispatcher.prepend(Persistence) unless Dispatcher.ancestors.include?(Persistence)
26
+ Consumer.prepend(Loading) unless Consumer.ancestors.include?(Loading)
27
+
28
+ klasses.map(&:to_s).each do |stringified_klass|
29
+ # Prevent registering same klass multiple times
30
+ next if Dispatcher._cattr_klasses.value?(stringified_klass)
31
+
32
+ key = "cattr_#{Dispatcher._cattr_klasses.count}"
33
+
34
+ Dispatcher._cattr_klasses[key] = stringified_klass
35
+ Consumer._cattr_klasses[key] = stringified_klass
36
+ end
37
+ end
38
+
39
+ module_function :persist
40
+ end
41
+ end
42
+ end
@@ -18,7 +18,7 @@ module Karafka
18
18
  ::Karafka.producer.public_send(
19
19
  fetch_option(job, :dispatch_method, DEFAULTS),
20
20
  topic: job.queue_name,
21
- payload: ::ActiveSupport::JSON.encode(job.serialize)
21
+ payload: ::ActiveSupport::JSON.encode(serialize_job(job))
22
22
  )
23
23
  end
24
24
 
@@ -34,7 +34,7 @@ module Karafka
34
34
 
35
35
  dispatches[d_method] << {
36
36
  topic: job.queue_name,
37
- payload: ::ActiveSupport::JSON.encode(job.serialize)
37
+ payload: ::ActiveSupport::JSON.encode(serialize_job(job))
38
38
  }
39
39
  end
40
40
 
@@ -58,6 +58,12 @@ module Karafka
58
58
  .karafka_options
59
59
  .fetch(key, defaults.fetch(key))
60
60
  end
61
+
62
+ # @param job [ActiveJob::Base] job
63
+ # @return [Hash] json representation of the job
64
+ def serialize_job(job)
65
+ job.serialize
66
+ end
61
67
  end
62
68
  end
63
69
  end
@@ -30,7 +30,7 @@ module Karafka
30
30
  #
31
31
  # @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group
32
32
  # with all the configuration details needed for us to create a client
33
- # @return [Karafka::Connection::Rdk::Consumer]
33
+ # @return [Karafka::Connection::Client]
34
34
  def initialize(subscription_group)
35
35
  @id = SecureRandom.hex(6)
36
36
  # Name is set when we build consumer
@@ -46,5 +46,8 @@ module Karafka
46
46
 
47
47
  # This should never happen. Please open an issue if it does.
48
48
  StrategyNotFoundError = Class.new(BaseError)
49
+
50
+ # This should never happen. Please open an issue if it does.
51
+ InvalidRealOffsetUsage = Class.new(BaseError)
49
52
  end
50
53
  end
@@ -0,0 +1,166 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'socket'
4
+
5
+ module Karafka
6
+ module Instrumentation
7
+ module Vendors
8
+ # Namespace for instrumentation related with Kubernetes
9
+ module Kubernetes
10
+ # Kubernetes HTTP listener that does not only reply when process is not fully hanging, but
11
+ # also allows to define max time of processing and looping.
12
+ #
13
+ # Processes like Karafka server can hang while still being reachable. For example, in case
14
+ # something would hang inside of the user code, Karafka could stop polling and no new
15
+ # data would be processed, but process itself would still be active. This listener allows
16
+ # for defining of a ttl that gets bumped on each poll loop and before and after processing
17
+ # of a given messages batch.
18
+ class LivenessListener
19
+ include ::Karafka::Core::Helpers::Time
20
+
21
+ # @param hostname [String, nil] hostname or nil to bind on all
22
+ # @param port [Integer] TCP port on which we want to run our HTTP status server
23
+ # @param consuming_ttl [Integer] time in ms after which we consider consumption hanging.
24
+ # It allows us to define max consumption time after which k8s should consider given
25
+ # process as hanging
26
+ # @param polling_ttl [Integer] max time in ms for polling. If polling (any) does not
27
+ # happen that often, process should be considered dead.
28
+ # @note The default TTL matches the default `max.poll.interval.ms`
29
+ def initialize(
30
+ hostname: nil,
31
+ port: 3000,
32
+ consuming_ttl: 5 * 60 * 1_000,
33
+ polling_ttl: 5 * 60 * 1_000
34
+ )
35
+ @server = TCPServer.new(*[hostname, port].compact)
36
+ @polling_ttl = polling_ttl
37
+ @consuming_ttl = consuming_ttl
38
+ @mutex = Mutex.new
39
+ @pollings = {}
40
+ @consumptions = {}
41
+
42
+ Thread.new do
43
+ loop do
44
+ break unless respond
45
+ end
46
+ end
47
+ end
48
+
49
+ # Tick on each fetch
50
+ # @param _event [Karafka::Core::Monitoring::Event]
51
+ def on_connection_listener_fetch_loop(_event)
52
+ mark_polling_tick
53
+ end
54
+
55
+ # Tick on starting work
56
+ # @param _event [Karafka::Core::Monitoring::Event]
57
+ def on_consumer_consume(_event)
58
+ mark_consumption_tick
59
+ end
60
+
61
+ # Tick on finished work
62
+ # @param _event [Karafka::Core::Monitoring::Event]
63
+ def on_consumer_consumed(_event)
64
+ clear_consumption_tick
65
+ end
66
+
67
+ # @param _event [Karafka::Core::Monitoring::Event]
68
+ def on_consumer_revoke(_event)
69
+ mark_consumption_tick
70
+ end
71
+
72
+ # @param _event [Karafka::Core::Monitoring::Event]
73
+ def on_consumer_revoked(_event)
74
+ clear_consumption_tick
75
+ end
76
+
77
+ # @param _event [Karafka::Core::Monitoring::Event]
78
+ def on_consumer_shutting_down(_event)
79
+ mark_consumption_tick
80
+ end
81
+
82
+ # @param _event [Karafka::Core::Monitoring::Event]
83
+ def on_consumer_shutdown(_event)
84
+ clear_consumption_tick
85
+ end
86
+
87
+ # @param _event [Karafka::Core::Monitoring::Event]
88
+ def on_error_occurred(_event)
89
+ clear_consumption_tick
90
+ clear_polling_tick
91
+ end
92
+
93
+ # Stop the http server when we stop the process
94
+ # @param _event [Karafka::Core::Monitoring::Event]
95
+ def on_app_stopped(_event)
96
+ @server.close
97
+ end
98
+
99
+ private
100
+
101
+ # Wraps the logic with a mutex
102
+ # @param block [Proc] code we want to run in mutex
103
+ def synchronize(&block)
104
+ @mutex.synchronize(&block)
105
+ end
106
+
107
+ # @return [Integer] object id of the current thread
108
+ def thread_id
109
+ Thread.current.object_id
110
+ end
111
+
112
+ # Update the polling tick time for current thread
113
+ def mark_polling_tick
114
+ synchronize do
115
+ @pollings[thread_id] = monotonic_now
116
+ end
117
+ end
118
+
119
+ # Clear current thread polling time tracker
120
+ def clear_polling_tick
121
+ synchronize do
122
+ @pollings.delete(thread_id)
123
+ end
124
+ end
125
+
126
+ # Update the processing tick time
127
+ def mark_consumption_tick
128
+ synchronize do
129
+ @consumptions[thread_id] = monotonic_now
130
+ end
131
+ end
132
+
133
+ # Clear current thread consumption time tracker
134
+ def clear_consumption_tick
135
+ synchronize do
136
+ @consumptions.delete(thread_id)
137
+ end
138
+ end
139
+
140
+ # Responds to a HTTP request with the process liveness status
141
+ def respond
142
+ client = @server.accept
143
+ client.gets
144
+ client.print "HTTP/1.1 #{status}\r\n"
145
+ client.close
146
+
147
+ true
148
+ rescue Errno::ECONNRESET, Errno::EPIPE, IOError
149
+ !@server.closed?
150
+ end
151
+
152
+ # Did we exceed any of the ttls
153
+ # @return [String] 204 string if ok, 500 otherwise
154
+ def status
155
+ time = monotonic_now
156
+
157
+ return '500' if @pollings.values.any? { |tick| (time - tick) > @polling_ttl }
158
+ return '500' if @consumptions.values.any? { |tick| (time - tick) > @consuming_ttl }
159
+
160
+ '204'
161
+ end
162
+ end
163
+ end
164
+ end
165
+ end
166
+ end
@@ -35,18 +35,9 @@ module Karafka
35
35
  # double-processing
36
36
  break if Karafka::App.stopping? && !topic.virtual_partitions?
37
37
 
38
- # Break if we already know, that one of virtual partitions has failed and we will
39
- # be restarting processing all together after all VPs are done. This will minimize
40
- # number of jobs that will be re-processed
41
- break if topic.virtual_partitions? && failing?
42
-
43
38
  consume_job(message)
44
39
 
45
- # We cannot mark jobs as done after each if there are virtual partitions. Otherwise
46
- # this could create random markings.
47
- # The exception here is the collapsed state where we can move one after another
48
- next if topic.virtual_partitions? && !collapsed?
49
-
40
+ # We can always mark because of the virtual offset management that we have in VPs
50
41
  mark_as_consumed(message)
51
42
  end
52
43
  end
@@ -39,7 +39,7 @@ module Karafka
39
39
  fetch_option(job, :dispatch_method, DEFAULTS),
40
40
  dispatch_details(job).merge!(
41
41
  topic: job.queue_name,
42
- payload: ::ActiveSupport::JSON.encode(job.serialize)
42
+ payload: ::ActiveSupport::JSON.encode(serialize_job(job))
43
43
  )
44
44
  )
45
45
  end
@@ -54,7 +54,7 @@ module Karafka
54
54
 
55
55
  dispatches[d_method] << dispatch_details(job).merge!(
56
56
  topic: job.queue_name,
57
- payload: ::ActiveSupport::JSON.encode(job.serialize)
57
+ payload: ::ActiveSupport::JSON.encode(serialize_job(job))
58
58
  )
59
59
  end
60
60
 
@@ -17,7 +17,7 @@ module Karafka
17
17
  # Pro coordinator that provides extra orchestration methods useful for parallel processing
18
18
  # within the same partition
19
19
  class Coordinator < ::Karafka::Processing::Coordinator
20
- attr_reader :filter
20
+ attr_reader :filter, :virtual_offset_manager
21
21
 
22
22
  # @param args [Object] anything the base coordinator accepts
23
23
  def initialize(*args)
@@ -27,6 +27,20 @@ module Karafka
27
27
  @flow_lock = Mutex.new
28
28
  @collapser = Collapser.new
29
29
  @filter = FiltersApplier.new(self)
30
+
31
+ return unless topic.virtual_partitions?
32
+
33
+ @virtual_offset_manager = VirtualOffsetManager.new(
34
+ topic.name,
35
+ partition
36
+ )
37
+
38
+ # We register our own "internal" filter to support filtering of messages that were marked
39
+ # as consumed virtually
40
+ @filter.filters << Filters::VirtualLimiter.new(
41
+ @virtual_offset_manager,
42
+ @collapser
43
+ )
30
44
  end
31
45
 
32
46
  # Starts the coordination process
@@ -40,6 +54,11 @@ module Karafka
40
54
  @filter.apply!(messages)
41
55
 
42
56
  @executed.clear
57
+
58
+ # We keep the old processed offsets until the collapsing is done and regular processing
59
+ # with virtualization is restored
60
+ @virtual_offset_manager.clear if topic.virtual_partitions? && !@collapser.collapsed?
61
+
43
62
  @last_message = messages.last
44
63
  end
45
64
 
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ module Filters
18
+ # Removes messages that are already marked as consumed in the virtual offset manager
19
+ # This should operate only when using virtual partitions.
20
+ #
21
+ # This cleaner prevents us from duplicated processing of messages that were virtually
22
+ # marked as consumed even if we could not mark them as consumed in Kafka. This allows us
23
+ # to limit reprocessing when errors occur drastically when operating with virtual
24
+ # partitions
25
+ #
26
+ # @note It should be registered only when VPs are used
27
+ class VirtualLimiter < Base
28
+ # @param manager [Processing::VirtualOffsetManager]
29
+ # @param collapser [Processing::Collapser]
30
+ def initialize(manager, collapser)
31
+ @manager = manager
32
+ @collapser = collapser
33
+
34
+ super()
35
+ end
36
+
37
+ # Remove messages that we already marked as virtually consumed. Does nothing if not in
38
+ # the collapsed mode.
39
+ #
40
+ # @param messages [Array<Karafka::Messages::Message>]
41
+ def apply!(messages)
42
+ return unless @collapser.collapsed?
43
+
44
+ marked = @manager.marked
45
+
46
+ messages.delete_if { |message| marked.include?(message.offset) }
47
+ end
48
+ end
49
+ end
50
+ end
51
+ end
52
+ end
@@ -21,6 +21,10 @@ module Karafka
21
21
  # This means that this is the API we expose as a single filter, allowing us to control
22
22
  # the filtering via many filters easily.
23
23
  class FiltersApplier
24
+ # @return [Array] registered filters array. Useful if we want to inject internal context
25
+ # aware filters.
26
+ attr_reader :filters
27
+
24
28
  # @param coordinator [Pro::Coordinator] pro coordinator
25
29
  def initialize(coordinator)
26
30
  # Builds filters out of their factories
@@ -22,7 +22,7 @@ module Karafka
22
22
  # - Mom
23
23
  # - VP
24
24
  module DlqFtrMomVp
25
- include Strategies::Vp::Default
25
+ include Strategies::Aj::DlqMomVp
26
26
  include Strategies::Aj::DlqFtrMom
27
27
 
28
28
  # Features for this strategy
@@ -24,7 +24,9 @@ module Karafka
24
24
  # This case is a bit of special. Please see the `AjDlqMom` for explanation on how the
25
25
  # offset management works in this case.
26
26
  module DlqLrjMom
27
- include Strategies::Aj::DlqLrjMomVp
27
+ include Strategies::Default
28
+ include Strategies::Dlq::Default
29
+ include Strategies::Aj::LrjMom
28
30
 
29
31
  # Features for this strategy
30
32
  FEATURES = %i[
@@ -20,9 +20,9 @@ module Karafka
20
20
  # Manual offset management enabled
21
21
  # Virtual Partitions enabled
22
22
  module DlqMomVp
23
- include Strategies::Dlq::Default
24
- include Strategies::Vp::Default
25
23
  include Strategies::Default
24
+ include Strategies::Dlq::Vp
25
+ include Strategies::Vp::Default
26
26
 
27
27
  # Features for this strategy
28
28
  FEATURES = %i[