karafka 2.0.37 → 2.0.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +1 -1
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +34 -0
  6. data/Gemfile.lock +7 -7
  7. data/README.md +1 -1
  8. data/bin/integrations +1 -1
  9. data/config/locales/errors.yml +0 -7
  10. data/config/locales/pro_errors.yml +18 -0
  11. data/lib/karafka/active_job/consumer.rb +22 -7
  12. data/lib/karafka/admin.rb +46 -14
  13. data/lib/karafka/base_consumer.rb +35 -55
  14. data/lib/karafka/connection/listener.rb +15 -10
  15. data/lib/karafka/errors.rb +0 -3
  16. data/lib/karafka/instrumentation/logger_listener.rb +44 -3
  17. data/lib/karafka/instrumentation/notifications.rb +7 -0
  18. data/lib/karafka/pro/active_job/consumer.rb +10 -5
  19. data/lib/karafka/pro/processing/coordinator.rb +13 -4
  20. data/lib/karafka/pro/processing/filters/base.rb +61 -0
  21. data/lib/karafka/pro/processing/filters/delayer.rb +70 -0
  22. data/lib/karafka/pro/processing/filters/expirer.rb +51 -0
  23. data/lib/karafka/pro/processing/filters/throttler.rb +84 -0
  24. data/lib/karafka/pro/processing/filters_applier.rb +100 -0
  25. data/lib/karafka/pro/processing/jobs_builder.rb +7 -3
  26. data/lib/karafka/pro/processing/scheduler.rb +24 -7
  27. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +68 -0
  28. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +74 -0
  29. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom.rb +72 -0
  30. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +76 -0
  31. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +62 -0
  32. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +68 -0
  33. data/lib/karafka/pro/processing/strategies/aj/dlq_mom.rb +64 -0
  34. data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +69 -0
  35. data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom.rb +38 -0
  36. data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +64 -0
  37. data/lib/karafka/pro/processing/strategies/aj/ftr_mom.rb +38 -0
  38. data/lib/karafka/pro/processing/strategies/aj/ftr_mom_vp.rb +58 -0
  39. data/lib/karafka/pro/processing/strategies/{dlq_lrj_vp.rb → aj/lrj_mom.rb} +14 -13
  40. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +77 -0
  41. data/lib/karafka/pro/processing/strategies/aj/mom.rb +36 -0
  42. data/lib/karafka/pro/processing/strategies/aj/mom_vp.rb +52 -0
  43. data/lib/karafka/pro/processing/strategies/dlq/default.rb +131 -0
  44. data/lib/karafka/pro/processing/strategies/dlq/ftr.rb +61 -0
  45. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +75 -0
  46. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +74 -0
  47. data/lib/karafka/pro/processing/strategies/{mom.rb → dlq/ftr_lrj_vp.rb} +16 -19
  48. data/lib/karafka/pro/processing/strategies/dlq/ftr_mom.rb +73 -0
  49. data/lib/karafka/pro/processing/strategies/dlq/ftr_vp.rb +39 -0
  50. data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +63 -0
  51. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +66 -0
  52. data/lib/karafka/pro/processing/strategies/dlq/lrj_vp.rb +38 -0
  53. data/lib/karafka/pro/processing/strategies/dlq/mom.rb +67 -0
  54. data/lib/karafka/pro/processing/strategies/dlq/vp.rb +39 -0
  55. data/lib/karafka/pro/processing/strategies/ftr/default.rb +104 -0
  56. data/lib/karafka/pro/processing/strategies/ftr/vp.rb +40 -0
  57. data/lib/karafka/pro/processing/strategies/lrj/default.rb +85 -0
  58. data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +69 -0
  59. data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +67 -0
  60. data/lib/karafka/pro/processing/strategies/{vp.rb → lrj/ftr_vp.rb} +15 -13
  61. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +78 -0
  62. data/lib/karafka/pro/processing/strategies/{aj_lrj_mom.rb → lrj/vp.rb} +13 -12
  63. data/lib/karafka/pro/processing/strategies/mom/default.rb +46 -0
  64. data/lib/karafka/pro/processing/strategies/mom/ftr.rb +53 -0
  65. data/lib/karafka/pro/processing/strategies/vp/default.rb +53 -0
  66. data/lib/karafka/pro/processing/{strategies/lrj_vp.rb → strategies.rb} +1 -13
  67. data/lib/karafka/pro/processing/strategy_selector.rb +44 -18
  68. data/lib/karafka/pro/{processing/strategies/aj_mom.rb → routing/features/delaying/config.rb} +7 -13
  69. data/lib/karafka/pro/routing/features/delaying/contract.rb +38 -0
  70. data/lib/karafka/pro/routing/features/delaying/topic.rb +59 -0
  71. data/lib/karafka/pro/routing/features/delaying.rb +29 -0
  72. data/lib/karafka/pro/routing/features/expiring/config.rb +27 -0
  73. data/lib/karafka/pro/routing/features/expiring/contract.rb +38 -0
  74. data/lib/karafka/pro/routing/features/expiring/topic.rb +59 -0
  75. data/lib/karafka/pro/routing/features/expiring.rb +27 -0
  76. data/lib/karafka/pro/routing/features/filtering/config.rb +40 -0
  77. data/lib/karafka/pro/routing/features/filtering/contract.rb +41 -0
  78. data/lib/karafka/pro/routing/features/filtering/topic.rb +51 -0
  79. data/lib/karafka/pro/routing/features/filtering.rb +27 -0
  80. data/lib/karafka/pro/routing/features/long_running_job/contract.rb +1 -1
  81. data/lib/karafka/pro/routing/features/throttling/config.rb +32 -0
  82. data/lib/karafka/pro/routing/features/throttling/contract.rb +41 -0
  83. data/lib/karafka/pro/routing/features/throttling/topic.rb +69 -0
  84. data/lib/karafka/pro/routing/features/throttling.rb +30 -0
  85. data/lib/karafka/processing/coordinator.rb +60 -30
  86. data/lib/karafka/processing/coordinators_buffer.rb +5 -1
  87. data/lib/karafka/processing/executor.rb +23 -16
  88. data/lib/karafka/processing/executors_buffer.rb +10 -26
  89. data/lib/karafka/processing/jobs/consume.rb +2 -4
  90. data/lib/karafka/processing/jobs/idle.rb +24 -0
  91. data/lib/karafka/processing/jobs_builder.rb +2 -3
  92. data/lib/karafka/processing/result.rb +5 -0
  93. data/lib/karafka/processing/strategies/aj_dlq_mom.rb +1 -1
  94. data/lib/karafka/processing/strategies/base.rb +5 -0
  95. data/lib/karafka/processing/strategies/default.rb +50 -0
  96. data/lib/karafka/processing/strategies/dlq.rb +13 -4
  97. data/lib/karafka/processing/strategies/dlq_mom.rb +8 -3
  98. data/lib/karafka/processing/strategy_selector.rb +27 -10
  99. data/lib/karafka/version.rb +1 -1
  100. data/renovate.json +6 -0
  101. data.tar.gz.sig +0 -0
  102. metadata +66 -22
  103. metadata.gz.sig +0 -0
  104. data/lib/karafka/pro/processing/strategies/aj_dlq_lrj_mom.rb +0 -42
  105. data/lib/karafka/pro/processing/strategies/aj_dlq_lrj_mom_vp.rb +0 -70
  106. data/lib/karafka/pro/processing/strategies/aj_dlq_mom.rb +0 -62
  107. data/lib/karafka/pro/processing/strategies/aj_dlq_mom_vp.rb +0 -68
  108. data/lib/karafka/pro/processing/strategies/aj_lrj_mom_vp.rb +0 -75
  109. data/lib/karafka/pro/processing/strategies/aj_mom_vp.rb +0 -62
  110. data/lib/karafka/pro/processing/strategies/dlq.rb +0 -120
  111. data/lib/karafka/pro/processing/strategies/dlq_lrj.rb +0 -65
  112. data/lib/karafka/pro/processing/strategies/dlq_lrj_mom.rb +0 -62
  113. data/lib/karafka/pro/processing/strategies/dlq_mom.rb +0 -62
  114. data/lib/karafka/pro/processing/strategies/dlq_vp.rb +0 -37
  115. data/lib/karafka/pro/processing/strategies/lrj.rb +0 -83
  116. data/lib/karafka/pro/processing/strategies/lrj_mom.rb +0 -73
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bab9c1d7bc952b4ecbfc4fad794d7e7c861cd3a332cc5d9058cef6c0bd9b57cb
4
- data.tar.gz: 7662bd8dc5748d9112f3c72b2912619534e45750188f78df2f69a7e6ae1f9c31
3
+ metadata.gz: 12fe8a47dc0ab16b0f7783424cd1aa043c2d2b228b4f4164f1cecefe604269d9
4
+ data.tar.gz: 9fa3bae282770dd67503c41ef4b73a27a38bfcff3bf472ddd63753d14d03614f
5
5
  SHA512:
6
- metadata.gz: 9a99a84d538a74bd27d5a0f585a12dbbe67eb76ab63cc1a0984cbe1562f230070ad482418f85393a3a479e81534a0957a0863c91f3a7f5b6433f74efd317c79e
7
- data.tar.gz: 7da6129cd795f65d821bae897864648e4a5e37c0d07e8745f110f0d03a23d688d7717ab6c42652c4660fe3be3d26ee7051a8c2c66c1fcb62834a1c4159bd4ac4
6
+ metadata.gz: 9e6536c90a411a0b42337f73c00d9f454028366f42eabb1b7f40902181bcbcfd43258741d6fc51c6e29046b9ee1f8598755440d28a00ca96104a61a8095c20c2
7
+ data.tar.gz: be75dd1bfa744187f770f2e1f0deeedfba4f3fb1b824d6bab91f056f96e60a33498429e35ea22841404def0935e584db0df4289d2818631ace2e597d28785960
checksums.yaml.gz.sig CHANGED
Binary file
@@ -62,7 +62,7 @@ jobs:
62
62
  run: \curl -sSL https://api.coditsu.io/run/ci | bash
63
63
 
64
64
  specs:
65
- timeout-minutes: 30
65
+ timeout-minutes: 45
66
66
  runs-on: ubuntu-latest
67
67
  needs: diffend
68
68
  strategy:
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 3.2.1
1
+ 3.2.2
data/CHANGELOG.md CHANGED
@@ -1,5 +1,39 @@
1
1
  # Karafka framework changelog
2
2
 
3
+ ## 2.0.39 (2023-04-11)
4
+ - **[Feature]** Provide ability to throttle/limit number of messages processed in a time unit (#1203)
5
+ - **[Feature]** Provide Delayed Topics (#1000)
6
+ - **[Feature]** Provide ability to expire messages (expiring topics)
7
+ - **[Feature]** Provide ability to apply filters after messages are polled and before enqueued. This is a generic filter API for any usage.
8
+ - [Improvement] When using ActiveJob with Virtual Partitions, Karafka will stop if collectively VPs are failing. This minimizes number of jobs that will be collectively re-processed.
9
+ - [Improvement] `#retrying?` method has been added to consumers to provide ability to check, that we're reprocessing data after a failure. This is useful for branching out processing based on errors.
10
+ - [Improvement] Track active_job_id in instrumentation (#1372)
11
+ - [Improvement] Introduce new housekeeping job type called `Idle` for non-consumption execution flows.
12
+ - [Improvement] Change how a manual offset management works with Long-Running Jobs. Use the last message offset to move forward instead of relying on the last message marked as consumed for a scenario where no message is marked.
13
+ - [Improvement] Prioritize in Pro non-consumption jobs execution over consumption despite LJF. This will ensure, that housekeeping as well as other non-consumption events are not saturated when running a lot of work.
14
+ - [Improvement] Normalize the DLQ behaviour with MoM. Always pause on dispatch for all the strategies.
15
+ - [Improvement] Improve the manual offset management and DLQ behaviour when no markings occur for OSS.
16
+ - [Improvement] Do not early stop ActiveJob work running under virtual partitions to prevent extensive reprocessing.
17
+ - [Improvement] Drastically increase number of scenarios covered by integration specs (OSS and Pro).
18
+ - [Improvement] Introduce a `Coordinator#synchronize` lock for cross virtual partitions operations.
19
+ - [Fix] Do not resume partition that is not paused.
20
+ - [Fix] Fix `LoggerListener` cases where logs would not include caller id (when available)
21
+ - [Fix] Fix not working benchmark tests.
22
+ - [Fix] Fix a case where when using manual offset management with a user pause would ignore the pause and seek to the next message.
23
+ - [Fix] Fix a case where dead letter queue would go into an infinite loop on message with first ever offset if the first ever offset would not recover.
24
+ - [Fix] Make sure to resume always for all LRJ strategies on revocation.
25
+ - [Refactor] Make sure that coordinator is topic aware. Needed for throttling, delayed processing and expired jobs.
26
+ - [Refactor] Put Pro strategies into namespaces to better organize multiple combinations.
27
+ - [Refactor] Do not rely on messages metadata for internal topic and partition operations like `#seek` so they can run independently from the consumption flow.
28
+ - [Refactor] Hold a single topic/partition reference on a coordinator instead of in executor, coordinator and consumer.
29
+ - [Refactor] Move `#mark_as_consumed` and `#mark_as_consumed!`into `Strategies::Default` to be able to introduce marking for virtual partitions.
30
+
31
+ ## 2.0.38 (2023-03-27)
32
+ - [Improvement] Introduce `Karafka::Admin#read_watermark_offsets` to get low and high watermark offsets values.
33
+ - [Improvement] Track active_job_id in instrumentation (#1372)
34
+ - [Improvement] Improve `#read_topic` reading in case of a compacted partition where the offset is below the low watermark offset. This should optimize reading and should not go beyond the low watermark offset.
35
+ - [Improvement] Allow `#read_topic` to accept instance settings to overwrite any settings needed to customize reading behaviours.
36
+
3
37
  ## 2.0.37 (2023-03-20)
4
38
  - [Fix] Declarative topics execution on a secondary cluster run topics creation on the primary one (#1365)
5
39
  - [Fix] Admin read operations commit offset when not needed (#1369)
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- karafka (2.0.37)
4
+ karafka (2.0.39)
5
5
  karafka-core (>= 2.0.12, < 3.0.0)
6
6
  thor (>= 0.20)
7
7
  waterdrop (>= 2.4.10, < 3.0.0)
@@ -10,10 +10,10 @@ PATH
10
10
  GEM
11
11
  remote: https://rubygems.org/
12
12
  specs:
13
- activejob (7.0.4.2)
14
- activesupport (= 7.0.4.2)
13
+ activejob (7.0.4.3)
14
+ activesupport (= 7.0.4.3)
15
15
  globalid (>= 0.3.6)
16
- activesupport (7.0.4.2)
16
+ activesupport (7.0.4.3)
17
17
  concurrent-ruby (~> 1.0, >= 1.0.2)
18
18
  i18n (>= 1.6, < 2)
19
19
  minitest (>= 5.1)
@@ -48,7 +48,7 @@ GEM
48
48
  rspec-expectations (3.12.2)
49
49
  diff-lcs (>= 1.2.0, < 2.0)
50
50
  rspec-support (~> 3.12.0)
51
- rspec-mocks (3.12.3)
51
+ rspec-mocks (3.12.5)
52
52
  diff-lcs (>= 1.2.0, < 2.0)
53
53
  rspec-support (~> 3.12.0)
54
54
  rspec-support (3.12.0)
@@ -61,7 +61,7 @@ GEM
61
61
  thor (1.2.1)
62
62
  tzinfo (2.0.6)
63
63
  concurrent-ruby (~> 1.0)
64
- waterdrop (2.5.0)
64
+ waterdrop (2.5.1)
65
65
  karafka-core (>= 2.0.12, < 3.0.0)
66
66
  zeitwerk (~> 2.3)
67
67
  zeitwerk (2.6.7)
@@ -79,4 +79,4 @@ DEPENDENCIES
79
79
  simplecov
80
80
 
81
81
  BUNDLED WITH
82
- 2.4.7
82
+ 2.4.10
data/README.md CHANGED
@@ -86,7 +86,7 @@ bundle exec karafka server
86
86
 
87
87
  I also sell Karafka Pro subscriptions. It includes a commercial-friendly license, priority support, architecture consultations, enhanced Web UI and high throughput data processing-related features (virtual partitions, long-running jobs, and more).
88
88
 
89
- **20%** of the income will be distributed back to other OSS projects that Karafka uses under the hood.
89
+ **10%** of the income will be distributed back to other OSS projects that Karafka uses under the hood.
90
90
 
91
91
  Help me provide high-quality open-source software. Please see the Karafka [homepage](https://karafka.io/#become-pro) for more details.
92
92
 
data/bin/integrations CHANGED
@@ -25,7 +25,7 @@ ROOT_PATH = Pathname.new(File.expand_path(File.join(File.dirname(__FILE__), '../
25
25
  # we limit it. Locally we can run a lot of those, as many of them have sleeps and do not use a lot
26
26
  # of CPU. Locally we also cannot go beyond certain limit due to how often and how many topics we
27
27
  # create in Kafka. With an overloaded system, we start getting timeouts.
28
- CONCURRENCY = ENV.key?('CI') ? 4 : Etc.nprocessors * 2
28
+ CONCURRENCY = ENV.key?('CI') ? 5 : Etc.nprocessors * 3
29
29
 
30
30
  # How may bytes do we want to keep from the stdout in the buffer for when we need to print it
31
31
  MAX_BUFFER_OUTPUT = 51_200
@@ -72,10 +72,3 @@ en:
72
72
  test:
73
73
  missing: needs to be present
74
74
  id_format: needs to be a String
75
-
76
- pro_topic:
77
- virtual_partitions.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
78
- virtual_partitions.max_partitions_format: needs to be equal or more than 1
79
- manual_offset_management_not_with_virtual_partitions: cannot be used together with Virtual Partitions
80
- long_running_job.active_format: needs to be either true or false
81
- dead_letter_queue_not_with_virtual_partitions: cannot be used together with Virtual Partitions
@@ -3,10 +3,28 @@ en:
3
3
  topic:
4
4
  virtual_partitions.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
5
5
  virtual_partitions.max_partitions_format: needs to be equal or more than 1
6
+
6
7
  manual_offset_management_not_with_virtual_partitions: cannot be used together with Virtual Partitions
8
+
7
9
  long_running_job.active_format: needs to be either true or false
10
+
8
11
  dead_letter_queue_with_virtual_partitions: when using Dead Letter Queue with Virtual Partitions, at least one retry is required.
9
12
 
13
+ throttling.active_format: needs to be either true or false
14
+ throttling.limit_format: needs to be equal or more than 1
15
+ throttling.interval_format: needs to be equal or more than 1
16
+
17
+ filtering.active_missing: needs to be present
18
+ filtering.factory_format: 'needs to respond to #call'
19
+ filtering.factories_format: 'needs to contain only factories responding to #call'
20
+ filtering.active_format: 'needs to be boolean'
21
+
22
+ expiring.ttl_format: 'needs to be equal or more than 0 and an integer'
23
+ expiring.active_format: 'needs to be boolean'
24
+
25
+ delaying.delay_format: 'needs to be equal or more than 0 and an integer'
26
+ delaying.active_format: 'needs to be boolean'
27
+
10
28
  config:
11
29
  encryption.active_format: 'needs to be either true or false'
12
30
  encryption.public_key_invalid: 'is not a valid public RSA key'
@@ -12,16 +12,31 @@ module Karafka
12
12
  messages.each do |message|
13
13
  break if Karafka::App.stopping?
14
14
 
15
- # We technically speaking could set this as deserializer and reference it from the
16
- # message instead of using the `#raw_payload`. This is not done on purpose to simplify
17
- # the ActiveJob setup here
18
- job = ::ActiveSupport::JSON.decode(message.raw_payload)
15
+ consume_job(message)
19
16
 
20
- tags.add(:job_class, job['job_class'])
17
+ mark_as_consumed(message)
18
+ end
19
+ end
21
20
 
22
- ::ActiveJob::Base.execute(job)
21
+ private
23
22
 
24
- mark_as_consumed(message)
23
+ # Consumes a message with the job and runs needed instrumentation
24
+ #
25
+ # @param job_message [Karafka::Messages::Message] message with active job
26
+ def consume_job(job_message)
27
+ # We technically speaking could set this as deserializer and reference it from the
28
+ # message instead of using the `#raw_payload`. This is not done on purpose to simplify
29
+ # the ActiveJob setup here
30
+ job = ::ActiveSupport::JSON.decode(job_message.raw_payload)
31
+
32
+ tags.add(:job_class, job['job_class'])
33
+
34
+ payload = { caller: self, job: job, message: job_message }
35
+
36
+ # We publish both to make it consistent with `consumer.x` events
37
+ Karafka.monitor.instrument('active_job.consume', payload)
38
+ Karafka.monitor.instrument('active_job.consumed', payload) do
39
+ ::ActiveJob::Base.execute(job)
25
40
  end
26
41
  end
27
42
  end
data/lib/karafka/admin.rb CHANGED
@@ -44,17 +44,32 @@ module Karafka
44
44
  # @param count [Integer] how many messages we want to get at most
45
45
  # @param start_offset [Integer] offset from which we should start. If -1 is provided
46
46
  # (default) we will start from the latest offset
47
+ # @param settings [Hash] kafka extra settings (optional)
47
48
  #
48
49
  # @return [Array<Karafka::Messages::Message>] array with messages
49
- def read_topic(name, partition, count, start_offset = -1)
50
+ def read_topic(name, partition, count, start_offset = -1, settings = {})
50
51
  messages = []
51
52
  tpl = Rdkafka::Consumer::TopicPartitionList.new
53
+ low_offset, high_offset = nil
52
54
 
53
- with_consumer do |consumer|
54
- offsets = consumer.query_watermark_offsets(name, partition)
55
- end_offset = offsets.last
55
+ with_consumer(settings) do |consumer|
56
+ low_offset, high_offset = consumer.query_watermark_offsets(name, partition)
57
+
58
+ # Select offset dynamically if -1 or less
59
+ start_offset = high_offset - count if start_offset.negative?
56
60
 
57
- start_offset = [0, offsets.last - count].max if start_offset.negative?
61
+ # Build the requested range - since first element is on the start offset we need to
62
+ # subtract one from requested count to end up with expected number of elements
63
+ requested_range = (start_offset..start_offset + (count - 1))
64
+ # Establish theoretical available range. Note, that this does not handle cases related to
65
+ # log retention or compaction
66
+ available_range = (low_offset..high_offset)
67
+ # Select only offset that we can select. This will remove all the potential offsets that
68
+ # are below the low watermark offset
69
+ possible_range = requested_range.select { |offset| available_range.include?(offset) }
70
+
71
+ start_offset = possible_range.first
72
+ count = possible_range.count
58
73
 
59
74
  tpl.add_topic_and_partitions_with_offsets(name, partition => start_offset)
60
75
  consumer.assign(tpl)
@@ -64,11 +79,15 @@ module Karafka
64
79
  loop do
65
80
  # If we've got as many messages as we've wanted stop
66
81
  break if messages.size >= count
67
- # If we've reached end of the topic messages, don't process more
68
- break if !messages.empty? && end_offset <= messages.last.offset
69
82
 
70
83
  message = consumer.poll(200)
71
- messages << message if message
84
+
85
+ next unless message
86
+
87
+ # If the message we've got is beyond the requested range, stop
88
+ break unless possible_range.include?(message.offset)
89
+
90
+ messages << message
72
91
  rescue Rdkafka::RdkafkaError => e
73
92
  # End of partition
74
93
  break if e.code == :partition_eof
@@ -77,7 +96,7 @@ module Karafka
77
96
  end
78
97
  end
79
98
 
80
- messages.map do |message|
99
+ messages.map! do |message|
81
100
  Messages::Builders::Message.call(
82
101
  message,
83
102
  # Use topic from routes if we can match it or create a dummy one
@@ -136,6 +155,17 @@ module Karafka
136
155
  end
137
156
  end
138
157
 
158
+ # Fetches the watermark offsets for a given topic partition
159
+ #
160
+ # @param name [String, Symbol] topic name
161
+ # @param partition [Integer] partition
162
+ # @return [Array<Integer, Integer>] low watermark offset and high watermark offset
163
+ def read_watermark_offsets(name, partition)
164
+ with_consumer do |consumer|
165
+ consumer.query_watermark_offsets(name, partition)
166
+ end
167
+ end
168
+
139
169
  # @return [Rdkafka::Metadata] cluster metadata info
140
170
  def cluster_info
141
171
  with_admin do |admin|
@@ -159,15 +189,16 @@ module Karafka
159
189
 
160
190
  # Creates admin instance and yields it. After usage it closes the admin instance
161
191
  def with_admin
162
- admin = config(:producer).admin
192
+ admin = config(:producer, {}).admin
163
193
  yield(admin)
164
194
  ensure
165
195
  admin&.close
166
196
  end
167
197
 
168
198
  # Creates consumer instance and yields it. After usage it closes the consumer instance
169
- def with_consumer
170
- consumer = config(:consumer).consumer
199
+ # @param settings [Hash] extra settings to customize consumer
200
+ def with_consumer(settings = {})
201
+ consumer = config(:consumer, settings).consumer
171
202
  yield(consumer)
172
203
  ensure
173
204
  consumer&.close
@@ -196,11 +227,12 @@ module Karafka
196
227
  end
197
228
 
198
229
  # @param type [Symbol] type of config we want
230
+ # @param settings [Hash] extra settings for config (if needed)
199
231
  # @return [::Rdkafka::Config] rdkafka config
200
- def config(type)
232
+ def config(type, settings)
201
233
  config_hash = Karafka::Setup::AttributesMap.public_send(
202
234
  type,
203
- Karafka::App.config.kafka.dup.merge(CONFIG_DEFAULTS)
235
+ Karafka::App.config.kafka.dup.merge(CONFIG_DEFAULTS).merge!(settings)
204
236
  )
205
237
 
206
238
  ::Rdkafka::Config.new(config_hash)
@@ -7,11 +7,13 @@ module Karafka
7
7
  # Allow for consumer instance tagging for instrumentation
8
8
  include ::Karafka::Core::Taggable
9
9
 
10
+ extend Forwardable
11
+
12
+ def_delegators :@coordinator, :topic, :partition
13
+
10
14
  # @return [String] id of the current consumer
11
15
  attr_reader :id
12
16
  # @return [Karafka::Routing::Topic] topic to which a given consumer is subscribed
13
- attr_accessor :topic
14
- # @return [Karafka::Messages::Messages] current messages batch
15
17
  attr_accessor :messages
16
18
  # @return [Karafka::Connection::Client] kafka connection client
17
19
  attr_accessor :client
@@ -97,6 +99,20 @@ module Karafka
97
99
  )
98
100
  end
99
101
 
102
+ # Trigger method for running on idle runs without messages
103
+ #
104
+ # @private
105
+ def on_idle
106
+ handle_idle
107
+ rescue StandardError => e
108
+ Karafka.monitor.instrument(
109
+ 'error.occurred',
110
+ error: e,
111
+ caller: self,
112
+ type: 'consumer.idle.error'
113
+ )
114
+ end
115
+
100
116
  # Trigger method for running on partition revocation.
101
117
  #
102
118
  # @private
@@ -143,51 +159,6 @@ module Karafka
143
159
  # some teardown procedures (closing file handler, etc).
144
160
  def shutdown; end
145
161
 
146
- # Marks message as consumed in an async way.
147
- #
148
- # @param message [Messages::Message] last successfully processed message.
149
- # @return [Boolean] true if we were able to mark the offset, false otherwise. False indicates
150
- # that we were not able and that we have lost the partition.
151
- #
152
- # @note We keep track of this offset in case we would mark as consumed and got error when
153
- # processing another message. In case like this we do not pause on the message we've already
154
- # processed but rather at the next one. This applies to both sync and async versions of this
155
- # method.
156
- def mark_as_consumed(message)
157
- # Ignore earlier offsets than the one we alread committed
158
- return true if coordinator.seek_offset > message.offset
159
-
160
- unless client.mark_as_consumed(message)
161
- coordinator.revoke
162
-
163
- return false
164
- end
165
-
166
- coordinator.seek_offset = message.offset + 1
167
-
168
- true
169
- end
170
-
171
- # Marks message as consumed in a sync way.
172
- #
173
- # @param message [Messages::Message] last successfully processed message.
174
- # @return [Boolean] true if we were able to mark the offset, false otherwise. False indicates
175
- # that we were not able and that we have lost the partition.
176
- def mark_as_consumed!(message)
177
- # Ignore earlier offsets than the one we alread committed
178
- return true if coordinator.seek_offset > message.offset
179
-
180
- unless client.mark_as_consumed!(message)
181
- coordinator.revoke
182
-
183
- return false
184
- end
185
-
186
- coordinator.seek_offset = message.offset + 1
187
-
188
- true
189
- end
190
-
191
162
  # Pauses processing on a given offset for the current topic partition
192
163
  #
193
164
  # After given partition is resumed, it will continue processing from the given offset
@@ -201,8 +172,8 @@ module Karafka
201
172
  timeout ? coordinator.pause_tracker.pause(timeout) : coordinator.pause_tracker.pause
202
173
 
203
174
  client.pause(
204
- messages.metadata.topic,
205
- messages.metadata.partition,
175
+ topic.name,
176
+ partition,
206
177
  offset
207
178
  )
208
179
 
@@ -213,8 +184,8 @@ module Karafka
213
184
  'consumer.consuming.pause',
214
185
  caller: self,
215
186
  manual: manual_pause,
216
- topic: messages.metadata.topic,
217
- partition: messages.metadata.partition,
187
+ topic: topic.name,
188
+ partition: partition,
218
189
  offset: offset,
219
190
  timeout: coordinator.pause_tracker.current_timeout,
220
191
  attempt: coordinator.pause_tracker.attempt
@@ -223,6 +194,8 @@ module Karafka
223
194
 
224
195
  # Resumes processing of the current topic partition
225
196
  def resume
197
+ return unless coordinator.pause_tracker.paused?
198
+
226
199
  # This is sufficient to expire a partition pause, as with it will be resumed by the listener
227
200
  # thread before the next poll.
228
201
  coordinator.pause_tracker.expire
@@ -234,8 +207,8 @@ module Karafka
234
207
  def seek(offset)
235
208
  client.seek(
236
209
  Karafka::Messages::Seek.new(
237
- messages.metadata.topic,
238
- messages.metadata.partition,
210
+ topic.name,
211
+ partition,
239
212
  offset
240
213
  )
241
214
  )
@@ -248,6 +221,13 @@ module Karafka
248
221
  coordinator.revoked?
249
222
  end
250
223
 
224
+ # @return [Boolean] are we retrying processing after an error. This can be used to provide a
225
+ # different flow after there is an error, for example for resources cleanup, small manual
226
+ # backoff or different instrumentation tracking.
227
+ def retrying?
228
+ coordinator.pause_tracker.attempt.positive?
229
+ end
230
+
251
231
  # Pauses the processing from the last offset to retry on given message
252
232
  # @private
253
233
  def retry_after_pause
@@ -258,8 +238,8 @@ module Karafka
258
238
  Karafka.monitor.instrument(
259
239
  'consumer.consuming.retry',
260
240
  caller: self,
261
- topic: messages.metadata.topic,
262
- partition: messages.metadata.partition,
241
+ topic: topic.name,
242
+ partition: partition,
263
243
  offset: coordinator.seek_offset,
264
244
  timeout: coordinator.pause_tracker.current_timeout,
265
245
  attempt: coordinator.pause_tracker.attempt
@@ -25,7 +25,7 @@ module Karafka
25
25
  @consumer_group_coordinator = consumer_group_coordinator
26
26
  @subscription_group = subscription_group
27
27
  @jobs_queue = jobs_queue
28
- @coordinators = Processing::CoordinatorsBuffer.new
28
+ @coordinators = Processing::CoordinatorsBuffer.new(subscription_group.topics)
29
29
  @client = Client.new(@subscription_group)
30
30
  @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
31
31
  @jobs_builder = proc_config.jobs_builder
@@ -234,7 +234,7 @@ module Karafka
234
234
  def build_and_schedule_shutdown_jobs
235
235
  jobs = []
236
236
 
237
- @executors.each do |_, _, executor|
237
+ @executors.each do |executor|
238
238
  job = @jobs_builder.shutdown(executor)
239
239
  job.before_enqueue
240
240
  jobs << job
@@ -263,20 +263,25 @@ module Karafka
263
263
 
264
264
  @messages_buffer.each do |topic, partition, messages|
265
265
  coordinator = @coordinators.find_or_create(topic, partition)
266
-
267
266
  # Start work coordination for this topic partition
268
267
  coordinator.start(messages)
269
268
 
270
- @partitioner.call(topic, messages, coordinator) do |group_id, partition_messages|
271
- # Count the job we're going to create here
272
- coordinator.increment
273
- executor = @executors.find_or_create(topic, partition, group_id)
274
- job = @jobs_builder.consume(executor, partition_messages, coordinator)
275
- job.before_enqueue
276
- jobs << job
269
+ # We do not increment coordinator for idle job because it's not a user related one
270
+ # and it will not go through a standard lifecycle. Same applies to revoked and shutdown
271
+ if messages.empty?
272
+ executor = @executors.find_or_create(topic, partition, 0, coordinator)
273
+ jobs << @jobs_builder.idle(executor)
274
+ else
275
+ @partitioner.call(topic, messages, coordinator) do |group_id, partition_messages|
276
+ executor = @executors.find_or_create(topic, partition, group_id, coordinator)
277
+ coordinator.increment
278
+ jobs << @jobs_builder.consume(executor, partition_messages)
279
+ end
277
280
  end
278
281
  end
279
282
 
283
+ jobs.each(&:before_enqueue)
284
+
280
285
  @scheduler.schedule_consumption(@jobs_queue, jobs)
281
286
  end
282
287
 
@@ -46,8 +46,5 @@ module Karafka
46
46
 
47
47
  # This should never happen. Please open an issue if it does.
48
48
  StrategyNotFoundError = Class.new(BaseError)
49
-
50
- # This should never happen. Please open an issue if it does.
51
- SkipMessageNotFoundError = Class.new(BaseError)
52
49
  end
53
50
  end
@@ -170,13 +170,51 @@ module Karafka
170
170
  #
171
171
  # @param event [Karafka::Core::Monitoring::Event] event details including payload
172
172
  def on_dead_letter_queue_dispatched(event)
173
+ consumer = event[:caller]
174
+ topic = consumer.topic.name
173
175
  message = event[:message]
174
176
  offset = message.offset
175
- topic = event[:caller].topic.name
176
- dlq_topic = event[:caller].topic.dead_letter_queue.topic
177
+ dlq_topic = consumer.topic.dead_letter_queue.topic
177
178
  partition = message.partition
178
179
 
179
- info "Dispatched message #{offset} from #{topic}/#{partition} to DLQ topic: #{dlq_topic}"
180
+ info <<~MSG.tr("\n", ' ').strip!
181
+ [#{consumer.id}] Dispatched message #{offset}
182
+ from #{topic}/#{partition}
183
+ to DLQ topic: #{dlq_topic}
184
+ MSG
185
+ end
186
+
187
+ # Logs info about throttling event
188
+ #
189
+ # @param event [Karafka::Core::Monitoring::Event] event details including payload
190
+ def on_filtering_throttled(event)
191
+ consumer = event[:caller]
192
+ topic = consumer.topic.name
193
+ # Here we get last message before throttle
194
+ message = event[:message]
195
+ partition = message.partition
196
+ offset = message.offset
197
+
198
+ info <<~MSG.tr("\n", ' ').strip!
199
+ [#{consumer.id}] Throttled and will resume
200
+ from message #{offset}
201
+ on #{topic}/#{partition}
202
+ MSG
203
+ end
204
+
205
+ # @param event [Karafka::Core::Monitoring::Event] event details including payload
206
+ def on_filtering_seek(event)
207
+ consumer = event[:caller]
208
+ topic = consumer.topic.name
209
+ # Message to which we seek
210
+ message = event[:message]
211
+ partition = message.partition
212
+ offset = message.offset
213
+
214
+ info <<~MSG.tr("\n", ' ').strip!
215
+ [#{consumer.id}] Post-filtering seeking to message #{offset}
216
+ on #{topic}/#{partition}
217
+ MSG
180
218
  end
181
219
 
182
220
  # There are many types of errors that can occur in many places, but we provide a single
@@ -203,6 +241,9 @@ module Karafka
203
241
  when 'consumer.after_consume.error'
204
242
  error "Consumer after consume failed due to an error: #{error}"
205
243
  error details
244
+ when 'consumer.idle.error'
245
+ error "Consumer idle failed due to an error: #{error}"
246
+ error details
206
247
  when 'consumer.shutdown.error'
207
248
  error "Consumer on shutdown failed due to an error: #{error}"
208
249
  error details
@@ -17,6 +17,9 @@ module Karafka
17
17
  # complete list of all the events. Please use the #available_events on fully loaded
18
18
  # Karafka system to determine all of the events you can use.
19
19
  EVENTS = %w[
20
+ active_job.consume
21
+ active_job.consumed
22
+
20
23
  app.initialized
21
24
  app.running
22
25
  app.quieting
@@ -36,6 +39,7 @@ module Karafka
36
39
  consumer.consumed
37
40
  consumer.consuming.pause
38
41
  consumer.consuming.retry
42
+ consumer.idle
39
43
  consumer.revoke
40
44
  consumer.revoked
41
45
  consumer.shutting_down
@@ -43,6 +47,9 @@ module Karafka
43
47
 
44
48
  dead_letter_queue.dispatched
45
49
 
50
+ filtering.throttled
51
+ filtering.seek
52
+
46
53
  process.notice_signal
47
54
 
48
55
  statistics.emitted