karafka 2.0.39 → 2.0.41

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 12fe8a47dc0ab16b0f7783424cd1aa043c2d2b228b4f4164f1cecefe604269d9
-   data.tar.gz: 9fa3bae282770dd67503c41ef4b73a27a38bfcff3bf472ddd63753d14d03614f
+   metadata.gz: 9560b22fc8cfd59dcaeb6551bcc0b2d2ebfef2f162a12905fa3aefe0c9c5865e
+   data.tar.gz: a5b7aba125288ec96cf3b862e72447bf467fe23f923c232fe1d3ff9c48b1fdb4
  SHA512:
-   metadata.gz: 9e6536c90a411a0b42337f73c00d9f454028366f42eabb1b7f40902181bcbcfd43258741d6fc51c6e29046b9ee1f8598755440d28a00ca96104a61a8095c20c2
-   data.tar.gz: be75dd1bfa744187f770f2e1f0deeedfba4f3fb1b824d6bab91f056f96e60a33498429e35ea22841404def0935e584db0df4289d2818631ace2e597d28785960
+   metadata.gz: d6581af85f8900d2d5ce91b6f9ec8ed0e1f6be5f3e80c36315c44c8dc07c30949566e281f40feb1b54cc9bbca771ac2188637e916d19edcb2fe26c04aeb362e1
+   data.tar.gz: e467612b3185b5ec764d387e72507b617bf49d702436da2021d467fb0c23630aa98151c9679444a34114317a8c52e3c37f0268c6a6bdb4564ffa1bab51993109
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,5 +1,21 @@
  # Karafka framework changelog

+ ## 2.0.41 (2023-04-19)
+ - **[Feature]** Provide `Karafka::Pro::Iterator` for anonymous topic/partitions iterations and messages lookups (#1389 and #1427).
+ - [Improvement] Optimize topic lookup for `read_topic` admin method usage.
+ - [Improvement] Report via `LoggerListener` information about the partition on which a given job has started and finished.
+ - [Improvement] Slightly normalize the `LoggerListener` format. Always report partition-related operations as follows: `TOPIC_NAME/PARTITION`.
+ - [Improvement] Do not retry recovery from `unknown_topic_or_part` when Karafka is shutting down, as there is no point and no risk of any data losses.
+ - [Improvement] Report `client.software.name` and `client.software.version` according to the `librdkafka` recommendation.
+ - [Improvement] Report the ten longest integration specs after the suite execution.
+ - [Improvement] Prevent user-originating errors related to statistics processing after a listener loop crash from potentially crashing the listener loop and hanging the Karafka process.
+
+ ## 2.0.40 (2023-04-13)
+ - [Improvement] Introduce the `Karafka::Messages::Messages#empty?` method to handle Idle-related cases where shutdown or revocation would be called on an empty messages set. This method allows for checking if there are any messages in the messages batch.
+ - [Refactor] Require the messages builder to accept the partition and do not fetch it from messages.
+ - [Refactor] Use an empty messages set for internal APIs (Idle), so there always is a `Karafka::Messages::Messages`.
+ - [Refactor] Allow for empty messages set initialization with -1001 and -1 on metadata (similar to `librdkafka`).
+
  ## 2.0.39 (2023-04-11)
  - **[Feature]** Provide ability to throttle/limit number of messages processed in a time unit (#1203)
  - **[Feature]** Provide Delayed Topics (#1000)
data/Gemfile CHANGED
@@ -6,10 +6,11 @@ plugin 'diffend'

  gemspec

- # Karafka gem does not require this but we add it here so we can test the integration with
- # ActiveJob much easier
+ # Karafka gem does not require activejob nor karafka-web to work
+ # They are added here because they are part of the integration suite
  group :integrations do
    gem 'activejob'
+   gem 'karafka-web'
  end

  group :test do
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
    remote: .
    specs:
-     karafka (2.0.39)
+     karafka (2.0.41)
        karafka-core (>= 2.0.12, < 3.0.0)
        thor (>= 0.20)
        waterdrop (>= 2.4.10, < 3.0.0)
@@ -22,6 +22,7 @@ GEM
      concurrent-ruby (1.2.2)
      diff-lcs (1.5.0)
      docile (1.4.0)
+     erubi (1.12.0)
      factory_bot (6.2.1)
        activesupport (>= 5.0.0)
      ffi (1.15.5)
@@ -36,9 +37,18 @@ GEM
        ffi (~> 1.15)
        mini_portile2 (~> 2.6)
        rake (> 12)
+     karafka-web (0.5.1)
+       erubi (~> 1.4)
+       karafka (>= 2.0.40, < 3.0.0)
+       karafka-core (>= 2.0.12, < 3.0.0)
+       roda (~> 3.63)
+       tilt (~> 2.0)
      mini_portile2 (2.8.1)
      minitest (5.18.0)
+     rack (3.0.7)
      rake (13.0.6)
+     roda (3.67.0)
+       rack
      rspec (3.12.0)
        rspec-core (~> 3.12.0)
        rspec-expectations (~> 3.12.0)
@@ -59,6 +69,7 @@ GEM
      simplecov-html (0.12.3)
      simplecov_json_formatter (0.1.4)
      thor (1.2.1)
+     tilt (2.1.0)
      tzinfo (2.0.6)
        concurrent-ruby (~> 1.0)
      waterdrop (2.5.1)
@@ -67,7 +78,6 @@ GEM
      zeitwerk (2.6.7)

  PLATFORMS
-   arm64-darwin-21
    x86_64-linux

  DEPENDENCIES
@@ -75,6 +85,7 @@ DEPENDENCIES
    byebug
    factory_bot
    karafka!
+   karafka-web
    rspec
    simplecov
data/bin/integrations CHANGED
@@ -152,8 +152,14 @@ class Scenario
      end
    end

+   # @return [Float] number of seconds that a given spec took to run
+   def time_taken
+     @finished_at - @started_at
+   end
+
    # Close all the files that are open, so they do not pile up
    def close
+     @finished_at = current_time
      @stdin.close
      @stdout.close
      @stderr.close
@@ -262,13 +268,22 @@ while finished_scenarios.size < scenarios.size
    sleep(0.1)
  end

+ # Report longest scenarios
+ puts
+ puts "\nLongest scenarios:\n\n"
+
+ finished_scenarios.sort_by(&:time_taken).reverse.first(10).each do |long_scenario|
+   puts "[#{'%6.2f' % long_scenario.time_taken}] #{long_scenario.name}"
+ end
+
  failed_scenarios = finished_scenarios.reject(&:success?)

- # Report once more on the failed jobs
- # This will only list scenarios that failed without printing their stdout here.
  if failed_scenarios.empty?
    puts
  else
+   # Report once more on the failed jobs
+   # This will only list scenarios that failed without printing their stdout here.
+   puts
    puts "\nFailed scenarios:\n\n"

    failed_scenarios.each do |scenario|
data/lib/karafka/admin.rb CHANGED
@@ -96,13 +96,15 @@ module Karafka
        end
      end

+     # Use topic from routes if we can match it or create a dummy one
+     # Dummy one is used in case we cannot match the topic with routes. This can happen
+     # when admin API is used to read topics that are not part of the routing
+     topic = ::Karafka::Routing::Router.find_or_initialize_by_name(name)
+
      messages.map! do |message|
        Messages::Builders::Message.call(
          message,
-         # Use topic from routes if we can match it or create a dummy one
-         # Dummy one is used in case we cannot match the topic with routes. This can happen
-         # when admin API is used to read topics that are not part of the routing
-         Routing::Router.find_by(name: name) || Topic.new(name, App.config.deserializer),
+         topic,
          Time.now
        )
      end
@@ -173,6 +175,17 @@ module Karafka
        end
      end

+     # Creates a consumer instance and yields it. After usage it closes the consumer instance
+     # This API can be used in other pieces of code and allows for low-level consumer usage
+     #
+     # @param settings [Hash] extra settings to customize consumer
+     def with_consumer(settings = {})
+       consumer = config(:consumer, settings).consumer
+       yield(consumer)
+     ensure
+       consumer&.close
+     end
+
      private

      # @return [Array<String>] topic names
@@ -195,15 +208,6 @@ module Karafka
        admin&.close
      end

-     # Creates consumer instance and yields it. After usage it closes the consumer instance
-     # @param settings [Hash] extra settings to customize consumer
-     def with_consumer(settings = {})
-       consumer = config(:consumer, settings).consumer
-       yield(consumer)
-     ensure
-       consumer&.close
-     end
-
      # There are some cases where rdkafka admin operations finish successfully but without the
      # callback being triggered to materialize the post-promise object. Until this is fixed we
      # can figure out, that operation we wanted to do finished successfully by checking that the
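Since `with_consumer` moved above the `private` marker, it is now part of the public admin API and backs the new `Pro::Iterator`. A minimal sketch of direct usage, assuming a booted Karafka app; the `events` topic is hypothetical, and the underlying object is a regular rdkafka consumer:

```ruby
# Hedged sketch: settings are merged into the admin consumer config, so
# defaults such as auto.offset.reset can be overridden per call
Karafka::Admin.with_consumer('auto.offset.reset': 'beginning') do |consumer|
  consumer.subscribe('events')

  # Poll one message; the consumer is closed automatically by the ensure block
  message = consumer.poll(1_000)
  puts message&.payload
end
```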
@@ -369,6 +369,8 @@ module Karafka
      rescue ::Rdkafka::RdkafkaError => e
        early_report = false

+       retryable = time_poll.attempts <= MAX_POLL_RETRIES && time_poll.retryable?
+
        # There are retryable issues on which we want to report fast as they are source of
        # problems and can mean some bigger system instabilities
        # Those are mainly network issues and exceeding the max poll interval
@@ -389,9 +391,10 @@ module Karafka
          return nil if @subscription_group.kafka[:'allow.auto.create.topics']

          early_report = true
-       end

-       retryable = time_poll.attempts <= MAX_POLL_RETRIES && time_poll.retryable?
+         # No sense in retrying when no topic/partition and we're no longer running
+         retryable = false unless Karafka::App.running?
+       end

        if early_report || !retryable
          Karafka.monitor.instrument(
@@ -32,6 +32,18 @@ module Karafka
          consumer_group_id: @consumer_group_id,
          statistics: @statistics_decorator.call(statistics)
        )
+     # We need to catch and handle any potential errors coming from the instrumentation pipeline
+     # as otherwise, in case of statistics which run in the main librdkafka thread, any crash
+     # will hang the whole process.
+     rescue StandardError => e
+       ::Karafka.monitor.instrument(
+         'error.occurred',
+         caller: self,
+         subscription_group_id: @subscription_group_id,
+         consumer_group_id: @consumer_group_id,
+         type: 'statistics.emitted.error',
+         error: e
+       )
      end
    end
  end
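For context, this rescue protects against user code subscribed to the statistics instrumentation. A minimal sketch of the kind of listener this change contains, with a deliberately failing lookup as the illustrative bug:

```ruby
# Hedged sketch: a user-side listener on the statistics instrumentation.
# Before this change, an exception raised here could take down the whole
# process, as statistics callbacks run in the main librdkafka thread.
Karafka.monitor.subscribe('statistics.emitted') do |event|
  stats = event[:statistics]

  # A user bug like a bad key lookup is now rescued and re-emitted as an
  # 'error.occurred' event with type 'statistics.emitted.error'
  stats.fetch('missing-key')
end
```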
@@ -48,7 +48,8 @@ module Karafka
      job_type = job.class.to_s.split('::').last
      consumer = job.executor.topic.consumer
      topic = job.executor.topic.name
-     info "[#{job.id}] #{job_type} job for #{consumer} on #{topic} started"
+     partition = job.executor.partition
+     info "[#{job.id}] #{job_type} job for #{consumer} on #{topic}/#{partition} started"
    end

    # Prints info about the fact that a given job has finished
@@ -60,7 +61,11 @@ module Karafka
      job_type = job.class.to_s.split('::').last
      consumer = job.executor.topic.consumer
      topic = job.executor.topic.name
-     info "[#{job.id}] #{job_type} job for #{consumer} on #{topic} finished in #{time}ms"
+     partition = job.executor.partition
+     info <<~MSG.tr("\n", ' ').strip!
+       [#{job.id}] #{job_type} job for #{consumer}
+       on #{topic}/#{partition} finished in #{time}ms
+     MSG
    end

    # Prints info about a consumer pause occurrence. Irrelevant if user or system initiated.
@@ -73,7 +78,7 @@ module Karafka
      client = event[:caller]

      info <<~MSG.tr("\n", ' ').strip!
-       [#{client.id}] Pausing partition #{partition} of topic #{topic} on offset #{offset}
+       [#{client.id}] Pausing on topic #{topic}/#{partition} on offset #{offset}
      MSG
    end

@@ -86,7 +91,7 @@ module Karafka
      client = event[:caller]

      info <<~MSG.tr("\n", ' ').strip!
-       [#{client.id}] Resuming partition #{partition} of topic #{topic}
+       [#{client.id}] Resuming on topic #{topic}/#{partition}
      MSG
    end

@@ -102,7 +107,7 @@ module Karafka

      info <<~MSG.tr("\n", ' ').strip!
        [#{consumer.id}] Retrying of #{consumer.class} after #{timeout} ms
-       on partition #{partition} of topic #{topic} from offset #{offset}
+       on topic #{topic}/#{partition} from offset #{offset}
      MSG
    end

@@ -261,6 +266,12 @@ module Karafka
      when 'librdkafka.error'
        error "librdkafka internal error occurred: #{error}"
        error details
+     # Those can occur when emitted statistics are consumed by the end user and the processing
+     # of statistics fails. The statistics are emitted from the librdkafka main loop thread and
+     # any errors there crash the whole thread
+     when 'statistics.emitted.error'
+       error "statistics.emitted processing failed due to an error: #{error}"
+       error details
      # Those will only occur when retries in the client fail and when they did not stop after
      # back-offs
      when 'connection.client.poll.error'
@@ -10,22 +10,23 @@ module Karafka
        #
        # @param messages [Array<Karafka::Messages::Message>] messages array
        # @param topic [Karafka::Routing::Topic] topic for which we've fetched the batch
+       # @param partition [Integer] partition of this metadata
        # @param scheduled_at [Time] moment when the batch was scheduled for processing
        # @return [Karafka::Messages::BatchMetadata] batch metadata object
        #
        # @note We do not set `processed_at` as this needs to be assigned when the batch is
        #   picked up for processing.
-       def call(messages, topic, scheduled_at)
+       def call(messages, topic, partition, scheduled_at)
          Karafka::Messages::BatchMetadata.new(
            size: messages.count,
-           first_offset: messages.first.offset,
-           last_offset: messages.last.offset,
+           first_offset: messages.first&.offset || -1001,
+           last_offset: messages.last&.offset || -1001,
            deserializer: topic.deserializer,
-           partition: messages.first.partition,
+           partition: partition,
            topic: topic.name,
            # We go with the assumption that the creation of the whole batch is the last message
            # creation time
-           created_at: messages.last.timestamp,
+           created_at: messages.last&.timestamp || nil,
            # When this batch was built and scheduled for execution
            scheduled_at: scheduled_at,
            # This needs to be set to a correct value prior to processing starting
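A short sketch of what the nil-safe sentinels mean for an empty (Idle) batch; `-1001` mirrors librdkafka's invalid-offset sentinel, and the `events` topic name is illustrative:

```ruby
# Hedged sketch: an empty batch no longer raises on missing first/last messages
topic = Karafka::Routing::Router.find_or_initialize_by_name('events')

metadata = Karafka::Messages::Builders::BatchMetadata.call([], topic, 0, Time.now)

metadata.first_offset # => -1001 (librdkafka-style invalid offset sentinel)
metadata.last_offset  # => -1001
metadata.created_at   # => nil (no last message to take the timestamp from)
```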
@@ -11,14 +11,16 @@ module Karafka
      #
      # @param messages [Array<Karafka::Messages::Message>] karafka messages array
      # @param topic [Karafka::Routing::Topic] topic for which we've received messages
+     # @param partition [Integer] partition of those messages
      # @param received_at [Time] moment in time when the messages were received
      # @return [Karafka::Messages::Messages] messages batch object
-     def call(messages, topic, received_at)
+     def call(messages, topic, partition, received_at)
        # We cannot freeze the batch metadata because it is altered with the processed_at time
        # prior to the consumption. It is being frozen there
        metadata = BatchMetadata.call(
          messages,
          topic,
+         partition,
          received_at
        )

@@ -40,6 +40,11 @@ module Karafka
      map(&:raw_payload)
    end

+   # @return [Boolean] is the messages batch empty
+   def empty?
+     @messages_array.empty?
+   end
+
    # @return [Karafka::Messages::Message] first message
    def first
      @messages_array.first
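A minimal sketch of how the new `#empty?` guard might be used in a consumer, e.g. when `#shutdown` runs against the empty messages set that Idle jobs now build; the consumer class and its logic are hypothetical:

```ruby
# Hedged sketch: EventsConsumer is an illustrative consumer class
class EventsConsumer < Karafka::BaseConsumer
  def consume
    messages.each { |message| puts message.raw_payload }
  end

  def shutdown
    # With Idle jobs, shutdown may now see an empty batch; skip offset work then
    return if messages.empty?

    mark_as_consumed(messages.last)
  end
end
```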
@@ -0,0 +1,253 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     # Topic iterator allows you to iterate over topic/partition data and perform lookups for
+     # information that you need.
+     #
+     # It supports early stops on finding the requested data and allows for seeking till
+     # the end. It also allows for signaling, when a given message should be the last out of a
+     # certain partition, while we still want to continue iterating over other messages.
+     #
+     # It does **not** create a consumer group and does not have any offset management.
+     class Iterator
+       # Local partition reference for librdkafka
+       Partition = Struct.new(:partition, :offset)
+
+       private_constant :Partition
+
+       # A simple API allowing to iterate over topic/partition data, without having to subscribe
+       # and deal with rebalances. This API allows for multi-partition streaming and is optimized
+       # for data lookups. It allows for explicitly stopping the iteration over any partition
+       # during the iteration process, allowing for optimized lookups.
+       #
+       # @param topics [Array<String>, Hash] list of strings if we want to subscribe to multiple
+       #   topics and all of their partitions or a hash where keys are the topics and values are
+       #   hashes with partitions and their initial offsets.
+       # @param settings [Hash] extra settings for the consumer. Please keep in mind, that if
+       #   overwritten, you may want to include `auto.offset.reset` to match your case.
+       # @param yield_nil [Boolean] should we yield also `nil` values when poll returns nothing.
+       #   Useful in particular for long-living iterators.
+       #
+       # @note It is worth keeping in mind, that this API also needs to operate within
+       #   `max.poll.interval.ms` limitations on each iteration
+       #
+       # @note In case of a never-ending iterator, you need to set `enable.partition.eof` to
+       #   `false` so we don't stop polling data even when reaching the end (the end at a given
+       #   moment)
+       def initialize(
+         topics,
+         settings: { 'auto.offset.reset': 'beginning' },
+         yield_nil: false
+       )
+         @topics_with_partitions = expand_topics_with_partitions(topics)
+
+         @routing_topics = @topics_with_partitions.map do |name, _|
+           [name, ::Karafka::Routing::Router.find_or_initialize_by_name(name)]
+         end.to_h
+
+         @total_partitions = @topics_with_partitions.map(&:last).sum(&:count)
+
+         @stopped_partitions = 0
+
+         @settings = settings
+         @yield_nil = yield_nil
+       end
+
+       # Iterates over requested topic partitions and yields the results with the iterator itself
+       # Iterator instance is yielded because one can run `stop_partition` to stop iterating over
+       # part of data. It is useful for scenarios where we are looking for some information in
+       # all the partitions but once we have found it, given partition data is no longer needed
+       # and would only eat up resources.
+       def each
+         Admin.with_consumer(@settings) do |consumer|
+           tpl = tpl_with_expanded_offsets(consumer)
+           consumer.assign(tpl)
+
+           # We need this for self-referenced APIs like pausing
+           @current_consumer = consumer
+
+           # Stream data until we reach the end of all the partitions or until the end user
+           # indicates that they are done
+           until done?
+             message = poll(200)
+
+             # Skip nils if not explicitly required
+             next if message.nil? && !@yield_nil
+
+             if message
+               @current_message = build_message(message)
+
+               yield(@current_message, self)
+             else
+               yield(nil, self)
+             end
+           end
+
+           @current_message = nil
+           @current_consumer = nil
+         end
+
+         # Reset so we can use the same iterator again if needed
+         @stopped_partitions = 0
+       end
+
+       # Stops the partition we're currently yielded into
+       def stop_current_partition
+         stop_partition(
+           @current_message.topic,
+           @current_message.partition
+         )
+       end
+
+       # Stops processing of a given partition
+       # We expect the partition to be provided because of a scenario, where there is a
+       # multi-partition iteration and we want to stop a different partition than the one that
+       # is currently yielded.
+       #
+       # We pause it forever and no longer work with it.
+       #
+       # @param name [String] topic name of which partition we want to stop
+       # @param partition [Integer] partition we want to stop processing
+       def stop_partition(name, partition)
+         @stopped_partitions += 1
+
+         @current_consumer.pause(
+           Rdkafka::Consumer::TopicPartitionList.new(
+             name => [Partition.new(partition, 0)]
+           )
+         )
+       end
+
+       private
+
+       # Expands topics to which we want to subscribe with partitions information in case this
+       # info is not provided. For our convenience we want to support 5 formats of defining
+       # the subscribed topics:
+       #
+       # - 'topic1' - just a string with one topic name
+       # - ['topic1', 'topic2'] - just the names
+       # - { 'topic1' => -100 } - names with negative lookup offset
+       # - { 'topic1' => { 0 => 5 } } - names with exact partitions offsets
+       # - { 'topic1' => { 0 => -5 }, 'topic2' => { 1 => 5 } } - with per partition negative offsets
+       #
+       # @param topics [Array, Hash] topics definitions
+       # @return [Hash] hash with topics containing partitions definitions
+       def expand_topics_with_partitions(topics)
+         # Simplification for the single topic case
+         topics = [topics] if topics.is_a?(String)
+         # If we've got just an array with topics, we need to convert that into a representation
+         # that we can expand with offsets
+         topics = topics.map { |name| [name, false] }.to_h if topics.is_a?(Array)
+
+         expanded = Hash.new { |h, k| h[k] = {} }
+
+         topics.map do |topic, details|
+           if details.is_a?(Hash)
+             details.each do |partition, offset|
+               expanded[topic][partition] = offset
+             end
+           else
+             partition_count(topic.to_s).times do |partition|
+               # If no offsets are provided, we just start from zero
+               expanded[topic][partition] = details || 0
+             end
+           end
+         end
+
+         expanded
+       end
+
+       # @param timeout [Integer] timeout in ms
+       # @return [Rdkafka::Consumer::Message, nil] message or nil if nothing to do
+       def poll(timeout)
+         @current_consumer.poll(timeout)
+       rescue Rdkafka::RdkafkaError => e
+         # End of partition
+         if e.code == :partition_eof
+           @stopped_partitions += 1
+
+           retry
+         end
+
+         raise e
+       end
+
+       # Converts a raw rdkafka message into a Karafka message
+       #
+       # @param message [Rdkafka::Consumer::Message] raw rdkafka message
+       # @return [::Karafka::Messages::Message]
+       def build_message(message)
+         Messages::Builders::Message.call(
+           message,
+           @routing_topics.fetch(message.topic),
+           Time.now
+         )
+       end
+
+       # Do we have all the data we wanted or has every topic partition reached eof?
+       # @return [Boolean]
+       def done?
+         @stopped_partitions >= @total_partitions
+       end
+
+       # Builds the tpl representing all the subscriptions we want to run
+       #
+       # Additionally, for negative offsets, does the watermark calculation of where to start
+       #
+       # @param consumer [Rdkafka::Consumer] consumer we need in case of negative offsets as
+       #   negative ones are going to be used to do "give me the last X". We use the already
+       #   initialized consumer instance, not to start another one again.
+       # @return [Rdkafka::Consumer::TopicPartitionList]
+       def tpl_with_expanded_offsets(consumer)
+         tpl = Rdkafka::Consumer::TopicPartitionList.new
+
+         @topics_with_partitions.each do |name, partitions|
+           partitions_with_offsets = {}
+
+           # When no offsets defined, we just start from zero
+           if partitions.is_a?(Array) || partitions.is_a?(Range)
+             partitions_with_offsets = partitions.map { |partition| [partition, 0] }.to_h
+           else
+             # When offsets defined, we can either use them if positive or expand and move back
+             # in case of negative (-1000 means the last 1000 messages, etc)
+             partitions.each do |partition, offset|
+               if offset.negative?
+                 _, high_watermark_offset = consumer.query_watermark_offsets(name, partition)
+                 # We add because this offset is negative
+                 partitions_with_offsets[partition] = high_watermark_offset + offset
+               else
+                 partitions_with_offsets[partition] = offset
+               end
+             end
+           end
+
+           tpl.add_topic_and_partitions_with_offsets(name, partitions_with_offsets)
+         end
+
+         tpl
+       end
+
+       # @param name [String] topic name
+       # @return [Integer] number of partitions of the topic we want to iterate over
+       def partition_count(name)
+         Admin
+           .cluster_info
+           .topics
+           .find { |topic| topic.fetch(:topic_name) == name }
+           .fetch(:partitions)
+           .count
+       end
+     end
+   end
+ end
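A minimal usage sketch of the new iterator, assuming a Pro setup; `visits` is a hypothetical topic, and the offset formats follow the `expand_topics_with_partitions` documentation above:

```ruby
# Hedged sketch: look up data across partitions without a consumer group.
# -100 means "start from the last 100 messages" of partition 0.
iterator = Karafka::Pro::Iterator.new(
  { 'visits' => { 0 => -100, 1 => 0 } }
)

iterator.each do |message, iter|
  puts "#{message.topic}/#{message.partition} @ #{message.offset}"

  # Once we have what we came for, stop streaming the current partition while
  # other partitions continue until they reach eof
  iter.stop_current_partition if message.offset >= 1_000
end
```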
@@ -21,7 +21,8 @@ module Karafka
        #
        # In general because we collapse processing in virtual partitions to one on errors, there
        # is no special action that needs to be taken because we guarantee that even with VPs
-       # on errors a retry collapses into a single state.
+       # on errors a retry collapses into a single state and from this single state we can
+       # mark as consumed the message that we are moving to the DLQ.
        module Vp
          # Features for this strategy
          FEATURES = %i[
@@ -44,10 +44,6 @@ module Karafka
      #
      # @param messages [Array<Karafka::Messages::Message>]
      def before_enqueue(messages)
-       # the moment we've received the batch or actually the moment we've enqueued it,
-       # but good enough
-       @enqueued_at = Time.now
-
        # Recreate consumer with each batch if persistence is not enabled
        # We reload the consumers with each batch instead of relying on some external signals
        # when needed for consistency. That way devs may have it on or off and not in this
@@ -57,8 +53,11 @@ module Karafka
        # First we build messages batch...
        consumer.messages = Messages::Builders::Messages.call(
          messages,
-         coordinator.topic,
-         @enqueued_at
+         topic,
+         partition,
+         # the moment we've received the batch or actually the moment we've enqueued it,
+         # but good enough
+         Time.now
        )

        consumer.on_before_enqueue
@@ -84,6 +83,16 @@ module Karafka
      # This may include house-keeping or other state management changes that can occur but that
      # do not mean there are any new messages available for the end user to process
      def idle
+       # Initializes the messages set in case an idle operation would happen before any
+       # processing. This prevents us from having no messages object at all, as the messages
+       # object and its metadata may be used for statistics
+       consumer.messages ||= Messages::Builders::Messages.call(
+         [],
+         topic,
+         partition,
+         Time.now
+       )
+
        consumer.on_idle
      end

@@ -32,8 +32,23 @@ module Karafka
      nil
    end

+   # Finds the topic by name (in any consumer group) and if not present, will build a new
+   # representation of the topic with the defaults and the default deserializer.
+   #
+   # This is used in places where we may operate on topics that are not part of the routing
+   # but we want to do something on them (display data, iterate over, etc)
+   # @param name [String] name of the topic we are looking for
+   # @return [Karafka::Routing::Topic]
+   #
+   # @note Please note, that in case of a new topic, it will have a newly built consumer group
+   #   as well, that is not part of the routing.
+   def find_or_initialize_by_name(name)
+     find_by(name: name) || Topic.new(name, ConsumerGroup.new(name))
+   end
+
    module_function :find
    module_function :find_by
+   module_function :find_or_initialize_by_name
  end
  end
  end
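A short sketch of the fallback behavior; the topic name is hypothetical and not part of any routing:

```ruby
# Hedged sketch: a routed topic resolves to its routing entry; an unknown one
# gets an ad hoc Topic built with defaults and the default deserializer
topic = Karafka::Routing::Router.find_or_initialize_by_name('not-in-routes')

topic.name # => 'not-in-routes'
# Per the @note above, its consumer group is also freshly built and is not
# part of the routing either
```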
@@ -18,7 +18,13 @@ module Karafka
      KAFKA_DEFAULTS = {
        # We emit the statistics by default, so all the instrumentation and web-ui work out of
        # the box, without requiring users to take any extra actions aside from enabling.
-       'statistics.interval.ms': 5_000
+       'statistics.interval.ms': 5_000,
+       'client.software.name': 'karafka',
+       'client.software.version': [
+         "v#{Karafka::VERSION}",
+         "rdkafka-ruby-v#{Rdkafka::VERSION}",
+         "librdkafka-v#{Rdkafka::LIBRDKAFKA_VERSION}"
+       ].join('-')
      }.freeze

      # Contains settings that should not be used in production but make life easier in dev
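For illustration, the joined `client.software.version` string reported to the brokers resolves to something like the following; the exact version numbers depend on the installed gems and are only examples:

```ruby
# Hedged sketch with example version constants; actual values vary per install
[
  "v#{Karafka::VERSION}",              # e.g. "v2.0.41"
  "rdkafka-ruby-v#{Rdkafka::VERSION}", # e.g. "rdkafka-ruby-v0.12.1"
  "librdkafka-v#{Rdkafka::LIBRDKAFKA_VERSION}"
].join('-')
# => e.g. "v2.0.41-rdkafka-ruby-v0.12.1-librdkafka-v1.9.2"
```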
@@ -3,5 +3,5 @@
  # Main module namespace
  module Karafka
    # Current Karafka version
-   VERSION = '2.0.39'
+   VERSION = '2.0.41'
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: karafka
  version: !ruby/object:Gem::Version
-   version: 2.0.39
+   version: 2.0.41
  platform: ruby
  authors:
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
    Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
    MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
    -----END CERTIFICATE-----
- date: 2023-04-11 00:00:00.000000000 Z
+ date: 2023-04-19 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: karafka-core
@@ -224,6 +224,7 @@ files:
  - lib/karafka/pro/encryption/messages/middleware.rb
  - lib/karafka/pro/encryption/messages/parser.rb
  - lib/karafka/pro/encryption/setup/config.rb
+ - lib/karafka/pro/iterator.rb
  - lib/karafka/pro/loader.rb
  - lib/karafka/pro/performance_tracker.rb
  - lib/karafka/pro/processing/collapser.rb
metadata.gz.sig CHANGED
Binary file