karafka 2.1.6 → 2.1.8

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: a6994a6d579728a877f84c87086d093aae8a1f830b891fcb4904883085432fe4
- data.tar.gz: 13b21009a471194a72971ca81ddc718e044bb96587db0e8f186974f554e9ec62
+ metadata.gz: 042f365fb134a24ae360d678590ce798014751c8a23fb267001c920a42aa5324
+ data.tar.gz: ba2950de557a5f6c775577ce392d60ee839184f50b7d9225969c684625c9ecd0
  SHA512:
- metadata.gz: e4711880bde1d2cd1cb34959f740459979b74ff4d28a671a232f88adbe7473cf67e366fc2b492fac761c572f3a6dfc147a59d46fc08e1c5e18df8ac5f108afdd
- data.tar.gz: c094600c2bd421ce309c0125d60ea82ed0106d5ce4566b3bb8c1aab13c553e7bd2f6651b98029e42ac831b132563b2c502dd1c76defbf8307cd9bd2393b258f7
+ metadata.gz: 30b5fcd92c348c50482cb84542380ad28b317d47e01efad2d2049cd3ba7872c5f66a7a4fde93d8bfa262d7fcb3745dedbdb89a4fd5e6cdf2288a43606ea2361d
+ data.tar.gz: d9c8ba95b2b71f46a3d35e2f5be634473a7aa9f45d9b5924e1019dcb53c7cea6aca8c594f9f4d3bf85a14176c3cb98a7167b4eb6e1f6f2192059d8040440e4a9
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,5 +1,20 @@
  # Karafka framework changelog

+ ## 2.1.8 (2023-07-29)
+ - [Improvement] Introduce `Karafka::BaseConsumer#used?` to indicate that at least one invocation of `#consume` took place or will take place. This can be used as a replacement for the indirect `messages.count` check during shutdown and revocation to ensure that consumption took place or is taking place (in case of a running LRJ).
+ - [Improvement] Make `messages#to_a` return a copy of the underlying array to prevent scenarios where mutation impacts offset management.
+ - [Improvement] Mitigate a librdkafka `cooperative-sticky` rebalance crash issue.
+ - [Improvement] Provide the ability to overwrite `consumer_persistence` per subscribed topic. This is mostly useful for plugin and extension developers.
+ - [Fix] Fix a case where the performance tracker would crash when messages were mutated to an empty state.
+
+ ## 2.1.7 (2023-07-22)
+ - [Improvement] Always query for watermarks in the Iterator to improve the initial response time.
+ - [Improvement] Add a `max_wait_time` option to the Iterator.
+ - [Fix] Fix a case where `Admin#read_topic` would wait for the poll interval on non-existing messages instead of exiting early.
+ - [Fix] Fix a case where an Iterator with per-partition negative offset lookups would go below the number of available messages.
+ - [Fix] Remove an unused constant from the Admin module.
+ - [Fix] Add the missing `connection.client.rebalance_callback.error` to the `LoggerListener` instrumentation hook.
+
  ## 2.1.6 (2023-06-29)
  - [Improvement] Provide time support for iterator
  - [Improvement] Provide time support for admin `#read_topic`
@@ -63,7 +78,7 @@
  2. Replace `Karafka::Pro::BaseConsumer` references to `Karafka::BaseConsumer`.
  3. Replace `Karafka::Instrumentation::Vendors::Datadog:Listener` with `Karafka::Instrumentation::Vendors::Datadog::MetricsListener`.

- ## 2.0.41 (2023-14-19)
+ ## 2.0.41 (2023-04-19)
  - **[Feature]** Provide `Karafka::Pro::Iterator` for anonymous topic/partitions iterations and messages lookups (#1389 and #1427).
  - [Improvement] Optimize topic lookup for `read_topic` admin method usage.
  - [Improvement] Report via `LoggerListener` information about the partition on which a given job has started and finished.
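As context for the `#used?` entry above, a minimal consumer sketch showing how the flag can replace the indirect `messages.count` check; the `EventsConsumer` class and its teardown logic are hypothetical, not part of the gem.

```ruby
# Hypothetical consumer illustrating the 2.1.8 `#used?` helper
class EventsConsumer < Karafka::BaseConsumer
  def consume
    messages.each { |message| puts message.payload }
  end

  def shutdown
    # Previously one might have checked `messages.count.positive?` to guess
    # whether consumption ever ran; `used?` states it directly, also for LRJ
    return unless used?

    # ... teardown that only makes sense after actual consumption ...
  end
end
```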
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
  remote: .
  specs:
- karafka (2.1.6)
+ karafka (2.1.8)
  karafka-core (>= 2.1.1, < 2.2.0)
  thor (>= 0.20)
  waterdrop (>= 2.6.2, < 3.0.0)
@@ -10,10 +10,10 @@ PATH
  GEM
  remote: https://rubygems.org/
  specs:
- activejob (7.0.5)
- activesupport (= 7.0.5)
+ activejob (7.0.6)
+ activesupport (= 7.0.6)
  globalid (>= 0.3.6)
- activesupport (7.0.5)
+ activesupport (7.0.6)
  concurrent-ruby (~> 1.0, >= 1.0.2)
  i18n (>= 1.6, < 2)
  minitest (>= 5.1)
@@ -33,21 +33,21 @@ GEM
  karafka-core (2.1.1)
  concurrent-ruby (>= 1.1)
  karafka-rdkafka (>= 0.13.1, < 0.14.0)
- karafka-rdkafka (0.13.1)
+ karafka-rdkafka (0.13.3)
  ffi (~> 1.15)
  mini_portile2 (~> 2.6)
  rake (> 12)
- karafka-web (0.6.1)
+ karafka-web (0.6.3)
  erubi (~> 1.4)
  karafka (>= 2.1.4, < 3.0.0)
  karafka-core (>= 2.0.13, < 3.0.0)
  roda (~> 3.68, >= 3.68)
  tilt (~> 2.0)
- mini_portile2 (2.8.2)
+ mini_portile2 (2.8.4)
  minitest (5.18.1)
  rack (3.0.8)
  rake (13.0.6)
- roda (3.69.0)
+ roda (3.70.0)
  rack
  rspec (3.12.0)
  rspec-core (~> 3.12.0)
@@ -58,10 +58,10 @@ GEM
  rspec-expectations (3.12.3)
  diff-lcs (>= 1.2.0, < 2.0)
  rspec-support (~> 3.12.0)
- rspec-mocks (3.12.5)
+ rspec-mocks (3.12.6)
  diff-lcs (>= 1.2.0, < 2.0)
  rspec-support (~> 3.12.0)
- rspec-support (3.12.0)
+ rspec-support (3.12.1)
  simplecov (0.22.0)
  docile (~> 1.1)
  simplecov-html (~> 0.11)
@@ -72,8 +72,8 @@ GEM
  tilt (2.2.0)
  tzinfo (2.0.6)
  concurrent-ruby (~> 1.0)
- waterdrop (2.6.2)
- karafka-core (>= 2.1.0, < 3.0.0)
+ waterdrop (2.6.5)
+ karafka-core (>= 2.1.1, < 3.0.0)
  zeitwerk (~> 2.3)
  zeitwerk (2.6.8)

data/lib/karafka/admin.rb CHANGED
@@ -9,11 +9,6 @@ module Karafka
  # @note It always uses the primary defined cluster and does not support multi-cluster work.
  # If you need this, just replace the cluster info for the time you use this
  module Admin
- # A fake admin topic representation that we use for messages fetched using this API
- # We cannot use the topics directly because we may want to request data from topics that we
- # do not have in the routing
- Topic = Struct.new(:name, :deserializer)
-
  # We wait only for this amount of time before raising error as we intercept this error and
  # retry after checking that the operation was finished or failed using external factor.
  MAX_WAIT_TIMEOUT = 1
@@ -37,7 +32,7 @@ module Karafka
  'enable.auto.commit': false
  }.freeze

- private_constant :Topic, :CONFIG_DEFAULTS, :MAX_WAIT_TIMEOUT, :TPL_REQUEST_TIMEOUT,
+ private_constant :CONFIG_DEFAULTS, :MAX_WAIT_TIMEOUT, :TPL_REQUEST_TIMEOUT,
  :MAX_ATTEMPTS

  class << self
@@ -71,7 +66,7 @@ module Karafka
  requested_range = (start_offset..start_offset + (count - 1))
  # Establish theoretical available range. Note, that this does not handle cases related to
  # log retention or compaction
- available_range = (low_offset..high_offset)
+ available_range = (low_offset..(high_offset - 1))
  # Select only offset that we can select. This will remove all the potential offsets that
  # are below the low watermark offset
  possible_range = requested_range.select { |offset| available_range.include?(offset) }
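A short usage sketch of the API this hunk adjusts; the topic name is illustrative. The change caps the theoretical range at `high_offset - 1`, since the high watermark points past the last existing message.

```ruby
# Read (up to) the last 10 messages from partition 0 of a hypothetical topic.
messages = Karafka::Admin.read_topic('example_events', 0, 10)
messages.each { |message| puts "#{message.offset}: #{message.raw_payload}" }
```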
@@ -25,6 +25,7 @@ module Karafka
  # Creates new consumer and assigns it an id
  def initialize
  @id = SecureRandom.hex(6)
+ @used = false
  end

  # Can be used to run preparation code prior to the job being enqueued
@@ -34,6 +35,7 @@ module Karafka
  # not as a part of the public api. This should not perform any extensive operations as it is
  # blocking and running in the listener thread.
  def on_before_enqueue
+ @used = true
  handle_before_enqueue
  rescue StandardError => e
  Karafka.monitor.instrument(
@@ -160,6 +162,14 @@ module Karafka
  # some teardown procedures (closing file handler, etc).
  def shutdown; end

+ # @return [Boolean] was this consumer in active use. Active use means running `#consume` at
+ #   least once. Consumer may have to run `#revoked` or `#shutdown` despite not running
+ #   `#consume` previously in delayed job cases and other cases that potentially involve running
+ #   the `Jobs::Idle` for house-keeping
+ def used?
+   @used
+ end
+
  # Pauses processing on a given offset for the current topic partition
  #
  # After given partition is resumed, it will continue processing from the given offset
@@ -23,11 +23,17 @@ module Karafka
  # Max time for a TPL request. We increase it to compensate for remote clusters latency
  TPL_REQUEST_TIMEOUT = 2_000

+ # 1 minute of max wait for the first rebalance before a forceful attempt
+ # This applies only to a case when a short-lived Karafka instance with a client would be
+ # closed before first rebalance. Mitigates a librdkafka bug.
+ COOPERATIVE_STICKY_MAX_WAIT = 60_000
+
  # We want to make sure we never close several clients in the same moment to prevent
  # potential race conditions and other issues
  SHUTDOWN_MUTEX = Mutex.new

- private_constant :MAX_POLL_RETRIES, :SHUTDOWN_MUTEX, :TPL_REQUEST_TIMEOUT
+ private_constant :MAX_POLL_RETRIES, :SHUTDOWN_MUTEX, :TPL_REQUEST_TIMEOUT,
+   :COOPERATIVE_STICKY_MAX_WAIT

  # Creates a new consumer instance.
  #
@@ -226,6 +232,22 @@ module Karafka
  # as until all the consumers are stopped, the server will keep running serving only
  # part of the messages
  def stop
+ # This ensures, that we do not stop the underlying client until it passes the first
+ # rebalance for cooperative-sticky. Otherwise librdkafka may crash
+ #
+ # We set a timeout just in case the rebalance would never happen or would last for an
+ # extensive time period.
+ #
+ # @see https://github.com/confluentinc/librdkafka/issues/4312
+ if @subscription_group.kafka[:'partition.assignment.strategy'] == 'cooperative-sticky'
+   (COOPERATIVE_STICKY_MAX_WAIT / 100).times do
+     # If we're past the first rebalance, no need to wait
+     break if @rebalance_manager.active?
+
+     sleep(0.1)
+   end
+ end
+
  close
  end

@@ -30,6 +30,7 @@ module Karafka
  @assigned_partitions = {}
  @revoked_partitions = {}
  @changed = false
+ @active = false
  end

  # Resets the rebalance manager state
@@ -46,11 +47,20 @@ module Karafka
  @changed
  end

+ # @return [Boolean] true if there was at least one rebalance
+ # @note This method is needed to make sure that when using cooperative-sticky, we do not
+ #   close until first rebalance. Otherwise librdkafka may crash.
+ # @see https://github.com/confluentinc/librdkafka/issues/4312
+ def active?
+   @active
+ end
+
  # Callback that kicks in inside of rdkafka, when new partitions are assigned.
  #
  # @private
  # @param partitions [Rdkafka::Consumer::TopicPartitionList]
  def on_partitions_assigned(partitions)
+ @active = true
  @assigned_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
  @changed = true
  end
@@ -60,6 +70,7 @@
  # @private
  # @param partitions [Rdkafka::Consumer::TopicPartitionList]
  def on_partitions_revoked(partitions)
+ @active = true
  @revoked_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
  @changed = true
  end
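The `active?` flag above, together with the `#stop` guard in the client, only matters when the `cooperative-sticky` assignor is in use. A minimal setup sketch of enabling that assignor; the broker address and client id are placeholders.

```ruby
# karafka.rb (excerpt) - enabling the assignor targeted by the shutdown guard
class KarafkaApp < Karafka::App
  setup do |config|
    config.client_id = 'example_app'
    config.kafka = {
      'bootstrap.servers': '127.0.0.1:9092',
      # librdkafka property; triggers the pre-close rebalance wait shown above
      'partition.assignment.strategy': 'cooperative-sticky'
    }
  end
end
```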
@@ -277,6 +277,9 @@ module Karafka
  when 'connection.client.poll.error'
  error "Data polling error occurred: #{error}"
  error details
+ when 'connection.client.rebalance_callback.error'
+   error "Rebalance callback error occurred: #{error}"
+   error details
  else
  # This should never happen. Please contact the maintainers
  raise Errors::UnsupportedCaseError, event
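The new `when` branch covers rebalance callback failures in the built-in `LoggerListener`. The same error type can also be observed through the instrumentation API; a hedged sketch, with the reporting call being a simple placeholder.

```ruby
# Custom subscription to the error.occurred notification
Karafka.monitor.subscribe('error.occurred') do |event|
  next unless event[:type] == 'connection.client.rebalance_callback.error'

  # event[:error] carries the exception raised inside the rebalance callback
  puts "Rebalance callback failed: #{event[:error].message}"
end
```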
@@ -60,10 +60,12 @@ module Karafka
  @messages_array.size
  end

- # @return [Array<Karafka::Messages::Message>] pure array with messages
+ # @return [Array<Karafka::Messages::Message>] copy of the pure array with messages
  def to_a
- @messages_array
+ @messages_array.dup
  end
+
+ alias count size
  end
  end
  end
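A small sketch of the behavioral change in `#to_a`: mutating the returned array no longer touches the batch the framework uses for offset management. The consumer class and filtering condition are illustrative only.

```ruby
class FilteringConsumer < Karafka::BaseConsumer
  def consume
    # `to_a` now returns a copy, so rejecting elements here does not alter
    # the underlying batch that offset management relies on
    selected = messages.to_a.reject { |message| message.payload.nil? }

    selected.each { |message| puts message.offset }
  end
end
```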
@@ -93,12 +93,27 @@ module Karafka
  next unless partitions.is_a?(Hash)

  partitions.each do |partition, offset|
+ # Care only about numerical offsets
+ #
+ # For time based we already resolve them via librdkafka lookup API
+ next unless offset.is_a?(Integer)
+
+ low_offset, high_offset = @consumer.query_watermark_offsets(name, partition)
+
  # Care only about negative offsets (last n messages)
- next unless offset.is_a?(Integer) && offset.negative?
+ #
+ # We reject the above results but we **NEED** to run the `#query_watermark_offsets`
+ # for each topic partition nonetheless. Without this, librdkafka fetches a lot more
+ # metadata about each topic and each partition and this takes much more time than
+ # just getting watermarks. If we do not run watermark, at least an extra second
+ # is added at the beginning of iterator flow
+ #
+ # This may not be significant when this runs in the background but in case of
+ # using iterator in things like Puma, it heavily impacts the end user experience
+ next unless offset.negative?

- _, high_watermark_offset = @consumer.query_watermark_offsets(name, partition)
  # We add because this offset is negative
- @mapped_topics[name][partition] = high_watermark_offset + offset
+ @mapped_topics[name][partition] = [high_offset + offset, low_offset].max
  end
  end
  end
@@ -39,6 +39,7 @@ module Karafka
  # overwritten, you may want to include `auto.offset.reset` to match your case.
  # @param yield_nil [Boolean] should we yield also `nil` values when poll returns nothing.
  #   Useful in particular for long-living iterators.
+ # @param max_wait_time [Integer] max wait in ms when iterator did not receive any messages
  #
  # @note It is worth keeping in mind, that this API also needs to operate within
  #   `max.poll.interval.ms` limitations on each iteration
@@ -48,7 +49,8 @@
  def initialize(
  topics,
  settings: { 'auto.offset.reset': 'beginning' },
- yield_nil: false
+ yield_nil: false,
+ max_wait_time: 200
  )
  @topics_with_partitions = Expander.new.call(topics)

@@ -62,6 +64,7 @@

  @settings = settings
  @yield_nil = yield_nil
+ @max_wait_time = max_wait_time
  end

  # Iterates over requested topic partitions and yields the results with the iterator itself
@@ -80,7 +83,7 @@
  # Stream data until we reach the end of all the partitions or until the end user
  # indicates that they are done
  until done?
- message = poll(200)
+ message = poll

  # Skip nils if not explicitly required
  next if message.nil? && !@yield_nil
@@ -131,10 +134,9 @@

  private

- # @param timeout [Integer] timeout in ms
  # @return [Rdkafka::Consumer::Message, nil] message or nil if nothing to do
- def poll(timeout)
-   @current_consumer.poll(timeout)
+ def poll
+   @current_consumer.poll(@max_wait_time)
  rescue Rdkafka::RdkafkaError => e
  # End of partition
  if e.code == :partition_eof
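Putting the two Iterator changes together (the per-partition watermark clamp and the new `max_wait_time:` option), a hedged Pro usage sketch; the topic name is made up.

```ruby
# Requires Karafka Pro. Reads roughly the last 5 messages of partition 0;
# thanks to the watermark clamp, a short partition will not underflow.
iterator = Karafka::Pro::Iterator.new(
  { 'example_events' => { 0 => -5 } },
  max_wait_time: 500 # ms to wait per poll when no messages arrive
)

iterator.each do |message|
  puts "#{message.partition}/#{message.offset}: #{message.raw_payload}"
end
```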
@@ -48,7 +48,7 @@ module Karafka
  # We reload the consumers with each batch instead of relying on some external signals
  # when needed for consistency. That way devs may have it on or off and not in this
  # middle state, where re-creation of a consumer instance would occur only sometimes
- @consumer = nil unless ::Karafka::App.config.consumer_persistence
+ @consumer = nil unless topic.consumer_persistence

  # First we build messages batch...
  consumer.messages = Messages::Builders::Messages.call(
@@ -17,6 +17,7 @@ module Karafka
  max_messages
  max_wait_time
  initial_offset
+ consumer_persistence
  ].freeze

  private_constant :INHERITABLE_ATTRIBUTES
@@ -50,7 +51,7 @@ module Karafka

  # @return [Class] consumer class that we should use
  def consumer
- if Karafka::App.config.consumer_persistence
+ if consumer_persistence
  # When persistence of consumers is on, no need to reload them
  @consumer
  else
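With `consumer_persistence` now on the inheritable attributes list, a topic may overwrite it locally. A sketch under the assumption that the routing DSL exposes it the same way as the other inheritable attributes such as `max_messages` and `max_wait_time`; the app, topic, and consumer names are placeholders.

```ruby
class KarafkaApp < Karafka::App
  routes.draw do
    topic :example_events do
      consumer ExampleConsumer
      # Assumed per-topic override; when not set, it falls back to
      # config.consumer_persistence, per the INHERITABLE_ATTRIBUTES list above
      consumer_persistence false
    end
  end
end
```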
@@ -25,6 +25,7 @@ module Karafka
  broker.version.fallback
  builtin.features
  check.crcs
+ client.dns.lookup
  client.id
  client.rack
  closesocket_cb
@@ -161,6 +162,7 @@ module Karafka
  broker.address.ttl
  broker.version.fallback
  builtin.features
+ client.dns.lookup
  client.id
  client.rack
  closesocket_cb
@@ -3,5 +3,5 @@
  # Main module namespace
  module Karafka
  # Current Karafka version
- VERSION = '2.1.6'
+ VERSION = '2.1.8'
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: karafka
  version: !ruby/object:Gem::Version
- version: 2.1.6
+ version: 2.1.8
  platform: ruby
  authors:
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
  Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
  MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
  -----END CERTIFICATE-----
- date: 2023-06-29 00:00:00.000000000 Z
+ date: 2023-07-29 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: karafka-core
metadata.gz.sig CHANGED
Binary file