karafka 2.0.8 → 2.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 1a646d63d674e6ecb80625097426f48294fe8e8b3f3e9e020cf92645a5728251
- data.tar.gz: 461c58c355f84b81c3396cb4164a426c3f1e87747b3101de1e7c6fbf2a876778
+ metadata.gz: ee9c609249fea5e996d9506bd3e04435dbcc189addcd6383a092b080e776f525
+ data.tar.gz: 6ed354b21361966f1988bb90b29cea14d842fb02299f12bd30ab4ec57eaf8cc1
  SHA512:
- metadata.gz: 623832478b11b1fa61c7906f423417500838e30ef97256a59290d73dcfe17670cf7eb11adcf33d422ae590c6ce5f19215340c89d369f8ec5b4e609af07d0befb
- data.tar.gz: 97e50ed131c939d09de884d3ddba826381e7d314d3bc4660ecf947fa6582d9e6f16e023dad121612eadffcd441a73874c96e287e5ec1885e008d843357e94b93
+ metadata.gz: a20a1bf2d2b86fcd63bf2e036d535c1d7aa4d06943cc00a414851e8f0a0054054eff621b67bfd355456ac93f1561931aec598b43c0ca535ceaa57c9d94957378
+ data.tar.gz: '05996101b929a143926508a0afb69e6e6b09de04fd088dd06c83cba122efc9cfecdd7586892c80ab5409964737dd4a74b0ecea0f8a30ff1dfbf032f6bb289288'
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
  # Karafka framework changelog

+ ## 2.0.9 (2022-09-22)
+ - Fix Singleton not visible when used in PORO (#1034)
+ - Divide pristine specs into pristine and poro. Pristine will still have helpers loaded, poro will have nothing.
+ - Fix a case where `manual_offset_management` offset upon error is not reverted to the first message in a case where there were no markings as consumed at all for multiple batches.
+ - Implement small reliability improvements around marking as consumed.
+ - Introduce a config sanity check to make sure Virtual Partitions are not used with manual offset management.
+ - Fix a possibility of using `active_job_topic` with Virtual Partitions and manual offset management (ActiveJob still can use due to atomicity of jobs).
+ - Move seek offset ownership to the coordinator to allow Virtual Partitions further development.
+ - Improve client shutdown in specs.
+ - Do not reset client on network issue and rely on `librdkafka` to do so.
+ - Allow for nameless (anonymous) subscription groups (#1033)
+
  ## 2.0.8 (2022-09-19)
  - [Breaking change] Rename Virtual Partitions `concurrency` to `max_partitions` to avoid confusion (#1023).
  - Allow for block based subscription groups management (#1030).
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
  remote: .
  specs:
- karafka (2.0.8)
+ karafka (2.0.9)
  karafka-core (>= 2.0.2, < 3.0.0)
  rdkafka (>= 0.12)
  thor (>= 0.20)
@@ -11,10 +11,10 @@ PATH
  GEM
  remote: https://rubygems.org/
  specs:
- activejob (7.0.3.1)
- activesupport (= 7.0.3.1)
+ activejob (7.0.4)
+ activesupport (= 7.0.4)
  globalid (>= 0.3.6)
- activesupport (7.0.3.1)
+ activesupport (7.0.4)
  concurrent-ruby (~> 1.0, >= 1.0.2)
  i18n (>= 1.6, < 2)
  minitest (>= 5.1)
@@ -33,7 +33,7 @@ GEM
  karafka-core (2.0.2)
  concurrent-ruby (>= 1.1)
  mini_portile2 (2.8.0)
- minitest (5.16.2)
+ minitest (5.16.3)
  rake (13.0.6)
  rdkafka (0.12.0)
  ffi (~> 1.15)
@@ -45,13 +45,13 @@ GEM
  rspec-mocks (~> 3.11.0)
  rspec-core (3.11.0)
  rspec-support (~> 3.11.0)
- rspec-expectations (3.11.0)
+ rspec-expectations (3.11.1)
  diff-lcs (>= 1.2.0, < 2.0)
  rspec-support (~> 3.11.0)
  rspec-mocks (3.11.1)
  diff-lcs (>= 1.2.0, < 2.0)
  rspec-support (~> 3.11.0)
- rspec-support (3.11.0)
+ rspec-support (3.11.1)
  simplecov (0.21.2)
  docile (~> 1.1)
  simplecov-html (~> 0.11)
@@ -68,6 +68,7 @@ GEM
  zeitwerk (2.6.0)

  PLATFORMS
+ arm64-darwin
  x86_64-linux

  DEPENDENCIES
@@ -79,4 +80,4 @@ DEPENDENCIES
  simplecov

  BUNDLED WITH
- 2.3.15
+ 2.3.22
data/bin/integrations CHANGED
@@ -2,9 +2,14 @@

  # Runner to run integration specs in parallel

- # Part of integration specs run pristine without bundler.
+ # Part of integration specs run linear without bundler.
  # If we would run bundle exec when running this code, bundler would inject its own context
  # into them, messing things up heavily
+ #
+ # Types of specs:
+ # - regular - can run in parallel, includes all the helpers
+ # - pristine - cannot run in parallel, uses custom bundler but includes helpers
+ # - poro - cannot run in parallel, uses custom bundler, does not include any helpers
  raise 'This code needs to be executed WITHOUT bundle exec' if Kernel.const_defined?(:Bundler)

  require 'open3'
@@ -64,13 +69,19 @@ class Scenario
  @path.gsub("#{ROOT_PATH}/spec/integrations/", '')
  end

- # @return [Boolean] true if spec is pristine
- def pristine?
+ # @return [Symbol] type of spec
+ def type
  scenario_dir = File.dirname(@path)

- # If there is a Gemfile in a scenario directory, it means it is a pristine spec and we need
- # to run bundle install, etc in order to run it
- File.exist?(File.join(scenario_dir, 'Gemfile'))
+ return :poro if scenario_dir.end_with?('_poro')
+ return :pristine if scenario_dir.end_with?('_pristine')
+
+ :regular
+ end
+
+ # @return [Boolean] any spec that is not a regular one should not run in parallel with others
+ def linear?
+ type != :regular
  end

  # @return [Boolean] did this scenario finished or is it still running
@@ -145,9 +156,23 @@ class Scenario
  # Sets up a proper environment for a given spec to run and returns the run command
  # @return [String] run command
  def init_and_build_cmd
- # If there is a Gemfile in a scenario directory, it means it is a pristine spec and we need
- # to run bundle install, etc in order to run it
- if pristine?
+ case type
+ when :poro
+ scenario_dir = File.dirname(@path)
+ # We copy the spec into a temp dir, not to pollute the spec location with logs, etc
+ temp_dir = Dir.mktmpdir
+ file_name = File.basename(@path)
+
+ FileUtils.cp_r("#{scenario_dir}/.", temp_dir)
+
+ <<~CMD
+ cd #{temp_dir} &&
+ KARAFKA_GEM_DIR=#{ROOT_PATH} \
+ BUNDLE_AUTO_INSTALL=true \
+ PRISTINE_MODE=true \
+ bundle exec ruby #{file_name}
+ CMD
+ when :pristine
  scenario_dir = File.dirname(@path)
  # We copy the spec into a temp dir, not to pollute the spec location with logs, etc
  temp_dir = Dir.mktmpdir
@@ -196,8 +221,8 @@ scenarios = specs
  .shuffle(random: Random.new(seed))
  .map { |integration_test| Scenario.new(integration_test) }

- regulars = scenarios.reject(&:pristine?)
- pristine = scenarios.select(&:pristine?)
+ regulars = scenarios.reject(&:linear?)
+ linears = scenarios - regulars

  active_scenarios = []
  finished_scenarios = []
@@ -206,10 +231,10 @@ while finished_scenarios.size < scenarios.size
  # If we have space to run another scenario, we add it
  if active_scenarios.size < CONCURRENCY
  scenario = nil
- # We can run only one pristine at the same time due to concurrency issues within bundler
+ # We can run only one linear at the same time due to concurrency issues within bundler
  # Since they usually take longer than others, we try to run them as fast as possible when there
  # is a slot
- scenario = pristine.pop unless active_scenarios.any?(&:pristine?)
+ scenario = linears.pop unless active_scenarios.any?(&:linear?)
  scenario ||= regulars.pop

  if scenario
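
The runner above derives each scenario's type from its directory suffix and allows only one non-regular (linear) spec to run at a time. A standalone sketch of that suffix convention, with hypothetical paths, assuming only the `_pristine` / `_poro` suffixes matter:

```ruby
# Standalone sketch (not part of the runner) of the directory-suffix convention
# described above. The paths are made up; only the suffix decides the type.
def spec_type(path)
  dir = File.dirname(path)

  return :poro if dir.end_with?('_poro')
  return :pristine if dir.end_with?('_pristine')

  :regular
end

spec_type('spec/integrations/consumption/from_earliest.rb')  # => :regular
spec_type('spec/integrations/setup/rails_pristine/spec.rb')  # => :pristine
spec_type('spec/integrations/poro/singleton_poro/spec.rb')   # => :poro
```
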
data/config/errors.yml CHANGED
@@ -57,3 +57,4 @@ en:
  consumer_format: needs to inherit from Karafka::Pro::BaseConsumer and not Karafka::Consumer
  virtual_partitions.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
  virtual_partitions.max_partitions_format: needs to be equl or more than 1
+ manual_offset_management_not_with_virtual_partitions: cannot be used together with Virtual Partitions
@@ -19,6 +19,8 @@ module Karafka

  instance_eval(&block)

+ target.tags << :active_job
+
  # This is handled by our custom ActiveJob consumer
  # Without this, default behaviour would cause messages to skip upon shutdown as the
  # offset would be committed for the last message
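
The `:active_job` tag added above is what later lets the new Virtual Partitions / manual offset management sanity check skip ActiveJob topics, which is why `active_job_topic` can still be combined with Virtual Partitions. A rough sketch, assuming Karafka Pro and made-up partitioning logic:

```ruby
# Rough sketch, not taken from the gem: an ActiveJob topic using Pro's Virtual
# Partitions. The builder above tags it with :active_job, so the new contract
# check does not reject it even though ActiveJob manages offsets internally.
class KarafkaApp < Karafka::App
  routes.draw do
    active_job_topic :default do
      virtual_partitions(
        partitioner: ->(job) { job.key }
      )
    end
  end
end
```
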
@@ -77,7 +77,7 @@ module Karafka
  # with manual offset management
  mark_as_consumed(messages.last)
  else
- pause(@seek_offset || messages.first.offset)
+ pause(coordinator.seek_offset)
  end
  end

@@ -155,7 +155,7 @@ module Karafka
  return false
  end

- @seek_offset = message.offset + 1
+ coordinator.seek_offset = message.offset + 1

  true
  end
@@ -172,7 +172,7 @@ module Karafka
  return false
  end

- @seek_offset = message.offset + 1
+ coordinator.seek_offset = message.offset + 1

  true
  end
@@ -153,7 +153,7 @@ module Karafka

  pause_msg = Messages::Seek.new(topic, partition, offset)

- internal_commit_offsets(async: false)
+ internal_commit_offsets(async: true)

  # Here we do not use our cached tpls because we should not try to pause something we do
  # not own anymore.
@@ -267,8 +267,15 @@ module Karafka

  true
  rescue Rdkafka::RdkafkaError => e
- return false if e.code == :assignment_lost
- return true if e.code == :no_offset
+ case e.code
+ when :assignment_lost
+ return false
+ when :no_offset
+ return true
+ when :coordinator_load_in_progress
+ sleep(1)
+ retry
+ end

  raise e
  end
@@ -329,25 +336,27 @@ module Karafka
  #
  # If we would retry here, the client reset would become transparent and we would not have
  # a chance to take any actions
+ early_return = false
+
  case e.code
  when :max_poll_exceeded # -147
  reset
- return nil
+ early_return = true
  when :transport # -195
  reset
- return nil
- when :rebalance_in_progress # -27
- reset
- return nil
+ early_return = true
  when :not_coordinator # 16
  reset
- return nil
+ early_return = true
  when :network_exception # 13
- reset
- return nil
+ early_return = true
+ when :rebalance_in_progress # -27
+ early_return = true
+ when :coordinator_load_in_progress # 14
+ early_return = true
  when :unknown_topic_or_part
  # This is expected and temporary until rdkafka catches up with metadata
- return nil
+ early_return = true
  end

  raise if time_poll.attempts > MAX_POLL_RETRIES
@@ -356,8 +365,9 @@ module Karafka

  time_poll.checkpoint
  time_poll.backoff
- # On unknown errors we do our best to retry and handle them before raising
- retry
+ # On unknown errors we do our best to retry and handle them before raising unless we
+ # decide to early return
+ early_return ? nil : retry
  end

  # Builds a new rdkafka consumer instance based on the subscription group configuration
@@ -32,19 +32,23 @@ module Karafka
  def on_before_enqueue
  return unless topic.long_running_job?

- # This ensures, that when running LRJ with VP, things operate as expected
- coordinator.on_enqueued do |first_group_message|
+ # This ensures that when running LRJ with VP, things operate as expected run only once
+ # for all the virtual partitions collectively
+ coordinator.on_enqueued do
  # Pause at the first message in a batch. That way in case of a crash, we will not loose
- # any messages
- pause(first_group_message.offset, MAX_PAUSE_TIME)
+ # any messages.
+ #
+ # For VP it applies the same way and since VP cannot be used with MOM we should not have
+ # any edge cases here.
+ pause(coordinator.seek_offset, MAX_PAUSE_TIME)
  end
  end

  # Runs extra logic after consumption that is related to handling long-running jobs
  # @note This overwrites the '#on_after_consume' from the base consumer
  def on_after_consume
- coordinator.on_finished do |first_group_message, last_group_message|
- on_after_consume_regular(first_group_message, last_group_message)
+ coordinator.on_finished do |last_group_message|
+ on_after_consume_regular(last_group_message)
  end
  end

@@ -75,35 +79,27 @@ module Karafka

  # Handles the post-consumption flow depending on topic settings
  #
- # @param first_message [Karafka::Messages::Message]
- # @param last_message [Karafka::Messages::Message]
- def on_after_consume_regular(first_message, last_message)
+ # @param last_group_message [Karafka::Messages::Message]
+ def on_after_consume_regular(last_group_message)
  if coordinator.success?
  coordinator.pause_tracker.reset

  # We use the non-blocking one here. If someone needs the blocking one, can implement it
  # with manual offset management
  # Mark as consumed only if manual offset management is not on
- mark_as_consumed(last_message) unless topic.manual_offset_management? || revoked?
+ mark_as_consumed(last_group_message) unless topic.manual_offset_management? || revoked?

  # If this is not a long-running job there is nothing for us to do here
  return unless topic.long_running_job?

- # Once processing is done, we move to the new offset based on commits
- # Here, in case manual offset management is off, we have the new proper offset of a
- # first message from another batch from `@seek_offset`. If manual offset management
- # is on, we move to place where the user indicated it was finished. This can create an
- # interesting (yet valid) corner case, where with manual offset management on and no
- # marking as consumed, we end up with an infinite loop processing same messages over and
- # over again
- seek(@seek_offset || first_message.offset)
+ seek(coordinator.seek_offset)

  resume
  else
  # If processing failed, we need to pause
  # For long running job this will overwrite the default never-ending pause and will cause
  # the processing to keep going after the error backoff
- pause(@seek_offset || first_message.offset)
+ pause(coordinator.seek_offset)
  end
  end
  end
@@ -47,6 +47,22 @@ module Karafka

  [[%i[virtual_partitions partitioner], :respond_to_call]]
  end
+
+ # Make sure that manual offset management is not used together with Virtual Partitions
+ # This would not make any sense as there would be edge cases related to skipping
+ # messages even if there were errors.
+ virtual do |data, errors|
+ next unless errors.empty?
+
+ virtual_partitions = data[:virtual_partitions]
+ manual_offset_management = data[:manual_offset_management]
+
+ next unless virtual_partitions[:active]
+ next unless manual_offset_management
+ next if data[:tags].include?(:active_job)
+
+ [[%i[manual_offset_management], :not_with_virtual_partitions]]
+ end
  end
  end
  end
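
The new `virtual` validation above rejects any non-ActiveJob topic that enables both Virtual Partitions and manual offset management, surfacing the `manual_offset_management_not_with_virtual_partitions` message added to `config/errors.yml`. A hypothetical routing definition (topic and consumer names are made up) that would now fail when the routes are drawn:

```ruby
# Hypothetical example of a configuration the new sanity check rejects.
class KarafkaApp < Karafka::App
  routes.draw do
    topic :orders_states do
      consumer OrdersStatesConsumer # assumed consumer class

      manual_offset_management true
      virtual_partitions(
        partitioner: ->(message) { message.key }
      )
      # => validation error at draw time:
      #    manual_offset_management cannot be used together with Virtual Partitions
    end
  end
end
```
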
@@ -34,7 +34,6 @@ module Karafka
  @on_enqueued_invoked = false
  @on_started_invoked = false
  @on_finished_invoked = false
- @first_message = messages.first
  @last_message = messages.last
  end
  end
@@ -52,7 +51,7 @@ module Karafka

  @on_enqueued_invoked = true

- yield(@first_message, @last_message)
+ yield(@last_message)
  end
  end

@@ -63,7 +62,7 @@ module Karafka

  @on_started_invoked = true

- yield(@first_message, @last_message)
+ yield(@last_message)
  end
  end

@@ -77,7 +76,7 @@ module Karafka

  @on_finished_invoked = true

- yield(@first_message, @last_message)
+ yield(@last_message)
  end
  end
  end
@@ -13,6 +13,8 @@ module Karafka
  # @return [Karafka::TimeTrackers::Pause]
  attr_reader :pause_tracker

+ attr_reader :seek_offset
+
  # @param pause_tracker [Karafka::TimeTrackers::Pause] pause tracker for given topic partition
  def initialize(pause_tracker)
  @pause_tracker = pause_tracker
@@ -23,17 +25,30 @@ module Karafka
  end

  # Starts the coordinator for given consumption jobs
- # @param _messages [Array<Karafka::Messages::Message>] batch of message for which we are
+ # @param messages [Array<Karafka::Messages::Message>] batch of message for which we are
  # going to coordinate work. Not used with regular coordinator.
- def start(_messages)
+ def start(messages)
  @mutex.synchronize do
  @running_jobs = 0
  # We need to clear the consumption results hash here, otherwise we could end up storing
  # consumption results of consumer instances we no longer control
  @consumptions.clear
+
+ # We set it on the first encounter and never again, because then the offset setting
+ # should be up to the consumers logic (our or the end user)
+ # Seek offset needs to be always initialized as for case where manual offset management
+ # is turned on, we need to have reference to the first offset even in case of running
+ # multiple batches without marking any messages as consumed. Rollback needs to happen to
+ # the last place we know of or the last message + 1 that was marked
+ @seek_offset ||= messages.first.offset
  end
  end

+ # @param offset [Integer] message offset
+ def seek_offset=(offset)
+ @mutex.synchronize { @seek_offset = offset }
+ end
+
  # Increases number of jobs that we handle with this coordinator
  def increment
  @mutex.synchronize { @running_jobs += 1 }
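
With the coordinator owning the seek offset, the value is set once from the first message of the first batch and only advances when something is marked as consumed, so error handling can always pause or seek back to a known position even across multiple unmarked batches. A self-contained illustration of that behaviour (not gem code):

```ruby
# Self-contained illustration (not gem code) of the seek offset behaviour
# described above: set once on the first batch, advanced only by marking.
class SeekTracker
  attr_reader :seek_offset

  # Called once per incoming batch
  def start(messages)
    @seek_offset ||= messages.first.offset
  end

  # Called when a message is marked as consumed
  def mark(message)
    @seek_offset = message.offset + 1
  end
end

Message = Struct.new(:offset)

tracker = SeekTracker.new
tracker.start([Message.new(100), Message.new(101)])
tracker.start([Message.new(102), Message.new(103)]) # second batch, nothing marked
tracker.seek_offset # => 100, so an error can still revert to the very first message

tracker.mark(Message.new(102))
tracker.seek_offset # => 103, the next offset to seek or pause on
```
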
@@ -68,9 +68,10 @@ module Karafka

  # Handles the simple routing case where we create one consumer group and allow for further
  # subscription group customization
- # @param subscription_group_name [String, Symbol] subscription group id
+ # @param subscription_group_name [String, Symbol] subscription group id. When not provided,
+ # a random uuid will be used
  # @param block [Proc] further topics definitions
- def subscription_group(subscription_group_name, &block)
+ def subscription_group(subscription_group_name = SecureRandom.uuid, &block)
  consumer_group('app') do
  target.public_send(:subscription_group=, subscription_group_name, &block)
  end
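
With the name now defaulting to `SecureRandom.uuid`, subscription groups can be declared without an explicit identifier. A minimal routing sketch with made-up topic and consumer names:

```ruby
# Minimal sketch: an anonymous subscription group (random UUID name) next to
# an explicitly named one. Topic and consumer names are assumptions.
class KarafkaApp < Karafka::App
  routes.draw do
    subscription_group do        # name defaults to SecureRandom.uuid
      topic :events do
        consumer EventsConsumer  # assumed consumer class
      end
    end

    subscription_group 'analytics' do
      topic :metrics do
        consumer MetricsConsumer # assumed consumer class
      end
    end
  end
end
```
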
@@ -6,7 +6,7 @@ module Karafka
  # It belongs to a consumer group as from 0.6 all the topics can work in the same consumer group
  # It is a part of Karafka's DSL.
  class Topic
- attr_reader :id, :name, :consumer_group
+ attr_reader :id, :name, :consumer_group, :tags
  attr_writer :consumer
  attr_accessor :subscription_group

@@ -32,6 +32,7 @@ module Karafka
  # Karafka 0.6 we can handle multiple Kafka instances with the same process and we can
  # have same topic name across multiple consumer groups
  @id = "#{consumer_group.id}_#{@name}"
+ @tags = []
  end

  INHERITABLE_ATTRIBUTES.each do |attribute|
@@ -93,7 +94,8 @@ module Karafka
  name: name,
  consumer: consumer,
  consumer_group_id: consumer_group.id,
- subscription_group: subscription_group
+ subscription_group: subscription_group,
+ tags: tags
  ).freeze
  end
  end
@@ -3,5 +3,5 @@
  # Main module namespace
  module Karafka
  # Current Karafka version
- VERSION = '2.0.8'
+ VERSION = '2.0.9'
  end
data/lib/karafka.rb CHANGED
@@ -13,6 +13,7 @@
  openssl
  base64
  date
+ singleton
  zeitwerk
  ].each(&method(:require))

data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: karafka
  version: !ruby/object:Gem::Version
- version: 2.0.8
+ version: 2.0.9
  platform: ruby
  authors:
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
  Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
  MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
  -----END CERTIFICATE-----
- date: 2022-09-19 00:00:00.000000000 Z
+ date: 2022-09-22 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: karafka-core
metadata.gz.sig CHANGED
Binary file