karafka 2.0.0.beta3 → 2.0.0.beta4
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +9 -23
- data/CHANGELOG.md +14 -0
- data/Gemfile.lock +5 -5
- data/bin/wait_for_kafka +20 -0
- data/docker-compose.yml +10 -0
- data/karafka.gemspec +1 -1
- data/lib/karafka/base_consumer.rb +50 -42
- data/lib/karafka/connection/client.rb +28 -5
- data/lib/karafka/instrumentation/logger_listener.rb +0 -3
- data/lib/karafka/instrumentation/monitor.rb +0 -1
- data/lib/karafka/pro/active_job/consumer.rb +4 -3
- data/lib/karafka/pro/base_consumer.rb +76 -0
- data/lib/karafka/pro/loader.rb +1 -2
- data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +1 -1
- data/lib/karafka/processing/executor.rb +22 -5
- data/lib/karafka/processing/jobs/base.rb +8 -3
- data/lib/karafka/processing/jobs/consume.rb +8 -3
- data/lib/karafka/processing/result.rb +34 -0
- data/lib/karafka/processing/worker.rb +2 -2
- data/lib/karafka/routing/topic.rb +5 -0
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +7 -5
- metadata.gz.sig +0 -0
- data/lib/karafka/pro/base_consumer_extensions.rb +0 -66
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e4e9430d2278617cbed38f5696011603d9c0d8c53813dfc180499dc6e4b97563
+  data.tar.gz: f082a95aa9841912f819dc0598591c4b96d7ef1199eff324e65ca0c601008dae
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7252c5503234ab4d35fa02d2bb0a18dd8239584fdddc5b451cfdf028a61f37d59a269bac804913d0abf46e2d3273188560e48aa9de40fbb319c766624c1a3b95
+  data.tar.gz: a4cc5d7c18d2a45483ee26acbacf62c9c13f8824697af96a3f2bf5bccb232d5b07097ed49cfb84a9b46e09f31405813d50b1564d6668f0a483023f449427428b
checksums.yaml.gz.sig
CHANGED
Binary file
data/.github/workflows/ci.yml
CHANGED
@@ -21,8 +21,7 @@ jobs:
         uses: ruby/setup-ruby@v1
         with:
           ruby-version: 3.1
-
-        run: gem install bundler --no-document
+          bundler-cache: true
       - name: Install Diffend plugin
         run: bundle plugin install diffend
       - name: Bundle Secure
@@ -57,25 +56,19 @@ jobs:
       - name: Install package dependencies
         run: "[ -e $APT_DEPS ] || sudo apt-get install -y --no-install-recommends $APT_DEPS"

-      - name:
+      - name: Start Kafka with docker-compose
         run: |
           docker-compose up -d
-          sleep 10

       - name: Set up Ruby
         uses: ruby/setup-ruby@v1
         with:
           ruby-version: ${{matrix.ruby}}
+          bundler-cache: true

-      - name:
+      - name: Ensure all needed Kafka topics are created and wait if not
         run: |
-
-          bundle config set without 'tools benchmarks docs'
-
-      - name: Bundle install
-        run: |
-          bundle config set without development
-          bundle install --jobs 4 --retry 3
+          bin/wait_for_kafka

       - name: Run all specs
         env:
@@ -100,26 +93,19 @@ jobs:
       - name: Install package dependencies
         run: "[ -e $APT_DEPS ] || sudo apt-get install -y --no-install-recommends $APT_DEPS"

-      - name:
+      - name: Start Kafka with docker-compose
         run: |
           docker-compose up -d
-          sleep 5

       - name: Set up Ruby
         uses: ruby/setup-ruby@v1
         with:
           ruby-version: ${{matrix.ruby}}
+          bundler-cache: true

-      - name:
-        run: |
-          gem install bundler --no-document
-          gem update --system --no-document
-          bundle config set without 'tools benchmarks docs'
-
-      - name: Bundle install
+      - name: Ensure all needed Kafka topics are created and wait if not
         run: |
-
-          bundle install --jobs 4 --retry 3
+          bin/wait_for_kafka

       - name: Run integration tests
         env:
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,19 @@
 # Karafka framework changelog

+## 2.0.0-beta4 (2022-06-20)
+- Rename job internal api methods from `#prepare` to `#before_call` and from `#teardown` to `#after_call` to abstract away jobs execution from any type of executors and consumers logic
+- Remove ability of running `before_consume` and `after_consume` completely. Those should be for internal usage only.
+- Reorganize how Pro consumer and Pro AJ consumers inherit.
+- Require WaterDrop `2.3.1`.
+- Add more integration specs for rebalancing and max poll exceeded.
+- Move `revoked?` state from PRO to regular Karafka.
+- Use return value of `mark_as_consumed!` and `mark_as_consumed` as indicator of partition ownership + use it to switch the ownership state.
+- Do not remove rebalance manager upon client reset and recovery. This will allow us to keep the notion of lost partitions, so we can run revocation jobs for blocking jobs that exceeded the max poll interval.
+- Run revocation jobs upon reaching max poll interval for blocking jobs.
+- Early exit `poll` operation upon partition lost or max poll exceeded event.
+- Always reset consumer instances on timeout exceeded.
+- Wait for Kafka to create all the needed topics before running specs in CI.
+
 ## 2.0.0-beta3 (2022-06-14)
 - Jobs building responsibility extracted out of the listener code base.
 - Fix a case where specs supervisor would try to kill no longer running process (#868)
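The ownership-related entries above combine into one consumer-facing contract: `mark_as_consumed` and `mark_as_consumed!` now return a boolean, and a false result also flips the consumer's `revoked?` state. A minimal sketch of how user code could lean on this (the class name and the `process` helper are hypothetical):

    class EventsConsumer < Karafka::BaseConsumer
      def consume
        messages.each do |message|
          process(message) # hypothetical business logic

          # Since beta4 a false return value means the partition was lost,
          # so there is no point in processing the rest of the batch
          break unless mark_as_consumed(message)
        end
      end

      private

      def process(message)
        puts message.raw_payload
      end
    end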
data/Gemfile.lock
CHANGED
@@ -1,13 +1,13 @@
 PATH
   remote: .
   specs:
-    karafka (2.0.0.beta3)
+    karafka (2.0.0.beta4)
       dry-configurable (~> 0.13)
       dry-monitor (~> 0.5)
       dry-validation (~> 1.7)
       rdkafka (>= 0.10)
       thor (>= 0.20)
-      waterdrop (>= 2.3.0, < 3.0.0)
+      waterdrop (>= 2.3.1, < 3.0.0)
       zeitwerk (~> 2.3)

 GEM
@@ -74,7 +74,7 @@ GEM
     mini_portile2 (2.8.0)
     minitest (5.15.0)
     rake (13.0.6)
-    rdkafka (0.
+    rdkafka (0.12.0)
       ffi (~> 1.15)
       mini_portile2 (~> 2.6)
       rake (> 12)
@@ -100,14 +100,14 @@ GEM
     thor (1.2.1)
     tzinfo (2.0.4)
       concurrent-ruby (~> 1.0)
-    waterdrop (2.3.0)
+    waterdrop (2.3.1)
       concurrent-ruby (>= 1.1)
       dry-configurable (~> 0.13)
       dry-monitor (~> 0.5)
       dry-validation (~> 1.7)
       rdkafka (>= 0.10)
       zeitwerk (~> 2.3)
-    zeitwerk (2.
+    zeitwerk (2.6.0)

 PLATFORMS
   x86_64-linux
data/bin/wait_for_kafka
ADDED
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# This script allows us to wait for Kafka docker to fully be ready
+# We consider it fully ready when all our topics that need to be created are created as expected
+
+KAFKA_NAME='karafka_20_kafka'
+ZOOKEEPER='zookeeper:2181'
+LIST_CMD="kafka-topics.sh --list --zookeeper $ZOOKEEPER"
+
+# Take the number of topics that we need to create prior to running anything
+TOPICS_COUNT=`cat docker-compose.yml | grep -E -i 'integrations_|benchmarks_' | wc -l`
+
+# And wait until all of them are created
+until (((`docker exec $KAFKA_NAME $LIST_CMD | wc -l`) >= $TOPICS_COUNT));
+do
+  echo "Waiting for Kafka to create all the needed topics..."
+  sleep 1
+done
+
+echo "All the needed topics created."
data/docker-compose.yml
CHANGED
@@ -1,10 +1,12 @@
 version: '2'
 services:
   zookeeper:
+    container_name: karafka_20_zookeeper
     image: wurstmeister/zookeeper
     ports:
       - '2181:2181'
   kafka:
+    container_name: karafka_20_kafka
     image: wurstmeister/kafka
     ports:
       - '9092:9092'
@@ -19,6 +21,14 @@ services:
       integrations_2_02:2:1,\
       integrations_3_02:2:1,\
       integrations_4_02:2:1,\
+      integrations_5_02:2:1,\
+      integrations_6_02:2:1,\
+      integrations_7_02:2:1,\
+      integrations_8_02:2:1,\
+      integrations_9_02:2:1,\
+      integrations_10_02:2:1,\
+      integrations_11_02:2:1,\
+      integrations_12_02:2:1,\
       integrations_0_03:3:1,\
       integrations_1_03:3:1,\
       integrations_2_03:3:1,\
data/karafka.gemspec
CHANGED
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
   spec.add_dependency 'dry-validation', '~> 1.7'
   spec.add_dependency 'rdkafka', '>= 0.10'
   spec.add_dependency 'thor', '>= 0.20'
-  spec.add_dependency 'waterdrop', '>= 2.3.0', '< 3.0.0'
+  spec.add_dependency 'waterdrop', '>= 2.3.1', '< 3.0.0'
   spec.add_dependency 'zeitwerk', '~> 2.3'

   spec.required_ruby_version = '>= 2.6.0'
data/lib/karafka/base_consumer.rb
CHANGED
@@ -15,28 +15,19 @@ module Karafka
     # @return [Waterdrop::Producer] producer instance
     attr_accessor :producer

+    def initialize
+      # We re-use one to save on object allocation
+      # It also allows us to transfer the consumption notion to another batch
+      @consumption = Processing::Result.new
+    end
+
     # Can be used to run preparation code
     #
     # @private
     # @note This should not be used by the end users as it is part of the lifecycle of things but
     #   not as part of the public api. This can act as a hook when creating non-blocking
     #   consumers and doing other advanced stuff
-    def on_prepare
-      Karafka.monitor.instrument('consumer.prepared', caller: self) do
-        prepare
-      end
-
-      true
-    rescue StandardError => e
-      Karafka.monitor.instrument(
-        'error.occurred',
-        error: e,
-        caller: self,
-        type: 'consumer.prepare.error'
-      )
-
-      false
-    end
+    def on_before_consume; end

     # Executes the default consumer flow.
     #
@@ -48,29 +39,36 @@ module Karafka
     def on_consume
       Karafka.monitor.instrument('consumer.consumed', caller: self) do
         consume
-
-        pause_tracker.reset
-
-        # Mark as consumed only if manual offset management is not on
-        next if topic.manual_offset_management
-
-        # We use the non-blocking one here. If someone needs the blocking one, can implement it
-        # with manual offset management
-        mark_as_consumed(messages.last)
       end
-
-      true
+
+      @consumption.success!
     rescue StandardError => e
+      @consumption.failure!
+
       Karafka.monitor.instrument(
         'error.occurred',
        error: e,
        caller: self,
        type: 'consumer.consume.error'
       )
+    end

-      pause(@seek_offset || messages.first.offset)
+    # @private
+    # @note This should not be used by the end users as it is part of the lifecycle of things but
+    #   not as part of the public api.
+    def on_after_consume
+      if @consumption.success?
+        pause_tracker.reset
+
+        # Mark as consumed only if manual offset management is not on
+        return if topic.manual_offset_management?

-      false
+        # We use the non-blocking one here. If someone needs the blocking one, can implement it
+        # with manual offset management
+        mark_as_consumed(messages.last)
+      else
+        pause(@seek_offset || messages.first.offset)
+      end
     end

     # Trigger method for running on shutdown.
@@ -80,8 +78,6 @@ module Karafka
       Karafka.monitor.instrument('consumer.revoked', caller: self) do
         revoked
       end
-
-      true
     rescue StandardError => e
       Karafka.monitor.instrument(
         'error.occurred',
@@ -89,8 +85,6 @@ module Karafka
         caller: self,
         type: 'consumer.revoked.error'
       )
-
-      false
     end

     # Trigger method for running on shutdown.
@@ -100,8 +94,6 @@ module Karafka
       Karafka.monitor.instrument('consumer.shutdown', caller: self) do
         shutdown
       end
-
-      true
     rescue StandardError => e
       Karafka.monitor.instrument(
         'error.occurred',
@@ -109,16 +101,10 @@ module Karafka
         caller: self,
         type: 'consumer.shutdown.error'
       )
-
-      false
     end

     private

-    # Method that gets called in the blocking flow allowing to setup any type of resources or to
-    # send additional commands to Kafka before the proper execution starts.
-    def prepare; end
-
     # Method that will perform business logic and on data received from Kafka (it will consume
     #   the data)
     # @note This method needs bo be implemented in a subclass. We stub it here as a failover if
@@ -138,21 +124,36 @@ module Karafka
     # Marks message as consumed in an async way.
     #
     # @param message [Messages::Message] last successfully processed message.
+    # @return [Boolean] true if we were able to mark the offset, false otherwise. False indicates
+    #   that we were not able and that we have lost the partition.
+    #
     # @note We keep track of this offset in case we would mark as consumed and got error when
     #   processing another message. In case like this we do not pause on the message we've already
     #   processed but rather at the next one. This applies to both sync and async versions of this
     #   method.
     def mark_as_consumed(message)
-      client.mark_as_consumed(message)
+      @revoked = !client.mark_as_consumed(message)
+
+      return false if revoked?
+
       @seek_offset = message.offset + 1
+
+      true
     end

     # Marks message as consumed in a sync way.
     #
     # @param message [Messages::Message] last successfully processed message.
+    # @return [Boolean] true if we were able to mark the offset, false otherwise. False indicates
+    #   that we were not able and that we have lost the partition.
     def mark_as_consumed!(message)
-      client.mark_as_consumed!(message)
+      @revoked = !client.mark_as_consumed!(message)
+
+      return false if revoked?
+
       @seek_offset = message.offset + 1
+
+      true
     end

     # Pauses processing on a given offset for the current topic partition
@@ -190,5 +191,12 @@ module Karafka
         )
       )
     end
+
+    # @return [Boolean] true if partition was revoked from the current consumer
+    # @note We know that partition got revoked because when we try to mark message as consumed,
+    #   unless if is successful, it will return false
+    def revoked?
+      @revoked || false
+    end
   end
 end
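Taken together, the changes above split the consumption lifecycle into three externally driven phases, with the shared `Processing::Result` instance carrying the outcome between them. A simplified sketch of the per-batch call order (illustrative only, not the exact executor internals):

    # built by the executor from raw Kafka data
    consumer.messages = batch
    consumer.on_before_consume  # no-op here; Pro lrj consumers pause the partition
    consumer.on_consume         # runs #consume, records success!/failure! on @consumption
    consumer.on_after_consume   # resets the pause tracker and marks offsets, or pauses on failure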
data/lib/karafka/connection/client.rb
CHANGED
@@ -190,6 +190,7 @@ module Karafka
     # Marks given message as consumed.
     #
     # @param [Karafka::Messages::Message] message that we want to mark as processed
+    # @return [Boolean] true if successful. False if we no longer own given partition
     # @note This method won't trigger automatic offsets commits, rather relying on the offset
     #   check-pointing trigger that happens with each batch processed
     def mark_as_consumed(message)
@@ -199,8 +200,10 @@ module Karafka
     # Marks a given message as consumed and commits the offsets in a blocking way.
     #
     # @param [Karafka::Messages::Message] message that we want to mark as processed
+    # @return [Boolean] true if successful. False if we no longer own given partition
     def mark_as_consumed!(message)
-      mark_as_consumed(message)
+      return false unless mark_as_consumed(message)
+
       commit_offsets!
     end
@@ -217,22 +220,35 @@ module Karafka

     private

+    # When we cannot store an offset, it means we no longer own the partition
+    #
     # Non thread-safe offset storing method
     # @param message [Karafka::Messages::Message]
+    # @return [Boolean] true if we could store the offset (if we still own the partition)
     def internal_store_offset(message)
       @offsetting = true
       @kafka.store_offset(message)
+      true
+    rescue Rdkafka::RdkafkaError => e
+      return false if e.code == :assignment_lost
+      return false if e.code == :state
+
+      raise e
     end

     # Non thread-safe message committing method
     # @param async [Boolean] should the commit happen async or sync (async by default)
+    # @return [Boolean] true if offset commit worked, false if we've lost the assignment
     def internal_commit_offsets(async: true)
-      return unless @offsetting
+      return true unless @offsetting

       @kafka.commit(nil, async)
       @offsetting = false
+
+      true
     rescue Rdkafka::RdkafkaError => e
-      return if e.code == :no_offset
+      return false if e.code == :assignment_lost
+      return false if e.code == :no_offset

       raise e
     end
@@ -250,7 +266,8 @@ module Karafka

       @kafka.close
       @buffer.clear
-      @rebalance_manager.clear
+      # @note We do not clear rebalance manager here as we may still have revocation info here
+      #   that we want to consider valid prior to running another reconnection
     end
   end
@@ -303,7 +320,13 @@ module Karafka

       time_poll.backoff

-      retry
+      # We return nil, so we do not restart until running the whole loop
+      # This allows us to run revocation jobs and other things and we will pick up new work
+      # next time after dispatching all the things that are needed
+      #
+      # If we would retry here, the client reset would become transparent and we would not have
+      # a chance to take any actions
+      nil
     end

     # Builds a new rdkafka consumer instance based on the subscription group configuration
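The pattern in `internal_store_offset` and `internal_commit_offsets` generalizes well: rdkafka raises `Rdkafka::RdkafkaError` with a symbolic `code` when the assignment was lost, and translating that into a boolean keeps the callers simple. A standalone sketch of the same idea against a plain rdkafka consumer (the method name is illustrative, not part of Karafka):

    def store_offset_safely(kafka_consumer, message)
      kafka_consumer.store_offset(message)
      true
    rescue Rdkafka::RdkafkaError => e
      # :assignment_lost is raised when a rebalance took the partition away
      return false if e.code == :assignment_lost

      raise
    end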
data/lib/karafka/instrumentation/logger_listener.rb
CHANGED
@@ -98,9 +98,6 @@ module Karafka
       details = (error.backtrace || []).join("\n")

       case type
-      when 'consumer.prepared.error'
-        error "Consumer prepared error: #{error}"
-        error details
       when 'consumer.consume.error'
         error "Consumer consuming error: #{error}"
         error details
data/lib/karafka/pro/active_job/consumer.rb
CHANGED
@@ -20,7 +20,7 @@ module Karafka
       #
       # It contains slightly better revocation warranties than the regular blocking consumer as
       # it can stop processing batch of jobs in the middle after the revocation.
-      class Consumer < Karafka::ActiveJob::Consumer
+      class Consumer < Karafka::Pro::BaseConsumer
         # Runs ActiveJob jobs processing and handles lrj if needed
         def consume
           messages.each do |message|
@@ -33,11 +33,12 @@ module Karafka
             ::ActiveSupport::JSON.decode(message.raw_payload)
           )

+          mark_as_consumed(message)
+
           # We check it twice as the job may be long running
+          # If marking fails, it also means it got revoked and we can stop consuming
           return if revoked?

-          mark_as_consumed(message)
-
           # Do not process more if we are shutting down
           break if Karafka::App.stopping?
         end
data/lib/karafka/pro/base_consumer.rb
ADDED
@@ -0,0 +1,76 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component.
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    # Karafka PRO consumer.
+    #
+    # If you use PRO, all your consumers should inherit (indirectly) from it.
+    #
+    # @note In case of using lrj, manual pausing may not be the best idea as resume needs to happen
+    #   after each batch is processed.
+    class BaseConsumer < Karafka::BaseConsumer
+      # Pause for tops 31 years
+      MAX_PAUSE_TIME = 1_000_000_000_000
+
+      private_constant :MAX_PAUSE_TIME
+
+      # Pauses processing of a given partition until we're done with the processing
+      # This ensures, that we can easily poll not reaching the `max.poll.interval`
+      def on_before_consume
+        # Pause at the first message in a batch. That way in case of a crash, we will not loose
+        # any messages
+        return unless topic.long_running_job?
+
+        pause(messages.first.offset, MAX_PAUSE_TIME)
+      end
+
+      # Runs extra logic after consumption that is related to handling long running jobs
+      # @note This overwrites the `#on_after_consume` from the base consumer
+      def on_after_consume
+        # Nothing to do if we lost the partition
+        return if revoked?
+
+        if @consumption.success?
+          pause_tracker.reset
+
+          # We use the non-blocking one here. If someone needs the blocking one, can implement it
+          # with manual offset management
+          # Mark as consumed only if manual offset management is not on
+          mark_as_consumed(messages.last) unless topic.manual_offset_management?
+
+          # If this is not a long running job there is nothing for us to do here
+          return unless topic.long_running_job?
+
+          # Once processing is done, we move to the new offset based on commits
+          # Here, in case manual offset management is off, we have the new proper offset of a
+          # first message from another batch from `@seek_offset`. If manual offset management
+          # is on, we move to place where the user indicated it was finished.
+          seek(@seek_offset || messages.first.offset)
+          resume
+        else
+          # If processing failed, we need to pause
+          pause(@seek_offset || messages.first.offset)
+        end
+      end
+
+      # Marks this consumer revoked state as true
+      # This allows us for things like lrj to finish early as this state may change during lrj
+      # execution
+      def on_revoked
+        # @note This may already be set to true if we tried to commit offsets and failed. In case
+        #   like this it will automatically be marked as revoked.
+        @revoked = true
+        super
+      end
+    end
+  end
+end
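For context, a long running job consumer built on top of this class might look roughly as follows. This is a hedged sketch: the class and helper names are made up, and the routing side that makes `topic.long_running_job?` return true is not shown here:

    class ReportsConsumer < Karafka::Pro::BaseConsumer
      def consume
        messages.each do |message|
          build_report(message) # may take longer than max.poll.interval.ms

          # Stop early if a rebalance revoked this partition mid-batch
          return if revoked?
        end
      end

      private

      def build_report(message)
        # hypothetical long running work
      end
    end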
data/lib/karafka/pro/loader.rb
CHANGED
@@ -15,9 +15,9 @@ module Karafka
     class Loader
       # All the pro components that need to be loaded
       COMPONENTS = %w[
+        base_consumer
         performance_tracker
         scheduler
-        base_consumer_extensions
         processing/jobs/consume_non_blocking
         processing/jobs_builder
         routing/extensions
@@ -42,7 +42,6 @@ module Karafka
         config.internal.active_job.job_options_contract = ActiveJob::JobOptionsContract.new

         ::Karafka::Routing::Topic.include(Routing::Extensions)
-        ::Karafka::BaseConsumer.prepend(BaseConsumerExtensions)

         config.monitor.subscribe(PerformanceTracker.instance)
       end
data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb
CHANGED
@@ -26,7 +26,7 @@ module Karafka
        #   management. This layer of the framework knows nothing about Kafka messages consumption.
        class ConsumeNonBlocking < ::Karafka::Processing::Jobs::Consume
          # Releases the blocking lock after it is done with the preparation phase for this job
-          def prepare
+          def before_call
            super
            @non_blocking = true
          end
data/lib/karafka/processing/executor.rb
CHANGED
@@ -45,12 +45,20 @@ module Karafka
      # @param messages [Array<Karafka::Messages::Message>]
      # @param received_at [Time] the moment we've received the batch (actually the moment we've)
      #   enqueued it, but good enough
-      def prepare(messages, received_at)
+      def before_consume(messages, received_at)
        # Recreate consumer with each batch if persistence is not enabled
        # We reload the consumers with each batch instead of relying on some external signals
        # when needed for consistency. That way devs may have it on or off and not in this
        # middle state, where re-creation of a consumer instance would occur only sometimes
-        @consumer = nil unless ::Karafka::App.config.consumer_persistence
+        @recreate = true unless ::Karafka::App.config.consumer_persistence
+
+        # If @recreate was set to true (aside from non persistent), it means, that revocation or
+        # a shutdown happened and we need to have a new instance for running another consume for
+        # this topic partition
+        if @recreate
+          @consumer = nil
+          @recreate = false
+        end

        # First we build messages batch...
        consumer.messages = Messages::Builders::Messages.call(
@@ -59,7 +67,7 @@ module Karafka
          received_at
        )

-        consumer.on_prepare
+        consumer.on_before_consume
      end

      # Runs consumer data processing against given batch and handles failures and errors.
@@ -68,6 +76,11 @@ module Karafka
        consumer.on_consume
      end

+      # Runs consumer after consumption code
+      def after_consume
+        consumer.on_after_consume if @consumer
+      end
+
      # Runs the controller `#revoked` method that should be triggered when a given consumer is
      # no longer needed due to partitions reassignment.
      #
@@ -76,9 +89,13 @@ module Karafka
      #
      # @note We run it only when consumer was present, because presence indicates, that at least
      #   a single message has been consumed.
+      #
+      # @note We do not reset the consumer but we indicate need for recreation instead, because
+      #   after the revocation, there still may be `#after_consume` running that needs a given
+      #   consumer instance.
      def revoked
        consumer.on_revoked if @consumer
-        @consumer = nil
+        @recreate = true
      end

      # Runs the controller `#shutdown` method that should be triggered when a given consumer is
@@ -90,7 +107,7 @@ module Karafka
      # There is a case, where the consumer no longer exists because it was revoked, in case like
      #   that we do not build a new instance and shutdown should not be triggered.
      consumer.on_shutdown if @consumer
-      @consumer = nil
+      @recreate = true
    end

    private
data/lib/karafka/processing/jobs/base.rb
CHANGED
@@ -5,7 +5,7 @@ module Karafka
    # Namespace for all the jobs that are suppose to run in workers.
    module Jobs
      # Base class for all the jobs types that are suppose to run in workers threads.
-      # Each job can have 3 main entry-points: `#prepare`, `#call` and `#teardown`
+      # Each job can have 3 main entry-points: `#before_call`, `#call` and `#after_call`
      # Only `#call` is required.
      class Base
        extend Forwardable
@@ -23,10 +23,15 @@ module Karafka
        end

        # When redefined can run any code that should run before executing the proper code
-        def prepare; end
+        def before_call; end
+
+        # The main entry-point of a job
+        def call
+          raise NotImplementedError, 'Please implement in a subclass'
+        end

        # When redefined can run any code that should run after executing the proper code
-        def teardown; end
+        def after_call; end

        # @return [Boolean] is this a non-blocking job
        #
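A hypothetical subclass illustrating the renamed three-phase contract (sketch only; the framework's real jobs wrap an executor, as the `Consume` job below shows):

    class TimedJob < Karafka::Processing::Jobs::Base
      def before_call
        @started_at = Time.now
      end

      def call
        # the proper work goes here
      end

      def after_call
        puts "job took #{Time.now - @started_at}s"
      end
    end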
data/lib/karafka/processing/jobs/consume.rb
CHANGED
@@ -20,15 +20,20 @@ module Karafka
          super()
        end

-        # Runs the preparations on the executor
-        def prepare
-          executor.prepare(@messages, @created_at)
+        # Runs the before consumption preparations on the executor
+        def before_call
+          executor.before_consume(@messages, @created_at)
        end

        # Runs the given executor
        def call
          executor.consume
        end
+
+        # Runs any error handling and other post-consumption stuff on the executor
+        def after_call
+          executor.after_consume
+        end
      end
    end
  end
data/lib/karafka/processing/result.rb
ADDED
@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Processing
+    # A simple object that allows us to keep track of processing state.
+    # It allows to indicate if given thing moved from success to a failure or the other way around
+    # Useful for tracking consumption state
+    class Result
+      def initialize
+        @success = true
+      end
+
+      # @return [Boolean]
+      def failure?
+        !success?
+      end
+
+      # @return [Boolean]
+      def success?
+        @success
+      end
+
+      # Marks state as successful
+      def success!
+        @success = true
+      end
+
+      # Marks state as failure
+      def failure!
+        @success = false
+      end
+    end
+  end
+end
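The object is intentionally tiny; a quick sketch of the state transitions the base consumer relies on:

    result = Karafka::Processing::Result.new
    result.success? # => true, the initial state
    result.failure!
    result.failure? # => true
    result.success!
    result.success? # => true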
data/lib/karafka/processing/worker.rb
CHANGED
@@ -50,7 +50,7 @@ module Karafka
          Karafka.monitor.instrument('worker.process', caller: self, job: job)

          Karafka.monitor.instrument('worker.processed', caller: self, job: job) do
-            job.prepare
+            job.before_call

            # If a job is marked as non blocking, we can run a tick in the job queue and if there
            # are no other blocking factors, the job queue will be unlocked.
@@ -60,7 +60,7 @@ module Karafka

            job.call

-            job.teardown
+            job.after_call

            true
          end
data/lib/karafka/routing/topic.rb
CHANGED
@@ -66,6 +66,11 @@ module Karafka
      end
    end

+    # @return [Boolean] true if this topic offset is handled by the end user
+    def manual_offset_management?
+      manual_offset_management
+    end
+
    # @return [Hash] hash with all the topic attributes
    # @note This is being used when we validate the consumer_group and its topics
    def to_h
data/lib/karafka/version.rb
CHANGED
-  VERSION = '2.0.0.beta3'
+  VERSION = '2.0.0.beta4'
data.tar.gz.sig
CHANGED
Binary file
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: karafka
 version: !ruby/object:Gem::Version
-  version: 2.0.0.beta3
+  version: 2.0.0.beta4
 platform: ruby
 authors:
 - Maciej Mensfeld
@@ -34,7 +34,7 @@ cert_chain:
   R2P11bWoCtr70BsccVrN8jEhzwXngMyI2gVt750Y+dbTu1KgRqZKp/ECe7ZzPzXj
   pIy9vHxTANKYVyI4qj8OrFdEM5BQNu8oQpL0iQ==
   -----END CERTIFICATE-----
-date: 2022-06-14 00:00:00.000000000 Z
+date: 2022-06-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: dry-configurable
@@ -112,7 +112,7 @@ dependencies:
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 2.3.0
+      version: 2.3.1
   - - "<"
     - !ruby/object:Gem::Version
       version: 3.0.0
@@ -122,7 +122,7 @@ dependencies:
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 2.3.0
+      version: 2.3.1
   - - "<"
     - !ruby/object:Gem::Version
       version: 3.0.0
@@ -173,6 +173,7 @@ files:
 - bin/karafka
 - bin/scenario
 - bin/stress
+- bin/wait_for_kafka
 - certs/karafka-pro.pem
 - certs/mensfeld.pem
 - config/errors.yml
@@ -232,7 +233,7 @@ files:
 - lib/karafka/pro/active_job/consumer.rb
 - lib/karafka/pro/active_job/dispatcher.rb
 - lib/karafka/pro/active_job/job_options_contract.rb
-- lib/karafka/pro/base_consumer_extensions.rb
+- lib/karafka/pro/base_consumer.rb
 - lib/karafka/pro/loader.rb
 - lib/karafka/pro/performance_tracker.rb
 - lib/karafka/pro/processing/jobs/consume_non_blocking.rb
@@ -248,6 +249,7 @@ files:
 - lib/karafka/processing/jobs/shutdown.rb
 - lib/karafka/processing/jobs_builder.rb
 - lib/karafka/processing/jobs_queue.rb
+- lib/karafka/processing/result.rb
 - lib/karafka/processing/worker.rb
 - lib/karafka/processing/workers_batch.rb
 - lib/karafka/railtie.rb
metadata.gz.sig
CHANGED
Binary file
data/lib/karafka/pro/base_consumer_extensions.rb
DELETED
@@ -1,66 +0,0 @@
-# frozen_string_literal: true
-
-# This Karafka component is a Pro component.
-# All of the commercial components are present in the lib/karafka/pro directory of this
-# repository and their usage requires commercial license agreement.
-#
-# Karafka has also commercial-friendly license, commercial support and commercial components.
-#
-# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
-# your code to Maciej Mensfeld.
-
-module Karafka
-  module Pro
-    # Extensions to the base consumer that make it more pro and fancy
-    #
-    # @note In case of using lrj, manual pausing may not be the best idea as resume needs to happen
-    #   after each batch is processed.
-    #
-    # They need to be added to the consumer via `#prepend`
-    module BaseConsumerExtensions
-      # Pause for tops 31 years
-      MAX_PAUSE_TIME = 1_000_000_000_000
-
-      private_constant :MAX_PAUSE_TIME
-
-      # Pauses processing of a given partition until we're done with the processing
-      # This ensures, that we can easily poll not reaching the `max.poll.interval`
-      def on_prepare
-        # Pause at the first message in a batch. That way in case of a crash, we will not loose
-        # any messages
-        pause(messages.first.offset, MAX_PAUSE_TIME) if topic.long_running_job?
-
-        super
-      end
-
-      # After user code, we seek and un-pause our partition
-      def on_consume
-        # If anything went wrong here, we should not run any partition management as it's Karafka
-        # core that will handle the backoff
-        return unless super
-
-        return unless topic.long_running_job?
-
-        # Nothing to resume if it was revoked
-        return if revoked?
-
-        # Once processing is done, we move to the new offset based on commits
-        seek(@seek_offset || messages.first.offset)
-        resume
-      end
-
-      # Marks this consumer revoked state as true
-      # This allows us for things like lrj to finish early as this state may change during lrj
-      # execution
-      def on_revoked
-        @revoked = true
-        super
-      end
-
-      # @return [Boolean] true if partition was revoked from the current consumer
-      def revoked?
-        @revoked || false
-      end
-    end
-  end
-end