RubyGems - karafka - Versions diffs - 2.2.13 → 2.3.0.alpha1 - Mend

karafka 2.2.13 → 2.3.0.alpha1

Files changed (125) hide show

checksums.yaml +4 -4
checksums.yaml.gz.sig +0 -0
data/.github/workflows/ci.yml +38 -12
data/.ruby-version +1 -1
data/CHANGELOG.md +161 -125
data/Gemfile.lock +12 -12
data/README.md +0 -2
data/SECURITY.md +23 -0
data/config/locales/errors.yml +7 -1
data/config/locales/pro_errors.yml +22 -0
data/docker-compose.yml +3 -1
data/karafka.gemspec +2 -2
data/lib/karafka/admin/acl.rb +287 -0
data/lib/karafka/admin.rb +118 -16
data/lib/karafka/app.rb +12 -3
data/lib/karafka/base_consumer.rb +32 -31
data/lib/karafka/cli/base.rb +1 -1
data/lib/karafka/connection/client.rb +94 -84
data/lib/karafka/connection/conductor.rb +28 -0
data/lib/karafka/connection/listener.rb +165 -46
data/lib/karafka/connection/listeners_batch.rb +5 -11
data/lib/karafka/connection/manager.rb +72 -0
data/lib/karafka/connection/messages_buffer.rb +12 -0
data/lib/karafka/connection/proxy.rb +17 -0
data/lib/karafka/connection/status.rb +75 -0
data/lib/karafka/contracts/config.rb +14 -10
data/lib/karafka/contracts/consumer_group.rb +9 -1
data/lib/karafka/contracts/topic.rb +3 -1
data/lib/karafka/errors.rb +13 -0
data/lib/karafka/instrumentation/assignments_tracker.rb +96 -0
data/lib/karafka/instrumentation/callbacks/rebalance.rb +10 -7
data/lib/karafka/instrumentation/logger_listener.rb +3 -9
data/lib/karafka/instrumentation/notifications.rb +19 -9
data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +31 -28
data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +22 -3
data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +15 -12
data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +39 -36
data/lib/karafka/pro/base_consumer.rb +47 -0
data/lib/karafka/pro/connection/manager.rb +300 -0
data/lib/karafka/pro/connection/multiplexing/listener.rb +40 -0
data/lib/karafka/pro/instrumentation/performance_tracker.rb +85 -0
data/lib/karafka/pro/iterator/tpl_builder.rb +1 -1
data/lib/karafka/pro/iterator.rb +1 -6
data/lib/karafka/pro/loader.rb +16 -2
data/lib/karafka/pro/processing/coordinator.rb +2 -1
data/lib/karafka/pro/processing/executor.rb +37 -0
data/lib/karafka/pro/processing/expansions_selector.rb +32 -0
data/lib/karafka/pro/processing/jobs/periodic.rb +41 -0
data/lib/karafka/pro/processing/jobs/periodic_non_blocking.rb +32 -0
data/lib/karafka/pro/processing/jobs_builder.rb +14 -3
data/lib/karafka/pro/processing/offset_metadata/consumer.rb +44 -0
data/lib/karafka/pro/processing/offset_metadata/fetcher.rb +131 -0
data/lib/karafka/pro/processing/offset_metadata/listener.rb +46 -0
data/lib/karafka/pro/processing/schedulers/base.rb +143 -0
data/lib/karafka/pro/processing/schedulers/default.rb +107 -0
data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
data/lib/karafka/pro/processing/strategies/default.rb +136 -3
data/lib/karafka/pro/processing/strategies/dlq/default.rb +35 -0
data/lib/karafka/pro/processing/strategies/lrj/default.rb +1 -1
data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
data/lib/karafka/pro/processing/strategies/vp/default.rb +60 -26
data/lib/karafka/pro/processing/virtual_offset_manager.rb +41 -11
data/lib/karafka/pro/routing/features/long_running_job/topic.rb +2 -0
data/lib/karafka/pro/routing/features/multiplexing/config.rb +38 -0
data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +114 -0
data/lib/karafka/pro/routing/features/multiplexing/patches/contracts/consumer_group.rb +42 -0
data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +38 -0
data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +42 -0
data/lib/karafka/pro/routing/features/multiplexing/subscription_groups_builder.rb +40 -0
data/lib/karafka/pro/routing/features/multiplexing.rb +59 -0
data/lib/karafka/pro/routing/features/non_blocking_job/topic.rb +32 -0
data/lib/karafka/pro/routing/features/non_blocking_job.rb +37 -0
data/lib/karafka/pro/routing/features/offset_metadata/config.rb +33 -0
data/lib/karafka/pro/routing/features/offset_metadata/contracts/topic.rb +42 -0
data/lib/karafka/pro/routing/features/offset_metadata/topic.rb +65 -0
data/lib/karafka/pro/routing/features/offset_metadata.rb +40 -0
data/lib/karafka/pro/routing/features/patterns/contracts/consumer_group.rb +4 -0
data/lib/karafka/pro/routing/features/patterns/detector.rb +18 -10
data/lib/karafka/pro/routing/features/periodic_job/config.rb +37 -0
data/lib/karafka/pro/routing/features/periodic_job/contracts/topic.rb +44 -0
data/lib/karafka/pro/routing/features/periodic_job/topic.rb +94 -0
data/lib/karafka/pro/routing/features/periodic_job.rb +27 -0
data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +1 -0
data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +7 -2
data/lib/karafka/process.rb +5 -3
data/lib/karafka/processing/coordinator.rb +5 -1
data/lib/karafka/processing/executor.rb +43 -13
data/lib/karafka/processing/executors_buffer.rb +22 -7
data/lib/karafka/processing/jobs/base.rb +19 -2
data/lib/karafka/processing/jobs/consume.rb +3 -3
data/lib/karafka/processing/jobs/idle.rb +5 -0
data/lib/karafka/processing/jobs/revoked.rb +5 -0
data/lib/karafka/processing/jobs/shutdown.rb +5 -0
data/lib/karafka/processing/jobs_queue.rb +19 -8
data/lib/karafka/processing/schedulers/default.rb +42 -0
data/lib/karafka/processing/strategies/base.rb +13 -4
data/lib/karafka/processing/strategies/default.rb +23 -7
data/lib/karafka/processing/strategies/dlq.rb +36 -0
data/lib/karafka/processing/worker.rb +4 -1
data/lib/karafka/routing/builder.rb +12 -2
data/lib/karafka/routing/consumer_group.rb +5 -5
data/lib/karafka/routing/features/base.rb +44 -8
data/lib/karafka/routing/features/dead_letter_queue/config.rb +6 -1
data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
data/lib/karafka/routing/features/dead_letter_queue/topic.rb +9 -2
data/lib/karafka/routing/proxy.rb +4 -3
data/lib/karafka/routing/subscription_group.rb +2 -2
data/lib/karafka/routing/subscription_groups_builder.rb +11 -2
data/lib/karafka/routing/topic.rb +8 -10
data/lib/karafka/routing/topics.rb +1 -1
data/lib/karafka/runner.rb +13 -3
data/lib/karafka/server.rb +5 -9
data/lib/karafka/setup/config.rb +21 -1
data/lib/karafka/status.rb +23 -14
data/lib/karafka/templates/karafka.rb.erb +7 -0
data/lib/karafka/time_trackers/partition_usage.rb +56 -0
data/lib/karafka/version.rb +1 -1
data.tar.gz.sig +0 -0
metadata +47 -13
metadata.gz.sig +0 -0
data/lib/karafka/connection/consumer_group_coordinator.rb +0 -48
data/lib/karafka/pro/performance_tracker.rb +0 -84
data/lib/karafka/pro/processing/scheduler.rb +0 -74
data/lib/karafka/processing/scheduler.rb +0 -38

data/lib/karafka/connection/listener.rb CHANGED Viewed

@@ -7,6 +7,8 @@ module Karafka
     # critical errors by restarting everything in a safe manner.
     #
     # This is the heart of the consumption process.
+    #
+    # It provides an async API for managing it, so all status changes are expected to be async.
     class Listener
       include Helpers::Async
@@ -14,22 +16,23 @@ module Karafka
       # @return [String] id of this listener
       attr_reader :id
+      # @return [Karafka::Routing::SubscriptionGroup] subscription group that this listener handles
+      attr_reader :subscription_group
       # How long to wait in the initial events poll. Increases chances of having the initial events
       # immediately available
       INITIAL_EVENTS_POLL_TIMEOUT = 100
       private_constant :INITIAL_EVENTS_POLL_TIMEOUT
-      # @param consumer_group_coordinator [Karafka::Connection::ConsumerGroupCoordinator]
       # @param subscription_group [Karafka::Routing::SubscriptionGroup]
       # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
       # @param scheduler [Karafka::Processing::Scheduler] scheduler we want to use
       # @return [Karafka::Connection::Listener] listener instance
-      def initialize(consumer_group_coordinator, subscription_group, jobs_queue, scheduler)
+      def initialize(subscription_group, jobs_queue, scheduler)
         proc_config = ::Karafka::App.config.internal.processing
         @id = SecureRandom.hex(6)
-        @consumer_group_coordinator = consumer_group_coordinator
         @subscription_group = subscription_group
         @jobs_queue = jobs_queue
         @coordinators = Processing::CoordinatorsBuffer.new(subscription_group.topics)
@@ -43,8 +46,11 @@ module Karafka
         # We can do this that way because we always first schedule jobs using messages before we
         # fetch another batch.
         @messages_buffer = MessagesBuffer.new(subscription_group)
+        @usage_tracker = TimeTrackers::PartitionUsage.new
         @mutex = Mutex.new
-        @stopped = false
+        @status = Status.new
+        @jobs_queue.register(@subscription_group.id)
       end
       # Runs the main listener fetch loop.
@@ -60,6 +66,44 @@ module Karafka
         )
         fetch_loop
+        Karafka.monitor.instrument(
+          'connection.listener.after_fetch_loop',
+          caller: self,
+          client: @client,
+          subscription_group: @subscription_group
+        )
+      end
+      # Aliases all status operations directly on the listener so we have a listener-facing API
+      Status::STATES.each do |state, transition|
+        # @return [Boolean] is the listener in a given state
+        define_method "#{state}?" do
+          @status.public_send("#{state}?")
+        end
+        # Moves listener to a given state
+        define_method transition do
+          @status.public_send(transition)
+        end
+      end
+      # @return [Boolean] is this listener active (not stopped and not pending)
+      def active?
+        @status.active?
+      end
+      # We overwrite the `#start!` transition because on start we also need to run the listener in
+      # the async thread. While other state transitions happen automatically and a status state
+      # change is enough, here we need to run the background threads
+      def start!
+        if stopped?
+          @client.reset
+          @status.reset!
+        end
+        @status.start!
+        async_call
       end
       # Stops the jobs queue, triggers shutdown on all the executors (sync), commits offsets and
@@ -70,13 +114,16 @@ module Karafka
       #
       # @note We wrap it with a mutex exactly because of the above case of forceful shutdown
       def shutdown
-        return if @stopped
         @mutex.synchronize do
-          @stopped = true
+          return if stopped?
+          # Nothing to clear if it was not even running
+          return stopped! if pending?
           @executors.clear
           @coordinators.reset
           @client.stop
+          stopped!
         end
       end
@@ -91,6 +138,7 @@ module Karafka
       #   Kafka connections / Internet connection issues / Etc. Business logic problems should not
       #   propagate this far.
       def fetch_loop
+        running!
         # Run the initial events fetch to improve chances of having metrics and initial callbacks
         # triggers on start.
         #
@@ -101,7 +149,7 @@ module Karafka
         @client.events_poll(INITIAL_EVENTS_POLL_TIMEOUT)
         # Run the main loop as long as we are not stopping or moving into quiet mode
-        until Karafka::App.done?
+        while running?
           Karafka.monitor.instrument(
             'connection.listener.fetch_loop',
             caller: self,
@@ -136,7 +184,11 @@ module Karafka
           # simplifies the overall design and prevents from race conditions
           wait
-          build_and_schedule_consumption_jobs
+          build_and_schedule_flow_jobs
+          # Periodic jobs never run on topics and partitions that were scheduled, so there is no
+          # risk in having a collective wait after both
+          build_and_schedule_periodic_jobs if Karafka.pro?
           wait
         end
@@ -168,18 +220,11 @@ module Karafka
         # Wait until all the shutdown jobs are done
         wait_pinging(wait_until: -> { @jobs_queue.empty?(@subscription_group.id) })
-        # Once all the work is done, we need to decrement counter of active subscription groups
-        # within this consumer group
-        @consumer_group_coordinator.finish_work(id)
+        quieted!
         # Wait if we're in the process of finishing started work or finished all the work and
         # just sitting and being quiet
-        wait_pinging(wait_until: -> { !(Karafka::App.quieting? || Karafka::App.quiet?) })
-        # We need to wait until all the work in the whole consumer group (local to the process)
-        # is done. Otherwise we may end up with locks and `Timed out LeaveGroupRequest in flight`
-        # warning notifications.
-        wait_pinging(wait_until: -> { @consumer_group_coordinator.shutdown? })
+        wait_pinging(wait_until: -> { !quiet? })
         # This extra ping will make sure we've refreshed the rebalance state after other instances
         # potentially shutdown. This will prevent us from closing with a dangling callback
@@ -198,11 +243,9 @@ module Karafka
           type: 'connection.listener.fetch_loop.error'
         )
-        restart
+        reset
         sleep(1) && retry
-      ensure
-        @consumer_group_coordinator.unlock
       end
       # Resumes processing of partitions that were paused due to an error.
@@ -212,6 +255,17 @@ module Karafka
         end
       end
+      # Polls messages within the time and amount boundaries defined in the settings and then
+      # builds karafka messages based on the raw rdkafka messages buffer returned by the
+      # `#batch_poll` method.
+      #
+      # @note There are two buffers, one for raw messages and one for "built" karafka messages
+      def poll_and_remap_messages
+        @messages_buffer.remap(
+          @client.batch_poll
+        )
+      end
       # Enqueues revoking jobs for partitions that were taken away from the running process.
       def build_and_schedule_revoked_jobs_for_revoked_partitions
         revoked_partitions = @client.rebalance_manager.revoked_partitions
@@ -223,6 +277,7 @@ module Karafka
         revoked_partitions.each do |topic, partitions|
           partitions.each do |partition|
+            @usage_tracker.revoke(topic, partition)
             @coordinators.revoke(topic, partition)
             # There may be a case where we have lost partition of which data we have never
@@ -230,7 +285,6 @@ module Karafka
             # here. In cases like this, we do not run a revocation job
             @executors.find_all(topic, partition).each do |executor|
               job = @jobs_builder.revoked(executor)
-              job.before_enqueue
               jobs << job
             end
@@ -243,7 +297,10 @@ module Karafka
           end
         end
-        @scheduler.schedule_revocation(jobs)
+        return if jobs.empty?
+        jobs.each(&:before_schedule)
+        @scheduler.on_schedule_revocation(jobs)
       end
       # Enqueues the shutdown jobs for all the executors that exist in our subscription group
@@ -252,32 +309,27 @@ module Karafka
         @executors.each do |executor|
           job = @jobs_builder.shutdown(executor)
-          job.before_enqueue
           jobs << job
         end
-        @scheduler.schedule_shutdown(jobs)
-      end
+        return if jobs.empty?
-      # Polls messages within the time and amount boundaries defined in the settings and then
-      # builds karafka messages based on the raw rdkafka messages buffer returned by the
-      # `#batch_poll` method.
-      #
-      # @note There are two buffers, one for raw messages and one for "built" karafka messages
-      def poll_and_remap_messages
-        @messages_buffer.remap(
-          @client.batch_poll
-        )
+        jobs.each(&:before_schedule)
+        @scheduler.on_schedule_shutdown(jobs)
       end
       # Takes the messages per topic partition and enqueues processing jobs in threads using
-      # given scheduler.
-      def build_and_schedule_consumption_jobs
+      # given scheduler. It also handles the idle jobs when filtering API removed all messages
+      # and we need to run house-keeping
+      def build_and_schedule_flow_jobs
         return if @messages_buffer.empty?
-        jobs = []
+        consume_jobs = []
+        idle_jobs = []
         @messages_buffer.each do |topic, partition, messages|
+          @usage_tracker.track(topic, partition)
           coordinator = @coordinators.find_or_create(topic, partition)
           # Start work coordination for this topic partition
           coordinator.start(messages)
@@ -286,26 +338,93 @@ module Karafka
           # and it will not go through a standard lifecycle. Same applies to revoked and shutdown
           if messages.empty?
             executor = @executors.find_or_create(topic, partition, 0, coordinator)
-            jobs << @jobs_builder.idle(executor)
+            idle_jobs << @jobs_builder.idle(executor)
           else
             @partitioner.call(topic, messages, coordinator) do |group_id, partition_messages|
               executor = @executors.find_or_create(topic, partition, group_id, coordinator)
               coordinator.increment
-              jobs << @jobs_builder.consume(executor, partition_messages)
+              consume_jobs << @jobs_builder.consume(executor, partition_messages)
+            end
+          end
+        end
+        # We schedule the idle jobs before running the `#before_schedule` on the consume jobs so
+        # workers can already pick up the idle jobs while the `#before_schedule` on consumption
+        # jobs runs
+        unless idle_jobs.empty?
+          idle_jobs.each(&:before_schedule)
+          @scheduler.on_schedule_idle(idle_jobs)
+        end
+        unless consume_jobs.empty?
+          consume_jobs.each(&:before_schedule)
+          @scheduler.on_schedule_consumption(consume_jobs)
+        end
+      end
+      # Builds and schedules periodic jobs for topics partitions for which no messages were
+      # received recently. In case `Idle` job is invoked, we do not run periodic. Idle means that
+      # a complex flow kicked in and it was a user choice not to run consumption but messages were
+      # shipped.
+      def build_and_schedule_periodic_jobs
+        # Shortcut if periodic jobs are not used at all. No need to run the complex flow when it
+        # will never end up with anything. If periodics on any of the topics are not even defined,
+        # we can finish fast
+        @periodic_jobs ||= @subscription_group.topics.count(&:periodic_job?)
+        return if @periodic_jobs.zero?
+        jobs = []
+        # We select only currently assigned topics and partitions from the current subscription
+        # group, as only those are of interest to us. We then narrow that down to the ones for
+        # which we want to run periodic jobs and, out of those, to the ones that did not receive
+        # any messages recently. This ensures that we do not tick close to a recent arrival of
+        # messages but rather after a certain period of inactivity
+        Karafka::App.assignments.each do |topic, partitions|
+          # Skip for assignments not from our subscription group
+          next unless topic.subscription_group == @subscription_group
+          # Skip if this topic does not have periodic jobs enabled
+          next unless topic.periodic_job?
+          topic_name = topic.name
+          interval = topic.periodic_job.interval
+          partitions.each do |partition|
+            # Skip if we were operating on a given topic partition recently
+            next if @usage_tracker.active?(topic_name, partition, interval)
+            coordinator = @coordinators.find_or_create(topic_name, partition)
+            # Do not tick if we do not want to tick during pauses
+            next if coordinator.paused? && !topic.periodic_job.during_pause?
+            # If we do not want to run periodics during retry flows, we should not tick.
+            # Since this counter is incremented before processing, here it is always one less than
+            # what we see in the consumer flow. This is why attempt 0 means that we will have the
+            # first run (ok) but attempt 1 means that there was an error and we will retry
+            next if coordinator.attempt.positive? && !topic.periodic_job.during_retry?
+            # Track so we do not run periodic job again too soon
+            @usage_tracker.track(topic_name, partition)
+            @executors.find_all_or_create(topic_name, partition, coordinator).each do |executor|
+              jobs << @jobs_builder.periodic(executor)
             end
           end
         end
-        jobs.each(&:before_enqueue)
+        return if jobs.empty?
-        @scheduler.schedule_consumption(jobs)
+        jobs.each(&:before_schedule)
+        @scheduler.on_schedule_periodic(jobs)
       end
       # Waits for all the jobs from a given subscription group to finish before moving forward
       def wait
         @jobs_queue.wait(@subscription_group.id) do
           @events_poller.call
-          @scheduler.manage
+          @scheduler.on_manage
         end
       end
@@ -322,7 +441,7 @@ module Karafka
       def wait_pinging(wait_until:, after_ping: -> {})
         until wait_until.call
           @client.ping
-          @scheduler.manage
+          @scheduler.on_manage
           after_ping.call
           sleep(0.2)
@@ -333,13 +452,13 @@ module Karafka
       # `#fetch_loop` again. We just need to remember to also reset the runner as it is a long
       # running one, so with a new connection to Kafka, we need to initialize the state of the
       # runner and underlying consumers once again.
-      def restart
+      def reset
         # If there was any problem with processing, before we reset things we need to make sure,
         # there are no jobs in the queue. Otherwise it could lead to leakage in between client
         # resetting.
         @jobs_queue.wait(@subscription_group.id)
         @jobs_queue.clear(@subscription_group.id)
-        @scheduler.clear(@subscription_group.id)
+        @scheduler.on_clear(@subscription_group.id)
         @events_poller.reset
         @client.reset
         @coordinators.reset

data/lib/karafka/connection/listeners_batch.rb CHANGED Viewed

@@ -6,8 +6,6 @@ module Karafka
     class ListenersBatch
       include Enumerable
-      attr_reader :coordinators
       # @param jobs_queue [JobsQueue]
       # @return [ListenersBatch]
       def initialize(jobs_queue)
@@ -15,18 +13,9 @@ module Karafka
         # should be able to distribute work whenever any work is done in any of the listeners
         scheduler = App.config.internal.processing.scheduler_class.new(jobs_queue)
-        @coordinators = []
         @batch = App.subscription_groups.flat_map do |_consumer_group, subscription_groups|
-          consumer_group_coordinator = Connection::ConsumerGroupCoordinator.new(
-            subscription_groups.size
-          )
-          @coordinators << consumer_group_coordinator
           subscription_groups.map do |subscription_group|
             Connection::Listener.new(
-              consumer_group_coordinator,
               subscription_group,
               jobs_queue,
               scheduler
@@ -40,6 +29,11 @@ module Karafka
       def each(&block)
         @batch.each(&block)
       end
+      # @return [Array<Listener>] active listeners
+      def active
+        select(&:active?)
+      end
     end
   end
 end

data/lib/karafka/connection/manager.rb ADDED Viewed

@@ -0,0 +1,72 @@
+# frozen_string_literal: true
+module Karafka
+  # Namespace for Kafka connection related logic
+  module Connection
+    # Connections manager responsible for starting and managing listeners connections
+    #
+    # In the OSS version it starts listeners as they are, without any connection management or
+    # resource utilization supervision, and shuts them down or quiets them when the time comes
+    class Manager
+      def initialize
+        @once_executions = Set.new
+      end
+      # Registers provided listeners and starts all of them
+      #
+      # @param listeners [Connection::ListenersBatch]
+      def register(listeners)
+        @listeners = listeners
+        @listeners.each(&:start!)
+      end
+      # @return [Boolean] true if all listeners are stopped
+      def done?
+        @listeners.all?(&:stopped?)
+      end
+      # Controls the state of listeners upon shutdown and quiet requests
+      # In both cases (quieting and shutdown) we first need to stop processing more work and tell
+      # listeners to become quiet (connected but not yielding messages) and then depending on
+      # whether we want to stop fully or just keep quiet we apply different flow.
+      #
+      # @note It is important to ensure that all listeners from the same consumer group are
+      #   quiet before we fully shut down that consumer group. Skipping this can cause
+      #   `Timed out LeaveGroupRequest in flight` and other errors. For simplicity, we just
+      #   quiet all of them and only then move forward.
+      #
+      # @note This manager works with the assumption that all listeners are started on register.
+      def control
+        # Do nothing until shutdown or quiet
+        return unless Karafka::App.done?
+        # When we are done processing, immediately quiet all the listeners so they do not pick up
+        # new work to do
+        once(:quiet!) { @listeners.each(&:quiet!) }
+        return unless @listeners.all?(&:quiet?)
+        # If we are in the process of moving to quiet state, we need to check it.
+        # Switch to quieted status only when all listeners are fully quieted and do nothing after
+        # that until further state changes
+        once(:quieted!) { Karafka::App.quieted! } if Karafka::App.quieting?
+        return if Karafka::App.quiet?
+        once(:stop!) { @listeners.each(&:stop!) }
+      end
+      private
+      # Runs code only once and never again
+      # @param args [Object] anything we want to use as a set of unique keys for given execution
+      def once(*args)
+        return if @once_executions.include?(args)
+        @once_executions << args
+        yield
+      end
+    end
+  end
+end

data/lib/karafka/connection/messages_buffer.rb CHANGED Viewed

@@ -67,6 +67,18 @@ module Karafka
         end
       end
+      # Checks if there are any messages from a given topic partition in the buffer
+      # @param topic [String] topic name
+      # @param partition [Integer] partition number
+      # @return [Boolean] true if there is at least one message from this topic partition,
+      #   otherwise false
+      def present?(topic, partition)
+        return false unless @groups.include?(topic)
+        return false unless @groups[topic].include?(partition)
+        true
+      end
       # @return [Boolean] is the buffer empty or does it contain any messages
       def empty?
         @size.zero?

data/lib/karafka/connection/proxy.rb CHANGED Viewed

@@ -68,6 +68,23 @@ module Karafka
         end
       end
+      # Similar to `#query_watermark_offsets`.
+      #
+      # @param tpl [Rdkafka::Consumer::TopicPartitionList, nil] tpl or nil for full current
+      #   assignment tpl usage
+      # @return [Rdkafka::Consumer::TopicPartitionList] tpl with committed offsets and metadata
+      def committed(tpl = nil)
+        c_config = @config.committed
+        with_broker_errors_retry(
+          # required to be in seconds, not ms
+          wait_time: c_config.wait_time / 1_000.to_f,
+          max_attempts: c_config.max_attempts
+        ) do
+          @wrapped.committed(tpl, c_config.timeout)
+        end
+      end
       private
       # Runs expected block of code with few retries on all_brokers_down

data/lib/karafka/connection/status.rb ADDED Viewed

@@ -0,0 +1,75 @@
+# frozen_string_literal: true
+module Karafka
+  # Namespace for Kafka connection related logic
+  module Connection
+    # Listener connection status representation
+    class Status
+      # Available states and their transitions.
+      STATES = {
+        pending: :pending!,
+        starting: :start!,
+        running: :running!,
+        quieting: :quiet!,
+        quiet: :quieted!,
+        stopping: :stop!,
+        stopped: :stopped!
+      }.freeze
+      STATES.each do |state, transition|
+        class_eval <<~RUBY, __FILE__, __LINE__ + 1
+          # Moves status to a different state
+          def #{transition}
+            @mutex.synchronize do
+              # Do not allow reverse state transitions (we always go one way) or transition to the
+              # same state as currently
+              return if @status && STATES.keys.index(:#{state}) <= STATES.keys.index(@status)
+              @status = :#{state}
+              @conductor.signal
+            end
+          end
+          # @return [Boolean] are we in a given state
+          def #{state}?
+            @status == :#{state}
+          end
+        RUBY
+      end
+      def initialize
+        @mutex = Mutex.new
+        @conductor = Karafka::App.config.internal.connection.conductor
+        pending!
+      end
+      # If this listener was not even running, will just move it through states until final.
+      # If it was running, will start the stopping procedures.
+      # Will do nothing if it was already stopped
+      def stop!
+        if pending?
+          @status = :stopping
+          stopped!
+        elsif stopped?
+          nil
+        else
+          @status = :stopping
+        end
+      end
+      # Moves status back from stopped to pending (and only that). We should not be able to reset
+      # listeners that are not stopped
+      def reset!
+        return unless stopped?
+        @status = :pending
+      end
+      # @return [Boolean] listener is considered active when it has a client reference that may
+      #   be active and connected to Kafka
+      def active?
+        !pending? && !stopped?
+      end
+    end
+  end
+end

data/lib/karafka/contracts/config.rb CHANGED Viewed

@@ -51,17 +51,21 @@ module Karafka
         required(:tick_interval) { |val| val.is_a?(Integer) && val >= 1_000 }
         nested(:connection) do
-          nested(:proxy) do
-            nested(:query_watermark_offsets) do
-              required(:timeout) { |val| val.is_a?(Integer) && val.positive? }
-              required(:max_attempts) { |val| val.is_a?(Integer) && val.positive? }
-              required(:wait_time) { |val| val.is_a?(Integer) && val.positive? }
-            end
+          required(:manager) { |val| !val.nil? }
+          required(:conductor) { |val| !val.nil? }
-            nested(:offsets_for_times) do
-              required(:timeout) { |val| val.is_a?(Integer) && val.positive? }
-              required(:max_attempts) { |val| val.is_a?(Integer) && val.positive? }
-              required(:wait_time) { |val| val.is_a?(Integer) && val.positive? }
+          nested(:proxy) do
+            # All of them have the same requirements
+            %i[
+              query_watermark_offsets
+              offsets_for_times
+              committed
+            ].each do |scope|
+              nested(scope) do
+                required(:timeout) { |val| val.is_a?(Integer) && val.positive? }
+                required(:max_attempts) { |val| val.is_a?(Integer) && val.positive? }
+                required(:wait_time) { |val| val.is_a?(Integer) && val.positive? }
+              end
             end
           end
         end

data/lib/karafka/contracts/consumer_group.rb CHANGED Viewed

@@ -18,7 +18,7 @@ module Karafka
       virtual do |data, errors|
         next unless errors.empty?
-        names = data.fetch(:topics).map { |topic| topic[:name] }
+        names = data.fetch(:topics).map { |topic| topic_unique_key(topic) }
         next if names.size == names.uniq.size
@@ -51,6 +51,14 @@ module Karafka
         [[%i[topics], :topics_namespaced_names_not_unique]]
       end
+      class << self
+        # @param topic [Hash] topic config hash
+        # @return [String] topic unique key for validators
+        def topic_unique_key(topic)
+          topic[:name]
+        end
+      end
     end
   end
 end

data/lib/karafka/contracts/topic.rb CHANGED Viewed

@@ -20,7 +20,9 @@ module Karafka
       required(:max_wait_time) { |val| val.is_a?(Integer) && val >= 10 }
       required(:name) { |val| val.is_a?(String) && Contracts::TOPIC_REGEXP.match?(val) }
       required(:active) { |val| [true, false].include?(val) }
-      required(:subscription_group_name) { |val| val.is_a?(String) && !val.empty? }
+      nested(:subscription_group_details) do
+        required(:name) { |val| val.is_a?(String) && !val.empty? }
+      end
       # Consumer needs to be present only if topic is active
       # We allow not to define consumer for non-active because they may be only used via admin