karafka 2.2.14 → 2.3.0.alpha2

Files changed (107)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +38 -12
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +24 -0
  6. data/Gemfile.lock +16 -16
  7. data/README.md +0 -2
  8. data/SECURITY.md +23 -0
  9. data/bin/integrations +1 -1
  10. data/config/locales/errors.yml +7 -1
  11. data/config/locales/pro_errors.yml +22 -0
  12. data/docker-compose.yml +1 -1
  13. data/karafka.gemspec +2 -2
  14. data/lib/karafka/admin/acl.rb +287 -0
  15. data/lib/karafka/admin.rb +9 -13
  16. data/lib/karafka/app.rb +5 -3
  17. data/lib/karafka/base_consumer.rb +9 -1
  18. data/lib/karafka/cli/base.rb +1 -1
  19. data/lib/karafka/connection/client.rb +83 -76
  20. data/lib/karafka/connection/conductor.rb +28 -0
  21. data/lib/karafka/connection/listener.rb +159 -42
  22. data/lib/karafka/connection/listeners_batch.rb +5 -11
  23. data/lib/karafka/connection/manager.rb +72 -0
  24. data/lib/karafka/connection/messages_buffer.rb +12 -0
  25. data/lib/karafka/connection/proxy.rb +17 -0
  26. data/lib/karafka/connection/status.rb +75 -0
  27. data/lib/karafka/contracts/config.rb +14 -10
  28. data/lib/karafka/contracts/consumer_group.rb +9 -1
  29. data/lib/karafka/contracts/topic.rb +3 -1
  30. data/lib/karafka/errors.rb +17 -0
  31. data/lib/karafka/instrumentation/logger_listener.rb +3 -0
  32. data/lib/karafka/instrumentation/notifications.rb +13 -5
  33. data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +31 -28
  34. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +20 -1
  35. data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +15 -12
  36. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +39 -36
  37. data/lib/karafka/pro/base_consumer.rb +47 -0
  38. data/lib/karafka/pro/connection/manager.rb +269 -0
  39. data/lib/karafka/pro/connection/multiplexing/listener.rb +40 -0
  40. data/lib/karafka/pro/iterator/tpl_builder.rb +1 -1
  41. data/lib/karafka/pro/iterator.rb +1 -6
  42. data/lib/karafka/pro/loader.rb +14 -0
  43. data/lib/karafka/pro/processing/coordinator.rb +2 -1
  44. data/lib/karafka/pro/processing/executor.rb +37 -0
  45. data/lib/karafka/pro/processing/expansions_selector.rb +32 -0
  46. data/lib/karafka/pro/processing/jobs/periodic.rb +41 -0
  47. data/lib/karafka/pro/processing/jobs/periodic_non_blocking.rb +32 -0
  48. data/lib/karafka/pro/processing/jobs_builder.rb +14 -3
  49. data/lib/karafka/pro/processing/offset_metadata/consumer.rb +44 -0
  50. data/lib/karafka/pro/processing/offset_metadata/fetcher.rb +131 -0
  51. data/lib/karafka/pro/processing/offset_metadata/listener.rb +46 -0
  52. data/lib/karafka/pro/processing/schedulers/base.rb +39 -23
  53. data/lib/karafka/pro/processing/schedulers/default.rb +12 -14
  54. data/lib/karafka/pro/processing/strategies/default.rb +154 -1
  55. data/lib/karafka/pro/processing/strategies/dlq/default.rb +39 -0
  56. data/lib/karafka/pro/processing/strategies/vp/default.rb +65 -25
  57. data/lib/karafka/pro/processing/virtual_offset_manager.rb +41 -11
  58. data/lib/karafka/pro/routing/features/long_running_job/topic.rb +2 -0
  59. data/lib/karafka/pro/routing/features/multiplexing/config.rb +38 -0
  60. data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +114 -0
  61. data/lib/karafka/pro/routing/features/multiplexing/patches/contracts/consumer_group.rb +42 -0
  62. data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +38 -0
  63. data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +42 -0
  64. data/lib/karafka/pro/routing/features/multiplexing/subscription_groups_builder.rb +40 -0
  65. data/lib/karafka/pro/routing/features/multiplexing.rb +59 -0
  66. data/lib/karafka/pro/routing/features/non_blocking_job/topic.rb +32 -0
  67. data/lib/karafka/pro/routing/features/non_blocking_job.rb +37 -0
  68. data/lib/karafka/pro/routing/features/offset_metadata/config.rb +33 -0
  69. data/lib/karafka/pro/routing/features/offset_metadata/contracts/topic.rb +42 -0
  70. data/lib/karafka/pro/routing/features/offset_metadata/topic.rb +65 -0
  71. data/lib/karafka/pro/routing/features/offset_metadata.rb +40 -0
  72. data/lib/karafka/pro/routing/features/patterns/contracts/consumer_group.rb +4 -0
  73. data/lib/karafka/pro/routing/features/patterns/detector.rb +18 -10
  74. data/lib/karafka/pro/routing/features/periodic_job/config.rb +37 -0
  75. data/lib/karafka/pro/routing/features/periodic_job/contracts/topic.rb +44 -0
  76. data/lib/karafka/pro/routing/features/periodic_job/topic.rb +94 -0
  77. data/lib/karafka/pro/routing/features/periodic_job.rb +27 -0
  78. data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +1 -0
  79. data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
  80. data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +7 -2
  81. data/lib/karafka/process.rb +5 -3
  82. data/lib/karafka/processing/coordinator.rb +5 -1
  83. data/lib/karafka/processing/executor.rb +16 -10
  84. data/lib/karafka/processing/executors_buffer.rb +19 -4
  85. data/lib/karafka/processing/schedulers/default.rb +3 -2
  86. data/lib/karafka/processing/strategies/default.rb +6 -0
  87. data/lib/karafka/processing/strategies/dlq.rb +36 -0
  88. data/lib/karafka/routing/builder.rb +12 -2
  89. data/lib/karafka/routing/consumer_group.rb +5 -5
  90. data/lib/karafka/routing/features/base.rb +44 -8
  91. data/lib/karafka/routing/features/dead_letter_queue/config.rb +6 -1
  92. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
  93. data/lib/karafka/routing/features/dead_letter_queue/topic.rb +9 -2
  94. data/lib/karafka/routing/subscription_group.rb +2 -2
  95. data/lib/karafka/routing/subscription_groups_builder.rb +11 -2
  96. data/lib/karafka/routing/topic.rb +8 -10
  97. data/lib/karafka/runner.rb +13 -3
  98. data/lib/karafka/server.rb +5 -9
  99. data/lib/karafka/setup/config.rb +17 -0
  100. data/lib/karafka/status.rb +23 -14
  101. data/lib/karafka/templates/karafka.rb.erb +7 -0
  102. data/lib/karafka/time_trackers/partition_usage.rb +56 -0
  103. data/lib/karafka/version.rb +1 -1
  104. data.tar.gz.sig +0 -0
  105. metadata +42 -10
  106. metadata.gz.sig +0 -0
  107. data/lib/karafka/connection/consumer_group_coordinator.rb +0 -48
data/lib/karafka/connection/client.rb
@@ -10,6 +10,10 @@ module Karafka
  class Client
  attr_reader :rebalance_manager
 
+ # @return [Karafka::Routing::SubscriptionGroup] subscription group to which this client
+ # belongs to
+ attr_reader :subscription_group
+
  # @return [String] underlying consumer name
  # @note Consumer name may change in case we regenerate it
  attr_reader :name
@@ -20,16 +24,7 @@ module Karafka
  # How many times should we retry polling in case of a failure
  MAX_POLL_RETRIES = 20
 
- # 1 minute of max wait for the first rebalance before a forceful attempt
- # This applies only to a case when a short-lived Karafka instance with a client would be
- # closed before first rebalance. Mitigates a librdkafka bug.
- COOPERATIVE_STICKY_MAX_WAIT = 60_000
-
- # We want to make sure we never close several clients in the same moment to prevent
- # potential race conditions and other issues
- SHUTDOWN_MUTEX = Mutex.new
-
- private_constant :MAX_POLL_RETRIES, :SHUTDOWN_MUTEX, :COOPERATIVE_STICKY_MAX_WAIT
+ private_constant :MAX_POLL_RETRIES
 
  # Creates a new consumer instance.
  #
@@ -47,7 +42,6 @@ module Karafka
  @rebalance_manager = RebalanceManager.new(@subscription_group.id)
  @rebalance_callback = Instrumentation::Callbacks::Rebalance.new(@subscription_group)
  @events_poller = Helpers::IntervalRunner.new { events_poll }
- @kafka = build_consumer
  # There are few operations that can happen in parallel from the listener threads as well
  # as from the workers. They are not fully thread-safe because they may be composed out of
  # few calls to Kafka or out of few internal state changes. That is why we mutex them.
@@ -122,13 +116,19 @@ module Karafka
  # Stores offset for a given partition of a given topic based on the provided message.
  #
  # @param message [Karafka::Messages::Message]
- def store_offset(message)
- internal_store_offset(message)
+ # @param offset_metadata [String, nil] offset storage metadata or nil if none
+ def store_offset(message, offset_metadata = nil)
+ internal_store_offset(message, offset_metadata)
  end
 
  # @return [Boolean] true if our current assignment has been lost involuntarily.
  def assignment_lost?
- @kafka.assignment_lost?
+ kafka.assignment_lost?
+ end
+
+ # @return [Rdkafka::Consumer::TopicPartitionList] current active assignment
+ def assignment
+ kafka.assignment
  end
 
  # Commits the offset on a current consumer in a non-blocking or blocking way.
@@ -199,7 +199,7 @@ module Karafka
 
  @paused_tpls[topic][partition] = tpl
 
- @kafka.pause(tpl)
+ kafka.pause(tpl)
 
  # If offset is not provided, will pause where it finished.
  # This makes librdkafka not purge buffers and can provide significant network savings
@@ -240,43 +240,23 @@ module Karafka
  partition: partition
  )
 
- @kafka.resume(tpl)
+ kafka.resume(tpl)
  end
  end
 
  # Gracefully stops topic consumption.
- #
- # @note Stopping running consumers without a really important reason is not recommended
- # as until all the consumers are stopped, the server will keep running serving only
- # part of the messages
  def stop
- # This ensures, that we do not stop the underlying client until it passes the first
- # rebalance for cooperative-sticky. Otherwise librdkafka may crash
- #
- # We set a timeout just in case the rebalance would never happen or would last for an
- # extensive time period.
- #
- # @see https://github.com/confluentinc/librdkafka/issues/4312
+ # In case of cooperative-sticky, there is a bug in librdkafka that may hang it.
+ # To mitigate it we first need to unsubscribe so we will not receive any assignments and
+ # only then we should be good to go.
+ # @see https://github.com/confluentinc/librdkafka/issues/4527
  if @subscription_group.kafka[:'partition.assignment.strategy'] == 'cooperative-sticky'
- active_wait = false
-
- (COOPERATIVE_STICKY_MAX_WAIT / 100).times do
- # If we're past the first rebalance, no need to wait
- if @rebalance_manager.active?
- # We give it a a bit of time because librdkafka has a tendency to do some-post
- # callback work that from its perspective is still under rebalance
- sleep(5) if active_wait
-
- break
- end
-
- active_wait = true
-
- # poll to trigger potential rebalances that could occur during stopping and to trigger
- # potential callbacks
- poll(100)
+ unsubscribe
 
+ until assignment.empty?
  sleep(0.1)
+
+ ping
  end
  end
 
@@ -285,21 +265,23 @@ module Karafka
 
  # Marks given message as consumed.
  #
- # @param [Karafka::Messages::Message] message that we want to mark as processed
+ # @param message [Karafka::Messages::Message] message that we want to mark as processed
+ # @param metadata [String, nil] offset storage metadata or nil if none
  # @return [Boolean] true if successful. False if we no longer own given partition
  # @note This method won't trigger automatic offsets commits, rather relying on the offset
  # check-pointing trigger that happens with each batch processed. It will however check the
  # `librdkafka` assignment ownership to increase accuracy for involuntary revocations.
- def mark_as_consumed(message)
- store_offset(message) && !assignment_lost?
+ def mark_as_consumed(message, metadata = nil)
+ store_offset(message, metadata) && !assignment_lost?
  end
 
  # Marks a given message as consumed and commits the offsets in a blocking way.
  #
- # @param [Karafka::Messages::Message] message that we want to mark as processed
+ # @param message [Karafka::Messages::Message] message that we want to mark as processed
+ # @param metadata [String, nil] offset storage metadata or nil if none
  # @return [Boolean] true if successful. False if we no longer own given partition
- def mark_as_consumed!(message)
- return false unless mark_as_consumed(message)
+ def mark_as_consumed!(message, metadata = nil)
+ return false unless mark_as_consumed(message, metadata)
 
  commit_offsets!
  end
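
For orientation, a minimal usage sketch of the new metadata argument. The `client` and `message` variables are hypothetical stand-ins for a `Karafka::Connection::Client` instance and a consumed `Karafka::Messages::Message`; the JSON payload is only an example of what one might store, not something the diff prescribes.

```ruby
require 'json'
require 'time'

# Hypothetical example: attach a small JSON payload as offset storage metadata.
# It is stored in Kafka next to the committed offset and can be read back later
# (see the new #committed method further down in this diff).
metadata = { 'processed_at' => Time.now.utc.iso8601, 'worker' => 'worker-1' }.to_json

# Non-blocking: stores the offset plus metadata for the periodic offset commit
client.mark_as_consumed(message, metadata)

# Blocking variant: stores the offset plus metadata and commits synchronously
client.mark_as_consumed!(message, metadata)
```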
@@ -316,7 +298,6 @@ module Karafka
  @events_poller.reset
  @closed = false
  @paused_tpls.clear
- @kafka = build_consumer
  end
  end
 
@@ -343,7 +324,27 @@ module Karafka
  # @note It is non-blocking when timeout 0 and will not wait if queue empty. It costs up to
  # 2ms when no callbacks are triggered.
  def events_poll(timeout = 0)
- @kafka.events_poll(timeout)
+ kafka.events_poll(timeout)
+ end
+
+ # Returns pointer to the consumer group metadata. It is used only in the context of
+ # exactly-once-semantics in transactions, this is why it is never remapped to Ruby
+ # @return [FFI::Pointer]
+ def consumer_group_metadata_pointer
+ kafka.consumer_group_metadata_pointer
+ end
+
+ # Return the current committed offset per partition for this consumer group.
+ # The offset field of each requested partition will either be set to stored offset or to
+ # -1001 in case there was no stored offset for that partition.
+ #
+ # @param tpl [Rdkafka::Consumer::TopicPartitionList] for which we want to get committed
+ # @return [Rdkafka::Consumer::TopicPartitionList]
+ # @raise [Rdkafka::RdkafkaError] When getting the committed positions fails.
+ # @note It is recommended to use this only on rebalances to get positions with metadata
+ # when working with metadata as this is synchronous
+ def committed(tpl = nil)
+ Proxy.new(kafka).committed(tpl)
  end
 
  private
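
A rough sketch of how the new `#committed` wrapper might be used to inspect committed positions for the current assignment. The `client` variable is a hypothetical `Karafka::Connection::Client` instance; presumably the Pro offset-metadata fetcher listed in the files above builds on this call.

```ruby
# Hypothetical example: on a rebalance, read back the committed positions for
# whatever is currently assigned to this consumer. Per the docs above, partitions
# without a stored offset come back with offset -1001.
tpl = client.assignment
committed = client.committed(tpl)

committed.to_h.each do |topic, partitions|
  partitions.each do |partition|
    puts "#{topic}##{partition.partition} committed at #{partition.offset}"
  end
end
```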
@@ -352,9 +353,10 @@ module Karafka
  #
  # Non thread-safe offset storing method
  # @param message [Karafka::Messages::Message]
+ # @param metadata [String, nil] offset storage metadata or nil if none
  # @return [Boolean] true if we could store the offset (if we still own the partition)
- def internal_store_offset(message)
- @kafka.store_offset(message)
+ def internal_store_offset(message, metadata)
+ kafka.store_offset(message, metadata)
  true
  rescue Rdkafka::RdkafkaError => e
  return false if e.code == :assignment_lost
@@ -370,7 +372,7 @@ module Karafka
  # even when no stored, because with sync commit, it refreshes the ownership state of the
  # consumer in a sync way.
  def internal_commit_offsets(async: true)
- @kafka.commit(nil, async)
+ kafka.commit(nil, async)
 
  true
  rescue Rdkafka::RdkafkaError => e
@@ -407,7 +409,7 @@ module Karafka
  message.partition => message.offset
  )
 
- proxy = Proxy.new(@kafka)
+ proxy = Proxy.new(kafka)
 
  # Now we can overwrite the seek message offset with our resolved offset and we can
  # then seek to the appropriate message
@@ -429,29 +431,29 @@ module Karafka
  # seeking and pausing
  return if message.offset == topic_partition_position(message.topic, message.partition)
 
- @kafka.seek(message)
+ kafka.seek(message)
  end
 
  # Commits the stored offsets in a sync way and closes the consumer.
  def close
- # Allow only one client to be closed at the same time
- SHUTDOWN_MUTEX.synchronize do
- # Once client is closed, we should not close it again
- # This could only happen in case of a race-condition when forceful shutdown happens
- # and triggers this from a different thread
- return if @closed
+ # Once client is closed, we should not close it again
+ # This could only happen in case of a race-condition when forceful shutdown happens
+ # and triggers this from a different thread
+ return if @closed
 
- @closed = true
+ @closed = true
 
- # Remove callbacks runners that were registered
- ::Karafka::Core::Instrumentation.statistics_callbacks.delete(@subscription_group.id)
- ::Karafka::Core::Instrumentation.error_callbacks.delete(@subscription_group.id)
+ return unless @kafka
 
- @kafka.close
- @buffer.clear
- # @note We do not clear rebalance manager here as we may still have revocation info
- # here that we want to consider valid prior to running another reconnection
- end
+ # Remove callbacks runners that were registered
+ ::Karafka::Core::Instrumentation.statistics_callbacks.delete(@subscription_group.id)
+ ::Karafka::Core::Instrumentation.error_callbacks.delete(@subscription_group.id)
+
+ kafka.close
+ @kafka = nil
+ @buffer.clear
+ # @note We do not clear rebalance manager here as we may still have revocation info
+ # here that we want to consider valid prior to running another reconnection
  end
 
  # Unsubscribes from all the subscriptions
@@ -459,7 +461,7 @@ module Karafka
  # @note We do not re-raise since this is supposed to be only used on close and can be safely
  # ignored. We do however want to instrument on it
  def unsubscribe
- @kafka.unsubscribe
+ kafka.unsubscribe
  rescue ::Rdkafka::RdkafkaError => e
  Karafka.monitor.instrument(
  'error.occurred',
@@ -473,7 +475,7 @@ module Karafka
  # @param partition [Integer]
  # @return [Rdkafka::Consumer::TopicPartitionList]
  def topic_partition_list(topic, partition)
- rdkafka_partition = @kafka
+ rdkafka_partition = kafka
  .assignment
  .to_h[topic]
  &.detect { |part| part.partition == partition }
@@ -492,7 +494,7 @@ module Karafka
  rd_partition = ::Rdkafka::Consumer::Partition.new(partition, nil, 0)
  tpl = ::Rdkafka::Consumer::TopicPartitionList.new(topic => [rd_partition])
 
- @kafka.position(tpl).to_h.fetch(topic).first.offset || -1
+ kafka.position(tpl).to_h.fetch(topic).first.offset || -1
  end
 
  # Performs a single poll operation and handles retries and errors
@@ -520,7 +522,7 @@ module Karafka
  # blocking events from being handled.
  poll_tick = timeout > @tick_interval ? @tick_interval : timeout
 
- result = @kafka.poll(poll_tick)
+ result = kafka.poll(poll_tick)
 
  # If we've got a message, we can return it
  return result if result
@@ -647,6 +649,11 @@ module Karafka
 
  @buffer.uniq!
  end
+
+ # @return [Rdkafka::Consumer] librdkafka consumer instance
+ def kafka
+ @kafka ||= build_consumer
+ end
  end
  end
  end
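
With `build_consumer` no longer invoked in `initialize` or `reset`, the rdkafka consumer is now created lazily on first use and dropped on `close` (note the `@kafka = nil` above). A simplified, self-contained sketch of that pattern; `LazyClient` and `FakeConsumer` are made-up names for illustration, not part of Karafka.

```ruby
# Simplified illustration of the lazy build/teardown pattern used by the client.
FakeConsumer = Struct.new(:id) do
  def close; end
end

class LazyClient
  # Closes the underlying consumer (if it was ever built) and drops the
  # reference, so the next #kafka call transparently rebuilds it
  def close
    return unless @kafka

    @kafka.close
    @kafka = nil
  end

  private

  # Builds the underlying consumer only when something actually needs it
  def kafka
    @kafka ||= FakeConsumer.new(rand(1000))
  end
end
```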
data/lib/karafka/connection/conductor.rb (new file)
@@ -0,0 +1,28 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ module Connection
+ # Conductor is responsible for time orchestration of listeners manager.
+ # It blocks when manager is not needed as there were no state changes that could cause any
+ # listeners config changes and unblocks when things change or when certain time passed.
+ # The time based unblocking allows for building of complex managers that could be state aware
+ class Conductor
+ # @param max_interval [Integer] after how many milliseconds of doing nothing should we wake
+ # up the manager despite no state changes
+ def initialize(max_interval = 30_000)
+ @lock = RUBY_VERSION < '3.2' ? Processing::TimedQueue.new : Queue.new
+ @timeout = max_interval / 1_000.0
+ end
+
+ # Waits in a blocking way until it is time to manage listeners
+ def wait
+ @lock.pop(timeout: @timeout)
+ end
+
+ # Releases wait lock on state change
+ def signal
+ @lock << true
+ end
+ end
+ end
+ end
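
The `RUBY_VERSION` branch exists because `Queue#pop` only gained its `timeout:` keyword in Ruby 3.2, hence the `Processing::TimedQueue` fallback for older Rubies. Below is a rough, hypothetical sketch of how a manager loop could be driven by `wait`/`signal`; only the two Conductor methods come from this diff, everything else is made up for illustration and assumes the class above is loaded under Ruby 3.2+.

```ruby
# Hypothetical orchestration sketch for Conductor#wait / #signal.
conductor = Karafka::Connection::Conductor.new(5_000)
running = true

manager = Thread.new do
  while running
    # Blocks for up to 5 seconds, or until something calls conductor.signal
    conductor.wait
    puts 'rechecking listeners...'
  end
end

# Any relevant state change (e.g. a quiet or stop request) wakes the manager
# immediately instead of waiting out the interval
conductor.signal

sleep(0.2)
running = false
conductor.signal
manager.join
```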