karafka 2.4.18 → 2.5.0.beta2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. checksums.yaml +4 -4
  2. data/.github/CODEOWNERS +3 -0
  3. data/.github/workflows/ci.yml +58 -14
  4. data/.github/workflows/push.yml +36 -0
  5. data/.github/workflows/verify-action-pins.yml +16 -0
  6. data/.ruby-version +1 -1
  7. data/CHANGELOG.md +60 -0
  8. data/Gemfile +2 -2
  9. data/Gemfile.lock +69 -50
  10. data/LICENSE-COMM +2 -2
  11. data/README.md +1 -1
  12. data/Rakefile +4 -0
  13. data/bin/clean_kafka +43 -0
  14. data/bin/integrations +19 -6
  15. data/bin/rspecs +15 -3
  16. data/bin/verify_kafka_warnings +35 -0
  17. data/bin/verify_topics_naming +27 -0
  18. data/config/locales/errors.yml +3 -0
  19. data/config/locales/pro_errors.yml +13 -2
  20. data/docker-compose.yml +1 -1
  21. data/examples/payloads/json/enrollment_event.json +579 -0
  22. data/examples/payloads/json/ingestion_event.json +30 -0
  23. data/examples/payloads/json/transaction_event.json +17 -0
  24. data/examples/payloads/json/user_event.json +11 -0
  25. data/karafka.gemspec +3 -8
  26. data/lib/karafka/active_job/current_attributes.rb +1 -1
  27. data/lib/karafka/admin/acl.rb +5 -1
  28. data/lib/karafka/admin/configs.rb +5 -1
  29. data/lib/karafka/admin.rb +69 -34
  30. data/lib/karafka/base_consumer.rb +17 -8
  31. data/lib/karafka/cli/base.rb +8 -2
  32. data/lib/karafka/cli/topics/align.rb +7 -4
  33. data/lib/karafka/cli/topics/base.rb +17 -0
  34. data/lib/karafka/cli/topics/create.rb +9 -7
  35. data/lib/karafka/cli/topics/delete.rb +4 -2
  36. data/lib/karafka/cli/topics/help.rb +39 -0
  37. data/lib/karafka/cli/topics/repartition.rb +4 -2
  38. data/lib/karafka/cli/topics.rb +10 -3
  39. data/lib/karafka/cli.rb +2 -0
  40. data/lib/karafka/connection/client.rb +30 -9
  41. data/lib/karafka/connection/listener.rb +24 -12
  42. data/lib/karafka/connection/messages_buffer.rb +1 -1
  43. data/lib/karafka/connection/proxy.rb +3 -0
  44. data/lib/karafka/constraints.rb +3 -3
  45. data/lib/karafka/contracts/config.rb +3 -0
  46. data/lib/karafka/contracts/topic.rb +1 -1
  47. data/lib/karafka/errors.rb +46 -2
  48. data/lib/karafka/helpers/async.rb +3 -1
  49. data/lib/karafka/instrumentation/callbacks/rebalance.rb +5 -1
  50. data/lib/karafka/instrumentation/logger_listener.rb +86 -23
  51. data/lib/karafka/instrumentation/proctitle_listener.rb +5 -1
  52. data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +2 -2
  53. data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
  54. data/lib/karafka/pro/cleaner.rb +8 -0
  55. data/lib/karafka/pro/cli/parallel_segments/base.rb +89 -0
  56. data/lib/karafka/pro/cli/parallel_segments/collapse.rb +164 -0
  57. data/lib/karafka/pro/cli/parallel_segments/distribute.rb +164 -0
  58. data/lib/karafka/pro/cli/parallel_segments.rb +60 -0
  59. data/lib/karafka/pro/connection/manager.rb +5 -8
  60. data/lib/karafka/pro/encryption.rb +8 -0
  61. data/lib/karafka/pro/instrumentation/performance_tracker.rb +1 -1
  62. data/lib/karafka/pro/iterator/expander.rb +5 -3
  63. data/lib/karafka/pro/iterator/tpl_builder.rb +23 -0
  64. data/lib/karafka/pro/loader.rb +10 -0
  65. data/lib/karafka/pro/processing/coordinator.rb +4 -1
  66. data/lib/karafka/pro/processing/coordinators/errors_tracker.rb +27 -3
  67. data/lib/karafka/pro/processing/coordinators/filters_applier.rb +11 -0
  68. data/lib/karafka/pro/processing/filters/base.rb +10 -2
  69. data/lib/karafka/pro/processing/filters/expirer.rb +5 -0
  70. data/lib/karafka/pro/processing/filters/inline_insights_delayer.rb +2 -2
  71. data/lib/karafka/pro/processing/filters/virtual_limiter.rb +5 -0
  72. data/lib/karafka/pro/processing/parallel_segments/filters/base.rb +73 -0
  73. data/lib/karafka/pro/processing/parallel_segments/filters/default.rb +85 -0
  74. data/lib/karafka/pro/processing/parallel_segments/filters/mom.rb +66 -0
  75. data/lib/karafka/pro/processing/partitioner.rb +1 -13
  76. data/lib/karafka/pro/processing/piping/consumer.rb +13 -13
  77. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
  78. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
  79. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
  80. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
  81. data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +1 -1
  82. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
  83. data/lib/karafka/pro/processing/strategies/default.rb +36 -8
  84. data/lib/karafka/pro/processing/strategies/dlq/default.rb +14 -10
  85. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +1 -1
  86. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +1 -1
  87. data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +3 -1
  88. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +1 -1
  89. data/lib/karafka/pro/processing/strategies/ftr/default.rb +1 -1
  90. data/lib/karafka/pro/processing/strategies/lrj/default.rb +4 -1
  91. data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +1 -1
  92. data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +1 -1
  93. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
  94. data/lib/karafka/pro/processing/virtual_partitions/distributors/balanced.rb +50 -0
  95. data/lib/karafka/pro/processing/virtual_partitions/distributors/base.rb +29 -0
  96. data/lib/karafka/pro/processing/virtual_partitions/distributors/consistent.rb +27 -0
  97. data/lib/karafka/pro/recurring_tasks/contracts/config.rb +8 -4
  98. data/lib/karafka/pro/recurring_tasks/dispatcher.rb +3 -3
  99. data/lib/karafka/pro/recurring_tasks/setup/config.rb +7 -2
  100. data/lib/karafka/pro/recurring_tasks.rb +13 -0
  101. data/lib/karafka/pro/routing/features/dead_letter_queue/topic.rb +1 -1
  102. data/lib/karafka/pro/routing/features/multiplexing/config.rb +1 -0
  103. data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +17 -0
  104. data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +5 -2
  105. data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +8 -1
  106. data/lib/karafka/pro/routing/features/parallel_segments/builder.rb +47 -0
  107. data/lib/karafka/pro/routing/features/parallel_segments/config.rb +27 -0
  108. data/lib/karafka/pro/routing/features/parallel_segments/consumer_group.rb +83 -0
  109. data/lib/karafka/pro/routing/features/parallel_segments/contracts/consumer_group.rb +49 -0
  110. data/lib/karafka/pro/routing/features/parallel_segments/topic.rb +43 -0
  111. data/lib/karafka/pro/routing/features/parallel_segments.rb +24 -0
  112. data/lib/karafka/pro/routing/features/patterns/pattern.rb +1 -1
  113. data/lib/karafka/pro/routing/features/recurring_tasks/builder.rb +2 -2
  114. data/lib/karafka/pro/routing/features/scheduled_messages/builder.rb +10 -6
  115. data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +20 -2
  116. data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
  117. data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +8 -2
  118. data/lib/karafka/pro/scheduled_messages/consumer.rb +19 -21
  119. data/lib/karafka/pro/scheduled_messages/daily_buffer.rb +9 -6
  120. data/lib/karafka/pro/scheduled_messages/deserializers/headers.rb +7 -1
  121. data/lib/karafka/pro/scheduled_messages/max_epoch.rb +15 -6
  122. data/lib/karafka/pro/scheduled_messages.rb +13 -0
  123. data/lib/karafka/processing/coordinators_buffer.rb +1 -0
  124. data/lib/karafka/processing/strategies/default.rb +4 -4
  125. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
  126. data/lib/karafka/routing/subscription_group.rb +1 -1
  127. data/lib/karafka/runner.rb +7 -1
  128. data/lib/karafka/server.rb +19 -19
  129. data/lib/karafka/setup/attributes_map.rb +2 -0
  130. data/lib/karafka/setup/config.rb +22 -1
  131. data/lib/karafka/setup/defaults_injector.rb +26 -1
  132. data/lib/karafka/status.rb +6 -1
  133. data/lib/karafka/swarm/node.rb +31 -0
  134. data/lib/karafka/swarm/supervisor.rb +4 -0
  135. data/lib/karafka/templates/karafka.rb.erb +14 -1
  136. data/lib/karafka/version.rb +1 -1
  137. data/lib/karafka.rb +17 -9
  138. data/renovate.json +14 -2
  139. metadata +40 -40
  140. checksums.yaml.gz.sig +0 -0
  141. data/certs/cert.pem +0 -26
  142. data.tar.gz.sig +0 -0
  143. metadata.gz.sig +0 -0
@@ -8,15 +8,33 @@ module Karafka
  module Routing
  module Features
  class VirtualPartitions < Base
- # Config for virtual partitions
+ # Configuration for virtual partitions feature
  Config = Struct.new(
  :active,
  :partitioner,
  :max_partitions,
  :offset_metadata_strategy,
  :reducer,
+ :distribution,
  keyword_init: true
- ) { alias_method :active?, :active }
+ ) do
+ # @return [Boolean] is this feature active
+ def active?
+ active
+ end
+
+ # @return [Object] distributor instance for the current distribution
+ def distributor
+ @distributor ||= case distribution
+ when :balanced
+ Processing::VirtualPartitions::Distributors::Balanced.new(self)
+ when :consistent
+ Processing::VirtualPartitions::Distributors::Consistent.new(self)
+ else
+ raise Karafka::Errors::UnsupportedCaseError, distribution
+ end
+ end
+ end
  end
  end
  end
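
Hypothetical usage sketch (topic, consumer and header names are illustrative, not taken from the package): the new `distribution` member added above is set through the `virtual_partitions` routing declaration, which this diff extends further below with a `distribution:` keyword.

class KarafkaApp < Karafka::App
  routes.draw do
    topic :orders_states do
      consumer OrdersStatesConsumer
      virtual_partitions(
        # Group messages per order while letting Karafka balance groups across workers
        partitioner: ->(message) { message.headers['order_id'] },
        distribution: :balanced
      )
    end
  end
end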
@@ -26,6 +26,7 @@ module Karafka
  required(:reducer) { |val| val.respond_to?(:call) }
  required(:max_partitions) { |val| val.is_a?(Integer) && val >= 1 }
  required(:offset_metadata_strategy) { |val| %i[exact current].include?(val) }
+ required(:distribution) { |val| %i[consistent balanced].include?(val) }
  end

  # When virtual partitions are defined, partitioner needs to respond to `#call` and it
@@ -20,13 +20,18 @@ module Karafka
  # the most recently reported metadata
  # @param reducer [nil, #call] reducer for VPs key. It allows for using a custom
  # reducer to achieve enhanced parallelization when the default reducer is not enough.
+ # @param distribution [Symbol] the strategy to use for virtual partitioning. Can be
+ # either `:consistent` or `:balanced`. The `:balanced` strategy ensures balanced
+ # distribution of work across available workers while maintaining message order
+ # within groups.
  # @return [VirtualPartitions] method that allows to set the virtual partitions details
  # during the routing configuration and then allows to retrieve it
  def virtual_partitions(
  max_partitions: Karafka::App.config.concurrency,
  partitioner: nil,
  offset_metadata_strategy: :current,
- reducer: nil
+ reducer: nil,
+ distribution: :consistent
  )
  @virtual_partitions ||= Config.new(
  active: !partitioner.nil?,
@@ -35,7 +40,8 @@ module Karafka
  offset_metadata_strategy: offset_metadata_strategy,
  # If no reducer provided, we use this one. It just runs a modulo on the sum of
  # a stringified version, providing fairly good distribution.
- reducer: reducer || ->(virtual_key) { virtual_key.to_s.sum % max_partitions }
+ reducer: reducer || ->(virtual_key) { virtual_key.to_s.sum % max_partitions },
+ distribution: distribution
  )
  end

@@ -8,6 +8,10 @@ module Karafka
  module ScheduledMessages
  # Consumer that coordinates scheduling of messages when the time comes
  class Consumer < ::Karafka::BaseConsumer
+ include Helpers::ConfigImporter.new(
+ dispatcher_class: %i[scheduled_messages dispatcher_class]
+ )
+
  # Prepares the initial state of all stateful components
  def initialized
  clear!
@@ -52,6 +56,9 @@ module Karafka

  # If end of the partition is reached, it always means all data is loaded
  @state.loaded!
+
+ tags.add(:state, @state.to_s)
+
  @states_reporter.call
  end

@@ -64,7 +71,6 @@ module Karafka
  return unless @state.loaded?

  keys = []
- epochs = []

  # We first collect all the data for dispatch and then dispatch and **only** after
  # dispatch that is sync is successful we remove those messages from the daily buffer
@@ -72,16 +78,13 @@ module Karafka
  # with timeouts, etc, we need to be sure it wen through prior to deleting those messages
  # from the daily buffer. That way we ensure the at least once delivery and in case of
  # a transactional producer, exactly once delivery.
- @daily_buffer.for_dispatch do |epoch, message|
- epochs << epoch
+ @daily_buffer.for_dispatch do |message|
  keys << message.key
  @dispatcher << message
  end

  @dispatcher.flush

- @max_epoch.update(epochs.max)
-
  keys.each { |key| @daily_buffer.delete(key) }

  @states_reporter.call
@@ -93,14 +96,6 @@ module Karafka
  # accumulator and time related per-message operations.
  # @param message [Karafka::Messages::Message]
  def process_message(message)
- # If we started to receive messages younger than the moment we created the consumer for
- # the given day, it means we have loaded all the history and we are no longer in the
- # loading phase.
- if message.timestamp.to_i > @today.created_at
- @state.loaded!
- tags.add(:state, @state.to_s)
- end
-
  # If this is a schedule message we need to check if this is for today. Tombstone events
  # are always considered immediate as they indicate, that a message with a given key
  # was already dispatched or that user decided not to dispatch and cancelled the dispatch
@@ -120,6 +115,14 @@ module Karafka
  end
  end

+ # Tombstone events are only published after we have dispatched given message. This means
+ # that we've got that far in the dispatching time. This allows us (with a certain buffer)
+ # to quickly reject older messages (older in sense of being scheduled for previous times)
+ # instead of loading them into memory until they are expired
+ if message.headers['schedule_source_type'] == 'tombstone'
+ @max_epoch.update(message.headers['schedule_target_epoch'])
+ end
+
  # Add to buffer all tombstones and messages for the same day
  @daily_buffer << message
  end
@@ -129,7 +132,7 @@ module Karafka
  # If this is a new assignment we always need to seek from beginning to load the data
  if @state.fresh?
  clear!
- seek(0)
+ seek(:earliest)

  return true
  end
@@ -140,7 +143,7 @@ module Karafka
  # If day has ended we reload and start new day with new schedules
  if @today.ended?
  clear!
- seek(0)
+ seek(:earliest)

  return true
  end
@@ -156,7 +159,7 @@ module Karafka
  @today = Day.new
  @tracker = Tracker.new
  @state = State.new(false)
- @dispatcher = config.dispatcher_class.new(topic.name, partition)
+ @dispatcher = dispatcher_class.new(topic.name, partition)
  @states_reporter = Helpers::IntervalRunner.new do
  @tracker.today = @daily_buffer.size
  @tracker.state = @state.to_s
@@ -166,11 +169,6 @@ module Karafka

  tags.add(:state, @state.to_s)
  end
-
- # @return [Karafka::Core::Configurable::Node] Schedules config node
- def config
- @config ||= Karafka::App.config.scheduled_messages
- end
  end
  end
  end
@@ -45,19 +45,22 @@ module Karafka

  # Yields messages that should be dispatched (sent) to Kafka
  #
- # @yieldparam [Integer, Karafka::Messages::Message] epoch of the message and the message
- # itself
- #
- # @note We yield epoch alongside of the message so we do not have to extract it several
- # times later on. This simplifies the API
+ # @yieldparam [Karafka::Messages::Message] messages to be dispatched sorted from the once
+ # that are the oldest (lowest epoch)
  def for_dispatch
  dispatch = Time.now.to_i

+ selected = []
+
  @accu.each_value do |epoch, message|
  next unless epoch <= dispatch

- yield(epoch, message)
+ selected << [epoch, message]
  end
+
+ selected
+ .sort_by!(&:first)
+ .each { |_, message| yield(message) }
  end

  # Removes given key from the accumulator
@@ -10,6 +10,12 @@ module Karafka
  module Deserializers
  # Converts certain pieces of headers into their integer form for messages
  class Headers
+ # We only directly operate on epoch and other details for schedules and tombstones.
+ # cancel requests don't have to be deserialized that way since they don't have epoch
+ WORKABLE_TYPES = %w[schedule tombstone].freeze
+
+ private_constant :WORKABLE_TYPES
+
  # @param metadata [Karafka::aMessages::Metadata]
  # @return [Hash] headers
  def call(metadata)
@@ -19,7 +25,7 @@ module Karafka

  # tombstone and cancellation events are not operable, thus we do not have to cast any
  # of the headers pieces
- return raw_headers unless type == 'schedule'
+ return raw_headers unless WORKABLE_TYPES.include?(type)

  headers = raw_headers.dup
  headers['schedule_target_epoch'] = headers['schedule_target_epoch'].to_i
@@ -10,22 +10,31 @@ module Karafka
  # until which messages were dispatched by us. This allows us to quickly skip those messages
  # during recovery, because we do know, they were dispatched.
  class MaxEpoch
+ # We always give a bit of a buffer when using the max dispatch epoch because while we
+ # are dispatching messages, we could also later receive data for time close to our
+ # dispatch times. This is why when reloading days we give ourselves one hour of a window
+ # that we will keep until tombstones expire them. This prevents edge cases race-conditions
+ # when multiple scheduled events scheduled close to each other would bump epoch in such a
+ # way, that it would end up ignoring certain events.
+ GRACE_PERIOD = 60 * 60
+
+ private_constant :GRACE_PERIOD
+
+ # @return [Integer] max epoch recorded
+ attr_reader :to_i
+
  def initialize
  @max = -1
+ @to_i = @max
  end

  # Updates epoch if bigger than current max
  # @param new_max [Integer] potential new max epoch
  def update(new_max)
- return unless new_max
  return unless new_max > @max

  @max = new_max
- end
-
- # @return [Integer] max epoch recorded
- def to_i
- @max
+ @to_i = @max - GRACE_PERIOD
  end
  end
  end
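
Hypothetical sketch of the grace-period behavior introduced above (the epoch value is illustrative): `#to_i` now reports the tracked maximum lowered by the one-hour window, so schedules dispatched close to the reload moment are not rejected prematurely.

max_epoch = Karafka::Pro::ScheduledMessages::MaxEpoch.new
max_epoch.update(1_700_000_000)
max_epoch.to_i # => 1_699_996_400, i.e. 1_700_000_000 - 3_600 (GRACE_PERIOD)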
@@ -53,6 +53,19 @@ module Karafka
  def post_setup(config)
  RecurringTasks::Contracts::Config.new.validate!(config.to_h)
  end
+
+ # Basically since we may have custom producers configured that are not the same as the
+ # default one, we hold a reference to old pre-fork producer. This means, that when we
+ # initialize it again in post-fork, as long as user uses defaults we should re-inherit
+ # it from the default config.
+ #
+ # @param config [Karafka::Core::Configurable::Node]
+ # @param pre_fork_producer [WaterDrop::Producer]
+ def post_fork(config, pre_fork_producer)
+ return unless config.scheduled_messages.producer == pre_fork_producer
+
+ config.scheduled_messages.producer = config.producer
+ end
  end
  end
  end
@@ -22,6 +22,7 @@ module Karafka

  # @param topic_name [String] topic name
  # @param partition [Integer] partition number
+ # @return [Karafka::Processing::Coordinator] found or created coordinator
  def find_or_create(topic_name, partition)
  @coordinators[topic_name][partition] ||= begin
  routing_topic = @topics.find(topic_name)
@@ -55,8 +55,8 @@ module Karafka
  # seek offset can be nil only in case `#seek` was invoked with offset reset request
  # In case like this we ignore marking
  return true if seek_offset.nil?
- # Ignore earlier offsets than the one we already committed
- return true if seek_offset > message.offset
+ # Ignore double markings of the same offset
+ return true if (seek_offset - 1) == message.offset
  return false if revoked?
  return revoked? unless client.mark_as_consumed(message)

@@ -74,8 +74,8 @@ module Karafka
  # seek offset can be nil only in case `#seek` was invoked with offset reset request
  # In case like this we ignore marking
  return true if seek_offset.nil?
- # Ignore earlier offsets than the one we already committed
- return true if seek_offset > message.offset
+ # Ignore double markings of the same offset
+ return true if (seek_offset - 1) == message.offset
  return false if revoked?

  return revoked? unless client.mark_as_consumed!(message)
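
Hypothetical consumer sketch (class and flow are illustrative): with the guard changed above, re-marking the offset that was just marked is treated as an already handled double marking, while older offsets no longer fall through the broader earlier-offset check.

class EventsConsumer < Karafka::BaseConsumer
  def consume
    messages.each do |message|
      mark_as_consumed(message)  # moves the internal seek_offset to message.offset + 1
      mark_as_consumed(message)  # (seek_offset - 1) == message.offset, so this short-circuits as a double marking
    end
  end
end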
@@ -46,6 +46,7 @@ module Karafka
  # When topic is set to false, it means we just want to skip dispatch on DLQ
  next if topic == false
  next if topic.is_a?(String) && topic_regexp.match?(topic)
+ next if topic == :strategy

  [[%i[dead_letter_queue topic], :format]]
  end
@@ -30,7 +30,7 @@ module Karafka
  @group_counter ||= 0
  @group_counter += 1

- ::Digest::MD5.hexdigest(
+ ::Digest::SHA256.hexdigest(
  @group_counter.to_s
  )[0..11]
  end
@@ -4,6 +4,7 @@ module Karafka
  # Class used to run the Karafka listeners in separate threads
  class Runner
  include Helpers::ConfigImporter.new(
+ worker_thread_priority: %i[worker_thread_priority],
  manager: %i[internal connection manager],
  conductor: %i[internal connection conductor],
  jobs_queue_class: %i[internal processing jobs_queue_class]
@@ -26,7 +27,12 @@ module Karafka
  # Register all the listeners so they can be started and managed
  manager.register(listeners)

- workers.each_with_index { |worker, i| worker.async_call("karafka.worker##{i}") }
+ workers.each_with_index do |worker, i|
+ worker.async_call(
+ "karafka.worker##{i}",
+ worker_thread_priority
+ )
+ end

  # We aggregate threads here for a supervised shutdown process
  Karafka::Server.workers = workers
@@ -9,6 +9,15 @@ module Karafka

  private_constant :FORCEFUL_SHUTDOWN_WAIT

+ extend Helpers::ConfigImporter.new(
+ cli_contract: %i[internal cli contract],
+ activity_manager: %i[internal routing activity_manager],
+ supervision_sleep: %i[internal supervision_sleep],
+ shutdown_timeout: %i[shutdown_timeout],
+ forceful_exit_code: %i[internal forceful_exit_code],
+ process: %i[internal process]
+ )
+
  class << self
  # Set of consuming threads. Each consumer thread contains a single consumer
  attr_accessor :listeners
@@ -30,6 +39,9 @@ module Karafka
  # as not everything is possible when operating in non-standalone mode, etc.
  attr_accessor :execution_mode

+ # id of the server. Useful for logging when we want to reference things issued by the server.
+ attr_accessor :id
+
  # Method which runs app
  def run
  self.listeners = []
@@ -39,9 +51,7 @@ module Karafka
  # embedded
  # We cannot validate this during the start because config needs to be populated and routes
  # need to be defined.
- config.internal.cli.contract.validate!(
- config.internal.routing.activity_manager.to_h
- )
+ cli_contract.validate!(activity_manager.to_h)

  # We clear as we do not want parent handlers in case of working from fork
  process.clear
@@ -96,18 +106,18 @@ module Karafka

  Karafka::App.stop!

- timeout = config.shutdown_timeout
+ timeout = shutdown_timeout

  # We check from time to time (for the timeout period) if all the threads finished
  # their work and if so, we can just return and normal shutdown process will take place
  # We divide it by 1000 because we use time in ms.
- ((timeout / 1_000) * (1 / config.internal.supervision_sleep)).to_i.times do
+ ((timeout / 1_000) * (1 / supervision_sleep)).to_i.times do
  all_listeners_stopped = listeners.all?(&:stopped?)
  all_workers_stopped = workers.none?(&:alive?)

  return if all_listeners_stopped && all_workers_stopped

- sleep(config.internal.supervision_sleep)
+ sleep(supervision_sleep)
  end

  raise Errors::ForcefulShutdownError
@@ -145,7 +155,7 @@ module Karafka
  return unless process.supervised?

  # exit! is not within the instrumentation as it would not trigger due to exit
- Kernel.exit!(config.internal.forceful_exit_code)
+ Kernel.exit!(forceful_exit_code)
  ensure
  # We need to check if it wasn't an early exit to make sure that only on stop invocation
  # can change the status after everything is closed
@@ -169,23 +179,13 @@ module Karafka
  # in one direction
  Karafka::App.quiet!
  end
-
- private
-
- # @return [Karafka::Core::Configurable::Node] root config node
- def config
- Karafka::App.config
- end
-
- # @return [Karafka::Process] process wrapper instance used to catch system signal calls
- def process
- config.internal.process
- end
  end

  # Always start with standalone so there always is a value for the execution mode.
  # This is overwritten quickly during boot, but just in case someone would reach it prior to
  # booting, we want to have the default value.
  self.execution_mode = :standalone
+
+ self.id = SecureRandom.hex(6)
  end
  end
@@ -73,6 +73,7 @@ module Karafka
  message.max.bytes
  metadata.broker.list
  metadata.max.age.ms
+ metadata.recovery.strategy
  oauthbearer_token_refresh_cb
  offset.store.method
  offset.store.path
@@ -207,6 +208,7 @@ module Karafka
  message.timeout.ms
  metadata.broker.list
  metadata.max.age.ms
+ metadata.recovery.strategy
  msg_order_cmp
  oauthbearer_token_refresh_cb
  opaque
@@ -73,6 +73,9 @@ module Karafka
  # Really useful when you want to ensure that all topics in routing are managed via
  # declaratives.
  setting :strict_declarative_topics, default: false
+ # Defaults to the CPU thread priority slice to -1 (50ms) to ensure that CPU intense
+ # processing does not affect other threads and prevents starvation
+ setting :worker_thread_priority, default: -1

  setting :oauth do
  # option [false, #call] Listener for using oauth bearer. This listener will be able to
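
Hypothetical setup sketch: `worker_thread_priority` is a root setting per the hunk above, so it could be tuned in the application setup block; the value follows Ruby's Thread#priority semantics.

Karafka::App.setup do |config|
  # Deprioritize CPU-heavy workers even further than the -1 default
  config.worker_thread_priority = -2
end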
@@ -133,6 +136,14 @@ module Karafka
  # How many times should be try. 1 000 ms x 60 => 60 seconds wait in total and then we give
  # up on pending operations
  setting :max_attempts, default: 60
+
+ # option poll_timeout [Integer] time in ms
+ # How long should a poll wait before yielding on no results (rdkafka-ruby setting)
+ # Lower value can be especially useful when working with Web UI, because it allows for
+ # increased responsiveness. Many admin operations do not take 100ms but they wait on poll
+ # until then prior to finishing, blocking the execution. Lowering to 25 ms can
+ # improve responsiveness of the Web UI. 50ms is a good trade-off for admin.
+ setting :poll_timeout, default: 50
  end

  # Namespace for internal settings that should not be modified directly
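
Hypothetical setup sketch, assuming this block is the admin settings namespace (as the surrounding `max_attempts` setting suggests):

Karafka::App.setup do |config|
  # Lower the admin poll timeout (in ms) for snappier admin / Web UI operations
  config.admin.poll_timeout = 25
end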
@@ -211,6 +222,10 @@ module Karafka
  # How long should we wait before a critical listener recovery
  # Too short may cause endless rebalance loops
  setting :reset_backoff, default: 60_000
+ # Similar to the `#worker_thread_priority`. Listener threads do not operate for long
+ # time and release GVL on polling but we provide this for API consistency and some
+ # special edge cases.
+ setting :listener_thread_priority, default: 0

  # Settings that are altered by our client proxy layer
  setting :proxy do
@@ -282,6 +297,9 @@ module Karafka
  setting :jobs_builder, default: Processing::JobsBuilder.new
  # option coordinator [Class] work coordinator we want to user for processing coordination
  setting :coordinator_class, default: Processing::Coordinator
+ # option errors_tracker_class [Class, nil] errors tracker that is used by the coordinator
+ # for granular error tracking. `nil` for OSS as it is not in use.
+ setting :errors_tracker_class, default: nil
  # option partitioner_class [Class] partitioner we use against a batch of data
  setting :partitioner_class, default: Processing::Partitioner
  # option strategy_selector [Object] processing strategy selector to be used
@@ -367,7 +385,10 @@ module Karafka
  config.producer ||= ::WaterDrop::Producer.new do |producer_config|
  # In some cases WaterDrop updates the config and we don't want our consumer config to
  # be polluted by those updates, that's why we copy
- producer_config.kafka = AttributesMap.producer(config.kafka.dup)
+ producer_kafka = AttributesMap.producer(config.kafka.dup)
+ # We inject some defaults (mostly for dev) unless user defined them
+ Setup::DefaultsInjector.producer(producer_kafka)
+ producer_config.kafka = producer_kafka
  # We also propagate same listener to the default producer to make sure, that the
  # listener for oauth is also automatically used by the producer. That way we don't
  # have to configure it manually for the default producer
@@ -36,7 +36,17 @@ module Karafka
  'topic.metadata.refresh.interval.ms': 5_000
  }.freeze

- private_constant :CONSUMER_KAFKA_DEFAULTS, :CONSUMER_KAFKA_DEV_DEFAULTS
+ # Contains settings that should not be used in production but make life easier in dev
+ # It is applied only to the default producer. If users setup their own producers, then
+ # they have to set this by themselves.
+ PRODUCER_KAFKA_DEV_DEFAULTS = {
+ # For all of those same reasoning as for the consumer
+ 'allow.auto.create.topics': 'true',
+ 'topic.metadata.refresh.interval.ms': 5_000
+ }.freeze
+
+ private_constant :CONSUMER_KAFKA_DEFAULTS, :CONSUMER_KAFKA_DEV_DEFAULTS,
+ :PRODUCER_KAFKA_DEV_DEFAULTS

  class << self
  # Propagates the kafka setting defaults unless they are already present for consumer config
@@ -58,6 +68,21 @@ module Karafka
  kafka_config[key] = value
  end
  end
+
+ # Propagates the kafka settings defaults unless they are already present for producer
+ # config. This makes it easier to set some values that users usually don't change but still
+ # allows them to overwrite the whole hash.
+ #
+ # @param kafka_config [Hash] kafka scoped config
+ def producer(kafka_config)
+ return if Karafka::App.env.production?
+
+ PRODUCER_KAFKA_DEV_DEFAULTS.each do |key, value|
+ next if kafka_config.key?(key)
+
+ kafka_config[key] = value
+ end
+ end
  end
  end
  end
@@ -66,7 +66,12 @@ module Karafka
  return if initializing?

  conductor.signal
- monitor.instrument("app.#{state}", caller: self)
+
+ monitor.instrument(
+ "app.#{state}",
+ caller: self,
+ server_id: Karafka::Server.id
+ )
  end
  end
  RUBY
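
Hypothetical instrumentation sketch: the `server_id` payload key added above can be read from lifecycle events through a monitor subscription.

Karafka.monitor.subscribe('app.running') do |event|
  Karafka.logger.info("Karafka server #{event[:server_id]} is running")
end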
@@ -27,6 +27,18 @@ module Karafka
  # @return [Integer] pid of the node
  attr_reader :pid

+ # When re-creating a producer in the fork, those are not attributes we want to inherit
+ # from the parent process because they are updated in the fork. If user wants to take those
+ # from the parent process, he should redefine them by overwriting the whole producer.
+ SKIPPABLE_NEW_PRODUCER_ATTRIBUTES = %i[
+ id
+ kafka
+ logger
+ oauth
+ ].freeze
+
+ private_constant :SKIPPABLE_NEW_PRODUCER_ATTRIBUTES
+
  # @param id [Integer] number of the fork. Used for uniqueness setup for group client ids and
  # other stuff where we need to know a unique reference of the fork in regards to the rest
  # of them.
@@ -52,15 +64,32 @@ module Karafka
  # an attempt to close it when finalized, meaning it would be kept in memory.
  config.producer.close

+ old_producer = config.producer
+ old_producer_config = old_producer.config
+
  # Supervisor producer is closed, hence we need a new one here
  config.producer = ::WaterDrop::Producer.new do |p_config|
  p_config.kafka = Setup::AttributesMap.producer(kafka.dup)
  p_config.logger = config.logger
+
+ old_producer_config.to_h.each do |key, value|
+ next if SKIPPABLE_NEW_PRODUCER_ATTRIBUTES.include?(key)
+
+ p_config.public_send("#{key}=", value)
+ end
+
+ # Namespaced attributes need to be migrated directly on their config node
+ old_producer_config.oauth.to_h.each do |key, value|
+ p_config.oauth.public_send("#{key}=", value)
+ end
  end

  @pid = ::Process.pid
  @reader.close

+ # Certain features need to be reconfigured / reinitialized after fork in Pro
+ Pro::Loader.post_fork(config, old_producer) if Karafka.pro?
+
  # Indicate we are alive right after start
  healthy

@@ -69,6 +98,8 @@ module Karafka
  monitor.instrument('swarm.node.after_fork', caller: self)

  Karafka::Process.tags.add(:execution_mode, 'mode:swarm')
+ Karafka::Process.tags.add(:swarm_nodeid, "node:#{@id}")
+
  Server.execution_mode = :swarm
  Server.run

@@ -50,6 +50,10 @@ module Karafka
  # producer (should not be initialized but just in case)
  Karafka.producer.close

+ # Ensure rdkafka stuff is loaded into memory pre-fork. This will ensure, that we save
+ # few MB on forking as this will be already in memory.
+ Rdkafka::Bindings.rd_kafka_global_init
+
  Karafka::App.warmup

  manager.start