RubyGems - karafka - Versions diffs - 2.2.13 → 2.3.0.alpha1 - Mend

karafka 2.2.13 → 2.3.0.alpha1

Files changed (125) hide show

checksums.yaml +4 -4
checksums.yaml.gz.sig +0 -0
data/.github/workflows/ci.yml +38 -12
data/.ruby-version +1 -1
data/CHANGELOG.md +161 -125
data/Gemfile.lock +12 -12
data/README.md +0 -2
data/SECURITY.md +23 -0
data/config/locales/errors.yml +7 -1
data/config/locales/pro_errors.yml +22 -0
data/docker-compose.yml +3 -1
data/karafka.gemspec +2 -2
data/lib/karafka/admin/acl.rb +287 -0
data/lib/karafka/admin.rb +118 -16
data/lib/karafka/app.rb +12 -3
data/lib/karafka/base_consumer.rb +32 -31
data/lib/karafka/cli/base.rb +1 -1
data/lib/karafka/connection/client.rb +94 -84
data/lib/karafka/connection/conductor.rb +28 -0
data/lib/karafka/connection/listener.rb +165 -46
data/lib/karafka/connection/listeners_batch.rb +5 -11
data/lib/karafka/connection/manager.rb +72 -0
data/lib/karafka/connection/messages_buffer.rb +12 -0
data/lib/karafka/connection/proxy.rb +17 -0
data/lib/karafka/connection/status.rb +75 -0
data/lib/karafka/contracts/config.rb +14 -10
data/lib/karafka/contracts/consumer_group.rb +9 -1
data/lib/karafka/contracts/topic.rb +3 -1
data/lib/karafka/errors.rb +13 -0
data/lib/karafka/instrumentation/assignments_tracker.rb +96 -0
data/lib/karafka/instrumentation/callbacks/rebalance.rb +10 -7
data/lib/karafka/instrumentation/logger_listener.rb +3 -9
data/lib/karafka/instrumentation/notifications.rb +19 -9
data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +31 -28
data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +22 -3
data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +15 -12
data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +39 -36
data/lib/karafka/pro/base_consumer.rb +47 -0
data/lib/karafka/pro/connection/manager.rb +300 -0
data/lib/karafka/pro/connection/multiplexing/listener.rb +40 -0
data/lib/karafka/pro/instrumentation/performance_tracker.rb +85 -0
data/lib/karafka/pro/iterator/tpl_builder.rb +1 -1
data/lib/karafka/pro/iterator.rb +1 -6
data/lib/karafka/pro/loader.rb +16 -2
data/lib/karafka/pro/processing/coordinator.rb +2 -1
data/lib/karafka/pro/processing/executor.rb +37 -0
data/lib/karafka/pro/processing/expansions_selector.rb +32 -0
data/lib/karafka/pro/processing/jobs/periodic.rb +41 -0
data/lib/karafka/pro/processing/jobs/periodic_non_blocking.rb +32 -0
data/lib/karafka/pro/processing/jobs_builder.rb +14 -3
data/lib/karafka/pro/processing/offset_metadata/consumer.rb +44 -0
data/lib/karafka/pro/processing/offset_metadata/fetcher.rb +131 -0
data/lib/karafka/pro/processing/offset_metadata/listener.rb +46 -0
data/lib/karafka/pro/processing/schedulers/base.rb +143 -0
data/lib/karafka/pro/processing/schedulers/default.rb +107 -0
data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
data/lib/karafka/pro/processing/strategies/default.rb +136 -3
data/lib/karafka/pro/processing/strategies/dlq/default.rb +35 -0
data/lib/karafka/pro/processing/strategies/lrj/default.rb +1 -1
data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
data/lib/karafka/pro/processing/strategies/vp/default.rb +60 -26
data/lib/karafka/pro/processing/virtual_offset_manager.rb +41 -11
data/lib/karafka/pro/routing/features/long_running_job/topic.rb +2 -0
data/lib/karafka/pro/routing/features/multiplexing/config.rb +38 -0
data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +114 -0
data/lib/karafka/pro/routing/features/multiplexing/patches/contracts/consumer_group.rb +42 -0
data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +38 -0
data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +42 -0
data/lib/karafka/pro/routing/features/multiplexing/subscription_groups_builder.rb +40 -0
data/lib/karafka/pro/routing/features/multiplexing.rb +59 -0
data/lib/karafka/pro/routing/features/non_blocking_job/topic.rb +32 -0
data/lib/karafka/pro/routing/features/non_blocking_job.rb +37 -0
data/lib/karafka/pro/routing/features/offset_metadata/config.rb +33 -0
data/lib/karafka/pro/routing/features/offset_metadata/contracts/topic.rb +42 -0
data/lib/karafka/pro/routing/features/offset_metadata/topic.rb +65 -0
data/lib/karafka/pro/routing/features/offset_metadata.rb +40 -0
data/lib/karafka/pro/routing/features/patterns/contracts/consumer_group.rb +4 -0
data/lib/karafka/pro/routing/features/patterns/detector.rb +18 -10
data/lib/karafka/pro/routing/features/periodic_job/config.rb +37 -0
data/lib/karafka/pro/routing/features/periodic_job/contracts/topic.rb +44 -0
data/lib/karafka/pro/routing/features/periodic_job/topic.rb +94 -0
data/lib/karafka/pro/routing/features/periodic_job.rb +27 -0
data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +1 -0
data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +7 -2
data/lib/karafka/process.rb +5 -3
data/lib/karafka/processing/coordinator.rb +5 -1
data/lib/karafka/processing/executor.rb +43 -13
data/lib/karafka/processing/executors_buffer.rb +22 -7
data/lib/karafka/processing/jobs/base.rb +19 -2
data/lib/karafka/processing/jobs/consume.rb +3 -3
data/lib/karafka/processing/jobs/idle.rb +5 -0
data/lib/karafka/processing/jobs/revoked.rb +5 -0
data/lib/karafka/processing/jobs/shutdown.rb +5 -0
data/lib/karafka/processing/jobs_queue.rb +19 -8
data/lib/karafka/processing/schedulers/default.rb +42 -0
data/lib/karafka/processing/strategies/base.rb +13 -4
data/lib/karafka/processing/strategies/default.rb +23 -7
data/lib/karafka/processing/strategies/dlq.rb +36 -0
data/lib/karafka/processing/worker.rb +4 -1
data/lib/karafka/routing/builder.rb +12 -2
data/lib/karafka/routing/consumer_group.rb +5 -5
data/lib/karafka/routing/features/base.rb +44 -8
data/lib/karafka/routing/features/dead_letter_queue/config.rb +6 -1
data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
data/lib/karafka/routing/features/dead_letter_queue/topic.rb +9 -2
data/lib/karafka/routing/proxy.rb +4 -3
data/lib/karafka/routing/subscription_group.rb +2 -2
data/lib/karafka/routing/subscription_groups_builder.rb +11 -2
data/lib/karafka/routing/topic.rb +8 -10
data/lib/karafka/routing/topics.rb +1 -1
data/lib/karafka/runner.rb +13 -3
data/lib/karafka/server.rb +5 -9
data/lib/karafka/setup/config.rb +21 -1
data/lib/karafka/status.rb +23 -14
data/lib/karafka/templates/karafka.rb.erb +7 -0
data/lib/karafka/time_trackers/partition_usage.rb +56 -0
data/lib/karafka/version.rb +1 -1
data.tar.gz.sig +0 -0
metadata +47 -13
metadata.gz.sig +0 -0
data/lib/karafka/connection/consumer_group_coordinator.rb +0 -48
data/lib/karafka/pro/performance_tracker.rb +0 -84
data/lib/karafka/pro/processing/scheduler.rb +0 -74
data/lib/karafka/processing/scheduler.rb +0 -38

data/lib/karafka/pro/processing/jobs_builder.rb CHANGED Viewed

@@ -16,12 +16,12 @@ module Karafka
     module Processing
       # Pro jobs builder that supports lrj
       class JobsBuilder < ::Karafka::Processing::JobsBuilder
-        # @param executor [Karafka::Processing::Executor]
+        # @param executor [Karafka::Pro::Processing::Executor]
         def idle(executor)
           Karafka::Processing::Jobs::Idle.new(executor)
         end
-        # @param executor [Karafka::Processing::Executor]
+        # @param executor [Karafka::Pro::Processing::Executor]
         # @param messages [Karafka::Messages::Messages] messages batch to be consumed
         # @return [Karafka::Processing::Jobs::Consume] blocking job
         # @return [Karafka::Pro::Processing::Jobs::ConsumeNonBlocking] non blocking for lrj
@@ -33,7 +33,7 @@ module Karafka
           end
         end
-        # @param executor [Karafka::Processing::Executor]
+        # @param executor [Karafka::Pro::Processing::Executor]
         # @return [Karafka::Processing::Jobs::Revoked] revocation job for non LRJ
         # @return [Karafka::Processing::Jobs::RevokedNonBlocking] revocation job that is
         #   non-blocking, so when revocation job is scheduled for LRJ it also will not block
@@ -44,6 +44,17 @@ module Karafka
             super
           end
         end
+        # @param executor [Karafka::Pro::Processing::Executor]
+        # @return [Jobs::Periodic] Periodic job
+        # @return [Jobs::PeriodicNonBlocking] Periodic non-blocking job
+        def periodic(executor)
+          if executor.topic.long_running_job?
+            Jobs::PeriodicNonBlocking.new(executor)
+          else
+            Jobs::Periodic.new(executor)
+          end
+        end
       end
     end
   end

data/lib/karafka/pro/processing/offset_metadata/consumer.rb ADDED Viewed

@@ -0,0 +1,44 @@
+# frozen_string_literal: true
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+module Karafka
+  module Pro
+    module Processing
+      # Offset Metadata support on the processing side
+      module OffsetMetadata
+        # Extra API methods for offset metadata fetching
+        # @note Part of this feature API is embedded directly into the strategies because it alters
+        #   how marking methods (`#mark_as_consumed` and `#mark_as_consumed!`) operate. Because
+        #   of that, they had to be embedded into the strategies.
+        module Consumer
+          # @param cache [Boolean] should we use cached result if present (true by default)
+          # @return [false, Object] false in case we do not own the partition anymore or
+          #   deserialized metadata based on the deserializer
+          # @note Caching is on as the assumption here is that most of the time the user will be
+          #   interested only in the offset metadata that "came" from the time prior to the
+          #   rebalance. That is because the rest of the metadata (current) is created and
+          #   controlled by the user, thus there is no need to retrieve it. In case this is not
+          #   true and the user wants to always get the Kafka metadata, the `cache` value of this
+          #   feature can be set to false.
+          def offset_metadata(cache: true)
+            return false if revoked?
+            Fetcher.find(topic, partition, cache: cache)
+          end
+          alias committed_offset_metadata offset_metadata
+        end
+      end
+    end
+  end
+end

data/lib/karafka/pro/processing/offset_metadata/fetcher.rb ADDED Viewed

@@ -0,0 +1,131 @@
+# frozen_string_literal: true
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+module Karafka
+  module Pro
+    module Processing
+      # Offset Metadata support on the processing side
+      module OffsetMetadata
+        # This fetcher is responsible for fetching and caching committed offsets metadata
+        # information.
+        #
+        # By design we fetch all information for the assignments of a requested topic. Not all
+        # topics from the same subscription group may need metadata and even if they do, we can
+        # run a few smaller queries. This approach prevents us from querying the data of all
+        # assigned topics in one go, preventing excessive queries.
+        #
+        # Since the assumption is that the user will not have to reach out for the later metadata,
+        # as it is produced in the context of a given consumer assignment, we can cache the
+        # initial result and only allow users to invalidate it explicitly.
+        class Fetcher
+          include Singleton
+          class << self
+            extend Forwardable
+            def_delegators :instance, :register, :clear, :find
+          end
+          def initialize
+            @mutexes = {}
+            @clients = {}
+            @tpls = {}
+          end
+          # Registers a client of a given subscription group, so we can use it for queries later on
+          # @param client [Karafka::Connection::Client]
+          # @note Since we store the client reference and not the underlying rdkafka consumer
+          #   instance, we do not have to deal with the recovery as it is abstracted away
+          def register(client)
+            @clients[client.subscription_group] = client
+            # We use one mutex per SG because independent SGs can query in parallel
+            @mutexes[client.subscription_group] = Mutex.new
+            @tpls[client.subscription_group] = {}
+          end
+          # Queries or retrieves from cache the given offset metadata for the selected partition
+          #
+          # @param topic [Karafka::Routing::Topic] routing topic with subscription group reference
+          # @param partition [Integer] partition for which we want to get stored offset metadata
+          # @param cache [Boolean] when false, forces an explicit query to Kafka and a cache
+          #   refresh. By default we use the setting from the topic level but this can be
+          #   overridden on a per request basis if needed.
+          # @return [Object, false] deserialized metadata (string deserializer by default) or
+          #   false in case we were not able to obtain the details because we have lost the
+          #   assignment
+          def find(topic, partition, cache: true)
+            cache = topic.offset_metadata.cache? && cache
+            tpls = fetch(topic, cache)
+            return false unless tpls
+            t_partitions = tpls.fetch(topic.name, [])
+            t_partition = t_partitions.find { |t_p| t_p.partition == partition }
+            # If we do not have given topic partition here, it means it is no longer part of our
+            # assignment and we should return false
+            return false unless t_partition
+            topic.offset_metadata.deserializer.call(t_partition.metadata)
+          end
+          # Clears cache of a given subscription group. It is triggered on assignment changes.
+          #
+          # @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group that
+          #   we want to clear.
+          def clear(subscription_group)
+            @mutexes.fetch(subscription_group).synchronize do
+              @tpls[subscription_group].clear
+            end
+          end
+          private
+          # Fetches from Kafka all committed offsets for the given topic partitions that are
+          # assigned to this process.
+          #
+          # We fetch all because in the majority of cases, the behavior of the end user code is
+          #   not specific to a given partition but the same for all. In such cases we save on
+          #   querying as we get all data for all partitions in one go.
+          #
+          # @param topic [Karafka::Routing::Topic] topic for which we want to fetch tpls data
+          # @param cache [Boolean] should we return cached data if present
+          def fetch(topic, cache)
+            subscription_group = topic.subscription_group
+            t_tpls = @tpls.fetch(subscription_group, false)
+            t_tpl = t_tpls[topic]
+            return t_tpl if t_tpl && cache
+            assigned_tpls = @clients.fetch(subscription_group).assignment
+            t_tpl = assigned_tpls.to_h.fetch(topic.name, false)
+            # May be false in case we lost given assignment but still run LRJ
+            return false unless t_tpl
+            return false if t_tpl.empty?
+            @mutexes.fetch(subscription_group).synchronize do
+              rd_tpl = Rdkafka::Consumer::TopicPartitionList.new(topic.name => t_tpl)
+              # While in theory we could lose the assignment while being here, this will work and
+              # will return us a proper tpl. We do not deal with this case on this layer and report
+              # anyhow. There will not be any exception and this will operate correctly.
+              t_tpls[topic] = @clients.fetch(subscription_group).committed(rd_tpl).to_h
+            end
+          end
+        end
+      end
+    end
+  end
+end

data/lib/karafka/pro/processing/offset_metadata/listener.rb ADDED Viewed

@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+module Karafka
+  module Pro
+    module Processing
+      module OffsetMetadata
+        # Keeps track of rebalances and updates the fetcher
+        # Since we cache the tpls with metadata, we need to invalidate them on events that would
+        # cause changes in the assignments
+        class Listener
+          # When we start listening we need to register this client in the metadata fetcher, so
+          # we have the client related to a given subscription group that we can use in fetcher
+          # since fetcher may be used in filtering API and other places outside of the standard
+          # consumer flow
+          # @param event [Karafka::Core::Monitoring::Event]
+          def on_connection_listener_before_fetch_loop(event)
+            Fetcher.register event[:client]
+          end
+          # Invalidates internal cache when assignments change so we can get correct metadata
+          # @param event [Karafka::Core::Monitoring::Event]
+          def on_rebalance_partitions_assigned(event)
+            Fetcher.clear event[:subscription_group]
+          end
+          # Invalidates internal cache when assignments change so we can get correct metadata
+          # @param event [Karafka::Core::Monitoring::Event]
+          def on_rebalance_partitions_revoked(event)
+            Fetcher.clear event[:subscription_group]
+          end
+        end
+      end
+    end
+  end
+end

data/lib/karafka/pro/processing/schedulers/base.rb ADDED Viewed

@@ -0,0 +1,143 @@
+# frozen_string_literal: true
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+module Karafka
+  module Pro
+    module Processing
+      # Namespace for Pro schedulers related components
+      module Schedulers
+        # Base for all the Pro custom schedulers
+        #
+        # It wraps the Scheduler API with mutex to ensure, that during scheduling we do not start
+        # scheduling other work that could impact the decision making in between multiple
+        # subscription groups running in separate threads.
+        #
+        # @note All the `on_` methods can be redefined with non-thread-safe versions without
+        #   locks if needed. However, when doing so, ensure that your scheduler is stateless.
+        class Base
+          # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
+          def initialize(queue)
+            @queue = queue
+            @mutex = Mutex.new
+          end
+          # Schedules any jobs provided in a fifo order
+          # @param jobs_array [Array<Karafka::Processing::Jobs::Base>]
+          def schedule_fifo(jobs_array)
+            jobs_array.each do |job|
+              @queue << job
+            end
+          end
+          # Runs the consumption jobs scheduling flow under a mutex
+          #
+          # @param jobs_array
+          #   [Array<Karafka::Processing::Jobs::Consume, Processing::Jobs::ConsumeNonBlocking>]
+          #   jobs for scheduling
+          def on_schedule_consumption(jobs_array)
+            @mutex.synchronize do
+              schedule_consumption(jobs_array)
+            end
+          end
+          # Should schedule the consumption jobs
+          #
+          # @param _jobs_array
+          #   [Array<Karafka::Processing::Jobs::Consume, Processing::Jobs::ConsumeNonBlocking>]
+          #   jobs for scheduling
+          def schedule_consumption(_jobs_array)
+            raise NotImplementedError, 'Implement in a subclass'
+          end
+          # Runs the revocation jobs scheduling flow under a mutex
+          #
+          # @param jobs_array
+          #   [Array<Karafka::Processing::Jobs::Revoked, Processing::Jobs::RevokedNonBlocking>]
+          #   jobs for scheduling
+          def on_schedule_revocation(jobs_array)
+            @mutex.synchronize do
+              schedule_revocation(jobs_array)
+            end
+          end
+          # Runs the shutdown jobs scheduling flow under a mutex
+          #
+          # @param jobs_array [Array<Karafka::Processing::Jobs::Shutdown>] jobs for scheduling
+          def on_schedule_shutdown(jobs_array)
+            @mutex.synchronize do
+              schedule_shutdown(jobs_array)
+            end
+          end
+          # Runs the idle jobs scheduling flow under a mutex
+          #
+          # @param jobs_array [Array<Karafka::Processing::Jobs::Idle>] jobs for scheduling
+          def on_schedule_idle(jobs_array)
+            @mutex.synchronize do
+              schedule_idle(jobs_array)
+            end
+          end
+          # Runs the periodic jobs scheduling flow under a mutex
+          #
+          # @param jobs_array
+          #   [Array<Processing::Jobs::Periodic, Processing::Jobs::PeriodicNonBlocking>]
+          #   jobs for scheduling
+          def on_schedule_periodic(jobs_array)
+            @mutex.synchronize do
+              schedule_periodic(jobs_array)
+            end
+          end
+          # Schedule by default all except consumption as fifo
+          alias schedule_revocation schedule_fifo
+          alias schedule_shutdown schedule_fifo
+          alias schedule_idle schedule_fifo
+          alias schedule_periodic schedule_fifo
+          # Runs the manage tick under mutex
+          def on_manage
+            @mutex.synchronize { manage }
+          end
+          # Should manage scheduling on jobs state changes
+          #
+          # By default does nothing as default schedulers are stateless
+          def manage
+            nil
+          end
+          # Runs clearing under mutex
+          #
+          # @param group_id [String] Subscription group id
+          def on_clear(group_id)
+            @mutex.synchronize { clear(group_id) }
+          end
+          # By default schedulers are stateless, so nothing to clear.
+          #
+          # @param _group_id [String] Subscription group id
+          def clear(_group_id)
+            nil
+          end
+          private
+          # @return [Karafka::Processing::JobsQueue] jobs queue reference for internal usage
+          #   inside of the scheduler
+          attr_reader :queue
+        end
+      end
+    end
+  end
+end

data/lib/karafka/pro/processing/schedulers/default.rb ADDED Viewed

@@ -0,0 +1,107 @@
+# frozen_string_literal: true
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+module Karafka
+  module Pro
+    module Processing
+      # Namespace for Pro schedulers
+      module Schedulers
+        # Optimized scheduler that takes into consideration the execution time needed to process
+        # messages from given topics partitions. It uses the non-preemptive LJF algorithm.
+        #
+        # This scheduler is designed to optimize execution times on jobs that perform IO operations
+        # as when taking IO into consideration, we can achieve optimized parallel processing.
+        #
+        # This scheduler can also work with virtual partitions.
+        #
+        # Aside from consumption jobs, other jobs do not run often, thus we can leave them with
+        # default FIFO scheduler from the default Karafka scheduler
+        #
+        # @note This is a stateless scheduler, thus we can override the `#on_` API.
+        class Default < Base
+          # Schedules jobs in the LJF order for consumption
+          #
+          # @param jobs_array
+          #   [Array<Karafka::Processing::Jobs::Consume, Processing::Jobs::ConsumeNonBlocking>]
+          #   jobs for scheduling
+          def on_schedule_consumption(jobs_array)
+            perf_tracker = Instrumentation::PerformanceTracker.instance
+            ordered = []
+            jobs_array.each do |job|
+              ordered << [
+                job,
+                processing_cost(perf_tracker, job)
+              ]
+            end
+            ordered.sort_by!(&:last)
+            ordered.reverse!
+            ordered.map!(&:first)
+            ordered.each do |job|
+              @queue << job
+            end
+          end
+          # Schedules any jobs provided in a fifo order
+          # @param jobs_array [Array<Karafka::Processing::Jobs::Base>]
+          def schedule_fifo(jobs_array)
+            jobs_array.each do |job|
+              @queue << job
+            end
+          end
+          # By default all non-consumption work is scheduled in a fifo order
+          alias on_schedule_revocation schedule_fifo
+          alias on_schedule_shutdown schedule_fifo
+          alias on_schedule_idle schedule_fifo
+          alias on_schedule_periodic schedule_fifo
+          # This scheduler does not have anything to manage as it is a pass through and has no
+          # state
+          def on_manage
+            nil
+          end
+          # This scheduler does not need to be cleared because it is stateless
+          #
+          # @param _group_id [String] Subscription group id
+          def on_clear(_group_id)
+            nil
+          end
+          private
+          # @param perf_tracker [PerformanceTracker]
+          # @param job [Karafka::Processing::Jobs::Consume] job we will be processing
+          # @return [Numeric] estimated cost of processing this job
+          def processing_cost(perf_tracker, job)
+            if job.is_a?(::Karafka::Processing::Jobs::Consume)
+              messages = job.messages
+              message = messages.first
+              perf_tracker.processing_time_p95(message.topic, message.partition) * messages.size
+            else
+              # LJF will put the most expensive jobs first, but we want to always run the zero
+              # cost jobs related to the lifecycle first. That is why we "emulate" that they are
+              # the longest possible jobs that anyone can run.
+              Float::INFINITY
+            end
+          end
+        end
+      end
+    end
+  end
+end

data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb CHANGED Viewed

@@ -33,7 +33,7 @@ module Karafka
             ].freeze
             # No actions needed for the standard flow here
-            def handle_before_enqueue
+            def handle_before_schedule_consume
               super
               coordinator.on_enqueued do

data/lib/karafka/pro/processing/strategies/default.rb CHANGED Viewed

@@ -27,9 +27,127 @@ module Karafka
           # Apply strategy for a non-feature based flow
           FEATURES = %i[].freeze
+          # Marks message as consumed in an async way.
+          #
+          # @param message [Messages::Message] last successfully processed message.
+          # @param offset_metadata [String, nil] offset metadata string or nil if nothing
+          # @return [Boolean] true if we were able to mark the offset, false otherwise.
+          #   False indicates that we were not able and that we have lost the partition.
+          #
+          # @note We keep track of this offset in case we would mark as consumed and get an error
+          #   when processing another message. In a case like this we do not pause on the message
+          #   we've already processed but rather at the next one. This applies to both sync and
+          #   async versions of this method.
+          def mark_as_consumed(message, offset_metadata = nil)
+            if @_in_transaction
+              mark_in_transaction(message, offset_metadata, true)
+            else
+              # seek offset can be nil only in case `#seek` was invoked with an offset reset
+              # request. In a case like this we ignore marking.
+              return true if coordinator.seek_offset.nil?
+              # Ignore earlier offsets than the one we already committed
+              return true if coordinator.seek_offset > message.offset
+              return false if revoked?
+              return revoked? unless client.mark_as_consumed(message, offset_metadata)
+              coordinator.seek_offset = message.offset + 1
+            end
+            true
+          end
+          # Marks message as consumed in a sync way.
+          #
+          # @param message [Messages::Message] last successfully processed message.
+          # @param offset_metadata [String, nil] offset metadata string or nil if nothing
+          # @return [Boolean] true if we were able to mark the offset, false otherwise.
+          #   False indicates that we were not able and that we have lost the partition.
+          def mark_as_consumed!(message, offset_metadata = nil)
+            if @_in_transaction
+              mark_in_transaction(message, offset_metadata, false)
+            else
+              # seek offset can be nil only in case `#seek` was invoked with an offset reset
+              # request. In a case like this we ignore marking.
+              return true if coordinator.seek_offset.nil?
+              # Ignore earlier offsets than the one we already committed
+              return true if coordinator.seek_offset > message.offset
+              return false if revoked?
+              return revoked? unless client.mark_as_consumed!(message, offset_metadata)
+              coordinator.seek_offset = message.offset + 1
+            end
+            true
+          end
+          # Starts producer transaction, saves the transaction context for transactional marking
+          # and runs user code in this context
+          #
+          # Transactions on a consumer level differ from those initiated by the producer as they
+          # allow to mark offsets inside of the transaction. If the transaction is initialized
+          # only from the consumer, the offset will be stored in a regular fashion.
+          #
+          # @param block [Proc] code that we want to run in a transaction
+          def transaction(&block)
+            transaction_started = false
+            # Prevent nested transactions. It would not make any sense.
+            raise Errors::TransactionAlreadyInitializedError if @_in_transaction
+            transaction_started = true
+            @_transaction_marked = []
+            @_in_transaction = true
+            producer.transaction(&block)
+            @_in_transaction = false
+            # This offset is already stored in transaction but we set it here anyhow because we
+            # want to make sure our internal in-memory state is aligned with the transaction
+            #
+            # @note We never need to use the blocking `#mark_as_consumed!` here because the offset
+            #   anyhow was already stored during the transaction
+            #
+            # @note In theory we could only keep reference to the most recent marking and reject
+            #   others. We however do not do it for two reasons:
+            #   - User may have non standard flow relying on some alternative order and we want to
+            #     mimic this
+            #   - Complex strategies like VPs can use this in VPs to mark in parallel without
+            #     having to redefine the transactional flow completely
+            @_transaction_marked.each do |marking|
+              marking.pop ? mark_as_consumed(*marking) : mark_as_consumed!(*marking)
+            end
+          ensure
+            if transaction_started
+              @_transaction_marked.clear
+              @_in_transaction = false
+            end
+          end
+          # Stores the next offset for processing inside of the transaction and stores it in a
+          # local accumulator for post-transaction status update
+          #
+          # @param message [Messages::Message] message we want to commit inside of a transaction
+          # @param offset_metadata [String, nil] offset metadata or nil if none
+          # @param async [Boolean] should we mark in async or sync way (applicable only to post
+          #   transaction state synchronization usage as within transaction it is always sync)
+          def mark_in_transaction(message, offset_metadata, async)
+            raise Errors::TransactionRequiredError unless @_in_transaction
+            producer.transaction_mark_as_consumed(
+              client,
+              message,
+              offset_metadata
+            )
+            @_transaction_marked ||= []
+            @_transaction_marked << [message, offset_metadata, async]
+          end
           # No actions needed for the standard flow here
-          def handle_before_enqueue
-            Karafka.monitor.instrument('consumer.before_enqueue', caller: self)
+          def handle_before_schedule_consume
+            Karafka.monitor.instrument('consumer.before_schedule_consume', caller: self)
             nil
           end
@@ -87,7 +205,7 @@ module Karafka
             end
           end
-          # Standard
+          # Standard flow for revocation
           def handle_revoked
             coordinator.on_revoked do
               resume
@@ -100,6 +218,21 @@ module Karafka
               revoked
             end
           end
+          # No action needed for the tick standard flow
+          def handle_before_schedule_tick
+            Karafka.monitor.instrument('consumer.before_schedule_tick', caller: self)
+            nil
+          end
+          # Runs the consumer `#tick` method with reporting
+          def handle_tick
+            Karafka.monitor.instrument('consumer.tick', caller: self)
+            Karafka.monitor.instrument('consumer.ticked', caller: self) do
+              tick
+            end
+          end
         end
       end
     end