dispatch_policy 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. checksums.yaml +4 -4
  2. data/MIT-LICENSE +16 -17
  3. data/README.md +433 -388
  4. data/app/assets/stylesheets/dispatch_policy/application.css +157 -0
  5. data/app/controllers/dispatch_policy/application_controller.rb +45 -1
  6. data/app/controllers/dispatch_policy/dashboard_controller.rb +91 -0
  7. data/app/controllers/dispatch_policy/partitions_controller.rb +122 -0
  8. data/app/controllers/dispatch_policy/policies_controller.rb +94 -267
  9. data/app/controllers/dispatch_policy/staged_jobs_controller.rb +9 -0
  10. data/app/models/dispatch_policy/adaptive_concurrency_stats.rb +11 -81
  11. data/app/models/dispatch_policy/inflight_job.rb +12 -0
  12. data/app/models/dispatch_policy/partition.rb +21 -0
  13. data/app/models/dispatch_policy/staged_job.rb +4 -97
  14. data/app/models/dispatch_policy/tick_sample.rb +11 -0
  15. data/app/views/dispatch_policy/dashboard/index.html.erb +109 -0
  16. data/app/views/dispatch_policy/partitions/index.html.erb +63 -0
  17. data/app/views/dispatch_policy/partitions/show.html.erb +106 -0
  18. data/app/views/dispatch_policy/policies/index.html.erb +15 -37
  19. data/app/views/dispatch_policy/policies/show.html.erb +139 -223
  20. data/app/views/dispatch_policy/shared/_capacity.html.erb +67 -0
  21. data/app/views/dispatch_policy/shared/_hints.html.erb +13 -0
  22. data/app/views/dispatch_policy/shared/_partition_row.html.erb +12 -0
  23. data/app/views/dispatch_policy/staged_jobs/show.html.erb +31 -0
  24. data/app/views/layouts/dispatch_policy/application.html.erb +95 -238
  25. data/config/routes.rb +18 -2
  26. data/db/migrate/20260501000001_create_dispatch_policy_tables.rb +103 -0
  27. data/lib/dispatch_policy/bypass.rb +23 -0
  28. data/lib/dispatch_policy/config.rb +85 -0
  29. data/lib/dispatch_policy/context.rb +50 -0
  30. data/lib/dispatch_policy/cursor_pagination.rb +121 -0
  31. data/lib/dispatch_policy/decision.rb +22 -0
  32. data/lib/dispatch_policy/engine.rb +4 -27
  33. data/lib/dispatch_policy/forwarder.rb +63 -0
  34. data/lib/dispatch_policy/gate.rb +10 -38
  35. data/lib/dispatch_policy/gates/adaptive_concurrency.rb +99 -97
  36. data/lib/dispatch_policy/gates/concurrency.rb +45 -26
  37. data/lib/dispatch_policy/gates/throttle.rb +65 -41
  38. data/lib/dispatch_policy/inflight_tracker.rb +174 -0
  39. data/lib/dispatch_policy/job_extension.rb +155 -0
  40. data/lib/dispatch_policy/operator_hints.rb +126 -0
  41. data/lib/dispatch_policy/pipeline.rb +48 -0
  42. data/lib/dispatch_policy/policy.rb +61 -59
  43. data/lib/dispatch_policy/policy_dsl.rb +120 -0
  44. data/lib/dispatch_policy/railtie.rb +35 -0
  45. data/lib/dispatch_policy/registry.rb +46 -0
  46. data/lib/dispatch_policy/repository.rb +723 -0
  47. data/lib/dispatch_policy/serializer.rb +36 -0
  48. data/lib/dispatch_policy/tick.rb +260 -256
  49. data/lib/dispatch_policy/tick_loop.rb +59 -26
  50. data/lib/dispatch_policy/version.rb +1 -1
  51. data/lib/dispatch_policy.rb +71 -52
  52. data/lib/generators/dispatch_policy/install/install_generator.rb +70 -0
  53. data/lib/generators/dispatch_policy/install/templates/create_dispatch_policy_tables.rb.tt +95 -0
  54. data/lib/generators/dispatch_policy/install/templates/dispatch_tick_loop_job.rb.tt +53 -0
  55. data/lib/generators/dispatch_policy/install/templates/initializer.rb.tt +11 -0
  56. metadata +101 -43
  57. data/CHANGELOG.md +0 -43
  58. data/app/models/dispatch_policy/partition_inflight_count.rb +0 -42
  59. data/app/models/dispatch_policy/partition_observation.rb +0 -76
  60. data/app/models/dispatch_policy/throttle_bucket.rb +0 -41
  61. data/db/migrate/20260424000001_create_dispatch_policy_tables.rb +0 -80
  62. data/db/migrate/20260424000002_create_adaptive_concurrency_stats.rb +0 -22
  63. data/db/migrate/20260424000003_create_adaptive_concurrency_samples.rb +0 -25
  64. data/db/migrate/20260424000004_rename_samples_to_partition_observations.rb +0 -32
  65. data/db/migrate/20260425000001_add_duration_to_partition_observations.rb +0 -8
  66. data/lib/dispatch_policy/active_job_perform_all_later_patch.rb +0 -32
  67. data/lib/dispatch_policy/dispatch_context.rb +0 -53
  68. data/lib/dispatch_policy/dispatchable.rb +0 -123
  69. data/lib/dispatch_policy/gates/fair_interleave.rb +0 -32
  70. data/lib/dispatch_policy/gates/global_cap.rb +0 -26
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "base64"
4
+ require "json"
5
+
6
+ module DispatchPolicy
7
+ # Tiny keyset-pagination helper for the engine UI. Each sort mode declares
8
+ # a single sortable column plus the row id as a deterministic tiebreaker
9
+ # so two rows can never share the same cursor. NULLable columns are
10
+ # coalesced to a sentinel ('1970-01-01' for timestamps) so the cursor
11
+ # clause stays a simple tuple comparison.
12
+ module CursorPagination
13
+ SENTINEL_TS = "1970-01-01 00:00:00".freeze
14
+
15
+ # name => { sql_order:, cursor_sql:, direction:, label: }
16
+ # cursor_sql is the expression to extract the sort key for a row
17
+ # (used both in ORDER BY and to build the cursor tuple).
18
+ SORTS = {
19
+ "pending" => {
20
+ sql_order: "pending_count DESC, id ASC",
21
+ cursor_sql: "pending_count",
22
+ direction: :desc,
23
+ label: "pending desc"
24
+ },
25
+ "admitted" => {
26
+ sql_order: "total_admitted DESC, id ASC",
27
+ cursor_sql: "total_admitted",
28
+ direction: :desc,
29
+ label: "lifetime admitted"
30
+ },
31
+ "stale" => {
32
+ sql_order: "COALESCE(last_checked_at, TIMESTAMP '#{SENTINEL_TS}') ASC, id ASC",
33
+ cursor_sql: "COALESCE(last_checked_at, TIMESTAMP '#{SENTINEL_TS}')",
34
+ direction: :asc,
35
+ label: "stalest (round-trip)"
36
+ },
37
+ "recent" => {
38
+ sql_order: "COALESCE(last_admit_at, TIMESTAMP '#{SENTINEL_TS}') DESC, id ASC",
39
+ cursor_sql: "COALESCE(last_admit_at, TIMESTAMP '#{SENTINEL_TS}')",
40
+ direction: :desc,
41
+ label: "recent admit"
42
+ },
43
+ "key" => {
44
+ sql_order: "partition_key ASC, id ASC",
45
+ cursor_sql: "partition_key",
46
+ direction: :asc,
47
+ label: "partition key"
48
+ }
49
+ }.freeze
50
+
51
+ DEFAULT_SORT = "pending"
52
+
53
+ module_function
54
+
55
+ def sort_for(name)
56
+ SORTS[name] || SORTS.fetch(DEFAULT_SORT)
57
+ end
58
+
59
+ def encode(value, id)
60
+ Base64.urlsafe_encode64(JSON.dump([value, id]), padding: false)
61
+ end
62
+
63
+ def decode(cursor)
64
+ return nil if cursor.nil? || cursor.empty?
65
+
66
+ decoded = JSON.parse(Base64.urlsafe_decode64(cursor))
67
+ return nil unless decoded.is_a?(Array) && decoded.size == 2
68
+
69
+ decoded
70
+ rescue StandardError
71
+ nil
72
+ end
73
+
74
+ # Apply a cursor tuple (value, id) to an AR scope under the given sort.
75
+ # The tiebreaker on id is always ASC so id strictly advances forward.
76
+ def apply(scope, sort_name, cursor)
77
+ sort = sort_for(sort_name)
78
+ return scope if cursor.nil?
79
+
80
+ value, last_id = cursor
81
+ case sort[:direction]
82
+ when :desc
83
+ scope.where(
84
+ "(#{sort[:cursor_sql]} < ?) OR (#{sort[:cursor_sql]} = ? AND id > ?)",
85
+ value, value, last_id
86
+ )
87
+ when :asc
88
+ scope.where(
89
+ "(#{sort[:cursor_sql]} > ?) OR (#{sort[:cursor_sql]} = ? AND id > ?)",
90
+ value, value, last_id
91
+ )
92
+ end
93
+ end
94
+
95
+ # Read the cursor key from a row using the given sort. Returns the
96
+ # [serialized_value, row_id] pair the next-page cursor link is built from.
97
+ def extract(row, sort_name)
98
+ sort = sort_for(sort_name)
99
+ column = sort[:cursor_sql]
100
+ # cursor_sql may include a COALESCE(...). For row-side extraction we
101
+ # mirror that in Ruby. The coalesced columns are timestamps, so we
102
+ # use Time.at(0) (the epoch) as the equivalent sentinel.
103
+ raw = case column
104
+ when "pending_count", "total_admitted", "partition_key"
105
+ row.send(column)
106
+ when /COALESCE\(last_checked_at,/
107
+ row.last_checked_at || Time.at(0)
108
+ when /COALESCE\(last_admit_at,/
109
+ row.last_admit_at || Time.at(0)
110
+ end
111
+ [serialize_value(raw), row.id]
112
+ end
113
+
114
+ def serialize_value(v)
115
+ case v
116
+ when Time, ActiveSupport::TimeWithZone then v.utc.iso8601(6)
117
+ else v
118
+ end
119
+ end
120
+ end
121
+ end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DispatchPolicy
4
+ class Decision
5
+ attr_reader :allowed, :retry_after, :gate_state_patch, :reason
6
+
7
+ def initialize(allowed:, retry_after: nil, gate_state_patch: nil, reason: nil)
8
+ @allowed = allowed
9
+ @retry_after = retry_after
10
+ @gate_state_patch = gate_state_patch
11
+ @reason = reason
12
+ end
13
+
14
+ def self.unlimited
15
+ new(allowed: Float::INFINITY)
16
+ end
17
+
18
+ def self.deny(retry_after: nil, reason: nil)
19
+ new(allowed: 0, retry_after: retry_after, reason: reason)
20
+ end
21
+ end
22
+ end
@@ -3,34 +3,11 @@
3
3
  require "rails/engine"
4
4
 
5
5
  module DispatchPolicy
6
+ # Mounted by the host app. Views, controllers, and AR models live under
7
+ # `app/`; the layout inlines the engine CSS by reading
8
+ # `app/assets/stylesheets/dispatch_policy/application.css` at render time,
9
+ # so no asset pipeline integration is required.
6
10
  class Engine < ::Rails::Engine
7
11
  isolate_namespace DispatchPolicy
8
-
9
- initializer "dispatch_policy.reference_gates" do
10
- config.to_prepare do
11
- # Reference the built-in gates so they register in Gate.registry.
12
- DispatchPolicy::Gates::Concurrency
13
- DispatchPolicy::Gates::Throttle
14
- DispatchPolicy::Gates::GlobalCap
15
- DispatchPolicy::Gates::FairInterleave
16
- DispatchPolicy::Gates::AdaptiveConcurrency
17
-
18
- DispatchPolicy::ActiveJobPerformAllLaterPatch
19
- end
20
- end
21
-
22
- initializer "dispatch_policy.boot_prune", after: :load_config_initializers do
23
- config.to_prepare do
24
- begin
25
- DispatchPolicy::Tick.prune_orphan_gate_rows
26
- DispatchPolicy::Tick.prune_idle_partitions
27
- DispatchPolicy::PartitionObservation.prune!
28
- rescue ActiveRecord::NoDatabaseError,
29
- ActiveRecord::StatementInvalid,
30
- ActiveRecord::ConnectionNotEstablished
31
- # DB not ready — skip silently.
32
- end
33
- end
34
- end
35
12
  end
36
13
  end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DispatchPolicy
4
+ # Re-enqueues admitted jobs onto the real ActiveJob adapter under a
5
+ # `Bypass.with` block, so the around_enqueue callback that staged them
6
+ # in the first place lets the call through.
7
+ #
8
+ # Called from inside Tick's admission transaction. With a PG-backed
9
+ # adapter (good_job / solid_queue) the adapter's INSERT shares the
10
+ # transaction, so any exception here aborts the whole admission
11
+ # atomically (staged_jobs return, inflight rows disappear, partition
12
+ # counters revert, adapter rows revert). There is intentionally no
13
+ # rescue here: failures must propagate to roll back the surrounding TX.
14
+ #
15
+ # Bulk path: rows without scheduled_at go through ActiveJob.perform_all_later,
16
+ # which collapses to a single multi-row INSERT on adapters that implement
17
+ # enqueue_all natively (good_job, solid_queue). Rows with scheduled_at
18
+ # keep the per-row path because perform_all_later doesn't accept a
19
+ # wait_until per job.
20
+ module Forwarder
21
+ module_function
22
+
23
+ # @param rows [Array<Hash>] admitted staged_job rows (already deleted from staging)
24
+ # @raise StandardError propagates any error from deserialize / adapter enqueue
25
+ # @raise EnqueueFailed if the adapter's enqueue_all returned without
26
+ # raising but flagged any job as not-successfully-enqueued (the
27
+ # atomic contract requires caller-visible failure so the surrounding
28
+ # TX rolls back).
29
+ def dispatch(rows)
30
+ return if rows.empty?
31
+
32
+ scheduled, immediate = rows.partition { |row| row["scheduled_at"] }
33
+
34
+ if immediate.any?
35
+ jobs = immediate.map { |row| Serializer.deserialize(row["job_data"]) }
36
+ Bypass.with { ::ActiveJob.perform_all_later(jobs) }
37
+ not_enqueued = jobs.reject { |j| j.respond_to?(:successfully_enqueued?) ? j.successfully_enqueued? : true }
38
+ if not_enqueued.any?
39
+ ids = not_enqueued.map(&:job_id).join(", ")
40
+ raise EnqueueFailed,
41
+ "perform_all_later soft-failed #{not_enqueued.size}/#{jobs.size} jobs (#{ids})"
42
+ end
43
+ end
44
+
45
+ scheduled.each do |row|
46
+ job = Serializer.deserialize(row["job_data"])
47
+ wait_until = enqueue_wait_until(row)
48
+ Bypass.with { job.set(wait_until: wait_until).enqueue }
49
+ if job.respond_to?(:successfully_enqueued?) && !job.successfully_enqueued?
50
+ raise EnqueueFailed, "scheduled enqueue soft-failed for #{job.job_id}"
51
+ end
52
+ end
53
+ end
54
+
55
+ def enqueue_wait_until(row)
56
+ ts = row["scheduled_at"]
57
+ return nil unless ts
58
+ ts.is_a?(Time) ? ts : Time.parse(ts.to_s)
59
+ rescue ArgumentError
60
+ nil
61
+ end
62
+ end
63
+ end
@@ -2,48 +2,20 @@
2
2
 
3
3
  module DispatchPolicy
4
4
  class Gate
5
- class << self
6
- def registry
7
- @registry ||= {}
8
- end
9
-
10
- def register(name, klass)
11
- registry[name.to_sym] = klass
12
- end
13
- end
14
-
15
- attr_reader :policy, :partition_by, :name
16
-
17
- def initialize(policy:, name:, partition_by: nil, **opts)
18
- @policy = policy
19
- @name = name
20
- @partition_by = partition_by
21
- configure(**opts)
22
- end
23
-
24
- def configure(**_opts); end
25
-
26
- # Resolve a partition key for a given context.
27
- def partition_key_for(ctx)
28
- return "default" if @partition_by.nil?
29
- @partition_by.call(ctx).to_s
30
- end
31
-
32
- # Subclasses must implement.
33
- def filter(_batch, _context)
5
+ def name
34
6
  raise NotImplementedError
35
7
  end
36
8
 
37
- # Whether this gate keeps an in-flight count that must be released
38
- # when the job finishes.
39
- def tracks_inflight?
40
- false
9
+ # @param ctx [DispatchPolicy::Context]
10
+ # @param partition [Hash] the partitions row (string keys)
11
+ # @param admit_budget [Integer] the budget remaining from earlier gates
12
+ # @return [DispatchPolicy::Decision]
13
+ def evaluate(_ctx, _partition, _admit_budget)
14
+ raise NotImplementedError
41
15
  end
42
16
 
43
- protected
44
-
45
- def resolve(value, ctx)
46
- value.respond_to?(:call) ? value.call(ctx) : value
47
- end
17
+ # Called after a successful admit to update gate-local state.
18
+ # Returns a hash patch to merge into partition.gate_state, or nil.
19
+ def consume(_decision, _admitted_count); nil; end
48
20
  end
49
21
  end
@@ -2,122 +2,124 @@
2
2
 
3
3
  module DispatchPolicy
4
4
  module Gates
5
- # Adaptive variant of :concurrency. The cap per partition (current_max)
6
- # shrinks when the adapter queue backs up (recent queue_lag > target) or
7
- # when performs fail; grows back when workers drain admissions quickly
8
- # (queue_lag near zero). The signal is pure queue wait — admitted_at →
9
- # perform_start — so it reflects "are we admitting too fast?" without
10
- # getting polluted by how long the external work takes.
5
+ # Self-tuning concurrency gate. Like :concurrency but with a
6
+ # per-partition cap (`current_max`) that grows when the adapter
7
+ # queue is empty and shrinks when it builds up. AIMD loop persisted
8
+ # in `dispatch_policy_adaptive_concurrency_stats`.
11
9
  #
12
- # AIMD loop on a per-partition stats row; the underlying in-flight
13
- # counter is the same PartitionInflightCount used by :concurrency.
10
+ # Feedback signal is `queue_lag_ms = perform_start - admitted_at`
11
+ # (time the job spent waiting in the adapter after admission).
12
+ # Pure saturation signal — slow performs in the downstream service
13
+ # don't punish admissions if workers still drain the queue quickly.
14
+ #
15
+ # Update rule applied after each perform (in InflightTracker.track):
16
+ #
17
+ # succeeded? & ewma_lag <= target_lag_ms → current_max += 1
18
+ # succeeded? & ewma_lag > target_lag_ms → current_max *= slow_factor
19
+ # failed? → current_max *= fail_factor
20
+ #
21
+ # Always clamped to >= min. Never grows without bound — the
22
+ # algorithm self-limits via target_lag_ms.
14
23
  class AdaptiveConcurrency < Gate
15
- # alpha is fast enough that a single spike is forgotten in ~3
16
- # observations instead of ~15. slow_factor 0.95 halves the per-
17
- # observation shrink magnitude so the cap no longer overshoots
18
- # after a burst drains the adapter queue.
19
- DEFAULT_EWMA_ALPHA = 0.5
20
- DEFAULT_FAIL_FACTOR = 0.5
21
- DEFAULT_SLOW_FACTOR = 0.95
24
+ DEFAULT_FULL_BACKOFF = 1.0 # seconds
25
+ DEFAULT_EWMA_ALPHA = 0.5 # weight of the new sample in the EWMA
26
+ DEFAULT_FAIL_FACTOR = 0.5 # halve on perform raise
27
+ DEFAULT_SLOW_FACTOR = 0.95 # gentle shrink on overload
22
28
 
23
- # target_lag_ms accepts the legacy alias `target_latency` for
24
- # backwards compatibility.
25
- def configure(initial_max:,
26
- target_lag_ms: nil,
27
- target_latency: nil,
28
- min: 1,
29
- ewma_alpha: DEFAULT_EWMA_ALPHA,
30
- failure_decrease_factor: DEFAULT_FAIL_FACTOR,
31
- overload_decrease_factor: DEFAULT_SLOW_FACTOR)
32
- @initial_max = initial_max
33
- @min = min
34
- @target_lag_ms = target_lag_ms || target_latency
35
- @ewma_alpha = ewma_alpha
36
- @fail_factor = failure_decrease_factor
37
- @slow_factor = overload_decrease_factor
38
- raise ArgumentError, "adaptive_concurrency requires target_lag_ms" if @target_lag_ms.nil?
39
- end
29
+ attr_reader :initial_max, :target_lag_ms, :min,
30
+ :ewma_alpha, :fail_factor, :slow_factor, :full_backoff
40
31
 
41
- def tracks_inflight?
42
- true
32
+ def initialize(initial_max:, target_lag_ms:, min: 1,
33
+ ewma_alpha: DEFAULT_EWMA_ALPHA,
34
+ failure_decrease_factor: DEFAULT_FAIL_FACTOR,
35
+ overload_decrease_factor: DEFAULT_SLOW_FACTOR,
36
+ full_backoff: DEFAULT_FULL_BACKOFF)
37
+ super()
38
+ @initial_max = Integer(initial_max)
39
+ @target_lag_ms = Float(target_lag_ms)
40
+ @min = Integer(min)
41
+ @ewma_alpha = Float(ewma_alpha)
42
+ @fail_factor = Float(failure_decrease_factor)
43
+ @slow_factor = Float(overload_decrease_factor)
44
+ @full_backoff = Float(full_backoff)
45
+ raise ArgumentError, "target_lag_ms must be > 0" unless @target_lag_ms.positive?
46
+ raise ArgumentError, "min must be >= 1" unless @min >= 1
47
+ raise ArgumentError, "initial_max must be >= min" unless @initial_max >= @min
43
48
  end
44
49
 
45
- attr_reader :initial_max, :min, :target_lag_ms,
46
- :ewma_alpha, :fail_factor, :slow_factor
47
-
48
- def filter(batch, context)
49
- by_partition = batch.group_by { |staged| partition_key_for(context.for(staged)) }
50
-
51
- # Seed any missing stats rows so the first admission has something
52
- # to read. Cheap: one INSERT ... ON CONFLICT DO NOTHING per key.
53
- by_partition.each_key do |key|
54
- AdaptiveConcurrencyStats.seed!(
55
- policy_name: policy.name,
56
- gate_name: name,
57
- partition_key: key,
58
- initial_max: resolve(@initial_max, nil).to_i
59
- )
60
- end
50
+ def name
51
+ :adaptive_concurrency
52
+ end
61
53
 
62
- stats = AdaptiveConcurrencyStats.fetch_many(
63
- policy_name: policy.name,
64
- gate_name: name,
65
- partition_keys: by_partition.keys
66
- )
54
+ def evaluate(ctx, partition, admit_budget)
55
+ policy_name = partition["policy_name"]
56
+ key = inflight_partition_key(policy_name, ctx)
67
57
 
68
- in_flight = PartitionInflightCount.fetch_many(
69
- policy_name: policy.name,
70
- gate_name: name,
71
- partition_keys: by_partition.keys
58
+ # Seed lazily so the very first admission has a row to read
59
+ # (and so record_observation can UPDATE without a check).
60
+ Repository.adaptive_seed!(
61
+ policy_name: policy_name,
62
+ partition_key: key,
63
+ initial_max: @initial_max
72
64
  )
73
65
 
74
- min_v = resolve(@min, nil).to_i
66
+ cap = Repository.adaptive_current_max(
67
+ policy_name: policy_name,
68
+ partition_key: key
69
+ ) || @initial_max
70
+ cap = [cap, @min].max
75
71
 
76
- admitted = []
77
- by_partition.each do |partition_key, jobs|
78
- effective_max = stats.dig(partition_key, :current_max) || resolve(@initial_max, nil).to_i
79
- effective_max = [ effective_max, min_v ].max
80
- used = in_flight.fetch(partition_key, 0)
72
+ in_flight = Repository.count_inflight(
73
+ policy_name: policy_name,
74
+ partition_key: key
75
+ )
76
+ remaining = cap - in_flight
81
77
 
82
- # Safety valve: if nothing is in-flight for this partition and
83
- # there's pending, the adapter queue is (or is about to be)
84
- # empty and workers will idle. Ensure we hand over at least
85
- # initial_max so the stream never dries up on its own.
86
- if used.zero? && jobs.any?
87
- effective_max = [ effective_max, resolve(@initial_max, nil).to_i ].max
88
- end
78
+ # Safety valve. AIMD can shrink current_max during a slow burst;
79
+ # if the partition then idles, no observations come in to grow
80
+ # the cap back. When in_flight == 0 we ensure at least
81
+ # initial_max so the partition never fossilizes at min.
82
+ remaining = [remaining, @initial_max].max if in_flight.zero?
89
83
 
90
- jobs.each do |staged|
91
- break unless used < effective_max
92
- admitted << [ staged, partition_key ]
93
- used += 1
94
- end
84
+ if remaining <= 0
85
+ return Decision.new(allowed: 0,
86
+ retry_after: @full_backoff,
87
+ reason: "adaptive_concurrency_full")
95
88
  end
96
89
 
97
- context.record_partitions(admitted, gate: name)
98
- admitted.map(&:first)
90
+ Decision.new(allowed: [remaining, admit_budget].min)
99
91
  end
100
92
 
101
- # Called by Dispatchable#around_perform for each adaptive gate that
102
- # touched this job. Lives on the gate instance because configuration
103
- # (alpha, target_latency, etc.) is per gate.
104
- def record_observation(partition_key:, queue_lag_ms:, succeeded:)
105
- AdaptiveConcurrencyStats.record_observation!(
106
- policy_name: policy.name,
107
- gate_name: name,
108
- partition_key: partition_key.to_s,
109
- queue_lag_ms: queue_lag_ms,
110
- succeeded: succeeded,
111
- alpha: @ewma_alpha,
112
- min: resolve(@min, nil).to_i,
113
- target_lag_ms: resolve(@target_lag_ms, nil).to_f,
114
- fail_factor: @fail_factor,
115
- slow_factor: @slow_factor,
116
- initial_max: resolve(@initial_max, nil).to_i
93
+ # Same canonical scope as the staged_jobs partition_key — every
94
+ # gate in a policy uses `policy.partition_for(ctx)` so the
95
+ # inflight count and the adaptive stats line up exactly.
96
+ def inflight_partition_key(policy_name, ctx)
97
+ policy = DispatchPolicy.registry.fetch(policy_name)
98
+ raise InvalidPolicy, "unknown policy #{policy_name.inspect}" unless policy
99
+ policy.partition_for(ctx)
100
+ end
101
+
102
+ # Called from InflightTracker.track after each perform completes
103
+ # (success or failure). Updates the AIMD state atomically in one
104
+ # SQL statement.
105
+ def record_observation(policy_name:, partition_key:, queue_lag_ms:, succeeded:)
106
+ Repository.adaptive_seed!(
107
+ policy_name: policy_name,
108
+ partition_key: partition_key.to_s,
109
+ initial_max: @initial_max
110
+ )
111
+ Repository.adaptive_record!(
112
+ policy_name: policy_name,
113
+ partition_key: partition_key.to_s,
114
+ queue_lag_ms: queue_lag_ms,
115
+ succeeded: succeeded,
116
+ alpha: @ewma_alpha,
117
+ target_lag_ms: @target_lag_ms,
118
+ fail_factor: @fail_factor,
119
+ slow_factor: @slow_factor,
120
+ min: @min
117
121
  )
118
122
  end
119
123
  end
120
-
121
- Gate.register(:adaptive_concurrency, AdaptiveConcurrency)
122
124
  end
123
125
  end
@@ -2,42 +2,61 @@
2
2
 
3
3
  module DispatchPolicy
4
4
  module Gates
5
+ # Concurrency gate: caps in-flight jobs per partition.
6
+ #
7
+ # The partition scope is the policy's `partition_by`. Inflight rows
8
+ # are written by InflightTracker around_perform with the same key,
9
+ # so this gate's COUNT(*) aggregates the same canonical scope as
10
+ # the staged_jobs row.
5
11
  class Concurrency < Gate
6
- def configure(max:)
7
- @max = max
12
+ DEFAULT_FULL_BACKOFF = 1.0 # seconds
13
+
14
+ attr_reader :max_proc, :full_backoff
15
+
16
+ def initialize(max:, full_backoff: DEFAULT_FULL_BACKOFF)
17
+ super()
18
+ @max_proc = max.respond_to?(:call) ? max : ->(_ctx) { max }
19
+ @full_backoff = full_backoff.to_f
8
20
  end
9
21
 
10
- def tracks_inflight?
11
- true
22
+ def name
23
+ :concurrency
12
24
  end
13
25
 
14
- def filter(batch, context)
15
- by_partition = batch.group_by { |staged| partition_key_for(context.for(staged)) }
26
+ def evaluate(ctx, partition, admit_budget)
27
+ cap = capacity_for(ctx)
28
+ return Decision.deny(retry_after: @full_backoff, reason: "max=0") if cap <= 0
16
29
 
17
- in_flight = PartitionInflightCount.fetch_many(
18
- policy_name: policy.name,
19
- gate_name: name,
20
- partition_keys: by_partition.keys
30
+ in_flight = Repository.count_inflight(
31
+ policy_name: partition["policy_name"],
32
+ partition_key: inflight_partition_key(partition["policy_name"], ctx)
21
33
  )
22
-
23
- admitted = []
24
- by_partition.each do |partition_key, jobs|
25
- jobs.each do |staged|
26
- ctx = context.for(staged)
27
- limit = resolve(@max, ctx).to_i
28
- used = in_flight.fetch(partition_key, 0)
29
- if used < limit
30
- admitted << [ staged, partition_key ]
31
- in_flight[partition_key] = used + 1
32
- end
33
- end
34
+ remaining = cap - in_flight
35
+ if remaining <= 0
36
+ # Stop hammering this partition with COUNT(*) every tick — back off
37
+ # until enough jobs are likely to have finished.
38
+ return Decision.new(allowed: 0, retry_after: @full_backoff, reason: "concurrency_full")
34
39
  end
35
40
 
36
- context.record_partitions(admitted, gate: name)
37
- admitted.map(&:first)
41
+ Decision.new(allowed: [remaining, admit_budget].min)
38
42
  end
39
- end
40
43
 
41
- Gate.register(:concurrency, Concurrency)
44
+ # The inflight key is always the policy's canonical partition
45
+ # value — same as what's stored in staged_jobs.partition_key.
46
+ # This is what makes throttle + concurrency in the same policy
47
+ # enforce their state at exactly one consistent scope.
48
+ def inflight_partition_key(policy_name, ctx)
49
+ policy = DispatchPolicy.registry.fetch(policy_name)
50
+ raise InvalidPolicy, "unknown policy #{policy_name.inspect}" unless policy
51
+ policy.partition_for(ctx)
52
+ end
53
+
54
+ private
55
+
56
+ def capacity_for(ctx)
57
+ value = @max_proc.call(ctx)
58
+ value.nil? ? 0 : Integer(value)
59
+ end
60
+ end
42
61
  end
43
62
  end