RubyGems - dispatch_policy - Versions diffs - 0.2.0 → 0.3.0 - Mend

dispatch_policy 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70)

checksums.yaml +4 -4
data/MIT-LICENSE +16 -17
data/README.md +433 -388
data/app/assets/stylesheets/dispatch_policy/application.css +157 -0
data/app/controllers/dispatch_policy/application_controller.rb +45 -1
data/app/controllers/dispatch_policy/dashboard_controller.rb +91 -0
data/app/controllers/dispatch_policy/partitions_controller.rb +122 -0
data/app/controllers/dispatch_policy/policies_controller.rb +94 -267
data/app/controllers/dispatch_policy/staged_jobs_controller.rb +9 -0
data/app/models/dispatch_policy/adaptive_concurrency_stats.rb +11 -81
data/app/models/dispatch_policy/inflight_job.rb +12 -0
data/app/models/dispatch_policy/partition.rb +21 -0
data/app/models/dispatch_policy/staged_job.rb +4 -97
data/app/models/dispatch_policy/tick_sample.rb +11 -0
data/app/views/dispatch_policy/dashboard/index.html.erb +109 -0
data/app/views/dispatch_policy/partitions/index.html.erb +63 -0
data/app/views/dispatch_policy/partitions/show.html.erb +106 -0
data/app/views/dispatch_policy/policies/index.html.erb +15 -37
data/app/views/dispatch_policy/policies/show.html.erb +139 -223
data/app/views/dispatch_policy/shared/_capacity.html.erb +67 -0
data/app/views/dispatch_policy/shared/_hints.html.erb +13 -0
data/app/views/dispatch_policy/shared/_partition_row.html.erb +12 -0
data/app/views/dispatch_policy/staged_jobs/show.html.erb +31 -0
data/app/views/layouts/dispatch_policy/application.html.erb +95 -238
data/config/routes.rb +18 -2
data/db/migrate/20260501000001_create_dispatch_policy_tables.rb +103 -0
data/lib/dispatch_policy/bypass.rb +23 -0
data/lib/dispatch_policy/config.rb +85 -0
data/lib/dispatch_policy/context.rb +50 -0
data/lib/dispatch_policy/cursor_pagination.rb +121 -0
data/lib/dispatch_policy/decision.rb +22 -0
data/lib/dispatch_policy/engine.rb +4 -27
data/lib/dispatch_policy/forwarder.rb +63 -0
data/lib/dispatch_policy/gate.rb +10 -38
data/lib/dispatch_policy/gates/adaptive_concurrency.rb +99 -97
data/lib/dispatch_policy/gates/concurrency.rb +45 -26
data/lib/dispatch_policy/gates/throttle.rb +65 -41
data/lib/dispatch_policy/inflight_tracker.rb +174 -0
data/lib/dispatch_policy/job_extension.rb +155 -0
data/lib/dispatch_policy/operator_hints.rb +126 -0
data/lib/dispatch_policy/pipeline.rb +48 -0
data/lib/dispatch_policy/policy.rb +61 -59
data/lib/dispatch_policy/policy_dsl.rb +120 -0
data/lib/dispatch_policy/railtie.rb +35 -0
data/lib/dispatch_policy/registry.rb +46 -0
data/lib/dispatch_policy/repository.rb +723 -0
data/lib/dispatch_policy/serializer.rb +36 -0
data/lib/dispatch_policy/tick.rb +260 -256
data/lib/dispatch_policy/tick_loop.rb +59 -26
data/lib/dispatch_policy/version.rb +1 -1
data/lib/dispatch_policy.rb +71 -52
data/lib/generators/dispatch_policy/install/install_generator.rb +70 -0
data/lib/generators/dispatch_policy/install/templates/create_dispatch_policy_tables.rb.tt +95 -0
data/lib/generators/dispatch_policy/install/templates/dispatch_tick_loop_job.rb.tt +53 -0
data/lib/generators/dispatch_policy/install/templates/initializer.rb.tt +11 -0
metadata +101 -43
data/CHANGELOG.md +0 -43
data/app/models/dispatch_policy/partition_inflight_count.rb +0 -42
data/app/models/dispatch_policy/partition_observation.rb +0 -76
data/app/models/dispatch_policy/throttle_bucket.rb +0 -41
data/db/migrate/20260424000001_create_dispatch_policy_tables.rb +0 -80
data/db/migrate/20260424000002_create_adaptive_concurrency_stats.rb +0 -22
data/db/migrate/20260424000003_create_adaptive_concurrency_samples.rb +0 -25
data/db/migrate/20260424000004_rename_samples_to_partition_observations.rb +0 -32
data/db/migrate/20260425000001_add_duration_to_partition_observations.rb +0 -8
data/lib/dispatch_policy/active_job_perform_all_later_patch.rb +0 -32
data/lib/dispatch_policy/dispatch_context.rb +0 -53
data/lib/dispatch_policy/dispatchable.rb +0 -123
data/lib/dispatch_policy/gates/fair_interleave.rb +0 -32
data/lib/dispatch_policy/gates/global_cap.rb +0 -26

data/app/controllers/dispatch_policy/policies_controller.rb CHANGED Viewed

@@ -2,294 +2,121 @@
 module DispatchPolicy
   class PoliciesController < ApplicationController
-    STALE_PENDING_THRESHOLD = 1.hour
-    PARTITION_LIST_PAGE_SIZE = 25
+    before_action :find_policy, only: %i[show pause resume drain]
-    before_action :load_policy, only: :show
+    DRAIN_MAX_PER_REQUEST = 10_000
     def index
-      @policies = DispatchPolicy.registry.map do |name, job_class|
-        scope   = StagedJob.where(policy_name: name)
-        pending = scope.pending
+      registry_names = DispatchPolicy.registry.names
+      db_names       = Partition.distinct.pluck(:policy_name)
+      names          = (registry_names + db_names).uniq.sort
+      in_flight_by_policy = InflightJob.where(policy_name: names).group(:policy_name).count
+      @rows = names.map do |name|
+        partitions = Partition.for_policy(name)
         {
-          name:            name,
-          job_class:       job_class,
-          policy:          job_class.resolved_dispatch_policy,
-          pending_count:   pending.count,
-          admitted_count:  scope.admitted.count,
-          completed_24h:   scope.completed.where(completed_at: 24.hours.ago..).count,
-          oldest_pending:  pending.minimum(:staged_at),
-          stale_threshold: STALE_PENDING_THRESHOLD
+          name:           name,
+          registered:     registry_names.include?(name),
+          pending:        partitions.sum(:pending_count),
+          in_flight:      in_flight_by_policy[name] || 0,
+          partitions:     partitions.count,
+          paused_count:   partitions.paused.count
         }
-      end.sort_by { |p| -p[:pending_count] }
-      @active_partitions = PartitionInflightCount.where("in_flight > 0").count
-      @expired_leases    = StagedJob.expired_leases.count
+      end
     end
     def show
-      scope = StagedJob.where(policy_name: @policy_name)
-      @pending_count           = scope.pending.count
-      @pending_eligible_count  = scope.pending.where("not_before_at IS NULL OR not_before_at <= ?", Time.current).count
-      @pending_scheduled_count = @pending_count - @pending_eligible_count
-      @admitted_count          = scope.admitted.count
-      @completed_24h           = scope.completed.where(completed_at: 24.hours.ago..).count
-      all_breakdown = partition_breakdown(scope)
-      # "Watched" subset (passed via ?watch=a,b,c; the JS layer syncs it
-      # with localStorage so the choice sticks across reloads).
-      @watched_keys        = (params[:watch] || "").split(",").map(&:strip).reject(&:empty?)
-      @partition_breakdown = @watched_keys.any? ? all_breakdown.select { |r| @watched_keys.include?(r[:partition]) } : []
-      # Browsable list of every active partition with filter + sort + pagination.
-      @partition_search = params[:q].to_s.strip
-      @partition_page   = [ params[:page].to_i, 1 ].max
-      @partition_sort   = %w[source partition pending in_flight completed_24h last_enqueued_at last_dispatched_at].include?(params[:sort]) ? params[:sort] : "activity"
-      @partition_dir    = params[:dir] == "asc" ? "asc" : "desc"
-      list = all_breakdown
-      list = list.select { |r| r[:partition].to_s.downcase.include?(@partition_search.downcase) } if @partition_search.present?
-      list = sort_partition_list(list, @partition_sort, @partition_dir)
-      @partition_total_list = list.size
-      offset                = (@partition_page - 1) * PARTITION_LIST_PAGE_SIZE
-      @partition_list       = list[offset, PARTITION_LIST_PAGE_SIZE] || []
-      load_adaptive_chart_data
-      @throttle_buckets = ThrottleBucket
-        .where(policy_name: @policy_name).order(:gate_name, :partition_key).limit(50)
-      # Explicit select: don't load the `arguments` jsonb (job payload —
-      # may contain PII / tokens) into memory just to render six fields.
-      @pending_jobs = scope.pending
-        .select(:id, :dedupe_key, :round_robin_key, :priority, :staged_at, :not_before_at)
-        .order(:priority, :staged_at)
-        .limit(50)
-    end
-    private
-    def load_policy
-      @policy_name = params[:policy_name]
-      @job_class   = DispatchPolicy.registry[@policy_name] ||
-                     Tick.autoload_job_for(@policy_name)
-      raise ActiveRecord::RecordNotFound unless @job_class
-      @policy = @job_class.resolved_dispatch_policy
-    end
-    # Per-(source, partition) breakdown of pending-eligible / pending-scheduled
-    # / in-flight / completed-24h. A "source" is either a gate with a
-    # partition_by (uses gate.partition_key_for(context)) or the policy's
-    # round_robin_by declaration (uses the round_robin_key column directly).
-    # All four counts come from StagedJob groupings; PartitionInflightCount
-    # is an admission-time optimization, not the user-facing truth.
-    def partition_breakdown(scope)
-      sources = partition_sources
-      return [] if sources.empty?
-      now       = Time.current
-      now_iso   = now.iso8601
-      since_24h = 24.hours.ago.iso8601
-      limit     = DispatchPolicy.config.admin_partition_limit
-      @partition_breakdown_truncated = false
-      adaptive_stats = AdaptiveConcurrencyStats.where(policy_name: @policy_name)
-        .order(updated_at: :desc)
-        .limit(limit)
-        .pluck(:gate_name, :partition_key, :current_max, :ewma_latency_ms)
-        .each_with_object({}) { |(g, k, c, l), h|
-          h[[ g, k ]] = { current_max: c, ewma_latency_ms: l.to_f.round(1) }
-        }
-      rows = Hash.new { |h, k|
-        h[k] = {
-          source:             k[0],
-          partition:          k[1],
-          eligible:           0,
-          scheduled:          0,
-          in_flight:          0,
-          completed_24h:      0,
-          last_enqueued_at:   nil,
-          last_dispatched_at: nil,
-          current_max:        nil,
-          ewma_latency_ms:    nil
-        }
+      @policy_object = DispatchPolicy.registry.fetch(@policy_name)
+      @partitions    = Partition.for_policy(@policy_name)
+                                .order(Arel.sql("pending_count DESC, last_admit_at DESC NULLS LAST"))
+                                .limit(100)
+      @top_admitted  = Partition.for_policy(@policy_name)
+                                .order(total_admitted: :desc)
+                                .limit(20)
+      @totals = {
+        pending:    Partition.for_policy(@policy_name).sum(:pending_count),
+        in_flight:  InflightJob.where(policy_name: @policy_name).count,
+        partitions: Partition.for_policy(@policy_name).count
       }
-      # Each aggregation below is order-by-count + limited so that a
-      # policy with tens of thousands of distinct (context, round_robin_key)
-      # tuples can't pull megabytes of rows into memory per request. We
-      # show the top-N most-active partitions per axis and flip the
-      # truncation flag for the view banner.
-      # Activity timestamps bounded to the last 24h so the scan stays on
-      # an index-friendly slice of staged_jobs.
-      activity_rows = scope
-        .where("staged_at > ?", since_24h)
-        .group(:context, :round_robin_key)
-        .order(Arel.sql("MAX(staged_at) DESC"))
-        .limit(limit)
-        .pluck(
-          :context,
-          :round_robin_key,
-          Arel.sql("MAX(staged_at)"),
-          Arel.sql("MAX(admitted_at)")
-        )
-      @partition_breakdown_truncated = true if activity_rows.size >= limit
-      sources.each do |name, extract|
-        pending_counts = scope.pending.group(:context, :round_robin_key)
-          .order(Arel.sql("count(*) DESC"))
-          .limit(limit)
-          .pluck(
-            :context,
-            :round_robin_key,
-            Arel.sql("count(*) filter (where not_before_at is null or not_before_at <= '#{now_iso}')"),
-            Arel.sql("count(*) filter (where not_before_at > '#{now_iso}')")
-          )
-        @partition_breakdown_truncated = true if pending_counts.size >= limit
-        pending_counts.each do |ctx, rr_key, eligible, scheduled|
-          partition = extract.call(ctx, rr_key)
-          row = rows[[ name, partition ]]
-          row[:eligible]  += eligible
-          row[:scheduled] += scheduled
-        end
-        admitted_counts = scope.admitted.group(:context, :round_robin_key)
-          .order(Arel.sql("count(*) DESC"))
-          .limit(limit)
-          .pluck(:context, :round_robin_key, Arel.sql("count(*)"))
-        @partition_breakdown_truncated = true if admitted_counts.size >= limit
-        admitted_counts.each do |ctx, rr_key, in_flight|
-          partition = extract.call(ctx, rr_key)
-          rows[[ name, partition ]][:in_flight] += in_flight
-        end
-        completed_counts = scope.completed.where("completed_at > ?", since_24h)
-          .group(:context, :round_robin_key)
-          .order(Arel.sql("count(*) DESC"))
-          .limit(limit)
-          .pluck(:context, :round_robin_key, Arel.sql("count(*)"))
-        @partition_breakdown_truncated = true if completed_counts.size >= limit
-        completed_counts.each do |ctx, rr_key, completed|
-          partition = extract.call(ctx, rr_key)
-          rows[[ name, partition ]][:completed_24h] += completed
-        end
-        activity_rows.each do |ctx, rr_key, last_staged, last_admitted|
-          partition = extract.call(ctx, rr_key)
-          row       = rows[[ name, partition ]]
-          row[:last_enqueued_at]   = [ row[:last_enqueued_at], last_staged ].compact.max
-          row[:last_dispatched_at] = [ row[:last_dispatched_at], last_admitted ].compact.max
-        end
-      end
-      rows.each do |(source, partition), row|
-        stats = adaptive_stats[[ source, partition ]]
-        next unless stats
-        row[:current_max]     = stats[:current_max]
-        row[:ewma_latency_ms] = stats[:ewma_latency_ms]
-      end
-      # Two different sources (say round_robin_by account_id + a gate
-      # partitioned by account_id) producing the same partition key yield
-      # identical counts — collapse them into one row with a merged source
-      # label instead of listing the same numbers twice.
-      merged = rows.values
-        .reject { |r| r[:partition].nil? || r[:partition].empty? }
-        .group_by { |r| [ r[:partition], r[:eligible], r[:scheduled], r[:in_flight], r[:completed_24h] ] }
-        .map { |_, group|
-          base = group.first.dup
-          base[:source] = group.map { |r| r[:source] }.uniq.sort.join(" + ")
-          group.each do |r|
-            base[:current_max]        ||= r[:current_max]
-            base[:ewma_latency_ms]    ||= r[:ewma_latency_ms]
-            base[:last_enqueued_at]     = [ base[:last_enqueued_at], r[:last_enqueued_at] ].compact.max
-            base[:last_dispatched_at]   = [ base[:last_dispatched_at], r[:last_dispatched_at] ].compact.max
-          end
-          base
-        }
-      merged.sort_by { |r|
-        [ -(r[:eligible] + r[:scheduled] + r[:in_flight] + r[:completed_24h]), r[:source], r[:partition] ]
+      now = Time.current
+      @windows = {
+        "1m"  => Repository.tick_summary(policy_name: @policy_name, since: now - 60),
+        "5m"  => Repository.tick_summary(policy_name: @policy_name, since: now - 5 * 60),
+        "15m" => Repository.tick_summary(policy_name: @policy_name, since: now - 15 * 60)
+      }
+      @denied_reasons = Repository.denied_reasons_summary(policy_name: @policy_name, since: now - 15 * 60)
+      @round_trip     = Repository.partition_round_trip_stats(policy_name: @policy_name)
+      @sparkline      = Repository.tick_samples_buckets(policy_name: @policy_name, since: now - 30 * 60, bucket_seconds: 60)
+      @pending_trend  = Repository.trend_direction(@sparkline.map { |b| b[:pending_total] })
+      cfg = DispatchPolicy.config
+      @capacity = {
+        admitted_per_minute:  @windows["1m"][:jobs_admitted],
+        adapter_target_jps:   cfg.adapter_throughput_target,
+        avg_tick_ms:          @windows["1m"][:avg_duration_ms],
+        max_tick_ms:          @windows["1m"][:max_duration_ms],
+        tick_max_duration_ms: cfg.tick_max_duration.to_i * 1000
       }
-    end
-    def sort_partition_list(list, sort, dir)
-      # Put nulls at the bottom regardless of direction (Time#to_f on nil
-      # would crash; -Float::INFINITY sorts first, +Float::INFINITY last).
-      key =
-        case sort
-        when "source"             then ->(r) { [ r[:source], r[:partition] ] }
-        when "partition"          then ->(r) { r[:partition] }
-        when "pending"            then ->(r) { r[:eligible] + r[:scheduled] }
-        when "in_flight"          then ->(r) { r[:in_flight] }
-        when "completed_24h"      then ->(r) { r[:completed_24h] }
-        when "last_enqueued_at"   then ->(r) { r[:last_enqueued_at]&.to_f || 0 }
-        when "last_dispatched_at" then ->(r) { r[:last_dispatched_at]&.to_f || 0 }
-        else ->(r) { r[:eligible] + r[:scheduled] + r[:in_flight] + r[:completed_24h] }
-        end
-      sorted = list.sort_by(&key)
-      dir == "asc" ? sorted : sorted.reverse
+      @hints = OperatorHints.for(
+        tick_max_duration_ms: @capacity[:tick_max_duration_ms],
+        avg_tick_ms:          @capacity[:avg_tick_ms],
+        max_tick_ms:          @capacity[:max_tick_ms],
+        pending_total:        @totals[:pending],
+        admitted_per_minute:  @capacity[:admitted_per_minute],
+        forward_failures:     @windows["1m"][:forward_failures],
+        jobs_admitted:        @windows["1m"][:jobs_admitted],
+        active_partitions:    @round_trip[:active_partitions],
+        never_checked:        @round_trip[:never_checked],
+        in_backoff:           @round_trip[:in_backoff],
+        total_partitions:     @totals[:partitions],
+        adapter_target_jps:   @capacity[:adapter_target_jps],
+        pending_trend:        @pending_trend
+      )
     end
-    # Returns [[source_name, ->(ctx, rr_key) { partition_key }], ...]
-    # covering every partition-producing declaration on the policy: every
-    # gate with a partition_by, plus round_robin_by if declared.
-    def partition_sources
-      return [] unless @policy
+    def pause
+      Partition.for_policy(@policy_name).update_all(status: "paused", updated_at: Time.current)
+      redirect_to policy_path(@policy_name), notice: "Policy paused."
+    end
-      sources = @policy.gates.select(&:partition_by).map do |gate|
-        [ gate.name.to_s, ->(ctx, _rr) { gate.partition_key_for((ctx || {}).symbolize_keys) } ]
-      end
-      sources << [ "round_robin_by", ->(_ctx, rr) { rr } ] if @policy.round_robin?
-      sources
+    def resume
+      Partition.for_policy(@policy_name).update_all(status: "active", updated_at: Time.current)
+      redirect_to policy_path(@policy_name), notice: "Policy resumed."
     end
-    # Build chart data from PartitionObservation. Two queries:
-    # - Global aggregated (one row per minute): cheap even with 1000s of
-    #   partitions because we SUM/AVG in SQL, not in Ruby.
-    # - Per-partition sparkline data, scoped to only the partitions we're
-    #   going to actually render (breakdown's top N).
-    def load_adaptive_chart_data
-      last_minute   = Time.current.utc.beginning_of_minute
-      @chart_slots  = (0..59).map { |i| last_minute - (59 - i).minutes }
-      @chart_labels = @chart_slots.map { |t| t.strftime("%H:%M") }
-      slot_index    = @chart_slots.each_with_index.to_h
+    # Force-admits every staged job across every partition of the policy,
+    # bypassing all gates. Walks partitions in pending-DESC order so the
+    # busiest ones drain first. Bounded at DRAIN_MAX_PER_REQUEST per click.
+    def drain
+      drained = 0
+      Partition.for_policy(@policy_name)
+               .where("pending_count > 0")
+               .order(pending_count: :desc, id: :asc)
+               .limit(500)
+               .each do |partition|
+        break if drained >= DRAIN_MAX_PER_REQUEST
+        batch, _ = PartitionsController.drain_partition!(partition)
+        drained += batch
+      end
-      @adaptive_global    = Array.new(@chart_slots.size)
-      @completions_global = Array.new(@chart_slots.size, 0)
-      global_rows = PartitionObservation
-        .where(policy_name: @policy_name)
-        .where("minute_bucket >= ?", @chart_slots.first)
-        .group(:minute_bucket)
-        .pluck(:minute_bucket, Arel.sql("SUM(total_lag_ms)"), Arel.sql("SUM(observation_count)"))
-      global_rows.each do |bucket, total_lag, obs_count|
-        idx = slot_index[bucket.utc.beginning_of_minute]
-        next unless idx
-        @completions_global[idx] = obs_count
-        @adaptive_global[idx]    = obs_count.positive? ? (total_lag.to_f / obs_count).round(1) : nil
+      remaining = Partition.for_policy(@policy_name).sum(:pending_count)
+      notice = if remaining.positive?
+        "Drained #{drained} job(s) across this policy; #{remaining} still pending — click drain again to continue."
+      else
+        "Drained #{drained} job(s); policy fully drained."
       end
+      redirect_to policy_path(@policy_name), notice: notice
+    end
-      partition_keys = (@partition_breakdown || []).map { |r| r[:partition] }.uniq
-      @adaptive_samples    = {}
-      @completions_samples = {}
-      return if partition_keys.empty?
+    private
-      per_partition_lag    = Hash.new { |h, k| h[k] = Array.new(@chart_slots.size) }
-      per_partition_counts = Hash.new { |h, k| h[k] = Array.new(@chart_slots.size, 0) }
-      rows = PartitionObservation
-        .where(policy_name: @policy_name, partition_key: partition_keys)
-        .where("minute_bucket >= ?", @chart_slots.first)
-        .pluck(:partition_key, :minute_bucket, :total_lag_ms, :observation_count)
-      rows.each do |pk, bucket, total, count|
-        idx = slot_index[bucket.utc.beginning_of_minute]
-        next unless idx
-        per_partition_lag[pk][idx]    = count.positive? ? (total.to_f / count).round(1) : nil
-        per_partition_counts[pk][idx] = count
-      end
-      @adaptive_samples    = per_partition_lag
-      @completions_samples = per_partition_counts
+    def find_policy
+      @policy_name = params[:name]
     end
   end
 end

data/app/controllers/dispatch_policy/staged_jobs_controller.rb ADDED Viewed

@@ -0,0 +1,9 @@
+# frozen_string_literal: true
+module DispatchPolicy
+  class StagedJobsController < ApplicationController
+    def show
+      @job = StagedJob.find(params[:id])
+    end
+  end
+end

data/app/models/dispatch_policy/adaptive_concurrency_stats.rb CHANGED Viewed

@@ -1,89 +1,19 @@
 # frozen_string_literal: true
 module DispatchPolicy
+  # One row per (policy_name, partition_key) for partitions whose policy
+  # declares an `:adaptive_concurrency` gate. Holds the AIMD-tuned
+  # `current_max` plus the EWMA of recent queue-lag observations the cap
+  # adapts on.
+  #
+  # Read by `Gates::AdaptiveConcurrency#evaluate` to learn how many jobs
+  # this partition may admit right now. Written atomically by
+  # `Repository.adaptive_record!` from `InflightTracker.track`'s ensure
+  # block after each perform — the EWMA + AIMD update lives in a single
+  # SQL statement so concurrent workers can't race on read-modify-write.
   class AdaptiveConcurrencyStats < ApplicationRecord
     self.table_name = "dispatch_policy_adaptive_concurrency_stats"
-    # Seed a stats row if one doesn't exist yet. Mirrors ThrottleBucket.lock.
-    def self.seed!(policy_name:, gate_name:, partition_key:, initial_max:)
-      now = Time.current
-      sql = <<~SQL.squish
-        INSERT INTO #{quoted_table_name}
-          (policy_name, gate_name, partition_key, current_max,
-           ewma_latency_ms, sample_count, created_at, updated_at)
-        VALUES (?, ?, ?, ?, 0, 0, ?, ?)
-        ON CONFLICT (policy_name, gate_name, partition_key) DO NOTHING
-      SQL
-      connection.exec_update(
-        sanitize_sql_array([
-          sql, policy_name, gate_name.to_s, partition_key.to_s,
-          initial_max.to_i, now, now
-        ])
-      )
-    end
-    def self.fetch_many(policy_name:, gate_name:, partition_keys:)
-      return {} if partition_keys.empty?
-      where(policy_name: policy_name, gate_name: gate_name.to_s, partition_key: partition_keys)
-        .pluck(:partition_key, :current_max, :ewma_latency_ms)
-        .each_with_object({}) { |(k, c, l), h| h[k] = { current_max: c, ewma_latency_ms: l } }
-    end
-    # Single-statement EWMA + AIMD update so concurrent performs can't race
-    # on read-modify-write. Seed first (INSERT ON CONFLICT DO NOTHING), then
-    # apply the adjustment.
-    def self.record_observation!(
-      policy_name:, gate_name:, partition_key:,
-      queue_lag_ms:, succeeded:,
-      alpha:, min:, target_lag_ms:,
-      fail_factor:, slow_factor:, initial_max:
-    )
-      seed!(
-        policy_name:   policy_name,
-        gate_name:     gate_name,
-        partition_key: partition_key,
-        initial_max:   initial_max
-      )
-      # Feedback signal is queue_lag (admitted_at → perform_start). When
-      # the adapter queue is empty, lag ≈ 0 → +1 grow. When the queue
-      # backs up, lag rises past target → multiplicative shrink. Failures
-      # shrink harder. Only `min` is enforced so a partition can't lock
-      # out entirely.
-      sql = <<~SQL.squish
-        UPDATE #{quoted_table_name}
-        SET
-          ewma_latency_ms = ewma_latency_ms * (1 - ?) + ? * ?,
-          sample_count    = sample_count + 1,
-          current_max = GREATEST(?, CASE
-            WHEN ? = FALSE                                THEN FLOOR(current_max * ?)::int
-            WHEN (ewma_latency_ms * (1 - ?) + ? * ?) > ?  THEN FLOOR(current_max * ?)::int
-            ELSE current_max + 1
-          END),
-          last_observed_at = ?,
-          updated_at       = ?
-        WHERE policy_name = ? AND gate_name = ? AND partition_key = ?
-      SQL
-      now = Time.current
-      connection.exec_update(
-        sanitize_sql_array([
-          sql,
-          alpha, alpha, queue_lag_ms,
-          min.to_i,
-          succeeded, fail_factor,
-          alpha, alpha, queue_lag_ms, target_lag_ms, slow_factor,
-          now, now,
-          policy_name, gate_name.to_s, partition_key.to_s
-        ])
-      )
-    end
-    # Quick lookup used by Dispatchable to denormalize current_max into
-    # the generic partition observation row.
-    def self.current_max_for(policy_name:, partition_key:)
-      where(policy_name: policy_name, partition_key: partition_key.to_s)
-        .limit(1).pick(:current_max)
-    end
+    scope :for_policy, ->(name) { where(policy_name: name) }
   end
 end

data/app/models/dispatch_policy/inflight_job.rb ADDED Viewed

@@ -0,0 +1,12 @@
+# frozen_string_literal: true
+module DispatchPolicy
+  class InflightJob < ApplicationRecord
+    self.table_name = "dispatch_policy_inflight_jobs"
+    scope :for_partition, ->(policy_name, partition_key) {
+      where(policy_name: policy_name, partition_key: partition_key)
+    }
+    scope :stale, ->(cutoff) { where("heartbeat_at < ?", cutoff) }
+  end
+end

data/app/models/dispatch_policy/partition.rb ADDED Viewed

@@ -0,0 +1,21 @@
+# frozen_string_literal: true
+module DispatchPolicy
+  class Partition < ApplicationRecord
+    self.table_name = "dispatch_policy_partitions"
+    scope :for_policy, ->(name) { where(policy_name: name) }
+    scope :for_shard,  ->(s)    { s ? where(shard: s) : all }
+    scope :active,     -> { where(status: "active") }
+    scope :paused,     -> { where(status: "paused") }
+    scope :pending,    -> { where("pending_count > 0") }
+    scope :stale_inactive, ->(cutoff) {
+      where("pending_count = 0 AND in_flight_count = 0")
+        .where("last_admit_at < ? OR (last_admit_at IS NULL AND created_at < ?)", cutoff, cutoff)
+    }
+    def paused?
+      status == "paused"
+    end
+  end
+end

data/app/models/dispatch_policy/staged_job.rb CHANGED Viewed

@@ -4,102 +4,9 @@ module DispatchPolicy
   class StagedJob < ApplicationRecord
     self.table_name = "dispatch_policy_staged_jobs"
-    scope :pending,   -> { where(admitted_at: nil, completed_at: nil) }
-    scope :admitted,  -> { where.not(admitted_at: nil).where(completed_at: nil) }
-    scope :completed, -> { where.not(completed_at: nil) }
-    scope :active,    -> { where(completed_at: nil) }
-    scope :expired_leases, -> {
-      admitted.where("lease_expires_at IS NOT NULL AND lease_expires_at < ?", Time.current)
-    }
-    # Merge the job's ActiveJob metadata (queue_name, priority) into the
-    # context hash so gate lambdas can partition_by :queue_name without
-    # the user having to pass it as a kwarg. User-provided keys win.
-    def self.context_for(job_instance, policy)
-      built = policy.context_builder.call(job_instance.arguments)
-      return built unless built.is_a?(Hash)
-      {
-        queue_name: job_instance.queue_name,
-        priority:   job_instance.priority
-      }.merge(built.symbolize_keys)
-    end
-    # Stages a job in the admission queue. Returns the created row, or nil if
-    # the policy declares a dedupe_key and an active row already exists.
-    def self.stage!(job_instance:, policy:)
-      dedupe_key = policy.build_dedupe_key(job_instance.arguments)
-      if dedupe_key && exists?(policy_name: policy.name, dedupe_key: dedupe_key, completed_at: nil)
-        return nil
-      end
-      create!(
-        job_class:       job_instance.class.name,
-        policy_name:     policy.name,
-        arguments:       job_instance.serialize,
-        snapshot:        policy.build_snapshot(job_instance.arguments),
-        context:         context_for(job_instance, policy),
-        priority:        job_instance.priority || 100,
-        not_before_at:   job_instance.scheduled_at,
-        staged_at:       Time.current,
-        dedupe_key:      dedupe_key,
-        round_robin_key: policy.build_round_robin_key(job_instance.arguments)
-      )
-    rescue ActiveRecord::RecordNotUnique
-      nil
-    end
-    # Batch-insert variant of stage!.
-    def self.stage_many!(policy:, jobs:)
-      return 0 if jobs.empty?
-      now = Time.current
-      rows = jobs.map do |job_instance|
-        {
-          job_class:       job_instance.class.name,
-          policy_name:     policy.name,
-          arguments:       job_instance.serialize,
-          snapshot:        policy.build_snapshot(job_instance.arguments),
-          context:         context_for(job_instance, policy),
-          priority:        job_instance.priority || 100,
-          not_before_at:   job_instance.scheduled_at,
-          staged_at:       now,
-          dedupe_key:      policy.build_dedupe_key(job_instance.arguments),
-          round_robin_key: policy.build_round_robin_key(job_instance.arguments),
-          partitions:      {},
-          created_at:      now,
-          updated_at:      now
-        }
-      end
-      result = insert_all(rows, unique_by: :idx_dp_staged_dedupe_active)
-      result.rows.size
-    end
-    def self.mark_completed_by_active_job_id(active_job_id)
-      return 0 if active_job_id.blank?
-      where(active_job_id: active_job_id, completed_at: nil)
-        .update_all(completed_at: Time.current, lease_expires_at: nil)
-    end
-    def mark_admitted!(partitions:)
-      now = Time.current
-      job = instantiate_active_job
-      job._dispatch_partitions  = partitions
-      job._dispatch_admitted_at = now
-      update!(
-        admitted_at:      now,
-        lease_expires_at: now + DispatchPolicy.config.lease_duration,
-        active_job_id:    job.job_id,
-        partitions:       partitions
-      )
-      job
-    end
-    def instantiate_active_job
-      ActiveJob::Base.deserialize(arguments)
-    end
+    scope :for_policy,    ->(name) { where(policy_name: name) }
+    scope :for_partition, ->(name, key) { where(policy_name: name, partition_key: key) }
+    scope :due,           -> { where("scheduled_at IS NULL OR scheduled_at <= now()") }
+    scope :recent,        -> { order(enqueued_at: :desc) }
   end
 end

data/app/models/dispatch_policy/tick_sample.rb ADDED Viewed

@@ -0,0 +1,11 @@
+# frozen_string_literal: true
+module DispatchPolicy
+  class TickSample < ApplicationRecord
+    self.table_name = "dispatch_policy_tick_samples"
+    scope :for_policy, ->(name) { where(policy_name: name) }
+    scope :since,      ->(time) { where("sampled_at >= ?", time) }
+    scope :recent,     -> { order(sampled_at: :desc) }
+  end
+end