dispatch_policy 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +98 -28
  3. data/MIT-LICENSE +16 -17
  4. data/README.md +452 -388
  5. data/app/assets/images/dispatch_policy/logo-large.svg +9 -0
  6. data/app/assets/images/dispatch_policy/logo-small.svg +7 -0
  7. data/app/assets/javascripts/dispatch_policy/turbo.es2017-umd.min.js +35 -0
  8. data/app/assets/stylesheets/dispatch_policy/application.css +294 -0
  9. data/app/controllers/dispatch_policy/application_controller.rb +45 -1
  10. data/app/controllers/dispatch_policy/assets_controller.rb +31 -0
  11. data/app/controllers/dispatch_policy/dashboard_controller.rb +91 -0
  12. data/app/controllers/dispatch_policy/partitions_controller.rb +122 -0
  13. data/app/controllers/dispatch_policy/policies_controller.rb +94 -267
  14. data/app/controllers/dispatch_policy/staged_jobs_controller.rb +9 -0
  15. data/app/models/dispatch_policy/adaptive_concurrency_stats.rb +11 -81
  16. data/app/models/dispatch_policy/inflight_job.rb +12 -0
  17. data/app/models/dispatch_policy/partition.rb +21 -0
  18. data/app/models/dispatch_policy/staged_job.rb +4 -97
  19. data/app/models/dispatch_policy/tick_sample.rb +11 -0
  20. data/app/views/dispatch_policy/dashboard/index.html.erb +109 -0
  21. data/app/views/dispatch_policy/partitions/index.html.erb +63 -0
  22. data/app/views/dispatch_policy/partitions/show.html.erb +106 -0
  23. data/app/views/dispatch_policy/policies/index.html.erb +15 -37
  24. data/app/views/dispatch_policy/policies/show.html.erb +139 -223
  25. data/app/views/dispatch_policy/shared/_capacity.html.erb +67 -0
  26. data/app/views/dispatch_policy/shared/_hints.html.erb +13 -0
  27. data/app/views/dispatch_policy/shared/_partition_row.html.erb +12 -0
  28. data/app/views/dispatch_policy/staged_jobs/show.html.erb +31 -0
  29. data/app/views/layouts/dispatch_policy/application.html.erb +164 -231
  30. data/config/routes.rb +21 -2
  31. data/db/migrate/20260501000001_create_dispatch_policy_tables.rb +103 -0
  32. data/lib/dispatch_policy/assets.rb +38 -0
  33. data/lib/dispatch_policy/bypass.rb +23 -0
  34. data/lib/dispatch_policy/config.rb +85 -0
  35. data/lib/dispatch_policy/context.rb +50 -0
  36. data/lib/dispatch_policy/cursor_pagination.rb +121 -0
  37. data/lib/dispatch_policy/decision.rb +22 -0
  38. data/lib/dispatch_policy/engine.rb +5 -27
  39. data/lib/dispatch_policy/forwarder.rb +63 -0
  40. data/lib/dispatch_policy/gate.rb +10 -38
  41. data/lib/dispatch_policy/gates/adaptive_concurrency.rb +99 -97
  42. data/lib/dispatch_policy/gates/concurrency.rb +45 -26
  43. data/lib/dispatch_policy/gates/throttle.rb +65 -41
  44. data/lib/dispatch_policy/inflight_tracker.rb +174 -0
  45. data/lib/dispatch_policy/job_extension.rb +155 -0
  46. data/lib/dispatch_policy/operator_hints.rb +126 -0
  47. data/lib/dispatch_policy/pipeline.rb +48 -0
  48. data/lib/dispatch_policy/policy.rb +61 -59
  49. data/lib/dispatch_policy/policy_dsl.rb +120 -0
  50. data/lib/dispatch_policy/railtie.rb +35 -0
  51. data/lib/dispatch_policy/registry.rb +46 -0
  52. data/lib/dispatch_policy/repository.rb +723 -0
  53. data/lib/dispatch_policy/serializer.rb +36 -0
  54. data/lib/dispatch_policy/tick.rb +260 -256
  55. data/lib/dispatch_policy/tick_loop.rb +59 -26
  56. data/lib/dispatch_policy/version.rb +1 -1
  57. data/lib/dispatch_policy.rb +72 -52
  58. data/lib/generators/dispatch_policy/install/install_generator.rb +70 -0
  59. data/lib/generators/dispatch_policy/install/templates/create_dispatch_policy_tables.rb.tt +95 -0
  60. data/lib/generators/dispatch_policy/install/templates/dispatch_tick_loop_job.rb.tt +53 -0
  61. data/lib/generators/dispatch_policy/install/templates/initializer.rb.tt +11 -0
  62. metadata +134 -42
  63. data/app/models/dispatch_policy/partition_inflight_count.rb +0 -42
  64. data/app/models/dispatch_policy/partition_observation.rb +0 -76
  65. data/app/models/dispatch_policy/throttle_bucket.rb +0 -41
  66. data/db/migrate/20260424000001_create_dispatch_policy_tables.rb +0 -80
  67. data/db/migrate/20260424000002_create_adaptive_concurrency_stats.rb +0 -22
  68. data/db/migrate/20260424000003_create_adaptive_concurrency_samples.rb +0 -25
  69. data/db/migrate/20260424000004_rename_samples_to_partition_observations.rb +0 -32
  70. data/db/migrate/20260425000001_add_duration_to_partition_observations.rb +0 -8
  71. data/lib/dispatch_policy/active_job_perform_all_later_patch.rb +0 -32
  72. data/lib/dispatch_policy/dispatch_context.rb +0 -53
  73. data/lib/dispatch_policy/dispatchable.rb +0 -123
  74. data/lib/dispatch_policy/gates/fair_interleave.rb +0 -32
  75. data/lib/dispatch_policy/gates/global_cap.rb +0 -26
data/lib/dispatch_policy/serializer.rb
@@ -0,0 +1,36 @@
+ # frozen_string_literal: true
+
+ require "json"
+
+ module DispatchPolicy
+   module Serializer
+     module_function
+
+     # Serialize an ActiveJob instance for storage in staged_jobs.job_data.
+     # Returns a Ruby hash compatible with PostgreSQL jsonb (string keys).
+     def serialize(job)
+       job.serialize
+     end
+
+     # Deserialize stored job_data into a fresh ActiveJob instance ready
+     # to be enqueued via `#enqueue`.
+     def deserialize(payload)
+       job_class = payload["job_class"] || payload[:job_class]
+       raise InvalidPolicy, "missing job_class in stored payload" unless job_class
+
+       klass = job_class.constantize
+       klass.deserialize(payload)
+     end
+
+     def dump_jsonb(value)
+       JSON.dump(value)
+     end
+
+     def load_jsonb(text)
+       return text if text.is_a?(Hash) || text.is_a?(Array)
+       return {} if text.nil? || text == ""
+
+       JSON.parse(text)
+     end
+   end
+ end
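
For orientation, a minimal round-trip sketch of the new Serializer (SampleJob and the call site are illustrative, not part of the gem):

    # Hypothetical job class, used only to demonstrate the round-trip.
    class SampleJob < ActiveJob::Base
      def perform(user_id); end
    end

    job = SampleJob.new(42)
    payload = DispatchPolicy::Serializer.serialize(job)
    # => {"job_class" => "SampleJob", "job_id" => "...", "arguments" => [42], ...}

    restored = DispatchPolicy::Serializer.deserialize(payload)
    restored.enqueue # hands the job to the configured ActiveJob adapter

Because serialize delegates to ActiveJob's own #serialize, the stored payload matches ActiveJob's wire format, so staged rows stay compatible with ActiveJob's deserialization rules across upgrades.
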
data/lib/dispatch_policy/tick.rb
@@ -1,301 +1,305 @@
  # frozen_string_literal: true

  module DispatchPolicy
+   # One pass of admission for a single policy.
+   #
+   # Records a row in dispatch_policy_tick_samples at the end so the engine UI
+   # can show throughput, denial reasons, and tick duration without sampling
+   # on the read path.
    class Tick
-     THROTTLE_ZERO_THRESHOLD = 0.001
+     Result = Struct.new(:partitions_seen, :jobs_admitted, keyword_init: true)

-     # Single admission pass: fetch pending staged jobs per policy, evaluate
-     # gates, mark survivors as admitted, then enqueue them on the real
-     # backend outside the locking transaction.
-     def self.run(policy_name: nil)
-       return 0 unless DispatchPolicy.enabled?
-
-       pending_enqueue = []
-
-       StagedJob.transaction do
-         active_policies(policy_name).each do |pname|
-           policy = lookup_policy(pname)
-           next unless policy
+     def self.run(policy_name:, shard: nil)
+       new(policy_name, shard: shard).call
+     end

-           batch = fetch_batch(policy)
-           next if batch.empty?
+     def initialize(policy_name, shard: nil)
+       @policy_name = policy_name
+       @shard = shard
+       @policy = DispatchPolicy.registry.fetch(policy_name) || raise(InvalidPolicy, "unknown policy #{policy_name.inspect}")
+       @config = DispatchPolicy.config
+     end

-           pending_enqueue.concat(run_policy(policy, batch))
-         end
-       end
+     def call
+       started_at = monotonic_now_ms
+       partitions_seen = 0
+       partitions_admitted = 0
+       partitions_denied = 0
+       jobs_admitted = 0
+       forward_failures = 0
+       denied_reasons = Hash.new(0)
+
+       partitions = Repository.claim_partitions(
+         policy_name: @policy_name,
+         shard: @shard,
+         limit: @config.partition_batch_size
+       )

-       admitted_count = 0
-       pending_enqueue.each do |staged, job|
-         begin
-           job.enqueue(_bypass_staging: true)
-           # ActiveJob adapters report a polite failure by setting
-           # enqueue_error and leaving successfully_enqueued? false
-           # instead of raising. Without this check the staged row
-           # would stay marked admitted while the adapter never queued
-           # the job — losing it silently.
-           if job.successfully_enqueued?
-             admitted_count += 1
-           else
-             Rails.logger&.warn(
-               "[DispatchPolicy] adapter did not enqueue staged=#{staged.id}: " \
-               "#{job.enqueue_error&.class}: #{job.enqueue_error&.message}"
-             )
-             revert_admission(staged)
-           end
-         rescue StandardError => e
-           Rails.logger&.error("[DispatchPolicy] enqueue failed staged=#{staged.id}: #{e.class}: #{e.message}")
-           revert_admission(staged)
+       # Reorder by least-recent-admit-weighted (EWMA decayed_admits ASC)
+       # so under-admitted partitions get first crack at the tick budget.
+       # claim_partitions ALREADY enforced anti-stagnation via
+       # last_checked_at — every partition with pending is visited within
+       # ⌈active_partitions / partition_batch_size⌉ ticks regardless of
+       # decayed_admits. Reordering here only decides order *inside* this
+       # already-fair selection.
+       sort_partitions_for_fairness!(partitions)
+
+       # Per-partition fair share. When tick_admission_budget is set, we
+       # divide it evenly across the partitions we just claimed. Otherwise
+       # the legacy admission_batch_size is the per-partition ceiling.
+       #
+       # We deliberately do NOT clamp fair_share to a minimum of 1 when
+       # tick_cap < N. The hard global cap wins over a per-partition
+       # admit floor; partitions that don't admit this tick are still
+       # visited (last_checked_at bumped) and re-visited next tick when
+       # they'll be at the front of the in-tick decay order.
+       # Anti-stagnation comes from claim_partitions, not from forcing
+       # an admit on every claimed partition.
+       tick_cap = @policy.tick_admission_budget || @config.tick_admission_budget
+       per_part = @policy.admission_batch_size || @config.admission_batch_size
+       fair_share = if tick_cap && partitions.any?
+         (tick_cap.to_f / partitions.size).ceil
+       else
+         per_part
+       end
+
+       pending_denies = []
+       admitted_per_partition = Hash.new(0)
+       used = 0
+
+       partitions.each do |partition|
+         partitions_seen += 1
+
+         if tick_cap && used >= tick_cap
+           # Global cap exhausted in pass-1. The partition is still
+           # observed (claim_partitions bumped its last_checked_at), so
+           # the round-robin invariant for anti-stagnation holds; we
+           # just admit nothing this tick.
+           partitions_denied += 1
+           denied_reasons["tick_cap_exhausted"] += 1
+           # Nothing goes on pending_denies here: the pipeline never ran
+           # for this partition, so there is no gate_state_patch to flush.
+           next
          end
-       end

-       admitted_count
-     end
-
-     def self.prune_idle_partitions
-       ttl = DispatchPolicy.config.partition_idle_ttl
-       return if ttl.nil? || ttl <= 0
+         budget_for_this = if tick_cap
+           [fair_share, tick_cap - used].min
+         else
+           fair_share
+         end
+         budget_for_this = 0 if budget_for_this.negative?

-       cutoff = Time.current - ttl
-       PartitionInflightCount.where(in_flight: 0).where("updated_at < ?", cutoff).delete_all
-       ThrottleBucket.where("tokens <= ? AND refilled_at < ?", THROTTLE_ZERO_THRESHOLD, cutoff).delete_all
-     end
+         outcome = admit_partition(partition, pending_denies, max_budget: budget_for_this)
+         admitted_per_partition[partition["partition_key"]] = outcome[:admitted]

-     def self.prune_orphan_gate_rows
-       [ PartitionInflightCount, ThrottleBucket ].each do |model|
-         model.distinct.pluck(:policy_name, :gate_name).each do |policy_name, gate_name|
-           policy = lookup_policy(policy_name)
-           next if policy && policy.gates.any? { |g| g.name == gate_name.to_sym }
+         jobs_admitted += outcome[:admitted]
+         forward_failures += outcome[:failures]
+         used += outcome[:admitted]

-           model.where(policy_name: policy_name, gate_name: gate_name).delete_all
+         if outcome[:admitted].positive?
+           partitions_admitted += 1
+         else
+           partitions_denied += 1
+           outcome[:reasons].each { |r| denied_reasons[r] += 1 }
          end
        end
-     end

-     def self.reap
-       StagedJob.expired_leases.find_each do |staged|
-         (staged.partitions || {}).each do |gate_name, partition_key|
-           policy = lookup_policy(staged.policy_name)
-           gate = policy&.gates&.find { |g| g.name == gate_name.to_sym }
-           next unless gate&.tracks_inflight?
-
-           PartitionInflightCount.decrement(
-             policy_name: staged.policy_name,
-             gate_name: gate_name.to_s,
-             partition_key: partition_key.to_s
-           )
+       # Pass-2: redistribution. Pass-1 may have left budget unused if
+       # some partitions had less pending than their fair share. Walk the
+       # claimed partitions (still in decay-sorted order) and offer the
+       # leftover to whoever filled their fair share in pass-1 — a signal
+       # they had more pending than we let them admit.
+       if tick_cap
+         remaining = tick_cap - used
+         if remaining.positive?
+           partitions.each do |p|
+             break if remaining <= 0
+             next if admitted_per_partition[p["partition_key"]] < fair_share
+
+             extra_cap = [remaining, fair_share].min
+             outcome = admit_partition(p, pending_denies, max_budget: extra_cap)
+             jobs_admitted += outcome[:admitted]
+             forward_failures += outcome[:failures]
+             admitted_per_partition[p["partition_key"]] += outcome[:admitted]
+             remaining -= outcome[:admitted]
+           end
          end
-         staged.update!(lease_expires_at: nil, completed_at: Time.current)
        end
-     end

-     def self.release(policy_name:, partitions:)
-       partitions.each do |gate_name, partition_key|
-         policy = lookup_policy(policy_name)
-         gate = policy&.gates&.find { |g| g.name == gate_name.to_sym }
-         next unless gate&.tracks_inflight?
-
-         PartitionInflightCount.decrement(
-           policy_name: policy_name,
-           gate_name: gate_name.to_s,
-           partition_key: partition_key.to_s
-         )
-       end
-     end
+       flush_denies!(pending_denies) if pending_denies.any?
+
+       duration_ms = monotonic_now_ms - started_at

-     def self.active_policies(policy_name)
-       return [ policy_name ] if policy_name
+       record_sample!(
+         duration_ms: duration_ms,
+         partitions_seen: partitions_seen,
+         partitions_admitted: partitions_admitted,
+         partitions_denied: partitions_denied,
+         jobs_admitted: jobs_admitted,
+         forward_failures: forward_failures,
+         denied_reasons: denied_reasons
+       )

-       StagedJob.pending
-                .where("not_before_at IS NULL OR not_before_at <= ?", Time.current)
-                .distinct
-                .pluck(:policy_name)
+       Result.new(partitions_seen: partitions_seen, jobs_admitted: jobs_admitted)
      end

-     def self.fetch_batch(policy)
-       if policy.round_robin?
-         if policy.round_robin_weight == :time
-           fetch_time_weighted_batch(policy)
-         else
-           fetch_round_robin_batch(policy)
-         end
-       else
-         fetch_plain_batch(policy)
+     private
+
+     # In-place sort by current decayed_admits ASC, computed in Ruby from
+     # the row's stored decayed_admits + the elapsed time since
+     # decayed_admits_at. We do this here (rather than in the SQL of
+     # claim_partitions) because:
+     #
+     # - claim_partitions's ORDER BY is anti-stagnation (last_checked_at
+     #   NULLS FIRST); reordering there would bias selection itself,
+     #   reintroducing the stagnation risk.
+     # - The math is cheap on N ≤ partition_batch_size rows already in
+     #   memory.
+     def sort_partitions_for_fairness!(partitions)
+       half_life = @policy.fairness_half_life_seconds || @config.fairness_half_life_seconds
+       return partitions if half_life.nil? || half_life <= 0
+
+       tau = half_life.to_f / Math.log(2)
+       now = Time.current.to_f
+
+       partitions.sort_by! do |p|
+         last_t = decayed_admits_epoch(p["decayed_admits_at"]) || now
+         elapsed = [now - last_t, 0.0].max
+         (p["decayed_admits"] || 0.0).to_f * Math.exp(-elapsed / tau)
        end
      end

-     def self.fetch_plain_batch(policy)
-       StagedJob.pending
-                .where(policy_name: policy.name)
-                .where("not_before_at IS NULL OR not_before_at <= ?", Time.current)
-                .order(:priority, :staged_at)
-                .limit(DispatchPolicy.config.batch_size)
-                .lock("FOR UPDATE SKIP LOCKED")
-                .to_a
+     def decayed_admits_epoch(value)
+       return nil if value.nil?
+       return value.to_f if value.is_a?(Numeric)
+       return value.to_time.to_f if value.respond_to?(:to_time)
+       Time.parse(value.to_s).to_f
+     rescue ArgumentError, TypeError
+       nil
      end

-     def self.fetch_round_robin_batch(policy)
-       quantum = DispatchPolicy.config.round_robin_quantum
-       batch_size = DispatchPolicy.config.batch_size
-       now = Time.current
-
-       sql = <<~SQL.squish
-         SELECT rows.*
-         FROM (
-           SELECT DISTINCT round_robin_key
-           FROM dispatch_policy_staged_jobs
-           WHERE policy_name = ?
-             AND admitted_at IS NULL
-             AND round_robin_key IS NOT NULL
-             AND (not_before_at IS NULL OR not_before_at <= ?)
-         ) AS keys
-         CROSS JOIN LATERAL (
-           SELECT *
-           FROM dispatch_policy_staged_jobs
-           WHERE policy_name = ?
-             AND admitted_at IS NULL
-             AND round_robin_key = keys.round_robin_key
-             AND (not_before_at IS NULL OR not_before_at <= ?)
-           ORDER BY priority, staged_at
-           LIMIT ?
-           FOR UPDATE SKIP LOCKED
-         ) AS rows
-         LIMIT ?
-       SQL
-
-       batch = StagedJob.find_by_sql([ sql, policy.name, now, policy.name, now, quantum, batch_size ])
-
-       remaining = batch_size - batch.size
-       return batch if remaining <= 0
-
-       top_up = StagedJob.pending
-                         .where(policy_name: policy.name)
-                         .where("not_before_at IS NULL OR not_before_at <= ?", now)
-                         .where.not(id: batch.map(&:id))
-                         .order(:priority, :staged_at)
-                         .limit(remaining)
-                         .lock("FOR UPDATE SKIP LOCKED")
-                         .to_a
-
-       batch + top_up
-     end
+     def admit_partition(partition, pending_denies, max_budget:)
+       ctx = Context.wrap(partition["context"])
+       pipe = Pipeline.new(@policy)
+       result = pipe.call(ctx, partition, max_budget)
+
+       # Pure-deny path (gate said no capacity for this partition this tick).
+       # Defer the partition state UPDATE to the bulk flush at the end of
+       # the tick instead of issuing a per-partition statement now.
+       if result.admit_count.zero?
+         pending_denies << {
+           policy_name: @policy_name,
+           partition_key: partition["partition_key"],
+           gate_state_patch: result.gate_state_patch,
+           retry_after: result.retry_after
+         }
+         return { admitted: 0, failures: 0, reasons: deduce_reasons(result) }
+       end

-     # Time-weighted variant of round-robin: instead of an equal quantum
-     # per active partition, allocate quanta proportional to the inverse
-     # of recently-consumed compute time. Solo partitions get the full
-     # batch_size; competing partitions get slices that bias admission
-     # toward whoever has consumed less, so total compute time stays
-     # balanced even when one tenant's backlog is much bigger than
-     # another's. Falls back to the same trailing top-up as the equal
-     # round-robin so we never under-fill the batch when only a few
-     # partitions are active.
-     DEFAULT_TIME_SHARE_DURATION_MS = 100
-
-     def self.fetch_time_weighted_batch(policy)
-       batch_size = DispatchPolicy.config.batch_size
-       now = Time.current
-
-       partitions = StagedJob.pending
-                             .where(policy_name: policy.name)
-                             .where("not_before_at IS NULL OR not_before_at <= ?", now)
-                             .where.not(round_robin_key: nil)
-                             .distinct
-                             .pluck(:round_robin_key)
-
-       return fetch_plain_batch(policy) if partitions.empty?
-
-       consumed = PartitionObservation.consumed_ms_by_partition(
-         policy_name: policy.name,
-         partition_keys: partitions,
-         window: policy.round_robin_window
-       )
+       admitted = 0
+       half_life = @policy.fairness_half_life_seconds || @config.fairness_half_life_seconds
+
+       Repository.with_connection do
+         ActiveRecord::Base.transaction(requires_new: true) do
+           rows = Repository.claim_staged_jobs!(
+             policy_name: @policy_name,
+             partition_key: partition["partition_key"],
+             limit: result.admit_count,
+             gate_state_patch: result.gate_state_patch,
+             retry_after: result.retry_after,
+             half_life_seconds: half_life
+           )

-       # Inverse-of-consumed weights, with a floor so a brand-new partition
-       # (no observations) doesn't dominate to infinity.
-       weights = partitions.each_with_object({}) do |key, acc|
-         consumed_ms = consumed.dig(key, :consumed_ms) || 0
-         denom = [ consumed_ms, DEFAULT_TIME_SHARE_DURATION_MS ].max
-         acc[key] = 1.0 / denom
-       end
-       total_weight = weights.values.sum
-       quanta = weights.transform_values do |w|
-         [ (batch_size * w / total_weight).floor, 1 ].max
+           # `claim_staged_jobs!` always runs `record_partition_admit!` so
+           # the partition's counters and gate_state commit even when the
+           # actual DELETE returned zero rows (e.g. all staged rows are
+           # scheduled in the future, or another tick raced us to them).
+           next if rows.empty?
+
+           # Pre-insert an inflight row per admitted job so the concurrency
+           # gate sees them immediately. With a concurrency gate, use its
+           # (coarser) partition key so the gate's COUNT(*) keeps aggregating
+           # correctly across staged sub-partitions.
+           concurrency_gate = @policy.gates.find { |g| g.name == :concurrency }
+           inflight_rows = rows.filter_map do |row|
+             ajid = row.dig("job_data", "job_id")
+             next unless ajid
+
+             key = if concurrency_gate
+               concurrency_gate.inflight_partition_key(@policy_name, Context.wrap(row["context"]))
+             else
+               row["partition_key"]
+             end
+             { policy_name: @policy_name, partition_key: key, active_job_id: ajid }
+           end
+           Repository.insert_inflight!(inflight_rows) if inflight_rows.any?
+
+           # Re-enqueue to the real adapter *inside this transaction*. The
+           # adapter (good_job / solid_queue) shares ActiveRecord::Base's
+           # connection, so its INSERT into good_jobs / solid_queue_jobs
+           # participates in the same TX. If anything raises (deserialize,
+           # adapter error, network), the whole TX rolls back atomically:
+           # staged_jobs return, inflight rows vanish, partition counters
+           # revert, and the adapter rows are also reverted. This is the
+           # at-least-once guarantee — there is no window where staged is
+           # gone but the adapter never received the job.
+           Forwarder.dispatch(rows)
+           admitted = rows.size
+         end
        end

-       batch = []
-       partitions.each do |key|
-         rows = StagedJob.pending
-                         .where(policy_name: policy.name, round_robin_key: key)
-                         .where("not_before_at IS NULL OR not_before_at <= ?", now)
-                         .order(:priority, :staged_at)
-                         .limit(quanta[key])
-                         .lock("FOR UPDATE SKIP LOCKED")
-                         .to_a
-         batch.concat(rows)
-         break if batch.size >= batch_size
+       if admitted.zero?
+         { admitted: 0, failures: 0, reasons: ["no_rows_claimed"] }
+       else
+         { admitted: admitted, failures: 0, reasons: [] }
        end
-
-       remaining = batch_size - batch.size
-       return batch if remaining <= 0 || batch.empty?
-
-       top_up = StagedJob.pending
-                         .where(policy_name: policy.name)
-                         .where("not_before_at IS NULL OR not_before_at <= ?", now)
-                         .where.not(id: batch.map(&:id))
-                         .order(:priority, :staged_at)
-                         .limit(remaining)
-                         .lock("FOR UPDATE SKIP LOCKED")
-                         .to_a
-
-       batch + top_up
-     end
-
-     def self.lookup_policy(policy_name)
-       job_class = DispatchPolicy.registry[policy_name] || autoload_job_for(policy_name)
-       return nil unless job_class
-       job_class.resolved_dispatch_policy
+     rescue StandardError => e
+       DispatchPolicy.config.logger&.error(
+         "[dispatch_policy] forward failed for #{@policy_name}/#{partition['partition_key']}: " \
+         "#{e.class}: #{e.message}"
+       )
+       { admitted: 0, failures: 1, reasons: ["forward_failed"] }
      end

-     def self.autoload_job_for(policy_name)
-       const_name = policy_name.tr("-", "/").camelize
-       const_name.safe_constantize
-       DispatchPolicy.registry[policy_name]
+     def flush_denies!(entries)
+       Repository.with_connection { Repository.bulk_record_partition_denies!(entries) }
+     rescue StandardError => e
+       DispatchPolicy.config.logger&.error(
+         "[dispatch_policy] bulk_record_partition_denies failed: #{e.class}: #{e.message}"
+       )
      end

-     def self.run_policy(policy, batch)
-       context = DispatchContext.new(policy: policy, batch: batch)
-       survivors = batch
-       policy.gates.each do |gate|
-         survivors = gate.filter(survivors, context)
+     # When admit_count was 0, the Pipeline's `reasons` array contains entries
+     # like "throttle:rate=0" or "concurrency:concurrency_full". We keep the
+     # part after the "gate:" prefix, falling back to the bare gate name, so
+     # callers see "rate=0" / "concurrency_full".
+     def deduce_reasons(result)
+       reasons = result.reasons.map do |s|
+         gate, msg = s.split(":", 2)
+         msg.presence || gate
        end
+       reasons << "no_capacity" if reasons.empty?
+       reasons
+     end

-       survivors.map do |staged|
-         partitions = context.partitions_for(staged)
-
-         partitions.each do |gate_name, partition_key|
-           gate = policy.gates.find { |g| g.name == gate_name.to_sym }
-           next unless gate&.tracks_inflight?
+     def record_sample!(**fields)
+       pending_total = DispatchPolicy::Partition.for_policy(@policy_name).sum(:pending_count)
+       inflight_total = DispatchPolicy::InflightJob.where(policy_name: @policy_name).count

-           PartitionInflightCount.increment(
-             policy_name: policy.name,
-             gate_name: gate_name.to_s,
-             partition_key: partition_key.to_s
-           )
-         end
-
-         job = staged.mark_admitted!(partitions: partitions)
-         [ staged, job ]
-       end
+       Repository.record_tick_sample!(
+         policy_name: @policy_name,
+         pending_total: pending_total,
+         inflight_total: inflight_total,
+         **fields
+       )
+     rescue StandardError => e
+       DispatchPolicy.config.logger&.warn("[dispatch_policy] failed to record tick sample: #{e.class}: #{e.message}")
      end

-     def self.revert_admission(staged)
-       partitions = staged.partitions || {}
-       release(policy_name: staged.policy_name, partitions: partitions)
-       staged.update_columns(
-         admitted_at: nil,
-         lease_expires_at: nil,
-         active_job_id: nil,
-         partitions: {}
-       )
+     def monotonic_now_ms
+       (Process.clock_gettime(Process::CLOCK_MONOTONIC) * 1000).to_i
      end
    end
  end
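
To make the fairness arithmetic in sort_partitions_for_fairness! and the fair-share split concrete, a short sketch with invented numbers (a 60s half-life and a tick budget of 10 across 4 claimed partitions; neither value comes from the diff):

    # decayed_admits halves every half_life seconds: tau = half_life / ln(2).
    # Two half-lives (120s) after its last admit, a stored 8.0 sorts as ~2.0.
    tau = 60.0 / Math.log(2)
    8.0 * Math.exp(-120.0 / tau)  # => ~2.0

    # Pass-1 fair share: (tick_cap / partitions.size).ceil.
    (10 / 4.0).ceil               # => 3 admits per partition

    # With 1, 2, 5, and 9 jobs pending, pass-1 admits 1 + 2 + 3 + 3 = 9;
    # pass-2 offers the leftover 1 only to partitions that filled their
    # share (here, the ones with 5 and 9 pending).

The two-pass split keeps the global cap hard while letting a busy partition soak up whatever a quiet one leaves on the table.
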