dispatch_policy 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +98 -28
  3. data/MIT-LICENSE +16 -17
  4. data/README.md +452 -388
  5. data/app/assets/images/dispatch_policy/logo-large.svg +9 -0
  6. data/app/assets/images/dispatch_policy/logo-small.svg +7 -0
  7. data/app/assets/javascripts/dispatch_policy/turbo.es2017-umd.min.js +35 -0
  8. data/app/assets/stylesheets/dispatch_policy/application.css +294 -0
  9. data/app/controllers/dispatch_policy/application_controller.rb +45 -1
  10. data/app/controllers/dispatch_policy/assets_controller.rb +31 -0
  11. data/app/controllers/dispatch_policy/dashboard_controller.rb +91 -0
  12. data/app/controllers/dispatch_policy/partitions_controller.rb +122 -0
  13. data/app/controllers/dispatch_policy/policies_controller.rb +94 -267
  14. data/app/controllers/dispatch_policy/staged_jobs_controller.rb +9 -0
  15. data/app/models/dispatch_policy/adaptive_concurrency_stats.rb +11 -81
  16. data/app/models/dispatch_policy/inflight_job.rb +12 -0
  17. data/app/models/dispatch_policy/partition.rb +21 -0
  18. data/app/models/dispatch_policy/staged_job.rb +4 -97
  19. data/app/models/dispatch_policy/tick_sample.rb +11 -0
  20. data/app/views/dispatch_policy/dashboard/index.html.erb +109 -0
  21. data/app/views/dispatch_policy/partitions/index.html.erb +63 -0
  22. data/app/views/dispatch_policy/partitions/show.html.erb +106 -0
  23. data/app/views/dispatch_policy/policies/index.html.erb +15 -37
  24. data/app/views/dispatch_policy/policies/show.html.erb +139 -223
  25. data/app/views/dispatch_policy/shared/_capacity.html.erb +67 -0
  26. data/app/views/dispatch_policy/shared/_hints.html.erb +13 -0
  27. data/app/views/dispatch_policy/shared/_partition_row.html.erb +12 -0
  28. data/app/views/dispatch_policy/staged_jobs/show.html.erb +31 -0
  29. data/app/views/layouts/dispatch_policy/application.html.erb +164 -231
  30. data/config/routes.rb +21 -2
  31. data/db/migrate/20260501000001_create_dispatch_policy_tables.rb +103 -0
  32. data/lib/dispatch_policy/assets.rb +38 -0
  33. data/lib/dispatch_policy/bypass.rb +23 -0
  34. data/lib/dispatch_policy/config.rb +85 -0
  35. data/lib/dispatch_policy/context.rb +50 -0
  36. data/lib/dispatch_policy/cursor_pagination.rb +121 -0
  37. data/lib/dispatch_policy/decision.rb +22 -0
  38. data/lib/dispatch_policy/engine.rb +5 -27
  39. data/lib/dispatch_policy/forwarder.rb +63 -0
  40. data/lib/dispatch_policy/gate.rb +10 -38
  41. data/lib/dispatch_policy/gates/adaptive_concurrency.rb +99 -97
  42. data/lib/dispatch_policy/gates/concurrency.rb +45 -26
  43. data/lib/dispatch_policy/gates/throttle.rb +65 -41
  44. data/lib/dispatch_policy/inflight_tracker.rb +174 -0
  45. data/lib/dispatch_policy/job_extension.rb +155 -0
  46. data/lib/dispatch_policy/operator_hints.rb +126 -0
  47. data/lib/dispatch_policy/pipeline.rb +48 -0
  48. data/lib/dispatch_policy/policy.rb +61 -59
  49. data/lib/dispatch_policy/policy_dsl.rb +120 -0
  50. data/lib/dispatch_policy/railtie.rb +35 -0
  51. data/lib/dispatch_policy/registry.rb +46 -0
  52. data/lib/dispatch_policy/repository.rb +723 -0
  53. data/lib/dispatch_policy/serializer.rb +36 -0
  54. data/lib/dispatch_policy/tick.rb +260 -256
  55. data/lib/dispatch_policy/tick_loop.rb +59 -26
  56. data/lib/dispatch_policy/version.rb +1 -1
  57. data/lib/dispatch_policy.rb +72 -52
  58. data/lib/generators/dispatch_policy/install/install_generator.rb +70 -0
  59. data/lib/generators/dispatch_policy/install/templates/create_dispatch_policy_tables.rb.tt +95 -0
  60. data/lib/generators/dispatch_policy/install/templates/dispatch_tick_loop_job.rb.tt +53 -0
  61. data/lib/generators/dispatch_policy/install/templates/initializer.rb.tt +11 -0
  62. metadata +134 -42
  63. data/app/models/dispatch_policy/partition_inflight_count.rb +0 -42
  64. data/app/models/dispatch_policy/partition_observation.rb +0 -76
  65. data/app/models/dispatch_policy/throttle_bucket.rb +0 -41
  66. data/db/migrate/20260424000001_create_dispatch_policy_tables.rb +0 -80
  67. data/db/migrate/20260424000002_create_adaptive_concurrency_stats.rb +0 -22
  68. data/db/migrate/20260424000003_create_adaptive_concurrency_samples.rb +0 -25
  69. data/db/migrate/20260424000004_rename_samples_to_partition_observations.rb +0 -32
  70. data/db/migrate/20260425000001_add_duration_to_partition_observations.rb +0 -8
  71. data/lib/dispatch_policy/active_job_perform_all_later_patch.rb +0 -32
  72. data/lib/dispatch_policy/dispatch_context.rb +0 -53
  73. data/lib/dispatch_policy/dispatchable.rb +0 -123
  74. data/lib/dispatch_policy/gates/fair_interleave.rb +0 -32
  75. data/lib/dispatch_policy/gates/global_cap.rb +0 -26
data/lib/dispatch_policy/repository.rb (new file)
@@ -0,0 +1,723 @@
+# frozen_string_literal: true
+
+require "json"
+
+module DispatchPolicy
+  # SQL access layer for staged_jobs / partitions / inflight_jobs.
+  #
+  # Hot paths use raw SQL via ActiveRecord::Base.connection so we get
+  # `FOR UPDATE SKIP LOCKED`, multi-row UPSERTs, and DELETE … RETURNING
+  # without ActiveRecord overhead. Read paths in the engine UI use the
+  # AR models in app/models/dispatch_policy/*.
+  module Repository
+    STAGED_TABLE = "dispatch_policy_staged_jobs"
+    PARTITIONS_TABLE = "dispatch_policy_partitions"
+    INFLIGHT_TABLE = "dispatch_policy_inflight_jobs"
+    SAMPLES_TABLE = "dispatch_policy_tick_samples"
+    ADAPTIVE_TABLE = "dispatch_policy_adaptive_concurrency_stats"
+
+    module_function
+
+    def connection
+      ActiveRecord::Base.connection
+    end
+
+    # Wraps the given block in `connected_to(role: …)` when
+    # DispatchPolicy.config.database_role is set. Used by Tick to ensure
+    # the admission TX is opened against the same DB role that good_job /
+    # solid_queue uses. This is critical for multi-DB Rails setups (e.g.
+    # solid_queue on a separate `:queue` DB), where atomicity only holds
+    # when the staging TX and the adapter INSERT share a connection.
+    def with_connection
+      role = DispatchPolicy.config.database_role
+      if role && ActiveRecord::Base.respond_to?(:connected_to)
+        ActiveRecord::Base.connected_to(role: role) { yield }
+      else
+        yield
+      end
+    end
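
A minimal initializer sketch for the multi-DB case described above. The `configure` block form and the role name are assumptions here, not API confirmed by this diff (the real surface lives in lib/dispatch_policy/config.rb):

    # config/initializers/dispatch_policy.rb (hypothetical wiring)
    DispatchPolicy.configure do |config|
      # Match the connects_to role your queue adapter's tables live under.
      config.database_role = :queue
    end
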
+
+    # ----- staging (write path) ------------------------------------------------
+
+    # Insert one staged_job row + UPSERT its partition. The partition's
+    # `context` is refreshed on every call so admission-time gates always
+    # see the latest dynamic config.
+    #
+    # @param policy_name [String]
+    # @param partition_key [String]
+    # @param queue_name [String, nil]
+    # @param job_class [String]
+    # @param job_data [Hash]
+    # @param context [Hash]
+    # @param shard [String]
+    # @param scheduled_at [Time, nil]
+    # @param priority [Integer]
+    def stage!(policy_name:, partition_key:, queue_name:, job_class:, job_data:, context:,
+               shard: Policy::DEFAULT_SHARD, scheduled_at: nil, priority: 0)
+      connection.transaction(requires_new: true) do
+        connection.exec_query(
+          <<~SQL.squish,
+            INSERT INTO #{STAGED_TABLE}
+              (policy_name, partition_key, queue_name, job_class, job_data, context, scheduled_at, priority, enqueued_at)
+            VALUES ($1, $2, $3, $4, $5::jsonb, $6::jsonb, $7, $8, now())
+          SQL
+          "stage_job",
+          [policy_name, partition_key, queue_name, job_class, JSON.dump(job_data), JSON.dump(context), scheduled_at, priority]
+        )
+        upsert_partition!(
+          policy_name: policy_name,
+          partition_key: partition_key,
+          queue_name: queue_name,
+          shard: shard,
+          context: context,
+          delta_pending: 1
+        )
+      end
+      true
+    end
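
A usage sketch for the single-job write path, using only this file's API; the policy, key, and job_data shape are illustrative:

    DispatchPolicy::Repository.stage!(
      policy_name:   "exports",
      partition_key: "tenant-42",              # one partition per tenant
      queue_name:    "default",
      job_class:     "ExportJob",
      job_data:      { "arguments" => [42] },  # serialized-job shape assumed
      context:       { "plan" => "pro" },      # visible to admission gates
      priority:      0
    )
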
+
+    # Bulk version for perform_all_later. Receives an array of hashes with
+    # the same keys as #stage!. Performs one INSERT for staged_jobs and
+    # one UPSERT per (policy_name, partition_key) group. enqueued_at is
+    # set inline to match #stage!.
+    def stage_many!(rows)
+      return 0 if rows.empty?
+
+      connection.transaction(requires_new: true) do
+        values_sql = []
+        params = []
+        rows.each_with_index do |row, idx|
+          base = idx * 8
+          values_sql << "($#{base + 1}, $#{base + 2}, $#{base + 3}, $#{base + 4}, $#{base + 5}::jsonb, $#{base + 6}::jsonb, $#{base + 7}, $#{base + 8}, now())"
+          params.push(
+            row[:policy_name],
+            row[:partition_key],
+            row[:queue_name],
+            row[:job_class],
+            JSON.dump(row[:job_data]),
+            JSON.dump(row[:context] || {}),
+            row[:scheduled_at],
+            row[:priority] || 0
+          )
+        end
+        connection.exec_query(
+          <<~SQL.squish,
+            INSERT INTO #{STAGED_TABLE}
+              (policy_name, partition_key, queue_name, job_class, job_data, context, scheduled_at, priority, enqueued_at)
+            VALUES #{values_sql.join(", ")}
+          SQL
+          "stage_many",
+          params
+        )
+
+        rows.group_by { |r| [r[:policy_name], r[:partition_key]] }.each do |(policy_name, partition_key), group|
+          upsert_partition!(
+            policy_name: policy_name,
+            partition_key: partition_key,
+            queue_name: group.first[:queue_name],
+            shard: group.first[:shard] || Policy::DEFAULT_SHARD,
+            context: group.last[:context] || {},
+            delta_pending: group.size
+          )
+        end
+      end
+      rows.size
+    end
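
The bulk path in the same terms: two rows for one partition collapse into a single partition UPSERT with delta_pending: 2 (values illustrative):

    rows = [1, 2].map do |n|
      { policy_name: "exports", partition_key: "tenant-42",
        queue_name: "default", job_class: "ExportJob",
        job_data: { "arguments" => [n] }, context: {} }
    end
    DispatchPolicy::Repository.stage_many!(rows)  # => 2
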
+
+    def upsert_partition!(policy_name:, partition_key:, queue_name:, context:, delta_pending:,
+                          shard: Policy::DEFAULT_SHARD)
+      connection.exec_query(
+        <<~SQL.squish,
+          INSERT INTO #{PARTITIONS_TABLE}
+            (policy_name, partition_key, queue_name, shard, context, context_updated_at,
+             pending_count, last_enqueued_at, status, gate_state, created_at, updated_at)
+          VALUES ($1, $2, $3, $4, $5::jsonb, now(), $6, now(), 'active', '{}'::jsonb, now(), now())
+          ON CONFLICT (policy_name, partition_key) DO UPDATE SET
+            context = EXCLUDED.context,
+            context_updated_at = EXCLUDED.context_updated_at,
+            queue_name = COALESCE(EXCLUDED.queue_name, #{PARTITIONS_TABLE}.queue_name),
+            shard = #{PARTITIONS_TABLE}.shard,
+            pending_count = #{PARTITIONS_TABLE}.pending_count + EXCLUDED.pending_count,
+            last_enqueued_at = EXCLUDED.last_enqueued_at,
+            updated_at = now()
+        SQL
+        "upsert_partition",
+        [policy_name, partition_key, queue_name, shard, JSON.dump(context), delta_pending]
+      )
+    end
+
+    # ----- tick path -----------------------------------------------------------
+
+    # Lock + return up to `limit` partitions ready to be evaluated by the tick.
+    # Each row's last_checked_at is bumped to now() so the next tick fairly
+    # picks others. Locked rows are released when the transaction commits.
+    #
+    # When `shard` is non-nil, only partitions on that shard are claimed —
+    # this lets several tick processes work on the same policy in parallel,
+    # one per shard.
+    def claim_partitions(policy_name:, limit:, shard: nil)
+      params = [policy_name]
+      shard_sql = ""
+      if shard
+        params << shard
+        shard_sql = " AND shard = $#{params.size}"
+      end
+      params << limit
+
+      sql = <<~SQL.squish
+        WITH candidates AS (
+          SELECT id FROM #{PARTITIONS_TABLE}
+          WHERE policy_name = $1
+            AND status = 'active'
+            AND pending_count > 0
+            AND (next_eligible_at IS NULL OR next_eligible_at <= now())
+            #{shard_sql}
+          ORDER BY last_checked_at NULLS FIRST, id
+          LIMIT $#{params.size}
+          FOR UPDATE SKIP LOCKED
+        )
+        UPDATE #{PARTITIONS_TABLE} p
+        SET last_checked_at = now()
+        FROM candidates
+        WHERE p.id = candidates.id
+        RETURNING p.*
+      SQL
+      result = connection.exec_query(sql, "claim_partitions", params)
+      result.to_a.map { |row| normalize_partition(row) }
+    end
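
In outline, how a tick shard would drive this; the transaction wrapper and loop shape are assumptions here, not the gem's actual Tick implementation:

    DispatchPolicy::Repository.with_connection do
      DispatchPolicy::Repository.connection.transaction do
        partitions = DispatchPolicy::Repository.claim_partitions(
          policy_name: "exports", limit: 50, shard: "a"
        )
        # Row locks hold until COMMIT; the bumped last_checked_at sends
        # these partitions to the back of the line for the next tick.
        # ... evaluate gates and claim staged jobs per partition ...
      end
    end
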
+
+    # Atomically claim up to `limit` staged rows for a partition (DELETE …
+    # RETURNING) and update the partition's counters / gate_state /
+    # next_eligible_at in the same transaction.
+    #
+    # `limit` MUST be positive: the deny path (no rows to admit) goes
+    # through `bulk_record_partition_denies!` instead, which collapses
+    # many partitions into a single UPDATE…FROM(VALUES…) at the end of
+    # the tick.
+    def claim_staged_jobs!(policy_name:, partition_key:, limit:, gate_state_patch:, retry_after:,
+                           half_life_seconds: nil)
+      raise ArgumentError, "claim_staged_jobs! requires limit > 0" unless limit.positive?
+
+      sql_select = <<~SQL.squish
+        WITH claimed AS (
+          SELECT id FROM #{STAGED_TABLE}
+          WHERE policy_name = $1 AND partition_key = $2
+            AND (scheduled_at IS NULL OR scheduled_at <= now())
+          ORDER BY priority DESC, scheduled_at NULLS FIRST, id
+          LIMIT $3
+          FOR UPDATE SKIP LOCKED
+        )
+        DELETE FROM #{STAGED_TABLE} s
+        USING claimed
+        WHERE s.id = claimed.id
+        RETURNING s.*
+      SQL
+      rows = connection.exec_query(sql_select, "claim_staged_jobs", [policy_name, partition_key, limit]).to_a
+
+      record_partition_admit!(
+        policy_name: policy_name,
+        partition_key: partition_key,
+        admitted: rows.size,
+        gate_state_patch: gate_state_patch,
+        retry_after: retry_after,
+        half_life_seconds: half_life_seconds
+      )
+
+      rows.map { |r| normalize_staged(r) }
+    end
+
+    # Per-partition admit-state UPDATE. Runs inside the per-partition
+    # admission TX alongside the DELETE, so pending_count / total_admitted
+    # / gate_state changes commit atomically with the claim and the
+    # adapter handoff. For the deny case use `bulk_record_partition_denies!`.
+    #
+    # When `half_life_seconds` is non-nil, the row's EWMA decayed_admits
+    # counter is also refreshed in the same UPDATE: the previous value
+    # decays exponentially based on the elapsed wall time since the
+    # last update, then `admitted` is added on top. This keeps fairness
+    # state atomic with the admit (no separate write, no race) and
+    # leaves the partition row's lock undisturbed.
+    def record_partition_admit!(policy_name:, partition_key:, admitted:, gate_state_patch:,
+                                retry_after:, half_life_seconds: nil)
+      next_eligible_sql, next_eligible_params = next_eligible_clause(retry_after)
+      gate_state_json = JSON.dump(gate_state_patch || {})
+
+      params = [policy_name, partition_key, admitted, gate_state_json, *next_eligible_params]
+
+      if half_life_seconds && half_life_seconds.to_f.positive?
+        # Decay constant τ such that exp(-Δt/τ) halves every half_life:
+        # τ = half_life / ln(2). NULLIF guards a degenerate τ = 0.
+        decay_idx = params.size + 1
+        admitted_idx_for_ewma = 3
+        decay_tau = half_life_seconds.to_f / Math.log(2)
+        params << decay_tau
+        decay_sql = <<~SQL.squish
+          decayed_admits = decayed_admits *
+            exp(- COALESCE(EXTRACT(EPOCH FROM (now() - decayed_admits_at)), 0)
+                / NULLIF($#{decay_idx}::double precision, 0))
+            + $#{admitted_idx_for_ewma},
+          decayed_admits_at = now(),
+        SQL
+      else
+        decay_sql = ""
+      end
+
+      connection.exec_query(
+        <<~SQL.squish,
+          UPDATE #{PARTITIONS_TABLE}
+          SET pending_count = GREATEST(pending_count - $3, 0),
+              total_admitted = total_admitted + $3,
+              last_admit_at = CASE WHEN $3 > 0 THEN now() ELSE last_admit_at END,
+              gate_state = gate_state || $4::jsonb,
+              next_eligible_at = #{next_eligible_sql},
+              #{decay_sql}
+              updated_at = now()
+          WHERE policy_name = $1 AND partition_key = $2
+        SQL
+        "record_partition_admit",
+        params
+      )
+    end
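
The decay arithmetic mirrored in plain Ruby; the half-life and sample numbers are illustrative:

    half_life = 60.0                      # seconds
    tau       = half_life / Math.log(2)   # ≈ 86.56
    # EWMA weight 10 recorded 60s ago, 4 jobs admitted now:
    decayed = 10 * Math.exp(-60.0 / tau)  # => 5.0, half after one half-life
    decayed + 4                           # => 9.0, the new decayed_admits
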
+
+    # Bulk-update many partitions whose pipeline this tick decided to deny.
+    # One UPDATE…FROM(VALUES…) instead of one UPDATE per partition, which
+    # cuts the deny path of a tick with `partition_batch_size = 50` from
+    # ~50 round-trips to one. The deny path doesn't touch pending_count or
+    # total_admitted (admitted = 0 makes them no-ops in the per-row
+    # UPDATE), so we only write gate_state and next_eligible_at here.
+    #
+    # Each entry: { policy_name:, partition_key:, gate_state_patch:, retry_after: }.
+    # Entries are independent per row — the join via FROM(VALUES…) makes
+    # the bulk statement equivalent to N sequential UPDATEs in correctness
+    # terms, and the row-level locks held by `claim_partitions` (FOR UPDATE
+    # SKIP LOCKED, last_checked_at bumped) keep concurrent ticks away from
+    # the same partitions while we batch.
+    def bulk_record_partition_denies!(entries)
+      return if entries.empty?
+
+      values_sql = []
+      params = []
+      entries.each_with_index do |e, idx|
+        base = idx * 4
+        values_sql << "($#{base + 1}::text, $#{base + 2}::text, $#{base + 3}::jsonb, $#{base + 4}::numeric)"
+        params.push(
+          e[:policy_name],
+          e[:partition_key],
+          JSON.dump(e[:gate_state_patch] || {}),
+          e[:retry_after].nil? ? nil : e[:retry_after].to_f.round(3)
+        )
+      end
+
+      connection.exec_query(
+        <<~SQL.squish,
+          UPDATE #{PARTITIONS_TABLE} p
+          SET gate_state = p.gate_state || v.gate_state_patch,
+              next_eligible_at = CASE
+                WHEN v.retry_after_secs IS NULL THEN NULL
+                ELSE now() + (v.retry_after_secs || ' seconds')::interval
+              END,
+              updated_at = now()
+          FROM (VALUES #{values_sql.join(", ")})
+            AS v(policy_name, partition_key, gate_state_patch, retry_after_secs)
+          WHERE p.policy_name = v.policy_name AND p.partition_key = v.partition_key
+        SQL
+        "bulk_record_partition_denies",
+        params
+      )
+    end
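
What one tick's deny batch might look like; gate names and delays are illustrative:

    DispatchPolicy::Repository.bulk_record_partition_denies!([
      { policy_name: "exports", partition_key: "tenant-7",
        gate_state_patch: { "throttle" => { "tokens" => 0 } }, retry_after: 2.5 },
      { policy_name: "exports", partition_key: "tenant-9",
        gate_state_patch: {}, retry_after: nil }  # eligible again immediately
    ])
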
+
+    # ----- inflight tracking ---------------------------------------------------
+
+    def insert_inflight!(rows)
+      return if rows.empty?
+
+      values_sql = []
+      params = []
+      rows.each_with_index do |row, idx|
+        base = idx * 3
+        values_sql << "($#{base + 1}, $#{base + 2}, $#{base + 3}, now(), now())"
+        params.push(row[:policy_name], row[:partition_key], row[:active_job_id])
+      end
+      connection.exec_query(
+        <<~SQL.squish,
+          INSERT INTO #{INFLIGHT_TABLE}
+            (policy_name, partition_key, active_job_id, admitted_at, heartbeat_at)
+          VALUES #{values_sql.join(", ")}
+          ON CONFLICT (active_job_id) DO NOTHING
+        SQL
+        "insert_inflight",
+        params
+      )
+    end
+
+    def delete_inflight!(active_job_id:)
+      connection.exec_query(
+        "DELETE FROM #{INFLIGHT_TABLE} WHERE active_job_id = $1",
+        "delete_inflight",
+        [active_job_id]
+      )
+    end
+
+    def heartbeat_inflight!(active_job_id:)
+      connection.exec_query(
+        "UPDATE #{INFLIGHT_TABLE} SET heartbeat_at = now() WHERE active_job_id = $1",
+        "heartbeat_inflight",
+        [active_job_id]
+      )
+    end
+
+    def count_inflight(policy_name:, partition_key:)
+      result = connection.exec_query(
+        "SELECT count(*)::int AS n FROM #{INFLIGHT_TABLE} WHERE policy_name = $1 AND partition_key = $2",
+        "count_inflight",
+        [policy_name, partition_key]
+      )
+      Integer(result.rows.first.first)
+    end
+
+    def sweep_stale_inflight!(cutoff_seconds:)
+      connection.exec_query(
+        <<~SQL.squish,
+          DELETE FROM #{INFLIGHT_TABLE}
+          WHERE heartbeat_at < now() - ($1 || ' seconds')::interval
+        SQL
+        "sweep_stale_inflight",
+        [cutoff_seconds.to_i]
+      )
+    end
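
One way a job class could drive this lifecycle. This is a hypothetical sketch; the gem's real wiring lives in lib/dispatch_policy/job_extension.rb, which this diff does not show:

    class ExportJob < ApplicationJob
      around_perform do |job, block|
        DispatchPolicy::Repository.heartbeat_inflight!(active_job_id: job.job_id)
        block.call
      ensure
        # Free the slot even when perform raises; a stale row would
        # otherwise linger until sweep_stale_inflight! catches it.
        DispatchPolicy::Repository.delete_inflight!(active_job_id: job.job_id)
      end
    end
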
+
+    # ----- metrics --------------------------------------------------------------
+
+    # Records one row per Tick.run with admission and timing aggregates so the
+    # operator UI can display rates over time without sampling on the read
+    # path.
+    def record_tick_sample!(policy_name:, duration_ms:, partitions_seen:, partitions_admitted:,
+                            partitions_denied:, jobs_admitted:, forward_failures:,
+                            pending_total:, inflight_total:, denied_reasons:)
+      connection.exec_query(
+        <<~SQL.squish,
+          INSERT INTO #{SAMPLES_TABLE}
+            (policy_name, sampled_at, duration_ms, partitions_seen, partitions_admitted,
+             partitions_denied, jobs_admitted, forward_failures, pending_total,
+             inflight_total, denied_reasons)
+          VALUES ($1, now(), $2, $3, $4, $5, $6, $7, $8, $9, $10::jsonb)
+        SQL
+        "record_tick_sample",
+        [policy_name, duration_ms.to_i, partitions_seen.to_i, partitions_admitted.to_i,
+         partitions_denied.to_i, jobs_admitted.to_i, forward_failures.to_i,
+         pending_total.to_i, inflight_total.to_i, JSON.dump(denied_reasons || {})]
+      )
+    end
+
+    # Aggregate counters since `since` (a Time). If `policy_name` is nil,
+    # aggregates across all policies. Returns a Hash with summary keys.
+    def tick_summary(policy_name: nil, since:)
+      where_sql, params = sample_filter(policy_name, since)
+      result = connection.exec_query(
+        <<~SQL.squish,
+          SELECT
+            COALESCE(SUM(jobs_admitted), 0)::int AS jobs_admitted,
+            COALESCE(SUM(partitions_seen), 0)::int AS partitions_seen,
+            COALESCE(SUM(partitions_admitted), 0)::int AS partitions_admitted,
+            COALESCE(SUM(partitions_denied), 0)::int AS partitions_denied,
+            COALESCE(SUM(forward_failures), 0)::int AS forward_failures,
+            COUNT(*)::int AS ticks,
+            COALESCE(AVG(duration_ms), 0)::int AS avg_duration_ms,
+            COALESCE(MAX(duration_ms), 0)::int AS max_duration_ms,
+            MAX(sampled_at) AS last_sampled_at
+          FROM #{SAMPLES_TABLE}
+          #{where_sql}
+        SQL
+        "tick_summary",
+        params
+      )
+      row = result.first || {}
+      {
+        jobs_admitted: row["jobs_admitted"].to_i,
+        partitions_seen: row["partitions_seen"].to_i,
+        partitions_admitted: row["partitions_admitted"].to_i,
+        partitions_denied: row["partitions_denied"].to_i,
+        forward_failures: row["forward_failures"].to_i,
+        ticks: row["ticks"].to_i,
+        avg_duration_ms: row["avg_duration_ms"].to_i,
+        max_duration_ms: row["max_duration_ms"].to_i,
+        last_sampled_at: row["last_sampled_at"]
+      }
+    end
+
+    # Aggregate denied_reasons jsonb across samples in the window: returns
+    # { "throttle" => 12, "concurrency_full" => 3, ... }
+    def denied_reasons_summary(policy_name: nil, since:)
+      where_sql, params = sample_filter(policy_name, since)
+      result = connection.exec_query(
+        <<~SQL.squish,
+          SELECT key, SUM(value::int)::int AS total
+          FROM #{SAMPLES_TABLE},
+               LATERAL jsonb_each_text(denied_reasons)
+          #{where_sql}
+          GROUP BY key
+          ORDER BY total DESC
+        SQL
+        "denied_reasons_summary",
+        params
+      )
+      result.to_a.each_with_object({}) { |r, h| h[r["key"]] = r["total"].to_i }
+    end
+
+    # Returns time-bucketed series for sparklines. `bucket_seconds` is the
+    # bucket width. Each row: { bucket_at:, jobs_admitted:, forward_failures:,
+    # pending_total:, ticks: }.
+    #
+    # `pending_total` is the AVERAGE pending observed across the ticks
+    # in that bucket — using AVG (not MAX/last) gives a smoother trend
+    # that's resilient to a single outlier sample dragging the bucket up.
+    def tick_samples_buckets(policy_name: nil, since:, bucket_seconds: 60)
+      where_sql, params = sample_filter(policy_name, since)
+      bucket_param_idx = params.size + 1
+      params << bucket_seconds.to_i
+
+      # `date_bin` requires Postgres 14+. We compute the bucket via floor on
+      # the epoch instead so the gem also runs on Postgres 12/13.
+      result = connection.exec_query(
+        <<~SQL.squish,
+          SELECT
+            to_timestamp(floor(extract(epoch from sampled_at) / $#{bucket_param_idx})::bigint * $#{bucket_param_idx}) AS bucket_at,
+            COALESCE(SUM(jobs_admitted), 0)::int AS jobs_admitted,
+            COALESCE(SUM(forward_failures), 0)::int AS forward_failures,
+            COALESCE(AVG(pending_total), 0)::int AS pending_total,
+            COUNT(*)::int AS ticks
+          FROM #{SAMPLES_TABLE}
+          #{where_sql}
+          GROUP BY bucket_at
+          ORDER BY bucket_at ASC
+        SQL
+        "tick_samples_buckets",
+        params
+      )
+      result.to_a.map do |r|
+        { bucket_at: r["bucket_at"],
+          jobs_admitted: r["jobs_admitted"].to_i,
+          forward_failures: r["forward_failures"].to_i,
+          pending_total: r["pending_total"].to_i,
+          ticks: r["ticks"].to_i }
+      end
+    end
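
The epoch-floor bucketing in one line of arithmetic; the timestamp is illustrative:

    # 12:00:47 with 60s buckets snaps to 12:00:00, the same boundary
    # date_bin('60 seconds', ...) would produce on Postgres 14+.
    t = Time.utc(2026, 5, 1, 12, 0, 47)
    Time.at((t.to_i / 60) * 60).utc  # => 2026-05-01 12:00:00 UTC
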
+
+    # Direction of a numeric series. Compares the average of the first
+    # third to that of the last third — robust to noise on the ends.
+    def trend_direction(values, threshold_ratio: 0.10)
+      return :flat if values.size < 3
+
+      n = values.size
+      head = values.first(n / 3)
+      tail = values.last(n / 3)
+      head_avg = head.sum.to_f / head.size
+      tail_avg = tail.sum.to_f / tail.size
+
+      return :flat if head_avg.zero? && tail_avg.zero?
+
+      delta_ratio = (tail_avg - head_avg) / [head_avg, 1.0].max
+      if delta_ratio >= threshold_ratio
+        :up
+      elsif delta_ratio <= -threshold_ratio
+        :down
+      else
+        :flat
+      end
+    end
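
A quick check of the thresholding on toy series (n = 6, so head and tail are two samples each):

    vals = [10, 10, 10, 12, 13, 14]  # head avg 10.0, tail avg 13.5, +35%
    DispatchPolicy::Repository.trend_direction(vals)                      # => :up
    DispatchPolicy::Repository.trend_direction([5, 5, 5, 5, 5, 5])        # => :flat
    DispatchPolicy::Repository.trend_direction([14, 13, 12, 10, 10, 10])  # => :down
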
+
+    # Round-trip statistics across active partitions: how stale is the most-
+    # stale partition the tick has yet to revisit? P50/P95/oldest ages help
+    # decide if partition_batch_size needs to grow or ticks need sharding.
+    def partition_round_trip_stats(policy_name: nil)
+      filter_sql = "WHERE p.status = 'active' AND p.pending_count > 0"
+      params = []
+      if policy_name
+        filter_sql += " AND p.policy_name = $1"
+        params << policy_name
+      end
+
+      # For ages (now - last_checked_at) the percentile direction inverts:
+      # the 95th percentile of *age* corresponds to the 5th percentile of the
+      # *timestamp* (the oldest 5% of last_checked_at values). So we keep the
+      # ordered-set aggregate on the raw timestamp column, invert the
+      # percentile argument, and convert to an age in the outer EXTRACT.
+      result = connection.exec_query(
+        <<~SQL.squish,
+          SELECT
+            COUNT(*)::int AS active_partitions,
+            COUNT(*) FILTER (WHERE p.last_checked_at IS NULL)::int AS never_checked,
+            COUNT(*) FILTER (WHERE p.next_eligible_at IS NOT NULL AND p.next_eligible_at > now())::int AS in_backoff,
+            EXTRACT(EPOCH FROM (now() - MIN(p.last_checked_at)))::float AS oldest_age_seconds,
+            EXTRACT(EPOCH FROM (now() - PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY p.last_checked_at)))::float AS p50_age_seconds,
+            EXTRACT(EPOCH FROM (now() - PERCENTILE_DISC(0.05) WITHIN GROUP (ORDER BY p.last_checked_at)))::float AS p95_age_seconds
+          FROM #{PARTITIONS_TABLE} p
+          #{filter_sql}
+        SQL
+        "partition_round_trip_stats",
+        params
+      )
+      row = result.first || {}
+      {
+        active_partitions: row["active_partitions"].to_i,
+        never_checked: row["never_checked"].to_i,
+        in_backoff: row["in_backoff"].to_i,
+        oldest_age_seconds: row["oldest_age_seconds"]&.to_f,
+        p50_age_seconds: row["p50_age_seconds"]&.to_f,
+        p95_age_seconds: row["p95_age_seconds"]&.to_f
+      }
+    end
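
Concretely: with five active partitions last checked at 10:00, 10:10, 10:20, 10:30, and 10:40, and now() at 11:00, PERCENTILE_DISC(0.05) selects the earliest timestamp, 10:00, and the outer EXTRACT reports a p95 age of 3600 seconds: only the oldest 5% of partitions have waited that long or longer.
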
+
+    # ----- adaptive_concurrency stats -----------------------------------------
+
+    # Insert a fresh stats row for the given partition if none exists.
+    # Idempotent — runs as `INSERT … ON CONFLICT DO NOTHING`. Cheap to
+    # call on every admission so the gate's evaluate path can read
+    # current_max safely without checking for existence first.
+    def adaptive_seed!(policy_name:, partition_key:, initial_max:)
+      connection.exec_query(
+        <<~SQL.squish,
+          INSERT INTO #{ADAPTIVE_TABLE}
+            (policy_name, partition_key, current_max, ewma_latency_ms,
+             sample_count, created_at, updated_at)
+          VALUES ($1, $2, $3, 0, 0, now(), now())
+          ON CONFLICT (policy_name, partition_key) DO NOTHING
+        SQL
+        "adaptive_seed",
+        [policy_name, partition_key, initial_max.to_i]
+      )
+    end
+
+    # Fetch the AIMD-tuned cap for a partition. Returns nil when the
+    # row doesn't exist yet — caller should fall back to initial_max.
+    def adaptive_current_max(policy_name:, partition_key:)
+      result = connection.exec_query(
+        "SELECT current_max FROM #{ADAPTIVE_TABLE} WHERE policy_name = $1 AND partition_key = $2 LIMIT 1",
+        "adaptive_current_max",
+        [policy_name, partition_key]
+      )
+      row = result.first
+      row && row["current_max"].to_i
+    end
+
+    # Single-statement EWMA + AIMD update. Concurrent workers can call
+    # this in any order without read-modify-write races: every clause
+    # reads the row's current value at the start of the UPDATE.
+    #
+    #   ewma_latency_ms_new = ewma_latency_ms * (1 - α) + α * queue_lag_ms
+    #   current_max_new     = GREATEST(min,
+    #     FAILED?     FLOOR(current_max * fail_factor)
+    #     OVERLOADED? FLOOR(current_max * slow_factor)   -- new EWMA > target_lag_ms
+    #     else        current_max + 1)
+    def adaptive_record!(policy_name:, partition_key:, queue_lag_ms:, succeeded:,
+                         alpha:, target_lag_ms:, fail_factor:, slow_factor:, min:)
+      connection.exec_query(
+        <<~SQL.squish,
+          UPDATE #{ADAPTIVE_TABLE}
+          SET
+            ewma_latency_ms = ewma_latency_ms * (1 - $3::double precision)
+                              + $3::double precision * $4::double precision,
+            sample_count = sample_count + 1,
+            current_max = GREATEST($5::int, CASE
+              WHEN $6::boolean = FALSE
+                THEN FLOOR(current_max * $7::double precision)::int
+              WHEN (ewma_latency_ms * (1 - $3::double precision)
+                    + $3::double precision * $4::double precision) > $8::double precision
+                THEN FLOOR(current_max * $9::double precision)::int
+              ELSE current_max + 1
+            END),
+            last_observed_at = now(),
+            updated_at = now()
+          WHERE policy_name = $1 AND partition_key = $2
+        SQL
+        "adaptive_record",
+        [policy_name, partition_key, alpha.to_f, queue_lag_ms.to_f,
+         min.to_i, succeeded ? true : false,
+         fail_factor.to_f, target_lag_ms.to_f, slow_factor.to_f]
+      )
+    end
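
One AIMD step traced with concrete numbers; all parameters illustrative:

    # current_max = 10, ewma = 900ms, alpha = 0.2, target = 1000ms,
    # slow_factor = 0.5, fail_factor = 0.5, min = 1
    ewma = 900 * (1 - 0.2) + 0.2 * 2000  # => 1120.0 after a 2000ms lag sample
    # The job succeeded but 1120 > 1000, so multiplicative decrease applies:
    (10 * 0.5).floor                     # => 5, the new current_max
    # Had the new EWMA stayed at or below 1000, the cap would creep to 11.
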
+
+    # ----- tick samples sweep -------------------------------------------------
+
+    def sweep_old_tick_samples!(cutoff_seconds:)
+      connection.exec_query(
+        "DELETE FROM #{SAMPLES_TABLE} WHERE sampled_at < now() - ($1 || ' seconds')::interval",
+        "sweep_old_tick_samples",
+        [cutoff_seconds.to_i]
+      )
+    end
+
+    # ----------------------------------------------------------------------------
+
+    # Removes partitions that have no pending staged jobs and have been
+    # idle for `cutoff_seconds`. The default cutoff (24h) is well past any
+    # reasonable inflight job — concurrency state lives in inflight_jobs
+    # and is independent of partition rows, so a recreated partition will
+    # re-observe the live in-flight count via the concurrency gate.
+    def sweep_inactive_partitions!(cutoff_seconds:)
+      connection.exec_query(
+        <<~SQL.squish,
+          DELETE FROM #{PARTITIONS_TABLE}
+          WHERE pending_count = 0
+            AND status = 'active'
+            AND (
+              (last_admit_at IS NOT NULL AND last_admit_at < now() - ($1 || ' seconds')::interval)
+              OR
+              (last_admit_at IS NULL AND created_at < now() - ($1 || ' seconds')::interval)
+            )
+        SQL
+        "sweep_inactive_partitions",
+        [cutoff_seconds.to_i]
+      )
+    end
+
+    # ----- helpers --------------------------------------------------------------
+
+    def normalize_partition(row)
+      out = {}
+      row.each { |k, v| out[k.to_s] = v }
+      out["context"] = parse_jsonb(out["context"])
+      out["gate_state"] = parse_jsonb(out["gate_state"])
+      out
+    end
+
+    def normalize_staged(row)
+      out = {}
+      row.each { |k, v| out[k.to_s] = v }
+      out["job_data"] = parse_jsonb(out["job_data"])
+      out["context"] = parse_jsonb(out["context"])
+      out
+    end
+
+    def parse_jsonb(value)
+      case value
+      when Hash, Array then value
+      when nil, "" then {}
+      else
+        begin
+          JSON.parse(value)
+        rescue JSON::ParserError
+          {}
+        end
+      end
+    end
+
+    def sample_filter(policy_name, since)
+      params = [since]
+      if policy_name
+        params << policy_name
+        ["WHERE sampled_at >= $1 AND policy_name = $2", params]
+      else
+        ["WHERE sampled_at >= $1", params]
+      end
+    end
+
+    def next_eligible_clause(retry_after)
+      if retry_after.nil?
+        ["NULL", []]
+      else
+        # Hardcodes $5: the caller appends this param after the four fixed
+        # params of the parent UPDATE in record_partition_admit!.
+        ["now() + ($5 || ' seconds')::interval", [retry_after.to_f.round(3)]]
+      end
+    end
+  end
+end