dispatch_policy 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/MIT-LICENSE +16 -17
- data/README.md +449 -288
- data/app/assets/stylesheets/dispatch_policy/application.css +157 -0
- data/app/controllers/dispatch_policy/application_controller.rb +45 -1
- data/app/controllers/dispatch_policy/dashboard_controller.rb +91 -0
- data/app/controllers/dispatch_policy/partitions_controller.rb +122 -0
- data/app/controllers/dispatch_policy/policies_controller.rb +94 -241
- data/app/controllers/dispatch_policy/staged_jobs_controller.rb +9 -0
- data/app/models/dispatch_policy/adaptive_concurrency_stats.rb +11 -81
- data/app/models/dispatch_policy/inflight_job.rb +12 -0
- data/app/models/dispatch_policy/partition.rb +21 -0
- data/app/models/dispatch_policy/staged_job.rb +4 -97
- data/app/models/dispatch_policy/tick_sample.rb +11 -0
- data/app/views/dispatch_policy/dashboard/index.html.erb +109 -0
- data/app/views/dispatch_policy/partitions/index.html.erb +63 -0
- data/app/views/dispatch_policy/partitions/show.html.erb +106 -0
- data/app/views/dispatch_policy/policies/index.html.erb +15 -37
- data/app/views/dispatch_policy/policies/show.html.erb +140 -216
- data/app/views/dispatch_policy/shared/_capacity.html.erb +67 -0
- data/app/views/dispatch_policy/shared/_hints.html.erb +13 -0
- data/app/views/dispatch_policy/shared/_partition_row.html.erb +12 -0
- data/app/views/dispatch_policy/staged_jobs/show.html.erb +31 -0
- data/app/views/layouts/dispatch_policy/application.html.erb +95 -238
- data/config/routes.rb +18 -2
- data/db/migrate/20260501000001_create_dispatch_policy_tables.rb +103 -0
- data/lib/dispatch_policy/bypass.rb +23 -0
- data/lib/dispatch_policy/config.rb +85 -0
- data/lib/dispatch_policy/context.rb +50 -0
- data/lib/dispatch_policy/cursor_pagination.rb +121 -0
- data/lib/dispatch_policy/decision.rb +22 -0
- data/lib/dispatch_policy/engine.rb +4 -27
- data/lib/dispatch_policy/forwarder.rb +63 -0
- data/lib/dispatch_policy/gate.rb +10 -38
- data/lib/dispatch_policy/gates/adaptive_concurrency.rb +99 -97
- data/lib/dispatch_policy/gates/concurrency.rb +45 -26
- data/lib/dispatch_policy/gates/throttle.rb +65 -37
- data/lib/dispatch_policy/inflight_tracker.rb +174 -0
- data/lib/dispatch_policy/job_extension.rb +155 -0
- data/lib/dispatch_policy/operator_hints.rb +126 -0
- data/lib/dispatch_policy/pipeline.rb +48 -0
- data/lib/dispatch_policy/policy.rb +62 -47
- data/lib/dispatch_policy/policy_dsl.rb +120 -0
- data/lib/dispatch_policy/railtie.rb +35 -0
- data/lib/dispatch_policy/registry.rb +46 -0
- data/lib/dispatch_policy/repository.rb +723 -0
- data/lib/dispatch_policy/serializer.rb +36 -0
- data/lib/dispatch_policy/tick.rb +263 -172
- data/lib/dispatch_policy/tick_loop.rb +59 -26
- data/lib/dispatch_policy/version.rb +1 -1
- data/lib/dispatch_policy.rb +71 -46
- data/lib/generators/dispatch_policy/install/install_generator.rb +70 -0
- data/lib/generators/dispatch_policy/install/templates/create_dispatch_policy_tables.rb.tt +95 -0
- data/lib/generators/dispatch_policy/install/templates/dispatch_tick_loop_job.rb.tt +53 -0
- data/lib/generators/dispatch_policy/install/templates/initializer.rb.tt +11 -0
- metadata +101 -43
- data/CHANGELOG.md +0 -12
- data/app/models/dispatch_policy/partition_inflight_count.rb +0 -42
- data/app/models/dispatch_policy/partition_observation.rb +0 -49
- data/app/models/dispatch_policy/throttle_bucket.rb +0 -41
- data/db/migrate/20260424000001_create_dispatch_policy_tables.rb +0 -80
- data/db/migrate/20260424000002_create_adaptive_concurrency_stats.rb +0 -22
- data/db/migrate/20260424000003_create_adaptive_concurrency_samples.rb +0 -25
- data/db/migrate/20260424000004_rename_samples_to_partition_observations.rb +0 -32
- data/lib/dispatch_policy/active_job_perform_all_later_patch.rb +0 -32
- data/lib/dispatch_policy/dispatch_context.rb +0 -53
- data/lib/dispatch_policy/dispatchable.rb +0 -120
- data/lib/dispatch_policy/gates/fair_interleave.rb +0 -32
- data/lib/dispatch_policy/gates/global_cap.rb +0 -26
- data/lib/dispatch_policy/install_generator.rb +0 -23
data/lib/dispatch_policy/repository.rb
@@ -0,0 +1,723 @@
+# frozen_string_literal: true
+
+require "json"
+
+module DispatchPolicy
+  # SQL access layer for staged_jobs / partitions / inflight_jobs.
+  #
+  # Hot paths use raw SQL via ActiveRecord::Base.connection so we get
+  # `FOR UPDATE SKIP LOCKED`, multi-row UPSERTs, and DELETE … RETURNING
+  # without ActiveRecord overhead. Read paths in the engine UI use the
+  # AR models in app/models/dispatch_policy/*.
+  module Repository
+    STAGED_TABLE = "dispatch_policy_staged_jobs"
+    PARTITIONS_TABLE = "dispatch_policy_partitions"
+    INFLIGHT_TABLE = "dispatch_policy_inflight_jobs"
+    SAMPLES_TABLE = "dispatch_policy_tick_samples"
+    ADAPTIVE_TABLE = "dispatch_policy_adaptive_concurrency_stats"
+
+    module_function
+
+    def connection
+      ActiveRecord::Base.connection
+    end
+
+    # Wraps `block` in `connected_to(role: …)` when DispatchPolicy.config
+    # .database_role is set. Used by Tick to ensure the admission TX is
+    # opened against the same DB role that good_job / solid_queue uses,
+    # critical for multi-DB Rails setups (e.g. solid_queue on a separate
+    # `:queue` DB) where atomicity only holds when the staging TX and the
+    # adapter INSERT share a connection.
+    def with_connection
+      role = DispatchPolicy.config.database_role
+      if role && ActiveRecord::Base.respond_to?(:connected_to)
+        ActiveRecord::Base.connected_to(role: role) { yield }
+      else
+        yield
+      end
+    end
+
+    # ----- staging (write path) ------------------------------------------------
+
+    # Insert one staged_job row + UPSERT its partition. The partition's
+    # `context` is refreshed on every call so admission-time gates always
+    # see the latest dynamic config.
+    #
+    # @param policy_name [String]
+    # @param partition_key [String]
+    # @param queue_name [String, nil]
+    # @param job_class [String]
+    # @param job_data [Hash]
+    # @param context [Hash]
+    # @param scheduled_at [Time, nil]
+    # @param priority [Integer]
+    def stage!(policy_name:, partition_key:, queue_name:, job_class:, job_data:, context:,
+               shard: Policy::DEFAULT_SHARD, scheduled_at: nil, priority: 0)
+      connection.transaction(requires_new: true) do
+        connection.exec_query(
+          <<~SQL.squish,
+            INSERT INTO #{STAGED_TABLE}
+              (policy_name, partition_key, queue_name, job_class, job_data, context, scheduled_at, priority, enqueued_at)
+            VALUES ($1, $2, $3, $4, $5::jsonb, $6::jsonb, $7, $8, now())
+          SQL
+          "stage_job",
+          [policy_name, partition_key, queue_name, job_class, JSON.dump(job_data), JSON.dump(context), scheduled_at, priority]
+        )
+        upsert_partition!(
+          policy_name: policy_name,
+          partition_key: partition_key,
+          queue_name: queue_name,
+          shard: shard,
+          context: context,
+          delta_pending: 1
+        )
+      end
+      true
+    end
+
+    # Bulk version for perform_all_later. Receives an array of hashes with
+    # the same keys as #stage!. Performs one INSERT for staged_jobs and
+    # one UPSERT per (policy_name, partition_key) group.
+    def stage_many!(rows)
+      return 0 if rows.empty?
+
+      connection.transaction(requires_new: true) do
+        values_sql = []
+        params = []
+        rows.each_with_index do |row, idx|
+          base = idx * 8
+          values_sql << "($#{base + 1}, $#{base + 2}, $#{base + 3}, $#{base + 4}, $#{base + 5}::jsonb, $#{base + 6}::jsonb, $#{base + 7}, $#{base + 8})"
+          params.push(
+            row[:policy_name],
+            row[:partition_key],
+            row[:queue_name],
+            row[:job_class],
+            JSON.dump(row[:job_data]),
+            JSON.dump(row[:context] || {}),
+            row[:scheduled_at],
+            row[:priority] || 0
+          )
+        end
+        connection.exec_query(
+          <<~SQL.squish,
+            INSERT INTO #{STAGED_TABLE}
+              (policy_name, partition_key, queue_name, job_class, job_data, context, scheduled_at, priority)
+            VALUES #{values_sql.join(", ")}
+          SQL
+          "stage_many",
+          params
+        )
+
+        rows.group_by { |r| [r[:policy_name], r[:partition_key]] }.each do |(policy_name, partition_key), group|
+          upsert_partition!(
+            policy_name: policy_name,
+            partition_key: partition_key,
+            queue_name: group.first[:queue_name],
+            shard: group.first[:shard] || Policy::DEFAULT_SHARD,
+            context: group.last[:context] || {},
+            delta_pending: group.size
+          )
+        end
+      end
+      rows.size
+    end
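
An aside, not part of the packaged file: the `base = idx * 8` arithmetic in `stage_many!` above gives each staged row its own block of eight numbered placeholders, which is what keeps the bind list of the single multi-row INSERT aligned. A quick Ruby check of that numbering:

    # Standalone sketch, not in the gem: which placeholders each row binds.
    3.times.map do |idx|
      base = idx * 8
      "row #{idx} binds $#{base + 1}..$#{base + 8}"
    end
    # => ["row 0 binds $1..$8", "row 1 binds $9..$16", "row 2 binds $17..$24"]
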
+
+    def upsert_partition!(policy_name:, partition_key:, queue_name:, context:, delta_pending:,
+                          shard: Policy::DEFAULT_SHARD)
+      connection.exec_query(
+        <<~SQL.squish,
+          INSERT INTO #{PARTITIONS_TABLE}
+            (policy_name, partition_key, queue_name, shard, context, context_updated_at,
+             pending_count, last_enqueued_at, status, gate_state, created_at, updated_at)
+          VALUES ($1, $2, $3, $4, $5::jsonb, now(), $6, now(), 'active', '{}'::jsonb, now(), now())
+          ON CONFLICT (policy_name, partition_key) DO UPDATE SET
+            context = EXCLUDED.context,
+            context_updated_at = EXCLUDED.context_updated_at,
+            queue_name = COALESCE(EXCLUDED.queue_name, #{PARTITIONS_TABLE}.queue_name),
+            shard = #{PARTITIONS_TABLE}.shard,
+            pending_count = #{PARTITIONS_TABLE}.pending_count + EXCLUDED.pending_count,
+            last_enqueued_at = EXCLUDED.last_enqueued_at,
+            updated_at = now()
+        SQL
+        "upsert_partition",
+        [policy_name, partition_key, queue_name, shard, JSON.dump(context), delta_pending]
+      )
+    end
+
+    # ----- tick path -----------------------------------------------------------
+
+    # Lock + return up to `limit` partitions ready to be evaluated by the tick.
+    # Each row's last_checked_at is bumped to now() so the next tick fairly
+    # picks others. Locked rows are released when the transaction commits.
+    #
+    # When `shard` is non-nil, only partitions on that shard are claimed —
+    # this lets several tick processes work on the same policy in parallel,
+    # one per shard.
+    def claim_partitions(policy_name:, limit:, shard: nil)
+      params = [policy_name]
+      shard_sql = ""
+      if shard
+        params << shard
+        shard_sql = " AND shard = $#{params.size}"
+      end
+      params << limit
+
+      sql = <<~SQL.squish
+        WITH candidates AS (
+          SELECT id FROM #{PARTITIONS_TABLE}
+          WHERE policy_name = $1
+            AND status = 'active'
+            AND pending_count > 0
+            AND (next_eligible_at IS NULL OR next_eligible_at <= now())
+            #{shard_sql}
+          ORDER BY last_checked_at NULLS FIRST, id
+          LIMIT $#{params.size}
+          FOR UPDATE SKIP LOCKED
+        )
+        UPDATE #{PARTITIONS_TABLE} p
+        SET last_checked_at = now()
+        FROM candidates
+        WHERE p.id = candidates.id
+        RETURNING p.*
+      SQL
+      result = connection.exec_query(sql, "claim_partitions", params)
+      result.to_a.map { |row| normalize_partition(row) }
+    end
+
+    # Atomically claim up to `limit` staged rows for a partition (DELETE …
+    # RETURNING) and update the partition's counters / gate_state /
+    # next_eligible_at in the same transaction.
+    #
+    # `limit` MUST be positive: the deny path (no rows to admit) goes
+    # through `bulk_record_partition_denies!` instead, which collapses
+    # many partitions into a single UPDATE…FROM(VALUES…) at the end of
+    # the tick.
+    def claim_staged_jobs!(policy_name:, partition_key:, limit:, gate_state_patch:, retry_after:,
+                           half_life_seconds: nil)
+      raise ArgumentError, "claim_staged_jobs! requires limit > 0" unless limit.positive?
+
+      sql_select = <<~SQL.squish
+        WITH claimed AS (
+          SELECT id FROM #{STAGED_TABLE}
+          WHERE policy_name = $1 AND partition_key = $2
+            AND (scheduled_at IS NULL OR scheduled_at <= now())
+          ORDER BY priority DESC, scheduled_at NULLS FIRST, id
+          LIMIT $3
+          FOR UPDATE SKIP LOCKED
+        )
+        DELETE FROM #{STAGED_TABLE} s
+        USING claimed
+        WHERE s.id = claimed.id
+        RETURNING s.*
+      SQL
+      rows = connection.exec_query(sql_select, "claim_staged_jobs", [policy_name, partition_key, limit]).to_a
+
+      record_partition_admit!(
+        policy_name: policy_name,
+        partition_key: partition_key,
+        admitted: rows.size,
+        gate_state_patch: gate_state_patch,
+        retry_after: retry_after,
+        half_life_seconds: half_life_seconds
+      )
+
+      rows.map { |r| normalize_staged(r) }
+    end
+
+    # Per-partition admit-state UPDATE. Runs inside the per-partition
+    # admission TX alongside the DELETE, so pending_count / total_admitted
+    # / gate_state changes commit atomically with the claim and the
+    # adapter handoff. For the deny case use `bulk_record_partition_denies!`.
+    #
+    # When `half_life_seconds` is non-nil, the row's EWMA decayed_admits
+    # counter is also refreshed in the same UPDATE: previous value
+    # decays exponentially based on the elapsed wall time since the
+    # last update, then `admitted` is added on top. This keeps fairness
+    # state atomic with the admit (no separate write, no race) and
+    # leaves the partitions row's lock undisturbed.
+    def record_partition_admit!(policy_name:, partition_key:, admitted:, gate_state_patch:,
+                                retry_after:, half_life_seconds: nil)
+      next_eligible_sql, next_eligible_params = next_eligible_clause(retry_after)
+      gate_state_json = JSON.dump(gate_state_patch || {})
+
+      params = [policy_name, partition_key, admitted, gate_state_json, *next_eligible_params]
+
+      if half_life_seconds && half_life_seconds.to_f.positive?
+        # decay constant τ such that exp(-Δt/τ) halves every half_life:
+        # τ = half_life / ln(2). NULLIF guards a degenerate τ=0.
+        decay_idx = params.size + 1
+        admitted_idx_for_ewma = 3
+        decay_tau = half_life_seconds.to_f / Math.log(2)
+        params << decay_tau
+        decay_sql = <<~SQL.squish
+          decayed_admits = decayed_admits *
+            exp(- COALESCE(EXTRACT(EPOCH FROM (now() - decayed_admits_at)), 0)
+                / NULLIF($#{decay_idx}::double precision, 0))
+            + $#{admitted_idx_for_ewma},
+          decayed_admits_at = now(),
+        SQL
+      else
+        decay_sql = ""
+      end
+
+      connection.exec_query(
+        <<~SQL.squish,
+          UPDATE #{PARTITIONS_TABLE}
+          SET pending_count = GREATEST(pending_count - $3, 0),
+              total_admitted = total_admitted + $3,
+              last_admit_at = CASE WHEN $3 > 0 THEN now() ELSE last_admit_at END,
+              gate_state = gate_state || $4::jsonb,
+              next_eligible_at = #{next_eligible_sql},
+              #{decay_sql}
+              updated_at = now()
+          WHERE policy_name = $1 AND partition_key = $2
+        SQL
+        "record_partition_admit",
+        params
+      )
+    end
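
A quick numeric check of the decay arithmetic in `record_partition_admit!` above (a standalone sketch, not part of the packaged file, assuming a 60-second half-life):

    # With τ = half_life / ln(2), exp(-Δt/τ) is exactly 0.5 after one half-life.
    half_life      = 60.0                      # seconds
    tau            = half_life / Math.log(2)   # ≈ 86.56
    decayed_admits = 10.0                      # stored credit at the last admit
    elapsed        = 60.0                      # seconds since decayed_admits_at
    admitted       = 4                         # rows claimed in this tick
    decayed_admits * Math.exp(-elapsed / tau) + admitted
    # => 9.0: half of the old credit survives one half-life, then the new admits are added
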
+
+    # Bulk-update many partitions whose pipeline this tick decided to deny.
+    # One UPDATE…FROM(VALUES…) instead of one UPDATE per partition, which
+    # cuts a tick with `partition_batch_size = 50` from ~50 round-trips on
+    # the deny path to one. The deny path doesn't touch pending_count or
+    # total_admitted (admitted = 0 makes them no-ops in the per-row
+    # UPDATE), so we only write gate_state and next_eligible_at here.
+    #
+    # Each entry: { policy_name:, partition_key:, gate_state_patch:, retry_after: }.
+    # Independent per row — the join via FROM(VALUES…) makes the bulk
+    # statement equivalent to N sequential UPDATEs in correctness terms;
+    # the row-level locks held by `claim_partitions` (FOR UPDATE SKIP
+    # LOCKED, last_checked_at bumped) keep concurrent ticks away from the
+    # same partitions while we batch.
+    def bulk_record_partition_denies!(entries)
+      return if entries.empty?
+
+      values_sql = []
+      params = []
+      entries.each_with_index do |e, idx|
+        base = idx * 4
+        values_sql << "($#{base + 1}::text, $#{base + 2}::text, $#{base + 3}::jsonb, $#{base + 4}::numeric)"
+        params.push(
+          e[:policy_name],
+          e[:partition_key],
+          JSON.dump(e[:gate_state_patch] || {}),
+          e[:retry_after].nil? ? nil : e[:retry_after].to_f.round(3)
+        )
+      end
+
+      connection.exec_query(
+        <<~SQL.squish,
+          UPDATE #{PARTITIONS_TABLE} p
+          SET gate_state = p.gate_state || v.gate_state_patch,
+              next_eligible_at = CASE
+                WHEN v.retry_after_secs IS NULL THEN NULL
+                ELSE now() + (v.retry_after_secs || ' seconds')::interval
+              END,
+              updated_at = now()
+          FROM (VALUES #{values_sql.join(", ")})
+            AS v(policy_name, partition_key, gate_state_patch, retry_after_secs)
+          WHERE p.policy_name = v.policy_name AND p.partition_key = v.partition_key
+        SQL
+        "bulk_record_partition_denies",
+        params
+      )
+    end
+
+    # ----- inflight tracking ---------------------------------------------------
+
+    def insert_inflight!(rows)
+      return if rows.empty?
+
+      values_sql = []
+      params = []
+      rows.each_with_index do |row, idx|
+        base = idx * 3
+        values_sql << "($#{base + 1}, $#{base + 2}, $#{base + 3}, now(), now())"
+        params.push(row[:policy_name], row[:partition_key], row[:active_job_id])
+      end
+      connection.exec_query(
+        <<~SQL.squish,
+          INSERT INTO #{INFLIGHT_TABLE}
+            (policy_name, partition_key, active_job_id, admitted_at, heartbeat_at)
+          VALUES #{values_sql.join(", ")}
+          ON CONFLICT (active_job_id) DO NOTHING
+        SQL
+        "insert_inflight",
+        params
+      )
+    end
+
+    def delete_inflight!(active_job_id:)
+      connection.exec_query(
+        "DELETE FROM #{INFLIGHT_TABLE} WHERE active_job_id = $1",
+        "delete_inflight",
+        [active_job_id]
+      )
+    end
+
+    def heartbeat_inflight!(active_job_id:)
+      connection.exec_query(
+        "UPDATE #{INFLIGHT_TABLE} SET heartbeat_at = now() WHERE active_job_id = $1",
+        "heartbeat_inflight",
+        [active_job_id]
+      )
+    end
+
+    def count_inflight(policy_name:, partition_key:)
+      result = connection.exec_query(
+        "SELECT count(*)::int AS n FROM #{INFLIGHT_TABLE} WHERE policy_name = $1 AND partition_key = $2",
+        "count_inflight",
+        [policy_name, partition_key]
+      )
+      Integer(result.rows.first.first)
+    end
+
+    def sweep_stale_inflight!(cutoff_seconds:)
+      connection.exec_query(
+        <<~SQL.squish,
+          DELETE FROM #{INFLIGHT_TABLE}
+          WHERE heartbeat_at < now() - ($1 || ' seconds')::interval
+        SQL
+        "sweep_stale_inflight",
+        [cutoff_seconds.to_i]
+      )
+    end
+
+    # Removes partitions that have no pending staged jobs and have been
+    # idle for `cutoff_seconds`. The default cutoff (24h) is well past any
+    # reasonable inflight job — concurrency state lives in inflight_jobs
+    # and is independent of partition rows, so a recreated partition will
+    # re-observe the live in-flight count via the concurrency gate.
+    # ----- metrics --------------------------------------------------------------
+
+    # Records one row per Tick.run with admission and timing aggregates so the
+    # operator UI can display rates over time without sampling on the read
+    # path.
+    def record_tick_sample!(policy_name:, duration_ms:, partitions_seen:, partitions_admitted:,
+                            partitions_denied:, jobs_admitted:, forward_failures:,
+                            pending_total:, inflight_total:, denied_reasons:)
+      connection.exec_query(
+        <<~SQL.squish,
+          INSERT INTO #{SAMPLES_TABLE}
+            (policy_name, sampled_at, duration_ms, partitions_seen, partitions_admitted,
+             partitions_denied, jobs_admitted, forward_failures, pending_total,
+             inflight_total, denied_reasons)
+          VALUES ($1, now(), $2, $3, $4, $5, $6, $7, $8, $9, $10::jsonb)
+        SQL
+        "record_tick_sample",
+        [policy_name, duration_ms.to_i, partitions_seen.to_i, partitions_admitted.to_i,
+         partitions_denied.to_i, jobs_admitted.to_i, forward_failures.to_i,
+         pending_total.to_i, inflight_total.to_i, JSON.dump(denied_reasons || {})]
+      )
+    end
+
+    # Aggregate counters since `since` (a Time). If `policy_name` is nil,
+    # aggregates across all policies. Returns a Hash with summary keys.
+    def tick_summary(policy_name: nil, since:)
+      where_sql, params = sample_filter(policy_name, since)
+      result = connection.exec_query(
+        <<~SQL.squish,
+          SELECT
+            COALESCE(SUM(jobs_admitted), 0)::int AS jobs_admitted,
+            COALESCE(SUM(partitions_seen), 0)::int AS partitions_seen,
+            COALESCE(SUM(partitions_admitted), 0)::int AS partitions_admitted,
+            COALESCE(SUM(partitions_denied), 0)::int AS partitions_denied,
+            COALESCE(SUM(forward_failures), 0)::int AS forward_failures,
+            COUNT(*)::int AS ticks,
+            COALESCE(AVG(duration_ms), 0)::int AS avg_duration_ms,
+            COALESCE(MAX(duration_ms), 0)::int AS max_duration_ms,
+            MAX(sampled_at) AS last_sampled_at
+          FROM #{SAMPLES_TABLE}
+          #{where_sql}
+        SQL
+        "tick_summary",
+        params
+      )
+      row = result.first || {}
+      {
+        jobs_admitted: row["jobs_admitted"].to_i,
+        partitions_seen: row["partitions_seen"].to_i,
+        partitions_admitted: row["partitions_admitted"].to_i,
+        partitions_denied: row["partitions_denied"].to_i,
+        forward_failures: row["forward_failures"].to_i,
+        ticks: row["ticks"].to_i,
+        avg_duration_ms: row["avg_duration_ms"].to_i,
+        max_duration_ms: row["max_duration_ms"].to_i,
+        last_sampled_at: row["last_sampled_at"]
+      }
+    end
+
+    # Aggregate denied_reasons jsonb across samples in window: returns
+    # { "throttle" => 12, "concurrency_full" => 3, ... }
+    def denied_reasons_summary(policy_name: nil, since:)
+      where_sql, params = sample_filter(policy_name, since)
+      result = connection.exec_query(
+        <<~SQL.squish,
+          SELECT key, SUM(value::int)::int AS total
+          FROM #{SAMPLES_TABLE},
+               LATERAL jsonb_each_text(denied_reasons)
+          #{where_sql}
+          GROUP BY key
+          ORDER BY total DESC
+        SQL
+        "denied_reasons_summary",
+        params
+      )
+      result.to_a.each_with_object({}) { |r, h| h[r["key"]] = r["total"].to_i }
+    end
+
+    # Returns time-bucketed series for sparklines. `bucket_seconds` is the
+    # bucket width. Each row: { bucket_at:, jobs_admitted:, forward_failures:,
+    # pending_total:, ticks: }.
+    #
+    # `pending_total` is the AVERAGE pending observed across the ticks
+    # in that bucket — using AVG (not MAX/last) gives a smoother trend
+    # that's resilient to a single outlier sample dragging the bucket up.
+    def tick_samples_buckets(policy_name: nil, since:, bucket_seconds: 60)
+      where_sql, params = sample_filter(policy_name, since)
+      bucket_param_idx = params.size + 1
+      params << bucket_seconds.to_i
+
+      # `date_bin` requires Postgres 14+. We compute the bucket via floor on
+      # the epoch instead so the gem also runs on Postgres 12/13.
+      result = connection.exec_query(
+        <<~SQL.squish,
+          SELECT
+            to_timestamp(floor(extract(epoch from sampled_at) / $#{bucket_param_idx})::bigint * $#{bucket_param_idx}) AS bucket_at,
+            COALESCE(SUM(jobs_admitted), 0)::int AS jobs_admitted,
+            COALESCE(SUM(forward_failures), 0)::int AS forward_failures,
+            COALESCE(AVG(pending_total), 0)::int AS pending_total,
+            COUNT(*)::int AS ticks
+          FROM #{SAMPLES_TABLE}
+          #{where_sql}
+          GROUP BY bucket_at
+          ORDER BY bucket_at ASC
+        SQL
+        "tick_samples_buckets",
+        params
+      )
+      result.to_a.map do |r|
+        { bucket_at: r["bucket_at"],
+          jobs_admitted: r["jobs_admitted"].to_i,
+          forward_failures: r["forward_failures"].to_i,
+          pending_total: r["pending_total"].to_i,
+          ticks: r["ticks"].to_i }
+      end
+    end
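
The epoch-floor bucketing in `tick_samples_buckets` maps every sample to the start of its bucket; the same arithmetic in plain Ruby (illustrative only, not part of the packaged file):

    bucket_seconds = 60
    t = Time.utc(2026, 5, 1, 12, 34, 56)
    Time.at((t.to_i / bucket_seconds) * bucket_seconds).utc
    # => 2026-05-01 12:34:00 UTC, matching to_timestamp(floor(epoch / 60) * 60)
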
+
+    # Direction of a numeric series. Compares the average of the first
+    # third to the last third — robust to noise on the ends.
+    def self.trend_direction(values, threshold_ratio: 0.10)
+      return :flat if values.size < 3
+
+      n = values.size
+      head = values.first(n / 3)
+      tail = values.last(n / 3)
+      head_avg = head.sum.to_f / head.size
+      tail_avg = tail.sum.to_f / tail.size
+
+      return :flat if head_avg.zero? && tail_avg.zero?
+
+      delta_ratio = (tail_avg - head_avg) / [head_avg, 1.0].max
+      if delta_ratio >= threshold_ratio
+        :up
+      elsif delta_ratio <= -threshold_ratio
+        :down
+      else
+        :flat
+      end
+    end
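
Example inputs for `trend_direction`, given the first-third / last-third comparison above (illustrative only, not part of the packaged file):

    DispatchPolicy::Repository.trend_direction([10, 10, 10, 10, 10, 10]) # => :flat
    DispatchPolicy::Repository.trend_direction([2, 3, 2, 8, 9, 10])      # => :up   (head avg 2.5 vs tail avg 9.5)
    DispatchPolicy::Repository.trend_direction([9, 10, 9, 3, 2, 2])      # => :down
    DispatchPolicy::Repository.trend_direction([5, 6])                   # => :flat (fewer than 3 samples)
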
+
+    # Round-trip statistics across active partitions: how stale is the most-
+    # stale partition the tick has yet to revisit? P50/P95/oldest ages help
+    # decide if partition_batch_size needs to grow or ticks need sharding.
+    def partition_round_trip_stats(policy_name: nil)
+      filter_sql = "WHERE p.status = 'active' AND p.pending_count > 0"
+      params = []
+      if policy_name
+        filter_sql += " AND p.policy_name = $1"
+        params << policy_name
+      end
+
+      # For ages (now - last_checked_at) the percentile direction inverts:
+      # the 95th percentile of *age* corresponds to the 5th percentile of the
+      # *timestamp* (the oldest 5% of last_checked_at values). Computing the
+      # percentile directly on now()-last_checked_at would be cleaner but
+      # PostgreSQL's PERCENTILE_DISC needs an ordered set on a column, so we
+      # invert the percentile argument instead.
+      result = connection.exec_query(
+        <<~SQL.squish,
+          SELECT
+            COUNT(*)::int AS active_partitions,
+            COUNT(*) FILTER (WHERE p.last_checked_at IS NULL)::int AS never_checked,
+            COUNT(*) FILTER (WHERE p.next_eligible_at IS NOT NULL AND p.next_eligible_at > now())::int AS in_backoff,
+            EXTRACT(EPOCH FROM (now() - MIN(p.last_checked_at)))::float AS oldest_age_seconds,
+            EXTRACT(EPOCH FROM (now() - PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY p.last_checked_at)))::float AS p50_age_seconds,
+            EXTRACT(EPOCH FROM (now() - PERCENTILE_DISC(0.05) WITHIN GROUP (ORDER BY p.last_checked_at)))::float AS p95_age_seconds
+          FROM #{PARTITIONS_TABLE} p
+          #{filter_sql}
+        SQL
+        "partition_round_trip_stats",
+        params
+      )
+      row = result.first || {}
+      {
+        active_partitions: row["active_partitions"].to_i,
+        never_checked: row["never_checked"].to_i,
+        in_backoff: row["in_backoff"].to_i,
+        oldest_age_seconds: row["oldest_age_seconds"]&.to_f,
+        p50_age_seconds: row["p50_age_seconds"]&.to_f,
+        p95_age_seconds: row["p95_age_seconds"]&.to_f
+      }
+    end
+
+    # ----- adaptive_concurrency stats -----------------------------------------
+
+    # Insert a fresh stats row for the given partition if none exists.
+    # Idempotent — runs as `INSERT … ON CONFLICT DO NOTHING`. Cheap to
+    # call on every admission so the gate's evaluate path can read
+    # current_max safely without checking for existence first.
+    def adaptive_seed!(policy_name:, partition_key:, initial_max:)
+      connection.exec_query(
+        <<~SQL.squish,
+          INSERT INTO #{ADAPTIVE_TABLE}
+            (policy_name, partition_key, current_max, ewma_latency_ms,
+             sample_count, created_at, updated_at)
+          VALUES ($1, $2, $3, 0, 0, now(), now())
+          ON CONFLICT (policy_name, partition_key) DO NOTHING
+        SQL
+        "adaptive_seed",
+        [policy_name, partition_key, initial_max.to_i]
+      )
+    end
+
+    # Fetch the AIMD-tuned cap for a partition. Returns nil when the
+    # row doesn't exist yet — caller should fall back to initial_max.
+    def adaptive_current_max(policy_name:, partition_key:)
+      result = connection.exec_query(
+        "SELECT current_max FROM #{ADAPTIVE_TABLE} WHERE policy_name = $1 AND partition_key = $2 LIMIT 1",
+        "adaptive_current_max",
+        [policy_name, partition_key]
+      )
+      row = result.first
+      row && row["current_max"].to_i
+    end
+
+    # Single-statement EWMA + AIMD update. Concurrent workers can call
+    # this in any order without read-modify-write races: every clause
+    # reads the row's current value at the start of the UPDATE.
+    #
+    #   ewma_latency_ms_new = ewma_latency_ms * (1 - α) + α * queue_lag_ms
+    #   current_max_new     = GREATEST(min,
+    #                           FAILED?     FLOOR(current_max * fail_factor)
+    #                           OVERLOADED? FLOOR(current_max * slow_factor)
+    #                           else        current_max + 1)
+    def adaptive_record!(policy_name:, partition_key:, queue_lag_ms:, succeeded:,
+                         alpha:, target_lag_ms:, fail_factor:, slow_factor:, min:)
+      connection.exec_query(
+        <<~SQL.squish,
+          UPDATE #{ADAPTIVE_TABLE}
+          SET
+            ewma_latency_ms = ewma_latency_ms * (1 - $3::double precision)
+                              + $3::double precision * $4::double precision,
+            sample_count = sample_count + 1,
+            current_max = GREATEST($5::int, CASE
+              WHEN $6::boolean = FALSE
+                THEN FLOOR(current_max * $7::double precision)::int
+              WHEN (ewma_latency_ms * (1 - $3::double precision)
+                    + $3::double precision * $4::double precision) > $8::double precision
+                THEN FLOOR(current_max * $9::double precision)::int
+              ELSE current_max + 1
+            END),
+            last_observed_at = now(),
+            updated_at = now()
+          WHERE policy_name = $1 AND partition_key = $2
+        SQL
+        "adaptive_record",
+        [policy_name, partition_key, alpha.to_f, queue_lag_ms.to_f,
+         min.to_i, succeeded ? true : false,
+         fail_factor.to_f, target_lag_ms.to_f, slow_factor.to_f]
+      )
+    end
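
One AIMD/EWMA step from `adaptive_record!`, traced in Ruby with arbitrary example values (a standalone sketch, not part of the packaged file):

    alpha, target_lag_ms          = 0.2, 500.0
    fail_factor, slow_factor, min = 0.5, 0.8, 1
    ewma, current_max             = 400.0, 10
    queue_lag_ms, succeeded       = 900.0, true

    new_ewma = ewma * (1 - alpha) + alpha * queue_lag_ms   # 320.0 + 180.0 = 500.0
    new_max =
      if !succeeded                  then [(current_max * fail_factor).floor, min].max
      elsif new_ewma > target_lag_ms then [(current_max * slow_factor).floor, min].max
      else                                [current_max + 1, min].max
      end
    # new_ewma == 500.0 is not above target_lag_ms, so new_max == 11 (additive increase)
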
+
+    # ----- tick samples sweep -------------------------------------------------
+
+    def sweep_old_tick_samples!(cutoff_seconds:)
+      connection.exec_query(
+        "DELETE FROM #{SAMPLES_TABLE} WHERE sampled_at < now() - ($1 || ' seconds')::interval",
+        "sweep_old_tick_samples",
+        [cutoff_seconds.to_i]
+      )
+    end
+
+    # ----------------------------------------------------------------------------
+
+    def sweep_inactive_partitions!(cutoff_seconds:)
+      connection.exec_query(
+        <<~SQL.squish,
+          DELETE FROM #{PARTITIONS_TABLE}
+          WHERE pending_count = 0
+            AND status = 'active'
+            AND (
+              (last_admit_at IS NOT NULL AND last_admit_at < now() - ($1 || ' seconds')::interval)
+              OR
+              (last_admit_at IS NULL AND created_at < now() - ($1 || ' seconds')::interval)
+            )
+        SQL
+        "sweep_inactive_partitions",
+        [cutoff_seconds.to_i]
+      )
+    end
+
+    # ----- helpers --------------------------------------------------------------
+
+    def normalize_partition(row)
+      out = {}
+      row.each { |k, v| out[k.to_s] = v }
+      out["context"] = parse_jsonb(out["context"])
+      out["gate_state"] = parse_jsonb(out["gate_state"])
+      out
+    end
+
+    def normalize_staged(row)
+      out = {}
+      row.each { |k, v| out[k.to_s] = v }
+      out["job_data"] = parse_jsonb(out["job_data"])
+      out["context"] = parse_jsonb(out["context"])
+      out
+    end
+
+    def parse_jsonb(value)
+      case value
+      when Hash, Array then value
+      when nil, "" then {}
+      else
+        begin
+          JSON.parse(value)
+        rescue JSON::ParserError
+          {}
+        end
+      end
+    end
+
+    def sample_filter(policy_name, since)
+      params = [since]
+      if policy_name
+        params << policy_name
+        ["WHERE sampled_at >= $1 AND policy_name = $2", params]
+      else
+        ["WHERE sampled_at >= $1", params]
+      end
+    end
+
+    def next_eligible_clause(retry_after)
+      if retry_after.nil?
+        ["NULL", []]
+      else
+        # 5th param ($5) — caller appends params to those of the parent UPDATE
+        ["now() + ($5 || ' seconds')::interval", [retry_after.to_f.round(3)]]
+      end
+    end
+  end
+end