@lunora/scheduler 0.0.0 → 1.0.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +105 -0
- package/README.md +140 -9
- package/__assets__/package-og.svg +14 -0
- package/dist/index.d.mts +785 -0
- package/dist/index.d.ts +785 -0
- package/dist/index.mjs +8 -0
- package/dist/packem_shared/SchedulerDO-BNzXNnS4.mjs +678 -0
- package/dist/packem_shared/assertValidCronExpression-BLfrDgmK.mjs +20 -0
- package/dist/packem_shared/compileCronSchedule-BaLlXJiN.mjs +123 -0
- package/dist/packem_shared/createCronTrigger-Cq9IBcWQ.mjs +27 -0
- package/dist/packem_shared/createQueueConsumer-DWahNPfz.mjs +59 -0
- package/dist/packem_shared/createScheduler-KCso4_at.mjs +63 -0
- package/dist/packem_shared/createWorkpool-D5cWPQmH.mjs +56 -0
- package/dist/packem_shared/isWorkflowReference-C9mQkMXt.mjs +3 -0
- package/package.json +40 -17
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
export { default as createScheduler } from './packem_shared/createScheduler-KCso4_at.mjs';
|
|
2
|
+
export { default as createWorkpool } from './packem_shared/createWorkpool-D5cWPQmH.mjs';
|
|
3
|
+
export { createCronTrigger } from './packem_shared/createCronTrigger-Cq9IBcWQ.mjs';
|
|
4
|
+
export { CRON_SCHEDULE_KINDS, compileCronSchedule, cronJobs } from './packem_shared/compileCronSchedule-BaLlXJiN.mjs';
|
|
5
|
+
export { createQueueConsumer, createQueueWorkpool, httpDispatcher } from './packem_shared/createQueueConsumer-DWahNPfz.mjs';
|
|
6
|
+
export { SchedulerDO } from './packem_shared/SchedulerDO-BNzXNnS4.mjs';
|
|
7
|
+
export { isWorkflowReference } from './packem_shared/isWorkflowReference-C9mQkMXt.mjs';
|
|
8
|
+
export { assertValidCronExpression, isValidCronExpression } from './packem_shared/assertValidCronExpression-BLfrDgmK.mjs';
|
|
@@ -0,0 +1,678 @@
|
|
|
1
|
+
/** Storage-key prefix of a pending job's record row (`id:<jobId>`). */
const HEADER_PREFIX = "id:";
/** Storage-key prefix of the record copy written when a retry is scheduled. */
const RETRY_PREFIX = "retry:";
/** Storage-key prefix of records parked after exhausting their retry budget. */
const DEAD_PREFIX = "dead:";
/** Storage-key prefix of a workpool's persisted state row. */
const POOL_PREFIX = "pool:";
/** Retry budget used when a record carries no valid `retry.maxAttempts`. */
const MAX_RETRY_ATTEMPTS = 5;
/** Backoff base (30 s) used when a record carries no valid `retry.baseMs`. */
const RETRY_BASE_DELAY_MS = 30_000;
/** How far ahead (1 s) a pooled job is re-armed when its pool is saturated. */
const POOL_BACKPRESSURE_DELAY_MS = 1_000;
/** Largest `scheduledFor` accepted by `/schedule` (the maximum ECMAScript `Date` time value). */
const MAX_SCHEDULED_FOR_MS = 8_640_000_000_000_000;
/** Minimum width of the timestamp segment inside `t:` time-index keys. */
const TIME_PAD = 15;
/**
 * Left-pad the string form of `n` with zeros to at least `TIME_PAD` characters.
 * Inputs whose string form is already that long are returned unpadded.
 */
function padTime(n) {
    const text = String(n);
    return text.padStart(TIME_PAD, "0");
}
|
|
11
|
+
const generateId = () => {
|
|
12
|
+
const bytes = crypto.getRandomValues(new Uint8Array(12));
|
|
13
|
+
let binary = "";
|
|
14
|
+
for (const byte of bytes) {
|
|
15
|
+
binary += String.fromCodePoint(byte);
|
|
16
|
+
}
|
|
17
|
+
return btoa(binary).replaceAll("+", "-").replaceAll("/", "_").replaceAll("=", "");
|
|
18
|
+
};
|
|
19
|
+
class SchedulerDO {
|
|
20
|
+
static indexKey(scheduledFor, id) {
|
|
21
|
+
return `t:${padTime(scheduledFor)}:${id}`;
|
|
22
|
+
}
|
|
23
|
+
static json(body, status = 200) {
|
|
24
|
+
return Response.json(body, { headers: { "content-type": "application/json" }, status });
|
|
25
|
+
}
|
|
26
|
+
/**
 * Build an error response shaped `{ error: { code, message } }` with the
 * given HTTP status.
 */
static error(status, code, message) {
    const payload = { error: { code, message } };
    return SchedulerDO.json(payload, status);
}
|
|
29
|
+
/**
|
|
30
|
+
* Resolve the effective retry parameters for a record: its per-job
|
|
31
|
+
* {@link RetryPolicy} merged over the DO's built-in defaults. Callers that
|
|
32
|
+
* never set `record.retry` get today's behaviour verbatim
|
|
33
|
+
* (`maxAttempts: 5`, exponential, `baseMs: 30_000`, no ceiling).
|
|
34
|
+
*/
|
|
35
|
+
static resolveRetry(record) {
|
|
36
|
+
const policy = record.retry;
|
|
37
|
+
const maxAttempts = typeof policy?.maxAttempts === "number" && Number.isInteger(policy.maxAttempts) && policy.maxAttempts > 0 ? policy.maxAttempts : MAX_RETRY_ATTEMPTS;
|
|
38
|
+
const baseMs = typeof policy?.baseMs === "number" && Number.isFinite(policy.baseMs) && policy.baseMs >= 0 ? policy.baseMs : RETRY_BASE_DELAY_MS;
|
|
39
|
+
const backoff = policy?.backoff === "linear" ? "linear" : "exponential";
|
|
40
|
+
const maxMs = typeof policy?.maxMs === "number" && Number.isFinite(policy.maxMs) && policy.maxMs >= 0 ? policy.maxMs : void 0;
|
|
41
|
+
return { backoff, baseMs, maxAttempts, maxMs };
|
|
42
|
+
}
|
|
43
|
+
/** Clamp an untrusted `maxConcurrency` to a positive integer, else fall back. */
|
|
44
|
+
static normalizeConcurrency(value, fallback) {
|
|
45
|
+
return typeof value === "number" && Number.isInteger(value) && value > 0 ? value : fallback;
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Sanitize an untrusted retry policy from the wire into a `RetryPolicy` (or
|
|
49
|
+
* `undefined` when nothing valid was provided). Keeps obviously-bad values
|
|
50
|
+
* out of storage so {@link SchedulerDO.resolveRetry} never has to re-guard.
|
|
51
|
+
* @returns The normalized policy, or `undefined` if no valid policy was found.
|
|
52
|
+
*/
|
|
53
|
+
static normalizeRetry(value) {
|
|
54
|
+
if (typeof value !== "object" || value === null) {
|
|
55
|
+
return void 0;
|
|
56
|
+
}
|
|
57
|
+
const raw = value;
|
|
58
|
+
const policy = {};
|
|
59
|
+
if (typeof raw.maxAttempts === "number" && Number.isInteger(raw.maxAttempts) && raw.maxAttempts > 0) {
|
|
60
|
+
policy.maxAttempts = raw.maxAttempts;
|
|
61
|
+
}
|
|
62
|
+
if (typeof raw.baseMs === "number" && Number.isFinite(raw.baseMs) && raw.baseMs >= 0) {
|
|
63
|
+
policy.baseMs = raw.baseMs;
|
|
64
|
+
}
|
|
65
|
+
if (raw.backoff === "exponential" || raw.backoff === "linear") {
|
|
66
|
+
policy.backoff = raw.backoff;
|
|
67
|
+
}
|
|
68
|
+
if (typeof raw.maxMs === "number" && Number.isFinite(raw.maxMs) && raw.maxMs >= 0) {
|
|
69
|
+
policy.maxMs = raw.maxMs;
|
|
70
|
+
}
|
|
71
|
+
return Object.keys(policy).length === 0 ? void 0 : policy;
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Idempotently release the slot held by `jobId`, returning the updated
|
|
75
|
+
* {@link PoolState} (pure — the caller persists it). A duplicate release for
|
|
76
|
+
* an id that no longer holds a slot is a no-op, so an at-least-once
|
|
77
|
+
* `/complete` (or a complete racing a failed-kick release) can never push
|
|
78
|
+
* `inFlight` below the true number of running jobs and oversubscribe the
|
|
79
|
+
* pool. Pools persisted before `inFlightIds` existed fall back to a clamped
|
|
80
|
+
* counter decrement.
|
|
81
|
+
*/
|
|
82
|
+
static releaseSlot(pool, jobId) {
|
|
83
|
+
if (pool.inFlightIds === void 0) {
|
|
84
|
+
return { ...pool, inFlight: Math.max(0, pool.inFlight - 1) };
|
|
85
|
+
}
|
|
86
|
+
const next = pool.inFlightIds.filter((id) => id !== jobId);
|
|
87
|
+
return { ...pool, inFlight: next.length, inFlightIds: next };
|
|
88
|
+
}
|
|
89
|
+
/**
|
|
90
|
+
* Best-effort release with no job id (legacy `/complete` payloads). Drops one
|
|
91
|
+
* tracked id if the set exists, else clamps the counter. Less precise than
|
|
92
|
+
* {@link SchedulerDO.releaseSlot} — a duplicate id-less complete CAN
|
|
93
|
+
* over-release — but every current client sends the id, so this is the
|
|
94
|
+
* compatibility shim, not the hot path.
|
|
95
|
+
*/
|
|
96
|
+
static releaseFirstSlot(pool) {
|
|
97
|
+
if (pool.inFlightIds === void 0) {
|
|
98
|
+
return { ...pool, inFlight: Math.max(0, pool.inFlight - 1) };
|
|
99
|
+
}
|
|
100
|
+
const next = pool.inFlightIds.slice(0, Math.max(0, pool.inFlightIds.length - 1));
|
|
101
|
+
return { ...pool, inFlight: next.length, inFlightIds: next };
|
|
102
|
+
}
|
|
103
|
+
state;
|
|
104
|
+
env;
|
|
105
|
+
constructor(state, env) {
|
|
106
|
+
this.state = state;
|
|
107
|
+
this.env = env;
|
|
108
|
+
}
|
|
109
|
+
async fetch(request) {
|
|
110
|
+
const url = new URL(request.url);
|
|
111
|
+
if (url.pathname === "/ws" && request.headers.get("Upgrade") === "websocket") {
|
|
112
|
+
return this.handleWebSocketUpgrade();
|
|
113
|
+
}
|
|
114
|
+
switch (`${request.method} ${url.pathname}`) {
|
|
115
|
+
case "GET /dead": {
|
|
116
|
+
return this.handleDeadList();
|
|
117
|
+
}
|
|
118
|
+
case "GET /get": {
|
|
119
|
+
return this.handleGet(url);
|
|
120
|
+
}
|
|
121
|
+
case "GET /list": {
|
|
122
|
+
return this.handleList();
|
|
123
|
+
}
|
|
124
|
+
case "GET /pool": {
|
|
125
|
+
return this.handlePoolStatus(url);
|
|
126
|
+
}
|
|
127
|
+
case "GET /status": {
|
|
128
|
+
return this.handleStatus();
|
|
129
|
+
}
|
|
130
|
+
case "POST /cancel": {
|
|
131
|
+
return this.handleCancel(request);
|
|
132
|
+
}
|
|
133
|
+
case "POST /complete": {
|
|
134
|
+
return this.handleComplete(request);
|
|
135
|
+
}
|
|
136
|
+
case "POST /dead/cancel": {
|
|
137
|
+
return this.handleDeadCancel(request);
|
|
138
|
+
}
|
|
139
|
+
case "POST /dead/retry": {
|
|
140
|
+
return this.handleDeadRetry(request);
|
|
141
|
+
}
|
|
142
|
+
case "POST /schedule": {
|
|
143
|
+
return this.handleSchedule(request);
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
return Response.json(
|
|
147
|
+
{ error: { code: "NOT_FOUND" } },
|
|
148
|
+
{
|
|
149
|
+
headers: { "content-type": "application/json" },
|
|
150
|
+
status: 404
|
|
151
|
+
}
|
|
152
|
+
);
|
|
153
|
+
}
|
|
154
|
+
/** Called by the Workers runtime when the alarm previously set by `_rescheduleAlarm()` fires. */
|
|
155
|
+
async alarm() {
|
|
156
|
+
const now = Date.now();
|
|
157
|
+
const due = [];
|
|
158
|
+
const indexEntries = await this.state.storage.list({
|
|
159
|
+
end: `t:${padTime(now)}:~`,
|
|
160
|
+
limit: 100,
|
|
161
|
+
prefix: "t:"
|
|
162
|
+
});
|
|
163
|
+
for (const [indexKey, recordId] of indexEntries.entries()) {
|
|
164
|
+
const dueAt = Number.parseInt(indexKey.slice(2, indexKey.indexOf(":", 2)), 10);
|
|
165
|
+
if (Number.isFinite(dueAt) && dueAt <= now) {
|
|
166
|
+
const record = await this.state.storage.get(`${HEADER_PREFIX}${recordId}`);
|
|
167
|
+
if (record) {
|
|
168
|
+
due.push(record);
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
const pools = /* @__PURE__ */ new Map();
|
|
173
|
+
try {
|
|
174
|
+
for (const record of due) {
|
|
175
|
+
await this.drainRecordGuarded(record, pools);
|
|
176
|
+
}
|
|
177
|
+
} finally {
|
|
178
|
+
await this.rescheduleAlarm();
|
|
179
|
+
}
|
|
180
|
+
if (due.length > 0) {
|
|
181
|
+
await this.broadcastChange();
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
/**
|
|
185
|
+
* Internal dispatch hook; overridden in unit tests to capture the outgoing
|
|
186
|
+
* request. Returns `true` ONLY on an explicit 2xx response (`response.ok`).
|
|
187
|
+
* Anything else — a network failure, a 5xx, OR a non-2xx such as 404
|
|
188
|
+
* (receiver route not mounted) / 401 / 403 / 4xx — returns `false` and
|
|
189
|
+
* enters the retry pipeline via {@link recordRetry}. Treating 4xx as
|
|
190
|
+
* success used to permanently delete the job; since the receiver may simply
|
|
191
|
+
* be missing (404) or transiently failing, we retry rather than silently
|
|
192
|
+
* drop. After {@link MAX_RETRY_ATTEMPTS} the record is parked under a
|
|
193
|
+
* `dead:` key for inspection — never silently deleted.
|
|
194
|
+
*
|
|
195
|
+
* The dispatch target is taken from `env.LUNORA_ORIGIN_URL` (NOT from the
|
|
196
|
+
* stored record) to prevent SSRF via a forged `originUrl` on the schedule
|
|
197
|
+
* request. If that env var is missing at fire time (a deploy/binding
|
|
198
|
+
* regression — schedule time already enforced its presence) we return
|
|
199
|
+
* `false` so the record is retried rather than silently dropped.
|
|
200
|
+
*/
|
|
201
|
+
async dispatch(record) {
|
|
202
|
+
const originUrl = typeof this.env.LUNORA_ORIGIN_URL === "string" && this.env.LUNORA_ORIGIN_URL.length > 0 ? this.env.LUNORA_ORIGIN_URL : void 0;
|
|
203
|
+
if (!originUrl) {
|
|
204
|
+
return false;
|
|
205
|
+
}
|
|
206
|
+
const body = JSON.stringify({
|
|
207
|
+
args: record.args,
|
|
208
|
+
functionPath: record.functionPath,
|
|
209
|
+
id: record.id,
|
|
210
|
+
// Echoed so the receiver can call back the SAME DO instance.
|
|
211
|
+
instanceName: record.instanceName,
|
|
212
|
+
// When the job belongs to a workpool, the receiver must report
|
|
213
|
+
// completion back to the SchedulerDO (`POST /complete { pool, id }`)
|
|
214
|
+
// so the pool's concurrency slot is released — see handleComplete().
|
|
215
|
+
pool: record.pool,
|
|
216
|
+
scheduledFor: record.scheduledFor,
|
|
217
|
+
shardKey: record.shardKey
|
|
218
|
+
});
|
|
219
|
+
try {
|
|
220
|
+
const headers = { "content-type": "application/json" };
|
|
221
|
+
const signature = await this.signDispatch(body);
|
|
222
|
+
if (signature !== void 0) {
|
|
223
|
+
headers["x-lunora-scheduler-signature"] = signature;
|
|
224
|
+
} else if (typeof this.env.LUNORA_ADMIN_TOKEN === "string" && this.env.LUNORA_ADMIN_TOKEN.length > 0) {
|
|
225
|
+
headers.authorization = `Bearer ${this.env.LUNORA_ADMIN_TOKEN}`;
|
|
226
|
+
}
|
|
227
|
+
const response = await fetch(`${originUrl}/_lunora/scheduler/dispatch`, {
|
|
228
|
+
body,
|
|
229
|
+
headers,
|
|
230
|
+
method: "POST"
|
|
231
|
+
});
|
|
232
|
+
return response.ok;
|
|
233
|
+
} catch {
|
|
234
|
+
return false;
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
/**
|
|
238
|
+
* Claim + drain one due record with per-record fault isolation, so a storage
|
|
239
|
+
* throw can never abort the whole alarm pass (which would skip the remaining
|
|
240
|
+
* due records and the `rescheduleAlarm()` that re-arms the clock).
|
|
241
|
+
*
|
|
242
|
+
* Claims the job by deleting its time-index entry BEFORE dispatch (an alarm
|
|
243
|
+
* re-fire then won't pick it up again), runs {@link drainRecord}, and on a
|
|
244
|
+
* thrown storage op decides whether the job stays re-fireable.
|
|
245
|
+
*
|
|
246
|
+
* When the record was NOT successfully dispatched, re-assert the time-index
|
|
247
|
+
* claim so a later alarm re-attempts it (at-least-once): the claim delete may
|
|
248
|
+
* have removed it and recordRetry()/requeuePooled() may not have re-armed it
|
|
249
|
+
* before throwing, and re-inserting the same key is idempotent, so a
|
|
250
|
+
* surviving claim is simply rewritten to its prior value. When the record WAS
|
|
251
|
+
* dispatched (the throw came from post-dispatch cleanup), leave the index
|
|
252
|
+
* deleted so the already-kicked, idempotent job is not re-fired.
|
|
253
|
+
*/
|
|
254
|
+
async drainRecordGuarded(record, pools) {
|
|
255
|
+
let dispatched = false;
|
|
256
|
+
try {
|
|
257
|
+
await this.state.storage.delete(SchedulerDO.indexKey(record.scheduledFor, record.id));
|
|
258
|
+
dispatched = await this.drainRecord(record, pools);
|
|
259
|
+
} catch {
|
|
260
|
+
if (!dispatched) {
|
|
261
|
+
try {
|
|
262
|
+
await this.state.storage.put(SchedulerDO.indexKey(record.scheduledFor, record.id), record.id);
|
|
263
|
+
} catch {
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
/**
|
|
269
|
+
* Process one due (already index-claimed) record within an alarm drain:
|
|
270
|
+
* apply the workpool concurrency gate, dispatch, and settle the result.
|
|
271
|
+
* A saturated pool re-arms the job (backpressure, no attempt charged); a
|
|
272
|
+
* free slot is reserved durably before dispatch and released immediately if
|
|
273
|
+
* the kick fails (success holds it until the runtime reports completion).
|
|
274
|
+
* Success clears the `id:`/`retry:` rows; failure routes to
|
|
275
|
+
* {@link recordRetry}. `pools` caches each pool's {@link PoolState} for the
|
|
276
|
+
* lifetime of the drain so the budget decrements without re-reading storage.
|
|
277
|
+
* @returns `true` only when the record was successfully dispatched (a 2xx
|
|
278
|
+
* kick). The caller ({@link drainRecordGuarded}) uses this in its per-record
|
|
279
|
+
* error guard: a record that returns `true` (or whose post-dispatch cleanup
|
|
280
|
+
* later throws) must NOT have its time-index claim restored, since re-firing
|
|
281
|
+
* an already-kicked job would break idempotency. A `false` return (pool
|
|
282
|
+
* backpressure or a failed dispatch) means the job is still re-fireable —
|
|
283
|
+
* either already re-armed here, or, if a throw escapes, re-claimed by the
|
|
284
|
+
* guard's catch.
|
|
285
|
+
*/
|
|
286
|
+
async drainRecord(record, pools) {
|
|
287
|
+
const reserved = await this.reservePoolSlot(record, pools);
|
|
288
|
+
if (!reserved) {
|
|
289
|
+
return false;
|
|
290
|
+
}
|
|
291
|
+
const ok = await this.dispatch(record);
|
|
292
|
+
if (!ok && record.pool !== void 0) {
|
|
293
|
+
const pool = pools.get(record.pool);
|
|
294
|
+
if (pool !== void 0) {
|
|
295
|
+
const released = SchedulerDO.releaseSlot(pool, record.id);
|
|
296
|
+
pools.set(record.pool, released);
|
|
297
|
+
await this.savePool(record.pool, released);
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
if (ok) {
|
|
301
|
+
try {
|
|
302
|
+
await this.state.storage.delete([`${HEADER_PREFIX}${record.id}`, `${RETRY_PREFIX}${record.id}`]);
|
|
303
|
+
} catch {
|
|
304
|
+
}
|
|
305
|
+
return true;
|
|
306
|
+
}
|
|
307
|
+
await this.recordRetry(record);
|
|
308
|
+
return false;
|
|
309
|
+
}
|
|
310
|
+
/**
|
|
311
|
+
* Concurrency gate for a pooled record. Returns `false` (and re-arms the
|
|
312
|
+
* job via {@link requeuePooled}) when the pool is at `maxConcurrency`;
|
|
313
|
+
* otherwise reserves a slot durably and returns `true`. Non-pooled records
|
|
314
|
+
* always return `true` without touching any pool state.
|
|
315
|
+
*/
|
|
316
|
+
async reservePoolSlot(record, pools) {
|
|
317
|
+
if (record.pool === void 0) {
|
|
318
|
+
return true;
|
|
319
|
+
}
|
|
320
|
+
const pool = pools.get(record.pool) ?? await this.loadPool(record.pool);
|
|
321
|
+
pools.set(record.pool, pool);
|
|
322
|
+
if (pool.inFlight >= pool.maxConcurrency) {
|
|
323
|
+
await this.requeuePooled(record);
|
|
324
|
+
return false;
|
|
325
|
+
}
|
|
326
|
+
const ids = pool.inFlightIds ?? [];
|
|
327
|
+
if (!ids.includes(record.id)) {
|
|
328
|
+
ids.push(record.id);
|
|
329
|
+
}
|
|
330
|
+
pool.inFlightIds = ids;
|
|
331
|
+
pool.inFlight = ids.length;
|
|
332
|
+
await this.savePool(record.pool, pool);
|
|
333
|
+
return true;
|
|
334
|
+
}
|
|
335
|
+
/**
|
|
336
|
+
* Accept a hibernatable live subscription to the job list. The scheduler has
|
|
337
|
+
* exactly one subscription shape (the whole list), so there's no per-socket
|
|
338
|
+
* registry or dependency tracking — every accepted socket gets the full list
|
|
339
|
+
* on connect and on every change. The worker is responsible for gating the
|
|
340
|
+
* upgrade behind the admin token before it reaches here.
|
|
341
|
+
*/
|
|
342
|
+
async handleWebSocketUpgrade() {
|
|
343
|
+
if (this.state.acceptWebSocket === void 0) {
|
|
344
|
+
return SchedulerDO.error(501, "WS_UNSUPPORTED", "WebSocket subscriptions are not supported in this runtime");
|
|
345
|
+
}
|
|
346
|
+
const pair = new WebSocketPair();
|
|
347
|
+
const client = pair[0];
|
|
348
|
+
const server = pair[1];
|
|
349
|
+
this.state.acceptWebSocket(server);
|
|
350
|
+
server.send(JSON.stringify({ records: await this.listRecords(), type: "jobs" }));
|
|
351
|
+
return new Response(null, { status: 101, webSocket: client });
|
|
352
|
+
}
|
|
353
|
+
/**
|
|
354
|
+
* Re-list the jobs and push them to every connected subscriber. Called after
|
|
355
|
+
* any change (schedule / cancel / alarm-fire) so live studios reflect it
|
|
356
|
+
* immediately. A no-op when the runtime doesn't support hibernated sockets.
|
|
357
|
+
*/
|
|
358
|
+
async broadcastChange() {
|
|
359
|
+
const sockets = this.state.getWebSockets?.();
|
|
360
|
+
if (sockets === void 0 || sockets.length === 0) {
|
|
361
|
+
return;
|
|
362
|
+
}
|
|
363
|
+
const message = JSON.stringify({ records: await this.listRecords(), type: "jobs" });
|
|
364
|
+
for (const socket of sockets) {
|
|
365
|
+
try {
|
|
366
|
+
socket.send(message);
|
|
367
|
+
} catch {
|
|
368
|
+
}
|
|
369
|
+
}
|
|
370
|
+
}
|
|
371
|
+
/** The current pending job records (shared by `/list` and the live channel). */
|
|
372
|
+
async listRecords() {
|
|
373
|
+
const entries = await this.state.storage.list({ prefix: HEADER_PREFIX });
|
|
374
|
+
return [...entries.values()];
|
|
375
|
+
}
|
|
376
|
+
/**
|
|
377
|
+
* HMAC-SHA-256 sign the dispatch body with `env.LUNORA_SCHEDULER_SECRET`,
|
|
378
|
+
* returning a base64url signature, or `undefined` when no secret is
|
|
379
|
+
* configured. Mirrors `@lunora/storage`'s signed-URL HMAC pattern (WebCrypto
|
|
380
|
+
* `crypto.subtle`, available in workerd).
|
|
381
|
+
*/
|
|
382
|
+
async signDispatch(body) {
|
|
383
|
+
const secret = typeof this.env.LUNORA_SCHEDULER_SECRET === "string" ? this.env.LUNORA_SCHEDULER_SECRET : void 0;
|
|
384
|
+
if (!secret || secret.length === 0) {
|
|
385
|
+
return void 0;
|
|
386
|
+
}
|
|
387
|
+
const encoder = new TextEncoder();
|
|
388
|
+
const key = await crypto.subtle.importKey("raw", encoder.encode(secret), { hash: "SHA-256", name: "HMAC" }, false, ["sign"]);
|
|
389
|
+
const signature = await crypto.subtle.sign("HMAC", key, encoder.encode(body));
|
|
390
|
+
const bytes = new Uint8Array(signature);
|
|
391
|
+
let binary = "";
|
|
392
|
+
for (const byte of bytes) {
|
|
393
|
+
binary += String.fromCodePoint(byte);
|
|
394
|
+
}
|
|
395
|
+
return btoa(binary).replaceAll("+", "-").replaceAll("/", "_").replaceAll("=", "");
|
|
396
|
+
}
|
|
397
|
+
/**
|
|
398
|
+
* Move a failed record into the retry pipeline with configurable backoff.
|
|
399
|
+
* The retry budget/backoff comes from the record's {@link RetryPolicy}
|
|
400
|
+
* (falling back to the DO defaults); on exhaustion the record is parked
|
|
401
|
+
* under a `dead:` key for manual inspection.
|
|
402
|
+
*/
|
|
403
|
+
async recordRetry(record) {
|
|
404
|
+
const attempts = (record.attempts ?? 0) + 1;
|
|
405
|
+
const { backoff, baseMs, maxAttempts, maxMs } = SchedulerDO.resolveRetry(record);
|
|
406
|
+
if (attempts > maxAttempts) {
|
|
407
|
+
await this.state.storage.put(`${DEAD_PREFIX}${record.id}`, { ...record, attempts });
|
|
408
|
+
await this.state.storage.delete([`${RETRY_PREFIX}${record.id}`, `${HEADER_PREFIX}${record.id}`]);
|
|
409
|
+
return;
|
|
410
|
+
}
|
|
411
|
+
const rawDelay = backoff === "linear" ? baseMs * attempts : baseMs * 2 ** (attempts - 1);
|
|
412
|
+
const delayMs = maxMs === void 0 ? rawDelay : Math.min(rawDelay, maxMs);
|
|
413
|
+
const nextScheduledFor = Date.now() + delayMs;
|
|
414
|
+
const retryRecord = {
|
|
415
|
+
...record,
|
|
416
|
+
attempts,
|
|
417
|
+
scheduledFor: nextScheduledFor
|
|
418
|
+
};
|
|
419
|
+
await this.state.storage.put(`${RETRY_PREFIX}${record.id}`, retryRecord);
|
|
420
|
+
await this.state.storage.put(`${HEADER_PREFIX}${record.id}`, retryRecord);
|
|
421
|
+
await this.state.storage.put(SchedulerDO.indexKey(nextScheduledFor, record.id), record.id);
|
|
422
|
+
}
|
|
423
|
+
/** Read the durable `pool:<name>` row, defaulting to a fresh `inFlight: 0` pool. */
|
|
424
|
+
async loadPool(name, maxConcurrencyHint) {
|
|
425
|
+
const stored = await this.state.storage.get(`${POOL_PREFIX}${name}`);
|
|
426
|
+
if (stored !== void 0) {
|
|
427
|
+
if (Array.isArray(stored.inFlightIds)) {
|
|
428
|
+
return { inFlight: stored.inFlightIds.length, inFlightIds: [...stored.inFlightIds], maxConcurrency: stored.maxConcurrency };
|
|
429
|
+
}
|
|
430
|
+
return { inFlight: Math.max(0, stored.inFlight), maxConcurrency: stored.maxConcurrency };
|
|
431
|
+
}
|
|
432
|
+
return { inFlight: 0, inFlightIds: [], maxConcurrency: SchedulerDO.normalizeConcurrency(maxConcurrencyHint, 1) };
|
|
433
|
+
}
|
|
434
|
+
async savePool(name, pool) {
|
|
435
|
+
await this.state.storage.put(`${POOL_PREFIX}${name}`, pool);
|
|
436
|
+
}
|
|
437
|
+
/**
|
|
438
|
+
* Re-arm a pooled job that couldn't run because its pool was at capacity.
|
|
439
|
+
* No attempt is charged (this is backpressure, not a failure): the job is
|
|
440
|
+
* pushed `POOL_BACKPRESSURE_DELAY_MS` into the future so a later alarm
|
|
441
|
+
* drains it once a slot frees, keeping its `id:` header and retry policy.
|
|
442
|
+
*/
|
|
443
|
+
async requeuePooled(record) {
|
|
444
|
+
const nextScheduledFor = Date.now() + POOL_BACKPRESSURE_DELAY_MS;
|
|
445
|
+
const requeued = { ...record, scheduledFor: nextScheduledFor };
|
|
446
|
+
await this.state.storage.put(`${HEADER_PREFIX}${record.id}`, requeued);
|
|
447
|
+
await this.state.storage.put(SchedulerDO.indexKey(nextScheduledFor, record.id), record.id);
|
|
448
|
+
}
|
|
449
|
+
/**
|
|
450
|
+
* Release a pool slot when the runtime reports an action finished. This is
|
|
451
|
+
* the durable-semaphore decrement: dispatch() only KICKS the action and
|
|
452
|
+
* holds the slot; the runtime calls back here (`POST /complete { id }`) once
|
|
453
|
+
* the action settles, freeing the slot for the next queued job. Idempotent
|
|
454
|
+
* and safe if the job/pool is already gone.
|
|
455
|
+
*/
|
|
456
|
+
async handleComplete(request) {
|
|
457
|
+
const body = await request.json().catch(() => void 0);
|
|
458
|
+
const poolName = typeof body?.pool === "string" && body.pool.length > 0 ? body.pool : void 0;
|
|
459
|
+
const jobId = typeof body?.id === "string" && body.id.length > 0 ? body.id : void 0;
|
|
460
|
+
if (poolName === void 0) {
|
|
461
|
+
return SchedulerDO.error(400, "INVALID_INPUT", "pool is required");
|
|
462
|
+
}
|
|
463
|
+
const pool = await this.loadPool(poolName);
|
|
464
|
+
const next = jobId === void 0 ? SchedulerDO.releaseFirstSlot(pool) : SchedulerDO.releaseSlot(pool, jobId);
|
|
465
|
+
await this.savePool(poolName, next);
|
|
466
|
+
await this.armAlarmIfEarlier(Date.now());
|
|
467
|
+
return SchedulerDO.json({ inFlight: next.inFlight });
|
|
468
|
+
}
|
|
469
|
+
/** `GET /pool?name=` — inspect a pool's slot usage + queued count. */
|
|
470
|
+
async handlePoolStatus(url) {
|
|
471
|
+
const name = url.searchParams.get("name");
|
|
472
|
+
if (name === null || name.length === 0) {
|
|
473
|
+
return SchedulerDO.error(400, "INVALID_INPUT", "name is required");
|
|
474
|
+
}
|
|
475
|
+
const pool = await this.loadPool(name);
|
|
476
|
+
const headers = await this.state.storage.list({ prefix: HEADER_PREFIX });
|
|
477
|
+
let queued = 0;
|
|
478
|
+
for (const record of headers.values()) {
|
|
479
|
+
if (record.pool === name) {
|
|
480
|
+
queued += 1;
|
|
481
|
+
}
|
|
482
|
+
}
|
|
483
|
+
return SchedulerDO.json({ inFlight: pool.inFlight, maxConcurrency: pool.maxConcurrency, queued });
|
|
484
|
+
}
|
|
485
|
+
/**
|
|
486
|
+
* `GET /status` — the app-level backlog signal that powers the studio's
|
|
487
|
+
* SLO view. Enumerates every durable `pool:<name>` row for its `inFlight`/
|
|
488
|
+
* `maxConcurrency` semaphore, counts the pending (not-yet-dispatched) jobs
|
|
489
|
+
* routed to each pool with the same single-pass scan {@link handlePoolStatus}
|
|
490
|
+
* uses, and rolls those up into app-wide `backlog` (sum of `queued`) and
|
|
491
|
+
* `inFlight` (sum of held slots) totals.
|
|
492
|
+
*
|
|
493
|
+
* Pools that have rows but no queued jobs still appear (with `queued: 0`) so
|
|
494
|
+
* a saturated-but-idle pool stays visible; a pool that only ever existed as
|
|
495
|
+
* queued jobs without a persisted row is unreachable here (the schedule path
|
|
496
|
+
* always writes a `pool:<name>` row before the job's header), so a single
|
|
497
|
+
* scan over `pool:`/`id:` is sufficient.
|
|
498
|
+
*/
|
|
499
|
+
async handleStatus() {
|
|
500
|
+
const poolRows = await this.state.storage.list({ prefix: POOL_PREFIX });
|
|
501
|
+
const headers = await this.state.storage.list({ prefix: HEADER_PREFIX });
|
|
502
|
+
const queuedByPool = /* @__PURE__ */ new Map();
|
|
503
|
+
for (const record of headers.values()) {
|
|
504
|
+
if (record.pool !== void 0) {
|
|
505
|
+
queuedByPool.set(record.pool, (queuedByPool.get(record.pool) ?? 0) + 1);
|
|
506
|
+
}
|
|
507
|
+
}
|
|
508
|
+
const pools = [];
|
|
509
|
+
let backlog = 0;
|
|
510
|
+
let inFlight = 0;
|
|
511
|
+
for (const [key, pool] of poolRows.entries()) {
|
|
512
|
+
const name = key.slice(POOL_PREFIX.length);
|
|
513
|
+
const slots = Math.max(0, pool.inFlight);
|
|
514
|
+
const queued = queuedByPool.get(name) ?? 0;
|
|
515
|
+
pools.push({ inFlight: slots, maxConcurrency: pool.maxConcurrency, name, queued });
|
|
516
|
+
backlog += queued;
|
|
517
|
+
inFlight += slots;
|
|
518
|
+
}
|
|
519
|
+
const status = { backlog, inFlight, pools };
|
|
520
|
+
return SchedulerDO.json(status);
|
|
521
|
+
}
|
|
522
|
+
async handleSchedule(request) {
|
|
523
|
+
const body = await request.json().catch(() => void 0);
|
|
524
|
+
if (!body || typeof body.functionPath !== "string") {
|
|
525
|
+
return SchedulerDO.error(400, "INVALID_INPUT", "functionPath is required");
|
|
526
|
+
}
|
|
527
|
+
if (typeof body.scheduledFor !== "number" || !Number.isInteger(body.scheduledFor) || body.scheduledFor <= 0 || body.scheduledFor > MAX_SCHEDULED_FOR_MS) {
|
|
528
|
+
return SchedulerDO.error(400, "INVALID_INPUT", "scheduledFor must be a positive integer epoch-millisecond number no greater than 8640000000000000");
|
|
529
|
+
}
|
|
530
|
+
if (typeof this.env.LUNORA_ORIGIN_URL !== "string" || this.env.LUNORA_ORIGIN_URL.length === 0) {
|
|
531
|
+
return SchedulerDO.error(500, "ORIGIN_NOT_CONFIGURED", "LUNORA_ORIGIN_URL env binding must be set on the SchedulerDO");
|
|
532
|
+
}
|
|
533
|
+
const pool = typeof body.pool === "string" && body.pool.length > 0 ? body.pool : void 0;
|
|
534
|
+
const instanceName = typeof body.instanceName === "string" && body.instanceName.length > 0 ? body.instanceName : void 0;
|
|
535
|
+
const retry = SchedulerDO.normalizeRetry(body.retry);
|
|
536
|
+
const id = generateId();
|
|
537
|
+
const record = {
|
|
538
|
+
// body is parsed from an untrusted request; args may be absent at runtime
|
|
539
|
+
// despite the type, so the ?? fallback is a real guard.
|
|
540
|
+
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- parsed wire data can omit args
|
|
541
|
+
args: body.args ?? {},
|
|
542
|
+
enqueuedAt: Date.now(),
|
|
543
|
+
functionPath: body.functionPath,
|
|
544
|
+
id,
|
|
545
|
+
...instanceName === void 0 ? {} : { instanceName },
|
|
546
|
+
...pool === void 0 ? {} : { pool },
|
|
547
|
+
...retry === void 0 ? {} : { retry },
|
|
548
|
+
scheduledFor: body.scheduledFor,
|
|
549
|
+
shardKey: body.shardKey
|
|
550
|
+
};
|
|
551
|
+
if (pool !== void 0) {
|
|
552
|
+
const current = await this.loadPool(pool, body.maxConcurrency);
|
|
553
|
+
await this.savePool(pool, {
|
|
554
|
+
inFlight: current.inFlight,
|
|
555
|
+
// Preserve the in-flight id set so refreshing the cap on a new
|
|
556
|
+
// enqueue can't wipe the held-slot bookkeeping (which would let
|
|
557
|
+
// a later /complete over-release).
|
|
558
|
+
...current.inFlightIds === void 0 ? {} : { inFlightIds: current.inFlightIds },
|
|
559
|
+
maxConcurrency: SchedulerDO.normalizeConcurrency(body.maxConcurrency, current.maxConcurrency)
|
|
560
|
+
});
|
|
561
|
+
}
|
|
562
|
+
await this.state.storage.put(`${HEADER_PREFIX}${id}`, record);
|
|
563
|
+
await this.state.storage.put(SchedulerDO.indexKey(record.scheduledFor, id), id);
|
|
564
|
+
await this.armAlarmIfEarlier(record.scheduledFor);
|
|
565
|
+
await this.broadcastChange();
|
|
566
|
+
return SchedulerDO.json({ id, scheduledFor: record.scheduledFor });
|
|
567
|
+
}
|
|
568
|
+
async handleCancel(request) {
|
|
569
|
+
const body = await request.json().catch(() => void 0);
|
|
570
|
+
if (!body?.id) {
|
|
571
|
+
return SchedulerDO.error(400, "INVALID_INPUT", "id is required");
|
|
572
|
+
}
|
|
573
|
+
const record = await this.state.storage.get(`${HEADER_PREFIX}${body.id}`);
|
|
574
|
+
if (!record) {
|
|
575
|
+
return SchedulerDO.json({ cancelled: false });
|
|
576
|
+
}
|
|
577
|
+
await this.removeRecord(record);
|
|
578
|
+
await this.rescheduleAlarm();
|
|
579
|
+
await this.broadcastChange();
|
|
580
|
+
return SchedulerDO.json({ cancelled: true });
|
|
581
|
+
}
|
|
582
|
+
async handleList() {
|
|
583
|
+
return SchedulerDO.json({ records: await this.listRecords() });
|
|
584
|
+
}
|
|
585
|
+
/**
|
|
586
|
+
* `GET /dead` — list the dead-letter records: jobs that exhausted their
|
|
587
|
+
* retry budget ({@link recordRetry}) and were parked under `dead:<id>`
|
|
588
|
+
* instead of being silently dropped. These never appear in `/list` (their
|
|
589
|
+
* `id:` header is deleted on park), so this is the ONLY way the studio can
|
|
590
|
+
* surface — and recover — a permanently-failed job.
|
|
591
|
+
*/
|
|
592
|
+
async handleDeadList() {
|
|
593
|
+
const entries = await this.state.storage.list({ prefix: DEAD_PREFIX });
|
|
594
|
+
return SchedulerDO.json({ records: [...entries.values()] });
|
|
595
|
+
}
|
|
596
|
+
/**
|
|
597
|
+
* `POST /dead/retry { id }` — resurrect a dead-letter record: reset its
|
|
598
|
+
* exhausted attempt count to 0 (a fresh retry budget), re-arm it for
|
|
599
|
+
* immediate dispatch via the standard time index, and drop the `dead:` row.
|
|
600
|
+
* The new `id:` header makes it visible to `/list` and the live `/ws`
|
|
601
|
+
* subscription again. A miss is a no-op (`{ retried: false }`).
|
|
602
|
+
*/
|
|
603
|
+
async handleDeadRetry(request) {
|
|
604
|
+
const body = await request.json().catch(() => void 0);
|
|
605
|
+
if (typeof body?.id !== "string" || body.id.length === 0) {
|
|
606
|
+
return SchedulerDO.error(400, "INVALID_INPUT", "id is required");
|
|
607
|
+
}
|
|
608
|
+
const dead = await this.state.storage.get(`${DEAD_PREFIX}${body.id}`);
|
|
609
|
+
if (dead === void 0) {
|
|
610
|
+
return SchedulerDO.json({ retried: false });
|
|
611
|
+
}
|
|
612
|
+
const scheduledFor = Date.now();
|
|
613
|
+
const revived = { ...dead, attempts: 0, scheduledFor };
|
|
614
|
+
await this.state.storage.put(`${HEADER_PREFIX}${dead.id}`, revived);
|
|
615
|
+
await this.state.storage.put(SchedulerDO.indexKey(scheduledFor, dead.id), dead.id);
|
|
616
|
+
await this.state.storage.delete(`${DEAD_PREFIX}${dead.id}`);
|
|
617
|
+
await this.armAlarmIfEarlier(scheduledFor);
|
|
618
|
+
await this.broadcastChange();
|
|
619
|
+
return SchedulerDO.json({ id: dead.id, retried: true, scheduledFor });
|
|
620
|
+
}
|
|
621
|
+
/**
|
|
622
|
+
* `POST /dead/cancel { id }` — permanently drop a dead-letter record the
|
|
623
|
+
* operator has decided not to recover. Returns `{ removed }` (false when
|
|
624
|
+
* nothing matched). Idempotent: a repeated purge is a harmless no-op.
|
|
625
|
+
*/
|
|
626
|
+
async handleDeadCancel(request) {
|
|
627
|
+
const body = await request.json().catch(() => void 0);
|
|
628
|
+
if (typeof body?.id !== "string" || body.id.length === 0) {
|
|
629
|
+
return SchedulerDO.error(400, "INVALID_INPUT", "id is required");
|
|
630
|
+
}
|
|
631
|
+
const removed = await this.state.storage.delete(`${DEAD_PREFIX}${body.id}`);
|
|
632
|
+
return SchedulerDO.json({ removed: Boolean(removed) });
|
|
633
|
+
}
|
|
634
|
+
/**
|
|
635
|
+
* Resolve a single pending job by id via a direct `id:<id>` storage read —
|
|
636
|
+
* O(1), versus scanning the whole `/list` view. Responds `{ record }` on a
|
|
637
|
+
* hit and `{}` on a miss (an absent `record` field — JSON has no `undefined`
|
|
638
|
+
* — which the client reads back as `null`).
|
|
639
|
+
*/
|
|
640
|
+
async handleGet(url) {
|
|
641
|
+
const id = url.searchParams.get("id");
|
|
642
|
+
if (id === null || id.length === 0) {
|
|
643
|
+
return SchedulerDO.error(400, "INVALID_INPUT", "id is required");
|
|
644
|
+
}
|
|
645
|
+
const record = await this.state.storage.get(`${HEADER_PREFIX}${id}`);
|
|
646
|
+
return SchedulerDO.json(record === void 0 ? {} : { record });
|
|
647
|
+
}
|
|
648
|
+
async removeRecord(record) {
|
|
649
|
+
await this.state.storage.delete([`${HEADER_PREFIX}${record.id}`, SchedulerDO.indexKey(record.scheduledFor, record.id), `${RETRY_PREFIX}${record.id}`]);
|
|
650
|
+
}
|
|
651
|
+
/**
|
|
652
|
+
* Arm the alarm for `scheduledFor` only if it is sooner than the currently
|
|
653
|
+
* set alarm (or none is set). Used on the schedule path: inserting a job
|
|
654
|
+
* can only ever pull the earliest-pending time *earlier*, never later, so a
|
|
655
|
+
* full `t:` rescan is unnecessary unless the new job is the new earliest.
|
|
656
|
+
*/
|
|
657
|
+
async armAlarmIfEarlier(scheduledFor) {
|
|
658
|
+
const current = await this.state.storage.getAlarm();
|
|
659
|
+
if (current === null || scheduledFor < current) {
|
|
660
|
+
await this.state.storage.setAlarm(scheduledFor);
|
|
661
|
+
}
|
|
662
|
+
}
|
|
663
|
+
async rescheduleAlarm() {
|
|
664
|
+
const entries = await this.state.storage.list({ limit: 1, prefix: "t:" });
|
|
665
|
+
const first = entries.entries().next();
|
|
666
|
+
if (first.done) {
|
|
667
|
+
await this.state.storage.deleteAlarm();
|
|
668
|
+
return;
|
|
669
|
+
}
|
|
670
|
+
const [indexKey] = first.value;
|
|
671
|
+
const dueAt = Number.parseInt(indexKey.slice(2, indexKey.indexOf(":", 2)), 10);
|
|
672
|
+
if (Number.isFinite(dueAt)) {
|
|
673
|
+
await this.state.storage.setAlarm(dueAt);
|
|
674
|
+
}
|
|
675
|
+
}
|
|
676
|
+
}
|
|
677
|
+
|
|
678
|
+
export { SchedulerDO };
|