@convex-dev/workpool 0.4.6 → 0.4.7-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/component/_generated/api.d.ts +2 -0
- package/dist/component/_generated/api.d.ts.map +1 -1
- package/dist/component/_generated/api.js.map +1 -1
- package/dist/component/complete.d.ts.map +1 -1
- package/dist/component/complete.js +8 -7
- package/dist/component/complete.js.map +1 -1
- package/dist/component/danger.js +7 -7
- package/dist/component/danger.js.map +1 -1
- package/dist/component/future.d.ts +11 -0
- package/dist/component/future.d.ts.map +1 -0
- package/dist/component/future.js +21 -0
- package/dist/component/future.js.map +1 -0
- package/dist/component/kick.d.ts +3 -3
- package/dist/component/kick.d.ts.map +1 -1
- package/dist/component/kick.js +14 -16
- package/dist/component/kick.js.map +1 -1
- package/dist/component/lib.d.ts.map +1 -1
- package/dist/component/lib.js +13 -13
- package/dist/component/lib.js.map +1 -1
- package/dist/component/loop.d.ts +44 -1
- package/dist/component/loop.d.ts.map +1 -1
- package/dist/component/loop.js +171 -217
- package/dist/component/loop.js.map +1 -1
- package/dist/component/recovery.d.ts.map +1 -1
- package/dist/component/recovery.js +2 -2
- package/dist/component/recovery.js.map +1 -1
- package/dist/component/schema.d.ts.map +1 -1
- package/dist/component/schema.js +2 -1
- package/dist/component/schema.js.map +1 -1
- package/dist/component/worker.js +1 -1
- package/dist/component/worker.js.map +1 -1
- package/package.json +8 -12
- package/src/component/_generated/api.ts +2 -0
- package/src/component/complete.test.ts +13 -13
- package/src/component/complete.ts +13 -7
- package/src/component/danger.ts +7 -7
- package/src/component/future.ts +38 -0
- package/src/component/kick.test.ts +17 -20
- package/src/component/kick.ts +20 -17
- package/src/component/lib.test.ts +7 -7
- package/src/component/lib.ts +12 -15
- package/src/component/loop.test.ts +695 -1127
- package/src/component/loop.ts +212 -283
- package/src/component/recovery.test.ts +3 -3
- package/src/component/recovery.ts +5 -2
- package/src/component/schema.ts +2 -1
- package/src/component/stateMachine.test.ts +1246 -0
- package/src/component/stats.test.ts +4 -4
- package/src/component/worker.ts +1 -1
package/src/component/stateMachine.test.ts
@@ -0,0 +1,1246 @@
import { convexTest } from "convex-test";
import type { WithoutSystemFields } from "convex/server";
import {
  afterEach,
  assert,
  beforeEach,
  describe,
  expect,
  it,
  vi,
} from "vitest";
import { api, internal } from "./_generated/api.js";
import type { Doc, Id } from "./_generated/dataModel.js";
import type { MutationCtx } from "./_generated/server.js";
import schema from "./schema.js";
import {
  DEFAULT_MAX_PARALLELISM,
  getCurrentSegment,
  getNextSegment,
  SECOND,
} from "./shared.js";
import { RECOVERY_PERIOD_SEGMENTS } from "./loop.js";

const modules = import.meta.glob("./**/*.ts");

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

type WorkConfig = {
  attempts: number;
  canceled?: boolean;
  hasRetryBehavior: boolean;
  fnType: "action" | "mutation";
};

type PendingCompletionConfig =
  | false
  | { retry: boolean; resultKind: "success" | "failed" | "canceled" };

/** Declarative description of a job's composite state across all tables. */
type CompositeState = {
  work: false | WorkConfig;
  pendingStart: boolean;
  running: boolean;
  pendingCompletion: PendingCompletionConfig;
  pendingCancelation: boolean;
};

/** What we actually observe after reading the DB. */
type ObservedState = {
  work:
    | false
    | {
        attempts: number;
        canceled: boolean;
        hasRetryBehavior: boolean;
        fnType: string;
      };
  pendingStart: boolean;
  running: boolean;
  pendingCompletion: false | { retry: boolean; resultKind: string };
  pendingCancelation: boolean;
};

// ---------------------------------------------------------------------------
// Named states
// ---------------------------------------------------------------------------

const S1_ENQUEUED: CompositeState = {
  work: { attempts: 0, hasRetryBehavior: false, fnType: "action" },
  pendingStart: true,
  running: false,
  pendingCompletion: false,
  pendingCancelation: false,
};

const S2_RUNNING: CompositeState = {
  work: { attempts: 0, hasRetryBehavior: false, fnType: "action" },
  pendingStart: false,
  running: true,
  pendingCompletion: false,
  pendingCancelation: false,
};

const S2_RUNNING_WITH_RETRIES: CompositeState = {
  ...S2_RUNNING,
  work: { attempts: 0, hasRetryBehavior: true, fnType: "action" },
};

const S3_COMPLETING_NO_RETRY: CompositeState = {
  work: false,
  pendingStart: false,
  running: true,
  pendingCompletion: { retry: false, resultKind: "success" },
  pendingCancelation: false,
};

const S3_COMPLETING_FAILED_FINAL: CompositeState = {
  work: false,
  pendingStart: false,
  running: true,
  pendingCompletion: { retry: false, resultKind: "failed" },
  pendingCancelation: false,
};

const S5_COMPLETING_WILL_RETRY: CompositeState = {
  work: { attempts: 1, hasRetryBehavior: true, fnType: "action" },
  pendingStart: false,
  running: true,
  pendingCompletion: { retry: true, resultKind: "failed" },
  pendingCancelation: false,
};

const S7_CANCEL_PENDING: CompositeState = {
  work: { attempts: 0, hasRetryBehavior: false, fnType: "action" },
  pendingStart: true,
  running: false,
  pendingCompletion: false,
  pendingCancelation: true,
};

const S8_CANCEL_RUNNING: CompositeState = {
  work: { attempts: 0, hasRetryBehavior: false, fnType: "action" },
  pendingStart: false,
  running: true,
  pendingCompletion: false,
  pendingCancelation: true,
};

const S9_CANCELED_RETRYING: CompositeState = {
  work: {
    attempts: 1,
    canceled: true,
    hasRetryBehavior: true,
    fnType: "action",
  },
  pendingStart: false,
  running: true,
  pendingCompletion: { retry: true, resultKind: "failed" },
  pendingCancelation: false,
};

const S10_REENQUEUED: CompositeState = {
  work: { attempts: 1, hasRetryBehavior: true, fnType: "action" },
  pendingStart: true,
  running: false,
  pendingCompletion: false,
  pendingCancelation: false,
};

const S12_CANCELED_AWAITING_COMPLETE: CompositeState = {
  work: {
    attempts: 0,
    canceled: true,
    hasRetryBehavior: false,
    fnType: "action",
  },
  pendingStart: false,
  running: true,
  pendingCompletion: false,
  pendingCancelation: false,
};

// ---------------------------------------------------------------------------
// Test helpers
// ---------------------------------------------------------------------------

const ACTION_RECOVERY_THRESHOLD_MS = 5 * 60 * 1000;

describe("state machine", () => {
  function setupTest() {
    return convexTest(schema, modules);
  }

  let t: ReturnType<typeof setupTest>;
  let generation: bigint;

  function nextGen() {
    return generation++;
  }

  async function makeDummyWork(
    ctx: MutationCtx,
    overrides: Partial<WithoutSystemFields<Doc<"work">>> = {},
  ) {
    return ctx.db.insert("work", {
      fnType: "action",
      fnHandle: "test_handle",
      fnName: "test_fn",
      fnArgs: {},
      attempts: 0,
      ...overrides,
    });
  }

  async function makeDummyScheduledFunction(
    ctx: MutationCtx,
    workId: Id<"work">,
  ) {
    return ctx.scheduler.runAfter(0, internal.worker.runActionWrapper, {
      workId,
      fnHandle: "test_handle",
      fnArgs: {},
      logLevel: "WARN",
      attempt: 0,
    });
  }

  /**
   * Materialize a CompositeState in the DB.
   * Returns the workId (a real one if work exists, a deleted one if absent).
   */
  async function setupState(
    state: CompositeState,
    opts?: {
      /** Make running jobs old enough for recovery. */
      oldForRecovery?: boolean;
      /** Override the segment for pending entries. */
      segment?: bigint;
    },
  ): Promise<{ workId: Id<"work">; segment: bigint }> {
    // Default to the current segment so pendingStart entries are eligible
    // to start in the same iteration; main reads pendingStart with
    // segment <= getCurrentSegment().
    const seg = opts?.segment ?? getCurrentSegment();
    const workId = await t.run<Id<"work">>(async (ctx) => {
      let wId: Id<"work">;
      if (state.work) {
        wId = await makeDummyWork(ctx, {
          fnType: state.work.fnType,
          attempts: state.work.attempts,
          canceled: state.work.canceled,
          retryBehavior: state.work.hasRetryBehavior
            ? { maxAttempts: 5, initialBackoffMs: 100, base: 2 }
            : undefined,
        });
      } else {
        // Create and delete to get a valid Id
        wId = await makeDummyWork(ctx);
        await ctx.db.delete("work", wId);
      }

      // Set up running array
      const runningEntry: Doc<"internalState">["running"] = [];
      if (state.running) {
        const scheduledId = await makeDummyScheduledFunction(ctx, wId);
        const started = opts?.oldForRecovery
          ? Date.now() - ACTION_RECOVERY_THRESHOLD_MS - 10000
          : Date.now();
        runningEntry.push({ workId: wId, scheduledId, started });
      }

      // Set up internalState
      const lastRecovery = opts?.oldForRecovery
        ? getCurrentSegment() - RECOVERY_PERIOD_SEGMENTS - 1n
        : getCurrentSegment();
      await ctx.db.insert("internalState", {
        generation,
        segmentCursors: { incoming: 0n, completion: 0n, cancelation: 0n },
        lastRecovery,
        report: {
          completed: 0,
          succeeded: 0,
          failed: 0,
          retries: 0,
          canceled: 0,
          conflicted: 0,
          lastReportTs: Date.now(),
        },
        running: runningEntry,
      });

      // runStatus
      await ctx.db.insert("runStatus", { state: { kind: "running" } });

      // pendingStart
      if (state.pendingStart) {
        await ctx.db.insert("pendingStart", { workId: wId, segment: seg });
      }

      // pendingCompletion
      if (state.pendingCompletion) {
        const resultMap = {
          success: { kind: "success" as const, returnValue: null },
          failed: { kind: "failed" as const, error: "test error" },
          canceled: { kind: "canceled" as const },
        };
        await ctx.db.insert("pendingCompletion", {
          workId: wId,
          segment: seg,
          retry: state.pendingCompletion.retry,
          runResult: resultMap[state.pendingCompletion.resultKind],
        });
      }

      // pendingCancelation
      if (state.pendingCancelation) {
        await ctx.db.insert("pendingCancelation", {
          workId: wId,
          segment: seg,
        });
      }

      return wId;
    });

    return { workId, segment: seg };
  }

  /** Read the DB and return the observed state for a given workId. */
  async function observeState(workId: Id<"work">): Promise<ObservedState> {
    return t.run(async (ctx) => {
      const work = await ctx.db.get("work", workId);
      const ps = await ctx.db
        .query("pendingStart")
        .withIndex("workId", (q) => q.eq("workId", workId))
        .first();
      const state = await ctx.db.query("internalState").unique();
      const inRunning =
        state?.running.some((r) => r.workId === workId) ?? false;
      const pc = await ctx.db
        .query("pendingCompletion")
        .withIndex("workId", (q) => q.eq("workId", workId))
        .first();
      const pcancel = await ctx.db
        .query("pendingCancelation")
        .withIndex("workId", (q) => q.eq("workId", workId))
        .first();

      return {
        work: work
          ? {
              attempts: work.attempts,
              canceled: !!work.canceled,
              hasRetryBehavior: !!work.retryBehavior,
              fnType: work.fnType,
            }
          : false,
        pendingStart: !!ps,
        running: inRunning,
        pendingCompletion: pc
          ? { retry: pc.retry, resultKind: pc.runResult.kind }
          : false,
        pendingCancelation: !!pcancel,
      };
    });
  }

  /** Run main loop at given segment. */
  async function runMain(segment: bigint) {
    await t.mutation(internal.loop.main, {
      generation: nextGen(),
      segment,
    });
  }

  /** Run complete with given result. */
  async function runComplete(
    workId: Id<"work">,
    result: { kind: "success" } | { kind: "failed" } | { kind: "canceled" },
    attempt: number,
  ) {
    const runResult =
      result.kind === "success"
        ? { kind: "success" as const, returnValue: null }
        : result.kind === "failed"
          ? { kind: "failed" as const, error: "test error" }
          : { kind: "canceled" as const };
    await t.mutation(internal.complete.complete, {
      jobs: [{ workId, runResult, attempt }],
    });
  }

  beforeEach(async () => {
    vi.useFakeTimers();
    t = setupTest();
    generation = 1n;
    await t.run(async (ctx) => {
      await ctx.db.insert("globals", {
        logLevel: "ERROR",
        maxParallelism: DEFAULT_MAX_PARALLELISM,
      });
    });
  });

  afterEach(() => {
    vi.useRealTimers();
  });

  // =========================================================================
  // main loop transitions
  // =========================================================================

  describe("main loop transitions", () => {
    it("S1 enqueued -> S2 running: starts work", async () => {
      const { workId, segment } = await setupState(S1_ENQUEUED);
      await runMain(segment);
      const s = await observeState(workId);
      expect(s.work).toBeTruthy();
      expect(s.pendingStart).toBe(false);
      expect(s.running).toBe(true);
      expect(s.pendingCompletion).toBe(false);
      expect(s.pendingCancelation).toBe(false);
    });

    it("S10 re-enqueued -> S2 running: starts retry work", async () => {
      const { workId, segment } = await setupState(S10_REENQUEUED);
      await runMain(segment);
      const s = await observeState(workId);
      expect(s.work).toBeTruthy();
      expect(s.pendingStart).toBe(false);
      expect(s.running).toBe(true);
    });

    it("S3 completing (success, no retry) -> finished: removes from running", async () => {
      const { workId, segment } = await setupState(S3_COMPLETING_NO_RETRY);
      await runMain(segment);
      const s = await observeState(workId);
      expect(s.work).toBe(false);
      expect(s.pendingStart).toBe(false);
      expect(s.running).toBe(false);
      expect(s.pendingCompletion).toBe(false);
      expect(s.pendingCancelation).toBe(false);
    });

    it("S3 completing (failed, no retry) -> finished: removes from running", async () => {
      const { workId, segment } = await setupState(S3_COMPLETING_FAILED_FINAL);
      await runMain(segment);
      const s = await observeState(workId);
      expect(s.work).toBe(false);
      expect(s.running).toBe(false);
      expect(s.pendingCompletion).toBe(false);
    });

    it("S5 completing (will retry) -> retried: reschedules and may restart immediately", async () => {
      const { workId, segment } = await setupState(S5_COMPLETING_WILL_RETRY);
      await runMain(segment);
      const s = await observeState(workId);
      expect(s.work).toBeTruthy();
      expect(s.pendingCompletion).toBe(false);
      // With small backoff (100ms), the retry pendingStart may be picked up
      // immediately by handleStart in the same main pass. Either way, the
      // job should be progressing: either in pendingStart or running.
      expect(s.pendingStart || s.running).toBe(true);
    });

    it("S7 cancel-pending -> canceled: deletes pendingStart, marks canceled", async () => {
      const { workId, segment } = await setupState(S7_CANCEL_PENDING);
      await runMain(segment);
      const s = await observeState(workId);
      expect(s.pendingStart).toBe(false);
      expect(s.pendingCancelation).toBe(false);
      expect(s.running).toBe(false);
      if (s.work) {
        expect(s.work.canceled).toBe(true);
      }
    });

    it("S8 cancel-running -> S12 canceled, still running", async () => {
      const { workId, segment } = await setupState(S8_CANCEL_RUNNING);
      await runMain(segment);
      const s = await observeState(workId);
      expect(s.pendingCancelation).toBe(false);
      expect(s.running).toBe(true);
      assert(s.work);
      expect(s.work.canceled).toBe(true);
    });

    it("S9 canceled+retrying -> does not retry, cancels instead", async () => {
      const { workId, segment } = await setupState(S9_CANCELED_RETRYING);
      await runMain(segment);
      const s = await observeState(workId);
      expect(s.running).toBe(false);
      expect(s.pendingCompletion).toBe(false);
      // Should not have re-enqueued
      expect(s.pendingStart).toBe(false);
    });

    it("S2 running (no pending events) -> S2 no change", async () => {
      const { workId, segment } = await setupState(S2_RUNNING);
      await runMain(segment);
      const s = await observeState(workId);
      expect(s.running).toBe(true);
      assert(s.work);
      expect(s.work.attempts).toBe(0);
    });

    it("S12 canceled awaiting complete -> no change (still running)", async () => {
      const { workId, segment } = await setupState(
        S12_CANCELED_AWAITING_COMPLETE,
      );
      await runMain(segment);
      const s = await observeState(workId);
      expect(s.running).toBe(true);
      assert(s.work);
      expect(s.work.canceled).toBe(true);
    });
  });

  // =========================================================================
  // complete.complete transitions
  // =========================================================================

  describe("complete.complete transitions", () => {
    it("S2 running + complete(success) -> work deleted, pendingCompletion(retry=false)", async () => {
      const { workId } = await setupState(S2_RUNNING);
      await runComplete(workId, { kind: "success" }, 0);
      const s = await observeState(workId);
      expect(s.work).toBe(false);
      assert(s.pendingCompletion);
      expect(s.pendingCompletion.retry).toBe(false);
      expect(s.pendingCompletion.resultKind).toBe("success");
    });

    it("S2 running (with retries) + complete(failed) -> work kept, pendingCompletion(retry=true)", async () => {
      const { workId } = await setupState(S2_RUNNING_WITH_RETRIES);
      await runComplete(workId, { kind: "failed" }, 0);
      const s = await observeState(workId);
      assert(s.work);
      expect(s.work.attempts).toBe(1);
      assert(s.pendingCompletion);
      expect(s.pendingCompletion.retry).toBe(true);
    });

    it("S2 running (no retries) + complete(failed) -> work deleted, pendingCompletion(retry=false)", async () => {
      const { workId } = await setupState(S2_RUNNING);
      await runComplete(workId, { kind: "failed" }, 0);
      const s = await observeState(workId);
      expect(s.work).toBe(false);
      assert(s.pendingCompletion);
      expect(s.pendingCompletion.retry).toBe(false);
      expect(s.pendingCompletion.resultKind).toBe("failed");
    });

    it("S2 running + complete(canceled) -> work deleted, NO pendingCompletion", async () => {
      const { workId } = await setupState(S2_RUNNING);
      await runComplete(workId, { kind: "canceled" }, 0);
      const s = await observeState(workId);
      expect(s.work).toBe(false);
      expect(s.pendingCompletion).toBe(false);
    });

    it("S12 canceled running + complete(success) -> work deleted (complete ignores canceled flag)", async () => {
      const { workId } = await setupState(S12_CANCELED_AWAITING_COMPLETE);
      await runComplete(workId, { kind: "success" }, 0);
      const s = await observeState(workId);
      expect(s.work).toBe(false);
      assert(s.pendingCompletion);
      expect(s.pendingCompletion.retry).toBe(false);
    });

    it("complete with wrong attempt number -> no-op", async () => {
      const { workId } = await setupState(S2_RUNNING);
      await runComplete(workId, { kind: "success" }, 999);
      const s = await observeState(workId);
      // Work should still exist (complete ignored the mismatched attempt)
      assert(s.work);
      expect(s.work.attempts).toBe(0);
      expect(s.pendingCompletion).toBe(false);
    });

    it("complete on absent work -> no-op", async () => {
      const { workId } = await setupState({
        work: false,
        pendingStart: false,
        running: true,
        pendingCompletion: false,
        pendingCancelation: false,
      });
      // Should not throw
      await runComplete(workId, { kind: "success" }, 0);
      const s = await observeState(workId);
      expect(s.work).toBe(false);
      expect(s.pendingCompletion).toBe(false);
    });

    it("duplicate complete -> second one is no-op (attempt mismatch)", async () => {
      const { workId } = await setupState(S2_RUNNING_WITH_RETRIES);
      await runComplete(workId, { kind: "failed" }, 0);
      const s1 = await observeState(workId);
      assert(s1.work);
      expect(s1.work.attempts).toBe(1);

      // Second complete with attempt=0 -> mismatched, should be no-op
      await runComplete(workId, { kind: "failed" }, 0);
      const s2 = await observeState(workId);
      assert(s2.work);
      expect(s2.work.attempts).toBe(1); // unchanged
    });

    it("duplicate complete with correct attempt -> BUG: attempts incremented before dedup check", async () => {
      const { workId } = await setupState(S2_RUNNING_WITH_RETRIES);
      await runComplete(workId, { kind: "failed" }, 0);
      const s1 = await observeState(workId);
      assert(s1.work);
      expect(s1.work.attempts).toBe(1);

      // Second complete with attempt=1 -> pendingCompletion already exists.
      // BUG: complete.complete increments work.attempts BEFORE checking for
      // existing pendingCompletion. So attempts goes to 2 even though the
      // second complete was effectively a no-op for pendingCompletion.
      await runComplete(workId, { kind: "failed" }, 1);
      const s2 = await observeState(workId);
      assert(s2.work);
      expect(s2.work.attempts).toBe(2); // Should ideally be 1
    });
  });

  // =========================================================================
  // lib.cancel transitions
  // =========================================================================

  describe("lib.cancel transitions", () => {
    it("S1 enqueued + cancel -> adds pendingCancelation", async () => {
      const { workId } = await setupState(S1_ENQUEUED);
      await t.mutation(api.lib.cancel, { id: workId });
      const s = await observeState(workId);
      assert(s.work);
      expect(s.pendingStart).toBe(true);
      expect(s.pendingCancelation).toBe(true);
    });

    it("S2 running + cancel -> adds pendingCancelation", async () => {
      const { workId } = await setupState(S2_RUNNING);
      await t.mutation(api.lib.cancel, { id: workId });
      const s = await observeState(workId);
      assert(s.work);
      expect(s.running).toBe(true);
      expect(s.pendingCancelation).toBe(true);
    });

    it("S10 re-enqueued + cancel -> adds pendingCancelation", async () => {
      const { workId } = await setupState(S10_REENQUEUED);
      await t.mutation(api.lib.cancel, { id: workId });
      const s = await observeState(workId);
      expect(s.pendingStart).toBe(true);
      expect(s.pendingCancelation).toBe(true);
    });

    it("absent work + cancel -> no-op", async () => {
      const { workId } = await setupState({
        work: false,
        pendingStart: false,
        running: false,
        pendingCompletion: false,
        pendingCancelation: false,
      });
      await t.mutation(api.lib.cancel, { id: workId });
      const s = await observeState(workId);
      expect(s.pendingCancelation).toBe(false);
    });

    it("S7 already cancel-pending + cancel -> no-op (dedup)", async () => {
      const { workId } = await setupState(S7_CANCEL_PENDING);
      await t.mutation(api.lib.cancel, { id: workId });
      const count = await t.run(async (ctx) => {
        const all = await ctx.db
          .query("pendingCancelation")
          .withIndex("workId", (q) => q.eq("workId", workId))
          .collect();
        return all.length;
      });
      expect(count).toBe(1);
    });
  });

  // =========================================================================
  // Recovery transitions
  // =========================================================================

  describe("recovery transitions", () => {
    it("work absent + running (old, no pendingCompletion) -> removed from running", async () => {
      const { workId } = await setupState(
        {
          work: false,
          pendingStart: false,
          running: true,
          pendingCompletion: false,
          pendingCancelation: false,
        },
        { oldForRecovery: true },
      );
      const recoverySeg = getCurrentSegment() + RECOVERY_PERIOD_SEGMENTS + 1n;
      await runMain(recoverySeg);
      const s = await observeState(workId);
      expect(s.running).toBe(false);
    });

    it("work absent + running (old) + pendingCompletion -> handleCompletions processes first", async () => {
      // handleCompletions runs before handleRecovery. Since work is absent and
      // the completion has retry=false, it will log an error but still remove
      // from running.
      const { workId } = await setupState(
        {
          work: false,
          pendingStart: false,
          running: true,
          pendingCompletion: { retry: false, resultKind: "success" },
          pendingCancelation: false,
        },
        { oldForRecovery: true },
      );
      const recoverySeg = getCurrentSegment() + RECOVERY_PERIOD_SEGMENTS + 1n;
      await runMain(recoverySeg);
      const s = await observeState(workId);
      expect(s.pendingCompletion).toBe(false);
      expect(s.running).toBe(false);
    });

    it("S2 running action (old, not yet completed) -> recovery checks scheduled function", async () => {
      // In convex-test the scheduled function runs immediately, so recovery
      // will likely see it as succeeded. The key thing is it doesn't crash.
      const { workId } = await setupState(S2_RUNNING, {
        oldForRecovery: true,
      });
      const recoverySeg = getCurrentSegment() + RECOVERY_PERIOD_SEGMENTS + 1n;
      // Should not throw
      await runMain(recoverySeg);
      const s = await observeState(workId);
      // Job should still be tracked (recovery saw function as completed/running)
      expect(s.running).toBe(true);
    });

    it("S12 canceled running (old) -> recovery doesn't crash on canceled work", async () => {
      const { workId } = await setupState(S12_CANCELED_AWAITING_COMPLETE, {
        oldForRecovery: true,
      });
      const recoverySeg = getCurrentSegment() + RECOVERY_PERIOD_SEGMENTS + 1n;
      await runMain(recoverySeg);
      const s = await observeState(workId);
      // Should still be in a valid state
      assert(s.work);
      expect(s.work.canceled).toBe(true);
    });
  });

  // =========================================================================
  // Invalid / undefined states
  // =========================================================================

  describe("invalid states", () => {
    it("work absent + pendingStart -> main handles gracefully (no throw)", async () => {
      const { workId, segment } = await setupState({
        work: false,
        pendingStart: true,
        running: false,
        pendingCompletion: false,
        pendingCancelation: false,
      });
      // After fix-missing-items, beginWork returns null instead of throwing
      await runMain(segment);
      const s = await observeState(workId);
      // pendingStart should be consumed
      expect(s.pendingStart).toBe(false);
      // work should still be absent
      expect(s.work).toBe(false);
      // should NOT be in running (beginWork returned null)
      expect(s.running).toBe(false);
    });

    it("pendingStart + running for same workId -> skips start but leaves pendingStart", async () => {
      const { workId, segment } = await setupState({
        work: { attempts: 0, hasRetryBehavior: false, fnType: "action" },
        pendingStart: true,
        running: true,
        pendingCompletion: false,
        pendingCancelation: false,
      });
      await runMain(segment);
      const s = await observeState(workId);
      expect(s.running).toBe(true);
      // BUG: handleStart skips the start but does NOT delete the pendingStart
      // entry (returns null before the delete call). This means the orphaned
      // pendingStart will be picked up again on the next main loop iteration.
      expect(s.pendingStart).toBe(true);
    });

    it("duplicate pendingCompletion via complete.complete -> BUG: attempts still incremented", async () => {
      const { workId } = await setupState(S2_RUNNING_WITH_RETRIES);
      await runComplete(workId, { kind: "failed" }, 0);
      // Now attempt=1, and there's already a pendingCompletion.
      // BUG: complete.complete increments attempts BEFORE checking for
      // existing pendingCompletion, so attempts goes to 2.
      await runComplete(workId, { kind: "failed" }, 1);
      const s = await observeState(workId);
      assert(s.work);
      expect(s.work.attempts).toBe(2); // Should ideally be 1
      // But only one pendingCompletion exists (dedup worked for that)
      const pcCount = await t.run(async (ctx) => {
        return (
          await ctx.db
            .query("pendingCompletion")
            .withIndex("workId", (q) => q.eq("workId", workId))
            .collect()
        ).length;
      });
      expect(pcCount).toBe(1);
    });
  });

  // =========================================================================
  // Multi-step / interleaved transitions
  // =========================================================================

  describe("multi-step / interleaved transitions", () => {
    it("completion + new enqueue -> main processes both in one pass", async () => {
      const seg = getCurrentSegment();
      const { workId: w1 } = await setupState(S1_ENQUEUED, { segment: seg });
      await runMain(seg);
      // w1 is now running
      await runComplete(w1, { kind: "success" }, 0);

      // Enqueue second job directly in DB
      const w2 = await t.run<Id<"work">>(async (ctx) => {
        const id = await ctx.db.insert("work", {
          fnType: "action",
          fnHandle: "test_handle",
          fnName: "test_fn2",
          fnArgs: {},
          attempts: 0,
        });
        await ctx.db.insert("pendingStart", { workId: id, segment: seg });
        return id;
      });

      // Run main again - should process completion of w1 AND start w2
      await runMain(seg);
      const s1 = await observeState(w1);
      const s2 = await observeState(w2);

      expect(s1.work).toBe(false);
      expect(s1.running).toBe(false);
      expect(s2.running).toBe(true);
    });

    it("two jobs complete(retry) before main -> main retries both", async () => {
      const seg = getCurrentSegment();
      const ids = await t.run(async (ctx) => {
        const w1 = await ctx.db.insert("work", {
          fnType: "action",
          fnHandle: "h1",
          fnName: "fn1",
          fnArgs: {},
          attempts: 0,
          retryBehavior: { maxAttempts: 5, initialBackoffMs: 100, base: 2 },
        });
        const w2 = await ctx.db.insert("work", {
          fnType: "action",
          fnHandle: "h2",
          fnName: "fn2",
          fnArgs: {},
          attempts: 0,
          retryBehavior: { maxAttempts: 5, initialBackoffMs: 100, base: 2 },
        });
        const s1 = await ctx.scheduler.runAfter(
          0,
          internal.worker.runActionWrapper,
          {
            workId: w1,
            fnHandle: "h1",
            fnArgs: {},
            logLevel: "WARN",
            attempt: 0,
          },
        );
        const s2 = await ctx.scheduler.runAfter(
          0,
          internal.worker.runActionWrapper,
          {
            workId: w2,
            fnHandle: "h2",
            fnArgs: {},
            logLevel: "WARN",
            attempt: 0,
          },
        );
        await ctx.db.insert("internalState", {
          generation,
          segmentCursors: { incoming: 0n, completion: 0n, cancelation: 0n },
          lastRecovery: getCurrentSegment(),
          report: {
            completed: 0,
            succeeded: 0,
            failed: 0,
            retries: 0,
            canceled: 0,
            conflicted: 0,
            lastReportTs: Date.now(),
          },
          running: [
            { workId: w1, scheduledId: s1, started: Date.now() },
            { workId: w2, scheduledId: s2, started: Date.now() },
          ],
        });
        await ctx.db.insert("runStatus", { state: { kind: "running" } });
        return { w1, w2 };
      });

      // Both complete with failure before main runs
      await runComplete(ids.w1, { kind: "failed" }, 0);
      await runComplete(ids.w2, { kind: "failed" }, 0);

      const s1Before = await observeState(ids.w1);
      const s2Before = await observeState(ids.w2);
      assert(s1Before.pendingCompletion);
      assert(s2Before.pendingCompletion);
      expect(s1Before.pendingCompletion.retry).toBe(true);
      expect(s2Before.pendingCompletion.retry).toBe(true);

      // First main pass: process pendingCompletions, queue retry
      // pendingStart entries (segment = now + jittered backoff, so they
      // can land in the next segment and be ineligible this iteration).
      await runMain(seg);
      // Advance past the retry backoff window so the retry pendingStart
      // segments are <= getCurrentSegment().
      vi.setSystemTime(Date.now() + SECOND);
      await runMain(getCurrentSegment());

      const s1After = await observeState(ids.w1);
      const s2After = await observeState(ids.w2);

      // Both pendingCompletions consumed and retries are running.
      expect(s1After.pendingCompletion).toBe(false);
      expect(s2After.pendingCompletion).toBe(false);
      expect(s1After.running).toBe(true);
      expect(s2After.running).toBe(true);
      expect(s1After.pendingStart).toBe(false);
      expect(s2After.pendingStart).toBe(false);
      assert(s1After.work);
      assert(s2After.work);
      expect(s1After.work.attempts).toBe(1);
      expect(s2After.work.attempts).toBe(1);
    });

    it("cancel arrives while retry completion is pending -> cancel wins", async () => {
      const { workId, segment } = await setupState(S5_COMPLETING_WILL_RETRY);

      // Cancel arrives before main processes the completion
      await t.run(async (ctx) => {
        await ctx.db.insert("pendingCancelation", {
          workId,
          segment,
        });
      });

      await runMain(segment);
      const s = await observeState(workId);
      expect(s.pendingStart).toBe(false);
      expect(s.running).toBe(false);
      expect(s.pendingCompletion).toBe(false);
    });

    it("multiple cancels for same work -> BUG: crashes with double delete", async () => {
      const seg = getNextSegment();
      await t.run<Id<"work">>(async (ctx) => {
        const wId = await ctx.db.insert("work", {
          fnType: "action",
          fnHandle: "h",
          fnName: "fn",
          fnArgs: {},
          attempts: 0,
        });
        await ctx.db.insert("pendingStart", { workId: wId, segment: seg });
        await ctx.db.insert("pendingCancelation", {
          workId: wId,
          segment: seg,
        });
        await ctx.db.insert("pendingCancelation", {
          workId: wId,
          segment: seg,
        });

        await ctx.db.insert("internalState", {
          generation,
          segmentCursors: { incoming: 0n, completion: 0n, cancelation: 0n },
          lastRecovery: getCurrentSegment(),
          report: {
            completed: 0,
            succeeded: 0,
            failed: 0,
            retries: 0,
            canceled: 0,
            conflicted: 0,
            lastReportTs: Date.now(),
          },
          running: [],
        });
        await ctx.db.insert("runStatus", { state: { kind: "running" } });
        return wId;
      });

      // BUG: handleCancelation processes duplicate pendingCancelation entries
      // in parallel. Both find the same pendingStart and try to delete it,
      // causing a "Delete on non-existent doc" crash.
      await expect(runMain(seg)).rejects.toThrow();
    });

    it("complete(success) + cancel interleaved -> cancel is no-op (work already gone)", async () => {
      const { workId, segment } = await setupState(S2_RUNNING);
      await runComplete(workId, { kind: "success" }, 0);

      // Cancel after work was already deleted by complete
      await t.mutation(api.lib.cancel, { id: workId });

      await runMain(segment);
      const s = await observeState(workId);
      expect(s.work).toBe(false);
      expect(s.running).toBe(false);
    });

    it("retry with large backoff -> pendingStart not picked up in same main pass", async () => {
      const seg = getNextSegment();
      const workId = await t.run<Id<"work">>(async (ctx) => {
        const wId = await ctx.db.insert("work", {
          fnType: "action",
          fnHandle: "h",
          fnName: "fn",
          fnArgs: {},
          attempts: 1,
          retryBehavior: { maxAttempts: 5, initialBackoffMs: 60000, base: 2 },
        });
        const sId = await ctx.scheduler.runAfter(
          0,
          internal.worker.runActionWrapper,
          {
            workId: wId,
            fnHandle: "h",
            fnArgs: {},
            logLevel: "WARN",
            attempt: 0,
          },
        );
        await ctx.db.insert("internalState", {
          generation,
          segmentCursors: { incoming: 0n, completion: 0n, cancelation: 0n },
          lastRecovery: getCurrentSegment(),
          report: {
            completed: 0,
            succeeded: 0,
            failed: 0,
            retries: 0,
            canceled: 0,
            conflicted: 0,
            lastReportTs: Date.now(),
          },
          running: [{ workId: wId, scheduledId: sId, started: Date.now() }],
        });
        await ctx.db.insert("runStatus", { state: { kind: "running" } });
        await ctx.db.insert("pendingCompletion", {
          workId: wId,
          segment: seg,
          retry: true,
          runResult: { kind: "failed", error: "test" },
        });
        return wId;
      });

      await runMain(seg);
      const s = await observeState(workId);
      expect(s.pendingCompletion).toBe(false);
      expect(s.running).toBe(false);
      // Re-enqueued with future segment, not picked up
      expect(s.pendingStart).toBe(true);
      assert(s.work);
    });

    it("main crash recovery: state recoverable after restart", async () => {
      const seg = getCurrentSegment();
      const { workId } = await setupState(S1_ENQUEUED, { segment: seg });

      await runMain(seg);
      expect((await observeState(workId)).running).toBe(true);

      await runComplete(workId, { kind: "success" }, 0);

      // New main picks up completion
      const seg2 = getCurrentSegment();
      await runMain(seg2);
      const s = await observeState(workId);
      expect(s.work).toBe(false);
      expect(s.running).toBe(false);
    });

    it("all three pending queues populated -> main processes all in order", async () => {
      const seg = getCurrentSegment();
      const ids = await t.run(async (ctx) => {
        // Job 1: in running, has pendingCompletion
        const w1 = await ctx.db.insert("work", {
          fnType: "action",
          fnHandle: "h1",
          fnName: "fn1",
          fnArgs: {},
          attempts: 0,
        });
        const s1 = await ctx.scheduler.runAfter(
          0,
          internal.worker.runActionWrapper,
          {
            workId: w1,
            fnHandle: "h1",
            fnArgs: {},
            logLevel: "WARN",
            attempt: 0,
          },
        );

        // Job 2: has pendingStart + pendingCancelation
        const w2 = await ctx.db.insert("work", {
          fnType: "action",
          fnHandle: "h2",
          fnName: "fn2",
          fnArgs: {},
          attempts: 0,
        });

        // Job 3: has pendingStart only
        const w3 = await ctx.db.insert("work", {
          fnType: "action",
          fnHandle: "h3",
          fnName: "fn3",
          fnArgs: {},
          attempts: 0,
        });

        await ctx.db.insert("internalState", {
          generation,
          segmentCursors: { incoming: 0n, completion: 0n, cancelation: 0n },
          lastRecovery: getCurrentSegment(),
          report: {
            completed: 0,
            succeeded: 0,
            failed: 0,
            retries: 0,
            canceled: 0,
            conflicted: 0,
            lastReportTs: Date.now(),
          },
          running: [{ workId: w1, scheduledId: s1, started: Date.now() }],
        });
        await ctx.db.insert("runStatus", { state: { kind: "running" } });

        await ctx.db.insert("pendingCompletion", {
          workId: w1,
          segment: seg,
          retry: false,
          runResult: { kind: "success", returnValue: null },
        });
        await ctx.db.insert("pendingStart", { workId: w2, segment: seg });
        await ctx.db.insert("pendingCancelation", {
          workId: w2,
          segment: seg,
        });
        await ctx.db.insert("pendingStart", { workId: w3, segment: seg });

        return { w1, w2, w3 };
      });

      await runMain(seg);

      const s1 = await observeState(ids.w1);
      const s2 = await observeState(ids.w2);
      const s3 = await observeState(ids.w3);

      // w1: pendingCompletion(retry=false) processed, removed from running.
      // Note: handleCompletions does NOT delete work - that's done by complete.complete.
      // Since we set up the state directly (not via complete), work still exists.
      expect(s1.running).toBe(false);
      expect(s1.pendingCompletion).toBe(false);

      // w2: enqueued + canceled -> canceled
      expect(s2.pendingStart).toBe(false);
      expect(s2.pendingCancelation).toBe(false);
      assert(s2.work);
      expect(s2.work.canceled).toBe(true);

      // w3: enqueued -> running
      expect(s3.running).toBe(true);
      expect(s3.pendingStart).toBe(false);
    });

    it("cancel during S5 (completing+retry) with pendingCancelation still present -> no retry", async () => {
      // Like S9 but the pendingCancelation hasn't been processed yet
      const seg = getNextSegment();
      const workId = await t.run<Id<"work">>(async (ctx) => {
        const wId = await ctx.db.insert("work", {
          fnType: "action",
          fnHandle: "h",
          fnName: "fn",
          fnArgs: {},
          attempts: 1,
          retryBehavior: { maxAttempts: 5, initialBackoffMs: 100, base: 2 },
        });
        const sId = await ctx.scheduler.runAfter(
          0,
          internal.worker.runActionWrapper,
          {
            workId: wId,
            fnHandle: "h",
            fnArgs: {},
            logLevel: "WARN",
            attempt: 0,
          },
        );
        await ctx.db.insert("internalState", {
          generation,
          segmentCursors: { incoming: 0n, completion: 0n, cancelation: 0n },
          lastRecovery: getCurrentSegment(),
          report: {
            completed: 0,
            succeeded: 0,
            failed: 0,
            retries: 0,
            canceled: 0,
            conflicted: 0,
            lastReportTs: Date.now(),
          },
          running: [{ workId: wId, scheduledId: sId, started: Date.now() }],
        });
        await ctx.db.insert("runStatus", { state: { kind: "running" } });

        // Pending retry + pending cancel at the same time
        await ctx.db.insert("pendingCompletion", {
          workId: wId,
          segment: seg,
          retry: true,
          runResult: { kind: "failed", error: "test" },
        });
        await ctx.db.insert("pendingCancelation", {
          workId: wId,
          segment: seg,
        });

        return wId;
      });

      await runMain(seg);
      const s = await observeState(workId);
      // Cancel should win: no retry, no pendingStart
      expect(s.pendingStart).toBe(false);
      expect(s.running).toBe(false);
      expect(s.pendingCompletion).toBe(false);
      expect(s.pendingCancelation).toBe(false);
    });
  });
});