@convex-dev/workpool 0.4.6 → 0.4.7-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/component/_generated/api.d.ts +2 -0
- package/dist/component/_generated/api.d.ts.map +1 -1
- package/dist/component/_generated/api.js.map +1 -1
- package/dist/component/complete.d.ts.map +1 -1
- package/dist/component/complete.js +8 -7
- package/dist/component/complete.js.map +1 -1
- package/dist/component/danger.js +7 -7
- package/dist/component/danger.js.map +1 -1
- package/dist/component/future.d.ts +11 -0
- package/dist/component/future.d.ts.map +1 -0
- package/dist/component/future.js +21 -0
- package/dist/component/future.js.map +1 -0
- package/dist/component/kick.d.ts +3 -3
- package/dist/component/kick.d.ts.map +1 -1
- package/dist/component/kick.js +14 -16
- package/dist/component/kick.js.map +1 -1
- package/dist/component/lib.d.ts.map +1 -1
- package/dist/component/lib.js +13 -13
- package/dist/component/lib.js.map +1 -1
- package/dist/component/loop.d.ts +44 -1
- package/dist/component/loop.d.ts.map +1 -1
- package/dist/component/loop.js +171 -217
- package/dist/component/loop.js.map +1 -1
- package/dist/component/recovery.d.ts.map +1 -1
- package/dist/component/recovery.js +2 -2
- package/dist/component/recovery.js.map +1 -1
- package/dist/component/schema.d.ts.map +1 -1
- package/dist/component/schema.js +2 -1
- package/dist/component/schema.js.map +1 -1
- package/dist/component/worker.js +1 -1
- package/dist/component/worker.js.map +1 -1
- package/package.json +8 -12
- package/src/component/_generated/api.ts +2 -0
- package/src/component/complete.test.ts +13 -13
- package/src/component/complete.ts +13 -7
- package/src/component/danger.ts +7 -7
- package/src/component/future.ts +38 -0
- package/src/component/kick.test.ts +17 -20
- package/src/component/kick.ts +20 -17
- package/src/component/lib.test.ts +7 -7
- package/src/component/lib.ts +12 -15
- package/src/component/loop.test.ts +695 -1127
- package/src/component/loop.ts +212 -283
- package/src/component/recovery.test.ts +3 -3
- package/src/component/recovery.ts +5 -2
- package/src/component/schema.ts +2 -1
- package/src/component/stateMachine.test.ts +1246 -0
- package/src/component/stats.test.ts +4 -4
- package/src/component/worker.ts +1 -1
|
@@ -11,1276 +11,844 @@ import {
|
|
|
11
11
|
} from "vitest";
|
|
12
12
|
import { api, internal } from "./_generated/api.js";
|
|
13
13
|
import type { Doc, Id } from "./_generated/dataModel.js";
|
|
14
|
-
import type { MutationCtx } from "./_generated/server.js";
|
|
15
|
-
import { DEFAULT_LOG_LEVEL } from "./logging.js";
|
|
16
14
|
import schema from "./schema.js";
|
|
17
|
-
import {
|
|
18
|
-
DEFAULT_MAX_PARALLELISM,
|
|
19
|
-
getCurrentSegment,
|
|
20
|
-
getNextSegment,
|
|
21
|
-
toSegment,
|
|
22
|
-
} from "./shared.js";
|
|
15
|
+
import { DEFAULT_MAX_PARALLELISM, getCurrentSegment } from "./shared.js";
|
|
23
16
|
import { STATUS_COOLDOWN } from "./loop.js";
|
|
24
17
|
|
|
25
18
|
const modules = import.meta.glob("./**/*.ts");
|
|
26
|
-
|
|
19
|
+
const SECOND = 1000;
|
|
20
|
+
const MINUTE = 60 * SECOND;
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* Behavior tests for the main loop, designed from first principles around
|
|
24
|
+
* what an external observer can see:
|
|
25
|
+
*
|
|
26
|
+
* - api.lib.status — public-facing state of a single work item
|
|
27
|
+
* - runStatus.state — loop lifecycle (running / scheduled / idle)
|
|
28
|
+
* - pending* tables — work in flight that the loop will process
|
|
29
|
+
* - state.running — slots currently occupied by workers
|
|
30
|
+
*
|
|
31
|
+
* These tests do NOT assert on implementation specifics like cursor
|
|
32
|
+
* positions, segment values, or which scheduler call was made — those
|
|
33
|
+
* change when the loop's internals change, and they're not the contract.
|
|
34
|
+
*
|
|
35
|
+
* Setup conventions:
|
|
36
|
+
* - vi.useFakeTimers() so time advances deterministically
|
|
37
|
+
* - The loop is driven manually via runMain(); convex-test doesn't
|
|
38
|
+
* auto-flush scheduled functions
|
|
39
|
+
* - simulateCompletion() pretends a worker finished its job by
|
|
40
|
+
* calling internal.complete.complete; this is how production gets
|
|
41
|
+
* work into pendingCompletion, so it's the correct seam for testing
|
|
42
|
+
*/
|
|
27
43
|
describe("loop", () => {
|
|
28
44
|
async function setupTest() {
|
|
29
45
|
const t = convexTest(schema, modules);
|
|
46
|
+
await t.run(async (ctx) => {
|
|
47
|
+
await ctx.db.insert("globals", {
|
|
48
|
+
logLevel: "WARN",
|
|
49
|
+
maxParallelism: DEFAULT_MAX_PARALLELISM,
|
|
50
|
+
});
|
|
51
|
+
});
|
|
30
52
|
return t;
|
|
31
53
|
}
|
|
32
|
-
|
|
33
54
|
let t: Awaited<ReturnType<typeof setupTest>>;
|
|
34
55
|
|
|
35
|
-
async
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
56
|
+
beforeEach(async () => {
|
|
57
|
+
vi.useFakeTimers();
|
|
58
|
+
t = await setupTest();
|
|
59
|
+
});
|
|
60
|
+
afterEach(() => {
|
|
61
|
+
vi.useRealTimers();
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
// ── helpers ──────────────────────────────────────────────────────────
|
|
65
|
+
|
|
66
|
+
/** Seed an empty running loop: internalState + runStatus=running. If opts.maxParallelism is given, globals.maxParallelism is patched first. */
|
|
67
|
+
async function initialize(opts: { maxParallelism?: number } = {}) {
|
|
68
|
+
if (opts.maxParallelism !== undefined) {
|
|
69
|
+
await t.run(async (ctx) => {
|
|
70
|
+
const g = await ctx.db.query("globals").unique();
|
|
71
|
+
assert(g);
|
|
72
|
+
await ctx.db.patch("globals", g._id, {
|
|
73
|
+
maxParallelism: opts.maxParallelism!,
|
|
46
74
|
});
|
|
47
|
-
}
|
|
75
|
+
});
|
|
76
|
+
}
|
|
77
|
+
await t.run(async (ctx) => {
|
|
78
|
+
await ctx.db.insert("internalState", {
|
|
79
|
+
generation: 1n,
|
|
80
|
+
segmentCursors: { incoming: 0n, completion: 0n, cancelation: 0n },
|
|
81
|
+
lastRecovery: 0n,
|
|
82
|
+
report: {
|
|
83
|
+
completed: 0,
|
|
84
|
+
succeeded: 0,
|
|
85
|
+
failed: 0,
|
|
86
|
+
retries: 0,
|
|
87
|
+
canceled: 0,
|
|
88
|
+
lastReportTs: Date.now(),
|
|
89
|
+
},
|
|
90
|
+
running: [],
|
|
91
|
+
});
|
|
92
|
+
await ctx.db.insert("runStatus", { state: { kind: "running" } });
|
|
48
93
|
});
|
|
49
94
|
}
|
|
50
95
|
|
|
51
|
-
|
|
52
|
-
|
|
96
|
+
/**
|
|
97
|
+
* Insert a work doc + pendingStart at the given segment (default: getCurrentSegment()).
|
|
98
|
+
* Bypasses the public enqueue API to keep tests focused on the loop.
|
|
99
|
+
*/
|
|
100
|
+
async function enqueueWork(
|
|
53
101
|
overrides: Partial<WithoutSystemFields<Doc<"work">>> = {},
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
102
|
+
segment = getCurrentSegment(),
|
|
103
|
+
): Promise<Id<"work">> {
|
|
104
|
+
return t.run(async (ctx) => {
|
|
105
|
+
const workId = await ctx.db.insert("work", {
|
|
106
|
+
fnType: "action",
|
|
107
|
+
fnHandle: "test_handle",
|
|
108
|
+
fnName: "test_handle",
|
|
109
|
+
fnArgs: {},
|
|
110
|
+
attempts: 0,
|
|
111
|
+
...overrides,
|
|
112
|
+
});
|
|
113
|
+
await ctx.db.insert("pendingStart", { workId, segment });
|
|
114
|
+
return workId;
|
|
62
115
|
});
|
|
63
116
|
}
|
|
64
117
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
workId,
|
|
71
|
-
fnHandle: "test_handle",
|
|
72
|
-
fnArgs: {},
|
|
73
|
-
logLevel: "WARN",
|
|
74
|
-
attempt: 0,
|
|
118
|
+
/** Drive the main loop one iteration with the current generation. */
|
|
119
|
+
async function runMain() {
|
|
120
|
+
const generation = await t.run(async (ctx) => {
|
|
121
|
+
const s = await ctx.db.query("internalState").unique();
|
|
122
|
+
return s?.generation ?? 0n;
|
|
75
123
|
});
|
|
124
|
+
await t.mutation(internal.loop.main, { generation });
|
|
76
125
|
}
|
|
77
126
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
127
|
+
/** Pretend a worker finished a job by calling internal.complete.complete (the path that feeds pendingCompletion). */
|
|
128
|
+
async function simulateCompletion(
|
|
129
|
+
workId: Id<"work">,
|
|
130
|
+
result:
|
|
131
|
+
| { kind: "success"; returnValue: unknown }
|
|
132
|
+
| { kind: "failed"; error: string }
|
|
133
|
+
| { kind: "canceled" },
|
|
134
|
+
attempt = 0,
|
|
81
135
|
) {
|
|
82
|
-
await
|
|
83
|
-
|
|
84
|
-
segmentCursors: { incoming: 0n, completion: 0n, cancelation: 0n },
|
|
85
|
-
lastRecovery: getCurrentSegment(),
|
|
86
|
-
report: {
|
|
87
|
-
completed: 0,
|
|
88
|
-
succeeded: 0,
|
|
89
|
-
failed: 0,
|
|
90
|
-
retries: 0,
|
|
91
|
-
canceled: 0,
|
|
92
|
-
lastReportTs: Date.now(),
|
|
93
|
-
},
|
|
94
|
-
running: [],
|
|
95
|
-
...overrides,
|
|
136
|
+
await t.mutation(internal.complete.complete, {
|
|
137
|
+
jobs: [{ workId, runResult: result, attempt }],
|
|
96
138
|
});
|
|
97
139
|
}
|
|
98
140
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
t
|
|
102
|
-
|
|
103
|
-
await ctx.db.
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
141
|
+
/** Snapshot of everything an outside observer might check. */
|
|
142
|
+
async function observe() {
|
|
143
|
+
return t.run(async (ctx) => {
|
|
144
|
+
const state = await ctx.db.query("internalState").unique();
|
|
145
|
+
const runStatus = await ctx.db.query("runStatus").unique();
|
|
146
|
+
const pendingStart = await ctx.db.query("pendingStart").collect();
|
|
147
|
+
const pendingCompletion = await ctx.db
|
|
148
|
+
.query("pendingCompletion")
|
|
149
|
+
.collect();
|
|
150
|
+
const pendingCancelation = await ctx.db
|
|
151
|
+
.query("pendingCancelation")
|
|
152
|
+
.collect();
|
|
153
|
+
return {
|
|
154
|
+
running: state?.running ?? [],
|
|
155
|
+
generation: state?.generation ?? 0n,
|
|
156
|
+
runStatus: runStatus?.state,
|
|
157
|
+
pendingStart,
|
|
158
|
+
pendingCompletion,
|
|
159
|
+
pendingCancelation,
|
|
160
|
+
};
|
|
107
161
|
});
|
|
108
|
-
}
|
|
162
|
+
}
|
|
109
163
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
}
|
|
164
|
+
async function statusOf(workId: Id<"work">) {
|
|
165
|
+
return t.query(api.lib.status, { id: workId });
|
|
166
|
+
}
|
|
113
167
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
const workId = await t.run<Id<"work">>(async (ctx) => {
|
|
118
|
-
// Create internal state
|
|
119
|
-
await insertInternalState(ctx);
|
|
168
|
+
// ────────────────────────────────────────────────────────────────────
|
|
169
|
+
// Forward progress: work moves through the pipeline
|
|
170
|
+
// ────────────────────────────────────────────────────────────────────
|
|
120
171
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
172
|
+
describe("forward progress", () => {
|
|
173
|
+
it("starts a pending work item when main runs", async () => {
|
|
174
|
+
await initialize();
|
|
175
|
+
const workId = await enqueueWork();
|
|
125
176
|
|
|
126
|
-
|
|
127
|
-
const workId = await makeDummyWork(ctx, { attempts: 0 });
|
|
177
|
+
await runMain();
|
|
128
178
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
179
|
+
const o = await observe();
|
|
180
|
+
expect(o.pendingStart).toHaveLength(0);
|
|
181
|
+
expect(o.running.map((r) => r.workId)).toEqual([workId]);
|
|
182
|
+
expect(await statusOf(workId)).toMatchObject({ state: "running" });
|
|
183
|
+
});
|
|
134
184
|
|
|
135
|
-
|
|
136
|
-
|
|
185
|
+
it("removes work from running once a successful completion is processed", async () => {
|
|
186
|
+
await initialize();
|
|
187
|
+
const workId = await enqueueWork();
|
|
188
|
+
await runMain();
|
|
189
|
+
|
|
190
|
+
await simulateCompletion(
|
|
191
|
+
workId,
|
|
192
|
+
{ kind: "success", returnValue: null },
|
|
193
|
+
0,
|
|
194
|
+
);
|
|
195
|
+
await runMain();
|
|
196
|
+
|
|
197
|
+
const o = await observe();
|
|
198
|
+
expect(o.running).toHaveLength(0);
|
|
199
|
+
expect(o.pendingCompletion).toHaveLength(0);
|
|
200
|
+
// Work doc deleted → status reports "finished".
|
|
201
|
+
expect(await statusOf(workId)).toMatchObject({ state: "finished" });
|
|
202
|
+
});
|
|
137
203
|
|
|
138
|
-
|
|
139
|
-
await
|
|
204
|
+
it("treats a final failure (no retry policy) as terminal", async () => {
|
|
205
|
+
await initialize();
|
|
206
|
+
const workId = await enqueueWork();
|
|
207
|
+
await runMain();
|
|
140
208
|
|
|
141
|
-
|
|
142
|
-
await
|
|
143
|
-
// Check that pendingStart was deleted
|
|
144
|
-
const pendingStarts = await ctx.db.query("pendingStart").collect();
|
|
145
|
-
expect(pendingStarts).toHaveLength(0);
|
|
146
|
-
|
|
147
|
-
// Check that work is in running list
|
|
148
|
-
const state = await ctx.db.query("internalState").unique();
|
|
149
|
-
expect(state).toBeDefined();
|
|
150
|
-
assert(state);
|
|
151
|
-
expect(state.running).toHaveLength(1);
|
|
152
|
-
expect(state.running[0].workId).toBe(workId);
|
|
153
|
-
});
|
|
209
|
+
await simulateCompletion(workId, { kind: "failed", error: "boom" }, 0);
|
|
210
|
+
await runMain();
|
|
154
211
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
{
|
|
159
|
-
workId,
|
|
160
|
-
runResult: { kind: "success", returnValue: null },
|
|
161
|
-
attempt: 0,
|
|
162
|
-
},
|
|
163
|
-
],
|
|
164
|
-
});
|
|
165
|
-
|
|
166
|
-
// Verify pendingCompletion was created
|
|
167
|
-
await t.run(async (ctx) => {
|
|
168
|
-
const pendingCompletions = await ctx.db
|
|
169
|
-
.query("pendingCompletion")
|
|
170
|
-
.collect();
|
|
171
|
-
expect(pendingCompletions).toHaveLength(1);
|
|
172
|
-
expect(pendingCompletions[0].workId).toBe(workId);
|
|
173
|
-
expect(pendingCompletions[0].runResult.kind).toBe("success");
|
|
174
|
-
expect(pendingCompletions[0].retry).toBe(false);
|
|
175
|
-
});
|
|
212
|
+
const o = await observe();
|
|
213
|
+
expect(o.running).toHaveLength(0);
|
|
214
|
+
expect(await statusOf(workId)).toMatchObject({ state: "finished" });
|
|
176
215
|
});
|
|
177
216
|
|
|
178
|
-
it("
|
|
179
|
-
|
|
180
|
-
const
|
|
181
|
-
|
|
182
|
-
await insertInternalState(ctx);
|
|
183
|
-
|
|
184
|
-
// Create running runStatus
|
|
185
|
-
await ctx.db.insert("runStatus", {
|
|
186
|
-
state: { kind: "running" },
|
|
187
|
-
});
|
|
217
|
+
it("processes multiple work items concurrently within capacity", async () => {
|
|
218
|
+
await initialize({ maxParallelism: 5 });
|
|
219
|
+
const ids = [];
|
|
220
|
+
for (let i = 0; i < 3; i++) ids.push(await enqueueWork());
|
|
188
221
|
|
|
189
|
-
|
|
190
|
-
const workId = await makeDummyWork(ctx, { attempts: 0 });
|
|
222
|
+
await runMain();
|
|
191
223
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
224
|
+
const o = await observe();
|
|
225
|
+
expect(o.running).toHaveLength(3);
|
|
226
|
+
expect(new Set(o.running.map((r) => r.workId))).toEqual(new Set(ids));
|
|
227
|
+
});
|
|
228
|
+
});
|
|
197
229
|
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
segment: 1n,
|
|
202
|
-
});
|
|
230
|
+
// ────────────────────────────────────────────────────────────────────
|
|
231
|
+
// Capacity: maxParallelism is respected
|
|
232
|
+
// ────────────────────────────────────────────────────────────────────
|
|
203
233
|
|
|
204
|
-
|
|
205
|
-
|
|
234
|
+
describe("capacity", () => {
|
|
235
|
+
it("never starts more than maxParallelism in one iteration", async () => {
|
|
236
|
+
await initialize({ maxParallelism: 3 });
|
|
237
|
+
for (let i = 0; i < 7; i++) await enqueueWork();
|
|
206
238
|
|
|
207
|
-
|
|
208
|
-
await t.mutation(internal.loop.main, { generation: 1n, segment: 1n });
|
|
239
|
+
await runMain();
|
|
209
240
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
const pendingStarts = await ctx.db.query("pendingStart").collect();
|
|
214
|
-
expect(pendingStarts).toHaveLength(0);
|
|
215
|
-
|
|
216
|
-
// Check that pendingCancelation was deleted
|
|
217
|
-
const pendingCancelations = await ctx.db
|
|
218
|
-
.query("pendingCancelation")
|
|
219
|
-
.collect();
|
|
220
|
-
expect(pendingCancelations).toHaveLength(0);
|
|
221
|
-
|
|
222
|
-
// Check that work is not in running list
|
|
223
|
-
const state = await ctx.db.query("internalState").unique();
|
|
224
|
-
expect(state).toBeDefined();
|
|
225
|
-
assert(state);
|
|
226
|
-
expect(state.running).toHaveLength(0);
|
|
227
|
-
expect(state.report.canceled).toBe(1);
|
|
228
|
-
|
|
229
|
-
const work = await ctx.db.get(workId);
|
|
230
|
-
expect(work).not.toBeNull();
|
|
231
|
-
expect(work!.canceled).toBe(true);
|
|
232
|
-
});
|
|
241
|
+
const o = await observe();
|
|
242
|
+
expect(o.running).toHaveLength(3);
|
|
243
|
+
expect(o.pendingStart).toHaveLength(7 - 3);
|
|
233
244
|
});
|
|
234
245
|
|
|
235
|
-
it("
|
|
236
|
-
|
|
237
|
-
const
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
246
|
+
it("picks up overflow on subsequent iterations as slots free", async () => {
|
|
247
|
+
await initialize({ maxParallelism: 2 });
|
|
248
|
+
const ids = [];
|
|
249
|
+
for (let i = 0; i < 4; i++) ids.push(await enqueueWork());
|
|
250
|
+
|
|
251
|
+
await runMain();
|
|
252
|
+
let o = await observe();
|
|
253
|
+
expect(o.running).toHaveLength(2);
|
|
254
|
+
expect(o.pendingStart).toHaveLength(2);
|
|
255
|
+
|
|
256
|
+
// Complete one running job; another should take its place.
|
|
257
|
+
const finished = o.running[0].workId;
|
|
258
|
+
await simulateCompletion(
|
|
259
|
+
finished,
|
|
260
|
+
{ kind: "success", returnValue: null },
|
|
261
|
+
0,
|
|
262
|
+
);
|
|
263
|
+
await runMain();
|
|
264
|
+
|
|
265
|
+
o = await observe();
|
|
266
|
+
expect(o.running).toHaveLength(2);
|
|
267
|
+
expect(o.pendingStart).toHaveLength(1);
|
|
268
|
+
// The completed one is gone.
|
|
269
|
+
expect(o.running.map((r) => r.workId)).not.toContain(finished);
|
|
270
|
+
});
|
|
258
271
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
272
|
+
it("does not start new work when running.length already exceeds maxParallelism", async () => {
|
|
273
|
+
// Edge case: maxParallelism was lowered while jobs were running.
|
|
274
|
+
await initialize({ maxParallelism: 2 });
|
|
275
|
+
// Pre-populate state.running with 4 entries.
|
|
276
|
+
const runningIds: {
|
|
277
|
+
workId: Id<"work">;
|
|
278
|
+
scheduledId: Id<"_scheduled_functions">;
|
|
279
|
+
}[] = [];
|
|
280
|
+
for (let i = 0; i < 4; i++) {
|
|
281
|
+
const workId = await t.run(async (ctx) => {
|
|
282
|
+
return ctx.db.insert("work", {
|
|
283
|
+
fnType: "action",
|
|
284
|
+
fnHandle: "h",
|
|
285
|
+
fnName: "h",
|
|
286
|
+
fnArgs: {},
|
|
287
|
+
attempts: 0,
|
|
288
|
+
});
|
|
264
289
|
});
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
});
|
|
268
|
-
|
|
269
|
-
// Complete the work with failure (workerRunning -> complete)
|
|
270
|
-
await t.mutation(internal.complete.complete, {
|
|
271
|
-
jobs: [
|
|
272
|
-
{
|
|
290
|
+
const scheduledId = await t.run(async (ctx) => {
|
|
291
|
+
return ctx.scheduler.runAfter(0, internal.worker.runActionWrapper, {
|
|
273
292
|
workId,
|
|
274
|
-
|
|
293
|
+
fnHandle: "h",
|
|
294
|
+
fnArgs: {},
|
|
295
|
+
logLevel: "WARN",
|
|
275
296
|
attempt: 0,
|
|
276
|
-
}
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
// Verify pendingCompletion was created with retry=true
|
|
297
|
+
});
|
|
298
|
+
});
|
|
299
|
+
runningIds.push({ workId, scheduledId });
|
|
300
|
+
}
|
|
281
301
|
await t.run(async (ctx) => {
|
|
282
|
-
const
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
302
|
+
const s = await ctx.db.query("internalState").unique();
|
|
303
|
+
assert(s);
|
|
304
|
+
await ctx.db.patch("internalState", s._id, {
|
|
305
|
+
running: runningIds.map((r) => ({
|
|
306
|
+
...r,
|
|
307
|
+
started: Date.now(),
|
|
308
|
+
})),
|
|
309
|
+
});
|
|
289
310
|
});
|
|
311
|
+
// New pending work arrives while we're already over capacity.
|
|
312
|
+
await enqueueWork();
|
|
290
313
|
|
|
291
|
-
|
|
292
|
-
await t.mutation(internal.loop.main, {
|
|
293
|
-
generation: 1n,
|
|
294
|
-
segment: getNextSegment(),
|
|
295
|
-
});
|
|
314
|
+
await runMain();
|
|
296
315
|
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
.query("pendingCompletion")
|
|
302
|
-
.collect();
|
|
303
|
-
expect(pendingCompletions).toHaveLength(0);
|
|
304
|
-
|
|
305
|
-
// Check that pendingStart was created for retry
|
|
306
|
-
const pendingStarts = await ctx.db.query("pendingStart").collect();
|
|
307
|
-
expect(pendingStarts).toHaveLength(1);
|
|
308
|
-
expect(pendingStarts[0].workId).toBe(workId);
|
|
309
|
-
|
|
310
|
-
// Check that work still exists
|
|
311
|
-
const work = await ctx.db.get(workId);
|
|
312
|
-
expect(work).not.toBeNull();
|
|
313
|
-
expect(work!.attempts).toBe(1);
|
|
314
|
-
});
|
|
316
|
+
const o = await observe();
|
|
317
|
+
// No new starts — already over capacity.
|
|
318
|
+
expect(o.running).toHaveLength(4);
|
|
319
|
+
expect(o.pendingStart).toHaveLength(1);
|
|
315
320
|
});
|
|
316
321
|
});
|
|
317
322
|
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
// Enqueue work
|
|
332
|
-
await t.mutation(api.lib.enqueue, {
|
|
333
|
-
fnHandle: "testHandle",
|
|
334
|
-
fnName: "testFunction",
|
|
335
|
-
fnArgs: { test: true },
|
|
336
|
-
fnType: "mutation",
|
|
337
|
-
runAt: Date.now(),
|
|
338
|
-
config: {
|
|
339
|
-
maxParallelism: 10,
|
|
340
|
-
logLevel: "INFO",
|
|
323
|
+
// ────────────────────────────────────────────────────────────────────
|
|
324
|
+
// Retry: failed work is retried per the retry policy
|
|
325
|
+
// ────────────────────────────────────────────────────────────────────
|
|
326
|
+
|
|
327
|
+
describe("retry", () => {
|
|
328
|
+
it("re-enqueues a failed job that has a retry policy with attempts left", async () => {
|
|
329
|
+
await initialize();
|
|
330
|
+
const workId = await enqueueWork({
|
|
331
|
+
retryBehavior: {
|
|
332
|
+
maxAttempts: 3,
|
|
333
|
+
initialBackoffMs: 100,
|
|
334
|
+
base: 2,
|
|
341
335
|
},
|
|
342
336
|
});
|
|
337
|
+
await runMain();
|
|
343
338
|
|
|
344
|
-
//
|
|
345
|
-
await
|
|
346
|
-
|
|
347
|
-
expect(runStatus).toBeDefined();
|
|
348
|
-
assert(runStatus);
|
|
349
|
-
expect(runStatus.state.kind).toBe("running");
|
|
350
|
-
});
|
|
351
|
-
});
|
|
352
|
-
|
|
353
|
-
it("should transition from running to scheduled when all work is started and there's leftover capacity", async () => {
|
|
354
|
-
// Setup initial running state with work
|
|
355
|
-
await t.run(async (ctx) => {
|
|
356
|
-
// Create internal state
|
|
357
|
-
await insertInternalState(ctx);
|
|
358
|
-
|
|
359
|
-
// Create running runStatus
|
|
360
|
-
await ctx.db.insert("runStatus", {
|
|
361
|
-
state: { kind: "running" },
|
|
362
|
-
});
|
|
339
|
+
// Worker reports failure on first attempt.
|
|
340
|
+
await simulateCompletion(workId, { kind: "failed", error: "boom" }, 0);
|
|
341
|
+
await runMain();
|
|
363
342
|
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
});
|
|
343
|
+
// Work doc still exists; pendingStart was re-inserted with backoff segment.
|
|
344
|
+
const o = await observe();
|
|
345
|
+
expect(o.pendingStart).toHaveLength(1);
|
|
346
|
+
expect(o.pendingStart[0].workId).toBe(workId);
|
|
347
|
+
expect(await statusOf(workId)).toMatchObject({
|
|
348
|
+
state: "pending",
|
|
349
|
+
previousAttempts: 1,
|
|
372
350
|
});
|
|
351
|
+
});
|
|
373
352
|
|
|
374
|
-
|
|
375
|
-
await
|
|
376
|
-
|
|
377
|
-
|
|
353
|
+
it("does NOT re-enqueue a failed job that was canceled before retry processed", async () => {
|
|
354
|
+
await initialize();
|
|
355
|
+
const workId = await enqueueWork({
|
|
356
|
+
retryBehavior: {
|
|
357
|
+
maxAttempts: 3,
|
|
358
|
+
initialBackoffMs: 100,
|
|
359
|
+
base: 2,
|
|
360
|
+
},
|
|
378
361
|
});
|
|
362
|
+
await runMain();
|
|
379
363
|
|
|
380
|
-
//
|
|
381
|
-
|
|
364
|
+
// Worker reports failure (would normally retry).
|
|
365
|
+
await simulateCompletion(workId, { kind: "failed", error: "boom" }, 0);
|
|
366
|
+
// Cancel arrives before main can process the retry.
|
|
367
|
+
await t.mutation(api.lib.cancel, { id: workId });
|
|
382
368
|
|
|
383
|
-
|
|
384
|
-
await t.mutation(internal.loop.updateRunStatus, {
|
|
385
|
-
generation: 2n,
|
|
386
|
-
segment: getNextSegment(),
|
|
387
|
-
});
|
|
369
|
+
await runMain();
|
|
388
370
|
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
expect(runStatus.state.saturated).toBe(false);
|
|
397
|
-
});
|
|
371
|
+
const o = await observe();
|
|
372
|
+
// Loop's direct effect: no retry was queued, work is marked canceled.
|
|
373
|
+
// (A follow-up `complete` mutation is scheduled to finalize the work
|
|
374
|
+
// doc deletion — that's complete.ts's responsibility, not the loop's.)
|
|
375
|
+
expect(o.pendingStart).toHaveLength(0);
|
|
376
|
+
const work = await t.run(async (ctx) => ctx.db.get("work", workId));
|
|
377
|
+
expect(work?.canceled).toBe(true);
|
|
398
378
|
});
|
|
379
|
+
});
|
|
399
380
|
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
381
|
+
// ────────────────────────────────────────────────────────────────────
|
|
382
|
+
// Cancellation
|
|
383
|
+
// ────────────────────────────────────────────────────────────────────
|
|
384
|
+
|
|
385
|
+
describe("cancellation", () => {
|
|
386
|
+
it("removes a pendingStart cancellation before the work runs", async () => {
|
|
387
|
+
await initialize();
|
|
388
|
+
const workId = await enqueueWork();
|
|
389
|
+
await t.mutation(api.lib.cancel, { id: workId });
|
|
390
|
+
|
|
391
|
+
await runMain();
|
|
392
|
+
|
|
393
|
+
const o = await observe();
|
|
394
|
+
expect(o.pendingStart).toHaveLength(0);
|
|
395
|
+
expect(o.running).toHaveLength(0);
|
|
396
|
+
// Work is marked canceled by the loop. Final deletion happens when
|
|
397
|
+
// the scheduled `complete` mutation runs (separate concern).
|
|
398
|
+
const work = await t.run(async (ctx) => ctx.db.get("work", workId));
|
|
399
|
+
expect(work?.canceled).toBe(true);
|
|
400
|
+
});
|
|
420
401
|
|
|
421
|
-
|
|
422
|
-
|
|
402
|
+
it("marks an already-running work as canceled", async () => {
|
|
403
|
+
await initialize();
|
|
404
|
+
const workId = await enqueueWork();
|
|
405
|
+
await runMain(); // start it
|
|
406
|
+
expect((await observe()).running).toHaveLength(1);
|
|
423
407
|
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
segment,
|
|
427
|
-
});
|
|
428
|
-
});
|
|
408
|
+
await t.mutation(api.lib.cancel, { id: workId });
|
|
409
|
+
await runMain(); // process the cancellation
|
|
429
410
|
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
generation: 1n,
|
|
433
|
-
segment,
|
|
434
|
-
});
|
|
435
|
-
|
|
436
|
-
// Verify state transition to scheduled with saturated=true
|
|
437
|
-
await t.run(async (ctx) => {
|
|
438
|
-
const runStatus = await ctx.db.query("runStatus").unique();
|
|
439
|
-
expect(runStatus).toBeDefined();
|
|
440
|
-
assert(runStatus);
|
|
441
|
-
expect(runStatus.state.kind).toBe("scheduled");
|
|
442
|
-
assert(runStatus.state.kind === "scheduled");
|
|
443
|
-
expect(runStatus.state.saturated).toBe(true);
|
|
444
|
-
});
|
|
411
|
+
const work = await t.run(async (ctx) => ctx.db.get("work", workId));
|
|
412
|
+
expect(work?.canceled).toBe(true);
|
|
445
413
|
});
|
|
446
414
|
|
|
447
|
-
it("
|
|
448
|
-
|
|
449
|
-
await
|
|
450
|
-
|
|
451
|
-
|
|
415
|
+
it("is a graceful no-op for already-finished work", async () => {
|
|
416
|
+
await initialize();
|
|
417
|
+
const workId = await enqueueWork();
|
|
418
|
+
await runMain();
|
|
419
|
+
await simulateCompletion(
|
|
420
|
+
workId,
|
|
421
|
+
{ kind: "success", returnValue: null },
|
|
422
|
+
0,
|
|
423
|
+
);
|
|
424
|
+
await runMain();
|
|
425
|
+
|
|
426
|
+
// Work doc already gone — cancel should not throw.
|
|
427
|
+
await t.mutation(api.lib.cancel, { id: workId });
|
|
428
|
+
const o = await observe();
|
|
429
|
+
expect(o.pendingCancelation).toHaveLength(0);
|
|
430
|
+
});
|
|
431
|
+
});
|
|
452
432
|
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
internal.loop.main,
|
|
457
|
-
{ generation: 1n, segment: getNextSegment() + 10n },
|
|
458
|
-
);
|
|
433
|
+
// ────────────────────────────────────────────────────────────────────
|
|
434
|
+
// Lifecycle: runStatus transitions
|
|
435
|
+
// ────────────────────────────────────────────────────────────────────
|
|
459
436
|
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
scheduledId,
|
|
466
|
-
saturated: false,
|
|
467
|
-
generation: 1n,
|
|
468
|
-
},
|
|
469
|
-
});
|
|
437
|
+
describe("lifecycle", () => {
|
|
438
|
+
it("transitions running -> idle when there's nothing to do (past cooldown)", async () => {
|
|
439
|
+
await initialize();
|
|
440
|
+
// No pending work, cursors at 0 → far in the past, past cooldown.
|
|
441
|
+
vi.setSystemTime(Date.now() + STATUS_COOLDOWN + SECOND);
|
|
470
442
|
|
|
471
|
-
|
|
472
|
-
});
|
|
443
|
+
await runMain();
|
|
473
444
|
|
|
474
|
-
|
|
475
|
-
await t.mutation(api.lib.enqueue, {
|
|
476
|
-
fnHandle: "testHandle",
|
|
477
|
-
fnName: "testFunction",
|
|
478
|
-
fnArgs: { test: true },
|
|
479
|
-
fnType: "mutation",
|
|
480
|
-
runAt: Date.now(),
|
|
481
|
-
config: {
|
|
482
|
-
maxParallelism: 10,
|
|
483
|
-
logLevel: "INFO",
|
|
484
|
-
},
|
|
485
|
-
});
|
|
486
|
-
|
|
487
|
-
// Verify state transition to running
|
|
488
|
-
await t.run(async (ctx) => {
|
|
489
|
-
const runStatus = await ctx.db.query("runStatus").unique();
|
|
490
|
-
expect(runStatus).toBeDefined();
|
|
491
|
-
assert(runStatus);
|
|
492
|
-
expect(runStatus.state.kind).toBe("running");
|
|
493
|
-
});
|
|
445
|
+
expect((await observe()).runStatus).toMatchObject({ kind: "idle" });
|
|
494
446
|
});
|
|
495
447
|
|
|
496
|
-
it("
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
448
|
+
it("stays running during the cooldown window", async () => {
|
|
449
|
+
await initialize();
|
|
450
|
+
const workId = await enqueueWork();
|
|
451
|
+
await runMain(); // process the work; cursors advance to ~now
|
|
452
|
+
|
|
453
|
+
// Complete it so there's no work in flight.
|
|
454
|
+
await simulateCompletion(
|
|
455
|
+
workId,
|
|
456
|
+
{ kind: "success", returnValue: null },
|
|
457
|
+
0,
|
|
458
|
+
);
|
|
459
|
+
await runMain(); // process completion; cursors at ~now
|
|
460
|
+
|
|
461
|
+
// Within cooldown — should stay running.
|
|
462
|
+
const o = await observe();
|
|
463
|
+
expect(o.runStatus).toMatchObject({ kind: "running" });
|
|
464
|
+
});
|
|
507
465
|
|
|
508
|
-
|
|
509
|
-
|
|
466
|
+
it("transitions to scheduled (saturated=false) when only future-scheduled work remains", async () => {
|
|
467
|
+
await initialize();
|
|
468
|
+
// A retry-style pendingStart in the future.
|
|
469
|
+
const future = getCurrentSegment() + 1000n;
|
|
470
|
+
await enqueueWork({}, future);
|
|
471
|
+
// Cursors at 0 → past cooldown, so we're not held in cooldown.
|
|
472
|
+
|
|
473
|
+
await runMain();
|
|
474
|
+
|
|
475
|
+
const o = await observe();
|
|
476
|
+
expect(o.runStatus?.kind).toBe("scheduled");
|
|
477
|
+
if (o.runStatus?.kind === "scheduled") {
|
|
478
|
+
expect(o.runStatus.segment).toBeLessThanOrEqual(future);
|
|
479
|
+
// No running jobs; capacity isn't full → saturated must be false.
|
|
480
|
+
expect(o.runStatus.saturated).toBe(false);
|
|
481
|
+
}
|
|
482
|
+
});
|
|
510
483
|
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
484
|
+
it("doesn't lose work when re-checking before going idle", async () => {
|
|
485
|
+
// Snapshot-then-confirm safety net: even if the snapshot shows no
|
|
486
|
+
// work, the runQuery confirmation should pick up data committed
|
|
487
|
+
// before this iteration started.
|
|
488
|
+
await initialize();
|
|
489
|
+
const workId = await enqueueWork();
|
|
516
490
|
|
|
517
|
-
|
|
518
|
-
});
|
|
491
|
+
await runMain();
|
|
519
492
|
|
|
520
|
-
|
|
521
|
-
|
|
493
|
+
const o = await observe();
|
|
494
|
+
// The work was started, NOT lost to a "go idle" decision.
|
|
495
|
+
expect(o.running.map((r) => r.workId)).toEqual([workId]);
|
|
496
|
+
});
|
|
497
|
+
});
|
|
522
498
|
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
499
|
+
// ────────────────────────────────────────────────────────────────────
|
|
500
|
+
// Saturated state: scheduled with running.length == maxParallelism
|
|
501
|
+
// The flag changes how kickMainLoop behaves (no enqueue-kicks; yes
|
|
502
|
+
// completion-kicks).
|
|
503
|
+
// ────────────────────────────────────────────────────────────────────
|
|
504
|
+
|
|
505
|
+
describe("saturated", () => {
|
|
506
|
+
/**
|
|
507
|
+
* Pre-populate state.running with N entries, each backed by a real work
|
|
508
|
+
* doc + scheduled worker (so recovery checks don't fire). Useful for
|
|
509
|
+
* exercising main when the loop is already at-capacity.
|
|
510
|
+
*/
|
|
511
|
+
async function fillRunningTo(count: number): Promise<Id<"work">[]> {
|
|
512
|
+
const ids: Id<"work">[] = [];
|
|
513
|
+
const entries: {
|
|
514
|
+
workId: Id<"work">;
|
|
515
|
+
scheduledId: Id<"_scheduled_functions">;
|
|
516
|
+
started: number;
|
|
517
|
+
}[] = [];
|
|
518
|
+
for (let i = 0; i < count; i++) {
|
|
519
|
+
const workId = await t.run(async (ctx) =>
|
|
520
|
+
ctx.db.insert("work", {
|
|
521
|
+
fnType: "action",
|
|
522
|
+
fnHandle: "test_handle",
|
|
523
|
+
fnName: "test_handle",
|
|
524
|
+
fnArgs: {},
|
|
525
|
+
attempts: 0,
|
|
526
|
+
}),
|
|
527
|
+
);
|
|
528
|
+
const scheduledId = await t.run(async (ctx) =>
|
|
529
|
+
ctx.scheduler.runAfter(0, internal.worker.runActionWrapper, {
|
|
527
530
|
workId,
|
|
528
|
-
|
|
531
|
+
fnHandle: "test_handle",
|
|
532
|
+
fnArgs: {},
|
|
533
|
+
logLevel: "WARN",
|
|
529
534
|
attempt: 0,
|
|
530
|
-
},
|
|
531
|
-
|
|
535
|
+
}),
|
|
536
|
+
);
|
|
537
|
+
ids.push(workId);
|
|
538
|
+
entries.push({ workId, scheduledId, started: Date.now() });
|
|
539
|
+
}
|
|
540
|
+
await t.run(async (ctx) => {
|
|
541
|
+
const s = await ctx.db.query("internalState").unique();
|
|
542
|
+
assert(s);
|
|
543
|
+
await ctx.db.patch("internalState", s._id, { running: entries });
|
|
532
544
|
});
|
|
545
|
+
return ids;
|
|
546
|
+
}
|
|
533
547
|
|
|
534
|
-
|
|
535
|
-
await
|
|
536
|
-
|
|
537
|
-
//
|
|
538
|
-
|
|
548
|
+
it("records saturated=true when transitioning to scheduled at full capacity", async () => {
|
|
549
|
+
await initialize({ maxParallelism: 3 });
|
|
550
|
+
// Fill to capacity. No completions, no future starts → main has
|
|
551
|
+
// nothing to do this iteration but jobs are running, so it should
|
|
552
|
+
// schedule itself (e.g. for recovery) with saturated=true.
|
|
553
|
+
await fillRunningTo(3);
|
|
539
554
|
|
|
540
|
-
|
|
541
|
-
await t.mutation(internal.loop.updateRunStatus, {
|
|
542
|
-
generation: 3n,
|
|
543
|
-
segment,
|
|
544
|
-
});
|
|
555
|
+
await runMain();
|
|
545
556
|
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
assert(runStatus.state.kind === "idle");
|
|
553
|
-
});
|
|
557
|
+
const o = await observe();
|
|
558
|
+
assert(o.runStatus);
|
|
559
|
+
expect(o.runStatus.kind).toBe("scheduled");
|
|
560
|
+
if (o.runStatus.kind === "scheduled") {
|
|
561
|
+
expect(o.runStatus.saturated).toBe(true);
|
|
562
|
+
}
|
|
554
563
|
});
|
|
555
|
-
it("should transition from scheduled to running when main loop runs", async () => {
|
|
556
|
-
const segment = getNextSegment();
|
|
557
|
-
await t.run(async (ctx) => {
|
|
558
|
-
await insertInternalState(ctx);
|
|
559
564
|
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
);
|
|
565
|
+
it("records saturated=false when scheduling with under-capacity running jobs", async () => {
|
|
566
|
+
await initialize({ maxParallelism: 5 });
|
|
567
|
+
// Fewer running jobs than max → not saturated.
|
|
568
|
+
await fillRunningTo(2);
|
|
565
569
|
|
|
566
|
-
|
|
567
|
-
state: {
|
|
568
|
-
kind: "scheduled",
|
|
569
|
-
scheduledId,
|
|
570
|
-
generation: 1n,
|
|
571
|
-
segment,
|
|
572
|
-
saturated: false,
|
|
573
|
-
},
|
|
574
|
-
});
|
|
575
|
-
});
|
|
576
|
-
// Run main loop
|
|
577
|
-
await t.mutation(internal.loop.main, { generation: 1n, segment });
|
|
570
|
+
await runMain();
|
|
578
571
|
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
});
|
|
572
|
+
const o = await observe();
|
|
573
|
+
assert(o.runStatus);
|
|
574
|
+
expect(o.runStatus.kind).toBe("scheduled");
|
|
575
|
+
if (o.runStatus.kind === "scheduled") {
|
|
576
|
+
expect(o.runStatus.saturated).toBe(false);
|
|
577
|
+
}
|
|
586
578
|
});
|
|
587
|
-
});
|
|
588
|
-
|
|
589
|
-
describe("main function", () => {
|
|
590
|
-
it("should handle generation mismatch", async () => {
|
|
591
|
-
// Setup state with different generation
|
|
592
|
-
await t.run(async (ctx) => {
|
|
593
|
-
await insertInternalState(ctx, { generation: 2n });
|
|
594
|
-
});
|
|
595
579
|
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
580
|
+
it("clears saturated when a completion frees a slot", async () => {
|
|
581
|
+
// Saturated → completion arrives → kick wakes main → main runs and
|
|
582
|
+
// sees a freed slot → next scheduled state has saturated=false.
|
|
583
|
+
await initialize({ maxParallelism: 2 });
|
|
584
|
+
const ids = await fillRunningTo(2);
|
|
585
|
+
await runMain(); // first transition: scheduled, saturated=true
|
|
586
|
+
expect((await observe()).runStatus).toMatchObject({
|
|
587
|
+
kind: "scheduled",
|
|
588
|
+
saturated: true,
|
|
589
|
+
});
|
|
590
|
+
|
|
591
|
+
// A worker completes — frees a slot.
|
|
592
|
+
await simulateCompletion(
|
|
593
|
+
ids[0],
|
|
594
|
+
{ kind: "success", returnValue: null },
|
|
595
|
+
0,
|
|
596
|
+
);
|
|
597
|
+
await runMain();
|
|
598
|
+
|
|
599
|
+
const o = await observe();
|
|
600
|
+
assert(o.runStatus);
|
|
601
|
+
// After processing the completion, running.length is 1 < 2, so any
|
|
602
|
+
// subsequent scheduled state should NOT be saturated.
|
|
603
|
+
if (o.runStatus.kind === "scheduled") {
|
|
604
|
+
expect(o.runStatus.saturated).toBe(false);
|
|
605
|
+
} else {
|
|
606
|
+
// Or we might be in 'running' (within cooldown) — also fine; just
|
|
607
|
+
// ensure we did not stay saturated=true.
|
|
608
|
+
expect(o.runStatus.kind).not.toBe("idle");
|
|
609
|
+
}
|
|
600
610
|
});
|
|
601
611
|
|
|
602
|
-
it("
|
|
603
|
-
//
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
612
|
+
it("does not start new work while saturated, even when pendingStart accumulates", async () => {
|
|
613
|
+
// Demonstrates that the capacity-aware query honors the running cap:
|
|
614
|
+
// when running == max, getPending returns zero starts, so new
|
|
615
|
+
// enqueues sit in pendingStart until a slot opens.
|
|
616
|
+
await initialize({ maxParallelism: 2 });
|
|
617
|
+
await fillRunningTo(2);
|
|
618
|
+
|
|
619
|
+
// New work arrives while saturated.
|
|
620
|
+
const newWorkId = await enqueueWork();
|
|
621
|
+
|
|
622
|
+
await runMain();
|
|
623
|
+
|
|
624
|
+
const o = await observe();
|
|
625
|
+
// No new starts — we're at max capacity.
|
|
626
|
+
expect(o.running).toHaveLength(2);
|
|
627
|
+
expect(o.pendingStart.map((p) => p.workId)).toContain(newWorkId);
|
|
628
|
+
assert(o.runStatus);
|
|
629
|
+
if (o.runStatus.kind === "scheduled") {
|
|
630
|
+
expect(o.runStatus.saturated).toBe(true);
|
|
631
|
+
}
|
|
632
|
+
});
|
|
615
633
|
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
await
|
|
627
|
-
|
|
628
|
-
//
|
|
629
|
-
await
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
634
|
+
it("stays saturated when a completion frees a slot but more work is waiting", async () => {
|
|
635
|
+
// Externally observable: a completion arriving while saturated, with
|
|
636
|
+
// more pendingStart queued, should leave runStatus = scheduled +
|
|
637
|
+
// saturated=true. The freed slot gets refilled from pendingStart in
|
|
638
|
+
// the same iteration, so running.length stays at max and the visible
|
|
639
|
+
// saturated state doesn't drop.
|
|
640
|
+
await initialize({ maxParallelism: 2 });
|
|
641
|
+
const ids = await fillRunningTo(2);
|
|
642
|
+
// Two more items waiting behind the at-capacity loop.
|
|
643
|
+
await enqueueWork();
|
|
644
|
+
await enqueueWork();
|
|
645
|
+
|
|
646
|
+
// First main iteration arrives at the saturated end state.
|
|
647
|
+
await runMain();
|
|
648
|
+
expect((await observe()).runStatus).toMatchObject({
|
|
649
|
+
kind: "scheduled",
|
|
650
|
+
saturated: true,
|
|
651
|
+
});
|
|
652
|
+
|
|
653
|
+
// A worker completes — frees a slot, but pendingStart still has work.
|
|
654
|
+
await simulateCompletion(
|
|
655
|
+
ids[0],
|
|
656
|
+
{ kind: "success", returnValue: null },
|
|
657
|
+
0,
|
|
658
|
+
);
|
|
659
|
+
|
|
660
|
+
// First iteration after the completion does work (processes
|
|
661
|
+
// completion + starts a new pending), so didWork=true and main
|
|
662
|
+
// self-reschedules with runStatus = "running".
|
|
663
|
+
await runMain();
|
|
664
|
+
// Advance past the cooldown so the next iteration actually records
|
|
665
|
+
// the end-of-run state instead of holding "running" via cooldown.
|
|
666
|
+
vi.setSystemTime(Date.now() + STATUS_COOLDOWN + SECOND);
|
|
667
|
+
await runMain();
|
|
668
|
+
|
|
669
|
+
const o = await observe();
|
|
670
|
+
assert(o.runStatus);
|
|
671
|
+
// Slot was refilled from pendingStart → running back at max →
|
|
672
|
+
// saturated=true is the externally observed state again.
|
|
673
|
+
expect(o.running).toHaveLength(2);
|
|
674
|
+
expect(o.runStatus).toMatchObject({
|
|
675
|
+
kind: "scheduled",
|
|
676
|
+
saturated: true,
|
|
641
677
|
});
|
|
642
678
|
});
|
|
679
|
+
});
|
|
643
680
|
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
// Create a work item for the running list
|
|
648
|
-
const workId = await makeDummyWork(ctx, {
|
|
649
|
-
attempts: 1,
|
|
650
|
-
retryBehavior: {
|
|
651
|
-
maxAttempts: 3,
|
|
652
|
-
initialBackoffMs: 1000,
|
|
653
|
-
base: 2,
|
|
654
|
-
},
|
|
655
|
-
});
|
|
656
|
-
|
|
657
|
-
// Schedule a function and get its ID
|
|
658
|
-
const scheduledId = await makeDummyScheduledFunction(ctx, workId);
|
|
681
|
+
// ────────────────────────────────────────────────────────────────────
|
|
682
|
+
// Recovery: stuck running jobs get cleaned up
|
|
683
|
+
// ────────────────────────────────────────────────────────────────────
|
|
659
684
|
|
|
660
|
-
|
|
661
|
-
|
|
685
|
+
describe("recovery", () => {
|
|
686
|
+
it("flags running entries whose worker has been silent past the threshold", async () => {
|
|
687
|
+
await initialize();
|
|
688
|
+
// Pre-populate state.running with an old entry.
|
|
689
|
+
const workId = await t.run(async (ctx) => {
|
|
690
|
+
const wid = await ctx.db.insert("work", {
|
|
691
|
+
fnType: "action",
|
|
692
|
+
fnHandle: "h",
|
|
693
|
+
fnName: "h",
|
|
694
|
+
fnArgs: {},
|
|
695
|
+
attempts: 0,
|
|
696
|
+
});
|
|
697
|
+
const scheduledId = await ctx.scheduler.runAfter(
|
|
698
|
+
0,
|
|
699
|
+
internal.worker.runActionWrapper,
|
|
700
|
+
{
|
|
701
|
+
workId: wid,
|
|
702
|
+
fnHandle: "h",
|
|
703
|
+
fnArgs: {},
|
|
704
|
+
logLevel: "WARN",
|
|
705
|
+
attempt: 0,
|
|
706
|
+
},
|
|
707
|
+
);
|
|
708
|
+
const s = await ctx.db.query("internalState").unique();
|
|
709
|
+
assert(s);
|
|
710
|
+
await ctx.db.patch("internalState", s._id, {
|
|
662
711
|
running: [
|
|
663
712
|
{
|
|
664
|
-
workId,
|
|
713
|
+
workId: wid,
|
|
665
714
|
scheduledId,
|
|
666
|
-
|
|
715
|
+
// Started 10 minutes ago — past 5-minute recovery threshold.
|
|
716
|
+
started: Date.now() - 10 * MINUTE,
|
|
667
717
|
},
|
|
668
718
|
],
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
// Create pending completion with failed result
|
|
672
|
-
await ctx.db.insert("pendingCompletion", {
|
|
673
|
-
workId,
|
|
674
|
-
runResult: { kind: "failed", error: "test error" },
|
|
675
|
-
segment: 1n,
|
|
676
|
-
retry: true,
|
|
677
|
-
});
|
|
678
|
-
|
|
679
|
-
return workId;
|
|
680
|
-
});
|
|
681
|
-
|
|
682
|
-
// Call main
|
|
683
|
-
await t.mutation(internal.loop.main, { generation: 1n, segment: 1n });
|
|
684
|
-
|
|
685
|
-
// Verify job was retried
|
|
686
|
-
await t.run(async (ctx) => {
|
|
687
|
-
// Check that pendingCompletion was deleted
|
|
688
|
-
const completions = await ctx.db.query("pendingCompletion").collect();
|
|
689
|
-
expect(completions).toHaveLength(0);
|
|
690
|
-
|
|
691
|
-
// Check that work was updated
|
|
692
|
-
const work = await ctx.db.get(workId);
|
|
693
|
-
expect(work).toBeDefined();
|
|
694
|
-
expect(work!.attempts).toBe(1);
|
|
695
|
-
|
|
696
|
-
// Check that a new pendingStart was created
|
|
697
|
-
const pendingStarts = await ctx.db.query("pendingStart").collect();
|
|
698
|
-
expect(pendingStarts).toHaveLength(1);
|
|
699
|
-
expect(pendingStarts[0].workId).toBe(workId);
|
|
700
|
-
|
|
701
|
-
// Check that report was updated
|
|
702
|
-
const state = await ctx.db.query("internalState").unique();
|
|
703
|
-
expect(state).toBeDefined();
|
|
704
|
-
expect(state!.report.retries).toBe(1);
|
|
705
|
-
});
|
|
706
|
-
});
|
|
707
|
-
|
|
708
|
-
it("should process pending cancelations", async () => {
|
|
709
|
-
// Setup state with a pending cancelation
|
|
710
|
-
const workId = await t.run<Id<"work">>(async (ctx) => {
|
|
711
|
-
// Create a work item for the running list
|
|
712
|
-
const runningWorkId = await makeDummyWork(ctx);
|
|
713
|
-
|
|
714
|
-
// Schedule a function and get its ID
|
|
715
|
-
const scheduledId = await makeDummyScheduledFunction(
|
|
716
|
-
ctx,
|
|
717
|
-
runningWorkId,
|
|
718
|
-
);
|
|
719
|
-
|
|
720
|
-
// Create internal state
|
|
721
|
-
await insertInternalState(ctx, {
|
|
722
|
-
running: [{ workId: runningWorkId, scheduledId, started: 900000 }],
|
|
723
|
-
});
|
|
724
|
-
|
|
725
|
-
// Create work
|
|
726
|
-
const workId = await makeDummyWork(ctx, {
|
|
727
|
-
retryBehavior: {
|
|
728
|
-
maxAttempts: 3,
|
|
729
|
-
initialBackoffMs: 1000,
|
|
730
|
-
base: 2,
|
|
731
|
-
},
|
|
732
|
-
});
|
|
733
|
-
|
|
734
|
-
// Create pending start
|
|
735
|
-
await ctx.db.insert("pendingStart", {
|
|
736
|
-
workId,
|
|
737
|
-
segment: 1n,
|
|
738
|
-
});
|
|
739
|
-
|
|
740
|
-
// Create pending cancelation
|
|
741
|
-
await ctx.db.insert("pendingCancelation", {
|
|
742
|
-
workId,
|
|
743
|
-
segment: 1n,
|
|
744
|
-
});
|
|
745
|
-
|
|
746
|
-
return workId;
|
|
747
|
-
});
|
|
748
|
-
|
|
749
|
-
// Call main
|
|
750
|
-
await t.mutation(internal.loop.main, { generation: 1n, segment: 1n });
|
|
751
|
-
|
|
752
|
-
// Verify cancelation was processed
|
|
753
|
-
await t.run(async (ctx) => {
|
|
754
|
-
// Check that pendingCancelation was deleted
|
|
755
|
-
const cancelations = await ctx.db.query("pendingCancelation").collect();
|
|
756
|
-
expect(cancelations).toHaveLength(0);
|
|
757
|
-
|
|
758
|
-
// Check that pendingStart was deleted
|
|
759
|
-
const pendingStarts = await ctx.db.query("pendingStart").collect();
|
|
760
|
-
expect(pendingStarts).toHaveLength(0);
|
|
761
|
-
|
|
762
|
-
const work = await ctx.db.get(workId);
|
|
763
|
-
expect(work).toBeDefined();
|
|
764
|
-
expect(work!.canceled).toBe(true);
|
|
765
|
-
|
|
766
|
-
// Check that report was updated
|
|
767
|
-
const state = await ctx.db.query("internalState").unique();
|
|
768
|
-
expect(state).toBeDefined();
|
|
769
|
-
expect(state!.report.canceled).toBe(1);
|
|
770
|
-
});
|
|
771
|
-
});
|
|
772
|
-
|
|
773
|
-
it("should schedule new work", async () => {
|
|
774
|
-
// Setup state with pending start items
|
|
775
|
-
const workId = await t.run<Id<"work">>(async (ctx) => {
|
|
776
|
-
// Create internal state
|
|
777
|
-
await insertInternalState(ctx);
|
|
778
|
-
|
|
779
|
-
// Create work
|
|
780
|
-
const workId = await makeDummyWork(ctx);
|
|
781
|
-
|
|
782
|
-
// Create pending start
|
|
783
|
-
await ctx.db.insert("pendingStart", {
|
|
784
|
-
workId,
|
|
785
|
-
segment: 1n,
|
|
786
|
-
});
|
|
787
|
-
|
|
788
|
-
return workId;
|
|
789
|
-
});
|
|
790
|
-
|
|
791
|
-
// Call main
|
|
792
|
-
await t.mutation(internal.loop.main, { generation: 1n, segment: 1n });
|
|
793
|
-
|
|
794
|
-
// Verify work was started
|
|
795
|
-
await t.run(async (ctx) => {
|
|
796
|
-
// Check that pendingStart was deleted
|
|
797
|
-
const pendingStarts = await ctx.db.query("pendingStart").collect();
|
|
798
|
-
expect(pendingStarts).toHaveLength(0);
|
|
799
|
-
|
|
800
|
-
// Check that work was added to running list
|
|
801
|
-
const state = await ctx.db.query("internalState").unique();
|
|
802
|
-
expect(state).toBeDefined();
|
|
803
|
-
expect(state!.running).toHaveLength(1);
|
|
804
|
-
expect(state!.running[0].workId).toBe(workId);
|
|
805
|
-
});
|
|
806
|
-
});
|
|
807
|
-
|
|
808
|
-
it("should schedule recovery for old jobs", async () => {
|
|
809
|
-
// Setup state with old running jobs
|
|
810
|
-
const oldTime = Date.now() - 5 * 60 * 1000 - 1000; // Older than recovery threshold
|
|
811
|
-
|
|
812
|
-
await t.run(async (ctx) => {
|
|
813
|
-
// Create work for the running list
|
|
814
|
-
const workId = await makeDummyWork(ctx);
|
|
815
|
-
|
|
816
|
-
// Schedule a function and get its ID
|
|
817
|
-
const scheduledId = await makeDummyScheduledFunction(ctx, workId);
|
|
818
|
-
|
|
819
|
-
// Create internal state with old job
|
|
820
|
-
await insertInternalState(ctx, {
|
|
719
|
+
// Force recovery to be eligible to run this iteration.
|
|
821
720
|
lastRecovery: 0n,
|
|
822
|
-
running: [{ workId, scheduledId, started: oldTime }],
|
|
823
721
|
});
|
|
722
|
+
return wid;
|
|
824
723
|
});
|
|
825
724
|
|
|
826
|
-
|
|
827
|
-
const segment = toSegment(60 * 60 * 1000);
|
|
828
|
-
await t.mutation(internal.loop.main, {
|
|
829
|
-
generation: 1n,
|
|
830
|
-
segment,
|
|
831
|
-
});
|
|
725
|
+
await runMain();
|
|
832
726
|
|
|
833
|
-
//
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
727
|
+
// We can't directly verify "recovery was scheduled" without inspecting
|
|
728
|
+
// the scheduler queue, but we can verify lastRecovery was advanced.
|
|
729
|
+
const after = await observe();
|
|
730
|
+
const state = await t.run(async (ctx) =>
|
|
731
|
+
ctx.db.query("internalState").unique(),
|
|
732
|
+
);
|
|
733
|
+
assert(state);
|
|
734
|
+
expect(state.lastRecovery).toBeGreaterThan(0n);
|
|
735
|
+
// Work is still in running (recovery removes it via complete, which
|
|
736
|
+
// happens in a separately-scheduled mutation).
|
|
737
|
+
expect(after.running.map((r) => r.workId)).toContain(workId);
|
|
843
738
|
});
|
|
844
739
|
});
|
|
845
740
|
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
await t.run(async (ctx) => {
|
|
850
|
-
await insertInternalState(ctx, { generation: 2n });
|
|
851
|
-
});
|
|
741
|
+
// ────────────────────────────────────────────────────────────────────
|
|
742
|
+
// Generation safety: stale main calls cannot clobber state
|
|
743
|
+
// ────────────────────────────────────────────────────────────────────
|
|
852
744
|
|
|
853
|
-
|
|
745
|
+
describe("generation safety", () => {
|
|
746
|
+
it("rejects main calls with the wrong generation", async () => {
|
|
747
|
+
await initialize();
|
|
748
|
+
// Current generation is 1n. Calling with 99n should error.
|
|
854
749
|
await expect(
|
|
855
|
-
t.mutation(internal.loop.
|
|
856
|
-
|
|
857
|
-
segment: 1n,
|
|
858
|
-
}),
|
|
859
|
-
).rejects.toThrow("generation mismatch");
|
|
860
|
-
});
|
|
861
|
-
|
|
862
|
-
it("should schedule main immediately if there are outstanding cancelations", async () => {
|
|
863
|
-
// Setup state with outstanding cancelations
|
|
864
|
-
await t.run(async (ctx) => {
|
|
865
|
-
// Create work for cancelation
|
|
866
|
-
const workId = await makeDummyWork(ctx);
|
|
867
|
-
|
|
868
|
-
// Create internal state
|
|
869
|
-
await insertInternalState(ctx, {});
|
|
870
|
-
|
|
871
|
-
// Create run status
|
|
872
|
-
await ctx.db.insert("runStatus", {
|
|
873
|
-
state: { kind: "running" },
|
|
874
|
-
});
|
|
875
|
-
|
|
876
|
-
// Create pending cancelation
|
|
877
|
-
await ctx.db.insert("pendingCancelation", {
|
|
878
|
-
workId,
|
|
879
|
-
segment: 1n,
|
|
880
|
-
});
|
|
881
|
-
});
|
|
882
|
-
|
|
883
|
-
// Call updateRunStatus
|
|
884
|
-
await t.mutation(internal.loop.updateRunStatus, {
|
|
885
|
-
generation: 1n,
|
|
886
|
-
segment: 1n,
|
|
887
|
-
});
|
|
888
|
-
|
|
889
|
-
// Verify main was scheduled (indirectly by checking runStatus)
|
|
890
|
-
await t.run(async (ctx) => {
|
|
891
|
-
// We can't directly check if main was scheduled,
|
|
892
|
-
// but we can verify the state was updated correctly
|
|
893
|
-
const runStatus = await ctx.db.query("runStatus").unique();
|
|
894
|
-
expect(runStatus).toBeDefined();
|
|
895
|
-
// The state should no longer be idle
|
|
896
|
-
expect(runStatus!.state.kind).not.toBe("idle");
|
|
897
|
-
});
|
|
898
|
-
});
|
|
899
|
-
|
|
900
|
-
it("should transition to idle state when there is no work", async () => {
|
|
901
|
-
// Setup state with no work
|
|
902
|
-
await t.run(async (ctx) => {
|
|
903
|
-
// Create internal state with no running jobs
|
|
904
|
-
await insertInternalState(ctx, {});
|
|
905
|
-
|
|
906
|
-
// Create run status in running state
|
|
907
|
-
await ctx.db.insert("runStatus", {
|
|
908
|
-
state: { kind: "running" },
|
|
909
|
-
});
|
|
910
|
-
});
|
|
911
|
-
|
|
912
|
-
// Call updateRunStatus
|
|
913
|
-
await t.mutation(internal.loop.updateRunStatus, {
|
|
914
|
-
generation: 1n,
|
|
915
|
-
segment: 1n,
|
|
916
|
-
});
|
|
917
|
-
|
|
918
|
-
// Verify idle state was set
|
|
919
|
-
await t.run(async (ctx) => {
|
|
920
|
-
const runStatus = await ctx.db.query("runStatus").unique();
|
|
921
|
-
expect(runStatus).toBeDefined();
|
|
922
|
-
expect(runStatus!.state.kind).toBe("idle");
|
|
923
|
-
assert(runStatus!.state.kind === "idle");
|
|
924
|
-
expect(runStatus!.state.generation).toBe(1n);
|
|
925
|
-
});
|
|
750
|
+
t.mutation(internal.loop.main, { generation: 99n }),
|
|
751
|
+
).rejects.toThrow(/generation mismatch/);
|
|
926
752
|
});
|
|
927
753
|
|
|
928
|
-
it("
|
|
929
|
-
|
|
930
|
-
const
|
|
931
|
-
|
|
932
|
-
await
|
|
933
|
-
|
|
934
|
-
// Create 10 work items and scheduled functions
|
|
935
|
-
const runningJobs = await Promise.all(
|
|
936
|
-
Array(10)
|
|
937
|
-
.fill(0)
|
|
938
|
-
.map(async () => {
|
|
939
|
-
const workId = await makeDummyWork(ctx);
|
|
940
|
-
|
|
941
|
-
// Schedule a function and get its ID
|
|
942
|
-
const scheduledId = await makeDummyScheduledFunction(ctx, workId);
|
|
943
|
-
|
|
944
|
-
return { workId, scheduledId, started: Date.now() };
|
|
945
|
-
}),
|
|
946
|
-
);
|
|
947
|
-
|
|
948
|
-
// Create internal state with max running jobs
|
|
949
|
-
await insertInternalState(ctx, {
|
|
950
|
-
running: runningJobs,
|
|
951
|
-
});
|
|
952
|
-
|
|
953
|
-
// Create run status
|
|
954
|
-
await ctx.db.insert("runStatus", {
|
|
955
|
-
state: { kind: "running" },
|
|
956
|
-
});
|
|
957
|
-
|
|
958
|
-
// Create future completion to trigger scheduling
|
|
959
|
-
await ctx.db.insert("pendingCompletion", {
|
|
960
|
-
workId: runningJobs[0].workId,
|
|
961
|
-
runResult: { kind: "success", returnValue: null },
|
|
962
|
-
segment: later,
|
|
963
|
-
retry: false,
|
|
964
|
-
});
|
|
965
|
-
});
|
|
966
|
-
|
|
967
|
-
// Call updateRunStatus
|
|
968
|
-
await t.mutation(internal.loop.updateRunStatus, {
|
|
969
|
-
generation: 1n,
|
|
970
|
-
segment: 1n,
|
|
971
|
-
});
|
|
972
|
-
|
|
973
|
-
// Verify scheduled state was set with saturated flag
|
|
974
|
-
await t.run(async (ctx) => {
|
|
975
|
-
const runStatus = await ctx.db.query("runStatus").unique();
|
|
976
|
-
expect(runStatus).toBeDefined();
|
|
977
|
-
expect(runStatus!.state.kind).toBe("scheduled");
|
|
978
|
-
assert(runStatus!.state.kind === "scheduled");
|
|
979
|
-
expect(runStatus!.state.saturated).toBe(true);
|
|
980
|
-
});
|
|
754
|
+
it("increments the generation each time main runs", async () => {
|
|
755
|
+
await initialize();
|
|
756
|
+
const before = (await observe()).generation;
|
|
757
|
+
await runMain();
|
|
758
|
+
const after = (await observe()).generation;
|
|
759
|
+
expect(after).toBeGreaterThan(before);
|
|
981
760
|
});
|
|
761
|
+
});
|
|
982
762
|
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
763
|
+
// ────────────────────────────────────────────────────────────────────
|
|
764
|
+
// Snapshot semantics: the snapshot-then-confirm safety net
|
|
765
|
+
// ────────────────────────────────────────────────────────────────────
|
|
766
|
+
|
|
767
|
+
describe("snapshot semantics", () => {
|
|
768
|
+
it("the snapshot read does not see the calling mutation's pending writes", async () => {
|
|
769
|
+
// Verifies the prototype's distinguishing feature:
|
|
770
|
+
// runSnapshotQuery from inside a mutation does NOT see writes the
|
|
771
|
+
// mutation has performed. ctx.runQuery does. This is what makes
|
|
772
|
+
// the snapshot-then-confirm pattern correct.
|
|
773
|
+
const { runSnapshotQuery } = await import("./future.js");
|
|
774
|
+
const result = await t.run(async (ctx) => {
|
|
775
|
+
const workId = await ctx.db.insert("work", {
|
|
776
|
+
fnType: "action",
|
|
777
|
+
fnHandle: "h",
|
|
778
|
+
fnName: "h",
|
|
779
|
+
fnArgs: {},
|
|
780
|
+
attempts: 0,
|
|
994
781
|
});
|
|
995
|
-
});
|
|
996
|
-
|
|
997
|
-
// Insert very old work
|
|
998
|
-
await t.run(async (ctx) => {
|
|
999
|
-
const workId = await makeDummyWork(ctx);
|
|
1000
782
|
await ctx.db.insert("pendingStart", {
|
|
1001
783
|
workId,
|
|
1002
|
-
segment:
|
|
784
|
+
segment: getCurrentSegment(),
|
|
1003
785
|
});
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
});
|
|
1011
|
-
|
|
1012
|
-
// Verify cursors were reset
|
|
1013
|
-
await t.run(async (ctx) => {
|
|
1014
|
-
const state = await ctx.db.query("internalState").unique();
|
|
1015
|
-
expect(state).toBeDefined();
|
|
1016
|
-
expect(state!.segmentCursors.incoming).toBe(0n);
|
|
1017
|
-
});
|
|
1018
|
-
|
|
1019
|
-
// Set maxParallelism to 0 so it doesn't schedule anything / make progress
|
|
1020
|
-
await setMaxParallelism(0);
|
|
1021
|
-
|
|
1022
|
-
// Run main
|
|
1023
|
-
await t.mutation(internal.loop.main, {
|
|
1024
|
-
generation: 1n,
|
|
1025
|
-
segment: now,
|
|
1026
|
-
});
|
|
1027
|
-
|
|
1028
|
-
// Verify start cursor weren't updated
|
|
1029
|
-
await t.run(async (ctx) => {
|
|
1030
|
-
const state = await ctx.db.query("internalState").unique();
|
|
1031
|
-
expect(state).toBeDefined();
|
|
1032
|
-
expect(state!.segmentCursors.incoming).toBe(0n);
|
|
1033
|
-
});
|
|
1034
|
-
});
|
|
1035
|
-
});
|
|
1036
|
-
|
|
1037
|
-
describe("complete function", () => {
|
|
1038
|
-
it("should run onComplete handlers and delete work", async () => {
|
|
1039
|
-
// Setup mock work with onComplete handler
|
|
1040
|
-
const workId = await t.run<Id<"work">>(async (ctx) => {
|
|
1041
|
-
const workId = await makeDummyWork(ctx, {
|
|
1042
|
-
attempts: 0,
|
|
1043
|
-
onComplete: {
|
|
1044
|
-
// TODO: make this a real handle
|
|
1045
|
-
fnHandle: "onComplete_handle",
|
|
1046
|
-
context: { data: "test" },
|
|
1047
|
-
},
|
|
786
|
+
const snap = await runSnapshotQuery(internal.loop.getPending, {
|
|
787
|
+
completionCursor: 0n,
|
|
788
|
+
cancelationCursor: 0n,
|
|
789
|
+
incomingCursor: 0n,
|
|
790
|
+
maxParallelism: 10,
|
|
791
|
+
runningCount: 0,
|
|
1048
792
|
});
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
runResult: { kind: "success", returnValue: null },
|
|
1058
|
-
attempt: 0,
|
|
1059
|
-
},
|
|
1060
|
-
],
|
|
1061
|
-
});
|
|
1062
|
-
|
|
1063
|
-
// Verify work was deleted
|
|
1064
|
-
await t.run(async (ctx) => {
|
|
1065
|
-
const work = await ctx.db.get(workId);
|
|
1066
|
-
expect(work).toBeNull();
|
|
1067
|
-
});
|
|
1068
|
-
});
|
|
1069
|
-
|
|
1070
|
-
it("should handle missing work gracefully", async () => {
|
|
1071
|
-
// Call complete with non-existent work ID
|
|
1072
|
-
const workId = await t.run(async (ctx) => {
|
|
1073
|
-
const id = await makeDummyWork(ctx, { attempts: 0 });
|
|
1074
|
-
await ctx.db.delete(id);
|
|
1075
|
-
return id;
|
|
1076
|
-
});
|
|
1077
|
-
await t.mutation(internal.complete.complete, {
|
|
1078
|
-
jobs: [
|
|
1079
|
-
{
|
|
1080
|
-
workId,
|
|
1081
|
-
runResult: { kind: "success", returnValue: null },
|
|
1082
|
-
attempt: 0,
|
|
1083
|
-
},
|
|
1084
|
-
],
|
|
1085
|
-
});
|
|
1086
|
-
|
|
1087
|
-
// No error should be thrown
|
|
1088
|
-
});
|
|
1089
|
-
});
|
|
1090
|
-
|
|
1091
|
-
describe("status cooldown", () => {
|
|
1092
|
-
it("should stay running within the cooldown window", async () => {
|
|
1093
|
-
const segment = getNextSegment();
|
|
1094
|
-
await t.run(async (ctx) => {
|
|
1095
|
-
await insertInternalState(ctx);
|
|
1096
|
-
await ctx.db.insert("runStatus", { state: { kind: "running" } });
|
|
1097
|
-
|
|
1098
|
-
const workId = await makeDummyWork(ctx);
|
|
1099
|
-
await ctx.db.insert("pendingStart", { workId, segment });
|
|
1100
|
-
});
|
|
1101
|
-
|
|
1102
|
-
// Process the work
|
|
1103
|
-
await t.mutation(internal.loop.main, { generation: 1n, segment });
|
|
1104
|
-
|
|
1105
|
-
// Advance less than the cooldown
|
|
1106
|
-
vi.setSystemTime(Date.now() + STATUS_COOLDOWN - 1000);
|
|
1107
|
-
|
|
1108
|
-
// updateRunStatus should schedule main again (staying running)
|
|
1109
|
-
await t.mutation(internal.loop.updateRunStatus, {
|
|
1110
|
-
generation: 2n,
|
|
1111
|
-
segment,
|
|
1112
|
-
});
|
|
1113
|
-
|
|
1114
|
-
// runStatus should still be "running" — no transition
|
|
1115
|
-
await t.run(async (ctx) => {
|
|
1116
|
-
const runStatus = await ctx.db.query("runStatus").unique();
|
|
1117
|
-
assert(runStatus);
|
|
1118
|
-
expect(runStatus.state.kind).toBe("running");
|
|
793
|
+
const real = await ctx.runQuery(internal.loop.getPending, {
|
|
794
|
+
completionCursor: 0n,
|
|
795
|
+
cancelationCursor: 0n,
|
|
796
|
+
incomingCursor: 0n,
|
|
797
|
+
maxParallelism: 10,
|
|
798
|
+
runningCount: 0,
|
|
799
|
+
});
|
|
800
|
+
return { snap: snap.allStarts.length, real: real.allStarts.length };
|
|
1119
801
|
});
|
|
802
|
+
expect(result.snap).toBe(0);
|
|
803
|
+
expect(result.real).toBe(1);
|
|
1120
804
|
});
|
|
1121
805
|
|
|
1122
|
-
it("
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
const workId = await makeDummyWork(ctx);
|
|
1129
|
-
await ctx.db.insert("pendingStart", { workId, segment });
|
|
1130
|
-
});
|
|
1131
|
-
|
|
1132
|
-
// Process the work
|
|
1133
|
-
await t.mutation(internal.loop.main, { generation: 1n, segment });
|
|
806
|
+
it("processes work that was committed before main started", async () => {
|
|
807
|
+
// The snapshot read is at a later snapshot than the inserts,
|
|
808
|
+
// so it sees them. This is the common case.
|
|
809
|
+
await initialize();
|
|
810
|
+
const workId = await enqueueWork();
|
|
1134
811
|
|
|
1135
|
-
|
|
1136
|
-
vi.setSystemTime(Date.now() + STATUS_COOLDOWN + 1000);
|
|
812
|
+
await runMain();
|
|
1137
813
|
|
|
1138
|
-
|
|
1139
|
-
await t.mutation(internal.loop.updateRunStatus, {
|
|
1140
|
-
generation: 2n,
|
|
1141
|
-
segment,
|
|
1142
|
-
});
|
|
1143
|
-
|
|
1144
|
-
await t.run(async (ctx) => {
|
|
1145
|
-
const runStatus = await ctx.db.query("runStatus").unique();
|
|
1146
|
-
assert(runStatus);
|
|
1147
|
-
// Should have transitioned out of running (to scheduled or idle)
|
|
1148
|
-
expect(runStatus.state.kind).not.toBe("running");
|
|
1149
|
-
});
|
|
814
|
+
expect((await observe()).running.map((r) => r.workId)).toEqual([workId]);
|
|
1150
815
|
});
|
|
816
|
+
});
|
|
1151
817
|
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
await insertInternalState(ctx);
|
|
1156
|
-
await ctx.db.insert("runStatus", { state: { kind: "running" } });
|
|
1157
|
-
|
|
1158
|
-
const workId = await makeDummyWork(ctx);
|
|
1159
|
-
await ctx.db.insert("pendingStart", { workId, segment });
|
|
1160
|
-
});
|
|
1161
|
-
|
|
1162
|
-
// Process wave 1
|
|
1163
|
-
await t.mutation(internal.loop.main, { generation: 1n, segment });
|
|
1164
|
-
|
|
1165
|
-
// Advance 1 second (within cooldown)
|
|
1166
|
-
vi.setSystemTime(Date.now() + 1000);
|
|
1167
|
-
const segment2 = getNextSegment();
|
|
818
|
+
// ────────────────────────────────────────────────────────────────────
|
|
819
|
+
// Backwards compatibility with the pre-merge API
|
|
820
|
+
// ────────────────────────────────────────────────────────────────────
|
|
1168
821
|
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
});
|
|
1174
|
-
|
|
1175
|
-
// Enqueue wave 2 while the loop is still warm
|
|
1176
|
-
await t.run(async (ctx) => {
|
|
1177
|
-
const workId2 = await makeDummyWork(ctx);
|
|
1178
|
-
await ctx.db.insert("pendingStart", {
|
|
1179
|
-
workId: workId2,
|
|
1180
|
-
segment: segment2,
|
|
1181
|
-
});
|
|
1182
|
-
});
|
|
822
|
+
describe("backwards compatibility", () => {
|
|
823
|
+
it("main accepts (and ignores) a legacy `segment` arg", async () => {
|
|
824
|
+
await initialize();
|
|
825
|
+
const workId = await enqueueWork();
|
|
1183
826
|
|
|
1184
|
-
// The
|
|
827
|
+
// The legacy callsites pass `segment`; the new main treats it as
|
|
828
|
+
// optional. Calls should still process work as expected.
|
|
1185
829
|
await t.mutation(internal.loop.main, {
|
|
1186
|
-
generation:
|
|
1187
|
-
segment:
|
|
830
|
+
generation: 1n,
|
|
831
|
+
segment: 12345n,
|
|
1188
832
|
});
|
|
1189
833
|
|
|
1190
|
-
|
|
1191
|
-
await t.run(async (ctx) => {
|
|
1192
|
-
const state = await ctx.db.query("internalState").unique();
|
|
1193
|
-
assert(state);
|
|
1194
|
-
expect(state.running).toHaveLength(2);
|
|
1195
|
-
// pendingStart should be empty
|
|
1196
|
-
const pending = await ctx.db.query("pendingStart").collect();
|
|
1197
|
-
expect(pending).toHaveLength(0);
|
|
1198
|
-
});
|
|
834
|
+
expect((await observe()).running.map((r) => r.workId)).toEqual([workId]);
|
|
1199
835
|
});
|
|
1200
836
|
|
|
1201
|
-
it("
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
const WAVE_GAP_MS = 1000; // 1s between waves, well within 5s cooldown
|
|
1205
|
-
|
|
1206
|
-
await t.run(async (ctx) => {
|
|
1207
|
-
await insertInternalState(ctx);
|
|
1208
|
-
await ctx.db.insert("runStatus", { state: { kind: "running" } });
|
|
1209
|
-
});
|
|
1210
|
-
|
|
1211
|
-
let generation = 1n;
|
|
1212
|
-
const statusChecks: string[] = [];
|
|
1213
|
-
|
|
1214
|
-
for (let wave = 0; wave < WAVE_COUNT; wave++) {
|
|
1215
|
-
if (wave > 0) {
|
|
1216
|
-
// Advance time between waves (within cooldown)
|
|
1217
|
-
vi.setSystemTime(Date.now() + WAVE_GAP_MS);
|
|
1218
|
-
}
|
|
1219
|
-
|
|
1220
|
-
const waveSeg = getNextSegment();
|
|
1221
|
-
|
|
1222
|
-
// Enqueue tasks for this wave
|
|
1223
|
-
await t.run(async (ctx) => {
|
|
1224
|
-
for (let i = 0; i < TASKS_PER_WAVE; i++) {
|
|
1225
|
-
const workId = await makeDummyWork(ctx);
|
|
1226
|
-
await ctx.db.insert("pendingStart", { workId, segment: waveSeg });
|
|
1227
|
-
}
|
|
1228
|
-
});
|
|
1229
|
-
|
|
1230
|
-
// Run main to process the wave
|
|
1231
|
-
await t.mutation(internal.loop.main, {
|
|
1232
|
-
generation,
|
|
1233
|
-
segment: waveSeg,
|
|
1234
|
-
});
|
|
1235
|
-
generation++;
|
|
1236
|
-
|
|
1237
|
-
// Check status after updateRunStatus
|
|
1238
|
-
await t.mutation(internal.loop.updateRunStatus, {
|
|
1239
|
-
generation,
|
|
1240
|
-
segment: waveSeg,
|
|
1241
|
-
});
|
|
1242
|
-
|
|
1243
|
-
const status = await t.run(async (ctx) => {
|
|
1244
|
-
const runStatus = await ctx.db.query("runStatus").unique();
|
|
1245
|
-
assert(runStatus);
|
|
1246
|
-
return runStatus.state.kind;
|
|
1247
|
-
});
|
|
1248
|
-
statusChecks.push(status);
|
|
1249
|
-
|
|
1250
|
-
// If main was scheduled by cooldown, run it to advance generation
|
|
1251
|
-
if (status === "running") {
|
|
1252
|
-
// The cooldown scheduled main for next segment — run it so
|
|
1253
|
-
// generation stays consistent for the next wave.
|
|
1254
|
-
const nextSeg = getNextSegment();
|
|
1255
|
-
await t.mutation(internal.loop.main, {
|
|
1256
|
-
generation,
|
|
1257
|
-
segment: nextSeg,
|
|
1258
|
-
});
|
|
1259
|
-
generation++;
|
|
1260
|
-
}
|
|
1261
|
-
}
|
|
1262
|
-
|
|
1263
|
-
// During the cooldown window, every wave should see "running"
|
|
1264
|
-
for (let i = 0; i < WAVE_COUNT; i++) {
|
|
1265
|
-
expect(statusChecks[i]).toBe("running");
|
|
1266
|
-
}
|
|
1267
|
-
|
|
1268
|
-
// After the cooldown expires, updateRunStatus should transition.
|
|
1269
|
-
// Don't run main again — that would refresh the cursors.
|
|
1270
|
-
vi.setSystemTime(Date.now() + STATUS_COOLDOWN + 1000);
|
|
1271
|
-
|
|
837
|
+
it("updateRunStatus schedules a main call (forwards in-flight upgrade traffic)", async () => {
|
|
838
|
+
await initialize();
|
|
839
|
+
// A pre-upgrade scheduled call lands here after deploy.
|
|
1272
840
|
await t.mutation(internal.loop.updateRunStatus, {
|
|
1273
|
-
generation,
|
|
1274
|
-
segment:
|
|
1275
|
-
});
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
expect(
|
|
841
|
+
generation: 1n,
|
|
842
|
+
segment: 12345n,
|
|
843
|
+
});
|
|
844
|
+
// The forwarder should have scheduled main; we don't drain the
|
|
845
|
+
// full pipeline (that's covered by the other tests). Just verify
|
|
846
|
+
// a main call was queued.
|
|
847
|
+
const scheduled = await t.run(async (ctx) =>
|
|
848
|
+
ctx.db.system.query("_scheduled_functions").collect(),
|
|
849
|
+
);
|
|
850
|
+
const mainCalls = scheduled.filter((s) => s.name.endsWith("loop:main"));
|
|
851
|
+
expect(mainCalls.length).toBeGreaterThan(0);
|
|
1284
852
|
});
|
|
1285
853
|
});
|
|
1286
854
|
});
|