@convex-dev/workpool 0.2.0-beta.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -16
- package/dist/commonjs/client/index.d.ts +3 -3
- package/dist/commonjs/client/index.d.ts.map +1 -1
- package/dist/commonjs/client/index.js +10 -5
- package/dist/commonjs/client/index.js.map +1 -1
- package/dist/commonjs/component/complete.d.ts +89 -0
- package/dist/commonjs/component/complete.d.ts.map +1 -0
- package/dist/commonjs/component/complete.js +80 -0
- package/dist/commonjs/component/complete.js.map +1 -0
- package/dist/commonjs/component/kick.d.ts +1 -2
- package/dist/commonjs/component/kick.d.ts.map +1 -1
- package/dist/commonjs/component/kick.js +7 -5
- package/dist/commonjs/component/kick.js.map +1 -1
- package/dist/commonjs/component/lib.d.ts +3 -3
- package/dist/commonjs/component/lib.d.ts.map +1 -1
- package/dist/commonjs/component/lib.js +43 -20
- package/dist/commonjs/component/lib.js.map +1 -1
- package/dist/commonjs/component/logging.d.ts.map +1 -1
- package/dist/commonjs/component/logging.js +1 -2
- package/dist/commonjs/component/logging.js.map +1 -1
- package/dist/commonjs/component/loop.d.ts +1 -14
- package/dist/commonjs/component/loop.d.ts.map +1 -1
- package/dist/commonjs/component/loop.js +215 -178
- package/dist/commonjs/component/loop.js.map +1 -1
- package/dist/commonjs/component/recovery.d.ts +16 -0
- package/dist/commonjs/component/recovery.d.ts.map +1 -1
- package/dist/commonjs/component/recovery.js +64 -44
- package/dist/commonjs/component/recovery.js.map +1 -1
- package/dist/commonjs/component/schema.d.ts +6 -2
- package/dist/commonjs/component/schema.d.ts.map +1 -1
- package/dist/commonjs/component/schema.js +5 -3
- package/dist/commonjs/component/schema.js.map +1 -1
- package/dist/commonjs/component/shared.d.ts +20 -11
- package/dist/commonjs/component/shared.d.ts.map +1 -1
- package/dist/commonjs/component/shared.js +18 -5
- package/dist/commonjs/component/shared.js.map +1 -1
- package/dist/commonjs/component/stats.d.ts +21 -13
- package/dist/commonjs/component/stats.d.ts.map +1 -1
- package/dist/commonjs/component/stats.js +32 -22
- package/dist/commonjs/component/stats.js.map +1 -1
- package/dist/commonjs/component/worker.d.ts +2 -12
- package/dist/commonjs/component/worker.d.ts.map +1 -1
- package/dist/commonjs/component/worker.js +23 -36
- package/dist/commonjs/component/worker.js.map +1 -1
- package/dist/esm/client/index.d.ts +3 -3
- package/dist/esm/client/index.d.ts.map +1 -1
- package/dist/esm/client/index.js +10 -5
- package/dist/esm/client/index.js.map +1 -1
- package/dist/esm/component/complete.d.ts +89 -0
- package/dist/esm/component/complete.d.ts.map +1 -0
- package/dist/esm/component/complete.js +80 -0
- package/dist/esm/component/complete.js.map +1 -0
- package/dist/esm/component/kick.d.ts +1 -2
- package/dist/esm/component/kick.d.ts.map +1 -1
- package/dist/esm/component/kick.js +7 -5
- package/dist/esm/component/kick.js.map +1 -1
- package/dist/esm/component/lib.d.ts +3 -3
- package/dist/esm/component/lib.d.ts.map +1 -1
- package/dist/esm/component/lib.js +43 -20
- package/dist/esm/component/lib.js.map +1 -1
- package/dist/esm/component/logging.d.ts.map +1 -1
- package/dist/esm/component/logging.js +1 -2
- package/dist/esm/component/logging.js.map +1 -1
- package/dist/esm/component/loop.d.ts +1 -14
- package/dist/esm/component/loop.d.ts.map +1 -1
- package/dist/esm/component/loop.js +215 -178
- package/dist/esm/component/loop.js.map +1 -1
- package/dist/esm/component/recovery.d.ts +16 -0
- package/dist/esm/component/recovery.d.ts.map +1 -1
- package/dist/esm/component/recovery.js +64 -44
- package/dist/esm/component/recovery.js.map +1 -1
- package/dist/esm/component/schema.d.ts +6 -2
- package/dist/esm/component/schema.d.ts.map +1 -1
- package/dist/esm/component/schema.js +5 -3
- package/dist/esm/component/schema.js.map +1 -1
- package/dist/esm/component/shared.d.ts +20 -11
- package/dist/esm/component/shared.d.ts.map +1 -1
- package/dist/esm/component/shared.js +18 -5
- package/dist/esm/component/shared.js.map +1 -1
- package/dist/esm/component/stats.d.ts +21 -13
- package/dist/esm/component/stats.d.ts.map +1 -1
- package/dist/esm/component/stats.js +32 -22
- package/dist/esm/component/stats.js.map +1 -1
- package/dist/esm/component/worker.d.ts +2 -12
- package/dist/esm/component/worker.d.ts.map +1 -1
- package/dist/esm/component/worker.js +23 -36
- package/dist/esm/component/worker.js.map +1 -1
- package/package.json +7 -6
- package/src/client/index.ts +18 -8
- package/src/component/README.md +15 -15
- package/src/component/_generated/api.d.ts +7 -2
- package/src/component/complete.test.ts +508 -0
- package/src/component/complete.ts +98 -0
- package/src/component/kick.test.ts +13 -13
- package/src/component/kick.ts +13 -8
- package/src/component/lib.test.ts +262 -17
- package/src/component/lib.ts +55 -24
- package/src/component/logging.ts +1 -2
- package/src/component/loop.test.ts +1158 -0
- package/src/component/loop.ts +289 -221
- package/src/component/recovery.test.ts +541 -0
- package/src/component/recovery.ts +80 -63
- package/src/component/schema.ts +6 -4
- package/src/component/shared.ts +21 -6
- package/src/component/stats.ts +48 -25
- package/src/component/worker.ts +25 -38
package/src/component/loop.ts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { WithoutSystemFields } from "convex/server";
|
|
2
2
|
import { v } from "convex/values";
|
|
3
3
|
import { internal } from "./_generated/api.js";
|
|
4
4
|
import { Doc, Id } from "./_generated/dataModel.js";
|
|
5
5
|
import { internalMutation, MutationCtx } from "./_generated/server.js";
|
|
6
|
-
import {
|
|
6
|
+
import type { CompleteJob } from "./complete.js";
|
|
7
7
|
import {
|
|
8
8
|
createLogger,
|
|
9
9
|
DEFAULT_LOG_LEVEL,
|
|
@@ -14,10 +14,11 @@ import {
|
|
|
14
14
|
boundScheduledTime,
|
|
15
15
|
Config,
|
|
16
16
|
currentSegment,
|
|
17
|
+
DEFAULT_MAX_PARALLELISM,
|
|
17
18
|
fromSegment,
|
|
19
|
+
max,
|
|
18
20
|
nextSegment,
|
|
19
|
-
|
|
20
|
-
runResult,
|
|
21
|
+
RunResult,
|
|
21
22
|
toSegment,
|
|
22
23
|
} from "./shared.js";
|
|
23
24
|
import { recordCompleted, recordReport, recordStarted } from "./stats.js";
|
|
@@ -45,49 +46,50 @@ export const INITIAL_STATE: WithoutSystemFields<Doc<"internalState">> = {
|
|
|
45
46
|
|
|
46
47
|
// There should only ever be at most one of these scheduled or running.
|
|
47
48
|
export const main = internalMutation({
|
|
48
|
-
args: {
|
|
49
|
-
|
|
50
|
-
segment: v.int64(),
|
|
51
|
-
},
|
|
52
|
-
handler: async (ctx, args) => {
|
|
49
|
+
args: { generation: v.int64(), segment: v.int64() },
|
|
50
|
+
handler: async (ctx, { generation, segment }) => {
|
|
53
51
|
// State will be modified and patched at the end of the function.
|
|
54
52
|
const state = await getOrCreateState(ctx);
|
|
55
|
-
if (
|
|
53
|
+
if (generation !== state.generation) {
|
|
56
54
|
throw new Error(
|
|
57
|
-
`generation mismatch: ${
|
|
55
|
+
`generation mismatch: ${generation} !== ${state.generation}`
|
|
58
56
|
);
|
|
59
57
|
}
|
|
60
58
|
state.generation++;
|
|
59
|
+
const runStatus = await getOrCreateRunningStatus(ctx);
|
|
60
|
+
if (runStatus.state.kind !== "running") {
|
|
61
|
+
await ctx.db.patch(runStatus._id, {
|
|
62
|
+
state: { kind: "running" },
|
|
63
|
+
});
|
|
64
|
+
}
|
|
61
65
|
|
|
62
66
|
const globals = await getGlobals(ctx);
|
|
63
67
|
const console = createLogger(globals.logLevel);
|
|
68
|
+
const delayMs = Date.now() - fromSegment(segment);
|
|
69
|
+
console.debug(`[main] generation ${generation} behind: ${delayMs}ms`);
|
|
64
70
|
|
|
65
71
|
// Read pendingCompletions, including retry handling.
|
|
66
72
|
console.time("[main] pendingCompletion");
|
|
67
|
-
const
|
|
73
|
+
const toCancel = await handleCompletions(ctx, state, segment, console);
|
|
68
74
|
console.timeEnd("[main] pendingCompletion");
|
|
69
75
|
|
|
70
76
|
// Read pendingCancelation, deleting from pendingStart. If it's still running, queue to cancel.
|
|
71
77
|
console.time("[main] pendingCancelation");
|
|
72
|
-
|
|
78
|
+
await handleCancelation(ctx, state, segment, console, toCancel);
|
|
73
79
|
console.timeEnd("[main] pendingCancelation");
|
|
74
80
|
|
|
75
81
|
if (state.running.length === 0) {
|
|
76
82
|
// If there's nothing active, reset lastRecovery.
|
|
77
|
-
state.lastRecovery =
|
|
78
|
-
} else if (
|
|
83
|
+
state.lastRecovery = segment;
|
|
84
|
+
} else if (segment - state.lastRecovery >= RECOVERY_PERIOD_SEGMENTS) {
|
|
79
85
|
// Otherwise schedule recovery for any old jobs.
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
if (jobs.length) {
|
|
83
|
-
await ctx.scheduler.runAfter(0, internal.recovery.recover, { jobs });
|
|
84
|
-
}
|
|
85
|
-
state.lastRecovery = args.segment;
|
|
86
|
+
await handleRecovery(ctx, state, console);
|
|
87
|
+
state.lastRecovery = segment;
|
|
86
88
|
}
|
|
87
89
|
|
|
88
90
|
// Read pendingStart up to max capacity. Update the config, and incomingSegmentCursor.
|
|
89
91
|
console.time("[main] pendingStart");
|
|
90
|
-
await handleStart(ctx, state,
|
|
92
|
+
await handleStart(ctx, state, segment, console, globals);
|
|
91
93
|
console.timeEnd("[main] pendingStart");
|
|
92
94
|
|
|
93
95
|
if (Date.now() - state.report.lastReportTs >= MINUTE) {
|
|
@@ -98,7 +100,7 @@ export const main = internalMutation({
|
|
|
98
100
|
// It's been a while, let's start fresh.
|
|
99
101
|
lastReportTs = Date.now();
|
|
100
102
|
}
|
|
101
|
-
|
|
103
|
+
recordReport(console, state);
|
|
102
104
|
state.report = {
|
|
103
105
|
completed: 0,
|
|
104
106
|
succeeded: 0,
|
|
@@ -110,100 +112,82 @@ export const main = internalMutation({
|
|
|
110
112
|
}
|
|
111
113
|
|
|
112
114
|
await ctx.db.replace(state._id, state);
|
|
113
|
-
await ctx.scheduler.runAfter(0, internal.loop.complete, { done });
|
|
114
115
|
await ctx.scheduler.runAfter(0, internal.loop.updateRunStatus, {
|
|
115
116
|
generation: state.generation,
|
|
117
|
+
segment,
|
|
116
118
|
});
|
|
117
|
-
|
|
118
|
-
});
|
|
119
|
-
|
|
120
|
-
export const complete = internalMutation({
|
|
121
|
-
args: {
|
|
122
|
-
done: v.array(v.object({ runResult, workId: v.id("work") })),
|
|
123
|
-
},
|
|
124
|
-
handler: async (ctx, args) => {
|
|
125
|
-
const globals = await getGlobals(ctx);
|
|
126
|
-
const console = createLogger(globals.logLevel);
|
|
127
|
-
await Promise.all(
|
|
128
|
-
args.done.map(async ({ runResult, workId }) => {
|
|
129
|
-
const work = await ctx.db.get(workId);
|
|
130
|
-
if (!work) {
|
|
131
|
-
console.warn(`[complete] ${workId} is done, but its work is gone`);
|
|
132
|
-
return;
|
|
133
|
-
}
|
|
134
|
-
if (work.onComplete) {
|
|
135
|
-
try {
|
|
136
|
-
const handle = work.onComplete.fnHandle as FunctionHandle<
|
|
137
|
-
"mutation",
|
|
138
|
-
OnCompleteArgs,
|
|
139
|
-
void
|
|
140
|
-
>;
|
|
141
|
-
await ctx.runMutation(handle, {
|
|
142
|
-
workId: work._id,
|
|
143
|
-
context: work.onComplete.context,
|
|
144
|
-
result: runResult,
|
|
145
|
-
});
|
|
146
|
-
console.debug(`[complete] onComplete for ${workId} completed`);
|
|
147
|
-
} catch (e) {
|
|
148
|
-
console.error(
|
|
149
|
-
`[complete] error running onComplete for ${workId}`,
|
|
150
|
-
e
|
|
151
|
-
);
|
|
152
|
-
}
|
|
153
|
-
}
|
|
154
|
-
await ctx.db.delete(workId);
|
|
155
|
-
})
|
|
156
|
-
);
|
|
119
|
+
// TODO: if there were more cancellations, schedule main directly.
|
|
157
120
|
},
|
|
158
121
|
});
|
|
159
122
|
|
|
160
123
|
export const updateRunStatus = internalMutation({
|
|
161
|
-
args: { generation: v.int64() },
|
|
162
|
-
handler: async (ctx,
|
|
124
|
+
args: { generation: v.int64(), segment: v.int64() },
|
|
125
|
+
handler: async (ctx, { generation, segment }) => {
|
|
163
126
|
const globals = await getGlobals(ctx);
|
|
164
127
|
const console = createLogger(globals.logLevel);
|
|
165
128
|
const maxParallelism = globals.maxParallelism;
|
|
166
129
|
const state = await getOrCreateState(ctx);
|
|
167
|
-
if (
|
|
130
|
+
if (generation !== state.generation) {
|
|
168
131
|
throw new Error(
|
|
169
|
-
`generation mismatch: ${
|
|
132
|
+
`generation mismatch: ${generation} !== ${state.generation}`
|
|
170
133
|
);
|
|
171
134
|
}
|
|
172
135
|
|
|
173
136
|
console.time("[updateRunStatus] outstandingCancelations");
|
|
174
|
-
const thisSegment = currentSegment();
|
|
175
137
|
const outstandingCancelations = await getNextUp(ctx, "pendingCancelation", {
|
|
176
138
|
start: state.segmentCursors.cancelation,
|
|
177
|
-
end:
|
|
139
|
+
end: segment,
|
|
178
140
|
});
|
|
179
141
|
console.timeEnd("[updateRunStatus] outstandingCancelations");
|
|
180
142
|
if (outstandingCancelations) {
|
|
181
143
|
await ctx.scheduler.runAfter(0, internal.loop.main, {
|
|
182
|
-
generation
|
|
183
|
-
segment
|
|
144
|
+
generation,
|
|
145
|
+
segment,
|
|
184
146
|
});
|
|
185
147
|
return;
|
|
186
148
|
}
|
|
187
149
|
|
|
150
|
+
// TODO: check for current segment (or from args) first, to avoid OCCs.
|
|
188
151
|
console.time("[updateRunStatus] nextSegmentIsActionable");
|
|
189
|
-
const
|
|
152
|
+
const next = max(segment + 1n, currentSegment());
|
|
153
|
+
const nextIsActionable = await nextSegmentIsActionable(
|
|
190
154
|
ctx,
|
|
191
155
|
state,
|
|
192
|
-
maxParallelism
|
|
156
|
+
maxParallelism,
|
|
157
|
+
next
|
|
193
158
|
);
|
|
194
159
|
console.timeEnd("[updateRunStatus] nextSegmentIsActionable");
|
|
195
160
|
|
|
196
|
-
const start = nextSegment();
|
|
197
161
|
if (nextIsActionable) {
|
|
162
|
+
await ctx.scheduler.runAt(
|
|
163
|
+
boundScheduledTime(fromSegment(next), console),
|
|
164
|
+
internal.loop.main,
|
|
165
|
+
{
|
|
166
|
+
generation,
|
|
167
|
+
segment: next,
|
|
168
|
+
}
|
|
169
|
+
);
|
|
170
|
+
return;
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
console.time("[updateRunStatus] oldSegmentIsActionable");
|
|
174
|
+
const [oldIsActionable, cursors] = await oldSegmentIsActionable(
|
|
175
|
+
ctx,
|
|
176
|
+
state,
|
|
177
|
+
maxParallelism
|
|
178
|
+
);
|
|
179
|
+
console.timeEnd("[updateRunStatus] oldSegmentIsActionable");
|
|
180
|
+
|
|
181
|
+
if (oldIsActionable) {
|
|
198
182
|
await ctx.db.patch(state._id, {
|
|
199
183
|
segmentCursors: {
|
|
200
184
|
...state.segmentCursors,
|
|
201
185
|
...cursors,
|
|
202
186
|
},
|
|
203
187
|
});
|
|
204
|
-
await ctx.scheduler.
|
|
205
|
-
generation
|
|
206
|
-
segment:
|
|
188
|
+
await ctx.scheduler.runAfter(0, internal.loop.main, {
|
|
189
|
+
generation,
|
|
190
|
+
segment: currentSegment(),
|
|
207
191
|
});
|
|
208
192
|
return;
|
|
209
193
|
}
|
|
@@ -220,39 +204,45 @@ export const updateRunStatus = internalMutation({
|
|
|
220
204
|
}
|
|
221
205
|
const docs = await Promise.all(
|
|
222
206
|
actionableTables.map(async (tableName) =>
|
|
223
|
-
getNextUp(ctx, tableName, { start })
|
|
207
|
+
getNextUp(ctx, tableName, { start: next })
|
|
224
208
|
)
|
|
225
209
|
);
|
|
226
210
|
console.timeEnd("[updateRunStatus] findNextSegment");
|
|
227
|
-
let
|
|
211
|
+
let targetSegment = docs.map((d) => d?.segment).sort()[0];
|
|
228
212
|
const runStatus = await getOrCreateRunningStatus(ctx);
|
|
229
213
|
const saturated = state.running.length >= maxParallelism;
|
|
230
|
-
if (
|
|
214
|
+
if (targetSegment !== undefined || state.running.length > 0) {
|
|
231
215
|
// If there's something to do, schedule for next actionable segment.
|
|
232
216
|
// Or the next recovery, whichever comes first.
|
|
233
217
|
const nextRecoverySegment = state.lastRecovery + RECOVERY_PERIOD_SEGMENTS;
|
|
234
|
-
if (!
|
|
235
|
-
|
|
218
|
+
if (!targetSegment || targetSegment > nextRecoverySegment) {
|
|
219
|
+
targetSegment = nextRecoverySegment;
|
|
236
220
|
}
|
|
237
221
|
const scheduledId = await ctx.scheduler.runAt(
|
|
238
|
-
fromSegment(
|
|
222
|
+
boundScheduledTime(fromSegment(targetSegment), console),
|
|
239
223
|
internal.loop.main,
|
|
240
|
-
{ generation
|
|
224
|
+
{ generation, segment: targetSegment }
|
|
241
225
|
);
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
226
|
+
if (targetSegment > nextSegment()) {
|
|
227
|
+
await ctx.db.patch(runStatus._id, {
|
|
228
|
+
state: {
|
|
229
|
+
kind: "scheduled",
|
|
230
|
+
scheduledId,
|
|
231
|
+
saturated,
|
|
232
|
+
generation,
|
|
233
|
+
segment: targetSegment,
|
|
234
|
+
},
|
|
235
|
+
});
|
|
236
|
+
} else {
|
|
237
|
+
console.debug(
|
|
238
|
+
`[updateRunStatus] staying running because it's the next segment`
|
|
239
|
+
);
|
|
240
|
+
}
|
|
251
241
|
return;
|
|
252
242
|
}
|
|
253
243
|
// There seems to be nothing in the future to do, so go idle.
|
|
254
244
|
await ctx.db.patch(runStatus._id, {
|
|
255
|
-
state: { kind: "idle", generation
|
|
245
|
+
state: { kind: "idle", generation },
|
|
256
246
|
});
|
|
257
247
|
},
|
|
258
248
|
});
|
|
@@ -260,19 +250,17 @@ export const updateRunStatus = internalMutation({
|
|
|
260
250
|
async function nextSegmentIsActionable(
|
|
261
251
|
ctx: MutationCtx,
|
|
262
252
|
state: Doc<"internalState">,
|
|
263
|
-
maxParallelism: number
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
// First, try with our cursor range, up to next segment.
|
|
268
|
-
const end = nextSegment();
|
|
253
|
+
maxParallelism: number,
|
|
254
|
+
end: bigint
|
|
255
|
+
): Promise<boolean> {
|
|
256
|
+
// First, try with our cursor range, up to end.
|
|
269
257
|
if (
|
|
270
258
|
await getNextUp(ctx, "pendingCancelation", {
|
|
271
259
|
start: state.segmentCursors.cancelation,
|
|
272
260
|
end,
|
|
273
261
|
})
|
|
274
262
|
) {
|
|
275
|
-
return
|
|
263
|
+
return true;
|
|
276
264
|
}
|
|
277
265
|
if (
|
|
278
266
|
await getNextUp(ctx, "pendingCompletion", {
|
|
@@ -280,7 +268,7 @@ async function nextSegmentIsActionable(
|
|
|
280
268
|
end,
|
|
281
269
|
})
|
|
282
270
|
) {
|
|
283
|
-
return
|
|
271
|
+
return true;
|
|
284
272
|
}
|
|
285
273
|
if (state.running.length < maxParallelism) {
|
|
286
274
|
if (
|
|
@@ -289,9 +277,19 @@ async function nextSegmentIsActionable(
|
|
|
289
277
|
end,
|
|
290
278
|
})
|
|
291
279
|
) {
|
|
292
|
-
return
|
|
280
|
+
return true;
|
|
293
281
|
}
|
|
294
282
|
}
|
|
283
|
+
return false;
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
async function oldSegmentIsActionable(
|
|
287
|
+
ctx: MutationCtx,
|
|
288
|
+
state: Doc<"internalState">,
|
|
289
|
+
maxParallelism: number
|
|
290
|
+
): Promise<
|
|
291
|
+
[boolean, { completion?: bigint; cancelation?: bigint; incoming?: bigint }]
|
|
292
|
+
> {
|
|
295
293
|
// Next, we look for out-of-order additions we may have missed.
|
|
296
294
|
const oldCompletion = await getNextUp(ctx, "pendingCompletion", {
|
|
297
295
|
end: state.segmentCursors.completion,
|
|
@@ -325,13 +323,13 @@ async function getNextUp(
|
|
|
325
323
|
return ctx.db
|
|
326
324
|
.query(table)
|
|
327
325
|
.withIndex("segment", (q) =>
|
|
328
|
-
range.start
|
|
329
|
-
? range.end
|
|
326
|
+
range.start !== undefined
|
|
327
|
+
? range.end !== undefined
|
|
330
328
|
? q
|
|
331
329
|
.gte("segment", range.start - CURSOR_BUFFER_SEGMENTS)
|
|
332
330
|
.lte("segment", range.end)
|
|
333
331
|
: q.gt("segment", range.start - CURSOR_BUFFER_SEGMENTS)
|
|
334
|
-
: range.end
|
|
332
|
+
: range.end !== undefined
|
|
335
333
|
? q.lt("segment", range.end)
|
|
336
334
|
: q
|
|
337
335
|
)
|
|
@@ -340,9 +338,7 @@ async function getNextUp(
|
|
|
340
338
|
|
|
341
339
|
/**
|
|
342
340
|
* Handles the completion of pending completions.
|
|
343
|
-
|
|
344
|
-
* Important: It should handle retries before cancelations are processed,
|
|
345
|
-
* to allow retries to be canceled.
|
|
341
|
+
* This only processes work that succeeded or failed, not canceled.
|
|
346
342
|
*/
|
|
347
343
|
async function handleCompletions(
|
|
348
344
|
ctx: MutationCtx,
|
|
@@ -359,87 +355,66 @@ async function handleCompletions(
|
|
|
359
355
|
q.gte("segment", startSegment).lte("segment", segment)
|
|
360
356
|
)
|
|
361
357
|
.collect();
|
|
362
|
-
state.report.completed += completed.length;
|
|
363
358
|
state.segmentCursors.completion = segment;
|
|
364
|
-
|
|
359
|
+
// Completions that were going to be retried but have since been canceled.
|
|
360
|
+
const toCancel: CompleteJob[] = [];
|
|
365
361
|
await Promise.all(
|
|
366
362
|
completed.map(async (c) => {
|
|
367
363
|
await ctx.db.delete(c._id);
|
|
368
|
-
|
|
369
|
-
const
|
|
370
|
-
|
|
371
|
-
.
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
364
|
+
|
|
365
|
+
const running = state.running.find((r) => r.workId === c.workId);
|
|
366
|
+
if (!running) {
|
|
367
|
+
console.error(
|
|
368
|
+
`[main] completing ${c.workId} but it's not in "running"`
|
|
369
|
+
);
|
|
370
|
+
return;
|
|
371
|
+
}
|
|
372
|
+
if (c.retry) {
|
|
373
|
+
// Only check for work if it's going to be retried.
|
|
374
|
+
const work = await ctx.db.get(c.workId);
|
|
375
|
+
if (!work) {
|
|
376
|
+
console.warn(`[main] ${c.workId} is gone, but trying to complete`);
|
|
377
|
+
return;
|
|
378
|
+
}
|
|
379
|
+
const retried = await rescheduleJob(ctx, work, console);
|
|
380
|
+
if (retried) {
|
|
382
381
|
state.report.retries++;
|
|
382
|
+
recordCompleted(console, work, "retrying");
|
|
383
383
|
} else {
|
|
384
|
-
if
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
await ctx.db.delete(pendingCancelation._id);
|
|
392
|
-
}
|
|
393
|
-
done.push(c);
|
|
384
|
+
// We don't retry if it's been canceled in the mean time.
|
|
385
|
+
state.report.canceled++;
|
|
386
|
+
toCancel.push({
|
|
387
|
+
workId: c.workId,
|
|
388
|
+
runResult: { kind: "canceled" },
|
|
389
|
+
attempt: work.attempts,
|
|
390
|
+
});
|
|
394
391
|
}
|
|
395
|
-
console.info(recordCompleted(work, c.runResult.kind));
|
|
396
|
-
} else if (work) {
|
|
397
|
-
console.warn(`[main] completing ${c.workId} but it's not in "running"`);
|
|
398
392
|
} else {
|
|
399
|
-
|
|
393
|
+
if (c.runResult.kind === "success") {
|
|
394
|
+
state.report.succeeded++;
|
|
395
|
+
} else if (c.runResult.kind === "failed") {
|
|
396
|
+
state.report.failed++;
|
|
397
|
+
}
|
|
400
398
|
}
|
|
401
399
|
})
|
|
402
400
|
);
|
|
403
|
-
|
|
401
|
+
// We do this after so the stats above know if it was in progress.
|
|
402
|
+
const before = state.running.length;
|
|
404
403
|
state.running = state.running.filter(
|
|
405
404
|
(r) => !completed.some((c) => c.workId === r.workId)
|
|
406
405
|
);
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
ctx: MutationCtx,
|
|
412
|
-
work: Doc<"work">,
|
|
413
|
-
console: Logger
|
|
414
|
-
): Promise<number> {
|
|
415
|
-
if (!work.retryBehavior) {
|
|
416
|
-
throw new Error("work has no retryBehavior");
|
|
417
|
-
}
|
|
418
|
-
const backoffMs =
|
|
419
|
-
work.retryBehavior.initialBackoffMs *
|
|
420
|
-
Math.pow(work.retryBehavior.base, work.attempts - 1);
|
|
421
|
-
const nextAttempt = withJitter(backoffMs);
|
|
422
|
-
const startTime = boundScheduledTime(Date.now() + nextAttempt, console);
|
|
423
|
-
const segment = toSegment(startTime);
|
|
424
|
-
await ctx.db.patch(work._id, {
|
|
425
|
-
attempts: work.attempts + 1,
|
|
426
|
-
});
|
|
427
|
-
await ctx.db.insert("pendingStart", {
|
|
428
|
-
workId: work._id,
|
|
429
|
-
segment,
|
|
430
|
-
});
|
|
431
|
-
return nextAttempt;
|
|
432
|
-
}
|
|
433
|
-
|
|
434
|
-
export function withJitter(delay: number) {
|
|
435
|
-
return delay * (0.5 + Math.random());
|
|
406
|
+
const numCompleted = before - state.running.length;
|
|
407
|
+
state.report.completed += numCompleted;
|
|
408
|
+
console.debug(`[main] completed ${numCompleted} work`);
|
|
409
|
+
return toCancel;
|
|
436
410
|
}
|
|
437
411
|
|
|
438
412
|
async function handleCancelation(
|
|
439
413
|
ctx: MutationCtx,
|
|
440
414
|
state: Doc<"internalState">,
|
|
441
415
|
segment: bigint,
|
|
442
|
-
console: Logger
|
|
416
|
+
console: Logger,
|
|
417
|
+
toCancel: CompleteJob[]
|
|
443
418
|
) {
|
|
444
419
|
const start = state.segmentCursors.cancelation - CURSOR_BUFFER_SEGMENTS;
|
|
445
420
|
const canceled = await ctx.db
|
|
@@ -449,35 +424,76 @@ async function handleCancelation(
|
|
|
449
424
|
)
|
|
450
425
|
.take(CANCELLATION_BATCH_SIZE);
|
|
451
426
|
state.segmentCursors.cancelation = canceled.at(-1)?.segment ?? segment;
|
|
452
|
-
|
|
427
|
+
if (canceled.length) {
|
|
428
|
+
console.debug(`[main] attempting to cancel ${canceled.length}`);
|
|
429
|
+
}
|
|
453
430
|
const canceledWork: Set<Id<"work">> = new Set();
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
431
|
+
const runResult: RunResult = { kind: "canceled" };
|
|
432
|
+
const jobs = toCancel.concat(
|
|
433
|
+
...(
|
|
434
|
+
await Promise.all(
|
|
435
|
+
canceled.map(async ({ _id, _creationTime, workId }) => {
|
|
436
|
+
await ctx.db.delete(_id);
|
|
437
|
+
if (canceledWork.has(workId)) {
|
|
438
|
+
// We shouldn't have multiple pending cancelations for the same work.
|
|
439
|
+
console.error(`[main] ${workId} already canceled`);
|
|
440
|
+
return null;
|
|
441
|
+
}
|
|
442
|
+
const work = await ctx.db.get(workId);
|
|
443
|
+
if (!work) {
|
|
444
|
+
console.warn(`[main] ${workId} is gone, but trying to cancel`);
|
|
445
|
+
return null;
|
|
446
|
+
}
|
|
447
|
+
// Ensure it doesn't retry.
|
|
448
|
+
await ctx.db.patch(workId, { canceled: true });
|
|
449
|
+
// Ensure it doesn't start.
|
|
450
|
+
const pendingStart = await ctx.db
|
|
451
|
+
.query("pendingStart")
|
|
452
|
+
.withIndex("workId", (q) => q.eq("workId", workId))
|
|
453
|
+
.unique();
|
|
454
|
+
if (pendingStart && !canceledWork.has(workId)) {
|
|
455
|
+
state.report.canceled++;
|
|
456
|
+
await ctx.db.delete(pendingStart._id);
|
|
457
|
+
canceledWork.add(workId);
|
|
458
|
+
return { workId, runResult, attempt: work.attempts };
|
|
459
|
+
}
|
|
460
|
+
return null;
|
|
461
|
+
})
|
|
462
|
+
)
|
|
463
|
+
).flatMap((r) => (r ? [r] : []))
|
|
476
464
|
);
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
465
|
+
if (jobs.length) {
|
|
466
|
+
await ctx.scheduler.runAfter(0, internal.complete.complete, { jobs });
|
|
467
|
+
}
|
|
468
|
+
}
|
|
469
|
+
|
|
470
|
+
async function handleRecovery(
|
|
471
|
+
ctx: MutationCtx,
|
|
472
|
+
state: Doc<"internalState">,
|
|
473
|
+
console: Logger
|
|
474
|
+
) {
|
|
475
|
+
const missing = new Set<Id<"work">>();
|
|
476
|
+
const oldEnoughToConsider = Date.now() - RECOVERY_THRESHOLD_MS;
|
|
477
|
+
const jobs = (
|
|
478
|
+
await Promise.all(
|
|
479
|
+
state.running.map(async (r) => {
|
|
480
|
+
if (r.started >= oldEnoughToConsider) {
|
|
481
|
+
return null;
|
|
482
|
+
}
|
|
483
|
+
const work = await ctx.db.get(r.workId);
|
|
484
|
+
if (!work) {
|
|
485
|
+
missing.add(r.workId);
|
|
486
|
+
console.error(`[main] ${r.workId} already gone (skipping recovery)`);
|
|
487
|
+
return null;
|
|
488
|
+
}
|
|
489
|
+
return { ...r, attempt: work.attempts };
|
|
490
|
+
})
|
|
491
|
+
)
|
|
492
|
+
).flatMap((r) => (r ? [r] : []));
|
|
493
|
+
state.running = state.running.filter((r) => !missing.has(r.workId));
|
|
494
|
+
if (jobs.length) {
|
|
495
|
+
await ctx.scheduler.runAfter(0, internal.recovery.recover, { jobs });
|
|
496
|
+
}
|
|
481
497
|
}
|
|
482
498
|
|
|
483
499
|
async function handleStart(
|
|
@@ -485,9 +501,8 @@ async function handleStart(
|
|
|
485
501
|
state: Doc<"internalState">,
|
|
486
502
|
segment: bigint,
|
|
487
503
|
console: Logger,
|
|
488
|
-
|
|
504
|
+
{ maxParallelism, logLevel }: Config
|
|
489
505
|
) {
|
|
490
|
-
const maxParallelism = globals.maxParallelism;
|
|
491
506
|
// Schedule as many as needed to reach maxParallelism.
|
|
492
507
|
const toSchedule = maxParallelism - state.running.length;
|
|
493
508
|
|
|
@@ -499,50 +514,103 @@ async function handleStart(
|
|
|
499
514
|
.lte("segment", segment)
|
|
500
515
|
)
|
|
501
516
|
.take(toSchedule);
|
|
517
|
+
|
|
502
518
|
state.segmentCursors.incoming = pending.at(-1)?.segment ?? segment;
|
|
503
519
|
console.debug(`[main] scheduling ${pending.length} pending work`);
|
|
504
520
|
// Start new work.
|
|
505
521
|
state.running.push(
|
|
506
|
-
...(
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
522
|
+
...(
|
|
523
|
+
await Promise.all(
|
|
524
|
+
pending.map(async ({ _id, workId, segment }) => {
|
|
525
|
+
if (state.running.some((r) => r.workId === workId)) {
|
|
526
|
+
console.error(`[main] ${workId} already running (skipping start)`);
|
|
527
|
+
return null;
|
|
528
|
+
}
|
|
529
|
+
const lagMs = Date.now() - fromSegment(segment);
|
|
530
|
+
const scheduledId = await beginWork(ctx, workId, logLevel, lagMs);
|
|
531
|
+
await ctx.db.delete(_id);
|
|
532
|
+
return { scheduledId, workId, started: Date.now() };
|
|
533
|
+
})
|
|
534
|
+
)
|
|
535
|
+
).flatMap((r) => (r ? [r] : []))
|
|
513
536
|
);
|
|
514
537
|
}
|
|
515
538
|
|
|
516
539
|
async function beginWork(
|
|
517
540
|
ctx: MutationCtx,
|
|
518
541
|
workId: Id<"work">,
|
|
519
|
-
logLevel: LogLevel
|
|
542
|
+
logLevel: LogLevel,
|
|
543
|
+
lagMs: number
|
|
520
544
|
): Promise<Id<"_scheduled_functions">> {
|
|
521
545
|
const console = createLogger(logLevel);
|
|
522
546
|
const work = await ctx.db.get(workId);
|
|
523
547
|
if (!work) {
|
|
524
548
|
throw new Error("work not found");
|
|
525
549
|
}
|
|
526
|
-
|
|
550
|
+
recordStarted(console, work, lagMs);
|
|
551
|
+
const { attempts: attempt, fnHandle, fnArgs } = work;
|
|
552
|
+
const args = { workId, fnHandle, fnArgs, logLevel, attempt };
|
|
527
553
|
if (work.fnType === "action") {
|
|
528
|
-
return
|
|
529
|
-
workId: work._id,
|
|
530
|
-
fnHandle: work.fnHandle,
|
|
531
|
-
fnArgs: work.fnArgs,
|
|
532
|
-
logLevel,
|
|
533
|
-
});
|
|
554
|
+
return ctx.scheduler.runAfter(0, internal.worker.runActionWrapper, args);
|
|
534
555
|
} else if (work.fnType === "mutation") {
|
|
535
|
-
return
|
|
536
|
-
workId: work._id,
|
|
537
|
-
fnHandle: work.fnHandle,
|
|
538
|
-
fnArgs: work.fnArgs,
|
|
539
|
-
logLevel,
|
|
540
|
-
});
|
|
556
|
+
return ctx.scheduler.runAfter(0, internal.worker.runMutationWrapper, args);
|
|
541
557
|
} else {
|
|
542
558
|
throw new Error(`Unexpected fnType ${work.fnType}`);
|
|
543
559
|
}
|
|
544
560
|
}
|
|
545
561
|
|
|
562
|
+
/**
|
|
563
|
+
* Reschedules a job for retry.
|
|
564
|
+
* If it's been canceled in the mean time, don't retry.
|
|
565
|
+
* @returns true if the job was rescheduled, false if it was not.
|
|
566
|
+
*/
|
|
567
|
+
async function rescheduleJob(
|
|
568
|
+
ctx: MutationCtx,
|
|
569
|
+
work: Doc<"work">,
|
|
570
|
+
console: Logger
|
|
571
|
+
): Promise<boolean> {
|
|
572
|
+
const pendingCancelation = await ctx.db
|
|
573
|
+
.query("pendingCancelation")
|
|
574
|
+
.withIndex("workId", (q) => q.eq("workId", work._id))
|
|
575
|
+
.unique();
|
|
576
|
+
if (pendingCancelation) {
|
|
577
|
+
// If there's an un-processed cancelation request, don't retry.
|
|
578
|
+
console.warn(`[main] ${work._id} in pendingCancelation so not retrying`);
|
|
579
|
+
return false;
|
|
580
|
+
}
|
|
581
|
+
if (work.canceled) {
|
|
582
|
+
return false;
|
|
583
|
+
}
|
|
584
|
+
if (!work.retryBehavior) {
|
|
585
|
+
console.warn(`[main] ${work._id} has no retryBehavior so not retrying`);
|
|
586
|
+
return false;
|
|
587
|
+
}
|
|
588
|
+
const existing = await ctx.db
|
|
589
|
+
.query("pendingStart")
|
|
590
|
+
.withIndex("workId", (q) => q.eq("workId", work._id))
|
|
591
|
+
.first();
|
|
592
|
+
if (existing) {
|
|
593
|
+
// Not sure why this would ever happen, but ensure uniqueness explicitly.
|
|
594
|
+
console.error(`[main] ${work._id} already in pendingStart so not retrying`);
|
|
595
|
+
return false;
|
|
596
|
+
}
|
|
597
|
+
const backoffMs =
|
|
598
|
+
work.retryBehavior.initialBackoffMs *
|
|
599
|
+
Math.pow(work.retryBehavior.base, work.attempts - 1);
|
|
600
|
+
const nextAttempt = withJitter(backoffMs);
|
|
601
|
+
const startTime = boundScheduledTime(Date.now() + nextAttempt, console);
|
|
602
|
+
const segment = toSegment(startTime);
|
|
603
|
+
await ctx.db.insert("pendingStart", {
|
|
604
|
+
workId: work._id,
|
|
605
|
+
segment,
|
|
606
|
+
});
|
|
607
|
+
return true;
|
|
608
|
+
}
|
|
609
|
+
|
|
610
|
+
export function withJitter(delay: number) {
|
|
611
|
+
return delay * (0.5 + Math.random());
|
|
612
|
+
}
|
|
613
|
+
|
|
546
614
|
async function getGlobals(ctx: MutationCtx) {
|
|
547
615
|
const globals = await ctx.db.query("globals").unique();
|
|
548
616
|
if (!globals) {
|