@convex-dev/workpool 0.2.0-beta.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -16
- package/dist/commonjs/client/index.d.ts +3 -3
- package/dist/commonjs/client/index.d.ts.map +1 -1
- package/dist/commonjs/client/index.js +10 -5
- package/dist/commonjs/client/index.js.map +1 -1
- package/dist/commonjs/component/complete.d.ts +89 -0
- package/dist/commonjs/component/complete.d.ts.map +1 -0
- package/dist/commonjs/component/complete.js +80 -0
- package/dist/commonjs/component/complete.js.map +1 -0
- package/dist/commonjs/component/kick.d.ts +1 -2
- package/dist/commonjs/component/kick.d.ts.map +1 -1
- package/dist/commonjs/component/kick.js +7 -5
- package/dist/commonjs/component/kick.js.map +1 -1
- package/dist/commonjs/component/lib.d.ts +3 -3
- package/dist/commonjs/component/lib.d.ts.map +1 -1
- package/dist/commonjs/component/lib.js +43 -20
- package/dist/commonjs/component/lib.js.map +1 -1
- package/dist/commonjs/component/logging.d.ts.map +1 -1
- package/dist/commonjs/component/logging.js +1 -2
- package/dist/commonjs/component/logging.js.map +1 -1
- package/dist/commonjs/component/loop.d.ts +1 -14
- package/dist/commonjs/component/loop.d.ts.map +1 -1
- package/dist/commonjs/component/loop.js +215 -178
- package/dist/commonjs/component/loop.js.map +1 -1
- package/dist/commonjs/component/recovery.d.ts +16 -0
- package/dist/commonjs/component/recovery.d.ts.map +1 -1
- package/dist/commonjs/component/recovery.js +64 -44
- package/dist/commonjs/component/recovery.js.map +1 -1
- package/dist/commonjs/component/schema.d.ts +6 -2
- package/dist/commonjs/component/schema.d.ts.map +1 -1
- package/dist/commonjs/component/schema.js +5 -3
- package/dist/commonjs/component/schema.js.map +1 -1
- package/dist/commonjs/component/shared.d.ts +20 -11
- package/dist/commonjs/component/shared.d.ts.map +1 -1
- package/dist/commonjs/component/shared.js +18 -5
- package/dist/commonjs/component/shared.js.map +1 -1
- package/dist/commonjs/component/stats.d.ts +21 -13
- package/dist/commonjs/component/stats.d.ts.map +1 -1
- package/dist/commonjs/component/stats.js +32 -22
- package/dist/commonjs/component/stats.js.map +1 -1
- package/dist/commonjs/component/worker.d.ts +2 -12
- package/dist/commonjs/component/worker.d.ts.map +1 -1
- package/dist/commonjs/component/worker.js +23 -36
- package/dist/commonjs/component/worker.js.map +1 -1
- package/dist/esm/client/index.d.ts +3 -3
- package/dist/esm/client/index.d.ts.map +1 -1
- package/dist/esm/client/index.js +10 -5
- package/dist/esm/client/index.js.map +1 -1
- package/dist/esm/component/complete.d.ts +89 -0
- package/dist/esm/component/complete.d.ts.map +1 -0
- package/dist/esm/component/complete.js +80 -0
- package/dist/esm/component/complete.js.map +1 -0
- package/dist/esm/component/kick.d.ts +1 -2
- package/dist/esm/component/kick.d.ts.map +1 -1
- package/dist/esm/component/kick.js +7 -5
- package/dist/esm/component/kick.js.map +1 -1
- package/dist/esm/component/lib.d.ts +3 -3
- package/dist/esm/component/lib.d.ts.map +1 -1
- package/dist/esm/component/lib.js +43 -20
- package/dist/esm/component/lib.js.map +1 -1
- package/dist/esm/component/logging.d.ts.map +1 -1
- package/dist/esm/component/logging.js +1 -2
- package/dist/esm/component/logging.js.map +1 -1
- package/dist/esm/component/loop.d.ts +1 -14
- package/dist/esm/component/loop.d.ts.map +1 -1
- package/dist/esm/component/loop.js +215 -178
- package/dist/esm/component/loop.js.map +1 -1
- package/dist/esm/component/recovery.d.ts +16 -0
- package/dist/esm/component/recovery.d.ts.map +1 -1
- package/dist/esm/component/recovery.js +64 -44
- package/dist/esm/component/recovery.js.map +1 -1
- package/dist/esm/component/schema.d.ts +6 -2
- package/dist/esm/component/schema.d.ts.map +1 -1
- package/dist/esm/component/schema.js +5 -3
- package/dist/esm/component/schema.js.map +1 -1
- package/dist/esm/component/shared.d.ts +20 -11
- package/dist/esm/component/shared.d.ts.map +1 -1
- package/dist/esm/component/shared.js +18 -5
- package/dist/esm/component/shared.js.map +1 -1
- package/dist/esm/component/stats.d.ts +21 -13
- package/dist/esm/component/stats.d.ts.map +1 -1
- package/dist/esm/component/stats.js +32 -22
- package/dist/esm/component/stats.js.map +1 -1
- package/dist/esm/component/worker.d.ts +2 -12
- package/dist/esm/component/worker.d.ts.map +1 -1
- package/dist/esm/component/worker.js +23 -36
- package/dist/esm/component/worker.js.map +1 -1
- package/package.json +7 -6
- package/src/client/index.ts +18 -8
- package/src/component/README.md +15 -15
- package/src/component/_generated/api.d.ts +7 -2
- package/src/component/complete.test.ts +508 -0
- package/src/component/complete.ts +98 -0
- package/src/component/kick.test.ts +13 -13
- package/src/component/kick.ts +13 -8
- package/src/component/lib.test.ts +262 -17
- package/src/component/lib.ts +55 -24
- package/src/component/logging.ts +1 -2
- package/src/component/loop.test.ts +1158 -0
- package/src/component/loop.ts +289 -221
- package/src/component/recovery.test.ts +541 -0
- package/src/component/recovery.ts +80 -63
- package/src/component/schema.ts +6 -4
- package/src/component/shared.ts +21 -6
- package/src/component/stats.ts +48 -25
- package/src/component/worker.ts +25 -38
@@ -1,9 +1,8 @@
 import { v } from "convex/values";
 import { internal } from "./_generated/api.js";
 import { internalMutation } from "./_generated/server.js";
-import { DEFAULT_MAX_PARALLELISM } from "./kick.js";
 import { createLogger, DEFAULT_LOG_LEVEL, } from "./logging.js";
-import { boundScheduledTime, currentSegment, fromSegment, nextSegment, toSegment, } from "./shared.js";
+import { boundScheduledTime, currentSegment, DEFAULT_MAX_PARALLELISM, fromSegment, max, nextSegment, toSegment, } from "./shared.js";
 import { recordCompleted, recordReport, recordStarted } from "./stats.js";
 const CANCELLATION_BATCH_SIZE = 64; // the only queue that can get unbounded.
 const SECOND = 1000;
@@ -27,43 +26,44 @@ export const INITIAL_STATE = {
 };
 // There should only ever be at most one of these scheduled or running.
 export const main = internalMutation({
-    args: {
-        generation: v.int64(),
-        segment: v.int64(),
-    },
-    handler: async (ctx, args) => {
+    args: { generation: v.int64(), segment: v.int64() },
+    handler: async (ctx, { generation, segment }) => {
         // State will be modified and patched at the end of the function.
         const state = await getOrCreateState(ctx);
-        if (args.generation !== state.generation) {
-            throw new Error(`generation mismatch: ${args.generation} !== ${state.generation}`);
+        if (generation !== state.generation) {
+            throw new Error(`generation mismatch: ${generation} !== ${state.generation}`);
         }
         state.generation++;
+        const runStatus = await getOrCreateRunningStatus(ctx);
+        if (runStatus.state.kind !== "running") {
+            await ctx.db.patch(runStatus._id, {
+                state: { kind: "running" },
+            });
+        }
         const globals = await getGlobals(ctx);
         const console = createLogger(globals.logLevel);
+        const delayMs = Date.now() - fromSegment(segment);
+        console.debug(`[main] generation ${generation} behind: ${delayMs}ms`);
         // Read pendingCompletions, including retry handling.
         console.time("[main] pendingCompletion");
-        const done = await handleCompletions(ctx, state, args.segment, console);
+        const toCancel = await handleCompletions(ctx, state, segment, console);
         console.timeEnd("[main] pendingCompletion");
         // Read pendingCancelation, deleting from pendingStart. If it's still running, queue to cancel.
         console.time("[main] pendingCancelation");
-        await handleCancelation(ctx, state, args.segment, console);
+        await handleCancelation(ctx, state, segment, console, toCancel);
         console.timeEnd("[main] pendingCancelation");
         if (state.running.length === 0) {
             // If there's nothing active, reset lastRecovery.
-            state.lastRecovery = args.segment;
+            state.lastRecovery = segment;
         }
-        else if (args.segment - state.lastRecovery >= RECOVERY_PERIOD_SEGMENTS) {
+        else if (segment - state.lastRecovery >= RECOVERY_PERIOD_SEGMENTS) {
             // Otherwise schedule recovery for any old jobs.
-
-
-            if (jobs.length) {
-                await ctx.scheduler.runAfter(0, internal.recovery.recover, { jobs });
-            }
-            state.lastRecovery = args.segment;
+            await handleRecovery(ctx, state, console);
+            state.lastRecovery = segment;
         }
         // Read pendingStart up to max capacity. Update the config, and incomingSegmentCursor.
         console.time("[main] pendingStart");
-        await handleStart(ctx, state, args.segment, console, globals);
+        await handleStart(ctx, state, segment, console, globals);
         console.timeEnd("[main] pendingStart");
         if (Date.now() - state.report.lastReportTs >= MINUTE) {
             // If minute rollover since last report, log report.
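The hunk above also shows the concurrency guard this release leans on: every scheduled `main` run carries the `generation` it was scheduled with, and a stale run throws on entry instead of executing twice. A minimal sketch of that pattern (the `LoopState` type and function name are illustrative, not the component's actual API):

```ts
// Sketch of the generation guard used by `main` above. `LoopState` is a
// hypothetical stand-in for the component's internal state document.
type LoopState = { generation: bigint };

function assertAndBumpGeneration(observed: bigint, state: LoopState): void {
    // A stale scheduled run still holds an old generation and throws here,
    // so it can never execute as a second concurrent loop iteration.
    if (observed !== state.generation) {
        throw new Error(`generation mismatch: ${observed} !== ${state.generation}`);
    }
    // Bumping invalidates any other run scheduled with the same generation.
    state.generation++;
}
```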
@@ -73,7 +73,7 @@ export const main = internalMutation({
                 // It's been a while, let's start fresh.
                 lastReportTs = Date.now();
             }
-
+            recordReport(console, state);
             state.report = {
                 completed: 0,
                 succeeded: 0,
@@ -84,81 +84,61 @@ export const main = internalMutation({
             };
         }
         await ctx.db.replace(state._id, state);
-        await ctx.scheduler.runAfter(0, internal.loop.complete, { done });
         await ctx.scheduler.runAfter(0, internal.loop.updateRunStatus, {
             generation: state.generation,
+            segment,
         });
-    },
-});
-export const complete = internalMutation({
-    args: {
-        done: v.array(v.object({ runResult, workId: v.id("work") })),
-    },
-    handler: async (ctx, args) => {
-        const globals = await getGlobals(ctx);
-        const console = createLogger(globals.logLevel);
-        await Promise.all(args.done.map(async ({ runResult, workId }) => {
-            const work = await ctx.db.get(workId);
-            if (!work) {
-                console.warn(`[complete] ${workId} is done, but its work is gone`);
-                return;
-            }
-            if (work.onComplete) {
-                try {
-                    const handle = work.onComplete.fnHandle;
-                    await ctx.runMutation(handle, {
-                        workId: work._id,
-                        context: work.onComplete.context,
-                        result: runResult,
-                    });
-                    console.debug(`[complete] onComplete for ${workId} completed`);
-                }
-                catch (e) {
-                    console.error(`[complete] error running onComplete for ${workId}`, e);
-                }
-            }
-            await ctx.db.delete(workId);
-        }));
+        // TODO: if there were more cancellations, schedule main directly.
     },
 });
 export const updateRunStatus = internalMutation({
-    args: { generation: v.int64() },
-    handler: async (ctx, args) => {
+    args: { generation: v.int64(), segment: v.int64() },
+    handler: async (ctx, { generation, segment }) => {
         const globals = await getGlobals(ctx);
         const console = createLogger(globals.logLevel);
         const maxParallelism = globals.maxParallelism;
         const state = await getOrCreateState(ctx);
-        if (args.generation !== state.generation) {
-            throw new Error(`generation mismatch: ${args.generation} !== ${state.generation}`);
+        if (generation !== state.generation) {
+            throw new Error(`generation mismatch: ${generation} !== ${state.generation}`);
         }
         console.time("[updateRunStatus] outstandingCancelations");
-        const thisSegment = currentSegment();
         const outstandingCancelations = await getNextUp(ctx, "pendingCancelation", {
             start: state.segmentCursors.cancelation,
-            end: thisSegment,
+            end: segment,
         });
         console.timeEnd("[updateRunStatus] outstandingCancelations");
         if (outstandingCancelations) {
             await ctx.scheduler.runAfter(0, internal.loop.main, {
-                generation: args.generation,
-                segment: thisSegment,
+                generation,
+                segment,
             });
             return;
         }
+        // TODO: check for current segment (or from args) first, to avoid OCCs.
         console.time("[updateRunStatus] nextSegmentIsActionable");
-        const
+        const next = max(segment + 1n, currentSegment());
+        const nextIsActionable = await nextSegmentIsActionable(ctx, state, maxParallelism, next);
         console.timeEnd("[updateRunStatus] nextSegmentIsActionable");
-        const start = nextSegment();
         if (nextIsActionable) {
+            await ctx.scheduler.runAt(boundScheduledTime(fromSegment(next), console), internal.loop.main, {
+                generation,
+                segment: next,
+            });
+            return;
+        }
+        console.time("[updateRunStatus] oldSegmentIsActionable");
+        const [oldIsActionable, cursors] = await oldSegmentIsActionable(ctx, state, maxParallelism);
+        console.timeEnd("[updateRunStatus] oldSegmentIsActionable");
+        if (oldIsActionable) {
             await ctx.db.patch(state._id, {
                 segmentCursors: {
                     ...state.segmentCursors,
                     ...cursors,
                 },
             });
-            await ctx.scheduler.
-                generation
-                segment:
+            await ctx.scheduler.runAfter(0, internal.loop.main, {
+                generation,
+                segment: currentSegment(),
             });
             return;
         }
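The `updateRunStatus` changes above write one of three run-status states: stay `running` when the next segment is already actionable, record a `scheduled` invocation of `main` otherwise, or go `idle` when nothing is pending. Paraphrased as a type (reconstructed from the patched fields in this diff, not the component's published schema):

```ts
// Hypothetical paraphrase of the runStatus states written above.
type RunStatus =
    | { kind: "running" }
    | {
          kind: "scheduled";
          scheduledId: string; // the scheduled `main` invocation
          saturated: boolean;  // running.length >= maxParallelism
          generation: bigint;
          segment: bigint;     // the segment `main` was scheduled for
      }
    | { kind: "idle"; generation: bigint };
```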
@@ -168,59 +148,66 @@ export const updateRunStatus = internalMutation({
         if (state.running.length < maxParallelism) {
             actionableTables.push("pendingStart");
         }
-        const docs = await Promise.all(actionableTables.map(async (tableName) => getNextUp(ctx, tableName, { start })));
+        const docs = await Promise.all(actionableTables.map(async (tableName) => getNextUp(ctx, tableName, { start: next })));
         console.timeEnd("[updateRunStatus] findNextSegment");
-        let segment = docs.map((d) => d?.segment).sort()[0];
+        let targetSegment = docs.map((d) => d?.segment).sort()[0];
         const runStatus = await getOrCreateRunningStatus(ctx);
         const saturated = state.running.length >= maxParallelism;
-        if (segment !== undefined || state.running.length > 0) {
+        if (targetSegment !== undefined || state.running.length > 0) {
             // If there's something to do, schedule for next actionable segment.
             // Or the next recovery, whichever comes first.
             const nextRecoverySegment = state.lastRecovery + RECOVERY_PERIOD_SEGMENTS;
-            if (!segment || segment > nextRecoverySegment) {
-                segment = nextRecoverySegment;
+            if (!targetSegment || targetSegment > nextRecoverySegment) {
+                targetSegment = nextRecoverySegment;
+            }
+            const scheduledId = await ctx.scheduler.runAt(boundScheduledTime(fromSegment(targetSegment), console), internal.loop.main, { generation, segment: targetSegment });
+            if (targetSegment > nextSegment()) {
+                await ctx.db.patch(runStatus._id, {
+                    state: {
+                        kind: "scheduled",
+                        scheduledId,
+                        saturated,
+                        generation,
+                        segment: targetSegment,
+                    },
+                });
+            }
+            else {
+                console.debug(`[updateRunStatus] staying running because it's the next segment`);
             }
-            const scheduledId = await ctx.scheduler.runAt(fromSegment(segment), internal.loop.main, { generation: args.generation, segment });
-            await ctx.db.patch(runStatus._id, {
-                state: {
-                    kind: "scheduled",
-                    scheduledId,
-                    saturated,
-                    generation: args.generation,
-                    segment,
-                },
-            });
             return;
         }
         // There seems to be nothing in the future to do, so go idle.
         await ctx.db.patch(runStatus._id, {
-            state: { kind: "idle", generation: args.generation },
+            state: { kind: "idle", generation },
         });
     },
 });
-async function nextSegmentIsActionable(ctx, state, maxParallelism) {
-    // First, try with our cursor range, up to
-    const end = nextSegment();
+async function nextSegmentIsActionable(ctx, state, maxParallelism, end) {
+    // First, try with our cursor range, up to end.
     if (await getNextUp(ctx, "pendingCancelation", {
         start: state.segmentCursors.cancelation,
         end,
     })) {
-        return
+        return true;
     }
     if (await getNextUp(ctx, "pendingCompletion", {
         start: state.segmentCursors.completion,
         end,
     })) {
-        return
+        return true;
     }
     if (state.running.length < maxParallelism) {
         if (await getNextUp(ctx, "pendingStart", {
             start: state.segmentCursors.incoming,
             end,
         })) {
-            return
+            return true;
         }
     }
+    return false;
+}
+async function oldSegmentIsActionable(ctx, state, maxParallelism) {
     // Next, we look for out-of-order additions we may have missed.
     const oldCompletion = await getNextUp(ctx, "pendingCompletion", {
         end: state.segmentCursors.completion,
@@ -248,22 +235,20 @@ async function nextSegmentIsActionable(ctx, state, maxParallelism) {
 async function getNextUp(ctx, table, range) {
     return ctx.db
         .query(table)
-        .withIndex("segment", (q) => range.start
-        ? range.end
+        .withIndex("segment", (q) => range.start !== undefined
+        ? range.end !== undefined
             ? q
                 .gte("segment", range.start - CURSOR_BUFFER_SEGMENTS)
                 .lte("segment", range.end)
             : q.gt("segment", range.start - CURSOR_BUFFER_SEGMENTS)
-        : range.end
+        : range.end !== undefined
             ? q.lt("segment", range.end)
             : q)
         .first();
 }
 /**
  * Handles the completion of pending completions.
- *
- * Important: It should handle retries before cancelations are processed,
- * to allow retries to be canceled.
+ * This only processes work that succeeded or failed, not canceled.
  */
 async function handleCompletions(ctx, state, segment, console) {
     const startSegment = state.segmentCursors.completion - CURSOR_BUFFER_SEGMENTS;
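One detail in `getNextUp` worth noting: segments are `v.int64()` values, i.e. JavaScript bigints, and `0n` is falsy, so the truthiness checks the old code appears to have used would treat a cursor at segment `0n` as "no bound". The new `!== undefined` comparisons avoid that. A standalone illustration (values hypothetical):

```ts
// Why `range.start !== undefined` rather than `range.start ? ... : ...`:
// a bigint cursor of 0n is falsy, so truthiness conflates "segment zero"
// with "no bound at all".
const start: bigint | undefined = 0n;

console.log(start ? "bounded" : "unbounded");               // "unbounded" (wrong)
console.log(start !== undefined ? "bounded" : "unbounded"); // "bounded" (right)
```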
@@ -273,109 +258,119 @@ async function handleCompletions(ctx, state, segment, console) {
         .query("pendingCompletion")
         .withIndex("segment", (q) => q.gte("segment", startSegment).lte("segment", segment))
         .collect();
-    state.report.completed += completed.length;
     state.segmentCursors.completion = segment;
-
+    // Completions that were going to be retried but have since been canceled.
+    const toCancel = [];
     await Promise.all(completed.map(async (c) => {
         await ctx.db.delete(c._id);
-        const
-
-
-
-
-
-
-
-
-
-
-
+        const running = state.running.find((r) => r.workId === c.workId);
+        if (!running) {
+            console.error(`[main] completing ${c.workId} but it's not in "running"`);
+            return;
+        }
+        if (c.retry) {
+            // Only check for work if it's going to be retried.
+            const work = await ctx.db.get(c.workId);
+            if (!work) {
+                console.warn(`[main] ${c.workId} is gone, but trying to complete`);
+                return;
+            }
+            const retried = await rescheduleJob(ctx, work, console);
+            if (retried) {
                 state.report.retries++;
+                recordCompleted(console, work, "retrying");
             }
             else {
-                if
-
-
-
-
-
-
-                for (const pendingCancelation of pendingCancelations) {
-                    await ctx.db.delete(pendingCancelation._id);
-                }
-                done.push(c);
+                // We don't retry if it's been canceled in the mean time.
+                state.report.canceled++;
+                toCancel.push({
+                    workId: c.workId,
+                    runResult: { kind: "canceled" },
+                    attempt: work.attempts,
+                });
             }
-            console.info(recordCompleted(work, c.runResult.kind));
-        }
-        else if (work) {
-            console.warn(`[main] completing ${c.workId} but it's not in "running"`);
         }
         else {
-
+            if (c.runResult.kind === "success") {
+                state.report.succeeded++;
+            }
+            else if (c.runResult.kind === "failed") {
+                state.report.failed++;
+            }
         }
     }));
-
+    // We do this after so the stats above know if it was in progress.
+    const before = state.running.length;
     state.running = state.running.filter((r) => !completed.some((c) => c.workId === r.workId));
-
+    const numCompleted = before - state.running.length;
+    state.report.completed += numCompleted;
+    console.debug(`[main] completed ${numCompleted} work`);
+    return toCancel;
 }
-async function
-    if (!work.retryBehavior) {
-        throw new Error("work has no retryBehavior");
-    }
-    const backoffMs = work.retryBehavior.initialBackoffMs *
-        Math.pow(work.retryBehavior.base, work.attempts - 1);
-    const nextAttempt = withJitter(backoffMs);
-    const startTime = boundScheduledTime(Date.now() + nextAttempt, console);
-    const segment = toSegment(startTime);
-    await ctx.db.patch(work._id, {
-        attempts: work.attempts + 1,
-    });
-    await ctx.db.insert("pendingStart", {
-        workId: work._id,
-        segment,
-    });
-    return nextAttempt;
-}
-export function withJitter(delay) {
-    return delay * (0.5 + Math.random());
-}
-async function handleCancelation(ctx, state, segment, console) {
+async function handleCancelation(ctx, state, segment, console, toCancel) {
     const start = state.segmentCursors.cancelation - CURSOR_BUFFER_SEGMENTS;
     const canceled = await ctx.db
         .query("pendingCancelation")
         .withIndex("segment", (q) => q.gte("segment", start).lte("segment", segment))
         .take(CANCELLATION_BATCH_SIZE);
     state.segmentCursors.cancelation = canceled.at(-1)?.segment ?? segment;
-
+    if (canceled.length) {
+        console.debug(`[main] attempting to cancel ${canceled.length}`);
+    }
     const canceledWork = new Set();
-
+    const runResult = { kind: "canceled" };
+    const jobs = toCancel.concat(...(await Promise.all(canceled.map(async ({ _id, _creationTime, workId }) => {
         await ctx.db.delete(_id);
+        if (canceledWork.has(workId)) {
+            // We shouldn't have multiple pending cancelations for the same work.
+            console.error(`[main] ${workId} already canceled`);
+            return null;
+        }
         const work = await ctx.db.get(workId);
         if (!work) {
-            console.warn(`[
-            return;
+            console.warn(`[main] ${workId} is gone, but trying to cancel`);
+            return null;
         }
         // Ensure it doesn't retry.
-        await ctx.db.patch(workId, {
+        await ctx.db.patch(workId, { canceled: true });
         // Ensure it doesn't start.
         const pendingStart = await ctx.db
             .query("pendingStart")
             .withIndex("workId", (q) => q.eq("workId", workId))
             .unique();
         if (pendingStart && !canceledWork.has(workId)) {
-            console.info(recordCompleted(work, "canceled"));
             state.report.canceled++;
             await ctx.db.delete(pendingStart._id);
             canceledWork.add(workId);
+            return { workId, runResult, attempt: work.attempts };
         }
-
-
-
-
-    }
+        return null;
+    }))).flatMap((r) => (r ? [r] : [])));
+    if (jobs.length) {
+        await ctx.scheduler.runAfter(0, internal.complete.complete, { jobs });
+    }
+}
+async function handleRecovery(ctx, state, console) {
+    const missing = new Set();
+    const oldEnoughToConsider = Date.now() - RECOVERY_THRESHOLD_MS;
+    const jobs = (await Promise.all(state.running.map(async (r) => {
+        if (r.started >= oldEnoughToConsider) {
+            return null;
+        }
+        const work = await ctx.db.get(r.workId);
+        if (!work) {
+            missing.add(r.workId);
+            console.error(`[main] ${r.workId} already gone (skipping recovery)`);
+            return null;
+        }
+        return { ...r, attempt: work.attempts };
+    }))).flatMap((r) => (r ? [r] : []));
+    state.running = state.running.filter((r) => !missing.has(r.workId));
+    if (jobs.length) {
+        await ctx.scheduler.runAfter(0, internal.recovery.recover, { jobs });
+    }
 }
-async function handleStart(ctx, state, segment, console,
-    const maxParallelism = globals.maxParallelism;
+async function handleStart(ctx, state, segment, console, { maxParallelism, logLevel }) {
     // Schedule as many as needed to reach maxParallelism.
     const toSchedule = maxParallelism - state.running.length;
     const pending = await ctx.db
@@ -387,39 +382,81 @@ async function handleStart(ctx, state, segment, console, globals) {
     state.segmentCursors.incoming = pending.at(-1)?.segment ?? segment;
     console.debug(`[main] scheduling ${pending.length} pending work`);
     // Start new work.
-    state.running.push(...(await Promise.all(pending.map(async ({ _id, workId }) => {
-
+    state.running.push(...(await Promise.all(pending.map(async ({ _id, workId, segment }) => {
+        if (state.running.some((r) => r.workId === workId)) {
+            console.error(`[main] ${workId} already running (skipping start)`);
+            return null;
+        }
+        const lagMs = Date.now() - fromSegment(segment);
+        const scheduledId = await beginWork(ctx, workId, logLevel, lagMs);
         await ctx.db.delete(_id);
         return { scheduledId, workId, started: Date.now() };
-    }))));
+    }))).flatMap((r) => (r ? [r] : [])));
 }
-async function beginWork(ctx, workId, logLevel) {
+async function beginWork(ctx, workId, logLevel, lagMs) {
     const console = createLogger(logLevel);
     const work = await ctx.db.get(workId);
     if (!work) {
        throw new Error("work not found");
     }
-
+    recordStarted(console, work, lagMs);
+    const { attempts: attempt, fnHandle, fnArgs } = work;
+    const args = { workId, fnHandle, fnArgs, logLevel, attempt };
     if (work.fnType === "action") {
-        return
-            workId: work._id,
-            fnHandle: work.fnHandle,
-            fnArgs: work.fnArgs,
-            logLevel,
-        });
+        return ctx.scheduler.runAfter(0, internal.worker.runActionWrapper, args);
     }
     else if (work.fnType === "mutation") {
-        return
-            workId: work._id,
-            fnHandle: work.fnHandle,
-            fnArgs: work.fnArgs,
-            logLevel,
-        });
+        return ctx.scheduler.runAfter(0, internal.worker.runMutationWrapper, args);
     }
     else {
         throw new Error(`Unexpected fnType ${work.fnType}`);
     }
 }
+/**
+ * Reschedules a job for retry.
+ * If it's been canceled in the mean time, don't retry.
+ * @returns true if the job was rescheduled, false if it was not.
+ */
+async function rescheduleJob(ctx, work, console) {
+    const pendingCancelation = await ctx.db
+        .query("pendingCancelation")
+        .withIndex("workId", (q) => q.eq("workId", work._id))
+        .unique();
+    if (pendingCancelation) {
+        // If there's an un-processed cancelation request, don't retry.
+        console.warn(`[main] ${work._id} in pendingCancelation so not retrying`);
+        return false;
+    }
+    if (work.canceled) {
+        return false;
+    }
+    if (!work.retryBehavior) {
+        console.warn(`[main] ${work._id} has no retryBehavior so not retrying`);
+        return false;
+    }
+    const existing = await ctx.db
+        .query("pendingStart")
+        .withIndex("workId", (q) => q.eq("workId", work._id))
+        .first();
+    if (existing) {
+        // Not sure why this would ever happen, but ensure uniqueness explicitly.
+        console.error(`[main] ${work._id} already in pendingStart so not retrying`);
+        return false;
+    }
+    const backoffMs = work.retryBehavior.initialBackoffMs *
+        Math.pow(work.retryBehavior.base, work.attempts - 1);
+    const nextAttempt = withJitter(backoffMs);
+    const startTime = boundScheduledTime(Date.now() + nextAttempt, console);
+    const segment = toSegment(startTime);
+    await ctx.db.insert("pendingStart", {
+        workId: work._id,
+        segment,
+    });
+    return true;
+}
+export function withJitter(delay) {
+    return delay * (0.5 + Math.random());
+}
 async function getGlobals(ctx) {
     const globals = await ctx.db.query("globals").unique();
     if (!globals) {