@convex-dev/workpool 0.1.2 → 0.2.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +155 -8
- package/dist/commonjs/client/index.d.ts +123 -35
- package/dist/commonjs/client/index.d.ts.map +1 -1
- package/dist/commonjs/client/index.js +122 -15
- package/dist/commonjs/client/index.js.map +1 -1
- package/dist/commonjs/client/utils.d.ts +16 -0
- package/dist/commonjs/client/utils.d.ts.map +1 -0
- package/dist/commonjs/client/utils.js +2 -0
- package/dist/commonjs/client/utils.js.map +1 -0
- package/dist/commonjs/component/convex.config.d.ts.map +1 -1
- package/dist/commonjs/component/convex.config.js +0 -2
- package/dist/commonjs/component/convex.config.js.map +1 -1
- package/dist/commonjs/component/kick.d.ts +9 -0
- package/dist/commonjs/component/kick.d.ts.map +1 -0
- package/dist/commonjs/component/kick.js +97 -0
- package/dist/commonjs/component/kick.js.map +1 -0
- package/dist/commonjs/component/lib.d.ts +23 -32
- package/dist/commonjs/component/lib.d.ts.map +1 -1
- package/dist/commonjs/component/lib.js +70 -564
- package/dist/commonjs/component/lib.js.map +1 -1
- package/dist/commonjs/component/logging.d.ts +5 -3
- package/dist/commonjs/component/logging.d.ts.map +1 -1
- package/dist/commonjs/component/logging.js +13 -2
- package/dist/commonjs/component/logging.js.map +1 -1
- package/dist/commonjs/component/loop.d.ts +26 -0
- package/dist/commonjs/component/loop.d.ts.map +1 -0
- package/dist/commonjs/component/loop.js +453 -0
- package/dist/commonjs/component/loop.js.map +1 -0
- package/dist/commonjs/component/recovery.d.ts +8 -0
- package/dist/commonjs/component/recovery.d.ts.map +1 -0
- package/dist/commonjs/component/recovery.js +74 -0
- package/dist/commonjs/component/recovery.js.map +1 -0
- package/dist/commonjs/component/schema.d.ts +163 -93
- package/dist/commonjs/component/schema.d.ts.map +1 -1
- package/dist/commonjs/component/schema.js +54 -65
- package/dist/commonjs/component/schema.js.map +1 -1
- package/dist/commonjs/component/shared.d.ts +130 -0
- package/dist/commonjs/component/shared.d.ts.map +1 -0
- package/dist/commonjs/component/shared.js +65 -0
- package/dist/commonjs/component/shared.js.map +1 -0
- package/dist/commonjs/component/stats.d.ts +4 -3
- package/dist/commonjs/component/stats.d.ts.map +1 -1
- package/dist/commonjs/component/stats.js +18 -4
- package/dist/commonjs/component/stats.js.map +1 -1
- package/dist/commonjs/component/worker.d.ts +25 -0
- package/dist/commonjs/component/worker.d.ts.map +1 -0
- package/dist/commonjs/component/worker.js +86 -0
- package/dist/commonjs/component/worker.js.map +1 -0
- package/dist/esm/client/index.d.ts +123 -35
- package/dist/esm/client/index.d.ts.map +1 -1
- package/dist/esm/client/index.js +122 -15
- package/dist/esm/client/index.js.map +1 -1
- package/dist/esm/client/utils.d.ts +16 -0
- package/dist/esm/client/utils.d.ts.map +1 -0
- package/dist/esm/client/utils.js +2 -0
- package/dist/esm/client/utils.js.map +1 -0
- package/dist/esm/component/convex.config.d.ts.map +1 -1
- package/dist/esm/component/convex.config.js +0 -2
- package/dist/esm/component/convex.config.js.map +1 -1
- package/dist/esm/component/kick.d.ts +9 -0
- package/dist/esm/component/kick.d.ts.map +1 -0
- package/dist/esm/component/kick.js +97 -0
- package/dist/esm/component/kick.js.map +1 -0
- package/dist/esm/component/lib.d.ts +23 -32
- package/dist/esm/component/lib.d.ts.map +1 -1
- package/dist/esm/component/lib.js +70 -564
- package/dist/esm/component/lib.js.map +1 -1
- package/dist/esm/component/logging.d.ts +5 -3
- package/dist/esm/component/logging.d.ts.map +1 -1
- package/dist/esm/component/logging.js +13 -2
- package/dist/esm/component/logging.js.map +1 -1
- package/dist/esm/component/loop.d.ts +26 -0
- package/dist/esm/component/loop.d.ts.map +1 -0
- package/dist/esm/component/loop.js +453 -0
- package/dist/esm/component/loop.js.map +1 -0
- package/dist/esm/component/recovery.d.ts +8 -0
- package/dist/esm/component/recovery.d.ts.map +1 -0
- package/dist/esm/component/recovery.js +74 -0
- package/dist/esm/component/recovery.js.map +1 -0
- package/dist/esm/component/schema.d.ts +163 -93
- package/dist/esm/component/schema.d.ts.map +1 -1
- package/dist/esm/component/schema.js +54 -65
- package/dist/esm/component/schema.js.map +1 -1
- package/dist/esm/component/shared.d.ts +130 -0
- package/dist/esm/component/shared.d.ts.map +1 -0
- package/dist/esm/component/shared.js +65 -0
- package/dist/esm/component/shared.js.map +1 -0
- package/dist/esm/component/stats.d.ts +4 -3
- package/dist/esm/component/stats.d.ts.map +1 -1
- package/dist/esm/component/stats.js +18 -4
- package/dist/esm/component/stats.js.map +1 -1
- package/dist/esm/component/worker.d.ts +25 -0
- package/dist/esm/component/worker.d.ts.map +1 -0
- package/dist/esm/component/worker.js +86 -0
- package/dist/esm/component/worker.js.map +1 -0
- package/package.json +6 -5
- package/src/client/index.ts +232 -68
- package/src/client/utils.ts +45 -0
- package/src/component/README.md +73 -0
- package/src/component/_generated/api.d.ts +36 -66
- package/src/component/convex.config.ts +0 -3
- package/src/component/kick.test.ts +286 -0
- package/src/component/kick.ts +118 -0
- package/src/component/lib.test.ts +203 -0
- package/src/component/lib.ts +80 -671
- package/src/component/logging.ts +24 -12
- package/src/component/loop.ts +579 -0
- package/src/component/recovery.ts +79 -0
- package/src/component/schema.ts +59 -77
- package/src/component/setup.test.ts +5 -0
- package/src/component/shared.ts +127 -0
- package/src/component/stats.ts +22 -8
- package/src/component/worker.ts +94 -0
package/src/component/logging.ts
CHANGED
|
@@ -1,14 +1,6 @@
|
|
|
1
|
-
import { v } from "convex/values";
|
|
1
|
+
import { v, Infer } from "convex/values";
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
export const logLevel = v.union(
|
|
6
|
-
v.literal("DEBUG"),
|
|
7
|
-
v.literal("INFO"),
|
|
8
|
-
v.literal("WARN"),
|
|
9
|
-
v.literal("ERROR")
|
|
10
|
-
);
|
|
11
|
-
export type LogLevel = Infer<typeof logLevel>;
|
|
3
|
+
// Log level used by createLogger when no level is supplied.
export const DEFAULT_LOG_LEVEL: LogLevel = "WARN";
|
|
12
4
|
|
|
13
5
|
export type Logger = {
|
|
14
6
|
debug: (...args: unknown[]) => void;
|
|
@@ -17,10 +9,13 @@ export type Logger = {
|
|
|
17
9
|
error: (...args: unknown[]) => void;
|
|
18
10
|
time: (label: string) => void;
|
|
19
11
|
timeEnd: (label: string) => void;
|
|
12
|
+
event: (event: string, payload: Record<string, unknown>) => void;
|
|
20
13
|
};
|
|
21
14
|
|
|
22
|
-
export function createLogger(level
|
|
23
|
-
const levelIndex = ["DEBUG", "INFO", "WARN", "ERROR"].indexOf(
|
|
15
|
+
export function createLogger(level?: LogLevel): Logger {
|
|
16
|
+
const levelIndex = ["DEBUG", "INFO", "WARN", "ERROR"].indexOf(
|
|
17
|
+
level ?? DEFAULT_LOG_LEVEL
|
|
18
|
+
);
|
|
24
19
|
if (levelIndex === -1) {
|
|
25
20
|
throw new Error(`Invalid log level: ${level}`);
|
|
26
21
|
}
|
|
@@ -55,5 +50,22 @@ export function createLogger(level: LogLevel): Logger {
|
|
|
55
50
|
console.timeEnd(label);
|
|
56
51
|
}
|
|
57
52
|
},
|
|
53
|
+
event: (event: string, payload: Record<string, unknown>) => {
|
|
54
|
+
if (levelIndex <= 1) {
|
|
55
|
+
const fullPayload = {
|
|
56
|
+
system: "idempotent-workpool-component",
|
|
57
|
+
event,
|
|
58
|
+
payload,
|
|
59
|
+
};
|
|
60
|
+
console.info(JSON.stringify(fullPayload));
|
|
61
|
+
}
|
|
62
|
+
},
|
|
58
63
|
};
|
|
59
64
|
}
|
|
65
|
+
// Validator for the supported log levels; the LogLevel type below is inferred from it.
export const logLevel = v.union(
|
|
66
|
+
v.literal("DEBUG"),
|
|
67
|
+
v.literal("INFO"),
|
|
68
|
+
v.literal("WARN"),
|
|
69
|
+
v.literal("ERROR")
|
|
70
|
+
);
|
|
71
|
+
export type LogLevel = Infer<typeof logLevel>;
|
|
@@ -0,0 +1,579 @@
|
|
|
1
|
+
import { FunctionHandle, WithoutSystemFields } from "convex/server";
|
|
2
|
+
import { v } from "convex/values";
|
|
3
|
+
import { internal } from "./_generated/api.js";
|
|
4
|
+
import { Doc, Id } from "./_generated/dataModel.js";
|
|
5
|
+
import { internalMutation, MutationCtx } from "./_generated/server.js";
|
|
6
|
+
import { DEFAULT_MAX_PARALLELISM } from "./kick.js";
|
|
7
|
+
import {
|
|
8
|
+
createLogger,
|
|
9
|
+
DEFAULT_LOG_LEVEL,
|
|
10
|
+
Logger,
|
|
11
|
+
LogLevel,
|
|
12
|
+
} from "./logging.js";
|
|
13
|
+
import {
|
|
14
|
+
boundScheduledTime,
|
|
15
|
+
Config,
|
|
16
|
+
currentSegment,
|
|
17
|
+
fromSegment,
|
|
18
|
+
nextSegment,
|
|
19
|
+
OnCompleteArgs,
|
|
20
|
+
runResult,
|
|
21
|
+
toSegment,
|
|
22
|
+
} from "./shared.js";
|
|
23
|
+
import { recordCompleted, recordReport, recordStarted } from "./stats.js";
|
|
24
|
+
|
|
25
|
+
/** Max cancelations read per pass; cancelation is the only queue that can get unbounded. */
const CANCELLATION_BATCH_SIZE = 64;
const SECOND = 1000;
const MINUTE = 60 * SECOND;
/** Attempt to recover jobs that started at least this long ago. */
const RECOVERY_THRESHOLD_MS = 5 * MINUTE;
/** How often (in segments) to check for jobs to recover. */
const RECOVERY_PERIOD_SEGMENTS = toSegment(MINUTE);
/** Buffer (in segments) subtracted from cursors when reading the queues. */
const CURSOR_BUFFER_SEGMENTS = toSegment(2 * SECOND);

/** The internalState document contents used when none exists yet. */
export const INITIAL_STATE: WithoutSystemFields<Doc<"internalState">> = {
  generation: 0n,
  segmentCursors: {
    incoming: 0n,
    completion: 0n,
    cancelation: 0n,
  },
  lastRecovery: 0n,
  report: {
    completed: 0,
    succeeded: 0,
    failed: 0,
    retries: 0,
    canceled: 0,
    lastReportTs: 0,
  },
  running: [],
};
|
|
45
|
+
|
|
46
|
+
// There should only ever be at most one of these scheduled or running.
/**
 * One iteration of the loop: processes completions, then cancelations,
 * schedules recovery for old running jobs when due, starts pending work,
 * emits the periodic report, persists the state, and finally schedules
 * `complete` (for finished work) and `updateRunStatus` (with the new
 * generation).
 *
 * Throws if `args.generation` does not match the stored generation.
 */
export const main = internalMutation({
  args: {
    generation: v.int64(),
    segment: v.int64(),
  },
  handler: async (ctx, args) => {
    // State is mutated in memory below and written back once at the end.
    const state = await getOrCreateState(ctx);
    if (state.generation !== args.generation) {
      throw new Error(
        `generation mismatch: ${args.generation} !== ${state.generation}`
      );
    }
    state.generation++;

    const globals = await getGlobals(ctx);
    const console = createLogger(globals.logLevel);

    // Completions (and their retries) go first so retries can be canceled.
    console.time("[main] pendingCompletion");
    const completions = await handleCompletions(
      ctx,
      state,
      args.segment,
      console
    );
    console.timeEnd("[main] pendingCompletion");

    // Cancelations remove work from pendingStart.
    console.time("[main] pendingCancelation");
    const cancelations = await handleCancelation(
      ctx,
      state,
      args.segment,
      console
    );
    console.timeEnd("[main] pendingCancelation");
    const done = [...completions, ...cancelations];

    const nothingRunning = state.running.length === 0;
    const recoveryDue =
      args.segment - state.lastRecovery >= RECOVERY_PERIOD_SEGMENTS;
    if (nothingRunning || recoveryDue) {
      if (!nothingRunning) {
        // Hand jobs that started long enough ago over to recovery.
        const cutoff = Date.now() - RECOVERY_THRESHOLD_MS;
        const jobs = state.running.filter((job) => job.started < cutoff);
        if (jobs.length) {
          await ctx.scheduler.runAfter(0, internal.recovery.recover, { jobs });
        }
      }
      // With nothing active this simply resets the recovery clock.
      state.lastRecovery = args.segment;
    }

    // Start pending work up to capacity; also advances the incoming cursor.
    console.time("[main] pendingStart");
    await handleStart(ctx, state, args.segment, console, globals);
    console.timeEnd("[main] pendingStart");

    if (Date.now() - state.report.lastReportTs >= MINUTE) {
      // Advance by exactly a minute to avoid drift, unless we have fallen
      // more than half a minute behind, in which case start fresh from now.
      const shifted = state.report.lastReportTs + MINUTE;
      const lastReportTs =
        Date.now() > shifted + MINUTE / 2 ? Date.now() : shifted;
      console.info(recordReport(state));
      state.report = {
        completed: 0,
        succeeded: 0,
        failed: 0,
        retries: 0,
        canceled: 0,
        lastReportTs,
      };
    }

    await ctx.db.replace(state._id, state);
    await ctx.scheduler.runAfter(0, internal.loop.complete, { done });
    await ctx.scheduler.runAfter(0, internal.loop.updateRunStatus, {
      generation: state.generation,
    });
  },
});
|
|
119
|
+
|
|
120
|
+
/**
 * Finalizes finished work: for each entry, runs the work's `onComplete`
 * handler (if it has one) with the run result, then deletes the work
 * document. Errors thrown by `onComplete` are logged and do not prevent the
 * deletion. Entries whose work document no longer exists are skipped with a
 * warning.
 */
export const complete = internalMutation({
  args: {
    done: v.array(v.object({ runResult, workId: v.id("work") })),
  },
  handler: async (ctx, args) => {
    const { logLevel } = await getGlobals(ctx);
    const console = createLogger(logLevel);

    const finishOne = async (entry: (typeof args.done)[number]) => {
      const { workId } = entry;
      const work = await ctx.db.get(workId);
      if (!work) {
        console.warn(`[complete] ${workId} is done, but its work is gone`);
        return;
      }
      const onComplete = work.onComplete;
      if (onComplete) {
        try {
          const handle = onComplete.fnHandle as FunctionHandle<
            "mutation",
            OnCompleteArgs,
            void
          >;
          await ctx.runMutation(handle, {
            workId: work._id,
            context: onComplete.context,
            result: entry.runResult,
          });
          console.debug(`[complete] onComplete for ${workId} completed`);
        } catch (e) {
          console.error(
            `[complete] error running onComplete for ${workId}`,
            e
          );
        }
      }
      await ctx.db.delete(workId);
    };

    await Promise.all(args.done.map((entry) => finishOne(entry)));
  },
});
|
|
159
|
+
|
|
160
|
+
/**
 * Decides when `main` should run next and records that in `runStatus`.
 *
 * In order:
 * 1. If there are cancelations between the cancelation cursor and the current
 *    segment, run `main` immediately.
 * 2. If the next segment is actionable, patch any adjusted cursors and
 *    schedule `main` for the next segment.
 * 3. Otherwise find the earliest future segment with pending work (or the
 *    next recovery check, whichever is first) and schedule `main` for it,
 *    marking the run status as "scheduled".
 * 4. If there is nothing pending and nothing running, mark the status "idle".
 *
 * Throws if `args.generation` does not match the stored generation.
 */
export const updateRunStatus = internalMutation({
  args: { generation: v.int64() },
  handler: async (ctx, args) => {
    const globals = await getGlobals(ctx);
    const console = createLogger(globals.logLevel);
    const maxParallelism = globals.maxParallelism;
    const state = await getOrCreateState(ctx);
    if (args.generation !== state.generation) {
      throw new Error(
        `generation mismatch: ${args.generation} !== ${state.generation}`
      );
    }

    console.time("[updateRunStatus] outstandingCancelations");
    const thisSegment = currentSegment();
    const outstandingCancelations = await getNextUp(ctx, "pendingCancelation", {
      start: state.segmentCursors.cancelation,
      end: thisSegment,
    });
    console.timeEnd("[updateRunStatus] outstandingCancelations");
    if (outstandingCancelations) {
      await ctx.scheduler.runAfter(0, internal.loop.main, {
        generation: args.generation,
        segment: thisSegment,
      });
      return;
    }

    console.time("[updateRunStatus] nextSegmentIsActionable");
    const [nextIsActionable, cursors] = await nextSegmentIsActionable(
      ctx,
      state,
      maxParallelism
    );
    console.timeEnd("[updateRunStatus] nextSegmentIsActionable");

    const start = nextSegment();
    if (nextIsActionable) {
      await ctx.db.patch(state._id, {
        segmentCursors: {
          ...state.segmentCursors,
          ...cursors,
        },
      });
      await ctx.scheduler.runAt(fromSegment(start), internal.loop.main, {
        generation: args.generation,
        segment: start,
      });
      return;
    }

    // Find next actionable segment (min next segment).
    console.time("[updateRunStatus] findNextSegment");
    const actionableTables: (
      | "pendingCompletion"
      | "pendingCancelation"
      | "pendingStart"
    )[] = ["pendingCompletion", "pendingCancelation"];
    if (state.running.length < maxParallelism) {
      // Pending starts only matter when there is capacity to run them.
      actionableTables.push("pendingStart");
    }
    const docs = await Promise.all(
      actionableTables.map(async (tableName) =>
        getNextUp(ctx, tableName, { start })
      )
    );
    console.timeEnd("[updateRunStatus] findNextSegment");

    // Numeric minimum over the segments that exist. The default
    // Array.prototype.sort() compares elements as strings, which would order
    // bigints lexicographically (e.g. 10n before 9n).
    let segment: bigint | undefined = undefined;
    for (const doc of docs) {
      if (doc && (segment === undefined || doc.segment < segment)) {
        segment = doc.segment;
      }
    }

    const runStatus = await getOrCreateRunningStatus(ctx);
    const saturated = state.running.length >= maxParallelism;
    if (segment !== undefined || state.running.length > 0) {
      // If there's something to do, schedule for next actionable segment.
      // Or the next recovery, whichever comes first.
      const nextRecoverySegment = state.lastRecovery + RECOVERY_PERIOD_SEGMENTS;
      if (segment === undefined || segment > nextRecoverySegment) {
        segment = nextRecoverySegment;
      }
      const scheduledId = await ctx.scheduler.runAt(
        fromSegment(segment),
        internal.loop.main,
        { generation: args.generation, segment }
      );
      await ctx.db.patch(runStatus._id, {
        state: {
          kind: "scheduled",
          scheduledId,
          saturated,
          generation: args.generation,
          segment,
        },
      });
      return;
    }
    // There seems to be nothing in the future to do, so go idle.
    await ctx.db.patch(runStatus._id, {
      state: { kind: "idle", generation: args.generation },
    });
  },
});
|
|
259
|
+
|
|
260
|
+
/**
 * Reports whether there is work to act on by the next segment.
 *
 * Returns a tuple of `[actionable, cursorOverrides]`. The overrides are
 * non-empty only when an entry was found *before* one of the stored cursors
 * (an out-of-order addition); the matching cursor should then be moved to
 * that entry's segment. `pendingStart` is only consulted when fewer than
 * `maxParallelism` jobs are running.
 */
async function nextSegmentIsActionable(
  ctx: MutationCtx,
  state: Doc<"internalState">,
  maxParallelism: number
): Promise<
  [boolean, { completion?: bigint; cancelation?: bigint; incoming?: bigint }]
> {
  const { cancelation, completion, incoming } = state.segmentCursors;
  const hasCapacity = state.running.length < maxParallelism;

  // First, try with our cursor range, up to next segment.
  const end = nextSegment();
  const upcomingCancelation = await getNextUp(ctx, "pendingCancelation", {
    start: cancelation,
    end,
  });
  if (upcomingCancelation) {
    return [true, {}];
  }
  const upcomingCompletion = await getNextUp(ctx, "pendingCompletion", {
    start: completion,
    end,
  });
  if (upcomingCompletion) {
    return [true, {}];
  }
  if (hasCapacity) {
    const upcomingStart = await getNextUp(ctx, "pendingStart", {
      start: incoming,
      end,
    });
    if (upcomingStart) {
      return [true, {}];
    }
  }

  // Next, we look for out-of-order additions we may have missed.
  const oldCompletion = await getNextUp(ctx, "pendingCompletion", {
    end: completion,
  });
  if (oldCompletion) {
    return [true, { completion: oldCompletion.segment }];
  }
  const oldCancelation = await getNextUp(ctx, "pendingCancelation", {
    end: cancelation,
  });
  if (oldCancelation) {
    return [true, { cancelation: oldCancelation.segment }];
  }
  if (hasCapacity) {
    const oldStart = await getNextUp(ctx, "pendingStart", { end: incoming });
    if (oldStart) {
      return [true, { incoming: oldStart.segment }];
    }
  }
  return [false, {}];
}
|
|
318
|
+
|
|
319
|
+
/**
 * Fetch the next item. If only one of start & end are provided, it's exclusive.
 *
 * - Both bounds: `start - CURSOR_BUFFER_SEGMENTS <= segment <= end`.
 * - Only `start`: `segment > start - CURSOR_BUFFER_SEGMENTS`.
 * - Only `end`: `segment < end`.
 * - Neither: the first entry in the "segment" index.
 *
 * Bounds are compared against `undefined` rather than tested for truthiness,
 * because `0n` is a legitimate bound (it is the initial cursor value) and is
 * falsy.
 *
 * @returns the first matching document by the "segment" index, or null.
 */
async function getNextUp(
  ctx: MutationCtx,
  table: "pendingCompletion" | "pendingCancelation" | "pendingStart",
  range: { start?: bigint; end?: bigint }
) {
  const { start, end } = range;
  return ctx.db
    .query(table)
    .withIndex("segment", (q) =>
      start !== undefined
        ? end !== undefined
          ? q
              .gte("segment", start - CURSOR_BUFFER_SEGMENTS)
              .lte("segment", end)
          : q.gt("segment", start - CURSOR_BUFFER_SEGMENTS)
        : end !== undefined
          ? q.lt("segment", end)
          : q
    )
    .first();
}
|
|
340
|
+
|
|
341
|
+
/**
 * Handles the completion of pending completions.
 *
 * Reads every `pendingCompletion` between the completion cursor (minus
 * CURSOR_BUFFER_SEGMENTS) and `segment`, deletes it, and for work that is
 * still tracked in `state.running` either reschedules it (failed, attempts
 * remaining, and no pending cancelation) or marks it done. Updates
 * `state.report`, advances the completion cursor to `segment`, and removes
 * all completed work from `state.running`.
 *
 * Important: It should handle retries before cancelations are processed,
 * to allow retries to be canceled.
 *
 * @returns `{ runResult, workId }` for each piece of work that is done
 *   (i.e. not being retried).
 */
async function handleCompletions(
  ctx: MutationCtx,
  state: Doc<"internalState">,
  segment: bigint,
  console: Logger
) {
  const startSegment = state.segmentCursors.completion - CURSOR_BUFFER_SEGMENTS;
  // This won't be too many because the jobs all correspond to being scheduled
  // by a single main (the previous one), so they're limited by MAX_PARALLELISM.
  const completed = await ctx.db
    .query("pendingCompletion")
    .withIndex("segment", (q) =>
      q.gte("segment", startSegment).lte("segment", segment)
    )
    .collect();
  state.report.completed += completed.length;
  state.segmentCursors.completion = segment;
  // state.running is not modified until after all completions are handled,
  // so one snapshot of its ids serves every membership check below.
  const runningIds = new Set(state.running.map((r) => r.workId));
  const done: Doc<"pendingCompletion">[] = [];
  await Promise.all(
    completed.map(async (c) => {
      await ctx.db.delete(c._id);
      const work = await ctx.db.get(c.workId);
      const maxAttempts = work?.retryBehavior?.maxAttempts;
      const pendingCancelations = await ctx.db
        .query("pendingCancelation")
        .withIndex("workId", (q) => q.eq("workId", c.workId))
        .collect();
      if (work && runningIds.has(c.workId)) {
        if (
          c.runResult.kind === "failed" &&
          maxAttempts &&
          pendingCancelations.length === 0 &&
          work.attempts < maxAttempts
        ) {
          await rescheduleJob(ctx, work, console);
          state.report.retries++;
        } else {
          if (c.runResult.kind === "success") {
            state.report.succeeded++;
          } else if (c.runResult.kind === "failed") {
            state.report.failed++;
          }
          // Ensure there aren't any pending cancelations for this work.
          for (const pendingCancelation of pendingCancelations) {
            await ctx.db.delete(pendingCancelation._id);
          }
          done.push(c);
        }
        console.info(recordCompleted(work, c.runResult.kind));
      } else if (work) {
        console.warn(`[main] completing ${c.workId} but it's not in "running"`);
      } else {
        console.warn(`[main] completing ${c.workId} but it's not found`);
      }
    })
  );
  console.debug(`[main] completing ${done.length}`);
  const completedIds = new Set(completed.map((c) => c.workId));
  state.running = state.running.filter((r) => !completedIds.has(r.workId));
  return done.map((c) => ({ runResult: c.runResult, workId: c.workId }));
}
|
|
409
|
+
|
|
410
|
+
/**
 * Queues another attempt for `work` with jittered exponential backoff:
 * `initialBackoffMs * base^(attempts - 1)`, passed through `withJitter`.
 * Increments the work's `attempts` and inserts a `pendingStart` entry at the
 * segment of the computed start time.
 *
 * @returns the jittered delay in milliseconds.
 * @throws if the work has no `retryBehavior`.
 */
async function rescheduleJob(
  ctx: MutationCtx,
  work: Doc<"work">,
  console: Logger
): Promise<number> {
  const retry = work.retryBehavior;
  if (!retry) {
    throw new Error("work has no retryBehavior");
  }
  const { initialBackoffMs, base } = retry;
  const nextAttempt = withJitter(
    initialBackoffMs * Math.pow(base, work.attempts - 1)
  );
  const segment = toSegment(
    boundScheduledTime(Date.now() + nextAttempt, console)
  );
  await ctx.db.patch(work._id, { attempts: work.attempts + 1 });
  await ctx.db.insert("pendingStart", { workId: work._id, segment });
  return nextAttempt;
}
|
|
433
|
+
|
|
434
|
+
/**
 * Scales `delay` by a random factor in [0.5, 1.5).
 *
 * @param delay the base delay.
 * @returns the jittered delay.
 */
export function withJitter(delay: number) {
  const factor = 0.5 + Math.random();
  return delay * factor;
}
|
|
437
|
+
|
|
438
|
+
/**
 * Processes up to CANCELLATION_BATCH_SIZE `pendingCancelation` entries
 * between the cancelation cursor (minus CURSOR_BUFFER_SEGMENTS) and `segment`.
 *
 * Each entry is deleted. If its work still exists, the work's retry behavior
 * is cleared so it won't retry, and any `pendingStart` for it is removed so
 * it won't start. The cancelation cursor advances to the last entry read, or
 * to `segment` if none were read.
 *
 * @returns a `{ runResult: { kind: "canceled" }, workId }` entry for each
 *   piece of work that was removed from `pendingStart`.
 */
async function handleCancelation(
  ctx: MutationCtx,
  state: Doc<"internalState">,
  segment: bigint,
  console: Logger
) {
  const start = state.segmentCursors.cancelation - CURSOR_BUFFER_SEGMENTS;
  const canceled = await ctx.db
    .query("pendingCancelation")
    .withIndex("segment", (q) =>
      q.gte("segment", start).lte("segment", segment)
    )
    .take(CANCELLATION_BATCH_SIZE);
  state.segmentCursors.cancelation = canceled.at(-1)?.segment ?? segment;
  console.debug(`[main] attempting to cancel ${canceled.length}`);
  const canceledWork: Set<Id<"work">> = new Set();
  await Promise.all(
    canceled.map(async ({ _id, workId }) => {
      await ctx.db.delete(_id);
      const work = await ctx.db.get(workId);
      if (!work) {
        console.warn(`[handleCancelation] ${workId} is gone`);
        return;
      }
      // Ensure it doesn't retry.
      await ctx.db.patch(workId, { retryBehavior: undefined });
      // Ensure it doesn't start.
      const pendingStart = await ctx.db
        .query("pendingStart")
        .withIndex("workId", (q) => q.eq("workId", workId))
        .unique();
      if (pendingStart && !canceledWork.has(workId)) {
        // Claim the work before the next await. These callbacks interleave,
        // so a duplicate cancelation for the same work could otherwise pass
        // the check above too, double-count it, and delete the same
        // pendingStart twice.
        canceledWork.add(workId);
        console.info(recordCompleted(work, "canceled"));
        state.report.canceled++;
        await ctx.db.delete(pendingStart._id);
      }
    })
  );
  return Array.from(canceledWork).map((id) => ({
    runResult: { kind: "canceled" as const },
    workId: id,
  }));
}
|
|
482
|
+
|
|
483
|
+
/**
 * Starts pending work until `globals.maxParallelism` jobs are running.
 *
 * Reads `pendingStart` entries between the incoming cursor (minus
 * CURSOR_BUFFER_SEGMENTS) and `segment`, begins each one, deletes the entry,
 * and appends `{ scheduledId, workId, started }` to `state.running`. The
 * incoming cursor advances to the last entry read, or to `segment` if none
 * were read.
 */
async function handleStart(
  ctx: MutationCtx,
  state: Doc<"internalState">,
  segment: bigint,
  console: Logger,
  globals: Config
) {
  const maxParallelism = globals.maxParallelism;
  // Schedule as many as needed to reach maxParallelism. Clamp at zero: more
  // jobs can be running than maxParallelism allows (e.g. if the configured
  // limit is lower than the current running count), and a negative count
  // must not be passed to take().
  const toSchedule = Math.max(0, maxParallelism - state.running.length);

  const pending = await ctx.db
    .query("pendingStart")
    .withIndex("segment", (q) =>
      q
        .gte("segment", state.segmentCursors.incoming - CURSOR_BUFFER_SEGMENTS)
        .lte("segment", segment)
    )
    .take(toSchedule);
  state.segmentCursors.incoming = pending.at(-1)?.segment ?? segment;
  console.debug(`[main] scheduling ${pending.length} pending work`);
  // Start new work.
  state.running.push(
    ...(await Promise.all(
      pending.map(async ({ _id, workId }) => {
        const scheduledId = await beginWork(ctx, workId, globals.logLevel);
        await ctx.db.delete(_id);
        return { scheduledId, workId, started: Date.now() };
      })
    ))
  );
}
|
|
515
|
+
|
|
516
|
+
/**
 * Schedules the wrapper function that runs a piece of work right away.
 *
 * @returns the id of the scheduled wrapper function.
 * @throws if the work document is missing or its `fnType` is neither
 *   "action" nor "mutation".
 */
async function beginWork(
  ctx: MutationCtx,
  workId: Id<"work">,
  logLevel: LogLevel
): Promise<Id<"_scheduled_functions">> {
  const console = createLogger(logLevel);
  const work = await ctx.db.get(workId);
  if (!work) {
    throw new Error("work not found");
  }
  console.info(recordStarted(work));
  // Both wrappers take the same arguments.
  const wrapperArgs = {
    workId: work._id,
    fnHandle: work.fnHandle,
    fnArgs: work.fnArgs,
    logLevel,
  };
  switch (work.fnType) {
    case "action":
      return await ctx.scheduler.runAfter(
        0,
        internal.worker.runActionWrapper,
        wrapperArgs
      );
    case "mutation":
      return await ctx.scheduler.runAfter(
        0,
        internal.worker.runMutationWrapper,
        wrapperArgs
      );
    default:
      throw new Error(`Unexpected fnType ${work.fnType}`);
  }
}
|
|
545
|
+
|
|
546
|
+
/**
 * Loads the single `globals` document, falling back to the default
 * parallelism and log level when none has been stored.
 */
async function getGlobals(ctx: MutationCtx) {
  const stored = await ctx.db.query("globals").unique();
  if (stored) {
    return stored;
  }
  return {
    maxParallelism: DEFAULT_MAX_PARALLELISM,
    logLevel: DEFAULT_LOG_LEVEL,
  };
}
|
|
556
|
+
|
|
557
|
+
/**
 * Returns the single `internalState` document. If it is missing, logs an
 * error and inserts a fresh one from INITIAL_STATE.
 */
async function getOrCreateState(ctx: MutationCtx) {
  const existing = await ctx.db.query("internalState").unique();
  if (existing) {
    return existing;
  }
  const { logLevel } = await getGlobals(ctx);
  const console = createLogger(logLevel);
  console.error("No internalState in running loop! Re-creating empty one...");
  const stateId = await ctx.db.insert("internalState", INITIAL_STATE);
  // The document was inserted just above, so the read cannot miss.
  return (await ctx.db.get(stateId))!;
}
|
|
567
|
+
|
|
568
|
+
/**
 * Returns the single `runStatus` document. If it is missing, logs an error
 * and inserts one in the "running" state.
 */
async function getOrCreateRunningStatus(ctx: MutationCtx) {
  const existing = await ctx.db.query("runStatus").unique();
  if (existing) {
    return existing;
  }
  const { logLevel } = await getGlobals(ctx);
  const console = createLogger(logLevel);
  console.error("No runStatus in running loop! Re-creating one...");
  const statusId = await ctx.db.insert("runStatus", {
    state: { kind: "running" },
  });
  // The document was inserted just above, so the read cannot miss.
  return (await ctx.db.get(statusId))!;
}
|
|
578
|
+
// Module-level `console` is deliberately a string; presumably so that code in
// this file that forgets to create a logger with createLogger does not fall
// back to the global console unnoticed.
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
579
|
+
const console = "THIS IS A REMINDER TO USE createLogger";
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import { Id } from "./_generated/dataModel.js";
|
|
2
|
+
import { internalMutation } from "./_generated/server.js";
|
|
3
|
+
import { kickMainLoop } from "./kick.js";
|
|
4
|
+
import { createLogger } from "./logging.js";
|
|
5
|
+
import schema from "./schema.js";
|
|
6
|
+
import { RunResult, nextSegment } from "./shared.js";
|
|
7
|
+
|
|
8
|
+
/**
 * Reconciles running jobs against their scheduled functions.
 *
 * For each job:
 * - scheduled function not found: report the work as failed;
 * - scheduled function failed: report the failure;
 * - scheduled function canceled: queue a cancelation.
 * Anything already queued in `pendingCompletion` / `pendingCancelation` is
 * not reported again. Jobs in any other state are left alone. If anything
 * was queued, the main loop is kicked so it gets processed.
 */
export const recover = internalMutation({
  args: {
    jobs: schema.tables.internalState.validator.fields.running,
  },
  handler: async (ctx, { jobs }) => {
    const globals = await ctx.db.query("globals").unique();
    const console = createLogger(globals?.logLevel);
    let didAnything = false;
    const segment = nextSegment();

    // Queue a completion for the work unless one is already pending.
    const reportCompletion = async (
      workId: Id<"work">,
      runResult: RunResult,
      preamble: string
    ) => {
      const pendingCompletion = await ctx.db
        .query("pendingCompletion")
        .withIndex("workId", (q) => q.eq("workId", workId))
        .first();
      if (pendingCompletion) {
        console.debug(
          `${preamble} already in pendingCompletion, not reporting`
        );
        return;
      }
      await ctx.db.insert("pendingCompletion", { runResult, workId, segment });
      didAnything = true;
    };

    await Promise.all(
      jobs.map(async (job) => {
        const scheduled = await ctx.db.system.get(job.scheduledId);
        const preamble = `[recovery] Scheduled job ${job.scheduledId} for work ${job.workId}`;
        if (scheduled === null) {
          console.warn(`${preamble} not found`);
          // Without a scheduled function nothing will ever report this work,
          // so report it as failed here rather than leaving it in "running".
          await reportCompletion(
            job.workId,
            { kind: "failed", error: `Scheduled job not found` },
            preamble
          );
          return;
        }
        // This will find everything that timed out, failed ungracefully, was
        // canceled, or succeeded without a return value.
        switch (scheduled.state.kind) {
          case "failed": {
            console.debug(`${preamble} failed and detected in recovery`);
            await reportCompletion(job.workId, scheduled.state, preamble);
            break;
          }
          case "canceled": {
            console.debug(`${preamble} was canceled and detected in recovery`);
            const pendingCancelation = await ctx.db
              .query("pendingCancelation")
              .withIndex("workId", (q) => q.eq("workId", job.workId))
              .first();
            if (pendingCancelation) {
              console.debug(
                `${preamble} already in pendingCancelation, not reporting`
              );
            } else {
              await ctx.db.insert("pendingCancelation", {
                workId: job.workId,
                segment,
              });
              didAnything = true;
            }
            break;
          }
        }
      })
    );
    if (didAnything) {
      await kickMainLoop(ctx, "recovery");
    }
  },
});
|