@convex-dev/workpool 0.4.6 → 0.4.7-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/component/_generated/api.d.ts +2 -0
- package/dist/component/_generated/api.d.ts.map +1 -1
- package/dist/component/_generated/api.js.map +1 -1
- package/dist/component/complete.d.ts.map +1 -1
- package/dist/component/complete.js +8 -7
- package/dist/component/complete.js.map +1 -1
- package/dist/component/danger.js +7 -7
- package/dist/component/danger.js.map +1 -1
- package/dist/component/future.d.ts +11 -0
- package/dist/component/future.d.ts.map +1 -0
- package/dist/component/future.js +21 -0
- package/dist/component/future.js.map +1 -0
- package/dist/component/kick.d.ts +3 -3
- package/dist/component/kick.d.ts.map +1 -1
- package/dist/component/kick.js +14 -16
- package/dist/component/kick.js.map +1 -1
- package/dist/component/lib.d.ts.map +1 -1
- package/dist/component/lib.js +13 -13
- package/dist/component/lib.js.map +1 -1
- package/dist/component/loop.d.ts +44 -1
- package/dist/component/loop.d.ts.map +1 -1
- package/dist/component/loop.js +171 -217
- package/dist/component/loop.js.map +1 -1
- package/dist/component/recovery.d.ts.map +1 -1
- package/dist/component/recovery.js +2 -2
- package/dist/component/recovery.js.map +1 -1
- package/dist/component/schema.d.ts.map +1 -1
- package/dist/component/schema.js +2 -1
- package/dist/component/schema.js.map +1 -1
- package/dist/component/worker.js +1 -1
- package/dist/component/worker.js.map +1 -1
- package/package.json +8 -12
- package/src/component/_generated/api.ts +2 -0
- package/src/component/complete.test.ts +13 -13
- package/src/component/complete.ts +13 -7
- package/src/component/danger.ts +7 -7
- package/src/component/future.ts +38 -0
- package/src/component/kick.test.ts +17 -20
- package/src/component/kick.ts +20 -17
- package/src/component/lib.test.ts +7 -7
- package/src/component/lib.ts +12 -15
- package/src/component/loop.test.ts +695 -1127
- package/src/component/loop.ts +212 -283
- package/src/component/recovery.test.ts +3 -3
- package/src/component/recovery.ts +5 -2
- package/src/component/schema.ts +2 -1
- package/src/component/stateMachine.test.ts +1246 -0
- package/src/component/stats.test.ts +4 -4
- package/src/component/worker.ts +1 -1
package/src/component/loop.ts
CHANGED
|
@@ -1,8 +1,13 @@
|
|
|
1
1
|
import type { WithoutSystemFields } from "convex/server";
|
|
2
2
|
import { v } from "convex/values";
|
|
3
|
+
import { runSnapshotQuery } from "./future.js";
|
|
3
4
|
import { internal } from "./_generated/api.js";
|
|
4
5
|
import type { Doc, Id } from "./_generated/dataModel.js";
|
|
5
|
-
import {
|
|
6
|
+
import {
|
|
7
|
+
internalMutation,
|
|
8
|
+
internalQuery,
|
|
9
|
+
type MutationCtx,
|
|
10
|
+
} from "./_generated/server.js";
|
|
6
11
|
import type { CompleteJob } from "./complete.js";
|
|
7
12
|
import {
|
|
8
13
|
createLogger,
|
|
@@ -16,8 +21,8 @@ import {
|
|
|
16
21
|
DEFAULT_MAX_PARALLELISM,
|
|
17
22
|
fromSegment,
|
|
18
23
|
getCurrentSegment,
|
|
19
|
-
getNextSegment,
|
|
20
24
|
max,
|
|
25
|
+
min,
|
|
21
26
|
type RunResult,
|
|
22
27
|
toSegment,
|
|
23
28
|
} from "./shared.js";
|
|
@@ -32,7 +37,11 @@ const RECOVERY_THRESHOLD_MS = 5 * MINUTE; // attempt to recover jobs this old.
|
|
|
32
37
|
export const RECOVERY_PERIOD_SEGMENTS = toSegment(1 * MINUTE); // how often to check.
|
|
33
38
|
export const STATUS_COOLDOWN = 2 * SECOND;
|
|
34
39
|
export const COOLDOWN_CHECK_INTERVAL = 200 * MS;
|
|
35
|
-
|
|
40
|
+
// Buffer applied when querying with cursors. Transactions that started
|
|
41
|
+
// before ours may still be running and commit inserts at segments behind
|
|
42
|
+
// a previously advanced cursor — the buffer lets us pick those up.
|
|
43
|
+
const CURSOR_BUFFER_SEGMENTS = toSegment(30 * SECOND);
|
|
44
|
+
|
|
36
45
|
export const INITIAL_STATE: WithoutSystemFields<Doc<"internalState">> = {
|
|
37
46
|
generation: 0n,
|
|
38
47
|
segmentCursors: { incoming: 0n, completion: 0n, cancelation: 0n },
|
|
@@ -48,10 +57,66 @@ export const INITIAL_STATE: WithoutSystemFields<Doc<"internalState">> = {
|
|
|
48
57
|
running: [],
|
|
49
58
|
};
|
|
50
59
|
|
|
60
|
+
/**
|
|
61
|
+
* Single query that returns everything the main loop needs to process.
|
|
62
|
+
*/
|
|
63
|
+
export const getPending = internalQuery({
|
|
64
|
+
args: {
|
|
65
|
+
completionCursor: v.int64(),
|
|
66
|
+
cancelationCursor: v.int64(),
|
|
67
|
+
incomingCursor: v.int64(),
|
|
68
|
+
maxParallelism: v.number(),
|
|
69
|
+
runningCount: v.number(),
|
|
70
|
+
},
|
|
71
|
+
handler: async (
|
|
72
|
+
ctx,
|
|
73
|
+
{
|
|
74
|
+
completionCursor,
|
|
75
|
+
cancelationCursor,
|
|
76
|
+
incomingCursor,
|
|
77
|
+
maxParallelism,
|
|
78
|
+
runningCount,
|
|
79
|
+
},
|
|
80
|
+
) => {
|
|
81
|
+
const completions = await ctx.db
|
|
82
|
+
.query("pendingCompletion")
|
|
83
|
+
.withIndex("segment", (q) => q.gte("segment", completionCursor))
|
|
84
|
+
.take(maxParallelism);
|
|
85
|
+
const cancelations = await ctx.db
|
|
86
|
+
.query("pendingCancelation")
|
|
87
|
+
.withIndex("segment", (q) => q.gte("segment", cancelationCursor))
|
|
88
|
+
.take(CANCELLATION_BATCH_SIZE);
|
|
89
|
+
// Available slots after we process this batch's completions, plus 1
|
|
90
|
+
// for the +1 trick (detect overflow vs. a future-scheduled retry).
|
|
91
|
+
const startLimit = Math.max(
|
|
92
|
+
0,
|
|
93
|
+
maxParallelism - runningCount + completions.length,
|
|
94
|
+
);
|
|
95
|
+
const excludedIds = [
|
|
96
|
+
...completions.map((c) => c.workId),
|
|
97
|
+
...cancelations.map((c) => c.workId),
|
|
98
|
+
];
|
|
99
|
+
const allStarts =
|
|
100
|
+
startLimit === 0
|
|
101
|
+
? []
|
|
102
|
+
: await ctx.db
|
|
103
|
+
.query("pendingStart")
|
|
104
|
+
.withIndex("segment", (q) => q.gte("segment", incomingCursor))
|
|
105
|
+
// eslint-disable-next-line @convex-dev/no-filter-in-query
|
|
106
|
+
.filter((q) =>
|
|
107
|
+
q.and(...excludedIds.map((id) => q.neq(q.field("workId"), id))),
|
|
108
|
+
)
|
|
109
|
+
.take(startLimit + 1);
|
|
110
|
+
return { completions, cancelations, allStarts };
|
|
111
|
+
},
|
|
112
|
+
});
|
|
113
|
+
|
|
51
114
|
// There should only ever be at most one of these scheduled or running.
|
|
52
115
|
export const main = internalMutation({
|
|
53
|
-
|
|
54
|
-
|
|
116
|
+
// `segment` is kept for backwards compatibility with in-flight scheduled
|
|
117
|
+
// calls from before the upgrade — it's no longer used internally.
|
|
118
|
+
args: { generation: v.int64(), segment: v.optional(v.int64()) },
|
|
119
|
+
handler: async (ctx, { generation }) => {
|
|
55
120
|
// State will be modified and patched at the end of the function.
|
|
56
121
|
const state = await getOrCreateState(ctx);
|
|
57
122
|
if (generation !== state.generation) {
|
|
@@ -62,24 +127,44 @@ export const main = internalMutation({
|
|
|
62
127
|
state.generation++;
|
|
63
128
|
const runStatus = await getOrCreateRunningStatus(ctx);
|
|
64
129
|
if (runStatus.state.kind !== "running") {
|
|
65
|
-
await ctx.db.patch(runStatus._id, {
|
|
130
|
+
await ctx.db.patch("runStatus", runStatus._id, {
|
|
66
131
|
state: { kind: "running" },
|
|
67
132
|
});
|
|
68
133
|
}
|
|
69
134
|
|
|
70
135
|
const globals = await getGlobals(ctx);
|
|
71
136
|
const console = createLogger(globals.logLevel);
|
|
72
|
-
const
|
|
73
|
-
|
|
137
|
+
const segment = getCurrentSegment();
|
|
138
|
+
|
|
139
|
+
// Pass maxParallelism + runningCount so the query bounds each batch to
|
|
140
|
+
// what we can actually consume this iteration. Apply CURSOR_BUFFER_SEGMENTS
|
|
141
|
+
// so we still pick up out-of-order inserts that landed behind the cursor
|
|
142
|
+
// since our last scan.
|
|
143
|
+
const queryArgs = {
|
|
144
|
+
completionCursor:
|
|
145
|
+
state.segmentCursors.completion - CURSOR_BUFFER_SEGMENTS,
|
|
146
|
+
cancelationCursor:
|
|
147
|
+
state.segmentCursors.cancelation - CURSOR_BUFFER_SEGMENTS,
|
|
148
|
+
incomingCursor: state.segmentCursors.incoming - CURSOR_BUFFER_SEGMENTS,
|
|
149
|
+
maxParallelism: globals.maxParallelism,
|
|
150
|
+
runningCount: state.running.length,
|
|
151
|
+
};
|
|
152
|
+
|
|
153
|
+
// Snapshot read — no read dependency, no OCC conflicts.
|
|
154
|
+
console.time("[main] getPending");
|
|
155
|
+
const { allStarts, cancelations, completions } = await runSnapshotQuery(
|
|
156
|
+
internal.loop.getPending,
|
|
157
|
+
queryArgs,
|
|
158
|
+
);
|
|
159
|
+
const toStart = allStarts.filter((s) => s.segment <= segment);
|
|
160
|
+
console.timeEnd("[main] getPending");
|
|
74
161
|
|
|
75
|
-
// Read pendingCompletions, including retry handling.
|
|
76
162
|
console.time("[main] pendingCompletion");
|
|
77
|
-
const toCancel = await handleCompletions(ctx, state,
|
|
163
|
+
const toCancel = await handleCompletions(ctx, state, completions, console);
|
|
78
164
|
console.timeEnd("[main] pendingCompletion");
|
|
79
165
|
|
|
80
|
-
// Read pendingCancelation, deleting from pendingStart. If it's still running, queue to cancel.
|
|
81
166
|
console.time("[main] pendingCancelation");
|
|
82
|
-
await handleCancelation(ctx, state,
|
|
167
|
+
await handleCancelation(ctx, state, cancelations, console, toCancel);
|
|
83
168
|
console.timeEnd("[main] pendingCancelation");
|
|
84
169
|
|
|
85
170
|
if (state.running.length === 0) {
|
|
@@ -91,9 +176,14 @@ export const main = internalMutation({
|
|
|
91
176
|
state.lastRecovery = segment;
|
|
92
177
|
}
|
|
93
178
|
|
|
94
|
-
//
|
|
179
|
+
// ── Start new work ──
|
|
180
|
+
// Slice to actual available capacity (completions may have freed slots).
|
|
181
|
+
// Guard against negative numbers in case running.length > maxParallelism.
|
|
182
|
+
const actualCapacity = globals.maxParallelism - state.running.length;
|
|
183
|
+
const pending: Doc<"pendingStart">[] =
|
|
184
|
+
actualCapacity > 0 ? toStart.slice(0, actualCapacity) : [];
|
|
95
185
|
console.time("[main] pendingStart");
|
|
96
|
-
await handleStart(ctx, state,
|
|
186
|
+
await handleStart(ctx, state, pending, console, globals);
|
|
97
187
|
console.timeEnd("[main] pendingStart");
|
|
98
188
|
|
|
99
189
|
if (Date.now() - state.report.lastReportTs >= MINUTE) {
|
|
@@ -115,88 +205,53 @@ export const main = internalMutation({
|
|
|
115
205
|
};
|
|
116
206
|
}
|
|
117
207
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
const state = await getOrCreateState(ctx);
|
|
134
|
-
if (generation !== state.generation) {
|
|
135
|
-
throw new Error(
|
|
136
|
-
`generation mismatch: ${generation} !== ${state.generation}`,
|
|
137
|
-
);
|
|
208
|
+
// Advance cursors to skip tombstones on next scan. Only do this when
|
|
209
|
+
// we actually did work — the cursor doubles as the cooldown signal
|
|
210
|
+
// ("how long since we last processed something").
|
|
211
|
+
const didWork =
|
|
212
|
+
completions.length > 0 || cancelations.length > 0 || pending.length > 0;
|
|
213
|
+
if (didWork) {
|
|
214
|
+
state.segmentCursors.completion = completions.at(-1)?.segment ?? segment;
|
|
215
|
+
state.segmentCursors.cancelation =
|
|
216
|
+
cancelations.at(-1)?.segment ?? segment;
|
|
217
|
+
if (pending.length > 0) {
|
|
218
|
+
state.segmentCursors.incoming = pending.at(-1)!.segment;
|
|
219
|
+
} else if (actualCapacity > 0) {
|
|
220
|
+
// We have no more pending work, update to now
|
|
221
|
+
state.segmentCursors.incoming = segment;
|
|
222
|
+
}
|
|
138
223
|
}
|
|
139
224
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
console.timeEnd("[updateRunStatus] outstandingCancelations");
|
|
146
|
-
if (outstandingCancelations) {
|
|
225
|
+
await ctx.db.replace("internalState", state._id, state);
|
|
226
|
+
|
|
227
|
+
// ── Schedule next iteration ──
|
|
228
|
+
if (didWork) {
|
|
229
|
+
// More work might have arrived while we were processing. Check again.
|
|
147
230
|
await ctx.scheduler.runAfter(0, internal.loop.main, {
|
|
148
|
-
generation,
|
|
149
|
-
segment,
|
|
231
|
+
generation: state.generation,
|
|
150
232
|
});
|
|
151
233
|
return;
|
|
152
234
|
}
|
|
153
235
|
|
|
154
|
-
//
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
const
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
await ctx.scheduler.runAt(
|
|
167
|
-
boundScheduledTime(fromSegment(nextSegment), console),
|
|
168
|
-
internal.loop.main,
|
|
169
|
-
{
|
|
170
|
-
generation,
|
|
171
|
-
segment: nextSegment,
|
|
172
|
-
},
|
|
173
|
-
);
|
|
174
|
-
return;
|
|
175
|
-
}
|
|
176
|
-
|
|
177
|
-
console.time("[updateRunStatus] oldSegmentIsActionable");
|
|
178
|
-
const [oldIsActionable, cursors] = await oldSegmentIsActionable(
|
|
179
|
-
ctx,
|
|
180
|
-
state,
|
|
181
|
-
maxParallelism,
|
|
182
|
-
);
|
|
183
|
-
console.timeEnd("[updateRunStatus] oldSegmentIsActionable");
|
|
184
|
-
|
|
185
|
-
if (oldIsActionable) {
|
|
186
|
-
await ctx.db.patch(state._id, {
|
|
187
|
-
segmentCursors: {
|
|
188
|
-
...state.segmentCursors,
|
|
189
|
-
...cursors,
|
|
190
|
-
},
|
|
191
|
-
});
|
|
236
|
+
// Nothing found in snapshot. Re-read with a real dependency (same args
|
|
237
|
+
// for cache-hit efficiency) so a concurrent insert forces an OCC retry.
|
|
238
|
+
console.debug("[main] no work — confirming with read dependency");
|
|
239
|
+
const confirm = await ctx.runQuery(internal.loop.getPending, queryArgs);
|
|
240
|
+
const confirmStarts = confirm.allStarts;
|
|
241
|
+
const confirmStartsNow = confirmStarts.filter((s) => s.segment <= segment);
|
|
242
|
+
const confirmFuture = confirmStarts.find((s) => s.segment > segment);
|
|
243
|
+
if (
|
|
244
|
+
confirm.completions.length > 0 ||
|
|
245
|
+
confirm.cancelations.length > 0 ||
|
|
246
|
+
confirmStartsNow.length > 0
|
|
247
|
+
) {
|
|
192
248
|
await ctx.scheduler.runAfter(0, internal.loop.main, {
|
|
193
|
-
generation,
|
|
194
|
-
segment: getCurrentSegment(),
|
|
249
|
+
generation: state.generation,
|
|
195
250
|
});
|
|
196
251
|
return;
|
|
197
252
|
}
|
|
198
253
|
|
|
199
|
-
// Cooldown: if any cursor was active within
|
|
254
|
+
// Cooldown: if any cursor was active within STATUS_COOLDOWN, stay running.
|
|
200
255
|
const { incoming, completion, cancelation } = state.segmentCursors;
|
|
201
256
|
const latestCursor = fromSegment(
|
|
202
257
|
max(incoming, max(completion, cancelation)),
|
|
@@ -204,187 +259,75 @@ export const updateRunStatus = internalMutation({
|
|
|
204
259
|
if (Date.now() - latestCursor < STATUS_COOLDOWN) {
|
|
205
260
|
const remaining = STATUS_COOLDOWN - (Date.now() - latestCursor);
|
|
206
261
|
console.debug(
|
|
207
|
-
`[
|
|
262
|
+
`[main] cooldown: ${remaining}ms remaining, checking again in ${COOLDOWN_CHECK_INTERVAL}ms`,
|
|
208
263
|
);
|
|
209
|
-
const checkAt = Date.now() + COOLDOWN_CHECK_INTERVAL;
|
|
210
|
-
const checkSegment = toSegment(checkAt);
|
|
211
264
|
await ctx.scheduler.runAt(
|
|
212
|
-
|
|
213
|
-
internal.loop.
|
|
214
|
-
{ generation
|
|
265
|
+
Date.now() + COOLDOWN_CHECK_INTERVAL,
|
|
266
|
+
internal.loop.main,
|
|
267
|
+
{ generation: state.generation },
|
|
215
268
|
);
|
|
216
269
|
return;
|
|
217
270
|
}
|
|
218
271
|
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
| "pendingCompletion"
|
|
223
|
-
| "pendingCancelation"
|
|
224
|
-
| "pendingStart"
|
|
225
|
-
)[] = ["pendingCompletion", "pendingCancelation"];
|
|
226
|
-
if (state.running.length < maxParallelism) {
|
|
227
|
-
actionableTables.push("pendingStart");
|
|
228
|
-
}
|
|
229
|
-
const docs = await Promise.all(
|
|
230
|
-
actionableTables.map(async (tableName) =>
|
|
231
|
-
getNextUp(ctx, tableName, { start: nextSegment }),
|
|
232
|
-
),
|
|
233
|
-
);
|
|
234
|
-
console.timeEnd("[updateRunStatus] findNextSegment");
|
|
235
|
-
let targetSegment = docs.map((d) => d?.segment).sort()[0];
|
|
236
|
-
const runStatus = await getOrCreateRunningStatus(ctx);
|
|
237
|
-
const saturated = state.running.length >= maxParallelism;
|
|
238
|
-
if (targetSegment !== undefined || state.running.length > 0) {
|
|
239
|
-
// If there's something to do, schedule for next actionable segment.
|
|
240
|
-
// Or the next recovery, whichever comes first.
|
|
272
|
+
if (state.running.length > 0 || confirmFuture) {
|
|
273
|
+
// Jobs are running and/or there's future-scheduled work.
|
|
274
|
+
// Schedule for the future start or next recovery, whichever is sooner.
|
|
241
275
|
const nextRecoverySegment = state.lastRecovery + RECOVERY_PERIOD_SEGMENTS;
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
276
|
+
const target = confirmFuture
|
|
277
|
+
? min(confirmFuture.segment, nextRecoverySegment)
|
|
278
|
+
: nextRecoverySegment;
|
|
279
|
+
|
|
245
280
|
const scheduledId = await ctx.scheduler.runAt(
|
|
246
|
-
boundScheduledTime(fromSegment(
|
|
281
|
+
boundScheduledTime(fromSegment(target), console),
|
|
247
282
|
internal.loop.main,
|
|
248
|
-
{ generation
|
|
283
|
+
{ generation: state.generation },
|
|
249
284
|
);
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
});
|
|
260
|
-
} else {
|
|
261
|
-
console.debug(
|
|
262
|
-
`[updateRunStatus] staying running because it's the next segment`,
|
|
263
|
-
);
|
|
264
|
-
}
|
|
285
|
+
await ctx.db.patch("runStatus", runStatus._id, {
|
|
286
|
+
state: {
|
|
287
|
+
kind: "scheduled",
|
|
288
|
+
scheduledId,
|
|
289
|
+
saturated: state.running.length >= globals.maxParallelism,
|
|
290
|
+
generation: state.generation,
|
|
291
|
+
segment: target,
|
|
292
|
+
},
|
|
293
|
+
});
|
|
265
294
|
return;
|
|
266
295
|
}
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
296
|
+
|
|
297
|
+
// Nothing to do — go idle.
|
|
298
|
+
await ctx.db.patch("runStatus", runStatus._id, {
|
|
299
|
+
state: { kind: "idle", generation: state.generation },
|
|
270
300
|
});
|
|
271
301
|
},
|
|
272
302
|
});
|
|
273
303
|
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
):
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
end,
|
|
285
|
-
})
|
|
286
|
-
) {
|
|
287
|
-
return true;
|
|
288
|
-
}
|
|
289
|
-
if (
|
|
290
|
-
await getNextUp(ctx, "pendingCompletion", {
|
|
291
|
-
start: state.segmentCursors.completion,
|
|
292
|
-
end,
|
|
293
|
-
})
|
|
294
|
-
) {
|
|
295
|
-
return true;
|
|
296
|
-
}
|
|
297
|
-
if (state.running.length < maxParallelism) {
|
|
298
|
-
if (
|
|
299
|
-
await getNextUp(ctx, "pendingStart", {
|
|
300
|
-
start: state.segmentCursors.incoming,
|
|
301
|
-
end,
|
|
302
|
-
})
|
|
303
|
-
) {
|
|
304
|
-
return true;
|
|
305
|
-
}
|
|
306
|
-
}
|
|
307
|
-
return false;
|
|
308
|
-
}
|
|
309
|
-
|
|
310
|
-
async function oldSegmentIsActionable(
|
|
311
|
-
ctx: MutationCtx,
|
|
312
|
-
state: Doc<"internalState">,
|
|
313
|
-
maxParallelism: number,
|
|
314
|
-
): Promise<
|
|
315
|
-
[boolean, { completion?: bigint; cancelation?: bigint; incoming?: bigint }]
|
|
316
|
-
> {
|
|
317
|
-
// Next, we look for out-of-order additions we may have missed.
|
|
318
|
-
const oldCompletion = await getNextUp(ctx, "pendingCompletion", {
|
|
319
|
-
end: state.segmentCursors.completion,
|
|
320
|
-
});
|
|
321
|
-
if (oldCompletion) {
|
|
322
|
-
return [true, { completion: oldCompletion.segment }];
|
|
323
|
-
}
|
|
324
|
-
const oldCancelation = await getNextUp(ctx, "pendingCancelation", {
|
|
325
|
-
end: state.segmentCursors.cancelation,
|
|
326
|
-
});
|
|
327
|
-
if (oldCancelation) {
|
|
328
|
-
return [true, { cancelation: oldCancelation.segment }];
|
|
329
|
-
}
|
|
330
|
-
if (state.running.length < maxParallelism) {
|
|
331
|
-
const oldStart = await getNextUp(ctx, "pendingStart", {
|
|
332
|
-
end: state.segmentCursors.incoming,
|
|
333
|
-
});
|
|
334
|
-
if (oldStart) {
|
|
335
|
-
return [true, { incoming: oldStart.segment }];
|
|
336
|
-
}
|
|
337
|
-
}
|
|
338
|
-
return [false, {}];
|
|
339
|
-
}
|
|
340
|
-
|
|
341
|
-
// Fetch the next item. If only one of start & end are provided, it's exclusive.
|
|
342
|
-
async function getNextUp(
|
|
343
|
-
ctx: MutationCtx,
|
|
344
|
-
table: "pendingCompletion" | "pendingCancelation" | "pendingStart",
|
|
345
|
-
range: { start?: bigint; end?: bigint },
|
|
346
|
-
) {
|
|
347
|
-
return ctx.db
|
|
348
|
-
.query(table)
|
|
349
|
-
.withIndex("segment", (q) =>
|
|
350
|
-
range.start !== undefined
|
|
351
|
-
? range.end !== undefined
|
|
352
|
-
? q
|
|
353
|
-
.gte("segment", range.start - CURSOR_BUFFER_SEGMENTS)
|
|
354
|
-
.lte("segment", range.end)
|
|
355
|
-
: q.gt("segment", range.start - CURSOR_BUFFER_SEGMENTS)
|
|
356
|
-
: range.end !== undefined
|
|
357
|
-
? q.lt("segment", range.end)
|
|
358
|
-
: q,
|
|
359
|
-
)
|
|
360
|
-
.first();
|
|
361
|
-
}
|
|
304
|
+
/**
|
|
305
|
+
* @deprecated Forwarder for in-flight scheduled calls from before the
|
|
306
|
+
* upgrade. The scheduling logic has been merged into `main`.
|
|
307
|
+
*/
|
|
308
|
+
export const updateRunStatus = internalMutation({
|
|
309
|
+
args: { generation: v.int64(), segment: v.int64() },
|
|
310
|
+
handler: async (ctx, { generation }) => {
|
|
311
|
+
await ctx.scheduler.runAfter(0, internal.loop.main, { generation });
|
|
312
|
+
},
|
|
313
|
+
});
|
|
362
314
|
|
|
363
315
|
/**
|
|
364
316
|
* Handles the completion of pending completions.
|
|
365
317
|
* This only processes work that succeeded or failed, not canceled.
|
|
318
|
+
* Accepts pre-fetched completion docs (from snapshot query).
|
|
366
319
|
*/
|
|
367
320
|
async function handleCompletions(
|
|
368
321
|
ctx: MutationCtx,
|
|
369
322
|
state: Doc<"internalState">,
|
|
370
|
-
|
|
323
|
+
completed: Doc<"pendingCompletion">[],
|
|
371
324
|
console: Logger,
|
|
372
325
|
) {
|
|
373
|
-
const startSegment = state.segmentCursors.completion - CURSOR_BUFFER_SEGMENTS;
|
|
374
|
-
// This won't be too many because the jobs all correspond to being scheduled
|
|
375
|
-
// by a single main (the previous one), so they're limited by MAX_PARALLELISM.
|
|
376
|
-
const completed = await ctx.db
|
|
377
|
-
.query("pendingCompletion")
|
|
378
|
-
.withIndex("segment", (q) =>
|
|
379
|
-
q.gte("segment", startSegment).lte("segment", segment),
|
|
380
|
-
)
|
|
381
|
-
.collect();
|
|
382
|
-
state.segmentCursors.completion = segment;
|
|
383
326
|
// Completions that were going to be retried but have since been canceled.
|
|
384
327
|
const toCancel: CompleteJob[] = [];
|
|
385
328
|
await Promise.all(
|
|
386
329
|
completed.map(async (c) => {
|
|
387
|
-
await ctx.db.delete(c._id);
|
|
330
|
+
await ctx.db.delete("pendingCompletion", c._id);
|
|
388
331
|
|
|
389
332
|
const running = state.running.find((r) => r.workId === c.workId);
|
|
390
333
|
if (!running) {
|
|
@@ -395,7 +338,7 @@ async function handleCompletions(
|
|
|
395
338
|
}
|
|
396
339
|
if (c.retry) {
|
|
397
340
|
// Only check for work if it's going to be retried.
|
|
398
|
-
const work = await ctx.db.get(c.workId);
|
|
341
|
+
const work = await ctx.db.get("work", c.workId);
|
|
399
342
|
if (!work) {
|
|
400
343
|
console.warn(`[main] ${c.workId} is gone, but trying to complete`);
|
|
401
344
|
return;
|
|
@@ -433,21 +376,16 @@ async function handleCompletions(
|
|
|
433
376
|
return toCancel;
|
|
434
377
|
}
|
|
435
378
|
|
|
379
|
+
/**
|
|
380
|
+
* Handles cancelation. Accepts pre-fetched cancelation docs.
|
|
381
|
+
*/
|
|
436
382
|
async function handleCancelation(
|
|
437
383
|
ctx: MutationCtx,
|
|
438
384
|
state: Doc<"internalState">,
|
|
439
|
-
|
|
385
|
+
canceled: Doc<"pendingCancelation">[],
|
|
440
386
|
console: Logger,
|
|
441
387
|
toCancel: CompleteJob[],
|
|
442
388
|
) {
|
|
443
|
-
const start = state.segmentCursors.cancelation - CURSOR_BUFFER_SEGMENTS;
|
|
444
|
-
const canceled = await ctx.db
|
|
445
|
-
.query("pendingCancelation")
|
|
446
|
-
.withIndex("segment", (q) =>
|
|
447
|
-
q.gte("segment", start).lte("segment", segment),
|
|
448
|
-
)
|
|
449
|
-
.take(CANCELLATION_BATCH_SIZE);
|
|
450
|
-
state.segmentCursors.cancelation = canceled.at(-1)?.segment ?? segment;
|
|
451
389
|
if (canceled.length) {
|
|
452
390
|
console.debug(`[main] attempting to cancel ${canceled.length}`);
|
|
453
391
|
}
|
|
@@ -456,20 +394,20 @@ async function handleCancelation(
|
|
|
456
394
|
const jobs = toCancel.concat(
|
|
457
395
|
...(
|
|
458
396
|
await Promise.all(
|
|
459
|
-
canceled.map(async ({ _id,
|
|
460
|
-
await ctx.db.delete(_id);
|
|
397
|
+
canceled.map(async ({ _id, workId }) => {
|
|
398
|
+
await ctx.db.delete("pendingCancelation", _id);
|
|
461
399
|
if (canceledWork.has(workId)) {
|
|
462
400
|
// We shouldn't have multiple pending cancelations for the same work.
|
|
463
401
|
console.error(`[main] ${workId} already canceled`);
|
|
464
402
|
return null;
|
|
465
403
|
}
|
|
466
|
-
const work = await ctx.db.get(workId);
|
|
404
|
+
const work = await ctx.db.get("work", workId);
|
|
467
405
|
if (!work) {
|
|
468
406
|
console.warn(`[main] ${workId} is gone, but trying to cancel`);
|
|
469
407
|
return null;
|
|
470
408
|
}
|
|
471
409
|
// Ensure it doesn't retry.
|
|
472
|
-
await ctx.db.patch(workId, { canceled: true });
|
|
410
|
+
await ctx.db.patch("work", workId, { canceled: true });
|
|
473
411
|
// Ensure it doesn't start.
|
|
474
412
|
const pendingStart = await ctx.db
|
|
475
413
|
.query("pendingStart")
|
|
@@ -477,7 +415,7 @@ async function handleCancelation(
|
|
|
477
415
|
.unique();
|
|
478
416
|
if (pendingStart && !canceledWork.has(workId)) {
|
|
479
417
|
state.report.canceled++;
|
|
480
|
-
await ctx.db.delete(pendingStart._id);
|
|
418
|
+
await ctx.db.delete("pendingStart", pendingStart._id);
|
|
481
419
|
canceledWork.add(workId);
|
|
482
420
|
return { workId, runResult, attempt: work.attempts };
|
|
483
421
|
}
|
|
@@ -504,10 +442,22 @@ async function handleRecovery(
|
|
|
504
442
|
if (r.started >= oldEnoughToConsider) {
|
|
505
443
|
return null;
|
|
506
444
|
}
|
|
507
|
-
const work = await ctx.db.get(r.workId);
|
|
445
|
+
const work = await ctx.db.get("work", r.workId);
|
|
508
446
|
if (!work) {
|
|
509
|
-
|
|
510
|
-
|
|
447
|
+
const pendingCompletion = await ctx.db
|
|
448
|
+
.query("pendingCompletion")
|
|
449
|
+
.withIndex("workId", (q) => q.eq("workId", r.workId))
|
|
450
|
+
.first();
|
|
451
|
+
if (!pendingCompletion) {
|
|
452
|
+
missing.add(r.workId);
|
|
453
|
+
console.error(
|
|
454
|
+
`[main] ${r.workId} already gone (skipping recovery)`,
|
|
455
|
+
);
|
|
456
|
+
} else {
|
|
457
|
+
console.debug(
|
|
458
|
+
`[main] ${r.workId} already gone but has pendingCompletion`,
|
|
459
|
+
);
|
|
460
|
+
}
|
|
511
461
|
return null;
|
|
512
462
|
}
|
|
513
463
|
return { ...r, attempt: work.attempts };
|
|
@@ -521,39 +471,16 @@ async function handleRecovery(
|
|
|
521
471
|
}
|
|
522
472
|
}
|
|
523
473
|
|
|
474
|
+
/**
|
|
475
|
+
* Starts pending work. Accepts pre-fetched pendingStart docs.
|
|
476
|
+
*/
|
|
524
477
|
async function handleStart(
|
|
525
478
|
ctx: MutationCtx,
|
|
526
479
|
state: Doc<"internalState">,
|
|
527
|
-
|
|
480
|
+
pending: Doc<"pendingStart">[],
|
|
528
481
|
console: Logger,
|
|
529
|
-
{
|
|
482
|
+
{ logLevel }: Config,
|
|
530
483
|
) {
|
|
531
|
-
// Schedule as many as needed to reach maxParallelism.
|
|
532
|
-
const toSchedule = maxParallelism - state.running.length;
|
|
533
|
-
|
|
534
|
-
const pending =
|
|
535
|
-
toSchedule > 0
|
|
536
|
-
? await ctx.db
|
|
537
|
-
.query("pendingStart")
|
|
538
|
-
.withIndex("segment", (q) =>
|
|
539
|
-
q
|
|
540
|
-
.gte(
|
|
541
|
-
"segment",
|
|
542
|
-
state.segmentCursors.incoming - CURSOR_BUFFER_SEGMENTS,
|
|
543
|
-
)
|
|
544
|
-
.lte("segment", segment),
|
|
545
|
-
)
|
|
546
|
-
.take(toSchedule)
|
|
547
|
-
: [];
|
|
548
|
-
|
|
549
|
-
if (pending) {
|
|
550
|
-
if (pending.length > 0) {
|
|
551
|
-
state.segmentCursors.incoming = pending.at(-1)!.segment;
|
|
552
|
-
} else if (toSchedule > 0) {
|
|
553
|
-
// We have no more pending work, update to now
|
|
554
|
-
state.segmentCursors.incoming = segment;
|
|
555
|
-
}
|
|
556
|
-
}
|
|
557
484
|
console.debug(`[main] scheduling ${pending.length} pending work`);
|
|
558
485
|
// Start new work.
|
|
559
486
|
state.running.push(
|
|
@@ -566,7 +493,7 @@ async function handleStart(
|
|
|
566
493
|
}
|
|
567
494
|
const lagMs = Date.now() - fromSegment(segment);
|
|
568
495
|
const scheduledId = await beginWork(ctx, workId, logLevel, lagMs);
|
|
569
|
-
await ctx.db.delete(_id);
|
|
496
|
+
await ctx.db.delete("pendingStart", _id);
|
|
570
497
|
if (!scheduledId) return null;
|
|
571
498
|
return { scheduledId, workId, started: Date.now() };
|
|
572
499
|
}),
|
|
@@ -582,7 +509,7 @@ async function beginWork(
|
|
|
582
509
|
lagMs: number,
|
|
583
510
|
): Promise<Id<"_scheduled_functions"> | null> {
|
|
584
511
|
const console = createLogger(logLevel);
|
|
585
|
-
const work = await ctx.db.get(workId);
|
|
512
|
+
const work = await ctx.db.get("work", workId);
|
|
586
513
|
if (!work) {
|
|
587
514
|
console.error(`Trying to start, but work not found: ${workId}`);
|
|
588
515
|
return null;
|
|
@@ -682,6 +609,7 @@ async function getOrCreateState(ctx: MutationCtx) {
|
|
|
682
609
|
const console = createLogger(globals.logLevel);
|
|
683
610
|
console.error("No internalState in running loop! Re-creating empty one...");
|
|
684
611
|
return (await ctx.db.get(
|
|
612
|
+
"internalState",
|
|
685
613
|
await ctx.db.insert("internalState", INITIAL_STATE),
|
|
686
614
|
))!;
|
|
687
615
|
}
|
|
@@ -693,6 +621,7 @@ async function getOrCreateRunningStatus(ctx: MutationCtx) {
|
|
|
693
621
|
const console = createLogger(globals.logLevel);
|
|
694
622
|
console.error("No runStatus in running loop! Re-creating one...");
|
|
695
623
|
return (await ctx.db.get(
|
|
624
|
+
"runStatus",
|
|
696
625
|
await ctx.db.insert("runStatus", { state: { kind: "running" } }),
|
|
697
626
|
))!;
|
|
698
627
|
}
|