@convex-dev/workpool 0.2.0-beta.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. package/README.md +7 -16
  2. package/dist/commonjs/client/index.d.ts +3 -3
  3. package/dist/commonjs/client/index.d.ts.map +1 -1
  4. package/dist/commonjs/client/index.js +10 -5
  5. package/dist/commonjs/client/index.js.map +1 -1
  6. package/dist/commonjs/component/complete.d.ts +89 -0
  7. package/dist/commonjs/component/complete.d.ts.map +1 -0
  8. package/dist/commonjs/component/complete.js +80 -0
  9. package/dist/commonjs/component/complete.js.map +1 -0
  10. package/dist/commonjs/component/kick.d.ts +1 -2
  11. package/dist/commonjs/component/kick.d.ts.map +1 -1
  12. package/dist/commonjs/component/kick.js +7 -5
  13. package/dist/commonjs/component/kick.js.map +1 -1
  14. package/dist/commonjs/component/lib.d.ts +3 -3
  15. package/dist/commonjs/component/lib.d.ts.map +1 -1
  16. package/dist/commonjs/component/lib.js +43 -20
  17. package/dist/commonjs/component/lib.js.map +1 -1
  18. package/dist/commonjs/component/logging.d.ts.map +1 -1
  19. package/dist/commonjs/component/logging.js +1 -2
  20. package/dist/commonjs/component/logging.js.map +1 -1
  21. package/dist/commonjs/component/loop.d.ts +1 -14
  22. package/dist/commonjs/component/loop.d.ts.map +1 -1
  23. package/dist/commonjs/component/loop.js +215 -178
  24. package/dist/commonjs/component/loop.js.map +1 -1
  25. package/dist/commonjs/component/recovery.d.ts +16 -0
  26. package/dist/commonjs/component/recovery.d.ts.map +1 -1
  27. package/dist/commonjs/component/recovery.js +64 -44
  28. package/dist/commonjs/component/recovery.js.map +1 -1
  29. package/dist/commonjs/component/schema.d.ts +6 -2
  30. package/dist/commonjs/component/schema.d.ts.map +1 -1
  31. package/dist/commonjs/component/schema.js +5 -3
  32. package/dist/commonjs/component/schema.js.map +1 -1
  33. package/dist/commonjs/component/shared.d.ts +20 -11
  34. package/dist/commonjs/component/shared.d.ts.map +1 -1
  35. package/dist/commonjs/component/shared.js +18 -5
  36. package/dist/commonjs/component/shared.js.map +1 -1
  37. package/dist/commonjs/component/stats.d.ts +21 -13
  38. package/dist/commonjs/component/stats.d.ts.map +1 -1
  39. package/dist/commonjs/component/stats.js +32 -22
  40. package/dist/commonjs/component/stats.js.map +1 -1
  41. package/dist/commonjs/component/worker.d.ts +2 -12
  42. package/dist/commonjs/component/worker.d.ts.map +1 -1
  43. package/dist/commonjs/component/worker.js +23 -36
  44. package/dist/commonjs/component/worker.js.map +1 -1
  45. package/dist/esm/client/index.d.ts +3 -3
  46. package/dist/esm/client/index.d.ts.map +1 -1
  47. package/dist/esm/client/index.js +10 -5
  48. package/dist/esm/client/index.js.map +1 -1
  49. package/dist/esm/component/complete.d.ts +89 -0
  50. package/dist/esm/component/complete.d.ts.map +1 -0
  51. package/dist/esm/component/complete.js +80 -0
  52. package/dist/esm/component/complete.js.map +1 -0
  53. package/dist/esm/component/kick.d.ts +1 -2
  54. package/dist/esm/component/kick.d.ts.map +1 -1
  55. package/dist/esm/component/kick.js +7 -5
  56. package/dist/esm/component/kick.js.map +1 -1
  57. package/dist/esm/component/lib.d.ts +3 -3
  58. package/dist/esm/component/lib.d.ts.map +1 -1
  59. package/dist/esm/component/lib.js +43 -20
  60. package/dist/esm/component/lib.js.map +1 -1
  61. package/dist/esm/component/logging.d.ts.map +1 -1
  62. package/dist/esm/component/logging.js +1 -2
  63. package/dist/esm/component/logging.js.map +1 -1
  64. package/dist/esm/component/loop.d.ts +1 -14
  65. package/dist/esm/component/loop.d.ts.map +1 -1
  66. package/dist/esm/component/loop.js +215 -178
  67. package/dist/esm/component/loop.js.map +1 -1
  68. package/dist/esm/component/recovery.d.ts +16 -0
  69. package/dist/esm/component/recovery.d.ts.map +1 -1
  70. package/dist/esm/component/recovery.js +64 -44
  71. package/dist/esm/component/recovery.js.map +1 -1
  72. package/dist/esm/component/schema.d.ts +6 -2
  73. package/dist/esm/component/schema.d.ts.map +1 -1
  74. package/dist/esm/component/schema.js +5 -3
  75. package/dist/esm/component/schema.js.map +1 -1
  76. package/dist/esm/component/shared.d.ts +20 -11
  77. package/dist/esm/component/shared.d.ts.map +1 -1
  78. package/dist/esm/component/shared.js +18 -5
  79. package/dist/esm/component/shared.js.map +1 -1
  80. package/dist/esm/component/stats.d.ts +21 -13
  81. package/dist/esm/component/stats.d.ts.map +1 -1
  82. package/dist/esm/component/stats.js +32 -22
  83. package/dist/esm/component/stats.js.map +1 -1
  84. package/dist/esm/component/worker.d.ts +2 -12
  85. package/dist/esm/component/worker.d.ts.map +1 -1
  86. package/dist/esm/component/worker.js +23 -36
  87. package/dist/esm/component/worker.js.map +1 -1
  88. package/package.json +7 -6
  89. package/src/client/index.ts +18 -8
  90. package/src/component/README.md +15 -15
  91. package/src/component/_generated/api.d.ts +7 -2
  92. package/src/component/complete.test.ts +508 -0
  93. package/src/component/complete.ts +98 -0
  94. package/src/component/kick.test.ts +13 -13
  95. package/src/component/kick.ts +13 -8
  96. package/src/component/lib.test.ts +262 -17
  97. package/src/component/lib.ts +55 -24
  98. package/src/component/logging.ts +1 -2
  99. package/src/component/loop.test.ts +1158 -0
  100. package/src/component/loop.ts +289 -221
  101. package/src/component/recovery.test.ts +541 -0
  102. package/src/component/recovery.ts +80 -63
  103. package/src/component/schema.ts +6 -4
  104. package/src/component/shared.ts +21 -6
  105. package/src/component/stats.ts +48 -25
  106. package/src/component/worker.ts +25 -38
@@ -1,9 +1,8 @@
1
1
  import { v } from "convex/values";
2
2
  import { internal } from "./_generated/api.js";
3
3
  import { internalMutation } from "./_generated/server.js";
4
- import { DEFAULT_MAX_PARALLELISM } from "./kick.js";
5
4
  import { createLogger, DEFAULT_LOG_LEVEL, } from "./logging.js";
6
- import { boundScheduledTime, currentSegment, fromSegment, nextSegment, runResult, toSegment, } from "./shared.js";
5
+ import { boundScheduledTime, currentSegment, DEFAULT_MAX_PARALLELISM, fromSegment, max, nextSegment, toSegment, } from "./shared.js";
7
6
  import { recordCompleted, recordReport, recordStarted } from "./stats.js";
8
7
  const CANCELLATION_BATCH_SIZE = 64; // the only queue that can get unbounded.
9
8
  const SECOND = 1000;
@@ -27,43 +26,44 @@ export const INITIAL_STATE = {
27
26
  };
28
27
  // There should only ever be at most one of these scheduled or running.
29
28
  export const main = internalMutation({
30
- args: {
31
- generation: v.int64(),
32
- segment: v.int64(),
33
- },
34
- handler: async (ctx, args) => {
29
+ args: { generation: v.int64(), segment: v.int64() },
30
+ handler: async (ctx, { generation, segment }) => {
35
31
  // State will be modified and patched at the end of the function.
36
32
  const state = await getOrCreateState(ctx);
37
- if (args.generation !== state.generation) {
38
- throw new Error(`generation mismatch: ${args.generation} !== ${state.generation}`);
33
+ if (generation !== state.generation) {
34
+ throw new Error(`generation mismatch: ${generation} !== ${state.generation}`);
39
35
  }
40
36
  state.generation++;
37
+ const runStatus = await getOrCreateRunningStatus(ctx);
38
+ if (runStatus.state.kind !== "running") {
39
+ await ctx.db.patch(runStatus._id, {
40
+ state: { kind: "running" },
41
+ });
42
+ }
41
43
  const globals = await getGlobals(ctx);
42
44
  const console = createLogger(globals.logLevel);
45
+ const delayMs = Date.now() - fromSegment(segment);
46
+ console.debug(`[main] generation ${generation} behind: ${delayMs}ms`);
43
47
  // Read pendingCompletions, including retry handling.
44
48
  console.time("[main] pendingCompletion");
45
- const done = await handleCompletions(ctx, state, args.segment, console);
49
+ const toCancel = await handleCompletions(ctx, state, segment, console);
46
50
  console.timeEnd("[main] pendingCompletion");
47
51
  // Read pendingCancelation, deleting from pendingStart. If it's still running, queue to cancel.
48
52
  console.time("[main] pendingCancelation");
49
- done.push(...(await handleCancelation(ctx, state, args.segment, console)));
53
+ await handleCancelation(ctx, state, segment, console, toCancel);
50
54
  console.timeEnd("[main] pendingCancelation");
51
55
  if (state.running.length === 0) {
52
56
  // If there's nothing active, reset lastRecovery.
53
- state.lastRecovery = args.segment;
57
+ state.lastRecovery = segment;
54
58
  }
55
- else if (args.segment - state.lastRecovery >= RECOVERY_PERIOD_SEGMENTS) {
59
+ else if (segment - state.lastRecovery >= RECOVERY_PERIOD_SEGMENTS) {
56
60
  // Otherwise schedule recovery for any old jobs.
57
- const oldEnoughToConsider = Date.now() - RECOVERY_THRESHOLD_MS;
58
- const jobs = state.running.filter((r) => r.started < oldEnoughToConsider);
59
- if (jobs.length) {
60
- await ctx.scheduler.runAfter(0, internal.recovery.recover, { jobs });
61
- }
62
- state.lastRecovery = args.segment;
61
+ await handleRecovery(ctx, state, console);
62
+ state.lastRecovery = segment;
63
63
  }
64
64
  // Read pendingStart up to max capacity. Update the config, and incomingSegmentCursor.
65
65
  console.time("[main] pendingStart");
66
- await handleStart(ctx, state, args.segment, console, globals);
66
+ await handleStart(ctx, state, segment, console, globals);
67
67
  console.timeEnd("[main] pendingStart");
68
68
  if (Date.now() - state.report.lastReportTs >= MINUTE) {
69
69
  // If minute rollover since last report, log report.
@@ -73,7 +73,7 @@ export const main = internalMutation({
73
73
  // It's been a while, let's start fresh.
74
74
  lastReportTs = Date.now();
75
75
  }
76
- console.info(recordReport(state));
76
+ recordReport(console, state);
77
77
  state.report = {
78
78
  completed: 0,
79
79
  succeeded: 0,
@@ -84,81 +84,61 @@ export const main = internalMutation({
84
84
  };
85
85
  }
86
86
  await ctx.db.replace(state._id, state);
87
- await ctx.scheduler.runAfter(0, internal.loop.complete, { done });
88
87
  await ctx.scheduler.runAfter(0, internal.loop.updateRunStatus, {
89
88
  generation: state.generation,
89
+ segment,
90
90
  });
91
- },
92
- });
93
- export const complete = internalMutation({
94
- args: {
95
- done: v.array(v.object({ runResult, workId: v.id("work") })),
96
- },
97
- handler: async (ctx, args) => {
98
- const globals = await getGlobals(ctx);
99
- const console = createLogger(globals.logLevel);
100
- await Promise.all(args.done.map(async ({ runResult, workId }) => {
101
- const work = await ctx.db.get(workId);
102
- if (!work) {
103
- console.warn(`[complete] ${workId} is done, but its work is gone`);
104
- return;
105
- }
106
- if (work.onComplete) {
107
- try {
108
- const handle = work.onComplete.fnHandle;
109
- await ctx.runMutation(handle, {
110
- workId: work._id,
111
- context: work.onComplete.context,
112
- result: runResult,
113
- });
114
- console.debug(`[complete] onComplete for ${workId} completed`);
115
- }
116
- catch (e) {
117
- console.error(`[complete] error running onComplete for ${workId}`, e);
118
- }
119
- }
120
- await ctx.db.delete(workId);
121
- }));
91
+ // TODO: if there were more cancellations, schedule main directly.
122
92
  },
123
93
  });
124
94
  export const updateRunStatus = internalMutation({
125
- args: { generation: v.int64() },
126
- handler: async (ctx, args) => {
95
+ args: { generation: v.int64(), segment: v.int64() },
96
+ handler: async (ctx, { generation, segment }) => {
127
97
  const globals = await getGlobals(ctx);
128
98
  const console = createLogger(globals.logLevel);
129
99
  const maxParallelism = globals.maxParallelism;
130
100
  const state = await getOrCreateState(ctx);
131
- if (args.generation !== state.generation) {
132
- throw new Error(`generation mismatch: ${args.generation} !== ${state.generation}`);
101
+ if (generation !== state.generation) {
102
+ throw new Error(`generation mismatch: ${generation} !== ${state.generation}`);
133
103
  }
134
104
  console.time("[updateRunStatus] outstandingCancelations");
135
- const thisSegment = currentSegment();
136
105
  const outstandingCancelations = await getNextUp(ctx, "pendingCancelation", {
137
106
  start: state.segmentCursors.cancelation,
138
- end: thisSegment,
107
+ end: segment,
139
108
  });
140
109
  console.timeEnd("[updateRunStatus] outstandingCancelations");
141
110
  if (outstandingCancelations) {
142
111
  await ctx.scheduler.runAfter(0, internal.loop.main, {
143
- generation: args.generation,
144
- segment: thisSegment,
112
+ generation,
113
+ segment,
145
114
  });
146
115
  return;
147
116
  }
117
+ // TODO: check for current segment (or from args) first, to avoid OCCs.
148
118
  console.time("[updateRunStatus] nextSegmentIsActionable");
149
- const [nextIsActionable, cursors] = await nextSegmentIsActionable(ctx, state, maxParallelism);
119
+ const next = max(segment + 1n, currentSegment());
120
+ const nextIsActionable = await nextSegmentIsActionable(ctx, state, maxParallelism, next);
150
121
  console.timeEnd("[updateRunStatus] nextSegmentIsActionable");
151
- const start = nextSegment();
152
122
  if (nextIsActionable) {
123
+ await ctx.scheduler.runAt(boundScheduledTime(fromSegment(next), console), internal.loop.main, {
124
+ generation,
125
+ segment: next,
126
+ });
127
+ return;
128
+ }
129
+ console.time("[updateRunStatus] oldSegmentIsActionable");
130
+ const [oldIsActionable, cursors] = await oldSegmentIsActionable(ctx, state, maxParallelism);
131
+ console.timeEnd("[updateRunStatus] oldSegmentIsActionable");
132
+ if (oldIsActionable) {
153
133
  await ctx.db.patch(state._id, {
154
134
  segmentCursors: {
155
135
  ...state.segmentCursors,
156
136
  ...cursors,
157
137
  },
158
138
  });
159
- await ctx.scheduler.runAt(fromSegment(start), internal.loop.main, {
160
- generation: args.generation,
161
- segment: start,
139
+ await ctx.scheduler.runAfter(0, internal.loop.main, {
140
+ generation,
141
+ segment: currentSegment(),
162
142
  });
163
143
  return;
164
144
  }
@@ -168,59 +148,66 @@ export const updateRunStatus = internalMutation({
168
148
  if (state.running.length < maxParallelism) {
169
149
  actionableTables.push("pendingStart");
170
150
  }
171
- const docs = await Promise.all(actionableTables.map(async (tableName) => getNextUp(ctx, tableName, { start })));
151
+ const docs = await Promise.all(actionableTables.map(async (tableName) => getNextUp(ctx, tableName, { start: next })));
172
152
  console.timeEnd("[updateRunStatus] findNextSegment");
173
- let segment = docs.map((d) => d?.segment).sort()[0];
153
+ let targetSegment = docs.map((d) => d?.segment).sort()[0];
174
154
  const runStatus = await getOrCreateRunningStatus(ctx);
175
155
  const saturated = state.running.length >= maxParallelism;
176
- if (segment || state.running.length > 0) {
156
+ if (targetSegment !== undefined || state.running.length > 0) {
177
157
  // If there's something to do, schedule for next actionable segment.
178
158
  // Or the next recovery, whichever comes first.
179
159
  const nextRecoverySegment = state.lastRecovery + RECOVERY_PERIOD_SEGMENTS;
180
- if (!segment || segment > nextRecoverySegment) {
181
- segment = nextRecoverySegment;
160
+ if (!targetSegment || targetSegment > nextRecoverySegment) {
161
+ targetSegment = nextRecoverySegment;
162
+ }
163
+ const scheduledId = await ctx.scheduler.runAt(boundScheduledTime(fromSegment(targetSegment), console), internal.loop.main, { generation, segment: targetSegment });
164
+ if (targetSegment > nextSegment()) {
165
+ await ctx.db.patch(runStatus._id, {
166
+ state: {
167
+ kind: "scheduled",
168
+ scheduledId,
169
+ saturated,
170
+ generation,
171
+ segment: targetSegment,
172
+ },
173
+ });
174
+ }
175
+ else {
176
+ console.debug(`[updateRunStatus] staying running because it's the next segment`);
182
177
  }
183
- const scheduledId = await ctx.scheduler.runAt(fromSegment(segment), internal.loop.main, { generation: args.generation, segment });
184
- await ctx.db.patch(runStatus._id, {
185
- state: {
186
- kind: "scheduled",
187
- scheduledId,
188
- saturated,
189
- generation: args.generation,
190
- segment,
191
- },
192
- });
193
178
  return;
194
179
  }
195
180
  // There seems to be nothing in the future to do, so go idle.
196
181
  await ctx.db.patch(runStatus._id, {
197
- state: { kind: "idle", generation: args.generation },
182
+ state: { kind: "idle", generation },
198
183
  });
199
184
  },
200
185
  });
201
- async function nextSegmentIsActionable(ctx, state, maxParallelism) {
202
- // First, try with our cursor range, up to next segment.
203
- const end = nextSegment();
186
+ async function nextSegmentIsActionable(ctx, state, maxParallelism, end) {
187
+ // First, try with our cursor range, up to end.
204
188
  if (await getNextUp(ctx, "pendingCancelation", {
205
189
  start: state.segmentCursors.cancelation,
206
190
  end,
207
191
  })) {
208
- return [true, {}];
192
+ return true;
209
193
  }
210
194
  if (await getNextUp(ctx, "pendingCompletion", {
211
195
  start: state.segmentCursors.completion,
212
196
  end,
213
197
  })) {
214
- return [true, {}];
198
+ return true;
215
199
  }
216
200
  if (state.running.length < maxParallelism) {
217
201
  if (await getNextUp(ctx, "pendingStart", {
218
202
  start: state.segmentCursors.incoming,
219
203
  end,
220
204
  })) {
221
- return [true, {}];
205
+ return true;
222
206
  }
223
207
  }
208
+ return false;
209
+ }
210
+ async function oldSegmentIsActionable(ctx, state, maxParallelism) {
224
211
  // Next, we look for out-of-order additions we may have missed.
225
212
  const oldCompletion = await getNextUp(ctx, "pendingCompletion", {
226
213
  end: state.segmentCursors.completion,
@@ -248,22 +235,20 @@ async function nextSegmentIsActionable(ctx, state, maxParallelism) {
248
235
  async function getNextUp(ctx, table, range) {
249
236
  return ctx.db
250
237
  .query(table)
251
- .withIndex("segment", (q) => range.start
252
- ? range.end
238
+ .withIndex("segment", (q) => range.start !== undefined
239
+ ? range.end !== undefined
253
240
  ? q
254
241
  .gte("segment", range.start - CURSOR_BUFFER_SEGMENTS)
255
242
  .lte("segment", range.end)
256
243
  : q.gt("segment", range.start - CURSOR_BUFFER_SEGMENTS)
257
- : range.end
244
+ : range.end !== undefined
258
245
  ? q.lt("segment", range.end)
259
246
  : q)
260
247
  .first();
261
248
  }
262
249
  /**
263
250
  * Handles the completion of pending completions.
264
-
265
- * Important: It should handle retries before cancelations are processed,
266
- * to allow retries to be canceled.
251
+ * This only processes work that succeeded or failed, not canceled.
267
252
  */
268
253
  async function handleCompletions(ctx, state, segment, console) {
269
254
  const startSegment = state.segmentCursors.completion - CURSOR_BUFFER_SEGMENTS;
@@ -273,109 +258,119 @@ async function handleCompletions(ctx, state, segment, console) {
273
258
  .query("pendingCompletion")
274
259
  .withIndex("segment", (q) => q.gte("segment", startSegment).lte("segment", segment))
275
260
  .collect();
276
- state.report.completed += completed.length;
277
261
  state.segmentCursors.completion = segment;
278
- const done = [];
262
+ // Completions that were going to be retried but have since been canceled.
263
+ const toCancel = [];
279
264
  await Promise.all(completed.map(async (c) => {
280
265
  await ctx.db.delete(c._id);
281
- const work = await ctx.db.get(c.workId);
282
- const maxAttempts = work?.retryBehavior?.maxAttempts;
283
- const pendingCancelations = await ctx.db
284
- .query("pendingCancelation")
285
- .withIndex("workId", (q) => q.eq("workId", c.workId))
286
- .collect();
287
- if (work && state.running.some((r) => r.workId === c.workId)) {
288
- if (c.runResult.kind === "failed" &&
289
- maxAttempts &&
290
- pendingCancelations.length === 0 &&
291
- work.attempts < maxAttempts) {
292
- await rescheduleJob(ctx, work, console);
266
+ const running = state.running.find((r) => r.workId === c.workId);
267
+ if (!running) {
268
+ console.error(`[main] completing ${c.workId} but it's not in "running"`);
269
+ return;
270
+ }
271
+ if (c.retry) {
272
+ // Only check for work if it's going to be retried.
273
+ const work = await ctx.db.get(c.workId);
274
+ if (!work) {
275
+ console.warn(`[main] ${c.workId} is gone, but trying to complete`);
276
+ return;
277
+ }
278
+ const retried = await rescheduleJob(ctx, work, console);
279
+ if (retried) {
293
280
  state.report.retries++;
281
+ recordCompleted(console, work, "retrying");
294
282
  }
295
283
  else {
296
- if (c.runResult.kind === "success") {
297
- state.report.succeeded++;
298
- }
299
- else if (c.runResult.kind === "failed") {
300
- state.report.failed++;
301
- }
302
- // Ensure there aren't any pending cancelations for this work.
303
- for (const pendingCancelation of pendingCancelations) {
304
- await ctx.db.delete(pendingCancelation._id);
305
- }
306
- done.push(c);
284
+ // We don't retry if it's been canceled in the mean time.
285
+ state.report.canceled++;
286
+ toCancel.push({
287
+ workId: c.workId,
288
+ runResult: { kind: "canceled" },
289
+ attempt: work.attempts,
290
+ });
307
291
  }
308
- console.info(recordCompleted(work, c.runResult.kind));
309
- }
310
- else if (work) {
311
- console.warn(`[main] completing ${c.workId} but it's not in "running"`);
312
292
  }
313
293
  else {
314
- console.warn(`[main] completing ${c.workId} but it's not found`);
294
+ if (c.runResult.kind === "success") {
295
+ state.report.succeeded++;
296
+ }
297
+ else if (c.runResult.kind === "failed") {
298
+ state.report.failed++;
299
+ }
315
300
  }
316
301
  }));
317
- console.debug(`[main] completing ${done.length}`);
302
+ // We do this after so the stats above know if it was in progress.
303
+ const before = state.running.length;
318
304
  state.running = state.running.filter((r) => !completed.some((c) => c.workId === r.workId));
319
- return done.map((c) => ({ runResult: c.runResult, workId: c.workId }));
305
+ const numCompleted = before - state.running.length;
306
+ state.report.completed += numCompleted;
307
+ console.debug(`[main] completed ${numCompleted} work`);
308
+ return toCancel;
320
309
  }
321
- async function rescheduleJob(ctx, work, console) {
322
- if (!work.retryBehavior) {
323
- throw new Error("work has no retryBehavior");
324
- }
325
- const backoffMs = work.retryBehavior.initialBackoffMs *
326
- Math.pow(work.retryBehavior.base, work.attempts - 1);
327
- const nextAttempt = withJitter(backoffMs);
328
- const startTime = boundScheduledTime(Date.now() + nextAttempt, console);
329
- const segment = toSegment(startTime);
330
- await ctx.db.patch(work._id, {
331
- attempts: work.attempts + 1,
332
- });
333
- await ctx.db.insert("pendingStart", {
334
- workId: work._id,
335
- segment,
336
- });
337
- return nextAttempt;
338
- }
339
- export function withJitter(delay) {
340
- return delay * (0.5 + Math.random());
341
- }
342
- async function handleCancelation(ctx, state, segment, console) {
310
+ async function handleCancelation(ctx, state, segment, console, toCancel) {
343
311
  const start = state.segmentCursors.cancelation - CURSOR_BUFFER_SEGMENTS;
344
312
  const canceled = await ctx.db
345
313
  .query("pendingCancelation")
346
314
  .withIndex("segment", (q) => q.gte("segment", start).lte("segment", segment))
347
315
  .take(CANCELLATION_BATCH_SIZE);
348
316
  state.segmentCursors.cancelation = canceled.at(-1)?.segment ?? segment;
349
- console.debug(`[main] attempting to cancel ${canceled.length}`);
317
+ if (canceled.length) {
318
+ console.debug(`[main] attempting to cancel ${canceled.length}`);
319
+ }
350
320
  const canceledWork = new Set();
351
- await Promise.all(canceled.map(async ({ _id, workId }) => {
321
+ const runResult = { kind: "canceled" };
322
+ const jobs = toCancel.concat(...(await Promise.all(canceled.map(async ({ _id, _creationTime, workId }) => {
352
323
  await ctx.db.delete(_id);
324
+ if (canceledWork.has(workId)) {
325
+ // We shouldn't have multiple pending cancelations for the same work.
326
+ console.error(`[main] ${workId} already canceled`);
327
+ return null;
328
+ }
353
329
  const work = await ctx.db.get(workId);
354
330
  if (!work) {
355
- console.warn(`[handleCancelation] ${workId} is gone`);
356
- return;
331
+ console.warn(`[main] ${workId} is gone, but trying to cancel`);
332
+ return null;
357
333
  }
358
334
  // Ensure it doesn't retry.
359
- await ctx.db.patch(workId, { retryBehavior: undefined });
335
+ await ctx.db.patch(workId, { canceled: true });
360
336
  // Ensure it doesn't start.
361
337
  const pendingStart = await ctx.db
362
338
  .query("pendingStart")
363
339
  .withIndex("workId", (q) => q.eq("workId", workId))
364
340
  .unique();
365
341
  if (pendingStart && !canceledWork.has(workId)) {
366
- console.info(recordCompleted(work, "canceled"));
367
342
  state.report.canceled++;
368
343
  await ctx.db.delete(pendingStart._id);
369
344
  canceledWork.add(workId);
345
+ return { workId, runResult, attempt: work.attempts };
370
346
  }
371
- }));
372
- return Array.from(canceledWork).map((id) => ({
373
- runResult: { kind: "canceled" },
374
- workId: id,
375
- }));
347
+ return null;
348
+ }))).flatMap((r) => (r ? [r] : [])));
349
+ if (jobs.length) {
350
+ await ctx.scheduler.runAfter(0, internal.complete.complete, { jobs });
351
+ }
352
+ }
353
+ async function handleRecovery(ctx, state, console) {
354
+ const missing = new Set();
355
+ const oldEnoughToConsider = Date.now() - RECOVERY_THRESHOLD_MS;
356
+ const jobs = (await Promise.all(state.running.map(async (r) => {
357
+ if (r.started >= oldEnoughToConsider) {
358
+ return null;
359
+ }
360
+ const work = await ctx.db.get(r.workId);
361
+ if (!work) {
362
+ missing.add(r.workId);
363
+ console.error(`[main] ${r.workId} already gone (skipping recovery)`);
364
+ return null;
365
+ }
366
+ return { ...r, attempt: work.attempts };
367
+ }))).flatMap((r) => (r ? [r] : []));
368
+ state.running = state.running.filter((r) => !missing.has(r.workId));
369
+ if (jobs.length) {
370
+ await ctx.scheduler.runAfter(0, internal.recovery.recover, { jobs });
371
+ }
376
372
  }
377
- async function handleStart(ctx, state, segment, console, globals) {
378
- const maxParallelism = globals.maxParallelism;
373
+ async function handleStart(ctx, state, segment, console, { maxParallelism, logLevel }) {
379
374
  // Schedule as many as needed to reach maxParallelism.
380
375
  const toSchedule = maxParallelism - state.running.length;
381
376
  const pending = await ctx.db
@@ -387,39 +382,81 @@ async function handleStart(ctx, state, segment, console, globals) {
387
382
  state.segmentCursors.incoming = pending.at(-1)?.segment ?? segment;
388
383
  console.debug(`[main] scheduling ${pending.length} pending work`);
389
384
  // Start new work.
390
- state.running.push(...(await Promise.all(pending.map(async ({ _id, workId }) => {
391
- const scheduledId = await beginWork(ctx, workId, globals.logLevel);
385
+ state.running.push(...(await Promise.all(pending.map(async ({ _id, workId, segment }) => {
386
+ if (state.running.some((r) => r.workId === workId)) {
387
+ console.error(`[main] ${workId} already running (skipping start)`);
388
+ return null;
389
+ }
390
+ const lagMs = Date.now() - fromSegment(segment);
391
+ const scheduledId = await beginWork(ctx, workId, logLevel, lagMs);
392
392
  await ctx.db.delete(_id);
393
393
  return { scheduledId, workId, started: Date.now() };
394
- }))));
394
+ }))).flatMap((r) => (r ? [r] : [])));
395
395
  }
396
- async function beginWork(ctx, workId, logLevel) {
396
+ async function beginWork(ctx, workId, logLevel, lagMs) {
397
397
  const console = createLogger(logLevel);
398
398
  const work = await ctx.db.get(workId);
399
399
  if (!work) {
400
400
  throw new Error("work not found");
401
401
  }
402
- console.info(recordStarted(work));
402
+ recordStarted(console, work, lagMs);
403
+ const { attempts: attempt, fnHandle, fnArgs } = work;
404
+ const args = { workId, fnHandle, fnArgs, logLevel, attempt };
403
405
  if (work.fnType === "action") {
404
- return await ctx.scheduler.runAfter(0, internal.worker.runActionWrapper, {
405
- workId: work._id,
406
- fnHandle: work.fnHandle,
407
- fnArgs: work.fnArgs,
408
- logLevel,
409
- });
406
+ return ctx.scheduler.runAfter(0, internal.worker.runActionWrapper, args);
410
407
  }
411
408
  else if (work.fnType === "mutation") {
412
- return await ctx.scheduler.runAfter(0, internal.worker.runMutationWrapper, {
413
- workId: work._id,
414
- fnHandle: work.fnHandle,
415
- fnArgs: work.fnArgs,
416
- logLevel,
417
- });
409
+ return ctx.scheduler.runAfter(0, internal.worker.runMutationWrapper, args);
418
410
  }
419
411
  else {
420
412
  throw new Error(`Unexpected fnType ${work.fnType}`);
421
413
  }
422
414
  }
415
+ /**
416
+ * Reschedules a job for retry.
417
+ * If it's been canceled in the mean time, don't retry.
418
+ * @returns true if the job was rescheduled, false if it was not.
419
+ */
420
+ async function rescheduleJob(ctx, work, console) {
421
+ const pendingCancelation = await ctx.db
422
+ .query("pendingCancelation")
423
+ .withIndex("workId", (q) => q.eq("workId", work._id))
424
+ .unique();
425
+ if (pendingCancelation) {
426
+ // If there's an un-processed cancelation request, don't retry.
427
+ console.warn(`[main] ${work._id} in pendingCancelation so not retrying`);
428
+ return false;
429
+ }
430
+ if (work.canceled) {
431
+ return false;
432
+ }
433
+ if (!work.retryBehavior) {
434
+ console.warn(`[main] ${work._id} has no retryBehavior so not retrying`);
435
+ return false;
436
+ }
437
+ const existing = await ctx.db
438
+ .query("pendingStart")
439
+ .withIndex("workId", (q) => q.eq("workId", work._id))
440
+ .first();
441
+ if (existing) {
442
+ // Not sure why this would ever happen, but ensure uniqueness explicitly.
443
+ console.error(`[main] ${work._id} already in pendingStart so not retrying`);
444
+ return false;
445
+ }
446
+ const backoffMs = work.retryBehavior.initialBackoffMs *
447
+ Math.pow(work.retryBehavior.base, work.attempts - 1);
448
+ const nextAttempt = withJitter(backoffMs);
449
+ const startTime = boundScheduledTime(Date.now() + nextAttempt, console);
450
+ const segment = toSegment(startTime);
451
+ await ctx.db.insert("pendingStart", {
452
+ workId: work._id,
453
+ segment,
454
+ });
455
+ return true;
456
+ }
457
+ export function withJitter(delay) {
458
+ return delay * (0.5 + Math.random());
459
+ }
423
460
  async function getGlobals(ctx) {
424
461
  const globals = await ctx.db.query("globals").unique();
425
462
  if (!globals) {