@nicnocquee/dataqueue 1.25.0 → 1.26.0-beta.20260223202259

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/ai/build-docs-content.ts +96 -0
  2. package/ai/build-llms-full.ts +42 -0
  3. package/ai/docs-content.json +284 -0
  4. package/ai/rules/advanced.md +150 -0
  5. package/ai/rules/basic.md +159 -0
  6. package/ai/rules/react-dashboard.md +83 -0
  7. package/ai/skills/dataqueue-advanced/SKILL.md +370 -0
  8. package/ai/skills/dataqueue-core/SKILL.md +234 -0
  9. package/ai/skills/dataqueue-react/SKILL.md +189 -0
  10. package/dist/cli.cjs +1149 -14
  11. package/dist/cli.cjs.map +1 -1
  12. package/dist/cli.d.cts +66 -1
  13. package/dist/cli.d.ts +66 -1
  14. package/dist/cli.js +1146 -13
  15. package/dist/cli.js.map +1 -1
  16. package/dist/index.cjs +3236 -1237
  17. package/dist/index.cjs.map +1 -1
  18. package/dist/index.d.cts +697 -23
  19. package/dist/index.d.ts +697 -23
  20. package/dist/index.js +3235 -1238
  21. package/dist/index.js.map +1 -1
  22. package/dist/mcp-server.cjs +186 -0
  23. package/dist/mcp-server.cjs.map +1 -0
  24. package/dist/mcp-server.d.cts +32 -0
  25. package/dist/mcp-server.d.ts +32 -0
  26. package/dist/mcp-server.js +175 -0
  27. package/dist/mcp-server.js.map +1 -0
  28. package/migrations/1781200000004_create_cron_schedules_table.sql +33 -0
  29. package/migrations/1781200000005_add_retry_config_to_job_queue.sql +17 -0
  30. package/package.json +24 -21
  31. package/src/backend.ts +170 -5
  32. package/src/backends/postgres.ts +992 -63
  33. package/src/backends/redis-scripts.ts +358 -26
  34. package/src/backends/redis.test.ts +1532 -0
  35. package/src/backends/redis.ts +993 -35
  36. package/src/cli.test.ts +82 -6
  37. package/src/cli.ts +73 -10
  38. package/src/cron.test.ts +126 -0
  39. package/src/cron.ts +40 -0
  40. package/src/db-util.ts +1 -1
  41. package/src/index.test.ts +1034 -11
  42. package/src/index.ts +267 -39
  43. package/src/init-command.test.ts +449 -0
  44. package/src/init-command.ts +709 -0
  45. package/src/install-mcp-command.test.ts +216 -0
  46. package/src/install-mcp-command.ts +185 -0
  47. package/src/install-rules-command.test.ts +218 -0
  48. package/src/install-rules-command.ts +233 -0
  49. package/src/install-skills-command.test.ts +176 -0
  50. package/src/install-skills-command.ts +124 -0
  51. package/src/mcp-server.test.ts +162 -0
  52. package/src/mcp-server.ts +231 -0
  53. package/src/processor.ts +104 -113
  54. package/src/queue.test.ts +465 -0
  55. package/src/queue.ts +34 -252
  56. package/src/supervisor.test.ts +340 -0
  57. package/src/supervisor.ts +177 -0
  58. package/src/types.ts +476 -12
  59. package/LICENSE +0 -21
@@ -7,10 +7,60 @@ import {
7
7
  JobEventType,
8
8
  TagQueryMode,
9
9
  JobType,
10
+ CronScheduleRecord,
11
+ CronScheduleStatus,
12
+ EditCronScheduleOptions,
13
+ WaitpointRecord,
14
+ CreateTokenOptions,
15
+ AddJobOptions,
16
+ DatabaseClient,
10
17
  } from '../types.js';
11
- import { QueueBackend, JobFilters, JobUpdates } from '../backend.js';
18
+ import { randomUUID } from 'crypto';
19
+ import {
20
+ QueueBackend,
21
+ JobFilters,
22
+ JobUpdates,
23
+ CronScheduleInput,
24
+ } from '../backend.js';
12
25
  import { log } from '../log-context.js';
13
26
 
27
+ const MAX_TIMEOUT_MS = 365 * 24 * 60 * 60 * 1000;
28
+
29
+ /** Parse a timeout string like '10m', '1h', '24h', '7d' into milliseconds. */
30
+ function parseTimeoutString(timeout: string): number {
31
+ const match = timeout.match(/^(\d+)(s|m|h|d)$/);
32
+ if (!match) {
33
+ throw new Error(
34
+ `Invalid timeout format: "${timeout}". Expected format like "10m", "1h", "24h", "7d".`,
35
+ );
36
+ }
37
+ const value = parseInt(match[1], 10);
38
+ const unit = match[2];
39
+ let ms: number;
40
+ switch (unit) {
41
+ case 's':
42
+ ms = value * 1000;
43
+ break;
44
+ case 'm':
45
+ ms = value * 60 * 1000;
46
+ break;
47
+ case 'h':
48
+ ms = value * 60 * 60 * 1000;
49
+ break;
50
+ case 'd':
51
+ ms = value * 24 * 60 * 60 * 1000;
52
+ break;
53
+ default:
54
+ throw new Error(`Unknown timeout unit: "${unit}"`);
55
+ }
56
+ if (!Number.isFinite(ms) || ms > MAX_TIMEOUT_MS) {
57
+ throw new Error(
58
+ `Timeout value "${timeout}" is too large. Maximum allowed is 365 days.`,
59
+ );
60
+ }
61
+ return ms;
62
+ }
63
+
14
64
  export class PostgresBackend implements QueueBackend {
15
65
  constructor(private pool: Pool) {}
16
66
 
@@ -55,18 +105,34 @@ export class PostgresBackend implements QueueBackend {
55
105
 
56
106
  // ── Job CRUD ──────────────────────────────────────────────────────────
57
107
 
58
- async addJob<PayloadMap, T extends JobType<PayloadMap>>({
59
- jobType,
60
- payload,
61
- maxAttempts = 3,
62
- priority = 0,
63
- runAt = null,
64
- timeoutMs = undefined,
65
- forceKillOnTimeout = false,
66
- tags = undefined,
67
- idempotencyKey = undefined,
68
- }: JobOptions<PayloadMap, T>): Promise<number> {
69
- const client = await this.pool.connect();
108
+ /**
109
+ * Add a job and return its numeric ID.
110
+ *
111
+ * @param job - Job configuration.
112
+ * @param options - Optional. Pass `{ db }` to run the INSERT on an external
113
+ * client (e.g., inside a transaction) so the job is part of the caller's
114
+ * transaction. The event INSERT also uses the same client.
115
+ */
116
+ async addJob<PayloadMap, T extends JobType<PayloadMap>>(
117
+ {
118
+ jobType,
119
+ payload,
120
+ maxAttempts = 3,
121
+ priority = 0,
122
+ runAt = null,
123
+ timeoutMs = undefined,
124
+ forceKillOnTimeout = false,
125
+ tags = undefined,
126
+ idempotencyKey = undefined,
127
+ retryDelay = undefined,
128
+ retryBackoff = undefined,
129
+ retryDelayMax = undefined,
130
+ }: JobOptions<PayloadMap, T>,
131
+ options?: AddJobOptions,
132
+ ): Promise<number> {
133
+ const externalClient = options?.db;
134
+ const client: DatabaseClient =
135
+ externalClient ?? (await this.pool.connect());
70
136
  try {
71
137
  let result;
72
138
  const onConflict = idempotencyKey
@@ -76,8 +142,8 @@ export class PostgresBackend implements QueueBackend {
76
142
  if (runAt) {
77
143
  result = await client.query(
78
144
  `INSERT INTO job_queue
79
- (job_type, payload, max_attempts, priority, run_at, timeout_ms, force_kill_on_timeout, tags, idempotency_key)
80
- VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
145
+ (job_type, payload, max_attempts, priority, run_at, timeout_ms, force_kill_on_timeout, tags, idempotency_key, retry_delay, retry_backoff, retry_delay_max)
146
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
81
147
  ${onConflict}
82
148
  RETURNING id`,
83
149
  [
@@ -90,13 +156,16 @@ export class PostgresBackend implements QueueBackend {
90
156
  forceKillOnTimeout ?? false,
91
157
  tags ?? null,
92
158
  idempotencyKey ?? null,
159
+ retryDelay ?? null,
160
+ retryBackoff ?? null,
161
+ retryDelayMax ?? null,
93
162
  ],
94
163
  );
95
164
  } else {
96
165
  result = await client.query(
97
166
  `INSERT INTO job_queue
98
- (job_type, payload, max_attempts, priority, timeout_ms, force_kill_on_timeout, tags, idempotency_key)
99
- VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
167
+ (job_type, payload, max_attempts, priority, timeout_ms, force_kill_on_timeout, tags, idempotency_key, retry_delay, retry_backoff, retry_delay_max)
168
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
100
169
  ${onConflict}
101
170
  RETURNING id`,
102
171
  [
@@ -108,11 +177,13 @@ export class PostgresBackend implements QueueBackend {
108
177
  forceKillOnTimeout ?? false,
109
178
  tags ?? null,
110
179
  idempotencyKey ?? null,
180
+ retryDelay ?? null,
181
+ retryBackoff ?? null,
182
+ retryDelayMax ?? null,
111
183
  ],
112
184
  );
113
185
  }
114
186
 
115
- // If ON CONFLICT DO NOTHING was triggered, no rows are returned.
116
187
  if (result.rows.length === 0 && idempotencyKey) {
117
188
  const existing = await client.query(
118
189
  `SELECT id FROM job_queue WHERE idempotency_key = $1`,
@@ -133,18 +204,217 @@ export class PostgresBackend implements QueueBackend {
133
204
  log(
134
205
  `Added job ${jobId}: payload ${JSON.stringify(payload)}, ${runAt ? `runAt ${runAt.toISOString()}, ` : ''}priority ${priority}, maxAttempts ${maxAttempts}, jobType ${jobType}, tags ${JSON.stringify(tags)}${idempotencyKey ? `, idempotencyKey "${idempotencyKey}"` : ''}`,
135
206
  );
136
- await this.recordJobEvent(jobId, JobEventType.Added, {
137
- jobType,
138
- payload,
139
- tags,
140
- idempotencyKey,
141
- });
207
+
208
+ if (externalClient) {
209
+ try {
210
+ await client.query(
211
+ `INSERT INTO job_events (job_id, event_type, metadata) VALUES ($1, $2, $3)`,
212
+ [
213
+ jobId,
214
+ JobEventType.Added,
215
+ JSON.stringify({ jobType, payload, tags, idempotencyKey }),
216
+ ],
217
+ );
218
+ } catch (error) {
219
+ log(`Error recording job event for job ${jobId}: ${error}`);
220
+ }
221
+ } else {
222
+ await this.recordJobEvent(jobId, JobEventType.Added, {
223
+ jobType,
224
+ payload,
225
+ tags,
226
+ idempotencyKey,
227
+ });
228
+ }
142
229
  return jobId;
143
230
  } catch (error) {
144
231
  log(`Error adding job: ${error}`);
145
232
  throw error;
146
233
  } finally {
147
- client.release();
234
+ if (!externalClient) (client as any).release();
235
+ }
236
+ }
237
+
238
+ /**
239
+ * Insert multiple jobs in a single database round-trip.
240
+ *
241
+ * Uses a multi-row INSERT with ON CONFLICT handling for idempotency keys.
242
+ * Returns IDs in the same order as the input array.
243
+ */
244
+ async addJobs<PayloadMap, T extends JobType<PayloadMap>>(
245
+ jobs: JobOptions<PayloadMap, T>[],
246
+ options?: AddJobOptions,
247
+ ): Promise<number[]> {
248
+ if (jobs.length === 0) return [];
249
+
250
+ const externalClient = options?.db;
251
+ const client: DatabaseClient =
252
+ externalClient ?? (await this.pool.connect());
253
+ try {
254
+ const COLS_PER_JOB = 12;
255
+ const valueClauses: string[] = [];
256
+ const params: any[] = [];
257
+
258
+ const hasAnyIdempotencyKey = jobs.some((j) => j.idempotencyKey);
259
+
260
+ for (let i = 0; i < jobs.length; i++) {
261
+ const {
262
+ jobType,
263
+ payload,
264
+ maxAttempts = 3,
265
+ priority = 0,
266
+ runAt = null,
267
+ timeoutMs = undefined,
268
+ forceKillOnTimeout = false,
269
+ tags = undefined,
270
+ idempotencyKey = undefined,
271
+ retryDelay = undefined,
272
+ retryBackoff = undefined,
273
+ retryDelayMax = undefined,
274
+ } = jobs[i];
275
+
276
+ const base = i * COLS_PER_JOB;
277
+ valueClauses.push(
278
+ `($${base + 1}, $${base + 2}, $${base + 3}, $${base + 4}, ` +
279
+ `COALESCE($${base + 5}::timestamptz, CURRENT_TIMESTAMP), ` +
280
+ `$${base + 6}, $${base + 7}, $${base + 8}, $${base + 9}, ` +
281
+ `$${base + 10}, $${base + 11}, $${base + 12})`,
282
+ );
283
+ params.push(
284
+ jobType,
285
+ payload,
286
+ maxAttempts,
287
+ priority,
288
+ runAt,
289
+ timeoutMs ?? null,
290
+ forceKillOnTimeout ?? false,
291
+ tags ?? null,
292
+ idempotencyKey ?? null,
293
+ retryDelay ?? null,
294
+ retryBackoff ?? null,
295
+ retryDelayMax ?? null,
296
+ );
297
+ }
298
+
299
+ const onConflict = hasAnyIdempotencyKey
300
+ ? `ON CONFLICT (idempotency_key) WHERE idempotency_key IS NOT NULL DO NOTHING`
301
+ : '';
302
+
303
+ const result = await client.query(
304
+ `INSERT INTO job_queue
305
+ (job_type, payload, max_attempts, priority, run_at, timeout_ms, force_kill_on_timeout, tags, idempotency_key, retry_delay, retry_backoff, retry_delay_max)
306
+ VALUES ${valueClauses.join(', ')}
307
+ ${onConflict}
308
+ RETURNING id, idempotency_key`,
309
+ params,
310
+ );
311
+
312
+ // Build a map of idempotency_key -> id from returned rows
313
+ const returnedKeyToId = new Map<string, number>();
314
+ const returnedNullKeyIds: number[] = [];
315
+ for (const row of result.rows) {
316
+ if (row.idempotency_key != null) {
317
+ returnedKeyToId.set(row.idempotency_key, row.id);
318
+ } else {
319
+ returnedNullKeyIds.push(row.id);
320
+ }
321
+ }
322
+
323
+ // Identify idempotency keys that conflicted (not in RETURNING)
324
+ const missingKeys: string[] = [];
325
+ for (const job of jobs) {
326
+ if (job.idempotencyKey && !returnedKeyToId.has(job.idempotencyKey)) {
327
+ missingKeys.push(job.idempotencyKey);
328
+ }
329
+ }
330
+
331
+ // Batch-fetch existing IDs for conflicted keys
332
+ if (missingKeys.length > 0) {
333
+ const existing = await client.query(
334
+ `SELECT id, idempotency_key FROM job_queue WHERE idempotency_key = ANY($1)`,
335
+ [missingKeys],
336
+ );
337
+ for (const row of existing.rows) {
338
+ returnedKeyToId.set(row.idempotency_key, row.id);
339
+ }
340
+ }
341
+
342
+ // Assemble result array in input order
343
+ let nullKeyIdx = 0;
344
+ const ids: number[] = [];
345
+ for (const job of jobs) {
346
+ if (job.idempotencyKey) {
347
+ const id = returnedKeyToId.get(job.idempotencyKey);
348
+ if (id === undefined) {
349
+ throw new Error(
350
+ `Failed to resolve job ID for idempotency key "${job.idempotencyKey}"`,
351
+ );
352
+ }
353
+ ids.push(id);
354
+ } else {
355
+ ids.push(returnedNullKeyIds[nullKeyIdx++]);
356
+ }
357
+ }
358
+
359
+ log(`Batch-inserted ${jobs.length} jobs, IDs: [${ids.join(', ')}]`);
360
+
361
+ // Record 'added' events — only for newly inserted jobs
362
+ const newJobEvents: {
363
+ jobId: number;
364
+ eventType: JobEventType;
365
+ metadata?: any;
366
+ }[] = [];
367
+ for (let i = 0; i < jobs.length; i++) {
368
+ const job = jobs[i];
369
+ const wasInserted =
370
+ !job.idempotencyKey || !missingKeys.includes(job.idempotencyKey);
371
+ if (wasInserted) {
372
+ newJobEvents.push({
373
+ jobId: ids[i],
374
+ eventType: JobEventType.Added,
375
+ metadata: {
376
+ jobType: job.jobType,
377
+ payload: job.payload,
378
+ tags: job.tags,
379
+ idempotencyKey: job.idempotencyKey,
380
+ },
381
+ });
382
+ }
383
+ }
384
+
385
+ if (newJobEvents.length > 0) {
386
+ if (externalClient) {
387
+ // Record events on the same transaction client
388
+ const evtValues: string[] = [];
389
+ const evtParams: any[] = [];
390
+ let evtIdx = 1;
391
+ for (const evt of newJobEvents) {
392
+ evtValues.push(`($${evtIdx++}, $${evtIdx++}, $${evtIdx++})`);
393
+ evtParams.push(
394
+ evt.jobId,
395
+ evt.eventType,
396
+ evt.metadata ? JSON.stringify(evt.metadata) : null,
397
+ );
398
+ }
399
+ try {
400
+ await client.query(
401
+ `INSERT INTO job_events (job_id, event_type, metadata) VALUES ${evtValues.join(', ')}`,
402
+ evtParams,
403
+ );
404
+ } catch (error) {
405
+ log(`Error recording batch job events: ${error}`);
406
+ }
407
+ } else {
408
+ await this.recordJobEventsBatch(newJobEvents);
409
+ }
410
+ }
411
+
412
+ return ids;
413
+ } catch (error) {
414
+ log(`Error batch-inserting jobs: ${error}`);
415
+ throw error;
416
+ } finally {
417
+ if (!externalClient) (client as any).release();
148
418
  }
149
419
  }
150
420
 
@@ -154,7 +424,7 @@ export class PostgresBackend implements QueueBackend {
154
424
  const client = await this.pool.connect();
155
425
  try {
156
426
  const result = await client.query(
157
- `SELECT id, job_type AS "jobType", payload, status, max_attempts AS "maxAttempts", attempts, priority, run_at AS "runAt", timeout_ms AS "timeoutMs", force_kill_on_timeout AS "forceKillOnTimeout", created_at AS "createdAt", updated_at AS "updatedAt", started_at AS "startedAt", completed_at AS "completedAt", last_failed_at AS "lastFailedAt", locked_at AS "lockedAt", locked_by AS "lockedBy", error_history AS "errorHistory", failure_reason AS "failureReason", next_attempt_at AS "nextAttemptAt", last_failed_at AS "lastFailedAt", last_retried_at AS "lastRetriedAt", last_cancelled_at AS "lastCancelledAt", pending_reason AS "pendingReason", tags, idempotency_key AS "idempotencyKey", wait_until AS "waitUntil", wait_token_id AS "waitTokenId", step_data AS "stepData", progress FROM job_queue WHERE id = $1`,
427
+ `SELECT id, job_type AS "jobType", payload, status, max_attempts AS "maxAttempts", attempts, priority, run_at AS "runAt", timeout_ms AS "timeoutMs", force_kill_on_timeout AS "forceKillOnTimeout", created_at AS "createdAt", updated_at AS "updatedAt", started_at AS "startedAt", completed_at AS "completedAt", last_failed_at AS "lastFailedAt", locked_at AS "lockedAt", locked_by AS "lockedBy", error_history AS "errorHistory", failure_reason AS "failureReason", next_attempt_at AS "nextAttemptAt", last_failed_at AS "lastFailedAt", last_retried_at AS "lastRetriedAt", last_cancelled_at AS "lastCancelledAt", pending_reason AS "pendingReason", tags, idempotency_key AS "idempotencyKey", wait_until AS "waitUntil", wait_token_id AS "waitTokenId", step_data AS "stepData", progress, retry_delay AS "retryDelay", retry_backoff AS "retryBackoff", retry_delay_max AS "retryDelayMax" FROM job_queue WHERE id = $1`,
158
428
  [id],
159
429
  );
160
430
 
@@ -188,7 +458,7 @@ export class PostgresBackend implements QueueBackend {
188
458
  const client = await this.pool.connect();
189
459
  try {
190
460
  const result = await client.query(
191
- `SELECT id, job_type AS "jobType", payload, status, max_attempts AS "maxAttempts", attempts, priority, run_at AS "runAt", timeout_ms AS "timeoutMs", force_kill_on_timeout AS "forceKillOnTimeout", created_at AS "createdAt", updated_at AS "updatedAt", started_at AS "startedAt", completed_at AS "completedAt", last_failed_at AS "lastFailedAt", locked_at AS "lockedAt", locked_by AS "lockedBy", error_history AS "errorHistory", failure_reason AS "failureReason", next_attempt_at AS "nextAttemptAt", last_failed_at AS "lastFailedAt", last_retried_at AS "lastRetriedAt", last_cancelled_at AS "lastCancelledAt", pending_reason AS "pendingReason", idempotency_key AS "idempotencyKey", wait_until AS "waitUntil", wait_token_id AS "waitTokenId", step_data AS "stepData", progress FROM job_queue WHERE status = $1 ORDER BY created_at DESC LIMIT $2 OFFSET $3`,
461
+ `SELECT id, job_type AS "jobType", payload, status, max_attempts AS "maxAttempts", attempts, priority, run_at AS "runAt", timeout_ms AS "timeoutMs", force_kill_on_timeout AS "forceKillOnTimeout", created_at AS "createdAt", updated_at AS "updatedAt", started_at AS "startedAt", completed_at AS "completedAt", last_failed_at AS "lastFailedAt", locked_at AS "lockedAt", locked_by AS "lockedBy", error_history AS "errorHistory", failure_reason AS "failureReason", next_attempt_at AS "nextAttemptAt", last_failed_at AS "lastFailedAt", last_retried_at AS "lastRetriedAt", last_cancelled_at AS "lastCancelledAt", pending_reason AS "pendingReason", idempotency_key AS "idempotencyKey", wait_until AS "waitUntil", wait_token_id AS "waitTokenId", step_data AS "stepData", progress, retry_delay AS "retryDelay", retry_backoff AS "retryBackoff", retry_delay_max AS "retryDelayMax" FROM job_queue WHERE status = $1 ORDER BY created_at DESC LIMIT $2 OFFSET $3`,
192
462
  [status, limit, offset],
193
463
  );
194
464
  log(`Found ${result.rows.length} jobs by status ${status}`);
@@ -214,7 +484,7 @@ export class PostgresBackend implements QueueBackend {
214
484
  const client = await this.pool.connect();
215
485
  try {
216
486
  const result = await client.query(
217
- `SELECT id, job_type AS "jobType", payload, status, max_attempts AS "maxAttempts", attempts, priority, run_at AS "runAt", timeout_ms AS "timeoutMs", force_kill_on_timeout AS "forceKillOnTimeout", created_at AS "createdAt", updated_at AS "updatedAt", started_at AS "startedAt", completed_at AS "completedAt", last_failed_at AS "lastFailedAt", locked_at AS "lockedAt", locked_by AS "lockedBy", error_history AS "errorHistory", failure_reason AS "failureReason", next_attempt_at AS "nextAttemptAt", last_failed_at AS "lastFailedAt", last_retried_at AS "lastRetriedAt", last_cancelled_at AS "lastCancelledAt", pending_reason AS "pendingReason", idempotency_key AS "idempotencyKey", wait_until AS "waitUntil", wait_token_id AS "waitTokenId", step_data AS "stepData", progress FROM job_queue ORDER BY created_at DESC LIMIT $1 OFFSET $2`,
487
+ `SELECT id, job_type AS "jobType", payload, status, max_attempts AS "maxAttempts", attempts, priority, run_at AS "runAt", timeout_ms AS "timeoutMs", force_kill_on_timeout AS "forceKillOnTimeout", created_at AS "createdAt", updated_at AS "updatedAt", started_at AS "startedAt", completed_at AS "completedAt", last_failed_at AS "lastFailedAt", locked_at AS "lockedAt", locked_by AS "lockedBy", error_history AS "errorHistory", failure_reason AS "failureReason", next_attempt_at AS "nextAttemptAt", last_failed_at AS "lastFailedAt", last_retried_at AS "lastRetriedAt", last_cancelled_at AS "lastCancelledAt", pending_reason AS "pendingReason", idempotency_key AS "idempotencyKey", wait_until AS "waitUntil", wait_token_id AS "waitTokenId", step_data AS "stepData", progress, retry_delay AS "retryDelay", retry_backoff AS "retryBackoff", retry_delay_max AS "retryDelayMax" FROM job_queue ORDER BY created_at DESC LIMIT $1 OFFSET $2`,
218
488
  [limit, offset],
219
489
  );
220
490
  log(`Found ${result.rows.length} jobs (all)`);
@@ -239,7 +509,7 @@ export class PostgresBackend implements QueueBackend {
239
509
  ): Promise<JobRecord<PayloadMap, T>[]> {
240
510
  const client = await this.pool.connect();
241
511
  try {
242
- let query = `SELECT id, job_type AS "jobType", payload, status, max_attempts AS "maxAttempts", attempts, priority, run_at AS "runAt", timeout_ms AS "timeoutMs", force_kill_on_timeout AS "forceKillOnTimeout", created_at AS "createdAt", updated_at AS "updatedAt", started_at AS "startedAt", completed_at AS "completedAt", last_failed_at AS "lastFailedAt", locked_at AS "lockedAt", locked_by AS "lockedBy", error_history AS "errorHistory", failure_reason AS "failureReason", next_attempt_at AS "nextAttemptAt", last_failed_at AS "lastFailedAt", last_retried_at AS "lastRetriedAt", last_cancelled_at AS "lastCancelledAt", pending_reason AS "pendingReason", tags, idempotency_key AS "idempotencyKey", wait_until AS "waitUntil", wait_token_id AS "waitTokenId", step_data AS "stepData", progress FROM job_queue`;
512
+ let query = `SELECT id, job_type AS "jobType", payload, status, max_attempts AS "maxAttempts", attempts, priority, run_at AS "runAt", timeout_ms AS "timeoutMs", force_kill_on_timeout AS "forceKillOnTimeout", created_at AS "createdAt", updated_at AS "updatedAt", started_at AS "startedAt", completed_at AS "completedAt", last_failed_at AS "lastFailedAt", locked_at AS "lockedAt", locked_by AS "lockedBy", error_history AS "errorHistory", failure_reason AS "failureReason", next_attempt_at AS "nextAttemptAt", last_failed_at AS "lastFailedAt", last_retried_at AS "lastRetriedAt", last_cancelled_at AS "lastCancelledAt", pending_reason AS "pendingReason", tags, idempotency_key AS "idempotencyKey", wait_until AS "waitUntil", wait_token_id AS "waitTokenId", step_data AS "stepData", progress, retry_delay AS "retryDelay", retry_backoff AS "retryBackoff", retry_delay_max AS "retryDelayMax" FROM job_queue`;
243
513
  const params: any[] = [];
244
514
  const where: string[] = [];
245
515
  let paramIdx = 1;
@@ -366,7 +636,7 @@ export class PostgresBackend implements QueueBackend {
366
636
  ): Promise<JobRecord<PayloadMap, T>[]> {
367
637
  const client = await this.pool.connect();
368
638
  try {
369
- let query = `SELECT id, job_type AS "jobType", payload, status, max_attempts AS "maxAttempts", attempts, priority, run_at AS "runAt", timeout_ms AS "timeoutMs", created_at AS "createdAt", updated_at AS "updatedAt", started_at AS "startedAt", completed_at AS "completedAt", last_failed_at AS "lastFailedAt", locked_at AS "lockedAt", locked_by AS "lockedBy", error_history AS "errorHistory", failure_reason AS "failureReason", next_attempt_at AS "nextAttemptAt", last_failed_at AS "lastFailedAt", last_retried_at AS "lastRetriedAt", last_cancelled_at AS "lastCancelledAt", pending_reason AS "pendingReason", tags, idempotency_key AS "idempotencyKey", wait_until AS "waitUntil", wait_token_id AS "waitTokenId", step_data AS "stepData", progress
639
+ let query = `SELECT id, job_type AS "jobType", payload, status, max_attempts AS "maxAttempts", attempts, priority, run_at AS "runAt", timeout_ms AS "timeoutMs", created_at AS "createdAt", updated_at AS "updatedAt", started_at AS "startedAt", completed_at AS "completedAt", last_failed_at AS "lastFailedAt", locked_at AS "lockedAt", locked_by AS "lockedBy", error_history AS "errorHistory", failure_reason AS "failureReason", next_attempt_at AS "nextAttemptAt", last_failed_at AS "lastFailedAt", last_retried_at AS "lastRetriedAt", last_cancelled_at AS "lastCancelledAt", pending_reason AS "pendingReason", tags, idempotency_key AS "idempotencyKey", wait_until AS "waitUntil", wait_token_id AS "waitTokenId", step_data AS "stepData", progress, retry_delay AS "retryDelay", retry_backoff AS "retryBackoff", retry_delay_max AS "retryDelayMax"
370
640
  FROM job_queue`;
371
641
  let params: any[] = [];
372
642
  switch (mode) {
@@ -468,7 +738,7 @@ export class PostgresBackend implements QueueBackend {
468
738
  LIMIT $2
469
739
  FOR UPDATE SKIP LOCKED
470
740
  )
471
- RETURNING id, job_type AS "jobType", payload, status, max_attempts AS "maxAttempts", attempts, priority, run_at AS "runAt", timeout_ms AS "timeoutMs", force_kill_on_timeout AS "forceKillOnTimeout", created_at AS "createdAt", updated_at AS "updatedAt", started_at AS "startedAt", completed_at AS "completedAt", last_failed_at AS "lastFailedAt", locked_at AS "lockedAt", locked_by AS "lockedBy", error_history AS "errorHistory", failure_reason AS "failureReason", next_attempt_at AS "nextAttemptAt", last_retried_at AS "lastRetriedAt", last_cancelled_at AS "lastCancelledAt", pending_reason AS "pendingReason", idempotency_key AS "idempotencyKey", wait_until AS "waitUntil", wait_token_id AS "waitTokenId", step_data AS "stepData", progress
741
+ RETURNING id, job_type AS "jobType", payload, status, max_attempts AS "maxAttempts", attempts, priority, run_at AS "runAt", timeout_ms AS "timeoutMs", force_kill_on_timeout AS "forceKillOnTimeout", created_at AS "createdAt", updated_at AS "updatedAt", started_at AS "startedAt", completed_at AS "completedAt", last_failed_at AS "lastFailedAt", locked_at AS "lockedAt", locked_by AS "lockedBy", error_history AS "errorHistory", failure_reason AS "failureReason", next_attempt_at AS "nextAttemptAt", last_retried_at AS "lastRetriedAt", last_cancelled_at AS "lastCancelledAt", pending_reason AS "pendingReason", idempotency_key AS "idempotencyKey", wait_until AS "waitUntil", wait_token_id AS "waitTokenId", step_data AS "stepData", progress, retry_delay AS "retryDelay", retry_backoff AS "retryBackoff", retry_delay_max AS "retryDelayMax"
472
742
  `,
473
743
  params,
474
744
  );
@@ -540,9 +810,17 @@ export class PostgresBackend implements QueueBackend {
540
810
  UPDATE job_queue
541
811
  SET status = 'failed',
542
812
  updated_at = NOW(),
543
- next_attempt_at = CASE
544
- WHEN attempts < max_attempts THEN NOW() + (POWER(2, attempts) * INTERVAL '1 minute')
545
- ELSE NULL
813
+ next_attempt_at = CASE
814
+ WHEN attempts >= max_attempts THEN NULL
815
+ WHEN retry_delay IS NULL AND retry_backoff IS NULL AND retry_delay_max IS NULL
816
+ THEN NOW() + (POWER(2, attempts) * INTERVAL '1 minute')
817
+ WHEN COALESCE(retry_backoff, true) = true
818
+ THEN NOW() + (LEAST(
819
+ COALESCE(retry_delay_max, 2147483647),
820
+ COALESCE(retry_delay, 60) * POWER(2, attempts)
821
+ ) * (0.5 + 0.5 * random()) * INTERVAL '1 second')
822
+ ELSE
823
+ NOW() + (COALESCE(retry_delay, 60) * INTERVAL '1 second')
546
824
  END,
547
825
  error_history = COALESCE(error_history, '[]'::jsonb) || $2::jsonb,
548
826
  failure_reason = $3,
@@ -795,6 +1073,18 @@ export class PostgresBackend implements QueueBackend {
795
1073
  updateFields.push(`tags = $${paramIdx++}`);
796
1074
  params.push(updates.tags ?? null);
797
1075
  }
1076
+ if (updates.retryDelay !== undefined) {
1077
+ updateFields.push(`retry_delay = $${paramIdx++}`);
1078
+ params.push(updates.retryDelay ?? null);
1079
+ }
1080
+ if (updates.retryBackoff !== undefined) {
1081
+ updateFields.push(`retry_backoff = $${paramIdx++}`);
1082
+ params.push(updates.retryBackoff ?? null);
1083
+ }
1084
+ if (updates.retryDelayMax !== undefined) {
1085
+ updateFields.push(`retry_delay_max = $${paramIdx++}`);
1086
+ params.push(updates.retryDelayMax ?? null);
1087
+ }
798
1088
 
799
1089
  if (updateFields.length === 0) {
800
1090
  log(`No fields to update for job ${jobId}`);
@@ -821,6 +1111,12 @@ export class PostgresBackend implements QueueBackend {
821
1111
  if (updates.timeoutMs !== undefined)
822
1112
  metadata.timeoutMs = updates.timeoutMs;
823
1113
  if (updates.tags !== undefined) metadata.tags = updates.tags;
1114
+ if (updates.retryDelay !== undefined)
1115
+ metadata.retryDelay = updates.retryDelay;
1116
+ if (updates.retryBackoff !== undefined)
1117
+ metadata.retryBackoff = updates.retryBackoff;
1118
+ if (updates.retryDelayMax !== undefined)
1119
+ metadata.retryDelayMax = updates.retryDelayMax;
824
1120
 
825
1121
  await this.recordJobEvent(jobId, JobEventType.Edited, metadata);
826
1122
  log(`Edited job ${jobId}: ${JSON.stringify(metadata)}`);
@@ -870,6 +1166,18 @@ export class PostgresBackend implements QueueBackend {
870
1166
  updateFields.push(`tags = $${paramIdx++}`);
871
1167
  params.push(updates.tags ?? null);
872
1168
  }
1169
+ if (updates.retryDelay !== undefined) {
1170
+ updateFields.push(`retry_delay = $${paramIdx++}`);
1171
+ params.push(updates.retryDelay ?? null);
1172
+ }
1173
+ if (updates.retryBackoff !== undefined) {
1174
+ updateFields.push(`retry_backoff = $${paramIdx++}`);
1175
+ params.push(updates.retryBackoff ?? null);
1176
+ }
1177
+ if (updates.retryDelayMax !== undefined) {
1178
+ updateFields.push(`retry_delay_max = $${paramIdx++}`);
1179
+ params.push(updates.retryDelayMax ?? null);
1180
+ }
873
1181
 
874
1182
  if (updateFields.length === 0) {
875
1183
  log(`No fields to update for batch edit`);
@@ -979,46 +1287,93 @@ export class PostgresBackend implements QueueBackend {
979
1287
  }
980
1288
  }
981
1289
 
982
/**
 * Delete completed jobs older than the given number of days.
 *
 * Rows are removed in batches of `batchSize`, one DELETE statement per batch,
 * each on a freshly acquired pool client, to avoid long-running transactions
 * and excessive WAL bloat at scale. Batching stops as soon as a batch deletes
 * fewer rows than `batchSize`.
 *
 * @param daysToKeep - Number of days to retain completed jobs (default 30).
 * @param batchSize - Number of rows to delete per batch (default 1000).
 *   Must be a positive integer.
 * @returns Total number of deleted jobs.
 * @throws Error if `batchSize` is not a positive integer, or if a DELETE fails.
 */
async cleanupOldJobs(daysToKeep = 30, batchSize = 1000): Promise<number> {
  // With batchSize = 0 every batch deletes 0 rows and `0 === batchSize` would
  // keep the loop below spinning forever; negative values are rejected by
  // Postgres as a LIMIT and fractions are not a valid row count. Fail fast.
  if (!Number.isInteger(batchSize) || batchSize <= 0) {
    throw new Error(
      `batchSize must be a positive integer, received ${batchSize}`,
    );
  }

  let totalDeleted = 0;

  try {
    let deletedInBatch: number;
    do {
      const client = await this.pool.connect();
      try {
        const result = await client.query(
          `
          DELETE FROM job_queue
          WHERE id IN (
            SELECT id FROM job_queue
            WHERE status = 'completed'
              AND updated_at < NOW() - INTERVAL '1 day' * $1::int
            LIMIT $2
          )
          `,
          [daysToKeep, batchSize],
        );
        deletedInBatch = result.rowCount ?? 0;
        totalDeleted += deletedInBatch;
      } finally {
        // Return the client between batches so other work can use the pool.
        client.release();
      }
      // A full batch means more matching rows may remain.
    } while (deletedInBatch === batchSize);

    log(`Deleted ${totalDeleted} old jobs`);
    return totalDeleted;
  } catch (error) {
    log(`Error cleaning up old jobs: ${error}`);
    throw error;
  }
}
1003
1333
 
1004
/**
 * Delete job events older than the given number of days.
 *
 * Rows are removed in batches of `batchSize`, one DELETE statement per batch,
 * each on a freshly acquired pool client, to avoid long-running transactions
 * and excessive WAL bloat at scale. Batching stops as soon as a batch deletes
 * fewer rows than `batchSize`.
 *
 * @param daysToKeep - Number of days to retain events (default 30).
 * @param batchSize - Number of rows to delete per batch (default 1000).
 *   Must be a positive integer.
 * @returns Total number of deleted events.
 * @throws Error if `batchSize` is not a positive integer, or if a DELETE fails.
 */
async cleanupOldJobEvents(
  daysToKeep = 30,
  batchSize = 1000,
): Promise<number> {
  // With batchSize = 0 every batch deletes 0 rows and `0 === batchSize` would
  // keep the loop below spinning forever; negative values are rejected by
  // Postgres as a LIMIT and fractions are not a valid row count. Fail fast.
  if (!Number.isInteger(batchSize) || batchSize <= 0) {
    throw new Error(
      `batchSize must be a positive integer, received ${batchSize}`,
    );
  }

  let totalDeleted = 0;

  try {
    let deletedInBatch: number;
    do {
      const client = await this.pool.connect();
      try {
        const result = await client.query(
          `
          DELETE FROM job_events
          WHERE id IN (
            SELECT id FROM job_events
            WHERE created_at < NOW() - INTERVAL '1 day' * $1::int
            LIMIT $2
          )
          `,
          [daysToKeep, batchSize],
        );
        deletedInBatch = result.rowCount ?? 0;
        totalDeleted += deletedInBatch;
      } finally {
        // Return the client between batches so other work can use the pool.
        client.release();
      }
      // A full batch means more matching rows may remain.
    } while (deletedInBatch === batchSize);

    log(`Deleted ${totalDeleted} old job events`);
    return totalDeleted;
  } catch (error) {
    log(`Error cleaning up old job events: ${error}`);
    throw error;
  }
}
1024
1379
 
@@ -1083,6 +1438,580 @@ export class PostgresBackend implements QueueBackend {
1083
1438
  }
1084
1439
  }
1085
1440
 
1441
+ // ── Cron schedules ──────────────────────────────────────────────────
1442
+
1443
/**
 * Insert a new row into `cron_schedules` and return its generated ID.
 *
 * @param input - Schedule definition; `tags` is stored as NULL when absent.
 * @returns The ID of the inserted schedule.
 * @throws Error with an "already exists" message when the insert fails with
 *   a unique violation (SQLSTATE 23505); any other error is logged and rethrown.
 */
async addCronSchedule(input: CronScheduleInput): Promise<number> {
  const conn = await this.pool.connect();
  try {
    // Order must match the column list in the INSERT below.
    const values = [
      input.scheduleName,
      input.cronExpression,
      input.jobType,
      input.payload,
      input.maxAttempts,
      input.priority,
      input.timeoutMs,
      input.forceKillOnTimeout,
      input.tags ?? null,
      input.timezone,
      input.allowOverlap,
      input.nextRunAt,
      input.retryDelay,
      input.retryBackoff,
      input.retryDelayMax,
    ];
    const { rows } = await conn.query(
      `INSERT INTO cron_schedules
         (schedule_name, cron_expression, job_type, payload, max_attempts,
          priority, timeout_ms, force_kill_on_timeout, tags, timezone,
          allow_overlap, next_run_at, retry_delay, retry_backoff, retry_delay_max)
       VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15)
       RETURNING id`,
      values,
    );
    const id = rows[0].id;
    log(`Added cron schedule ${id}: "${input.scheduleName}"`);
    return id;
  } catch (error: any) {
    // 23505 = unique_violation; surfaced as a friendlier duplicate-name error.
    const isUniqueViolation = error?.code === '23505';
    if (!isUniqueViolation) {
      log(`Error adding cron schedule: ${error}`);
      throw error;
    }
    throw new Error(
      `Cron schedule with name "${input.scheduleName}" already exists`,
    );
  } finally {
    conn.release();
  }
}
1488
+
1489
/**
 * Look up a single cron schedule by its numeric ID.
 *
 * @param id - The schedule ID.
 * @returns The schedule with camelCase keys, or null when no row matches.
 * @throws Rethrows any query error after logging it.
 */
async getCronSchedule(id: number): Promise<CronScheduleRecord | null> {
  const conn = await this.pool.connect();
  try {
    // Snake_case columns are aliased to the camelCase record fields.
    const { rows } = await conn.query(
      `SELECT id, schedule_name AS "scheduleName", cron_expression AS "cronExpression",
              job_type AS "jobType", payload, max_attempts AS "maxAttempts",
              priority, timeout_ms AS "timeoutMs",
              force_kill_on_timeout AS "forceKillOnTimeout", tags,
              timezone, allow_overlap AS "allowOverlap", status,
              last_enqueued_at AS "lastEnqueuedAt", last_job_id AS "lastJobId",
              next_run_at AS "nextRunAt",
              created_at AS "createdAt", updated_at AS "updatedAt",
              retry_delay AS "retryDelay", retry_backoff AS "retryBackoff",
              retry_delay_max AS "retryDelayMax"
       FROM cron_schedules WHERE id = $1`,
      [id],
    );
    return rows.length > 0 ? (rows[0] as CronScheduleRecord) : null;
  } catch (error) {
    log(`Error getting cron schedule ${id}: ${error}`);
    throw error;
  } finally {
    conn.release();
  }
}
1516
+
1517
/**
 * Look up a single cron schedule by its `schedule_name`.
 *
 * @param name - The schedule name to match exactly.
 * @returns The schedule with camelCase keys, or null when no row matches.
 * @throws Rethrows any query error after logging it.
 */
async getCronScheduleByName(
  name: string,
): Promise<CronScheduleRecord | null> {
  const conn = await this.pool.connect();
  try {
    // Snake_case columns are aliased to the camelCase record fields.
    const { rows } = await conn.query(
      `SELECT id, schedule_name AS "scheduleName", cron_expression AS "cronExpression",
              job_type AS "jobType", payload, max_attempts AS "maxAttempts",
              priority, timeout_ms AS "timeoutMs",
              force_kill_on_timeout AS "forceKillOnTimeout", tags,
              timezone, allow_overlap AS "allowOverlap", status,
              last_enqueued_at AS "lastEnqueuedAt", last_job_id AS "lastJobId",
              next_run_at AS "nextRunAt",
              created_at AS "createdAt", updated_at AS "updatedAt",
              retry_delay AS "retryDelay", retry_backoff AS "retryBackoff",
              retry_delay_max AS "retryDelayMax"
       FROM cron_schedules WHERE schedule_name = $1`,
      [name],
    );
    return rows.length > 0 ? (rows[0] as CronScheduleRecord) : null;
  } catch (error) {
    log(`Error getting cron schedule by name "${name}": ${error}`);
    throw error;
  } finally {
    conn.release();
  }
}
1546
+
1547
/**
 * List cron schedules ordered by creation time (oldest first).
 *
 * @param status - When provided (truthy), only schedules with this status
 *   are returned; otherwise all schedules are returned.
 * @returns The matching schedules with camelCase keys.
 * @throws Rethrows any query error after logging it.
 */
async listCronSchedules(
  status?: CronScheduleStatus,
): Promise<CronScheduleRecord[]> {
  const conn = await this.pool.connect();
  try {
    const selectClause = `SELECT id, schedule_name AS "scheduleName", cron_expression AS "cronExpression",
              job_type AS "jobType", payload, max_attempts AS "maxAttempts",
              priority, timeout_ms AS "timeoutMs",
              force_kill_on_timeout AS "forceKillOnTimeout", tags,
              timezone, allow_overlap AS "allowOverlap", status,
              last_enqueued_at AS "lastEnqueuedAt", last_job_id AS "lastJobId",
              next_run_at AS "nextRunAt",
              created_at AS "createdAt", updated_at AS "updatedAt",
              retry_delay AS "retryDelay", retry_backoff AS "retryBackoff",
              retry_delay_max AS "retryDelayMax"
       FROM cron_schedules`;
    // The status filter and its single bound parameter are added together.
    const whereClause = status ? ` WHERE status = $1` : '';
    const values: any[] = status ? [status] : [];
    const sql = `${selectClause}${whereClause} ORDER BY created_at ASC`;

    const { rows } = await conn.query(sql, values);
    return rows as CronScheduleRecord[];
  } catch (error) {
    log(`Error listing cron schedules: ${error}`);
    throw error;
  } finally {
    conn.release();
  }
}
1579
+
1580
/**
 * Delete the cron schedule with the given ID.
 * The "Removed" log line is written whether or not a row matched.
 *
 * @param id - The schedule ID to delete.
 * @throws Rethrows any query error after logging it.
 */
async removeCronSchedule(id: number): Promise<void> {
  const conn = await this.pool.connect();
  try {
    const sql = `DELETE FROM cron_schedules WHERE id = $1`;
    await conn.query(sql, [id]);
    log(`Removed cron schedule ${id}`);
  } catch (error) {
    log(`Error removing cron schedule ${id}: ${error}`);
    throw error;
  } finally {
    conn.release();
  }
}
1593
+
1594
/**
 * Set a cron schedule's status to 'paused' and bump `updated_at`.
 * The "Paused" log line is written whether or not a row matched.
 *
 * @param id - The schedule ID to pause.
 * @throws Rethrows any query error after logging it.
 */
async pauseCronSchedule(id: number): Promise<void> {
  const conn = await this.pool.connect();
  try {
    const sql = `UPDATE cron_schedules SET status = 'paused', updated_at = NOW() WHERE id = $1`;
    await conn.query(sql, [id]);
    log(`Paused cron schedule ${id}`);
  } catch (error) {
    log(`Error pausing cron schedule ${id}: ${error}`);
    throw error;
  } finally {
    conn.release();
  }
}
1610
+
1611
/**
 * Set a cron schedule's status to 'active' and bump `updated_at`.
 * The update is not conditional on the current status.
 *
 * @param id - The schedule ID to resume.
 * @throws Rethrows any query error after logging it.
 */
async resumeCronSchedule(id: number): Promise<void> {
  const conn = await this.pool.connect();
  try {
    const sql = `UPDATE cron_schedules SET status = 'active', updated_at = NOW() WHERE id = $1`;
    await conn.query(sql, [id]);
    log(`Resumed cron schedule ${id}`);
  } catch (error) {
    log(`Error resuming cron schedule ${id}: ${error}`);
    throw error;
  } finally {
    conn.release();
  }
}
1627
+
1628
/**
 * Apply a partial update to a cron schedule.
 *
 * Only fields that are not `undefined` are written; `updated_at` is bumped
 * whenever at least one field changes. If nothing is set, the method logs
 * and returns without issuing a query.
 *
 * @param id - The schedule ID to edit.
 * @param updates - Fields to change; `undefined` entries are left untouched.
 * @param nextRunAt - New `next_run_at` value; `undefined` leaves it untouched,
 *   `null` clears it.
 * @throws Rethrows any query error after logging it.
 */
async editCronSchedule(
  id: number,
  updates: EditCronScheduleOptions,
  nextRunAt?: Date | null,
): Promise<void> {
  const conn = await this.pool.connect();
  try {
    // Column/value pairs in the order their placeholders are assigned.
    const candidates: Array<[string, unknown]> = [
      ['cron_expression', updates.cronExpression],
      ['payload', updates.payload],
      ['max_attempts', updates.maxAttempts],
      ['priority', updates.priority],
      ['timeout_ms', updates.timeoutMs],
      ['force_kill_on_timeout', updates.forceKillOnTimeout],
      ['tags', updates.tags],
      ['timezone', updates.timezone],
      ['allow_overlap', updates.allowOverlap],
      ['retry_delay', updates.retryDelay],
      ['retry_backoff', updates.retryBackoff],
      ['retry_delay_max', updates.retryDelayMax],
      ['next_run_at', nextRunAt],
    ];

    const assignments: string[] = [];
    const values: any[] = [];
    for (const [column, value] of candidates) {
      if (value === undefined) continue;
      values.push(value);
      // Placeholder index is the 1-based position of the value just pushed.
      assignments.push(`${column} = $${values.length}`);
    }

    if (assignments.length === 0) {
      log(`No fields to update for cron schedule ${id}`);
      return;
    }

    assignments.push(`updated_at = NOW()`);
    // The ID is always the last bound parameter.
    values.push(id);

    const sql = `UPDATE cron_schedules SET ${assignments.join(', ')} WHERE id = $${values.length}`;
    await conn.query(sql, values);
    log(`Edited cron schedule ${id}`);
  } catch (error) {
    log(`Error editing cron schedule ${id}: ${error}`);
    throw error;
  } finally {
    conn.release();
  }
}
1711
+
1712
/**
 * Fetch all active cron schedules whose `next_run_at` is set and not in the
 * future, ordered by `next_run_at` ascending.
 *
 * The SELECT uses FOR UPDATE SKIP LOCKED, so rows currently locked by another
 * session are skipped rather than waited on.
 *
 * NOTE(review): this method does not open a transaction (no BEGIN is issued
 * here), so the row locks are presumably released as soon as the statement
 * completes — confirm that callers do not rely on the lock outliving this
 * call to prevent duplicate enqueuing across workers.
 *
 * @returns The due schedules with camelCase keys, or an empty array when the
 *   `cron_schedules` table does not exist (SQLSTATE 42P01).
 * @throws Rethrows any other query error after logging it.
 */
async getDueCronSchedules(): Promise<CronScheduleRecord[]> {
  const conn = await this.pool.connect();
  try {
    const { rows } = await conn.query(
      `SELECT id, schedule_name AS "scheduleName", cron_expression AS "cronExpression",
              job_type AS "jobType", payload, max_attempts AS "maxAttempts",
              priority, timeout_ms AS "timeoutMs",
              force_kill_on_timeout AS "forceKillOnTimeout", tags,
              timezone, allow_overlap AS "allowOverlap", status,
              last_enqueued_at AS "lastEnqueuedAt", last_job_id AS "lastJobId",
              next_run_at AS "nextRunAt",
              created_at AS "createdAt", updated_at AS "updatedAt",
              retry_delay AS "retryDelay", retry_backoff AS "retryBackoff",
              retry_delay_max AS "retryDelayMax"
       FROM cron_schedules
       WHERE status = 'active'
         AND next_run_at IS NOT NULL
         AND next_run_at <= NOW()
       ORDER BY next_run_at ASC
       FOR UPDATE SKIP LOCKED`,
    );
    log(`Found ${rows.length} due cron schedules`);
    return rows as CronScheduleRecord[];
  } catch (error: any) {
    // 42P01 = undefined_table: the cron migration has not been run yet.
    const tableMissing = error?.code === '42P01';
    if (!tableMissing) {
      log(`Error getting due cron schedules: ${error}`);
      throw error;
    }
    log('cron_schedules table does not exist, skipping cron enqueue');
    return [];
  } finally {
    conn.release();
  }
}
1751
+
1752
/**
 * Record that a job was enqueued for a cron schedule.
 * Writes `last_enqueued_at`, `last_job_id` and `next_run_at`, and bumps
 * `updated_at`.
 *
 * @param id - The schedule ID.
 * @param lastEnqueuedAt - Timestamp to store as `last_enqueued_at`.
 * @param lastJobId - ID of the job that was enqueued.
 * @param nextRunAt - Next run time to store, or null to clear it.
 * @throws Rethrows any query error after logging it.
 */
async updateCronScheduleAfterEnqueue(
  id: number,
  lastEnqueuedAt: Date,
  lastJobId: number,
  nextRunAt: Date | null,
): Promise<void> {
  const conn = await this.pool.connect();
  try {
    const sql = `UPDATE cron_schedules
       SET last_enqueued_at = $2,
           last_job_id = $3,
           next_run_at = $4,
           updated_at = NOW()
       WHERE id = $1`;
    await conn.query(sql, [id, lastEnqueuedAt, lastJobId, nextRunAt]);

    const nextRunLabel = nextRunAt?.toISOString() ?? 'null';
    log(
      `Updated cron schedule ${id}: lastJobId=${lastJobId}, nextRunAt=${nextRunLabel}`,
    );
  } catch (error) {
    log(`Error updating cron schedule ${id} after enqueue: ${error}`);
    throw error;
  } finally {
    conn.release();
  }
}
1783
+
1784
+ // ── Wait / step-data support ────────────────────────────────────────
1785
+
1786
/**
 * Move a job from 'processing' to 'waiting' and persist its step data.
 *
 * The update also stores the wait deadline and token (NULL when absent) and
 * clears `locked_at` / `locked_by`. It only applies while the job is still
 * 'processing'; if no row matches, the method logs and returns without
 * recording an event.
 *
 * @param jobId - The job to pause.
 * @param options - Optional `waitUntil` date, optional `waitTokenId`, and the
 *   step data to store (serialized with JSON.stringify).
 * @throws Rethrows any error from the update or the event recording after
 *   logging it.
 */
async waitJob(
  jobId: number,
  options: {
    waitUntil?: Date;
    waitTokenId?: string;
    stepData: Record<string, any>;
  },
): Promise<void> {
  const conn = await this.pool.connect();
  try {
    const values = [
      jobId,
      options.waitUntil ?? null,
      options.waitTokenId ?? null,
      JSON.stringify(options.stepData),
    ];
    const updated = await conn.query(
      `
      UPDATE job_queue
      SET status = 'waiting',
          wait_until = $2,
          wait_token_id = $3,
          step_data = $4,
          locked_at = NULL,
          locked_by = NULL,
          updated_at = NOW()
      WHERE id = $1 AND status = 'processing'
      `,
      values,
    );

    // Zero rows means the status guard did not match.
    if (updated.rowCount === 0) {
      log(
        `Job ${jobId} could not be set to waiting (may have been reclaimed or is no longer processing)`,
      );
      return;
    }

    const eventMetadata = {
      waitUntil: options.waitUntil?.toISOString() ?? null,
      waitTokenId: options.waitTokenId ?? null,
    };
    await this.recordJobEvent(jobId, JobEventType.Waiting, eventMetadata);
    log(`Job ${jobId} set to waiting`);
  } catch (error) {
    log(`Error setting job ${jobId} to waiting: ${error}`);
    throw error;
  } finally {
    conn.release();
  }
}
1840
+
1841
/**
 * Persist step data for a job. Called after each ctx.run() step completes.
 *
 * Best-effort: every failure — including a failure to acquire a pool client —
 * is logged and swallowed so that it cannot kill the running handler.
 *
 * @param jobId - The job to update.
 * @param stepData - The step data to persist (serialized with JSON.stringify).
 */
async updateStepData(
  jobId: number,
  stepData: Record<string, any>,
): Promise<void> {
  try {
    // Acquire the client inside the guarded region: a connection failure must
    // be swallowed like any other error to honour the never-throws contract.
    const client = await this.pool.connect();
    try {
      await client.query(
        `UPDATE job_queue SET step_data = $2, updated_at = NOW() WHERE id = $1`,
        [jobId, JSON.stringify(stepData)],
      );
    } finally {
      client.release();
    }
  } catch (error) {
    log(`Error updating step_data for job ${jobId}: ${error}`);
  }
}
1864
+
1865
/**
 * Insert a new waitpoint token with status 'waiting'.
 *
 * The token ID is `wp_` followed by a random UUID. When `options.timeout` is
 * given, it is converted with `parseTimeoutString` and added to the current
 * time to produce `timeout_at`; otherwise `timeout_at` is NULL.
 *
 * @param jobId - The job ID to associate with the token (null if created outside a handler).
 * @param options - Optional timeout string (e.g. '10m', '1h') and tags.
 * @returns The created waitpoint's unique ID.
 * @throws Rethrows any parsing or query error after logging it.
 */
async createWaitpoint(
  jobId: number | null,
  options?: CreateTokenOptions,
): Promise<{ id: string }> {
  const conn = await this.pool.connect();
  try {
    const id = `wp_${randomUUID()}`;

    const timeoutMs = options?.timeout
      ? parseTimeoutString(options.timeout)
      : null;
    const timeoutAt: Date | null =
      timeoutMs === null ? null : new Date(Date.now() + timeoutMs);

    const sql = `INSERT INTO waitpoints (id, job_id, status, timeout_at, tags) VALUES ($1, $2, 'waiting', $3, $4)`;
    await conn.query(sql, [id, jobId, timeoutAt, options?.tags ?? null]);

    log(`Created waitpoint ${id} for job ${jobId}`);
    return { id };
  } catch (error) {
    log(`Error creating waitpoint: ${error}`);
    throw error;
  } finally {
    conn.release();
  }
}
1900
+
1901
/**
 * Complete a waitpoint token and move the associated job back to 'pending'.
 *
 * Both updates run in one transaction. The token is only completed while its
 * status is 'waiting'; if no such token exists the transaction is rolled back
 * and the method returns without error. The job update only applies while the
 * job is still 'waiting'.
 *
 * @param tokenId - The waitpoint token ID to complete.
 * @param data - Optional data to pass to the waiting handler; stored as JSON,
 *   or NULL when `data` is null/undefined.
 * @throws Rethrows the original failure after logging it and attempting a rollback.
 */
async completeWaitpoint(tokenId: string, data?: any): Promise<void> {
  const client = await this.pool.connect();
  try {
    await client.query('BEGIN');

    const wpResult = await client.query(
      `UPDATE waitpoints SET status = 'completed', output = $2, completed_at = NOW()
       WHERE id = $1 AND status = 'waiting'
       RETURNING job_id`,
      [tokenId, data != null ? JSON.stringify(data) : null],
    );

    if (wpResult.rows.length === 0) {
      await client.query('ROLLBACK');
      log(`Waitpoint ${tokenId} not found or already completed`);
      return;
    }

    const jobId = wpResult.rows[0].job_id;

    // Tokens created outside a handler have no job to wake up.
    if (jobId != null) {
      await client.query(
        `UPDATE job_queue
         SET status = 'pending', wait_token_id = NULL, wait_until = NULL, updated_at = NOW()
         WHERE id = $1 AND status = 'waiting'`,
        [jobId],
      );
    }

    await client.query('COMMIT');
    log(`Completed waitpoint ${tokenId} for job ${jobId}`);
  } catch (error) {
    log(`Error completing waitpoint ${tokenId}: ${error}`);
    // A failing ROLLBACK (e.g. on a broken connection) must not replace the
    // original error, so it is logged and swallowed here.
    try {
      await client.query('ROLLBACK');
    } catch (rollbackError) {
      log(
        `Error rolling back waitpoint ${tokenId} completion: ${rollbackError}`,
      );
    }
    throw error;
  } finally {
    client.release();
  }
}
1946
+
1947
/**
 * Fetch a waitpoint token by its ID.
 *
 * @param tokenId - The waitpoint token ID to look up.
 * @returns The waitpoint with camelCase keys, or null when no row matches.
 * @throws Rethrows any query error after logging it.
 */
async getWaitpoint(tokenId: string): Promise<WaitpointRecord | null> {
  const conn = await this.pool.connect();
  try {
    const sql = `SELECT id, job_id AS "jobId", status, output, timeout_at AS "timeoutAt", created_at AS "createdAt", completed_at AS "completedAt", tags FROM waitpoints WHERE id = $1`;
    const { rows } = await conn.query(sql, [tokenId]);
    return rows.length > 0 ? (rows[0] as WaitpointRecord) : null;
  } catch (error) {
    log(`Error getting waitpoint ${tokenId}: ${error}`);
    throw error;
  } finally {
    conn.release();
  }
}
1969
+
1970
/**
 * Expire timed-out waitpoint tokens and move their associated jobs back to 'pending'.
 *
 * In one transaction, every 'waiting' token whose `timeout_at` has passed is
 * marked 'timed_out'; for each such token that has a job, the job is set back
 * to 'pending' if it is still 'waiting'.
 *
 * @returns The number of tokens that were expired.
 * @throws Rethrows the original failure after logging it and attempting a rollback.
 */
async expireTimedOutWaitpoints(): Promise<number> {
  const client = await this.pool.connect();
  try {
    await client.query('BEGIN');

    const result = await client.query(
      `UPDATE waitpoints
       SET status = 'timed_out'
       WHERE status = 'waiting' AND timeout_at IS NOT NULL AND timeout_at <= NOW()
       RETURNING id, job_id`,
    );

    for (const row of result.rows) {
      // Tokens created outside a handler have no job to wake up.
      if (row.job_id != null) {
        await client.query(
          `UPDATE job_queue
           SET status = 'pending', wait_token_id = NULL, wait_until = NULL, updated_at = NOW()
           WHERE id = $1 AND status = 'waiting'`,
          [row.job_id],
        );
      }
    }

    await client.query('COMMIT');
    const count = result.rowCount ?? 0;
    if (count > 0) {
      log(`Expired ${count} timed-out waitpoints`);
    }
    return count;
  } catch (error) {
    log(`Error expiring timed-out waitpoints: ${error}`);
    // A failing ROLLBACK (e.g. on a broken connection) must not replace the
    // original error, so it is logged and swallowed here.
    try {
      await client.query('ROLLBACK');
    } catch (rollbackError) {
      log(`Error rolling back waitpoint expiry: ${rollbackError}`);
    }
    throw error;
  } finally {
    client.release();
  }
}
2012
+
2013
+ // ── Internal helpers ──────────────────────────────────────────────────
2014
+
1086
2015
  async setPendingReasonForUnpickedJobs(
1087
2016
  reason: string,
1088
2017
  jobType?: string | string[],