@nicnocquee/dataqueue 1.22.0 → 1.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/README.md +44 -0
  2. package/dist/index.cjs +2822 -583
  3. package/dist/index.cjs.map +1 -1
  4. package/dist/index.d.cts +589 -12
  5. package/dist/index.d.ts +589 -12
  6. package/dist/index.js +2818 -584
  7. package/dist/index.js.map +1 -1
  8. package/migrations/1751131910825_add_timeout_seconds_to_job_queue.sql +2 -2
  9. package/migrations/1751186053000_add_job_events_table.sql +12 -8
  10. package/migrations/1751984773000_add_tags_to_job_queue.sql +1 -1
  11. package/migrations/1765809419000_add_force_kill_on_timeout_to_job_queue.sql +6 -0
  12. package/migrations/1771100000000_add_idempotency_key_to_job_queue.sql +7 -0
  13. package/migrations/1781200000000_add_wait_support.sql +12 -0
  14. package/migrations/1781200000001_create_waitpoints_table.sql +18 -0
  15. package/migrations/1781200000002_add_performance_indexes.sql +34 -0
  16. package/migrations/1781200000003_add_progress_to_job_queue.sql +7 -0
  17. package/package.json +20 -6
  18. package/src/backend.ts +163 -0
  19. package/src/backends/postgres.ts +1111 -0
  20. package/src/backends/redis-scripts.ts +533 -0
  21. package/src/backends/redis.test.ts +543 -0
  22. package/src/backends/redis.ts +834 -0
  23. package/src/db-util.ts +4 -2
  24. package/src/handler-validation.test.ts +414 -0
  25. package/src/handler-validation.ts +168 -0
  26. package/src/index.test.ts +230 -1
  27. package/src/index.ts +128 -32
  28. package/src/processor.test.ts +612 -16
  29. package/src/processor.ts +759 -47
  30. package/src/queue.test.ts +736 -3
  31. package/src/queue.ts +346 -660
  32. package/src/test-util.ts +32 -0
  33. package/src/types.ts +451 -16
  34. package/src/wait.test.ts +698 -0
package/src/queue.ts CHANGED
@@ -1,3 +1,12 @@
1
+ /**
2
+ * Backward-compatible re-exports.
3
+ * All SQL logic has moved to backends/postgres.ts (PostgresBackend class).
4
+ * These functions delegate to a temporary PostgresBackend instance so that
5
+ * any existing internal callers continue to work.
6
+ *
7
+ * Wait-related functions (waitJob, updateStepData, createWaitpoint, etc.)
8
+ * are PostgreSQL-only and use direct SQL queries.
9
+ */
1
10
  import { Pool } from 'pg';
2
11
  import {
3
12
  JobOptions,
@@ -6,142 +15,35 @@ import {
6
15
  JobEvent,
7
16
  JobEventType,
8
17
  TagQueryMode,
18
+ WaitpointRecord,
9
19
  } from './types.js';
20
+ import { PostgresBackend } from './backends/postgres.js';
21
+ import { randomUUID } from 'crypto';
10
22
  import { log } from './log-context.js';
11
23
 
12
- /**
13
- * Record a job event in the job_events table
14
- */
24
+ /* Thin wrappers — every function creates a lightweight backend wrapper
25
+ around the given pool and forwards the call. The class itself holds
26
+ no mutable state so this is safe and cheap. */
27
+
15
28
  export const recordJobEvent = async (
16
29
  pool: Pool,
17
30
  jobId: number,
18
31
  eventType: JobEventType,
19
32
  metadata?: any,
20
- ): Promise<void> => {
21
- const client = await pool.connect();
22
- try {
23
- await client.query(
24
- `INSERT INTO job_events (job_id, event_type, metadata) VALUES ($1, $2, $3)`,
25
- [jobId, eventType, metadata ? JSON.stringify(metadata) : null],
26
- );
27
- } catch (error) {
28
- log(`Error recording job event for job ${jobId}: ${error}`);
29
- // Do not throw, to avoid interfering with main job logic
30
- } finally {
31
- client.release();
32
- }
33
- };
33
+ ): Promise<void> =>
34
+ new PostgresBackend(pool).recordJobEvent(jobId, eventType, metadata);
34
35
 
35
- /**
36
- * Add a job to the queue
37
- */
38
36
  export const addJob = async <PayloadMap, T extends keyof PayloadMap & string>(
39
37
  pool: Pool,
40
- {
41
- jobType,
42
- payload,
43
- maxAttempts = 3,
44
- priority = 0,
45
- runAt = null,
46
- timeoutMs = undefined,
47
- tags = undefined,
48
- }: JobOptions<PayloadMap, T>,
49
- ): Promise<number> => {
50
- const client = await pool.connect();
51
- try {
52
- let result;
53
- if (runAt) {
54
- result = await client.query(
55
- `INSERT INTO job_queue
56
- (job_type, payload, max_attempts, priority, run_at, timeout_ms, tags)
57
- VALUES ($1, $2, $3, $4, $5, $6, $7)
58
- RETURNING id`,
59
- [
60
- jobType,
61
- payload,
62
- maxAttempts,
63
- priority,
64
- runAt,
65
- timeoutMs ?? null,
66
- tags ?? null,
67
- ],
68
- );
69
- log(
70
- `Added job ${result.rows[0].id}: payload ${JSON.stringify(payload)}, runAt ${runAt.toISOString()}, priority ${priority}, maxAttempts ${maxAttempts} jobType ${jobType}, tags ${JSON.stringify(tags)}`,
71
- );
72
- } else {
73
- result = await client.query(
74
- `INSERT INTO job_queue
75
- (job_type, payload, max_attempts, priority, timeout_ms, tags)
76
- VALUES ($1, $2, $3, $4, $5, $6)
77
- RETURNING id`,
78
- [
79
- jobType,
80
- payload,
81
- maxAttempts,
82
- priority,
83
- timeoutMs ?? null,
84
- tags ?? null,
85
- ],
86
- );
87
- log(
88
- `Added job ${result.rows[0].id}: payload ${JSON.stringify(payload)}, priority ${priority}, maxAttempts ${maxAttempts} jobType ${jobType}, tags ${JSON.stringify(tags)}`,
89
- );
90
- }
91
- await recordJobEvent(pool, result.rows[0].id, JobEventType.Added, {
92
- jobType,
93
- payload,
94
- tags,
95
- });
96
- return result.rows[0].id;
97
- } catch (error) {
98
- log(`Error adding job: ${error}`);
99
- throw error;
100
- } finally {
101
- client.release();
102
- }
103
- };
38
+ job: JobOptions<PayloadMap, T>,
39
+ ): Promise<number> => new PostgresBackend(pool).addJob(job);
104
40
 
105
- /**
106
- * Get a job by ID
107
- */
108
41
  export const getJob = async <PayloadMap, T extends keyof PayloadMap & string>(
109
42
  pool: Pool,
110
43
  id: number,
111
- ): Promise<JobRecord<PayloadMap, T> | null> => {
112
- const client = await pool.connect();
113
- try {
114
- const result = await client.query(
115
- `SELECT id, job_type AS "jobType", payload, status, max_attempts AS "maxAttempts", attempts, priority, run_at AS "runAt", timeout_ms AS "timeoutMs", created_at AS "createdAt", updated_at AS "updatedAt", started_at AS "startedAt", completed_at AS "completedAt", last_failed_at AS "lastFailedAt", locked_at AS "lockedAt", locked_by AS "lockedBy", error_history AS "errorHistory", failure_reason AS "failureReason", next_attempt_at AS "nextAttemptAt", last_failed_at AS "lastFailedAt", last_retried_at AS "lastRetriedAt", last_cancelled_at AS "lastCancelledAt", pending_reason AS "pendingReason" FROM job_queue WHERE id = $1`,
116
- [id],
117
- );
118
-
119
- if (result.rows.length === 0) {
120
- log(`Job ${id} not found`);
121
- return null;
122
- }
44
+ ): Promise<JobRecord<PayloadMap, T> | null> =>
45
+ new PostgresBackend(pool).getJob<PayloadMap, T>(id);
123
46
 
124
- log(`Found job ${id}`);
125
-
126
- const job = result.rows[0] as JobRecord<PayloadMap, T>;
127
-
128
- return {
129
- ...job,
130
- payload: job.payload,
131
- timeoutMs: job.timeoutMs,
132
- failureReason: job.failureReason,
133
- };
134
- } catch (error) {
135
- log(`Error getting job ${id}: ${error}`);
136
- throw error;
137
- } finally {
138
- client.release();
139
- }
140
- };
141
-
142
- /**
143
- * Get jobs by status
144
- */
145
47
  export const getJobsByStatus = async <
146
48
  PayloadMap,
147
49
  T extends keyof PayloadMap & string,
@@ -150,37 +52,13 @@ export const getJobsByStatus = async <
150
52
  status: string,
151
53
  limit = 100,
152
54
  offset = 0,
153
- ): Promise<JobRecord<PayloadMap, T>[]> => {
154
- const client = await pool.connect();
155
- try {
156
- const result = await client.query(
157
- `SELECT id, job_type AS "jobType", payload, status, max_attempts AS "maxAttempts", attempts, priority, run_at AS "runAt", timeout_ms AS "timeoutMs", created_at AS "createdAt", updated_at AS "updatedAt", started_at AS "startedAt", completed_at AS "completedAt", last_failed_at AS "lastFailedAt", locked_at AS "lockedAt", locked_by AS "lockedBy", error_history AS "errorHistory", failure_reason AS "failureReason", next_attempt_at AS "nextAttemptAt", last_failed_at AS "lastFailedAt", last_retried_at AS "lastRetriedAt", last_cancelled_at AS "lastCancelledAt", pending_reason AS "pendingReason" FROM job_queue WHERE status = $1 ORDER BY created_at DESC LIMIT $2 OFFSET $3`,
158
- [status, limit, offset],
159
- );
160
-
161
- log(`Found ${result.rows.length} jobs by status ${status}`);
162
-
163
- return result.rows.map((job) => ({
164
- ...job,
165
- payload: job.payload,
166
- timeoutMs: job.timeoutMs,
167
- failureReason: job.failureReason,
168
- }));
169
- } catch (error) {
170
- log(`Error getting jobs by status ${status}: ${error}`);
171
- throw error;
172
- } finally {
173
- client.release();
174
- }
175
- };
55
+ ): Promise<JobRecord<PayloadMap, T>[]> =>
56
+ new PostgresBackend(pool).getJobsByStatus<PayloadMap, T>(
57
+ status,
58
+ limit,
59
+ offset,
60
+ );
176
61
 
177
- /**
178
- * Get the next batch of jobs to process
179
- * @param pool - The database pool
180
- * @param workerId - The worker ID
181
- * @param batchSize - The batch size
182
- * @param jobType - Only fetch jobs with this job type (string or array of strings)
183
- */
184
62
  export const getNextBatch = async <
185
63
  PayloadMap,
186
64
  T extends keyof PayloadMap & string,
@@ -189,354 +67,307 @@ export const getNextBatch = async <
189
67
  workerId: string,
190
68
  batchSize = 10,
191
69
  jobType?: string | string[],
192
- ): Promise<JobRecord<PayloadMap, T>[]> => {
193
- const client = await pool.connect();
194
- try {
195
- // Begin transaction
196
- await client.query('BEGIN');
70
+ ): Promise<JobRecord<PayloadMap, T>[]> =>
71
+ new PostgresBackend(pool).getNextBatch<PayloadMap, T>(
72
+ workerId,
73
+ batchSize,
74
+ jobType,
75
+ );
76
+
77
+ export const completeJob = async (pool: Pool, jobId: number): Promise<void> =>
78
+ new PostgresBackend(pool).completeJob(jobId);
79
+
80
+ export const prolongJob = async (pool: Pool, jobId: number): Promise<void> =>
81
+ new PostgresBackend(pool).prolongJob(jobId);
197
82
 
198
- // Build job type filter
199
- let jobTypeFilter = '';
200
- let params: any[] = [workerId, batchSize];
201
- if (jobType) {
202
- if (Array.isArray(jobType)) {
203
- jobTypeFilter = ` AND job_type = ANY($3)`;
204
- params.push(jobType);
205
- } else {
206
- jobTypeFilter = ` AND job_type = $3`;
207
- params.push(jobType);
83
+ export const failJob = async (
84
+ pool: Pool,
85
+ jobId: number,
86
+ error: Error,
87
+ failureReason?: FailureReason,
88
+ ): Promise<void> =>
89
+ new PostgresBackend(pool).failJob(jobId, error, failureReason);
90
+
91
+ export const retryJob = async (pool: Pool, jobId: number): Promise<void> =>
92
+ new PostgresBackend(pool).retryJob(jobId);
93
+
94
+ export const cleanupOldJobs = async (
95
+ pool: Pool,
96
+ daysToKeep = 30,
97
+ ): Promise<number> => new PostgresBackend(pool).cleanupOldJobs(daysToKeep);
98
+
99
+ export const cancelJob = async (pool: Pool, jobId: number): Promise<void> =>
100
+ new PostgresBackend(pool).cancelJob(jobId);
101
+
102
+ export const editJob = async <PayloadMap, T extends keyof PayloadMap & string>(
103
+ pool: Pool,
104
+ jobId: number,
105
+ updates: {
106
+ payload?: PayloadMap[T];
107
+ maxAttempts?: number;
108
+ priority?: number;
109
+ runAt?: Date | null;
110
+ timeoutMs?: number | null;
111
+ tags?: string[] | null;
112
+ },
113
+ ): Promise<void> => new PostgresBackend(pool).editJob(jobId, updates);
114
+
115
+ export const editAllPendingJobs = async <
116
+ PayloadMap,
117
+ T extends keyof PayloadMap & string,
118
+ >(
119
+ pool: Pool,
120
+ filters:
121
+ | {
122
+ jobType?: string;
123
+ priority?: number;
124
+ runAt?:
125
+ | Date
126
+ | { gt?: Date; gte?: Date; lt?: Date; lte?: Date; eq?: Date };
127
+ tags?: { values: string[]; mode?: TagQueryMode };
208
128
  }
209
- }
129
+ | undefined,
130
+ updates: {
131
+ payload?: PayloadMap[T];
132
+ maxAttempts?: number;
133
+ priority?: number;
134
+ runAt?: Date | null;
135
+ timeoutMs?: number;
136
+ tags?: string[];
137
+ },
138
+ ): Promise<number> =>
139
+ new PostgresBackend(pool).editAllPendingJobs(filters, updates);
210
140
 
211
- // Get and lock a batch of jobs
212
- const result = await client.query(
213
- `
214
- UPDATE job_queue
215
- SET status = 'processing',
216
- locked_at = NOW(),
217
- locked_by = $1,
218
- attempts = attempts + 1,
219
- updated_at = NOW(),
220
- pending_reason = NULL,
221
- started_at = COALESCE(started_at, NOW()),
222
- last_retried_at = CASE WHEN attempts > 0 THEN NOW() ELSE last_retried_at END
223
- WHERE id IN (
224
- SELECT id FROM job_queue
225
- WHERE (status = 'pending' OR (status = 'failed' AND next_attempt_at <= NOW()))
226
- AND (attempts < max_attempts)
227
- AND run_at <= NOW()
228
- ${jobTypeFilter}
229
- ORDER BY priority DESC, created_at ASC
230
- LIMIT $2
231
- FOR UPDATE SKIP LOCKED
232
- )
233
- RETURNING id, job_type AS "jobType", payload, status, max_attempts AS "maxAttempts", attempts, priority, run_at AS "runAt", timeout_ms AS "timeoutMs", created_at AS "createdAt", updated_at AS "updatedAt", started_at AS "startedAt", completed_at AS "completedAt", last_failed_at AS "lastFailedAt", locked_at AS "lockedAt", locked_by AS "lockedBy", error_history AS "errorHistory", failure_reason AS "failureReason", next_attempt_at AS "nextAttemptAt", last_retried_at AS "lastRetriedAt", last_cancelled_at AS "lastCancelledAt", pending_reason AS "pendingReason"
234
- `,
235
- params,
236
- );
141
+ export const cancelAllUpcomingJobs = async (
142
+ pool: Pool,
143
+ filters?: {
144
+ jobType?: string;
145
+ priority?: number;
146
+ runAt?: Date | { gt?: Date; gte?: Date; lt?: Date; lte?: Date; eq?: Date };
147
+ tags?: { values: string[]; mode?: TagQueryMode };
148
+ },
149
+ ): Promise<number> => new PostgresBackend(pool).cancelAllUpcomingJobs(filters);
237
150
 
238
- log(`Found ${result.rows.length} jobs to process`);
151
+ export const getAllJobs = async <
152
+ PayloadMap,
153
+ T extends keyof PayloadMap & string,
154
+ >(
155
+ pool: Pool,
156
+ limit = 100,
157
+ offset = 0,
158
+ ): Promise<JobRecord<PayloadMap, T>[]> =>
159
+ new PostgresBackend(pool).getAllJobs<PayloadMap, T>(limit, offset);
239
160
 
240
- // Commit transaction
241
- await client.query('COMMIT');
161
+ export const setPendingReasonForUnpickedJobs = async (
162
+ pool: Pool,
163
+ reason: string,
164
+ jobType?: string | string[],
165
+ ): Promise<void> =>
166
+ new PostgresBackend(pool).setPendingReasonForUnpickedJobs(reason, jobType);
242
167
 
243
- // Record processing event for each job
244
- for (const row of result.rows) {
245
- await recordJobEvent(pool, row.id, JobEventType.Processing);
246
- }
168
+ export const reclaimStuckJobs = async (
169
+ pool: Pool,
170
+ maxProcessingTimeMinutes = 10,
171
+ ): Promise<number> =>
172
+ new PostgresBackend(pool).reclaimStuckJobs(maxProcessingTimeMinutes);
247
173
 
248
- return result.rows.map((job) => ({
249
- ...job,
250
- payload: job.payload,
251
- timeoutMs: job.timeoutMs,
252
- }));
253
- } catch (error) {
254
- log(`Error getting next batch: ${error}`);
255
- await client.query('ROLLBACK');
256
- throw error;
257
- } finally {
258
- client.release();
259
- }
260
- };
174
+ export const getJobEvents = async (
175
+ pool: Pool,
176
+ jobId: number,
177
+ ): Promise<JobEvent[]> => new PostgresBackend(pool).getJobEvents(jobId);
261
178
 
262
- /**
263
- * Mark a job as completed
264
- */
265
- export const completeJob = async (pool: Pool, jobId: number): Promise<void> => {
266
- const client = await pool.connect();
267
- try {
268
- await client.query(
269
- `
270
- UPDATE job_queue
271
- SET status = 'completed', updated_at = NOW(), completed_at = NOW()
272
- WHERE id = $1
273
- `,
274
- [jobId],
275
- );
276
- await recordJobEvent(pool, jobId, JobEventType.Completed);
277
- } catch (error) {
278
- log(`Error completing job ${jobId}: ${error}`);
279
- throw error;
280
- } finally {
281
- log(`Completed job ${jobId}`);
282
- client.release();
283
- }
284
- };
179
+ export const getJobsByTags = async <
180
+ PayloadMap,
181
+ T extends keyof PayloadMap & string,
182
+ >(
183
+ pool: Pool,
184
+ tags: string[],
185
+ mode: TagQueryMode = 'all',
186
+ limit = 100,
187
+ offset = 0,
188
+ ): Promise<JobRecord<PayloadMap, T>[]> =>
189
+ new PostgresBackend(pool).getJobsByTags<PayloadMap, T>(
190
+ tags,
191
+ mode,
192
+ limit,
193
+ offset,
194
+ );
195
+
196
+ export const getJobs = async <PayloadMap, T extends keyof PayloadMap & string>(
197
+ pool: Pool,
198
+ filters?: {
199
+ jobType?: string;
200
+ priority?: number;
201
+ runAt?: Date | { gt?: Date; gte?: Date; lt?: Date; lte?: Date; eq?: Date };
202
+ tags?: { values: string[]; mode?: TagQueryMode };
203
+ },
204
+ limit = 100,
205
+ offset = 0,
206
+ ): Promise<JobRecord<PayloadMap, T>[]> =>
207
+ new PostgresBackend(pool).getJobs<PayloadMap, T>(filters, limit, offset);
208
+
209
+ // ── Progress ──────────────────────────────────────────────────────────────────
210
+
211
+ export const updateProgress = async (
212
+ pool: Pool,
213
+ jobId: number,
214
+ progress: number,
215
+ ): Promise<void> => new PostgresBackend(pool).updateProgress(jobId, progress);
216
+
217
+ // ── Wait support functions (PostgreSQL-only) ─────────────────────────────────
285
218
 
286
219
  /**
287
- * Mark a job as failed
220
+ * Transition a job to 'waiting' status with wait_until and/or wait_token_id.
221
+ * Saves step_data so the handler can resume from where it left off.
288
222
  */
289
- export const failJob = async (
223
+ export const waitJob = async (
290
224
  pool: Pool,
291
225
  jobId: number,
292
- error: Error,
293
- failureReason?: FailureReason,
226
+ options: {
227
+ waitUntil?: Date;
228
+ waitTokenId?: string;
229
+ stepData: Record<string, any>;
230
+ },
294
231
  ): Promise<void> => {
295
232
  const client = await pool.connect();
296
233
  try {
297
- /**
298
- * The next attempt will be scheduled after `2^attempts * 1 minute` from the last attempt.
299
- */
300
- await client.query(
234
+ const result = await client.query(
301
235
  `
302
236
  UPDATE job_queue
303
- SET status = 'failed',
304
- updated_at = NOW(),
305
- next_attempt_at = CASE
306
- WHEN attempts < max_attempts THEN NOW() + (POWER(2, attempts) * INTERVAL '1 minute')
307
- ELSE NULL
308
- END,
309
- error_history = COALESCE(error_history, '[]'::jsonb) || $2::jsonb,
310
- failure_reason = $3,
311
- last_failed_at = NOW()
312
- WHERE id = $1
237
+ SET status = 'waiting',
238
+ wait_until = $2,
239
+ wait_token_id = $3,
240
+ step_data = $4,
241
+ locked_at = NULL,
242
+ locked_by = NULL,
243
+ updated_at = NOW()
244
+ WHERE id = $1 AND status = 'processing'
313
245
  `,
314
246
  [
315
247
  jobId,
316
- JSON.stringify([
317
- {
318
- message: error.message || String(error),
319
- timestamp: new Date().toISOString(),
320
- },
321
- ]),
322
- failureReason ?? null,
248
+ options.waitUntil ?? null,
249
+ options.waitTokenId ?? null,
250
+ JSON.stringify(options.stepData),
323
251
  ],
324
252
  );
325
- await recordJobEvent(pool, jobId, JobEventType.Failed, {
326
- message: error.message || String(error),
327
- failureReason,
253
+ if (result.rowCount === 0) {
254
+ log(
255
+ `Job ${jobId} could not be set to waiting (may have been reclaimed or is no longer processing)`,
256
+ );
257
+ return;
258
+ }
259
+ await recordJobEvent(pool, jobId, JobEventType.Waiting, {
260
+ waitUntil: options.waitUntil?.toISOString() ?? null,
261
+ waitTokenId: options.waitTokenId ?? null,
328
262
  });
263
+ log(`Job ${jobId} set to waiting`);
329
264
  } catch (error) {
330
- log(`Error failing job ${jobId}: ${error}`);
265
+ log(`Error setting job ${jobId} to waiting: ${error}`);
331
266
  throw error;
332
267
  } finally {
333
- log(`Failed job ${jobId}`);
334
268
  client.release();
335
269
  }
336
270
  };
337
271
 
338
272
  /**
339
- * Retry a failed job immediately
273
+ * Update step_data for a job. Called after each ctx.run() step completes
274
+ * to persist intermediate progress.
340
275
  */
341
- export const retryJob = async (pool: Pool, jobId: number): Promise<void> => {
276
+ export const updateStepData = async (
277
+ pool: Pool,
278
+ jobId: number,
279
+ stepData: Record<string, any>,
280
+ ): Promise<void> => {
342
281
  const client = await pool.connect();
343
282
  try {
344
283
  await client.query(
345
- `
346
- UPDATE job_queue
347
- SET status = 'pending',
348
- updated_at = NOW(),
349
- locked_at = NULL,
350
- locked_by = NULL,
351
- next_attempt_at = NOW(),
352
- last_retried_at = NOW()
353
- WHERE id = $1
354
- `,
355
- [jobId],
284
+ `UPDATE job_queue SET step_data = $2, updated_at = NOW() WHERE id = $1`,
285
+ [jobId, JSON.stringify(stepData)],
356
286
  );
357
- await recordJobEvent(pool, jobId, JobEventType.Retried);
358
287
  } catch (error) {
359
- log(`Error retrying job ${jobId}: ${error}`);
360
- throw error;
288
+ log(`Error updating step_data for job ${jobId}: ${error}`);
289
+ // Best-effort: do not throw to avoid killing the running handler
361
290
  } finally {
362
- log(`Retried job ${jobId}`);
363
291
  client.release();
364
292
  }
365
293
  };
366
294
 
367
295
  /**
368
- * Delete old completed jobs
296
+ * Parse a timeout string like '10m', '1h', '24h', '7d' into milliseconds.
369
297
  */
370
- export const cleanupOldJobs = async (
371
- pool: Pool,
372
- daysToKeep = 30,
373
- ): Promise<number> => {
374
- const client = await pool.connect();
375
- try {
376
- const result = await client.query(`
377
- DELETE FROM job_queue
378
- WHERE status = 'completed'
379
- AND updated_at < NOW() - INTERVAL '${daysToKeep} days'
380
- RETURNING id
381
- `);
382
- log(`Deleted ${result.rowCount} old jobs`);
383
- return result.rowCount || 0;
384
- } catch (error) {
385
- log(`Error cleaning up old jobs: ${error}`);
386
- throw error;
387
- } finally {
388
- client.release();
389
- }
390
- };
391
-
392
298
  /**
393
- * Cancel a scheduled job (only if still pending)
299
+ * Maximum allowed timeout in milliseconds (~365 days).
300
+ * Prevents overflow to Infinity when computing Date offsets.
394
301
  */
395
- export const cancelJob = async (pool: Pool, jobId: number): Promise<void> => {
396
- const client = await pool.connect();
397
- try {
398
- await client.query(
399
- `
400
- UPDATE job_queue
401
- SET status = 'cancelled', updated_at = NOW(), last_cancelled_at = NOW()
402
- WHERE id = $1 AND status = 'pending'
403
- `,
404
- [jobId],
302
+ const MAX_TIMEOUT_MS = 365 * 24 * 60 * 60 * 1000;
303
+
304
+ function parseTimeoutString(timeout: string): number {
305
+ const match = timeout.match(/^(\d+)(s|m|h|d)$/);
306
+ if (!match) {
307
+ throw new Error(
308
+ `Invalid timeout format: "${timeout}". Expected format like "10m", "1h", "24h", "7d".`,
405
309
  );
406
- await recordJobEvent(pool, jobId, JobEventType.Cancelled);
407
- } catch (error) {
408
- log(`Error cancelling job ${jobId}: ${error}`);
409
- throw error;
410
- } finally {
411
- log(`Cancelled job ${jobId}`);
412
- client.release();
413
310
  }
414
- };
311
+ const value = parseInt(match[1], 10);
312
+ const unit = match[2];
313
+ let ms: number;
314
+ switch (unit) {
315
+ case 's':
316
+ ms = value * 1000;
317
+ break;
318
+ case 'm':
319
+ ms = value * 60 * 1000;
320
+ break;
321
+ case 'h':
322
+ ms = value * 60 * 60 * 1000;
323
+ break;
324
+ case 'd':
325
+ ms = value * 24 * 60 * 60 * 1000;
326
+ break;
327
+ default:
328
+ throw new Error(`Unknown timeout unit: "${unit}"`);
329
+ }
330
+ if (!Number.isFinite(ms) || ms > MAX_TIMEOUT_MS) {
331
+ throw new Error(
332
+ `Timeout value "${timeout}" is too large. Maximum allowed is 365 days.`,
333
+ );
334
+ }
335
+ return ms;
336
+ }
415
337
 
416
338
  /**
417
- * Cancel all upcoming jobs (pending and scheduled in the future) with optional filters
339
+ * Create a waitpoint token in the database.
340
+ * The token can be used to pause a job until an external signal completes it.
341
+ *
342
+ * @param pool - The database pool
343
+ * @param jobId - The job ID to associate with the token (null if created outside a handler)
344
+ * @param options - Optional timeout and tags
345
+ * @returns The created waitpoint token
418
346
  */
419
- export const cancelAllUpcomingJobs = async (
347
+ export const createWaitpoint = async (
420
348
  pool: Pool,
421
- filters?: {
422
- jobType?: string;
423
- priority?: number;
424
- runAt?: Date | { gt?: Date; gte?: Date; lt?: Date; lte?: Date; eq?: Date };
425
- tags?: { values: string[]; mode?: TagQueryMode };
426
- },
427
- ): Promise<number> => {
349
+ jobId: number | null,
350
+ options?: { timeout?: string; tags?: string[] },
351
+ ): Promise<{ id: string }> => {
428
352
  const client = await pool.connect();
429
353
  try {
430
- let query = `
431
- UPDATE job_queue
432
- SET status = 'cancelled', updated_at = NOW()
433
- WHERE status = 'pending'`;
434
- const params: any[] = [];
435
- let paramIdx = 1;
436
- if (filters) {
437
- if (filters.jobType) {
438
- query += ` AND job_type = $${paramIdx++}`;
439
- params.push(filters.jobType);
440
- }
441
- if (filters.priority !== undefined) {
442
- query += ` AND priority = $${paramIdx++}`;
443
- params.push(filters.priority);
444
- }
445
- if (filters.runAt) {
446
- if (filters.runAt instanceof Date) {
447
- query += ` AND run_at = $${paramIdx++}`;
448
- params.push(filters.runAt);
449
- } else if (typeof filters.runAt === 'object') {
450
- const ops = filters.runAt;
451
- if (ops.gt) {
452
- query += ` AND run_at > $${paramIdx++}`;
453
- params.push(ops.gt);
454
- }
455
- if (ops.gte) {
456
- query += ` AND run_at >= $${paramIdx++}`;
457
- params.push(ops.gte);
458
- }
459
- if (ops.lt) {
460
- query += ` AND run_at < $${paramIdx++}`;
461
- params.push(ops.lt);
462
- }
463
- if (ops.lte) {
464
- query += ` AND run_at <= $${paramIdx++}`;
465
- params.push(ops.lte);
466
- }
467
- if (ops.eq) {
468
- query += ` AND run_at = $${paramIdx++}`;
469
- params.push(ops.eq);
470
- }
471
- }
472
- }
473
- if (
474
- filters.tags &&
475
- filters.tags.values &&
476
- filters.tags.values.length > 0
477
- ) {
478
- const mode = filters.tags.mode || 'all';
479
- const tagValues = filters.tags.values;
480
- switch (mode) {
481
- case 'exact':
482
- query += ` AND tags = $${paramIdx++}`;
483
- params.push(tagValues);
484
- break;
485
- case 'all':
486
- query += ` AND tags @> $${paramIdx++}`;
487
- params.push(tagValues);
488
- break;
489
- case 'any':
490
- query += ` AND tags && $${paramIdx++}`;
491
- params.push(tagValues);
492
- break;
493
- case 'none':
494
- query += ` AND NOT (tags && $${paramIdx++})`;
495
- params.push(tagValues);
496
- break;
497
- default:
498
- query += ` AND tags @> $${paramIdx++}`;
499
- params.push(tagValues);
500
- }
501
- }
354
+ const id = `wp_${randomUUID()}`;
355
+ let timeoutAt: Date | null = null;
356
+
357
+ if (options?.timeout) {
358
+ const ms = parseTimeoutString(options.timeout);
359
+ timeoutAt = new Date(Date.now() + ms);
502
360
  }
503
- query += '\nRETURNING id';
504
- const result = await client.query(query, params);
505
- log(`Cancelled ${result.rowCount} jobs`);
506
- return result.rowCount || 0;
507
- } catch (error) {
508
- log(`Error cancelling upcoming jobs: ${error}`);
509
- throw error;
510
- } finally {
511
- client.release();
512
- }
513
- };
514
361
 
515
- /**
516
- * Get all jobs with optional pagination
517
- */
518
- export const getAllJobs = async <
519
- PayloadMap,
520
- T extends keyof PayloadMap & string,
521
- >(
522
- pool: Pool,
523
- limit = 100,
524
- offset = 0,
525
- ): Promise<JobRecord<PayloadMap, T>[]> => {
526
- const client = await pool.connect();
527
- try {
528
- const result = await client.query(
529
- `SELECT id, job_type AS "jobType", payload, status, max_attempts AS "maxAttempts", attempts, priority, run_at AS "runAt", timeout_ms AS "timeoutMs", created_at AS "createdAt", updated_at AS "updatedAt", started_at AS "startedAt", completed_at AS "completedAt", last_failed_at AS "lastFailedAt", locked_at AS "lockedAt", locked_by AS "lockedBy", error_history AS "errorHistory", failure_reason AS "failureReason", next_attempt_at AS "nextAttemptAt", last_failed_at AS "lastFailedAt", last_retried_at AS "lastRetriedAt", last_cancelled_at AS "lastCancelledAt", pending_reason AS "pendingReason" FROM job_queue ORDER BY created_at DESC LIMIT $1 OFFSET $2`,
530
- [limit, offset],
362
+ await client.query(
363
+ `INSERT INTO waitpoints (id, job_id, status, timeout_at, tags) VALUES ($1, $2, 'waiting', $3, $4)`,
364
+ [id, jobId, timeoutAt, options?.tags ?? null],
531
365
  );
532
- log(`Found ${result.rows.length} jobs (all)`);
533
- return result.rows.map((job) => ({
534
- ...job,
535
- payload: job.payload,
536
- timeoutMs: job.timeoutMs,
537
- }));
366
+
367
+ log(`Created waitpoint ${id} for job ${jobId}`);
368
+ return { id };
538
369
  } catch (error) {
539
- log(`Error getting all jobs: ${error}`);
370
+ log(`Error creating waitpoint: ${error}`);
540
371
  throw error;
541
372
  } finally {
542
373
  client.release();
@@ -544,62 +375,73 @@ export const getAllJobs = async <
544
375
  };
545
376
 
546
377
  /**
547
- * Set a pending reason for unpicked jobs
378
+ * Complete a waitpoint token, optionally providing output data.
379
+ * This also moves the associated job from 'waiting' back to 'pending' so
380
+ * it gets picked up by the polling loop.
548
381
  */
549
- export const setPendingReasonForUnpickedJobs = async (
382
+ export const completeWaitpoint = async (
550
383
  pool: Pool,
551
- reason: string,
552
- jobType?: string | string[],
553
- ) => {
384
+ tokenId: string,
385
+ data?: any,
386
+ ): Promise<void> => {
554
387
  const client = await pool.connect();
555
388
  try {
556
- let jobTypeFilter = '';
557
- let params: any[] = [reason];
558
- if (jobType) {
559
- if (Array.isArray(jobType)) {
560
- jobTypeFilter = ` AND job_type = ANY($2)`;
561
- params.push(jobType);
562
- } else {
563
- jobTypeFilter = ` AND job_type = $2`;
564
- params.push(jobType);
565
- }
566
- }
567
- await client.query(
568
- `UPDATE job_queue SET pending_reason = $1 WHERE status = 'pending'${jobTypeFilter}`,
569
- params,
389
+ await client.query('BEGIN');
390
+
391
+ // Update the waitpoint
392
+ const wpResult = await client.query(
393
+ `UPDATE waitpoints SET status = 'completed', output = $2, completed_at = NOW()
394
+ WHERE id = $1 AND status = 'waiting'
395
+ RETURNING job_id`,
396
+ [tokenId, data != null ? JSON.stringify(data) : null],
570
397
  );
398
+
399
+ if (wpResult.rows.length === 0) {
400
+ await client.query('ROLLBACK');
401
+ log(`Waitpoint ${tokenId} not found or already completed`);
402
+ return;
403
+ }
404
+
405
+ const jobId = wpResult.rows[0].job_id;
406
+
407
+ // Move the associated job back to 'pending' so it gets picked up
408
+ if (jobId != null) {
409
+ await client.query(
410
+ `UPDATE job_queue
411
+ SET status = 'pending', wait_token_id = NULL, wait_until = NULL, updated_at = NOW()
412
+ WHERE id = $1 AND status = 'waiting'`,
413
+ [jobId],
414
+ );
415
+ }
416
+
417
+ await client.query('COMMIT');
418
+ log(`Completed waitpoint ${tokenId} for job ${jobId}`);
419
+ } catch (error) {
420
+ await client.query('ROLLBACK');
421
+ log(`Error completing waitpoint ${tokenId}: ${error}`);
422
+ throw error;
571
423
  } finally {
572
424
  client.release();
573
425
  }
574
426
  };
575
427
 
576
428
  /**
577
- * Reclaim jobs stuck in 'processing' for too long.
578
- *
579
- * If a process (e.g., API route or worker) crashes after marking a job as 'processing' but before completing it, the job can remain stuck in the 'processing' state indefinitely. This can happen if the process is killed or encounters an unhandled error after updating the job status but before marking it as 'completed' or 'failed'.
580
- * @param pool - The database pool
581
- * @param maxProcessingTimeMinutes - Max allowed processing time in minutes (default: 10)
582
- * @returns Number of jobs reclaimed
429
+ * Retrieve a waitpoint token by its ID.
583
430
  */
584
- export const reclaimStuckJobs = async (
431
+ export const getWaitpoint = async (
585
432
  pool: Pool,
586
- maxProcessingTimeMinutes = 10,
587
- ): Promise<number> => {
433
+ tokenId: string,
434
+ ): Promise<WaitpointRecord | null> => {
588
435
  const client = await pool.connect();
589
436
  try {
590
437
  const result = await client.query(
591
- `
592
- UPDATE job_queue
593
- SET status = 'pending', locked_at = NULL, locked_by = NULL, updated_at = NOW()
594
- WHERE status = 'processing'
595
- AND locked_at < NOW() - INTERVAL '${maxProcessingTimeMinutes} minutes'
596
- RETURNING id
597
- `,
438
+ `SELECT id, job_id AS "jobId", status, output, timeout_at AS "timeoutAt", created_at AS "createdAt", completed_at AS "completedAt", tags FROM waitpoints WHERE id = $1`,
439
+ [tokenId],
598
440
  );
599
- log(`Reclaimed ${result.rowCount} stuck jobs`);
600
- return result.rowCount || 0;
441
+ if (result.rows.length === 0) return null;
442
+ return result.rows[0] as WaitpointRecord;
601
443
  } catch (error) {
602
- log(`Error reclaiming stuck jobs: ${error}`);
444
+ log(`Error getting waitpoint ${tokenId}: ${error}`);
603
445
  throw error;
604
446
  } finally {
605
447
  client.release();
@@ -607,199 +449,43 @@ export const reclaimStuckJobs = async (
607
449
  };
608
450
 
609
451
  /**
610
- * Get all events for a job, ordered by createdAt ascending
452
+ * Expire timed-out waitpoint tokens and move their associated jobs back to 'pending'.
453
+ * Should be called periodically (e.g., alongside reclaimStuckJobs).
611
454
  */
612
- export const getJobEvents = async (
613
- pool: Pool,
614
- jobId: number,
615
- ): Promise<JobEvent[]> => {
455
+ export const expireTimedOutWaitpoints = async (pool: Pool): Promise<number> => {
616
456
  const client = await pool.connect();
617
457
  try {
618
- const res = await client.query(
619
- `SELECT id, job_id AS "jobId", event_type AS "eventType", metadata, created_at AS "createdAt" FROM job_events WHERE job_id = $1 ORDER BY created_at ASC`,
620
- [jobId],
621
- );
622
- return res.rows as JobEvent[];
623
- } finally {
624
- client.release();
625
- }
626
- };
458
+ await client.query('BEGIN');
627
459
 
628
- /**
629
- * Get jobs by tags (matches all specified tags)
630
- */
631
- export const getJobsByTags = async <
632
- PayloadMap,
633
- T extends keyof PayloadMap & string,
634
- >(
635
- pool: Pool,
636
- tags: string[],
637
- mode: TagQueryMode = 'all',
638
- limit = 100,
639
- offset = 0,
640
- ): Promise<JobRecord<PayloadMap, T>[]> => {
641
- const client = await pool.connect();
642
- try {
643
- let query = `SELECT id, job_type AS "jobType", payload, status, max_attempts AS "maxAttempts", attempts, priority, run_at AS "runAt", timeout_ms AS "timeoutMs", created_at AS "createdAt", updated_at AS "updatedAt", started_at AS "startedAt", completed_at AS "completedAt", last_failed_at AS "lastFailedAt", locked_at AS "lockedAt", locked_by AS "lockedBy", error_history AS "errorHistory", failure_reason AS "failureReason", next_attempt_at AS "nextAttemptAt", last_failed_at AS "lastFailedAt", last_retried_at AS "lastRetriedAt", last_cancelled_at AS "lastCancelledAt", pending_reason AS "pendingReason", tags
644
- FROM job_queue`;
645
- let params: any[] = [];
646
- switch (mode) {
647
- case 'exact':
648
- query += ' WHERE tags = $1';
649
- params = [tags];
650
- break;
651
- case 'all':
652
- query += ' WHERE tags @> $1';
653
- params = [tags];
654
- break;
655
- case 'any':
656
- query += ' WHERE tags && $1';
657
- params = [tags];
658
- break;
659
- case 'none':
660
- query += ' WHERE NOT (tags && $1)';
661
- params = [tags];
662
- break;
663
- default:
664
- query += ' WHERE tags @> $1';
665
- params = [tags];
666
- }
667
- query += ' ORDER BY created_at DESC LIMIT $2 OFFSET $3';
668
- params.push(limit, offset);
669
- const result = await client.query(query, params);
670
- log(
671
- `Found ${result.rows.length} jobs by tags ${JSON.stringify(tags)} (mode: ${mode})`,
672
- );
673
- return result.rows.map((job) => ({
674
- ...job,
675
- payload: job.payload,
676
- timeoutMs: job.timeoutMs,
677
- failureReason: job.failureReason,
678
- }));
679
- } catch (error) {
680
- log(
681
- `Error getting jobs by tags ${JSON.stringify(tags)} (mode: ${mode}): ${error}`,
460
+ // Find and expire timed-out waitpoints
461
+ const result = await client.query(
462
+ `UPDATE waitpoints
463
+ SET status = 'timed_out'
464
+ WHERE status = 'waiting' AND timeout_at IS NOT NULL AND timeout_at <= NOW()
465
+ RETURNING id, job_id`,
682
466
  );
683
- throw error;
684
- } finally {
685
- client.release();
686
- }
687
- };
688
467
 
689
- export const getJobs = async <PayloadMap, T extends keyof PayloadMap & string>(
690
- pool: Pool,
691
- filters?: {
692
- jobType?: string;
693
- priority?: number;
694
- runAt?: Date | { gt?: Date; gte?: Date; lt?: Date; lte?: Date; eq?: Date };
695
- tags?: { values: string[]; mode?: TagQueryMode };
696
- },
697
- limit = 100,
698
- offset = 0,
699
- ): Promise<JobRecord<PayloadMap, T>[]> => {
700
- const client = await pool.connect();
701
- try {
702
- let query = `SELECT id, job_type AS "jobType", payload, status, max_attempts AS "maxAttempts", attempts, priority, run_at AS "runAt", timeout_ms AS "timeoutMs", created_at AS "createdAt", updated_at AS "updatedAt", started_at AS "startedAt", completed_at AS "completedAt", last_failed_at AS "lastFailedAt", locked_at AS "lockedAt", locked_by AS "lockedBy", error_history AS "errorHistory", failure_reason AS "failureReason", next_attempt_at AS "nextAttemptAt", last_failed_at AS "lastFailedAt", last_retried_at AS "lastRetriedAt", last_cancelled_at AS "lastCancelledAt", pending_reason AS "pendingReason", tags FROM job_queue`;
703
- const params: any[] = [];
704
- let where: string[] = [];
705
- let paramIdx = 1;
706
- if (filters) {
707
- if (filters.jobType) {
708
- where.push(`job_type = $${paramIdx++}`);
709
- params.push(filters.jobType);
710
- }
711
- if (filters.priority !== undefined) {
712
- where.push(`priority = $${paramIdx++}`);
713
- params.push(filters.priority);
714
- }
715
- if (filters.runAt) {
716
- if (filters.runAt instanceof Date) {
717
- where.push(`run_at = $${paramIdx++}`);
718
- params.push(filters.runAt);
719
- } else if (
720
- typeof filters.runAt === 'object' &&
721
- (filters.runAt.gt !== undefined ||
722
- filters.runAt.gte !== undefined ||
723
- filters.runAt.lt !== undefined ||
724
- filters.runAt.lte !== undefined ||
725
- filters.runAt.eq !== undefined)
726
- ) {
727
- const ops = filters.runAt as {
728
- gt?: Date;
729
- gte?: Date;
730
- lt?: Date;
731
- lte?: Date;
732
- eq?: Date;
733
- };
734
- if (ops.gt) {
735
- where.push(`run_at > $${paramIdx++}`);
736
- params.push(ops.gt);
737
- }
738
- if (ops.gte) {
739
- where.push(`run_at >= $${paramIdx++}`);
740
- params.push(ops.gte);
741
- }
742
- if (ops.lt) {
743
- where.push(`run_at < $${paramIdx++}`);
744
- params.push(ops.lt);
745
- }
746
- if (ops.lte) {
747
- where.push(`run_at <= $${paramIdx++}`);
748
- params.push(ops.lte);
749
- }
750
- if (ops.eq) {
751
- where.push(`run_at = $${paramIdx++}`);
752
- params.push(ops.eq);
753
- }
754
- }
755
- }
756
- if (
757
- filters.tags &&
758
- filters.tags.values &&
759
- filters.tags.values.length > 0
760
- ) {
761
- const mode = filters.tags.mode || 'all';
762
- const tagValues = filters.tags.values;
763
- switch (mode) {
764
- case 'exact':
765
- where.push(`tags = $${paramIdx++}`);
766
- params.push(tagValues);
767
- break;
768
- case 'all':
769
- where.push(`tags @> $${paramIdx++}`);
770
- params.push(tagValues);
771
- break;
772
- case 'any':
773
- where.push(`tags && $${paramIdx++}`);
774
- params.push(tagValues);
775
- break;
776
- case 'none':
777
- where.push(`NOT (tags && $${paramIdx++})`);
778
- params.push(tagValues);
779
- break;
780
- default:
781
- where.push(`tags @> $${paramIdx++}`);
782
- params.push(tagValues);
783
- }
468
+ // Move associated jobs back to 'pending'
469
+ for (const row of result.rows) {
470
+ if (row.job_id != null) {
471
+ await client.query(
472
+ `UPDATE job_queue
473
+ SET status = 'pending', wait_token_id = NULL, wait_until = NULL, updated_at = NOW()
474
+ WHERE id = $1 AND status = 'waiting'`,
475
+ [row.job_id],
476
+ );
784
477
  }
785
478
  }
786
- if (where.length > 0) {
787
- query += ` WHERE ${where.join(' AND ')}`;
479
+
480
+ await client.query('COMMIT');
481
+ const count = result.rowCount || 0;
482
+ if (count > 0) {
483
+ log(`Expired ${count} timed-out waitpoints`);
788
484
  }
789
- // Always add LIMIT and OFFSET as the last parameters
790
- paramIdx = params.length + 1;
791
- query += ` ORDER BY created_at DESC LIMIT $${paramIdx++} OFFSET $${paramIdx}`;
792
- params.push(limit, offset);
793
- const result = await client.query(query, params);
794
- log(`Found ${result.rows.length} jobs`);
795
- return result.rows.map((job) => ({
796
- ...job,
797
- payload: job.payload,
798
- timeoutMs: job.timeoutMs,
799
- failureReason: job.failureReason,
800
- }));
485
+ return count;
801
486
  } catch (error) {
802
- log(`Error getting jobs: ${error}`);
487
+ await client.query('ROLLBACK');
488
+ log(`Error expiring timed-out waitpoints: ${error}`);
803
489
  throw error;
804
490
  } finally {
805
491
  client.release();