@nicnocquee/dataqueue 1.22.0 → 1.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/README.md +44 -0
  2. package/dist/index.cjs +2822 -583
  3. package/dist/index.cjs.map +1 -1
  4. package/dist/index.d.cts +589 -12
  5. package/dist/index.d.ts +589 -12
  6. package/dist/index.js +2818 -584
  7. package/dist/index.js.map +1 -1
  8. package/migrations/1751131910825_add_timeout_seconds_to_job_queue.sql +2 -2
  9. package/migrations/1751186053000_add_job_events_table.sql +12 -8
  10. package/migrations/1751984773000_add_tags_to_job_queue.sql +1 -1
  11. package/migrations/1765809419000_add_force_kill_on_timeout_to_job_queue.sql +6 -0
  12. package/migrations/1771100000000_add_idempotency_key_to_job_queue.sql +7 -0
  13. package/migrations/1781200000000_add_wait_support.sql +12 -0
  14. package/migrations/1781200000001_create_waitpoints_table.sql +18 -0
  15. package/migrations/1781200000002_add_performance_indexes.sql +34 -0
  16. package/migrations/1781200000003_add_progress_to_job_queue.sql +7 -0
  17. package/package.json +20 -6
  18. package/src/backend.ts +163 -0
  19. package/src/backends/postgres.ts +1111 -0
  20. package/src/backends/redis-scripts.ts +533 -0
  21. package/src/backends/redis.test.ts +543 -0
  22. package/src/backends/redis.ts +834 -0
  23. package/src/db-util.ts +4 -2
  24. package/src/handler-validation.test.ts +414 -0
  25. package/src/handler-validation.ts +168 -0
  26. package/src/index.test.ts +230 -1
  27. package/src/index.ts +128 -32
  28. package/src/processor.test.ts +612 -16
  29. package/src/processor.ts +759 -47
  30. package/src/queue.test.ts +736 -3
  31. package/src/queue.ts +346 -660
  32. package/src/test-util.ts +32 -0
  33. package/src/types.ts +451 -16
  34. package/src/wait.test.ts +698 -0
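Note on the new handler API surfaced by this diff: as the processor.ts changes below show, job handlers now receive a third JobContext argument with ctx.run for cached steps, ctx.waitFor/waitUntil/waitForToken for durable waits (PostgreSQL backend only), ctx.setProgress, and ctx.prolong/onTimeout. The following is a minimal, hedged sketch of what a handler using that context might look like; the payload map, job type name, and helper functions are illustrative assumptions rather than part of the package, and the exact exported type names may differ.

// Illustrative sketch only. Assumes JobHandlers is re-exported from the
// package entry point; 'send-reminder' and the helpers below are made up.
import type { JobHandlers } from '@nicnocquee/dataqueue';

type PayloadMap = {
  'send-reminder': { email: string };
};

// Hypothetical helpers, stubbed so the sketch is self-contained.
const lookupUserId = async (_email: string): Promise<number> => 42;
const sendReminderEmail = async (_userId: number): Promise<void> => {};

const handlers: JobHandlers<PayloadMap> = {
  'send-reminder': async (payload, signal, ctx) => {
    // ctx.run persists the step result, so it is replayed from cache instead
    // of re-executed when the handler is re-invoked after a wait.
    const userId = await ctx.run('lookup-user', () => lookupUserId(payload.email));

    await ctx.setProgress(50);

    // Suspends the job (internally via a WaitSignal); the processor re-runs
    // the handler once the duration has elapsed. PostgreSQL backend only.
    await ctx.waitFor({ days: 1 });

    if (!signal.aborted) {
      await sendReminderEmail(userId);
    }
    await ctx.setProgress(100);
  },
};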
package/src/processor.ts CHANGED
@@ -1,3 +1,4 @@
+ import { Worker } from 'worker_threads';
  import { Pool } from 'pg';
  import {
  JobRecord,
@@ -7,15 +8,560 @@ import {
  JobType,
  FailureReason,
  JobHandlers,
+ JobContext,
+ OnTimeoutCallback,
+ WaitSignal,
+ WaitDuration,
+ WaitTokenResult,
  } from './types.js';
+ import { QueueBackend } from './backend.js';
+ import { PostgresBackend } from './backends/postgres.js';
  import {
- getNextBatch,
- completeJob,
- failJob,
- setPendingReasonForUnpickedJobs,
+ waitJob,
+ updateStepData,
+ createWaitpoint,
+ getWaitpoint,
  } from './queue.js';
  import { log, setLogContext } from './log-context.js';

+ /**
+ * Try to extract the underlying pg Pool from a QueueBackend.
+ * Returns null for non-PostgreSQL backends.
+ */
+ function tryExtractPool(backend: QueueBackend): Pool | null {
+ if (backend instanceof PostgresBackend) {
+ return backend.getPool();
+ }
+ return null;
+ }
+
+ /**
+ * Build a JobContext without wait support (for non-PostgreSQL backends).
+ * prolong/onTimeout work normally; wait-related methods throw helpful errors.
+ */
+ function buildBasicContext(
+ backend: QueueBackend,
+ jobId: number,
+ baseCtx: {
+ prolong: JobContext['prolong'];
+ onTimeout: JobContext['onTimeout'];
+ },
+ ): JobContext {
+ const waitError = () =>
+ new Error(
+ 'Wait features (waitFor, waitUntil, createToken, waitForToken, ctx.run) are currently only supported with the PostgreSQL backend.',
+ );
+ return {
+ prolong: baseCtx.prolong,
+ onTimeout: baseCtx.onTimeout,
+ run: async <T>(_stepName: string, fn: () => Promise<T>): Promise<T> => {
+ // Without PostgreSQL, just execute the function directly (no persistence)
+ return fn();
+ },
+ waitFor: async () => {
+ throw waitError();
+ },
+ waitUntil: async () => {
+ throw waitError();
+ },
+ createToken: async () => {
+ throw waitError();
+ },
+ waitForToken: async () => {
+ throw waitError();
+ },
+ setProgress: async (percent: number) => {
+ if (percent < 0 || percent > 100)
+ throw new Error('Progress must be between 0 and 100');
+ await backend.updateProgress(jobId, Math.round(percent));
+ },
+ };
+ }
+
+ /**
+ * Validates that a handler can be serialized for worker thread execution.
+ * Throws an error with helpful message if serialization fails.
+ */
+ function validateHandlerSerializable<
+ PayloadMap,
+ T extends keyof PayloadMap & string,
+ >(handler: JobHandler<PayloadMap, T>, jobType: string): void {
+ try {
+ const handlerString = handler.toString();
+
+ // Check for common patterns that indicate non-serializable handlers
+ // 1. Arrow functions that capture 'this' (indicated by 'this' in the function body but not in parameters)
+ if (
+ handlerString.includes('this.') &&
+ !handlerString.match(/\([^)]*this[^)]*\)/)
+ ) {
+ throw new Error(
+ `Handler for job type "${jobType}" uses 'this' context which cannot be serialized. ` +
+ `Use a regular function or avoid 'this' references when forceKillOnTimeout is enabled.`,
+ );
+ }
+
+ // 2. Check if handler string looks like it might have closures
+ // This is a heuristic - we can't perfectly detect closures, but we can warn about common patterns
+ if (handlerString.includes('[native code]')) {
+ throw new Error(
+ `Handler for job type "${jobType}" contains native code which cannot be serialized. ` +
+ `Ensure your handler is a plain function when forceKillOnTimeout is enabled.`,
+ );
+ }
+
+ // 3. Try to create a function from the string to validate it's parseable
+ // This will catch syntax errors early
+ try {
+ new Function('return ' + handlerString);
+ } catch (parseError) {
+ throw new Error(
+ `Handler for job type "${jobType}" cannot be serialized: ${parseError instanceof Error ? parseError.message : String(parseError)}. ` +
+ `When using forceKillOnTimeout, handlers must be serializable functions without closures over external variables.`,
+ );
+ }
+ } catch (error) {
+ if (error instanceof Error) {
+ throw error;
+ }
+ throw new Error(
+ `Failed to validate handler serialization for job type "${jobType}": ${String(error)}`,
+ );
+ }
+ }
+
+ /**
+ * Run a handler in a worker thread for force-kill capability.
+ *
+ * **IMPORTANT**: The handler must be serializable for this to work. This means:
+ * - The handler should be a standalone function or arrow function
+ * - It should not capture variables from outer scopes (closures) that reference external dependencies
+ * - It should not use 'this' context unless it's a bound method
+ * - All dependencies must be importable in the worker thread context
+ *
+ * If your handler doesn't meet these requirements, use the default graceful shutdown
+ * (forceKillOnTimeout: false) and ensure your handler checks signal.aborted.
+ *
+ * @throws {Error} If the handler cannot be serialized
+ */
+ async function runHandlerInWorker<
+ PayloadMap,
+ T extends keyof PayloadMap & string,
+ >(
+ handler: JobHandler<PayloadMap, T>,
+ payload: PayloadMap[T],
+ timeoutMs: number,
+ jobType: string,
+ ): Promise<void> {
+ // Validate handler can be serialized before attempting to run in worker
+ validateHandlerSerializable(handler, jobType);
+
+ return new Promise((resolve, reject) => {
+ // Use inline worker code for better compatibility
+ // Note: This requires the handler to be serializable (no closures with external dependencies)
+ // Wrap in IIFE to allow return statements
+ const workerCode = `
+ (function() {
+ const { parentPort, workerData } = require('worker_threads');
+ const { handlerCode, payload, timeoutMs } = workerData;
+
+ // Create an AbortController for the handler
+ const controller = new AbortController();
+ const signal = controller.signal;
+
+ // Set up timeout
+ const timeoutId = setTimeout(() => {
+ controller.abort();
+ parentPort.postMessage({ type: 'timeout' });
+ }, timeoutMs);
+
+ try {
+ // Execute the handler
+ // Note: This uses Function constructor which requires the handler to be serializable.
+ // The handler should be validated before reaching this point.
+ let handlerFn;
+ try {
+ // Wrap handlerCode in parentheses to ensure it's treated as an expression
+ // This handles both arrow functions and regular functions
+ const wrappedCode = handlerCode.trim().startsWith('async') || handlerCode.trim().startsWith('function')
+ ? handlerCode
+ : '(' + handlerCode + ')';
+ handlerFn = new Function('return ' + wrappedCode)();
+ } catch (parseError) {
+ clearTimeout(timeoutId);
+ parentPort.postMessage({
+ type: 'error',
+ error: {
+ message: 'Handler cannot be deserialized in worker thread. ' +
+ 'Ensure your handler is a standalone function without closures over external variables. ' +
+ 'Original error: ' + (parseError instanceof Error ? parseError.message : String(parseError)),
+ stack: parseError instanceof Error ? parseError.stack : undefined,
+ name: 'SerializationError',
+ },
+ });
+ return;
+ }
+
+ // Ensure handlerFn is actually a function
+ if (typeof handlerFn !== 'function') {
+ clearTimeout(timeoutId);
+ parentPort.postMessage({
+ type: 'error',
+ error: {
+ message: 'Handler deserialization did not produce a function. ' +
+ 'Ensure your handler is a valid function when forceKillOnTimeout is enabled.',
+ name: 'SerializationError',
+ },
+ });
+ return;
+ }
+
+ handlerFn(payload, signal)
+ .then(() => {
+ clearTimeout(timeoutId);
+ parentPort.postMessage({ type: 'success' });
+ })
+ .catch((error) => {
+ clearTimeout(timeoutId);
+ parentPort.postMessage({
+ type: 'error',
+ error: {
+ message: error.message,
+ stack: error.stack,
+ name: error.name,
+ },
+ });
+ });
+ } catch (error) {
+ clearTimeout(timeoutId);
+ parentPort.postMessage({
+ type: 'error',
+ error: {
+ message: error.message,
+ stack: error.stack,
+ name: error.name,
+ },
+ });
+ }
+ })();
+ `;
+
+ const worker = new Worker(workerCode, {
+ eval: true,
+ workerData: {
+ handlerCode: handler.toString(),
+ payload,
+ timeoutMs,
+ },
+ });
+
+ let resolved = false;
+
+ worker.on('message', (message: { type: string; error?: any }) => {
+ if (resolved) return;
+ resolved = true;
+
+ if (message.type === 'success') {
+ resolve();
+ } else if (message.type === 'timeout') {
+ const timeoutError = new Error(
+ `Job timed out after ${timeoutMs} ms and was forcefully terminated`,
+ );
+ // @ts-ignore
+ timeoutError.failureReason = FailureReason.Timeout;
+ reject(timeoutError);
+ } else if (message.type === 'error') {
+ const error = new Error(message.error.message);
+ error.stack = message.error.stack;
+ error.name = message.error.name;
+ reject(error);
+ }
+ });
+
+ worker.on('error', (error) => {
+ if (resolved) return;
+ resolved = true;
+ reject(error);
+ });
+
+ worker.on('exit', (code) => {
+ if (resolved) return;
+ if (code !== 0) {
+ resolved = true;
+ reject(new Error(`Worker stopped with exit code ${code}`));
+ }
+ });
+
+ // Force terminate worker on timeout
+ setTimeout(() => {
+ if (!resolved) {
+ resolved = true;
+ worker
+ .terminate()
+ .then(() => {
+ const timeoutError = new Error(
+ `Job timed out after ${timeoutMs} ms and was forcefully terminated`,
+ );
+ // @ts-ignore
+ timeoutError.failureReason = FailureReason.Timeout;
+ reject(timeoutError);
+ })
+ .catch((err) => {
+ reject(err);
+ });
+ }
+ }, timeoutMs + 100); // Small buffer to ensure timeout is handled
+ });
+ }
+
+ /**
+ * Convert a WaitDuration to a target Date.
+ */
+ function calculateWaitUntil(duration: WaitDuration): Date {
+ const now = Date.now();
+ let ms = 0;
+ if (duration.seconds) ms += duration.seconds * 1000;
+ if (duration.minutes) ms += duration.minutes * 60 * 1000;
+ if (duration.hours) ms += duration.hours * 60 * 60 * 1000;
+ if (duration.days) ms += duration.days * 24 * 60 * 60 * 1000;
+ if (duration.weeks) ms += duration.weeks * 7 * 24 * 60 * 60 * 1000;
+ if (duration.months) ms += duration.months * 30 * 24 * 60 * 60 * 1000;
+ if (duration.years) ms += duration.years * 365 * 24 * 60 * 60 * 1000;
+ if (ms <= 0) {
+ throw new Error(
+ 'waitFor duration must be positive. Provide at least one positive duration field.',
+ );
+ }
+ return new Date(now + ms);
+ }
+
+ /**
+ * Create a no-op JobContext for cases where prolong/onTimeout are not supported
+ * (e.g. forceKillOnTimeout mode or no timeout set).
+ */
+ function createNoOpContext(
+ backend: QueueBackend,
+ jobId: number,
+ reason: string,
+ ): JobContext {
+ return {
+ prolong: () => {
+ log(`prolong() called but ignored: ${reason}`);
+ },
+ onTimeout: () => {
+ log(`onTimeout() called but ignored: ${reason}`);
+ },
+ run: async <T>(_stepName: string, fn: () => Promise<T>): Promise<T> => {
+ // In no-op context (forceKillOnTimeout), just execute the function directly
+ return fn();
+ },
+ waitFor: async () => {
+ throw new Error(
+ `waitFor() is not supported when forceKillOnTimeout is enabled. ${reason}`,
+ );
+ },
+ waitUntil: async () => {
+ throw new Error(
+ `waitUntil() is not supported when forceKillOnTimeout is enabled. ${reason}`,
+ );
+ },
+ createToken: async () => {
+ throw new Error(
+ `createToken() is not supported when forceKillOnTimeout is enabled. ${reason}`,
+ );
+ },
+ waitForToken: async () => {
+ throw new Error(
+ `waitForToken() is not supported when forceKillOnTimeout is enabled. ${reason}`,
+ );
+ },
+ setProgress: async (percent: number) => {
+ if (percent < 0 || percent > 100)
+ throw new Error('Progress must be between 0 and 100');
+ await backend.updateProgress(jobId, Math.round(percent));
+ },
+ };
+ }
+
+ /**
+ * Pre-process stepData before handler re-invocation.
+ * Marks pending waits as completed and fetches token outputs.
+ */
+ async function resolveCompletedWaits(
+ pool: Pool,
+ stepData: Record<string, any>,
+ ): Promise<void> {
+ for (const key of Object.keys(stepData)) {
+ if (!key.startsWith('__wait_')) continue;
+ const entry = stepData[key];
+ if (!entry || typeof entry !== 'object' || entry.completed) continue;
+
+ if (entry.type === 'duration' || entry.type === 'date') {
+ // Time-based wait has elapsed (we got picked up, so it must have)
+ stepData[key] = { ...entry, completed: true };
+ } else if (entry.type === 'token' && entry.tokenId) {
+ // Token-based wait -- fetch the waitpoint result
+ const wp = await getWaitpoint(pool, entry.tokenId);
+ if (wp && wp.status === 'completed') {
+ stepData[key] = {
+ ...entry,
+ completed: true,
+ result: { ok: true, output: wp.output },
+ };
+ } else if (wp && wp.status === 'timed_out') {
+ stepData[key] = {
+ ...entry,
+ completed: true,
+ result: { ok: false, error: 'Token timed out' },
+ };
+ }
+ // If still waiting (shouldn't happen), leave as pending
+ }
+ }
+ }
+
+ /**
+ * Build the extended JobContext with step tracking and wait support.
+ */
+ function buildWaitContext(
+ backend: QueueBackend,
+ pool: Pool,
+ jobId: number,
+ stepData: Record<string, any>,
+ baseCtx: {
+ prolong: JobContext['prolong'];
+ onTimeout: JobContext['onTimeout'];
+ },
+ ): JobContext {
+ // Wait counter always starts at 0 per invocation.
+ // The handler replays from the top each time, so the counter position
+ // must match the order of waitFor/waitUntil/waitForToken calls in code.
+ let waitCounter = 0;
+
+ const ctx: JobContext = {
+ prolong: baseCtx.prolong,
+ onTimeout: baseCtx.onTimeout,
+
+ run: async <T>(stepName: string, fn: () => Promise<T>): Promise<T> => {
+ // Check if step was already completed in a previous invocation
+ const cached = stepData[stepName];
+ if (cached && typeof cached === 'object' && cached.__completed) {
+ log(`Step "${stepName}" replayed from cache for job ${jobId}`);
+ return cached.result as T;
+ }
+
+ // Execute the step
+ const result = await fn();
+
+ // Persist step result
+ stepData[stepName] = { __completed: true, result };
+ await updateStepData(pool, jobId, stepData);
+
+ return result;
+ },
+
+ waitFor: async (duration: WaitDuration): Promise<void> => {
+ const waitKey = `__wait_${waitCounter++}`;
+
+ // Check if this wait was already completed (from a previous invocation)
+ const cached = stepData[waitKey];
+ if (cached && typeof cached === 'object' && cached.completed) {
+ log(`Wait "${waitKey}" already completed for job ${jobId}, skipping`);
+ return;
+ }
+
+ // Calculate when to resume
+ const waitUntilDate = calculateWaitUntil(duration);
+
+ // Record this wait as pending in step data
+ stepData[waitKey] = { type: 'duration', completed: false };
+
+ // Throw WaitSignal to pause the handler
+ throw new WaitSignal('duration', waitUntilDate, undefined, stepData);
+ },
+
+ waitUntil: async (date: Date): Promise<void> => {
+ const waitKey = `__wait_${waitCounter++}`;
+
+ // Check if this wait was already completed
+ const cached = stepData[waitKey];
+ if (cached && typeof cached === 'object' && cached.completed) {
+ log(`Wait "${waitKey}" already completed for job ${jobId}, skipping`);
+ return;
+ }
+
+ // Record this wait as pending
+ stepData[waitKey] = { type: 'date', completed: false };
+
+ // Throw WaitSignal to pause the handler
+ throw new WaitSignal('date', date, undefined, stepData);
+ },
+
+ createToken: async (options?) => {
+ const token = await createWaitpoint(pool, jobId, options);
+ return token;
+ },
+
+ waitForToken: async <T = any>(
+ tokenId: string,
+ ): Promise<WaitTokenResult<T>> => {
+ const waitKey = `__wait_${waitCounter++}`;
+
+ // Check if this wait was already completed
+ const cached = stepData[waitKey];
+ if (cached && typeof cached === 'object' && cached.completed) {
+ log(
+ `Token wait "${waitKey}" already completed for job ${jobId}, returning cached result`,
+ );
+ return cached.result as WaitTokenResult<T>;
+ }
+
+ // Check if the token is already completed (e.g., completed while job was still processing)
+ const wp = await getWaitpoint(pool, tokenId);
+ if (wp && wp.status === 'completed') {
+ const result: WaitTokenResult<T> = {
+ ok: true,
+ output: wp.output as T,
+ };
+ stepData[waitKey] = {
+ type: 'token',
+ tokenId,
+ completed: true,
+ result,
+ };
+ await updateStepData(pool, jobId, stepData);
+ return result;
+ }
+ if (wp && wp.status === 'timed_out') {
+ const result: WaitTokenResult<T> = {
+ ok: false,
+ error: 'Token timed out',
+ };
+ stepData[waitKey] = {
+ type: 'token',
+ tokenId,
+ completed: true,
+ result,
+ };
+ await updateStepData(pool, jobId, stepData);
+ return result;
+ }
+
+ // Token not yet completed -- save pending state and throw WaitSignal
+ stepData[waitKey] = { type: 'token', tokenId, completed: false };
+ throw new WaitSignal('token', undefined, tokenId, stepData);
+ },
+
+ setProgress: async (percent: number) => {
+ if (percent < 0 || percent > 100)
+ throw new Error('Progress must be between 0 and 100');
+ await backend.updateProgress(jobId, Math.round(percent));
+ },
+ };
+
+ return ctx;
+ }
+
  /**
  * Process a single job using the provided handler map
  */
@@ -23,20 +569,18 @@ export async function processJobWithHandlers<
  PayloadMap,
  T extends keyof PayloadMap & string,
  >(
- pool: Pool,
+ backend: QueueBackend,
  job: JobRecord<PayloadMap, T>,
  jobHandlers: JobHandlers<PayloadMap>,
  ): Promise<void> {
  const handler = jobHandlers[job.jobType];

  if (!handler) {
- await setPendingReasonForUnpickedJobs(
- pool,
+ await backend.setPendingReasonForUnpickedJobs(
  `No handler registered for job type: ${job.jobType}`,
  job.jobType,
  );
- await failJob(
- pool,
+ await backend.failJob(
  job.id,
  new Error(`No handler registered for job type: ${job.jobType}`),
  FailureReason.NoHandler,
@@ -44,46 +588,174 @@ export async function processJobWithHandlers<
  return;
  }

+ // Load step data (may contain completed steps from previous invocations)
+ const stepData: Record<string, any> = { ...(job.stepData || {}) };
+
+ // Try to get pool for wait features (PostgreSQL-only)
+ const pool = tryExtractPool(backend);
+
+ // If resuming from a wait, resolve any pending wait entries
+ const hasStepHistory = Object.keys(stepData).some((k) =>
+ k.startsWith('__wait_'),
+ );
+ if (hasStepHistory && pool) {
+ await resolveCompletedWaits(pool, stepData);
+ // Persist the resolved step data
+ await updateStepData(pool, job.id, stepData);
+ }
+
  // Per-job timeout logic
  const timeoutMs = job.timeoutMs ?? undefined;
+ const forceKillOnTimeout = job.forceKillOnTimeout ?? false;
  let timeoutId: NodeJS.Timeout | undefined;
  const controller = new AbortController();
  try {
- const jobPromise = handler(job.payload, controller.signal);
- if (timeoutMs && timeoutMs > 0) {
- await Promise.race([
- jobPromise,
- new Promise((_, reject) => {
- timeoutId = setTimeout(() => {
- controller.abort();
- const timeoutError = new Error(
- `Job timed out after ${timeoutMs} ms`,
- );
- // @ts-ignore
- timeoutError.failureReason = FailureReason.Timeout;
- reject(timeoutError);
- }, timeoutMs);
- }),
- ]);
+ // If forceKillOnTimeout is true, run handler in a worker thread
+ // Note: wait features are not available in forceKillOnTimeout mode
+ if (forceKillOnTimeout && timeoutMs && timeoutMs > 0) {
+ await runHandlerInWorker(handler, job.payload, timeoutMs, job.jobType);
  } else {
- await jobPromise;
+ // Build the JobContext for prolong/onTimeout support
+ let onTimeoutCallback: OnTimeoutCallback | undefined;
+
+ // Reference to the reject function of the timeout promise so we can re-arm it
+ let timeoutReject: ((error: Error) => void) | undefined;
+
+ /**
+ * Arms (or re-arms) the timeout. When it fires:
+ * 1. If an onTimeout callback is registered, call it first.
+ * - If it returns a positive number, re-arm with that duration and update DB.
+ * - Otherwise, proceed with abort.
+ * 2. If no onTimeout callback, proceed with abort.
+ */
+ const armTimeout = (ms: number) => {
+ if (timeoutId) clearTimeout(timeoutId);
+ timeoutId = setTimeout(() => {
+ // Check if an onTimeout callback wants to extend
+ if (onTimeoutCallback) {
+ try {
+ const extension = onTimeoutCallback();
+ if (typeof extension === 'number' && extension > 0) {
+ // Extend: re-arm timeout and update DB
+ backend.prolongJob(job.id).catch(() => {});
+ armTimeout(extension);
+ return;
+ }
+ } catch (callbackError) {
+ log(
+ `onTimeout callback threw for job ${job.id}: ${callbackError}`,
+ );
+ // Treat as "no extension" and proceed with abort
+ }
+ }
+ // No extension -- proceed with abort
+ controller.abort();
+ const timeoutError = new Error(`Job timed out after ${ms} ms`);
+ // @ts-ignore
+ timeoutError.failureReason = FailureReason.Timeout;
+ if (timeoutReject) {
+ timeoutReject(timeoutError);
+ }
+ }, ms);
+ };
+
+ const hasTimeout = timeoutMs != null && timeoutMs > 0;
+
+ // Build base prolong/onTimeout context
+ const baseCtx = hasTimeout
+ ? {
+ prolong: (ms?: number) => {
+ const duration = ms ?? timeoutMs;
+ if (duration != null && duration > 0) {
+ armTimeout(duration);
+ // Update DB locked_at to prevent reclaimStuckJobs
+ backend.prolongJob(job.id).catch(() => {});
+ }
+ },
+ onTimeout: (callback: OnTimeoutCallback) => {
+ onTimeoutCallback = callback;
+ },
+ }
+ : {
+ prolong: () => {
+ log('prolong() called but ignored: job has no timeout set');
+ },
+ onTimeout: () => {
+ log('onTimeout() called but ignored: job has no timeout set');
+ },
+ };
+
+ // Build context: full wait support for PostgreSQL, basic for others
+ const ctx = pool
+ ? buildWaitContext(backend, pool, job.id, stepData, baseCtx)
+ : buildBasicContext(backend, job.id, baseCtx);
+
+ // If forceKillOnTimeout was set but timeoutMs was missing, warn
+ if (forceKillOnTimeout && !hasTimeout) {
+ log(
+ `forceKillOnTimeout is set but no timeoutMs for job ${job.id}, running without force kill`,
+ );
+ }
+
+ const jobPromise = handler(job.payload, controller.signal, ctx);
+
+ if (hasTimeout) {
+ await Promise.race([
+ jobPromise,
+ new Promise<never>((_, reject) => {
+ timeoutReject = reject;
+ armTimeout(timeoutMs!);
+ }),
+ ]);
+ } else {
+ await jobPromise;
+ }
  }
  if (timeoutId) clearTimeout(timeoutId);
- await completeJob(pool, job.id);
+
+ // Job completed successfully -- complete via backend
+ await backend.completeJob(job.id);
  } catch (error) {
  if (timeoutId) clearTimeout(timeoutId);
+
+ // Check if this is a WaitSignal (not a real error)
+ if (error instanceof WaitSignal) {
+ if (!pool) {
+ // Wait signals should never happen with non-PostgreSQL backends
+ // since the context methods throw, but guard just in case
+ await backend.failJob(
+ job.id,
+ new Error(
+ 'WaitSignal received but wait features require the PostgreSQL backend.',
+ ),
+ FailureReason.HandlerError,
+ );
+ return;
+ }
+ log(
+ `Job ${job.id} entering wait: type=${error.type}, waitUntil=${error.waitUntil?.toISOString() ?? 'none'}, tokenId=${error.tokenId ?? 'none'}`,
+ );
+ await waitJob(pool, job.id, {
+ waitUntil: error.waitUntil,
+ waitTokenId: error.tokenId,
+ stepData: error.stepData,
+ });
+ return;
+ }
+
+ // Real error -- handle as failure
  console.error(`Error processing job ${job.id}:`, error);
  let failureReason = FailureReason.HandlerError;
  if (
  error &&
  typeof error === 'object' &&
  'failureReason' in error &&
- (error as any).failureReason === FailureReason.Timeout
+ (error as { failureReason?: FailureReason }).failureReason ===
+ FailureReason.Timeout
  ) {
  failureReason = FailureReason.Timeout;
  }
- await failJob(
- pool,
+ await backend.failJob(
  job.id,
  error instanceof Error ? error : new Error(String(error)),
  failureReason,
@@ -95,15 +767,15 @@ export async function processJobWithHandlers<
  * Process a batch of jobs using the provided handler map and concurrency limit
  */
  export async function processBatchWithHandlers<PayloadMap>(
- pool: Pool,
+ backend: QueueBackend,
  workerId: string,
  batchSize: number,
  jobType: string | string[] | undefined,
  jobHandlers: JobHandlers<PayloadMap>,
  concurrency?: number,
+ onError?: (error: Error) => void,
  ): Promise<number> {
- const jobs = await getNextBatch<PayloadMap, JobType<PayloadMap>>(
- pool,
+ const jobs = await backend.getNextBatch<PayloadMap, JobType<PayloadMap>>(
  workerId,
  batchSize,
  jobType,
@@ -111,7 +783,7 @@ export async function processBatchWithHandlers<PayloadMap>(
  if (!concurrency || concurrency >= jobs.length) {
  // Default: all in parallel
  await Promise.all(
- jobs.map((job) => processJobWithHandlers(pool, job, jobHandlers)),
+ jobs.map((job) => processJobWithHandlers(backend, job, jobHandlers)),
  );
  return jobs.length;
  }
@@ -125,7 +797,7 @@ export async function processBatchWithHandlers<PayloadMap>(
  while (running < concurrency && idx < jobs.length) {
  const job = jobs[idx++];
  running++;
- processJobWithHandlers(pool, job, jobHandlers)
+ processJobWithHandlers(backend, job, jobHandlers)
  .then(() => {
  running--;
  finished++;
@@ -134,6 +806,9 @@ export async function processBatchWithHandlers<PayloadMap>(
  .catch((err) => {
  running--;
  finished++;
+ if (onError) {
+ onError(err instanceof Error ? err : new Error(String(err)));
+ }
  next();
  });
  }
@@ -144,13 +819,13 @@ export async function processBatchWithHandlers<PayloadMap>(

  /**
  * Start a job processor that continuously processes jobs
- * @param pool - The database pool
+ * @param backend - The queue backend
  * @param handlers - The job handlers for this processor instance
  * @param options - The processor options. Leave pollInterval empty to run only once. Use jobType to filter jobs by type.
  * @returns {Processor} The processor instance
  */
  export const createProcessor = <PayloadMap = any>(
- pool: Pool,
+ backend: QueueBackend,
  handlers: JobHandlers<PayloadMap>,
  options: ProcessorOptions = {},
  ): Processor => {
@@ -165,6 +840,7 @@ export const createProcessor = <PayloadMap = any>(

  let running = false;
  let intervalId: NodeJS.Timeout | null = null;
+ let currentBatchPromise: Promise<number> | null = null;

  setLogContext(options.verbose ?? false);

@@ -177,12 +853,13 @@

  try {
  const processed = await processBatchWithHandlers(
- pool,
+ backend,
  workerId,
  batchSize,
  jobType,
  handlers,
  concurrency,
+ onError,
  );
  // Only process one batch in start; do not schedule next batch here
  return processed;
@@ -203,28 +880,63 @@

  log(`Starting job processor with workerId: ${workerId}`);
  running = true;
- // Background: process batches repeatedly if needed
- const processBatches = async () => {
+
+ // Single serialized loop: process a batch, then either immediately
+ // continue (if full batch was returned) or wait pollInterval.
+ const scheduleNext = (immediate: boolean) => {
  if (!running) return;
- const processed = await processJobs();
- if (processed === batchSize && running) {
- setImmediate(processBatches);
+ if (immediate) {
+ intervalId = setTimeout(loop, 0);
+ } else {
+ intervalId = setTimeout(loop, pollInterval);
  }
  };
- processBatches(); // Process immediately on start
- intervalId = setInterval(processJobs, pollInterval);
+
+ const loop = async () => {
+ if (!running) return;
+ currentBatchPromise = processJobs();
+ const processed = await currentBatchPromise;
+ currentBatchPromise = null;
+ // If we got a full batch, there may be more work — process immediately
+ scheduleNext(processed === batchSize);
+ };
+
+ // Start the first iteration immediately
+ loop();
  },
  /**
- * Stop the job processor that runs in the background
+ * Stop the job processor that runs in the background.
+ * Does not wait for in-flight jobs.
  */
  stop: () => {
  log(`Stopping job processor with workerId: ${workerId}`);
  running = false;
  if (intervalId) {
- clearInterval(intervalId);
+ clearTimeout(intervalId);
  intervalId = null;
  }
  },
+ /**
+ * Stop the job processor and wait for all in-flight jobs to complete.
+ * Useful for graceful shutdown (e.g., SIGTERM handling).
+ */
+ stopAndDrain: async (drainTimeoutMs = 30000) => {
+ log(`Stopping and draining job processor with workerId: ${workerId}`);
+ running = false;
+ if (intervalId) {
+ clearTimeout(intervalId);
+ intervalId = null;
+ }
+ // Wait for current batch to finish, with a timeout
+ if (currentBatchPromise) {
+ await Promise.race([
+ currentBatchPromise.catch(() => {}),
+ new Promise<void>((resolve) => setTimeout(resolve, drainTimeoutMs)),
+ ]);
+ currentBatchPromise = null;
+ }
+ log(`Job processor ${workerId} drained`);
+ },
  /**
  * Start the job processor synchronously.
  * - This will process all jobs immediately and then stop.