bunqueue 1.9.1 → 1.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165)
  1. package/dist/application/operations/queueControl.d.ts.map +1 -1
  2. package/dist/application/operations/queueControl.js +6 -1
  3. package/dist/application/operations/queueControl.js.map +1 -1
  4. package/dist/application/queueManager.d.ts +115 -9
  5. package/dist/application/queueManager.d.ts.map +1 -1
  6. package/dist/application/queueManager.js +535 -26
  7. package/dist/application/queueManager.js.map +1 -1
  8. package/dist/cli/client.d.ts +6 -0
  9. package/dist/cli/client.d.ts.map +1 -1
  10. package/dist/cli/client.js +60 -48
  11. package/dist/cli/client.js.map +1 -1
  12. package/dist/cli/commands/server.d.ts.map +1 -1
  13. package/dist/cli/commands/server.js +30 -14
  14. package/dist/cli/commands/server.js.map +1 -1
  15. package/dist/cli/help.d.ts.map +1 -1
  16. package/dist/cli/help.js +10 -6
  17. package/dist/cli/help.js.map +1 -1
  18. package/dist/cli/index.d.ts.map +1 -1
  19. package/dist/cli/index.js +15 -2
  20. package/dist/cli/index.js.map +1 -1
  21. package/dist/client/queue/dlqOps.d.ts +24 -0
  22. package/dist/client/queue/dlqOps.d.ts.map +1 -0
  23. package/dist/client/queue/dlqOps.js +73 -0
  24. package/dist/client/queue/dlqOps.js.map +1 -0
  25. package/dist/client/queue/helpers.d.ts +20 -0
  26. package/dist/client/queue/helpers.d.ts.map +1 -0
  27. package/dist/client/queue/helpers.js +34 -0
  28. package/dist/client/queue/helpers.js.map +1 -0
  29. package/dist/client/queue/index.d.ts +8 -0
  30. package/dist/client/queue/index.d.ts.map +1 -0
  31. package/dist/client/queue/index.js +8 -0
  32. package/dist/client/queue/index.js.map +1 -0
  33. package/dist/client/queue/queue.d.ts +60 -0
  34. package/dist/client/queue/queue.d.ts.map +1 -0
  35. package/dist/client/queue/queue.js +322 -0
  36. package/dist/client/queue/queue.js.map +1 -0
  37. package/dist/client/queue.d.ts +3 -78
  38. package/dist/client/queue.d.ts.map +1 -1
  39. package/dist/client/queue.js +3 -463
  40. package/dist/client/queue.js.map +1 -1
  41. package/dist/client/sandboxed/index.d.ts +8 -0
  42. package/dist/client/sandboxed/index.d.ts.map +1 -0
  43. package/dist/client/sandboxed/index.js +7 -0
  44. package/dist/client/sandboxed/index.js.map +1 -0
  45. package/dist/client/sandboxed/types.d.ts +62 -0
  46. package/dist/client/sandboxed/types.d.ts.map +1 -0
  47. package/dist/client/sandboxed/types.js +6 -0
  48. package/dist/client/sandboxed/types.js.map +1 -0
  49. package/dist/client/sandboxed/worker.d.ts +38 -0
  50. package/dist/client/sandboxed/worker.d.ts.map +1 -0
  51. package/dist/client/sandboxed/worker.js +176 -0
  52. package/dist/client/sandboxed/worker.js.map +1 -0
  53. package/dist/client/sandboxed/wrapper.d.ts +13 -0
  54. package/dist/client/sandboxed/wrapper.d.ts.map +1 -0
  55. package/dist/client/sandboxed/wrapper.js +65 -0
  56. package/dist/client/sandboxed/wrapper.js.map +1 -0
  57. package/dist/client/sandboxedWorker.d.ts +4 -87
  58. package/dist/client/sandboxedWorker.d.ts.map +1 -1
  59. package/dist/client/sandboxedWorker.js +3 -296
  60. package/dist/client/sandboxedWorker.js.map +1 -1
  61. package/dist/client/tcp/client.d.ts +40 -0
  62. package/dist/client/tcp/client.d.ts.map +1 -0
  63. package/dist/client/tcp/client.js +289 -0
  64. package/dist/client/tcp/client.js.map +1 -0
  65. package/dist/client/tcp/connection.d.ts +57 -0
  66. package/dist/client/tcp/connection.d.ts.map +1 -0
  67. package/dist/client/tcp/connection.js +162 -0
  68. package/dist/client/tcp/connection.js.map +1 -0
  69. package/dist/client/tcp/health.d.ts +47 -0
  70. package/dist/client/tcp/health.d.ts.map +1 -0
  71. package/dist/client/tcp/health.js +95 -0
  72. package/dist/client/tcp/health.js.map +1 -0
  73. package/dist/client/tcp/index.d.ts +13 -0
  74. package/dist/client/tcp/index.d.ts.map +1 -0
  75. package/dist/client/tcp/index.js +12 -0
  76. package/dist/client/tcp/index.js.map +1 -0
  77. package/dist/client/tcp/lineBuffer.d.ts +17 -0
  78. package/dist/client/tcp/lineBuffer.d.ts.map +1 -0
  79. package/dist/client/tcp/lineBuffer.js +32 -0
  80. package/dist/client/tcp/lineBuffer.js.map +1 -0
  81. package/dist/client/tcp/reconnect.d.ts +38 -0
  82. package/dist/client/tcp/reconnect.d.ts.map +1 -0
  83. package/dist/client/tcp/reconnect.js +70 -0
  84. package/dist/client/tcp/reconnect.js.map +1 -0
  85. package/dist/client/tcp/shared.d.ts +11 -0
  86. package/dist/client/tcp/shared.d.ts.map +1 -0
  87. package/dist/client/tcp/shared.js +20 -0
  88. package/dist/client/tcp/shared.js.map +1 -0
  89. package/dist/client/tcp/types.d.ts +76 -0
  90. package/dist/client/tcp/types.d.ts.map +1 -0
  91. package/dist/client/tcp/types.js +20 -0
  92. package/dist/client/tcp/types.js.map +1 -0
  93. package/dist/client/tcpClient.d.ts +4 -110
  94. package/dist/client/tcpClient.d.ts.map +1 -1
  95. package/dist/client/tcpClient.js +3 -523
  96. package/dist/client/tcpClient.js.map +1 -1
  97. package/dist/client/tcpPool.d.ts +3 -0
  98. package/dist/client/tcpPool.d.ts.map +1 -1
  99. package/dist/client/tcpPool.js +21 -2
  100. package/dist/client/tcpPool.js.map +1 -1
  101. package/dist/client/types.d.ts +11 -2
  102. package/dist/client/types.d.ts.map +1 -1
  103. package/dist/client/types.js.map +1 -1
  104. package/dist/client/worker/ackBatcher.d.ts +40 -0
  105. package/dist/client/worker/ackBatcher.d.ts.map +1 -0
  106. package/dist/client/worker/ackBatcher.js +137 -0
  107. package/dist/client/worker/ackBatcher.js.map +1 -0
  108. package/dist/client/worker/index.d.ts +11 -0
  109. package/dist/client/worker/index.d.ts.map +1 -0
  110. package/dist/client/worker/index.js +10 -0
  111. package/dist/client/worker/index.js.map +1 -0
  112. package/dist/client/worker/jobParser.d.ts +10 -0
  113. package/dist/client/worker/jobParser.d.ts.map +1 -0
  114. package/dist/client/worker/jobParser.js +43 -0
  115. package/dist/client/worker/jobParser.js.map +1 -0
  116. package/dist/client/worker/processor.d.ts +24 -0
  117. package/dist/client/worker/processor.d.ts.map +1 -0
  118. package/dist/client/worker/processor.js +86 -0
  119. package/dist/client/worker/processor.js.map +1 -0
  120. package/dist/client/worker/types.d.ts +38 -0
  121. package/dist/client/worker/types.d.ts.map +1 -0
  122. package/dist/client/worker/types.js +14 -0
  123. package/dist/client/worker/types.js.map +1 -0
  124. package/dist/client/worker/worker.d.ts +53 -0
  125. package/dist/client/worker/worker.d.ts.map +1 -0
  126. package/dist/client/worker/worker.js +367 -0
  127. package/dist/client/worker/worker.js.map +1 -0
  128. package/dist/client/worker.d.ts +3 -69
  129. package/dist/client/worker.d.ts.map +1 -1
  130. package/dist/client/worker.js +3 -472
  131. package/dist/client/worker.js.map +1 -1
  132. package/dist/domain/queue/shard.d.ts +19 -2
  133. package/dist/domain/queue/shard.d.ts.map +1 -1
  134. package/dist/domain/queue/shard.js +36 -4
  135. package/dist/domain/queue/shard.js.map +1 -1
  136. package/dist/domain/types/command.d.ts +9 -0
  137. package/dist/domain/types/command.d.ts.map +1 -1
  138. package/dist/domain/types/job.d.ts +27 -0
  139. package/dist/domain/types/job.d.ts.map +1 -1
  140. package/dist/domain/types/job.js +34 -0
  141. package/dist/domain/types/job.js.map +1 -1
  142. package/dist/domain/types/response.d.ts +15 -1
  143. package/dist/domain/types/response.d.ts.map +1 -1
  144. package/dist/domain/types/response.js +16 -0
  145. package/dist/domain/types/response.js.map +1 -1
  146. package/dist/infrastructure/server/handlers/core.d.ts +1 -1
  147. package/dist/infrastructure/server/handlers/core.d.ts.map +1 -1
  148. package/dist/infrastructure/server/handlers/core.js +74 -15
  149. package/dist/infrastructure/server/handlers/core.js.map +1 -1
  150. package/dist/infrastructure/server/handlers/monitoring.d.ts.map +1 -1
  151. package/dist/infrastructure/server/handlers/monitoring.js +6 -4
  152. package/dist/infrastructure/server/handlers/monitoring.js.map +1 -1
  153. package/dist/infrastructure/server/http.d.ts +10 -3
  154. package/dist/infrastructure/server/http.d.ts.map +1 -1
  155. package/dist/infrastructure/server/http.js +244 -163
  156. package/dist/infrastructure/server/http.js.map +1 -1
  157. package/dist/infrastructure/server/tcp.d.ts +8 -3
  158. package/dist/infrastructure/server/tcp.d.ts.map +1 -1
  159. package/dist/infrastructure/server/tcp.js +77 -57
  160. package/dist/infrastructure/server/tcp.js.map +1 -1
  161. package/dist/infrastructure/server/types.d.ts +2 -0
  162. package/dist/infrastructure/server/types.d.ts.map +1 -1
  163. package/dist/main.js +24 -4
  164. package/dist/main.js.map +1 -1
  165. package/package.json +1 -1
package/dist/application/queueManager.js
@@ -2,7 +2,7 @@
  * Queue Manager
  * Core orchestrator for all queue operations
  */
-import { calculateBackoff } from '../domain/types/job';
+import { calculateBackoff, createJobLock, isLockExpired, renewLock, DEFAULT_LOCK_TTL, } from '../domain/types/job';
 import { queueLog } from '../shared/logger';
 import { getStallAction, incrementStallCount } from '../domain/types/stall';
 import { Shard } from '../domain/queue/shard';
@@ -55,6 +55,15 @@ export class QueueManager {
     // Deferred dependency resolution queue (to avoid lock order violations)
     pendingDepChecks = new Set();
     depCheckInterval = null;
+    // Two-phase stall detection (like BullMQ)
+    // Jobs are added here on first check, confirmed stalled on second check
+    stalledCandidates = new Set();
+    // Lock-based job ownership tracking (BullMQ-style)
+    // Maps jobId to lock info (token, owner, expiration)
+    jobLocks = new Map();
+    // Client-job tracking for connection-based release
+    // When a TCP connection closes, all jobs owned by that client are released
+    clientJobs = new Map();
     // Cron scheduler
     cronScheduler;
     // Managers
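
The three fields added above are plain in-memory structures. A minimal TypeScript sketch of their shapes, inferred from how this diff uses them (the JobLock fields beyond token, owner, and renewalCount are assumptions; the dist output itself is untyped JavaScript):

    // Sketch only: shapes inferred from usage later in this diff.
    type JobId = string | number;

    interface JobLock {
        token: string;         // random token returned to the puller; compared by verifyLock
        owner: string;         // worker/client identifier passed to createLock
        expiresAt: number;     // assumed: epoch ms consulted by isLockExpired
        renewalCount: number;  // incremented by renewLock, logged on expiry
    }

    // The new QueueManager fields:
    const stalledCandidates = new Set<JobId>();        // first-sweep stall suspects
    const jobLocks = new Map<JobId, JobLock>();        // jobId -> active lock
    const clientJobs = new Map<string, Set<JobId>>();  // TCP clientId -> owned jobs
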
@@ -76,6 +85,7 @@ export class QueueManager {
     timeoutInterval = null;
     stallCheckInterval = null;
     dlqMaintenanceInterval = null;
+    lockCheckInterval = null;
     // Queue names cache for O(1) listQueues instead of O(32 * queues)
     queueNamesCache = new Set();
     constructor(config = {}) {
@@ -224,29 +234,98 @@ export class QueueManager {
     async pull(queue, timeoutMs = 0) {
         return pullJob(queue, timeoutMs, this.getPullContext());
     }
+    /**
+     * Pull a job and create a lock for it (BullMQ-style).
+     * Returns both the job and its lock token for ownership verification.
+     */
+    async pullWithLock(queue, owner, timeoutMs = 0, lockTtl = DEFAULT_LOCK_TTL) {
+        const job = await pullJob(queue, timeoutMs, this.getPullContext());
+        if (!job)
+            return { job: null, token: null };
+        const token = this.createLock(job.id, owner, lockTtl);
+        return { job, token };
+    }
     /** Pull multiple jobs in single lock acquisition - O(1) instead of O(n) locks */
     async pullBatch(queue, count, timeoutMs = 0) {
         return pullJobBatch(queue, count, timeoutMs, this.getPullContext());
     }
-    async ack(jobId, result) {
-        return ackJob(jobId, result, this.getAckContext());
+    /**
+     * Pull multiple jobs and create locks for them (BullMQ-style).
+     * Returns both jobs and their lock tokens for ownership verification.
+     */
+    async pullBatchWithLock(queue, count, owner, timeoutMs = 0, lockTtl = DEFAULT_LOCK_TTL) {
+        const jobs = await pullJobBatch(queue, count, timeoutMs, this.getPullContext());
+        const tokens = [];
+        for (const job of jobs) {
+            const token = this.createLock(job.id, owner, lockTtl);
+            tokens.push(token ?? '');
+        }
+        return { jobs, tokens };
+    }
+    async ack(jobId, result, token) {
+        // If token provided, verify ownership before acknowledging
+        if (token && !this.verifyLock(jobId, token)) {
+            throw new Error(`Invalid or expired lock token for job ${jobId}`);
+        }
+        await ackJob(jobId, result, this.getAckContext());
+        // Release lock after successful ack
+        this.releaseLock(jobId, token);
     }
     /** Acknowledge multiple jobs in parallel with Promise.all */
-    async ackBatch(jobIds) {
-        return ackJobBatch(jobIds, this.getAckContext());
+    async ackBatch(jobIds, tokens) {
+        // Verify all tokens first if provided
+        if (tokens?.length === jobIds.length) {
+            for (let i = 0; i < jobIds.length; i++) {
+                const t = tokens[i];
+                if (t && !this.verifyLock(jobIds[i], t)) {
+                    throw new Error(`Invalid or expired lock token for job ${jobIds[i]}`);
+                }
+            }
+        }
+        await ackJobBatch(jobIds, this.getAckContext());
+        // Release locks after successful ack
+        if (tokens) {
+            for (let i = 0; i < jobIds.length; i++) {
+                this.releaseLock(jobIds[i], tokens[i]);
+            }
+        }
     }
     /** Acknowledge multiple jobs with individual results - batch optimized */
     async ackBatchWithResults(items) {
-        return ackJobBatchWithResults(items, this.getAckContext());
+        // Verify all tokens first if provided
+        for (const item of items) {
+            if (item.token && !this.verifyLock(item.id, item.token)) {
+                throw new Error(`Invalid or expired lock token for job ${item.id}`);
+            }
+        }
+        await ackJobBatchWithResults(items, this.getAckContext());
+        // Release locks after successful ack
+        for (const item of items) {
+            this.releaseLock(item.id, item.token);
+        }
     }
-    async fail(jobId, error) {
-        return failJob(jobId, error, this.getAckContext());
+    async fail(jobId, error, token) {
+        // If token provided, verify ownership before failing
+        if (token && !this.verifyLock(jobId, token)) {
+            throw new Error(`Invalid or expired lock token for job ${jobId}`);
+        }
+        await failJob(jobId, error, this.getAckContext());
+        // Release lock after fail
+        this.releaseLock(jobId, token);
     }
-    /** Update job heartbeat for stall detection (single job) */
-    jobHeartbeat(jobId) {
+    /**
+     * Update job heartbeat for stall detection (single job).
+     * If token is provided, also renews the lock.
+     */
+    jobHeartbeat(jobId, token) {
         const loc = this.jobIndex.get(jobId);
         if (loc?.type !== 'processing')
             return false;
+        // If token provided, renew lock (which also updates heartbeat)
+        if (token) {
+            return this.renewJobLock(jobId, token);
+        }
+        // Legacy mode: just update heartbeat without token verification
         const processing = this.processingShards[loc.shardIdx];
         const job = processing.get(jobId);
         if (job) {
@@ -255,15 +334,263 @@ export class QueueManager {
         }
         return false;
     }
-    /** Update job heartbeat for multiple jobs (batch) */
-    jobHeartbeatBatch(jobIds) {
+    /**
+     * Update job heartbeat for multiple jobs (batch).
+     * If tokens are provided, also renews the locks.
+     */
+    jobHeartbeatBatch(jobIds, tokens) {
         let count = 0;
-        for (const id of jobIds) {
-            if (this.jobHeartbeat(id))
+        for (let i = 0; i < jobIds.length; i++) {
+            const token = tokens?.[i];
+            if (this.jobHeartbeat(jobIds[i], token))
                 count++;
         }
         return count;
     }
+    // ============ Lock Management (BullMQ-style) ============
+    /**
+     * Create a lock for a job when it's pulled for processing.
+     * @returns The lock token, or null if job not in processing
+     */
+    createLock(jobId, owner, ttl = DEFAULT_LOCK_TTL) {
+        const loc = this.jobIndex.get(jobId);
+        if (loc?.type !== 'processing')
+            return null;
+        // Check if lock already exists (shouldn't happen, but defensive)
+        if (this.jobLocks.has(jobId)) {
+            queueLog.warn('Lock already exists for job', { jobId: String(jobId), owner });
+            return null;
+        }
+        const lock = createJobLock(jobId, owner, ttl);
+        this.jobLocks.set(jobId, lock);
+        return lock.token;
+    }
+    /**
+     * Verify that a token is valid for a job.
+     * @returns true if token matches the active lock
+     */
+    verifyLock(jobId, token) {
+        const lock = this.jobLocks.get(jobId);
+        if (!lock)
+            return false;
+        if (lock.token !== token)
+            return false;
+        if (isLockExpired(lock))
+            return false;
+        return true;
+    }
+    /**
+     * Renew a lock with the given token.
+     * @returns true if renewal succeeded, false if token invalid or lock expired
+     */
+    renewJobLock(jobId, token, newTtl) {
+        const lock = this.jobLocks.get(jobId);
+        if (!lock)
+            return false;
+        if (lock.token !== token)
+            return false;
+        if (isLockExpired(lock)) {
+            // Lock already expired, remove it
+            this.jobLocks.delete(jobId);
+            return false;
+        }
+        renewLock(lock, newTtl);
+        // Also update lastHeartbeat on the job (for legacy stall detection compatibility)
+        const loc = this.jobIndex.get(jobId);
+        if (loc?.type === 'processing') {
+            const job = this.processingShards[loc.shardIdx].get(jobId);
+            if (job)
+                job.lastHeartbeat = Date.now();
+        }
+        return true;
+    }
+    /**
+     * Renew locks for multiple jobs (batch operation).
+     * @returns Array of jobIds that were successfully renewed
+     */
+    renewJobLockBatch(items) {
+        const renewed = [];
+        for (const item of items) {
+            if (this.renewJobLock(item.id, item.token, item.ttl)) {
+                renewed.push(String(item.id));
+            }
+        }
+        return renewed;
+    }
+    /**
+     * Release a lock when job is completed or failed.
+     * Should be called by ACK/FAIL operations.
+     */
+    releaseLock(jobId, token) {
+        const lock = this.jobLocks.get(jobId);
+        if (!lock)
+            return true; // No lock to release
+        // If token provided, verify it matches
+        if (token && lock.token !== token) {
+            queueLog.warn('Token mismatch on lock release', {
+                jobId: String(jobId),
+                expected: lock.token.substring(0, 8),
+                got: token.substring(0, 8),
+            });
+            return false;
+        }
+        this.jobLocks.delete(jobId);
+        return true;
+    }
+    /**
+     * Get lock info for a job (for debugging/monitoring).
+     */
+    getLockInfo(jobId) {
+        return this.jobLocks.get(jobId) ?? null;
+    }
+    // ============ Client-Job Tracking ============
+    /**
+     * Register a job as owned by a client (called on PULL).
+     */
+    registerClientJob(clientId, jobId) {
+        let jobs = this.clientJobs.get(clientId);
+        if (!jobs) {
+            jobs = new Set();
+            this.clientJobs.set(clientId, jobs);
+        }
+        jobs.add(jobId);
+    }
+    /**
+     * Unregister a job from a client (called on ACK/FAIL).
+     */
+    unregisterClientJob(clientId, jobId) {
+        if (!clientId)
+            return;
+        const jobs = this.clientJobs.get(clientId);
+        if (jobs) {
+            jobs.delete(jobId);
+            if (jobs.size === 0) {
+                this.clientJobs.delete(clientId);
+            }
+        }
+    }
+    /**
+     * Release all jobs owned by a client back to queue (called on TCP disconnect).
+     * Returns the number of jobs released.
+     */
+    releaseClientJobs(clientId) {
+        const jobs = this.clientJobs.get(clientId);
+        if (!jobs || jobs.size === 0) {
+            this.clientJobs.delete(clientId);
+            return 0;
+        }
+        let released = 0;
+        const now = Date.now();
+        for (const jobId of jobs) {
+            const loc = this.jobIndex.get(jobId);
+            if (loc?.type !== 'processing')
+                continue;
+            const procIdx = loc.shardIdx;
+            const job = this.processingShards[procIdx].get(jobId);
+            if (!job)
+                continue;
+            // Remove from processing
+            this.processingShards[procIdx].delete(jobId);
+            // Release lock if exists
+            this.jobLocks.delete(jobId);
+            // Release concurrency
+            const idx = shardIndex(job.queue);
+            const shard = this.shards[idx];
+            shard.releaseConcurrency(job.queue);
+            // Release group if active
+            if (job.groupId) {
+                shard.releaseGroup(job.queue, job.groupId);
+            }
+            // Reset job state for retry
+            job.startedAt = null;
+            job.lastHeartbeat = now;
+            // Re-queue the job
+            shard.getQueue(job.queue).push(job);
+            const isDelayed = job.runAt > now;
+            shard.incrementQueued(jobId, isDelayed, job.createdAt, job.queue, job.runAt);
+            this.jobIndex.set(jobId, { type: 'queue', shardIdx: idx, queueName: job.queue });
+            released++;
+        }
+        // Clear client tracking
+        this.clientJobs.delete(clientId);
+        if (released > 0) {
+            queueLog.info('Released client jobs', { clientId: clientId.substring(0, 8), released });
+        }
+        return released;
+    }
+    /**
+     * Check and handle expired locks.
+     * Jobs with expired locks are requeued for retry.
+     */
+    checkExpiredLocks() {
+        const now = Date.now();
+        const expired = [];
+        for (const [jobId, lock] of this.jobLocks) {
+            if (isLockExpired(lock, now)) {
+                expired.push({ jobId, lock });
+            }
+        }
+        for (const { jobId, lock } of expired) {
+            const procIdx = processingShardIndex(String(jobId));
+            const job = this.processingShards[procIdx].get(jobId);
+            if (job) {
+                const idx = shardIndex(job.queue);
+                const shard = this.shards[idx];
+                const queue = shard.getQueue(job.queue);
+                // Remove from processing
+                this.processingShards[procIdx].delete(jobId);
+                // Increment attempts and reset state
+                job.attempts++;
+                job.startedAt = null;
+                job.lastHeartbeat = now;
+                job.stallCount++;
+                // Check if max stalls exceeded
+                const stallConfig = shard.getStallConfig(job.queue);
+                if (stallConfig.maxStalls > 0 && job.stallCount >= stallConfig.maxStalls) {
+                    // Move to DLQ using shard's addToDlq method
+                    shard.addToDlq(job, "stalled" /* FailureReason.Stalled */, `Lock expired after ${lock.renewalCount} renewals`);
+                    this.jobIndex.set(jobId, { type: 'dlq', queueName: job.queue });
+                    queueLog.warn('Job moved to DLQ due to lock expiration', {
+                        jobId: String(jobId),
+                        queue: job.queue,
+                        owner: lock.owner,
+                        renewals: lock.renewalCount,
+                        stallCount: job.stallCount,
+                    });
+                    this.eventsManager.broadcast({
+                        eventType: "failed" /* EventType.Failed */,
+                        jobId,
+                        queue: job.queue,
+                        timestamp: now,
+                        error: 'Lock expired (max stalls reached)',
+                    });
+                }
+                else {
+                    // Requeue for retry (always push - priority queue handles ordering)
+                    queue.push(job);
+                    this.jobIndex.set(jobId, { type: 'queue', shardIdx: idx, queueName: job.queue });
+                    queueLog.info('Job requeued due to lock expiration', {
+                        jobId: String(jobId),
+                        queue: job.queue,
+                        owner: lock.owner,
+                        renewals: lock.renewalCount,
+                        attempt: job.attempts,
+                    });
+                    this.eventsManager.broadcast({
+                        eventType: "stalled" /* EventType.Stalled */,
+                        jobId,
+                        queue: job.queue,
+                        timestamp: now,
+                    });
+                }
+            }
+            // Remove the expired lock
+            this.jobLocks.delete(jobId);
+        }
+        if (expired.length > 0) {
+            queueLog.info('Processed expired locks', { count: expired.length });
+        }
+    }
     // ============ Query Operations (delegated) ============
     async getJob(jobId) {
         return queryOps.getJob(jobId, this.getQueryContext());
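
Together, pullWithLock, token-bearing jobHeartbeat, and the token-checked ack/fail amount to a lease protocol: the token proves ownership, renewal keeps the lease alive, and an expired lease lets the server reclaim the job. A hedged sketch of the worker-side loop this enables — the QueueManager methods are the ones added in this diff, while the interface, handler parameter, and 5-second renewal cadence are illustrative assumptions:

    // Minimal worker-side lease loop (illustrative; not the package's Worker class).
    interface ManagerLike {
        pullWithLock(queue: string, owner: string): Promise<{ job: { id: string } | null; token: string | null }>;
        jobHeartbeat(jobId: string, token: string): boolean;
        ack(jobId: string, result: unknown, token: string): Promise<void>;
        fail(jobId: string, error: string, token: string): Promise<void>;
    }

    async function processOne(
        qm: ManagerLike,
        queue: string,
        owner: string,
        handler: (job: { id: string }) => Promise<unknown>,
    ): Promise<void> {
        const { job, token } = await qm.pullWithLock(queue, owner);
        if (!job || !token) return;
        // Renew the lease well inside DEFAULT_LOCK_TTL so it never lapses mid-job.
        const beat = setInterval(() => qm.jobHeartbeat(job.id, token), 5_000);
        try {
            const result = await handler(job);
            await qm.ack(job.id, result, token); // throws if the lock expired meanwhile
        } catch (err) {
            await qm.fail(job.id, String(err), token);
        } finally {
            clearInterval(beat);
        }
    }
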
@@ -528,6 +855,10 @@ export class QueueManager {
         this.dlqMaintenanceInterval = setInterval(() => {
             this.performDlqMaintenance();
         }, this.config.dlqMaintenanceMs);
+        // Lock expiration check runs at same interval as stall check
+        this.lockCheckInterval = setInterval(() => {
+            this.checkExpiredLocks();
+        }, this.config.stallCheckMs);
         this.cronScheduler.start();
     }
     checkJobTimeouts() {
@@ -547,31 +878,61 @@ export class QueueManager {
     }
     /**
      * Check for stalled jobs and handle them
-     * Stalled = active job with no heartbeat for too long
+     * Uses two-phase detection (like BullMQ) to prevent false positives:
+     * - Phase 1: Jobs marked as candidates in previous check are confirmed stalled
+     * - Phase 2: Current processing jobs are marked as candidates for next check
      */
     checkStalledJobs() {
         const now = Date.now();
+        const confirmedStalled = [];
+        // Phase 1: Check jobs that were candidates from previous cycle
+        // If still in processing and still meets stall criteria → confirmed stalled
+        for (const jobId of this.stalledCandidates) {
+            // Find job in processing shards
+            const procIdx = processingShardIndex(String(jobId));
+            const job = this.processingShards[procIdx].get(jobId);
+            if (!job) {
+                // Job completed between checks - not stalled (false positive avoided!)
+                this.stalledCandidates.delete(jobId);
+                continue;
+            }
+            const stallConfig = this.shards[shardIndex(job.queue)].getStallConfig(job.queue);
+            if (!stallConfig.enabled) {
+                this.stalledCandidates.delete(jobId);
+                continue;
+            }
+            // Re-check stall criteria (job might have received heartbeat)
+            const action = getStallAction(job, stallConfig, now);
+            if (action !== "keep" /* StallAction.Keep */) {
+                // Confirmed stalled - was candidate AND still meets criteria
+                confirmedStalled.push({ job, action });
+            }
+            // Remove from candidates (will be re-added in phase 2 if still processing)
+            this.stalledCandidates.delete(jobId);
+        }
+        // Phase 2: Mark current processing jobs as candidates for NEXT check
         for (let i = 0; i < SHARD_COUNT; i++) {
             const procShard = this.processingShards[i];
-            const stalledJobs = [];
-            for (const [_jobId, job] of procShard) {
+            for (const [jobId, job] of procShard) {
                 const stallConfig = this.shards[shardIndex(job.queue)].getStallConfig(job.queue);
                 if (!stallConfig.enabled)
                     continue;
+                // Only mark as candidate if past grace period and no recent heartbeat
                 const action = getStallAction(job, stallConfig, now);
                 if (action !== "keep" /* StallAction.Keep */) {
-                    stalledJobs.push({ job, action });
+                    // Add to candidates - will be checked in NEXT cycle
+                    this.stalledCandidates.add(jobId);
                 }
             }
-            // Process stalled jobs
-            for (const { job, action } of stalledJobs) {
-                this.handleStalledJob(job, action).catch((err) => {
-                    queueLog.error('Failed to handle stalled job', {
-                        jobId: String(job.id),
-                        error: String(err),
-                    });
+        }
+        // Process confirmed stalled jobs
+        for (const { job, action } of confirmedStalled) {
+            this.handleStalledJob(job, action).catch((err) => {
+                queueLog.error('Failed to handle stalled job', {
+                    jobId: String(job.id),
+                    error: String(err),
                 });
-            }
+            });
         }
     }
     /**
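
The point of the two sweeps is that a single snapshot cannot distinguish "stalled" from "about to heartbeat": a job is only acted on if it looked stalled in the previous sweep and still does now, so anything that completes or heartbeats in between drops out. The same idea reduced to a standalone sketch (illustrative names, not the package's API):

    // Generic two-phase detector: report an item only after it has looked
    // stalled in two consecutive sweeps.
    class TwoPhaseStallDetector<T> {
        private candidates = new Set<T>();

        /** suspects = items that currently meet the stall criteria. */
        sweep(suspects: Iterable<T>): T[] {
            const current = new Set(suspects);
            // Phase 1: confirmed = suspect now AND suspect in the last sweep.
            const confirmed = [...current].filter((item) => this.candidates.has(item));
            // Phase 2: remember the current suspects for the next sweep.
            this.candidates = current;
            return confirmed;
        }
    }
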
@@ -786,6 +1147,77 @@ export class QueueManager {
                 }
             }
         }
+        // Clean stale stalledCandidates (jobs no longer in processing)
+        for (const jobId of this.stalledCandidates) {
+            const loc = this.jobIndex.get(jobId);
+            if (loc?.type !== 'processing') {
+                this.stalledCandidates.delete(jobId);
+            }
+        }
+        // Clean orphaned jobIndex entries (pointing to invalid locations)
+        // This is expensive so only run if index is large
+        if (this.jobIndex.size > 100_000) {
+            let orphanedCount = 0;
+            for (const [jobId, loc] of this.jobIndex) {
+                if (loc.type === 'processing') {
+                    const procIdx = processingShardIndex(String(jobId));
+                    if (!this.processingShards[procIdx].has(jobId)) {
+                        this.jobIndex.delete(jobId);
+                        orphanedCount++;
+                    }
+                }
+                else if (loc.type === 'queue') {
+                    // Check if job still exists in shard
+                    const shard = this.shards[loc.shardIdx];
+                    if (!shard.getQueue(loc.queueName).has(jobId)) {
+                        this.jobIndex.delete(jobId);
+                        orphanedCount++;
+                    }
+                }
+            }
+            if (orphanedCount > 0) {
+                queueLog.info('Cleaned orphaned jobIndex entries', { count: orphanedCount });
+            }
+        }
+        // Clean orphaned job locks (locks for jobs no longer in processing)
+        for (const jobId of this.jobLocks.keys()) {
+            const loc = this.jobIndex.get(jobId);
+            if (loc?.type !== 'processing') {
+                this.jobLocks.delete(jobId);
+            }
+        }
+        // Remove empty queues to free memory (like obliterate but only for empty queues)
+        for (let i = 0; i < SHARD_COUNT; i++) {
+            const shard = this.shards[i];
+            const emptyQueues = [];
+            for (const [queueName, queue] of shard.queues) {
+                // Queue is empty and has no DLQ entries
+                const dlqEntries = shard.dlq.get(queueName);
+                if (queue.size === 0 && (!dlqEntries || dlqEntries.length === 0)) {
+                    emptyQueues.push(queueName);
+                }
+            }
+            for (const queueName of emptyQueues) {
+                shard.queues.delete(queueName);
+                shard.dlq.delete(queueName);
+                shard.uniqueKeys.delete(queueName);
+                shard.queueState.delete(queueName);
+                shard.activeGroups.delete(queueName);
+                shard.rateLimiters.delete(queueName);
+                shard.concurrencyLimiters.delete(queueName);
+                shard.stallConfig.delete(queueName);
+                shard.dlqConfig.delete(queueName);
+                this.unregisterQueueName(queueName);
+            }
+            if (emptyQueues.length > 0) {
+                queueLog.info('Removed empty queues', { shard: i, count: emptyQueues.length });
+            }
+            // Clean orphaned temporal index entries (memory leak fix)
+            const cleanedTemporal = shard.cleanOrphanedTemporalEntries();
+            if (cleanedTemporal > 0) {
+                queueLog.info('Cleaned orphaned temporal entries', { shard: i, count: cleanedTemporal });
+            }
+        }
     }
     // ============ Lifecycle ============
     shutdown() {
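
One detail the sweeps above rely on: JavaScript's Map and Set are specified to tolerate delete() of the entry currently being visited during for...of iteration, which is why the cleanup can prune in place without snapshotting the keys first. A tiny self-contained illustration:

    // Deleting the entry being visited is well-defined for Map/Set iteration.
    const index = new Map<string, string>([
        ['job-1', 'processing'],
        ['job-2', 'orphaned'],
        ['job-3', 'orphaned'],
    ]);
    for (const [id, state] of index) {
        if (state === 'orphaned') index.delete(id); // safe mid-iteration
    }
    console.log([...index.keys()]); // ['job-1']
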
@@ -802,6 +1234,8 @@ export class QueueManager {
             clearInterval(this.stallCheckInterval);
         if (this.dlqMaintenanceInterval)
             clearInterval(this.dlqMaintenanceInterval);
+        if (this.lockCheckInterval)
+            clearInterval(this.lockCheckInterval);
         this.storage?.close();
         // Clear in-memory collections
         this.jobIndex.clear();
@@ -811,6 +1245,9 @@ export class QueueManager {
         this.customIdMap.clear();
         this.pendingDepChecks.clear();
         this.queueNamesCache.clear();
+        this.jobLocks.clear();
+        this.stalledCandidates.clear();
+        this.clientJobs.clear();
         for (const shard of this.processingShards) {
             shard.clear();
         }
@@ -851,5 +1288,77 @@ export class QueueManager {
             cronPending: cronStats.pending,
         };
     }
+    /**
+     * Get detailed memory statistics for debugging memory issues.
+     * Returns counts of entries in all major collections.
+     */
+    getMemoryStats() {
+        let processingTotal = 0;
+        let queuedTotal = 0;
+        let waitingDepsTotal = 0;
+        let temporalIndexTotal = 0;
+        let delayedHeapTotal = 0;
+        for (let i = 0; i < SHARD_COUNT; i++) {
+            processingTotal += this.processingShards[i].size;
+            const shardStats = this.shards[i].getStats();
+            queuedTotal += shardStats.queuedJobs;
+            waitingDepsTotal += this.shards[i].waitingDeps.size;
+            // Get internal structure sizes
+            const internalSizes = this.shards[i].getInternalSizes();
+            temporalIndexTotal += internalSizes.temporalIndex;
+            delayedHeapTotal += internalSizes.delayedHeap;
+        }
+        // Count total jobs across all clients
+        let clientJobsTotal = 0;
+        for (const jobs of this.clientJobs.values()) {
+            clientJobsTotal += jobs.size;
+        }
+        return {
+            jobIndex: this.jobIndex.size,
+            completedJobs: this.completedJobs.size,
+            jobResults: this.jobResults.size,
+            jobLogs: this.jobLogs.size,
+            customIdMap: this.customIdMap.size,
+            jobLocks: this.jobLocks.size,
+            clientJobs: this.clientJobs.size,
+            clientJobsTotal,
+            pendingDepChecks: this.pendingDepChecks.size,
+            stalledCandidates: this.stalledCandidates.size,
+            processingTotal,
+            queuedTotal,
+            waitingDepsTotal,
+            temporalIndexTotal,
+            delayedHeapTotal,
+        };
+    }
+    /**
+     * Force compact all collections to reduce memory usage.
+     * Use after large batch operations or when memory pressure is high.
+     */
+    compactMemory() {
+        // Compact priority queues that have high stale ratios
+        for (let i = 0; i < SHARD_COUNT; i++) {
+            for (const q of this.shards[i].queues.values()) {
+                if (q.needsCompaction(0.1)) {
+                    // More aggressive: 10% stale threshold
+                    q.compact();
+                }
+            }
+        }
+        // Clean up empty client tracking entries
+        for (const [clientId, jobs] of this.clientJobs) {
+            if (jobs.size === 0) {
+                this.clientJobs.delete(clientId);
+            }
+        }
+        // Clean orphaned job locks (jobs no longer in processing)
+        for (const jobId of this.jobLocks.keys()) {
+            const loc = this.jobIndex.get(jobId);
+            if (loc?.type !== 'processing') {
+                this.jobLocks.delete(jobId);
+            }
+        }
+        queueLog.info('Memory compacted');
+    }
 }
 //# sourceMappingURL=queueManager.js.map
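
getMemoryStats and compactMemory are operator-facing additions. A hedged sketch of how a host process might combine them into a periodic watchdog — the two methods are real, but the wrapper function, threshold, and interval are invented for illustration:

    // Illustrative watchdog around the new diagnostics methods.
    interface MemoryOps {
        getMemoryStats(): { jobIndex: number; jobLocks: number; processingTotal: number };
        compactMemory(): void;
    }

    function startMemoryWatchdog(qm: MemoryOps, intervalMs = 60_000): ReturnType<typeof setInterval> {
        return setInterval(() => {
            const stats = qm.getMemoryStats();
            // Threshold is an assumption; tune to the deployment.
            if (stats.jobIndex > 1_000_000) {
                qm.compactMemory();
            }
        }, intervalMs);
    }
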