bunqueue 1.9.5 → 1.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/dist/application/backgroundTasks.d.ts +32 -0
  2. package/dist/application/backgroundTasks.d.ts.map +1 -0
  3. package/dist/application/backgroundTasks.js +318 -0
  4. package/dist/application/backgroundTasks.js.map +1 -0
  5. package/dist/application/cleanupTasks.d.ts +11 -0
  6. package/dist/application/cleanupTasks.d.ts.map +1 -0
  7. package/dist/application/cleanupTasks.js +181 -0
  8. package/dist/application/cleanupTasks.js.map +1 -0
  9. package/dist/application/lockManager.d.ts +62 -0
  10. package/dist/application/lockManager.d.ts.map +1 -0
  11. package/dist/application/lockManager.js +307 -0
  12. package/dist/application/lockManager.js.map +1 -0
  13. package/dist/application/operations/push.js +2 -2
  14. package/dist/application/operations/push.js.map +1 -1
  15. package/dist/application/queueManager.d.ts +14 -176
  16. package/dist/application/queueManager.d.ts.map +1 -1
  17. package/dist/application/queueManager.js +130 -953
  18. package/dist/application/queueManager.js.map +1 -1
  19. package/dist/application/statsManager.d.ts +56 -0
  20. package/dist/application/statsManager.d.ts.map +1 -0
  21. package/dist/application/statsManager.js +111 -0
  22. package/dist/application/statsManager.js.map +1 -0
  23. package/dist/application/types.d.ts +123 -0
  24. package/dist/application/types.d.ts.map +1 -0
  25. package/dist/application/types.js +16 -0
  26. package/dist/application/types.js.map +1 -0
  27. package/dist/client/queue/queue.d.ts.map +1 -1
  28. package/dist/client/queue/queue.js +2 -0
  29. package/dist/client/queue/queue.js.map +1 -1
  30. package/dist/client/types.d.ts +6 -0
  31. package/dist/client/types.d.ts.map +1 -1
  32. package/dist/client/types.js.map +1 -1
  33. package/dist/domain/types/command.d.ts +2 -0
  34. package/dist/domain/types/command.d.ts.map +1 -1
  35. package/dist/domain/types/job.d.ts +6 -0
  36. package/dist/domain/types/job.d.ts.map +1 -1
  37. package/dist/domain/types/job.js.map +1 -1
  38. package/dist/infrastructure/persistence/sqlite.d.ts +6 -2
  39. package/dist/infrastructure/persistence/sqlite.d.ts.map +1 -1
  40. package/dist/infrastructure/persistence/sqlite.js +12 -3
  41. package/dist/infrastructure/persistence/sqlite.js.map +1 -1
  42. package/dist/infrastructure/server/handlers/core.d.ts.map +1 -1
  43. package/dist/infrastructure/server/handlers/core.js +1 -0
  44. package/dist/infrastructure/server/handlers/core.js.map +1 -1
  45. package/dist/infrastructure/server/tcp.d.ts.map +1 -1
  46. package/dist/infrastructure/server/tcp.js +14 -8
  47. package/dist/infrastructure/server/tcp.js.map +1 -1
  48. package/package.json +2 -1
@@ -2,17 +2,15 @@
  * Queue Manager
  * Core orchestrator for all queue operations
  */
- import { calculateBackoff, createJobLock, isLockExpired, renewLock, DEFAULT_LOCK_TTL, } from '../domain/types/job';
- import { queueLog } from '../shared/logger';
- import { getStallAction, incrementStallCount } from '../domain/types/stall';
+ import { DEFAULT_LOCK_TTL } from '../domain/types/job';
  import { Shard } from '../domain/queue/shard';
  import { SqliteStorage } from '../infrastructure/persistence/sqlite';
  import { CronScheduler } from '../infrastructure/scheduler/cronScheduler';
  import { WebhookManager } from './webhookManager';
  import { WorkerManager } from './workerManager';
  import { EventsManager } from './eventsManager';
- import { RWLock, withWriteLock } from '../shared/lock';
- import { shardIndex, processingShardIndex, SHARD_COUNT } from '../shared/hash';
+ import { RWLock } from '../shared/lock';
+ import { shardIndex, SHARD_COUNT } from '../shared/hash';
  import { pushJob, pushJobBatch } from './operations/push';
  import { pullJob, pullJobBatch } from './operations/pull';
  import { ackJob, ackJobBatch, ackJobBatchWithResults, failJob, } from './operations/ack';
@@ -23,18 +21,10 @@ import * as dlqOps from './dlqManager';
  import * as logsOps from './jobLogsManager';
  import { generatePrometheusMetrics } from './metricsExporter';
  import { LRUMap, BoundedSet, BoundedMap } from '../shared/lru';
- const DEFAULT_CONFIG = {
- maxCompletedJobs: 50_000,
- maxJobResults: 5_000,
- maxJobLogs: 10_000,
- maxCustomIds: 50_000,
- maxWaitingDeps: 10_000,
- cleanupIntervalMs: 10_000,
- jobTimeoutCheckMs: 5_000,
- dependencyCheckMs: 1_000,
- stallCheckMs: 5_000,
- dlqMaintenanceMs: 60_000,
- };
+ import { DEFAULT_CONFIG } from './types';
+ import * as lockMgr from './lockManager';
+ import * as bgTasks from './backgroundTasks';
+ import * as statsMgr from './statsManager';
  /**
  * QueueManager - Central coordinator
  */
@@ -52,17 +42,12 @@ export class QueueManager {
  jobResults;
  customIdMap;
  jobLogs;
- // Deferred dependency resolution queue (to avoid lock order violations)
+ // Deferred dependency resolution queue
  pendingDepChecks = new Set();
- depCheckInterval = null;
- // Two-phase stall detection (like BullMQ)
- // Jobs are added here on first check, confirmed stalled on second check
+ // Two-phase stall detection
  stalledCandidates = new Set();
  // Lock-based job ownership tracking (BullMQ-style)
- // Maps jobId to lock info (token, owner, expiration)
  jobLocks = new Map();
- // Client-job tracking for connection-based release
- // When a TCP connection closes, all jobs owned by that client are released
  clientJobs = new Map();
  // Cron scheduler
  cronScheduler;
@@ -80,18 +65,14 @@ export class QueueManager {
  totalFailed: { value: 0n },
  };
  startTime = Date.now();
- // Background intervals
- cleanupInterval = null;
- timeoutInterval = null;
- stallCheckInterval = null;
- dlqMaintenanceInterval = null;
- lockCheckInterval = null;
- // Queue names cache for O(1) listQueues instead of O(32 * queues)
+ // Background task handles
+ backgroundTaskHandles = null;
+ // Queue names cache for O(1) listQueues
  queueNamesCache = new Set();
  constructor(config = {}) {
  this.config = { ...DEFAULT_CONFIG, ...config };
  this.storage = config.dataPath ? new SqliteStorage({ path: config.dataPath }) : null;
- // Initialize bounded collections - BoundedSet is faster for completedJobs (no recency tracking needed)
+ // Initialize bounded collections
  this.completedJobs = new BoundedSet(this.config.maxCompletedJobs, (jobId) => {
  this.jobIndex.delete(jobId);
  });
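The defaults that used to be inlined here (maxCompletedJobs, cleanupIntervalMs, stallCheckMs, and so on) now come from ./types, and the constructor still merges caller overrides over them via { ...DEFAULT_CONFIG, ...config }. A minimal construction sketch using only option names visible in the removed block; the values are illustrative, not recommendations:

    // Hypothetical example; dataPath enables SqliteStorage persistence.
    const manager = new QueueManager({
      dataPath: './bunqueue.db',
      maxCompletedJobs: 100_000,   // cap on the bounded set of completed job ids
      cleanupIntervalMs: 30_000,   // cadence of the periodic cleanup task
      stallCheckMs: 10_000,        // in 1.9.5 this interval also drove the lock-expiration check
    });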
@@ -115,10 +96,70 @@ export class QueueManager {
  this.workerManager = new WorkerManager();
  this.eventsManager = new EventsManager(this.webhookManager);
  // Load and start
- this.recover();
- this.startBackgroundTasks();
+ bgTasks.recover(this.getBackgroundContext());
+ // Load cron jobs from storage
+ if (this.storage) {
+ this.cronScheduler.load(this.storage.loadCronJobs());
+ }
+ this.backgroundTaskHandles = bgTasks.startBackgroundTasks(this.getBackgroundContext(), this.cronScheduler);
  }
  // ============ Context Builders ============
+ getLockContext() {
+ return {
+ jobIndex: this.jobIndex,
+ jobLocks: this.jobLocks,
+ clientJobs: this.clientJobs,
+ processingShards: this.processingShards,
+ processingLocks: this.processingLocks,
+ shards: this.shards,
+ shardLocks: this.shardLocks,
+ eventsManager: this.eventsManager,
+ };
+ }
+ getBackgroundContext() {
+ return {
+ config: this.config,
+ storage: this.storage,
+ shards: this.shards,
+ shardLocks: this.shardLocks,
+ processingShards: this.processingShards,
+ processingLocks: this.processingLocks,
+ jobIndex: this.jobIndex,
+ completedJobs: this.completedJobs,
+ jobResults: this.jobResults,
+ customIdMap: this.customIdMap,
+ jobLogs: this.jobLogs,
+ jobLocks: this.jobLocks,
+ clientJobs: this.clientJobs,
+ stalledCandidates: this.stalledCandidates,
+ pendingDepChecks: this.pendingDepChecks,
+ queueNamesCache: this.queueNamesCache,
+ eventsManager: this.eventsManager,
+ webhookManager: this.webhookManager,
+ metrics: this.metrics,
+ startTime: this.startTime,
+ fail: this.fail.bind(this),
+ registerQueueName: this.registerQueueName.bind(this),
+ unregisterQueueName: this.unregisterQueueName.bind(this),
+ };
+ }
+ getStatsContext() {
+ return {
+ shards: this.shards,
+ processingShards: this.processingShards,
+ completedJobs: this.completedJobs,
+ jobIndex: this.jobIndex,
+ jobResults: this.jobResults,
+ jobLogs: this.jobLogs,
+ customIdMap: this.customIdMap,
+ jobLocks: this.jobLocks,
+ clientJobs: this.clientJobs,
+ pendingDepChecks: this.pendingDepChecks,
+ stalledCandidates: this.stalledCandidates,
+ metrics: this.metrics,
+ startTime: this.startTime,
+ };
+ }
  getPushContext() {
  return {
  storage: this.storage,
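The context builders added above replace direct access to QueueManager internals: each extracted module (lockManager, backgroundTasks, statsManager) receives a plain object holding only the shared state it needs. A rough sketch of how a delegated helper might consume the lock context, reconstructed from the inline verifyLock logic removed later in this diff; the actual lockManager.js may differ in detail:

    // Sketch only - ctx is the object returned by getLockContext().
    function verifyLock(jobId, token, ctx) {
      const lock = ctx.jobLocks.get(jobId);
      if (!lock || lock.token !== token) return false;
      return !isLockExpired(lock); // helper imported from '../domain/types/job'
    }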
@@ -163,7 +204,6 @@ export class QueueManager {
  onRepeat: this.handleRepeat.bind(this),
  };
  }
- /** Handle repeatable job - re-queue with incremented count */
  handleRepeat(job) {
  if (!job.repeat)
  return;
@@ -222,110 +262,83 @@ export class QueueManager {
222
262
  }
223
263
  // ============ Core Operations ============
224
264
  async push(queue, input) {
225
- // Register queue name in cache for O(1) listQueues
226
265
  this.registerQueueName(queue);
227
266
  return pushJob(queue, input, this.getPushContext());
228
267
  }
229
268
  async pushBatch(queue, inputs) {
230
- // Register queue name in cache for O(1) listQueues
231
269
  this.registerQueueName(queue);
232
270
  return pushJobBatch(queue, inputs, this.getPushContext());
233
271
  }
234
272
  async pull(queue, timeoutMs = 0) {
235
273
  return pullJob(queue, timeoutMs, this.getPullContext());
236
274
  }
237
- /**
238
- * Pull a job and create a lock for it (BullMQ-style).
239
- * Returns both the job and its lock token for ownership verification.
240
- */
241
275
  async pullWithLock(queue, owner, timeoutMs = 0, lockTtl = DEFAULT_LOCK_TTL) {
242
276
  const job = await pullJob(queue, timeoutMs, this.getPullContext());
243
277
  if (!job)
244
278
  return { job: null, token: null };
245
- const token = this.createLock(job.id, owner, lockTtl);
279
+ const token = lockMgr.createLock(job.id, owner, this.getLockContext(), lockTtl);
246
280
  return { job, token };
247
281
  }
248
- /** Pull multiple jobs in single lock acquisition - O(1) instead of O(n) locks */
249
282
  async pullBatch(queue, count, timeoutMs = 0) {
250
283
  return pullJobBatch(queue, count, timeoutMs, this.getPullContext());
251
284
  }
252
- /**
253
- * Pull multiple jobs and create locks for them (BullMQ-style).
254
- * Returns both jobs and their lock tokens for ownership verification.
255
- */
256
285
  async pullBatchWithLock(queue, count, owner, timeoutMs = 0, lockTtl = DEFAULT_LOCK_TTL) {
257
286
  const jobs = await pullJobBatch(queue, count, timeoutMs, this.getPullContext());
258
287
  const tokens = [];
259
288
  for (const job of jobs) {
260
- const token = this.createLock(job.id, owner, lockTtl);
289
+ const token = lockMgr.createLock(job.id, owner, this.getLockContext(), lockTtl);
261
290
  tokens.push(token ?? '');
262
291
  }
263
292
  return { jobs, tokens };
264
293
  }
265
294
  async ack(jobId, result, token) {
266
- // If token provided, verify ownership before acknowledging
267
- if (token && !this.verifyLock(jobId, token)) {
295
+ if (token && !lockMgr.verifyLock(jobId, token, this.getLockContext())) {
268
296
  throw new Error(`Invalid or expired lock token for job ${jobId}`);
269
297
  }
270
298
  await ackJob(jobId, result, this.getAckContext());
271
- // Release lock after successful ack
272
- this.releaseLock(jobId, token);
299
+ lockMgr.releaseLock(jobId, this.getLockContext(), token);
273
300
  }
274
- /** Acknowledge multiple jobs in parallel with Promise.all */
275
301
  async ackBatch(jobIds, tokens) {
276
- // Verify all tokens first if provided
277
302
  if (tokens?.length === jobIds.length) {
278
303
  for (let i = 0; i < jobIds.length; i++) {
279
304
  const t = tokens[i];
280
- if (t && !this.verifyLock(jobIds[i], t)) {
305
+ if (t && !lockMgr.verifyLock(jobIds[i], t, this.getLockContext())) {
281
306
  throw new Error(`Invalid or expired lock token for job ${jobIds[i]}`);
282
307
  }
283
308
  }
284
309
  }
285
310
  await ackJobBatch(jobIds, this.getAckContext());
286
- // Release locks after successful ack
287
311
  if (tokens) {
288
312
  for (let i = 0; i < jobIds.length; i++) {
289
- this.releaseLock(jobIds[i], tokens[i]);
313
+ lockMgr.releaseLock(jobIds[i], this.getLockContext(), tokens[i]);
290
314
  }
291
315
  }
292
316
  }
293
- /** Acknowledge multiple jobs with individual results - batch optimized */
294
317
  async ackBatchWithResults(items) {
295
- // Verify all tokens first if provided
296
318
  for (const item of items) {
297
- if (item.token && !this.verifyLock(item.id, item.token)) {
319
+ if (item.token && !lockMgr.verifyLock(item.id, item.token, this.getLockContext())) {
298
320
  throw new Error(`Invalid or expired lock token for job ${item.id}`);
299
321
  }
300
322
  }
301
323
  await ackJobBatchWithResults(items, this.getAckContext());
302
- // Release locks after successful ack
303
324
  for (const item of items) {
304
- this.releaseLock(item.id, item.token);
325
+ lockMgr.releaseLock(item.id, this.getLockContext(), item.token);
305
326
  }
306
327
  }
307
328
  async fail(jobId, error, token) {
308
- // If token provided, verify ownership before failing
309
- if (token && !this.verifyLock(jobId, token)) {
329
+ if (token && !lockMgr.verifyLock(jobId, token, this.getLockContext())) {
310
330
  throw new Error(`Invalid or expired lock token for job ${jobId}`);
311
331
  }
312
332
  await failJob(jobId, error, this.getAckContext());
313
- // Release lock after fail
314
- this.releaseLock(jobId, token);
333
+ lockMgr.releaseLock(jobId, this.getLockContext(), token);
315
334
  }
316
- /**
317
- * Update job heartbeat for stall detection (single job).
318
- * If token is provided, also renews the lock.
319
- */
320
335
  jobHeartbeat(jobId, token) {
321
336
  const loc = this.jobIndex.get(jobId);
322
337
  if (loc?.type !== 'processing')
323
338
  return false;
324
- // If token provided, renew lock (which also updates heartbeat)
325
339
  if (token) {
326
- return this.renewJobLock(jobId, token);
340
+ return lockMgr.renewJobLock(jobId, token, this.getLockContext());
327
341
  }
328
- // Legacy mode: just update heartbeat without token verification
329
342
  const processing = this.processingShards[loc.shardIdx];
330
343
  const job = processing.get(jobId);
331
344
  if (job) {
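From a worker's point of view the lock-token flow is unchanged by this refactor: pull with an owner id, heartbeat while processing (which renews the lock when a token is passed), then hand the token back on ack or fail. A minimal worker-loop sketch built only from the method signatures shown in this hunk; the queue name and handle() are placeholders:

    const { job, token } = await manager.pullWithLock('emails', 'worker-1');
    if (job) {
      const beat = setInterval(() => manager.jobHeartbeat(job.id, token), 10_000);
      try {
        const result = await handle(job);          // user-supplied processing
        await manager.ack(job.id, result, token);  // throws on an invalid or expired token
      } catch (err) {
        await manager.fail(job.id, String(err), token);
      } finally {
        clearInterval(beat);
      }
    }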
@@ -334,262 +347,42 @@ export class QueueManager {
334
347
  }
335
348
  return false;
336
349
  }
337
- /**
338
- * Update job heartbeat for multiple jobs (batch).
339
- * If tokens are provided, also renews the locks.
340
- */
341
350
  jobHeartbeatBatch(jobIds, tokens) {
342
351
  let count = 0;
343
352
  for (let i = 0; i < jobIds.length; i++) {
344
- const token = tokens?.[i];
345
- if (this.jobHeartbeat(jobIds[i], token))
353
+ if (this.jobHeartbeat(jobIds[i], tokens?.[i]))
346
354
  count++;
347
355
  }
348
356
  return count;
349
357
  }
350
- // ============ Lock Management (BullMQ-style) ============
351
- /**
352
- * Create a lock for a job when it's pulled for processing.
353
- * @returns The lock token, or null if job not in processing
354
- */
358
+ // ============ Lock Management (delegated) ============
355
359
  createLock(jobId, owner, ttl = DEFAULT_LOCK_TTL) {
356
- const loc = this.jobIndex.get(jobId);
357
- if (loc?.type !== 'processing')
358
- return null;
359
- // Check if lock already exists (shouldn't happen, but defensive)
360
- if (this.jobLocks.has(jobId)) {
361
- queueLog.warn('Lock already exists for job', { jobId: String(jobId), owner });
362
- return null;
363
- }
364
- const lock = createJobLock(jobId, owner, ttl);
365
- this.jobLocks.set(jobId, lock);
366
- return lock.token;
367
- }
368
- /**
369
- * Verify that a token is valid for a job.
370
- * @returns true if token matches the active lock
371
- */
360
+ return lockMgr.createLock(jobId, owner, this.getLockContext(), ttl);
361
+ }
372
362
  verifyLock(jobId, token) {
373
- const lock = this.jobLocks.get(jobId);
374
- if (!lock)
375
- return false;
376
- if (lock.token !== token)
377
- return false;
378
- if (isLockExpired(lock))
379
- return false;
380
- return true;
363
+ return lockMgr.verifyLock(jobId, token, this.getLockContext());
381
364
  }
382
- /**
383
- * Renew a lock with the given token.
384
- * @returns true if renewal succeeded, false if token invalid or lock expired
385
- */
386
365
  renewJobLock(jobId, token, newTtl) {
387
- const lock = this.jobLocks.get(jobId);
388
- if (!lock)
389
- return false;
390
- if (lock.token !== token)
391
- return false;
392
- if (isLockExpired(lock)) {
393
- // Lock already expired, remove it
394
- this.jobLocks.delete(jobId);
395
- return false;
396
- }
397
- renewLock(lock, newTtl);
398
- // Also update lastHeartbeat on the job (for legacy stall detection compatibility)
399
- const loc = this.jobIndex.get(jobId);
400
- if (loc?.type === 'processing') {
401
- const job = this.processingShards[loc.shardIdx].get(jobId);
402
- if (job)
403
- job.lastHeartbeat = Date.now();
404
- }
405
- return true;
366
+ return lockMgr.renewJobLock(jobId, token, this.getLockContext(), newTtl);
406
367
  }
407
- /**
408
- * Renew locks for multiple jobs (batch operation).
409
- * @returns Array of jobIds that were successfully renewed
410
- */
411
368
  renewJobLockBatch(items) {
412
- const renewed = [];
413
- for (const item of items) {
414
- if (this.renewJobLock(item.id, item.token, item.ttl)) {
415
- renewed.push(String(item.id));
416
- }
417
- }
418
- return renewed;
369
+ return lockMgr.renewJobLockBatch(items, this.getLockContext());
419
370
  }
420
- /**
421
- * Release a lock when job is completed or failed.
422
- * Should be called by ACK/FAIL operations.
423
- */
424
371
  releaseLock(jobId, token) {
425
- const lock = this.jobLocks.get(jobId);
426
- if (!lock)
427
- return true; // No lock to release
428
- // If token provided, verify it matches
429
- if (token && lock.token !== token) {
430
- queueLog.warn('Token mismatch on lock release', {
431
- jobId: String(jobId),
432
- expected: lock.token.substring(0, 8),
433
- got: token.substring(0, 8),
434
- });
435
- return false;
436
- }
437
- this.jobLocks.delete(jobId);
438
- return true;
372
+ return lockMgr.releaseLock(jobId, this.getLockContext(), token);
439
373
  }
440
- /**
441
- * Get lock info for a job (for debugging/monitoring).
442
- */
443
374
  getLockInfo(jobId) {
444
- return this.jobLocks.get(jobId) ?? null;
375
+ return lockMgr.getLockInfo(jobId, this.getLockContext());
445
376
  }
446
- // ============ Client-Job Tracking ============
447
- /**
448
- * Register a job as owned by a client (called on PULL).
449
- */
377
+ // ============ Client-Job Tracking (delegated) ============
450
378
  registerClientJob(clientId, jobId) {
451
- let jobs = this.clientJobs.get(clientId);
452
- if (!jobs) {
453
- jobs = new Set();
454
- this.clientJobs.set(clientId, jobs);
455
- }
456
- jobs.add(jobId);
379
+ lockMgr.registerClientJob(clientId, jobId, this.getLockContext());
457
380
  }
458
- /**
459
- * Unregister a job from a client (called on ACK/FAIL).
460
- */
461
381
  unregisterClientJob(clientId, jobId) {
462
- if (!clientId)
463
- return;
464
- const jobs = this.clientJobs.get(clientId);
465
- if (jobs) {
466
- jobs.delete(jobId);
467
- if (jobs.size === 0) {
468
- this.clientJobs.delete(clientId);
469
- }
470
- }
382
+ lockMgr.unregisterClientJob(clientId, jobId, this.getLockContext());
471
383
  }
472
- /**
473
- * Release all jobs owned by a client back to queue (called on TCP disconnect).
474
- * Returns the number of jobs released.
475
- */
476
384
  releaseClientJobs(clientId) {
477
- const jobs = this.clientJobs.get(clientId);
478
- if (!jobs || jobs.size === 0) {
479
- this.clientJobs.delete(clientId);
480
- return 0;
481
- }
482
- let released = 0;
483
- const now = Date.now();
484
- for (const jobId of jobs) {
485
- const loc = this.jobIndex.get(jobId);
486
- if (loc?.type !== 'processing')
487
- continue;
488
- const procIdx = loc.shardIdx;
489
- const job = this.processingShards[procIdx].get(jobId);
490
- if (!job)
491
- continue;
492
- // Remove from processing
493
- this.processingShards[procIdx].delete(jobId);
494
- // Release lock if exists
495
- this.jobLocks.delete(jobId);
496
- // Release concurrency
497
- const idx = shardIndex(job.queue);
498
- const shard = this.shards[idx];
499
- shard.releaseConcurrency(job.queue);
500
- // Release group if active
501
- if (job.groupId) {
502
- shard.releaseGroup(job.queue, job.groupId);
503
- }
504
- // Reset job state for retry
505
- job.startedAt = null;
506
- job.lastHeartbeat = now;
507
- // Re-queue the job
508
- shard.getQueue(job.queue).push(job);
509
- const isDelayed = job.runAt > now;
510
- shard.incrementQueued(jobId, isDelayed, job.createdAt, job.queue, job.runAt);
511
- this.jobIndex.set(jobId, { type: 'queue', shardIdx: idx, queueName: job.queue });
512
- released++;
513
- }
514
- // Clear client tracking
515
- this.clientJobs.delete(clientId);
516
- if (released > 0) {
517
- queueLog.info('Released client jobs', { clientId: clientId.substring(0, 8), released });
518
- }
519
- return released;
520
- }
521
- /**
522
- * Check and handle expired locks.
523
- * Jobs with expired locks are requeued for retry.
524
- */
525
- checkExpiredLocks() {
526
- const now = Date.now();
527
- const expired = [];
528
- for (const [jobId, lock] of this.jobLocks) {
529
- if (isLockExpired(lock, now)) {
530
- expired.push({ jobId, lock });
531
- }
532
- }
533
- for (const { jobId, lock } of expired) {
534
- const procIdx = processingShardIndex(String(jobId));
535
- const job = this.processingShards[procIdx].get(jobId);
536
- if (job) {
537
- const idx = shardIndex(job.queue);
538
- const shard = this.shards[idx];
539
- const queue = shard.getQueue(job.queue);
540
- // Remove from processing
541
- this.processingShards[procIdx].delete(jobId);
542
- // Increment attempts and reset state
543
- job.attempts++;
544
- job.startedAt = null;
545
- job.lastHeartbeat = now;
546
- job.stallCount++;
547
- // Check if max stalls exceeded
548
- const stallConfig = shard.getStallConfig(job.queue);
549
- if (stallConfig.maxStalls > 0 && job.stallCount >= stallConfig.maxStalls) {
550
- // Move to DLQ using shard's addToDlq method
551
- shard.addToDlq(job, "stalled" /* FailureReason.Stalled */, `Lock expired after ${lock.renewalCount} renewals`);
552
- this.jobIndex.set(jobId, { type: 'dlq', queueName: job.queue });
553
- queueLog.warn('Job moved to DLQ due to lock expiration', {
554
- jobId: String(jobId),
555
- queue: job.queue,
556
- owner: lock.owner,
557
- renewals: lock.renewalCount,
558
- stallCount: job.stallCount,
559
- });
560
- this.eventsManager.broadcast({
561
- eventType: "failed" /* EventType.Failed */,
562
- jobId,
563
- queue: job.queue,
564
- timestamp: now,
565
- error: 'Lock expired (max stalls reached)',
566
- });
567
- }
568
- else {
569
- // Requeue for retry (always push - priority queue handles ordering)
570
- queue.push(job);
571
- this.jobIndex.set(jobId, { type: 'queue', shardIdx: idx, queueName: job.queue });
572
- queueLog.info('Job requeued due to lock expiration', {
573
- jobId: String(jobId),
574
- queue: job.queue,
575
- owner: lock.owner,
576
- renewals: lock.renewalCount,
577
- attempt: job.attempts,
578
- });
579
- this.eventsManager.broadcast({
580
- eventType: "stalled" /* EventType.Stalled */,
581
- jobId,
582
- queue: job.queue,
583
- timestamp: now,
584
- });
585
- }
586
- }
587
- // Remove the expired lock
588
- this.jobLocks.delete(jobId);
589
- }
590
- if (expired.length > 0) {
591
- queueLog.info('Processed expired locks', { count: expired.length });
592
- }
385
+ return lockMgr.releaseClientJobs(clientId, this.getLockContext());
593
386
  }
594
387
  // ============ Query Operations (delegated) ============
595
388
  async getJob(jobId) {
@@ -622,81 +415,55 @@ export class QueueManager {
622
415
  }
623
416
  obliterate(queue) {
624
417
  queueControl.obliterateQueue(queue, { shards: this.shards, jobIndex: this.jobIndex });
625
- // Remove from cache
626
418
  this.unregisterQueueName(queue);
627
419
  }
628
420
  listQueues() {
629
- // O(1) using cache instead of O(32 * queues) iterating all shards
630
421
  return Array.from(this.queueNamesCache);
631
422
  }
632
- /** Register queue name in cache - called when first job is pushed */
633
423
  registerQueueName(queue) {
634
424
  this.queueNamesCache.add(queue);
635
425
  }
636
- /** Unregister queue name from cache - called on obliterate */
637
426
  unregisterQueueName(queue) {
638
427
  this.queueNamesCache.delete(queue);
639
428
  }
640
429
  clean(queue, graceMs, state, limit) {
641
430
  return queueControl.cleanQueue(queue, graceMs, { shards: this.shards, jobIndex: this.jobIndex }, state, limit);
642
431
  }
643
- /** Get job counts grouped by priority for a queue */
644
432
  getCountsPerPriority(queue) {
645
433
  const idx = shardIndex(queue);
646
434
  const counts = this.shards[idx].getCountsPerPriority(queue);
647
435
  return Object.fromEntries(counts);
648
436
  }
649
- /**
650
- * Get jobs with filtering and pagination
651
- * @param queue - Queue name
652
- * @param options - Filter options
653
- * @returns Array of jobs matching the criteria
654
- */
655
437
  getJobs(queue, options = {}) {
656
438
  const { state, start = 0, end = 100, asc = true } = options;
657
439
  const idx = shardIndex(queue);
658
440
  const shard = this.shards[idx];
659
441
  const now = Date.now();
660
442
  const jobs = [];
661
- // Collect jobs based on state filter
662
443
  if (!state || state === 'waiting') {
663
- const queueJobs = shard.getQueue(queue).values();
664
- jobs.push(...queueJobs.filter((j) => j.runAt <= now));
444
+ jobs.push(...shard
445
+ .getQueue(queue)
446
+ .values()
447
+ .filter((j) => j.runAt <= now));
665
448
  }
666
449
  if (!state || state === 'delayed') {
667
- const queueJobs = shard.getQueue(queue).values();
668
- jobs.push(...queueJobs.filter((j) => j.runAt > now));
450
+ jobs.push(...shard
451
+ .getQueue(queue)
452
+ .values()
453
+ .filter((j) => j.runAt > now));
669
454
  }
670
455
  if (!state || state === 'active') {
671
456
  for (let i = 0; i < SHARD_COUNT; i++) {
672
457
  for (const job of this.processingShards[i].values()) {
673
- if (job.queue === queue) {
458
+ if (job.queue === queue)
674
459
  jobs.push(job);
675
- }
676
460
  }
677
461
  }
678
462
  }
679
463
  if (!state || state === 'failed') {
680
- const dlqJobs = shard.getDlq(queue);
681
- jobs.push(...dlqJobs);
682
- }
683
- // For completed jobs, check completed jobs set
684
- if (state === 'completed') {
685
- // Iterate completedJobs and filter by queue
686
- // Note: This is not efficient for large sets, but provides the data
687
- for (const jobId of this.completedJobs) {
688
- const result = this.jobResults.get(jobId);
689
- if (result) {
690
- // We don't have the full job object for completed jobs in memory
691
- // Just count them or return IDs - for now skip completed state
692
- }
693
- }
694
- // Completed jobs are stored in SQLite, would need storage access
695
- // For now, return empty for completed state if not in DLQ
464
+ jobs.push(...shard.getDlq(queue));
696
465
  }
697
- // Sort by createdAt
698
466
  jobs.sort((a, b) => (asc ? a.createdAt - b.createdAt : b.createdAt - a.createdAt));
699
- // Apply pagination
700
467
  return jobs.slice(start, end);
701
468
  }
702
469
  // ============ DLQ Operations (delegated) ============
@@ -709,58 +476,37 @@ export class QueueManager {
  purgeDlq(queue) {
  return dlqOps.purgeDlqJobs(queue, this.getDlqContext());
  }
- /**
- * Retry a completed job by re-queueing it
- * @param queue - Queue name
- * @param jobId - Specific job ID to retry (optional - retries all if not specified)
- * @returns Number of jobs retried
- */
  retryCompleted(queue, jobId) {
  if (jobId) {
- // Check if job is in completedJobs set
- if (!this.completedJobs.has(jobId)) {
+ if (!this.completedJobs.has(jobId))
  return 0;
- }
- // Get job from storage
  const job = this.storage?.getJob(jobId);
- if (job?.queue !== queue) {
+ if (job?.queue !== queue)
  return 0;
- }
  return this.requeueCompletedJob(job);
  }
- // Retry all completed jobs for queue
  let count = 0;
  for (const id of this.completedJobs) {
  const job = this.storage?.getJob(id);
- if (job?.queue === queue) {
+ if (job?.queue === queue)
  count += this.requeueCompletedJob(job);
- }
  }
  return count;
  }
- /**
- * Internal helper to re-queue a completed job
- */
  requeueCompletedJob(job) {
- // Reset job state
  job.attempts = 0;
  job.startedAt = null;
  job.completedAt = null;
  job.runAt = Date.now();
  job.progress = 0;
- // Re-queue
  const idx = shardIndex(job.queue);
  const shard = this.shards[idx];
  shard.getQueue(job.queue).push(job);
  shard.incrementQueued(job.id, false, job.createdAt, job.queue, job.runAt);
- // Update index
  this.jobIndex.set(job.id, { type: 'queue', shardIdx: idx, queueName: job.queue });
- // Cleanup completed tracking
  this.completedJobs.delete(job.id);
  this.jobResults.delete(job.id);
- // Update storage
  this.storage?.updateForRetry(job);
- // Notify
  shard.notify();
  return 1;
  }
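retryCompleted either re-queues one specific completed job (when a jobId is given and the stored job belongs to the queue) or walks the whole completedJobs set; both paths end in requeueCompletedJob, which resets attempts and progress before pushing the job back onto its shard. A short usage sketch with placeholder identifiers:

    // Returns the number of jobs re-queued (0 if the id is unknown or belongs to another queue).
    const one = manager.retryCompleted('emails', someCompletedJobId);
    const all = manager.retryCompleted('emails');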
@@ -801,11 +547,7 @@ export class QueueManager {
  }
  // ============ Job Logs (delegated) ============
  addLog(jobId, message, level = 'info') {
- return logsOps.addJobLog(jobId, message, {
- jobIndex: this.jobIndex,
- jobLogs: this.jobLogs,
- maxLogsPerJob: this.maxLogsPerJob,
- }, level);
+ return logsOps.addJobLog(jobId, message, { jobIndex: this.jobIndex, jobLogs: this.jobLogs, maxLogsPerJob: this.maxLogsPerJob }, level);
  }
  getLogs(jobId) {
  return logsOps.getJobLogs(jobId, {
@@ -847,515 +589,52 @@ export class QueueManager {
847
589
  subscribe(callback) {
848
590
  return this.eventsManager.subscribe(callback);
849
591
  }
850
- /** Wait for job completion - event-driven, no polling */
851
592
  waitForJobCompletion(jobId, timeoutMs) {
852
593
  return this.eventsManager.waitForJobCompletion(jobId, timeoutMs);
853
594
  }
854
- // ============ Internal State Access (for validation) ============
855
- /** Get job index for dependency validation */
595
+ // ============ Internal State Access ============
856
596
  getJobIndex() {
857
597
  return this.jobIndex;
858
598
  }
859
- /** Get completed jobs set for dependency validation */
860
599
  getCompletedJobs() {
861
600
  return this.completedJobs;
862
601
  }
863
- /**
864
- * Called when a job is completed - schedules deferred dependency check
865
- * This avoids lock order violations by not iterating shards while holding locks
866
- */
602
+ /** Expose shards for testing (internal use only) */
603
+ getShards() {
604
+ return this.shards;
605
+ }
867
606
  onJobCompleted(completedId) {
868
607
  this.pendingDepChecks.add(completedId);
869
608
  }
870
- /**
871
- * Batch version of onJobCompleted - more efficient for large batches
872
- */
873
609
  onJobsCompleted(completedIds) {
874
- for (const id of completedIds) {
610
+ for (const id of completedIds)
875
611
  this.pendingDepChecks.add(id);
876
- }
877
612
  }
878
- /**
879
- * Check if there are any jobs waiting for dependencies
880
- * Used to skip dependency tracking when not needed
881
- */
882
613
  hasPendingDeps() {
883
- // Check if any shard has waiting dependencies
884
614
  for (const shard of this.shards) {
885
615
  if (shard.waitingDeps.size > 0)
886
616
  return true;
887
617
  }
888
618
  return false;
889
619
  }
890
- /**
891
- * Process pending dependency checks in a separate task
892
- * Uses reverse index for O(m) where m = jobs waiting on completed deps
893
- * Instead of O(n) full scan of all waiting deps
894
- */
895
- async processPendingDependencies() {
896
- if (this.pendingDepChecks.size === 0)
897
- return;
898
- // Copy and clear the pending set
899
- const completedIds = Array.from(this.pendingDepChecks);
900
- this.pendingDepChecks.clear();
901
- // Collect jobs to check by shard
902
- const jobsToCheckByShard = new Map();
903
- // Use reverse index to find only affected jobs - O(m) instead of O(n)
904
- for (const completedId of completedIds) {
905
- for (let i = 0; i < SHARD_COUNT; i++) {
906
- const waitingJobIds = this.shards[i].getJobsWaitingFor(completedId);
907
- if (waitingJobIds && waitingJobIds.size > 0) {
908
- let shardJobs = jobsToCheckByShard.get(i);
909
- if (!shardJobs) {
910
- shardJobs = new Set();
911
- jobsToCheckByShard.set(i, shardJobs);
912
- }
913
- for (const jobId of waitingJobIds) {
914
- shardJobs.add(jobId);
915
- }
916
- }
917
- }
918
- }
919
- // Process each shard that has affected jobs - in parallel using Promise.all
920
- await Promise.all(Array.from(jobsToCheckByShard.entries()).map(async ([i, jobIdsToCheck]) => {
921
- const shard = this.shards[i];
922
- const jobsToPromote = [];
923
- // Check only the affected jobs, not all waiting deps
924
- for (const jobId of jobIdsToCheck) {
925
- const job = shard.waitingDeps.get(jobId);
926
- if (job?.dependsOn.every((dep) => this.completedJobs.has(dep))) {
927
- jobsToPromote.push(job);
928
- }
929
- }
930
- // Now acquire lock and modify
931
- if (jobsToPromote.length > 0) {
932
- await withWriteLock(this.shardLocks[i], () => {
933
- const now = Date.now();
934
- for (const job of jobsToPromote) {
935
- if (shard.waitingDeps.has(job.id)) {
936
- shard.waitingDeps.delete(job.id);
937
- // Unregister from dependency index
938
- shard.unregisterDependencies(job.id, job.dependsOn);
939
- shard.getQueue(job.queue).push(job);
940
- // Update running counters for O(1) stats and temporal index
941
- const isDelayed = job.runAt > now;
942
- shard.incrementQueued(job.id, isDelayed, job.createdAt, job.queue, job.runAt);
943
- this.jobIndex.set(job.id, { type: 'queue', shardIdx: i, queueName: job.queue });
944
- }
945
- }
946
- if (jobsToPromote.length > 0) {
947
- shard.notify();
948
- }
949
- });
950
- }
951
- }));
952
- }
953
- // ============ Background Tasks ============
954
- startBackgroundTasks() {
955
- this.cleanupInterval = setInterval(() => {
956
- this.cleanup();
957
- }, this.config.cleanupIntervalMs);
958
- this.timeoutInterval = setInterval(() => {
959
- this.checkJobTimeouts();
960
- }, this.config.jobTimeoutCheckMs);
961
- this.depCheckInterval = setInterval(() => {
962
- this.processPendingDependencies().catch((err) => {
963
- queueLog.error('Dependency check failed', { error: String(err) });
964
- });
965
- }, this.config.dependencyCheckMs);
966
- this.stallCheckInterval = setInterval(() => {
967
- this.checkStalledJobs();
968
- }, this.config.stallCheckMs);
969
- this.dlqMaintenanceInterval = setInterval(() => {
970
- this.performDlqMaintenance();
971
- }, this.config.dlqMaintenanceMs);
972
- // Lock expiration check runs at same interval as stall check
973
- this.lockCheckInterval = setInterval(() => {
974
- this.checkExpiredLocks();
975
- }, this.config.stallCheckMs);
976
- this.cronScheduler.start();
977
- }
978
- checkJobTimeouts() {
979
- const now = Date.now();
980
- for (const procShard of this.processingShards) {
981
- for (const [jobId, job] of procShard) {
982
- if (job.timeout && job.startedAt && now - job.startedAt > job.timeout) {
983
- this.fail(jobId, 'Job timeout exceeded').catch((err) => {
984
- queueLog.error('Failed to mark timed out job as failed', {
985
- jobId: String(jobId),
986
- error: String(err),
987
- });
988
- });
989
- }
990
- }
991
- }
992
- }
993
- /**
994
- * Check for stalled jobs and handle them
995
- * Uses two-phase detection (like BullMQ) to prevent false positives:
996
- * - Phase 1: Jobs marked as candidates in previous check are confirmed stalled
997
- * - Phase 2: Current processing jobs are marked as candidates for next check
998
- */
999
- checkStalledJobs() {
1000
- const now = Date.now();
1001
- const confirmedStalled = [];
1002
- // Phase 1: Check jobs that were candidates from previous cycle
1003
- // If still in processing and still meets stall criteria → confirmed stalled
1004
- for (const jobId of this.stalledCandidates) {
1005
- // Find job in processing shards
1006
- const procIdx = processingShardIndex(String(jobId));
1007
- const job = this.processingShards[procIdx].get(jobId);
1008
- if (!job) {
1009
- // Job completed between checks - not stalled (false positive avoided!)
1010
- this.stalledCandidates.delete(jobId);
1011
- continue;
1012
- }
1013
- const stallConfig = this.shards[shardIndex(job.queue)].getStallConfig(job.queue);
1014
- if (!stallConfig.enabled) {
1015
- this.stalledCandidates.delete(jobId);
1016
- continue;
1017
- }
1018
- // Re-check stall criteria (job might have received heartbeat)
1019
- const action = getStallAction(job, stallConfig, now);
1020
- if (action !== "keep" /* StallAction.Keep */) {
1021
- // Confirmed stalled - was candidate AND still meets criteria
1022
- confirmedStalled.push({ job, action });
1023
- }
1024
- // Remove from candidates (will be re-added in phase 2 if still processing)
1025
- this.stalledCandidates.delete(jobId);
1026
- }
1027
- // Phase 2: Mark current processing jobs as candidates for NEXT check
1028
- for (let i = 0; i < SHARD_COUNT; i++) {
1029
- const procShard = this.processingShards[i];
1030
- for (const [jobId, job] of procShard) {
1031
- const stallConfig = this.shards[shardIndex(job.queue)].getStallConfig(job.queue);
1032
- if (!stallConfig.enabled)
1033
- continue;
1034
- // Only mark as candidate if past grace period and no recent heartbeat
1035
- const action = getStallAction(job, stallConfig, now);
1036
- if (action !== "keep" /* StallAction.Keep */) {
1037
- // Add to candidates - will be checked in NEXT cycle
1038
- this.stalledCandidates.add(jobId);
1039
- }
1040
- }
1041
- }
1042
- // Process confirmed stalled jobs
1043
- for (const { job, action } of confirmedStalled) {
1044
- this.handleStalledJob(job, action).catch((err) => {
1045
- queueLog.error('Failed to handle stalled job', {
1046
- jobId: String(job.id),
1047
- error: String(err),
1048
- });
1049
- });
1050
- }
1051
- }
1052
- /**
1053
- * Handle a stalled job based on the action
1054
- */
1055
- async handleStalledJob(job, action) {
1056
- const idx = shardIndex(job.queue);
1057
- const shard = this.shards[idx];
1058
- const procIdx = processingShardIndex(String(job.id));
1059
- // Emit stalled event
1060
- this.eventsManager.broadcast({
1061
- eventType: "stalled" /* EventType.Stalled */,
1062
- queue: job.queue,
1063
- jobId: job.id,
1064
- timestamp: Date.now(),
1065
- data: { stallCount: job.stallCount + 1, action },
1066
- });
1067
- void this.webhookManager.trigger('stalled', String(job.id), job.queue, {
1068
- data: { stallCount: job.stallCount + 1, action },
1069
- });
1070
- if (action === "move_to_dlq" /* StallAction.MoveToDlq */) {
1071
- // Max stalls reached - move to DLQ
1072
- queueLog.warn('Job exceeded max stalls, moving to DLQ', {
1073
- jobId: String(job.id),
1074
- queue: job.queue,
1075
- stallCount: job.stallCount,
1076
- });
1077
- // Remove from processing
1078
- this.processingShards[procIdx].delete(job.id);
1079
- shard.releaseConcurrency(job.queue);
1080
- // Add to DLQ with stalled reason
1081
- const entry = shard.addToDlq(job, "stalled" /* FailureReason.Stalled */, `Job stalled ${job.stallCount + 1} times`);
1082
- this.jobIndex.set(job.id, { type: 'dlq', queueName: job.queue });
1083
- // Persist DLQ entry
1084
- this.storage?.saveDlqEntry(entry);
1085
- }
1086
- else {
1087
- // Retry - increment stall count and re-queue
1088
- incrementStallCount(job);
1089
- job.attempts++;
1090
- job.startedAt = null;
1091
- job.runAt = Date.now() + calculateBackoff(job);
1092
- job.lastHeartbeat = Date.now();
1093
- queueLog.warn('Job stalled, retrying', {
1094
- jobId: String(job.id),
1095
- queue: job.queue,
1096
- stallCount: job.stallCount,
1097
- attempt: job.attempts,
1098
- });
1099
- // Remove from processing
1100
- this.processingShards[procIdx].delete(job.id);
1101
- shard.releaseConcurrency(job.queue);
1102
- // Re-queue
1103
- shard.getQueue(job.queue).push(job);
1104
- const isDelayed = job.runAt > Date.now();
1105
- shard.incrementQueued(job.id, isDelayed, job.createdAt, job.queue, job.runAt);
1106
- this.jobIndex.set(job.id, { type: 'queue', shardIdx: idx, queueName: job.queue });
1107
- // Persist
1108
- this.storage?.updateForRetry(job);
1109
- }
620
+ // ============ Stats (delegated) ============
621
+ getStats() {
622
+ return statsMgr.getStats(this.getStatsContext(), this.cronScheduler);
1110
623
  }
1111
- /**
1112
- * Perform DLQ maintenance: auto-retry and purge expired
1113
- */
1114
- performDlqMaintenance() {
1115
- const ctx = this.getDlqContext();
1116
- // Process each queue
1117
- for (const queueName of this.queueNamesCache) {
1118
- try {
1119
- // Auto-retry eligible entries
1120
- const retried = dlqOps.processAutoRetry(queueName, ctx);
1121
- if (retried > 0) {
1122
- queueLog.info('DLQ auto-retry completed', { queue: queueName, retried });
1123
- }
1124
- // Purge expired entries
1125
- const purged = dlqOps.purgeExpiredDlq(queueName, ctx);
1126
- if (purged > 0) {
1127
- queueLog.info('DLQ purge completed', { queue: queueName, purged });
1128
- }
1129
- }
1130
- catch (err) {
1131
- queueLog.error('DLQ maintenance failed', { queue: queueName, error: String(err) });
1132
- }
1133
- }
624
+ getMemoryStats() {
625
+ return statsMgr.getMemoryStats(this.getStatsContext());
1134
626
  }
1135
- recover() {
1136
- if (!this.storage)
1137
- return;
1138
- // Load pending jobs
1139
- const now = Date.now();
1140
- for (const job of this.storage.loadPendingJobs()) {
1141
- const idx = shardIndex(job.queue);
1142
- const shard = this.shards[idx];
1143
- shard.getQueue(job.queue).push(job);
1144
- this.jobIndex.set(job.id, { type: 'queue', shardIdx: idx, queueName: job.queue });
1145
- // Update running counters for O(1) stats
1146
- const isDelayed = job.runAt > now;
1147
- shard.incrementQueued(job.id, isDelayed, job.createdAt, job.queue, job.runAt);
1148
- // Register queue name in cache
1149
- this.registerQueueName(job.queue);
1150
- }
1151
- // Load DLQ entries
1152
- const dlqEntries = this.storage.loadDlq();
1153
- let dlqCount = 0;
1154
- for (const [queue, entries] of dlqEntries) {
1155
- const idx = shardIndex(queue);
1156
- const shard = this.shards[idx];
1157
- for (const entry of entries) {
1158
- // Add to shard's DLQ (directly set since we're loading)
1159
- let dlq = shard.dlq.get(queue);
1160
- if (!dlq) {
1161
- dlq = [];
1162
- shard.dlq.set(queue, dlq);
1163
- }
1164
- dlq.push(entry);
1165
- shard.incrementDlq();
1166
- dlqCount++;
1167
- }
1168
- this.registerQueueName(queue);
1169
- }
1170
- if (dlqCount > 0) {
1171
- queueLog.info('Loaded DLQ entries', { count: dlqCount });
1172
- }
1173
- // Load cron jobs
1174
- this.cronScheduler.load(this.storage.loadCronJobs());
1175
- }
1176
- // eslint-disable-next-line complexity
1177
- cleanup() {
1178
- // LRU collections auto-evict, but we still need to clean up:
1179
- // 1. Orphaned processing shard entries (jobs stuck in processing)
1180
- // 2. Stale waiting dependencies
1181
- // 3. Orphaned unique keys and active groups
1182
- // 4. Refresh delayed job counters (jobs that became ready)
1183
- const now = Date.now();
1184
- const stallTimeout = 30 * 60 * 1000; // 30 minutes max for processing
1185
- // Refresh delayed counters - update jobs that have become ready
1186
- for (let i = 0; i < SHARD_COUNT; i++) {
1187
- this.shards[i].refreshDelayedCount(now);
1188
- }
1189
- // Compact priority queues if stale ratio > 20% (reclaim memory)
1190
- for (let i = 0; i < SHARD_COUNT; i++) {
1191
- for (const q of this.shards[i].queues.values()) {
1192
- if (q.needsCompaction(0.2)) {
1193
- q.compact();
1194
- }
1195
- }
1196
- }
1197
- // Clean orphaned processing entries
1198
- for (let i = 0; i < SHARD_COUNT; i++) {
1199
- const orphaned = [];
1200
- for (const [jobId, job] of this.processingShards[i]) {
1201
- if (job.startedAt && now - job.startedAt > stallTimeout) {
1202
- orphaned.push(jobId);
1203
- }
1204
- }
1205
- for (const jobId of orphaned) {
1206
- const job = this.processingShards[i].get(jobId);
1207
- if (job) {
1208
- this.processingShards[i].delete(jobId);
1209
- this.jobIndex.delete(jobId);
1210
- queueLog.warn('Cleaned orphaned processing job', { jobId: String(jobId) });
1211
- }
1212
- }
1213
- }
1214
- // Clean stale waiting dependencies (waiting > 1 hour)
1215
- const depTimeout = 60 * 60 * 1000; // 1 hour
1216
- for (let i = 0; i < SHARD_COUNT; i++) {
1217
- const shard = this.shards[i];
1218
- const stale = [];
1219
- for (const [_id, job] of shard.waitingDeps) {
1220
- if (now - job.createdAt > depTimeout) {
1221
- stale.push(job);
1222
- }
1223
- }
1224
- for (const job of stale) {
1225
- shard.waitingDeps.delete(job.id);
1226
- // Remove from dependency index
1227
- shard.unregisterDependencies(job.id, job.dependsOn);
1228
- this.jobIndex.delete(job.id);
1229
- queueLog.warn('Cleaned stale waiting dependency', { jobId: String(job.id) });
1230
- }
1231
- }
1232
- // Clean orphaned and expired unique keys
1233
- for (let i = 0; i < SHARD_COUNT; i++) {
1234
- const shard = this.shards[i];
1235
- // First, clean expired unique keys
1236
- const expiredCleaned = shard.cleanExpiredUniqueKeys();
1237
- if (expiredCleaned > 0) {
1238
- queueLog.info('Cleaned expired unique keys', { shard: i, removed: expiredCleaned });
1239
- }
1240
- // Then trim if too many keys remain
1241
- for (const [queueName, keys] of shard.uniqueKeys) {
1242
- if (keys.size > 1000) {
1243
- // If too many keys, trim oldest half
1244
- const toRemove = Math.floor(keys.size / 2);
1245
- const iter = keys.keys();
1246
- for (let j = 0; j < toRemove; j++) {
1247
- const { value, done } = iter.next();
1248
- if (done)
1249
- break;
1250
- keys.delete(value);
1251
- }
1252
- queueLog.info('Trimmed unique keys', { queue: queueName, removed: toRemove });
1253
- }
1254
- }
1255
- // Clean orphaned active groups
1256
- for (const [queueName, groups] of shard.activeGroups) {
1257
- if (groups.size > 1000) {
1258
- const toRemove = Math.floor(groups.size / 2);
1259
- const iter = groups.values();
1260
- for (let j = 0; j < toRemove; j++) {
1261
- const { value, done } = iter.next();
1262
- if (done)
1263
- break;
1264
- groups.delete(value);
1265
- }
1266
- queueLog.info('Trimmed active groups', { queue: queueName, removed: toRemove });
1267
- }
1268
- }
1269
- }
1270
- // Clean stale stalledCandidates (jobs no longer in processing)
1271
- for (const jobId of this.stalledCandidates) {
1272
- const loc = this.jobIndex.get(jobId);
1273
- if (loc?.type !== 'processing') {
1274
- this.stalledCandidates.delete(jobId);
1275
- }
1276
- }
1277
- // Clean orphaned jobIndex entries (pointing to invalid locations)
1278
- // This is expensive so only run if index is large
1279
- if (this.jobIndex.size > 100_000) {
1280
- let orphanedCount = 0;
1281
- for (const [jobId, loc] of this.jobIndex) {
1282
- if (loc.type === 'processing') {
1283
- const procIdx = processingShardIndex(String(jobId));
1284
- if (!this.processingShards[procIdx].has(jobId)) {
1285
- this.jobIndex.delete(jobId);
1286
- orphanedCount++;
1287
- }
1288
- }
1289
- else if (loc.type === 'queue') {
1290
- // Check if job still exists in shard
1291
- const shard = this.shards[loc.shardIdx];
1292
- if (!shard.getQueue(loc.queueName).has(jobId)) {
1293
- this.jobIndex.delete(jobId);
1294
- orphanedCount++;
1295
- }
1296
- }
1297
- }
1298
- if (orphanedCount > 0) {
1299
- queueLog.info('Cleaned orphaned jobIndex entries', { count: orphanedCount });
1300
- }
1301
- }
1302
- // Clean orphaned job locks (locks for jobs no longer in processing)
1303
- for (const jobId of this.jobLocks.keys()) {
1304
- const loc = this.jobIndex.get(jobId);
1305
- if (loc?.type !== 'processing') {
1306
- this.jobLocks.delete(jobId);
1307
- }
1308
- }
1309
- // Remove empty queues to free memory (like obliterate but only for empty queues)
1310
- for (let i = 0; i < SHARD_COUNT; i++) {
1311
- const shard = this.shards[i];
1312
- const emptyQueues = [];
1313
- for (const [queueName, queue] of shard.queues) {
1314
- // Queue is empty and has no DLQ entries
1315
- const dlqEntries = shard.dlq.get(queueName);
1316
- if (queue.size === 0 && (!dlqEntries || dlqEntries.length === 0)) {
1317
- emptyQueues.push(queueName);
1318
- }
1319
- }
1320
- for (const queueName of emptyQueues) {
1321
- shard.queues.delete(queueName);
1322
- shard.dlq.delete(queueName);
1323
- shard.uniqueKeys.delete(queueName);
1324
- shard.queueState.delete(queueName);
1325
- shard.activeGroups.delete(queueName);
1326
- shard.rateLimiters.delete(queueName);
1327
- shard.concurrencyLimiters.delete(queueName);
1328
- shard.stallConfig.delete(queueName);
1329
- shard.dlqConfig.delete(queueName);
1330
- this.unregisterQueueName(queueName);
1331
- }
1332
- if (emptyQueues.length > 0) {
1333
- queueLog.info('Removed empty queues', { shard: i, count: emptyQueues.length });
1334
- }
1335
- // Clean orphaned temporal index entries (memory leak fix)
1336
- const cleanedTemporal = shard.cleanOrphanedTemporalEntries();
1337
- if (cleanedTemporal > 0) {
1338
- queueLog.info('Cleaned orphaned temporal entries', { shard: i, count: cleanedTemporal });
1339
- }
1340
- }
627
+ compactMemory() {
628
+ statsMgr.compactMemory(this.getStatsContext());
1341
629
  }
1342
630
  // ============ Lifecycle ============
1343
631
  shutdown() {
1344
632
  this.cronScheduler.stop();
1345
633
  this.workerManager.stop();
1346
634
  this.eventsManager.clear();
1347
- if (this.cleanupInterval)
1348
- clearInterval(this.cleanupInterval);
1349
- if (this.timeoutInterval)
1350
- clearInterval(this.timeoutInterval);
1351
- if (this.depCheckInterval)
1352
- clearInterval(this.depCheckInterval);
1353
- if (this.stallCheckInterval)
1354
- clearInterval(this.stallCheckInterval);
1355
- if (this.dlqMaintenanceInterval)
1356
- clearInterval(this.dlqMaintenanceInterval);
1357
- if (this.lockCheckInterval)
1358
- clearInterval(this.lockCheckInterval);
635
+ if (this.backgroundTaskHandles) {
636
+ bgTasks.stopBackgroundTasks(this.backgroundTaskHandles);
637
+ }
1359
638
  this.storage?.close();
1360
639
  // Clear in-memory collections
1361
640
  this.jobIndex.clear();
@@ -1368,9 +647,8 @@ export class QueueManager {
  this.jobLocks.clear();
  this.stalledCandidates.clear();
  this.clientJobs.clear();
- for (const shard of this.processingShards) {
+ for (const shard of this.processingShards)
  shard.clear();
- }
  for (const shard of this.shards) {
  shard.waitingDeps.clear();
  shard.dependencyIndex.clear();
@@ -1379,106 +657,5 @@ export class QueueManager {
1379
657
  shard.activeGroups.clear();
1380
658
  }
1381
659
  }
1382
- getStats() {
1383
- let waiting = 0, delayed = 0, active = 0, dlq = 0;
1384
- // O(32) instead of O(n) - use running counters from each shard
1385
- for (let i = 0; i < SHARD_COUNT; i++) {
1386
- const shardStats = this.shards[i].getStats();
1387
- const queuedTotal = shardStats.queuedJobs;
1388
- const delayedInShard = shardStats.delayedJobs;
1389
- // waiting = queued jobs that are not delayed
1390
- waiting += Math.max(0, queuedTotal - delayedInShard);
1391
- delayed += delayedInShard;
1392
- dlq += shardStats.dlqJobs;
1393
- active += this.processingShards[i].size;
1394
- }
1395
- const cronStats = this.cronScheduler.getStats();
1396
- return {
1397
- waiting,
1398
- delayed,
1399
- active,
1400
- dlq,
1401
- completed: this.completedJobs.size,
1402
- totalPushed: this.metrics.totalPushed.value,
1403
- totalPulled: this.metrics.totalPulled.value,
1404
- totalCompleted: this.metrics.totalCompleted.value,
1405
- totalFailed: this.metrics.totalFailed.value,
1406
- uptime: Date.now() - this.startTime,
1407
- cronJobs: cronStats.total,
1408
- cronPending: cronStats.pending,
1409
- };
1410
- }
1411
- /**
1412
- * Get detailed memory statistics for debugging memory issues.
1413
- * Returns counts of entries in all major collections.
1414
- */
1415
- getMemoryStats() {
1416
- let processingTotal = 0;
1417
- let queuedTotal = 0;
1418
- let waitingDepsTotal = 0;
1419
- let temporalIndexTotal = 0;
1420
- let delayedHeapTotal = 0;
1421
- for (let i = 0; i < SHARD_COUNT; i++) {
1422
- processingTotal += this.processingShards[i].size;
1423
- const shardStats = this.shards[i].getStats();
1424
- queuedTotal += shardStats.queuedJobs;
1425
- waitingDepsTotal += this.shards[i].waitingDeps.size;
1426
- // Get internal structure sizes
1427
- const internalSizes = this.shards[i].getInternalSizes();
1428
- temporalIndexTotal += internalSizes.temporalIndex;
1429
- delayedHeapTotal += internalSizes.delayedHeap;
1430
- }
1431
- // Count total jobs across all clients
1432
- let clientJobsTotal = 0;
1433
- for (const jobs of this.clientJobs.values()) {
1434
- clientJobsTotal += jobs.size;
1435
- }
1436
- return {
1437
- jobIndex: this.jobIndex.size,
1438
- completedJobs: this.completedJobs.size,
1439
- jobResults: this.jobResults.size,
1440
- jobLogs: this.jobLogs.size,
1441
- customIdMap: this.customIdMap.size,
1442
- jobLocks: this.jobLocks.size,
1443
- clientJobs: this.clientJobs.size,
1444
- clientJobsTotal,
1445
- pendingDepChecks: this.pendingDepChecks.size,
1446
- stalledCandidates: this.stalledCandidates.size,
1447
- processingTotal,
1448
- queuedTotal,
1449
- waitingDepsTotal,
1450
- temporalIndexTotal,
1451
- delayedHeapTotal,
1452
- };
1453
- }
1454
- /**
1455
- * Force compact all collections to reduce memory usage.
1456
- * Use after large batch operations or when memory pressure is high.
1457
- */
1458
- compactMemory() {
1459
- // Compact priority queues that have high stale ratios
1460
- for (let i = 0; i < SHARD_COUNT; i++) {
1461
- for (const q of this.shards[i].queues.values()) {
1462
- if (q.needsCompaction(0.1)) {
1463
- // More aggressive: 10% stale threshold
1464
- q.compact();
1465
- }
1466
- }
1467
- }
1468
- // Clean up empty client tracking entries
1469
- for (const [clientId, jobs] of this.clientJobs) {
1470
- if (jobs.size === 0) {
1471
- this.clientJobs.delete(clientId);
1472
- }
1473
- }
1474
- // Clean orphaned job locks (jobs no longer in processing)
1475
- for (const jobId of this.jobLocks.keys()) {
1476
- const loc = this.jobIndex.get(jobId);
1477
- if (loc?.type !== 'processing') {
1478
- this.jobLocks.delete(jobId);
1479
- }
1480
- }
1481
- queueLog.info('Memory compacted');
1482
- }
1483
660
  }
1484
661
  //# sourceMappingURL=queueManager.js.map
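Because the background intervals are now owned by backgroundTasks, shutdown() stops them through stopBackgroundTasks(handles) instead of clearing six separate interval fields, but callers still only invoke shutdown(). A small assumed-usage sketch of wiring that into process termination; the signal handling is illustrative and not part of the package:

    process.on('SIGTERM', () => {
      manager.shutdown(); // stops cron, workers, events, background tasks; closes SQLite
      process.exit(0);
    });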