bunqueue 1.9.5 → 1.9.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/application/backgroundTasks.d.ts +32 -0
- package/dist/application/backgroundTasks.d.ts.map +1 -0
- package/dist/application/backgroundTasks.js +318 -0
- package/dist/application/backgroundTasks.js.map +1 -0
- package/dist/application/cleanupTasks.d.ts +11 -0
- package/dist/application/cleanupTasks.d.ts.map +1 -0
- package/dist/application/cleanupTasks.js +181 -0
- package/dist/application/cleanupTasks.js.map +1 -0
- package/dist/application/lockManager.d.ts +62 -0
- package/dist/application/lockManager.d.ts.map +1 -0
- package/dist/application/lockManager.js +307 -0
- package/dist/application/lockManager.js.map +1 -0
- package/dist/application/operations/push.js +2 -2
- package/dist/application/operations/push.js.map +1 -1
- package/dist/application/queueManager.d.ts +14 -176
- package/dist/application/queueManager.d.ts.map +1 -1
- package/dist/application/queueManager.js +130 -953
- package/dist/application/queueManager.js.map +1 -1
- package/dist/application/statsManager.d.ts +56 -0
- package/dist/application/statsManager.d.ts.map +1 -0
- package/dist/application/statsManager.js +111 -0
- package/dist/application/statsManager.js.map +1 -0
- package/dist/application/types.d.ts +123 -0
- package/dist/application/types.d.ts.map +1 -0
- package/dist/application/types.js +16 -0
- package/dist/application/types.js.map +1 -0
- package/dist/client/queue/queue.d.ts.map +1 -1
- package/dist/client/queue/queue.js +2 -0
- package/dist/client/queue/queue.js.map +1 -1
- package/dist/client/types.d.ts +6 -0
- package/dist/client/types.d.ts.map +1 -1
- package/dist/client/types.js.map +1 -1
- package/dist/domain/types/command.d.ts +2 -0
- package/dist/domain/types/command.d.ts.map +1 -1
- package/dist/domain/types/job.d.ts +6 -0
- package/dist/domain/types/job.d.ts.map +1 -1
- package/dist/domain/types/job.js.map +1 -1
- package/dist/infrastructure/persistence/sqlite.d.ts +6 -2
- package/dist/infrastructure/persistence/sqlite.d.ts.map +1 -1
- package/dist/infrastructure/persistence/sqlite.js +12 -3
- package/dist/infrastructure/persistence/sqlite.js.map +1 -1
- package/dist/infrastructure/server/handlers/core.d.ts.map +1 -1
- package/dist/infrastructure/server/handlers/core.js +1 -0
- package/dist/infrastructure/server/handlers/core.js.map +1 -1
- package/dist/infrastructure/server/tcp.d.ts.map +1 -1
- package/dist/infrastructure/server/tcp.js +14 -8
- package/dist/infrastructure/server/tcp.js.map +1 -1
- package/package.json +2 -1
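
The bulk of this release is the `queueManager.js` refactor shown below (+130/−953): lock handling, client-job tracking, background tasks, stats, and the default config move out of `QueueManager` into the new `lockManager`, `backgroundTasks`, `statsManager`, and `types` modules, which receive plain context objects built by the manager (`getLockContext()`, `getBackgroundContext()`, `getStatsContext()`). The sketch below only illustrates that delegation pattern; the `JobLock` shape, the TTL default, the `randomUUID` token, and the trimmed-down `LockContext` are hypothetical stand-ins, not bunqueue's actual definitions.

```ts
// Hypothetical, simplified sketch of the context-object delegation pattern
// visible in this diff. Names mirror the diff; types are illustrative only.
import { randomUUID } from 'node:crypto';

type JobId = string;

interface JobLock {
  token: string;
  owner: string;
  expiresAt: number;
}

// A narrow "context" carrying only the manager state a helper needs.
interface LockContext {
  jobLocks: Map<JobId, JobLock>;
}

// Free functions receive the context instead of living on the class.
function createLock(jobId: JobId, owner: string, ctx: LockContext, ttlMs = 30_000): string | null {
  if (ctx.jobLocks.has(jobId)) return null; // defensive: lock already held
  const lock: JobLock = { token: randomUUID(), owner, expiresAt: Date.now() + ttlMs };
  ctx.jobLocks.set(jobId, lock);
  return lock.token;
}

function verifyLock(jobId: JobId, token: string, ctx: LockContext): boolean {
  const lock = ctx.jobLocks.get(jobId);
  return !!lock && lock.token === token && lock.expiresAt > Date.now();
}

function releaseLock(jobId: JobId, ctx: LockContext, token?: string): boolean {
  const lock = ctx.jobLocks.get(jobId);
  if (!lock) return true;                           // nothing to release
  if (token && lock.token !== token) return false;  // token mismatch
  ctx.jobLocks.delete(jobId);
  return true;
}

// The manager only builds the context and forwards calls, as in the diff.
class QueueManagerSketch {
  private jobLocks = new Map<JobId, JobLock>();
  private getLockContext(): LockContext {
    return { jobLocks: this.jobLocks };
  }
  verifyLock(jobId: JobId, token: string): boolean {
    return verifyLock(jobId, token, this.getLockContext());
  }
}
```

The public `QueueManager` methods keep their old signatures, so callers are unaffected; only the implementation is relocated behind the context objects.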
package/dist/application/queueManager.js

@@ -2,17 +2,15 @@
  * Queue Manager
  * Core orchestrator for all queue operations
  */
-import {
-import { queueLog } from '../shared/logger';
-import { getStallAction, incrementStallCount } from '../domain/types/stall';
+import { DEFAULT_LOCK_TTL } from '../domain/types/job';
 import { Shard } from '../domain/queue/shard';
 import { SqliteStorage } from '../infrastructure/persistence/sqlite';
 import { CronScheduler } from '../infrastructure/scheduler/cronScheduler';
 import { WebhookManager } from './webhookManager';
 import { WorkerManager } from './workerManager';
 import { EventsManager } from './eventsManager';
-import { RWLock
-import { shardIndex,
+import { RWLock } from '../shared/lock';
+import { shardIndex, SHARD_COUNT } from '../shared/hash';
 import { pushJob, pushJobBatch } from './operations/push';
 import { pullJob, pullJobBatch } from './operations/pull';
 import { ackJob, ackJobBatch, ackJobBatchWithResults, failJob, } from './operations/ack';
@@ -23,18 +21,10 @@ import * as dlqOps from './dlqManager';
 import * as logsOps from './jobLogsManager';
 import { generatePrometheusMetrics } from './metricsExporter';
 import { LRUMap, BoundedSet, BoundedMap } from '../shared/lru';
-
-
-
-
-maxCustomIds: 50_000,
-maxWaitingDeps: 10_000,
-cleanupIntervalMs: 10_000,
-jobTimeoutCheckMs: 5_000,
-dependencyCheckMs: 1_000,
-stallCheckMs: 5_000,
-dlqMaintenanceMs: 60_000,
-};
+import { DEFAULT_CONFIG } from './types';
+import * as lockMgr from './lockManager';
+import * as bgTasks from './backgroundTasks';
+import * as statsMgr from './statsManager';
 /**
  * QueueManager - Central coordinator
  */
@@ -52,17 +42,12 @@ export class QueueManager {
 jobResults;
 customIdMap;
 jobLogs;
-// Deferred dependency resolution queue
+// Deferred dependency resolution queue
 pendingDepChecks = new Set();
-
-// Two-phase stall detection (like BullMQ)
-// Jobs are added here on first check, confirmed stalled on second check
+// Two-phase stall detection
 stalledCandidates = new Set();
 // Lock-based job ownership tracking (BullMQ-style)
-// Maps jobId to lock info (token, owner, expiration)
 jobLocks = new Map();
-// Client-job tracking for connection-based release
-// When a TCP connection closes, all jobs owned by that client are released
 clientJobs = new Map();
 // Cron scheduler
 cronScheduler;
@@ -80,18 +65,14 @@ export class QueueManager {
 totalFailed: { value: 0n },
 };
 startTime = Date.now();
-// Background
-
-
-stallCheckInterval = null;
-dlqMaintenanceInterval = null;
-lockCheckInterval = null;
-// Queue names cache for O(1) listQueues instead of O(32 * queues)
+// Background task handles
+backgroundTaskHandles = null;
+// Queue names cache for O(1) listQueues
 queueNamesCache = new Set();
 constructor(config = {}) {
 this.config = { ...DEFAULT_CONFIG, ...config };
 this.storage = config.dataPath ? new SqliteStorage({ path: config.dataPath }) : null;
-// Initialize bounded collections
+// Initialize bounded collections
 this.completedJobs = new BoundedSet(this.config.maxCompletedJobs, (jobId) => {
 this.jobIndex.delete(jobId);
 });
@@ -115,10 +96,70 @@ export class QueueManager {
 this.workerManager = new WorkerManager();
 this.eventsManager = new EventsManager(this.webhookManager);
 // Load and start
-
-
+bgTasks.recover(this.getBackgroundContext());
+// Load cron jobs from storage
+if (this.storage) {
+this.cronScheduler.load(this.storage.loadCronJobs());
+}
+this.backgroundTaskHandles = bgTasks.startBackgroundTasks(this.getBackgroundContext(), this.cronScheduler);
 }
 // ============ Context Builders ============
+getLockContext() {
+return {
+jobIndex: this.jobIndex,
+jobLocks: this.jobLocks,
+clientJobs: this.clientJobs,
+processingShards: this.processingShards,
+processingLocks: this.processingLocks,
+shards: this.shards,
+shardLocks: this.shardLocks,
+eventsManager: this.eventsManager,
+};
+}
+getBackgroundContext() {
+return {
+config: this.config,
+storage: this.storage,
+shards: this.shards,
+shardLocks: this.shardLocks,
+processingShards: this.processingShards,
+processingLocks: this.processingLocks,
+jobIndex: this.jobIndex,
+completedJobs: this.completedJobs,
+jobResults: this.jobResults,
+customIdMap: this.customIdMap,
+jobLogs: this.jobLogs,
+jobLocks: this.jobLocks,
+clientJobs: this.clientJobs,
+stalledCandidates: this.stalledCandidates,
+pendingDepChecks: this.pendingDepChecks,
+queueNamesCache: this.queueNamesCache,
+eventsManager: this.eventsManager,
+webhookManager: this.webhookManager,
+metrics: this.metrics,
+startTime: this.startTime,
+fail: this.fail.bind(this),
+registerQueueName: this.registerQueueName.bind(this),
+unregisterQueueName: this.unregisterQueueName.bind(this),
+};
+}
+getStatsContext() {
+return {
+shards: this.shards,
+processingShards: this.processingShards,
+completedJobs: this.completedJobs,
+jobIndex: this.jobIndex,
+jobResults: this.jobResults,
+jobLogs: this.jobLogs,
+customIdMap: this.customIdMap,
+jobLocks: this.jobLocks,
+clientJobs: this.clientJobs,
+pendingDepChecks: this.pendingDepChecks,
+stalledCandidates: this.stalledCandidates,
+metrics: this.metrics,
+startTime: this.startTime,
+};
+}
 getPushContext() {
 return {
 storage: this.storage,
@@ -163,7 +204,6 @@ export class QueueManager {
 onRepeat: this.handleRepeat.bind(this),
 };
 }
-/** Handle repeatable job - re-queue with incremented count */
 handleRepeat(job) {
 if (!job.repeat)
 return;
@@ -222,110 +262,83 @@ export class QueueManager {
 }
 // ============ Core Operations ============
 async push(queue, input) {
-// Register queue name in cache for O(1) listQueues
 this.registerQueueName(queue);
 return pushJob(queue, input, this.getPushContext());
 }
 async pushBatch(queue, inputs) {
-// Register queue name in cache for O(1) listQueues
 this.registerQueueName(queue);
 return pushJobBatch(queue, inputs, this.getPushContext());
 }
 async pull(queue, timeoutMs = 0) {
 return pullJob(queue, timeoutMs, this.getPullContext());
 }
-/**
-* Pull a job and create a lock for it (BullMQ-style).
-* Returns both the job and its lock token for ownership verification.
-*/
 async pullWithLock(queue, owner, timeoutMs = 0, lockTtl = DEFAULT_LOCK_TTL) {
 const job = await pullJob(queue, timeoutMs, this.getPullContext());
 if (!job)
 return { job: null, token: null };
-const token =
+const token = lockMgr.createLock(job.id, owner, this.getLockContext(), lockTtl);
 return { job, token };
 }
-/** Pull multiple jobs in single lock acquisition - O(1) instead of O(n) locks */
 async pullBatch(queue, count, timeoutMs = 0) {
 return pullJobBatch(queue, count, timeoutMs, this.getPullContext());
 }
-/**
-* Pull multiple jobs and create locks for them (BullMQ-style).
-* Returns both jobs and their lock tokens for ownership verification.
-*/
 async pullBatchWithLock(queue, count, owner, timeoutMs = 0, lockTtl = DEFAULT_LOCK_TTL) {
 const jobs = await pullJobBatch(queue, count, timeoutMs, this.getPullContext());
 const tokens = [];
 for (const job of jobs) {
-const token =
+const token = lockMgr.createLock(job.id, owner, this.getLockContext(), lockTtl);
 tokens.push(token ?? '');
 }
 return { jobs, tokens };
 }
 async ack(jobId, result, token) {
-
-if (token && !this.verifyLock(jobId, token)) {
+if (token && !lockMgr.verifyLock(jobId, token, this.getLockContext())) {
 throw new Error(`Invalid or expired lock token for job ${jobId}`);
 }
 await ackJob(jobId, result, this.getAckContext());
-
-this.releaseLock(jobId, token);
+lockMgr.releaseLock(jobId, this.getLockContext(), token);
 }
-/** Acknowledge multiple jobs in parallel with Promise.all */
 async ackBatch(jobIds, tokens) {
-// Verify all tokens first if provided
 if (tokens?.length === jobIds.length) {
 for (let i = 0; i < jobIds.length; i++) {
 const t = tokens[i];
-if (t && !
+if (t && !lockMgr.verifyLock(jobIds[i], t, this.getLockContext())) {
 throw new Error(`Invalid or expired lock token for job ${jobIds[i]}`);
 }
 }
 }
 await ackJobBatch(jobIds, this.getAckContext());
-// Release locks after successful ack
 if (tokens) {
 for (let i = 0; i < jobIds.length; i++) {
-
+lockMgr.releaseLock(jobIds[i], this.getLockContext(), tokens[i]);
 }
 }
 }
-/** Acknowledge multiple jobs with individual results - batch optimized */
 async ackBatchWithResults(items) {
-// Verify all tokens first if provided
 for (const item of items) {
-if (item.token && !
+if (item.token && !lockMgr.verifyLock(item.id, item.token, this.getLockContext())) {
 throw new Error(`Invalid or expired lock token for job ${item.id}`);
 }
 }
 await ackJobBatchWithResults(items, this.getAckContext());
-// Release locks after successful ack
 for (const item of items) {
-
+lockMgr.releaseLock(item.id, this.getLockContext(), item.token);
 }
 }
 async fail(jobId, error, token) {
-
-if (token && !this.verifyLock(jobId, token)) {
+if (token && !lockMgr.verifyLock(jobId, token, this.getLockContext())) {
 throw new Error(`Invalid or expired lock token for job ${jobId}`);
 }
 await failJob(jobId, error, this.getAckContext());
-
-this.releaseLock(jobId, token);
+lockMgr.releaseLock(jobId, this.getLockContext(), token);
 }
-/**
-* Update job heartbeat for stall detection (single job).
-* If token is provided, also renews the lock.
-*/
 jobHeartbeat(jobId, token) {
 const loc = this.jobIndex.get(jobId);
 if (loc?.type !== 'processing')
 return false;
-// If token provided, renew lock (which also updates heartbeat)
 if (token) {
-return
+return lockMgr.renewJobLock(jobId, token, this.getLockContext());
 }
-// Legacy mode: just update heartbeat without token verification
 const processing = this.processingShards[loc.shardIdx];
 const job = processing.get(jobId);
 if (job) {
@@ -334,262 +347,42 @@ export class QueueManager {
 }
 return false;
 }
-/**
-* Update job heartbeat for multiple jobs (batch).
-* If tokens are provided, also renews the locks.
-*/
 jobHeartbeatBatch(jobIds, tokens) {
 let count = 0;
 for (let i = 0; i < jobIds.length; i++) {
-
-if (this.jobHeartbeat(jobIds[i], token))
+if (this.jobHeartbeat(jobIds[i], tokens?.[i]))
 count++;
 }
 return count;
 }
-// ============ Lock Management (
-/**
-* Create a lock for a job when it's pulled for processing.
-* @returns The lock token, or null if job not in processing
-*/
+// ============ Lock Management (delegated) ============
 createLock(jobId, owner, ttl = DEFAULT_LOCK_TTL) {
-
-
-return null;
-// Check if lock already exists (shouldn't happen, but defensive)
-if (this.jobLocks.has(jobId)) {
-queueLog.warn('Lock already exists for job', { jobId: String(jobId), owner });
-return null;
-}
-const lock = createJobLock(jobId, owner, ttl);
-this.jobLocks.set(jobId, lock);
-return lock.token;
-}
-/**
-* Verify that a token is valid for a job.
-* @returns true if token matches the active lock
-*/
+return lockMgr.createLock(jobId, owner, this.getLockContext(), ttl);
+}
 verifyLock(jobId, token) {
-
-if (!lock)
-return false;
-if (lock.token !== token)
-return false;
-if (isLockExpired(lock))
-return false;
-return true;
+return lockMgr.verifyLock(jobId, token, this.getLockContext());
 }
-/**
-* Renew a lock with the given token.
-* @returns true if renewal succeeded, false if token invalid or lock expired
-*/
 renewJobLock(jobId, token, newTtl) {
-
-if (!lock)
-return false;
-if (lock.token !== token)
-return false;
-if (isLockExpired(lock)) {
-// Lock already expired, remove it
-this.jobLocks.delete(jobId);
-return false;
-}
-renewLock(lock, newTtl);
-// Also update lastHeartbeat on the job (for legacy stall detection compatibility)
-const loc = this.jobIndex.get(jobId);
-if (loc?.type === 'processing') {
-const job = this.processingShards[loc.shardIdx].get(jobId);
-if (job)
-job.lastHeartbeat = Date.now();
-}
-return true;
+return lockMgr.renewJobLock(jobId, token, this.getLockContext(), newTtl);
 }
-/**
-* Renew locks for multiple jobs (batch operation).
-* @returns Array of jobIds that were successfully renewed
-*/
 renewJobLockBatch(items) {
-
-for (const item of items) {
-if (this.renewJobLock(item.id, item.token, item.ttl)) {
-renewed.push(String(item.id));
-}
-}
-return renewed;
+return lockMgr.renewJobLockBatch(items, this.getLockContext());
 }
-/**
-* Release a lock when job is completed or failed.
-* Should be called by ACK/FAIL operations.
-*/
 releaseLock(jobId, token) {
-
-if (!lock)
-return true; // No lock to release
-// If token provided, verify it matches
-if (token && lock.token !== token) {
-queueLog.warn('Token mismatch on lock release', {
-jobId: String(jobId),
-expected: lock.token.substring(0, 8),
-got: token.substring(0, 8),
-});
-return false;
-}
-this.jobLocks.delete(jobId);
-return true;
+return lockMgr.releaseLock(jobId, this.getLockContext(), token);
 }
-/**
-* Get lock info for a job (for debugging/monitoring).
-*/
 getLockInfo(jobId) {
-return this.
+return lockMgr.getLockInfo(jobId, this.getLockContext());
 }
-// ============ Client-Job Tracking ============
-/**
-* Register a job as owned by a client (called on PULL).
-*/
+// ============ Client-Job Tracking (delegated) ============
 registerClientJob(clientId, jobId) {
-
-if (!jobs) {
-jobs = new Set();
-this.clientJobs.set(clientId, jobs);
-}
-jobs.add(jobId);
+lockMgr.registerClientJob(clientId, jobId, this.getLockContext());
 }
-/**
-* Unregister a job from a client (called on ACK/FAIL).
-*/
 unregisterClientJob(clientId, jobId) {
-
-return;
-const jobs = this.clientJobs.get(clientId);
-if (jobs) {
-jobs.delete(jobId);
-if (jobs.size === 0) {
-this.clientJobs.delete(clientId);
-}
-}
+lockMgr.unregisterClientJob(clientId, jobId, this.getLockContext());
 }
-/**
-* Release all jobs owned by a client back to queue (called on TCP disconnect).
-* Returns the number of jobs released.
-*/
 releaseClientJobs(clientId) {
-
-if (!jobs || jobs.size === 0) {
-this.clientJobs.delete(clientId);
-return 0;
-}
-let released = 0;
-const now = Date.now();
-for (const jobId of jobs) {
-const loc = this.jobIndex.get(jobId);
-if (loc?.type !== 'processing')
-continue;
-const procIdx = loc.shardIdx;
-const job = this.processingShards[procIdx].get(jobId);
-if (!job)
-continue;
-// Remove from processing
-this.processingShards[procIdx].delete(jobId);
-// Release lock if exists
-this.jobLocks.delete(jobId);
-// Release concurrency
-const idx = shardIndex(job.queue);
-const shard = this.shards[idx];
-shard.releaseConcurrency(job.queue);
-// Release group if active
-if (job.groupId) {
-shard.releaseGroup(job.queue, job.groupId);
-}
-// Reset job state for retry
-job.startedAt = null;
-job.lastHeartbeat = now;
-// Re-queue the job
-shard.getQueue(job.queue).push(job);
-const isDelayed = job.runAt > now;
-shard.incrementQueued(jobId, isDelayed, job.createdAt, job.queue, job.runAt);
-this.jobIndex.set(jobId, { type: 'queue', shardIdx: idx, queueName: job.queue });
-released++;
-}
-// Clear client tracking
-this.clientJobs.delete(clientId);
-if (released > 0) {
-queueLog.info('Released client jobs', { clientId: clientId.substring(0, 8), released });
-}
-return released;
-}
-/**
-* Check and handle expired locks.
-* Jobs with expired locks are requeued for retry.
-*/
-checkExpiredLocks() {
-const now = Date.now();
-const expired = [];
-for (const [jobId, lock] of this.jobLocks) {
-if (isLockExpired(lock, now)) {
-expired.push({ jobId, lock });
-}
-}
-for (const { jobId, lock } of expired) {
-const procIdx = processingShardIndex(String(jobId));
-const job = this.processingShards[procIdx].get(jobId);
-if (job) {
-const idx = shardIndex(job.queue);
-const shard = this.shards[idx];
-const queue = shard.getQueue(job.queue);
-// Remove from processing
-this.processingShards[procIdx].delete(jobId);
-// Increment attempts and reset state
-job.attempts++;
-job.startedAt = null;
-job.lastHeartbeat = now;
-job.stallCount++;
-// Check if max stalls exceeded
-const stallConfig = shard.getStallConfig(job.queue);
-if (stallConfig.maxStalls > 0 && job.stallCount >= stallConfig.maxStalls) {
-// Move to DLQ using shard's addToDlq method
-shard.addToDlq(job, "stalled" /* FailureReason.Stalled */, `Lock expired after ${lock.renewalCount} renewals`);
-this.jobIndex.set(jobId, { type: 'dlq', queueName: job.queue });
-queueLog.warn('Job moved to DLQ due to lock expiration', {
-jobId: String(jobId),
-queue: job.queue,
-owner: lock.owner,
-renewals: lock.renewalCount,
-stallCount: job.stallCount,
-});
-this.eventsManager.broadcast({
-eventType: "failed" /* EventType.Failed */,
-jobId,
-queue: job.queue,
-timestamp: now,
-error: 'Lock expired (max stalls reached)',
-});
-}
-else {
-// Requeue for retry (always push - priority queue handles ordering)
-queue.push(job);
-this.jobIndex.set(jobId, { type: 'queue', shardIdx: idx, queueName: job.queue });
-queueLog.info('Job requeued due to lock expiration', {
-jobId: String(jobId),
-queue: job.queue,
-owner: lock.owner,
-renewals: lock.renewalCount,
-attempt: job.attempts,
-});
-this.eventsManager.broadcast({
-eventType: "stalled" /* EventType.Stalled */,
-jobId,
-queue: job.queue,
-timestamp: now,
-});
-}
-}
-// Remove the expired lock
-this.jobLocks.delete(jobId);
-}
-if (expired.length > 0) {
-queueLog.info('Processed expired locks', { count: expired.length });
-}
+return lockMgr.releaseClientJobs(clientId, this.getLockContext());
 }
 // ============ Query Operations (delegated) ============
 async getJob(jobId) {
@@ -622,81 +415,55 @@ export class QueueManager {
 }
 obliterate(queue) {
 queueControl.obliterateQueue(queue, { shards: this.shards, jobIndex: this.jobIndex });
-// Remove from cache
 this.unregisterQueueName(queue);
 }
 listQueues() {
-// O(1) using cache instead of O(32 * queues) iterating all shards
 return Array.from(this.queueNamesCache);
 }
-/** Register queue name in cache - called when first job is pushed */
 registerQueueName(queue) {
 this.queueNamesCache.add(queue);
 }
-/** Unregister queue name from cache - called on obliterate */
 unregisterQueueName(queue) {
 this.queueNamesCache.delete(queue);
 }
 clean(queue, graceMs, state, limit) {
 return queueControl.cleanQueue(queue, graceMs, { shards: this.shards, jobIndex: this.jobIndex }, state, limit);
 }
-/** Get job counts grouped by priority for a queue */
 getCountsPerPriority(queue) {
 const idx = shardIndex(queue);
 const counts = this.shards[idx].getCountsPerPriority(queue);
 return Object.fromEntries(counts);
 }
-/**
-* Get jobs with filtering and pagination
-* @param queue - Queue name
-* @param options - Filter options
-* @returns Array of jobs matching the criteria
-*/
 getJobs(queue, options = {}) {
 const { state, start = 0, end = 100, asc = true } = options;
 const idx = shardIndex(queue);
 const shard = this.shards[idx];
 const now = Date.now();
 const jobs = [];
-// Collect jobs based on state filter
 if (!state || state === 'waiting') {
-
-
+jobs.push(...shard
+.getQueue(queue)
+.values()
+.filter((j) => j.runAt <= now));
 }
 if (!state || state === 'delayed') {
-
-
+jobs.push(...shard
+.getQueue(queue)
+.values()
+.filter((j) => j.runAt > now));
 }
 if (!state || state === 'active') {
 for (let i = 0; i < SHARD_COUNT; i++) {
 for (const job of this.processingShards[i].values()) {
-if (job.queue === queue)
+if (job.queue === queue)
 jobs.push(job);
-}
 }
 }
 }
 if (!state || state === 'failed') {
-
-jobs.push(...dlqJobs);
-}
-// For completed jobs, check completed jobs set
-if (state === 'completed') {
-// Iterate completedJobs and filter by queue
-// Note: This is not efficient for large sets, but provides the data
-for (const jobId of this.completedJobs) {
-const result = this.jobResults.get(jobId);
-if (result) {
-// We don't have the full job object for completed jobs in memory
-// Just count them or return IDs - for now skip completed state
-}
-}
-// Completed jobs are stored in SQLite, would need storage access
-// For now, return empty for completed state if not in DLQ
+jobs.push(...shard.getDlq(queue));
 }
-// Sort by createdAt
 jobs.sort((a, b) => (asc ? a.createdAt - b.createdAt : b.createdAt - a.createdAt));
-// Apply pagination
 return jobs.slice(start, end);
 }
 // ============ DLQ Operations (delegated) ============
@@ -709,58 +476,37 @@ export class QueueManager {
 purgeDlq(queue) {
 return dlqOps.purgeDlqJobs(queue, this.getDlqContext());
 }
-/**
-* Retry a completed job by re-queueing it
-* @param queue - Queue name
-* @param jobId - Specific job ID to retry (optional - retries all if not specified)
-* @returns Number of jobs retried
-*/
 retryCompleted(queue, jobId) {
 if (jobId) {
-
-if (!this.completedJobs.has(jobId)) {
+if (!this.completedJobs.has(jobId))
 return 0;
-}
-// Get job from storage
 const job = this.storage?.getJob(jobId);
-if (job?.queue !== queue)
+if (job?.queue !== queue)
 return 0;
-}
 return this.requeueCompletedJob(job);
 }
-// Retry all completed jobs for queue
 let count = 0;
 for (const id of this.completedJobs) {
 const job = this.storage?.getJob(id);
-if (job?.queue === queue)
+if (job?.queue === queue)
 count += this.requeueCompletedJob(job);
-}
 }
 return count;
 }
-/**
-* Internal helper to re-queue a completed job
-*/
 requeueCompletedJob(job) {
-// Reset job state
 job.attempts = 0;
 job.startedAt = null;
 job.completedAt = null;
 job.runAt = Date.now();
 job.progress = 0;
-// Re-queue
 const idx = shardIndex(job.queue);
 const shard = this.shards[idx];
 shard.getQueue(job.queue).push(job);
 shard.incrementQueued(job.id, false, job.createdAt, job.queue, job.runAt);
-// Update index
 this.jobIndex.set(job.id, { type: 'queue', shardIdx: idx, queueName: job.queue });
-// Cleanup completed tracking
 this.completedJobs.delete(job.id);
 this.jobResults.delete(job.id);
-// Update storage
 this.storage?.updateForRetry(job);
-// Notify
 shard.notify();
 return 1;
 }
@@ -801,11 +547,7 @@ export class QueueManager {
 }
 // ============ Job Logs (delegated) ============
 addLog(jobId, message, level = 'info') {
-return logsOps.addJobLog(jobId, message, {
-jobIndex: this.jobIndex,
-jobLogs: this.jobLogs,
-maxLogsPerJob: this.maxLogsPerJob,
-}, level);
+return logsOps.addJobLog(jobId, message, { jobIndex: this.jobIndex, jobLogs: this.jobLogs, maxLogsPerJob: this.maxLogsPerJob }, level);
 }
 getLogs(jobId) {
 return logsOps.getJobLogs(jobId, {
@@ -847,515 +589,52 @@ export class QueueManager {
 subscribe(callback) {
 return this.eventsManager.subscribe(callback);
 }
-/** Wait for job completion - event-driven, no polling */
 waitForJobCompletion(jobId, timeoutMs) {
 return this.eventsManager.waitForJobCompletion(jobId, timeoutMs);
 }
-// ============ Internal State Access
-/** Get job index for dependency validation */
+// ============ Internal State Access ============
 getJobIndex() {
 return this.jobIndex;
 }
-/** Get completed jobs set for dependency validation */
 getCompletedJobs() {
 return this.completedJobs;
 }
-/**
-
-
-
+/** Expose shards for testing (internal use only) */
+getShards() {
+return this.shards;
+}
 onJobCompleted(completedId) {
 this.pendingDepChecks.add(completedId);
 }
-/**
-* Batch version of onJobCompleted - more efficient for large batches
-*/
 onJobsCompleted(completedIds) {
-for (const id of completedIds)
+for (const id of completedIds)
 this.pendingDepChecks.add(id);
-}
 }
-/**
-* Check if there are any jobs waiting for dependencies
-* Used to skip dependency tracking when not needed
-*/
 hasPendingDeps() {
-// Check if any shard has waiting dependencies
 for (const shard of this.shards) {
 if (shard.waitingDeps.size > 0)
 return true;
 }
 return false;
 }
-
-
-
-* Instead of O(n) full scan of all waiting deps
-*/
-async processPendingDependencies() {
-if (this.pendingDepChecks.size === 0)
-return;
-// Copy and clear the pending set
-const completedIds = Array.from(this.pendingDepChecks);
-this.pendingDepChecks.clear();
-// Collect jobs to check by shard
-const jobsToCheckByShard = new Map();
-// Use reverse index to find only affected jobs - O(m) instead of O(n)
-for (const completedId of completedIds) {
-for (let i = 0; i < SHARD_COUNT; i++) {
-const waitingJobIds = this.shards[i].getJobsWaitingFor(completedId);
-if (waitingJobIds && waitingJobIds.size > 0) {
-let shardJobs = jobsToCheckByShard.get(i);
-if (!shardJobs) {
-shardJobs = new Set();
-jobsToCheckByShard.set(i, shardJobs);
-}
-for (const jobId of waitingJobIds) {
-shardJobs.add(jobId);
-}
-}
-}
-}
-// Process each shard that has affected jobs - in parallel using Promise.all
-await Promise.all(Array.from(jobsToCheckByShard.entries()).map(async ([i, jobIdsToCheck]) => {
-const shard = this.shards[i];
-const jobsToPromote = [];
-// Check only the affected jobs, not all waiting deps
-for (const jobId of jobIdsToCheck) {
-const job = shard.waitingDeps.get(jobId);
-if (job?.dependsOn.every((dep) => this.completedJobs.has(dep))) {
-jobsToPromote.push(job);
-}
-}
-// Now acquire lock and modify
-if (jobsToPromote.length > 0) {
-await withWriteLock(this.shardLocks[i], () => {
-const now = Date.now();
-for (const job of jobsToPromote) {
-if (shard.waitingDeps.has(job.id)) {
-shard.waitingDeps.delete(job.id);
-// Unregister from dependency index
-shard.unregisterDependencies(job.id, job.dependsOn);
-shard.getQueue(job.queue).push(job);
-// Update running counters for O(1) stats and temporal index
-const isDelayed = job.runAt > now;
-shard.incrementQueued(job.id, isDelayed, job.createdAt, job.queue, job.runAt);
-this.jobIndex.set(job.id, { type: 'queue', shardIdx: i, queueName: job.queue });
-}
-}
-if (jobsToPromote.length > 0) {
-shard.notify();
-}
-});
-}
-}));
-}
-// ============ Background Tasks ============
-startBackgroundTasks() {
-this.cleanupInterval = setInterval(() => {
-this.cleanup();
-}, this.config.cleanupIntervalMs);
-this.timeoutInterval = setInterval(() => {
-this.checkJobTimeouts();
-}, this.config.jobTimeoutCheckMs);
-this.depCheckInterval = setInterval(() => {
-this.processPendingDependencies().catch((err) => {
-queueLog.error('Dependency check failed', { error: String(err) });
-});
-}, this.config.dependencyCheckMs);
-this.stallCheckInterval = setInterval(() => {
-this.checkStalledJobs();
-}, this.config.stallCheckMs);
-this.dlqMaintenanceInterval = setInterval(() => {
-this.performDlqMaintenance();
-}, this.config.dlqMaintenanceMs);
-// Lock expiration check runs at same interval as stall check
-this.lockCheckInterval = setInterval(() => {
-this.checkExpiredLocks();
-}, this.config.stallCheckMs);
-this.cronScheduler.start();
-}
-checkJobTimeouts() {
-const now = Date.now();
-for (const procShard of this.processingShards) {
-for (const [jobId, job] of procShard) {
-if (job.timeout && job.startedAt && now - job.startedAt > job.timeout) {
-this.fail(jobId, 'Job timeout exceeded').catch((err) => {
-queueLog.error('Failed to mark timed out job as failed', {
-jobId: String(jobId),
-error: String(err),
-});
-});
-}
-}
-}
-}
-/**
-* Check for stalled jobs and handle them
-* Uses two-phase detection (like BullMQ) to prevent false positives:
-* - Phase 1: Jobs marked as candidates in previous check are confirmed stalled
-* - Phase 2: Current processing jobs are marked as candidates for next check
-*/
-checkStalledJobs() {
-const now = Date.now();
-const confirmedStalled = [];
-// Phase 1: Check jobs that were candidates from previous cycle
-// If still in processing and still meets stall criteria → confirmed stalled
-for (const jobId of this.stalledCandidates) {
-// Find job in processing shards
-const procIdx = processingShardIndex(String(jobId));
-const job = this.processingShards[procIdx].get(jobId);
-if (!job) {
-// Job completed between checks - not stalled (false positive avoided!)
-this.stalledCandidates.delete(jobId);
-continue;
-}
-const stallConfig = this.shards[shardIndex(job.queue)].getStallConfig(job.queue);
-if (!stallConfig.enabled) {
-this.stalledCandidates.delete(jobId);
-continue;
-}
-// Re-check stall criteria (job might have received heartbeat)
-const action = getStallAction(job, stallConfig, now);
-if (action !== "keep" /* StallAction.Keep */) {
-// Confirmed stalled - was candidate AND still meets criteria
-confirmedStalled.push({ job, action });
-}
-// Remove from candidates (will be re-added in phase 2 if still processing)
-this.stalledCandidates.delete(jobId);
-}
-// Phase 2: Mark current processing jobs as candidates for NEXT check
-for (let i = 0; i < SHARD_COUNT; i++) {
-const procShard = this.processingShards[i];
-for (const [jobId, job] of procShard) {
-const stallConfig = this.shards[shardIndex(job.queue)].getStallConfig(job.queue);
-if (!stallConfig.enabled)
-continue;
-// Only mark as candidate if past grace period and no recent heartbeat
-const action = getStallAction(job, stallConfig, now);
-if (action !== "keep" /* StallAction.Keep */) {
-// Add to candidates - will be checked in NEXT cycle
-this.stalledCandidates.add(jobId);
-}
-}
-}
-// Process confirmed stalled jobs
-for (const { job, action } of confirmedStalled) {
-this.handleStalledJob(job, action).catch((err) => {
-queueLog.error('Failed to handle stalled job', {
-jobId: String(job.id),
-error: String(err),
-});
-});
-}
-}
-/**
-* Handle a stalled job based on the action
-*/
-async handleStalledJob(job, action) {
-const idx = shardIndex(job.queue);
-const shard = this.shards[idx];
-const procIdx = processingShardIndex(String(job.id));
-// Emit stalled event
-this.eventsManager.broadcast({
-eventType: "stalled" /* EventType.Stalled */,
-queue: job.queue,
-jobId: job.id,
-timestamp: Date.now(),
-data: { stallCount: job.stallCount + 1, action },
-});
-void this.webhookManager.trigger('stalled', String(job.id), job.queue, {
-data: { stallCount: job.stallCount + 1, action },
-});
-if (action === "move_to_dlq" /* StallAction.MoveToDlq */) {
-// Max stalls reached - move to DLQ
-queueLog.warn('Job exceeded max stalls, moving to DLQ', {
-jobId: String(job.id),
-queue: job.queue,
-stallCount: job.stallCount,
-});
-// Remove from processing
-this.processingShards[procIdx].delete(job.id);
-shard.releaseConcurrency(job.queue);
-// Add to DLQ with stalled reason
-const entry = shard.addToDlq(job, "stalled" /* FailureReason.Stalled */, `Job stalled ${job.stallCount + 1} times`);
-this.jobIndex.set(job.id, { type: 'dlq', queueName: job.queue });
-// Persist DLQ entry
-this.storage?.saveDlqEntry(entry);
-}
-else {
-// Retry - increment stall count and re-queue
-incrementStallCount(job);
-job.attempts++;
-job.startedAt = null;
-job.runAt = Date.now() + calculateBackoff(job);
-job.lastHeartbeat = Date.now();
-queueLog.warn('Job stalled, retrying', {
-jobId: String(job.id),
-queue: job.queue,
-stallCount: job.stallCount,
-attempt: job.attempts,
-});
-// Remove from processing
-this.processingShards[procIdx].delete(job.id);
-shard.releaseConcurrency(job.queue);
-// Re-queue
-shard.getQueue(job.queue).push(job);
-const isDelayed = job.runAt > Date.now();
-shard.incrementQueued(job.id, isDelayed, job.createdAt, job.queue, job.runAt);
-this.jobIndex.set(job.id, { type: 'queue', shardIdx: idx, queueName: job.queue });
-// Persist
-this.storage?.updateForRetry(job);
-}
+// ============ Stats (delegated) ============
+getStats() {
+return statsMgr.getStats(this.getStatsContext(), this.cronScheduler);
 }
-
-
-*/
-performDlqMaintenance() {
-const ctx = this.getDlqContext();
-// Process each queue
-for (const queueName of this.queueNamesCache) {
-try {
-// Auto-retry eligible entries
-const retried = dlqOps.processAutoRetry(queueName, ctx);
-if (retried > 0) {
-queueLog.info('DLQ auto-retry completed', { queue: queueName, retried });
-}
-// Purge expired entries
-const purged = dlqOps.purgeExpiredDlq(queueName, ctx);
-if (purged > 0) {
-queueLog.info('DLQ purge completed', { queue: queueName, purged });
-}
-}
-catch (err) {
-queueLog.error('DLQ maintenance failed', { queue: queueName, error: String(err) });
-}
-}
+getMemoryStats() {
+return statsMgr.getMemoryStats(this.getStatsContext());
 }
-
-
-return;
-// Load pending jobs
-const now = Date.now();
-for (const job of this.storage.loadPendingJobs()) {
-const idx = shardIndex(job.queue);
-const shard = this.shards[idx];
-shard.getQueue(job.queue).push(job);
-this.jobIndex.set(job.id, { type: 'queue', shardIdx: idx, queueName: job.queue });
-// Update running counters for O(1) stats
-const isDelayed = job.runAt > now;
-shard.incrementQueued(job.id, isDelayed, job.createdAt, job.queue, job.runAt);
-// Register queue name in cache
-this.registerQueueName(job.queue);
-}
-// Load DLQ entries
-const dlqEntries = this.storage.loadDlq();
-let dlqCount = 0;
-for (const [queue, entries] of dlqEntries) {
-const idx = shardIndex(queue);
-const shard = this.shards[idx];
-for (const entry of entries) {
-// Add to shard's DLQ (directly set since we're loading)
-let dlq = shard.dlq.get(queue);
-if (!dlq) {
-dlq = [];
-shard.dlq.set(queue, dlq);
-}
-dlq.push(entry);
-shard.incrementDlq();
-dlqCount++;
-}
-this.registerQueueName(queue);
-}
-if (dlqCount > 0) {
-queueLog.info('Loaded DLQ entries', { count: dlqCount });
-}
-// Load cron jobs
-this.cronScheduler.load(this.storage.loadCronJobs());
-}
-// eslint-disable-next-line complexity
-cleanup() {
-// LRU collections auto-evict, but we still need to clean up:
-// 1. Orphaned processing shard entries (jobs stuck in processing)
-// 2. Stale waiting dependencies
-// 3. Orphaned unique keys and active groups
-// 4. Refresh delayed job counters (jobs that became ready)
-const now = Date.now();
-const stallTimeout = 30 * 60 * 1000; // 30 minutes max for processing
-// Refresh delayed counters - update jobs that have become ready
-for (let i = 0; i < SHARD_COUNT; i++) {
-this.shards[i].refreshDelayedCount(now);
-}
-// Compact priority queues if stale ratio > 20% (reclaim memory)
-for (let i = 0; i < SHARD_COUNT; i++) {
-for (const q of this.shards[i].queues.values()) {
-if (q.needsCompaction(0.2)) {
-q.compact();
-}
-}
-}
-// Clean orphaned processing entries
-for (let i = 0; i < SHARD_COUNT; i++) {
-const orphaned = [];
-for (const [jobId, job] of this.processingShards[i]) {
-if (job.startedAt && now - job.startedAt > stallTimeout) {
-orphaned.push(jobId);
-}
-}
-for (const jobId of orphaned) {
-const job = this.processingShards[i].get(jobId);
-if (job) {
-this.processingShards[i].delete(jobId);
-this.jobIndex.delete(jobId);
-queueLog.warn('Cleaned orphaned processing job', { jobId: String(jobId) });
-}
-}
-}
-// Clean stale waiting dependencies (waiting > 1 hour)
-const depTimeout = 60 * 60 * 1000; // 1 hour
-for (let i = 0; i < SHARD_COUNT; i++) {
-const shard = this.shards[i];
-const stale = [];
-for (const [_id, job] of shard.waitingDeps) {
-if (now - job.createdAt > depTimeout) {
-stale.push(job);
-}
-}
-for (const job of stale) {
-shard.waitingDeps.delete(job.id);
-// Remove from dependency index
-shard.unregisterDependencies(job.id, job.dependsOn);
-this.jobIndex.delete(job.id);
-queueLog.warn('Cleaned stale waiting dependency', { jobId: String(job.id) });
-}
-}
-// Clean orphaned and expired unique keys
-for (let i = 0; i < SHARD_COUNT; i++) {
-const shard = this.shards[i];
-// First, clean expired unique keys
-const expiredCleaned = shard.cleanExpiredUniqueKeys();
-if (expiredCleaned > 0) {
-queueLog.info('Cleaned expired unique keys', { shard: i, removed: expiredCleaned });
-}
-// Then trim if too many keys remain
-for (const [queueName, keys] of shard.uniqueKeys) {
-if (keys.size > 1000) {
-// If too many keys, trim oldest half
-const toRemove = Math.floor(keys.size / 2);
-const iter = keys.keys();
-for (let j = 0; j < toRemove; j++) {
-const { value, done } = iter.next();
-if (done)
-break;
-keys.delete(value);
-}
-queueLog.info('Trimmed unique keys', { queue: queueName, removed: toRemove });
-}
-}
-// Clean orphaned active groups
-for (const [queueName, groups] of shard.activeGroups) {
-if (groups.size > 1000) {
-const toRemove = Math.floor(groups.size / 2);
-const iter = groups.values();
-for (let j = 0; j < toRemove; j++) {
-const { value, done } = iter.next();
-if (done)
-break;
-groups.delete(value);
-}
-queueLog.info('Trimmed active groups', { queue: queueName, removed: toRemove });
-}
-}
-}
-// Clean stale stalledCandidates (jobs no longer in processing)
-for (const jobId of this.stalledCandidates) {
-const loc = this.jobIndex.get(jobId);
-if (loc?.type !== 'processing') {
-this.stalledCandidates.delete(jobId);
-}
-}
-// Clean orphaned jobIndex entries (pointing to invalid locations)
-// This is expensive so only run if index is large
-if (this.jobIndex.size > 100_000) {
-let orphanedCount = 0;
-for (const [jobId, loc] of this.jobIndex) {
-if (loc.type === 'processing') {
-const procIdx = processingShardIndex(String(jobId));
-if (!this.processingShards[procIdx].has(jobId)) {
-this.jobIndex.delete(jobId);
-orphanedCount++;
-}
-}
-else if (loc.type === 'queue') {
-// Check if job still exists in shard
-const shard = this.shards[loc.shardIdx];
-if (!shard.getQueue(loc.queueName).has(jobId)) {
-this.jobIndex.delete(jobId);
-orphanedCount++;
-}
-}
-}
-if (orphanedCount > 0) {
-queueLog.info('Cleaned orphaned jobIndex entries', { count: orphanedCount });
-}
-}
-// Clean orphaned job locks (locks for jobs no longer in processing)
-for (const jobId of this.jobLocks.keys()) {
-const loc = this.jobIndex.get(jobId);
-if (loc?.type !== 'processing') {
-this.jobLocks.delete(jobId);
-}
-}
-// Remove empty queues to free memory (like obliterate but only for empty queues)
-for (let i = 0; i < SHARD_COUNT; i++) {
-const shard = this.shards[i];
-const emptyQueues = [];
-for (const [queueName, queue] of shard.queues) {
-// Queue is empty and has no DLQ entries
-const dlqEntries = shard.dlq.get(queueName);
-if (queue.size === 0 && (!dlqEntries || dlqEntries.length === 0)) {
-emptyQueues.push(queueName);
-}
-}
-for (const queueName of emptyQueues) {
-shard.queues.delete(queueName);
-shard.dlq.delete(queueName);
-shard.uniqueKeys.delete(queueName);
-shard.queueState.delete(queueName);
-shard.activeGroups.delete(queueName);
-shard.rateLimiters.delete(queueName);
-shard.concurrencyLimiters.delete(queueName);
-shard.stallConfig.delete(queueName);
-shard.dlqConfig.delete(queueName);
-this.unregisterQueueName(queueName);
-}
-if (emptyQueues.length > 0) {
-queueLog.info('Removed empty queues', { shard: i, count: emptyQueues.length });
-}
-// Clean orphaned temporal index entries (memory leak fix)
-const cleanedTemporal = shard.cleanOrphanedTemporalEntries();
-if (cleanedTemporal > 0) {
-queueLog.info('Cleaned orphaned temporal entries', { shard: i, count: cleanedTemporal });
-}
-}
+compactMemory() {
+statsMgr.compactMemory(this.getStatsContext());
 }
 // ============ Lifecycle ============
 shutdown() {
 this.cronScheduler.stop();
 this.workerManager.stop();
 this.eventsManager.clear();
-if (this.
-
-
-clearInterval(this.timeoutInterval);
-if (this.depCheckInterval)
-clearInterval(this.depCheckInterval);
-if (this.stallCheckInterval)
-clearInterval(this.stallCheckInterval);
-if (this.dlqMaintenanceInterval)
-clearInterval(this.dlqMaintenanceInterval);
-if (this.lockCheckInterval)
-clearInterval(this.lockCheckInterval);
+if (this.backgroundTaskHandles) {
+bgTasks.stopBackgroundTasks(this.backgroundTaskHandles);
+}
 this.storage?.close();
 // Clear in-memory collections
 this.jobIndex.clear();
@@ -1368,9 +647,8 @@ export class QueueManager {
 this.jobLocks.clear();
 this.stalledCandidates.clear();
 this.clientJobs.clear();
-for (const shard of this.processingShards)
+for (const shard of this.processingShards)
 shard.clear();
-}
 for (const shard of this.shards) {
 shard.waitingDeps.clear();
 shard.dependencyIndex.clear();
@@ -1379,106 +657,5 @@ export class QueueManager {
 shard.activeGroups.clear();
 }
 }
-getStats() {
-let waiting = 0, delayed = 0, active = 0, dlq = 0;
-// O(32) instead of O(n) - use running counters from each shard
-for (let i = 0; i < SHARD_COUNT; i++) {
-const shardStats = this.shards[i].getStats();
-const queuedTotal = shardStats.queuedJobs;
-const delayedInShard = shardStats.delayedJobs;
-// waiting = queued jobs that are not delayed
-waiting += Math.max(0, queuedTotal - delayedInShard);
-delayed += delayedInShard;
-dlq += shardStats.dlqJobs;
-active += this.processingShards[i].size;
-}
-const cronStats = this.cronScheduler.getStats();
-return {
-waiting,
-delayed,
-active,
-dlq,
-completed: this.completedJobs.size,
-totalPushed: this.metrics.totalPushed.value,
-totalPulled: this.metrics.totalPulled.value,
-totalCompleted: this.metrics.totalCompleted.value,
-totalFailed: this.metrics.totalFailed.value,
-uptime: Date.now() - this.startTime,
-cronJobs: cronStats.total,
-cronPending: cronStats.pending,
-};
-}
-/**
-* Get detailed memory statistics for debugging memory issues.
-* Returns counts of entries in all major collections.
-*/
-getMemoryStats() {
-let processingTotal = 0;
-let queuedTotal = 0;
-let waitingDepsTotal = 0;
-let temporalIndexTotal = 0;
-let delayedHeapTotal = 0;
-for (let i = 0; i < SHARD_COUNT; i++) {
-processingTotal += this.processingShards[i].size;
-const shardStats = this.shards[i].getStats();
-queuedTotal += shardStats.queuedJobs;
-waitingDepsTotal += this.shards[i].waitingDeps.size;
-// Get internal structure sizes
-const internalSizes = this.shards[i].getInternalSizes();
-temporalIndexTotal += internalSizes.temporalIndex;
-delayedHeapTotal += internalSizes.delayedHeap;
-}
-// Count total jobs across all clients
-let clientJobsTotal = 0;
-for (const jobs of this.clientJobs.values()) {
-clientJobsTotal += jobs.size;
-}
-return {
-jobIndex: this.jobIndex.size,
-completedJobs: this.completedJobs.size,
-jobResults: this.jobResults.size,
-jobLogs: this.jobLogs.size,
-customIdMap: this.customIdMap.size,
-jobLocks: this.jobLocks.size,
-clientJobs: this.clientJobs.size,
-clientJobsTotal,
-pendingDepChecks: this.pendingDepChecks.size,
-stalledCandidates: this.stalledCandidates.size,
-processingTotal,
-queuedTotal,
-waitingDepsTotal,
-temporalIndexTotal,
-delayedHeapTotal,
-};
-}
-/**
-* Force compact all collections to reduce memory usage.
-* Use after large batch operations or when memory pressure is high.
-*/
-compactMemory() {
-// Compact priority queues that have high stale ratios
-for (let i = 0; i < SHARD_COUNT; i++) {
-for (const q of this.shards[i].queues.values()) {
-if (q.needsCompaction(0.1)) {
-// More aggressive: 10% stale threshold
-q.compact();
-}
-}
-}
-// Clean up empty client tracking entries
-for (const [clientId, jobs] of this.clientJobs) {
-if (jobs.size === 0) {
-this.clientJobs.delete(clientId);
-}
-}
-// Clean orphaned job locks (jobs no longer in processing)
-for (const jobId of this.jobLocks.keys()) {
-const loc = this.jobIndex.get(jobId);
-if (loc?.type !== 'processing') {
-this.jobLocks.delete(jobId);
-}
-}
-queueLog.info('Memory compacted');
-}
 }
 //# sourceMappingURL=queueManager.js.map
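
One consequence visible in the hunks above: the six per-task interval fields (`cleanupInterval`, `timeoutInterval`, `depCheckInterval`, `stallCheckInterval`, `dlqMaintenanceInterval`, `lockCheckInterval`) collapse into a single `backgroundTaskHandles` value returned by `bgTasks.startBackgroundTasks(...)` in the constructor and passed back to `bgTasks.stopBackgroundTasks(...)` in `shutdown()`. A minimal sketch of that handle-bundle lifecycle, assuming a plain timer array; the real handle shape and task set live in `dist/application/backgroundTasks.js` and are not shown in this diff:

```ts
// Sketch only: the handle shape and task list below are assumptions,
// not bunqueue's actual backgroundTasks API.
interface BackgroundTaskHandles {
  timers: ReturnType<typeof setInterval>[];
}

function startBackgroundTasks(tasks: Array<{ run: () => void; everyMs: number }>): BackgroundTaskHandles {
  // One bundle of timers instead of six separate *Interval fields on the manager.
  return { timers: tasks.map((t) => setInterval(t.run, t.everyMs)) };
}

function stopBackgroundTasks(handles: BackgroundTaskHandles): void {
  for (const timer of handles.timers) clearInterval(timer);
}

// Usage mirroring the constructor/shutdown flow in the diff:
const handles = startBackgroundTasks([{ run: () => console.log('cleanup tick'), everyMs: 10_000 }]);
// ...later, in shutdown():
stopBackgroundTasks(handles);
```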