bunqueue 1.9.1 → 1.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/application/operations/queueControl.d.ts.map +1 -1
- package/dist/application/operations/queueControl.js +6 -1
- package/dist/application/operations/queueControl.js.map +1 -1
- package/dist/application/queueManager.d.ts +115 -9
- package/dist/application/queueManager.d.ts.map +1 -1
- package/dist/application/queueManager.js +535 -26
- package/dist/application/queueManager.js.map +1 -1
- package/dist/cli/client.d.ts +6 -0
- package/dist/cli/client.d.ts.map +1 -1
- package/dist/cli/client.js +60 -48
- package/dist/cli/client.js.map +1 -1
- package/dist/cli/commands/server.d.ts.map +1 -1
- package/dist/cli/commands/server.js +30 -14
- package/dist/cli/commands/server.js.map +1 -1
- package/dist/cli/help.d.ts.map +1 -1
- package/dist/cli/help.js +10 -6
- package/dist/cli/help.js.map +1 -1
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +15 -2
- package/dist/cli/index.js.map +1 -1
- package/dist/client/queue/dlqOps.d.ts +24 -0
- package/dist/client/queue/dlqOps.d.ts.map +1 -0
- package/dist/client/queue/dlqOps.js +73 -0
- package/dist/client/queue/dlqOps.js.map +1 -0
- package/dist/client/queue/helpers.d.ts +20 -0
- package/dist/client/queue/helpers.d.ts.map +1 -0
- package/dist/client/queue/helpers.js +34 -0
- package/dist/client/queue/helpers.js.map +1 -0
- package/dist/client/queue/index.d.ts +8 -0
- package/dist/client/queue/index.d.ts.map +1 -0
- package/dist/client/queue/index.js +8 -0
- package/dist/client/queue/index.js.map +1 -0
- package/dist/client/queue/queue.d.ts +60 -0
- package/dist/client/queue/queue.d.ts.map +1 -0
- package/dist/client/queue/queue.js +322 -0
- package/dist/client/queue/queue.js.map +1 -0
- package/dist/client/queue.d.ts +3 -78
- package/dist/client/queue.d.ts.map +1 -1
- package/dist/client/queue.js +3 -463
- package/dist/client/queue.js.map +1 -1
- package/dist/client/sandboxed/index.d.ts +8 -0
- package/dist/client/sandboxed/index.d.ts.map +1 -0
- package/dist/client/sandboxed/index.js +7 -0
- package/dist/client/sandboxed/index.js.map +1 -0
- package/dist/client/sandboxed/types.d.ts +62 -0
- package/dist/client/sandboxed/types.d.ts.map +1 -0
- package/dist/client/sandboxed/types.js +6 -0
- package/dist/client/sandboxed/types.js.map +1 -0
- package/dist/client/sandboxed/worker.d.ts +38 -0
- package/dist/client/sandboxed/worker.d.ts.map +1 -0
- package/dist/client/sandboxed/worker.js +176 -0
- package/dist/client/sandboxed/worker.js.map +1 -0
- package/dist/client/sandboxed/wrapper.d.ts +13 -0
- package/dist/client/sandboxed/wrapper.d.ts.map +1 -0
- package/dist/client/sandboxed/wrapper.js +65 -0
- package/dist/client/sandboxed/wrapper.js.map +1 -0
- package/dist/client/sandboxedWorker.d.ts +4 -87
- package/dist/client/sandboxedWorker.d.ts.map +1 -1
- package/dist/client/sandboxedWorker.js +3 -296
- package/dist/client/sandboxedWorker.js.map +1 -1
- package/dist/client/tcp/client.d.ts +40 -0
- package/dist/client/tcp/client.d.ts.map +1 -0
- package/dist/client/tcp/client.js +289 -0
- package/dist/client/tcp/client.js.map +1 -0
- package/dist/client/tcp/connection.d.ts +57 -0
- package/dist/client/tcp/connection.d.ts.map +1 -0
- package/dist/client/tcp/connection.js +162 -0
- package/dist/client/tcp/connection.js.map +1 -0
- package/dist/client/tcp/health.d.ts +47 -0
- package/dist/client/tcp/health.d.ts.map +1 -0
- package/dist/client/tcp/health.js +95 -0
- package/dist/client/tcp/health.js.map +1 -0
- package/dist/client/tcp/index.d.ts +13 -0
- package/dist/client/tcp/index.d.ts.map +1 -0
- package/dist/client/tcp/index.js +12 -0
- package/dist/client/tcp/index.js.map +1 -0
- package/dist/client/tcp/lineBuffer.d.ts +17 -0
- package/dist/client/tcp/lineBuffer.d.ts.map +1 -0
- package/dist/client/tcp/lineBuffer.js +32 -0
- package/dist/client/tcp/lineBuffer.js.map +1 -0
- package/dist/client/tcp/reconnect.d.ts +38 -0
- package/dist/client/tcp/reconnect.d.ts.map +1 -0
- package/dist/client/tcp/reconnect.js +70 -0
- package/dist/client/tcp/reconnect.js.map +1 -0
- package/dist/client/tcp/shared.d.ts +11 -0
- package/dist/client/tcp/shared.d.ts.map +1 -0
- package/dist/client/tcp/shared.js +20 -0
- package/dist/client/tcp/shared.js.map +1 -0
- package/dist/client/tcp/types.d.ts +76 -0
- package/dist/client/tcp/types.d.ts.map +1 -0
- package/dist/client/tcp/types.js +20 -0
- package/dist/client/tcp/types.js.map +1 -0
- package/dist/client/tcpClient.d.ts +4 -110
- package/dist/client/tcpClient.d.ts.map +1 -1
- package/dist/client/tcpClient.js +3 -523
- package/dist/client/tcpClient.js.map +1 -1
- package/dist/client/tcpPool.d.ts +3 -0
- package/dist/client/tcpPool.d.ts.map +1 -1
- package/dist/client/tcpPool.js +21 -2
- package/dist/client/tcpPool.js.map +1 -1
- package/dist/client/types.d.ts +11 -2
- package/dist/client/types.d.ts.map +1 -1
- package/dist/client/types.js.map +1 -1
- package/dist/client/worker/ackBatcher.d.ts +40 -0
- package/dist/client/worker/ackBatcher.d.ts.map +1 -0
- package/dist/client/worker/ackBatcher.js +137 -0
- package/dist/client/worker/ackBatcher.js.map +1 -0
- package/dist/client/worker/index.d.ts +11 -0
- package/dist/client/worker/index.d.ts.map +1 -0
- package/dist/client/worker/index.js +10 -0
- package/dist/client/worker/index.js.map +1 -0
- package/dist/client/worker/jobParser.d.ts +10 -0
- package/dist/client/worker/jobParser.d.ts.map +1 -0
- package/dist/client/worker/jobParser.js +43 -0
- package/dist/client/worker/jobParser.js.map +1 -0
- package/dist/client/worker/processor.d.ts +24 -0
- package/dist/client/worker/processor.d.ts.map +1 -0
- package/dist/client/worker/processor.js +86 -0
- package/dist/client/worker/processor.js.map +1 -0
- package/dist/client/worker/types.d.ts +38 -0
- package/dist/client/worker/types.d.ts.map +1 -0
- package/dist/client/worker/types.js +14 -0
- package/dist/client/worker/types.js.map +1 -0
- package/dist/client/worker/worker.d.ts +53 -0
- package/dist/client/worker/worker.d.ts.map +1 -0
- package/dist/client/worker/worker.js +367 -0
- package/dist/client/worker/worker.js.map +1 -0
- package/dist/client/worker.d.ts +3 -69
- package/dist/client/worker.d.ts.map +1 -1
- package/dist/client/worker.js +3 -472
- package/dist/client/worker.js.map +1 -1
- package/dist/domain/queue/shard.d.ts +19 -2
- package/dist/domain/queue/shard.d.ts.map +1 -1
- package/dist/domain/queue/shard.js +36 -4
- package/dist/domain/queue/shard.js.map +1 -1
- package/dist/domain/types/command.d.ts +9 -0
- package/dist/domain/types/command.d.ts.map +1 -1
- package/dist/domain/types/job.d.ts +27 -0
- package/dist/domain/types/job.d.ts.map +1 -1
- package/dist/domain/types/job.js +34 -0
- package/dist/domain/types/job.js.map +1 -1
- package/dist/domain/types/response.d.ts +15 -1
- package/dist/domain/types/response.d.ts.map +1 -1
- package/dist/domain/types/response.js +16 -0
- package/dist/domain/types/response.js.map +1 -1
- package/dist/infrastructure/server/handlers/core.d.ts +1 -1
- package/dist/infrastructure/server/handlers/core.d.ts.map +1 -1
- package/dist/infrastructure/server/handlers/core.js +74 -15
- package/dist/infrastructure/server/handlers/core.js.map +1 -1
- package/dist/infrastructure/server/handlers/monitoring.d.ts.map +1 -1
- package/dist/infrastructure/server/handlers/monitoring.js +6 -4
- package/dist/infrastructure/server/handlers/monitoring.js.map +1 -1
- package/dist/infrastructure/server/http.d.ts +10 -3
- package/dist/infrastructure/server/http.d.ts.map +1 -1
- package/dist/infrastructure/server/http.js +244 -163
- package/dist/infrastructure/server/http.js.map +1 -1
- package/dist/infrastructure/server/tcp.d.ts +8 -3
- package/dist/infrastructure/server/tcp.d.ts.map +1 -1
- package/dist/infrastructure/server/tcp.js +77 -57
- package/dist/infrastructure/server/tcp.js.map +1 -1
- package/dist/infrastructure/server/types.d.ts +2 -0
- package/dist/infrastructure/server/types.d.ts.map +1 -1
- package/dist/main.js +24 -4
- package/dist/main.js.map +1 -1
- package/package.json +1 -1
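The client entry points shrink to a few lines each (queue.js, tcpClient.js, sandboxedWorker.js, and worker.js all lose several hundred lines) while matching client/queue/, client/tcp/, client/sandboxed/, and client/worker/ directories appear, which is consistent with each monolithic module being split into smaller files and re-exported from its old path. A shim of that shape might look like the sketch below; this is an assumption inferred from the line counts, not code taken from the diff.

```js
// Hypothetical shape of package/dist/client/queue.js after the split (assumed,
// not shown in this diff): the old module simply forwards to the new directory.
export * from './queue/index';
//# sourceMappingURL=queue.js.map
```

The diff below shows the largest change, package/dist/application/queueManager.js (+535 -26).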
package/dist/application/queueManager.js

@@ -2,7 +2,7 @@
  * Queue Manager
  * Core orchestrator for all queue operations
  */
-import { calculateBackoff } from '../domain/types/job';
+import { calculateBackoff, createJobLock, isLockExpired, renewLock, DEFAULT_LOCK_TTL, } from '../domain/types/job';
 import { queueLog } from '../shared/logger';
 import { getStallAction, incrementStallCount } from '../domain/types/stall';
 import { Shard } from '../domain/queue/shard';
@@ -55,6 +55,15 @@ export class QueueManager {
     // Deferred dependency resolution queue (to avoid lock order violations)
     pendingDepChecks = new Set();
     depCheckInterval = null;
+    // Two-phase stall detection (like BullMQ)
+    // Jobs are added here on first check, confirmed stalled on second check
+    stalledCandidates = new Set();
+    // Lock-based job ownership tracking (BullMQ-style)
+    // Maps jobId to lock info (token, owner, expiration)
+    jobLocks = new Map();
+    // Client-job tracking for connection-based release
+    // When a TCP connection closes, all jobs owned by that client are released
+    clientJobs = new Map();
     // Cron scheduler
     cronScheduler;
     // Managers
@@ -76,6 +85,7 @@ export class QueueManager {
     timeoutInterval = null;
     stallCheckInterval = null;
     dlqMaintenanceInterval = null;
+    lockCheckInterval = null;
     // Queue names cache for O(1) listQueues instead of O(32 * queues)
     queueNamesCache = new Set();
     constructor(config = {}) {
@@ -224,29 +234,98 @@ export class QueueManager {
     async pull(queue, timeoutMs = 0) {
         return pullJob(queue, timeoutMs, this.getPullContext());
     }
+    /**
+     * Pull a job and create a lock for it (BullMQ-style).
+     * Returns both the job and its lock token for ownership verification.
+     */
+    async pullWithLock(queue, owner, timeoutMs = 0, lockTtl = DEFAULT_LOCK_TTL) {
+        const job = await pullJob(queue, timeoutMs, this.getPullContext());
+        if (!job)
+            return { job: null, token: null };
+        const token = this.createLock(job.id, owner, lockTtl);
+        return { job, token };
+    }
     /** Pull multiple jobs in single lock acquisition - O(1) instead of O(n) locks */
     async pullBatch(queue, count, timeoutMs = 0) {
         return pullJobBatch(queue, count, timeoutMs, this.getPullContext());
     }
-
-
+    /**
+     * Pull multiple jobs and create locks for them (BullMQ-style).
+     * Returns both jobs and their lock tokens for ownership verification.
+     */
+    async pullBatchWithLock(queue, count, owner, timeoutMs = 0, lockTtl = DEFAULT_LOCK_TTL) {
+        const jobs = await pullJobBatch(queue, count, timeoutMs, this.getPullContext());
+        const tokens = [];
+        for (const job of jobs) {
+            const token = this.createLock(job.id, owner, lockTtl);
+            tokens.push(token ?? '');
+        }
+        return { jobs, tokens };
+    }
+    async ack(jobId, result, token) {
+        // If token provided, verify ownership before acknowledging
+        if (token && !this.verifyLock(jobId, token)) {
+            throw new Error(`Invalid or expired lock token for job ${jobId}`);
+        }
+        await ackJob(jobId, result, this.getAckContext());
+        // Release lock after successful ack
+        this.releaseLock(jobId, token);
     }
     /** Acknowledge multiple jobs in parallel with Promise.all */
-    async ackBatch(jobIds) {
-
+    async ackBatch(jobIds, tokens) {
+        // Verify all tokens first if provided
+        if (tokens?.length === jobIds.length) {
+            for (let i = 0; i < jobIds.length; i++) {
+                const t = tokens[i];
+                if (t && !this.verifyLock(jobIds[i], t)) {
+                    throw new Error(`Invalid or expired lock token for job ${jobIds[i]}`);
+                }
+            }
+        }
+        await ackJobBatch(jobIds, this.getAckContext());
+        // Release locks after successful ack
+        if (tokens) {
+            for (let i = 0; i < jobIds.length; i++) {
+                this.releaseLock(jobIds[i], tokens[i]);
+            }
+        }
     }
     /** Acknowledge multiple jobs with individual results - batch optimized */
     async ackBatchWithResults(items) {
-
+        // Verify all tokens first if provided
+        for (const item of items) {
+            if (item.token && !this.verifyLock(item.id, item.token)) {
+                throw new Error(`Invalid or expired lock token for job ${item.id}`);
+            }
+        }
+        await ackJobBatchWithResults(items, this.getAckContext());
+        // Release locks after successful ack
+        for (const item of items) {
+            this.releaseLock(item.id, item.token);
+        }
     }
-    async fail(jobId, error) {
-
+    async fail(jobId, error, token) {
+        // If token provided, verify ownership before failing
+        if (token && !this.verifyLock(jobId, token)) {
+            throw new Error(`Invalid or expired lock token for job ${jobId}`);
+        }
+        await failJob(jobId, error, this.getAckContext());
+        // Release lock after fail
+        this.releaseLock(jobId, token);
     }
-    /**
-
+    /**
+     * Update job heartbeat for stall detection (single job).
+     * If token is provided, also renews the lock.
+     */
+    jobHeartbeat(jobId, token) {
         const loc = this.jobIndex.get(jobId);
         if (loc?.type !== 'processing')
             return false;
+        // If token provided, renew lock (which also updates heartbeat)
+        if (token) {
+            return this.renewJobLock(jobId, token);
+        }
+        // Legacy mode: just update heartbeat without token verification
         const processing = this.processingShards[loc.shardIdx];
         const job = processing.get(jobId);
         if (job) {
@@ -255,15 +334,263 @@ export class QueueManager {
         }
         return false;
     }
-    /**
-
+    /**
+     * Update job heartbeat for multiple jobs (batch).
+     * If tokens are provided, also renews the locks.
+     */
+    jobHeartbeatBatch(jobIds, tokens) {
         let count = 0;
-        for (
-
+        for (let i = 0; i < jobIds.length; i++) {
+            const token = tokens?.[i];
+            if (this.jobHeartbeat(jobIds[i], token))
                 count++;
         }
         return count;
     }
+    // ============ Lock Management (BullMQ-style) ============
+    /**
+     * Create a lock for a job when it's pulled for processing.
+     * @returns The lock token, or null if job not in processing
+     */
+    createLock(jobId, owner, ttl = DEFAULT_LOCK_TTL) {
+        const loc = this.jobIndex.get(jobId);
+        if (loc?.type !== 'processing')
+            return null;
+        // Check if lock already exists (shouldn't happen, but defensive)
+        if (this.jobLocks.has(jobId)) {
+            queueLog.warn('Lock already exists for job', { jobId: String(jobId), owner });
+            return null;
+        }
+        const lock = createJobLock(jobId, owner, ttl);
+        this.jobLocks.set(jobId, lock);
+        return lock.token;
+    }
+    /**
+     * Verify that a token is valid for a job.
+     * @returns true if token matches the active lock
+     */
+    verifyLock(jobId, token) {
+        const lock = this.jobLocks.get(jobId);
+        if (!lock)
+            return false;
+        if (lock.token !== token)
+            return false;
+        if (isLockExpired(lock))
+            return false;
+        return true;
+    }
+    /**
+     * Renew a lock with the given token.
+     * @returns true if renewal succeeded, false if token invalid or lock expired
+     */
+    renewJobLock(jobId, token, newTtl) {
+        const lock = this.jobLocks.get(jobId);
+        if (!lock)
+            return false;
+        if (lock.token !== token)
+            return false;
+        if (isLockExpired(lock)) {
+            // Lock already expired, remove it
+            this.jobLocks.delete(jobId);
+            return false;
+        }
+        renewLock(lock, newTtl);
+        // Also update lastHeartbeat on the job (for legacy stall detection compatibility)
+        const loc = this.jobIndex.get(jobId);
+        if (loc?.type === 'processing') {
+            const job = this.processingShards[loc.shardIdx].get(jobId);
+            if (job)
+                job.lastHeartbeat = Date.now();
+        }
+        return true;
+    }
+    /**
+     * Renew locks for multiple jobs (batch operation).
+     * @returns Array of jobIds that were successfully renewed
+     */
+    renewJobLockBatch(items) {
+        const renewed = [];
+        for (const item of items) {
+            if (this.renewJobLock(item.id, item.token, item.ttl)) {
+                renewed.push(String(item.id));
+            }
+        }
+        return renewed;
+    }
+    /**
+     * Release a lock when job is completed or failed.
+     * Should be called by ACK/FAIL operations.
+     */
+    releaseLock(jobId, token) {
+        const lock = this.jobLocks.get(jobId);
+        if (!lock)
+            return true; // No lock to release
+        // If token provided, verify it matches
+        if (token && lock.token !== token) {
+            queueLog.warn('Token mismatch on lock release', {
+                jobId: String(jobId),
+                expected: lock.token.substring(0, 8),
+                got: token.substring(0, 8),
+            });
+            return false;
+        }
+        this.jobLocks.delete(jobId);
+        return true;
+    }
+    /**
+     * Get lock info for a job (for debugging/monitoring).
+     */
+    getLockInfo(jobId) {
+        return this.jobLocks.get(jobId) ?? null;
+    }
+    // ============ Client-Job Tracking ============
+    /**
+     * Register a job as owned by a client (called on PULL).
+     */
+    registerClientJob(clientId, jobId) {
+        let jobs = this.clientJobs.get(clientId);
+        if (!jobs) {
+            jobs = new Set();
+            this.clientJobs.set(clientId, jobs);
+        }
+        jobs.add(jobId);
+    }
+    /**
+     * Unregister a job from a client (called on ACK/FAIL).
+     */
+    unregisterClientJob(clientId, jobId) {
+        if (!clientId)
+            return;
+        const jobs = this.clientJobs.get(clientId);
+        if (jobs) {
+            jobs.delete(jobId);
+            if (jobs.size === 0) {
+                this.clientJobs.delete(clientId);
+            }
+        }
+    }
+    /**
+     * Release all jobs owned by a client back to queue (called on TCP disconnect).
+     * Returns the number of jobs released.
+     */
+    releaseClientJobs(clientId) {
+        const jobs = this.clientJobs.get(clientId);
+        if (!jobs || jobs.size === 0) {
+            this.clientJobs.delete(clientId);
+            return 0;
+        }
+        let released = 0;
+        const now = Date.now();
+        for (const jobId of jobs) {
+            const loc = this.jobIndex.get(jobId);
+            if (loc?.type !== 'processing')
+                continue;
+            const procIdx = loc.shardIdx;
+            const job = this.processingShards[procIdx].get(jobId);
+            if (!job)
+                continue;
+            // Remove from processing
+            this.processingShards[procIdx].delete(jobId);
+            // Release lock if exists
+            this.jobLocks.delete(jobId);
+            // Release concurrency
+            const idx = shardIndex(job.queue);
+            const shard = this.shards[idx];
+            shard.releaseConcurrency(job.queue);
+            // Release group if active
+            if (job.groupId) {
+                shard.releaseGroup(job.queue, job.groupId);
+            }
+            // Reset job state for retry
+            job.startedAt = null;
+            job.lastHeartbeat = now;
+            // Re-queue the job
+            shard.getQueue(job.queue).push(job);
+            const isDelayed = job.runAt > now;
+            shard.incrementQueued(jobId, isDelayed, job.createdAt, job.queue, job.runAt);
+            this.jobIndex.set(jobId, { type: 'queue', shardIdx: idx, queueName: job.queue });
+            released++;
+        }
+        // Clear client tracking
+        this.clientJobs.delete(clientId);
+        if (released > 0) {
+            queueLog.info('Released client jobs', { clientId: clientId.substring(0, 8), released });
+        }
+        return released;
+    }
+    /**
+     * Check and handle expired locks.
+     * Jobs with expired locks are requeued for retry.
+     */
+    checkExpiredLocks() {
+        const now = Date.now();
+        const expired = [];
+        for (const [jobId, lock] of this.jobLocks) {
+            if (isLockExpired(lock, now)) {
+                expired.push({ jobId, lock });
+            }
+        }
+        for (const { jobId, lock } of expired) {
+            const procIdx = processingShardIndex(String(jobId));
+            const job = this.processingShards[procIdx].get(jobId);
+            if (job) {
+                const idx = shardIndex(job.queue);
+                const shard = this.shards[idx];
+                const queue = shard.getQueue(job.queue);
+                // Remove from processing
+                this.processingShards[procIdx].delete(jobId);
+                // Increment attempts and reset state
+                job.attempts++;
+                job.startedAt = null;
+                job.lastHeartbeat = now;
+                job.stallCount++;
+                // Check if max stalls exceeded
+                const stallConfig = shard.getStallConfig(job.queue);
+                if (stallConfig.maxStalls > 0 && job.stallCount >= stallConfig.maxStalls) {
+                    // Move to DLQ using shard's addToDlq method
+                    shard.addToDlq(job, "stalled" /* FailureReason.Stalled */, `Lock expired after ${lock.renewalCount} renewals`);
+                    this.jobIndex.set(jobId, { type: 'dlq', queueName: job.queue });
+                    queueLog.warn('Job moved to DLQ due to lock expiration', {
+                        jobId: String(jobId),
+                        queue: job.queue,
+                        owner: lock.owner,
+                        renewals: lock.renewalCount,
+                        stallCount: job.stallCount,
+                    });
+                    this.eventsManager.broadcast({
+                        eventType: "failed" /* EventType.Failed */,
+                        jobId,
+                        queue: job.queue,
+                        timestamp: now,
+                        error: 'Lock expired (max stalls reached)',
+                    });
+                }
+                else {
+                    // Requeue for retry (always push - priority queue handles ordering)
+                    queue.push(job);
+                    this.jobIndex.set(jobId, { type: 'queue', shardIdx: idx, queueName: job.queue });
+                    queueLog.info('Job requeued due to lock expiration', {
+                        jobId: String(jobId),
+                        queue: job.queue,
+                        owner: lock.owner,
+                        renewals: lock.renewalCount,
+                        attempt: job.attempts,
+                    });
+                    this.eventsManager.broadcast({
+                        eventType: "stalled" /* EventType.Stalled */,
+                        jobId,
+                        queue: job.queue,
+                        timestamp: now,
+                    });
+                }
+            }
+            // Remove the expired lock
+            this.jobLocks.delete(jobId);
+        }
+        if (expired.length > 0) {
+            queueLog.info('Processed expired locks', { count: expired.length });
+        }
+    }
     // ============ Query Operations (delegated) ============
     async getJob(jobId) {
         return queryOps.getJob(jobId, this.getQueryContext());
@@ -528,6 +855,10 @@ export class QueueManager {
         this.dlqMaintenanceInterval = setInterval(() => {
             this.performDlqMaintenance();
         }, this.config.dlqMaintenanceMs);
+        // Lock expiration check runs at same interval as stall check
+        this.lockCheckInterval = setInterval(() => {
+            this.checkExpiredLocks();
+        }, this.config.stallCheckMs);
         this.cronScheduler.start();
     }
     checkJobTimeouts() {
@@ -547,31 +878,61 @@ export class QueueManager {
     }
     /**
     * Check for stalled jobs and handle them
-     *
+     * Uses two-phase detection (like BullMQ) to prevent false positives:
+     * - Phase 1: Jobs marked as candidates in previous check are confirmed stalled
+     * - Phase 2: Current processing jobs are marked as candidates for next check
     */
     checkStalledJobs() {
         const now = Date.now();
+        const confirmedStalled = [];
+        // Phase 1: Check jobs that were candidates from previous cycle
+        // If still in processing and still meets stall criteria → confirmed stalled
+        for (const jobId of this.stalledCandidates) {
+            // Find job in processing shards
+            const procIdx = processingShardIndex(String(jobId));
+            const job = this.processingShards[procIdx].get(jobId);
+            if (!job) {
+                // Job completed between checks - not stalled (false positive avoided!)
+                this.stalledCandidates.delete(jobId);
+                continue;
+            }
+            const stallConfig = this.shards[shardIndex(job.queue)].getStallConfig(job.queue);
+            if (!stallConfig.enabled) {
+                this.stalledCandidates.delete(jobId);
+                continue;
+            }
+            // Re-check stall criteria (job might have received heartbeat)
+            const action = getStallAction(job, stallConfig, now);
+            if (action !== "keep" /* StallAction.Keep */) {
+                // Confirmed stalled - was candidate AND still meets criteria
+                confirmedStalled.push({ job, action });
+            }
+            // Remove from candidates (will be re-added in phase 2 if still processing)
+            this.stalledCandidates.delete(jobId);
+        }
+        // Phase 2: Mark current processing jobs as candidates for NEXT check
         for (let i = 0; i < SHARD_COUNT; i++) {
             const procShard = this.processingShards[i];
-            const
-            for (const [_jobId, job] of procShard) {
+            for (const [jobId, job] of procShard) {
                 const stallConfig = this.shards[shardIndex(job.queue)].getStallConfig(job.queue);
                 if (!stallConfig.enabled)
                     continue;
+                // Only mark as candidate if past grace period and no recent heartbeat
                 const action = getStallAction(job, stallConfig, now);
                 if (action !== "keep" /* StallAction.Keep */) {
-
+                    // Add to candidates - will be checked in NEXT cycle
+                    this.stalledCandidates.add(jobId);
                }
            }
-
-
-
-
-
-
-
+        }
+        // Process confirmed stalled jobs
+        for (const { job, action } of confirmedStalled) {
+            this.handleStalledJob(job, action).catch((err) => {
+                queueLog.error('Failed to handle stalled job', {
+                    jobId: String(job.id),
+                    error: String(err),
                });
-            }
+            });
        }
    }
    /**
@@ -786,6 +1147,77 @@ export class QueueManager {
            }
        }
    }
+        // Clean stale stalledCandidates (jobs no longer in processing)
+        for (const jobId of this.stalledCandidates) {
+            const loc = this.jobIndex.get(jobId);
+            if (loc?.type !== 'processing') {
+                this.stalledCandidates.delete(jobId);
+            }
+        }
+        // Clean orphaned jobIndex entries (pointing to invalid locations)
+        // This is expensive so only run if index is large
+        if (this.jobIndex.size > 100_000) {
+            let orphanedCount = 0;
+            for (const [jobId, loc] of this.jobIndex) {
+                if (loc.type === 'processing') {
+                    const procIdx = processingShardIndex(String(jobId));
+                    if (!this.processingShards[procIdx].has(jobId)) {
+                        this.jobIndex.delete(jobId);
+                        orphanedCount++;
+                    }
+                }
+                else if (loc.type === 'queue') {
+                    // Check if job still exists in shard
+                    const shard = this.shards[loc.shardIdx];
+                    if (!shard.getQueue(loc.queueName).has(jobId)) {
+                        this.jobIndex.delete(jobId);
+                        orphanedCount++;
+                    }
+                }
+            }
+            if (orphanedCount > 0) {
+                queueLog.info('Cleaned orphaned jobIndex entries', { count: orphanedCount });
+            }
+        }
+        // Clean orphaned job locks (locks for jobs no longer in processing)
+        for (const jobId of this.jobLocks.keys()) {
+            const loc = this.jobIndex.get(jobId);
+            if (loc?.type !== 'processing') {
+                this.jobLocks.delete(jobId);
+            }
+        }
+        // Remove empty queues to free memory (like obliterate but only for empty queues)
+        for (let i = 0; i < SHARD_COUNT; i++) {
+            const shard = this.shards[i];
+            const emptyQueues = [];
+            for (const [queueName, queue] of shard.queues) {
+                // Queue is empty and has no DLQ entries
+                const dlqEntries = shard.dlq.get(queueName);
+                if (queue.size === 0 && (!dlqEntries || dlqEntries.length === 0)) {
+                    emptyQueues.push(queueName);
+                }
+            }
+            for (const queueName of emptyQueues) {
+                shard.queues.delete(queueName);
+                shard.dlq.delete(queueName);
+                shard.uniqueKeys.delete(queueName);
+                shard.queueState.delete(queueName);
+                shard.activeGroups.delete(queueName);
+                shard.rateLimiters.delete(queueName);
+                shard.concurrencyLimiters.delete(queueName);
+                shard.stallConfig.delete(queueName);
+                shard.dlqConfig.delete(queueName);
+                this.unregisterQueueName(queueName);
+            }
+            if (emptyQueues.length > 0) {
+                queueLog.info('Removed empty queues', { shard: i, count: emptyQueues.length });
+            }
+            // Clean orphaned temporal index entries (memory leak fix)
+            const cleanedTemporal = shard.cleanOrphanedTemporalEntries();
+            if (cleanedTemporal > 0) {
+                queueLog.info('Cleaned orphaned temporal entries', { shard: i, count: cleanedTemporal });
+            }
+        }
    }
    // ============ Lifecycle ============
    shutdown() {
@@ -802,6 +1234,8 @@ export class QueueManager {
            clearInterval(this.stallCheckInterval);
        if (this.dlqMaintenanceInterval)
            clearInterval(this.dlqMaintenanceInterval);
+        if (this.lockCheckInterval)
+            clearInterval(this.lockCheckInterval);
        this.storage?.close();
        // Clear in-memory collections
        this.jobIndex.clear();
@@ -811,6 +1245,9 @@ export class QueueManager {
        this.customIdMap.clear();
        this.pendingDepChecks.clear();
        this.queueNamesCache.clear();
+        this.jobLocks.clear();
+        this.stalledCandidates.clear();
+        this.clientJobs.clear();
        for (const shard of this.processingShards) {
            shard.clear();
        }
@@ -851,5 +1288,77 @@ export class QueueManager {
            cronPending: cronStats.pending,
        };
    }
+    /**
+     * Get detailed memory statistics for debugging memory issues.
+     * Returns counts of entries in all major collections.
+     */
+    getMemoryStats() {
+        let processingTotal = 0;
+        let queuedTotal = 0;
+        let waitingDepsTotal = 0;
+        let temporalIndexTotal = 0;
+        let delayedHeapTotal = 0;
+        for (let i = 0; i < SHARD_COUNT; i++) {
+            processingTotal += this.processingShards[i].size;
+            const shardStats = this.shards[i].getStats();
+            queuedTotal += shardStats.queuedJobs;
+            waitingDepsTotal += this.shards[i].waitingDeps.size;
+            // Get internal structure sizes
+            const internalSizes = this.shards[i].getInternalSizes();
+            temporalIndexTotal += internalSizes.temporalIndex;
+            delayedHeapTotal += internalSizes.delayedHeap;
+        }
+        // Count total jobs across all clients
+        let clientJobsTotal = 0;
+        for (const jobs of this.clientJobs.values()) {
+            clientJobsTotal += jobs.size;
+        }
+        return {
+            jobIndex: this.jobIndex.size,
+            completedJobs: this.completedJobs.size,
+            jobResults: this.jobResults.size,
+            jobLogs: this.jobLogs.size,
+            customIdMap: this.customIdMap.size,
+            jobLocks: this.jobLocks.size,
+            clientJobs: this.clientJobs.size,
+            clientJobsTotal,
+            pendingDepChecks: this.pendingDepChecks.size,
+            stalledCandidates: this.stalledCandidates.size,
+            processingTotal,
+            queuedTotal,
+            waitingDepsTotal,
+            temporalIndexTotal,
+            delayedHeapTotal,
+        };
+    }
+    /**
+     * Force compact all collections to reduce memory usage.
+     * Use after large batch operations or when memory pressure is high.
+     */
+    compactMemory() {
+        // Compact priority queues that have high stale ratios
+        for (let i = 0; i < SHARD_COUNT; i++) {
+            for (const q of this.shards[i].queues.values()) {
+                if (q.needsCompaction(0.1)) {
+                    // More aggressive: 10% stale threshold
+                    q.compact();
+                }
+            }
+        }
+        // Clean up empty client tracking entries
+        for (const [clientId, jobs] of this.clientJobs) {
+            if (jobs.size === 0) {
+                this.clientJobs.delete(clientId);
+            }
+        }
+        // Clean orphaned job locks (jobs no longer in processing)
+        for (const jobId of this.jobLocks.keys()) {
+            const loc = this.jobIndex.get(jobId);
+            if (loc?.type !== 'processing') {
+                this.jobLocks.delete(jobId);
+            }
+        }
+        queueLog.info('Memory compacted');
+    }
 }
 //# sourceMappingURL=queueManager.js.map
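For orientation, the sketch below walks through the lock lifecycle that the queueManager.js changes introduce: pull a job together with a lock token, renew the lock while working (which also refreshes the heartbeat used by stall detection), and pass the token back on ack or fail so ownership is verified and the lock is released. It is an illustrative sketch only, assuming direct access to a QueueManager instance (`qm`) and an application-defined `handle` function; the TCP and worker client wrappers listed in the file summary are not part of this excerpt.

```js
const owner = `worker-${process.pid}`;

// Pull one job and receive its lock token (a null job means the queue was empty).
const { job, token } = await qm.pullWithLock('emails', owner);
if (job && token) {
    // Renew the lock periodically so checkExpiredLocks() does not requeue the
    // job mid-flight; choose an interval comfortably below the lock TTL.
    const beat = setInterval(() => qm.jobHeartbeat(job.id, token), 5000);
    try {
        const result = await handle(job);      // application-specific work
        await qm.ack(job.id, result, token);   // token verified, lock released
    } catch (err) {
        await qm.fail(job.id, String(err), token);
    } finally {
        clearInterval(beat);
    }
}
```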