bunqueue 2.8.5 → 2.8.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/application/operations/ack.d.ts +1 -1
- package/dist/application/operations/ack.js +2 -2
- package/dist/application/queueManager.d.ts +1 -1
- package/dist/application/queueManager.js +2 -2
- package/dist/application/statsManager.js +18 -8
- package/dist/client/queue/dlq.js +1 -1
- package/dist/client/queue/operations/management.js +4 -2
- package/dist/client/queue/queue.js +2 -0
- package/dist/client/queue/scheduler.js +5 -0
- package/dist/client/tcp/client.d.ts +9 -0
- package/dist/client/tcp/client.js +38 -3
- package/dist/client/tcp/connection.js +11 -0
- package/dist/client/tcp/health.d.ts +14 -0
- package/dist/client/tcp/health.js +24 -0
- package/dist/client/tcp/types.d.ts +10 -0
- package/dist/client/tcp/types.js +1 -0
- package/dist/client/tcpPool.js +2 -0
- package/dist/client/types.d.ts +6 -0
- package/dist/client/worker/worker.js +7 -2
- package/dist/client/worker/workerPull.d.ts +2 -0
- package/dist/client/worker/workerPull.js +12 -5
- package/dist/domain/types/command.d.ts +4 -0
- package/dist/infrastructure/server/handlers/advanced.js +60 -8
- package/dist/infrastructure/server/handlers/core.js +1 -1
- package/dist/infrastructure/server/handlers/cron.js +1 -0
- package/dist/infrastructure/server/handlers/monitoring.js +7 -2
- package/dist/infrastructure/server/http.js +20 -6
- package/dist/infrastructure/server/httpRouteJobs.js +14 -2
- package/dist/infrastructure/server/httpRouteQueueConfig.js +19 -3
- package/dist/infrastructure/server/httpRouteQueues.js +13 -1
- package/dist/infrastructure/server/httpRouteResources.js +4 -0
- package/package.json +1 -1
|
@@ -57,7 +57,7 @@ export declare function ackJob(jobId: JobId, result: unknown, ctx: AckContext):
|
|
|
57
57
|
/**
|
|
58
58
|
* Mark job as failed
|
|
59
59
|
*/
|
|
60
|
-
export declare function failJob(jobId: JobId, error: string | undefined, ctx: AckContext): Promise<void>;
|
|
60
|
+
export declare function failJob(jobId: JobId, error: string | undefined, ctx: AckContext, unrecoverable?: boolean): Promise<void>;
|
|
61
61
|
/**
|
|
62
62
|
* Acknowledge multiple jobs - optimized batch processing
|
|
63
63
|
* Groups jobs by shard to minimize lock acquisitions: O(shards) instead of O(n)
|
|
@@ -112,7 +112,7 @@ function moveFailedJobToDlq(job, jobId, error, shard, ctx) {
|
|
|
112
112
|
/**
|
|
113
113
|
* Mark job as failed
|
|
114
114
|
*/
|
|
115
|
-
export async function failJob(jobId, error, ctx) {
|
|
115
|
+
export async function failJob(jobId, error, ctx, unrecoverable = false) {
|
|
116
116
|
const procIdx = processingShardIndex(jobId);
|
|
117
117
|
const job = await withWriteLock(ctx.processingLocks[procIdx], () => {
|
|
118
118
|
const job = ctx.processingShards[procIdx].get(jobId);
|
|
@@ -134,7 +134,7 @@ export async function failJob(jobId, error, ctx) {
|
|
|
134
134
|
await withWriteLock(ctx.shardLocks[idx], () => {
|
|
135
135
|
const shard = ctx.shards[idx];
|
|
136
136
|
shard.releaseJobResources(job.queue, job.uniqueKey, job.groupId);
|
|
137
|
-
if (canRetry(job)) {
|
|
137
|
+
if (!unrecoverable && canRetry(job)) {
|
|
138
138
|
const now = Date.now();
|
|
139
139
|
job.runAt = now + calculateBackoff(job);
|
|
140
140
|
shard.getQueue(job.queue).push(job);
|
|
@@ -79,7 +79,7 @@ export declare class QueueManager {
|
|
|
79
79
|
result: unknown;
|
|
80
80
|
token?: string;
|
|
81
81
|
}>): Promise<void>;
|
|
82
|
-
fail(jobId: JobId, error?: string, token?: string): Promise<void>;
|
|
82
|
+
fail(jobId: JobId, error?: string, token?: string, unrecoverable?: boolean): Promise<void>;
|
|
83
83
|
/**
|
|
84
84
|
* Check if a failed lock verification is a genuine ownership conflict.
|
|
85
85
|
* If the job is still in processing with a different lock, throw.
|
|
@@ -358,7 +358,7 @@ export class QueueManager {
|
|
|
358
358
|
lockMgr.releaseLock(item.id, lockCtx, item.token);
|
|
359
359
|
}
|
|
360
360
|
}
|
|
361
|
-
async fail(jobId, error, token) {
|
|
361
|
+
async fail(jobId, error, token, unrecoverable = false) {
|
|
362
362
|
const lockCtx = this.contextFactory.getLockContext();
|
|
363
363
|
if (token && !lockMgr.verifyLock(jobId, token, lockCtx)) {
|
|
364
364
|
this.throwIfOwnershipConflict(jobId, lockCtx);
|
|
@@ -367,7 +367,7 @@ export class QueueManager {
|
|
|
367
367
|
return;
|
|
368
368
|
}
|
|
369
369
|
try {
|
|
370
|
-
await failJob(jobId, error, this.contextFactory.getAckContext());
|
|
370
|
+
await failJob(jobId, error, this.contextFactory.getAckContext(), unrecoverable);
|
|
371
371
|
}
|
|
372
372
|
catch (err) {
|
|
373
373
|
// Job removed from processing by stall detection. The stall retry
|
|
@@ -3,6 +3,16 @@
|
|
|
3
3
|
* Provides system metrics and memory compaction utilities
|
|
4
4
|
*/
|
|
5
5
|
import { SHARD_COUNT, shardIndex } from '../shared/hash';
|
|
6
|
+
/** Count jobs belonging to `queueName` across one or more job iterables. */
|
|
7
|
+
function countByQueue(sources, queueName) {
|
|
8
|
+
let count = 0;
|
|
9
|
+
for (const src of sources) {
|
|
10
|
+
for (const job of src)
|
|
11
|
+
if (job.queue === queueName)
|
|
12
|
+
count++;
|
|
13
|
+
}
|
|
14
|
+
return count;
|
|
15
|
+
}
|
|
6
16
|
/**
|
|
7
17
|
* Get queue statistics - uses running counters + priority scan
|
|
8
18
|
*/
|
|
@@ -14,7 +24,10 @@ export function getStats(ctx, cronScheduler) {
|
|
|
14
24
|
delayed += shardStats.delayedJobs;
|
|
15
25
|
dlq += shardStats.dlqJobs;
|
|
16
26
|
active += ctx.processingShards[i].size;
|
|
17
|
-
waitingChildren
|
|
27
|
+
// getJobState reports BOTH waitingChildren (flow parents) and waitingDeps
|
|
28
|
+
// (jobs blocked on dependsOn) as state 'waiting-children', and getJobs lists
|
|
29
|
+
// both — so the count must include both or it undercounts vs state/list (#95 class).
|
|
30
|
+
waitingChildren += ctx.shards[i].waitingChildren.size + ctx.shards[i].waitingDeps.size;
|
|
18
31
|
// Scan queues to split waiting vs prioritized (BullMQ v5 compat)
|
|
19
32
|
for (const queue of ctx.shards[i].queues.values()) {
|
|
20
33
|
for (const job of queue.values()) {
|
|
@@ -174,13 +187,10 @@ export function getQueueJobCounts(queueName, ctx) {
|
|
|
174
187
|
}
|
|
175
188
|
// Count failed (DLQ) jobs for this queue
|
|
176
189
|
const failed = shard.getDlq(queueName).length;
|
|
177
|
-
// Count waiting-children jobs
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
waitingChildrenCount++;
|
|
182
|
-
}
|
|
183
|
-
}
|
|
190
|
+
// Count waiting-children jobs. getJobState/getJobs treat BOTH waitingChildren
|
|
191
|
+
// (flow parents) and waitingDeps (jobs blocked on dependsOn) as 'waiting-children',
|
|
192
|
+
// so count both to stay consistent with state/list (#95 class).
|
|
193
|
+
const waitingChildrenCount = countByQueue([shard.waitingChildren.values(), shard.waitingDeps.values()], queueName);
|
|
184
194
|
// Per-queue cumulative counters
|
|
185
195
|
const perQueue = ctx.perQueueMetrics?.get(queueName);
|
|
186
196
|
const totalCompleted = Number(perQueue?.totalCompleted ?? 0n);
|
package/dist/client/queue/dlq.js
CHANGED
|
@@ -64,7 +64,7 @@ export function retryDlq(ctx, id) {
|
|
|
64
64
|
if (ctx.embedded)
|
|
65
65
|
return dlqOps.retryDlqEmbedded(ctx.name, id);
|
|
66
66
|
if (ctx.tcp)
|
|
67
|
-
void ctx.tcp.send({ cmd: 'RetryDlq', queue: ctx.name, id });
|
|
67
|
+
void ctx.tcp.send({ cmd: 'RetryDlq', queue: ctx.name, jobId: id });
|
|
68
68
|
return 0;
|
|
69
69
|
}
|
|
70
70
|
/** Retry DLQ entries by filter */
|
|
@@ -77,7 +77,8 @@ export async function cleanAsync(ctx, grace, limit, type) {
|
|
|
77
77
|
queue: ctx.name,
|
|
78
78
|
grace,
|
|
79
79
|
limit,
|
|
80
|
-
type
|
|
80
|
+
// Handler reads `state`; sending `type` made the state filter a no-op.
|
|
81
|
+
state: type,
|
|
81
82
|
});
|
|
82
83
|
if (!response.ok)
|
|
83
84
|
return [];
|
|
@@ -107,7 +108,8 @@ export async function promoteJobs(ctx, opts) {
|
|
|
107
108
|
});
|
|
108
109
|
if (!response.ok)
|
|
109
110
|
return 0;
|
|
110
|
-
|
|
111
|
+
// Handler returns `count`; reading `promoted` always yielded 0.
|
|
112
|
+
return (response.count ?? 0);
|
|
111
113
|
}
|
|
112
114
|
/** Promote a single job */
|
|
113
115
|
export async function promoteJob(ctx, id) {
|
|
@@ -63,6 +63,7 @@ export class Queue {
|
|
|
63
63
|
poolSize,
|
|
64
64
|
pingInterval: connOpts.pingInterval,
|
|
65
65
|
commandTimeout: connOpts.commandTimeout,
|
|
66
|
+
maxCommandTimeouts: connOpts.maxCommandTimeouts,
|
|
66
67
|
pipelining: connOpts.pipelining,
|
|
67
68
|
maxInFlight: connOpts.maxInFlight,
|
|
68
69
|
});
|
|
@@ -76,6 +77,7 @@ export class Queue {
|
|
|
76
77
|
poolSize,
|
|
77
78
|
pingInterval: connOpts.pingInterval,
|
|
78
79
|
commandTimeout: connOpts.commandTimeout,
|
|
80
|
+
maxCommandTimeouts: connOpts.maxCommandTimeouts,
|
|
79
81
|
pipelining: connOpts.pipelining,
|
|
80
82
|
maxInFlight: connOpts.maxInFlight,
|
|
81
83
|
});
|
|
@@ -65,6 +65,9 @@ export async function upsertJobScheduler(ctx, schedulerId, repeatOpts, jobTempla
|
|
|
65
65
|
const dedupFields = buildCronDedup(jobTemplate);
|
|
66
66
|
const jobOptions = buildCronJobOptions(ctx.defaultJobOptions, jobTemplate);
|
|
67
67
|
const cronName = toCronName(ctx, schedulerId);
|
|
68
|
+
// Priority of spawned jobs: carried on the top-level Cron field (the handler
|
|
69
|
+
// reads cmd.priority), which buildCronJobOptions does not cover.
|
|
70
|
+
const priority = jobTemplate?.opts?.priority ?? ctx.defaultJobOptions?.priority;
|
|
68
71
|
if (ctx.embedded) {
|
|
69
72
|
const manager = getSharedManager();
|
|
70
73
|
manager.addCron({
|
|
@@ -73,6 +76,7 @@ export async function upsertJobScheduler(ctx, schedulerId, repeatOpts, jobTempla
|
|
|
73
76
|
data,
|
|
74
77
|
schedule: cronPattern,
|
|
75
78
|
repeatEvery,
|
|
79
|
+
priority,
|
|
76
80
|
timezone: repeatOpts.timezone ?? 'UTC',
|
|
77
81
|
skipMissedOnRestart: repeatOpts.skipMissedOnRestart,
|
|
78
82
|
immediately: repeatOpts.immediately,
|
|
@@ -94,6 +98,7 @@ export async function upsertJobScheduler(ctx, schedulerId, repeatOpts, jobTempla
|
|
|
94
98
|
data,
|
|
95
99
|
schedule: cronPattern,
|
|
96
100
|
repeatEvery,
|
|
101
|
+
priority,
|
|
97
102
|
timezone: repeatOpts.timezone,
|
|
98
103
|
skipMissedOnRestart: repeatOpts.skipMissedOnRestart,
|
|
99
104
|
immediately: repeatOpts.immediately,
|
|
@@ -73,6 +73,15 @@ export declare class TcpClient extends EventEmitter {
|
|
|
73
73
|
/** Send ping to check connection health */
|
|
74
74
|
ping(): Promise<boolean>;
|
|
75
75
|
private handlePingFailure;
|
|
76
|
+
/**
|
|
77
|
+
* A command timed out. On a half-open socket (peer gone, no FIN/RST) writes
|
|
78
|
+
* keep succeeding but no response ever returns — every command times out
|
|
79
|
+
* while the socket still looks "connected". The health-check ping is one way
|
|
80
|
+
* to notice, but it can be disabled or slower than real traffic, leaving a
|
|
81
|
+
* worker's PULL loop to time out forever without ever reconnecting (#94).
|
|
82
|
+
* Treat a sustained run of timeouts as a dead link and force a reconnect.
|
|
83
|
+
*/
|
|
84
|
+
private handleCommandTimeout;
|
|
76
85
|
private forceReconnect;
|
|
77
86
|
/** Get connection health metrics */
|
|
78
87
|
getHealth(): ConnectionHealth;
|
|
@@ -79,6 +79,7 @@ export class TcpClient extends EventEmitter {
|
|
|
79
79
|
this.health = new HealthTracker({
|
|
80
80
|
pingInterval: this.options.pingInterval,
|
|
81
81
|
maxPingFailures: this.options.maxPingFailures,
|
|
82
|
+
maxCommandTimeouts: this.options.maxCommandTimeouts,
|
|
82
83
|
});
|
|
83
84
|
this.reconnect = new ReconnectManager({
|
|
84
85
|
maxReconnectAttempts: this.options.maxReconnectAttempts,
|
|
@@ -258,15 +259,44 @@ export class TcpClient extends EventEmitter {
|
|
|
258
259
|
this.emit('health', { type: 'ping_failed' });
|
|
259
260
|
}
|
|
260
261
|
}
|
|
262
|
+
/**
|
|
263
|
+
* A command timed out. On a half-open socket (peer gone, no FIN/RST) writes
|
|
264
|
+
* keep succeeding but no response ever returns — every command times out
|
|
265
|
+
* while the socket still looks "connected". The health-check ping is one way
|
|
266
|
+
* to notice, but it can be disabled or slower than real traffic, leaving a
|
|
267
|
+
* worker's PULL loop to time out forever without ever reconnecting (#94).
|
|
268
|
+
* Treat a sustained run of timeouts as a dead link and force a reconnect.
|
|
269
|
+
*/
|
|
270
|
+
handleCommandTimeout() {
|
|
271
|
+
if (this.health.recordCommandTimeout()) {
|
|
272
|
+
this.emit('health', { type: 'unhealthy', reason: 'max_command_timeouts' });
|
|
273
|
+
this.forceReconnect();
|
|
274
|
+
}
|
|
275
|
+
}
|
|
261
276
|
forceReconnect() {
|
|
262
277
|
if (this.reconnect.isClosed())
|
|
263
278
|
return;
|
|
264
279
|
if (this.socket) {
|
|
265
|
-
|
|
280
|
+
// end() can throw on an already-errored/half-dead socket. Swallow it:
|
|
281
|
+
// failing to close the corpse must NOT abort the reconnect path below,
|
|
282
|
+
// or the connection would stay wedged forever (the #94 failure mode).
|
|
283
|
+
try {
|
|
284
|
+
this.socket.end();
|
|
285
|
+
}
|
|
286
|
+
catch {
|
|
287
|
+
/* socket already torn down */
|
|
288
|
+
}
|
|
266
289
|
this.socket = null;
|
|
267
290
|
}
|
|
268
291
|
this.connected = false;
|
|
269
292
|
this.health.stopPing();
|
|
293
|
+
// Settle every in-flight/queued command NOW. Otherwise their per-command
|
|
294
|
+
// timeouts keep ticking and fire AFTER the fresh socket is up — each stale
|
|
295
|
+
// timeout bumps the new connection's dead-link counter and can re-trigger
|
|
296
|
+
// forceReconnect in a loop (a reconnect storm that never stabilises). It
|
|
297
|
+
// also unblocks awaiting callers (e.g. a Worker's PULL) immediately instead
|
|
298
|
+
// of making them wait out the full commandTimeout on a corpse.
|
|
299
|
+
this.commands.rejectAll(new Error('Connection lost'));
|
|
270
300
|
if (this.reconnect.canReconnect())
|
|
271
301
|
this.reconnect.scheduleReconnect(() => this.connect());
|
|
272
302
|
}
|
|
@@ -343,6 +373,8 @@ export class TcpClient extends EventEmitter {
|
|
|
343
373
|
if (removed) {
|
|
344
374
|
this.health.recordError();
|
|
345
375
|
next.reject(new Error('Command timeout'));
|
|
376
|
+
// In-flight command got no response: count it toward dead-link detection.
|
|
377
|
+
this.handleCommandTimeout();
|
|
346
378
|
}
|
|
347
379
|
}, this.options.commandTimeout);
|
|
348
380
|
next.timeout = newTimeout;
|
|
@@ -365,17 +397,20 @@ export class TcpClient extends EventEmitter {
|
|
|
365
397
|
let pendingRef;
|
|
366
398
|
const promise = new Promise((resolve, reject) => {
|
|
367
399
|
const timeout = setTimeout(() => {
|
|
368
|
-
// Try to remove from queue first
|
|
400
|
+
// Try to remove from queue first. A still-queued command never reached
|
|
401
|
+
// the socket (e.g. waiting on connect), so it is NOT evidence of a dead
|
|
402
|
+
// link — reject it but don't count it toward dead-link detection.
|
|
369
403
|
if (this.commands.remove(id)) {
|
|
370
404
|
this.health.recordError();
|
|
371
405
|
reject(new Error('Command timeout'));
|
|
372
406
|
return;
|
|
373
407
|
}
|
|
374
|
-
// Try to remove from in-flight
|
|
408
|
+
// Try to remove from in-flight: this one WAS sent and got no response.
|
|
375
409
|
const removed = this.commands.removeByReqId(reqId);
|
|
376
410
|
if (removed) {
|
|
377
411
|
this.health.recordError();
|
|
378
412
|
reject(new Error('Command timeout'));
|
|
413
|
+
this.handleCommandTimeout();
|
|
379
414
|
}
|
|
380
415
|
}, this.options.commandTimeout);
|
|
381
416
|
pendingRef = {
|
|
@@ -42,6 +42,17 @@ export async function createConnection(target, connectTimeout, events) {
|
|
|
42
42
|
},
|
|
43
43
|
open(sock) {
|
|
44
44
|
cleanup();
|
|
45
|
+
// Enable TCP keepalive so the OS probes idle connections and surfaces a
|
|
46
|
+
// dead peer (suspended host, NAT/LB drop) via an error/close event,
|
|
47
|
+
// instead of a half-open socket lingering until tcp_retries2 (~15 min).
|
|
48
|
+
// Best-effort: not all platforms honor the delay, and older Bun builds
|
|
49
|
+
// may lack the method — never let it abort connection setup. See #94.
|
|
50
|
+
try {
|
|
51
|
+
sock.setKeepAlive?.(true, 15000);
|
|
52
|
+
}
|
|
53
|
+
catch {
|
|
54
|
+
/* keepalive unsupported on this platform/runtime */
|
|
55
|
+
}
|
|
45
56
|
socketData.write = (d) => sock.write(d);
|
|
46
57
|
socketData.end = () => sock.end();
|
|
47
58
|
connectionResolved = true;
|
|
@@ -7,6 +7,11 @@ import type { ConnectionHealth } from './types';
|
|
|
7
7
|
export interface HealthConfig {
|
|
8
8
|
pingInterval: number;
|
|
9
9
|
maxPingFailures: number;
|
|
10
|
+
/**
|
|
11
|
+
* Consecutive command timeouts before the link is concluded dead (0 = off).
|
|
12
|
+
* Optional for backward compatibility; defaults to 3 when omitted.
|
|
13
|
+
*/
|
|
14
|
+
maxCommandTimeouts?: number;
|
|
10
15
|
}
|
|
11
16
|
/**
|
|
12
17
|
* Tracks connection health metrics
|
|
@@ -14,6 +19,7 @@ export interface HealthConfig {
|
|
|
14
19
|
export declare class HealthTracker {
|
|
15
20
|
private readonly config;
|
|
16
21
|
private consecutivePingFailures;
|
|
22
|
+
private consecutiveCommandTimeouts;
|
|
17
23
|
private lastSuccessAt;
|
|
18
24
|
private lastErrorAt;
|
|
19
25
|
private connectedAt;
|
|
@@ -35,6 +41,14 @@ export declare class HealthTracker {
|
|
|
35
41
|
recordPingSuccess(latencyMs: number): void;
|
|
36
42
|
/** Record ping failure, returns true if max failures reached */
|
|
37
43
|
recordPingFailure(): boolean;
|
|
44
|
+
/**
|
|
45
|
+
* Record a command timeout. Returns true when the configured consecutive
|
|
46
|
+
* threshold is reached (and the feature is enabled), signalling the caller to
|
|
47
|
+
* force a reconnect. Any intervening success resets the counter, so this only
|
|
48
|
+
* fires on a sustained run of timeouts — the signature of a dead/half-open
|
|
49
|
+
* socket where writes succeed but no response ever comes back.
|
|
50
|
+
*/
|
|
51
|
+
recordCommandTimeout(): boolean;
|
|
38
52
|
/** Get current health metrics */
|
|
39
53
|
getHealth(state: 'connected' | 'connecting' | 'disconnected' | 'closed'): ConnectionHealth;
|
|
40
54
|
/** Start ping timer */
|
|
@@ -2,12 +2,15 @@
|
|
|
2
2
|
* TCP Health Tracker
|
|
3
3
|
* Monitors connection health with ping and latency tracking
|
|
4
4
|
*/
|
|
5
|
+
/** Default consecutive command-timeout threshold when not configured. */
|
|
6
|
+
const DEFAULT_MAX_COMMAND_TIMEOUTS = 3;
|
|
5
7
|
/**
|
|
6
8
|
* Tracks connection health metrics
|
|
7
9
|
*/
|
|
8
10
|
export class HealthTracker {
|
|
9
11
|
config;
|
|
10
12
|
consecutivePingFailures = 0;
|
|
13
|
+
consecutiveCommandTimeouts = 0;
|
|
11
14
|
lastSuccessAt = null;
|
|
12
15
|
lastErrorAt = null;
|
|
13
16
|
connectedAt = null;
|
|
@@ -23,6 +26,9 @@ export class HealthTracker {
|
|
|
23
26
|
recordSuccess(latencyMs) {
|
|
24
27
|
this.lastSuccessAt = Date.now();
|
|
25
28
|
this.totalCommands++;
|
|
29
|
+
// A real response proves the link is alive: the prior timeouts were not a
|
|
30
|
+
// sustained run, so reset the dead-link counter ("consecutive" must mean it).
|
|
31
|
+
this.consecutiveCommandTimeouts = 0;
|
|
26
32
|
this.recordLatency(latencyMs);
|
|
27
33
|
}
|
|
28
34
|
/** Record command error */
|
|
@@ -38,10 +44,13 @@ export class HealthTracker {
|
|
|
38
44
|
recordConnected() {
|
|
39
45
|
this.connectedAt = Date.now();
|
|
40
46
|
this.consecutivePingFailures = 0;
|
|
47
|
+
this.consecutiveCommandTimeouts = 0;
|
|
41
48
|
}
|
|
42
49
|
/** Record ping success */
|
|
43
50
|
recordPingSuccess(latencyMs) {
|
|
51
|
+
// A successful ping is also proof the link is alive — clear both suspicions.
|
|
44
52
|
this.consecutivePingFailures = 0;
|
|
53
|
+
this.consecutiveCommandTimeouts = 0;
|
|
45
54
|
this.recordLatency(latencyMs);
|
|
46
55
|
}
|
|
47
56
|
/** Record ping failure, returns true if max failures reached */
|
|
@@ -51,6 +60,20 @@ export class HealthTracker {
|
|
|
51
60
|
this.totalErrors++;
|
|
52
61
|
return this.consecutivePingFailures >= this.config.maxPingFailures;
|
|
53
62
|
}
|
|
63
|
+
/**
|
|
64
|
+
* Record a command timeout. Returns true when the configured consecutive
|
|
65
|
+
* threshold is reached (and the feature is enabled), signalling the caller to
|
|
66
|
+
* force a reconnect. Any intervening success resets the counter, so this only
|
|
67
|
+
* fires on a sustained run of timeouts — the signature of a dead/half-open
|
|
68
|
+
* socket where writes succeed but no response ever comes back.
|
|
69
|
+
*/
|
|
70
|
+
recordCommandTimeout() {
|
|
71
|
+
const max = this.config.maxCommandTimeouts ?? DEFAULT_MAX_COMMAND_TIMEOUTS;
|
|
72
|
+
if (max <= 0)
|
|
73
|
+
return false;
|
|
74
|
+
this.consecutiveCommandTimeouts++;
|
|
75
|
+
return this.consecutiveCommandTimeouts >= max;
|
|
76
|
+
}
|
|
54
77
|
/** Get current health metrics */
|
|
55
78
|
getHealth(state) {
|
|
56
79
|
const avgLatency = this.latencyHistory.length > 0
|
|
@@ -63,6 +86,7 @@ export class HealthTracker {
|
|
|
63
86
|
lastErrorAt: this.lastErrorAt,
|
|
64
87
|
avgLatencyMs: Math.round(avgLatency * 100) / 100,
|
|
65
88
|
consecutivePingFailures: this.consecutivePingFailures,
|
|
89
|
+
consecutiveCommandTimeouts: this.consecutiveCommandTimeouts,
|
|
66
90
|
totalCommands: this.totalCommands,
|
|
67
91
|
totalErrors: this.totalErrors,
|
|
68
92
|
uptimeMs: this.connectedAt ? Date.now() - this.connectedAt : 0,
|
|
@@ -26,6 +26,14 @@ export interface ConnectionOptions {
|
|
|
26
26
|
pingInterval?: number;
|
|
27
27
|
/** Max consecutive ping failures before forcing reconnect (default: 3) */
|
|
28
28
|
maxPingFailures?: number;
|
|
29
|
+
/**
|
|
30
|
+
* Max consecutive command timeouts (with no intervening success) before the
|
|
31
|
+
* connection is concluded dead and reconnect is forced (default: 3, 0 to
|
|
32
|
+
* disable). This is the recovery path for a half-open socket when the
|
|
33
|
+
* health-check ping is disabled or slower than real traffic — a worker whose
|
|
34
|
+
* PULLs keep timing out no longer stalls forever waiting on the ping. See #94.
|
|
35
|
+
*/
|
|
36
|
+
maxCommandTimeouts?: number;
|
|
29
37
|
/** Enable pipelining - multiple commands in flight (default: true) */
|
|
30
38
|
pipelining?: boolean;
|
|
31
39
|
/** Max commands in flight when pipelining (default: 100) */
|
|
@@ -45,6 +53,8 @@ export interface ConnectionHealth {
|
|
|
45
53
|
avgLatencyMs: number;
|
|
46
54
|
/** Consecutive ping failures */
|
|
47
55
|
consecutivePingFailures: number;
|
|
56
|
+
/** Consecutive command timeouts with no intervening success */
|
|
57
|
+
consecutiveCommandTimeouts: number;
|
|
48
58
|
/** Total commands sent */
|
|
49
59
|
totalCommands: number;
|
|
50
60
|
/** Total errors */
|
package/dist/client/tcp/types.js
CHANGED
package/dist/client/tcpPool.js
CHANGED
|
@@ -29,6 +29,7 @@ export class TcpConnectionPool {
|
|
|
29
29
|
autoReconnect: options.autoReconnect ?? true,
|
|
30
30
|
pingInterval: options.pingInterval ?? 30000,
|
|
31
31
|
maxPingFailures: options.maxPingFailures ?? 3,
|
|
32
|
+
maxCommandTimeouts: options.maxCommandTimeouts ?? 3,
|
|
32
33
|
pipelining: options.pipelining ?? true,
|
|
33
34
|
maxInFlight: options.maxInFlight ?? 100,
|
|
34
35
|
};
|
|
@@ -46,6 +47,7 @@ export class TcpConnectionPool {
|
|
|
46
47
|
autoReconnect: this.options.autoReconnect,
|
|
47
48
|
pingInterval: this.options.pingInterval,
|
|
48
49
|
maxPingFailures: this.options.maxPingFailures,
|
|
50
|
+
maxCommandTimeouts: this.options.maxCommandTimeouts,
|
|
49
51
|
});
|
|
50
52
|
this.clients.push(client);
|
|
51
53
|
}
|
package/dist/client/types.d.ts
CHANGED
|
@@ -376,6 +376,12 @@ export interface ConnectionOptions {
|
|
|
376
376
|
pingInterval?: number;
|
|
377
377
|
/** Command timeout in ms (default: 30000) */
|
|
378
378
|
commandTimeout?: number;
|
|
379
|
+
/**
|
|
380
|
+
* Consecutive command timeouts (no intervening success) before the connection
|
|
381
|
+
* is concluded dead and a reconnect is forced (default: 3, 0 to disable).
|
|
382
|
+
* Recovery path for a half-open socket independent of the health-check ping. See #94.
|
|
383
|
+
*/
|
|
384
|
+
maxCommandTimeouts?: number;
|
|
379
385
|
/** Enable TCP pipelining (default: true) */
|
|
380
386
|
pipelining?: boolean;
|
|
381
387
|
/** Max commands in flight per connection (default: 100) */
|
|
@@ -47,6 +47,7 @@ function createTcpPool(opts, concurrency) {
|
|
|
47
47
|
poolSize,
|
|
48
48
|
pingInterval: connOpts.pingInterval,
|
|
49
49
|
commandTimeout: connOpts.commandTimeout,
|
|
50
|
+
maxCommandTimeouts: connOpts.maxCommandTimeouts,
|
|
50
51
|
pipelining: connOpts.pipelining,
|
|
51
52
|
maxInFlight: connOpts.maxInFlight,
|
|
52
53
|
});
|
|
@@ -397,9 +398,12 @@ export class Worker extends EventEmitter {
|
|
|
397
398
|
cmd: 'ExtendLocks',
|
|
398
399
|
ids: jobIds,
|
|
399
400
|
tokens,
|
|
400
|
-
|
|
401
|
+
// Protocol expects a per-id `durations` array, and the handler returns
|
|
402
|
+
// `count` (not `extended`). Sending `duration`/reading `extended` made
|
|
403
|
+
// batch lock renewal silently keep the old TTL.
|
|
404
|
+
durations: jobIds.map(() => duration),
|
|
401
405
|
});
|
|
402
|
-
const extended = response.
|
|
406
|
+
const extended = response.count;
|
|
403
407
|
return extended ?? 0;
|
|
404
408
|
}
|
|
405
409
|
// ============ Lifecycle ============
|
|
@@ -734,6 +738,7 @@ export class Worker extends EventEmitter {
|
|
|
734
738
|
workerId: this.workerId,
|
|
735
739
|
useLocks: this.opts.useLocks,
|
|
736
740
|
pollTimeout: this.opts.pollTimeout,
|
|
741
|
+
lockDuration: this.opts.lockDuration,
|
|
737
742
|
};
|
|
738
743
|
}
|
|
739
744
|
/** Apply worker-level removeOnComplete/removeOnFail defaults to a job */
|
|
@@ -9,6 +9,8 @@ export interface PullConfig {
|
|
|
9
9
|
readonly workerId: string;
|
|
10
10
|
readonly useLocks: boolean;
|
|
11
11
|
readonly pollTimeout: number;
|
|
12
|
+
/** Lock TTL in ms to request from the server on a lock-based pull. */
|
|
13
|
+
readonly lockDuration?: number;
|
|
12
14
|
}
|
|
13
15
|
export declare function pullEmbedded(config: PullConfig, count: number): Promise<Array<{
|
|
14
16
|
job: InternalJob;
|
|
@@ -6,13 +6,14 @@ import { getSharedManager } from '../manager';
|
|
|
6
6
|
import { parseJobFromResponse } from './jobParser';
|
|
7
7
|
export async function pullEmbedded(config, count) {
|
|
8
8
|
const manager = getSharedManager();
|
|
9
|
-
// Use lock-based pull only when useLocks is enabled
|
|
9
|
+
// Use lock-based pull only when useLocks is enabled. Pass lockDuration so the
|
|
10
|
+
// configured lock TTL is honored in embedded mode too (undefined → server default).
|
|
10
11
|
if (config.useLocks) {
|
|
11
12
|
if (count === 1) {
|
|
12
|
-
const { job, token } = await manager.pullWithLock(config.name, config.workerId, 0);
|
|
13
|
+
const { job, token } = await manager.pullWithLock(config.name, config.workerId, 0, config.lockDuration);
|
|
13
14
|
return job ? [{ job, token }] : [];
|
|
14
15
|
}
|
|
15
|
-
const { jobs, tokens } = await manager.pullBatchWithLock(config.name, count, config.workerId, 0);
|
|
16
|
+
const { jobs, tokens } = await manager.pullBatchWithLock(config.name, count, config.workerId, 0, config.lockDuration);
|
|
16
17
|
return jobs.map((job, i) => ({ job, token: tokens[i] || null }));
|
|
17
18
|
}
|
|
18
19
|
// No locks - use regular pull
|
|
@@ -26,16 +27,22 @@ export async function pullEmbedded(config, count) {
|
|
|
26
27
|
export async function pullTcp(config, tcp, count, closing) {
|
|
27
28
|
if (closing)
|
|
28
29
|
return [];
|
|
29
|
-
// Build pull command - only request locks if useLocks is enabled
|
|
30
|
+
// Build pull command - only request locks if useLocks is enabled.
|
|
31
|
+
// `count` belongs to the batch PULLB; a single PULL doesn't need it.
|
|
30
32
|
const cmd = {
|
|
31
33
|
cmd: count === 1 ? 'PULL' : 'PULLB',
|
|
32
34
|
queue: config.name,
|
|
33
35
|
timeout: config.pollTimeout,
|
|
34
|
-
count,
|
|
35
36
|
};
|
|
37
|
+
if (count > 1)
|
|
38
|
+
cmd.count = count;
|
|
36
39
|
// Only request lock ownership when useLocks is enabled
|
|
37
40
|
if (config.useLocks) {
|
|
38
41
|
cmd.owner = config.workerId;
|
|
42
|
+
// Propagate the configured lock TTL so the server doesn't always fall back
|
|
43
|
+
// to its 30s default (WorkerOptions.lockDuration was previously ignored).
|
|
44
|
+
if (config.lockDuration !== undefined)
|
|
45
|
+
cmd.lockTtl = config.lockDuration;
|
|
39
46
|
}
|
|
40
47
|
const response = await tcp.send(cmd);
|
|
41
48
|
if (!response.ok)
|
|
@@ -98,6 +98,8 @@ export interface FailCommand extends BaseCommand {
|
|
|
98
98
|
readonly id: string;
|
|
99
99
|
readonly error?: string;
|
|
100
100
|
readonly token?: string;
|
|
101
|
+
/** Skip all remaining retries and fail terminally (UnrecoverableError over TCP). */
|
|
102
|
+
readonly unrecoverable?: boolean;
|
|
101
103
|
}
|
|
102
104
|
export interface GetJobCommand extends BaseCommand {
|
|
103
105
|
readonly cmd: 'GetJob';
|
|
@@ -309,6 +311,8 @@ export interface AddLogCommand extends BaseCommand {
|
|
|
309
311
|
export interface GetLogsCommand extends BaseCommand {
|
|
310
312
|
readonly cmd: 'GetLogs';
|
|
311
313
|
readonly id: string;
|
|
314
|
+
readonly start?: number;
|
|
315
|
+
readonly end?: number;
|
|
312
316
|
}
|
|
313
317
|
export interface HeartbeatCommand extends BaseCommand {
|
|
314
318
|
readonly cmd: 'Heartbeat';
|
|
@@ -4,6 +4,45 @@
|
|
|
4
4
|
*/
|
|
5
5
|
import * as resp from '../../../domain/types/response';
|
|
6
6
|
import { jobId } from '../../../domain/types/job';
|
|
7
|
+
/**
|
|
8
|
+
* Coerce a value to a finite number, or return undefined if it can't be.
|
|
9
|
+
* Guards config endpoints against non-numeric input (e.g. `"abc"`) that would
|
|
10
|
+
* otherwise reach numeric comparisons as NaN and silently break behaviour
|
|
11
|
+
* (a string `stallInterval` disabled stall detection entirely).
|
|
12
|
+
*/
|
|
13
|
+
function toFiniteNumber(value) {
|
|
14
|
+
if (value === undefined || value === null)
|
|
15
|
+
return undefined;
|
|
16
|
+
const n = typeof value === 'number' ? value : Number(value);
|
|
17
|
+
return Number.isFinite(n) ? n : undefined;
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Sanitize the numeric fields of a config object: coerce numeric strings, drop
|
|
21
|
+
* non-numeric garbage (so the manager's merge keeps the existing/default value
|
|
22
|
+
* instead of storing NaN). Booleans and unknown keys pass through untouched.
|
|
23
|
+
*/
|
|
24
|
+
function sanitizeConfigNumbers(config, numericKeys) {
|
|
25
|
+
if (!config || typeof config !== 'object')
|
|
26
|
+
return config;
|
|
27
|
+
const numeric = new Set(numericKeys);
|
|
28
|
+
const out = {};
|
|
29
|
+
for (const [key, value] of Object.entries(config)) {
|
|
30
|
+
if (!numeric.has(key)) {
|
|
31
|
+
out[key] = value; // booleans / unknown keys pass through untouched
|
|
32
|
+
continue;
|
|
33
|
+
}
|
|
34
|
+
if (value === null) {
|
|
35
|
+
out[key] = null; // valid for nullable fields (e.g. dlq maxAge)
|
|
36
|
+
continue;
|
|
37
|
+
}
|
|
38
|
+
const n = toFiniteNumber(value);
|
|
39
|
+
// coerce numeric strings; omit non-numeric garbage so the manager's merge
|
|
40
|
+
// keeps the existing/default value instead of storing NaN
|
|
41
|
+
if (n !== undefined)
|
|
42
|
+
out[key] = n;
|
|
43
|
+
}
|
|
44
|
+
return out;
|
|
45
|
+
}
|
|
7
46
|
// ============ Job Management ============
|
|
8
47
|
/** Handle Update command - update job data */
|
|
9
48
|
export async function handleUpdate(cmd, ctx, reqId) {
|
|
@@ -122,8 +161,11 @@ export function handleCount(cmd, ctx, reqId) {
|
|
|
122
161
|
// ============ Rate Limiting ============
|
|
123
162
|
/** Handle RateLimit command */
|
|
124
163
|
export function handleRateLimit(cmd, ctx, reqId) {
|
|
125
|
-
|
|
126
|
-
|
|
164
|
+
const limit = toFiniteNumber(cmd.limit);
|
|
165
|
+
if (limit === undefined)
|
|
166
|
+
return resp.error('limit must be a finite number', reqId);
|
|
167
|
+
ctx.queueManager.setRateLimit(cmd.queue, limit);
|
|
168
|
+
ctx.queueManager.emitDashboardEvent('ratelimit:set', { queue: cmd.queue, max: limit });
|
|
127
169
|
return resp.ok(undefined, reqId);
|
|
128
170
|
}
|
|
129
171
|
/** Handle RateLimitClear command */
|
|
@@ -134,10 +176,13 @@ export function handleRateLimitClear(cmd, ctx, reqId) {
|
|
|
134
176
|
}
|
|
135
177
|
/** Handle SetConcurrency command */
|
|
136
178
|
export function handleSetConcurrency(cmd, ctx, reqId) {
|
|
137
|
-
|
|
179
|
+
const limit = toFiniteNumber(cmd.limit);
|
|
180
|
+
if (limit === undefined)
|
|
181
|
+
return resp.error('limit must be a finite number', reqId);
|
|
182
|
+
ctx.queueManager.setConcurrency(cmd.queue, limit);
|
|
138
183
|
ctx.queueManager.emitDashboardEvent('concurrency:set', {
|
|
139
184
|
queue: cmd.queue,
|
|
140
|
-
concurrency:
|
|
185
|
+
concurrency: limit,
|
|
141
186
|
});
|
|
142
187
|
return resp.ok(undefined, reqId);
|
|
143
188
|
}
|
|
@@ -150,10 +195,11 @@ export function handleClearConcurrency(cmd, ctx, reqId) {
|
|
|
150
195
|
// ============ Config Commands ============
|
|
151
196
|
/** Handle SetStallConfig command */
|
|
152
197
|
export function handleSetStallConfig(cmd, ctx, reqId) {
|
|
153
|
-
|
|
198
|
+
const config = sanitizeConfigNumbers(cmd.config, ['stallInterval', 'maxStalls', 'gracePeriod']);
|
|
199
|
+
ctx.queueManager.setStallConfig(cmd.queue, config);
|
|
154
200
|
ctx.queueManager.emitDashboardEvent('config:stall-changed', {
|
|
155
201
|
queue: cmd.queue,
|
|
156
|
-
config
|
|
202
|
+
config,
|
|
157
203
|
});
|
|
158
204
|
return resp.ok(undefined, reqId);
|
|
159
205
|
}
|
|
@@ -164,10 +210,16 @@ export function handleGetStallConfig(cmd, ctx, reqId) {
|
|
|
164
210
|
}
|
|
165
211
|
/** Handle SetDlqConfig command */
|
|
166
212
|
export function handleSetDlqConfig(cmd, ctx, reqId) {
|
|
167
|
-
|
|
213
|
+
const config = sanitizeConfigNumbers(cmd.config, [
|
|
214
|
+
'autoRetryInterval',
|
|
215
|
+
'maxAutoRetries',
|
|
216
|
+
'maxAge',
|
|
217
|
+
'maxEntries',
|
|
218
|
+
]);
|
|
219
|
+
ctx.queueManager.setDlqConfig(cmd.queue, config);
|
|
168
220
|
ctx.queueManager.emitDashboardEvent('config:dlq-changed', {
|
|
169
221
|
queue: cmd.queue,
|
|
170
|
-
config
|
|
222
|
+
config,
|
|
171
223
|
});
|
|
172
224
|
return resp.ok(undefined, reqId);
|
|
173
225
|
}
|
|
@@ -189,7 +189,7 @@ export async function handleAckBatch(cmd, ctx, reqId) {
|
|
|
189
189
|
export async function handleFail(cmd, ctx, reqId) {
|
|
190
190
|
try {
|
|
191
191
|
const jid = jobId(cmd.id);
|
|
192
|
-
await ctx.queueManager.fail(jid, cmd.error, cmd.token);
|
|
192
|
+
await ctx.queueManager.fail(jid, cmd.error, cmd.token, cmd.unrecoverable);
|
|
193
193
|
// Unregister job from client tracking
|
|
194
194
|
ctx.queueManager.unregisterClientJob(ctx.clientId, jid);
|
|
195
195
|
return resp.ok(undefined, reqId);
|
|
@@ -18,8 +18,13 @@ export function handleAddLog(cmd, ctx, reqId) {
|
|
|
18
18
|
}
|
|
19
19
|
export function handleGetLogs(cmd, ctx, reqId) {
|
|
20
20
|
const jid = jobId(cmd.id);
|
|
21
|
-
const
|
|
22
|
-
|
|
21
|
+
const all = ctx.queueManager.getLogs(jid);
|
|
22
|
+
// Honor optional pagination (start/end inclusive) the client already sends.
|
|
23
|
+
const total = all.length;
|
|
24
|
+
const logs = cmd.start === undefined && cmd.end === undefined
|
|
25
|
+
? all
|
|
26
|
+
: all.slice(cmd.start ?? 0, (cmd.end ?? total - 1) + 1);
|
|
27
|
+
return resp.data({ logs, count: total }, reqId);
|
|
23
28
|
}
|
|
24
29
|
// ============ Worker Heartbeat ============
|
|
25
30
|
export function handleHeartbeat(cmd, ctx, reqId) {
|
|
@@ -59,6 +59,17 @@ export function createHttpServer(queueManager, config) {
|
|
|
59
59
|
});
|
|
60
60
|
// Helper to get CORS origin string
|
|
61
61
|
const getCorsOrigin = () => (corsOrigins.has('*') ? '*' : Array.from(corsOrigins).join(', '));
|
|
62
|
+
// Attach CORS to responses built outside the routeRequest pipeline (health,
|
|
63
|
+
// ready, prometheus, debug) so browser dashboards can read them cross-origin
|
|
64
|
+
// (audit #16-20). Response headers are mutable for normally-constructed
|
|
65
|
+
// Responses; this never overwrites an existing value set by the endpoint.
|
|
66
|
+
const withCors = async (r) => {
|
|
67
|
+
const res = await r;
|
|
68
|
+
if (!res.headers.has('Access-Control-Allow-Origin')) {
|
|
69
|
+
res.headers.set('Access-Control-Allow-Origin', getCorsOrigin());
|
|
70
|
+
}
|
|
71
|
+
return res;
|
|
72
|
+
};
|
|
62
73
|
// Fetch handler
|
|
63
74
|
const fetch = async (req, server) => {
|
|
64
75
|
const url = new URL(req.url);
|
|
@@ -69,26 +80,26 @@ export function createHttpServer(queueManager, config) {
|
|
|
69
80
|
}
|
|
70
81
|
// Health endpoints (no auth, no rate limit)
|
|
71
82
|
if (path === '/health') {
|
|
72
|
-
return healthEndpoint(queueManager, wsHandler.size, sseHandler.size);
|
|
83
|
+
return withCors(healthEndpoint(queueManager, wsHandler.size, sseHandler.size));
|
|
73
84
|
}
|
|
74
85
|
if (path === '/healthz' || path === '/live') {
|
|
75
|
-
return new Response('OK', { status: 200 });
|
|
86
|
+
return withCors(new Response('OK', { status: 200 }));
|
|
76
87
|
}
|
|
77
88
|
if (path === '/ready') {
|
|
78
|
-
return jsonResponse({ ok: true, ready: true });
|
|
89
|
+
return jsonResponse({ ok: true, ready: true }, 200, corsOrigins);
|
|
79
90
|
}
|
|
80
91
|
// Debug endpoints (require auth)
|
|
81
92
|
if (path === '/gc' && req.method === 'POST') {
|
|
82
93
|
const denied = checkAuth(req, authTokens);
|
|
83
94
|
if (denied)
|
|
84
95
|
return denied;
|
|
85
|
-
return gcEndpoint(queueManager);
|
|
96
|
+
return withCors(gcEndpoint(queueManager));
|
|
86
97
|
}
|
|
87
98
|
if (path === '/heapstats' && req.method === 'GET') {
|
|
88
99
|
const denied = checkAuth(req, authTokens);
|
|
89
100
|
if (denied)
|
|
90
101
|
return denied;
|
|
91
|
-
return heapStatsEndpoint(queueManager);
|
|
102
|
+
return withCors(heapStatsEndpoint(queueManager));
|
|
92
103
|
}
|
|
93
104
|
// Rate limiting
|
|
94
105
|
const clientIp = req.headers.get('x-forwarded-for')?.split(',')[0]?.trim() ??
|
|
@@ -131,7 +142,10 @@ export function createHttpServer(queueManager, config) {
|
|
|
131
142
|
return denied;
|
|
132
143
|
}
|
|
133
144
|
return new Response(queueManager.getPrometheusMetrics(), {
|
|
134
|
-
headers: {
|
|
145
|
+
headers: {
|
|
146
|
+
'Content-Type': 'text/plain; version=0.0.4; charset=utf-8',
|
|
147
|
+
'Access-Control-Allow-Origin': getCorsOrigin(),
|
|
148
|
+
},
|
|
135
149
|
});
|
|
136
150
|
}
|
|
137
151
|
// Check authentication for other endpoints
|
|
@@ -79,6 +79,7 @@ async function routeJobManagement(req, path, method, ctx, cors) {
|
|
|
79
79
|
cmd: 'ChangePriority',
|
|
80
80
|
id: priorityMatch[1],
|
|
81
81
|
priority: body['priority'],
|
|
82
|
+
lifo: body['lifo'],
|
|
82
83
|
}, ctx);
|
|
83
84
|
return jsonResponse(r, r.ok ? 200 : 400, cors);
|
|
84
85
|
}
|
|
@@ -250,7 +251,12 @@ export async function routeJobRoutes(req, path, method, ctx, cors) {
|
|
|
250
251
|
const body = await parseJsonBody(req, cors);
|
|
251
252
|
if (body instanceof Response)
|
|
252
253
|
return body;
|
|
253
|
-
const r = await handleCommand({
|
|
254
|
+
const r = await handleCommand({
|
|
255
|
+
cmd: 'ACK',
|
|
256
|
+
id: ackMatch[1],
|
|
257
|
+
result: body['result'],
|
|
258
|
+
token: body['token'],
|
|
259
|
+
}, ctx);
|
|
254
260
|
return jsonResponse(r, r.ok ? 200 : 400, cors);
|
|
255
261
|
}
|
|
256
262
|
// POST /jobs/:id/fail
|
|
@@ -259,7 +265,13 @@ export async function routeJobRoutes(req, path, method, ctx, cors) {
|
|
|
259
265
|
const body = await parseJsonBody(req, cors);
|
|
260
266
|
if (body instanceof Response)
|
|
261
267
|
return body;
|
|
262
|
-
const r = await handleCommand({
|
|
268
|
+
const r = await handleCommand({
|
|
269
|
+
cmd: 'FAIL',
|
|
270
|
+
id: failMatch[1],
|
|
271
|
+
error: body['error'],
|
|
272
|
+
token: body['token'],
|
|
273
|
+
unrecoverable: body['unrecoverable'],
|
|
274
|
+
}, ctx);
|
|
263
275
|
return jsonResponse(r, r.ok ? 200 : 400, cors);
|
|
264
276
|
}
|
|
265
277
|
// Delegate to sub-routers
|
|
@@ -26,8 +26,23 @@ export async function routeQueueConfigRoutes(req, path, method, ctx, cors) {
|
|
|
26
26
|
const dlqMatch = path.match(RE_QUEUE_DLQ);
|
|
27
27
|
if (dlqMatch && method === 'GET') {
|
|
28
28
|
const queue = decodeURIComponent(dlqMatch[1]);
|
|
29
|
-
const
|
|
30
|
-
|
|
29
|
+
const all = ctx.queueManager.getDlqEntries(queue);
|
|
30
|
+
// Optional pagination so a dashboard can page large DLQs. Non-numeric params
|
|
31
|
+
// are ignored (treated as absent) rather than producing an empty/garbage slice.
|
|
32
|
+
const params = new URL(req.url).searchParams;
|
|
33
|
+
const toInt = (v) => {
|
|
34
|
+
if (v === null)
|
|
35
|
+
return undefined;
|
|
36
|
+
const n = Number(v);
|
|
37
|
+
return Number.isFinite(n) ? Math.trunc(n) : undefined;
|
|
38
|
+
};
|
|
39
|
+
const limit = toInt(params.get('limit'));
|
|
40
|
+
const offset = toInt(params.get('offset'));
|
|
41
|
+
const start = Math.max(0, offset ?? 0);
|
|
42
|
+
const entries = limit === undefined && offset === undefined
|
|
43
|
+
? all
|
|
44
|
+
: all.slice(start, start + (limit !== undefined ? Math.max(0, limit) : all.length));
|
|
45
|
+
return jsonResponse({ ok: true, entries, total: all.length }, 200, cors);
|
|
31
46
|
}
|
|
32
47
|
// POST /queues/:queue/dlq/retry
|
|
33
48
|
const dlqRetryMatch = path.match(RE_QUEUE_DLQ_RETRY);
|
|
@@ -79,7 +94,8 @@ export async function routeQueueConfigRoutes(req, path, method, ctx, cors) {
|
|
|
79
94
|
const r = await handleCommand({
|
|
80
95
|
cmd: 'SetConcurrency',
|
|
81
96
|
queue,
|
|
82
|
-
|
|
97
|
+
// Accept the natural `concurrency` field for this endpoint as well as `limit`.
|
|
98
|
+
limit: (body['concurrency'] ?? body['limit']),
|
|
83
99
|
}, ctx);
|
|
84
100
|
return jsonResponse(r, 200, cors);
|
|
85
101
|
}
|
|
@@ -105,7 +105,19 @@ async function routeJobOps(req, path, method, ctx, cors) {
|
|
|
105
105
|
if (listMatch && method === 'GET') {
|
|
106
106
|
const queue = decodeURIComponent(listMatch[1]);
|
|
107
107
|
const url = new URL(req.url);
|
|
108
|
-
|
|
108
|
+
// Accept `state`, `status` (dashboard/REST convention), and `states` as
|
|
109
|
+
// aliases, each repeatable and comma-separated. Previously only `state` was
|
|
110
|
+
// read, so `?status=failed` silently fell through to an unfiltered list and
|
|
111
|
+
// returned the whole queue (#95). A state name never contains a comma, so
|
|
112
|
+
// splitting is safe.
|
|
113
|
+
const stateValues = [
|
|
114
|
+
...url.searchParams.getAll('state'),
|
|
115
|
+
...url.searchParams.getAll('status'),
|
|
116
|
+
...url.searchParams.getAll('states'),
|
|
117
|
+
]
|
|
118
|
+
.flatMap((v) => v.split(','))
|
|
119
|
+
.map((s) => s.trim())
|
|
120
|
+
.filter(Boolean);
|
|
109
121
|
const state = stateValues.length === 0
|
|
110
122
|
? undefined
|
|
111
123
|
: stateValues.length === 1
|
|
@@ -40,6 +40,10 @@ export async function routeResourceRoutes(req, path, method, ctx, cors) {
|
|
|
40
40
|
uniqueKey: body['uniqueKey'],
|
|
41
41
|
dedup: body['dedup'],
|
|
42
42
|
skipMissedOnRestart: body['skipMissedOnRestart'],
|
|
43
|
+
immediately: body['immediately'],
|
|
44
|
+
skipIfNoWorker: body['skipIfNoWorker'],
|
|
45
|
+
preventOverlap: body['preventOverlap'],
|
|
46
|
+
jobOptions: body['jobOptions'],
|
|
43
47
|
}, ctx);
|
|
44
48
|
return jsonResponse(r, r.ok ? 200 : 400, cors);
|
|
45
49
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "bunqueue",
|
|
3
|
-
"version": "2.8.5",
|
|
3
|
+
"version": "2.8.7",
|
|
4
4
|
"description": "High-performance job queue for Bun & AI agents. SQLite persistence, cron scheduling, priorities, retries, DLQ, webhooks, native MCP server. Zero external dependencies.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/main.js",
|