bunqueue 2.8.5 → 2.8.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/client/queue/queue.js +2 -0
- package/dist/client/tcp/client.d.ts +9 -0
- package/dist/client/tcp/client.js +38 -3
- package/dist/client/tcp/connection.js +11 -0
- package/dist/client/tcp/health.d.ts +14 -0
- package/dist/client/tcp/health.js +24 -0
- package/dist/client/tcp/types.d.ts +10 -0
- package/dist/client/tcp/types.js +1 -0
- package/dist/client/tcpPool.js +2 -0
- package/dist/client/types.d.ts +6 -0
- package/dist/client/worker/worker.js +1 -0
- package/package.json +1 -1
|
@@ -63,6 +63,7 @@ export class Queue {
|
|
|
63
63
|
poolSize,
|
|
64
64
|
pingInterval: connOpts.pingInterval,
|
|
65
65
|
commandTimeout: connOpts.commandTimeout,
|
|
66
|
+
maxCommandTimeouts: connOpts.maxCommandTimeouts,
|
|
66
67
|
pipelining: connOpts.pipelining,
|
|
67
68
|
maxInFlight: connOpts.maxInFlight,
|
|
68
69
|
});
|
|
@@ -76,6 +77,7 @@ export class Queue {
|
|
|
76
77
|
poolSize,
|
|
77
78
|
pingInterval: connOpts.pingInterval,
|
|
78
79
|
commandTimeout: connOpts.commandTimeout,
|
|
80
|
+
maxCommandTimeouts: connOpts.maxCommandTimeouts,
|
|
79
81
|
pipelining: connOpts.pipelining,
|
|
80
82
|
maxInFlight: connOpts.maxInFlight,
|
|
81
83
|
});
|
|
@@ -73,6 +73,15 @@ export declare class TcpClient extends EventEmitter {
|
|
|
73
73
|
/** Send ping to check connection health */
|
|
74
74
|
ping(): Promise<boolean>;
|
|
75
75
|
private handlePingFailure;
|
|
76
|
+
/**
|
|
77
|
+
* A command timed out. On a half-open socket (peer gone, no FIN/RST) writes
|
|
78
|
+
* keep succeeding but no response ever returns — every command times out
|
|
79
|
+
* while the socket still looks "connected". The health-check ping is one way
|
|
80
|
+
* to notice, but it can be disabled or slower than real traffic, leaving a
|
|
81
|
+
* worker's PULL loop to time out forever without ever reconnecting (#94).
|
|
82
|
+
* Treat a sustained run of timeouts as a dead link and force a reconnect.
|
|
83
|
+
*/
|
|
84
|
+
private handleCommandTimeout;
|
|
76
85
|
private forceReconnect;
|
|
77
86
|
/** Get connection health metrics */
|
|
78
87
|
getHealth(): ConnectionHealth;
|
|
@@ -79,6 +79,7 @@ export class TcpClient extends EventEmitter {
|
|
|
79
79
|
this.health = new HealthTracker({
|
|
80
80
|
pingInterval: this.options.pingInterval,
|
|
81
81
|
maxPingFailures: this.options.maxPingFailures,
|
|
82
|
+
maxCommandTimeouts: this.options.maxCommandTimeouts,
|
|
82
83
|
});
|
|
83
84
|
this.reconnect = new ReconnectManager({
|
|
84
85
|
maxReconnectAttempts: this.options.maxReconnectAttempts,
|
|
@@ -258,15 +259,44 @@ export class TcpClient extends EventEmitter {
|
|
|
258
259
|
this.emit('health', { type: 'ping_failed' });
|
|
259
260
|
}
|
|
260
261
|
}
|
|
262
|
+
/**
|
|
263
|
+
* A command timed out. On a half-open socket (peer gone, no FIN/RST) writes
|
|
264
|
+
* keep succeeding but no response ever returns — every command times out
|
|
265
|
+
* while the socket still looks "connected". The health-check ping is one way
|
|
266
|
+
* to notice, but it can be disabled or slower than real traffic, leaving a
|
|
267
|
+
* worker's PULL loop to time out forever without ever reconnecting (#94).
|
|
268
|
+
* Treat a sustained run of timeouts as a dead link and force a reconnect.
|
|
269
|
+
*/
|
|
270
|
+
handleCommandTimeout() {
|
|
271
|
+
if (this.health.recordCommandTimeout()) {
|
|
272
|
+
this.emit('health', { type: 'unhealthy', reason: 'max_command_timeouts' });
|
|
273
|
+
this.forceReconnect();
|
|
274
|
+
}
|
|
275
|
+
}
|
|
261
276
|
forceReconnect() {
|
|
262
277
|
if (this.reconnect.isClosed())
|
|
263
278
|
return;
|
|
264
279
|
if (this.socket) {
|
|
265
|
-
this.socket.end();
|
|
280
|
+
// end() can throw on an already-errored/half-dead socket. Swallow it:
|
|
281
|
+
// failing to close the corpse must NOT abort the reconnect path below,
|
|
282
|
+
// or the connection would stay wedged forever (the #94 failure mode).
|
|
283
|
+
try {
|
|
284
|
+
this.socket.end();
|
|
285
|
+
}
|
|
286
|
+
catch {
|
|
287
|
+
/* socket already torn down */
|
|
288
|
+
}
|
|
266
289
|
this.socket = null;
|
|
267
290
|
}
|
|
268
291
|
this.connected = false;
|
|
269
292
|
this.health.stopPing();
|
|
293
|
+
// Settle every in-flight/queued command NOW. Otherwise their per-command
|
|
294
|
+
// timeouts keep ticking and fire AFTER the fresh socket is up — each stale
|
|
295
|
+
// timeout bumps the new connection's dead-link counter and can re-trigger
|
|
296
|
+
// forceReconnect in a loop (a reconnect storm that never stabilises). It
|
|
297
|
+
// also unblocks awaiting callers (e.g. a Worker's PULL) immediately instead
|
|
298
|
+
// of making them wait out the full commandTimeout on a corpse.
|
|
299
|
+
this.commands.rejectAll(new Error('Connection lost'));
|
|
270
300
|
if (this.reconnect.canReconnect())
|
|
271
301
|
this.reconnect.scheduleReconnect(() => this.connect());
|
|
272
302
|
}
|
|
@@ -343,6 +373,8 @@ export class TcpClient extends EventEmitter {
|
|
|
343
373
|
if (removed) {
|
|
344
374
|
this.health.recordError();
|
|
345
375
|
next.reject(new Error('Command timeout'));
|
|
376
|
+
// In-flight command got no response: count it toward dead-link detection.
|
|
377
|
+
this.handleCommandTimeout();
|
|
346
378
|
}
|
|
347
379
|
}, this.options.commandTimeout);
|
|
348
380
|
next.timeout = newTimeout;
|
|
@@ -365,17 +397,20 @@ export class TcpClient extends EventEmitter {
|
|
|
365
397
|
let pendingRef;
|
|
366
398
|
const promise = new Promise((resolve, reject) => {
|
|
367
399
|
const timeout = setTimeout(() => {
|
|
368
|
-
// Try to remove from queue first
|
|
400
|
+
// Try to remove from queue first. A still-queued command never reached
|
|
401
|
+
// the socket (e.g. waiting on connect), so it is NOT evidence of a dead
|
|
402
|
+
// link — reject it but don't count it toward dead-link detection.
|
|
369
403
|
if (this.commands.remove(id)) {
|
|
370
404
|
this.health.recordError();
|
|
371
405
|
reject(new Error('Command timeout'));
|
|
372
406
|
return;
|
|
373
407
|
}
|
|
374
|
-
// Try to remove from in-flight
|
|
408
|
+
// Try to remove from in-flight: this one WAS sent and got no response.
|
|
375
409
|
const removed = this.commands.removeByReqId(reqId);
|
|
376
410
|
if (removed) {
|
|
377
411
|
this.health.recordError();
|
|
378
412
|
reject(new Error('Command timeout'));
|
|
413
|
+
this.handleCommandTimeout();
|
|
379
414
|
}
|
|
380
415
|
}, this.options.commandTimeout);
|
|
381
416
|
pendingRef = {
|
|
@@ -42,6 +42,17 @@ export async function createConnection(target, connectTimeout, events) {
|
|
|
42
42
|
},
|
|
43
43
|
open(sock) {
|
|
44
44
|
cleanup();
|
|
45
|
+
// Enable TCP keepalive so the OS probes idle connections and surfaces a
|
|
46
|
+
// dead peer (suspended host, NAT/LB drop) via an error/close event,
|
|
47
|
+
// instead of a half-open socket lingering until tcp_retries2 (~15 min).
|
|
48
|
+
// Best-effort: not all platforms honor the delay, and older Bun builds
|
|
49
|
+
// may lack the method — never let it abort connection setup. See #94.
|
|
50
|
+
try {
|
|
51
|
+
sock.setKeepAlive?.(true, 15000);
|
|
52
|
+
}
|
|
53
|
+
catch {
|
|
54
|
+
/* keepalive unsupported on this platform/runtime */
|
|
55
|
+
}
|
|
45
56
|
socketData.write = (d) => sock.write(d);
|
|
46
57
|
socketData.end = () => sock.end();
|
|
47
58
|
connectionResolved = true;
|
|
@@ -7,6 +7,11 @@ import type { ConnectionHealth } from './types';
|
|
|
7
7
|
export interface HealthConfig {
|
|
8
8
|
pingInterval: number;
|
|
9
9
|
maxPingFailures: number;
|
|
10
|
+
/**
|
|
11
|
+
* Consecutive command timeouts before the link is concluded dead (0 = off).
|
|
12
|
+
* Optional for backward compatibility; defaults to 3 when omitted.
|
|
13
|
+
*/
|
|
14
|
+
maxCommandTimeouts?: number;
|
|
10
15
|
}
|
|
11
16
|
/**
|
|
12
17
|
* Tracks connection health metrics
|
|
@@ -14,6 +19,7 @@ export interface HealthConfig {
|
|
|
14
19
|
export declare class HealthTracker {
|
|
15
20
|
private readonly config;
|
|
16
21
|
private consecutivePingFailures;
|
|
22
|
+
private consecutiveCommandTimeouts;
|
|
17
23
|
private lastSuccessAt;
|
|
18
24
|
private lastErrorAt;
|
|
19
25
|
private connectedAt;
|
|
@@ -35,6 +41,14 @@ export declare class HealthTracker {
|
|
|
35
41
|
recordPingSuccess(latencyMs: number): void;
|
|
36
42
|
/** Record ping failure, returns true if max failures reached */
|
|
37
43
|
recordPingFailure(): boolean;
|
|
44
|
+
/**
|
|
45
|
+
* Record a command timeout. Returns true when the configured consecutive
|
|
46
|
+
* threshold is reached (and the feature is enabled), signalling the caller to
|
|
47
|
+
* force a reconnect. Any intervening success resets the counter, so this only
|
|
48
|
+
* fires on a sustained run of timeouts — the signature of a dead/half-open
|
|
49
|
+
* socket where writes succeed but no response ever comes back.
|
|
50
|
+
*/
|
|
51
|
+
recordCommandTimeout(): boolean;
|
|
38
52
|
/** Get current health metrics */
|
|
39
53
|
getHealth(state: 'connected' | 'connecting' | 'disconnected' | 'closed'): ConnectionHealth;
|
|
40
54
|
/** Start ping timer */
|
|
@@ -2,12 +2,15 @@
|
|
|
2
2
|
* TCP Health Tracker
|
|
3
3
|
* Monitors connection health with ping and latency tracking
|
|
4
4
|
*/
|
|
5
|
+
/** Default consecutive command-timeout threshold when not configured. */
|
|
6
|
+
const DEFAULT_MAX_COMMAND_TIMEOUTS = 3;
|
|
5
7
|
/**
|
|
6
8
|
* Tracks connection health metrics
|
|
7
9
|
*/
|
|
8
10
|
export class HealthTracker {
|
|
9
11
|
config;
|
|
10
12
|
consecutivePingFailures = 0;
|
|
13
|
+
consecutiveCommandTimeouts = 0;
|
|
11
14
|
lastSuccessAt = null;
|
|
12
15
|
lastErrorAt = null;
|
|
13
16
|
connectedAt = null;
|
|
@@ -23,6 +26,9 @@ export class HealthTracker {
|
|
|
23
26
|
recordSuccess(latencyMs) {
|
|
24
27
|
this.lastSuccessAt = Date.now();
|
|
25
28
|
this.totalCommands++;
|
|
29
|
+
// A real response proves the link is alive: the prior timeouts were not a
|
|
30
|
+
// sustained run, so reset the dead-link counter ("consecutive" must mean it).
|
|
31
|
+
this.consecutiveCommandTimeouts = 0;
|
|
26
32
|
this.recordLatency(latencyMs);
|
|
27
33
|
}
|
|
28
34
|
/** Record command error */
|
|
@@ -38,10 +44,13 @@ export class HealthTracker {
|
|
|
38
44
|
recordConnected() {
|
|
39
45
|
this.connectedAt = Date.now();
|
|
40
46
|
this.consecutivePingFailures = 0;
|
|
47
|
+
this.consecutiveCommandTimeouts = 0;
|
|
41
48
|
}
|
|
42
49
|
/** Record ping success */
|
|
43
50
|
recordPingSuccess(latencyMs) {
|
|
51
|
+
// A successful ping is also proof the link is alive — clear both suspicions.
|
|
44
52
|
this.consecutivePingFailures = 0;
|
|
53
|
+
this.consecutiveCommandTimeouts = 0;
|
|
45
54
|
this.recordLatency(latencyMs);
|
|
46
55
|
}
|
|
47
56
|
/** Record ping failure, returns true if max failures reached */
|
|
@@ -51,6 +60,20 @@ export class HealthTracker {
|
|
|
51
60
|
this.totalErrors++;
|
|
52
61
|
return this.consecutivePingFailures >= this.config.maxPingFailures;
|
|
53
62
|
}
|
|
63
|
+
/**
|
|
64
|
+
* Record a command timeout. Returns true when the configured consecutive
|
|
65
|
+
* threshold is reached (and the feature is enabled), signalling the caller to
|
|
66
|
+
* force a reconnect. Any intervening success resets the counter, so this only
|
|
67
|
+
* fires on a sustained run of timeouts — the signature of a dead/half-open
|
|
68
|
+
* socket where writes succeed but no response ever comes back.
|
|
69
|
+
*/
|
|
70
|
+
recordCommandTimeout() {
|
|
71
|
+
const max = this.config.maxCommandTimeouts ?? DEFAULT_MAX_COMMAND_TIMEOUTS;
|
|
72
|
+
if (max <= 0)
|
|
73
|
+
return false;
|
|
74
|
+
this.consecutiveCommandTimeouts++;
|
|
75
|
+
return this.consecutiveCommandTimeouts >= max;
|
|
76
|
+
}
|
|
54
77
|
/** Get current health metrics */
|
|
55
78
|
getHealth(state) {
|
|
56
79
|
const avgLatency = this.latencyHistory.length > 0
|
|
@@ -63,6 +86,7 @@ export class HealthTracker {
|
|
|
63
86
|
lastErrorAt: this.lastErrorAt,
|
|
64
87
|
avgLatencyMs: Math.round(avgLatency * 100) / 100,
|
|
65
88
|
consecutivePingFailures: this.consecutivePingFailures,
|
|
89
|
+
consecutiveCommandTimeouts: this.consecutiveCommandTimeouts,
|
|
66
90
|
totalCommands: this.totalCommands,
|
|
67
91
|
totalErrors: this.totalErrors,
|
|
68
92
|
uptimeMs: this.connectedAt ? Date.now() - this.connectedAt : 0,
|
|
@@ -26,6 +26,14 @@ export interface ConnectionOptions {
|
|
|
26
26
|
pingInterval?: number;
|
|
27
27
|
/** Max consecutive ping failures before forcing reconnect (default: 3) */
|
|
28
28
|
maxPingFailures?: number;
|
|
29
|
+
/**
|
|
30
|
+
* Max consecutive command timeouts (with no intervening success) before the
|
|
31
|
+
* connection is concluded dead and reconnect is forced (default: 3, 0 to
|
|
32
|
+
* disable). This is the recovery path for a half-open socket when the
|
|
33
|
+
* health-check ping is disabled or slower than real traffic — a worker whose
|
|
34
|
+
* PULLs keep timing out no longer stalls forever waiting on the ping. See #94.
|
|
35
|
+
*/
|
|
36
|
+
maxCommandTimeouts?: number;
|
|
29
37
|
/** Enable pipelining - multiple commands in flight (default: true) */
|
|
30
38
|
pipelining?: boolean;
|
|
31
39
|
/** Max commands in flight when pipelining (default: 100) */
|
|
@@ -45,6 +53,8 @@ export interface ConnectionHealth {
|
|
|
45
53
|
avgLatencyMs: number;
|
|
46
54
|
/** Consecutive ping failures */
|
|
47
55
|
consecutivePingFailures: number;
|
|
56
|
+
/** Consecutive command timeouts with no intervening success */
|
|
57
|
+
consecutiveCommandTimeouts: number;
|
|
48
58
|
/** Total commands sent */
|
|
49
59
|
totalCommands: number;
|
|
50
60
|
/** Total errors */
|
package/dist/client/tcp/types.js
CHANGED
package/dist/client/tcpPool.js
CHANGED
|
@@ -29,6 +29,7 @@ export class TcpConnectionPool {
|
|
|
29
29
|
autoReconnect: options.autoReconnect ?? true,
|
|
30
30
|
pingInterval: options.pingInterval ?? 30000,
|
|
31
31
|
maxPingFailures: options.maxPingFailures ?? 3,
|
|
32
|
+
maxCommandTimeouts: options.maxCommandTimeouts ?? 3,
|
|
32
33
|
pipelining: options.pipelining ?? true,
|
|
33
34
|
maxInFlight: options.maxInFlight ?? 100,
|
|
34
35
|
};
|
|
@@ -46,6 +47,7 @@ export class TcpConnectionPool {
|
|
|
46
47
|
autoReconnect: this.options.autoReconnect,
|
|
47
48
|
pingInterval: this.options.pingInterval,
|
|
48
49
|
maxPingFailures: this.options.maxPingFailures,
|
|
50
|
+
maxCommandTimeouts: this.options.maxCommandTimeouts,
|
|
49
51
|
});
|
|
50
52
|
this.clients.push(client);
|
|
51
53
|
}
|
package/dist/client/types.d.ts
CHANGED
|
@@ -376,6 +376,12 @@ export interface ConnectionOptions {
|
|
|
376
376
|
pingInterval?: number;
|
|
377
377
|
/** Command timeout in ms (default: 30000) */
|
|
378
378
|
commandTimeout?: number;
|
|
379
|
+
/**
|
|
380
|
+
* Consecutive command timeouts (no intervening success) before the connection
|
|
381
|
+
* is concluded dead and a reconnect is forced (default: 3, 0 to disable).
|
|
382
|
+
* Recovery path for a half-open socket independent of the health-check ping. See #94.
|
|
383
|
+
*/
|
|
384
|
+
maxCommandTimeouts?: number;
|
|
379
385
|
/** Enable TCP pipelining (default: true) */
|
|
380
386
|
pipelining?: boolean;
|
|
381
387
|
/** Max commands in flight per connection (default: 100) */
|
|
@@ -47,6 +47,7 @@ function createTcpPool(opts, concurrency) {
|
|
|
47
47
|
poolSize,
|
|
48
48
|
pingInterval: connOpts.pingInterval,
|
|
49
49
|
commandTimeout: connOpts.commandTimeout,
|
|
50
|
+
maxCommandTimeouts: connOpts.maxCommandTimeouts,
|
|
50
51
|
pipelining: connOpts.pipelining,
|
|
51
52
|
maxInFlight: connOpts.maxInFlight,
|
|
52
53
|
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "bunqueue",
|
|
3
|
-
"version": "2.8.5",
|
|
3
|
+
"version": "2.8.6",
|
|
4
4
|
"description": "High-performance job queue for Bun & AI agents. SQLite persistence, cron scheduling, priorities, retries, DLQ, webhooks, native MCP server. Zero external dependencies.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/main.js",
|