clawmatrix 0.2.6 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/acp-proxy.ts +548 -204
- package/src/cluster-service.ts +12 -0
- package/src/compat.ts +63 -2
- package/src/connection.ts +39 -6
- package/src/handoff.ts +10 -3
- package/src/peer-manager.ts +68 -34
- package/src/router.ts +21 -0
- package/src/terminal.ts +3 -1
package/src/cluster-service.ts
CHANGED
|
@@ -165,6 +165,18 @@ export class ClusterRuntime {
|
|
|
165
165
|
}
|
|
166
166
|
}
|
|
167
167
|
|
|
168
|
+
// Auto-detect ACP agents if ACP is enabled but no agents are explicitly configured
|
|
169
|
+
if (this.acpProxy && this.config.acp?.enabled && (!this.config.acp.agents || this.config.acp.agents.length === 0)) {
|
|
170
|
+
AcpProxy.detectAvailableAgents(this.config.acp.commands).then((detected) => {
|
|
171
|
+
if (detected.length > 0) {
|
|
172
|
+
this.logger.info(`[clawmatrix] Auto-detected ACP agents: ${detected.map((a) => a.id).join(", ")}`);
|
|
173
|
+
this.peerManager.updateAcpAgents(detected);
|
|
174
|
+
}
|
|
175
|
+
}).catch((err) => {
|
|
176
|
+
this.logger.error(`[clawmatrix] ACP agent detection failed: ${err}`);
|
|
177
|
+
});
|
|
178
|
+
}
|
|
179
|
+
|
|
168
180
|
// Start subsystems
|
|
169
181
|
this.peerManager.start();
|
|
170
182
|
this.modelProxy.start();
|
package/src/compat.ts
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
7
|
import { spawn as cpSpawn } from "node:child_process";
|
|
8
|
-
import { readFile, writeFile } from "node:fs/promises";
|
|
8
|
+
import { open, readFile, stat, writeFile } from "node:fs/promises";
|
|
9
9
|
import { createRequire } from "node:module";
|
|
10
10
|
|
|
11
11
|
export interface SpawnResult {
|
|
@@ -118,12 +118,38 @@ export function spawnPty(
|
|
|
118
118
|
const pty = loadPty();
|
|
119
119
|
if (!pty) throw new Error("node-pty is not available — install it with: npm install node-pty");
|
|
120
120
|
|
|
121
|
+
// Filter out undefined values from env — node-pty's C code (posix_spawnp)
|
|
122
|
+
// cannot handle undefined entries and will fail silently.
|
|
123
|
+
const baseEnv = process.env as Record<string, string | undefined>;
|
|
124
|
+
const mergedEnv: Record<string, string> = {};
|
|
125
|
+
for (const [k, v] of Object.entries(baseEnv)) {
|
|
126
|
+
if (v !== undefined) mergedEnv[k] = v;
|
|
127
|
+
}
|
|
128
|
+
if (opts.env) {
|
|
129
|
+
for (const [k, v] of Object.entries(opts.env)) {
|
|
130
|
+
if (v !== undefined) mergedEnv[k] = v;
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
// Validate cwd exists to give a clear error instead of cryptic posix_spawnp failure
|
|
135
|
+
if (opts.cwd) {
|
|
136
|
+
try {
|
|
137
|
+
const fs = require("node:fs");
|
|
138
|
+
if (!fs.existsSync(opts.cwd)) {
|
|
139
|
+
throw new Error(`cwd does not exist: ${opts.cwd}`);
|
|
140
|
+
}
|
|
141
|
+
} catch (e) {
|
|
142
|
+
if (e instanceof Error && e.message.startsWith("cwd")) throw e;
|
|
143
|
+
// fs check failed, proceed anyway
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
|
|
121
147
|
const proc = pty.spawn(shell, args, {
|
|
122
148
|
name: "xterm-256color",
|
|
123
149
|
cols: opts.cols ?? 80,
|
|
124
150
|
rows: opts.rows ?? 24,
|
|
125
151
|
cwd: opts.cwd,
|
|
126
|
-
env:
|
|
152
|
+
env: mergedEnv,
|
|
127
153
|
});
|
|
128
154
|
|
|
129
155
|
return {
|
|
@@ -141,6 +167,41 @@ export async function readFileText(path: string): Promise<string> {
|
|
|
141
167
|
return readFile(path, "utf-8");
|
|
142
168
|
}
|
|
143
169
|
|
|
170
|
+
/** Read at most `bytes` bytes from the beginning of a file as text. */
|
|
171
|
+
export async function readFileHead(path: string, bytes: number): Promise<string> {
|
|
172
|
+
const fh = await open(path, "r");
|
|
173
|
+
try {
|
|
174
|
+
const buf = Buffer.alloc(bytes);
|
|
175
|
+
const { bytesRead } = await fh.read(buf, 0, bytes, 0);
|
|
176
|
+
return buf.toString("utf-8", 0, bytesRead);
|
|
177
|
+
} finally {
|
|
178
|
+
await fh.close();
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
/** Read at most `bytes` bytes from the end of a file as text. */
|
|
183
|
+
export async function readFileTail(path: string, bytes: number): Promise<string> {
|
|
184
|
+
const info = await stat(path);
|
|
185
|
+
const size = info.size;
|
|
186
|
+
if (size === 0) return "";
|
|
187
|
+
const readBytes = Math.min(bytes, size);
|
|
188
|
+
const offset = size - readBytes;
|
|
189
|
+
const fh = await open(path, "r");
|
|
190
|
+
try {
|
|
191
|
+
const buf = Buffer.alloc(readBytes);
|
|
192
|
+
const { bytesRead } = await fh.read(buf, 0, readBytes, offset);
|
|
193
|
+
const text = buf.toString("utf-8", 0, bytesRead);
|
|
194
|
+
// Drop the first (possibly partial) line if we didn't read from start
|
|
195
|
+
if (offset > 0) {
|
|
196
|
+
const nl = text.indexOf("\n");
|
|
197
|
+
return nl >= 0 ? text.slice(nl + 1) : text;
|
|
198
|
+
}
|
|
199
|
+
return text;
|
|
200
|
+
} finally {
|
|
201
|
+
await fh.close();
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
|
|
144
205
|
/** Write text to a file (replaces Bun.write()). */
|
|
145
206
|
export async function writeFileText(path: string, content: string): Promise<void> {
|
|
146
207
|
await writeFile(path, content, "utf-8");
|
package/src/connection.ts
CHANGED
|
@@ -82,6 +82,8 @@ export class Connection extends EventEmitter<ConnectionEvents> {
|
|
|
82
82
|
private pendingNonce: string | null = null;
|
|
83
83
|
private closed = false;
|
|
84
84
|
private lastPingSentAt = 0;
|
|
85
|
+
/** Timestamp of the last frame received from the remote side. */
|
|
86
|
+
private lastReceivedAt = 0;
|
|
85
87
|
/** Exponential moving average of heartbeat RTT in milliseconds. */
|
|
86
88
|
latencyMs = 0;
|
|
87
89
|
|
|
@@ -190,6 +192,7 @@ export class Connection extends EventEmitter<ConnectionEvents> {
|
|
|
190
192
|
private async onRawMessage(data: unknown) {
|
|
191
193
|
const str = typeof data === "string" ? data : String(data);
|
|
192
194
|
if (!str.length) return;
|
|
195
|
+
this.lastReceivedAt = Date.now();
|
|
193
196
|
|
|
194
197
|
let frame: AnyClusterFrame | undefined;
|
|
195
198
|
|
|
@@ -477,24 +480,54 @@ export class Connection extends EventEmitter<ConnectionEvents> {
|
|
|
477
480
|
}
|
|
478
481
|
|
|
479
482
|
// ── Heartbeat ──────────────────────────────────────────────────
|
|
483
|
+
/** Maximum silence duration before declaring the connection dead. */
|
|
484
|
+
private static readonly RECEIVE_TIMEOUT = HEARTBEAT_BASE * HEARTBEAT_TIMEOUT_COUNT + HEARTBEAT_JITTER * HEARTBEAT_TIMEOUT_COUNT;
|
|
485
|
+
|
|
480
486
|
private startHeartbeat() {
|
|
487
|
+
this.lastReceivedAt = Date.now();
|
|
481
488
|
const scheduleNext = () => {
|
|
482
489
|
const interval = HEARTBEAT_BASE + Math.random() * HEARTBEAT_JITTER;
|
|
483
490
|
this.heartbeatTimer = setTimeout(() => {
|
|
484
491
|
if (this.closed) return;
|
|
492
|
+
|
|
493
|
+
// Watchdog: if no data received for a long time, the connection is dead
|
|
494
|
+
// regardless of what the heartbeat ping/pong state says.
|
|
495
|
+
const silenceMs = Date.now() - this.lastReceivedAt;
|
|
496
|
+
if (this.lastReceivedAt > 0 && silenceMs > Connection.RECEIVE_TIMEOUT) {
|
|
497
|
+
debug("heartbeat", `No data received for ${Math.round(silenceMs / 1000)}s from ${this.remoteNodeId ?? "unknown"}, closing`);
|
|
498
|
+
this.close(4002, "receive timeout");
|
|
499
|
+
return;
|
|
500
|
+
}
|
|
501
|
+
|
|
485
502
|
// Increment before checking: this ping is about to be sent and
|
|
486
503
|
// counts as outstanding until a pong arrives.
|
|
487
504
|
this.missedPongs++;
|
|
488
505
|
if (this.missedPongs >= HEARTBEAT_TIMEOUT_COUNT) {
|
|
506
|
+
debug("heartbeat", `${HEARTBEAT_TIMEOUT_COUNT} missed pongs from ${this.remoteNodeId ?? "unknown"}, closing`);
|
|
489
507
|
this.close(4002, "heartbeat timeout");
|
|
490
508
|
return;
|
|
491
509
|
}
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
510
|
+
|
|
511
|
+
// Send ping — wrapped in try-catch to prevent breaking the heartbeat chain.
|
|
512
|
+
// If send fails, the connection is dead; close it.
|
|
513
|
+
try {
|
|
514
|
+
if (this.transport.readyState !== WebSocket.OPEN) {
|
|
515
|
+
debug("heartbeat", `Transport not open (state=${this.transport.readyState}) for ${this.remoteNodeId ?? "unknown"}, closing`);
|
|
516
|
+
this.close(4002, "transport closed");
|
|
517
|
+
return;
|
|
518
|
+
}
|
|
519
|
+
this.lastPingSentAt = Date.now();
|
|
520
|
+
this.send({
|
|
521
|
+
type: "ping",
|
|
522
|
+
from: this.nodeId,
|
|
523
|
+
timestamp: this.lastPingSentAt,
|
|
524
|
+
} as AnyClusterFrame);
|
|
525
|
+
} catch (err) {
|
|
526
|
+
debug("heartbeat", `Ping send failed for ${this.remoteNodeId ?? "unknown"}: ${err}`);
|
|
527
|
+
this.close(4002, "ping send failed");
|
|
528
|
+
return;
|
|
529
|
+
}
|
|
530
|
+
|
|
498
531
|
scheduleNext();
|
|
499
532
|
}, interval);
|
|
500
533
|
};
|
package/src/handoff.ts
CHANGED
|
@@ -91,6 +91,7 @@ export class HandoffManager {
|
|
|
91
91
|
for (const [id, entry] of this.active) {
|
|
92
92
|
if (entry.status === "input_required" && entry.inputRequiredAt && now - entry.inputRequiredAt > INPUT_REQUIRED_TTL) {
|
|
93
93
|
this.active.delete(id);
|
|
94
|
+
this.sessionWatchers.delete(entry.sessionId);
|
|
94
95
|
// Notify the requester that the handoff timed out
|
|
95
96
|
this.peerManager.sendTo(entry.from, {
|
|
96
97
|
type: "handoff_res",
|
|
@@ -110,6 +111,7 @@ export class HandoffManager {
|
|
|
110
111
|
// (e.g. cancel arrived during input_required when no process was running)
|
|
111
112
|
if (entry.status === "canceled") {
|
|
112
113
|
this.active.delete(id);
|
|
114
|
+
this.sessionWatchers.delete(entry.sessionId);
|
|
113
115
|
}
|
|
114
116
|
}
|
|
115
117
|
// Clean stale inputRequiredTargets (requester side)
|
|
@@ -355,6 +357,7 @@ export class HandoffManager {
|
|
|
355
357
|
|
|
356
358
|
if (activeEntry.status === "canceled") {
|
|
357
359
|
this.active.delete(id);
|
|
360
|
+
this.sessionWatchers.delete(activeEntry.sessionId);
|
|
358
361
|
return;
|
|
359
362
|
}
|
|
360
363
|
|
|
@@ -401,11 +404,13 @@ export class HandoffManager {
|
|
|
401
404
|
|
|
402
405
|
if (activeEntry.status === "canceled") {
|
|
403
406
|
this.active.delete(id);
|
|
407
|
+
this.sessionWatchers.delete(activeEntry.sessionId);
|
|
404
408
|
return;
|
|
405
409
|
}
|
|
406
410
|
|
|
407
411
|
activeEntry.status = "completed";
|
|
408
412
|
this.active.delete(id);
|
|
413
|
+
this.sessionWatchers.delete(activeEntry.sessionId);
|
|
409
414
|
|
|
410
415
|
// Broadcast task completed
|
|
411
416
|
this.taskActivity.broadcast(id, "handoff", "completed", agent, activeEntry.startedAt);
|
|
@@ -429,10 +434,12 @@ export class HandoffManager {
|
|
|
429
434
|
} catch (err) {
|
|
430
435
|
if (activeEntry.status === "canceled") {
|
|
431
436
|
this.active.delete(id);
|
|
437
|
+
this.sessionWatchers.delete(activeEntry.sessionId);
|
|
432
438
|
return;
|
|
433
439
|
}
|
|
434
440
|
activeEntry.status = "failed";
|
|
435
441
|
this.active.delete(id);
|
|
442
|
+
this.sessionWatchers.delete(activeEntry.sessionId);
|
|
436
443
|
|
|
437
444
|
// Broadcast task failed
|
|
438
445
|
this.taskActivity.broadcast(
|
|
@@ -468,7 +475,7 @@ export class HandoffManager {
|
|
|
468
475
|
|
|
469
476
|
const reader = body.getReader();
|
|
470
477
|
const decoder = new TextDecoder();
|
|
471
|
-
|
|
478
|
+
const chunks: string[] = [];
|
|
472
479
|
let buffer = "";
|
|
473
480
|
|
|
474
481
|
try {
|
|
@@ -489,7 +496,7 @@ export class HandoffManager {
|
|
|
489
496
|
const parsed = JSON.parse(data);
|
|
490
497
|
const delta = parsed.choices?.[0]?.delta?.content;
|
|
491
498
|
if (delta) {
|
|
492
|
-
|
|
499
|
+
chunks.push(delta);
|
|
493
500
|
const streamFrame: HandoffStreamChunk = {
|
|
494
501
|
type: "handoff_stream",
|
|
495
502
|
id: handoffId,
|
|
@@ -531,7 +538,7 @@ export class HandoffManager {
|
|
|
531
538
|
this.peerManager.sendTo(to, doneFrame);
|
|
532
539
|
if (sessionId) this.sendToOtherWatchers(sessionId, to, doneFrame);
|
|
533
540
|
|
|
534
|
-
return
|
|
541
|
+
return chunks.join("");
|
|
535
542
|
}
|
|
536
543
|
|
|
537
544
|
/** Handle incoming input_required from remote (requester side). */
|
package/src/peer-manager.ts
CHANGED
|
@@ -28,6 +28,26 @@ import type { KeyPair } from "./crypto.ts";
|
|
|
28
28
|
const RECONNECT_BASE = 1_000;
|
|
29
29
|
const RECONNECT_MAX = 60_000;
|
|
30
30
|
|
|
31
|
+
/** Frame types that bypass dedup (streams share one id across chunks; responses share id with request). */
|
|
32
|
+
const SKIP_DEDUP_TYPES = new Set([
|
|
33
|
+
// Streaming
|
|
34
|
+
"model_stream", "handoff_stream", "acp_stream",
|
|
35
|
+
// Response frames (share id with their request)
|
|
36
|
+
"model_res", "tool_res", "tool_batch_res",
|
|
37
|
+
"handoff_res", "handoff_status_res", "handoff_input_required",
|
|
38
|
+
// Handoff control (reuse original handoff_req id)
|
|
39
|
+
"handoff_input", "handoff_cancel", "handoff_status",
|
|
40
|
+
// Diagnostics & approval
|
|
41
|
+
"diagnostic_exec_res", "diagnostic_status_res", "peer_approval_res",
|
|
42
|
+
// ACP responses
|
|
43
|
+
"acp_res", "acp_close_res", "acp_list_res", "acp_resume_res",
|
|
44
|
+
"acp_cancel_res", "acp_set_mode_res", "acp_get_modes_res",
|
|
45
|
+
"chat_history_res",
|
|
46
|
+
// Terminal
|
|
47
|
+
"terminal_open_res", "terminal_data", "terminal_resize",
|
|
48
|
+
"terminal_close", "terminal_close_res",
|
|
49
|
+
]);
|
|
50
|
+
|
|
31
51
|
/** Classify WebSocket close code into a human-readable reason. */
|
|
32
52
|
function classifyCloseReason(code: number, reason: string): string {
|
|
33
53
|
if (reason) return reason;
|
|
@@ -132,6 +152,16 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
132
152
|
this.approvalManager = new PeerApprovalManager(approvalConfig, stateDir);
|
|
133
153
|
}
|
|
134
154
|
|
|
155
|
+
/** Update locally advertised ACP agents and re-broadcast to all peers. */
|
|
156
|
+
updateAcpAgents(agents: import("./types.ts").AcpAgentInfo[]) {
|
|
157
|
+
this.localCapabilities.acpAgents = agents;
|
|
158
|
+
this.router.updateLocalAcpAgents(agents);
|
|
159
|
+
// Re-sync all connected peers so they learn the updated capabilities
|
|
160
|
+
for (const conn of this.router.getDirectConnections()) {
|
|
161
|
+
this.sendPeerSync(conn);
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
|
|
135
165
|
// ── Lifecycle ──────────────────────────────────────────────────
|
|
136
166
|
async start() {
|
|
137
167
|
await this.approvalManager.load();
|
|
@@ -354,6 +384,12 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
354
384
|
tryReconnect();
|
|
355
385
|
});
|
|
356
386
|
ws.addEventListener("close", (ev) => {
|
|
387
|
+
// Don't reconnect if this was a self-connection (peer URL points to ourselves).
|
|
388
|
+
// Without this guard, outbound detects self → closes → scheduleReconnect → loop.
|
|
389
|
+
if (ev.code === 4002 && ev.reason === "self-connection") {
|
|
390
|
+
debug("peer", `connectToPeer(${peer.nodeId}): self-connection, will not reconnect`);
|
|
391
|
+
return;
|
|
392
|
+
}
|
|
357
393
|
if (!lastError) {
|
|
358
394
|
lastError = classifyCloseReason(ev.code, ev.reason);
|
|
359
395
|
}
|
|
@@ -387,19 +423,17 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
387
423
|
// Peer's persistent public key for TOFU identity binding
|
|
388
424
|
const peerPublicKey = conn.remoteIdentityKey ?? undefined;
|
|
389
425
|
|
|
390
|
-
// Prevent self-connection:
|
|
391
|
-
//
|
|
392
|
-
|
|
393
|
-
|
|
426
|
+
// Prevent self-connection: close immediately if the remote side authenticated
|
|
427
|
+
// with our own nodeId. For outbound this means the peer URL accidentally
|
|
428
|
+
// points to self; for inbound it means a remote node is (mis)using our nodeId.
|
|
429
|
+
// Exception: loopback connections with the same nodeId are local clients
|
|
430
|
+
// (Mac desktop app / iOS simulator) and are allowed through.
|
|
431
|
+
const isLocalClient = conn.role === "inbound" && nodeId === this.config.nodeId && isLoopback(ip);
|
|
432
|
+
if (nodeId === this.config.nodeId && !isLocalClient) {
|
|
433
|
+
debug("peer", `Self-connection detected (${conn.role}, nodeId=${nodeId}, ip=${ip}), closing`);
|
|
394
434
|
conn.close(4002, "self-connection");
|
|
395
435
|
return;
|
|
396
436
|
}
|
|
397
|
-
|
|
398
|
-
// Peer approval check (inbound only — outbound peers are explicitly configured)
|
|
399
|
-
// Skip approval for same-nodeId connections from localhost (local clients
|
|
400
|
-
// like Mac desktop app / iOS simulator). An attacker would need to already
|
|
401
|
-
// be on the same machine to exploit this, which is outside our threat model.
|
|
402
|
-
const isLocalClient = nodeId === this.config.nodeId && isLoopback(ip);
|
|
403
437
|
debug("approval", `onPeerAuthenticated: nodeId=${nodeId} role=${conn.role} isLocalClient=${isLocalClient} ip=${ip}`);
|
|
404
438
|
if (conn.role === "inbound" && !isLocalClient) {
|
|
405
439
|
// IP-level approval rate limiting (suppress noise from leaked tokens)
|
|
@@ -477,10 +511,22 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
477
511
|
|
|
478
512
|
private completePeerJoin(conn: Connection, caps: NodeCapabilities) {
|
|
479
513
|
const nodeId = conn.remoteNodeId!;
|
|
514
|
+
|
|
515
|
+
// If there's an existing connection for this nodeId (e.g. peer reconnected
|
|
516
|
+
// while old TCP hadn't closed yet), close it AFTER overwriting the route so
|
|
517
|
+
// the stale-close guard in onPeerDisconnected correctly skips cleanup.
|
|
518
|
+
const oldRoute = this.router.getRoute(nodeId);
|
|
519
|
+
const oldConn = oldRoute?.connection;
|
|
520
|
+
|
|
480
521
|
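// Same-nodeId connections (e.g. Mac/iOS desktop clients) are also registered as routes as usual,
|
|
481
522
|
// so that sendTo(nodeId) can route response frames back to the client connection.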
|
|
482
523
|
this.router.addDirectPeer(nodeId, conn, caps);
|
|
483
524
|
|
|
525
|
+
if (oldConn && oldConn !== conn && oldConn.isOpen) {
|
|
526
|
+
debug("peer", `completePeerJoin(${nodeId}): closing replaced connection`);
|
|
527
|
+
oldConn.close(1000, "replaced by new connection");
|
|
528
|
+
}
|
|
529
|
+
|
|
484
530
|
conn.on("message", (frame) => this.onFrame(frame, conn));
|
|
485
531
|
conn.on("latency", (ms) => this.router.updateLatency(nodeId, ms));
|
|
486
532
|
conn.on("close", () => this.onPeerDisconnected(conn));
|
|
@@ -520,6 +566,16 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
520
566
|
const nodeId = conn.remoteNodeId;
|
|
521
567
|
if (!nodeId) return;
|
|
522
568
|
|
|
569
|
+
// Guard: only remove the peer if THIS connection is still the active one
|
|
570
|
+
// in the router. When a peer reconnects, the new connection replaces the
|
|
571
|
+
// old one in the route table. If the old connection's close event fires
|
|
572
|
+
// afterwards, it must NOT remove the new connection's route.
|
|
573
|
+
const currentRoute = this.router.getRoute(nodeId);
|
|
574
|
+
if (currentRoute?.connection && currentRoute.connection !== conn) {
|
|
575
|
+
debug("peer", `onPeerDisconnected(${nodeId}): stale connection close, current route has different connection — skipping cleanup`);
|
|
576
|
+
return;
|
|
577
|
+
}
|
|
578
|
+
|
|
523
579
|
// Same-nodeId local client disconnected: only clean up the route; do not broadcast peer_leave
|
|
524
580
|
if (nodeId === this.config.nodeId) {
|
|
525
581
|
this.router.removePeer(nodeId);
|
|
@@ -557,32 +613,10 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
557
613
|
return;
|
|
558
614
|
}
|
|
559
615
|
|
|
560
|
-
// Skip dedup for streaming and response frame types.
|
|
561
|
-
// Stream frames share one id across many chunks.
|
|
562
|
-
// Response frames (model_res, tool_res, handoff_res, etc.) share the same id
|
|
563
|
-
// as their request — without this exemption, a relay node that forwarded
|
|
564
|
-
// the request would mark the id as seen and then drop the returning response.
|
|
565
616
|
// Skip dedup for streaming chunks (same id across many chunks) and response
|
|
566
617
|
// frames (share id with their request — relay would otherwise drop the reply).
|
|
567
|
-
// handoff_input
|
|
568
|
-
|
|
569
|
-
const skipDedup = frame.type === "model_stream" || frame.type === "handoff_stream"
|
|
570
|
-
|| frame.type === "model_res" || frame.type === "tool_res"
|
|
571
|
-
|| frame.type === "handoff_res" || frame.type === "handoff_status_res"
|
|
572
|
-
|| frame.type === "handoff_input_required"
|
|
573
|
-
|| frame.type === "handoff_input" || frame.type === "handoff_cancel"
|
|
574
|
-
|| frame.type === "handoff_status"
|
|
575
|
-
|| frame.type === "diagnostic_exec_res" || frame.type === "diagnostic_status_res"
|
|
576
|
-
|| frame.type === "peer_approval_res"
|
|
577
|
-
|| frame.type === "acp_stream" || frame.type === "acp_res"
|
|
578
|
-
|| frame.type === "acp_close_res"
|
|
579
|
-
|| frame.type === "acp_list_res" || frame.type === "acp_resume_res"
|
|
580
|
-
|| frame.type === "acp_cancel_res"
|
|
581
|
-
|| frame.type === "acp_set_mode_res" || frame.type === "acp_get_modes_res"
|
|
582
|
-
|| frame.type === "terminal_open_res" || frame.type === "terminal_data"
|
|
583
|
-
|| frame.type === "terminal_resize" || frame.type === "terminal_close"
|
|
584
|
-
|| frame.type === "terminal_close_res";
|
|
585
|
-
if (frame.id && !skipDedup && this.router.isDuplicate(frame.id)) return;
|
|
618
|
+
// handoff_input/cancel/status reuse the original handoff_req id.
|
|
619
|
+
if (frame.id && !SKIP_DEDUP_TYPES.has(frame.type) && this.router.isDuplicate(frame.id)) return;
|
|
586
620
|
|
|
587
621
|
// Handle peer approval responses locally (don't emit to cluster-service)
|
|
588
622
|
if (frame.type === "peer_approval_res") {
|
package/src/router.ts
CHANGED
|
@@ -3,6 +3,7 @@ import type { Connection } from "./connection.ts";
|
|
|
3
3
|
|
|
4
4
|
const DEFAULT_TTL = 3;
|
|
5
5
|
const MAX_SEEN_FRAMES = 10_000;
|
|
6
|
+
const MAX_FAILED_REQUESTS = 5_000;
|
|
6
7
|
const ROTATE_INTERVAL = 60_000; // rotate dedup maps every 60s
|
|
7
8
|
|
|
8
9
|
export interface RouteEntry {
|
|
@@ -52,6 +53,11 @@ export class Router {
|
|
|
52
53
|
this.rotateTimer = setInterval(() => this.rotateSeenFrames(), ROTATE_INTERVAL);
|
|
53
54
|
}
|
|
54
55
|
|
|
56
|
+
/** Update locally advertised ACP agents (used after auto-detection). */
|
|
57
|
+
updateLocalAcpAgents(agents: AcpAgentInfo[]) {
|
|
58
|
+
this.localAcpAgents = agents;
|
|
59
|
+
}
|
|
60
|
+
|
|
55
61
|
/** Stop periodic cleanup. Call on shutdown. */
|
|
56
62
|
destroy() {
|
|
57
63
|
if (this.rotateTimer) {
|
|
@@ -300,6 +306,21 @@ export class Router {
|
|
|
300
306
|
* TTL defaults to 15 minutes — long enough for handoff timeouts. */
|
|
301
307
|
markFailed(requestId: string, ttlMs = 900_000) {
|
|
302
308
|
this.failedRequests.set(requestId, Date.now() + ttlMs);
|
|
309
|
+
// Evict entries when map grows too large: first expired, then FIFO
|
|
310
|
+
if (this.failedRequests.size > MAX_FAILED_REQUESTS) {
|
|
311
|
+
const now = Date.now();
|
|
312
|
+
// Pass 1: remove expired entries
|
|
313
|
+
for (const [id, expiresAt] of this.failedRequests) {
|
|
314
|
+
if (now > expiresAt) this.failedRequests.delete(id);
|
|
315
|
+
}
|
|
316
|
+
// Pass 2: if still over limit, remove oldest (insertion-order) entries
|
|
317
|
+
if (this.failedRequests.size > MAX_FAILED_REQUESTS) {
|
|
318
|
+
for (const [id] of this.failedRequests) {
|
|
319
|
+
if (this.failedRequests.size <= MAX_FAILED_REQUESTS) break;
|
|
320
|
+
this.failedRequests.delete(id);
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
}
|
|
303
324
|
}
|
|
304
325
|
|
|
305
326
|
isFailed(requestId: string): boolean {
|
package/src/terminal.ts
CHANGED
|
@@ -204,13 +204,15 @@ export class TerminalManager {
|
|
|
204
204
|
payload: { success: true, sessionId },
|
|
205
205
|
} as TerminalOpenResponse);
|
|
206
206
|
} catch (err) {
|
|
207
|
+
const errMsg = err instanceof Error ? err.message : String(err);
|
|
208
|
+
debug("terminal", `PTY spawn failed: shell=${shell} cwd=${frame.payload.cwd ?? "(default)"} error=${errMsg}`);
|
|
207
209
|
this.peerManager.sendTo(frame.from, {
|
|
208
210
|
type: "terminal_open_res",
|
|
209
211
|
id: frame.id,
|
|
210
212
|
from: nodeId,
|
|
211
213
|
to: frame.from,
|
|
212
214
|
timestamp: Date.now(),
|
|
213
|
-
payload: { success: false, error: `Failed to spawn PTY: ${
|
|
215
|
+
payload: { success: false, error: `Failed to spawn PTY (shell=${shell}): ${errMsg}` },
|
|
214
216
|
} as TerminalOpenResponse);
|
|
215
217
|
}
|
|
216
218
|
}
|