clawmatrix 0.4.0 → 0.4.1
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/acp-proxy.ts +9 -6
- package/src/cluster-service.ts +10 -1
- package/src/config.ts +2 -1
- package/src/handoff.ts +8 -0
- package/src/health-tracker.ts +40 -8
- package/src/model-proxy.ts +11 -0
- package/src/peer-manager.ts +286 -22
- package/src/router.ts +93 -1
- package/src/tools/cluster-peers.ts +2 -0
package/package.json
CHANGED
package/src/acp-proxy.ts
CHANGED
|
@@ -2781,13 +2781,15 @@ async function readFirstUserMessageFromTranscript(transcriptPath: string): Promi
|
|
|
2781
2781
|
const msg = parsed?.message;
|
|
2782
2782
|
if (msg?.role !== "user") continue;
|
|
2783
2783
|
// Extract text from content (string or array of content blocks)
|
|
2784
|
-
|
|
2785
|
-
if (
|
|
2784
|
+
let raw: string | null = null;
|
|
2785
|
+
if (typeof msg.content === "string") raw = msg.content;
|
|
2786
|
+
else if (Array.isArray(msg.content)) {
|
|
2786
2787
|
for (const block of msg.content) {
|
|
2787
|
-
if (typeof block === "string")
|
|
2788
|
-
if (block?.type === "text" && typeof block.text === "string")
|
|
2788
|
+
if (typeof block === "string") { raw = block; break; }
|
|
2789
|
+
if (block?.type === "text" && typeof block.text === "string") { raw = block.text; break; }
|
|
2789
2790
|
}
|
|
2790
2791
|
}
|
|
2792
|
+
if (raw) return stripEnvelope(stripInboundMetadata(raw)).slice(0, 120) || null;
|
|
2791
2793
|
} catch {
|
|
2792
2794
|
// skip malformed lines
|
|
2793
2795
|
}
|
|
@@ -2861,7 +2863,8 @@ async function fetchSessionListFromDisk(): Promise<AcpSessionInfo[]> {
|
|
|
2861
2863
|
try {
|
|
2862
2864
|
const content = await readFileText(storePath);
|
|
2863
2865
|
const entries: Record<string, { sessionId?: string; updatedAt?: number; displayName?: string; subject?: string; label?: string; acp?: { agent?: string } }> = JSON.parse(content);
|
|
2864
|
-
|
|
2866
|
+
// Use ACP agent type if available, otherwise default to "openclaw" for native sessions
|
|
2867
|
+
const agentDefault = "openclaw";
|
|
2865
2868
|
// Read all transcripts in parallel (first message + mtime)
|
|
2866
2869
|
const entryList = Object.entries(entries).filter(([, e]) => e.sessionId);
|
|
2867
2870
|
const transcriptResults = await Promise.all(
|
|
@@ -2885,7 +2888,7 @@ async function fetchSessionListFromDisk(): Promise<AcpSessionInfo[]> {
|
|
|
2885
2888
|
title: entry.displayName ?? entry.subject ?? entry.label ?? undefined,
|
|
2886
2889
|
description: firstMsg ?? undefined,
|
|
2887
2890
|
updatedAt: effectiveTs ? new Date(effectiveTs).toISOString() : undefined,
|
|
2888
|
-
agent,
|
|
2891
|
+
agent: entry.acp?.agent ?? agentDefault,
|
|
2889
2892
|
});
|
|
2890
2893
|
}
|
|
2891
2894
|
} catch {
|
package/src/cluster-service.ts
CHANGED
|
@@ -133,6 +133,12 @@ export class ClusterRuntime {
|
|
|
133
133
|
this.agentById.set(a.id, a);
|
|
134
134
|
for (const t of a.tags) this.agentsByTag.set(t, a);
|
|
135
135
|
}
|
|
136
|
+
|
|
137
|
+
// Wire up active task checker for route probing (prevents switching mid-task)
|
|
138
|
+
this.peerManager.setActiveTaskChecker((nodeId) => {
|
|
139
|
+
return this.handoffManager.hasPendingForNode(nodeId)
|
|
140
|
+
|| this.modelProxy.hasPendingForNode(nodeId);
|
|
141
|
+
});
|
|
136
142
|
}
|
|
137
143
|
|
|
138
144
|
async start() {
|
|
@@ -144,8 +150,11 @@ export class ClusterRuntime {
|
|
|
144
150
|
this.peerManager.on("peerConnected", (nodeId) => {
|
|
145
151
|
this.logger.info(`[clawmatrix] Peer connected: ${nodeId}`);
|
|
146
152
|
this.refreshDiscoveredModels();
|
|
147
|
-
|
|
153
|
+
// Init sync state BEFORE recording the event — recordPeerOnline triggers
|
|
154
|
+
// broadcastSync which must use the freshly initialized syncState.
|
|
155
|
+
// Reversing this order causes syncState corruption and infinite sync loops.
|
|
148
156
|
this.healthTracker.initPeerSync(nodeId);
|
|
157
|
+
this.healthTracker.recordPeerOnline(nodeId, "direct");
|
|
149
158
|
});
|
|
150
159
|
|
|
151
160
|
this.peerManager.on("peerDisconnected", (nodeId) => {
|
package/src/config.ts
CHANGED
|
@@ -51,7 +51,8 @@ const ModelInfoSchema = z.object({
|
|
|
51
51
|
|
|
52
52
|
const PeerConfigSchema = z.object({
|
|
53
53
|
nodeId: z.string(),
|
|
54
|
-
|
|
54
|
+
/** Single URL or array of URLs for multi-channel connections. */
|
|
55
|
+
url: z.union([z.string(), z.array(z.string()).min(1)]),
|
|
55
56
|
});
|
|
56
57
|
|
|
57
58
|
const ToolProxyConfigSchema = z.object({
|
package/src/handoff.ts
CHANGED
|
@@ -678,6 +678,14 @@ export class HandoffManager {
|
|
|
678
678
|
}
|
|
679
679
|
|
|
680
680
|
/** Clean up on shutdown. */
|
|
681
|
+
/** Check if there are pending outbound handoffs targeting a specific node. */
|
|
682
|
+
hasPendingForNode(nodeId: string): boolean {
|
|
683
|
+
for (const p of this.pending.values()) {
|
|
684
|
+
if (p.targetNodeId === nodeId) return true;
|
|
685
|
+
}
|
|
686
|
+
return false;
|
|
687
|
+
}
|
|
688
|
+
|
|
681
689
|
destroy() {
|
|
682
690
|
if (this.staleCleanupTimer) {
|
|
683
691
|
clearInterval(this.staleCleanupTimer);
|
package/src/health-tracker.ts
CHANGED
|
@@ -79,6 +79,12 @@ export class HealthTracker {
|
|
|
79
79
|
private compactTimer: ReturnType<typeof setInterval> | null = null;
|
|
80
80
|
private saveTimer: ReturnType<typeof setTimeout> | null = null;
|
|
81
81
|
private dirty = false;
|
|
82
|
+
/** Debounce timer for broadcastSync (prevents rapid-fire broadcasts). */
|
|
83
|
+
private broadcastTimer: ReturnType<typeof setTimeout> | null = null;
|
|
84
|
+
/** Round counter per peer to detect non-converging sync loops. */
|
|
85
|
+
private syncRounds = new Map<string, number>();
|
|
86
|
+
private static readonly MAX_SYNC_ROUNDS = 10;
|
|
87
|
+
private static readonly BROADCAST_DEBOUNCE = 500; // ms
|
|
82
88
|
|
|
83
89
|
constructor(opts: HealthTrackerOptions) {
|
|
84
90
|
this.nodeId = opts.nodeId;
|
|
@@ -119,6 +125,10 @@ export class HealthTracker {
|
|
|
119
125
|
clearInterval(this.compactTimer);
|
|
120
126
|
this.compactTimer = null;
|
|
121
127
|
}
|
|
128
|
+
if (this.broadcastTimer) {
|
|
129
|
+
clearTimeout(this.broadcastTimer);
|
|
130
|
+
this.broadcastTimer = null;
|
|
131
|
+
}
|
|
122
132
|
if (this.saveTimer) {
|
|
123
133
|
clearTimeout(this.saveTimer);
|
|
124
134
|
this.saveTimer = null;
|
|
@@ -165,6 +175,16 @@ export class HealthTracker {
|
|
|
165
175
|
const message = new Uint8Array(Buffer.from(frame.payload.data, "base64"));
|
|
166
176
|
const syncKey = peerId;
|
|
167
177
|
|
|
178
|
+
// Guard against non-converging sync loops
|
|
179
|
+
const rounds = (this.syncRounds.get(peerId) ?? 0) + 1;
|
|
180
|
+
if (rounds > HealthTracker.MAX_SYNC_ROUNDS) {
|
|
181
|
+
debug(TAG, `sync with ${peerId} exceeded ${HealthTracker.MAX_SYNC_ROUNDS} rounds, resetting`);
|
|
182
|
+
this.syncStates.set(syncKey, Automerge.initSyncState());
|
|
183
|
+
this.syncRounds.delete(peerId);
|
|
184
|
+
return;
|
|
185
|
+
}
|
|
186
|
+
this.syncRounds.set(peerId, rounds);
|
|
187
|
+
|
|
168
188
|
try {
|
|
169
189
|
const syncState = this.syncStates.get(syncKey) ?? Automerge.initSyncState();
|
|
170
190
|
const [newDoc, newSyncState] = Automerge.receiveSyncMessage(this.doc, syncState, message);
|
|
@@ -172,18 +192,19 @@ export class HealthTracker {
|
|
|
172
192
|
this.syncStates.set(syncKey, newSyncState);
|
|
173
193
|
this.scheduleSave();
|
|
174
194
|
|
|
175
|
-
// Send our response
|
|
195
|
+
// Send our response (only if there's something to send)
|
|
176
196
|
this.sendSyncMessage(peerId);
|
|
177
197
|
} catch (err) {
|
|
178
198
|
debug(TAG, `error handling sync from ${peerId}: ${err}`);
|
|
179
199
|
}
|
|
180
200
|
}
|
|
181
201
|
|
|
182
|
-
/**
|
|
202
|
+
/** Initialize sync state for a peer (called on peer connect).
|
|
203
|
+
* Does NOT send a message — the subsequent recordPeerOnline → broadcastSync handles that.
|
|
204
|
+
* Sending here would race with broadcastSync and corrupt the sync state. */
|
|
183
205
|
initPeerSync(peerId: string) {
|
|
184
206
|
if (peerId === this.nodeId) return;
|
|
185
207
|
this.syncStates.set(peerId, Automerge.initSyncState());
|
|
186
|
-
this.sendSyncMessage(peerId);
|
|
187
208
|
}
|
|
188
209
|
|
|
189
210
|
/** Clean up sync state for a disconnected peer. */
|
|
@@ -196,7 +217,11 @@ export class HealthTracker {
|
|
|
196
217
|
const [newSyncState, message] = Automerge.generateSyncMessage(this.doc, syncState);
|
|
197
218
|
this.syncStates.set(peerId, newSyncState);
|
|
198
219
|
|
|
199
|
-
if (!message)
|
|
220
|
+
if (!message) {
|
|
221
|
+
// Sync converged — reset round counter
|
|
222
|
+
this.syncRounds.delete(peerId);
|
|
223
|
+
return;
|
|
224
|
+
}
|
|
200
225
|
|
|
201
226
|
debug(TAG, `sending health sync to ${peerId} (${message.byteLength} bytes)`);
|
|
202
227
|
|
|
@@ -214,10 +239,17 @@ export class HealthTracker {
|
|
|
214
239
|
}
|
|
215
240
|
|
|
216
241
|
private broadcastSync() {
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
242
|
+
// Debounce: multiple events in quick succession → single broadcast
|
|
243
|
+
if (this.broadcastTimer) return;
|
|
244
|
+
this.broadcastTimer = setTimeout(() => {
|
|
245
|
+
this.broadcastTimer = null;
|
|
246
|
+
// Reset round counters — new broadcast starts fresh sync cycle
|
|
247
|
+
this.syncRounds.clear();
|
|
248
|
+
const peers = this.peerManager.router.getAllPeers();
|
|
249
|
+
for (const peer of peers) {
|
|
250
|
+
this.sendSyncMessage(peer.nodeId);
|
|
251
|
+
}
|
|
252
|
+
}, HealthTracker.BROADCAST_DEBOUNCE);
|
|
221
253
|
}
|
|
222
254
|
|
|
223
255
|
// ── Timeline aggregation ──────────────────────────────────
|
package/src/model-proxy.ts
CHANGED
|
@@ -43,6 +43,7 @@ interface PendingModelReq {
|
|
|
43
43
|
stream: boolean;
|
|
44
44
|
responseFormat: ResponseFormat;
|
|
45
45
|
model?: string;
|
|
46
|
+
targetNodeId?: string;
|
|
46
47
|
controller?: ReadableStreamDefaultController;
|
|
47
48
|
encoder?: TextEncoder;
|
|
48
49
|
/** Whether real content (not just setup events) has been sent to the stream. */
|
|
@@ -356,6 +357,14 @@ export class ModelProxy {
|
|
|
356
357
|
this.httpServer.listen(this.config.proxyPort, "127.0.0.1");
|
|
357
358
|
}
|
|
358
359
|
|
|
360
|
+
/** Check if there are pending model requests targeting a specific node. */
|
|
361
|
+
hasPendingForNode(nodeId: string): boolean {
|
|
362
|
+
for (const p of this.pending.values()) {
|
|
363
|
+
if (p.targetNodeId === nodeId) return true;
|
|
364
|
+
}
|
|
365
|
+
return false;
|
|
366
|
+
}
|
|
367
|
+
|
|
359
368
|
stop() {
|
|
360
369
|
if (this.cacheCleanupTimer) {
|
|
361
370
|
clearInterval(this.cacheCleanupTimer);
|
|
@@ -653,6 +662,7 @@ export class ModelProxy {
|
|
|
653
662
|
this.pending.set(requestId, {
|
|
654
663
|
resolve: () => {}, reject: () => {},
|
|
655
664
|
timer, stream: true, responseFormat, model,
|
|
665
|
+
targetNodeId,
|
|
656
666
|
controller, encoder,
|
|
657
667
|
hasContent: false,
|
|
658
668
|
failoverCandidates,
|
|
@@ -827,6 +837,7 @@ export class ModelProxy {
|
|
|
827
837
|
this.pending.set(requestId, {
|
|
828
838
|
resolve: resolve as (v: unknown) => void,
|
|
829
839
|
reject, timer, stream: false, responseFormat,
|
|
840
|
+
targetNodeId,
|
|
830
841
|
});
|
|
831
842
|
|
|
832
843
|
const sent = this.peerManager.sendTo(targetNodeId, frame);
|
package/src/peer-manager.ts
CHANGED
|
@@ -91,8 +91,19 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
91
91
|
private localCapabilities: NodeCapabilities;
|
|
92
92
|
private httpServer: Server | null = null;
|
|
93
93
|
private wss: WebSocketServer | null = null;
|
|
94
|
+
/** Reconnect timers keyed by `nodeId|url` for per-channel reconnection. */
|
|
94
95
|
private reconnectTimers = new Map<string, ReturnType<typeof setTimeout>>();
|
|
95
96
|
private reconnectAttempts = new Map<string, number>();
|
|
97
|
+
/** Track which nodeIds have already completed the full peer join (for multi-channel). */
|
|
98
|
+
private joinedPeers = new Set<string>();
|
|
99
|
+
/** All configured URLs per peer (for multi-URL peers). */
|
|
100
|
+
private peerUrls = new Map<string, string[]>();
|
|
101
|
+
/** Currently active URL per peer. */
|
|
102
|
+
private activeUrls = new Map<string, string>();
|
|
103
|
+
/** Last probe latency per URL (ms). */
|
|
104
|
+
private urlProbeLatencies = new Map<string, number>();
|
|
105
|
+
/** Route probe interval timer. */
|
|
106
|
+
private probeTimer: ReturnType<typeof setInterval> | null = null;
|
|
96
107
|
/** Deferred disconnect timers — grace period before broadcasting peer_leave. */
|
|
97
108
|
private disconnectGraceTimers = new Map<string, ReturnType<typeof setTimeout>>();
|
|
98
109
|
private stopped = false;
|
|
@@ -191,10 +202,16 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
191
202
|
for (const peer of this.config.peers) {
|
|
192
203
|
this.connectToPeer(peer);
|
|
193
204
|
}
|
|
205
|
+
// Start route probing for peers with multiple URLs
|
|
206
|
+
this.startRouteProbing();
|
|
194
207
|
}
|
|
195
208
|
|
|
196
209
|
async stop() {
|
|
197
210
|
this.stopped = true;
|
|
211
|
+
if (this.probeTimer) {
|
|
212
|
+
clearInterval(this.probeTimer);
|
|
213
|
+
this.probeTimer = null;
|
|
214
|
+
}
|
|
198
215
|
if (this.gossipDebounceTimer) {
|
|
199
216
|
clearTimeout(this.gossipDebounceTimer);
|
|
200
217
|
this.gossipDebounceTimer = null;
|
|
@@ -226,11 +243,13 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
226
243
|
this.rateLimiter.destroy();
|
|
227
244
|
this.approvalManager.destroy();
|
|
228
245
|
this.router.destroy();
|
|
246
|
+
this.joinedPeers.clear();
|
|
229
247
|
}
|
|
230
248
|
|
|
231
249
|
/** Force-stop without broadcasting or waiting — used when graceful stop times out. */
|
|
232
250
|
forceStop() {
|
|
233
251
|
this.stopped = true;
|
|
252
|
+
if (this.probeTimer) { clearInterval(this.probeTimer); this.probeTimer = null; }
|
|
234
253
|
for (const timer of this.reconnectTimers.values()) clearTimeout(timer);
|
|
235
254
|
this.reconnectTimers.clear();
|
|
236
255
|
for (const [, timer] of this.disconnectGraceTimers) clearTimeout(timer);
|
|
@@ -246,6 +265,7 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
246
265
|
this.rateLimiter.destroy();
|
|
247
266
|
this.approvalManager.destroy();
|
|
248
267
|
this.router.destroy();
|
|
268
|
+
this.joinedPeers.clear();
|
|
249
269
|
}
|
|
250
270
|
|
|
251
271
|
private closeServers() {
|
|
@@ -264,6 +284,143 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
264
284
|
}
|
|
265
285
|
}
|
|
266
286
|
|
|
287
|
+
// ── Route probing (for multi-URL peers) ──────────────────────────
|
|
288
|
+
private static readonly PROBE_INTERVAL = 3_600_000; // 1 hour
|
|
289
|
+
/** Minimum improvement ratio to trigger a route switch. */
|
|
290
|
+
private static readonly SWITCH_THRESHOLD = 0.7; // new must be ≤70% of current
|
|
291
|
+
|
|
292
|
+
private startRouteProbing() {
|
|
293
|
+
// Only probe if any peer has multiple URLs
|
|
294
|
+
const hasMultiUrl = [...this.peerUrls.values()].some((urls) => urls.length > 1);
|
|
295
|
+
if (!hasMultiUrl) return;
|
|
296
|
+
|
|
297
|
+
this.probeTimer = setInterval(() => this.probeAllRoutes(), PeerManager.PROBE_INTERVAL);
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
private async probeAllRoutes() {
|
|
301
|
+
for (const [nodeId, urls] of this.peerUrls) {
|
|
302
|
+
if (urls.length <= 1) continue;
|
|
303
|
+
const activeUrl = this.activeUrls.get(nodeId);
|
|
304
|
+
for (const url of urls) {
|
|
305
|
+
if (url === activeUrl) continue;
|
|
306
|
+
// Probe non-active URLs
|
|
307
|
+
const latency = await this.probeUrl(url);
|
|
308
|
+
if (latency !== null) {
|
|
309
|
+
this.urlProbeLatencies.set(url, latency);
|
|
310
|
+
}
|
|
311
|
+
}
|
|
312
|
+
// Also record active connection's real latency
|
|
313
|
+
if (activeUrl) {
|
|
314
|
+
const route = this.router.getRoute(nodeId);
|
|
315
|
+
if (route && route.latencyMs > 0) {
|
|
316
|
+
this.urlProbeLatencies.set(activeUrl, route.latencyMs);
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
// Evaluate if we should switch
|
|
320
|
+
this.evaluateRouteSwitch(nodeId);
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
/**
|
|
325
|
+
* Probe a URL by measuring HTTP response time (WS server also serves HTTP).
|
|
326
|
+
* Returns latency in ms, or null if unreachable.
|
|
327
|
+
*/
|
|
328
|
+
private async probeUrl(wsUrl: string): Promise<number | null> {
|
|
329
|
+
try {
|
|
330
|
+
const httpUrl = wsUrl.replace(/^ws(s?):\/\//, "http$1://");
|
|
331
|
+
const start = Date.now();
|
|
332
|
+
const controller = new AbortController();
|
|
333
|
+
const timeout = setTimeout(() => controller.abort(), 5_000);
|
|
334
|
+
try {
|
|
335
|
+
const res = await fetch(httpUrl, {
|
|
336
|
+
method: "HEAD",
|
|
337
|
+
signal: controller.signal,
|
|
338
|
+
// @ts-ignore — Node.js 18+ supports this
|
|
339
|
+
keepalive: false,
|
|
340
|
+
});
|
|
341
|
+
clearTimeout(timeout);
|
|
342
|
+
if (res.ok || res.status === 200) {
|
|
343
|
+
return Date.now() - start;
|
|
344
|
+
}
|
|
345
|
+
return null;
|
|
346
|
+
} catch {
|
|
347
|
+
clearTimeout(timeout);
|
|
348
|
+
return null;
|
|
349
|
+
}
|
|
350
|
+
} catch {
|
|
351
|
+
return null;
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
/** Check if we should switch to a better URL for a peer. */
|
|
356
|
+
private evaluateRouteSwitch(nodeId: string) {
|
|
357
|
+
const urls = this.peerUrls.get(nodeId);
|
|
358
|
+
const activeUrl = this.activeUrls.get(nodeId);
|
|
359
|
+
if (!urls || !activeUrl || urls.length <= 1) return;
|
|
360
|
+
|
|
361
|
+
const currentLatency = this.urlProbeLatencies.get(activeUrl);
|
|
362
|
+
if (!currentLatency || currentLatency <= 0) return;
|
|
363
|
+
|
|
364
|
+
// Find best alternative
|
|
365
|
+
let bestUrl: string | undefined;
|
|
366
|
+
let bestLatency = Infinity;
|
|
367
|
+
for (const url of urls) {
|
|
368
|
+
if (url === activeUrl) continue;
|
|
369
|
+
const lat = this.urlProbeLatencies.get(url);
|
|
370
|
+
if (lat !== undefined && lat < bestLatency) {
|
|
371
|
+
bestLatency = lat;
|
|
372
|
+
bestUrl = url;
|
|
373
|
+
}
|
|
374
|
+
}
|
|
375
|
+
|
|
376
|
+
if (!bestUrl || bestLatency >= currentLatency * PeerManager.SWITCH_THRESHOLD) return;
|
|
377
|
+
|
|
378
|
+
// Check if there are active tasks — don't switch mid-task
|
|
379
|
+
if (this.hasActiveTasks(nodeId)) {
|
|
380
|
+
debug("probe", `${nodeId}: better route found (${activeUrl} ${currentLatency}ms → ${bestUrl} ${bestLatency}ms) but has active tasks, deferring`);
|
|
381
|
+
return;
|
|
382
|
+
}
|
|
383
|
+
|
|
384
|
+
debug("probe", `${nodeId}: switching route ${activeUrl} ${currentLatency}ms → ${bestUrl} ${bestLatency}ms`);
|
|
385
|
+
this.switchRoute(nodeId, bestUrl);
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
/** Callback to check if there are active tasks involving a peer. Set by ClusterRuntime. */
|
|
389
|
+
private activeTaskChecker: ((nodeId: string) => boolean) | null = null;
|
|
390
|
+
|
|
391
|
+
/** Register a callback to check for active tasks (used to prevent route switches mid-task). */
|
|
392
|
+
setActiveTaskChecker(checker: (nodeId: string) => boolean) {
|
|
393
|
+
this.activeTaskChecker = checker;
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
/** Check if there are active tasks involving a peer (handoffs, model requests, etc.). */
|
|
397
|
+
private hasActiveTasks(nodeId: string): boolean {
|
|
398
|
+
return this.activeTaskChecker?.(nodeId) ?? false;
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
/** Switch the active route for a peer to a new URL. */
|
|
402
|
+
private switchRoute(nodeId: string, newUrl: string) {
|
|
403
|
+
this.activeUrls.set(nodeId, newUrl);
|
|
404
|
+
// Connect to the new URL — the new connection will authenticate and join
|
|
405
|
+
// as an additional channel briefly, then we close the old one.
|
|
406
|
+
const oldRoute = this.router.getRoute(nodeId);
|
|
407
|
+
const oldConn = oldRoute?.connection;
|
|
408
|
+
|
|
409
|
+
this.connectToChannel(nodeId, newUrl);
|
|
410
|
+
|
|
411
|
+
// Close the old connection after a short delay (give new connection time to establish)
|
|
412
|
+
if (oldConn?.isOpen) {
|
|
413
|
+
setTimeout(() => {
|
|
414
|
+
// Only close if a new connection has taken over
|
|
415
|
+
const currentRoute = this.router.getRoute(nodeId);
|
|
416
|
+
if (currentRoute?.connection && currentRoute.connection !== oldConn) {
|
|
417
|
+
debug("probe", `${nodeId}: closing old channel after route switch`);
|
|
418
|
+
oldConn.close(1000, "route switch");
|
|
419
|
+
}
|
|
420
|
+
}, 5_000);
|
|
421
|
+
}
|
|
422
|
+
}
|
|
423
|
+
|
|
267
424
|
/** Set an HTTP request handler for non-WebSocket requests (e.g. web dashboard). */
|
|
268
425
|
private httpRequestHandler: ((req: IncomingMessage, res: ServerResponse) => boolean) | null = null;
|
|
269
426
|
|
|
@@ -380,26 +537,52 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
380
537
|
|
|
381
538
|
// ── Outbound connections (standard WebSocket) ──────────────────
|
|
382
539
|
private connectToPeer(peer: PeerConfig) {
|
|
540
|
+
const urls = Array.isArray(peer.url) ? peer.url : [peer.url];
|
|
541
|
+
this.peerUrls.set(peer.nodeId, urls);
|
|
542
|
+
// Connect to the first URL (or best known from probes)
|
|
543
|
+
const bestUrl = this.pickBestUrl(peer.nodeId, urls);
|
|
544
|
+
this.activeUrls.set(peer.nodeId, bestUrl);
|
|
545
|
+
this.connectToChannel(peer.nodeId, bestUrl);
|
|
546
|
+
}
|
|
547
|
+
|
|
548
|
+
/** Pick the best URL for a peer based on probe latencies. Falls back to first URL. */
|
|
549
|
+
private pickBestUrl(nodeId: string, urls: string[]): string {
|
|
550
|
+
if (urls.length <= 1) return urls[0];
|
|
551
|
+
let bestUrl = urls[0];
|
|
552
|
+
let bestLatency = Infinity;
|
|
553
|
+
for (const url of urls) {
|
|
554
|
+
const lat = this.urlProbeLatencies.get(url);
|
|
555
|
+
if (lat !== undefined && lat < bestLatency) {
|
|
556
|
+
bestLatency = lat;
|
|
557
|
+
bestUrl = url;
|
|
558
|
+
}
|
|
559
|
+
}
|
|
560
|
+
return bestUrl;
|
|
561
|
+
}
|
|
562
|
+
|
|
563
|
+
/** Connect a single channel (URL) for a peer. */
|
|
564
|
+
private connectToChannel(nodeId: string, url: string) {
|
|
383
565
|
if (this.stopped) {
|
|
384
|
-
debug("peer", `
|
|
566
|
+
debug("peer", `connectToChannel(${nodeId}): skipped (stopped)`);
|
|
385
567
|
return;
|
|
386
568
|
}
|
|
387
569
|
|
|
388
|
-
const
|
|
389
|
-
|
|
570
|
+
const channelKey = `${nodeId}|${url}`;
|
|
571
|
+
const attempt = this.reconnectAttempts.get(channelKey) ?? 0;
|
|
572
|
+
debug("peer", `connectToChannel(${nodeId}): attempt=${attempt} url=${url}`);
|
|
390
573
|
|
|
391
574
|
// Use a common WS subprotocol for traffic disguise
|
|
392
575
|
let ws: WebSocket;
|
|
393
576
|
try {
|
|
394
|
-
ws = new WebSocket(
|
|
577
|
+
ws = new WebSocket(url, ["graphql-transport-ws"]);
|
|
395
578
|
} catch (err) {
|
|
396
|
-
debug("peer", `
|
|
397
|
-
this.
|
|
579
|
+
debug("peer", `connectToChannel(${nodeId}): WebSocket constructor threw: ${err}`);
|
|
580
|
+
this.scheduleChannelReconnect(nodeId, url);
|
|
398
581
|
return;
|
|
399
582
|
}
|
|
400
583
|
|
|
401
584
|
ws.addEventListener("open", () => {
|
|
402
|
-
debug("peer", `
|
|
585
|
+
debug("peer", `connectToChannel(${nodeId}): ws open url=${url}`);
|
|
403
586
|
const conn = new Connection(
|
|
404
587
|
ws,
|
|
405
588
|
"outbound",
|
|
@@ -411,8 +594,8 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
411
594
|
conn.bindWebSocket(ws);
|
|
412
595
|
|
|
413
596
|
conn.on("authenticated", (caps) => {
|
|
414
|
-
debug("peer", `
|
|
415
|
-
this.reconnectAttempts.delete(
|
|
597
|
+
debug("peer", `connectToChannel(${nodeId}): authenticated url=${url}`);
|
|
598
|
+
this.reconnectAttempts.delete(channelKey);
|
|
416
599
|
this.onPeerAuthenticated(conn, caps);
|
|
417
600
|
});
|
|
418
601
|
|
|
@@ -426,7 +609,7 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
426
609
|
const tryReconnect = () => {
|
|
427
610
|
if (!reconnectScheduled) {
|
|
428
611
|
reconnectScheduled = true;
|
|
429
|
-
this.
|
|
612
|
+
this.scheduleChannelReconnect(nodeId, url, lastError);
|
|
430
613
|
}
|
|
431
614
|
};
|
|
432
615
|
|
|
@@ -438,7 +621,7 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
438
621
|
// Don't reconnect if this was a self-connection (peer URL points to ourselves).
|
|
439
622
|
// Without this guard, outbound detects self → closes → scheduleReconnect → loop.
|
|
440
623
|
if (ev.code === 4002 && ev.reason === "self-connection") {
|
|
441
|
-
debug("peer", `
|
|
624
|
+
debug("peer", `connectToChannel(${nodeId}): self-connection, will not reconnect`);
|
|
442
625
|
return;
|
|
443
626
|
}
|
|
444
627
|
if (!lastError) {
|
|
@@ -448,24 +631,65 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
448
631
|
});
|
|
449
632
|
}
|
|
450
633
|
|
|
451
|
-
private
|
|
634
|
+
private scheduleChannelReconnect(nodeId: string, url: string, reason?: string) {
|
|
452
635
|
if (this.stopped) {
|
|
453
|
-
debug("peer", `
|
|
636
|
+
debug("peer", `scheduleChannelReconnect(${nodeId}): skipped (stopped)`);
|
|
454
637
|
return;
|
|
455
638
|
}
|
|
456
|
-
|
|
639
|
+
const channelKey = `${nodeId}|${url}`;
|
|
640
|
+
if (this.reconnectTimers.has(channelKey)) return;
|
|
641
|
+
|
|
642
|
+
const attempt = this.reconnectAttempts.get(channelKey) ?? 0;
|
|
643
|
+
|
|
644
|
+
// On first failure, try an alternative URL immediately (failover)
|
|
645
|
+
if (attempt === 0) {
|
|
646
|
+
const urls = this.peerUrls.get(nodeId);
|
|
647
|
+
if (urls && urls.length > 1) {
|
|
648
|
+
const altUrl = this.pickNextUrl(nodeId, url, urls);
|
|
649
|
+
if (altUrl && altUrl !== url) {
|
|
650
|
+
debug("peer", `scheduleChannelReconnect(${nodeId}): failover ${url} → ${altUrl}`);
|
|
651
|
+
this.activeUrls.set(nodeId, altUrl);
|
|
652
|
+
this.reconnectAttempts.set(channelKey, attempt + 1);
|
|
653
|
+
// Connect to alternative immediately, schedule original for later
|
|
654
|
+
this.connectToChannel(nodeId, altUrl);
|
|
655
|
+
const timer = setTimeout(() => {
|
|
656
|
+
this.reconnectTimers.delete(channelKey);
|
|
657
|
+
// Only reconnect original URL if not already connected
|
|
658
|
+
if (!this.joinedPeers.has(nodeId)) {
|
|
659
|
+
this.connectToChannel(nodeId, url);
|
|
660
|
+
}
|
|
661
|
+
}, RECONNECT_MAX);
|
|
662
|
+
this.reconnectTimers.set(channelKey, timer);
|
|
663
|
+
return;
|
|
664
|
+
}
|
|
665
|
+
}
|
|
666
|
+
}
|
|
457
667
|
|
|
458
|
-
const attempt = this.reconnectAttempts.get(peer.nodeId) ?? 0;
|
|
459
668
|
const delay = Math.min(RECONNECT_BASE * 2 ** attempt, RECONNECT_MAX);
|
|
460
|
-
this.reconnectAttempts.set(
|
|
669
|
+
this.reconnectAttempts.set(channelKey, attempt + 1);
|
|
461
670
|
const tag = reason ? ` reason="${reason}"` : "";
|
|
462
|
-
debug("peer", `
|
|
671
|
+
debug("peer", `scheduleChannelReconnect(${nodeId}): attempt=${attempt} delay=${delay}ms url=${url}${tag}`);
|
|
463
672
|
|
|
464
673
|
const timer = setTimeout(() => {
|
|
465
|
-
this.reconnectTimers.delete(
|
|
466
|
-
this.
|
|
674
|
+
this.reconnectTimers.delete(channelKey);
|
|
675
|
+
this.connectToChannel(nodeId, url);
|
|
467
676
|
}, delay);
|
|
468
|
-
this.reconnectTimers.set(
|
|
677
|
+
this.reconnectTimers.set(channelKey, timer);
|
|
678
|
+
}
|
|
679
|
+
|
|
680
|
+
/** Pick the next best URL to try, excluding the current one. */
|
|
681
|
+
private pickNextUrl(nodeId: string, currentUrl: string, urls: string[]): string | undefined {
|
|
682
|
+
let bestUrl: string | undefined;
|
|
683
|
+
let bestLatency = Infinity;
|
|
684
|
+
for (const url of urls) {
|
|
685
|
+
if (url === currentUrl) continue;
|
|
686
|
+
const lat = this.urlProbeLatencies.get(url) ?? 10_000;
|
|
687
|
+
if (lat < bestLatency) {
|
|
688
|
+
bestLatency = lat;
|
|
689
|
+
bestUrl = url;
|
|
690
|
+
}
|
|
691
|
+
}
|
|
692
|
+
return bestUrl ?? urls.find((u) => u !== currentUrl);
|
|
469
693
|
}
|
|
470
694
|
|
|
471
695
|
// ── Peer lifecycle ─────────────────────────────────────────────
|
|
@@ -565,6 +789,23 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
565
789
|
// Cancel disconnect grace timer if the peer is reconnecting
|
|
566
790
|
const wasInGrace = this.cancelDisconnectGrace(nodeId);
|
|
567
791
|
|
|
792
|
+
// Check if this peer already has an active connection (additional channel)
|
|
793
|
+
const isAdditionalChannel = this.joinedPeers.has(nodeId);
|
|
794
|
+
|
|
795
|
+
if (isAdditionalChannel) {
|
|
796
|
+
// Additional channel — just add to the channel pool, no peer_join broadcast
|
|
797
|
+
this.router.addChannel(nodeId, conn);
|
|
798
|
+
conn.on("message", (frame) => this.onFrame(frame, conn));
|
|
799
|
+
conn.on("latency", () => this.router.updateActiveChannel(nodeId));
|
|
800
|
+
conn.on("close", () => this.onChannelDisconnected(conn));
|
|
801
|
+
const channelCount = this.router.getChannelCount(nodeId);
|
|
802
|
+
debug("peer", `completePeerJoin(${nodeId}): additional channel added (total=${channelCount})`);
|
|
803
|
+
audit("channel_add", { nodeId, detail: `channels=${channelCount}` });
|
|
804
|
+
return;
|
|
805
|
+
}
|
|
806
|
+
|
|
807
|
+
// First channel — full join flow
|
|
808
|
+
|
|
568
809
|
// If there's an existing connection for this nodeId (e.g. peer reconnected
|
|
569
810
|
// while old TCP hadn't closed yet), close it AFTER overwriting the route so
|
|
570
811
|
// the stale-close guard in onPeerDisconnected correctly skips cleanup.
|
|
@@ -580,9 +821,11 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
580
821
|
oldConn.close(1000, "replaced by new connection");
|
|
581
822
|
}
|
|
582
823
|
|
|
824
|
+
this.joinedPeers.add(nodeId);
|
|
825
|
+
|
|
583
826
|
conn.on("message", (frame) => this.onFrame(frame, conn));
|
|
584
|
-
conn.on("latency", (
|
|
585
|
-
conn.on("close", () => this.
|
|
827
|
+
conn.on("latency", () => this.router.updateActiveChannel(nodeId));
|
|
828
|
+
conn.on("close", () => this.onChannelDisconnected(conn));
|
|
586
829
|
|
|
587
830
|
this.sendPeerSync(conn);
|
|
588
831
|
|
|
@@ -615,6 +858,25 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
615
858
|
this.emit("peerConnected", nodeId);
|
|
616
859
|
}
|
|
617
860
|
|
|
861
|
+
/** Handle a single channel disconnecting (multi-channel aware). */
|
|
862
|
+
private onChannelDisconnected(conn: Connection) {
|
|
863
|
+
const nodeId = conn.remoteNodeId;
|
|
864
|
+
if (!nodeId) return;
|
|
865
|
+
|
|
866
|
+
// Remove this channel from the pool
|
|
867
|
+
const hasRemaining = this.router.removeChannel(nodeId, conn);
|
|
868
|
+
if (hasRemaining) {
|
|
869
|
+
// Other channels still alive — just log, no peer_leave
|
|
870
|
+
const channelCount = this.router.getChannelCount(nodeId);
|
|
871
|
+
debug("peer", `onChannelDisconnected(${nodeId}): channel lost, ${channelCount} remaining`);
|
|
872
|
+
audit("channel_remove", { nodeId, detail: `channels=${channelCount}` });
|
|
873
|
+
return;
|
|
874
|
+
}
|
|
875
|
+
|
|
876
|
+
// Last channel gone — proceed with peer disconnect logic
|
|
877
|
+
this.onPeerDisconnected(conn);
|
|
878
|
+
}
|
|
879
|
+
|
|
618
880
|
private onPeerDisconnected(conn: Connection) {
|
|
619
881
|
const nodeId = conn.remoteNodeId;
|
|
620
882
|
if (!nodeId) return;
|
|
@@ -632,6 +894,7 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
632
894
|
// Same-nodeId 本地客户端断开:仅清理路由,不广播 peer_leave
|
|
633
895
|
if (nodeId === this.config.nodeId) {
|
|
634
896
|
this.router.removePeer(nodeId);
|
|
897
|
+
this.joinedPeers.delete(nodeId);
|
|
635
898
|
return;
|
|
636
899
|
}
|
|
637
900
|
|
|
@@ -680,6 +943,7 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
680
943
|
|
|
681
944
|
audit("peer_leave", { nodeId });
|
|
682
945
|
this.router.removePeer(nodeId);
|
|
946
|
+
this.joinedPeers.delete(nodeId);
|
|
683
947
|
|
|
684
948
|
// Remove satellite contexts that were only reachable via this peer
|
|
685
949
|
for (let i = this.satelliteContexts.length - 1; i >= 0; i--) {
|
package/src/router.ts
CHANGED
|
@@ -30,7 +30,9 @@ export class Router {
|
|
|
30
30
|
private localToolProxy?: ToolProxyInfo;
|
|
31
31
|
private localAcpAgents?: AcpAgentInfo[];
|
|
32
32
|
private routes = new Map<string, RouteEntry>();
|
|
33
|
-
private connections = new Map<string, Connection>(); // nodeId → direct connection
|
|
33
|
+
private connections = new Map<string, Connection>(); // nodeId → active (best) direct connection
|
|
34
|
+
/** All live channels per nodeId (multi-channel support). */
|
|
35
|
+
private channels = new Map<string, Set<Connection>>();
|
|
34
36
|
/** Double-map dedup: current window + previous window. Rotated periodically. */
|
|
35
37
|
private seenCurrent = new Map<string, true>();
|
|
36
38
|
private seenPrevious = new Map<string, true>();
|
|
@@ -128,6 +130,7 @@ export class Router {
|
|
|
128
130
|
this.seenCurrent.clear();
|
|
129
131
|
this.seenPrevious.clear();
|
|
130
132
|
this.failedRequests.clear();
|
|
133
|
+
this.channels.clear();
|
|
131
134
|
}
|
|
132
135
|
|
|
133
136
|
// ── Route table management ─────────────────────────────────────
|
|
@@ -139,6 +142,10 @@ export class Router {
|
|
|
139
142
|
const old = this.routes.get(nodeId);
|
|
140
143
|
if (old) this.unindexEntry(old);
|
|
141
144
|
this.connections.set(nodeId, connection);
|
|
145
|
+
// Add to channel set
|
|
146
|
+
let channelSet = this.channels.get(nodeId);
|
|
147
|
+
if (!channelSet) { channelSet = new Set(); this.channels.set(nodeId, channelSet); }
|
|
148
|
+
channelSet.add(connection);
|
|
142
149
|
const entry: RouteEntry = {
|
|
143
150
|
nodeId,
|
|
144
151
|
agents: capabilities.agents,
|
|
@@ -157,6 +164,74 @@ export class Router {
|
|
|
157
164
|
this.indexEntry(entry);
|
|
158
165
|
}
|
|
159
166
|
|
|
167
|
+
/**
 * Register one more channel for a peer (multi-channel support).
 *
 * The connection is added to the peer's channel set, creating the set on
 * first use, and the active channel is then re-evaluated.
 *
 * @param nodeId - Peer the connection belongs to.
 * @param connection - Connection to add to the peer's pool.
 */
addChannel(nodeId: string, connection: Connection) {
  const existing = this.channels.get(nodeId);
  const pool = existing ?? new Set<Connection>();
  if (!existing) {
    this.channels.set(nodeId, pool);
  }
  pool.add(connection);
  this.updateActiveChannel(nodeId);
}
|
|
174
|
+
|
|
175
|
+
/**
 * Remove one channel from a peer's pool.
 *
 * When other channels remain registered, the active channel is re-evaluated.
 * When the pool becomes empty, the peer's entry in `channels` is dropped.
 *
 * @param nodeId - Peer the connection belongs to.
 * @param connection - Connection to remove.
 * @returns `true` when the peer still has at least one channel registered
 *   after the removal; `false` when the pool is now empty or the peer had no
 *   pool at all. The result reflects pool membership only; `isOpen` is not
 *   consulted here.
 */
removeChannel(nodeId: string, connection: Connection): boolean {
  const pool = this.channels.get(nodeId);
  if (!pool) {
    return false;
  }

  pool.delete(connection);
  if (pool.size > 0) {
    // Something is left: pick a new active channel among the survivors.
    this.updateActiveChannel(nodeId);
    return true;
  }

  this.channels.delete(nodeId);
  return false;
}
|
|
190
|
+
|
|
191
|
+
/**
 * Re-select the active (best) channel for a peer by latency.
 *
 * Only open connections are considered. A non-positive `latencyMs` is
 * treated as unmeasured and ranked as 10_000 ms, so any measured channel
 * below that wins over it. On equal effective latency the connection seen
 * first in the set is kept. When no open connection exists, or the peer has
 * no channel set, nothing is changed.
 *
 * On success the winner is stored in `connections`, and the peer's route
 * entry (if one exists) gets its `connection` and `latencyMs` updated.
 *
 * @param nodeId - Peer whose active channel should be re-evaluated.
 */
updateActiveChannel(nodeId: string) {
  const pool = this.channels.get(nodeId);
  if (!pool || pool.size === 0) {
    return;
  }

  let winner: Connection | null = null;
  let winnerLatency = Infinity;
  for (const candidate of pool) {
    if (candidate.isOpen) {
      // 0 (or less) means "not measured yet": rank it as a slow channel.
      const effectiveLatency = candidate.latencyMs > 0 ? candidate.latencyMs : 10_000;
      if (effectiveLatency < winnerLatency) {
        winnerLatency = effectiveLatency;
        winner = candidate;
      }
    }
  }

  if (!winner) {
    return;
  }

  this.connections.set(nodeId, winner);
  const route = this.routes.get(nodeId);
  if (route) {
    route.connection = winner;
    // The route carries the raw reading, not the 10_000 substitute.
    route.latencyMs = winner.latencyMs;
  }
}
|
|
217
|
+
|
|
218
|
+
/**
 * Count the channels of a peer that are currently open.
 *
 * @param nodeId - Peer to inspect.
 * @returns Number of connections in the peer's channel set whose `isOpen`
 *   is truthy; `0` when the peer has no channel set.
 */
getChannelCount(nodeId: string): number {
  const pool = this.channels.get(nodeId);
  if (!pool) {
    return 0;
  }
  return [...pool].filter((channel) => channel.isOpen).length;
}
|
|
228
|
+
|
|
229
|
+
/**
 * List every channel registered for a peer (for diagnostics/status).
 *
 * @param nodeId - Peer to inspect.
 * @returns A new array holding the peer's connections, open or not; an
 *   empty array when the peer has no channel set.
 */
getChannels(nodeId: string): Connection[] {
  const pool = this.channels.get(nodeId);
  if (!pool) {
    return [];
  }
  return Array.from(pool);
}
|
|
234
|
+
|
|
160
235
|
addRelayPeer(peer: PeerInfo, viaNodeId: string) {
|
|
161
236
|
// Don't add ourselves
|
|
162
237
|
if (peer.nodeId === this.nodeId) return;
|
|
@@ -192,6 +267,7 @@ export class Router {
|
|
|
192
267
|
|
|
193
268
|
removePeer(nodeId: string) {
|
|
194
269
|
this.connections.delete(nodeId);
|
|
270
|
+
this.channels.delete(nodeId);
|
|
195
271
|
const removed = this.routes.get(nodeId);
|
|
196
272
|
if (removed) {
|
|
197
273
|
this.unindexEntry(removed);
|
|
@@ -346,11 +422,27 @@ export class Router {
|
|
|
346
422
|
const route = this.routes.get(targetNodeId);
|
|
347
423
|
if (!route) return false;
|
|
348
424
|
|
|
425
|
+
// Try active connection first
|
|
349
426
|
if (route.connection?.isOpen) {
|
|
350
427
|
route.connection.send(frame);
|
|
351
428
|
return true;
|
|
352
429
|
}
|
|
353
430
|
|
|
431
|
+
// Fallback: try other channels (multi-channel failover)
|
|
432
|
+
const channelSet = this.channels.get(targetNodeId);
|
|
433
|
+
if (channelSet) {
|
|
434
|
+
for (const conn of channelSet) {
|
|
435
|
+
if (conn.isOpen) {
|
|
436
|
+
conn.send(frame);
|
|
437
|
+
// Promote to active
|
|
438
|
+
this.connections.set(targetNodeId, conn);
|
|
439
|
+
route.connection = conn;
|
|
440
|
+
route.latencyMs = conn.latencyMs;
|
|
441
|
+
return true;
|
|
442
|
+
}
|
|
443
|
+
}
|
|
444
|
+
}
|
|
445
|
+
|
|
354
446
|
// Relay through intermediate node
|
|
355
447
|
if (route.reachableVia) {
|
|
356
448
|
const relay = this.connections.get(route.reachableVia);
|
|
@@ -41,6 +41,7 @@ export function createClusterPeersTool(): AnyAgentTool {
|
|
|
41
41
|
const sentinelOnline = sentinelStatus === "direct" || sentinelStatus === "relay";
|
|
42
42
|
const effectiveStatus = status === "unreachable" && sentinelOnline ? "sentinel-only" : status;
|
|
43
43
|
|
|
44
|
+
const channelCount = runtime.peerManager.router.getChannelCount(entry.nodeId);
|
|
44
45
|
return {
|
|
45
46
|
nodeId: entry.nodeId,
|
|
46
47
|
agents: entry.agents.map((a) => ({
|
|
@@ -53,6 +54,7 @@ export function createClusterPeersTool(): AnyAgentTool {
|
|
|
53
54
|
tools: entry.toolProxy?.enabled ? (entry.toolProxy.allow ?? []) : [],
|
|
54
55
|
status: effectiveStatus,
|
|
55
56
|
latencyMs: entry.latencyMs,
|
|
57
|
+
...(channelCount > 1 ? { channels: channelCount } : {}),
|
|
56
58
|
...(hasSentinel ? {
|
|
57
59
|
sentinel: sentinelOnline ? "online" : "offline",
|
|
58
60
|
} : {}),
|