clawmatrix 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clawmatrix",
3
- "version": "0.2.2",
3
+ "version": "0.2.4",
4
4
  "description": "Decentralized mesh cluster plugin for OpenClaw — inter-gateway communication, model proxy, task handoff, and tool proxy.",
5
5
  "type": "module",
6
6
  "license": "MIT",
package/src/cli.ts CHANGED
@@ -99,7 +99,7 @@ export const registerClusterCli = ({ program }: { program: Command }) => {
99
99
  models: Array<{ id: string }>;
100
100
  tags: string[];
101
101
  connected: boolean;
102
- status: "direct" | "relay" | "unreachable";
102
+ status: "direct" | "relay" | "unreachable" | "sentinel-only";
103
103
  latencyMs: number;
104
104
  reachableVia: string | null;
105
105
  }>;
@@ -121,10 +121,15 @@ export const registerClusterCli = ({ program }: { program: Command }) => {
121
121
 
122
122
  for (let i = 0; i < peers.length; i++) {
123
123
  const peer = peers[i];
124
- const dot = peer.status === "direct" ? green("●") : peer.status === "relay" ? yellow("●") : red("○");
124
+ const dot = peer.status === "direct" ? green("●")
125
+ : peer.status === "relay" ? yellow("●")
126
+ : peer.status === "sentinel-only" ? yellow("◐")
127
+ : red("○");
125
128
  const latency = peer.connected && peer.latencyMs > 0 ? dim(` ${peer.latencyMs}ms`) : "";
126
129
  const statusLabel = peer.status === "relay"
127
130
  ? yellow(` relay via ${peer.reachableVia}`)
131
+ : peer.status === "sentinel-only"
132
+ ? yellow(" sentinel only")
128
133
  : peer.status === "unreachable"
129
134
  ? red(" unreachable")
130
135
  : "";
@@ -505,12 +505,14 @@ export function createClusterService(
505
505
  config: ClawMatrixConfig,
506
506
  openclawConfig: OpenClawConfig,
507
507
  openclawVersion?: string,
508
+ onStarted?: () => void,
508
509
  ): OpenClawPluginService {
509
510
  return {
510
511
  id: "clawmatrix",
511
512
  start(ctx: OpenClawPluginServiceContext) {
512
513
  clusterRuntime = new ClusterRuntime(config, ctx.logger, openclawConfig, openclawVersion);
513
514
  clusterRuntime.start();
515
+ onStarted?.();
514
516
  },
515
517
  async stop() {
516
518
  if (clusterRuntime) {
package/src/identity.ts CHANGED
@@ -54,10 +54,12 @@ export function loadOrCreateIdentity(stateDir: string): KeyPair {
54
54
  if (fs.existsSync(filePath)) {
55
55
  const raw = fs.readFileSync(filePath, "utf-8");
56
56
  const data: IdentityData = JSON.parse(raw);
57
- return keyPairFromSerialized(data.publicKey, data.privateKey);
57
+ const keyPair = keyPairFromSerialized(data.publicKey, data.privateKey);
58
+ console.error(`[clawmatrix:identity] loaded existing identity from ${filePath} (publicKey=${data.publicKey.slice(0, 12)}...)`);
59
+ return keyPair;
58
60
  }
59
- } catch {
60
- // Corrupted file regenerate
61
+ } catch (err) {
62
+ console.error(`[clawmatrix:identity] failed to load identity from ${filePath}, regenerating: ${err}`);
61
63
  }
62
64
 
63
65
  // Generate new identity
@@ -73,6 +75,7 @@ export function loadOrCreateIdentity(stateDir: string): KeyPair {
73
75
  fs.mkdirSync(stateDir, { recursive: true });
74
76
  }
75
77
  fs.writeFileSync(filePath, JSON.stringify(data, null, 2), { mode: 0o600 });
78
+ console.error(`[clawmatrix:identity] generated NEW identity at ${filePath} (publicKey=${data.publicKey.slice(0, 12)}...)`);
76
79
 
77
80
  return keyPair;
78
81
  }
package/src/index.ts CHANGED
@@ -136,7 +136,12 @@ const plugin = {
136
136
  }
137
137
 
138
138
  // Background service: manages mesh connections, WS listener, heartbeat
139
- api.registerService(createClusterService(config, api.config, api.runtime.version));
139
+ // onStarted callback wires up approval after the runtime is available
140
+ let onServiceStarted: (() => void) | undefined;
141
+ const serviceStartedPromise = config.peerApproval.enabled
142
+ ? new Promise<void>((resolve) => { onServiceStarted = resolve; })
143
+ : undefined;
144
+ api.registerService(createClusterService(config, api.config, api.runtime.version, onServiceStarted));
140
145
 
141
146
  // Model providers: register per-node providers so models are accessed as nodeId/modelId
142
147
  const baseUrl = `http://127.0.0.1:${config.proxyPort}/v1`;
@@ -363,15 +368,52 @@ const plugin = {
363
368
  }
364
369
  }
365
370
  }
366
- // Auto-detect: scan OpenClaw channels config for enabled channels with groups
371
+ // Auto-detect: prefer DM targets (owner) over group targets
367
372
  if (targets.length === 0) {
368
373
  const channelsConfig = (api.config as Record<string, unknown>).channels as
369
- Record<string, { enabled?: boolean; groups?: Record<string, unknown> }> | undefined;
374
+ Record<string, { enabled?: boolean; allowFrom?: Array<string | number>; groups?: Record<string, unknown> }> | undefined;
375
+
376
+ // Parse commands.ownerAllowFrom for channel-prefixed owner IDs
377
+ // e.g. ["telegram:12345", "feishu:user_abc"] → { telegram: "12345", feishu: "user_abc" }
378
+ const commandsConfig = (api.config as Record<string, unknown>).commands as
379
+ { ownerAllowFrom?: Array<string | number> } | undefined;
380
+ const ownerByChannel = new Map<string, string>();
381
+ if (commandsConfig?.ownerAllowFrom) {
382
+ for (const entry of commandsConfig.ownerAllowFrom) {
383
+ const str = String(entry).trim();
384
+ const colonIdx = str.indexOf(":");
385
+ if (colonIdx > 0) {
386
+ const ch = str.slice(0, colonIdx).toLowerCase();
387
+ const id = str.slice(colonIdx + 1).trim();
388
+ if (id && !ownerByChannel.has(ch)) {
389
+ ownerByChannel.set(ch, id);
390
+ }
391
+ }
392
+ }
393
+ }
394
+
370
395
  if (channelsConfig) {
371
396
  for (const [channelId, chConf] of Object.entries(channelsConfig)) {
372
- if (!chConf || chConf.enabled === false) continue;
397
+ if (!chConf || typeof chConf !== "object" || chConf.enabled === false) continue;
398
+
399
+ // Priority 1: owner from commands.ownerAllowFrom → DM
400
+ const ownerDm = ownerByChannel.get(channelId);
401
+ if (ownerDm) {
402
+ targets.push({ channel: channelId, to: ownerDm });
403
+ continue;
404
+ }
405
+
406
+ // Priority 2: channel allowFrom (DM allowlist) → first entry as DM
407
+ if (Array.isArray(chConf.allowFrom) && chConf.allowFrom.length > 0) {
408
+ const firstUser = String(chConf.allowFrom[0]).trim();
409
+ if (firstUser && firstUser !== "*") {
410
+ targets.push({ channel: channelId, to: firstUser });
411
+ continue;
412
+ }
413
+ }
414
+
415
+ // Priority 3: groups (fallback)
373
416
  if (chConf.groups && typeof chConf.groups === "object") {
374
- // Use the first configured group as notification target
375
417
  const firstGroupId = Object.keys(chConf.groups)[0];
376
418
  if (firstGroupId) {
377
419
  targets.push({ channel: channelId, to: firstGroupId });
@@ -388,8 +430,11 @@ const plugin = {
388
430
  }
389
431
  };
390
432
 
391
- // Delay setup until service has started
392
- setTimeout(setupApproval, 1000);
433
+ // Run setupApproval once the cluster service has started
434
+ serviceStartedPromise!.then(() => {
435
+ // Small delay to ensure runtime is fully wired
436
+ setTimeout(setupApproval, 100);
437
+ });
393
438
  }
394
439
 
395
440
  // Gateway methods (queried by CLI via `openclaw gateway call`)
@@ -399,6 +444,8 @@ const plugin = {
399
444
  try {
400
445
  const runtime = getClusterRuntime();
401
446
  const peers = runtime.peerManager.router.getAllPeers();
447
+ const mergedPeers = mergeSentinelPeers(peers, runtime)
448
+ .filter((p) => (p as { nodeId: string }).nodeId !== config.nodeId);
402
449
  respond(true, {
403
450
  nodeId: config.nodeId,
404
451
  listen: config.listen ? config.listenPort : false,
@@ -406,7 +453,7 @@ const plugin = {
406
453
  agents: config.agents.map((a) => ({ id: a.id, description: a.description })),
407
454
  models: config.models.map((m) => ({ id: m.id })),
408
455
  tags: config.tags,
409
- peers: mergeSentinelPeers(peers, runtime),
456
+ peers: mergedPeers,
410
457
  });
411
458
  } catch {
412
459
  respond(false, { error: "ClawMatrix service not running" });
@@ -687,16 +734,18 @@ function mergeSentinelPeers(
687
734
  const status = runtime.peerManager.router.getPeerStatus(p);
688
735
  const sentinel = sentinelMap.get(p.nodeId);
689
736
  const sentinelStatus = sentinel ? runtime.peerManager.router.getPeerStatus(sentinel) : undefined;
737
+ const sentinelOnline = sentinelStatus === "direct" || sentinelStatus === "relay";
738
+ const effectiveStatus = status === "unreachable" && sentinelOnline ? "sentinel-only" : status;
690
739
  result.push({
691
740
  nodeId: p.nodeId,
692
741
  agents: p.agents,
693
742
  models: p.models,
694
743
  tags: p.tags,
695
744
  connected: status !== "unreachable",
696
- status,
745
+ status: effectiveStatus,
697
746
  reachableVia: p.reachableVia,
698
747
  latencyMs: p.latencyMs,
699
- ...(sentinel ? { sentinel: sentinelStatus === "direct" || sentinelStatus === "relay" ? "online" : "offline" } : {}),
748
+ ...(sentinel ? { sentinel: sentinelOnline ? "online" : "offline" } : {}),
700
749
  });
701
750
  }
702
751
 
@@ -704,15 +753,16 @@ function mergeSentinelPeers(
704
753
  for (const [mainId, sentinel] of sentinelMap) {
705
754
  if (seen.has(mainId)) continue;
706
755
  const sentinelStatus = runtime.peerManager.router.getPeerStatus(sentinel);
756
+ const sentinelOnline = sentinelStatus === "direct" || sentinelStatus === "relay";
707
757
  result.push({
708
758
  nodeId: mainId,
709
759
  agents: [],
710
760
  models: [],
711
761
  tags: [],
712
762
  connected: false,
713
- status: "unreachable",
763
+ status: sentinelOnline ? "sentinel-only" : "unreachable",
714
764
  latencyMs: sentinel.latencyMs,
715
- sentinel: sentinelStatus === "direct" || sentinelStatus === "relay" ? "online" : "offline",
765
+ sentinel: sentinelOnline ? "online" : "offline",
716
766
  });
717
767
  }
718
768
 
@@ -101,6 +101,8 @@ export class PeerApprovalManager extends EventEmitter<PeerApprovalEvents> {
101
101
  private loaded = false;
102
102
  /** IP → list of deny timestamps (for approval noise suppression). */
103
103
  private ipDenyHistory = new Map<string, number[]>();
104
+ /** Active sentinel polling timers (for cleanup on destroy). */
105
+ private sentinelTimers = new Set<ReturnType<typeof setInterval>>();
104
106
 
105
107
  constructor(config: PeerApprovalConfig, stateDir: string) {
106
108
  super();
@@ -190,6 +192,15 @@ export class PeerApprovalManager extends EventEmitter<PeerApprovalEvents> {
190
192
  // Always allow pre-approved nodeIds from config
191
193
  if (this.config.allowList.includes(nodeId)) return "allow";
192
194
 
195
+ // Auto-allow sentinel companions: if "X:sentinel" connects and "X" is approved, allow it
196
+ const sentinelSuffix = ":sentinel";
197
+ if (nodeId.endsWith(sentinelSuffix)) {
198
+ const baseNodeId = nodeId.slice(0, -sentinelSuffix.length);
199
+ if (this.data.approved[baseNodeId] || this.config.allowList.includes(baseNodeId)) {
200
+ return "allow";
201
+ }
202
+ }
203
+
193
204
  // Already approved (persisted)
194
205
  const approved = this.data.approved[nodeId];
195
206
  if (approved) {
@@ -236,6 +247,16 @@ export class PeerApprovalManager extends EventEmitter<PeerApprovalEvents> {
236
247
  publicKey?: string,
237
248
  ip?: string,
238
249
  ): Promise<"approve" | "deny" | "timeout"> {
250
+ // Sentinel companion: piggyback on the base nodeId's approval instead of
251
+ // creating a separate request. If the base node's approval is already pending,
252
+ // wait for that decision. If the sentinel arrives first, wait for the base
253
+ // node's approval to appear (up to the configured timeout).
254
+ const sentinelSuffix = ":sentinel";
255
+ if (nodeId.endsWith(sentinelSuffix)) {
256
+ const baseNodeId = nodeId.slice(0, -sentinelSuffix.length);
257
+ return this.waitForBaseApproval(baseNodeId, nodeId, capabilities, publicKey);
258
+ }
259
+
239
260
  const approvalId = crypto.randomUUID();
240
261
  this.log(`requestApproval: nodeId=${nodeId} mode=${this.config.mode} approvalId=${approvalId}`);
241
262
 
@@ -481,6 +502,79 @@ export class PeerApprovalManager extends EventEmitter<PeerApprovalEvents> {
481
502
  });
482
503
  }
483
504
 
505
+ /**
506
+ * Wait for the base nodeId's approval decision (for sentinel companions).
507
+ * If the base already has a pending approval, piggyback on it.
508
+ * If not yet pending, poll briefly until it appears or timeout.
509
+ */
510
+ private waitForBaseApproval(
511
+ baseNodeId: string,
512
+ sentinelNodeId: string,
513
+ capabilities: NodeCapabilities,
514
+ publicKey?: string,
515
+ ): Promise<"approve" | "deny" | "timeout"> {
516
+ const autoApprove = (decision: "approve" | "deny" | "timeout") => {
517
+ if (decision === "approve") {
518
+ this.addApproved(sentinelNodeId, capabilities.deviceInfo, publicKey, {
519
+ source: `auto:sentinel-of:${baseNodeId}`, at: Date.now(),
520
+ });
521
+ }
522
+ return decision;
523
+ };
524
+
525
+ // Try to find an existing pending approval for the base node
526
+ const tryPiggyback = (): Promise<"approve" | "deny" | "timeout"> | null => {
527
+ for (const pending of this.pending.values()) {
528
+ if (pending.nodeId === baseNodeId) {
529
+ this.log(`sentinel ${sentinelNodeId} piggybacking on pending approval for ${baseNodeId}`);
530
+ return new Promise<"approve" | "deny" | "timeout">((resolve) => {
531
+ const origResolve = pending.resolve;
532
+ pending.resolve = (decision) => {
533
+ origResolve(decision);
534
+ resolve(autoApprove(decision));
535
+ };
536
+ });
537
+ }
538
+ }
539
+ return null;
540
+ };
541
+
542
+ const existing = tryPiggyback();
543
+ if (existing) return existing;
544
+
545
+ // Base node's approval hasn't been created yet (sentinel arrived first).
546
+ // Poll briefly — the base node should connect within a few seconds.
547
+ this.log(`sentinel ${sentinelNodeId} waiting for base node ${baseNodeId} approval to appear`);
548
+ return new Promise<"approve" | "deny" | "timeout">((resolve) => {
549
+ let attempts = 0;
550
+ const maxAttempts = 30; // 30 × 1s = 30s max wait
551
+ const cleanup = () => { clearInterval(timer); this.sentinelTimers.delete(timer); };
552
+ const timer = setInterval(() => {
553
+ attempts++;
554
+
555
+ // Check if base got approved while we were waiting
556
+ if (this.data.approved[baseNodeId]) {
557
+ cleanup();
558
+ resolve(autoApprove("approve"));
559
+ return;
560
+ }
561
+
562
+ const result = tryPiggyback();
563
+ if (result) {
564
+ cleanup();
565
+ result.then(resolve);
566
+ return;
567
+ }
568
+
569
+ if (attempts >= maxAttempts) {
570
+ cleanup();
571
+ resolve("timeout");
572
+ }
573
+ }, 1_000);
574
+ this.sentinelTimers.add(timer);
575
+ });
576
+ }
577
+
484
578
  private addApproved(nodeId: string, deviceInfo?: DeviceInfo, publicKey?: string, resolvedBy?: ApprovalResolvedBy) {
485
579
  this.data.approved[nodeId] = {
486
580
  nodeId,
@@ -518,8 +612,12 @@ export class PeerApprovalManager extends EventEmitter<PeerApprovalEvents> {
518
612
  mode: "notify" | "required",
519
613
  ip?: string,
520
614
  ) {
521
- if (!this.channelApi || this.notifyTargets.length === 0) {
522
- this.log(`sendNotifications: skipped (channelApi=${!!this.channelApi} targets=${this.notifyTargets.length})`);
615
+ if (!this.channelApi && !this.gatewaySend) {
616
+ this.log(`sendNotifications: skipped (no channelApi or gatewaySend)`);
617
+ return;
618
+ }
619
+ if (this.notifyTargets.length === 0) {
620
+ this.log(`sendNotifications: skipped (no targets)`);
523
621
  return;
524
622
  }
525
623
  this.log(`sendNotifications: sending to ${this.notifyTargets.length} targets`);
@@ -553,31 +651,40 @@ export class PeerApprovalManager extends EventEmitter<PeerApprovalEvents> {
553
651
 
554
652
  for (const target of this.notifyTargets) {
555
653
  try {
556
- const channelObj = this.channelApi[target.channel];
557
-
558
- // Convention: sendMessage{Channel} e.g. sendMessageTelegram
559
- const capitalize = (s: string) => s.charAt(0).toUpperCase() + s.slice(1);
560
- const methodName = `sendMessage${capitalize(target.channel)}`;
561
- const sendFn = channelObj?.[methodName];
562
-
563
- if (typeof sendFn === "function") {
564
- // Direct channel API (built-in channels like telegram)
565
- const opts: Record<string, unknown> = {
566
- accountId: target.accountId,
567
- messageThreadId: target.threadId,
568
- };
569
- if (mode === "required") {
570
- opts.buttons = [
571
- [
572
- { text: "\u2705 Approve", callback_data: approveCmd },
573
- { text: "\u274c Deny", callback_data: denyCmd },
574
- ],
575
- ];
654
+ let sent = false;
655
+
656
+ // Try direct channel API first (built-in channels like telegram)
657
+ if (this.channelApi) {
658
+ const channelObj = this.channelApi[target.channel];
659
+ const capitalize = (s: string) => s.charAt(0).toUpperCase() + s.slice(1);
660
+ const methodName = `sendMessage${capitalize(target.channel)}`;
661
+ const sendFn = channelObj?.[methodName];
662
+
663
+ if (typeof sendFn === "function") {
664
+ try {
665
+ const opts: Record<string, unknown> = {
666
+ accountId: target.accountId,
667
+ messageThreadId: target.threadId,
668
+ };
669
+ if (mode === "required") {
670
+ opts.buttons = [
671
+ [
672
+ { text: "\u2705 Approve", callback_data: approveCmd },
673
+ { text: "\u274c Deny", callback_data: denyCmd },
674
+ ],
675
+ ];
676
+ }
677
+ await sendFn(target.to, message, opts);
678
+ this.log(`sendNotifications: sent to ${target.channel}/${target.to} via channelApi`);
679
+ sent = true;
680
+ } catch (apiErr) {
681
+ this.log(`sendNotifications: channelApi failed for ${target.channel}/${target.to}: ${apiErr}, trying gatewaySend`);
682
+ }
576
683
  }
577
- await sendFn(target.to, message, opts);
578
- this.log(`sendNotifications: sent to ${target.channel}/${target.to} via channelApi`);
579
- } else if (this.gatewaySend) {
580
- // Fallback: gateway send method (works for all channels including plugins like feishu)
684
+ }
685
+
686
+ // Fallback: gateway send (works for all channels, more reliable)
687
+ if (!sent && this.gatewaySend) {
581
688
  await this.gatewaySend({
582
689
  to: target.to,
583
690
  message: mode === "required"
@@ -588,8 +695,11 @@ export class PeerApprovalManager extends EventEmitter<PeerApprovalEvents> {
588
695
  threadId: target.threadId != null ? String(target.threadId) : undefined,
589
696
  });
590
697
  this.log(`sendNotifications: sent to ${target.channel}/${target.to} via gatewaySend`);
591
- } else {
592
- this.log(`sendNotifications: no channelApi or gatewaySend for "${target.channel}"`);
698
+ sent = true;
699
+ }
700
+
701
+ if (!sent) {
702
+ this.log(`sendNotifications: no send method available for "${target.channel}"`);
593
703
  }
594
704
  } catch (err) {
595
705
  this.log(`sendNotifications: failed for ${target.channel}/${target.to}: ${err}`);
@@ -619,6 +729,11 @@ export class PeerApprovalManager extends EventEmitter<PeerApprovalEvents> {
619
729
  }
620
730
 
621
731
  destroy() {
732
+ // Clean up sentinel polling timers
733
+ for (const timer of this.sentinelTimers) {
734
+ clearInterval(timer);
735
+ }
736
+ this.sentinelTimers.clear();
622
737
  // Reject all pending approvals
623
738
  for (const pending of this.pending.values()) {
624
739
  pending.resolve("deny");
@@ -59,6 +59,8 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
59
59
  /** Map from ws WebSocket to remote IP (for audit logging on close). */
60
60
  private inboundIps = new Map<WsWebSocket, string>();
61
61
  private gossipDebounceTimer: ReturnType<typeof setTimeout> | null = null;
62
+ /** Latest connection per nodeId awaiting peer approval (updated on reconnect). */
63
+ private pendingApprovalConns = new Map<string, { conn: Connection; caps: NodeCapabilities }>();
62
64
  /** Persistent X25519 identity key pair (TOFU). See identity.ts for security model. */
63
65
  private identityKeyPair: KeyPair;
64
66
  private e2eeOptions: ConnectionE2eeOptions;
@@ -280,12 +282,26 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
280
282
 
281
283
  // ── Outbound connections (standard WebSocket) ──────────────────
282
284
  private connectToPeer(peer: PeerConfig) {
283
- if (this.stopped) return;
285
+ if (this.stopped) {
286
+ debug("peer", `connectToPeer(${peer.nodeId}): skipped (stopped)`);
287
+ return;
288
+ }
289
+
290
+ const attempt = this.reconnectAttempts.get(peer.nodeId) ?? 0;
291
+ debug("peer", `connectToPeer(${peer.nodeId}): attempt=${attempt} url=${peer.url}`);
284
292
 
285
293
  // Use a common WS subprotocol for traffic disguise
286
- const ws = new WebSocket(peer.url, ["graphql-transport-ws"]);
294
+ let ws: WebSocket;
295
+ try {
296
+ ws = new WebSocket(peer.url, ["graphql-transport-ws"]);
297
+ } catch (err) {
298
+ debug("peer", `connectToPeer(${peer.nodeId}): WebSocket constructor threw: ${err}`);
299
+ this.scheduleReconnect(peer);
300
+ return;
301
+ }
287
302
 
288
303
  ws.addEventListener("open", () => {
304
+ debug("peer", `connectToPeer(${peer.nodeId}): ws open`);
289
305
  const conn = new Connection(
290
306
  ws,
291
307
  "outbound",
@@ -297,6 +313,7 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
297
313
  conn.bindWebSocket(ws);
298
314
 
299
315
  conn.on("authenticated", (caps) => {
316
+ debug("peer", `connectToPeer(${peer.nodeId}): authenticated`);
300
317
  this.reconnectAttempts.delete(peer.nodeId);
301
318
  this.onPeerAuthenticated(conn, caps);
302
319
  });
@@ -314,17 +331,27 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
314
331
  }
315
332
  };
316
333
 
317
- ws.addEventListener("error", tryReconnect);
318
- ws.addEventListener("close", tryReconnect);
334
+ ws.addEventListener("error", (ev) => {
335
+ debug("peer", `connectToPeer(${peer.nodeId}): ws error: ${(ev as ErrorEvent).message ?? "unknown"}`);
336
+ tryReconnect();
337
+ });
338
+ ws.addEventListener("close", (ev) => {
339
+ debug("peer", `connectToPeer(${peer.nodeId}): ws close code=${ev.code} reason=${ev.reason}`);
340
+ tryReconnect();
341
+ });
319
342
  }
320
343
 
321
344
  private scheduleReconnect(peer: PeerConfig) {
322
- if (this.stopped) return;
345
+ if (this.stopped) {
346
+ debug("peer", `scheduleReconnect(${peer.nodeId}): skipped (stopped)`);
347
+ return;
348
+ }
323
349
  if (this.reconnectTimers.has(peer.nodeId)) return;
324
350
 
325
351
  const attempt = this.reconnectAttempts.get(peer.nodeId) ?? 0;
326
352
  const delay = Math.min(RECONNECT_BASE * 2 ** attempt, RECONNECT_MAX);
327
353
  this.reconnectAttempts.set(peer.nodeId, attempt + 1);
354
+ debug("peer", `scheduleReconnect(${peer.nodeId}): attempt=${attempt} delay=${delay}ms`);
328
355
 
329
356
  const timer = setTimeout(() => {
330
357
  this.reconnectTimers.delete(peer.nodeId);
@@ -360,8 +387,23 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
360
387
  return;
361
388
  }
362
389
  if (check === "pending") {
390
+ // If already waiting for approval for this nodeId (remote side
391
+ // reconnected after auth timeout), just update the connection ref
392
+ // so the existing .then() handler acts on the latest connection.
393
+ // Do NOT call requestApproval again — that would register duplicate
394
+ // .then() handlers that all call completePeerJoin.
395
+ if (this.pendingApprovalConns.has(nodeId)) {
396
+ debug("approval", `reusing pending approval for ${nodeId}, updating conn ref`);
397
+ this.pendingApprovalConns.set(nodeId, { conn, caps });
398
+ if (this.config.peerApproval?.mode === "required") {
399
+ conn.on("close", () => this.onPeerDisconnected(conn));
400
+ }
401
+ return;
402
+ }
403
+
363
404
  // In notify mode, requestApproval auto-approves and sends notification.
364
405
  // In required mode, it waits for explicit approval.
406
+ this.pendingApprovalConns.set(nodeId, { conn, caps });
365
407
  this.approvalManager.requestApproval(
366
408
  nodeId,
367
409
  caps,
@@ -372,12 +414,16 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
372
414
  peerPublicKey,
373
415
  ip,
374
416
  ).then((decision) => {
375
- if (decision === "approve" && conn.isOpen) {
376
- conn.completeAuth();
377
- this.completePeerJoin(conn, caps);
378
- } else if (conn.isOpen) {
417
+ const latest = this.pendingApprovalConns.get(nodeId);
418
+ this.pendingApprovalConns.delete(nodeId);
419
+ const activeConn = latest?.conn ?? conn;
420
+ const activeCaps = latest?.caps ?? caps;
421
+ if (decision === "approve" && activeConn.isOpen) {
422
+ activeConn.completeAuth();
423
+ this.completePeerJoin(activeConn, activeCaps);
424
+ } else if (activeConn.isOpen) {
379
425
  if (ip) this.approvalManager.recordIpDeny(ip);
380
- conn.close(
426
+ activeConn.close(
381
427
  decision === "timeout" ? 4004 : 4005,
382
428
  decision === "timeout" ? "approval timeout" : "approval denied",
383
429
  );
@@ -14,7 +14,9 @@ export function createClusterPeersTool(): AnyAgentTool {
14
14
  async execute() {
15
15
  try {
16
16
  const runtime = getClusterRuntime();
17
- const allEntries = runtime.peerManager.router.getAllPeers();
17
+ const localNodeId = runtime.config.nodeId;
18
+ const allEntries = runtime.peerManager.router.getAllPeers()
19
+ .filter((e) => e.nodeId !== localNodeId && e.nodeId !== `${localNodeId}:sentinel`);
18
20
 
19
21
  // Separate sentinel peers from normal peers
20
22
  const sentinelSet = new Set<string>();
@@ -34,6 +36,8 @@ export function createClusterPeersTool(): AnyAgentTool {
34
36
  const sentinelStatus = sentinelEntry
35
37
  ? runtime.peerManager.router.getPeerStatus(sentinelEntry)
36
38
  : undefined;
39
+ const sentinelOnline = sentinelStatus === "direct" || sentinelStatus === "relay";
40
+ const effectiveStatus = status === "unreachable" && sentinelOnline ? "sentinel-only" : status;
37
41
 
38
42
  return {
39
43
  nodeId: entry.nodeId,
@@ -45,11 +49,10 @@ export function createClusterPeersTool(): AnyAgentTool {
45
49
  models: entry.models.map((m) => m.id),
46
50
  tags: entry.tags,
47
51
  tools: entry.toolProxy?.enabled ? (entry.toolProxy.allow ?? []) : [],
48
- status,
52
+ status: effectiveStatus,
49
53
  latencyMs: entry.latencyMs,
50
- // Sentinel info merged into the same row
51
54
  ...(hasSentinel ? {
52
- sentinel: sentinelStatus === "direct" || sentinelStatus === "relay" ? "online" : "offline",
55
+ sentinel: sentinelOnline ? "online" : "offline",
53
56
  } : {}),
54
57
  };
55
58
  });
@@ -61,15 +64,16 @@ export function createClusterPeersTool(): AnyAgentTool {
61
64
  if (peers.some((p) => p.nodeId === mainNodeId)) continue;
62
65
  // Main node is gone, only sentinel remains
63
66
  const sentinelStatus = runtime.peerManager.router.getPeerStatus(entry);
67
+ const sentinelOnline = sentinelStatus === "direct" || sentinelStatus === "relay";
64
68
  peers.push({
65
69
  nodeId: mainNodeId,
66
70
  agents: [],
67
71
  models: [],
68
72
  tags: entry.tags.filter((t) => t !== "sentinel"),
69
73
  tools: [],
70
- status: "unreachable",
74
+ status: sentinelOnline ? "sentinel-only" : "unreachable",
71
75
  latencyMs: entry.latencyMs,
72
- sentinel: sentinelStatus === "direct" || sentinelStatus === "relay" ? "online" : "offline",
76
+ sentinel: sentinelOnline ? "online" : "offline",
73
77
  } as (typeof peers)[number]);
74
78
  }
75
79