clawmatrix 0.2.11 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -272,7 +272,7 @@ const plugin = {
272
272
 
273
273
  const repatchTimer = setInterval(patchAllConfigs, 10_000);
274
274
  repatchTimer.unref?.();
275
- api.on("dispose", () => clearInterval(repatchTimer));
275
+ api.on("gateway_stop", () => clearInterval(repatchTimer));
276
276
 
277
277
  for (const [nodeId, models] of Object.entries(modelsByNode)) {
278
278
  api.registerProvider({
@@ -555,6 +555,180 @@ const plugin = {
555
555
  },
556
556
  );
557
557
 
558
+ // ── Tool proxy CLI gateway methods ──────────────────────────────
559
+
560
// Gateway method "clawmatrix.tools.list": responds with every remote peer that
// has an enabled tool proxy, optionally restricted to one node via `params.node`.
api.registerGatewayMethod(
  "clawmatrix.tools.list",
  ({ params, respond }: GatewayRequestHandlerOptions) => {
    type ToolProxyInfo = import("./types.ts").ToolProxyInfo;
    type PeerView = { nodeId: string; connected: boolean; status: string; toolProxy?: ToolProxyInfo };
    type PeerToolInfo = { nodeId: string; status: string; toolProxy?: ToolProxyInfo };

    try {
      const runtime = getClusterRuntime();
      const wantedNode = ((params ?? {}) as { node?: string }).node;
      const merged = mergeSentinelPeers(runtime.peerManager.router.getAllPeers(), runtime);

      const listing: PeerToolInfo[] = merged
        .map((entry) => entry as PeerView)
        // Never list the local node itself.
        .filter((entry) => entry.nodeId !== config.nodeId)
        // Apply the optional node filter, then keep only peers with an enabled tool proxy.
        .filter((entry) => !(wantedNode && entry.nodeId !== wantedNode))
        .filter((entry) => Boolean(entry.toolProxy?.enabled))
        .map((entry) => ({
          nodeId: entry.nodeId,
          status: entry.status,
          toolProxy: entry.toolProxy,
        }));

      respond(true, listing);
    } catch {
      // Any failure inside the try block is reported with this single message.
      respond(false, { error: "ClawMatrix service not running" });
    }
  },
);
590
+
591
// Gateway method "clawmatrix.tools.call": invokes one tool on one node through
// runtime.toolProxy.invoke and responds with its result or the error message.
api.registerGatewayMethod(
  "clawmatrix.tools.call",
  async ({ params, respond }: GatewayRequestHandlerOptions) => {
    try {
      const runtime = getClusterRuntime();
      const request = (params ?? {}) as {
        node?: string;
        tool?: string;
        params?: Record<string, unknown>;
        timeout?: number;
      };

      // Both the target node and the tool name are mandatory.
      if (!request.node || !request.tool) {
        respond(false, { error: "Missing required params: node, tool" });
        return;
      }

      const outcome = await runtime.toolProxy.invoke(
        request.node,
        request.tool,
        request.params ?? {},
        request.timeout,
      );
      respond(true, outcome);
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      respond(false, { error: message });
    }
  },
);
612
+
613
// Gateway method "clawmatrix.tools.batch": runs several tool invocations on one
// node through runtime.toolProxy.invokeBatch.
//
// Params (all read from the untyped gateway payload, hence validated here):
//   node        - target node id (required)
//   items       - non-empty array of { tool, params? } (required)
//   stopOnError - forwarded to invokeBatch unchanged
//   timeout     - forwarded to invokeBatch unchanged
api.registerGatewayMethod(
  "clawmatrix.tools.batch",
  async ({ params, respond }: GatewayRequestHandlerOptions) => {
    try {
      const runtime = getClusterRuntime();
      const { node, items, stopOnError, timeout } = (params ?? {}) as {
        node?: string;
        items?: Array<{ tool: string; params?: Record<string, unknown> }>;
        stopOnError?: boolean;
        timeout?: number;
      };

      // The cast above is not a runtime check: `items` may be any JSON value.
      if (!node || !Array.isArray(items) || items.length === 0) {
        respond(false, { error: "Missing required params: node, items" });
        return;
      }

      // Reject malformed entries up front instead of failing with an opaque
      // TypeError (or forwarding an undefined tool name to the remote node).
      const badIndex = items.findIndex(
        (item) => !item || typeof item.tool !== "string" || item.tool.length === 0,
      );
      if (badIndex !== -1) {
        respond(false, { error: `Invalid batch item at index ${badIndex}: "tool" must be a non-empty string` });
        return;
      }

      const batchItems = items.map((item) => ({
        tool: item.tool,
        params: item.params ?? {},
      }));

      const results = await runtime.toolProxy.invokeBatch(node, batchItems, { stopOnError, timeout });
      respond(true, results);
    } catch (err) {
      respond(false, { error: err instanceof Error ? err.message : String(err) });
    }
  },
);
642
+
643
+ // ── Models CLI gateway method ──────────────────────────────────
644
+
645
// Gateway method "clawmatrix.models.list": responds with the proxy models known
// to runtime.modelProxy, optionally restricted to `params.node`, each annotated
// with whether its owning node is currently reachable.
api.registerGatewayMethod(
  "clawmatrix.models.list",
  ({ params, respond }: GatewayRequestHandlerOptions) => {
    try {
      const runtime = getClusterRuntime();
      const wantedNode = ((params ?? {}) as { node?: string }).node;
      const proxyModels = runtime.modelProxy.allProxyModels;

      // A node counts as reachable when it has an open connection or a relay route.
      const reachableNodes = new Set<string>();
      for (const peer of runtime.peerManager.router.getAllPeers()) {
        if (peer.connection?.isOpen || peer.reachableVia) {
          reachableNodes.add(peer.nodeId);
        }
      }

      const listing: Array<Record<string, unknown>> = [];
      for (const model of proxyModels) {
        if (wantedNode && model.nodeId !== wantedNode) continue;

        // Keys are assigned in a fixed order; optional fields are emitted only when set.
        const entry: Record<string, unknown> = {
          id: model.id,
          nodeId: model.nodeId,
          provider: model.provider ?? model.nodeId,
        };
        if (model.description) entry.description = model.description;
        if (model.contextWindow) entry.contextWindow = model.contextWindow;
        if (model.maxTokens) entry.maxTokens = model.maxTokens;
        if (model.reasoning !== undefined) entry.reasoning = model.reasoning;
        if (model.input) entry.input = model.input;
        if (model.api) entry.api = model.api;
        if (model.cost) entry.cost = model.cost;
        entry.reachable = reachableNodes.has(model.nodeId);

        listing.push(entry);
      }

      respond(true, listing);
    } catch {
      respond(false, { error: "ClawMatrix service not running" });
    }
  },
);
681
+
682
+ // ── Events CLI gateway methods ──────────────────────────────────
683
+
684
// Gateway method "clawmatrix.events.query": forwards a filter to
// runtime.webHandler.queryEvents. `unconsumed` defaults to true and `limit` to 20.
api.registerGatewayMethod(
  "clawmatrix.events.query",
  ({ params, respond }: GatewayRequestHandlerOptions) => {
    try {
      const runtime = getClusterRuntime();
      if (!runtime.webHandler) {
        respond(false, { error: "Events not enabled (web.enabled = false)" });
        return;
      }

      const filter = (params ?? {}) as {
        type?: string;
        source?: string;
        since?: number;
        unconsumed?: boolean;
        limit?: number;
      };

      const matches = runtime.webHandler.queryEvents({
        type: filter.type,
        source: filter.source,
        since: filter.since,
        unconsumed: filter.unconsumed ?? true,
        limit: filter.limit ?? 20,
      });
      respond(true, matches);
    } catch {
      respond(false, { error: "ClawMatrix service not running" });
    }
  },
);
709
+
710
// Gateway method "clawmatrix.events.consume": passes a list of event ids to
// runtime.webHandler.consumeEvents and responds with its result plus the ids.
//
// Params:
//   ids - non-empty array of event id strings (required)
api.registerGatewayMethod(
  "clawmatrix.events.consume",
  ({ params, respond }: GatewayRequestHandlerOptions) => {
    try {
      const runtime = getClusterRuntime();
      if (!runtime.webHandler) {
        respond(false, { error: "Events not enabled (web.enabled = false)" });
        return;
      }
      const { ids } = (params ?? {}) as { ids?: string[] };
      // The cast is not a runtime check: a bare string also has a non-zero
      // `.length`, so require a real array whose entries are all strings.
      if (!Array.isArray(ids) || ids.length === 0 || !ids.every((id) => typeof id === "string")) {
        respond(false, { error: "Missing required param: ids (array of event IDs)" });
        return;
      }
      const consumed = runtime.webHandler.consumeEvents(ids);
      respond(true, { consumed, ids });
    } catch {
      respond(false, { error: "ClawMatrix service not running" });
    }
  },
);
731
+
558
732
  // Log model selection on each LLM call (fire-and-forget)
559
733
  api.on("llm_input", (event) => {
560
734
  api.logger.debug(`[clawmatrix] llm_input: provider=${event.provider} model=${event.model}`);
@@ -563,6 +737,10 @@ const plugin = {
563
737
  // CLI subcommand
564
738
  api.registerCli(registerClusterCli, { commands: ["clawmatrix"] });
565
739
 
740
+ // Auto-install global `clawmatrix` shim next to the `openclaw` binary.
741
+ // Runs once on plugin load; non-blocking, best-effort.
742
+ installGlobalCliShim(api.logger);
743
+
566
744
  // Plugin command: /clawmatrix approve|deny|revoke
567
745
  // Handles Telegram callback buttons and other chat surfaces.
568
746
  // Plugin commands are processed before the agent, so they bypass the LLM.
@@ -651,11 +829,9 @@ const plugin = {
651
829
  lines.push("[ClawMatrix] No peers online. Use cluster_peers to check cluster status.");
652
830
  } else {
653
831
  lines.push(
654
- `[ClawMatrix Cluster] YOU ARE node="${config.nodeId}"${config.tags.length ? ` tags=${config.tags.join(",")}` : ""}. This is YOUR identity — never target yourself with cluster tools.`,
832
+ `[ClawMatrix Cluster] YOU ARE node="${config.nodeId}"${config.tags.length ? ` tags=${config.tags.join(",")}` : ""}. ${peerCount} peer(s) online.`,
655
833
  ...(config.agents.length > 0 ? [`Role: ${config.agents[0]!.description}`] : []),
656
- `${peerCount} remote peer(s) online. Use cluster_peers to see topology, agents, and models.`,
657
- "Prefer cluster_tool for device-specific tools (screenshot, battery, etc.); cluster_exec/read/write for file/shell ops; cluster_handoff for complex multi-step tasks.",
658
- "IMPORTANT: Always tell user which remote node you're targeting before calling cluster tools.",
834
+ "Use cluster_peers to see topology. Always tell user which remote node you're targeting before calling cluster tools.",
659
835
  );
660
836
  }
661
837
  cachedSystemContext = lines.join("\n");
@@ -753,6 +929,9 @@ function mergeSentinelPeers(
753
929
  status: effectiveStatus,
754
930
  reachableVia: p.reachableVia,
755
931
  latencyMs: p.latencyMs,
932
+ toolProxy: p.toolProxy,
933
+ acpAgents: p.acpAgents,
934
+ deviceInfo: p.deviceInfo,
756
935
  ...(sentinel ? { sentinel: sentinelOnline ? "online" : "offline" } : {}),
757
936
  });
758
937
  }
@@ -777,4 +956,54 @@ function mergeSentinelPeers(
777
956
  return result;
778
957
  }
779
958
 
959
/**
 * Auto-install a global `clawmatrix` CLI shim next to the `openclaw` binary.
 *
 * Scans `PATH` for the first executable named `openclaw`, resolves symlinks,
 * and creates a small `sh` script called `clawmatrix` in the resolved
 * directory. The script simply runs `exec openclaw clawmatrix "$@"`.
 *
 * Best-effort: this function never throws. An already existing `clawmatrix`
 * file (ours or anyone else's) is left untouched, permission-type failures are
 * ignored, and any other failure is reported through `logger.warn`.
 *
 * NOTE(review): the shim is written to the directory of the symlink-resolved
 * `openclaw` path, which may differ from the `PATH` entry where it was found —
 * confirm that this directory is the intended install location.
 *
 * @param logger Sink for the success message (`info`) and unexpected failures (`warn`).
 */
function installGlobalCliShim(logger: { info: (msg: string) => void; warn: (msg: string) => void }) {
  // Failure codes that are expected when the bin directory is not writable
  // (e.g. a system-wide install); these stay silent to avoid startup noise.
  const expectedFsErrors = new Set(["EACCES", "EPERM", "EROFS"]);

  try {
    const fs = require("node:fs") as typeof import("node:fs");
    const path = require("node:path") as typeof import("node:path");

    // Locate `openclaw` on PATH and resolve symlinks to its real directory.
    let binDir: string | null = null;
    for (const dir of (process.env.PATH ?? "").split(path.delimiter)) {
      // An empty PATH entry would make the candidate relative to the current
      // working directory; never install a shim there.
      if (!dir) continue;
      const candidate = path.join(dir, "openclaw");
      try {
        fs.accessSync(candidate, fs.constants.X_OK);
        binDir = path.dirname(fs.realpathSync(candidate));
        break;
      } catch {
        // Not in this dir
      }
    }
    if (!binDir) return;

    const shimPath = path.join(binDir, "clawmatrix");
    const shim = [
      "#!/usr/bin/env sh",
      "# clawmatrix-shim: auto-installed by clawmatrix plugin",
      'exec openclaw clawmatrix "$@"',
      "",
    ].join("\n");

    // "wx" = create exclusively: fails with EEXIST if anything already lives at
    // shimPath, so an existing shim is kept and a foreign file is never clobbered.
    fs.writeFileSync(shimPath, shim, { mode: 0o755, flag: "wx" });
    logger.info(`[clawmatrix] Installed global CLI shim: ${shimPath}`);
  } catch (err) {
    const code = (err as NodeJS.ErrnoException | undefined)?.code;
    // Already installed (or a foreign file is present): nothing to do.
    if (code === "EEXIST") return;
    if (code !== undefined && expectedFsErrors.has(code)) return;
    // Best-effort: report, but don't break plugin loading if shim install fails.
    logger.warn(
      `[clawmatrix] Could not install global CLI shim: ${err instanceof Error ? err.message : String(err)}`,
    );
  }
}
1008
+
780
1009
  export default plugin;
@@ -1,6 +1,6 @@
1
1
  import * as Automerge from "@automerge/automerge";
2
2
  import { watch, type FSWatcher } from "node:fs";
3
- import { readdir, readFile, stat as fsStat, writeFile, mkdir, rename } from "node:fs/promises";
3
+ import { readdir, readFile, stat as fsStat, writeFile, mkdir, rename, unlink } from "node:fs/promises";
4
4
  import path from "node:path";
5
5
  import ignore, { type Ignore } from "ignore";
6
6
  import picomatch from "picomatch";
@@ -131,7 +131,7 @@ export class KnowledgeSync {
131
131
  private localChangesRunning = false;
132
132
  private localChangesQueued = false;
133
133
  /** Paths recently written by exportFileToFs — suppress watcher re-trigger. Stores {content, timestamp}. */
134
- private writtenByExport = new Map<string, { content: string; ts: number }>();
134
+ private writtenByExport = new Map<string, { content: string | null; ts: number }>();
135
135
  /** Deferred git commit timer — batches multiple remote syncs into one commit. */
136
136
  private gitCommitTimer: ReturnType<typeof setTimeout> | null = null;
137
137
  private pendingGitSources = new Set<string>();
@@ -326,10 +326,20 @@ export class KnowledgeSync {
326
326
  await this.saveAutomergeDoc(this.registryPath, this.registry);
327
327
 
328
328
  // Discover new files from registry and initiate their sync
329
+ const deletedPaths: string[] = [];
329
330
  for (const [relPath, meta] of Object.entries(newDoc.files ?? {})) {
330
331
  if (meta.deleted) {
331
- // Clean up sync states for deleted files
332
+ // Clean up sync states, in-memory doc, persisted doc, and local file
332
333
  this.cleanupDeletedFileSyncStates(relPath);
334
+ if (this.fileDocs.has(relPath)) {
335
+ this.fileDocs.delete(relPath);
336
+ deletedPaths.push(relPath);
337
+ // Remove persisted automerge doc
338
+ const docPath = path.join(this.docsDir, docFileName(relPath));
339
+ await rename(docPath, docPath + ".deleted").catch(() => {});
340
+ // Delete local file from workspace
341
+ await this.deleteLocalFile(relPath);
342
+ }
333
343
  continue;
334
344
  }
335
345
  if (!this.fileDocs.has(relPath)) {
@@ -339,6 +349,12 @@ export class KnowledgeSync {
339
349
  this.syncDocWithPeer(peerId, relPath);
340
350
  }
341
351
 
352
+ // Commit remote deletions to git
353
+ if (deletedPaths.length > 0) {
354
+ debug(TAG, `remote deletion from ${peerId}: ${deletedPaths.join(", ")}`);
355
+ this.schedulePendingGitCommit(peerId);
356
+ }
357
+
342
358
  this.sendSyncMessage(peerId, REGISTRY_DOC_ID);
343
359
  }
344
360
 
@@ -846,6 +862,30 @@ export class KnowledgeSync {
846
862
  }
847
863
  }
848
864
 
865
/**
 * Delete a local file from workspace (triggered by remote deletion).
 *
 * The target is resolved against the workspace root and the call is refused
 * (with a debug log) when it would resolve to the root itself or to anything
 * outside of it. A missing file is not an error; other unlink failures are
 * only logged.
 *
 * @param relPath Workspace-relative path of the file to remove.
 */
private async deleteLocalFile(relPath: string) {
  // Normalise the root first so a relative workspacePath or a trailing
  // separator cannot make every containment check fail.
  const root = path.resolve(this.opts.workspacePath);
  const absPath = path.resolve(root, relPath);
  const fromRoot = path.relative(root, absPath);

  // Prevent path traversal: the target must be strictly inside the workspace.
  // An empty relative path means the workspace root itself, which is never a file to unlink.
  const escapesRoot =
    fromRoot === "" ||
    fromRoot === ".." ||
    fromRoot.startsWith(".." + path.sep) ||
    path.isAbsolute(fromRoot);
  if (escapesRoot) {
    debug(TAG, `blocked path traversal on delete: ${relPath}`);
    return;
  }

  // Mark as our own deletion so the watcher doesn't re-process it.
  // handleLocalChangesInner sees currentContent === null === marker.content and skips.
  this.writtenByExport.set(relPath, { content: null, ts: Date.now() });
  try {
    await unlink(absPath);
    debug(TAG, `deleted local file: ${relPath}`);
  } catch (err) {
    // File may not exist locally — that's fine
    if ((err as NodeJS.ErrnoException).code !== "ENOENT") {
      debug(TAG, `failed to delete local file ${relPath}: ${err}`);
    }
  }
}
888
+
849
889
  /** Read all workspace files matching whitelist. */
850
890
  private async readWhitelistedFiles(): Promise<Record<string, string>> {
851
891
  const files: Record<string, string> = {};
@@ -954,9 +994,7 @@ export class KnowledgeSync {
954
994
  }
955
995
 
956
996
  let doc = Automerge.init<FileDoc>();
957
- doc = Automerge.change(doc, (d) => {
958
- (d as FileDoc).content = content;
959
- });
997
+ doc = changeFileContent(doc, content);
960
998
  this.fileDocs.set(relPath, doc);
961
999
 
962
1000
  this.registry = Automerge.change(this.registry, (d) => {
@@ -754,7 +754,7 @@ export class ModelProxy {
754
754
  let currentId = requestId;
755
755
  let currentTarget = targetNodeId;
756
756
  let currentFrame = frame;
757
- let remaining = failoverCandidates;
757
+ let failoverIdx = 0; // index into failoverCandidates (avoids slice allocations)
758
758
  const maxAttempts = failoverCandidates.length + 1;
759
759
 
760
760
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
@@ -763,13 +763,13 @@ export class ModelProxy {
763
763
 
764
764
  if (!result.success) {
765
765
  // Upstream error — try failover if available
766
- if (remaining.length > 0 && buildFrame) {
767
- const next = remaining[0]!;
768
- debug("proxy", `failover: remote error "${result.error}" → trying ${next.routeNodeId} (${remaining.length - 1} left)`);
766
+ if (failoverIdx < failoverCandidates.length && buildFrame) {
767
+ const next = failoverCandidates[failoverIdx]!;
768
+ debug("proxy", `failover: remote error "${result.error}" → trying ${next.routeNodeId} (${failoverCandidates.length - failoverIdx - 1} left)`);
769
+ failoverIdx++;
769
770
  currentId = crypto.randomUUID();
770
771
  currentFrame = buildFrame(next, currentId);
771
772
  currentTarget = next.routeNodeId;
772
- remaining = remaining.slice(1);
773
773
  continue;
774
774
  }
775
775
  return {
@@ -782,13 +782,13 @@ export class ModelProxy {
782
782
  return this.formatNonStreamResult(result, currentId, currentFrame, responseFormat);
783
783
  } catch (err) {
784
784
  // Timeout or send failure — try failover
785
- if (remaining.length > 0 && buildFrame) {
786
- const next = remaining[0]!;
787
- debug("proxy", `failover: ${err instanceof Error ? err.message : String(err)} → trying ${next.routeNodeId} (${remaining.length - 1} left)`);
785
+ if (failoverIdx < failoverCandidates.length && buildFrame) {
786
+ const next = failoverCandidates[failoverIdx]!;
787
+ debug("proxy", `failover: ${err instanceof Error ? err.message : String(err)} → trying ${next.routeNodeId} (${failoverCandidates.length - failoverIdx - 1} left)`);
788
+ failoverIdx++;
788
789
  currentId = crypto.randomUUID();
789
790
  currentFrame = buildFrame(next, currentId);
790
791
  currentTarget = next.routeNodeId;
791
- remaining = remaining.slice(1);
792
792
  continue;
793
793
  }
794
794
  return {
@@ -980,6 +980,24 @@ export class ModelProxy {
980
980
  const pending = this.pending.get(frame.id);
981
981
  if (!pending?.stream || !pending.controller || !pending.encoder) return;
982
982
 
983
+ // Reset activity timer — keeps long-running streams alive and detects
984
+ // stalled connections within modelTimeout of the last received chunk.
985
+ clearTimeout(pending.timer);
986
+ if (!frame.payload.done) {
987
+ pending.timer = setTimeout(() => {
988
+ // Capture references before cleanup removes pending from the map
989
+ const { stableStreamId, responseFormat, controller, encoder, model, failoverCandidates, buildFrame } = pending;
990
+ this.cleanupRequest(frame.id);
991
+ this.peerManager.router.markFailed(frame.id);
992
+ this.tryStreamFailover(
993
+ stableStreamId ?? frame.id, responseFormat,
994
+ controller!, encoder!, model ?? "",
995
+ failoverCandidates ?? [], buildFrame,
996
+ `stream stalled (no data for ${this.modelTimeout / 1000}s)`,
997
+ );
998
+ }, this.modelTimeout);
999
+ }
1000
+
983
1001
  try {
984
1002
  if (pending.responseFormat === "responses") {
985
1003
  this.handleModelStreamResponses(frame, pending);
@@ -1305,7 +1323,14 @@ export class ModelProxy {
1305
1323
  let chatFallbackResult: Awaited<ReturnType<ModelProxy["retryWithChatCompletions"]>> = null;
1306
1324
  try {
1307
1325
  result = JSON.parse(responseText);
1308
- } catch {
1326
+ // Detect error objects in 200 OK responses (some APIs return HTTP 200 with error body)
1327
+ if (result.error && typeof result.error === "object" && !result.choices && !result.output) {
1328
+ const errMsg = (result.error as { message?: string }).message ?? JSON.stringify(result.error);
1329
+ throw new Error(`Upstream error (200 OK): ${String(errMsg).slice(0, 200)}`);
1330
+ }
1331
+ } catch (parseErr) {
1332
+ // Re-throw non-parse errors (e.g. upstream error detection above)
1333
+ if (!(parseErr instanceof SyntaxError)) throw parseErr;
1309
1334
  // Upstream returned non-JSON (e.g. SSE in non-stream mode) — try chat completions fallback
1310
1335
  if (!cachedApi && isResponsesApi) {
1311
1336
  debug("model_req", `responses API returned non-JSON for "${model.id}", retrying with chat completions`);
@@ -93,6 +93,8 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
93
93
  private wss: WebSocketServer | null = null;
94
94
  private reconnectTimers = new Map<string, ReturnType<typeof setTimeout>>();
95
95
  private reconnectAttempts = new Map<string, number>();
96
+ /** Deferred disconnect timers — grace period before broadcasting peer_leave. */
97
+ private disconnectGraceTimers = new Map<string, ReturnType<typeof setTimeout>>();
96
98
  private stopped = false;
97
99
  /** Map from ws WebSocket to Connection for inbound connections. */
98
100
  private inboundConnections = new Map<WsWebSocket, Connection>();
@@ -165,6 +167,17 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
165
167
  }
166
168
  }
167
169
 
170
/**
 * Replace the local tool proxy catalog and push the change to every direct peer.
 *
 * The catalog is stored on `localCapabilities.toolProxy` only when a tool proxy
 * entry already exists there; the router is updated and a peer sync is sent on
 * each direct connection regardless.
 */
updateToolCatalog(catalog: import("./types.ts").ToolCatalogEntry[]) {
  const currentProxy = this.localCapabilities.toolProxy;
  if (currentProxy) {
    // Copy rather than mutate, so the previous capabilities object stays intact.
    this.localCapabilities.toolProxy = { ...currentProxy, catalog };
  }

  this.router.updateLocalToolCatalog(catalog);

  const directConnections = this.router.getDirectConnections();
  for (const connection of directConnections) {
    this.sendPeerSync(connection);
  }
}
180
+
168
181
  // ── Lifecycle ──────────────────────────────────────────────────
169
182
  async start() {
170
183
  await this.approvalManager.load();
@@ -190,6 +203,12 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
190
203
  clearTimeout(timer);
191
204
  }
192
205
  this.reconnectTimers.clear();
206
+ // Flush all disconnect grace timers (execute leave immediately on shutdown)
207
+ for (const [nodeId, timer] of this.disconnectGraceTimers) {
208
+ clearTimeout(timer);
209
+ this.executePeerLeave(nodeId);
210
+ }
211
+ this.disconnectGraceTimers.clear();
193
212
 
194
213
  this.router.broadcast({
195
214
  type: "peer_leave",
@@ -461,9 +480,6 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
461
480
  if (this.pendingApprovalConns.has(nodeId)) {
462
481
  debug("approval", `reusing pending approval for ${nodeId}, updating conn ref`);
463
482
  this.pendingApprovalConns.set(nodeId, { conn, caps });
464
- if (this.config.peerApproval?.mode === "required") {
465
- conn.on("close", () => this.onPeerDisconnected(conn));
466
- }
467
483
  return;
468
484
  }
469
485
 
@@ -495,10 +511,12 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
495
511
  );
496
512
  }
497
513
  });
498
- // In required mode, don't complete the join yet
514
+ // In required mode, don't complete the join yet.
515
+ // No close handler needed here: the peer was never added to the router,
516
+ // so onPeerDisconnected would broadcast a spurious peer_leave.
517
+ // If the conn drops before approval resolves, the .then() handler sees
518
+ // activeConn.isOpen === false and skips all actions.
499
519
  if (this.config.peerApproval?.mode === "required") {
500
- // Wire up close handler to clean up if connection drops while pending
501
- conn.on("close", () => this.onPeerDisconnected(conn));
502
520
  return;
503
521
  }
504
522
  // In notify mode, requestApproval resolves immediately, but
@@ -515,6 +533,9 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
515
533
  private completePeerJoin(conn: Connection, caps: NodeCapabilities) {
516
534
  const nodeId = conn.remoteNodeId!;
517
535
 
536
+ // Cancel disconnect grace timer if the peer is reconnecting
537
+ const wasInGrace = this.cancelDisconnectGrace(nodeId);
538
+
518
539
  // If there's an existing connection for this nodeId (e.g. peer reconnected
519
540
  // while old TCP hadn't closed yet), close it AFTER overwriting the route so
520
541
  // the stale-close guard in onPeerDisconnected correctly skips cleanup.
@@ -585,15 +606,58 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
585
606
  return;
586
607
  }
587
608
 
609
+ // Grace period: defer peer_leave broadcast to allow quick reconnection
610
+ // (e.g. iOS WiFi ↔ cellular handoff, brief audio interruption).
611
+ // If the peer reconnects within the grace window, completePeerJoin
612
+ // will cancel this timer via cancelDisconnectGrace.
613
+ const graceMs = this.config.disconnectGrace ?? 30_000;
614
+ if (graceMs <= 0) {
615
+ this.executePeerLeave(nodeId, conn);
616
+ return;
617
+ }
618
+ debug("peer", `onPeerDisconnected(${nodeId}): starting ${graceMs / 1000}s grace period`);
619
+
620
+ // Clear any existing grace timer for this node (shouldn't happen, but be safe)
621
+ this.cancelDisconnectGrace(nodeId);
622
+
623
+ this.disconnectGraceTimers.set(nodeId, setTimeout(() => {
624
+ this.disconnectGraceTimers.delete(nodeId);
625
+ this.executePeerLeave(nodeId, conn);
626
+ }, graceMs));
627
+ }
628
+
629
/**
 * Cancel a pending disconnect grace timer (called when peer reconnects quickly).
 *
 * @returns `true` when a timer was registered for `nodeId` and has been
 *          cleared, `false` when there was nothing to cancel.
 */
private cancelDisconnectGrace(nodeId: string): boolean {
  const pendingTimer = this.disconnectGraceTimers.get(nodeId);
  if (!pendingTimer) {
    return false;
  }

  clearTimeout(pendingTimer);
  this.disconnectGraceTimers.delete(nodeId);
  debug("peer", `cancelDisconnectGrace(${nodeId}): peer reconnected within grace period`);
  return true;
}
640
+
641
+ /** Execute the actual peer leave (after grace period expires or immediate for shutdown). */
642
+ private executePeerLeave(nodeId: string, conn?: Connection) {
643
+ // Double-check the route hasn't been replaced by a new connection during grace
644
+ if (conn) {
645
+ const currentRoute = this.router.getRoute(nodeId);
646
+ if (currentRoute?.connection && currentRoute.connection !== conn) {
647
+ debug("peer", `executePeerLeave(${nodeId}): route replaced during grace — skipping`);
648
+ return;
649
+ }
650
+ }
651
+
588
652
  audit("peer_leave", { nodeId });
589
653
  this.router.removePeer(nodeId);
590
654
 
591
655
  // Remove satellite contexts that were only reachable via this peer
592
- this.satelliteContexts = this.satelliteContexts.filter(s => {
593
- // Keep satellites that are not associated with the disconnected peer
594
- // (satellite nodeIds typically differ from mesh peer nodeIds)
595
- return s.nodeId !== nodeId;
596
- });
656
+ for (let i = this.satelliteContexts.length - 1; i >= 0; i--) {
657
+ if (this.satelliteContexts[i].nodeId === nodeId) {
658
+ this.satelliteContexts.splice(i, 1);
659
+ }
660
+ }
597
661
 
598
662
  this.router.broadcast({
599
663
  type: "peer_leave",
@@ -748,13 +812,17 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
748
812
  const prev = this.router.getRoute(peer.nodeId);
749
813
  const hadAgents = prev?.agents.length ?? 0;
750
814
  const hadDirectPeers = prev?.directPeers.length ?? 0;
751
- const hadToolProxy = JSON.stringify(prev?.toolProxy);
752
815
  const hadDeviceInfo = prev?.deviceInfo?.hostname;
753
816
  const hadAcpAgents = prev?.acpAgents?.length ?? 0;
817
+ const hadToolProxyEnabled = prev?.toolProxy?.enabled;
818
+ const hadToolProxyCatalogLen = prev?.toolProxy?.catalog?.length ?? 0;
819
+ const hadToolProxyAllowLen = prev?.toolProxy?.allow?.length ?? 0;
754
820
  this.router.updatePeerCapabilities(peer.nodeId, peer);
755
821
  if (peer.agents.length !== hadAgents || peer.models.length !== (prev?.models.length ?? 0)
756
822
  || (peer.directPeers?.length ?? 0) !== hadDirectPeers
757
- || JSON.stringify(peer.toolProxy) !== hadToolProxy
823
+ || peer.toolProxy?.enabled !== hadToolProxyEnabled
824
+ || (peer.toolProxy?.catalog?.length ?? 0) !== hadToolProxyCatalogLen
825
+ || (peer.toolProxy?.allow?.length ?? 0) !== hadToolProxyAllowLen
758
826
  || peer.deviceInfo?.hostname !== hadDeviceInfo
759
827
  || (peer.acpAgents?.length ?? 0) !== hadAcpAgents) {
760
828
  changed = true;
@@ -33,19 +33,20 @@ export class RateLimiter {
33
33
 
34
34
  let timestamps = this.attempts.get(ip);
35
35
  if (timestamps) {
36
- // Remove expired entries
37
- timestamps = timestamps.filter((t) => t > cutoff);
36
+ // In-place pruning: find first non-expired index and splice
37
+ let firstValid = 0;
38
+ while (firstValid < timestamps.length && timestamps[firstValid] <= cutoff) firstValid++;
39
+ if (firstValid > 0) timestamps.splice(0, firstValid);
38
40
  } else {
39
41
  timestamps = [];
42
+ this.attempts.set(ip, timestamps);
40
43
  }
41
44
 
42
45
  if (timestamps.length >= this.config.maxAttempts) {
43
- this.attempts.set(ip, timestamps);
44
46
  return false;
45
47
  }
46
48
 
47
49
  timestamps.push(now);
48
- this.attempts.set(ip, timestamps);
49
50
  return true;
50
51
  }
51
52
 
@@ -61,19 +62,24 @@ export class RateLimiter {
61
62
  /** Get remaining attempts for an IP. */
62
63
  remaining(ip: string): number {
63
64
  const cutoff = Date.now() - this.config.windowMs;
64
- const timestamps = this.attempts.get(ip) ?? [];
65
- const active = timestamps.filter((t) => t > cutoff).length;
65
+ const timestamps = this.attempts.get(ip);
66
+ if (!timestamps) return this.config.maxAttempts;
67
+ let active = 0;
68
+ for (let i = timestamps.length - 1; i >= 0; i--) {
69
+ if (timestamps[i] > cutoff) active++; else break;
70
+ }
66
71
  return Math.max(0, this.config.maxAttempts - active);
67
72
  }
68
73
 
69
74
  private gc() {
70
75
  const cutoff = Date.now() - this.config.windowMs;
71
76
  for (const [ip, timestamps] of this.attempts) {
72
- const active = timestamps.filter((t) => t > cutoff);
73
- if (active.length === 0) {
77
+ let firstValid = 0;
78
+ while (firstValid < timestamps.length && timestamps[firstValid] <= cutoff) firstValid++;
79
+ if (firstValid === timestamps.length) {
74
80
  this.attempts.delete(ip);
75
- } else {
76
- this.attempts.set(ip, active);
81
+ } else if (firstValid > 0) {
82
+ timestamps.splice(0, firstValid);
77
83
  }
78
84
  }
79
85
  }