clawmatrix 0.2.9 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/handoff.ts CHANGED
@@ -54,16 +54,33 @@ export class HandoffManager {
54
54
  // Multi-device sync: track which nodes are watching each handoff session (by sessionId)
55
55
  private sessionWatchers = new Map<string, Set<string>>();
56
56
 
57
+ // Pre-built indexes for O(1) agent lookup
58
+ private agentById = new Map<string, ClawMatrixConfig["agents"][number]>();
59
+ private agentsByTag = new Map<string, ClawMatrixConfig["agents"][number]>();
60
+
57
61
  constructor(config: ClawMatrixConfig, peerManager: PeerManager, gatewayInfo: GatewayInfo) {
58
62
  this.config = config;
59
63
  this.peerManager = peerManager;
60
64
  this.gatewayInfo = gatewayInfo;
61
65
  this.taskActivity = new TaskActivityBroadcaster(config, peerManager);
62
66
 
67
+ // Build agent indexes
68
+ for (const a of config.agents) {
69
+ this.agentById.set(a.id, a);
70
+ for (const t of a.tags) this.agentsByTag.set(t, a);
71
+ }
72
+
63
73
  // Periodically clean up stale input_required entries
64
74
  this.staleCleanupTimer = setInterval(() => this.cleanupStale(), STALE_CLEANUP_INTERVAL);
65
75
  }
66
76
 
77
/**
 * Resolves a handoff target to a locally configured agent.
 *
 * A target of the form `tags:<tag>` is looked up in the tag index; any other
 * value is treated as an agent id.
 *
 * @param target - An agent id, or `tags:` followed by a tag name.
 * @returns The indexed agent, or `undefined` when nothing matches.
 */
private findLocalAgent(target: string): ClawMatrixConfig["agents"][number] | undefined {
  const tagPrefix = "tags:";
  const isTagTarget = target.startsWith(tagPrefix);
  return isTagTarget
    ? this.agentsByTag.get(target.slice(tagPrefix.length))
    : this.agentById.get(target);
}
83
+
67
84
  // ── Multi-device sync helpers ──────────────────────────────────
68
85
 
69
86
  private addSessionWatcher(sessionId: string, nodeId: string) {
@@ -271,13 +288,7 @@ export class HandoffManager {
271
288
  const { id, from, payload } = frame;
272
289
 
273
290
  // Find matching local agent
274
- const agent = this.config.agents.find((a) => {
275
- if (payload.target.startsWith("tags:")) {
276
- const tag = payload.target.slice(5);
277
- return a.tags.includes(tag);
278
- }
279
- return a.id === payload.target;
280
- });
291
+ const agent = this.findLocalAgent(payload.target);
281
292
 
282
293
  if (!agent) {
283
294
  this.peerManager.sendTo(from, {
@@ -477,6 +488,8 @@ export class HandoffManager {
477
488
  const decoder = new TextDecoder();
478
489
  const chunks: string[] = [];
479
490
  let buffer = "";
491
+ // Cache active entry lookup outside the hot loop — stable during the stream
492
+ const activeEntry = this.active.get(handoffId);
480
493
 
481
494
  try {
482
495
  while (true) {
@@ -510,7 +523,6 @@ export class HandoffManager {
510
523
 
511
524
  // Broadcast progress to mobile nodes (throttled, detail is just
512
525
  // a heartbeat — don't send token-level deltas as they're meaningless fragments)
513
- const activeEntry = this.active.get(handoffId);
514
526
  if (activeEntry) {
515
527
  this.taskActivity.broadcast(
516
528
  handoffId, "handoff", "progress", activeEntry.agent, activeEntry.startedAt,
@@ -605,6 +617,7 @@ export class HandoffManager {
605
617
 
606
618
  // If canceled during input_required, no runAgentTurn is running to clean up
607
619
  if (wasInputRequired) {
620
+ this.sessionWatchers.delete(entry.sessionId);
608
621
  this.active.delete(frame.id);
609
622
  }
610
623
 
@@ -132,12 +132,17 @@ export class HealthTracker {
132
132
  // ── Event recording ─────────────────────────────────────────
133
133
 
134
134
  recordEvent(event: HealthEvent) {
135
+ // Strip undefined values — Automerge rejects them
136
+ const clean: Record<string, unknown> = {};
137
+ for (const [k, v] of Object.entries(event)) {
138
+ if (v !== undefined) clean[k] = v;
139
+ }
135
140
  this.doc = Automerge.change(this.doc, (d) => {
136
141
  if (!d.nodes[this.nodeId]) {
137
142
  d.nodes[this.nodeId] = { events: [], lastUpdated: 0 };
138
143
  }
139
144
  const entry = d.nodes[this.nodeId]!;
140
- entry.events.push({ ...event });
145
+ entry.events.push(clean as HealthEvent);
141
146
  entry.lastUpdated = Date.now();
142
147
  });
143
148
  this.scheduleSave();
package/src/index.ts CHANGED
@@ -16,6 +16,7 @@ import { createClusterDiagnosticTool } from "./tools/cluster-diagnostic.ts";
16
16
  import { createClusterAcpTool } from "./tools/cluster-acp.ts";
17
17
  import { createClusterTerminalTool } from "./tools/cluster-terminal.ts";
18
18
  import { createClusterToolInvokeTool } from "./tools/cluster-tool.ts";
19
+ import { createClusterTransferTool } from "./tools/cluster-transfer.ts";
19
20
  import { registerClusterCli } from "./cli.ts";
20
21
  import { spawnProcess } from "./compat.ts";
21
22
 
@@ -271,7 +272,7 @@ const plugin = {
271
272
 
272
273
  const repatchTimer = setInterval(patchAllConfigs, 10_000);
273
274
  repatchTimer.unref?.();
274
- api.on("dispose", () => clearInterval(repatchTimer));
275
+ api.on("gateway_stop", () => clearInterval(repatchTimer));
275
276
 
276
277
  for (const [nodeId, models] of Object.entries(modelsByNode)) {
277
278
  api.registerProvider({
@@ -302,6 +303,7 @@ const plugin = {
302
303
  api.registerTool(createClusterAcpTool(), { optional: true });
303
304
  api.registerTool(createClusterTerminalTool(), { optional: true });
304
305
  api.registerTool(createClusterToolInvokeTool(), { optional: true });
306
+ api.registerTool(createClusterTransferTool(), { optional: true });
305
307
 
306
308
  // Wire up peer approval with OpenClaw channel API
307
309
  if (config.peerApproval.enabled) {
@@ -330,20 +332,23 @@ const plugin = {
330
332
 
331
333
  const proc = spawnProcess(
332
334
  ["openclaw", "gateway", "call", "send", "--json", "--params", JSON.stringify(sendParams)],
333
- { stdout: "pipe", stderr: "pipe" },
335
+ { stdout: "ignore", stderr: "pipe" },
334
336
  );
335
- const code = await proc.exited;
336
- if (code !== 0) {
337
- const stderrChunks: Uint8Array[] = [];
337
+ // Collect stderr concurrently with waiting for exit to avoid pipe deadlock
338
+ const stderrPromise = (async () => {
339
+ const chunks: Uint8Array[] = [];
338
340
  if (proc.stderr) {
339
341
  const reader = proc.stderr.getReader();
340
342
  while (true) {
341
343
  const { done, value } = await reader.read();
342
344
  if (done) break;
343
- stderrChunks.push(value);
345
+ chunks.push(value);
344
346
  }
345
347
  }
346
- const errMsg = Buffer.concat(stderrChunks).toString("utf-8").trim();
348
+ return Buffer.concat(chunks).toString("utf-8").trim();
349
+ })();
350
+ const [code, errMsg] = await Promise.all([proc.exited, stderrPromise]);
351
+ if (code !== 0) {
347
352
  throw new Error(`gateway send failed (exit ${code}): ${errMsg}`);
348
353
  }
349
354
  });
@@ -550,6 +555,180 @@ const plugin = {
550
555
  },
551
556
  );
552
557
 
558
+ // ── Tool proxy CLI gateway methods ──────────────────────────────
559
+
560
// Gateway method: lists remote peers that advertise an enabled tool proxy,
// optionally narrowed to a single node via `params.node`.
api.registerGatewayMethod(
  "clawmatrix.tools.list",
  ({ params, respond }: GatewayRequestHandlerOptions) => {
    type ToolProxyInfo = import("./types.ts").ToolProxyInfo;
    type PeerToolInfo = { nodeId: string; status: string; toolProxy?: ToolProxyInfo };
    type PeerView = { nodeId: string; connected: boolean; status: string; toolProxy?: ToolProxyInfo };

    try {
      const runtime = getClusterRuntime();
      const { node } = (params ?? {}) as { node?: string };
      const merged = mergeSentinelPeers(runtime.peerManager.router.getAllPeers(), runtime);

      const result: PeerToolInfo[] = [];
      for (const raw of merged) {
        const peer = raw as PeerView;
        // Skip this node itself, nodes outside the requested filter, and
        // peers without an enabled tool proxy.
        if (peer.nodeId === config.nodeId) continue;
        if (node && peer.nodeId !== node) continue;
        if (!peer.toolProxy?.enabled) continue;
        result.push({ nodeId: peer.nodeId, status: peer.status, toolProxy: peer.toolProxy });
      }

      respond(true, result);
    } catch {
      respond(false, { error: "ClawMatrix service not running" });
    }
  },
);
590
+
591
// Gateway method: invokes one tool on a remote node through the tool proxy and
// relays the result; failures are reported with the thrown error's message.
api.registerGatewayMethod(
  "clawmatrix.tools.call",
  async ({ params, respond }: GatewayRequestHandlerOptions) => {
    try {
      const runtime = getClusterRuntime();
      const request = (params ?? {}) as {
        node?: string; tool?: string; params?: Record<string, unknown>; timeout?: number;
      };
      const { node, tool, timeout } = request;

      // Both the target node and the tool name are mandatory.
      if (!node || !tool) {
        respond(false, { error: "Missing required params: node, tool" });
        return;
      }

      const toolParams = request.params ?? {};
      const result = await runtime.toolProxy.invoke(node, tool, toolParams, timeout);
      respond(true, result);
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      respond(false, { error: message });
    }
  },
);
612
+
613
// Gateway method: runs several tool invocations on one remote node via
// `toolProxy.invokeBatch` and relays the collected results. Failures are
// reported with the thrown error's message.
api.registerGatewayMethod(
  "clawmatrix.tools.batch",
  async ({ params, respond }: GatewayRequestHandlerOptions) => {
    try {
      const runtime = getClusterRuntime();
      const { node, items, stopOnError, timeout } = (params ?? {}) as {
        node?: string;
        items?: Array<{ tool: string; params?: Record<string, unknown> }>;
        stopOnError?: boolean;
        timeout?: number;
      };

      // `params` is only cast, not validated, so `items` may be any value the
      // caller sent. Reject a non-array here with the documented error instead
      // of letting `.map` below throw an unrelated TypeError.
      if (!node || !Array.isArray(items) || items.length === 0) {
        respond(false, { error: "Missing required params: node, items" });
        return;
      }

      // Default each item's params to an empty object.
      const batchItems = items.map((item) => ({
        tool: item.tool,
        params: item.params ?? {},
      }));

      const results = await runtime.toolProxy.invokeBatch(node, batchItems, { stopOnError, timeout });
      respond(true, results);
    } catch (err) {
      respond(false, { error: err instanceof Error ? err.message : String(err) });
    }
  },
);
642
+
643
+ // ── Models CLI gateway method ──────────────────────────────────
644
+
645
// Gateway method: reports the proxied models (optionally for one node only),
// each flagged with whether its hosting node is currently reachable.
api.registerGatewayMethod(
  "clawmatrix.models.list",
  ({ params, respond }: GatewayRequestHandlerOptions) => {
    try {
      const runtime = getClusterRuntime();
      const { node } = (params ?? {}) as { node?: string };
      const allProxyModels = runtime.modelProxy.allProxyModels;

      // A node counts as reachable when it has an open connection or a
      // `reachableVia` value.
      const reachable = new Set<string>();
      for (const peer of runtime.peerManager.router.getAllPeers()) {
        if (peer.connection?.isOpen || peer.reachableVia) {
          reachable.add(peer.nodeId);
        }
      }

      const selected = node
        ? allProxyModels.filter((m) => m.nodeId === node)
        : allProxyModels;

      // Optional fields are only emitted when present on the model.
      const models = selected.map((m) => ({
        id: m.id,
        nodeId: m.nodeId,
        provider: m.provider ?? m.nodeId,
        ...(m.description && { description: m.description }),
        ...(m.contextWindow && { contextWindow: m.contextWindow }),
        ...(m.maxTokens && { maxTokens: m.maxTokens }),
        ...(m.reasoning !== undefined && { reasoning: m.reasoning }),
        ...(m.input && { input: m.input }),
        ...(m.api && { api: m.api }),
        ...(m.cost && { cost: m.cost }),
        reachable: reachable.has(m.nodeId),
      }));

      respond(true, models);
    } catch {
      respond(false, { error: "ClawMatrix service not running" });
    }
  },
);
681
+
682
+ // ── Events CLI gateway methods ──────────────────────────────────
683
+
684
// Gateway method: queries events from the web handler. Defaults to
// unconsumed events only and at most 20 results.
api.registerGatewayMethod(
  "clawmatrix.events.query",
  ({ params, respond }: GatewayRequestHandlerOptions) => {
    try {
      const runtime = getClusterRuntime();
      const handler = runtime.webHandler;
      if (!handler) {
        respond(false, { error: "Events not enabled (web.enabled = false)" });
        return;
      }

      const filter = (params ?? {}) as {
        type?: string; source?: string; since?: number; unconsumed?: boolean; limit?: number;
      };
      const events = handler.queryEvents({
        type: filter.type,
        source: filter.source,
        since: filter.since,
        unconsumed: filter.unconsumed ?? true,
        limit: filter.limit ?? 20,
      });
      respond(true, events);
    } catch {
      respond(false, { error: "ClawMatrix service not running" });
    }
  },
);
709
+
710
// Gateway method: marks the given event ids as consumed via the web handler
// and echoes back the handler's result together with the ids.
api.registerGatewayMethod(
  "clawmatrix.events.consume",
  ({ params, respond }: GatewayRequestHandlerOptions) => {
    try {
      const runtime = getClusterRuntime();
      if (!runtime.webHandler) {
        respond(false, { error: "Events not enabled (web.enabled = false)" });
        return;
      }
      const { ids } = (params ?? {}) as { ids?: string[] };
      // `params` is only cast, not validated: a bare string also has a
      // `.length`, so require a real array before handing it to consumeEvents.
      if (!Array.isArray(ids) || ids.length === 0) {
        respond(false, { error: "Missing required param: ids (array of event IDs)" });
        return;
      }
      const consumed = runtime.webHandler.consumeEvents(ids);
      respond(true, { consumed, ids });
    } catch {
      respond(false, { error: "ClawMatrix service not running" });
    }
  },
);
731
+
553
732
  // Log model selection on each LLM call (fire-and-forget)
554
733
  api.on("llm_input", (event) => {
555
734
  api.logger.debug(`[clawmatrix] llm_input: provider=${event.provider} model=${event.model}`);
@@ -558,6 +737,10 @@ const plugin = {
558
737
  // CLI subcommand
559
738
  api.registerCli(registerClusterCli, { commands: ["clawmatrix"] });
560
739
 
740
+ // Auto-install global `clawmatrix` shim next to the `openclaw` binary.
741
+ // Runs once on plugin load; non-blocking, best-effort.
742
+ installGlobalCliShim(api.logger);
743
+
561
744
  // Plugin command: /clawmatrix approve|deny|revoke
562
745
  // Handles Telegram callback buttons and other chat surfaces.
563
746
  // Plugin commands are processed before the agent, so they bypass the LLM.
@@ -646,11 +829,9 @@ const plugin = {
646
829
  lines.push("[ClawMatrix] No peers online. Use cluster_peers to check cluster status.");
647
830
  } else {
648
831
  lines.push(
649
- `[ClawMatrix Cluster] YOU ARE node="${config.nodeId}"${config.tags.length ? ` tags=${config.tags.join(",")}` : ""}. This is YOUR identity — never target yourself with cluster tools.`,
832
+ `[ClawMatrix Cluster] YOU ARE node="${config.nodeId}"${config.tags.length ? ` tags=${config.tags.join(",")}` : ""}. ${peerCount} peer(s) online.`,
650
833
  ...(config.agents.length > 0 ? [`Role: ${config.agents[0]!.description}`] : []),
651
- `${peerCount} remote peer(s) online. Use cluster_peers to see topology, agents, and models.`,
652
- "Prefer cluster_tool for device-specific tools (screenshot, battery, etc.); cluster_exec/read/write for file/shell ops; cluster_handoff for complex multi-step tasks.",
653
- "IMPORTANT: Always tell user which remote node you're targeting before calling cluster tools.",
834
+ "Use cluster_peers to see topology. Always tell user which remote node you're targeting before calling cluster tools.",
654
835
  );
655
836
  }
656
837
  cachedSystemContext = lines.join("\n");
@@ -748,6 +929,9 @@ function mergeSentinelPeers(
748
929
  status: effectiveStatus,
749
930
  reachableVia: p.reachableVia,
750
931
  latencyMs: p.latencyMs,
932
+ toolProxy: p.toolProxy,
933
+ acpAgents: p.acpAgents,
934
+ deviceInfo: p.deviceInfo,
751
935
  ...(sentinel ? { sentinel: sentinelOnline ? "online" : "offline" } : {}),
752
936
  });
753
937
  }
@@ -772,4 +956,54 @@ function mergeSentinelPeers(
772
956
  return result;
773
957
  }
774
958
 
959
/**
 * Auto-install a global `clawmatrix` CLI shim next to the `openclaw` binary.
 *
 * Scans `PATH` for an executable named `openclaw`, resolves symlinks to find the
 * directory it really lives in, and writes a small `sh` script there that
 * forwards to `openclaw clawmatrix "$@"`.
 *
 * Best-effort: every failure is swallowed so plugin loading is never
 * interrupted. A file that already exists at the shim path is never
 * overwritten, whether it is a previously installed shim or an unrelated
 * program that happens to be called `clawmatrix`.
 *
 * @param logger - Receives one info line when a shim is actually written.
 */
function installGlobalCliShim(logger: { info: (msg: string) => void; warn: (msg: string) => void }) {
  try {
    const fs = require("node:fs") as typeof import("node:fs");
    const path = require("node:path") as typeof import("node:path");

    // Find the real directory where `openclaw` lives by probing each PATH entry.
    let binDir: string | null = null;
    const envPath = process.env.PATH ?? "";
    for (const dir of envPath.split(path.delimiter)) {
      const candidate = path.join(dir, "openclaw");
      try {
        fs.accessSync(candidate, fs.constants.X_OK);
        // Resolve symlinks to get the real bin directory
        binDir = path.dirname(fs.realpathSync(candidate));
        break;
      } catch {
        // Not in this dir
      }
    }
    if (!binDir) return;

    const shimPath = path.join(binDir, "clawmatrix");

    const shim = [
      "#!/usr/bin/env sh",
      "# clawmatrix-shim: auto-installed by clawmatrix plugin",
      'exec openclaw clawmatrix "$@"',
      "",
    ].join("\n");

    // "wx" creates the file exclusively: if anything already exists at shimPath
    // (our own shim, a foreign executable, or a symlink) the write fails with
    // EEXIST instead of clobbering it, and the outer catch treats that as
    // "nothing to do".
    fs.writeFileSync(shimPath, shim, { mode: 0o755, flag: "wx" });
    logger.info(`[clawmatrix] Installed global CLI shim: ${shimPath}`);
  } catch {
    // Best-effort: don't break plugin loading if shim install fails
  }
}
1008
+
775
1009
  export default plugin;
@@ -1,6 +1,6 @@
1
1
  import * as Automerge from "@automerge/automerge";
2
2
  import { watch, type FSWatcher } from "node:fs";
3
- import { readdir, readFile, stat as fsStat, writeFile, mkdir, rename } from "node:fs/promises";
3
+ import { readdir, readFile, stat as fsStat, writeFile, mkdir, rename, unlink } from "node:fs/promises";
4
4
  import path from "node:path";
5
5
  import ignore, { type Ignore } from "ignore";
6
6
  import picomatch from "picomatch";
@@ -128,8 +128,10 @@ export class KnowledgeSync {
128
128
  // ── FS / Watcher ───────────────────────────────────────────────
129
129
  private watcher: FSWatcher | null = null;
130
130
  private debounceTimer: ReturnType<typeof setTimeout> | null = null;
131
+ private localChangesRunning = false;
132
+ private localChangesQueued = false;
131
133
  /** Paths recently written by exportFileToFs — suppress watcher re-trigger. Stores {content, timestamp}. */
132
- private writtenByExport = new Map<string, { content: string; ts: number }>();
134
+ private writtenByExport = new Map<string, { content: string | null; ts: number }>();
133
135
  /** Deferred git commit timer — batches multiple remote syncs into one commit. */
134
136
  private gitCommitTimer: ReturnType<typeof setTimeout> | null = null;
135
137
  private pendingGitSources = new Set<string>();
@@ -324,10 +326,20 @@ export class KnowledgeSync {
324
326
  await this.saveAutomergeDoc(this.registryPath, this.registry);
325
327
 
326
328
  // Discover new files from registry and initiate their sync
329
+ const deletedPaths: string[] = [];
327
330
  for (const [relPath, meta] of Object.entries(newDoc.files ?? {})) {
328
331
  if (meta.deleted) {
329
- // Clean up sync states for deleted files
332
+ // Clean up sync states, in-memory doc, persisted doc, and local file
330
333
  this.cleanupDeletedFileSyncStates(relPath);
334
+ if (this.fileDocs.has(relPath)) {
335
+ this.fileDocs.delete(relPath);
336
+ deletedPaths.push(relPath);
337
+ // Remove persisted automerge doc
338
+ const docPath = path.join(this.docsDir, docFileName(relPath));
339
+ await rename(docPath, docPath + ".deleted").catch(() => {});
340
+ // Delete local file from workspace
341
+ await this.deleteLocalFile(relPath);
342
+ }
331
343
  continue;
332
344
  }
333
345
  if (!this.fileDocs.has(relPath)) {
@@ -337,6 +349,12 @@ export class KnowledgeSync {
337
349
  this.syncDocWithPeer(peerId, relPath);
338
350
  }
339
351
 
352
+ // Commit remote deletions to git
353
+ if (deletedPaths.length > 0) {
354
+ debug(TAG, `remote deletion from ${peerId}: ${deletedPaths.join(", ")}`);
355
+ this.schedulePendingGitCommit(peerId);
356
+ }
357
+
340
358
  this.sendSyncMessage(peerId, REGISTRY_DOC_ID);
341
359
  }
342
360
 
@@ -474,6 +492,27 @@ export class KnowledgeSync {
474
492
  }
475
493
 
476
494
  private async handleLocalChanges() {
495
+ // Mutex: if already running, mark queued and return — the running
496
+ // invocation will re-run when it finishes to pick up new changes.
497
+ if (this.localChangesRunning) {
498
+ this.localChangesQueued = true;
499
+ return;
500
+ }
501
+ this.localChangesRunning = true;
502
+ try {
503
+ await this.handleLocalChangesInner();
504
+ } finally {
505
+ this.localChangesRunning = false;
506
+ if (this.localChangesQueued) {
507
+ this.localChangesQueued = false;
508
+ this.handleLocalChanges().catch((err) => {
509
+ debug(TAG, `queued local change handling error: ${err}`);
510
+ });
511
+ }
512
+ }
513
+ }
514
+
515
+ private async handleLocalChangesInner() {
477
516
  // Only process files in pendingChanges (incremental)
478
517
  const changesToProcess = new Set(this.pendingChanges);
479
518
  this.pendingChanges.clear();
@@ -802,7 +841,13 @@ export class KnowledgeSync {
802
841
  if (this.isIgnored(relPath)) return;
803
842
 
804
843
  const content = doc.content ?? "";
805
- const absPath = path.join(this.opts.workspacePath, relPath);
844
+ const absPath = path.resolve(this.opts.workspacePath, relPath);
845
+
846
+ // Prevent path traversal (e.g. relPath = "../../etc/passwd")
847
+ if (!absPath.startsWith(this.opts.workspacePath + path.sep) && absPath !== this.opts.workspacePath) {
848
+ debug(TAG, `blocked path traversal attempt: ${relPath}`);
849
+ return;
850
+ }
806
851
 
807
852
  let currentContent: string | null = null;
808
853
  try {
@@ -817,6 +862,30 @@ export class KnowledgeSync {
817
862
  }
818
863
  }
819
864
 
865
/**
 * Delete a local file from workspace (triggered by remote deletion).
 *
 * The target must resolve to a path strictly inside the workspace; anything
 * else (including the workspace root itself) is refused and logged. A file
 * that is already gone is not treated as an error.
 *
 * @param relPath - Workspace-relative path of the file to remove.
 */
private async deleteLocalFile(relPath: string) {
  // Resolve the root first so the containment check also works when
  // workspacePath is relative, has a trailing separator, or is not normalized.
  const root = path.resolve(this.opts.workspacePath);
  const absPath = path.resolve(root, relPath);

  // Prevent path traversal: the path from root to target must not be empty
  // (the root itself), climb upwards, or be absolute (e.g. another drive).
  const fromRoot = path.relative(root, absPath);
  const outsideWorkspace =
    fromRoot === "" ||
    fromRoot === ".." ||
    fromRoot.startsWith(`..${path.sep}`) ||
    path.isAbsolute(fromRoot);
  if (outsideWorkspace) {
    debug(TAG, `blocked path traversal on delete: ${relPath}`);
    return;
  }

  // Mark as our own deletion so the watcher doesn't re-process it.
  // handleLocalChangesInner sees currentContent === null === marker.content and skips.
  this.writtenByExport.set(relPath, { content: null, ts: Date.now() });
  try {
    await unlink(absPath);
    debug(TAG, `deleted local file: ${relPath}`);
  } catch (err) {
    // File may not exist locally — that's fine
    if ((err as NodeJS.ErrnoException).code !== "ENOENT") {
      debug(TAG, `failed to delete local file ${relPath}: ${err}`);
    }
  }
}
888
+
820
889
  /** Read all workspace files matching whitelist. */
821
890
  private async readWhitelistedFiles(): Promise<Record<string, string>> {
822
891
  const files: Record<string, string> = {};
@@ -925,9 +994,7 @@ export class KnowledgeSync {
925
994
  }
926
995
 
927
996
  let doc = Automerge.init<FileDoc>();
928
- doc = Automerge.change(doc, (d) => {
929
- (d as FileDoc).content = content;
930
- });
997
+ doc = changeFileContent(doc, content);
931
998
  this.fileDocs.set(relPath, doc);
932
999
 
933
1000
  this.registry = Automerge.change(this.registry, (d) => {
@@ -754,7 +754,7 @@ export class ModelProxy {
754
754
  let currentId = requestId;
755
755
  let currentTarget = targetNodeId;
756
756
  let currentFrame = frame;
757
- let remaining = failoverCandidates;
757
+ let failoverIdx = 0; // index into failoverCandidates (avoids slice allocations)
758
758
  const maxAttempts = failoverCandidates.length + 1;
759
759
 
760
760
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
@@ -763,13 +763,13 @@ export class ModelProxy {
763
763
 
764
764
  if (!result.success) {
765
765
  // Upstream error — try failover if available
766
- if (remaining.length > 0 && buildFrame) {
767
- const next = remaining[0]!;
768
- debug("proxy", `failover: remote error "${result.error}" → trying ${next.routeNodeId} (${remaining.length - 1} left)`);
766
+ if (failoverIdx < failoverCandidates.length && buildFrame) {
767
+ const next = failoverCandidates[failoverIdx]!;
768
+ debug("proxy", `failover: remote error "${result.error}" → trying ${next.routeNodeId} (${failoverCandidates.length - failoverIdx - 1} left)`);
769
+ failoverIdx++;
769
770
  currentId = crypto.randomUUID();
770
771
  currentFrame = buildFrame(next, currentId);
771
772
  currentTarget = next.routeNodeId;
772
- remaining = remaining.slice(1);
773
773
  continue;
774
774
  }
775
775
  return {
@@ -782,13 +782,13 @@ export class ModelProxy {
782
782
  return this.formatNonStreamResult(result, currentId, currentFrame, responseFormat);
783
783
  } catch (err) {
784
784
  // Timeout or send failure — try failover
785
- if (remaining.length > 0 && buildFrame) {
786
- const next = remaining[0]!;
787
- debug("proxy", `failover: ${err instanceof Error ? err.message : String(err)} → trying ${next.routeNodeId} (${remaining.length - 1} left)`);
785
+ if (failoverIdx < failoverCandidates.length && buildFrame) {
786
+ const next = failoverCandidates[failoverIdx]!;
787
+ debug("proxy", `failover: ${err instanceof Error ? err.message : String(err)} → trying ${next.routeNodeId} (${failoverCandidates.length - failoverIdx - 1} left)`);
788
+ failoverIdx++;
788
789
  currentId = crypto.randomUUID();
789
790
  currentFrame = buildFrame(next, currentId);
790
791
  currentTarget = next.routeNodeId;
791
- remaining = remaining.slice(1);
792
792
  continue;
793
793
  }
794
794
  return {
@@ -980,6 +980,24 @@ export class ModelProxy {
980
980
  const pending = this.pending.get(frame.id);
981
981
  if (!pending?.stream || !pending.controller || !pending.encoder) return;
982
982
 
983
+ // Reset activity timer — keeps long-running streams alive and detects
984
+ // stalled connections within modelTimeout of the last received chunk.
985
+ clearTimeout(pending.timer);
986
+ if (!frame.payload.done) {
987
+ pending.timer = setTimeout(() => {
988
+ // Capture references before cleanup removes pending from the map
989
+ const { stableStreamId, responseFormat, controller, encoder, model, failoverCandidates, buildFrame } = pending;
990
+ this.cleanupRequest(frame.id);
991
+ this.peerManager.router.markFailed(frame.id);
992
+ this.tryStreamFailover(
993
+ stableStreamId ?? frame.id, responseFormat,
994
+ controller!, encoder!, model ?? "",
995
+ failoverCandidates ?? [], buildFrame,
996
+ `stream stalled (no data for ${this.modelTimeout / 1000}s)`,
997
+ );
998
+ }, this.modelTimeout);
999
+ }
1000
+
983
1001
  try {
984
1002
  if (pending.responseFormat === "responses") {
985
1003
  this.handleModelStreamResponses(frame, pending);
@@ -1305,7 +1323,14 @@ export class ModelProxy {
1305
1323
  let chatFallbackResult: Awaited<ReturnType<ModelProxy["retryWithChatCompletions"]>> = null;
1306
1324
  try {
1307
1325
  result = JSON.parse(responseText);
1308
- } catch {
1326
+ // Detect error objects in 200 OK responses (some APIs return HTTP 200 with error body)
1327
+ if (result.error && typeof result.error === "object" && !result.choices && !result.output) {
1328
+ const errMsg = (result.error as { message?: string }).message ?? JSON.stringify(result.error);
1329
+ throw new Error(`Upstream error (200 OK): ${String(errMsg).slice(0, 200)}`);
1330
+ }
1331
+ } catch (parseErr) {
1332
+ // Re-throw non-parse errors (e.g. upstream error detection above)
1333
+ if (!(parseErr instanceof SyntaxError)) throw parseErr;
1309
1334
  // Upstream returned non-JSON (e.g. SSE in non-stream mode) — try chat completions fallback
1310
1335
  if (!cachedApi && isResponsesApi) {
1311
1336
  debug("model_req", `responses API returned non-JSON for "${model.id}", retrying with chat completions`);