clawmatrix 0.2.11 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +27 -0
- package/README.md +123 -12
- package/package.json +2 -1
- package/src/acp-proxy.ts +407 -68
- package/src/cli.ts +478 -10
- package/src/cluster-service.ts +114 -14
- package/src/compat.ts +0 -6
- package/src/config.ts +8 -5
- package/src/connection.ts +61 -55
- package/src/e2e/helpers.ts +1 -5
- package/src/file-transfer.ts +64 -14
- package/src/handoff.ts +21 -8
- package/src/index.ts +234 -5
- package/src/knowledge-sync.ts +44 -6
- package/src/model-proxy.ts +35 -10
- package/src/peer-manager.ts +81 -13
- package/src/rate-limiter.ts +16 -10
- package/src/router.ts +115 -33
- package/src/sentinel-manager.ts +51 -0
- package/src/sentinel.ts +13 -3
- package/src/tool-proxy.ts +12 -4
- package/src/tools/cluster-diagnostic.ts +3 -2
- package/src/tools/cluster-edit.ts +2 -1
- package/src/tools/cluster-events.ts +3 -1
- package/src/tools/cluster-exec.ts +2 -0
- package/src/tools/cluster-handoff.ts +3 -1
- package/src/tools/cluster-peers.ts +3 -1
- package/src/tools/cluster-read.ts +4 -1
- package/src/tools/cluster-send.ts +2 -1
- package/src/tools/cluster-terminal.ts +4 -7
- package/src/tools/cluster-tool.ts +2 -2
- package/src/tools/cluster-write.ts +3 -1
- package/src/types.ts +103 -1
- package/src/web.ts +2 -10
- package/src/web-ui.ts +0 -1622
package/src/index.ts
CHANGED
|
@@ -272,7 +272,7 @@ const plugin = {
|
|
|
272
272
|
|
|
273
273
|
const repatchTimer = setInterval(patchAllConfigs, 10_000);
|
|
274
274
|
repatchTimer.unref?.();
|
|
275
|
-
api.on("
|
|
275
|
+
api.on("gateway_stop", () => clearInterval(repatchTimer));
|
|
276
276
|
|
|
277
277
|
for (const [nodeId, models] of Object.entries(modelsByNode)) {
|
|
278
278
|
api.registerProvider({
|
|
@@ -555,6 +555,180 @@ const plugin = {
|
|
|
555
555
|
},
|
|
556
556
|
);
|
|
557
557
|
|
|
558
|
+
// ── Tool proxy CLI gateway methods ──────────────────────────────
|
|
559
|
+
|
|
560
|
+
api.registerGatewayMethod(
|
|
561
|
+
"clawmatrix.tools.list",
|
|
562
|
+
({ params, respond }: GatewayRequestHandlerOptions) => {
|
|
563
|
+
try {
|
|
564
|
+
const runtime = getClusterRuntime();
|
|
565
|
+
const { node } = (params ?? {}) as { node?: string };
|
|
566
|
+
const allPeers = runtime.peerManager.router.getAllPeers();
|
|
567
|
+
const peers = mergeSentinelPeers(allPeers, runtime)
|
|
568
|
+
.filter((p) => (p as { nodeId: string }).nodeId !== config.nodeId);
|
|
569
|
+
|
|
570
|
+
type PeerToolInfo = { nodeId: string; status: string; toolProxy?: import("./types.ts").ToolProxyInfo };
|
|
571
|
+
const result: PeerToolInfo[] = [];
|
|
572
|
+
|
|
573
|
+
for (const peer of peers) {
|
|
574
|
+
const p = peer as { nodeId: string; connected: boolean; status: string; toolProxy?: import("./types.ts").ToolProxyInfo };
|
|
575
|
+
if (node && p.nodeId !== node) continue;
|
|
576
|
+
if (!p.toolProxy?.enabled) continue;
|
|
577
|
+
result.push({
|
|
578
|
+
nodeId: p.nodeId,
|
|
579
|
+
status: p.status,
|
|
580
|
+
toolProxy: p.toolProxy,
|
|
581
|
+
});
|
|
582
|
+
}
|
|
583
|
+
|
|
584
|
+
respond(true, result);
|
|
585
|
+
} catch {
|
|
586
|
+
respond(false, { error: "ClawMatrix service not running" });
|
|
587
|
+
}
|
|
588
|
+
},
|
|
589
|
+
);
|
|
590
|
+
|
|
591
|
+
api.registerGatewayMethod(
|
|
592
|
+
"clawmatrix.tools.call",
|
|
593
|
+
async ({ params, respond }: GatewayRequestHandlerOptions) => {
|
|
594
|
+
try {
|
|
595
|
+
const runtime = getClusterRuntime();
|
|
596
|
+
const { node, tool, params: toolParams, timeout } = (params ?? {}) as {
|
|
597
|
+
node?: string; tool?: string; params?: Record<string, unknown>; timeout?: number;
|
|
598
|
+
};
|
|
599
|
+
|
|
600
|
+
if (!node || !tool) {
|
|
601
|
+
respond(false, { error: "Missing required params: node, tool" });
|
|
602
|
+
return;
|
|
603
|
+
}
|
|
604
|
+
|
|
605
|
+
const result = await runtime.toolProxy.invoke(node, tool, toolParams ?? {}, timeout);
|
|
606
|
+
respond(true, result);
|
|
607
|
+
} catch (err) {
|
|
608
|
+
respond(false, { error: err instanceof Error ? err.message : String(err) });
|
|
609
|
+
}
|
|
610
|
+
},
|
|
611
|
+
);
|
|
612
|
+
|
|
613
|
+
api.registerGatewayMethod(
|
|
614
|
+
"clawmatrix.tools.batch",
|
|
615
|
+
async ({ params, respond }: GatewayRequestHandlerOptions) => {
|
|
616
|
+
try {
|
|
617
|
+
const runtime = getClusterRuntime();
|
|
618
|
+
const { node, items, stopOnError, timeout } = (params ?? {}) as {
|
|
619
|
+
node?: string;
|
|
620
|
+
items?: Array<{ tool: string; params?: Record<string, unknown> }>;
|
|
621
|
+
stopOnError?: boolean;
|
|
622
|
+
timeout?: number;
|
|
623
|
+
};
|
|
624
|
+
|
|
625
|
+
if (!node || !items || items.length === 0) {
|
|
626
|
+
respond(false, { error: "Missing required params: node, items" });
|
|
627
|
+
return;
|
|
628
|
+
}
|
|
629
|
+
|
|
630
|
+
const batchItems = items.map((item) => ({
|
|
631
|
+
tool: item.tool,
|
|
632
|
+
params: item.params ?? {},
|
|
633
|
+
}));
|
|
634
|
+
|
|
635
|
+
const results = await runtime.toolProxy.invokeBatch(node, batchItems, { stopOnError, timeout });
|
|
636
|
+
respond(true, results);
|
|
637
|
+
} catch (err) {
|
|
638
|
+
respond(false, { error: err instanceof Error ? err.message : String(err) });
|
|
639
|
+
}
|
|
640
|
+
},
|
|
641
|
+
);
|
|
642
|
+
|
|
643
|
+
// ── Models CLI gateway method ──────────────────────────────────
|
|
644
|
+
|
|
645
|
+
api.registerGatewayMethod(
|
|
646
|
+
"clawmatrix.models.list",
|
|
647
|
+
({ params, respond }: GatewayRequestHandlerOptions) => {
|
|
648
|
+
try {
|
|
649
|
+
const runtime = getClusterRuntime();
|
|
650
|
+
const { node } = (params ?? {}) as { node?: string };
|
|
651
|
+
const allProxyModels = runtime.modelProxy.allProxyModels;
|
|
652
|
+
|
|
653
|
+
const reachable = new Set(
|
|
654
|
+
runtime.peerManager.router.getAllPeers()
|
|
655
|
+
.filter((p) => p.connection?.isOpen || p.reachableVia)
|
|
656
|
+
.map((p) => p.nodeId),
|
|
657
|
+
);
|
|
658
|
+
|
|
659
|
+
const models = allProxyModels
|
|
660
|
+
.filter((m) => !node || m.nodeId === node)
|
|
661
|
+
.map((m) => ({
|
|
662
|
+
id: m.id,
|
|
663
|
+
nodeId: m.nodeId,
|
|
664
|
+
provider: m.provider ?? m.nodeId,
|
|
665
|
+
...(m.description && { description: m.description }),
|
|
666
|
+
...(m.contextWindow && { contextWindow: m.contextWindow }),
|
|
667
|
+
...(m.maxTokens && { maxTokens: m.maxTokens }),
|
|
668
|
+
...(m.reasoning !== undefined && { reasoning: m.reasoning }),
|
|
669
|
+
...(m.input && { input: m.input }),
|
|
670
|
+
...(m.api && { api: m.api }),
|
|
671
|
+
...(m.cost && { cost: m.cost }),
|
|
672
|
+
reachable: reachable.has(m.nodeId),
|
|
673
|
+
}));
|
|
674
|
+
|
|
675
|
+
respond(true, models);
|
|
676
|
+
} catch {
|
|
677
|
+
respond(false, { error: "ClawMatrix service not running" });
|
|
678
|
+
}
|
|
679
|
+
},
|
|
680
|
+
);
|
|
681
|
+
|
|
682
|
+
// ── Events CLI gateway methods ──────────────────────────────────
|
|
683
|
+
|
|
684
|
+
api.registerGatewayMethod(
|
|
685
|
+
"clawmatrix.events.query",
|
|
686
|
+
({ params, respond }: GatewayRequestHandlerOptions) => {
|
|
687
|
+
try {
|
|
688
|
+
const runtime = getClusterRuntime();
|
|
689
|
+
if (!runtime.webHandler) {
|
|
690
|
+
respond(false, { error: "Events not enabled (web.enabled = false)" });
|
|
691
|
+
return;
|
|
692
|
+
}
|
|
693
|
+
const { type, source, since, unconsumed, limit } = (params ?? {}) as {
|
|
694
|
+
type?: string; source?: string; since?: number; unconsumed?: boolean; limit?: number;
|
|
695
|
+
};
|
|
696
|
+
const events = runtime.webHandler.queryEvents({
|
|
697
|
+
type,
|
|
698
|
+
source,
|
|
699
|
+
since,
|
|
700
|
+
unconsumed: unconsumed ?? true,
|
|
701
|
+
limit: limit ?? 20,
|
|
702
|
+
});
|
|
703
|
+
respond(true, events);
|
|
704
|
+
} catch {
|
|
705
|
+
respond(false, { error: "ClawMatrix service not running" });
|
|
706
|
+
}
|
|
707
|
+
},
|
|
708
|
+
);
|
|
709
|
+
|
|
710
|
+
api.registerGatewayMethod(
|
|
711
|
+
"clawmatrix.events.consume",
|
|
712
|
+
({ params, respond }: GatewayRequestHandlerOptions) => {
|
|
713
|
+
try {
|
|
714
|
+
const runtime = getClusterRuntime();
|
|
715
|
+
if (!runtime.webHandler) {
|
|
716
|
+
respond(false, { error: "Events not enabled (web.enabled = false)" });
|
|
717
|
+
return;
|
|
718
|
+
}
|
|
719
|
+
const { ids } = (params ?? {}) as { ids?: string[] };
|
|
720
|
+
if (!ids || ids.length === 0) {
|
|
721
|
+
respond(false, { error: "Missing required param: ids (array of event IDs)" });
|
|
722
|
+
return;
|
|
723
|
+
}
|
|
724
|
+
const consumed = runtime.webHandler.consumeEvents(ids);
|
|
725
|
+
respond(true, { consumed, ids });
|
|
726
|
+
} catch {
|
|
727
|
+
respond(false, { error: "ClawMatrix service not running" });
|
|
728
|
+
}
|
|
729
|
+
},
|
|
730
|
+
);
|
|
731
|
+
|
|
558
732
|
// Log model selection on each LLM call (fire-and-forget)
|
|
559
733
|
api.on("llm_input", (event) => {
|
|
560
734
|
api.logger.debug(`[clawmatrix] llm_input: provider=${event.provider} model=${event.model}`);
|
|
@@ -563,6 +737,10 @@ const plugin = {
|
|
|
563
737
|
// CLI subcommand
|
|
564
738
|
api.registerCli(registerClusterCli, { commands: ["clawmatrix"] });
|
|
565
739
|
|
|
740
|
+
// Auto-install global `clawmatrix` shim next to the `openclaw` binary.
|
|
741
|
+
// Runs once on plugin load; non-blocking, best-effort.
|
|
742
|
+
installGlobalCliShim(api.logger);
|
|
743
|
+
|
|
566
744
|
// Plugin command: /clawmatrix approve|deny|revoke
|
|
567
745
|
// Handles Telegram callback buttons and other chat surfaces.
|
|
568
746
|
// Plugin commands are processed before the agent, so they bypass the LLM.
|
|
@@ -651,11 +829,9 @@ const plugin = {
|
|
|
651
829
|
lines.push("[ClawMatrix] No peers online. Use cluster_peers to check cluster status.");
|
|
652
830
|
} else {
|
|
653
831
|
lines.push(
|
|
654
|
-
`[ClawMatrix Cluster] YOU ARE node="${config.nodeId}"${config.tags.length ? ` tags=${config.tags.join(",")}` : ""}.
|
|
832
|
+
`[ClawMatrix Cluster] YOU ARE node="${config.nodeId}"${config.tags.length ? ` tags=${config.tags.join(",")}` : ""}. ${peerCount} peer(s) online.`,
|
|
655
833
|
...(config.agents.length > 0 ? [`Role: ${config.agents[0]!.description}`] : []),
|
|
656
|
-
|
|
657
|
-
"Prefer cluster_tool for device-specific tools (screenshot, battery, etc.); cluster_exec/read/write for file/shell ops; cluster_handoff for complex multi-step tasks.",
|
|
658
|
-
"IMPORTANT: Always tell user which remote node you're targeting before calling cluster tools.",
|
|
834
|
+
"Use cluster_peers to see topology. Always tell user which remote node you're targeting before calling cluster tools.",
|
|
659
835
|
);
|
|
660
836
|
}
|
|
661
837
|
cachedSystemContext = lines.join("\n");
|
|
@@ -753,6 +929,9 @@ function mergeSentinelPeers(
|
|
|
753
929
|
status: effectiveStatus,
|
|
754
930
|
reachableVia: p.reachableVia,
|
|
755
931
|
latencyMs: p.latencyMs,
|
|
932
|
+
toolProxy: p.toolProxy,
|
|
933
|
+
acpAgents: p.acpAgents,
|
|
934
|
+
deviceInfo: p.deviceInfo,
|
|
756
935
|
...(sentinel ? { sentinel: sentinelOnline ? "online" : "offline" } : {}),
|
|
757
936
|
});
|
|
758
937
|
}
|
|
@@ -777,4 +956,54 @@ function mergeSentinelPeers(
|
|
|
777
956
|
return result;
|
|
778
957
|
}
|
|
779
958
|
|
|
959
|
+
/**
 * Auto-install a global `clawmatrix` CLI shim next to the `openclaw` binary.
 *
 * Walks the entries of `PATH` looking for an executable named `openclaw`,
 * resolves symlinks on the first hit, and writes a small `sh` wrapper called
 * `clawmatrix` into that resolved directory. The wrapper simply runs
 * `exec openclaw clawmatrix "$@"`.
 *
 * Best-effort: this function never throws. Anything that prevents the install
 * (unwritable directory, a foreign `clawmatrix` file already present, …) is
 * reported through `logger.warn` instead of being silently dropped.
 *
 * @param logger - Receives one `info` message on a successful install and one
 *   `warn` message when the install is skipped or fails.
 */
function installGlobalCliShim(logger: { info: (msg: string) => void; warn: (msg: string) => void }) {
  // Marker embedded in the shim so a later run can recognise its own file.
  const SHIM_MARKER = "clawmatrix-shim";

  try {
    const fs = require("node:fs") as typeof import("node:fs");
    const path = require("node:path") as typeof import("node:path");

    // Locate `openclaw` through PATH and resolve symlinks to its real directory.
    let binDir: string | null = null;
    for (const dir of (process.env.PATH ?? "").split(path.delimiter)) {
      // An empty entry would make the candidate resolve against the current
      // working directory; never install a shim there.
      if (!dir) continue;
      const candidate = path.join(dir, "openclaw");
      try {
        fs.accessSync(candidate, fs.constants.X_OK);
        binDir = path.dirname(fs.realpathSync(candidate));
        break;
      } catch {
        // Not present (or not executable) in this directory — keep searching.
      }
    }
    if (!binDir) return;

    const shimPath = path.join(binDir, "clawmatrix");

    // Inspect whatever currently lives at the shim path.
    let existing: string | null = null;
    try {
      existing = fs.readFileSync(shimPath, "utf-8");
    } catch (err) {
      // Only "file does not exist" means we may create it; anything else
      // (permissions, path is a directory, …) is a real failure.
      if ((err as { code?: string }).code !== "ENOENT") throw err;
    }

    if (existing !== null) {
      // Our own shim is already installed: nothing to do.
      // A file without the marker belongs to someone else: never overwrite it.
      if (!existing.includes(SHIM_MARKER)) {
        logger.warn(
          `[clawmatrix] Not installing CLI shim: ${shimPath} already exists and is not a clawmatrix shim`,
        );
      }
      return;
    }

    const shim = [
      "#!/usr/bin/env sh",
      `# ${SHIM_MARKER}: auto-installed by clawmatrix plugin`,
      'exec openclaw clawmatrix "$@"',
      "",
    ].join("\n");

    // "wx" makes the write fail instead of overwriting if a file appeared
    // between the existence check above and this call.
    fs.writeFileSync(shimPath, shim, { mode: 0o755, flag: "wx" });
    logger.info(`[clawmatrix] Installed global CLI shim: ${shimPath}`);
  } catch (err) {
    // Best-effort: don't break plugin loading if shim install fails,
    // but leave a trace so the failure can be diagnosed.
    logger.warn(
      `[clawmatrix] Could not install global CLI shim: ${err instanceof Error ? err.message : String(err)}`,
    );
  }
}
|
|
1008
|
+
|
|
780
1009
|
export default plugin;
|
package/src/knowledge-sync.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import * as Automerge from "@automerge/automerge";
|
|
2
2
|
import { watch, type FSWatcher } from "node:fs";
|
|
3
|
-
import { readdir, readFile, stat as fsStat, writeFile, mkdir, rename } from "node:fs/promises";
|
|
3
|
+
import { readdir, readFile, stat as fsStat, writeFile, mkdir, rename, unlink } from "node:fs/promises";
|
|
4
4
|
import path from "node:path";
|
|
5
5
|
import ignore, { type Ignore } from "ignore";
|
|
6
6
|
import picomatch from "picomatch";
|
|
@@ -131,7 +131,7 @@ export class KnowledgeSync {
|
|
|
131
131
|
private localChangesRunning = false;
|
|
132
132
|
private localChangesQueued = false;
|
|
133
133
|
/** Paths recently written by exportFileToFs — suppress watcher re-trigger. Stores {content, timestamp}. */
|
|
134
|
-
private writtenByExport = new Map<string, { content: string; ts: number }>();
|
|
134
|
+
private writtenByExport = new Map<string, { content: string | null; ts: number }>();
|
|
135
135
|
/** Deferred git commit timer — batches multiple remote syncs into one commit. */
|
|
136
136
|
private gitCommitTimer: ReturnType<typeof setTimeout> | null = null;
|
|
137
137
|
private pendingGitSources = new Set<string>();
|
|
@@ -326,10 +326,20 @@ export class KnowledgeSync {
|
|
|
326
326
|
await this.saveAutomergeDoc(this.registryPath, this.registry);
|
|
327
327
|
|
|
328
328
|
// Discover new files from registry and initiate their sync
|
|
329
|
+
const deletedPaths: string[] = [];
|
|
329
330
|
for (const [relPath, meta] of Object.entries(newDoc.files ?? {})) {
|
|
330
331
|
if (meta.deleted) {
|
|
331
|
-
// Clean up sync states
|
|
332
|
+
// Clean up sync states, in-memory doc, persisted doc, and local file
|
|
332
333
|
this.cleanupDeletedFileSyncStates(relPath);
|
|
334
|
+
if (this.fileDocs.has(relPath)) {
|
|
335
|
+
this.fileDocs.delete(relPath);
|
|
336
|
+
deletedPaths.push(relPath);
|
|
337
|
+
// Remove persisted automerge doc
|
|
338
|
+
const docPath = path.join(this.docsDir, docFileName(relPath));
|
|
339
|
+
await rename(docPath, docPath + ".deleted").catch(() => {});
|
|
340
|
+
// Delete local file from workspace
|
|
341
|
+
await this.deleteLocalFile(relPath);
|
|
342
|
+
}
|
|
333
343
|
continue;
|
|
334
344
|
}
|
|
335
345
|
if (!this.fileDocs.has(relPath)) {
|
|
@@ -339,6 +349,12 @@ export class KnowledgeSync {
|
|
|
339
349
|
this.syncDocWithPeer(peerId, relPath);
|
|
340
350
|
}
|
|
341
351
|
|
|
352
|
+
// Commit remote deletions to git
|
|
353
|
+
if (deletedPaths.length > 0) {
|
|
354
|
+
debug(TAG, `remote deletion from ${peerId}: ${deletedPaths.join(", ")}`);
|
|
355
|
+
this.schedulePendingGitCommit(peerId);
|
|
356
|
+
}
|
|
357
|
+
|
|
342
358
|
this.sendSyncMessage(peerId, REGISTRY_DOC_ID);
|
|
343
359
|
}
|
|
344
360
|
|
|
@@ -846,6 +862,30 @@ export class KnowledgeSync {
|
|
|
846
862
|
}
|
|
847
863
|
}
|
|
848
864
|
|
|
865
|
+
/**
 * Delete a local file from the workspace (triggered by a remote deletion).
 *
 * The target is resolved against the workspace root and rejected unless it
 * lies strictly inside that root. A `{ content: null }` marker is recorded in
 * `writtenByExport` before unlinking; the intent is that the file watcher
 * recognises the deletion as our own and does not re-process it.
 *
 * A missing file (`ENOENT`) is not an error. Any other unlink failure is
 * logged via `debug` and otherwise ignored; this method does not throw.
 *
 * @param relPath - Workspace-relative path of the file to remove.
 */
private async deleteLocalFile(relPath: string) {
  // Normalise the root first: comparing a resolved path against a raw
  // (relative, trailing-separator or `..`-containing) workspacePath would
  // wrongly reject files that are inside the workspace.
  const root = path.resolve(this.opts.workspacePath);
  const absPath = path.resolve(root, relPath);

  // Prevent path traversal. `rel` is empty when the target is the workspace
  // root itself, starts with ".." when it escapes the root, and is absolute
  // when the two paths share no common root (e.g. different drives).
  const rel = path.relative(root, absPath);
  if (rel === "" || rel === ".." || rel.startsWith(".." + path.sep) || path.isAbsolute(rel)) {
    debug(TAG, `blocked path traversal on delete: ${relPath}`);
    return;
  }

  // Mark as our own deletion so the watcher doesn't re-process it
  // (a null content marker stands for "file removed by us").
  this.writtenByExport.set(relPath, { content: null, ts: Date.now() });
  try {
    await unlink(absPath);
    debug(TAG, `deleted local file: ${relPath}`);
  } catch (err) {
    // File may not exist locally — that's fine
    if ((err as NodeJS.ErrnoException).code !== "ENOENT") {
      debug(TAG, `failed to delete local file ${relPath}: ${err}`);
    }
  }
}
|
|
888
|
+
|
|
849
889
|
/** Read all workspace files matching whitelist. */
|
|
850
890
|
private async readWhitelistedFiles(): Promise<Record<string, string>> {
|
|
851
891
|
const files: Record<string, string> = {};
|
|
@@ -954,9 +994,7 @@ export class KnowledgeSync {
|
|
|
954
994
|
}
|
|
955
995
|
|
|
956
996
|
let doc = Automerge.init<FileDoc>();
|
|
957
|
-
doc =
|
|
958
|
-
(d as FileDoc).content = content;
|
|
959
|
-
});
|
|
997
|
+
doc = changeFileContent(doc, content);
|
|
960
998
|
this.fileDocs.set(relPath, doc);
|
|
961
999
|
|
|
962
1000
|
this.registry = Automerge.change(this.registry, (d) => {
|
package/src/model-proxy.ts
CHANGED
|
@@ -754,7 +754,7 @@ export class ModelProxy {
|
|
|
754
754
|
let currentId = requestId;
|
|
755
755
|
let currentTarget = targetNodeId;
|
|
756
756
|
let currentFrame = frame;
|
|
757
|
-
let
|
|
757
|
+
let failoverIdx = 0; // index into failoverCandidates (avoids slice allocations)
|
|
758
758
|
const maxAttempts = failoverCandidates.length + 1;
|
|
759
759
|
|
|
760
760
|
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
|
@@ -763,13 +763,13 @@ export class ModelProxy {
|
|
|
763
763
|
|
|
764
764
|
if (!result.success) {
|
|
765
765
|
// Upstream error — try failover if available
|
|
766
|
-
if (
|
|
767
|
-
const next =
|
|
768
|
-
debug("proxy", `failover: remote error "${result.error}" → trying ${next.routeNodeId} (${
|
|
766
|
+
if (failoverIdx < failoverCandidates.length && buildFrame) {
|
|
767
|
+
const next = failoverCandidates[failoverIdx]!;
|
|
768
|
+
debug("proxy", `failover: remote error "${result.error}" → trying ${next.routeNodeId} (${failoverCandidates.length - failoverIdx - 1} left)`);
|
|
769
|
+
failoverIdx++;
|
|
769
770
|
currentId = crypto.randomUUID();
|
|
770
771
|
currentFrame = buildFrame(next, currentId);
|
|
771
772
|
currentTarget = next.routeNodeId;
|
|
772
|
-
remaining = remaining.slice(1);
|
|
773
773
|
continue;
|
|
774
774
|
}
|
|
775
775
|
return {
|
|
@@ -782,13 +782,13 @@ export class ModelProxy {
|
|
|
782
782
|
return this.formatNonStreamResult(result, currentId, currentFrame, responseFormat);
|
|
783
783
|
} catch (err) {
|
|
784
784
|
// Timeout or send failure — try failover
|
|
785
|
-
if (
|
|
786
|
-
const next =
|
|
787
|
-
debug("proxy", `failover: ${err instanceof Error ? err.message : String(err)} → trying ${next.routeNodeId} (${
|
|
785
|
+
if (failoverIdx < failoverCandidates.length && buildFrame) {
|
|
786
|
+
const next = failoverCandidates[failoverIdx]!;
|
|
787
|
+
debug("proxy", `failover: ${err instanceof Error ? err.message : String(err)} → trying ${next.routeNodeId} (${failoverCandidates.length - failoverIdx - 1} left)`);
|
|
788
|
+
failoverIdx++;
|
|
788
789
|
currentId = crypto.randomUUID();
|
|
789
790
|
currentFrame = buildFrame(next, currentId);
|
|
790
791
|
currentTarget = next.routeNodeId;
|
|
791
|
-
remaining = remaining.slice(1);
|
|
792
792
|
continue;
|
|
793
793
|
}
|
|
794
794
|
return {
|
|
@@ -980,6 +980,24 @@ export class ModelProxy {
|
|
|
980
980
|
const pending = this.pending.get(frame.id);
|
|
981
981
|
if (!pending?.stream || !pending.controller || !pending.encoder) return;
|
|
982
982
|
|
|
983
|
+
// Reset activity timer — keeps long-running streams alive and detects
|
|
984
|
+
// stalled connections within modelTimeout of the last received chunk.
|
|
985
|
+
clearTimeout(pending.timer);
|
|
986
|
+
if (!frame.payload.done) {
|
|
987
|
+
pending.timer = setTimeout(() => {
|
|
988
|
+
// Capture references before cleanup removes pending from the map
|
|
989
|
+
const { stableStreamId, responseFormat, controller, encoder, model, failoverCandidates, buildFrame } = pending;
|
|
990
|
+
this.cleanupRequest(frame.id);
|
|
991
|
+
this.peerManager.router.markFailed(frame.id);
|
|
992
|
+
this.tryStreamFailover(
|
|
993
|
+
stableStreamId ?? frame.id, responseFormat,
|
|
994
|
+
controller!, encoder!, model ?? "",
|
|
995
|
+
failoverCandidates ?? [], buildFrame,
|
|
996
|
+
`stream stalled (no data for ${this.modelTimeout / 1000}s)`,
|
|
997
|
+
);
|
|
998
|
+
}, this.modelTimeout);
|
|
999
|
+
}
|
|
1000
|
+
|
|
983
1001
|
try {
|
|
984
1002
|
if (pending.responseFormat === "responses") {
|
|
985
1003
|
this.handleModelStreamResponses(frame, pending);
|
|
@@ -1305,7 +1323,14 @@ export class ModelProxy {
|
|
|
1305
1323
|
let chatFallbackResult: Awaited<ReturnType<ModelProxy["retryWithChatCompletions"]>> = null;
|
|
1306
1324
|
try {
|
|
1307
1325
|
result = JSON.parse(responseText);
|
|
1308
|
-
|
|
1326
|
+
// Detect error objects in 200 OK responses (some APIs return HTTP 200 with error body)
|
|
1327
|
+
if (result.error && typeof result.error === "object" && !result.choices && !result.output) {
|
|
1328
|
+
const errMsg = (result.error as { message?: string }).message ?? JSON.stringify(result.error);
|
|
1329
|
+
throw new Error(`Upstream error (200 OK): ${String(errMsg).slice(0, 200)}`);
|
|
1330
|
+
}
|
|
1331
|
+
} catch (parseErr) {
|
|
1332
|
+
// Re-throw non-parse errors (e.g. upstream error detection above)
|
|
1333
|
+
if (!(parseErr instanceof SyntaxError)) throw parseErr;
|
|
1309
1334
|
// Upstream returned non-JSON (e.g. SSE in non-stream mode) — try chat completions fallback
|
|
1310
1335
|
if (!cachedApi && isResponsesApi) {
|
|
1311
1336
|
debug("model_req", `responses API returned non-JSON for "${model.id}", retrying with chat completions`);
|
package/src/peer-manager.ts
CHANGED
|
@@ -93,6 +93,8 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
93
93
|
private wss: WebSocketServer | null = null;
|
|
94
94
|
private reconnectTimers = new Map<string, ReturnType<typeof setTimeout>>();
|
|
95
95
|
private reconnectAttempts = new Map<string, number>();
|
|
96
|
+
/** Deferred disconnect timers — grace period before broadcasting peer_leave. */
|
|
97
|
+
private disconnectGraceTimers = new Map<string, ReturnType<typeof setTimeout>>();
|
|
96
98
|
private stopped = false;
|
|
97
99
|
/** Map from ws WebSocket to Connection for inbound connections. */
|
|
98
100
|
private inboundConnections = new Map<WsWebSocket, Connection>();
|
|
@@ -165,6 +167,17 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
165
167
|
}
|
|
166
168
|
}
|
|
167
169
|
|
|
170
|
+
/**
 * Replace the local tool proxy catalog and push the change to every direct peer.
 *
 * When local capabilities carry a `toolProxy` entry, it is replaced by a copy
 * holding the new catalog. The router is then given the catalog, and a peer
 * sync is sent over each direct connection.
 *
 * @param catalog - The new list of tool catalog entries for this node.
 */
updateToolCatalog(catalog: import("./types.ts").ToolCatalogEntry[]) {
  const capabilities = this.localCapabilities;
  if (capabilities.toolProxy) {
    // Copy-and-replace rather than mutating the existing toolProxy object.
    capabilities.toolProxy = { ...capabilities.toolProxy, catalog };
  }

  const { router } = this;
  router.updateLocalToolCatalog(catalog);

  // Re-announce to each directly connected peer.
  for (const directConn of router.getDirectConnections()) {
    this.sendPeerSync(directConn);
  }
}
|
|
180
|
+
|
|
168
181
|
// ── Lifecycle ──────────────────────────────────────────────────
|
|
169
182
|
async start() {
|
|
170
183
|
await this.approvalManager.load();
|
|
@@ -190,6 +203,12 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
190
203
|
clearTimeout(timer);
|
|
191
204
|
}
|
|
192
205
|
this.reconnectTimers.clear();
|
|
206
|
+
// Flush all disconnect grace timers (execute leave immediately on shutdown)
|
|
207
|
+
for (const [nodeId, timer] of this.disconnectGraceTimers) {
|
|
208
|
+
clearTimeout(timer);
|
|
209
|
+
this.executePeerLeave(nodeId);
|
|
210
|
+
}
|
|
211
|
+
this.disconnectGraceTimers.clear();
|
|
193
212
|
|
|
194
213
|
this.router.broadcast({
|
|
195
214
|
type: "peer_leave",
|
|
@@ -461,9 +480,6 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
461
480
|
if (this.pendingApprovalConns.has(nodeId)) {
|
|
462
481
|
debug("approval", `reusing pending approval for ${nodeId}, updating conn ref`);
|
|
463
482
|
this.pendingApprovalConns.set(nodeId, { conn, caps });
|
|
464
|
-
if (this.config.peerApproval?.mode === "required") {
|
|
465
|
-
conn.on("close", () => this.onPeerDisconnected(conn));
|
|
466
|
-
}
|
|
467
483
|
return;
|
|
468
484
|
}
|
|
469
485
|
|
|
@@ -495,10 +511,12 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
495
511
|
);
|
|
496
512
|
}
|
|
497
513
|
});
|
|
498
|
-
// In required mode, don't complete the join yet
|
|
514
|
+
// In required mode, don't complete the join yet.
|
|
515
|
+
// No close handler needed here: the peer was never added to the router,
|
|
516
|
+
// so onPeerDisconnected would broadcast a spurious peer_leave.
|
|
517
|
+
// If the conn drops before approval resolves, the .then() handler sees
|
|
518
|
+
// activeConn.isOpen === false and skips all actions.
|
|
499
519
|
if (this.config.peerApproval?.mode === "required") {
|
|
500
|
-
// Wire up close handler to clean up if connection drops while pending
|
|
501
|
-
conn.on("close", () => this.onPeerDisconnected(conn));
|
|
502
520
|
return;
|
|
503
521
|
}
|
|
504
522
|
// In notify mode, requestApproval resolves immediately, but
|
|
@@ -515,6 +533,9 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
515
533
|
private completePeerJoin(conn: Connection, caps: NodeCapabilities) {
|
|
516
534
|
const nodeId = conn.remoteNodeId!;
|
|
517
535
|
|
|
536
|
+
// Cancel disconnect grace timer if the peer is reconnecting
|
|
537
|
+
const wasInGrace = this.cancelDisconnectGrace(nodeId);
|
|
538
|
+
|
|
518
539
|
// If there's an existing connection for this nodeId (e.g. peer reconnected
|
|
519
540
|
// while old TCP hadn't closed yet), close it AFTER overwriting the route so
|
|
520
541
|
// the stale-close guard in onPeerDisconnected correctly skips cleanup.
|
|
@@ -585,15 +606,58 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
585
606
|
return;
|
|
586
607
|
}
|
|
587
608
|
|
|
609
|
+
// Grace period: defer peer_leave broadcast to allow quick reconnection
|
|
610
|
+
// (e.g. iOS WiFi ↔ cellular handoff, brief audio interruption).
|
|
611
|
+
// If the peer reconnects within the grace window, completePeerJoin
|
|
612
|
+
// will cancel this timer via cancelDisconnectGrace.
|
|
613
|
+
const graceMs = this.config.disconnectGrace ?? 30_000;
|
|
614
|
+
if (graceMs <= 0) {
|
|
615
|
+
this.executePeerLeave(nodeId, conn);
|
|
616
|
+
return;
|
|
617
|
+
}
|
|
618
|
+
debug("peer", `onPeerDisconnected(${nodeId}): starting ${graceMs / 1000}s grace period`);
|
|
619
|
+
|
|
620
|
+
// Clear any existing grace timer for this node (shouldn't happen, but be safe)
|
|
621
|
+
this.cancelDisconnectGrace(nodeId);
|
|
622
|
+
|
|
623
|
+
this.disconnectGraceTimers.set(nodeId, setTimeout(() => {
|
|
624
|
+
this.disconnectGraceTimers.delete(nodeId);
|
|
625
|
+
this.executePeerLeave(nodeId, conn);
|
|
626
|
+
}, graceMs));
|
|
627
|
+
}
|
|
628
|
+
|
|
629
|
+
/**
 * Cancel the pending disconnect grace timer for a node, if there is one.
 *
 * @param nodeId - Node whose deferred leave should be called off.
 * @returns `true` when a timer existed and was cleared, `false` otherwise.
 */
private cancelDisconnectGrace(nodeId: string): boolean {
  const pendingTimer = this.disconnectGraceTimers.get(nodeId);

  // No grace period in progress for this node.
  if (!pendingTimer) return false;

  clearTimeout(pendingTimer);
  this.disconnectGraceTimers.delete(nodeId);
  debug("peer", `cancelDisconnectGrace(${nodeId}): peer reconnected within grace period`);
  return true;
}
|
|
640
|
+
|
|
641
|
+
/** Execute the actual peer leave (after grace period expires or immediate for shutdown). */
|
|
642
|
+
private executePeerLeave(nodeId: string, conn?: Connection) {
|
|
643
|
+
// Double-check the route hasn't been replaced by a new connection during grace
|
|
644
|
+
if (conn) {
|
|
645
|
+
const currentRoute = this.router.getRoute(nodeId);
|
|
646
|
+
if (currentRoute?.connection && currentRoute.connection !== conn) {
|
|
647
|
+
debug("peer", `executePeerLeave(${nodeId}): route replaced during grace — skipping`);
|
|
648
|
+
return;
|
|
649
|
+
}
|
|
650
|
+
}
|
|
651
|
+
|
|
588
652
|
audit("peer_leave", { nodeId });
|
|
589
653
|
this.router.removePeer(nodeId);
|
|
590
654
|
|
|
591
655
|
// Remove satellite contexts that were only reachable via this peer
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
}
|
|
656
|
+
for (let i = this.satelliteContexts.length - 1; i >= 0; i--) {
|
|
657
|
+
if (this.satelliteContexts[i].nodeId === nodeId) {
|
|
658
|
+
this.satelliteContexts.splice(i, 1);
|
|
659
|
+
}
|
|
660
|
+
}
|
|
597
661
|
|
|
598
662
|
this.router.broadcast({
|
|
599
663
|
type: "peer_leave",
|
|
@@ -748,13 +812,17 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
748
812
|
const prev = this.router.getRoute(peer.nodeId);
|
|
749
813
|
const hadAgents = prev?.agents.length ?? 0;
|
|
750
814
|
const hadDirectPeers = prev?.directPeers.length ?? 0;
|
|
751
|
-
const hadToolProxy = JSON.stringify(prev?.toolProxy);
|
|
752
815
|
const hadDeviceInfo = prev?.deviceInfo?.hostname;
|
|
753
816
|
const hadAcpAgents = prev?.acpAgents?.length ?? 0;
|
|
817
|
+
const hadToolProxyEnabled = prev?.toolProxy?.enabled;
|
|
818
|
+
const hadToolProxyCatalogLen = prev?.toolProxy?.catalog?.length ?? 0;
|
|
819
|
+
const hadToolProxyAllowLen = prev?.toolProxy?.allow?.length ?? 0;
|
|
754
820
|
this.router.updatePeerCapabilities(peer.nodeId, peer);
|
|
755
821
|
if (peer.agents.length !== hadAgents || peer.models.length !== (prev?.models.length ?? 0)
|
|
756
822
|
|| (peer.directPeers?.length ?? 0) !== hadDirectPeers
|
|
757
|
-
||
|
|
823
|
+
|| peer.toolProxy?.enabled !== hadToolProxyEnabled
|
|
824
|
+
|| (peer.toolProxy?.catalog?.length ?? 0) !== hadToolProxyCatalogLen
|
|
825
|
+
|| (peer.toolProxy?.allow?.length ?? 0) !== hadToolProxyAllowLen
|
|
758
826
|
|| peer.deviceInfo?.hostname !== hadDeviceInfo
|
|
759
827
|
|| (peer.acpAgents?.length ?? 0) !== hadAcpAgents) {
|
|
760
828
|
changed = true;
|
package/src/rate-limiter.ts
CHANGED
|
@@ -33,19 +33,20 @@ export class RateLimiter {
|
|
|
33
33
|
|
|
34
34
|
let timestamps = this.attempts.get(ip);
|
|
35
35
|
if (timestamps) {
|
|
36
|
-
//
|
|
37
|
-
|
|
36
|
+
// In-place pruning: find first non-expired index and splice
|
|
37
|
+
let firstValid = 0;
|
|
38
|
+
while (firstValid < timestamps.length && timestamps[firstValid] <= cutoff) firstValid++;
|
|
39
|
+
if (firstValid > 0) timestamps.splice(0, firstValid);
|
|
38
40
|
} else {
|
|
39
41
|
timestamps = [];
|
|
42
|
+
this.attempts.set(ip, timestamps);
|
|
40
43
|
}
|
|
41
44
|
|
|
42
45
|
if (timestamps.length >= this.config.maxAttempts) {
|
|
43
|
-
this.attempts.set(ip, timestamps);
|
|
44
46
|
return false;
|
|
45
47
|
}
|
|
46
48
|
|
|
47
49
|
timestamps.push(now);
|
|
48
|
-
this.attempts.set(ip, timestamps);
|
|
49
50
|
return true;
|
|
50
51
|
}
|
|
51
52
|
|
|
@@ -61,19 +62,24 @@ export class RateLimiter {
|
|
|
61
62
|
/** Get remaining attempts for an IP. */
|
|
62
63
|
remaining(ip: string): number {
|
|
63
64
|
const cutoff = Date.now() - this.config.windowMs;
|
|
64
|
-
const timestamps = this.attempts.get(ip)
|
|
65
|
-
|
|
65
|
+
const timestamps = this.attempts.get(ip);
|
|
66
|
+
if (!timestamps) return this.config.maxAttempts;
|
|
67
|
+
let active = 0;
|
|
68
|
+
for (let i = timestamps.length - 1; i >= 0; i--) {
|
|
69
|
+
if (timestamps[i] > cutoff) active++; else break;
|
|
70
|
+
}
|
|
66
71
|
return Math.max(0, this.config.maxAttempts - active);
|
|
67
72
|
}
|
|
68
73
|
|
|
69
74
|
private gc() {
|
|
70
75
|
const cutoff = Date.now() - this.config.windowMs;
|
|
71
76
|
for (const [ip, timestamps] of this.attempts) {
|
|
72
|
-
|
|
73
|
-
|
|
77
|
+
let firstValid = 0;
|
|
78
|
+
while (firstValid < timestamps.length && timestamps[firstValid] <= cutoff) firstValid++;
|
|
79
|
+
if (firstValid === timestamps.length) {
|
|
74
80
|
this.attempts.delete(ip);
|
|
75
|
-
} else {
|
|
76
|
-
|
|
81
|
+
} else if (firstValid > 0) {
|
|
82
|
+
timestamps.splice(0, firstValid);
|
|
77
83
|
}
|
|
78
84
|
}
|
|
79
85
|
}
|