clawmatrix 0.6.1 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,58 +3,102 @@ name: clawmatrix
3
3
  description: Use the clawmatrix CLI to interact with remote devices (phones, computers, servers) in a mesh cluster — run tools, check location, get battery status, read/write files, execute commands, and more on any connected node.
4
4
  ---
5
5
 
6
- Use the `clawmatrix` CLI to interact with the ClawMatrix mesh cluster. Remote nodes can be phones (iPhone/Android), computers, or servers.
6
+ Use the `clawmatrix` CLI to interact with the ClawMatrix mesh cluster. Remote nodes can be phones (iPhone/Android), computers, or servers. You can invoke any tool available on remote nodes — including getting device location, battery status, running shell commands, reading/writing files, and more.
7
7
 
8
- Run `clawmatrix --help` or `clawmatrix <command> --help` for detailed usage.
8
+ All commands output LLM-friendly text (no ANSI colors) when stdout is not a TTY.
9
9
 
10
- ## Capabilities
10
+ ## Quick Start
11
11
 
12
- **Cluster & Discovery**
13
- - View cluster topology, peer status, node config, and uptime/availability
14
- - Check reachability of specific nodes with latency
15
- - Approve/deny/revoke peer connections
12
+ 1. Run `clawmatrix status` to see connected nodes and their capabilities
13
+ 2. Run `clawmatrix tools <nodeId>` to see what tools a node offers
14
+ 3. Run `clawmatrix call <nodeId> <tool> '<params>'` to invoke a tool
16
15
 
17
- **Remote Tool Execution**
18
- - Discover tools across all nodes (filter by keyword, describe params)
19
- - Invoke single tools or batch multiple in one round-trip
20
- - Tools include device-specific capabilities: location, battery, camera, clipboard, contacts, calendar, health data, HomeKit, etc.
16
+ ## Cluster Status
21
17
 
22
- **Models**
23
- - List all LLM models available across the cluster, filterable by node
18
+ ```bash
19
+ clawmatrix status # Cluster topology: peers, agents, models, tags
20
+ clawmatrix status --json # Structured JSON output
21
+ clawmatrix check <nodeId> # Quick reachability check with latency
22
+ ```
24
23
 
25
- **Agent Delegation**
26
- - `handoff` — delegate tasks to remote agents with streaming output and failover
27
- - `send` — fire-and-forget messages to remote nodes
28
- - `acp` — manage persistent coding agent sessions (Claude Code, Codex, Gemini) with prompt/resume/cancel/close
24
+ Always start with `clawmatrix status` to understand the current topology before performing other operations.
29
25
 
30
- **Events & Automations**
31
- - Query, consume, and ingest events from external sources (iOS Shortcuts, webhooks, etc.)
32
- - Manage automation rules: list, create/save, manually trigger, replay historical executions
26
+ ## Remote Tools
33
27
 
34
- **Infrastructure**
35
- - `diagnostic` sentinel-based diagnostics (works even when gateway is down)
36
- - `terminal` interactive PTY sessions on remote nodes
37
- - `transfer` push/pull files up to 100MB between nodes with integrity verification
38
- - `notify` push Dynamic Island / Live Activity notifications to iOS devices
28
+ ```bash
29
+ clawmatrix tools # List all remote tools (compact)
30
+ clawmatrix tools <nodeId> # Tools on a specific node
31
+ clawmatrix tools --describe <tool> # Full usage and parameter schema
32
+ clawmatrix tools --filter <keyword> # Search by name or description
33
+ ```
39
34
 
40
- **Knowledge Sync (CRDT)**
41
- - List synced workspace files, view change history, line-by-line blame, read file content
42
- - All knowledge is CRDT-based and mesh-synced across nodes
35
+ Use `--describe` to understand a tool's parameters before calling it.
43
36
 
44
- **Kanban Board**
45
- - Distributed task board: create, list, get, update, claim, move, annotate, delete cards
46
- - Filter by stage/priority/label/node; stages flow from `backlog` → `done` → `archived`
37
+ ## Invoke Tools
47
38
 
48
- **Config Management**
49
- - View runtime config summary
50
- - Read/write config files under `~/.openclaw/` (for remote node configuration)
39
+ ```bash
40
+ clawmatrix call <nodeId> <tool> '<json-params>' # Single tool invocation
41
+ clawmatrix call <nodeId> <tool> '<json-params>' -t 30000 # With timeout (ms)
42
+ ```
51
43
 
52
- ## Important Notes
44
+ ```bash
45
+ clawmatrix batch <nodeId> '[{"tool":"t1","params":{}},{"tool":"t2","params":{}}]'
46
+ clawmatrix batch <nodeId> --no-stop-on-error '[...]' # Continue on failure
47
+ ```
53
48
 
54
- - **Always start with `clawmatrix status`** to understand the cluster topology before other operations
55
- - **Use `clawmatrix tools --describe <tool>`** to check a tool's parameter schema before calling it
56
- - **Use `clawmatrix notify` proactively during long tasks** (builds, tests, large refactors) so the user can track progress on their phone without watching the terminal
57
- - Output is **LLM-optimized**: no ANSI colors and compact format when stdout is not a TTY; add `--json` for structured output
58
- - Target nodes by exact `nodeId` or by tag expression `tags:<tag>`
59
- - Most commands support `--json` for structured output
60
- - `batch` and `automations save` support stdin for piping data
49
+ Batch supports stdin: `echo '<json>' | clawmatrix batch <nodeId>`
50
+
51
+ ## Models
52
+
53
+ ```bash
54
+ clawmatrix models # All cluster models
55
+ clawmatrix models --node <nodeId> # Filter by node
56
+ ```
57
+
58
+ ## Events
59
+
60
+ ```bash
61
+ clawmatrix events # Unconsumed events
62
+ clawmatrix events --type <type> # Filter by type
63
+ clawmatrix events --source <nodeId> # Filter by source node
64
+ clawmatrix events --consume <id1,id2> # Mark events as consumed
65
+ clawmatrix events --all # Include consumed events
66
+ ```
67
+
68
+ ## Peer Approval
69
+
70
+ ```bash
71
+ clawmatrix approve <approvalId> # Approve a pending peer
72
+ clawmatrix deny <approvalId> # Deny a pending peer
73
+ clawmatrix approval list # List pending/approved/denied peers
74
+ clawmatrix approval revoke <nodeId> # Revoke an approved peer
75
+ ```
76
+
77
+ ## Notifications (Dynamic Island / Live Activity)
78
+
79
+ Push progress notifications to the user's iPhone via `clawmatrix notify`. This triggers the Dynamic Island and lock screen Live Activity.
80
+
81
+ **Use this proactively when running long tasks** (iOS builds, large refactors, test suites, batch operations) so the user can track progress without watching the terminal.
82
+
83
+ ```bash
84
+ # Start a notification
85
+ clawmatrix notify "iOS 构建" --detail "正在编译..."
86
+ # Returns: {"taskId":"<id>", "action":"start", "targets":1}
87
+
88
+ # Update progress
89
+ clawmatrix notify "iOS 构建" --action update --task-id <id> --detail "链接中..." --progress 80
90
+
91
+ # End (success)
92
+ clawmatrix notify "iOS 构建" --action end --task-id <id>
93
+ ```
94
+
95
+ Options: `--detail <text>`, `--progress <0-100>`, `--action start|update|end`, `--task-id <id>`, `--tool <name>`
96
+
97
+ ## Workflow
98
+
99
+ 1. Run `clawmatrix status` to see the cluster topology
100
+ 2. Use `clawmatrix tools --filter <keyword>` to find relevant tools
101
+ 3. Use `clawmatrix tools --describe <tool>` to check parameters
102
+ 4. Use `clawmatrix call` or `clawmatrix batch` to invoke tools
103
+ 5. If a call fails, run `clawmatrix check <nodeId>` to verify connectivity
104
+ 6. For long-running tasks, use `clawmatrix notify` to push progress to the user's phone
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clawmatrix",
3
- "version": "0.6.1",
3
+ "version": "0.6.3",
4
4
  "description": "Decentralized mesh cluster plugin for OpenClaw — inter-gateway communication, model proxy, task handoff, and tool proxy.",
5
5
  "type": "module",
6
6
  "license": "MIT",
package/src/acp-proxy.ts CHANGED
@@ -2626,6 +2626,42 @@ export class AcpProxy {
2626
2626
  return null;
2627
2627
  }
2628
2628
 
2629
+ /** Reject a specific pending request by frame id (e.g. from relay_fail). */
2630
+ rejectPending(frameId: string, reason: string) {
2631
+ const entry = this.pending.get(frameId);
2632
+ if (entry) {
2633
+ clearTimeout(entry.timer);
2634
+ this.pending.delete(frameId);
2635
+ entry.reject(new Error(reason));
2636
+ }
2637
+ }
2638
+
2639
+ /**
2640
+ * Fail-fast all pending requests targeting a disconnected node (or nodes
2641
+ * that were reachable only via that node as relay).
2642
+ * Called by ClusterRuntime on peerDisconnected.
2643
+ */
2644
+ onPeerDisconnected(nodeId: string) {
2645
+ for (const [id, entry] of this.pending) {
2646
+ const target = entry.targetNodeId;
2647
+ if (!target) continue;
2648
+ // Direct match: request was targeting the disconnected node
2649
+ if (target === nodeId) {
2650
+ clearTimeout(entry.timer);
2651
+ this.pending.delete(id);
2652
+ entry.reject(new Error(`Node "${target}" disconnected while request was pending`));
2653
+ continue;
2654
+ }
2655
+ // Relay match: the target has no usable route left (no open direct connection and no relay), e.g. it was reachable only via the disconnected relay
2656
+ const route = this.peerManager.router.getRoute(target);
2657
+ if (!route || (!route.connection?.isOpen && !route.reachableVia)) {
2658
+ clearTimeout(entry.timer);
2659
+ this.pending.delete(id);
2660
+ entry.reject(new Error(`Node "${target}" became unreachable (relay "${nodeId}" disconnected)`));
2661
+ }
2662
+ }
2663
+ }
2664
+
2629
2665
  private sendRequest(
2630
2666
  targetNodeId: string,
2631
2667
  agent: string,
@@ -175,6 +175,9 @@ export class ClusterRuntime {
175
175
  this.refreshDiscoveredModels();
176
176
  this.healthTracker.recordPeerOffline(nodeId);
177
177
  this.healthTracker.removePeerSync(nodeId);
178
+ // Fail-fast any pending requests targeting this node or relayed through it
179
+ this.acpProxy?.onPeerDisconnected(nodeId);
180
+ this.handoffManager?.onPeerDisconnected(nodeId);
178
181
  });
179
182
 
180
183
  this.peerManager.on("peerCapabilitiesChanged", () => {
@@ -275,7 +278,12 @@ export class ClusterRuntime {
275
278
  // Auto-detect ACP agents if ACP is enabled but no agents are explicitly configured
276
279
  // Check both ClawMatrix and OpenClaw configs (consistent with acpProxy creation above)
277
280
  if (this.acpProxy && (!this.config.acp?.agents || this.config.acp.agents.length === 0)) {
278
- AcpProxy.detectAvailableAgents(this.config.acp?.commands).then((detected) => {
281
+ const openclawAcp = (this.openclawConfig as Record<string, any>).acp;
282
+ const detectionCommands = {
283
+ ...(openclawAcp?.commands && typeof openclawAcp.commands === "object" ? openclawAcp.commands : {}),
284
+ ...(this.config.acp?.commands ?? {}),
285
+ };
286
+ AcpProxy.detectAvailableAgents(Object.keys(detectionCommands).length > 0 ? detectionCommands : undefined).then((detected) => {
279
287
  if (detected.length > 0) {
280
288
  this.logger.info(`[clawmatrix] Auto-detected ACP agents: ${detected.map((a) => a.id).join(", ")}`);
281
289
  this.peerManager.updateAcpAgents(detected);
@@ -891,6 +899,22 @@ export class ClusterRuntime {
891
899
  this.apiHandler.pushKanbanEvent(kf.payload);
892
900
  }
893
901
  break;
902
+ case "relay_fail": {
903
+ // A relay node could not forward our frame to its target.
904
+ // Clean up the stale relay route and reject pending requests.
905
+ const target = (frame.payload as any)?.target ?? "unknown";
906
+ debug("dispatch", `relay_fail id=${frame.id} target=${target} from=${frame.from}`);
907
+ // Remove the stale relay route so we don't keep sending frames into a dead path
908
+ if (typeof target === "string" && target !== "unknown") {
909
+ this.peerManager.router.removeRelayRoute(target);
910
+ }
911
+ if (frame.id) {
912
+ this.acpProxy?.rejectPending(frame.id, `Relay "${frame.from}" cannot reach "${target}"`);
913
+ this.handoffManager?.rejectPending(frame.id, `Relay "${frame.from}" cannot reach "${target}"`);
914
+ this.toolProxy.rejectPending(frame.id, `Relay "${frame.from}" cannot reach "${target}"`);
915
+ }
916
+ break;
917
+ }
894
918
  }
895
919
  }
896
920
 
package/src/connection.ts CHANGED
@@ -147,7 +147,17 @@ export class Connection extends EventEmitter<ConnectionEvents> {
147
147
 
148
148
  /** Bind standard WebSocket event listeners. Call this for outbound connections. */
149
149
  bindWebSocket(ws: WebSocket) {
150
- ws.addEventListener("message", (ev) => this.onRawMessage(ev.data));
150
+ // Node.js 22+ built-in WebSocket defaults binaryType to "blob", which
151
+ // onRawMessage cannot handle (Blob is async-only). Set "arraybuffer"
152
+ // so binary frames arrive as ArrayBuffer, then normalize to Buffer below.
153
+ if ("binaryType" in ws) {
154
+ (ws as any).binaryType = "arraybuffer";
155
+ }
156
+ ws.addEventListener("message", (ev) => {
157
+ // Normalize ArrayBuffer → Buffer for consistent handling across runtimes
158
+ const data = ev.data instanceof ArrayBuffer ? Buffer.from(ev.data) : ev.data;
159
+ this.onRawMessage(data);
160
+ });
151
161
  ws.addEventListener("close", (ev) => {
152
162
  this.close(ev.code, ev.reason);
153
163
  });
@@ -208,11 +218,19 @@ export class Connection extends EventEmitter<ConnectionEvents> {
208
218
 
209
219
  /** Send raw data. Buffers sent as binary frames; strings as-is; objects JSON-encoded. */
210
220
  private sendRaw(data: unknown) {
211
- if (this.transport.readyState === WebSocket.OPEN) {
212
- if (Buffer.isBuffer(data)) {
213
- this.transport.send(data);
214
- } else {
215
- this.transport.send(typeof data === "string" ? data : JSON.stringify(data));
221
+ try {
222
+ if (this.transport.readyState === WebSocket.OPEN) {
223
+ if (Buffer.isBuffer(data)) {
224
+ this.transport.send(data);
225
+ } else {
226
+ this.transport.send(typeof data === "string" ? data : JSON.stringify(data));
227
+ }
228
+ }
229
+ } catch (err) {
230
+ debug("send", `sendRaw failed for ${this.remoteNodeId ?? "unknown"}: ${err}`);
231
+ // Transport is broken — defer close to a microtask to avoid re-entrancy
232
+ if (!this.closed) {
233
+ queueMicrotask(() => this.close(4002, "send failed"));
216
234
  }
217
235
  }
218
236
  }
@@ -269,25 +287,6 @@ export class Connection extends EventEmitter<ConnectionEvents> {
269
287
  private async onRawMessage(data: unknown) {
270
288
  this.lastReceivedAt = Date.now();
271
289
 
272
- // Debug: log data type for unauthenticated connections to diagnose auth issues
273
- if (!this.authenticated && this.role === "outbound") {
274
- const dtype = Buffer.isBuffer(data) ? `Buffer(${(data as Buffer).length})` :
275
- data instanceof ArrayBuffer ? `ArrayBuffer(${data.byteLength})` :
276
- typeof data === "string" ? `string(${data.length})` :
277
- `${data?.constructor?.name ?? typeof data}`;
278
- debug("auth", `onRawMessage[outbound] dataType=${dtype}`);
279
- }
280
-
281
- // Normalize non-Buffer binary types to Buffer.
282
- // Node.js 24+'s built-in WebSocket (undici) delivers binary frames as Blob
283
- // (binaryType "blob") or ArrayBuffer (binaryType "arraybuffer").
284
- // The ws package delivers Buffer (binaryType "nodebuffer").
285
- if (data instanceof ArrayBuffer) {
286
- data = Buffer.from(data);
287
- } else if (typeof Blob !== "undefined" && data instanceof Blob) {
288
- data = Buffer.from(await data.arrayBuffer());
289
- }
290
-
291
290
  let frame: AnyClusterFrame | undefined;
292
291
 
293
292
  // Binary frame (Buffer) — decrypt directly without base64
@@ -403,7 +402,7 @@ export class Connection extends EventEmitter<ConnectionEvents> {
403
402
  }
404
403
 
405
404
  if (frame.type === "ping") {
406
- this.send({ type: "pong", from: this.nodeId, timestamp: Date.now() } as AnyClusterFrame);
405
+ this.sendDirect({ type: "pong", from: this.nodeId, timestamp: Date.now() } as AnyClusterFrame);
407
406
  return;
408
407
  }
409
408
  if (frame.type === "pong") {
@@ -579,7 +578,6 @@ export class Connection extends EventEmitter<ConnectionEvents> {
579
578
  }
580
579
 
581
580
  // auth_ok (decrypted from binary envelope, or plaintext legacy)
582
- debug("auth", `Outbound received frame type=${frame.type} (authenticated=${this.authenticated})`);
583
581
  if (frame.type === "auth_ok") {
584
582
  const ok = frame as AuthOk;
585
583
  this.remoteNodeId = ok.payload.nodeId;
@@ -718,7 +716,7 @@ export class Connection extends EventEmitter<ConnectionEvents> {
718
716
  return;
719
717
  }
720
718
  this.lastPingSentAt = Date.now();
721
- this.send({
719
+ this.sendDirect({
722
720
  type: "ping",
723
721
  from: this.nodeId,
724
722
  timestamp: this.lastPingSentAt,
@@ -755,7 +753,7 @@ export class Connection extends EventEmitter<ConnectionEvents> {
755
753
  close(code = 1000, reason = "normal") {
756
754
  if (this.closed) return;
757
755
  // Flush any pending batch before closing
758
- this.flushBatch();
756
+ try { this.flushBatch(); } catch { /* transport may already be dead */ }
759
757
  this.closed = true;
760
758
  this.clearAuthTimer();
761
759
  if (this.heartbeatTimer) {
package/src/handoff.ts CHANGED
@@ -744,6 +744,38 @@ export class HandoffManager {
744
744
  return false;
745
745
  }
746
746
 
747
+ /** Reject a specific pending request by frame id (e.g. from relay_fail). */
748
+ rejectPending(frameId: string, reason: string) {
749
+ const entry = this.pending.get(frameId);
750
+ if (entry) {
751
+ clearTimeout(entry.timer);
752
+ this.pending.delete(frameId);
753
+ entry.reject(new Error(reason));
754
+ }
755
+ }
756
+
757
+ /**
758
+ * Fail-fast pending handoff requests targeting a disconnected node
759
+ * (or nodes reachable only via that relay). Called on peerDisconnected.
760
+ */
761
+ onPeerDisconnected(nodeId: string) {
762
+ for (const [id, entry] of this.pending) {
763
+ if (entry.targetNodeId === nodeId) {
764
+ clearTimeout(entry.timer);
765
+ this.pending.delete(id);
766
+ entry.reject(new Error(`Node "${entry.targetNodeId}" disconnected while handoff was pending`));
767
+ continue;
768
+ }
769
+ // Check whether the target still has any usable route (it may have been reachable only via this relay)
770
+ const route = this.peerManager.router.getRoute(entry.targetNodeId);
771
+ if (!route || (!route.connection?.isOpen && !route.reachableVia)) {
772
+ clearTimeout(entry.timer);
773
+ this.pending.delete(id);
774
+ entry.reject(new Error(`Node "${entry.targetNodeId}" became unreachable (relay "${nodeId}" disconnected)`));
775
+ }
776
+ }
777
+ }
778
+
747
779
  destroy() {
748
780
  if (this.staleCleanupTimer) {
749
781
  clearInterval(this.staleCleanupTimer);
package/src/index.ts CHANGED
@@ -1537,291 +1537,6 @@ const plugin = {
1537
1537
  },
1538
1538
  );
1539
1539
 
1540
- // ── Availability gateway method ──────────────────────────────────
1541
-
1542
- api.registerGatewayMethod(
1543
- "clawmatrix.availability",
1544
- ({ params, respond }: GatewayRequestHandlerOptions) => {
1545
- try {
1546
- const runtime = getClusterRuntime();
1547
- const { range } = (params ?? {}) as { range?: string };
1548
- const validRanges = ["24h", "7d", "90d"];
1549
- const r = validRanges.includes(range ?? "") ? (range as "24h" | "7d" | "90d") : "24h";
1550
- const result = runtime.healthTracker.getAvailability(r);
1551
- respond(true, result);
1552
- } catch {
1553
- respond(false, { error: "ClawMatrix service not running" });
1554
- }
1555
- },
1556
- );
1557
-
1558
- // ── Automation gateway methods ──────────────────────────────────
1559
-
1560
- api.registerGatewayMethod(
1561
- "clawmatrix.automations.rules",
1562
- ({ respond }: GatewayRequestHandlerOptions) => {
1563
- try {
1564
- const runtime = getClusterRuntime();
1565
- if (!runtime.automationManager) {
1566
- respond(true, { rules: [] });
1567
- return;
1568
- }
1569
- respond(true, { rules: runtime.automationManager.getRules() });
1570
- } catch {
1571
- respond(false, { error: "ClawMatrix service not running" });
1572
- }
1573
- },
1574
- );
1575
-
1576
- api.registerGatewayMethod(
1577
- "clawmatrix.automations.save",
1578
- async ({ params, respond }: GatewayRequestHandlerOptions) => {
1579
- try {
1580
- const runtime = getClusterRuntime();
1581
- if (!runtime.automationManager) {
1582
- respond(false, { error: "Automations not available" });
1583
- return;
1584
- }
1585
- const { rules } = (params ?? {}) as { rules?: unknown[] };
1586
- if (!Array.isArray(rules)) {
1587
- respond(false, { error: "Missing required param: rules (array)" });
1588
- return;
1589
- }
1590
- await runtime.automationManager.saveRules(rules as Parameters<typeof runtime.automationManager.saveRules>[0]);
1591
- respond(true, { ok: true, count: rules.length });
1592
- } catch (err) {
1593
- respond(false, { error: String(err) });
1594
- }
1595
- },
1596
- );
1597
-
1598
- api.registerGatewayMethod(
1599
- "clawmatrix.automations.history",
1600
- ({ params, respond }: GatewayRequestHandlerOptions) => {
1601
- try {
1602
- const runtime = getClusterRuntime();
1603
- if (!runtime.automationManager) {
1604
- respond(true, { executions: [] });
1605
- return;
1606
- }
1607
- const { limit } = (params ?? {}) as { limit?: number };
1608
- respond(true, { executions: runtime.automationManager.getExecutions(limit ?? 50) });
1609
- } catch {
1610
- respond(false, { error: "ClawMatrix service not running" });
1611
- }
1612
- },
1613
- );
1614
-
1615
- api.registerGatewayMethod(
1616
- "clawmatrix.automations.run",
1617
- async ({ params, respond }: GatewayRequestHandlerOptions) => {
1618
- try {
1619
- const runtime = getClusterRuntime();
1620
- if (!runtime.automationManager) {
1621
- respond(false, { error: "Automations not available" });
1622
- return;
1623
- }
1624
- const { ruleId, event } = (params ?? {}) as { ruleId?: string; event?: Record<string, unknown> };
1625
- if (!ruleId) {
1626
- respond(false, { error: "Missing required param: ruleId" });
1627
- return;
1628
- }
1629
- const evt = event ? {
1630
- id: nanoid(),
1631
- source: String(event.source || "cli"),
1632
- type: String(event.type || "manual"),
1633
- data: (event.data ?? {}) as Record<string, unknown>,
1634
- ts: typeof event.ts === "number" ? event.ts : Date.now(),
1635
- consumed: false,
1636
- } as import("./types.ts").IngestedEvent : undefined;
1637
- const execution = await runtime.automationManager.runRuleById(ruleId, evt);
1638
- respond(true, { ok: true, execution });
1639
- } catch (err) {
1640
- respond(false, { error: String(err) });
1641
- }
1642
- },
1643
- );
1644
-
1645
- api.registerGatewayMethod(
1646
- "clawmatrix.automations.replay",
1647
- async ({ params, respond }: GatewayRequestHandlerOptions) => {
1648
- try {
1649
- const runtime = getClusterRuntime();
1650
- if (!runtime.automationManager) {
1651
- respond(false, { error: "Automations not available" });
1652
- return;
1653
- }
1654
- const { executionId } = (params ?? {}) as { executionId?: string };
1655
- if (!executionId) {
1656
- respond(false, { error: "Missing required param: executionId" });
1657
- return;
1658
- }
1659
- const execution = await runtime.automationManager.replayExecution(executionId);
1660
- respond(true, { ok: true, execution });
1661
- } catch (err) {
1662
- respond(false, { error: String(err) });
1663
- }
1664
- },
1665
- );
1666
-
1667
- // ── Events ingest gateway method ──────────────────────────────────
1668
-
1669
- api.registerGatewayMethod(
1670
- "clawmatrix.events.ingest",
1671
- ({ params, respond }: GatewayRequestHandlerOptions) => {
1672
- try {
1673
- const runtime = getClusterRuntime();
1674
- if (!runtime.apiHandler) {
1675
- respond(false, { error: "API handler not available (listen mode required)" });
1676
- return;
1677
- }
1678
- const { events } = (params ?? {}) as { events?: unknown[] };
1679
- if (!Array.isArray(events) || events.length === 0) {
1680
- respond(false, { error: "Missing required param: events (non-empty array)" });
1681
- return;
1682
- }
1683
- const result = runtime.apiHandler.ingestEvents(events as Array<Record<string, unknown>>);
1684
- respond(true, { ok: true, ...result });
1685
- } catch {
1686
- respond(false, { error: "ClawMatrix service not running" });
1687
- }
1688
- },
1689
- );
1690
-
1691
- // ── Board update gateway method ──────────────────────────────────
1692
-
1693
- api.registerGatewayMethod(
1694
- "clawmatrix.board.update",
1695
- ({ params, respond }: GatewayRequestHandlerOptions) => {
1696
- try {
1697
- const runtime = getClusterRuntime();
1698
- if (!runtime.kanbanManager) {
1699
- respond(false, { error: "Kanban not enabled" });
1700
- return;
1701
- }
1702
- const { cardId, ...updates } = (params ?? {}) as {
1703
- cardId?: string; title?: string; description?: string;
1704
- priority?: string; targetNode?: string; targetAgent?: string;
1705
- cwd?: string; labels?: string[];
1706
- };
1707
- if (!cardId) {
1708
- respond(false, { error: "Missing required param: cardId" });
1709
- return;
1710
- }
1711
- const card = runtime.kanbanManager.updateCard(cardId, updates as Parameters<typeof runtime.kanbanManager.updateCard>[1]);
1712
- if (!card) {
1713
- respond(false, { error: `Card not found: ${cardId}` });
1714
- return;
1715
- }
1716
- respond(true, card);
1717
- } catch {
1718
- respond(false, { error: "ClawMatrix service not running" });
1719
- }
1720
- },
1721
- );
1722
-
1723
- // ── Config gateway method ──────────────────────────────────────────
1724
-
1725
- api.registerGatewayMethod(
1726
- "clawmatrix.config.get",
1727
- ({ respond }: GatewayRequestHandlerOptions) => {
1728
- try {
1729
- const runtime = getClusterRuntime();
1730
- const c = runtime.config;
1731
- respond(true, {
1732
- nodeId: c.nodeId,
1733
- listen: c.listen,
1734
- tags: c.tags,
1735
- agents: c.agents.map((a) => ({ id: a.id, model: a.model })),
1736
- models: c.models.map((m) => ({ id: m.id, provider: m.provider })),
1737
- e2ee: c.e2ee,
1738
- toolProxy: c.toolProxy ? { enabled: c.toolProxy.enabled, allow: c.toolProxy.allow, deny: c.toolProxy.deny } : undefined,
1739
- terminal: c.terminal,
1740
- acp: c.acp ? { enabled: c.acp.enabled } : undefined,
1741
- knowledge: c.knowledge ? { enabled: c.knowledge.enabled } : undefined,
1742
- proxyModels: c.proxyModels?.length ?? 0,
1743
- peers: c.peers?.length ?? 0,
1744
- });
1745
- } catch {
1746
- respond(false, { error: "ClawMatrix service not running" });
1747
- }
1748
- },
1749
- );
1750
-
1751
- // ── Config file read/write gateway methods ──────────────────────
1752
-
1753
- api.registerGatewayMethod(
1754
- "clawmatrix.config.read",
1755
- async ({ params, respond }: GatewayRequestHandlerOptions) => {
1756
- try {
1757
- const runtime = getClusterRuntime();
1758
- if (!runtime.apiHandler) {
1759
- respond(false, { error: "API handler not available" });
1760
- return;
1761
- }
1762
- const { path: configPath } = (params ?? {}) as { path?: string };
1763
- if (!configPath) {
1764
- respond(false, { error: "Missing required param: path" });
1765
- return;
1766
- }
1767
- const result = await runtime.apiHandler.readConfigFile(configPath);
1768
- respond(result.success, result);
1769
- } catch (err) {
1770
- respond(false, { error: String(err) });
1771
- }
1772
- },
1773
- );
1774
-
1775
- api.registerGatewayMethod(
1776
- "clawmatrix.config.write",
1777
- async ({ params, respond }: GatewayRequestHandlerOptions) => {
1778
- try {
1779
- const runtime = getClusterRuntime();
1780
- if (!runtime.apiHandler) {
1781
- respond(false, { error: "API handler not available" });
1782
- return;
1783
- }
1784
- const { path: configPath, content } = (params ?? {}) as { path?: string; content?: string };
1785
- if (!configPath || typeof content !== "string") {
1786
- respond(false, { error: "Missing required params: path, content" });
1787
- return;
1788
- }
1789
- const result = await runtime.apiHandler.writeConfigFile(configPath, content);
1790
- respond(result.success, result);
1791
- } catch (err) {
1792
- respond(false, { error: String(err) });
1793
- }
1794
- },
1795
- );
1796
-
1797
- // ── Knowledge content gateway method ──────────────────────────────
1798
-
1799
- api.registerGatewayMethod(
1800
- "clawmatrix.kb.content",
1801
- ({ params, respond }: GatewayRequestHandlerOptions) => {
1802
- try {
1803
- const runtime = getClusterRuntime();
1804
- if (!runtime.knowledgeSync) {
1805
- respond(false, { error: "Knowledge sync not enabled" });
1806
- return;
1807
- }
1808
- const { path } = (params ?? {}) as { path?: string };
1809
- if (!path) {
1810
- respond(false, { error: "Missing required param: path" });
1811
- return;
1812
- }
1813
- const content = runtime.knowledgeSync.getFileContent(path);
1814
- if (content === null) {
1815
- respond(false, { error: "File not found or content not yet synced" });
1816
- return;
1817
- }
1818
- respond(true, { path, content });
1819
- } catch {
1820
- respond(false, { error: "ClawMatrix service not running" });
1821
- }
1822
- },
1823
- );
1824
-
1825
1540
  // Log model selection on each LLM call (fire-and-forget)
1826
1541
  api.on("llm_input", (event) => {
1827
1542
  api.logger.debug(`[clawmatrix] llm_input: provider=${event.provider} model=${event.model}`);
@@ -191,6 +191,11 @@ export class KnowledgeSync {
191
191
 
192
192
  // ── Public API ─────────────────────────────────────────────────
193
193
 
194
+ /** The resolved workspace directory that knowledge files are synced under. */
195
+ get workspacePath(): string {
196
+ return this.opts.workspacePath;
197
+ }
198
+
194
199
  async start() {
195
200
  debug(TAG, `starting knowledge sync: workspace=${this.opts.workspacePath}`);
196
201
 
@@ -536,7 +541,7 @@ export class KnowledgeSync {
536
541
  }
537
542
 
538
543
  /** List all synced files with metadata. */
539
- listSyncedFiles(): Array<{ path: string; version: number; updatedAt: number; deleted: boolean }> {
544
+ listSyncedFiles(): Array<{ path: string; version: number; updatedAt: number; deleted: boolean; synced: boolean }> {
540
545
  const files = this.registry.files;
541
546
  if (!files) return [];
542
547
  return Object.entries(files).map(([path, meta]) => ({
@@ -544,6 +549,7 @@ export class KnowledgeSync {
544
549
  version: meta.version,
545
550
  updatedAt: meta.updatedAt,
546
551
  deleted: meta.deleted,
552
+ synced: this.fileDocs.has(path) && this.fileDocs.get(path)!.content !== undefined,
547
553
  })).filter(f => !f.deleted);
548
554
  }
549
555
 
@@ -50,6 +50,8 @@ const SKIP_DEDUP_EXPLICIT = new Set([
50
50
  // File transfer
51
51
  "file_transfer_chunk", "file_transfer_chunk_ack",
52
52
  "file_transfer_ack", "file_transfer_complete",
53
+ // Relay failure notification (shares id with the original request)
54
+ "relay_fail",
53
55
  ]);
54
56
 
55
57
  function skipDedup(type: string): boolean {
@@ -151,6 +153,8 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
151
153
  acpAgents = config.acp.agents;
152
154
  } else if (Array.isArray(ocAcp?.allowedAgents) && ocAcp.allowedAgents.length > 0) {
153
155
  acpAgents = ocAcp.allowedAgents.map((id: string) => ({ id, description: "" }));
156
+ } else if (ocAcp?.commands && typeof ocAcp.commands === "object") {
157
+ acpAgents = Object.keys(ocAcp.commands).map((id) => ({ id, description: "" }));
154
158
  }
155
159
  }
156
160
  this.localCapabilities = {
@@ -231,6 +235,8 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
231
235
  }
232
236
  // Start route probing for peers with multiple URLs
233
237
  this.startRouteProbing();
238
+ // Start periodic sweep of stale relay routes
239
+ this.router.startRelaySweep();
234
240
  }
235
241
 
236
242
  async stop() {
@@ -719,9 +725,15 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
719
725
  debug("peer", `connectToChannel(${nodeId}): self-connection, will not reconnect`);
720
726
  return;
721
727
  }
722
- // Don't reconnect warm-up pruned channels
723
- if (ev.reason === "warm-up pruned") {
724
- debug("peer", `connectToChannel(${nodeId}): warm-up pruned, will not reconnect`);
728
+ // Don't reconnect deliberate closures that shouldn't trigger reconnection
729
+ const skipReasons = ["warm-up pruned", "route switch", "replaced by new connection"];
730
+ if (skipReasons.includes(ev.reason)) {
731
+ debug("peer", `connectToChannel(${nodeId}): ${ev.reason}, will not reconnect`);
732
+ return;
733
+ }
734
+ // Don't reconnect when peer approval was denied — retrying is futile
735
+ if (ev.code === 4005) {
736
+ debug("peer", `connectToChannel(${nodeId}): approval denied, will not reconnect`);
725
737
  return;
726
738
  }
727
739
  // Record close code for adaptive backoff
@@ -1245,6 +1257,22 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
1245
1257
  // No remote alternative — fall through to local handling
1246
1258
  // (model-proxy will handle locally or send error back)
1247
1259
  } else {
1260
+ // Relay failed — clean up the stale route so we don't keep trying it,
1261
+ // and notify the sender so it can fail-fast.
1262
+ if (frame.to) {
1263
+ this.router.removeRelayRoute(frame.to);
1264
+ }
1265
+ if (frame.from && frame.id) {
1266
+ debug("peer", `relay failed for ${frame.type} id=${frame.id} to=${frame.to}, sending relay_fail to ${frame.from}`);
1267
+ this.sendTo(frame.from, {
1268
+ type: "relay_fail",
1269
+ id: frame.id,
1270
+ from: this.config.nodeId,
1271
+ to: frame.from,
1272
+ timestamp: Date.now(),
1273
+ payload: { target: frame.to, reason: "unreachable" },
1274
+ } as AnyClusterFrame);
1275
+ }
1248
1276
  return;
1249
1277
  }
1250
1278
  }
@@ -1329,21 +1357,18 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
1329
1357
  if (peer.nodeId === this.config.nodeId) continue;
1330
1358
  if (peer.nodeId === from.remoteNodeId) {
1331
1359
  const prev = this.router.getRoute(peer.nodeId);
1332
- const hadAgents = prev?.agents.length ?? 0;
1333
- const hadDirectPeers = prev?.directPeers.length ?? 0;
1334
- const hadDeviceInfo = prev?.deviceInfo?.hostname;
1335
- const hadAcpAgents = prev?.acpAgents?.length ?? 0;
1336
- const hadToolProxyEnabled = prev?.toolProxy?.enabled;
1337
- const hadToolProxyCatalogLen = prev?.toolProxy?.catalog?.length ?? 0;
1338
- const hadToolProxyAllowLen = prev?.toolProxy?.allow?.length ?? 0;
1360
+ const prevSnapshot = prev ? JSON.stringify({
1361
+ a: prev.agents.length, m: prev.models.length,
1362
+ dp: prev.directPeers?.length, tp: prev.toolProxy,
1363
+ di: prev.deviceInfo, aa: prev.acpAgents?.length,
1364
+ }) : "";
1339
1365
  this.router.updatePeerCapabilities(peer.nodeId, peer);
1340
- if (peer.agents.length !== hadAgents || peer.models.length !== (prev?.models.length ?? 0)
1341
- || (peer.directPeers?.length ?? 0) !== hadDirectPeers
1342
- || peer.toolProxy?.enabled !== hadToolProxyEnabled
1343
- || (peer.toolProxy?.catalog?.length ?? 0) !== hadToolProxyCatalogLen
1344
- || (peer.toolProxy?.allow?.length ?? 0) !== hadToolProxyAllowLen
1345
- || peer.deviceInfo?.hostname !== hadDeviceInfo
1346
- || (peer.acpAgents?.length ?? 0) !== hadAcpAgents) {
1366
+ const newSnapshot = JSON.stringify({
1367
+ a: peer.agents.length, m: peer.models.length,
1368
+ dp: peer.directPeers?.length, tp: peer.toolProxy,
1369
+ di: peer.deviceInfo, aa: peer.acpAgents?.length,
1370
+ });
1371
+ if (newSnapshot !== prevSnapshot) {
1347
1372
  changed = true;
1348
1373
  }
1349
1374
  } else {
@@ -1351,7 +1376,14 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
1351
1376
  // using them as relay would create a routing loop.
1352
1377
  if (peer.reachableVia === this.config.nodeId) continue;
1353
1378
  const existing = this.router.getRoute(peer.nodeId);
1354
- if (!existing) changed = true;
1379
+ if (!existing) {
1380
+ changed = true;
1381
+ } else {
1382
+ // Detect changes in existing relay peer (e.g. version update after restart)
1383
+ const prevSnap = JSON.stringify({ a: existing.agents.length, m: existing.models.length, di: existing.deviceInfo, aa: existing.acpAgents?.length });
1384
+ const newSnap = JSON.stringify({ a: peer.agents.length, m: peer.models.length, di: peer.deviceInfo, aa: peer.acpAgents?.length });
1385
+ if (newSnap !== prevSnap) changed = true;
1386
+ }
1355
1387
  this.router.addRelayPeer(peer, from.remoteNodeId!);
1356
1388
  }
1357
1389
  }
package/src/router.ts CHANGED
@@ -8,6 +8,9 @@ const MAX_SEEN_FRAMES = 10_000;
8
8
  const MAX_FAILED_REQUESTS = 5_000;
9
9
  const SEEN_FRAME_TTL = 120_000; // 2 minutes (was ~60-120s with double-map rotation)
10
10
  const FAILED_REQUEST_TTL = 900_000; // 15 minutes
11
+ /** Relay routes older than this without a peer_sync refresh are considered stale. */
12
+ const RELAY_ROUTE_MAX_AGE = 300_000; // 5 minutes
13
+ const RELAY_ROUTE_SWEEP_INTERVAL = 60_000; // sweep every 60s
11
14
 
12
15
  export interface RouteEntry {
13
16
  nodeId: string;
@@ -127,8 +130,51 @@ export class Router {
127
130
  }
128
131
  }
129
132
 
133
+ private relaySweepTimer: ReturnType<typeof setInterval> | null = null;
134
+
135
+ /** Start periodic sweep of stale relay routes. */
136
+ startRelaySweep() {
137
+ if (this.relaySweepTimer) return;
138
+ this.relaySweepTimer = setInterval(() => this.removeStaleRelayRoutes(), RELAY_ROUTE_SWEEP_INTERVAL);
139
+ }
140
+
141
+ /** Remove relay routes whose lastSeen is older than RELAY_ROUTE_MAX_AGE. */
142
+ removeStaleRelayRoutes(): string[] {
143
+ const now = Date.now();
144
+ const removed: string[] = [];
145
+ for (const [id, entry] of this.routes) {
146
+ if (entry.reachableVia && !entry.connection && (now - entry.lastSeen) > RELAY_ROUTE_MAX_AGE) {
147
+ debug("router", `removing stale relay route: ${id} (via ${entry.reachableVia}, age=${Math.round((now - entry.lastSeen) / 1000)}s)`);
148
+ this.unindexEntry(entry);
149
+ this.routes.delete(id);
150
+ this.syncVersion++;
151
+ this.removedPeers.set(id, this.syncVersion);
152
+ this.peerVersions.delete(id);
153
+ removed.push(id);
154
+ }
155
+ }
156
+ return removed;
157
+ }
158
+
159
+ /** Remove a specific relay route (e.g. on relay_fail). No-op for direct connections. */
160
+ removeRelayRoute(nodeId: string): boolean {
161
+ const entry = this.routes.get(nodeId);
162
+ if (!entry || entry.connection) return false; // only remove relay routes
163
+ debug("router", `removing relay route on failure: ${nodeId} (via ${entry.reachableVia})`);
164
+ this.unindexEntry(entry);
165
+ this.routes.delete(nodeId);
166
+ this.syncVersion++;
167
+ this.removedPeers.set(nodeId, this.syncVersion);
168
+ this.peerVersions.delete(nodeId);
169
+ return true;
170
+ }
171
+
130
172
  /** Stop periodic cleanup. Call on shutdown. */
131
173
  destroy() {
174
+ if (this.relaySweepTimer) {
175
+ clearInterval(this.relaySweepTimer);
176
+ this.relaySweepTimer = null;
177
+ }
132
178
  this.seenFrames.clear();
133
179
  this.failedRequests.clear();
134
180
  this.channels.clear();
package/src/sentinel.ts CHANGED
@@ -177,7 +177,7 @@ function buildCapabilities(): NodeCapabilities {
177
177
  }
178
178
 
179
179
  function connectToPeer(peer: { nodeId: string; url: string }) {
180
- const ws = new WsWebSocket(peer.url, ["graphql-transport-ws"]);
180
+ const ws = new WebSocket(peer.url, ["graphql-transport-ws"]);
181
181
  const e2eeOpts: ConnectionE2eeOptions = {
182
182
  e2ee: config.e2ee,
183
183
  compression: config.compression,
package/src/tool-proxy.ts CHANGED
@@ -130,6 +130,23 @@ export class ToolProxy {
130
130
  }
131
131
 
132
132
  // ── Incoming response ──────────────────────────────────────────
133
+ /** Reject a specific pending request by frame id (e.g. from relay_fail). */
134
+ rejectPending(frameId: string, reason: string) {
135
+ const pending = this.pending.get(frameId);
136
+ if (pending) {
137
+ clearTimeout(pending.timer);
138
+ this.pending.delete(frameId);
139
+ pending.reject(new Error(reason));
140
+ return;
141
+ }
142
+ const batch = this.pendingBatch.get(frameId);
143
+ if (batch) {
144
+ clearTimeout(batch.timer);
145
+ this.pendingBatch.delete(frameId);
146
+ batch.reject(new Error(reason));
147
+ }
148
+ }
149
+
133
150
  handleResponse(frame: ToolProxyResponse) {
134
151
  if (this.peerManager.router.isFailed(frame.id)) return;
135
152
  const pending = this.pending.get(frame.id);