clawmatrix 0.2.8 → 0.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1090,13 +1090,28 @@ export class ModelProxy {
1090
1090
  ?? this.config.models.find((m) => m.id === payload.model)
1091
1091
  : this.config.models.find((m) => m.id === payload.model);
1092
1092
  if (!model) {
1093
+ // Model not available locally — try forwarding to a remote node that has it
1094
+ const originalTarget = frame.to;
1095
+ const exclude = new Set([this.config.nodeId]);
1096
+ if (originalTarget) exclude.add(originalTarget);
1097
+ const alternatives = this.peerManager.router.findNodesForModel(payload.model, exclude);
1098
+ for (const alt of alternatives) {
1099
+ if (this.peerManager.sendTo(alt.nodeId, { ...frame, to: alt.nodeId })) {
1100
+ debug("model_req", `failover: ${originalTarget ?? "local"} → ${alt.nodeId} for "${payload.model}"`);
1101
+ return;
1102
+ }
1103
+ }
1104
+ // No alternative found
1105
+ const hint = originalTarget && originalTarget !== this.config.nodeId
1106
+ ? `Target node "${originalTarget}" is unreachable and no alternative nodes provide model "${payload.model}"`
1107
+ : `Model "${payload.model}" not available locally`;
1093
1108
  this.peerManager.sendTo(from, {
1094
1109
  type: "model_res",
1095
1110
  id,
1096
1111
  from: this.config.nodeId,
1097
1112
  to: from,
1098
1113
  timestamp: Date.now(),
1099
- payload: { success: false, error: `Model "${payload.model}" not available locally` },
1114
+ payload: { success: false, error: hint },
1100
1115
  } satisfies ModelResponse);
1101
1116
  return;
1102
1117
  }
@@ -46,6 +46,9 @@ const SKIP_DEDUP_TYPES = new Set([
46
46
  // Terminal
47
47
  "terminal_open_res", "terminal_data", "terminal_resize",
48
48
  "terminal_close", "terminal_close_res",
49
+ // File transfer
50
+ "file_transfer_chunk", "file_transfer_chunk_ack",
51
+ "file_transfer_ack", "file_transfer_complete",
49
52
  ]);
50
53
 
51
54
  /** Classify WebSocket close code into a human-readable reason. */
@@ -604,8 +607,11 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
604
607
 
605
608
  // ── Message handling ───────────────────────────────────────────
606
609
  private onFrame(frame: AnyClusterFrame, from: Connection) {
607
- // Ignore self-echo: frames with our own nodeId that were relayed back to us
608
- if (frame.from === this.config.nodeId) return;
610
+ // Ignore self-echo: frames with our own nodeId that were relayed back to us.
611
+ // Exception: frames from same-nodeId satellite connections (Mac/iOS client)
612
+ // are legitimate requests that must be processed or relayed.
613
+ const isSatellite = from.remoteNodeId === this.config.nodeId;
614
+ if (frame.from === this.config.nodeId && !isSatellite) return;
609
615
 
610
616
  // Validate from field: must be the direct peer or a known node (relayed)
611
617
  if (frame.from && frame.from !== from.remoteNodeId && !this.router.getRoute(frame.from)) {
@@ -674,8 +680,26 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
674
680
  }
675
681
 
676
682
  if (frame.to && frame.to !== this.config.nodeId) {
677
- this.router.tryRelay(frame);
678
- return;
683
+ if (this.router.tryRelay(frame)) return;
684
+
685
+ // Relay failed — for model_req, try alternative nodes or fall through to local handling
686
+ if (frame.type === "model_req") {
687
+ const modelId = (frame as any).payload?.model;
688
+ if (modelId) {
689
+ const exclude = new Set([frame.to, this.config.nodeId]);
690
+ const alternatives = this.router.findNodesForModel(modelId, exclude);
691
+ for (const alt of alternatives) {
692
+ if (this.sendTo(alt.nodeId, { ...frame, to: alt.nodeId })) {
693
+ debug("peer", `model_req failover: ${frame.to} → ${alt.nodeId}`);
694
+ return;
695
+ }
696
+ }
697
+ }
698
+ // No remote alternative — fall through to local handling
699
+ // (model-proxy will handle locally or send error back)
700
+ } else {
701
+ return;
702
+ }
679
703
  }
680
704
 
681
705
  // Forward to same-nodeId satellite connection (e.g. Mac desktop app) so that
package/src/router.ts CHANGED
@@ -233,6 +233,31 @@ export class Router {
233
233
  return this.routes.get(target);
234
234
  }
235
235
 
236
/**
 * Find reachable nodes that provide a specific model, best candidates first.
 *
 * A node is a candidate when it lists `modelId` in its `models`, is not in
 * `exclude`, and is either directly connected (its own connection is open)
 * or reachable through a relay whose connection is open.
 *
 * Ordering: directly connected nodes first, then relayed nodes; within each
 * group, ascending `latencyMs`.
 *
 * @param modelId Model identifier to look up.
 * @param exclude Optional set of nodeIds to skip.
 * @returns Matching route entries, possibly empty.
 */
findNodesForModel(modelId: string, exclude?: Set<string>): RouteEntry[] {
  const direct: RouteEntry[] = [];
  const relayed: RouteEntry[] = [];

  for (const entry of this.routes.values()) {
    if (exclude?.has(entry.nodeId)) continue;
    if (!entry.models.some((m) => m.id === modelId)) continue;

    // Classify by the path that is actually usable right now. A stale, closed
    // `connection` object must not make a relay-only node rank as "direct".
    if (entry.connection?.isOpen) {
      direct.push(entry);
    } else if (entry.reachableVia && this.connections.get(entry.reachableVia)?.isOpen) {
      relayed.push(entry);
    }
  }

  const byLatency = (a: RouteEntry, b: RouteEntry) => a.latencyMs - b.latencyMs;
  direct.sort(byLatency);
  relayed.sort(byLatency);
  return [...direct, ...relayed];
}
260
+
236
261
  // ── Message sending and relay ──────────────────────────────────
237
262
  /** Send a frame to a specific node, relaying if necessary. Returns true if sent. */
238
263
  sendTo(targetNodeId: string, frame: ClusterFrame | AnyClusterFrame): boolean {
@@ -8,7 +8,7 @@
8
8
 
9
9
  import { fork, type ChildProcess } from "node:child_process";
10
10
  import { join, dirname } from "node:path";
11
- import { existsSync, readFileSync, mkdirSync, openSync } from "node:fs";
11
+ import { existsSync, readFileSync, mkdirSync, openSync, closeSync } from "node:fs";
12
12
  import { homedir, tmpdir } from "node:os";
13
13
  import type { ClawMatrixConfig } from "./config.ts";
14
14
 
@@ -42,6 +42,9 @@ export class SentinelManager {
42
42
  execArgv: this.resolveExecArgv(),
43
43
  });
44
44
 
45
+ // Close the log fd in the parent — the child has its own copy
46
+ closeSync(logFd);
47
+
45
48
  // Send config to sentinel via IPC (includes gateway PID for health checks)
46
49
  // If sentinel has no explicit listenPort but the gateway is a listener,
47
50
  // inherit the gateway's port for automatic takeover when gateway dies.
@@ -83,7 +86,7 @@ export class SentinelManager {
83
86
  }, 1000);
84
87
  }
85
88
 
86
- stop() {
89
+ async stop() {
87
90
  // IPC is disconnected shortly after start, so use PID file for shutdown
88
91
  if (existsSync(this.pidFile)) {
89
92
  try {
@@ -92,13 +95,11 @@ export class SentinelManager {
92
95
  process.kill(pid, "SIGTERM");
93
96
  // Wait briefly for the process to exit so the next start()
94
97
  // doesn't race with a still-dying sentinel
95
- const deadline = Date.now() + 3_000;
96
- while (Date.now() < deadline) {
98
+ for (let i = 0; i < 60; i++) {
97
99
  try {
98
100
  process.kill(pid, 0);
99
- // Still alive — brief spin
100
- const waitUntil = Date.now() + 50;
101
- while (Date.now() < waitUntil) { /* spin */ }
101
+ // Still alive — async wait
102
+ await new Promise((r) => setTimeout(r, 50));
102
103
  } catch {
103
104
  break; // exited
104
105
  }
package/src/sentinel.ts CHANGED
@@ -569,6 +569,28 @@ process.on("exit", (code) => {
569
569
  try { process.stderr.write(`[svc ${ts}] Exit code=${code}\n`); } catch { /* ignore */ }
570
570
  });
571
571
 
572
/**
 * Start a connection to every configured peer that is not already being
 * handled (used when the gateway process is detected as gone).
 *
 * A peer is skipped when it already has an entry in `connections` or a
 * pending entry in `reconnectTimers`.
 */
function connectAllPeers() {
  for (const peer of config.peers) {
    const alreadyHandled = connections.has(peer.nodeId) || reconnectTimers.has(peer.nodeId);
    if (alreadyHandled) continue;
    connectToPeer(peer);
  }
}
580
+
581
/**
 * Drop every peer connection and cancel all reconnect activity (used when the
 * gateway process is detected as back online).
 *
 * Each connection is closed with code 1000 and reason "gateway recovered"
 * (1000 is presumably the WebSocket normal-closure code — confirm against the
 * connection implementation), then removed from `connections`. Pending
 * reconnect timers are cleared and `reconnectAttempts` is emptied.
 */
function disconnectAllPeers() {
  connections.forEach((conn, nodeId) => {
    conn.close(1000, "gateway recovered");
    connections.delete(nodeId);
  });

  reconnectTimers.forEach((timer, nodeId) => {
    clearTimeout(timer);
    reconnectTimers.delete(nodeId);
  });

  reconnectAttempts.clear();
}
593
+
572
594
  /** Periodically check if the gateway process is still alive via kill(pid, 0). */
573
595
  function startGatewayHealthCheck() {
574
596
  if (healthCheckTimer || !gatewayPid) return;
@@ -581,11 +603,15 @@ function startGatewayHealthCheck() {
581
603
  log("Gateway process detected — back online");
582
604
  // Release the port so the gateway can reclaim it
583
605
  stopListening();
606
+ // Disconnect from peers — gateway handles mesh connections
607
+ disconnectAllPeers();
584
608
  }
585
609
  } catch {
586
610
  if (gatewayAlive) {
587
611
  gatewayAlive = false;
588
612
  log(`Gateway process (pid ${gatewayPid}) gone — entering standalone mode`);
613
+ // Connect to peers now that gateway is down
614
+ connectAllPeers();
589
615
  // Take over the gateway's listen port
590
616
  if (config.listenPort) {
591
617
  // Small delay to let the OS release the port from the dead process
@@ -608,11 +634,7 @@ function boot() {
608
634
  writePidFile();
609
635
  log(`Started (pid ${process.pid}, gateway ${gatewayPid}, nodeId ${sentinelNodeId()}, takeover port ${config.listenPort || "none"})`);
610
636
 
611
- // Connect to all configured peers
612
- for (const peer of config.peers) {
613
- connectToPeer(peer);
614
- }
615
-
616
- // Note: we do NOT start listening here.
617
- // Listening only starts when gateway dies (port takeover mode).
637
+ // Do NOT connect to peers on boot — gateway handles mesh connections.
638
+ // Sentinel only connects when gateway dies (standalone mode).
639
+ // Listening also only starts when gateway dies (port takeover mode).
618
640
  }
package/src/terminal.ts CHANGED
@@ -109,7 +109,8 @@ export class TerminalManager {
109
109
  return;
110
110
  }
111
111
 
112
- // Check allowFrom
112
+ // TODO(security): allowFrom 为空时默认允许所有已认证 peer 打开终端会话。
113
+ // 当前仅用于受信任网络。开放前需改为默认拒绝或要求显式配置。
113
114
  if (termConfig?.allowFrom && termConfig.allowFrom.length > 0) {
114
115
  if (!termConfig.allowFrom.includes(frame.from)) {
115
116
  this.peerManager.sendTo(frame.from, {
@@ -88,6 +88,8 @@ export function createClusterDiagnosticTool(): AnyAgentTool {
88
88
  };
89
89
  }
90
90
 
91
+ // TODO(security): exec 允许任何已认证 peer 在远程 sentinel 执行任意命令,无 allowlist 或 capability check。
92
+ // 当前仅用于受信任网络。开放前需添加命令白名单或 per-peer 授权。
91
93
  if (action === "exec") {
92
94
  if (!command) {
93
95
  return {
@@ -0,0 +1,91 @@
1
+ import type { AnyAgentTool } from "openclaw/plugin-sdk";
2
+ import { getClusterRuntime } from "../cluster-service.ts";
3
+
4
/**
 * Build the `cluster_transfer` agent tool.
 *
 * The tool moves one file between the local node and a remote node. Exactly
 * one of `source_node` (pull: remote → local) or `target_node` (push: local →
 * remote) must be given. All failures are reported as a text result with
 * `details.error === true`; `execute` itself does not throw.
 *
 * @returns The tool definition consumed by the agent runtime.
 */
export function createClusterTransferTool(): AnyAgentTool {
  /** Wrap a message in the tool's error-result shape. */
  const fail = (text: string) => ({
    content: [{ type: "text" as const, text }],
    details: { error: true },
  });

  return {
    name: "cluster_transfer",
    label: "Cluster File Transfer",
    description:
      "Transfer a file between the local node and a remote cluster node. " +
      "Supports large files (up to 100MB) with chunked transfer and SHA-256 integrity check. " +
      "Specify source_node to pull from remote, or target_node to push to remote.",
    parameters: {
      type: "object",
      properties: {
        source_node: {
          type: "string",
          description: "Source nodeId (omit for local). Exactly one of source_node or target_node must be provided.",
        },
        source_path: {
          type: "string",
          description: "File path on the source node",
        },
        target_node: {
          type: "string",
          description: "Target nodeId (omit for local). Exactly one of source_node or target_node must be provided.",
        },
        target_path: {
          type: "string",
          description: "File path on the target node",
        },
      },
      required: ["source_path", "target_path"],
    },
    async execute(_toolCallId, params) {
      // The params object is only cast, never validated, so every field is
      // treated as possibly missing until checked below.
      const { source_node, source_path, target_node, target_path } = params as {
        source_node?: string;
        source_path?: string;
        target_node?: string;
        target_path?: string;
      };

      // Validate: exactly one of source_node or target_node must be provided
      if (source_node && target_node) {
        return fail("Error: Provide either source_node or target_node, not both.");
      }
      const remoteNode = source_node || target_node;
      if (!remoteNode) {
        return fail("Error: Provide either source_node (to pull) or target_node (to push).");
      }

      // The schema marks both paths as required, but nothing enforces it at
      // runtime; reject here instead of handing `undefined` to the transfer manager.
      if (
        typeof source_path !== "string" || source_path === "" ||
        typeof target_path !== "string" || target_path === ""
      ) {
        return fail("Error: source_path and target_path are required.");
      }

      try {
        const ftm = getClusterRuntime().fileTransferManager;
        if (!ftm) {
          return fail("Error: File transfer is not enabled on this node.");
        }

        const result = source_node
          ? await ftm.pullFile(remoteNode, source_path, target_path) // Pull: remote → local
          : await ftm.pushFile(remoteNode, source_path, target_path); // Push: local → remote

        const text = result.success
          ? `Transfer complete: ${result.bytesTransferred} bytes transferred.`
          : `Transfer failed: ${result.error}`;

        return {
          content: [{ type: "text" as const, text }],
          details: result,
        };
      } catch (err) {
        return fail(`Transfer error: ${err instanceof Error ? err.message : String(err)}`);
      }
    },
  };
}
package/src/types.ts CHANGED
@@ -317,6 +317,68 @@ export interface ToolBatchResponse extends ClusterFrame {
317
317
  };
318
318
  }
319
319
 
320
+ // ── File transfer ─────────────────────────────────────────────────
321
/**
 * Cluster frame of type `file_transfer_init`.
 *
 * Carries the session id, transfer direction, both paths, and the size,
 * chunking, and checksum metadata of the file.
 */
export interface FileTransferInit extends ClusterFrame {
  type: "file_transfer_init";
  id: string;
  payload: {
    sessionId: string;
    direction: "push" | "pull";
    filePath: string;
    targetPath: string;
    fileSize: number;
    totalChunks: number;
    chunkSize: number;
    /** SHA-256 digest as a hex string. */
    checksum: string;
  };
}
335
+
336
/**
 * Cluster frame of type `file_transfer_ack`.
 *
 * Reports whether a transfer session was accepted, with an optional error
 * message. The optional metadata fields are filled in by the responder in
 * pull mode.
 */
export interface FileTransferAck extends ClusterFrame {
  type: "file_transfer_ack";
  id: string;
  payload: {
    sessionId: string;
    accepted: boolean;
    error?: string;
    /** Pull mode: file metadata supplied by the responder. */
    fileSize?: number;
    /** Pull mode: file metadata supplied by the responder. */
    totalChunks?: number;
    /** Pull mode: file metadata supplied by the responder. */
    checksum?: string;
  };
}
349
+
350
/**
 * Cluster frame of type `file_transfer_chunk`: one indexed piece of file data
 * belonging to a transfer session.
 */
export interface FileTransferChunk extends ClusterFrame {
  type: "file_transfer_chunk";
  id: string;
  payload: {
    sessionId: string;
    chunkIndex: number;
    /** Chunk contents, base64-encoded. */
    data: string;
  };
}
359
+
360
/**
 * Cluster frame of type `file_transfer_chunk_ack`: per-chunk success/failure
 * report for a transfer session, identified by `chunkIndex`.
 */
export interface FileTransferChunkAck extends ClusterFrame {
  type: "file_transfer_chunk_ack";
  id: string;
  payload: {
    sessionId: string;
    chunkIndex: number;
    success: boolean;
    /** Optional failure description. */
    error?: string;
  };
}
370
+
371
/**
 * Cluster frame of type `file_transfer_complete`: final outcome of a transfer
 * session, with an optional error message and optional byte count.
 */
export interface FileTransferComplete extends ClusterFrame {
  type: "file_transfer_complete";
  id: string;
  payload: {
    sessionId: string;
    success: boolean;
    /** Optional failure description. */
    error?: string;
    /** Optional number of bytes transferred. */
    bytesTransferred?: number;
  };
}
381
+
320
382
  // ── Device info ───────────────────────────────────────────────────
321
383
  export interface DeviceInfo {
322
384
  os: string; // e.g. "Darwin 24.6.0", "Linux 6.1.0"
@@ -411,6 +473,32 @@ export interface KnowledgeSyncFrame extends ClusterFrame {
411
473
  };
412
474
  }
413
475
 
476
+ // ── Health sync ──────────────────────────────────────────────────
477
/**
 * Cluster frame of type `health_sync`. Unlike the request/response frames it
 * declares no `id` of its own.
 */
export interface HealthSyncFrame extends ClusterFrame {
  type: "health_sync";
  payload: {
    /** Automerge sync message, base64-encoded. */
    data: string;
  };
}
483
+
484
/**
 * Cluster frame of type `availability_req`: asks for availability data over
 * one of the three supported ranges.
 */
export interface AvailabilityRequest extends ClusterFrame {
  type: "availability_req";
  id: string;
  payload: {
    /** Requested window: `"24h"`, `"7d"`, or `"90d"`. */
    range: "24h" | "7d" | "90d";
  };
}
491
+
492
/**
 * Cluster frame of type `availability_res`: success flag plus either an
 * untyped `data` value or an `error` message.
 */
export interface AvailabilityResponse extends ClusterFrame {
  type: "availability_res";
  id: string;
  payload: {
    success: boolean;
    /** Result data; its shape is not declared here. */
    data?: unknown;
    /** Optional failure description. */
    error?: string;
  };
}
501
+
414
502
  // ── Diagnostic (sentinel) ────────────────────────────────────────
415
503
  export interface DiagnosticExec extends ClusterFrame {
416
504
  type: "diagnostic_exec";
@@ -831,4 +919,12 @@ export type AnyClusterFrame =
831
919
  | TerminalData
832
920
  | TerminalResize
833
921
  | TerminalCloseRequest
834
- | TerminalCloseResponse;
922
+ | TerminalCloseResponse
923
+ | HealthSyncFrame
924
+ | AvailabilityRequest
925
+ | AvailabilityResponse
926
+ | FileTransferInit
927
+ | FileTransferAck
928
+ | FileTransferChunk
929
+ | FileTransferChunkAck
930
+ | FileTransferComplete;
package/src/web.ts CHANGED
@@ -3,6 +3,7 @@ import type { PeerManager } from "./peer-manager.ts";
3
3
  import type { HandoffManager } from "./handoff.ts";
4
4
  import type { ClawMatrixConfig } from "./config.ts";
5
5
  import type { SatelliteContext, IngestedEvent } from "./types.ts";
6
+ import type { HealthTracker } from "./health-tracker.ts";
6
7
  import { timingSafeEqual } from "./auth.ts";
7
8
  import { renderDashboard } from "./web-ui.ts";
8
9
  import { readBody } from "./http-utils.ts";
@@ -46,6 +47,7 @@ export class WebHandler {
46
47
  private ingestedEvents: IngestedEvent[] = []; // ring buffer for ingested events
47
48
  private loginAttempts = new Map<string, { count: number; resetAt: number }>(); // IP → rate limit
48
49
  private loginCleanupTimer: ReturnType<typeof setInterval> | null = null;
50
+ private healthTracker: HealthTracker | null = null;
49
51
  private onPeerConnected: (nodeId: string) => void;
50
52
  private onPeerDisconnected: (nodeId: string) => void;
51
53
 
@@ -91,6 +93,11 @@ export class WebHandler {
91
93
  peerManager.on("peerDisconnected", this.onPeerDisconnected);
92
94
  }
93
95
 
96
/**
 * Attach the health tracker that backs the availability API.
 *
 * @param tracker Tracker instance stored on this handler.
 */
setHealthTracker(tracker: HealthTracker): void {
  this.healthTracker = tracker;
}
100
+
94
101
  /** Clean up timers and pending requests on shutdown. */
95
102
  destroy() {
96
103
  // Remove event listeners to prevent post-destroy callbacks
@@ -181,6 +188,11 @@ export class WebHandler {
181
188
  return;
182
189
  }
183
190
 
191
+ if (path === "/api/availability" && req.method === "GET") {
192
+ this.handleAvailability(req, res);
193
+ return;
194
+ }
195
+
184
196
  if (path === "/api/satellite/poll" && req.method === "GET") {
185
197
  this.handleSatellitePoll(req, res);
186
198
  return;
@@ -271,6 +283,27 @@ export class WebHandler {
271
283
  }
272
284
  }
273
285
 
286
/**
 * Answer an availability query with a JSON body.
 *
 * - 503 when no health tracker has been set.
 * - 400 when the request URL cannot be parsed or `range` is not one of
 *   `24h`, `7d`, `90d` (a missing `range` defaults to `24h`).
 * - 200 with the result of `healthTracker.getAvailability(range)` otherwise.
 *
 * @param req Incoming request; only the query string of `req.url` is read.
 * @param res Response the JSON body is written to.
 */
private handleAvailability(req: IncomingMessage, res: ServerResponse) {
  const sendJson = (status: number, body: unknown) => {
    res.writeHead(status, { "Content-Type": "application/json" });
    res.end(JSON.stringify(body));
  };

  const tracker = this.healthTracker;
  if (!tracker) {
    sendJson(503, { error: "Health tracker not available" });
    return;
  }

  // Only the query string is needed, so parse against a fixed base rather than
  // the client-supplied Host header: a malformed Host would make `new URL` throw.
  let requested: string;
  try {
    requested = new URL(req.url ?? "/", "http://localhost").searchParams.get("range") ?? "24h";
  } catch {
    sendJson(400, { error: "Invalid request URL" });
    return;
  }

  // Narrow with a runtime check instead of asserting the type up front.
  const isRange = (value: string): value is "24h" | "7d" | "90d" =>
    value === "24h" || value === "7d" || value === "90d";

  if (!isRange(requested)) {
    sendJson(400, { error: "Invalid range. Use 24h, 7d, or 90d" });
    return;
  }

  sendJson(200, tracker.getAvailability(requested));
}
306
+
274
307
  private handleStatus(res: ServerResponse) {
275
308
  const peers = this.peerManager.router.getAllPeers();
276
309
  const localNode = {