clawmatrix 0.2.9 → 0.3.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -14,6 +14,7 @@ import { ModelProxy } from "./model-proxy.ts";
14
14
  import { ToolProxy, type GatewayInfo } from "./tool-proxy.ts";
15
15
  import { AcpProxy, readAllSessionStoresFromDisk } from "./acp-proxy.ts";
16
16
  import { TerminalManager } from "./terminal.ts";
17
+ import { FileTransferManager } from "./file-transfer.ts";
17
18
  import { WebHandler } from "./web.ts";
18
19
  import { KnowledgeSync } from "./knowledge-sync.ts";
19
20
  import { HealthTracker } from "./health-tracker.ts";
@@ -30,6 +31,8 @@ import type {
30
31
  HandoffInput,
31
32
  KnowledgeSyncFrame,
32
33
  HealthSyncFrame,
34
+ AvailabilityRequest,
35
+ AvailabilityResponse,
33
36
  ModelRequest,
34
37
  ModelResponse,
35
38
  ModelStreamChunk,
@@ -63,6 +66,11 @@ import type {
63
66
  TerminalResize,
64
67
  TerminalCloseRequest,
65
68
  TerminalCloseResponse,
69
+ FileTransferInit,
70
+ FileTransferAck,
71
+ FileTransferChunk,
72
+ FileTransferChunkAck,
73
+ FileTransferComplete,
66
74
  } from "./types.ts";
67
75
 
68
76
  function resolveGatewayInfo(openclawConfig: OpenClawConfig): GatewayInfo {
@@ -86,6 +94,7 @@ export class ClusterRuntime {
86
94
  readonly toolProxy: ToolProxy;
87
95
  readonly acpProxy: AcpProxy | null;
88
96
  readonly terminalManager: TerminalManager;
97
+ readonly fileTransferManager: FileTransferManager | null;
89
98
  knowledgeSync: KnowledgeSync | null = null;
90
99
  healthTracker: HealthTracker;
91
100
  webHandler: WebHandler | null = null;
@@ -93,6 +102,9 @@ export class ClusterRuntime {
93
102
  private logger: PluginLogger;
94
103
  private openclawConfig: OpenClawConfig;
95
104
  private exitHandler: (() => void) | null = null;
105
+ // Pre-built indexes for O(1) local agent lookup
106
+ private agentById = new Map<string, ClawMatrixConfig["agents"][number]>();
107
+ private agentsByTag = new Map<string, ClawMatrixConfig["agents"][number]>();
96
108
 
97
109
  constructor(config: ClawMatrixConfig, logger: PluginLogger, openclawConfig: OpenClawConfig, openclawVersion?: string) {
98
110
  this.config = config;
@@ -107,13 +119,21 @@ export class ClusterRuntime {
107
119
  const acpEnabled = config.acp?.enabled || (openclawConfig as Record<string, any>).acp?.enabled;
108
120
  this.acpProxy = acpEnabled ? new AcpProxy(config, this.peerManager, openclawConfig as Record<string, unknown>, gatewayInfo) : null;
109
121
  this.terminalManager = new TerminalManager(config, this.peerManager);
122
+ this.fileTransferManager = config.fileTransfer?.enabled
123
+ ? new FileTransferManager(config, this.peerManager)
124
+ : null;
110
125
  this.healthTracker = new HealthTracker({
111
126
  nodeId: config.nodeId,
112
127
  peerManager: this.peerManager,
113
128
  });
129
+ // Build agent indexes
130
+ for (const a of config.agents) {
131
+ this.agentById.set(a.id, a);
132
+ for (const t of a.tags) this.agentsByTag.set(t, a);
133
+ }
114
134
  }
115
135
 
116
- start() {
136
+ async start() {
117
137
  // Wire up frame dispatch
118
138
  this.peerManager.on("frame", (frame) => {
119
139
  this.dispatchFrame(frame);
@@ -195,14 +215,31 @@ export class ClusterRuntime {
195
215
  this.logger.error(`[clawmatrix] Health tracker failed to start: ${err}`);
196
216
  });
197
217
 
198
- // Start subsystems
199
- this.peerManager.start();
200
- this.modelProxy.start();
201
-
202
218
  // Sentinel: detached subprocess for diagnostics when gateway dies.
203
219
  // Default on; starts when not explicitly disabled AND (has outbound peers OR gateway is a listener for port takeover)
204
- if ((this.config.sentinel?.enabled ?? true) && (this.config.peers.length > 0 || this.config.listen)) {
220
+ const sentinelEnabled = (this.config.sentinel?.enabled ?? true) && (this.config.peers.length > 0 || this.config.listen);
221
+ if (sentinelEnabled) {
205
222
  this.sentinelManager = new SentinelManager(this.config);
223
+ // Kill old sentinel and wait for the listen port to be released
224
+ // before PeerManager tries to bind it.
225
+ await this.sentinelManager.ensurePortFree();
226
+ }
227
+
228
+ // Start subsystems (port is now guaranteed free)
229
+ this.peerManager.start();
230
+ this.modelProxy.start();
231
+
232
+ // Fetch tool catalog from the local gateway (non-blocking).
233
+ // The catalog (tool names, descriptions) is advertised to peers via peer_sync
234
+ // so remote LLM callers can discover available tools.
235
+ if (this.config.toolProxy?.enabled) {
236
+ this.fetchToolCatalog().catch((err) => {
237
+ this.logger.warn(`[clawmatrix] Tool catalog fetch failed (non-fatal): ${err}`);
238
+ });
239
+ }
240
+
241
+ // Spawn the new sentinel after PeerManager is listening
242
+ if (sentinelEnabled && this.sentinelManager) {
206
243
  this.sentinelManager.start();
207
244
  this.logger.info(`[clawmatrix] Sentinel started for node "${this.config.nodeId}"`);
208
245
  }
@@ -238,6 +275,7 @@ export class ClusterRuntime {
238
275
  this.acpProxy?.destroy();
239
276
  this.terminalManager.destroy();
240
277
  this.modelProxy.stop();
278
+ this.fileTransferManager?.destroy();
241
279
  this.toolProxy.destroy();
242
280
  await this.peerManager.stop();
243
281
  this.logger.info(`[clawmatrix] Node "${this.config.nodeId}" stopped`);
@@ -248,6 +286,61 @@ export class ClusterRuntime {
248
286
  this.modelProxy.updateDiscoveredModels(peers);
249
287
  }
250
288
 
289
+ /** Fetch tool catalog from the local OpenClaw gateway and advertise to peers. */
290
+ private async fetchToolCatalog() {
291
+ const { spawnProcess } = await import("./compat.ts");
292
+ const proc = spawnProcess(
293
+ ["openclaw", "gateway", "call", "tools.catalog", "--json", "--params", '{"includePlugins":true}'],
294
+ { stdout: "pipe", stderr: "pipe" },
295
+ );
296
+ const chunks: Uint8Array[] = [];
297
+ if (proc.stdout) {
298
+ const reader = proc.stdout.getReader();
299
+ while (true) {
300
+ const { done, value } = await reader.read();
301
+ if (done) break;
302
+ chunks.push(value);
303
+ }
304
+ }
305
+ const code = await proc.exited;
306
+ if (code !== 0) return;
307
+
308
+ const stdout = Buffer.concat(chunks).toString("utf-8").trim();
309
+ if (!stdout) return;
310
+ // stdout may contain non-JSON log lines (e.g. "[plugins] ...") before the actual JSON.
311
+ // Extract the first JSON object from the output.
312
+ const jsonStart = stdout.indexOf("{");
313
+ if (jsonStart < 0) return;
314
+ const data = JSON.parse(stdout.slice(jsonStart)) as {
315
+ groups?: Array<{
316
+ tools: Array<{ id: string; label: string; description: string }>;
317
+ }>;
318
+ };
319
+ if (!data.groups) return;
320
+
321
+ const allowSet = new Set(this.config.toolProxy?.allow ?? []);
322
+ const isWildcard = allowSet.has("*") || allowSet.size === 0;
323
+ const denySet = new Set(this.config.toolProxy?.deny ?? []);
324
+ const catalog: import("./types.ts").ToolCatalogEntry[] = [];
325
+ for (const group of data.groups) {
326
+ for (const tool of group.tools) {
327
+ if (denySet.has(tool.id)) continue;
328
+ if (!isWildcard && !allowSet.has(tool.id)) continue;
329
+ // Skip clawmatrix's own cluster_ tools (they're the invoker, not the invokee)
330
+ if (tool.id.startsWith("cluster_")) continue;
331
+ catalog.push({
332
+ name: tool.id,
333
+ description: tool.description,
334
+ });
335
+ }
336
+ }
337
+
338
+ if (catalog.length > 0) {
339
+ this.peerManager.updateToolCatalog(catalog);
340
+ this.logger.info(`[clawmatrix] Tool catalog: ${catalog.length} tool(s) advertised to peers`);
341
+ }
342
+ }
343
+
251
344
  private resolveWorkspacePath(): string | null {
252
345
  // Read workspace from OpenClaw agent config (first agent or default agent)
253
346
  const agents = (this.openclawConfig as Record<string, unknown>).agents as
@@ -338,6 +431,20 @@ export class ClusterRuntime {
338
431
  case "health_sync":
339
432
  this.healthTracker.handleSyncMessage(frame as HealthSyncFrame);
340
433
  break;
434
+ case "availability_req": {
435
+ const af = frame as AvailabilityRequest;
436
+ const range = af.payload.range ?? "24h";
437
+ const data = this.healthTracker.getAvailability(range);
438
+ this.peerManager.sendTo(af.from, {
439
+ type: "availability_res",
440
+ id: af.id,
441
+ from: this.config.nodeId,
442
+ to: af.from,
443
+ timestamp: Date.now(),
444
+ payload: { success: true, data },
445
+ } as AvailabilityResponse);
446
+ break;
447
+ }
341
448
  case "acp_req":
342
449
  if (this.acpProxy) {
343
450
  this.acpProxy.handleRequest(frame as AcpTaskRequest).catch((err) => {
@@ -464,6 +571,29 @@ export class ClusterRuntime {
464
571
  case "acp_get_modes_res":
465
572
  this.acpProxy?.handleGetModesResponse(frame as AcpGetModesResponse);
466
573
  break;
574
+ case "acp_set_config":
575
+ if (this.acpProxy) {
576
+ this.acpProxy.handleSetConfigRequest(frame as import("./types.ts").AcpSetConfigRequest).catch((err) => {
577
+ this.logger.error(`[clawmatrix] ACP set config error: ${err}`);
578
+ });
579
+ }
580
+ break;
581
+ case "acp_set_config_res":
582
+ this.acpProxy?.handleSetConfigResponse(frame as import("./types.ts").AcpSetConfigResponse);
583
+ break;
584
+ case "acp_subscribe":
585
+ if (this.acpProxy) {
586
+ this.acpProxy.handleSubscribeRequest(frame as import("./types.ts").AcpSubscribeRequest).catch((err) => {
587
+ this.logger.error(`[clawmatrix] ACP subscribe error: ${err}`);
588
+ });
589
+ }
590
+ break;
591
+ case "acp_unsubscribe":
592
+ this.acpProxy?.handleUnsubscribeRequest(frame as import("./types.ts").AcpUnsubscribeRequest);
593
+ break;
594
+ case "acp_session_notify":
595
+ // Outbound notification — no server-side handling needed.
596
+ break;
467
597
  case "chat_history_req":
468
598
  if (this.acpProxy) {
469
599
  this.acpProxy.handleChatHistoryRequest(frame as ChatHistoryRequest).catch((err) => {
@@ -494,18 +624,32 @@ export class ClusterRuntime {
494
624
  frame as TerminalOpenRequest | TerminalOpenResponse | TerminalData | TerminalResize | TerminalCloseRequest | TerminalCloseResponse,
495
625
  );
496
626
  break;
627
+ case "file_transfer_init":
628
+ this.fileTransferManager?.handleInit(frame as FileTransferInit).catch((err) => {
629
+ this.logger.error(`[clawmatrix] File transfer init error: ${err}`);
630
+ });
631
+ break;
632
+ case "file_transfer_ack":
633
+ this.fileTransferManager?.handleAck(frame as FileTransferAck);
634
+ break;
635
+ case "file_transfer_chunk":
636
+ this.fileTransferManager?.handleChunk(frame as FileTransferChunk);
637
+ break;
638
+ case "file_transfer_chunk_ack":
639
+ this.fileTransferManager?.handleChunkAck(frame as FileTransferChunkAck);
640
+ break;
641
+ case "file_transfer_complete":
642
+ this.fileTransferManager?.handleComplete(frame as FileTransferComplete);
643
+ break;
497
644
  }
498
645
  }
499
646
 
500
647
  private handleSendMessage(frame: SendMessage) {
501
648
  // Inject message into local agent session via openclaw CLI
502
649
  const { target, message } = frame.payload;
503
- const agent = this.config.agents.find((a) => {
504
- if (target.startsWith("tags:")) {
505
- return a.tags.includes(target.slice(5));
506
- }
507
- return a.id === target;
508
- });
650
+ const agent = target.startsWith("tags:")
651
+ ? this.agentsByTag.get(target.slice(5))
652
+ : this.agentById.get(target);
509
653
 
510
654
  if (!agent) {
511
655
  this.logger.warn(
@@ -543,9 +687,9 @@ export function createClusterService(
543
687
  ): OpenClawPluginService {
544
688
  return {
545
689
  id: "clawmatrix",
546
- start(ctx: OpenClawPluginServiceContext) {
690
+ async start(ctx: OpenClawPluginServiceContext) {
547
691
  clusterRuntime = new ClusterRuntime(config, ctx.logger, openclawConfig, openclawVersion);
548
- clusterRuntime.start();
692
+ await clusterRuntime.start();
549
693
  onStarted?.();
550
694
  },
551
695
  async stop() {
package/src/compat.ts CHANGED
@@ -8,12 +8,6 @@ import { spawn as cpSpawn } from "node:child_process";
8
8
  import { open, readFile, stat, writeFile } from "node:fs/promises";
9
9
  import { createRequire } from "node:module";
10
10
 
11
- export interface SpawnResult {
12
- exitCode: number;
13
- stdout: string;
14
- stderr: string;
15
- }
16
-
17
11
  /** Spawn a subprocess and collect stdout/stderr. */
18
12
  export function spawnProcess(
19
13
  cmd: string[],
package/src/config.ts CHANGED
@@ -134,6 +134,14 @@ const TerminalConfigSchema = z.object({
134
134
  allowFrom: z.array(z.string()).default([]),
135
135
  }).optional();
136
136
 
137
+ const FileTransferConfigSchema = z.object({
138
+ enabled: z.boolean().default(false),
139
+ chunkSize: z.number().default(262_144), // 256KB
140
+ maxFileSize: z.number().default(104_857_600), // 100MB
141
+ timeout: z.number().default(300_000), // 5min per-chunk
142
+ allowedPaths: z.array(z.string()).default([]), // empty = no restriction
143
+ }).optional();
144
+
137
145
  const AcpConfigSchema = z.object({
138
146
  enabled: z.boolean().default(false),
139
147
  /** ACP agents available on this node. Advertised to peers via capabilities. */
@@ -153,22 +161,26 @@ const RawClawMatrixConfigSchema = z.object({
153
161
  secret: z.string().min(16, "secret must be at least 16 characters"),
154
162
  listen: z.boolean().default(false),
155
163
  listenHost: z.string().default("0.0.0.0"),
156
- listenPort: z.number().default(0),
164
+ listenPort: z.number().int().min(0).max(65535).default(0),
157
165
  peers: z.array(PeerConfigSchema).default([]),
158
166
  agents: z.array(AgentInfoSchema).default([]),
159
167
  models: z.array(ModelInfoSchema).default([]),
160
168
  proxyModels: z.array(ProxyModelGroupSchema).default([]),
161
169
  tags: z.array(z.string()).default([]),
162
- proxyPort: z.number().default(0),
170
+ proxyPort: z.number().int().min(0).max(65535).default(0),
163
171
  toolProxy: ToolProxyConfigSchema.optional(),
164
- handoffTimeout: z.number().default(600_000),
165
- modelTimeout: z.number().default(120_000),
166
- toolTimeout: z.number().default(30_000),
172
+ handoffTimeout: z.number().positive().default(600_000),
173
+ modelTimeout: z.number().positive().default(120_000),
174
+ toolTimeout: z.number().positive().default(30_000),
175
+ /** Grace period (ms) before broadcasting peer_leave after disconnect.
176
+ * Allows brief reconnections (WiFi/cellular handoff) to be invisible to the mesh. */
177
+ disconnectGrace: z.number().nonnegative().default(30_000),
167
178
  sentinel: SentinelConfigSchema,
168
179
  web: WebConfigSchema,
169
180
  knowledge: KnowledgeConfigSchema,
170
181
  terminal: TerminalConfigSchema,
171
182
  acp: AcpConfigSchema,
183
+ fileTransfer: FileTransferConfigSchema,
172
184
  peerApproval: z.union([
173
185
  z.boolean(), // true = required mode, false = disabled
174
186
  PeerApprovalConfigSchema,
package/src/connection.ts CHANGED
@@ -485,70 +485,76 @@ export class Connection extends EventEmitter<ConnectionEvents> {
485
485
 
486
486
  private startHeartbeat() {
487
487
  this.lastReceivedAt = Date.now();
488
- const scheduleNext = () => {
489
- const interval = HEARTBEAT_BASE + Math.random() * HEARTBEAT_JITTER;
490
- this.heartbeatTimer = setTimeout(() => {
491
- if (this.closed) return;
492
-
493
- // Watchdog: if no data received for a long time, the connection is dead
494
- // regardless of what the heartbeat ping/pong state says.
495
- const silenceMs = Date.now() - this.lastReceivedAt;
496
- if (this.lastReceivedAt > 0 && silenceMs > Connection.RECEIVE_TIMEOUT) {
497
- debug("heartbeat", `No data received for ${Math.round(silenceMs / 1000)}s from ${this.remoteNodeId ?? "unknown"}, closing`);
498
- this.close(4002, "receive timeout");
499
- return;
500
- }
488
+ this.scheduleHeartbeatTick();
489
+ }
501
490
 
502
- // Increment before checking: this ping is about to be sent and
503
- // counts as outstanding until a pong arrives.
504
- this.missedPongs++;
505
- if (this.missedPongs >= HEARTBEAT_TIMEOUT_COUNT) {
506
- debug("heartbeat", `${HEARTBEAT_TIMEOUT_COUNT} missed pongs from ${this.remoteNodeId ?? "unknown"}, closing`);
507
- this.close(4002, "heartbeat timeout");
508
- return;
509
- }
491
+ private scheduleHeartbeatTick() {
492
+ const interval = HEARTBEAT_BASE + Math.random() * HEARTBEAT_JITTER;
493
+ this.heartbeatTimer = setTimeout(this.heartbeatTick, interval);
494
+ }
510
495
 
511
- // Send ping wrapped in try-catch to prevent breaking the heartbeat chain.
512
- // If send fails, the connection is dead; close it.
513
- try {
514
- if (this.transport.readyState !== WebSocket.OPEN) {
515
- debug("heartbeat", `Transport not open (state=${this.transport.readyState}) for ${this.remoteNodeId ?? "unknown"}, closing`);
516
- this.close(4002, "transport closed");
517
- return;
518
- }
519
- this.lastPingSentAt = Date.now();
520
- this.send({
521
- type: "ping",
522
- from: this.nodeId,
523
- timestamp: this.lastPingSentAt,
524
- } as AnyClusterFrame);
525
- } catch (err) {
526
- debug("heartbeat", `Ping send failed for ${this.remoteNodeId ?? "unknown"}: ${err}`);
527
- this.close(4002, "ping send failed");
528
- return;
529
- }
496
+ private heartbeatTick = () => {
497
+ if (this.closed) return;
530
498
 
531
- scheduleNext();
532
- }, interval);
533
- };
534
- scheduleNext();
535
- }
499
+ // Watchdog: if no data received for a long time, the connection is dead
500
+ // regardless of what the heartbeat ping/pong state says.
501
+ const silenceMs = Date.now() - this.lastReceivedAt;
502
+ if (this.lastReceivedAt > 0 && silenceMs > Connection.RECEIVE_TIMEOUT) {
503
+ debug("heartbeat", `No data received for ${Math.round(silenceMs / 1000)}s from ${this.remoteNodeId ?? "unknown"}, closing`);
504
+ this.close(4002, "receive timeout");
505
+ return;
506
+ }
507
+
508
+ // Increment before checking: this ping is about to be sent and
509
+ // counts as outstanding until a pong arrives.
510
+ this.missedPongs++;
511
+ if (this.missedPongs >= HEARTBEAT_TIMEOUT_COUNT) {
512
+ debug("heartbeat", `${HEARTBEAT_TIMEOUT_COUNT} missed pongs from ${this.remoteNodeId ?? "unknown"}, closing`);
513
+ this.close(4002, "heartbeat timeout");
514
+ return;
515
+ }
516
+
517
+ // Send ping — wrapped in try-catch to prevent breaking the heartbeat chain.
518
+ // If send fails, the connection is dead; close it.
519
+ try {
520
+ if (this.transport.readyState !== WebSocket.OPEN) {
521
+ debug("heartbeat", `Transport not open (state=${this.transport.readyState}) for ${this.remoteNodeId ?? "unknown"}, closing`);
522
+ this.close(4002, "transport closed");
523
+ return;
524
+ }
525
+ this.lastPingSentAt = Date.now();
526
+ this.send({
527
+ type: "ping",
528
+ from: this.nodeId,
529
+ timestamp: this.lastPingSentAt,
530
+ } as AnyClusterFrame);
531
+ } catch (err) {
532
+ debug("heartbeat", `Ping send failed for ${this.remoteNodeId ?? "unknown"}: ${err}`);
533
+ this.close(4002, "ping send failed");
534
+ return;
535
+ }
536
+
537
+ this.scheduleHeartbeatTick();
538
+ };
536
539
 
537
540
  // ── Dummy traffic (breaks heartbeat timing pattern) ────────────
538
541
  private startDummyTraffic() {
539
542
  if (!this.sessionKey) return;
540
- const scheduleNext = () => {
541
- // Random interval 2-8 seconds — interleaves with heartbeat to obscure pattern
542
- const interval = 2_000 + Math.random() * 6_000;
543
- this.dummyTimer = setTimeout(() => {
544
- if (this.closed || !this.sessionKey) return;
545
- this.send({ type: "_d", from: "", timestamp: 0 } as unknown as AnyClusterFrame);
546
- scheduleNext();
547
- }, interval);
548
- };
549
- scheduleNext();
543
+ this.scheduleDummyTick();
550
544
  }
551
545
 
546
+ private scheduleDummyTick() {
547
+ // Random interval 2-8 seconds — interleaves with heartbeat to obscure pattern
548
+ const interval = 2_000 + Math.random() * 6_000;
549
+ this.dummyTimer = setTimeout(this.dummyTick, interval);
550
+ }
551
+
552
+ private dummyTick = () => {
553
+ if (this.closed || !this.sessionKey) return;
554
+ this.send({ type: "_d", from: "", timestamp: 0 } as unknown as AnyClusterFrame);
555
+ this.scheduleDummyTick();
556
+ };
557
+
552
558
  // ── Cleanup ────────────────────────────────────────────────────
553
559
  close(code = 1000, reason = "normal") {
554
560
  if (this.closed) return;
@@ -23,11 +23,6 @@ export function allocPort(): number {
23
23
  return nextPort++;
24
24
  }
25
25
 
26
- /** Reset port counter (call in afterAll if tests run in a loop). */
27
- export function resetPorts(start = 19500): void {
28
- nextPort = start;
29
- }
30
-
31
26
  // ── Logger ──────────────────────────────────────────────────────────
32
27
 
33
28
  /** Minimal no-op logger satisfying PluginLogger. */
@@ -110,6 +105,7 @@ function buildConfig(options: TestNodeOptions): ClawMatrixConfig {
110
105
  handoffTimeout: options.handoffTimeout ?? 600_000,
111
106
  modelTimeout: options.modelTimeout ?? 120_000,
112
107
  toolTimeout: options.toolTimeout ?? 30_000,
108
+ disconnectGrace: 0,
113
109
  peerApproval: {
114
110
  enabled: false,
115
111
  mode: "notify",