clawmatrix 0.2.11 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +27 -0
- package/README.md +123 -12
- package/cli/bin/clawmatrix.mjs +1006 -0
- package/cli/package.json +27 -0
- package/cli/skills/clawmatrix/SKILL.md +104 -0
- package/openclaw.plugin.json +1 -0
- package/package.json +3 -1
- package/src/acp-proxy.ts +820 -96
- package/src/cluster-service.ts +186 -16
- package/src/compat.ts +0 -6
- package/src/config.ts +8 -5
- package/src/connection.ts +61 -55
- package/src/e2e/helpers.ts +1 -5
- package/src/file-transfer.ts +64 -14
- package/src/handoff.ts +21 -8
- package/src/health-tracker.ts +40 -11
- package/src/index.ts +686 -14
- package/src/knowledge-sync.ts +62 -10
- package/src/model-proxy.ts +40 -10
- package/src/peer-manager.ts +114 -17
- package/src/rate-limiter.ts +16 -10
- package/src/router.ts +115 -33
- package/src/sentinel-manager.ts +51 -0
- package/src/sentinel.ts +13 -3
- package/src/tool-proxy.ts +52 -6
- package/src/tools/cluster-diagnostic.ts +3 -2
- package/src/tools/cluster-edit.ts +2 -1
- package/src/tools/cluster-events.ts +3 -1
- package/src/tools/cluster-exec.ts +2 -0
- package/src/tools/cluster-handoff.ts +3 -1
- package/src/tools/cluster-notify.ts +132 -0
- package/src/tools/cluster-peers.ts +3 -1
- package/src/tools/cluster-read.ts +4 -1
- package/src/tools/cluster-send.ts +2 -1
- package/src/tools/cluster-terminal.ts +4 -7
- package/src/tools/cluster-tool.ts +2 -2
- package/src/tools/cluster-write.ts +3 -1
- package/src/types.ts +103 -1
- package/src/web.ts +2 -10
- package/src/cli.ts +0 -243
- package/src/web-ui.ts +0 -1622
package/src/cluster-service.ts
CHANGED
|
@@ -102,6 +102,11 @@ export class ClusterRuntime {
|
|
|
102
102
|
private logger: PluginLogger;
|
|
103
103
|
private openclawConfig: OpenClawConfig;
|
|
104
104
|
private exitHandler: (() => void) | null = null;
|
|
105
|
+
// Pre-built indexes for O(1) local agent lookup
|
|
106
|
+
private agentById = new Map<string, ClawMatrixConfig["agents"][number]>();
|
|
107
|
+
private agentsByTag = new Map<string, ClawMatrixConfig["agents"][number]>();
|
|
108
|
+
/** Track known relay peers to record health events on discovery/removal. */
|
|
109
|
+
private knownRelayPeers = new Set<string>();
|
|
105
110
|
|
|
106
111
|
constructor(config: ClawMatrixConfig, logger: PluginLogger, openclawConfig: OpenClawConfig, openclawVersion?: string) {
|
|
107
112
|
this.config = config;
|
|
@@ -123,9 +128,14 @@ export class ClusterRuntime {
|
|
|
123
128
|
nodeId: config.nodeId,
|
|
124
129
|
peerManager: this.peerManager,
|
|
125
130
|
});
|
|
131
|
+
// Build agent indexes
|
|
132
|
+
for (const a of config.agents) {
|
|
133
|
+
this.agentById.set(a.id, a);
|
|
134
|
+
for (const t of a.tags) this.agentsByTag.set(t, a);
|
|
135
|
+
}
|
|
126
136
|
}
|
|
127
137
|
|
|
128
|
-
start() {
|
|
138
|
+
async start() {
|
|
129
139
|
// Wire up frame dispatch
|
|
130
140
|
this.peerManager.on("frame", (frame) => {
|
|
131
141
|
this.dispatchFrame(frame);
|
|
@@ -147,6 +157,7 @@ export class ClusterRuntime {
|
|
|
147
157
|
|
|
148
158
|
this.peerManager.on("peerCapabilitiesChanged", () => {
|
|
149
159
|
this.refreshDiscoveredModels();
|
|
160
|
+
this.trackRelayPeerHealth();
|
|
150
161
|
});
|
|
151
162
|
|
|
152
163
|
// Web dashboard (must be set before peerManager.start() creates the HTTP server)
|
|
@@ -207,14 +218,31 @@ export class ClusterRuntime {
|
|
|
207
218
|
this.logger.error(`[clawmatrix] Health tracker failed to start: ${err}`);
|
|
208
219
|
});
|
|
209
220
|
|
|
210
|
-
// Start subsystems
|
|
211
|
-
this.peerManager.start();
|
|
212
|
-
this.modelProxy.start();
|
|
213
|
-
|
|
214
221
|
// Sentinel: detached subprocess for diagnostics when gateway dies.
|
|
215
222
|
// Default on; starts when not explicitly disabled AND (has outbound peers OR gateway is a listener for port takeover)
|
|
216
|
-
|
|
223
|
+
const sentinelEnabled = (this.config.sentinel?.enabled ?? true) && (this.config.peers.length > 0 || this.config.listen);
|
|
224
|
+
if (sentinelEnabled) {
|
|
217
225
|
this.sentinelManager = new SentinelManager(this.config);
|
|
226
|
+
// Kill old sentinel and wait for the listen port to be released
|
|
227
|
+
// before PeerManager tries to bind it.
|
|
228
|
+
await this.sentinelManager.ensurePortFree();
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
// Start subsystems (port is now guaranteed free)
|
|
232
|
+
this.peerManager.start();
|
|
233
|
+
this.modelProxy.start();
|
|
234
|
+
|
|
235
|
+
// Fetch tool catalog from the local gateway (non-blocking).
|
|
236
|
+
// The catalog (tool names, descriptions) is advertised to peers via peer_sync
|
|
237
|
+
// so remote LLM callers can discover available tools.
|
|
238
|
+
if (this.config.toolProxy?.enabled) {
|
|
239
|
+
this.fetchToolCatalog().catch((err) => {
|
|
240
|
+
this.logger.warn(`[clawmatrix] Tool catalog fetch failed (non-fatal): ${err}`);
|
|
241
|
+
});
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
// Spawn the new sentinel after PeerManager is listening
|
|
245
|
+
if (sentinelEnabled && this.sentinelManager) {
|
|
218
246
|
this.sentinelManager.start();
|
|
219
247
|
this.logger.info(`[clawmatrix] Sentinel started for node "${this.config.nodeId}"`);
|
|
220
248
|
}
|
|
@@ -243,8 +271,33 @@ export class ClusterRuntime {
|
|
|
243
271
|
// NOTE: intentionally do NOT stop sentinel here.
|
|
244
272
|
// Sentinel must survive gateway shutdown — that's its entire purpose.
|
|
245
273
|
// It will be replaced by killOldSentinel() on next gateway start.
|
|
274
|
+
|
|
275
|
+
// Wrap all async shutdown in a 4s timeout to prevent blocking gateway restart.
|
|
276
|
+
// OpenClaw's force-exit timer is 5s for stop, so we must finish before that.
|
|
277
|
+
const STOP_TIMEOUT_MS = 4000;
|
|
278
|
+
let timer: ReturnType<typeof setTimeout> | undefined;
|
|
279
|
+
await Promise.race([
|
|
280
|
+
this.stopInternal().then(() => { clearTimeout(timer); }),
|
|
281
|
+
new Promise<void>((resolve) => {
|
|
282
|
+
timer = setTimeout(() => {
|
|
283
|
+
this.logger.warn("[clawmatrix] Graceful shutdown timed out after 4s, forcing cleanup");
|
|
284
|
+
this.forceCleanup();
|
|
285
|
+
resolve();
|
|
286
|
+
}, STOP_TIMEOUT_MS);
|
|
287
|
+
}),
|
|
288
|
+
]);
|
|
289
|
+
this.logger.info(`[clawmatrix] Node "${this.config.nodeId}" stopped`);
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
private async stopInternal() {
|
|
246
293
|
await this.healthTracker.stop();
|
|
247
294
|
await this.knowledgeSync?.stop();
|
|
295
|
+
this.syncCleanup();
|
|
296
|
+
await this.peerManager.stop();
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
/** Synchronous cleanup that never blocks. */
|
|
300
|
+
private syncCleanup() {
|
|
248
301
|
this.webHandler?.destroy();
|
|
249
302
|
this.handoffManager.destroy();
|
|
250
303
|
this.acpProxy?.destroy();
|
|
@@ -252,8 +305,12 @@ export class ClusterRuntime {
|
|
|
252
305
|
this.modelProxy.stop();
|
|
253
306
|
this.fileTransferManager?.destroy();
|
|
254
307
|
this.toolProxy.destroy();
|
|
255
|
-
|
|
256
|
-
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
/** Emergency cleanup when graceful shutdown times out. */
|
|
311
|
+
private forceCleanup() {
|
|
312
|
+
try { this.syncCleanup(); } catch { /* best effort */ }
|
|
313
|
+
try { this.peerManager.forceStop(); } catch { /* best effort */ }
|
|
257
314
|
}
|
|
258
315
|
|
|
259
316
|
private refreshDiscoveredModels() {
|
|
@@ -261,6 +318,87 @@ export class ClusterRuntime {
|
|
|
261
318
|
this.modelProxy.updateDiscoveredModels(peers);
|
|
262
319
|
}
|
|
263
320
|
|
|
321
|
+
/** Track relay peer health: record peer_online/peer_offline for relay peers. */
|
|
322
|
+
private trackRelayPeerHealth() {
|
|
323
|
+
const currentRelayPeers = new Set<string>();
|
|
324
|
+
for (const peer of this.peerManager.router.getAllPeers()) {
|
|
325
|
+
if (peer.reachableVia) {
|
|
326
|
+
currentRelayPeers.add(peer.nodeId);
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
// New relay peers discovered
|
|
331
|
+
for (const nodeId of currentRelayPeers) {
|
|
332
|
+
if (!this.knownRelayPeers.has(nodeId)) {
|
|
333
|
+
this.healthTracker.recordPeerOnline(nodeId, "relay");
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
|
|
337
|
+
// Relay peers that disappeared
|
|
338
|
+
for (const nodeId of this.knownRelayPeers) {
|
|
339
|
+
if (!currentRelayPeers.has(nodeId)) {
|
|
340
|
+
this.healthTracker.recordPeerOffline(nodeId, "relay_route_lost");
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
this.knownRelayPeers = currentRelayPeers;
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
/** Fetch tool catalog from the local OpenClaw gateway and advertise to peers. */
|
|
348
|
+
private async fetchToolCatalog() {
|
|
349
|
+
const { spawnProcess } = await import("./compat.ts");
|
|
350
|
+
const proc = spawnProcess(
|
|
351
|
+
["openclaw", "gateway", "call", "tools.catalog", "--json", "--params", '{"includePlugins":true}'],
|
|
352
|
+
{ stdout: "pipe", stderr: "pipe" },
|
|
353
|
+
);
|
|
354
|
+
const chunks: Uint8Array[] = [];
|
|
355
|
+
if (proc.stdout) {
|
|
356
|
+
const reader = proc.stdout.getReader();
|
|
357
|
+
while (true) {
|
|
358
|
+
const { done, value } = await reader.read();
|
|
359
|
+
if (done) break;
|
|
360
|
+
chunks.push(value);
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
const code = await proc.exited;
|
|
364
|
+
if (code !== 0) return;
|
|
365
|
+
|
|
366
|
+
const stdout = Buffer.concat(chunks).toString("utf-8").trim();
|
|
367
|
+
if (!stdout) return;
|
|
368
|
+
// stdout may contain non-JSON log lines (e.g. "[plugins] ...") before the actual JSON.
|
|
369
|
+
// Extract the first JSON object from the output.
|
|
370
|
+
const jsonStart = stdout.indexOf("{");
|
|
371
|
+
if (jsonStart < 0) return;
|
|
372
|
+
const data = JSON.parse(stdout.slice(jsonStart)) as {
|
|
373
|
+
groups?: Array<{
|
|
374
|
+
tools: Array<{ id: string; label: string; description: string }>;
|
|
375
|
+
}>;
|
|
376
|
+
};
|
|
377
|
+
if (!data.groups) return;
|
|
378
|
+
|
|
379
|
+
const allowSet = new Set(this.config.toolProxy?.allow ?? []);
|
|
380
|
+
const isWildcard = allowSet.has("*") || allowSet.size === 0;
|
|
381
|
+
const denySet = new Set(this.config.toolProxy?.deny ?? []);
|
|
382
|
+
const catalog: import("./types.ts").ToolCatalogEntry[] = [];
|
|
383
|
+
for (const group of data.groups) {
|
|
384
|
+
for (const tool of group.tools) {
|
|
385
|
+
if (denySet.has(tool.id)) continue;
|
|
386
|
+
if (!isWildcard && !allowSet.has(tool.id)) continue;
|
|
387
|
+
// Skip clawmatrix's own cluster_ tools (they're the invoker, not the invokee)
|
|
388
|
+
if (tool.id.startsWith("cluster_")) continue;
|
|
389
|
+
catalog.push({
|
|
390
|
+
name: tool.id,
|
|
391
|
+
description: tool.description,
|
|
392
|
+
});
|
|
393
|
+
}
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
if (catalog.length > 0) {
|
|
397
|
+
this.peerManager.updateToolCatalog(catalog);
|
|
398
|
+
this.logger.info(`[clawmatrix] Tool catalog: ${catalog.length} tool(s) advertised to peers`);
|
|
399
|
+
}
|
|
400
|
+
}
|
|
401
|
+
|
|
264
402
|
private resolveWorkspacePath(): string | null {
|
|
265
403
|
// Read workspace from OpenClaw agent config (first agent or default agent)
|
|
266
404
|
const agents = (this.openclawConfig as Record<string, unknown>).agents as
|
|
@@ -491,6 +629,41 @@ export class ClusterRuntime {
|
|
|
491
629
|
case "acp_get_modes_res":
|
|
492
630
|
this.acpProxy?.handleGetModesResponse(frame as AcpGetModesResponse);
|
|
493
631
|
break;
|
|
632
|
+
case "acp_set_config":
|
|
633
|
+
if (this.acpProxy) {
|
|
634
|
+
this.acpProxy.handleSetConfigRequest(frame as import("./types.ts").AcpSetConfigRequest).catch((err) => {
|
|
635
|
+
this.logger.error(`[clawmatrix] ACP set config error: ${err}`);
|
|
636
|
+
});
|
|
637
|
+
} else {
|
|
638
|
+
const cf = frame as import("./types.ts").AcpSetConfigRequest;
|
|
639
|
+
this.peerManager.sendTo(cf.from, {
|
|
640
|
+
type: "acp_set_config_res", id: cf.id, from: this.config.nodeId, to: cf.from,
|
|
641
|
+
timestamp: Date.now(), payload: { success: false, error: "ACP not enabled on this node" },
|
|
642
|
+
} as import("./types.ts").AcpSetConfigResponse);
|
|
643
|
+
}
|
|
644
|
+
break;
|
|
645
|
+
case "acp_set_config_res":
|
|
646
|
+
this.acpProxy?.handleSetConfigResponse(frame as import("./types.ts").AcpSetConfigResponse);
|
|
647
|
+
break;
|
|
648
|
+
case "acp_subscribe":
|
|
649
|
+
if (this.acpProxy) {
|
|
650
|
+
this.acpProxy.handleSubscribeRequest(frame as import("./types.ts").AcpSubscribeRequest).catch((err) => {
|
|
651
|
+
this.logger.error(`[clawmatrix] ACP subscribe error: ${err}`);
|
|
652
|
+
});
|
|
653
|
+
} else {
|
|
654
|
+
const sf = frame as import("./types.ts").AcpSubscribeRequest;
|
|
655
|
+
this.peerManager.sendTo(sf.from, {
|
|
656
|
+
type: "acp_subscribe_res", id: sf.id, from: this.config.nodeId, to: sf.from,
|
|
657
|
+
timestamp: Date.now(), payload: { success: false, error: "ACP not enabled on this node" },
|
|
658
|
+
} as import("./types.ts").AcpSubscribeResponse);
|
|
659
|
+
}
|
|
660
|
+
break;
|
|
661
|
+
case "acp_unsubscribe":
|
|
662
|
+
this.acpProxy?.handleUnsubscribeRequest(frame as import("./types.ts").AcpUnsubscribeRequest);
|
|
663
|
+
break;
|
|
664
|
+
case "acp_session_notify":
|
|
665
|
+
// Outbound notification — no server-side handling needed.
|
|
666
|
+
break;
|
|
494
667
|
case "chat_history_req":
|
|
495
668
|
if (this.acpProxy) {
|
|
496
669
|
this.acpProxy.handleChatHistoryRequest(frame as ChatHistoryRequest).catch((err) => {
|
|
@@ -544,12 +717,9 @@ export class ClusterRuntime {
|
|
|
544
717
|
private handleSendMessage(frame: SendMessage) {
|
|
545
718
|
// Inject message into local agent session via openclaw CLI
|
|
546
719
|
const { target, message } = frame.payload;
|
|
547
|
-
const agent =
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
}
|
|
551
|
-
return a.id === target;
|
|
552
|
-
});
|
|
720
|
+
const agent = target.startsWith("tags:")
|
|
721
|
+
? this.agentsByTag.get(target.slice(5))
|
|
722
|
+
: this.agentById.get(target);
|
|
553
723
|
|
|
554
724
|
if (!agent) {
|
|
555
725
|
this.logger.warn(
|
|
@@ -587,9 +757,9 @@ export function createClusterService(
|
|
|
587
757
|
): OpenClawPluginService {
|
|
588
758
|
return {
|
|
589
759
|
id: "clawmatrix",
|
|
590
|
-
start(ctx: OpenClawPluginServiceContext) {
|
|
760
|
+
async start(ctx: OpenClawPluginServiceContext) {
|
|
591
761
|
clusterRuntime = new ClusterRuntime(config, ctx.logger, openclawConfig, openclawVersion);
|
|
592
|
-
clusterRuntime.start();
|
|
762
|
+
await clusterRuntime.start();
|
|
593
763
|
onStarted?.();
|
|
594
764
|
},
|
|
595
765
|
async stop() {
|
package/src/compat.ts
CHANGED
|
@@ -8,12 +8,6 @@ import { spawn as cpSpawn } from "node:child_process";
|
|
|
8
8
|
import { open, readFile, stat, writeFile } from "node:fs/promises";
|
|
9
9
|
import { createRequire } from "node:module";
|
|
10
10
|
|
|
11
|
-
export interface SpawnResult {
|
|
12
|
-
exitCode: number;
|
|
13
|
-
stdout: string;
|
|
14
|
-
stderr: string;
|
|
15
|
-
}
|
|
16
|
-
|
|
17
11
|
/** Spawn a subprocess and collect stdout/stderr. */
|
|
18
12
|
export function spawnProcess(
|
|
19
13
|
cmd: string[],
|
package/src/config.ts
CHANGED
|
@@ -161,17 +161,20 @@ const RawClawMatrixConfigSchema = z.object({
|
|
|
161
161
|
secret: z.string().min(16, "secret must be at least 16 characters"),
|
|
162
162
|
listen: z.boolean().default(false),
|
|
163
163
|
listenHost: z.string().default("0.0.0.0"),
|
|
164
|
-
listenPort: z.number().default(0),
|
|
164
|
+
listenPort: z.number().int().min(0).max(65535).default(0),
|
|
165
165
|
peers: z.array(PeerConfigSchema).default([]),
|
|
166
166
|
agents: z.array(AgentInfoSchema).default([]),
|
|
167
167
|
models: z.array(ModelInfoSchema).default([]),
|
|
168
168
|
proxyModels: z.array(ProxyModelGroupSchema).default([]),
|
|
169
169
|
tags: z.array(z.string()).default([]),
|
|
170
|
-
proxyPort: z.number().default(0),
|
|
170
|
+
proxyPort: z.number().int().min(0).max(65535).default(0),
|
|
171
171
|
toolProxy: ToolProxyConfigSchema.optional(),
|
|
172
|
-
handoffTimeout: z.number().default(600_000),
|
|
173
|
-
modelTimeout: z.number().default(120_000),
|
|
174
|
-
toolTimeout: z.number().default(30_000),
|
|
172
|
+
handoffTimeout: z.number().positive().default(600_000),
|
|
173
|
+
modelTimeout: z.number().positive().default(120_000),
|
|
174
|
+
toolTimeout: z.number().positive().default(30_000),
|
|
175
|
+
/** Grace period (ms) before broadcasting peer_leave after disconnect.
|
|
176
|
+
* Allows brief reconnections (WiFi/cellular handoff) to be invisible to the mesh. */
|
|
177
|
+
disconnectGrace: z.number().nonnegative().default(30_000),
|
|
175
178
|
sentinel: SentinelConfigSchema,
|
|
176
179
|
web: WebConfigSchema,
|
|
177
180
|
knowledge: KnowledgeConfigSchema,
|
package/src/connection.ts
CHANGED
|
@@ -485,70 +485,76 @@ export class Connection extends EventEmitter<ConnectionEvents> {
|
|
|
485
485
|
|
|
486
486
|
private startHeartbeat() {
|
|
487
487
|
this.lastReceivedAt = Date.now();
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
this.heartbeatTimer = setTimeout(() => {
|
|
491
|
-
if (this.closed) return;
|
|
492
|
-
|
|
493
|
-
// Watchdog: if no data received for a long time, the connection is dead
|
|
494
|
-
// regardless of what the heartbeat ping/pong state says.
|
|
495
|
-
const silenceMs = Date.now() - this.lastReceivedAt;
|
|
496
|
-
if (this.lastReceivedAt > 0 && silenceMs > Connection.RECEIVE_TIMEOUT) {
|
|
497
|
-
debug("heartbeat", `No data received for ${Math.round(silenceMs / 1000)}s from ${this.remoteNodeId ?? "unknown"}, closing`);
|
|
498
|
-
this.close(4002, "receive timeout");
|
|
499
|
-
return;
|
|
500
|
-
}
|
|
488
|
+
this.scheduleHeartbeatTick();
|
|
489
|
+
}
|
|
501
490
|
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
debug("heartbeat", `${HEARTBEAT_TIMEOUT_COUNT} missed pongs from ${this.remoteNodeId ?? "unknown"}, closing`);
|
|
507
|
-
this.close(4002, "heartbeat timeout");
|
|
508
|
-
return;
|
|
509
|
-
}
|
|
491
|
+
private scheduleHeartbeatTick() {
|
|
492
|
+
const interval = HEARTBEAT_BASE + Math.random() * HEARTBEAT_JITTER;
|
|
493
|
+
this.heartbeatTimer = setTimeout(this.heartbeatTick, interval);
|
|
494
|
+
}
|
|
510
495
|
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
try {
|
|
514
|
-
if (this.transport.readyState !== WebSocket.OPEN) {
|
|
515
|
-
debug("heartbeat", `Transport not open (state=${this.transport.readyState}) for ${this.remoteNodeId ?? "unknown"}, closing`);
|
|
516
|
-
this.close(4002, "transport closed");
|
|
517
|
-
return;
|
|
518
|
-
}
|
|
519
|
-
this.lastPingSentAt = Date.now();
|
|
520
|
-
this.send({
|
|
521
|
-
type: "ping",
|
|
522
|
-
from: this.nodeId,
|
|
523
|
-
timestamp: this.lastPingSentAt,
|
|
524
|
-
} as AnyClusterFrame);
|
|
525
|
-
} catch (err) {
|
|
526
|
-
debug("heartbeat", `Ping send failed for ${this.remoteNodeId ?? "unknown"}: ${err}`);
|
|
527
|
-
this.close(4002, "ping send failed");
|
|
528
|
-
return;
|
|
529
|
-
}
|
|
496
|
+
private heartbeatTick = () => {
|
|
497
|
+
if (this.closed) return;
|
|
530
498
|
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
499
|
+
// Watchdog: if no data received for a long time, the connection is dead
|
|
500
|
+
// regardless of what the heartbeat ping/pong state says.
|
|
501
|
+
const silenceMs = Date.now() - this.lastReceivedAt;
|
|
502
|
+
if (this.lastReceivedAt > 0 && silenceMs > Connection.RECEIVE_TIMEOUT) {
|
|
503
|
+
debug("heartbeat", `No data received for ${Math.round(silenceMs / 1000)}s from ${this.remoteNodeId ?? "unknown"}, closing`);
|
|
504
|
+
this.close(4002, "receive timeout");
|
|
505
|
+
return;
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
// Increment before checking: this ping is about to be sent and
|
|
509
|
+
// counts as outstanding until a pong arrives.
|
|
510
|
+
this.missedPongs++;
|
|
511
|
+
if (this.missedPongs >= HEARTBEAT_TIMEOUT_COUNT) {
|
|
512
|
+
debug("heartbeat", `${HEARTBEAT_TIMEOUT_COUNT} missed pongs from ${this.remoteNodeId ?? "unknown"}, closing`);
|
|
513
|
+
this.close(4002, "heartbeat timeout");
|
|
514
|
+
return;
|
|
515
|
+
}
|
|
516
|
+
|
|
517
|
+
// Send ping — wrapped in try-catch to prevent breaking the heartbeat chain.
|
|
518
|
+
// If send fails, the connection is dead; close it.
|
|
519
|
+
try {
|
|
520
|
+
if (this.transport.readyState !== WebSocket.OPEN) {
|
|
521
|
+
debug("heartbeat", `Transport not open (state=${this.transport.readyState}) for ${this.remoteNodeId ?? "unknown"}, closing`);
|
|
522
|
+
this.close(4002, "transport closed");
|
|
523
|
+
return;
|
|
524
|
+
}
|
|
525
|
+
this.lastPingSentAt = Date.now();
|
|
526
|
+
this.send({
|
|
527
|
+
type: "ping",
|
|
528
|
+
from: this.nodeId,
|
|
529
|
+
timestamp: this.lastPingSentAt,
|
|
530
|
+
} as AnyClusterFrame);
|
|
531
|
+
} catch (err) {
|
|
532
|
+
debug("heartbeat", `Ping send failed for ${this.remoteNodeId ?? "unknown"}: ${err}`);
|
|
533
|
+
this.close(4002, "ping send failed");
|
|
534
|
+
return;
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
this.scheduleHeartbeatTick();
|
|
538
|
+
};
|
|
536
539
|
|
|
537
540
|
// ── Dummy traffic (breaks heartbeat timing pattern) ────────────
|
|
538
541
|
private startDummyTraffic() {
|
|
539
542
|
if (!this.sessionKey) return;
|
|
540
|
-
|
|
541
|
-
// Random interval 2-8 seconds — interleaves with heartbeat to obscure pattern
|
|
542
|
-
const interval = 2_000 + Math.random() * 6_000;
|
|
543
|
-
this.dummyTimer = setTimeout(() => {
|
|
544
|
-
if (this.closed || !this.sessionKey) return;
|
|
545
|
-
this.send({ type: "_d", from: "", timestamp: 0 } as unknown as AnyClusterFrame);
|
|
546
|
-
scheduleNext();
|
|
547
|
-
}, interval);
|
|
548
|
-
};
|
|
549
|
-
scheduleNext();
|
|
543
|
+
this.scheduleDummyTick();
|
|
550
544
|
}
|
|
551
545
|
|
|
546
|
+
private scheduleDummyTick() {
|
|
547
|
+
// Random interval 2-8 seconds — interleaves with heartbeat to obscure pattern
|
|
548
|
+
const interval = 2_000 + Math.random() * 6_000;
|
|
549
|
+
this.dummyTimer = setTimeout(this.dummyTick, interval);
|
|
550
|
+
}
|
|
551
|
+
|
|
552
|
+
private dummyTick = () => {
|
|
553
|
+
if (this.closed || !this.sessionKey) return;
|
|
554
|
+
this.send({ type: "_d", from: "", timestamp: 0 } as unknown as AnyClusterFrame);
|
|
555
|
+
this.scheduleDummyTick();
|
|
556
|
+
};
|
|
557
|
+
|
|
552
558
|
// ── Cleanup ────────────────────────────────────────────────────
|
|
553
559
|
close(code = 1000, reason = "normal") {
|
|
554
560
|
if (this.closed) return;
|
package/src/e2e/helpers.ts
CHANGED
|
@@ -23,11 +23,6 @@ export function allocPort(): number {
|
|
|
23
23
|
return nextPort++;
|
|
24
24
|
}
|
|
25
25
|
|
|
26
|
-
/** Reset port counter (call in afterAll if tests run in a loop). */
|
|
27
|
-
export function resetPorts(start = 19500): void {
|
|
28
|
-
nextPort = start;
|
|
29
|
-
}
|
|
30
|
-
|
|
31
26
|
// ── Logger ──────────────────────────────────────────────────────────
|
|
32
27
|
|
|
33
28
|
/** Minimal no-op logger satisfying PluginLogger. */
|
|
@@ -110,6 +105,7 @@ function buildConfig(options: TestNodeOptions): ClawMatrixConfig {
|
|
|
110
105
|
handoffTimeout: options.handoffTimeout ?? 600_000,
|
|
111
106
|
modelTimeout: options.modelTimeout ?? 120_000,
|
|
112
107
|
toolTimeout: options.toolTimeout ?? 30_000,
|
|
108
|
+
disconnectGrace: 0,
|
|
113
109
|
peerApproval: {
|
|
114
110
|
enabled: false,
|
|
115
111
|
mode: "notify",
|
package/src/file-transfer.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { createHash } from "node:crypto";
|
|
2
|
-
import { readFile, writeFile, stat, mkdir } from "node:fs/promises";
|
|
2
|
+
import { readFile, writeFile, stat, mkdir, lstat, realpath } from "node:fs/promises";
|
|
3
3
|
import path from "node:path";
|
|
4
4
|
import { debug } from "./debug.ts";
|
|
5
5
|
import type { PeerManager } from "./peer-manager.ts";
|
|
@@ -40,6 +40,7 @@ interface PendingTransfer {
|
|
|
40
40
|
expectedChunks?: number;
|
|
41
41
|
expectedChecksum?: string;
|
|
42
42
|
receivedChunks?: number;
|
|
43
|
+
onProgress?: (progress: TransferProgress) => void;
|
|
43
44
|
}
|
|
44
45
|
|
|
45
46
|
interface ReceivingTransfer {
|
|
@@ -59,6 +60,20 @@ interface ReceivingTransfer {
|
|
|
59
60
|
cachedData?: Buffer;
|
|
60
61
|
}
|
|
61
62
|
|
|
63
|
+
export interface TransferProgress {
|
|
64
|
+
sessionId: string;
|
|
65
|
+
direction: "push" | "pull";
|
|
66
|
+
sentChunks: number;
|
|
67
|
+
totalChunks: number;
|
|
68
|
+
bytesTransferred: number;
|
|
69
|
+
totalBytes: number;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
export interface TransferOptions {
|
|
73
|
+
/** Called after each chunk is acknowledged. */
|
|
74
|
+
onProgress?: (progress: TransferProgress) => void;
|
|
75
|
+
}
|
|
76
|
+
|
|
62
77
|
export interface TransferResult {
|
|
63
78
|
success: boolean;
|
|
64
79
|
bytesTransferred: number;
|
|
@@ -87,10 +102,10 @@ export class FileTransferManager {
|
|
|
87
102
|
|
|
88
103
|
// ── Public API ──────────────────────────────────────────────────
|
|
89
104
|
|
|
90
|
-
async pushFile(remoteNode: string, localPath: string, remotePath: string): Promise<TransferResult> {
|
|
105
|
+
async pushFile(remoteNode: string, localPath: string, remotePath: string, opts?: TransferOptions): Promise<TransferResult> {
|
|
91
106
|
this.ensureEnabled();
|
|
92
107
|
const resolvedPath = path.resolve(localPath);
|
|
93
|
-
this.validatePath(resolvedPath);
|
|
108
|
+
await this.validatePath(resolvedPath);
|
|
94
109
|
|
|
95
110
|
const fileData = await readFile(resolvedPath);
|
|
96
111
|
if (fileData.length > this.config.maxFileSize) {
|
|
@@ -117,6 +132,7 @@ export class FileTransferManager {
|
|
|
117
132
|
chunkSize: this.config.chunkSize,
|
|
118
133
|
totalChunks,
|
|
119
134
|
sentChunks: 0,
|
|
135
|
+
onProgress: opts?.onProgress,
|
|
120
136
|
});
|
|
121
137
|
|
|
122
138
|
this.peerManager.sendTo(remoteNode, {
|
|
@@ -139,10 +155,10 @@ export class FileTransferManager {
|
|
|
139
155
|
});
|
|
140
156
|
}
|
|
141
157
|
|
|
142
|
-
async pullFile(remoteNode: string, remotePath: string, localPath: string): Promise<TransferResult> {
|
|
158
|
+
async pullFile(remoteNode: string, remotePath: string, localPath: string, opts?: TransferOptions): Promise<TransferResult> {
|
|
143
159
|
this.ensureEnabled();
|
|
144
160
|
const resolvedPath = path.resolve(localPath);
|
|
145
|
-
this.validatePath(resolvedPath);
|
|
161
|
+
await this.validatePath(resolvedPath);
|
|
146
162
|
|
|
147
163
|
const sessionId = crypto.randomUUID();
|
|
148
164
|
|
|
@@ -160,6 +176,7 @@ export class FileTransferManager {
|
|
|
160
176
|
timer,
|
|
161
177
|
chunks: new Map(),
|
|
162
178
|
receivedChunks: 0,
|
|
179
|
+
onProgress: opts?.onProgress,
|
|
163
180
|
});
|
|
164
181
|
|
|
165
182
|
this.peerManager.sendTo(remoteNode, {
|
|
@@ -196,7 +213,7 @@ export class FileTransferManager {
|
|
|
196
213
|
if (direction === "push") {
|
|
197
214
|
// Remote wants to push a file to us
|
|
198
215
|
const resolvedTarget = path.resolve(targetPath);
|
|
199
|
-
if (!this.isPathAllowed(resolvedTarget)) {
|
|
216
|
+
if (!(await this.isPathAllowed(resolvedTarget))) {
|
|
200
217
|
this.sendAck(frame.from, sessionId, frame.id, false, "Target path not allowed");
|
|
201
218
|
return;
|
|
202
219
|
}
|
|
@@ -226,7 +243,7 @@ export class FileTransferManager {
|
|
|
226
243
|
} else {
|
|
227
244
|
// Remote wants to pull a file from us
|
|
228
245
|
const resolvedSource = path.resolve(filePath);
|
|
229
|
-
if (!this.isPathAllowed(resolvedSource)) {
|
|
246
|
+
if (!(await this.isPathAllowed(resolvedSource))) {
|
|
230
247
|
this.sendAck(frame.from, sessionId, frame.id, false, "Source path not allowed");
|
|
231
248
|
return;
|
|
232
249
|
}
|
|
@@ -331,6 +348,16 @@ export class FileTransferManager {
|
|
|
331
348
|
payload: { sessionId, chunkIndex, success: true },
|
|
332
349
|
} as FileTransferChunkAck);
|
|
333
350
|
|
|
351
|
+
pending.onProgress?.({
|
|
352
|
+
sessionId,
|
|
353
|
+
direction: "pull",
|
|
354
|
+
sentChunks: pending.receivedChunks!,
|
|
355
|
+
totalChunks: pending.expectedChunks ?? 0,
|
|
356
|
+
// Use current chunk size as proxy — Math.min clamps the last (smaller) chunk correctly
|
|
357
|
+
bytesTransferred: Math.min(pending.receivedChunks! * buf.length, pending.expectedSize ?? 0),
|
|
358
|
+
totalBytes: pending.expectedSize ?? 0,
|
|
359
|
+
});
|
|
360
|
+
|
|
334
361
|
// Check if all chunks received
|
|
335
362
|
if (pending.receivedChunks === pending.expectedChunks) {
|
|
336
363
|
this.finalizePull(sessionId).catch((err) => {
|
|
@@ -387,6 +414,14 @@ export class FileTransferManager {
|
|
|
387
414
|
return;
|
|
388
415
|
}
|
|
389
416
|
pending.sentChunks = chunkIndex + 1;
|
|
417
|
+
pending.onProgress?.({
|
|
418
|
+
sessionId,
|
|
419
|
+
direction: "push",
|
|
420
|
+
sentChunks: pending.sentChunks!,
|
|
421
|
+
totalChunks: pending.totalChunks!,
|
|
422
|
+
bytesTransferred: Math.min(pending.sentChunks! * (pending.chunkSize ?? this.config.chunkSize), pending.fileData?.length ?? 0),
|
|
423
|
+
totalBytes: pending.fileData?.length ?? 0,
|
|
424
|
+
});
|
|
390
425
|
if (pending.sentChunks! < pending.totalChunks!) {
|
|
391
426
|
this.sendNextChunk(sessionId);
|
|
392
427
|
}
|
|
@@ -442,13 +477,13 @@ export class FileTransferManager {
|
|
|
442
477
|
}
|
|
443
478
|
|
|
444
479
|
destroy(): void {
|
|
445
|
-
for (const [
|
|
480
|
+
for (const [, transfer] of this.pending) {
|
|
446
481
|
clearTimeout(transfer.timer);
|
|
447
482
|
transfer.reject(new Error("FileTransferManager destroyed"));
|
|
448
483
|
}
|
|
449
484
|
this.pending.clear();
|
|
450
485
|
|
|
451
|
-
for (const [
|
|
486
|
+
for (const [, transfer] of this.receiving) {
|
|
452
487
|
clearTimeout(transfer.timer);
|
|
453
488
|
}
|
|
454
489
|
this.receiving.clear();
|
|
@@ -462,20 +497,35 @@ export class FileTransferManager {
|
|
|
462
497
|
}
|
|
463
498
|
}
|
|
464
499
|
|
|
465
|
-
private validatePath(resolvedPath: string): void {
|
|
466
|
-
if (!this.isPathAllowed(resolvedPath)) {
|
|
500
|
+
private async validatePath(resolvedPath: string): Promise<void> {
|
|
501
|
+
if (!(await this.isPathAllowed(resolvedPath))) {
|
|
467
502
|
throw new Error(`Path not allowed: ${resolvedPath}`);
|
|
468
503
|
}
|
|
469
504
|
}
|
|
470
505
|
|
|
471
|
-
private isPathAllowed(resolvedPath: string): boolean {
|
|
506
|
+
private async isPathAllowed(resolvedPath: string): Promise<boolean> {
|
|
472
507
|
// TODO(security): allowedPaths 为空时默认允许所有路径,当前仅用于受信任网络。
|
|
473
508
|
// 开放到非受信环境前需改为默认拒绝,或要求显式配置 allowedPaths。
|
|
474
509
|
if (this.config.allowedPaths.length === 0) return true;
|
|
475
|
-
|
|
510
|
+
|
|
511
|
+
// Resolve symlinks to prevent path traversal via symlink
|
|
512
|
+
let realResolved: string;
|
|
513
|
+
try {
|
|
514
|
+
realResolved = await realpath(resolvedPath);
|
|
515
|
+
} catch {
|
|
516
|
+
// File doesn't exist yet (for write targets) — check parent directory
|
|
517
|
+
const parentDir = path.dirname(resolvedPath);
|
|
518
|
+
try {
|
|
519
|
+
realResolved = path.join(await realpath(parentDir), path.basename(resolvedPath));
|
|
520
|
+
} catch {
|
|
521
|
+
// Parent doesn't exist either — use the resolved path as-is
|
|
522
|
+
realResolved = resolvedPath;
|
|
523
|
+
}
|
|
524
|
+
}
|
|
525
|
+
|
|
476
526
|
return this.config.allowedPaths.some((allowed) => {
|
|
477
527
|
const resolvedAllowed = path.resolve(allowed);
|
|
478
|
-
return
|
|
528
|
+
return realResolved === resolvedAllowed || realResolved.startsWith(resolvedAllowed + path.sep);
|
|
479
529
|
});
|
|
480
530
|
}
|
|
481
531
|
|