clawmatrix 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli.ts +7 -2
- package/src/cluster-service.ts +2 -0
- package/src/compat.ts +3 -2
- package/src/identity.ts +7 -4
- package/src/index.ts +21 -20
- package/src/peer-manager.ts +53 -7
- package/src/tools/cluster-peers.ts +10 -6
package/package.json
CHANGED
package/src/cli.ts
CHANGED
|
@@ -99,7 +99,7 @@ export const registerClusterCli = ({ program }: { program: Command }) => {
|
|
|
99
99
|
models: Array<{ id: string }>;
|
|
100
100
|
tags: string[];
|
|
101
101
|
connected: boolean;
|
|
102
|
-
status: "direct" | "relay" | "unreachable";
|
|
102
|
+
status: "direct" | "relay" | "unreachable" | "sentinel-only";
|
|
103
103
|
latencyMs: number;
|
|
104
104
|
reachableVia: string | null;
|
|
105
105
|
}>;
|
|
@@ -121,10 +121,15 @@ export const registerClusterCli = ({ program }: { program: Command }) => {
|
|
|
121
121
|
|
|
122
122
|
for (let i = 0; i < peers.length; i++) {
|
|
123
123
|
const peer = peers[i];
|
|
124
|
-
const dot = peer.status === "direct" ? green("●")
|
|
124
|
+
const dot = peer.status === "direct" ? green("●")
|
|
125
|
+
: peer.status === "relay" ? yellow("●")
|
|
126
|
+
: peer.status === "sentinel-only" ? yellow("◐")
|
|
127
|
+
: red("○");
|
|
125
128
|
const latency = peer.connected && peer.latencyMs > 0 ? dim(` ${peer.latencyMs}ms`) : "";
|
|
126
129
|
const statusLabel = peer.status === "relay"
|
|
127
130
|
? yellow(` relay via ${peer.reachableVia}`)
|
|
131
|
+
: peer.status === "sentinel-only"
|
|
132
|
+
? yellow(" sentinel only")
|
|
128
133
|
: peer.status === "unreachable"
|
|
129
134
|
? red(" unreachable")
|
|
130
135
|
: "";
|
package/src/cluster-service.ts
CHANGED
|
@@ -505,12 +505,14 @@ export function createClusterService(
|
|
|
505
505
|
config: ClawMatrixConfig,
|
|
506
506
|
openclawConfig: OpenClawConfig,
|
|
507
507
|
openclawVersion?: string,
|
|
508
|
+
onStarted?: () => void,
|
|
508
509
|
): OpenClawPluginService {
|
|
509
510
|
return {
|
|
510
511
|
id: "clawmatrix",
|
|
511
512
|
start(ctx: OpenClawPluginServiceContext) {
|
|
512
513
|
clusterRuntime = new ClusterRuntime(config, ctx.logger, openclawConfig, openclawVersion);
|
|
513
514
|
clusterRuntime.start();
|
|
515
|
+
onStarted?.();
|
|
514
516
|
},
|
|
515
517
|
async stop() {
|
|
516
518
|
if (clusterRuntime) {
|
package/src/compat.ts
CHANGED
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
|
|
7
7
|
import { spawn as cpSpawn } from "node:child_process";
|
|
8
8
|
import { readFile, writeFile } from "node:fs/promises";
|
|
9
|
+
import { createRequire } from "node:module";
|
|
9
10
|
|
|
10
11
|
export interface SpawnResult {
|
|
11
12
|
exitCode: number;
|
|
@@ -97,8 +98,8 @@ let ptyModule: {
|
|
|
97
98
|
function loadPty() {
|
|
98
99
|
if (ptyModule !== undefined) return ptyModule;
|
|
99
100
|
try {
|
|
100
|
-
|
|
101
|
-
ptyModule =
|
|
101
|
+
const req = createRequire(import.meta.url);
|
|
102
|
+
ptyModule = req("node-pty");
|
|
102
103
|
} catch {
|
|
103
104
|
ptyModule = null;
|
|
104
105
|
}
|
package/src/identity.ts
CHANGED
|
@@ -26,6 +26,7 @@
|
|
|
26
26
|
* ECDH exchange with the peer's key.
|
|
27
27
|
*/
|
|
28
28
|
|
|
29
|
+
import { createPrivateKey } from "node:crypto";
|
|
29
30
|
import fs from "node:fs";
|
|
30
31
|
import path from "node:path";
|
|
31
32
|
import {
|
|
@@ -54,10 +55,12 @@ export function loadOrCreateIdentity(stateDir: string): KeyPair {
|
|
|
54
55
|
if (fs.existsSync(filePath)) {
|
|
55
56
|
const raw = fs.readFileSync(filePath, "utf-8");
|
|
56
57
|
const data: IdentityData = JSON.parse(raw);
|
|
57
|
-
|
|
58
|
+
const keyPair = keyPairFromSerialized(data.publicKey, data.privateKey);
|
|
59
|
+
console.error(`[clawmatrix:identity] loaded existing identity from ${filePath} (publicKey=${data.publicKey.slice(0, 12)}...)`);
|
|
60
|
+
return keyPair;
|
|
58
61
|
}
|
|
59
|
-
} catch {
|
|
60
|
-
|
|
62
|
+
} catch (err) {
|
|
63
|
+
console.error(`[clawmatrix:identity] failed to load identity from ${filePath}, regenerating: ${err}`);
|
|
61
64
|
}
|
|
62
65
|
|
|
63
66
|
// Generate new identity
|
|
@@ -73,13 +76,13 @@ export function loadOrCreateIdentity(stateDir: string): KeyPair {
|
|
|
73
76
|
fs.mkdirSync(stateDir, { recursive: true });
|
|
74
77
|
}
|
|
75
78
|
fs.writeFileSync(filePath, JSON.stringify(data, null, 2), { mode: 0o600 });
|
|
79
|
+
console.error(`[clawmatrix:identity] generated NEW identity at ${filePath} (publicKey=${data.publicKey.slice(0, 12)}...)`);
|
|
76
80
|
|
|
77
81
|
return keyPair;
|
|
78
82
|
}
|
|
79
83
|
|
|
80
84
|
/** Reconstruct a KeyPair from serialized base64 strings. */
|
|
81
85
|
function keyPairFromSerialized(publicKeyB64: string, privateKeyB64: string): KeyPair {
|
|
82
|
-
const { createPrivateKey } = require("node:crypto");
|
|
83
86
|
const publicKey = Buffer.from(publicKeyB64, "base64");
|
|
84
87
|
const privateKey = Buffer.from(privateKeyB64, "base64");
|
|
85
88
|
|
package/src/index.ts
CHANGED
|
@@ -136,7 +136,12 @@ const plugin = {
|
|
|
136
136
|
}
|
|
137
137
|
|
|
138
138
|
// Background service: manages mesh connections, WS listener, heartbeat
|
|
139
|
-
|
|
139
|
+
// onStarted callback wires up approval after the runtime is available
|
|
140
|
+
let onServiceStarted: (() => void) | undefined;
|
|
141
|
+
const serviceStartedPromise = config.peerApproval.enabled
|
|
142
|
+
? new Promise<void>((resolve) => { onServiceStarted = resolve; })
|
|
143
|
+
: undefined;
|
|
144
|
+
api.registerService(createClusterService(config, api.config, api.runtime.version, onServiceStarted));
|
|
140
145
|
|
|
141
146
|
// Model providers: register per-node providers so models are accessed as nodeId/modelId
|
|
142
147
|
const baseUrl = `http://127.0.0.1:${config.proxyPort}/v1`;
|
|
@@ -425,20 +430,11 @@ const plugin = {
|
|
|
425
430
|
}
|
|
426
431
|
};
|
|
427
432
|
|
|
428
|
-
//
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
} catch {
|
|
434
|
-
if (attempt < 30) {
|
|
435
|
-
setTimeout(() => retrySetup(attempt + 1), 1000);
|
|
436
|
-
} else {
|
|
437
|
-
debug("approval", "setupApproval gave up after 30 attempts — cluster runtime never initialized");
|
|
438
|
-
}
|
|
439
|
-
}
|
|
440
|
-
};
|
|
441
|
-
setTimeout(() => retrySetup(), 1000);
|
|
433
|
+
// Run setupApproval once the cluster service has started
|
|
434
|
+
serviceStartedPromise!.then(() => {
|
|
435
|
+
// Small delay to ensure runtime is fully wired
|
|
436
|
+
setTimeout(setupApproval, 100);
|
|
437
|
+
});
|
|
442
438
|
}
|
|
443
439
|
|
|
444
440
|
// Gateway methods (queried by CLI via `openclaw gateway call`)
|
|
@@ -448,6 +444,8 @@ const plugin = {
|
|
|
448
444
|
try {
|
|
449
445
|
const runtime = getClusterRuntime();
|
|
450
446
|
const peers = runtime.peerManager.router.getAllPeers();
|
|
447
|
+
const mergedPeers = mergeSentinelPeers(peers, runtime)
|
|
448
|
+
.filter((p) => (p as { nodeId: string }).nodeId !== config.nodeId);
|
|
451
449
|
respond(true, {
|
|
452
450
|
nodeId: config.nodeId,
|
|
453
451
|
listen: config.listen ? config.listenPort : false,
|
|
@@ -455,7 +453,7 @@ const plugin = {
|
|
|
455
453
|
agents: config.agents.map((a) => ({ id: a.id, description: a.description })),
|
|
456
454
|
models: config.models.map((m) => ({ id: m.id })),
|
|
457
455
|
tags: config.tags,
|
|
458
|
-
peers:
|
|
456
|
+
peers: mergedPeers,
|
|
459
457
|
});
|
|
460
458
|
} catch {
|
|
461
459
|
respond(false, { error: "ClawMatrix service not running" });
|
|
@@ -736,16 +734,18 @@ function mergeSentinelPeers(
|
|
|
736
734
|
const status = runtime.peerManager.router.getPeerStatus(p);
|
|
737
735
|
const sentinel = sentinelMap.get(p.nodeId);
|
|
738
736
|
const sentinelStatus = sentinel ? runtime.peerManager.router.getPeerStatus(sentinel) : undefined;
|
|
737
|
+
const sentinelOnline = sentinelStatus === "direct" || sentinelStatus === "relay";
|
|
738
|
+
const effectiveStatus = status === "unreachable" && sentinelOnline ? "sentinel-only" : status;
|
|
739
739
|
result.push({
|
|
740
740
|
nodeId: p.nodeId,
|
|
741
741
|
agents: p.agents,
|
|
742
742
|
models: p.models,
|
|
743
743
|
tags: p.tags,
|
|
744
744
|
connected: status !== "unreachable",
|
|
745
|
-
status,
|
|
745
|
+
status: effectiveStatus,
|
|
746
746
|
reachableVia: p.reachableVia,
|
|
747
747
|
latencyMs: p.latencyMs,
|
|
748
|
-
...(sentinel ? { sentinel:
|
|
748
|
+
...(sentinel ? { sentinel: sentinelOnline ? "online" : "offline" } : {}),
|
|
749
749
|
});
|
|
750
750
|
}
|
|
751
751
|
|
|
@@ -753,15 +753,16 @@ function mergeSentinelPeers(
|
|
|
753
753
|
for (const [mainId, sentinel] of sentinelMap) {
|
|
754
754
|
if (seen.has(mainId)) continue;
|
|
755
755
|
const sentinelStatus = runtime.peerManager.router.getPeerStatus(sentinel);
|
|
756
|
+
const sentinelOnline = sentinelStatus === "direct" || sentinelStatus === "relay";
|
|
756
757
|
result.push({
|
|
757
758
|
nodeId: mainId,
|
|
758
759
|
agents: [],
|
|
759
760
|
models: [],
|
|
760
761
|
tags: [],
|
|
761
762
|
connected: false,
|
|
762
|
-
status: "unreachable",
|
|
763
|
+
status: sentinelOnline ? "sentinel-only" : "unreachable",
|
|
763
764
|
latencyMs: sentinel.latencyMs,
|
|
764
|
-
sentinel:
|
|
765
|
+
sentinel: sentinelOnline ? "online" : "offline",
|
|
765
766
|
});
|
|
766
767
|
}
|
|
767
768
|
|
package/src/peer-manager.ts
CHANGED
|
@@ -28,6 +28,23 @@ import type { KeyPair } from "./crypto.ts";
|
|
|
28
28
|
const RECONNECT_BASE = 1_000;
|
|
29
29
|
const RECONNECT_MAX = 60_000;
|
|
30
30
|
|
|
31
|
+
/** Classify WebSocket close code into a human-readable reason. */
|
|
32
|
+
function classifyCloseReason(code: number, reason: string): string {
|
|
33
|
+
if (reason) return reason;
|
|
34
|
+
switch (code) {
|
|
35
|
+
case 1006: return "unreachable (node may be down)";
|
|
36
|
+
case 1000: return "normal close";
|
|
37
|
+
case 1001: return "peer going away";
|
|
38
|
+
case 1002: return "protocol error";
|
|
39
|
+
case 1003: return "unsupported data";
|
|
40
|
+
case 1008: return "policy violation";
|
|
41
|
+
case 1011: return "server error";
|
|
42
|
+
case 4001: return "auth failed";
|
|
43
|
+
case 4003: return "auth timeout";
|
|
44
|
+
default: return `close code ${code}`;
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
|
|
31
48
|
/** Check if an IP is a loopback address (IPv4 127.x or IPv6 ::1). */
|
|
32
49
|
function isLoopback(ip?: string): boolean {
|
|
33
50
|
if (!ip) return false;
|
|
@@ -282,12 +299,26 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
282
299
|
|
|
283
300
|
// ── Outbound connections (standard WebSocket) ──────────────────
|
|
284
301
|
private connectToPeer(peer: PeerConfig) {
|
|
285
|
-
if (this.stopped)
|
|
302
|
+
if (this.stopped) {
|
|
303
|
+
debug("peer", `connectToPeer(${peer.nodeId}): skipped (stopped)`);
|
|
304
|
+
return;
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
const attempt = this.reconnectAttempts.get(peer.nodeId) ?? 0;
|
|
308
|
+
debug("peer", `connectToPeer(${peer.nodeId}): attempt=${attempt} url=${peer.url}`);
|
|
286
309
|
|
|
287
310
|
// Use a common WS subprotocol for traffic disguise
|
|
288
|
-
|
|
311
|
+
let ws: WebSocket;
|
|
312
|
+
try {
|
|
313
|
+
ws = new WebSocket(peer.url, ["graphql-transport-ws"]);
|
|
314
|
+
} catch (err) {
|
|
315
|
+
debug("peer", `connectToPeer(${peer.nodeId}): WebSocket constructor threw: ${err}`);
|
|
316
|
+
this.scheduleReconnect(peer);
|
|
317
|
+
return;
|
|
318
|
+
}
|
|
289
319
|
|
|
290
320
|
ws.addEventListener("open", () => {
|
|
321
|
+
debug("peer", `connectToPeer(${peer.nodeId}): ws open`);
|
|
291
322
|
const conn = new Connection(
|
|
292
323
|
ws,
|
|
293
324
|
"outbound",
|
|
@@ -299,6 +330,7 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
299
330
|
conn.bindWebSocket(ws);
|
|
300
331
|
|
|
301
332
|
conn.on("authenticated", (caps) => {
|
|
333
|
+
debug("peer", `connectToPeer(${peer.nodeId}): authenticated`);
|
|
302
334
|
this.reconnectAttempts.delete(peer.nodeId);
|
|
303
335
|
this.onPeerAuthenticated(conn, caps);
|
|
304
336
|
});
|
|
@@ -309,24 +341,38 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
|
|
|
309
341
|
});
|
|
310
342
|
|
|
311
343
|
let reconnectScheduled = false;
|
|
344
|
+
let lastError: string | undefined;
|
|
312
345
|
const tryReconnect = () => {
|
|
313
346
|
if (!reconnectScheduled) {
|
|
314
347
|
reconnectScheduled = true;
|
|
315
|
-
this.scheduleReconnect(peer);
|
|
348
|
+
this.scheduleReconnect(peer, lastError);
|
|
316
349
|
}
|
|
317
350
|
};
|
|
318
351
|
|
|
319
|
-
ws.addEventListener("error",
|
|
320
|
-
|
|
352
|
+
ws.addEventListener("error", (ev) => {
|
|
353
|
+
lastError = (ev as ErrorEvent).message || undefined;
|
|
354
|
+
tryReconnect();
|
|
355
|
+
});
|
|
356
|
+
ws.addEventListener("close", (ev) => {
|
|
357
|
+
if (!lastError) {
|
|
358
|
+
lastError = classifyCloseReason(ev.code, ev.reason);
|
|
359
|
+
}
|
|
360
|
+
tryReconnect();
|
|
361
|
+
});
|
|
321
362
|
}
|
|
322
363
|
|
|
323
|
-
private scheduleReconnect(peer: PeerConfig) {
|
|
324
|
-
if (this.stopped)
|
|
364
|
+
private scheduleReconnect(peer: PeerConfig, reason?: string) {
|
|
365
|
+
if (this.stopped) {
|
|
366
|
+
debug("peer", `scheduleReconnect(${peer.nodeId}): skipped (stopped)`);
|
|
367
|
+
return;
|
|
368
|
+
}
|
|
325
369
|
if (this.reconnectTimers.has(peer.nodeId)) return;
|
|
326
370
|
|
|
327
371
|
const attempt = this.reconnectAttempts.get(peer.nodeId) ?? 0;
|
|
328
372
|
const delay = Math.min(RECONNECT_BASE * 2 ** attempt, RECONNECT_MAX);
|
|
329
373
|
this.reconnectAttempts.set(peer.nodeId, attempt + 1);
|
|
374
|
+
const tag = reason ? ` reason="${reason}"` : "";
|
|
375
|
+
debug("peer", `scheduleReconnect(${peer.nodeId}): attempt=${attempt} delay=${delay}ms${tag}`);
|
|
330
376
|
|
|
331
377
|
const timer = setTimeout(() => {
|
|
332
378
|
this.reconnectTimers.delete(peer.nodeId);
|
package/src/tools/cluster-peers.ts
CHANGED
|
@@ -14,7 +14,9 @@ export function createClusterPeersTool(): AnyAgentTool {
|
|
|
14
14
|
async execute() {
|
|
15
15
|
try {
|
|
16
16
|
const runtime = getClusterRuntime();
|
|
17
|
-
const
|
|
17
|
+
const localNodeId = runtime.config.nodeId;
|
|
18
|
+
const allEntries = runtime.peerManager.router.getAllPeers()
|
|
19
|
+
.filter((e) => e.nodeId !== localNodeId && e.nodeId !== `${localNodeId}:sentinel`);
|
|
18
20
|
|
|
19
21
|
// Separate sentinel peers from normal peers
|
|
20
22
|
const sentinelSet = new Set<string>();
|
|
@@ -34,6 +36,8 @@ export function createClusterPeersTool(): AnyAgentTool {
|
|
|
34
36
|
const sentinelStatus = sentinelEntry
|
|
35
37
|
? runtime.peerManager.router.getPeerStatus(sentinelEntry)
|
|
36
38
|
: undefined;
|
|
39
|
+
const sentinelOnline = sentinelStatus === "direct" || sentinelStatus === "relay";
|
|
40
|
+
const effectiveStatus = status === "unreachable" && sentinelOnline ? "sentinel-only" : status;
|
|
37
41
|
|
|
38
42
|
return {
|
|
39
43
|
nodeId: entry.nodeId,
|
|
@@ -45,11 +49,10 @@ export function createClusterPeersTool(): AnyAgentTool {
|
|
|
45
49
|
models: entry.models.map((m) => m.id),
|
|
46
50
|
tags: entry.tags,
|
|
47
51
|
tools: entry.toolProxy?.enabled ? (entry.toolProxy.allow ?? []) : [],
|
|
48
|
-
status,
|
|
52
|
+
status: effectiveStatus,
|
|
49
53
|
latencyMs: entry.latencyMs,
|
|
50
|
-
// Sentinel info merged into the same row
|
|
51
54
|
...(hasSentinel ? {
|
|
52
|
-
sentinel:
|
|
55
|
+
sentinel: sentinelOnline ? "online" : "offline",
|
|
53
56
|
} : {}),
|
|
54
57
|
};
|
|
55
58
|
});
|
|
@@ -61,15 +64,16 @@ export function createClusterPeersTool(): AnyAgentTool {
|
|
|
61
64
|
if (peers.some((p) => p.nodeId === mainNodeId)) continue;
|
|
62
65
|
// Main node is gone, only sentinel remains
|
|
63
66
|
const sentinelStatus = runtime.peerManager.router.getPeerStatus(entry);
|
|
67
|
+
const sentinelOnline = sentinelStatus === "direct" || sentinelStatus === "relay";
|
|
64
68
|
peers.push({
|
|
65
69
|
nodeId: mainNodeId,
|
|
66
70
|
agents: [],
|
|
67
71
|
models: [],
|
|
68
72
|
tags: entry.tags.filter((t) => t !== "sentinel"),
|
|
69
73
|
tools: [],
|
|
70
|
-
status: "unreachable",
|
|
74
|
+
status: sentinelOnline ? "sentinel-only" : "unreachable",
|
|
71
75
|
latencyMs: entry.latencyMs,
|
|
72
|
-
sentinel:
|
|
76
|
+
sentinel: sentinelOnline ? "online" : "offline",
|
|
73
77
|
} as (typeof peers)[number]);
|
|
74
78
|
}
|
|
75
79
|
|