clawmatrix 0.2.3 → 0.2.5

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clawmatrix",
3
- "version": "0.2.3",
3
+ "version": "0.2.5",
4
4
  "description": "Decentralized mesh cluster plugin for OpenClaw — inter-gateway communication, model proxy, task handoff, and tool proxy.",
5
5
  "type": "module",
6
6
  "license": "MIT",
package/src/cli.ts CHANGED
@@ -99,7 +99,7 @@ export const registerClusterCli = ({ program }: { program: Command }) => {
99
99
  models: Array<{ id: string }>;
100
100
  tags: string[];
101
101
  connected: boolean;
102
- status: "direct" | "relay" | "unreachable";
102
+ status: "direct" | "relay" | "unreachable" | "sentinel-only";
103
103
  latencyMs: number;
104
104
  reachableVia: string | null;
105
105
  }>;
@@ -121,10 +121,15 @@ export const registerClusterCli = ({ program }: { program: Command }) => {
121
121
 
122
122
  for (let i = 0; i < peers.length; i++) {
123
123
  const peer = peers[i];
124
- const dot = peer.status === "direct" ? green("●") : peer.status === "relay" ? yellow("●") : red("○");
124
+ const dot = peer.status === "direct" ? green("●")
125
+ : peer.status === "relay" ? yellow("●")
126
+ : peer.status === "sentinel-only" ? yellow("◐")
127
+ : red("○");
125
128
  const latency = peer.connected && peer.latencyMs > 0 ? dim(` ${peer.latencyMs}ms`) : "";
126
129
  const statusLabel = peer.status === "relay"
127
130
  ? yellow(` relay via ${peer.reachableVia}`)
131
+ : peer.status === "sentinel-only"
132
+ ? yellow(" sentinel only")
128
133
  : peer.status === "unreachable"
129
134
  ? red(" unreachable")
130
135
  : "";
@@ -505,12 +505,14 @@ export function createClusterService(
505
505
  config: ClawMatrixConfig,
506
506
  openclawConfig: OpenClawConfig,
507
507
  openclawVersion?: string,
508
+ onStarted?: () => void,
508
509
  ): OpenClawPluginService {
509
510
  return {
510
511
  id: "clawmatrix",
511
512
  start(ctx: OpenClawPluginServiceContext) {
512
513
  clusterRuntime = new ClusterRuntime(config, ctx.logger, openclawConfig, openclawVersion);
513
514
  clusterRuntime.start();
515
+ onStarted?.();
514
516
  },
515
517
  async stop() {
516
518
  if (clusterRuntime) {
package/src/compat.ts CHANGED
@@ -6,6 +6,7 @@
6
6
 
7
7
  import { spawn as cpSpawn } from "node:child_process";
8
8
  import { readFile, writeFile } from "node:fs/promises";
9
+ import { createRequire } from "node:module";
9
10
 
10
11
  export interface SpawnResult {
11
12
  exitCode: number;
@@ -97,8 +98,8 @@ let ptyModule: {
97
98
  function loadPty() {
98
99
  if (ptyModule !== undefined) return ptyModule;
99
100
  try {
100
- // eslint-disable-next-line @typescript-eslint/no-require-imports
101
- ptyModule = require("node-pty");
101
+ const req = createRequire(import.meta.url);
102
+ ptyModule = req("node-pty");
102
103
  } catch {
103
104
  ptyModule = null;
104
105
  }
package/src/identity.ts CHANGED
@@ -26,6 +26,7 @@
26
26
  * ECDH exchange with the peer's key.
27
27
  */
28
28
 
29
+ import { createPrivateKey } from "node:crypto";
29
30
  import fs from "node:fs";
30
31
  import path from "node:path";
31
32
  import {
@@ -54,10 +55,12 @@ export function loadOrCreateIdentity(stateDir: string): KeyPair {
54
55
  if (fs.existsSync(filePath)) {
55
56
  const raw = fs.readFileSync(filePath, "utf-8");
56
57
  const data: IdentityData = JSON.parse(raw);
57
- return keyPairFromSerialized(data.publicKey, data.privateKey);
58
+ const keyPair = keyPairFromSerialized(data.publicKey, data.privateKey);
59
+ console.error(`[clawmatrix:identity] loaded existing identity from ${filePath} (publicKey=${data.publicKey.slice(0, 12)}...)`);
60
+ return keyPair;
58
61
  }
59
- } catch {
60
- // Corrupted file regenerate
62
+ } catch (err) {
63
+ console.error(`[clawmatrix:identity] failed to load identity from ${filePath}, regenerating: ${err}`);
61
64
  }
62
65
 
63
66
  // Generate new identity
@@ -73,13 +76,13 @@ export function loadOrCreateIdentity(stateDir: string): KeyPair {
73
76
  fs.mkdirSync(stateDir, { recursive: true });
74
77
  }
75
78
  fs.writeFileSync(filePath, JSON.stringify(data, null, 2), { mode: 0o600 });
79
+ console.error(`[clawmatrix:identity] generated NEW identity at ${filePath} (publicKey=${data.publicKey.slice(0, 12)}...)`);
76
80
 
77
81
  return keyPair;
78
82
  }
79
83
 
80
84
  /** Reconstruct a KeyPair from serialized base64 strings. */
81
85
  function keyPairFromSerialized(publicKeyB64: string, privateKeyB64: string): KeyPair {
82
- const { createPrivateKey } = require("node:crypto");
83
86
  const publicKey = Buffer.from(publicKeyB64, "base64");
84
87
  const privateKey = Buffer.from(privateKeyB64, "base64");
85
88
 
package/src/index.ts CHANGED
@@ -136,7 +136,12 @@ const plugin = {
136
136
  }
137
137
 
138
138
  // Background service: manages mesh connections, WS listener, heartbeat
139
- api.registerService(createClusterService(config, api.config, api.runtime.version));
139
+ // onStarted callback wires up approval after the runtime is available
140
+ let onServiceStarted: (() => void) | undefined;
141
+ const serviceStartedPromise = config.peerApproval.enabled
142
+ ? new Promise<void>((resolve) => { onServiceStarted = resolve; })
143
+ : undefined;
144
+ api.registerService(createClusterService(config, api.config, api.runtime.version, onServiceStarted));
140
145
 
141
146
  // Model providers: register per-node providers so models are accessed as nodeId/modelId
142
147
  const baseUrl = `http://127.0.0.1:${config.proxyPort}/v1`;
@@ -425,20 +430,11 @@ const plugin = {
425
430
  }
426
431
  };
427
432
 
428
- // Retry until cluster runtime is initialized (service start is async)
429
- const retrySetup = (attempt = 0) => {
430
- try {
431
- getClusterRuntime();
432
- setupApproval();
433
- } catch {
434
- if (attempt < 30) {
435
- setTimeout(() => retrySetup(attempt + 1), 1000);
436
- } else {
437
- debug("approval", "setupApproval gave up after 30 attempts — cluster runtime never initialized");
438
- }
439
- }
440
- };
441
- setTimeout(() => retrySetup(), 1000);
433
+ // Run setupApproval once the cluster service has started
434
+ serviceStartedPromise!.then(() => {
435
+ // Small delay to ensure runtime is fully wired
436
+ setTimeout(setupApproval, 100);
437
+ });
442
438
  }
443
439
 
444
440
  // Gateway methods (queried by CLI via `openclaw gateway call`)
@@ -448,6 +444,8 @@ const plugin = {
448
444
  try {
449
445
  const runtime = getClusterRuntime();
450
446
  const peers = runtime.peerManager.router.getAllPeers();
447
+ const mergedPeers = mergeSentinelPeers(peers, runtime)
448
+ .filter((p) => (p as { nodeId: string }).nodeId !== config.nodeId);
451
449
  respond(true, {
452
450
  nodeId: config.nodeId,
453
451
  listen: config.listen ? config.listenPort : false,
@@ -455,7 +453,7 @@ const plugin = {
455
453
  agents: config.agents.map((a) => ({ id: a.id, description: a.description })),
456
454
  models: config.models.map((m) => ({ id: m.id })),
457
455
  tags: config.tags,
458
- peers: mergeSentinelPeers(peers, runtime),
456
+ peers: mergedPeers,
459
457
  });
460
458
  } catch {
461
459
  respond(false, { error: "ClawMatrix service not running" });
@@ -736,16 +734,18 @@ function mergeSentinelPeers(
736
734
  const status = runtime.peerManager.router.getPeerStatus(p);
737
735
  const sentinel = sentinelMap.get(p.nodeId);
738
736
  const sentinelStatus = sentinel ? runtime.peerManager.router.getPeerStatus(sentinel) : undefined;
737
+ const sentinelOnline = sentinelStatus === "direct" || sentinelStatus === "relay";
738
+ const effectiveStatus = status === "unreachable" && sentinelOnline ? "sentinel-only" : status;
739
739
  result.push({
740
740
  nodeId: p.nodeId,
741
741
  agents: p.agents,
742
742
  models: p.models,
743
743
  tags: p.tags,
744
744
  connected: status !== "unreachable",
745
- status,
745
+ status: effectiveStatus,
746
746
  reachableVia: p.reachableVia,
747
747
  latencyMs: p.latencyMs,
748
- ...(sentinel ? { sentinel: sentinelStatus === "direct" || sentinelStatus === "relay" ? "online" : "offline" } : {}),
748
+ ...(sentinel ? { sentinel: sentinelOnline ? "online" : "offline" } : {}),
749
749
  });
750
750
  }
751
751
 
@@ -753,15 +753,16 @@ function mergeSentinelPeers(
753
753
  for (const [mainId, sentinel] of sentinelMap) {
754
754
  if (seen.has(mainId)) continue;
755
755
  const sentinelStatus = runtime.peerManager.router.getPeerStatus(sentinel);
756
+ const sentinelOnline = sentinelStatus === "direct" || sentinelStatus === "relay";
756
757
  result.push({
757
758
  nodeId: mainId,
758
759
  agents: [],
759
760
  models: [],
760
761
  tags: [],
761
762
  connected: false,
762
- status: "unreachable",
763
+ status: sentinelOnline ? "sentinel-only" : "unreachable",
763
764
  latencyMs: sentinel.latencyMs,
764
- sentinel: sentinelStatus === "direct" || sentinelStatus === "relay" ? "online" : "offline",
765
+ sentinel: sentinelOnline ? "online" : "offline",
765
766
  });
766
767
  }
767
768
 
@@ -28,6 +28,23 @@ import type { KeyPair } from "./crypto.ts";
28
28
  const RECONNECT_BASE = 1_000;
29
29
  const RECONNECT_MAX = 60_000;
30
30
 
31
+ /** Classify WebSocket close code into a human-readable reason. */
32
+ function classifyCloseReason(code: number, reason: string): string {
33
+ if (reason) return reason;
34
+ switch (code) {
35
+ case 1006: return "unreachable (node may be down)";
36
+ case 1000: return "normal close";
37
+ case 1001: return "peer going away";
38
+ case 1002: return "protocol error";
39
+ case 1003: return "unsupported data";
40
+ case 1008: return "policy violation";
41
+ case 1011: return "server error";
42
+ case 4001: return "auth failed";
43
+ case 4003: return "auth timeout";
44
+ default: return `close code ${code}`;
45
+ }
46
+ }
47
+
31
48
  /** Check if an IP is a loopback address (IPv4 127.x or IPv6 ::1). */
32
49
  function isLoopback(ip?: string): boolean {
33
50
  if (!ip) return false;
@@ -282,12 +299,26 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
282
299
 
283
300
  // ── Outbound connections (standard WebSocket) ──────────────────
284
301
  private connectToPeer(peer: PeerConfig) {
285
- if (this.stopped) return;
302
+ if (this.stopped) {
303
+ debug("peer", `connectToPeer(${peer.nodeId}): skipped (stopped)`);
304
+ return;
305
+ }
306
+
307
+ const attempt = this.reconnectAttempts.get(peer.nodeId) ?? 0;
308
+ debug("peer", `connectToPeer(${peer.nodeId}): attempt=${attempt} url=${peer.url}`);
286
309
 
287
310
  // Use a common WS subprotocol for traffic disguise
288
- const ws = new WebSocket(peer.url, ["graphql-transport-ws"]);
311
+ let ws: WebSocket;
312
+ try {
313
+ ws = new WebSocket(peer.url, ["graphql-transport-ws"]);
314
+ } catch (err) {
315
+ debug("peer", `connectToPeer(${peer.nodeId}): WebSocket constructor threw: ${err}`);
316
+ this.scheduleReconnect(peer);
317
+ return;
318
+ }
289
319
 
290
320
  ws.addEventListener("open", () => {
321
+ debug("peer", `connectToPeer(${peer.nodeId}): ws open`);
291
322
  const conn = new Connection(
292
323
  ws,
293
324
  "outbound",
@@ -299,6 +330,7 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
299
330
  conn.bindWebSocket(ws);
300
331
 
301
332
  conn.on("authenticated", (caps) => {
333
+ debug("peer", `connectToPeer(${peer.nodeId}): authenticated`);
302
334
  this.reconnectAttempts.delete(peer.nodeId);
303
335
  this.onPeerAuthenticated(conn, caps);
304
336
  });
@@ -309,24 +341,38 @@ export class PeerManager extends EventEmitter<PeerManagerEvents> {
309
341
  });
310
342
 
311
343
  let reconnectScheduled = false;
344
+ let lastError: string | undefined;
312
345
  const tryReconnect = () => {
313
346
  if (!reconnectScheduled) {
314
347
  reconnectScheduled = true;
315
- this.scheduleReconnect(peer);
348
+ this.scheduleReconnect(peer, lastError);
316
349
  }
317
350
  };
318
351
 
319
- ws.addEventListener("error", tryReconnect);
320
- ws.addEventListener("close", tryReconnect);
352
+ ws.addEventListener("error", (ev) => {
353
+ lastError = (ev as ErrorEvent).message || undefined;
354
+ tryReconnect();
355
+ });
356
+ ws.addEventListener("close", (ev) => {
357
+ if (!lastError) {
358
+ lastError = classifyCloseReason(ev.code, ev.reason);
359
+ }
360
+ tryReconnect();
361
+ });
321
362
  }
322
363
 
323
- private scheduleReconnect(peer: PeerConfig) {
324
- if (this.stopped) return;
364
+ private scheduleReconnect(peer: PeerConfig, reason?: string) {
365
+ if (this.stopped) {
366
+ debug("peer", `scheduleReconnect(${peer.nodeId}): skipped (stopped)`);
367
+ return;
368
+ }
325
369
  if (this.reconnectTimers.has(peer.nodeId)) return;
326
370
 
327
371
  const attempt = this.reconnectAttempts.get(peer.nodeId) ?? 0;
328
372
  const delay = Math.min(RECONNECT_BASE * 2 ** attempt, RECONNECT_MAX);
329
373
  this.reconnectAttempts.set(peer.nodeId, attempt + 1);
374
+ const tag = reason ? ` reason="${reason}"` : "";
375
+ debug("peer", `scheduleReconnect(${peer.nodeId}): attempt=${attempt} delay=${delay}ms${tag}`);
330
376
 
331
377
  const timer = setTimeout(() => {
332
378
  this.reconnectTimers.delete(peer.nodeId);
@@ -14,7 +14,9 @@ export function createClusterPeersTool(): AnyAgentTool {
14
14
  async execute() {
15
15
  try {
16
16
  const runtime = getClusterRuntime();
17
- const allEntries = runtime.peerManager.router.getAllPeers();
17
+ const localNodeId = runtime.config.nodeId;
18
+ const allEntries = runtime.peerManager.router.getAllPeers()
19
+ .filter((e) => e.nodeId !== localNodeId && e.nodeId !== `${localNodeId}:sentinel`);
18
20
 
19
21
  // Separate sentinel peers from normal peers
20
22
  const sentinelSet = new Set<string>();
@@ -34,6 +36,8 @@ export function createClusterPeersTool(): AnyAgentTool {
34
36
  const sentinelStatus = sentinelEntry
35
37
  ? runtime.peerManager.router.getPeerStatus(sentinelEntry)
36
38
  : undefined;
39
+ const sentinelOnline = sentinelStatus === "direct" || sentinelStatus === "relay";
40
+ const effectiveStatus = status === "unreachable" && sentinelOnline ? "sentinel-only" : status;
37
41
 
38
42
  return {
39
43
  nodeId: entry.nodeId,
@@ -45,11 +49,10 @@ export function createClusterPeersTool(): AnyAgentTool {
45
49
  models: entry.models.map((m) => m.id),
46
50
  tags: entry.tags,
47
51
  tools: entry.toolProxy?.enabled ? (entry.toolProxy.allow ?? []) : [],
48
- status,
52
+ status: effectiveStatus,
49
53
  latencyMs: entry.latencyMs,
50
- // Sentinel info merged into the same row
51
54
  ...(hasSentinel ? {
52
- sentinel: sentinelStatus === "direct" || sentinelStatus === "relay" ? "online" : "offline",
55
+ sentinel: sentinelOnline ? "online" : "offline",
53
56
  } : {}),
54
57
  };
55
58
  });
@@ -61,15 +64,16 @@ export function createClusterPeersTool(): AnyAgentTool {
61
64
  if (peers.some((p) => p.nodeId === mainNodeId)) continue;
62
65
  // Main node is gone, only sentinel remains
63
66
  const sentinelStatus = runtime.peerManager.router.getPeerStatus(entry);
67
+ const sentinelOnline = sentinelStatus === "direct" || sentinelStatus === "relay";
64
68
  peers.push({
65
69
  nodeId: mainNodeId,
66
70
  agents: [],
67
71
  models: [],
68
72
  tags: entry.tags.filter((t) => t !== "sentinel"),
69
73
  tools: [],
70
- status: "unreachable",
74
+ status: sentinelOnline ? "sentinel-only" : "unreachable",
71
75
  latencyMs: entry.latencyMs,
72
- sentinel: sentinelStatus === "direct" || sentinelStatus === "relay" ? "online" : "offline",
76
+ sentinel: sentinelOnline ? "online" : "offline",
73
77
  } as (typeof peers)[number]);
74
78
  }
75
79