clawmatrix 0.2.8 → 0.2.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/acp-proxy.ts +122 -12
- package/src/cluster-service.ts +68 -2
- package/src/config.ts +9 -0
- package/src/file-transfer.ts +671 -0
- package/src/health-tracker.ts +586 -0
- package/src/index.ts +11 -6
- package/src/knowledge-sync.ts +30 -1
- package/src/model-proxy.ts +16 -1
- package/src/peer-manager.ts +28 -4
- package/src/router.ts +25 -0
- package/src/sentinel-manager.ts +8 -7
- package/src/sentinel.ts +29 -7
- package/src/terminal.ts +2 -1
- package/src/tools/cluster-diagnostic.ts +2 -0
- package/src/tools/cluster-transfer.ts +91 -0
- package/src/types.ts +97 -1
- package/src/web.ts +33 -0
|
@@ -0,0 +1,586 @@
|
|
|
1
|
+
import * as Automerge from "@automerge/automerge";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { readFile, writeFile, mkdir } from "node:fs/promises";
|
|
4
|
+
import { homedir, tmpdir } from "node:os";
|
|
5
|
+
|
|
6
|
+
import { debug } from "./debug.ts";
|
|
7
|
+
import type { PeerManager } from "./peer-manager.ts";
|
|
8
|
+
import type { HealthSyncFrame } from "./types.ts";
|
|
9
|
+
|
|
10
|
+
/** Tag passed as the first argument to every `debug()` call in this module. */
const TAG = "health";

/** How long health events are kept before compaction prunes them: 90 days, in ms. */
const DEFAULT_RETENTION_MS = 90 * 86_400_000;

/** Delay between periodic compaction runs: 24 hours, in ms. */
const COMPACT_INTERVAL = 86_400_000;

/** Delay used to batch document saves: 5 seconds, in ms. */
const SAVE_DEBOUNCE = 5 * 1000;
|
|
20
|
+
|
|
21
|
+
// ── Document schema ─────────────────────────────────────────────
|
|
22
|
+
|
|
23
|
+
/**
 * One entry in a node's health log.
 *
 * `start` / `stop` are written by a node about itself; `peer_online` /
 * `peer_offline` are written by a node about a peer named in `peer`.
 */
export interface HealthEvent {
  /** Event time as a `Date.now()` style epoch-millisecond timestamp. */
  ts: number;
  /** What happened. */
  type: "start" | "stop" | "peer_online" | "peer_offline";
  /** Node ID of the peer the event refers to (peer events only). */
  peer?: string;
  /** How the peer was reached: "direct" | "relay". */
  via?: string;
  /** Disconnect reason, when one is known. */
  reason?: string;
}
|
|
30
|
+
|
|
31
|
+
/** Per-node slice of the health document. */
interface NodeHealthEntry {
  /** Events recorded by this node, in the order they were appended. */
  events: HealthEvent[];
  /** `Date.now()` value captured when the most recent event was appended. */
  lastUpdated: number;
}
|
|
35
|
+
|
|
36
|
+
/** Root shape of the Automerge health document. */
export interface HealthDoc {
  /** Health entries keyed by the node ID that recorded them. */
  nodes: Record<string, NodeHealthEntry>;
}
|
|
39
|
+
|
|
40
|
+
// ── Timeline aggregation ────────────────────────────────────────
|
|
41
|
+
|
|
42
|
+
/**
 * Availability classification of one timeline bucket. "unknown" marks a
 * bucket in which the observing node had no observable time at all.
 */
export type BucketState =
  | "up"
  | "degraded"
  | "down"
  | "unknown";
|
|
43
|
+
|
|
44
|
+
/** Aggregated availability of a single node over the requested window. */
export interface NodeTimeline {
  /** ID of the node this timeline describes. */
  nodeId: string;
  /** Timestamp of the earliest event in the node's own entry. */
  firstSeen: number;
  /** `lastUpdated` value of the node's own entry. */
  lastSeen: number;
  /** One state per bucket, oldest bucket first. */
  buckets: BucketState[];
  /** Online time divided by observable time, summed over all buckets (0 when nothing was observable). */
  uptimeRatio: number;
}
|
|
51
|
+
|
|
52
|
+
/** Result of an availability query across all known nodes. */
export interface AvailabilityResult {
  /** Range label the result was computed for (e.g. "24h"). */
  range: string;
  /** Width of one bucket, in minutes. */
  bucketMinutes: number;
  /** Start of the window (epoch ms). */
  startTs: number;
  /** End of the window (epoch ms). */
  endTs: number;
  /** One timeline per node that has at least one event. */
  nodes: NodeTimeline[];
  /** Time periods when the observing node was down (cannot observe). */
  gaps: Array<[number, number]>;
}
|
|
61
|
+
|
|
62
|
+
// ── HealthTracker ───────────────────────────────────────────────
|
|
63
|
+
|
|
64
|
+
/** Construction options for {@link HealthTracker}. */
export interface HealthTrackerOptions {
  /** ID of the local node; its own events are stored under this key. */
  nodeId: string;
  /** Peer manager whose router is used to send health sync frames. */
  peerManager: PeerManager;
  /** Event retention in milliseconds; defaults to 90 days when omitted. */
  retentionMs?: number;
  /** Override state directory (for tests). */
  stateDir?: string;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Records availability events for the local node and the peers it observes
 * in an Automerge document, exchanges that document with peers through
 * `health_sync` frames, persists it to disk, and aggregates the events into
 * per-node availability timelines.
 */
export class HealthTracker {
  /** Current Automerge document holding every known node's events. */
  private doc: Automerge.Doc<HealthDoc>;
  /** Automerge sync state per peer, keyed by peer node ID. */
  private syncStates = new Map<string, Automerge.SyncState>();
  private readonly nodeId: string;
  private readonly peerManager: PeerManager;
  private readonly retentionMs: number;
  /** Absolute path of the persisted document (`health.automerge`). */
  private readonly docPath: string;
  private compactTimer: ReturnType<typeof setInterval> | null = null;
  private saveTimer: ReturnType<typeof setTimeout> | null = null;
  /** True when the document changed since the last debounced save was issued. */
  private dirty = false;

  /**
   * @param opts - Node identity, peer manager, and optional retention / state
   *   directory overrides. Without `stateDir` the document lives under
   *   `<home or tmp>/.openclaw/clawmatrix`.
   */
  constructor(opts: HealthTrackerOptions) {
    this.nodeId = opts.nodeId;
    this.peerManager = opts.peerManager;
    this.retentionMs = opts.retentionMs ?? DEFAULT_RETENTION_MS;

    const stateDir = opts.stateDir ?? path.join(homedir() || tmpdir(), ".openclaw", "clawmatrix");
    this.docPath = path.join(stateDir, "health.automerge");

    // Initialize empty doc (will be replaced by load if file exists).
    // NOTE(review): every fresh node creates its own `nodes` map here. Two
    // nodes that initialise independently and then sync will have concurrent
    // assignments to the same root key — confirm against the Automerge docs
    // that events written before the first sync are not hidden by the
    // conflict resolution.
    this.doc = Automerge.init<HealthDoc>();
    this.doc = Automerge.change(this.doc, (d) => {
      (d as HealthDoc).nodes = {};
    });
  }

  /**
   * Load the persisted document, record a `start` event for this node,
   * prune old events, and schedule periodic compaction.
   */
  async start() {
    await this.load();

    this.recordEvent({ ts: Date.now(), type: "start" });

    // Compact once now, then every COMPACT_INTERVAL.
    this.compact();
    this.compactTimer = setInterval(() => this.compact(), COMPACT_INTERVAL);

    debug(TAG, `health tracker started for node "${this.nodeId}"`);
  }

  /**
   * Record a `stop` event for this node, cancel all timers, and write the
   * document to disk one last time.
   */
  async stop() {
    this.recordEvent({ ts: Date.now(), type: "stop" });

    if (this.compactTimer) {
      clearInterval(this.compactTimer);
      this.compactTimer = null;
    }
    if (this.saveTimer) {
      clearTimeout(this.saveTimer);
      this.saveTimer = null;
    }

    // The final save below covers everything the cancelled debounce would have.
    this.dirty = false;
    await this.save();
    debug(TAG, "health tracker stopped");
  }

  // ── Event recording ─────────────────────────────────────────

  /**
   * Append an event to this node's own entry, then schedule a save and push
   * a sync message to every known peer.
   *
   * @param event - Event to record; properties whose value is `undefined`
   *   are dropped before the write.
   */
  recordEvent(event: HealthEvent) {
    // Strip undefined values — Automerge rejects them
    const clean: Record<string, unknown> = {};
    for (const [k, v] of Object.entries(event)) {
      if (v !== undefined) clean[k] = v;
    }
    this.doc = Automerge.change(this.doc, (d) => {
      if (!d.nodes[this.nodeId]) {
        d.nodes[this.nodeId] = { events: [], lastUpdated: 0 };
      }
      const entry = d.nodes[this.nodeId]!;
      entry.events.push(clean as HealthEvent);
      entry.lastUpdated = Date.now();
    });
    this.scheduleSave();
    this.broadcastSync();
  }

  /** Record that `peerId` became reachable, either directly or via a relay. */
  recordPeerOnline(peerId: string, via: "direct" | "relay") {
    this.recordEvent({ ts: Date.now(), type: "peer_online", peer: peerId, via });
  }

  /** Record that `peerId` became unreachable, with an optional reason. */
  recordPeerOffline(peerId: string, reason?: string) {
    this.recordEvent({ ts: Date.now(), type: "peer_offline", peer: peerId, reason });
  }

  // ── Sync protocol ──────────────────────────────────────────

  /**
   * Handle incoming health_sync frame from a peer.
   *
   * The base64 payload is decoded and merged into the local document, the
   * peer's sync state is updated, and a reply is sent if Automerge produces
   * one. Any failure — including a malformed payload — is logged and
   * otherwise ignored so a bad frame cannot break the caller.
   */
  handleSyncMessage(frame: HealthSyncFrame) {
    const peerId = frame.from;

    try {
      // Decode inside the try: the frame comes from the network and may be malformed.
      const encoded: unknown = frame.payload?.data;
      if (typeof encoded !== "string") {
        throw new TypeError("health_sync payload.data is not a string");
      }
      const message = new Uint8Array(Buffer.from(encoded, "base64"));

      const syncState = this.syncStates.get(peerId) ?? Automerge.initSyncState();
      const [newDoc, newSyncState] = Automerge.receiveSyncMessage(this.doc, syncState, message);
      this.doc = newDoc;
      this.syncStates.set(peerId, newSyncState);
      this.scheduleSave();

      // Send our response
      this.sendSyncMessage(peerId);
    } catch (err) {
      debug(TAG, `error handling sync from ${peerId}: ${err}`);
    }
  }

  /** Initiate sync with a peer (called on peer connect). Resets that peer's sync state. */
  initPeerSync(peerId: string) {
    if (peerId === this.nodeId) return;
    this.syncStates.set(peerId, Automerge.initSyncState());
    this.sendSyncMessage(peerId);
  }

  /** Clean up sync state for a disconnected peer. */
  removePeerSync(peerId: string) {
    this.syncStates.delete(peerId);
  }

  /**
   * Generate the next sync message for `peerId` and send it through the
   * router. Nothing is sent when Automerge reports there is nothing to say.
   */
  private sendSyncMessage(peerId: string) {
    const syncState = this.syncStates.get(peerId) ?? Automerge.initSyncState();
    const [newSyncState, message] = Automerge.generateSyncMessage(this.doc, syncState);
    this.syncStates.set(peerId, newSyncState);

    if (!message) return;

    debug(TAG, `sending health sync to ${peerId} (${message.byteLength} bytes)`);

    const frame: HealthSyncFrame = {
      type: "health_sync",
      from: this.nodeId,
      to: peerId,
      timestamp: Date.now(),
      payload: {
        data: Buffer.from(message).toString("base64"),
      },
    };

    this.peerManager.router.sendTo(peerId, frame);
  }

  /** Send a sync message to every peer the router currently knows. */
  private broadcastSync() {
    const peers = this.peerManager.router.getAllPeers();
    for (const peer of peers) {
      this.sendSyncMessage(peer.nodeId);
    }
  }

  // ── Timeline aggregation ──────────────────────────────────

  /**
   * Build availability timeline for all known nodes.
   *
   * @param range - "24h" (30-minute buckets), "7d" (4-hour buckets), or
   *   "90d" (1-day buckets). Any other value received at runtime is treated
   *   as "24h" instead of producing NaN timestamps.
   * @returns The window, the local node's observation gaps, and one
   *   timeline per node that has at least one event.
   */
  getAvailability(range: "24h" | "7d" | "90d" = "24h"): AvailabilityResult {
    const now = Date.now();
    let effectiveRange: "24h" | "7d" | "90d" = range;
    let durationMs: number;
    let bucketMinutes: number;

    switch (range) {
      case "7d":
        durationMs = 7 * 24 * 60 * 60 * 1000;
        bucketMinutes = 60 * 4; // 4-hour buckets
        break;
      case "90d":
        durationMs = 90 * 24 * 60 * 60 * 1000;
        bucketMinutes = 60 * 24; // 1-day buckets
        break;
      case "24h":
      default:
        // Also reached for out-of-contract values (e.g. an unchecked query parameter).
        effectiveRange = "24h";
        durationMs = 24 * 60 * 60 * 1000;
        bucketMinutes = 30;
        break;
    }

    const startTs = now - durationMs;
    const endTs = now;
    const bucketMs = bucketMinutes * 60 * 1000;
    const bucketCount = Math.ceil(durationMs / bucketMs);

    // Find observation gaps (periods where THIS node was down)
    const gaps = this.getObservationGaps(this.nodeId, startTs, endTs);

    // Build timeline for each node (including self)
    const nodes: NodeTimeline[] = [];

    for (const [nodeId, entry] of Object.entries(this.doc.nodes)) {
      // For self, start/stop events determine uptime; for other nodes,
      // peer_online/peer_offline events recorded by observers do.
      const timeline = this.buildNodeTimeline(
        nodeId,
        entry,
        startTs,
        endTs,
        bucketMs,
        bucketCount,
        gaps,
      );
      if (timeline) nodes.push(timeline);
    }

    return {
      range: effectiveRange,
      bucketMinutes,
      startTs,
      endTs,
      nodes,
      gaps,
    };
  }

  /**
   * Aggregate one node's events into per-bucket states and an overall
   * uptime ratio.
   *
   * Only time the local node could observe counts: both the online time
   * and the denominator exclude `observerGaps`, so every ratio stays within
   * [0, 1]. A bucket with no observable time is "unknown"; otherwise it is
   * "up" at >= 95% online, "degraded" at >= 5%, and "down" below that.
   *
   * @returns The timeline, or `null` when the node's entry has no events.
   */
  private buildNodeTimeline(
    nodeId: string,
    entry: NodeHealthEntry,
    startTs: number,
    endTs: number,
    bucketMs: number,
    bucketCount: number,
    observerGaps: Array<[number, number]>,
  ): NodeTimeline | null {
    const events = [...entry.events].sort((a, b) => a.ts - b.ts);
    if (events.length === 0) return null;

    const firstSeen = events[0]!.ts;
    const lastSeen = entry.lastUpdated;

    // Build online intervals for this node
    const intervals = this.buildOnlineIntervals(nodeId, events, startTs, endTs);

    // Calculate per-bucket state
    const buckets: BucketState[] = [];
    let totalOnline = 0;
    let totalObservable = 0;

    for (let i = 0; i < bucketCount; i++) {
      const bStart = startTs + i * bucketMs;
      const bEnd = Math.min(bStart + bucketMs, endTs);

      // How much of this bucket is observable (subtract observer gaps)
      const observableMs = this.observableTimeInRange(bStart, bEnd, observerGaps);

      if (observableMs === 0) {
        buckets.push("unknown");
        continue;
      }

      // Online time inside the bucket, minus the part that falls into an
      // observer gap. Without the subtraction an interval that spans a gap
      // (e.g. a peer_online never followed by peer_offline because the
      // observer itself stopped) would push the ratio above 1.
      let onlineMs = this.overlapMs(intervals, bStart, bEnd);
      for (const [gStart, gEnd] of observerGaps) {
        onlineMs -= this.overlapMs(intervals, Math.max(bStart, gStart), Math.min(bEnd, gEnd));
      }
      onlineMs = Math.min(Math.max(0, onlineMs), observableMs);

      const ratio = onlineMs / observableMs;

      totalOnline += onlineMs;
      totalObservable += observableMs;

      if (ratio >= 0.95) buckets.push("up");
      else if (ratio >= 0.05) buckets.push("degraded");
      else buckets.push("down");
    }

    const uptimeRatio = totalObservable > 0 ? totalOnline / totalObservable : 0;

    return { nodeId, firstSeen, lastSeen, buckets, uptimeRatio };
  }

  /**
   * Build online intervals for a node.
   * - For self node: uses start/stop events from `events`
   * - For other nodes: uses peer_online/peer_offline events from all
   *   observers in the document (`events` is not consulted)
   */
  private buildOnlineIntervals(
    nodeId: string,
    events: HealthEvent[],
    startTs: number,
    endTs: number,
  ): Array<[number, number]> {
    if (nodeId === this.nodeId) {
      return this.buildSelfIntervals(events, startTs, endTs);
    }
    return this.buildPeerIntervals(nodeId, startTs, endTs);
  }

  /**
   * Turn time-sorted start/stop events into online intervals clipped to
   * [startTs, endTs].
   *
   * Events before the window only establish whether the node was already
   * online at `startTs`. A `start` seen while already online restarts the
   * interval from that `start`. A node still online at the end of the scan
   * is treated as online until `endTs`.
   */
  private buildSelfIntervals(
    events: HealthEvent[],
    startTs: number,
    endTs: number,
  ): Array<[number, number]> {
    const intervals: Array<[number, number]> = [];
    let onlineSince: number | null = null;

    for (const ev of events) {
      if (ev.ts < startTs) {
        // Track state before window
        if (ev.type === "start") onlineSince = ev.ts;
        else if (ev.type === "stop") onlineSince = null;
        continue;
      }
      if (ev.ts > endTs) break;

      if (ev.type === "start") {
        onlineSince = ev.ts;
      } else if (ev.type === "stop" && onlineSince !== null) {
        intervals.push([Math.max(onlineSince, startTs), ev.ts]);
        onlineSince = null;
      }
    }

    // If still online at end of window
    if (onlineSince !== null) {
      intervals.push([Math.max(onlineSince, startTs), endTs]);
    }

    return intervals;
  }

  /**
   * Build online intervals for a remote node from the peer_online /
   * peer_offline events that any node in the document recorded about it,
   * clipped to [startTs, endTs].
   *
   * Inside the window a `peer_online` seen while already online keeps the
   * earlier start, so overlapping reports from several observers merge
   * into one interval that ends at the first `peer_offline`.
   */
  private buildPeerIntervals(
    targetNodeId: string,
    startTs: number,
    endTs: number,
  ): Array<[number, number]> {
    const intervals: Array<[number, number]> = [];

    // Collect peer_online/peer_offline events from all observer nodes
    const relevantEvents: HealthEvent[] = [];
    for (const [, entry] of Object.entries(this.doc.nodes)) {
      for (const ev of entry.events) {
        if (ev.peer === targetNodeId && (ev.type === "peer_online" || ev.type === "peer_offline")) {
          relevantEvents.push(ev);
        }
      }
    }
    relevantEvents.sort((a, b) => a.ts - b.ts);

    let onlineSince: number | null = null;

    for (const ev of relevantEvents) {
      if (ev.ts < startTs) {
        if (ev.type === "peer_online") onlineSince = ev.ts;
        else if (ev.type === "peer_offline") onlineSince = null;
        continue;
      }
      if (ev.ts > endTs) break;

      if (ev.type === "peer_online") {
        if (onlineSince === null) onlineSince = ev.ts;
      } else if (ev.type === "peer_offline" && onlineSince !== null) {
        intervals.push([Math.max(onlineSince, startTs), ev.ts]);
        onlineSince = null;
      }
    }

    if (onlineSince !== null) {
      intervals.push([Math.max(onlineSince, startTs), endTs]);
    }

    return intervals;
  }

  /**
   * Get observation gaps: periods when the given node was not running,
   * i.e. the complement of its start/stop intervals within
   * [startTs, endTs]. A node with no entry yields the whole range.
   */
  private getObservationGaps(
    nodeId: string,
    startTs: number,
    endTs: number,
  ): Array<[number, number]> {
    const entry = this.doc.nodes[nodeId];
    if (!entry) return [[startTs, endTs]]; // no data = entire range is a gap

    const selfIntervals = this.buildSelfIntervals(
      [...entry.events].sort((a, b) => a.ts - b.ts),
      startTs,
      endTs,
    );

    // Gaps are the complement of self intervals within [startTs, endTs]
    const gaps: Array<[number, number]> = [];
    let cursor = startTs;

    for (const [start, end] of selfIntervals) {
      if (start > cursor) {
        gaps.push([cursor, start]);
      }
      cursor = Math.max(cursor, end);
    }

    if (cursor < endTs) {
      gaps.push([cursor, endTs]);
    }

    return gaps;
  }

  /** Calculate observable time in [start, end], excluding gaps; never negative. */
  private observableTimeInRange(
    start: number,
    end: number,
    gaps: Array<[number, number]>,
  ): number {
    let total = end - start;
    for (const [gStart, gEnd] of gaps) {
      const overlapStart = Math.max(start, gStart);
      const overlapEnd = Math.min(end, gEnd);
      if (overlapStart < overlapEnd) {
        total -= overlapEnd - overlapStart;
      }
    }
    return Math.max(0, total);
  }

  /**
   * Calculate total overlap between intervals and the range [start, end].
   * An empty or inverted range (start >= end) overlaps nothing.
   */
  private overlapMs(intervals: Array<[number, number]>, start: number, end: number): number {
    let total = 0;
    for (const [iStart, iEnd] of intervals) {
      const overlapStart = Math.max(start, iStart);
      const overlapEnd = Math.min(end, iEnd);
      if (overlapStart < overlapEnd) {
        total += overlapEnd - overlapStart;
      }
    }
    return total;
  }

  // ── Compact ───────────────────────────────────────────────

  /**
   * Prune events older than the retention cutoff from every node's entry.
   *
   * The most recent event before the cutoff is kept so the node's state at
   * the start of the retained history is still known. Relies on each
   * entry's events being stored in ascending timestamp order.
   */
  private compact() {
    const cutoff = Date.now() - this.retentionMs;
    let pruned = 0;

    this.doc = Automerge.change(this.doc, (d) => {
      for (const [, node] of Object.entries(d.nodes)) {
        const before = node.events.length;
        // Index of the last event older than the cutoff (-1 when none is).
        let lastBeforeCutoff = -1;
        for (let i = 0; i < node.events.length; i++) {
          if (node.events[i]!.ts < cutoff) lastBeforeCutoff = i;
        }
        if (lastBeforeCutoff > 0) {
          // Drop everything in front of that event; the event itself stays.
          node.events.splice(0, lastBeforeCutoff);
          pruned += before - node.events.length;
        }
      }
    });

    if (pruned > 0) {
      debug(TAG, `compacted ${pruned} old events`);
      // Re-save to discard old ops
      this.recompact();
    }
  }

  /**
   * Round-trip the document through `Automerge.save` / `Automerge.load`
   * and schedule a save.
   *
   * NOTE(review): the intent is to discard op history for removed data —
   * confirm that a save/load round trip actually drops that history.
   */
  private recompact() {
    const bytes = Automerge.save(this.doc);
    this.doc = Automerge.load<HealthDoc>(bytes);
    this.scheduleSave();
  }

  // ── Persistence ───────────────────────────────────────────

  /**
   * Replace the in-memory document with the persisted one when it can be
   * read and decoded. A missing file is the normal first-run case; an
   * unreadable or invalid file is logged as such, and the fresh document
   * created by the constructor is kept. Never throws.
   */
  private async load() {
    let data: Uint8Array;
    try {
      data = new Uint8Array(await readFile(this.docPath));
    } catch (err) {
      if ((err as { code?: unknown } | null)?.code === "ENOENT") {
        debug(TAG, "no existing health doc, starting fresh");
      } else {
        debug(TAG, `failed to read health doc at ${this.docPath}, starting fresh: ${err}`);
      }
      return;
    }

    try {
      const loaded = Automerge.load<HealthDoc>(data);
      // recordEvent() indexes into `nodes`, so a document without it is unusable.
      if (!loaded.nodes || typeof loaded.nodes !== "object") {
        throw new Error("document has no `nodes` map");
      }
      this.doc = loaded;
      debug(TAG, `loaded health doc from ${this.docPath}`);
    } catch (err) {
      debug(TAG, `invalid health doc at ${this.docPath}, starting fresh: ${err}`);
    }
  }

  /**
   * Serialize the document and write it to `docPath`, creating the parent
   * directory if needed. Failures are logged, not thrown.
   */
  private async save() {
    try {
      const data = Automerge.save(this.doc);
      await mkdir(path.dirname(this.docPath), { recursive: true });
      await writeFile(this.docPath, Buffer.from(data));
    } catch (err) {
      debug(TAG, `failed to save health doc: ${err}`);
    }
  }

  /**
   * Mark the document dirty and make sure a save fires within
   * SAVE_DEBOUNCE ms. Calls made while a timer is pending are coalesced
   * into that save.
   */
  private scheduleSave() {
    this.dirty = true;
    if (this.saveTimer) return;
    this.saveTimer = setTimeout(() => {
      this.saveTimer = null;
      if (this.dirty) {
        this.dirty = false;
        this.save().catch((err) => {
          debug(TAG, `deferred save error: ${err}`);
        });
      }
    }, SAVE_DEBOUNCE);
  }

  // ── Public accessors ──────────────────────────────────────

  /** Get all known node IDs (including offline ones). */
  getKnownNodes(): string[] {
    return Object.keys(this.doc.nodes);
  }

  /** Get raw events for a specific node as a new array (empty when the node is unknown). */
  getNodeEvents(nodeId: string): HealthEvent[] {
    return [...(this.doc.nodes[nodeId]?.events ?? [])];
  }
}
|
package/src/index.ts
CHANGED
|
@@ -16,6 +16,7 @@ import { createClusterDiagnosticTool } from "./tools/cluster-diagnostic.ts";
|
|
|
16
16
|
import { createClusterAcpTool } from "./tools/cluster-acp.ts";
|
|
17
17
|
import { createClusterTerminalTool } from "./tools/cluster-terminal.ts";
|
|
18
18
|
import { createClusterToolInvokeTool } from "./tools/cluster-tool.ts";
|
|
19
|
+
import { createClusterTransferTool } from "./tools/cluster-transfer.ts";
|
|
19
20
|
import { registerClusterCli } from "./cli.ts";
|
|
20
21
|
import { spawnProcess } from "./compat.ts";
|
|
21
22
|
|
|
@@ -302,6 +303,7 @@ const plugin = {
|
|
|
302
303
|
api.registerTool(createClusterAcpTool(), { optional: true });
|
|
303
304
|
api.registerTool(createClusterTerminalTool(), { optional: true });
|
|
304
305
|
api.registerTool(createClusterToolInvokeTool(), { optional: true });
|
|
306
|
+
api.registerTool(createClusterTransferTool(), { optional: true });
|
|
305
307
|
|
|
306
308
|
// Wire up peer approval with OpenClaw channel API
|
|
307
309
|
if (config.peerApproval.enabled) {
|
|
@@ -330,20 +332,23 @@ const plugin = {
|
|
|
330
332
|
|
|
331
333
|
const proc = spawnProcess(
|
|
332
334
|
["openclaw", "gateway", "call", "send", "--json", "--params", JSON.stringify(sendParams)],
|
|
333
|
-
{ stdout: "
|
|
335
|
+
{ stdout: "ignore", stderr: "pipe" },
|
|
334
336
|
);
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
const
|
|
337
|
+
// Collect stderr concurrently with waiting for exit to avoid pipe deadlock
|
|
338
|
+
const stderrPromise = (async () => {
|
|
339
|
+
const chunks: Uint8Array[] = [];
|
|
338
340
|
if (proc.stderr) {
|
|
339
341
|
const reader = proc.stderr.getReader();
|
|
340
342
|
while (true) {
|
|
341
343
|
const { done, value } = await reader.read();
|
|
342
344
|
if (done) break;
|
|
343
|
-
|
|
345
|
+
chunks.push(value);
|
|
344
346
|
}
|
|
345
347
|
}
|
|
346
|
-
|
|
348
|
+
return Buffer.concat(chunks).toString("utf-8").trim();
|
|
349
|
+
})();
|
|
350
|
+
const [code, errMsg] = await Promise.all([proc.exited, stderrPromise]);
|
|
351
|
+
if (code !== 0) {
|
|
347
352
|
throw new Error(`gateway send failed (exit ${code}): ${errMsg}`);
|
|
348
353
|
}
|
|
349
354
|
});
|
package/src/knowledge-sync.ts
CHANGED
|
@@ -128,6 +128,8 @@ export class KnowledgeSync {
|
|
|
128
128
|
// ── FS / Watcher ───────────────────────────────────────────────
|
|
129
129
|
private watcher: FSWatcher | null = null;
|
|
130
130
|
private debounceTimer: ReturnType<typeof setTimeout> | null = null;
|
|
131
|
+
private localChangesRunning = false;
|
|
132
|
+
private localChangesQueued = false;
|
|
131
133
|
/** Paths recently written by exportFileToFs — suppress watcher re-trigger. Stores {content, timestamp}. */
|
|
132
134
|
private writtenByExport = new Map<string, { content: string; ts: number }>();
|
|
133
135
|
/** Deferred git commit timer — batches multiple remote syncs into one commit. */
|
|
@@ -474,6 +476,27 @@ export class KnowledgeSync {
|
|
|
474
476
|
}
|
|
475
477
|
|
|
476
478
|
/**
 * Serialized entry point for processing local file changes.
 *
 * Only one run is active at a time. A call that arrives while a run is in
 * progress just sets the queued flag and returns; when the active run
 * finishes (successfully or not) it starts one follow-up run in the
 * background so the changes that arrived meanwhile are picked up.
 */
private async handleLocalChanges() {
  if (this.localChangesRunning) {
    // Someone else is already processing — ask them to go around again.
    this.localChangesQueued = true;
    return;
  }

  this.localChangesRunning = true;
  try {
    await this.handleLocalChangesInner();
  } finally {
    this.localChangesRunning = false;

    const rerunRequested = this.localChangesQueued;
    this.localChangesQueued = false;
    if (rerunRequested) {
      // Fire-and-forget: the follow-up run reports its own failures.
      this.handleLocalChanges().catch((err) => {
        debug(TAG, `queued local change handling error: ${err}`);
      });
    }
  }
}
|
|
498
|
+
|
|
499
|
+
private async handleLocalChangesInner() {
|
|
477
500
|
// Only process files in pendingChanges (incremental)
|
|
478
501
|
const changesToProcess = new Set(this.pendingChanges);
|
|
479
502
|
this.pendingChanges.clear();
|
|
@@ -802,7 +825,13 @@ export class KnowledgeSync {
|
|
|
802
825
|
if (this.isIgnored(relPath)) return;
|
|
803
826
|
|
|
804
827
|
const content = doc.content ?? "";
|
|
805
|
-
const absPath = path.
|
|
828
|
+
const absPath = path.resolve(this.opts.workspacePath, relPath);
|
|
829
|
+
|
|
830
|
+
// Prevent path traversal (e.g. relPath = "../../etc/passwd")
|
|
831
|
+
if (!absPath.startsWith(this.opts.workspacePath + path.sep) && absPath !== this.opts.workspacePath) {
|
|
832
|
+
debug(TAG, `blocked path traversal attempt: ${relPath}`);
|
|
833
|
+
return;
|
|
834
|
+
}
|
|
806
835
|
|
|
807
836
|
let currentContent: string | null = null;
|
|
808
837
|
try {
|