clawmatrix 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -49,7 +49,9 @@ const TAG = "knowledge";
49
49
  const REGISTRY_DOC_ID = "";
50
50
  const SYNC_CONFIG_FILE = ".clawmatrix.sync";
51
51
  /** TTL for writtenByExport entries (ms). Stale entries are cleaned up to prevent leaks. */
52
- const EXPORT_MARKER_TTL = 30_000;
52
+ const EXPORT_MARKER_TTL = 60_000;
53
+ /** Interval for periodic writtenByExport cleanup (ms). */
54
+ const EXPORT_CLEANUP_INTERVAL = 60_000;
53
55
  /** Max concurrent file I/O operations during walkDir / export. */
54
56
  const MAX_IO_CONCURRENCY = 32;
55
57
  /** Delay before batched git commit (ms). */
@@ -73,14 +75,20 @@ function legacyDocFileName(relPath: string): string {
73
75
  return relPath.replaceAll("/", "--") + ".automerge";
74
76
  }
75
77
 
76
- /** Run async tasks with bounded concurrency. */
77
- async function pMap<T, R>(items: T[], fn: (item: T) => Promise<R>, concurrency: number): Promise<R[]> {
78
- const results: R[] = new Array(items.length);
78
+ /** Run async tasks with bounded concurrency. Individual errors are caught
79
+ * and logged so that one failing item does not abort the entire batch. */
80
+ async function pMap<T, R>(items: T[], fn: (item: T) => Promise<R>, concurrency: number): Promise<(R | undefined)[]> {
81
+ const results: (R | undefined)[] = new Array(items.length);
79
82
  let i = 0;
80
83
  async function worker() {
81
84
  while (i < items.length) {
82
85
  const idx = i++;
83
- results[idx] = await fn(items[idx]);
86
+ try {
87
+ results[idx] = await fn(items[idx]);
88
+ } catch (err) {
89
+ debug(TAG, `pMap item ${idx} failed: ${err}`);
90
+ results[idx] = undefined;
91
+ }
84
92
  }
85
93
  }
86
94
  await Promise.all(Array.from({ length: Math.min(concurrency, items.length) }, () => worker()));
@@ -114,16 +122,27 @@ async function streamToString(stream: ReadableStream | null): Promise<string> {
114
122
  }
115
123
 
116
124
  /** Update FileDoc content using Automerge.updateText for character-level CRDT merging. */
117
- function changeFileContent(doc: Automerge.Doc<FileDoc>, content: string): Automerge.Doc<FileDoc> {
125
+ function changeFileContent(doc: Automerge.Doc<FileDoc>, content: string, attribution?: string): Automerge.Doc<FileDoc> {
126
+ const changeOpts = attribution ? { message: attribution } : undefined;
118
127
  const currentContent = doc.content;
119
128
  // If content field doesn't exist yet (new doc), initialize it first
120
129
  if (currentContent === undefined) {
130
+ if (changeOpts) {
131
+ return Automerge.change(doc, changeOpts, (d) => {
132
+ (d as FileDoc).content = content;
133
+ });
134
+ }
121
135
  return Automerge.change(doc, (d) => {
122
136
  (d as FileDoc).content = content;
123
137
  });
124
138
  }
125
139
  // Use updateText for minimal diff — enables proper concurrent merge
126
140
  if (currentContent === content) return doc;
141
+ if (changeOpts) {
142
+ return Automerge.change(doc, changeOpts, (d) => {
143
+ Automerge.updateText(d, ["content"], content);
144
+ });
145
+ }
127
146
  return Automerge.change(doc, (d) => {
128
147
  Automerge.updateText(d, ["content"], content);
129
148
  });
@@ -149,6 +168,10 @@ export class KnowledgeSync {
149
168
  /** Deferred git commit timer — batches multiple remote syncs into one commit. */
150
169
  private gitCommitTimer: ReturnType<typeof setTimeout> | null = null;
151
170
  private pendingGitSources = new Set<string>();
171
+ /** Periodic cleanup interval for stale writtenByExport entries. */
172
+ private exportCleanupTimer: ReturnType<typeof setInterval> | null = null;
173
+ /** Pending file attribution from tool hooks — consumed by handleLocalChangesInner. */
174
+ private pendingAttribution = new Map<string, { nodeId: string; agentId: string; agentType?: string; sessionKey?: string }>();
152
175
 
153
176
  // ── Paths ──────────────────────────────────────────────────────
154
177
  private registryPath: string;
@@ -251,6 +274,12 @@ export class KnowledgeSync {
251
274
  this.scheduleSync();
252
275
  });
253
276
 
277
+ // Periodic cleanup of stale export markers to prevent memory leaks
278
+ // when watcher events are missed or debounced away.
279
+ this.exportCleanupTimer = setInterval(() => {
280
+ this.cleanupStaleExportMarkers();
281
+ }, EXPORT_CLEANUP_INTERVAL);
282
+
254
283
  debug(TAG, "file watcher started");
255
284
  }
256
285
 
@@ -260,6 +289,10 @@ export class KnowledgeSync {
260
289
  clearTimeout(this.debounceTimer);
261
290
  this.debounceTimer = null;
262
291
  }
292
+ if (this.exportCleanupTimer) {
293
+ clearInterval(this.exportCleanupTimer);
294
+ this.exportCleanupTimer = null;
295
+ }
263
296
  this.watcher?.close();
264
297
  this.watcher = null;
265
298
  await this.flushPendingGitCommit();
@@ -418,6 +451,117 @@ export class KnowledgeSync {
418
451
  return this.matcher(relPath);
419
452
  }
420
453
 
454
/**
 * Return the recorded change history of a synced file.
 *
 * Each entry carries the change's timestamp (ms), its message (the
 * attribution string, when one was recorded) and the actor id.
 *
 * @param relPath Path used as the key into `fileDocs`.
 * @returns One entry per history state in the order Automerge reports them;
 *          an empty array when the file is unknown or history cannot be read.
 */
getFileHistory(relPath: string): Array<{ timestamp: number; message: string; actor: string }> {
  const doc = this.fileDocs.get(relPath);
  if (!doc) return [];

  const entries: Array<{ timestamp: number; message: string; actor: string }> = [];
  try {
    for (const state of Automerge.getHistory(doc)) {
      const { time, message, actor } = state.change;
      // Values below 1e12 are treated as seconds and scaled to milliseconds.
      const raw = time ?? 0;
      const timestamp = raw > 0 && raw < 1e12 ? raw * 1000 : raw;
      entries.push({ timestamp, message: message ?? "", actor: actor ?? "" });
    }
  } catch {
    // Best-effort: any failure while decoding history yields no history at all.
    return [];
  }
  return entries;
}
475
+
476
/**
 * Compute a per-line blame for a synced file.
 *
 * For every line of the current content, the line is attributed to the most
 * recent history state whose snapshot contains that line text while the
 * state before it does not. Lines are matched by their text only, so
 * identical lines (blank lines, lone braces, ...) all share one attribution.
 *
 * @param relPath Path used as the key into `fileDocs`.
 * @returns One entry per current line (1-based `line`), or `null` when the
 *          file is unknown, has no/empty content, has no history, or the
 *          history cannot be read.
 */
getFileBlame(relPath: string): Array<{ line: number; timestamp: number; message: string; actor: string }> | null {
  const doc = this.fileDocs.get(relPath);
  if (!doc || !doc.content) return null;

  try {
    const history = Automerge.getHistory(doc);
    if (history.length === 0) return null;

    // One record per history state, kept in the order getHistory returned
    // them (index 0 is treated as the first change).
    const snapshots = history.map((state) => {
      const change = state.change;
      let ts = change.time ?? 0;
      // Values below 1e12 are treated as seconds and scaled to milliseconds.
      if (ts > 0 && ts < 1e12) ts *= 1000;
      const text = state.snapshot.content ?? "";
      return {
        lines: new Set<string>(text.split("\n")),
        ts,
        message: change.message ?? "",
        actor: change.actor ?? "",
      };
    });

    const earliest = snapshots[0];

    return doc.content.split("\n").map((lineContent, idx) => {
      // Scan from the newest state towards the oldest and stop at the first
      // state that has the line while its predecessor lacks it.
      let origin: (typeof snapshots)[number] | undefined;
      for (let h = snapshots.length - 1; h >= 0; h--) {
        if (!snapshots[h].lines.has(lineContent)) continue;
        const inPrevious = h > 0 && snapshots[h - 1].lines.has(lineContent);
        if (!inPrevious) {
          origin = snapshots[h];
          break;
        }
      }

      // No introducing state found: fall back to the first change.
      const source = origin ?? earliest;
      return {
        line: idx + 1,
        timestamp: source?.ts ?? 0,
        message: source?.message ?? "",
        actor: source?.actor ?? "",
      };
    });
  } catch {
    return null;
  }
}
537
+
538
/**
 * List the registry entries of all files that are not marked deleted.
 *
 * @returns Path plus `version`, `updatedAt` and `deleted` copied from each
 *          registry entry; an empty array when the registry has no `files`.
 */
listSyncedFiles(): Array<{ path: string; version: number; updatedAt: number; deleted: boolean }> {
  const files = this.registry.files;
  if (!files) return [];

  const live: Array<{ path: string; version: number; updatedAt: number; deleted: boolean }> = [];
  for (const [path, meta] of Object.entries(files)) {
    // Tombstoned entries are not reported.
    if (meta.deleted) continue;
    live.push({
      path,
      version: meta.version,
      updatedAt: meta.updatedAt,
      deleted: meta.deleted,
    });
  }
  return live;
}
549
+
550
/**
 * Register a pending attribution for a file path.
 *
 * Intended to be called by tool hooks before the file watcher reports the
 * change; the entry is stored in `pendingAttribution` keyed by the
 * workspace-relative path and removed again after 30 seconds if it is still
 * the entry registered by this call.
 *
 * @param absOrRelPath Absolute path inside the workspace, or a path that is
 *                     already workspace-relative. Paths that are not under
 *                     `opts.workspacePath` are used as the key unchanged.
 * @param attribution  Who made the change (node, agent, optional agent type
 *                     and session key).
 */
setPendingAttribution(absOrRelPath: string, attribution: { nodeId: string; agentId: string; agentType?: string; sessionKey?: string }) {
  // Ignore trailing separators on the workspace root so "/ws/" and "/ws"
  // both yield "a/b" for "/ws/a/b".
  const root = this.opts.workspacePath.replace(/[\\/]+$/, "");

  // Only strip the prefix when it ends on a path boundary; a plain
  // startsWith() would also match sibling directories such as "/ws-other".
  let relPath = absOrRelPath;
  if (absOrRelPath.startsWith(root)) {
    const boundary = absOrRelPath.charAt(root.length);
    if (boundary === "/" || boundary === "\\") {
      relPath = absOrRelPath.slice(root.length + 1);
    }
  }

  this.pendingAttribution.set(relPath, attribution);

  // Auto-cleanup after 30s in case the entry is never consumed (e.g. the file
  // is outside sync scope). Only remove the entry this call registered, so a
  // newer attribution for the same path is not dropped by an older timer.
  setTimeout(() => {
    if (this.pendingAttribution.get(relPath) === attribution) {
      this.pendingAttribution.delete(relPath);
    }
  }, 30_000);
}
564
+
421
565
  // ── Private: sync helpers ──────────────────────────────────────
422
566
 
423
567
  private syncDocWithPeer(peerId: string, docId: string) {
@@ -613,8 +757,15 @@ export class KnowledgeSync {
613
757
  for (const relPath of [...added, ...modified]) {
614
758
  const content = contentCache.get(relPath)!;
615
759
 
760
+ // Use pending attribution from tool hooks (precise), fallback to nodeId (human edit)
761
+ const pending = this.pendingAttribution.get(relPath);
762
+ const attribution = pending
763
+ ? JSON.stringify(pending)
764
+ : JSON.stringify({ nodeId: this.opts.nodeId });
765
+ if (pending) this.pendingAttribution.delete(relPath);
766
+
616
767
  let doc = this.fileDocs.get(relPath) ?? Automerge.init<FileDoc>();
617
- doc = changeFileContent(doc, content);
768
+ doc = changeFileContent(doc, content, attribution);
618
769
  this.fileDocs.set(relPath, doc);
619
770
 
620
771
  await this.saveFileDoc(relPath);
@@ -11,6 +11,9 @@
11
11
  */
12
12
 
13
13
  import { spawn } from "node:child_process";
14
+ import { readdir, lstat, readlink } from "node:fs/promises";
15
+ import { join } from "node:path";
16
+ import { nanoid } from "nanoid";
14
17
  import {
15
18
  createReadTool,
16
19
  createWriteTool,
@@ -26,11 +29,15 @@ interface ExecParams {
26
29
  timeout?: number;
27
30
  }
28
31
 
32
+ interface ListParams {
33
+ path: string;
34
+ }
35
+
29
36
  type ToolResult = Record<string, unknown>;
30
37
 
31
38
  // ── Constants ──────────────────────────────────────────────────────
32
39
 
33
- const LOCAL_TOOLS = new Set(["exec", "read", "write", "edit"]);
40
+ const LOCAL_TOOLS = new Set(["exec", "read", "write", "edit", "list"]);
34
41
  const DEFAULT_EXEC_TIMEOUT = 300; // seconds
35
42
  const MAX_OUTPUT_BYTES = 512 * 1024; // 512KB
36
43
 
@@ -53,6 +60,8 @@ export async function executeLocally(
53
60
  return executePiTool("write", params);
54
61
  case "edit":
55
62
  return executePiTool("edit", params);
63
+ case "list":
64
+ return executeList(params as unknown as ListParams);
56
65
  default:
57
66
  throw new Error(`Unknown local tool: ${tool}`);
58
67
  }
@@ -124,6 +133,60 @@ async function executeExec(params: ExecParams): Promise<ToolResult> {
124
133
  });
125
134
  }
126
135
 
136
+ // ── list: directory listing via node:fs ────────────────────────────
137
+
138
/**
 * Render the low nine permission bits of a file mode as an `rwxrwxrwx`
 * style string (owner, group, other). Higher bits are ignored.
 */
function formatPermissions(mode: number): string {
  let rendered = "";
  // Owner, group and other triplets sit at bit offsets 6, 3 and 0.
  for (const shift of [6, 3, 0]) {
    const bits = (mode >> shift) & 7;
    rendered += bits & 4 ? "r" : "-";
    rendered += bits & 2 ? "w" : "-";
    rendered += bits & 1 ? "x" : "-";
  }
  return rendered;
}
145
+
146
/**
 * List the entries of a directory with basic metadata.
 *
 * Each entry reports name, type (file / directory / symlink), size, ISO
 * mtime and a permission string; symlinks additionally carry `target` when
 * the link can be read. Entries that cannot be inspected are still listed,
 * as a zero-sized "file" with empty mtime and permissions.
 *
 * @param params `path` is the directory to list.
 * @returns `{ path, items }` with directories first, then by name.
 * @throws Error when `path` is empty; errors from reading the directory
 *         itself propagate.
 */
async function executeList(params: ListParams): Promise<ToolResult> {
  const dirPath = params.path;
  if (!dirPath) throw new Error("list: path is required");

  // Describe one directory entry; never rejects.
  const describeEntry = async (name: string) => {
    const fullPath = join(dirPath, name);
    try {
      // lstat so that symlinks are reported as links rather than followed.
      const st = await lstat(fullPath);
      const type: "file" | "directory" | "symlink" = st.isSymbolicLink()
        ? "symlink"
        : st.isDirectory()
          ? "directory"
          : "file";

      let target: string | undefined;
      if (type === "symlink") {
        try {
          target = await readlink(fullPath);
        } catch {}
      }

      const entry: {
        name: string;
        type: "file" | "directory" | "symlink";
        size: number;
        mtime: string;
        permissions: string;
        target?: string;
      } = {
        name,
        type,
        size: st.size,
        mtime: st.mtime.toISOString(),
        permissions: formatPermissions(st.mode),
      };
      if (target !== undefined) entry.target = target;
      return entry;
    } catch {
      return { name, type: "file" as const, size: 0, mtime: "", permissions: "" };
    }
  };

  const names = await readdir(dirPath);
  const items = await Promise.all(names.map(describeEntry));

  // Directories sort ahead of everything else; ties are broken by name.
  const rank = (type: string): number => (type === "directory" ? 0 : 1);
  items.sort((a, b) => rank(a.type) - rank(b.type) || a.name.localeCompare(b.name));

  return { path: dirPath, items };
}
189
+
127
190
  // ── read/write/edit: reuse pi-coding-agent factories ───────────────
128
191
 
129
192
  /** Cache key includes cwd so tools are recreated if the working directory changes. */
@@ -163,7 +226,7 @@ async function executePiTool(
163
226
  params: Record<string, unknown>,
164
227
  ): Promise<ToolResult> {
165
228
  const tool = getPiTool(name);
166
- const toolCallId = crypto.randomUUID();
229
+ const toolCallId = nanoid();
167
230
 
168
231
  const result = (await tool.execute(toolCallId, params)) as {
169
232
  content: Array<{ type: string; text?: string }>;
@@ -0,0 +1,198 @@
1
+ /**
2
+ * Sequence-based log replication protocol for ClawMatrix.
3
+ *
4
+ * Replicates append-only tables (audit_log, health_events, handoff_history)
5
+ * across mesh peers using vector clocks and delta sync.
6
+ *
7
+ * Each replicated row has (node_id, source_seq) — the node that created the
8
+ * event and its original autoincrement sequence number. The combination is
9
+ * globally unique (UNIQUE index in SQLite).
10
+ *
11
+ * Protocol:
12
+ * 1. On peer connect → both sides send log_sync { request: true, vector }
13
+ * 2. Receiver computes delta using vector clock and sends missing rows
14
+ * 3. Rows inserted via INSERT OR IGNORE (dedup on node_id + source_seq)
15
+ * 4. If more rows remain, sender marks hasMore and continues
16
+ *
17
+ * This replaces Automerge-based sync for append-only data (no conflicts).
18
+ */
19
+
20
+ import type { Store, ReplicatedTable } from "./store.ts";
21
+ import type { LogSyncFrame } from "./types.ts";
22
+ import { debug } from "./debug.ts";
23
+
24
+ const TAG = "log-repl";
25
+ const REPLICATED_TABLES: ReplicatedTable[] = ["audit_log", "health_events", "handoff_history"];
26
+ const BATCH_SIZE = 500;
27
+
28
+ export interface LogReplicatorOptions {
29
+ store: Store;
30
+ nodeId: string;
31
+ /** Send a frame to a specific peer. */
32
+ sendTo: (peerId: string, frame: LogSyncFrame) => void;
33
+ }
34
+
35
/**
 * Replicates the tables listed in REPLICATED_TABLES between mesh peers by
 * exchanging `log_sync` frames.
 *
 * The class keeps no row data itself: everything is read from and written to
 * the injected `Store`, and frames are delivered through the injected
 * `sendTo` callback. The only in-memory state is the set of connected peers.
 */
export class LogReplicator {
  private readonly store: Store;
  private readonly nodeId: string;
  private readonly sendTo: (peerId: string, frame: LogSyncFrame) => void;
  /** Tracks connected peers for broadcasting local inserts. */
  private readonly connectedPeers = new Set<string>();

  /**
   * @param opts Store to replicate, this node's id, and the frame transport.
   */
  constructor(opts: LogReplicatorOptions) {
    this.store = opts.store;
    this.nodeId = opts.nodeId;
    this.sendTo = opts.sendTo;
  }

  // ── Peer lifecycle ──────────────────────────────────────────

  /**
   * Register a newly connected peer and send it one sync request per
   * replicated table.
   */
  initPeerSync(peerId: string) {
    this.connectedPeers.add(peerId);
    for (const table of REPLICATED_TABLES) {
      this.sendSyncRequest(peerId, table);
    }
  }

  /** Forget a peer; it no longer receives broadcasts of local inserts. */
  removePeerSync(peerId: string) {
    this.connectedPeers.delete(peerId);
  }

  // ── Handle incoming frames ──────────────────────────────────

  /**
   * Handle a `log_sync` frame from a peer.
   *
   * Frames for tables not in REPLICATED_TABLES are ignored. A frame may be a
   * request (answered with the rows the sender is missing), a row delivery
   * (rows are handed to the store and the replication state is advanced), or
   * both.
   */
  handleSyncMessage(frame: LogSyncFrame) {
    const peerId = frame.from;
    const { table, vector, rows, request } = frame.payload;

    if (!REPLICATED_TABLES.includes(table)) {
      debug(TAG, `ignoring unknown table "${table}" from ${peerId}`);
      return;
    }

    // If peer is requesting our delta, compute and send it
    if (request && vector) {
      this.sendDelta(peerId, table, vector);
    }

    // If peer sent rows, apply them
    if (rows && rows.length > 0) {
      const inserted = this.store.insertReplicatedRows(table, rows);
      debug(TAG, `applied ${inserted}/${rows.length} rows to ${table} from ${peerId}`);

      // Update persisted replication state
      this.updateReplicationState(table, rows);
    }
  }

  // ── Notify on local insert ──────────────────────────────────

  /**
   * Push this node's most recent row of `table` to every connected peer.
   * Does nothing when no peer is connected or the store returns no row.
   */
  notifyLocalInsert(table: ReplicatedTable) {
    if (this.connectedPeers.size === 0) return;

    // Ask for at most one row after (max - 1), i.e. the latest row from our node.
    // NOTE(review): presumably getRowsSince returns rows with source_seq
    // strictly greater than the given value — confirm against Store.
    const rows = this.store.getRowsSince(table, this.nodeId, this.getLocalMaxSeq(table) - 1, 1);
    if (rows.length === 0) return;
    this.updateReplicationState(table, rows);

    // Broadcast to all connected peers
    for (const peerId of this.connectedPeers) {
      const frame: LogSyncFrame = {
        type: "log_sync",
        from: this.nodeId,
        to: peerId,
        timestamp: Date.now(),
        payload: { table, rows },
      };
      this.sendTo(peerId, frame);
    }
  }

  // ── Internal ────────────────────────────────────────────────

  /** Send a sync request carrying the store's resume vector for a table. */
  private sendSyncRequest(peerId: string, table: ReplicatedTable) {
    const vector = this.store.getResumeVector(table);
    const frame: LogSyncFrame = {
      type: "log_sync",
      from: this.nodeId,
      to: peerId,
      timestamp: Date.now(),
      payload: { table, vector, request: true },
    };
    debug(TAG, `requesting sync for ${table} from ${peerId} (vector: ${JSON.stringify(vector)})`);
    this.sendTo(peerId, frame);
  }

  /**
   * Compute and send the rows the peer is missing, in batches of BATCH_SIZE.
   *
   * @param peerVector Highest sequence per node id that the peer reports
   *                   having; node ids absent from it count as 0.
   */
  private sendDelta(peerId: string, table: ReplicatedTable, peerVector: Record<string, number>) {
    const localVector = this.store.getVectorClock(table);

    // For each node_id we know about, check if peer is behind
    let totalSent = 0;
    for (const nodeId of Object.keys(localVector)) {
      const localMax = localVector[nodeId] ?? 0;
      const peerMax = peerVector[nodeId] ?? 0;

      if (localMax <= peerMax) continue; // peer is up to date for this node

      // Fetch missing rows in batches
      let afterSeq = peerMax;
      let hasMore = true;

      while (hasMore) {
        const rows = this.store.getRowsSince(table, nodeId, afterSeq, BATCH_SIZE);
        if (rows.length === 0) break;

        // The next batch starts after the last row of this one. If that row
        // has no usable source_seq, or the cursor would not move forward,
        // stop after this batch instead of resending the same rows forever.
        const lastSeq = (rows[rows.length - 1] as Record<string, unknown>).source_seq;
        if (typeof lastSeq === "number" && lastSeq > afterSeq) {
          afterSeq = lastSeq;
          hasMore = rows.length === BATCH_SIZE;
        } else {
          hasMore = false;
          debug(TAG, `delta cursor for ${table}/${nodeId} did not advance past ${afterSeq}; stopping`);
        }

        const frame: LogSyncFrame = {
          type: "log_sync",
          from: this.nodeId,
          to: peerId,
          timestamp: Date.now(),
          payload: { table, rows, hasMore },
        };
        this.sendTo(peerId, frame);
        totalSent += rows.length;
      }
    }

    if (totalSent > 0) {
      debug(TAG, `sent ${totalSent} delta rows for ${table} to ${peerId}`);
    }
  }

  /**
   * Raise the persisted replication state to the highest `source_seq` seen
   * per `node_id` in `rows`. State is never lowered.
   */
  private updateReplicationState(table: ReplicatedTable, rows: Record<string, unknown>[]) {
    // Group by node_id and find max source_seq per node
    const maxSeqs = new Map<string, number>();
    for (const row of rows) {
      const nodeId = row.node_id as string;
      const sourceSeq = row.source_seq as number;
      const current = maxSeqs.get(nodeId) ?? 0;
      // A non-numeric source_seq never compares greater and is skipped here.
      if (sourceSeq > current) maxSeqs.set(nodeId, sourceSeq);
    }

    for (const [nodeId, maxSeq] of maxSeqs) {
      const existing = this.store.getReplicationState(table, nodeId);
      if (maxSeq > existing) {
        this.store.setReplicationState(table, nodeId, maxSeq);
      }
    }
  }

  /** Get local max source_seq for our own node. */
  private getLocalMaxSeq(table: ReplicatedTable): number {
    return this.store.getMaxSeq(table, this.nodeId);
  }

  /** Destroy: forget all connected peers. */
  destroy() {
    this.connectedPeers.clear();
  }
}