clawmem 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/AGENTS.md +660 -0
  2. package/CLAUDE.md +660 -0
  3. package/LICENSE +21 -0
  4. package/README.md +993 -0
  5. package/SKILL.md +717 -0
  6. package/bin/clawmem +75 -0
  7. package/package.json +72 -0
  8. package/src/amem.ts +797 -0
  9. package/src/beads.ts +263 -0
  10. package/src/clawmem.ts +1849 -0
  11. package/src/collections.ts +405 -0
  12. package/src/config.ts +178 -0
  13. package/src/consolidation.ts +123 -0
  14. package/src/directory-context.ts +248 -0
  15. package/src/errors.ts +41 -0
  16. package/src/formatter.ts +427 -0
  17. package/src/graph-traversal.ts +247 -0
  18. package/src/hooks/context-surfacing.ts +317 -0
  19. package/src/hooks/curator-nudge.ts +89 -0
  20. package/src/hooks/decision-extractor.ts +639 -0
  21. package/src/hooks/feedback-loop.ts +214 -0
  22. package/src/hooks/handoff-generator.ts +345 -0
  23. package/src/hooks/postcompact-inject.ts +226 -0
  24. package/src/hooks/precompact-extract.ts +314 -0
  25. package/src/hooks/pretool-inject.ts +79 -0
  26. package/src/hooks/session-bootstrap.ts +324 -0
  27. package/src/hooks/staleness-check.ts +130 -0
  28. package/src/hooks.ts +367 -0
  29. package/src/indexer.ts +327 -0
  30. package/src/intent.ts +294 -0
  31. package/src/limits.ts +26 -0
  32. package/src/llm.ts +1175 -0
  33. package/src/mcp.ts +2138 -0
  34. package/src/memory.ts +336 -0
  35. package/src/mmr.ts +93 -0
  36. package/src/observer.ts +269 -0
  37. package/src/openclaw/engine.ts +283 -0
  38. package/src/openclaw/index.ts +221 -0
  39. package/src/openclaw/plugin.json +83 -0
  40. package/src/openclaw/shell.ts +207 -0
  41. package/src/openclaw/tools.ts +304 -0
  42. package/src/profile.ts +346 -0
  43. package/src/promptguard.ts +218 -0
  44. package/src/retrieval-gate.ts +106 -0
  45. package/src/search-utils.ts +127 -0
  46. package/src/server.ts +783 -0
  47. package/src/splitter.ts +325 -0
  48. package/src/store.ts +4062 -0
  49. package/src/validation.ts +67 -0
  50. package/src/watcher.ts +58 -0
package/src/hooks.ts ADDED
@@ -0,0 +1,367 @@
1
+ /**
2
+ * ClawMem Hook Runner - stdin/stdout JSON hook dispatch for Claude Code
3
+ *
4
+ * Claude Code hooks send JSON on stdin and expect JSON on stdout.
5
+ * This module provides the I/O layer and dispatches to individual hook handlers.
6
+ */
7
+
8
import { createHash } from "node:crypto";
import { readFileSync } from "node:fs";

import type { Store } from "./store.ts";
10
+
11
+ // =============================================================================
12
+ // Types
13
+ // =============================================================================
14
+
15
/**
 * Normalized hook payload with camelCase keys, as produced by readHookInput.
 * Every field is optional; a missing or unparsable payload yields `{}`.
 */
export type HookInput = {
  /** Taken from `session_id` (or `sessionId`) in the raw payload. */
  sessionId?: string;
  /** Taken from the raw payload's `prompt` key. */
  prompt?: string;
  /** Taken from `transcript_path` (or `transcriptPath`). */
  transcriptPath?: string;
  /** Taken from `hook_event_name` (or `hookEventName`). */
  hookEventName?: string;
  /** Taken from `tool_input` (or `toolInput`). */
  toolInput?: Record<string, unknown>;
};
22
+
23
/**
 * Shape of the JSON object a hook writes to stdout (see writeHookOutput).
 *
 * NOTE(review): the meaning of the individual fields is defined by the
 * Claude Code hook protocol, not by this module — confirm against its docs.
 */
export type HookOutput = {
  continue?: boolean;
  suppressOutput?: boolean;
  stopReason?: string;
  decision?: "approve" | "block";
  reason?: string;
  systemMessage?: string;
  permissionDecision?: "allow" | "deny" | "ask";
  /** Event-specific payload; `additionalContext` carries injected context text. */
  hookSpecificOutput?: {
    hookEventName?: string;
    additionalContext?: string;
  };
};
36
+
37
+ // =============================================================================
38
+ // I/O
39
+ // =============================================================================
40
+
41
/**
 * Drain stdin, parse it as JSON, and normalize the payload into a HookInput.
 *
 * Each snake_case key is preferred, with the camelCase spelling as a fallback.
 *
 * @returns The normalized input, or `{}` when stdin is empty/whitespace or the
 *          payload cannot be parsed or read as an object.
 */
export async function readHookInput(): Promise<HookInput> {
  const parts: Uint8Array[] = [];
  for await (const part of Bun.stdin.stream()) {
    parts.push(part);
  }

  const text = Buffer.concat(parts).toString("utf-8").trim();
  if (text.length === 0) return {};

  try {
    // Property reads stay inside the try: a `null` payload throws here and
    // must fall back to the empty input as well.
    const payload = JSON.parse(text);
    const sessionId = payload.session_id ?? payload.sessionId;
    const prompt = payload.prompt;
    const transcriptPath = payload.transcript_path ?? payload.transcriptPath;
    const hookEventName = payload.hook_event_name ?? payload.hookEventName;
    const toolInput = payload.tool_input ?? payload.toolInput;
    return { sessionId, prompt, transcriptPath, hookEventName, toolInput };
  } catch {
    return {};
  }
}
65
+
66
/**
 * Serialize a hook result as a single JSON line on stdout.
 *
 * @param output - The result object to emit.
 */
export function writeHookOutput(output: HookOutput): void {
  const payload = JSON.stringify(output);
  console.log(payload);
}
72
+
73
/**
 * Internal hook name → event name reported in `hookSpecificOutput.hookEventName`.
 *
 * A string value means the output builders attach `hookSpecificOutput` with that
 * event name; `null` means they return a bare output without it.
 */
const HOOK_EVENT_MAP: Record<string, string | null> = {
  // Hooks that inject additionalContext
  "context-surfacing": "UserPromptSubmit",
  "postcompact-inject": "SessionStart", // SessionStart(compact)

  // SessionStart hooks without hookSpecificOutput
  "session-bootstrap": null,
  "staleness-check": null,

  // Stop hooks without hookSpecificOutput
  "decision-extractor": null,
  "handoff-generator": null,
  "feedback-loop": null,

  // PreCompact: side-effect only, no context injection
  "precompact-extract": null,

  // PreToolUse: disabled (cannot inject additionalContext; E13 folded into context-surfacing)
  "pretool-inject": null,
};
89
+
90
/**
 * Build a "continue" output that carries `context` as additional context.
 *
 * @param hookName - Internal hook name, looked up in HOOK_EVENT_MAP.
 * @param context - Text to place in `hookSpecificOutput.additionalContext`.
 * @returns A bare continue output when the hook has no mapped event name
 *          (the context is dropped); otherwise the output with
 *          `hookSpecificOutput` attached.
 */
export function makeContextOutput(
  hookName: string,
  context: string
): HookOutput {
  const output: HookOutput = { continue: true, suppressOutput: false };

  const mappedEvent = HOOK_EVENT_MAP[hookName];
  if (mappedEvent) {
    output.hookSpecificOutput = {
      hookEventName: mappedEvent,
      additionalContext: context,
    };
  }

  return output;
}
111
+
112
/**
 * Build a "continue" output that injects nothing.
 *
 * @param hookName - Optional internal hook name, looked up in HOOK_EVENT_MAP.
 * @returns A bare continue output when no name is given or the hook has no
 *          mapped event name; otherwise the output with an empty
 *          `additionalContext` for the mapped event.
 */
export function makeEmptyOutput(hookName?: string): HookOutput {
  const output: HookOutput = { continue: true, suppressOutput: false };

  const mappedEvent = hookName ? HOOK_EVENT_MAP[hookName] : undefined;
  if (mappedEvent) {
    output.hookSpecificOutput = {
      hookEventName: mappedEvent,
      additionalContext: "",
    };
  }

  return output;
}
132
+
133
+ // =============================================================================
134
+ // Token Estimation
135
+ // =============================================================================
136
+
137
/**
 * Rough token estimate: one token per four characters, rounded up.
 *
 * @param text - Text to measure (by UTF-16 length).
 * @returns The estimated token count; 0 for an empty string.
 */
export function estimateTokens(text: string): number {
  const charsPerToken = 4;
  return Math.floor((text.length + charsPerToken - 1) / charsPerToken);
}
143
+
144
+ // =============================================================================
145
+ // Heartbeat / Dedupe Suppression (IO4)
146
+ // =============================================================================
147
+
148
/** Built-in phrases that mark a prompt as a heartbeat / liveness probe. */
const DEFAULT_HEARTBEAT_SUBSTRINGS = [
  "heartbeat",
  "health check",
  "keepalive",
  "keep-alive",
  "status check",
  "are you alive",
  "still alive",
  "ping",
  "pong",
];

/**
 * Built-in phrases followed by any extra phrases from the comma-separated
 * CLAWMEM_HEARTBEAT_PATTERNS variable (trimmed, lowercased, blanks dropped),
 * de-duplicated while preserving order.
 */
function getHeartbeatSubstrings(): string[] {
  const raw = (Bun.env.CLAWMEM_HEARTBEAT_PATTERNS || "").trim();
  const extra = raw
    ? raw.split(",").map(s => s.trim().toLowerCase()).filter(Boolean)
    : [];
  // A Set keeps first-insertion order, so defaults stay ahead of extras.
  return [...new Set([...DEFAULT_HEARTBEAT_SUBSTRINGS, ...extra])];
}

/** Letters, digits and underscore: characters that continue a word. */
const HEARTBEAT_WORD_CHAR = /[\p{L}\p{N}_]/u;

/**
 * True when `phrase` occurs in `text` as a whole word or phrase, i.e. the
 * occurrence is not directly preceded or followed by a word character.
 */
function containsHeartbeatPhrase(text: string, phrase: string): boolean {
  if (!phrase) return false;
  let from = 0;
  for (;;) {
    const at = text.indexOf(phrase, from);
    if (at === -1) return false;
    const before = at > 0 ? text[at - 1]! : "";
    const after = text[at + phrase.length] ?? "";
    const boundedLeft = before === "" || !HEARTBEAT_WORD_CHAR.test(before);
    const boundedRight = after === "" || !HEARTBEAT_WORD_CHAR.test(after);
    if (boundedLeft && boundedRight) return true;
    from = at + 1;
  }
}

/**
 * Decide whether a prompt is a heartbeat that should be suppressed.
 *
 * Returns false unconditionally when CLAWMEM_DISABLE_HEARTBEAT_SUPPRESSION is
 * "true". Otherwise an empty prompt, a prompt starting with "/", or a prompt
 * containing any configured heartbeat phrase counts as a heartbeat.
 *
 * Phrases are matched on word boundaries. Plain substring matching treated
 * ordinary prompts such as "fix the shipping cost mapping" as heartbeats
 * because they contain "ping".
 *
 * @param prompt - Raw prompt text; matching is case-insensitive.
 */
export function isHeartbeatPrompt(prompt: string): boolean {
  if (Bun.env.CLAWMEM_DISABLE_HEARTBEAT_SUPPRESSION === "true") return false;

  const p = (prompt || "").trim().toLowerCase();
  if (!p) return true;
  if (p.startsWith("/")) return true;

  // Exact tiny pings ("ping", "pong", "heartbeat") are covered by the phrase match.
  return getHeartbeatSubstrings().some(s => containsHeartbeatPhrase(p, s));
}
189
+
190
/**
 * Report whether this exact (trimmed) prompt was already recorded for
 * `hookName` within the dedupe window, and record the current sighting.
 *
 * The window comes from CLAWMEM_HOOK_DEDUP_WINDOW_SEC (default 600 seconds);
 * a non-numeric or non-positive value disables dedupe. Prompts are keyed by
 * the SHA-256 of their trimmed text in the `hook_dedupe` table.
 *
 * @returns true when a previous sighting is newer than the window. Always
 *          false (and nothing is written) when dedupe is disabled or the
 *          prompt is blank.
 */
export function wasPromptSeenRecently(store: Store, hookName: string, prompt: string): boolean {
  const configured = (Bun.env.CLAWMEM_HOOK_DEDUP_WINDOW_SEC || "").trim();
  const windowSec = configured ? parseInt(configured, 10) : 600;
  if (!Number.isFinite(windowSec) || windowSec <= 0) return false;

  const text = (prompt || "").trim();
  if (!text) return false;

  const promptHash = createHash("sha256").update(text).digest("hex");
  const seenAt = new Date();

  const previous = store.db
    .prepare("SELECT last_seen_at FROM hook_dedupe WHERE hook_name = ? AND prompt_hash = ? LIMIT 1")
    .get(hookName, promptHash) as { last_seen_at: string } | null;

  // An absent or unparsable timestamp never counts as recent.
  const previousMs = previous?.last_seen_at ? Date.parse(previous.last_seen_at) : Number.NaN;
  const recent = !Number.isNaN(previousMs) && (seenAt.getTime() - previousMs) < windowSec * 1000;

  // Upsert the sighting so the next call measures from now.
  store.db.prepare(`
    INSERT INTO hook_dedupe (hook_name, prompt_hash, prompt_preview, last_seen_at)
    VALUES (?, ?, ?, ?)
    ON CONFLICT(hook_name, prompt_hash) DO UPDATE SET
      prompt_preview = excluded.prompt_preview,
      last_seen_at = excluded.last_seen_at
  `).run(hookName, promptHash, text.slice(0, 120), seenAt.toISOString());

  return recent;
}
225
+
226
+ // =============================================================================
227
+ // Transcript Parsing
228
+ // =============================================================================
229
+
230
/** One transcript entry after flattening: who spoke and the rendered text. */
export type TranscriptMessage = {
  /** Speaker role as recorded in the transcript line. */
  role: "user" | "assistant" | "system";
  /** Message text; structured content blocks are rendered to a single string. */
  content: string;
};
234
+
235
/**
 * Render a transcript `content` value to plain text.
 *
 * Strings pass through. Arrays of content blocks are rendered block by block
 * (text, tool_use, tool_result; anything else is dropped) and joined with
 * newlines. Any other value is JSON-stringified.
 */
function renderTranscriptContent(content: unknown): string {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return JSON.stringify(content);

  return content
    .map((b: any) => {
      if (b.type === "text") return b.text;
      if (b.type === "tool_use") return `[tool_use name="${b.name}" id="${b.id}"] ${JSON.stringify(b.input ?? {})}`;
      // Only string tool results are kept, capped at 500 characters.
      if (b.type === "tool_result") return `[tool_result id="${b.tool_use_id}"] ${typeof b.content === "string" ? b.content.slice(0, 500) : ""}`;
      return "";
    })
    .filter((s: string) => s)
    .join("\n");
}

/**
 * Read and parse a Claude Code transcript (.jsonl file).
 *
 * Each line is either `{type, message: {role, content}}` or a flat
 * `{role, content}` object. Blank lines, malformed lines, and entries
 * without both a role and content are skipped.
 *
 * @param transcriptPath - Path of the .jsonl file to read.
 * @param lastN - Maximum number of trailing messages to return (default 200).
 *                Zero, negative, or NaN yields an empty array.
 * @param roleFilter - When given, only messages with this role are kept.
 * @returns The last `lastN` matching messages in file order, or `[]` when the
 *          file cannot be read.
 */
export function readTranscript(
  transcriptPath: string,
  lastN: number = 200,
  roleFilter?: "user" | "assistant"
): TranscriptMessage[] {
  // slice(-0) is slice(0) and would return every message, so handle
  // "no messages requested" explicitly.
  if (!(lastN > 0)) return [];

  let raw: string;
  try {
    raw = readFileSync(transcriptPath, "utf-8");
  } catch {
    return [];
  }

  const messages: TranscriptMessage[] = [];
  for (const line of raw.split("\n")) {
    if (!line.trim()) continue;
    try {
      const entry = JSON.parse(line);
      const msg = entry.message ?? entry;
      if (!msg.role || !msg.content) continue;

      const role = msg.role as TranscriptMessage["role"];
      if (roleFilter && role !== roleFilter) continue;

      messages.push({ role, content: renderTranscriptContent(msg.content) });
    } catch {
      // Skip malformed lines
    }
  }

  return messages.slice(-lastN);
}
284
+
285
/**
 * Accept a transcript path only when it is absolute, ends in ".jsonl", and
 * stats as a regular file no larger than 50MB.
 *
 * @param path - Candidate path; may be undefined.
 * @returns The same path when every check passes, otherwise null (including
 *          when the file cannot be stat'ed).
 */
export function validateTranscriptPath(path: string | undefined): string | null {
  if (!path) return null;

  const wellFormed = require("path").isAbsolute(path) && path.endsWith(".jsonl");
  if (!wellFormed) return null;

  const maxBytes = 50 * 1024 * 1024; // 50MB limit
  try {
    const info = require("fs").statSync(path);
    if (!info.isFile()) return null;
    return info.size > maxBytes ? null : path;
  } catch {
    return null;
  }
}
302
+
303
+ // =============================================================================
304
+ // Snippet Helpers
305
+ // =============================================================================
306
+
307
/**
 * Shorten `text` to at most `maxChars` characters (plus a possible "..."),
 * preferring a natural break in the second half of the window.
 *
 * Break preference: paragraph ("\n\n"), sentence end, single newline, then
 * space. Only the word-boundary and hard-cut cases append "...".
 *
 * @param text - Text to shorten; returned unchanged when it already fits.
 * @param maxChars - Window size in characters (default 300).
 */
export function smartTruncate(text: string, maxChars: number = 300): string {
  if (text.length <= maxChars) return text;

  const head = text.slice(0, maxChars);
  const minKeep = maxChars * 0.5; // a break must keep more than half the window
  const cutAt = (idx: number): string => head.slice(0, idx).trimEnd();

  // Paragraph break
  const paragraphEnd = head.lastIndexOf("\n\n");
  if (paragraphEnd > minKeep) return cutAt(paragraphEnd);

  // Sentence break: longest prefix ending in . ! or ? followed by whitespace
  const sentence = /^(.+[.!?])\s/s.exec(head)?.[1];
  if (sentence !== undefined && sentence.length > minKeep) return sentence;

  // Newline break
  const lineEnd = head.lastIndexOf("\n");
  if (lineEnd > minKeep) return cutAt(lineEnd);

  // Word boundary, else hard cut
  const lastSpace = head.lastIndexOf(" ");
  const kept = lastSpace > minKeep ? cutAt(lastSpace) : head.trimEnd();
  return kept + "...";
}
335
+
336
+ // =============================================================================
337
+ // Logging
338
+ // =============================================================================
339
+
340
/**
 * Record a context injection in the usage table and, when two or more paths
 * were injected together, record their co-activation (E3).
 *
 * Best-effort: any error from the store is swallowed so a logging failure
 * never crashes the hook.
 *
 * @param store - Store providing `insertUsage` and `recordCoActivation`.
 * @param sessionId - Session the injection belongs to.
 * @param hookName - Hook that performed the injection.
 * @param injectedPaths - Document paths that were injected.
 * @param estimatedTokens - Estimated token cost of the injected context.
 */
export function logInjection(
  store: Store,
  sessionId: string,
  hookName: string,
  injectedPaths: string[],
  estimatedTokens: number
): void {
  try {
    const timestamp = new Date().toISOString();
    store.insertUsage({
      sessionId,
      timestamp,
      hookName,
      injectedPaths,
      estimatedTokens,
      wasReferenced: 0,
    });

    // A single path has nothing to co-activate with.
    if (injectedPaths.length < 2) return;
    store.recordCoActivation(injectedPaths);
  } catch {
    // Non-fatal: don't crash hook if usage logging fails
  }
}
package/src/indexer.ts ADDED
@@ -0,0 +1,327 @@
1
+ /**
2
+ * ClawMem Indexer - Vault walking, frontmatter parsing, SAME metadata extraction
3
+ *
4
+ * Combines QMD's document indexing with SAME's frontmatter metadata system.
5
+ */
6
+
7
+ import { Glob } from "bun";
8
+ import { readFileSync, statSync } from "fs";
9
+ import { basename, relative } from "path";
10
+ import matter from "gray-matter";
11
+ import { createHash } from "crypto";
12
+ import type { Store } from "./store.ts";
13
+ import { inferContentType, confidenceScore, type ContentType } from "./memory.ts";
14
+ import { getDefaultLlamaCpp } from "./llm.ts";
15
+
16
+ // =============================================================================
17
+ // Types
18
+ // =============================================================================
19
+
20
/**
 * Metadata extracted from a document's frontmatter by parseDocument.
 * All fields are optional; each mirrors the frontmatter key of the same name.
 */
export interface DocumentMeta {
  /** Frontmatter `title`. */
  title?: string;
  /** Frontmatter `tags`, each entry converted to a string. */
  tags?: string[];
  /** Frontmatter `domain`. */
  domain?: string;
  /** Frontmatter `workstream`. */
  workstream?: string;
  /** Frontmatter `content_type`, or a type inferred from the path when absent. */
  content_type?: ContentType;
  /** Frontmatter `review_by`. */
  review_by?: string;
}
28
+
29
/** Per-run counters returned by indexCollection. */
export interface IndexStats {
  /** Paths with no active document before this run (newly inserted or reactivated). */
  added: number;
  /** Active documents whose content hash changed. */
  updated: number;
  /** Active documents whose content hash matched the stored one. */
  unchanged: number;
  /** Previously active documents deactivated because they were not seen in this run. */
  removed: number;
}
35
+
36
+ // =============================================================================
37
+ // Exclusion Rules
38
+ // =============================================================================
39
+
40
/** Path segment names that cause shouldExclude to reject a path. */
const EXCLUDED_DIRS = new Set([
  "_PRIVATE", ".clawmem", ".git",
  ".obsidian", ".logseq", ".foam", ".dendron",
  ".trash", ".stversions",
  "node_modules", ".cache", "vendor", "dist", "build",
  "gits", "scraped",
]);
58
+
59
/**
 * Decide whether a collection-relative path must be skipped by the indexer.
 *
 * A path is excluded when any "/"-separated segment is listed in
 * EXCLUDED_DIRS or starts with "." (the lone "." segment is allowed).
 */
export function shouldExclude(relativePath: string): boolean {
  for (const segment of relativePath.split("/")) {
    if (EXCLUDED_DIRS.has(segment)) return true;
    if (segment !== "." && segment.startsWith(".")) return true;
  }
  return false;
}
63
+
64
+ // =============================================================================
65
+ // Content Hashing
66
+ // =============================================================================
67
+
68
/**
 * SHA-256 digest of `content` (encoded as UTF-8), as lowercase hex.
 */
export function hashContent(content: string): string {
  const hasher = createHash("sha256");
  hasher.update(content, "utf-8");
  return hasher.digest("hex");
}
71
+
72
+ // =============================================================================
73
+ // Title Extraction
74
+ // =============================================================================
75
+
76
/**
 * Derive a document title: the text of the first Markdown heading, or the
 * file's base name without a trailing .md/.txt extension.
 *
 * Lines inside fenced code blocks (``` or ~~~) are ignored, so a shell
 * comment such as "# install deps" in a code sample is never mistaken for a
 * heading. Headings whose text is only whitespace are skipped.
 *
 * @param content - Document body to scan.
 * @param filename - Path or name used for the fallback title.
 */
export function extractTitle(content: string, filename: string): string {
  // Currently open fence, if any: its marker character and length.
  let fence: { char: string; length: number } | null = null;

  for (const line of content.split("\n")) {
    // trimEnd drops a trailing "\r" so CRLF files close their fences too.
    const fenceMatch = /^ {0,3}(`{3,}|~{3,})(.*)$/.exec(line.trimEnd());

    if (fence) {
      // A closing fence uses the same character, is at least as long as the
      // opening one, and carries no info string.
      if (
        fenceMatch &&
        fenceMatch[1]![0] === fence.char &&
        fenceMatch[1]!.length >= fence.length &&
        fenceMatch[2]!.trim() === ""
      ) {
        fence = null;
      }
      continue;
    }

    if (fenceMatch) {
      fence = { char: fenceMatch[1]![0]!, length: fenceMatch[1]!.length };
      continue;
    }

    const heading = line.match(/^#+\s+(.+)/);
    const text = heading?.[1]?.trim();
    if (text) return text;
  }

  return basename(filename).replace(/\.(md|txt)$/i, "");
}
84
+
85
+ // =============================================================================
86
+ // Frontmatter Parsing
87
+ // =============================================================================
88
+
89
/**
 * Normalize a scalar frontmatter value to a string.
 *
 * YAML scalars are not always strings: unquoted numbers and booleans come
 * back as their own types, and unquoted dates (e.g. `review_by: 2025-01-01`)
 * as Date objects. Strings pass through unchanged, numbers/booleans are
 * stringified, valid dates become ISO-8601 strings, and everything else
 * (objects, arrays, null, invalid dates) is treated as absent.
 */
function frontmatterString(value: unknown): string | undefined {
  if (typeof value === "string") return value;
  if (typeof value === "number" || typeof value === "boolean") return String(value);
  if (value instanceof Date) {
    return Number.isNaN(value.getTime()) ? undefined : value.toISOString();
  }
  return undefined;
}

/**
 * Split a document into body and frontmatter metadata.
 *
 * @param content - Raw file content, possibly starting with frontmatter.
 * @param relativePath - Collection-relative path, used to infer the content
 *                       type when frontmatter does not provide one.
 * @returns The body without frontmatter plus the extracted metadata. If
 *          frontmatter parsing throws, the whole content is returned as the
 *          body with only an inferred `content_type`.
 */
export function parseDocument(content: string, relativePath: string): { body: string; meta: DocumentMeta } {
  try {
    const { data, content: body } = matter(content);
    const declaredType = frontmatterString(data.content_type);
    return {
      body,
      meta: {
        title: frontmatterString(data.title),
        tags: Array.isArray(data.tags) ? data.tags.map(String) : undefined,
        domain: frontmatterString(data.domain),
        workstream: frontmatterString(data.workstream),
        // An empty or non-scalar declared type falls back to inference.
        content_type: declaredType ? (declaredType as ContentType) : inferContentType(relativePath),
        review_by: frontmatterString(data.review_by),
      },
    };
  } catch {
    // If frontmatter parsing fails, treat entire content as body
    return {
      body: content,
      meta: {
        content_type: inferContentType(relativePath),
      },
    };
  }
}
113
+
114
+ // =============================================================================
115
+ // Quality Scoring
116
+ // =============================================================================
117
+
118
/**
 * Heuristic quality score for a document, clamped to [0, 1].
 *
 * Starts at 0.3 and adds weight for length, structure, decision and
 * correction keywords, and populated frontmatter fields (tags, domain,
 * workstream; capped at +0.15). Bodies under 50 characters lose 0.1.
 *
 * @param body - Document body without frontmatter.
 * @param meta - Parsed frontmatter metadata.
 */
export function computeQualityScore(body: string, meta: DocumentMeta): number {
  const size = body.length;

  // Applied in order; each matching signal adds its weight.
  const signals: Array<[boolean, number]> = [
    [size > 200, 0.1], // length
    [size > 500, 0.1],
    [/^##\s+/m.test(body), 0.1], // structure: second-level heading
    [/^[-*]\s+/m.test(body), 0.05], // structure: list item
    [/\b(decided?\s+to|chose|will\s+use|switching\s+to|going\s+with|selected|adopted)\b/i.test(body), 0.15], // decisions
    [/\b(fix(ed)?|bug|resolved|corrected|patched|root\s+cause)\b/i.test(body), 0.1], // corrections
  ];

  let score = 0.3; // Base
  for (const [matched, weight] of signals) {
    if (matched) score += weight;
  }

  // Frontmatter richness: +0.05 per populated field, max +0.15
  const populated = [
    Boolean(meta.tags && meta.tags.length > 0),
    Boolean(meta.domain),
    Boolean(meta.workstream),
  ];
  let metaBonus = 0;
  for (const present of populated) {
    if (present) metaBonus += 0.05;
  }
  score += Math.min(0.15, metaBonus);

  // Penalty for trivial stubs
  if (size < 50) score -= 0.1;

  return Math.max(0, Math.min(1.0, score));
}
151
+
152
+ // =============================================================================
153
+ // Collection Indexing
154
+ // =============================================================================
155
+
156
/**
 * Expand brace alternatives in a glob pattern into individual patterns.
 *
 *   "{MEMORY.md,memory/x.md}" => ["MEMORY.md", "memory/x.md"]
 *   "**\/*.{md,txt}"           => ["**\/*.md", "**\/*.txt"]
 *   "notes/{a,{b,c}}/x.md"    => ["notes/a/x.md", "notes/b/x.md", "notes/c/x.md"]
 *
 * Brace groups may appear anywhere in the pattern, may be nested, and a
 * pattern may contain several groups. Alternatives are trimmed. Patterns
 * without braces, with an empty "{}", or with an unbalanced "{" are returned
 * unchanged apart from any groups that can be expanded.
 *
 * @param pattern - Glob pattern, possibly containing "{a,b}" groups.
 * @returns One pattern per combination of alternatives, in source order.
 */
function expandBraces(pattern: string): string[] {
  const open = pattern.indexOf("{");
  if (open === -1) return [pattern];

  // Find the brace matching the first "{" and the commas at its own depth.
  let depth = 0;
  let close = -1;
  const commas: number[] = [];
  for (let i = open; i < pattern.length; i++) {
    const ch = pattern[i];
    if (ch === "{") {
      depth++;
    } else if (ch === "}") {
      depth--;
      if (depth === 0) {
        close = i;
        break;
      }
    } else if (ch === "," && depth === 1) {
      commas.push(i);
    }
  }
  if (close === -1) return [pattern]; // unbalanced: leave the pattern alone

  const prefix = pattern.slice(0, open);
  const tails = expandBraces(pattern.slice(close + 1));

  let heads: string[];
  if (close === open + 1) {
    heads = ["{}"]; // empty group stays literal
  } else {
    const alternatives: string[] = [];
    let start = open + 1;
    for (const end of [...commas, close]) {
      alternatives.push(pattern.slice(start, end).trim());
      start = end + 1;
    }
    // Nested groups inside an alternative are expanded recursively.
    heads = alternatives.flatMap(alt => expandBraces(alt));
  }

  const out: string[] = [];
  for (const head of heads) {
    for (const tail of tails) {
      out.push(prefix + head + tail);
    }
  }
  return out;
}
164
+
165
/**
 * Synchronize one collection directory with the document store.
 *
 * Scans `collectionPath` for files matching `pattern` (brace alternatives are
 * expanded by expandBraces before globbing), skips excluded paths, and then,
 * inside a single transaction:
 *  - leaves documents whose full-file content hash is unchanged,
 *  - updates active documents whose content changed,
 *  - reactivates a previously deactivated document at the same path, or
 *    inserts a new one,
 *  - deactivates active documents that were not seen in this scan.
 *
 * Post-index enrichment runs only after the transaction has committed, so LLM
 * calls never hold the transaction open.
 *
 * @param store - Document store to update.
 * @param collectionName - Collection the documents belong to.
 * @param collectionPath - Directory scanned for documents.
 * @param pattern - Glob pattern relative to `collectionPath` (default "**\/*.md").
 * @returns Counters describing what changed.
 * @throws Rethrows any error raised while indexing, after attempting a
 *         rollback; errors from enrichment also propagate.
 */
export async function indexCollection(
  store: Store,
  collectionName: string,
  collectionPath: string,
  pattern: string = "**/*.md"
): Promise<IndexStats> {
  const stats: IndexStats = { added: 0, updated: 0, unchanged: 0, removed: 0 };
  const activePaths = new Set<string>();

  // Get LLM instance for A-MEM enrichment
  const llm = getDefaultLlamaCpp();

  // A Set de-duplicates paths matched by several expanded patterns while
  // keeping first-seen order.
  const entries = new Set<string>();
  for (const p of expandBraces(pattern)) {
    const glob = new Glob(p);
    for (const f of glob.scanSync({ cwd: collectionPath, followSymlinks: false, absolute: false })) {
      entries.add(f);
    }
  }

  // Collect doc IDs that need post-index enrichment (deferred until after commit)
  const enrichQueue: { docId: number; isNew: boolean }[] = [];

  // Prepared once and reused for every file instead of re-preparing per row.
  const selectContentHash = store.db.prepare("SELECT content_hash FROM documents WHERE id = ?");
  const updateContentHash = store.db.prepare("UPDATE documents SET content_hash = ? WHERE id = ?");
  const selectInactive = store.db.prepare(
    "SELECT id, hash FROM documents WHERE collection = ? AND path = ? AND active = 0"
  );
  const reactivate = store.db.prepare(
    "UPDATE documents SET active = 1, hash = ?, title = ?, modified_at = ?, content_hash = ? WHERE id = ?"
  );

  // Single source for the SAME metadata payload written on update, reactivate and insert.
  const buildMetaUpdate = (meta: DocumentMeta, contentType: ContentType, mtime: Date, body: string) => ({
    domain: meta.domain,
    workstream: meta.workstream,
    tags: meta.tags ? JSON.stringify(meta.tags) : undefined,
    content_type: contentType,
    review_by: meta.review_by,
    confidence: confidenceScore(contentType, mtime, 0),
    quality_score: computeQualityScore(body, meta),
  });

  // Wrap all DB writes in a transaction for atomicity
  store.db.exec("BEGIN");
  try {
    for (const relativePath of entries) {
      if (shouldExclude(relativePath)) continue;

      activePaths.add(relativePath);
      const absolutePath = `${collectionPath}/${relativePath}`;

      let content: string;
      let mtime: Date;
      try {
        content = readFileSync(absolutePath, "utf-8");
        mtime = statSync(absolutePath).mtime;
      } catch {
        continue; // File may have been deleted between scan and read
      }

      // contentHash covers the whole file (frontmatter included) and drives
      // the incremental check; docHash below covers the body only.
      const contentHash = hashContent(content);
      const now = new Date().toISOString();

      const existing = store.findActiveDocument(collectionName, relativePath);
      if (existing) {
        const existingRow = selectContentHash.get(existing.id) as { content_hash: string | null } | null;
        if (existingRow?.content_hash === contentHash) {
          stats.unchanged++;
          continue;
        }
      }

      // Check for inactive (previously removed) doc at same path — reactivate instead of inserting
      const inactive = existing
        ? null
        : (selectInactive.get(collectionName, relativePath) as { id: number; hash: string } | null);

      const { body, meta } = parseDocument(content, relativePath);
      const title = meta.title || extractTitle(body, relativePath);
      const docHash = hashContent(body);
      const contentType = meta.content_type || inferContentType(relativePath);
      const modifiedAt = mtime.toISOString();

      store.insertContent(docHash, body, now);

      if (existing) {
        // Content changed — update
        store.updateDocument(existing.id, title, docHash, modifiedAt);
        store.updateDocumentMeta(existing.id, buildMetaUpdate(meta, contentType, mtime, body));
        // Update content_hash for next incremental check
        updateContentHash.run(contentHash, existing.id);
        enrichQueue.push({ docId: existing.id, isNew: false });
        stats.updated++;
        continue;
      }

      if (inactive) {
        // Reactivate existing row
        reactivate.run(docHash, title, modifiedAt, contentHash, inactive.id);
        store.updateDocumentMeta(inactive.id, buildMetaUpdate(meta, contentType, mtime, body));
        enrichQueue.push({ docId: inactive.id, isNew: false });
      } else {
        // Truly new document
        store.insertDocument(collectionName, relativePath, title, docHash, now, modifiedAt);
        const newDoc = store.findActiveDocument(collectionName, relativePath);
        if (newDoc) {
          store.updateDocumentMeta(newDoc.id, buildMetaUpdate(meta, contentType, mtime, body));
          updateContentHash.run(contentHash, newDoc.id);
          enrichQueue.push({ docId: newDoc.id, isNew: true });
        }
      }

      stats.added++;
    }

    // Deactivate documents that no longer exist on disk
    for (const storedPath of store.getActiveDocumentPaths(collectionName)) {
      if (!activePaths.has(storedPath)) {
        store.deactivateDocument(collectionName, storedPath);
        stats.removed++;
      }
    }

    store.db.exec("COMMIT");
  } catch (err) {
    try {
      store.db.exec("ROLLBACK");
    } catch {
      // Keep the original error: a failed rollback must not mask it.
    }
    throw err;
  }

  // A-MEM enrichment runs after successful commit (LLM calls should not block transaction)
  for (const { docId, isNew } of enrichQueue) {
    await store.postIndexEnrich(llm, docId, isNew);
  }

  return stats;
}