clawmem 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/AGENTS.md +660 -0
  2. package/CLAUDE.md +660 -0
  3. package/LICENSE +21 -0
  4. package/README.md +993 -0
  5. package/SKILL.md +717 -0
  6. package/bin/clawmem +75 -0
  7. package/package.json +72 -0
  8. package/src/amem.ts +797 -0
  9. package/src/beads.ts +263 -0
  10. package/src/clawmem.ts +1849 -0
  11. package/src/collections.ts +405 -0
  12. package/src/config.ts +178 -0
  13. package/src/consolidation.ts +123 -0
  14. package/src/directory-context.ts +248 -0
  15. package/src/errors.ts +41 -0
  16. package/src/formatter.ts +427 -0
  17. package/src/graph-traversal.ts +247 -0
  18. package/src/hooks/context-surfacing.ts +317 -0
  19. package/src/hooks/curator-nudge.ts +89 -0
  20. package/src/hooks/decision-extractor.ts +639 -0
  21. package/src/hooks/feedback-loop.ts +214 -0
  22. package/src/hooks/handoff-generator.ts +345 -0
  23. package/src/hooks/postcompact-inject.ts +226 -0
  24. package/src/hooks/precompact-extract.ts +314 -0
  25. package/src/hooks/pretool-inject.ts +79 -0
  26. package/src/hooks/session-bootstrap.ts +324 -0
  27. package/src/hooks/staleness-check.ts +130 -0
  28. package/src/hooks.ts +367 -0
  29. package/src/indexer.ts +327 -0
  30. package/src/intent.ts +294 -0
  31. package/src/limits.ts +26 -0
  32. package/src/llm.ts +1175 -0
  33. package/src/mcp.ts +2138 -0
  34. package/src/memory.ts +336 -0
  35. package/src/mmr.ts +93 -0
  36. package/src/observer.ts +269 -0
  37. package/src/openclaw/engine.ts +283 -0
  38. package/src/openclaw/index.ts +221 -0
  39. package/src/openclaw/plugin.json +83 -0
  40. package/src/openclaw/shell.ts +207 -0
  41. package/src/openclaw/tools.ts +304 -0
  42. package/src/profile.ts +346 -0
  43. package/src/promptguard.ts +218 -0
  44. package/src/retrieval-gate.ts +106 -0
  45. package/src/search-utils.ts +127 -0
  46. package/src/server.ts +783 -0
  47. package/src/splitter.ts +325 -0
  48. package/src/store.ts +4062 -0
  49. package/src/validation.ts +67 -0
  50. package/src/watcher.ts +58 -0
package/src/mcp.ts ADDED
@@ -0,0 +1,2138 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * ClawMem MCP Server - Model Context Protocol server
4
+ *
5
+ * Exposes ClawMem search and document retrieval as MCP tools and resources.
6
+ * Includes all QMD tools + SAME memory tools (find_similar, session_log, reindex, index_stats).
7
+ * Documents are accessible via clawmem:// URIs.
8
+ */
9
+
10
+ import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
11
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
12
+ import { z } from "zod";
13
+ import {
14
+ createStore,
15
+ resolveStore,
16
+ extractSnippet,
17
+ extractIntentTerms,
18
+ INTENT_CHUNK_WEIGHT,
19
+ DEFAULT_EMBED_MODEL,
20
+ DEFAULT_QUERY_MODEL,
21
+ DEFAULT_RERANK_MODEL,
22
+ DEFAULT_MULTI_GET_MAX_BYTES,
23
+ type Store,
24
+ type SearchResult,
25
+ type CausalLink,
26
+ type EvolutionEntry,
27
+ } from "./store.ts";
28
+ import {
29
+ applyCompositeScoring,
30
+ hasRecencyIntent,
31
+ type EnrichedResult,
32
+ type CoActivationFn,
33
+ } from "./memory.ts";
34
+ import { enrichResults, reciprocalRankFusion, toRanked, type RankedResult } from "./search-utils.ts";
35
+ import { applyMMRDiversity } from "./mmr.ts";
36
+ import { indexCollection, type IndexStats } from "./indexer.ts";
37
+ import { listCollections } from "./collections.ts";
38
+ import { classifyIntent, decomposeQuery, type IntentType } from "./intent.ts";
39
+ import { adaptiveTraversal, mergeTraversalResults } from "./graph-traversal.ts";
40
+ import { getDefaultLlamaCpp } from "./llm.ts";
41
+ import { startConsolidationWorker, stopConsolidationWorker } from "./consolidation.ts";
42
+ import { listVaults, loadVaultConfig } from "./config.ts";
43
+
44
+ // =============================================================================
45
+ // Types
46
+ // =============================================================================
47
+
48
/** Shape of a single hit returned by the MCP search tools. */
type SearchResultItem = {
  docid: string;            // short document id; rendered with a leading "#"
  file: string;             // display path of the matched document
  title: string;            // document title
  score: number;            // raw retrieval score (BM25 or vector)
  context: string | null;   // per-file context note from the store, if any
  snippet: string;          // extracted excerpt, line-numbered for display
  contentType?: string;     // content type tag; the "note" type is not displayed
  compositeScore?: number;  // memory-weighted score; preferred over `score` when present
};
58
+
59
/** Aggregate index status (presumably returned by a status tool — defined out of view). */
type StatusResult = {
  totalDocuments: number;   // count of indexed documents
  needsEmbedding: number;   // documents not yet vector-embedded
  hasVectorIndex: boolean;  // whether a vector index is available
  collections: {
    name: string;           // collection name
    path: string;           // collection root path
    pattern: string;        // file-matching pattern for the collection
    documents: number;      // documents in this collection
    lastUpdated: string;    // timestamp of the most recent update
  }[];
};
71
+
72
+ // =============================================================================
73
+ // Helpers
74
+ // =============================================================================
75
+
76
+ function encodeClawmemPath(path: string): string {
77
+ return path.split('/').map(segment => encodeURIComponent(segment)).join('/');
78
+ }
79
+
80
+ /** Split text into overlapping windows for intent-aware chunk selection */
81
+ function splitIntoWindows(text: string, windowSize: number, overlap = 200): string[] {
82
+ const windows: string[] = [];
83
+ for (let i = 0; i < text.length; i += windowSize - overlap) {
84
+ windows.push(text.slice(i, i + windowSize));
85
+ if (i + windowSize >= text.length) break;
86
+ }
87
+ return windows.length > 0 ? windows : [text];
88
+ }
89
+
90
+ /** Classify query into retrieval mode based on signal patterns */
91
+ function classifyRetrievalMode(query: string): "keyword" | "semantic" | "causal" | "timeline" | "discovery" | "complex" | "hybrid" {
92
+ const q = query.toLowerCase();
93
+
94
+ // Timeline (highest precision signals — check first)
95
+ if (/\b(last session|yesterday|prior session|previous session|last time we|handoff|what happened last|what did we do|cross.session|earlier today|this morning|what we discussed|when we last)\b/i.test(q)) return "timeline";
96
+
97
+ // Causal
98
+ if (/\b(why did|why was|why were|what caused|what led to|reason for|decided to|decision about|trade.?off|instead of|chose to|because we)\b/i.test(q)) return "causal";
99
+ if (/^why\b/i.test(q)) return "causal";
100
+
101
+ // Discovery
102
+ if (/\b(similar to|related to|what else|what other|reminds? me of|like this|comparable|neighbors)\b/i.test(q)) return "discovery";
103
+
104
+ // Complex multi-topic
105
+ if (/\band\s+(?:also|what|how|why)\b/i.test(q) || /\?.*\?/.test(q) || /\b(?:additionally|as well as|along with)\b/i.test(q) || /\bboth\s+.+\s+and\s+/i.test(q)) return "complex";
106
+
107
+ // Keyword: short + contains specific identifiers/codes/paths
108
+ if (q.length < 50 && (/[A-Z][A-Z0-9_]{2,}/.test(query) || /[\w-]+\.\w{2,4}\b/.test(q.trim()) || /\b(config|setting|error|path|file|port|url)\b/i.test(q))) return "keyword";
109
+
110
+ // Semantic: conceptual/explanatory
111
+ if (/\b(how does|explain|concept|overview|understand|meaning of|what is the purpose)\b/i.test(q)) return "semantic";
112
+
113
+ return "hybrid";
114
+ }
115
+
116
+ function formatSearchSummary(results: SearchResultItem[], query: string): string {
117
+ if (results.length === 0) return `No results found for "${query}"`;
118
+ const lines = [`Found ${results.length} result${results.length === 1 ? '' : 's'} for "${query}":\n`];
119
+ for (const r of results) {
120
+ const scoreStr = r.compositeScore !== undefined
121
+ ? `${Math.round(r.compositeScore * 100)}%`
122
+ : `${Math.round(r.score * 100)}%`;
123
+ const typeTag = r.contentType && r.contentType !== "note" ? ` [${r.contentType}]` : "";
124
+ lines.push(`${r.docid} ${scoreStr} ${r.file} - ${r.title}${typeTag}`);
125
+ }
126
+ return lines.join('\n');
127
+ }
128
+
129
+ function addLineNumbers(text: string, startLine: number = 1): string {
130
+ const lines = text.split('\n');
131
+ return lines.map((line, i) => `${startLine + i}: ${line}`).join('\n');
132
+ }
133
+
134
+ // =============================================================================
135
+ // MCP Server
136
+ // =============================================================================
137
+
138
+ export async function startMcpServer(): Promise<void> {
139
+ const store = createStore(undefined, { busyTimeout: 5000 });
140
+
141
+ // Vault store cache: prevents connection churn, closed on shutdown
142
+ const vaultStoreCache = new Map<string, Store>();
143
+
144
  // Resolve the store for a named vault, memoizing the connection in
  // vaultStoreCache so repeated tool calls reuse it instead of reopening
  // the database each time. Omitting `vault` returns the default store.
  function getStore(vault?: string): Store {
    if (!vault) return store; // default vault -> the shared top-level store
    const cached = vaultStoreCache.get(vault);
    if (cached) return cached;
    const s = resolveStore(vault, { busyTimeout: 5000 });
    vaultStoreCache.set(vault, s);
    return s;
  }
152
+
153
  // Close every cached vault store, then the default store. Close errors are
  // deliberately swallowed: shutdown is best-effort and must not throw.
  function closeAllStores(): void {
    for (const [, s] of vaultStoreCache) {
      try { s.close(); } catch {}
    }
    vaultStoreCache.clear();
    try { store.close(); } catch {}
  }
160
+
161
+ const server = new McpServer({
162
+ name: "clawmem",
163
+ version: "0.1.0",
164
+ });
165
+
166
+ // ---------------------------------------------------------------------------
167
+ // Tool: __IMPORTANT (workflow instructions)
168
+ // ---------------------------------------------------------------------------
169
+
170
+ server.registerTool(
171
+ "__IMPORTANT",
172
+ {
173
+ title: "READ THIS FIRST: Memory search workflow",
174
+ description: "Instructions for efficient memory search. Read this before searching.",
175
+ inputSchema: {},
176
+ },
177
+ async () => ({
178
+ content: [{ type: "text" as const, text: `## ClawMem Search Workflow
179
+
180
+ PREFERRED: Use memory_retrieve(query) — auto-routes to the right backend.
181
+
182
+ If calling tools directly, match query type to tool:
183
+
184
+ "why did we decide X" → intent_search(query) NOT query()
185
+ "what happened last session" → session_log() NOT query()
186
+ "what else relates to X" → find_similar(file) NOT query()
187
+ complex multi-topic → query_plan(query) NOT query()
188
+ general recall → query(query, compact=true)
189
+ keyword spot check → search(query, compact=true)
190
+ conceptual/fuzzy → vsearch(query, compact=true)
191
+
192
+ WRONG: query("why did we choose PostgreSQL", compact=true)
193
+ RIGHT: intent_search("why did we choose PostgreSQL")
194
+ RIGHT: memory_retrieve("why did we choose PostgreSQL")
195
+
196
+ WRONG: query("what happened last session", compact=true)
197
+ RIGHT: session_log(limit=5)
198
+ RIGHT: memory_retrieve("what happened last session")
199
+
200
+ After search: multi_get("path1,path2") for full content of top hits.
201
+ Only escalate when injected <vault-context> is insufficient.` }]
202
+ })
203
+ );
204
+
205
+ // ---------------------------------------------------------------------------
206
+ // Tool: memory_retrieve (Meta-tool — auto-routing single entry point)
207
+ // ---------------------------------------------------------------------------
208
+
209
+ server.registerTool(
210
+ "memory_retrieve",
211
+ {
212
+ title: "Smart Memory Retrieve (Auto-Routing)",
213
+ description: `Unified memory retrieval — classifies your query and routes to the optimal search backend automatically. Use this instead of choosing between search/vsearch/query/intent_search.
214
+
215
+ Auto-routing:
216
+ - "why did we decide X" → causal graph traversal
217
+ - "what happened last session" → session history
218
+ - "what else relates to X" → vector neighbors
219
+ - Complex multi-topic → parallel decomposition
220
+ - General recall → full hybrid search
221
+
222
+ This is the recommended entry point for ALL memory queries.`,
223
+ inputSchema: {
224
+ query: z.string().describe("Your question or search query"),
225
+ mode: z.enum(["auto", "keyword", "semantic", "causal", "timeline", "discovery", "complex", "hybrid"]).optional().default("auto").describe("Override auto-detection: keyword=BM25, semantic=vector, causal=graph traversal, timeline=session history, discovery=similar docs, complex=multi-topic, hybrid=full pipeline"),
226
+ limit: z.number().optional().default(10),
227
+ compact: z.boolean().optional().default(true),
228
+ vault: z.string().optional().describe("Named vault (omit for default vault)"),
229
+ },
230
+ },
231
+ async ({ query, mode, limit, compact, vault }) => {
232
+ const store = getStore(vault);
233
+ const effectiveMode = mode === "auto" ? classifyRetrievalMode(query) : mode;
234
+ const lim = limit || 10;
235
+
236
+ // --- Timeline mode → session log ---
237
+ if (effectiveMode === "timeline") {
238
+ const sessions = store.getRecentSessions(lim);
239
+ if (sessions.length === 0) {
240
+ return { content: [{ type: "text", text: `[routed: timeline] No sessions tracked yet.` }] };
241
+ }
242
+ const lines = [`[routed: timeline] Recent sessions:\n`];
243
+ for (const sess of sessions) {
244
+ const duration = sess.endedAt
245
+ ? `${Math.round((new Date(sess.endedAt).getTime() - new Date(sess.startedAt).getTime()) / 60000)}min`
246
+ : "active";
247
+ lines.push(`${sess.sessionId.slice(0, 8)} ${sess.startedAt} (${duration})`);
248
+ if (sess.handoffPath) lines.push(` Handoff: ${sess.handoffPath}`);
249
+ if (sess.summary) lines.push(` ${sess.summary.slice(0, 100)}`);
250
+ if (sess.filesChanged.length > 0) lines.push(` Files: ${sess.filesChanged.slice(0, 5).join(", ")}`);
251
+ }
252
+ return { content: [{ type: "text", text: lines.join('\n') }], structuredContent: { mode: effectiveMode, sessions } };
253
+ }
254
+
255
+ // --- Causal mode → intent classification + graph traversal ---
256
+ if (effectiveMode === "causal") {
257
+ const llm = getDefaultLlamaCpp();
258
+ const intent = await classifyIntent(query, llm, store.db);
259
+ const bm25Results = store.searchFTS(query, 30);
260
+ let vecResults: SearchResult[] = [];
261
+ try { vecResults = await store.searchVec(query, DEFAULT_EMBED_MODEL, 30); } catch { /* no vectors */ }
262
+ const rrfWeights = intent.intent === 'WHY' ? [1.0, 1.5] : intent.intent === 'WHEN' ? [1.5, 1.0] : [1.0, 1.0];
263
+ const fusedRanked = reciprocalRankFusion([bm25Results.map(toRanked), vecResults.map(toRanked)], rrfWeights);
264
+ const allSearch = [...bm25Results, ...vecResults];
265
+ let fused: SearchResult[] = fusedRanked.map(fr => {
266
+ const orig = allSearch.find(r => r.filepath === fr.file);
267
+ return orig ? { ...orig, score: fr.score } : null;
268
+ }).filter((r): r is SearchResult => r !== null);
269
+
270
+ if (intent.intent === 'WHY' || intent.intent === 'ENTITY') {
271
+ try {
272
+ const anchorEmb = await llm.embed(query);
273
+ if (anchorEmb) {
274
+ const traversed = adaptiveTraversal(store.db, fused.slice(0, 10).map(r => ({ hash: r.hash, score: r.score })), {
275
+ maxDepth: 2, beamWidth: 5, budget: 30,
276
+ intent: intent.intent, queryEmbedding: anchorEmb.embedding,
277
+ });
278
+ const merged = mergeTraversalResults(store.db, fused.map(r => ({ hash: r.hash, score: r.score })), traversed);
279
+ // Hydrate merged results back to SearchResult format
280
+ const fusedMap = new Map(fused.map(r => [r.hash, r]));
281
+ fused = merged.map(m => {
282
+ const orig = fusedMap.get(m.hash);
283
+ if (orig) return { ...orig, score: m.score };
284
+ // Graph-discovered node — hydrate from DB
285
+ const doc = store.db.prepare(`
286
+ SELECT d.collection, d.path, d.title, d.hash, c.doc as body, d.modified_at
287
+ FROM documents d
288
+ LEFT JOIN content c ON c.hash = d.hash
289
+ WHERE d.hash = ? AND d.active = 1 LIMIT 1
290
+ `).get(m.hash) as { collection: string; path: string; title: string; hash: string; body: string | null; modified_at: string } | undefined;
291
+ if (!doc) return null;
292
+ return {
293
+ filepath: `clawmem://${doc.collection}/${doc.path}`,
294
+ displayPath: `${doc.collection}/${doc.path}`,
295
+ title: doc.title || doc.path.split("/").pop() || "",
296
+ context: null,
297
+ hash: doc.hash,
298
+ docid: doc.hash.slice(0, 6),
299
+ collectionName: doc.collection,
300
+ modifiedAt: doc.modified_at || "",
301
+ bodyLength: doc.body?.length || 0,
302
+ body: doc.body || "",
303
+ score: m.score,
304
+ source: "vec" as const,
305
+ } satisfies SearchResult;
306
+ }).filter((r): r is SearchResult => r !== null);
307
+ }
308
+ } catch { /* graph traversal failed — continue with base results */ }
309
+ }
310
+
311
+ const enriched = enrichResults(store, fused, query);
312
+ const scored = applyCompositeScoring(enriched, query).slice(0, lim);
313
+ const items = scored.map(r => ({
314
+ docid: `#${r.docid}`, path: r.displayPath, title: r.title,
315
+ score: Math.round(r.compositeScore * 100) / 100,
316
+ snippet: (r.body || "").substring(0, 150), content_type: r.contentType,
317
+ }));
318
+ return {
319
+ content: [{ type: "text", text: `[routed: causal, intent: ${intent.intent}] ${formatSearchSummary(items.map(i => ({ ...i, file: i.path, compositeScore: i.score, context: null })), query)}` }],
320
+ structuredContent: { mode: effectiveMode, intent, results: items },
321
+ };
322
+ }
323
+
324
+ // --- Complex mode → query decomposition ---
325
+ if (effectiveMode === "complex") {
326
+ const llm = getDefaultLlamaCpp();
327
+ const clauses = await decomposeQuery(query, llm, store.db);
328
+ const allResults: SearchResult[] = [];
329
+ for (const clause of clauses.sort((a, b) => a.priority - b.priority)) {
330
+ let results: SearchResult[] = [];
331
+ if (clause.type === 'bm25') results = store.searchFTS(clause.query, 20, undefined, clause.collections);
332
+ else if (clause.type === 'vector') { try { results = await store.searchVec(clause.query, DEFAULT_EMBED_MODEL, 20, undefined, clause.collections); } catch { /* */ } }
333
+ else if (clause.type === 'graph') { results = store.searchFTS(clause.query, 15, undefined, clause.collections); }
334
+ allResults.push(...results);
335
+ }
336
+ const seen = new Set<string>();
337
+ const deduped = allResults.filter(r => { if (seen.has(r.filepath)) return false; seen.add(r.filepath); return true; });
338
+ const enriched = enrichResults(store, deduped, query);
339
+ const scored = applyCompositeScoring(enriched, query).slice(0, lim);
340
+ const items = scored.map(r => ({
341
+ docid: `#${r.docid}`, path: r.displayPath, title: r.title,
342
+ score: Math.round(r.compositeScore * 100) / 100,
343
+ snippet: (r.body || "").substring(0, 150), content_type: r.contentType,
344
+ }));
345
+ return {
346
+ content: [{ type: "text", text: `[routed: complex, ${clauses.length} clauses] ${formatSearchSummary(items.map(i => ({ ...i, file: i.path, compositeScore: i.score, context: null })), query)}` }],
347
+ structuredContent: { mode: effectiveMode, clauses: clauses.length, results: items },
348
+ };
349
+ }
350
+
351
+ // --- Keyword / Semantic / Discovery / Hybrid modes ---
352
+ let results: SearchResult[] = [];
353
+ if (effectiveMode === "keyword") {
354
+ results = store.searchFTS(query, lim);
355
+ } else if (effectiveMode === "semantic" || effectiveMode === "discovery") {
356
+ try { results = await store.searchVec(query, DEFAULT_EMBED_MODEL, lim); } catch { results = store.searchFTS(query, lim); }
357
+ } else {
358
+ // Hybrid: BM25 + vector + RRF
359
+ const bm25 = store.searchFTS(query, 30);
360
+ let vec: SearchResult[] = [];
361
+ try { vec = await store.searchVec(query, DEFAULT_EMBED_MODEL, 30); } catch { /* */ }
362
+ if (vec.length > 0) {
363
+ const fusedRanked = reciprocalRankFusion([bm25.map(toRanked), vec.map(toRanked)], [1.0, 1.0]);
364
+ const allSearch = [...bm25, ...vec];
365
+ results = fusedRanked.map(fr => {
366
+ const orig = allSearch.find(r => r.filepath === fr.file);
367
+ return orig ? { ...orig, score: fr.score } : null;
368
+ }).filter((r): r is SearchResult => r !== null);
369
+ } else {
370
+ results = bm25;
371
+ }
372
+ }
373
+
374
+ const enriched = enrichResults(store, results, query);
375
+ const scored = applyCompositeScoring(enriched, query).slice(0, lim);
376
+ if (compact) {
377
+ const items = scored.map(r => ({
378
+ docid: `#${r.docid}`, path: r.displayPath, title: r.title,
379
+ score: Math.round(r.compositeScore * 100) / 100,
380
+ snippet: (r.body || "").substring(0, 150), content_type: r.contentType,
381
+ }));
382
+ return {
383
+ content: [{ type: "text", text: `[routed: ${effectiveMode}] ${formatSearchSummary(items.map(i => ({ ...i, file: i.path, compositeScore: i.score, context: null })), query)}` }],
384
+ structuredContent: { mode: effectiveMode, results: items },
385
+ };
386
+ }
387
+ const items: SearchResultItem[] = scored.map(r => {
388
+ const { line, snippet } = extractSnippet(r.body || "", query, 300, r.chunkPos);
389
+ return {
390
+ docid: `#${r.docid}`, file: r.displayPath, title: r.title,
391
+ score: r.score, compositeScore: Math.round(r.compositeScore * 100) / 100,
392
+ contentType: r.contentType, context: store.getContextForFile(r.filepath),
393
+ snippet: addLineNumbers(snippet, line),
394
+ };
395
+ });
396
+ return {
397
+ content: [{ type: "text", text: `[routed: ${effectiveMode}] ${formatSearchSummary(items, query)}` }],
398
+ structuredContent: { mode: effectiveMode, results: items },
399
+ };
400
+ }
401
+ );
402
+
403
+ // ---------------------------------------------------------------------------
404
+ // Resource: clawmem://{path}
405
+ // ---------------------------------------------------------------------------
406
+
407
+ server.registerResource(
408
+ "document",
409
+ new ResourceTemplate("clawmem://{+path}", { list: undefined }),
410
+ {
411
+ title: "ClawMem Document",
412
+ description: "A document from your ClawMem knowledge base.",
413
+ mimeType: "text/markdown",
414
+ },
415
+ async (uri, { path }) => {
416
+ const pathStr = Array.isArray(path) ? path.join('/') : (path || '');
417
+ const decodedPath = decodeURIComponent(pathStr);
418
+ const parts = decodedPath.split('/');
419
+ const collection = parts[0] || '';
420
+ const relativePath = parts.slice(1).join('/');
421
+
422
+ let doc = store.db.prepare(`
423
+ SELECT d.collection, d.path, d.title, c.doc as body
424
+ FROM documents d JOIN content c ON c.hash = d.hash
425
+ WHERE d.collection = ? AND d.path = ? AND d.active = 1
426
+ `).get(collection, relativePath) as { collection: string; path: string; title: string; body: string } | null;
427
+
428
+ if (!doc) {
429
+ doc = store.db.prepare(`
430
+ SELECT d.collection, d.path, d.title, c.doc as body
431
+ FROM documents d JOIN content c ON c.hash = d.hash
432
+ WHERE d.path LIKE ? AND d.active = 1 LIMIT 1
433
+ `).get(`%${relativePath}`) as typeof doc;
434
+ }
435
+
436
+ if (!doc) {
437
+ return { contents: [{ uri: uri.href, text: `Document not found: ${decodedPath}` }] };
438
+ }
439
+
440
+ const virtualPath = `clawmem://${doc.collection}/${doc.path}`;
441
+ const context = store.getContextForFile(virtualPath);
442
+ let text = addLineNumbers(doc.body);
443
+ if (context) text = `<!-- Context: ${context} -->\n\n` + text;
444
+
445
+ return {
446
+ contents: [{
447
+ uri: uri.href,
448
+ name: `${doc.collection}/${doc.path}`,
449
+ title: doc.title || doc.path,
450
+ mimeType: "text/markdown",
451
+ text,
452
+ }],
453
+ };
454
+ }
455
+ );
456
+
457
+ // ---------------------------------------------------------------------------
458
+ // Tool: search (BM25 + composite)
459
+ // ---------------------------------------------------------------------------
460
+
461
+ server.registerTool(
462
+ "search",
463
+ {
464
+ title: "Search (BM25 + Memory)",
465
+ description: "Keyword (BM25) search for exact term lookup. Use for config names, error codes, specific filenames. DO NOT use for 'why' questions (use intent_search) or cross-session queries (use session_log). Prefer memory_retrieve for auto-routing.",
466
+ inputSchema: {
467
+ query: z.string().describe("Search query"),
468
+ limit: z.number().optional().default(10),
469
+ minScore: z.number().optional().default(0),
470
+ collection: z.string().optional().describe("Filter to collection (single name or comma-separated)"),
471
+ compact: z.boolean().optional().default(false).describe("Return compact results (id, path, title, score, snippet) instead of full content"),
472
+ vault: z.string().optional().describe("Named vault (omit for default vault)"),
473
+ },
474
+ },
475
+ async ({ query, limit, minScore, collection, compact, vault }) => {
476
+ const store = getStore(vault);
477
+ const collections = collection
478
+ ? collection.split(",").map(c => c.trim()).filter(Boolean)
479
+ : undefined;
480
+ const results = store.searchFTS(query, limit || 10, undefined, collections);
481
+
482
+ const coFn = (path: string) => store.getCoActivated(path);
483
+ const enriched = enrichResults(store, results, query);
484
+ const scored = applyCompositeScoring(enriched, query, coFn)
485
+ .filter(r => r.compositeScore >= (minScore || 0));
486
+
487
+ if (compact) {
488
+ const items = scored.map(r => ({
489
+ docid: `#${r.docid}`, path: r.displayPath, title: r.title,
490
+ score: Math.round((r.compositeScore ?? r.score) * 100) / 100,
491
+ snippet: (r.body || "").substring(0, 150), content_type: r.contentType, modified_at: r.modifiedAt,
492
+ fragment: r.fragmentType ? { type: r.fragmentType, label: r.fragmentLabel } : undefined,
493
+ }));
494
+ return { content: [{ type: "text", text: formatSearchSummary(items.map(i => ({ ...i, file: i.path, compositeScore: i.score, context: null })), query) }], structuredContent: { results: items } };
495
+ }
496
+
497
+ const filtered: SearchResultItem[] = scored.map(r => {
498
+ const { line, snippet } = extractSnippet(r.body || "", query, 300, r.chunkPos);
499
+ return {
500
+ docid: `#${r.docid}`,
501
+ file: r.displayPath,
502
+ title: r.title,
503
+ score: r.score,
504
+ compositeScore: Math.round(r.compositeScore * 100) / 100,
505
+ contentType: r.contentType,
506
+ context: store.getContextForFile(r.filepath),
507
+ snippet: addLineNumbers(snippet, line),
508
+ };
509
+ });
510
+
511
+ return {
512
+ content: [{ type: "text", text: formatSearchSummary(filtered, query) }],
513
+ structuredContent: { results: filtered },
514
+ };
515
+ }
516
+ );
517
+
518
+ // ---------------------------------------------------------------------------
519
+ // Tool: vsearch (Vector + composite)
520
+ // ---------------------------------------------------------------------------
521
+
522
+ server.registerTool(
523
+ "vsearch",
524
+ {
525
+ title: "Vector Search (Semantic + Memory)",
526
+ description: "Vector similarity search for conceptual/fuzzy matching. Use when exact keywords are unknown. DO NOT use for causal 'why' questions (use intent_search) or session history (use session_log). Prefer memory_retrieve for auto-routing.",
527
+ inputSchema: {
528
+ query: z.string().describe("Natural language query"),
529
+ limit: z.number().optional().default(10),
530
+ minScore: z.number().optional().default(0.3),
531
+ collection: z.string().optional().describe("Filter to collection (single name or comma-separated)"),
532
+ compact: z.boolean().optional().default(false).describe("Return compact results (id, path, title, score, snippet) instead of full content"),
533
+ vault: z.string().optional().describe("Named vault (omit for default vault)"),
534
+ },
535
+ },
536
+ async ({ query, limit, minScore, collection, compact, vault }) => {
537
+ const store = getStore(vault);
538
+ const tableExists = store.db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
539
+ if (!tableExists) {
540
+ return { content: [{ type: "text", text: "Vector index not found. Run 'clawmem embed' first." }], isError: true };
541
+ }
542
+
543
+ const collections = collection
544
+ ? collection.split(",").map(c => c.trim()).filter(Boolean)
545
+ : undefined;
546
+ const results = await store.searchVec(query, DEFAULT_EMBED_MODEL, limit || 10, undefined, collections);
547
+
548
+ const coFn = (path: string) => store.getCoActivated(path);
549
+ const enriched = enrichResults(store, results, query);
550
+ const scored = applyCompositeScoring(enriched, query, coFn)
551
+ .filter(r => r.compositeScore >= (minScore || 0.3));
552
+
553
+ if (compact) {
554
+ const items = scored.map(r => ({
555
+ docid: `#${r.docid}`, path: r.displayPath, title: r.title,
556
+ score: Math.round((r.compositeScore ?? r.score) * 100) / 100,
557
+ snippet: (r.body || "").substring(0, 150), content_type: r.contentType, modified_at: r.modifiedAt,
558
+ fragment: r.fragmentType ? { type: r.fragmentType, label: r.fragmentLabel } : undefined,
559
+ }));
560
+ return { content: [{ type: "text", text: formatSearchSummary(items.map(i => ({ ...i, file: i.path, compositeScore: i.score, context: null })), query) }], structuredContent: { results: items } };
561
+ }
562
+
563
+ const items: SearchResultItem[] = scored.map(r => {
564
+ const { line, snippet } = extractSnippet(r.body || "", query, 300, r.chunkPos);
565
+ return {
566
+ docid: `#${r.docid}`,
567
+ file: r.displayPath,
568
+ title: r.title,
569
+ score: r.score,
570
+ compositeScore: Math.round(r.compositeScore * 100) / 100,
571
+ contentType: r.contentType,
572
+ context: store.getContextForFile(r.filepath),
573
+ snippet: addLineNumbers(snippet, line),
574
+ };
575
+ });
576
+
577
+ return {
578
+ content: [{ type: "text", text: formatSearchSummary(items, query) }],
579
+ structuredContent: { results: items },
580
+ };
581
+ }
582
+ );
583
+
584
+ // ---------------------------------------------------------------------------
585
+ // Tool: query (Hybrid + rerank + composite)
586
+ // ---------------------------------------------------------------------------
587
+
588
+ server.registerTool(
589
+ "query",
590
+ {
591
+ title: "Hybrid Query (Best Quality)",
592
+ description: "Full hybrid search (BM25 + vector + rerank). General-purpose — use when query type is unclear. WRONG: query('why did we decide X') — use intent_search instead. WRONG: query('what happened last session') — use session_log instead. Prefer memory_retrieve for auto-routing.",
593
+ inputSchema: {
594
+ query: z.string().describe("Natural language query"),
595
+ limit: z.number().optional().default(10),
596
+ minScore: z.number().optional().default(0),
597
+ collection: z.string().optional().describe("Filter to collection (single name or comma-separated)"),
598
+ compact: z.boolean().optional().default(false).describe("Return compact results (id, path, title, score, snippet) instead of full content"),
599
+ diverse: z.boolean().optional().default(true).describe("Apply MMR diversity filter to reduce near-duplicate results"),
600
+ intent: z.string().optional().describe("Domain intent hint for disambiguation — steers expansion, reranking, chunk selection, and snippet extraction"),
601
+ candidateLimit: z.number().optional().default(30).describe("Max candidates reaching the reranker (default 30)"),
602
+ vault: z.string().optional().describe("Named vault (omit for default vault)"),
603
+ },
604
+ },
605
+ async ({ query, limit, minScore, collection, compact, diverse, intent, candidateLimit, vault }) => {
606
+ const store = getStore(vault);
607
+ const candLimit = candidateLimit || 30;
608
+ const rankedLists: RankedResult[][] = [];
609
+ const docidMap = new Map<string, string>();
610
+ const hasVectors = !!store.db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
611
+
612
+ // Step 1: BM25 probe — skip expensive LLM expansion if strong signal
613
+ const collections = collection
614
+ ? collection.split(",").map(c => c.trim()).filter(Boolean)
615
+ : undefined;
616
+ const initialFts = store.searchFTS(query, 20, undefined, collections);
617
+ const topScore = initialFts.length > 0 ? Math.abs(initialFts[0]!.score) : 0;
618
+ const secondScore = initialFts.length > 1 ? Math.abs(initialFts[1]!.score) : 0;
619
+ // When intent is provided, disable strong-signal bypass — the obvious BM25
620
+ // match may not be what the caller wants (e.g. "performance" with intent "web page load times")
621
+ const hasStrongSignal = !intent && initialFts.length > 0
622
+ && topScore >= 0.85 && (topScore - secondScore) >= 0.15;
623
+
624
+ // Step 2: Query expansion (skipped if strong signal)
625
+ const queries = hasStrongSignal
626
+ ? [query]
627
+ : await store.expandQuery(query, DEFAULT_QUERY_MODEL, intent);
628
+
629
+ for (const q of queries) {
630
+ const ftsResults = q === query ? initialFts : store.searchFTS(q, 20, undefined, collections);
631
+ if (ftsResults.length > 0) {
632
+ for (const r of ftsResults) docidMap.set(r.filepath, r.docid);
633
+ rankedLists.push(ftsResults.map(r => ({ file: r.filepath, displayPath: r.displayPath, title: r.title, body: r.body || "", score: r.score })));
634
+ }
635
+ if (hasVectors) {
636
+ const vecResults = await store.searchVec(q, DEFAULT_EMBED_MODEL, 20, undefined, collections);
637
+ if (vecResults.length > 0) {
638
+ for (const r of vecResults) docidMap.set(r.filepath, r.docid);
639
+ rankedLists.push(vecResults.map(r => ({ file: r.filepath, displayPath: r.displayPath, title: r.title, body: r.body || "", score: r.score })));
640
+ }
641
+ }
642
+ }
643
+
644
+ const weights = rankedLists.map((_, i) => i < 2 ? 2.0 : 1.0);
645
+ const fused = reciprocalRankFusion(rankedLists, weights);
646
+ const candidates = fused.slice(0, candLimit);
647
+
648
+ // Step 3: Intent-aware chunk selection for reranking
649
+ const intentTerms = intent ? extractIntentTerms(intent) : [];
650
+ const chunksToRerank = candidates.map(c => {
651
+ let text = c.body.slice(0, 4000);
652
+ // When intent is provided, select the chunk with highest intent+query relevance
653
+ if (intentTerms.length > 0 && c.body.length > 4000) {
654
+ const chunks = splitIntoWindows(c.body, 4000);
655
+ let bestChunk = chunks[0]!;
656
+ let bestScore = -1;
657
+ const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 0);
658
+ for (const chunk of chunks) {
659
+ const lower = chunk.toLowerCase();
660
+ let score = 0;
661
+ for (const term of queryTerms) { if (lower.includes(term)) score += 1.0; }
662
+ for (const term of intentTerms) { if (lower.includes(term)) score += INTENT_CHUNK_WEIGHT; }
663
+ if (score > bestScore) { bestScore = score; bestChunk = chunk; }
664
+ }
665
+ text = bestChunk;
666
+ }
667
+ return { file: c.file, text };
668
+ });
669
+
670
+ const reranked = await store.rerank(query, chunksToRerank, DEFAULT_RERANK_MODEL, intent);
671
+
672
+ const candidateMap = new Map(candidates.map(c => [c.file, c]));
673
+ const rrfRankMap = new Map(candidates.map((c, i) => [c.file, i + 1]));
674
+
675
+ // Blend RRF + reranker scores (position-aware)
676
+ const blended = reranked.map(r => {
677
+ const rrfRank = rrfRankMap.get(r.file) || candidates.length;
678
+ const rrfWeight = rrfRank <= 3 ? 0.75 : rrfRank <= 10 ? 0.60 : 0.40;
679
+ const blendedScore = rrfWeight * (1 / rrfRank) + (1 - rrfWeight) * r.score;
680
+ return { file: r.file, score: blendedScore };
681
+ });
682
+ blended.sort((a, b) => b.score - a.score);
683
+
684
+ // Map to SearchResults for composite scoring
685
+ const allSearchResults = [...store.searchFTS(query, 30)];
686
+ const resultMap = new Map(allSearchResults.map(r => [r.filepath, r]));
687
+ const searchResults = blended
688
+ .map(b => {
689
+ const r = resultMap.get(b.file) || candidateMap.get(b.file);
690
+ if (!r) return null;
691
+ return { ...r, score: b.score, filepath: b.file } as SearchResult;
692
+ })
693
+ .filter((r): r is SearchResult => r !== null);
694
+
695
+ const coFn = (path: string) => store.getCoActivated(path);
696
+ const enriched = enrichResults(store, searchResults, query);
697
+ let scored = applyCompositeScoring(enriched, query, coFn)
698
+ .filter(r => r.compositeScore >= (minScore || 0));
699
+ if (diverse !== false) scored = applyMMRDiversity(scored);
700
+ scored = scored.slice(0, limit || 10);
701
+
702
+ if (compact) {
703
+ const items = scored.map(r => ({
704
+ docid: `#${docidMap.get(r.filepath) || r.docid}`, path: r.displayPath, title: r.title,
705
+ score: Math.round((r.compositeScore ?? r.score) * 100) / 100,
706
+ snippet: (r.body || "").substring(0, 150), content_type: r.contentType, modified_at: r.modifiedAt,
707
+ fragment: r.fragmentType ? { type: r.fragmentType, label: r.fragmentLabel } : undefined,
708
+ }));
709
+ return { content: [{ type: "text", text: formatSearchSummary(items.map(i => ({ ...i, file: i.path, compositeScore: i.score, context: null })), query) }], structuredContent: { results: items } };
710
+ }
711
+
712
+ const items: SearchResultItem[] = scored.map(r => {
713
+ const { line, snippet } = extractSnippet(r.body || "", query, 300, r.chunkPos, intent);
714
+ return {
715
+ docid: `#${docidMap.get(r.filepath) || r.docid}`,
716
+ file: r.displayPath,
717
+ title: r.title,
718
+ score: r.score,
719
+ compositeScore: Math.round(r.compositeScore * 100) / 100,
720
+ contentType: r.contentType,
721
+ context: store.getContextForFile(r.filepath),
722
+ snippet: addLineNumbers(snippet, line),
723
+ };
724
+ });
725
+
726
+ return {
727
+ content: [{ type: "text", text: formatSearchSummary(items, query) }],
728
+ structuredContent: { results: items },
729
+ };
730
+ }
731
+ );
732
+
733
+ // ---------------------------------------------------------------------------
734
+ // Tool: memory_forget
735
+ // ---------------------------------------------------------------------------
736
+
737
+ server.registerTool(
738
+ "memory_forget",
739
+ {
740
+ title: "Forget Memory",
741
+ description: "Remove a memory by searching for the closest match and deactivating it.",
742
+ inputSchema: {
743
+ query: z.string().describe("What to forget — searches for the closest match"),
744
+ confirm: z.boolean().optional().default(true).describe("If true, deactivates the best match. If false, just shows what would be forgotten."),
745
+ vault: z.string().optional().describe("Named vault (omit for default vault)"),
746
+ },
747
+ },
748
+ async ({ query, confirm, vault }) => {
749
+ const store = getStore(vault);
750
+ const results = store.searchFTS(query, 5);
751
+ if (results.length === 0) {
752
+ return { content: [{ type: "text", text: `No matching memory found for "${query}"` }] };
753
+ }
754
+
755
+ const best = results[0]!;
756
+ const parts = best.displayPath.split("/");
757
+ const collection = parts[0]!;
758
+ const path = parts.slice(1).join("/");
759
+
760
+ if (!confirm) {
761
+ return {
762
+ content: [{ type: "text", text: `Would forget: ${best.displayPath} — "${best.title}" (score ${Math.round(best.score * 100)}%)` }],
763
+ structuredContent: { path: best.displayPath, title: best.title, score: best.score, action: "preview" },
764
+ };
765
+ }
766
+
767
+ store.deactivateDocument(collection, path);
768
+
769
+ // Log the deletion as audit trail
770
+ store.insertUsage({
771
+ sessionId: "mcp-forget",
772
+ timestamp: new Date().toISOString(),
773
+ hookName: "memory_forget",
774
+ injectedPaths: [best.displayPath],
775
+ estimatedTokens: 0,
776
+ wasReferenced: 0,
777
+ });
778
+
779
+ return {
780
+ content: [{ type: "text", text: `Forgotten: ${best.displayPath} — "${best.title}"` }],
781
+ structuredContent: { path: best.displayPath, title: best.title, action: "deactivated" },
782
+ };
783
+ }
784
+ );
785
+
786
+ // ---------------------------------------------------------------------------
787
+ // Tool: profile
788
+ // ---------------------------------------------------------------------------
789
+
790
+ server.registerTool(
791
+ "profile",
792
+ {
793
+ title: "User Profile",
794
+ description: "Get the current user profile (static facts + dynamic context). Rebuild if stale.",
795
+ inputSchema: {
796
+ rebuild: z.boolean().optional().default(false).describe("Force rebuild the profile"),
797
+ },
798
+ },
799
+ async ({ rebuild }) => {
800
+ const { getProfile: gp, updateProfile: up, isProfileStale: ips } = await import("./profile.ts");
801
+
802
+ if (rebuild || ips(store)) {
803
+ up(store);
804
+ }
805
+
806
+ const profile = gp(store);
807
+ if (!profile) {
808
+ return { content: [{ type: "text", text: "No profile available. Try: profile(rebuild=true)" }] };
809
+ }
810
+
811
+ const lines: string[] = [];
812
+ if (profile.static.length > 0) {
813
+ lines.push("## Known Context");
814
+ for (const f of profile.static) lines.push(`- ${f}`);
815
+ }
816
+ if (profile.dynamic.length > 0) {
817
+ lines.push("", "## Current Focus");
818
+ for (const d of profile.dynamic) lines.push(`- ${d}`);
819
+ }
820
+
821
+ return { content: [{ type: "text", text: lines.join("\n") || "Profile is empty." }] };
822
+ }
823
+ );
824
+
825
+ // ---------------------------------------------------------------------------
826
+ // Tool: get (Retrieve document)
827
+ // ---------------------------------------------------------------------------
828
+
829
+ server.registerTool(
830
+ "get",
831
+ {
832
+ title: "Get Document",
833
+ description: "Retrieve document by file path or docid.",
834
+ inputSchema: {
835
+ file: z.string().describe("File path or docid (#abc123)"),
836
+ fromLine: z.number().optional(),
837
+ maxLines: z.number().optional(),
838
+ lineNumbers: z.boolean().optional().default(false),
839
+ vault: z.string().optional().describe("Named vault (omit for default vault)"),
840
+ },
841
+ },
842
+ async ({ file, fromLine, maxLines, lineNumbers, vault }) => {
843
+ const store = getStore(vault);
844
+ let parsedFromLine = fromLine;
845
+ let lookup = file;
846
+ const colonMatch = lookup.match(/:(\d+)$/);
847
+ if (colonMatch?.[1] && parsedFromLine === undefined) {
848
+ parsedFromLine = parseInt(colonMatch[1], 10);
849
+ lookup = lookup.slice(0, -colonMatch[0].length);
850
+ }
851
+
852
+ const result = store.findDocument(lookup, { includeBody: false });
853
+ if ("error" in result) {
854
+ let msg = `Document not found: ${file}`;
855
+ if (result.similarFiles.length > 0) {
856
+ msg += `\n\nDid you mean?\n${result.similarFiles.map(s => ` - ${s}`).join('\n')}`;
857
+ }
858
+ return { content: [{ type: "text", text: msg }], isError: true };
859
+ }
860
+
861
+ const body = store.getDocumentBody(result, parsedFromLine, maxLines) ?? "";
862
+ let text = body;
863
+ if (lineNumbers) text = addLineNumbers(text, parsedFromLine || 1);
864
+ if (result.context) text = `<!-- Context: ${result.context} -->\n\n` + text;
865
+
866
+ return {
867
+ content: [{
868
+ type: "resource",
869
+ resource: {
870
+ uri: `clawmem://${encodeClawmemPath(result.displayPath)}`,
871
+ name: result.displayPath,
872
+ title: result.title,
873
+ mimeType: "text/markdown",
874
+ text,
875
+ },
876
+ }],
877
+ };
878
+ }
879
+ );
880
+
881
+ // ---------------------------------------------------------------------------
882
+ // Tool: multi_get (Retrieve multiple documents)
883
+ // ---------------------------------------------------------------------------
884
+
885
+ server.registerTool(
886
+ "multi_get",
887
+ {
888
+ title: "Multi-Get Documents",
889
+ description: "Retrieve multiple documents by glob pattern or comma-separated list.",
890
+ inputSchema: {
891
+ pattern: z.string().describe("Glob pattern or comma-separated paths"),
892
+ maxLines: z.number().optional(),
893
+ maxBytes: z.number().optional().default(10240),
894
+ lineNumbers: z.boolean().optional().default(false),
895
+ vault: z.string().optional().describe("Named vault (omit for default vault)"),
896
+ },
897
+ },
898
+ async ({ pattern, maxLines, maxBytes, lineNumbers, vault }) => {
899
+ const store = getStore(vault);
900
+ const { docs, errors } = store.findDocuments(pattern, { includeBody: true, maxBytes: maxBytes || DEFAULT_MULTI_GET_MAX_BYTES });
901
+ if (docs.length === 0 && errors.length === 0) {
902
+ return { content: [{ type: "text", text: `No files matched: ${pattern}` }], isError: true };
903
+ }
904
+
905
+ const content: any[] = [];
906
+ if (errors.length > 0) content.push({ type: "text", text: `Errors:\n${errors.join('\n')}` });
907
+
908
+ for (const result of docs) {
909
+ if (result.skipped) {
910
+ content.push({ type: "text", text: `[SKIPPED: ${result.doc.displayPath} - ${result.skipReason}]` });
911
+ continue;
912
+ }
913
+ let text = result.doc.body || "";
914
+ if (maxLines !== undefined) {
915
+ const lines = text.split("\n");
916
+ text = lines.slice(0, maxLines).join("\n");
917
+ if (lines.length > maxLines) text += `\n\n[... truncated ${lines.length - maxLines} more lines]`;
918
+ }
919
+ if (lineNumbers) text = addLineNumbers(text);
920
+ if (result.doc.context) text = `<!-- Context: ${result.doc.context} -->\n\n` + text;
921
+
922
+ content.push({
923
+ type: "resource",
924
+ resource: {
925
+ uri: `clawmem://${encodeClawmemPath(result.doc.displayPath)}`,
926
+ name: result.doc.displayPath,
927
+ title: result.doc.title,
928
+ mimeType: "text/markdown",
929
+ text,
930
+ },
931
+ });
932
+ }
933
+ return { content };
934
+ }
935
+ );
936
+
937
+ // ---------------------------------------------------------------------------
938
+ // Tool: status
939
+ // ---------------------------------------------------------------------------
940
+
941
+ server.registerTool(
942
+ "status",
943
+ {
944
+ title: "Index Status",
945
+ description: "Show ClawMem index status with content type distribution.",
946
+ inputSchema: {
947
+ vault: z.string().optional().describe("Named vault (omit for default vault)"),
948
+ },
949
+ },
950
+ async ({ vault }) => {
951
+ const store = getStore(vault);
952
+ const status: StatusResult = store.getStatus();
953
+
954
+ // Add content type distribution
955
+ const typeCounts = store.db.prepare(`
956
+ SELECT content_type, COUNT(*) as count FROM documents WHERE active = 1 GROUP BY content_type ORDER BY count DESC
957
+ `).all() as { content_type: string; count: number }[];
958
+
959
+ const summary = [
960
+ `ClawMem Index Status:`,
961
+ ` Total documents: ${status.totalDocuments}`,
962
+ ` Needs embedding: ${status.needsEmbedding}`,
963
+ ` Vector index: ${status.hasVectorIndex ? 'yes' : 'no'}`,
964
+ ` Collections: ${status.collections.length}`,
965
+ ];
966
+ for (const col of status.collections) {
967
+ summary.push(` - ${col.name}: ${col.path} (${col.documents} docs)`);
968
+ }
969
+ if (typeCounts.length > 0) {
970
+ summary.push(` Content types:`);
971
+ for (const t of typeCounts) {
972
+ summary.push(` - ${t.content_type}: ${t.count}`);
973
+ }
974
+ }
975
+
976
+ return {
977
+ content: [{ type: "text", text: summary.join('\n') }],
978
+ structuredContent: { ...status, contentTypes: typeCounts },
979
+ };
980
+ }
981
+ );
982
+
983
+ // ---------------------------------------------------------------------------
984
+ // Tool: find_similar (NEW - SAME)
985
+ // ---------------------------------------------------------------------------
986
+
987
+ server.registerTool(
988
+ "find_similar",
989
+ {
990
+ title: "Find Similar Notes",
991
+ description: "USE THIS for 'what else relates to X', 'show me similar docs'. Finds k-NN vector neighbors of a reference document — discovers connections beyond keyword overlap that search/query cannot find.",
992
+ inputSchema: {
993
+ file: z.string().describe("Path of reference document"),
994
+ limit: z.number().optional().default(5),
995
+ vault: z.string().optional().describe("Named vault (omit for default vault)"),
996
+ },
997
+ },
998
+ async ({ file, limit, vault }) => {
999
+ const store = getStore(vault);
1000
+ const tableExists = store.db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
1001
+ if (!tableExists) {
1002
+ return { content: [{ type: "text", text: "Vector index not found. Run 'clawmem embed' first." }], isError: true };
1003
+ }
1004
+
1005
+ // Get the reference document's body
1006
+ const result = store.findDocument(file, { includeBody: false });
1007
+ if ("error" in result) {
1008
+ return { content: [{ type: "text", text: `Document not found: ${file}` }], isError: true };
1009
+ }
1010
+
1011
+ const body = store.getDocumentBody(result) ?? "";
1012
+ const title = result.title || file;
1013
+
1014
+ // Use the document's content as the search query
1015
+ const queryText = `${title}\n${body.slice(0, 1000)}`;
1016
+ const vecResults = await store.searchVec(queryText, DEFAULT_EMBED_MODEL, (limit || 5) + 1);
1017
+
1018
+ // Filter out the reference document itself
1019
+ const similar = vecResults
1020
+ .filter(r => r.filepath !== result.filepath)
1021
+ .slice(0, limit || 5);
1022
+
1023
+ const items: SearchResultItem[] = similar.map(r => {
1024
+ const { line, snippet } = extractSnippet(r.body || "", title, 200);
1025
+ return {
1026
+ docid: `#${r.docid}`,
1027
+ file: r.displayPath,
1028
+ title: r.title,
1029
+ score: Math.round(r.score * 100) / 100,
1030
+ context: store.getContextForFile(r.filepath),
1031
+ snippet: addLineNumbers(snippet, line),
1032
+ };
1033
+ });
1034
+
1035
+ return {
1036
+ content: [{ type: "text", text: `${items.length} similar to "${title}":\n${items.map(i => ` ${i.file} (${Math.round(i.score * 100)}%)`).join('\n')}` }],
1037
+ structuredContent: { reference: file, results: items },
1038
+ };
1039
+ }
1040
+ );
1041
+
1042
+ // ---------------------------------------------------------------------------
1043
+ // Tool: reindex (NEW - SAME)
1044
+ // ---------------------------------------------------------------------------
1045
+
1046
+ server.registerTool(
1047
+ "reindex",
1048
+ {
1049
+ title: "Re-index Collections",
1050
+ description: "Trigger a re-scan of all collections. Detects new, changed, and deleted documents.",
1051
+ inputSchema: {
1052
+ vault: z.string().optional().describe("Named vault (omit for default vault)"),
1053
+ },
1054
+ },
1055
+ async ({ vault }) => {
1056
+ const store = getStore(vault);
1057
+ const collections = listCollections();
1058
+ const totalStats: IndexStats = { added: 0, updated: 0, unchanged: 0, removed: 0 };
1059
+
1060
+ for (const col of collections) {
1061
+ const stats = await indexCollection(store, col.name, col.path, col.pattern);
1062
+ totalStats.added += stats.added;
1063
+ totalStats.updated += stats.updated;
1064
+ totalStats.unchanged += stats.unchanged;
1065
+ totalStats.removed += stats.removed;
1066
+ }
1067
+
1068
+ const summary = `Reindex complete: +${totalStats.added} added, ~${totalStats.updated} updated, =${totalStats.unchanged} unchanged, -${totalStats.removed} removed`;
1069
+ return {
1070
+ content: [{ type: "text" as const, text: summary }],
1071
+ structuredContent: { ...totalStats } as Record<string, unknown>,
1072
+ };
1073
+ }
1074
+ );
1075
+
1076
+ // ---------------------------------------------------------------------------
1077
+ // Tool: index_stats (NEW - SAME)
1078
+ // ---------------------------------------------------------------------------
1079
+
1080
+ server.registerTool(
1081
+ "index_stats",
1082
+ {
1083
+ title: "Index Statistics",
1084
+ description: "Detailed index statistics with content type distribution, staleness info, and memory health.",
1085
+ inputSchema: {
1086
+ vault: z.string().optional().describe("Named vault (omit for default vault)"),
1087
+ },
1088
+ },
1089
+ async ({ vault }) => {
1090
+ const store = getStore(vault);
1091
+ const status = store.getStatus();
1092
+ const typeCounts = store.db.prepare(
1093
+ `SELECT content_type, COUNT(*) as count FROM documents WHERE active = 1 GROUP BY content_type ORDER BY count DESC`
1094
+ ).all() as { content_type: string; count: number }[];
1095
+
1096
+ const staleCount = store.db.prepare(
1097
+ `SELECT COUNT(*) as count FROM documents WHERE active = 1 AND review_by IS NOT NULL AND review_by <= ?`
1098
+ ).get(new Date().toISOString()) as { count: number };
1099
+
1100
+ const recentSessions = store.getRecentSessions(5);
1101
+ const avgAccessCount = store.db.prepare(
1102
+ `SELECT AVG(access_count) as avg FROM documents WHERE active = 1`
1103
+ ).get() as { avg: number | null };
1104
+
1105
+ const stats = {
1106
+ totalDocuments: status.totalDocuments,
1107
+ needsEmbedding: status.needsEmbedding,
1108
+ hasVectorIndex: status.hasVectorIndex,
1109
+ collections: status.collections.length,
1110
+ contentTypes: typeCounts,
1111
+ staleDocuments: staleCount.count,
1112
+ recentSessions: recentSessions.length,
1113
+ avgAccessCount: Math.round((avgAccessCount.avg ?? 0) * 100) / 100,
1114
+ };
1115
+
1116
+ const summary = [
1117
+ `Index Statistics:`,
1118
+ ` Documents: ${stats.totalDocuments} (${stats.needsEmbedding} need embedding)`,
1119
+ ` Stale documents: ${stats.staleDocuments}`,
1120
+ ` Recent sessions: ${stats.recentSessions}`,
1121
+ ` Avg access count: ${stats.avgAccessCount}`,
1122
+ ` Content types:`,
1123
+ ...typeCounts.map(t => ` ${t.content_type}: ${t.count}`),
1124
+ ];
1125
+
1126
+ return {
1127
+ content: [{ type: "text", text: summary.join('\n') }],
1128
+ structuredContent: stats,
1129
+ };
1130
+ }
1131
+ );
1132
+
1133
+ // ---------------------------------------------------------------------------
1134
+ // Tool: session_log (NEW - SAME)
1135
+ // ---------------------------------------------------------------------------
1136
+
1137
+ server.registerTool(
1138
+ "session_log",
1139
+ {
1140
+ title: "Session Log",
1141
+ description: "USE THIS when user references prior sessions: 'last time', 'yesterday', 'what happened', 'what did we do'. Returns session history with handoffs and file changes. DO NOT use query() for cross-session questions — this tool has session-specific data that search cannot find.",
1142
+ inputSchema: {
1143
+ limit: z.number().optional().default(10),
1144
+ vault: z.string().optional().describe("Named vault (omit for default vault)"),
1145
+ },
1146
+ },
1147
+ async ({ limit, vault }) => {
1148
+ const store = getStore(vault);
1149
+ const sessions = store.getRecentSessions(limit || 10);
1150
+ if (sessions.length === 0) {
1151
+ return { content: [{ type: "text", text: "No sessions tracked yet." }] };
1152
+ }
1153
+
1154
+ const lines: string[] = [];
1155
+ for (const s of sessions) {
1156
+ const duration = s.endedAt
1157
+ ? `${Math.round((new Date(s.endedAt).getTime() - new Date(s.startedAt).getTime()) / 60000)}min`
1158
+ : "active";
1159
+ lines.push(`${s.sessionId.slice(0, 8)} ${s.startedAt} (${duration})`);
1160
+ if (s.handoffPath) lines.push(` Handoff: ${s.handoffPath}`);
1161
+ if (s.summary) lines.push(` ${s.summary.slice(0, 100)}`);
1162
+ if (s.filesChanged.length > 0) lines.push(` Files: ${s.filesChanged.slice(0, 5).join(", ")}`);
1163
+ }
1164
+
1165
+ return {
1166
+ content: [{ type: "text", text: lines.join('\n') }],
1167
+ structuredContent: { sessions },
1168
+ };
1169
+ }
1170
+ );
1171
+
1172
+ // ---------------------------------------------------------------------------
1173
+ // Tool: beads_sync
1174
+ // ---------------------------------------------------------------------------
1175
+
1176
+ server.registerTool(
1177
+ "beads_sync",
1178
+ {
1179
+ title: "Sync Beads Issues",
1180
+ description: "Sync Beads issues from Dolt backend (bd CLI) into ClawMem search index. Queries live Dolt database — no stale JSONL dependency.",
1181
+ inputSchema: {
1182
+ project_path: z.string().optional().describe("Path to project with .beads/ directory (default: cwd)"),
1183
+ },
1184
+ },
1185
+ async ({ project_path }) => {
1186
+ const cwd = project_path || process.cwd();
1187
+ const projectDir = store.detectBeadsProject(cwd);
1188
+
1189
+ if (!projectDir) {
1190
+ return {
1191
+ content: [{ type: "text", text: "No Beads project found. Expected .beads/ directory in project path." }],
1192
+ };
1193
+ }
1194
+
1195
+ try {
1196
+ const result = await store.syncBeadsIssues(projectDir);
1197
+
1198
+ // A-MEM enrichment for newly created docs (generates semantic/entity edges)
1199
+ if (result.newDocIds.length > 0) {
1200
+ try {
1201
+ const llm = getDefaultLlamaCpp();
1202
+ for (const docId of result.newDocIds) {
1203
+ await store.postIndexEnrich(llm, docId, true);
1204
+ }
1205
+ } catch (enrichErr) {
1206
+ console.error(`[beads] A-MEM enrichment failed (non-fatal):`, enrichErr);
1207
+ }
1208
+ }
1209
+
1210
+ return {
1211
+ content: [{
1212
+ type: "text",
1213
+ text: `Beads sync complete:\n - ${result.created} new issues indexed\n - ${result.synced} existing issues updated\n - ${result.newDocIds.length} docs enriched with A-MEM\n - Total: ${result.created + result.synced} issues`,
1214
+ }],
1215
+ structuredContent: { ...result, project_dir: projectDir },
1216
+ };
1217
+ } catch (err) {
1218
+ return {
1219
+ content: [{
1220
+ type: "text",
1221
+ text: `Beads sync failed: ${err instanceof Error ? err.message : String(err)}`,
1222
+ }],
1223
+ isError: true,
1224
+ };
1225
+ }
1226
+ }
1227
+ );
1228
+
1229
+ // ---------------------------------------------------------------------------
1230
+ // Tool: build_graphs
1231
+ // ---------------------------------------------------------------------------
1232
+
1233
+ server.registerTool(
1234
+ "build_graphs",
1235
+ {
1236
+ title: "Build Memory Graphs",
1237
+ description: "Build temporal and semantic graphs for MAGMA multi-graph memory. Run after indexing documents.",
1238
+ inputSchema: {
1239
+ graph_types: z.array(z.enum(['temporal', 'semantic', 'all'])).optional().default(['all']),
1240
+ semantic_threshold: z.number().optional().default(0.7).describe("Similarity threshold for semantic edges (0.0-1.0)"),
1241
+ vault: z.string().optional().describe("Named vault (omit for default vault)"),
1242
+ },
1243
+ },
1244
+ async ({ graph_types, semantic_threshold, vault }) => {
1245
+ const store = getStore(vault);
1246
+ const types = graph_types || ['all'];
1247
+ const shouldBuildTemporal = types.includes('temporal') || types.includes('all');
1248
+ const shouldBuildSemantic = types.includes('semantic') || types.includes('all');
1249
+
1250
+ const results: { temporal?: number; semantic?: number } = {};
1251
+
1252
+ if (shouldBuildTemporal) {
1253
+ results.temporal = store.buildTemporalBackbone();
1254
+ }
1255
+
1256
+ if (shouldBuildSemantic) {
1257
+ results.semantic = await store.buildSemanticGraph(semantic_threshold);
1258
+ }
1259
+
1260
+ const lines = [];
1261
+ if (results.temporal !== undefined) lines.push(`Temporal graph: ${results.temporal} edges`);
1262
+ if (results.semantic !== undefined) lines.push(`Semantic graph: ${results.semantic} edges`);
1263
+
1264
+ return {
1265
+ content: [{
1266
+ type: "text",
1267
+ text: `Graph building complete:\n ${lines.join('\n ')}`,
1268
+ }],
1269
+ structuredContent: results,
1270
+ };
1271
+ }
1272
+ );
1273
+
1274
+ // ---------------------------------------------------------------------------
1275
+ // Tool: intent_search
1276
+ // ---------------------------------------------------------------------------
1277
+
1278
server.registerTool(
  "intent_search",
  {
    title: "Intent-Aware Search",
    description: "USE THIS for 'why did we decide X', 'what caused Y', 'who worked on Z'. Classifies intent (WHY/WHEN/ENTITY) and traverses causal + semantic graph edges. Returns decision chains that query() CANNOT find. If asking about reasons, causes, decisions, or entities — this tool, not query().",
    inputSchema: {
      query: z.string().describe("Search query"),
      limit: z.number().optional().default(10),
      force_intent: z.enum(['WHY', 'WHEN', 'ENTITY', 'WHAT']).optional().describe("Override automatic intent detection"),
      enable_graph_traversal: z.boolean().optional().default(true).describe("Enable multi-hop graph expansion"),
      vault: z.string().optional().describe("Named vault (omit for default vault)"),
    },
  },
  async ({ query, limit, force_intent, enable_graph_traversal, vault }) => {
    const store = getStore(vault);
    const llm = getDefaultLlamaCpp();

    // Step 1: Intent classification. A forced intent skips the LLM classifier
    // entirely and is treated as fully confident.
    const intent = force_intent
      ? { intent: force_intent as IntentType, confidence: 1.0 }
      : await classifyIntent(query, llm, store.db);

    // Step 2: Baseline search (BM25 + Vector)
    const bm25Results = store.searchFTS(query, 30);
    const vecResults = await store.searchVec(query, DEFAULT_EMBED_MODEL, 30);

    // Step 3: Intent-weighted RRF
    const rrfWeights = intent.intent === 'WHEN'
      ? [1.5, 1.0] // Boost BM25 for temporal (dates in text)
      : intent.intent === 'WHY'
      ? [1.0, 1.5] // Boost vector for causal (semantic)
      : [1.0, 1.0]; // Balanced

    const fusedRanked = reciprocalRankFusion([bm25Results.map(toRanked), vecResults.map(toRanked)], rrfWeights);

    // Map RRF results back to SearchResult with updated scores.
    // find() takes the first match, so a doc present in both lists keeps its
    // BM25 entry's fields (with the fused score).
    const allSearchResults = [...bm25Results, ...vecResults];
    const fused: SearchResult[] = fusedRanked.map(fr => {
      const original = allSearchResults.find(r => r.filepath === fr.file);
      return original ? { ...original, score: fr.score } : null;
    }).filter((r): r is SearchResult => r !== null);

    // Step 4: Graph expansion (if enabled and intent allows)
    let expanded = fused;
    if (enable_graph_traversal && (intent.intent === 'WHY' || intent.intent === 'ENTITY')) {
      // Traversal is seeded with the top-10 fused hits; silently skipped when
      // the query cannot be embedded.
      const anchorEmbeddingResult = await llm.embed(query);
      if (anchorEmbeddingResult) {
        const traversed = adaptiveTraversal(store.db, fused.slice(0, 10).map(r => ({ hash: r.hash, score: r.score })), {
          maxDepth: 2,
          beamWidth: 5,
          budget: 30,
          intent: intent.intent,
          queryEmbedding: anchorEmbeddingResult.embedding,
        });

        // Merge traversed nodes with original results
        const merged = mergeTraversalResults(
          store.db,
          fused.map(r => ({ hash: r.hash, score: r.score })),
          traversed
        );

        // Convert back to SearchResult format — hydrate graph-discovered nodes from DB
        expanded = merged.map(m => {
          const original = fused.find(f => f.hash === m.hash);
          if (original) return { ...original, score: m.score };
          // Graph-discovered node not in original fused results — hydrate from DB
          const doc = store.db.prepare(`
            SELECT d.collection, d.path, d.title, d.hash, c.doc as body, d.modified_at
            FROM documents d
            LEFT JOIN content c ON c.hash = d.hash
            WHERE d.hash = ? AND d.active = 1 LIMIT 1
          `).get(m.hash) as { collection: string; path: string; title: string; hash: string; body: string | null; modified_at: string } | undefined;
          // Inactive or missing docs are dropped from the expansion.
          if (!doc) return null;
          return {
            filepath: `clawmem://${doc.collection}/${doc.path}`,
            displayPath: `${doc.collection}/${doc.path}`,
            title: doc.title || doc.path.split("/").pop() || "",
            context: null,
            hash: doc.hash,
            docid: doc.hash.slice(0, 6),
            collectionName: doc.collection,
            modifiedAt: doc.modified_at || "",
            bodyLength: doc.body?.length || 0,
            body: doc.body || "",
            score: m.score,
            source: "vec" as const,
          } satisfies SearchResult;
        }).filter((r): r is SearchResult => r !== null);
      }
    }

    // Step 5: Rerank top 30 and blend scores (same pattern as query tool)
    const toRerank = expanded.slice(0, 30);
    const rerankDocs = toRerank.map(r => ({
      file: r.filepath,
      text: r.body?.slice(0, 200) || r.title,
    }));

    const reranked = await store.rerank(query, rerankDocs);

    // Blend original + rerank scores using file-keyed join (matching query tool pattern)
    const rerankMap = new Map(reranked.map(r => [r.file, r.score]));
    const rankMap = new Map(toRerank.map((r, i) => [r.filepath, i + 1]));
    const blendedResults = toRerank.map(r => {
      const rerankScore = rerankMap.get(r.filepath) || 0;
      const rank = rankMap.get(r.filepath) || toRerank.length;
      // Position-aware blend: high-ranked originals keep most of their fused
      // score, deeper results lean harder on the reranker.
      const origWeight = rank <= 3 ? 0.75 : rank <= 10 ? 0.60 : 0.40;
      const blended = origWeight * r.score + (1 - origWeight) * rerankScore;
      return { ...r, score: blended };
    });
    blendedResults.sort((a, b) => b.score - a.score);

    // Step 6: Composite scoring
    const enriched = enrichResults(store, blendedResults, query);

    const scored = applyCompositeScoring(enriched, query);

    // Format results
    const results = scored.slice(0, limit || 10).map(r => ({
      docid: r.docid,
      file: r.filepath,
      title: r.title,
      score: r.score,
      compositeScore: r.compositeScore,
      context: r.context,
      snippet: r.body?.slice(0, 300) || '',
      contentType: r.contentType,
    }));

    return {
      content: [{
        type: "text",
        text: `Intent: ${intent.intent} (${Math.round(intent.confidence * 100)}% confidence)\n\n${formatSearchSummary(results, query)}`,
      }],
      structuredContent: {
        intent: intent.intent,
        confidence: intent.confidence,
        results,
      },
    };
  }
);
1421
+
1422
+ // ---------------------------------------------------------------------------
1423
+ // Tool: query_plan (Multi-Query Decomposition)
1424
+ // ---------------------------------------------------------------------------
1425
+
1426
+ server.registerTool(
1427
+ "query_plan",
1428
+ {
1429
+ title: "Query Plan (Multi-Query Decomposition)",
1430
+ description: "USE THIS for complex multi-topic queries ('tell me about X and also Y', 'compare A with B in the context of C'). Decomposes into parallel typed retrieval clauses. DO NOT use query() for multi-topic — it searches as one blob. This tool splits topics and routes each optimally.",
1431
+ inputSchema: {
1432
+ query: z.string().describe("Complex or multi-topic query"),
1433
+ limit: z.number().optional().default(10),
1434
+ compact: z.boolean().optional().default(true).describe("Return compact results"),
1435
+ vault: z.string().optional().describe("Named vault (omit for default vault)"),
1436
+ },
1437
+ },
1438
+ async ({ query, limit, compact, vault }) => {
1439
+ const store = getStore(vault);
1440
+ const llm = getDefaultLlamaCpp();
1441
+
1442
+ // Decompose query into typed clauses
1443
+ const clauses = await decomposeQuery(query, llm, store.db);
1444
+
1445
+ // Sort by priority and execute each clause
1446
+ const sortedClauses = [...clauses].sort((a, b) => a.priority - b.priority);
1447
+ const allResults: SearchResult[] = [];
1448
+ const clauseDetails: { type: string; query: string; priority: number; resultCount: number }[] = [];
1449
+
1450
+ for (const clause of sortedClauses) {
1451
+ let results: SearchResult[] = [];
1452
+ if (clause.type === 'bm25') {
1453
+ results = store.searchFTS(clause.query, 20, undefined, clause.collections);
1454
+ } else if (clause.type === 'vector') {
1455
+ results = await store.searchVec(clause.query, DEFAULT_EMBED_MODEL, 20, undefined, clause.collections);
1456
+ } else if (clause.type === 'graph') {
1457
+ // Graph clause: run intent_search-style retrieval
1458
+ const intent = await classifyIntent(clause.query, llm, store.db);
1459
+ const bm25 = store.searchFTS(clause.query, 15, undefined, clause.collections);
1460
+ const vec = await store.searchVec(clause.query, DEFAULT_EMBED_MODEL, 15, undefined, clause.collections);
1461
+ const fused = reciprocalRankFusion([bm25.map(toRanked), vec.map(toRanked)], [1.0, 1.0]);
1462
+ const searchMap = new Map([...bm25, ...vec].map(r => [r.filepath, r]));
1463
+ results = fused
1464
+ .map(fr => searchMap.get(fr.file))
1465
+ .filter((r): r is SearchResult => r !== null);
1466
+
1467
+ // Graph expansion for WHY/ENTITY
1468
+ if (intent.intent === 'WHY' || intent.intent === 'ENTITY') {
1469
+ const anchorEmb = await llm.embed(clause.query);
1470
+ if (anchorEmb) {
1471
+ const traversed = adaptiveTraversal(store.db, results.slice(0, 5).map(r => ({ hash: r.hash, score: r.score })), {
1472
+ maxDepth: 2, beamWidth: 3, budget: 15, intent: intent.intent, queryEmbedding: anchorEmb.embedding,
1473
+ });
1474
+ const merged = mergeTraversalResults(store.db, results.map(r => ({ hash: r.hash, score: r.score })), traversed);
1475
+ const expandedMap = new Map(results.map(r => [r.hash, r]));
1476
+ results = merged.map(m => {
1477
+ const existing = expandedMap.get(m.hash);
1478
+ if (existing) return { ...existing, score: m.score };
1479
+ // Graph-discovered node — hydrate from DB
1480
+ const doc = store.db.prepare(`
1481
+ SELECT d.collection, d.path, d.title, d.hash, c.doc as body, d.modified_at
1482
+ FROM documents d
1483
+ LEFT JOIN content c ON c.hash = d.hash
1484
+ WHERE d.hash = ? AND d.active = 1 LIMIT 1
1485
+ `).get(m.hash) as { collection: string; path: string; title: string; hash: string; body: string | null; modified_at: string } | undefined;
1486
+ if (!doc) return null;
1487
+ return {
1488
+ filepath: `clawmem://${doc.collection}/${doc.path}`,
1489
+ displayPath: `${doc.collection}/${doc.path}`,
1490
+ title: doc.title || doc.path.split("/").pop() || "",
1491
+ context: null,
1492
+ hash: doc.hash,
1493
+ docid: doc.hash.slice(0, 6),
1494
+ collectionName: doc.collection,
1495
+ modifiedAt: doc.modified_at || "",
1496
+ bodyLength: doc.body?.length || 0,
1497
+ body: doc.body || "",
1498
+ score: m.score,
1499
+ source: "vec" as const,
1500
+ } satisfies SearchResult;
1501
+ }).filter((r): r is SearchResult => r !== null);
1502
+ }
1503
+ }
1504
+ }
1505
+ clauseDetails.push({ type: clause.type, query: clause.query, priority: clause.priority, resultCount: results.length });
1506
+ allResults.push(...results);
1507
+ }
1508
+
1509
+ // Deduplicate by filepath, keeping highest score
1510
+ const deduped = new Map<string, SearchResult>();
1511
+ for (const r of allResults) {
1512
+ const existing = deduped.get(r.filepath);
1513
+ if (!existing || r.score > existing.score) deduped.set(r.filepath, r);
1514
+ }
1515
+
1516
+ // RRF merge across clauses for final ranking
1517
+ const clauseLists = sortedClauses.map((clause, idx) => {
1518
+ const start = sortedClauses.slice(0, idx).reduce((sum, c, i) => sum + clauseDetails[i]!.resultCount, 0);
1519
+ const end = start + clauseDetails[idx]!.resultCount;
1520
+ return allResults.slice(start, end).map(toRanked);
1521
+ });
1522
+ const finalRanked = reciprocalRankFusion(clauseLists, sortedClauses.map(c => 6 - c.priority));
1523
+
1524
+ // Map back to SearchResults
1525
+ const resultMap = new Map([...deduped.values()].map(r => [r.filepath, r]));
1526
+ const finalResults = finalRanked
1527
+ .map(fr => { const r = resultMap.get(fr.file); return r ? { ...r, score: fr.score } : null; })
1528
+ .filter((r): r is SearchResult => r !== null);
1529
+
1530
+ const enriched = enrichResults(store, finalResults, query);
1531
+ const coFn: CoActivationFn = (path) => store.getCoActivated(path);
1532
+ const scored = applyCompositeScoring(enriched, query, coFn).slice(0, limit || 10);
1533
+
1534
+ const planSummary = clauseDetails.map(c => ` ${c.type}(p${c.priority}): "${c.query}" → ${c.resultCount} results`).join("\n");
1535
+
1536
+ if (compact) {
1537
+ const items = scored.map(r => ({
1538
+ docid: `#${r.docid}`, path: r.displayPath, title: r.title,
1539
+ score: Math.round((r.compositeScore ?? r.score) * 100) / 100,
1540
+ snippet: (r.body || "").substring(0, 150), content_type: r.contentType, modified_at: r.modifiedAt,
1541
+ }));
1542
+ return {
1543
+ content: [{ type: "text", text: `Query Plan (${sortedClauses.length} clauses):\n${planSummary}\n\n${formatSearchSummary(items.map(i => ({ ...i, file: i.path, compositeScore: i.score, context: null })), query)}` }],
1544
+ structuredContent: { plan: clauseDetails, results: items },
1545
+ };
1546
+ }
1547
+
1548
+ const items = scored.map(r => ({
1549
+ docid: r.docid, file: r.filepath, title: r.title, score: r.score,
1550
+ compositeScore: r.compositeScore, context: r.context, snippet: r.body?.slice(0, 300) || '', contentType: r.contentType,
1551
+ }));
1552
+ return {
1553
+ content: [{ type: "text", text: `Query Plan (${sortedClauses.length} clauses):\n${planSummary}\n\n${formatSearchSummary(items, query)}` }],
1554
+ structuredContent: { plan: clauseDetails, results: items },
1555
+ };
1556
+ }
1557
+ );
1558
+
1559
+ // ---------------------------------------------------------------------------
1560
+ // Tool: find_causal_links (A-MEM)
1561
+ // ---------------------------------------------------------------------------
1562
+
1563
+ server.registerTool(
1564
+ "find_causal_links",
1565
+ {
1566
+ title: "Find Causal Links",
1567
+ description: "USE THIS to trace decision chains: 'what led to X', 'trace how we got from A to B'. Follow up intent_search with this tool on a top result to walk the full causal chain. Returns depth-annotated links with reasoning.",
1568
+ inputSchema: {
1569
+ docid: z.string().describe("Document ID (e.g., '#123' or path)"),
1570
+ direction: z.enum(['causes', 'caused_by', 'both']).optional().default('both').describe("Direction: 'causes' (outbound), 'caused_by' (inbound), or 'both'"),
1571
+ depth: z.number().optional().default(5).describe("Maximum traversal depth (1-10)"),
1572
+ vault: z.string().optional().describe("Named vault (omit for default vault)"),
1573
+ },
1574
+ },
1575
+ async ({ docid, direction, depth, vault }) => {
1576
+ const store = getStore(vault);
1577
+ // Resolve docid to document
1578
+ const resolved = store.findDocumentByDocid(docid);
1579
+ if (!resolved) {
1580
+ return {
1581
+ content: [{ type: "text", text: `Document not found: ${docid}` }],
1582
+ };
1583
+ }
1584
+
1585
+ // Get the numeric docId
1586
+ const doc = store.db.prepare(`
1587
+ SELECT id, title, collection, path
1588
+ FROM documents
1589
+ WHERE hash = ? AND active = 1
1590
+ LIMIT 1
1591
+ `).get(resolved.hash) as { id: number; title: string; collection: string; path: string } | undefined;
1592
+
1593
+ if (!doc) {
1594
+ return {
1595
+ content: [{ type: "text", text: `Document not found: ${docid}` }],
1596
+ };
1597
+ }
1598
+
1599
+ // Find causal links
1600
+ const links = store.findCausalLinks(doc.id, direction, depth);
1601
+
1602
+ if (links.length === 0) {
1603
+ return {
1604
+ content: [{ type: "text", text: `No causal links found for "${doc.title}" (${direction})` }],
1605
+ structuredContent: { source: doc, links: [] },
1606
+ };
1607
+ }
1608
+
1609
+ // Format summary
1610
+ const directionLabel = direction === 'causes' ? 'causes' : direction === 'caused_by' ? 'is caused by' : 'is causally related to';
1611
+ const lines = [`"${doc.title}" ${directionLabel} ${links.length} document(s):\n`];
1612
+
1613
+ for (const link of links) {
1614
+ const confidence = Math.round(link.weight * 100);
1615
+ const reasoning = link.reasoning ? ` - ${link.reasoning}` : '';
1616
+ lines.push(`[Depth ${link.depth}] ${confidence}% ${link.title} (${link.filepath})${reasoning}`);
1617
+ }
1618
+
1619
+ return {
1620
+ content: [{ type: "text", text: lines.join('\n') }],
1621
+ structuredContent: {
1622
+ source: {
1623
+ id: doc.id,
1624
+ title: doc.title,
1625
+ filepath: `${doc.collection}/${doc.path}`,
1626
+ },
1627
+ direction,
1628
+ links: links.map(l => ({
1629
+ id: l.docId,
1630
+ title: l.title,
1631
+ filepath: l.filepath,
1632
+ depth: l.depth,
1633
+ confidence: Math.round(l.weight * 100),
1634
+ reasoning: l.reasoning,
1635
+ })),
1636
+ },
1637
+ };
1638
+ }
1639
+ );
1640
+
1641
  // ---------------------------------------------------------------------------
  // Tool: memory_evolution_status (A-MEM)
  // ---------------------------------------------------------------------------

  server.registerTool(
    "memory_evolution_status",
    {
      title: "Memory Evolution Status",
      description: "Get the evolution timeline for a memory document, showing how its keywords and context have changed over time based on new evidence.",
      inputSchema: {
        docid: z.string().describe("Document ID (e.g., '#123' or path)"),
        limit: z.number().optional().default(10).describe("Maximum number of evolution entries to return (1-100)"),
        vault: z.string().optional().describe("Named vault (omit for default vault)"),
      },
    },
    // Resolves the docid, fetches up to `limit` evolution entries for the
    // document, and renders each version's keyword/context deltas plus the
    // reasoning that triggered the change. Returns both a text summary and
    // structured content.
    async ({ docid, limit, vault }) => {
      const store = getStore(vault);
      // Resolve docid to document
      const resolved = store.findDocumentByDocid(docid);
      if (!resolved) {
        return {
          content: [{ type: "text", text: `Document not found: ${docid}` }],
        };
      }

      // Get the numeric docId (active documents only; hash may match an archived row)
      const doc = store.db.prepare(`
        SELECT id, title, collection, path
        FROM documents
        WHERE hash = ? AND active = 1
        LIMIT 1
      `).get(resolved.hash) as { id: number; title: string; collection: string; path: string } | undefined;

      if (!doc) {
        return {
          content: [{ type: "text", text: `Document not found: ${docid}` }],
        };
      }

      // Get evolution timeline (ordering is decided by the store)
      const timeline = store.getEvolutionTimeline(doc.id, limit);

      if (timeline.length === 0) {
        return {
          content: [{ type: "text", text: `No evolution history found for "${doc.title}"` }],
          structuredContent: { document: doc, timeline: [] },
        };
      }

      // Format summary: one section per version with deltas.
      const lines = [`Evolution timeline for "${doc.title}" (${timeline.length} version${timeline.length === 1 ? '' : 's'}):\n`];

      for (const entry of timeline) {
        lines.push(`\nVersion ${entry.version} (${entry.createdAt})`);
        lines.push(`Triggered by: ${entry.triggeredBy.title} (${entry.triggeredBy.filepath})`);

        // Keywords delta (old → new); 'none' stands in for an absent side.
        if (entry.previousKeywords || entry.newKeywords) {
          const prev = entry.previousKeywords?.join(', ') || 'none';
          const next = entry.newKeywords?.join(', ') || 'none';
          lines.push(`Keywords: ${prev} → ${next}`);
        }

        // Context delta, truncated to 50 chars per side to keep output compact.
        if (entry.previousContext || entry.newContext) {
          const prevCtx = entry.previousContext || 'none';
          const newCtx = entry.newContext || 'none';
          const prevPreview = prevCtx.substring(0, 50) + (prevCtx.length > 50 ? '...' : '');
          const newPreview = newCtx.substring(0, 50) + (newCtx.length > 50 ? '...' : '');
          lines.push(`Context: ${prevPreview} → ${newPreview}`);
        }

        // Reasoning recorded for this evolution step, when present.
        if (entry.reasoning) {
          lines.push(`Reasoning: ${entry.reasoning}`);
        }
      }

      return {
        content: [{ type: "text", text: lines.join('\n') }],
        structuredContent: {
          document: {
            id: doc.id,
            title: doc.title,
            filepath: `${doc.collection}/${doc.path}`,
          },
          timeline: timeline.map(e => ({
            version: e.version,
            triggeredBy: {
              id: e.triggeredBy.docId,
              title: e.triggeredBy.title,
              filepath: e.triggeredBy.filepath,
            },
            previousKeywords: e.previousKeywords,
            newKeywords: e.newKeywords,
            previousContext: e.previousContext,
            newContext: e.newContext,
            reasoning: e.reasoning,
            createdAt: e.createdAt,
          })),
        },
      };
    }
  );
1745
+
1746
  // ---------------------------------------------------------------------------
  // Tool: timeline (Engram integration)
  // ---------------------------------------------------------------------------

  server.registerTool(
    "timeline",
    {
      title: "Document Timeline",
      description: "Show the temporal neighborhood around a document — what was created/modified before and after it. Token-efficient progressive disclosure: search → timeline (context) → get (full content). Use after finding a document via search to understand what happened around it.",
      inputSchema: {
        docid: z.string().describe("Document ID (e.g., '#123' or short hash)"),
        before: z.number().optional().default(5).describe("Number of documents to show before the focus (1-20)"),
        after: z.number().optional().default(5).describe("Number of documents to show after the focus (1-20)"),
        same_collection: z.boolean().optional().default(false).describe("Constrain to same collection (like session scoping)"),
        vault: z.string().optional().describe("Named vault (omit for default vault)"),
      },
    },
    // Renders a BEFORE / FOCUS / AFTER view of documents temporally adjacent
    // to the resolved focus document, with optional session header.
    async ({ docid, before, after, same_collection, vault }) => {
      const store = getStore(vault);
      // Resolve docid to numeric ID
      const resolved = store.findDocumentByDocid(docid);
      if (!resolved) {
        return { content: [{ type: "text", text: `Document not found: ${docid}` }] };
      }

      const doc = store.db.prepare(`
        SELECT id, title, collection, path FROM documents WHERE hash = ? AND active = 1 LIMIT 1
      `).get(resolved.hash) as { id: number; title: string; collection: string; path: string } | undefined;

      if (!doc) {
        return { content: [{ type: "text", text: `Document not found: ${docid}` }] };
      }

      try {
        const result = store.timeline(doc.id, { before, after, sameCollection: same_collection });

        const lines: string[] = [];

        // Session info if available
        if (result.sessionId) {
          lines.push(`Session: ${result.sessionId}${result.sessionSummary ? ` — ${result.sessionSummary}` : ""}`);
          lines.push("");
        }

        lines.push(`Total documents in scope: ${result.totalInRange}`);
        lines.push("");

        // Before: documents preceding the focus (timestamps truncated to minutes)
        if (result.before.length > 0) {
          lines.push("─── BEFORE ───");
          for (const e of result.before) {
            lines.push(`  [${e.contentType}] ${e.collection}/${e.path} (${e.modifiedAt.slice(0, 16)})`);
          }
          lines.push("");
        }

        // Focus: the document the caller asked about
        lines.push("─── FOCUS ───");
        lines.push(`→ [${result.focus.contentType}] ${result.focus.collection}/${result.focus.path} (${result.focus.modifiedAt.slice(0, 16)}) ← you are here`);
        lines.push("");

        // After: documents following the focus
        if (result.after.length > 0) {
          lines.push("─── AFTER ───");
          for (const e of result.after) {
            lines.push(`  [${e.contentType}] ${e.collection}/${e.path} (${e.modifiedAt.slice(0, 16)})`);
          }
        }

        return {
          content: [{ type: "text", text: lines.join("\n") }],
          structuredContent: result,
        };
      } catch (err: any) {
        // store.timeline may throw (e.g. missing temporal metadata); report rather than crash.
        return { content: [{ type: "text", text: `Timeline error: ${err.message}` }] };
      }
    }
  );
1824
+
1825
+ // ---------------------------------------------------------------------------
1826
+ // Tool: memory_pin
1827
+ // ---------------------------------------------------------------------------
1828
+
1829
+ server.registerTool(
1830
+ "memory_pin",
1831
+ {
1832
+ title: "Pin/Unpin Memory",
1833
+ description: "Pin a memory for permanent prioritization (+0.3 boost). USE PROACTIVELY when: user states a persistent constraint, makes an architecture decision, or corrects a misconception. Don't wait for curator — pin critical decisions immediately.",
1834
+ inputSchema: {
1835
+ query: z.string().describe("Search query to find the memory to pin/unpin"),
1836
+ unpin: z.boolean().optional().default(false).describe("Set true to unpin"),
1837
+ vault: z.string().optional().describe("Named vault (omit for default vault)"),
1838
+ },
1839
+ },
1840
+ async ({ query, unpin, vault }) => {
1841
+ const store = getStore(vault);
1842
+ const results = store.searchFTS(query, 3);
1843
+ if (results.length === 0) {
1844
+ return { content: [{ type: "text", text: "No matching memory found." }], isError: true };
1845
+ }
1846
+ const r = results[0]!;
1847
+ const parts = r.displayPath.split("/");
1848
+ const collection = parts[0]!;
1849
+ const path = parts.slice(1).join("/");
1850
+ const doc = store.findActiveDocument(collection, path);
1851
+ if (!doc) {
1852
+ return { content: [{ type: "text", text: "Document not found." }], isError: true };
1853
+ }
1854
+ store.pinDocument(collection, path, !unpin);
1855
+ const action = unpin ? "Unpinned" : "Pinned";
1856
+ return { content: [{ type: "text", text: `${action}: ${r.displayPath} (${r.title})` }] };
1857
+ }
1858
+ );
1859
+
1860
+ // ---------------------------------------------------------------------------
1861
+ // Tool: memory_snooze
1862
+ // ---------------------------------------------------------------------------
1863
+
1864
+ server.registerTool(
1865
+ "memory_snooze",
1866
+ {
1867
+ title: "Snooze Memory",
1868
+ description: "Temporarily hide a memory from context surfacing. USE PROACTIVELY when vault-context repeatedly surfaces irrelevant content — snooze it for 30 days instead of ignoring it. Reduces noise for future sessions.",
1869
+ inputSchema: {
1870
+ query: z.string().describe("Search query to find the memory to snooze"),
1871
+ until: z.string().optional().describe("ISO date to snooze until (e.g. 2026-03-01). Omit to unsnooze."),
1872
+ vault: z.string().optional().describe("Named vault (omit for default vault)"),
1873
+ },
1874
+ },
1875
+ async ({ query, until, vault }) => {
1876
+ const store = getStore(vault);
1877
+ const results = store.searchFTS(query, 3);
1878
+ if (results.length === 0) {
1879
+ return { content: [{ type: "text", text: "No matching memory found." }], isError: true };
1880
+ }
1881
+ const r = results[0]!;
1882
+ const parts = r.displayPath.split("/");
1883
+ const collection = parts[0]!;
1884
+ const path = parts.slice(1).join("/");
1885
+ const doc = store.findActiveDocument(collection, path);
1886
+ if (!doc) {
1887
+ return { content: [{ type: "text", text: "Document not found." }], isError: true };
1888
+ }
1889
+ store.snoozeDocument(collection, path, until || null);
1890
+ const msg = until
1891
+ ? `Snoozed until ${until}: ${r.displayPath}`
1892
+ : `Unsnoozed: ${r.displayPath}`;
1893
+ return { content: [{ type: "text", text: msg }] };
1894
+ }
1895
+ );
1896
+
1897
+ // ---------------------------------------------------------------------------
1898
+ // Tool: lifecycle_status
1899
+ // ---------------------------------------------------------------------------
1900
+
1901
+ server.registerTool(
1902
+ "lifecycle_status",
1903
+ {
1904
+ title: "Lifecycle Status",
1905
+ description: "Show document lifecycle statistics: active, archived, forgotten, pinned, snoozed counts and policy summary.",
1906
+ inputSchema: {
1907
+ vault: z.string().optional().describe("Named vault (omit for default vault)"),
1908
+ },
1909
+ },
1910
+ async ({ vault }) => {
1911
+ const store = getStore(vault);
1912
+ const stats = store.getLifecycleStats();
1913
+ const { loadConfig } = await import("./collections.ts");
1914
+ const config = loadConfig();
1915
+ const policy = config.lifecycle;
1916
+
1917
+ const lines = [
1918
+ `Active: ${stats.active}`,
1919
+ `Archived (auto): ${stats.archived}`,
1920
+ `Forgotten (manual): ${stats.forgotten}`,
1921
+ `Pinned: ${stats.pinned}`,
1922
+ `Snoozed: ${stats.snoozed}`,
1923
+ `Never accessed: ${stats.neverAccessed}`,
1924
+ `Oldest access: ${stats.oldestAccess?.slice(0, 10) || "n/a"}`,
1925
+ "",
1926
+ `Policy: ${policy ? `archive after ${policy.archive_after_days}d, purge after ${policy.purge_after_days ?? "never"}, dry_run=${policy.dry_run}` : "none configured"}`,
1927
+ ];
1928
+
1929
+ return { content: [{ type: "text", text: lines.join("\n") }] };
1930
+ }
1931
+ );
1932
+
1933
  // ---------------------------------------------------------------------------
  // Tool: lifecycle_sweep
  // ---------------------------------------------------------------------------

  server.registerTool(
    "lifecycle_sweep",
    {
      title: "Lifecycle Sweep",
      description: "Run lifecycle policies: archive stale docs, optionally purge old archives. Defaults to dry_run (preview only).",
      inputSchema: {
        dry_run: z.boolean().optional().default(true).describe("Preview what would be archived/purged without acting"),
        vault: z.string().optional().describe("Named vault (omit for default vault)"),
      },
    },
    // Applies the configured lifecycle policy: lists (dry_run) or archives
    // stale documents, then purges old archives when purge_after_days is set.
    // Destructive when dry_run=false — the default is preview-only.
    async ({ dry_run, vault }) => {
      const store = getStore(vault);
      const { loadConfig } = await import("./collections.ts");
      const config = loadConfig();
      const policy = config.lifecycle;
      if (!policy) {
        return { content: [{ type: "text", text: "No lifecycle policy configured in config.yaml" }] };
      }

      // Documents eligible for archiving under the current policy.
      const candidates = store.getArchiveCandidates(policy);

      if (dry_run) {
        // Preview only: list candidates without mutating anything.
        const lines = candidates.map(c =>
          `- ${c.collection}/${c.path} (${c.content_type}, modified ${c.modified_at.slice(0, 10)}, accessed ${c.last_accessed_at?.slice(0, 10) || "never"})`
        );
        return { content: [{ type: "text", text: `Would archive ${candidates.length} document(s):\n${lines.join("\n") || "(none)"}` }] };
      }

      const archived = store.archiveDocuments(candidates.map(c => c.id));
      let purged = 0;
      // Purge runs only when the policy opts in via purge_after_days.
      if (policy.purge_after_days) {
        purged = store.purgeArchivedDocuments(policy.purge_after_days);
      }

      return { content: [{ type: "text", text: `Lifecycle sweep: archived ${archived}, purged ${purged}` }] };
    }
  );
1974
+
1975
  // ---------------------------------------------------------------------------
  // Tool: lifecycle_restore
  // ---------------------------------------------------------------------------

  server.registerTool(
    "lifecycle_restore",
    {
      title: "Restore Archived Documents",
      description: "Restore documents that were auto-archived by lifecycle policies. Does NOT restore manually forgotten documents.",
      inputSchema: {
        query: z.string().optional().describe("Search archived docs by keyword to find what to restore"),
        collection: z.string().optional().describe("Restore all archived docs from a specific collection"),
        all: z.boolean().optional().default(false).describe("Restore ALL archived documents"),
        vault: z.string().optional().describe("Named vault (omit for default vault)"),
      },
    },
    // Three mutually exclusive restore modes, checked in precedence order:
    // query (search-matched subset) > collection (all in one collection) > all.
    async ({ query, collection, all, vault }) => {
      const store = getStore(vault);
      if (query) {
        // Mode 1: search archived docs and restore the top 20 matches.
        const results = store.searchArchived(query, 20);

        if (results.length === 0) {
          return { content: [{ type: "text", text: "No archived documents match that query." }] };
        }

        const restored = store.restoreArchivedDocuments({ ids: results.map(r => r.id) });
        const lines = results.map(r => `- [${r.score.toFixed(3)}] ${r.collection}/${r.path} (archived ${r.archived_at?.slice(0, 10)})`);
        return { content: [{ type: "text", text: `Restored ${restored}:\n${lines.join("\n")}` }] };
      }

      if (collection) {
        // Mode 2: restore everything archived within one collection.
        const restored = store.restoreArchivedDocuments({ collection });
        return { content: [{ type: "text", text: `Restored ${restored} documents from collection "${collection}"` }] };
      }

      if (all) {
        // Mode 3: restore every archived document (empty filter = no constraint).
        const restored = store.restoreArchivedDocuments({});
        return { content: [{ type: "text", text: `Restored ${restored} archived documents` }] };
      }

      // No mode selected — require the caller to pick one.
      return { content: [{ type: "text", text: "Specify query, collection, or all=true" }], isError: true };
    }
  );
2018
+
2019
+ // ---------------------------------------------------------------------------
2020
+ // Tool: list_vaults
2021
+ // ---------------------------------------------------------------------------
2022
+
2023
+ server.registerTool(
2024
+ "list_vaults",
2025
+ {
2026
+ title: "List Configured Vaults",
2027
+ description: "Show all configured vault names and their SQLite paths. Returns empty if running in single-vault mode (default).",
2028
+ inputSchema: {},
2029
+ },
2030
+ async () => {
2031
+ const vaults = listVaults();
2032
+ if (vaults.length === 0) {
2033
+ return {
2034
+ content: [{
2035
+ type: "text",
2036
+ text: "No named vaults configured (single-vault mode). Add vaults via config.yaml or CLAWMEM_VAULTS env var.",
2037
+ }],
2038
+ };
2039
+ }
2040
+
2041
+ const config = loadVaultConfig();
2042
+ const lines = vaults.map(name => ` ${name}: ${config.vaults[name]}`);
2043
+ return {
2044
+ content: [{ type: "text", text: `Configured vaults (${vaults.length}):\n${lines.join('\n')}` }],
2045
+ structuredContent: { vaults: config.vaults },
2046
+ };
2047
+ }
2048
+ );
2049
+
2050
+ // ---------------------------------------------------------------------------
2051
+ // Tool: vault_sync
2052
+ // ---------------------------------------------------------------------------
2053
+
2054
+ server.registerTool(
2055
+ "vault_sync",
2056
+ {
2057
+ title: "Sync Content to Vault",
2058
+ description: "Index markdown documents from a directory into a named vault. Use to populate a vault with content from a specific path.",
2059
+ inputSchema: {
2060
+ vault: z.string().describe("Target vault name (must be configured in config.yaml or CLAWMEM_VAULTS)"),
2061
+ content_root: z.string().describe("Directory path to index markdown files from"),
2062
+ pattern: z.string().optional().default("**/*.md").describe("Glob pattern (default: **/*.md)"),
2063
+ collection_name: z.string().optional().describe("Collection name in the vault. Defaults to vault name."),
2064
+ },
2065
+ },
2066
+ async ({ vault, content_root, pattern, collection_name }) => {
2067
+ const s = getStore(vault);
2068
+ const root = content_root.replace(/^~/, process.env.HOME || "/tmp");
2069
+ const collName = collection_name || vault;
2070
+
2071
+ // Validate content_root — reject sensitive paths
2072
+ const { resolve: resolvePath } = await import("path");
2073
+ const resolvedRoot = resolvePath(root);
2074
+ const DENIED_PREFIXES = ["/etc/", "/root/", "/var/", "/proc/", "/sys/", "/dev/"];
2075
+ const DENIED_PATTERNS = [".ssh", ".gnupg", ".env", "credentials", "secrets", ".aws", ".kube"];
2076
+ if (DENIED_PREFIXES.some(p => resolvedRoot.startsWith(p)) ||
2077
+ DENIED_PATTERNS.some(p => resolvedRoot.includes(p))) {
2078
+ return {
2079
+ content: [{ type: "text", text: `Vault sync denied: "${resolvedRoot}" is in a restricted path` }],
2080
+ isError: true,
2081
+ };
2082
+ }
2083
+
2084
+ try {
2085
+ const stats = await indexCollection(s, collName, root, pattern || "**/*.md");
2086
+ return {
2087
+ content: [{
2088
+ type: "text",
2089
+ text: `Synced to vault "${vault}":\n Collection: ${collName}\n Root: ${root}\n Added: ${stats.added}\n Updated: ${stats.updated}\n Deleted: ${stats.removed}`,
2090
+ }],
2091
+ structuredContent: { vault, collection: collName, ...stats },
2092
+ };
2093
+ } catch (err: any) {
2094
+ return {
2095
+ content: [{ type: "text", text: `Vault sync failed: ${err.message}` }],
2096
+ isError: true,
2097
+ };
2098
+ }
2099
+ }
2100
+ );
2101
+
2102
  // ---------------------------------------------------------------------------
  // Connect
  // ---------------------------------------------------------------------------

  // Serve MCP over stdio; diagnostics throughout this file go to stderr so
  // stdout stays reserved for the protocol.
  const transport = new StdioServerTransport();
  await server.connect(transport);

  // ---------------------------------------------------------------------------
  // Consolidation Worker
  // ---------------------------------------------------------------------------

  // Start consolidation worker if enabled (opt-in via env var; interval
  // defaults to 300000 ms = 5 minutes).
  if (Bun.env.CLAWMEM_ENABLE_CONSOLIDATION === "true") {
    const llm = getDefaultLlamaCpp();
    const intervalMs = parseInt(Bun.env.CLAWMEM_CONSOLIDATION_INTERVAL || "300000", 10);
    startConsolidationWorker(store, llm, intervalMs);
  }

  // Signal handlers for graceful shutdown: stop the background worker and
  // close every open store before exiting.
  process.on("SIGINT", () => {
    console.error("\n[mcp] Received SIGINT, shutting down...");
    stopConsolidationWorker();
    closeAllStores();
    process.exit(0);
  });

  process.on("SIGTERM", () => {
    console.error("\n[mcp] Received SIGTERM, shutting down...");
    stopConsolidationWorker();
    closeAllStores();
    process.exit(0);
  });
}
2135
+
2136
+ if (import.meta.main) {
2137
+ startMcpServer().catch(console.error);
2138
+ }