clawmem 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +660 -0
- package/CLAUDE.md +660 -0
- package/LICENSE +21 -0
- package/README.md +993 -0
- package/SKILL.md +717 -0
- package/bin/clawmem +75 -0
- package/package.json +72 -0
- package/src/amem.ts +797 -0
- package/src/beads.ts +263 -0
- package/src/clawmem.ts +1849 -0
- package/src/collections.ts +405 -0
- package/src/config.ts +178 -0
- package/src/consolidation.ts +123 -0
- package/src/directory-context.ts +248 -0
- package/src/errors.ts +41 -0
- package/src/formatter.ts +427 -0
- package/src/graph-traversal.ts +247 -0
- package/src/hooks/context-surfacing.ts +317 -0
- package/src/hooks/curator-nudge.ts +89 -0
- package/src/hooks/decision-extractor.ts +639 -0
- package/src/hooks/feedback-loop.ts +214 -0
- package/src/hooks/handoff-generator.ts +345 -0
- package/src/hooks/postcompact-inject.ts +226 -0
- package/src/hooks/precompact-extract.ts +314 -0
- package/src/hooks/pretool-inject.ts +79 -0
- package/src/hooks/session-bootstrap.ts +324 -0
- package/src/hooks/staleness-check.ts +130 -0
- package/src/hooks.ts +367 -0
- package/src/indexer.ts +327 -0
- package/src/intent.ts +294 -0
- package/src/limits.ts +26 -0
- package/src/llm.ts +1175 -0
- package/src/mcp.ts +2138 -0
- package/src/memory.ts +336 -0
- package/src/mmr.ts +93 -0
- package/src/observer.ts +269 -0
- package/src/openclaw/engine.ts +283 -0
- package/src/openclaw/index.ts +221 -0
- package/src/openclaw/plugin.json +83 -0
- package/src/openclaw/shell.ts +207 -0
- package/src/openclaw/tools.ts +304 -0
- package/src/profile.ts +346 -0
- package/src/promptguard.ts +218 -0
- package/src/retrieval-gate.ts +106 -0
- package/src/search-utils.ts +127 -0
- package/src/server.ts +783 -0
- package/src/splitter.ts +325 -0
- package/src/store.ts +4062 -0
- package/src/validation.ts +67 -0
- package/src/watcher.ts +58 -0
package/src/store.ts
ADDED
|
@@ -0,0 +1,4062 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ClawMem Store - Core data access and retrieval functions
|
|
3
|
+
*
|
|
4
|
+
* Forked from QMD store with SAME agent memory extensions:
|
|
5
|
+
* - Extended documents table (domain, workstream, tags, content_type, confidence, access_count)
|
|
6
|
+
* - Session tracking (session_log table)
|
|
7
|
+
* - Context usage tracking (context_usage table)
|
|
8
|
+
*
|
|
9
|
+
* Usage:
|
|
10
|
+
* const store = createStore("/path/to/db.sqlite");
|
|
11
|
+
* // or use default path:
|
|
12
|
+
* const store = createStore();
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { Database } from "bun:sqlite";
|
|
16
|
+
import { Glob } from "bun";
|
|
17
|
+
import { realpathSync } from "node:fs";
|
|
18
|
+
import * as sqliteVec from "sqlite-vec";
|
|
19
|
+
import {
|
|
20
|
+
LlamaCpp,
|
|
21
|
+
getDefaultLlamaCpp,
|
|
22
|
+
formatQueryForEmbedding,
|
|
23
|
+
formatDocForEmbedding,
|
|
24
|
+
type RerankDocument,
|
|
25
|
+
} from "./llm.ts";
|
|
26
|
+
import {
|
|
27
|
+
findContextForPath as collectionsFindContextForPath,
|
|
28
|
+
addContext as collectionsAddContext,
|
|
29
|
+
removeContext as collectionsRemoveContext,
|
|
30
|
+
listAllContexts as collectionsListAllContexts,
|
|
31
|
+
getCollection,
|
|
32
|
+
listCollections as collectionsListCollections,
|
|
33
|
+
addCollection as collectionsAddCollection,
|
|
34
|
+
removeCollection as collectionsRemoveCollection,
|
|
35
|
+
renameCollection as collectionsRenameCollection,
|
|
36
|
+
setGlobalContext,
|
|
37
|
+
loadConfig as collectionsLoadConfig,
|
|
38
|
+
type NamedCollection,
|
|
39
|
+
} from "./collections.ts";
|
|
40
|
+
import { getVaultPath } from "./config.ts";
|
|
41
|
+
import {
|
|
42
|
+
queryBeadsList,
|
|
43
|
+
formatBeadsIssueAsMarkdown,
|
|
44
|
+
detectBeadsProject,
|
|
45
|
+
type BeadsIssue,
|
|
46
|
+
} from "./beads.ts";
|
|
47
|
+
import {
|
|
48
|
+
constructMemoryNote,
|
|
49
|
+
storeMemoryNote,
|
|
50
|
+
generateMemoryLinks,
|
|
51
|
+
evolveMemories,
|
|
52
|
+
postIndexEnrich,
|
|
53
|
+
inferCausalLinks,
|
|
54
|
+
type ObservationWithDoc,
|
|
55
|
+
} from "./amem.ts";
|
|
56
|
+
|
|
57
|
+
// =============================================================================
|
|
58
|
+
// Configuration
|
|
59
|
+
// =============================================================================
|
|
60
|
+
|
|
61
|
+
// Home directory read from the HOME environment variable; falls back to "/tmp" when HOME is unset or empty.
const HOME = Bun.env.HOME || "/tmp";
|
|
62
|
+
/** Default model identifier used for embeddings. */
export const DEFAULT_EMBED_MODEL = "granite";
/** Default model identifier used for reranking. */
export const DEFAULT_RERANK_MODEL = "ExpedientFalcon/qwen3-reranker:0.6b-q8_0";
/** Default model identifier used for query expansion. */
export const DEFAULT_QUERY_MODEL = "tobil/qmd-query-expansion-1.7B";
/** Default glob pattern: every Markdown file, at any depth. */
export const DEFAULT_GLOB = "**/*.md";
/** Default byte budget for multi-get (10 KB). */
export const DEFAULT_MULTI_GET_MAX_BYTES = 10 * 1024;

// Chunking parameters: 800-token chunks, with 15% of a chunk shared between neighbours.
export const CHUNK_SIZE_TOKENS = 800;
export const CHUNK_OVERLAP_TOKENS = Math.floor(CHUNK_SIZE_TOKENS * 0.15); // 120 tokens
// Character-based fallback for synchronous chunking, approximating one token as ~4 characters.
export const CHUNK_SIZE_CHARS = 4 * CHUNK_SIZE_TOKENS; // 3200 chars
export const CHUNK_OVERLAP_CHARS = 4 * CHUNK_OVERLAP_TOKENS; // 480 chars
|
|
74
|
+
|
|
75
|
+
// =============================================================================
|
|
76
|
+
// Path utilities
|
|
77
|
+
// =============================================================================
|
|
78
|
+
|
|
79
|
+
/**
 * Return the home directory captured in the module-level `HOME` constant.
 *
 * @returns The value of `HOME` as resolved when this module was loaded.
 */
export function homedir(): string {
  return HOME;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Join path segments into a normalized absolute POSIX-style path.
 *
 * Segments are applied left to right; any segment that starts with "/" discards
 * everything accumulated before it. When the first segment is relative, the
 * result is anchored at `PWD` (or `process.cwd()` when `PWD` is unset/empty).
 * Empty and "." components are removed and ".." removes the previous component
 * (it is a no-op at the root).
 *
 * @param paths One or more path segments.
 * @returns The normalized path, always starting with "/" and without a trailing slash.
 * @throws Error when called with no segments.
 */
export function resolve(...paths: string[]): string {
  if (paths.length === 0) {
    throw new Error("resolve: at least one path segment is required");
  }

  // Only consult the environment when the first segment is relative.
  const anchor = paths[0]!.startsWith('/') ? '' : Bun.env.PWD || process.cwd();
  const joined = paths.reduce(
    (acc, segment) => (segment.startsWith('/') ? segment : acc + '/' + segment),
    anchor,
  );

  const stack: string[] = [];
  for (const component of joined.split('/')) {
    if (component === '' || component === '.') continue;
    if (component === '..') {
      stack.pop();
    } else {
      stack.push(component);
    }
  }
  return '/' + stack.join('/');
}
|
|
103
|
+
|
|
104
|
+
// Production-mode switch. It starts off and is only ever turned on through
// enableProductionMode(); nothing in this block turns it back off.
let _productionMode = false;

/**
 * Switch the module into production mode.
 *
 * While the flag is off, getDefaultDbPath() refuses to fall back to the
 * default cache location.
 */
export function enableProductionMode(): void {
  _productionMode = true;
}
|
|
110
|
+
|
|
111
|
+
/**
 * Work out where the SQLite index file lives.
 *
 * Resolution order:
 *  1. `INDEX_PATH` from the environment, returned verbatim when set.
 *  2. Outside production mode: throw, so that callers must pass an explicit path.
 *  3. `<XDG_CACHE_HOME or ~/.cache>/clawmem/<indexName>.sqlite`; the directory is
 *     created on a best-effort basis via `mkdir -p`.
 *
 * @param indexName Base name of the index file (without the ".sqlite" extension).
 * @returns Path to the database file.
 * @throws Error when neither `INDEX_PATH` nor production mode is set.
 */
export function getDefaultDbPath(indexName: string = "index"): string {
  const override = Bun.env.INDEX_PATH;
  if (override) return override;

  if (!_productionMode) {
    throw new Error(
      "Database path not set. Tests must set INDEX_PATH env var or use createStore() with explicit path. " +
      "This prevents tests from accidentally writing to the global index."
    );
  }

  const cacheRoot = Bun.env.XDG_CACHE_HOME || resolve(homedir(), ".cache");
  const indexDir = resolve(cacheRoot, "clawmem");
  try {
    // Best effort: a failure to create the directory is deliberately ignored here.
    Bun.spawnSync(["mkdir", "-p", indexDir]);
  } catch { }
  return resolve(indexDir, `${indexName}.sqlite`);
}
|
|
130
|
+
|
|
131
|
+
/**
 * Current working directory as seen by the shell.
 *
 * @returns `PWD` from the environment when it is set and non-empty, otherwise `process.cwd()`.
 */
export function getPwd(): string {
  const fromEnv = process.env.PWD;
  return fromEnv ? fromEnv : process.cwd();
}
|
|
134
|
+
|
|
135
|
+
/**
 * Canonicalize a path, following symlinks when the path exists.
 *
 * @param path Path to canonicalize.
 * @returns The result of `realpathSync(path)`; if that throws (for example because
 *          the path does not exist), the purely lexical `resolve(path)` instead.
 */
export function getRealPath(path: string): string {
  let canonical: string;
  try {
    canonical = realpathSync(path);
  } catch {
    canonical = resolve(path);
  }
  return canonical;
}
|
|
142
|
+
|
|
143
|
+
// =============================================================================
|
|
144
|
+
// Virtual Path Utilities (clawmem://)
|
|
145
|
+
// =============================================================================
|
|
146
|
+
|
|
147
|
+
/** The two components of a `clawmem://` address. */
export type VirtualPath = {
  /** Name of the collection the address points into. */
  collectionName: string;
  /** Path relative to the collection. */
  path: string;
};
|
|
151
|
+
|
|
152
|
+
/**
 * Normalize explicit virtual path formats to the standard clawmem:// format.
 *
 * Only inputs that are already explicitly virtual are rewritten:
 *  - clawmem://collection/path.md      (already normalized)
 *  - clawmem:////collection/path.md    (extra slashes - normalized)
 *  - //collection/path.md              (missing clawmem: prefix - added)
 *
 * In both forms, leading slashes are reduced to exactly two and any run of
 * slashes inside the path is collapsed to one. Collapsing matters for the
 * `//` form too: otherwise `//coll//a.md` would later parse to the path
 * `/a.md`, which resolve() treats as absolute and which therefore drops the
 * collection root.
 *
 * Does NOT handle:
 *  - collection/path.md (bare paths - could be filesystem relative)
 *  - :linenum suffix (should be parsed separately before calling this)
 *
 * @param input Raw path; surrounding whitespace is ignored.
 * @returns The normalized clawmem:// path, or the trimmed input unchanged when
 *          it is not an explicit virtual path.
 */
export function normalizeVirtualPath(input: string): string {
  const trimmed = input.trim();

  let remainder: string;
  if (trimmed.startsWith('clawmem:')) {
    // Drop the scheme; the slashes that follow are rebuilt below.
    remainder = trimmed.slice('clawmem:'.length);
  } else if (trimmed.startsWith('//')) {
    remainder = trimmed;
  } else {
    // Filesystem paths, docids, bare collection/path, etc. are returned as-is.
    return trimmed;
  }

  const body = remainder
    .replace(/^\/+/, '')      // remove every leading slash
    .replace(/\/\/+/g, '/');  // collapse internal slash runs
  return `clawmem://${body}`;
}
|
|
187
|
+
|
|
188
|
+
/**
 * Split a virtual path such as "clawmem://collection-name/path/to/file.md"
 * into its collection name and collection-relative path.
 *
 * The input is first passed through normalizeVirtualPath(). A collection root
 * ("clawmem://collection-name/" or "clawmem://collection-name") is accepted and
 * yields an empty `path`.
 *
 * @param virtualPath Path to parse.
 * @returns The parsed components, or null when the normalized input is not a
 *          clawmem:// path with a non-empty collection name.
 */
export function parseVirtualPath(virtualPath: string): VirtualPath | null {
  const canonical = normalizeVirtualPath(virtualPath);

  // clawmem://<collection>[/<anything>] - the collection is everything up to the first slash.
  const parts = /^clawmem:\/\/([^\/]+)\/?(.*)$/.exec(canonical);
  const collectionName = parts?.[1];
  if (!collectionName) return null;

  return {
    collectionName,
    path: parts?.[2] ?? '', // empty string for the collection root
  };
}
|
|
206
|
+
|
|
207
|
+
/**
 * Compose a clawmem:// path from a collection name and a relative path.
 *
 * The two parts are concatenated as given; no normalization is applied.
 *
 * @param collectionName Name of the collection.
 * @param path Path relative to the collection (may be empty).
 * @returns `clawmem://<collectionName>/<path>`.
 */
export function buildVirtualPath(collectionName: string, path: string): string {
  return 'clawmem://' + collectionName + '/' + path;
}
|
|
213
|
+
|
|
214
|
+
/**
 * Tell whether a path is written in an explicit virtual form.
 *
 * Recognized forms (surrounding whitespace is ignored):
 *  - anything starting with "clawmem:" (with any number of slashes after it)
 *  - anything starting with "//" (the clawmem: prefix omitted)
 *
 * A bare "collection/path.md" is NOT reported as virtual; deciding whether its
 * first component names a collection is left to the caller.
 *
 * @param path Candidate path.
 * @returns true for the explicit forms above, false otherwise.
 */
export function isVirtualPath(path: string): boolean {
  const candidate = path.trim();
  return candidate.startsWith('clawmem:') || candidate.startsWith('//');
}
|
|
234
|
+
|
|
235
|
+
/**
 * Translate a virtual path into an absolute filesystem path.
 *
 * @param db Database handle, forwarded to getCollectionByName().
 * @param virtualPath Virtual path to translate.
 * @returns The collection's `pwd` joined with the parsed relative path, or null
 *          when the input cannot be parsed or the collection lookup finds nothing.
 */
export function resolveVirtualPath(db: Database, virtualPath: string): string | null {
  const parsed = parseVirtualPath(virtualPath);
  const coll = parsed ? getCollectionByName(db, parsed.collectionName) : null;
  if (!parsed || !coll) return null;

  // NOTE(review): resolve() normalizes ".." and nothing here checks that the
  // result stays under coll.pwd - confirm callers do not need that guarantee.
  return resolve(coll.pwd, parsed.path);
}
|
|
247
|
+
|
|
248
|
+
/**
 * Map an absolute filesystem path to its clawmem:// equivalent.
 *
 * Each configured collection is checked in turn. A collection matches when the
 * path equals the collection path or lies beneath it, and the match only counts
 * when an active row for that collection and relative path exists in the
 * `documents` table.
 *
 * @param db Database handle used for the existence check.
 * @param absolutePath Absolute filesystem path of the file.
 * @returns The virtual path, or null if no collection holds an active document for it.
 */
export function toVirtualPath(db: Database, absolutePath: string): string | null {
  for (const coll of collectionsListCollections()) {
    const prefix = coll.path + '/';

    let relativePath: string;
    if (absolutePath.startsWith(prefix)) {
      relativePath = absolutePath.slice(prefix.length);
    } else if (absolutePath === coll.path) {
      relativePath = ''; // the collection directory itself
    } else {
      continue; // not inside this collection
    }

    // Only report paths that are actually indexed and active.
    const row = db.prepare(`
      SELECT d.path
      FROM documents d
      WHERE d.collection = ? AND d.path = ? AND d.active = 1
      LIMIT 1
    `).get(coll.name, relativePath) as { path: string } | null;

    if (row) {
      return buildVirtualPath(coll.name, relativePath);
    }
  }

  return null;
}
|
|
280
|
+
|
|
281
|
+
// =============================================================================
|
|
282
|
+
// Database initialization
|
|
283
|
+
// =============================================================================
|
|
284
|
+
|
|
285
|
+
// macOS only: switch bun:sqlite to Homebrew's SQLite build (which supports
// extensions) when that library is present. Any failure while probing or
// switching is ignored and the default SQLite stays in use.
if (process.platform === "darwin") {
  const homebrewSqlitePath = "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib";
  try {
    const dylib = Bun.file(homebrewSqlitePath);
    if (dylib.size > 0) {
      Database.setCustomSQLite(homebrewSqlitePath);
    }
  } catch { }
}
|
|
294
|
+
|
|
295
|
+
/**
 * Prepare a freshly opened database for use by the store.
 *
 * Loads the sqlite-vec extension, sets the journal/foreign-key pragmas, drops
 * the legacy `path_contexts` and `collections` tables, and then creates (if
 * missing) every table, index, FTS table and trigger the store relies on.
 * Columns introduced after the first schema are added with ALTER TABLE; those
 * statements are best-effort and any error they raise is ignored.
 *
 * Statement order matters (tables before their indexes and triggers, column
 * migrations before the indexes that use those columns) and is kept exactly as
 * listed below.
 *
 * @param db Open database handle to initialize.
 */
function initializeDatabase(db: Database): void {
  // Small local helpers so the schema below reads as a flat list.
  const run = (sql: string): void => {
    db.exec(sql);
  };
  const columnsOf = (table: string): Set<string> => {
    const rows = db.prepare(`PRAGMA table_info(${table})`).all() as { name: string }[];
    return new Set(rows.map(row => row.name));
  };
  // Adds each [column, declaration] pair that is absent from `known`.
  const addMissingColumns = (table: string, known: Set<string>, defs: [string, string][]): void => {
    for (const [column, declaration] of defs) {
      if (known.has(column)) continue;
      try {
        run(`ALTER TABLE ${table} ADD COLUMN ${column} ${declaration}`);
      } catch { /* column may already exist */ }
    }
  };

  sqliteVec.load(db);
  run("PRAGMA journal_mode = WAL");
  run("PRAGMA foreign_keys = ON");

  // Legacy tables whose data is now managed in YAML.
  run(`DROP TABLE IF EXISTS path_contexts`);
  run(`DROP TABLE IF EXISTS collections`);

  // Content-addressable storage: the source of truth for document bodies.
  run(`
    CREATE TABLE IF NOT EXISTS content (
      hash TEXT PRIMARY KEY,
      doc TEXT NOT NULL,
      created_at TEXT NOT NULL
    )
  `);

  // Documents: maps (collection, path) to a content hash, plus the SAME
  // agent-memory metadata columns.
  run(`
    CREATE TABLE IF NOT EXISTS documents (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      collection TEXT NOT NULL,
      path TEXT NOT NULL,
      title TEXT NOT NULL,
      hash TEXT NOT NULL,
      created_at TEXT NOT NULL,
      modified_at TEXT NOT NULL,
      active INTEGER NOT NULL DEFAULT 1,
      domain TEXT,
      workstream TEXT,
      tags TEXT,
      content_type TEXT NOT NULL DEFAULT 'note',
      review_by TEXT,
      confidence REAL NOT NULL DEFAULT 0.5,
      access_count INTEGER NOT NULL DEFAULT 0,
      content_hash TEXT,
      FOREIGN KEY (hash) REFERENCES content(hash) ON DELETE CASCADE,
      UNIQUE(collection, path)
    )
  `);

  // Column snapshot taken right after CREATE TABLE. It is reused, unchanged,
  // by the observation and A-MEM migrations further down.
  const colNames = columnsOf("documents");
  addMissingColumns("documents", colNames, [
    ["domain", "TEXT"],
    ["workstream", "TEXT"],
    ["tags", "TEXT"],
    ["content_type", "TEXT NOT NULL DEFAULT 'note'"],
    ["review_by", "TEXT"],
    ["confidence", "REAL NOT NULL DEFAULT 0.5"],
    ["access_count", "INTEGER NOT NULL DEFAULT 0"],
    ["content_hash", "TEXT"],
    ["quality_score", "REAL NOT NULL DEFAULT 0.5"],
    ["pinned", "INTEGER NOT NULL DEFAULT 0"],
    ["snoozed_until", "TEXT"],
    ["last_accessed_at", "TEXT"],
    ["archived_at", "TEXT"],
    ["memory_type", "TEXT DEFAULT 'semantic'"],
    // Engram integration: dedup + topic key columns
    ["normalized_hash", "TEXT"],
    ["duplicate_count", "INTEGER NOT NULL DEFAULT 1"],
    ["last_seen_at", "TEXT"],
    ["topic_key", "TEXT"],
    ["revision_count", "INTEGER NOT NULL DEFAULT 1"],
  ]);

  // Seed last_accessed_at from modified_at for rows that have none yet.
  try {
    run(`UPDATE documents SET last_accessed_at = modified_at WHERE last_accessed_at IS NULL`);
  } catch { /* ignore if already backfilled */ }

  run(`CREATE INDEX IF NOT EXISTS idx_documents_collection ON documents(collection, active)`);
  run(`CREATE INDEX IF NOT EXISTS idx_documents_hash ON documents(hash)`);

  // Engram indexes depend on migrated columns, so look at the table again
  // instead of trusting the earlier snapshot (another process may have raced
  // the ALTER TABLE statements).
  const postMigrationCols = columnsOf("documents");
  if (postMigrationCols.has("normalized_hash")) {
    run(`CREATE INDEX IF NOT EXISTS idx_documents_dedup ON documents(collection, content_type, normalized_hash, created_at DESC) WHERE active = 1 AND normalized_hash IS NOT NULL`);
  }
  if (postMigrationCols.has("topic_key")) {
    run(`CREATE INDEX IF NOT EXISTS idx_documents_topic_key ON documents(topic_key, collection) WHERE active = 1 AND topic_key IS NOT NULL`);
  }
  // Timeline/path indexes only touch columns from the base schema.
  run(`CREATE INDEX IF NOT EXISTS idx_documents_timeline ON documents(modified_at, id) WHERE active = 1`);
  run(`CREATE INDEX IF NOT EXISTS idx_documents_timeline_coll ON documents(collection, modified_at, id) WHERE active = 1`);
  run(`CREATE INDEX IF NOT EXISTS idx_documents_path ON documents(path, active)`);

  // Cache table for LLM API calls.
  run(`
    CREATE TABLE IF NOT EXISTS llm_cache (
      hash TEXT PRIMARY KEY,
      result TEXT NOT NULL,
      created_at TEXT NOT NULL
    )
  `);

  // Content vectors. A pre-existing table without the `seq` column is dropped,
  // together with the vector table, and recreated below.
  const legacyVectorCols = columnsOf("content_vectors");
  if (legacyVectorCols.size > 0 && !legacyVectorCols.has('seq')) {
    run(`DROP TABLE IF EXISTS content_vectors`);
    run(`DROP TABLE IF EXISTS vectors_vec`);
  }
  run(`
    CREATE TABLE IF NOT EXISTS content_vectors (
      hash TEXT NOT NULL,
      seq INTEGER NOT NULL DEFAULT 0,
      pos INTEGER NOT NULL DEFAULT 0,
      model TEXT NOT NULL,
      embedded_at TEXT NOT NULL,
      PRIMARY KEY (hash, seq)
    )
  `);

  // Full-text index over filepath (collection/path), title and body.
  run(`
    CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5(
      filepath, title, body,
      tokenize='porter unicode61'
    )
  `);

  // Triggers that keep documents_fts in step with documents.
  run(`
    CREATE TRIGGER IF NOT EXISTS documents_ai AFTER INSERT ON documents
    WHEN new.active = 1
    BEGIN
      INSERT INTO documents_fts(rowid, filepath, title, body)
      SELECT
        new.id,
        new.collection || '/' || new.path,
        new.title,
        (SELECT doc FROM content WHERE hash = new.hash)
      WHERE new.active = 1;
    END
  `);

  run(`
    CREATE TRIGGER IF NOT EXISTS documents_ad AFTER DELETE ON documents BEGIN
      DELETE FROM documents_fts WHERE rowid = old.id;
    END
  `);

  run(`
    CREATE TRIGGER IF NOT EXISTS documents_au AFTER UPDATE ON documents
    BEGIN
      -- Delete from FTS if no longer active
      DELETE FROM documents_fts WHERE rowid = old.id AND new.active = 0;

      -- Update FTS if still/newly active
      INSERT OR REPLACE INTO documents_fts(rowid, filepath, title, body)
      SELECT
        new.id,
        new.collection || '/' || new.path,
        new.title,
        (SELECT doc FROM content WHERE hash = new.hash)
      WHERE new.active = 1;
    END
  `);

  // SAME: session tracking.
  run(`
    CREATE TABLE IF NOT EXISTS session_log (
      session_id TEXT PRIMARY KEY,
      started_at TEXT NOT NULL,
      ended_at TEXT,
      handoff_path TEXT,
      machine TEXT,
      files_changed TEXT,
      summary TEXT
    )
  `);

  // SAME: context usage tracking (feedback loop).
  run(`
    CREATE TABLE IF NOT EXISTS context_usage (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_id TEXT,
      timestamp TEXT NOT NULL,
      hook_name TEXT NOT NULL,
      injected_paths TEXT NOT NULL DEFAULT '[]',
      estimated_tokens INTEGER NOT NULL DEFAULT 0,
      was_referenced INTEGER NOT NULL DEFAULT 0
    )
  `);
  run(`CREATE INDEX IF NOT EXISTS idx_context_usage_session ON context_usage(session_id)`);

  // Hook prompt dedupe: suppress duplicate/heartbeat prompts to reduce GPU churn.
  run(`
    CREATE TABLE IF NOT EXISTS hook_dedupe (
      hook_name TEXT NOT NULL,
      prompt_hash TEXT NOT NULL,
      prompt_preview TEXT,
      last_seen_at TEXT NOT NULL,
      PRIMARY KEY (hook_name, prompt_hash)
    )
  `);
  run(`CREATE INDEX IF NOT EXISTS idx_hook_dedupe_last_seen ON hook_dedupe(last_seen_at)`);

  // Co-activation tracking: documents accessed together in the same injection.
  run(`
    CREATE TABLE IF NOT EXISTS co_activations (
      doc_a TEXT NOT NULL,
      doc_b TEXT NOT NULL,
      count INTEGER NOT NULL DEFAULT 1,
      last_seen TEXT NOT NULL,
      PRIMARY KEY (doc_a, doc_b)
    )
  `);

  // Fragment columns on content_vectors (checked against a fresh column list).
  addMissingColumns("content_vectors", columnsOf("content_vectors"), [
    ["fragment_type", "TEXT"],
    ["fragment_label", "TEXT"],
    ["canonical_id", "TEXT"],
  ]);

  // Observation columns on documents (checked against the early snapshot).
  addMissingColumns("documents", colNames, [
    ["observation_type", "TEXT"],
    ["facts", "TEXT"],
    ["narrative", "TEXT"],
    ["concepts", "TEXT"],
    ["files_read", "TEXT"],
    ["files_modified", "TEXT"],
    ["skill_name", "TEXT"],
    ["obs_quality_score", "REAL"],
    ["failure_reason", "TEXT"],
  ]);

  // A-MEM columns on documents (checked against the early snapshot).
  addMissingColumns("documents", colNames, [
    ["amem_keywords", "TEXT"],
    ["amem_tags", "TEXT"],
    ["amem_context", "TEXT"],
  ]);

  // Beads integration tables.
  run(`
    CREATE TABLE IF NOT EXISTS beads_issues (
      beads_id TEXT PRIMARY KEY,
      doc_id INTEGER,
      issue_type TEXT,
      status TEXT,
      priority INTEGER,
      tags TEXT,
      assignee TEXT,
      parent_id TEXT,
      created_at TEXT,
      closed_at TEXT,
      last_synced_at TEXT,
      FOREIGN KEY (doc_id) REFERENCES documents(id) ON DELETE CASCADE
    )
  `);

  run(`CREATE INDEX IF NOT EXISTS idx_beads_status ON beads_issues(status, priority)`);
  run(`CREATE INDEX IF NOT EXISTS idx_beads_parent ON beads_issues(parent_id)`);

  run(`
    CREATE TABLE IF NOT EXISTS beads_dependencies (
      source_id TEXT NOT NULL,
      target_id TEXT NOT NULL,
      dep_type TEXT NOT NULL,
      created_at TEXT,
      PRIMARY KEY (source_id, target_id, dep_type),
      FOREIGN KEY (source_id) REFERENCES beads_issues(beads_id) ON DELETE CASCADE,
      FOREIGN KEY (target_id) REFERENCES beads_issues(beads_id) ON DELETE CASCADE
    )
  `);

  run(`CREATE INDEX IF NOT EXISTS idx_beads_deps_target ON beads_dependencies(target_id, dep_type)`);

  // MAGMA: multi-graph relational memory.
  run(`
    CREATE TABLE IF NOT EXISTS memory_relations (
      source_id INTEGER NOT NULL,
      target_id INTEGER NOT NULL,
      relation_type TEXT NOT NULL,
      weight REAL DEFAULT 1.0,
      metadata TEXT,
      created_at TEXT,
      PRIMARY KEY (source_id, target_id, relation_type),
      FOREIGN KEY (source_id) REFERENCES documents(id) ON DELETE CASCADE,
      FOREIGN KEY (target_id) REFERENCES documents(id) ON DELETE CASCADE
    )
  `);

  run(`CREATE INDEX IF NOT EXISTS idx_relations_source ON memory_relations(source_id, relation_type)`);
  run(`CREATE INDEX IF NOT EXISTS idx_relations_target ON memory_relations(target_id, relation_type)`);
  run(`CREATE INDEX IF NOT EXISTS idx_relations_weight ON memory_relations(weight DESC) WHERE weight > 0.5`);

  // A-MEM: memory evolution tracking.
  run(`
    CREATE TABLE IF NOT EXISTS memory_evolution (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      memory_id INTEGER NOT NULL,
      triggered_by INTEGER NOT NULL,
      version INTEGER NOT NULL DEFAULT 1,
      previous_keywords TEXT,
      new_keywords TEXT,
      previous_context TEXT,
      new_context TEXT,
      reasoning TEXT,
      created_at TEXT NOT NULL DEFAULT (datetime('now')),
      FOREIGN KEY (memory_id) REFERENCES documents(id) ON DELETE CASCADE,
      FOREIGN KEY (triggered_by) REFERENCES documents(id) ON DELETE CASCADE
    )
  `);

  run(`CREATE INDEX IF NOT EXISTS idx_memory_evolution_memory_id ON memory_evolution(memory_id)`);
  run(`CREATE INDEX IF NOT EXISTS idx_memory_evolution_triggered_by ON memory_evolution(triggered_by)`);
  run(`CREATE INDEX IF NOT EXISTS idx_memory_evolution_created_at ON memory_evolution(created_at)`);

  run(`
    CREATE TABLE IF NOT EXISTS entity_nodes (
      entity_id TEXT PRIMARY KEY,
      entity_type TEXT,
      name TEXT,
      description TEXT,
      created_at TEXT
    )
  `);

  run(`
    CREATE TABLE IF NOT EXISTS intent_classifications (
      query_hash TEXT PRIMARY KEY,
      query_text TEXT,
      intent TEXT,
      confidence REAL,
      temporal_start TEXT,
      temporal_end TEXT,
      cached_at TEXT
    )
  `);

  run(`CREATE INDEX IF NOT EXISTS idx_intent_cache_time ON intent_classifications(cached_at)`);
}
|
|
661
|
+
|
|
662
|
+
|
|
663
|
+
// Per-database dimension cache (WeakMap keyed by db object — no collisions for in-memory DBs)
const vecTableDimsCache = new WeakMap<Database, number>();

/**
 * Make sure the `vectors_vec` virtual table exists with the requested layout.
 *
 * An existing table is kept only when its stored CREATE statement shows the
 * same embedding dimension, a `hash_seq` column and the cosine distance
 * metric; otherwise it is dropped and recreated. The dimension last verified
 * for a given handle is cached so repeated calls skip the schema lookup.
 *
 * `dimensions` is validated before anything is touched: it is interpolated
 * into the CREATE statement, and without the check a bad value (0, NaN, a
 * fraction) that mismatches the existing table would drop that table before
 * the CREATE statement had a chance to fail.
 *
 * @param db Database handle that owns the vector table.
 * @param dimensions Embedding length; must be a positive integer.
 * @throws Error when `dimensions` is not a positive integer.
 */
function ensureVecTableInternal(db: Database, dimensions: number): void {
  if (!Number.isInteger(dimensions) || dimensions <= 0) {
    throw new Error(`ensureVecTable: dimensions must be a positive integer, got ${dimensions}`);
  }
  if (vecTableDimsCache.get(db) === dimensions) return;

  const tableInfo = db.prepare(`SELECT sql FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get() as { sql: string } | null;
  if (tableInfo) {
    const dimsMatch = tableInfo.sql.match(/float\[(\d+)\]/);
    const existingDims = dimsMatch?.[1] ? parseInt(dimsMatch[1], 10) : null;
    const layoutCurrent =
      tableInfo.sql.includes('hash_seq') && tableInfo.sql.includes('distance_metric=cosine');
    if (existingDims === dimensions && layoutCurrent) {
      vecTableDimsCache.set(db, dimensions);
      return;
    }
    // Different dimension or outdated layout: the stored vectors cannot be reused.
    db.exec("DROP TABLE IF EXISTS vectors_vec");
  }
  db.exec(`CREATE VIRTUAL TABLE IF NOT EXISTS vectors_vec USING vec0(hash_seq TEXT PRIMARY KEY, embedding float[${dimensions}] distance_metric=cosine)`);
  vecTableDimsCache.set(db, dimensions);
}
|
|
684
|
+
|
|
685
|
+
// =============================================================================
|
|
686
|
+
// Store Factory
|
|
687
|
+
// =============================================================================
|
|
688
|
+
|
|
689
|
+
/**
 * Facade returned by `createStore`: the open database handle plus the
 * operations the rest of the code base performs against it, grouped by concern.
 */
export type Store = {
  /** Underlying database connection. */
  db: Database;
  /** Path the connection was opened with. */
  dbPath: string;
  close: () => void;
  ensureVecTable: (dimensions: number) => void;

  // ---- Index health ----
  getHashesNeedingEmbedding: () => number;
  getIndexHealth: () => IndexHealthInfo;
  getStatus: () => IndexStatus;

  // ---- Caching ----
  getCacheKey: typeof getCacheKey;
  getCachedResult: (cacheKey: string) => string | null;
  setCachedResult: (cacheKey: string, result: string) => void;
  clearCache: () => void;

  // ---- Cleanup and maintenance ----
  deleteLLMCache: () => number;
  deleteInactiveDocuments: () => number;
  cleanupOrphanedContent: () => number;
  cleanupOrphanedVectors: () => number;
  vacuumDatabase: () => void;

  // ---- Context ----
  getContextForFile: (filepath: string) => string | null;
  getContextForPath: (collectionName: string, path: string) => string | null;
  getCollectionByName: (name: string) => { name: string; pwd: string; glob_pattern: string } | null;
  getCollectionsWithoutContext: () => { name: string; pwd: string; doc_count: number }[];
  getTopLevelPathsWithoutContext: (collectionName: string) => string[];

  // ---- Virtual paths ----
  parseVirtualPath: typeof parseVirtualPath;
  buildVirtualPath: typeof buildVirtualPath;
  isVirtualPath: typeof isVirtualPath;
  resolveVirtualPath: (virtualPath: string) => string | null;
  toVirtualPath: (absolutePath: string) => string | null;

  // ---- Search ----
  searchFTS: (
    query: string,
    limit?: number,
    collectionId?: number,
    collections?: string[],
  ) => SearchResult[];
  searchVec: (
    query: string,
    model: string,
    limit?: number,
    collectionId?: number,
    collections?: string[],
  ) => Promise<SearchResult[]>;

  // ---- Query expansion & reranking ----
  expandQuery: (query: string, model?: string, intent?: string) => Promise<string[]>;
  rerank: (
    query: string,
    documents: { file: string; text: string }[],
    model?: string,
    intent?: string,
  ) => Promise<{ file: string; score: number }[]>;

  // ---- Document retrieval ----
  findDocument: (filename: string, options?: { includeBody?: boolean }) => DocumentResult | DocumentNotFound;
  getDocumentBody: (
    doc: DocumentResult | { filepath: string },
    fromLine?: number,
    maxLines?: number,
  ) => string | null;
  findDocuments: (
    pattern: string,
    options?: { includeBody?: boolean; maxBytes?: number },
  ) => { docs: MultiGetResult[]; errors: string[] };

  // ---- Fuzzy matching and docid lookup ----
  findSimilarFiles: (query: string, maxDistance?: number, limit?: number) => string[];
  matchFilesByGlob: (pattern: string) => { filepath: string; displayPath: string; bodyLength: number }[];
  findDocumentByDocid: (docid: string) => { filepath: string; hash: string } | null;

  // ---- Document indexing operations ----
  insertContent: (hash: string, content: string, createdAt: string) => void;
  insertDocument: (
    collectionName: string,
    path: string,
    title: string,
    hash: string,
    createdAt: string,
    modifiedAt: string,
  ) => void;
  findActiveDocument: (
    collectionName: string,
    path: string,
  ) => { id: number; hash: string; title: string; pinned: number; snoozed_until: string | null; confidence: number } | null;
  findAnyDocument: (
    collectionName: string,
    path: string,
  ) => { id: number; hash: string; title: string; active: number } | null;
  reactivateDocument: (documentId: number, title: string, hash: string, modifiedAt: string) => void;
  updateDocumentTitle: (documentId: number, title: string, modifiedAt: string) => void;
  updateDocument: (documentId: number, title: string, hash: string, modifiedAt: string) => void;
  deactivateDocument: (collectionName: string, path: string) => void;
  getActiveDocumentPaths: (collectionName: string) => string[];

  // ---- Vector/embedding operations ----
  getHashesForEmbedding: () => { hash: string; body: string; path: string }[];
  getHashesNeedingFragments: () => { hash: string; body: string; path: string; title: string; collection: string }[];
  clearAllEmbeddings: () => void;
  insertEmbedding: (
    hash: string,
    seq: number,
    pos: number,
    embedding: Float32Array,
    model: string,
    embeddedAt: string,
    fragmentType?: string,
    fragmentLabel?: string,
    canonicalId?: string,
  ) => void;
  cleanStaleEmbeddings: () => number;

  // ---- SAME: Observation metadata ----
  updateObservationFields: (
    docPath: string,
    collectionName: string,
    fields: {
      observation_type?: string;
      facts?: string;
      narrative?: string;
      concepts?: string;
      files_read?: string;
      files_modified?: string;
    },
  ) => void;

  // ---- SAME: Session tracking ----
  insertSession: (sessionId: string, startedAt: string, machine?: string) => void;
  updateSession: (
    sessionId: string,
    updates: { endedAt?: string; handoffPath?: string; filesChanged?: string[]; summary?: string },
  ) => void;
  getSession: (sessionId: string) => SessionRecord | null;
  getRecentSessions: (limit: number) => SessionRecord[];

  // ---- SAME: Context usage tracking ----
  insertUsage: (usage: UsageRecord) => void;
  getUsageForSession: (sessionId: string) => UsageRow[];
  markUsageReferenced: (id: number) => void;

  // ---- SAME: Document metadata operations ----
  updateDocumentMeta: (
    docId: number,
    meta: {
      domain?: string;
      workstream?: string;
      tags?: string;
      content_type?: string;
      review_by?: string;
      confidence?: number;
      quality_score?: number;
    },
  ) => void;
  incrementAccessCount: (paths: string[]) => void;
  getDocumentsByType: (contentType: string, limit?: number) => DocumentRow[];
  getStaleDocuments: (beforeDate: string) => DocumentRow[];
  pinDocument: (collection: string, path: string, pinned: boolean) => void;
  snoozeDocument: (collection: string, path: string, until: string | null) => void;

  // ---- Beads integration ----
  syncBeadsIssues: (projectDir: string) => Promise<{ synced: number; created: number; newDocIds: number[] }>;
  detectBeadsProject: (cwd: string) => string | null;

  // ---- MAGMA graph building ----
  buildTemporalBackbone: () => number;
  buildSemanticGraph: (threshold?: number) => Promise<number>;

  // ---- A-MEM: Self-Evolving Memory ----
  constructMemoryNote: (llm: any, docId: number) => Promise<any>;
  storeMemoryNote: (docId: number, note: any) => void;
  generateMemoryLinks: (llm: any, docId: number, kNeighbors?: number) => Promise<number>;
  evolveMemories: (llm: any, memoryId: number, triggeredBy: number) => Promise<boolean>;
  postIndexEnrich: (llm: any, docId: number, isNew: boolean) => Promise<void>;
  inferCausalLinks: (llm: any, observations: ObservationWithDoc[]) => Promise<number>;
  findCausalLinks: (
    docId: number,
    direction?: 'causes' | 'caused_by' | 'both',
    maxDepth?: number,
  ) => CausalLink[];
  getEvolutionTimeline: (docId: number, limit?: number) => EvolutionEntry[];

  // ---- Co-activation tracking ----
  recordCoActivation: (paths: string[]) => void;
  getCoActivated: (path: string, limit?: number) => { path: string; count: number }[];

  // ---- Usage relation tracking ----
  insertRelation: (fromDoc: number, toDoc: number, relType: string, weight?: number) => void;

  // ---- Engram integration: unified save API for hook-generated memories ----
  saveMemory: (params: SaveMemoryParams) => SaveMemoryResult;
  hashNormalized: typeof hashNormalized;

  // ---- Engram integration: temporal timeline ----
  timeline: (
    docId: number,
    options?: { before?: number; after?: number; sameCollection?: boolean },
  ) => TimelineResult;

  // ---- Document archival & lifecycle ----
  archiveDocuments: (ids: number[]) => number;
  getArchiveCandidates: (
    policy: import("./collections.ts").LifecyclePolicy,
  ) => {
    id: number;
    collection: string;
    path: string;
    title: string;
    modified_at: string;
    last_accessed_at: string | null;
    content_type: string;
  }[];
  restoreArchivedDocuments: (filter: { ids?: number[]; collection?: string; sinceDate?: string }) => number;
  purgeArchivedDocuments: (olderThanDays: number) => number;
  getLifecycleStats: () => {
    active: number;
    archived: number;
    forgotten: number;
    pinned: number;
    snoozed: number;
    neverAccessed: number;
    oldestAccess: string | null;
  };
  searchArchived: (
    query: string,
    limit?: number,
  ) => { id: number; collection: string; path: string; title: string; archived_at: string; score: number }[];
};
|
|
825
|
+
|
|
826
|
+
/**
 * Open a database and return a {@link Store} whose methods are bound to it.
 *
 * In the default (read-write) mode the connection is passed through
 * `initializeDatabase`. In readonly mode the schema is left alone: only the
 * sqlite-vec extension is loaded and the `journal_mode` / `query_only`
 * pragmas are issued.
 *
 * If any of that setup throws, the connection is closed before the error is
 * rethrown so a failed call does not leak an open handle.
 *
 * @param dbPath - Path to the SQLite database file. When omitted or empty,
 *   the value of `getDefaultDbPath()` is used.
 * @param opts - Optional settings.
 * @param opts.readonly - Open the database read-only and skip initialization.
 * @param opts.busyTimeout - When defined, applied via `PRAGMA busy_timeout`.
 * @returns Store instance with all methods bound to the database
 */
export function createStore(dbPath?: string, opts?: { readonly?: boolean; busyTimeout?: number }): Store {
  const resolvedPath = dbPath || getDefaultDbPath();
  const isReadonly = Boolean(opts?.readonly);
  const db = isReadonly
    ? new Database(resolvedPath, { readonly: true })
    : new Database(resolvedPath);

  try {
    if (isReadonly) {
      // Readonly: load sqlite-vec extension and set WAL mode pragma only
      sqliteVec.load(db);
      db.exec("PRAGMA journal_mode = WAL");
      db.exec("PRAGMA query_only = ON");
    } else {
      initializeDatabase(db);
    }
    if (opts?.busyTimeout !== undefined) {
      db.exec(`PRAGMA busy_timeout = ${opts.busyTimeout}`);
    }
  } catch (err) {
    // Best effort: release the handle, but surface the original failure.
    try {
      db.close();
    } catch {
      // ignore secondary close failure
    }
    throw err;
  }

  // Named so the A-MEM wrappers below can hand the *complete* store to their
  // implementations instead of a partial object cast to Store.
  const store: Store = {
    db,
    dbPath: resolvedPath,
    close: () => db.close(),
    ensureVecTable: (dimensions: number) => ensureVecTableInternal(db, dimensions),

    // Index health
    getHashesNeedingEmbedding: () => getHashesNeedingEmbedding(db),
    getIndexHealth: () => getIndexHealth(db),
    getStatus: () => getStatus(db),

    // Caching
    getCacheKey,
    getCachedResult: (cacheKey: string) => getCachedResult(db, cacheKey),
    setCachedResult: (cacheKey: string, result: string) => setCachedResult(db, cacheKey, result),
    clearCache: () => clearCache(db),

    // Cleanup and maintenance
    deleteLLMCache: () => deleteLLMCache(db),
    deleteInactiveDocuments: () => deleteInactiveDocuments(db),
    cleanupOrphanedContent: () => cleanupOrphanedContent(db),
    cleanupOrphanedVectors: () => cleanupOrphanedVectors(db),
    vacuumDatabase: () => vacuumDatabase(db),

    // Context
    getContextForFile: (filepath: string) => getContextForFile(db, filepath),
    getContextForPath: (collectionName: string, path: string) => getContextForPath(db, collectionName, path),
    getCollectionByName: (name: string) => getCollectionByName(db, name),
    getCollectionsWithoutContext: () => getCollectionsWithoutContext(db),
    getTopLevelPathsWithoutContext: (collectionName: string) => getTopLevelPathsWithoutContext(db, collectionName),

    // Virtual paths
    parseVirtualPath,
    buildVirtualPath,
    isVirtualPath,
    resolveVirtualPath: (virtualPath: string) => resolveVirtualPath(db, virtualPath),
    toVirtualPath: (absolutePath: string) => toVirtualPath(db, absolutePath),

    // Search
    searchFTS: (query: string, limit?: number, collectionId?: number, collections?: string[]) => searchFTS(db, query, limit, collectionId, collections),
    searchVec: (query: string, model: string, limit?: number, collectionId?: number, collections?: string[]) => searchVec(db, query, model, limit, collectionId, collections),

    // Query expansion & reranking
    expandQuery: (query: string, model?: string, intent?: string) => expandQuery(query, model, db, intent),
    rerank: (query: string, documents: { file: string; text: string }[], model?: string, intent?: string) => rerank(query, documents, model, db, intent),

    // Document retrieval
    findDocument: (filename: string, options?: { includeBody?: boolean }) => findDocument(db, filename, options),
    getDocumentBody: (doc: DocumentResult | { filepath: string }, fromLine?: number, maxLines?: number) => getDocumentBody(db, doc, fromLine, maxLines),
    findDocuments: (pattern: string, options?: { includeBody?: boolean; maxBytes?: number }) => findDocuments(db, pattern, options),

    // Fuzzy matching and docid lookup
    findSimilarFiles: (query: string, maxDistance?: number, limit?: number) => findSimilarFiles(db, query, maxDistance, limit),
    matchFilesByGlob: (pattern: string) => matchFilesByGlob(db, pattern),
    findDocumentByDocid: (docid: string) => findDocumentByDocid(db, docid),

    // Document indexing operations
    insertContent: (hash: string, content: string, createdAt: string) => insertContent(db, hash, content, createdAt),
    insertDocument: (collectionName: string, path: string, title: string, hash: string, createdAt: string, modifiedAt: string) => insertDocument(db, collectionName, path, title, hash, createdAt, modifiedAt),
    findActiveDocument: (collectionName: string, path: string) => findActiveDocument(db, collectionName, path),
    findAnyDocument: (collectionName: string, path: string) => findAnyDocument(db, collectionName, path),
    reactivateDocument: (documentId: number, title: string, hash: string, modifiedAt: string) => reactivateDocument(db, documentId, title, hash, modifiedAt),
    updateDocumentTitle: (documentId: number, title: string, modifiedAt: string) => updateDocumentTitle(db, documentId, title, modifiedAt),
    updateDocument: (documentId: number, title: string, hash: string, modifiedAt: string) => updateDocument(db, documentId, title, hash, modifiedAt),
    deactivateDocument: (collectionName: string, path: string) => deactivateDocument(db, collectionName, path),
    getActiveDocumentPaths: (collectionName: string) => getActiveDocumentPaths(db, collectionName),

    // Vector/embedding operations
    getHashesForEmbedding: () => getHashesForEmbedding(db),
    getHashesNeedingFragments: () => getHashesNeedingFragments(db),
    clearAllEmbeddings: () => clearAllEmbeddings(db),
    insertEmbedding: (hash: string, seq: number, pos: number, embedding: Float32Array, model: string, embeddedAt: string, fragmentType?: string, fragmentLabel?: string, canonicalId?: string) => insertEmbedding(db, hash, seq, pos, embedding, model, embeddedAt, fragmentType, fragmentLabel, canonicalId),
    cleanStaleEmbeddings: () => cleanStaleEmbeddings(db),

    // SAME: Observation metadata
    updateObservationFields: (docPath: string, collectionName: string, fields) => updateObservationFieldsFn(db, docPath, collectionName, fields),

    // SAME: Session tracking
    insertSession: (sessionId: string, startedAt: string, machine?: string) => insertSessionFn(db, sessionId, startedAt, machine),
    updateSession: (sessionId: string, updates) => updateSessionFn(db, sessionId, updates),
    getSession: (sessionId: string) => getSessionFn(db, sessionId),
    getRecentSessions: (limit: number) => getRecentSessionsFn(db, limit),

    // SAME: Context usage tracking
    insertUsage: (usage: UsageRecord) => insertUsageFn(db, usage),
    getUsageForSession: (sessionId: string) => getUsageForSessionFn(db, sessionId),
    markUsageReferenced: (id: number) => markUsageReferencedFn(db, id),

    // SAME: Document metadata operations
    updateDocumentMeta: (docId: number, meta) => updateDocumentMetaFn(db, docId, meta),
    incrementAccessCount: (paths: string[]) => incrementAccessCountFn(db, paths),
    getDocumentsByType: (contentType: string, limit?: number) => getDocumentsByTypeFn(db, contentType, limit),
    getStaleDocuments: (beforeDate: string) => getStaleDocumentsFn(db, beforeDate),
    pinDocument: (collection: string, path: string, pinned: boolean) => pinDocumentFn(db, collection, path, pinned),
    snoozeDocument: (collection: string, path: string, until: string | null) => snoozeDocumentFn(db, collection, path, until),

    // Beads integration
    syncBeadsIssues: (projectDir: string) => syncBeadsIssues(db, projectDir),
    detectBeadsProject,

    // MAGMA graph building
    buildTemporalBackbone: () => buildTemporalBackbone(db),
    buildSemanticGraph: (threshold?: number) => buildSemanticGraph(db, threshold),

    // A-MEM: Self-Evolving Memory (implementations take a Store; give them the real one)
    constructMemoryNote: (llm: any, docId: number) => constructMemoryNote(store, llm, docId),
    storeMemoryNote: (docId: number, note: any) => storeMemoryNote(store, docId, note),
    generateMemoryLinks: (llm: any, docId: number, kNeighbors?: number) => generateMemoryLinks(store, llm, docId, kNeighbors),
    evolveMemories: (llm: any, memoryId: number, triggeredBy: number) => evolveMemories(store, llm, memoryId, triggeredBy),
    postIndexEnrich: (llm: any, docId: number, isNew: boolean) => postIndexEnrich(store, llm, docId, isNew),
    inferCausalLinks: (llm: any, observations: ObservationWithDoc[]) => inferCausalLinks(store, llm, observations),
    findCausalLinks: (docId: number, direction?: 'causes' | 'caused_by' | 'both', maxDepth?: number) => findCausalLinks(db, docId, direction, maxDepth),
    getEvolutionTimeline: (docId: number, limit?: number) => getEvolutionTimeline(db, docId, limit),

    // Co-activation tracking
    recordCoActivation: (paths: string[]) => {
      // Dedupe and sort once. A repeated path must not pair with itself or
      // bump another pair's count twice for one event, and with the whole
      // list sorted every (i < j) pair is already in canonical order.
      const unique = Array.from(new Set(paths)).sort();
      if (unique.length < 2) return;
      const now = new Date().toISOString();
      const stmt = db.prepare(`
        INSERT INTO co_activations (doc_a, doc_b, count, last_seen)
        VALUES (?, ?, 1, ?)
        ON CONFLICT(doc_a, doc_b) DO UPDATE SET
          count = count + 1,
          last_seen = excluded.last_seen
      `);
      for (let i = 0; i < unique.length; i++) {
        for (let j = i + 1; j < unique.length; j++) {
          stmt.run(unique[i]!, unique[j]!, now);
        }
      }
    },
    getCoActivated: (path: string, limit: number = 5) => {
      // The pair is stored once in sorted order, so look on both sides and
      // report whichever column is *not* the requested path.
      return db.prepare(`
        SELECT
          CASE WHEN doc_a = ? THEN doc_b ELSE doc_a END as path,
          count
        FROM co_activations
        WHERE doc_a = ? OR doc_b = ?
        ORDER BY count DESC
        LIMIT ?
      `).all(path, path, path, limit) as { path: string; count: number }[];
    },

    // Usage relation tracking — records relations between documents
    insertRelation: (fromDoc: number, toDoc: number, relType: string, weight: number = 1.0) => {
      // On a repeated (source, target, type) the weights accumulate and the
      // timestamp is refreshed.
      db.prepare(`
        INSERT INTO memory_relations (source_id, target_id, relation_type, weight, created_at)
        VALUES (?, ?, ?, ?, ?)
        ON CONFLICT(source_id, target_id, relation_type) DO UPDATE SET
          weight = weight + excluded.weight,
          created_at = excluded.created_at
      `).run(fromDoc, toDoc, relType, weight, new Date().toISOString());
    },

    // Engram integration: unified save API for hook-generated memories
    saveMemory: (params: SaveMemoryParams) => saveMemory(db, params),
    hashNormalized,

    // Engram integration: temporal timeline
    timeline: (docId: number, options?) => timeline(db, docId, options),

    // Document archival — deactivates documents by ID
    archiveDocuments: (ids: number[]) => {
      if (ids.length === 0) return 0;
      const now = new Date().toISOString();
      const placeholders = ids.map(() => "?").join(",");
      const result = db.prepare(`
        UPDATE documents SET active = 0, archived_at = ?
        WHERE id IN (${placeholders}) AND active = 1
      `).run(now, ...ids);
      return result.changes;
    },

    // Lifecycle management
    getArchiveCandidates: (policy) => getArchiveCandidatesFn(db, policy),
    restoreArchivedDocuments: (filter) => restoreArchivedDocumentsFn(db, filter),
    purgeArchivedDocuments: (olderThanDays) => purgeArchivedDocumentsFn(db, olderThanDays),
    getLifecycleStats: () => getLifecycleStatsFn(db),
    searchArchived: (query, limit?) => searchArchivedFn(db, query, limit),
  };

  return store;
}
|
|
1033
|
+
|
|
1034
|
+
// =============================================================================
|
|
1035
|
+
// Core Document Type
|
|
1036
|
+
// =============================================================================
|
|
1037
|
+
|
|
1038
|
+
/**
 * Metadata describing a single document, as returned by lookup and search.
 *
 * `body` is optional; call getDocumentBody() to load it separately when it
 * was not included.
 */
export type DocumentResult = {
  /** Full filesystem path */
  filepath: string;
  /** Short display path (e.g., "docs/readme.md") */
  displayPath: string;
  /** Document title (from first heading or filename) */
  title: string;
  /** Folder context description if configured */
  context: string | null;
  /** Content hash for caching/change detection */
  hash: string;
  /** Short docid (first 6 chars of hash) for quick reference */
  docid: string;
  /** Parent collection name */
  collectionName: string;
  /** Last modification timestamp */
  modifiedAt: string;
  /** Body length in bytes (useful before loading) */
  bodyLength: number;
  /** Document body (optional, load with getDocumentBody) */
  body?: string;
};
|
|
1054
|
+
|
|
1055
|
+
/**
 * Derive the short docid for a content hash.
 *
 * @param hash - Full content hash.
 * @returns The first 6 characters of `hash` (the whole string if shorter).
 */
export function getDocid(hash: string): string {
  const docidLength = 6;
  return hash.substring(0, docidLength);
}
|
|
1061
|
+
|
|
1062
|
+
/**
 * Turn a path into a compact, token-friendly handle.
 *
 * Steps, in order:
 * - `___` (triple underscore) is treated as a folder separator (`/`)
 * - everything is lower-cased
 * - inside each segment, every run of characters that are not Unicode
 *   letters or digits collapses to a single `-`, and leading/trailing
 *   dashes are trimmed
 * - the final segment keeps a trailing `.ext` (ASCII letters/digits) as-is
 * - segments that end up empty are dropped; folder structure is otherwise kept
 *
 * @param path - Path to convert.
 * @returns The handelized path.
 * @throws Error if `path` is empty/blank, if the filename (ignoring its
 *   extension) contains no letter or digit, or if nothing is left after cleaning.
 */
export function handelize(path: string): string {
  if (!path || path.trim() === '') {
    throw new Error('handelize: path cannot be empty');
  }

  // Reject inputs whose filename is only an extension, dots or punctuation:
  // the part before the last dot must contain a letter or digit (any script).
  const rawParts = path.split('/').filter(Boolean);
  const rawFilename = rawParts[rawParts.length - 1] || '';
  const stem = rawFilename.replace(/\.[^.]+$/, '');
  if (!/[\p{L}\p{N}]/u.test(stem)) {
    throw new Error(`handelize: path "${path}" has no valid filename content`);
  }

  // Collapse non letter/digit runs to one dash, then trim dashes at the edges.
  const slugify = (text: string): string =>
    text.replace(/[^\p{L}\p{N}]+/gu, '-').replace(/^-+|-+$/g, '');

  const parts = path.replace(/___/g, '/').toLowerCase().split('/');
  const lastIndex = parts.length - 1;
  const cleaned: string[] = [];

  parts.forEach((part, index) => {
    let piece: string;
    if (index !== lastIndex) {
      // Directory segment: plain cleanup.
      piece = slugify(part);
    } else {
      // Filename segment: set the extension aside so its dot survives.
      const extension = /(\.[a-z0-9]+)$/i.exec(part)?.[1] ?? '';
      const base = extension ? part.slice(0, -extension.length) : part;
      piece = slugify(base) + extension;
    }
    if (piece) {
      cleaned.push(piece);
    }
  });

  const result = cleaned.join('/');
  if (!result) {
    throw new Error(`handelize: path "${path}" resulted in empty string after processing`);
  }

  return result;
}
|
|
1120
|
+
|
|
1121
|
+
/**
 * A DocumentResult returned from a search, extended with its score and the
 * search source that produced it.
 */
export type SearchResult = DocumentResult & {
  /** Relevance score (0-1) */
  score: number;
  /** Search source (full-text or vector) */
  source: "fts" | "vec";
  /** Character position of matching chunk (for vector search) */
  chunkPos?: number;
  /** Fragment type (section, list, code, frontmatter, fact, narrative) */
  fragmentType?: string;
  /** Fragment label (heading text, fm key, etc.) */
  fragmentLabel?: string;
};
|
|
1131
|
+
|
|
1132
|
+
/**
 * Slimmed-down result record used internally when fusing rankings with RRF.
 */
export type RankedResult = {
  /** File identifier of the hit. */
  file: string;
  /** Short path for display. */
  displayPath: string;
  /** Document title. */
  title: string;
  /** Document body text. */
  body: string;
  /** Score attached to this hit. */
  score: number;
};
|
|
1142
|
+
|
|
1143
|
+
/**
 * Returned in place of a document when a lookup finds nothing.
 */
export type DocumentNotFound = {
  /** Discriminator: always the literal "not_found". */
  error: "not_found";
  /** The query that failed to match. */
  query: string;
  /** Candidate files offered as alternatives (may be empty). */
  similarFiles: string[];
};
|
|
1151
|
+
|
|
1152
|
+
/**
 * One entry produced by a multi-get operation, discriminated on `skipped`:
 * either a full document, or a skipped one carrying only its paths and the
 * reason it was skipped.
 */
export type MultiGetResult =
  | {
      doc: DocumentResult;
      skipped: false;
    }
  | {
      doc: Pick<DocumentResult, "filepath" | "displayPath">;
      skipped: true;
      skipReason: string;
    };
|
|
1163
|
+
|
|
1164
|
+
/**
 * Summary of one collection, as listed in `IndexStatus.collections`.
 */
export type CollectionInfo = {
  /** Collection name. */
  name: string;
  /** Path associated with the collection. */
  path: string;
  /** Pattern associated with the collection (presumably its file glob — confirm against the producer). */
  pattern: string;
  /** Document count for the collection. */
  documents: number;
  /** Last-updated timestamp, as a string. */
  lastUpdated: string;
};
|
|
1171
|
+
|
|
1172
|
+
/**
 * Overall index status: document totals, embedding backlog, whether a vector
 * index exists, and a per-collection breakdown.
 */
export type IndexStatus = {
  /** Total number of documents. */
  totalDocuments: number;
  /** Number of items still needing an embedding. */
  needsEmbedding: number;
  /** Whether a vector index is present. */
  hasVectorIndex: boolean;
  /** Per-collection summaries. */
  collections: CollectionInfo[];
};
|
|
1178
|
+
|
|
1179
|
+
// =============================================================================
|
|
1180
|
+
// SAME: Agent Memory Types
|
|
1181
|
+
// =============================================================================
|
|
1182
|
+
|
|
1183
|
+
/**
 * A tracked session, as returned by the store's session getters.
 * Fields typed `| null` may be absent for a given session.
 */
export type SessionRecord = {
  /** Session identifier. */
  sessionId: string;
  /** Start timestamp, as a string. */
  startedAt: string;
  /** End timestamp, or null. */
  endedAt: string | null;
  /** Path of the session's handoff, or null. */
  handoffPath: string | null;
  /** Machine the session was recorded on, or null. */
  machine: string | null;
  /** Files changed during the session. */
  filesChanged: string[];
  /** Session summary, or null. */
  summary: string | null;
};
|
|
1192
|
+
|
|
1193
|
+
/**
 * Input shape for recording one context-injection event (see `insertUsage`).
 */
export type UsageRecord = {
  /** Session the event belongs to. */
  sessionId: string;
  /** Event timestamp, as a string. */
  timestamp: string;
  /** Name of the hook that produced the event. */
  hookName: string;
  /** Paths that were injected. */
  injectedPaths: string[];
  /** Estimated token count of the injection. */
  estimatedTokens: number;
  /** Numeric referenced flag (presumably 0/1 — confirm against the writer). */
  wasReferenced: number;
};
|
|
1201
|
+
|
|
1202
|
+
/**
 * A stored usage event as read back (see `getUsageForSession`).
 *
 * Mirrors UsageRecord plus a row `id`, except that `injectedPaths` is a
 * single string here rather than an array.
 */
export type UsageRow = {
  /** Row identifier (accepted by `markUsageReferenced`). */
  id: number;
  /** Session the event belongs to. */
  sessionId: string;
  /** Event timestamp, as a string. */
  timestamp: string;
  /** Name of the hook that produced the event. */
  hookName: string;
  /** Injected paths in string form (presumably serialized — confirm the encoding with the writer). */
  injectedPaths: string;
  /** Estimated token count of the injection. */
  estimatedTokens: number;
  /** Numeric referenced flag (presumably 0/1 — confirm against the writer). */
  wasReferenced: number;
};
|
|
1211
|
+
|
|
1212
|
+
/**
 * A document together with its metadata columns, as returned by
 * `getDocumentsByType` and `getStaleDocuments`.
 */
export type DocumentRow = {
  /** Numeric document id. */
  id: number;
  /** Name of the owning collection. */
  collection: string;
  /** Document path. */
  path: string;
  /** Document title. */
  title: string;
  /** Content hash. */
  hash: string;
  /** Last-modified timestamp, as a string. */
  modifiedAt: string;
  /** Domain label, or null. */
  domain: string | null;
  /** Workstream label, or null. */
  workstream: string | null;
  /** Tags in string form, or null. */
  tags: string | null;
  /** Content type label. */
  contentType: string;
  /** Review-by value, or null. */
  reviewBy: string | null;
  /** Confidence value. */
  confidence: number;
  /** Access counter. */
  accessCount: number;
  /** Length of the document body. */
  bodyLength: number;
};
|
|
1228
|
+
|
|
1229
|
+
// =============================================================================
|
|
1230
|
+
// Index health
|
|
1231
|
+
// =============================================================================
|
|
1232
|
+
|
|
1233
|
+
/**
 * Count the distinct content hashes of active documents that have no
 * `content_vectors` row at `seq = 0`.
 *
 * @param db - Database connection to query.
 * @returns Number of distinct hashes still lacking that row.
 */
export function getHashesNeedingEmbedding(db: Database): number {
  // LEFT JOIN + IS NULL keeps only documents with no matching seq-0 vector row.
  const pendingQuery = db.prepare(`
    SELECT COUNT(DISTINCT d.hash) as count
    FROM documents d
    LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
    WHERE d.active = 1 AND v.hash IS NULL
  `);
  const { count } = pendingQuery.get() as { count: number };
  return count;
}
|
|
1242
|
+
|
|
1243
|
+
/**
 * Snapshot of index health as produced by `getIndexHealth`.
 */
export type IndexHealthInfo = {
  /** Number of distinct active-document hashes that still need an embedding. */
  needsEmbedding: number;
  /** Number of active documents. */
  totalDocs: number;
  /** Whole days since the most recent `modified_at` among active documents; null when there is none. */
  daysStale: number | null;
};
|
|
1248
|
+
|
|
1249
|
+
/**
 * Gather index health figures: how many hashes still need embedding, how many
 * active documents exist, and how many whole days have passed since the most
 * recently modified active document.
 *
 * @param db - Open database handle.
 * @returns The health snapshot; `daysStale` is null when no active document
 *          has a `modified_at` value.
 */
export function getIndexHealth(db: Database): IndexHealthInfo {
  const MS_PER_DAY = 24 * 60 * 60 * 1000;

  const needsEmbedding = getHashesNeedingEmbedding(db);

  const activeCount = db
    .prepare(`SELECT COUNT(*) as count FROM documents WHERE active = 1`)
    .get() as { count: number };
  const totalDocs = activeCount.count;

  const newest = db
    .prepare(`SELECT MAX(modified_at) as latest FROM documents WHERE active = 1`)
    .get() as { latest: string | null };

  // MAX() over zero rows yields NULL, in which case staleness is unknown.
  const daysStale: number | null = newest?.latest
    ? Math.floor((Date.now() - new Date(newest.latest).getTime()) / MS_PER_DAY)
    : null;

  return { needsEmbedding, totalDocs, daysStale };
}
|
|
1262
|
+
|
|
1263
|
+
// =============================================================================
|
|
1264
|
+
// Caching
|
|
1265
|
+
// =============================================================================
|
|
1266
|
+
|
|
1267
|
+
/**
 * Derive the cache key for a request: the SHA-256 hex digest of the URL
 * followed by the JSON serialization of the body.
 *
 * @param url - Request URL.
 * @param body - Request body; serialized with `JSON.stringify`.
 * @returns Hex-encoded SHA-256 digest.
 */
export function getCacheKey(url: string, body: object): string {
  const hasher = new Bun.CryptoHasher("sha256");
  const parts = [url, JSON.stringify(body)];
  for (const part of parts) {
    hasher.update(part);
  }
  return hasher.digest("hex");
}
|
|
1273
|
+
|
|
1274
|
+
/**
 * Look up a cached LLM result by cache key.
 *
 * @param db - Open database handle.
 * @param cacheKey - Key produced by `getCacheKey`.
 * @returns The cached result string, or null when no row exists for the key.
 *          A cached empty string is returned as `""` (a hit), not as null.
 */
export function getCachedResult(db: Database, cacheKey: string): string | null {
  const row = db.prepare(`SELECT result FROM llm_cache WHERE hash = ?`).get(cacheKey) as { result: string } | null;
  // `??` rather than `||`: an empty cached result is still a cache hit.
  return row?.result ?? null;
}
|
|
1278
|
+
|
|
1279
|
+
/**
 * Store (or overwrite) a cached LLM result under `cacheKey`, stamped with the
 * current time.
 *
 * On roughly 1% of calls (random) the cache is also pruned down to the 1000
 * most recently created entries.
 *
 * @param db - Open database handle.
 * @param cacheKey - Key produced by `getCacheKey`.
 * @param result - Result text to cache.
 */
export function setCachedResult(db: Database, cacheKey: string, result: string): void {
  const createdAt = new Date().toISOString();
  const upsert = db.prepare(`INSERT OR REPLACE INTO llm_cache (hash, result, created_at) VALUES (?, ?, ?)`);
  upsert.run(cacheKey, result, createdAt);

  // Probabilistic pruning keeps the table bounded without paying the cost on every write.
  const shouldPrune = Math.random() < 0.01;
  if (!shouldPrune) return;

  db.exec(`DELETE FROM llm_cache WHERE hash NOT IN (SELECT hash FROM llm_cache ORDER BY created_at DESC LIMIT 1000)`);
}
|
|
1286
|
+
|
|
1287
|
+
/**
 * Delete every row from the LLM cache table.
 *
 * @param db - Open database handle.
 */
export function clearCache(db: Database): void {
  const wipeSql = `DELETE FROM llm_cache`;
  db.exec(wipeSql);
}
|
|
1290
|
+
|
|
1291
|
+
// =============================================================================
|
|
1292
|
+
// Cleanup and maintenance operations
|
|
1293
|
+
// =============================================================================
|
|
1294
|
+
|
|
1295
|
+
/**
 * Purge all cached LLM API responses.
 *
 * @param db - Open database handle.
 * @returns How many cached responses were deleted.
 */
export function deleteLLMCache(db: Database): number {
  const { changes } = db.prepare(`DELETE FROM llm_cache`).run();
  return changes;
}
|
|
1303
|
+
|
|
1304
|
+
/**
 * Permanently delete document rows that are marked inactive (`active = 0`).
 *
 * @param db - Open database handle.
 * @returns How many inactive document rows were deleted.
 */
export function deleteInactiveDocuments(db: Database): number {
  const { changes } = db.prepare(`DELETE FROM documents WHERE active = 0`).run();
  return changes;
}
|
|
1312
|
+
|
|
1313
|
+
/**
 * Delete rows from `content` whose hash is not used by any active document.
 *
 * @param db - Open database handle.
 * @returns How many orphaned content rows were deleted.
 */
export function cleanupOrphanedContent(db: Database): number {
  const purge = db.prepare(`
    DELETE FROM content
    WHERE hash NOT IN (SELECT DISTINCT hash FROM documents WHERE active = 1)
  `);
  const { changes } = purge.run();
  return changes;
}
|
|
1324
|
+
|
|
1325
|
+
/**
 * Delete embedding rows whose hash is not used by any active document, from
 * both `vectors_vec` and `content_vectors`.
 *
 * Does nothing (returns 0) when the `vectors_vec` table does not exist or when
 * there are no orphaned rows.
 *
 * @param db - Open database handle.
 * @returns Number of orphaned `content_vectors` rows counted before deletion.
 */
export function cleanupOrphanedVectors(db: Database): number {
  // The vector table is optional; without it there is nothing to clean here.
  const vecTable = db.prepare(`
    SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'
  `).get();
  if (!vecTable) return 0;

  // Count up front: this is the number reported back to the caller.
  const { c: orphanCount } = db.prepare(`
    SELECT COUNT(*) as c FROM content_vectors cv
    WHERE NOT EXISTS (
      SELECT 1 FROM documents d WHERE d.hash = cv.hash AND d.active = 1
    )
  `).get() as { c: number };
  if (orphanCount === 0) return 0;

  // vectors_vec goes first: its keys are derived from the content_vectors rows
  // that the second statement removes.
  db.exec(`
    DELETE FROM vectors_vec WHERE hash_seq IN (
      SELECT cv.hash || '_' || cv.seq FROM content_vectors cv
      WHERE NOT EXISTS (
        SELECT 1 FROM documents d WHERE d.hash = cv.hash AND d.active = 1
      )
    )
  `);

  db.exec(`
    DELETE FROM content_vectors WHERE hash NOT IN (
      SELECT hash FROM documents WHERE active = 1
    )
  `);

  return orphanCount;
}
|
|
1370
|
+
|
|
1371
|
+
/**
 * Issue a `VACUUM` statement, which rebuilds the database file to reclaim
 * unused space and remove fragmentation.
 *
 * @param db - Open database handle.
 */
export function vacuumDatabase(db: Database): void {
  const vacuumSql = `VACUUM`;
  db.exec(vacuumSql);
}
|
|
1378
|
+
|
|
1379
|
+
// =============================================================================
|
|
1380
|
+
// Canonical Document Identity
|
|
1381
|
+
// =============================================================================
|
|
1382
|
+
|
|
1383
|
+
/**
 * Compute a deterministic identity for a document from where it lives rather
 * than from what it contains: the first 16 hex characters of
 * SHA-256(collection + "/" + path). The value therefore stays the same when
 * the document's content changes.
 *
 * @param collection - Collection name.
 * @param path - Document path within the collection.
 * @returns 16-character lowercase hex identifier.
 */
export function canonicalDocId(collection: string, path: string): string {
  const identity = `${collection}/${path}`;
  const hasher = new Bun.CryptoHasher("sha256");
  hasher.update(identity);
  const fullDigest = hasher.digest("hex");
  return fullDigest.slice(0, 16);
}
|
|
1392
|
+
|
|
1393
|
+
/**
 * Remove stale embeddings: `content_vectors` rows whose hash no longer belongs
 * to any active document, plus the corresponding `vectors_vec` rows.
 *
 * Statements are issued in bounded batches so that a large backlog of stale
 * hashes cannot exceed SQLite's limit on bound parameters per statement. The
 * `vectors_vec` deletes are skipped when that table does not exist, matching
 * the check in `cleanupOrphanedVectors`.
 *
 * @param db - Open database handle.
 * @returns Number of stale `content_vectors` rows (hash/seq pairs) removed.
 */
export function cleanStaleEmbeddings(db: Database): number {
  // Kept well below SQLite's historical default of 999 host parameters.
  const BATCH_SIZE = 500;
  const placeholdersFor = (count: number): string => Array(count).fill("?").join(",");

  // Hashes present in content_vectors that no active document references.
  const staleRows = db.prepare(`
    SELECT DISTINCT cv.hash
    FROM content_vectors cv
    LEFT JOIN documents d ON d.hash = cv.hash AND d.active = 1
    WHERE d.id IS NULL
  `).all() as { hash: string }[];

  if (staleRows.length === 0) return 0;

  const staleHashes = staleRows.map(row => row.hash);

  const hasVecTable = Boolean(
    db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get()
  );

  let removed = 0;
  for (let start = 0; start < staleHashes.length; start += BATCH_SIZE) {
    const hashBatch = staleHashes.slice(start, start + BATCH_SIZE);
    const hashPlaceholders = placeholdersFor(hashBatch.length);

    // Resolve the hash_seq keys for this batch before its rows are deleted.
    const vecKeys = (db.prepare(`
      SELECT hash || '_' || seq as hash_seq FROM content_vectors WHERE hash IN (${hashPlaceholders})
    `).all(...hashBatch) as { hash_seq: string }[]).map(row => row.hash_seq);
    removed += vecKeys.length;

    if (hasVecTable) {
      // One hash can own many chunks, so the key list is batched separately.
      for (let keyStart = 0; keyStart < vecKeys.length; keyStart += BATCH_SIZE) {
        const keyBatch = vecKeys.slice(keyStart, keyStart + BATCH_SIZE);
        db.prepare(`DELETE FROM vectors_vec WHERE hash_seq IN (${placeholdersFor(keyBatch.length)})`)
          .run(...keyBatch);
      }
    }

    db.prepare(`DELETE FROM content_vectors WHERE hash IN (${hashPlaceholders})`).run(...hashBatch);
  }

  return removed;
}
|
|
1428
|
+
|
|
1429
|
+
// =============================================================================
|
|
1430
|
+
// Document helpers
|
|
1431
|
+
// =============================================================================
|
|
1432
|
+
|
|
1433
|
+
/**
 * Hash document content with SHA-256.
 *
 * @param content - Text to hash.
 * @returns Promise resolving to the hex-encoded digest.
 */
export async function hashContent(content: string): Promise<string> {
  const hasher = new Bun.CryptoHasher("sha256");
  hasher.update(content);
  const hexDigest = hasher.digest("hex");
  return hexDigest;
}
|
|
1438
|
+
|
|
1439
|
+
/**
 * Derive a document title from its content, falling back to the filename.
 *
 * The first level-1 or level-2 markdown heading is used. When that heading is
 * the generic "Notes" / "📝 Notes", the next level-2 heading *after it* is
 * preferred if there is one. When there is no usable heading (none found, or
 * the heading text is blank), the filename's last path segment is returned
 * with a trailing `.md` removed.
 *
 * @param content - Markdown content of the document.
 * @param filename - File name or path used for the fallback title.
 * @returns The extracted title.
 */
export function extractTitle(content: string, filename: string): string {
  const heading = /^##?\s+(.+)$/m.exec(content);
  if (heading) {
    const title = (heading[1] ?? "").trim();
    if (title === "📝 Notes" || title === "Notes") {
      // Search only the text following the generic heading; scanning from the
      // top would re-match a leading "## Notes" and never reach the real title.
      const rest = content.slice(heading.index + heading[0].length);
      const nextTitle = /^##\s+(.+)$/m.exec(rest)?.[1]?.trim();
      if (nextTitle) return nextTitle;
    }
    // A heading consisting only of whitespace is not a usable title.
    if (title) return title;
  }
  return filename.replace(/\.md$/, "").split("/").pop() || filename;
}
|
|
1451
|
+
|
|
1452
|
+
// =============================================================================
|
|
1453
|
+
// Document indexing operations
|
|
1454
|
+
// =============================================================================
|
|
1455
|
+
|
|
1456
|
+
/**
 * Store a document body in the content-addressable `content` table.
 * The statement is `INSERT OR IGNORE`, so a hash that is already present is
 * left untouched.
 *
 * @param db - Open database handle.
 * @param hash - Content hash used as the key.
 * @param content - Document body to store.
 * @param createdAt - Creation timestamp to record.
 */
export function insertContent(db: Database, hash: string, content: string, createdAt: string): void {
  const insert = db.prepare(`INSERT OR IGNORE INTO content (hash, doc, created_at) VALUES (?, ?, ?)`);
  insert.run(hash, content, createdAt);
}
|
|
1464
|
+
|
|
1465
|
+
/**
 * Add a new, active row to the `documents` table.
 *
 * @param db - Open database handle.
 * @param collectionName - Collection the document belongs to.
 * @param path - Document path within the collection.
 * @param title - Document title.
 * @param hash - Content hash of the document body.
 * @param createdAt - Creation timestamp to record.
 * @param modifiedAt - Last-modified timestamp to record.
 */
export function insertDocument(
  db: Database,
  collectionName: string,
  path: string,
  title: string,
  hash: string,
  createdAt: string,
  modifiedAt: string
): void {
  const insert = db.prepare(`
    INSERT INTO documents (collection, path, title, hash, created_at, modified_at, active)
    VALUES (?, ?, ?, ?, ?, ?, 1)
  `);
  insert.run(collectionName, path, title, hash, createdAt, modifiedAt);
}
|
|
1482
|
+
|
|
1483
|
+
// =============================================================================
|
|
1484
|
+
// Engram Integration: Dedup Hash & Unified Save API
|
|
1485
|
+
// =============================================================================
|
|
1486
|
+
|
|
1487
|
+
/**
 * Hash content after normalizing it for dedup comparison: the text is
 * lowercased, every whitespace run becomes a single space, and the ends are
 * trimmed. Texts that differ only in such formatting hash identically.
 *
 * @param content - Text to normalize and hash.
 * @returns Hex-encoded SHA-256 digest of the normalized text.
 */
export function hashNormalized(content: string): string {
  const lowered = content.toLowerCase();
  const collapsed = lowered.replace(/\s+/g, " ");
  const canonical = collapsed.trim();

  const sha = new Bun.CryptoHasher("sha256");
  sha.update(canonical);
  return sha.digest("hex");
}
|
|
1498
|
+
|
|
1499
|
+
/**
 * Input to the unified `saveMemory` API, which hooks (decision-extractor,
 * handoff-generator) use to write agent-generated observations with dedup
 * protection.
 */
export type SaveMemoryParams = {
  /** Collection to store the memory in. */
  collection: string;
  /** Path identifying the memory within the collection. */
  path: string;
  /** Title of the memory document. */
  title: string;
  /** Full body text; this is what gets stored and content-hashed. */
  body: string;
  /** Content-type label; part of the dedup match. */
  contentType: string;
  /** Confidence score; `saveMemory` uses 0.5 when omitted. */
  confidence?: number;
  /** Quality score; `saveMemory` uses 0.5 when omitted. */
  qualityScore?: number;
  /** Stable semantic payload for dedup hashing. If omitted, uses body. */
  semanticPayload?: string;
  /** Topic key for future upsert support (Phase 2). */
  topicKey?: string;
};
|
|
1517
|
+
|
|
1518
|
+
/**
 * Outcome of a `saveMemory` call.
 */
export type SaveMemoryResult = {
  /** What happened: a new row, a duplicate folded into an existing row, or an existing row updated. */
  action: 'inserted' | 'deduplicated' | 'updated';
  /** Id of the affected document row (`saveMemory` reports -1 if a freshly inserted row cannot be read back). */
  docId: number;
  /** Duplicate count after the increment; set for `deduplicated` results. */
  duplicateCount?: number;
  /** Revision count after the update; set for `updated` results. */
  revisionCount?: number;
};
|
|
1524
|
+
|
|
1525
|
+
/** Length of the dedup window, in minutes, applied by `saveMemory`. */
const DEDUP_WINDOW_MINUTES = 30;

/**
 * Unified save API for agent-generated memories (hook output).
 *
 * Dedup logic (from Engram's AddObservation pattern):
 * 1. Compute normalized_hash from semanticPayload (or body).
 * 2. Check the dedup window: same normalized_hash + collection + content_type
 *    within DEDUP_WINDOW_MINUTES → increment duplicate_count, skip insert.
 * 3. Otherwise insert a new document with metadata.
 * 4. If the insert hits the UNIQUE(collection, path) constraint, update the
 *    row that already occupies that path instead. The row is matched whether
 *    it is active or not and is (re)activated, so a previously deactivated
 *    memory at the same path no longer makes the save fail.
 *
 * This function does NOT apply to file-backed indexing (indexer.ts).
 * File-backed docs use the existing insertDocument/updateDocument path,
 * which preserves path-based identity.
 *
 * @param db - Open database handle.
 * @param params - Memory to save; see `SaveMemoryParams`.
 * @returns Which action was taken and the id of the affected row.
 * @throws Rethrows any database error other than a resolvable UNIQUE conflict.
 */
export function saveMemory(db: Database, params: SaveMemoryParams): SaveMemoryResult {
  const now = new Date().toISOString();
  const payload = params.semanticPayload || params.body;
  const normHash = hashNormalized(payload);
  const bodyHasher = new Bun.CryptoHasher("sha256");
  bodyHasher.update(params.body);
  const bodyHash = bodyHasher.digest("hex");
  const confidence = params.confidence ?? 0.5;
  const qualityScore = params.qualityScore ?? 0.5;

  // --- Dedup check: same normalized_hash within window ---
  const dedupRow = db.prepare(`
    SELECT id, duplicate_count
    FROM documents
    WHERE active = 1
      AND collection = ?
      AND content_type = ?
      AND normalized_hash = ?
      AND datetime(created_at) >= datetime('now', ?)
    ORDER BY created_at DESC
    LIMIT 1
  `).get(
    params.collection,
    params.contentType,
    normHash,
    `-${DEDUP_WINDOW_MINUTES} minutes`
  ) as { id: number; duplicate_count: number } | null;

  if (dedupRow) {
    // Fold the duplicate into the existing row instead of inserting.
    db.prepare(`
      UPDATE documents
      SET duplicate_count = duplicate_count + 1,
          last_seen_at = ?
      WHERE id = ?
    `).run(now, dedupRow.id);

    return {
      action: 'deduplicated',
      docId: dedupRow.id,
      duplicateCount: dedupRow.duplicate_count + 1,
    };
  }

  // --- Insert new document ---
  // Store content (content-addressable; an existing hash is left as is).
  db.prepare(`INSERT OR IGNORE INTO content (hash, doc, created_at) VALUES (?, ?, ?)`)
    .run(bodyHash, params.body, now);

  try {
    db.prepare(`
      INSERT INTO documents (collection, path, title, hash, created_at, modified_at, active,
                             content_type, confidence, quality_score, normalized_hash,
                             duplicate_count, revision_count, last_seen_at, topic_key)
      VALUES (?, ?, ?, ?, ?, ?, 1, ?, ?, ?, ?, 1, 1, ?, ?)
    `).run(
      params.collection,
      params.path,
      params.title,
      bodyHash,
      now,
      now,
      params.contentType,
      confidence,
      qualityScore,
      normHash,
      now,
      params.topicKey ?? null,
    );
  } catch (err: unknown) {
    // Only a UNIQUE(collection, path) conflict is handled here.
    const message = (err as { message?: unknown } | null)?.message;
    const isUniqueConflict = typeof message === "string" && message.includes("UNIQUE constraint");
    if (!isUniqueConflict) throw err;

    // Match the row regardless of its active flag: an inactive row at this
    // path is exactly what blocks the insert.
    const existing = db.prepare(
      "SELECT id FROM documents WHERE collection = ? AND path = ?"
    ).get(params.collection, params.path) as { id: number } | null;
    if (!existing) throw err;

    db.prepare(`
      UPDATE documents
      SET active = 1, hash = ?, title = ?, modified_at = ?, content_type = ?,
          confidence = ?, quality_score = ?, normalized_hash = ?,
          revision_count = revision_count + 1, last_seen_at = ?
      WHERE id = ?
    `).run(
      bodyHash, params.title, now, params.contentType,
      confidence, qualityScore, normHash,
      now, existing.id
    );

    const updated = db.prepare("SELECT revision_count FROM documents WHERE id = ?")
      .get(existing.id) as { revision_count: number } | null;

    return {
      action: 'updated',
      docId: existing.id,
      revisionCount: updated?.revision_count ?? 1,
    };
  }

  // Read back the id of the row just inserted.
  const newDoc = db.prepare(
    "SELECT id FROM documents WHERE collection = ? AND path = ? AND active = 1"
  ).get(params.collection, params.path) as { id: number } | null;

  return {
    action: 'inserted',
    docId: newDoc?.id ?? -1,
  };
}
|
|
1651
|
+
|
|
1652
|
+
// =============================================================================
|
|
1653
|
+
// Engram Integration: Timeline
|
|
1654
|
+
// =============================================================================
|
|
1655
|
+
|
|
1656
|
+
/**
 * One document as it appears in a timeline.
 */
export type TimelineEntry = {
  /** Document row id. */
  id: number;
  /** Collection the document belongs to. */
  collection: string;
  /** Document path within its collection. */
  path: string;
  /** Document title. */
  title: string;
  /** Content-type label of the document. */
  contentType: string;
  /** Last-modified timestamp; the timeline is ordered by this, then by id. */
  modifiedAt: string;
  /** True only for the document the timeline is centred on. */
  isFocus: boolean;
};
|
|
1665
|
+
|
|
1666
|
+
/**
 * Result of `timeline`: the focus document and its temporal neighbours.
 */
export type TimelineResult = {
  /** The document the timeline is centred on. */
  focus: TimelineEntry;
  /** Documents preceding the focus, in chronological order (oldest first). */
  before: TimelineEntry[];
  /** Documents following the focus, in chronological order (closest first). */
  after: TimelineEntry[];
  /** Count of active documents considered (limited to the focus collection when so requested). */
  totalInRange: number;
  /** Id of a tracked session whose time span contains the focus document, if any. */
  sessionId?: string;
  /** Summary of that session, if it has one. */
  sessionSummary?: string;
};
|
|
1674
|
+
|
|
1675
|
+
/**
 * Get the temporal neighborhood around a document.
 *
 * Returns up to N documents before and after the focus, ordered by
 * (modified_at, id). Optionally constrained to the focus document's collection
 * (like Engram's session scoping). Also reports the number of active documents
 * in scope and, when the focus timestamp falls inside a tracked session, that
 * session's id and summary. If several sessions match, the most recently
 * started one is reported.
 *
 * @param db - Open database handle.
 * @param docId - Id of the focus document; it must be active.
 * @param options - `before` / `after`: how many neighbours to return on each
 *        side (default 5; negative or non-finite values are clamped so they
 *        cannot turn into an unbounded query). `sameCollection`: restrict
 *        neighbours and the count to the focus collection (default false).
 * @returns The focus entry, its neighbours in chronological order, and counts.
 * @throws Error when the document does not exist or is inactive.
 */
export function timeline(
  db: Database,
  docId: number,
  options?: { before?: number; after?: number; sameCollection?: boolean }
): TimelineResult {
  // SQLite treats a negative LIMIT as "no limit", so sanitize before binding.
  const toLimit = (value: number | undefined, fallback: number): number =>
    typeof value === "number" && Number.isFinite(value) ? Math.max(0, Math.trunc(value)) : fallback;

  const before = toLimit(options?.before, 5);
  const after = toLimit(options?.after, 5);
  const sameCollection = options?.sameCollection ?? false;

  // 1. Get focus document
  const focusRow = db.prepare(`
    SELECT id, collection, path, title, content_type, modified_at
    FROM documents WHERE id = ? AND active = 1
  `).get(docId) as { id: number; collection: string; path: string; title: string; content_type: string; modified_at: string } | null;

  if (!focusRow) {
    throw new Error(`Timeline: document #${docId} not found or inactive`);
  }

  const toEntry = (r: typeof focusRow, isFocus: boolean): TimelineEntry => ({
    id: r.id,
    collection: r.collection,
    path: r.path,
    title: r.title,
    contentType: r.content_type,
    modifiedAt: r.modified_at,
    isFocus,
  });

  // 2. Build collection filter (an extra clause rather than an OR-NULL test in WHERE)
  const collFilter = sameCollection ? "AND collection = ?" : "";
  const collArgs = sameCollection ? [focusRow.collection] : [];

  // 3. Before: closest first, with id as the tie-breaker for equal timestamps
  const beforeRows = db.prepare(`
    SELECT id, collection, path, title, content_type, modified_at
    FROM documents
    WHERE active = 1
      AND (modified_at < ? OR (modified_at = ? AND id < ?))
      ${collFilter}
    ORDER BY modified_at DESC, id DESC
    LIMIT ?
  `).all(focusRow.modified_at, focusRow.modified_at, focusRow.id, ...collArgs, before) as typeof focusRow[];

  // Reverse to chronological order (oldest first)
  beforeRows.reverse();

  // 4. After: closest first, which is already chronological
  const afterRows = db.prepare(`
    SELECT id, collection, path, title, content_type, modified_at
    FROM documents
    WHERE active = 1
      AND (modified_at > ? OR (modified_at = ? AND id > ?))
      ${collFilter}
    ORDER BY modified_at ASC, id ASC
    LIMIT ?
  `).all(focusRow.modified_at, focusRow.modified_at, focusRow.id, ...collArgs, after) as typeof focusRow[];

  // 5. Count total in range (same collection or all)
  const countSql = sameCollection
    ? "SELECT COUNT(*) as cnt FROM documents WHERE active = 1 AND collection = ?"
    : "SELECT COUNT(*) as cnt FROM documents WHERE active = 1";
  const countRow = (sameCollection
    ? db.prepare(countSql).get(focusRow.collection)
    : db.prepare(countSql).get()
  ) as { cnt: number };

  // 6. Session correlation: a session whose span contains the focus timestamp.
  //    ORDER BY makes the choice deterministic when several sessions qualify
  //    (for example, sessions that never recorded an end).
  const sessionRow = db.prepare(`
    SELECT session_id, summary FROM session_log
    WHERE started_at <= ? AND (ended_at IS NULL OR ended_at >= ?)
    ORDER BY started_at DESC
    LIMIT 1
  `).get(focusRow.modified_at, focusRow.modified_at) as { session_id: string; summary: string | null } | null;

  return {
    focus: toEntry(focusRow, true),
    before: beforeRows.map(r => toEntry(r, false)),
    after: afterRows.map(r => toEntry(r, false)),
    totalInRange: countRow.cnt,
    sessionId: sessionRow?.session_id,
    sessionSummary: sessionRow?.summary ?? undefined,
  };
}
|
|
1769
|
+
|
|
1770
|
+
/**
 * Look up the active document stored at `path` in `collectionName`.
 *
 * @param db - Open database handle.
 * @param collectionName - Collection to search.
 * @param path - Document path within the collection.
 * @returns Selected columns of the active row, or null when there is none.
 */
export function findActiveDocument(
  db: Database,
  collectionName: string,
  path: string
): { id: number; hash: string; title: string; pinned: number; snoozed_until: string | null; confidence: number } | null {
  const lookup = db.prepare(`
    SELECT id, hash, title, pinned, snoozed_until, confidence FROM documents
    WHERE collection = ? AND path = ? AND active = 1
  `);
  const row = lookup.get(collectionName, path);
  return row as { id: number; hash: string; title: string; pinned: number; snoozed_until: string | null; confidence: number } | null;
}
|
|
1783
|
+
|
|
1784
|
+
/**
 * Look up the document stored at `path` in `collectionName` whether it is
 * active or not. Used to detect inactive rows that block re-insertion
 * (UNIQUE constraint).
 *
 * @param db - Open database handle.
 * @param collectionName - Collection to search.
 * @param path - Document path within the collection.
 * @returns Selected columns of the row (including its `active` flag), or null.
 */
export function findAnyDocument(
  db: Database,
  collectionName: string,
  path: string
): { id: number; hash: string; title: string; active: number } | null {
  const lookup = db.prepare(`
    SELECT id, hash, title, active FROM documents
    WHERE collection = ? AND path = ?
  `);
  const row = lookup.get(collectionName, path);
  return row as { id: number; hash: string; title: string; active: number } | null;
}
|
|
1798
|
+
|
|
1799
|
+
/**
 * Mark a document row active again and refresh its title, hash, and
 * modified_at in the same statement.
 *
 * @param db - Open database handle.
 * @param documentId - Id of the row to reactivate.
 * @param title - New title.
 * @param hash - New content hash.
 * @param modifiedAt - New last-modified timestamp.
 */
export function reactivateDocument(
  db: Database,
  documentId: number,
  title: string,
  hash: string,
  modifiedAt: string
): void {
  const revive = db.prepare(`UPDATE documents SET active = 1, title = ?, hash = ?, modified_at = ? WHERE id = ?`);
  revive.run(title, hash, modifiedAt, documentId);
}
|
|
1812
|
+
|
|
1813
|
+
/**
 * Set a document's title and modified_at timestamp, leaving its hash as is.
 *
 * @param db - Open database handle.
 * @param documentId - Id of the row to update.
 * @param title - New title.
 * @param modifiedAt - New last-modified timestamp.
 */
export function updateDocumentTitle(
  db: Database,
  documentId: number,
  title: string,
  modifiedAt: string
): void {
  const retitle = db.prepare(`UPDATE documents SET title = ?, modified_at = ? WHERE id = ?`);
  retitle.run(title, modifiedAt, documentId);
}
|
|
1825
|
+
|
|
1826
|
+
/**
 * Set a document's title, hash, and modified_at timestamp.
 * Used when content changes but the file path stays the same.
 *
 * @param db - Open database handle.
 * @param documentId - Id of the row to update.
 * @param title - New title.
 * @param hash - New content hash.
 * @param modifiedAt - New last-modified timestamp.
 */
export function updateDocument(
  db: Database,
  documentId: number,
  title: string,
  hash: string,
  modifiedAt: string
): void {
  const update = db.prepare(`UPDATE documents SET title = ?, hash = ?, modified_at = ? WHERE id = ?`);
  update.run(title, hash, modifiedAt, documentId);
}
|
|
1840
|
+
|
|
1841
|
+
/**
 * Flag the active document at `path` in `collectionName` as inactive.
 * The row is kept, not deleted.
 *
 * @param db - Open database handle.
 * @param collectionName - Collection the document belongs to.
 * @param path - Document path within the collection.
 */
export function deactivateDocument(db: Database, collectionName: string, path: string): void {
  const retire = db.prepare(`UPDATE documents SET active = 0 WHERE collection = ? AND path = ? AND active = 1`);
  retire.run(collectionName, path);
}
|
|
1848
|
+
|
|
1849
|
+
/**
 * List the paths of every active document in a collection.
 *
 * @param db - Open database handle.
 * @param collectionName - Collection to list.
 * @returns Paths in the order the query returns them.
 */
export function getActiveDocumentPaths(db: Database, collectionName: string): string[] {
  const listing = db.prepare(`
    SELECT path FROM documents WHERE collection = ? AND active = 1
  `);
  const paths: string[] = [];
  for (const row of listing.all(collectionName) as { path: string }[]) {
    paths.push(row.path);
  }
  return paths;
}
|
|
1858
|
+
|
|
1859
|
+
export { formatQueryForEmbedding, formatDocForEmbedding };
|
|
1860
|
+
|
|
1861
|
+
/**
 * Split content into overlapping chunks of at most `maxChars` characters.
 *
 * Content that already fits is returned as a single chunk. Otherwise each
 * chunk is cut, where possible, at a natural boundary found in the last 30% of
 * the window (priority: paragraph > sentence > line > word), and the next
 * chunk starts `overlapChars` before the previous cut.
 *
 * @param content - Text to split.
 * @param maxChars - Maximum chunk length in characters; must be at least 1
 *        whenever the content has to be split.
 * @param overlapChars - Characters shared between consecutive chunks. Values
 *        below zero are treated as zero so that no content is skipped.
 * @returns Chunks in document order with their starting character offsets.
 * @throws RangeError when splitting is required but `maxChars` is not >= 1
 *         (the loop could otherwise never advance).
 */
export function chunkDocument(content: string, maxChars: number = CHUNK_SIZE_CHARS, overlapChars: number = CHUNK_OVERLAP_CHARS): { text: string; pos: number }[] {
  if (content.length <= maxChars) {
    return [{ text: content, pos: 0 }];
  }
  if (!(maxChars >= 1)) {
    throw new RangeError(`chunkDocument: maxChars must be >= 1 (got ${maxChars})`);
  }
  // A negative overlap would move the cursor past the cut and drop text.
  const overlap = overlapChars > 0 ? overlapChars : 0;

  // Offset (relative to the window start) just past the best break point in
  // the last 30% of the window, or -1 when the window has no break point.
  const findBreakOffset = (window: string): number => {
    const tailStart = Math.floor(window.length * 0.7);
    const tail = window.slice(tailStart);

    const paragraphBreak = tail.lastIndexOf('\n\n');
    if (paragraphBreak >= 0) return tailStart + paragraphBreak + 2;

    const sentenceEnd = Math.max(
      ...['. ', '.\n', '? ', '?\n', '! ', '!\n'].map(marker => tail.lastIndexOf(marker))
    );
    if (sentenceEnd >= 0) return tailStart + sentenceEnd + 2;

    const lineBreak = tail.lastIndexOf('\n');
    if (lineBreak >= 0) return tailStart + lineBreak + 1;

    const spaceBreak = tail.lastIndexOf(' ');
    return spaceBreak >= 0 ? tailStart + spaceBreak + 1 : -1;
  };

  const chunks: { text: string; pos: number }[] = [];
  let charPos = 0;

  while (charPos < content.length) {
    let endPos = Math.min(charPos + maxChars, content.length);

    // Only a chunk that stops short of the end gets moved to a break point.
    if (endPos < content.length) {
      const breakOffset = findBreakOffset(content.slice(charPos, endPos));
      if (breakOffset > 0) {
        endPos = charPos + breakOffset;
      }
    }

    chunks.push({ text: content.slice(charPos, endPos), pos: charPos });

    if (endPos >= content.length) {
      break;
    }

    // Step back by the overlap, but always advance past the chunk just emitted.
    const nextPos = endPos - overlap;
    charPos = nextPos > charPos ? nextPos : endPos;
  }

  return chunks;
}
|
|
1937
|
+
|
|
1938
|
+
/**
 * Chunk a document by actual token count using the LLM tokenizer.
 * More accurate than character-based chunking but requires async.
 *
 * Each chunk covers at most `maxTokens` tokens and is cut, where possible, at
 * a natural boundary in the last 30% of its text (paragraph > sentence >
 * line). When a chunk is cut early, the kept text is re-tokenized so that the
 * reported token count matches it and the next chunk starts `overlapTokens`
 * before the cut; advancing by a fixed stride instead would drop the text
 * between the cut and the next start.
 *
 * @param content - Text to split.
 * @param maxTokens - Maximum tokens per chunk; must be at least 1 whenever the
 *        content has to be split.
 * @param overlapTokens - Tokens shared between consecutive chunks. Values
 *        below zero are treated as zero; the cursor always advances by at
 *        least one token, so an overlap >= maxTokens cannot stall the loop.
 * @returns Chunks in document order with an approximate character offset and
 *          a token count each.
 * @throws RangeError when splitting is required but `maxTokens` is not >= 1.
 */
export async function chunkDocumentByTokens(
  content: string,
  maxTokens: number = CHUNK_SIZE_TOKENS,
  overlapTokens: number = CHUNK_OVERLAP_TOKENS
): Promise<{ text: string; pos: number; tokens: number }[]> {
  const llm = getDefaultLlamaCpp();

  // Tokenize once upfront
  const allTokens = await llm.tokenize(content);
  const totalTokens = allTokens.length;

  if (totalTokens <= maxTokens) {
    return [{ text: content, pos: 0, tokens: totalTokens }];
  }
  if (!(maxTokens >= 1)) {
    throw new RangeError(`chunkDocumentByTokens: maxTokens must be >= 1 (got ${maxTokens})`);
  }
  const overlap = overlapTokens > 0 ? overlapTokens : 0;

  const chunks: { text: string; pos: number; tokens: number }[] = [];
  const avgCharsPerToken = content.length / totalTokens;
  let tokenPos = 0;

  while (tokenPos < totalTokens) {
    const chunkEnd = Math.min(tokenPos + maxTokens, totalTokens);
    const chunkTokens = allTokens.slice(tokenPos, chunkEnd);
    let chunkText = await llm.detokenize(chunkTokens);
    let tokensUsed = chunkTokens.length;

    // Find a good break point if not at end of document
    if (chunkEnd < totalTokens) {
      const searchStart = Math.floor(chunkText.length * 0.7);
      const searchSlice = chunkText.slice(searchStart);

      let breakOffset = -1;
      const paragraphBreak = searchSlice.lastIndexOf('\n\n');
      if (paragraphBreak >= 0) {
        breakOffset = paragraphBreak + 2;
      } else {
        const sentenceEnd = Math.max(
          searchSlice.lastIndexOf('. '),
          searchSlice.lastIndexOf('.\n'),
          searchSlice.lastIndexOf('? '),
          searchSlice.lastIndexOf('?\n'),
          searchSlice.lastIndexOf('! '),
          searchSlice.lastIndexOf('!\n')
        );
        if (sentenceEnd >= 0) {
          breakOffset = sentenceEnd + 2;
        } else {
          const lineBreak = searchSlice.lastIndexOf('\n');
          if (lineBreak >= 0) {
            breakOffset = lineBreak + 1;
          }
        }
      }

      if (breakOffset >= 0) {
        chunkText = chunkText.slice(0, searchStart + breakOffset);
        // Measure what was actually kept. NOTE(review): assumes tokenizing the
        // kept prefix yields roughly the same tokens as the original prefix;
        // the clamp below bounds any drift to this chunk's own token span.
        const keptTokens = (await llm.tokenize(chunkText)).length;
        tokensUsed = Math.min(chunkTokens.length, Math.max(1, keptTokens));
      }
    }

    // Approximate character position based on token position
    const charPos = Math.floor(tokenPos * avgCharsPerToken);
    chunks.push({ text: chunkText, pos: charPos, tokens: tokensUsed });

    // Move forward
    if (chunkEnd >= totalTokens) break;

    // Resume `overlap` tokens before the end of the kept text, always making progress.
    tokenPos += Math.max(1, tokensUsed - overlap);
  }

  return chunks;
}
|
|
2013
|
+
|
|
2014
|
+
// =============================================================================
|
|
2015
|
+
// Fuzzy matching
|
|
2016
|
+
// =============================================================================
|
|
2017
|
+
|
|
2018
|
+
/**
 * Levenshtein edit distance between two strings: the minimum number of
 * single-unit insertions, deletions and substitutions that turn `a` into `b`.
 * Strings are compared by UTF-16 code unit.
 *
 * Uses two rolling rows instead of a full (m+1) x (n+1) matrix, so memory is
 * proportional to the shorter string.
 */
function levenshtein(a: string, b: string): number {
  if (a.length === 0) return b.length;
  if (b.length === 0) return a.length;

  // The distance is symmetric, so put the shorter string on the column axis
  // to keep the rows as small as possible.
  const longer = a.length >= b.length ? a : b;
  const shorter = a.length >= b.length ? b : a;
  const cols = shorter.length;

  // previous[j] = distance between the first i-1 units of `longer` and the
  // first j units of `shorter`; row 0 is the cost of inserting j units.
  let previous: number[] = Array.from({ length: cols + 1 }, (_, j) => j);
  let current: number[] = new Array<number>(cols + 1).fill(0);

  for (let i = 1; i <= longer.length; i++) {
    current[0] = i; // deleting the first i units of `longer`
    const unit = longer[i - 1];
    for (let j = 1; j <= cols; j++) {
      const cost = unit === shorter[j - 1] ? 0 : 1;
      current[j] = Math.min(
        previous[j]! + 1, // deletion
        current[j - 1]! + 1, // insertion
        previous[j - 1]! + cost // substitution or match
      );
    }
    const recycled = previous;
    previous = current;
    current = recycled;
  }

  return previous[cols]!;
}
|
|
2037
|
+
|
|
2038
|
+
/**
 * Find an active document whose content hash starts with the given short docid.
 *
 * A leading `#` on the docid is ignored. The docid is matched as a literal
 * prefix: `%`, `_` and `\` in the input are escaped so they cannot act as
 * LIKE wildcards.
 *
 * @param db Database handle.
 * @param docid Short hash prefix, with or without a leading `#`.
 * @returns The document's virtual path (`clawmem://collection/path`) and full
 *   hash, or null when the docid is empty or nothing matches. When several
 *   documents share the prefix, one row is returned (the query has no
 *   ORDER BY, so which one is up to SQLite).
 */
export function findDocumentByDocid(db: Database, docid: string): { filepath: string; hash: string } | null {
  // Normalize: remove leading # if present
  const shortHash = docid.startsWith('#') ? docid.slice(1) : docid;

  if (shortHash.length < 1) return null;

  // Escape LIKE metacharacters so input such as "%" matches only hashes that
  // literally start with it, instead of matching every document.
  const likePrefix = `${shortHash.replace(/[\\%_]/g, '\\$&')}%`;

  // Look up documents where hash starts with the short hash
  const doc = db.prepare(`
    SELECT 'clawmem://' || d.collection || '/' || d.path as filepath, d.hash
    FROM documents d
    WHERE d.hash LIKE ? ESCAPE '\\' AND d.active = 1
    LIMIT 1
  `).get(likePrefix) as { filepath: string; hash: string } | null | undefined;

  // Normalise "no row" to null whichever empty value the driver returns.
  return doc ?? null;
}
|
|
2059
|
+
|
|
2060
|
+
/**
 * Suggest active document paths that are close to `query` by edit distance.
 *
 * Comparison is case-insensitive (both sides are lower-cased first).
 *
 * @param db Database handle.
 * @param query Path the caller was looking for.
 * @param maxDistance Largest Levenshtein distance still considered a match.
 * @param limit Maximum number of paths to return.
 * @returns Matching paths, closest first.
 */
export function findSimilarFiles(db: Database, query: string, maxDistance: number = 3, limit: number = 5): string[] {
  const allFiles = db.prepare(`
    SELECT d.path
    FROM documents d
    WHERE d.active = 1
  `).all() as { path: string }[];

  const queryLower = query.toLowerCase();
  const scored: { path: string; dist: number }[] = [];

  for (const { path } of allFiles) {
    const pathLower = path.toLowerCase();
    // The edit distance can never be smaller than the length difference, so
    // paths that are too long or too short are rejected without running the
    // quadratic distance computation.
    if (Math.abs(pathLower.length - queryLower.length) > maxDistance) continue;

    const dist = levenshtein(pathLower, queryLower);
    if (dist <= maxDistance) scored.push({ path, dist });
  }

  return scored
    .sort((a, b) => a.dist - b.dist)
    .slice(0, limit)
    .map(f => f.path);
}
|
|
2074
|
+
|
|
2075
|
+
/**
 * List active documents whose virtual path or collection-relative path
 * matches a glob pattern.
 *
 * @param db Database handle.
 * @param pattern Glob pattern, tested against both `clawmem://collection/path`
 *   and the bare relative path.
 * @returns One entry per match: virtual path, relative display path, and the
 *   length of the stored document body.
 */
export function matchFilesByGlob(db: Database, pattern: string): { filepath: string; displayPath: string; bodyLength: number }[] {
  const rows = db.prepare(`
    SELECT
      'clawmem://' || d.collection || '/' || d.path as virtual_path,
      LENGTH(content.doc) as body_length,
      d.path,
      d.collection
    FROM documents d
    JOIN content ON content.hash = d.hash
    WHERE d.active = 1
  `).all() as { virtual_path: string; body_length: number; path: string; collection: string }[];

  const matcher = new Glob(pattern);
  const matches: { filepath: string; displayPath: string; bodyLength: number }[] = [];

  for (const row of rows) {
    // Try the virtual path first, then fall back to the relative path.
    if (!matcher.match(row.virtual_path) && !matcher.match(row.path)) continue;

    matches.push({
      filepath: row.virtual_path, // Virtual path for precise lookup
      displayPath: row.path, // Relative path for display
      bodyLength: row.body_length,
    });
  }

  return matches;
}
|
|
2096
|
+
|
|
2097
|
+
// =============================================================================
|
|
2098
|
+
// Context
|
|
2099
|
+
// =============================================================================
|
|
2100
|
+
|
|
2101
|
+
/**
 * Get context for a file path using hierarchical inheritance.
 * Contexts are collection-scoped and inherit from parent directories.
 * For example, context at "/talks" applies to "/talks/2024/keynote.md",
 * but not to "/talks-archive/old.md": a prefix only matches the path itself
 * or paths below it.
 *
 * @param db Database instance (unused - kept for compatibility)
 * @param collectionName Collection name
 * @param path Relative path within the collection
 * @returns The global context (if any) followed by every matching path
 *   context, most general first, joined by blank lines; null if the
 *   collection is unknown or no context applies
 */
export function getContextForPath(db: Database, collectionName: string, path: string): string | null {
  const config = collectionsLoadConfig();
  const coll = getCollection(collectionName);

  if (!coll) return null;

  // Collect ALL matching contexts (global + all path prefixes)
  const contexts: string[] = [];

  // Add global context if present
  if (config.global_context) {
    contexts.push(config.global_context);
  }

  // Add all matching path contexts (from most general to most specific)
  if (coll.context) {
    const normalizedPath = path.startsWith("/") ? path : `/${path}`;

    // Collect all matching prefixes
    const matchingContexts: { prefix: string; context: string }[] = [];
    for (const [prefix, context] of Object.entries(coll.context)) {
      const normalizedPrefix = prefix.startsWith("/") ? prefix : `/${prefix}`;
      // Drop trailing slashes so "/talks" and "/talks/" behave the same and
      // the root prefix ("" or "/") becomes "", which is a parent of every path.
      const prefixBase = normalizedPrefix.replace(/\/+$/, "");
      // Match on a path-segment boundary: the prefix must be the path itself
      // or one of its ancestors, not merely a shared run of characters.
      if (normalizedPath === prefixBase || normalizedPath.startsWith(`${prefixBase}/`)) {
        matchingContexts.push({ prefix: normalizedPrefix, context });
      }
    }

    // Sort by prefix length (shortest/most general first)
    matchingContexts.sort((a, b) => a.prefix.length - b.prefix.length);

    // Add all matching contexts
    for (const match of matchingContexts) {
      contexts.push(match.context);
    }
  }

  // Join all contexts with double newline
  return contexts.length > 0 ? contexts.join('\n\n') : null;
}
|
|
2150
|
+
|
|
2151
|
+
/**
 * Get context for a file path (virtual or filesystem).
 *
 * Resolves the collection and relative path, confirms that an active document
 * with that path exists in the database, and then returns the same merged
 * context that getContextForPath produces for it.
 *
 * @param db Database handle used for the existence check.
 * @param filepath Either a `clawmem://collection/path` virtual path or a
 *   filesystem path located under a configured collection directory.
 * @returns Merged context text, or null when the path is empty, cannot be
 *   mapped to a collection, is not an active document, or has no context.
 */
export function getContextForFile(db: Database, filepath: string): string | null {
  // Handle undefined or null filepath
  if (!filepath) return null;

  let collectionName: string | null = null;
  let relativePath: string | null = null;

  // Parse virtual path format: clawmem://collection/path
  const parsedVirtual = filepath.startsWith('clawmem://') ? parseVirtualPath(filepath) : null;
  if (parsedVirtual) {
    collectionName = parsedVirtual.collectionName;
    relativePath = parsedVirtual.path;
  } else {
    // Filesystem path: find which collection this absolute path belongs to.
    // The collection list is only needed on this branch, so it is not loaded
    // for virtual paths.
    for (const coll of collectionsListCollections()) {
      // Skip collections with missing paths
      if (!coll || !coll.path) continue;

      const collectionRoot = `${coll.path}/`;
      if (filepath.startsWith(collectionRoot)) {
        collectionName = coll.name;
        relativePath = filepath.slice(collectionRoot.length);
        break;
      }
      if (filepath === coll.path) {
        // The collection directory itself maps to the empty relative path.
        collectionName = coll.name;
        relativePath = '';
        break;
      }
    }

    if (!collectionName || relativePath === null) return null;
  }

  // Unknown collections cannot have context; skip the database lookup.
  if (!getCollection(collectionName)) return null;

  // Verify this document exists in the database
  const doc = db.prepare(`
    SELECT d.path
    FROM documents d
    WHERE d.collection = ? AND d.path = ? AND d.active = 1
    LIMIT 1
  `).get(collectionName, relativePath) as { path: string } | null;

  if (!doc) return null;

  // Merging global and path contexts is delegated so that both lookups
  // always apply the same prefix-matching and ordering rules.
  return getContextForPath(db, collectionName, relativePath);
}
|
|
2237
|
+
|
|
2238
|
+
/**
 * Look up a collection in the YAML collections config by name.
 *
 * @param db Database handle (not used by this lookup).
 * @param name Collection name.
 * @returns The collection's name, root directory (`pwd`) and glob pattern,
 *   or null when no such collection is configured.
 */
export function getCollectionByName(db: Database, name: string): { name: string; pwd: string; glob_pattern: string } | null {
  const found = getCollection(name);

  return found
    ? { name: found.name, pwd: found.path, glob_pattern: found.pattern }
    : null;
}
|
|
2252
|
+
|
|
2253
|
+
/**
 * List every configured collection together with document statistics.
 *
 * Collection names, paths and patterns come from the YAML config; the counts
 * and last-modified timestamp come from the `documents` table.
 *
 * @param db Database handle.
 * @returns One entry per configured collection. Counts fall back to 0 and
 *   `last_modified` to null when the collection has no documents.
 */
export function listCollections(db: Database): { name: string; pwd: string; glob_pattern: string; doc_count: number; active_count: number; last_modified: string | null }[] {
  const summaries: { name: string; pwd: string; glob_pattern: string; doc_count: number; active_count: number; last_modified: string | null }[] = [];

  for (const coll of collectionsListCollections()) {
    // Aggregate over all documents (active or not) of this collection.
    const row = db.prepare(`
      SELECT
        COUNT(d.id) as doc_count,
        SUM(CASE WHEN d.active = 1 THEN 1 ELSE 0 END) as active_count,
        MAX(d.modified_at) as last_modified
      FROM documents d
      WHERE d.collection = ?
    `).get(coll.name) as { doc_count: number; active_count: number; last_modified: string | null } | null;

    summaries.push({
      name: coll.name,
      pwd: coll.path,
      glob_pattern: coll.pattern,
      // SUM() and MAX() yield NULL for an empty collection.
      doc_count: row?.doc_count || 0,
      active_count: row?.active_count || 0,
      last_modified: row?.last_modified || null,
    });
  }

  return summaries;
}
|
|
2283
|
+
|
|
2284
|
+
/**
 * Remove a collection: delete its documents, purge content that no active
 * document references any more, and drop the collection from the YAML config.
 *
 * @param db Database handle.
 * @param collectionName Name of the collection to remove.
 * @returns How many document rows and how many content rows were deleted.
 */
export function removeCollection(db: Database, collectionName: string): { deletedDocs: number; cleanedHashes: number } {
  // Step 1: drop every document row that belongs to the collection.
  const { changes: deletedDocs } = db
    .prepare(`DELETE FROM documents WHERE collection = ?`)
    .run(collectionName);

  // Step 2: remove content whose hash is no longer used by any active document.
  const { changes: cleanedHashes } = db.prepare(`
    DELETE FROM content
    WHERE hash NOT IN (SELECT DISTINCT hash FROM documents WHERE active = 1)
  `).run();

  // Step 3: forget the collection in the YAML config.
  collectionsRemoveCollection(collectionName);

  return { deletedDocs, cleanedHashes };
}
|
|
2306
|
+
|
|
2307
|
+
/**
 * Rename a collection in both places that store its name: the `documents`
 * table and the YAML config.
 *
 * @param db Database handle.
 * @param oldName Current collection name.
 * @param newName Name to switch to.
 */
export function renameCollection(db: Database, oldName: string, newName: string): void {
  // Database first: point every document of the old collection at the new name.
  const relabelDocuments = db.prepare(`UPDATE documents SET collection = ? WHERE collection = ?`);
  relabelDocuments.run(newName, oldName);

  // Then the YAML config entry.
  collectionsRenameCollection(oldName, newName);
}
|
|
2319
|
+
|
|
2320
|
+
// =============================================================================
|
|
2321
|
+
// Context Management Operations
|
|
2322
|
+
// =============================================================================
|
|
2323
|
+
|
|
2324
|
+
/**
 * Delete the context stored for one path prefix of a collection.
 *
 * @param db Database handle (not used; contexts live in the YAML config).
 * @param collectionName Collection that owns the context.
 * @param pathPrefix Path prefix whose context should be removed.
 * @returns 1 when a context was removed, 0 otherwise.
 */
export function deleteContext(db: Database, collectionName: string, pathPrefix: string): number {
  // Removal is handled by collections.ts.
  if (collectionsRemoveContext(collectionName, pathPrefix)) {
    return 1;
  }
  return 0;
}
|
|
2333
|
+
|
|
2334
|
+
/**
 * Delete all global contexts: the config-wide global context and the root
 * context (empty path prefix) of every collection.
 *
 * @param db Database handle (not used; contexts live in the YAML config).
 * @returns The number of contexts that actually existed and were removed.
 */
export function deleteGlobalContexts(db: Database): number {
  // Only count the global context when one was actually set; clearing an
  // already-empty slot is not a deletion.
  const hadGlobalContext = Boolean(collectionsLoadConfig().global_context);

  // Remove global context
  setGlobalContext(undefined);
  let deletedCount = hadGlobalContext ? 1 : 0;

  // Remove root context (empty string) from all collections
  for (const coll of collectionsListCollections()) {
    if (collectionsRemoveContext(coll.name, '')) {
      deletedCount++;
    }
  }

  return deletedCount;
}
|
|
2356
|
+
|
|
2357
|
+
/**
 * List every path context from the collections config.
 *
 * @param db Database handle (not used; contexts live in the YAML config).
 * @returns Contexts ordered by collection name, then by path prefix length
 *   (longest first), then alphabetically by path prefix.
 */
export function listPathContexts(db: Database): { collection_name: string; path_prefix: string; context: string }[] {
  // Reshape the config entries into the row format callers expect.
  const rows = collectionsListAllContexts().map(entry => ({
    collection_name: entry.collection,
    path_prefix: entry.path,
    context: entry.context,
  }));

  rows.sort((left, right) =>
    left.collection_name !== right.collection_name
      ? left.collection_name.localeCompare(right.collection_name) // 1. collection
      : left.path_prefix.length !== right.path_prefix.length
        ? right.path_prefix.length - left.path_prefix.length // 2. longest prefix first
        : left.path_prefix.localeCompare(right.path_prefix) // 3. alphabetical
  );

  return rows;
}
|
|
2382
|
+
|
|
2383
|
+
/**
 * Names of all collections defined in the YAML config.
 *
 * @param db Database handle (not used).
 * @returns One `{ name }` object per configured collection.
 */
export function getAllCollections(db: Database): { name: string }[] {
  const names: { name: string }[] = [];
  for (const { name } of collectionsListCollections()) {
    names.push({ name });
  }
  return names;
}
|
|
2390
|
+
|
|
2391
|
+
/**
 * Find configured collections that have no context entries at all
 * (not even a root context).
 *
 * @param db Database handle, used to count each collection's active documents.
 * @returns Name, root directory and active document count of every such
 *   collection, sorted by name.
 */
export function getCollectionsWithoutContext(db: Database): { name: string; pwd: string; doc_count: number }[] {
  return collectionsListCollections()
    // Keep only collections whose context map is missing or empty.
    .filter(coll => !coll.context || Object.keys(coll.context).length === 0)
    .map(coll => {
      const counted = db.prepare(`
        SELECT COUNT(d.id) as doc_count
        FROM documents d
        WHERE d.collection = ? AND d.active = 1
      `).get(coll.name) as { doc_count: number } | null;

      return {
        name: coll.name,
        pwd: coll.path,
        doc_count: counted?.doc_count || 0,
      };
    })
    .sort((left, right) => left.name.localeCompare(right.name));
}
|
|
2422
|
+
|
|
2423
|
+
/**
 * Get top-level directories in a collection that don't have context.
 * Useful for suggesting where context might be needed.
 *
 * A directory counts as covered when the collection has a root context
 * ("" or "/") or a context whose prefix names exactly that directory.
 * Leading and trailing slashes on configured prefixes are ignored, so
 * "talks", "/talks" and "talks/" all cover the directory "talks".
 *
 * @param db Database handle.
 * @param collectionName Collection to inspect.
 * @returns Sorted names of uncovered top-level directories; empty when the
 *   collection is not configured.
 */
export function getTopLevelPathsWithoutContext(db: Database, collectionName: string): string[] {
  // Get existing contexts for this collection from YAML
  const yamlColl = getCollection(collectionName);
  if (!yamlColl) return [];

  // Get all paths in the collection from database
  const paths = db.prepare(`
    SELECT DISTINCT path FROM documents
    WHERE collection = ? AND active = 1
  `).all(collectionName) as { path: string }[];

  // Normalise configured prefixes to the slash-less form used for directory
  // names below. Context lookup accepts prefixes with or without a leading
  // slash, so both spellings have to be recognised here as well.
  let hasRootContext = false;
  const coveredPrefixes = new Set<string>();
  if (yamlColl.context) {
    for (const prefix of Object.keys(yamlColl.context)) {
      const trimmed = prefix.replace(/^\/+|\/+$/g, '');
      if (trimmed === '') {
        hasRootContext = true;
      } else {
        coveredPrefixes.add(trimmed);
      }
    }
  }

  // A root context applies to everything, so nothing is missing.
  if (hasRootContext) return [];

  // Extract top-level directories (first path component). Paths with a single
  // component are files at the collection root and are ignored.
  const topLevelDirs = new Set<string>();
  for (const { path } of paths) {
    const parts = path.split('/').filter(Boolean);
    if (parts.length > 1) {
      const dir = parts[0];
      if (dir) topLevelDirs.add(dir);
    }
  }

  // Keep only directories that no context names directly.
  return [...topLevelDirs].filter(dir => !coveredPrefixes.has(dir)).sort();
}
|
|
2475
|
+
|
|
2476
|
+
// =============================================================================
|
|
2477
|
+
// FTS Search
|
|
2478
|
+
// =============================================================================
|
|
2479
|
+
|
|
2480
|
+
/**
 * Reduce a raw search term to characters that are safe inside a quoted FTS5
 * token: Unicode letters, Unicode digits and apostrophes. Everything else is
 * dropped and the remainder is lower-cased.
 */
function sanitizeFTS5Term(term: string): string {
  // Collect the runs of allowed characters and glue them back together.
  const allowedRuns = term.match(/[\p{L}\p{N}']+/gu);
  const cleaned = allowedRuns === null ? '' : allowedRuns.join('');
  return cleaned.toLowerCase();
}
|
|
2483
|
+
|
|
2484
|
+
/**
 * Turn free-text input into an FTS5 MATCH expression.
 *
 * The input is split on whitespace, each piece is sanitized, and every
 * non-empty term becomes a quoted prefix query (`"term"*`). Terms are
 * combined with AND.
 *
 * @returns The MATCH expression, or null when no usable term remains.
 */
function buildFTS5Query(query: string): string | null {
  const clauses: string[] = [];

  for (const piece of query.split(/\s+/)) {
    const term = sanitizeFTS5Term(piece);
    if (term.length > 0) {
      clauses.push(`"${term}"*`);
    }
  }

  // A single clause joins to itself, so no special case is needed.
  return clauses.length === 0 ? null : clauses.join(' AND ');
}
|
|
2492
|
+
|
|
2493
|
+
/**
 * Full-text search over active documents using the FTS5 index.
 *
 * @param db Database handle.
 * @param query Free-text query; it is converted to an AND of prefix terms.
 * @param limit Maximum number of results.
 * @param collectionId Legacy single-collection filter; ignored when
 *   `collections` is non-empty.
 * @param collections Collection names to restrict the search to.
 * @returns Results ordered best match first. `score` is in [0, 1), higher is
 *   better, and does not depend on the other results of the same query.
 */
export function searchFTS(db: Database, query: string, limit: number = 20, collectionId?: number, collections?: string[]): SearchResult[] {
  const ftsQuery = buildFTS5Query(query);
  if (!ftsQuery) return [];

  let sql = `
    SELECT
      'clawmem://' || d.collection || '/' || d.path as filepath,
      d.collection || '/' || d.path as display_path,
      d.title,
      content.doc as body,
      d.hash,
      d.modified_at,
      bm25(documents_fts, 10.0, 1.0) as bm25_score
    FROM documents_fts f
    JOIN documents d ON d.id = f.rowid
    JOIN content ON content.hash = d.hash
    WHERE documents_fts MATCH ? AND d.active = 1
  `;
  const params: (string | number)[] = [ftsQuery];

  if (collections && collections.length > 0) {
    // SQL-level collection filtering — avoids full-table scan + post-filter
    const placeholders = collections.map(() => '?').join(',');
    sql += ` AND d.collection IN (${placeholders})`;
    params.push(...collections);
  } else if (collectionId) {
    // Legacy parameter — kept for backward compatibility
    sql += ` AND d.collection = ?`;
    params.push(String(collectionId));
  }

  // bm25 lower is better; sort ascending.
  sql += ` ORDER BY bm25_score ASC LIMIT ?`;
  params.push(limit);

  const rows = db.prepare(sql).all(...params) as { filepath: string; display_path: string; title: string; body: string; hash: string; modified_at: string; bm25_score: number }[];
  return rows.map(row => {
    const collectionName = row.filepath.split('//')[1]?.split('/')[0] || "";
    // FTS5's bm25() reports better matches as more negative numbers, so the
    // match strength is the magnitude of the raw value. Clamping the raw value
    // at zero would give every hit the same score.
    // |x| / (1 + |x|) maps strength into [0, 1) with higher meaning better,
    // without per-query normalization, so absolute "strong signal" thresholds
    // stay meaningful across queries.
    const strength = Math.abs(row.bm25_score);
    const score = strength / (1 + strength);
    return {
      filepath: row.filepath,
      displayPath: row.display_path,
      title: row.title,
      hash: row.hash,
      docid: getDocid(row.hash),
      collectionName,
      modifiedAt: row.modified_at || "",
      bodyLength: row.body.length,
      body: row.body,
      context: getContextForFile(db, row.filepath),
      score,
      source: "fts" as const,
    };
  });
}
|
|
2550
|
+
|
|
2551
|
+
// =============================================================================
|
|
2552
|
+
// Vector Search
|
|
2553
|
+
// =============================================================================
|
|
2554
|
+
|
|
2555
|
+
/**
 * Semantic search: embed the query and return the nearest documents from the
 * sqlite-vec index.
 *
 * @param db Database handle.
 * @param query Free-text query to embed.
 * @param model Embedding model name passed to the embedder.
 * @param limit Maximum number of documents to return.
 * @param collectionId Legacy single-collection filter; ignored when
 *   `collections` is non-empty.
 * @param collections Collection names to restrict the search to.
 * @returns At most `limit` documents, nearest first, one entry per document
 *   (its closest fragment). Empty when the vector table does not exist, the
 *   query cannot be embedded, or nothing matches.
 */
export async function searchVec(db: Database, query: string, model: string, limit: number = 20, collectionId?: number, collections?: string[]): Promise<SearchResult[]> {
  const vecTable = db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
  if (!vecTable) return [];

  const queryVector = await getEmbedding(query, model, true);
  if (!queryVector) return [];

  // IMPORTANT: the lookup is deliberately split into two queries. sqlite-vec
  // virtual tables hang indefinitely when combined with JOINs in the same
  // query, so do NOT merge these steps into a single JOINed statement.
  // See: https://github.com/tobi/qmd/pull/23

  // Step 1: nearest neighbours straight from sqlite-vec (no JOINs allowed).
  // Three times the limit is requested because several fragments may belong
  // to the same document and are collapsed below.
  const neighbours = db.prepare(`
    SELECT hash_seq, distance
    FROM vectors_vec
    WHERE embedding MATCH ? AND k = ?
  `).all(new Float32Array(queryVector), limit * 3) as { hash_seq: string; distance: number }[];

  if (neighbours.length === 0) return [];

  // Step 2: resolve each neighbour key to its fragment and document data.
  const neighbourKeys: string[] = [];
  const distanceByKey = new Map<string, number>();
  for (const neighbour of neighbours) {
    neighbourKeys.push(neighbour.hash_seq);
    distanceByKey.set(neighbour.hash_seq, neighbour.distance);
  }

  // Document lookup (includes fragment metadata)
  const keyPlaceholders = neighbourKeys.map(() => '?').join(',');
  let docSql = `
    SELECT
      cv.hash || '_' || cv.seq as hash_seq,
      cv.hash,
      cv.pos,
      cv.fragment_type,
      cv.fragment_label,
      'clawmem://' || d.collection || '/' || d.path as filepath,
      d.collection || '/' || d.path as display_path,
      d.title,
      d.modified_at,
      content.doc as body
    FROM content_vectors cv
    JOIN documents d ON d.hash = cv.hash AND d.active = 1
    JOIN content ON content.hash = d.hash
    WHERE cv.hash || '_' || cv.seq IN (${keyPlaceholders})
  `;
  const params: string[] = [...neighbourKeys];

  if (collections && collections.length > 0) {
    const colPlaceholders = collections.map(() => '?').join(',');
    docSql += ` AND d.collection IN (${colPlaceholders})`;
    params.push(...collections);
  } else if (collectionId) {
    // Legacy single-collection filter.
    docSql += ` AND d.collection = ?`;
    params.push(String(collectionId));
  }

  const docRows = db.prepare(docSql).all(...params) as {
    hash_seq: string; hash: string; pos: number; filepath: string;
    display_path: string; title: string; body: string; modified_at: string;
    fragment_type: string | null; fragment_label: string | null;
  }[];

  // Collapse to one entry per document, keeping its closest fragment.
  const closestByFile = new Map<string, { row: typeof docRows[0]; bestDist: number }>();
  for (const row of docRows) {
    const distance = distanceByKey.get(row.hash_seq) ?? 1;
    const current = closestByFile.get(row.filepath);
    if (current && distance >= current.bestDist) continue;
    closestByFile.set(row.filepath, { row, bestDist: distance });
  }

  const ranked = Array.from(closestByFile.values());
  ranked.sort((left, right) => left.bestDist - right.bestDist);

  return ranked.slice(0, limit).map(({ row, bestDist }) => {
    const collectionName = row.filepath.split('//')[1]?.split('/')[0] || "";
    return {
      filepath: row.filepath,
      displayPath: row.display_path,
      title: row.title,
      hash: row.hash,
      docid: getDocid(row.hash),
      collectionName,
      modifiedAt: row.modified_at || "",
      bodyLength: row.body.length,
      body: row.body,
      context: getContextForFile(db, row.filepath),
      score: 1 - bestDist, // Cosine similarity = 1 - cosine distance
      source: "vec" as const,
      chunkPos: row.pos,
      fragmentType: row.fragment_type ?? undefined,
      fragmentLabel: row.fragment_label ?? undefined,
    };
  });
}
|
|
2650
|
+
|
|
2651
|
+
// =============================================================================
|
|
2652
|
+
// Embeddings
|
|
2653
|
+
// =============================================================================
|
|
2654
|
+
|
|
2655
|
+
/**
 * Embed a piece of text with the default LLM backend.
 *
 * @param text Raw text to embed.
 * @param model Embedding model name.
 * @param isQuery True to format the text with the query prompt template,
 *   false to use the document template.
 * @returns The embedding vector, or null when the backend returns none.
 */
async function getEmbedding(text: string, model: string, isQuery: boolean): Promise<number[] | null> {
  // Queries and documents use different prompt templates.
  let prompt: string;
  if (isQuery) {
    prompt = formatQueryForEmbedding(text);
  } else {
    prompt = formatDocForEmbedding(text);
  }

  const llm = getDefaultLlamaCpp();
  const response = await llm.embed(prompt, { model, isQuery });
  const vector = response?.embedding;
  return vector ? vector : null;
}
|
|
2662
|
+
|
|
2663
|
+
/**
 * Find content that still needs a document-level embedding.
 *
 * Selects every hash used by at least one active document for which no
 * `content_vectors` row with `seq = 0` exists.
 *
 * @param db Database handle.
 * @returns One row per hash: the hash, the document body, and one of the
 *   paths using that hash (the smallest, for display).
 */
export function getHashesForEmbedding(db: Database): { hash: string; body: string; path: string }[] {
  // The LEFT JOIN plus "v.hash IS NULL" keeps only hashes without a seq-0 vector.
  const pendingQuery = db.prepare(`
    SELECT d.hash, c.doc as body, MIN(d.path) as path
    FROM documents d
    JOIN content c ON d.hash = c.hash
    LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
    WHERE d.active = 1 AND v.hash IS NULL
    GROUP BY d.hash
  `);

  return pendingQuery.all() as { hash: string; body: string; path: string }[];
}
|
|
2677
|
+
|
|
2678
|
+
/**
 * Find content that still needs fragment-level embeddings.
 *
 * Selects every hash used by at least one active document for which no
 * `content_vectors` row with a non-null `fragment_type` exists.
 *
 * @param db Database handle.
 * @returns One row per hash: the hash and document body, plus the smallest
 *   path, title and collection among the documents using that hash.
 */
export function getHashesNeedingFragments(db: Database): { hash: string; body: string; path: string; title: string; collection: string }[] {
  // The LEFT JOIN plus "v.hash IS NULL" keeps only hashes without any fragment vector.
  const pendingQuery = db.prepare(`
    SELECT d.hash, c.doc as body, MIN(d.path) as path, MIN(d.title) as title, MIN(d.collection) as collection
    FROM documents d
    JOIN content c ON d.hash = c.hash
    LEFT JOIN content_vectors v ON d.hash = v.hash AND v.fragment_type IS NOT NULL
    WHERE d.active = 1 AND v.hash IS NULL
    GROUP BY d.hash
  `);

  return pendingQuery.all() as { hash: string; body: string; path: string; title: string; collection: string }[];
}
|
|
2692
|
+
|
|
2693
|
+
/**
 * Remove every stored embedding so the index can be rebuilt from scratch.
 *
 * Empties `content_vectors`, drops the `vectors_vec` table if it exists, and
 * forgets the cached vector-table dimensions for this database handle.
 *
 * @param db - Open index database.
 */
export function clearAllEmbeddings(db: Database): void {
  const resetStatements = [
    `DELETE FROM content_vectors`,
    `DROP TABLE IF EXISTS vectors_vec`,
  ];
  for (const sql of resetStatements) {
    db.exec(sql);
  }
  vecTableDimsCache.delete(db);
}
|
|
2702
|
+
|
|
2703
|
+
/**
 * Store one embedding: the vector goes into `vectors_vec`, its metadata into
 * `content_vectors`.
 *
 * The `vectors_vec` key is `"<hash>_<seq>"`. Optional fragment fields that are
 * omitted are written as NULL.
 *
 * @param db - Open index database.
 * @param hash - Content hash the embedding belongs to.
 * @param seq - Sequence number of the chunk/fragment within the content.
 * @param pos - Position value stored alongside the chunk.
 * @param embedding - The vector to store.
 * @param model - Name of the model that produced the vector.
 * @param embeddedAt - Timestamp string recorded with the row.
 * @param fragmentType - Optional fragment type.
 * @param fragmentLabel - Optional fragment label.
 * @param canonicalId - Optional canonical id.
 */
export function insertEmbedding(
  db: Database,
  hash: string,
  seq: number,
  pos: number,
  embedding: Float32Array,
  model: string,
  embeddedAt: string,
  fragmentType?: string,
  fragmentLabel?: string,
  canonicalId?: string
): void {
  const vectorKey = hash + "_" + seq;

  // vec0 virtual tables have no INSERT OR REPLACE, so clear any previous row
  // for this key first. The delete is best-effort: during a dimension
  // migration the table may not exist yet (ensureVecTable drops + recreates).
  try {
    db.prepare(`DELETE FROM vectors_vec WHERE hash_seq = ?`).run(vectorKey);
  } catch {}

  db.prepare(`INSERT INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`).run(vectorKey, embedding);

  const metadataValues = [
    hash,
    seq,
    pos,
    model,
    embeddedAt,
    fragmentType ?? null,
    fragmentLabel ?? null,
    canonicalId ?? null,
  ];
  db.prepare(
    `INSERT OR REPLACE INTO content_vectors (hash, seq, pos, model, embedded_at, fragment_type, fragment_label, canonical_id) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`
  ).run(...metadataValues);
}
|
|
2728
|
+
|
|
2729
|
+
// =============================================================================
|
|
2730
|
+
// Query expansion
|
|
2731
|
+
// =============================================================================
|
|
2732
|
+
|
|
2733
|
+
/**
 * Produce alternative phrasings of a search query, with result caching.
 *
 * The cache key covers the query, the model name and, when given, the intent.
 * On a cache hit the original query is returned followed by at most the first
 * two cached variants. On a miss the LLM is asked for expansions, the variants
 * that differ from the original are cached (newline-joined), and the original
 * plus every distinct variant is returned.
 *
 * @param query - The user's query; always the first element of the result.
 * @param model - Model name; only used in the cache key (the llama.cpp
 *   backend uses its own hardcoded model).
 * @param db - Open index database holding the result cache.
 * @param intent - Optional intent text passed to the LLM to steer expansion.
 * @returns The original query followed by its expansions.
 */
export async function expandQuery(query: string, model: string = DEFAULT_QUERY_MODEL, db: Database, intent?: string): Promise<string[]> {
  const cacheKey = getCacheKey("expandQuery", { query, model, ...(intent && { intent }) });

  const hit = getCachedResult(db, cacheKey);
  if (hit) {
    const cachedVariants: string[] = [];
    for (const rawLine of hit.split('\n')) {
      const variant = rawLine.trim();
      if (variant.length > 0) cachedVariants.push(variant);
    }
    return [query, ...cachedVariants.slice(0, 2)];
  }

  // Cache miss: ask the LLM, passing the intent along when provided.
  const backend = getDefaultLlamaCpp();
  const expansions = await backend.expandQuery(query, { intent });
  const variantTexts = expansions.map(expansion => expansion.text);

  // Only variants that differ from the original query are worth caching.
  const novelVariants = variantTexts.filter(text => text !== query);
  if (novelVariants.length > 0) {
    setCachedResult(db, cacheKey, novelVariants.join('\n'));
  }

  const distinct = new Set([query, ...variantTexts]);
  return Array.from(distinct);
}
|
|
2756
|
+
|
|
2757
|
+
// =============================================================================
|
|
2758
|
+
// Reranking
|
|
2759
|
+
// =============================================================================
|
|
2760
|
+
|
|
2761
|
+
/**
 * Score documents against a query with a reranker model, using cached scores
 * where available.
 *
 * Flow:
 *   1. Documents with identical text are collapsed so each text is scored once;
 *      the score is then applied to every file sharing that text.
 *   2. Cached scores are looked up per unique document (key: rerank query,
 *      file, model).
 *   3. If `CLAWMEM_RERANK_URL` is set, uncached documents are sent to
 *      `<url>/v1/rerank` in sequential batches of 4, each text truncated to
 *      400 characters.
 *   4. Every document still unscored afterwards is scored by the local
 *      llama.cpp reranker. This covers the remote being unset, failing
 *      outright, failing part-way, or omitting some results.
 *
 * @param query - The search query.
 * @param documents - Candidates as `{ file, text }` pairs.
 * @param model - Reranker model name (cache key and local reranker option).
 * @param db - Open index database holding the result cache.
 * @param intent - Optional intent text, prepended to the query for scoring.
 * @returns One `{ file, score }` per input document, highest score first.
 *   Documents that could not be scored get score 0.
 */
export async function rerank(query: string, documents: { file: string; text: string }[], model: string = DEFAULT_RERANK_MODEL, db: Database, intent?: string): Promise<{ file: string; score: number }[]> {
  // Prepend intent so the reranker scores with domain context.
  const rerankQuery = intent ? `${intent}\n\n${query}` : query;

  // Collapse identical chunk texts: same content from different files shares one score.
  const textToFiles = new Map<string, string[]>();
  const uniqueDocs: RerankDocument[] = [];
  for (const doc of documents) {
    const files = textToFiles.get(doc.text);
    if (files) {
      files.push(doc.file);
    } else {
      textToFiles.set(doc.text, [doc.file]);
      uniqueDocs.push(doc);
    }
  }

  const scores = new Map<string, number>();
  const applyScore = (doc: RerankDocument, score: number): void => {
    for (const file of textToFiles.get(doc.text) ?? [doc.file]) scores.set(file, score);
  };
  const cacheKeyFor = (file: string): string => getCacheKey("rerank", { query: rerankQuery, file, model });

  // Serve what we can from the cache.
  const uncachedDocs: RerankDocument[] = [];
  for (const doc of uniqueDocs) {
    const cached = getCachedResult(db, cacheKeyFor(doc.file));
    if (cached !== null) {
      applyScore(doc, parseFloat(cached));
    } else {
      uncachedDocs.push({ file: doc.file, text: doc.text });
    }
  }

  if (uncachedDocs.length > 0) {
    const rerankUrl = Bun.env.CLAWMEM_RERANK_URL;

    // Remote GPU reranker first. Texts are truncated to ~400 chars so that
    // query + document fit the server's 512-token context (~2 chars/token for
    // mixed content); batches of 4 keep VRAM usage bounded.
    if (rerankUrl) {
      try {
        for (let i = 0; i < uncachedDocs.length; i += 4) {
          const batch = uncachedDocs.slice(i, i + 4);
          const resp = await fetch(`${rerankUrl}/v1/rerank`, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({
              query: rerankQuery,
              documents: batch.map(d => d.text.slice(0, 400)),
            }),
          });
          if (!resp.ok) break; // Remote failed mid-way; the rest is handled locally below.

          const data = await resp.json() as { results: { index: number; relevance_score: number }[] };
          for (const r of data.results) {
            const doc = batch[r.index];
            if (!doc) continue; // Index outside this batch: leave for the local fallback.
            setCachedResult(db, cacheKeyFor(doc.file), r.relevance_score.toString());
            applyScore(doc, r.relevance_score);
          }
        }
      } catch {
        // Remote unreachable or returned a malformed payload; fall through to local.
      }
    }

    // Local fallback for everything the remote did not score. Deciding this
    // from what is actually missing (rather than from "did anything get a
    // score") keeps cache hits or a partially successful remote run from
    // leaving documents unscored.
    const remaining = uncachedDocs.filter(d => !scores.has(d.file));
    if (remaining.length > 0) {
      const remainingByFile = new Map<string, RerankDocument>();
      for (const d of remaining) {
        if (!remainingByFile.has(d.file)) remainingByFile.set(d.file, d);
      }

      const llm = getDefaultLlamaCpp();
      const rerankResult = await llm.rerank(rerankQuery, remaining, { model });
      for (const result of rerankResult.results) {
        setCachedResult(db, cacheKeyFor(result.file), result.score.toString());
        const doc = remainingByFile.get(result.file);
        if (doc) {
          applyScore(doc, result.score);
        } else {
          scores.set(result.file, result.score);
        }
      }
    }
  }

  // One entry per input document, best first; missing or NaN scores become 0.
  return documents
    .map(doc => ({ file: doc.file, score: scores.get(doc.file) || 0 }))
    .sort((a, b) => b.score - a.score);
}
|
|
2861
|
+
|
|
2862
|
+
// =============================================================================
|
|
2863
|
+
// Document retrieval
|
|
2864
|
+
// =============================================================================
|
|
2865
|
+
|
|
2866
|
+
/** Row shape used when casting results of the document lookup queries in this file. */
type DbDocRow = {
  /** `'clawmem://' || collection || '/' || path`, as computed in SQL. */
  virtual_path: string;
  /** `collection || '/' || path`, as computed in SQL. */
  display_path: string;
  title: string;
  /** Content hash joining `documents` to `content`. */
  hash: string;
  collection: string;
  // NOTE(review): the SELECT lists visible in this part of the file do not
  // project d.path, so this field may be undefined at runtime — confirm.
  path: string;
  modified_at: string;
  /** `LENGTH(content.doc)`. */
  body_length: number;
  /** Only selected when the caller asks for the body. */
  body?: string;
};
|
|
2877
|
+
|
|
2878
|
+
/**
 * Resolve a single active document from a user-supplied reference.
 *
 * Accepted forms, tried in this order:
 *   1. Docid: "#abc123", or a bare 6-character hex string
 *   2. Exact virtual path: clawmem://collection/path/to/file.md
 *   3. Suffix of a virtual path, matched literally (`%` and `_` in the
 *      reference are not treated as wildcards)
 *   4. Absolute path under a configured collection root, or a path relative
 *      to any configured collection
 *
 * A trailing ":<digits>" suffix is ignored and a leading "~/" is expanded to
 * the home directory before path matching.
 *
 * @param db - Open index database.
 * @param filename - The reference as supplied by the caller.
 * @param options - Set `includeBody` to also return the document text.
 * @returns The document metadata (plus `body` when requested), or a
 *   `not_found` result. A failed docid lookup returns no suggestions; any
 *   other miss includes similar file names.
 */
export function findDocument(db: Database, filename: string, options: { includeBody?: boolean } = {}): DocumentResult | DocumentNotFound {
  // Strip a trailing ":<line>" suffix such as "notes/a.md:42".
  let filepath = filename.replace(/:\d+$/, "");

  // Docid lookup (#hash or just 6-char hex).
  if (filepath.startsWith('#') || /^[a-f0-9]{6}$/i.test(filepath)) {
    const docidMatch = findDocumentByDocid(db, filepath);
    if (!docidMatch) {
      return { error: "not_found", query: filename, similarFiles: [] };
    }
    filepath = docidMatch.filepath;
  }

  if (filepath.startsWith('~/')) {
    filepath = homedir() + filepath.slice(1);
  }

  const bodyCol = options.includeBody ? `, content.doc as body` : ``;

  // Shared SELECT; each lookup below only supplies its own WHERE condition.
  const baseSelect = `
    SELECT
      'clawmem://' || d.collection || '/' || d.path as virtual_path,
      d.collection || '/' || d.path as display_path,
      d.title,
      d.hash,
      d.collection,
      d.modified_at,
      LENGTH(content.doc) as body_length
      ${bodyCol}
    FROM documents d
    JOIN content ON content.hash = d.hash
  `;
  const selectOne = (condition: string, ...params: string[]): DbDocRow | null =>
    db.prepare(`${baseSelect} WHERE ${condition} AND d.active = 1 LIMIT 1`).get(...params) as DbDocRow | null;

  // 1) Exact virtual path.
  let doc = selectOne(`'clawmem://' || d.collection || '/' || d.path = ?`, filepath);

  // 2) Virtual path ending with the reference. LIKE metacharacters in the
  //    reference are escaped so "my_file.md" cannot match "myXfile.md", and an
  //    empty reference is skipped because "%" alone would match any document.
  if (!doc && filepath.length > 0) {
    const literalSuffix = filepath.replace(/[\\%_]/g, "\\$&");
    doc = selectOne(
      `'clawmem://' || d.collection || '/' || d.path LIKE ? ESCAPE '\\'`,
      `%${literalSuffix}`
    );
  }

  // 3) Absolute or collection-relative path, using collection roots from YAML.
  if (!doc && !filepath.startsWith('clawmem://')) {
    for (const coll of collectionsListCollections()) {
      let relativePath: string | null = null;
      if (filepath.startsWith(coll.path + '/')) {
        // Absolute path inside this collection: keep the part after the root.
        relativePath = filepath.slice(coll.path.length + 1);
      } else if (!filepath.startsWith('/')) {
        // Not absolute: try it as a path relative to this collection.
        relativePath = filepath;
      }

      if (relativePath) {
        doc = selectOne(`d.collection = ? AND d.path = ?`, coll.name, relativePath);
        if (doc) break;
      }
    }
  }

  if (!doc) {
    const similar = findSimilarFiles(db, filepath, 5, 5);
    return { error: "not_found", query: filename, similarFiles: similar };
  }

  // display_path already starts with the collection name, so the fallback
  // must not prepend the collection a second time.
  const virtualPath = doc.virtual_path || `clawmem://${doc.display_path}`;
  const context = getContextForFile(db, virtualPath);

  return {
    filepath: virtualPath,
    displayPath: doc.display_path,
    title: doc.title,
    context,
    hash: doc.hash,
    docid: getDocid(doc.hash),
    collectionName: doc.collection,
    modifiedAt: doc.modified_at,
    bodyLength: doc.body_length,
    ...(options.includeBody && doc.body !== undefined && { body: doc.body }),
  };
}
|
|
2992
|
+
|
|
2993
|
+
/**
 * Load the text of an active document, optionally restricted to a line range.
 *
 * The document is located from `doc.filepath`, trying in order:
 *   1. a `clawmem://collection/path` virtual path,
 *   2. an absolute path under one of the configured collection roots,
 *   3. a plain `collection/path` string (e.g. "_clawmem/decisions/foo.md").
 *
 * @param db - Open index database.
 * @param doc - A document result, or any object carrying a `filepath`.
 * @param fromLine - 1-based first line to return. Omitted, 0 or negative
 *   values start at the first line.
 * @param maxLines - Maximum number of lines to return. Omitted means "to the
 *   end"; 0 or negative values yield an empty string.
 * @returns The (possibly sliced) body, or null when no document matches.
 */
export function getDocumentBody(db: Database, doc: DocumentResult | { filepath: string }, fromLine?: number, maxLines?: number): string | null {
  const filepath = doc.filepath;

  const bodyByCollectionPath = (collection: string, path: string): { body: string } | null =>
    db.prepare(`
      SELECT content.doc as body
      FROM documents d
      JOIN content ON content.hash = d.hash
      WHERE d.collection = ? AND d.path = ? AND d.active = 1
    `).get(collection, path) as { body: string } | null;

  let row: { body: string } | null = null;

  // 1) Virtual path.
  if (filepath.startsWith('clawmem://')) {
    row = db.prepare(`
      SELECT content.doc as body
      FROM documents d
      JOIN content ON content.hash = d.hash
      WHERE 'clawmem://' || d.collection || '/' || d.path = ? AND d.active = 1
    `).get(filepath) as { body: string } | null;
  }

  // 2) Absolute path under a collection root from the YAML config.
  if (!row) {
    for (const coll of collectionsListCollections()) {
      if (!filepath.startsWith(coll.path + '/')) continue;
      row = bodyByCollectionPath(coll.name, filepath.slice(coll.path.length + 1));
      if (row) break;
    }
  }

  // 3) "collection/path" form: split at the first slash.
  if (!row) {
    const slashIdx = filepath.indexOf('/');
    if (slashIdx > 0) {
      row = bodyByCollectionPath(filepath.slice(0, slashIdx), filepath.slice(slashIdx + 1));
    }
  }

  if (!row) return null;
  if (fromLine === undefined && maxLines === undefined) return row.body;

  const lines = row.body.split('\n');
  // Clamp both bounds: Array.prototype.slice interprets negative indices as
  // offsets from the end, which would return lines from the wrong place.
  const start = Math.max(0, (fromLine || 1) - 1);
  const end = maxLines !== undefined ? start + Math.max(0, maxLines) : lines.length;
  return lines.slice(start, end).join('\n');
}
|
|
3057
|
+
|
|
3058
|
+
/**
 * Resolve several active documents from a glob pattern or a comma-separated
 * list of references.
 *
 * A pattern is treated as a list when it contains a comma and neither `*` nor
 * `?`; otherwise it is matched as a glob. List entries are resolved by exact
 * virtual path first, then as a literal suffix of a virtual path (`%` and `_`
 * in the entry are not wildcards).
 *
 * Documents whose body length exceeds `maxBytes` are reported as skipped with
 * only their paths filled in.
 *
 * @param db - Open index database.
 * @param pattern - Glob pattern or comma-separated references.
 * @param options - `includeBody` adds the text to each result; `maxBytes`
 *   overrides the default size limit.
 * @returns The resolved documents plus one error message per unresolved list
 *   entry (or a single message when a glob matches nothing).
 */
export function findDocuments(
  db: Database,
  pattern: string,
  options: { includeBody?: boolean; maxBytes?: number } = {}
): { docs: MultiGetResult[]; errors: string[] } {
  const isCommaSeparated = pattern.includes(',') && !pattern.includes('*') && !pattern.includes('?');
  const errors: string[] = [];
  const maxBytes = options.maxBytes ?? DEFAULT_MULTI_GET_MAX_BYTES;

  const bodyCol = options.includeBody ? `, content.doc as body` : ``;
  const baseSelect = `
    SELECT
      'clawmem://' || d.collection || '/' || d.path as virtual_path,
      d.collection || '/' || d.path as display_path,
      d.title,
      d.hash,
      d.collection,
      d.modified_at,
      LENGTH(content.doc) as body_length
      ${bodyCol}
    FROM documents d
    JOIN content ON content.hash = d.hash
  `;

  let fileRows: DbDocRow[];

  if (isCommaSeparated) {
    const names = pattern.split(',').map(s => s.trim()).filter(Boolean);
    fileRows = [];
    for (const name of names) {
      let doc = db.prepare(`
        ${baseSelect}
        WHERE 'clawmem://' || d.collection || '/' || d.path = ? AND d.active = 1
      `).get(name) as DbDocRow | null;

      if (!doc) {
        // Suffix match with LIKE metacharacters escaped, so an underscore in
        // a file name only matches an underscore.
        const literalSuffix = name.replace(/[\\%_]/g, "\\$&");
        doc = db.prepare(`
          ${baseSelect}
          WHERE 'clawmem://' || d.collection || '/' || d.path LIKE ? ESCAPE '\\' AND d.active = 1
          LIMIT 1
        `).get(`%${literalSuffix}`) as DbDocRow | null;
      }

      if (doc) {
        fileRows.push(doc);
        continue;
      }

      const similar = findSimilarFiles(db, name, 5, 3);
      let msg = `File not found: ${name}`;
      if (similar.length > 0) {
        msg += ` (did you mean: ${similar.join(', ')}?)`;
      }
      errors.push(msg);
    }
  } else {
    // Glob pattern match.
    const matched = matchFilesByGlob(db, pattern);
    if (matched.length === 0) {
      errors.push(`No files matched pattern: ${pattern}`);
      return { docs: [], errors };
    }
    const virtualPaths = matched.map(m => m.filepath);
    const placeholders = virtualPaths.map(() => '?').join(',');
    fileRows = db.prepare(`
      ${baseSelect}
      WHERE 'clawmem://' || d.collection || '/' || d.path IN (${placeholders}) AND d.active = 1
    `).all(...virtualPaths) as DbDocRow[];
  }

  const results: MultiGetResult[] = [];

  for (const row of fileRows) {
    // display_path already starts with the collection name, so the fallback
    // must not prepend the collection a second time.
    const virtualPath = row.virtual_path || `clawmem://${row.display_path}`;
    const context = getContextForFile(db, virtualPath);

    if (row.body_length > maxBytes) {
      results.push({
        doc: { filepath: virtualPath, displayPath: row.display_path },
        skipped: true,
        skipReason: `File too large (${Math.round(row.body_length / 1024)}KB > ${Math.round(maxBytes / 1024)}KB)`,
      });
      continue;
    }

    results.push({
      doc: {
        filepath: virtualPath,
        displayPath: row.display_path,
        // Fall back to the file name when the document has no title.
        title: row.title || row.display_path.split('/').pop() || row.display_path,
        context,
        hash: row.hash,
        docid: getDocid(row.hash),
        collectionName: row.collection,
        modifiedAt: row.modified_at,
        bodyLength: row.body_length,
        ...(options.includeBody && row.body !== undefined && { body: row.body }),
      },
      skipped: false,
    });
  }

  return { docs: results, errors };
}
|
|
3167
|
+
|
|
3168
|
+
// =============================================================================
|
|
3169
|
+
// Status
|
|
3170
|
+
// =============================================================================
|
|
3171
|
+
|
|
3172
|
+
/**
 * Summarize the state of the index.
 *
 * For every collection listed in the YAML config, reports the number of active
 * documents and the latest `modified_at` among them (the current time is used
 * when a collection has no active documents). Collections are ordered by that
 * timestamp, most recent first.
 *
 * @param db - Open index database.
 * @returns Total active documents, the pending-embedding figure from
 *   `getHashesNeedingEmbedding`, whether the `vectors_vec` table exists, and
 *   the per-collection summary.
 */
export function getStatus(db: Database): IndexStatus {
  const configured = collectionsListCollections();

  const collections = configured.map(({ name, path, pattern }) => {
    const { active_count, last_doc_update } = db.prepare(`
      SELECT
        COUNT(*) as active_count,
        MAX(modified_at) as last_doc_update
      FROM documents
      WHERE collection = ? AND active = 1
    `).get(name) as { active_count: number; last_doc_update: string | null };

    return {
      name,
      path,
      pattern,
      documents: active_count,
      lastUpdated: last_doc_update || new Date().toISOString(),
    };
  });

  // Most recently updated collection first.
  collections.sort((left, right) => {
    if (!left.lastUpdated) return 1;
    if (!right.lastUpdated) return -1;
    const leftMs = new Date(left.lastUpdated).getTime();
    const rightMs = new Date(right.lastUpdated).getTime();
    return rightMs - leftMs;
  });

  const activeCountRow = db.prepare(`SELECT COUNT(*) as c FROM documents WHERE active = 1`).get() as { c: number };
  const totalDocuments = activeCountRow.c;
  const needsEmbedding = getHashesNeedingEmbedding(db);
  const vecTableRow = db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
  const hasVectorIndex = !!vecTableRow;

  return { totalDocuments, needsEmbedding, hasVectorIndex, collections };
}
|
|
3213
|
+
|
|
3214
|
+
// =============================================================================
|
|
3215
|
+
// Snippet extraction
|
|
3216
|
+
// =============================================================================
|
|
3217
|
+
|
|
3218
|
+
/** Result of picking the most relevant excerpt from a document body. */
export type SnippetResult = {
  /** 1-indexed line number of the best-matching line. */
  line: number;
  /** Excerpt text, preceded by a diff-style `@@ ... @@` header line. */
  snippet: string;
  /** Number of document lines before the excerpt. */
  linesBefore: number;
  /** Number of document lines after the excerpt. */
  linesAfter: number;
  /** Number of lines in the excerpt itself. */
  snippetLines: number;
};
|
|
3225
|
+
|
|
3226
|
+
// Stop words removed from intent strings when extracting terms
const INTENT_STOP_WORDS = new Set([
  "a", "an", "the", "and", "or", "but", "in", "on", "at", "to", "for",
  "of", "with", "by", "from", "is", "are", "was", "were", "be", "been",
  "being", "have", "has", "had", "do", "does", "did", "will", "would",
  "could", "should", "may", "might", "shall", "can", "about", "how",
  "what", "when", "where", "which", "who", "whom", "why", "this", "that",
  "these", "those", "it", "its", "not", "no", "so", "if", "then", "than",
]);

/** Weight for intent terms relative to query terms (1.0) in snippet scoring */
const INTENT_SNIPPET_WEIGHT = 0.3;

/** Weight for intent terms relative to query terms (1.0) in chunk selection */
export const INTENT_CHUNK_WEIGHT = 0.5;

/**
 * Extract meaningful terms from an intent string.
 *
 * The string is lower-cased and split on whitespace; every character other
 * than `a-z`, `0-9` and `-` is then removed from each word. Words that end up
 * shorter than two characters, or that are stop words, are dropped.
 *
 * Punctuation is stripped before the stop-word check so that tokens such as
 * "how?" or "the," are recognized as stop words rather than slipping through.
 *
 * @param intent - Free-form intent text.
 * @returns The remaining terms in their original order (duplicates kept).
 */
export function extractIntentTerms(intent: string): string[] {
  return intent
    .toLowerCase()
    .split(/\s+/)
    .map(word => word.replace(/[^a-z0-9-]/g, ""))
    .filter(term => term.length > 1 && !INTENT_STOP_WORDS.has(term));
}
|
|
3251
|
+
|
|
3252
|
+
/**
 * Pick a short excerpt of `body` around the line that best matches the query.
 *
 * Every line is scored with 1 point per query term it contains
 * (case-insensitive substring match) plus INTENT_SNIPPET_WEIGHT per intent
 * term. The first line with the highest score wins; the excerpt is that line,
 * the line before it and up to two lines after it, truncated to `maxLen`
 * characters with a trailing "...".
 *
 * When `chunkPos` is positive, only a character window around that offset
 * (100 before, `maxLen + 100` after) is searched. If that window yields an
 * empty or whitespace-only excerpt, the whole body is searched instead.
 *
 * @param body - Full document text.
 * @param query - Search query; split on whitespace into terms.
 * @param maxLen - Maximum excerpt length in characters (header excluded).
 * @param chunkPos - Optional character offset of the matching chunk.
 * @param intent - Optional intent text that nudges line selection.
 * @returns The excerpt with a diff-style header and its line bookkeeping.
 */
export function extractSnippet(body: string, query: string, maxLen = 500, chunkPos?: number, intent?: string): SnippetResult {
  const documentLineCount = body.split('\n').length;

  // Restrict the search to a window around the chunk when a position is given.
  const anchor = chunkPos && chunkPos > 0 ? chunkPos : 0;
  const isWindowed = anchor > 0;
  let windowText = body;
  let linesSkipped = 0;
  if (isWindowed) {
    const windowStart = Math.max(0, anchor - 100);
    const windowEnd = Math.min(body.length, anchor + maxLen + 100);
    windowText = body.slice(windowStart, windowEnd);
    if (windowStart > 0) {
      // Number of line breaks before the window = lines skipped.
      linesSkipped = body.slice(0, windowStart).split('\n').length - 1;
    }
  }

  const windowLines = windowText.split('\n');
  const queryTerms = query.toLowerCase().split(/\s+/).filter(term => term !== "");
  const intentTerms = intent ? extractIntentTerms(intent) : [];

  // First line with the strictly highest score wins.
  let winner = 0;
  let winnerScore = -1;
  windowLines.forEach((rawLine, index) => {
    const candidate = rawLine.toLowerCase();
    const queryHits = queryTerms.filter(term => candidate.includes(term)).length;
    const score = intentTerms.reduce(
      (sum, term) => (candidate.includes(term) ? sum + INTENT_SNIPPET_WEIGHT : sum),
      queryHits
    );
    if (score > winnerScore) {
      winnerScore = score;
      winner = index;
    }
  });

  const firstIdx = Math.max(0, winner - 1);
  const lastIdx = Math.min(windowLines.length, winner + 3);
  const picked = windowLines.slice(firstIdx, lastIdx);
  const joined = picked.join('\n');

  // A blank excerpt from a chunk window is useless: retry on the whole body.
  if (isWindowed && joined.trim().length === 0) {
    return extractSnippet(body, query, maxLen, undefined, intent);
  }

  const excerpt = joined.length > maxLen ? joined.substring(0, maxLen - 3) + "..." : joined;

  const firstLineNumber = linesSkipped + firstIdx + 1; // 1-indexed
  const linesBefore = firstLineNumber - 1;
  const linesAfter = documentLineCount - (firstLineNumber + picked.length - 1);

  // Diff-style header: @@ -start,count @@ (linesBefore before, linesAfter after)
  const header = `@@ -${firstLineNumber},${picked.length} @@ (${linesBefore} before, ${linesAfter} after)`;

  return {
    line: linesSkipped + winner + 1,
    snippet: `${header}\n${excerpt}`,
    linesBefore,
    linesAfter,
    snippetLines: picked.length,
  };
}
|
|
3317
|
+
|
|
3318
|
+
// =============================================================================
|
|
3319
|
+
// SAME: Session Tracking
|
|
3320
|
+
// =============================================================================
|
|
3321
|
+
|
|
3322
|
+
/**
 * Record the start of a session in `session_log`.
 * Uses INSERT OR IGNORE, so a row that conflicts with an existing one is not
 * inserted again.
 *
 * @param db - Open index database.
 * @param sessionId - Session identifier.
 * @param startedAt - Start timestamp string.
 * @param machine - Optional machine name; stored as NULL when omitted.
 */
function insertSessionFn(db: Database, sessionId: string, startedAt: string, machine?: string): void {
  const machineValue = machine != null ? machine : null;
  const insertSession = db.prepare(`
    INSERT OR IGNORE INTO session_log (session_id, started_at, machine)
    VALUES (?, ?, ?)
  `);
  insertSession.run(sessionId, startedAt, machineValue);
}
|
|
3328
|
+
|
|
3329
|
+
/**
 * Update selected columns of a `session_log` row.
 *
 * Only fields present in `updates` (i.e. not `undefined`) are written;
 * `filesChanged` is stored as a JSON string. Nothing is executed when no
 * field is provided.
 *
 * @param db - Open index database.
 * @param sessionId - Session whose row is updated.
 * @param updates - Fields to change.
 */
function updateSessionFn(db: Database, sessionId: string, updates: { endedAt?: string; handoffPath?: string; filesChanged?: string[]; summary?: string }): void {
  // [SET fragment, bound value] pairs, in the fixed column order below.
  const assignments: [string, string | null][] = [];

  if (updates.endedAt !== undefined) {
    assignments.push(["ended_at = ?", updates.endedAt]);
  }
  if (updates.handoffPath !== undefined) {
    assignments.push(["handoff_path = ?", updates.handoffPath]);
  }
  if (updates.filesChanged !== undefined) {
    assignments.push(["files_changed = ?", JSON.stringify(updates.filesChanged)]);
  }
  if (updates.summary !== undefined) {
    assignments.push(["summary = ?", updates.summary]);
  }

  if (assignments.length === 0) return;

  const setClause = assignments.map(([fragment]) => fragment).join(", ");
  const params: (string | null)[] = assignments.map(([, value]) => value);
  params.push(sessionId);

  db.prepare(`UPDATE session_log SET ${setClause} WHERE session_id = ?`).run(...params);
}
|
|
3340
|
+
|
|
3341
|
+
/**
 * Fetch one session by id from `session_log`.
 *
 * Column names are mapped to camelCase; `files_changed` is parsed from JSON
 * and defaults to an empty array when the column is empty.
 *
 * @param db - Open index database.
 * @param sessionId - Session identifier to look up.
 * @returns The session record, or null when no row matches.
 */
function getSessionFn(db: Database, sessionId: string): SessionRecord | null {
  const found = db.prepare(`SELECT * FROM session_log WHERE session_id = ?`).get(sessionId) as any;
  if (!found) {
    return null;
  }

  const changedFiles = found.files_changed ? JSON.parse(found.files_changed) : [];
  return {
    sessionId: found.session_id,
    startedAt: found.started_at,
    endedAt: found.ended_at,
    handoffPath: found.handoff_path,
    machine: found.machine,
    filesChanged: changedFiles,
    summary: found.summary,
  };
}
|
|
3354
|
+
|
|
3355
|
+
/**
 * Fetch the most recently started sessions from `session_log`.
 *
 * Rows are ordered by `started_at` descending and mapped to camelCase;
 * `files_changed` is parsed from JSON and defaults to an empty array.
 *
 * @param db - Open index database.
 * @param limit - Maximum number of sessions to return.
 * @returns Session records, newest first.
 */
function getRecentSessionsFn(db: Database, limit: number): SessionRecord[] {
  const recentRows = db.prepare(`SELECT * FROM session_log ORDER BY started_at DESC LIMIT ?`).all(limit) as any[];

  const sessions: SessionRecord[] = [];
  for (const entry of recentRows) {
    sessions.push({
      sessionId: entry.session_id,
      startedAt: entry.started_at,
      endedAt: entry.ended_at,
      handoffPath: entry.handoff_path,
      machine: entry.machine,
      filesChanged: entry.files_changed ? JSON.parse(entry.files_changed) : [],
      summary: entry.summary,
    });
  }
  return sessions;
}
|
|
3367
|
+
|
|
3368
|
+
// =============================================================================
|
|
3369
|
+
// SAME: Context Usage Tracking
|
|
3370
|
+
// =============================================================================
|
|
3371
|
+
|
|
3372
|
+
/**
 * Append one row to `context_usage`.
 *
 * @param db - Database handle the insert is prepared on.
 * @param usage - Usage record; `injectedPaths` is serialized with JSON.stringify
 *   before being bound, every other field is bound as given.
 */
function insertUsageFn(db: Database, usage: UsageRecord): void {
  const { sessionId, timestamp, hookName, injectedPaths, estimatedTokens, wasReferenced } = usage;
  const insert = db.prepare(`
    INSERT INTO context_usage (session_id, timestamp, hook_name, injected_paths, estimated_tokens, was_referenced)
    VALUES (?, ?, ?, ?, ?, ?)
  `);
  insert.run(sessionId, timestamp, hookName, JSON.stringify(injectedPaths), estimatedTokens, wasReferenced);
}
|
|
3378
|
+
|
|
3379
|
+
/**
 * Fetch every `context_usage` row recorded for a session, ordered by timestamp.
 *
 * Columns are aliased to camelCase in SQL; `injectedPaths` is returned exactly
 * as stored (no JSON parsing happens here).
 *
 * @param db - Database handle the query is prepared on.
 * @param sessionId - Session whose usage rows are wanted.
 * @returns The matching rows, oldest timestamp first.
 */
function getUsageForSessionFn(db: Database, sessionId: string): UsageRow[] {
  const usageQuery = db.prepare(`
    SELECT id, session_id AS sessionId, timestamp, hook_name AS hookName,
           injected_paths AS injectedPaths, estimated_tokens AS estimatedTokens,
           was_referenced AS wasReferenced
    FROM context_usage WHERE session_id = ? ORDER BY timestamp
  `);
  const usageRows = usageQuery.all(sessionId) as UsageRow[];
  return usageRows;
}
|
|
3387
|
+
|
|
3388
|
+
/**
 * Flag a `context_usage` row as referenced by setting `was_referenced = 1`.
 *
 * @param db - Database handle the update is prepared on.
 * @param id - Primary key of the `context_usage` row.
 */
function markUsageReferencedFn(db: Database, id: number): void {
  const flagReferenced = db.prepare(`UPDATE context_usage SET was_referenced = 1 WHERE id = ?`);
  flagReferenced.run(id);
}
|
|
3391
|
+
|
|
3392
|
+
// =============================================================================
|
|
3393
|
+
// SAME: Document Metadata Operations
|
|
3394
|
+
// =============================================================================
|
|
3395
|
+
|
|
3396
|
+
/**
 * Partially update metadata columns of one `documents` row.
 *
 * Only keys of `meta` that are not `undefined` are written; when none are
 * supplied the function returns without touching the database.
 *
 * @param db - Database handle the update is prepared on.
 * @param docId - `documents.id` of the row to update.
 * @param meta - Column values to set; each key is the column name.
 */
function updateDocumentMetaFn(db: Database, docId: number, meta: { domain?: string; workstream?: string; tags?: string; content_type?: string; review_by?: string; confidence?: number; quality_score?: number }): void {
  // Column order is fixed so the generated SET clause is deterministic.
  const columns = ["domain", "workstream", "tags", "content_type", "review_by", "confidence", "quality_score"] as const;

  const assignments: string[] = [];
  const bindings: (string | number | null)[] = [];
  for (const column of columns) {
    const value = meta[column];
    if (value === undefined) continue;
    assignments.push(`${column} = ?`);
    bindings.push(value);
  }

  if (assignments.length === 0) return;

  bindings.push(docId);
  db.prepare(`UPDATE documents SET ${assignments.join(", ")} WHERE id = ?`).run(...bindings);
}
|
|
3410
|
+
|
|
3411
|
+
/**
 * Set or clear the `pinned` flag on the active document at collection/path.
 *
 * @param db - Database handle the update is prepared on.
 * @param collection - Collection name of the document.
 * @param path - Path of the document inside the collection.
 * @param pinned - `true` stores 1, `false` stores 0.
 */
function pinDocumentFn(db: Database, collection: string, path: string, pinned: boolean): void {
  const flag = pinned ? 1 : 0;
  const setPinned = db.prepare(
    "UPDATE documents SET pinned = ? WHERE collection = ? AND path = ? AND active = 1"
  );
  setPinned.run(flag, collection, path);
}
|
|
3416
|
+
|
|
3417
|
+
/**
 * Write `snoozed_until` on the active document at collection/path.
 *
 * @param db - Database handle the update is prepared on.
 * @param collection - Collection name of the document.
 * @param path - Path of the document inside the collection.
 * @param until - Value stored in `snoozed_until`; `null` stores NULL.
 */
function snoozeDocumentFn(db: Database, collection: string, path: string, until: string | null): void {
  const setSnooze = db.prepare(
    "UPDATE documents SET snoozed_until = ? WHERE collection = ? AND path = ? AND active = 1"
  );
  setSnooze.run(until, collection, path);
}
|
|
3422
|
+
|
|
3423
|
+
/**
 * Bump `access_count` and stamp `last_accessed_at` for a set of active documents.
 *
 * @param db - Database handle the update is prepared on.
 * @param paths - Document keys in `collection/path` form (matched against
 *   `collection || '/' || path`). An empty list is a no-op.
 */
function incrementAccessCountFn(db: Database, paths: string[]): void {
  if (paths.length < 1) return;

  const accessedAt = new Date().toISOString();
  // One "?" per path for the IN (...) list.
  const marks = Array.from(paths, () => "?").join(",");

  const bump = db.prepare(`
    UPDATE documents SET access_count = access_count + 1, last_accessed_at = ?
    WHERE active = 1 AND (collection || '/' || path) IN (${marks})
  `);
  bump.run(accessedAt, ...paths);
}
|
|
3432
|
+
|
|
3433
|
+
/**
 * List active documents of one content type, most recently modified first.
 *
 * @param db - Database handle the query is prepared on.
 * @param contentType - Value matched against `documents.content_type`.
 * @param limit - Maximum number of rows (defaults to 10).
 * @returns Document rows with camelCase aliases and `bodyLength` = LENGTH of the joined content.
 */
function getDocumentsByTypeFn(db: Database, contentType: string, limit: number = 10): DocumentRow[] {
  const byType = db.prepare(`
    SELECT d.id, d.collection, d.path, d.title, d.hash, d.modified_at as modifiedAt,
           d.domain, d.workstream, d.tags, d.content_type as contentType,
           d.review_by as reviewBy, d.confidence, d.access_count as accessCount,
           LENGTH(c.doc) as bodyLength
    FROM documents d
    JOIN content c ON c.hash = d.hash
    WHERE d.active = 1 AND d.content_type = ?
    ORDER BY d.modified_at DESC
    LIMIT ?
  `);
  const matches = byType.all(contentType, limit) as DocumentRow[];
  return matches;
}
|
|
3446
|
+
|
|
3447
|
+
/**
 * Partially update observation columns on the active document at collection/path.
 *
 * Only keys of `fields` that are not `undefined` are written; with nothing to
 * write the function returns without touching the database.
 *
 * @param db - Database handle the update is prepared on.
 * @param docPath - Path of the document inside the collection.
 * @param collectionName - Collection the document belongs to.
 * @param fields - Values to store. Keys map to same-named columns, except
 *   `quality_score`, which is written to `obs_quality_score`.
 */
function updateObservationFieldsFn(
  db: Database,
  docPath: string,
  collectionName: string,
  fields: { observation_type?: string; facts?: string; narrative?: string; concepts?: string; files_read?: string; files_modified?: string; skill_name?: string; quality_score?: string; failure_reason?: string }
): void {
  // [field key, column name] pairs, in the order they appear in the SET clause.
  const columnMap = [
    ["observation_type", "observation_type"],
    ["facts", "facts"],
    ["narrative", "narrative"],
    ["concepts", "concepts"],
    ["files_read", "files_read"],
    ["files_modified", "files_modified"],
    ["skill_name", "skill_name"],
    ["quality_score", "obs_quality_score"],
    ["failure_reason", "failure_reason"],
  ] as const;

  const assignments: string[] = [];
  const bindings: (string | null)[] = [];
  for (const [field, column] of columnMap) {
    const value = fields[field];
    if (value === undefined) continue;
    assignments.push(`${column} = ?`);
    bindings.push(value);
  }

  if (assignments.length === 0) return;

  bindings.push(collectionName, docPath);
  db.prepare(`UPDATE documents SET ${assignments.join(", ")} WHERE collection = ? AND path = ? AND active = 1`).run(...bindings);
}
|
|
3468
|
+
|
|
3469
|
+
/**
 * List active documents whose `review_by` value is set and is on or before a cutoff.
 *
 * @param db - Database handle the query is prepared on.
 * @param beforeDate - Cutoff compared with `review_by` using SQL `<=`.
 * @returns Matching rows ordered by `review_by` ascending (most overdue first).
 */
function getStaleDocumentsFn(db: Database, beforeDate: string): DocumentRow[] {
  const overdue = db.prepare(`
    SELECT d.id, d.collection, d.path, d.title, d.hash, d.modified_at as modifiedAt,
           d.domain, d.workstream, d.tags, d.content_type as contentType,
           d.review_by as reviewBy, d.confidence, d.access_count as accessCount,
           LENGTH(c.doc) as bodyLength
    FROM documents d
    JOIN content c ON c.hash = d.hash
    WHERE d.active = 1 AND d.review_by IS NOT NULL AND d.review_by != '' AND d.review_by <= ?
    ORDER BY d.review_by ASC
  `);
  const staleRows = overdue.all(beforeDate) as DocumentRow[];
  return staleRows;
}
|
|
3481
|
+
|
|
3482
|
+
// =============================================================================
|
|
3483
|
+
// Beads Integration
|
|
3484
|
+
// =============================================================================
|
|
3485
|
+
|
|
3486
|
+
/**
 * Sync Beads issues for a project into ClawMem.
 *
 * Issues are read through `queryBeadsList(projectDir)`. Each issue is rendered
 * to markdown and stored as a document in the `beads` collection at
 * `_clawmem/beads/<issue id>.md`, with a companion row in `beads_issues`.
 *
 * Three passes are made:
 *  1. Upsert the document and the `beads_issues` row for every issue.
 *  2. Insert each issue's dependencies into `beads_dependencies` (INSERT OR IGNORE).
 *  3. Mirror every row of `beads_dependencies` into `memory_relations` so the
 *     dependency graph is reachable through graph traversal.
 *
 * @param db - Database handle all statements are prepared on.
 * @param projectDir - Project directory handed to `queryBeadsList`.
 * @returns `synced` = issues that already had a document, `created` = issues
 *   inserted for the first time, `newDocIds` = document ids of the created ones.
 */
export async function syncBeadsIssues(
  db: Database,
  projectDir: string
): Promise<{ synced: number; created: number; newDocIds: number[] }> {
  const issues = queryBeadsList(projectDir);
  if (issues.length === 0) {
    console.warn(`[beads] No issues returned from bd list in ${projectDir}`);
    return { synced: 0, created: 0, newDocIds: [] };
  }

  // Statements are prepared once and reused instead of being re-prepared per row.
  const refreshDocStmt = db.prepare(
    `UPDATE documents SET title = ?, hash = ?, modified_at = ? WHERE id = ?`
  );
  // Every mutable issue attribute is refreshed; previously only status, priority
  // and assignee were, so e.g. a closed issue kept closed_at = NULL forever.
  const refreshIssueStmt = db.prepare(`
    UPDATE beads_issues
    SET issue_type = ?, status = ?, priority = ?, tags = ?,
        assignee = ?, parent_id = ?, closed_at = ?, last_synced_at = ?
    WHERE beads_id = ?
  `);
  const insertIssueStmt = db.prepare(`
    INSERT INTO beads_issues (
      beads_id, doc_id, issue_type, status, priority, tags,
      assignee, parent_id, created_at, closed_at, last_synced_at
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);

  let synced = 0;
  let created = 0;
  const newDocIds: number[] = [];

  // First pass: documents + beads_issues rows.
  for (const issue of issues) {
    const docPath = `_clawmem/beads/${issue.id}.md`;
    const docBody = formatBeadsIssueAsMarkdown(issue);
    const hash = await hashContent(docBody);
    const syncedAt = new Date().toISOString();

    const issueType = issue.type || issue.issue_type || "task";
    const tagsJson = JSON.stringify(issue.labels || issue.tags || []);
    const assignee = issue.assignee || null;
    const parentId = issue.parent || null;
    const closedAt = issue.closed_at || null;

    const existingDoc = findActiveDocument(db, 'beads', docPath);

    if (existingDoc) {
      if (existingDoc.hash !== hash) {
        // Rendered body changed: store the new content and repoint the document.
        insertContent(db, hash, docBody, syncedAt);
        refreshDocStmt.run(issue.title, hash, syncedAt, existingDoc.id);
      }

      refreshIssueStmt.run(
        issueType,
        issue.status,
        issue.priority,
        tagsJson,
        assignee,
        parentId,
        closedAt,
        syncedAt,
        issue.id
      );
      synced++;
      continue;
    }

    insertContent(db, hash, docBody, issue.created_at);
    insertDocument(db, 'beads', docPath, issue.title, hash, issue.created_at, issue.created_at);

    // Re-read the document to learn the id it was assigned.
    const newDoc = findActiveDocument(db, 'beads', docPath);
    if (!newDoc) {
      console.warn(`[beads] Failed to insert document for ${issue.id}`);
      continue;
    }

    insertIssueStmt.run(
      issue.id,
      newDoc.id,
      issueType,
      issue.status,
      issue.priority,
      tagsJson,
      assignee,
      parentId,
      issue.created_at,
      closedAt,
      syncedAt
    );

    newDocIds.push(newDoc.id);
    created++;
  }

  // Second pass: insert all dependencies from Dolt (richer than legacy blocks-only)
  const insertDepStmt = db.prepare(`
    INSERT OR IGNORE INTO beads_dependencies (source_id, target_id, dep_type, created_at)
    VALUES (?, ?, ?, ?)
  `);
  for (const issue of issues) {
    if (!issue.dependencies || issue.dependencies.length === 0) continue;
    for (const dep of issue.dependencies) {
      insertDepStmt.run(dep.issue_id, dep.depends_on_id, dep.type, dep.created_at || new Date().toISOString());
    }
  }

  // Third pass: bridge beads_dependencies → memory_relations for MAGMA graph traversal
  const depTypeMap: Record<string, string> = {
    'blocks': 'causal',
    'conditional-blocks': 'causal',
    'waits-for': 'causal',
    'caused-by': 'causal',
    'discovered-from': 'supporting',
    'supersedes': 'supporting',
    'duplicates': 'supporting',
    'relates-to': 'semantic',
    'related': 'semantic',
    'parent-child': 'semantic',
  };

  const allDeps = db.prepare(`SELECT source_id, target_id, dep_type FROM beads_dependencies`).all() as {
    source_id: string; target_id: string; dep_type: string;
  }[];

  const docIdStmt = db.prepare(`SELECT doc_id FROM beads_issues WHERE beads_id = ?`);
  const insertRelationStmt = db.prepare(`
    INSERT OR IGNORE INTO memory_relations (source_id, target_id, relation_type, weight, metadata, created_at)
    VALUES (?, ?, ?, 1.0, ?, ?)
  `);
  const relationCreatedAt = new Date().toISOString();

  for (const dep of allDeps) {
    // Unknown dependency types fall back to a plain semantic relation.
    const relationType = depTypeMap[dep.dep_type] || 'semantic';

    const sourceRow = docIdStmt.get(dep.source_id) as { doc_id: number } | undefined;
    const targetRow = docIdStmt.get(dep.target_id) as { doc_id: number } | undefined;

    // Only bridge edges whose two endpoints are both known issues.
    if (sourceRow && targetRow) {
      insertRelationStmt.run(
        sourceRow.doc_id,
        targetRow.doc_id,
        relationType,
        JSON.stringify({ origin: 'beads', dep_type: dep.dep_type }),
        relationCreatedAt
      );
    }
  }

  return { synced, created, newDocIds };
}
|
|
3615
|
+
|
|
3616
|
+
/**
|
|
3617
|
+
* Export for MCP tool registration.
|
|
3618
|
+
*/
|
|
3619
|
+
export { detectBeadsProject };
|
|
3620
|
+
|
|
3621
|
+
// =============================================================================
|
|
3622
|
+
// MAGMA Graph Building
|
|
3623
|
+
// =============================================================================
|
|
3624
|
+
|
|
3625
|
+
/**
 * Build the temporal backbone: link each active document to the next one in
 * creation order with a `temporal` relation of weight 1.0.
 *
 * Inserts use INSERT OR IGNORE, so an edge that already exists is left alone
 * and is NOT counted.
 *
 * @param db - Database handle all statements are prepared on.
 * @returns Number of edges actually inserted by this call (sum of the
 *   statements' reported `changes`).
 */
export function buildTemporalBackbone(db: Database): number {
  // `id` breaks ties so documents sharing a created_at always chain the same way.
  const docs = db.prepare(`
    SELECT id
    FROM documents
    WHERE active = 1
    ORDER BY created_at ASC, id ASC
  `).all() as { id: number }[];

  const insertEdge = db.prepare(`
    INSERT OR IGNORE INTO memory_relations (source_id, target_id, relation_type, weight, created_at)
    VALUES (?, ?, 'temporal', 1.0, ?)
  `);
  const createdAt = new Date().toISOString();

  let edges = 0;

  // Create temporal edges between consecutive documents
  for (let i = 1; i < docs.length; i++) {
    const prev = docs[i - 1]!;
    const curr = docs[i]!;

    // `changes` is 0 when the edge already existed and the insert was ignored.
    edges += insertEdge.run(prev.id, curr.id, createdAt).changes;
  }

  return edges;
}
|
|
3655
|
+
|
|
3656
|
+
/**
 * Build the semantic graph from existing embeddings.
 *
 * For every active document that has a first-chunk vector (`seq = 0`), the up
 * to 10 nearest other active documents whose cosine distance is below
 * `1 - threshold` are linked with a `semantic` relation weighted by
 * similarity (`1 - distance`).
 *
 * Inserts use INSERT OR IGNORE, so an edge that already exists is left alone
 * and is NOT counted.
 *
 * @param db - Database handle all statements are prepared on.
 * @param threshold - Minimum similarity for an edge (defaults to 0.7).
 * @returns Number of edges actually inserted by this call.
 */
export async function buildSemanticGraph(
  db: Database,
  threshold: number = 0.7
): Promise<number> {
  // Query all documents with embeddings
  const docs = db.prepare(`
    SELECT DISTINCT d.id, d.hash
    FROM documents d
    JOIN content_vectors cv ON d.hash = cv.hash
    WHERE d.active = 1 AND cv.seq = 0
  `).all() as { id: number; hash: string }[];

  // Both statements are prepared once and reused for every document.
  const neighborsStmt = db.prepare(`
    SELECT
      d2.id as target_id,
      vec_distance_cosine(v1.embedding, v2.embedding) as distance
    FROM vectors_vec v1, vectors_vec v2
    JOIN documents d2 ON v2.hash_seq = d2.hash || '_0'
    WHERE v1.hash_seq = ? || '_0'
      AND d2.id != ?
      AND d2.active = 1
      AND vec_distance_cosine(v1.embedding, v2.embedding) < ?
    ORDER BY distance
    LIMIT 10
  `);
  const insertEdge = db.prepare(`
    INSERT OR IGNORE INTO memory_relations (source_id, target_id, relation_type, weight, created_at)
    VALUES (?, ?, 'semantic', ?, ?)
  `);

  // Similarity threshold expressed as a cosine-distance ceiling.
  const maxDistance = 1 - threshold;
  const createdAt = new Date().toISOString();

  let edges = 0;

  // For each document, find similar neighbors
  for (const doc of docs) {
    const similar = neighborsStmt.all(doc.hash, doc.id, maxDistance) as { target_id: number; distance: number }[];

    for (const sim of similar) {
      const similarity = 1 - sim.distance;
      // `changes` is 0 when the edge already existed and the insert was ignored.
      edges += insertEdge.run(doc.id, sim.target_id, similarity, createdAt).changes;
    }
  }

  return edges;
}
|
|
3706
|
+
|
|
3707
|
+
// =============================================================================
|
|
3708
|
+
// A-MEM: Causal Graph Traversal
|
|
3709
|
+
// =============================================================================
|
|
3710
|
+
|
|
3711
|
+
/** One document reached while walking `causal` relations (see findCausalLinks). */
export type CausalLink = {
  /** `documents.id` of the reached document. */
  docId: number;
  /** Title of the reached document. */
  title: string;
  /** `collection/path` of the reached document. */
  filepath: string;
  /** Number of causal hops from the starting document (1 = direct link). */
  depth: number;
  /** Weight of the direct relation to the starting document; 1.0 when there is none. */
  weight: number;
  /** `$.reasoning` from that direct relation's metadata, if any. */
  reasoning: string | null;
};
|
|
3719
|
+
|
|
3720
|
+
/**
 * Walk `causal` relations outward from a document.
 *
 * - `'causes'` follows edges source → target (documents this one leads to).
 * - `'caused_by'` follows edges target → source (documents leading to this one).
 * - `'both'` (default, and any other value) runs both walks and merges them,
 *   keeping a single entry per document: the shallowest one, ties broken by
 *   higher weight.
 *
 * Within a single-direction walk a document reachable at several depths
 * appears once per distinct depth. `weight` and `reasoning` come from the
 * direct relation between the starting document and the reached one, so they
 * default to 1.0 / null for documents only reachable indirectly. Inactive
 * documents are excluded from the result.
 *
 * @param db - Database handle the query is prepared on.
 * @param docId - Starting document id.
 * @param direction - Which way to follow causal edges.
 * @param maxDepth - Maximum number of hops; clamped to the range 1..10.
 * @returns Reached documents ordered by depth, then by descending weight.
 */
export function findCausalLinks(
  db: Database,
  docId: number,
  direction: 'causes' | 'caused_by' | 'both' = 'both',
  maxDepth: number = 5
): CausalLink[] {
  const depthLimit = Math.min(10, Math.max(1, maxDepth));

  // `near` is the column matched against the document we are standing on;
  // the opposite column is the neighbour the walk moves to.
  const walk = (near: 'source_id' | 'target_id'): CausalLink[] => {
    const far = near === 'source_id' ? 'target_id' : 'source_id';
    const query = `
      WITH RECURSIVE causal_chain(doc_id, depth, path) AS (
        -- Base case: immediate causal links
        SELECT ${far}, 1, json_array(?)
        FROM memory_relations
        WHERE ${near} = ? AND relation_type = 'causal'

        UNION ALL

        -- Recursive case: follow the chain, never stepping onto a visited node
        SELECT mr.${far}, cc.depth + 1, json_insert(cc.path, '$[#]', cc.doc_id)
        FROM memory_relations mr
        JOIN causal_chain cc ON mr.${near} = cc.doc_id
        WHERE cc.depth < ?
          AND mr.relation_type = 'causal'
          AND mr.${far} NOT IN (SELECT value FROM json_each(cc.path))
      )
      SELECT DISTINCT
        cc.doc_id as docId,
        d.title,
        d.collection || '/' || d.path as filepath,
        cc.depth,
        COALESCE(mr.weight, 1.0) as weight,
        json_extract(mr.metadata, '$.reasoning') as reasoning
      FROM causal_chain cc
      JOIN documents d ON d.id = cc.doc_id
      LEFT JOIN memory_relations mr ON (mr.${near} = ? AND mr.${far} = cc.doc_id AND mr.relation_type = 'causal')
      WHERE d.active = 1
      ORDER BY cc.depth, weight DESC
    `;
    return db.prepare(query).all(docId, docId, depthLimit, docId) as CausalLink[];
  };

  if (direction === 'causes') return walk('source_id');
  if (direction === 'caused_by') return walk('target_id');

  // Both directions: sort first, then de-duplicate, so the entry kept for a
  // document is always its shallowest one regardless of which walk found it.
  const ordered = [...walk('source_id'), ...walk('target_id')]
    .sort((a, b) => a.depth - b.depth || b.weight - a.weight);

  const seen = new Set<number>();
  const merged: CausalLink[] = [];
  for (const link of ordered) {
    if (seen.has(link.docId)) continue;
    seen.add(link.docId);
    merged.push(link);
  }
  return merged;
}
|
|
3816
|
+
|
|
3817
|
+
// =============================================================================
|
|
3818
|
+
// A-MEM: Memory Evolution Timeline
|
|
3819
|
+
// =============================================================================
|
|
3820
|
+
|
|
3821
|
+
/** One recorded change to a memory's keywords/context (see getEvolutionTimeline). */
export type EvolutionEntry = {
  /** Version number stored with the evolution row. */
  version: number;
  /** The document whose arrival triggered this change. */
  triggeredBy: {
    /** `documents.id` of the triggering document. */
    docId: number;
    /** Title of the triggering document. */
    title: string;
    /** `collection/path` of the triggering document. */
    filepath: string;
  };
  /** Keywords before the change; null when absent or unparseable. */
  previousKeywords: string[] | null;
  /** Keywords after the change; null when absent or unparseable. */
  newKeywords: string[] | null;
  /** Context text before the change. */
  previousContext: string | null;
  /** Context text after the change. */
  newContext: string | null;
  /** Recorded explanation for the change, if any. */
  reasoning: string | null;
  /** Timestamp the evolution row was created. */
  createdAt: string;
};
|
|
3835
|
+
|
|
3836
|
+
/**
 * Return the evolution history of a memory, newest entry first.
 *
 * Only entries whose triggering document is still active are returned.
 * Keyword columns hold JSON text; a value that fails to parse is logged with
 * console.error and reported as null.
 *
 * @param db - Database handle the query is prepared on.
 * @param docId - `memory_evolution.memory_id` to look up.
 * @param limit - Maximum number of entries; clamped to the range 1..100 (default 10).
 * @returns Evolution entries ordered by `created_at` descending.
 */
export function getEvolutionTimeline(
  db: Database,
  docId: number,
  limit: number = 10
): EvolutionEntry[] {
  type EvolutionRow = {
    version: number;
    triggered_by: number;
    trigger_title: string;
    trigger_filepath: string;
    previous_keywords: string | null;
    new_keywords: string | null;
    previous_context: string | null;
    new_context: string | null;
    reasoning: string | null;
    created_at: string;
  };

  const rowCap = Math.min(100, Math.max(1, limit));

  // Parse a JSON keyword column; empty values and parse failures both yield null.
  const parseKeywords = (raw: string | null, column: string): string[] | null => {
    if (!raw) return null;
    try {
      return JSON.parse(raw);
    } catch (e) {
      console.error(`[amem] Failed to parse ${column}:`, e);
      return null;
    }
  };

  const timelineRows = db.prepare(`
    SELECT
      e.version,
      e.triggered_by,
      d.title as trigger_title,
      d.collection || '/' || d.path as trigger_filepath,
      e.previous_keywords,
      e.new_keywords,
      e.previous_context,
      e.new_context,
      e.reasoning,
      e.created_at
    FROM memory_evolution e
    JOIN documents d ON d.id = e.triggered_by
    WHERE e.memory_id = ?
      AND d.active = 1
    ORDER BY e.created_at DESC
    LIMIT ?
  `).all(docId, rowCap) as Array<EvolutionRow>;

  const timeline: EvolutionEntry[] = [];
  for (const entry of timelineRows) {
    const previousKeywords = parseKeywords(entry.previous_keywords, 'previous_keywords');
    const newKeywords = parseKeywords(entry.new_keywords, 'new_keywords');
    timeline.push({
      version: entry.version,
      triggeredBy: {
        docId: entry.triggered_by,
        title: entry.trigger_title,
        filepath: entry.trigger_filepath,
      },
      previousKeywords,
      newKeywords,
      previousContext: entry.previous_context,
      newContext: entry.new_context,
      reasoning: entry.reasoning,
      createdAt: entry.created_at,
    });
  }
  return timeline;
}
|
|
3910
|
+
|
|
3911
|
+
// ---------------------------------------------------------------------------
|
|
3912
|
+
// Lifecycle management functions
|
|
3913
|
+
// ---------------------------------------------------------------------------
|
|
3914
|
+
|
|
3915
|
+
/**
 * Find active documents that a lifecycle policy would archive.
 *
 * A document is a candidate when it is not pinned, not currently snoozed, not
 * in an exempt collection, its content type is not opted out (override of
 * `null`), and both `modified_at` and `last_accessed_at` are on or before the
 * cutoff. A document never accessed counts as stale on the access side.
 *
 * @param db - Database handle the query is prepared on.
 * @param policy - Lifecycle policy: `archive_after_days` is the default age,
 *   `type_overrides[content_type]` replaces it per type, `exempt_collections`
 *   are skipped entirely.
 * @returns The candidate rows as read from `documents`.
 */
function getArchiveCandidatesFn(
  db: Database,
  policy: import("./collections.ts").LifecyclePolicy
): { id: number; collection: string; path: string; title: string; modified_at: string; last_accessed_at: string | null; content_type: string }[] {
  const now = new Date();
  const fallbackDays = policy.archive_after_days;

  const unpinned = db.prepare(`
    SELECT id, collection, path, title, modified_at, last_accessed_at, content_type
    FROM documents
    WHERE active = 1 AND pinned = 0
      AND (snoozed_until IS NULL OR snoozed_until = '' OR snoozed_until <= ?)
  `).all(now.toISOString()) as any[];

  return unpinned.filter((doc) => {
    if (policy.exempt_collections.includes(doc.collection)) return false;

    // An explicit null override means "never archive this content type".
    const override = policy.type_overrides[doc.content_type];
    if (override === null) return false;
    const ageDays = override ?? fallbackDays;

    const cutoff = new Date(now);
    cutoff.setDate(cutoff.getDate() - ageDays);
    const cutoffIso = cutoff.toISOString();

    // Timestamps are compared as strings.
    const staleByEdit = doc.modified_at <= cutoffIso;
    const staleByAccess = !doc.last_accessed_at || doc.last_accessed_at <= cutoffIso;
    return staleByEdit && staleByAccess;
  });
}
|
|
3951
|
+
|
|
3952
|
+
/**
 * Reactivate archived documents (`active = 0` with `archived_at` set) by
 * setting `active = 1` and clearing `archived_at`.
 *
 * Filters are combined with AND. A filter key that is omitted places no
 * restriction; with no keys at all every archived document is restored.
 * An explicitly empty `ids` array selects nothing and restores nothing. It
 * is not treated as "no id filter", which would restore every archived row.
 *
 * @param db - Database handle the update is prepared on.
 * @param filter - `ids`: document ids to restore; `collection`: restrict to
 *   one collection; `sinceDate`: only rows with `archived_at >= sinceDate`.
 * @returns Number of rows restored.
 */
function restoreArchivedDocumentsFn(
  db: Database,
  filter: { ids?: number[]; collection?: string; sinceDate?: string }
): number {
  const { ids, collection, sinceDate } = filter;

  // Guard: an empty id list means "none of them", never "all of them".
  if (ids != null && ids.length === 0) return 0;

  let sql = "UPDATE documents SET active = 1, archived_at = NULL WHERE active = 0 AND archived_at IS NOT NULL";
  const params: (string | number)[] = [];

  if (ids?.length) {
    const placeholders = ids.map(() => "?").join(",");
    sql += ` AND id IN (${placeholders})`;
    params.push(...ids);
  }
  if (collection) {
    sql += " AND collection = ?";
    params.push(collection);
  }
  if (sinceDate) {
    sql += " AND archived_at >= ?";
    params.push(sinceDate);
  }

  return db.prepare(sql).run(...params).changes;
}
|
|
3975
|
+
|
|
3976
|
+
/**
 * Permanently delete archived documents whose `archived_at` is on or before
 * "now minus `olderThanDays` days".
 *
 * @param db - Database handle the delete is prepared on.
 * @param olderThanDays - Minimum age, in days, of the archive timestamp.
 * @returns Number of rows deleted.
 */
function purgeArchivedDocumentsFn(db: Database, olderThanDays: number): number {
  const threshold = new Date();
  threshold.setDate(threshold.getDate() - olderThanDays);

  const purge = db.prepare(`
    DELETE FROM documents WHERE active = 0 AND archived_at IS NOT NULL AND archived_at <= ?
  `);
  const { changes } = purge.run(threshold.toISOString());
  return changes;
}
|
|
3984
|
+
|
|
3985
|
+
/**
 * Summarize the lifecycle state of the `documents` table.
 *
 * - `active`: rows with `active = 1`
 * - `archived`: inactive rows that carry an `archived_at`
 * - `forgotten`: inactive rows without an `archived_at`
 * - `pinned` / `snoozed` / `neverAccessed`: counted among active rows only
 * - `oldestAccess`: smallest `last_accessed_at` among active rows, or null
 *
 * "Snoozed" means `snoozed_until` is later than the current time, compared as
 * an ISO-8601 string. This is the same reference format `getArchiveCandidatesFn`
 * uses, so the two agree on when a snooze has expired. (SQLite's
 * `datetime('now')` yields a space-separated format that sorts before any
 * same-day `...T...Z` value, which would keep expired snoozes counted until
 * the date changes.)
 *
 * @param db - Database handle the query is prepared on.
 * @returns The counters above; every count is 0 for an empty table.
 */
function getLifecycleStatsFn(db: Database): {
  active: number; archived: number; forgotten: number;
  pinned: number; snoozed: number;
  neverAccessed: number; oldestAccess: string | null;
} {
  const nowIso = new Date().toISOString();

  const row = db.prepare(`
    SELECT
      SUM(CASE WHEN active = 1 THEN 1 ELSE 0 END) as active,
      SUM(CASE WHEN active = 0 AND archived_at IS NOT NULL THEN 1 ELSE 0 END) as archived,
      SUM(CASE WHEN active = 0 AND archived_at IS NULL THEN 1 ELSE 0 END) as forgotten,
      SUM(CASE WHEN active = 1 AND pinned = 1 THEN 1 ELSE 0 END) as pinned,
      SUM(CASE WHEN active = 1 AND snoozed_until IS NOT NULL AND snoozed_until > ? THEN 1 ELSE 0 END) as snoozed,
      SUM(CASE WHEN active = 1 AND last_accessed_at IS NULL THEN 1 ELSE 0 END) as neverAccessed,
      MIN(CASE WHEN active = 1 AND last_accessed_at IS NOT NULL THEN last_accessed_at END) as oldestAccess
    FROM documents
  `).get(nowIso) as any;

  // SUM over zero rows yields NULL, hence the fallbacks.
  return {
    active: row?.active ?? 0,
    archived: row?.archived ?? 0,
    forgotten: row?.forgotten ?? 0,
    pinned: row?.pinned ?? 0,
    snoozed: row?.snoozed ?? 0,
    neverAccessed: row?.neverAccessed ?? 0,
    oldestAccess: row?.oldestAccess ?? null,
  };
}
|
|
4012
|
+
|
|
4013
|
+
/**
 * Substring search over archived documents (`active = 0` with `archived_at` set).
 *
 * The query is matched literally against title, path, collection and body.
 * LIKE metacharacters in it (`%`, `_`, and the escape character `\`) are
 * escaped, so a query such as `session_log` does not match `sessionXlog`.
 * Results are ordered by `archived_at`, newest first, so the LIMIT always
 * keeps the same rows.
 *
 * @param db - Database handle the query is prepared on.
 * @param query - Text to look for.
 * @param limit - Maximum number of rows (defaults to 20).
 * @returns Matching documents; `score` is a constant 1.0 (no ranking is computed).
 */
function searchArchivedFn(
  db: Database,
  query: string,
  limit: number = 20
): { id: number; collection: string; path: string; title: string; archived_at: string; score: number }[] {
  // Prefix each LIKE metacharacter with the escape character declared below.
  const literal = query.replace(/[\\%_]/g, "\\$&");
  const likePattern = `%${literal}%`;

  const rows = db.prepare(`
    SELECT d.id, d.collection, d.path, d.title, d.archived_at
    FROM documents d
    LEFT JOIN content c ON c.hash = d.hash
    WHERE d.active = 0 AND d.archived_at IS NOT NULL
      AND (d.title LIKE ? ESCAPE '\\' OR d.path LIKE ? ESCAPE '\\'
           OR d.collection LIKE ? ESCAPE '\\' OR c.doc LIKE ? ESCAPE '\\')
    ORDER BY d.archived_at DESC
    LIMIT ?
  `).all(likePattern, likePattern, likePattern, likePattern, limit) as any[];

  return rows.map((r: any) => ({
    id: r.id, collection: r.collection, path: r.path, title: r.title,
    archived_at: r.archived_at, score: 1.0,
  }));
}
|
|
4033
|
+
|
|
4034
|
+
// =============================================================================
|
|
4035
|
+
// Vault-aware store resolution
|
|
4036
|
+
// =============================================================================
|
|
4037
|
+
|
|
4038
|
+
/**
 * Open the store for a vault.
 *
 * With no vault name the default store is opened. Otherwise the name is
 * resolved to a path through `getVaultPath`; an unresolvable name is an error.
 *
 * @param vault - Named vault (e.g., "work", "personal"). Omit for default.
 * @param opts - Store options (readonly, busyTimeout), passed through to `createStore`.
 * @returns The opened store.
 * @throws Error when `vault` is given but no path is configured for it.
 */
export function resolveStore(
  vault?: string,
  opts?: { readonly?: boolean; busyTimeout?: number }
): Store {
  if (vault) {
    const location = getVaultPath(vault);
    if (location) {
      return createStore(location, opts);
    }

    const message = [
      `Unknown vault: "${vault}". Configure it via CLAWMEM_VAULTS env var `,
      `or in ~/.config/clawmem/config.yaml under "vaults:".`,
    ].join("");
    throw new Error(message);
  }

  // No vault requested: fall back to the default store location.
  return createStore(undefined, opts);
}
|