@loreai/core 0.0.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +26 -5
  3. package/dist/bun/agents-file.d.ts +59 -0
  4. package/dist/bun/agents-file.d.ts.map +1 -0
  5. package/dist/bun/config.d.ts +58 -0
  6. package/dist/bun/config.d.ts.map +1 -0
  7. package/dist/bun/curator.d.ts +35 -0
  8. package/dist/bun/curator.d.ts.map +1 -0
  9. package/dist/bun/db/driver.bun.d.ts +5 -0
  10. package/dist/bun/db/driver.bun.d.ts.map +1 -0
  11. package/dist/bun/db/driver.node.d.ts +15 -0
  12. package/dist/bun/db/driver.node.d.ts.map +1 -0
  13. package/dist/bun/db.d.ts +22 -0
  14. package/dist/bun/db.d.ts.map +1 -0
  15. package/dist/bun/distillation.d.ts +32 -0
  16. package/dist/bun/distillation.d.ts.map +1 -0
  17. package/dist/bun/embedding.d.ts +90 -0
  18. package/dist/bun/embedding.d.ts.map +1 -0
  19. package/dist/bun/gradient.d.ts +73 -0
  20. package/dist/bun/gradient.d.ts.map +1 -0
  21. package/dist/bun/index.d.ts +19 -0
  22. package/dist/bun/index.d.ts.map +1 -0
  23. package/dist/bun/index.js +28236 -0
  24. package/dist/bun/index.js.map +7 -0
  25. package/dist/bun/lat-reader.d.ts +69 -0
  26. package/dist/bun/lat-reader.d.ts.map +1 -0
  27. package/dist/bun/log.d.ts +17 -0
  28. package/dist/bun/log.d.ts.map +1 -0
  29. package/dist/bun/ltm.d.ts +138 -0
  30. package/dist/bun/ltm.d.ts.map +1 -0
  31. package/dist/bun/markdown.d.ts +37 -0
  32. package/dist/bun/markdown.d.ts.map +1 -0
  33. package/dist/bun/prompt.d.ts +47 -0
  34. package/dist/bun/prompt.d.ts.map +1 -0
  35. package/dist/bun/recall.d.ts +41 -0
  36. package/dist/bun/recall.d.ts.map +1 -0
  37. package/dist/bun/search.d.ts +113 -0
  38. package/dist/bun/search.d.ts.map +1 -0
  39. package/dist/bun/temporal.d.ts +66 -0
  40. package/dist/bun/temporal.d.ts.map +1 -0
  41. package/dist/bun/types.d.ts +180 -0
  42. package/dist/bun/types.d.ts.map +1 -0
  43. package/dist/bun/worker.d.ts +6 -0
  44. package/dist/bun/worker.d.ts.map +1 -0
  45. package/dist/node/agents-file.d.ts +59 -0
  46. package/dist/node/agents-file.d.ts.map +1 -0
  47. package/dist/node/config.d.ts +58 -0
  48. package/dist/node/config.d.ts.map +1 -0
  49. package/dist/node/curator.d.ts +35 -0
  50. package/dist/node/curator.d.ts.map +1 -0
  51. package/dist/node/db/driver.bun.d.ts +5 -0
  52. package/dist/node/db/driver.bun.d.ts.map +1 -0
  53. package/dist/node/db/driver.node.d.ts +15 -0
  54. package/dist/node/db/driver.node.d.ts.map +1 -0
  55. package/dist/node/db.d.ts +22 -0
  56. package/dist/node/db.d.ts.map +1 -0
  57. package/dist/node/distillation.d.ts +32 -0
  58. package/dist/node/distillation.d.ts.map +1 -0
  59. package/dist/node/embedding.d.ts +90 -0
  60. package/dist/node/embedding.d.ts.map +1 -0
  61. package/dist/node/gradient.d.ts +73 -0
  62. package/dist/node/gradient.d.ts.map +1 -0
  63. package/dist/node/index.d.ts +19 -0
  64. package/dist/node/index.d.ts.map +1 -0
  65. package/dist/node/index.js +28253 -0
  66. package/dist/node/index.js.map +7 -0
  67. package/dist/node/lat-reader.d.ts +69 -0
  68. package/dist/node/lat-reader.d.ts.map +1 -0
  69. package/dist/node/log.d.ts +17 -0
  70. package/dist/node/log.d.ts.map +1 -0
  71. package/dist/node/ltm.d.ts +138 -0
  72. package/dist/node/ltm.d.ts.map +1 -0
  73. package/dist/node/markdown.d.ts +37 -0
  74. package/dist/node/markdown.d.ts.map +1 -0
  75. package/dist/node/prompt.d.ts +47 -0
  76. package/dist/node/prompt.d.ts.map +1 -0
  77. package/dist/node/recall.d.ts +41 -0
  78. package/dist/node/recall.d.ts.map +1 -0
  79. package/dist/node/search.d.ts +113 -0
  80. package/dist/node/search.d.ts.map +1 -0
  81. package/dist/node/temporal.d.ts +66 -0
  82. package/dist/node/temporal.d.ts.map +1 -0
  83. package/dist/node/types.d.ts +180 -0
  84. package/dist/node/types.d.ts.map +1 -0
  85. package/dist/node/worker.d.ts +6 -0
  86. package/dist/node/worker.d.ts.map +1 -0
  87. package/dist/types/agents-file.d.ts +59 -0
  88. package/dist/types/agents-file.d.ts.map +1 -0
  89. package/dist/types/config.d.ts +58 -0
  90. package/dist/types/config.d.ts.map +1 -0
  91. package/dist/types/curator.d.ts +35 -0
  92. package/dist/types/curator.d.ts.map +1 -0
  93. package/dist/types/db/driver.bun.d.ts +5 -0
  94. package/dist/types/db/driver.bun.d.ts.map +1 -0
  95. package/dist/types/db/driver.node.d.ts +15 -0
  96. package/dist/types/db/driver.node.d.ts.map +1 -0
  97. package/dist/types/db.d.ts +22 -0
  98. package/dist/types/db.d.ts.map +1 -0
  99. package/dist/types/distillation.d.ts +32 -0
  100. package/dist/types/distillation.d.ts.map +1 -0
  101. package/dist/types/embedding.d.ts +90 -0
  102. package/dist/types/embedding.d.ts.map +1 -0
  103. package/dist/types/gradient.d.ts +73 -0
  104. package/dist/types/gradient.d.ts.map +1 -0
  105. package/dist/types/index.d.ts +19 -0
  106. package/dist/types/index.d.ts.map +1 -0
  107. package/dist/types/lat-reader.d.ts +69 -0
  108. package/dist/types/lat-reader.d.ts.map +1 -0
  109. package/dist/types/log.d.ts +17 -0
  110. package/dist/types/log.d.ts.map +1 -0
  111. package/dist/types/ltm.d.ts +138 -0
  112. package/dist/types/ltm.d.ts.map +1 -0
  113. package/dist/types/markdown.d.ts +37 -0
  114. package/dist/types/markdown.d.ts.map +1 -0
  115. package/dist/types/prompt.d.ts +47 -0
  116. package/dist/types/prompt.d.ts.map +1 -0
  117. package/dist/types/recall.d.ts +41 -0
  118. package/dist/types/recall.d.ts.map +1 -0
  119. package/dist/types/search.d.ts +113 -0
  120. package/dist/types/search.d.ts.map +1 -0
  121. package/dist/types/temporal.d.ts +66 -0
  122. package/dist/types/temporal.d.ts.map +1 -0
  123. package/dist/types/types.d.ts +180 -0
  124. package/dist/types/types.d.ts.map +1 -0
  125. package/dist/types/worker.d.ts +6 -0
  126. package/dist/types/worker.d.ts.map +1 -0
  127. package/package.json +48 -5
  128. package/src/agents-file.ts +406 -0
  129. package/src/config.ts +132 -0
  130. package/src/curator.ts +220 -0
  131. package/src/db/driver.bun.ts +18 -0
  132. package/src/db/driver.node.ts +54 -0
  133. package/src/db.ts +433 -0
  134. package/src/distillation.ts +433 -0
  135. package/src/embedding.ts +528 -0
  136. package/src/gradient.ts +1387 -0
  137. package/src/index.ts +109 -0
  138. package/src/lat-reader.ts +374 -0
  139. package/src/log.ts +27 -0
  140. package/src/ltm.ts +861 -0
  141. package/src/markdown.ts +129 -0
  142. package/src/prompt.ts +454 -0
  143. package/src/recall.ts +446 -0
  144. package/src/search.ts +330 -0
  145. package/src/temporal.ts +379 -0
  146. package/src/types.ts +199 -0
  147. package/src/worker.ts +26 -0
package/src/ltm.ts ADDED
@@ -0,0 +1,861 @@
1
+ import { uuidv7 } from "uuidv7";
2
+ import { db, ensureProject } from "./db";
3
+ import { config } from "./config";
4
+ import { ftsQuery, ftsQueryOr, EMPTY_QUERY, extractTopTerms } from "./search";
5
+ import * as embedding from "./embedding";
6
+ import * as latReader from "./lat-reader";
7
+ import * as log from "./log";
8
+
/**
 * Rough token estimate for a piece of text.
 *
 * Applies a fixed ~3 characters-per-token ratio (the heuristic the original
 * author reports as validated against real API data) and rounds up, so any
 * non-empty string costs at least one token.
 *
 * @param text Text to measure (length is counted in UTF-16 code units).
 * @returns Estimated token count; 0 for the empty string.
 */
function estimateTokens(text: string): number {
  const CHARS_PER_TOKEN = 3;
  return Math.ceil(text.length / CHARS_PER_TOKEN);
}
13
+
/**
 * One row of the `knowledge` table, minus the embedding BLOB.
 */
export type KnowledgeEntry = {
  /** Primary key. `create()` generates a UUIDv7 unless the caller supplies an id. */
  id: string;
  /** Owning project, or `null` for entries created without a project scope. */
  project_id: string | null;
  /** Free-form category label. */
  category: string;
  /** Short title; `create()` de-duplicates on it case-insensitively. */
  title: string;
  /** Entry body. May contain `[[...]]` wiki-link references. */
  content: string;
  /** Session the entry originated from, if recorded. */
  source_session: string | null;
  /** Stored as 1 (shared across projects) or 0. */
  cross_project: number;
  /** New entries start at 1.0; list/search queries only return rows above 0.2. */
  confidence: number;
  /** Creation timestamp, `Date.now()` epoch milliseconds. */
  created_at: number;
  /** Last-modified timestamp, `Date.now()` epoch milliseconds. */
  updated_at: number;
  /** Optional metadata string; not written by `create()` — format TODO confirm. */
  metadata: string | null;
};
27
+
/** Column names that make up a KnowledgeEntry, in SELECT order. */
const KNOWLEDGE_COLUMN_NAMES = [
  "id",
  "project_id",
  "category",
  "title",
  "content",
  "source_session",
  "cross_project",
  "confidence",
  "created_at",
  "updated_at",
  "metadata",
] as const;

/**
 * SELECT list for KnowledgeEntry. Deliberately omits the embedding BLOB
 * (4KB per entry), which only vectorSearch() in embedding.ts needs.
 */
const KNOWLEDGE_COLS = KNOWLEDGE_COLUMN_NAMES.join(", ");

/** The same SELECT list with a `k.` table-alias prefix, for JOIN queries. */
const KNOWLEDGE_COLS_K = KNOWLEDGE_COLUMN_NAMES.map((col) => `k.${col}`).join(
  ", ",
);
36
+
/**
 * Create a knowledge entry, or refresh an existing one with the same title.
 *
 * Behaviour:
 * - `scope: "project"` with a `projectPath` attaches the entry to that project;
 *   anything else stores it with `project_id = NULL`.
 * - When no explicit `id` is given, a case-insensitive title match (confidence > 0)
 *   in the same project scope — or, failing that, among cross-project entries —
 *   is updated in place instead of inserting a duplicate. This keeps the curator
 *   from creating several entries for the same concept across sessions.
 * - When an explicit `id` is given (cross-machine import), the dedup guard is
 *   skipped; the importer is expected to detect duplicates by id itself.
 *
 * @returns The id of the inserted entry, or of the existing entry that was updated.
 */
export function create(input: {
  projectPath?: string;
  category: string;
  title: string;
  content: string;
  session?: string;
  scope: "project" | "global";
  crossProject?: boolean;
  /** Explicit ID to use — for cross-machine import via agents-file. Defaults to a new UUIDv7. */
  id?: string;
}): string {
  const pid =
    input.scope === "project" && input.projectPath
      ? ensureProject(input.projectPath)
      : null;

  // An empty-string id means "no id supplied". Normalising it here keeps the
  // dedup guard and the id generation below in agreement; otherwise "" would
  // pass the guard as "no id" and then be inserted as the primary key.
  const explicitId = input.id ? input.id : undefined;

  if (explicitId === undefined) {
    // 1) Same project scope (or the NULL-project pool).
    const sameScope = (
      pid !== null
        ? db()
            .query(
              "SELECT id FROM knowledge WHERE project_id = ? AND LOWER(title) = LOWER(?) AND confidence > 0 LIMIT 1",
            )
            .get(pid, input.title)
        : db()
            .query(
              "SELECT id FROM knowledge WHERE project_id IS NULL AND LOWER(title) = LOWER(?) AND confidence > 0 LIMIT 1",
            )
            .get(input.title)
    ) as { id: string } | null;

    // 2) Cross-project entries — avoids project-scoped copies of shared knowledge.
    //    Only queried when step 1 found nothing.
    const duplicate =
      sameScope ??
      (db()
        .query(
          "SELECT id FROM knowledge WHERE cross_project = 1 AND LOWER(title) = LOWER(?) AND confidence > 0 LIMIT 1",
        )
        .get(input.title) as { id: string } | null);

    if (duplicate) {
      update(duplicate.id, { content: input.content });
      return duplicate.id;
    }
  }

  const id = explicitId ?? uuidv7();
  const now = Date.now();
  db()
    .query(
      `INSERT INTO knowledge (id, project_id, category, title, content, source_session, cross_project, confidence, created_at, updated_at)
       VALUES (?, ?, ?, ?, ?, ?, ?, 1.0, ?, ?)`,
    )
    .run(
      id,
      pid,
      input.category,
      input.title,
      input.content,
      input.session ?? null,
      (input.crossProject ?? false) ? 1 : 0,
      now,
      now,
    );

  // Fire-and-forget embedding for vector search; the result is not awaited here.
  if (embedding.isAvailable()) {
    embedding.embedKnowledgeEntry(id, input.title, input.content);
  }

  return id;
}
121
+
/**
 * Update an entry's content and/or confidence.
 *
 * `updated_at` is always bumped, even when neither optional field is supplied.
 * When the content changes and embeddings are available, the entry is
 * re-embedded (fire-and-forget) using its stored title.
 *
 * @param id    Entry to update.
 * @param input Fields to change; omitted fields are left untouched.
 */
export function update(
  id: string,
  input: { content?: string; confidence?: number },
) {
  const { content, confidence } = input;
  const assignments: string[] = [];
  const values: unknown[] = [];

  if (content !== undefined) {
    assignments.push("content = ?");
    values.push(content);
  }
  if (confidence !== undefined) {
    assignments.push("confidence = ?");
    values.push(confidence);
  }
  assignments.push("updated_at = ?");
  values.push(Date.now(), id); // last placeholder is the WHERE id

  const sql = `UPDATE knowledge SET ${assignments.join(", ")} WHERE id = ?`;
  db()
    .query(sql)
    .run(...(values as [string, ...string[]]));

  // Re-embed only when the content itself changed.
  if (!embedding.isAvailable() || content === undefined) return;
  const current = get(id);
  if (current) {
    embedding.embedKnowledgeEntry(id, current.title, content);
  }
}
151
+
/**
 * Hard-delete a knowledge entry by id.
 *
 * Only the `knowledge` row is removed here; this function issues no statement
 * against `knowledge_refs`.
 */
export function remove(id: string) {
  const stmt = db().query("DELETE FROM knowledge WHERE id = ?");
  stmt.run(id);
}
155
+
/**
 * List the knowledge entries visible to a project.
 *
 * Only entries with confidence above 0.2 are returned, ordered by confidence
 * and then recency (both descending).
 *
 * @param projectPath  Project whose entries to load (registered via ensureProject).
 * @param includeCross When true (default), also include entries with no project
 *                     and entries flagged cross_project.
 */
export function forProject(
  projectPath: string,
  includeCross = true,
): KnowledgeEntry[] {
  const pid = ensureProject(projectPath);

  if (!includeCross) {
    const projectOnly = db().query(
      `SELECT ${KNOWLEDGE_COLS} FROM knowledge
       WHERE project_id = ?
       AND confidence > 0.2
       ORDER BY confidence DESC, updated_at DESC`,
    );
    return projectOnly.all(pid) as KnowledgeEntry[];
  }

  const withShared = db().query(
    `SELECT ${KNOWLEDGE_COLS} FROM knowledge
     WHERE (project_id = ? OR (project_id IS NULL) OR (cross_project = 1))
     AND confidence > 0.2
     ORDER BY confidence DESC, updated_at DESC`,
  );
  return withShared.all(pid) as KnowledgeEntry[];
}
180
+
/** A knowledge entry paired with the relevance score used to rank it. */
type Scored = {
  entry: KnowledgeEntry;
  score: number;
};
182
+
/**
 * BM25 column weights for knowledge_fts (title, content, category).
 *
 * Read from `config().search.ftsWeights` on every call. This function applies
 * no fallback of its own; any defaults are presumably supplied by the config
 * module.
 */
function ftsWeights() {
  const { search: searchConfig } = config();
  return searchConfig.ftsWeights;
}
188
+
/**
 * First-turn cap: with no session context to score against, at most this many
 * entries are taken from each pool (project and cross-project).
 */
const NO_CONTEXT_FALLBACK_CAP = 10;

/**
 * Safety net size: this many of the highest-confidence project entries are
 * always kept as candidates, even when none of their terms match the session
 * context. It protects important project knowledge from being dropped by the
 * coarse term-overlap scoring.
 */
const PROJECT_SAFETY_NET = 5;
197
+
/**
 * Score entries by FTS5 BM25 relevance to session context.
 *
 * The top terms of the context are turned into a prefix query joined with OR
 * (not AND-then-OR): the goal is to rank every candidate, not to find exact
 * matches, so an entry matching 1 of 40 terms still appears in the result.
 * BM25 naturally ranks entries matching more terms higher.
 *
 * Normalisation: raw BM25 ranks are negative (more negative = better) and are
 * mapped linearly onto 0–1, where 1 is the best match. When ranks differ, the
 * worst-ranked match therefore gets exactly 0; when all ranks are equal, every
 * match gets 1.
 *
 * Best-effort: any query failure yields an empty map rather than throwing.
 *
 * @param sessionContext Free text describing the current session.
 * @returns Map of entry ID → normalized score (0–1); empty when there are no
 *          terms, no matches, or the query failed.
 */
function scoreEntriesFTS(sessionContext: string): Map<string, number> {
  const terms = extractTopTerms(sessionContext);
  if (!terms.length) return new Map();

  const q = terms.map((t) => `${t}*`).join(" OR ");
  const { title, content, category } = ftsWeights();

  try {
    const results = db()
      .query(
        `SELECT k.id, bm25(knowledge_fts, ?, ?, ?) as rank
         FROM knowledge k
         JOIN knowledge_fts f ON k.rowid = f.rowid
         WHERE knowledge_fts MATCH ?
         AND k.confidence > 0.2`,
      )
      .all(title, content, category, q) as Array<{
      id: string;
      rank: number;
    }>;

    if (!results.length) return new Map();

    // Single pass for min/max. Spreading the whole result set into
    // Math.min/Math.max can exceed the engine's argument limit on very large
    // result sets, and the catch below would hide that as "no scores".
    let minRank = Infinity;
    let maxRank = -Infinity;
    for (const { rank } of results) {
      if (rank < minRank) minRank = rank;
      if (rank > maxRank) maxRank = rank;
    }

    const scoreMap = new Map<string, number>();
    for (const r of results) {
      const norm =
        minRank === maxRank ? 1 : (maxRank - r.rank) / (maxRank - minRank);
      scoreMap.set(r.id, norm);
    }
    return scoreMap;
  } catch {
    return new Map();
  }
}
247
+
/**
 * Build a relevance-ranked, budget-capped list of knowledge entries for
 * injection into the system prompt of a live session.
 *
 * How candidates are chosen:
 * 1. Project-specific and cross-project entries are both scored against recent
 *    session context (latest distillation plus the 10 most recent raw messages).
 * 2. Project entries get a safety net: the top PROJECT_SAFETY_NET entries by
 *    confidence are always candidates, even with no relevance score, so the
 *    most important project knowledge survives coarse term-overlap scoring.
 * 3. Everything is merged into one pool and greedily packed into the token
 *    budget, highest score first.
 * 4. Without usable session context (first turn), each pool falls back to its
 *    top NO_CONTEXT_FALLBACK_CAP entries by confidence.
 * 5. If the project has a lat.md directory, its sections compete for whatever
 *    budget remains and are returned as synthetic entries with category "lat.md".
 *
 * NOTE(review): scoreEntriesFTS normalises the worst-ranked match to 0, and
 * project entries are kept as "matched" only when score > 0 — so that entry is
 * treated like a non-match here (it can still come back via the safety net).
 *
 * @param projectPath Current project path
 * @param sessionID   Current session ID (for context extraction)
 * @param maxTokens   Hard token budget for the entire formatted block
 */
export function forSession(
  projectPath: string,
  sessionID: string | undefined,
  maxTokens: number,
): KnowledgeEntry[] {
  const pid = ensureProject(projectPath);

  // Candidate pools. Both come back ordered by confidence, then recency.
  const projectEntries = db()
    .query(
      `SELECT ${KNOWLEDGE_COLS} FROM knowledge
       WHERE project_id = ? AND cross_project = 0 AND confidence > 0.2
       ORDER BY confidence DESC, updated_at DESC`,
    )
    .all(pid) as KnowledgeEntry[];

  const crossEntries = db()
    .query(
      `SELECT ${KNOWLEDGE_COLS} FROM knowledge
       WHERE (project_id IS NULL OR cross_project = 1) AND confidence > 0.2
       ORDER BY confidence DESC, updated_at DESC`,
    )
    .all() as KnowledgeEntry[];

  if (projectEntries.length === 0 && crossEntries.length === 0) return [];

  // Session context = latest distillation observations + recent messages.
  let sessionContext = "";
  if (sessionID) {
    const latestDistillation = db()
      .query(
        `SELECT observations FROM distillations
         WHERE project_id = ? AND session_id = ?
         ORDER BY created_at DESC LIMIT 1`,
      )
      .get(pid, sessionID) as { observations: string } | null;
    if (latestDistillation?.observations) {
      sessionContext += latestDistillation.observations + "\n";
    }

    const recentMessages = db()
      .query(
        `SELECT content FROM temporal_messages
         WHERE project_id = ? AND session_id = ?
         ORDER BY created_at DESC LIMIT 10`,
      )
      .all(pid, sessionID) as Array<{ content: string }>;
    if (recentMessages.length) {
      sessionContext += recentMessages.map((m) => m.content).join("\n");
    }
  }

  // Score candidates. Pool order (matched project, safety net, cross) matters
  // only as the tie-breaker of the stable sort below.
  const pool: Scored[] = [];

  if (sessionContext.trim().length > 20) {
    const ftsScores = scoreEntriesFTS(sessionContext);

    // Project entries with a positive relevance × confidence score.
    const matchedIds = new Set<string>();
    for (const entry of projectEntries) {
      const score = (ftsScores.get(entry.id) ?? 0) * entry.confidence;
      if (score > 0) {
        pool.push({ entry, score });
        matchedIds.add(entry.id);
      }
    }

    // Safety net: first PROJECT_SAFETY_NET unmatched project entries (already
    // in confidence order), with a tiny score so they rank below real matches.
    let netted = 0;
    for (const entry of projectEntries) {
      if (netted >= PROJECT_SAFETY_NET) break;
      if (matchedIds.has(entry.id)) continue;
      pool.push({ entry, score: 0.001 * entry.confidence });
      netted += 1;
    }

    // Cross-project entries are only candidates when FTS matched them.
    for (const entry of crossEntries) {
      const relevance = ftsScores.get(entry.id);
      if (relevance === undefined) continue;
      pool.push({ entry, score: relevance * entry.confidence });
    }
  } else {
    // No usable context — top entries by confidence, capped per pool.
    for (const entry of projectEntries.slice(0, NO_CONTEXT_FALLBACK_CAP)) {
      pool.push({ entry, score: entry.confidence });
    }
    for (const entry of crossEntries.slice(0, NO_CONTEXT_FALLBACK_CAP)) {
      pool.push({ entry, score: entry.confidence });
    }
  }

  pool.sort((a, b) => b.score - a.score);

  // Greedy packing: skip entries that do not fit, stop once the budget is spent.
  const HEADER_OVERHEAD_TOKENS = 15;
  let used = HEADER_OVERHEAD_TOKENS;
  const result: KnowledgeEntry[] = [];

  for (const { entry } of pool) {
    if (used >= maxTokens) break;
    const cost = estimateTokens(entry.title + entry.content) + 10;
    if (used + cost <= maxTokens) {
      result.push(entry);
      used += cost;
    }
  }

  // lat.md sections share the same LTM budget and use whatever is left. They
  // are scored separately against the same session context.
  if (latReader.hasLatDir(projectPath) && used < maxTokens) {
    const latSections = latReader.scoreForSession(
      projectPath,
      sessionContext,
      maxTokens - used,
    );
    for (const section of latSections) {
      if (used >= maxTokens) break;
      const display = section.first_paragraph ?? section.content;
      const cost = estimateTokens(section.heading + display) + 10;
      if (used + cost > maxTokens) continue;

      // Wrap the section as a synthetic KnowledgeEntry for formatKnowledge().
      const synthetic: KnowledgeEntry = {
        id: section.id,
        project_id: section.project_id,
        category: "lat.md",
        title: `[${section.file}] ${section.heading}`,
        content: display,
        source_session: null,
        cross_project: 0,
        confidence: 1.0,
        created_at: section.updated_at,
        updated_at: section.updated_at,
        metadata: null,
      };
      result.push(synthetic);
      used += cost;
    }
  }

  return result;
}
412
+
/**
 * Every knowledge entry with confidence above 0.2, across all projects,
 * ordered by confidence and then recency (both descending).
 */
export function all(): KnowledgeEntry[] {
  const sql = `SELECT ${KNOWLEDGE_COLS} FROM knowledge WHERE confidence > 0.2 ORDER BY confidence DESC, updated_at DESC`;
  return db().query(sql).all() as KnowledgeEntry[];
}
420
+
/**
 * LIKE-based fallback search, used when the FTS5 query fails unexpectedly.
 *
 * The query is lower-cased and split on whitespace; terms of two characters or
 * fewer are ignored. Every remaining term must occur as a substring of the
 * title or the content (AND across terms). LIKE wildcards typed by the user
 * (`%`, `_`) and the escape character itself are escaped, so they match
 * literally instead of widening the search.
 *
 * @param input.query       Raw user query.
 * @param input.projectPath When set, restricts results to that project plus
 *                          project-less and cross-project entries.
 * @param input.limit       Maximum rows to return.
 * @returns Matching entries with confidence > 0.2, most recently updated first;
 *          empty when no usable terms remain.
 */
function searchLike(input: {
  query: string;
  projectPath?: string;
  limit: number;
}): KnowledgeEntry[] {
  const terms = input.query
    .toLowerCase()
    .split(/\s+/)
    .filter((t) => t.length > 2);
  if (!terms.length) return [];

  // One (title OR content) condition per term. ESCAPE '\' makes the
  // backslash-escaped wildcards below literal.
  const conditions = terms
    .map(
      () =>
        "(LOWER(title) LIKE ? ESCAPE '\\' OR LOWER(content) LIKE ? ESCAPE '\\')",
    )
    .join(" AND ");
  const likeParams = terms.flatMap((t) => {
    const pattern = `%${t.replace(/[\\%_]/g, "\\$&")}%`;
    return [pattern, pattern];
  });

  if (input.projectPath) {
    const pid = ensureProject(input.projectPath);
    return db()
      .query(
        `SELECT ${KNOWLEDGE_COLS} FROM knowledge WHERE (project_id = ? OR project_id IS NULL OR cross_project = 1) AND confidence > 0.2 AND ${conditions} ORDER BY updated_at DESC LIMIT ?`,
      )
      .all(pid, ...likeParams, input.limit) as KnowledgeEntry[];
  }
  return db()
    .query(
      `SELECT ${KNOWLEDGE_COLS} FROM knowledge WHERE confidence > 0.2 AND ${conditions} ORDER BY updated_at DESC LIMIT ?`,
    )
    .all(...likeParams, input.limit) as KnowledgeEntry[];
}
450
+
/**
 * Full-text search over knowledge entries, best BM25 match first.
 *
 * Strategy: run the strict query from ftsQuery(); if it returns nothing, retry
 * with the broader ftsQueryOr() form. If the FTS5 query throws, fall back to
 * the LIKE-based search.
 *
 * @param input.query       Raw user query.
 * @param input.projectPath When set, restricts results to that project plus
 *                          project-less and cross-project entries.
 * @param input.limit       Maximum rows (default 20).
 * @returns Matching entries with confidence > 0.2; empty for an empty query.
 */
export function search(input: {
  query: string;
  projectPath?: string;
  limit?: number;
}): KnowledgeEntry[] {
  const limit = input.limit ?? 20;
  const q = ftsQuery(input.query);
  if (q === EMPTY_QUERY) return [];

  const pid = input.projectPath ? ensureProject(input.projectPath) : null;

  const ftsSQL = pid
    ? `SELECT ${KNOWLEDGE_COLS_K} FROM knowledge k
       JOIN knowledge_fts f ON k.rowid = f.rowid
       WHERE knowledge_fts MATCH ?
       AND (k.project_id = ? OR k.project_id IS NULL OR k.cross_project = 1)
       AND k.confidence > 0.2
       ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT ?`
    : `SELECT ${KNOWLEDGE_COLS_K} FROM knowledge k
       JOIN knowledge_fts f ON k.rowid = f.rowid
       WHERE knowledge_fts MATCH ?
       AND k.confidence > 0.2
       ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT ?`;

  const { title, content, category } = ftsWeights();

  // Runs the prepared SQL for one MATCH expression; parameter order follows
  // the placeholders above.
  const runMatch = (match: typeof q) => {
    const params = pid
      ? [match, pid, title, content, category, limit]
      : [match, title, content, category, limit];
    return db().query(ftsSQL).all(...params) as KnowledgeEntry[];
  };

  try {
    const strictHits = runMatch(q);
    if (strictHits.length) return strictHits;

    // AND returned nothing — try OR fallback for broader recall.
    const qOr = ftsQueryOr(input.query);
    if (qOr === EMPTY_QUERY) return [];
    return runMatch(qOr);
  } catch {
    return searchLike({
      query: input.query,
      projectPath: input.projectPath,
      limit,
    });
  }
}
500
+
/**
 * A knowledge entry plus the raw FTS5 BM25 `rank` it was returned with
 * (negative; more negative means a better match).
 */
export type ScoredKnowledgeEntry = KnowledgeEntry & {
  rank: number;
};
502
+
/**
 * Full-text search that also returns each hit's raw FTS5 BM25 rank, for use in
 * cross-source score fusion (RRF).
 *
 * Runs the strict ftsQuery() form first and retries with ftsQueryOr() when it
 * finds nothing. Unlike search(), a failing FTS query yields an empty list —
 * there is no LIKE fallback here.
 *
 * @param input.query       Raw user query.
 * @param input.projectPath When set, restricts results to that project plus
 *                          project-less and cross-project entries.
 * @param input.limit       Maximum rows (default 20).
 */
export function searchScored(input: {
  query: string;
  projectPath?: string;
  limit?: number;
}): ScoredKnowledgeEntry[] {
  const limit = input.limit ?? 20;
  const q = ftsQuery(input.query);
  if (q === EMPTY_QUERY) return [];

  const pid = input.projectPath ? ensureProject(input.projectPath) : null;
  const { title, content, category } = ftsWeights();

  const ftsSQL = pid
    ? `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge k
       JOIN knowledge_fts f ON k.rowid = f.rowid
       WHERE knowledge_fts MATCH ?
       AND (k.project_id = ? OR k.project_id IS NULL OR k.cross_project = 1)
       AND k.confidence > 0.2
       ORDER BY rank LIMIT ?`
    : `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge k
       JOIN knowledge_fts f ON k.rowid = f.rowid
       WHERE knowledge_fts MATCH ?
       AND k.confidence > 0.2
       ORDER BY rank LIMIT ?`;

  // The weights come first because bm25() appears in the SELECT list.
  const runMatch = (match: typeof q) => {
    const params = pid
      ? [title, content, category, match, pid, limit]
      : [title, content, category, match, limit];
    return db().query(ftsSQL).all(...params) as ScoredKnowledgeEntry[];
  };

  try {
    const strictHits = runMatch(q);
    if (strictHits.length) return strictHits;

    // Nothing for the strict form — broaden to OR.
    const qOr = ftsQueryOr(input.query);
    if (qOr === EMPTY_QUERY) return [];
    return runMatch(qOr);
  } catch {
    return [];
  }
}
550
+
/**
 * Search knowledge that belongs to OTHER projects: entries that are
 * project-specific (cross_project = 0) and whose project_id differs from the
 * given project's. The recall tool uses this in "all" scope to surface relevant
 * knowledge from the user's other projects ("tunnel" discovery).
 *
 * Cross-project and project-less entries are deliberately left out — the
 * normal scoped search already returns those.
 *
 * @param input.query              Raw user query.
 * @param input.excludeProjectPath Project whose own entries must be excluded.
 * @param input.limit              Maximum rows (default 10).
 * @returns Hits with raw BM25 rank; empty for an empty query or on FTS failure.
 */
export function searchScoredOtherProjects(input: {
  query: string;
  excludeProjectPath: string;
  limit?: number;
}): ScoredKnowledgeEntry[] {
  const limit = input.limit ?? 10;
  const q = ftsQuery(input.query);
  if (q === EMPTY_QUERY) return [];

  const excludePid = ensureProject(input.excludeProjectPath);
  const { title, content, category } = ftsWeights();

  const ftsSQL = `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge k
     JOIN knowledge_fts f ON k.rowid = f.rowid
     WHERE knowledge_fts MATCH ?
     AND k.project_id IS NOT NULL
     AND k.project_id != ?
     AND k.cross_project = 0
     AND k.confidence > 0.2
     ORDER BY rank LIMIT ?`;

  const runMatch = (match: typeof q) =>
    db()
      .query(ftsSQL)
      .all(
        ...[title, content, category, match, excludePid, limit],
      ) as ScoredKnowledgeEntry[];

  try {
    const strictHits = runMatch(q);
    if (strictHits.length) return strictHits;

    // AND returned nothing — try OR fallback.
    const qOr = ftsQueryOr(input.query);
    if (qOr === EMPTY_QUERY) return [];
    return runMatch(qOr);
  } catch {
    return [];
  }
}
596
+
/**
 * Load a single entry by id.
 *
 * No confidence filter is applied, so entries pruned to confidence 0 are still
 * returned.
 *
 * @returns The entry, or a nullish value when no row has that id.
 */
export function get(id: string): KnowledgeEntry | null {
  const byId = db().query(`SELECT ${KNOWLEDGE_COLS} FROM knowledge WHERE id = ?`);
  return byId.get(id) as KnowledgeEntry | null;
}
602
+
/**
 * Soft-prune entries whose content is longer than `maxLength` characters.
 *
 * Such entries are typically corrupted (AGENTS.md roundtrip escaping bugs) or
 * curator hallucinations containing full code dumps. They are not deleted:
 * confidence is set to 0, which hides them from the `confidence > 0.2` queries
 * while keeping them available for debugging. Entries already at confidence 0
 * are left untouched.
 *
 * @param maxLength Maximum allowed content length.
 * @returns Number of entries pruned
 */
export function pruneOversized(maxLength: number): number {
  const stmt = db().query(
    "UPDATE knowledge SET confidence = 0, updated_at = ? WHERE LENGTH(content) > ? AND confidence > 0",
  );
  const { changes } = stmt.run(Date.now(), maxLength);
  // node:sqlite reports `changes` as `number | bigint`; coerce for cross-runtime parity.
  return Number(changes);
}
622
+
// ---------------------------------------------------------------------------
// Wiki-link cross-references ([[entry-id]] / [[Entry Title]])
// ---------------------------------------------------------------------------

/** Matches a whole string in 8-4-4-4-12 hex UUID form, case-insensitively. */
const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;

/**
 * Matches `[[...]]` links; capture group 1 is the text between the brackets.
 * Global flag: clone before calling exec() so `lastIndex` state is not shared.
 */
const WIKI_LINK_RE = /\[\[([^\]]+)\]\]/g;
629
+
/**
 * Resolve a wiki-link reference to a knowledge entry ID.
 *
 * - UUID-shaped refs are looked up directly by id.
 * - Anything else is treated as title text and resolved to the best FTS5 hit
 *   of an unscoped search().
 *
 * @returns The entry id, or null if the reference can't be resolved.
 */
export function resolveRef(ref: string): string | null {
  if (!UUID_RE.test(ref)) {
    // Title search — FTS5 best match.
    const [best] = search({ query: ref, limit: 1 });
    return best ? best.id : null;
  }
  const entry = get(ref);
  return entry ? entry.id : null;
}
645
+
/**
 * Extract [[...]] wiki-link references from entry content.
 *
 * @returns The raw text inside each link (UUIDs or titles), in order of
 *          appearance; duplicates are kept.
 */
export function extractRefs(content: string): string[] {
  // Work on a private copy of the global regex so no lastIndex state is shared.
  const linkPattern = new RegExp(WIKI_LINK_RE.source, WIKI_LINK_RE.flags);
  return Array.from(content.matchAll(linkPattern), (m) => m[1]);
}
659
+
/**
 * Rebuild the outgoing rows of the knowledge_refs join table for one entry
 * from the [[...]] links in its content.
 *
 * Existing outgoing refs of the entry are cleared first. Each link is resolved
 * via resolveRef(); unresolvable links and self-references are skipped.
 * Resolved targets are de-duplicated, so a link repeated in the content
 * produces one row and is counted once.
 *
 * @param entryId Entry whose outgoing refs to rebuild.
 * @returns Number of distinct targets written; 0 when the entry does not exist
 *          or has no resolvable links.
 */
export function syncRefs(entryId: string): number {
  const entry = get(entryId);
  if (!entry) return 0;

  // Clear existing outgoing refs
  db().query("DELETE FROM knowledge_refs WHERE from_id = ?").run(entryId);

  const refs = extractRefs(entry.content);
  if (!refs.length) return 0;

  // Resolve first and collect distinct targets. Several links (an id and a
  // title, or the same link twice) can point at the same entry.
  const targets = new Set<string>();
  for (const ref of refs) {
    const targetId = resolveRef(ref);
    if (targetId && targetId !== entryId) {
      targets.add(targetId);
    }
  }
  if (!targets.size) return 0;

  const insertStmt = db().query(
    "INSERT OR IGNORE INTO knowledge_refs (from_id, to_id) VALUES (?, ?)",
  );
  for (const targetId of targets) {
    insertStmt.run(entryId, targetId);
  }

  return targets.size;
}
689
+
690
/**
 * Cascade-replace an entry ID in all knowledge content and in the refs table.
 *
 * Every literal `[[oldId]]` in `knowledge.content` becomes `[[newId]]`, and
 * `knowledge_refs` rows that mention `oldId` on either side are re-pointed to
 * `newId`. Per the original note this is future-proofing: consolidation
 * updates entries in place, so IDs are not expected to change today.
 *
 * @param oldId ID being retired.
 * @param newId ID that replaces it.
 * @returns Number of knowledge rows whose content was rewritten.
 */
export function cascadeRefReplace(oldId: string, newId: string): number {
  const oldRef = `[[${oldId}]]`;
  const newRef = `[[${newId}]]`;

  // Rewrite content in entries that reference the old ID. instr() is an exact,
  // case-sensitive substring test — the same matching rule REPLACE() applies —
  // whereas LIKE ignores ASCII case and treats % and _ as wildcards, which
  // could select (and timestamp, and count) rows that REPLACE leaves unchanged.
  const result = db()
    .query(
      `UPDATE knowledge SET content = REPLACE(content, ?, ?), updated_at = ?
       WHERE instr(content, ?) > 0`,
    )
    .run(oldRef, newRef, Date.now(), oldRef);

  // Update the join table. OR IGNORE skips rows whose re-pointed form would
  // collide with a row that already exists.
  db().query("UPDATE OR IGNORE knowledge_refs SET to_id = ? WHERE to_id = ?").run(newId, oldId);
  db().query("UPDATE OR IGNORE knowledge_refs SET from_id = ? WHERE from_id = ?").run(newId, oldId);

  // Rows skipped above still mention oldId; each is a duplicate of a link that
  // already exists under newId, so drop it instead of leaving a dangling ID.
  if (oldId !== newId) {
    db().query("DELETE FROM knowledge_refs WHERE from_id = ? OR to_id = ?").run(oldId, oldId);
  }

  // Clean up any rows that became self-referential
  db().query("DELETE FROM knowledge_refs WHERE from_id = to_id").run();

  // node:sqlite returns `changes` as `number | bigint`; coerce for cross-runtime parity.
  return Number(result.changes);
}
717
+
718
/**
 * Clean dead references — links whose target entry no longer exists.
 *
 * For every `knowledge_refs` row whose `to_id` has no matching `knowledge`
 * row, the literal `[[to_id]]` is stripped from the referring entry's content;
 * afterwards all such rows are deleted from `knowledge_refs`. When at least
 * one entry was changed, a summary line is written via `log.info`.
 *
 * @returns Number of distinct entries whose content was cleaned.
 */
export function cleanDeadRefs(): number {
  // Step 1: Find orphan refs (target entry no longer exists)
  const orphans = db()
    .query(
      `SELECT DISTINCT kr.from_id, kr.to_id FROM knowledge_refs kr
       WHERE NOT EXISTS (SELECT 1 FROM knowledge k WHERE k.id = kr.to_id)`,
    )
    .all() as Array<{ from_id: string; to_id: string }>;

  if (!orphans.length) return 0;

  // Step 2: Strip [[dead-uuid]] from referring entries' content.
  // instr() is an exact, case-sensitive substring test, matching what REPLACE()
  // will actually remove; LIKE could match rows that REPLACE leaves untouched.
  const now = Date.now();
  const stripStmt = db().query(
    `UPDATE knowledge SET content = REPLACE(content, ?, ''), updated_at = ?
     WHERE id = ? AND instr(content, ?) > 0`,
  );

  // One entry may hold several dead links; collect IDs so it is counted once.
  const cleanedIds = new Set<string>();
  for (const { from_id: fromId, to_id: toId } of orphans) {
    const deadRef = `[[${toId}]]`;
    const result = stripStmt.run(deadRef, now, fromId, deadRef);
    // `changes` may be `number | bigint` depending on the runtime; normalise.
    if (Number(result.changes) > 0) cleanedIds.add(fromId);
  }

  // Step 3: Delete orphan rows from knowledge_refs
  db()
    .query(
      "DELETE FROM knowledge_refs WHERE to_id NOT IN (SELECT id FROM knowledge)",
    )
    .run();

  const cleaned = cleanedIds.size;
  if (cleaned > 0) {
    log.info(`cleaned ${cleaned} entries with dead [[ref]] links`);
  }

  return cleaned;
}
763
+
764
+ // ---------------------------------------------------------------------------
765
+ // Knowledge integrity checking
766
+ // ---------------------------------------------------------------------------
767
+
768
/** One problem reported by the knowledge integrity check. */
export type IntegrityIssue = {
  /** ID of the knowledge entry the issue is attached to. */
  entryId: string;
  /** Category of the problem. */
  type: "duplicate" | "stale-path" | "oversized" | "empty";
  /** Human-readable explanation of what was found. */
  description: string;
  /** Optional hint on how to resolve the issue. */
  suggestion?: string;
};
774
+
775
/**
 * Check knowledge entries for integrity issues.
 * Returns a list of issues found — does NOT auto-fix.
 *
 * Checks, in the order their issues appear in the result:
 *  1. Oversized content — more than 1200 characters.
 *  2. Empty or near-empty content — fewer than 10 characters after trimming.
 *  3. Possible duplicates — for each entry, up to three full-text matches for
 *     its title are compared by title-word overlap; a pair is flagged when
 *     the shared words make up at least 70% of the shorter title.
 *
 * @param projectPath Project whose entries are loaded via `forProject`.
 * @returns Issues found, possibly empty. A failing duplicate lookup for one
 *   entry is skipped rather than aborting the whole check.
 */
export function check(projectPath: string): IntegrityIssue[] {
  const maxContentChars = 1200;
  const minContentChars = 10;
  const duplicateOverlap = 0.7;

  // Lower-cased, whitespace-separated words of a title. Empty tokens (produced
  // by leading/trailing whitespace) are dropped so they never count as shared.
  const titleWords = (title: string): Set<string> =>
    new Set(
      title
        .toLowerCase()
        .split(/\s+/)
        .filter((word) => word.length > 0),
    );

  // NOTE(review): the meaning of the second `forProject` argument is not
  // visible from this function — confirm against its definition.
  const entries = forProject(projectPath, false);
  const issues: IntegrityIssue[] = [];

  // Oversized entries (no confidence filter is applied here)
  for (const entry of entries) {
    if (entry.content.length > maxContentChars) {
      issues.push({
        entryId: entry.id,
        type: "oversized",
        description: `Content is ${entry.content.length} chars (max ${maxContentChars})`,
        suggestion: "Trim or split into multiple entries",
      });
    }
  }

  // Empty or near-empty content
  for (const entry of entries) {
    const trimmedLength = entry.content.trim().length;
    if (trimmedLength < minContentChars) {
      issues.push({
        entryId: entry.id,
        type: "empty",
        description: `Content is empty or near-empty (${trimmedLength} chars)`,
        suggestion: "Delete or add meaningful content",
      });
    }
  }

  // Duplicate detection: for each entry, search by title and check for high overlap.
  // `seen` holds entries already processed or already named as someone's
  // duplicate, so a pair is reported at most once.
  const seen = new Set<string>();
  for (const entry of entries) {
    if (seen.has(entry.id)) continue;
    const q = ftsQuery(entry.title);
    if (q === EMPTY_QUERY) continue;

    try {
      const { title, content, category } = config().search.ftsWeights;
      const matches = db()
        .query(
          `SELECT k.id, k.title FROM knowledge k
           JOIN knowledge_fts f ON k.rowid = f.rowid
           WHERE knowledge_fts MATCH ?
           AND k.id != ?
           AND k.confidence > 0.2
           ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT 3`,
        )
        .all(q, entry.id, title, content, category) as Array<{
        id: string;
        title: string;
      }>;

      const entryWords = titleWords(entry.title);
      for (const match of matches) {
        if (seen.has(match.id)) continue;

        // Overlap = shared words / word count of the shorter title (case-insensitive).
        const matchWords = titleWords(match.title);
        const smaller = Math.min(entryWords.size, matchWords.size);
        if (smaller === 0) continue; // a title with no words cannot be a duplicate

        let shared = 0;
        for (const word of entryWords) {
          if (matchWords.has(word)) shared++;
        }

        if (shared / smaller >= duplicateOverlap) {
          issues.push({
            entryId: entry.id,
            type: "duplicate",
            description: `Possibly duplicates "${match.title}" (${match.id.slice(0, 8)}...)`,
            suggestion: `Merge with ${match.id}`,
          });
          seen.add(match.id);
        }
      }
    } catch {
      // Deliberately best-effort: anything thrown while looking up duplicates
      // for this entry (e.g. an FTS5 error) skips the entry.
    }
    seen.add(entry.id);
  }

  return issues;
}