@loreai/core 0.0.1 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +26 -5
- package/dist/bun/agents-file.d.ts +59 -0
- package/dist/bun/agents-file.d.ts.map +1 -0
- package/dist/bun/config.d.ts +58 -0
- package/dist/bun/config.d.ts.map +1 -0
- package/dist/bun/curator.d.ts +35 -0
- package/dist/bun/curator.d.ts.map +1 -0
- package/dist/bun/db/driver.bun.d.ts +5 -0
- package/dist/bun/db/driver.bun.d.ts.map +1 -0
- package/dist/bun/db/driver.node.d.ts +15 -0
- package/dist/bun/db/driver.node.d.ts.map +1 -0
- package/dist/bun/db.d.ts +22 -0
- package/dist/bun/db.d.ts.map +1 -0
- package/dist/bun/distillation.d.ts +32 -0
- package/dist/bun/distillation.d.ts.map +1 -0
- package/dist/bun/embedding.d.ts +90 -0
- package/dist/bun/embedding.d.ts.map +1 -0
- package/dist/bun/gradient.d.ts +73 -0
- package/dist/bun/gradient.d.ts.map +1 -0
- package/dist/bun/index.d.ts +19 -0
- package/dist/bun/index.d.ts.map +1 -0
- package/dist/bun/index.js +28236 -0
- package/dist/bun/index.js.map +7 -0
- package/dist/bun/lat-reader.d.ts +69 -0
- package/dist/bun/lat-reader.d.ts.map +1 -0
- package/dist/bun/log.d.ts +17 -0
- package/dist/bun/log.d.ts.map +1 -0
- package/dist/bun/ltm.d.ts +138 -0
- package/dist/bun/ltm.d.ts.map +1 -0
- package/dist/bun/markdown.d.ts +37 -0
- package/dist/bun/markdown.d.ts.map +1 -0
- package/dist/bun/prompt.d.ts +47 -0
- package/dist/bun/prompt.d.ts.map +1 -0
- package/dist/bun/recall.d.ts +41 -0
- package/dist/bun/recall.d.ts.map +1 -0
- package/dist/bun/search.d.ts +113 -0
- package/dist/bun/search.d.ts.map +1 -0
- package/dist/bun/temporal.d.ts +66 -0
- package/dist/bun/temporal.d.ts.map +1 -0
- package/dist/bun/types.d.ts +180 -0
- package/dist/bun/types.d.ts.map +1 -0
- package/dist/bun/worker.d.ts +6 -0
- package/dist/bun/worker.d.ts.map +1 -0
- package/dist/node/agents-file.d.ts +59 -0
- package/dist/node/agents-file.d.ts.map +1 -0
- package/dist/node/config.d.ts +58 -0
- package/dist/node/config.d.ts.map +1 -0
- package/dist/node/curator.d.ts +35 -0
- package/dist/node/curator.d.ts.map +1 -0
- package/dist/node/db/driver.bun.d.ts +5 -0
- package/dist/node/db/driver.bun.d.ts.map +1 -0
- package/dist/node/db/driver.node.d.ts +15 -0
- package/dist/node/db/driver.node.d.ts.map +1 -0
- package/dist/node/db.d.ts +22 -0
- package/dist/node/db.d.ts.map +1 -0
- package/dist/node/distillation.d.ts +32 -0
- package/dist/node/distillation.d.ts.map +1 -0
- package/dist/node/embedding.d.ts +90 -0
- package/dist/node/embedding.d.ts.map +1 -0
- package/dist/node/gradient.d.ts +73 -0
- package/dist/node/gradient.d.ts.map +1 -0
- package/dist/node/index.d.ts +19 -0
- package/dist/node/index.d.ts.map +1 -0
- package/dist/node/index.js +28253 -0
- package/dist/node/index.js.map +7 -0
- package/dist/node/lat-reader.d.ts +69 -0
- package/dist/node/lat-reader.d.ts.map +1 -0
- package/dist/node/log.d.ts +17 -0
- package/dist/node/log.d.ts.map +1 -0
- package/dist/node/ltm.d.ts +138 -0
- package/dist/node/ltm.d.ts.map +1 -0
- package/dist/node/markdown.d.ts +37 -0
- package/dist/node/markdown.d.ts.map +1 -0
- package/dist/node/prompt.d.ts +47 -0
- package/dist/node/prompt.d.ts.map +1 -0
- package/dist/node/recall.d.ts +41 -0
- package/dist/node/recall.d.ts.map +1 -0
- package/dist/node/search.d.ts +113 -0
- package/dist/node/search.d.ts.map +1 -0
- package/dist/node/temporal.d.ts +66 -0
- package/dist/node/temporal.d.ts.map +1 -0
- package/dist/node/types.d.ts +180 -0
- package/dist/node/types.d.ts.map +1 -0
- package/dist/node/worker.d.ts +6 -0
- package/dist/node/worker.d.ts.map +1 -0
- package/dist/types/agents-file.d.ts +59 -0
- package/dist/types/agents-file.d.ts.map +1 -0
- package/dist/types/config.d.ts +58 -0
- package/dist/types/config.d.ts.map +1 -0
- package/dist/types/curator.d.ts +35 -0
- package/dist/types/curator.d.ts.map +1 -0
- package/dist/types/db/driver.bun.d.ts +5 -0
- package/dist/types/db/driver.bun.d.ts.map +1 -0
- package/dist/types/db/driver.node.d.ts +15 -0
- package/dist/types/db/driver.node.d.ts.map +1 -0
- package/dist/types/db.d.ts +22 -0
- package/dist/types/db.d.ts.map +1 -0
- package/dist/types/distillation.d.ts +32 -0
- package/dist/types/distillation.d.ts.map +1 -0
- package/dist/types/embedding.d.ts +90 -0
- package/dist/types/embedding.d.ts.map +1 -0
- package/dist/types/gradient.d.ts +73 -0
- package/dist/types/gradient.d.ts.map +1 -0
- package/dist/types/index.d.ts +19 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/lat-reader.d.ts +69 -0
- package/dist/types/lat-reader.d.ts.map +1 -0
- package/dist/types/log.d.ts +17 -0
- package/dist/types/log.d.ts.map +1 -0
- package/dist/types/ltm.d.ts +138 -0
- package/dist/types/ltm.d.ts.map +1 -0
- package/dist/types/markdown.d.ts +37 -0
- package/dist/types/markdown.d.ts.map +1 -0
- package/dist/types/prompt.d.ts +47 -0
- package/dist/types/prompt.d.ts.map +1 -0
- package/dist/types/recall.d.ts +41 -0
- package/dist/types/recall.d.ts.map +1 -0
- package/dist/types/search.d.ts +113 -0
- package/dist/types/search.d.ts.map +1 -0
- package/dist/types/temporal.d.ts +66 -0
- package/dist/types/temporal.d.ts.map +1 -0
- package/dist/types/types.d.ts +180 -0
- package/dist/types/types.d.ts.map +1 -0
- package/dist/types/worker.d.ts +6 -0
- package/dist/types/worker.d.ts.map +1 -0
- package/package.json +48 -5
- package/src/agents-file.ts +406 -0
- package/src/config.ts +132 -0
- package/src/curator.ts +220 -0
- package/src/db/driver.bun.ts +18 -0
- package/src/db/driver.node.ts +54 -0
- package/src/db.ts +433 -0
- package/src/distillation.ts +433 -0
- package/src/embedding.ts +528 -0
- package/src/gradient.ts +1387 -0
- package/src/index.ts +109 -0
- package/src/lat-reader.ts +374 -0
- package/src/log.ts +27 -0
- package/src/ltm.ts +861 -0
- package/src/markdown.ts +129 -0
- package/src/prompt.ts +454 -0
- package/src/recall.ts +446 -0
- package/src/search.ts +330 -0
- package/src/temporal.ts +379 -0
- package/src/types.ts +199 -0
- package/src/worker.ts +26 -0
package/src/ltm.ts
ADDED
|
@@ -0,0 +1,861 @@
|
|
|
1
|
+
import { uuidv7 } from "uuidv7";
|
|
2
|
+
import { db, ensureProject } from "./db";
|
|
3
|
+
import { config } from "./config";
|
|
4
|
+
import { ftsQuery, ftsQueryOr, EMPTY_QUERY, extractTopTerms } from "./search";
|
|
5
|
+
import * as embedding from "./embedding";
|
|
6
|
+
import * as latReader from "./lat-reader";
|
|
7
|
+
import * as log from "./log";
|
|
8
|
+
|
|
9
|
+
/**
 * Rough token estimate for a piece of text, using ~3 characters per token
 * (the heuristic validated as the best fit against real API data).
 *
 * @param text Text to measure.
 * @returns The character count divided by three, rounded up (0 for "").
 */
function estimateTokens(text: string): number {
  const CHARS_PER_TOKEN = 3;
  const charCount = text.length;
  return Math.ceil(charCount / CHARS_PER_TOKEN);
}
|
|
13
|
+
|
|
14
|
+
/** One row of the `knowledge` table, minus the embedding BLOB. */
export type KnowledgeEntry = {
  /** Primary key; create() defaults it to a fresh UUIDv7. */
  id: string;
  /** Owning project, or null when the entry was not created with project scope. */
  project_id: string | null;
  /** Free-form category label ("lat.md" for synthetic lat.md sections). */
  category: string;
  /** Short title; create() de-duplicates on it case-insensitively. */
  title: string;
  /** Body text of the entry. */
  content: string;
  /** Session the entry was created from, if one was supplied. */
  source_session: string | null;
  /** 1 when the entry is shared across projects, otherwise 0. */
  cross_project: number;
  /** Starts at 1.0; read paths only return entries above 0.2; pruning sets it to 0. */
  confidence: number;
  /** Creation time as a Date.now() value (epoch milliseconds). */
  created_at: number;
  /** Last-modification time as a Date.now() value (epoch milliseconds). */
  updated_at: number;
  /** NOTE(review): never written in this part of the file — presumably serialized extra data; confirm. */
  metadata: string | null;
};
|
|
27
|
+
|
|
28
|
+
/** Column names that make up a KnowledgeEntry. The embedding BLOB (4KB per
 * entry) is deliberately left out — only vectorSearch() in embedding.ts needs it. */
const KNOWLEDGE_COLUMN_NAMES = [
  "id",
  "project_id",
  "category",
  "title",
  "content",
  "source_session",
  "cross_project",
  "confidence",
  "created_at",
  "updated_at",
  "metadata",
];

/** Comma-separated select list for queries that read `knowledge` directly. */
const KNOWLEDGE_COLS = KNOWLEDGE_COLUMN_NAMES.join(", ");

/** The same select list with the `k.` table alias, for use in JOIN queries. */
const KNOWLEDGE_COLS_K = KNOWLEDGE_COLUMN_NAMES.map(
  (column) => `k.${column}`,
).join(", ");
|
|
36
|
+
|
|
37
|
+
/**
 * Insert a knowledge entry, or refresh an existing entry that has the same title.
 *
 * Unless an explicit `id` is supplied, a case-insensitive title lookup runs
 * first: once within the same scope (same project_id, or project_id IS NULL),
 * then among cross-project entries. On a hit, only the existing entry's content
 * is updated and its ID is returned — this keeps the curator from piling up
 * duplicates of one concept across sessions, or project-scoped copies of
 * globally shared knowledge.
 *
 * When `id` is supplied (cross-machine import), the lookup is skipped because
 * the caller (importFromFile) already de-duplicates by UUID.
 *
 * @returns The ID of the inserted entry, or of the existing entry that was updated.
 */
export function create(input: {
  projectPath?: string;
  category: string;
  title: string;
  content: string;
  session?: string;
  scope: "project" | "global";
  crossProject?: boolean;
  /** Explicit ID to use — for cross-machine import via agents-file. Defaults to a new UUIDv7. */
  id?: string;
}): string {
  const { projectPath, title, content } = input;

  // Only project-scoped entries with a known path are tied to a project row.
  const pid =
    input.scope === "project" && projectPath
      ? ensureProject(projectPath)
      : null;

  if (!input.id) {
    // Same-scope lookup first; zero-confidence (pruned) rows never count as duplicates.
    const sameScope = (
      pid === null
        ? db()
            .query(
              "SELECT id FROM knowledge WHERE project_id IS NULL AND LOWER(title) = LOWER(?) AND confidence > 0 LIMIT 1",
            )
            .get(title)
        : db()
            .query(
              "SELECT id FROM knowledge WHERE project_id = ? AND LOWER(title) = LOWER(?) AND confidence > 0 LIMIT 1",
            )
            .get(pid, title)
    ) as { id: string } | null;

    // The cross-project lookup only runs when the same-scope lookup found nothing.
    const duplicate =
      sameScope ??
      (db()
        .query(
          "SELECT id FROM knowledge WHERE cross_project = 1 AND LOWER(title) = LOWER(?) AND confidence > 0 LIMIT 1",
        )
        .get(title) as { id: string } | null);

    if (duplicate) {
      update(duplicate.id, { content });
      return duplicate.id;
    }
  }

  const id = input.id ?? uuidv7();
  const timestamp = Date.now();
  const crossFlag = input.crossProject ? 1 : 0;

  db()
    .query(
      `INSERT INTO knowledge (id, project_id, category, title, content, source_session, cross_project, confidence, created_at, updated_at)
       VALUES (?, ?, ?, ?, ?, ?, ?, 1.0, ?, ?)`,
    )
    .run(
      id,
      pid,
      input.category,
      title,
      content,
      input.session ?? null,
      crossFlag,
      timestamp,
      timestamp,
    );

  // Fire-and-forget: embed for vector search (errors logged, never thrown).
  if (embedding.isAvailable()) {
    embedding.embedKnowledgeEntry(id, title, content);
  }

  return id;
}
|
|
121
|
+
|
|
122
|
+
/**
 * Update the content and/or confidence of a knowledge entry.
 *
 * `updated_at` is always bumped, even when neither field is supplied. When the
 * content changes and embeddings are available, the entry is re-embedded
 * (fire-and-forget) using its stored title.
 *
 * @param id    ID of the entry to modify.
 * @param input Fields to change; omitted fields keep their stored value.
 */
export function update(
  id: string,
  input: { content?: string; confidence?: number },
) {
  const { content, confidence } = input;
  const assignments: string[] = [];
  const values: unknown[] = [];

  if (content !== undefined) {
    assignments.push("content = ?");
    values.push(content);
  }
  if (confidence !== undefined) {
    assignments.push("confidence = ?");
    values.push(confidence);
  }
  assignments.push("updated_at = ?");
  // The final value binds the WHERE id = ? placeholder.
  values.push(Date.now(), id);

  db()
    .query(`UPDATE knowledge SET ${assignments.join(", ")} WHERE id = ?`)
    .run(...(values as [string, ...string[]]));

  // Re-embed only when the content changed (fire-and-forget).
  if (!embedding.isAvailable() || content === undefined) return;
  const entry = get(id);
  if (entry) {
    embedding.embedKnowledgeEntry(id, entry.title, content);
  }
}
|
|
151
|
+
|
|
152
|
+
/** Hard-delete the knowledge row with the given ID. */
export function remove(id: string) {
  const statement = db().query("DELETE FROM knowledge WHERE id = ?");
  statement.run(id);
}
|
|
155
|
+
|
|
156
|
+
/**
 * List the knowledge entries visible to a project, highest confidence first
 * (ties broken by most recent update). Entries at or below 0.2 confidence are
 * excluded.
 *
 * @param projectPath  Path of the project; resolved to an ID via ensureProject().
 * @param includeCross When true (default), also returns entries with no project
 *                     and entries flagged cross_project.
 */
export function forProject(
  projectPath: string,
  includeCross = true,
): KnowledgeEntry[] {
  const pid = ensureProject(projectPath);

  const sql = includeCross
    ? `SELECT ${KNOWLEDGE_COLS} FROM knowledge
       WHERE (project_id = ? OR (project_id IS NULL) OR (cross_project = 1))
       AND confidence > 0.2
       ORDER BY confidence DESC, updated_at DESC`
    : `SELECT ${KNOWLEDGE_COLS} FROM knowledge
       WHERE project_id = ?
       AND confidence > 0.2
       ORDER BY confidence DESC, updated_at DESC`;

  return db().query(sql).all(pid) as KnowledgeEntry[];
}
|
|
180
|
+
|
|
181
|
+
/** A knowledge entry paired with the score used to order it during budget packing. */
type Scored = {
  entry: KnowledgeEntry;
  score: number;
};
|
|
182
|
+
|
|
183
|
+
/**
 * BM25 column weights for knowledge_fts (title, content, category), read from
 * config().search.ftsWeights.
 * NOTE(review): any fallback to default weights is presumably applied inside
 * config() — it is not visible in this function.
 */
function ftsWeights() {
  const { search: searchConfig } = config();
  return searchConfig.ftsWeights;
}
|
|
188
|
+
|
|
189
|
+
/** First-turn cap: with no session context to score against, at most this many
 * entries are taken from each pool (project and cross-project). */
const NO_CONTEXT_FALLBACK_CAP = 10;

/** How many top-confidence project entries are kept as candidates even when
 * they match none of the session context terms. Guards against the coarse
 * term-overlap scoring accidentally dropping important project knowledge. */
const PROJECT_SAFETY_NET = 5;
|
|
197
|
+
|
|
198
|
+
/**
 * Score entries by FTS5 BM25 relevance to session context.
 *
 * Uses OR semantics (not AND-then-OR) because ALL candidates are being scored
 * for relevance ranking, not searched for exact matches. An entry that matches
 * 1 of 40 terms should still get a (low) score rather than be excluded; BM25
 * naturally weights entries matching more terms higher.
 *
 * @param sessionContext Free text the query terms are extracted from.
 * @returns Map of entry ID → normalized score in 0–1 (1 = best match). Empty
 *          when no terms can be extracted, nothing matches, or the query fails.
 */
function scoreEntriesFTS(sessionContext: string): Map<string, number> {
  const terms = extractTopTerms(sessionContext);
  if (!terms.length) return new Map();

  // Prefix-match every term and OR them together.
  const q = terms.map((t) => `${t}*`).join(" OR ");
  const { title, content, category } = ftsWeights();

  try {
    const results = db()
      .query(
        `SELECT k.id, bm25(knowledge_fts, ?, ?, ?) as rank
         FROM knowledge k
         JOIN knowledge_fts f ON k.rowid = f.rowid
         WHERE knowledge_fts MATCH ?
         AND k.confidence > 0.2`,
      )
      .all(title, content, category, q) as Array<{
      id: string;
      rank: number;
    }>;

    if (!results.length) return new Map();

    // Find the rank range in one pass. Spreading the ranks into
    // Math.min/Math.max passes one call argument per row, which throws a
    // RangeError on very large result sets — and the catch below would turn
    // that into "no scores at all".
    let minRank = Infinity;
    let maxRank = -Infinity;
    for (const { rank } of results) {
      if (rank < minRank) minRank = rank;
      if (rank > maxRank) maxRank = rank;
    }

    // Normalize: BM25 rank is negative (more negative = better).
    // Convert to 0–1 where 1 = best match; a flat range scores everything 1.
    const scoreMap = new Map<string, number>();
    for (const r of results) {
      const norm =
        minRank === maxRank ? 1 : (maxRank - r.rank) / (maxRank - minRank);
      scoreMap.set(r.id, norm);
    }
    return scoreMap;
  } catch {
    // Best-effort: a failed FTS query degrades to "no relevance scores".
    return new Map();
  }
}
|
|
247
|
+
|
|
248
|
+
/**
 * Build a relevance-ranked, budget-capped list of knowledge entries for injection
 * into the system prompt of a live session.
 *
 * How the list is assembled:
 *  1. Project-specific and cross-project entries are loaded as two pools.
 *  2. Session context is the latest distillation's observations plus the 10
 *     most recent raw messages of the session.
 *  3. With enough context (more than 20 non-blank characters), both pools are
 *     scored as BM25 relevance × confidence. Unmatched project entries get a
 *     safety net: the top PROJECT_SAFETY_NET by confidence stay in the pool with
 *     a tiny score. Unmatched cross-project entries are dropped.
 *  4. Without context (first turn), each pool contributes its top
 *     NO_CONTEXT_FALLBACK_CAP entries, scored by confidence alone.
 *  5. The merged pool is packed greedily into the token budget by score
 *     descending; entries that do not fit are skipped, not truncated.
 *  6. lat.md sections, scored separately, fill whatever budget remains.
 *
 * @param projectPath Current project path
 * @param sessionID   Current session ID (for context extraction)
 * @param maxTokens   Hard token budget for the entire formatted block
 */
export function forSession(
  projectPath: string,
  sessionID: string | undefined,
  maxTokens: number,
): KnowledgeEntry[] {
  const pid = ensureProject(projectPath);

  // Pool A: entries owned by this project and not shared.
  const projectEntries = db()
    .query(
      `SELECT ${KNOWLEDGE_COLS} FROM knowledge
       WHERE project_id = ? AND cross_project = 0 AND confidence > 0.2
       ORDER BY confidence DESC, updated_at DESC`,
    )
    .all(pid) as KnowledgeEntry[];

  // Pool B: global entries and entries shared across projects.
  const crossEntries = db()
    .query(
      `SELECT ${KNOWLEDGE_COLS} FROM knowledge
       WHERE (project_id IS NULL OR cross_project = 1) AND confidence > 0.2
       ORDER BY confidence DESC, updated_at DESC`,
    )
    .all() as KnowledgeEntry[];

  // NOTE(review): this early exit also skips the lat.md packing further down —
  // confirm that is intended for projects with lat.md but no knowledge entries.
  if (projectEntries.length === 0 && crossEntries.length === 0) return [];

  // Gather the text that relevance is scored against.
  let sessionContext = "";
  if (sessionID) {
    const latestDistillation = db()
      .query(
        `SELECT observations FROM distillations
         WHERE project_id = ? AND session_id = ?
         ORDER BY created_at DESC LIMIT 1`,
      )
      .get(pid, sessionID) as { observations: string } | null;
    const observations = latestDistillation?.observations;
    if (observations) {
      sessionContext += `${observations}\n`;
    }

    const recentMessages = db()
      .query(
        `SELECT content FROM temporal_messages
         WHERE project_id = ? AND session_id = ?
         ORDER BY created_at DESC LIMIT 10`,
      )
      .all(pid, sessionID) as Array<{ content: string }>;
    if (recentMessages.length > 0) {
      sessionContext += recentMessages.map(({ content }) => content).join("\n");
    }
  }

  // Score both pools. Order within `ranked` matters only for score ties.
  let ranked: Scored[];
  if (sessionContext.trim().length > 20) {
    const relevance = scoreEntriesFTS(sessionContext);

    // Split project entries into FTS hits and misses, keeping query order.
    const matched: Scored[] = [];
    const unmatched: KnowledgeEntry[] = [];
    for (const entry of projectEntries) {
      const score = (relevance.get(entry.id) ?? 0) * entry.confidence;
      if (score > 0) {
        matched.push({ entry, score });
      } else {
        unmatched.push(entry);
      }
    }

    // Safety net: best unmatched project entries, scored 0.001 × confidence so
    // they sort below genuinely matched entries.
    const safetyNet: Scored[] = unmatched
      .slice(0, PROJECT_SAFETY_NET)
      .map((entry) => ({ entry, score: 0.001 * entry.confidence }));

    // Cross-project entries must have an FTS match to be considered at all.
    const scoredCross: Scored[] = [];
    for (const entry of crossEntries) {
      if (!relevance.has(entry.id)) continue;
      scoredCross.push({
        entry,
        score: (relevance.get(entry.id) ?? 0) * entry.confidence,
      });
    }

    ranked = [...matched, ...safetyNet, ...scoredCross];
  } else {
    // No usable context — top entries of each pool, scored by confidence.
    const byConfidence = (entry: KnowledgeEntry): Scored => ({
      entry,
      score: entry.confidence,
    });
    ranked = [
      ...projectEntries.slice(0, NO_CONTEXT_FALLBACK_CAP).map(byConfidence),
      ...crossEntries.slice(0, NO_CONTEXT_FALLBACK_CAP).map(byConfidence),
    ];
  }
  ranked.sort((a, b) => b.score - a.score);

  // Greedy packing; the header of the formatted block is charged up front.
  const HEADER_OVERHEAD_TOKENS = 15;
  const result: KnowledgeEntry[] = [];
  let used = HEADER_OVERHEAD_TOKENS;

  for (const { entry } of ranked) {
    if (used >= maxTokens) break;
    const cost = estimateTokens(entry.title + entry.content) + 10;
    if (used + cost <= maxTokens) {
      result.push(entry);
      used += cost;
    }
  }

  // lat.md sections share the LTM budget: they get whatever is left and are
  // scored separately against the same session context.
  if (latReader.hasLatDir(projectPath) && used < maxTokens) {
    const latSections = latReader.scoreForSession(
      projectPath,
      sessionContext,
      maxTokens - used,
    );
    for (const section of latSections) {
      if (used >= maxTokens) break;
      const display = section.first_paragraph ?? section.content;
      const cost = estimateTokens(section.heading + display) + 10;
      if (used + cost > maxTokens) continue;

      // Wrap the section as a synthetic KnowledgeEntry for formatKnowledge().
      const synthetic: KnowledgeEntry = {
        id: section.id,
        project_id: section.project_id,
        category: "lat.md",
        title: `[${section.file}] ${section.heading}`,
        content: display,
        source_session: null,
        cross_project: 0,
        confidence: 1.0,
        created_at: section.updated_at,
        updated_at: section.updated_at,
        metadata: null,
      };
      result.push(synthetic);
      used += cost;
    }
  }

  return result;
}
|
|
412
|
+
|
|
413
|
+
/**
 * Every knowledge entry above 0.2 confidence, across all projects, ordered by
 * confidence and then by most recent update.
 */
export function all(): KnowledgeEntry[] {
  const sql = `SELECT ${KNOWLEDGE_COLS} FROM knowledge WHERE confidence > 0.2 ORDER BY confidence DESC, updated_at DESC`;
  const rows = db().query(sql).all();
  return rows as KnowledgeEntry[];
}
|
|
420
|
+
|
|
421
|
+
/**
 * LIKE-based fallback for when FTS5 fails unexpectedly.
 *
 * The query is lower-cased and split on whitespace; terms of two characters or
 * fewer are ignored. Every remaining term must appear as a substring of the
 * title or the content. LIKE wildcards in a term (`%`, `_`) and the escape
 * character itself are escaped, so user text is matched literally instead of
 * as a pattern.
 *
 * @param input.query       Raw user query.
 * @param input.projectPath When set, restricts results to that project plus
 *                          global and cross-project entries.
 * @param input.limit       Maximum number of rows to return.
 * @returns Matching entries above 0.2 confidence, most recently updated first.
 */
function searchLike(input: {
  query: string;
  projectPath?: string;
  limit: number;
}): KnowledgeEntry[] {
  const terms = input.query
    .toLowerCase()
    .split(/\s+/)
    .filter((t) => t.length > 2);
  if (!terms.length) return [];

  // One AND-ed clause per term. ESCAPE '\' makes the backslash-escaped
  // wildcards in the bound patterns match literally.
  const conditions = terms
    .map(
      () =>
        "(LOWER(title) LIKE ? ESCAPE '\\' OR LOWER(content) LIKE ? ESCAPE '\\')",
    )
    .join(" AND ");
  const likeParams = terms.flatMap((t) => {
    const pattern = `%${t.replace(/[\\%_]/g, "\\$&")}%`;
    return [pattern, pattern];
  });

  if (input.projectPath) {
    const pid = ensureProject(input.projectPath);
    return db()
      .query(
        `SELECT ${KNOWLEDGE_COLS} FROM knowledge WHERE (project_id = ? OR project_id IS NULL OR cross_project = 1) AND confidence > 0.2 AND ${conditions} ORDER BY updated_at DESC LIMIT ?`,
      )
      .all(pid, ...likeParams, input.limit) as KnowledgeEntry[];
  }
  return db()
    .query(
      `SELECT ${KNOWLEDGE_COLS} FROM knowledge WHERE confidence > 0.2 AND ${conditions} ORDER BY updated_at DESC LIMIT ?`,
    )
    .all(...likeParams, input.limit) as KnowledgeEntry[];
}
|
|
450
|
+
|
|
451
|
+
/**
 * Full-text search over knowledge entries, best BM25 match first.
 *
 * The strict query from ftsQuery() is tried first; if it returns nothing, the
 * broader OR query from ftsQueryOr() is tried. If the FTS path throws, the
 * search falls back to the LIKE-based searchLike().
 *
 * @param input.query       Raw user query.
 * @param input.projectPath When set, restricts results to that project plus
 *                          global and cross-project entries.
 * @param input.limit       Maximum number of rows (default 20).
 */
export function search(input: {
  query: string;
  projectPath?: string;
  limit?: number;
}): KnowledgeEntry[] {
  const limit = input.limit ?? 20;
  const q = ftsQuery(input.query);
  if (q === EMPTY_QUERY) return [];

  const pid = input.projectPath ? ensureProject(input.projectPath) : null;

  const ftsSQL = pid
    ? `SELECT ${KNOWLEDGE_COLS_K} FROM knowledge k
       JOIN knowledge_fts f ON k.rowid = f.rowid
       WHERE knowledge_fts MATCH ?
       AND (k.project_id = ? OR k.project_id IS NULL OR k.cross_project = 1)
       AND k.confidence > 0.2
       ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT ?`
    : `SELECT ${KNOWLEDGE_COLS_K} FROM knowledge k
       JOIN knowledge_fts f ON k.rowid = f.rowid
       WHERE knowledge_fts MATCH ?
       AND k.confidence > 0.2
       ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT ?`;

  const { title, content, category } = ftsWeights();

  // Placeholders bind in SQL order: MATCH, [project], bm25 weights, LIMIT.
  const runMatch = (matchExpr: typeof q): KnowledgeEntry[] => {
    const params = pid
      ? [matchExpr, pid, title, content, category, limit]
      : [matchExpr, title, content, category, limit];
    return db().query(ftsSQL).all(...params) as KnowledgeEntry[];
  };

  try {
    const strictHits = runMatch(q);
    if (strictHits.length) return strictHits;

    // The strict query found nothing — retry with OR semantics for broader recall.
    const qOr = ftsQueryOr(input.query);
    if (qOr === EMPTY_QUERY) return [];
    return runMatch(qOr);
  } catch {
    return searchLike({
      query: input.query,
      projectPath: input.projectPath,
      limit,
    });
  }
}
|
|
500
|
+
|
|
501
|
+
/** A knowledge entry together with the raw FTS5 bm25() rank it was selected with. */
export type ScoredKnowledgeEntry = KnowledgeEntry & {
  rank: number;
};
|
|
502
|
+
|
|
503
|
+
/**
 * Search with BM25 scores included. Each result carries its raw FTS5 rank
 * value, for use in cross-source score fusion (RRF).
 *
 * Works like search(): strict query first, OR query when that finds nothing.
 * Unlike search(), there is no LIKE fallback — a failing FTS query yields [].
 *
 * @param input.query       Raw user query.
 * @param input.projectPath When set, restricts results to that project plus
 *                          global and cross-project entries.
 * @param input.limit       Maximum number of rows (default 20).
 */
export function searchScored(input: {
  query: string;
  projectPath?: string;
  limit?: number;
}): ScoredKnowledgeEntry[] {
  const limit = input.limit ?? 20;
  const q = ftsQuery(input.query);
  if (q === EMPTY_QUERY) return [];

  const pid = input.projectPath ? ensureProject(input.projectPath) : null;
  const { title, content, category } = ftsWeights();

  const ftsSQL = pid
    ? `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge k
       JOIN knowledge_fts f ON k.rowid = f.rowid
       WHERE knowledge_fts MATCH ?
       AND (k.project_id = ? OR k.project_id IS NULL OR k.cross_project = 1)
       AND k.confidence > 0.2
       ORDER BY rank LIMIT ?`
    : `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge k
       JOIN knowledge_fts f ON k.rowid = f.rowid
       WHERE knowledge_fts MATCH ?
       AND k.confidence > 0.2
       ORDER BY rank LIMIT ?`;

  // Placeholders bind in SQL order: bm25 weights, MATCH, [project], LIMIT.
  const runMatch = (matchExpr: typeof q): ScoredKnowledgeEntry[] => {
    const params = pid
      ? [title, content, category, matchExpr, pid, limit]
      : [title, content, category, matchExpr, limit];
    return db().query(ftsSQL).all(...params) as ScoredKnowledgeEntry[];
  };

  try {
    const strictHits = runMatch(q);
    if (strictHits.length) return strictHits;

    const qOr = ftsQueryOr(input.query);
    if (qOr === EMPTY_QUERY) return [];
    return runMatch(qOr);
  } catch {
    return [];
  }
}
|
|
550
|
+
|
|
551
|
+
/**
 * Search knowledge entries from OTHER projects — entries that are
 * project-specific (cross_project=0) and belong to a different project_id than
 * the given one. Used by the recall tool in "all" scope to surface relevant
 * knowledge from the user's other projects ("tunnel" discovery across projects).
 *
 * Cross-project and global (project_id IS NULL) entries are left out on
 * purpose: the normal search already returns them.
 *
 * @param input.query              Raw user query.
 * @param input.excludeProjectPath Project whose own entries must not be returned.
 * @param input.limit              Maximum number of rows (default 10).
 * @returns Matches with their raw bm25 rank; [] when the FTS query fails.
 */
export function searchScoredOtherProjects(input: {
  query: string;
  excludeProjectPath: string;
  limit?: number;
}): ScoredKnowledgeEntry[] {
  const limit = input.limit ?? 10;
  const q = ftsQuery(input.query);
  if (q === EMPTY_QUERY) return [];

  const excludePid = ensureProject(input.excludeProjectPath);
  const { title, content, category } = ftsWeights();

  const ftsSQL = `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge k
       JOIN knowledge_fts f ON k.rowid = f.rowid
       WHERE knowledge_fts MATCH ?
       AND k.project_id IS NOT NULL
       AND k.project_id != ?
       AND k.cross_project = 0
       AND k.confidence > 0.2
       ORDER BY rank LIMIT ?`;

  // Placeholders bind in SQL order: bm25 weights, MATCH, excluded project, LIMIT.
  const runMatch = (matchExpr: typeof q): ScoredKnowledgeEntry[] =>
    db()
      .query(ftsSQL)
      .all(
        ...[title, content, category, matchExpr, excludePid, limit],
      ) as ScoredKnowledgeEntry[];

  try {
    const strictHits = runMatch(q);
    if (strictHits.length) return strictHits;

    // The strict query found nothing — retry with OR semantics.
    const qOr = ftsQueryOr(input.query);
    if (qOr === EMPTY_QUERY) return [];
    return runMatch(qOr);
  } catch {
    return [];
  }
}
|
|
596
|
+
|
|
597
|
+
/**
 * Load a single knowledge entry by its ID.
 *
 * @param id - ID of the entry to fetch.
 * @returns The matching row, or null (per the declared return type) when
 *   the lookup finds nothing.
 */
export function get(id: string): KnowledgeEntry | null {
  const lookup = db().query(`SELECT ${KNOWLEDGE_COLS} FROM knowledge WHERE id = ?`);
  const row = lookup.get(id) as KnowledgeEntry | null;
  return row;
}
|
|
602
|
+
|
|
603
|
+
/**
 * Soft-prune knowledge entries whose content is longer than `maxLength`
 * characters. Such entries are typically corrupted by AGENTS.md roundtrip
 * escaping bugs, or are curator hallucinations containing full code dumps.
 *
 * Nothing is deleted: matching rows get `confidence = 0` (and a fresh
 * `updated_at`), which keeps them out of queries that filter on
 * `confidence > 0.2` while leaving them available for debugging. Rows whose
 * confidence is already 0 are left untouched.
 *
 * @param maxLength - Longest allowed content length, in characters.
 * @returns Number of entries pruned by this call.
 */
export function pruneOversized(maxLength: number): number {
  const softDelete = db().query(
    "UPDATE knowledge SET confidence = 0, updated_at = ? WHERE LENGTH(content) > ? AND confidence > 0",
  );
  const { changes } = softDelete.run(Date.now(), maxLength);
  // `changes` is `number | bigint` under node:sqlite; normalise to a number
  // so both runtimes return the same type.
  return Number(changes);
}
|
|
622
|
+
|
|
623
|
+
// ---------------------------------------------------------------------------
|
|
624
|
+
// Wiki-link cross-references ([[entry-id]] / [[Entry Title]])
|
|
625
|
+
// ---------------------------------------------------------------------------
|
|
626
|
+
|
|
627
|
+
/**
 * Matches a string that is, in its entirety, a hyphenated UUID
 * (8-4-4-4-12 hexadecimal digits). Case-insensitive.
 */
const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;

/**
 * Matches every `[[...]]` wiki link in a text; capture group 1 is the text
 * between the brackets. The pattern is global and therefore stateful
 * (`lastIndex`) — copy it before iterating with `exec`.
 */
const WIKI_LINK_RE = /\[\[([^\]]+)\]\]/g;
|
|
629
|
+
|
|
630
|
+
/**
 * Turn the text of a wiki link into the ID of the knowledge entry it names.
 *
 * - Text shaped like a UUID is looked up by ID via `get()`.
 * - Any other text is treated as a title and resolved to the top hit of
 *   `search()` with a limit of 1.
 *
 * @param ref - Raw text taken from between `[[` and `]]`.
 * @returns The resolved entry ID, or null when nothing matches.
 */
export function resolveRef(ref: string): string | null {
  if (!UUID_RE.test(ref)) {
    // Not an ID: fall back to a title search and keep only the best match.
    const [best] = search({ query: ref, limit: 1 });
    return best ? best.id : null;
  }

  const entry = get(ref);
  if (entry) return entry.id;
  return null;
}
|
|
645
|
+
|
|
646
|
+
/**
 * Collect the targets of all `[[...]]` wiki links found in `content`.
 *
 * @param content - Entry text to scan.
 * @returns The raw link texts (UUIDs or titles) in order of appearance,
 *   duplicates included; empty when the text contains no links.
 */
export function extractRefs(content: string): string[] {
  // Work on a private copy of the shared global pattern so scanning always
  // starts at index 0 and never disturbs the shared pattern's `lastIndex`.
  const linkPattern = new RegExp(WIKI_LINK_RE.source, WIKI_LINK_RE.flags);
  return Array.from(content.matchAll(linkPattern), (hit) => hit[1]);
}
|
|
659
|
+
|
|
660
|
+
/**
 * Rebuild the outgoing rows of the knowledge_refs join table for one entry
 * from the `[[...]]` links in its content.
 *
 * Existing outgoing rows for the entry are cleared first, then one row is
 * inserted per distinct resolvable target. Links that cannot be resolved and
 * links that point back at the entry itself are skipped.
 *
 * @param entryId - ID of the entry whose links should be synced.
 * @returns Number of distinct target entries linked from this entry; 0 when
 *   the entry does not exist or has no resolvable links.
 */
export function syncRefs(entryId: string): number {
  const entry = get(entryId);
  if (!entry) return 0;

  // Clear existing outgoing refs
  db().query("DELETE FROM knowledge_refs WHERE from_id = ?").run(entryId);

  // Resolve each distinct raw ref once (every resolution is a DB lookup) and
  // collapse refs that resolve to the same target, so the returned count
  // matches the rows stored rather than the number of link occurrences.
  const targetIds = new Set<string>();
  for (const ref of new Set(extractRefs(entry.content))) {
    const targetId = resolveRef(ref);
    if (targetId && targetId !== entryId) targetIds.add(targetId);
  }
  if (targetIds.size === 0) return 0;

  const insertStmt = db().query(
    "INSERT OR IGNORE INTO knowledge_refs (from_id, to_id) VALUES (?, ?)",
  );
  for (const targetId of targetIds) {
    insertStmt.run(entryId, targetId);
  }

  return targetIds.size;
}
|
|
689
|
+
|
|
690
|
+
/**
 * Cascade-replace an entry ID in all knowledge content and in the
 * knowledge_refs table.
 *
 * Used when an entry ID changes (future-proofing — current consolidation
 * updates in place so IDs don't change, but the mechanism exists).
 *
 * @param oldId - ID being retired.
 * @param newId - ID that replaces it.
 * @returns Number of knowledge entries whose content contained `[[oldId]]`
 *   and was rewritten.
 */
export function cascadeRefReplace(oldId: string, newId: string): number {
  const oldRef = `[[${oldId}]]`;
  const newRef = `[[${newId}]]`;

  // Rewrite content in entries that reference the old ID. The filter uses
  // instr() rather than LIKE so it matches exactly what REPLACE() rewrites:
  // LIKE is ASCII case-insensitive and treats % and _ as wildcards, which
  // could bump updated_at (and the returned count) on rows REPLACE() leaves
  // unchanged.
  const result = db()
    .query(
      `UPDATE knowledge SET content = REPLACE(content, ?, ?), updated_at = ?
       WHERE instr(content, ?) > 0`,
    )
    .run(oldRef, newRef, Date.now(), oldRef);

  // Update the join table. OR IGNORE skips rows whose rewritten form would
  // violate a constraint (e.g. the same edge already exists under newId).
  db().query("UPDATE OR IGNORE knowledge_refs SET to_id = ? WHERE to_id = ?").run(newId, oldId);
  db().query("UPDATE OR IGNORE knowledge_refs SET from_id = ? WHERE from_id = ?").run(newId, oldId);

  // Any row still mentioning oldId was skipped above and duplicates an edge
  // that already exists under newId; drop it so no reference to the retired
  // ID is left behind. Guarded so a no-op rename cannot wipe live edges.
  if (oldId !== newId) {
    db().query("DELETE FROM knowledge_refs WHERE from_id = ? OR to_id = ?").run(oldId, oldId);
  }

  // Clean up any rows that became self-referential
  db().query("DELETE FROM knowledge_refs WHERE from_id = to_id").run();

  // node:sqlite returns `changes` as `number | bigint`; coerce for cross-runtime parity.
  return Number(result.changes);
}
|
|
717
|
+
|
|
718
|
+
/**
 * Clean dead references — remove `[[id]]` links that point to entries which
 * no longer exist.
 *
 * Strips the dead links from the content of the referring entries, then
 * purges the orphan rows from knowledge_refs. Only links written with the
 * target's ID are stripped from content; the orphan rows are purged
 * regardless.
 *
 * @returns Number of distinct entries whose content was cleaned
 */
export function cleanDeadRefs(): number {
  // Step 1: Find orphan refs (target entry no longer exists)
  const orphans = db()
    .query(
      `SELECT DISTINCT kr.from_id, kr.to_id FROM knowledge_refs kr
       WHERE NOT EXISTS (SELECT 1 FROM knowledge k WHERE k.id = kr.to_id)`,
    )
    .all() as Array<{ from_id: string; to_id: string }>;

  if (!orphans.length) return 0;

  // Step 2: Strip [[dead-id]] from referring entries' content.
  // instr() is used instead of LIKE so a row is only touched when REPLACE()
  // will actually change it (LIKE is ASCII case-insensitive and honours the
  // % and _ wildcards; REPLACE() is an exact match).
  const now = Date.now();
  const stripStmt = db().query(
    `UPDATE knowledge SET content = REPLACE(content, ?, ''), updated_at = ?
     WHERE id = ? AND instr(content, ?) > 0`,
  );

  // One entry can hold several dead links; track IDs so each entry is
  // counted once, as the return value promises.
  const cleanedEntryIds = new Set<string>();
  for (const ref of orphans) {
    const deadRef = `[[${ref.to_id}]]`;
    const result = stripStmt.run(deadRef, now, ref.from_id, deadRef);
    // node:sqlite returns `changes` as `number | bigint`; coerce for cross-runtime parity.
    if (Number(result.changes) > 0) cleanedEntryIds.add(ref.from_id);
  }

  // Step 3: Delete orphan rows from knowledge_refs
  db()
    .query(
      "DELETE FROM knowledge_refs WHERE to_id NOT IN (SELECT id FROM knowledge)",
    )
    .run();

  const cleaned = cleanedEntryIds.size;
  if (cleaned > 0) {
    log.info(`cleaned ${cleaned} entries with dead [[ref]] links`);
  }

  return cleaned;
}
|
|
763
|
+
|
|
764
|
+
// ---------------------------------------------------------------------------
|
|
765
|
+
// Knowledge integrity checking
|
|
766
|
+
// ---------------------------------------------------------------------------
|
|
767
|
+
|
|
768
|
+
/** A single problem reported by the knowledge integrity check. */
export type IntegrityIssue = {
  /** ID of the knowledge entry the issue was raised for. */
  entryId: string;
  /** Category of the problem. */
  type: "duplicate" | "stale-path" | "oversized" | "empty";
  /** Human-readable explanation of what was found. */
  description: string;
  /** Recommended follow-up action, when one is available. */
  suggestion?: string;
};
|
|
774
|
+
|
|
775
|
+
/**
 * Inspect the knowledge entries of a project for integrity problems.
 * Issues are only reported — nothing is modified.
 *
 * Issues are returned in this order:
 *  1. "oversized" — content longer than 1200 characters.
 *  2. "empty"     — trimmed content shorter than 10 characters.
 *  3. "duplicate" — another entry, found through an FTS5 search on this
 *     entry's title (top 3 hits, confidence > 0.2), shares at least 70% of
 *     the distinct lower-cased words of the shorter title. Note that the
 *     candidate query itself carries no project filter.
 *
 * @param projectPath - Path of the project whose entries are checked.
 * @returns All issues found; empty when the entries look healthy.
 */
export function check(projectPath: string): IntegrityIssue[] {
  const entries = forProject(projectPath, false);

  // Pass 1: oversized content.
  const oversized: IntegrityIssue[] = entries
    .filter((entry) => entry.content.length > 1200)
    .map((entry) => ({
      entryId: entry.id,
      type: "oversized",
      description: `Content is ${entry.content.length} chars (max 1200)`,
      suggestion: "Trim or split into multiple entries",
    }));

  // Pass 2: empty or near-empty content.
  const nearEmpty: IntegrityIssue[] = entries
    .filter((entry) => entry.content.trim().length < 10)
    .map((entry) => ({
      entryId: entry.id,
      type: "empty",
      description: `Content is empty or near-empty (${entry.content.trim().length} chars)`,
      suggestion: "Delete or add meaningful content",
    }));

  const issues: IntegrityIssue[] = [...oversized, ...nearEmpty];

  // Pass 3: likely duplicates, judged by title word overlap.
  const toWordSet = (title: string): Set<string> => new Set(title.toLowerCase().split(/\s+/));

  // IDs that were already examined or already reported as someone's duplicate.
  const handled = new Set<string>();
  for (const entry of entries) {
    if (handled.has(entry.id)) continue;

    const matchExpr = ftsQuery(entry.title);
    // Titles with nothing searchable are skipped without being marked handled.
    if (matchExpr === EMPTY_QUERY) continue;

    try {
      const weights = config().search.ftsWeights;
      const candidates = db()
        .query(
          `SELECT k.id, k.title FROM knowledge k
             JOIN knowledge_fts f ON k.rowid = f.rowid
             WHERE knowledge_fts MATCH ?
             AND k.id != ?
             AND k.confidence > 0.2
             ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT 3`,
        )
        .all(matchExpr, entry.id, weights.title, weights.content, weights.category) as Array<{
        id: string;
        title: string;
      }>;

      const entryWords = toWordSet(entry.title);
      for (const candidate of candidates) {
        if (handled.has(candidate.id)) continue;

        // Share of the smaller title's distinct words that both titles contain.
        const candidateWords = toWordSet(candidate.title);
        let shared = 0;
        for (const word of entryWords) {
          if (candidateWords.has(word)) shared++;
        }
        const overlap = shared / Math.min(entryWords.size, candidateWords.size);

        if (overlap >= 0.7) {
          issues.push({
            entryId: entry.id,
            type: "duplicate",
            description: `Possibly duplicates "${candidate.title}" (${candidate.id.slice(0, 8)}...)`,
            suggestion: `Merge with ${candidate.id}`,
          });
          // Do not report the same pair again from the candidate's side.
          handled.add(candidate.id);
        }
      }
    } catch {
      // FTS5 error — skip this entry
    }
    handled.add(entry.id);
  }

  return issues;
}
|