ex-brain 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -37
- package/package.json +5 -5
- package/src/ai/compiler.ts +529 -0
- package/src/ai/embed-factory.ts +116 -0
- package/src/ai/entity-link.ts +226 -0
- package/src/ai/hash-embed.ts +30 -0
- package/src/ai/timeline-extractor.ts +436 -0
- package/src/cli.ts +16 -0
- package/src/commands/compile-cmd.ts +208 -0
- package/src/commands/graph-cmd.ts +1070 -0
- package/src/commands/index.ts +1447 -0
- package/src/config.ts +80 -0
- package/src/db/client.ts +101 -0
- package/src/db/schema.ts +49 -0
- package/src/markdown/io.ts +61 -0
- package/src/markdown/parser.ts +72 -0
- package/src/mcp/server.ts +540 -0
- package/src/repositories/brain-repo.ts +772 -0
- package/src/settings.ts +214 -0
- package/src/types/index.ts +55 -0
- package/src/utils/progress.ts +171 -0
- package/dist/cli.js +0 -93543
|
@@ -0,0 +1,772 @@
|
|
|
1
|
+
import { nowIso } from "../config";
|
|
2
|
+
import type {
|
|
3
|
+
BrainStats,
|
|
4
|
+
PageRecord,
|
|
5
|
+
PutPageInput,
|
|
6
|
+
SearchHit,
|
|
7
|
+
TimelineEntry,
|
|
8
|
+
} from "../types";
|
|
9
|
+
import type { ResolvedLLM } from "../settings";
|
|
10
|
+
import type { CompileInput, CompileResult } from "../ai/compiler";
|
|
11
|
+
import type { TimelineExtractionResult } from "../ai/timeline-extractor";
|
|
12
|
+
import { compileTruth } from "../ai/compiler";
|
|
13
|
+
import { extractTimelineEvents } from "../ai/timeline-extractor";
|
|
14
|
+
import { BrainDb } from "../db/client";
|
|
15
|
+
|
|
16
|
+
type SqlRow = Record<string, unknown>;
|
|
17
|
+
|
|
18
|
+
/**
 * Return the first row of a result set, typed as `T`.
 *
 * @param rows Result rows, or null when the driver returned nothing.
 * @returns The first row cast to `T`, or null for a null/empty result.
 */
function one<T>(rows: Record<string, unknown>[] | null): T | null {
  if (!rows || rows.length === 0) {
    return null;
  }
  // The cast is unchecked: the caller is responsible for `T` matching the SELECT list.
  return rows[0] as T;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Normalize a result set to an array typed as `T[]`.
 *
 * @param rows Result rows, or null when the driver returned nothing.
 * @returns The rows cast to `T[]`; an empty array when `rows` is null.
 */
function many<T>(rows: Record<string, unknown>[] | null): T[] {
  // The cast is unchecked: the caller is responsible for `T` matching the SELECT list.
  return (rows ?? []) as T[];
}
|
|
28
|
+
|
|
29
|
+
/**
 * Parse the JSON-encoded frontmatter column into a plain object.
 *
 * @param raw JSON text as stored in the `frontmatter` column.
 * @returns The parsed object, or `{}` when the text is not valid JSON or
 *          decodes to anything other than a non-array object.
 */
function parseFrontmatter(raw: string): Record<string, unknown> {
  try {
    const parsed: unknown = JSON.parse(raw);
    // JSON.parse happily returns null, arrays and scalars; none of those are
    // usable as a key/value frontmatter map, so they collapse to `{}`.
    if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
      return parsed as Record<string, unknown>;
    }
    return {};
  } catch {
    return {};
  }
}
|
|
36
|
+
|
|
37
|
+
export class BrainRepository {
|
|
38
|
+
// Keeps the BrainDb handle as a private, readonly parameter property.
constructor(private readonly db: BrainDb) {}
|
|
39
|
+
|
|
40
|
+
/**
 * Initialization hook; performs no work.
 */
async init(): Promise<void> {
  // Schema is auto-created when connecting.
}
|
|
43
|
+
|
|
44
|
+
/**
 * Load a single page by slug.
 *
 * @param slug Slug of the page to read.
 * @returns The page with snake_case columns mapped to camelCase fields and
 *          frontmatter decoded via `parseFrontmatter`, or null when no row matches.
 */
async getPage(slug: string): Promise<PageRecord | null> {
  const rows = await this.db.client.execute(
    `SELECT slug, type, title, compiled_truth, timeline, frontmatter, created_at, updated_at
     FROM pages WHERE slug = ?`,
    [slug],
  );
  const row = one<{
    slug: string;
    type: string;
    title: string;
    compiled_truth: string;
    timeline: string;
    frontmatter: string;
    created_at: string;
    updated_at: string;
  }>(rows);
  if (!row) {
    return null;
  }
  return {
    slug: row.slug,
    type: row.type,
    title: row.title,
    compiledTruth: row.compiled_truth,
    timeline: row.timeline,
    frontmatter: parseFrontmatter(row.frontmatter),
    createdAt: row.created_at,
    updatedAt: row.updated_at,
  };
}
|
|
74
|
+
|
|
75
|
+
/**
 * Insert a page or update it in place when the slug already exists.
 *
 * `created_at` is only written on insert; `updated_at` is always set to now.
 * When `input.timeline` or `input.frontmatter` is omitted, the value already
 * stored for the page (if any) is kept rather than being reset.
 *
 * @param input Page fields to store.
 * @param skipEmbed When true, the search index is not refreshed.
 * @returns The page as read back after the write.
 * @throws Error if the page cannot be read back after the upsert.
 */
async putPage(input: PutPageInput, skipEmbed = false): Promise<PageRecord> {
  const now = nowIso();
  const existing = await this.getPage(input.slug);
  const createdAt = existing?.createdAt ?? now;
  // Mirror the timeline fallback: omitting frontmatter must not wipe stored metadata.
  const frontmatter = JSON.stringify(input.frontmatter ?? existing?.frontmatter ?? {});
  const timeline = input.timeline ?? existing?.timeline ?? "";
  await this.db.client.execute(
    `INSERT INTO pages (slug, type, title, compiled_truth, timeline, frontmatter, created_at, updated_at)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?)
     ON DUPLICATE KEY UPDATE
       type = VALUES(type),
       title = VALUES(title),
       compiled_truth = VALUES(compiled_truth),
       timeline = VALUES(timeline),
       frontmatter = VALUES(frontmatter),
       updated_at = VALUES(updated_at)`,
    [
      input.slug,
      input.type,
      input.title,
      input.compiledTruth,
      timeline,
      frontmatter,
      createdAt,
      now,
    ],
  );
  if (!skipEmbed) {
    await this.syncPageToSearch(input.slug);
  }
  const saved = await this.getPage(input.slug);
  if (!saved) {
    // Previously a null was silently cast to PageRecord and crashed later at the caller.
    throw new Error(`putPage: page "${input.slug}" could not be read back after upsert`);
  }
  return saved;
}
|
|
107
|
+
|
|
108
|
+
/**
 * List pages, most recently updated first.
 *
 * @param filters Optional `type` (exact match), `tag` (joins `page_tags`) and
 *                `limit` (defaults to 50).
 * @returns Matching pages mapped to camelCase fields.
 */
async listPages(filters: {
  type?: string;
  tag?: string;
  limit?: number;
}): Promise<PageRecord[]> {
  const limit = filters.limit ?? 50;
  const params: unknown[] = [];
  let sql = `SELECT p.slug, p.type, p.title, p.compiled_truth, p.timeline, p.frontmatter, p.created_at, p.updated_at
     FROM pages p`;
  if (filters.tag) {
    sql += " INNER JOIN page_tags t ON p.slug = t.page_slug";
  }
  // `WHERE 1=1` lets every optional condition be appended uniformly with AND.
  sql += " WHERE 1=1";
  if (filters.type) {
    sql += " AND p.type = ?";
    params.push(filters.type);
  }
  if (filters.tag) {
    sql += " AND t.tag = ?";
    params.push(filters.tag);
  }
  sql += " ORDER BY p.updated_at DESC LIMIT ?";
  params.push(limit);
  const rows = many<{
    slug: string;
    type: string;
    title: string;
    compiled_truth: string;
    timeline: string;
    frontmatter: string;
    created_at: string;
    updated_at: string;
  }>(await this.db.client.execute(sql, params));

  return rows.map((row) => ({
    slug: row.slug,
    type: row.type,
    title: row.title,
    compiledTruth: row.compiled_truth,
    timeline: row.timeline,
    frontmatter: parseFrontmatter(row.frontmatter),
    createdAt: row.created_at,
    updatedAt: row.updated_at,
  }));
}
|
|
153
|
+
|
|
154
|
+
/**
 * Count rows in the pages, links, page_tags, timeline_entries and raw_data tables.
 *
 * @returns Row counts coerced with `Number(...)`; each count is 0 when the
 *          query yields no row.
 */
async stats(): Promise<BrainStats> {
  const rows = await this.db.client.execute(
    `SELECT
       (SELECT COUNT(*) FROM pages) AS pages,
       (SELECT COUNT(*) FROM links) AS links,
       (SELECT COUNT(*) FROM page_tags) AS tags,
       (SELECT COUNT(*) FROM timeline_entries) AS timeline_entries,
       (SELECT COUNT(*) FROM raw_data) AS raw_rows`,
  );
  const row = one<{
    pages: number;
    links: number;
    tags: number;
    timeline_entries: number;
    raw_rows: number;
  }>(rows);
  return {
    pages: Number(row?.pages ?? 0),
    links: Number(row?.links ?? 0),
    tags: Number(row?.tags ?? 0),
    timelineEntries: Number(row?.timeline_entries ?? 0),
    rawRows: Number(row?.raw_rows ?? 0),
  };
}
|
|
178
|
+
|
|
179
|
+
/**
 * Hybrid search over the pages collection for documents containing `query`.
 *
 * @param query Text that matching documents must contain (`$contains`).
 * @param limit Maximum number of results requested.
 * @param type Optional page type used as a metadata filter.
 * @returns Hits in the order returned by the collection; `score` is
 *          `1 / (1 + distance)` and `excerpt` is the first 220 characters.
 */
async search(query: string, limit = 10, type?: string): Promise<SearchHit[]> {
  const where = type ? ({ type } as Record<string, unknown>) : undefined;
  const result = await this.db.pagesCollection.hybridSearch({
    query: { whereDocument: { $contains: query }, where },
    nResults: limit,
    include: ["documents", "metadatas", "distances"],
  });
  const ids = result.ids[0] ?? [];
  const metadatas = result.metadatas?.[0] ?? [];
  const docs = result.documents?.[0] ?? [];
  const distances = result.distances?.[0] ?? [];
  const hits: SearchHit[] = [];
  for (let i = 0; i < ids.length; i += 1) {
    const slug = ids[i];
    if (!slug) continue;
    const md = (metadatas[i] ?? {}) as Record<string, unknown>;
    // Read into a local so the typeof check actually narrows the value.
    const rawDistance = distances[i];
    // A missing distance is treated as 1, i.e. a score of 0.5.
    const distance = typeof rawDistance === "number" ? rawDistance : 1;
    const score = 1 / (1 + distance);
    hits.push({
      slug,
      title: String(md.title ?? slug),
      type: String(md.type ?? "other"),
      score,
      excerpt: String(docs[i] ?? "").slice(0, 220),
      updatedAt: String(md.updatedAt ?? ""),
    });
  }
  return hits;
}
|
|
208
|
+
|
|
209
|
+
/**
 * Query the pages collection with a free-text question and re-rank the results.
 *
 * Final score = `vectorScore * 0.85` + freshness boost (see `recentBoost`)
 * + 0.05 when the page type is "person", where `vectorScore = 1 / (1 + distance)`.
 *
 * @param question Text passed as `queryTexts`.
 * @param limit Maximum number of results requested.
 * @returns Hits sorted by descending final score.
 */
async query(question: string, limit = 10): Promise<SearchHit[]> {
  const result = await this.db.pagesCollection.query({
    queryTexts: question,
    nResults: limit,
    include: ["documents", "metadatas", "distances"],
  });
  const ids = result.ids[0] ?? [];
  const metadatas = result.metadatas?.[0] ?? [];
  const docs = result.documents?.[0] ?? [];
  const distances = result.distances?.[0] ?? [];
  const hits: SearchHit[] = [];
  for (let i = 0; i < ids.length; i += 1) {
    const slug = ids[i];
    if (!slug) continue;
    const md = (metadatas[i] ?? {}) as Record<string, unknown>;
    // Read into a local so the typeof check actually narrows the value.
    const rawDistance = distances[i];
    const distance = typeof rawDistance === "number" ? rawDistance : 1;
    const vectorScore = 1 / (1 + distance);
    const freshnessBoost = this.recentBoost(String(md.updatedAt ?? ""));
    const typeBoost = String(md.type ?? "") === "person" ? 0.05 : 0;
    const score = vectorScore * 0.85 + freshnessBoost + typeBoost;
    hits.push({
      slug,
      title: String(md.title ?? slug),
      type: String(md.type ?? "other"),
      score,
      excerpt: String(docs[i] ?? "").slice(0, 220),
      updatedAt: String(md.updatedAt ?? ""),
    });
  }
  hits.sort((a, b) => b.score - a.score);
  return hits;
}
|
|
241
|
+
|
|
242
|
+
/**
 * Score bonus for recently updated pages.
 *
 * @param updatedAt Timestamp string parseable by `Date`; may be empty.
 * @returns 0.1 when the timestamp is at most 30 days old, otherwise 0.
 *          Empty or unparseable timestamps yield 0.
 */
private recentBoost(updatedAt: string): number {
  if (!updatedAt) return 0;
  const updatedMs = new Date(updatedAt).getTime();
  // An unparseable date gives NaN; make the "no boost" outcome explicit.
  if (Number.isNaN(updatedMs)) return 0;
  const days = (Date.now() - updatedMs) / (1000 * 60 * 60 * 24);
  return days <= 30 ? 0.1 : 0;
}
|
|
248
|
+
|
|
249
|
+
/**
 * Upsert one page into the search collection.
 *
 * The indexed document is `title`, `compiledTruth` and `timeline` joined by
 * blank lines, cut to 8000 characters. Does nothing when the page does not exist.
 *
 * @param slug Slug of the page to index; also used as the document id.
 */
async syncPageToSearch(slug: string): Promise<void> {
  const page = await this.getPage(slug);
  if (!page) return;
  const fullDoc = `${page.title}\n\n${page.compiledTruth}\n\n${page.timeline}`;

  // Truncate to avoid embedding API limits (most models have 8192 token limit).
  // Conservative: ~4 chars per token, so 8192 tokens ≈ 32000 chars,
  // but some models count differently, so 8000 chars is used as a safe limit.
  const MAX_DOC_LENGTH = 8000;
  const doc = fullDoc.length > MAX_DOC_LENGTH
    ? fullDoc.slice(0, MAX_DOC_LENGTH) + '\n... (truncated)'
    : fullDoc;

  const meta = {
    slug: page.slug,
    title: page.title,
    type: page.type,
    updatedAt: page.updatedAt,
  };
  await this.db.pagesCollection.upsert({
    ids: [page.slug],
    documents: [doc],
    metadatas: [meta],
  });
}
|
|
274
|
+
|
|
275
|
+
/**
 * Batch sync multiple pages to the search index with a single upsert.
 * More efficient than calling syncPageToSearch for each page.
 *
 * Slugs that do not resolve to a page are skipped; repeated slugs are
 * indexed once.
 *
 * @param slugs Slugs of the pages to index.
 */
async syncPagesToSearch(slugs: string[]): Promise<void> {
  // Deduplicate so the upsert never receives the same id twice.
  const uniqueSlugs = [...new Set(slugs)];
  const pages = await Promise.all(uniqueSlugs.map((s) => this.getPage(s)));
  const validPages = pages.filter((p): p is PageRecord => p !== null);
  if (validPages.length === 0) return;

  // Same 8000-character cap as syncPageToSearch.
  const MAX_DOC_LENGTH = 8000;
  const docs = validPages.map((p) => {
    const fullDoc = `${p.title}\n\n${p.compiledTruth}\n\n${p.timeline}`;
    return fullDoc.length > MAX_DOC_LENGTH
      ? fullDoc.slice(0, MAX_DOC_LENGTH) + '\n... (truncated)'
      : fullDoc;
  });
  const metas = validPages.map((p) => ({
    slug: p.slug,
    title: p.title,
    type: p.type,
    updatedAt: p.updatedAt,
  }));

  await this.db.pagesCollection.upsert({
    ids: validPages.map((p) => p.slug),
    documents: docs,
    metadatas: metas,
  });
}
|
|
304
|
+
|
|
305
|
+
/**
 * Re-index pages into the search collection, one page at a time.
 *
 * Pages are loaded through `listPages` with a limit of 100000, so anything
 * beyond that many pages is not processed.
 *
 * @returns Number of pages processed.
 */
async embedAll(): Promise<number> {
  const pages = await this.listPages({ limit: 100000 });
  for (const page of pages) {
    await this.syncPageToSearch(page.slug);
  }
  return pages.length;
}
|
|
312
|
+
|
|
313
|
+
/**
 * Record a link between two pages; on a duplicate key only `context` is updated.
 *
 * @param fromSlug Slug of the linking page.
 * @param toSlug Slug of the linked page.
 * @param context Text stored alongside the link.
 */
async link(fromSlug: string, toSlug: string, context: string): Promise<void> {
  await this.db.client.execute(
    `INSERT INTO links (from_slug, to_slug, context, created_at)
     VALUES (?, ?, ?, ?)
     ON DUPLICATE KEY UPDATE context = VALUES(context)`,
    [fromSlug, toSlug, context, nowIso()],
  );
}
|
|
321
|
+
|
|
322
|
+
/**
 * Read timeline entries for one page, newest first (date, then id, descending).
 *
 * @param slug Slug of the page.
 * @param limit Maximum number of entries to return.
 * @returns Entries mapped to camelCase fields.
 */
async timeline(slug: string, limit = 50): Promise<TimelineEntry[]> {
  const rows = many<{
    id: number;
    page_slug: string;
    date: string;
    source: string;
    summary: string;
    detail: string;
  }>(
    await this.db.client.execute(
      `SELECT id, page_slug, date, source, summary, detail
       FROM timeline_entries
       WHERE page_slug = ?
       ORDER BY date DESC, id DESC
       LIMIT ?`,
      [slug, limit],
    ),
  );
  return rows.map((row) => ({
    id: row.id,
    pageSlug: row.page_slug,
    date: row.date,
    source: row.source,
    summary: row.summary,
    detail: row.detail,
  }));
}
|
|
349
|
+
|
|
350
|
+
/**
 * Insert one timeline entry; `created_at` is set to the current time.
 *
 * @param entry Entry to store. Its `id`, if any, is not written.
 */
async timelineAdd(entry: TimelineEntry): Promise<void> {
  await this.db.client.execute(
    `INSERT INTO timeline_entries (page_slug, date, source, summary, detail, created_at)
     VALUES (?, ?, ?, ?, ?, ?)`,
    [
      entry.pageSlug,
      entry.date,
      entry.source,
      entry.summary,
      entry.detail,
      nowIso(),
    ],
  );
}
|
|
364
|
+
|
|
365
|
+
/**
 * Add multiple timeline entries.
 *
 * Entries are inserted one statement at a time, in order, and all share the
 * same `created_at` timestamp. A failure part-way through leaves the earlier
 * entries inserted.
 *
 * @param entries Entries to store; an empty array is a no-op.
 */
async timelineAddBatch(entries: TimelineEntry[]): Promise<void> {
  if (entries.length === 0) return;
  const now = nowIso();
  for (const entry of entries) {
    await this.db.client.execute(
      `INSERT INTO timeline_entries (page_slug, date, source, summary, detail, created_at)
       VALUES (?, ?, ?, ?, ?, ?)`,
      [entry.pageSlug, entry.date, entry.source, entry.summary, entry.detail, now],
    );
  }
}
|
|
379
|
+
|
|
380
|
+
/**
 * Get timeline entries across all pages, newest first (date, then id, descending).
 *
 * @param limit Maximum number of entries to return.
 * @returns Entries mapped to camelCase fields.
 */
async timelineGlobal(limit = 100): Promise<TimelineEntry[]> {
  const rows = many<{
    id: number;
    page_slug: string;
    date: string;
    source: string;
    summary: string;
    detail: string;
  }>(
    await this.db.client.execute(
      `SELECT id, page_slug, date, source, summary, detail
       FROM timeline_entries
       ORDER BY date DESC, id DESC
       LIMIT ?`,
      [limit],
    ),
  );
  return rows.map((row) => ({
    id: row.id,
    pageSlug: row.page_slug,
    date: row.date,
    source: row.source,
    summary: row.summary,
    detail: row.detail,
  }));
}
|
|
402
|
+
|
|
403
|
+
/**
 * Delete a timeline entry by ID.
 *
 * @param id Primary key of the entry to delete.
 */
async timelineDelete(id: number): Promise<void> {
  await this.db.client.execute(
    "DELETE FROM timeline_entries WHERE id = ?",
    [id],
  );
}
|
|
412
|
+
|
|
413
|
+
/**
 * Update a timeline entry by ID.
 *
 * Only `date`, `source`, `summary` and `detail` can be changed. `date`,
 * `source` and `summary` are applied only when truthy, so they cannot be set
 * to an empty string; `detail` is applied whenever it is not undefined, so it
 * can be cleared. With nothing to update, no statement is executed.
 *
 * @param id Primary key of the entry to update.
 * @param updates Fields to change.
 */
async timelineUpdate(id: number, updates: Partial<TimelineEntry>): Promise<void> {
  const fields: string[] = [];
  const values: unknown[] = [];
  if (updates.date) {
    fields.push("date = ?");
    values.push(updates.date);
  }
  if (updates.source) {
    fields.push("source = ?");
    values.push(updates.source);
  }
  if (updates.summary) {
    fields.push("summary = ?");
    values.push(updates.summary);
  }
  if (updates.detail !== undefined) {
    fields.push("detail = ?");
    values.push(updates.detail);
  }
  if (fields.length === 0) return;
  values.push(id);
  // Only fixed column fragments are interpolated; all values stay parameterized.
  await this.db.client.execute(
    `UPDATE timeline_entries SET ${fields.join(", ")} WHERE id = ?`,
    values,
  );
}
|
|
430
|
+
|
|
431
|
+
/**
 * List the tags attached to a page, in ascending order.
 *
 * @param slug Slug of the page.
 * @returns Tag names.
 */
async tags(slug: string): Promise<string[]> {
  const rows = many<{ tag: string }>(
    await this.db.client.execute(
      "SELECT tag FROM page_tags WHERE page_slug = ? ORDER BY tag ASC",
      [slug],
    ),
  );
  return rows.map((row) => row.tag);
}
|
|
440
|
+
|
|
441
|
+
/**
 * Attach a tag to a page.
 *
 * @param slug Slug of the page.
 * @param tag Tag name to attach.
 */
async tag(slug: string, tag: string): Promise<void> {
  // On a duplicate key the update rewrites `tag` with the same value, so
  // re-tagging does not raise a duplicate-key error.
  await this.db.client.execute(
    `INSERT INTO page_tags (page_slug, tag, created_at)
     VALUES (?, ?, ?)
     ON DUPLICATE KEY UPDATE tag = VALUES(tag)`,
    [slug, tag, nowIso()],
  );
}
|
|
449
|
+
|
|
450
|
+
/**
 * Remove a tag from a page.
 *
 * @param slug Slug of the page.
 * @param tag Tag name to remove.
 */
async untag(slug: string, tag: string): Promise<void> {
  await this.db.client.execute(
    "DELETE FROM page_tags WHERE page_slug = ? AND tag = ?",
    [slug, tag],
  );
}
|
|
456
|
+
|
|
457
|
+
/**
 * Read raw data rows stored for a page, most recently fetched first.
 *
 * @param slug Slug of the page.
 * @param source Optional source name to filter on.
 * @returns Objects of the form `{ source, fetchedAt, data }`, where `data` is
 *          the stored text parsed as JSON, or the raw text when it is not valid JSON.
 */
async readRaw(slug: string, source?: string): Promise<unknown[]> {
  const params: unknown[] = [slug];
  let sql =
    "SELECT source, data, fetched_at FROM raw_data WHERE page_slug = ?";
  if (source) {
    sql += " AND source = ?";
    params.push(source);
  }
  sql += " ORDER BY fetched_at DESC";
  const rows = many<{ source: string; data: string; fetched_at: string }>(
    await this.db.client.execute(sql, params),
  );
  return rows.map((row) => ({
    source: row.source,
    fetchedAt: row.fetched_at,
    data: safeJson(row.data),
  }));
}
|
|
475
|
+
|
|
476
|
+
/**
 * Store a raw data payload for a page; `fetched_at` is set to the current time.
 *
 * @param slug Slug of the page.
 * @param source Source name the payload came from.
 * @param data Payload, serialized with `JSON.stringify`.
 */
async writeRaw(slug: string, source: string, data: unknown): Promise<void> {
  // JSON.stringify returns undefined for undefined/function/symbol input
  // (despite its declared type); store JSON null rather than binding undefined.
  const serialized: string | undefined = JSON.stringify(data);
  await this.db.client.execute(
    `INSERT INTO raw_data (page_slug, source, data, fetched_at)
     VALUES (?, ?, ?, ?)`,
    [slug, source, serialized ?? "null", nowIso()],
  );
}
|
|
483
|
+
|
|
484
|
+
/**
 * List the slugs of pages that link to the given page, in ascending order.
 *
 * @param slug Slug of the link target.
 * @returns Slugs of the linking pages.
 */
async backlinks(slug: string): Promise<string[]> {
  const rows = many<{ from_slug: string }>(
    await this.db.client.execute(
      "SELECT from_slug FROM links WHERE to_slug = ? ORDER BY from_slug ASC",
      [slug],
    ),
  );
  return rows.map((row) => row.from_slug);
}
|
|
493
|
+
|
|
494
|
+
/**
 * List every page slug, in ascending order.
 *
 * @returns All slugs in the `pages` table.
 */
async allSlugs(): Promise<string[]> {
  const rows = many<{ slug: string }>(
    await this.db.client.execute("SELECT slug FROM pages ORDER BY slug ASC"),
  );
  return rows.map((row) => row.slug);
}
|
|
500
|
+
|
|
501
|
+
/**
 * Delete a page and everything stored for it.
 *
 * Removes the page row, its links (in either direction), tags, timeline
 * entries and raw data, then removes its document from the search collection
 * so deleted pages no longer surface in `search`/`query` results.
 *
 * The statements run sequentially without a transaction; a failure part-way
 * through rejects and leaves the earlier deletions applied.
 *
 * @param slug Slug of the page to delete.
 */
async deletePage(slug: string): Promise<void> {
  await this.db.client.execute("DELETE FROM pages WHERE slug = ?", [slug]);
  // Related rows: a DELETE that matches nothing simply affects zero rows.
  await this.db.client.execute("DELETE FROM links WHERE from_slug = ? OR to_slug = ?", [slug, slug]);
  await this.db.client.execute("DELETE FROM page_tags WHERE page_slug = ?", [slug]);
  await this.db.client.execute("DELETE FROM timeline_entries WHERE page_slug = ?", [slug]);
  await this.db.client.execute("DELETE FROM raw_data WHERE page_slug = ?", [slug]);
  // The page slug is the document id used by syncPageToSearch.
  await this.db.pagesCollection.delete({ ids: [slug] });
}
|
|
509
|
+
|
|
510
|
+
/**
 * Resolve an entity reference to an existing page slug if possible.
 *
 * Logic:
 * 1. If a page with `candidateSlug` exists, return it.
 * 2. Otherwise run `search` for `entityName` and return the top hit's slug
 *    when its score exceeds 0.9.
 * 3. Otherwise return `candidateSlug`.
 *
 * @param candidateSlug Slug generated for the entity.
 * @param entityName Display name of the entity, used as the search text.
 * @returns An existing slug when one is found, else `candidateSlug`.
 */
async findSimilarSlug(candidateSlug: string, entityName: string): Promise<string> {
  // 1. Check exact slug match
  if (await this.getPage(candidateSlug)) {
    return candidateSlug;
  }

  // A blank name would make the `$contains` filter match arbitrary documents.
  const name = entityName.trim();
  if (name.length === 0) {
    return candidateSlug;
  }

  // 2. Search for a name match
  try {
    const [best] = await this.search(name, 1);
    // Higher threshold to avoid false matches during import
    if (best && best.score > 0.9) {
      return best.slug;
    }
  } catch {
    // Search may fail during batch import, ignore and return candidate
  }

  // 3. Return candidate
  return candidateSlug;
}
|
|
541
|
+
|
|
542
|
+
/**
 * Ensure an entity page exists. If not, create it with the given context.
 * If it exists, append the new fact unless `context` (trimmed) already
 * appears verbatim in the page's compiled truth.
 *
 * @param slug Slug of the entity page.
 * @param type Page type written to the page.
 * @param title Page title written to the page.
 * @param relation Relation label rendered in bold at the start of the fact.
 * @param context Sentence describing the fact.
 * @param sourceSlug Slug of the page the fact came from.
 * @returns true if the page was created, false if it already existed
 */
async ensureEntityPage(
  slug: string,
  type: string,
  title: string,
  relation: string,
  context: string,
  sourceSlug: string,
): Promise<boolean> {
  const existing = await this.getPage(slug);
  const trimmedContext = context.trim();
  const newFact = `- **${relation}** [${title}](${slug}): ${trimmedContext} (Source: ${sourceSlug})`;

  if (!existing) {
    await this.putPage({
      slug,
      type,
      title,
      compiledTruth: `## Facts\n\n${newFact}`,
      timeline: "",
      frontmatter: { autoCreated: true },
    });
    return true;
  }

  // Check for duplicate: the exact context sentence already exists in compiledTruth
  if (existing.compiledTruth.includes(trimmedContext)) {
    return false;
  }

  let updatedTruth: string;
  if (!existing.compiledTruth.includes("## Facts")) {
    // No Facts section yet: keep the existing text and add a single new
    // section after it (previously the header was emitted twice).
    const base = existing.compiledTruth.trimEnd();
    updatedTruth = base
      ? `${base}\n\n## Facts\n\n${newFact}`
      : `## Facts\n\n${newFact}`;
  } else {
    // Insert before the first "---" separator line, or append at the end.
    // A replacer function is used so `$` sequences in the fact text are
    // inserted literally instead of being read as replacement patterns.
    updatedTruth = existing.compiledTruth.replace(
      /\n---\n/,
      () => `\n${newFact}\n\n---\n`,
    );
    if (updatedTruth === existing.compiledTruth) {
      updatedTruth += "\n" + newFact;
    }
  }

  // NOTE(review): `type` and `title` from the arguments overwrite the stored
  // values on an existing page — confirm this is intended.
  await this.putPage({
    slug,
    type,
    title,
    compiledTruth: updatedTruth,
    timeline: existing.timeline,
    frontmatter: existing.frontmatter,
  });
  return false;
}
|
|
598
|
+
|
|
599
|
+
// ---------------------------------------------------------------------------
|
|
600
|
+
// Smart Compilation & Timeline Integration
|
|
601
|
+
// ---------------------------------------------------------------------------
|
|
602
|
+
|
|
603
|
+
/**
 * Compile new information into a page's compiled truth.
 *
 * When the page does not exist it is created with `newInfo` as its compiled
 * truth (type "other", title taken from the last slug segment) and a fixed
 * "append" result is returned without calling the LLM.
 *
 * Otherwise the current truth, the 20 most recent timeline entries and the
 * new information are passed to `compileTruth`. When it reports a change,
 * the page is rewritten, the returned timeline entries are stored and the
 * search index is refreshed once.
 *
 * @param slug Page slug to compile into
 * @param newInfo New information to process
 * @param source Source of the information
 * @param date Date of the information
 * @param llm LLM configuration passed to `compileTruth`
 * @returns Compile result with changes made
 */
async compilePage(
  slug: string,
  newInfo: string,
  source: string,
  date: string,
  llm: ResolvedLLM,
): Promise<CompileResult> {
  const page = await this.getPage(slug);
  if (!page) {
    // Create new page if it doesn't exist. `||` (not `??`) so a slug ending
    // in "/" falls back to the full slug instead of an empty title.
    await this.putPage({
      slug,
      type: "other",
      title: slug.split("/").pop() || slug,
      compiledTruth: newInfo,
      frontmatter: { source, date, autoCreated: true },
    });
    return {
      compiledTruth: newInfo,
      changed: true,
      changeType: "append",
      changeSummary: "Created new page",
      timelineEntries: [],
      confidence: 0.8,
    };
  }

  const timeline = await this.timeline(slug, 20);
  const input: CompileInput = {
    currentTruth: page.compiledTruth,
    timeline,
    newInfo,
    source,
    date,
    pageContext: {
      slug: page.slug,
      type: page.type,
      title: page.title,
    },
  };

  const result = await compileTruth(input, llm);

  // Apply changes if any
  if (result.changed) {
    // skipEmbed: the index is refreshed once below, not once here and once there.
    await this.putPage(
      {
        slug: page.slug,
        type: page.type,
        title: page.title,
        compiledTruth: result.compiledTruth,
        timeline: page.timeline,
        frontmatter: page.frontmatter,
      },
      true,
    );

    // Add timeline entries
    if (result.timelineEntries.length > 0) {
      await this.timelineAddBatch(result.timelineEntries);
    }

    // Sync to search index
    await this.syncPageToSearch(slug);
  }

  return result;
}
|
|
683
|
+
|
|
684
|
+
/**
 * Extract timeline entries from content and store them.
 *
 * Extraction is delegated to `extractTimelineEvents`; any entries it returns
 * are inserted via `timelineAddBatch`.
 *
 * @param slug Page slug the entries belong to
 * @param content Content to extract timeline from
 * @param source Source identifier
 * @param defaultDate Default date passed to the extractor
 * @param llm LLM configuration passed to the extractor
 * @returns The extraction result, including the entries that were added
 */
async extractAndAddTimeline(
  slug: string,
  content: string,
  source: string,
  defaultDate: string,
  llm: ResolvedLLM,
): Promise<TimelineExtractionResult> {
  const result = await extractTimelineEvents(
    { content, source, defaultDate, pageSlug: slug },
    llm,
  );

  if (result.entries.length > 0) {
    await this.timelineAddBatch(result.entries);
  }

  return result;
}
|
|
713
|
+
|
|
714
|
+
/**
 * Ingestion pipeline:
 * 1. Compile the content into the page (creates the page when missing).
 * 2. Extract timeline events from the content and store them.
 * 3. When `type` is given and differs from the stored type, rewrite the page
 *    with that type and add `source` / `sourceType` to its frontmatter.
 *
 * Today's date (first 10 characters of `nowIso()`) is used as both the
 * compile date and the default timeline date.
 *
 * @param slug Page slug
 * @param content Full content
 * @param source Source identifier
 * @param type Page type
 * @param llm LLM configuration
 * @returns The page as stored at the end of the pipeline, plus the compile
 *          and timeline results
 * @throws Error if the page cannot be read after compilation.
 */
async ingestContent(
  slug: string,
  content: string,
  source: string,
  type: string,
  llm: ResolvedLLM,
): Promise<{
  page: PageRecord;
  compileResult: CompileResult;
  timelineResult: TimelineExtractionResult;
}> {
  const date = nowIso().slice(0, 10);

  // Step 1: Compile truth (this creates/updates page)
  const compileResult = await this.compilePage(slug, content, source, date, llm);
  let page = await this.getPage(slug);
  if (!page) {
    throw new Error(`ingestContent: page "${slug}" not found after compilation`);
  }

  // Step 2: Extract timeline events
  const timelineResult = await this.extractAndAddTimeline(slug, content, source, date, llm);

  // Step 3: Update page type if provided
  if (type && page.type !== type) {
    // Keep the written record so the caller sees the new type and
    // frontmatter rather than the pre-update snapshot.
    page = await this.putPage({
      slug: page.slug,
      type,
      title: page.title,
      compiledTruth: page.compiledTruth,
      timeline: page.timeline,
      frontmatter: { ...page.frontmatter, source, sourceType: type },
    });
  }

  return { page, compileResult, timelineResult };
}
|
|
764
|
+
}
|
|
765
|
+
|
|
766
|
+
/**
 * Parse text as JSON, falling back to the text itself.
 *
 * @param raw Text to parse.
 * @returns The parsed JSON value, or `raw` unchanged when it is not valid JSON.
 */
function safeJson(raw: string): unknown {
  try {
    return JSON.parse(raw);
  } catch {
    return raw;
  }
}
|