ex-brain 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,990 @@
1
+ import { nowIso } from "../config";
2
+ import type {
3
+ BrainStats,
4
+ PageRecord,
5
+ PutPageInput,
6
+ SearchHit,
7
+ TimelineEntry,
8
+ } from "../types";
9
+ import type { ResolvedLLM } from "../settings";
10
+ import type { CompileInput, CompileResult } from "../ai/compiler";
11
+ import type { TimelineExtractionResult } from "../ai/timeline-extractor";
12
+ import { compileTruth } from "../ai/compiler";
13
+ import { extractTimelineEvents } from "../ai/timeline-extractor";
14
+ import { BrainDb } from "../db/client";
15
+ import { DbError, wrapDbError, logDbError, type DbOperation } from "../db/errors";
16
+ import { sanitizeQuery } from "../utils/query-sanitizer";
17
+
18
+ type SqlRow = Record<string, unknown>;
19
+
20
+ function one<T>(rows: SqlRow[] | null): T | null {
21
+ if (!rows || rows.length === 0) {
22
+ return null;
23
+ }
24
+ return rows[0] as T;
25
+ }
26
+
27
+ function many<T>(rows: SqlRow[] | null): T[] {
28
+ return (rows ?? []) as T[];
29
+ }
30
+
31
+ function parseFrontmatter(raw: string): Record<string, unknown> {
32
+ try {
33
+ return JSON.parse(raw) as Record<string, unknown>;
34
+ } catch {
35
+ return {};
36
+ }
37
+ }
38
+
39
+ export class BrainRepository {
40
+ constructor(private readonly db: BrainDb) {}
41
+
42
+ async init(): Promise<void> {
43
+ // Schema is auto-created when connecting.
44
+ }
45
+
46
+ async getPage(slug: string): Promise<PageRecord | null> {
47
+ try {
48
+ const rows = await this.db.client.execute(
49
+ `SELECT slug, type, title, compiled_truth, timeline, frontmatter, created_at, updated_at
50
+ FROM pages WHERE slug = ?`,
51
+ [slug],
52
+ );
53
+ const row = one<{
54
+ slug: string;
55
+ type: string;
56
+ title: string;
57
+ compiled_truth: string;
58
+ timeline: string;
59
+ frontmatter: string;
60
+ created_at: string;
61
+ updated_at: string;
62
+ }>(rows);
63
+ if (!row) {
64
+ return null;
65
+ }
66
+ return {
67
+ slug: row.slug,
68
+ type: row.type,
69
+ title: row.title,
70
+ compiledTruth: row.compiled_truth,
71
+ timeline: row.timeline,
72
+ frontmatter: parseFrontmatter(row.frontmatter),
73
+ createdAt: row.created_at,
74
+ updatedAt: row.updated_at,
75
+ };
76
+ } catch (error) {
77
+ const dbError = wrapDbError(error, "getPage", { slug });
78
+ logDbError(dbError);
79
+ throw dbError;
80
+ }
81
+ }
82
+
83
+ async putPage(input: PutPageInput, skipEmbed = false): Promise<PageRecord> {
84
+ try {
85
+ const now = nowIso();
86
+ const existing = await this.getPage(input.slug);
87
+ const createdAt = existing?.createdAt ?? now;
88
+ const frontmatter = JSON.stringify(input.frontmatter ?? {});
89
+ const timeline = input.timeline ?? existing?.timeline ?? "";
90
+ await this.db.client.execute(
91
+ `INSERT INTO pages (slug, type, title, compiled_truth, timeline, frontmatter, created_at, updated_at)
92
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)
93
+ ON DUPLICATE KEY UPDATE
94
+ type = VALUES(type),
95
+ title = VALUES(title),
96
+ compiled_truth = VALUES(compiled_truth),
97
+ timeline = VALUES(timeline),
98
+ frontmatter = VALUES(frontmatter),
99
+ updated_at = VALUES(updated_at)`,
100
+ [
101
+ input.slug,
102
+ input.type,
103
+ input.title,
104
+ input.compiledTruth,
105
+ timeline,
106
+ frontmatter,
107
+ createdAt,
108
+ now,
109
+ ],
110
+ );
111
+ if (!skipEmbed) {
112
+ await this.syncPageToSearch(input.slug);
113
+ }
114
+ return (await this.getPage(input.slug)) as PageRecord;
115
+ } catch (error) {
116
+ const dbError = wrapDbError(error, "putPage", { slug: input.slug });
117
+ logDbError(dbError);
118
+ throw dbError;
119
+ }
120
+ }
121
+
122
+ async listPages(filters: {
123
+ type?: string;
124
+ tag?: string;
125
+ limit?: number;
126
+ }): Promise<PageRecord[]> {
127
+ try {
128
+ const limit = filters.limit ?? 50;
129
+ const params: unknown[] = [];
130
+ let sql = `SELECT p.slug, p.type, p.title, p.compiled_truth, p.timeline, p.frontmatter, p.created_at, p.updated_at
131
+ FROM pages p`;
132
+ if (filters.tag) {
133
+ sql += " INNER JOIN page_tags t ON p.slug = t.page_slug";
134
+ }
135
+ sql += " WHERE 1=1";
136
+ if (filters.type) {
137
+ sql += " AND p.type = ?";
138
+ params.push(filters.type);
139
+ }
140
+ if (filters.tag) {
141
+ sql += " AND t.tag = ?";
142
+ params.push(filters.tag);
143
+ }
144
+ sql += " ORDER BY p.updated_at DESC LIMIT ?";
145
+ params.push(limit);
146
+ const rows = many<{
147
+ slug: string;
148
+ type: string;
149
+ title: string;
150
+ compiled_truth: string;
151
+ timeline: string;
152
+ frontmatter: string;
153
+ created_at: string;
154
+ updated_at: string;
155
+ }>(await this.db.client.execute(sql, params));
156
+
157
+ return rows.map((row) => ({
158
+ slug: row.slug,
159
+ type: row.type,
160
+ title: row.title,
161
+ compiledTruth: row.compiled_truth,
162
+ timeline: row.timeline,
163
+ frontmatter: parseFrontmatter(row.frontmatter),
164
+ createdAt: row.created_at,
165
+ updatedAt: row.updated_at,
166
+ }));
167
+ } catch (error) {
168
+ const dbError = wrapDbError(error, "listPages", filters);
169
+ logDbError(dbError);
170
+ throw dbError;
171
+ }
172
+ }
173
+
174
+ async stats(): Promise<BrainStats> {
175
+ try {
176
+ const rows = await this.db.client.execute(
177
+ `SELECT
178
+ (SELECT COUNT(*) FROM pages) AS pages,
179
+ (SELECT COUNT(*) FROM links) AS links,
180
+ (SELECT COUNT(*) FROM page_tags) AS tags,
181
+ (SELECT COUNT(*) FROM timeline_entries) AS timeline_entries,
182
+ (SELECT COUNT(*) FROM raw_data) AS raw_rows`,
183
+ );
184
+ const row = one<{
185
+ pages: number;
186
+ links: number;
187
+ tags: number;
188
+ timeline_entries: number;
189
+ raw_rows: number;
190
+ }>(rows);
191
+ return {
192
+ pages: Number(row?.pages ?? 0),
193
+ links: Number(row?.links ?? 0),
194
+ tags: Number(row?.tags ?? 0),
195
+ timelineEntries: Number(row?.timeline_entries ?? 0),
196
+ rawRows: Number(row?.raw_rows ?? 0),
197
+ };
198
+ } catch (error) {
199
+ const dbError = wrapDbError(error, "stats");
200
+ logDbError(dbError);
201
+ throw dbError;
202
+ }
203
+ }
204
+
205
+ async search(query: string, limit = 10, type?: string): Promise<SearchHit[]> {
206
+ // Sanitize query to prevent JSON parse errors in seekdb
207
+ const sanitizedQuery = sanitizeQuery(query);
208
+
209
+ try {
210
+ const where = type ? ({ type } as Record<string, unknown>) : undefined;
211
+ const result = await this.db.pagesCollection.hybridSearch({
212
+ query: { whereDocument: { $contains: sanitizedQuery }, where },
213
+ nResults: limit,
214
+ include: ["documents", "metadatas", "distances"],
215
+ });
216
+ const ids = result.ids[0] ?? [];
217
+ const metadatas = result.metadatas?.[0] ?? [];
218
+ const docs = result.documents?.[0] ?? [];
219
+ const distances = result.distances?.[0] ?? [];
220
+ const hits: SearchHit[] = [];
221
+ for (let i = 0; i < ids.length; i += 1) {
222
+ const slug = ids[i];
223
+ if (!slug) continue;
224
+ const md = (metadatas[i] ?? {}) as Record<string, unknown>;
225
+ const distance = typeof distances[i] === "number" ? distances[i] : 1;
226
+ const score = 1 / (1 + distance);
227
+ hits.push({
228
+ slug,
229
+ title: String(md.title ?? slug),
230
+ type: String(md.type ?? "other"),
231
+ score,
232
+ excerpt: String(docs[i] ?? "").slice(0, 220),
233
+ updatedAt: String(md.updatedAt ?? ""),
234
+ });
235
+ }
236
+ return hits;
237
+ } catch (error) {
238
+ // Fallback to SQL LIKE search if vector search fails
239
+ console.warn(`[BrainRepo] Vector search failed, using SQL fallback for: ${sanitizedQuery}`);
240
+ return await this.fallbackSearch(sanitizedQuery, limit, type);
241
+ }
242
+ }
243
+
244
+ /**
245
+ * Fallback search using SQL LIKE when vector search fails.
246
+ * More robust but less accurate.
247
+ */
248
+ private async fallbackSearch(query: string, limit = 10, type?: string): Promise<SearchHit[]> {
249
+ try {
250
+ const sql = type
251
+ ? `SELECT slug, type, title, compiled_truth, updated_at FROM pages WHERE type = ? AND compiled_truth LIKE ? ORDER BY updated_at DESC LIMIT ?`
252
+ : `SELECT slug, type, title, compiled_truth, updated_at FROM pages WHERE compiled_truth LIKE ? ORDER BY updated_at DESC LIMIT ?`;
253
+
254
+ const params = type ? [type, `%${query}%`, limit] : [`%${query}%`, limit];
255
+
256
+ const rows = many<{ slug: string; type: string; title: string; compiled_truth: string; updated_at: string }>(
257
+ await this.db.client.execute(sql, params)
258
+ );
259
+
260
+ return rows.map(row => ({
261
+ slug: row.slug,
262
+ title: row.title,
263
+ type: row.type,
264
+ score: 0.5, // Fixed score for fallback search
265
+ excerpt: row.compiled_truth.slice(0, 220),
266
+ updatedAt: row.updated_at,
267
+ }));
268
+ } catch (fallbackError) {
269
+ const dbError = wrapDbError(fallbackError, "fallbackSearch", { query, limit, type });
270
+ logDbError(dbError);
271
+ return []; // Return empty results instead of throwing
272
+ }
273
+ }
274
+
275
+ async query(question: string, limit = 10): Promise<SearchHit[]> {
276
+ // Sanitize question to prevent parse errors
277
+ const sanitizedQuestion = sanitizeQuery(question);
278
+
279
+ try {
280
+ const result = await this.db.pagesCollection.query({
281
+ queryTexts: sanitizedQuestion,
282
+ nResults: limit,
283
+ include: ["documents", "metadatas", "distances"],
284
+ });
285
+ const ids = result.ids[0] ?? [];
286
+ const metadatas = result.metadatas?.[0] ?? [];
287
+ const docs = result.documents?.[0] ?? [];
288
+ const distances = result.distances?.[0] ?? [];
289
+ const hits: SearchHit[] = [];
290
+ for (let i = 0; i < ids.length; i += 1) {
291
+ const slug = ids[i];
292
+ if (!slug) continue;
293
+ const md = (metadatas[i] ?? {}) as Record<string, unknown>;
294
+ const distance = typeof distances[i] === "number" ? distances[i] : 1;
295
+ const vectorScore = 1 / (1 + distance);
296
+ const freshnessBoost = this.recentBoost(String(md.updatedAt ?? ""));
297
+ const typeBoost = String(md.type ?? "") === "person" ? 0.05 : 0;
298
+ const score = vectorScore * 0.85 + freshnessBoost + typeBoost;
299
+ hits.push({
300
+ slug,
301
+ title: String(md.title ?? slug),
302
+ type: String(md.type ?? "other"),
303
+ score,
304
+ excerpt: String(docs[i] ?? "").slice(0, 220),
305
+ updatedAt: String(md.updatedAt ?? ""),
306
+ });
307
+ }
308
+ hits.sort((a, b) => b.score - a.score);
309
+ return hits;
310
+ } catch (error) {
311
+ const dbError = wrapDbError(error, "query", { question, limit });
312
+ logDbError(dbError);
313
+ throw dbError;
314
+ }
315
+ }
316
+
317
+ private recentBoost(updatedAt: string): number {
318
+ if (!updatedAt) return 0;
319
+ const age = Date.now() - new Date(updatedAt).getTime();
320
+ const days = age / (1000 * 60 * 60 * 24);
321
+ return days <= 30 ? 0.1 : 0;
322
+ }
323
+
324
+ async syncPageToSearch(slug: string): Promise<void> {
325
+ try {
326
+ const page = await this.getPage(slug);
327
+ if (!page) return;
328
+ const fullDoc = `${page.title}\n\n${page.compiledTruth}\n\n${page.timeline}`;
329
+
330
+ // Truncate to avoid embedding API limits (most models have 8192 token limit)
331
+ // Conservative: ~4 chars per token, so 8192 tokens ≈ 32000 chars
332
+ // But some models count differently, use 8000 chars as safe limit
333
+ const MAX_DOC_LENGTH = 8000;
334
+ const doc = fullDoc.length > MAX_DOC_LENGTH
335
+ ? fullDoc.slice(0, MAX_DOC_LENGTH) + '\n... (truncated)'
336
+ : fullDoc;
337
+
338
+ const meta = {
339
+ slug: page.slug,
340
+ title: page.title,
341
+ type: page.type,
342
+ updatedAt: page.updatedAt,
343
+ };
344
+ await this.db.pagesCollection.upsert({
345
+ ids: [page.slug],
346
+ documents: [doc],
347
+ metadatas: [meta],
348
+ });
349
+ } catch (error) {
350
+ const dbError = wrapDbError(error, "syncPageToSearch", { slug });
351
+ logDbError(dbError);
352
+ // Don't throw - sync failure shouldn't break the main flow
353
+ console.warn(`[BrainRepo] syncPageToSearch failed for ${slug}: ${dbError.message}`);
354
+ }
355
+ }
356
+
357
+ /**
358
+ * Batch sync multiple pages to search index.
359
+ * More efficient than calling syncPageToSearch for each page.
360
+ */
361
+ async syncPagesToSearch(slugs: string[]): Promise<void> {
362
+ try {
363
+ const pages = await Promise.all(slugs.map(s => this.getPage(s)));
364
+ const validPages = pages.filter((p): p is PageRecord => p !== null);
365
+ if (validPages.length === 0) return;
366
+
367
+ const MAX_DOC_LENGTH = 8000;
368
+ const docs = validPages.map(p => {
369
+ const fullDoc = `${p.title}\n\n${p.compiledTruth}\n\n${p.timeline}`;
370
+ return fullDoc.length > MAX_DOC_LENGTH
371
+ ? fullDoc.slice(0, MAX_DOC_LENGTH) + '\n... (truncated)'
372
+ : fullDoc;
373
+ });
374
+ const metas = validPages.map(p => ({
375
+ slug: p.slug,
376
+ title: p.title,
377
+ type: p.type,
378
+ updatedAt: p.updatedAt,
379
+ }));
380
+
381
+ await this.db.pagesCollection.upsert({
382
+ ids: validPages.map(p => p.slug),
383
+ documents: docs,
384
+ metadatas: metas,
385
+ });
386
+ } catch (error) {
387
+ const dbError = wrapDbError(error, "syncPagesToSearch", { count: slugs.length });
388
+ logDbError(dbError);
389
+ // Don't throw - sync failure shouldn't break the main flow
390
+ console.warn(`[BrainRepo] syncPagesToSearch failed: ${dbError.message}`);
391
+ }
392
+ }
393
+
394
+ async embedAll(): Promise<number> {
395
+ try {
396
+ const pages = await this.listPages({ limit: 100000 });
397
+ if (pages.length === 0) return 0;
398
+ // Use batch sync for significant performance improvement
399
+ await this.syncPagesToSearch(pages.map(p => p.slug));
400
+ return pages.length;
401
+ } catch (error) {
402
+ const dbError = wrapDbError(error, "embedAll");
403
+ logDbError(dbError);
404
+ throw dbError;
405
+ }
406
+ }
407
+
408
+ async link(fromSlug: string, toSlug: string, context: string): Promise<void> {
409
+ try {
410
+ await this.db.client.execute(
411
+ `INSERT INTO links (from_slug, to_slug, context, created_at)
412
+ VALUES (?, ?, ?, ?)
413
+ ON DUPLICATE KEY UPDATE context = VALUES(context)`,
414
+ [fromSlug, toSlug, context, nowIso()],
415
+ );
416
+ } catch (error) {
417
+ const dbError = wrapDbError(error, "link", { fromSlug, toSlug });
418
+ logDbError(dbError);
419
+ throw dbError;
420
+ }
421
+ }
422
+
423
+ async timeline(slug: string, limit = 50): Promise<TimelineEntry[]> {
424
+ try {
425
+ const rows = many<{
426
+ id: number;
427
+ page_slug: string;
428
+ date: string;
429
+ source: string;
430
+ summary: string;
431
+ detail: string;
432
+ }>(
433
+ await this.db.client.execute(
434
+ `SELECT id, page_slug, date, source, summary, detail
435
+ FROM timeline_entries
436
+ WHERE page_slug = ?
437
+ ORDER BY date DESC, id DESC
438
+ LIMIT ?`,
439
+ [slug, limit],
440
+ ),
441
+ );
442
+ return rows.map((row) => ({
443
+ id: row.id,
444
+ pageSlug: row.page_slug,
445
+ date: row.date,
446
+ source: row.source,
447
+ summary: row.summary,
448
+ detail: row.detail,
449
+ }));
450
+ } catch (error) {
451
+ const dbError = wrapDbError(error, "timeline", { slug, limit });
452
+ logDbError(dbError);
453
+ throw dbError;
454
+ }
455
+ }
456
+
457
+ async timelineAdd(entry: TimelineEntry): Promise<void> {
458
+ try {
459
+ await this.db.client.execute(
460
+ `INSERT INTO timeline_entries (page_slug, date, source, summary, detail, created_at)
461
+ VALUES (?, ?, ?, ?, ?, ?)`,
462
+ [
463
+ entry.pageSlug,
464
+ entry.date,
465
+ entry.source,
466
+ entry.summary,
467
+ entry.detail,
468
+ nowIso(),
469
+ ],
470
+ );
471
+ } catch (error) {
472
+ const dbError = wrapDbError(error, "timelineAdd", { pageSlug: entry.pageSlug });
473
+ logDbError(dbError);
474
+ throw dbError;
475
+ }
476
+ }
477
+
478
+ /**
479
+ * Add multiple timeline entries in batch using multi-row INSERT.
480
+ * Much more efficient than individual INSERT statements.
481
+ */
482
+ async timelineAddBatch(entries: TimelineEntry[]): Promise<void> {
483
+ try {
484
+ if (entries.length === 0) return;
485
+ const now = nowIso();
486
+
487
+ // Use multi-row INSERT for better performance
488
+ const placeholders = entries.map(() => `(?, ?, ?, ?, ?, ?)`).join(', ');
489
+ const values = entries.flatMap(entry => [
490
+ entry.pageSlug,
491
+ entry.date,
492
+ entry.source,
493
+ entry.summary,
494
+ entry.detail,
495
+ now,
496
+ ]);
497
+
498
+ await this.db.client.execute(
499
+ `INSERT INTO timeline_entries (page_slug, date, source, summary, detail, created_at)
500
+ VALUES ${placeholders}`,
501
+ values,
502
+ );
503
+ } catch (error) {
504
+ const dbError = wrapDbError(error, "timelineAddBatch", { count: entries.length });
505
+ logDbError(dbError);
506
+ throw dbError;
507
+ }
508
+ }
509
+
510
+ /**
511
+ * Get timeline entries across all pages, sorted by date.
512
+ */
513
+ async timelineGlobal(limit = 100): Promise<TimelineEntry[]> {
514
+ try {
515
+ const rows = many<{ id: number; page_slug: string; date: string; source: string; summary: string; detail: string; importance: number }>(
516
+ await this.db.client.execute(
517
+ `SELECT id, page_slug, date, source, summary, detail, importance
518
+ FROM timeline_entries
519
+ ORDER BY date DESC, id DESC
520
+ LIMIT ?`,
521
+ [limit],
522
+ ),
523
+ );
524
+ return rows.map((row) => ({
525
+ id: row.id,
526
+ pageSlug: row.page_slug,
527
+ date: row.date,
528
+ source: row.source,
529
+ summary: row.summary,
530
+ detail: row.detail,
531
+ importance: row.importance ?? 3,
532
+ }));
533
+ } catch (error) {
534
+ const dbError = wrapDbError(error, "timelineGlobal", { limit });
535
+ logDbError(dbError);
536
+ throw dbError;
537
+ }
538
+ }
539
+
540
+ /**
541
+ * Delete a timeline entry by ID.
542
+ */
543
+ async timelineDelete(id: number): Promise<void> {
544
+ try {
545
+ await this.db.client.execute(
546
+ "DELETE FROM timeline_entries WHERE id = ?",
547
+ [id],
548
+ );
549
+ } catch (error) {
550
+ const dbError = wrapDbError(error, "timelineDelete", { id });
551
+ logDbError(dbError);
552
+ throw dbError;
553
+ }
554
+ }
555
+
556
+ /**
557
+ * Update a timeline entry by ID.
558
+ */
559
+ async timelineUpdate(id: number, updates: Partial<TimelineEntry>): Promise<void> {
560
+ try {
561
+ const fields: string[] = [];
562
+ const values: unknown[] = [];
563
+ if (updates.date) { fields.push("date = ?"); values.push(updates.date); }
564
+ if (updates.source) { fields.push("source = ?"); values.push(updates.source); }
565
+ if (updates.summary) { fields.push("summary = ?"); values.push(updates.summary); }
566
+ if (updates.detail !== undefined) { fields.push("detail = ?"); values.push(updates.detail); }
567
+ if (updates.importance !== undefined) { fields.push("importance = ?"); values.push(updates.importance); }
568
+ if (fields.length === 0) return;
569
+ values.push(id);
570
+ await this.db.client.execute(
571
+ `UPDATE timeline_entries SET ${fields.join(", ")} WHERE id = ?`,
572
+ values,
573
+ );
574
+ } catch (error) {
575
+ const dbError = wrapDbError(error, "timelineUpdate", { id });
576
+ logDbError(dbError);
577
+ throw dbError;
578
+ }
579
+ }
580
+
581
+ async tags(slug: string): Promise<string[]> {
582
+ try {
583
+ const rows = many<{ tag: string }>(
584
+ await this.db.client.execute(
585
+ "SELECT tag FROM page_tags WHERE page_slug = ? ORDER BY tag ASC",
586
+ [slug],
587
+ ),
588
+ );
589
+ return rows.map((row) => row.tag);
590
+ } catch (error) {
591
+ const dbError = wrapDbError(error, "tags", { slug });
592
+ logDbError(dbError);
593
+ throw dbError;
594
+ }
595
+ }
596
+
597
+ async tag(slug: string, tag: string): Promise<void> {
598
+ try {
599
+ await this.db.client.execute(
600
+ `INSERT INTO page_tags (page_slug, tag, created_at)
601
+ VALUES (?, ?, ?)
602
+ ON DUPLICATE KEY UPDATE tag = VALUES(tag)`,
603
+ [slug, tag, nowIso()],
604
+ );
605
+ } catch (error) {
606
+ const dbError = wrapDbError(error, "tag", { slug, tag });
607
+ logDbError(dbError);
608
+ throw dbError;
609
+ }
610
+ }
611
+
612
+ async untag(slug: string, tag: string): Promise<void> {
613
+ try {
614
+ await this.db.client.execute(
615
+ "DELETE FROM page_tags WHERE page_slug = ? AND tag = ?",
616
+ [slug, tag],
617
+ );
618
+ } catch (error) {
619
+ const dbError = wrapDbError(error, "untag", { slug, tag });
620
+ logDbError(dbError);
621
+ throw dbError;
622
+ }
623
+ }
624
+
625
+ async readRaw(slug: string, source?: string): Promise<unknown[]> {
626
+ try {
627
+ const params: unknown[] = [slug];
628
+ let sql =
629
+ "SELECT source, data, fetched_at FROM raw_data WHERE page_slug = ?";
630
+ if (source) {
631
+ sql += " AND source = ?";
632
+ params.push(source);
633
+ }
634
+ sql += " ORDER BY fetched_at DESC";
635
+ const rows = many<{ source: string; data: string; fetched_at: string }>(
636
+ await this.db.client.execute(sql, params),
637
+ );
638
+ return rows.map((row) => ({
639
+ source: row.source,
640
+ fetchedAt: row.fetched_at,
641
+ data: safeJson(row.data),
642
+ }));
643
+ } catch (error) {
644
+ const dbError = wrapDbError(error, "readRaw", { slug, source });
645
+ logDbError(dbError);
646
+ throw dbError;
647
+ }
648
+ }
649
+
650
+ async writeRaw(slug: string, source: string, data: unknown): Promise<void> {
651
+ try {
652
+ await this.db.client.execute(
653
+ `INSERT INTO raw_data (page_slug, source, data, fetched_at)
654
+ VALUES (?, ?, ?, ?)`,
655
+ [slug, source, JSON.stringify(data), nowIso()],
656
+ );
657
+ } catch (error) {
658
+ const dbError = wrapDbError(error, "writeRaw", { slug, source });
659
+ logDbError(dbError);
660
+ throw dbError;
661
+ }
662
+ }
663
+
664
+ async backlinks(slug: string): Promise<string[]> {
665
+ try {
666
+ const rows = many<{ from_slug: string }>(
667
+ await this.db.client.execute(
668
+ "SELECT from_slug FROM links WHERE to_slug = ? ORDER BY from_slug ASC",
669
+ [slug],
670
+ ),
671
+ );
672
+ return rows.map((row) => row.from_slug);
673
+ } catch (error) {
674
+ const dbError = wrapDbError(error, "backlinks", { slug });
675
+ logDbError(dbError);
676
+ throw dbError;
677
+ }
678
+ }
679
+
680
+ /**
681
+ * Get outgoing links from a page (pages this page links to).
682
+ * Returns array of { slug, context }.
683
+ */
684
+ async outgoingLinks(slug: string): Promise<Array<{ slug: string; context: string }>> {
685
+ try {
686
+ const rows = many<{ to_slug: string; context: string }>(
687
+ await this.db.client.execute(
688
+ "SELECT to_slug, context FROM links WHERE from_slug = ? ORDER BY to_slug ASC",
689
+ [slug],
690
+ ),
691
+ );
692
+ return rows.map((row) => ({ slug: row.to_slug, context: row.context }));
693
+ } catch (error) {
694
+ const dbError = wrapDbError(error, "outgoingLinks", { slug });
695
+ logDbError(dbError);
696
+ throw dbError;
697
+ }
698
+ }
699
+
700
+ async allSlugs(): Promise<string[]> {
701
+ try {
702
+ const rows = many<{ slug: string }>(
703
+ await this.db.client.execute("SELECT slug FROM pages ORDER BY slug ASC"),
704
+ );
705
+ return rows.map((row) => row.slug);
706
+ } catch (error) {
707
+ const dbError = wrapDbError(error, "allSlugs");
708
+ logDbError(dbError);
709
+ throw dbError;
710
+ }
711
+ }
712
+
713
+ async deletePage(slug: string): Promise<void> {
714
+ try {
715
+ await this.db.client.execute("DELETE FROM pages WHERE slug = ?", [slug]);
716
+ // Best-effort cleanup of related data (ignore errors for missing rows)
717
+ await this.db.client.execute("DELETE FROM links WHERE from_slug = ? OR to_slug = ?", [slug, slug]);
718
+ await this.db.client.execute("DELETE FROM page_tags WHERE page_slug = ?", [slug]);
719
+ await this.db.client.execute("DELETE FROM timeline_entries WHERE page_slug = ?", [slug]);
720
+ await this.db.client.execute("DELETE FROM raw_data WHERE page_slug = ?", [slug]);
721
+ } catch (error) {
722
+ const dbError = wrapDbError(error, "deletePage", { slug });
723
+ logDbError(dbError);
724
+ throw dbError;
725
+ }
726
+ }
727
+
728
+ /**
729
+ * Resolve an entity reference to an existing page slug if possible.
730
+ * Logic:
731
+ * 1. Check if generated slug exists.
732
+ * 2. Semantic search for name match (high confidence).
733
+ * 3. Otherwise return the candidate slug.
734
+ */
735
+ async findSimilarSlug(candidateSlug: string, entityName: string): Promise<string> {
736
+ // 1. Check exact slug match
737
+ if (await this.getPage(candidateSlug)) {
738
+ return candidateSlug;
739
+ }
740
+
741
+ // 2. Semantic search for title match - skip if no embeddings available
742
+ // This is important for import speed: avoid slow search during batch import
743
+ try {
744
+ const hits = await this.search(entityName, 1);
745
+ if (hits.length > 0) {
746
+ const best = hits[0]!;
747
+ // Higher threshold to avoid false matches during import
748
+ if (best.score > 0.9) {
749
+ return best.slug;
750
+ }
751
+ }
752
+ } catch {
753
+ // Search may fail during batch import, ignore and return candidate
754
+ }
755
+
756
+ // 3. Return candidate
757
+ return candidateSlug;
758
+ }
759
+
760
+ /**
761
+ * Ensure an entity page exists. If not, create it with the given context.
762
+ * If exists, append new fact (deduped by exact sentence match).
763
+ * @returns true if page was created, false if already existed
764
+ */
765
+ async ensureEntityPage(
766
+ slug: string,
767
+ type: string,
768
+ title: string,
769
+ relation: string,
770
+ context: string,
771
+ sourceSlug: string,
772
+ ): Promise<boolean> {
773
+ const existing = await this.getPage(slug);
774
+ const newFact = `- **${relation}** [${title}](${slug}): ${context.trim()} (Source: ${sourceSlug})`;
775
+
776
+ if (!existing) {
777
+ await this.putPage({
778
+ slug,
779
+ type,
780
+ title,
781
+ compiledTruth: `## Facts\n\n${newFact}`,
782
+ timeline: "",
783
+ frontmatter: { autoCreated: true },
784
+ });
785
+ return true;
786
+ }
787
+
788
+ // Check for duplicate: if the exact context sentence already exists in compiledTruth
789
+ const trimmedContext = context.trim();
790
+ if (existing.compiledTruth.includes(trimmedContext)) {
791
+ return false;
792
+ }
793
+
794
+ // Append new fact under ## Facts header if it exists, otherwise create it
795
+ let updatedTruth = existing.compiledTruth;
796
+ if (!existing.compiledTruth.includes("## Facts")) {
797
+ updatedTruth = `## Facts\n\n${existing.compiledTruth}\n\n## Facts\n\n${newFact}`;
798
+ } else {
799
+ // Simple append before the first "---" or at the end
800
+ updatedTruth = existing.compiledTruth.replace(/\n---\n/, `\n${newFact}\n\n---\n`);
801
+ if (updatedTruth === existing.compiledTruth) {
802
+ updatedTruth += "\n" + newFact;
803
+ }
804
+ }
805
+
806
+ await this.putPage({
807
+ slug,
808
+ type,
809
+ title,
810
+ compiledTruth: updatedTruth,
811
+ timeline: existing.timeline,
812
+ frontmatter: existing.frontmatter,
813
+ });
814
+ return false;
815
+ }
816
+
817
+ // ---------------------------------------------------------------------------
818
+ // Smart Compilation & Timeline Integration
819
+ // ---------------------------------------------------------------------------
820
+
821
+ /**
822
+ * Compile new information into a page's compiled truth.
823
+ * This is the core "brain" function that:
824
+ * 1. Analyzes new information
825
+ * 2. Updates/replaces/appends to compiled truth intelligently
826
+ * 3. Extracts timeline entries
827
+ * 4. Maintains source attribution
828
+ *
829
+ * @param slug Page slug to compile into
830
+ * @param newInfo New information to process
831
+ * @param source Source of the information
832
+ * @param date Date of the information
833
+ * @param llm LLM configuration for semantic analysis
834
+ * @returns Compile result with changes made
835
+ */
836
+ async compilePage(
837
+ slug: string,
838
+ newInfo: string,
839
+ source: string,
840
+ date: string,
841
+ llm: ResolvedLLM,
842
+ ): Promise<CompileResult> {
843
+ const page = await this.getPage(slug);
844
+ if (!page) {
845
+ // Create new page if doesn't exist
846
+ await this.putPage({
847
+ slug,
848
+ type: "other",
849
+ title: slug.split("/").pop() ?? slug,
850
+ compiledTruth: newInfo,
851
+ frontmatter: { source, date, autoCreated: true },
852
+ });
853
+ return {
854
+ compiledTruth: newInfo,
855
+ changed: true,
856
+ changeType: "append",
857
+ changeSummary: "Created new page",
858
+ timelineEntries: [],
859
+ confidence: 0.8,
860
+ };
861
+ }
862
+
863
+ const timeline = await this.timeline(slug, 20);
864
+ const input: CompileInput = {
865
+ currentTruth: page.compiledTruth,
866
+ timeline,
867
+ newInfo,
868
+ source,
869
+ date,
870
+ pageContext: {
871
+ slug: page.slug,
872
+ type: page.type,
873
+ title: page.title,
874
+ },
875
+ };
876
+
877
+ const result = await compileTruth(input, llm);
878
+
879
+ // Apply changes if any
880
+ if (result.changed) {
881
+ await this.putPage({
882
+ slug: page.slug,
883
+ type: page.type,
884
+ title: page.title,
885
+ compiledTruth: result.compiledTruth,
886
+ timeline: page.timeline,
887
+ frontmatter: page.frontmatter,
888
+ });
889
+
890
+ // Add timeline entries
891
+ if (result.timelineEntries.length > 0) {
892
+ await this.timelineAddBatch(result.timelineEntries);
893
+ }
894
+
895
+ // Sync to search index
896
+ await this.syncPageToSearch(slug);
897
+ }
898
+
899
+ return result;
900
+ }
901
+
902
+ /**
903
+ * Extract and add timeline entries from content.
904
+ * Uses LLM for semantic extraction, falls back to regex.
905
+ *
906
+ * @param slug Page slug
907
+ * @param content Content to extract timeline from
908
+ * @param source Source identifier
909
+ * @param defaultDate Default date for entries without explicit dates
910
+ * @param llm LLM configuration
911
+ * @returns Extraction result with entries added
912
+ */
913
+ async extractAndAddTimeline(
914
+ slug: string,
915
+ content: string,
916
+ source: string,
917
+ defaultDate: string,
918
+ llm: ResolvedLLM,
919
+ ): Promise<TimelineExtractionResult> {
920
+ const result = await extractTimelineEvents(
921
+ { content, source, defaultDate, pageSlug: slug },
922
+ llm,
923
+ );
924
+
925
+ if (result.entries.length > 0) {
926
+ await this.timelineAddBatch(result.entries);
927
+ }
928
+
929
+ return result;
930
+ }
931
+
932
+ /**
933
+ * Full ingestion pipeline:
934
+ * 1. Create/update page with content
935
+ * 2. Compile truth intelligently
936
+ * 3. Extract timeline events
937
+ * 4. Extract entity links
938
+ * 5. Sync to search
939
+ *
940
+ * @param slug Page slug
941
+ * @param content Full content
942
+ * @param source Source identifier
943
+ * @param type Page type
944
+ * @param llm LLM configuration
945
+ * @returns Full ingestion result
946
+ */
947
+ async ingestContent(
948
+ slug: string,
949
+ content: string,
950
+ source: string,
951
+ type: string,
952
+ llm: ResolvedLLM,
953
+ ): Promise<{
954
+ page: PageRecord;
955
+ compileResult: CompileResult;
956
+ timelineResult: TimelineExtractionResult;
957
+ }> {
958
+ const now = nowIso();
959
+ const date = now.slice(0, 10);
960
+
961
+ // Step 1: Compile truth (this creates/updates page)
962
+ const compileResult = await this.compilePage(slug, content, source, date, llm);
963
+ const page = await this.getPage(slug) as PageRecord;
964
+
965
+ // Step 2: Extract timeline events
966
+ const timelineResult = await this.extractAndAddTimeline(slug, content, source, date, llm);
967
+
968
+ // Step 3: Update page type if provided
969
+ if (type && page.type !== type) {
970
+ await this.putPage({
971
+ slug: page.slug,
972
+ type,
973
+ title: page.title,
974
+ compiledTruth: page.compiledTruth,
975
+ timeline: page.timeline,
976
+ frontmatter: { ...page.frontmatter, source, sourceType: type },
977
+ });
978
+ }
979
+
980
+ return { page, compileResult, timelineResult };
981
+ }
982
+ }
983
+
984
+ function safeJson(raw: string): unknown {
985
+ try {
986
+ return JSON.parse(raw);
987
+ } catch {
988
+ return raw;
989
+ }
990
+ }