@open330/kiwimu 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,368 @@
1
+ import { chatComplete } from "../llm-client";
2
+ import type { Store } from "../store";
3
+ import { slugify } from "./chunker";
4
+
5
// ── Phase 1: Extract original document structure ──

// System prompt for the Phase-1 structure-extraction LLM call.
const STRUCTURE_SYSTEM = `You are a document analyzer. Extract the chapter/section structure from this textbook content, preserving the original order and hierarchy.

Return valid JSON only. No markdown fences.`;

// User-prompt template for Phase 1. The "{sourceTitle}" and "{text}"
// placeholder tokens are substituted in llmChunkDocument before sending.
const STRUCTURE_PROMPT = `Extract the document structure from this text. Preserve the original chapter/section ordering.

Source: "{sourceTitle}"

TEXT:
---
{text}
---

Return a JSON array of sections in order. Each element:
- "title": string — Original section/chapter title from the document
- "content": string — The full content of this section, converted to clean markdown. Preserve all information. Use LaTeX ($..$ inline, $$...$$ display) for equations. Clean up OCR artifacts.
- "level": number — 1 for chapter, 2 for section, 3 for subsection

Keep the content faithful to the original. Do not add or remove information. Just clean up formatting.
Return at most 8 sections per response to keep output manageable.`;

// ── Phase 2: Extract concepts for separate pages ──

// System prompt for the Phase-2 concept-extraction LLM call.
const CONCEPT_SYSTEM = `You are a study wiki editor. Given source material pages, identify important concepts, terms, and definitions that deserve their own dedicated wiki pages.

Rules:
- Pick terms that appear across multiple sections OR are fundamental domain concepts
- Each concept page should have substantial educational content (2+ paragraphs)
- Explain the concept clearly with definitions, formulas, examples, and context
- Use [[wiki links]] to reference other concepts and source pages. Example: "[[Synchrotron Radiation]] is observed at [[radio frequencies]]"
- Suggest Wikipedia links for further reading

Return valid JSON only. No markdown fences.`;

// User-prompt template for Phase 2. The "{sourcePages}" placeholder is
// substituted with batched source-page summary lines in llmChunkDocument.
const CONCEPT_PROMPT = `Based on these source pages, create concept/glossary wiki pages for important terms.

Source pages already created:
{sourcePages}

Create 3-6 concept pages for the most important terms, definitions, laws, and equations found in these pages.
Do NOT duplicate the source pages — instead, create focused concept pages that the source pages can link to.
Keep each page concise (2-3 paragraphs).

Return a JSON array where each element has:
- "title": string — Short concept name, 1-3 words (e.g., "Synchrotron Radiation", "Flux Density", "Angular Resolution"). Keep titles short so they match naturally in text.
- "content": string — Educational markdown content with [[wiki links]] to other concepts and source pages
- "suggested_links": Array<{text: string, url: string}> — Wikipedia/external reference links`;
54
+
55
// One section as returned by the Phase-1 structure-extraction prompt.
interface StructurePage {
  title: string;   // original section/chapter title from the document
  content: string; // cleaned markdown content of the section
  level: number;   // 1 = chapter, 2 = section, 3 = subsection
}

// One concept/glossary page as returned by the Phase-2 prompt.
interface ConceptPage {
  title: string;   // short concept name (1-3 words)
  content: string; // markdown body, may contain [[wiki links]]
  suggested_links?: Array<{ text: string; url: string }>; // external references
}
66
+
67
+ function splitByChapters(text: string): Array<{ chapterHint: string; text: string }> {
68
+ const chapterPattern = /\n(?=(?:CHAPTER\s*\d+|Chapter\s+\d+)[A-Z\s])/g;
69
+ const positions: number[] = [];
70
+ let m: RegExpExecArray | null;
71
+ while ((m = chapterPattern.exec(text))) positions.push(m.index);
72
+
73
+ if (positions.length < 2) return splitBySize(text, 20000);
74
+
75
+ const chunks: Array<{ chapterHint: string; text: string }> = [];
76
+ for (let i = 0; i < positions.length; i++) {
77
+ const start = positions[i];
78
+ const end = i + 1 < positions.length ? positions[i + 1] : text.length;
79
+ const chunkText = text.slice(start, end);
80
+ const titleMatch = chunkText.match(/(?:CHAPTER\s*\d+|Chapter\s+\d+)\s*([^\n]+)/);
81
+ const hint = titleMatch ? titleMatch[0].trim() : `Section ${i + 1}`;
82
+
83
+ if (chunkText.length > 60000) {
84
+ const sub = splitBySize(chunkText, 20000);
85
+ sub.forEach((s, j) => chunks.push({ chapterHint: `${hint} (part ${j + 1}/${sub.length})`, text: s.text }));
86
+ } else {
87
+ chunks.push({ chapterHint: hint, text: chunkText });
88
+ }
89
+ }
90
+ return chunks;
91
+ }
92
+
93
+ function splitBySize(text: string, maxSize: number): Array<{ chapterHint: string; text: string }> {
94
+ const chunks: Array<{ chapterHint: string; text: string }> = [];
95
+ let start = 0;
96
+ while (start < text.length) {
97
+ let end = Math.min(start + maxSize, text.length);
98
+ if (end < text.length) {
99
+ const lastBreak = text.lastIndexOf("\n\n", end);
100
+ if (lastBreak > start + maxSize * 0.5) end = lastBreak;
101
+ }
102
+ chunks.push({ chapterHint: `Part ${chunks.length + 1}`, text: text.slice(start, end) });
103
+ start = end;
104
+ }
105
+ return chunks;
106
+ }
107
+
108
+ function parseJSON<T>(raw: string): T {
109
+ let cleaned = raw.replace(/^```json?\n?/m, "").replace(/\n?```\s*$/m, "").trim();
110
+ try {
111
+ return JSON.parse(cleaned);
112
+ } catch (e1) {
113
+ // Try various repairs for truncated JSON
114
+ const repairs = [
115
+ // Truncated in the middle of a string value
116
+ () => cleaned.replace(/,"[^"]*$/, "") + "}]",
117
+ // Truncated after a value
118
+ () => cleaned.replace(/,?\s*$/, "]"),
119
+ // Truncated mid-object
120
+ () => cleaned.replace(/,?\s*"[^"]*$/, "") + "}]",
121
+ // Add missing closing brackets
122
+ () => {
123
+ const opens = (cleaned.match(/\[/g) || []).length;
124
+ const closes = (cleaned.match(/\]/g) || []).length;
125
+ const openBraces = (cleaned.match(/\{/g) || []).length;
126
+ const closeBraces = (cleaned.match(/\}/g) || []).length;
127
+ let fixed = cleaned;
128
+ // Close any unclosed strings
129
+ const lastQuote = fixed.lastIndexOf('"');
130
+ const afterQuote = fixed.slice(lastQuote + 1);
131
+ if (afterQuote.indexOf('"') === -1 && afterQuote.length > 0) {
132
+ // We're inside an unclosed string, truncate to last complete field
133
+ fixed = fixed.slice(0, fixed.lastIndexOf('",') + 2);
134
+ }
135
+ for (let i = 0; i < openBraces - closeBraces; i++) fixed += "}";
136
+ for (let i = 0; i < opens - closes; i++) fixed += "]";
137
+ return fixed;
138
+ },
139
+ ];
140
+ for (const repair of repairs) {
141
+ try {
142
+ return JSON.parse(repair());
143
+ } catch {}
144
+ }
145
+ // Last resort: log the problematic content for debugging
146
+ console.log(` \x1b[33m⚠ JSON repair 실패, 첫 200자: ${cleaned.slice(0, 200)}\x1b[0m`);
147
+ throw e1;
148
+ }
149
+ }
150
+
151
+ /**
152
+ * Resolve [[wiki links]] in content to actual markdown links.
153
+ */
154
+ function resolveWikiLinks(
155
+ pageId: number,
156
+ content: string,
157
+ slugMap: Map<string, { id: number; slug: string }>,
158
+ store: Store
159
+ ): string {
160
+ return content.replace(/\[\[([^\]]+)\]\]/g, (_match, term: string) => {
161
+ const slug = slugify(term);
162
+ const target = slugMap.get(slug);
163
+ if (target && target.id !== pageId) {
164
+ store.addLink(pageId, target.id, term);
165
+ return `[${term}](/wiki/${target.slug})`;
166
+ }
167
+ return term;
168
+ });
169
+ }
170
+
171
+ export async function llmChunkDocument(
172
+ rawText: string,
173
+ sourceTitle: string,
174
+ sourceId: number,
175
+ store: Store,
176
+ maxChunks: number = 0 // 0 = unlimited
177
+ ): Promise<{ sourceCount: number; conceptCount: number }> {
178
+ let chunks = splitByChapters(rawText);
179
+ if (maxChunks > 0 && chunks.length > maxChunks) {
180
+ console.log(`\x1b[33m⚠ ${chunks.length}개 청크 중 ${maxChunks}개만 처리합니다\x1b[0m`);
181
+ chunks = chunks.slice(0, maxChunks);
182
+ }
183
+ console.log(`\x1b[34m🧠 Phase 1: 원본 구조 추출 (${chunks.length}개 청크)...\x1b[0m`);
184
+
185
+ // ── Phase 1: Extract source pages ──
186
+ let orderCounter = 0;
187
+ const sourcePageSummaries: string[] = [];
188
+
189
+ for (let i = 0; i < chunks.length; i++) {
190
+ const chunk = chunks[i];
191
+ console.log(` [${i + 1}/${chunks.length}] ${chunk.chapterHint}`);
192
+
193
+ const prompt = STRUCTURE_PROMPT
194
+ .replace("{sourceTitle}", sourceTitle)
195
+ .replace("{text}", chunk.text.slice(0, 80000));
196
+
197
+ try {
198
+ const raw = await chatComplete(STRUCTURE_SYSTEM, prompt, 16384);
199
+ if (!raw || raw.trim().length < 10) {
200
+ console.log(` \x1b[33m⚠ 빈 응답, 재시도...\x1b[0m`);
201
+ const retry = await chatComplete(STRUCTURE_SYSTEM, prompt, 16384);
202
+ if (!retry || retry.trim().length < 10) {
203
+ console.log(` \x1b[31m✗ 재시도도 빈 응답\x1b[0m`);
204
+ continue;
205
+ }
206
+ const sections = parseJSON<StructurePage[]>(retry).filter(s => s.title && s.content && s.content.length > 30);
207
+ // fall through to process sections below
208
+ for (const section of sections) {
209
+ const slug = slugify(section.title);
210
+ if (!slug) continue;
211
+ const existing = store.getPage(slug);
212
+ if (existing) {
213
+ store.updatePageContent(existing.id, existing.content + "\n\n" + section.content);
214
+ } else {
215
+ store.addPage(slug, section.title, section.content, sourceId, slug, "source", orderCounter++);
216
+ sourcePageSummaries.push(`- ${section.title}: ${section.content.slice(0, 150).replace(/\n/g, " ")}`);
217
+ }
218
+ }
219
+ console.log(` → ${sections.length}개 섹션`);
220
+ continue;
221
+ }
222
+ const sections = parseJSON<StructurePage[]>(raw).filter(s => s.title && s.content && s.content.length > 30);
223
+
224
+ for (const section of sections) {
225
+ const slug = slugify(section.title);
226
+ if (!slug) continue;
227
+
228
+ const existing = store.getPage(slug);
229
+ if (existing) {
230
+ store.updatePageContent(existing.id, existing.content + "\n\n" + section.content);
231
+ } else {
232
+ store.addPage(slug, section.title, section.content, sourceId, slug, "source", orderCounter++);
233
+ sourcePageSummaries.push(`- ${section.title}: ${section.content.slice(0, 150).replace(/\n/g, " ")}`);
234
+ }
235
+ }
236
+ console.log(` → ${sections.length}개 섹션`);
237
+ } catch (e: any) {
238
+ console.log(` \x1b[31m✗ 실패: ${e.message}\x1b[0m`);
239
+ }
240
+ }
241
+
242
+ const sourceCount = orderCounter;
243
+ console.log(`\x1b[32m 📖 ${sourceCount}개 원본 페이지 생성 완료\x1b[0m`);
244
+
245
+ // ── Phase 2: Extract concept pages ──
246
+ console.log(`\x1b[34m🧠 Phase 2: 개념 페이지 추출...\x1b[0m`);
247
+
248
+ // Process source pages in small batches for concept extraction
249
+ const batchSize = 5;
250
+ let conceptCount = 0;
251
+
252
+ for (let i = 0; i < sourcePageSummaries.length; i += batchSize) {
253
+ const batch = sourcePageSummaries.slice(i, i + batchSize);
254
+ const batchLabel = ` [${Math.floor(i / batchSize) + 1}/${Math.ceil(sourcePageSummaries.length / batchSize)}]`;
255
+ console.log(`${batchLabel} 개념 추출 중...`);
256
+
257
+ const existingConcepts = conceptCount > 0
258
+ ? `\n\nAlready created concept pages (do not duplicate): ${store.listConceptPages().map(p => p.title).join(", ")}`
259
+ : "";
260
+ const prompt = CONCEPT_PROMPT.replace("{sourcePages}", batch.join("\n")) + existingConcepts;
261
+
262
+ try {
263
+ const raw = await chatComplete(CONCEPT_SYSTEM, prompt, 16384);
264
+ const concepts = parseJSON<ConceptPage[]>(raw).filter(c => c.title && c.content && c.content.length > 50);
265
+
266
+ for (const concept of concepts) {
267
+ const slug = slugify(concept.title);
268
+ if (!slug) continue;
269
+
270
+ // Don't create concept page if source page with same slug exists
271
+ const existing = store.getPage(slug);
272
+ if (existing) continue;
273
+
274
+ let content = concept.content;
275
+ if (concept.suggested_links?.length) {
276
+ content += "\n\n## External References\n\n";
277
+ for (const link of concept.suggested_links) {
278
+ content += `- [${link.text}](${link.url})\n`;
279
+ }
280
+ }
281
+
282
+ store.addPage(slug, concept.title, content, sourceId, slug, "concept", 0);
283
+ conceptCount++;
284
+ }
285
+ console.log(` → ${concepts.length}개 개념`);
286
+ } catch (e: any) {
287
+ console.log(` \x1b[31m✗ 실패: ${e.message}\x1b[0m`);
288
+ }
289
+ }
290
+
291
+ console.log(`\x1b[32m 📝 ${conceptCount}개 개념 페이지 생성 완료\x1b[0m`);
292
+
293
+ // ── Phase 3: Resolve wiki links + inject concept links into source pages ──
294
+ console.log(`\x1b[34m🔗 위키 링크 해석 중...\x1b[0m`);
295
+ const allPages = store.listPages();
296
+ const slugMap = new Map(allPages.map(p => [p.slug, { id: p.id, slug: p.slug }]));
297
+ for (const p of allPages) {
298
+ const titleSlug = slugify(p.title);
299
+ if (!slugMap.has(titleSlug)) slugMap.set(titleSlug, { id: p.id, slug: p.slug });
300
+ }
301
+
302
+ // Resolve [[wiki links]] in concept pages
303
+ let linkedPages = 0;
304
+ for (const page of allPages) {
305
+ if (!page.content.includes("[[")) continue;
306
+ const resolved = resolveWikiLinks(page.id, page.content, slugMap, store);
307
+ if (resolved !== page.content) {
308
+ store.updatePageContent(page.id, resolved);
309
+ linkedPages++;
310
+ }
311
+ }
312
+
313
+ // Inject concept links into source pages
314
+ const conceptPages = allPages.filter(p => p.page_type === "concept");
315
+ const srcPages = allPages.filter(p => p.page_type === "source");
316
+
317
+ // Build search terms: full title + key words from title (2+ words long)
318
+ const searchTerms: Array<{ term: string; concept: typeof conceptPages[0] }> = [];
319
+ for (const concept of conceptPages) {
320
+ searchTerms.push({ term: concept.title, concept });
321
+ // Also try individual significant words from multi-word titles
322
+ const words = concept.title.split(/\s+/).filter(w => w.length >= 4 && !/^(and|the|for|with|from|into)$/i.test(w));
323
+ if (words.length >= 2) {
324
+ // Try pairs of consecutive words
325
+ for (let i = 0; i < words.length - 1; i++) {
326
+ searchTerms.push({ term: `${words[i]} ${words[i + 1]}`, concept });
327
+ }
328
+ }
329
+ }
330
+ // Sort by term length descending for longest match first
331
+ searchTerms.sort((a, b) => b.term.length - a.term.length);
332
+
333
+ for (const srcPage of srcPages) {
334
+ let content = srcPage.content;
335
+ let modified = false;
336
+ const linkedConcepts = new Set<number>();
337
+
338
+ for (const { term, concept } of searchTerms) {
339
+ if (linkedConcepts.has(concept.id)) continue; // One link per concept per page
340
+ if (term.length < 3) continue;
341
+ const escaped = term.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
342
+ const regex = new RegExp(`(?<!\\[)(?<![\\w/])(${escaped})(?![\\w])(?!\\])(?![^[]*\\])`, "i");
343
+ const match = regex.exec(content);
344
+ if (match) {
345
+ const replacement = `[${match[1]}](/wiki/${concept.slug})`;
346
+ content = content.slice(0, match.index) + replacement + content.slice(match.index + match[0].length);
347
+ store.addLink(srcPage.id, concept.id, match[1]);
348
+ linkedConcepts.add(concept.id);
349
+ modified = true;
350
+ }
351
+ }
352
+ if (modified) {
353
+ store.updatePageContent(srcPage.id, content);
354
+ linkedPages++;
355
+ }
356
+ }
357
+
358
+ console.log(`\x1b[32m ${linkedPages}개 페이지에서 위키 링크 해석 완료\x1b[0m`);
359
+
360
+ return { sourceCount, conceptCount };
361
+ }
362
+
363
+ export function htmlToRawText(html: string): string {
364
+ const { load } = require("cheerio");
365
+ const $ = load(html);
366
+ $("script, style, nav, header, footer, noscript").remove();
367
+ return $("body").text() || $.text();
368
+ }
@@ -0,0 +1,84 @@
1
+ import { chatComplete } from "../llm-client";
2
+ import type { Store } from "../store";
3
+ import { slugify } from "./chunker";
4
+
5
// System prompt for the second-pass cross-linking call.
const LINK_SYSTEM = `You are a wiki editor. Given wiki pages, find cross-link opportunities that were missed.
Return valid JSON only. No markdown fences.`;

// User-prompt template; the "{pages}" placeholder is substituted with
// "slug | title | excerpt" lines in llmLinkPages.
const LINK_PROMPT = `These wiki pages exist but may be missing cross-links. Find where one page's content mentions a concept that has its own page.

Pages (slug | title | first 300 chars of content):
{pages}

Return JSON:
{
  "links": [
    {
      "from_slug": "source-page-slug",
      "to_slug": "target-page-slug",
      "anchor_text": "exact phrase in source page to link"
    }
  ]
}

Rules:
- anchor_text MUST be an exact phrase found in the source page content
- Only link genuinely related concepts
- 3-8 links per page where meaningful
- Do NOT link a page to itself`;
29
+
30
+ export async function llmLinkPages(store: Store): Promise<number> {
31
+ const pages = store.listPages();
32
+ if (pages.length < 2) return 0;
33
+
34
+ const batchSize = 30;
35
+ let totalLinks = 0;
36
+
37
+ for (let i = 0; i < pages.length; i += batchSize) {
38
+ const batch = pages.slice(i, i + batchSize);
39
+ const pagesText = batch
40
+ .map(p => `${p.slug} | ${p.title} | ${p.content.slice(0, 300).replace(/\n/g, " ")}`)
41
+ .join("\n");
42
+
43
+ try {
44
+ const raw = await chatComplete(LINK_SYSTEM, LINK_PROMPT.replace("{pages}", pagesText), 8192);
45
+ let cleaned = raw.replace(/^```json?\n?/m, "").replace(/\n?```$/m, "").trim();
46
+
47
+ let result: { links: Array<{ from_slug: string; to_slug: string; anchor_text: string }> };
48
+ try {
49
+ result = JSON.parse(cleaned);
50
+ } catch {
51
+ // Try to repair truncated JSON
52
+ cleaned = cleaned.replace(/,?\s*$/, "]}");
53
+ try {
54
+ result = JSON.parse(cleaned);
55
+ } catch {
56
+ console.log(` \x1b[33m⚠ 링크 JSON 파싱 실패\x1b[0m`);
57
+ continue;
58
+ }
59
+ }
60
+
61
+ const slugToPage = new Map(pages.map(p => [p.slug, p]));
62
+
63
+ for (const link of result.links) {
64
+ const fromPage = slugToPage.get(link.from_slug);
65
+ const toPage = slugToPage.get(link.to_slug);
66
+ if (!fromPage || !toPage || fromPage.id === toPage.id) continue;
67
+
68
+ const anchor = link.anchor_text;
69
+ if (anchor && fromPage.content.includes(anchor) && !fromPage.content.includes(`[${anchor}]`)) {
70
+ const linkedText = `[${anchor}](/wiki/${link.to_slug})`;
71
+ const newContent = fromPage.content.replace(anchor, linkedText);
72
+ store.updatePageContent(fromPage.id, newContent);
73
+ fromPage.content = newContent;
74
+ store.addLink(fromPage.id, toPage.id, anchor);
75
+ totalLinks++;
76
+ }
77
+ }
78
+ } catch (e: any) {
79
+ console.log(` \x1b[31m링크 생성 실패: ${e.message}\x1b[0m`);
80
+ }
81
+ }
82
+
83
+ return totalLinks;
84
+ }
package/src/store.ts ADDED
@@ -0,0 +1,209 @@
1
+ import { Database } from "bun:sqlite";
2
+
3
// A fetched document (book, web page, ...) from which pages are derived.
export interface Source {
  id: number;
  uri: string;         // unique identifier of the origin (URL/path)
  type: string;
  title: string;
  raw_content: string; // full raw text as fetched
  fetched_at: string;  // SQLite datetime string
}

// A generated wiki page.
export interface Page {
  id: number;
  slug: string;             // unique URL slug
  title: string;
  content: string;          // markdown body
  source_id: number | null; // owning source, if any
  section_anchor: string | null;
  page_type: string; // 'source' | 'concept'
  display_order: number;    // ordering among a source's pages
}

// A directed link between two pages, keyed by its anchor text.
export interface Link {
  from_page_id: number;
  to_page_id: number;
  anchor_text: string;
}

// SQLite schema, applied idempotently by Store.initSchema().
const SCHEMA = `
CREATE TABLE IF NOT EXISTS sources (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  uri TEXT UNIQUE NOT NULL,
  type TEXT NOT NULL,
  title TEXT,
  raw_content TEXT,
  fetched_at TEXT DEFAULT (datetime('now'))
);
CREATE TABLE IF NOT EXISTS pages (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  slug TEXT UNIQUE NOT NULL,
  title TEXT NOT NULL,
  content TEXT NOT NULL,
  source_id INTEGER REFERENCES sources(id),
  section_anchor TEXT,
  page_type TEXT NOT NULL DEFAULT 'concept',
  display_order INTEGER NOT NULL DEFAULT 0,
  created_at TEXT DEFAULT (datetime('now')),
  updated_at TEXT DEFAULT (datetime('now'))
);
CREATE TABLE IF NOT EXISTS usage_logs (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  source_id INTEGER REFERENCES sources(id),
  llm_calls INTEGER NOT NULL DEFAULT 0,
  prompt_tokens INTEGER NOT NULL DEFAULT 0,
  completion_tokens INTEGER NOT NULL DEFAULT 0,
  total_tokens INTEGER NOT NULL DEFAULT 0,
  estimated_cost_usd REAL NOT NULL DEFAULT 0,
  created_at TEXT DEFAULT (datetime('now'))
);
CREATE TABLE IF NOT EXISTS links (
  from_page_id INTEGER REFERENCES pages(id),
  to_page_id INTEGER REFERENCES pages(id),
  anchor_text TEXT,
  PRIMARY KEY (from_page_id, to_page_id, anchor_text)
);
`;
67
+
68
/**
 * SQLite-backed persistence layer (bun:sqlite) for sources, pages, links,
 * and LLM usage logs.
 */
export class Store {
  private db: Database;

  constructor(dbPath: string) {
    this.db = new Database(dbPath);
    // WAL allows concurrent readers while a write is in progress.
    this.db.exec("PRAGMA journal_mode=WAL");
    this.db.exec("PRAGMA foreign_keys=ON");
  }

  // Create all tables if they do not yet exist (idempotent).
  initSchema(): void {
    this.db.exec(SCHEMA);
  }

  close(): void {
    this.db.close();
  }

  // --- Sources ---

  // Insert a source, or update it in place when the URI already exists so
  // the row id (and pages referencing it via source_id) is preserved.
  addSource(uri: string, type: string, title: string, rawContent: string): Source {
    const existing = this.db.prepare("SELECT * FROM sources WHERE uri = ?").get(uri) as Source | undefined;
    if (existing) {
      // Update existing source, keep same ID to preserve FK relationships
      this.db
        .prepare("UPDATE sources SET type = ?, title = ?, raw_content = ?, fetched_at = datetime('now') WHERE id = ?")
        .run(type, title, rawContent, existing.id);
      return this.db.prepare("SELECT * FROM sources WHERE id = ?").get(existing.id) as Source;
    }
    this.db
      .prepare("INSERT INTO sources (uri, type, title, raw_content) VALUES (?, ?, ?, ?)")
      .run(uri, type, title, rawContent);
    return this.db.prepare("SELECT * FROM sources WHERE uri = ?").get(uri) as Source;
  }

  // Look up a source by URI; null when absent.
  getSource(uri: string): Source | null {
    return (this.db.prepare("SELECT * FROM sources WHERE uri = ?").get(uri) as Source) ?? null;
  }

  // All sources, most recently fetched first.
  listSources(): Source[] {
    return this.db.prepare("SELECT * FROM sources ORDER BY fetched_at DESC").all() as Source[];
  }

  // --- Pages ---

  // Insert or overwrite the page with this slug, returning the stored row.
  // NOTE(review): INSERT OR REPLACE deletes the conflicting row and inserts a
  // fresh one, so an overwritten page gets a NEW id — existing links rows
  // referencing the old id would dangle or be rejected by the FK (with
  // foreign_keys=ON). Confirm callers clear links before re-adding pages.
  addPage(
    slug: string,
    title: string,
    content: string,
    sourceId?: number,
    sectionAnchor?: string,
    pageType: string = "concept",
    displayOrder: number = 0
  ): Page {
    this.db
      .prepare(
        "INSERT OR REPLACE INTO pages (slug, title, content, source_id, section_anchor, page_type, display_order) VALUES (?, ?, ?, ?, ?, ?, ?)"
      )
      .run(slug, title, content, sourceId ?? null, sectionAnchor ?? null, pageType, displayOrder);
    return this.db.prepare("SELECT * FROM pages WHERE slug = ?").get(slug) as Page;
  }

  // Look up a page by slug; null when absent.
  getPage(slug: string): Page | null {
    return (this.db.prepare("SELECT * FROM pages WHERE slug = ?").get(slug) as Page) ?? null;
  }

  // All pages, ordered alphabetically by title.
  listPages(): Page[] {
    return this.db.prepare("SELECT * FROM pages ORDER BY title").all() as Page[];
  }

  // Source-derived pages in reading order (by source, then display_order).
  listSourcePages(): Page[] {
    return this.db
      .prepare("SELECT * FROM pages WHERE page_type = 'source' ORDER BY source_id, display_order")
      .all() as Page[];
  }

  // Concept/glossary pages, ordered alphabetically by title.
  listConceptPages(): Page[] {
    return this.db
      .prepare("SELECT * FROM pages WHERE page_type = 'concept' ORDER BY title")
      .all() as Page[];
  }

  // Remove all pages belonging to a source, along with any links that
  // reference them (links are cleared first to satisfy the FK constraints).
  deletePagesBySource(sourceId: number): void {
    // Delete links involving these pages first
    this.db.prepare(
      "DELETE FROM links WHERE from_page_id IN (SELECT id FROM pages WHERE source_id = ?) OR to_page_id IN (SELECT id FROM pages WHERE source_id = ?)"
    ).run(sourceId, sourceId);
    this.db.prepare("DELETE FROM pages WHERE source_id = ?").run(sourceId);
  }

  // Remove every page and every link.
  deleteAllPages(): void {
    this.db.exec("DELETE FROM links");
    this.db.exec("DELETE FROM pages");
  }

  // Remove everything: pages, links, and sources.
  deleteAllSources(): void {
    this.deleteAllPages();
    this.db.exec("DELETE FROM sources");
  }

  // Replace a page's content and bump its updated_at timestamp.
  updatePageContent(pageId: number, content: string): void {
    this.db.prepare("UPDATE pages SET content = ?, updated_at = datetime('now') WHERE id = ?").run(content, pageId);
  }

  // --- Links ---

  // Record a directed link; duplicates (same from/to/anchor) are ignored
  // thanks to the composite primary key + INSERT OR IGNORE.
  addLink(fromId: number, toId: number, anchorText: string): void {
    this.db
      .prepare("INSERT OR IGNORE INTO links (from_page_id, to_page_id, anchor_text) VALUES (?, ?, ?)")
      .run(fromId, toId, anchorText);
  }

  // Remove every link (pages are untouched).
  clearLinks(): void {
    this.db.exec("DELETE FROM links");
  }

  // Pages that link TO the given page, ordered by title.
  getBacklinks(pageId: number): Page[] {
    return this.db
      .prepare(
        `SELECT p.* FROM pages p JOIN links l ON l.from_page_id = p.id WHERE l.to_page_id = ? ORDER BY p.title`
      )
      .all(pageId) as Page[];
  }

  getAllLinks(): Link[] {
    return this.db.prepare("SELECT * FROM links").all() as Link[];
  }

  // --- Usage ---

  // Append one LLM usage record for a source.
  addUsageLog(sourceId: number, calls: number, prompt: number, completion: number, total: number, cost: number): void {
    this.db
      .prepare("INSERT INTO usage_logs (source_id, llm_calls, prompt_tokens, completion_tokens, total_tokens, estimated_cost_usd) VALUES (?, ?, ?, ?, ?, ?)")
      .run(sourceId, calls, prompt, completion, total, cost);
  }

  // Aggregate totals across all usage logs (zeros when the table is empty).
  getUsageSummary(): { totalCalls: number; promptTokens: number; completionTokens: number; totalTokens: number; totalCost: number } {
    const row = this.db.prepare(
      "SELECT COALESCE(SUM(llm_calls),0) as totalCalls, COALESCE(SUM(prompt_tokens),0) as promptTokens, COALESCE(SUM(completion_tokens),0) as completionTokens, COALESCE(SUM(total_tokens),0) as totalTokens, COALESCE(SUM(estimated_cost_usd),0) as totalCost FROM usage_logs"
    ).get() as any;
    return row;
  }
}