@open330/kiwimu 0.4.1 → 0.7.1
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/bin/kiwimu +1 -1
- package/package.json +4 -1
- package/personas/namuwiki.json +6 -0
- package/src/build/renderer.ts +49 -2
- package/src/build/static/search.js +33 -2
- package/src/build/static/style.css +84 -1
- package/src/build/templates.ts +297 -167
- package/src/config.ts +35 -29
- package/src/demo/sample-data.ts +70 -0
- package/src/demo/setup.ts +31 -0
- package/src/expand/llm.ts +1 -1
- package/src/index.ts +208 -458
- package/src/ingest/docx.ts +0 -8
- package/src/ingest/legacy.ts +4 -4
- package/src/ingest/pdf.ts +1 -1
- package/src/ingest/pptx.ts +0 -1
- package/src/ingest/web.test.ts +41 -0
- package/src/ingest/web.ts +61 -62
- package/src/llm-client.ts +203 -126
- package/src/pipeline/chunker.test.ts +42 -0
- package/src/pipeline/chunker.ts +1 -48
- package/src/pipeline/llm-chunker.ts +133 -55
- package/src/server.ts +327 -0
- package/src/services/ingest.ts +100 -0
- package/src/store.test.ts +132 -0
- package/src/store.ts +102 -2
- package/src/pipeline/llm-linker.ts +0 -84

package/src/services/ingest.ts
ADDED

@@ -0,0 +1,100 @@
+import { Store } from "../store";
+import { type LLMConfig, type Persona } from "../config";
+import { LLMClient, type UsageStats } from "../llm-client";
+
+export interface IngestResult {
+  sourceCount: number;
+  conceptCount: number;
+  linkCount: number;
+  usage: UsageStats & { estimatedCostUsd: number };
+}
+
+export async function ingestUrl(
+  root: string,
+  store: Store,
+  url: string,
+  llmConfig: LLMConfig,
+  persona: Persona | null,
+  onProgress?: (status: string) => void
+): Promise<IngestResult> {
+  const client = new LLMClient(llmConfig);
+  client.resetUsageStats();
+
+  const { fetchPage } = await import("../ingest/web");
+  const { llmChunkDocument, htmlToRawText } = await import("../pipeline/llm-chunker");
+
+  onProgress?.("URL 가져오는 중...");
+  const { title, html } = await fetchPage(url);
+
+  const source = store.addSource(url, "web", title, html);
+  const rawText = htmlToRawText(html);
+
+  onProgress?.("LLM 분석 중...");
+  const { sourceCount, conceptCount } = await llmChunkDocument(rawText, title, source.id, store, 0, persona, client);
+
+  const u = client.getUsageStats();
+  const estimatedCostUsd = client.getEstimatedCost();
+  store.addUsageLog(source.id, u.totalCalls, u.promptTokens, u.completionTokens, u.totalTokens, estimatedCostUsd);
+
+  return {
+    sourceCount,
+    conceptCount,
+    linkCount: 0,
+    usage: { ...u, estimatedCostUsd },
+  };
+}
+
+export async function ingestFile(
+  root: string,
+  store: Store,
+  filePath: string,
+  originalName: string,
+  llmConfig: LLMConfig,
+  persona: Persona | null,
+  onProgress?: (status: string) => void
+): Promise<IngestResult> {
+  const client = new LLMClient(llmConfig);
+  client.resetUsageStats();
+
+  const { llmChunkDocument } = await import("../pipeline/llm-chunker");
+
+  const ext = originalName.split(".").pop()?.toLowerCase() || "";
+
+  let title: string;
+  let text: string;
+
+  if (ext === "pdf") {
+    const { extractTextFromPdf } = await import("../ingest/pdf");
+    onProgress?.("PDF 텍스트 추출 중...");
+    ({ title, text } = await extractTextFromPdf(filePath));
+  } else if (ext === "docx") {
+    const { extractTextFromDocx } = await import("../ingest/docx");
+    onProgress?.("DOCX 텍스트 추출 중...");
+    ({ title, text } = await extractTextFromDocx(filePath));
+  } else if (ext === "pptx") {
+    const { extractTextFromPptx } = await import("../ingest/pptx");
+    onProgress?.("PPTX 텍스트 추출 중...");
+    ({ title, text } = await extractTextFromPptx(filePath));
+  } else {
+    const { extractWithTextutil } = await import("../ingest/legacy");
+    onProgress?.(`${ext.toUpperCase()} 텍스트 추출 중...`);
+    ({ title, text } = await extractWithTextutil(filePath));
+  }
+
+  const source = store.addSource(filePath, ext, title, "(file)");
+  store.deletePagesBySource(source.id);
+
+  onProgress?.("LLM 분석 중...");
+  const { sourceCount, conceptCount } = await llmChunkDocument(text, title, source.id, store, 0, persona, client);
+
+  const u = client.getUsageStats();
+  const estimatedCostUsd = client.getEstimatedCost();
+  store.addUsageLog(source.id, u.totalCalls, u.promptTokens, u.completionTokens, u.totalTokens, estimatedCostUsd);
+
+  return {
+    sourceCount,
+    conceptCount,
+    linkCount: 0,
+    usage: { ...u, estimatedCostUsd },
+  };
+}

package/src/store.test.ts
ADDED

@@ -0,0 +1,132 @@
+import { expect, test, describe, beforeEach, afterEach } from "bun:test";
+import { Store } from "./store";
+
+describe("Store", () => {
+  let store: Store;
+
+  beforeEach(() => {
+    store = new Store(":memory:");
+    store.initSchema();
+  });
+
+  afterEach(() => {
+    store.close();
+  });
+
+  test("addSource and listSources", () => {
+    const src = store.addSource("file:///test.pdf", "pdf", "Test PDF", "raw content");
+    expect(src.id).toBeGreaterThan(0);
+    expect(src.uri).toBe("file:///test.pdf");
+    expect(src.type).toBe("pdf");
+    expect(src.title).toBe("Test PDF");
+
+    const sources = store.listSources();
+    expect(sources).toHaveLength(1);
+    expect(sources[0].uri).toBe("file:///test.pdf");
+  });
+
+  test("addSource updates existing source with same URI", () => {
+    const src1 = store.addSource("file:///test.pdf", "pdf", "V1", "content1");
+    const src2 = store.addSource("file:///test.pdf", "pdf", "V2", "content2");
+    expect(src2.id).toBe(src1.id);
+    expect(src2.title).toBe("V2");
+    expect(src2.raw_content).toBe("content2");
+    expect(store.listSources()).toHaveLength(1);
+  });
+
+  test("addPage and getPage by slug", () => {
+    const src = store.addSource("file:///test.pdf", "pdf", "Test", "raw");
+    const page = store.addPage("test-page", "Test Page", "# Content", src.id, null, "source", 0);
+    expect(page.slug).toBe("test-page");
+    expect(page.title).toBe("Test Page");
+    expect(page.page_type).toBe("source");
+
+    const fetched = store.getPage("test-page");
+    expect(fetched).not.toBeNull();
+    expect(fetched!.title).toBe("Test Page");
+
+    expect(store.getPage("nonexistent")).toBeNull();
+  });
+
+  test("listSourcePages and listConceptPages", () => {
+    const src = store.addSource("file:///test.pdf", "pdf", "Test", "raw");
+    store.addPage("src-page", "Source Page", "content", src.id, null, "source", 0);
+    store.addPage("concept-page", "Concept Page", "content", undefined, undefined, "concept", 0);
+
+    const sourcePages = store.listSourcePages();
+    expect(sourcePages).toHaveLength(1);
+    expect(sourcePages[0].slug).toBe("src-page");
+
+    const conceptPages = store.listConceptPages();
+    expect(conceptPages).toHaveLength(1);
+    expect(conceptPages[0].slug).toBe("concept-page");
+  });
+
+  test("addLink and getBacklinks", () => {
+    const src = store.addSource("file:///test.pdf", "pdf", "Test", "raw");
+    const pageA = store.addPage("page-a", "Page A", "content", src.id, null, "source", 0);
+    const pageB = store.addPage("page-b", "Page B", "content", src.id, null, "source", 1);
+
+    store.addLink(pageA.id, pageB.id, "link to B");
+
+    const backlinks = store.getBacklinks(pageB.id);
+    expect(backlinks).toHaveLength(1);
+    expect(backlinks[0].slug).toBe("page-a");
+  });
+
+  test("getAllBacklinksGrouped", () => {
+    const src = store.addSource("file:///test.pdf", "pdf", "Test", "raw");
+    const pageA = store.addPage("page-a", "Page A", "content", src.id, null, "source", 0);
+    const pageB = store.addPage("page-b", "Page B", "content", src.id, null, "source", 1);
+    const pageC = store.addPage("page-c", "Page C", "content", src.id, null, "source", 2);
+
+    store.addLink(pageA.id, pageC.id, "link to C from A");
+    store.addLink(pageB.id, pageC.id, "link to C from B");
+
+    const grouped = store.getAllBacklinksGrouped();
+    expect(grouped.has(pageC.id)).toBe(true);
+    expect(grouped.get(pageC.id)!).toHaveLength(2);
+  });
+
+  test("deletePagesBySource", () => {
+    const src = store.addSource("file:///test.pdf", "pdf", "Test", "raw");
+    store.addPage("page-1", "Page 1", "content", src.id, null, "source", 0);
+    store.addPage("page-2", "Page 2", "content", src.id, null, "source", 1);
+    expect(store.listPages()).toHaveLength(2);
+
+    store.deletePagesBySource(src.id);
+    expect(store.listPages()).toHaveLength(0);
+  });
+
+  test("slug uniqueness (duplicate handling via INSERT OR REPLACE)", () => {
+    const src = store.addSource("file:///test.pdf", "pdf", "Test", "raw");
+    store.addPage("same-slug", "Title V1", "content v1", src.id, null, "source", 0);
+    store.addPage("same-slug", "Title V2", "content v2", src.id, null, "source", 0);
+
+    const page = store.getPage("same-slug");
+    expect(page).not.toBeNull();
+    expect(page!.title).toBe("Title V2");
+    expect(page!.content).toBe("content v2");
+  });
+
+  test("listSourcesMeta excludes raw_content", () => {
+    store.addSource("file:///test.pdf", "pdf", "Test", "some large raw content here");
+    const meta = store.listSourcesMeta();
+    expect(meta).toHaveLength(1);
+    expect(meta[0].title).toBe("Test");
+    expect(meta[0]).not.toHaveProperty("raw_content");
+  });
+
+  test("addUsageLog and getUsageSummary", () => {
+    const src = store.addSource("file:///test.pdf", "pdf", "Test", "raw");
+    store.addUsageLog(src.id, 2, 100, 50, 150, 0.005);
+    store.addUsageLog(src.id, 3, 200, 100, 300, 0.01);
+
+    const summary = store.getUsageSummary();
+    expect(summary.totalCalls).toBe(5);
+    expect(summary.promptTokens).toBe(300);
+    expect(summary.completionTokens).toBe(150);
+    expect(summary.totalTokens).toBe(450);
+    expect(summary.totalCost).toBeCloseTo(0.015, 5);
+  });
+});
package/src/store.ts
CHANGED
@@ -20,12 +20,31 @@ export interface Page {
   display_order: number;
 }
 
+export interface SourceMeta {
+  id: number;
+  uri: string;
+  type: string;
+  title: string;
+  fetched_at: string;
+}
+
 export interface Link {
   from_page_id: number;
   to_page_id: number;
   anchor_text: string;
 }
 
+export interface Quiz {
+  id: number;
+  page_id: number;
+  question: string;
+  answer: string;
+  quiz_type: string; // 'fill_blank' | 'ox' | 'short_answer'
+  created_at: string;
+  page_title?: string;
+  page_slug?: string;
+}
+
 const SCHEMA = `
 CREATE TABLE IF NOT EXISTS sources (
   id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -63,6 +82,20 @@ CREATE TABLE IF NOT EXISTS links (
   anchor_text TEXT,
   PRIMARY KEY (from_page_id, to_page_id, anchor_text)
 );
+CREATE TABLE IF NOT EXISTS quizzes (
+  id INTEGER PRIMARY KEY AUTOINCREMENT,
+  page_id INTEGER NOT NULL,
+  question TEXT NOT NULL,
+  answer TEXT NOT NULL,
+  quiz_type TEXT NOT NULL DEFAULT 'fill_blank',
+  created_at TEXT DEFAULT (datetime('now')),
+  FOREIGN KEY (page_id) REFERENCES pages(id)
+);
+CREATE INDEX IF NOT EXISTS idx_pages_source_id ON pages(source_id);
+CREATE INDEX IF NOT EXISTS idx_pages_page_type ON pages(page_type);
+CREATE INDEX IF NOT EXISTS idx_links_to_page ON links(to_page_id);
+CREATE INDEX IF NOT EXISTS idx_links_from_page ON links(from_page_id);
+CREATE INDEX IF NOT EXISTS idx_quizzes_page_id ON quizzes(page_id);
 `;
 
 export class Store {
@@ -107,6 +140,10 @@ export class Store {
     return this.db.prepare("SELECT * FROM sources ORDER BY fetched_at DESC").all() as Source[];
   }
 
+  listSourcesMeta(): SourceMeta[] {
+    return this.db.prepare("SELECT id, uri, type, title, fetched_at FROM sources ORDER BY id DESC").all() as SourceMeta[];
+  }
+
   // --- Pages ---
 
   addPage(
@@ -147,7 +184,11 @@ export class Store {
   }
 
   deletePagesBySource(sourceId: number): void {
-    // Delete
+    // Delete quizzes for these pages first
+    this.db.prepare(
+      "DELETE FROM quizzes WHERE page_id IN (SELECT id FROM pages WHERE source_id = ?)"
+    ).run(sourceId);
+    // Delete links involving these pages
     this.db.prepare(
       "DELETE FROM links WHERE from_page_id IN (SELECT id FROM pages WHERE source_id = ?) OR to_page_id IN (SELECT id FROM pages WHERE source_id = ?)"
     ).run(sourceId, sourceId);
@@ -155,6 +196,7 @@ export class Store {
   }
 
   deleteAllPages(): void {
+    this.db.exec("DELETE FROM quizzes");
     this.db.exec("DELETE FROM links");
     this.db.exec("DELETE FROM pages");
   }
@@ -192,6 +234,64 @@ export class Store {
     return this.db.prepare("SELECT * FROM links").all() as Link[];
   }
 
+  getAllBacklinksGrouped(): Map<number, Array<{id: number; slug: string; title: string; page_type: string}>> {
+    const rows = this.db.prepare(`
+      SELECT l.to_page_id, p.id, p.slug, p.title, p.page_type
+      FROM links l
+      JOIN pages p ON p.id = l.from_page_id
+      ORDER BY l.to_page_id
+    `).all() as Array<{to_page_id: number; id: number; slug: string; title: string; page_type: string}>;
+
+    const map = new Map<number, Array<{id: number; slug: string; title: string; page_type: string}>>();
+    for (const row of rows) {
+      if (!map.has(row.to_page_id)) map.set(row.to_page_id, []);
+      map.get(row.to_page_id)!.push({ id: row.id, slug: row.slug, title: row.title, page_type: row.page_type });
+    }
+    return map;
+  }
+
+  // --- Quizzes ---
+
+  addQuiz(pageId: number, question: string, answer: string, quizType: string): void {
+    this.db
+      .prepare("INSERT INTO quizzes (page_id, question, answer, quiz_type) VALUES (?, ?, ?, ?)")
+      .run(pageId, question, answer, quizType);
+  }
+
+  getQuizzesByPage(pageId: number): Quiz[] {
+    return this.db
+      .prepare(
+        `SELECT q.*, p.title as page_title, p.slug as page_slug
+         FROM quizzes q JOIN pages p ON p.id = q.page_id
+         WHERE q.page_id = ? ORDER BY q.id`
+      )
+      .all(pageId) as Quiz[];
+  }
+
+  getAllQuizzes(): Quiz[] {
+    return this.db
+      .prepare(
+        `SELECT q.*, p.title as page_title, p.slug as page_slug
+         FROM quizzes q JOIN pages p ON p.id = q.page_id
+         ORDER BY q.id`
+      )
+      .all() as Quiz[];
+  }
+
+  getRandomQuizzes(count: number): Quiz[] {
+    return this.db
+      .prepare(
+        `SELECT q.*, p.title as page_title, p.slug as page_slug
+         FROM quizzes q JOIN pages p ON p.id = q.page_id
+         ORDER BY RANDOM() LIMIT ?`
+      )
+      .all(count) as Quiz[];
+  }
+
+  deleteQuizzesByPage(pageId: number): void {
+    this.db.prepare("DELETE FROM quizzes WHERE page_id = ?").run(pageId);
+  }
+
   // --- Usage ---
 
   addUsageLog(sourceId: number, calls: number, prompt: number, completion: number, total: number, cost: number): void {
@@ -203,7 +303,7 @@ export class Store {
   getUsageSummary(): { totalCalls: number; promptTokens: number; completionTokens: number; totalTokens: number; totalCost: number } {
     const row = this.db.prepare(
       "SELECT COALESCE(SUM(llm_calls),0) as totalCalls, COALESCE(SUM(prompt_tokens),0) as promptTokens, COALESCE(SUM(completion_tokens),0) as completionTokens, COALESCE(SUM(total_tokens),0) as totalTokens, COALESCE(SUM(estimated_cost_usd),0) as totalCost FROM usage_logs"
-    ).get() as
+    ).get() as { totalCalls: number; promptTokens: number; completionTokens: number; totalTokens: number; totalCost: number };
     return row;
   }
 }

package/src/pipeline/llm-linker.ts
REMOVED

@@ -1,84 +0,0 @@
-import { chatComplete } from "../llm-client";
-import type { Store } from "../store";
-import { slugify } from "./chunker";
-
-const LINK_SYSTEM = `You are a wiki editor. Given wiki pages, find cross-link opportunities that were missed.
-Return valid JSON only. No markdown fences.`;
-
-const LINK_PROMPT = `These wiki pages exist but may be missing cross-links. Find where one page's content mentions a concept that has its own page.
-
-Pages (slug | title | first 300 chars of content):
-{pages}
-
-Return JSON:
-{
-  "links": [
-    {
-      "from_slug": "source-page-slug",
-      "to_slug": "target-page-slug",
-      "anchor_text": "exact phrase in source page to link"
-    }
-  ]
-}
-
-Rules:
-- anchor_text MUST be an exact phrase found in the source page content
-- Only link genuinely related concepts
-- 3-8 links per page where meaningful
-- Do NOT link a page to itself`;
-
-export async function llmLinkPages(store: Store): Promise<number> {
-  const pages = store.listPages();
-  if (pages.length < 2) return 0;
-
-  const batchSize = 30;
-  let totalLinks = 0;
-
-  for (let i = 0; i < pages.length; i += batchSize) {
-    const batch = pages.slice(i, i + batchSize);
-    const pagesText = batch
-      .map(p => `${p.slug} | ${p.title} | ${p.content.slice(0, 300).replace(/\n/g, " ")}`)
-      .join("\n");
-
-    try {
-      const raw = await chatComplete(LINK_SYSTEM, LINK_PROMPT.replace("{pages}", pagesText), 8192);
-      let cleaned = raw.replace(/^```json?\n?/m, "").replace(/\n?```$/m, "").trim();
-
-      let result: { links: Array<{ from_slug: string; to_slug: string; anchor_text: string }> };
-      try {
-        result = JSON.parse(cleaned);
-      } catch {
-        // Try to repair truncated JSON
-        cleaned = cleaned.replace(/,?\s*$/, "]}");
-        try {
-          result = JSON.parse(cleaned);
-        } catch {
-          console.log(`  \x1b[33m⚠ 링크 JSON 파싱 실패\x1b[0m`);
-          continue;
-        }
-      }
-
-      const slugToPage = new Map(pages.map(p => [p.slug, p]));
-
-      for (const link of result.links) {
-        const fromPage = slugToPage.get(link.from_slug);
-        const toPage = slugToPage.get(link.to_slug);
-        if (!fromPage || !toPage || fromPage.id === toPage.id) continue;
-
-        const anchor = link.anchor_text;
-        if (anchor && fromPage.content.includes(anchor) && !fromPage.content.includes(`[${anchor}]`)) {
-          const linkedText = `[${anchor}](/wiki/${link.to_slug})`;
-          const newContent = fromPage.content.replace(anchor, linkedText);
-          store.updatePageContent(fromPage.id, newContent);
-          fromPage.content = newContent;
-          store.addLink(fromPage.id, toPage.id, anchor);
-          totalLinks++;
-        }
-      }
-    } catch (e: any) {
-      console.log(`  \x1b[31m링크 생성 실패: ${e.message}\x1b[0m`);
-    }
-  }
-
-  return totalLinks;
-}