@open330/kiwimu 0.4.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +98 -49
- package/bin/kiwimu +1 -1
- package/package.json +4 -1
- package/personas/namuwiki.json +6 -0
- package/src/build/renderer.ts +50 -2
- package/src/build/static/search.js +33 -2
- package/src/build/static/style.css +84 -1
- package/src/build/templates.ts +353 -167
- package/src/config.ts +35 -29
- package/src/demo/sample-data.ts +70 -0
- package/src/demo/setup.ts +31 -0
- package/src/expand/llm.ts +1 -1
- package/src/index.ts +234 -458
- package/src/ingest/docx.ts +0 -8
- package/src/ingest/legacy.ts +4 -4
- package/src/ingest/pdf.ts +1 -1
- package/src/ingest/pptx.ts +0 -1
- package/src/ingest/web.test.ts +41 -0
- package/src/ingest/web.ts +61 -62
- package/src/llm-client.ts +203 -126
- package/src/pipeline/chunker.test.ts +42 -0
- package/src/pipeline/chunker.ts +1 -48
- package/src/pipeline/llm-chunker.ts +144 -59
- package/src/server.ts +327 -0
- package/src/services/ingest.ts +100 -0
- package/src/store.test.ts +132 -0
- package/src/store.ts +206 -2
- package/src/pipeline/llm-linker.ts +0 -84
package/src/store.ts
CHANGED
|
@@ -20,12 +20,32 @@ export interface Page {
|
|
|
20
20
|
display_order: number;
|
|
21
21
|
}
|
|
22
22
|
|
|
23
|
+
/**
 * Lightweight projection of a row in the `sources` table.
 *
 * Contains exactly the columns selected by `Store.listSourcesMeta()`.
 */
export interface SourceMeta {
  /** Value of the `sources.id` column. */
  id: number;
  /** Value of the `sources.uri` column. */
  uri: string;
  /** Value of the `sources.type` column. */
  type: string;
  /** Value of the `sources.title` column. */
  title: string;
  /** Value of the `sources.fetched_at` column (stored as text; exact format not shown here). */
  fetched_at: string;
}
|
|
30
|
+
|
|
23
31
|
/**
 * A row of the `links` table: a directed link between two pages.
 */
export interface Link {
  /** Id of the page that contains the link. */
  from_page_id: number;
  /** Id of the page the link points to. */
  to_page_id: number;
  /** Anchor text stored with the link; part of the table's composite primary key. */
  anchor_text: string;
}
|
|
28
36
|
|
|
37
|
+
/**
 * A row of the `quizzes` table, optionally enriched with columns from the
 * owning page when the row comes from a query that joins `pages`.
 */
export interface Quiz {
  /** Value of the `quizzes.id` column. */
  id: number;
  /** Id of the page this quiz belongs to. */
  page_id: number;
  /** Question text. */
  question: string;
  /** Expected answer text. */
  answer: string;
  /** Explanation text; the column defaults to an empty string. */
  explanation: string;
  quiz_type: string; // 'fill_blank' | 'ox' | 'short_answer'
  /** Value of the `quizzes.created_at` column (stored as text). */
  created_at: string;
  /** Title of the owning page; only present on rows selected with `p.title as page_title`. */
  page_title?: string;
  /** Slug of the owning page; only present on rows selected with `p.slug as page_slug`. */
  page_slug?: string;
}
|
|
48
|
+
|
|
29
49
|
const SCHEMA = `
|
|
30
50
|
CREATE TABLE IF NOT EXISTS sources (
|
|
31
51
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
@@ -63,6 +83,29 @@ CREATE TABLE IF NOT EXISTS links (
|
|
|
63
83
|
anchor_text TEXT,
|
|
64
84
|
PRIMARY KEY (from_page_id, to_page_id, anchor_text)
|
|
65
85
|
);
|
|
86
|
+
CREATE TABLE IF NOT EXISTS quizzes (
|
|
87
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
88
|
+
page_id INTEGER NOT NULL,
|
|
89
|
+
question TEXT NOT NULL,
|
|
90
|
+
answer TEXT NOT NULL,
|
|
91
|
+
explanation TEXT DEFAULT '',
|
|
92
|
+
quiz_type TEXT NOT NULL DEFAULT 'fill_blank',
|
|
93
|
+
created_at TEXT DEFAULT (datetime('now')),
|
|
94
|
+
FOREIGN KEY (page_id) REFERENCES pages(id)
|
|
95
|
+
);
|
|
96
|
+
CREATE TABLE IF NOT EXISTS quiz_attempts (
|
|
97
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
98
|
+
quiz_id INTEGER NOT NULL,
|
|
99
|
+
is_correct INTEGER NOT NULL DEFAULT 0,
|
|
100
|
+
attempted_at TEXT DEFAULT (datetime('now')),
|
|
101
|
+
FOREIGN KEY (quiz_id) REFERENCES quizzes(id)
|
|
102
|
+
);
|
|
103
|
+
CREATE INDEX IF NOT EXISTS idx_pages_source_id ON pages(source_id);
|
|
104
|
+
CREATE INDEX IF NOT EXISTS idx_attempts_quiz_id ON quiz_attempts(quiz_id);
|
|
105
|
+
CREATE INDEX IF NOT EXISTS idx_pages_page_type ON pages(page_type);
|
|
106
|
+
CREATE INDEX IF NOT EXISTS idx_links_to_page ON links(to_page_id);
|
|
107
|
+
CREATE INDEX IF NOT EXISTS idx_links_from_page ON links(from_page_id);
|
|
108
|
+
CREATE INDEX IF NOT EXISTS idx_quizzes_page_id ON quizzes(page_id);
|
|
66
109
|
`;
|
|
67
110
|
|
|
68
111
|
export class Store {
|
|
@@ -76,6 +119,12 @@ export class Store {
|
|
|
76
119
|
|
|
77
120
|
/**
 * Creates all tables and indexes (idempotently) and applies in-place
 * migrations needed by databases created with an older schema.
 *
 * @throws Propagates any database error raised while creating the schema or
 *   while adding the missing column.
 */
initSchema(): void {
  this.db.exec(SCHEMA);

  // Migrate: add explanation column if missing (for existing databases).
  // The column list is inspected explicitly instead of running ALTER TABLE
  // inside a blanket try/catch, so genuine failures are no longer swallowed
  // together with the expected "duplicate column" case.
  const quizColumns = this.db.prepare("PRAGMA table_info(quizzes)").all() as Array<{ name: string }>;
  const hasExplanation = quizColumns.some((column) => column.name === "explanation");
  if (!hasExplanation) {
    this.db.exec("ALTER TABLE quizzes ADD COLUMN explanation TEXT DEFAULT ''");
  }
}
|
|
80
129
|
|
|
81
130
|
close(): void {
|
|
@@ -107,6 +156,10 @@ export class Store {
|
|
|
107
156
|
return this.db.prepare("SELECT * FROM sources ORDER BY fetched_at DESC").all() as Source[];
|
|
108
157
|
}
|
|
109
158
|
|
|
159
|
+
/**
 * Lists every source as a `SourceMeta` projection, selecting only the
 * id, uri, type, title and fetched_at columns.
 *
 * @returns Rows ordered by `id` descending.
 */
listSourcesMeta(): SourceMeta[] {
  const query = "SELECT id, uri, type, title, fetched_at FROM sources ORDER BY id DESC";
  const rows = this.db.prepare(query).all();
  return rows as SourceMeta[];
}
|
|
162
|
+
|
|
110
163
|
// --- Pages ---
|
|
111
164
|
|
|
112
165
|
addPage(
|
|
@@ -147,7 +200,15 @@ export class Store {
|
|
|
147
200
|
}
|
|
148
201
|
|
|
149
202
|
deletePagesBySource(sourceId: number): void {
|
|
150
|
-
// Delete
|
|
203
|
+
// Delete quiz attempts for quizzes on these pages first
|
|
204
|
+
this.db.prepare(
|
|
205
|
+
"DELETE FROM quiz_attempts WHERE quiz_id IN (SELECT id FROM quizzes WHERE page_id IN (SELECT id FROM pages WHERE source_id = ?))"
|
|
206
|
+
).run(sourceId);
|
|
207
|
+
// Delete quizzes for these pages
|
|
208
|
+
this.db.prepare(
|
|
209
|
+
"DELETE FROM quizzes WHERE page_id IN (SELECT id FROM pages WHERE source_id = ?)"
|
|
210
|
+
).run(sourceId);
|
|
211
|
+
// Delete links involving these pages
|
|
151
212
|
this.db.prepare(
|
|
152
213
|
"DELETE FROM links WHERE from_page_id IN (SELECT id FROM pages WHERE source_id = ?) OR to_page_id IN (SELECT id FROM pages WHERE source_id = ?)"
|
|
153
214
|
).run(sourceId, sourceId);
|
|
@@ -155,6 +216,8 @@ export class Store {
|
|
|
155
216
|
}
|
|
156
217
|
|
|
157
218
|
/**
 * Removes every page together with all rows that reference pages.
 *
 * Tables are emptied child-first: quiz attempts, then quizzes, then links,
 * and finally the pages themselves.
 */
deleteAllPages(): void {
  const tablesChildFirst = ["quiz_attempts", "quizzes", "links", "pages"];
  for (const table of tablesChildFirst) {
    this.db.exec(`DELETE FROM ${table}`);
  }
}
|
|
@@ -192,6 +255,147 @@ export class Store {
|
|
|
192
255
|
return this.db.prepare("SELECT * FROM links").all() as Link[];
|
|
193
256
|
}
|
|
194
257
|
|
|
258
|
+
/**
 * Builds a backlink lookup for every page that is the target of at least one link.
 *
 * A single query joins `links` to the linking page (`from_page_id`); the rows
 * are then grouped in memory by `to_page_id`.
 *
 * @returns Map keyed by target page id. Each value lists the id, slug, title and
 *   page_type of the linking pages, one entry per link row. Pages without
 *   inbound links have no entry.
 */
getAllBacklinksGrouped(): Map<number, Array<{id: number; slug: string; title: string; page_type: string}>> {
  type Backlink = { id: number; slug: string; title: string; page_type: string };

  const statement = this.db.prepare(`
    SELECT l.to_page_id, p.id, p.slug, p.title, p.page_type
    FROM links l
    JOIN pages p ON p.id = l.from_page_id
    ORDER BY l.to_page_id
  `);
  const linkRows = statement.all() as Array<Backlink & { to_page_id: number }>;

  const grouped = new Map<number, Backlink[]>();
  for (const { to_page_id: targetId, id, slug, title, page_type } of linkRows) {
    const entry: Backlink = { id, slug, title, page_type };
    const bucket = grouped.get(targetId);
    if (bucket === undefined) {
      grouped.set(targetId, [entry]);
    } else {
      bucket.push(entry);
    }
  }
  return grouped;
}
|
|
273
|
+
|
|
274
|
+
// --- Quizzes ---
|
|
275
|
+
|
|
276
|
+
/**
 * Inserts one quiz for a page.
 *
 * @param pageId Id of the page the quiz belongs to.
 * @param question Question text.
 * @param answer Expected answer text.
 * @param quizType Value stored in the `quiz_type` column.
 * @param explanation Optional explanation; defaults to an empty string.
 */
addQuiz(pageId: number, question: string, answer: string, quizType: string, explanation: string = ""): void {
  const insert = this.db.prepare(
    "INSERT INTO quizzes (page_id, question, answer, explanation, quiz_type) VALUES (?, ?, ?, ?, ?)"
  );
  // Bind order follows the column list above, which differs from the parameter order.
  insert.run(pageId, question, answer, explanation, quizType);
}
|
|
281
|
+
|
|
282
|
+
/**
 * Fetches the quizzes that belong to one page, enriched with the page's
 * title and slug.
 *
 * @param pageId Id of the page whose quizzes are wanted.
 * @returns Matching quizzes ordered by quiz id ascending.
 */
getQuizzesByPage(pageId: number): Quiz[] {
  const sql = `SELECT q.*, p.title as page_title, p.slug as page_slug
    FROM quizzes q JOIN pages p ON p.id = q.page_id
    WHERE q.page_id = ? ORDER BY q.id`;
  const rows = this.db.prepare(sql).all(pageId);
  return rows as Quiz[];
}
|
|
291
|
+
|
|
292
|
+
/**
 * Fetches every quiz, enriched with the owning page's title and slug.
 *
 * @returns All quizzes whose page still exists, ordered by quiz id ascending.
 */
getAllQuizzes(): Quiz[] {
  const sql = `SELECT q.*, p.title as page_title, p.slug as page_slug
    FROM quizzes q JOIN pages p ON p.id = q.page_id
    ORDER BY q.id`;
  const rows = this.db.prepare(sql).all();
  return rows as Quiz[];
}
|
|
301
|
+
|
|
302
|
+
/**
 * Picks quizzes in random order, enriched with the owning page's title and slug.
 *
 * @param count Value bound to the query's LIMIT clause.
 * @returns Up to `count` quizzes, shuffled by the database's RANDOM().
 */
getRandomQuizzes(count: number): Quiz[] {
  const sql = `SELECT q.*, p.title as page_title, p.slug as page_slug
    FROM quizzes q JOIN pages p ON p.id = q.page_id
    ORDER BY RANDOM() LIMIT ?`;
  const rows = this.db.prepare(sql).all(count);
  return rows as Quiz[];
}
|
|
311
|
+
|
|
312
|
+
/**
 * Deletes all quizzes of one page, including the attempts recorded for them.
 *
 * @param pageId Id of the page whose quizzes should be removed.
 */
deleteQuizzesByPage(pageId: number): void {
  // Attempts reference quizzes via quiz_id, so they are removed first.
  // Deleting only the quizzes would leave orphaned attempt rows behind
  // (and violate the declared foreign key where enforcement is enabled).
  this.db
    .prepare("DELETE FROM quiz_attempts WHERE quiz_id IN (SELECT id FROM quizzes WHERE page_id = ?)")
    .run(pageId);
  this.db.prepare("DELETE FROM quizzes WHERE page_id = ?").run(pageId);
}
|
|
315
|
+
|
|
316
|
+
/**
 * Selects quizzes prioritised by attempt history.
 *
 * Ordering, in priority order:
 *  1. quizzes that have never been attempted,
 *  2. quizzes with at least one wrong attempt,
 *  3. quizzes whose most recent attempt is oldest.
 *
 * Each returned row additionally carries `last_attempt`, `correct_count` and
 * `wrong_count` at runtime, although these are not part of the `Quiz` type.
 *
 * @param count Value bound to the query's LIMIT clause.
 * @returns Up to `count` quizzes with page title and slug attached.
 */
getSmartQuizzes(count: number): Quiz[] {
  const sql = `
    SELECT q.*, p.title as page_title, p.slug as page_slug,
      COALESCE(a.last_attempt, '1970-01-01') as last_attempt,
      COALESCE(a.correct_count, 0) as correct_count,
      COALESCE(a.wrong_count, 0) as wrong_count
    FROM quizzes q
    JOIN pages p ON p.id = q.page_id
    LEFT JOIN (
      SELECT quiz_id,
        MAX(attempted_at) as last_attempt,
        SUM(CASE WHEN is_correct = 1 THEN 1 ELSE 0 END) as correct_count,
        SUM(CASE WHEN is_correct = 0 THEN 1 ELSE 0 END) as wrong_count
      FROM quiz_attempts
      GROUP BY quiz_id
    ) a ON a.quiz_id = q.id
    ORDER BY
      CASE WHEN a.last_attempt IS NULL THEN 0 ELSE 1 END,
      CASE WHEN a.wrong_count > 0 THEN 0 ELSE 1 END,
      a.last_attempt ASC
    LIMIT ?
  `;
  const prioritised = this.db.prepare(sql).all(count);
  return prioritised as Quiz[];
}
|
|
339
|
+
|
|
340
|
+
// --- Quiz Attempts ---
|
|
341
|
+
|
|
342
|
+
/**
 * Records one attempt at a quiz.
 *
 * @param quizId Id of the attempted quiz.
 * @param isCorrect Whether the attempt was correct; stored as 1 or 0.
 */
addQuizAttempt(quizId: number, isCorrect: boolean): void {
  let correctFlag = 0;
  if (isCorrect) {
    correctFlag = 1;
  }
  const insert = this.db.prepare("INSERT INTO quiz_attempts (quiz_id, is_correct) VALUES (?, ?)");
  insert.run(quizId, correctFlag);
}
|
|
347
|
+
|
|
348
|
+
/**
 * Aggregates attempt statistics across all quizzes.
 *
 * @returns
 *  - `total`: number of recorded attempts,
 *  - `correct` / `incorrect`: attempts with `is_correct` equal to 1 / 0,
 *  - `unattempted`: number of quizzes that have no attempt at all.
 */
getQuizStats(): { total: number; correct: number; incorrect: number; unattempted: number } {
  // SUM() over zero rows yields NULL, so the sums are wrapped in COALESCE to
  // guarantee numeric results when no attempt has been recorded yet.
  const attempts = this.db.prepare(`
    SELECT COUNT(*) as total,
      COALESCE(SUM(CASE WHEN is_correct = 1 THEN 1 ELSE 0 END), 0) as correct,
      COALESCE(SUM(CASE WHEN is_correct = 0 THEN 1 ELSE 0 END), 0) as incorrect
    FROM quiz_attempts
  `).get() as { total: number; correct: number; incorrect: number };

  // Counted directly from existing quizzes rather than as
  // "quizzes minus distinct attempted ids", which could go negative when
  // attempt rows reference quizzes that no longer exist.
  const unattempted = (this.db.prepare(`
    SELECT COUNT(*) as cnt
    FROM quizzes q
    WHERE NOT EXISTS (SELECT 1 FROM quiz_attempts a WHERE a.quiz_id = q.id)
  `).get() as { cnt: number }).cnt;

  return {
    total: attempts.total,
    correct: attempts.correct,
    incorrect: attempts.incorrect,
    unattempted,
  };
}
|
|
364
|
+
|
|
365
|
+
/**
 * Selects quizzes ranked by attempt weakness, with page title and slug attached.
 *
 * Ordering: quizzes without any attempt come first, then the remaining
 * quizzes by number of wrong attempts, highest first.
 *
 * NOTE(review): because never-attempted quizzes sort ahead of everything else,
 * a small `limit` can be filled entirely by unattempted quizzes — confirm this
 * is the intended meaning of "weak".
 *
 * @param limit Value bound to the query's LIMIT clause.
 * @returns Up to `limit` quizzes.
 */
getWeakQuizzes(limit: number): Quiz[] {
  const sql = `
    SELECT q.*, p.title as page_title, p.slug as page_slug
    FROM quizzes q
    JOIN pages p ON p.id = q.page_id
    LEFT JOIN (
      SELECT quiz_id,
        SUM(CASE WHEN is_correct = 0 THEN 1 ELSE 0 END) as wrong_count,
        COUNT(*) as attempt_count
      FROM quiz_attempts
      GROUP BY quiz_id
    ) a ON a.quiz_id = q.id
    ORDER BY
      CASE WHEN a.attempt_count IS NULL THEN 1 ELSE 0 END DESC,
      COALESCE(a.wrong_count, 0) DESC
    LIMIT ?
  `;
  const ranked = this.db.prepare(sql).all(limit);
  return ranked as Quiz[];
}
|
|
383
|
+
|
|
384
|
+
/**
 * Returns the most recent quiz attempts, newest first.
 *
 * @param limit Value bound to the query's LIMIT clause.
 * @returns One entry per attempt with the quiz's question text; `is_correct`
 *   is converted from the stored 0/1 integer to a boolean.
 */
getQuizHistory(limit: number): Array<{ quiz_id: number; question: string; is_correct: boolean; attempted_at: string }> {
  type AttemptRow = { quiz_id: number; question: string; is_correct: number; attempted_at: string };

  // attempted_at is filled by datetime('now'), which has one-second resolution;
  // the attempt id is used as a tie-breaker so that attempts recorded within
  // the same second still come back in a stable newest-first order.
  const rows = this.db.prepare(`
    SELECT qa.quiz_id, q.question, qa.is_correct, qa.attempted_at
    FROM quiz_attempts qa
    JOIN quizzes q ON q.id = qa.quiz_id
    ORDER BY qa.attempted_at DESC, qa.id DESC
    LIMIT ?
  `).all(limit) as AttemptRow[];

  return rows.map((row) => ({
    quiz_id: row.quiz_id,
    question: row.question,
    is_correct: row.is_correct === 1,
    attempted_at: row.attempted_at,
  }));
}
|
|
398
|
+
|
|
195
399
|
// --- Usage ---
|
|
196
400
|
|
|
197
401
|
addUsageLog(sourceId: number, calls: number, prompt: number, completion: number, total: number, cost: number): void {
|
|
@@ -203,7 +407,7 @@ export class Store {
|
|
|
203
407
|
/**
 * Sums the LLM usage recorded in `usage_logs`.
 *
 * Every aggregate is wrapped in COALESCE, so all fields are 0 when the table
 * is empty.
 *
 * @returns Totals for calls, prompt/completion/total tokens, and estimated cost in USD.
 */
getUsageSummary(): { totalCalls: number; promptTokens: number; completionTokens: number; totalTokens: number; totalCost: number } {
  type UsageTotals = { totalCalls: number; promptTokens: number; completionTokens: number; totalTokens: number; totalCost: number };

  const sql =
    "SELECT COALESCE(SUM(llm_calls),0) as totalCalls, COALESCE(SUM(prompt_tokens),0) as promptTokens, COALESCE(SUM(completion_tokens),0) as completionTokens, COALESCE(SUM(total_tokens),0) as totalTokens, COALESCE(SUM(estimated_cost_usd),0) as totalCost FROM usage_logs";
  return this.db.prepare(sql).get() as UsageTotals;
}
|
|
209
413
|
}
|
|
@@ -1,84 +0,0 @@
|
|
|
1
|
-
import { chatComplete } from "../llm-client";
|
|
2
|
-
import type { Store } from "../store";
|
|
3
|
-
import { slugify } from "./chunker";
|
|
4
|
-
|
|
5
|
-
const LINK_SYSTEM = `You are a wiki editor. Given wiki pages, find cross-link opportunities that were missed.
|
|
6
|
-
Return valid JSON only. No markdown fences.`;
|
|
7
|
-
|
|
8
|
-
const LINK_PROMPT = `These wiki pages exist but may be missing cross-links. Find where one page's content mentions a concept that has its own page.
|
|
9
|
-
|
|
10
|
-
Pages (slug | title | first 300 chars of content):
|
|
11
|
-
{pages}
|
|
12
|
-
|
|
13
|
-
Return JSON:
|
|
14
|
-
{
|
|
15
|
-
"links": [
|
|
16
|
-
{
|
|
17
|
-
"from_slug": "source-page-slug",
|
|
18
|
-
"to_slug": "target-page-slug",
|
|
19
|
-
"anchor_text": "exact phrase in source page to link"
|
|
20
|
-
}
|
|
21
|
-
]
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
Rules:
|
|
25
|
-
- anchor_text MUST be an exact phrase found in the source page content
|
|
26
|
-
- Only link genuinely related concepts
|
|
27
|
-
- 3-8 links per page where meaningful
|
|
28
|
-
- Do NOT link a page to itself`;
|
|
29
|
-
|
|
30
|
-
export async function llmLinkPages(store: Store): Promise<number> {
|
|
31
|
-
const pages = store.listPages();
|
|
32
|
-
if (pages.length < 2) return 0;
|
|
33
|
-
|
|
34
|
-
const batchSize = 30;
|
|
35
|
-
let totalLinks = 0;
|
|
36
|
-
|
|
37
|
-
for (let i = 0; i < pages.length; i += batchSize) {
|
|
38
|
-
const batch = pages.slice(i, i + batchSize);
|
|
39
|
-
const pagesText = batch
|
|
40
|
-
.map(p => `${p.slug} | ${p.title} | ${p.content.slice(0, 300).replace(/\n/g, " ")}`)
|
|
41
|
-
.join("\n");
|
|
42
|
-
|
|
43
|
-
try {
|
|
44
|
-
const raw = await chatComplete(LINK_SYSTEM, LINK_PROMPT.replace("{pages}", pagesText), 8192);
|
|
45
|
-
let cleaned = raw.replace(/^```json?\n?/m, "").replace(/\n?```$/m, "").trim();
|
|
46
|
-
|
|
47
|
-
let result: { links: Array<{ from_slug: string; to_slug: string; anchor_text: string }> };
|
|
48
|
-
try {
|
|
49
|
-
result = JSON.parse(cleaned);
|
|
50
|
-
} catch {
|
|
51
|
-
// Try to repair truncated JSON
|
|
52
|
-
cleaned = cleaned.replace(/,?\s*$/, "]}");
|
|
53
|
-
try {
|
|
54
|
-
result = JSON.parse(cleaned);
|
|
55
|
-
} catch {
|
|
56
|
-
console.log(` \x1b[33m⚠ 링크 JSON 파싱 실패\x1b[0m`);
|
|
57
|
-
continue;
|
|
58
|
-
}
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
const slugToPage = new Map(pages.map(p => [p.slug, p]));
|
|
62
|
-
|
|
63
|
-
for (const link of result.links) {
|
|
64
|
-
const fromPage = slugToPage.get(link.from_slug);
|
|
65
|
-
const toPage = slugToPage.get(link.to_slug);
|
|
66
|
-
if (!fromPage || !toPage || fromPage.id === toPage.id) continue;
|
|
67
|
-
|
|
68
|
-
const anchor = link.anchor_text;
|
|
69
|
-
if (anchor && fromPage.content.includes(anchor) && !fromPage.content.includes(`[${anchor}]`)) {
|
|
70
|
-
const linkedText = `[${anchor}](/wiki/${link.to_slug})`;
|
|
71
|
-
const newContent = fromPage.content.replace(anchor, linkedText);
|
|
72
|
-
store.updatePageContent(fromPage.id, newContent);
|
|
73
|
-
fromPage.content = newContent;
|
|
74
|
-
store.addLink(fromPage.id, toPage.id, anchor);
|
|
75
|
-
totalLinks++;
|
|
76
|
-
}
|
|
77
|
-
}
|
|
78
|
-
} catch (e: any) {
|
|
79
|
-
console.log(` \x1b[31m링크 생성 실패: ${e.message}\x1b[0m`);
|
|
80
|
-
}
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
return totalLinks;
|
|
84
|
-
}
|