@open330/kiwimu 0.8.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +105 -27
- package/package.json +1 -1
- package/src/build/renderer.ts +272 -32
- package/src/build/static/dynamic-qa.js +423 -0
- package/src/build/static/edit-page.js +58 -0
- package/src/build/static/peek-panel.css +201 -0
- package/src/build/static/peek-panel.js +470 -0
- package/src/build/static/search.js +30 -15
- package/src/build/static/style.css +821 -6
- package/src/build/templates.ts +700 -48
- package/src/config.ts +41 -3
- package/src/demo/sample-data.ts +69 -2
- package/src/demo/setup.ts +25 -6
- package/src/expand/llm.ts +2 -2
- package/src/index.ts +467 -60
- package/src/ingest/docx.ts +1 -1
- package/src/ingest/markdown.ts +21 -0
- package/src/ingest/pdf.ts +4 -2
- package/src/llm-client.ts +63 -69
- package/src/pipeline/citations.ts +107 -0
- package/src/pipeline/llm-chunker.ts +277 -131
- package/src/pipeline/standardizer.ts +41 -0
- package/src/server.ts +465 -32
- package/src/services/dynamic-qa.ts +190 -0
- package/src/services/embedding.ts +122 -0
- package/src/services/index-generator.ts +185 -0
- package/src/services/ingest.ts +83 -25
- package/src/services/lint.ts +249 -0
- package/src/services/promote.ts +150 -0
- package/src/store.test.ts +11 -0
- package/src/store.ts +561 -28
- package/src/utils.ts +30 -0
package/src/index.ts
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
import { Command } from "commander";
|
|
4
4
|
import { join } from "path";
|
|
5
|
-
import { CONFIG_FILE, DB_FILE, defaultConfig, findProjectRoot, getActivePersona, loadConfig, saveConfig } from "./config";
|
|
5
|
+
import { CONFIG_FILE, DB_FILE, SUPPORTED_EXTENSIONS, defaultConfig, findProjectRoot, getActivePersona, loadConfig, saveConfig } from "./config";
|
|
6
6
|
import { Store } from "./store";
|
|
7
7
|
|
|
8
8
|
const program = new Command()
|
|
@@ -43,14 +43,16 @@ program
|
|
|
43
43
|
console.log(`\x1b[32m✅ ${count}개 페이지가 빌드되었습니다!\x1b[0m`);
|
|
44
44
|
|
|
45
45
|
const { startServer } = await import("./server");
|
|
46
|
-
|
|
47
|
-
|
|
46
|
+
const demoPort = parseInt(process.env.KIWI_PORT || '8000', 10);
|
|
47
|
+
console.log(`🎉 데모 위키가 준비되었습니다! http://localhost:${demoPort} 에서 확인하세요`);
|
|
48
|
+
startServer(root, demoPort, "localhost");
|
|
48
49
|
return;
|
|
49
50
|
}
|
|
50
51
|
|
|
51
52
|
if (Bun.file(join(root, CONFIG_FILE)).size > 0) {
|
|
52
53
|
try {
|
|
53
|
-
|
|
54
|
+
const { accessSync } = await import("fs");
|
|
55
|
+
accessSync(join(root, CONFIG_FILE));
|
|
54
56
|
console.log("\x1b[33m이미 초기화된 프로젝트입니다.\x1b[0m");
|
|
55
57
|
return;
|
|
56
58
|
} catch {}
|
|
@@ -81,13 +83,13 @@ program
|
|
|
81
83
|
p.text({
|
|
82
84
|
message: "모델명",
|
|
83
85
|
placeholder:
|
|
84
|
-
results.provider === "gemini" ? "gemini-
|
|
85
|
-
results.provider === "azure-openai" ? "gpt-5-nano" :
|
|
86
|
-
results.provider === "openai" ? "gpt-
|
|
86
|
+
results.provider === "gemini" ? "gemini-3.1-flash-lite-preview" :
|
|
87
|
+
results.provider === "azure-openai" ? "gpt-5.4-nano" :
|
|
88
|
+
results.provider === "openai" ? "gpt-5.4-nano" : "claude-sonnet-4-6",
|
|
87
89
|
initialValue:
|
|
88
|
-
results.provider === "gemini" ? "gemini-
|
|
89
|
-
results.provider === "azure-openai" ? "gpt-5-nano" :
|
|
90
|
-
results.provider === "openai" ? "gpt-
|
|
90
|
+
results.provider === "gemini" ? "gemini-3.1-flash-lite-preview" :
|
|
91
|
+
results.provider === "azure-openai" ? "gpt-5.4-nano" :
|
|
92
|
+
results.provider === "openai" ? "gpt-5.4-nano" : "claude-sonnet-4-6",
|
|
91
93
|
}),
|
|
92
94
|
apiKey: () =>
|
|
93
95
|
p.password({
|
|
@@ -122,13 +124,14 @@ program
|
|
|
122
124
|
// --- add ---
|
|
123
125
|
program
|
|
124
126
|
.command("add <source>")
|
|
125
|
-
.description("URL 또는
|
|
127
|
+
.description("URL, 파일, 또는 디렉토리를 추가합니다 (PDF, DOCX, PPTX, DOC, PPT, KEY, RTF, MD)")
|
|
126
128
|
.action(async (source: string) => {
|
|
127
129
|
const root = findProjectRoot();
|
|
128
130
|
const config = loadConfig(root);
|
|
129
131
|
const persona = getActivePersona(config);
|
|
130
132
|
const store = new Store(join(root, DB_FILE));
|
|
131
133
|
try {
|
|
134
|
+
const schema = config.schema;
|
|
132
135
|
const isUrl = source.startsWith("http://") || source.startsWith("https://");
|
|
133
136
|
|
|
134
137
|
if (isUrl) {
|
|
@@ -136,29 +139,58 @@ program
|
|
|
136
139
|
validateUrl(source);
|
|
137
140
|
console.log(`\x1b[34m📥 URL 가져오는 중: ${source}\x1b[0m`);
|
|
138
141
|
const { ingestUrl } = await import("./services/ingest");
|
|
139
|
-
const result = await ingestUrl(root, store, source, config.llm, persona, (s) => console.log(` ${s}`));
|
|
142
|
+
const result = await ingestUrl(root, store, source, config.llm, persona, (s) => console.log(` ${s}`), schema);
|
|
140
143
|
console.log(`\x1b[32m✅ 📖 ${result.sourceCount}개 원본 + 📝 ${result.conceptCount}개 개념 문서 생성\x1b[0m`);
|
|
141
144
|
console.log(`\x1b[34m📊 LLM: ${result.usage.totalCalls}회 호출, ~$${result.usage.estimatedCostUsd.toFixed(4)}\x1b[0m`);
|
|
142
145
|
} else {
|
|
143
|
-
const { resolve } = await import("path");
|
|
146
|
+
const { resolve, basename } = await import("path");
|
|
144
147
|
const absPath = resolve(source);
|
|
145
|
-
const
|
|
146
|
-
|
|
148
|
+
const { statSync, readdirSync } = await import("fs");
|
|
149
|
+
|
|
150
|
+
let stat;
|
|
151
|
+
try {
|
|
152
|
+
stat = statSync(absPath);
|
|
153
|
+
} catch {
|
|
147
154
|
console.error(`\x1b[31m❌ 파일을 찾을 수 없습니다: ${source}\x1b[0m`);
|
|
148
155
|
process.exit(1);
|
|
149
156
|
}
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
157
|
+
|
|
158
|
+
if (stat.isDirectory()) {
|
|
159
|
+
// Find all .md files in directory
|
|
160
|
+
const mdFiles = readdirSync(absPath)
|
|
161
|
+
.filter(f => f.endsWith('.md'))
|
|
162
|
+
.map(f => join(absPath, f));
|
|
163
|
+
|
|
164
|
+
if (mdFiles.length === 0) {
|
|
165
|
+
console.error("디렉토리에 .md 파일이 없습니다");
|
|
166
|
+
process.exit(1);
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
console.log(`📂 ${mdFiles.length}개 마크다운 파일 발견`);
|
|
170
|
+
const { ingestFile } = await import("./services/ingest");
|
|
171
|
+
for (const mdFile of mdFiles) {
|
|
172
|
+
console.log(`\x1b[34m📥 파일 처리 중: ${basename(mdFile)}\x1b[0m`);
|
|
173
|
+
const result = await ingestFile(root, store, mdFile, basename(mdFile), config.llm, persona, (s) => console.log(` ${s}`), schema);
|
|
174
|
+
console.log(`\x1b[32m✅ 📖 ${result.sourceCount}개 원본 + 📝 ${result.conceptCount}개 개념\x1b[0m`);
|
|
175
|
+
}
|
|
176
|
+
} else {
|
|
177
|
+
const file = Bun.file(absPath);
|
|
178
|
+
if (!(await file.exists())) {
|
|
179
|
+
console.error(`\x1b[31m❌ 파일을 찾을 수 없습니다: ${source}\x1b[0m`);
|
|
180
|
+
process.exit(1);
|
|
181
|
+
}
|
|
182
|
+
const ext = source.split(".").pop()?.toLowerCase() || "";
|
|
183
|
+
if (!SUPPORTED_EXTENSIONS.includes(ext)) {
|
|
184
|
+
console.error(`\x1b[31m❌ 지원하지 않는 파일 형식입니다: .${ext}\x1b[0m`);
|
|
185
|
+
console.error(` 지원 형식: ${SUPPORTED_EXTENSIONS.join(', ')}`);
|
|
186
|
+
process.exit(1);
|
|
187
|
+
}
|
|
188
|
+
console.log(`\x1b[34m📥 파일 처리 중: ${source}\x1b[0m`);
|
|
189
|
+
const { ingestFile } = await import("./services/ingest");
|
|
190
|
+
const result = await ingestFile(root, store, absPath, source, config.llm, persona, (s) => console.log(` ${s}`), schema);
|
|
191
|
+
console.log(`\x1b[32m✅ 📖 ${result.sourceCount}개 원본 + 📝 ${result.conceptCount}개 개념 문서 생성\x1b[0m`);
|
|
192
|
+
console.log(`\x1b[34m📊 LLM: ${result.usage.totalCalls}회 호출, ~$${result.usage.estimatedCostUsd.toFixed(4)}\x1b[0m`);
|
|
156
193
|
}
|
|
157
|
-
console.log(`\x1b[34m📥 파일 처리 중: ${source}\x1b[0m`);
|
|
158
|
-
const { ingestFile } = await import("./services/ingest");
|
|
159
|
-
const result = await ingestFile(root, store, absPath, source, config.llm, persona, (s) => console.log(` ${s}`));
|
|
160
|
-
console.log(`\x1b[32m✅ 📖 ${result.sourceCount}개 원본 + 📝 ${result.conceptCount}개 개념 문서 생성\x1b[0m`);
|
|
161
|
-
console.log(`\x1b[34m📊 LLM: ${result.usage.totalCalls}회 호출, ~$${result.usage.estimatedCostUsd.toFixed(4)}\x1b[0m`);
|
|
162
194
|
}
|
|
163
195
|
} catch (e: unknown) {
|
|
164
196
|
const message = e instanceof Error ? e.message : String(e);
|
|
@@ -216,6 +248,10 @@ program
|
|
|
216
248
|
const { autoLinkPages } = await import("./pipeline/linker");
|
|
217
249
|
const linkCount = autoLinkPages(store);
|
|
218
250
|
console.log(`\x1b[32m✅ 확장 완료! (${linkCount}개 링크 갱신)\x1b[0m`);
|
|
251
|
+
} catch (e: unknown) {
|
|
252
|
+
const message = e instanceof Error ? e.message : String(e);
|
|
253
|
+
console.error(`\x1b[31m❌ ${message}\x1b[0m`);
|
|
254
|
+
process.exit(1);
|
|
219
255
|
} finally {
|
|
220
256
|
store.close();
|
|
221
257
|
}
|
|
@@ -235,6 +271,23 @@ program
|
|
|
235
271
|
const count = await buildSite(store, config, root);
|
|
236
272
|
console.log(`\x1b[32m✅ ${count}개 페이지가 빌드되었습니다!\x1b[0m`);
|
|
237
273
|
console.log(` 출력: ${join(root, config.build.output_dir)}/`);
|
|
274
|
+
|
|
275
|
+
// Generate embeddings (optional — uses [embedding] config or falls back to [llm])
|
|
276
|
+
try {
|
|
277
|
+
const embConfig = config.embedding
|
|
278
|
+
? { ...config.llm, provider: config.embedding.provider, api_key: config.embedding.api_key }
|
|
279
|
+
: config.llm;
|
|
280
|
+
if (embConfig.api_key && embConfig.provider !== "demo") {
|
|
281
|
+
const { generateMissingEmbeddings } = await import("./services/embedding");
|
|
282
|
+
await generateMissingEmbeddings(store, embConfig, (msg) => console.log(msg));
|
|
283
|
+
}
|
|
284
|
+
} catch (e: unknown) {
|
|
285
|
+
console.log(` ⚠ 임베딩 생성 건너뜀: ${e instanceof Error ? e.message : String(e)}`);
|
|
286
|
+
}
|
|
287
|
+
} catch (e: unknown) {
|
|
288
|
+
const message = e instanceof Error ? e.message : String(e);
|
|
289
|
+
console.error(`\x1b[31m❌ ${message}\x1b[0m`);
|
|
290
|
+
process.exit(1);
|
|
238
291
|
} finally {
|
|
239
292
|
store.close();
|
|
240
293
|
}
|
|
@@ -257,31 +310,41 @@ program
|
|
|
257
310
|
console.log("\x1b[34m🔨 빌드 중...\x1b[0m");
|
|
258
311
|
const count = await buildSite(store, config, root);
|
|
259
312
|
console.log(`\x1b[32m ${count}개 페이지 빌드 완료\x1b[0m`);
|
|
313
|
+
} catch (e: unknown) {
|
|
314
|
+
const message = e instanceof Error ? e.message : String(e);
|
|
315
|
+
console.error(`\x1b[31m❌ 빌드 실패: ${message}\x1b[0m`);
|
|
316
|
+
process.exit(1);
|
|
260
317
|
} finally {
|
|
261
318
|
store.close();
|
|
262
319
|
}
|
|
263
320
|
|
|
264
|
-
|
|
321
|
+
try {
|
|
322
|
+
console.log(`\x1b[34m🚀 ${opts.target}에 배포 중...\x1b[0m`);
|
|
265
323
|
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
324
|
+
if (opts.target === "gh-pages") {
|
|
325
|
+
const { deployGhPages } = await import("./deploy");
|
|
326
|
+
await deployGhPages(siteDir, opts.message);
|
|
327
|
+
console.log("\x1b[32m✅ GitHub Pages에 배포되었습니다!\x1b[0m");
|
|
328
|
+
try {
|
|
329
|
+
const proc = Bun.spawn(["gh", "repo", "view", "--json", "url", "-q", ".url"], { stdout: "pipe" });
|
|
330
|
+
const repoUrl = (await new Response(proc.stdout).text()).trim();
|
|
331
|
+
if (repoUrl) {
|
|
332
|
+
const owner = repoUrl.split("/").slice(-2).join("/").replace("https://github.com/", "");
|
|
333
|
+
const [user, repo] = owner.split("/");
|
|
334
|
+
console.log(` https://${user}.github.io/${repo}/`);
|
|
335
|
+
}
|
|
336
|
+
} catch {}
|
|
337
|
+
} else if (opts.target === "vercel") {
|
|
338
|
+
const { deployVercel } = await import("./deploy");
|
|
339
|
+
await deployVercel(siteDir);
|
|
340
|
+
console.log("\x1b[32m✅ Vercel에 배포되었습니다!\x1b[0m");
|
|
341
|
+
} else {
|
|
342
|
+
console.error(`\x1b[31m❌ 지원하지 않는 배포 대상: ${opts.target}\x1b[0m`);
|
|
343
|
+
process.exit(1);
|
|
344
|
+
}
|
|
345
|
+
} catch (e: unknown) {
|
|
346
|
+
const message = e instanceof Error ? e.message : String(e);
|
|
347
|
+
console.error(`\x1b[31m❌ 배포 실패: ${message}\x1b[0m`);
|
|
285
348
|
process.exit(1);
|
|
286
349
|
}
|
|
287
350
|
});
|
|
@@ -293,20 +356,29 @@ program
|
|
|
293
356
|
.option("-p, --port <port>", "포트 번호", "8000")
|
|
294
357
|
.option("-H, --host <host>", "바인드 주소", "localhost")
|
|
295
358
|
.action(async (opts) => {
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
359
|
+
try {
|
|
360
|
+
const root = findProjectRoot();
|
|
361
|
+
const config = loadConfig(root);
|
|
362
|
+
const siteDir = join(root, config.build.output_dir);
|
|
299
363
|
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
364
|
+
const { existsSync } = await import("fs");
|
|
365
|
+
if (!existsSync(siteDir)) {
|
|
366
|
+
const store = new Store(join(root, DB_FILE));
|
|
367
|
+
try {
|
|
368
|
+
const { buildSite } = await import("./build/renderer");
|
|
369
|
+
await buildSite(store, config, root);
|
|
370
|
+
} finally {
|
|
371
|
+
store.close();
|
|
372
|
+
}
|
|
373
|
+
}
|
|
307
374
|
|
|
308
|
-
|
|
309
|
-
|
|
375
|
+
const { startServer } = await import("./server");
|
|
376
|
+
startServer(root, parseInt(opts.port), opts.host);
|
|
377
|
+
} catch (e: unknown) {
|
|
378
|
+
const message = e instanceof Error ? e.message : String(e);
|
|
379
|
+
console.error(`\x1b[31m❌ ${message}\x1b[0m`);
|
|
380
|
+
process.exit(1);
|
|
381
|
+
}
|
|
310
382
|
});
|
|
311
383
|
|
|
312
384
|
// --- quiz ---
|
|
@@ -367,6 +439,10 @@ program
|
|
|
367
439
|
|
|
368
440
|
store.addQuizAttempt(q.id, isCorrect);
|
|
369
441
|
|
|
442
|
+
// SM-2 spaced repetition update
|
|
443
|
+
const quality = isCorrect ? 4 : 1; // 4=correct with hesitation, 1=wrong
|
|
444
|
+
store.updateQuizSRS(q.id, quality);
|
|
445
|
+
|
|
370
446
|
if (isCorrect) {
|
|
371
447
|
score++;
|
|
372
448
|
console.log(` \x1b[32m✅ 정답!\x1b[0m`);
|
|
@@ -396,6 +472,166 @@ program
|
|
|
396
472
|
}
|
|
397
473
|
|
|
398
474
|
p.outro("학습을 계속하세요! 🥝");
|
|
475
|
+
} catch (e: unknown) {
|
|
476
|
+
const message = e instanceof Error ? e.message : String(e);
|
|
477
|
+
console.error(`\x1b[31m❌ ${message}\x1b[0m`);
|
|
478
|
+
process.exit(1);
|
|
479
|
+
} finally {
|
|
480
|
+
store.close();
|
|
481
|
+
}
|
|
482
|
+
});
|
|
483
|
+
|
|
484
|
+
// --- lint ---
|
|
485
|
+
program
  .command("lint")
  .description("위키 건강 상태를 검사합니다 (orphan pages, dead links, etc.)")
  /**
   * `lint` command: runs `lintWiki` against the project store and prints the
   * report grouped by severity (errors, warnings, info), followed by a summary.
   *
   * The process exit status is 1 when the report contains errors or when the
   * lint itself throws. The status is set through `process.exitCode` rather
   * than `process.exit()`: `process.exit()` terminates immediately and would
   * skip the `finally` block, leaving the store unclosed.
   */
  .action(async () => {
    const root = findProjectRoot();
    const store = new Store(join(root, DB_FILE));
    try {
      const { lintWiki } = await import("./services/lint");
      const { summary, issues } = lintWiki(store);

      console.log(`\n\x1b[1m🔍 Wiki Lint Report\x1b[0m\n`);
      console.log(` Pages: ${summary.total_pages} Links: ${summary.total_links}\n`);

      if (issues.length === 0) {
        console.log("\x1b[32m ✅ No issues found!\x1b[0m\n");
      } else {
        // One entry per severity, in display order. Each group shares the same
        // layout and differs only in ANSI color and heading text.
        const groups = [
          { severity: "error", color: "\x1b[31m", heading: "❌ Errors" },
          { severity: "warning", color: "\x1b[33m", heading: "⚠ Warnings" },
          { severity: "info", color: "\x1b[36m", heading: "ℹ Info" },
        ];

        for (const { severity, color, heading } of groups) {
          const matching = issues.filter((issue) => issue.severity === severity);
          if (matching.length === 0) continue;

          console.log(`${color} ${heading} (${matching.length})\x1b[0m`);
          for (const issue of matching) {
            console.log(` ${color}• [${issue.type}] ${issue.message}\x1b[0m`);
            if (issue.suggestion) console.log(` \x1b[2m→ ${issue.suggestion}\x1b[0m`);
          }
          console.log();
        }

        console.log(`\x1b[1m Summary: \x1b[31m${summary.errors} errors\x1b[0m, \x1b[33m${summary.warnings} warnings\x1b[0m, \x1b[36m${summary.info} info\x1b[0m\n`);
      }

      // Signal failure to the shell without bypassing the `finally` below.
      if (summary.errors > 0) process.exitCode = 1;
    } catch (e: unknown) {
      const message = e instanceof Error ? e.message : String(e);
      console.error(`\x1b[31m❌ ${message}\x1b[0m`);
      process.exitCode = 1;
    } finally {
      store.close();
    }
  });
|
|
546
|
+
|
|
547
|
+
// --- cite (backfill citations) ---
|
|
548
|
+
program
|
|
549
|
+
.command("cite")
|
|
550
|
+
.description("기존 개념 페이지에 대해 인용 정보를 역추적합니다 (LLM 호출 필요)")
|
|
551
|
+
.option("--dry-run", "실제 DB에 저장하지 않고 결과만 표시")
|
|
552
|
+
.action(async (opts: { dryRun?: boolean }) => {
|
|
553
|
+
const root = findProjectRoot();
|
|
554
|
+
const config = loadConfig(root);
|
|
555
|
+
const store = new Store(join(root, DB_FILE));
|
|
556
|
+
try {
|
|
557
|
+
const conceptPages = store.listConceptPages();
|
|
558
|
+
const sourcePages = store.listSourcePages();
|
|
559
|
+
|
|
560
|
+
if (conceptPages.length === 0) {
|
|
561
|
+
console.log("\x1b[33m개념 페이지가 없습니다.\x1b[0m");
|
|
562
|
+
return;
|
|
563
|
+
}
|
|
564
|
+
if (sourcePages.length === 0) {
|
|
565
|
+
console.log("\x1b[33m원본 페이지가 없습니다.\x1b[0m");
|
|
566
|
+
return;
|
|
567
|
+
}
|
|
568
|
+
if (!config.llm.api_key || config.llm.provider === "demo") {
|
|
569
|
+
console.error("\x1b[31m❌ LLM API 키가 필요합니다.\x1b[0m");
|
|
570
|
+
process.exit(1);
|
|
571
|
+
}
|
|
572
|
+
|
|
573
|
+
const { LLMClient } = await import("./llm-client");
|
|
574
|
+
const llmClient = new LLMClient(config.llm);
|
|
575
|
+
|
|
576
|
+
const sourcePageList = sourcePages.map(p => `- ${p.title} [slug: ${p.slug}]`).join("\n");
|
|
577
|
+
|
|
578
|
+
console.log(`\x1b[34m📚 ${conceptPages.length}개 개념 페이지에 대해 인용 역추적 시작...\x1b[0m`);
|
|
579
|
+
console.log(` 원본 페이지: ${sourcePages.length}개\n`);
|
|
580
|
+
|
|
581
|
+
let totalCitations = 0;
|
|
582
|
+
|
|
583
|
+
for (let i = 0; i < conceptPages.length; i++) {
|
|
584
|
+
const page = conceptPages[i];
|
|
585
|
+
console.log(` [${i + 1}/${conceptPages.length}] ${page.title}...`);
|
|
586
|
+
|
|
587
|
+
const system = `You analyze wiki content and identify which source pages each claim comes from.
|
|
588
|
+
Return valid JSON only. No markdown fences.`;
|
|
589
|
+
|
|
590
|
+
const prompt = `Given this concept page content and a list of source pages, identify which source pages each major claim or fact comes from.
|
|
591
|
+
|
|
592
|
+
Concept page: "${page.title}"
|
|
593
|
+
Content:
|
|
594
|
+
${page.content.slice(0, 3000)}
|
|
595
|
+
|
|
596
|
+
Available source pages:
|
|
597
|
+
${sourcePageList}
|
|
598
|
+
|
|
599
|
+
Return a JSON array of citation matches:
|
|
600
|
+
[{"source_page_slug": "the-slug", "excerpt": "brief relevant quote or claim from the concept page (max 150 chars)"}]
|
|
601
|
+
|
|
602
|
+
Only include matches where you are confident the content derives from that source. Return an empty array [] if no clear matches.`;
|
|
603
|
+
|
|
604
|
+
try {
|
|
605
|
+
const raw = await llmClient.chatComplete(system, prompt, 2048);
|
|
606
|
+
let cleaned = raw.replace(/^```json?\n?/m, "").replace(/\n?```\s*$/m, "").trim();
|
|
607
|
+
const matches = JSON.parse(cleaned) as Array<{ source_page_slug: string; excerpt?: string }>;
|
|
608
|
+
|
|
609
|
+
for (const match of matches) {
|
|
610
|
+
const sourcePage = store.getPage(match.source_page_slug);
|
|
611
|
+
if (!sourcePage || !sourcePage.source_id) continue;
|
|
612
|
+
|
|
613
|
+
if (!opts.dryRun) {
|
|
614
|
+
store.addCitation(page.id, sourcePage.source_id, sourcePage.id, match.excerpt || null, null);
|
|
615
|
+
}
|
|
616
|
+
totalCitations++;
|
|
617
|
+
console.log(` → ${sourcePage.title}${match.excerpt ? ': "' + match.excerpt.slice(0, 60) + '..."' : ''}`);
|
|
618
|
+
}
|
|
619
|
+
} catch (e: unknown) {
|
|
620
|
+
const message = e instanceof Error ? e.message : String(e);
|
|
621
|
+
console.log(` \x1b[33m⚠ 실패: ${message}\x1b[0m`);
|
|
622
|
+
}
|
|
623
|
+
}
|
|
624
|
+
|
|
625
|
+
if (opts.dryRun) {
|
|
626
|
+
console.log(`\n\x1b[33m🔍 DRY RUN: ${totalCitations}개 인용 발견 (저장하지 않음)\x1b[0m`);
|
|
627
|
+
} else {
|
|
628
|
+
console.log(`\n\x1b[32m✅ ${totalCitations}개 인용 정보가 생성되었습니다.\x1b[0m`);
|
|
629
|
+
console.log(` 인용 현황: kiwimu serve 후 /provenance 페이지에서 확인`);
|
|
630
|
+
}
|
|
631
|
+
} catch (e: unknown) {
|
|
632
|
+
const message = e instanceof Error ? e.message : String(e);
|
|
633
|
+
console.error(`\x1b[31m❌ ${message}\x1b[0m`);
|
|
634
|
+
process.exit(1);
|
|
399
635
|
} finally {
|
|
400
636
|
store.close();
|
|
401
637
|
}
|
|
@@ -413,13 +649,13 @@ program
|
|
|
413
649
|
const sources = store.listSources();
|
|
414
650
|
const sourcePages = store.listSourcePages();
|
|
415
651
|
const conceptPages = store.listConceptPages();
|
|
416
|
-
const
|
|
652
|
+
const linkCount = store.countLinks();
|
|
417
653
|
|
|
418
654
|
console.log(`\n\x1b[1m🥝 ${config.project.name}\x1b[0m\n`);
|
|
419
655
|
console.log(` 소스 ${sources.length}`);
|
|
420
656
|
console.log(` 📖 원본 ${sourcePages.length}`);
|
|
421
657
|
console.log(` 📝 개념 ${conceptPages.length}`);
|
|
422
|
-
console.log(` 🔗 링크 ${
|
|
658
|
+
console.log(` 🔗 링크 ${linkCount}`);
|
|
423
659
|
console.log(` 빌드 ${config.build.output_dir}`);
|
|
424
660
|
console.log(` 배포 ${config.deploy.target}`);
|
|
425
661
|
|
|
@@ -436,9 +672,180 @@ program
|
|
|
436
672
|
}
|
|
437
673
|
}
|
|
438
674
|
console.log();
|
|
675
|
+
} catch (e: unknown) {
|
|
676
|
+
const message = e instanceof Error ? e.message : String(e);
|
|
677
|
+
console.error(`\x1b[31m❌ ${message}\x1b[0m`);
|
|
678
|
+
process.exit(1);
|
|
439
679
|
} finally {
|
|
440
680
|
store.close();
|
|
441
681
|
}
|
|
442
682
|
});
|
|
443
683
|
|
|
684
|
+
// --- log ---
|
|
685
|
+
program
  .command("log")
  .description("활동 로그를 표시합니다")
  .option("-n, --count <count>", "표시할 항목 수", "20")
  .option("--action <action>", "액션으로 필터링 (ingest, page_created, quiz_attempted, query 등)")
  /**
   * `log` command: prints the most recent activity-log entries, optionally
   * filtered by action, followed by the total count from `getActivityStats`.
   *
   * Failures are reported in the same red one-line format the other commands
   * use and set exit status 1 via `process.exitCode`, so the `finally` block
   * still closes the store.
   */
  .action((opts) => {
    const root = findProjectRoot();
    const store = new Store(join(root, DB_FILE));
    try {
      // Explicit radix; a non-numeric or zero count falls back to 20.
      const limit = parseInt(opts.count, 10) || 20;
      const entries = store.getActivityLog(limit, 0, opts.action || undefined);

      if (entries.length === 0) {
        console.log("\x1b[33m활동 로그가 없습니다.\x1b[0m");
        return;
      }

      for (const entry of entries) {
        // Pad the action label so the titles line up in a column.
        const action = entry.action.toUpperCase().padEnd(15);
        console.log(`\x1b[2m[${entry.created_at}]\x1b[0m \x1b[36m[${action}]\x1b[0m ${entry.title}`);
      }

      const stats = store.getActivityStats();
      console.log(`\n\x1b[2m총 ${stats.total}건\x1b[0m`);
    } catch (e: unknown) {
      const message = e instanceof Error ? e.message : String(e);
      console.error(`\x1b[31m❌ ${message}\x1b[0m`);
      process.exitCode = 1;
    } finally {
      store.close();
    }
  });
|
|
710
|
+
|
|
711
|
+
// --- schema ---
|
|
712
|
+
program
  .command("schema")
  .description("스키마 설정을 관리합니다")
  .option("--init", "기본 [schema] 섹션을 kiwi.toml에 추가합니다")
  .option("--validate", "기존 페이지가 스키마 규칙에 부합하는지 확인합니다")
  /**
   * `schema` command. Three modes, checked in this order:
   *
   * - `--init`: appends a default `[schema]` section to the config file unless
   *   a `[schema]` or `[schema.*]` table header already exists.
   * - `--validate`: checks every stored page against the configured length
   *   limits, and concept pages additionally against the required sections
   *   and category assignment, printing each violation.
   * - no flag: prints the currently configured schema settings.
   *
   * Any thrown error (e.g. an unreadable config file) is printed as a one-line
   * message and sets exit status 1.
   */
  .action(async (opts: { init?: boolean; validate?: boolean }) => {
    try {
      const root = findProjectRoot();

      if (opts.init) {
        // Generate default schema section and append to kiwi.toml
        const { readFileSync, writeFileSync } = await import("fs");
        const configPath = join(root, CONFIG_FILE);
        const existing = readFileSync(configPath, "utf-8");

        // Only a real table header at the start of a line counts; a mention in
        // a comment does not. `[schema.*]` sub-tables count too, because
        // appending a second `[schema.terms]` would produce a duplicate table.
        const hasSchemaTable = /^[ \t]*\[schema(\.[^\]\r\n]*)?\]/m.test(existing);
        if (hasSchemaTable) {
          console.log("\x1b[33m[schema] 섹션이 이미 존재합니다.\x1b[0m");
          return;
        }

        const defaultSchema = `
[schema]
# Wiki structure rules
categories = ["Fundamentals", "Advanced Topics", "Applications", "History", "People"]
# Naming conventions: 'noun_phrase', 'question', 'topic'
naming_convention = "noun_phrase"
# Content length rules (characters)
min_page_length = 200
max_page_length = 3000

[schema.terms]
# Term standardization: abbreviation = "Standard Form"
# "ML" = "Machine Learning"
# "DL" = "Deep Learning"

[schema.page_template]
sections = ["Definition", "Explanation", "Examples", "Related Concepts"]
`;
        writeFileSync(configPath, existing.trimEnd() + "\n" + defaultSchema);
        console.log("\x1b[32m[schema] 섹션이 kiwi.toml에 추가되었습니다.\x1b[0m");
        console.log(" 필요에 맞게 수정해주세요.");
        return;
      }

      const config = loadConfig(root);
      const schema = config.schema;

      if (opts.validate) {
        if (!schema) {
          console.log("\x1b[33m스키마가 정의되지 않았습니다. 'kiwimu schema --init'으로 생성하세요.\x1b[0m");
          return;
        }

        // Build the heading matchers once instead of once per page. Each one
        // matches a level-2 heading that starts with the section name
        // (case-insensitive), with regex metacharacters in the name escaped.
        const requiredSections = (schema.page_template?.sections ?? []).map((section) => {
          const escaped = section.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
          return { section, pattern: new RegExp(`^##\\s+${escaped}`, "mi") };
        });

        const store = new Store(join(root, DB_FILE));
        try {
          const pages = store.listPages();
          let issueCount = 0;

          for (const page of pages) {
            const issues: string[] = [];
            const isConcept = page.page_type === "concept";

            // Check min length
            if (schema.min_page_length && page.content.length < schema.min_page_length) {
              issues.push(`길이 ${page.content.length}자 < 최소 ${schema.min_page_length}자`);
            }
            // Check max length
            if (schema.max_page_length && page.content.length > schema.max_page_length) {
              issues.push(`길이 ${page.content.length}자 > 최대 ${schema.max_page_length}자`);
            }
            // Check required sections (concept pages only)
            if (isConcept) {
              for (const { section, pattern } of requiredSections) {
                if (!pattern.test(page.content)) {
                  issues.push(`누락된 섹션: "${section}"`);
                }
              }
            }
            // Check category assignment (concept pages only)
            if (schema.categories?.length && isConcept && !page.category) {
              issues.push("카테고리 미지정");
            }

            if (issues.length > 0) {
              issueCount += issues.length;
              console.log(`\x1b[33m ${page.title}\x1b[0m (${page.slug})`);
              for (const issue of issues) {
                console.log(` - ${issue}`);
              }
            }
          }

          if (issueCount === 0) {
            console.log("\x1b[32m모든 페이지가 스키마 규칙에 부합합니다.\x1b[0m");
          } else {
            console.log(`\n\x1b[33m총 ${issueCount}개 이슈 발견 (${pages.length}개 페이지 검사)\x1b[0m`);
          }
        } finally {
          store.close();
        }
        return;
      }

      // Default: display current schema settings
      if (!schema) {
        console.log("\x1b[33m스키마가 정의되지 않았습니다.\x1b[0m");
        console.log(" 'kiwimu schema --init'으로 기본 스키마를 생성하세요.");
        return;
      }

      console.log("\n\x1b[1m[schema] 설정:\x1b[0m\n");

      if (schema.categories?.length) {
        console.log(` 카테고리: ${schema.categories.join(", ")}`);
      }
      if (schema.naming_convention) {
        console.log(` 명명 규칙: ${schema.naming_convention}`);
      }
      if (schema.min_page_length != null) {
        console.log(` 최소 페이지 길이: ${schema.min_page_length}자`);
      }
      if (schema.max_page_length != null) {
        console.log(` 최대 페이지 길이: ${schema.max_page_length}자`);
      }
      if (schema.terms && Object.keys(schema.terms).length > 0) {
        console.log(` 용어 표준화:`);
        for (const [abbrev, standard] of Object.entries(schema.terms)) {
          console.log(` ${abbrev} -> ${standard}`);
        }
      }
      if (schema.page_template?.sections?.length) {
        console.log(` 페이지 템플릿 섹션: ${schema.page_template.sections.join(", ")}`);
      }
      console.log();
    } catch (e: unknown) {
      // The validation store (if opened) is already closed by the inner
      // `finally` by the time control reaches this handler.
      const message = e instanceof Error ? e.message : String(e);
      console.error(`\x1b[31m❌ ${message}\x1b[0m`);
      process.exitCode = 1;
    }
  });
|
|
850
|
+
|
|
444
851
|
program.parse();
|
package/src/ingest/docx.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
export async function extractTextFromDocx(filePath: string): Promise<{ title: string; text: string }> {
|
|
2
|
-
const mammoth =
|
|
2
|
+
const mammoth = await import("mammoth");
|
|
3
3
|
const result = await mammoth.extractRawText({ path: filePath });
|
|
4
4
|
const text: string = result.value;
|
|
5
5
|
const title = filePath.split("/").pop()?.replace(/\.docx?$/i, "") || "Untitled";
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { readFileSync } from "fs";
|
|
2
|
+
import { basename } from "path";
|
|
3
|
+
|
|
4
|
+
/**
 * Reads a Markdown file and returns its body text plus a title.
 *
 * - A leading UTF-8 byte-order mark is dropped.
 * - A YAML frontmatter block is removed only when the file opens with a line
 *   that is exactly `---` and a later line that is exactly `---` closes it.
 *   A `---` appearing inside a frontmatter value is not treated as the closing
 *   fence. When frontmatter is removed, the remaining text is trimmed;
 *   otherwise the text is returned as read.
 * - The title is the first level-1 ATX heading (`# Title`) in the remaining
 *   text, or the file name without its `.md` extension (any letter case) when
 *   there is no such heading.
 *
 * @param filePath Path of the Markdown file to read.
 * @returns The detected title and the text with frontmatter stripped.
 * @throws Whatever `readFileSync` throws when the file cannot be read.
 */
export function extractTextFromMarkdown(filePath: string): { title: string; text: string } {
  const content = readFileSync(filePath, "utf-8");

  // A BOM would otherwise sit in front of the opening fence and hide it.
  let text = content.charCodeAt(0) === 0xfeff ? content.slice(1) : content;

  // Opening fence line, lazily-matched body made of whole lines, closing fence
  // line. No `m` flag: `^` anchors to the very start of the file only.
  const frontmatter = /^---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?:\r?\n|$)/.exec(text);
  if (frontmatter) {
    text = text.slice(frontmatter[0].length).trim();
  }

  // `[ \t]+` (not `\s+`) keeps the heading on a single line: a bare `#`
  // followed by a blank line must not adopt the next paragraph as its title.
  const heading = /^#[ \t]+(.+)$/m.exec(text)?.[1]?.trim();
  const title = heading ? heading : basename(filePath).replace(/\.md$/i, "");

  return { title, text };
}
|