@open330/kiwimu 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +230 -0
- package/assets/logos/logo_2_minimalist_icon.png +0 -0
- package/assets/logos/logo_2_minimalist_icon_transparent.png +0 -0
- package/package.json +62 -0
- package/src/build/renderer.ts +128 -0
- package/src/build/static/graph.js +114 -0
- package/src/build/static/search.js +66 -0
- package/src/build/static/style.css +853 -0
- package/src/build/templates.ts +616 -0
- package/src/config.ts +54 -0
- package/src/deploy.ts +32 -0
- package/src/expand/llm.ts +63 -0
- package/src/index.ts +615 -0
- package/src/ingest/docx.ts +15 -0
- package/src/ingest/legacy.ts +66 -0
- package/src/ingest/pdf.ts +14 -0
- package/src/ingest/pptx.ts +39 -0
- package/src/ingest/web.ts +77 -0
- package/src/llm-client.ts +177 -0
- package/src/pipeline/chunker.ts +63 -0
- package/src/pipeline/graph.ts +35 -0
- package/src/pipeline/linker.ts +49 -0
- package/src/pipeline/llm-chunker.ts +368 -0
- package/src/pipeline/llm-linker.ts +84 -0
- package/src/store.ts +209 -0
package/src/index.ts
ADDED
|
@@ -0,0 +1,615 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
|
|
3
|
+
import { Command } from "commander";
|
|
4
|
+
import { join } from "path";
|
|
5
|
+
import { CONFIG_FILE, DB_FILE, defaultConfig, findProjectRoot, loadConfig, saveConfig } from "./config";
|
|
6
|
+
import { Store } from "./store";
|
|
7
|
+
|
|
8
|
+
const program = new Command()
|
|
9
|
+
.name("kiwimu")
|
|
10
|
+
.description("π₯ Kiwi Mu β λλ§μ νμ΅ μν€λ₯Ό λ§λμΈμ")
|
|
11
|
+
.version("0.2.0");
|
|
12
|
+
|
|
13
|
+
// --- init ---
|
|
14
|
+
program
|
|
15
|
+
.command("init [name]")
|
|
16
|
+
.description("μ Kiwi Mu νλ‘μ νΈλ₯Ό μμ±ν©λλ€")
|
|
17
|
+
.action(async (name?: string) => {
|
|
18
|
+
const root = process.cwd();
|
|
19
|
+
if (Bun.file(join(root, CONFIG_FILE)).size > 0) {
|
|
20
|
+
try {
|
|
21
|
+
require("fs").accessSync(join(root, CONFIG_FILE));
|
|
22
|
+
console.log("\x1b[33mμ΄λ―Έ μ΄κΈ°νλ νλ‘μ νΈμ
λλ€.\x1b[0m");
|
|
23
|
+
return;
|
|
24
|
+
} catch {}
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
const p = await import("@clack/prompts");
|
|
28
|
+
|
|
29
|
+
p.intro("π₯ Kiwi Mu β μ νμ΅ μν€ λ§λ€κΈ°");
|
|
30
|
+
|
|
31
|
+
const values = await p.group({
|
|
32
|
+
name: () =>
|
|
33
|
+
p.text({
|
|
34
|
+
message: "μν€ μ΄λ¦",
|
|
35
|
+
placeholder: "My Study Wiki",
|
|
36
|
+
initialValue: name || "",
|
|
37
|
+
validate: (v) => (!v.trim() ? "μ΄λ¦μ μ
λ ₯ν΄μ£ΌμΈμ" : undefined),
|
|
38
|
+
}),
|
|
39
|
+
provider: () =>
|
|
40
|
+
p.select({
|
|
41
|
+
message: "LLM νλ‘λ°μ΄λ",
|
|
42
|
+
options: [
|
|
43
|
+
{ value: "gemini", label: "Google Gemini", hint: "λ¬΄λ£ API key (aistudio.google.com)" },
|
|
44
|
+
{ value: "azure-openai", label: "Azure OpenAI" },
|
|
45
|
+
{ value: "openai", label: "OpenAI" },
|
|
46
|
+
{ value: "anthropic", label: "Anthropic Claude" },
|
|
47
|
+
],
|
|
48
|
+
}),
|
|
49
|
+
model: ({ results }) =>
|
|
50
|
+
p.text({
|
|
51
|
+
message: "λͺ¨λΈλͺ
",
|
|
52
|
+
placeholder:
|
|
53
|
+
results.provider === "gemini" ? "gemini-2.0-flash-lite" :
|
|
54
|
+
results.provider === "azure-openai" ? "gpt-5-nano" :
|
|
55
|
+
results.provider === "openai" ? "gpt-4o-mini" : "claude-sonnet-4-20250514",
|
|
56
|
+
initialValue:
|
|
57
|
+
results.provider === "gemini" ? "gemini-2.0-flash-lite" :
|
|
58
|
+
results.provider === "azure-openai" ? "gpt-5-nano" :
|
|
59
|
+
results.provider === "openai" ? "gpt-4o-mini" : "claude-sonnet-4-20250514",
|
|
60
|
+
}),
|
|
61
|
+
apiKey: () =>
|
|
62
|
+
p.password({
|
|
63
|
+
message: "API Key",
|
|
64
|
+
validate: (v) => (!v.trim() ? "API Keyλ₯Ό μ
λ ₯ν΄μ£ΌμΈμ" : undefined),
|
|
65
|
+
}),
|
|
66
|
+
endpoint: ({ results }) =>
|
|
67
|
+
results.provider === "azure-openai"
|
|
68
|
+
? p.text({ message: "Azure Endpoint", placeholder: "https://..." })
|
|
69
|
+
: Promise.resolve(""),
|
|
70
|
+
});
|
|
71
|
+
|
|
72
|
+
if (p.isCancel(values)) {
|
|
73
|
+
p.cancel("μ·¨μλμμ΅λλ€.");
|
|
74
|
+
process.exit(0);
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
const config = defaultConfig(values.name as string);
|
|
78
|
+
config.llm.provider = values.provider as string;
|
|
79
|
+
config.llm.model = values.model as string;
|
|
80
|
+
config.llm.api_key = values.apiKey as string;
|
|
81
|
+
config.llm.endpoint = (values.endpoint as string) || "";
|
|
82
|
+
saveConfig(root, config);
|
|
83
|
+
|
|
84
|
+
const store = new Store(join(root, DB_FILE));
|
|
85
|
+
store.initSchema();
|
|
86
|
+
store.close();
|
|
87
|
+
|
|
88
|
+
p.outro(`π₯ '${values.name}' μν€κ° μμ±λμμ΅λλ€! λ€μ: kiwimu add <URL λλ νμΌ>`);
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
// --- add ---
|
|
92
|
+
program
|
|
93
|
+
.command("add <source>")
|
|
94
|
+
.description("URL λλ PDF νμΌμ μΆκ°ν©λλ€")
|
|
95
|
+
.action(async (source: string) => {
|
|
96
|
+
const root = findProjectRoot();
|
|
97
|
+
const store = new Store(join(root, DB_FILE));
|
|
98
|
+
|
|
99
|
+
const isUrl = source.startsWith("http://") || source.startsWith("https://");
|
|
100
|
+
const isPdf = source.toLowerCase().endsWith(".pdf");
|
|
101
|
+
|
|
102
|
+
if (isUrl) {
|
|
103
|
+
await addUrl(store, source);
|
|
104
|
+
} else if (isPdf) {
|
|
105
|
+
await addPdf(store, source);
|
|
106
|
+
} else {
|
|
107
|
+
console.log(`\x1b[31mμ§μνμ§ μλ μμ€ νμ: ${source}\x1b[0m`);
|
|
108
|
+
console.log("URL (http/https) λλ PDF νμΌμ μ
λ ₯ν΄μ£ΌμΈμ.");
|
|
109
|
+
store.close();
|
|
110
|
+
return;
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
store.close();
|
|
114
|
+
});
|
|
115
|
+
|
|
116
|
+
async function initLLM(root: string) {
|
|
117
|
+
const config = loadConfig(root);
|
|
118
|
+
const { setLLMConfig } = await import("./llm-client");
|
|
119
|
+
setLLMConfig(config.llm);
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
// Fetch a web page, store it as a source, run the LLM chunker over its raw
// text, and record the token usage for this ingest run.
async function addUrl(store: Store, url: string) {
  const { fetchPage } = await import("./ingest/web");
  const { llmChunkDocument, htmlToRawText } = await import("./pipeline/llm-chunker");

  const root = findProjectRoot();
  await initLLM(root);

  console.log(`\x1b[34mπ₯ URL κ°μ Έμ€λ μ€: ${url}\x1b[0m`);
  const { title, html } = await fetchPage(url);
  console.log(`  μ λͺ©: ${title}`);

  // The full HTML is stored with the source; chunking works on stripped text.
  const source = store.addSource(url, "web", title, html);
  const rawText = htmlToRawText(html);

  console.log("\x1b[34mπ LLM κΈ°λ° λ¬Έμ λΆμ μ€...\x1b[0m");
  const { sourceCount, conceptCount } = await llmChunkDocument(rawText, title, source.id, store);
  console.log(`\x1b[32mβ π ${sourceCount}κ° μλ³Έ + π ${conceptCount}κ° κ°λ λ¬Έμ μμ±\x1b[0m`);

  // Log token usage / estimated cost against this source for the admin view.
  const { getUsageStats, getEstimatedCost, printUsageSummary } = await import("./llm-client");
  printUsageSummary();
  const u = getUsageStats();
  store.addUsageLog(source.id, u.totalCalls, u.promptTokens, u.completionTokens, u.totalTokens, getEstimatedCost());
}
|
|
145
|
+
|
|
146
|
+
// Extract text from a local PDF, store it as a source, run the LLM chunker,
// and record the token usage for this ingest run.
async function addPdf(store: Store, pdfPath: string) {
  const { extractTextFromPdf } = await import("./ingest/pdf");
  const { llmChunkDocument } = await import("./pipeline/llm-chunker");
  const { resolve } = await import("path");

  const absPath = resolve(pdfPath);
  const file = Bun.file(absPath);
  if (!(await file.exists())) {
    console.log(`\x1b[31mνμΌμ μ°Ύμ μ μμ΅λλ€: ${pdfPath}\x1b[0m`);
    return;
  }

  const root = findProjectRoot();
  await initLLM(root);

  console.log(`\x1b[34mπ₯ PDF μ²λ¦¬ μ€: ${pdfPath}\x1b[0m`);
  const { title, text } = await extractTextFromPdf(absPath);
  console.log(`  μ λͺ©: ${title}`);
  console.log(`  νμ€νΈ κΈΈμ΄: ${text.length.toLocaleString()} μ`);

  // Raw PDF bytes are not stored; only a placeholder marker.
  const source = store.addSource(absPath, "pdf", title, "(PDF)");

  console.log("\x1b[34mπ LLM κΈ°λ° λ¬Έμ λΆμ μ€...\x1b[0m");
  const { sourceCount, conceptCount } = await llmChunkDocument(text, title, source.id, store);
  console.log(`\x1b[32mβ π ${sourceCount}κ° μλ³Έ + π ${conceptCount}κ° κ°λ λ¬Έμ μμ±\x1b[0m`);

  // Log token usage / estimated cost against this source for the admin view.
  const { getUsageStats, getEstimatedCost, printUsageSummary } = await import("./llm-client");
  printUsageSummary();
  const u = getUsageStats();
  store.addUsageLog(source.id, u.totalCalls, u.promptTokens, u.completionTokens, u.totalTokens, getEstimatedCost());
}
|
|
177
|
+
|
|
178
|
+
// --- expand ---
|
|
179
|
+
// `kiwimu expand` — optionally rewrite/expand each wiki page via an LLM.
// Provider precedence: --provider flag, then config.expand.provider.
// "*-cli" providers shell out to a local CLI tool instead of an API.
program
  .command("expand")
  .description("LLMμ μ¬μ©ν΄ λ¬Έμλ₯Ό νμ₯ν©λλ€ (μ νμ¬ν)")
  .option("--provider <provider>", "anthropic | openai | claude-cli | codex-cli")
  .option("--model <model>", "λͺ¨λΈ μ΄λ¦")
  .option("--pages <slugs...>", "νΉμ νμ΄μ§λ§ νμ₯")
  .action(async (opts) => {
    const root = findProjectRoot();
    const config = loadConfig(root);
    const store = new Store(join(root, DB_FILE));

    const provider: string = opts.provider || config.expand.provider;
    if (!provider) {
      console.log("\x1b[33mνμ₯ νλ‘λ°μ΄λκ° μ€μ λμ§ μμμ΅λλ€.\x1b[0m");
      console.log("μ¬μ©λ²: kiwimu expand --provider anthropic");
      store.close();
      return;
    }

    // --pages narrows the work set; allPages is still passed to the expander
    // so cross-page links can be resolved.
    const allPages = store.listPages();
    let pages = allPages;
    if (opts.pages) {
      pages = allPages.filter((p) => (opts.pages as string[]).includes(p.slug));
    }

    console.log(`\x1b[34mπ§ ${pages.length}κ° λ¬Έμλ₯Ό νμ₯ν©λλ€...\x1b[0m`);

    const isCli = provider === "claude-cli" || provider === "codex-cli";
    const { expandWithApi, expandWithCli } = await import("./expand/llm");

    // Sequential on purpose: one page at a time, failures are logged and
    // skipped rather than aborting the whole run.
    for (let i = 0; i < pages.length; i++) {
      const page = pages[i];
      console.log(`  [${i + 1}/${pages.length}] ${page.title}`);
      try {
        const newContent = isCli
          ? await expandWithCli(page, allPages, provider.replace("-cli", ""))
          : await expandWithApi(page, allPages, provider, opts.model);
        store.updatePageContent(page.id, newContent);
      } catch (e: any) {
        console.log(`    \x1b[31mμ€ν¨: ${e.message}\x1b[0m`);
      }
    }

    // Expanded content may mention new page titles — refresh wiki links.
    const { autoLinkPages } = await import("./pipeline/linker");
    const linkCount = autoLinkPages(store);
    console.log(`\x1b[32mβ νμ₯ μλ£! (${linkCount}κ° λ§ν¬ κ°±μ )\x1b[0m`);
    store.close();
  });
|
|
227
|
+
|
|
228
|
+
// --- build ---
|
|
229
|
+
program
|
|
230
|
+
.command("build")
|
|
231
|
+
.description("μ μ μν€ μ¬μ΄νΈλ₯Ό μμ±ν©λλ€")
|
|
232
|
+
.action(async () => {
|
|
233
|
+
const root = findProjectRoot();
|
|
234
|
+
const config = loadConfig(root);
|
|
235
|
+
const store = new Store(join(root, DB_FILE));
|
|
236
|
+
|
|
237
|
+
const { buildSite } = await import("./build/renderer");
|
|
238
|
+
|
|
239
|
+
console.log("\x1b[34mπ¨ μν€ λΉλ μ€...\x1b[0m");
|
|
240
|
+
const count = await buildSite(store, config, root);
|
|
241
|
+
console.log(`\x1b[32mβ
${count}κ° νμ΄μ§κ° λΉλλμμ΅λλ€!\x1b[0m`);
|
|
242
|
+
console.log(` μΆλ ₯: ${join(root, config.build.output_dir)}/`);
|
|
243
|
+
store.close();
|
|
244
|
+
});
|
|
245
|
+
|
|
246
|
+
// --- deploy ---
|
|
247
|
+
program
|
|
248
|
+
.command("deploy")
|
|
249
|
+
.description("μν€λ₯Ό GitHub Pagesμ λ°°ν¬ν©λλ€")
|
|
250
|
+
.option("--target <target>", "λ°°ν¬ λμ (gh-pages | vercel)", "gh-pages")
|
|
251
|
+
.option("--message <message>", "μ»€λ° λ©μμ§", "deploy: update wiki")
|
|
252
|
+
.action(async (opts) => {
|
|
253
|
+
const root = findProjectRoot();
|
|
254
|
+
const config = loadConfig(root);
|
|
255
|
+
const siteDir = join(root, config.build.output_dir);
|
|
256
|
+
|
|
257
|
+
// Auto-build before deploy
|
|
258
|
+
const store = new Store(join(root, DB_FILE));
|
|
259
|
+
const { buildSite } = await import("./build/renderer");
|
|
260
|
+
console.log("\x1b[34mπ¨ λΉλ μ€...\x1b[0m");
|
|
261
|
+
const count = await buildSite(store, config, root);
|
|
262
|
+
console.log(`\x1b[32m ${count}κ° νμ΄μ§ λΉλ μλ£\x1b[0m`);
|
|
263
|
+
store.close();
|
|
264
|
+
|
|
265
|
+
console.log(`\x1b[34mπ ${opts.target}μ λ°°ν¬ μ€...\x1b[0m`);
|
|
266
|
+
|
|
267
|
+
if (opts.target === "gh-pages") {
|
|
268
|
+
const { deployGhPages } = await import("./deploy");
|
|
269
|
+
await deployGhPages(siteDir, opts.message);
|
|
270
|
+
console.log("\x1b[32mβ
GitHub Pagesμ λ°°ν¬λμμ΅λλ€!\x1b[0m");
|
|
271
|
+
// Try to get the pages URL
|
|
272
|
+
try {
|
|
273
|
+
const proc = Bun.spawn(["gh", "repo", "view", "--json", "url", "-q", ".url"], { stdout: "pipe" });
|
|
274
|
+
const repoUrl = (await new Response(proc.stdout).text()).trim();
|
|
275
|
+
if (repoUrl) {
|
|
276
|
+
const owner = repoUrl.split("/").slice(-2).join("/").replace("https://github.com/", "");
|
|
277
|
+
const [user, repo] = owner.split("/");
|
|
278
|
+
console.log(` https://${user}.github.io/${repo}/`);
|
|
279
|
+
}
|
|
280
|
+
} catch {}
|
|
281
|
+
} else if (opts.target === "vercel") {
|
|
282
|
+
const { deployVercel } = await import("./deploy");
|
|
283
|
+
await deployVercel(siteDir);
|
|
284
|
+
console.log("\x1b[32mβ
Vercelμ λ°°ν¬λμμ΅λλ€!\x1b[0m");
|
|
285
|
+
} else {
|
|
286
|
+
console.log(`\x1b[31mμ§μνμ§ μλ λ°°ν¬ λμ: ${opts.target}\x1b[0m`);
|
|
287
|
+
}
|
|
288
|
+
});
|
|
289
|
+
|
|
290
|
+
// --- serve (dev) ---
|
|
291
|
+
// `kiwimu serve` — dev server: serves the built static site and exposes a
// small JSON API (upload/add/settings/build/status) plus an /admin page so
// documents can be added from the browser. Long-running ingest work runs as
// fire-and-forget async blocks guarded by a single `isProcessing` flag.
program
  .command("serve")
  .description("μν€ μλ²λ₯Ό μ€νν©λλ€ (μΉμμ λ¬Έμ μΆκ° κ°λ₯)")
  .option("-p, --port <port>", "ν¬νΈ λ²νΈ", "8000")
  .option("-H, --host <host>", "λ°μΈλ μ£Όμ", "localhost")
  .action(async (opts) => {
    const root = findProjectRoot();
    // NOTE: `config` is captured by the fetch handler below and mutated in
    // place by the POST /api/settings branch (Object.assign).
    const config = loadConfig(root);
    const siteDir = join(root, config.build.output_dir);

    const { existsSync } = await import("fs");

    // Auto-build if needed: first run with no output directory yet.
    if (!existsSync(siteDir)) {
      const store = new Store(join(root, DB_FILE));
      const { buildSite } = await import("./build/renderer");
      await buildSite(store, config, root);
      store.close();
    }

    // Shared mutable state between requests: only one ingest/build job may
    // run at a time; processingStatus is polled via GET /api/status.
    let isProcessing = false;
    let processingStatus = "";

    const port = parseInt(opts.port);
    const hostname = opts.host;
    console.log(`\x1b[32mπ₯ Kiwi Mu μλ² μμ!\x1b[0m`);
    console.log(`  http://${hostname === "0.0.0.0" ? "localhost" : hostname}:${port}`);
    if (hostname === "0.0.0.0") console.log("  λ€νΈμν¬μ 곡κ°λ¨ (0.0.0.0)");
    console.log("  μΉμμ λ¬Έμ μΆκ° κ°λ₯ν©λλ€.\n");

    Bun.serve({
      port,
      hostname,
      async fetch(req) {
        const url = new URL(req.url);

        // ββ API endpoints ββ

        // File upload endpoint: accept one document, extract its text,
        // chunk it with the LLM, and rebuild the site — all in the background.
        if (url.pathname === "/api/upload" && req.method === "POST") {
          // 409 while another job is running.
          if (isProcessing) {
            return Response.json({ error: "μ΄λ―Έ μ²λ¦¬ μ€μλλ€", status: processingStatus }, { status: 409 });
          }

          const formData = await req.formData();
          const file = formData.get("file") as File | null;
          if (!file) {
            return Response.json({ error: "νμΌμ΄ νμν©λλ€" }, { status: 400 });
          }

          const ext = file.name.split(".").pop()?.toLowerCase() || "";
          const supported = ["pdf", "docx", "doc", "pptx", "ppt", "key", "rtf"];
          if (!supported.includes(ext)) {
            return Response.json({ error: `μ§μνμ§ μλ νμ: .${ext}. μ§μ: ${supported.join(", ")}` }, { status: 400 });
          }

          // Save uploaded file under <root>/uploads before responding.
          const uploadDir = join(root, "uploads");
          const { mkdirSync } = await import("fs");
          mkdirSync(uploadDir, { recursive: true });
          const filePath = join(uploadDir, file.name);
          await Bun.write(filePath, await file.arrayBuffer());

          isProcessing = true;
          processingStatus = "νμΌ μ²λ¦¬ μμ...";

          // Fire-and-forget: the HTTP response returns immediately; progress
          // is reported through processingStatus.
          (async () => {
            try {
              const store = new Store(join(root, DB_FILE));
              const { setLLMConfig, resetUsageStats, getUsageStats, getEstimatedCost } = await import("./llm-client");
              setLLMConfig(loadConfig(root).llm);
              const { llmChunkDocument } = await import("./pipeline/llm-chunker");
              resetUsageStats();

              let title: string;
              let text: string;

              // Dispatch extraction by extension; unknown-but-supported
              // formats fall through to the macOS textutil/strings path.
              if (ext === "pdf") {
                const { extractTextFromPdf } = await import("./ingest/pdf");
                processingStatus = "PDF νμ€νΈ μΆμΆ μ€...";
                ({ title, text } = await extractTextFromPdf(filePath));
              } else if (ext === "docx") {
                const { extractTextFromDocx } = await import("./ingest/docx");
                processingStatus = "DOCX νμ€νΈ μΆμΆ μ€...";
                ({ title, text } = await extractTextFromDocx(filePath));
              } else if (ext === "pptx") {
                const { extractTextFromPptx } = await import("./ingest/pptx");
                processingStatus = "PPTX νμ€νΈ μΆμΆ μ€...";
                ({ title, text } = await extractTextFromPptx(filePath));
              } else {
                const { extractWithTextutil } = await import("./ingest/legacy");
                processingStatus = `${ext.toUpperCase()} νμ€νΈ μΆμΆ μ€...`;
                ({ title, text } = await extractWithTextutil(filePath));
              }

              const src = store.addSource(filePath, ext, title, "(file)");
              // Clean up old pages from previous processing of same source
              store.deletePagesBySource(src.id);

              processingStatus = "LLM λΆμ μ€...";
              await llmChunkDocument(text, title, src.id, store);

              const u = getUsageStats();
              store.addUsageLog(src.id, u.totalCalls, u.promptTokens, u.completionTokens, u.totalTokens, getEstimatedCost());

              processingStatus = "λΉλ μ€...";
              const { buildSite } = await import("./build/renderer");
              await buildSite(store, config, root);
              store.close();

              processingStatus = "μλ£!";
            } catch (e: any) {
              processingStatus = `μ€λ₯: ${e.message}`;
            } finally {
              // Keep the final status visible for 2s before unlocking.
              setTimeout(() => { isProcessing = false; }, 2000);
            }
          })();

          return Response.json({ ok: true, message: "νμΌ μ²λ¦¬ μμ" });
        }

        // URL add endpoint: same pipeline as `kiwimu add <url>`, but async.
        if (url.pathname === "/api/add" && req.method === "POST") {
          if (isProcessing) {
            return Response.json({ error: "μ΄λ―Έ μ²λ¦¬ μ€μλλ€", status: processingStatus }, { status: 409 });
          }

          const body = await req.json() as { source: string };
          if (!body.source) {
            return Response.json({ error: "sourceκ° νμν©λλ€" }, { status: 400 });
          }

          isProcessing = true;
          processingStatus = "μμ μ€...";

          // Fire-and-forget, mirroring the upload endpoint above.
          (async () => {
            try {
              const store = new Store(join(root, DB_FILE));
              const { setLLMConfig, resetUsageStats, getUsageStats, getEstimatedCost } = await import("./llm-client");
              setLLMConfig(loadConfig(root).llm);
              resetUsageStats();

              const source = body.source;
              const { fetchPage } = await import("./ingest/web");
              const { llmChunkDocument, htmlToRawText } = await import("./pipeline/llm-chunker");

              processingStatus = "URL κ°μ Έμ€λ μ€...";
              const { title, html } = await fetchPage(source);
              const src = store.addSource(source, "web", title, html);
              const rawText = htmlToRawText(html);

              processingStatus = "LLM λΆμ μ€...";
              await llmChunkDocument(rawText, title, src.id, store);

              const u = getUsageStats();
              store.addUsageLog(src.id, u.totalCalls, u.promptTokens, u.completionTokens, u.totalTokens, getEstimatedCost());

              processingStatus = "λΉλ μ€...";
              const { buildSite } = await import("./build/renderer");
              await buildSite(store, config, root);
              store.close();

              processingStatus = "μλ£!";
            } catch (e: any) {
              processingStatus = `μ€λ₯: ${e.message}`;
            } finally {
              setTimeout(() => { isProcessing = false; }, 2000);
            }
          })();

          return Response.json({ ok: true, message: "μ²λ¦¬ μμ" });
        }

        // Admin API - update LLM settings (partial update: only provided
        // fields change) and rebuild the site in the background.
        if (url.pathname === "/api/settings" && req.method === "POST") {
          const body = await req.json() as any;
          const currentConfig = loadConfig(root);
          if (body.wiki_name) currentConfig.project.name = body.wiki_name;
          if (body.provider) currentConfig.llm.provider = body.provider;
          if (body.model) currentConfig.llm.model = body.model;
          // api_key/endpoint use !== undefined so an empty string can clear them.
          if (body.api_key !== undefined) currentConfig.llm.api_key = body.api_key;
          if (body.endpoint !== undefined) currentConfig.llm.endpoint = body.endpoint;
          saveConfig(root, currentConfig);
          // Reload config for serve (mutates the closure-captured `config`).
          Object.assign(config, currentConfig);

          // Auto-rebuild site with new settings (best-effort, background).
          (async () => {
            try {
              const store = new Store(join(root, DB_FILE));
              const { buildSite } = await import("./build/renderer");
              await buildSite(store, currentConfig, root);
              store.close();
              console.log("\x1b[32mβ μ€μ λ³κ²½ ν μ¬μ΄νΈ 리λΉλ μλ£\x1b[0m");
            } catch (e: any) {
              console.log(`\x1b[31m리λΉλ μ€ν¨: ${e.message}\x1b[0m`);
            }
          })();

          return Response.json({ ok: true });
        }

        // Read settings; the API key is masked down to its last 4 characters.
        if (url.pathname === "/api/settings" && req.method === "GET") {
          const currentConfig = loadConfig(root);
          // Mask API key
          const masked = { ...currentConfig.llm, api_key: currentConfig.llm.api_key ? "β’β’β’β’" + currentConfig.llm.api_key.slice(-4) : "" };
          return Response.json(masked);
        }

        // Build API: trigger a rebuild in the background.
        if (url.pathname === "/api/build" && req.method === "POST") {
          if (isProcessing) {
            return Response.json({ error: "μ΄λ―Έ μ²λ¦¬ μ€μλλ€" }, { status: 409 });
          }
          isProcessing = true;
          processingStatus = "λΉλ μ€...";
          (async () => {
            try {
              const store = new Store(join(root, DB_FILE));
              const { buildSite } = await import("./build/renderer");
              await buildSite(store, loadConfig(root), root);
              store.close();
              processingStatus = "λΉλ μλ£!";
              console.log("\x1b[32mβ μλ λΉλ μλ£\x1b[0m");
            } catch (e: any) {
              processingStatus = `λΉλ μ€λ₯: ${e.message}`;
            } finally {
              setTimeout(() => { isProcessing = false; }, 2000);
            }
          })();
          return Response.json({ ok: true, message: "λΉλ μμ" });
        }

        // Admin page: server-rendered HTML dashboard.
        if (url.pathname === "/admin") {
          const store = new Store(join(root, DB_FILE));
          const sources = store.listSources();
          const usage = store.getUsageSummary();
          const configData = loadConfig(root);
          store.close();

          const { renderAdmin } = await import("./build/templates");
          return new Response(renderAdmin({
            wikiName: configData.project.name,
            sources,
            usage,
            llmConfig: configData.llm,
          }), { headers: { "Content-Type": "text/html" } });
        }

        // Status polling endpoint used by the browser while jobs run.
        if (url.pathname === "/api/status") {
          const store = new Store(join(root, DB_FILE));
          const sources = store.listSources();
          const sourcePages = store.listSourcePages();
          const conceptPages = store.listConceptPages();
          const links = store.getAllLinks();
          const usage = store.getUsageSummary();
          store.close();

          return Response.json({
            processing: isProcessing,
            processingStatus,
            sources: sources.length,
            sourcePages: sourcePages.length,
            conceptPages: conceptPages.length,
            links: links.length,
            usage,
          });
        }

        // ββ Static file serving ββ
        let pathname = url.pathname;
        if (pathname === "/") pathname = "/index.html";

        const filePath = join(siteDir, pathname);
        const file = Bun.file(filePath);

        if (await file.exists()) {
          return new Response(file);
        }
        return new Response("Not Found", { status: 404 });
      },
    });
  });
|
|
575
|
+
|
|
576
|
+
// --- status ---
|
|
577
|
+
// `kiwimu status` — print a summary of the project: source count, page
// counts by kind, link count, and the build/deploy configuration, followed
// by a listing of every page.
program
  .command("status")
  .description("νμ¬ ν€μ μνλ₯Ό νμν©λλ€")
  .action(() => {
    const root = findProjectRoot();
    const config = loadConfig(root);
    const store = new Store(join(root, DB_FILE));

    const sources = store.listSources();
    const sourcePages = store.listSourcePages();
    const conceptPages = store.listConceptPages();
    const links = store.getAllLinks();

    console.log(`\n\x1b[1mπ₯ ${config.project.name}\x1b[0m\n`);
    console.log(`  μμ€ ${sources.length}`);
    console.log(`  π μλ³Έ ${sourcePages.length}`);
    console.log(`  π κ°λ ${conceptPages.length}`);
    console.log(`  π λ§ν¬ ${links.length}`);
    console.log(`  λΉλ ${config.build.output_dir}`);
    console.log(`  λ°°ν¬ ${config.deploy.target}`);

    // List source-derived pages, then concept pages (slug shown dimmed).
    if (sourcePages.length) {
      console.log("\n\x1b[1mπ μλ³Έ λ¬Έμ:\x1b[0m");
      for (const p of sourcePages) {
        console.log(`  β’ ${p.title} \x1b[2m(${p.slug})\x1b[0m`);
      }
    }
    if (conceptPages.length) {
      console.log("\n\x1b[1mπ κ°λ λ¬Έμ:\x1b[0m");
      for (const p of conceptPages) {
        console.log(`  β’ ${p.title} \x1b[2m(${p.slug})\x1b[0m`);
      }
    }

    console.log();
    store.close();
  });
|
|
614
|
+
|
|
615
|
+
// Hand argv to commander; dispatches to one of the command actions above.
program.parse();
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
export async function extractTextFromDocx(filePath: string): Promise<{ title: string; text: string }> {
|
|
2
|
+
const mammoth = require("mammoth");
|
|
3
|
+
const result = await mammoth.extractRawText({ path: filePath });
|
|
4
|
+
const text: string = result.value;
|
|
5
|
+
const title = filePath.split("/").pop()?.replace(/\.docx?$/i, "") || "Untitled";
|
|
6
|
+
return { title, text };
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
export async function extractHtmlFromDocx(filePath: string): Promise<{ title: string; html: string }> {
|
|
10
|
+
const mammoth = require("mammoth");
|
|
11
|
+
const result = await mammoth.convertToHtml({ path: filePath });
|
|
12
|
+
const html: string = result.value;
|
|
13
|
+
const title = filePath.split("/").pop()?.replace(/\.docx?$/i, "") || "Untitled";
|
|
14
|
+
return { title, html };
|
|
15
|
+
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
 * Extract text from legacy formats (DOC, PPT, KEY) using macOS textutil/CLI tools.
 *
 * Strategy, by extension:
 *  - doc/rtf/odt: `textutil -convert txt -stdout` (macOS only).
 *  - key:         best-effort `strings` scan of the Keynote package,
 *                 filtered to plausible human text.
 *  - ppt:         `strings` scan of the legacy binary, same filtering.
 * Throws for any other extension, or when a Keynote yields no text.
 *
 * NOTE(review): stdout is read before awaiting the exit code on purpose —
 * consuming the pipe first avoids blocking a process with large output.
 */
export async function extractWithTextutil(filePath: string): Promise<{ title: string; text: string }> {
  const ext = filePath.split(".").pop()?.toLowerCase() || "";
  const title = filePath.split("/").pop()?.replace(/\.[^.]+$/, "") || "Untitled";

  // textutil supports: doc, docx, rtf, rtfd, html, webarchive, odt, wordml
  const textutilFormats = new Set(["doc", "rtf", "odt"]);

  if (textutilFormats.has(ext)) {
    const proc = Bun.spawn(["textutil", "-convert", "txt", "-stdout", filePath], {
      stdout: "pipe",
      stderr: "pipe",
    });
    const text = await new Response(proc.stdout).text();
    const exitCode = await proc.exited;
    if (exitCode !== 0) {
      const stderr = await new Response(proc.stderr).text();
      throw new Error(`textutil failed: ${stderr}`);
    }
    return { title, text };
  }

  // For .key (Keynote), try mdimport for metadata or strings extraction
  if (ext === "key") {
    // Try to extract text using mdimport/spotlight metadata
    // (best-effort side effect only; its output is not read).
    try {
      const proc = Bun.spawn(["mdimport", "-d2", filePath], { stdout: "pipe", stderr: "pipe" });
      await proc.exited;
    } catch {}

    // Keynote files are directories or zip-like packages. Try strings extraction.
    const proc = Bun.spawn(["strings", filePath], { stdout: "pipe", stderr: "pipe" });
    const raw = await new Response(proc.stdout).text();
    await proc.exited;

    // Filter to lines that look like actual text content:
    // long enough, containing Latin or Hangul letters, and not starting
    // with markup/structure characters.
    const lines = raw.split("\n").filter((l) => {
      const t = l.trim();
      return t.length > 10 && /[a-zA-Zκ°-ν£]/.test(t) && !/^[{<\[]/.test(t);
    });

    if (!lines.length) {
      throw new Error("Keynote νμΌμμ νμ€νΈλ₯Ό μΆμΆν μ μμ΅λλ€. PDFλ‘ λ΄λ³΄λ΄κΈ° ν λ€μ μλν΄μ£ΌμΈμ.");
    }

    return { title, text: lines.join("\n") };
  }

  // For .ppt (legacy PowerPoint), try textutil or strings
  if (ext === "ppt") {
    const proc = Bun.spawn(["strings", filePath], { stdout: "pipe", stderr: "pipe" });
    const raw = await new Response(proc.stdout).text();
    await proc.exited;

    // Looser filter than .key (length > 5) and also rejects control chars.
    const lines = raw.split("\n").filter((l) => {
      const t = l.trim();
      return t.length > 5 && /[a-zA-Zκ°-ν£]/.test(t) && !/^[{<\[\x00-\x1f]/.test(t);
    });

    return { title, text: lines.join("\n") };
  }

  throw new Error(`μ§μνμ§ μλ νμΌ νμ: .${ext}`);
}
|