@open330/kiwimu 0.4.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +98 -49
- package/bin/kiwimu +1 -1
- package/package.json +4 -1
- package/personas/namuwiki.json +6 -0
- package/src/build/renderer.ts +50 -2
- package/src/build/static/search.js +33 -2
- package/src/build/static/style.css +84 -1
- package/src/build/templates.ts +353 -167
- package/src/config.ts +35 -29
- package/src/demo/sample-data.ts +70 -0
- package/src/demo/setup.ts +31 -0
- package/src/expand/llm.ts +1 -1
- package/src/index.ts +234 -458
- package/src/ingest/docx.ts +0 -8
- package/src/ingest/legacy.ts +4 -4
- package/src/ingest/pdf.ts +1 -1
- package/src/ingest/pptx.ts +0 -1
- package/src/ingest/web.test.ts +41 -0
- package/src/ingest/web.ts +61 -62
- package/src/llm-client.ts +203 -126
- package/src/pipeline/chunker.test.ts +42 -0
- package/src/pipeline/chunker.ts +1 -48
- package/src/pipeline/llm-chunker.ts +144 -59
- package/src/server.ts +327 -0
- package/src/services/ingest.ts +100 -0
- package/src/store.test.ts +132 -0
- package/src/store.ts +206 -2
- package/src/pipeline/llm-linker.ts +0 -84
package/src/server.ts
ADDED
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
import { join } from "path";
|
|
2
|
+
import path from "path";
|
|
3
|
+
import crypto from "crypto";
|
|
4
|
+
import { DB_FILE, loadConfig, saveConfig, getActivePersona } from "./config";
|
|
5
|
+
import { Store } from "./store";
|
|
6
|
+
import type { KiwiConfig } from "./config";
|
|
7
|
+
|
|
8
|
+
/**
 * Starts the Kiwi Mu HTTP server on `host:port` using `Bun.serve`.
 *
 * The server
 *  - serves the built static site from `<root>/<config.build.output_dir>`;
 *  - exposes `/admin` and `/api/*`, guarded by a random per-process token that
 *    is accepted as `Authorization: Bearer <token>` or `?token=<token>`;
 *  - runs ingestion and site builds as background jobs, one at a time, and
 *    reports their progress through `/api/status`.
 *
 * @param root Project root; the config, the database (`DB_FILE`) and the
 *             `uploads/` directory are resolved relative to it.
 * @param port TCP port to listen on.
 * @param host Interface to bind. `0.0.0.0` is printed as `localhost` in the
 *             startup banner.
 */
export function startServer(root: string, port: number, host: string): void {
  const config = loadConfig(root);
  const siteDir = join(root, config.build.output_dir);
  // Absolute site root; every static lookup must stay inside it.
  const siteRoot = path.resolve(siteDir);
  const dbPath = join(root, DB_FILE);
  const htmlContentType = "text/html; charset=utf-8";

  // Single-job guard shared by upload / add / build, plus the status text
  // that /api/status reports while a job is running.
  let isProcessing = false;
  let processingStatus = "";

  const hostname = host;
  const authToken = crypto.randomUUID();
  const displayHost = hostname === "0.0.0.0" ? "localhost" : hostname;
  console.log(`\x1b[32m🥝 Kiwi Mu 서버 시작!\x1b[0m`);
  console.log(` http://${displayHost}:${port}`);
  console.log(` 관리 페이지: http://${displayHost}:${port}/admin?token=${authToken}`);
  console.log(` 인증 토큰: ${authToken}`);
  if (hostname === "0.0.0.0") console.log(" 네트워크에 공개됨 (0.0.0.0)");
  console.log(" 웹에서 문서 추가 가능합니다.\n");

  /** Human-readable message for anything that can be thrown. */
  const describeError = (e: unknown): string => (e instanceof Error ? e.message : String(e));

  /** JSON `{ error }` response with the given HTTP status. */
  const jsonError = (message: string, status: number): Response =>
    Response.json({ error: message }, { status });

  /** 409 response used while another job holds the processing slot. */
  const busyResponse = (): Response =>
    Response.json({ error: "이미 처리 중입니다", status: processingStatus }, { status: 409 });

  /** Parses the request body as a JSON object; `null` for malformed or non-object bodies. */
  const readJsonObject = async (req: Request): Promise<Record<string, unknown> | null> => {
    try {
      const parsed: unknown = await req.json();
      if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) return null;
      return parsed as Record<string, unknown>;
    } catch {
      return null;
    }
  };

  /** Returns `value` when it is a string, otherwise `undefined`. */
  const asString = (value: unknown): string | undefined =>
    typeof value === "string" ? value : undefined;

  /** Validates a persona payload; `null` when it is not an object with a non-empty string `name`. */
  const normalizePersona = (
    value: unknown,
  ): { name: string; description: string; system_prompt: string; content_style: string } | null => {
    if (typeof value !== "object" || value === null) return null;
    const raw = value as Record<string, unknown>;
    const name = asString(raw.name);
    if (!name) return null;
    return {
      name,
      description: asString(raw.description) || "",
      system_prompt: asString(raw.system_prompt) || "",
      content_style: asString(raw.content_style) || "",
    };
  };

  /** Rebuilds the static site; reloads the config from disk unless one is supplied. */
  const rebuildSite = async (store: Store, siteConfig?: KiwiConfig): Promise<void> => {
    const { buildSite } = await import("./build/renderer");
    await buildSite(store, siteConfig ?? loadConfig(root), root);
  };

  /** Opens the store for a synchronous read and always closes it again. */
  const withStore = <T>(read: (store: Store) => T): T => {
    const store = new Store(dbPath);
    try {
      return read(store);
    } finally {
      store.close();
    }
  };

  /**
   * Runs `work` in the background while the processing slot is held.
   * The caller must already have set `isProcessing = true`. Failures are
   * reported through `processingStatus` with `errorPrefix`.
   */
  const runJob = (errorPrefix: string, work: (store: Store) => Promise<void>): void => {
    void (async () => {
      let store: Store | undefined;
      try {
        // Opened inside the try so a failing constructor cannot leave the slot held forever.
        store = new Store(dbPath);
        await work(store);
      } catch (e: unknown) {
        processingStatus = `${errorPrefix}: ${describeError(e)}`;
      } finally {
        // Release the slot after a short delay so pollers still see the final status.
        setTimeout(() => { isProcessing = false; }, 2000);
        store?.close();
      }
    })();
  };

  Bun.serve({
    port,
    hostname,
    async fetch(req) {
      const url = new URL(req.url);

      // ── Auth middleware for /api/* and /admin ──
      if (url.pathname.startsWith("/api/") || url.pathname === "/admin") {
        const authHeader = req.headers.get("Authorization");
        const queryToken = url.searchParams.get("token");
        const bearerToken = authHeader?.startsWith("Bearer ") ? authHeader.slice(7) : null;
        if (bearerToken !== authToken && queryToken !== authToken) {
          return Response.json({ error: "Unauthorized" }, { status: 401 });
        }
      }

      // ── API endpoints ──

      // File upload endpoint
      if (url.pathname === "/api/upload" && req.method === "POST") {
        if (isProcessing) return busyResponse();

        let formData: FormData;
        try {
          formData = await req.formData();
        } catch {
          // A body that is not valid form data cannot carry a file.
          return jsonError("파일이 필요합니다", 400);
        }
        const file = formData.get("file");
        // A plain text field named "file" arrives as a string, not as a file.
        if (file === null || typeof file === "string") {
          return jsonError("파일이 필요합니다", 400);
        }

        const MAX_UPLOAD_SIZE = 50 * 1024 * 1024; // 50MB
        if (file.size > MAX_UPLOAD_SIZE) {
          return jsonError("파일 크기가 50MB를 초과합니다", 413);
        }

        const ext = file.name.split(".").pop()?.toLowerCase() || "";
        const supported = ["pdf", "docx", "doc", "pptx", "ppt", "key", "rtf"];
        if (!supported.includes(ext)) {
          return jsonError(`지원하지 않는 형식: .${ext}. 지원: ${supported.join(", ")}`, 400);
        }

        // The body was read asynchronously, so another request may have taken
        // the slot in the meantime. Re-check and claim without an await between.
        if (isProcessing) return busyResponse();
        isProcessing = true;
        processingStatus = "파일 처리 시작...";

        // Save uploaded file (basename only, so the client cannot choose the directory).
        let filePath: string;
        try {
          const uploadDir = join(root, "uploads");
          const { mkdirSync } = await import("fs");
          mkdirSync(uploadDir, { recursive: true });
          filePath = join(uploadDir, path.basename(file.name));
          await Bun.write(filePath, await file.arrayBuffer());
        } catch (e: unknown) {
          isProcessing = false; // nothing was started; give the slot back
          throw e;
        }

        runJob("오류", async (store) => {
          const { ingestFile } = await import("./services/ingest");
          const currentConfig = loadConfig(root);
          const currentPersona = getActivePersona(currentConfig);

          await ingestFile(root, store, filePath, file.name, currentConfig.llm, currentPersona, (status) => {
            processingStatus = status;
          });

          processingStatus = "빌드 중...";
          await rebuildSite(store);

          processingStatus = "완료!";
        });

        return Response.json({ ok: true, message: "파일 처리 시작" });
      }

      // URL add endpoint
      if (url.pathname === "/api/add" && req.method === "POST") {
        if (isProcessing) return busyResponse();

        const body = await readJsonObject(req);
        if (!body) return jsonError("Invalid JSON body", 400);
        const source = asString(body.source);
        if (!source) {
          return jsonError("source가 필요합니다", 400);
        }

        try {
          const { validateUrl } = await import("./ingest/web");
          validateUrl(source);
        } catch (e: unknown) {
          return jsonError(describeError(e), 400);
        }

        // Re-check after the awaits above, then claim synchronously.
        if (isProcessing) return busyResponse();
        isProcessing = true;
        processingStatus = "시작 중...";

        runJob("오류", async (store) => {
          const { ingestUrl } = await import("./services/ingest");
          const currentConfig = loadConfig(root);
          const currentPersona = getActivePersona(currentConfig);

          await ingestUrl(root, store, source, currentConfig.llm, currentPersona, (status) => {
            processingStatus = status;
          });

          processingStatus = "빌드 중...";
          await rebuildSite(store);

          processingStatus = "완료!";
        });

        return Response.json({ ok: true, message: "처리 시작" });
      }

      // Admin API - update LLM settings
      if (url.pathname === "/api/settings" && req.method === "POST") {
        const body = await readJsonObject(req);
        if (!body) return jsonError("Invalid JSON body", 400);

        const currentConfig = loadConfig(root);
        const wikiName = asString(body.wiki_name);
        const provider = asString(body.provider);
        const model = asString(body.model);
        if (wikiName) currentConfig.project.name = wikiName;
        if (provider) currentConfig.llm.provider = provider;
        if (model) currentConfig.llm.model = model;
        // Present-but-empty (or null) clears the value; an absent key leaves it untouched.
        if (body.api_key !== undefined) currentConfig.llm.api_key = asString(body.api_key) ?? "";
        if (body.endpoint !== undefined) currentConfig.llm.endpoint = asString(body.endpoint) ?? "";
        saveConfig(root, currentConfig);
        // Reload config for serve
        Object.assign(config, currentConfig);

        // Auto-rebuild site with new settings.
        // NOTE(review): this rebuild does not take the processing slot, so it can
        // overlap a running ingest/build — confirm that is acceptable.
        void (async () => {
          let store: Store | undefined;
          try {
            store = new Store(dbPath);
            await rebuildSite(store, currentConfig);
            console.log("\x1b[32m✅ 설정 변경 후 사이트 리빌드 완료\x1b[0m");
          } catch (e: unknown) {
            console.error(`\x1b[31m❌ 리빌드 실패: ${describeError(e)}\x1b[0m`);
          } finally {
            store?.close();
          }
        })();

        return Response.json({ ok: true });
      }

      if (url.pathname === "/api/settings" && req.method === "GET") {
        const currentConfig = loadConfig(root);
        // Mask API key: only the last four characters are ever sent back.
        const masked = { ...currentConfig.llm, api_key: currentConfig.llm.api_key ? "••••" + currentConfig.llm.api_key.slice(-4) : "" };
        return Response.json(masked);
      }

      // Persona API
      if (url.pathname === "/api/personas" && req.method === "GET") {
        const currentConfig = loadConfig(root);
        return Response.json({
          personas: currentConfig.personas || [],
          active: currentConfig.active_persona || "",
        });
      }

      if (url.pathname === "/api/personas" && req.method === "POST") {
        const body = await readJsonObject(req);
        if (!body) return jsonError("Invalid JSON body", 400);

        const currentConfig = loadConfig(root);
        if (!currentConfig.personas) currentConfig.personas = [];

        if (body.action === "add") {
          const persona = normalizePersona(body.persona);
          if (!persona) return jsonError("이름이 필요합니다", 400);
          if (currentConfig.personas.find(p => p.name === persona.name)) {
            return jsonError("이미 존재하는 페르소나입니다", 409);
          }
          currentConfig.personas.push(persona);
        } else if (body.action === "update") {
          const originalName = body.original_name;
          const idx = currentConfig.personas.findIndex(p => p.name === originalName);
          if (idx === -1) return jsonError("페르소나를 찾을 수 없습니다", 404);
          const persona = normalizePersona(body.persona);
          if (!persona) return jsonError("이름이 필요합니다", 400);
          // Renaming onto another persona's name would create a duplicate entry.
          if (currentConfig.personas.some((p, i) => i !== idx && p.name === persona.name)) {
            return jsonError("이미 존재하는 페르소나입니다", 409);
          }
          // Keep any extra fields the client sent; the validated fields win.
          currentConfig.personas[idx] = { ...(body.persona as Record<string, unknown>), ...persona };
          if (currentConfig.active_persona === originalName && persona.name !== originalName) {
            currentConfig.active_persona = persona.name;
          }
        } else if (body.action === "delete") {
          const name = body.name;
          currentConfig.personas = currentConfig.personas.filter(p => p.name !== name);
          if (currentConfig.active_persona === name) {
            // Fall back to the first remaining persona, or to none.
            currentConfig.active_persona = currentConfig.personas[0]?.name || "";
          }
        } else if (body.action === "activate") {
          const name = asString(body.name);
          if (name === undefined || !currentConfig.personas.find(p => p.name === name)) {
            return jsonError("페르소나를 찾을 수 없습니다", 404);
          }
          currentConfig.active_persona = name;
        }

        saveConfig(root, currentConfig);
        Object.assign(config, currentConfig);
        return Response.json({ ok: true, personas: currentConfig.personas, active: currentConfig.active_persona });
      }

      // Build API
      if (url.pathname === "/api/build" && req.method === "POST") {
        if (isProcessing) {
          return jsonError("이미 처리 중입니다", 409);
        }
        isProcessing = true;
        processingStatus = "빌드 중...";
        runJob("빌드 오류", async (store) => {
          await rebuildSite(store);
          processingStatus = "빌드 완료!";
          console.log("\x1b[32m✅ 수동 빌드 완료\x1b[0m");
        });
        return Response.json({ ok: true, message: "빌드 시작" });
      }

      // Admin page
      if (url.pathname === "/admin") {
        const { sources, usage } = withStore((store) => ({
          sources: store.listSourcesMeta(),
          usage: store.getUsageSummary(),
        }));
        const configData = loadConfig(root);

        const { renderAdmin } = await import("./build/templates");
        return new Response(renderAdmin({
          wikiName: configData.project.name,
          sources,
          usage,
          llmConfig: configData.llm,
          personas: configData.personas || [],
          activePersona: configData.active_persona || "",
          authToken,
        }), { headers: { "Content-Type": htmlContentType } });
      }

      if (url.pathname === "/api/status") {
        const counts = withStore((store) => ({
          sources: store.listSourcesMeta().length,
          sourcePages: store.listSourcePages().length,
          conceptPages: store.listConceptPages().length,
          links: store.getAllLinks().length,
          usage: store.getUsageSummary(),
        }));

        return Response.json({
          processing: isProcessing,
          processingStatus,
          ...counts,
        });
      }

      // ── Static file serving ──
      // NOTE(review): url.pathname stays percent-encoded, so files whose names
      // contain non-ASCII characters may not resolve — confirm how slugs are written.
      let pathname = url.pathname;
      if (pathname === "/") pathname = "/index.html";

      const resolved = path.resolve(join(siteDir, pathname));
      // Require a separator after the root: a bare prefix test would also accept
      // sibling directories such as "<site>-private".
      if (resolved !== siteRoot && !resolved.startsWith(siteRoot + path.sep)) {
        return new Response("Forbidden", { status: 403 });
      }
      const staticFile = Bun.file(resolved);

      if (await staticFile.exists()) {
        const isHtml = pathname.endsWith(".html");
        const cspValue = "default-src 'self'; script-src 'self' 'unsafe-inline' cdn.jsdelivr.net d3js.org; style-src 'self' 'unsafe-inline' cdn.jsdelivr.net fonts.googleapis.com; font-src fonts.gstatic.com; img-src * data:; connect-src 'self'";
        if (isHtml) {
          return new Response(staticFile, { headers: { "Content-Type": htmlContentType, "Content-Security-Policy": cspValue } });
        }
        return new Response(staticFile);
      }
      return new Response("Not Found", { status: 404 });
    },
  });
}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import { Store } from "../store";
|
|
2
|
+
import { type LLMConfig, type Persona } from "../config";
|
|
3
|
+
import { LLMClient, type UsageStats } from "../llm-client";
|
|
4
|
+
|
|
5
|
+
/** Summary returned by `ingestUrl` and `ingestFile` after one source has been processed. */
export interface IngestResult {
  /** Source-page count reported by `llmChunkDocument`. */
  sourceCount: number;
  /** Concept-page count reported by `llmChunkDocument`. */
  conceptCount: number;
  /** Link count; both ingest functions in this module currently return 0. */
  linkCount: number;
  /** LLM usage counters for this run together with the estimated cost in USD. */
  usage: UsageStats & { estimatedCostUsd: number };
}
|
|
11
|
+
|
|
12
|
+
/**
 * Fetches a web page, records it as a `"web"` source and runs LLM chunking on
 * its text, then logs the LLM usage for that source.
 *
 * @param root       Project root. Not used by this function; kept so the
 *                   signature matches `ingestFile`.
 * @param store      Open store that receives the source, its pages and the usage log.
 * @param url        Page to fetch. This function does not validate it.
 * @param llmConfig  LLM settings used to build the client for this run.
 * @param persona    Persona passed through to the chunker, or `null`.
 * @param onProgress Optional callback receiving human-readable status strings.
 * @returns Page counts from the chunker plus usage/cost figures for this run.
 */
export async function ingestUrl(
  root: string,
  store: Store,
  url: string,
  llmConfig: LLMConfig,
  persona: Persona | null,
  onProgress?: (status: string) => void
): Promise<IngestResult> {
  const client = new LLMClient(llmConfig);
  client.resetUsageStats();

  const { fetchPage } = await import("../ingest/web");
  const { llmChunkDocument, htmlToRawText } = await import("../pipeline/llm-chunker");

  onProgress?.("⏳ URL 가져오는 중...");
  const { title, html } = await fetchPage(url);

  const source = store.addSource(url, "web", title, html);
  // Re-ingesting a URL reuses its existing source row, so clear the pages from
  // the previous run first — the same step ingestFile performs — otherwise
  // pages that are not regenerated would linger.
  store.deletePagesBySource(source.id);
  const rawText = htmlToRawText(html);

  onProgress?.("⏳ LLM 분석 시작...");
  const { sourceCount, conceptCount } = await llmChunkDocument(rawText, title, source.id, store, 0, persona, client);

  const u = client.getUsageStats();
  const estimatedCostUsd = client.getEstimatedCost();
  store.addUsageLog(source.id, u.totalCalls, u.promptTokens, u.completionTokens, u.totalTokens, estimatedCostUsd);

  return {
    sourceCount,
    conceptCount,
    linkCount: 0,
    usage: { ...u, estimatedCostUsd },
  };
}
|
|
46
|
+
|
|
47
|
+
/**
 * Extracts the title and text of a local document with the extractor that
 * matches `ext`. Anything other than pdf/docx/pptx goes to the legacy
 * textutil-based extractor.
 */
async function extractDocument(
  ext: string,
  filePath: string,
  onProgress?: (status: string) => void
): Promise<{ title: string; text: string }> {
  switch (ext) {
    case "pdf": {
      const { extractTextFromPdf } = await import("../ingest/pdf");
      onProgress?.("⏳ PDF 텍스트 추출 중...");
      return extractTextFromPdf(filePath);
    }
    case "docx": {
      const { extractTextFromDocx } = await import("../ingest/docx");
      onProgress?.("⏳ DOCX 텍스트 추출 중...");
      return extractTextFromDocx(filePath);
    }
    case "pptx": {
      const { extractTextFromPptx } = await import("../ingest/pptx");
      onProgress?.("⏳ PPTX 텍스트 추출 중...");
      return extractTextFromPptx(filePath);
    }
    default: {
      const { extractWithTextutil } = await import("../ingest/legacy");
      onProgress?.(`⏳ ${ext.toUpperCase()} 텍스트 추출 중...`);
      return extractWithTextutil(filePath);
    }
  }
}

/**
 * Ingests a local document: extracts its text, records it as a source keyed
 * by `filePath`, replaces that source's pages with freshly chunked ones and
 * logs the LLM usage.
 *
 * @param root         Project root. Not used by this function.
 * @param store        Open store that receives the source, its pages and the usage log.
 * @param filePath     Path of the file on disk; also used as the source URI.
 * @param originalName Original file name; only its extension is used, to pick the extractor.
 * @param llmConfig    LLM settings used to build the client for this run.
 * @param persona      Persona passed through to the chunker, or `null`.
 * @param onProgress   Optional callback receiving human-readable status strings.
 * @returns Page counts from the chunker plus usage/cost figures for this run.
 */
export async function ingestFile(
  root: string,
  store: Store,
  filePath: string,
  originalName: string,
  llmConfig: LLMConfig,
  persona: Persona | null,
  onProgress?: (status: string) => void
): Promise<IngestResult> {
  const client = new LLMClient(llmConfig);
  client.resetUsageStats();

  const { llmChunkDocument } = await import("../pipeline/llm-chunker");

  const ext = originalName.split(".").pop()?.toLowerCase() || "";
  const { title, text } = await extractDocument(ext, filePath, onProgress);

  // The raw content column only gets a placeholder for files.
  const source = store.addSource(filePath, ext, title, "(file)");
  // Clear pages from any previous ingest of this source before re-chunking.
  store.deletePagesBySource(source.id);

  onProgress?.("⏳ LLM 분석 시작...");
  const { sourceCount, conceptCount } = await llmChunkDocument(text, title, source.id, store, 0, persona, client);

  const u = client.getUsageStats();
  const estimatedCostUsd = client.getEstimatedCost();
  store.addUsageLog(source.id, u.totalCalls, u.promptTokens, u.completionTokens, u.totalTokens, estimatedCostUsd);

  return {
    sourceCount,
    conceptCount,
    linkCount: 0,
    usage: { ...u, estimatedCostUsd },
  };
}
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
import { expect, test, describe, beforeEach, afterEach } from "bun:test";
|
|
2
|
+
import { Store } from "./store";
|
|
3
|
+
|
|
4
|
+
// Behavioural tests for Store, run against a fresh in-memory database per test.
describe("Store", () => {
  let store: Store;

  /** Adds the default test source used by most cases. */
  const seedSource = () => store.addSource("file:///test.pdf", "pdf", "Test", "raw");

  /** Adds a "source"-type page with body "content" under the given source. */
  const seedSourcePage = (slug: string, title: string, sourceId: number, order: number) =>
    store.addPage(slug, title, "content", sourceId, null, "source", order);

  beforeEach(() => {
    store = new Store(":memory:");
    store.initSchema();
  });

  afterEach(() => {
    store.close();
  });

  test("addSource and listSources", () => {
    const created = store.addSource("file:///test.pdf", "pdf", "Test PDF", "raw content");
    expect(created.id).toBeGreaterThan(0);
    expect(created.uri).toBe("file:///test.pdf");
    expect(created.type).toBe("pdf");
    expect(created.title).toBe("Test PDF");

    const all = store.listSources();
    expect(all).toHaveLength(1);
    expect(all[0].uri).toBe("file:///test.pdf");
  });

  test("addSource updates existing source with same URI", () => {
    const first = store.addSource("file:///test.pdf", "pdf", "V1", "content1");
    const second = store.addSource("file:///test.pdf", "pdf", "V2", "content2");
    // Same URI: the row is reused and its fields are overwritten.
    expect(second.id).toBe(first.id);
    expect(second.title).toBe("V2");
    expect(second.raw_content).toBe("content2");
    expect(store.listSources()).toHaveLength(1);
  });

  test("addPage and getPage by slug", () => {
    const source = seedSource();
    const created = store.addPage("test-page", "Test Page", "# Content", source.id, null, "source", 0);
    expect(created.slug).toBe("test-page");
    expect(created.title).toBe("Test Page");
    expect(created.page_type).toBe("source");

    const found = store.getPage("test-page");
    expect(found).not.toBeNull();
    expect(found!.title).toBe("Test Page");

    expect(store.getPage("nonexistent")).toBeNull();
  });

  test("listSourcePages and listConceptPages", () => {
    const source = seedSource();
    seedSourcePage("src-page", "Source Page", source.id, 0);
    store.addPage("concept-page", "Concept Page", "content", undefined, undefined, "concept", 0);

    const fromSources = store.listSourcePages();
    expect(fromSources).toHaveLength(1);
    expect(fromSources[0].slug).toBe("src-page");

    const concepts = store.listConceptPages();
    expect(concepts).toHaveLength(1);
    expect(concepts[0].slug).toBe("concept-page");
  });

  test("addLink and getBacklinks", () => {
    const source = seedSource();
    const from = seedSourcePage("page-a", "Page A", source.id, 0);
    const to = seedSourcePage("page-b", "Page B", source.id, 1);

    store.addLink(from.id, to.id, "link to B");

    const incoming = store.getBacklinks(to.id);
    expect(incoming).toHaveLength(1);
    expect(incoming[0].slug).toBe("page-a");
  });

  test("getAllBacklinksGrouped", () => {
    const source = seedSource();
    const a = seedSourcePage("page-a", "Page A", source.id, 0);
    const b = seedSourcePage("page-b", "Page B", source.id, 1);
    const c = seedSourcePage("page-c", "Page C", source.id, 2);

    store.addLink(a.id, c.id, "link to C from A");
    store.addLink(b.id, c.id, "link to C from B");

    const byTarget = store.getAllBacklinksGrouped();
    expect(byTarget.has(c.id)).toBe(true);
    expect(byTarget.get(c.id)!).toHaveLength(2);
  });

  test("deletePagesBySource", () => {
    const source = seedSource();
    seedSourcePage("page-1", "Page 1", source.id, 0);
    seedSourcePage("page-2", "Page 2", source.id, 1);
    expect(store.listPages()).toHaveLength(2);

    store.deletePagesBySource(source.id);
    expect(store.listPages()).toHaveLength(0);
  });

  test("slug uniqueness (duplicate handling via INSERT OR REPLACE)", () => {
    const source = seedSource();
    store.addPage("same-slug", "Title V1", "content v1", source.id, null, "source", 0);
    store.addPage("same-slug", "Title V2", "content v2", source.id, null, "source", 0);

    // The second insert with the same slug wins.
    const latest = store.getPage("same-slug");
    expect(latest).not.toBeNull();
    expect(latest!.title).toBe("Title V2");
    expect(latest!.content).toBe("content v2");
  });

  test("listSourcesMeta excludes raw_content", () => {
    store.addSource("file:///test.pdf", "pdf", "Test", "some large raw content here");
    const rows = store.listSourcesMeta();
    expect(rows).toHaveLength(1);
    expect(rows[0].title).toBe("Test");
    expect(rows[0]).not.toHaveProperty("raw_content");
  });

  test("addUsageLog and getUsageSummary", () => {
    const source = seedSource();
    store.addUsageLog(source.id, 2, 100, 50, 150, 0.005);
    store.addUsageLog(source.id, 3, 200, 100, 300, 0.01);

    // Totals are summed across both log rows.
    const totals = store.getUsageSummary();
    expect(totals.totalCalls).toBe(5);
    expect(totals.promptTokens).toBe(300);
    expect(totals.completionTokens).toBe(150);
    expect(totals.totalTokens).toBe(450);
    expect(totals.totalCost).toBeCloseTo(0.015, 5);
  });
});
|