@open330/kiwimu 0.4.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +98 -49
- package/bin/kiwimu +1 -1
- package/package.json +4 -1
- package/personas/namuwiki.json +6 -0
- package/src/build/renderer.ts +50 -2
- package/src/build/static/search.js +33 -2
- package/src/build/static/style.css +84 -1
- package/src/build/templates.ts +353 -167
- package/src/config.ts +35 -29
- package/src/demo/sample-data.ts +70 -0
- package/src/demo/setup.ts +31 -0
- package/src/expand/llm.ts +1 -1
- package/src/index.ts +234 -458
- package/src/ingest/docx.ts +0 -8
- package/src/ingest/legacy.ts +4 -4
- package/src/ingest/pdf.ts +1 -1
- package/src/ingest/pptx.ts +0 -1
- package/src/ingest/web.test.ts +41 -0
- package/src/ingest/web.ts +61 -62
- package/src/llm-client.ts +203 -126
- package/src/pipeline/chunker.test.ts +42 -0
- package/src/pipeline/chunker.ts +1 -48
- package/src/pipeline/llm-chunker.ts +144 -59
- package/src/server.ts +327 -0
- package/src/services/ingest.ts +100 -0
- package/src/store.test.ts +132 -0
- package/src/store.ts +206 -2
- package/src/pipeline/llm-linker.ts +0 -84
package/src/ingest/docx.ts
CHANGED
|
@@ -5,11 +5,3 @@ export async function extractTextFromDocx(filePath: string): Promise<{ title: st
|
|
|
5
5
|
const title = filePath.split("/").pop()?.replace(/\.docx?$/i, "") || "Untitled";
|
|
6
6
|
return { title, text };
|
|
7
7
|
}
|
|
8
|
-
|
|
9
|
-
export async function extractHtmlFromDocx(filePath: string): Promise<{ title: string; html: string }> {
|
|
10
|
-
const mammoth = require("mammoth");
|
|
11
|
-
const result = await mammoth.convertToHtml({ path: filePath });
|
|
12
|
-
const html: string = result.value;
|
|
13
|
-
const title = filePath.split("/").pop()?.replace(/\.docx?$/i, "") || "Untitled";
|
|
14
|
-
return { title, html };
|
|
15
|
-
}
|
package/src/ingest/legacy.ts
CHANGED
|
@@ -9,7 +9,7 @@ export async function extractWithTextutil(filePath: string): Promise<{ title: st
|
|
|
9
9
|
const textutilFormats = new Set(["doc", "rtf", "odt"]);
|
|
10
10
|
|
|
11
11
|
if (textutilFormats.has(ext)) {
|
|
12
|
-
const proc = Bun.spawn(["textutil", "-convert", "txt", "-stdout", filePath], {
|
|
12
|
+
const proc = Bun.spawn(["textutil", "-convert", "txt", "-stdout", "--", filePath], {
|
|
13
13
|
stdout: "pipe",
|
|
14
14
|
stderr: "pipe",
|
|
15
15
|
});
|
|
@@ -26,12 +26,12 @@ export async function extractWithTextutil(filePath: string): Promise<{ title: st
|
|
|
26
26
|
if (ext === "key") {
|
|
27
27
|
// Try to extract text using mdimport/spotlight metadata
|
|
28
28
|
try {
|
|
29
|
-
const proc = Bun.spawn(["mdimport", "-d2", filePath], { stdout: "pipe", stderr: "pipe" });
|
|
29
|
+
const proc = Bun.spawn(["mdimport", "-d2", "--", filePath], { stdout: "pipe", stderr: "pipe" });
|
|
30
30
|
await proc.exited;
|
|
31
31
|
} catch {}
|
|
32
32
|
|
|
33
33
|
// Keynote files are directories or zip-like packages. Try strings extraction.
|
|
34
|
-
const proc = Bun.spawn(["strings", filePath], { stdout: "pipe", stderr: "pipe" });
|
|
34
|
+
const proc = Bun.spawn(["strings", "--", filePath], { stdout: "pipe", stderr: "pipe" });
|
|
35
35
|
const raw = await new Response(proc.stdout).text();
|
|
36
36
|
await proc.exited;
|
|
37
37
|
|
|
@@ -50,7 +50,7 @@ export async function extractWithTextutil(filePath: string): Promise<{ title: st
|
|
|
50
50
|
|
|
51
51
|
// For .ppt (legacy PowerPoint), try textutil or strings
|
|
52
52
|
if (ext === "ppt") {
|
|
53
|
-
const proc = Bun.spawn(["strings", filePath], { stdout: "pipe", stderr: "pipe" });
|
|
53
|
+
const proc = Bun.spawn(["strings", "--", filePath], { stdout: "pipe", stderr: "pipe" });
|
|
54
54
|
const raw = await new Response(proc.stdout).text();
|
|
55
55
|
await proc.exited;
|
|
56
56
|
|
package/src/ingest/pdf.ts
CHANGED
package/src/ingest/pptx.ts
CHANGED
|
@@ -2,7 +2,6 @@ import { readFileSync } from "fs";
|
|
|
2
2
|
|
|
3
3
|
export async function extractTextFromPptx(filePath: string): Promise<{ title: string; text: string }> {
|
|
4
4
|
// PPTX is a ZIP containing XML files
|
|
5
|
-
const { Decompress } = await import("bun");
|
|
6
5
|
const JSZip = (await import("jszip")).default;
|
|
7
6
|
|
|
8
7
|
const buffer = readFileSync(filePath);
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { expect, test, describe } from "bun:test";
|
|
2
|
+
import { validateUrl } from "./web";
|
|
3
|
+
|
|
4
|
+
// Table-driven spec for validateUrl: every case is a [test name, URL] pair.
// The first table lists URLs that must be accepted, the second lists URLs
// that must be rejected (private/loopback/link-local hosts, non-http schemes).
describe("validateUrl", () => {
  const accepted: ReadonlyArray<readonly [string, string]> = [
    ["정상 HTTP URL 허용", "http://example.com"],
    ["정상 HTTPS URL 허용", "https://example.com/page"],
  ];

  const rejected: ReadonlyArray<readonly [string, string]> = [
    ["localhost 차단", "http://localhost:3000"],
    ["127.0.0.1 차단", "http://127.0.0.1"],
    ["10.x.x.x 차단", "http://10.0.0.1"],
    ["172.16.x.x 차단", "http://172.16.0.1"],
    ["192.168.x.x 차단", "http://192.168.1.1"],
    ["169.254.x.x 차단", "http://169.254.169.254"],
    ["file:// 프로토콜 차단", "file:///etc/passwd"],
    ["ftp:// 프로토콜 차단", "ftp://example.com"],
    [".local 도메인 차단", "http://server.local"],
    ["0.0.0.0 차단", "http://0.0.0.0"],
  ];

  // Registration order matches the tables: accepted cases first, then rejected.
  accepted.forEach(([label, target]) => {
    test(label, () => {
      expect(() => validateUrl(target)).not.toThrow();
    });
  });

  rejected.forEach(([label, target]) => {
    test(label, () => {
      expect(() => validateUrl(target)).toThrow();
    });
  });
});
|
package/src/ingest/web.ts
CHANGED
|
@@ -1,77 +1,76 @@
|
|
|
1
1
|
import * as cheerio from "cheerio";
|
|
2
|
+
import { URL } from "url";
|
|
2
3
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
4
|
+
/**
 * Report whether an IPv4 address (identified by its first two octets) lies in
 * a range that must never be fetched: "this network" 0/8, private 10/8,
 * 172.16/12 and 192.168/16, loopback 127/8, shared address space 100.64/10,
 * and link-local 169.254/16 (which includes common cloud metadata endpoints).
 */
function isBlockedIpv4(a: number, b: number): boolean {
  return (
    a === 0 || // 0.0.0.0/8
    a === 10 || // 10.0.0.0/8
    a === 127 || // 127.0.0.0/8
    (a === 100 && b >= 64 && b <= 127) || // 100.64.0.0/10
    (a === 169 && b === 254) || // 169.254.0.0/16
    (a === 172 && b >= 16 && b <= 31) || // 172.16.0.0/12
    (a === 192 && b === 168) // 192.168.0.0/16
  );
}

/**
 * Report whether an IPv6 literal (without brackets, in the lower-case
 * compressed form produced by the URL parser) is unspecified, loopback,
 * unique-local (fc00::/7), link-local (fe80::/10), or an IPv4-mapped address
 * whose embedded IPv4 address is itself blocked.
 */
function isBlockedIpv6(addr: string): boolean {
  if (addr === "::" || addr === "::1") return true;

  // IPv4-mapped form as serialized by the URL parser: ::ffff:7f00:1
  const mappedHex = addr.match(/^::ffff:([0-9a-f]{1,4}):[0-9a-f]{1,4}$/);
  if (mappedHex) {
    const [, hiHex = "0"] = mappedHex;
    const hi = parseInt(hiHex, 16);
    return isBlockedIpv4(hi >> 8, hi & 0xff);
  }

  // Dotted IPv4-mapped form, in case a runtime keeps it un-normalized.
  const mappedDotted = addr.match(/^::ffff:(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/);
  if (mappedDotted) {
    const [, a = "0", b = "0"] = mappedDotted;
    return isBlockedIpv4(Number(a), Number(b));
  }

  // A leading "::" means the first 16-bit group is zero.
  const firstGroup = addr.startsWith("::") ? 0 : parseInt(addr.split(":")[0] ?? "0", 16);
  return (firstGroup & 0xfe00) === 0xfc00 || (firstGroup & 0xffc0) === 0xfe80;
}

/**
 * Validate a URL to prevent SSRF attacks.
 * Blocks private/internal IP ranges and non-http(s) schemes.
 *
 * Only the textual host of the URL is inspected: IPv4 literals, bracketed
 * IPv6 literals, and a few well-known internal host names (`localhost`,
 * `*.localhost`, `*.local`). A trailing dot on the host name is ignored so
 * that `localhost.` is treated like `localhost`.
 *
 * @param urlStr - The URL to check.
 * @throws Error if the string is not a parseable URL, if the scheme is not
 *   http/https, or if the host is an internal address.
 */
export function validateUrl(urlStr: string): void {
  let parsed: URL;
  try {
    parsed = new URL(urlStr);
  } catch {
    throw new Error("유효하지 않은 URL입니다");
  }

  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
    throw new Error("http 또는 https URL만 허용됩니다");
  }

  // Drop a single trailing dot (fully-qualified form) before comparing names.
  const hostname = parsed.hostname.replace(/\.$/, "");

  // IPv4 literal
  const ipv4Match = hostname.match(/^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/);
  if (ipv4Match) {
    const [, a = 0, b = 0] = ipv4Match.map(Number);
    if (isBlockedIpv4(a, b)) {
      throw new Error("내부 네트워크 주소는 허용되지 않습니다");
    }
  }

  // IPv6 literal: the URL parser reports it wrapped in square brackets.
  if (hostname.startsWith("[") && hostname.endsWith("]")) {
    if (isBlockedIpv6(hostname.slice(1, -1))) {
      throw new Error("내부 네트워크 주소는 허용되지 않습니다");
    }
  }

  // Block common private hostnames
  if (hostname === "localhost" || hostname.endsWith(".localhost") || hostname.endsWith(".local")) {
    throw new Error("내부 네트워크 주소는 허용되지 않습니다");
  }
}
|
|
37
44
|
|
|
38
|
-
|
|
45
|
+
/**
 * Fetch an HTML page and return its title and body markup.
 *
 * Redirects are followed manually (at most 5 hops) so that every redirect
 * target is passed through `validateUrl` before it is requested.
 * NOTE(review): validation looks at the URL text only; a public host name
 * that resolves to an internal address is presumably not caught here — confirm
 * whether DNS-level checks are needed.
 *
 * @param url - The page to fetch; must pass `validateUrl`.
 * @returns The `<title>` text (or `url` when the page has none) and the inner
 *   HTML of `<body>` (or the whole document when there is no body element).
 * @throws Error when a URL fails validation, a redirect has no `Location`
 *   header, the final response is not OK, or the redirect limit is exceeded.
 */
export async function fetchPage(url: string): Promise<{ title: string; html: string }> {
  validateUrl(url);

  let currentUrl = url;
  const maxRedirects = 5;

  for (let i = 0; i <= maxRedirects; i++) {
    const resp = await fetch(currentUrl, {
      headers: { "User-Agent": "kiwimu/0.4 (learning wiki builder)" },
      redirect: "manual",
    });

    if (resp.status >= 300 && resp.status < 400) {
      const location = resp.headers.get("location");
      // The redirect body is never read; cancel it so the connection is
      // released before the next hop. Cancellation failure is not actionable.
      await resp.body?.cancel().catch(() => {});
      if (!location) throw new Error(`Redirect without location header from ${currentUrl}`);
      // Resolve relative redirect URLs
      const redirectUrl = new URL(location, currentUrl).href;
      validateUrl(redirectUrl); // Re-validate redirect target to prevent SSRF bypass
      currentUrl = redirectUrl;
      continue;
    }

    if (!resp.ok) {
      // Discard the unread error body before reporting the failure.
      await resp.body?.cancel().catch(() => {});
      throw new Error(`Failed to fetch ${currentUrl}: ${resp.status}`);
    }

    const html = await resp.text();
    const $ = cheerio.load(html);
    const title = $("title").text().trim() || url;
    const body = $("body").html() || html;
    return { title, html: body };
  }

  throw new Error(`Too many redirects fetching ${url}`);
}
|
package/src/llm-client.ts
CHANGED
|
@@ -8,73 +8,14 @@ export interface UsageStats {
|
|
|
8
8
|
totalTokens: number;
|
|
9
9
|
}
|
|
10
10
|
|
|
11
|
-
const _usage: UsageStats = {
|
|
12
|
-
totalCalls: 0,
|
|
13
|
-
promptTokens: 0,
|
|
14
|
-
completionTokens: 0,
|
|
15
|
-
totalTokens: 0,
|
|
16
|
-
};
|
|
17
|
-
|
|
18
|
-
let _llmConfig: LLMConfig | null = null;
|
|
19
|
-
|
|
20
|
-
export function setLLMConfig(config: LLMConfig): void {
|
|
21
|
-
_llmConfig = config;
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
export function getLLMConfig(): LLMConfig {
|
|
25
|
-
if (!_llmConfig) throw new Error("LLM config not set. Call setLLMConfig() first.");
|
|
26
|
-
return _llmConfig;
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
export function getUsageStats(): UsageStats {
|
|
30
|
-
return { ..._usage };
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
export function resetUsageStats(): void {
|
|
34
|
-
_usage.totalCalls = 0;
|
|
35
|
-
_usage.promptTokens = 0;
|
|
36
|
-
_usage.completionTokens = 0;
|
|
37
|
-
_usage.totalTokens = 0;
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
export function getEstimatedCost(): number {
|
|
41
|
-
const config = _llmConfig;
|
|
42
|
-
if (!config) return 0;
|
|
43
|
-
|
|
44
|
-
// Pricing per 1M tokens (approximate)
|
|
45
|
-
const pricing: Record<string, { input: number; output: number }> = {
|
|
46
|
-
"gemini": { input: 0.075, output: 0.30 },
|
|
47
|
-
"azure-openai": { input: 0.10, output: 0.40 },
|
|
48
|
-
"openai": { input: 0.15, output: 0.60 },
|
|
49
|
-
"anthropic": { input: 3.00, output: 15.00 },
|
|
50
|
-
};
|
|
51
|
-
const p = pricing[config.provider] || pricing["gemini"];
|
|
52
|
-
return (_usage.promptTokens / 1_000_000) * p.input + (_usage.completionTokens / 1_000_000) * p.output;
|
|
53
|
-
}
|
|
54
|
-
|
|
55
|
-
export function printUsageSummary(): void {
|
|
56
|
-
const u = _usage;
|
|
57
|
-
const cost = getEstimatedCost();
|
|
58
|
-
const provider = _llmConfig?.provider || "unknown";
|
|
59
|
-
const model = _llmConfig?.model || "unknown";
|
|
60
|
-
|
|
61
|
-
console.log(`\x1b[34m📊 LLM 사용량 (${provider}/${model}):\x1b[0m`);
|
|
62
|
-
console.log(` 호출 횟수: ${u.totalCalls}회`);
|
|
63
|
-
console.log(` 입력 토큰: ${u.promptTokens.toLocaleString()}`);
|
|
64
|
-
console.log(` 출력 토큰: ${u.completionTokens.toLocaleString()}`);
|
|
65
|
-
console.log(` 총 토큰: ${u.totalTokens.toLocaleString()}`);
|
|
66
|
-
console.log(` 예상 비용: ~$${cost.toFixed(4)}`);
|
|
67
|
-
}
|
|
68
|
-
|
|
69
11
|
// ── Provider implementations ──
|
|
70
12
|
|
|
71
|
-
async function geminiComplete(system: string, userMessage: string, maxTokens: number): Promise<{ text: string; usage?:
|
|
72
|
-
const
|
|
73
|
-
const url = `https://generativelanguage.googleapis.com/v1beta/models/${config.model}:generateContent?key=${config.api_key}`;
|
|
13
|
+
async function geminiComplete(config: LLMConfig, system: string, userMessage: string, maxTokens: number): Promise<{ text: string; usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number } }> {
|
|
14
|
+
const url = `https://generativelanguage.googleapis.com/v1beta/models/${config.model}:generateContent`;
|
|
74
15
|
|
|
75
16
|
const resp = await fetch(url, {
|
|
76
17
|
method: "POST",
|
|
77
|
-
headers: { "Content-Type": "application/json" },
|
|
18
|
+
headers: { "Content-Type": "application/json", "x-goog-api-key": config.api_key },
|
|
78
19
|
body: JSON.stringify({
|
|
79
20
|
system_instruction: { parts: [{ text: system }] },
|
|
80
21
|
contents: [{ parts: [{ text: userMessage }] }],
|
|
@@ -87,9 +28,10 @@ async function geminiComplete(system: string, userMessage: string, maxTokens: nu
|
|
|
87
28
|
throw new Error(`Gemini API error (${resp.status}): ${err.slice(0, 200)}`);
|
|
88
29
|
}
|
|
89
30
|
|
|
90
|
-
const data = await resp.json()
|
|
91
|
-
const
|
|
92
|
-
const
|
|
31
|
+
const data = await resp.json() as Record<string, unknown>;
|
|
32
|
+
const candidates = data.candidates as Array<{ content: { parts: Array<{ text: string }> } }> | undefined;
|
|
33
|
+
const text = candidates?.[0]?.content?.parts?.[0]?.text || "";
|
|
34
|
+
const usage = data.usageMetadata as { promptTokenCount?: number; candidatesTokenCount?: number; totalTokenCount?: number } | undefined;
|
|
93
35
|
return {
|
|
94
36
|
text,
|
|
95
37
|
usage: usage ? {
|
|
@@ -100,78 +42,213 @@ async function geminiComplete(system: string, userMessage: string, maxTokens: nu
|
|
|
100
42
|
};
|
|
101
43
|
}
|
|
102
44
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
45
|
+
// ── Class-based LLM client ──
|
|
46
|
+
|
|
47
|
+
/** Provider-independent completion result: the generated text plus optional token counts. */
type ProviderResult = { text: string; usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number } };

/**
 * LLM client bound to one configuration.
 *
 * Dispatches `chatComplete` to the provider named in the config, lazily
 * creates and caches the provider SDK client, and accumulates call/token
 * usage for this instance.
 */
export class LLMClient {
  private config: LLMConfig;
  private usage: UsageStats = { totalCalls: 0, promptTokens: 0, completionTokens: 0, totalTokens: 0 };
  private _openaiClient: InstanceType<typeof import("openai").default> | null = null;
  private _anthropicClient: InstanceType<typeof import("@anthropic-ai/sdk").default> | null = null;
  private _azureClient: InstanceType<typeof import("openai").AzureOpenAI> | null = null;
  // Deployment/model name resolved when the Azure client was created.
  private _azureModel: LLMConfig["model"] | null = null;

  constructor(config: LLMConfig) {
    this.config = config;
  }

  /**
   * Complete a chat turn through Azure OpenAI.
   *
   * When the config carries no API key, the key, endpoint and deployment are
   * read from `$HOME/keys/openai.azure.com/<model>.json` (first array entry).
   * That lookup happens only while creating the client, not on every call.
   *
   * @throws Error("Azure OpenAI API key not configured") when no key is
   *   configured and the key file cannot be read or parsed.
   */
  private async azureComplete(system: string, userMessage: string, maxTokens: number): Promise<ProviderResult> {
    if (!this._azureClient) {
      let apiKey = this.config.api_key;
      let endpoint = this.config.endpoint;
      let model = this.config.model;

      if (!apiKey) {
        try {
          const keyFile = `${process.env.HOME}/keys/openai.azure.com/${this.config.model}.json`;
          const raw = require("fs").readFileSync(keyFile, "utf-8");
          const keyConfig = JSON.parse(raw)[0] as { key: string; endpoint: string; deployment: string };
          apiKey = keyConfig.key;
          // Keep only the resource base URL, dropping any "/openai/..." path.
          endpoint = keyConfig.endpoint.split("/openai/")[0];
          model = keyConfig.deployment;
        } catch {
          throw new Error("Azure OpenAI API key not configured");
        }
      }

      const { AzureOpenAI } = await import("openai");
      this._azureClient = new AzureOpenAI({ endpoint, apiKey, deployment: model, apiVersion: "2024-12-01-preview" });
      this._azureModel = model;
    }

    const resp = await this._azureClient.chat.completions.create({
      model: this._azureModel ?? this.config.model,
      max_completion_tokens: maxTokens,
      messages: [
        { role: "system", content: system },
        { role: "user", content: userMessage },
      ],
    });

    return {
      text: resp.choices[0]?.message?.content || "",
      usage: resp.usage ? {
        prompt_tokens: resp.usage.prompt_tokens || 0,
        completion_tokens: resp.usage.completion_tokens || 0,
        total_tokens: resp.usage.total_tokens || 0,
      } : undefined,
    };
  }

  /** Complete a chat turn through the OpenAI API (model defaults to "gpt-4o"). */
  private async openaiComplete(system: string, userMessage: string, maxTokens: number): Promise<ProviderResult> {
    if (!this._openaiClient) {
      // Load the SDK only when the client is first needed.
      const { default: OpenAI } = await import("openai");
      this._openaiClient = new OpenAI({ apiKey: this.config.api_key });
    }
    const resp = await this._openaiClient.chat.completions.create({
      model: this.config.model || "gpt-4o",
      messages: [
        { role: "system", content: system },
        { role: "user", content: userMessage },
      ],
      max_tokens: maxTokens,
    });
    return {
      text: resp.choices[0]?.message?.content || "",
      usage: resp.usage ? {
        prompt_tokens: resp.usage.prompt_tokens || 0,
        completion_tokens: resp.usage.completion_tokens || 0,
        total_tokens: resp.usage.total_tokens || 0,
      } : undefined,
    };
  }

  /**
   * Complete a chat turn through the Anthropic API
   * (model defaults to "claude-sonnet-4-20250514").
   * Only the first content block is used, and only when it is a text block.
   */
  private async anthropicComplete(system: string, userMessage: string, maxTokens: number): Promise<ProviderResult> {
    if (!this._anthropicClient) {
      // Load the SDK only when the client is first needed.
      const { default: Anthropic } = await import("@anthropic-ai/sdk");
      this._anthropicClient = new Anthropic({ apiKey: this.config.api_key });
    }
    const resp = await this._anthropicClient.messages.create({
      model: this.config.model || "claude-sonnet-4-20250514",
      max_tokens: maxTokens,
      system: system,
      messages: [{ role: "user", content: userMessage }],
    });
    const content = resp.content[0]?.type === "text" ? resp.content[0].text : "";
    return {
      text: content,
      usage: resp.usage ? {
        prompt_tokens: resp.usage.input_tokens || 0,
        completion_tokens: resp.usage.output_tokens || 0,
        // Anthropic reports input and output separately; the total is their sum.
        total_tokens: (resp.usage.input_tokens || 0) + (resp.usage.output_tokens || 0),
      } : undefined,
    };
  }

  /**
   * Send one system + user message pair to the configured provider.
   *
   * @param system - System prompt.
   * @param userMessage - User message.
   * @param maxTokens - Output token limit passed to the provider (default 8192).
   * @returns The generated text ("" when the provider returned none).
   * @throws Error for an unknown provider; provider errors propagate unchanged.
   */
  async chatComplete(system: string, userMessage: string, maxTokens = 8192): Promise<string> {
    let result: ProviderResult;

    switch (this.config.provider) {
      case "gemini":
        result = await geminiComplete(this.config, system, userMessage, maxTokens);
        break;
      case "azure-openai":
        result = await this.azureComplete(system, userMessage, maxTokens);
        break;
      case "openai":
        result = await this.openaiComplete(system, userMessage, maxTokens);
        break;
      case "anthropic":
        result = await this.anthropicComplete(system, userMessage, maxTokens);
        break;
      default:
        throw new Error(`Unknown LLM provider: ${this.config.provider}`);
    }

    // Track usage. Every completed call counts, even when the provider sent
    // no usage metadata; token totals only grow when counts are available.
    this.usage.totalCalls++;
    if (result.usage) {
      this.usage.promptTokens += result.usage.prompt_tokens || 0;
      this.usage.completionTokens += result.usage.completion_tokens || 0;
      this.usage.totalTokens += result.usage.total_tokens || 0;
    }

    return result.text;
  }

  /** Return a copy of the accumulated usage counters. */
  getUsageStats(): UsageStats {
    return { ...this.usage };
  }

  /** Reset all usage counters to zero. */
  resetUsageStats(): void {
    this.usage.totalCalls = 0;
    this.usage.promptTokens = 0;
    this.usage.completionTokens = 0;
    this.usage.totalTokens = 0;
  }

  /**
   * Estimate the cost of the accumulated usage from a per-provider price
   * table. Providers missing from the table fall back to the Gemini prices.
   */
  getEstimatedCost(): number {
    // Pricing per 1M tokens (approximate)
    const pricing: Record<string, { input: number; output: number }> = {
      "gemini": { input: 0.075, output: 0.30 },
      "azure-openai": { input: 0.10, output: 0.40 },
      "openai": { input: 2.50, output: 10.00 },
      "anthropic": { input: 3.00, output: 15.00 },
    };
    const p = pricing[this.config.provider] || pricing["gemini"];
    return (this.usage.promptTokens / 1_000_000) * p.input + (this.usage.completionTokens / 1_000_000) * p.output;
  }

  /** Print the usage counters and estimated cost to the console. */
  printUsageSummary(): void {
    const u = this.usage;
    const cost = this.getEstimatedCost();

    console.log(`\x1b[34m📊 LLM 사용량 (${this.config.provider}/${this.config.model}):\x1b[0m`);
    console.log(`  호출 횟수: ${u.totalCalls}회`);
    console.log(`  입력 토큰: ${u.promptTokens.toLocaleString()}`);
    console.log(`  출력 토큰: ${u.completionTokens.toLocaleString()}`);
    console.log(`  총 토큰: ${u.totalTokens.toLocaleString()}`);
    console.log(`  예상 비용: ~$${cost.toFixed(4)}`);
  }
}
|
|
145
213
|
|
|
146
|
-
// ──
|
|
214
|
+
// ── Deprecated global state wrappers (for backward compatibility) ──
|
|
215
|
+
|
|
216
|
+
/**
 * Shared client instance behind the legacy function-style API in this module.
 * Stays `null` until `setLLMConfig` is called.
 * @deprecated Use LLMClient class instead
 */
let _globalClient: LLMClient | null = null;

/**
 * Build a new `LLMClient` from `config` and make it the shared instance,
 * replacing any instance installed earlier.
 * @deprecated Use `new LLMClient(config)` instead
 */
export function setLLMConfig(config: LLMConfig): void {
  const freshClient = new LLMClient(config);
  _globalClient = freshClient;
}
|
|
223
|
+
|
|
224
|
+
/**
 * Usage counters of the shared client, or all-zero counters when no shared
 * client has been configured yet.
 * @deprecated Use LLMClient instance methods instead
 */
export function getUsageStats(): UsageStats {
  return _globalClient
    ? _globalClient.getUsageStats()
    : { totalCalls: 0, promptTokens: 0, completionTokens: 0, totalTokens: 0 };
}
|
|
229
|
+
|
|
230
|
+
/**
 * Zero the usage counters of the shared client; does nothing when no shared
 * client has been configured.
 * @deprecated Use LLMClient instance methods instead
 */
export function resetUsageStats(): void {
  _globalClient?.resetUsageStats();
}
|
|
234
|
+
|
|
235
|
+
/**
 * Estimated cost reported by the shared client, or 0 when no shared client
 * has been configured.
 * @deprecated Use LLMClient instance methods instead
 */
export function getEstimatedCost(): number {
  return _globalClient ? _globalClient.getEstimatedCost() : 0;
}
|
|
240
|
+
|
|
241
|
+
/**
 * Print the shared client's usage summary; prints nothing when no shared
 * client has been configured.
 * @deprecated Use LLMClient instance methods instead
 */
export function printUsageSummary(): void {
  _globalClient?.printUsageSummary();
}
|
|
245
|
+
|
|
246
|
+
/**
 * Run a completion through the shared client.
 *
 * @param system - System prompt.
 * @param userMessage - User message.
 * @param maxTokens - Output token limit forwarded to the client (default 8192).
 * @returns The text produced by the shared client's `chatComplete`.
 * @throws Error (as a rejected promise) when `setLLMConfig` has not been called.
 * @deprecated Use LLMClient instance methods instead
 */
export async function chatComplete(
  system: string,
  userMessage: string,
  maxTokens = 8192
): Promise<string> {
  const client = _globalClient;
  if (!client) {
    throw new Error("LLM config not set. Call setLLMConfig() first.");
  }
  return client.chatComplete(system, userMessage, maxTokens);
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import { expect, test, describe } from "bun:test";
|
|
2
|
+
import { slugify, cleanTitle } from "./chunker";
|
|
3
|
+
|
|
4
|
+
// Spec for slugify. Exact-output cases are table-driven as
// [test name, input, expected slug]; the length-limit case stays separate
// because it asserts a bound rather than an exact value.
describe("slugify", () => {
  // NOTE(review): the "연속 공백/하이픈" input is shown with a single space;
  // confirm whether several consecutive spaces were intended.
  const exactCases: ReadonlyArray<readonly [string, string, string]> = [
    ["영어 텍스트", "Hello World", "hello-world"],
    ["한국어 텍스트", "양자역학", "양자역학"],
    ["한영 혼합", "Chapter 3 양자역학", "chapter-3-양자역학"],
    ["특수문자 제거", "Hello! @World#", "hello-world"],
    ["빈 문자열", "", ""],
    ["연속 공백/하이픈", "hello world---test", "hello-world-test"],
  ];

  for (const [label, input, expected] of exactCases) {
    test(label, () => {
      expect(slugify(input)).toBe(expected);
    });
  }

  test("80자 제한", () => {
    const long = "a".repeat(100);
    const slug = slugify(long);
    expect(slug.length).toBeLessThanOrEqual(80);
  });

  test("한글 자모", () => {
    const slug = slugify("ㅋㅋㅋ 테스트");
    expect(slug).toBe("ㅋㅋㅋ-테스트");
  });
});
|
|
31
|
+
|
|
32
|
+
// Spec for cleanTitle, table-driven as [test name, raw title, expected title]:
// a "Chapter N" prefix and a dotted numeric prefix are stripped, and a plain
// title is returned unchanged.
describe("cleanTitle", () => {
  const cases: ReadonlyArray<readonly [string, string, string]> = [
    ["Chapter 번호 제거", "Chapter 3 Quantum Mechanics", "Quantum Mechanics"],
    ["숫자 접두사 제거", "3.2.1 Angular Momentum", "Angular Momentum"],
    ["일반 제목 유지", "Quantum Mechanics", "Quantum Mechanics"],
  ];

  cases.forEach(([label, raw, expected]) => {
    test(label, () => {
      expect(cleanTitle(raw)).toBe(expected);
    });
  });
});
|