@open330/kiwimu 0.4.1 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,11 +5,3 @@ export async function extractTextFromDocx(filePath: string): Promise<{ title: st
5
5
  const title = filePath.split("/").pop()?.replace(/\.docx?$/i, "") || "Untitled";
6
6
  return { title, text };
7
7
  }
8
-
9
- export async function extractHtmlFromDocx(filePath: string): Promise<{ title: string; html: string }> {
10
- const mammoth = require("mammoth");
11
- const result = await mammoth.convertToHtml({ path: filePath });
12
- const html: string = result.value;
13
- const title = filePath.split("/").pop()?.replace(/\.docx?$/i, "") || "Untitled";
14
- return { title, html };
15
- }
@@ -9,7 +9,7 @@ export async function extractWithTextutil(filePath: string): Promise<{ title: st
9
9
  const textutilFormats = new Set(["doc", "rtf", "odt"]);
10
10
 
11
11
  if (textutilFormats.has(ext)) {
12
- const proc = Bun.spawn(["textutil", "-convert", "txt", "-stdout", filePath], {
12
+ const proc = Bun.spawn(["textutil", "-convert", "txt", "-stdout", "--", filePath], {
13
13
  stdout: "pipe",
14
14
  stderr: "pipe",
15
15
  });
@@ -26,12 +26,12 @@ export async function extractWithTextutil(filePath: string): Promise<{ title: st
26
26
  if (ext === "key") {
27
27
  // Try to extract text using mdimport/spotlight metadata
28
28
  try {
29
- const proc = Bun.spawn(["mdimport", "-d2", filePath], { stdout: "pipe", stderr: "pipe" });
29
+ const proc = Bun.spawn(["mdimport", "-d2", "--", filePath], { stdout: "pipe", stderr: "pipe" });
30
30
  await proc.exited;
31
31
  } catch {}
32
32
 
33
33
  // Keynote files are directories or zip-like packages. Try strings extraction.
34
- const proc = Bun.spawn(["strings", filePath], { stdout: "pipe", stderr: "pipe" });
34
+ const proc = Bun.spawn(["strings", "--", filePath], { stdout: "pipe", stderr: "pipe" });
35
35
  const raw = await new Response(proc.stdout).text();
36
36
  await proc.exited;
37
37
 
@@ -50,7 +50,7 @@ export async function extractWithTextutil(filePath: string): Promise<{ title: st
50
50
 
51
51
  // For .ppt (legacy PowerPoint), try textutil or strings
52
52
  if (ext === "ppt") {
53
- const proc = Bun.spawn(["strings", filePath], { stdout: "pipe", stderr: "pipe" });
53
+ const proc = Bun.spawn(["strings", "--", filePath], { stdout: "pipe", stderr: "pipe" });
54
54
  const raw = await new Response(proc.stdout).text();
55
55
  await proc.exited;
56
56
 
package/src/ingest/pdf.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  export async function extractTextFromPdf(pdfPath: string): Promise<{ title: string; text: string }> {
2
- let pdfParse: any;
2
+ let pdfParse: (buffer: Buffer) => Promise<{ info?: { Title?: string }; text: string }>;
3
3
  try {
4
4
  pdfParse = require("pdf-parse");
5
5
  } catch {
@@ -2,7 +2,6 @@ import { readFileSync } from "fs";
2
2
 
3
3
  export async function extractTextFromPptx(filePath: string): Promise<{ title: string; text: string }> {
4
4
  // PPTX is a ZIP containing XML files
5
- const { Decompress } = await import("bun");
6
5
  const JSZip = (await import("jszip")).default;
7
6
 
8
7
  const buffer = readFileSync(filePath);
@@ -0,0 +1,41 @@
1
+ import { expect, test, describe } from "bun:test";
2
+ import { validateUrl } from "./web";
3
+
4
+ describe("validateUrl", () => {
5
+ test("정상 HTTP URL 허용", () => {
6
+ expect(() => validateUrl("http://example.com")).not.toThrow();
7
+ });
8
+ test("정상 HTTPS URL 허용", () => {
9
+ expect(() => validateUrl("https://example.com/page")).not.toThrow();
10
+ });
11
+ test("localhost 차단", () => {
12
+ expect(() => validateUrl("http://localhost:3000")).toThrow();
13
+ });
14
+ test("127.0.0.1 차단", () => {
15
+ expect(() => validateUrl("http://127.0.0.1")).toThrow();
16
+ });
17
+ test("10.x.x.x 차단", () => {
18
+ expect(() => validateUrl("http://10.0.0.1")).toThrow();
19
+ });
20
+ test("172.16.x.x 차단", () => {
21
+ expect(() => validateUrl("http://172.16.0.1")).toThrow();
22
+ });
23
+ test("192.168.x.x 차단", () => {
24
+ expect(() => validateUrl("http://192.168.1.1")).toThrow();
25
+ });
26
+ test("169.254.x.x 차단", () => {
27
+ expect(() => validateUrl("http://169.254.169.254")).toThrow();
28
+ });
29
+ test("file:// 프로토콜 차단", () => {
30
+ expect(() => validateUrl("file:///etc/passwd")).toThrow();
31
+ });
32
+ test("ftp:// 프로토콜 차단", () => {
33
+ expect(() => validateUrl("ftp://example.com")).toThrow();
34
+ });
35
+ test(".local 도메인 차단", () => {
36
+ expect(() => validateUrl("http://server.local")).toThrow();
37
+ });
38
+ test("0.0.0.0 차단", () => {
39
+ expect(() => validateUrl("http://0.0.0.0")).toThrow();
40
+ });
41
+ });
package/src/ingest/web.ts CHANGED
@@ -1,77 +1,76 @@
1
1
  import * as cheerio from "cheerio";
2
+ import { URL } from "url";
2
3
 
3
- export interface Section {
4
- level: number;
5
- title: string;
6
- htmlParts: string[];
7
- }
8
-
9
- const HEADING_TAGS = new Set(["h1", "h2", "h3", "h4"]);
10
- const SKIP_TAGS = new Set(["nav", "header", "footer", "script", "style", "noscript"]);
11
- const CONTAINER_TAGS = new Set([
12
- "html", "head", "body", "div", "article", "main", "section", "aside", "details", "summary",
13
- ]);
4
+ /**
5
+ * Validate a URL to prevent SSRF attacks.
6
+ * Blocks private/internal IP ranges and non-http(s) schemes.
7
+ */
8
+ export function validateUrl(urlStr: string): void {
9
+ let parsed: URL;
10
+ try {
11
+ parsed = new URL(urlStr);
12
+ } catch {
13
+ throw new Error("유효하지 않은 URL입니다");
14
+ }
14
15
 
15
- export async function fetchPage(url: string): Promise<{ title: string; html: string }> {
16
- const resp = await fetch(url, {
17
- headers: { "User-Agent": "kiwimu/0.2 (learning wiki builder)" },
18
- });
19
- if (!resp.ok) throw new Error(`Failed to fetch ${url}: ${resp.status}`);
20
- const html = await resp.text();
21
- const $ = cheerio.load(html);
22
- const title = $("title").text().trim() || url;
23
- const body = $("body").html() || html;
24
- return { title, html: body };
25
- }
16
+ if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
17
+ throw new Error("http 또는 https URL만 허용됩니다");
18
+ }
26
19
 
27
- export function extractSections(html: string): Section[] {
28
- const $ = cheerio.load(html, null, false);
29
- const sections: Section[] = [];
30
- let current: Section = { level: 1, title: "Introduction", htmlParts: [] };
20
+ const hostname = parsed.hostname;
31
21
 
32
- function walk(el: cheerio.AnyNode): void {
33
- if (el.type === "text") return;
34
- if (el.type !== "tag") return;
22
+ // Block IP-based hostnames in private ranges
23
+ // IPv4 pattern
24
+ const ipv4Match = hostname.match(/^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/);
25
+ if (ipv4Match) {
26
+ const [, a, b, c, d] = ipv4Match.map(Number);
27
+ if (
28
+ a === 127 || // 127.0.0.0/8
29
+ a === 10 || // 10.0.0.0/8
30
+ (a === 172 && b >= 16 && b <= 31) || // 172.16.0.0/12
31
+ (a === 192 && b === 168) || // 192.168.0.0/16
32
+ (a === 169 && b === 254) || // 169.254.0.0/16
33
+ (a === 0 && b === 0 && c === 0 && d === 0) // 0.0.0.0
34
+ ) {
35
+ throw new Error("내부 네트워크 주소는 허용되지 않습니다");
36
+ }
37
+ }
35
38
 
36
- const tagName = (el as cheerio.Element).tagName.toLowerCase();
39
+ // Block common private hostnames
40
+ if (hostname === "localhost" || hostname === "[::1]" || hostname.endsWith(".local")) {
41
+ throw new Error("내부 네트워크 주소는 허용되지 않습니다");
42
+ }
43
+ }
37
44
 
38
- if (SKIP_TAGS.has(tagName)) return;
45
+ export async function fetchPage(url: string): Promise<{ title: string; html: string }> {
46
+ validateUrl(url);
39
47
 
40
- if (HEADING_TAGS.has(tagName)) {
41
- if (current.htmlParts.length > 0) {
42
- sections.push(current);
43
- }
44
- current = {
45
- level: parseInt(tagName[1]),
46
- title: $(el).text().trim(),
47
- htmlParts: [],
48
- };
49
- return;
50
- }
48
+ let currentUrl = url;
49
+ const maxRedirects = 5;
51
50
 
52
- if (CONTAINER_TAGS.has(tagName)) {
53
- for (const child of (el as cheerio.Element).children) {
54
- walk(child);
55
- }
56
- return;
57
- }
51
+ for (let i = 0; i <= maxRedirects; i++) {
52
+ const resp = await fetch(currentUrl, {
53
+ headers: { "User-Agent": "kiwimu/0.4 (learning wiki builder)" },
54
+ redirect: "manual",
55
+ });
58
56
 
59
- // Content element
60
- const html = $.html(el)?.trim();
61
- if (html) {
62
- current.htmlParts.push(html);
57
+ if (resp.status >= 300 && resp.status < 400) {
58
+ const location = resp.headers.get("location");
59
+ if (!location) throw new Error(`Redirect without location header from ${currentUrl}`);
60
+ // Resolve relative redirect URLs
61
+ const redirectUrl = new URL(location, currentUrl).href;
62
+ validateUrl(redirectUrl); // Re-validate redirect target to prevent SSRF bypass
63
+ currentUrl = redirectUrl;
64
+ continue;
63
65
  }
64
- }
65
-
66
- // Walk root children
67
- const root = $.root();
68
- for (const child of root.contents().toArray()) {
69
- walk(child);
70
- }
71
66
 
72
- if (current.htmlParts.length > 0) {
73
- sections.push(current);
67
+ if (!resp.ok) throw new Error(`Failed to fetch ${currentUrl}: ${resp.status}`);
68
+ const html = await resp.text();
69
+ const $ = cheerio.load(html);
70
+ const title = $("title").text().trim() || url;
71
+ const body = $("body").html() || html;
72
+ return { title, html: body };
74
73
  }
75
74
 
76
- return sections.filter((s) => s.htmlParts.length > 0);
75
+ throw new Error(`Too many redirects fetching ${url}`);
77
76
  }
package/src/llm-client.ts CHANGED
@@ -8,73 +8,14 @@ export interface UsageStats {
8
8
  totalTokens: number;
9
9
  }
10
10
 
11
- const _usage: UsageStats = {
12
- totalCalls: 0,
13
- promptTokens: 0,
14
- completionTokens: 0,
15
- totalTokens: 0,
16
- };
17
-
18
- let _llmConfig: LLMConfig | null = null;
19
-
20
- export function setLLMConfig(config: LLMConfig): void {
21
- _llmConfig = config;
22
- }
23
-
24
- export function getLLMConfig(): LLMConfig {
25
- if (!_llmConfig) throw new Error("LLM config not set. Call setLLMConfig() first.");
26
- return _llmConfig;
27
- }
28
-
29
- export function getUsageStats(): UsageStats {
30
- return { ..._usage };
31
- }
32
-
33
- export function resetUsageStats(): void {
34
- _usage.totalCalls = 0;
35
- _usage.promptTokens = 0;
36
- _usage.completionTokens = 0;
37
- _usage.totalTokens = 0;
38
- }
39
-
40
- export function getEstimatedCost(): number {
41
- const config = _llmConfig;
42
- if (!config) return 0;
43
-
44
- // Pricing per 1M tokens (approximate)
45
- const pricing: Record<string, { input: number; output: number }> = {
46
- "gemini": { input: 0.075, output: 0.30 },
47
- "azure-openai": { input: 0.10, output: 0.40 },
48
- "openai": { input: 0.15, output: 0.60 },
49
- "anthropic": { input: 3.00, output: 15.00 },
50
- };
51
- const p = pricing[config.provider] || pricing["gemini"];
52
- return (_usage.promptTokens / 1_000_000) * p.input + (_usage.completionTokens / 1_000_000) * p.output;
53
- }
54
-
55
- export function printUsageSummary(): void {
56
- const u = _usage;
57
- const cost = getEstimatedCost();
58
- const provider = _llmConfig?.provider || "unknown";
59
- const model = _llmConfig?.model || "unknown";
60
-
61
- console.log(`\x1b[34m📊 LLM 사용량 (${provider}/${model}):\x1b[0m`);
62
- console.log(` 호출 횟수: ${u.totalCalls}회`);
63
- console.log(` 입력 토큰: ${u.promptTokens.toLocaleString()}`);
64
- console.log(` 출력 토큰: ${u.completionTokens.toLocaleString()}`);
65
- console.log(` 총 토큰: ${u.totalTokens.toLocaleString()}`);
66
- console.log(` 예상 비용: ~$${cost.toFixed(4)}`);
67
- }
68
-
69
11
  // ── Provider implementations ──
70
12
 
71
- async function geminiComplete(system: string, userMessage: string, maxTokens: number): Promise<{ text: string; usage?: any }> {
72
- const config = getLLMConfig();
73
- const url = `https://generativelanguage.googleapis.com/v1beta/models/${config.model}:generateContent?key=${config.api_key}`;
13
+ async function geminiComplete(config: LLMConfig, system: string, userMessage: string, maxTokens: number): Promise<{ text: string; usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number } }> {
14
+ const url = `https://generativelanguage.googleapis.com/v1beta/models/${config.model}:generateContent`;
74
15
 
75
16
  const resp = await fetch(url, {
76
17
  method: "POST",
77
- headers: { "Content-Type": "application/json" },
18
+ headers: { "Content-Type": "application/json", "x-goog-api-key": config.api_key },
78
19
  body: JSON.stringify({
79
20
  system_instruction: { parts: [{ text: system }] },
80
21
  contents: [{ parts: [{ text: userMessage }] }],
@@ -87,9 +28,10 @@ async function geminiComplete(system: string, userMessage: string, maxTokens: nu
87
28
  throw new Error(`Gemini API error (${resp.status}): ${err.slice(0, 200)}`);
88
29
  }
89
30
 
90
- const data = await resp.json();
91
- const text = data.candidates?.[0]?.content?.parts?.[0]?.text || "";
92
- const usage = data.usageMetadata;
31
+ const data = await resp.json() as Record<string, unknown>;
32
+ const candidates = data.candidates as Array<{ content: { parts: Array<{ text: string }> } }> | undefined;
33
+ const text = candidates?.[0]?.content?.parts?.[0]?.text || "";
34
+ const usage = data.usageMetadata as { promptTokenCount?: number; candidatesTokenCount?: number; totalTokenCount?: number } | undefined;
93
35
  return {
94
36
  text,
95
37
  usage: usage ? {
@@ -100,78 +42,213 @@ async function geminiComplete(system: string, userMessage: string, maxTokens: nu
100
42
  };
101
43
  }
102
44
 
103
- async function azureOpenAIComplete(system: string, userMessage: string, maxTokens: number): Promise<{ text: string; usage?: any }> {
104
- const config = getLLMConfig();
105
-
106
- // Try loading from ~/keys/openai.azure.com/ if no api_key in config
107
- let apiKey = config.api_key;
108
- let endpoint = config.endpoint;
109
- let model = config.model;
110
-
111
- if (!apiKey) {
112
- try {
113
- const keyFile = `${process.env.HOME}/keys/openai.azure.com/${config.model}.json`;
114
- const raw = require("fs").readFileSync(keyFile, "utf-8");
115
- const keyConfig = JSON.parse(raw)[0];
116
- apiKey = keyConfig.key;
117
- endpoint = keyConfig.endpoint.split("/openai/")[0];
118
- model = keyConfig.deployment;
119
- } catch {
120
- throw new Error("Azure OpenAI API key not configured");
45
+ // ── Class-based LLM client ──
46
+
47
+ type ProviderResult = { text: string; usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number } };
48
+
49
+ export class LLMClient {
50
+ private config: LLMConfig;
51
+ private usage: UsageStats = { totalCalls: 0, promptTokens: 0, completionTokens: 0, totalTokens: 0 };
52
+ private _openaiClient: InstanceType<typeof import("openai").default> | null = null;
53
+ private _anthropicClient: InstanceType<typeof import("@anthropic-ai/sdk").default> | null = null;
54
+ private _azureClient: InstanceType<typeof import("openai").AzureOpenAI> | null = null;
55
+
56
+ constructor(config: LLMConfig) {
57
+ this.config = config;
58
+ }
59
+
60
+ private async azureComplete(system: string, userMessage: string, maxTokens: number): Promise<ProviderResult> {
61
+ let apiKey = this.config.api_key;
62
+ let endpoint = this.config.endpoint;
63
+ let model = this.config.model;
64
+
65
+ if (!apiKey) {
66
+ try {
67
+ const keyFile = `${process.env.HOME}/keys/openai.azure.com/${this.config.model}.json`;
68
+ const raw = require("fs").readFileSync(keyFile, "utf-8");
69
+ const keyConfig = JSON.parse(raw)[0] as { key: string; endpoint: string; deployment: string };
70
+ apiKey = keyConfig.key;
71
+ endpoint = keyConfig.endpoint.split("/openai/")[0];
72
+ model = keyConfig.deployment;
73
+ } catch {
74
+ throw new Error("Azure OpenAI API key not configured");
75
+ }
76
+ }
77
+
78
+ if (!this._azureClient) {
79
+ const { AzureOpenAI } = await import("openai");
80
+ this._azureClient = new AzureOpenAI({ endpoint, apiKey, deployment: model, apiVersion: "2024-12-01-preview" });
121
81
  }
82
+
83
+ const resp = await this._azureClient.chat.completions.create({
84
+ model: model,
85
+ max_completion_tokens: maxTokens,
86
+ messages: [
87
+ { role: "system", content: system },
88
+ { role: "user", content: userMessage },
89
+ ],
90
+ });
91
+
92
+ return {
93
+ text: resp.choices[0]?.message?.content || "",
94
+ usage: resp.usage ? {
95
+ prompt_tokens: resp.usage.prompt_tokens || 0,
96
+ completion_tokens: resp.usage.completion_tokens || 0,
97
+ total_tokens: resp.usage.total_tokens || 0,
98
+ } : undefined,
99
+ };
122
100
  }
123
101
 
124
- const { AzureOpenAI } = await import("openai");
125
- const client = new AzureOpenAI({ endpoint, apiKey, deployment: model, apiVersion: "2024-12-01-preview" });
102
+ private async openaiComplete(system: string, userMessage: string, maxTokens: number): Promise<ProviderResult> {
103
+ const { default: OpenAI } = await import("openai");
104
+ if (!this._openaiClient) {
105
+ this._openaiClient = new OpenAI({ apiKey: this.config.api_key });
106
+ }
107
+ const resp = await this._openaiClient.chat.completions.create({
108
+ model: this.config.model || "gpt-4o",
109
+ messages: [
110
+ { role: "system", content: system },
111
+ { role: "user", content: userMessage },
112
+ ],
113
+ max_tokens: maxTokens,
114
+ });
115
+ return {
116
+ text: resp.choices[0]?.message?.content || "",
117
+ usage: resp.usage ? {
118
+ prompt_tokens: resp.usage.prompt_tokens || 0,
119
+ completion_tokens: resp.usage.completion_tokens || 0,
120
+ total_tokens: resp.usage.total_tokens || 0,
121
+ } : undefined,
122
+ };
123
+ }
126
124
 
127
- const resp = await client.chat.completions.create({
128
- model,
129
- max_completion_tokens: maxTokens,
130
- messages: [
131
- { role: "system", content: system },
132
- { role: "user", content: userMessage },
133
- ],
134
- });
125
+ private async anthropicComplete(system: string, userMessage: string, maxTokens: number): Promise<ProviderResult> {
126
+ const { default: Anthropic } = await import("@anthropic-ai/sdk");
127
+ if (!this._anthropicClient) {
128
+ this._anthropicClient = new Anthropic({ apiKey: this.config.api_key });
129
+ }
130
+ const resp = await this._anthropicClient.messages.create({
131
+ model: this.config.model || "claude-sonnet-4-20250514",
132
+ max_tokens: maxTokens,
133
+ system: system,
134
+ messages: [{ role: "user", content: userMessage }],
135
+ });
136
+ const content = resp.content[0]?.type === "text" ? resp.content[0].text : "";
137
+ return {
138
+ text: content,
139
+ usage: resp.usage ? {
140
+ prompt_tokens: resp.usage.input_tokens || 0,
141
+ completion_tokens: resp.usage.output_tokens || 0,
142
+ total_tokens: (resp.usage.input_tokens || 0) + (resp.usage.output_tokens || 0),
143
+ } : undefined,
144
+ };
145
+ }
135
146
 
136
- return {
137
- text: resp.choices[0]?.message?.content || "",
138
- usage: resp.usage ? {
139
- prompt_tokens: resp.usage.prompt_tokens || 0,
140
- completion_tokens: resp.usage.completion_tokens || 0,
141
- total_tokens: resp.usage.total_tokens || 0,
142
- } : undefined,
143
- };
147
+ async chatComplete(system: string, userMessage: string, maxTokens = 8192): Promise<string> {
148
+ let result: ProviderResult;
149
+
150
+ switch (this.config.provider) {
151
+ case "gemini":
152
+ result = await geminiComplete(this.config, system, userMessage, maxTokens);
153
+ break;
154
+ case "azure-openai":
155
+ result = await this.azureComplete(system, userMessage, maxTokens);
156
+ break;
157
+ case "openai":
158
+ result = await this.openaiComplete(system, userMessage, maxTokens);
159
+ break;
160
+ case "anthropic":
161
+ result = await this.anthropicComplete(system, userMessage, maxTokens);
162
+ break;
163
+ default:
164
+ throw new Error(`Unknown LLM provider: ${this.config.provider}`);
165
+ }
166
+
167
+ // Track usage
168
+ if (result.usage) {
169
+ this.usage.totalCalls++;
170
+ this.usage.promptTokens += result.usage.prompt_tokens || 0;
171
+ this.usage.completionTokens += result.usage.completion_tokens || 0;
172
+ this.usage.totalTokens += result.usage.total_tokens || 0;
173
+ }
174
+
175
+ return result.text;
176
+ }
177
+
178
+ getUsageStats(): UsageStats {
179
+ return { ...this.usage };
180
+ }
181
+
182
+ resetUsageStats(): void {
183
+ this.usage.totalCalls = 0;
184
+ this.usage.promptTokens = 0;
185
+ this.usage.completionTokens = 0;
186
+ this.usage.totalTokens = 0;
187
+ }
188
+
189
+ getEstimatedCost(): number {
190
+ // Pricing per 1M tokens (approximate)
191
+ const pricing: Record<string, { input: number; output: number }> = {
192
+ "gemini": { input: 0.075, output: 0.30 },
193
+ "azure-openai": { input: 0.10, output: 0.40 },
194
+ "openai": { input: 2.50, output: 10.00 },
195
+ "anthropic": { input: 3.00, output: 15.00 },
196
+ };
197
+ const p = pricing[this.config.provider] || pricing["gemini"];
198
+ return (this.usage.promptTokens / 1_000_000) * p.input + (this.usage.completionTokens / 1_000_000) * p.output;
199
+ }
200
+
201
+ printUsageSummary(): void {
202
+ const u = this.usage;
203
+ const cost = this.getEstimatedCost();
204
+
205
+ console.log(`\x1b[34m📊 LLM 사용량 (${this.config.provider}/${this.config.model}):\x1b[0m`);
206
+ console.log(` 호출 횟수: ${u.totalCalls}회`);
207
+ console.log(` 입력 토큰: ${u.promptTokens.toLocaleString()}`);
208
+ console.log(` 출력 토큰: ${u.completionTokens.toLocaleString()}`);
209
+ console.log(` 총 토큰: ${u.totalTokens.toLocaleString()}`);
210
+ console.log(` 예상 비용: ~$${cost.toFixed(4)}`);
211
+ }
144
212
  }
145
213
 
146
- // ── Main interface ──
214
+ // ── Deprecated global state wrappers (for backward compatibility) ──
215
+
216
+ /** @deprecated Use LLMClient class instead */
217
+ let _globalClient: LLMClient | null = null;
147
218
 
219
+ /** @deprecated Use `new LLMClient(config)` instead */
220
+ export function setLLMConfig(config: LLMConfig): void {
221
+ _globalClient = new LLMClient(config);
222
+ }
223
+
224
+ /** @deprecated Use LLMClient instance methods instead */
225
+ export function getUsageStats(): UsageStats {
226
+ if (!_globalClient) return { totalCalls: 0, promptTokens: 0, completionTokens: 0, totalTokens: 0 };
227
+ return _globalClient.getUsageStats();
228
+ }
229
+
230
+ /** @deprecated Use LLMClient instance methods instead */
231
+ export function resetUsageStats(): void {
232
+ if (_globalClient) _globalClient.resetUsageStats();
233
+ }
234
+
235
+ /** @deprecated Use LLMClient instance methods instead */
236
+ export function getEstimatedCost(): number {
237
+ if (!_globalClient) return 0;
238
+ return _globalClient.getEstimatedCost();
239
+ }
240
+
241
+ /** @deprecated Use LLMClient instance methods instead */
242
+ export function printUsageSummary(): void {
243
+ if (_globalClient) _globalClient.printUsageSummary();
244
+ }
245
+
246
+ /** @deprecated Use LLMClient instance methods instead */
148
247
  export async function chatComplete(
149
248
  system: string,
150
249
  userMessage: string,
151
250
  maxTokens = 8192
152
251
  ): Promise<string> {
153
- const config = getLLMConfig();
154
-
155
- let result: { text: string; usage?: any };
156
-
157
- switch (config.provider) {
158
- case "gemini":
159
- result = await geminiComplete(system, userMessage, maxTokens);
160
- break;
161
- case "azure-openai":
162
- result = await azureOpenAIComplete(system, userMessage, maxTokens);
163
- break;
164
- default:
165
- throw new Error(`Unknown LLM provider: ${config.provider}`);
166
- }
167
-
168
- // Track usage
169
- if (result.usage) {
170
- _usage.totalCalls++;
171
- _usage.promptTokens += result.usage.prompt_tokens || 0;
172
- _usage.completionTokens += result.usage.completion_tokens || 0;
173
- _usage.totalTokens += result.usage.total_tokens || 0;
174
- }
175
-
176
- return result.text;
252
+ if (!_globalClient) throw new Error("LLM config not set. Call setLLMConfig() first.");
253
+ return _globalClient.chatComplete(system, userMessage, maxTokens);
177
254
  }
@@ -0,0 +1,42 @@
1
+ import { expect, test, describe } from "bun:test";
2
+ import { slugify, cleanTitle } from "./chunker";
3
+
4
+ describe("slugify", () => {
5
+ test("영어 텍스트", () => {
6
+ expect(slugify("Hello World")).toBe("hello-world");
7
+ });
8
+ test("한국어 텍스트", () => {
9
+ expect(slugify("양자역학")).toBe("양자역학");
10
+ });
11
+ test("한영 혼합", () => {
12
+ expect(slugify("Chapter 3 양자역학")).toBe("chapter-3-양자역학");
13
+ });
14
+ test("특수문자 제거", () => {
15
+ expect(slugify("Hello! @World#")).toBe("hello-world");
16
+ });
17
+ test("빈 문자열", () => {
18
+ expect(slugify("")).toBe("");
19
+ });
20
+ test("연속 공백/하이픈", () => {
21
+ expect(slugify("hello world---test")).toBe("hello-world-test");
22
+ });
23
+ test("80자 제한", () => {
24
+ const long = "a".repeat(100);
25
+ expect(slugify(long).length).toBeLessThanOrEqual(80);
26
+ });
27
+ test("한글 자모", () => {
28
+ expect(slugify("ㅋㅋㅋ 테스트")).toBe("ㅋㅋㅋ-테스트");
29
+ });
30
+ });
31
+
32
+ describe("cleanTitle", () => {
33
+ test("Chapter 번호 제거", () => {
34
+ expect(cleanTitle("Chapter 3 Quantum Mechanics")).toBe("Quantum Mechanics");
35
+ });
36
+ test("숫자 접두사 제거", () => {
37
+ expect(cleanTitle("3.2.1 Angular Momentum")).toBe("Angular Momentum");
38
+ });
39
+ test("일반 제목 유지", () => {
40
+ expect(cleanTitle("Quantum Mechanics")).toBe("Quantum Mechanics");
41
+ });
42
+ });