npm - @open330/kiwimu - Versions diffs - 0.4.1 → 0.8.0 - Mend

@open330/kiwimu 0.4.1 → 0.8.0

This diff shows the differences between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (28)

package/README.md +98 -49
package/bin/kiwimu +1 -1
package/package.json +4 -1
package/personas/namuwiki.json +6 -0
package/src/build/renderer.ts +50 -2
package/src/build/static/search.js +33 -2
package/src/build/static/style.css +84 -1
package/src/build/templates.ts +353 -167
package/src/config.ts +35 -29
package/src/demo/sample-data.ts +70 -0
package/src/demo/setup.ts +31 -0
package/src/expand/llm.ts +1 -1
package/src/index.ts +234 -458
package/src/ingest/docx.ts +0 -8
package/src/ingest/legacy.ts +4 -4
package/src/ingest/pdf.ts +1 -1
package/src/ingest/pptx.ts +0 -1
package/src/ingest/web.test.ts +41 -0
package/src/ingest/web.ts +61 -62
package/src/llm-client.ts +203 -126
package/src/pipeline/chunker.test.ts +42 -0
package/src/pipeline/chunker.ts +1 -48
package/src/pipeline/llm-chunker.ts +144 -59
package/src/server.ts +327 -0
package/src/services/ingest.ts +100 -0
package/src/store.test.ts +132 -0
package/src/store.ts +206 -2
package/src/pipeline/llm-linker.ts +0 -84

package/src/ingest/docx.ts CHANGED Viewed

@@ -5,11 +5,3 @@ export async function extractTextFromDocx(filePath: string): Promise<{ title: st
   const title = filePath.split("/").pop()?.replace(/\.docx?$/i, "") || "Untitled";
   return { title, text };
 }
-export async function extractHtmlFromDocx(filePath: string): Promise<{ title: string; html: string }> {
-  const mammoth = require("mammoth");
-  const result = await mammoth.convertToHtml({ path: filePath });
-  const html: string = result.value;
-  const title = filePath.split("/").pop()?.replace(/\.docx?$/i, "") || "Untitled";
-  return { title, html };
-}

package/src/ingest/legacy.ts CHANGED Viewed

@@ -9,7 +9,7 @@ export async function extractWithTextutil(filePath: string): Promise<{ title: st
   const textutilFormats = new Set(["doc", "rtf", "odt"]);
   if (textutilFormats.has(ext)) {
-    const proc = Bun.spawn(["textutil", "-convert", "txt", "-stdout", filePath], {
+    const proc = Bun.spawn(["textutil", "-convert", "txt", "-stdout", "--", filePath], {
       stdout: "pipe",
       stderr: "pipe",
     });
@@ -26,12 +26,12 @@ export async function extractWithTextutil(filePath: string): Promise<{ title: st
   if (ext === "key") {
     // Try to extract text using mdimport/spotlight metadata
     try {
-      const proc = Bun.spawn(["mdimport", "-d2", filePath], { stdout: "pipe", stderr: "pipe" });
+      const proc = Bun.spawn(["mdimport", "-d2", "--", filePath], { stdout: "pipe", stderr: "pipe" });
       await proc.exited;
     } catch {}
     // Keynote files are directories or zip-like packages. Try strings extraction.
-    const proc = Bun.spawn(["strings", filePath], { stdout: "pipe", stderr: "pipe" });
+    const proc = Bun.spawn(["strings", "--", filePath], { stdout: "pipe", stderr: "pipe" });
     const raw = await new Response(proc.stdout).text();
     await proc.exited;
@@ -50,7 +50,7 @@ export async function extractWithTextutil(filePath: string): Promise<{ title: st
   // For .ppt (legacy PowerPoint), try textutil or strings
   if (ext === "ppt") {
-    const proc = Bun.spawn(["strings", filePath], { stdout: "pipe", stderr: "pipe" });
+    const proc = Bun.spawn(["strings", "--", filePath], { stdout: "pipe", stderr: "pipe" });
     const raw = await new Response(proc.stdout).text();
     await proc.exited;

package/src/ingest/pdf.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 export async function extractTextFromPdf(pdfPath: string): Promise<{ title: string; text: string }> {
-  let pdfParse: any;
+  let pdfParse: (buffer: Buffer) => Promise<{ info?: { Title?: string }; text: string }>;
   try {
     pdfParse = require("pdf-parse");
   } catch {

package/src/ingest/pptx.ts CHANGED Viewed

@@ -2,7 +2,6 @@ import { readFileSync } from "fs";
 export async function extractTextFromPptx(filePath: string): Promise<{ title: string; text: string }> {
   // PPTX is a ZIP containing XML files
-  const { Decompress } = await import("bun");
   const JSZip = (await import("jszip")).default;
   const buffer = readFileSync(filePath);

package/src/ingest/web.test.ts ADDED Viewed

@@ -0,0 +1,41 @@
+import { expect, test, describe } from "bun:test";
+import { validateUrl } from "./web";
+describe("validateUrl", () => {
+  test("정상 HTTP URL 허용", () => {
+    expect(() => validateUrl("http://example.com")).not.toThrow();
+  });
+  test("정상 HTTPS URL 허용", () => {
+    expect(() => validateUrl("https://example.com/page")).not.toThrow();
+  });
+  test("localhost 차단", () => {
+    expect(() => validateUrl("http://localhost:3000")).toThrow();
+  });
+  test("127.0.0.1 차단", () => {
+    expect(() => validateUrl("http://127.0.0.1")).toThrow();
+  });
+  test("10.x.x.x 차단", () => {
+    expect(() => validateUrl("http://10.0.0.1")).toThrow();
+  });
+  test("172.16.x.x 차단", () => {
+    expect(() => validateUrl("http://172.16.0.1")).toThrow();
+  });
+  test("192.168.x.x 차단", () => {
+    expect(() => validateUrl("http://192.168.1.1")).toThrow();
+  });
+  test("169.254.x.x 차단", () => {
+    expect(() => validateUrl("http://169.254.169.254")).toThrow();
+  });
+  test("file:// 프로토콜 차단", () => {
+    expect(() => validateUrl("file:///etc/passwd")).toThrow();
+  });
+  test("ftp:// 프로토콜 차단", () => {
+    expect(() => validateUrl("ftp://example.com")).toThrow();
+  });
+  test(".local 도메인 차단", () => {
+    expect(() => validateUrl("http://server.local")).toThrow();
+  });
+  test("0.0.0.0 차단", () => {
+    expect(() => validateUrl("http://0.0.0.0")).toThrow();
+  });
+});

package/src/ingest/web.ts CHANGED Viewed

@@ -1,77 +1,76 @@
 import * as cheerio from "cheerio";
+import { URL } from "url";
-export interface Section {
-  level: number;
-  title: string;
-  htmlParts: string[];
-}
-const HEADING_TAGS = new Set(["h1", "h2", "h3", "h4"]);
-const SKIP_TAGS = new Set(["nav", "header", "footer", "script", "style", "noscript"]);
-const CONTAINER_TAGS = new Set([
-  "html", "head", "body", "div", "article", "main", "section", "aside", "details", "summary",
-]);
+/**
+ * Validate a URL to prevent SSRF attacks.
+ * Blocks private/internal IP ranges and non-http(s) schemes.
+ */
+export function validateUrl(urlStr: string): void {
+  let parsed: URL;
+  try {
+    parsed = new URL(urlStr);
+  } catch {
+    throw new Error("유효하지 않은 URL입니다");
+  }
-export async function fetchPage(url: string): Promise<{ title: string; html: string }> {
-  const resp = await fetch(url, {
-    headers: { "User-Agent": "kiwimu/0.2 (learning wiki builder)" },
-  });
-  if (!resp.ok) throw new Error(`Failed to fetch ${url}: ${resp.status}`);
-  const html = await resp.text();
-  const $ = cheerio.load(html);
-  const title = $("title").text().trim() || url;
-  const body = $("body").html() || html;
-  return { title, html: body };
-}
+  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
+    throw new Error("http 또는 https URL만 허용됩니다");
+  }
-export function extractSections(html: string): Section[] {
-  const $ = cheerio.load(html, null, false);
-  const sections: Section[] = [];
-  let current: Section = { level: 1, title: "Introduction", htmlParts: [] };
+  const hostname = parsed.hostname;
-  function walk(el: cheerio.AnyNode): void {
-    if (el.type === "text") return;
-    if (el.type !== "tag") return;
+  // Block IP-based hostnames in private ranges
+  // IPv4 pattern
+  const ipv4Match = hostname.match(/^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/);
+  if (ipv4Match) {
+    const [, a, b, c, d] = ipv4Match.map(Number);
+    if (
+      a === 127 ||                              // 127.0.0.0/8
+      a === 10 ||                               // 10.0.0.0/8
+      (a === 172 && b >= 16 && b <= 31) ||      // 172.16.0.0/12
+      (a === 192 && b === 168) ||               // 192.168.0.0/16
+      (a === 169 && b === 254) ||               // 169.254.0.0/16
+      (a === 0 && b === 0 && c === 0 && d === 0) // 0.0.0.0
+    ) {
+      throw new Error("내부 네트워크 주소는 허용되지 않습니다");
+    }
+  }
-    const tagName = (el as cheerio.Element).tagName.toLowerCase();
+  // Block common private hostnames
+  if (hostname === "localhost" || hostname === "[::1]" || hostname.endsWith(".local")) {
+    throw new Error("내부 네트워크 주소는 허용되지 않습니다");
+  }
+}
-    if (SKIP_TAGS.has(tagName)) return;
+export async function fetchPage(url: string): Promise<{ title: string; html: string }> {
+  validateUrl(url);
-    if (HEADING_TAGS.has(tagName)) {
-      if (current.htmlParts.length > 0) {
-        sections.push(current);
-      }
-      current = {
-        level: parseInt(tagName[1]),
-        title: $(el).text().trim(),
-        htmlParts: [],
-      };
-      return;
-    }
+  let currentUrl = url;
+  const maxRedirects = 5;
-    if (CONTAINER_TAGS.has(tagName)) {
-      for (const child of (el as cheerio.Element).children) {
-        walk(child);
-      }
-      return;
-    }
+  for (let i = 0; i <= maxRedirects; i++) {
+    const resp = await fetch(currentUrl, {
+      headers: { "User-Agent": "kiwimu/0.4 (learning wiki builder)" },
+      redirect: "manual",
+    });
-    // Content element
-    const html = $.html(el)?.trim();
-    if (html) {
-      current.htmlParts.push(html);
+    if (resp.status >= 300 && resp.status < 400) {
+      const location = resp.headers.get("location");
+      if (!location) throw new Error(`Redirect without location header from ${currentUrl}`);
+      // Resolve relative redirect URLs
+      const redirectUrl = new URL(location, currentUrl).href;
+      validateUrl(redirectUrl); // Re-validate redirect target to prevent SSRF bypass
+      currentUrl = redirectUrl;
+      continue;
     }
-  }
-  // Walk root children
-  const root = $.root();
-  for (const child of root.contents().toArray()) {
-    walk(child);
-  }
-  if (current.htmlParts.length > 0) {
-    sections.push(current);
+    if (!resp.ok) throw new Error(`Failed to fetch ${currentUrl}: ${resp.status}`);
+    const html = await resp.text();
+    const $ = cheerio.load(html);
+    const title = $("title").text().trim() || url;
+    const body = $("body").html() || html;
+    return { title, html: body };
   }
-  return sections.filter((s) => s.htmlParts.length > 0);
+  throw new Error(`Too many redirects fetching ${url}`);
 }

package/src/llm-client.ts CHANGED Viewed

@@ -8,73 +8,14 @@ export interface UsageStats {
   totalTokens: number;
 }
-const _usage: UsageStats = {
-  totalCalls: 0,
-  promptTokens: 0,
-  completionTokens: 0,
-  totalTokens: 0,
-};
-let _llmConfig: LLMConfig | null = null;
-export function setLLMConfig(config: LLMConfig): void {
-  _llmConfig = config;
-}
-export function getLLMConfig(): LLMConfig {
-  if (!_llmConfig) throw new Error("LLM config not set. Call setLLMConfig() first.");
-  return _llmConfig;
-}
-export function getUsageStats(): UsageStats {
-  return { ..._usage };
-}
-export function resetUsageStats(): void {
-  _usage.totalCalls = 0;
-  _usage.promptTokens = 0;
-  _usage.completionTokens = 0;
-  _usage.totalTokens = 0;
-}
-export function getEstimatedCost(): number {
-  const config = _llmConfig;
-  if (!config) return 0;
-  // Pricing per 1M tokens (approximate)
-  const pricing: Record<string, { input: number; output: number }> = {
-    "gemini": { input: 0.075, output: 0.30 },
-    "azure-openai": { input: 0.10, output: 0.40 },
-    "openai": { input: 0.15, output: 0.60 },
-    "anthropic": { input: 3.00, output: 15.00 },
-  };
-  const p = pricing[config.provider] || pricing["gemini"];
-  return (_usage.promptTokens / 1_000_000) * p.input + (_usage.completionTokens / 1_000_000) * p.output;
-}
-export function printUsageSummary(): void {
-  const u = _usage;
-  const cost = getEstimatedCost();
-  const provider = _llmConfig?.provider || "unknown";
-  const model = _llmConfig?.model || "unknown";
-  console.log(`\x1b[34m📊 LLM 사용량 (${provider}/${model}):\x1b[0m`);
-  console.log(`  호출 횟수:     ${u.totalCalls}회`);
-  console.log(`  입력 토큰:     ${u.promptTokens.toLocaleString()}`);
-  console.log(`  출력 토큰:     ${u.completionTokens.toLocaleString()}`);
-  console.log(`  총 토큰:       ${u.totalTokens.toLocaleString()}`);
-  console.log(`  예상 비용:     ~$${cost.toFixed(4)}`);
-}
 // ── Provider implementations ──
-async function geminiComplete(system: string, userMessage: string, maxTokens: number): Promise<{ text: string; usage?: any }> {
-  const config = getLLMConfig();
-  const url = `https://generativelanguage.googleapis.com/v1beta/models/${config.model}:generateContent?key=${config.api_key}`;
+async function geminiComplete(config: LLMConfig, system: string, userMessage: string, maxTokens: number): Promise<{ text: string; usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number } }> {
+  const url = `https://generativelanguage.googleapis.com/v1beta/models/${config.model}:generateContent`;
   const resp = await fetch(url, {
     method: "POST",
-    headers: { "Content-Type": "application/json" },
+    headers: { "Content-Type": "application/json", "x-goog-api-key": config.api_key },
     body: JSON.stringify({
       system_instruction: { parts: [{ text: system }] },
       contents: [{ parts: [{ text: userMessage }] }],
@@ -87,9 +28,10 @@ async function geminiComplete(system: string, userMessage: string, maxTokens: nu
     throw new Error(`Gemini API error (${resp.status}): ${err.slice(0, 200)}`);
   }
-  const data = await resp.json();
-  const text = data.candidates?.[0]?.content?.parts?.[0]?.text || "";
-  const usage = data.usageMetadata;
+  const data = await resp.json() as Record<string, unknown>;
+  const candidates = data.candidates as Array<{ content: { parts: Array<{ text: string }> } }> | undefined;
+  const text = candidates?.[0]?.content?.parts?.[0]?.text || "";
+  const usage = data.usageMetadata as { promptTokenCount?: number; candidatesTokenCount?: number; totalTokenCount?: number } | undefined;
   return {
     text,
     usage: usage ? {
@@ -100,78 +42,213 @@ async function geminiComplete(system: string, userMessage: string, maxTokens: nu
   };
 }
-async function azureOpenAIComplete(system: string, userMessage: string, maxTokens: number): Promise<{ text: string; usage?: any }> {
-  const config = getLLMConfig();
-  // Try loading from ~/keys/openai.azure.com/ if no api_key in config
-  let apiKey = config.api_key;
-  let endpoint = config.endpoint;
-  let model = config.model;
-  if (!apiKey) {
-    try {
-      const keyFile = `${process.env.HOME}/keys/openai.azure.com/${config.model}.json`;
-      const raw = require("fs").readFileSync(keyFile, "utf-8");
-      const keyConfig = JSON.parse(raw)[0];
-      apiKey = keyConfig.key;
-      endpoint = keyConfig.endpoint.split("/openai/")[0];
-      model = keyConfig.deployment;
-    } catch {
-      throw new Error("Azure OpenAI API key not configured");
+// ── Class-based LLM client ──
+type ProviderResult = { text: string; usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number } };
+export class LLMClient {
+  private config: LLMConfig;
+  private usage: UsageStats = { totalCalls: 0, promptTokens: 0, completionTokens: 0, totalTokens: 0 };
+  private _openaiClient: InstanceType<typeof import("openai").default> | null = null;
+  private _anthropicClient: InstanceType<typeof import("@anthropic-ai/sdk").default> | null = null;
+  private _azureClient: InstanceType<typeof import("openai").AzureOpenAI> | null = null;
+  constructor(config: LLMConfig) {
+    this.config = config;
+  }
+  private async azureComplete(system: string, userMessage: string, maxTokens: number): Promise<ProviderResult> {
+    let apiKey = this.config.api_key;
+    let endpoint = this.config.endpoint;
+    let model = this.config.model;
+    if (!apiKey) {
+      try {
+        const keyFile = `${process.env.HOME}/keys/openai.azure.com/${this.config.model}.json`;
+        const raw = require("fs").readFileSync(keyFile, "utf-8");
+        const keyConfig = JSON.parse(raw)[0] as { key: string; endpoint: string; deployment: string };
+        apiKey = keyConfig.key;
+        endpoint = keyConfig.endpoint.split("/openai/")[0];
+        model = keyConfig.deployment;
+      } catch {
+        throw new Error("Azure OpenAI API key not configured");
+      }
+    }
+    if (!this._azureClient) {
+      const { AzureOpenAI } = await import("openai");
+      this._azureClient = new AzureOpenAI({ endpoint, apiKey, deployment: model, apiVersion: "2024-12-01-preview" });
     }
+    const resp = await this._azureClient.chat.completions.create({
+      model: model,
+      max_completion_tokens: maxTokens,
+      messages: [
+        { role: "system", content: system },
+        { role: "user", content: userMessage },
+      ],
+    });
+    return {
+      text: resp.choices[0]?.message?.content || "",
+      usage: resp.usage ? {
+        prompt_tokens: resp.usage.prompt_tokens || 0,
+        completion_tokens: resp.usage.completion_tokens || 0,
+        total_tokens: resp.usage.total_tokens || 0,
+      } : undefined,
+    };
   }
-  const { AzureOpenAI } = await import("openai");
-  const client = new AzureOpenAI({ endpoint, apiKey, deployment: model, apiVersion: "2024-12-01-preview" });
+  private async openaiComplete(system: string, userMessage: string, maxTokens: number): Promise<ProviderResult> {
+    const { default: OpenAI } = await import("openai");
+    if (!this._openaiClient) {
+      this._openaiClient = new OpenAI({ apiKey: this.config.api_key });
+    }
+    const resp = await this._openaiClient.chat.completions.create({
+      model: this.config.model || "gpt-4o",
+      messages: [
+        { role: "system", content: system },
+        { role: "user", content: userMessage },
+      ],
+      max_tokens: maxTokens,
+    });
+    return {
+      text: resp.choices[0]?.message?.content || "",
+      usage: resp.usage ? {
+        prompt_tokens: resp.usage.prompt_tokens || 0,
+        completion_tokens: resp.usage.completion_tokens || 0,
+        total_tokens: resp.usage.total_tokens || 0,
+      } : undefined,
+    };
+  }
-  const resp = await client.chat.completions.create({
-    model,
-    max_completion_tokens: maxTokens,
-    messages: [
-      { role: "system", content: system },
-      { role: "user", content: userMessage },
-    ],
-  });
+  private async anthropicComplete(system: string, userMessage: string, maxTokens: number): Promise<ProviderResult> {
+    const { default: Anthropic } = await import("@anthropic-ai/sdk");
+    if (!this._anthropicClient) {
+      this._anthropicClient = new Anthropic({ apiKey: this.config.api_key });
+    }
+    const resp = await this._anthropicClient.messages.create({
+      model: this.config.model || "claude-sonnet-4-20250514",
+      max_tokens: maxTokens,
+      system: system,
+      messages: [{ role: "user", content: userMessage }],
+    });
+    const content = resp.content[0]?.type === "text" ? resp.content[0].text : "";
+    return {
+      text: content,
+      usage: resp.usage ? {
+        prompt_tokens: resp.usage.input_tokens || 0,
+        completion_tokens: resp.usage.output_tokens || 0,
+        total_tokens: (resp.usage.input_tokens || 0) + (resp.usage.output_tokens || 0),
+      } : undefined,
+    };
+  }
-  return {
-    text: resp.choices[0]?.message?.content || "",
-    usage: resp.usage ? {
-      prompt_tokens: resp.usage.prompt_tokens || 0,
-      completion_tokens: resp.usage.completion_tokens || 0,
-      total_tokens: resp.usage.total_tokens || 0,
-    } : undefined,
-  };
+  async chatComplete(system: string, userMessage: string, maxTokens = 8192): Promise<string> {
+    let result: ProviderResult;
+    switch (this.config.provider) {
+      case "gemini":
+        result = await geminiComplete(this.config, system, userMessage, maxTokens);
+        break;
+      case "azure-openai":
+        result = await this.azureComplete(system, userMessage, maxTokens);
+        break;
+      case "openai":
+        result = await this.openaiComplete(system, userMessage, maxTokens);
+        break;
+      case "anthropic":
+        result = await this.anthropicComplete(system, userMessage, maxTokens);
+        break;
+      default:
+        throw new Error(`Unknown LLM provider: ${this.config.provider}`);
+    }
+    // Track usage
+    if (result.usage) {
+      this.usage.totalCalls++;
+      this.usage.promptTokens += result.usage.prompt_tokens || 0;
+      this.usage.completionTokens += result.usage.completion_tokens || 0;
+      this.usage.totalTokens += result.usage.total_tokens || 0;
+    }
+    return result.text;
+  }
+  getUsageStats(): UsageStats {
+    return { ...this.usage };
+  }
+  resetUsageStats(): void {
+    this.usage.totalCalls = 0;
+    this.usage.promptTokens = 0;
+    this.usage.completionTokens = 0;
+    this.usage.totalTokens = 0;
+  }
+  getEstimatedCost(): number {
+    // Pricing per 1M tokens (approximate)
+    const pricing: Record<string, { input: number; output: number }> = {
+      "gemini": { input: 0.075, output: 0.30 },
+      "azure-openai": { input: 0.10, output: 0.40 },
+      "openai": { input: 2.50, output: 10.00 },
+      "anthropic": { input: 3.00, output: 15.00 },
+    };
+    const p = pricing[this.config.provider] || pricing["gemini"];
+    return (this.usage.promptTokens / 1_000_000) * p.input + (this.usage.completionTokens / 1_000_000) * p.output;
+  }
+  printUsageSummary(): void {
+    const u = this.usage;
+    const cost = this.getEstimatedCost();
+    console.log(`\x1b[34m📊 LLM 사용량 (${this.config.provider}/${this.config.model}):\x1b[0m`);
+    console.log(`  호출 횟수:     ${u.totalCalls}회`);
+    console.log(`  입력 토큰:     ${u.promptTokens.toLocaleString()}`);
+    console.log(`  출력 토큰:     ${u.completionTokens.toLocaleString()}`);
+    console.log(`  총 토큰:       ${u.totalTokens.toLocaleString()}`);
+    console.log(`  예상 비용:     ~$${cost.toFixed(4)}`);
+  }
 }
-// ── Main interface ──
+// ── Deprecated global state wrappers (for backward compatibility) ──
+/** @deprecated Use LLMClient class instead */
+let _globalClient: LLMClient | null = null;
+/** @deprecated Use `new LLMClient(config)` instead */
+export function setLLMConfig(config: LLMConfig): void {
+  _globalClient = new LLMClient(config);
+}
+/** @deprecated Use LLMClient instance methods instead */
+export function getUsageStats(): UsageStats {
+  if (!_globalClient) return { totalCalls: 0, promptTokens: 0, completionTokens: 0, totalTokens: 0 };
+  return _globalClient.getUsageStats();
+}
+/** @deprecated Use LLMClient instance methods instead */
+export function resetUsageStats(): void {
+  if (_globalClient) _globalClient.resetUsageStats();
+}
+/** @deprecated Use LLMClient instance methods instead */
+export function getEstimatedCost(): number {
+  if (!_globalClient) return 0;
+  return _globalClient.getEstimatedCost();
+}
+/** @deprecated Use LLMClient instance methods instead */
+export function printUsageSummary(): void {
+  if (_globalClient) _globalClient.printUsageSummary();
+}
+/** @deprecated Use LLMClient instance methods instead */
 export async function chatComplete(
   system: string,
   userMessage: string,
   maxTokens = 8192
 ): Promise<string> {
-  const config = getLLMConfig();
-  let result: { text: string; usage?: any };
-  switch (config.provider) {
-    case "gemini":
-      result = await geminiComplete(system, userMessage, maxTokens);
-      break;
-    case "azure-openai":
-      result = await azureOpenAIComplete(system, userMessage, maxTokens);
-      break;
-    default:
-      throw new Error(`Unknown LLM provider: ${config.provider}`);
-  }
-  // Track usage
-  if (result.usage) {
-    _usage.totalCalls++;
-    _usage.promptTokens += result.usage.prompt_tokens || 0;
-    _usage.completionTokens += result.usage.completion_tokens || 0;
-    _usage.totalTokens += result.usage.total_tokens || 0;
-  }
-  return result.text;
+  if (!_globalClient) throw new Error("LLM config not set. Call setLLMConfig() first.");
+  return _globalClient.chatComplete(system, userMessage, maxTokens);
 }

package/src/pipeline/chunker.test.ts ADDED Viewed

@@ -0,0 +1,42 @@
+import { expect, test, describe } from "bun:test";
+import { slugify, cleanTitle } from "./chunker";
+describe("slugify", () => {
+  test("영어 텍스트", () => {
+    expect(slugify("Hello World")).toBe("hello-world");
+  });
+  test("한국어 텍스트", () => {
+    expect(slugify("양자역학")).toBe("양자역학");
+  });
+  test("한영 혼합", () => {
+    expect(slugify("Chapter 3 양자역학")).toBe("chapter-3-양자역학");
+  });
+  test("특수문자 제거", () => {
+    expect(slugify("Hello! @World#")).toBe("hello-world");
+  });
+  test("빈 문자열", () => {
+    expect(slugify("")).toBe("");
+  });
+  test("연속 공백/하이픈", () => {
+    expect(slugify("hello   world---test")).toBe("hello-world-test");
+  });
+  test("80자 제한", () => {
+    const long = "a".repeat(100);
+    expect(slugify(long).length).toBeLessThanOrEqual(80);
+  });
+  test("한글 자모", () => {
+    expect(slugify("ㅋㅋㅋ 테스트")).toBe("ㅋㅋㅋ-테스트");
+  });
+});
+describe("cleanTitle", () => {
+  test("Chapter 번호 제거", () => {
+    expect(cleanTitle("Chapter 3 Quantum Mechanics")).toBe("Quantum Mechanics");
+  });
+  test("숫자 접두사 제거", () => {
+    expect(cleanTitle("3.2.1 Angular Momentum")).toBe("Angular Momentum");
+  });
+  test("일반 제목 유지", () => {
+    expect(cleanTitle("Quantum Mechanics")).toBe("Quantum Mechanics");
+  });
+});