@open330/kiwimu 0.4.1 → 0.7.1
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/bin/kiwimu +1 -1
- package/package.json +4 -1
- package/personas/namuwiki.json +6 -0
- package/src/build/renderer.ts +49 -2
- package/src/build/static/search.js +33 -2
- package/src/build/static/style.css +84 -1
- package/src/build/templates.ts +297 -167
- package/src/config.ts +35 -29
- package/src/demo/sample-data.ts +70 -0
- package/src/demo/setup.ts +31 -0
- package/src/expand/llm.ts +1 -1
- package/src/index.ts +208 -458
- package/src/ingest/docx.ts +0 -8
- package/src/ingest/legacy.ts +4 -4
- package/src/ingest/pdf.ts +1 -1
- package/src/ingest/pptx.ts +0 -1
- package/src/ingest/web.test.ts +41 -0
- package/src/ingest/web.ts +61 -62
- package/src/llm-client.ts +203 -126
- package/src/pipeline/chunker.test.ts +42 -0
- package/src/pipeline/chunker.ts +1 -48
- package/src/pipeline/llm-chunker.ts +133 -55
- package/src/server.ts +327 -0
- package/src/services/ingest.ts +100 -0
- package/src/store.test.ts +132 -0
- package/src/store.ts +102 -2
- package/src/pipeline/llm-linker.ts +0 -84

package/src/services/ingest.ts
ADDED

@@ -0,0 +1,100 @@
+import { Store } from "../store";
+import { type LLMConfig, type Persona } from "../config";
+import { LLMClient, type UsageStats } from "../llm-client";
+
+export interface IngestResult {
+  sourceCount: number;
+  conceptCount: number;
+  linkCount: number;
+  usage: UsageStats & { estimatedCostUsd: number };
+}
+
+export async function ingestUrl(
+  root: string,
+  store: Store,
+  url: string,
+  llmConfig: LLMConfig,
+  persona: Persona | null,
+  onProgress?: (status: string) => void
+): Promise<IngestResult> {
+  const client = new LLMClient(llmConfig);
+  client.resetUsageStats();
+
+  const { fetchPage } = await import("../ingest/web");
+  const { llmChunkDocument, htmlToRawText } = await import("../pipeline/llm-chunker");
+
+  onProgress?.("URL 가져오는 중...");
+  const { title, html } = await fetchPage(url);
+
+  const source = store.addSource(url, "web", title, html);
+  const rawText = htmlToRawText(html);
+
+  onProgress?.("LLM 분석 중...");
+  const { sourceCount, conceptCount } = await llmChunkDocument(rawText, title, source.id, store, 0, persona, client);
+
+  const u = client.getUsageStats();
+  const estimatedCostUsd = client.getEstimatedCost();
+  store.addUsageLog(source.id, u.totalCalls, u.promptTokens, u.completionTokens, u.totalTokens, estimatedCostUsd);
+
+  return {
+    sourceCount,
+    conceptCount,
+    linkCount: 0,
+    usage: { ...u, estimatedCostUsd },
+  };
+}
+
+export async function ingestFile(
+  root: string,
+  store: Store,
+  filePath: string,
+  originalName: string,
+  llmConfig: LLMConfig,
+  persona: Persona | null,
+  onProgress?: (status: string) => void
+): Promise<IngestResult> {
+  const client = new LLMClient(llmConfig);
+  client.resetUsageStats();
+
+  const { llmChunkDocument } = await import("../pipeline/llm-chunker");
+
+  const ext = originalName.split(".").pop()?.toLowerCase() || "";
+
+  let title: string;
+  let text: string;
+
+  if (ext === "pdf") {
+    const { extractTextFromPdf } = await import("../ingest/pdf");
+    onProgress?.("PDF 텍스트 추출 중...");
+    ({ title, text } = await extractTextFromPdf(filePath));
+  } else if (ext === "docx") {
+    const { extractTextFromDocx } = await import("../ingest/docx");
+    onProgress?.("DOCX 텍스트 추출 중...");
+    ({ title, text } = await extractTextFromDocx(filePath));
+  } else if (ext === "pptx") {
+    const { extractTextFromPptx } = await import("../ingest/pptx");
+    onProgress?.("PPTX 텍스트 추출 중...");
+    ({ title, text } = await extractTextFromPptx(filePath));
+  } else {
+    const { extractWithTextutil } = await import("../ingest/legacy");
+    onProgress?.(`${ext.toUpperCase()} 텍스트 추출 중...`);
+    ({ title, text } = await extractWithTextutil(filePath));
+  }
+
+  const source = store.addSource(filePath, ext, title, "(file)");
+  store.deletePagesBySource(source.id);
+
+  onProgress?.("LLM 분석 중...");
+  const { sourceCount, conceptCount } = await llmChunkDocument(text, title, source.id, store, 0, persona, client);
+
+  const u = client.getUsageStats();
+  const estimatedCostUsd = client.getEstimatedCost();
+  store.addUsageLog(source.id, u.totalCalls, u.promptTokens, u.completionTokens, u.totalTokens, estimatedCostUsd);
+
+  return {
+    sourceCount,
+    conceptCount,
+    linkCount: 0,
+    usage: { ...u, estimatedCostUsd },
+  };
+}

package/src/store.test.ts
ADDED

@@ -0,0 +1,132 @@
+import { expect, test, describe, beforeEach, afterEach } from "bun:test";
+import { Store } from "./store";
+
+describe("Store", () => {
+  let store: Store;
+
+  beforeEach(() => {
+    store = new Store(":memory:");
+    store.initSchema();
+  });
+
+  afterEach(() => {
+    store.close();
+  });
+
+  test("addSource and listSources", () => {
+    const src = store.addSource("file:///test.pdf", "pdf", "Test PDF", "raw content");
+    expect(src.id).toBeGreaterThan(0);
+    expect(src.uri).toBe("file:///test.pdf");
+    expect(src.type).toBe("pdf");
+    expect(src.title).toBe("Test PDF");
+
+    const sources = store.listSources();
+    expect(sources).toHaveLength(1);
+    expect(sources[0].uri).toBe("file:///test.pdf");
+  });
+
+  test("addSource updates existing source with same URI", () => {
+    const src1 = store.addSource("file:///test.pdf", "pdf", "V1", "content1");
+    const src2 = store.addSource("file:///test.pdf", "pdf", "V2", "content2");
+    expect(src2.id).toBe(src1.id);
+    expect(src2.title).toBe("V2");
+    expect(src2.raw_content).toBe("content2");
+    expect(store.listSources()).toHaveLength(1);
+  });
+
+  test("addPage and getPage by slug", () => {
+    const src = store.addSource("file:///test.pdf", "pdf", "Test", "raw");
+    const page = store.addPage("test-page", "Test Page", "# Content", src.id, null, "source", 0);
+    expect(page.slug).toBe("test-page");
+    expect(page.title).toBe("Test Page");
+    expect(page.page_type).toBe("source");
+
+    const fetched = store.getPage("test-page");
+    expect(fetched).not.toBeNull();
+    expect(fetched!.title).toBe("Test Page");
+
+    expect(store.getPage("nonexistent")).toBeNull();
+  });
+
+  test("listSourcePages and listConceptPages", () => {
+    const src = store.addSource("file:///test.pdf", "pdf", "Test", "raw");
+    store.addPage("src-page", "Source Page", "content", src.id, null, "source", 0);
+    store.addPage("concept-page", "Concept Page", "content", undefined, undefined, "concept", 0);
+
+    const sourcePages = store.listSourcePages();
+    expect(sourcePages).toHaveLength(1);
+    expect(sourcePages[0].slug).toBe("src-page");
+
+    const conceptPages = store.listConceptPages();
+    expect(conceptPages).toHaveLength(1);
+    expect(conceptPages[0].slug).toBe("concept-page");
+  });
+
+  test("addLink and getBacklinks", () => {
+    const src = store.addSource("file:///test.pdf", "pdf", "Test", "raw");
+    const pageA = store.addPage("page-a", "Page A", "content", src.id, null, "source", 0);
+    const pageB = store.addPage("page-b", "Page B", "content", src.id, null, "source", 1);
+
+    store.addLink(pageA.id, pageB.id, "link to B");
+
+    const backlinks = store.getBacklinks(pageB.id);
+    expect(backlinks).toHaveLength(1);
+    expect(backlinks[0].slug).toBe("page-a");
+  });
+
+  test("getAllBacklinksGrouped", () => {
+    const src = store.addSource("file:///test.pdf", "pdf", "Test", "raw");
+    const pageA = store.addPage("page-a", "Page A", "content", src.id, null, "source", 0);
+    const pageB = store.addPage("page-b", "Page B", "content", src.id, null, "source", 1);
+    const pageC = store.addPage("page-c", "Page C", "content", src.id, null, "source", 2);
+
+    store.addLink(pageA.id, pageC.id, "link to C from A");
+    store.addLink(pageB.id, pageC.id, "link to C from B");
+
+    const grouped = store.getAllBacklinksGrouped();
+    expect(grouped.has(pageC.id)).toBe(true);
+    expect(grouped.get(pageC.id)!).toHaveLength(2);
+  });
+
+  test("deletePagesBySource", () => {
+    const src = store.addSource("file:///test.pdf", "pdf", "Test", "raw");
+    store.addPage("page-1", "Page 1", "content", src.id, null, "source", 0);
+    store.addPage("page-2", "Page 2", "content", src.id, null, "source", 1);
+    expect(store.listPages()).toHaveLength(2);
+
+    store.deletePagesBySource(src.id);
+    expect(store.listPages()).toHaveLength(0);
+  });
+
+  test("slug uniqueness (duplicate handling via INSERT OR REPLACE)", () => {
+    const src = store.addSource("file:///test.pdf", "pdf", "Test", "raw");
+    store.addPage("same-slug", "Title V1", "content v1", src.id, null, "source", 0);
+    store.addPage("same-slug", "Title V2", "content v2", src.id, null, "source", 0);
+
+    const page = store.getPage("same-slug");
+    expect(page).not.toBeNull();
+    expect(page!.title).toBe("Title V2");
+    expect(page!.content).toBe("content v2");
+  });
+
+  test("listSourcesMeta excludes raw_content", () => {
+    store.addSource("file:///test.pdf", "pdf", "Test", "some large raw content here");
+    const meta = store.listSourcesMeta();
+    expect(meta).toHaveLength(1);
+    expect(meta[0].title).toBe("Test");
+    expect(meta[0]).not.toHaveProperty("raw_content");
+  });
+
+  test("addUsageLog and getUsageSummary", () => {
+    const src = store.addSource("file:///test.pdf", "pdf", "Test", "raw");
+    store.addUsageLog(src.id, 2, 100, 50, 150, 0.005);
+    store.addUsageLog(src.id, 3, 200, 100, 300, 0.01);
+
+    const summary = store.getUsageSummary();
+    expect(summary.totalCalls).toBe(5);
+    expect(summary.promptTokens).toBe(300);
+    expect(summary.completionTokens).toBe(150);
+    expect(summary.totalTokens).toBe(450);
+    expect(summary.totalCost).toBeCloseTo(0.015, 5);
+  });
+});
package/src/store.ts
CHANGED
@@ -20,12 +20,31 @@ export interface Page {
   display_order: number;
 }
 
+export interface SourceMeta {
+  id: number;
+  uri: string;
+  type: string;
+  title: string;
+  fetched_at: string;
+}
+
 export interface Link {
   from_page_id: number;
   to_page_id: number;
   anchor_text: string;
 }
 
+export interface Quiz {
+  id: number;
+  page_id: number;
+  question: string;
+  answer: string;
+  quiz_type: string; // 'fill_blank' | 'ox' | 'short_answer'
+  created_at: string;
+  page_title?: string;
+  page_slug?: string;
+}
+
 const SCHEMA = `
 CREATE TABLE IF NOT EXISTS sources (
   id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -63,6 +82,20 @@ CREATE TABLE IF NOT EXISTS links (
   anchor_text TEXT,
   PRIMARY KEY (from_page_id, to_page_id, anchor_text)
 );
+CREATE TABLE IF NOT EXISTS quizzes (
+  id INTEGER PRIMARY KEY AUTOINCREMENT,
+  page_id INTEGER NOT NULL,
+  question TEXT NOT NULL,
+  answer TEXT NOT NULL,
+  quiz_type TEXT NOT NULL DEFAULT 'fill_blank',
+  created_at TEXT DEFAULT (datetime('now')),
+  FOREIGN KEY (page_id) REFERENCES pages(id)
+);
+CREATE INDEX IF NOT EXISTS idx_pages_source_id ON pages(source_id);
+CREATE INDEX IF NOT EXISTS idx_pages_page_type ON pages(page_type);
+CREATE INDEX IF NOT EXISTS idx_links_to_page ON links(to_page_id);
+CREATE INDEX IF NOT EXISTS idx_links_from_page ON links(from_page_id);
+CREATE INDEX IF NOT EXISTS idx_quizzes_page_id ON quizzes(page_id);
 `;
 
 export class Store {
@@ -107,6 +140,10 @@ export class Store {
     return this.db.prepare("SELECT * FROM sources ORDER BY fetched_at DESC").all() as Source[];
   }
 
+  listSourcesMeta(): SourceMeta[] {
+    return this.db.prepare("SELECT id, uri, type, title, fetched_at FROM sources ORDER BY id DESC").all() as SourceMeta[];
+  }
+
   // --- Pages ---
 
   addPage(
@@ -147,7 +184,11 @@ export class Store {
   }
 
   deletePagesBySource(sourceId: number): void {
-    // Delete
+    // Delete quizzes for these pages first
+    this.db.prepare(
+      "DELETE FROM quizzes WHERE page_id IN (SELECT id FROM pages WHERE source_id = ?)"
+    ).run(sourceId);
+    // Delete links involving these pages
     this.db.prepare(
       "DELETE FROM links WHERE from_page_id IN (SELECT id FROM pages WHERE source_id = ?) OR to_page_id IN (SELECT id FROM pages WHERE source_id = ?)"
     ).run(sourceId, sourceId);
@@ -155,6 +196,7 @@ export class Store {
   }
 
   deleteAllPages(): void {
+    this.db.exec("DELETE FROM quizzes");
     this.db.exec("DELETE FROM links");
     this.db.exec("DELETE FROM pages");
   }
@@ -192,6 +234,64 @@ export class Store {
     return this.db.prepare("SELECT * FROM links").all() as Link[];
   }
 
+  getAllBacklinksGrouped(): Map<number, Array<{id: number; slug: string; title: string; page_type: string}>> {
+    const rows = this.db.prepare(`
+      SELECT l.to_page_id, p.id, p.slug, p.title, p.page_type
+      FROM links l
+      JOIN pages p ON p.id = l.from_page_id
+      ORDER BY l.to_page_id
+    `).all() as Array<{to_page_id: number; id: number; slug: string; title: string; page_type: string}>;
+
+    const map = new Map<number, Array<{id: number; slug: string; title: string; page_type: string}>>();
+    for (const row of rows) {
+      if (!map.has(row.to_page_id)) map.set(row.to_page_id, []);
+      map.get(row.to_page_id)!.push({ id: row.id, slug: row.slug, title: row.title, page_type: row.page_type });
+    }
+    return map;
+  }
+
+  // --- Quizzes ---
+
+  addQuiz(pageId: number, question: string, answer: string, quizType: string): void {
+    this.db
+      .prepare("INSERT INTO quizzes (page_id, question, answer, quiz_type) VALUES (?, ?, ?, ?)")
+      .run(pageId, question, answer, quizType);
+  }
+
+  getQuizzesByPage(pageId: number): Quiz[] {
+    return this.db
+      .prepare(
+        `SELECT q.*, p.title as page_title, p.slug as page_slug
+         FROM quizzes q JOIN pages p ON p.id = q.page_id
+         WHERE q.page_id = ? ORDER BY q.id`
+      )
+      .all(pageId) as Quiz[];
+  }
+
+  getAllQuizzes(): Quiz[] {
+    return this.db
+      .prepare(
+        `SELECT q.*, p.title as page_title, p.slug as page_slug
+         FROM quizzes q JOIN pages p ON p.id = q.page_id
+         ORDER BY q.id`
+      )
+      .all() as Quiz[];
+  }
+
+  getRandomQuizzes(count: number): Quiz[] {
+    return this.db
+      .prepare(
+        `SELECT q.*, p.title as page_title, p.slug as page_slug
+         FROM quizzes q JOIN pages p ON p.id = q.page_id
+         ORDER BY RANDOM() LIMIT ?`
+      )
+      .all(count) as Quiz[];
+  }
+
+  deleteQuizzesByPage(pageId: number): void {
+    this.db.prepare("DELETE FROM quizzes WHERE page_id = ?").run(pageId);
+  }
+
   // --- Usage ---
 
   addUsageLog(sourceId: number, calls: number, prompt: number, completion: number, total: number, cost: number): void {
@@ -203,7 +303,7 @@ export class Store {
   getUsageSummary(): { totalCalls: number; promptTokens: number; completionTokens: number; totalTokens: number; totalCost: number } {
     const row = this.db.prepare(
       "SELECT COALESCE(SUM(llm_calls),0) as totalCalls, COALESCE(SUM(prompt_tokens),0) as promptTokens, COALESCE(SUM(completion_tokens),0) as completionTokens, COALESCE(SUM(total_tokens),0) as totalTokens, COALESCE(SUM(estimated_cost_usd),0) as totalCost FROM usage_logs"
-    ).get() as
+    ).get() as { totalCalls: number; promptTokens: number; completionTokens: number; totalTokens: number; totalCost: number };
     return row;
   }
 }

package/src/pipeline/llm-linker.ts
REMOVED

@@ -1,84 +0,0 @@
-import { chatComplete } from "../llm-client";
-import type { Store } from "../store";
-import { slugify } from "./chunker";
-
-const LINK_SYSTEM = `You are a wiki editor. Given wiki pages, find cross-link opportunities that were missed.
-Return valid JSON only. No markdown fences.`;
-
-const LINK_PROMPT = `These wiki pages exist but may be missing cross-links. Find where one page's content mentions a concept that has its own page.
-
-Pages (slug | title | first 300 chars of content):
-{pages}
-
-Return JSON:
-{
-  "links": [
-    {
-      "from_slug": "source-page-slug",
-      "to_slug": "target-page-slug",
-      "anchor_text": "exact phrase in source page to link"
-    }
-  ]
-}
-
-Rules:
-- anchor_text MUST be an exact phrase found in the source page content
-- Only link genuinely related concepts
-- 3-8 links per page where meaningful
-- Do NOT link a page to itself`;
-
-export async function llmLinkPages(store: Store): Promise<number> {
-  const pages = store.listPages();
-  if (pages.length < 2) return 0;
-
-  const batchSize = 30;
-  let totalLinks = 0;
-
-  for (let i = 0; i < pages.length; i += batchSize) {
-    const batch = pages.slice(i, i + batchSize);
-    const pagesText = batch
-      .map(p => `${p.slug} | ${p.title} | ${p.content.slice(0, 300).replace(/\n/g, " ")}`)
-      .join("\n");
-
-    try {
-      const raw = await chatComplete(LINK_SYSTEM, LINK_PROMPT.replace("{pages}", pagesText), 8192);
-      let cleaned = raw.replace(/^```json?\n?/m, "").replace(/\n?```$/m, "").trim();
-
-      let result: { links: Array<{ from_slug: string; to_slug: string; anchor_text: string }> };
-      try {
-        result = JSON.parse(cleaned);
-      } catch {
-        // Try to repair truncated JSON
-        cleaned = cleaned.replace(/,?\s*$/, "]}");
-        try {
-          result = JSON.parse(cleaned);
-        } catch {
-          console.log(`  \x1b[33m⚠ 링크 JSON 파싱 실패\x1b[0m`);
-          continue;
-        }
-      }
-
-      const slugToPage = new Map(pages.map(p => [p.slug, p]));
-
-      for (const link of result.links) {
-        const fromPage = slugToPage.get(link.from_slug);
-        const toPage = slugToPage.get(link.to_slug);
-        if (!fromPage || !toPage || fromPage.id === toPage.id) continue;
-
-        const anchor = link.anchor_text;
-        if (anchor && fromPage.content.includes(anchor) && !fromPage.content.includes(`[${anchor}]`)) {
-          const linkedText = `[${anchor}](/wiki/${link.to_slug})`;
-          const newContent = fromPage.content.replace(anchor, linkedText);
-          store.updatePageContent(fromPage.id, newContent);
-          fromPage.content = newContent;
-          store.addLink(fromPage.id, toPage.id, anchor);
-          totalLinks++;
-        }
-      }
-    } catch (e: any) {
-      console.log(`  \x1b[31m링크 생성 실패: ${e.message}\x1b[0m`);
-    }
-  }
-
-  return totalLinks;
-}