@open330/kiwimu 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,14 @@
1
+ export async function extractTextFromPdf(pdfPath: string): Promise<{ title: string; text: string }> {
2
+ let pdfParse: any;
3
+ try {
4
+ pdfParse = require("pdf-parse");
5
+ } catch {
6
+ throw new Error("PDF support requires pdf-parse. Run: bun add pdf-parse");
7
+ }
8
+
9
+ const buffer = await Bun.file(pdfPath).arrayBuffer();
10
+ const data = await pdfParse(Buffer.from(buffer));
11
+
12
+ const title = data.info?.Title || pdfPath.split("/").pop()?.replace(".pdf", "") || "Untitled";
13
+ return { title, text: data.text };
14
+ }
@@ -0,0 +1,39 @@
1
+ import { readFileSync } from "fs";
2
+
3
+ export async function extractTextFromPptx(filePath: string): Promise<{ title: string; text: string }> {
4
+ // PPTX is a ZIP containing XML files
5
+ const { Decompress } = await import("bun");
6
+ const JSZip = (await import("jszip")).default;
7
+
8
+ const buffer = readFileSync(filePath);
9
+ const zip = await JSZip.loadAsync(buffer);
10
+
11
+ const slides: string[] = [];
12
+
13
+ // Parse each slide XML
14
+ const slideFiles = Object.keys(zip.files)
15
+ .filter((f) => f.match(/^ppt\/slides\/slide\d+\.xml$/))
16
+ .sort((a, b) => {
17
+ const numA = parseInt(a.match(/slide(\d+)/)?.[1] || "0");
18
+ const numB = parseInt(b.match(/slide(\d+)/)?.[1] || "0");
19
+ return numA - numB;
20
+ });
21
+
22
+ for (const slidePath of slideFiles) {
23
+ const xml = await zip.files[slidePath].async("text");
24
+ // Extract text from <a:t> tags
25
+ const texts: string[] = [];
26
+ const regex = /<a:t>([^<]*)<\/a:t>/g;
27
+ let match;
28
+ while ((match = regex.exec(xml))) {
29
+ if (match[1].trim()) texts.push(match[1]);
30
+ }
31
+ if (texts.length) {
32
+ slides.push(texts.join(" "));
33
+ }
34
+ }
35
+
36
+ const title = filePath.split("/").pop()?.replace(/\.pptx?$/i, "") || "Untitled";
37
+ const text = slides.map((s, i) => `Slide ${i + 1}:\n${s}`).join("\n\n");
38
+ return { title, text };
39
+ }
@@ -0,0 +1,77 @@
1
+ import * as cheerio from "cheerio";
2
+
3
// One logical document section: a heading plus the HTML fragments that follow it.
export interface Section {
  level: number;       // heading level (1-4), taken from the hN tag name
  title: string;       // heading text, trimmed
  htmlParts: string[]; // raw HTML of the content elements belonging to this section
}

// Headings that start a new section.
const HEADING_TAGS = new Set(["h1", "h2", "h3", "h4"]);
// Boilerplate/chrome elements whose entire subtree is ignored.
const SKIP_TAGS = new Set(["nav", "header", "footer", "script", "style", "noscript"]);
// Structural wrappers that are descended into rather than captured wholesale.
const CONTAINER_TAGS = new Set([
  "html", "head", "body", "div", "article", "main", "section", "aside", "details", "summary",
]);
14
+
15
+ export async function fetchPage(url: string): Promise<{ title: string; html: string }> {
16
+ const resp = await fetch(url, {
17
+ headers: { "User-Agent": "kiwimu/0.2 (learning wiki builder)" },
18
+ });
19
+ if (!resp.ok) throw new Error(`Failed to fetch ${url}: ${resp.status}`);
20
+ const html = await resp.text();
21
+ const $ = cheerio.load(html);
22
+ const title = $("title").text().trim() || url;
23
+ const body = $("body").html() || html;
24
+ return { title, html: body };
25
+ }
26
+
27
+ export function extractSections(html: string): Section[] {
28
+ const $ = cheerio.load(html, null, false);
29
+ const sections: Section[] = [];
30
+ let current: Section = { level: 1, title: "Introduction", htmlParts: [] };
31
+
32
+ function walk(el: cheerio.AnyNode): void {
33
+ if (el.type === "text") return;
34
+ if (el.type !== "tag") return;
35
+
36
+ const tagName = (el as cheerio.Element).tagName.toLowerCase();
37
+
38
+ if (SKIP_TAGS.has(tagName)) return;
39
+
40
+ if (HEADING_TAGS.has(tagName)) {
41
+ if (current.htmlParts.length > 0) {
42
+ sections.push(current);
43
+ }
44
+ current = {
45
+ level: parseInt(tagName[1]),
46
+ title: $(el).text().trim(),
47
+ htmlParts: [],
48
+ };
49
+ return;
50
+ }
51
+
52
+ if (CONTAINER_TAGS.has(tagName)) {
53
+ for (const child of (el as cheerio.Element).children) {
54
+ walk(child);
55
+ }
56
+ return;
57
+ }
58
+
59
+ // Content element
60
+ const html = $.html(el)?.trim();
61
+ if (html) {
62
+ current.htmlParts.push(html);
63
+ }
64
+ }
65
+
66
+ // Walk root children
67
+ const root = $.root();
68
+ for (const child of root.contents().toArray()) {
69
+ walk(child);
70
+ }
71
+
72
+ if (current.htmlParts.length > 0) {
73
+ sections.push(current);
74
+ }
75
+
76
+ return sections.filter((s) => s.htmlParts.length > 0);
77
+ }
@@ -0,0 +1,177 @@
1
+ import type { LLMConfig } from "./config";
2
+
3
// Token usage tracking
// Cumulative LLM usage counters for the current process.
export interface UsageStats {
  totalCalls: number;       // number of completed chatComplete calls
  promptTokens: number;     // tokens sent as input
  completionTokens: number; // tokens generated by the model
  totalTokens: number;      // provider-reported total
}

// Module-level accumulator; mutated by chatComplete, exposed via getUsageStats().
const _usage: UsageStats = {
  totalCalls: 0,
  promptTokens: 0,
  completionTokens: 0,
  totalTokens: 0,
};

// Active provider configuration; null until setLLMConfig() is called.
let _llmConfig: LLMConfig | null = null;
19
+
20
+ export function setLLMConfig(config: LLMConfig): void {
21
+ _llmConfig = config;
22
+ }
23
+
24
+ export function getLLMConfig(): LLMConfig {
25
+ if (!_llmConfig) throw new Error("LLM config not set. Call setLLMConfig() first.");
26
+ return _llmConfig;
27
+ }
28
+
29
+ export function getUsageStats(): UsageStats {
30
+ return { ..._usage };
31
+ }
32
+
33
+ export function resetUsageStats(): void {
34
+ _usage.totalCalls = 0;
35
+ _usage.promptTokens = 0;
36
+ _usage.completionTokens = 0;
37
+ _usage.totalTokens = 0;
38
+ }
39
+
40
+ export function getEstimatedCost(): number {
41
+ const config = _llmConfig;
42
+ if (!config) return 0;
43
+
44
+ // Pricing per 1M tokens (approximate)
45
+ const pricing: Record<string, { input: number; output: number }> = {
46
+ "gemini": { input: 0.075, output: 0.30 },
47
+ "azure-openai": { input: 0.10, output: 0.40 },
48
+ "openai": { input: 0.15, output: 0.60 },
49
+ "anthropic": { input: 3.00, output: 15.00 },
50
+ };
51
+ const p = pricing[config.provider] || pricing["gemini"];
52
+ return (_usage.promptTokens / 1_000_000) * p.input + (_usage.completionTokens / 1_000_000) * p.output;
53
+ }
54
+
55
// Print a human-readable usage report (Korean labels) to stdout:
// call count, prompt/completion/total tokens, and estimated cost.
export function printUsageSummary(): void {
  const u = _usage;
  const cost = getEstimatedCost();
  // Fall back to "unknown" before any config has been set.
  const provider = _llmConfig?.provider || "unknown";
  const model = _llmConfig?.model || "unknown";

  // \x1b[34m ... \x1b[0m renders the header line in ANSI blue.
  console.log(`\x1b[34m📊 LLM 사용량 (${provider}/${model}):\x1b[0m`);
  console.log(`  호출 횟수: ${u.totalCalls}회`);
  console.log(`  입력 토큰: ${u.promptTokens.toLocaleString()}`);
  console.log(`  출력 토큰: ${u.completionTokens.toLocaleString()}`);
  console.log(`  총 토큰: ${u.totalTokens.toLocaleString()}`);
  console.log(`  예상 비용: ~$${cost.toFixed(4)}`);
}
68
+
69
+ // ── Provider implementations ──
70
+
71
+ async function geminiComplete(system: string, userMessage: string, maxTokens: number): Promise<{ text: string; usage?: any }> {
72
+ const config = getLLMConfig();
73
+ const url = `https://generativelanguage.googleapis.com/v1beta/models/${config.model}:generateContent?key=${config.api_key}`;
74
+
75
+ const resp = await fetch(url, {
76
+ method: "POST",
77
+ headers: { "Content-Type": "application/json" },
78
+ body: JSON.stringify({
79
+ system_instruction: { parts: [{ text: system }] },
80
+ contents: [{ parts: [{ text: userMessage }] }],
81
+ generationConfig: { maxOutputTokens: maxTokens, temperature: 0.7 },
82
+ }),
83
+ });
84
+
85
+ if (!resp.ok) {
86
+ const err = await resp.text();
87
+ throw new Error(`Gemini API error (${resp.status}): ${err.slice(0, 200)}`);
88
+ }
89
+
90
+ const data = await resp.json();
91
+ const text = data.candidates?.[0]?.content?.parts?.[0]?.text || "";
92
+ const usage = data.usageMetadata;
93
+ return {
94
+ text,
95
+ usage: usage ? {
96
+ prompt_tokens: usage.promptTokenCount || 0,
97
+ completion_tokens: usage.candidatesTokenCount || 0,
98
+ total_tokens: usage.totalTokenCount || 0,
99
+ } : undefined,
100
+ };
101
+ }
102
+
103
// Chat completion via Azure OpenAI. Credentials come from config.api_key, or —
// when that is absent — from a key file at ~/keys/openai.azure.com/<model>.json
// whose first array entry supplies { key, endpoint, deployment }.
async function azureOpenAIComplete(system: string, userMessage: string, maxTokens: number): Promise<{ text: string; usage?: any }> {
  const config = getLLMConfig();

  // Try loading from ~/keys/openai.azure.com/ if no api_key in config
  let apiKey = config.api_key;
  let endpoint = config.endpoint;
  let model = config.model;

  if (!apiKey) {
    try {
      const keyFile = `${process.env.HOME}/keys/openai.azure.com/${config.model}.json`;
      const raw = require("fs").readFileSync(keyFile, "utf-8");
      // Key files hold an array of records; only the first is used.
      const keyConfig = JSON.parse(raw)[0];
      apiKey = keyConfig.key;
      // The stored endpoint includes an /openai/... path suffix; keep only the base URL.
      endpoint = keyConfig.endpoint.split("/openai/")[0];
      model = keyConfig.deployment;
    } catch {
      // Any failure (missing file, bad JSON, missing fields) is reported uniformly.
      throw new Error("Azure OpenAI API key not configured");
    }
  }

  // Lazy import keeps the openai package optional for non-Azure providers.
  const { AzureOpenAI } = await import("openai");
  const client = new AzureOpenAI({ endpoint, apiKey, deployment: model, apiVersion: "2024-12-01-preview" });

  const resp = await client.chat.completions.create({
    model,
    max_completion_tokens: maxTokens,
    messages: [
      { role: "system", content: system },
      { role: "user", content: userMessage },
    ],
  });

  // Normalize usage to the snake_case shape shared with the other providers.
  return {
    text: resp.choices[0]?.message?.content || "",
    usage: resp.usage ? {
      prompt_tokens: resp.usage.prompt_tokens || 0,
      completion_tokens: resp.usage.completion_tokens || 0,
      total_tokens: resp.usage.total_tokens || 0,
    } : undefined,
  };
}
145
+
146
+ // ── Main interface ──
147
+
148
+ export async function chatComplete(
149
+ system: string,
150
+ userMessage: string,
151
+ maxTokens = 8192
152
+ ): Promise<string> {
153
+ const config = getLLMConfig();
154
+
155
+ let result: { text: string; usage?: any };
156
+
157
+ switch (config.provider) {
158
+ case "gemini":
159
+ result = await geminiComplete(system, userMessage, maxTokens);
160
+ break;
161
+ case "azure-openai":
162
+ result = await azureOpenAIComplete(system, userMessage, maxTokens);
163
+ break;
164
+ default:
165
+ throw new Error(`Unknown LLM provider: ${config.provider}`);
166
+ }
167
+
168
+ // Track usage
169
+ if (result.usage) {
170
+ _usage.totalCalls++;
171
+ _usage.promptTokens += result.usage.prompt_tokens || 0;
172
+ _usage.completionTokens += result.usage.completion_tokens || 0;
173
+ _usage.totalTokens += result.usage.total_tokens || 0;
174
+ }
175
+
176
+ return result.text;
177
+ }
@@ -0,0 +1,63 @@
1
+ import TurndownService from "turndown";
2
+ import type { Section } from "../ingest/web";
3
+ import type { Store } from "../store";
4
+
5
// Shared HTML -> Markdown converter: ATX headings (#, ##, ...), with
// script/style elements removed from the output entirely.
const turndown = new TurndownService({ headingStyle: "atx" });
turndown.remove(["script", "style"]);
7
+
8
+ export function slugify(text: string): string {
9
+ return text
10
+ .normalize("NFKD")
11
+ .toLowerCase()
12
+ .trim()
13
+ .replace(/[^\w\s-]/g, "")
14
+ .replace(/[-\s]+/g, "-")
15
+ .replace(/^-|-$/g, "")
16
+ .slice(0, 80);
17
+ }
18
+
19
// Generic boilerplate headings that rarely deserve their own wiki page;
// chunkSections demands extra content before promoting one of these.
const STOP_TITLES = new Set([
  "introduction", "overview", "summary", "conclusion", "references",
  "bibliography", "appendix", "abstract", "preface", "contents",
  "table of contents", "index", "acknowledgments", "notes",
]);
24
+
25
+ export function cleanTitle(title: string): string {
26
+ return title
27
+ .replace(/^\s*(Chapter\s+)?\d+(\.\d+)*\s*/i, "")
28
+ .replace(/\s+/g, " ")
29
+ .trim();
30
+ }
31
+
32
+ export function chunkSections(sections: Section[], sourceId: number, store: Store, minWords = 30): number {
33
+ let count = 0;
34
+
35
+ for (const section of sections) {
36
+ const title = cleanTitle(section.title);
37
+ if (!title) continue;
38
+
39
+ const slug = slugify(title);
40
+ if (!slug) continue;
41
+
42
+ const htmlContent = section.htmlParts.join("\n");
43
+ if (!htmlContent.trim()) continue;
44
+
45
+ const content = turndown.turndown(htmlContent).trim();
46
+ const wordCount = content.split(/\s+/).length;
47
+
48
+ if (wordCount < minWords) continue;
49
+ if (STOP_TITLES.has(slug) || STOP_TITLES.has(title.toLowerCase())) {
50
+ if (wordCount < 100) continue;
51
+ }
52
+
53
+ const existing = store.getPage(slug);
54
+ if (existing) {
55
+ store.updatePageContent(existing.id, existing.content + "\n\n" + content);
56
+ } else {
57
+ store.addPage(slug, title, content, sourceId, slug);
58
+ count++;
59
+ }
60
+ }
61
+
62
+ return count;
63
+ }
@@ -0,0 +1,35 @@
1
+ import type { Store } from "../store";
2
+
3
// Shape consumed by the graph visualization: nodes keyed by page slug,
// links referencing those slugs as source/target.
export interface GraphData {
  nodes: Array<{ id: string; title: string; degree: number; type: string }>;
  links: Array<{ source: string; target: string }>;
}
7
+
8
+ export function buildGraphData(store: Store): GraphData {
9
+ const pages = store.listPages();
10
+ const links = store.getAllLinks();
11
+
12
+ const degree = new Map<number, number>();
13
+ for (const page of pages) degree.set(page.id, 0);
14
+ for (const link of links) {
15
+ degree.set(link.from_page_id, (degree.get(link.from_page_id) || 0) + 1);
16
+ degree.set(link.to_page_id, (degree.get(link.to_page_id) || 0) + 1);
17
+ }
18
+
19
+ const slugMap = new Map(pages.map((p) => [p.id, p.slug]));
20
+
21
+ return {
22
+ nodes: pages.map((p) => ({
23
+ id: p.slug,
24
+ title: p.title,
25
+ degree: degree.get(p.id) || 0,
26
+ type: p.page_type,
27
+ })),
28
+ links: links
29
+ .filter((l) => slugMap.has(l.from_page_id) && slugMap.has(l.to_page_id))
30
+ .map((l) => ({
31
+ source: slugMap.get(l.from_page_id)!,
32
+ target: slugMap.get(l.to_page_id)!,
33
+ })),
34
+ };
35
+ }
@@ -0,0 +1,49 @@
1
+ import type { Store, Page } from "../store";
2
+
3
+ export function autoLinkPages(store: Store): number {
4
+ const pages = store.listPages();
5
+ if (!pages.length) return 0;
6
+
7
+ store.clearLinks();
8
+ let totalLinks = 0;
9
+
10
+ // Sort targets by title length descending (longest match first)
11
+ const targets = [...pages].sort((a, b) => b.title.length - a.title.length);
12
+
13
+ // Precompile patterns
14
+ const patterns: Array<{ regex: RegExp; page: Page }> = [];
15
+ for (const target of targets) {
16
+ if (target.title.length < 3) continue;
17
+ const escaped = target.title.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
18
+ patterns.push({
19
+ regex: new RegExp(`(?<!\\[)(?<!\\w)(${escaped})(?!\\w)(?!\\])`, "i"),
20
+ page: target,
21
+ });
22
+ }
23
+
24
+ for (const page of pages) {
25
+ let content = page.content;
26
+ const linkedSlugs = new Set<string>();
27
+
28
+ for (const { regex, page: target } of patterns) {
29
+ if (target.id === page.id) continue;
30
+ if (linkedSlugs.has(target.slug)) continue;
31
+
32
+ const match = regex.exec(content);
33
+ if (match) {
34
+ const matched = match[1];
35
+ const replacement = `[${matched}](/wiki/${target.slug})`;
36
+ content = content.slice(0, match.index) + replacement + content.slice(match.index + match[0].length);
37
+ linkedSlugs.add(target.slug);
38
+ store.addLink(page.id, target.id, matched);
39
+ totalLinks++;
40
+ }
41
+ }
42
+
43
+ if (linkedSlugs.size > 0) {
44
+ store.updatePageContent(page.id, content);
45
+ }
46
+ }
47
+
48
+ return totalLinks;
49
+ }