@open330/kiwimu 0.8.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,249 @@
1
+ import type { Store } from "../store";
2
+ import { normalizeTitle } from "../utils";
3
+
4
/** A single problem found by the wiki linter. */
export interface LintIssue {
  /** Which lint rule produced this issue. */
  type: 'orphan' | 'dead_link' | 'disconnected' | 'missing_backlink' | 'thin_content' | 'duplicate';
  /** Severity level; in practice only dead links are reported as 'error'. */
  severity: 'error' | 'warning' | 'info';
  /** Id of the affected page, when the issue concerns a specific page. */
  pageId?: number;
  /** Title of the affected page; may be absent when the page record is missing. */
  pageTitle?: string;
  /** Human-readable description of the problem. */
  message: string;
  /** Optional hint on how to resolve the issue. */
  suggestion?: string;
}
12
+
13
/** Aggregate result of a lintWiki() run. */
export interface LintReport {
  /** Every issue found, in check order (orphans, dead links, clusters, ...). */
  issues: LintIssue[];
  /** Issue counts by severity plus overall page/link totals. */
  summary: { errors: number; warnings: number; info: number; total_pages: number; total_links: number };
  /** ISO-8601 timestamp of when the report was generated. */
  timestamp: string;
}
18
+
19
+ export function lintWiki(store: Store): LintReport {
20
+ const pages = store.listPages();
21
+ const links = store.getAllLinks();
22
+
23
+ const pageMap = new Map(pages.map(p => [p.id, p]));
24
+ const issues: LintIssue[] = [];
25
+
26
+ // --- a) Orphan Pages: no incoming links ---
27
+ const incomingCount = new Map<number, number>();
28
+ for (const link of links) {
29
+ incomingCount.set(link.to_page_id, (incomingCount.get(link.to_page_id) || 0) + 1);
30
+ }
31
+ for (const page of pages) {
32
+ if (!incomingCount.has(page.id)) {
33
+ issues.push({
34
+ type: 'orphan',
35
+ severity: 'warning',
36
+ pageId: page.id,
37
+ pageTitle: page.title,
38
+ message: `"${page.title}" has no incoming links (orphan page)`,
39
+ suggestion: 'Add links to this page from related pages',
40
+ });
41
+ }
42
+ }
43
+
44
+ // --- b) Dead Links: links pointing to non-existent pages ---
45
+ for (const link of links) {
46
+ if (!pageMap.has(link.to_page_id)) {
47
+ const fromPage = pageMap.get(link.from_page_id);
48
+ issues.push({
49
+ type: 'dead_link',
50
+ severity: 'error',
51
+ pageId: link.from_page_id,
52
+ pageTitle: fromPage?.title,
53
+ message: `Dead link from "${fromPage?.title || link.from_page_id}" to non-existent page (id: ${link.to_page_id}, anchor: "${link.anchor_text}")`,
54
+ suggestion: 'Remove or fix the broken link',
55
+ });
56
+ }
57
+ if (!pageMap.has(link.from_page_id)) {
58
+ issues.push({
59
+ type: 'dead_link',
60
+ severity: 'error',
61
+ pageId: link.from_page_id,
62
+ message: `Dead link from non-existent page (id: ${link.from_page_id}) to page id ${link.to_page_id}`,
63
+ suggestion: 'Clean up orphaned link records',
64
+ });
65
+ }
66
+ }
67
+
68
+ // --- c) Disconnected Clusters ---
69
+ // Build adjacency list (undirected) for connectivity
70
+ const adj = new Map<number, Set<number>>();
71
+ for (const page of pages) {
72
+ adj.set(page.id, new Set());
73
+ }
74
+ for (const link of links) {
75
+ if (pageMap.has(link.from_page_id) && pageMap.has(link.to_page_id)) {
76
+ adj.get(link.from_page_id)!.add(link.to_page_id);
77
+ adj.get(link.to_page_id)!.add(link.from_page_id);
78
+ }
79
+ }
80
+
81
+ if (pages.length > 0) {
82
+ const visited = new Set<number>();
83
+ const clusters: number[][] = [];
84
+
85
+ for (const page of pages) {
86
+ if (visited.has(page.id)) continue;
87
+ // BFS
88
+ const cluster: number[] = [];
89
+ const queue = [page.id];
90
+ visited.add(page.id);
91
+ while (queue.length > 0) {
92
+ const current = queue.shift()!;
93
+ cluster.push(current);
94
+ for (const neighbor of adj.get(current) || []) {
95
+ if (!visited.has(neighbor)) {
96
+ visited.add(neighbor);
97
+ queue.push(neighbor);
98
+ }
99
+ }
100
+ }
101
+ clusters.push(cluster);
102
+ }
103
+
104
+ if (clusters.length > 1) {
105
+ // Sort by size descending; the largest is the "main" cluster
106
+ clusters.sort((a, b) => b.length - a.length);
107
+ for (let i = 1; i < clusters.length; i++) {
108
+ const clusterPages = clusters[i].map(id => pageMap.get(id)!.title).join(', ');
109
+ for (const id of clusters[i]) {
110
+ const page = pageMap.get(id)!;
111
+ issues.push({
112
+ type: 'disconnected',
113
+ severity: 'warning',
114
+ pageId: page.id,
115
+ pageTitle: page.title,
116
+ message: `"${page.title}" is in a disconnected cluster (${clusters[i].length} pages: ${clusterPages.slice(0, 100)})`,
117
+ suggestion: 'Add links connecting this cluster to the main wiki graph',
118
+ });
119
+ }
120
+ }
121
+ }
122
+ }
123
+
124
+ // --- d) Missing Reciprocal Links ---
125
+ const linkSet = new Set(links.map(l => `${l.from_page_id}->${l.to_page_id}`));
126
+ for (const link of links) {
127
+ if (!pageMap.has(link.from_page_id) || !pageMap.has(link.to_page_id)) continue;
128
+ const reverse = `${link.to_page_id}->${link.from_page_id}`;
129
+ if (!linkSet.has(reverse)) {
130
+ const fromPage = pageMap.get(link.from_page_id)!;
131
+ const toPage = pageMap.get(link.to_page_id)!;
132
+ issues.push({
133
+ type: 'missing_backlink',
134
+ severity: 'info',
135
+ pageId: link.to_page_id,
136
+ pageTitle: toPage.title,
137
+ message: `"${toPage.title}" is linked from "${fromPage.title}" but doesn't link back`,
138
+ suggestion: `Consider adding a link from "${toPage.title}" back to "${fromPage.title}"`,
139
+ });
140
+ }
141
+ }
142
+
143
+ // --- e) Thin Content ---
144
+ for (const page of pages) {
145
+ if (page.content.length < 100) {
146
+ issues.push({
147
+ type: 'thin_content',
148
+ severity: 'warning',
149
+ pageId: page.id,
150
+ pageTitle: page.title,
151
+ message: `"${page.title}" has very short content (${page.content.length} chars)`,
152
+ suggestion: 'Expand this page with more detailed content',
153
+ });
154
+ }
155
+ }
156
+
157
+ // --- f) Duplicate Concepts ---
158
+ // Normalize titles and compare (strip spaces for stricter dedup matching)
159
+ const normalize = (s: string) => normalizeTitle(s).replace(/\s/g, "");
160
+ const seen = new Map<string, { id: number; title: string }>();
161
+ for (const page of pages) {
162
+ const norm = normalize(page.title);
163
+ if (!norm) continue;
164
+ const existing = seen.get(norm);
165
+ if (existing) {
166
+ issues.push({
167
+ type: 'duplicate',
168
+ severity: 'warning',
169
+ pageId: page.id,
170
+ pageTitle: page.title,
171
+ message: `"${page.title}" may be a duplicate of "${existing.title}"`,
172
+ suggestion: 'Consider merging these pages',
173
+ });
174
+ } else {
175
+ seen.set(norm, { id: page.id, title: page.title });
176
+ }
177
+ }
178
+
179
+ // Also check Levenshtein similarity for near-duplicates
180
+ const titles = Array.from(seen.values());
181
+ const reportedPairs = new Set<string>();
182
+ for (let i = 0; i < titles.length; i++) {
183
+ for (let j = i + 1; j < titles.length; j++) {
184
+ const a = normalize(titles[i].title);
185
+ const b = normalize(titles[j].title);
186
+ if (a.length < 3 || b.length < 3) continue;
187
+ const maxLen = Math.max(a.length, b.length);
188
+ // Skip pairs where length difference is too large for 85% similarity
189
+ if (Math.abs(a.length - b.length) > maxLen * 0.2) continue;
190
+ const dist = levenshtein(a, b);
191
+ const similarity = 1 - dist / maxLen;
192
+ if (similarity >= 0.85 && similarity < 1) {
193
+ const pairKey = [titles[i].id, titles[j].id].sort().join('-');
194
+ if (reportedPairs.has(pairKey)) continue;
195
+ reportedPairs.add(pairKey);
196
+ issues.push({
197
+ type: 'duplicate',
198
+ severity: 'info',
199
+ pageId: titles[j].id,
200
+ pageTitle: titles[j].title,
201
+ message: `"${titles[i].title}" and "${titles[j].title}" have similar titles (${Math.round(similarity * 100)}% similar)`,
202
+ suggestion: 'Review if these pages cover the same topic',
203
+ });
204
+ }
205
+ }
206
+ }
207
+
208
+ const errors = issues.filter(i => i.severity === 'error').length;
209
+ const warnings = issues.filter(i => i.severity === 'warning').length;
210
+ const info = issues.filter(i => i.severity === 'info').length;
211
+
212
+ return {
213
+ issues,
214
+ summary: {
215
+ errors,
216
+ warnings,
217
+ info,
218
+ total_pages: pages.length,
219
+ total_links: links.length,
220
+ },
221
+ timestamp: new Date().toISOString(),
222
+ };
223
+ }
224
+
225
+ /** Simple Levenshtein distance */
226
+ function levenshtein(a: string, b: string): number {
227
+ const m = a.length;
228
+ const n = b.length;
229
+ if (m === 0) return n;
230
+ if (n === 0) return m;
231
+
232
+ // Use single-row optimization
233
+ let prev = Array.from({ length: n + 1 }, (_, i) => i);
234
+ let curr = new Array(n + 1);
235
+
236
+ for (let i = 1; i <= m; i++) {
237
+ curr[0] = i;
238
+ for (let j = 1; j <= n; j++) {
239
+ const cost = a[i - 1] === b[j - 1] ? 0 : 1;
240
+ curr[j] = Math.min(
241
+ prev[j] + 1, // deletion
242
+ curr[j - 1] + 1, // insertion
243
+ prev[j - 1] + cost // substitution
244
+ );
245
+ }
246
+ [prev, curr] = [curr, prev];
247
+ }
248
+ return prev[n];
249
+ }
@@ -0,0 +1,150 @@
1
+ import type { Store } from "../store";
2
+ import type { LLMConfig } from "../config";
3
+ import { stripJsonFences } from "../utils";
4
+
5
/** Input for promoting a Q&A exchange into a wiki concept page. */
export interface PromoteParams {
  /** The user's original question; used as a slug fallback and stored as page origin. */
  question: string;
  /** The answer text; becomes the body of the new (or appended-to) page. */
  answer: string;
  /** Desired page title; also drives deduplication and slug generation. */
  title: string;
  /** Id of the page the Q&A originated from; a link is added from it to the new page. */
  sourcePageId: number;
  /** Optional quoted context; prepended as a blockquote, truncated to 500 chars. */
  selectedText?: string;
}
12
+
13
/** Outcome of promoteToWiki(): where the content ended up. */
export interface PromoteResult {
  /** Id of the page that was created or appended to. */
  pageId: number;
  /** Slug of that page (a new unique slug when isNew is true). */
  slug: string;
  /** Title of that page (the existing page's title when deduplicated). */
  title: string;
  /** True when a new page was created; false when appended to an existing one. */
  isNew: boolean;
}
19
+
20
/**
 * Promote a Q&A answer into a permanent wiki concept page.
 *
 * Handles deduplication (appends to an existing page when titles match),
 * slug generation, wiki-linking the new page to existing pages, parent
 * link creation, and quiz generation.
 *
 * @param store - wiki persistence layer (pages, links, quizzes).
 * @param params - the Q&A material to promote; see PromoteParams.
 * @param llmConfig - LLM connection settings, used only for quiz generation.
 * @returns id/slug/title of the page written and whether it was newly created.
 */
export async function promoteToWiki(
  store: Store,
  params: PromoteParams,
  llmConfig: LLMConfig,
): Promise<PromoteResult> {
  const { question, answer, title, sourcePageId, selectedText } = params;

  // Deduplication: when a similar page already exists, append the new answer
  // (separated by a horizontal rule) instead of creating a second page.
  // NOTE(review): "similar" is whatever findSimilarPage implements — confirm.
  const existing = store.findSimilarPage(title);
  if (existing) {
    const updatedContent = existing.content + "\n\n---\n\n" + answer;
    store.updatePageContent(existing.id, updatedContent);
    return {
      pageId: existing.id,
      slug: existing.slug,
      title: existing.title,
      isNew: false,
    };
  }

  // --- Create a new concept page ---
  // NOTE(review): dynamic import — presumably avoids a circular module
  // dependency with the pipeline; confirm before making it static.
  const { slugify } = await import("../pipeline/chunker");
  // Slug fallback chain: title -> question -> timestamp.
  let slug = slugify(title);
  if (!slug) slug = slugify(question);
  if (!slug) slug = `qa-${Date.now()}`;

  // Ensure uniqueness by appending -2, -3, ... while the slug is taken.
  let finalSlug = slug;
  let counter = 2;
  while (store.getPage(finalSlug)) {
    finalSlug = `${slug}-${counter++}`;
  }

  // Build page content with optional quoted context (capped at 500 chars).
  let pageContent = answer;
  if (selectedText) {
    pageContent = `> ${selectedText.slice(0, 500)}\n\n${pageContent}`;
  }

  const page = store.addPage(finalSlug, title, pageContent, undefined, undefined, "concept", 0);

  // Mark as user-generated origin
  store.updatePageOrigin(finalSlug, "user", question, sourcePageId);

  // --- Wiki-link the new page to existing pages ---
  // Longest titles first so a longer title wins over one it contains.
  const targets = store
    .listPageSummaries()
    .filter((p) => p.id !== page.id && p.title.length >= 3)
    .sort((a, b) => b.title.length - a.title.length);

  let linkedContent = pageContent;
  const linkedSlugs = new Set<string>();
  for (const target of targets) {
    if (linkedSlugs.has(target.slug)) continue;
    // Escape regex metacharacters in the title, then match the first
    // case-insensitive standalone occurrence; the lookarounds reject
    // matches adjacent to word characters or markdown-link brackets.
    const escaped = target.title.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const regex = new RegExp(`(?<!\\[)(?<!\\w)(${escaped})(?!\\w)(?!\\])`, "i");
    const match = regex.exec(linkedContent);
    if (match) {
      // Splice in a markdown link for the first occurrence only.
      const replacement = `[${match[1]}](/wiki/${target.slug})`;
      linkedContent =
        linkedContent.slice(0, match.index) +
        replacement +
        linkedContent.slice(match.index + match[0].length);
      linkedSlugs.add(target.slug);
      store.addLink(page.id, target.id, match[1]);
    }
  }
  // Persist the rewritten body only if at least one link was inserted.
  if (linkedSlugs.size > 0) {
    store.updatePageContent(page.id, linkedContent);
  }

  // Add link from source page to new page
  store.addLink(sourcePageId, page.id, title);

  // --- Generate 1-2 quizzes for the new concept ---
  try {
    // NOTE(review): dynamic import, presumably for the same circular-
    // dependency reason as slugify above — confirm.
    const { LLMClient } = await import("../llm-client");
    const llmClient = new LLMClient(llmConfig);

    const quizSystem = `You are a quiz generator for a study wiki. Generate quiz questions that test UNDERSTANDING, not just memorization.
Focus on higher-order thinking: "\uc65c?", "\uc5b4\ub5bb\uac8c?", "\ube44\uad50\ud558\ub77c", "\uc124\uba85\ud558\ub77c" style questions.
Return valid JSON only. No markdown fences.`;

    const quizPrompt = `Based on this wiki content, generate 1-2 quiz questions that test UNDERSTANDING.
Types: "fill_blank" (\ube48\uce78 \ucc44\uc6b0\uae30), "ox" (OX \ud034\uc988 - true/false), "short_answer" (\ub2e8\ub2f5\ud615)

Content title: ${title}
Content:
${answer.slice(0, 3000)}

Respond with a JSON array only:
[{"question": "...", "answer": "...", "explanation": "...", "type": "fill_blank"}]

Rules:
- For fill_blank: use ___ to mark the blank in the question
- For ox: question should be a statement, answer should be "O" or "X"
- For short_answer: question should be answerable in 1-3 words
- Include "explanation" field: a brief 1-2 sentence explanation of WHY the answer is correct`;

    const raw = await llmClient.chatComplete(quizSystem, quizPrompt, 2048);
    const cleaned = stripJsonFences(raw);
    const quizzes = JSON.parse(cleaned) as Array<{
      question: string;
      answer: string;
      explanation?: string;
      type: string;
    }>;

    // Only persist entries with all mandatory fields present.
    for (const q of quizzes) {
      if (q.question && q.answer && q.type) {
        store.addQuiz(page.id, q.question, q.answer, q.type, q.explanation || "");
      }
    }
  } catch {
    // Quiz generation is non-critical; log a warning and continue.
    console.log(`\x1b[33m\u26a0 \ud504\ub85c\ubaa8\ud2b8 \ud034\uc988 \uc0dd\uc131 \uc2e4\ud328\x1b[0m`);
  }

  return {
    pageId: page.id,
    slug: finalSlug,
    title,
    isNew: true,
  };
}
package/src/store.test.ts CHANGED
@@ -13,6 +13,17 @@ describe("Store", () => {
13
13
  store.close();
14
14
  });
15
15
 
16
+ test("schema: pages table has all migrated columns on a fresh DB", () => {
17
+ // Guards against CREATE TABLE / ALTER TABLE drift: every column added
18
+ // via the migration block must also exist after a fresh init, otherwise
19
+ // an index that references it (or downstream code) will break.
20
+ const db = (store as any).db;
21
+ const cols = db.query("PRAGMA table_info(pages)").all().map((r: any) => r.name);
22
+ for (const required of ["origin", "user_question", "parent_page_id", "category"]) {
23
+ expect(cols).toContain(required);
24
+ }
25
+ });
26
+
16
27
  test("addSource and listSources", () => {
17
28
  const src = store.addSource("file:///test.pdf", "pdf", "Test PDF", "raw content");
18
29
  expect(src.id).toBeGreaterThan(0);