npm - ex-brain - Versions diffs - 0.1.1 → 0.2.0 - Mend

ex-brain 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/README.md +48 -0
package/package.json +2 -1
package/src/ai/compiler.ts +18 -53
package/src/ai/entity-link.ts +31 -62
package/src/ai/llm-client.ts +291 -0
package/src/ai/timeline-extractor.ts +29 -62
package/src/commands/index.ts +612 -86
package/src/db/client.ts +121 -15
package/src/db/errors.ts +178 -0
package/src/db/schema.ts +1 -0
package/src/mcp/server.ts +400 -237
package/src/repositories/brain-repo.ts +576 -358
package/src/settings.ts +23 -2
package/src/types/index.ts +1 -0
package/src/utils/cli-output.ts +569 -0
package/src/utils/query-sanitizer.ts +63 -0

package/src/commands/index.ts CHANGED Viewed

@@ -20,10 +20,23 @@ import {
 } from "../markdown/parser";
 import { BrainRepository } from "../repositories/brain-repo";
 import { loadSettings, SETTINGS_PATH, DEFAULT_DB_PATH, type ResolvedLLM } from "../settings";
-import { extractRelations, entityToSlug, EntityType } from "../ai/entity-link";
+import { extractRelations, entityToSlug, type EntityType } from "../ai/entity-link";
 import { registerCompileCommands } from "./compile-cmd";
 import { registerGraphCommand } from "./graph-cmd";
 import { createProgress, formatDuration } from "../utils/progress";
+import {
+  success,
+  error as cliError,
+  warning,
+  info,
+  step,
+  subItem,
+  keyValue,
+  header,
+  createSpinner,
+  formatCount,
+  type ProgressSpinner,
+} from "../utils/cli-output";
 // ---------------------------------------------------------------------------
 // Helpers
@@ -59,32 +72,46 @@ async function applyEntityLinks(
   const settings = await loadSettings();
   if (!settings.llm.baseURL) {
     if (!json) {
-      process.stderr.write(`[entity-link] LLM not configured, skipping for ${sourceSlug}\n`);
+      warning(`LLM not configured, skipping entity extraction for ${sourceSlug}`);
     }
     return { created: 0, linked: 0 };
   }
-  const progress = createProgress();
+  const spinner = createSpinner();
   if (!json) {
-    progress.start(`Extracting entities from ${sourceSlug}`);
+    spinner.start(`Extracting entities from ${sourceSlug}...`);
   }
   const startTime = Date.now();
-  const relations = await extractRelations(content, settings.llm);
+  let relations;
+  try {
+    relations = await extractRelations(content, settings.llm);
+  } catch (err) {
+    if (!json) {
+      spinner.fail(`Entity extraction failed: ${err instanceof Error ? err.message : String(err)}`);
+    }
+    return { created: 0, linked: 0 };
+  }
   // Filter by confidence
-  const highConfidence = relations.filter((r) => r.confidence >= 0.6);
+  const confidenceThreshold = settings.extraction.confidenceThreshold;
+  const highConfidence = relations.filter((r) => r.confidence >= confidenceThreshold);
   const ignoredCount = relations.length - highConfidence.length;
   if (highConfidence.length === 0) {
     if (!json) {
-      progress.fail(`No high-confidence entities found`);
+      if (relations.length > 0) {
+        spinner.warn(`Found ${relations.length} entities but all below confidence threshold (${confidenceThreshold})`);
+      } else {
+        spinner.warn(`No entities found in content`);
+      }
     }
     return { created: 0, linked: 0 };
   }
   let created = 0;
   let linked = 0;
+  const details: string[] = [];
   for (const r of highConfidence) {
     // 1. Resolve entity slugs (disambiguation)
@@ -97,8 +124,8 @@ async function applyEntityLinks(
     // 2. Ensure entity pages exist
     const c1 = await repo.ensureEntityPage(fromSlug, r.from.type, r.from.name, r.relation, r.context, sourceSlug);
     const c2 = await repo.ensureEntityPage(toSlug, r.to.type, r.to.name, r.relation, r.context, sourceSlug);
-    if (c1) created += 1;
-    if (c2) created += 1;
+    if (c1) { created += 1; details.push(`Created: ${r.from.name} (${r.from.type})`); }
+    if (c2) { created += 1; details.push(`Created: ${r.to.name} (${r.to.type})`); }
     // 3. Link between entities (context includes relation type)
     await repo.link(fromSlug, toSlug, `[${r.relation}] ${r.context}`);
@@ -113,8 +140,16 @@ async function applyEntityLinks(
   if (!json) {
     const duration = formatDuration(Date.now() - startTime);
-    const entityNames = highConfidence.flatMap((r) => [r.from.name, r.to.name]);
-    progress.succeed(`${[...new Set(entityNames)].join(", ")} (${created} created, ${linked} links, ${duration})`);
+    const entityNames = [...new Set(highConfidence.flatMap((r) => [r.from.name, r.to.name]))];
+    spinner.succeed(`Extracted ${entityNames.length} entities: ${entityNames.join(", ")}`);
+    // Print detailed info
+    subItem(`${created} entity pages created`);
+    subItem(`${linked} links added`);
+    if (ignoredCount > 0) {
+      subItem(`${ignoredCount} low-confidence relations ignored`);
+    }
+    subItem(`Completed in ${duration}`);
   }
   return { created, linked };
@@ -267,6 +302,15 @@ Examples:
       }
       await withRepo(program, async (repo) => {
+        const jsonOut = isJson(program);
+        const spinner = createSpinner();
+        const startTime = Date.now();
+        if (!jsonOut) {
+          header(`Put: ${finalSlug}`);
+          spinner.start(`Creating/updating page...`);
+        }
         const page = await repo.putPage({
           slug: finalSlug,
           type,
@@ -275,12 +319,26 @@ Examples:
           timeline: parsed.timeline,
           frontmatter: parsed.frontmatter,
         });
+        if (!jsonOut) {
+          spinner.succeed(`Page saved: ${page.slug}`);
+          keyValue("Title", title);
+          keyValue("Type", type);
+          keyValue("Content length", `${parsed.compiledTruth.length} chars`);
+        }
         await applyEntityLinks(
           repo,
           finalSlug,
           parsed.compiledTruth,
-          isJson(program),
+          jsonOut,
         );
+        if (!jsonOut) {
+          const duration = formatDuration(Date.now() - startTime);
+          success(`Operation completed in ${duration}`);
+        }
         print(program, { ok: true, slug: page.slug, updatedAt: page.updatedAt });
       });
     },
@@ -350,7 +408,20 @@ Examples:
       return;
     }
     await withRepo(program, async (repo) => {
+      const jsonOut = isJson(program);
+      const spinner = createSpinner();
+      if (!jsonOut) {
+        header(`Delete: ${slug}`);
+        spinner.start(`Deleting page and related data...`);
+      }
       await repo.deletePage(slug);
+      if (!jsonOut) {
+        spinner.succeed(`Page deleted: ${slug}`);
+      }
       print(program, { ok: true, action: "delete", slug });
     });
   });
@@ -447,7 +518,7 @@ Examples:
         const limit = Number(opts.limit ?? 10);
         const hits = await repo.query(question, limit);
-        // If --llm flag, generate answer based on context
+        // If --llm flag, generate answer based on multi-layer context
         if (opts.llm) {
           const settings = await loadSettings();
           if (!settings.llm.baseURL) {
@@ -458,35 +529,48 @@ Examples:
           const progress = createProgress();
           progress.start("Searching knowledge base...");
-          // Use excerpts from hits as context (avoids extra DB queries that cause segfault)
           const contextLimit = Number(opts.contextLimit ?? 5);
           const topHits = hits.slice(0, contextLimit);
-          // Build context from search results
-          const contextPages = topHits.map(hit => ({
-            slug: hit.slug,
-            title: hit.title,
-            excerpt: hit.excerpt || "",
-          }));
+          if (topHits.length === 0) {
+            progress.stop();
+            process.stderr.write("No relevant pages found.\n");
+            print(program, { answer: "No relevant information found in the knowledge base.", sources: [] });
+            return;
+          }
+          // Collect multi-layer context (primary + raw data + linked pages scored by relevance)
+          progress.update(`Loading pages, raw documents, and linked content...`);
+          // ~100KB char budget ≈ 25K tokens, safe for most models
+          const MAX_CONTEXT_CHARS = 100_000;
+          const { sections, totalChars, stats } = await collectContextForLLM(repo, topHits, question, MAX_CONTEXT_CHARS);
+          if (sections.length === 0) {
+            progress.stop();
+            process.stderr.write("No content could be loaded.\n");
+            print(program, { answer: "Failed to load page content.", sources: [] });
+            return;
+          }
-          progress.update("Generating answer...");
+          progress.update(`Generating answer from ${stats.primaryPages} page(s), ${stats.rawDocs} raw doc(s), ${stats.linkedPages} linked page(s)...`);
           const startTime = Date.now();
-          const answer = await generateAnswerFromExcerpts(question, contextPages, settings.llm);
+          const answer = await generateAnswerWithContext(question, sections, stats, settings.llm);
           const duration = formatDuration(Date.now() - startTime);
-          progress.succeed(`Answer generated (${duration})`);
+          progress.succeed(`Answer generated (${duration}, context: ${(totalChars / 1024).toFixed(1)}KB)`);
-          // Output markdown
+          // Output answer as markdown
           console.log("\n" + answer);
-          // Show sources
-          if (contextPages.length > 0) {
-            console.log("\n---\n**Sources:**\n");
-            contextPages.forEach((p, i) => {
-              console.log(`${i + 1}. [[${p.slug}|${p.title}]]`);
-            });
+          // Show sources breakdown
+          console.log("\n---\n**Sources:**\n");
+          for (let i = 0; i < sections.length; i++) {
+            const s = sections[i];
+            const icon = s.type === 'primary' ? '📄' : s.type === 'raw_data' ? '📎' : '🔗';
+            console.log(`${icon} ${i + 1}. [[${s.slug}|${s.title}]] — ${s.label} (${(s.content.length / 1024).toFixed(1)}KB)`);
           }
+          console.log(`\n*Context: ${stats.primaryPages} page(s), ${stats.rawDocs} raw doc(s), ${stats.linkedPages} linked page(s)*`);
         } else {
           print(program, hits);
         }
@@ -887,18 +971,21 @@ Examples:
       .command("import")
       .argument("<dir>", "directory of markdown files")
       .description("import a directory of markdown files")
+      .option("--skip-index", "skip vector indexing (useful if seekdb crashes)")
       .addHelpText(
         "after",
         `
 Examples:
   ebrain import ./docs
   ebrain import ./docs --dry-run
+  ebrain import ./docs --skip-index  # skip vector indexing
 `,
       ),
-  ).action(async (dir: string, opts: { dryRun?: boolean }) => {
+  ).action(async (dir: string, opts: { dryRun?: boolean; skipIndex?: boolean }) => {
     await withRepo(program, async (repo) => {
       const root = resolve(dir);
       const files = await collectMarkdownFiles(root);
       if (isDryRun(opts)) {
         print(program, {
           dryRun: true,
@@ -912,11 +999,18 @@ Examples:
       const jsonOut = isJson(program);
       const settings = await loadSettings();
-      const progress = createProgress();
+      const spinner = createSpinner();
       const startTime = Date.now();
+      if (!jsonOut) {
+        header(`Import: ${root}`);
+      }
       // Phase 1: Parse all files and collect data
-      progress.start(`Scanning ${files.length} files...`);
+      if (!jsonOut) {
+        spinner.start(`Scanning ${files.length} files...`);
+      }
       const fileData: Array<{
         file: string;
         slug: string;
@@ -940,33 +1034,64 @@ Examples:
         fileData.push({ file, slug, parsed, content, wikiLinks, timelineEntries, tags });
       }
-      // Phase 2: Write all pages first
-      progress.update(`Writing ${fileData.length} pages...`);
+      if (!jsonOut) {
+        spinner.succeed(`Found ${files.length} markdown files`);
+      }
+      // Phase 2: Write all pages first (skip embed for performance)
+      if (!jsonOut) {
+        spinner.start(`Writing ${fileData.length} pages to database...`);
+      }
+      const allSlugs: string[] = [];
+      const writeErrors: string[] = [];
       for (let i = 0; i < fileData.length; i++) {
         const { slug, parsed } = fileData[i]!;
-        if (!jsonOut && i % 10 === 0) {
-          progress.update(`Writing pages... ${i + 1}/${fileData.length}`);
+        if (!jsonOut && i % 20 === 0) {
+          spinner.update(`Writing pages... ${i + 1}/${fileData.length}`);
+        }
+        try {
+          await repo.putPage({
+            slug,
+            type: String(parsed.frontmatter.type ?? inferTypeFromSlug(slug)),
+            title: String(parsed.frontmatter.title ?? slugToTitle(slug)),
+            compiledTruth: parsed.compiledTruth,
+            timeline: parsed.timeline,
+            frontmatter: parsed.frontmatter,
+          }, true); // skipEmbed: true for performance
+          allSlugs.push(slug);
+        } catch (err) {
+          writeErrors.push(`${slug}: ${err instanceof Error ? err.message : String(err)}`);
+        }
+      }
+      if (!jsonOut) {
+        spinner.succeed(`Wrote ${allSlugs.length} pages to database`);
+        if (writeErrors.length > 0) {
+          warning(`${writeErrors.length} pages failed to write`);
+          for (const e of writeErrors.slice(0, 3)) {
+            subItem(e);
+          }
+          if (writeErrors.length > 3) {
+            subItem(`... and ${writeErrors.length - 3} more`);
+          }
         }
-        await repo.putPage({
-          slug,
-          type: String(parsed.frontmatter.type ?? inferTypeFromSlug(slug)),
-          title: String(parsed.frontmatter.title ?? slugToTitle(slug)),
-          compiledTruth: parsed.compiledTruth,
-          timeline: parsed.timeline,
-          frontmatter: parsed.frontmatter,
-        });
       }
       // Phase 3: Parallel entity extraction (main optimization)
-      progress.update("Extracting entities...");
       const BATCH_SIZE = 10;
       const entityResults = new Map<string, Awaited<ReturnType<typeof extractRelations>>>();
       if (settings.llm.baseURL) {
+        if (!jsonOut) {
+          spinner.start(`Extracting entities with LLM...`);
+        }
         for (let i = 0; i < fileData.length; i += BATCH_SIZE) {
           const batch = fileData.slice(i, i + BATCH_SIZE).filter(d => d.tags.length === 0);
           if (!jsonOut) {
-            progress.update(`Extracting entities... ${Math.min(i + BATCH_SIZE, fileData.length)}/${fileData.length}`);
+            spinner.update(`Extracting entities... ${Math.min(i + BATCH_SIZE, fileData.length)}/${fileData.length}`);
           }
           const batchPromises = batch.map(async ({ slug, content }) => {
             const relations = await extractRelations(content, settings.llm);
@@ -977,13 +1102,34 @@ Examples:
             entityResults.set(slug, relations);
           }
         }
+        if (!jsonOut) {
+          spinner.succeed(`Entity extraction complete`);
+        }
+      } else {
+        if (!jsonOut) {
+          warning(`LLM not configured, skipping entity extraction`);
+        }
       }
       // Phase 4: Write links, tags, timeline, and entity pages
-      progress.update("Creating links and timeline...");
+      if (!jsonOut) {
+        spinner.start(`Creating links, tags, and timeline entries...`);
+      }
       let linkCount = 0;
       let timelineCount = 0;
       let entityCount = 0;
+      let tagCount = 0;
+      // Collect timeline entries for batch insert
+      const allTimelineEntries: Array<{
+        pageSlug: string;
+        date: string;
+        source: string;
+        summary: string;
+        detail: string;
+      }> = [];
       for (const { slug, wikiLinks, timelineEntries, tags, content } of fileData) {
         // Wiki links
@@ -992,9 +1138,9 @@ Examples:
           linkCount++;
         }
-        // Timeline entries
+        // Collect timeline entries for batch insert
         for (const entry of timelineEntries) {
-          await repo.timelineAdd({
+          allTimelineEntries.push({
             pageSlug: slug,
             date: entry.date,
             source: entry.source,
@@ -1007,6 +1153,7 @@ Examples:
         // Tags
         for (const tag of tags) {
           await repo.tag(slug, tag);
+          tagCount++;
         }
         // Entity links from parallel extraction
@@ -1032,12 +1179,53 @@ Examples:
         }
       }
+      // Batch insert all timeline entries
+      if (allTimelineEntries.length > 0) {
+        await repo.timelineAddBatch(allTimelineEntries);
+      }
+      if (!jsonOut) {
+        spinner.succeed(`Created links, tags, and timeline`);
+      }
+      // Phase 5: Batch sync all pages to search index
+      if (opts.skipIndex) {
+        if (!jsonOut) {
+          info(`Skipping vector indexing (--skip-index)`);
+        }
+      } else {
+        if (!jsonOut) {
+          spinner.start(`Indexing ${allSlugs.length} pages for search...`);
+        }
+        await repo.embedAll();
+        if (!jsonOut) {
+          spinner.succeed(`Search indexing complete`);
+        }
+      }
       const duration = formatDuration(Date.now() - startTime);
-      progress.succeed(`${files.length} files imported, ${entityCount} entities, ${linkCount} links (${duration})`);
+      if (!jsonOut) {
+        // Print summary
+        header("Import Summary");
+        keyValue("Files imported", String(files.length));
+        keyValue("Pages created", String(allSlugs.length));
+        keyValue("Entities extracted", String(entityCount));
+        keyValue("Links created", String(linkCount));
+        keyValue("Timeline entries", String(timelineCount));
+        keyValue("Tags added", String(tagCount));
+        keyValue("Duration", duration);
+        if (writeErrors.length > 0) {
+          warning(`${writeErrors.length} pages had errors`);
+        }
+      }
       print(program, {
+        ok: true,
         importedFiles: files.length,
-        pages: fileData.length,
+        pages: allSlugs.length,
         links: linkCount,
         timelineEntries: timelineCount,
         entities: entityCount,
@@ -1138,6 +1326,15 @@ Examples:
       }
       await withRepo(program, async (repo) => {
+        const jsonOut = isJson(program);
+        const spinner = createSpinner();
+        const startTime = Date.now();
+        if (!jsonOut) {
+          header(`Ingest: ${fileName}`);
+          spinner.start(`Creating page from file...`);
+        }
         await repo.putPage({
           slug,
           type,
@@ -1149,6 +1346,14 @@ Examples:
             sourceType: type,
           },
         });
+        if (!jsonOut) {
+          spinner.succeed(`Page created: ${slug}`);
+          keyValue("Source file", fileName);
+          keyValue("Type", type);
+          keyValue("Content length", `${content.length} chars`);
+        }
         await repo.timelineAdd({
           pageSlug: slug,
           date: new Date().toISOString().slice(0, 10),
@@ -1156,12 +1361,19 @@ Examples:
           summary: `Ingested file ${fileName}`,
           detail: "",
         });
         await applyEntityLinks(
           repo,
           slug,
           content,
-          isJson(program),
+          jsonOut,
         );
+        if (!jsonOut) {
+          const duration = formatDuration(Date.now() - startTime);
+          success(`Ingestion completed in ${duration}`);
+        }
         print(program, { ok: true, action: "ingest", slug });
       });
     },
@@ -1204,13 +1416,28 @@ Examples:
         }
         await withRepo(program, async (repo) => {
           const jsonOut = isJson(program);
+          const spinner = createSpinner();
+          const startTime = Date.now();
+          if (!jsonOut) {
+            header("Embed All Pages");
+            spinner.start(`Loading pages...`);
+          }
           const pages = await repo.listPages({ limit: 100000 });
-          let count = 0;
-          for (const page of pages) {
-            count += 1;
-            progress("embed " + page.slug, count, pages.length, jsonOut);
-            await repo.syncPageToSearch(page.slug);
+          if (!jsonOut) {
+            spinner.update(`Embedding ${pages.length} pages...`);
+          }
+          const count = await repo.embedAll();
+          if (!jsonOut) {
+            const duration = formatDuration(Date.now() - startTime);
+            spinner.succeed(`Embedded ${count} pages`);
+            keyValue("Duration", duration);
           }
           print(program, { embedded: count, mode: "all" });
         });
         return;
@@ -1223,7 +1450,20 @@ Examples:
         return;
       }
       await withRepo(program, async (repo) => {
+        const jsonOut = isJson(program);
+        const spinner = createSpinner();
+        if (!jsonOut) {
+          header(`Embed: ${slug}`);
+          spinner.start(`Generating embedding for page...`);
+        }
         await repo.syncPageToSearch(slug);
+        if (!jsonOut) {
+          spinner.succeed(`Page embedded: ${slug}`);
+        }
         print(program, { embedded: 1, slug });
       });
     },
@@ -1243,10 +1483,15 @@ Examples:
     )
     .action(async () => {
       await withRepo(program, async () => {
+        const settings = await loadSettings();
+        const dbPath = program.opts().db ?? settings.dbPath;
+        success(`Database initialized`);
+        keyValue("Path", dbPath);
         print(program, {
           ok: true,
-          dbPath:
-            program.opts().db ?? (await loadSettings()).dbPath,
+          dbPath,
         });
       });
     });
@@ -1264,7 +1509,19 @@ Examples:
     )
     .action(async () => {
       await withRepo(program, async (repo) => {
-        print(program, await repo.stats());
+        const jsonOut = isJson(program);
+        const stats = await repo.stats();
+        if (!jsonOut) {
+          header("Knowledge Base Statistics");
+          keyValue("Pages", String(stats.pages));
+          keyValue("Links", String(stats.links));
+          keyValue("Tags", String(stats.tags));
+          keyValue("Timeline entries", String(stats.timelineEntries));
+          keyValue("Raw data rows", String(stats.rawRows));
+        }
+        print(program, stats);
       });
     });
@@ -1324,7 +1581,20 @@ async function withRepo(
   const db = await BrainDb.connect(dbPath, settings);
   const repo = new BrainRepository(db);
   await callback(repo);
-  // CLI 短生命周期应用：强制退出绕过 seekdb native 模块的 cleanup bug
+  // Gracefully close database
+  // Note: seekdb SDK's InternalEmbeddedClient.close() is empty in embedded mode
+  // Data may not flush properly. Use remote seekdb server for reliability.
+  try {
+    await db.close();
+  } catch (e) {
+    // Close may fail due to seekdb native bug
+  }
+  // Give seekdb extra time after close
+  await new Promise((r) => setTimeout(r, 500));
+  // CLI: force exit to bypass seekdb native cleanup segfault
   process.exit(0);
 }
@@ -1366,18 +1636,249 @@ function normalizeLinkSlug(path: string): string {
 }
 // ---------------------------------------------------------------------------
-// LLM Answer Generation
+// LLM Answer Generation — Multi-layer Context Collection
 // ---------------------------------------------------------------------------
-interface ContextPage {
+/** A single section of context for the LLM prompt. */
+interface ContextSection {
+  /** Context layer this section belongs to; sections are grouped by it when the prompt is built. */
+  type: 'primary' | 'raw_data' | 'linked';
+  /** Slug of the page the content was taken from. */
 slug: string;
+  /** Title of that page, shown in the prompt heading and in the sources list. */
 title: string;
-  excerpt: string;
+  /** Text placed into the prompt for this section (may end with a truncation marker). */
+  content: string;
+  /** Human-readable label like "原始文档 (crm)" or "关联页面: projects/alpha". */
+  label: string;
+}
+/**
+ * Collect multi-layer context for LLM answer generation.
+ *
+ * Layers (in priority order):
+ * 1. Primary: compiledTruth + timeline of each hit page
+ * 2. Raw data: original documents stored via raw.set
+ * 3. Linked pages: compiledTruth of pages linked to/from hit pages, kept only
+ *    when their relevance score against the question reaches MIN_LINKED_SCORE
+ *
+ * The character budget is enforced in one place (`addSection`): a section that
+ * does not fit is cut down and suffixed with a truncation marker, and a section
+ * for which no usable room remains is dropped. Failures while loading raw rows
+ * or links are treated as non-fatal and skipped.
+ *
+ * @param repo - Repository used to load pages, raw rows, links and search hits.
+ * @param hits - Search hits that seed the context, in priority order.
+ * @param question - The user's question; used to score linked pages.
+ * @param maxChars - Upper bound on the summed length of all section contents.
+ * @returns The collected sections, their total length in characters, and
+ *          per-layer counts of the sections that were actually included.
+ */
+async function collectContextForLLM(
+  repo: BrainRepository,
+  hits: Array<{ slug: string; title: string; score: number }>,
+  question: string,
+  maxChars: number,
+): Promise<{ sections: ContextSection[]; totalChars: number; stats: ContextStats }> {
+  const TRUNCATION_MARKER = '\n...[truncated]';
+  // Linked pages scoring below this are considered irrelevant to the question.
+  const MIN_LINKED_SCORE = 0.02;
+  // Linked pages scoring above this also contribute their raw documents.
+  const LINKED_RAW_SCORE = 0.1;
+  // Raw rows of linked pages at or below this length are not worth including.
+  const MIN_LINKED_RAW_CHARS = 100;
+  const sections: ContextSection[] = [];
+  let totalChars = 0;
+  const stats: ContextStats = {
+    primaryPages: 0,
+    rawDocs: 0,
+    linkedPages: 0,
+    skippedChars: 0,
+  };
+  const seenKeys = new Set<string>();
+  /**
+   * Append a section if it is new and room is left, truncating it to the
+   * remaining budget. Returns true only when the section was actually added.
+   */
+  function addSection(section: ContextSection): boolean {
+    const key = `${section.type}:${section.slug}:${section.label}`;
+    if (seenKeys.has(key)) return false;
+    const original = section.content;
+    if (original.length === 0) return false;
+    const budget = maxChars - totalChars;
+    let content = original;
+    if (original.length > budget) {
+      // Never pass a negative end index to slice(): it would count from the
+      // end of the string and keep almost everything, defeating the budget.
+      const keep = budget - TRUNCATION_MARKER.length;
+      if (keep <= 0) {
+        stats.skippedChars += original.length;
+        return false;
+      }
+      content = original.slice(0, keep) + TRUNCATION_MARKER;
+      // Record what was dropped, measured against the untruncated text.
+      stats.skippedChars += original.length - keep;
+    }
+    sections.push({ ...section, content });
+    totalChars += content.length;
+    seenKeys.add(key);
+    return true;
+  }
+  /** Render a raw row payload as text; anything but strings and objects yields ''. */
+  function rawToText(data: unknown, indent?: number): string {
+    if (typeof data === 'string') return data;
+    if (typeof data === 'object' && data !== null) {
+      return JSON.stringify(data, null, indent) ?? '';
+    }
+    return '';
+  }
+  // Layer 1: Primary pages (compiledTruth + timeline)
+  const primarySlugs = new Set<string>();
+  for (const hit of hits) {
+    const page = await repo.getPage(hit.slug);
+    if (!page) continue;
+    const parts: string[] = [];
+    const body = page.compiledTruth?.trim();
+    if (body) parts.push(body);
+    const tl = page.timeline?.trim();
+    if (tl) parts.push(`## 时间线\n${tl}`);
+    if (parts.length === 0) continue;
+    const added = addSection({
+      type: 'primary',
+      slug: page.slug,
+      title: page.title,
+      content: parts.join('\n\n'),
+      label: `页面正文`,
+    });
+    // Count only what really made it into the context.
+    if (added) {
+      stats.primaryPages++;
+      primarySlugs.add(page.slug);
+    }
+  }
+  // Layer 2: Raw data (original documents)
+  for (const hit of hits) {
+    try {
+      const rawRows = await repo.readRaw(hit.slug) as Array<{ source: string; data: unknown; fetchedAt?: string }>;
+      for (const row of rawRows) {
+        const rawContent = rawToText(row.data, 2);
+        if (!rawContent.trim()) continue;
+        const added = addSection({
+          type: 'raw_data',
+          slug: hit.slug,
+          title: hit.title,
+          content: rawContent,
+          label: `原始文档 (${row.source})`,
+        });
+        if (added) stats.rawDocs++;
+      }
+    } catch {
+      // Raw data fetch failure is non-fatal
+    }
+  }
+  // Layer 3: Linked pages — SEMANTICALLY SCORED against the question
+  // Only include linked pages that are actually relevant to what the user asked.
+  const allLinkedSlugs = new Set<string>();
+  for (const hit of hits) {
+    try {
+      const outLinks = await repo.outgoingLinks(hit.slug);
+      for (const l of outLinks) allLinkedSlugs.add(l.slug);
+    } catch { /* ignore */ }
+    try {
+      const backlinkSlugs = await repo.backlinks(hit.slug);
+      for (const s of backlinkSlugs) allLinkedSlugs.add(s);
+    } catch { /* ignore */ }
+  }
+  // Hits that link to each other are already present as primary sections;
+  // adding them again as "linked" would only duplicate content.
+  for (const slug of primarySlugs) allLinkedSlugs.delete(slug);
+  if (allLinkedSlugs.size > 0) {
+    // Score linked pages using broad semantic search.
+    // Query a wide set of pages, then intersect with linked slugs.
+    const broadLimit = Math.min(200, Math.max(50, allLinkedSlugs.size));
+    const broadResults = await repo.query(question, broadLimit);
+    const semanticScoreMap = new Map<string, number>(broadResults.map(h => [h.slug, h.score]));
+    // Keyword-based fallback scoring for linked pages without embedding scores.
+    // Pages loaded here are cached so they are not fetched a second time below.
+    const pageCache = new Map<string, NonNullable<Awaited<ReturnType<BrainRepository['getPage']>>>>();
+    const keywordScores = new Map<string, number>();
+    for (const linkedSlug of allLinkedSlugs) {
+      if (semanticScoreMap.has(linkedSlug)) continue;
+      try {
+        const page = await repo.getPage(linkedSlug);
+        if (page) {
+          pageCache.set(linkedSlug, page);
+          const text = `${page.title} ${page.compiledTruth}`.slice(0, 2000);
+          keywordScores.set(linkedSlug, computeKeywordRelevance(text, question));
+        }
+      } catch { /* ignore */ }
+    }
+    // Combine scores (semantic first, then keyword fallback), drop irrelevant
+    // pages and order the rest from most to least relevant.
+    const relevantLinked = [...allLinkedSlugs]
+      .map(slug => ({
+        slug,
+        score: semanticScoreMap.get(slug) ?? keywordScores.get(slug) ?? 0,
+      }))
+      .filter(s => s.score >= MIN_LINKED_SCORE)
+      .sort((a, b) => b.score - a.score);
+    // Fetch content for relevant linked pages (respecting budget)
+    for (const linked of relevantLinked) {
+      // Stop once not even a truncated section could be added.
+      if (maxChars - totalChars <= TRUNCATION_MARKER.length) break;
+      const linkedPage = pageCache.get(linked.slug) ?? await repo.getPage(linked.slug);
+      const content = linkedPage?.compiledTruth?.trim();
+      if (!linkedPage || !content) continue;
+      const added = addSection({
+        type: 'linked',
+        slug: linkedPage.slug,
+        title: linkedPage.title,
+        content,
+        label: `关联页面: ${linkedPage.slug} (相关度: ${(linked.score * 100).toFixed(1)}%)`,
+      });
+      if (!added) continue;
+      stats.linkedPages++;
+      // Also fetch raw data for highly relevant linked pages
+      if (linked.score > LINKED_RAW_SCORE) {
+        try {
+          const rawRows = await repo.readRaw(linked.slug) as Array<{ source: string; data: unknown }>;
+          for (const row of rawRows) {
+            const rawContent = rawToText(row.data);
+            if (rawContent.trim().length <= MIN_LINKED_RAW_CHARS) continue;
+            const rawAdded = addSection({
+              type: 'raw_data',
+              slug: linked.slug,
+              title: linkedPage.title,
+              content: rawContent,
+              label: `原始文档 (关联: ${row.source})`,
+            });
+            if (rawAdded) stats.rawDocs++;
+          }
+        } catch { /* ignore */ }
+      }
+    }
+  }
+  return { sections, totalChars, stats };
+}
+/**
+ * Simple keyword-based relevance scoring (fallback for pages without embeddings).
+ * Computes the fraction of unique meaningful characters from the question
+ * that appear in the text. Matching is case-insensitive; whitespace,
+ * punctuation, symbols and a fixed set of very common Chinese characters
+ * are not counted as meaningful.
+ *
+ * @param text - Candidate text to score (for example a page title plus body).
+ * @param question - The user's question.
+ * @returns A value in [0, 1]; 0 when the question has no meaningful characters.
+ */
+function computeKeywordRelevance(text: string, question: string): number {
+  const STOP_CHARS = new Set('的是了在和我有你就这不人都说上个大国为到以们年会生地要主中子自实家小对多能好可很所把当');
+  // Unicode property escapes cover ASCII and full-width punctuation/symbols
+  // alike, so characters such as "?", "." or "!" no longer count as keywords.
+  const IGNORED = /[\s\p{P}\p{S}]/u;
+  // Lower-case before de-duplicating so "A" and "a" count as one character,
+  // consistent with the case-insensitive lookup below.
+  const uniqueChars = new Set(
+    [...question.toLowerCase()].filter(c => !IGNORED.test(c) && !STOP_CHARS.has(c)),
+  );
+  if (uniqueChars.size === 0) return 0;
+  const haystack = text.toLowerCase();
+  let matched = 0;
+  for (const char of uniqueChars) {
+    if (haystack.includes(char)) matched++;
+  }
+  return matched / uniqueChars.size;
 }
-async function generateAnswerFromExcerpts(
+/** Counters describing the context gathered for one LLM answer. */
+interface ContextStats {
+  /** Number of primary (search-hit) page sections counted by the collector. */
+  primaryPages: number;
+  /** Number of raw-document sections counted, from hit pages and linked pages. */
+  rawDocs: number;
+  /** Number of linked-page sections counted. */
+  linkedPages: number;
+  /** Running character tally the collector updates when a section is truncated to fit the budget. */
+  skippedChars: number;
+}
+/**
+ * Build LLM prompt from collected context sections and generate answer.
+ */
+async function generateAnswerWithContext(
   question: string,
-  pages: ContextPage[],
+  sections: ContextSection[],
+  stats: ContextStats,
   llm: ResolvedLLM,
 ): Promise<string> {
   const apiKey = llm.apiKey || process.env[llm.apiKeyEnv] || "";
@@ -1385,29 +1886,54 @@ async function generateAnswerFromExcerpts(
     return "Error: LLM API key not configured.";
   }
-  // Build context from page excerpts
-  const context = pages
-    .map((p, i) => {
-      return `## Source ${i + 1}: ${p.title}\n**Slug:** ${p.slug}\n\n${p.excerpt}`;
-    })
-    .join("\n\n---\n\n");
+  if (sections.length === 0) {
+    return "知识库中没有找到相关内容。";
+  }
-  const prompt = `You are answering a question based on the provided knowledge base context.
+  // Build context sections with clear labels
+  const contextParts: string[] = [];
+  let sectionIndex = 0;
+  // Group by type for cleaner output
+  const primarySections = sections.filter(s => s.type === 'primary');
+  const rawSections = sections.filter(s => s.type === 'raw_data');
+  const linkedSections = sections.filter(s => s.type === 'linked');
+  function renderSections(group: ContextSection[], header: string) {
+    if (group.length === 0) return;
+    contextParts.push(`## ${header}\n`);
+    for (const s of group) {
+      sectionIndex++;
+      contextParts.push(`### [${sectionIndex}] ${s.title} — ${s.label}\n**Slug:** ${s.slug}\n\n${s.content}\n`);
+    }
+    contextParts.push('');
+  }
+  renderSections(primarySections, '页面正文');
+  renderSections(rawSections, '原始文档');
+  renderSections(linkedSections, '关联页面');
+  const context = contextParts.join('\n');
-## Question
+  const prompt = `你是一个知识库助手，请根据提供的知识库内容回答问题。
+## 问题
 ${question}
-## Context from Knowledge Base
-${context || "(No relevant pages found)"}
+## 知识库内容
+${context}
-## Instructions
-- Answer the question based ONLY on the provided context
-- If the context doesn't contain enough information, say so
-- Cite sources using markdown links like [Title](slug) when referencing specific information
-- Format your answer in clean markdown
-- Be concise but comprehensive
+## 回答要求
+- 仅基于提供的知识库内容回答，不要编造信息
+- 如果知识库中没有相关信息，请明确说明
+- 引用来源时使用 [[slug|标题]] 的格式
+- 使用清晰的 markdown 格式
+- 如果涉及时间线信息，请在回答中体现
+- 区分哪些信息来自「页面正文」、哪些来自「原始文档」、哪些来自「关联页面」
+- 语言与提问保持一致（中文提问用中文回答，英文提问用英文回答）
-## Answer`;
+## 回答`;
   try {
     const resp = await fetch(
@@ -1423,12 +1949,12 @@ ${context || "(No relevant pages found)"}
           messages: [
             {
               role: "system",
-              content: "You are a helpful assistant that answers questions based on a knowledge base. Always cite your sources.",
+              content: "你是一个专业的知识库助手，基于提供的知识库内容准确回答问题。引用来源时使用 [[slug|标题]] 格式。回答要条理清晰，区分信息来源。",
             },
             { role: "user", content: prompt },
           ],
           temperature: 0.3,
-          max_tokens: 2048,
+          max_tokens: 4096,
         }),
       },
     );