npm - ex-brain - Versions diffs - 0.2.3 → 0.2.5 - Mend

ex-brain 0.2.3 → 0.2.5

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/README.md +1 -1
package/package.json +2 -1
package/src/ai/ax-adapter.ts +80 -0
package/src/ai/compiler.ts +148 -428
package/src/ai/entity-link.ts +102 -109
package/src/ai/timeline-extractor.ts +149 -306
package/src/commands/index.ts +207 -23
package/src/ai/llm-client.ts +0 -291

package/src/commands/index.ts CHANGED Viewed

@@ -544,10 +544,14 @@ Examples:
           }
           // Collect multi-layer context (primary + raw data + linked pages scored by relevance)
-          progress.update(`Loading pages, raw documents, and linked content...`);
           // ~100KB char budget ≈ 25K tokens, safe for most models
           const MAX_CONTEXT_CHARS = 100_000;
-          const { sections, totalChars, stats } = await collectContextForLLM(repo, topHits, question, MAX_CONTEXT_CHARS);
+          const ctxStart = Date.now();
+          progress.update(`Loading page content...`);
+          const { sections, totalChars, stats } = await collectContextForLLM(repo, topHits, question, MAX_CONTEXT_CHARS, (stage) => {
+            progress.update(`Loading ${stage}...`);
+          });
+          const ctxDuration = formatDuration(Date.now() - ctxStart);
           if (sections.length === 0) {
             progress.stop();
@@ -556,16 +560,18 @@ Examples:
             return;
           }
-          progress.update(`Generating answer from ${stats.primaryPages} page(s), ${stats.rawDocs} raw doc(s), ${stats.linkedPages} linked page(s)...`);
+          progress.succeed(`Loaded ${stats.primaryPages} page(s), ${stats.rawDocs} raw doc(s), ${stats.linkedPages} linked page(s) (${ctxDuration})`);
           const startTime = Date.now();
-          const answer = await generateAnswerWithContext(question, sections, stats, settings.llm);
+          const { answer, ok } = await generateAnswerWithStream(question, sections, stats, settings.llm);
-          const duration = formatDuration(Date.now() - startTime);
-          progress.succeed(`Answer generated (${duration}, context: ${(totalChars / 1024).toFixed(1)}KB)`);
+          if (!ok) {
+            // If streaming failed, answer contains the error message
+            console.log(answer);
+            return;
+          }
-          // Output answer as markdown
-          console.log("\n" + answer);
+          const duration = formatDuration(Date.now() - startTime);
           // Show sources breakdown
           console.log("\n---\n**Sources:**\n");
@@ -1093,7 +1099,7 @@ Examples:
         }
         for (let i = 0; i < fileData.length; i += BATCH_SIZE) {
-          const batch = fileData.slice(i, i + BATCH_SIZE).filter(d => d.tags.length === 0);
+          const batch = fileData.slice(i, i + BATCH_SIZE);
           if (!jsonOut) {
             spinner.update(`Extracting entities... ${Math.min(i + BATCH_SIZE, fileData.length)}/${fileData.length}`);
           }
@@ -1668,6 +1674,7 @@ async function collectContextForLLM(
   hits: Array<{ slug: string; title: string; score: number }>,
   question: string,
   maxChars: number,
+  onProgress?: (stage: string) => void,
 ): Promise<{ sections: ContextSection[]; totalChars: number; stats: ContextStats }> {
   const sections: ContextSection[] = [];
   let totalChars = 0;
@@ -1699,10 +1706,15 @@ async function collectContextForLLM(
     return false;
   }
+  // Cache pages fetched in Layer 1 to avoid redundant DB calls in Layer 3
+  const pageCache = new Map<string, NonNullable<Awaited<ReturnType<typeof repo.getPage>>>>();
   // Layer 1: Primary pages (compiledTruth + timeline)
+  onProgress?.('page content');
   for (const hit of hits) {
     const page = await repo.getPage(hit.slug);
     if (!page) continue;
+    pageCache.set(hit.slug, page);
     const parts: string[] = [];
     if (page.compiledTruth?.trim()) {
@@ -1726,6 +1738,7 @@ async function collectContextForLLM(
   }
   // Layer 2: Raw data (original documents)
+  onProgress?.('raw documents');
   for (const hit of hits) {
     try {
       const rawRows = await repo.readRaw(hit.slug) as Array<{ source: string; data: unknown; fetchedAt?: string }>;
@@ -1752,8 +1765,9 @@ async function collectContextForLLM(
     }
   }
-  // Layer 3: Linked pages — SEMANTICALLY SCORED against the question
-  // Only include linked pages that are actually relevant to what the user asked.
+  // Layer 3: Linked pages — score using cached data + keyword matching
+  // No second repo.query() call needed — reuse hits scores + keyword fallback
+  onProgress?.('linked pages');
   const allLinkedSlugs = new Set<string>();
   for (const hit of hits) {
     try {
@@ -1767,26 +1781,27 @@ async function collectContextForLLM(
   }
   if (allLinkedSlugs.size > 0) {
-    // Score linked pages using broad semantic search.
-    // Query a wide set of pages, then intersect with linked slugs.
-    const broadLimit = Math.min(200, Math.max(50, allLinkedSlugs.size));
-    const broadResults = await repo.query(question, broadLimit);
-    const semanticScoreMap = new Map(broadResults.map(h => [h.slug, h.score]));
-    // Keyword-based fallback scoring for linked pages without embedding scores
+    // Score: use semantic scores from initial hits (already cached), keyword for rest
+    const semanticScoreMap = new Map(hits.map(h => [h.slug, h.score]));
     const keywordScores = new Map<string, number>();
     for (const linkedSlug of allLinkedSlugs) {
       if (semanticScoreMap.has(linkedSlug)) continue;
-      try {
+      // Use cached page if available, only fetch if not in cache
+      const cached = pageCache.get(linkedSlug);
+      if (cached) {
+        const text = `${cached.title} ${cached.compiledTruth}`.slice(0, 2000);
+        keywordScores.set(linkedSlug, computeKeywordRelevance(text, question));
+      } else {
         const page = await repo.getPage(linkedSlug);
         if (page) {
+          pageCache.set(linkedSlug, page);
           const text = `${page.title} ${page.compiledTruth}`.slice(0, 2000);
           keywordScores.set(linkedSlug, computeKeywordRelevance(text, question));
         }
-      } catch { /* ignore */ }
+      }
     }
-    // Combine scores: semantic first, then keyword fallback
+    // Combine scores
     const scoredLinked = [...allLinkedSlugs].map(slug => ({
       slug,
       score: semanticScoreMap.get(slug) ?? keywordScores.get(slug) ?? 0,
@@ -1798,11 +1813,11 @@ async function collectContextForLLM(
       .filter(s => s.score >= MIN_LINKED_SCORE)
       .sort((a, b) => b.score - a.score);
-    // Fetch content for relevant linked pages (respecting budget)
+    // Add linked pages (already cached in pageCache, no extra fetch needed)
     for (const linked of relevantLinked) {
       if (totalChars >= maxChars) break;
-      const linkedPage = await repo.getPage(linked.slug);
+      const linkedPage = pageCache.get(linked.slug);
       if (!linkedPage || !linkedPage.compiledTruth?.trim()) continue;
       const remaining = maxChars - totalChars;
@@ -1879,6 +1894,175 @@ interface ContextStats {
 /**
  * Build LLM prompt from collected context sections and generate answer.
  */
+async function generateAnswerWithStream(
+  question: string,
+  sections: ContextSection[],
+  stats: ContextStats,
+  llm: ResolvedLLM,
+): Promise<{ answer: string; ok: boolean }> {
+  const apiKey = llm.apiKey || process.env[llm.apiKeyEnv] || "";
+  if (!apiKey) {
+    return { answer: "Error: LLM API key not configured.", ok: false };
+  }
+  if (sections.length === 0) {
+    return { answer: "知识库中没有找到相关内容。", ok: true };
+  }
+  // Build context sections with clear labels
+  const contextParts: string[] = [];
+  let sectionIndex = 0;
+  // Group by type for cleaner output
+  const primarySections = sections.filter(s => s.type === 'primary');
+  const rawSections = sections.filter(s => s.type === 'raw_data');
+  const linkedSections = sections.filter(s => s.type === 'linked');
+  function renderSections(group: ContextSection[], header: string) {
+    if (group.length === 0) return;
+    contextParts.push(`## ${header}\n`);
+    for (const s of group) {
+      sectionIndex++;
+      contextParts.push(`### [${sectionIndex}] ${s.title} — ${s.label}\n**Slug:** ${s.slug}\n\n${s.content}\n`);
+    }
+    contextParts.push('');
+  }
+  renderSections(primarySections, '页面正文');
+  renderSections(rawSections, '原始文档');
+  renderSections(linkedSections, '关联页面');
+  const context = contextParts.join('\n');
+  const prompt = `你是一个知识库助手，请根据提供的知识库内容回答问题。
+## 问题
+${question}
+## 知识库内容
+${context}
+## 回答要求
+- 仅基于提供的知识库内容回答，不要编造信息
+- 如果知识库中没有相关信息，请明确说明
+- 引用来源时使用 [[slug|标题]] 的格式
+- 使用清晰的 markdown 格式
+- 如果涉及时间线信息，请在回答中体现
+- 区分哪些信息来自「页面正文」、哪些来自「原始文档」、哪些来自「关联页面」
+- 语言与提问保持一致（中文提问用中文回答，英文提问用英文回答）
+## 回答`;
+  // Disable thinking/reasoning mode to reduce latency
+  const disableThinking: Record<string, unknown> = {};
+  // OpenAI/compatible: extra_body for thinking disable
+  // DeepSeek: use extra_body to disable thinking
+  // Many providers ignore unknown fields, so this is safe to always include
+  const extraBody: Record<string, unknown> = {
+    thinking: { type: "disabled" },
+  };
+  try {
+    const url = llm.baseURL.endsWith("/") ? llm.baseURL + "chat/completions" : llm.baseURL + "/chat/completions";
+    // Show thinking indicator while waiting for first token
+    process.stderr.write(`\x1b[35m💭\x1b[0m \x1b[2mConnecting to ${llm.model}...\x1b[0m\n`);
+    const resp = await fetch(
+      url,
+      {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          Authorization: `Bearer ${apiKey}`,
+        },
+        body: JSON.stringify({
+          model: llm.model,
+          stream: true,
+          messages: [
+            {
+              role: "system",
+              content: "你是一个专业的知识库助手，基于提供的知识库内容准确回答问题。引用来源时使用 [[slug|标题]] 格式。回答要条理清晰，区分信息来源。",
+            },
+            { role: "user", content: prompt },
+          ],
+          temperature: 0.3,
+          max_tokens: 4096,
+          ...disableThinking,
+          extra_body: extraBody,
+          // Also send thinking disable as top-level for providers that support it
+          thinking: { type: "disabled" },
+        }),
+        // Abort if no response within 30s
+        signal: AbortSignal.timeout(30_000),
+      },
+    );
+    if (!resp.ok) {
+      const text = await resp.text();
+      // Clear the thinking indicator line
+      process.stderr.write("\r\x1b[K");
+      return { answer: `Error: LLM API failed (${resp.status}): ${text.slice(0, 200)}`, ok: false };
+    }
+    if (!resp.body) {
+      process.stderr.write("\r\x1b[K");
+      return { answer: "Error: No response body from LLM API.", ok: false };
+    }
+    // Clear thinking indicator, show streaming status
+    process.stderr.write("\r\x1b[K");
+    process.stderr.write(`\x1b[32m✦\x1b[0m \x1b[2mStreaming response...\x1b[0m\n`);
+    // Stream the response
+    const reader = resp.body.getReader();
+    const decoder = new TextDecoder();
+    let fullAnswer = "";
+    let buffer = "";
+    let tokenCount = 0;
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+      buffer += decoder.decode(value, { stream: true });
+      const lines = buffer.split("\n");
+      // Keep the last incomplete line in buffer
+      buffer = lines.pop() || "";
+      for (const line of lines) {
+        const trimmed = line.trim();
+        if (!trimmed || trimmed === "data: [DONE]") continue;
+        if (!trimmed.startsWith("data: ")) continue;
+        try {
+          const json = JSON.parse(trimmed.slice(6));
+          const content = json.choices?.[0]?.delta?.content;
+          if (content) {
+            process.stdout.write(content);
+            fullAnswer += content;
+            tokenCount++;
+          }
+        } catch {
+          // Skip malformed SSE data
+        }
+      }
+    }
+    // Add a newline after streaming completes
+    process.stdout.write("\n");
+    return { answer: fullAnswer || "(No answer generated)", ok: true };
+  } catch (error) {
+    const msg = error instanceof Error ? error.message : String(error);
+    return { answer: `Error: ${msg}`, ok: false };
+  }
+}
+/**
+ * @deprecated Use generateAnswerWithStream instead
+ */
 async function generateAnswerWithContext(
   question: string,
   sections: ContextSection[],

package/src/ai/llm-client.ts DELETED Viewed

@@ -1,291 +0,0 @@
-/**
- * Unified LLM Client Module
- *
- * Provides centralized LLM calling functionality with:
- * - Retry mechanism (exponential backoff, max 3 retries)
- * - Error classification (APIError, TimeoutError, RateLimitError)
- * - Timeout control
- * - Unified API key resolution
- */
-import type { ResolvedLLM } from "../settings";
-// ---------------------------------------------------------------------------
-// Error Classes
-// ---------------------------------------------------------------------------
-export class LLMError extends Error {
-  constructor(
-    message: string,
-    public readonly code: string,
-    public readonly statusCode?: number,
-    public readonly retryable: boolean = false,
-  ) {
-    super(message);
-    this.name = "LLMError";
-  }
-}
-export class APIError extends LLMError {
-  constructor(message: string, statusCode?: number) {
-    super(message, "API_ERROR", statusCode, false);
-    this.name = "APIError";
-  }
-}
-export class TimeoutError extends LLMError {
-  constructor(message: string = "LLM request timed out") {
-    super(message, "TIMEOUT_ERROR", undefined, true);
-    this.name = "TimeoutError";
-  }
-}
-export class RateLimitError extends LLMError {
-  constructor(message: string = "Rate limit exceeded", retryAfter?: number) {
-    super(message, "RATE_LIMIT_ERROR", 429, true);
-    this.name = "RateLimitError";
-    this.retryAfter = retryAfter;
-  }
-  readonly retryAfter?: number;
-}
-// ---------------------------------------------------------------------------
-// Configuration
-// ---------------------------------------------------------------------------
-export interface LLMClientConfig {
-  /** Maximum number of retry attempts (default: 3) */
-  maxRetries?: number;
-  /** Base delay for exponential backoff in ms (default: 1000) */
-  baseDelay?: number;
-  /** Maximum delay cap in ms (default: 10000) */
-  maxDelay?: number;
-  /** Request timeout in ms (default: 60000) */
-  timeout?: number;
-}
-const DEFAULT_CONFIG: Required<LLMClientConfig> = {
-  maxRetries: 3,
-  baseDelay: 1000,
-  maxDelay: 10000,
-  timeout: 60000,
-};
-// ---------------------------------------------------------------------------
-// API Key Resolution
-// ---------------------------------------------------------------------------
-/**
- * Resolve API key from LLM configuration.
- * Checks direct apiKey first, then falls back to environment variable.
- */
-export function resolveApiKey(llm: ResolvedLLM): string {
-  if (llm.apiKey) return llm.apiKey;
-  if (llm.apiKeyEnv) return process.env[llm.apiKeyEnv] ?? "";
-  return "";
-}
-/**
- * Check if LLM is properly configured with an API key.
- */
-export function isLLMConfigured(llm: ResolvedLLM): boolean {
-  return !!resolveApiKey(llm);
-}
-// ---------------------------------------------------------------------------
-// LLM Call with Retry
-// ---------------------------------------------------------------------------
-/**
- * Call LLM with unified fetch, retry mechanism, error handling, and timeout.
- *
- * @param llm - Resolved LLM configuration
- * @param prompt - Prompt to send to the LLM
- * @param maxTokens - Maximum tokens in response
- * @param systemPrompt - Optional system prompt (default provided)
- * @param config - Optional client configuration
- * @returns Raw response text from LLM, or empty string on failure
- */
-export async function callLLM(
-  llm: ResolvedLLM,
-  prompt: string,
-  maxTokens: number,
-  systemPrompt: string = "You are a helpful assistant. Always output valid JSON.",
-  config: LLMClientConfig = {},
-): Promise<string> {
-  const apiKey = resolveApiKey(llm);
-  if (!apiKey) {
-    return "";
-  }
-  const cfg = { ...DEFAULT_CONFIG, ...config };
-  const url = llm.baseURL.endsWith("/")
-    ? llm.baseURL + "chat/completions"
-    : llm.baseURL + "/chat/completions";
-  const body = {
-    model: llm.model,
-    messages: [
-      { role: "system", content: systemPrompt },
-      { role: "user", content: prompt },
-    ],
-    temperature: 0.1,
-    max_tokens: maxTokens,
-    enable_thinking: false,
-  };
-  let lastError: LLMError | null = null;
-  for (let attempt = 0; attempt <= cfg.maxRetries; attempt++) {
-    try {
-      const response = await callWithTimeout(
-        fetch(url, {
-          method: "POST",
-          headers: {
-            "Content-Type": "application/json",
-            Authorization: `Bearer ${apiKey}`,
-          },
-          body: JSON.stringify(body),
-        }),
-        cfg.timeout,
-      );
-      if (!response.ok) {
-        const text = await response.text().catch(() => "");
-        lastError = classifyError(response.status, text, response.statusText);
-        // Don't retry for non-retryable errors
-        if (!lastError.retryable || attempt === cfg.maxRetries) {
-          console.warn(`[llm-client] LLM call failed after ${attempt + 1} attempt(s): ${lastError.message}`);
-          return "";
-        }
-        const delay = calculateBackoff(attempt, cfg.baseDelay, cfg.maxDelay, (lastError as RateLimitError).retryAfter);
-        console.warn(`[llm-client] Retrying after ${delay}ms (attempt ${attempt + 1}/${cfg.maxRetries})`);
-        await sleep(delay);
-        continue;
-      }
-      const data = await response.json() as { choices?: Array<{ message?: { content?: string } }> };
-      return data.choices?.[0]?.message?.content?.trim() ?? "";
-    } catch (error) {
-      // Classify the error
-      if (error instanceof TimeoutError) {
-        lastError = error;
-      } else if (error instanceof LLMError) {
-        lastError = error;
-      } else {
-        // Unknown error - wrap it
-        const msg = error instanceof Error ? error.message : String(error);
-        lastError = new APIError(`Unexpected error: ${msg}`);
-      }
-      // Don't retry if we've exhausted attempts
-      if (attempt === cfg.maxRetries) {
-        console.warn(`[llm-client] LLM call failed after ${attempt + 1} attempt(s): ${lastError.message}`);
-        return "";
-      }
-      // Check if error is retryable
-      if (!lastError.retryable) {
-        console.warn(`[llm-client] Non-retryable error: ${lastError.message}`);
-        return "";
-      }
-      const delay = calculateBackoff(attempt, cfg.baseDelay, cfg.maxDelay);
-      console.warn(`[llm-client] Retrying after ${delay}ms (attempt ${attempt + 1}/${cfg.maxRetries}): ${lastError.message}`);
-      await sleep(delay);
-    }
-  }
-  return "";
-}
-/**
- * Classify HTTP error into appropriate error type.
- */
-function classifyError(status: number, responseText: string, statusText: string): LLMError {
-  const truncatedText = responseText.slice(0, 200);
-  switch (status) {
-    case 429:
-      // Try to extract retry-after from response
-      const retryAfterMatch = responseText.match(/retry[- ]?after["']?\s*[:=]\s*(\d+)/i);
-      const retryAfter = retryAfterMatch?.[1] ? parseInt(retryAfterMatch[1], 10) : undefined;
-      return new RateLimitError(`Rate limited: ${statusText} - ${truncatedText}`, retryAfter);
-    case 408:
-    case 504:
-      return new TimeoutError(`Request timeout: ${statusText}`);
-    case 500:
-    case 502:
-    case 503:
-      return new APIError(`Server error (${status}): ${truncatedText}`, status);
-    default:
-      if (status >= 500) {
-        return new APIError(`Server error (${status}): ${truncatedText}`, status);
-      }
-      if (status >= 400) {
-        return new APIError(`Client error (${status}): ${truncatedText}`, status);
-      }
-      return new APIError(`HTTP error (${status}): ${truncatedText}`, status);
-  }
-}
-/**
- * Calculate exponential backoff delay with jitter.
- */
-function calculateBackoff(
-  attempt: number,
-  baseDelay: number,
-  maxDelay: number,
-  retryAfter?: number,
-): number {
-  // If server specified retry-after, use that
-  if (retryAfter && retryAfter > 0) {
-    return Math.min(retryAfter * 1000, maxDelay);
-  }
-  // Exponential backoff: baseDelay * 2^attempt
-  const exponentialDelay = baseDelay * Math.pow(2, attempt);
-  // Add jitter (±25%)
-  const jitter = exponentialDelay * 0.25 * (Math.random() * 2 - 1);
-  return Math.min(Math.round(exponentialDelay + jitter), maxDelay);
-}
-/**
- * Sleep for specified milliseconds.
- */
-function sleep(ms: number): Promise<void> {
-  return new Promise((resolve) => setTimeout(resolve, ms));
-}
-/**
- * Wrap fetch with timeout using Promise.race.
- */
-async function callWithTimeout<T>(promise: Promise<T>, timeoutMs: number): Promise<T> {
-  let timeoutId: NodeJS.Timeout;
-  const timeoutPromise = new Promise<never>((_, reject) => {
-    timeoutId = setTimeout(() => {
-      reject(new TimeoutError(`Request timed out after ${timeoutMs}ms`));
-    }, timeoutMs);
-  });
-  try {
-    return await Promise.race([promise, timeoutPromise]);
-  } finally {
-    clearTimeout(timeoutId!);
-  }
-}
-// ---------------------------------------------------------------------------
-// Re-export settings type for convenience
-// ---------------------------------------------------------------------------
-export type { ResolvedLLM } from "../settings";