npm - autosnippet - Versions diffs - 2.18.0 → 2.19.0 - Mend

autosnippet 2.18.0 → 2.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

package/dashboard/dist/assets/{icons-C6kshpB1.js → icons-C7FN32VL.js} +1 -1
package/dashboard/dist/assets/index-D8dCXLzr.js +129 -0
package/dashboard/dist/index.html +2 -2
package/lib/external/ai/AiProvider.js +42 -11
package/lib/external/ai/providers/ClaudeProvider.js +4 -2
package/lib/external/ai/providers/GoogleGeminiProvider.js +66 -8
package/lib/external/ai/providers/OpenAiProvider.js +48 -2
package/lib/external/mcp/handlers/bootstrap.js +1 -2
package/lib/http/HttpServer.js +4 -0
package/lib/http/routes/candidates.js +405 -0
package/lib/http/routes/search.js +113 -0
package/lib/infrastructure/vector/Chunker.js +3 -8
package/lib/infrastructure/vector/JsonVectorAdapter.js +2 -9
package/lib/service/candidate/SimilarityService.js +7 -35
package/lib/service/chat/ChatAgent.js +28 -686
package/lib/service/chat/ContextWindow.js +87 -3
package/lib/service/chat/ConversationStore.js +3 -4
package/lib/service/chat/ProjectSemanticMemory.js +9 -14
package/lib/service/chat/ReasoningLayer.js +10 -54
package/lib/service/chat/ToolRegistry.js +0 -52
package/lib/service/chat/tools.js +7 -6
package/lib/service/cursor/TokenBudget.js +4 -21
package/lib/service/search/CrossEncoderReranker.js +163 -0
package/lib/service/search/RetrievalFunnel.js +9 -36
package/lib/service/skills/SignalCollector.js +28 -28
package/lib/shared/similarity.js +101 -0
package/lib/shared/token-utils.js +46 -0
package/package.json +1 -1
package/dashboard/dist/assets/index-9byoG7kd.js +0 -129

package/lib/service/chat/ContextWindow.js CHANGED Viewed

@@ -22,6 +22,7 @@
  */
 import Logger from '../../infrastructure/logging/Logger.js';
+import { estimateTokensFast } from '../../shared/token-utils.js';
 /**
  * 一组相关消息的原子单元:
@@ -46,6 +47,89 @@ export class ContextWindow {
   /** @type {Object} 日志器 */
   #logger;
+  /**
+   * Model name → context window size (in tokens).
+   * Each entry is a [pattern, size] pair. resolveTokenBudget() walks the list
+   * from top to bottom and stops at the first pattern that matches, so specific
+   * patterns must be listed before their generic fallbacks.
+   * The size is the model's raw context-window limit.
+   *
+   * NOTE(review): the patterns are unanchored, so /o1|o3|o4/ matches any model
+   * name that merely contains "o1", "o3" or "o4", and it is listed before the
+   * Anthropic / DeepSeek / Ollama entries — confirm this cannot shadow them.
+   * NOTE(review): /gpt-4-(?!turbo)/ requires a hyphen after "gpt-4", so a bare
+   * "gpt-4" matches no entry in this list — confirm whether that is intended.
+   * @type {Array<[RegExp, number]>}
+   */
+  static MODEL_CONTEXT_WINDOWS = [
+    // ── Google Gemini ──
+    [/gemini-3/i,               1_000_000],
+    [/gemini-2\.5/i,            1_000_000],
+    [/gemini-2/i,               1_000_000],
+    [/gemini-1\.5-pro/i,        1_000_000],
+    [/gemini-1\.5-flash/i,      1_000_000],
+    [/gemini-1\.0/i,              32_000],
+    [/gemini/i,                 1_000_000],   // fallback for unknown Gemini versions
+    // ── OpenAI ──
+    [/gpt-4o/i,                   128_000],
+    [/gpt-4-turbo/i,              128_000],
+    [/gpt-4-(?!turbo)/i,           8_192],
+    [/gpt-3\.5-turbo-16k/i,       16_384],
+    [/gpt-3\.5/i,                  4_096],
+    [/o1|o3|o4/i,                200_000],   // OpenAI reasoning models
+    // ── Anthropic ──
+    [/claude-.*sonnet-4/i,        200_000],
+    [/claude-3[\.\-]5/i,         200_000],
+    [/claude-3[\.\-]opus/i,      200_000],
+    [/claude-3/i,                200_000],
+    [/claude/i,                  200_000],    // fallback for unknown Claude models
+    // ── DeepSeek ──
+    [/deepseek/i,                 64_000],
+    // ── Local Ollama ──
+    [/llama3[\.\-]?[23]/i,       128_000],
+    [/llama3/i,                    8_192],
+    [/llama/i,                     4_096],
+    [/mistral/i,                  32_000],
+    [/qwen/i,                    128_000],
+    [/phi/i,                     128_000],
+    // ── Mock (tests) ──
+    [/mock/i,                     32_000],
+  ];
+  /**
+   * Resolve a suitable ContextWindow token budget from a model name.
+   *
+   * The model's context-window size is looked up in MODEL_CONTEXT_WINDOWS (the
+   * first matching pattern wins; 32_000 is used when modelName is falsy or no
+   * pattern matches). The budget is a deliberately small slice of that window;
+   * the original note gives the reason that tool schemas and the system prompt
+   * take up significant space. Tiers, as implemented below:
+   *   - large window  (≥200k): 24000, or 32000 when isSystem
+   *   - medium window (≥64k):  20000, or 24000 when isSystem
+   *   - small window  (≥16k):  12000, or 14000 when isSystem
+   *   - tiny window   (<16k):  floor(window × 0.65), or floor(window × 0.75) when isSystem
+   *
+   * @param {string} modelName — model name, e.g. 'gemini-3-flash-preview', 'gpt-4o-mini'
+   * @param {{ isSystem?: boolean }} [opts] — isSystem: true selects the higher budget of each tier
+   * @returns {number} suggested token budget
+   */
+  static resolveTokenBudget(modelName, opts = {}) {
+    const { isSystem = false } = opts;
+    // 1. Look up the model's context-window size
+    let contextSize = 32_000; // default fallback
+    if (modelName) {
+      for (const [pattern, size] of ContextWindow.MODEL_CONTEXT_WINDOWS) {
+        if (pattern.test(modelName)) {
+          contextSize = size;
+          break;
+        }
+      }
+    }
+    // 2. Derive the token budget from the tier the window size falls into
+    let budget;
+    if (contextSize >= 200_000) {
+      budget = isSystem ? 32_000 : 24_000;
+    } else if (contextSize >= 64_000) {
+      budget = isSystem ? 24_000 : 20_000;
+    } else if (contextSize >= 16_000) {
+      budget = isSystem ? 14_000 : 12_000;
+    } else {
+      budget = Math.floor(contextSize * (isSystem ? 0.75 : 0.65));
+    }
+    return budget;
+  }
   /**
    * @param {number} [tokenBudget=24000] — token 预算上限
    */
@@ -285,10 +369,10 @@ export class ContextWindow {
   estimateTokens() {
     let total = 0;
     for (const m of this.#messages) {
-      if (m.content) total += m.content.length / 3;
-      if (m.toolCalls) total += JSON.stringify(m.toolCalls).length / 3;
+      if (m.content) total += estimateTokensFast(m.content);
+      if (m.toolCalls) total += estimateTokensFast(JSON.stringify(m.toolCalls));
     }
-    return Math.ceil(total);
+    return total;
   }
   /**

package/lib/service/chat/ConversationStore.js CHANGED Viewed

@@ -23,9 +23,9 @@ import path from 'node:path';
 import crypto from 'node:crypto';
 import Logger from '../../infrastructure/logging/Logger.js';
 import pathGuard from '../../shared/PathGuard.js';
+import { estimateTokens as _estimateTokens } from '../../shared/token-utils.js';
 const DEFAULT_TOKEN_BUDGET = 12000;     // ~12K tokens 留给历史, 其余给系统提示词和当前消息
-const CHARS_PER_TOKEN = 3.5;            // 近似: 中文 ~3.5 / 英文 ~4 / 取偏保守值
 const MAX_CONVERSATIONS = 100;          // 索引最多保留 100 个对话
 const SUMMARY_TARGET_TOKENS = 500;      // 压缩后的摘要目标 token 数
@@ -283,13 +283,12 @@ export class ConversationStore {
   }
   /**
-   * 估算 token 数
+   * 估算 token 数 — 委托给共享 token-utils（CJK 感知）
    * @param {string} text
    * @returns {number}
    */
   estimateTokens(text) {
-    if (!text) return 0;
-    return Math.ceil(text.length / CHARS_PER_TOKEN);
+    return _estimateTokens(text);
   }
   // ═══════════════════════════════════════════════════════

package/lib/service/chat/ProjectSemanticMemory.js CHANGED Viewed

@@ -25,6 +25,7 @@
  */
 import { randomUUID } from 'node:crypto';
+import { jaccardSimilarity, tokenizeForSimilarity } from '../../shared/similarity.js';
 // ──────────────────────────────────────────────────────────────
 // 常量
@@ -296,7 +297,7 @@ export class ProjectSemanticMemory {
     const now = Date.now();
     const lowerQuery = (query || '').toLowerCase();
-    const queryTokens = this.#tokenize(lowerQuery);
+    const queryTokens = this.#tokenizeWords(lowerQuery);
     const scored = all.map(m => {
       // Recency: 指数衰减 (半衰期 7 天)
@@ -702,7 +703,7 @@ export class ProjectSemanticMemory {
       : this.#stmts.getAll.all({ now });
     const lowerContent = content.toLowerCase();
-    const contentTokens = this.#tokenize(lowerContent);
+    const contentTokens = tokenizeForSimilarity(lowerContent);
     const scored = candidates
       .map(row => {
@@ -716,7 +717,7 @@ export class ProjectSemanticMemory {
   }
   /**
-   * 计算两段文本的相似度 (Jaccard + 子串)
+   * 计算两段文本的相似度 (Jaccard + 子串) — 委托共享 similarity 模块
    *
    * @param {Set<string>} tokensA — 预分词的 token 集合
    * @param {string} lowerA — 小写原文
@@ -725,18 +726,12 @@ export class ProjectSemanticMemory {
    */
   #computeSimilarity(tokensA, lowerA, contentB) {
     const lowerB = (contentB || '').toLowerCase();
-    const tokensB = this.#tokenize(lowerB);
+    const tokensB = tokenizeForSimilarity(lowerB);
     if (tokensA.size === 0 && tokensB.size === 0) return 1.0;
     if (tokensA.size === 0 || tokensB.size === 0) return 0.0;
-    // Jaccard similarity: |A ∩ B| / |A ∪ B|
-    let intersection = 0;
-    for (const t of tokensA) {
-      if (tokensB.has(t)) intersection++;
-    }
-    const union = new Set([...tokensA, ...tokensB]).size;
-    const jaccard = intersection / union;
+    const jaccard = jaccardSimilarity(tokensA, tokensB);
     // 子串包含加分
     const containsBonus = (lowerA.includes(lowerB) || lowerB.includes(lowerA)) ? 0.3 : 0;
@@ -756,7 +751,7 @@ export class ProjectSemanticMemory {
     if (!lowerQuery || !content) return 0;
     const lowerContent = content.toLowerCase();
-    const contentTokens = this.#tokenize(lowerContent);
+    const contentTokens = this.#tokenizeWords(lowerContent);
     if (queryTokens.size === 0) return 0;
@@ -783,11 +778,11 @@ export class ProjectSemanticMemory {
   }
   /**
-   * 分词 (简单: 按空格/标点分割, 去短词)
+   * 分词 (按空格/标点分割, 去短词) — 用于 relevance 计算
    * @param {string} text
    * @returns {Set<string>}
    */
-  #tokenize(text) {
+  #tokenizeWords(text) {
     if (!text) return new Set();
     return new Set(
       text

package/lib/service/chat/ReasoningLayer.js CHANGED Viewed

@@ -17,7 +17,7 @@
  *
  * ChatAgent 在主循环的 4 个生命周期点调用:
  *   1. beforeAICall(iteration, opts)              — 开始新轮次 + 可选注入反思/规划
- *   2. afterAICall(aiResult, mode)                — 提取 Thought + 提取 Plan
+ *   2. afterAICall(aiResult)                      — 提取 Thought + 提取 Plan
  *   3. afterToolExec(name, args, result, metrics) — 构建 Observation
  *   4. afterRound(roundResults)                   — 关闭轮次 + 写入摘要 + 更新计划进度
  *
@@ -143,32 +143,20 @@ export class ReasoningLayer {
    *   - 从 AI 响应中提取 Thought
    *   - 从 AI 响应中提取 Plan（首次 / replan 后）
    *
-   * @param {object|string} aiResult — AI 返回结果
-   * @param {'native'|'text'} [mode='native'] — 调用模式
+   * @param {object} aiResult — AI 返回结果 (native tool calling)
    */
-  afterAICall(aiResult, mode = 'native') {
+  afterAICall(aiResult) {
     if (!this.#config.enabled) return;
     let extractedText = null;
-    if (mode === 'native') {
-      // Native 模式: 当 AI 同时返回文本和工具调用时，文本就是 thought
-      if (aiResult?.text && aiResult?.functionCalls?.length > 0) {
-        this.#trace.setThought(aiResult.text);
-        extractedText = aiResult.text;
-        this.#logger.info(`[ReasoningLayer] 💭 thought: ${aiResult.text.substring(0, 150).replace(/\n/g, '↵')}…`);
-      } else if (aiResult?.text) {
-        extractedText = aiResult.text;
-      }
-    } else {
-      // Text 模式: 需要从完整响应中切分出 thought 部分（Action 块之前的文本）
-      const text = typeof aiResult === 'string' ? aiResult : aiResult?.text;
-      extractedText = text;
-      const thought = this.#extractThoughtFromText(text);
-      if (thought) {
-        this.#trace.setThought(thought);
-        this.#logger.info(`[ReasoningLayer] 💭 thought (text): ${thought.substring(0, 150).replace(/\n/g, '↵')}…`);
-      }
+    // Native 模式: 当 AI 同时返回文本和工具调用时，文本就是 thought
+    if (aiResult?.text && aiResult?.functionCalls?.length > 0) {
+      this.#trace.setThought(aiResult.text);
+      extractedText = aiResult.text;
+      this.#logger.info(`[ReasoningLayer] 💭 thought: ${aiResult.text.substring(0, 150).replace(/\n/g, '↵')}…`);
+    } else if (aiResult?.text) {
+      extractedText = aiResult.text;
     }
     // ── Planning: 从 AI 响应中提取 plan ──
@@ -549,38 +537,6 @@ export class ReasoningLayer {
     return meta;
   }
-  /**
-   * 从 LLM 文本响应中提取 Thought 部分（Action 块之前的文本）
-   *
-   * 不改变 #parseActions 逻辑，纯粹数据提取。
-   *
-   * @param {string} response — LLM 完整文本响应
-   * @returns {string|null}
-   * @private
-   */
-  #extractThoughtFromText(response) {
-    if (!response) return null;
-    // Thought 在第一个 Action 标记之前
-    const markers = [
-      /```(?:action|batch_actions|tool_code)/,
-      /Action\s*:\s*\w+/i,
-      /<tool_call>/,
-      /```json\s*\n\s*\{\s*"(?:tool|name|function)"/,
-    ];
-    let cutoff = response.length;
-    for (const m of markers) {
-      const idx = response.search(m);
-      if (idx !== -1 && idx < cutoff) cutoff = idx;
-    }
-    const thought = response.substring(0, cutoff).trim();
-    // 过短的（< 20 字符）不算有效 thought
-    return thought.length >= 20 ? thought : null;
-  }
   // ─── Planning 内部方法 ─────────────────────────────────
   /**

package/lib/service/chat/ToolRegistry.js CHANGED Viewed

@@ -178,58 +178,6 @@ export class ToolRegistry {
     return this.#tools.has(name);
   }
-  /**
-   * 转换为 Gemini functionDeclarations 格式
-   * 供 GoogleGeminiProvider.chatWithTools() 使用
-   *
-   * @param {string[]} [allowedTools] — 限制可用工具列表（不传则返回全部）
-   * @returns {Array<{name: string, description: string, parameters: object}>}
-   */
-  toFunctionDeclarations(allowedTools) {
-    const result = [];
-    for (const [name, tool] of this.#tools) {
-      if (allowedTools && !allowedTools.includes(name)) continue;
-      result.push({
-        name: tool.name,
-        description: tool.description || '',
-        parameters: this.#sanitizeSchemaForGemini(tool.parameters),
-      });
-    }
-    return result;
-  }
-  /**
-   * 清理 JSON Schema 使之兼容 Gemini API 的 OpenAPI 子集
-   * Gemini API 不支持某些 JSON Schema 扩展语法
-   */
-  #sanitizeSchemaForGemini(schema) {
-    if (!schema || typeof schema !== 'object') {
-      return { type: 'object', properties: {} };
-    }
-    const cleaned = { ...schema };
-    // 确保 type 存在
-    if (!cleaned.type) cleaned.type = 'object';
-    // 递归清理 properties
-    if (cleaned.properties) {
-      const props = {};
-      for (const [key, val] of Object.entries(cleaned.properties)) {
-        const prop = { ...val };
-        // 移除 Gemini 不支持的字段
-        delete prop.default;
-        delete prop.examples;
-        // 确保 type 存在
-        if (!prop.type) prop.type = 'string';
-        props[key] = prop;
-      }
-      cleaned.properties = props;
-    }
-    return cleaned;
-  }
   /**
    * 获取所有工具名
    */

package/lib/service/chat/tools.js CHANGED Viewed

@@ -1371,8 +1371,9 @@ Return ONLY a JSON array. No markdown, no extra text. Return [] if no relationsh
 # Recipe Pairs
 ${pairsText}`;
-    const response = await ctx.aiProvider.chat(prompt, { temperature: 0.2 });
-    const parsed = ctx.aiProvider.extractJSON(response, '[', ']');
+    const parsed = await ctx.aiProvider.chatWithStructuredOutput(prompt, {
+      openChar: '[', closeChar: ']', temperature: 0.2,
+    });
     const relations = Array.isArray(parsed) ? parsed : [];
     // 写入知识图谱（除非 dryRun）
@@ -1514,8 +1515,9 @@ const aiTranslate = {
     if (summary) parts.push(`summary: ${summary}`);
     if (usageGuide) parts.push(`usageGuide: ${usageGuide}`);
-    const raw = await ctx.aiProvider.chat(parts.join('\n'), { systemPrompt, temperature: 0.2 });
-    const parsed = ctx.aiProvider.extractJSON(raw, '{', '}');
+    const parsed = await ctx.aiProvider.chatWithStructuredOutput(parts.join('\n'), {
+      systemPrompt, temperature: 0.2,
+    });
     return parsed || { summaryEn: summary || '', usageGuideEn: usageGuide || '' };
   },
 };
@@ -1626,8 +1628,7 @@ Return ONLY valid JSON:
   }
 }`;
-    const raw = await ctx.aiProvider.chat(prompt, { temperature: 0.2 });
-    const rule = ctx.aiProvider.extractJSON(raw, '{', '}');
+    const rule = await ctx.aiProvider.chatWithStructuredOutput(prompt, { temperature: 0.2 });
     if (!rule) return { error: 'Failed to parse AI response' };
     // 验证正则表达式

package/lib/service/cursor/TokenBudget.js CHANGED Viewed

@@ -1,10 +1,13 @@
 /**
  * TokenBudget — Token 预算控制
  *
- * 简易 token 估算器（1 token ≈ 4 chars for English, 2 chars for CJK），
+ * token 估算统一使用 shared/token-utils（CJK 感知），
  * 用于确保 .mdc 文件不超出 Cursor 上下文预算。
  */
+import { estimateTokens } from '../../shared/token-utils.js';
+export { estimateTokens };
 /** 默认预算配置 */
 export const BUDGET = {
   CHANNEL_A_MAX: 400,        // Always-On Rules 最大 token
@@ -13,26 +16,6 @@ export const BUDGET = {
   CHANNEL_A_MAX_RULES: 8,    // Always-On Rules 最多规则数
 };
-/**
- * 估算文本 token 数
- * 简易算法：英文按 4 chars/token，CJK 按 2 chars/token
- * @param {string} text
- * @returns {number}
- */
-export function estimateTokens(text) {
-  if (!text) return 0;
-  let tokens = 0;
-  for (const ch of text) {
-    // CJK Unified Ideographs + common CJK ranges
-    if (ch.charCodeAt(0) > 0x2e80) {
-      tokens += 0.5; // ~2 chars per token for CJK
-    } else {
-      tokens += 0.25; // ~4 chars per token for English
-    }
-  }
-  return Math.ceil(tokens);
-}
 /**
  * 按 token 预算截断内容行
  * @param {string[]} lines - 内容行

package/lib/service/search/CrossEncoderReranker.js ADDED Viewed

@@ -0,0 +1,163 @@
+/**
+ * CrossEncoderReranker — AI 驱动的语义重排器
+ *
+ * 替代 Jaccard 相似度，使用 LLM 对 (query, document) 对进行语义相关性评分。
+ *
+ * 策略:
+ *   1. 将候选文档与 query 组成 pairs，批量送入 AI 评分
+ *   2. AI 返回每个 pair 的 relevance score (0.0-1.0)
+ *   3. 按 score 降序排列
+ *
+ * 优化:
+ *   - 单次 API 调用批量评分（减少延迟和成本）
+ *   - 文档截断至 MAX_DOC_LEN 控制 token 消耗
+ *   - 候选上限 MAX_CANDIDATES，超出部分保留原始顺序
+ *   - AI 不可用时自动降级到 Jaccard
+ */
+import { tokenize } from './InvertedIndex.js';
+import { jaccardSimilarity } from '../../shared/similarity.js';
+const MAX_CANDIDATES = 40;  // 超过此数量截断（控制 prompt 大小）
+const MAX_DOC_LEN = 300;    // 每个文档最大字符数
+export class CrossEncoderReranker {
+  #aiProvider;
+  #logger;
+  /**
+   * @param {object} opts
+   * @param {import('../../external/ai/AiProvider.js').AiProvider} opts.aiProvider — stored as-is; null when omitted
+   * @param {object} [opts.logger] — defaults to console
+   */
+  constructor(opts = {}) {
+    this.#aiProvider = opts.aiProvider || null;
+    this.#logger = opts.logger || console;
+  }
+  /**
+   * Semantically rerank a candidate list.
+   *
+   * Paths, in order:
+   *   - candidates missing or empty → returns []
+   *   - query falsy → returns candidates unchanged (no semanticScore is added)
+   *   - no provider, or provider lacks chatWithStructuredOutput → Jaccard fallback
+   *   - otherwise the first MAX_CANDIDATES candidates are AI-scored and sorted by
+   *     score (descending); the remaining ones are appended in their original
+   *     order with small, decreasing scores derived from half the lowest head score
+   *   - if AI scoring throws, a warning is logged and the Jaccard fallback is
+   *     applied to the full candidate list
+   *
+   * @param {string} query — user query
+   * @param {Array<object>} candidates — candidate list produced by Layer 1
+   * @returns {Promise<Array<object>>} — candidates carrying a semanticScore (descending), except on the falsy-query path
+   */
+  async rerank(query, candidates) {
+    if (!candidates || candidates.length === 0) return [];
+    if (!query) return candidates;
+    // No usable AI provider: fall back to Jaccard
+    if (!this.#aiProvider || typeof this.#aiProvider.chatWithStructuredOutput !== 'function') {
+      return this.#jaccardFallback(query, candidates);
+    }
+    // Score only the first MAX_CANDIDATES candidates; the rest keep their original order
+    const head = candidates.slice(0, MAX_CANDIDATES);
+    const tail = candidates.slice(MAX_CANDIDATES);
+    try {
+      const scored = await this.#batchScore(query, head);
+      // Give the tail a low, decreasing score so its relative order stays stable
+      const minScore = scored.length > 0
+        ? Math.min(...scored.map(s => s.semanticScore)) * 0.5
+        : 0;
+      const tailScored = tail.map((c, i) => ({
+        ...c,
+        semanticScore: Math.max(minScore - (i + 1) * 0.001, 0),
+      }));
+      return [...scored, ...tailScored];
+    } catch (err) {
+      this.#logger.warn?.(`[CrossEncoderReranker] AI scoring failed, falling back to Jaccard: ${err.message}`);
+      return this.#jaccardFallback(query, candidates);
+    }
+  }
+  /**
+   * Batch AI scoring — a single chatWithStructuredOutput call for all candidates.
+   *
+   * Each candidate is rendered as "[index] text", with the text cut to
+   * MAX_DOC_LEN characters. The reply must be an array (otherwise this throws);
+   * each element is read as { i | index, s | score }. Entries whose index is not
+   * a number within range are ignored, scores are clamped to [0, 1], and
+   * candidates without a score get 0. The result is a list of shallow copies
+   * sorted by semanticScore, descending.
+   *
+   * NOTE(review): a score that does not coerce to a number (e.g. "high") makes
+   * the clamp produce NaN, which then reaches the sort comparator — confirm the
+   * provider always returns numeric scores.
+   * NOTE(review): a null or non-object element in the reply makes `item.i`
+   * throw; rerank() catches that and falls back to Jaccard for the whole batch.
+   */
+  async #batchScore(query, candidates) {
+    const pairs = candidates.map((c, i) => {
+      const doc = this.#extractDocText(c);
+      return `[${i}] ${doc.substring(0, MAX_DOC_LEN)}`;
+    });
+    const prompt = `# Task
+Score the relevance of each document to the query. Return ONLY a JSON array.
+# Query
+${query}
+# Documents
+${pairs.join('\n')}
+# Output Format
+Return a JSON array of objects: [{"i": 0, "s": 0.85}, {"i": 1, "s": 0.3}, ...]
+- "i": document index (integer)
+- "s": relevance score (float 0.0-1.0, where 1.0 = perfectly relevant)
+Score guidelines:
+- 1.0: exact match or directly answers the query
+- 0.7-0.9: highly relevant, covers the main topic
+- 0.4-0.6: partially relevant, related topic
+- 0.1-0.3: tangentially related
+- 0.0: completely irrelevant
+Return ONLY a JSON array, no markdown or explanation.`;
+    const result = await this.#aiProvider.chatWithStructuredOutput(prompt, {
+      openChar: '[',
+      closeChar: ']',
+      temperature: 0.1,
+      maxTokens: 2048,
+    });
+    if (!Array.isArray(result)) {
+      throw new Error('AI returned non-array result');
+    }
+    // Build the index → score map
+    const scoreMap = new Map();
+    for (const item of result) {
+      const idx = item.i ?? item.index;
+      const score = item.s ?? item.score ?? 0;
+      if (typeof idx === 'number' && idx >= 0 && idx < candidates.length) {
+        scoreMap.set(idx, Math.max(0, Math.min(1, score)));
+      }
+    }
+    // Merge the scores in; candidates the AI did not score get 0
+    return candidates.map((c, i) => ({
+      ...c,
+      semanticScore: scoreMap.get(i) ?? 0,
+    })).sort((a, b) => b.semanticScore - a.semanticScore);
+  }
+  /**
+   * Build the text representation of a candidate used for scoring: title,
+   * trigger, description (or summary when description is falsy), code and
+   * content, with falsy parts dropped, joined by ' | '.
+   */
+  #extractDocText(candidate) {
+    const parts = [
+      candidate.title,
+      candidate.trigger,
+      candidate.description || candidate.summary,
+      candidate.code,
+      candidate.content,
+    ].filter(Boolean);
+    return parts.join(' | ');
+  }
+  /**
+   * Jaccard fallback — used when the AI provider is unavailable or scoring fails.
+   * Returns candidates unchanged when the query yields no tokens; otherwise
+   * returns shallow copies whose semanticScore is the Jaccard similarity between
+   * the query token set and the candidate's token set, sorted descending.
+   */
+  #jaccardFallback(query, candidates) {
+    const queryTokens = new Set(tokenize(query));
+    if (queryTokens.size === 0) return candidates;
+    return candidates.map(candidate => {
+      const text = this.#extractDocText(candidate);
+      const docTokens = new Set(tokenize(text));
+      const score = jaccardSimilarity(queryTokens, docTokens);
+      return { ...candidate, semanticScore: score };
+    }).sort((a, b) => b.semanticScore - a.semanticScore);
+  }
+}

package/lib/service/search/RetrievalFunnel.js CHANGED Viewed

@@ -1,7 +1,7 @@
 /**
  * RetrievalFunnel — 4 层检索漏斗
  * Layer 1: Keyword Filter (倒排索引 fast recall)
- * Layer 2: Semantic Rerank (Jaccard 近似语义重排)
+ * Layer 2: Cross-Encoder Rerank (AI 驱动语义重排，降级 Jaccard)
  * Layer 3: Multi-Signal Ranking (6 信号加权)
  * Layer 4: Context-Aware Reranking (对话历史提升)
  */
@@ -9,10 +9,12 @@
 import { buildInvertedIndex, lookup, tokenize } from './InvertedIndex.js';
 import { MultiSignalRanker } from './MultiSignalRanker.js';
 import { CoarseRanker } from './CoarseRanker.js';
+import { CrossEncoderReranker } from './CrossEncoderReranker.js';
 export class RetrievalFunnel {
   #multiSignalRanker;
   #coarseRanker;
+  #crossEncoder;
   #vectorStore;
   #aiProvider;
@@ -21,6 +23,10 @@ export class RetrievalFunnel {
     this.#coarseRanker = new CoarseRanker(options);
     this.#vectorStore = options.vectorStore || null;
     this.#aiProvider = options.aiProvider || null;
+    this.#crossEncoder = new CrossEncoderReranker({
+      aiProvider: this.#aiProvider,
+      logger: options.logger || console,
+    });
   }
   /**
@@ -67,43 +73,10 @@ export class RetrievalFunnel {
   }
   /**
-   * Layer 2: 语义重排 — 优先使用向量相似度，降级到 Jaccard
+   * Layer 2: 语义重排 — Cross-Encoder AI 评分（降级 Jaccard）
    */
   async #semanticRerank(query, candidates) {
-    // 尝试使用向量相似度重排
-    if (this.#vectorStore && this.#aiProvider) {
-      try {
-        const queryEmbedding = await this.#aiProvider.embed(query);
-        if (queryEmbedding && queryEmbedding.length > 0) {
-          const vectorResults = await this.#vectorStore.query(queryEmbedding, candidates.length);
-          if (vectorResults && vectorResults.length > 0) {
-            const scoreMap = new Map(vectorResults.map(vr => [vr.id, vr.similarity || vr.score || 0]));
-            return candidates.map(candidate => {
-              const semanticScore = scoreMap.get(candidate.id) || 0;
-              return { ...candidate, semanticScore };
-            }).sort((a, b) => b.semanticScore - a.semanticScore);
-          }
-        }
-      } catch {
-        // 向量搜索失败，降级到 Jaccard
-      }
-    }
-    // Fallback: Jaccard 相似度
-    const queryTokens = new Set(tokenize(query));
-    if (queryTokens.size === 0) return candidates;
-    return candidates.map(candidate => {
-      const text = [candidate.title, candidate.trigger, candidate.content, candidate.code, candidate.description].filter(Boolean).join(' ');
-      const docTokens = new Set(tokenize(text));
-      // Jaccard 相似度
-      const intersection = [...queryTokens].filter(t => docTokens.has(t)).length;
-      const union = new Set([...queryTokens, ...docTokens]).size;
-      const jaccard = union > 0 ? intersection / union : 0;
-      return { ...candidate, semanticScore: jaccard };
-    }).sort((a, b) => b.semanticScore - a.semanticScore);
+    return this.#crossEncoder.rerank(query, candidates);
   }
   /**