autosnippet 2.18.0 → 2.19.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. package/dashboard/dist/assets/{icons-C6kshpB1.js → icons-C7FN32VL.js} +1 -1
  2. package/dashboard/dist/assets/index-D8dCXLzr.js +129 -0
  3. package/dashboard/dist/index.html +2 -2
  4. package/lib/external/ai/AiProvider.js +42 -11
  5. package/lib/external/ai/providers/ClaudeProvider.js +4 -2
  6. package/lib/external/ai/providers/GoogleGeminiProvider.js +66 -8
  7. package/lib/external/ai/providers/OpenAiProvider.js +48 -2
  8. package/lib/external/mcp/handlers/bootstrap.js +1 -2
  9. package/lib/http/HttpServer.js +4 -0
  10. package/lib/http/routes/candidates.js +405 -0
  11. package/lib/http/routes/search.js +113 -0
  12. package/lib/infrastructure/vector/Chunker.js +3 -8
  13. package/lib/infrastructure/vector/JsonVectorAdapter.js +2 -9
  14. package/lib/service/candidate/SimilarityService.js +7 -35
  15. package/lib/service/chat/ChatAgent.js +28 -686
  16. package/lib/service/chat/ContextWindow.js +87 -3
  17. package/lib/service/chat/ConversationStore.js +3 -4
  18. package/lib/service/chat/ProjectSemanticMemory.js +9 -14
  19. package/lib/service/chat/ReasoningLayer.js +10 -54
  20. package/lib/service/chat/ToolRegistry.js +0 -52
  21. package/lib/service/chat/tools.js +7 -6
  22. package/lib/service/cursor/TokenBudget.js +4 -21
  23. package/lib/service/search/CrossEncoderReranker.js +163 -0
  24. package/lib/service/search/RetrievalFunnel.js +9 -36
  25. package/lib/service/skills/SignalCollector.js +28 -28
  26. package/lib/shared/similarity.js +101 -0
  27. package/lib/shared/token-utils.js +46 -0
  28. package/package.json +1 -1
  29. package/dashboard/dist/assets/index-9byoG7kd.js +0 -129
@@ -22,6 +22,7 @@
22
22
  */
23
23
 
24
24
  import Logger from '../../infrastructure/logging/Logger.js';
25
+ import { estimateTokensFast } from '../../shared/token-utils.js';
25
26
 
26
27
  /**
27
28
  * 一组相关消息的原子单元:
@@ -46,6 +47,89 @@ export class ContextWindow {
46
47
  /** @type {Object} 日志器 */
47
48
  #logger;
48
49
 
50
+ /**
51
+ * 模型名 → 上下文窗口大小映射(token 数)。
52
+ * 键为正则模式,按优先级从上到下匹配。
53
+ * 值为模型的原始上下文窗口上限。
54
+ * @type {Array<[RegExp, number]>}
55
+ */
56
+ static MODEL_CONTEXT_WINDOWS = [
57
+ // ── Google Gemini ──
58
+ [/gemini-3/i, 1_000_000],
59
+ [/gemini-2\.5/i, 1_000_000],
60
+ [/gemini-2/i, 1_000_000],
61
+ [/gemini-1\.5-pro/i, 1_000_000],
62
+ [/gemini-1\.5-flash/i, 1_000_000],
63
+ [/gemini-1\.0/i, 32_000],
64
+ [/gemini/i, 1_000_000], // 未知版本回退
65
+ // ── OpenAI ──
66
+ [/gpt-4o/i, 128_000],
67
+ [/gpt-4-turbo/i, 128_000],
68
+ [/gpt-4-(?!turbo)/i, 8_192],
69
+ [/gpt-3\.5-turbo-16k/i, 16_384],
70
+ [/gpt-3\.5/i, 4_096],
71
+ [/o1|o3|o4/i, 200_000], // OpenAI reasoning models
72
+ // ── Anthropic ──
73
+ [/claude-.*sonnet-4/i, 200_000],
74
+ [/claude-3[\.\-]5/i, 200_000],
75
+ [/claude-3[\.\-]opus/i, 200_000],
76
+ [/claude-3/i, 200_000],
77
+ [/claude/i, 200_000], // 未知 claude 回退
78
+ // ── DeepSeek ──
79
+ [/deepseek/i, 64_000],
80
+ // ── 本地 Ollama ──
81
+ [/llama3[\.\-]?[23]/i, 128_000],
82
+ [/llama3/i, 8_192],
83
+ [/llama/i, 4_096],
84
+ [/mistral/i, 32_000],
85
+ [/qwen/i, 128_000],
86
+ [/phi/i, 128_000],
87
+ // ── Mock(测试) ──
88
+ [/mock/i, 32_000],
89
+ ];
90
+
91
+ /**
92
+ * 根据模型名称解析合适的 ContextWindow token 预算。
93
+ *
94
+ * 策略: 取模型最大上下文窗口的一个安全分片,
95
+ * - 大窗口 (≥200k): 预算 24000,isSystem 时 32000(tool schemas + system prompt 占显著空间)
96
+ * - 中窗口 (≥64k): 预算 20000,isSystem 时 24000
97
+ * - 小窗口 (≥16k): 预算 12000,isSystem 时 14000
98
+ * - 微窗口 (<16k): 预算 = 窗口 × 0.65,isSystem 时 × 0.75(其余留给 prompt/tool schema)
99
+ *
100
+ * @param {string} modelName — 模型名称,如 'gemini-3-flash-preview', 'gpt-4o-mini'
101
+ * @param {{ isSystem?: boolean }} [opts] — isSystem 为 true 时给予更高预算
102
+ * @returns {number} 建议的 token 预算
103
+ */
104
+ static resolveTokenBudget(modelName, opts = {}) {
105
+ const { isSystem = false } = opts;
106
+
107
+ // 1. 查找模型上下文窗口大小
108
+ let contextSize = 32_000; // 默认回退值
109
+ if (modelName) {
110
+ for (const [pattern, size] of ContextWindow.MODEL_CONTEXT_WINDOWS) {
111
+ if (pattern.test(modelName)) {
112
+ contextSize = size;
113
+ break;
114
+ }
115
+ }
116
+ }
117
+
118
+ // 2. 按分级策略计算 token 预算
119
+ let budget;
120
+ if (contextSize >= 200_000) {
121
+ budget = isSystem ? 32_000 : 24_000;
122
+ } else if (contextSize >= 64_000) {
123
+ budget = isSystem ? 24_000 : 20_000;
124
+ } else if (contextSize >= 16_000) {
125
+ budget = isSystem ? 14_000 : 12_000;
126
+ } else {
127
+ budget = Math.floor(contextSize * (isSystem ? 0.75 : 0.65));
128
+ }
129
+
130
+ return budget;
131
+ }
132
+
49
133
  /**
50
134
  * @param {number} [tokenBudget=24000] — token 预算上限
51
135
  */
@@ -285,10 +369,10 @@ export class ContextWindow {
285
369
  estimateTokens() {
286
370
  let total = 0;
287
371
  for (const m of this.#messages) {
288
- if (m.content) total += m.content.length / 3;
289
- if (m.toolCalls) total += JSON.stringify(m.toolCalls).length / 3;
372
+ if (m.content) total += estimateTokensFast(m.content);
373
+ if (m.toolCalls) total += estimateTokensFast(JSON.stringify(m.toolCalls));
290
374
  }
291
- return Math.ceil(total);
375
+ return total;
292
376
  }
293
377
 
294
378
  /**
@@ -23,9 +23,9 @@ import path from 'node:path';
23
23
  import crypto from 'node:crypto';
24
24
  import Logger from '../../infrastructure/logging/Logger.js';
25
25
  import pathGuard from '../../shared/PathGuard.js';
26
+ import { estimateTokens as _estimateTokens } from '../../shared/token-utils.js';
26
27
 
27
28
  const DEFAULT_TOKEN_BUDGET = 12000; // ~12K tokens 留给历史, 其余给系统提示词和当前消息
28
- const CHARS_PER_TOKEN = 3.5; // 近似: 中文 ~3.5 / 英文 ~4 / 取偏保守值
29
29
  const MAX_CONVERSATIONS = 100; // 索引最多保留 100 个对话
30
30
  const SUMMARY_TARGET_TOKENS = 500; // 压缩后的摘要目标 token 数
31
31
 
@@ -283,13 +283,12 @@ export class ConversationStore {
283
283
  }
284
284
 
285
285
  /**
286
- * 估算 token 数
286
+ * 估算 token 数 — 委托给共享 token-utils(CJK 感知)
287
287
  * @param {string} text
288
288
  * @returns {number}
289
289
  */
290
290
  estimateTokens(text) {
291
- if (!text) return 0;
292
- return Math.ceil(text.length / CHARS_PER_TOKEN);
291
+ return _estimateTokens(text);
293
292
  }
294
293
 
295
294
  // ═══════════════════════════════════════════════════════
@@ -25,6 +25,7 @@
25
25
  */
26
26
 
27
27
  import { randomUUID } from 'node:crypto';
28
+ import { jaccardSimilarity, tokenizeForSimilarity } from '../../shared/similarity.js';
28
29
 
29
30
  // ──────────────────────────────────────────────────────────────
30
31
  // 常量
@@ -296,7 +297,7 @@ export class ProjectSemanticMemory {
296
297
 
297
298
  const now = Date.now();
298
299
  const lowerQuery = (query || '').toLowerCase();
299
- const queryTokens = this.#tokenize(lowerQuery);
300
+ const queryTokens = this.#tokenizeWords(lowerQuery);
300
301
 
301
302
  const scored = all.map(m => {
302
303
  // Recency: 指数衰减 (半衰期 7 天)
@@ -702,7 +703,7 @@ export class ProjectSemanticMemory {
702
703
  : this.#stmts.getAll.all({ now });
703
704
 
704
705
  const lowerContent = content.toLowerCase();
705
- const contentTokens = this.#tokenize(lowerContent);
706
+ const contentTokens = tokenizeForSimilarity(lowerContent);
706
707
 
707
708
  const scored = candidates
708
709
  .map(row => {
@@ -716,7 +717,7 @@ export class ProjectSemanticMemory {
716
717
  }
717
718
 
718
719
  /**
719
- * 计算两段文本的相似度 (Jaccard + 子串)
720
+ * 计算两段文本的相似度 (Jaccard + 子串) — 委托共享 similarity 模块
720
721
  *
721
722
  * @param {Set<string>} tokensA — 预分词的 token 集合
722
723
  * @param {string} lowerA — 小写原文
@@ -725,18 +726,12 @@ export class ProjectSemanticMemory {
725
726
  */
726
727
  #computeSimilarity(tokensA, lowerA, contentB) {
727
728
  const lowerB = (contentB || '').toLowerCase();
728
- const tokensB = this.#tokenize(lowerB);
729
+ const tokensB = tokenizeForSimilarity(lowerB);
729
730
 
730
731
  if (tokensA.size === 0 && tokensB.size === 0) return 1.0;
731
732
  if (tokensA.size === 0 || tokensB.size === 0) return 0.0;
732
733
 
733
- // Jaccard similarity: |A ∩ B| / |A ∪ B|
734
- let intersection = 0;
735
- for (const t of tokensA) {
736
- if (tokensB.has(t)) intersection++;
737
- }
738
- const union = new Set([...tokensA, ...tokensB]).size;
739
- const jaccard = intersection / union;
734
+ const jaccard = jaccardSimilarity(tokensA, tokensB);
740
735
 
741
736
  // 子串包含加分
742
737
  const containsBonus = (lowerA.includes(lowerB) || lowerB.includes(lowerA)) ? 0.3 : 0;
@@ -756,7 +751,7 @@ export class ProjectSemanticMemory {
756
751
  if (!lowerQuery || !content) return 0;
757
752
 
758
753
  const lowerContent = content.toLowerCase();
759
- const contentTokens = this.#tokenize(lowerContent);
754
+ const contentTokens = this.#tokenizeWords(lowerContent);
760
755
 
761
756
  if (queryTokens.size === 0) return 0;
762
757
 
@@ -783,11 +778,11 @@ export class ProjectSemanticMemory {
783
778
  }
784
779
 
785
780
  /**
786
- * 分词 (简单: 按空格/标点分割, 去短词)
781
+ * 分词 (按空格/标点分割, 去短词) — 用于 relevance 计算
787
782
  * @param {string} text
788
783
  * @returns {Set<string>}
789
784
  */
790
- #tokenize(text) {
785
+ #tokenizeWords(text) {
791
786
  if (!text) return new Set();
792
787
  return new Set(
793
788
  text
@@ -17,7 +17,7 @@
17
17
  *
18
18
  * ChatAgent 在主循环的 4 个生命周期点调用:
19
19
  * 1. beforeAICall(iteration, opts) — 开始新轮次 + 可选注入反思/规划
20
- * 2. afterAICall(aiResult, mode) — 提取 Thought + 提取 Plan
20
+ * 2. afterAICall(aiResult) — 提取 Thought + 提取 Plan
21
21
  * 3. afterToolExec(name, args, result, metrics) — 构建 Observation
22
22
  * 4. afterRound(roundResults) — 关闭轮次 + 写入摘要 + 更新计划进度
23
23
  *
@@ -143,32 +143,20 @@ export class ReasoningLayer {
143
143
  * - 从 AI 响应中提取 Thought
144
144
  * - 从 AI 响应中提取 Plan(首次 / replan 后)
145
145
  *
146
- * @param {object|string} aiResult — AI 返回结果
147
- * @param {'native'|'text'} [mode='native'] — 调用模式
146
+ * @param {object} aiResult — AI 返回结果 (native tool calling)
148
147
  */
149
- afterAICall(aiResult, mode = 'native') {
148
+ afterAICall(aiResult) {
150
149
  if (!this.#config.enabled) return;
151
150
 
152
151
  let extractedText = null;
153
152
 
154
- if (mode === 'native') {
155
- // Native 模式: AI 同时返回文本和工具调用时,文本就是 thought
156
- if (aiResult?.text && aiResult?.functionCalls?.length > 0) {
157
- this.#trace.setThought(aiResult.text);
158
- extractedText = aiResult.text;
159
- this.#logger.info(`[ReasoningLayer] 💭 thought: ${aiResult.text.substring(0, 150).replace(/\n/g, '↵')}…`);
160
- } else if (aiResult?.text) {
161
- extractedText = aiResult.text;
162
- }
163
- } else {
164
- // Text 模式: 需要从完整响应中切分出 thought 部分(Action 块之前的文本)
165
- const text = typeof aiResult === 'string' ? aiResult : aiResult?.text;
166
- extractedText = text;
167
- const thought = this.#extractThoughtFromText(text);
168
- if (thought) {
169
- this.#trace.setThought(thought);
170
- this.#logger.info(`[ReasoningLayer] 💭 thought (text): ${thought.substring(0, 150).replace(/\n/g, '↵')}…`);
171
- }
153
+ // Native 模式: AI 同时返回文本和工具调用时,文本就是 thought
154
+ if (aiResult?.text && aiResult?.functionCalls?.length > 0) {
155
+ this.#trace.setThought(aiResult.text);
156
+ extractedText = aiResult.text;
157
+ this.#logger.info(`[ReasoningLayer] 💭 thought: ${aiResult.text.substring(0, 150).replace(/\n/g, '↵')}…`);
158
+ } else if (aiResult?.text) {
159
+ extractedText = aiResult.text;
172
160
  }
173
161
 
174
162
  // ── Planning: 从 AI 响应中提取 plan ──
@@ -549,38 +537,6 @@ export class ReasoningLayer {
549
537
  return meta;
550
538
  }
551
539
 
552
- /**
553
- * 从 LLM 文本响应中提取 Thought 部分(Action 块之前的文本)
554
- *
555
- * 不改变 #parseActions 逻辑,纯粹数据提取。
556
- *
557
- * @param {string} response — LLM 完整文本响应
558
- * @returns {string|null}
559
- * @private
560
- */
561
- #extractThoughtFromText(response) {
562
- if (!response) return null;
563
-
564
- // Thought 在第一个 Action 标记之前
565
- const markers = [
566
- /```(?:action|batch_actions|tool_code)/,
567
- /Action\s*:\s*\w+/i,
568
- /<tool_call>/,
569
- /```json\s*\n\s*\{\s*"(?:tool|name|function)"/,
570
- ];
571
-
572
- let cutoff = response.length;
573
- for (const m of markers) {
574
- const idx = response.search(m);
575
- if (idx !== -1 && idx < cutoff) cutoff = idx;
576
- }
577
-
578
- const thought = response.substring(0, cutoff).trim();
579
-
580
- // 过短的(< 20 字符)不算有效 thought
581
- return thought.length >= 20 ? thought : null;
582
- }
583
-
584
540
  // ─── Planning 内部方法 ─────────────────────────────────
585
541
 
586
542
  /**
@@ -178,58 +178,6 @@ export class ToolRegistry {
178
178
  return this.#tools.has(name);
179
179
  }
180
180
 
181
- /**
182
- * 转换为 Gemini functionDeclarations 格式
183
- * 供 GoogleGeminiProvider.chatWithTools() 使用
184
- *
185
- * @param {string[]} [allowedTools] — 限制可用工具列表(不传则返回全部)
186
- * @returns {Array<{name: string, description: string, parameters: object}>}
187
- */
188
- toFunctionDeclarations(allowedTools) {
189
- const result = [];
190
- for (const [name, tool] of this.#tools) {
191
- if (allowedTools && !allowedTools.includes(name)) continue;
192
- result.push({
193
- name: tool.name,
194
- description: tool.description || '',
195
- parameters: this.#sanitizeSchemaForGemini(tool.parameters),
196
- });
197
- }
198
- return result;
199
- }
200
-
201
- /**
202
- * 清理 JSON Schema 使之兼容 Gemini API 的 OpenAPI 子集
203
- * Gemini API 不支持某些 JSON Schema 扩展语法
204
- */
205
- #sanitizeSchemaForGemini(schema) {
206
- if (!schema || typeof schema !== 'object') {
207
- return { type: 'object', properties: {} };
208
- }
209
-
210
- const cleaned = { ...schema };
211
-
212
- // 确保 type 存在
213
- if (!cleaned.type) cleaned.type = 'object';
214
-
215
- // 递归清理 properties
216
- if (cleaned.properties) {
217
- const props = {};
218
- for (const [key, val] of Object.entries(cleaned.properties)) {
219
- const prop = { ...val };
220
- // 移除 Gemini 不支持的字段
221
- delete prop.default;
222
- delete prop.examples;
223
- // 确保 type 存在
224
- if (!prop.type) prop.type = 'string';
225
- props[key] = prop;
226
- }
227
- cleaned.properties = props;
228
- }
229
-
230
- return cleaned;
231
- }
232
-
233
181
  /**
234
182
  * 获取所有工具名
235
183
  */
@@ -1371,8 +1371,9 @@ Return ONLY a JSON array. No markdown, no extra text. Return [] if no relationsh
1371
1371
  # Recipe Pairs
1372
1372
  ${pairsText}`;
1373
1373
 
1374
- const response = await ctx.aiProvider.chat(prompt, { temperature: 0.2 });
1375
- const parsed = ctx.aiProvider.extractJSON(response, '[', ']');
1374
+ const parsed = await ctx.aiProvider.chatWithStructuredOutput(prompt, {
1375
+ openChar: '[', closeChar: ']', temperature: 0.2,
1376
+ });
1376
1377
  const relations = Array.isArray(parsed) ? parsed : [];
1377
1378
 
1378
1379
  // 写入知识图谱(除非 dryRun)
@@ -1514,8 +1515,9 @@ const aiTranslate = {
1514
1515
  if (summary) parts.push(`summary: ${summary}`);
1515
1516
  if (usageGuide) parts.push(`usageGuide: ${usageGuide}`);
1516
1517
 
1517
- const raw = await ctx.aiProvider.chat(parts.join('\n'), { systemPrompt, temperature: 0.2 });
1518
- const parsed = ctx.aiProvider.extractJSON(raw, '{', '}');
1518
+ const parsed = await ctx.aiProvider.chatWithStructuredOutput(parts.join('\n'), {
1519
+ systemPrompt, temperature: 0.2,
1520
+ });
1519
1521
  return parsed || { summaryEn: summary || '', usageGuideEn: usageGuide || '' };
1520
1522
  },
1521
1523
  };
@@ -1626,8 +1628,7 @@ Return ONLY valid JSON:
1626
1628
  }
1627
1629
  }`;
1628
1630
 
1629
- const raw = await ctx.aiProvider.chat(prompt, { temperature: 0.2 });
1630
- const rule = ctx.aiProvider.extractJSON(raw, '{', '}');
1631
+ const rule = await ctx.aiProvider.chatWithStructuredOutput(prompt, { temperature: 0.2 });
1631
1632
  if (!rule) return { error: 'Failed to parse AI response' };
1632
1633
 
1633
1634
  // 验证正则表达式
@@ -1,10 +1,13 @@
1
1
  /**
2
2
  * TokenBudget — Token 预算控制
3
3
  *
4
- * 简易 token 估算器(1 token ≈ 4 chars for English, 2 chars for CJK),
4
+ * token 估算统一使用 shared/token-utils(CJK 感知),
5
5
  * 用于确保 .mdc 文件不超出 Cursor 上下文预算。
6
6
  */
7
7
 
8
+ import { estimateTokens } from '../../shared/token-utils.js';
9
+ export { estimateTokens };
10
+
8
11
  /** 默认预算配置 */
9
12
  export const BUDGET = {
10
13
  CHANNEL_A_MAX: 400, // Always-On Rules 最大 token
@@ -13,26 +16,6 @@ export const BUDGET = {
13
16
  CHANNEL_A_MAX_RULES: 8, // Always-On Rules 最多规则数
14
17
  };
15
18
 
16
- /**
17
- * 估算文本 token 数
18
- * 简易算法:英文按 4 chars/token,CJK 按 2 chars/token
19
- * @param {string} text
20
- * @returns {number}
21
- */
22
- export function estimateTokens(text) {
23
- if (!text) return 0;
24
- let tokens = 0;
25
- for (const ch of text) {
26
- // CJK Unified Ideographs + common CJK ranges
27
- if (ch.charCodeAt(0) > 0x2e80) {
28
- tokens += 0.5; // ~2 chars per token for CJK
29
- } else {
30
- tokens += 0.25; // ~4 chars per token for English
31
- }
32
- }
33
- return Math.ceil(tokens);
34
- }
35
-
36
19
  /**
37
20
  * 按 token 预算截断内容行
38
21
  * @param {string[]} lines - 内容行
@@ -0,0 +1,163 @@
1
+ /**
2
+ * CrossEncoderReranker — AI 驱动的语义重排器
3
+ *
4
+ * 替代 Jaccard 相似度,使用 LLM 对 (query, document) 对进行语义相关性评分。
5
+ *
6
+ * 策略:
7
+ * 1. 将候选文档与 query 组成 pairs,批量送入 AI 评分
8
+ * 2. AI 返回每个 pair 的 relevance score (0.0-1.0)
9
+ * 3. 按 score 降序排列
10
+ *
11
+ * 优化:
12
+ * - 单次 API 调用批量评分(减少延迟和成本)
13
+ * - 文档截断至 MAX_DOC_LEN 控制 token 消耗
14
+ * - 候选上限 MAX_CANDIDATES,超出部分保留原始顺序
15
+ * - AI 不可用时自动降级到 Jaccard
16
+ */
17
+
18
+ import { tokenize } from './InvertedIndex.js';
19
+ import { jaccardSimilarity } from '../../shared/similarity.js';
20
+
21
+ const MAX_CANDIDATES = 40; // 超过此数量截断(控制 prompt 大小)
22
+ const MAX_DOC_LEN = 300; // 每个文档最大字符数
23
+
24
+ export class CrossEncoderReranker {
25
+ #aiProvider;
26
+ #logger;
27
+
28
+ /**
29
+ * @param {object} opts
30
+ * @param {import('../../external/ai/AiProvider.js').AiProvider} opts.aiProvider
31
+ * @param {object} [opts.logger]
32
+ */
33
+ constructor(opts = {}) {
34
+ this.#aiProvider = opts.aiProvider || null;
35
+ this.#logger = opts.logger || console;
36
+ }
37
+
38
+ /**
39
+ * 对候选列表进行语义重排
40
+ *
41
+ * @param {string} query — 用户查询
42
+ * @param {Array<object>} candidates — Layer 1 输出的候选列表
43
+ * @returns {Promise<Array<object>>} — 附带 semanticScore 的候选列表(降序)
44
+ */
45
+ async rerank(query, candidates) {
46
+ if (!candidates || candidates.length === 0) return [];
47
+ if (!query) return candidates;
48
+
49
+ // 如果 AI Provider 不可用,降级到 Jaccard
50
+ if (!this.#aiProvider || typeof this.#aiProvider.chatWithStructuredOutput !== 'function') {
51
+ return this.#jaccardFallback(query, candidates);
52
+ }
53
+
54
+ // 截取前 MAX_CANDIDATES 个候选,剩余保持原始顺序
55
+ const head = candidates.slice(0, MAX_CANDIDATES);
56
+ const tail = candidates.slice(MAX_CANDIDATES);
57
+
58
+ try {
59
+ const scored = await this.#batchScore(query, head);
60
+ // tail 部分给一个递减的低分以保持稳定排序
61
+ const minScore = scored.length > 0
62
+ ? Math.min(...scored.map(s => s.semanticScore)) * 0.5
63
+ : 0;
64
+ const tailScored = tail.map((c, i) => ({
65
+ ...c,
66
+ semanticScore: Math.max(minScore - (i + 1) * 0.001, 0),
67
+ }));
68
+ return [...scored, ...tailScored];
69
+ } catch (err) {
70
+ this.#logger.warn?.(`[CrossEncoderReranker] AI scoring failed, falling back to Jaccard: ${err.message}`);
71
+ return this.#jaccardFallback(query, candidates);
72
+ }
73
+ }
74
+
75
+ /**
76
+ * 批量 AI 评分 — 单次 chatWithStructuredOutput 调用
77
+ */
78
+ async #batchScore(query, candidates) {
79
+ const pairs = candidates.map((c, i) => {
80
+ const doc = this.#extractDocText(c);
81
+ return `[${i}] ${doc.substring(0, MAX_DOC_LEN)}`;
82
+ });
83
+
84
+ const prompt = `# Task
85
+ Score the relevance of each document to the query. Return ONLY a JSON array.
86
+
87
+ # Query
88
+ ${query}
89
+
90
+ # Documents
91
+ ${pairs.join('\n')}
92
+
93
+ # Output Format
94
+ Return a JSON array of objects: [{"i": 0, "s": 0.85}, {"i": 1, "s": 0.3}, ...]
95
+ - "i": document index (integer)
96
+ - "s": relevance score (float 0.0-1.0, where 1.0 = perfectly relevant)
97
+
98
+ Score guidelines:
99
+ - 1.0: exact match or directly answers the query
100
+ - 0.7-0.9: highly relevant, covers the main topic
101
+ - 0.4-0.6: partially relevant, related topic
102
+ - 0.1-0.3: tangentially related
103
+ - 0.0: completely irrelevant
104
+
105
+ Return ONLY a JSON array, no markdown or explanation.`;
106
+
107
+ const result = await this.#aiProvider.chatWithStructuredOutput(prompt, {
108
+ openChar: '[',
109
+ closeChar: ']',
110
+ temperature: 0.1,
111
+ maxTokens: 2048,
112
+ });
113
+
114
+ if (!Array.isArray(result)) {
115
+ throw new Error('AI returned non-array result');
116
+ }
117
+
118
+ // 构建 index → score 映射
119
+ const scoreMap = new Map();
120
+ for (const item of result) {
121
+ const idx = item.i ?? item.index;
122
+ const score = item.s ?? item.score ?? 0;
123
+ if (typeof idx === 'number' && idx >= 0 && idx < candidates.length) {
124
+ scoreMap.set(idx, Math.max(0, Math.min(1, score)));
125
+ }
126
+ }
127
+
128
+ // 合并分数,未评分的给 0
129
+ return candidates.map((c, i) => ({
130
+ ...c,
131
+ semanticScore: scoreMap.get(i) ?? 0,
132
+ })).sort((a, b) => b.semanticScore - a.semanticScore);
133
+ }
134
+
135
+ /**
136
+ * 从候选对象提取用于评分的文本表示
137
+ */
138
+ #extractDocText(candidate) {
139
+ const parts = [
140
+ candidate.title,
141
+ candidate.trigger,
142
+ candidate.description || candidate.summary,
143
+ candidate.code,
144
+ candidate.content,
145
+ ].filter(Boolean);
146
+ return parts.join(' | ');
147
+ }
148
+
149
+ /**
150
+ * Jaccard 降级 — 当 AI 不可用时使用
151
+ */
152
+ #jaccardFallback(query, candidates) {
153
+ const queryTokens = new Set(tokenize(query));
154
+ if (queryTokens.size === 0) return candidates;
155
+
156
+ return candidates.map(candidate => {
157
+ const text = this.#extractDocText(candidate);
158
+ const docTokens = new Set(tokenize(text));
159
+ const score = jaccardSimilarity(queryTokens, docTokens);
160
+ return { ...candidate, semanticScore: score };
161
+ }).sort((a, b) => b.semanticScore - a.semanticScore);
162
+ }
163
+ }
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * RetrievalFunnel — 4 层检索漏斗
3
3
  * Layer 1: Keyword Filter (倒排索引 fast recall)
4
- * Layer 2: Semantic Rerank (Jaccard 近似语义重排)
4
+ * Layer 2: Cross-Encoder Rerank (AI 驱动语义重排,降级 Jaccard)
5
5
  * Layer 3: Multi-Signal Ranking (6 信号加权)
6
6
  * Layer 4: Context-Aware Reranking (对话历史提升)
7
7
  */
@@ -9,10 +9,12 @@
9
9
  import { buildInvertedIndex, lookup, tokenize } from './InvertedIndex.js';
10
10
  import { MultiSignalRanker } from './MultiSignalRanker.js';
11
11
  import { CoarseRanker } from './CoarseRanker.js';
12
+ import { CrossEncoderReranker } from './CrossEncoderReranker.js';
12
13
 
13
14
  export class RetrievalFunnel {
14
15
  #multiSignalRanker;
15
16
  #coarseRanker;
17
+ #crossEncoder;
16
18
  #vectorStore;
17
19
  #aiProvider;
18
20
 
@@ -21,6 +23,10 @@ export class RetrievalFunnel {
21
23
  this.#coarseRanker = new CoarseRanker(options);
22
24
  this.#vectorStore = options.vectorStore || null;
23
25
  this.#aiProvider = options.aiProvider || null;
26
+ this.#crossEncoder = new CrossEncoderReranker({
27
+ aiProvider: this.#aiProvider,
28
+ logger: options.logger || console,
29
+ });
24
30
  }
25
31
 
26
32
  /**
@@ -67,43 +73,10 @@ export class RetrievalFunnel {
67
73
  }
68
74
 
69
75
  /**
70
- * Layer 2: 语义重排 — 优先使用向量相似度,降级到 Jaccard
76
+ * Layer 2: 语义重排 — Cross-Encoder AI 评分(降级 Jaccard)
71
77
  */
72
78
  async #semanticRerank(query, candidates) {
73
- // 尝试使用向量相似度重排
74
- if (this.#vectorStore && this.#aiProvider) {
75
- try {
76
- const queryEmbedding = await this.#aiProvider.embed(query);
77
- if (queryEmbedding && queryEmbedding.length > 0) {
78
- const vectorResults = await this.#vectorStore.query(queryEmbedding, candidates.length);
79
- if (vectorResults && vectorResults.length > 0) {
80
- const scoreMap = new Map(vectorResults.map(vr => [vr.id, vr.similarity || vr.score || 0]));
81
- return candidates.map(candidate => {
82
- const semanticScore = scoreMap.get(candidate.id) || 0;
83
- return { ...candidate, semanticScore };
84
- }).sort((a, b) => b.semanticScore - a.semanticScore);
85
- }
86
- }
87
- } catch {
88
- // 向量搜索失败,降级到 Jaccard
89
- }
90
- }
91
-
92
- // Fallback: Jaccard 相似度
93
- const queryTokens = new Set(tokenize(query));
94
- if (queryTokens.size === 0) return candidates;
95
-
96
- return candidates.map(candidate => {
97
- const text = [candidate.title, candidate.trigger, candidate.content, candidate.code, candidate.description].filter(Boolean).join(' ');
98
- const docTokens = new Set(tokenize(text));
99
-
100
- // Jaccard 相似度
101
- const intersection = [...queryTokens].filter(t => docTokens.has(t)).length;
102
- const union = new Set([...queryTokens, ...docTokens]).size;
103
- const jaccard = union > 0 ? intersection / union : 0;
104
-
105
- return { ...candidate, semanticScore: jaccard };
106
- }).sort((a, b) => b.semanticScore - a.semanticScore);
79
+ return this.#crossEncoder.rerank(query, candidates);
107
80
  }
108
81
 
109
82
  /**