autosnippet 2.18.0 → 2.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. package/dashboard/dist/assets/{icons-C6kshpB1.js → icons-C7FN32VL.js} +1 -1
  2. package/dashboard/dist/assets/index-D8dCXLzr.js +129 -0
  3. package/dashboard/dist/index.html +2 -2
  4. package/lib/external/ai/AiProvider.js +42 -11
  5. package/lib/external/ai/providers/ClaudeProvider.js +4 -2
  6. package/lib/external/ai/providers/GoogleGeminiProvider.js +66 -8
  7. package/lib/external/ai/providers/OpenAiProvider.js +48 -2
  8. package/lib/external/mcp/handlers/bootstrap.js +1 -2
  9. package/lib/http/HttpServer.js +4 -0
  10. package/lib/http/routes/candidates.js +405 -0
  11. package/lib/http/routes/search.js +113 -0
  12. package/lib/infrastructure/vector/Chunker.js +3 -8
  13. package/lib/infrastructure/vector/JsonVectorAdapter.js +2 -9
  14. package/lib/service/candidate/SimilarityService.js +7 -35
  15. package/lib/service/chat/ChatAgent.js +28 -686
  16. package/lib/service/chat/ContextWindow.js +87 -3
  17. package/lib/service/chat/ConversationStore.js +3 -4
  18. package/lib/service/chat/ProjectSemanticMemory.js +9 -14
  19. package/lib/service/chat/ReasoningLayer.js +10 -54
  20. package/lib/service/chat/ToolRegistry.js +0 -52
  21. package/lib/service/chat/tools.js +7 -6
  22. package/lib/service/cursor/TokenBudget.js +4 -21
  23. package/lib/service/search/CrossEncoderReranker.js +163 -0
  24. package/lib/service/search/RetrievalFunnel.js +9 -36
  25. package/lib/service/skills/SignalCollector.js +28 -28
  26. package/lib/shared/similarity.js +101 -0
  27. package/lib/shared/token-utils.js +46 -0
  28. package/package.json +1 -1
  29. package/dashboard/dist/assets/index-9byoG7kd.js +0 -129
@@ -205,8 +205,8 @@ export class SignalCollector {
205
205
  this.#logger.debug('[SignalCollector] invoking ChatAgent for analysis...');
206
206
  const { reply, toolCalls } = await this.#chatAgent.execute(prompt, { history: [], source: 'system' });
207
207
 
208
- // 4. 解析 AI 响应
209
- const parsed = this.#parseAiResponse(reply);
208
+ // 4. 解析 AI 响应 — 使用 AiProvider.extractJSON 统一 structured output 解析
209
+ const parsed = this.#parseStructuredReply(reply);
210
210
  const suggestions = parsed.suggestions || [];
211
211
 
212
212
  // 5. 过滤已推送
@@ -444,47 +444,47 @@ ${JSON.stringify(signals.codeChanges, null, 2)}
444
444
  }
445
445
 
446
446
  // ═══════════════════════════════════════════════════════
447
- // AI 响应解析
447
+ // AI 响应解析 — 统一使用 AiProvider.extractJSON (Structured Output)
448
448
  // ═══════════════════════════════════════════════════════
449
449
 
450
- #parseAiResponse(reply) {
451
- if (!reply) return { suggestions: [], nextIntervalMinutes: null, summary: '' };
450
+ /**
451
+ * ChatAgent ReAct 回复中提取结构化 JSON
452
+ *
453
+ * 优先级链:
454
+ * 1. AiProvider.extractJSON (支持 markdown 清理、截断修复、trailing comma 等)
455
+ * 2. 最后一行 JSON 回退 (兼容 prompt 要求的 "最后一行输出 JSON" 格式)
456
+ *
457
+ * @param {string} reply — ChatAgent.execute() 的回复文本
458
+ * @returns {{ suggestions: Array, nextIntervalMinutes: number|null, summary: string }}
459
+ */
460
+ #parseStructuredReply(reply) {
461
+ const defaultResult = { suggestions: [], nextIntervalMinutes: null, summary: '' };
462
+ if (!reply) return defaultResult;
452
463
 
453
464
  try {
454
- // 策略 1:尝试从最后一行解析 JSON
465
+ // 策略 1: 通过 AiProvider.extractJSON 统一解析
466
+ const aiProvider = this.#chatAgent?.aiProvider;
467
+ if (aiProvider && typeof aiProvider.extractJSON === 'function') {
468
+ const obj = aiProvider.extractJSON(reply, '{', '}');
469
+ if (obj && Array.isArray(obj.suggestions)) return obj;
470
+ }
471
+
472
+ // 策略 2: 回退 — 从最后一行提取 JSON (兼容 prompt 指令)
455
473
  const lines = reply.trim().split('\n');
456
- for (let i = lines.length - 1; i >= 0; i--) {
474
+ for (let i = lines.length - 1; i >= Math.max(0, lines.length - 5); i--) {
457
475
  const line = lines[i].trim();
458
476
  if (line.startsWith('{') && line.endsWith('}')) {
459
477
  try {
460
478
  const obj = JSON.parse(line);
461
479
  if (obj.suggestions) return obj;
462
- } catch { /* 继续尝试 */ }
480
+ } catch { /* 继续 */ }
463
481
  }
464
482
  }
465
-
466
- // 策略 2:尝试从 ```json ... ``` 块解析
467
- const codeBlockMatch = reply.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
468
- if (codeBlockMatch) {
469
- try {
470
- const obj = JSON.parse(codeBlockMatch[1].trim());
471
- if (obj.suggestions) return obj;
472
- } catch { /* fallthrough */ }
473
- }
474
-
475
- // 策略 3:尝试找到任何 JSON 对象
476
- const jsonMatch = reply.match(/\{[\s\S]*"suggestions"\s*:\s*\[[\s\S]*\][\s\S]*\}/);
477
- if (jsonMatch) {
478
- try {
479
- const obj = JSON.parse(jsonMatch[0]);
480
- if (obj.suggestions) return obj;
481
- } catch { /* fallthrough */ }
482
- }
483
483
  } catch {
484
- this.#logger.warn('[SignalCollector] failed to parse AI response');
484
+ this.#logger.warn('[SignalCollector] failed to parse structured reply');
485
485
  }
486
486
 
487
- return { suggestions: [], nextIntervalMinutes: null, summary: '' };
487
+ return defaultResult;
488
488
  }
489
489
 
490
490
  // ═══════════════════════════════════════════════════════
@@ -0,0 +1,101 @@
1
+ /**
2
+ * similarity — 统一相似度计算工具
3
+ *
4
+ * 项目内所有文本/向量相似度计算统一使用此模块:
5
+ * - jaccardSimilarity: 基于 token 集合的 Jaccard 系数
6
+ * - cosineSimilarity: 向量余弦相似度
7
+ * - textSimilarity: 高层文本相似度(Jaccard + 可选子串加分)
8
+ * - tokenizeForSimilarity: 通用 bigram 分词(面向相似度场景)
9
+ *
10
+ * @module shared/similarity
11
+ */
12
+
13
/**
 * Tokenize text into a set of whole words plus character n-grams, for use in
 * similarity scoring.
 *
 * The text is lower-cased, and every run of characters other than ASCII
 * letters/digits and CJK ideographs (U+3400–U+4DBF, U+4E00–U+9FFF) is
 * collapsed into a single separator. Each resulting word of at least `n`
 * characters contributes the word itself and all of its length-`n`
 * substrings. Words shorter than `n` contribute no tokens (so with the
 * default n = 2, single characters are not emitted).
 *
 * @param {string} text - Raw text; non-string values are converted with String().
 * @param {number} [n=2] - N-gram length; any value that is not a positive
 *   integer falls back to 2.
 * @returns {Set<string>} Token set (empty for empty or nullish input).
 */
export function tokenizeForSimilarity(text, n = 2) {
  if (!text) return new Set();

  // A zero, fractional or negative size would inject empty/garbage tokens.
  const size = Number.isInteger(n) && n > 0 ? n : 2;

  const normalized = String(text)
    .toLowerCase()
    .replace(/[^a-z0-9\u4e00-\u9fff\u3400-\u4dbf]+/g, ' ')
    .trim();

  const tokens = new Set();
  if (normalized === '') return tokens;

  // After normalization, words are separated by exactly one space.
  for (const word of normalized.split(' ')) {
    if (word.length < size) continue;
    tokens.add(word);
    for (let start = 0; start + size <= word.length; start++) {
      tokens.add(word.slice(start, start + size));
    }
  }
  return tokens;
}
36
+
37
/**
 * Jaccard similarity of two token collections: |A ∩ B| / |A ∪ B|.
 *
 * @param {Set<string>|Iterable<string>} a - Token set A. Any other iterable
 *   (e.g. an array) is de-duplicated into a Set first.
 * @param {Set<string>|Iterable<string>} b - Token set B, same rules as `a`.
 * @returns {number} A value in [0, 1]; 0 when either side is empty or nullish.
 */
export function jaccardSimilarity(a, b) {
  const setA = a instanceof Set ? a : new Set(a ?? []);
  const setB = b instanceof Set ? b : new Set(b ?? []);
  if (setA.size === 0 || setB.size === 0) return 0;

  // Iterate the smaller set so the loop does min(|A|, |B|) lookups.
  const [smaller, larger] = setA.size <= setB.size ? [setA, setB] : [setB, setA];
  let shared = 0;
  for (const token of smaller) {
    if (larger.has(token)) shared++;
  }

  // Both sets are non-empty here, so the union size is always positive.
  return shared / (setA.size + setB.size - shared);
}
56
+
57
/**
 * Cosine similarity of two equal-length numeric vectors:
 * dot(a, b) / (||a|| * ||b||).
 *
 * @param {number[]} a - Vector A.
 * @param {number[]} b - Vector B.
 * @returns {number} A value in [-1, 1] (within [0, 1] when all components are
 *   non-negative). Returns 0 when either vector is missing, empty, of a
 *   different length, has zero magnitude, or when the computation is not
 *   finite (NaN/Infinity components or overflow).
 */
export function cosineSimilarity(a, b) {
  if (!a || !b || a.length !== b.length || a.length === 0) return 0;

  let dot = 0;
  let squaredA = 0;
  let squaredB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    squaredA += a[i] * a[i];
    squaredB += b[i] * b[i];
  }

  const denom = Math.sqrt(squaredA) * Math.sqrt(squaredB);
  if (!(denom > 0)) return 0; // also catches a NaN denominator

  const cosine = dot / denom;
  if (!Number.isFinite(cosine)) return 0;

  // Floating-point rounding can land a hair outside the mathematical range.
  return Math.max(-1, Math.min(1, cosine));
}
75
+
76
/**
 * High-level text similarity: Jaccard similarity over n-gram token sets, with
 * an optional bonus when one text contains the other.
 *
 * @param {string} textA - Text A.
 * @param {string} textB - Text B.
 * @param {object} [opts] - Options; `null` is treated like an omitted argument.
 * @param {number} [opts.n=2] - N-gram length passed to tokenizeForSimilarity.
 * @param {boolean} [opts.substringBonus=false] - When true, add 0.3 (capped at
 *   1.0) if either lower-cased text is a substring of the other.
 * @returns {number} A value in [0, 1].
 */
export function textSimilarity(textA, textB, opts = {}) {
  // A default parameter only covers `undefined`; guard an explicit `null` too.
  const { n = 2, substringBonus = false } = opts ?? {};

  const score = jaccardSimilarity(
    tokenizeForSimilarity(textA, n),
    tokenizeForSimilarity(textB, n),
  );
  if (!substringBonus) return score;

  const lowerA = String(textA || '').toLowerCase();
  const lowerB = String(textB || '').toLowerCase();
  const oneContainsOther =
    lowerA !== '' && lowerB !== '' && (lowerA.includes(lowerB) || lowerB.includes(lowerA));

  return oneContainsOther ? Math.min(1.0, score + 0.3) : score;
}
@@ -0,0 +1,46 @@
1
+ /**
2
+ * token-utils — 统一 Token 估算工具
3
+ *
4
+ * 项目内所有 token 估算统一使用此模块,避免各处使用不同的字符/token 比率。
5
+ *
6
+ * 算法:CJK 字符按 ~2 chars/token,ASCII 字符按 ~4 chars/token。
7
+ * 这与主流 tokenizer (tiktoken / SentencePiece) 的行为一致:
8
+ * - GPT-4 tokenizer: 英文 ~4 chars/token, 中文 ~1.5 chars/token
9
+ * - Gemini (SentencePiece): 类似比率
10
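+ * - Note: the 2 chars/token used here for CJK yields fewer tokens than the ~1.5 chars/token cited above, so CJK-heavy text is slightly under-estimated; leave headroom where a token budget is tight.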
11
+ *
12
+ * @module shared/token-utils
13
+ */
14
+
15
/**
 * Estimate the number of tokens in a text.
 *
 * Every code point at or above U+2E80 (the start of the CJK Radicals
 * Supplement block) is weighted as half a token (~2 characters per token);
 * everything below it as a quarter token (~4 characters per token). Note that
 * the range check is a simple threshold, so it also covers non-CJK code
 * points above U+2E80 such as emoji.
 *
 * @param {string} text - Text to estimate; non-string values are converted
 *   with String().
 * @returns {number} Estimated token count, rounded up (0 for empty/nullish input).
 */
export function estimateTokens(text) {
  if (!text) return 0;

  // Count in quarter-token units so the running sum stays an integer.
  let quarterTokens = 0;
  for (const ch of String(text)) {
    // for...of yields whole code points, so read the code point rather than
    // the first UTF-16 unit.
    quarterTokens += ch.codePointAt(0) >= 0x2e80 ? 2 : 1;
  }
  return Math.ceil(quarterTokens / 4);
}
34
+
35
/**
 * Fast token estimate that does not distinguish scripts: the length of the
 * text in UTF-16 code units divided by 3.5, rounded up.
 *
 * Intended for text that is mostly ASCII, or for callers that only need a
 * rough figure (for example an internal compression threshold).
 *
 * @param {string} text - Text to estimate; non-string values are converted
 *   with String() so the result is never NaN.
 * @returns {number} Estimated token count (0 for empty/nullish input).
 */
export function estimateTokensFast(text) {
  if (!text) return 0;
  const source = typeof text === 'string' ? text : String(text);
  return Math.ceil(source.length / 3.5);
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "autosnippet",
3
- "version": "2.18.0",
3
+ "version": "2.19.0",
4
4
  "description": "AutoSnippet - 连接开发者、AI 与项目知识库的工具",
5
5
  "type": "module",
6
6
  "main": "lib/bootstrap.js",