autosnippet 2.18.0 → 2.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dashboard/dist/assets/{icons-C6kshpB1.js → icons-C7FN32VL.js} +1 -1
- package/dashboard/dist/assets/index-D8dCXLzr.js +129 -0
- package/dashboard/dist/index.html +2 -2
- package/lib/external/ai/AiProvider.js +42 -11
- package/lib/external/ai/providers/ClaudeProvider.js +4 -2
- package/lib/external/ai/providers/GoogleGeminiProvider.js +66 -8
- package/lib/external/ai/providers/OpenAiProvider.js +48 -2
- package/lib/external/mcp/handlers/bootstrap.js +1 -2
- package/lib/http/HttpServer.js +4 -0
- package/lib/http/routes/candidates.js +405 -0
- package/lib/http/routes/search.js +113 -0
- package/lib/infrastructure/vector/Chunker.js +3 -8
- package/lib/infrastructure/vector/JsonVectorAdapter.js +2 -9
- package/lib/service/candidate/SimilarityService.js +7 -35
- package/lib/service/chat/ChatAgent.js +28 -686
- package/lib/service/chat/ContextWindow.js +87 -3
- package/lib/service/chat/ConversationStore.js +3 -4
- package/lib/service/chat/ProjectSemanticMemory.js +9 -14
- package/lib/service/chat/ReasoningLayer.js +10 -54
- package/lib/service/chat/ToolRegistry.js +0 -52
- package/lib/service/chat/tools.js +7 -6
- package/lib/service/cursor/TokenBudget.js +4 -21
- package/lib/service/search/CrossEncoderReranker.js +163 -0
- package/lib/service/search/RetrievalFunnel.js +9 -36
- package/lib/service/skills/SignalCollector.js +28 -28
- package/lib/shared/similarity.js +101 -0
- package/lib/shared/token-utils.js +46 -0
- package/package.json +1 -1
- package/dashboard/dist/assets/index-9byoG7kd.js +0 -129
|
@@ -205,8 +205,8 @@ export class SignalCollector {
|
|
|
205
205
|
this.#logger.debug('[SignalCollector] invoking ChatAgent for analysis...');
|
|
206
206
|
const { reply, toolCalls } = await this.#chatAgent.execute(prompt, { history: [], source: 'system' });
|
|
207
207
|
|
|
208
|
-
// 4. 解析 AI 响应
|
|
209
|
-
const parsed = this.#
|
|
208
|
+
// 4. 解析 AI 响应 — 使用 AiProvider.extractJSON 统一 structured output 解析
|
|
209
|
+
const parsed = this.#parseStructuredReply(reply);
|
|
210
210
|
const suggestions = parsed.suggestions || [];
|
|
211
211
|
|
|
212
212
|
// 5. 过滤已推送
|
|
@@ -444,47 +444,47 @@ ${JSON.stringify(signals.codeChanges, null, 2)}
|
|
|
444
444
|
}
|
|
445
445
|
|
|
446
446
|
// ═══════════════════════════════════════════════════════
|
|
447
|
-
// AI 响应解析
|
|
447
|
+
// AI 响应解析 — 统一使用 AiProvider.extractJSON (Structured Output)
|
|
448
448
|
// ═══════════════════════════════════════════════════════
|
|
449
449
|
|
|
450
|
-
|
|
451
|
-
|
|
450
|
+
/**
|
|
451
|
+
* 从 ChatAgent ReAct 回复中提取结构化 JSON
|
|
452
|
+
*
|
|
453
|
+
* 优先级链:
|
|
454
|
+
* 1. AiProvider.extractJSON (支持 markdown 清理、截断修复、trailing comma 等)
|
|
455
|
+
* 2. 最后一行 JSON 回退 (兼容 prompt 要求的 "最后一行输出 JSON" 格式)
|
|
456
|
+
*
|
|
457
|
+
* @param {string} reply — ChatAgent.execute() 的回复文本
|
|
458
|
+
* @returns {{ suggestions: Array, nextIntervalMinutes: number|null, summary: string }}
|
|
459
|
+
*/
|
|
460
|
+
#parseStructuredReply(reply) {
|
|
461
|
+
const defaultResult = { suggestions: [], nextIntervalMinutes: null, summary: '' };
|
|
462
|
+
if (!reply) return defaultResult;
|
|
452
463
|
|
|
453
464
|
try {
|
|
454
|
-
// 策略 1
|
|
465
|
+
// 策略 1: 通过 AiProvider.extractJSON 统一解析
|
|
466
|
+
const aiProvider = this.#chatAgent?.aiProvider;
|
|
467
|
+
if (aiProvider && typeof aiProvider.extractJSON === 'function') {
|
|
468
|
+
const obj = aiProvider.extractJSON(reply, '{', '}');
|
|
469
|
+
if (obj && Array.isArray(obj.suggestions)) return obj;
|
|
470
|
+
}
|
|
471
|
+
|
|
472
|
+
// 策略 2: 回退 — 从最后一行提取 JSON (兼容 prompt 指令)
|
|
455
473
|
const lines = reply.trim().split('\n');
|
|
456
|
-
for (let i = lines.length - 1; i >= 0; i--) {
|
|
474
|
+
for (let i = lines.length - 1; i >= Math.max(0, lines.length - 5); i--) {
|
|
457
475
|
const line = lines[i].trim();
|
|
458
476
|
if (line.startsWith('{') && line.endsWith('}')) {
|
|
459
477
|
try {
|
|
460
478
|
const obj = JSON.parse(line);
|
|
461
479
|
if (obj.suggestions) return obj;
|
|
462
|
-
} catch { /*
|
|
480
|
+
} catch { /* 继续 */ }
|
|
463
481
|
}
|
|
464
482
|
}
|
|
465
|
-
|
|
466
|
-
// 策略 2:尝试从 ```json ... ``` 块解析
|
|
467
|
-
const codeBlockMatch = reply.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
|
|
468
|
-
if (codeBlockMatch) {
|
|
469
|
-
try {
|
|
470
|
-
const obj = JSON.parse(codeBlockMatch[1].trim());
|
|
471
|
-
if (obj.suggestions) return obj;
|
|
472
|
-
} catch { /* fallthrough */ }
|
|
473
|
-
}
|
|
474
|
-
|
|
475
|
-
// 策略 3:尝试找到任何 JSON 对象
|
|
476
|
-
const jsonMatch = reply.match(/\{[\s\S]*"suggestions"\s*:\s*\[[\s\S]*\][\s\S]*\}/);
|
|
477
|
-
if (jsonMatch) {
|
|
478
|
-
try {
|
|
479
|
-
const obj = JSON.parse(jsonMatch[0]);
|
|
480
|
-
if (obj.suggestions) return obj;
|
|
481
|
-
} catch { /* fallthrough */ }
|
|
482
|
-
}
|
|
483
483
|
} catch {
|
|
484
|
-
this.#logger.warn('[SignalCollector] failed to parse
|
|
484
|
+
this.#logger.warn('[SignalCollector] failed to parse structured reply');
|
|
485
485
|
}
|
|
486
486
|
|
|
487
|
-
return
|
|
487
|
+
return defaultResult;
|
|
488
488
|
}
|
|
489
489
|
|
|
490
490
|
// ═══════════════════════════════════════════════════════
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* similarity — 统一相似度计算工具
|
|
3
|
+
*
|
|
4
|
+
* 项目内所有文本/向量相似度计算统一使用此模块:
|
|
5
|
+
* - jaccardSimilarity: 基于 token 集合的 Jaccard 系数
|
|
6
|
+
* - cosineSimilarity: 向量余弦相似度
|
|
7
|
+
* - textSimilarity: 高层文本相似度(Jaccard + 可选子串加分)
|
|
8
|
+
* - tokenizeForSimilarity: 通用 bigram 分词(面向相似度场景)
|
|
9
|
+
*
|
|
10
|
+
* @module shared/similarity
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
 * Tokenize text into a set of words and character n-grams for similarity scoring.
 *
 * The text is lowercased and every run of characters outside a–z, 0–9,
 * U+4E00–U+9FFF and U+3400–U+4DBF is replaced by a single space. The result is
 * split on whitespace, and each word that is at least `n` characters long
 * contributes the word itself plus every `n`-character substring of it.
 * Words shorter than `n` contribute nothing, so with the default `n = 2` a lone
 * single character (Latin or CJK) produces no token.
 *
 * @param {string} text — raw text
 * @param {number} [n=2] — n-gram length
 * @returns {Set<string>} the token set (empty for falsy input)
 */
export function tokenizeForSimilarity(text, n = 2) {
  if (!text) return new Set();

  const normalized = text
    .toLowerCase()
    .replace(/[^a-z0-9\u4e00-\u9fff\u3400-\u4dbf]+/g, ' ')
    .trim();

  const result = new Set();
  for (const word of normalized.split(/\s+/)) {
    if (word.length >= n) {
      result.add(word);
      const lastStart = word.length - n;
      for (let start = 0; start <= lastStart; start += 1) {
        result.add(word.slice(start, start + n));
      }
    }
  }
  return result;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Jaccard similarity of two token sets: |A ∩ B| / |A ∪ B|.
 *
 * Returns 0 whenever either set is missing or empty — including the case where
 * both are empty — so "no tokens" never counts as a match.
 *
 * @param {Set<string>} a — token set A
 * @param {Set<string>} b — token set B
 * @returns {number} value in the range 0.0 – 1.0
 */
export function jaccardSimilarity(a, b) {
  // A single guard covers "both empty" and "either empty"; both yield 0.
  if (!a || a.size === 0 || !b || b.size === 0) return 0;

  // Iterate the smaller set and probe the larger one to minimise lookups.
  const [smaller, larger] = a.size <= b.size ? [a, b] : [b, a];
  let intersection = 0;
  for (const token of smaller) {
    if (larger.has(token)) intersection += 1;
  }

  const union = a.size + b.size - intersection;
  return union > 0 ? intersection / union : 0;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Cosine similarity of two numeric vectors: dot(a, b) / (‖a‖ · ‖b‖).
 *
 * Returns 0 when either vector is missing, when the lengths differ, when the
 * vectors are empty, or when the product of the norms is not positive
 * (e.g. an all-zero vector).
 *
 * @param {number[]} a — vector A
 * @param {number[]} b — vector B
 * @returns {number} 0.0 – 1.0 when all components are non-negative; may be
 *   negative when components have opposite signs
 */
export function cosineSimilarity(a, b) {
  if (!a || !b) return 0;

  const dims = a.length;
  if (dims !== b.length || dims === 0) return 0;

  let dot = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  for (let k = 0; k < dims; k += 1) {
    const x = a[k];
    const y = b[k];
    dot += x * y;
    sumSquaresA += x * x;
    sumSquaresB += y * y;
  }

  const denom = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
  if (denom > 0) return dot / denom;
  return 0;
}
|
|
75
|
+
|
|
76
|
+
/**
 * High-level text similarity: Jaccard over n-gram token sets, with an optional
 * bonus when one text contains the other.
 *
 * Both texts are tokenized with tokenizeForSimilarity and compared with
 * jaccardSimilarity. When `substringBonus` is enabled and both texts are
 * non-empty, a case-insensitive containment check in either direction adds
 * 0.3 to the score, capped at 1.0.
 *
 * @param {string} textA — text A
 * @param {string} textB — text B
 * @param {object} [opts]
 * @param {number} [opts.n=2] — n-gram length
 * @param {boolean} [opts.substringBonus=false] — enable the +0.3 containment bonus
 * @returns {number} value in the range 0.0 – 1.0
 */
export function textSimilarity(textA, textB, opts = {}) {
  const { n = 2, substringBonus = false } = opts;

  const baseScore = jaccardSimilarity(
    tokenizeForSimilarity(textA, n),
    tokenizeForSimilarity(textB, n),
  );
  if (!substringBonus) return baseScore;

  const foldedA = (textA || '').toLowerCase();
  const foldedB = (textB || '').toLowerCase();
  const oneContainsOther =
    foldedA && foldedB && (foldedA.includes(foldedB) || foldedB.includes(foldedA));

  return oneContainsOther ? Math.min(1.0, baseScore + 0.3) : baseScore;
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* token-utils — 统一 Token 估算工具
|
|
3
|
+
*
|
|
4
|
+
* 项目内所有 token 估算统一使用此模块,避免各处使用不同的字符/token 比率。
|
|
5
|
+
*
|
|
6
|
+
* 算法:CJK 字符按 ~2 chars/token,ASCII 字符按 ~4 chars/token。
|
|
7
|
+
* 这与主流 tokenizer (tiktoken / SentencePiece) 的行为一致:
|
|
8
|
+
* - GPT-4 tokenizer: 英文 ~4 chars/token, 中文 ~1.5 chars/token
|
|
9
|
+
* - Gemini (SentencePiece): 类似比率
|
|
10
|
+
* - 本实现取保守值, 宁多不少
|
|
11
|
+
*
|
|
12
|
+
* @module shared/token-utils
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/**
 * Estimate the number of tokens in a piece of text.
 *
 * Heuristic: every code point at or above U+2E80 (the start of the CJK Radicals
 * Supplement block) counts as half a token (~2 chars/token); everything below
 * counts as a quarter token (~4 chars/token). The threshold is a coarse cut, so
 * any higher code point — not only CJK ideographs — is weighted as "wide".
 *
 * @param {string} text — text to estimate
 * @returns {number} estimated token count, rounded up (0 for falsy input)
 */
export function estimateTokens(text) {
  if (!text) return 0;

  // First code point treated as CJK/wide; inclusive so U+2E80 itself qualifies.
  const WIDE_CHAR_START = 0x2e80;

  // Accumulate in quarter-token units to keep the arithmetic in integers.
  let quarterTokens = 0;
  for (const ch of text) {
    quarterTokens += ch.codePointAt(0) >= WIDE_CHAR_START ? 2 : 1;
  }
  return Math.ceil(quarterTokens / 4);
}
|
|
34
|
+
|
|
35
|
+
/**
 * Quick token estimate that does not distinguish CJK from ASCII: the UTF-16
 * length of the text divided by a flat 3.5 chars/token, rounded up.
 *
 * Intended for English-only text, or mixed-language text where precision does
 * not matter (the original author cites ContextWindow's internal compaction
 * threshold as an example use).
 *
 * @param {string} text
 * @returns {number} estimated token count (0 for falsy input)
 */
export function estimateTokensFast(text) {
  const CHARS_PER_TOKEN = 3.5;
  return text ? Math.ceil(text.length / CHARS_PER_TOKEN) : 0;
}
|