@lorrylurui/code-intelligence-mcp 2.0.8 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -0
- package/dist/cli/eval-analyze-cli.js +314 -0
- package/dist/cli/eval-recommendation-cli.js +297 -0
- package/dist/config/queryRewrite.js +95 -0
- package/dist/config/tuning.js +4 -0
- package/dist/indexer/chunkText.js +36 -4
- package/dist/services/embeddingQueue.js +1 -0
- package/dist/services/recommendationService.js +116 -28
- package/dist/types/evalTrace.js +25 -0
- package/dist/workers/embeddingWorker.js +9 -1
- package/package.json +5 -3
package/README.md
CHANGED
|
@@ -45,3 +45,24 @@
|
|
|
45
45
|
|
|
46
46
|
MYSQL_SYMBOLS_TABLE=frontend_collections_symbols
|
|
47
47
|
INDEX_GLOB=xxx/\*\*/\*.{js,jsx,ts,tsx}
|
|
48
|
+
|
|
49
|
+
## 4) 离线测评
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
npx tsx src/cli/eval-recommendation-cli.ts
|
|
53
|
+
# 或指定 limit
|
|
54
|
+
npx tsx src/cli/eval-recommendation-cli.ts --limit 10
|
|
55
|
+
|
|
56
|
+
npm run eval
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## 5) 分析离线测评结果
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
npm run analyze # 自动读最新结果文件
|
|
63
|
+
npm run analyze -- offline_eval/results/2026-05-27.jsonl # 指定文件
|
|
64
|
+
npm run analyze -- --baseline offline_eval/results/2026-05-26.jsonl # 与 baseline 对比 delta
|
|
65
|
+
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
或者直接引用 `eval-analysis.prompt.md`,对最新的测评结果进行分析。
|
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* eval-analyze-cli.ts — 离线评测结果分析工具
|
|
3
|
+
*
|
|
4
|
+
* 用法:
|
|
5
|
+
* npm run analyze # 自动读取 offline_eval/results/ 最新文件
|
|
6
|
+
* npm run analyze -- offline_eval/results/2026-05-27.jsonl
|
|
7
|
+
* npm run analyze -- --dir offline_eval/results --baseline offline_eval/results/2026-05-26.jsonl
|
|
8
|
+
*
|
|
9
|
+
* 输出:
|
|
10
|
+
* - 关键指标汇总(含与 baseline 对比 delta)
|
|
11
|
+
* - 按语言/符号类型分组 Recall@10
|
|
12
|
+
* - 失败归因分布
|
|
13
|
+
* - 主要发现(自动检测中英文差距、类型推断问题、误触等)
|
|
14
|
+
* - 建议优先级列表
|
|
15
|
+
*/
|
|
16
|
+
import * as fs from 'node:fs';
|
|
17
|
+
import * as path from 'node:path';
|
|
18
|
+
import * as readline from 'node:readline';
|
|
19
|
+
// ─── CLI arguments ───────────────────────────────────────────────────────────
|
|
20
|
+
// Command-line tokens after the node executable and the script path.
const args = process.argv.slice(2);
|
|
21
|
+
/**
 * Read the value that follows a command-line flag.
 *
 * @param {string} flag - Flag to look for, e.g. `--dir`.
 * @param {string} fallback - Returned when the flag is absent or has no usable value.
 * @param {string[]} [argv=args] - Token list to search; defaults to the module-level `args`.
 * @returns {string} The token after `flag`, or `fallback`.
 */
function getArg(flag, fallback, argv = args) {
    const idx = argv.indexOf(flag);
    if (idx === -1) {
        return fallback;
    }
    const value = argv[idx + 1];
    // A following token that is itself a flag means this flag was given without a value
    // (e.g. `--dir --baseline x.jsonl`); do not swallow the next flag.
    if (!value || value.startsWith('--')) {
        return fallback;
    }
    return value;
}
|
|
25
|
+
// Directory scanned for result files when no explicit .jsonl path is given (--dir).
const RESULTS_DIR = getArg('--dir', 'offline_eval/results');
|
|
26
|
+
// Optional baseline result file (--baseline); empty string when the flag is absent.
const BASELINE_PATH = getArg('--baseline', '');
|
|
27
|
+
// ─── Analysis thresholds (tune against real results) ─────────────────────────
const THRESHOLDS = {
    // Findings
    /** en-minus-zh Recall@10 gap above which the "Chinese recall is weak" finding fires. */
    ZH_EN_RECALL_GAP: 0.1,
    /** Recall@10 for `function`-tagged queries below which the type-inference finding fires. */
    FUNC_RECALL_LOW: 0.5,
    /** Share of no_semantic_recall failures above which it is reported as the dominant cause. */
    NO_SEMANTIC_DOMINANCE: 0.5,

    // Recommendations
    /** Share of no_semantic_recall failures above which the attribution-fix advice is printed. */
    NO_SEMANTIC_REC_TRIGGER: 0.3,
    /** quality_gate_rejected count that must be exceeded to print the quality-gate advice. */
    QG_COUNT_MIN: 2,
    /** ranked_below_topk count that must be exceeded to print the ranking advice. */
    RANKED_COUNT_MIN: 1,

    // Presentation
    /** Width, in characters, of the horizontal bar drawn in the terminal. */
    BAR_WIDTH: 20,
    /** Maximum number of zero-recall Chinese query examples shown. */
    ZH_ZERO_EXAMPLE_LIMIT: 4,
    /** Maximum number of false-positive examples shown. */
    FP_EXAMPLE_LIMIT: 3,
};
|
|
48
|
+
/**
 * Decide which result file to analyse.
 *
 * A bare `.jsonl` argument wins. Otherwise the lexicographically last `.jsonl`
 * file in the results directory is used.
 *
 * @param {string[]} [argv=args] - CLI tokens; defaults to the module-level `args`.
 * @param {string} [resultsDir=RESULTS_DIR] - Directory scanned when no explicit path is given.
 * @returns {string} Path of the result file to load.
 * @throws {Error} When the directory is missing or holds no `.jsonl` file.
 */
function findLatestResultsFile(argv = args, resultsDir = RESULTS_DIR) {
    // Tokens that follow these flags are the flag's value, not a positional path.
    // Without this check `--baseline old.jsonl` would be analysed as the current file.
    const valueFlags = new Set(['--dir', '--baseline']);
    const explicit = argv.find((arg, i) => arg.endsWith('.jsonl') &&
        !arg.startsWith('--') &&
        !valueFlags.has(argv[i - 1]));
    if (explicit) {
        return explicit;
    }
    if (!fs.existsSync(resultsDir)) {
        throw new Error(`Results directory not found: ${resultsDir}`);
    }
    const files = fs
        .readdirSync(resultsDir)
        .filter((f) => f.endsWith('.jsonl'))
        .sort();
    if (files.length === 0) {
        throw new Error(`No .jsonl result files found in ${resultsDir}`);
    }
    // Plain string sort: the last entry is the greatest file name.
    return path.join(resultsDir, files[files.length - 1]);
}
|
|
66
|
+
// ─── 文件加载 ─────────────────────────────────────────────────────────────────
|
|
67
|
+
/**
 * Read a JSON Lines file into an array, one parsed value per non-blank line.
 *
 * @param {string} filePath - Path of the `.jsonl` file.
 * @returns {Promise<unknown[]>} Parsed rows in file order.
 * @throws {Error} When a line is not valid JSON; the message names the file and line number.
 */
async function loadResults(filePath) {
    const results = [];
    const input = fs.createReadStream(filePath);
    const rl = readline.createInterface({ input, crlfDelay: Infinity });
    let lineNo = 0;
    try {
        for await (const line of rl) {
            lineNo += 1;
            const trimmed = line.trim();
            if (!trimmed) {
                continue;
            }
            try {
                results.push(JSON.parse(trimmed));
            }
            catch (err) {
                // A bare SyntaxError gives no hint which record is broken.
                throw new Error(`Invalid JSON in ${filePath} at line ${lineNo}: ${err.message}`, { cause: err });
            }
        }
    }
    finally {
        // Release the file handle even when parsing aborts midway.
        rl.close();
        input.destroy();
    }
    return results;
}
|
|
80
|
+
// ─── 工具函数 ─────────────────────────────────────────────────────────────────
|
|
81
|
+
/**
 * Arithmetic mean of a list of numbers; an empty list yields 0.
 */
function avg(nums) {
    const count = nums.length;
    if (count === 0) {
        return 0;
    }
    let total = 0;
    for (const value of nums) {
        total += value;
    }
    return total / count;
}
|
|
89
|
+
/**
 * Format a ratio as a percentage with one decimal place, e.g. 0.5 -> "50.0%".
 */
function pct(n) {
    const scaled = n * 100;
    return `${scaled.toFixed(1)}%`;
}
|
|
92
|
+
/**
 * Render the change of a metric against its baseline as " (+x.x%)" / " (-x.x%)".
 * Returns an empty string when no baseline value is available.
 */
function delta(curr, base) {
    if (base === undefined) {
        return '';
    }
    const change = curr - base;
    const formatted = `${(change * 100).toFixed(1)}%`;
    // Negative numbers already carry their sign; only non-negative ones need a "+".
    return change >= 0 ? ` (+${formatted})` : ` (${formatted})`;
}
|
|
99
|
+
/**
 * Mean `recallMain` over the positive rows that carry a given tag.
 *
 * @param {object[]} results - Eval result rows.
 * @param {string} tag - Tag to select, e.g. `zh` or `hook`.
 * @returns {{recall: number, count: number}} Mean recall (0 when nothing matches) and row count.
 */
function recallByTag(results, tag) {
    // Rows whose query case had no `tags` lose the field on serialisation; treat that as "no tags"
    // instead of crashing on `undefined.includes`.
    const tagged = results.filter((r) => !r.isNegativeSample && Array.isArray(r.tags) && r.tags.includes(tag));
    const total = tagged.reduce((sum, r) => sum + (r.recallMain ?? 0), 0);
    return {
        recall: tagged.length === 0 ? 0 : total / tagged.length,
        count: tagged.length,
    };
}
|
|
106
|
+
/**
 * Aggregate per-query rows into one summary object.
 *
 * Mean metrics treat a missing/null field as 0. Ratios use a denominator of 1
 * when the corresponding row list is empty, so they come out as 0.
 */
function computeMetrics(positive, negative) {
    const meanOf = (field) => {
        if (positive.length === 0) {
            return 0;
        }
        let total = 0;
        for (const row of positive) {
            total += row[field] ?? 0;
        }
        return total / positive.length;
    };
    const share = (rows, predicate) => rows.filter(predicate).length / (rows.length || 1);
    const recallMain = meanOf('recallMain');
    const recall50 = meanOf('recall50');
    const firstHitScore = meanOf('firstHitScore');
    const rankingQuality = meanOf('rankingQuality');
    const coverage = share(positive, (r) => (r.recallMain ?? 0) > 0);
    const top1Acc = share(positive, (r) => r.top1Correct === true);
    const fpRate = share(negative, (r) => r.falsePositive);
    return { recallMain, recall50, firstHitScore, rankingQuality, coverage, top1Acc, fpRate };
}
|
|
123
|
+
// ─── 主分析逻辑 ───────────────────────────────────────────────────────────────
|
|
124
|
+
/**
 * Load one eval result file, aggregate it and print a report to stdout.
 *
 * The report has five parts: key metrics (with deltas when a baseline is loaded),
 * Recall@10 per tag, failure attribution, findings, and prioritised recommendations.
 *
 * @returns {Promise<void>}
 * @throws {Error} Propagates errors from locating or parsing the result files.
 */
async function analyze() {
    const filePath = findLatestResultsFile();
    const fileName = path.basename(filePath);
    // A row can lack `tags` / `failures` (JSON.stringify drops undefined fields);
    // default them once so every later `.includes` / `.flatMap` is safe.
    const withDefaults = (row) => ({
        ...row,
        tags: Array.isArray(row.tags) ? row.tags : [],
        failures: Array.isArray(row.failures) ? row.failures : [],
    });
    const results = (await loadResults(filePath)).map(withDefaults);
    const positive = results.filter((r) => !r.isNegativeSample);
    const negative = results.filter((r) => r.isNegativeSample);
    const metrics = computeMetrics(positive, negative);
    // Without a baseline, baseMetrics stays undefined and delta() prints nothing.
    let baseMetrics;
    if (BASELINE_PATH) {
        if (fs.existsSync(BASELINE_PATH)) {
            const baseResults = await loadResults(BASELINE_PATH);
            const basePos = baseResults.filter((r) => !r.isNegativeSample);
            const baseNeg = baseResults.filter((r) => r.isNegativeSample);
            baseMetrics = computeMetrics(basePos, baseNeg);
        }
        else {
            // A mistyped --baseline used to be ignored silently, hiding the missing deltas.
            console.warn(`Baseline file not found, skipping delta: ${BASELINE_PATH}`);
        }
    }
    // Failure attribution counts
    const allFailures = positive.flatMap((r) => r.failures);
    const failureCounts = {};
    for (const f of allFailures) {
        failureCounts[f.type] = (failureCounts[f.type] ?? 0) + 1;
    }
    const totalFailures = allFailures.length;
    // Recall grouped by language and symbol-type tags; tags with no rows are omitted.
    const tagGroups = [
        'en',
        'zh',
        'zh-en',
        'component',
        'hook',
        'function',
        'util',
        'form',
    ];
    const tagRecalls = new Map();
    for (const tag of tagGroups) {
        const stat = recallByTag(positive, tag);
        if (stat.count > 0) {
            tagRecalls.set(tag, stat);
        }
    }
    const zeroRecall = positive.filter((r) => r.recallMain === 0);
    const fpExamples = negative.filter((r) => r.falsePositive);
    // Values shared by the findings and recommendations sections.
    const zhStat = tagRecalls.get('zh');
    const enStat = tagRecalls.get('en');
    const zhEnGap = zhStat && enStat ? enStat.recall - zhStat.recall : 0;
    const zhIsWeak = Boolean(zhStat && enStat) && zhEnGap > THRESHOLDS.ZH_EN_RECALL_GAP;
    const funcStat = tagRecalls.get('function');
    const funcIsLow = Boolean(funcStat) && funcStat.recall < THRESHOLDS.FUNC_RECALL_LOW;
    const noSemanticCount = failureCounts['no_semantic_recall'] ?? 0;
    const noSemanticShare = noSemanticCount / (totalFailures || 1);
    // ─── Report ────────────────────────────────────────────────────────────────
    const sep = '='.repeat(60);
    const sub = '─'.repeat(60);
    console.log('\n' + sep);
    console.log(`数据来源:${fileName}`);
    console.log(sep);
    // ── Key metrics ──
    console.log('\n关键指标\n');
    console.log(` 召回率(Recall@10): ${pct(metrics.recallMain).padStart(7)}${delta(metrics.recallMain, baseMetrics?.recallMain)}`);
    console.log(` 首位命中分(MRR@10): ${pct(metrics.firstHitScore).padStart(7)}${delta(metrics.firstHitScore, baseMetrics?.firstHitScore)}`);
    console.log(` 首条准确率(Top-1): ${pct(metrics.top1Acc).padStart(7)}${delta(metrics.top1Acc, baseMetrics?.top1Acc)}`);
    console.log(` 误触率(FP): ${pct(metrics.fpRate).padStart(7)}${delta(metrics.fpRate, baseMetrics?.fpRate)}`);
    console.log(`\n 总 query 数:${results.length}(正例 ${positive.length},负例 ${negative.length})`);
    // ── Recall per tag ──
    console.log('\n' + sub);
    console.log('按语言/符号类型 Recall@10\n');
    for (const [tag, stat] of tagRecalls) {
        const bar = '█'
            .repeat(Math.round(stat.recall * THRESHOLDS.BAR_WIDTH))
            .padEnd(THRESHOLDS.BAR_WIDTH);
        console.log(` ${tag.padEnd(12)} ${bar} ${pct(stat.recall).padStart(7)} (${stat.count} queries)`);
    }
    // ── Failure attribution ──
    console.log('\n' + sub);
    console.log('失败归因分布\n');
    const failureActionHints = {
        no_semantic_recall: '→ 扩展 queryVariants / 中文同义词映射 / 调大 SYMBOL_TOP_K',
        quality_gate_rejected: '→ 按 type 降低质量门控阈值',
        ranked_below_topk: '→ 调整 RANK_WEIGHTS / LITERAL_MATCH_PRIORITY_BOOST',
        reusability_filtered: '→ 检查 isReusableCandidate 路径规则',
        structure_filtered: '→ 检查 category 过滤条件',
    };
    const sortedFailures = Object.entries(failureCounts).sort((a, b) => b[1] - a[1]);
    for (const [type, count] of sortedFailures) {
        const p = totalFailures > 0
            ? ((count / totalFailures) * 100).toFixed(1)
            : '0.0';
        const hint = failureActionHints[type] ?? '';
        console.log(` ${type.padEnd(28)} ${String(count).padStart(3)} (${p}%) ${hint}`);
    }
    // ── Findings ──
    console.log('\n' + sub);
    console.log('主要发现\n');
    const findings = [];
    // Finding 1: gap between English and Chinese recall
    if (zhIsWeak) {
        const zhZero = positive
            .filter((r) => r.tags.includes('zh') && r.recallMain === 0)
            .map((r) => `"${r.query}"`)
            .slice(0, THRESHOLDS.ZH_ZERO_EXAMPLE_LIMIT);
        findings.push(`中文 query 召回明显弱于英文\n` +
            ` 中文 Recall@10 = ${pct(zhStat.recall)},英文 = ${pct(enStat.recall)},差距 ${pct(zhEnGap)}\n` +
            ` 零召回中文 query 示例:${zhZero.join('、')}`);
    }
    // Finding 2: low recall on function-tagged queries
    if (funcIsLow) {
        findings.push(`函数类 query 召回偏低(Recall@10 = ${pct(funcStat.recall)})\n` +
            ` 可能原因:类型推断关键词缺少 function/formatter/validate,导致回退到 component 候选池`);
    }
    // Finding 3: no_semantic_recall dominates the failures
    if (noSemanticShare > THRESHOLDS.NO_SEMANTIC_DOMINANCE) {
        findings.push(`失败以 no_semantic_recall 为主(${noSemanticCount}/${totalFailures} 条,${(noSemanticShare * 100).toFixed(0)}%)\n` +
            ` 注意:该分类会掩盖 quality_gate_rejected 等更具体原因(当 DB 中无对应 id 时降级记录)`);
    }
    // Finding 4: false positives on negative samples
    if (fpExamples.length > 0) {
        findings.push(`负例误触 ${fpExamples.length} 条(False Positive = ${pct(metrics.fpRate)})\n` +
            ` 示例:${fpExamples
                .slice(0, THRESHOLDS.FP_EXAMPLE_LIMIT)
                .map((r) => `"${r.query}"`)
                .join('、')}`);
    }
    // Finding 5: number of positive queries with zero recall
    if (zeroRecall.length > 0) {
        const byTag = (tag) => zeroRecall.filter((r) => r.tags.includes(tag)).length;
        findings.push(`${zeroRecall.length} 条正例 Recall@10 = 0\n` +
            ` 其中 zh:${byTag('zh')},en:${byTag('en')},component:${byTag('component')},hook:${byTag('hook')},function:${byTag('function')}`);
    }
    findings.forEach((f, i) => console.log(`${i + 1}. ${f}\n`));
    // ── Recommendations ──
    console.log(sub);
    console.log('建议优先级(按预期收益排序)\n');
    const recs = [];
    if (zhIsWeak) {
        recs.push({
            priority: '高',
            action: '强化中文同义词扩展(导航栏→navigation bar、日期格式化→format date 等)',
            reason: `中英召回差距 ${pct(zhEnGap)},影响 ${zhStat.count} 条 query`,
        });
    }
    if (funcIsLow) {
        recs.push({
            priority: '高',
            action: '补类型推断关键词(category: function、query: formatter/validate/format)',
            reason: `函数类 Recall@10 = ${pct(funcStat.recall)},类型误判导致错误候选池`,
        });
    }
    if (noSemanticShare > THRESHOLDS.NO_SEMANTIC_REC_TRIGGER) {
        recs.push({
            priority: '中高',
            action: '修正 eval 失败归因逻辑(name+path 查 DB id,避免 no_semantic_recall 兜底掩盖)',
            reason: `${noSemanticCount} 条记为 no_semantic_recall,诊断精度不足`,
        });
    }
    const qgCount = failureCounts['quality_gate_rejected'] ?? 0;
    if (qgCount > THRESHOLDS.QG_COUNT_MIN) {
        recs.push({
            priority: '中高',
            action: '按 symbol type 分阈值降低质量门控(函数/Hook 可比组件更宽松)',
            reason: `${qgCount} 条被 quality gate 拦截`,
        });
    }
    const rankedCount = failureCounts['ranked_below_topk'] ?? 0;
    if (rankedCount > THRESHOLDS.RANKED_COUNT_MIN) {
        recs.push({
            priority: '中',
            action: '加强 index-priority tie-break(同目录 index/menu/panel 命中时强制 index 优先)',
            reason: `${rankedCount} 条有正确候选但排名未进 Top-K`,
        });
    }
    // Nothing triggered: fall back to a generic suggestion.
    if (recs.length === 0) {
        recs.push({
            priority: '中',
            action: '持续扩充 query_set.jsonl 覆盖更多边界场景',
            reason: '当前指标已较好,可增加测试覆盖度',
        });
    }
    recs.forEach((r, i) => {
        console.log(`${i + 1}. [${r.priority}] ${r.action}`);
        console.log(` 原因:${r.reason}`);
        console.log();
    });
    console.log(sep + '\n');
}
|
|
311
|
+
// Entry point: run the analysis; on failure print the message and exit with code 1.
analyze().catch((err) => {
    const message = err instanceof Error ? err.message : err;
    console.error(message);
    process.exit(1);
});
|
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* eval-recommendation-cli.ts — 推荐质量离线评测 CLI
|
|
3
|
+
*
|
|
4
|
+
* 用法:
|
|
5
|
+
* npx tsx src/cli/eval-recommendation-cli.ts [--query-set offline_eval/query_set.jsonl] [--limit 10] [--output offline_eval/results/]
|
|
6
|
+
*
|
|
7
|
+
* 输出:
|
|
8
|
+
* - stdout: 评测摘要(Recall@10 / Recall@50 / MRR@10 / nDCG@10)
|
|
9
|
+
* - results/<date>.jsonl: 每条 query 的详细结果 + 失败分类
|
|
10
|
+
*/
|
|
11
|
+
import * as fs from 'node:fs';
|
|
12
|
+
import * as path from 'node:path';
|
|
13
|
+
import * as readline from 'node:readline';
|
|
14
|
+
import { RecommendationService } from '../services/recommendationService.js';
|
|
15
|
+
import { SymbolRepository } from '../repositories/symbolRepository.js';
|
|
16
|
+
import { classifySymbolFailure } from '../types/evalTrace.js';
|
|
17
|
+
// ─── CLI arguments ───────────────────────────────────────────────────────────
|
|
18
|
+
// Command-line tokens after the node executable and the script path.
const args = process.argv.slice(2);
|
|
19
|
+
/**
 * Read the value that follows a command-line flag.
 *
 * @param {string} flag - Flag to look for, e.g. `--limit`.
 * @param {string} fallback - Returned when the flag is absent or has no usable value.
 * @param {string[]} [argv=args] - Token list to search; defaults to the module-level `args`.
 * @returns {string} The token after `flag`, or `fallback`.
 */
function getArg(flag, fallback, argv = args) {
    const idx = argv.indexOf(flag);
    if (idx === -1) {
        return fallback;
    }
    const value = argv[idx + 1];
    // A following token that is itself a flag means this flag was given without a value
    // (e.g. `--output --limit 5`); do not swallow the next flag.
    if (!value || value.startsWith('--')) {
        return fallback;
    }
    return value;
}
|
|
23
|
+
// Query-set file to evaluate (--query-set).
const QUERY_SET_PATH = getArg('--query-set', 'offline_eval/query_set.jsonl');
// Directory the per-query result file is written to (--output).
const OUTPUT_DIR = getArg('--output', 'offline_eval/results');
// Cut-off K for Recall@K / MRR@K / nDCG@K (--limit).
const TOP_K_MAIN = Number(getArg('--limit', '10'));
// A non-numeric --limit used to become NaN, which made every slice(0, K) empty and
// silently scored all queries 0; reject it up front.
if (!Number.isInteger(TOP_K_MAIN) || TOP_K_MAIN < 1) {
    throw new Error(`--limit must be a positive integer, got: ${getArg('--limit', '10')}`);
}
// Depth of the wide recall measurement (Recall@50); not the size of the query set.
const RECALL_WIDE_K = 50;
// Entries with rel >= 1 count as relevant.
const REL_RELEVANT_MIN = 1;
// rel = 2 marks the primary answer / highest relevance.
const REL_PRIMARY = 2;
|
|
29
|
+
// ─── 指标计算 ─────────────────────────────────────────────────────────────────
|
|
30
|
+
/**
 * Recall@K: share of the relevant expected entries whose name appears in the first K results.
 * Ranking order inside the top K does not matter.
 *
 * @param {string[]} returnedNames - Result names in ranked order.
 * @param {{name: string, rel: number}[]} expected - Labelled expectations.
 * @param {number} k - Cut-off depth.
 * @param {number} [minRel=REL_RELEVANT_MIN] - Lowest `rel` that counts as relevant.
 * @returns {number} Value in [0, 1]; 1 when nothing is relevant (nothing to miss).
 */
function recallAtK(returnedNames, expected, k, minRel = REL_RELEVANT_MIN) {
    const relevant = expected.filter((e) => e.rel >= minRel);
    if (relevant.length === 0) {
        return 1;
    }
    // Set lookup instead of scanning the top-K array once per expected entry.
    const topK = new Set(returnedNames.slice(0, k));
    const hits = relevant.filter((e) => topK.has(e.name));
    return hits.length / relevant.length;
}
|
|
44
|
+
/**
 * Reciprocal rank within the top K: 1/r when the first relevant result sits at
 * 1-based position r, 0 when none of the first K results is relevant.
 *
 * @param {string[]} returnedNames - Result names in ranked order.
 * @param {{name: string, rel: number}[]} expected - Labelled expectations.
 * @param {number} k - Cut-off depth.
 * @param {number} [minRel=REL_RELEVANT_MIN] - Lowest `rel` that counts as relevant.
 * @returns {number} Reciprocal rank in [0, 1].
 */
function mrrAtK(returnedNames, expected, k, minRel = REL_RELEVANT_MIN) {
    const relevantNames = new Set(expected.filter((e) => e.rel >= minRel).map((e) => e.name));
    const firstHit = returnedNames
        .slice(0, k)
        .findIndex((name) => relevantNames.has(name));
    // Earlier hits score higher; no hit scores 0.
    return firstHit === -1 ? 0 : 1 / (firstHit + 1);
}
|
|
58
|
+
/**
 * nDCG@K: discounted cumulative gain of the top K results, using gain 2^rel - 1 and
 * discount log2(position + 1), divided by the gain of the ideal ordering of `expected`.
 *
 * @param {string[]} returnedNames - Result names in ranked order.
 * @param {{name: string, rel: number}[]} expected - Labelled expectations (graded `rel`).
 * @param {number} k - Cut-off depth.
 * @returns {number} Value in [0, 1]; 1 when the ideal gain is 0 (nothing relevant expected).
 */
function ndcgAtK(returnedNames, expected, k) {
    const gainAt = (rel, idx) => (Math.pow(2, rel) - 1) / Math.log2(idx + 2);
    // Each expected entry may be credited once. Without this, a name that is returned
    // twice would earn its gain twice and push the score above 1.
    const remaining = new Map();
    for (const { name, rel } of expected) {
        if (!remaining.has(name)) {
            remaining.set(name, []);
        }
        remaining.get(name).push(rel);
    }
    for (const rels of remaining.values()) {
        rels.sort((a, b) => b - a);
    }
    let dcg = 0;
    returnedNames.slice(0, k).forEach((name, idx) => {
        const rel = remaining.get(name)?.shift() ?? 0;
        dcg += gainAt(rel, idx);
    });
    const idcg = expected
        .map((e) => e.rel)
        .sort((a, b) => b - a)
        .slice(0, k)
        .reduce((sum, rel, idx) => sum + gainAt(rel, idx), 0);
    return idcg === 0 ? 1 : dcg / idcg;
}
|
|
79
|
+
/**
 * List a failure record for every relevant expected entry that is absent from
 * `returnedNames`.
 *
 * When both a trace and an id for the name are available, the failure type comes from
 * `classifySymbolFailure`; a verdict of `'found'` produces no record. Otherwise the entry
 * is recorded as `'no_semantic_recall'`.
 *
 * NOTE(review): the visible caller builds `idByName` from the returned results only, so a
 * name missing from `returnedNames` presumably never has an id here — confirm whether the
 * traced branch is reachable.
 */
function classifyFailuresFromTrace(expected, returnedNames, evalTrace, idByName) {
    const failures = [];
    const missing = expected.filter((e) => e.rel >= REL_RELEVANT_MIN && !returnedNames.includes(e.name));
    for (const { name, path: expectedPath } of missing) {
        const id = idByName.get(name);
        const traceable = evalTrace !== undefined && id !== undefined;
        // No trace or no id: fall back to the coarsest failure category.
        const type = traceable
            ? classifySymbolFailure(id, evalTrace)
            : 'no_semantic_recall';
        if (type === 'found') {
            continue;
        }
        failures.push({ name, expectedPath, type });
    }
    return failures;
}
|
|
110
|
+
// ─── ID 解析(从返回结果中建立 name→id 映射) ────────────────────────────────
|
|
111
|
+
/**
 * Build a name -> id lookup from the recommended item (if any) followed by the
 * alternatives. When a name occurs more than once, the later id wins.
 */
function buildIdMapFromResult(recommended, alternatives) {
    const items = recommended ? [recommended, ...alternatives] : [...alternatives];
    return new Map(items.map(({ name, id }) => [name, id]));
}
|
|
118
|
+
// ─── 汇总统计 ─────────────────────────────────────────────────────────────────
|
|
119
|
+
/**
 * Arithmetic mean of a list of numbers; an empty list yields 0.
 */
function avg(nums) {
    const count = nums.length;
    if (count === 0) {
        return 0;
    }
    let total = 0;
    for (const value of nums) {
        total += value;
    }
    return total / count;
}
|
|
124
|
+
/**
 * Format a ratio as a percentage with one decimal place, e.g. 0.5 -> "50.0%".
 */
function formatPct(n) {
    const scaled = n * 100;
    return `${scaled.toFixed(1)}%`;
}
|
|
127
|
+
/**
 * Print the aggregate eval report: headline metrics, then a failure breakdown.
 *
 * @param {object[]} results - Per-query result rows produced by the eval run.
 * @param {number} kMain - Cut-off K shown in the metric labels.
 * @param {object|null} baseline - Optional metric values to diff against; falsy disables deltas.
 * @returns {void}
 */
function printSummary(results, kMain, baseline) {
    const positive = results.filter((r) => !r.isNegativeSample);
    const negative = results.filter((r) => r.isNegativeSample);
    const recallMain = avg(positive.map((r) => r.recallMain ?? 0));
    const firstHitScore = avg(positive.map((r) => r.firstHitScore ?? 0));
    const top1Acc = positive.filter((r) => r.top1Correct === true).length /
        (positive.length || 1);
    const fpRate = negative.filter((r) => r.falsePositive).length / (negative.length || 1);
    // Suffix such as " (+1.2%)" when the baseline carries the metric, otherwise "".
    const diff = (metric, val) => {
        if (!baseline || !(metric in baseline)) {
            return '';
        }
        const change = val - baseline[metric];
        return change >= 0
            ? ` (+${formatPct(change)})`
            : ` (${formatPct(change)})`;
    };
    console.log('\n' + '='.repeat(60));
    console.log(`=== Eval Report ${new Date().toISOString().slice(0, 10)} ===`);
    console.log('='.repeat(60));
    console.log(`Queries total: ${results.length} (positive: ${positive.length}, negative: ${negative.length})`);
    console.log('');
    console.log(`召回率(Recall@${kMain}): ${formatPct(recallMain)}${diff('recallMain', recallMain)}`);
    console.log(`首位命中分(MRR@${kMain}): ${formatPct(firstHitScore)}${diff('firstHitScore', firstHitScore)}`);
    console.log(`首条准确率(Top-1): ${formatPct(top1Acc)}${diff('top1Acc', top1Acc)}`);
    console.log(`误触率(FP): ${formatPct(fpRate)} (负例被错误推荐)`);
    console.log('');
    // ── Failure breakdown ──
    const failuresOf = (r) => (Array.isArray(r.failures) ? r.failures : []);
    const allFailures = positive.flatMap(failuresOf);
    // Known types are pre-seeded so they print in a fixed order even at 0.
    const failureCounts = {
        no_semantic_recall: 0,
        reusability_filtered: 0,
        structure_filtered: 0,
        ranked_below_topk: 0,
        quality_gate_rejected: 0,
        found: 0,
    };
    for (const f of allFailures) {
        // `++` on an unseeded type would store NaN; start unknown types at 0 instead.
        failureCounts[f.type] = (failureCounts[f.type] ?? 0) + 1;
    }
    // NOTE(review): this denominator adds only 1 for a fully recalled query regardless of
    // how many entries it expected — confirm that this is the intended base for the percentages.
    const totalExpected = positive.reduce((s, r) => s + failuresOf(r).length + (r.recallMain === 1 ? 1 : 0), 0);
    console.log('--- Failure Breakdown ---');
    const failureActionHints = {
        no_semantic_recall: '→ 调大 SYMBOL_TOP_K / 增加 queryVariants 数量',
        reusability_filtered: '→ 检查 isReusableCandidate 路径规则是否误杀',
        structure_filtered: '→ 检查 category 过滤条件',
        ranked_below_topk: '→ 调整 RANK_WEIGHTS / LITERAL_MATCH_PRIORITY_BOOST',
        quality_gate_rejected: '→ 调低 MIN_RECOMMENDATION_SCORE 阈值',
        found: '',
    };
    for (const [type, count] of Object.entries(failureCounts)) {
        if (type === 'found') {
            continue;
        }
        const pct = totalExpected > 0
            ? ((count / totalExpected) * 100).toFixed(1)
            : '0.0';
        const hint = failureActionHints[type] ?? '';
        console.log(` ${type.padEnd(26)} ${String(count).padStart(3)} (${pct}%) ${hint}`);
    }
    console.log('='.repeat(60) + '\n');
}
|
|
190
|
+
// ─── 主流程 ───────────────────────────────────────────────────────────────────
|
|
191
|
+
/**
 * Read a JSON Lines query set. Blank lines and lines starting with `#` or `//` are skipped.
 *
 * @param {string} filePath - Path of the query-set file.
 * @returns {Promise<object[]>} Parsed query cases in file order.
 * @throws {Error} When a line is not valid JSON; the message names the file and line number.
 */
async function loadQuerySet(filePath) {
    const cases = [];
    const input = fs.createReadStream(filePath);
    const rl = readline.createInterface({ input, crlfDelay: Infinity });
    let lineNo = 0;
    try {
        for await (const line of rl) {
            lineNo += 1;
            const trimmed = line.trim();
            const isComment = trimmed.startsWith('#') || trimmed.startsWith('//');
            if (!trimmed || isComment) {
                continue;
            }
            try {
                cases.push(JSON.parse(trimmed));
            }
            catch (err) {
                // A bare SyntaxError gives no hint which query case is broken.
                throw new Error(`Invalid JSON in ${filePath} at line ${lineNo}: ${err.message}`, { cause: err });
            }
        }
    }
    finally {
        // Release the file handle even when parsing aborts midway.
        rl.close();
        input.destroy();
    }
    return cases;
}
|
|
207
|
+
/**
 * Run every query case through the recommendation service, score it, print a per-query
 * line and a summary, and write the per-query rows to `<OUTPUT_DIR>/<date>.jsonl`.
 *
 * Exits the process with code 1 when any positive query has zero recall at the main
 * cut-off, so a CI job fails.
 *
 * @returns {Promise<void>}
 */
async function runEval() {
    console.log(`Loading query set: ${QUERY_SET_PATH}`);
    const cases = await loadQuerySet(QUERY_SET_PATH);
    console.log(`Loaded ${cases.length} queries. Running eval with limit=${TOP_K_MAIN}/${RECALL_WIDE_K}...\n`);
    const repository = new SymbolRepository();
    const service = new RecommendationService(repository);
    // One wide request feeds both cut-offs. It must be at least as deep as the main K,
    // otherwise `--limit` above RECALL_WIDE_K would be scored on a truncated list.
    const fetchLimit = Math.max(RECALL_WIDE_K, TOP_K_MAIN);
    const results = [];
    for (const queryCase of cases) {
        // A case with no expected entries is a negative sample: nothing should be returned.
        const isNegative = queryCase.expected.length === 0;
        const wideResult = await service.recommendComponent({
            ...queryCase.input,
            limit: fetchLimit,
            evalMode: true,
        });
        const allReturned = [
            ...(wideResult.recommended ? [wideResult.recommended] : []),
            ...wideResult.alternatives,
        ];
        const wideNames = allReturned.map((item) => item.name);
        const mainNames = wideNames.slice(0, TOP_K_MAIN);
        const idByName = buildIdMapFromResult(wideResult.recommended, wideResult.alternatives);
        // Ranking metrics are undefined for negative samples and stored as null.
        const recallMain = isNegative
            ? null
            : recallAtK(mainNames, queryCase.expected, TOP_K_MAIN);
        const recall50 = isNegative
            ? null
            : recallAtK(wideNames, queryCase.expected, RECALL_WIDE_K);
        const firstHitRank = isNegative
            ? null
            : mrrAtK(mainNames, queryCase.expected, TOP_K_MAIN);
        const top1Correct = isNegative
            ? null
            : queryCase.expected.some((e) => e.rel === REL_PRIMARY &&
                wideResult.recommended?.name === e.name);
        const failures = isNegative
            ? []
            : classifyFailuresFromTrace(queryCase.expected, wideNames, wideResult.evalTrace, idByName);
        const falsePositive = isNegative && allReturned.length > 0;
        results.push({
            queryId: queryCase.id,
            query: queryCase.input.query,
            tags: queryCase.tags,
            recallMain,
            recall50,
            firstHitScore: firstHitRank,
            top1Correct,
            returnedNames: mainNames,
            failures,
            isNegativeSample: isNegative,
            falsePositive,
        });
        let status;
        if (isNegative) {
            // Negative sample: any returned result is a false positive.
            status = falsePositive ? '✗ False Positive' : '✓ True Negative';
        }
        else if (recallMain === 1) {
            status = `✓ R@${TOP_K_MAIN}=1.0 完全召回`;
        }
        else {
            status = `✗ R@${TOP_K_MAIN}=${(recallMain ?? 0).toFixed(2)} 不完全召回`;
        }
        console.log(` [${queryCase.id}] ${queryCase.input.query.slice(0, 40).padEnd(40)} ${status}`);
    }
    printSummary(results, TOP_K_MAIN, null);
    if (OUTPUT_DIR) {
        fs.mkdirSync(OUTPUT_DIR, { recursive: true });
        const dateStr = new Date().toISOString().slice(0, 10);
        const outPath = path.join(OUTPUT_DIR, `${dateStr}.jsonl`);
        const lines = results.map((r) => JSON.stringify(r)).join('\n');
        fs.writeFileSync(outPath, lines + '\n', 'utf8');
        console.log(`Report written to: ${outPath}`);
    }
    // A positive query with no relevant result at all is treated as severe:
    // list the offenders and exit non-zero so CI fails.
    const zeroRecall = results.filter((r) => !r.isNegativeSample && r.recallMain === 0);
    if (zeroRecall.length > 0) {
        console.log(`\nWARN: ${zeroRecall.length} positive queries have Recall@${TOP_K_MAIN}=0:`);
        for (const r of zeroRecall) {
            console.log(` [${r.queryId}] ${r.query}`);
        }
        process.exit(1);
    }
}
|
|
294
|
+
// Entry point: run the evaluation; on failure log the error and exit with code 1.
runEval().catch((error) => {
    console.error('Eval failed:', error);
    process.exit(1);
});
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* queryRewrite.ts — query 预处理配置:噪音词清洗 + 同义词/别名扩展。
|
|
3
|
+
*
|
|
4
|
+
* 用于 buildQueryVariants,通过消除口语化干扰词和补充同义词变体,
|
|
5
|
+
* 提升语义检索的 recall(尤其是中英混用、别名查询场景)。
|
|
6
|
+
*/
|
|
7
|
+
// ─── 噪音词清洗正则(从 recommendationService 迁移) ────────────────────────
|
|
8
|
+
/**
 * Patterns for colloquial filler that carries no search meaning, meant to be applied
 * to the raw query in order.
 *
 * Every pattern has the /g flag. Per the original note, collapsing the extra whitespace
 * left behind by the replacements is the caller's job.
 *
 * The English type words are bounded with \b so they are removed only as whole words:
 * an unbounded `hook` / `util` also ate parts of "webhook", "useHookState" or "utility".
 * \b still matches next to CJK characters, so "防抖hook" and "hook实现" are cleaned.
 */
export const NOISE_PATTERNS = [
    /^帮我找(找)?(一个|一下)?/g,
    /^有没有(现成的)?/g,
    /^请推荐(一个|一下)?/g,
    /可复用/g,
    /现成的/g,
    /封装好的/g,
    /(组件|函数|工具|\bhooks?\b|\butils?\b)(实现)?/gi,
];
|
|
21
|
+
// ─── 同义词/别名字典 ─────────────────────────────────────────────────────────
|
|
22
|
+
/**
 * Synonym / alias dictionary.
 *
 * Each key is the preferred English root of one concept; its value lists other ways of
 * naming the same concept (Chinese terms, abbreviations, aliases).
 *
 * Convention for new entries: use the shortest, most generic English root as the key
 * and put the Chinese terms first in the value array.
 */
export const SYNONYM_MAP = {
    // Form inputs
    input: ['输入框', '输入', 'textfield', 'textinput'],
    textarea: ['文本域', '多行输入', 'multiline'],
    select: ['选择器', '下拉框', '下拉', 'dropdown'],
    checkbox: ['复选框', '勾选'],
    radio: ['单选框', '单选'],

    // Overlays
    dialog: ['弹窗', '弹框', '对话框', 'modal', 'popup'],
    tooltip: ['提示', '气泡提示', '悬浮提示', 'popover'],
    drawer: ['抽屉', '侧边栏', 'sidebar'],

    // Feedback
    loading: ['加载', '加载中', 'spinner'],
    skeleton: ['骨架屏', '占位图', 'placeholder'],
    notification: ['通知', '消息', '提醒', 'toast'],
    alert: ['警告', '警示', '提示框'],

    // Data display
    table: ['表格'],
    list: ['列表'],
    pagination: ['分页', '翻页', 'pager'],
    tabs: ['标签页', '选项卡', 'tab'],
    badge: ['徽标', '角标', '标记'],
    tag: ['标签', 'chip'],

    // Navigation
    navigation: ['导航', 'nav'],
    menu: ['菜单'],
    breadcrumb: ['面包屑'],

    // Media / layout
    carousel: ['轮播', '走马灯', 'slider', 'swiper'],
    upload: ['上传', '文件上传', 'file upload'],
    image: ['图片', '图像', 'img'],

    // Common hooks
    debounce: ['防抖', '去抖', 'usedebounce'],
    throttle: ['节流', 'usethrottle'],

    // Search
    search: ['搜索', '查询', 'filter'],

    // Buttons
    button: ['按钮', 'btn'],
};
|
|
69
|
+
// ─── Synonym expansion ───────────────────────────────────────────────────────
/**
 * Produces a synonym-expanded variant of `query` by replacing one term that
 * occurs in it with another term of the same synonym group that does not.
 *
 * Matching is a case-insensitive substring test. When several terms match, the
 * longest matched term is tried first (ties keep dictionary order), so a
 * compound such as "加载中" is replaced as a whole rather than through its
 * shorter sub-term "加载", which would leave a mangled hybrid ("loading中").
 * All occurrences of the chosen term are replaced and whitespace is collapsed.
 *
 * @param {string} query - The (ideally noise-cleaned) search query.
 * @param {Record<string, string[]>} [synonymMap=SYNONYM_MAP] - Synonym groups
 *   keyed by canonical term; defaults to the built-in dictionary.
 * @returns {string | null} The rewritten query, or `null` when `query` is not a
 *   non-empty string, no term matches, or no replacement changes the query.
 *
 * @example
 * buildSynonymVariant('弹窗 onChange')  // => 'dialog onChange'
 * buildSynonymVariant('input onChange') // => '输入框 onChange'
 */
export function buildSynonymVariant(query, synonymMap = SYNONYM_MAP) {
    if (typeof query !== 'string' || query === '') {
        return null;
    }
    const lower = query.toLowerCase();
    const occursInQuery = (term) => term.length > 0 && lower.includes(term.toLowerCase());

    // Collect, per group, the longest term that occurs in the query.
    const hits = [];
    for (const [canonical, aliases] of Object.entries(synonymMap)) {
        const groupTerms = [canonical, ...aliases];
        let matchedTerm = null;
        for (const term of groupTerms) {
            if (occursInQuery(term) && (matchedTerm === null || term.length > matchedTerm.length)) {
                matchedTerm = term;
            }
        }
        if (matchedTerm !== null) {
            hits.push({ matchedTerm, groupTerms });
        }
    }

    // Longest match first; Array.prototype.sort is stable, so equal lengths
    // keep the dictionary's declaration order.
    hits.sort((a, b) => b.matchedTerm.length - a.matchedTerm.length);

    for (const { matchedTerm, groupTerms } of hits) {
        // First term of the group that is not already present in the query.
        const substitute = groupTerms.find((term) => term.length > 0 && !occursInQuery(term));
        if (substitute === undefined) {
            continue;
        }
        // Escape regex metacharacters so the term is matched literally, case-insensitively.
        const pattern = new RegExp(matchedTerm.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'gi');
        // A replacer function keeps `$` sequences in the substitute literal.
        const variant = query
            .replace(pattern, () => substitute)
            .replace(/\s+/g, ' ')
            .trim();
        if (variant && variant !== query) {
            return variant;
        }
    }
    return null;
}
|
package/dist/config/tuning.js
CHANGED
|
@@ -100,6 +100,10 @@ export const REQUIRED_FIELD_FALLBACK_MIN_SCORE = 0.4;
|
|
|
100
100
|
export const LITERAL_MATCH_PRIORITY_BOOST = 0.22;
|
|
101
101
|
/** 路径为 demo/example 风格时,对优先级分数扣减的值 */
|
|
102
102
|
export const DEMO_PATH_PRIORITY_PENALTY = 0.18;
|
|
103
|
+
/** 文件名为 index.js/ts/tsx/jsx 时对优先级分数的加成(优先推荐组件目录入口文件) */
|
|
104
|
+
export const INDEX_FILE_PRIORITY_BOOST = 0.18;
|
|
105
|
+
/** 同目录中存在 index 文件时,对其他非入口子文件的优先级扣减(避免 menu.js / panel.js 等内部实现抢占推荐位) */
|
|
106
|
+
export const SAME_DIR_INDEX_EXISTS_PENALTY = 0.25;
|
|
103
107
|
// ─── 搜索工具结果过滤 (tools/searchSymbols.ts) ───────────────────────────────
|
|
104
108
|
/** 最终返回结果所需的最低综合评分 */
|
|
105
109
|
export const SEARCH_SCORE_THRESHOLD = 0.45;
|
|
@@ -117,19 +117,51 @@ export function splitTextIntoChunks(content, options = {}) {
|
|
|
117
117
|
? sliceWithOverlap(block.text, maxChars, overlapChars)
|
|
118
118
|
: [block.text];
|
|
119
119
|
for (const part of oversizedParts) {
|
|
120
|
-
// 3.
|
|
121
|
-
|
|
120
|
+
// ── 3. 滑动窗口 + overlap ─────────────────────────────────────────
|
|
121
|
+
// 目标:把 parts 依次合并到 currentBlocks,直到"该收了"再收敛成一个 chunk。
|
|
122
|
+
// 收敛后 finalizeChunk 会把末尾 overlapChars 个字符带入下一块,减少边界信息丢失。
|
|
123
|
+
//
|
|
124
|
+
// 执行示例(targetChars=20, maxChars=30, overlapChars=5):
|
|
125
|
+
//
|
|
126
|
+
// part="Hello world"(11) currentLength=0 → 直接 push
|
|
127
|
+
// currentBlocks=["Hello world"] currentLength=11
|
|
128
|
+
//
|
|
129
|
+
// part="Foo bar baz"(11) additionLength=11+2=13 currentLength+13=24 ≤ 30,未达目标 → 直接 push
|
|
130
|
+
// currentBlocks=["Hello world","Foo bar baz"] currentLength=24
|
|
131
|
+
//
|
|
132
|
+
// part="A long sentence"(15) additionLength=15+2=17 currentLength+17=41 > 30 → wouldOverflowMax=true
|
|
133
|
+
// → finalizeChunk: chunks=["Hello world\n\nFoo bar baz"]
|
|
134
|
+
// overlap="r baz"(末5字符) currentBlocks=["r baz"] currentLength=5
|
|
135
|
+
// → push "A long sentence"
|
|
136
|
+
// currentBlocks=["r baz","A long sentence"] currentLength=5+2+15=22
|
|
137
|
+
//
|
|
138
|
+
// 最终 finalizeChunk(overlap=0): chunks 追加 "r baz\n\nA long sentence"
|
|
139
|
+
// ─────────────────────────────────────────────────────────────────
|
|
140
|
+
// SEP=2 对应 blocks.join('\n\n') 中每条边界的 '\n\n' 长度;
|
|
141
|
+
// 首个 block 无分隔符,所以 currentLength===0 时不加。
|
|
142
|
+
const SEP = 2;
|
|
143
|
+
const additionLength = currentLength === 0 ? part.length : SEP + part.length;
|
|
144
|
+
// 两种情况需要先收敛当前 chunk:
|
|
145
|
+
// 1. wouldOverflowMax:加入本 part 后超出硬上限,被动截断;
|
|
146
|
+
// 2. reachedTarget :当前已达目标大小,主动分块,保持粒度均匀。
|
|
122
147
|
const wouldOverflowMax = currentLength > 0 && currentLength + additionLength > maxChars;
|
|
123
148
|
const reachedTarget = currentLength >= targetChars;
|
|
124
|
-
// 已接近目标大小或即将超出上限时,先收敛当前 chunk,再开始下一块。
|
|
125
149
|
if (wouldOverflowMax || reachedTarget) {
|
|
150
|
+
// finalizeChunk 写入 chunks,并把末尾 overlap 文本返回作为新 currentBlocks 起点。
|
|
126
151
|
currentBlocks = finalizeChunk(chunks, currentBlocks, overlapChars);
|
|
152
|
+
// overlap 文本长度不固定,必须重算(不能增量推导)。
|
|
127
153
|
currentLength = currentBlocks.join('\n\n').length;
|
|
128
154
|
}
|
|
129
155
|
currentBlocks.push(part);
|
|
130
|
-
|
|
156
|
+
// flush 后 currentBlocks 可能含 overlap(length ≥ 1),也可能为空(length === 0);
|
|
157
|
+
// 增量计算避免每次重新 join 整个数组。
|
|
158
|
+
currentLength =
|
|
159
|
+
currentLength === 0
|
|
160
|
+
? part.length
|
|
161
|
+
: currentLength + SEP + part.length;
|
|
131
162
|
}
|
|
132
163
|
}
|
|
164
|
+
// 收尾兜底,确保剩余内容不丢失
|
|
133
165
|
finalizeChunk(chunks, currentBlocks, 0);
|
|
134
166
|
return chunks;
|
|
135
167
|
}
|
|
@@ -28,7 +28,8 @@
|
|
|
28
28
|
* ──────────────────────────────────────────────────────────────────────────────
|
|
29
29
|
*/
|
|
30
30
|
import { rankSemanticHits, rankSymbols } from './ranking.js';
|
|
31
|
-
import { DEMO_PATH_PRIORITY_PENALTY, LITERAL_MATCH_PRIORITY_BOOST, MIN_LITERAL_MATCH_SCORE, MIN_RECOMMENDATION_SCORE, MIN_SEMANTIC_TEXT_MATCH_SCORE, REQUIRED_FIELD_FALLBACK_MIN_SCORE, } from '../config/tuning.js';
|
|
31
|
+
import { DEMO_PATH_PRIORITY_PENALTY, INDEX_FILE_PRIORITY_BOOST, LITERAL_MATCH_PRIORITY_BOOST, MIN_LITERAL_MATCH_SCORE, MIN_RECOMMENDATION_SCORE, MIN_SEMANTIC_TEXT_MATCH_SCORE, REQUIRED_FIELD_FALLBACK_MIN_SCORE, SAME_DIR_INDEX_EXISTS_PENALTY, } from '../config/tuning.js';
|
|
32
|
+
import { NOISE_PATTERNS, buildSynonymVariant } from '../config/queryRewrite.js';
|
|
32
33
|
/** 跳过原因标识 */
|
|
33
34
|
const SKIPPED_REASON = {
|
|
34
35
|
NO_COMBINED: 'no_combined',
|
|
@@ -50,8 +51,8 @@ const RECOMMENDATION_MESSAGE = {
|
|
|
50
51
|
};
|
|
51
52
|
/** 详情补查的 top-k 条数 */
|
|
52
53
|
const ENRICH_TOP_K = 3;
|
|
53
|
-
/**
|
|
54
|
-
const MAX_QUERY_VARIANTS =
|
|
54
|
+
/** 最多取查询变体数量(原始 + 清洗 + 同义词扩展) */
|
|
55
|
+
const MAX_QUERY_VARIANTS = 3;
|
|
55
56
|
/** 结构/语义搜索 limit 倍数 */
|
|
56
57
|
const STRUCTURE_LIMIT_MULTIPLIER = 4;
|
|
57
58
|
/** 结构/语义搜索 limit 最小值 */
|
|
@@ -61,31 +62,33 @@ const DEFAULT_KEYWORD_SIMILARITY = 0.55;
|
|
|
61
62
|
function uniqueStrings(values = []) {
|
|
62
63
|
return [...new Set(values.map((value) => value.trim()).filter(Boolean))];
|
|
63
64
|
}
|
|
64
|
-
const QUERY_REWRITE_PATTERNS = [
|
|
65
|
-
/^帮我找(找)?(一个|一下)?/g,
|
|
66
|
-
/^有没有(现成的)?/g,
|
|
67
|
-
/^请推荐(一个|一下)?/g,
|
|
68
|
-
/可复用/g,
|
|
69
|
-
/现成的/g,
|
|
70
|
-
/封装好的/g,
|
|
71
|
-
/(组件|函数|hook|工具|util)(实现)?/gi,
|
|
72
|
-
];
|
|
73
65
|
/**
 * Cleans the raw query and derives search variants from it:
 *   1. noise removal  — strips conversational prefixes and filler words;
 *   2. synonym expansion — swaps a term for an alias (e.g. Chinese <-> English).
 *
 * Candidates are ordered original -> cleaned -> synonym-expanded, de-duplicated,
 * and capped at MAX_QUERY_VARIANTS entries.
 *
 * @param {string} rawQuery - The query as typed by the user.
 * @returns {string[]} Distinct query variants; empty when the query is blank.
 */
function buildQueryVariants(rawQuery) {
    const base = rawQuery.trim();
    if (!base) {
        return [];
    }
    // Step 1: noise removal. Each pattern is replaced by a space so neighbouring
    // words do not fuse; whitespace is normalised afterwards.
    const stripped = NOISE_PATTERNS
        .reduce((text, pattern) => text.replace(pattern, ' '), base)
        .replace(/\s+/g, ' ')
        .trim();
    // If cleaning removed everything, fall back to the original query.
    const cleaned = stripped || base;
    // Step 2: synonym expansion, based on the cleaned query so noise words
    // cannot interfere with term matching.
    const synonymVariant = buildSynonymVariant(cleaned);
    const candidates = [base, cleaned];
    if (synonymVariant) {
        candidates.push(synonymVariant);
    }
    // Enforce the documented upper bound on the number of variants.
    return uniqueStrings(candidates).slice(0, MAX_QUERY_VARIANTS);
}
|
|
90
93
|
function normalizeToken(value) {
|
|
91
94
|
return value.trim().toLowerCase();
|
|
@@ -205,6 +208,14 @@ function isDemoLikePath(path, strict = false) {
|
|
|
205
208
|
: DEMO_LIKE_PATH_SEGMENTS_SOFT;
|
|
206
209
|
return segments.some((segment) => normalizedPath.includes(segment));
|
|
207
210
|
}
|
|
211
|
+
/**
 * Tells whether a path points at a component-directory entry file, i.e. its
 * final `/`-separated segment is index.js, index.ts, index.tsx or index.jsx
 * (compared case-insensitively).
 *
 * Entry files are treated as a component's public API and are meant to be
 * recommended ahead of the directory's internal files.
 *
 * @param {string} filePath - Path using `/` as the separator.
 * @returns {boolean} `true` when the file name is one of the index entry names.
 */
function isIndexFile(filePath) {
    // Everything after the last '/', or the whole string when there is none.
    const fileName = filePath.slice(filePath.lastIndexOf('/') + 1).toLowerCase();
    return ['index.js', 'index.ts', 'index.tsx', 'index.jsx'].includes(fileName);
}
|
|
208
219
|
/**
|
|
209
220
|
* 判断是否为可复用候选,过滤掉明显的测试/示例代码。虽然有可能误伤一些真实组件,但优先保证推荐结果的实用性和专业度。
|
|
210
221
|
* @param symbol 要判断的代码符号
|
|
@@ -295,6 +306,10 @@ function computeRecommendationPriority(item, query) {
|
|
|
295
306
|
score += LITERAL_MATCH_PRIORITY_BOOST;
|
|
296
307
|
notes.push('名称或文件名命中查询');
|
|
297
308
|
}
|
|
309
|
+
if (isIndexFile(path)) {
|
|
310
|
+
score += INDEX_FILE_PRIORITY_BOOST;
|
|
311
|
+
notes.push('组件目录入口文件优先');
|
|
312
|
+
}
|
|
298
313
|
if (isDemoLikePath(path)) {
|
|
299
314
|
score -= DEMO_PATH_PRIORITY_PENALTY;
|
|
300
315
|
notes.push('示例工程路径降权');
|
|
@@ -306,6 +321,49 @@ function computeRecommendationPriority(item, query) {
|
|
|
306
321
|
: item.reason.summary,
|
|
307
322
|
};
|
|
308
323
|
}
|
|
324
|
+
/**
 * Same-directory index penalty: when the result set contains an index entry
 * file for a directory, every other file of that same directory has its
 * adjusted score reduced by SAME_DIR_INDEX_EXISTS_PENALTY (floored at 0 and
 * rounded to three decimals) and a note appended to its adjusted reason.
 *
 * Motivation: an index file that only re-exports has sparse content and tends
 * to receive a low embedding score, letting internal files outrank it.
 *
 * The input array and its entries are not mutated; penalised entries are
 * returned as shallow copies. Ordering is left untouched.
 *
 * @param {Array<{item: {symbol: {path: string}}, adjustedScore: number, adjustedReason: string}>} entries
 * @returns {Array} The same array when no index file is present, otherwise a new array.
 */
function applyDirectoryIndexPenalty(entries) {
    // Directory part of a '/'-separated path; '' when there is no separator.
    const directoryOf = (filePath) => {
        const cut = filePath.lastIndexOf('/');
        return cut === -1 ? '' : filePath.substring(0, cut);
    };

    // Directories that already contribute an index file to the result set.
    const indexedDirs = new Set(
        entries
            .map((entry) => entry.item.symbol.path)
            .filter((filePath) => isIndexFile(filePath))
            .map(directoryOf),
    );
    if (indexedDirs.size === 0) {
        return entries;
    }

    return entries.map((entry) => {
        const filePath = entry.item.symbol.path;
        const isInternalSibling = !isIndexFile(filePath) && indexedDirs.has(directoryOf(filePath));
        if (!isInternalSibling) {
            return entry;
        }
        const penalised = Math.max(0, entry.adjustedScore - SAME_DIR_INDEX_EXISTS_PENALTY);
        return {
            ...entry,
            adjustedScore: Number(penalised.toFixed(3)),
            adjustedReason: `${entry.adjustedReason} + 同目录入口文件已命中,内部子文件降权`,
        };
    });
}
|
|
358
|
+
/**
 * Snapshots the eval accumulator into a plain trace object by copying each
 * stage's ID collection into a fresh array, so later additions to the
 * accumulator do not leak into an already-built trace.
 *
 * @param {{semanticIds: Iterable, reusableIds: Iterable, combinedIds: Iterable, qualifiedIds: Iterable, returnedIds: Iterable}} acc
 * @returns {{semanticIds: Array, reusableIds: Array, combinedIds: Array, qualifiedIds: Array, returnedIds: Array}}
 */
function accToEvalTrace(acc) {
    const { semanticIds, reusableIds, combinedIds, qualifiedIds, returnedIds } = acc;
    return {
        semanticIds: Array.from(semanticIds),
        reusableIds: Array.from(reusableIds),
        combinedIds: Array.from(combinedIds),
        qualifiedIds: Array.from(qualifiedIds),
        returnedIds: Array.from(returnedIds),
    };
}
|
|
309
367
|
export class RecommendationService {
|
|
310
368
|
repository;
|
|
311
369
|
constructor(repository) {
|
|
@@ -400,6 +458,15 @@ export class RecommendationService {
|
|
|
400
458
|
let selectedQuery = null;
|
|
401
459
|
let fallbackReason = null;
|
|
402
460
|
const attempts = [];
|
|
461
|
+
const evalAcc = input.evalMode
|
|
462
|
+
? {
|
|
463
|
+
semanticIds: new Set(),
|
|
464
|
+
reusableIds: new Set(),
|
|
465
|
+
combinedIds: new Set(),
|
|
466
|
+
qualifiedIds: new Set(),
|
|
467
|
+
returnedIds: new Set(),
|
|
468
|
+
}
|
|
469
|
+
: undefined;
|
|
403
470
|
this.logSearchTypes(searchTypes);
|
|
404
471
|
for (const queryVariant of queryVariants) {
|
|
405
472
|
const { attempt, combined, searchResults, gathered } = await this.tryQueryVariant({
|
|
@@ -411,6 +478,7 @@ export class RecommendationService {
|
|
|
411
478
|
structureFields,
|
|
412
479
|
requiredProps,
|
|
413
480
|
requiredHooks,
|
|
481
|
+
evalAcc,
|
|
414
482
|
});
|
|
415
483
|
queriedBy = gathered.queriedBy;
|
|
416
484
|
if (!fallbackReason && gathered.fallbackReason) {
|
|
@@ -433,6 +501,7 @@ export class RecommendationService {
|
|
|
433
501
|
requiredHooks,
|
|
434
502
|
attempt,
|
|
435
503
|
limit,
|
|
504
|
+
evalAcc,
|
|
436
505
|
});
|
|
437
506
|
lastRankedCandidates = candidates;
|
|
438
507
|
if (candidates.length > 0) {
|
|
@@ -454,6 +523,7 @@ export class RecommendationService {
|
|
|
454
523
|
attempts,
|
|
455
524
|
selectedQuery,
|
|
456
525
|
fallbackReason,
|
|
526
|
+
evalTrace: evalAcc ? accToEvalTrace(evalAcc) : undefined,
|
|
457
527
|
});
|
|
458
528
|
}
|
|
459
529
|
this.logAttemptCheckpoint('attempt.no_candidate_after_rank', attempt);
|
|
@@ -474,6 +544,7 @@ export class RecommendationService {
|
|
|
474
544
|
attempts,
|
|
475
545
|
selectedQuery,
|
|
476
546
|
fallbackReason,
|
|
547
|
+
evalTrace: evalAcc ? accToEvalTrace(evalAcc) : undefined,
|
|
477
548
|
});
|
|
478
549
|
}
|
|
479
550
|
logStart(input) {
|
|
@@ -505,9 +576,12 @@ export class RecommendationService {
|
|
|
505
576
|
console.error('[code-intelligence-mcp] recommendComponent.preprocess queryVariants=%s requiredProps=%s requiredHooks=%s structureFields=%s searchTypes=%s preferSemantic=%s limit=%s', JSON.stringify(queryVariants), JSON.stringify(requiredProps), JSON.stringify(requiredHooks), JSON.stringify(structureFields), JSON.stringify(searchTypes), String(preferSemantic), String(limit));
|
|
506
577
|
return res;
|
|
507
578
|
}
|
|
508
|
-
async tryQueryVariant({ queryVariant, input, searchTypes, preferSemantic, limit, structureFields, requiredProps, requiredHooks, }) {
|
|
579
|
+
async tryQueryVariant({ queryVariant, input, searchTypes, preferSemantic, limit, structureFields, requiredProps, requiredHooks, evalAcc, }) {
|
|
509
580
|
const gathered = await this.gatherSearchResults(queryVariant, searchTypes, preferSemantic, limit);
|
|
510
581
|
const searchResults = gathered.searchResults;
|
|
582
|
+
if (evalAcc) {
|
|
583
|
+
searchResults.forEach((r) => evalAcc.semanticIds.add(r.symbol.id));
|
|
584
|
+
}
|
|
511
585
|
const attempt = {
|
|
512
586
|
query: queryVariant,
|
|
513
587
|
queriedBy: gathered.queriedBy,
|
|
@@ -542,10 +616,14 @@ export class RecommendationService {
|
|
|
542
616
|
if (reusableCandidates.length > 0) {
|
|
543
617
|
combined = reusableCandidates;
|
|
544
618
|
}
|
|
619
|
+
if (evalAcc) {
|
|
620
|
+
reusableCandidates.forEach((s) => evalAcc.reusableIds.add(s.id));
|
|
621
|
+
combined.forEach((s) => evalAcc.combinedIds.add(s.id));
|
|
622
|
+
}
|
|
545
623
|
attempt.combinedCount = combined.length;
|
|
546
624
|
return { attempt, combined, searchResults, gathered };
|
|
547
625
|
}
|
|
548
|
-
async rankAndEnrichCandidates({ combined, searchResults, queryVariant, queriedBy, requiredProps, requiredHooks, attempt, limit, }) {
|
|
626
|
+
async rankAndEnrichCandidates({ combined, searchResults, queryVariant, queriedBy, requiredProps, requiredHooks, attempt, limit, evalAcc, }) {
|
|
549
627
|
const ranked = queriedBy === QUERIED_BY.SEMANTIC
|
|
550
628
|
? rankSemanticHits(combined.map((symbol) => ({
|
|
551
629
|
symbol,
|
|
@@ -564,14 +642,17 @@ export class RecommendationService {
|
|
|
564
642
|
};
|
|
565
643
|
});
|
|
566
644
|
priorityScored.sort((a, b) => b.adjustedScore - a.adjustedScore);
|
|
645
|
+
// Same-directory index penalty: demote non-entry files that share a directory with an index file, so that index.js ranks above menu.js / panel.js
|
|
646
|
+
const reranked = applyDirectoryIndexPenalty(priorityScored);
|
|
647
|
+
reranked.sort((a, b) => b.adjustedScore - a.adjustedScore);
|
|
567
648
|
// 对优先级预排序后的 Top-K 做详情补查(getByName 补全完整 meta)
|
|
568
|
-
const enriched = await this.enrichTopCandidatesWithDetail(
|
|
649
|
+
const enriched = await this.enrichTopCandidatesWithDetail(reranked.map((e) => e.item));
|
|
569
650
|
attempt.detailEnrichedCount = enriched.enrichedCount;
|
|
570
|
-
// 将补查结果回填到
|
|
651
|
+
// 将补查结果回填到 reranked,保持优先级排序
|
|
571
652
|
const enrichedPriorityScored = enriched.ranked.map((item, idx) => ({
|
|
572
653
|
item,
|
|
573
|
-
adjustedScore:
|
|
574
|
-
adjustedReason:
|
|
654
|
+
adjustedScore: reranked[idx]?.adjustedScore ?? item.score,
|
|
655
|
+
adjustedReason: reranked[idx]?.adjustedReason ?? item.reason.summary,
|
|
575
656
|
}));
|
|
576
657
|
// 质量门控:score 阈值 + requiredProps/Hooks 命中校验(依赖完整 meta,必须在补查之后)
|
|
577
658
|
const qualifiedRanked = enrichedPriorityScored.filter((entry) => isStrongEnoughRecommendation(entry.item, queryVariant, queriedBy, requiredProps, requiredHooks));
|
|
@@ -579,8 +660,14 @@ export class RecommendationService {
|
|
|
579
660
|
if (qualifiedRanked.length === 0) {
|
|
580
661
|
attempt.skippedReason = SKIPPED_REASON.NO_QUALIFIED;
|
|
581
662
|
}
|
|
663
|
+
if (evalAcc) {
|
|
664
|
+
qualifiedRanked.forEach((e) => evalAcc.qualifiedIds.add(e.item.symbol.id));
|
|
665
|
+
}
|
|
582
666
|
// 已按优先级排序,直接构建候选结果
|
|
583
667
|
const candidates = qualifiedRanked.map((entry) => toCandidate(entry.item.symbol, entry.adjustedScore, entry.adjustedReason, requiredProps, requiredHooks));
|
|
668
|
+
if (evalAcc) {
|
|
669
|
+
candidates.forEach((c) => evalAcc.returnedIds.add(c.id));
|
|
670
|
+
}
|
|
584
671
|
console.error('[code-intelligence-mcp] recommendComponent.rank query=%s queriedBy=%s enriched=%s qualified=%s candidates=%s', queryVariant, queriedBy, String(enrichedPriorityScored.length), String(qualifiedRanked.length), String(candidates.length));
|
|
585
672
|
return candidates;
|
|
586
673
|
}
|
|
@@ -590,7 +677,7 @@ export class RecommendationService {
|
|
|
590
677
|
logAttemptsTrace(stage, payload) {
|
|
591
678
|
console.error('[code-intelligence-mcp] %s selectedQuery=%s queriedBy=%s attempts=%s fallbackReason=%s', stage, payload.selectedQuery ?? 'none', payload.queriedBy, JSON.stringify(payload.attempts), payload.fallbackReason ?? 'none');
|
|
592
679
|
}
|
|
593
|
-
buildResult({ recommended, alternatives, queriedBy, requiredProps, requiredHooks, attempts, selectedQuery, fallbackReason, }) {
|
|
680
|
+
buildResult({ recommended, alternatives, queriedBy, requiredProps, requiredHooks, attempts, selectedQuery, fallbackReason, evalTrace, }) {
|
|
594
681
|
return {
|
|
595
682
|
recommended,
|
|
596
683
|
alternatives,
|
|
@@ -608,6 +695,7 @@ export class RecommendationService {
|
|
|
608
695
|
retryUsed: attempts.length > 1,
|
|
609
696
|
fallbackReason,
|
|
610
697
|
},
|
|
698
|
+
evalTrace,
|
|
611
699
|
};
|
|
612
700
|
}
|
|
613
701
|
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* evalTrace.ts — 推荐主链各阶段符号 ID 追踪,仅在 evalMode=true 时填充。
|
|
3
|
+
*
|
|
4
|
+
* 用于 eval CLI 做 per-symbol 失败分类(误杀分析),
|
|
5
|
+
* 判断 expected symbol 在哪个阶段丢失:
|
|
6
|
+
* semanticIds → reusableIds → combinedIds → qualifiedIds → returnedIds
|
|
7
|
+
*/
|
|
8
|
+
/**
 * Classifies what happened to one expected symbol during a recommendation run,
 * based on the per-stage ID lists recorded in the eval trace.
 *
 * A symbol present in `returnedIds` is 'found'. Otherwise the stages are checked
 * in pipeline order and the label of the first list that lacks the symbol is
 * returned; if every checked list contains it, 'quality_gate_rejected' is returned.
 *
 * NOTE(review): absence from `qualifiedIds` yields 'ranked_below_topk' while the
 * fall-through yields 'quality_gate_rejected'; these two labels look swapped
 * relative to the stage each one tests — confirm against the consumers of
 * these labels before changing them.
 *
 * @param symbolId The symbol's database ID (resolved from its name beforehand).
 * @param trace    The EvalTrace of the recommendation call being analysed.
 * @returns {string} 'found' or the label of the stage where the symbol was lost.
 */
export function classifySymbolFailure(symbolId, trace) {
    if (trace.returnedIds.includes(symbolId)) {
        return 'found';
    }
    // [trace list, label reported when the symbol is missing from that list]
    const stages = [
        ['semanticIds', 'no_semantic_recall'],
        ['reusableIds', 'reusability_filtered'],
        ['combinedIds', 'structure_filtered'],
        ['qualifiedIds', 'ranked_below_topk'],
    ];
    const firstMissing = stages.find(([stageKey]) => !trace[stageKey].includes(symbolId));
    return firstMissing ? firstMissing[1] : 'quality_gate_rejected';
}
|
|
@@ -14,6 +14,11 @@
|
|
|
14
14
|
* 大仓分片:
|
|
15
15
|
* - 直接启动多个 worker 进程(同一 Redis)即可水平扩展,BullMQ 原生分布式协调
|
|
16
16
|
*/
|
|
17
|
+
// env.redisUrl (同一个 Redis) url+name('embedding') 决定了 BullMQ 的队列,生产者和消费者通过它们读写同一个队列实现通信
|
|
18
|
+
// │
|
|
19
|
+
// ├─ Queue('embedding') → LPUSH bull:embedding:wait ... ← Producer 写
|
|
20
|
+
// ├─ Worker('embedding') → BRPOPLPUSH bull:embedding:wait ← Worker 消费
|
|
21
|
+
// └─ QueueEvents('embedding')→ SUBSCRIBE bull:embedding:events ← 监听事件
|
|
17
22
|
import { Worker, QueueEvents } from 'bullmq';
|
|
18
23
|
import { Redis } from 'ioredis';
|
|
19
24
|
import { env } from '../config/env.js';
|
|
@@ -91,11 +96,14 @@ async function processEmbedJob(job, pool) {
|
|
|
91
96
|
*/
|
|
92
97
|
export async function startEmbeddingWorker(opts = {}) {
|
|
93
98
|
const { concurrency = 5, rpmLimit = 100 } = opts;
|
|
99
|
+
// worker1 负责从 Redis 拉 job。
|
|
100
|
+
// BullMQ Worker 用它执行 BRPOPLPUSH 这类阻塞命令来抢占 job、加锁、标记完成/失败。阻塞命令会占住整个连接,无法复用。
|
|
94
101
|
const connection = new Redis(env.redisUrl, {
|
|
95
102
|
maxRetriesPerRequest: null,
|
|
96
103
|
enableReadyCheck: false,
|
|
97
104
|
});
|
|
98
|
-
//
|
|
105
|
+
// worker2 负责订阅 Redis 的 Pub/Sub 事件频道。
|
|
106
|
+
// 给 QueueEvents 用。BullMQ 在 Redis 里发 Pub/Sub 事件(drained、completed、failed…),监听方需要独立的连接订阅这些事件频道。如果共用 connection,阻塞命令会让 Pub/Sub 订阅无法正常工作,所以 BullMQ 官方要求两个连接必须分开。
|
|
99
107
|
const eventsConnection = new Redis(env.redisUrl, {
|
|
100
108
|
maxRetriesPerRequest: null,
|
|
101
109
|
enableReadyCheck: false,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lorrylurui/code-intelligence-mcp",
|
|
3
|
-
"version": "2.0
|
|
3
|
+
"version": "2.1.0",
|
|
4
4
|
"private": false,
|
|
5
5
|
"description": "MCP server 提供仓库内可复用代码块(ts/tsx/js/jsx/css/less)的索引和查询能力,支持基于代码上下文的智能推荐。",
|
|
6
6
|
"type": "module",
|
|
@@ -9,8 +9,8 @@
|
|
|
9
9
|
"dist"
|
|
10
10
|
],
|
|
11
11
|
"bin": {
|
|
12
|
-
"code-intelligence-mcp": "
|
|
13
|
-
"code-intelligence-index": "
|
|
12
|
+
"code-intelligence-mcp": "dist/index.js",
|
|
13
|
+
"code-intelligence-index": "dist/cli/index-codebase-cli.js"
|
|
14
14
|
},
|
|
15
15
|
"scripts": {
|
|
16
16
|
"dev": "tsx watch --clear-screen=false --exclude node_modules --exclude dist src/index.ts",
|
|
@@ -21,6 +21,8 @@
|
|
|
21
21
|
"start": "node dist/index.js",
|
|
22
22
|
"index": "tsx src/cli/index-codebase-cli.ts",
|
|
23
23
|
"ci-index": "tsx src/cli/ci-index-cli.ts",
|
|
24
|
+
"eval": "tsx src/cli/eval-recommendation-cli.ts",
|
|
25
|
+
"analyze": "tsx src/cli/eval-analyze-cli.ts",
|
|
24
26
|
"worker:embedding": "tsx src/cli/embedding-worker-cli.ts",
|
|
25
27
|
"embedding:dev": "cd embedding-service && python3 -m uvicorn app:app --host 127.0.0.1 --port 8765",
|
|
26
28
|
"docker:up": "docker compose up -d",
|