@lorrylurui/code-intelligence-mcp 2.0.7 → 2.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -0
- package/dist/cli/ci-index.js +46 -57
- package/dist/cli/eval-analyze-cli.js +308 -0
- package/dist/cli/eval-recommendation-cli.js +296 -0
- package/dist/config/queryRewrite.js +95 -0
- package/dist/config/tuning.js +4 -0
- package/dist/indexer/chunkText.js +36 -4
- package/dist/indexer/persistSymbols.js +32 -47
- package/dist/prompts/reusableCodeAdvisorPrompt.js +2 -1
- package/dist/repositories/chunkRepository.js +1 -1
- package/dist/repositories/symbolRepository.js +7 -5
- package/dist/services/embeddingQueue.js +1 -0
- package/dist/services/recommendationService.js +136 -38
- package/dist/services/reconcileIndexedSymbols.js +40 -0
- package/dist/services/reindex.js +26 -7
- package/dist/types/evalTrace.js +25 -0
- package/dist/workers/embeddingWorker.js +15 -7
- package/package.json +5 -3
- package/dist/.env +0 -24
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* eval-recommendation-cli.ts — 推荐质量离线评测 CLI
|
|
3
|
+
*
|
|
4
|
+
* 用法:
|
|
5
|
+
* npx tsx src/cli/eval-recommendation-cli.ts [--query-set offline_eval/query_set.jsonl] [--limit 10] [--output offline_eval/results/]
|
|
6
|
+
*
|
|
7
|
+
* 输出:
|
|
8
|
+
* - stdout: 评测摘要(Recall@10 / Recall@50 / MRR@10 / nDCG@10)
|
|
9
|
+
* - results/<date>.jsonl: 每条 query 的详细结果 + 失败分类
|
|
10
|
+
*/
|
|
11
|
+
import * as fs from 'node:fs';
|
|
12
|
+
import * as path from 'node:path';
|
|
13
|
+
import * as readline from 'node:readline';
|
|
14
|
+
import { RecommendationService } from '../services/recommendationService.js';
|
|
15
|
+
import { SymbolRepository } from '../repositories/symbolRepository.js';
|
|
16
|
+
import { classifySymbolFailure } from '../types/evalTrace.js';
|
|
17
|
+
// ─── CLI 参数 ────────────────────────────────────────────────────────────────
|
|
18
|
+
const args = process.argv.slice(2);
/**
 * Returns the command-line token that follows `flag`, or `fallback` when the
 * flag is absent or is not followed by a non-empty token.
 *
 * @param {string} flag - Flag to look for, e.g. `--limit`.
 * @param {string} fallback - Value returned when the flag has no value.
 * @returns {string} The raw string value for the flag.
 */
function getArg(flag, fallback) {
    const idx = args.indexOf(flag);
    return idx !== -1 && args[idx + 1] ? args[idx + 1] : fallback;
}
/**
 * Parses a top-K cut-off. Anything that is not a positive integer (NaN, 0,
 * negatives, fractions) would make `slice(0, k)` return nothing and silently
 * zero every metric, so such input is rejected with a warning and replaced by
 * `fallback`.
 *
 * @param {string} raw - Raw command-line value.
 * @param {number} fallback - Cut-off used when `raw` is invalid.
 * @returns {number} A positive integer cut-off.
 */
function parseTopK(raw, fallback) {
    const value = Number(raw);
    if (Number.isInteger(value) && value > 0) {
        return value;
    }
    console.error(`Invalid --limit value "${raw}"; falling back to ${fallback}.`);
    return fallback;
}
const QUERY_SET_PATH = getArg('--query-set', 'offline_eval/query_set.jsonl');
const OUTPUT_DIR = getArg('--output', 'offline_eval/results');
// Cut-off used for Recall@K / MRR@K / nDCG@K.
const TOP_K_MAIN = parseTopK(getArg('--limit', '10'), 10);
// Wide cut-off used for Recall@50.
const TOP_K_WIDE = 50;
|
|
27
|
+
// ─── 指标计算 ─────────────────────────────────────────────────────────────────
|
|
28
|
+
/**
 * Recall@K: the share of relevant expected entries (rel >= 1) whose name
 * appears among the first `k` returned names. Ranking order inside the top K
 * does not matter.
 *
 * A case with no relevant entries (a negative sample) counts as fully
 * recalled and yields 1.
 *
 * @param {string[]} returnedNames - Returned symbol names, best first.
 * @param {{name: string, rel: number}[]} expected - Graded expectations.
 * @param {number} k - Cut-off.
 * @returns {number} Recall in [0, 1].
 */
function recallAtK(returnedNames, expected, k) {
    const window = new Set(returnedNames.slice(0, k));
    let relevantCount = 0;
    let hitCount = 0;
    for (const { name, rel } of expected) {
        if (!(rel >= 1)) {
            continue;
        }
        relevantCount += 1;
        if (window.has(name)) {
            hitCount += 1;
        }
    }
    if (relevantCount === 0) {
        return 1;
    }
    return hitCount / relevantCount;
}
|
|
41
|
+
/**
 * Reciprocal rank at K: 1 / r where r is the 1-based position of the first
 * relevant name (rel >= 1) inside the top `k` results; 0 when none of the top
 * `k` names is relevant. Averaging this over queries gives MRR@K.
 *
 * @param {string[]} returnedNames - Returned symbol names, best first.
 * @param {{name: string, rel: number}[]} expected - Graded expectations.
 * @param {number} k - Cut-off.
 * @returns {number} Reciprocal rank in [0, 1].
 */
function mrrAtK(returnedNames, expected, k) {
    const relevant = new Set();
    for (const entry of expected) {
        if (entry.rel >= 1) {
            relevant.add(entry.name);
        }
    }
    const firstHit = returnedNames
        .slice(0, k)
        .findIndex((name) => relevant.has(name));
    return firstHit === -1 ? 0 : 1 / (firstHit + 1);
}
|
|
54
|
+
/**
 * nDCG@K (Normalized Discounted Cumulative Gain): sums the graded gain
 * (2^rel - 1) of the top `k` results, discounted by log2(position + 1), and
 * divides by the same sum for the ideal ordering of `expected`.
 *
 * Each expected entry is credited at most once. If a name is returned more
 * often than it is expected, the surplus occurrences still occupy a rank but
 * earn no gain, which keeps the score within [0, 1]. When several expected
 * entries share a name, their grades are consumed highest first.
 *
 * Returns 1 when the ideal gain is 0 (nothing relevant is expected).
 *
 * @param {string[]} returnedNames - Returned symbol names, best first.
 * @param {{name: string, rel: number}[]} expected - Graded expectations.
 * @param {number} k - Cut-off.
 * @returns {number} Normalized score.
 */
function ndcgAtK(returnedNames, expected, k) {
    const gainAt = (rel, idx) => (Math.pow(2, rel) - 1) / Math.log2(idx + 2);
    // name -> grades still available for credit, highest first.
    const pendingRels = new Map();
    for (const { name, rel } of expected) {
        const rels = pendingRels.get(name);
        if (rels) {
            rels.push(rel);
        }
        else {
            pendingRels.set(name, [rel]);
        }
    }
    for (const rels of pendingRels.values()) {
        rels.sort((a, b) => b - a);
    }
    const dcg = returnedNames.slice(0, k).reduce((sum, name, idx) => {
        // shift() consumes one grade so a repeated name is not credited twice.
        const rel = pendingRels.get(name)?.shift() ?? 0;
        return sum + gainAt(rel, idx);
    }, 0);
    const idcg = expected
        .map((e) => e.rel)
        .sort((a, b) => b - a)
        .slice(0, k)
        .reduce((sum, rel, idx) => sum + gainAt(rel, idx), 0);
    return idcg === 0 ? 1 : dcg / idcg;
}
|
|
75
|
+
// ─── Failure classification (falls back to name-only handling without an ID) ──
/**
 * Builds one failure record for every relevant expected entry (rel >= 1)
 * whose name is missing from `returnedNames`.
 *
 * When both an eval trace and an ID for the name are available, the failure
 * type comes from `classifySymbolFailure`, and entries it reports as 'found'
 * are dropped. Otherwise the entry is recorded as 'no_semantic_recall'.
 *
 * @param {{name: string, rel: number, path?: string}[]} expected
 * @param {string[]} returnedNames
 * @param {object|undefined} evalTrace
 * @param {Map<string, *>} idByName
 * @returns {{name: string, expectedPath: *, type: string}[]}
 */
function classifyFailuresFromTrace(expected, returnedNames, evalTrace, idByName) {
    const returned = new Set(returnedNames);
    const missed = expected.filter((e) => e.rel >= 1 && !returned.has(e.name));
    return missed.flatMap(({ name, path: expectedPath }) => {
        const id = idByName.get(name);
        if (evalTrace === undefined || id === undefined) {
            // No trace or no known ID for this name: degrade to no_semantic_recall.
            return [{ name, expectedPath, type: 'no_semantic_recall' }];
        }
        const type = classifySymbolFailure(id, evalTrace);
        return type === 'found' ? [] : [{ name, expectedPath, type }];
    });
}
|
|
104
|
+
// ─── ID resolution (name → id map built from the returned results) ───────────
/**
 * Maps every returned symbol name to its id. The recommended symbol (if any)
 * is inserted first, followed by the alternatives in order; when a name
 * repeats, the later entry's id wins.
 *
 * @param {{name: string, id: *}|null|undefined} recommended
 * @param {{name: string, id: *}[]} alternatives
 * @returns {Map<string, *>}
 */
function buildIdMapFromResult(recommended, alternatives) {
    const symbols = recommended
        ? [recommended, ...alternatives]
        : [...alternatives];
    return new Map(symbols.map(({ name, id }) => [name, id]));
}
|
|
112
|
+
// ─── 汇总统计 ─────────────────────────────────────────────────────────────────
|
|
113
|
+
/**
 * Arithmetic mean of `nums`; an empty array yields 0 instead of NaN.
 *
 * @param {number[]} nums
 * @returns {number}
 */
function avg(nums) {
    const count = nums.length;
    if (count === 0) {
        return 0;
    }
    let total = 0;
    nums.forEach((value) => {
        total += value;
    });
    return total / count;
}
|
|
118
|
+
/**
 * Renders a ratio as a percentage with one decimal place, e.g. 0.5 → "50.0%".
 *
 * @param {number} n - Ratio (1 means 100%).
 * @returns {string}
 */
function formatPct(n) {
    const percent = n * 100;
    return `${percent.toFixed(1)}%`;
}
|
|
121
|
+
/**
 * Prints the aggregated eval report to stdout: headline metrics averaged over
 * the positive samples, the false-positive rate over the negative samples,
 * and a per-type failure breakdown with a tuning hint for each type.
 *
 * @param {object[]} results - Per-query result records produced by the eval loop.
 * @param {number} kMain - Cut-off shown in the Recall/MRR/nDCG labels.
 * @param {object|null} baseline - Optional metric name → previous value map;
 *   when a metric is present, its delta is appended to the printed line.
 */
function printSummary(results, kMain, baseline) {
    const positive = [];
    const negative = [];
    for (const r of results) {
        (r.isNegativeSample ? negative : positive).push(r);
    }
    // Null metrics are averaged as 0.
    const meanOf = (field) => avg(positive.map((r) => r[field] ?? 0));
    const recallMain = meanOf('recallMain');
    const recall50 = meanOf('recall50');
    const mrr = meanOf('mrrMain');
    const ndcg = meanOf('ndcgMain');
    const top1Hits = positive.filter((r) => r.top1Correct === true).length;
    const top1Acc = top1Hits / (positive.length || 1);
    const fpHits = negative.filter((r) => r.falsePositive).length;
    const fpRate = fpHits / (negative.length || 1);
    // Signed delta against the baseline, or '' when there is nothing to compare.
    const diff = (metric, val) => {
        if (!baseline || !(metric in baseline)) {
            return '';
        }
        const delta = val - baseline[metric];
        const sign = delta >= 0 ? '+' : '';
        return ` (${sign}${formatPct(delta)})`;
    };
    const rule = '='.repeat(60);
    const today = new Date().toISOString().slice(0, 10);
    console.log(`\n${rule}`);
    console.log(`=== Eval Report ${today} ===`);
    console.log(rule);
    console.log(`Queries total: ${results.length} (positive: ${positive.length}, negative: ${negative.length})`);
    console.log('');
    console.log(`Recall@${kMain}: ${formatPct(recallMain)}${diff('recallMain', recallMain)}`);
    console.log(`Recall@50: ${formatPct(recall50)}${diff('recall50', recall50)}`);
    console.log(`MRR@${kMain}: ${formatPct(mrr)}${diff('mrr', mrr)}`);
    console.log(`nDCG@${kMain}: ${formatPct(ndcg)}${diff('ndcg', ndcg)}`);
    console.log(`Top1 Acc: ${formatPct(top1Acc)}${diff('top1Acc', top1Acc)}`);
    console.log(`False Pos: ${formatPct(fpRate)} (negative samples incorrectly returned results)`);
    console.log('');
    // ── Failure breakdown ──
    const failureCounts = {
        no_semantic_recall: 0,
        reusability_filtered: 0,
        structure_filtered: 0,
        ranked_below_topk: 0,
        quality_gate_rejected: 0,
        found: 0,
    };
    positive
        .flatMap((r) => r.failures)
        .forEach(({ type }) => {
            failureCounts[type]++;
        });
    // Denominator for the percentages: every failure, plus one per fully recalled query.
    let totalExpected = 0;
    for (const r of positive) {
        totalExpected += r.failures.length + (r.recallMain === 1 ? 1 : 0);
    }
    console.log('--- Failure Breakdown ---');
    const failureActionHints = {
        no_semantic_recall: '→ 调大 SYMBOL_TOP_K / 增加 queryVariants 数量',
        reusability_filtered: '→ 检查 isReusableCandidate 路径规则是否误杀',
        structure_filtered: '→ 检查 category 过滤条件',
        ranked_below_topk: '→ 调整 RANK_WEIGHTS / LITERAL_MATCH_PRIORITY_BOOST',
        quality_gate_rejected: '→ 调低 MIN_RECOMMENDATION_SCORE 阈值',
        found: '',
    };
    Object.entries(failureCounts)
        .filter(([type]) => type !== 'found')
        .forEach(([type, count]) => {
            const pct = totalExpected > 0
                ? ((count / totalExpected) * 100).toFixed(1)
                : '0.0';
            const hint = failureActionHints[type];
            console.log(` ${type.padEnd(26)} ${String(count).padStart(3)} (${pct}%) ${hint}`);
        });
    console.log(`${rule}\n`);
}
|
|
184
|
+
// ─── 主流程 ───────────────────────────────────────────────────────────────────
|
|
185
|
+
/**
 * Reads a JSONL query set: one JSON document per line, with blank lines
 * skipped.
 *
 * @param {string} filePath - Path of the JSONL file.
 * @returns {Promise<object[]>} Parsed cases in file order.
 * @throws {SyntaxError} When a line is not valid JSON. The message carries
 *   `<file>:<line>` so the offending record can be located, and the original
 *   parser error is attached as `cause`.
 */
async function loadQuerySet(filePath) {
    const cases = [];
    const input = fs.createReadStream(filePath);
    const rl = readline.createInterface({
        input,
        crlfDelay: Infinity,
    });
    let lineNo = 0;
    try {
        for await (const line of rl) {
            lineNo += 1;
            const trimmed = line.trim();
            if (!trimmed) {
                continue;
            }
            try {
                cases.push(JSON.parse(trimmed));
            }
            catch (err) {
                throw new SyntaxError(`${filePath}:${lineNo}: invalid JSON in query set (${err.message})`, { cause: err });
            }
        }
    }
    finally {
        // Release the file handle even when parsing aborts midway.
        rl.close();
        input.destroy();
    }
    return cases;
}
|
|
199
|
+
/**
 * Runs the offline evaluation end to end: loads the query set, asks the
 * recommendation service for the wide top-K of every case, derives per-query
 * metrics and failure records, prints the summary, writes a dated JSONL
 * report into OUTPUT_DIR, and exits with code 1 when any positive query has
 * zero recall at the main cut-off.
 *
 * A case whose `expected` list is empty is treated as a negative sample: its
 * metrics are null, and it is a false positive if anything was returned.
 *
 * @returns {Promise<void>}
 */
async function runEval() {
    console.log(`Loading query set: ${QUERY_SET_PATH}`);
    const cases = await loadQuerySet(QUERY_SET_PATH);
    console.log(`Loaded ${cases.length} queries. Running eval with limit=${TOP_K_MAIN}/${TOP_K_WIDE}...\n`);
    const service = new RecommendationService(new SymbolRepository());
    const results = [];
    for (const queryCase of cases) {
        const { expected, input } = queryCase;
        const isNegative = expected.length === 0;
        // Single service call at the wide limit; the main top-K is a prefix of it.
        const wideResult = await service.recommendComponent({
            ...input,
            limit: TOP_K_WIDE,
            evalMode: true,
        });
        const { recommended, alternatives } = wideResult;
        const allReturned = recommended
            ? [recommended, ...alternatives]
            : [...alternatives];
        const wideNames = allReturned.map((symbol) => symbol.name);
        const mainNames = wideNames.slice(0, TOP_K_MAIN);
        // NOTE(review): this map only knows symbols that were returned, while
        // failure classification only looks up symbols that were NOT returned,
        // so the ID lookup there appears to always miss — confirm intent.
        const idByName = buildIdMapFromResult(recommended, alternatives);
        // Metrics stay null for negative samples.
        let recallMain = null;
        let recall50 = null;
        let mrrMain = null;
        let ndcgMain = null;
        let top1Correct = null;
        let failures = [];
        if (!isNegative) {
            recallMain = recallAtK(mainNames, expected, TOP_K_MAIN);
            recall50 = recallAtK(wideNames, expected, TOP_K_WIDE);
            mrrMain = mrrAtK(mainNames, expected, TOP_K_MAIN);
            ndcgMain = ndcgAtK(mainNames, expected, TOP_K_MAIN);
            top1Correct = expected.some((e) => e.rel === 2 && recommended?.name === e.name);
            failures = classifyFailuresFromTrace(expected, wideNames, wideResult.evalTrace, idByName);
        }
        const falsePositive = isNegative && allReturned.length > 0;
        results.push({
            queryId: queryCase.id,
            query: input.query,
            tags: queryCase.tags,
            recallMain,
            recall50,
            mrrMain,
            ndcgMain,
            top1Correct,
            returnedNames: mainNames,
            failures,
            isNegativeSample: isNegative,
            falsePositive,
        });
        // Per-query progress line.
        let status;
        if (isNegative) {
            status = falsePositive ? '✗ FP' : '✓ TN';
        }
        else if (recallMain === 1) {
            status = `✓ R@${TOP_K_MAIN}=1.0`;
        }
        else {
            status = `✗ R@${TOP_K_MAIN}=${(recallMain ?? 0).toFixed(2)}`;
        }
        console.log(` [${queryCase.id}] ${input.query.slice(0, 40).padEnd(40)} ${status}`);
    }
    printSummary(results, TOP_K_MAIN, null);
    // JSONL report, one record per query, named after today's date.
    if (OUTPUT_DIR) {
        fs.mkdirSync(OUTPUT_DIR, { recursive: true });
        const dateStr = new Date().toISOString().slice(0, 10);
        const outPath = path.join(OUTPUT_DIR, `${dateStr}.jsonl`);
        const body = results.map((r) => JSON.stringify(r)).join('\n');
        fs.writeFileSync(outPath, body + '\n', 'utf8');
        console.log(`Report written to: ${outPath}`);
    }
    // Non-zero exit when any positive query recalled nothing at the main cut-off.
    const zeroRecall = results.filter((r) => !r.isNegativeSample && r.recallMain === 0);
    if (zeroRecall.length === 0) {
        return;
    }
    console.log(`\nWARN: ${zeroRecall.length} positive queries have Recall@${TOP_K_MAIN}=0:`);
    zeroRecall.forEach((r) => {
        console.log(` [${r.queryId}] ${r.query}`);
    });
    process.exit(1);
}
runEval().catch((err) => {
    console.error('Eval failed:', err);
    process.exit(1);
});
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* queryRewrite.ts — query 预处理配置:噪音词清洗 + 同义词/别名扩展。
|
|
3
|
+
*
|
|
4
|
+
* 用于 buildQueryVariants,通过消除口语化干扰词和补充同义词变体,
|
|
5
|
+
* 提升语义检索的 recall(尤其是中英混用、别名查询场景)。
|
|
6
|
+
*/
|
|
7
|
+
// ─── 噪音词清洗正则(从 recommendationService 迁移) ────────────────────────
|
|
8
|
+
/**
 * Patterns applied one after another to the raw query to strip
 * conversational filler that carries no search meaning.
 *
 * Every pattern has the /g flag. Whitespace left behind by a replacement is
 * expected to be collapsed by the caller.
 *
 * The English filler words ("hook", "util", and their plurals) are matched
 * on word boundaries only, so identifiers and longer words such as "useHook"
 * or "utility" are left intact.
 */
export const NOISE_PATTERNS = [
    /^帮我找(找)?(一个|一下)?/g,
    /^有没有(现成的)?/g,
    /^请推荐(一个|一下)?/g,
    /可复用/g,
    /现成的/g,
    /封装好的/g,
    /(组件|函数|\bhooks?\b|工具|\butils?\b)(实现)?/gi,
];
|
|
21
|
+
// ─── 同义词/别名字典 ─────────────────────────────────────────────────────────
|
|
22
|
+
/**
 * Synonym groups. Each key is the preferred English root of a concept and
 * the value lists other ways to express the same concept (Chinese terms,
 * abbreviations, aliases).
 *
 * A group is hit when any of its terms (key or alias) occurs in the query;
 * the first term of the group that does not occur in the query is then used
 * as the replacement.
 *
 * When adding a group: use the shortest, most generic English root as the
 * key and list the Chinese terms first in the alias array.
 */
export const SYNONYM_MAP = {
    // Form inputs
    input: ['输入框', '输入', 'textfield', 'textinput'],
    textarea: ['文本域', '多行输入', 'multiline'],
    select: ['选择器', '下拉框', '下拉', 'dropdown'],
    checkbox: ['复选框', '勾选'],
    radio: ['单选框', '单选'],
    // Overlays
    dialog: ['弹窗', '弹框', '对话框', 'modal', 'popup'],
    tooltip: ['提示', '气泡提示', '悬浮提示', 'popover'],
    drawer: ['抽屉', '侧边栏', 'sidebar'],
    // Feedback
    loading: ['加载', '加载中', 'spinner'],
    skeleton: ['骨架屏', '占位图', 'placeholder'],
    notification: ['通知', '消息', '提醒', 'toast'],
    alert: ['警告', '警示', '提示框'],
    // Data display
    table: ['表格'],
    list: ['列表'],
    pagination: ['分页', '翻页', 'pager'],
    tabs: ['标签页', '选项卡', 'tab'],
    badge: ['徽标', '角标', '标记'],
    tag: ['标签', 'chip'],
    // Navigation
    navigation: ['导航', 'nav'],
    menu: ['菜单'],
    breadcrumb: ['面包屑'],
    // Media / layout
    carousel: ['轮播', '走马灯', 'slider', 'swiper'],
    upload: ['上传', '文件上传', 'file upload'],
    image: ['图片', '图像', 'img'],
    // Common hooks
    debounce: ['防抖', '去抖', 'usedebounce'],
    throttle: ['节流', 'usethrottle'],
    // Search
    search: ['搜索', '查询', 'filter'],
    // Buttons
    button: ['按钮', 'btn'],
};
// ─── Synonym expansion ───────────────────────────────────────────────────────
/**
 * Builds a case-insensitive matcher for one synonym term. ASCII terms match
 * whole words only, so "tag" does not fire inside "stage"; other terms
 * (Chinese) match as plain substrings because they have no word delimiters.
 *
 * @param {string} term
 * @param {string} flags - RegExp flags, e.g. 'i' for testing or 'gi' for replacing.
 * @returns {RegExp}
 */
function synonymTermPattern(term, flags) {
    const escaped = term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const isAscii = /^[\x00-\x7F]+$/.test(term);
    return new RegExp(isAscii ? `\\b${escaped}\\b` : escaped, flags);
}
/**
 * Produces a synonym-expanded variant of `query`: finds a SYNONYM_MAP term in
 * the query and replaces it with the first term of the same group that the
 * query does not already contain. Returns null when no group yields a variant
 * that differs from the query.
 *
 * The longest matching term is preferred, both within a group and across
 * groups (ties keep SYNONYM_MAP order). This ensures "加载中" is replaced as
 * a whole rather than only its prefix "加载", and "提示框" resolves to the
 * alert group rather than the shorter tooltip alias "提示".
 *
 * @example
 * buildSynonymVariant('弹窗 onChange') // => 'dialog onChange'
 * buildSynonymVariant('input onChange') // => '输入框 onChange'
 *
 * @param {string} query
 * @returns {string|null}
 */
export function buildSynonymVariant(query) {
    const candidates = [];
    for (const [canonical, aliases] of Object.entries(SYNONYM_MAP)) {
        const allTerms = [canonical, ...aliases];
        const present = allTerms.filter((t) => synonymTermPattern(t, 'i').test(query));
        if (present.length === 0) {
            continue;
        }
        const substitute = allTerms.find((t) => !present.includes(t));
        if (substitute === undefined) {
            continue;
        }
        const matchedTerm = present.reduce((best, t) => (t.length > best.length ? t : best));
        candidates.push({ matchedTerm, substitute });
    }
    // Array.prototype.sort is stable, so equal lengths keep SYNONYM_MAP order.
    candidates.sort((a, b) => b.matchedTerm.length - a.matchedTerm.length);
    for (const { matchedTerm, substitute } of candidates) {
        // A function replacer keeps the substitute literal ('$' is not special).
        const replaced = query.replace(synonymTermPattern(matchedTerm, 'gi'), () => substitute);
        const trimmed = replaced.replace(/\s+/g, ' ').trim();
        if (trimmed && trimmed !== query) {
            return trimmed;
        }
    }
    return null;
}
|
package/dist/config/tuning.js
CHANGED
|
@@ -100,6 +100,10 @@ export const REQUIRED_FIELD_FALLBACK_MIN_SCORE = 0.4;
|
|
|
100
100
|
export const LITERAL_MATCH_PRIORITY_BOOST = 0.22;
|
|
101
101
|
/** 路径为 demo/example 风格时,对优先级分数扣减的值 */
|
|
102
102
|
export const DEMO_PATH_PRIORITY_PENALTY = 0.18;
|
|
103
|
+
/** 文件名为 index.js/ts/tsx/jsx 时对优先级分数的加成(优先推荐组件目录入口文件) */
|
|
104
|
+
export const INDEX_FILE_PRIORITY_BOOST = 0.18;
|
|
105
|
+
/** 同目录中存在 index 文件时,对其他非入口子文件的优先级扣减(避免 menu.js / panel.js 等内部实现抢占推荐位) */
|
|
106
|
+
export const SAME_DIR_INDEX_EXISTS_PENALTY = 0.25;
|
|
103
107
|
// ─── 搜索工具结果过滤 (tools/searchSymbols.ts) ───────────────────────────────
|
|
104
108
|
/** 最终返回结果所需的最低综合评分 */
|
|
105
109
|
export const SEARCH_SCORE_THRESHOLD = 0.45;
|
|
@@ -117,19 +117,51 @@ export function splitTextIntoChunks(content, options = {}) {
|
|
|
117
117
|
? sliceWithOverlap(block.text, maxChars, overlapChars)
|
|
118
118
|
: [block.text];
|
|
119
119
|
for (const part of oversizedParts) {
|
|
120
|
-
// 3.
|
|
121
|
-
|
|
120
|
+
// ── 3. 滑动窗口 + overlap ─────────────────────────────────────────
|
|
121
|
+
// 目标:把 parts 依次合并到 currentBlocks,直到"该收了"再收敛成一个 chunk。
|
|
122
|
+
// 收敛后 finalizeChunk 会把末尾 overlapChars 个字符带入下一块,减少边界信息丢失。
|
|
123
|
+
//
|
|
124
|
+
// 执行示例(targetChars=20, maxChars=30, overlapChars=5):
|
|
125
|
+
//
|
|
126
|
+
// part="Hello world"(11) currentLength=0 → 直接 push
|
|
127
|
+
// currentBlocks=["Hello world"] currentLength=11
|
|
128
|
+
//
|
|
129
|
+
// part="Foo bar baz"(11) additionLength=11+2=13 currentLength+13=24 ≤ 30,未达目标 → 直接 push
|
|
130
|
+
// currentBlocks=["Hello world","Foo bar baz"] currentLength=24
|
|
131
|
+
//
|
|
132
|
+
// part="A long sentence"(15) additionLength=15+2=17 currentLength+17=41 > 30 → wouldOverflowMax=true
|
|
133
|
+
// → finalizeChunk: chunks=["Hello world\n\nFoo bar baz"]
|
|
134
|
+
// overlap="r baz"(末5字符) currentBlocks=["r baz"] currentLength=5
|
|
135
|
+
// → push "A long sentence"
|
|
136
|
+
// currentBlocks=["r baz","A long sentence"] currentLength=5+2+15=22
|
|
137
|
+
//
|
|
138
|
+
// 最终 finalizeChunk(overlap=0): chunks 追加 "r baz\n\nA long sentence"
|
|
139
|
+
// ─────────────────────────────────────────────────────────────────
|
|
140
|
+
// SEP=2 对应 blocks.join('\n\n') 中每条边界的 '\n\n' 长度;
|
|
141
|
+
// 首个 block 无分隔符,所以 currentLength===0 时不加。
|
|
142
|
+
const SEP = 2;
|
|
143
|
+
const additionLength = currentLength === 0 ? part.length : SEP + part.length;
|
|
144
|
+
// 两种情况需要先收敛当前 chunk:
|
|
145
|
+
// 1. wouldOverflowMax:加入本 part 后超出硬上限,被动截断;
|
|
146
|
+
// 2. reachedTarget :当前已达目标大小,主动分块,保持粒度均匀。
|
|
122
147
|
const wouldOverflowMax = currentLength > 0 && currentLength + additionLength > maxChars;
|
|
123
148
|
const reachedTarget = currentLength >= targetChars;
|
|
124
|
-
// 已接近目标大小或即将超出上限时,先收敛当前 chunk,再开始下一块。
|
|
125
149
|
if (wouldOverflowMax || reachedTarget) {
|
|
150
|
+
// finalizeChunk 写入 chunks,并把末尾 overlap 文本返回作为新 currentBlocks 起点。
|
|
126
151
|
currentBlocks = finalizeChunk(chunks, currentBlocks, overlapChars);
|
|
152
|
+
// overlap 文本长度不固定,必须重算(不能增量推导)。
|
|
127
153
|
currentLength = currentBlocks.join('\n\n').length;
|
|
128
154
|
}
|
|
129
155
|
currentBlocks.push(part);
|
|
130
|
-
|
|
156
|
+
// flush 后 currentBlocks 可能含 overlap(length ≥ 1),也可能为空(length === 0);
|
|
157
|
+
// 增量计算避免每次重新 join 整个数组。
|
|
158
|
+
currentLength =
|
|
159
|
+
currentLength === 0
|
|
160
|
+
? part.length
|
|
161
|
+
: currentLength + SEP + part.length;
|
|
131
162
|
}
|
|
132
163
|
}
|
|
164
|
+
// 收尾兜底,确保剩余内容不丢失
|
|
133
165
|
finalizeChunk(chunks, currentBlocks, 0);
|
|
134
166
|
return chunks;
|
|
135
167
|
}
|
|
@@ -1,29 +1,22 @@
|
|
|
1
1
|
import { env } from '../config/env.js';
|
|
2
|
-
import { getAllTableSQLs } from '../db/schema.js';
|
|
3
2
|
import { SYMBOL_STATUS } from '../config/symbolStatus.js';
|
|
4
3
|
/**
|
|
5
4
|
* 依赖表上 `(path, name)` 唯一键:新行插入,已存在则更新类型/描述/内容与 meta;**不**修改 `usage_count`。
|
|
5
|
+
* 事务与连接生命周期由调用方管理。
|
|
6
6
|
* @param rows 来自 `indexProject`;空数组时立即返回,不开启事务。
|
|
7
7
|
* @param embeddings 与 `rows` 等长;某项为 `null` 表示本行不更新已有 `embedding`(新行则写入 NULL)。
|
|
8
8
|
* - 有值 → status 置为 online(2)
|
|
9
9
|
* - null → 新行写 pending(1),已有行保持原 status
|
|
10
10
|
*/
|
|
11
|
-
export async function upsertSymbols(
|
|
11
|
+
export async function upsertSymbols(client, rows, embeddings) {
|
|
12
12
|
if (rows.length === 0)
|
|
13
13
|
return;
|
|
14
14
|
if (embeddings && embeddings.length !== rows.length) {
|
|
15
15
|
throw new Error('upsertSymbols: embeddings length must match rows');
|
|
16
16
|
}
|
|
17
17
|
const actor = process.env.GITHUB_USERNAME?.trim() || 'system';
|
|
18
|
-
const
|
|
19
|
-
|
|
20
|
-
// 确保 extension + 表 + 基础索引存在
|
|
21
|
-
for (const sql of getAllTableSQLs()) {
|
|
22
|
-
await client.query(sql);
|
|
23
|
-
}
|
|
24
|
-
await client.query('BEGIN');
|
|
25
|
-
const t = env.symbolsTable;
|
|
26
|
-
const sql = `
|
|
18
|
+
const t = env.symbolsTable;
|
|
19
|
+
const sql = `
|
|
27
20
|
INSERT INTO ${t}
|
|
28
21
|
(name, type, category, path, description, content, meta,
|
|
29
22
|
insert_user, updated_user, embedding, semantic_hash, file_hash, status)
|
|
@@ -36,48 +29,40 @@ export async function upsertSymbols(pool, rows, embeddings) {
|
|
|
36
29
|
meta = EXCLUDED.meta,
|
|
37
30
|
updated_user = EXCLUDED.updated_user,
|
|
38
31
|
embedding = CASE
|
|
39
|
-
WHEN EXCLUDED.embedding IS NOT NULL THEN EXCLUDED.embedding
|
|
40
|
-
WHEN EXCLUDED.semantic_hash != ${t}.semantic_hash THEN NULL
|
|
41
|
-
ELSE ${t}.embedding
|
|
32
|
+
WHEN EXCLUDED.embedding IS NOT NULL THEN EXCLUDED.embedding -- 本次带了新向量,直接使用
|
|
33
|
+
WHEN EXCLUDED.semantic_hash != ${t}.semantic_hash THEN NULL -- 结构变了,旧向量作废,等重算
|
|
34
|
+
ELSE ${t}.embedding -- 结构未变,复用旧向量
|
|
42
35
|
END,
|
|
43
36
|
semantic_hash = EXCLUDED.semantic_hash,
|
|
44
37
|
file_hash = EXCLUDED.file_hash,
|
|
45
38
|
status = CASE
|
|
46
|
-
WHEN EXCLUDED.embedding IS NOT NULL THEN ${SYMBOL_STATUS.ONLINE}
|
|
47
|
-
WHEN EXCLUDED.semantic_hash != ${t}.semantic_hash THEN ${SYMBOL_STATUS.PENDING}
|
|
48
|
-
|
|
39
|
+
WHEN EXCLUDED.embedding IS NOT NULL THEN ${SYMBOL_STATUS.ONLINE} -- 本次带了新向量 → 直接 online
|
|
40
|
+
WHEN EXCLUDED.semantic_hash != ${t}.semantic_hash THEN ${SYMBOL_STATUS.PENDING} -- 结构变了,需重新 embedding → pending
|
|
41
|
+
WHEN ${t}.embedding IS NOT NULL THEN ${SYMBOL_STATUS.ONLINE} -- 结构未变且已有向量(含 offline 恢复)→ online
|
|
42
|
+
ELSE ${SYMBOL_STATUS.PENDING} -- 结构未变但无向量(首次 or 之前失败)→ pending
|
|
49
43
|
END,
|
|
50
44
|
updated_at = NOW()
|
|
51
45
|
`;
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
}
|
|
74
|
-
await client.query('COMMIT');
|
|
75
|
-
}
|
|
76
|
-
catch (e) {
|
|
77
|
-
await client.query('ROLLBACK');
|
|
78
|
-
throw e;
|
|
79
|
-
}
|
|
80
|
-
finally {
|
|
81
|
-
client.release();
|
|
46
|
+
for (let i = 0; i < rows.length; i++) {
|
|
47
|
+
const r = rows[i];
|
|
48
|
+
const emb = embeddings?.[i];
|
|
49
|
+
// pgvector 接受 "[x1,x2,...]" 格式字符串
|
|
50
|
+
const vecStr = emb != null ? `[${emb.join(',')}]` : null;
|
|
51
|
+
const statusVal = vecStr !== null ? SYMBOL_STATUS.ONLINE : SYMBOL_STATUS.PENDING;
|
|
52
|
+
await client.query(sql, [
|
|
53
|
+
r.name,
|
|
54
|
+
r.type,
|
|
55
|
+
r.category,
|
|
56
|
+
r.path,
|
|
57
|
+
r.description,
|
|
58
|
+
r.content,
|
|
59
|
+
JSON.stringify(r.meta),
|
|
60
|
+
actor,
|
|
61
|
+
actor,
|
|
62
|
+
vecStr, // $10 → cast as vector, null 时写 NULL
|
|
63
|
+
r.semantic_hash,
|
|
64
|
+
r.file_hash,
|
|
65
|
+
statusVal,
|
|
66
|
+
]);
|
|
82
67
|
}
|
|
83
68
|
}
|
|
@@ -65,7 +65,7 @@ const REUSABLE_CODE_ADVISOR_MARKDOWN = `# 可复用代码推荐
|
|
|
65
65
|
> 输出上述模板后**等待用户在聊天框输入回复**,识别规则:
|
|
66
66
|
> - 用户输入 **"1"、"采纳"、"采纳推荐"、"ok"、"好的"** 或类似确认词 → 从上方输出文本中读取 \`symbolId:<id>\` 那一行的值,立即调用 \`inc_usage\` 工具传入该 id,调用成功后回复"✓ 已记录使用,可直接集成"
|
|
67
67
|
> - 用户输入 **"2"、"取消"、"不用了"** 或类似否定词 → 回复"好的,已取消",停止
|
|
68
|
-
> -
|
|
68
|
+
> - 用户输入其他内容(如追问细节、props、最小接入方式)→ 可以继续补充说明,但回答结尾**必须再次原样展示**"是否采纳"的两个选项,不得省略
|
|
69
69
|
|
|
70
70
|
无结果时:
|
|
71
71
|
|
|
@@ -85,6 +85,7 @@ const REUSABLE_CODE_ADVISOR_MARKDOWN = `# 可复用代码推荐
|
|
|
85
85
|
> 输出上述模板后**等待用户在聊天框输入回复**,识别规则:
|
|
86
86
|
> - 用户输入 **"1"、"新建"、"帮我创建"** 或类似确认词 → 进入新建流程,引导用户确认最小接口设计
|
|
87
87
|
> - 用户输入 **"2"、"取消"、"不用了"** → 回复"好的,已取消",停止
|
|
88
|
+
> - 用户输入其他内容(如追问为何没找到、想先看候选)→ 可以继续解释,但回答结尾**必须再次原样展示**"是否采纳"的两个选项,不得省略
|
|
88
89
|
`;
|
|
89
90
|
export function registerReusableCodeAdvisorPrompt(server) {
|
|
90
91
|
server.prompt('reusable-code-advisor', REUSABLE_CODE_ADVISOR_DESCRIPTION, {
|
|
@@ -146,7 +146,7 @@ export class SymbolRepository {
|
|
|
146
146
|
path ILIKE $1 OR
|
|
147
147
|
meta::text ILIKE $1
|
|
148
148
|
)
|
|
149
|
-
AND status = $2
|
|
149
|
+
AND status = $2::smallint
|
|
150
150
|
`;
|
|
151
151
|
if (tokens.length) {
|
|
152
152
|
const tokenClauses = tokens.map((token) => {
|
|
@@ -165,7 +165,7 @@ export class SymbolRepository {
|
|
|
165
165
|
meta::text ILIKE $1 OR
|
|
166
166
|
(${tokenClauses.join(' OR ')})
|
|
167
167
|
)
|
|
168
|
-
AND status = $2
|
|
168
|
+
AND status = $2::smallint
|
|
169
169
|
`;
|
|
170
170
|
}
|
|
171
171
|
if (type) {
|
|
@@ -212,7 +212,7 @@ export class SymbolRepository {
|
|
|
212
212
|
1 - (embedding <=> $1::vector) AS similarity
|
|
213
213
|
FROM ${env.symbolsTable}
|
|
214
214
|
WHERE embedding IS NOT NULL
|
|
215
|
-
|
|
215
|
+
AND status = $2::smallint
|
|
216
216
|
`;
|
|
217
217
|
if (opts?.type) {
|
|
218
218
|
params.push(opts.type);
|
|
@@ -248,8 +248,9 @@ export class SymbolRepository {
|
|
|
248
248
|
SELECT id, name, type, category, path, description, content, meta::text AS meta, usage_count, created_at
|
|
249
249
|
FROM ${env.symbolsTable}
|
|
250
250
|
WHERE name = $1
|
|
251
|
+
AND status = $2::smallint
|
|
251
252
|
LIMIT 1
|
|
252
|
-
`, [name]);
|
|
253
|
+
`, [name, SEARCHABLE_STATUS]);
|
|
253
254
|
console.error('[code-intelligence-mcp] repository.getByName.db table=%s rows=%s', env.symbolsTable, String(rows.length));
|
|
254
255
|
if (rows.length === 0) {
|
|
255
256
|
return null;
|
|
@@ -310,8 +311,9 @@ export class SymbolRepository {
|
|
|
310
311
|
let sql = `
|
|
311
312
|
SELECT id, name, type, category, path, description, content, meta::text AS meta, usage_count, created_at
|
|
312
313
|
FROM ${env.symbolsTable}
|
|
313
|
-
|
|
314
|
+
WHERE status = $1::smallint
|
|
314
315
|
`;
|
|
316
|
+
params.push(SEARCHABLE_STATUS);
|
|
315
317
|
if (type) {
|
|
316
318
|
params.push(type);
|
|
317
319
|
sql += ` AND type = $${params.length}`;
|