@lorrylurui/code-intelligence-mcp 2.0.7 → 2.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -45,3 +45,22 @@
45
45
 
46
46
  MYSQL_SYMBOLS_TABLE=frontend_collections_symbols
47
47
  INDEX_GLOB=xxx/\*\*/\*.{js,jsx,ts,tsx}
48
+
49
+ ## 4) 离线测评
50
+
51
+ ```bash
52
+ npx tsx src/cli/eval-recommendation-cli.ts
53
+ # 或指定 limit
54
+ npx tsx src/cli/eval-recommendation-cli.ts --limit 10
55
+ ```
56
+
57
+ ## 5) 分析离线测评结果
58
+
59
+ ```bash
60
+ npm run analyze # 自动读最新结果文件
61
+ npm run analyze -- offline_eval/results/2026-05-27.jsonl # 指定文件
62
+ npm run analyze -- --baseline offline_eval/results/2026-05-26.jsonl # 与 baseline 对比 delta
63
+
64
+ ```
65
+
66
+ 或者直接引用 eval-analysis.prompt.md 对最新结果进行分析。
@@ -2,75 +2,64 @@
2
2
  import { env } from '../config/env.js';
3
3
  import { getPool } from '../db/postgres.js';
4
4
  import { indexProject } from '../indexer/indexProject.js';
5
- import { DEFAULT_STATUS_ON_UPSERT, SYMBOL_STATUS, } from '../config/symbolStatus.js';
5
+ import { upsertSymbols } from '../indexer/persistSymbols.js';
6
6
  import { enqueueEmbeddingBatch, closeEmbeddingQueue, } from '../services/embeddingQueue.js';
7
+ import { markRemovedSymbolsOffline } from '../services/reconcileIndexedSymbols.js';
7
8
  export async function runIncrementalIndex(opts) {
8
9
  const { projectRoot, changedFiles, deletedFiles, renamedFiles = [] } = opts;
9
10
  const pool = getPool();
10
11
  const tableName = env.symbolsTable;
11
- // 1. 删除文件:标记 offline
12
- for (const file of deletedFiles) {
13
- await pool.query(`UPDATE ${tableName} SET status = $1 WHERE path = $2`, [SYMBOL_STATUS.OFFLINE, file]);
14
- console.error(`[ci-index] marked offline: ${file}`);
15
- }
16
- // 2. 重命名文件:更新path
17
- for (const { from, to } of renamedFiles) {
18
- await pool.query(`UPDATE ${tableName} SET path = $1 WHERE path = $2`, [
19
- to,
20
- from,
21
- ]);
22
- console.error(`[ci-index] renamed: ${from} -> ${to}`);
23
- }
24
- // 3. 变更/新增文件:重新索引并标记 pending
25
- if (changedFiles.length > 0) {
26
- const rows = await indexProject({
12
+ const rows = changedFiles.length > 0
13
+ ? await indexProject({
27
14
  projectRoot,
28
15
  globPatterns: changedFiles,
29
- });
30
- for (const row of rows) {
31
- // 写入结构化数据
32
- // status 逻辑:新行写 pending;已有行仅在 semantic_hash 发生变化时才重置为 pending,
33
- // hash 未变说明语义未变,保留原 status(online → 缓存命中,不重复 embedding)
34
- await pool.query(`INSERT INTO ${tableName}
35
- (name, type, category, path, description, content, meta,
36
- file_hash, semantic_hash, status,
37
- usage_count, created_at, updated_at)
38
- VALUES ($1, $2, $3, $4, $5, $6, $7::jsonb, $8, $9, $10, 0, NOW(), NOW())
39
- ON CONFLICT (path, name) DO UPDATE SET
40
- type = EXCLUDED.type,
41
- category = EXCLUDED.category,
42
- description = EXCLUDED.description,
43
- content = EXCLUDED.content,
44
- meta = EXCLUDED.meta,
45
- file_hash = EXCLUDED.file_hash,
46
- semantic_hash = EXCLUDED.semantic_hash,
47
- status = CASE WHEN ${tableName}.semantic_hash = EXCLUDED.semantic_hash
48
- THEN ${tableName}.status
49
- ELSE EXCLUDED.status END,
50
- updated_at = NOW()`, [
51
- row.name,
52
- row.type,
53
- row.category ?? null,
54
- row.path,
55
- row.description ?? null,
56
- row.content ?? null,
57
- JSON.stringify(row.meta),
58
- row.file_hash,
59
- row.semantic_hash,
60
- DEFAULT_STATUS_ON_UPSERT,
61
- ]);
62
- console.error(`[ci-index] upserted: ${row.path}:${row.name}`);
16
+ })
17
+ : [];
18
+ const nullPayload = rows.map(() => null);
19
+ const hashes = [
20
+ ...new Set(rows.map((r) => r.semantic_hash).filter(Boolean)),
21
+ ];
22
+ const client = await pool.connect();
23
+ try {
24
+ await client.query('BEGIN');
25
+ // 1. 删除文件:复用 reconcile 的整文件 offline 语义
26
+ if (deletedFiles.length > 0) {
27
+ await markRemovedSymbolsOffline(client, deletedFiles, []);
28
+ for (const file of deletedFiles) {
29
+ console.error(`[ci-index] marked offline: ${file}`);
30
+ }
31
+ }
32
+ // 2. 重命名文件:更新path
33
+ for (const { from, to } of renamedFiles) {
34
+ await client.query(`UPDATE ${tableName} SET path = $1 WHERE path = $2`, [to, from]);
35
+ console.error(`[ci-index] renamed: ${from} -> ${to}`);
36
+ }
37
+ // 3. 变更/新增文件:重新索引并标记 pending
38
+ if (rows.length > 0) {
39
+ await upsertSymbols(client, rows, nullPayload);
40
+ for (const row of rows) {
41
+ console.error(`[ci-index] upserted: ${row.path}:${row.name}`);
42
+ }
43
+ await markRemovedSymbolsOffline(client, changedFiles, rows);
63
44
  }
64
- // 批量入队:jobId = semanticHash,相同 hash 自动去重,1000 个符号可能只产生 N 个唯一 job
65
- const hashes = [
66
- ...new Set(rows.map((r) => r.semantic_hash).filter(Boolean)),
67
- ];
45
+ await client.query('COMMIT');
46
+ }
47
+ catch (error) {
48
+ await client.query('ROLLBACK');
49
+ throw error;
50
+ }
51
+ finally {
52
+ client.release();
53
+ }
54
+ try {
68
55
  if (hashes.length > 0) {
69
56
  await enqueueEmbeddingBatch(hashes, env.symbolsTable);
70
57
  console.error(`[ci-index] enqueued ${hashes.length} unique semantic hashes for embedding`);
71
58
  }
59
+ await closeEmbeddingQueue();
60
+ }
61
+ finally {
62
+ await pool.end();
72
63
  }
73
- await closeEmbeddingQueue();
74
- await pool.end();
75
64
  console.error(`[ci-index] processed ${deletedFiles.length} deletions, ${renamedFiles.length} renames, ${changedFiles.length} changes`);
76
65
  }
@@ -0,0 +1,308 @@
1
+ /**
2
+ * eval-analyze-cli.ts — 离线评测结果分析工具
3
+ *
4
+ * 用法:
5
+ * npm run analyze # 自动读取 offline_eval/results/ 最新文件
6
+ * npm run analyze -- offline_eval/results/2026-05-27.jsonl
7
+ * npm run analyze -- --dir offline_eval/results --baseline offline_eval/results/2026-05-26.jsonl
8
+ *
9
+ * 输出:
10
+ * - 关键指标汇总(含与 baseline 对比 delta)
11
+ * - 按语言/符号类型分组 Recall@10
12
+ * - 失败归因分布
13
+ * - 主要发现(自动检测中英文差距、类型推断问题、误触等)
14
+ * - 建议优先级列表
15
+ */
16
+ import * as fs from 'node:fs';
17
+ import * as path from 'node:path';
18
+ import * as readline from 'node:readline';
19
+ // ─── CLI 参数 ────────────────────────────────────────────────────────────────
20
/** Raw CLI arguments (everything after the node binary and script path). */
const args = process.argv.slice(2);
/**
 * Return the value that follows `flag` on the command line.
 *
 * @param {string} flag - Flag to look for, e.g. `--dir`.
 * @param {string} fallback - Value returned when the flag is absent or has no value.
 * @param {string[]} [argv=args] - Argument list to search (defaults to the process arguments).
 * @returns {string} The flag's value, or `fallback`.
 */
function getArg(flag, fallback, argv = args) {
    const idx = argv.indexOf(flag);
    if (idx === -1) {
        return fallback;
    }
    const value = argv[idx + 1];
    // A following token that is itself a flag means this flag's value was omitted;
    // do not swallow the next flag as a value.
    return value && !value.startsWith('--') ? value : fallback;
}
/** Directory scanned for result files when no explicit file is given. */
const RESULTS_DIR = getArg('--dir', 'offline_eval/results');
/** Optional baseline result file for delta comparison; empty string when not given. */
const BASELINE_PATH = getArg('--baseline', '');
27
+ // ─── 分析阈值(可按实际结果微调) ────────────────────────────────────────────
28
/**
 * Analysis thresholds and display limits (tune to match real results).
 * Frozen so that no code path can silently alter the shared constants.
 */
const THRESHOLDS = Object.freeze({
    /** A Recall@10 gap (en minus zh) above this value triggers the "weak Chinese recall" finding. */
    ZH_EN_RECALL_GAP: 0.1,
    /** A function-tag Recall@10 below this value triggers the "function type inference" finding. */
    FUNC_RECALL_LOW: 0.5,
    /** A no_semantic_recall share above this value triggers the "dominant attribution" finding. */
    NO_SEMANTIC_DOMINANCE: 0.5,
    /** A no_semantic_recall share above this value emits the attribution-fix recommendation. */
    NO_SEMANTIC_REC_TRIGGER: 0.3,
    /** More quality_gate_rejected failures than this emits the quality-gate recommendation. */
    QG_COUNT_MIN: 2,
    /** More ranked_below_topk failures than this emits the ranking recommendation. */
    RANKED_COUNT_MIN: 1,
    /** Width of the horizontal terminal bar, in characters. */
    BAR_WIDTH: 20,
    /** Maximum number of zero-recall Chinese query examples to show. */
    ZH_ZERO_EXAMPLE_LIMIT: 4,
    /** Maximum number of false-positive examples to show. */
    FP_EXAMPLE_LIMIT: 3,
});
48
/**
 * Resolve the result file to analyze.
 *
 * A positional `.jsonl` argument wins. Otherwise the lexicographically last
 * `.jsonl` file in the results directory is used.
 *
 * @param {string[]} [argv=args] - CLI arguments to inspect.
 * @param {string} [resultsDir=RESULTS_DIR] - Directory scanned when no positional file is given.
 * @returns {string} Path of the result file.
 * @throws {Error} When the directory does not exist or contains no `.jsonl` file.
 */
function findLatestResultsFile(argv = args, resultsDir = RESULTS_DIR) {
    // Flags whose next token is their value, not a positional argument.
    const valueFlags = new Set(['--dir', '--baseline']);
    // Positional path (no flag). The token after `--baseline`/`--dir` is skipped,
    // otherwise `--baseline x.jsonl` would be analyzed instead of the latest results.
    const explicit = argv.find((a, i) => a.endsWith('.jsonl') &&
        !a.startsWith('--') &&
        !valueFlags.has(argv[i - 1]));
    if (explicit) {
        return explicit;
    }
    if (!fs.existsSync(resultsDir)) {
        throw new Error(`Results directory not found: ${resultsDir}`);
    }
    const files = fs
        .readdirSync(resultsDir)
        .filter((f) => f.endsWith('.jsonl'))
        .sort()
        .reverse();
    if (files.length === 0) {
        throw new Error(`No .jsonl result files found in ${resultsDir}`);
    }
    return path.join(resultsDir, files[0]);
}
66
+ // ─── 文件加载 ─────────────────────────────────────────────────────────────────
67
/**
 * Read a JSONL result file into an array of parsed records.
 * Blank lines are skipped; every other line must be a JSON document.
 *
 * @param {string} filePath - Path of the `.jsonl` file.
 * @returns {Promise<object[]>} Parsed records in file order.
 * @throws {Error} When a line is not valid JSON; the message names the file and line,
 *   and the original parse error is attached as `cause`.
 */
async function loadResults(filePath) {
    const results = [];
    const input = fs.createReadStream(filePath);
    const rl = readline.createInterface({
        input,
        crlfDelay: Infinity,
    });
    let lineNo = 0;
    try {
        for await (const line of rl) {
            lineNo += 1;
            const trimmed = line.trim();
            if (!trimmed) {
                continue;
            }
            try {
                results.push(JSON.parse(trimmed));
            }
            catch (err) {
                // A bare SyntaxError gives no hint which record is broken.
                throw new Error(`Invalid JSON at ${filePath}:${lineNo}: ${err.message}`, { cause: err });
            }
        }
    }
    finally {
        // Release the file handle even when parsing aborts early.
        rl.close();
        input.destroy();
    }
    return results;
}
80
+ // ─── 工具函数 ─────────────────────────────────────────────────────────────────
81
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {number[]} nums - Values to average.
 * @returns {number} The mean, or 0 for an empty list.
 */
function avg(nums) {
    const count = nums.length;
    if (count === 0) {
        return 0;
    }
    let total = 0;
    nums.forEach((value) => {
        total += value;
    });
    return total / count;
}
86
/**
 * Format a ratio as a percentage string with one decimal place.
 *
 * @param {number} n - Ratio, e.g. 0.5.
 * @returns {string} Percentage text, e.g. "50.0%".
 */
function pct(n) {
    const scaled = n * 100;
    return `${scaled.toFixed(1)}%`;
}
89
/**
 * Render the difference between a metric and its baseline as a signed
 * percentage suffix, e.g. " (+2.5%)".
 *
 * @param {number} curr - Current metric value (ratio).
 * @param {number} [base] - Baseline metric value; omit when there is no baseline.
 * @returns {string} The suffix, or an empty string when `base` is undefined.
 */
function delta(curr, base) {
    if (base === undefined) {
        return '';
    }
    const diff = curr - base;
    let sign = '';
    if (diff >= 0) {
        sign = '+';
    }
    // Same one-decimal percentage formatting as pct().
    const formatted = `${(diff * 100).toFixed(1)}%`;
    return ` (${sign}${formatted})`;
}
96
/**
 * Average main recall over the positive samples carrying a given tag.
 *
 * @param {object[]} results - Evaluation records (read: `isNegativeSample`, `tags`, `recallMain`).
 * @param {string} tag - Tag to select, e.g. "zh" or "hook".
 * @returns {{recall: number, count: number}} Mean `recallMain` (missing values count
 *   as 0; 0 when nothing matches) and the number of matching records.
 */
function recallByTag(results, tag) {
    let total = 0;
    let count = 0;
    results.forEach((r) => {
        // Records come from an external JSONL file: a record without a `tags`
        // array simply does not match instead of crashing the whole report.
        const hasTag = Array.isArray(r.tags) && r.tags.includes(tag);
        if (r.isNegativeSample || !hasTag) {
            return;
        }
        total += r.recallMain ?? 0;
        count += 1;
    });
    return {
        recall: count === 0 ? 0 : total / count,
        count,
    };
}
103
/**
 * Aggregate the headline metrics of one evaluation run.
 *
 * @param {object[]} positive - Positive samples (read: `recallMain`, `recall50`,
 *   `mrrMain`, `ndcgMain`, `top1Correct`).
 * @param {object[]} negative - Negative samples (read: `falsePositive`).
 * @returns {{recallMain: number, recall50: number, mrr: number, ndcg: number,
 *   top1Acc: number, fpRate: number}} Means over the positive samples (missing
 *   values count as 0), the share of positives with `top1Correct === true`, and
 *   the share of negatives with a truthy `falsePositive`. Empty inputs yield 0.
 */
function computeMetrics(positive, negative) {
    // Mean of one numeric field across the positive samples.
    const meanOf = (field) => {
        if (positive.length === 0) {
            return 0;
        }
        let total = 0;
        positive.forEach((r) => {
            total += r[field] ?? 0;
        });
        return total / positive.length;
    };
    let top1Hits = 0;
    positive.forEach((r) => {
        if (r.top1Correct === true) {
            top1Hits += 1;
        }
    });
    let falsePositives = 0;
    negative.forEach((r) => {
        if (r.falsePositive) {
            falsePositives += 1;
        }
    });
    return {
        recallMain: meanOf('recallMain'),
        recall50: meanOf('recall50'),
        mrr: meanOf('mrrMain'),
        ndcg: meanOf('ndcgMain'),
        // `|| 1` keeps the ratio at 0 instead of NaN for an empty sample set.
        top1Acc: top1Hits / (positive.length || 1),
        fpRate: falsePositives / (negative.length || 1),
    };
}
115
+ // ─── 主分析逻辑 ───────────────────────────────────────────────────────────────
116
/**
 * Main analysis: load the selected result file (and optional baseline),
 * compute metrics and print the full report to stdout.
 *
 * Report sections, in order: key metrics (with baseline delta when available),
 * Recall@10 per language/symbol tag, failure attribution, main findings, and
 * prioritized recommendations.
 *
 * @returns {Promise<void>}
 * @throws Propagates errors from locating or loading the result files.
 */
async function analyze() {
    const filePath = findLatestResultsFile();
    const fileName = path.basename(filePath);
    const results = await loadResults(filePath);
    const positive = results.filter((r) => !r.isNegativeSample);
    const negative = results.filter((r) => r.isNegativeSample);
    const metrics = computeMetrics(positive, negative);
    // Records come from an external JSONL file; tolerate a missing `tags` array.
    const hasTag = (r, tag) => (r.tags ?? []).includes(tag);
    // Baseline (used for the delta columns).
    let baseMetrics;
    if (BASELINE_PATH) {
        if (fs.existsSync(BASELINE_PATH)) {
            const baseResults = await loadResults(BASELINE_PATH);
            const basePos = baseResults.filter((r) => !r.isNegativeSample);
            const baseNeg = baseResults.filter((r) => r.isNegativeSample);
            baseMetrics = computeMetrics(basePos, baseNeg);
        }
        else {
            // Make a mistyped baseline path visible instead of silently dropping the deltas.
            console.error(`Baseline file not found, delta comparison skipped: ${BASELINE_PATH}`);
        }
    }
    // Failure attribution counts. A record without `failures` contributes nothing.
    const allFailures = positive.flatMap((r) => r.failures ?? []);
    const failureCounts = {};
    for (const f of allFailures) {
        failureCounts[f.type] = (failureCounts[f.type] ?? 0) + 1;
    }
    const totalFailures = allFailures.length;
    // Grouped recall (language tags + symbol-type tags).
    const tagGroups = [
        'en',
        'zh',
        'zh-en',
        'component',
        'hook',
        'function',
        'util',
        'form',
    ];
    const tagRecalls = new Map();
    for (const tag of tagGroups) {
        const stat = recallByTag(positive, tag);
        if (stat.count > 0) {
            tagRecalls.set(tag, stat);
        }
    }
    // Positive queries with zero main recall.
    const zeroRecall = positive.filter((r) => r.recallMain === 0);
    // False-positive examples among the negative samples.
    const fpExamples = negative.filter((r) => r.falsePositive);
    // ─── Report output ─────────────────────────────────────────────────────────
    const sep = '='.repeat(60);
    const sub = '─'.repeat(60);
    console.log('\n' + sep);
    console.log(`数据来源:${fileName}`);
    console.log(sep);
    // ── Key metrics ──
    console.log('\n关键指标\n');
    console.log(` Recall@10: ${pct(metrics.recallMain).padStart(7)}${delta(metrics.recallMain, baseMetrics?.recallMain)}`);
    console.log(` Recall@50: ${pct(metrics.recall50).padStart(7)}${delta(metrics.recall50, baseMetrics?.recall50)}`);
    console.log(` MRR@10: ${pct(metrics.mrr).padStart(7)}${delta(metrics.mrr, baseMetrics?.mrr)}`);
    console.log(` nDCG@10: ${pct(metrics.ndcg).padStart(7)}${delta(metrics.ndcg, baseMetrics?.ndcg)}`);
    console.log(` Top1 Acc: ${pct(metrics.top1Acc).padStart(7)}${delta(metrics.top1Acc, baseMetrics?.top1Acc)}`);
    console.log(` False Positive: ${pct(metrics.fpRate).padStart(7)}${delta(metrics.fpRate, baseMetrics?.fpRate)}`);
    console.log(`\n 总 query 数:${results.length}(正例 ${positive.length},负例 ${negative.length})`);
    // ── Grouped recall ──
    console.log('\n' + sub);
    console.log('按语言/符号类型 Recall@10\n');
    for (const [tag, stat] of tagRecalls) {
        const bar = '█'
            .repeat(Math.round(stat.recall * THRESHOLDS.BAR_WIDTH))
            .padEnd(THRESHOLDS.BAR_WIDTH);
        console.log(` ${tag.padEnd(12)} ${bar} ${pct(stat.recall).padStart(7)} (${stat.count} queries)`);
    }
    // ── Failure attribution ──
    console.log('\n' + sub);
    console.log('失败归因分布\n');
    const failureActionHints = {
        no_semantic_recall: '→ 扩展 queryVariants / 中文同义词映射 / 调大 SYMBOL_TOP_K',
        quality_gate_rejected: '→ 按 type 降低质量门控阈值',
        ranked_below_topk: '→ 调整 RANK_WEIGHTS / LITERAL_MATCH_PRIORITY_BOOST',
        reusability_filtered: '→ 检查 isReusableCandidate 路径规则',
        structure_filtered: '→ 检查 category 过滤条件',
    };
    const sortedFailures = Object.entries(failureCounts).sort((a, b) => b[1] - a[1]);
    for (const [type, count] of sortedFailures) {
        const p = totalFailures > 0
            ? ((count / totalFailures) * 100).toFixed(1)
            : '0.0';
        const hint = failureActionHints[type] ?? '';
        console.log(` ${type.padEnd(28)} ${String(count).padStart(3)} (${p}%) ${hint}`);
    }
    // ── Main findings ──
    console.log('\n' + sub);
    console.log('主要发现\n');
    const findings = [];
    // Values shared by the findings and the recommendations below.
    const zhStat = tagRecalls.get('zh');
    const enStat = tagRecalls.get('en');
    const zhEnGap = zhStat && enStat ? enStat.recall - zhStat.recall : undefined;
    const hasZhEnGap = zhEnGap !== undefined && zhEnGap > THRESHOLDS.ZH_EN_RECALL_GAP;
    const funcStat = tagRecalls.get('function');
    const isFuncRecallLow = Boolean(funcStat) && funcStat.recall < THRESHOLDS.FUNC_RECALL_LOW;
    const noSemanticCount = failureCounts['no_semantic_recall'] ?? 0;
    // `|| 1` avoids a NaN share when there are no failures at all.
    const noSemanticShare = noSemanticCount / (totalFailures || 1);
    // Finding 1: recall gap between Chinese and English queries.
    if (hasZhEnGap) {
        const zhZero = positive
            .filter((r) => hasTag(r, 'zh') && r.recallMain === 0)
            .map((r) => `"${r.query}"`)
            .slice(0, THRESHOLDS.ZH_ZERO_EXAMPLE_LIMIT);
        findings.push(`中文 query 召回明显弱于英文\n` +
            ` 中文 Recall@10 = ${pct(zhStat.recall)},英文 = ${pct(enStat.recall)},差距 ${pct(zhEnGap)}\n` +
            ` 零召回中文 query 示例:${zhZero.join('、')}`);
    }
    // Finding 2: low recall for function-type queries.
    if (isFuncRecallLow) {
        findings.push(`函数类 query 召回偏低(Recall@10 = ${pct(funcStat.recall)})\n` +
            ` 可能原因:类型推断关键词缺少 function/formatter/validate,导致回退到 component 候选池`);
    }
    // Finding 3: failures dominated by no_semantic_recall.
    if (noSemanticShare > THRESHOLDS.NO_SEMANTIC_DOMINANCE) {
        findings.push(`失败以 no_semantic_recall 为主(${noSemanticCount}/${totalFailures} 条,${(noSemanticShare * 100).toFixed(0)}%)\n` +
            ` 注意:该分类会掩盖 quality_gate_rejected 等更具体原因(当 DB 中无对应 id 时降级记录)`);
    }
    // Finding 4: false positives on negative samples.
    if (fpExamples.length > 0) {
        findings.push(`负例误触 ${fpExamples.length} 条(False Positive = ${pct(metrics.fpRate)})\n` +
            ` 示例:${fpExamples
                .slice(0, THRESHOLDS.FP_EXAMPLE_LIMIT)
                .map((r) => `"${r.query}"`)
                .join('、')}`);
    }
    // Finding 5: total number of zero-recall positive queries, broken down by tag.
    if (zeroRecall.length > 0) {
        const byTag = (tag) => zeroRecall.filter((r) => hasTag(r, tag)).length;
        findings.push(`${zeroRecall.length} 条正例 Recall@10 = 0\n` +
            ` 其中 zh:${byTag('zh')},en:${byTag('en')},component:${byTag('component')},hook:${byTag('hook')},function:${byTag('function')}`);
    }
    findings.forEach((f, i) => console.log(`${i + 1}. ${f}\n`));
    // ── Prioritized recommendations ──
    console.log(sub);
    console.log('建议优先级(按预期收益排序)\n');
    const recs = [];
    if (hasZhEnGap) {
        recs.push({
            priority: '高',
            action: '强化中文同义词扩展(导航栏→navigation bar、日期格式化→format date 等)',
            reason: `中英召回差距 ${pct(zhEnGap)},影响 ${zhStat.count} 条 query`,
        });
    }
    if (isFuncRecallLow) {
        recs.push({
            priority: '高',
            action: '补类型推断关键词(category: function、query: formatter/validate/format)',
            reason: `函数类 Recall@10 = ${pct(funcStat.recall)},类型误判导致错误候选池`,
        });
    }
    if (noSemanticShare > THRESHOLDS.NO_SEMANTIC_REC_TRIGGER) {
        recs.push({
            priority: '中高',
            action: '修正 eval 失败归因逻辑(name+path 查 DB id,避免 no_semantic_recall 兜底掩盖)',
            reason: `${noSemanticCount} 条记为 no_semantic_recall,诊断精度不足`,
        });
    }
    const qgCount = failureCounts['quality_gate_rejected'] ?? 0;
    if (qgCount > THRESHOLDS.QG_COUNT_MIN) {
        recs.push({
            priority: '中高',
            action: '按 symbol type 分阈值降低质量门控(函数/Hook 可比组件更宽松)',
            reason: `${qgCount} 条被 quality gate 拦截`,
        });
    }
    const rankedCount = failureCounts['ranked_below_topk'] ?? 0;
    if (rankedCount > THRESHOLDS.RANKED_COUNT_MIN) {
        recs.push({
            priority: '中',
            action: '加强 index-priority tie-break(同目录 index/menu/panel 命中时强制 index 优先)',
            reason: `${rankedCount} 条有正确候选但排名未进 Top-K`,
        });
    }
    // Fallback so the section is never empty.
    if (recs.length === 0) {
        recs.push({
            priority: '中',
            action: '持续扩充 query_set.jsonl 覆盖更多边界场景',
            reason: '当前指标已较好,可增加测试覆盖度',
        });
    }
    recs.forEach((r, i) => {
        console.log(`${i + 1}. [${r.priority}] ${r.action}`);
        console.log(` 原因:${r.reason}`);
        console.log();
    });
    console.log(sep + '\n');
}
305
// Entry point: run the analysis; on failure print the error message (or the
// raw thrown value) to stderr and exit with status 1.
analyze().catch((err) => {
    const message = err instanceof Error ? err.message : err;
    console.error(message);
    process.exit(1);
});