@lorrylurui/code-intelligence-mcp 2.0.7 → 2.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,296 @@
1
+ /**
2
+ * eval-recommendation-cli.ts — 推荐质量离线评测 CLI
3
+ *
4
+ * 用法:
5
+ * npx tsx src/cli/eval-recommendation-cli.ts [--query-set offline_eval/query_set.jsonl] [--limit 10] [--output offline_eval/results/]
6
+ *
7
+ * 输出:
8
+ * - stdout: 评测摘要(Recall@10 / Recall@50 / MRR@10 / nDCG@10)
9
+ * - results/<date>.jsonl: 每条 query 的详细结果 + 失败分类
10
+ */
11
+ import * as fs from 'node:fs';
12
+ import * as path from 'node:path';
13
+ import * as readline from 'node:readline';
14
+ import { RecommendationService } from '../services/recommendationService.js';
15
+ import { SymbolRepository } from '../repositories/symbolRepository.js';
16
+ import { classifySymbolFailure } from '../types/evalTrace.js';
17
+ // ─── CLI 参数 ────────────────────────────────────────────────────────────────
18
const args = process.argv.slice(2);
/**
 * Look up the value that follows a `--flag` token on the command line.
 *
 * @param {string} flag - Flag name including its leading dashes, e.g. `--limit`.
 * @param {string} fallback - Value returned when the flag is absent or has no value.
 * @returns {string} The token following `flag`, or `fallback`.
 */
function getArg(flag, fallback) {
    const idx = args.indexOf(flag);
    if (idx === -1)
        return fallback;
    const value = args[idx + 1];
    // A missing/empty token, or the next flag, means no value was supplied for this flag.
    if (!value || value.startsWith('--'))
        return fallback;
    return value;
}
/**
 * Parse a ranking cutoff. Anything that is not a positive integer would make
 * `slice(0, k)` return an empty list and silently zero every metric, so it is
 * rejected with a warning and replaced by `fallback`.
 *
 * @param {string} raw - Raw command-line value.
 * @param {number} fallback - Cutoff used when `raw` is invalid.
 * @returns {number} A positive integer cutoff.
 */
function parseTopK(raw, fallback) {
    const parsed = Number(raw);
    if (Number.isInteger(parsed) && parsed > 0)
        return parsed;
    console.error(`WARN: invalid --limit "${raw}", falling back to ${fallback}`);
    return fallback;
}
const DEFAULT_TOP_K_MAIN = 10;
const QUERY_SET_PATH = getArg('--query-set', 'offline_eval/query_set.jsonl');
const OUTPUT_DIR = getArg('--output', 'offline_eval/results');
// Cutoff used for Recall@K / MRR@K / nDCG@K.
const TOP_K_MAIN = parseTopK(getArg('--limit', String(DEFAULT_TOP_K_MAIN)), DEFAULT_TOP_K_MAIN);
// Wide cutoff used for Recall@50.
const TOP_K_WIDE = 50;
27
+ // ─── 指标计算 ─────────────────────────────────────────────────────────────────
28
/**
 * Recall@K: the share of relevant expected items (rel >= 1) whose name shows
 * up among the first `k` returned names. Ranking order inside the top K does
 * not matter.
 *
 * @param {string[]} returnedNames - Returned symbol names, best first.
 * @param {{name: string, rel: number}[]} expected - Labelled expectations.
 * @param {number} k - Cutoff.
 * @returns {number} Value in [0, 1]; 1 when there is no relevant item at all
 *   (negative sample), so such cases count as fully satisfied.
 */
function recallAtK(returnedNames, expected, k) {
    const relevant = expected.filter((e) => e.rel >= 1);
    if (relevant.length === 0)
        return 1;
    // Set membership keeps this linear instead of scanning topK once per expected item.
    const topK = new Set(returnedNames.slice(0, k));
    const hits = relevant.filter((e) => topK.has(e.name));
    return hits.length / relevant.length;
}
41
/**
 * MRR@K (reciprocal rank): 1/r where r is the 1-based position of the first
 * relevant name (rel >= 1) within the first `k` returned names.
 *
 * @param {string[]} returnedNames - Returned symbol names, best first.
 * @param {{name: string, rel: number}[]} expected - Labelled expectations.
 * @param {number} k - Cutoff.
 * @returns {number} 1/r for the first hit, or 0 when the top K has no relevant name.
 */
function mrrAtK(returnedNames, expected, k) {
    const relevantNames = new Set();
    for (const item of expected) {
        if (item.rel >= 1)
            relevantNames.add(item.name);
    }
    const firstHit = returnedNames
        .slice(0, k)
        .findIndex((name) => relevantNames.has(name));
    return firstHit === -1 ? 0 : 1 / (firstHit + 1);
}
54
/**
 * nDCG@K: graded-relevance gain (2^rel - 1) discounted by log2(position + 1),
 * summed over the first `k` returned names and divided by the same sum for the
 * ideal ordering of `expected`.
 *
 * Matching is by name, so a name is credited at most as many times as it
 * occurs in `expected`. Without that cap, a result list that repeats one
 * relevant name would collect its gain repeatedly and score above 1.
 *
 * @param {string[]} returnedNames - Returned symbol names, best first.
 * @param {{name: string, rel: number}[]} expected - Labelled expectations (rel 0/1/2).
 * @param {number} k - Cutoff.
 * @returns {number} Normalised score; 1 when the ideal gain is 0 (nothing relevant expected).
 */
function ndcgAtK(returnedNames, expected, k) {
    const discountedGain = (rel, idx) => (Math.pow(2, rel) - 1) / Math.log2(idx + 2);
    // name -> relevance grades still available for crediting, highest first.
    const pending = new Map();
    for (const { name, rel } of expected) {
        const rels = pending.get(name);
        if (rels)
            rels.push(rel);
        else
            pending.set(name, [rel]);
    }
    for (const rels of pending.values())
        rels.sort((a, b) => b - a);
    let dcg = 0;
    returnedNames.slice(0, k).forEach((name, idx) => {
        // Unknown names and repeats beyond the expected count contribute no gain.
        const rel = pending.get(name)?.shift() ?? 0;
        dcg += discountedGain(rel, idx);
    });
    const idcg = expected
        .map((e) => e.rel)
        .sort((a, b) => b - a)
        .slice(0, k)
        .reduce((sum, rel, idx) => sum + discountedGain(rel, idx), 0);
    return idcg === 0 ? 1 : dcg / idcg;
}
75
+ // ─── 失败分类(无 ID 时按名称降级处理) ─────────────────────────────────────
76
/**
 * Build a failure record for every relevant expected item (rel >= 1) whose
 * name is absent from `returnedNames`.
 *
 * When both an eval trace and an id for the name are available, the failure
 * type comes from `classifySymbolFailure`; a `'found'` verdict produces no
 * record. Otherwise the item is recorded as `'no_semantic_recall'`.
 *
 * NOTE(review): in this file `idByName` is built from the returned results,
 * so a name missing from `returnedNames` normally has no id and ends up in
 * the fallback branch — confirm whether ids should be resolved elsewhere.
 *
 * @param {{name: string, path: string, rel: number}[]} expected - Labelled expectations.
 * @param {string[]} returnedNames - Names that were returned.
 * @param {object|undefined} evalTrace - Trace passed through to `classifySymbolFailure`.
 * @param {Map<string, *>} idByName - Known name -> id mapping.
 * @returns {{name: string, expectedPath: string, type: string}[]} One record per missed item.
 */
function classifyFailuresFromTrace(expected, returnedNames, evalTrace, idByName) {
    const failures = [];
    const missed = expected.filter((e) => e.rel >= 1 && !returnedNames.includes(e.name));
    for (const item of missed) {
        const id = idByName.get(item.name);
        const traceable = evalTrace !== undefined && id !== undefined;
        const type = traceable
            ? classifySymbolFailure(id, evalTrace)
            : 'no_semantic_recall';
        if (type === 'found')
            continue;
        failures.push({ name: item.name, expectedPath: item.path, type });
    }
    return failures;
}
104
+ // ─── ID 解析(从返回结果中建立 name→id 映射) ────────────────────────────────
105
/**
 * Index the returned symbols by name.
 *
 * @param {{name: string, id: *}|null|undefined} recommended - Top recommendation, if any.
 * @param {{name: string, id: *}[]} alternatives - Remaining results.
 * @returns {Map<string, *>} name -> id; when a name repeats, the later entry wins.
 */
function buildIdMapFromResult(recommended, alternatives) {
    const symbols = recommended ? [recommended, ...alternatives] : [...alternatives];
    return new Map(symbols.map((symbol) => [symbol.name, symbol.id]));
}
112
+ // ─── 汇总统计 ─────────────────────────────────────────────────────────────────
113
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {number[]} nums - Values to average.
 * @returns {number} The mean, or 0 for an empty list.
 */
function avg(nums) {
    const count = nums.length;
    if (count === 0) {
        return 0;
    }
    let total = 0;
    for (const n of nums) {
        total += n;
    }
    return total / count;
}
118
/**
 * Render a ratio as a percentage with one decimal place.
 *
 * @param {number} n - Ratio, e.g. 0.125.
 * @returns {string} Text such as `12.5%`.
 */
function formatPct(n) {
    const percent = n * 100;
    return `${percent.toFixed(1)}%`;
}
121
/**
 * Print the aggregate evaluation report to stdout.
 *
 * Ranking metrics are averaged over positive samples only; the false-positive
 * rate is computed over negative samples only. When `baseline` is given, each
 * metric it contains is followed by its signed delta.
 *
 * @param {object[]} results - Per-query result records produced by the eval loop.
 * @param {number} kMain - Cutoff shown in the metric labels.
 * @param {object|null} baseline - Optional metric name -> previous value.
 * @returns {void}
 */
function printSummary(results, kMain, baseline) {
    const positive = [];
    const negative = [];
    for (const r of results) {
        (r.isNegativeSample ? negative : positive).push(r);
    }
    // Mean of one metric field over positive samples; a null value counts as 0.
    const meanOf = (field) => avg(positive.map((r) => r[field] ?? 0));
    const recallMain = meanOf('recallMain');
    const recall50 = meanOf('recall50');
    const mrr = meanOf('mrrMain');
    const ndcg = meanOf('ndcgMain');
    const top1Hits = positive.filter((r) => r.top1Correct === true).length;
    const top1Acc = top1Hits / (positive.length || 1);
    const fpHits = negative.filter((r) => r.falsePositive).length;
    const fpRate = fpHits / (negative.length || 1);
    const diff = (metric, val) => {
        if (!baseline || !(metric in baseline)) {
            return '';
        }
        const delta = val - baseline[metric];
        const sign = delta >= 0 ? '+' : '';
        return ` (${sign}${formatPct(delta)})`;
    };
    const rule = '='.repeat(60);
    console.log('\n' + rule);
    console.log(`=== Eval Report ${new Date().toISOString().slice(0, 10)} ===`);
    console.log(rule);
    console.log(`Queries total: ${results.length} (positive: ${positive.length}, negative: ${negative.length})`);
    console.log('');
    console.log(`Recall@${kMain}: ${formatPct(recallMain)}${diff('recallMain', recallMain)}`);
    console.log(`Recall@50: ${formatPct(recall50)}${diff('recall50', recall50)}`);
    console.log(`MRR@${kMain}: ${formatPct(mrr)}${diff('mrr', mrr)}`);
    console.log(`nDCG@${kMain}: ${formatPct(ndcg)}${diff('ndcg', ndcg)}`);
    console.log(`Top1 Acc: ${formatPct(top1Acc)}${diff('top1Acc', top1Acc)}`);
    console.log(`False Pos: ${formatPct(fpRate)} (negative samples incorrectly returned results)`);
    console.log('');
    // ── Failure breakdown ──
    const failureCounts = {
        no_semantic_recall: 0,
        reusability_filtered: 0,
        structure_filtered: 0,
        ranked_below_topk: 0,
        quality_gate_rejected: 0,
        found: 0,
    };
    let totalExpected = 0;
    for (const r of positive) {
        for (const f of r.failures) {
            failureCounts[f.type]++;
        }
        // NOTE(review): denominator is "missed items + 1 per fully recalled query",
        // which only approximates the number of expected items — confirm intent.
        totalExpected += r.failures.length + (r.recallMain === 1 ? 1 : 0);
    }
    console.log('--- Failure Breakdown ---');
    const failureActionHints = {
        no_semantic_recall: '→ 调大 SYMBOL_TOP_K / 增加 queryVariants 数量',
        reusability_filtered: '→ 检查 isReusableCandidate 路径规则是否误杀',
        structure_filtered: '→ 检查 category 过滤条件',
        ranked_below_topk: '→ 调整 RANK_WEIGHTS / LITERAL_MATCH_PRIORITY_BOOST',
        quality_gate_rejected: '→ 调低 MIN_RECOMMENDATION_SCORE 阈值',
        found: '',
    };
    Object.entries(failureCounts)
        .filter(([type]) => type !== 'found')
        .forEach(([type, count]) => {
            const pct = totalExpected > 0
                ? ((count / totalExpected) * 100).toFixed(1)
                : '0.0';
            const hint = failureActionHints[type];
            console.log(` ${type.padEnd(26)} ${String(count).padStart(3)} (${pct}%) ${hint}`);
        });
    console.log(rule + '\n');
}
184
+ // ─── 主流程 ───────────────────────────────────────────────────────────────────
185
/**
 * Read a JSONL query set: one JSON document per non-blank line.
 *
 * @param {string} filePath - Path to the `.jsonl` file.
 * @returns {Promise<object[]>} Parsed cases in file order.
 * @throws {SyntaxError} When a line is not valid JSON; the message carries
 *   `file:line` and the original parser error is attached as `cause`.
 */
async function loadQuerySet(filePath) {
    const input = fs.createReadStream(filePath);
    const rl = readline.createInterface({ input, crlfDelay: Infinity });
    const cases = [];
    let lineNo = 0;
    try {
        for await (const line of rl) {
            lineNo += 1;
            const trimmed = line.trim();
            if (!trimmed)
                continue;
            try {
                cases.push(JSON.parse(trimmed));
            }
            catch (err) {
                // Point at the offending line; a bare JSON.parse error gives no location in the file.
                throw new SyntaxError(`${filePath}:${lineNo}: invalid JSON in query set (${err.message})`, { cause: err });
            }
        }
    }
    finally {
        // Release the file handle even when parsing aborts midway.
        rl.close();
        input.destroy();
    }
    return cases;
}
199
/**
 * Run the offline evaluation end to end.
 *
 * For each query case: request TOP_K_WIDE results in eval mode, derive the
 * top-TOP_K_MAIN slice from the same result list, compute the ranking metrics
 * and classify missed expectations. Then print the summary, write one JSON
 * line per query to `<OUTPUT_DIR>/<date>.jsonl`, and exit with code 1 when any
 * positive query has Recall@TOP_K_MAIN equal to 0.
 *
 * A case is treated as a negative sample when it has no expected item with
 * rel >= 1. This matches how the metric helpers define "nothing relevant", so
 * cases listing only rel=0 items are not averaged into MRR / Top1 as
 * guaranteed misses.
 *
 * @returns {Promise<void>}
 */
async function runEval() {
    console.log(`Loading query set: ${QUERY_SET_PATH}`);
    const cases = await loadQuerySet(QUERY_SET_PATH);
    console.log(`Loaded ${cases.length} queries. Running eval with limit=${TOP_K_MAIN}/${TOP_K_WIDE}...\n`);
    const repository = new SymbolRepository();
    const service = new RecommendationService(repository);
    const results = [];
    for (const queryCase of cases) {
        // A missing `expected` field is handled like an empty list instead of crashing.
        const expected = queryCase.expected ?? [];
        const isNegative = !expected.some((e) => e.rel >= 1);
        // Single wide run (Recall@50); the main-cutoff metrics reuse its leading slice.
        const wideResult = await service.recommendComponent({
            ...queryCase.input,
            limit: TOP_K_WIDE,
            evalMode: true,
        });
        const allReturned = [
            ...(wideResult.recommended ? [wideResult.recommended] : []),
            ...wideResult.alternatives,
        ];
        const wideNames = allReturned.map((symbol) => symbol.name);
        const mainNames = wideNames.slice(0, TOP_K_MAIN);
        // name -> id for the returned symbols, used by failure classification.
        const idByName = buildIdMapFromResult(wideResult.recommended, wideResult.alternatives);
        // Ranking metrics are not defined for negative samples.
        const recallMain = isNegative
            ? null
            : recallAtK(mainNames, expected, TOP_K_MAIN);
        const recall50 = isNegative
            ? null
            : recallAtK(wideNames, expected, TOP_K_WIDE);
        const mrrMain = isNegative
            ? null
            : mrrAtK(mainNames, expected, TOP_K_MAIN);
        const ndcgMain = isNegative
            ? null
            : ndcgAtK(mainNames, expected, TOP_K_MAIN);
        // Top-1 is correct only when the recommendation is a rel=2 expectation.
        const top1Correct = isNegative
            ? null
            : expected.some((e) => e.rel === 2 && wideResult.recommended?.name === e.name);
        const failures = isNegative
            ? []
            : classifyFailuresFromTrace(expected, wideNames, wideResult.evalTrace, idByName);
        // For a negative sample, returning anything at all is a false positive.
        const falsePositive = isNegative && allReturned.length > 0;
        const qr = {
            queryId: queryCase.id,
            query: queryCase.input.query,
            tags: queryCase.tags,
            recallMain,
            recall50,
            mrrMain,
            ndcgMain,
            top1Correct,
            returnedNames: mainNames,
            failures,
            isNegativeSample: isNegative,
            falsePositive,
        };
        results.push(qr);
        // Per-query progress line.
        const status = isNegative
            ? falsePositive
                ? '✗ FP'
                : '✓ TN'
            : recallMain === 1
                ? `✓ R@${TOP_K_MAIN}=1.0`
                : `✗ R@${TOP_K_MAIN}=${(recallMain ?? 0).toFixed(2)}`;
        console.log(` [${queryCase.id}] ${queryCase.input.query.slice(0, 40).padEnd(40)} ${status}`);
    }
    printSummary(results, TOP_K_MAIN, null);
    // JSONL report: one line per query, file named after today's UTC date.
    if (OUTPUT_DIR) {
        fs.mkdirSync(OUTPUT_DIR, { recursive: true });
        const dateStr = new Date().toISOString().slice(0, 10);
        const outPath = path.join(OUTPUT_DIR, `${dateStr}.jsonl`);
        const lines = results.map((r) => JSON.stringify(r)).join('\n');
        fs.writeFileSync(outPath, lines + '\n', 'utf8');
        console.log(`Report written to: ${outPath}`);
    }
    // Non-zero exit when any positive query recalled nothing within the main cutoff.
    const zeroRecall = results.filter((r) => !r.isNegativeSample && r.recallMain === 0);
    if (zeroRecall.length > 0) {
        console.log(`\nWARN: ${zeroRecall.length} positive queries have Recall@${TOP_K_MAIN}=0:`);
        for (const r of zeroRecall) {
            console.log(` [${r.queryId}] ${r.query}`);
        }
        process.exit(1);
    }
}
runEval().catch((err) => {
    console.error('Eval failed:', err);
    process.exit(1);
});
@@ -0,0 +1,95 @@
1
+ /**
2
+ * queryRewrite.ts — query 预处理配置:噪音词清洗 + 同义词/别名扩展。
3
+ *
4
+ * 用于 buildQueryVariants,通过消除口语化干扰词和补充同义词变体,
5
+ * 提升语义检索的 recall(尤其是中英混用、别名查询场景)。
6
+ */
7
+ // ─── 噪音词清洗正则(从 recommendationService 迁移) ────────────────────────
8
/**
 * Patterns applied one after another to the raw query to strip conversational
 * filler that carries no search meaning.
 *
 * Every pattern has the /g flag. Removal can leave runs of spaces behind;
 * collapsing them is left to the caller.
 */
export const NOISE_PATTERNS = [
    // Leading request phrases ("help me find (a)…", "is there (a ready-made)…", "please recommend (a)…")
    /^帮我找(找)?(一个|一下)?/g,
    /^有没有(现成的)?/g,
    /^请推荐(一个|一下)?/g,
    // Qualifiers: "reusable", "ready-made", "pre-packaged"
    /可复用/g,
    /现成的/g,
    /封装好的/g,
    // Generic kind words (component / function / hook / tool / util), optionally followed by "implementation"
    /(组件|函数|hook|工具|util)(实现)?/gi,
];
21
+ // ─── 同义词/别名字典 ─────────────────────────────────────────────────────────
22
/**
 * Synonym groups. Each key is the preferred English root of a concept and the
 * value lists other ways to say the same thing (Chinese terms, abbreviations,
 * aliases).
 *
 * A group is hit when any of its terms (key or alias) occurs in the query; the
 * first term of the group that is not already present in the query is then
 * used as the replacement to build the expanded variant.
 *
 * When adding groups: use the shortest, most generic English root as the key
 * and put Chinese terms first in the alias array.
 */
export const SYNONYM_MAP = {
    // Form inputs
    input: ['输入框', '输入', 'textfield', 'textinput'],
    textarea: ['文本域', '多行输入', 'multiline'],
    select: ['选择器', '下拉框', '下拉', 'dropdown'],
    checkbox: ['复选框', '勾选'],
    radio: ['单选框', '单选'],
    // Overlays
    dialog: ['弹窗', '弹框', '对话框', 'modal', 'popup'],
    tooltip: ['提示', '气泡提示', '悬浮提示', 'popover'],
    drawer: ['抽屉', '侧边栏', 'sidebar'],
    // Feedback
    loading: ['加载', '加载中', 'spinner'],
    skeleton: ['骨架屏', '占位图', 'placeholder'],
    notification: ['通知', '消息', '提醒', 'toast'],
    alert: ['警告', '警示', '提示框'],
    // Data display
    table: ['表格'],
    list: ['列表'],
    pagination: ['分页', '翻页', 'pager'],
    tabs: ['标签页', '选项卡', 'tab'],
    badge: ['徽标', '角标', '标记'],
    tag: ['标签', 'chip'],
    // Navigation
    navigation: ['导航', 'nav'],
    menu: ['菜单'],
    breadcrumb: ['面包屑'],
    // Media / layout
    carousel: ['轮播', '走马灯', 'slider', 'swiper'],
    upload: ['上传', '文件上传', 'file upload'],
    image: ['图片', '图像', 'img'],
    // Common hooks
    debounce: ['防抖', '去抖', 'usedebounce'],
    throttle: ['节流', 'usethrottle'],
    // Search
    search: ['搜索', '查询', 'filter'],
    // Buttons
    button: ['按钮', 'btn'],
};
/**
 * Build a case-insensitive matcher for one synonym term.
 *
 * A term edge that is an ASCII letter or digit must not touch another ASCII
 * letter or digit in the query, so `tab` does not match inside `database`.
 * CJK edges carry no such constraint because Chinese text has no word breaks.
 *
 * @param {string} term - Term from SYNONYM_MAP.
 * @param {string} flags - RegExp flags (`'i'` for testing, `'gi'` for replacing).
 * @returns {RegExp} Matcher for the term.
 */
function buildTermPattern(term, flags) {
    const escaped = term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const head = /^[a-z0-9]/i.test(term) ? '(?<![a-z0-9])' : '';
    const tail = /[a-z0-9]$/i.test(term) ? '(?![a-z0-9])' : '';
    return new RegExp(head + escaped + tail, flags);
}
/**
 * Find a SYNONYM_MAP term in the query and replace every occurrence of it with
 * another term of the same group that the query does not contain yet.
 *
 * Groups are tried in declaration order. Within a group the longest matching
 * term wins, and a term is ignored when a longer matching term from any group
 * contains it (so `提示框` is treated as "alert", not as `提示` + `框`).
 *
 * @param {string} query - Raw or cleaned query text.
 * @returns {string|null} The expanded variant with whitespace collapsed, or
 *   null when no term matches or no distinct replacement is available.
 *
 * @example
 * buildSynonymVariant('弹窗 onChange') // => 'dialog onChange'
 * buildSynonymVariant('input onChange') // => '输入框 onChange'
 */
export function buildSynonymVariant(query) {
    const lower = query.toLowerCase();
    const groups = Object.entries(SYNONYM_MAP).map(([canonical, aliases]) => {
        const terms = [canonical, ...aliases];
        const matched = terms.filter((t) => buildTermPattern(t, 'i').test(query));
        return { terms, matched };
    });
    const allMatched = groups.flatMap((g) => g.matched.map((t) => t.toLowerCase()));
    // A match is shadowed when it is only a fragment of a longer matching term.
    const isShadowed = (term) => {
        const needle = term.toLowerCase();
        return allMatched.some((other) => other.length > needle.length && other.includes(needle));
    };
    for (const { terms, matched } of groups) {
        const [matchedTerm] = matched
            .filter((t) => !isShadowed(t))
            .sort((a, b) => b.length - a.length);
        if (!matchedTerm)
            continue;
        const substitute = terms.find((t) => t !== matchedTerm && !lower.includes(t.toLowerCase()));
        if (!substitute)
            continue;
        // Function replacer keeps the substitute literal (no `$` pattern expansion).
        const replaced = query.replace(buildTermPattern(matchedTerm, 'gi'), () => substitute);
        const trimmed = replaced.replace(/\s+/g, ' ').trim();
        if (trimmed && trimmed !== query)
            return trimmed;
    }
    return null;
}
@@ -100,6 +100,10 @@ export const REQUIRED_FIELD_FALLBACK_MIN_SCORE = 0.4;
100
100
  export const LITERAL_MATCH_PRIORITY_BOOST = 0.22;
101
101
  /** 路径为 demo/example 风格时,对优先级分数扣减的值 */
102
102
  export const DEMO_PATH_PRIORITY_PENALTY = 0.18;
103
+ /** 文件名为 index.js/ts/tsx/jsx 时对优先级分数的加成(优先推荐组件目录入口文件) */
104
+ export const INDEX_FILE_PRIORITY_BOOST = 0.18;
105
+ /** 同目录中存在 index 文件时,对其他非入口子文件的优先级扣减(避免 menu.js / panel.js 等内部实现抢占推荐位) */
106
+ export const SAME_DIR_INDEX_EXISTS_PENALTY = 0.25;
103
107
  // ─── 搜索工具结果过滤 (tools/searchSymbols.ts) ───────────────────────────────
104
108
  /** 最终返回结果所需的最低综合评分 */
105
109
  export const SEARCH_SCORE_THRESHOLD = 0.45;
@@ -117,19 +117,51 @@ export function splitTextIntoChunks(content, options = {}) {
117
117
  ? sliceWithOverlap(block.text, maxChars, overlapChars)
118
118
  : [block.text];
119
119
  for (const part of oversizedParts) {
120
- // 3. overlap 滑动窗口:每当累计块接近目标大小或即将超出上限时,先把当前块收敛成一个 chunk,再开始下一块。每个新块的开头会带上前一个块末尾 overlapChars 长度的文本,减少边界信息丢失。
121
- const additionLength = currentLength === 0 ? part.length : part.length + 2;
120
+ // ── 3. 滑动窗口 + overlap ─────────────────────────────────────────
121
+ // 目标:把 parts 依次合并到 currentBlocks,直到"该收了"再收敛成一个 chunk。
122
+ // 收敛后 finalizeChunk 会把末尾 overlapChars 个字符带入下一块,减少边界信息丢失。
123
+ //
124
+ // 执行示例(targetChars=20, maxChars=30, overlapChars=5):
125
+ //
126
+ // part="Hello world"(11) currentLength=0 → 直接 push
127
+ // currentBlocks=["Hello world"] currentLength=11
128
+ //
129
+ // part="Foo bar baz"(11) additionLength=11+2=13 currentLength+13=24 ≤ 30,未达目标 → 直接 push
130
+ // currentBlocks=["Hello world","Foo bar baz"] currentLength=24
131
+ //
132
+ // part="A long sentence"(15) additionLength=15+2=17 currentLength+17=41 > 30 → wouldOverflowMax=true
133
+ // → finalizeChunk: chunks=["Hello world\n\nFoo bar baz"]
134
+ // overlap="r baz"(末5字符) currentBlocks=["r baz"] currentLength=5
135
+ // → push "A long sentence"
136
+ // currentBlocks=["r baz","A long sentence"] currentLength=5+2+15=22
137
+ //
138
+ // 最终 finalizeChunk(overlap=0): chunks 追加 "r baz\n\nA long sentence"
139
+ // ─────────────────────────────────────────────────────────────────
140
+ // SEP=2 对应 blocks.join('\n\n') 中每条边界的 '\n\n' 长度;
141
+ // 首个 block 无分隔符,所以 currentLength===0 时不加。
142
+ const SEP = 2;
143
+ const additionLength = currentLength === 0 ? part.length : SEP + part.length;
144
+ // 两种情况需要先收敛当前 chunk:
145
+ // 1. wouldOverflowMax:加入本 part 后超出硬上限,被动截断;
146
+ // 2. reachedTarget :当前已达目标大小,主动分块,保持粒度均匀。
122
147
  const wouldOverflowMax = currentLength > 0 && currentLength + additionLength > maxChars;
123
148
  const reachedTarget = currentLength >= targetChars;
124
- // 已接近目标大小或即将超出上限时,先收敛当前 chunk,再开始下一块。
125
149
  if (wouldOverflowMax || reachedTarget) {
150
+ // finalizeChunk 写入 chunks,并把末尾 overlap 文本返回作为新 currentBlocks 起点。
126
151
  currentBlocks = finalizeChunk(chunks, currentBlocks, overlapChars);
152
+ // overlap 文本长度不固定,必须重算(不能增量推导)。
127
153
  currentLength = currentBlocks.join('\n\n').length;
128
154
  }
129
155
  currentBlocks.push(part);
130
- currentLength = currentBlocks.join('\n\n').length;
156
+ // flush currentBlocks 可能含 overlap(length ≥ 1),也可能为空(length === 0);
157
+ // 增量计算避免每次重新 join 整个数组。
158
+ currentLength =
159
+ currentLength === 0
160
+ ? part.length
161
+ : currentLength + SEP + part.length;
131
162
  }
132
163
  }
164
+ // 收尾兜底,确保剩余内容不丢失
133
165
  finalizeChunk(chunks, currentBlocks, 0);
134
166
  return chunks;
135
167
  }
@@ -1,29 +1,22 @@
1
1
  import { env } from '../config/env.js';
2
- import { getAllTableSQLs } from '../db/schema.js';
3
2
  import { SYMBOL_STATUS } from '../config/symbolStatus.js';
4
3
  /**
5
4
  * 依赖表上 `(path, name)` 唯一键:新行插入,已存在则更新类型/描述/内容与 meta;**不**修改 `usage_count`。
5
+ * 事务与连接生命周期由调用方管理。
6
6
  * @param rows 来自 `indexProject`;空数组时立即返回,不开启事务。
7
7
  * @param embeddings 与 `rows` 等长;某项为 `null` 表示本行不更新已有 `embedding`(新行则写入 NULL)。
8
8
  * - 有值 → status 置为 online(2)
9
9
  * - null → 新行写 pending(1),已有行保持原 status
10
10
  */
11
- export async function upsertSymbols(pool, rows, embeddings) {
11
+ export async function upsertSymbols(client, rows, embeddings) {
12
12
  if (rows.length === 0)
13
13
  return;
14
14
  if (embeddings && embeddings.length !== rows.length) {
15
15
  throw new Error('upsertSymbols: embeddings length must match rows');
16
16
  }
17
17
  const actor = process.env.GITHUB_USERNAME?.trim() || 'system';
18
- const client = await pool.connect();
19
- try {
20
- // 确保 extension + 表 + 基础索引存在
21
- for (const sql of getAllTableSQLs()) {
22
- await client.query(sql);
23
- }
24
- await client.query('BEGIN');
25
- const t = env.symbolsTable;
26
- const sql = `
18
+ const t = env.symbolsTable;
19
+ const sql = `
27
20
  INSERT INTO ${t}
28
21
  (name, type, category, path, description, content, meta,
29
22
  insert_user, updated_user, embedding, semantic_hash, file_hash, status)
@@ -36,48 +29,40 @@ export async function upsertSymbols(pool, rows, embeddings) {
36
29
  meta = EXCLUDED.meta,
37
30
  updated_user = EXCLUDED.updated_user,
38
31
  embedding = CASE
39
- WHEN EXCLUDED.embedding IS NOT NULL THEN EXCLUDED.embedding
40
- WHEN EXCLUDED.semantic_hash != ${t}.semantic_hash THEN NULL
41
- ELSE ${t}.embedding
32
+ WHEN EXCLUDED.embedding IS NOT NULL THEN EXCLUDED.embedding -- 本次带了新向量,直接使用
33
+ WHEN EXCLUDED.semantic_hash != ${t}.semantic_hash THEN NULL -- 结构变了,旧向量作废,等重算
34
+ ELSE ${t}.embedding -- 结构未变,复用旧向量
42
35
  END,
43
36
  semantic_hash = EXCLUDED.semantic_hash,
44
37
  file_hash = EXCLUDED.file_hash,
45
38
  status = CASE
46
- WHEN EXCLUDED.embedding IS NOT NULL THEN ${SYMBOL_STATUS.ONLINE}
47
- WHEN EXCLUDED.semantic_hash != ${t}.semantic_hash THEN ${SYMBOL_STATUS.PENDING}
48
- ELSE ${t}.status
39
+ WHEN EXCLUDED.embedding IS NOT NULL THEN ${SYMBOL_STATUS.ONLINE} -- 本次带了新向量 → 直接 online
40
+ WHEN EXCLUDED.semantic_hash != ${t}.semantic_hash THEN ${SYMBOL_STATUS.PENDING} -- 结构变了,需重新 embedding → pending
41
+ WHEN ${t}.embedding IS NOT NULL THEN ${SYMBOL_STATUS.ONLINE} -- 结构未变且已有向量(含 offline 恢复)→ online
42
+ ELSE ${SYMBOL_STATUS.PENDING} -- 结构未变但无向量(首次 or 之前失败)→ pending
49
43
  END,
50
44
  updated_at = NOW()
51
45
  `;
52
- for (let i = 0; i < rows.length; i++) {
53
- const r = rows[i];
54
- const emb = embeddings?.[i];
55
- // pgvector 接受 "[x1,x2,...]" 格式字符串
56
- const vecStr = emb != null ? `[${emb.join(',')}]` : null;
57
- const statusVal = vecStr !== null ? SYMBOL_STATUS.ONLINE : SYMBOL_STATUS.PENDING;
58
- await client.query(sql, [
59
- r.name,
60
- r.type,
61
- r.category,
62
- r.path,
63
- r.description,
64
- r.content,
65
- JSON.stringify(r.meta),
66
- actor,
67
- actor,
68
- vecStr, // $10 → cast as vector, null 时写 NULL
69
- r.semantic_hash,
70
- r.file_hash,
71
- statusVal,
72
- ]);
73
- }
74
- await client.query('COMMIT');
75
- }
76
- catch (e) {
77
- await client.query('ROLLBACK');
78
- throw e;
79
- }
80
- finally {
81
- client.release();
46
+ for (let i = 0; i < rows.length; i++) {
47
+ const r = rows[i];
48
+ const emb = embeddings?.[i];
49
+ // pgvector 接受 "[x1,x2,...]" 格式字符串
50
+ const vecStr = emb != null ? `[${emb.join(',')}]` : null;
51
+ const statusVal = vecStr !== null ? SYMBOL_STATUS.ONLINE : SYMBOL_STATUS.PENDING;
52
+ await client.query(sql, [
53
+ r.name,
54
+ r.type,
55
+ r.category,
56
+ r.path,
57
+ r.description,
58
+ r.content,
59
+ JSON.stringify(r.meta),
60
+ actor,
61
+ actor,
62
+ vecStr, // $10 → cast as vector, null 时写 NULL
63
+ r.semantic_hash,
64
+ r.file_hash,
65
+ statusVal,
66
+ ]);
82
67
  }
83
68
  }
@@ -65,7 +65,7 @@ const REUSABLE_CODE_ADVISOR_MARKDOWN = `# 可复用代码推荐
65
65
  > 输出上述模板后**等待用户在聊天框输入回复**,识别规则:
66
66
  > - 用户输入 **"1"、"采纳"、"采纳推荐"、"ok"、"好的"** 或类似确认词 → 从上方输出文本中读取 \`symbolId:<id>\` 那一行的值,立即调用 \`inc_usage\` 工具传入该 id,调用成功后回复"✓ 已记录使用,可直接集成"
67
67
  > - 用户输入 **"2"、"取消"、"不用了"** 或类似否定词 → 回复"好的,已取消",停止
68
- > - 用户输入其他内容(如追问细节)→ 正常回答,回答结束后再次展示"是否采纳"选项
68
+ > - 用户输入其他内容(如追问细节、props、最小接入方式)→ 可以继续补充说明,但回答结尾**必须再次原样展示**"是否采纳"的两个选项,不得省略
69
69
 
70
70
  无结果时:
71
71
 
@@ -85,6 +85,7 @@ const REUSABLE_CODE_ADVISOR_MARKDOWN = `# 可复用代码推荐
85
85
  > 输出上述模板后**等待用户在聊天框输入回复**,识别规则:
86
86
  > - 用户输入 **"1"、"新建"、"帮我创建"** 或类似确认词 → 进入新建流程,引导用户确认最小接口设计
87
87
  > - 用户输入 **"2"、"取消"、"不用了"** → 回复"好的,已取消",停止
88
+ > - 用户输入其他内容(如追问为何没找到、想先看候选)→ 可以继续解释,但回答结尾**必须再次原样展示**"是否采纳"的两个选项,不得省略
88
89
  `;
89
90
  export function registerReusableCodeAdvisorPrompt(server) {
90
91
  server.prompt('reusable-code-advisor', REUSABLE_CODE_ADVISOR_DESCRIPTION, {
@@ -151,7 +151,7 @@ export class ChunkRepository {
151
151
  1 - (embedding <=> $1::vector) AS similarity
152
152
  FROM ${env.chunksTable}
153
153
  WHERE embedding IS NOT NULL
154
- AND status = $2
154
+ AND status = $2::smallint
155
155
  `;
156
156
  if (opts?.path) {
157
157
  params.push(opts.path);
@@ -146,7 +146,7 @@ export class SymbolRepository {
146
146
  path ILIKE $1 OR
147
147
  meta::text ILIKE $1
148
148
  )
149
- AND status = $2
149
+ AND status = $2::smallint
150
150
  `;
151
151
  if (tokens.length) {
152
152
  const tokenClauses = tokens.map((token) => {
@@ -165,7 +165,7 @@ export class SymbolRepository {
165
165
  meta::text ILIKE $1 OR
166
166
  (${tokenClauses.join(' OR ')})
167
167
  )
168
- AND status = $2
168
+ AND status = $2::smallint
169
169
  `;
170
170
  }
171
171
  if (type) {
@@ -212,7 +212,7 @@ export class SymbolRepository {
212
212
  1 - (embedding <=> $1::vector) AS similarity
213
213
  FROM ${env.symbolsTable}
214
214
  WHERE embedding IS NOT NULL
215
- AND status = $2
215
+ AND status = $2::smallint
216
216
  `;
217
217
  if (opts?.type) {
218
218
  params.push(opts.type);
@@ -248,8 +248,9 @@ export class SymbolRepository {
248
248
  SELECT id, name, type, category, path, description, content, meta::text AS meta, usage_count, created_at
249
249
  FROM ${env.symbolsTable}
250
250
  WHERE name = $1
251
+ AND status = $2::smallint
251
252
  LIMIT 1
252
- `, [name]);
253
+ `, [name, SEARCHABLE_STATUS]);
253
254
  console.error('[code-intelligence-mcp] repository.getByName.db table=%s rows=%s', env.symbolsTable, String(rows.length));
254
255
  if (rows.length === 0) {
255
256
  return null;
@@ -310,8 +311,9 @@ export class SymbolRepository {
310
311
  let sql = `
311
312
  SELECT id, name, type, category, path, description, content, meta::text AS meta, usage_count, created_at
312
313
  FROM ${env.symbolsTable}
313
- WHERE 1 = 1
314
+ WHERE status = $1::smallint
314
315
  `;
316
+ params.push(SEARCHABLE_STATUS);
315
317
  if (type) {
316
318
  params.push(type);
317
319
  sql += ` AND type = $${params.length}`;
@@ -15,6 +15,7 @@ import { env } from '../config/env.js';
15
15
  let _queue = null;
16
16
  let _connection = null;
17
17
  function getQueue() {
18
+ // worker3 往 Redis 写 job
18
19
  if (!_queue) {
19
20
  _connection = new Redis(env.redisUrl, {
20
21
  maxRetriesPerRequest: null, // BullMQ required