@lorrylurui/code-intelligence-mcp 2.0.7 → 2.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,7 +28,8 @@
28
28
  * ──────────────────────────────────────────────────────────────────────────────
29
29
  */
30
30
  import { rankSemanticHits, rankSymbols } from './ranking.js';
31
- import { DEMO_PATH_PRIORITY_PENALTY, LITERAL_MATCH_PRIORITY_BOOST, MIN_LITERAL_MATCH_SCORE, MIN_RECOMMENDATION_SCORE, MIN_SEMANTIC_TEXT_MATCH_SCORE, REQUIRED_FIELD_FALLBACK_MIN_SCORE, } from '../config/tuning.js';
31
+ import { DEMO_PATH_PRIORITY_PENALTY, INDEX_FILE_PRIORITY_BOOST, LITERAL_MATCH_PRIORITY_BOOST, MIN_LITERAL_MATCH_SCORE, MIN_RECOMMENDATION_SCORE, MIN_SEMANTIC_TEXT_MATCH_SCORE, REQUIRED_FIELD_FALLBACK_MIN_SCORE, SAME_DIR_INDEX_EXISTS_PENALTY, } from '../config/tuning.js';
32
+ import { NOISE_PATTERNS, buildSynonymVariant } from '../config/queryRewrite.js';
32
33
  /** 跳过原因标识 */
33
34
  const SKIPPED_REASON = {
34
35
  NO_COMBINED: 'no_combined',
@@ -50,8 +51,8 @@ const RECOMMENDATION_MESSAGE = {
50
51
  };
51
52
  /** 详情补查的 top-k 条数 */
52
53
  const ENRICH_TOP_K = 3;
53
- /** 最多取查询变体数量 */
54
- const MAX_QUERY_VARIANTS = 2;
54
+ /** 最多取查询变体数量(原始 + 清洗 + 同义词扩展) */
55
+ const MAX_QUERY_VARIANTS = 3;
55
56
  /** 结构/语义搜索 limit 倍数 */
56
57
  const STRUCTURE_LIMIT_MULTIPLIER = 4;
57
58
  /** 结构/语义搜索 limit 最小值 */
@@ -61,31 +62,33 @@ const DEFAULT_KEYWORD_SIMILARITY = 0.55;
61
62
  function uniqueStrings(values = []) {
62
63
  return [...new Set(values.map((value) => value.trim()).filter(Boolean))];
63
64
  }
64
- const QUERY_REWRITE_PATTERNS = [
65
- /^帮我找(找)?(一个|一下)?/g,
66
- /^有没有(现成的)?/g,
67
- /^请推荐(一个|一下)?/g,
68
- /可复用/g,
69
- /现成的/g,
70
- /封装好的/g,
71
- /(组件|函数|hook|工具|util)(实现)?/gi,
72
- ];
73
65
  /**
74
- * 对原始查询进行清洗和变体生成,去掉无意义的词,提炼更核心的查询内容
66
+ * 对原始查询进行清洗和变体生成:
67
+ * 1. 噪音词清洗(去掉口语化前缀、无意义词)
68
+ * 2. 同义词扩展(中英互转、别名替换)
69
+ * 生成最多 MAX_QUERY_VARIANTS 个去重变体,按从精确到宽泛排序。
75
70
  */
76
71
  function buildQueryVariants(rawQuery) {
77
72
  const base = rawQuery.trim();
78
73
  if (!base)
79
74
  return [];
80
- let rewritten = base;
81
- for (const pattern of QUERY_REWRITE_PATTERNS) {
82
- rewritten = rewritten.replace(pattern, ' ');
83
- }
84
- rewritten = rewritten.replace(/\s+/g, ' ').trim();
85
- if (!rewritten || rewritten === base) {
86
- return [base];
75
+ // Step 1: 噪音词清洗
76
+ let cleaned = base;
77
+ for (const pattern of NOISE_PATTERNS) {
78
+ cleaned = cleaned.replace(pattern, ' ');
87
79
  }
88
- return uniqueStrings([base, rewritten]);
80
+ cleaned = cleaned.replace(/\s+/g, ' ').trim();
81
+ if (!cleaned)
82
+ cleaned = base;
83
+ // Step 2: 同义词扩展(基于清洗后的 query,减少噪音干扰匹配)
84
+ const synonymVariant = buildSynonymVariant(cleaned);
85
+ // 候选:原始 → 清洗后(若不同)→ 同义词扩展(若不同)
86
+ const candidates = [
87
+ base,
88
+ cleaned,
89
+ ...(synonymVariant ? [synonymVariant] : []),
90
+ ];
91
+ return uniqueStrings(candidates);
89
92
  }
90
93
  function normalizeToken(value) {
91
94
  return value.trim().toLowerCase();
@@ -205,6 +208,14 @@ function isDemoLikePath(path, strict = false) {
205
208
  : DEMO_LIKE_PATH_SEGMENTS_SOFT;
206
209
  return segments.some((segment) => normalizedPath.includes(segment));
207
210
  }
211
/**
 * Tells whether a path points at a directory entry file, i.e. its last
 * '/'-separated segment is index.js, index.ts, index.tsx or index.jsx
 * (compared case-insensitively).
 *
 * Rationale carried over from the original comment: the entry file is the
 * component's public API and should be recommended ahead of internal files.
 *
 * @param filePath '/'-separated file path
 * @returns true when the file name is one of the index entry names
 */
function isIndexFile(filePath) {
    // Everything after the last '/', or the whole string when there is none.
    const fileName = filePath.slice(filePath.lastIndexOf('/') + 1).toLowerCase();
    return /^index\.(js|ts|tsx|jsx)$/.test(fileName);
}
208
219
  /**
209
220
  * 判断是否为可复用候选,过滤掉明显的测试/示例代码。虽然有可能误伤一些真实组件,但优先保证推荐结果的实用性和专业度。
210
221
  * @param symbol 要判断的代码符号
@@ -295,6 +306,10 @@ function computeRecommendationPriority(item, query) {
295
306
  score += LITERAL_MATCH_PRIORITY_BOOST;
296
307
  notes.push('名称或文件名命中查询');
297
308
  }
309
+ if (isIndexFile(path)) {
310
+ score += INDEX_FILE_PRIORITY_BOOST;
311
+ notes.push('组件目录入口文件优先');
312
+ }
298
313
  if (isDemoLikePath(path)) {
299
314
  score -= DEMO_PATH_PRIORITY_PENALTY;
300
315
  notes.push('示例工程路径降权');
@@ -306,6 +321,49 @@ function computeRecommendationPriority(item, query) {
306
321
  : item.reason.summary,
307
322
  };
308
323
  }
324
/**
 * Same-directory index penalty: when the result set already contains an index
 * file for some directory, every other (non-index) file in that directory has
 * its adjusted score reduced by SAME_DIR_INDEX_EXISTS_PENALTY.
 *
 * Rationale carried over from the original comment: an index.js often holds
 * only re-exports, so its sparse content yields a low embedding score and it
 * gets outranked by the internal files of the same directory.
 *
 * @param entries scored entries shaped like { item: { symbol: { path } }, adjustedScore, adjustedReason }
 * @returns the input array itself when no index file is present; otherwise a
 *          new array in which penalized entries are replaced by shallow copies
 *          (scores are floored at 0 and rounded to 3 decimals). Order is not changed.
 */
function applyDirectoryIndexPenalty(entries) {
    // Directory part of a '/'-separated path; '' when the path has no separator.
    const directoryOf = (filePath) => {
        const slashAt = filePath.lastIndexOf('/');
        return slashAt === -1 ? '' : filePath.substring(0, slashAt);
    };
    // Pass 1: collect the directories that already have an index file in the result set.
    const dirsWithIndex = new Set();
    for (const entry of entries) {
        const filePath = entry.item.symbol.path;
        if (isIndexFile(filePath)) {
            dirsWithIndex.add(directoryOf(filePath));
        }
    }
    if (dirsWithIndex.size === 0)
        return entries;
    // Pass 2: penalize the non-index files living in those directories.
    return entries.map((entry) => {
        const filePath = entry.item.symbol.path;
        if (isIndexFile(filePath) || !dirsWithIndex.has(directoryOf(filePath)))
            return entry;
        const penalized = Math.max(0, entry.adjustedScore - SAME_DIR_INDEX_EXISTS_PENALTY);
        return {
            ...entry,
            adjustedScore: Number(penalized.toFixed(3)),
            adjustedReason: `${entry.adjustedReason} + 同目录入口文件已命中,内部子文件降权`,
        };
    });
}
358
/**
 * Converts the eval accumulator (one iterable of symbol IDs per pipeline
 * stage) into a plain trace object holding an array copy of each stage.
 *
 * @param acc accumulator with semanticIds, reusableIds, combinedIds,
 *            qualifiedIds and returnedIds
 * @returns object with the same five keys, each an array snapshot
 */
function accToEvalTrace(acc) {
    const stageKeys = [
        'semanticIds',
        'reusableIds',
        'combinedIds',
        'qualifiedIds',
        'returnedIds',
    ];
    // Spread each stage into a fresh array so later accumulator changes do not leak into the trace.
    return Object.fromEntries(stageKeys.map((stage) => [stage, [...acc[stage]]]));
}
309
367
  export class RecommendationService {
310
368
  repository;
311
369
  constructor(repository) {
@@ -400,6 +458,15 @@ export class RecommendationService {
400
458
  let selectedQuery = null;
401
459
  let fallbackReason = null;
402
460
  const attempts = [];
461
+ const evalAcc = input.evalMode
462
+ ? {
463
+ semanticIds: new Set(),
464
+ reusableIds: new Set(),
465
+ combinedIds: new Set(),
466
+ qualifiedIds: new Set(),
467
+ returnedIds: new Set(),
468
+ }
469
+ : undefined;
403
470
  this.logSearchTypes(searchTypes);
404
471
  for (const queryVariant of queryVariants) {
405
472
  const { attempt, combined, searchResults, gathered } = await this.tryQueryVariant({
@@ -411,6 +478,7 @@ export class RecommendationService {
411
478
  structureFields,
412
479
  requiredProps,
413
480
  requiredHooks,
481
+ evalAcc,
414
482
  });
415
483
  queriedBy = gathered.queriedBy;
416
484
  if (!fallbackReason && gathered.fallbackReason) {
@@ -433,6 +501,7 @@ export class RecommendationService {
433
501
  requiredHooks,
434
502
  attempt,
435
503
  limit,
504
+ evalAcc,
436
505
  });
437
506
  lastRankedCandidates = candidates;
438
507
  if (candidates.length > 0) {
@@ -454,6 +523,7 @@ export class RecommendationService {
454
523
  attempts,
455
524
  selectedQuery,
456
525
  fallbackReason,
526
+ evalTrace: evalAcc ? accToEvalTrace(evalAcc) : undefined,
457
527
  });
458
528
  }
459
529
  this.logAttemptCheckpoint('attempt.no_candidate_after_rank', attempt);
@@ -474,6 +544,7 @@ export class RecommendationService {
474
544
  attempts,
475
545
  selectedQuery,
476
546
  fallbackReason,
547
+ evalTrace: evalAcc ? accToEvalTrace(evalAcc) : undefined,
477
548
  });
478
549
  }
479
550
  logStart(input) {
@@ -505,9 +576,12 @@ export class RecommendationService {
505
576
  console.error('[code-intelligence-mcp] recommendComponent.preprocess queryVariants=%s requiredProps=%s requiredHooks=%s structureFields=%s searchTypes=%s preferSemantic=%s limit=%s', JSON.stringify(queryVariants), JSON.stringify(requiredProps), JSON.stringify(requiredHooks), JSON.stringify(structureFields), JSON.stringify(searchTypes), String(preferSemantic), String(limit));
506
577
  return res;
507
578
  }
508
- async tryQueryVariant({ queryVariant, input, searchTypes, preferSemantic, limit, structureFields, requiredProps, requiredHooks, }) {
579
+ async tryQueryVariant({ queryVariant, input, searchTypes, preferSemantic, limit, structureFields, requiredProps, requiredHooks, evalAcc, }) {
509
580
  const gathered = await this.gatherSearchResults(queryVariant, searchTypes, preferSemantic, limit);
510
581
  const searchResults = gathered.searchResults;
582
+ if (evalAcc) {
583
+ searchResults.forEach((r) => evalAcc.semanticIds.add(r.symbol.id));
584
+ }
511
585
  const attempt = {
512
586
  query: queryVariant,
513
587
  queriedBy: gathered.queriedBy,
@@ -542,36 +616,59 @@ export class RecommendationService {
542
616
  if (reusableCandidates.length > 0) {
543
617
  combined = reusableCandidates;
544
618
  }
619
+ if (evalAcc) {
620
+ reusableCandidates.forEach((s) => evalAcc.reusableIds.add(s.id));
621
+ combined.forEach((s) => evalAcc.combinedIds.add(s.id));
622
+ }
545
623
  attempt.combinedCount = combined.length;
546
624
  return { attempt, combined, searchResults, gathered };
547
625
  }
548
- async rankAndEnrichCandidates({ combined, searchResults, queryVariant, queriedBy, requiredProps, requiredHooks, attempt, limit, }) {
626
+ async rankAndEnrichCandidates({ combined, searchResults, queryVariant, queriedBy, requiredProps, requiredHooks, attempt, limit, evalAcc, }) {
549
627
  const ranked = queriedBy === QUERIED_BY.SEMANTIC
550
628
  ? rankSemanticHits(combined.map((symbol) => ({
551
629
  symbol,
552
630
  similarity: searchResults.find((item) => item.symbol.id === symbol.id)?.similarity ?? 0.55,
553
631
  })), queryVariant)
554
632
  : rankSymbols(queryVariant, combined);
555
- const enriched = await this.enrichTopCandidatesWithDetail(ranked);
556
- const enrichedRanked = enriched.ranked;
557
- attempt.detailEnrichedCount = enriched.enrichedCount;
558
- const qualifiedRanked = enrichedRanked.filter((item) => isStrongEnoughRecommendation(item, queryVariant, queriedBy, requiredProps, requiredHooks));
559
- attempt.qualifiedCount = qualifiedRanked.length;
560
- if (qualifiedRanked.length === 0) {
561
- attempt.skippedReason = SKIPPED_REASON.NO_QUALIFIED;
562
- }
563
- const prioritizedRanked = qualifiedRanked
564
- .map((item) => {
633
+ // 优先级预排序:仅依赖 name/path,无需 meta,前置到详情补查之前。
634
+ // 目的:确保补查的 Top-K 是优先级调整后最可能命中的候选,
635
+ // 避免高语义分但字面命中弱的候选占据补查名额,遗漏字面强命中的候选。
636
+ const priorityScored = ranked.map((item) => {
565
637
  const adjusted = computeRecommendationPriority(item, queryVariant);
566
638
  return {
567
639
  item,
568
640
  adjustedScore: adjusted.score,
569
641
  adjustedReason: adjusted.reason,
570
642
  };
571
- })
572
- .sort((a, b) => b.adjustedScore - a.adjustedScore);
573
- const candidates = prioritizedRanked.map((entry) => toCandidate(entry.item.symbol, entry.adjustedScore, entry.adjustedReason, requiredProps, requiredHooks));
574
- console.error('[code-intelligence-mcp] recommendComponent.rank query=%s queriedBy=%s enriched=%s qualified=%s candidates=%s', queryVariant, queriedBy, String(enrichedRanked.length), String(qualifiedRanked.length), String(candidates.length));
643
+ });
644
+ priorityScored.sort((a, b) => b.adjustedScore - a.adjustedScore);
645
+ // 同目录 index 文件降权:对同目录非入口子文件扣分,确保 index.js > menu.js / panel.js
646
+ const reranked = applyDirectoryIndexPenalty(priorityScored);
647
+ reranked.sort((a, b) => b.adjustedScore - a.adjustedScore);
648
+ // 对优先级预排序后的 Top-K 做详情补查(getByName 补全完整 meta)
649
+ const enriched = await this.enrichTopCandidatesWithDetail(reranked.map((e) => e.item));
650
+ attempt.detailEnrichedCount = enriched.enrichedCount;
651
+ // 将补查结果回填到 reranked,保持优先级排序
652
+ const enrichedPriorityScored = enriched.ranked.map((item, idx) => ({
653
+ item,
654
+ adjustedScore: reranked[idx]?.adjustedScore ?? item.score,
655
+ adjustedReason: reranked[idx]?.adjustedReason ?? item.reason.summary,
656
+ }));
657
+ // 质量门控:score 阈值 + requiredProps/Hooks 命中校验(依赖完整 meta,必须在补查之后)
658
+ const qualifiedRanked = enrichedPriorityScored.filter((entry) => isStrongEnoughRecommendation(entry.item, queryVariant, queriedBy, requiredProps, requiredHooks));
659
+ attempt.qualifiedCount = qualifiedRanked.length;
660
+ if (qualifiedRanked.length === 0) {
661
+ attempt.skippedReason = SKIPPED_REASON.NO_QUALIFIED;
662
+ }
663
+ if (evalAcc) {
664
+ qualifiedRanked.forEach((e) => evalAcc.qualifiedIds.add(e.item.symbol.id));
665
+ }
666
+ // 已按优先级排序,直接构建候选结果
667
+ const candidates = qualifiedRanked.map((entry) => toCandidate(entry.item.symbol, entry.adjustedScore, entry.adjustedReason, requiredProps, requiredHooks));
668
+ if (evalAcc) {
669
+ candidates.forEach((c) => evalAcc.returnedIds.add(c.id));
670
+ }
671
+ console.error('[code-intelligence-mcp] recommendComponent.rank query=%s queriedBy=%s enriched=%s qualified=%s candidates=%s', queryVariant, queriedBy, String(enrichedPriorityScored.length), String(qualifiedRanked.length), String(candidates.length));
575
672
  return candidates;
576
673
  }
577
674
  logAttemptCheckpoint(stage, attempt) {
@@ -580,7 +677,7 @@ export class RecommendationService {
580
677
  logAttemptsTrace(stage, payload) {
581
678
  console.error('[code-intelligence-mcp] %s selectedQuery=%s queriedBy=%s attempts=%s fallbackReason=%s', stage, payload.selectedQuery ?? 'none', payload.queriedBy, JSON.stringify(payload.attempts), payload.fallbackReason ?? 'none');
582
679
  }
583
- buildResult({ recommended, alternatives, queriedBy, requiredProps, requiredHooks, attempts, selectedQuery, fallbackReason, }) {
680
+ buildResult({ recommended, alternatives, queriedBy, requiredProps, requiredHooks, attempts, selectedQuery, fallbackReason, evalTrace, }) {
584
681
  return {
585
682
  recommended,
586
683
  alternatives,
@@ -598,6 +695,7 @@ export class RecommendationService {
598
695
  retryUsed: attempts.length > 1,
599
696
  fallbackReason,
600
697
  },
698
+ evalTrace,
601
699
  };
602
700
  }
603
701
  }
@@ -0,0 +1,40 @@
1
+ import { env } from '../config/env.js';
2
+ import { SYMBOL_STATUS } from '../config/symbolStatus.js';
3
/**
 * Groups symbol names by file path.
 *
 * @param rows iterable of records carrying `path` and `name`
 * @returns Map from path to the de-duplicated names seen for that path;
 *          paths and names keep first-seen order
 */
function buildPathToSymbolNames(rows) {
    // First collect into Sets so repeated names under one path collapse.
    const nameSetsByPath = new Map();
    for (const { path, name } of rows) {
        if (!nameSetsByPath.has(path)) {
            nameSetsByPath.set(path, new Set());
        }
        nameSetsByPath.get(path).add(name);
    }
    // Then expose plain arrays to callers.
    const namesByPath = new Map();
    for (const [path, nameSet] of nameSetsByPath) {
        namesByPath.set(path, [...nameSet]);
    }
    return namesByPath;
}
12
+ async function markFileSymbolsOffline(db, path) {
13
+ await db.query(`UPDATE ${env.symbolsTable}
14
+ SET status = $1::smallint, file_hash = NULL, updated_at = NOW()
15
+ WHERE path = $2 AND status != $1::smallint`, [SYMBOL_STATUS.OFFLINE, path]);
16
+ }
17
+ /**
18
+ * 将指定文件集合中已消失的 symbol 标记为 offline。
19
+ * - `rows` 为空(整文件被删)→ 该文件所有 symbol 下线;
20
+ * - `rows` 非空 → 仅将不再出现于 `rows` 的 symbol 下线;
21
+ * - 重新出现的 symbol 状态恢复由 upsertSymbols 负责(hash 没变且有 embedding → online,否则 pending),此处不重复处理。
22
+ * forceRebuild 场景由上游先统一清空 embedding/status,此函数不负责强制重算策略。
23
+ */
24
+ export async function markRemovedSymbolsOffline(db, filePaths, rows) {
25
+ if (filePaths.length === 0)
26
+ return;
27
+ const currentNamesByPath = buildPathToSymbolNames(rows);
28
+ for (const path of filePaths) {
29
+ const currentNames = currentNamesByPath.get(path) ?? [];
30
+ if (currentNames.length > 0) {
31
+ // 当前文件中已消失的 symbol 标记为 offline;
32
+ await db.query(`UPDATE ${env.symbolsTable}
33
+ SET status = $1::smallint, file_hash = NULL, updated_at = NOW()
34
+ WHERE path = $2 AND NOT (name = ANY($3)) AND status != $1::smallint`, [SYMBOL_STATUS.OFFLINE, path, currentNames]);
35
+ continue;
36
+ }
37
+ // 没有symbol,表示所有内容都删除下线
38
+ await markFileSymbolsOffline(db, path);
39
+ }
40
+ }
@@ -9,6 +9,7 @@ import { upsertSymbols } from '../indexer/persistSymbols.js';
9
9
  import { computeFileHash } from '../indexer/tsAstNormalizer.js';
10
10
  import { getRelativePathForDisplay } from '../indexer/heuristics.js';
11
11
  import { enqueueEmbeddingBatch, closeEmbeddingQueue, } from '../services/embeddingQueue.js';
12
+ import { markRemovedSymbolsOffline } from './reconcileIndexedSymbols.js';
12
13
  import { SYMBOL_STATUS } from '../config/symbolStatus.js';
13
14
  function isCallerDebugEnabled() {
14
15
  return /^(1|true|yes|on)$/i.test(process.env.DEBUG_CALLERS ?? '');
@@ -108,15 +109,33 @@ export async function runReindex(options = {}) {
108
109
  const pendingHashes = [
109
110
  ...new Set(rows.map((r) => r.semantic_hash).filter(Boolean)),
110
111
  ];
112
+ const relPathsForIndexedFiles = filesToIndex.map((file) => getRelativePathForDisplay(projectRoot, file));
111
113
  if (!dryRun) {
112
- // forceRebuild:先清空 DB 中已有的 embedding,使 worker cache check 必然 miss
113
- if (forceRebuild && pendingHashes.length > 0) {
114
- await pool.query(`UPDATE ${env.symbolsTable}
115
- SET embedding = NULL, status = $1
116
- WHERE semantic_hash = ANY($2)`, [SYMBOL_STATUS.PENDING, pendingHashes]);
117
- console.error(`[reindex] forceRebuild: cleared embeddings for ${pendingHashes.length} semantic_hash(es)`);
114
+ const client = await pool.connect();
115
+ try {
116
+ await client.query('BEGIN');
117
+ // forceRebuild:先清空 DB 中已有的 embedding,使 worker cache check 必然 miss;
118
+ // file_hash 一并重置,确保本次重建与后续普通 reindex 都不会复用旧缓存判定。
119
+ if (forceRebuild && pendingHashes.length > 0) {
120
+ await client.query(`UPDATE ${env.symbolsTable}
121
+ SET embedding = NULL, status = $1::smallint, file_hash = NULL
122
+ WHERE semantic_hash = ANY($2)`, [SYMBOL_STATUS.PENDING, pendingHashes]);
123
+ console.error(`[reindex] forceRebuild: cleared embeddings + file_hash for ${pendingHashes.length} semantic_hash(es)`);
124
+ }
125
+ // 能复用 status=online
126
+ // 结构变了,不能复用 status=pending embedding=null
127
+ await upsertSymbols(client, rows, nullPayload);
128
+ // 处理 file内 symbol下线 或 整个file所有symbols下线
129
+ await markRemovedSymbolsOffline(client, relPathsForIndexedFiles, rows);
130
+ await client.query('COMMIT');
131
+ }
132
+ catch (error) {
133
+ await client.query('ROLLBACK');
134
+ throw error;
135
+ }
136
+ finally {
137
+ client.release();
118
138
  }
119
- await upsertSymbols(pool, rows, nullPayload);
120
139
  if (pendingHashes.length > 0) {
121
140
  await enqueueEmbeddingBatch(pendingHashes, env.symbolsTable);
122
141
  console.error(`[reindex] enqueued ${pendingHashes.length} semantic_hash(es) → worker will handle embedding asynchronously`);
@@ -0,0 +1,25 @@
1
+ /**
2
+ * evalTrace.ts — 推荐主链各阶段符号 ID 追踪,仅在 evalMode=true 时填充。
3
+ *
4
+ * 用于 eval CLI 做 per-symbol 失败分类(误杀分析),
5
+ * 判断 expected symbol 在哪个阶段丢失:
6
+ * semanticIds → reusableIds → combinedIds → qualifiedIds → returnedIds
7
+ */
8
/**
 * Classifies what happened to a single expected symbol during one
 * recommendation call, using the per-stage ID lists of an EvalTrace.
 *
 * Stages are checked in pipeline order
 *   semanticIds → reusableIds → combinedIds → qualifiedIds → returnedIds
 * and the first stage the symbol is missing from determines the label.
 *
 * @param symbolId symbol.id as stored in the DB (resolve it from the name beforehand)
 * @param trace    EvalTrace of the recommendation call; each stage is an array of IDs
 * @returns 'found' when the symbol was returned, otherwise one of
 *          'no_semantic_recall' | 'reusability_filtered' | 'structure_filtered' |
 *          'quality_gate_rejected' | 'ranked_below_topk'
 */
export function classifySymbolFailure(symbolId, trace) {
    if (trace.returnedIds.includes(symbolId))
        return 'found';
    if (!trace.semanticIds.includes(symbolId))
        return 'no_semantic_recall';
    if (!trace.reusableIds.includes(symbolId))
        return 'reusability_filtered';
    if (!trace.combinedIds.includes(symbolId))
        return 'structure_filtered';
    // Reached the combined set but never qualified: the quality gate dropped it.
    if (!trace.qualifiedIds.includes(symbolId))
        return 'quality_gate_rejected';
    // Passed every filter yet was not returned: it lost out in the final cut.
    return 'ranked_below_topk';
}
@@ -14,6 +14,11 @@
14
14
  * 大仓分片:
15
15
  * - 直接启动多个 worker 进程(同一 Redis)即可水平扩展,BullMQ 原生分布式协调
16
16
  */
17
+ // env.redisUrl (同一个 Redis) url+name('embedding') 决定了 BullMQ 的队列,生产者和消费者通过它们读写同一个队列实现通信
18
+ // │
19
+ // ├─ Queue('embedding') → LPUSH bull:embedding:wait ... ← Producer 写
20
+ // ├─ Worker('embedding') → BRPOPLPUSH bull:embedding:wait ← Worker 消费
21
+ // └─ QueueEvents('embedding')→ SUBSCRIBE bull:embedding:events ← 监听事件
17
22
  import { Worker, QueueEvents } from 'bullmq';
18
23
  import { Redis } from 'ioredis';
19
24
  import { env } from '../config/env.js';
@@ -31,7 +36,7 @@ async function processEmbedJob(job, pool) {
31
36
  const ts = () => new Date().toISOString();
32
37
  // Step 1: 缓存命中检查 —— 相同 semantic_hash 已有 online 向量
33
38
  const { rows: cached } = await pool.query(`SELECT embedding FROM ${table}
34
- WHERE semantic_hash = $1 AND status = $2 AND embedding IS NOT NULL
39
+ WHERE semantic_hash = $1 AND status = $2::smallint AND embedding IS NOT NULL
35
40
  LIMIT 1`, [semanticHash, SYMBOL_STATUS.ONLINE]);
36
41
  let vector;
37
42
  if (cached.length > 0) {
@@ -43,8 +48,8 @@ async function processEmbedJob(job, pool) {
43
48
  : cached[0].embedding;
44
49
  // cache hit 时只需把 pending 行的向量补齐(有可能是新增的同语义符号)
45
50
  const cacheResult = await pool.query(`UPDATE ${table}
46
- SET embedding = $1::vector, status = $2
47
- WHERE semantic_hash = $3 AND status = $4`, [
51
+ SET embedding = $1::vector, status = $2::smallint
52
+ WHERE semantic_hash = $3 AND status = $4::smallint`, [
48
53
  `[${vector.join(',')}]`,
49
54
  SYMBOL_STATUS.ONLINE,
50
55
  semanticHash,
@@ -56,7 +61,7 @@ async function processEmbedJob(job, pool) {
56
61
  // Cache miss: 取一条 pending 行做 embedding
57
62
  const { rows: pending } = await pool.query(`SELECT name, type, category, path, description, content, meta
58
63
  FROM ${table}
59
- WHERE semantic_hash = $1 AND status = $2
64
+ WHERE semantic_hash = $1 AND status = $2::smallint
60
65
  LIMIT 1`, [semanticHash, SYMBOL_STATUS.PENDING]);
61
66
  if (pending.length === 0) {
62
67
  // 所有行已被并发 worker 处理,幂等退出
@@ -75,8 +80,8 @@ async function processEmbedJob(job, pool) {
75
80
  const resolvedCategory = resolvedRow.category ?? null;
76
81
  // Step 2: 批量写入 —— 覆盖所有相同 semantic_hash 的 pending 行
77
82
  const result = await pool.query(`UPDATE ${table}
78
- SET embedding = $1::vector, status = $2, category = COALESCE($3, category)
79
- WHERE semantic_hash = $4 AND status = $5`, [
83
+ SET embedding = $1::vector, status = $2::smallint, category = COALESCE($3, category)
84
+ WHERE semantic_hash = $4 AND status = $5::smallint`, [
80
85
  `[${vector.join(',')}]`,
81
86
  SYMBOL_STATUS.ONLINE,
82
87
  resolvedCategory,
@@ -91,11 +96,14 @@ async function processEmbedJob(job, pool) {
91
96
  */
92
97
  export async function startEmbeddingWorker(opts = {}) {
93
98
  const { concurrency = 5, rpmLimit = 100 } = opts;
99
+ // worker1 负责从 Redis 拉 job。
100
+ // BullMQ Worker 用它执行 BRPOPLPUSH 这类阻塞命令来抢占 job、加锁、标记完成/失败。阻塞命令会占住整个连接,无法复用。
94
101
  const connection = new Redis(env.redisUrl, {
95
102
  maxRetriesPerRequest: null,
96
103
  enableReadyCheck: false,
97
104
  });
98
- // 独立连接监听队列事件(BullMQ 要求不共用 Worker 连接)
105
+ // worker2 负责订阅 Redis 的 Pub/Sub 事件频道。
106
+ // 给 QueueEvents 用。BullMQ 在 Redis 里发 Pub/Sub 事件(drained、completed、failed…),监听方需要独立的连接订阅这些事件频道。如果共用 connection,阻塞命令会让 Pub/Sub 订阅无法正常工作,所以 BullMQ 官方要求两个连接必须分开。
99
107
  const eventsConnection = new Redis(env.redisUrl, {
100
108
  maxRetriesPerRequest: null,
101
109
  enableReadyCheck: false,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lorrylurui/code-intelligence-mcp",
3
- "version": "2.0.7",
3
+ "version": "2.0.9",
4
4
  "private": false,
5
5
  "description": "MCP server 提供仓库内可复用代码块(ts/tsx/js/jsx/css/less)的索引和查询能力,支持基于代码上下文的智能推荐。",
6
6
  "type": "module",
@@ -9,8 +9,8 @@
9
9
  "dist"
10
10
  ],
11
11
  "bin": {
12
- "code-intelligence-mcp": "./dist/index.js",
13
- "code-intelligence-index": "./dist/cli/index-codebase-cli.js"
12
+ "code-intelligence-mcp": "dist/index.js",
13
+ "code-intelligence-index": "dist/cli/index-codebase-cli.js"
14
14
  },
15
15
  "scripts": {
16
16
  "dev": "tsx watch --clear-screen=false --exclude node_modules --exclude dist src/index.ts",
@@ -21,6 +21,8 @@
21
21
  "start": "node dist/index.js",
22
22
  "index": "tsx src/cli/index-codebase-cli.ts",
23
23
  "ci-index": "tsx src/cli/ci-index-cli.ts",
24
+ "eval": "tsx src/cli/eval-recommendation-cli.ts",
25
+ "analyze": "tsx src/cli/eval-analyze-cli.ts",
24
26
  "worker:embedding": "tsx src/cli/embedding-worker-cli.ts",
25
27
  "embedding:dev": "cd embedding-service && python3 -m uvicorn app:app --host 127.0.0.1 --port 8765",
26
28
  "docker:up": "docker compose up -d",
package/dist/.env DELETED
@@ -1,24 +0,0 @@
1
- MYSQL_ENABLED=true
2
- MYSQL_HOST=127.0.0.1
3
- MYSQL_PORT=3306
4
- MYSQL_USER=root
5
- # 与 docker-compose.yml 中 MYSQL_ROOT_PASSWORD 一致(默认 devpassword,仅本地开发)
6
- MYSQL_PASSWORD=devpassword
7
- MYSQL_DATABASE=code_intelligence
8
- MYSQL_SYMBOLS_TABLE=symbols
9
- EMBEDDING_SERVICE_URL=http://127.0.0.1:8765
10
-
11
- # MCP Server 传输模式:stdio | http
12
- MCP_TRANSPORT=stdio
13
- # HTTP 模式监听地址,0.0.0.0 支持局域网访问,公网用公网IP
14
- MCP_HOST=0.0.0.0
15
- # HTTP 模式端口
16
- MCP_PORT=3000
17
- # Docker Compose 可选:与 .env 同步(compose 会读环境变量覆盖默认值)
18
- # MYSQL_ROOT_PASSWORD=devpassword
19
-
20
- # Phase 2 索引(npm run index)
21
- # INDEX_ROOT=
22
- # INDEX_GLOB=src/**/*.{ts,tsx}
23
- # INDEX_IGNORE=**/generated/**,**/*.spec.ts
24
- GIT_CI_MOCK_SERVER=false