@lorrylurui/code-intelligence-mcp 2.0.4 → 2.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,8 @@
1
1
  import { env } from '../config/env.js';
2
2
  import { getPool } from '../db/postgres.js';
3
+ import { SYMBOL_SIMILARITY_THRESHOLD, SYMBOL_TOP_K } from '../config/tuning.js';
3
4
  import { createEmbeddingClient } from '../services/embeddingClient.js';
4
5
  import { SEARCHABLE_STATUS } from '../config/symbolStatus.js';
5
- const SIMILARITY_THRESHOLD = 0.5;
6
- const TOP_K = 20;
7
6
  const inMemorySymbols = [
8
7
  {
9
8
  id: 1,
@@ -82,21 +81,58 @@ function getMetaArray(meta, key) {
82
81
  return [];
83
82
  return value.filter((v) => typeof v === 'string');
84
83
  }
84
/**
 * Splits a free-text search query into a de-duplicated list of tokens.
 *
 * Two kinds of tokens are produced:
 *  - runs of ASCII letters, digits and underscores taken from the trimmed,
 *    lower-cased query, kept only when at least two characters long;
 *  - overlapping two-character slices of every run of two or more characters
 *    in the U+4E00–U+9FFF range.
 *
 * @param {string} query Raw query text.
 * @returns {string[]} Unique tokens in first-seen order (ASCII tokens first).
 */
function extractSearchTokens(query) {
    const asciiWords = (query.trim().toLowerCase().match(/[a-z0-9_]+/g) ?? [])
        .filter((word) => word.length >= 2);
    const bigrams = [];
    for (const [run] of query.matchAll(/[\u4e00-\u9fff]{2,}/g)) {
        // Slide a window of width 2 across the run.
        for (let start = 0; start + 2 <= run.length; start += 1) {
            bigrams.push(run.slice(start, start + 2));
        }
    }
    // The Set drops repeats while keeping the first-seen order.
    return [...new Set([...asciiWords, ...bigrams])];
}
99
/**
 * Builds the lower-cased text that in-memory search matches against.
 *
 * The symbol's name, path, description (empty when null/undefined) and the
 * JSON serialization of its meta (`{}` when null/undefined) are joined with
 * single spaces and lower-cased.
 *
 * @param {object} symbol Record with `name`, `path` and optional `description` / `meta`.
 * @returns {string} Lower-cased searchable text.
 */
function buildSearchText(symbol) {
    const { name, path, description, meta } = symbol;
    const metaJson = JSON.stringify(meta ?? {});
    const fields = [name, path, description ?? '', metaJson];
    return fields.join(' ').toLowerCase();
}
109
/**
 * Counts how many of the given tokens occur as substrings of `text`.
 * Each token is lower-cased before the check; `text` is used as-is, so the
 * caller is expected to pass already lower-cased text.
 *
 * @param {string} text Text to look in.
 * @param {string[]} tokens Tokens to look for.
 * @returns {number} Number of tokens found in `text`.
 */
function countTokenMatches(text, tokens) {
    let hits = 0;
    for (const token of tokens) {
        if (text.includes(token.toLowerCase())) {
            hits += 1;
        }
    }
    return hits;
}
85
112
  export class SymbolRepository {
86
113
  pool;
87
114
  constructor() {
88
115
  this.pool = getPool();
89
116
  }
90
117
  async search(query, type) {
118
+ console.error('[code-intelligence-mcp] repository.search.start query=%s type=%s table=%s searchableStatus=%s hasPool=%s', query, type ?? '', env.symbolsTable, String(SEARCHABLE_STATUS), String(Boolean(this.pool)));
91
119
  if (!this.pool) {
92
120
  const q = query.toLowerCase();
93
- return inMemorySymbols.filter((s) => {
121
+ const tokens = extractSearchTokens(query);
122
+ const matched = inMemorySymbols.filter((s) => {
94
123
  const typeOk = type ? s.type === type : true;
124
+ const text = buildSearchText(s);
95
125
  return (typeOk &&
96
- (s.name.toLowerCase().includes(q) ||
97
- (s.description ?? '').toLowerCase().includes(q)));
126
+ (text.includes(q) || countTokenMatches(text, tokens) >= 2));
98
127
  });
128
+ console.error('[code-intelligence-mcp] repository.search.memory count=%s top=%s', String(matched.length), JSON.stringify(matched.slice(0, 3).map((s) => ({
129
+ id: s.id,
130
+ name: s.name,
131
+ path: s.path,
132
+ }))));
133
+ return matched;
99
134
  }
135
+ const tokens = extractSearchTokens(query);
100
136
  const params = [
101
137
  `%${query}%`,
102
138
  SEARCHABLE_STATUS,
@@ -104,15 +140,46 @@ export class SymbolRepository {
104
140
  let sql = `
105
141
  SELECT id, name, type, category, path, description, content, meta::text AS meta, usage_count, created_at
106
142
  FROM ${env.symbolsTable}
107
- WHERE (name ILIKE $1 OR description ILIKE $1)
143
+ WHERE (
144
+ name ILIKE $1 OR
145
+ description ILIKE $1 OR
146
+ path ILIKE $1 OR
147
+ meta::text ILIKE $1
148
+ )
108
149
  AND status = $2
109
150
  `;
151
+ if (tokens.length) {
152
+ const tokenClauses = tokens.map((token) => {
153
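+ // A row matches when ANY query token appears in name/description/path/meta: the per-token clauses below are OR-ed together. NOTE(review): the original comment described every token having to match (AND semantics) to reduce noise from overly generic tokens — confirm which behavior is intended.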
154
+ params.push(`%${token}%`);
155
+ const index = params.length;
156
+ return `name ILIKE $${index} OR description ILIKE $${index} OR path ILIKE $${index} OR meta::text ILIKE $${index}`;
157
+ });
158
+ sql = `
159
+ SELECT id, name, type, category, path, description, content, meta::text AS meta, usage_count, created_at
160
+ FROM ${env.symbolsTable}
161
+ WHERE (
162
+ name ILIKE $1 OR
163
+ description ILIKE $1 OR
164
+ path ILIKE $1 OR
165
+ meta::text ILIKE $1 OR
166
+ (${tokenClauses.join(' OR ')})
167
+ )
168
+ AND status = $2
169
+ `;
170
+ }
110
171
  if (type) {
111
172
  params.push(type);
112
173
  sql += ` AND type = $${params.length}`;
113
174
  }
114
175
  sql += ' ORDER BY usage_count DESC LIMIT 20';
115
176
  const { rows } = await this.pool.query(sql, params);
177
+ console.error('[code-intelligence-mcp] repository.search.db table=%s rows=%s top=%s note=name/description only', env.symbolsTable, String(rows.length), JSON.stringify(rows.slice(0, 3).map((r) => ({
178
+ id: r.id,
179
+ name: r.name,
180
+ path: r.path,
181
+ type: r.type,
182
+ }))));
116
183
  return rows.map((r) => mapRow(r));
117
184
  }
118
185
  /**
@@ -120,13 +187,16 @@ export class SymbolRepository {
120
187
  * 不再需要在 Node 拉取全量向量做内存计算。
121
188
  */
122
189
  async searchSemanticHits(query, opts) {
190
+ console.error('[code-intelligence-mcp] repository.searchSemanticHits.start query=%s type=%s table=%s limit=%s threshold=%s searchableStatus=%s hasPool=%s', query, opts?.type ?? '', env.symbolsTable, String(opts?.limit ?? SYMBOL_TOP_K), String(SYMBOL_SIMILARITY_THRESHOLD), String(SEARCHABLE_STATUS), String(Boolean(this.pool)));
123
191
  if (!env.embeddingServiceUrl) {
192
+ console.error('[code-intelligence-mcp] repository.searchSemanticHits.error missingEmbeddingServiceUrl');
124
193
  throw new Error('语义检索需配置 EMBEDDING_SERVICE_URL 并启动嵌入服务');
125
194
  }
126
195
  if (!this.pool) {
196
+ console.error('[code-intelligence-mcp] repository.searchSemanticHits.noPool returnEmpty');
127
197
  return [];
128
198
  }
129
- const limit = opts?.limit ?? TOP_K;
199
+ const limit = opts?.limit ?? SYMBOL_TOP_K;
130
200
  const client = createEmbeddingClient(env.embeddingServiceUrl);
131
201
  const [queryVec] = await client.embed([query.trim()]);
132
202
  if (!queryVec?.length) {
@@ -151,15 +221,26 @@ export class SymbolRepository {
151
221
  params.push(limit * 2); // 多取一倍以便 SIMILARITY_THRESHOLD 过滤后仍有足量结果
152
222
  sql += ` ORDER BY embedding <=> $1::vector LIMIT $${params.length}`;
153
223
  const { rows } = await this.pool.query(sql, params);
154
- return rows
155
- .map((r) => ({
224
+ const mapped = rows.map((r) => ({
156
225
  symbol: mapRow(r),
157
226
  similarity: Number(r.similarity),
227
+ }));
228
+ const passed = mapped.filter((x) => x.similarity >= SYMBOL_SIMILARITY_THRESHOLD);
229
+ console.error('[code-intelligence-mcp] repository.searchSemanticHits.db table=%s rawRows=%s passedThreshold=%s topRaw=%s', env.symbolsTable, String(rows.length), String(passed.length), JSON.stringify(mapped.slice(0, 5).map((x) => ({
230
+ id: x.symbol.id,
231
+ name: x.symbol.name,
232
+ path: x.symbol.path,
233
+ similarity: Number(x.similarity.toFixed(4)),
234
+ }))));
235
+ return passed
236
+ .map((r) => ({
237
+ symbol: r.symbol,
238
+ similarity: r.similarity,
158
239
  }))
159
- .filter((x) => x.similarity >= SIMILARITY_THRESHOLD)
160
240
  .slice(0, limit);
161
241
  }
162
242
  async getByName(name) {
243
+ console.error('[code-intelligence-mcp] repository.getByName.start name=%s table=%s hasPool=%s', name, env.symbolsTable, String(Boolean(this.pool)));
163
244
  if (!this.pool) {
164
245
  return (inMemorySymbols.find((s) => s.name.toLowerCase() === name.toLowerCase()) ?? null);
165
246
  }
@@ -169,6 +250,7 @@ export class SymbolRepository {
169
250
  WHERE name = $1
170
251
  LIMIT 1
171
252
  `, [name]);
253
+ console.error('[code-intelligence-mcp] repository.getByName.db table=%s rows=%s', env.symbolsTable, String(rows.length));
172
254
  if (rows.length === 0) {
173
255
  return null;
174
256
  }
@@ -191,6 +273,7 @@ export class SymbolRepository {
191
273
  return result.rowCount !== null && result.rowCount > 0;
192
274
  }
193
275
  async searchByStructure(fields, opts) {
276
+ console.error('[code-intelligence-mcp] repository.searchByStructure.start fields=%s type=%s category=%s table=%s limit=%s hasPool=%s', JSON.stringify(fields), opts?.type ?? '', opts?.category ?? '', env.symbolsTable, String(opts?.limit ?? 20), String(Boolean(this.pool)));
194
277
  const normalized = fields.map((f) => f.trim()).filter(Boolean);
195
278
  if (normalized.length === 0)
196
279
  return [];
@@ -215,7 +298,13 @@ export class SymbolRepository {
215
298
  return normalized.every((field) => propPool.includes(field.toLowerCase()));
216
299
  };
217
300
  if (!this.pool) {
218
- return inMemorySymbols.filter(matchesAll).slice(0, limit);
301
+ const matched = inMemorySymbols.filter(matchesAll).slice(0, limit);
302
+ console.error('[code-intelligence-mcp] repository.searchByStructure.memory matched=%s top=%s', String(matched.length), JSON.stringify(matched.slice(0, 3).map((s) => ({
303
+ id: s.id,
304
+ name: s.name,
305
+ path: s.path,
306
+ }))));
307
+ return matched;
219
308
  }
220
309
  const params = [];
221
310
  let sql = `
@@ -234,9 +323,13 @@ export class SymbolRepository {
234
323
  params.push(Math.max(limit * 5, 50));
235
324
  sql += ` ORDER BY usage_count DESC LIMIT $${params.length}`;
236
325
  const { rows } = await this.pool.query(sql, params);
237
- return rows
238
- .map((r) => mapRow(r))
239
- .filter(matchesAll)
240
- .slice(0, limit);
326
+ const mapped = rows.map((r) => mapRow(r));
327
+ const filtered = mapped.filter(matchesAll).slice(0, limit);
328
+ console.error('[code-intelligence-mcp] repository.searchByStructure.db table=%s scanned=%s matched=%s top=%s', env.symbolsTable, String(rows.length), String(filtered.length), JSON.stringify(filtered.slice(0, 3).map((s) => ({
329
+ id: s.id,
330
+ name: s.name,
331
+ path: s.path,
332
+ }))));
333
+ return filtered;
241
334
  }
242
335
  }
@@ -8,25 +8,41 @@ import { createSearchByStructureTool } from '../tools/searchByStructure.js';
8
8
  import { createIncUsageTool } from '../tools/incUsage.js';
9
9
  import { RecommendationService } from '../services/recommendationService.js';
10
10
  import { createRecommendComponentTool } from '../tools/recommendComponent.js';
11
+ import { createQueryDocsTool } from '../tools/queryDocs.js';
11
12
  export function createServer() {
13
+ console.error('[code-intelligence-mcp] createServer.init');
12
14
  const server = new McpServer({
13
15
  name: 'code-intelligence-mcp',
14
16
  version: '0.1.0',
15
17
  });
18
+ console.error('[code-intelligence-mcp] mcpServer.created name=code-intelligence-mcp version=0.1.0');
16
19
  const repository = new SymbolRepository();
20
+ console.error('[code-intelligence-mcp] repository.created');
17
21
  const recommendationService = new RecommendationService(repository);
22
+ console.error('[code-intelligence-mcp] recommendationService.created');
18
23
  const searchTool = createSearchSymbolsTool(repository);
19
24
  server.tool(searchTool.name, searchTool.description, searchTool.inputSchema, searchTool.handler);
25
+ console.error('[code-intelligence-mcp] tool.registered %s', searchTool.name);
20
26
  const detailTool = createGetSymbolDetailTool(repository);
21
27
  server.tool(detailTool.name, detailTool.description, detailTool.inputSchema, detailTool.handler);
28
+ console.error('[code-intelligence-mcp] tool.registered %s', detailTool.name);
22
29
  const structureTool = createSearchByStructureTool(repository);
23
30
  server.tool(structureTool.name, structureTool.description, structureTool.inputSchema, structureTool.handler);
31
+ console.error('[code-intelligence-mcp] tool.registered %s', structureTool.name);
24
32
  const reindexTool = createReindexTool();
25
33
  server.tool(reindexTool.name, reindexTool.description, reindexTool.inputSchema, reindexTool.handler);
34
+ console.error('[code-intelligence-mcp] tool.registered %s', reindexTool.name);
26
35
  const incUsageTool = createIncUsageTool(repository);
27
36
  server.tool(incUsageTool.name, incUsageTool.description, incUsageTool.inputSchema, incUsageTool.handler);
37
+ console.error('[code-intelligence-mcp] tool.registered %s', incUsageTool.name);
28
38
  const recommendComponentTool = createRecommendComponentTool(recommendationService);
29
39
  server.tool(recommendComponentTool.name, recommendComponentTool.description, recommendComponentTool.inputSchema, recommendComponentTool.handler);
40
+ console.error('[code-intelligence-mcp] tool.registered %s', recommendComponentTool.name);
30
41
  registerReusableCodeAdvisorPrompt(server);
42
+ console.error('[code-intelligence-mcp] prompt.registered reusable-code-advisor');
43
+ const queryDocsTool = createQueryDocsTool();
44
+ server.tool(queryDocsTool.name, queryDocsTool.description, queryDocsTool.inputSchema, queryDocsTool.handler);
45
+ console.error('[code-intelligence-mcp] tool.registered %s', queryDocsTool.name);
46
+ console.error('[code-intelligence-mcp] createServer.ready toolCount=7 promptCount=1');
31
47
  return server;
32
48
  }
@@ -0,0 +1,150 @@
1
+ /**
2
+ * contextAssembler: RAG 上下文组装器。
3
+ *
4
+ * 完整流程:
5
+ * topK hits
6
+ * → 邻块扩展(getAdjacentChunks):补全被截断的边界上下文
7
+ * → 去重(path + chunk_index):避免重复块被重复计费
8
+ * → 相关性排序:命中块靠前,纯邻块靠后
9
+ * → 字符预算截断:超出 CONTEXT_MAX_CHARS 时丢弃末尾块
10
+ * → 文本渲染:拼成可直接注入 prompt 的 contextText
11
+ *
12
+ * 为什么需要邻块扩展?
13
+ * chunk 切分时按结构和字符数截断,单个 chunk 可能只包含一段话的前半句。
14
+ * 取前后邻块(radius=1 即各一块)可以在不大幅增加 token 成本的前提下
15
+ * 把被截断的上下文还原,显著降低 LLM 产生"幻觉引用"的概率。
16
+ *
17
+ * 为什么要字符预算?
18
+ * 大多数 LLM 有 context window 限制。超出预算不仅导致截断错误,
19
+ * 还会因为过长的无关文本降低模型对真正相关段落的注意力权重("lost in the middle"问题)。
20
+ * 控制预算 = 控制召回精度。
21
+ */
22
+ import { CONTEXT_ADJACENT_RADIUS, CONTEXT_MAX_CHARS, CONTEXT_MAX_CHUNKS, } from '../config/tuning.js';
23
/**
 * Renders one chunk as a text block: a bracketed source header followed by
 * the chunk content on the next line.
 *
 * The header lists the source path, the chunk's 1-based position out of
 * `chunkCount`, and — when `similarity` is neither null nor undefined — the
 * similarity formatted with two decimals.
 *
 * Example output:
 *   [来源: qa-doc/topK.md · 第2块/共5块 · 相似度 0.87]
 *   The topK parameter controls how many results are returned...
 *
 * @param {object} chunk Object with `path`, `chunkIndex`, `chunkCount`, `content` and optional `similarity`.
 * @returns {string} Header line plus content.
 */
function renderChunk(chunk) {
    const { path, chunkIndex, chunkCount, similarity, content } = chunk;
    const headerParts = [
        `来源: ${path}`,
        `第${chunkIndex + 1}块/共${chunkCount}块`,
        // Only chunks that carry a similarity value get the third segment.
        ...(similarity != null ? [`相似度 ${similarity.toFixed(2)}`] : []),
    ];
    return `[${headerParts.join(' · ')}]\n${content}`;
}
39
/**
 * Assembles prompt-ready RAG context from semantic-search hits.
 *
 * Pipeline: expand hits with neighbouring chunks → de-duplicate → sort →
 * apply the character/chunk budget → render to text.
 */
export class ContextAssembler {
    repo;
    /**
     * @param {object} repo Chunk repository; must provide
     *   `getAdjacentChunks(path, chunkIndex, radius)`.
     */
    constructor(repo) {
        this.repo = repo;
    }
    /**
     * Assembles the RAG context.
     *
     * @param {object[]} hits Top-K search results (the original docs state they
     *   come from ChunkRepository.searchSemantic(), already sorted by similarity descending).
     * @param {object} [opts]
     * @param {number} [opts.maxChars] Overrides CONTEXT_MAX_CHARS.
     * @param {number} [opts.adjacentRadius] Overrides CONTEXT_ADJACENT_RADIUS; 0 disables neighbour expansion.
     * @param {number} [opts.maxChunks] Overrides CONTEXT_MAX_CHUNKS.
     * @returns {Promise<{chunks: object[], contextText: string, hitCount: number, totalChunks: number, truncated: boolean}>}
     */
    async assemble(hits, opts) {
        const maxChars = opts?.maxChars ?? CONTEXT_MAX_CHARS;
        const radius = opts?.adjacentRadius ?? CONTEXT_ADJACENT_RADIUS;
        const maxChunks = opts?.maxChunks ?? CONTEXT_MAX_CHUNKS;
        // Captured before any awaiting so it reflects the hits as passed in.
        const hitCount = hits.length;

        // Step 1: pull in neighbours so content cut at a chunk boundary regains its context.
        const candidates = await this.expandWithAdjacentChunks(hits, radius);
        // Step 2 + 3: drop overlapping copies (first occurrence wins, i.e. the hit), then order.
        const ordered = sortChunks(deduplicateChunks(candidates));
        // Step 4: enforce the character and chunk-count limits.
        const { selected, truncated } = applyBudget(ordered, maxChars, maxChunks);

        return {
            chunks: selected,
            // Step 5: render each chunk and join with a horizontal-rule separator.
            contextText: selected.map(renderChunk).join('\n\n---\n\n'),
            hitCount,
            totalChunks: selected.length,
            truncated,
        };
    }
    /**
     * Returns the hits followed by every neighbour chunk fetched for them, as
     * one flat list (duplicates included; de-duplication happens later).
     * When `radius <= 0` or there are no hits, no repository call is made and
     * a copy of `hits` is returned.
     *
     * @param {object[]} hits Hit chunks, each with `path` and `chunkIndex`.
     * @param {number} radius Passed through to `getAdjacentChunks`.
     * @returns {Promise<object[]>}
     */
    async expandWithAdjacentChunks(hits, radius) {
        const skipLookup = radius <= 0 || hits.length === 0;
        if (skipLookup) {
            return [...hits];
        }
        // Issue all neighbour lookups at once rather than one after another.
        const lookups = hits.map(({ path, chunkIndex }) => this.repo.getAdjacentChunks(path, chunkIndex, radius));
        const neighbourGroups = await Promise.all(lookups);
        // Hits go first so that, on de-duplication, the hit copy is the one that survives.
        return neighbourGroups.reduce((all, group) => {
            all.push(...group);
            return all;
        }, [...hits]);
    }
}
97
/**
 * Removes duplicate chunks, where identity is the key `${path}::${chunkIndex}`.
 * The first occurrence of each key is kept and the output preserves the order
 * in which keys first appeared.
 *
 * @param {object[]} chunks Chunks with `path` and `chunkIndex`.
 * @returns {object[]} New array without duplicates.
 */
function deduplicateChunks(chunks) {
    const firstByKey = new Map();
    for (const chunk of chunks) {
        const key = `${chunk.path}::${chunk.chunkIndex}`;
        if (!firstByKey.has(key)) {
            firstByKey.set(key, chunk);
        }
    }
    // Map iteration follows insertion order, i.e. first-seen order.
    return [...firstByKey.values()];
}
110
/**
 * Returns a sorted copy of `chunks`; the input array is not modified.
 *
 * Ordering:
 *  1. chunks with a `similarity` value (not null/undefined) come before chunks without one;
 *  2. among chunks with a similarity, higher similarity comes first;
 *  3. among chunks without one, order is by `path` (localeCompare) and then
 *     ascending `chunkIndex`, which keeps them in document order.
 *
 * @param {object[]} chunks
 * @returns {object[]}
 */
function sortChunks(chunks) {
    const isHit = (chunk) => chunk.similarity != null;
    const compare = (left, right) => {
        const leftHit = isHit(left);
        const rightHit = isHit(right);
        if (leftHit !== rightHit) {
            return leftHit ? -1 : 1;
        }
        if (leftHit) {
            // Both are hits: descending similarity.
            return right.similarity - left.similarity;
        }
        const byPath = left.path.localeCompare(right.path);
        return byPath === 0 ? left.chunkIndex - right.chunkIndex : byPath;
    };
    return Array.from(chunks).sort(compare);
}
131
+ /**
132
+ * 从排好序的 chunk 列表中按字符预算和数量上限截取子集。
133
+ * 按顺序累加字符数,第一个超出预算的 chunk 及之后的全部丢弃。
134
+ */
135
+ function applyBudget(chunks, maxChars, maxChunks) {
136
+ const selected = [];
137
+ let totalChars = 0;
138
+ for (const chunk of chunks) {
139
+ if (selected.length >= maxChunks) {
140
+ return { selected, truncated: true };
141
+ }
142
+ const chunkChars = renderChunk(chunk).length;
143
+ if (totalChars + chunkChars > maxChars && selected.length > 0) {
144
+ return { selected, truncated: true };
145
+ }
146
+ selected.push(chunk);
147
+ totalChars += chunkChars;
148
+ }
149
+ return { selected, truncated: false };
150
+ }
@@ -1,3 +1,4 @@
1
+ import { CALLEE_MATCH_SCORE_MAX, CALLEE_MATCH_SCORE_PER_MATCH, COMMON_PATH_SCORE_NO, COMMON_PATH_SCORE_YES, RANK_WEIGHTS, RECENCY_SCORE_DEFAULT, RECENCY_SCORE_OLDEST, RECENCY_SCORE_TIERS, SEMANTIC_REASON_THRESHOLD_HIGH, SEMANTIC_REASON_THRESHOLD_MED, TEXT_MATCH_SCORES, TOKEN_OVERLAP_TIERS, USAGE_REASON_THRESHOLD_HIGH, USAGE_SCORE_LOG_DIVISOR, } from '../config/tuning.js';
1
2
  function clamp01(value) {
2
3
  if (value < 0)
3
4
  return 0;
@@ -5,40 +6,78 @@ function clamp01(value) {
5
6
  return 1;
6
7
  return value;
7
8
  }
9
/**
 * Tokenizes text for keyword-overlap scoring.
 *
 * ASCII tokens are runs of letters, digits and underscores from the trimmed,
 * lower-cased text, kept when at least two characters long. Every run of two
 * or more characters in U+4E00–U+9FFF contributes its overlapping
 * two-character slices.
 *
 * Example: 'useDebounceInput组件' yields ['usedebounceinput', '组件'] — the
 * ASCII token is lower-cased.
 *
 * @param {string} text Input text.
 * @returns {string[]} Unique tokens in first-seen order.
 */
function extractTextTokens(text) {
    const seen = new Set();
    const asciiRuns = text.trim().toLowerCase().match(/[a-z0-9_]+/g) ?? [];
    asciiRuns
        .filter((word) => word.length >= 2)
        .forEach((word) => seen.add(word));
    const ideographRuns = text.match(/[\u4e00-\u9fff]{2,}/g) ?? [];
    for (const run of ideographRuns) {
        // One bigram per adjacent pair: a run of length n gives n - 1 slices.
        for (let start = 0; start < run.length - 1; start += 1) {
            seen.add(run.slice(start, start + 2));
        }
    }
    return Array.from(seen);
}
25
/**
 * Scores keyword overlap between a query and a symbol.
 *
 * The query is tokenized with extractTextTokens; each token is then looked up
 * as a substring of the symbol's lower-cased name, description and path. The
 * number of matching tokens and their share of all query tokens are compared
 * against TOKEN_OVERLAP_TIERS in order, and the score of the first tier whose
 * `minMatches` and `minRatio` are both met is returned.
 *
 * @param {string} query Raw query text.
 * @param {object} symbol Record with `name`, `path` and optional `description`.
 * @returns {number} Tier score, or 0 when the query has no tokens or no tier is met.
 */
function tokenOverlapScore(query, symbol) {
    const queryTokens = extractTextTokens(query);
    const total = queryTokens.length;
    if (total === 0) {
        return 0;
    }
    const haystack = [symbol.name, symbol.description ?? '', symbol.path]
        .join(' ')
        .toLowerCase();
    let matched = 0;
    for (const token of queryTokens) {
        if (haystack.includes(token)) {
            matched += 1;
        }
    }
    const overlapRatio = matched / total;
    for (const tier of TOKEN_OVERLAP_TIERS) {
        const qualifies = matched >= tier.minMatches && overlapRatio >= tier.minRatio;
        if (qualifies) {
            return tier.score;
        }
    }
    return 0;
}
8
42
  function textMatchScore(query, symbol) {
9
43
  const q = query.trim().toLowerCase();
10
44
  if (!q)
11
- return { score: 0, matchedBy: "weak" };
45
+ return { score: 0, matchedBy: 'weak' };
12
46
  const name = symbol.name.toLowerCase();
13
- const description = (symbol.description ?? "").toLowerCase();
47
+ const description = (symbol.description ?? '').toLowerCase();
14
48
  if (name === q)
15
- return { score: 1, matchedBy: "exact_name" };
49
+ return { score: 1, matchedBy: 'exact_name' };
16
50
  if (name.includes(q))
17
- return { score: 0.85, matchedBy: "name_contains" };
51
+ return {
52
+ score: TEXT_MATCH_SCORES.nameContains,
53
+ matchedBy: 'name_contains',
54
+ };
18
55
  if (description.includes(q))
19
- return { score: 0.65, matchedBy: "description_contains" };
20
- return { score: 0.2, matchedBy: "weak" };
56
+ return {
57
+ score: TEXT_MATCH_SCORES.descriptionContains,
58
+ matchedBy: 'description_contains',
59
+ };
60
+ const overlapScore = tokenOverlapScore(query, symbol);
61
+ if (overlapScore > 0)
62
+ return { score: overlapScore, matchedBy: 'token_overlap' };
63
+ return { score: TEXT_MATCH_SCORES.weak, matchedBy: 'weak' };
21
64
  }
22
65
  function usageScore(usageCount) {
23
66
  // log scale to avoid very large usage monopolizing ranking.
24
- return clamp01(Math.log10(usageCount + 1) / 3);
67
+ return clamp01(Math.log10(usageCount + 1) / USAGE_SCORE_LOG_DIVISOR);
25
68
  }
26
69
  function recencyScore(createdAt) {
27
70
  if (!createdAt)
28
- return 0.4;
71
+ return RECENCY_SCORE_DEFAULT;
29
72
  const ts = new Date(createdAt).getTime();
30
73
  if (Number.isNaN(ts))
31
- return 0.4;
74
+ return RECENCY_SCORE_DEFAULT;
32
75
  const days = (Date.now() - ts) / (1000 * 60 * 60 * 24);
33
- if (days <= 7)
34
- return 1;
35
- if (days <= 30)
36
- return 0.8;
37
- if (days <= 90)
38
- return 0.6;
39
- if (days <= 180)
40
- return 0.4;
41
- return 0.25;
76
+ for (const tier of RECENCY_SCORE_TIERS) {
77
+ if (days <= tier.maxDays)
78
+ return tier.score;
79
+ }
80
+ return RECENCY_SCORE_OLDEST;
42
81
  }
43
82
  function daysSinceCreated(createdAt) {
44
83
  if (!createdAt)
@@ -50,62 +89,72 @@ function daysSinceCreated(createdAt) {
50
89
  }
51
90
  function commonPathScore(path) {
52
91
  const lower = path.toLowerCase();
53
- return lower.includes("/common/") || lower.includes("/shared/") ? 1 : 0.35;
92
+ return lower.includes('/common/') || lower.includes('/shared/')
93
+ ? COMMON_PATH_SCORE_YES
94
+ : COMMON_PATH_SCORE_NO;
54
95
  }
55
- const RANK_WEIGHTS = {
56
- textMatch: 0.5,
57
- usage: 0.3,
58
- recency: 0.1,
59
- commonPath: 0.1
60
- };
61
96
  /**
62
- * Phase 5:以向量余弦相似度作为主文本维度,再叠加 usage / recency / common(与 `rankSymbols` 同权重)。
97
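+ * Phase 5: uses vector cosine similarity as the primary text dimension, then adds the usage / recency / common-path scores plus a calleeNames match bonus.
98
+ * calleeNames is an independent structural signal added on top of the weighted sum; it does not alter the semantic-vector similarity itself.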
63
99
  */
64
- export function rankSemanticHits(hits) {
100
+ export function rankSemanticHits(hits, query) {
65
101
  return hits
66
102
  .map(({ symbol, similarity }) => {
67
103
  const textScore = clamp01(similarity);
68
104
  const usage = usageScore(symbol.usageCount);
69
105
  const recency = recencyScore(symbol.createdAt);
70
106
  const common = commonPathScore(symbol.path);
107
+ // ✨ 新增:calleeNames 作为独立信号
108
+ let calleeMatchScore = 0;
109
+ if (query && Array.isArray(symbol.meta?.calleeNames)) {
110
+ const calleeNames = symbol.meta.calleeNames;
111
+ const queryLower = query.toLowerCase();
112
+ const matchedCallees = calleeNames.filter((callee) => queryLower.includes(callee.toLowerCase())).length;
113
+ if (matchedCallees > 0) {
114
+ calleeMatchScore = Math.min(matchedCallees * CALLEE_MATCH_SCORE_PER_MATCH, CALLEE_MATCH_SCORE_MAX);
115
+ }
116
+ }
71
117
  const score = textScore * RANK_WEIGHTS.textMatch +
72
118
  usage * RANK_WEIGHTS.usage +
73
119
  recency * RANK_WEIGHTS.recency +
74
- common * RANK_WEIGHTS.commonPath;
120
+ common * RANK_WEIGHTS.commonPath +
121
+ calleeMatchScore;
75
122
  const reasonParts = [];
76
- if (textScore >= 0.55)
77
- reasonParts.push("语义相似度高");
78
- else if (textScore >= 0.4)
79
- reasonParts.push("语义相关");
80
- if (usage >= 0.6)
81
- reasonParts.push("使用频率高");
82
- if (common >= 1)
83
- reasonParts.push("位于 shared/common 路径");
123
+ if (textScore >= SEMANTIC_REASON_THRESHOLD_HIGH)
124
+ reasonParts.push('语义相似度高');
125
+ else if (textScore >= SEMANTIC_REASON_THRESHOLD_MED)
126
+ reasonParts.push('语义相关');
127
+ if (usage >= USAGE_REASON_THRESHOLD_HIGH)
128
+ reasonParts.push('使用频率高');
129
+ if (common >= COMMON_PATH_SCORE_YES)
130
+ reasonParts.push('位于 shared/common 路径');
131
+ if (calleeMatchScore > 0)
132
+ reasonParts.push('函数调用关系匹配');
84
133
  if (reasonParts.length === 0)
85
- reasonParts.push("综合相关性较好");
134
+ reasonParts.push('综合相关性较好');
86
135
  return {
87
136
  symbol,
88
137
  score: Number(score.toFixed(3)),
89
138
  reason: {
90
139
  textMatch: {
91
140
  score: Number(textScore.toFixed(3)),
92
- matchedBy: "semantic"
141
+ matchedBy: 'semantic',
93
142
  },
94
143
  usage: {
95
144
  score: Number(usage.toFixed(3)),
96
- usageCount: symbol.usageCount
145
+ usageCount: symbol.usageCount,
97
146
  },
98
147
  recency: {
99
148
  score: Number(recency.toFixed(3)),
100
- daysSinceCreated: daysSinceCreated(symbol.createdAt)
149
+ daysSinceCreated: daysSinceCreated(symbol.createdAt),
101
150
  },
102
151
  commonPath: {
103
152
  score: Number(common.toFixed(3)),
104
- isCommonPath: common >= 1
153
+ isCommonPath: common >= COMMON_PATH_SCORE_YES,
105
154
  },
106
155
  weights: RANK_WEIGHTS,
107
- summary: reasonParts.join(" + ")
108
- }
156
+ summary: reasonParts.join(' + '),
157
+ },
109
158
  };
110
159
  })
111
160
  .sort((a, b) => b.score - a.score);
@@ -122,39 +171,41 @@ export function rankSymbols(query, symbols) {
122
171
  recency * RANK_WEIGHTS.recency +
123
172
  common * RANK_WEIGHTS.commonPath;
124
173
  const reasonParts = [];
125
- if (text.score >= 0.85)
126
- reasonParts.push("文本匹配度高");
127
- else if (text.score >= 0.65)
128
- reasonParts.push("描述命中");
129
- if (usage >= 0.6)
130
- reasonParts.push("使用频率高");
131
- if (common >= 1)
132
- reasonParts.push("位于 shared/common 路径");
174
+ if (text.score >= TEXT_MATCH_SCORES.nameContains)
175
+ reasonParts.push('文本匹配度高');
176
+ else if (text.score >= TEXT_MATCH_SCORES.descriptionContains)
177
+ reasonParts.push('描述命中');
178
+ else if (text.matchedBy === 'token_overlap')
179
+ reasonParts.push('关键词片段高度重合');
180
+ if (usage >= USAGE_REASON_THRESHOLD_HIGH)
181
+ reasonParts.push('使用频率高');
182
+ if (common >= COMMON_PATH_SCORE_YES)
183
+ reasonParts.push('位于 shared/common 路径');
133
184
  if (reasonParts.length === 0)
134
- reasonParts.push("综合相关性较好");
185
+ reasonParts.push('综合相关性较好');
135
186
  return {
136
187
  symbol,
137
188
  score: Number(score.toFixed(3)),
138
189
  reason: {
139
190
  textMatch: {
140
191
  score: Number(text.score.toFixed(3)),
141
- matchedBy: text.matchedBy
192
+ matchedBy: text.matchedBy,
142
193
  },
143
194
  usage: {
144
195
  score: Number(usage.toFixed(3)),
145
- usageCount: symbol.usageCount
196
+ usageCount: symbol.usageCount,
146
197
  },
147
198
  recency: {
148
199
  score: Number(recency.toFixed(3)),
149
- daysSinceCreated: daysSinceCreated(symbol.createdAt)
200
+ daysSinceCreated: daysSinceCreated(symbol.createdAt),
150
201
  },
151
202
  commonPath: {
152
203
  score: Number(common.toFixed(3)),
153
- isCommonPath: common >= 1
204
+ isCommonPath: common >= COMMON_PATH_SCORE_YES,
154
205
  },
155
206
  weights: RANK_WEIGHTS,
156
- summary: reasonParts.join(" + ")
157
- }
207
+ summary: reasonParts.join(' + '),
208
+ },
158
209
  };
159
210
  })
160
211
  .sort((a, b) => b.score - a.score);