@lorrylurui/code-intelligence-mcp 2.0.4 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@ import { env } from '../config/env.js';
2
2
  import { getPool } from '../db/postgres.js';
3
3
  import { createEmbeddingClient } from '../services/embeddingClient.js';
4
4
  import { SEARCHABLE_STATUS } from '../config/symbolStatus.js';
5
- const SIMILARITY_THRESHOLD = 0.5;
5
+ const SIMILARITY_THRESHOLD = 0;
6
6
  const TOP_K = 20;
7
7
  const inMemorySymbols = [
8
8
  {
@@ -82,21 +82,58 @@ function getMetaArray(meta, key) {
82
82
  return [];
83
83
  return value.filter((v) => typeof v === 'string');
84
84
  }
85
/**
 * Split a free-text search query into de-duplicated lookup tokens.
 *
 * Two kinds of tokens are produced:
 * - ASCII runs of letters, digits and underscores, lower-cased, kept only
 *   when they are at least two characters long;
 * - overlapping two-character bigrams from every run of two or more CJK
 *   ideographs in the U+4E00–U+9FFF range (taken from the untouched query).
 *
 * @param {string} query - Raw user query. Any non-string value yields no tokens
 *   instead of throwing.
 * @returns {string[]} Unique tokens in first-seen order: ASCII tokens first,
 *   then CJK bigrams.
 */
function extractSearchTokens(query) {
  if (typeof query !== 'string') {
    return [];
  }
  const tokens = new Set();
  // Lower-case before matching so the ASCII pattern only needs [a-z].
  for (const [word] of query.trim().toLowerCase().matchAll(/[a-z0-9_]+/g)) {
    if (word.length >= 2) {
      tokens.add(word);
    }
  }
  for (const [run] of query.matchAll(/[\u4e00-\u9fff]{2,}/g)) {
    // Sliding window of width 2 over the ideograph run.
    for (let start = 0; start + 1 < run.length; start += 1) {
      tokens.add(run.slice(start, start + 2));
    }
  }
  return [...tokens];
}
100
/**
 * Flatten a symbol into one lower-cased string for substring matching.
 *
 * The pieces are, in order and separated by single spaces: name, path,
 * description (empty string when nullish) and the JSON serialization of
 * meta (`{}` when nullish).
 *
 * @param {object} symbol - Symbol record exposing name, path, description and meta.
 * @returns {string} Lower-cased concatenation of the searchable fields.
 */
function buildSearchText(symbol) {
  const { name, path, description, meta } = symbol;
  const pieces = [name, path, description ?? '', JSON.stringify(meta ?? {})];
  const combined = pieces.join(' ');
  return combined.toLowerCase();
}
110
/**
 * Count how many of the given tokens occur as substrings of `text`.
 *
 * Each token is lower-cased before the check; `text` is used as-is.
 * Repeated tokens are counted once per occurrence in the array.
 *
 * @param {string} text - Haystack to search in.
 * @param {string[]} tokens - Candidate tokens.
 * @returns {number} Number of tokens found in `text`.
 */
function countTokenMatches(text, tokens) {
  const found = tokens.filter((token) => text.includes(token.toLowerCase()));
  return found.length;
}
85
113
  export class SymbolRepository {
86
114
  pool;
87
115
  constructor() {
88
116
  this.pool = getPool();
89
117
  }
90
118
  async search(query, type) {
119
+ console.error('[code-intelligence-mcp] repository.search.start query=%s type=%s table=%s searchableStatus=%s hasPool=%s', query, type ?? '', env.symbolsTable, String(SEARCHABLE_STATUS), String(Boolean(this.pool)));
91
120
  if (!this.pool) {
92
121
  const q = query.toLowerCase();
93
- return inMemorySymbols.filter((s) => {
122
+ const tokens = extractSearchTokens(query);
123
+ const matched = inMemorySymbols.filter((s) => {
94
124
  const typeOk = type ? s.type === type : true;
125
+ const text = buildSearchText(s);
95
126
  return (typeOk &&
96
- (s.name.toLowerCase().includes(q) ||
97
- (s.description ?? '').toLowerCase().includes(q)));
127
+ (text.includes(q) || countTokenMatches(text, tokens) >= 2));
98
128
  });
129
+ console.error('[code-intelligence-mcp] repository.search.memory count=%s top=%s', String(matched.length), JSON.stringify(matched.slice(0, 3).map((s) => ({
130
+ id: s.id,
131
+ name: s.name,
132
+ path: s.path,
133
+ }))));
134
+ return matched;
99
135
  }
136
+ const tokens = extractSearchTokens(query);
100
137
  const params = [
101
138
  `%${query}%`,
102
139
  SEARCHABLE_STATUS,
@@ -104,15 +141,46 @@ export class SymbolRepository {
104
141
  let sql = `
105
142
  SELECT id, name, type, category, path, description, content, meta::text AS meta, usage_count, created_at
106
143
  FROM ${env.symbolsTable}
107
- WHERE (name ILIKE $1 OR description ILIKE $1)
144
+ WHERE (
145
+ name ILIKE $1 OR
146
+ description ILIKE $1 OR
147
+ path ILIKE $1 OR
148
+ meta::text ILIKE $1
149
+ )
108
150
  AND status = $2
109
151
  `;
152
+ if (tokens.length) {
153
+ const tokenClauses = tokens.map((token) => {
154
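+ // Each query token contributes one clause; the clauses are OR-ed together, so a row matches when ANY token appears in name/description/path/meta (not every token). This widens recall rather than tightening precision.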
155
+ params.push(`%${token}%`);
156
+ const index = params.length;
157
+ return `name ILIKE $${index} OR description ILIKE $${index} OR path ILIKE $${index} OR meta::text ILIKE $${index}`;
158
+ });
159
+ sql = `
160
+ SELECT id, name, type, category, path, description, content, meta::text AS meta, usage_count, created_at
161
+ FROM ${env.symbolsTable}
162
+ WHERE (
163
+ name ILIKE $1 OR
164
+ description ILIKE $1 OR
165
+ path ILIKE $1 OR
166
+ meta::text ILIKE $1 OR
167
+ (${tokenClauses.join(' OR ')})
168
+ )
169
+ AND status = $2
170
+ `;
171
+ }
110
172
  if (type) {
111
173
  params.push(type);
112
174
  sql += ` AND type = $${params.length}`;
113
175
  }
114
176
  sql += ' ORDER BY usage_count DESC LIMIT 20';
115
177
  const { rows } = await this.pool.query(sql, params);
178
+ console.error('[code-intelligence-mcp] repository.search.db table=%s rows=%s top=%s note=name/description only', env.symbolsTable, String(rows.length), JSON.stringify(rows.slice(0, 3).map((r) => ({
179
+ id: r.id,
180
+ name: r.name,
181
+ path: r.path,
182
+ type: r.type,
183
+ }))));
116
184
  return rows.map((r) => mapRow(r));
117
185
  }
118
186
  /**
@@ -120,10 +188,13 @@ export class SymbolRepository {
120
188
  * 不再需要在 Node 拉取全量向量做内存计算。
121
189
  */
122
190
  async searchSemanticHits(query, opts) {
191
+ console.error('[code-intelligence-mcp] repository.searchSemanticHits.start query=%s type=%s table=%s limit=%s threshold=%s searchableStatus=%s hasPool=%s', query, opts?.type ?? '', env.symbolsTable, String(opts?.limit ?? TOP_K), String(SIMILARITY_THRESHOLD), String(SEARCHABLE_STATUS), String(Boolean(this.pool)));
123
192
  if (!env.embeddingServiceUrl) {
193
+ console.error('[code-intelligence-mcp] repository.searchSemanticHits.error missingEmbeddingServiceUrl');
124
194
  throw new Error('语义检索需配置 EMBEDDING_SERVICE_URL 并启动嵌入服务');
125
195
  }
126
196
  if (!this.pool) {
197
+ console.error('[code-intelligence-mcp] repository.searchSemanticHits.noPool returnEmpty');
127
198
  return [];
128
199
  }
129
200
  const limit = opts?.limit ?? TOP_K;
@@ -151,15 +222,26 @@ export class SymbolRepository {
151
222
  params.push(limit * 2); // 多取一倍以便 SIMILARITY_THRESHOLD 过滤后仍有足量结果
152
223
  sql += ` ORDER BY embedding <=> $1::vector LIMIT $${params.length}`;
153
224
  const { rows } = await this.pool.query(sql, params);
154
- return rows
155
- .map((r) => ({
225
+ const mapped = rows.map((r) => ({
156
226
  symbol: mapRow(r),
157
227
  similarity: Number(r.similarity),
228
+ }));
229
+ const passed = mapped.filter((x) => x.similarity >= SIMILARITY_THRESHOLD);
230
+ console.error('[code-intelligence-mcp] repository.searchSemanticHits.db table=%s rawRows=%s passedThreshold=%s topRaw=%s', env.symbolsTable, String(rows.length), String(passed.length), JSON.stringify(mapped.slice(0, 5).map((x) => ({
231
+ id: x.symbol.id,
232
+ name: x.symbol.name,
233
+ path: x.symbol.path,
234
+ similarity: Number(x.similarity.toFixed(4)),
235
+ }))));
236
+ return passed
237
+ .map((r) => ({
238
+ symbol: r.symbol,
239
+ similarity: r.similarity,
158
240
  }))
159
- .filter((x) => x.similarity >= SIMILARITY_THRESHOLD)
160
241
  .slice(0, limit);
161
242
  }
162
243
  async getByName(name) {
244
+ console.error('[code-intelligence-mcp] repository.getByName.start name=%s table=%s hasPool=%s', name, env.symbolsTable, String(Boolean(this.pool)));
163
245
  if (!this.pool) {
164
246
  return (inMemorySymbols.find((s) => s.name.toLowerCase() === name.toLowerCase()) ?? null);
165
247
  }
@@ -169,6 +251,7 @@ export class SymbolRepository {
169
251
  WHERE name = $1
170
252
  LIMIT 1
171
253
  `, [name]);
254
+ console.error('[code-intelligence-mcp] repository.getByName.db table=%s rows=%s', env.symbolsTable, String(rows.length));
172
255
  if (rows.length === 0) {
173
256
  return null;
174
257
  }
@@ -191,6 +274,7 @@ export class SymbolRepository {
191
274
  return result.rowCount !== null && result.rowCount > 0;
192
275
  }
193
276
  async searchByStructure(fields, opts) {
277
+ console.error('[code-intelligence-mcp] repository.searchByStructure.start fields=%s type=%s category=%s table=%s limit=%s hasPool=%s', JSON.stringify(fields), opts?.type ?? '', opts?.category ?? '', env.symbolsTable, String(opts?.limit ?? 20), String(Boolean(this.pool)));
194
278
  const normalized = fields.map((f) => f.trim()).filter(Boolean);
195
279
  if (normalized.length === 0)
196
280
  return [];
@@ -215,7 +299,13 @@ export class SymbolRepository {
215
299
  return normalized.every((field) => propPool.includes(field.toLowerCase()));
216
300
  };
217
301
  if (!this.pool) {
218
- return inMemorySymbols.filter(matchesAll).slice(0, limit);
302
+ const matched = inMemorySymbols.filter(matchesAll).slice(0, limit);
303
+ console.error('[code-intelligence-mcp] repository.searchByStructure.memory matched=%s top=%s', String(matched.length), JSON.stringify(matched.slice(0, 3).map((s) => ({
304
+ id: s.id,
305
+ name: s.name,
306
+ path: s.path,
307
+ }))));
308
+ return matched;
219
309
  }
220
310
  const params = [];
221
311
  let sql = `
@@ -234,9 +324,13 @@ export class SymbolRepository {
234
324
  params.push(Math.max(limit * 5, 50));
235
325
  sql += ` ORDER BY usage_count DESC LIMIT $${params.length}`;
236
326
  const { rows } = await this.pool.query(sql, params);
237
- return rows
238
- .map((r) => mapRow(r))
239
- .filter(matchesAll)
240
- .slice(0, limit);
327
+ const mapped = rows.map((r) => mapRow(r));
328
+ const filtered = mapped.filter(matchesAll).slice(0, limit);
329
+ console.error('[code-intelligence-mcp] repository.searchByStructure.db table=%s scanned=%s matched=%s top=%s', env.symbolsTable, String(rows.length), String(filtered.length), JSON.stringify(filtered.slice(0, 3).map((s) => ({
330
+ id: s.id,
331
+ name: s.name,
332
+ path: s.path,
333
+ }))));
334
+ return filtered;
241
335
  }
242
336
  }
@@ -9,24 +9,36 @@ import { createIncUsageTool } from '../tools/incUsage.js';
9
9
  import { RecommendationService } from '../services/recommendationService.js';
10
10
  import { createRecommendComponentTool } from '../tools/recommendComponent.js';
11
11
  export function createServer() {
12
+ console.error('[code-intelligence-mcp] createServer.init');
12
13
  const server = new McpServer({
13
14
  name: 'code-intelligence-mcp',
14
15
  version: '0.1.0',
15
16
  });
17
+ console.error('[code-intelligence-mcp] mcpServer.created name=code-intelligence-mcp version=0.1.0');
16
18
  const repository = new SymbolRepository();
19
+ console.error('[code-intelligence-mcp] repository.created');
17
20
  const recommendationService = new RecommendationService(repository);
21
+ console.error('[code-intelligence-mcp] recommendationService.created');
18
22
  const searchTool = createSearchSymbolsTool(repository);
19
23
  server.tool(searchTool.name, searchTool.description, searchTool.inputSchema, searchTool.handler);
24
+ console.error('[code-intelligence-mcp] tool.registered %s', searchTool.name);
20
25
  const detailTool = createGetSymbolDetailTool(repository);
21
26
  server.tool(detailTool.name, detailTool.description, detailTool.inputSchema, detailTool.handler);
27
+ console.error('[code-intelligence-mcp] tool.registered %s', detailTool.name);
22
28
  const structureTool = createSearchByStructureTool(repository);
23
29
  server.tool(structureTool.name, structureTool.description, structureTool.inputSchema, structureTool.handler);
30
+ console.error('[code-intelligence-mcp] tool.registered %s', structureTool.name);
24
31
  const reindexTool = createReindexTool();
25
32
  server.tool(reindexTool.name, reindexTool.description, reindexTool.inputSchema, reindexTool.handler);
33
+ console.error('[code-intelligence-mcp] tool.registered %s', reindexTool.name);
26
34
  const incUsageTool = createIncUsageTool(repository);
27
35
  server.tool(incUsageTool.name, incUsageTool.description, incUsageTool.inputSchema, incUsageTool.handler);
36
+ console.error('[code-intelligence-mcp] tool.registered %s', incUsageTool.name);
28
37
  const recommendComponentTool = createRecommendComponentTool(recommendationService);
29
38
  server.tool(recommendComponentTool.name, recommendComponentTool.description, recommendComponentTool.inputSchema, recommendComponentTool.handler);
39
+ console.error('[code-intelligence-mcp] tool.registered %s', recommendComponentTool.name);
30
40
  registerReusableCodeAdvisorPrompt(server);
41
+ console.error('[code-intelligence-mcp] prompt.registered reusable-code-advisor');
42
+ console.error('[code-intelligence-mcp] createServer.ready toolCount=6 promptCount=1');
31
43
  return server;
32
44
  }
@@ -5,19 +5,56 @@ function clamp01(value) {
5
5
  return 1;
6
6
  return value;
7
7
  }
8
/**
 * Split free text into de-duplicated matching tokens.
 *
 * - ASCII runs of letters, digits and underscores are lower-cased and kept
 *   when at least two characters long.
 * - Every run of two or more CJK ideographs (U+4E00–U+9FFF) is expanded into
 *   overlapping two-character bigrams.
 *
 * Example: 'useDebounceInput组件' -> ['usedebounceinput', '组件'].
 * Note that the ASCII token is lower-cased, not kept in camelCase.
 *
 * @param {string} text - Raw text, typically the user query. Any non-string
 *   value yields no tokens instead of throwing.
 * @returns {string[]} Unique tokens in first-seen order: ASCII tokens first,
 *   then CJK bigrams.
 */
function extractTextTokens(text) {
  if (typeof text !== 'string') {
    return [];
  }
  const tokens = new Set();
  for (const [word] of text.trim().toLowerCase().matchAll(/[a-z0-9_]+/g)) {
    if (word.length >= 2) {
      tokens.add(word);
    }
  }
  for (const [chunk] of text.matchAll(/[\u4e00-\u9fff]{2,}/g)) {
    // Sliding window of width 2 over the ideograph run.
    for (let start = 0; start + 1 < chunk.length; start += 1) {
      tokens.add(chunk.slice(start, start + 2));
    }
  }
  return [...tokens];
}
24
/**
 * Score keyword overlap between a query and a symbol.
 *
 * The query is tokenized with `extractTextTokens`; each token is then looked
 * up as a substring of the symbol's lower-cased "name description path" text.
 * The number of matched tokens and their share of all query tokens select a
 * tier:
 *   - at least 4 matches and ratio >= 0.45 -> 0.78
 *   - at least 3 matches and ratio >= 0.30 -> 0.68
 *   - at least 2 matches and ratio >= 0.18 -> 0.56
 *   - otherwise (or when the query has no tokens) -> 0
 *
 * @param {string} query - Raw user query.
 * @param {object} symbol - Symbol exposing name, description and path.
 * @returns {number} Overlap score: 0, 0.56, 0.68 or 0.78.
 */
function tokenOverlapScore(query, symbol) {
  const queryTokens = extractTextTokens(query);
  const total = queryTokens.length;
  if (total === 0) {
    return 0;
  }
  const haystack = [symbol.name, symbol.description ?? '', symbol.path]
    .join(' ')
    .toLowerCase();
  let hits = 0;
  for (const token of queryTokens) {
    if (haystack.includes(token)) {
      hits += 1;
    }
  }
  const ratio = hits / total;
  // Tiers are ordered from strictest to loosest; the first satisfied one wins.
  const tiers = [
    { minHits: 4, minRatio: 0.45, score: 0.78 },
    { minHits: 3, minRatio: 0.3, score: 0.68 },
    { minHits: 2, minRatio: 0.18, score: 0.56 },
  ];
  const tier = tiers.find((t) => hits >= t.minHits && ratio >= t.minRatio);
  return tier ? tier.score : 0;
}
8
42
  function textMatchScore(query, symbol) {
9
43
  const q = query.trim().toLowerCase();
10
44
  if (!q)
11
- return { score: 0, matchedBy: "weak" };
45
+ return { score: 0, matchedBy: 'weak' };
12
46
  const name = symbol.name.toLowerCase();
13
- const description = (symbol.description ?? "").toLowerCase();
47
+ const description = (symbol.description ?? '').toLowerCase();
14
48
  if (name === q)
15
- return { score: 1, matchedBy: "exact_name" };
49
+ return { score: 1, matchedBy: 'exact_name' };
16
50
  if (name.includes(q))
17
- return { score: 0.85, matchedBy: "name_contains" };
51
+ return { score: 0.85, matchedBy: 'name_contains' };
18
52
  if (description.includes(q))
19
- return { score: 0.65, matchedBy: "description_contains" };
20
- return { score: 0.2, matchedBy: "weak" };
53
+ return { score: 0.65, matchedBy: 'description_contains' };
54
+ const overlapScore = tokenOverlapScore(query, symbol);
55
+ if (overlapScore > 0)
56
+ return { score: overlapScore, matchedBy: 'token_overlap' };
57
+ return { score: 0.2, matchedBy: 'weak' };
21
58
  }
22
59
  function usageScore(usageCount) {
23
60
  // log scale to avoid very large usage monopolizing ranking.
@@ -50,62 +87,76 @@ function daysSinceCreated(createdAt) {
50
87
  }
51
88
  function commonPathScore(path) {
52
89
  const lower = path.toLowerCase();
53
- return lower.includes("/common/") || lower.includes("/shared/") ? 1 : 0.35;
90
+ return lower.includes('/common/') || lower.includes('/shared/') ? 1 : 0.35;
54
91
  }
55
92
  const RANK_WEIGHTS = {
56
93
  textMatch: 0.5,
57
94
  usage: 0.3,
58
95
  recency: 0.1,
59
- commonPath: 0.1
96
+ commonPath: 0.1,
60
97
  };
61
98
  /**
62
- * Phase 5:以向量余弦相似度作为主文本维度,再叠加 usage / recency / common(与 `rankSymbols` 同权重)。
99
+ * Phase 5:以向量余弦相似度作为主文本维度,再叠加 usage / recency / common calleeNames 匹配度。
100
+ * calleeNames 作为结构信息独立信号,不污染纯语义向量。
63
101
  */
64
- export function rankSemanticHits(hits) {
102
+ export function rankSemanticHits(hits, query) {
65
103
  return hits
66
104
  .map(({ symbol, similarity }) => {
67
105
  const textScore = clamp01(similarity);
68
106
  const usage = usageScore(symbol.usageCount);
69
107
  const recency = recencyScore(symbol.createdAt);
70
108
  const common = commonPathScore(symbol.path);
109
+ // ✨ 新增:calleeNames 作为独立信号
110
+ let calleeMatchScore = 0;
111
+ if (query && Array.isArray(symbol.meta?.calleeNames)) {
112
+ const calleeNames = symbol.meta.calleeNames;
113
+ const queryLower = query.toLowerCase();
114
+ const matchedCallees = calleeNames.filter((callee) => queryLower.includes(callee.toLowerCase())).length;
115
+ if (matchedCallees > 0) {
116
+ calleeMatchScore = Math.min(matchedCallees * 0.05, 0.2);
117
+ }
118
+ }
71
119
  const score = textScore * RANK_WEIGHTS.textMatch +
72
120
  usage * RANK_WEIGHTS.usage +
73
121
  recency * RANK_WEIGHTS.recency +
74
- common * RANK_WEIGHTS.commonPath;
122
+ common * RANK_WEIGHTS.commonPath +
123
+ calleeMatchScore;
75
124
  const reasonParts = [];
76
125
  if (textScore >= 0.55)
77
- reasonParts.push("语义相似度高");
126
+ reasonParts.push('语义相似度高');
78
127
  else if (textScore >= 0.4)
79
- reasonParts.push("语义相关");
128
+ reasonParts.push('语义相关');
80
129
  if (usage >= 0.6)
81
- reasonParts.push("使用频率高");
130
+ reasonParts.push('使用频率高');
82
131
  if (common >= 1)
83
- reasonParts.push("位于 shared/common 路径");
132
+ reasonParts.push('位于 shared/common 路径');
133
+ if (calleeMatchScore > 0)
134
+ reasonParts.push('函数调用关系匹配');
84
135
  if (reasonParts.length === 0)
85
- reasonParts.push("综合相关性较好");
136
+ reasonParts.push('综合相关性较好');
86
137
  return {
87
138
  symbol,
88
139
  score: Number(score.toFixed(3)),
89
140
  reason: {
90
141
  textMatch: {
91
142
  score: Number(textScore.toFixed(3)),
92
- matchedBy: "semantic"
143
+ matchedBy: 'semantic',
93
144
  },
94
145
  usage: {
95
146
  score: Number(usage.toFixed(3)),
96
- usageCount: symbol.usageCount
147
+ usageCount: symbol.usageCount,
97
148
  },
98
149
  recency: {
99
150
  score: Number(recency.toFixed(3)),
100
- daysSinceCreated: daysSinceCreated(symbol.createdAt)
151
+ daysSinceCreated: daysSinceCreated(symbol.createdAt),
101
152
  },
102
153
  commonPath: {
103
154
  score: Number(common.toFixed(3)),
104
- isCommonPath: common >= 1
155
+ isCommonPath: common >= 1,
105
156
  },
106
157
  weights: RANK_WEIGHTS,
107
- summary: reasonParts.join(" + ")
108
- }
158
+ summary: reasonParts.join(' + '),
159
+ },
109
160
  };
110
161
  })
111
162
  .sort((a, b) => b.score - a.score);
@@ -123,38 +174,40 @@ export function rankSymbols(query, symbols) {
123
174
  common * RANK_WEIGHTS.commonPath;
124
175
  const reasonParts = [];
125
176
  if (text.score >= 0.85)
126
- reasonParts.push("文本匹配度高");
177
+ reasonParts.push('文本匹配度高');
127
178
  else if (text.score >= 0.65)
128
- reasonParts.push("描述命中");
179
+ reasonParts.push('描述命中');
180
+ else if (text.matchedBy === 'token_overlap')
181
+ reasonParts.push('关键词片段高度重合');
129
182
  if (usage >= 0.6)
130
- reasonParts.push("使用频率高");
183
+ reasonParts.push('使用频率高');
131
184
  if (common >= 1)
132
- reasonParts.push("位于 shared/common 路径");
185
+ reasonParts.push('位于 shared/common 路径');
133
186
  if (reasonParts.length === 0)
134
- reasonParts.push("综合相关性较好");
187
+ reasonParts.push('综合相关性较好');
135
188
  return {
136
189
  symbol,
137
190
  score: Number(score.toFixed(3)),
138
191
  reason: {
139
192
  textMatch: {
140
193
  score: Number(text.score.toFixed(3)),
141
- matchedBy: text.matchedBy
194
+ matchedBy: text.matchedBy,
142
195
  },
143
196
  usage: {
144
197
  score: Number(usage.toFixed(3)),
145
- usageCount: symbol.usageCount
198
+ usageCount: symbol.usageCount,
146
199
  },
147
200
  recency: {
148
201
  score: Number(recency.toFixed(3)),
149
- daysSinceCreated: daysSinceCreated(symbol.createdAt)
202
+ daysSinceCreated: daysSinceCreated(symbol.createdAt),
150
203
  },
151
204
  commonPath: {
152
205
  score: Number(common.toFixed(3)),
153
- isCommonPath: common >= 1
206
+ isCommonPath: common >= 1,
154
207
  },
155
208
  weights: RANK_WEIGHTS,
156
- summary: reasonParts.join(" + ")
157
- }
209
+ summary: reasonParts.join(' + '),
210
+ },
158
211
  };
159
212
  })
160
213
  .sort((a, b) => b.score - a.score);