@lorrylurui/code-intelligence-mcp 2.0.5 → 2.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config/env.js +9 -0
- package/dist/config/tuning.js +114 -0
- package/dist/db/schema.js +37 -0
- package/dist/indexer/chunkText.js +164 -0
- package/dist/indexer/embedText.js +2 -2
- package/dist/repositories/chunkRepository.js +181 -0
- package/dist/repositories/symbolRepository.js +4 -5
- package/dist/server/createServer.js +5 -1
- package/dist/services/contextAssembler.js +150 -0
- package/dist/services/ranking.js +37 -39
- package/dist/services/recommendationService.js +325 -104
- package/dist/tools/queryDocs.js +113 -0
- package/dist/tools/searchSymbols.js +3 -2
- package/dist/types/chunk.js +1 -0
- package/package.json +1 -1
|
@@ -1,7 +1,92 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ──────────────────────────────────────────────────────────────────────────────
|
|
3
|
+
* [Agent 闭环流程总览]
|
|
4
|
+
*
|
|
5
|
+
* recommendComponent (核心 agent 主循环)
|
|
6
|
+
*
|
|
7
|
+
* 1. 解析输入,生成 query 变体(query rewrite,多轮尝试)
|
|
8
|
+
* 2. 对每个 query 变体依次尝试:
|
|
9
|
+
* 2.1. 搜索候选(优先语义,异常时回退关键词)
|
|
10
|
+
* 2.2. 结构字段补充搜索(props/hooks)
|
|
11
|
+
* 2.3. 合并去重、按 category 过滤、过滤不可复用项
|
|
12
|
+
* 2.4. 排序、Top-K 详情补查(enrich)
|
|
13
|
+
* 2.5. 质量门控(quality gate,必须命中 requiredProps/hooks 或高分)
|
|
14
|
+
* 2.6. 优先级调整(如名称/路径命中加分、demo 路径降权)
|
|
15
|
+
* 2.7. 命中则立即返回推荐结果,记录 debug trace
|
|
16
|
+
* 2.8. 未命中则进入下一 query 变体(自动重试)
|
|
17
|
+
* 3. 所有变体均未命中则返回无结果,debug trace 记录所有尝试
|
|
18
|
+
*
|
|
19
|
+
* 关键特性:
|
|
20
|
+
* - query rewrite + retry(自动多轮尝试)
|
|
21
|
+
* - 结构/语义/关键词多路融合
|
|
22
|
+
* - Top-K 详情补查
|
|
23
|
+
* - 质量门控与优先级调整
|
|
24
|
+
* - 全流程 debug trace(可用于 agent 反思/可观测性)
|
|
25
|
+
*
|
|
26
|
+
* 总结:
|
|
27
|
+
* “实现了一个单 agent 闭环推荐系统,支持 query 自动重写与多轮重试,融合语义/结构/关键词多路检索,Top-K 详情补查,质量门控与优先级调整,并输出全流程 debug trace,便于 agent 反思和可观测性。”
|
|
28
|
+
* ──────────────────────────────────────────────────────────────────────────────
|
|
29
|
+
*/
|
|
1
30
|
import { rankSemanticHits, rankSymbols } from './ranking.js';
|
|
31
|
+
import { DEMO_PATH_PRIORITY_PENALTY, LITERAL_MATCH_PRIORITY_BOOST, MIN_LITERAL_MATCH_SCORE, MIN_RECOMMENDATION_SCORE, MIN_SEMANTIC_TEXT_MATCH_SCORE, REQUIRED_FIELD_FALLBACK_MIN_SCORE, } from '../config/tuning.js';
|
|
32
|
+
/** Reasons recorded on an attempt when a query variant yields no recommendation. Frozen so the shared lookup table cannot be mutated by accident. */
const SKIPPED_REASON = Object.freeze({
    NO_COMBINED: 'no_combined',
    NO_QUALIFIED: 'no_qualified',
});
/** Identifiers for the retrieval mode that produced the search hits. */
const QUERIED_BY = Object.freeze({
    SEMANTIC: 'semantic',
    KEYWORD: 'keyword',
});
/** Reasons for falling back from the preferred retrieval mode. */
const FALLBACK_REASON = Object.freeze({
    SEMANTIC_ERROR: 'semantic_error_fallback_keyword',
});
/** Result messages returned to the caller (runtime strings, intentionally left untranslated). */
const RECOMMENDATION_MESSAGE = Object.freeze({
    FOUND: '已找到可复用组件候选,首选已按综合匹配度排序。',
    NOT_FOUND: '未找到符合条件的可复用组件。',
});
/** Number of top-ranked candidates whose details are re-fetched (enriched). */
const ENRICH_TOP_K = 3;
/** Maximum number of query variants that are tried. */
const MAX_QUERY_VARIANTS = 2;
/** Multiplier applied to the caller's limit for structure/semantic searches. */
const STRUCTURE_LIMIT_MULTIPLIER = 4;
/** Lower bound of the per-type limit for structure/semantic searches. */
const STRUCTURE_LIMIT_MIN = 12;
/** Default similarity substituted for hits that come from keyword search. */
const DEFAULT_KEYWORD_SIMILARITY = 0.55;
|
|
2
61
|
/**
 * Trims every string entry, drops blank ones and removes duplicates while
 * keeping first-seen order.
 *
 * Non-array input (including `null`, which a default parameter does not
 * cover) yields an empty list, and non-string entries are ignored instead of
 * throwing on `.trim()`.
 *
 * @param {unknown} [values=[]] Candidate strings.
 * @returns {string[]} De-duplicated, trimmed, non-empty strings.
 */
function uniqueStrings(values = []) {
    if (!Array.isArray(values)) {
        return [];
    }
    const seen = new Set();
    for (const value of values) {
        if (typeof value !== 'string') {
            continue;
        }
        const trimmed = value.trim();
        if (trimmed) {
            seen.add(trimmed);
        }
    }
    return [...seen];
}
|
|
64
|
+
/**
 * Filler phrases stripped from a natural-language query before retrying.
 *
 * The ASCII words (`hook`, `util`, and their plurals) are only removed when
 * they stand alone: they must not be preceded or followed by an ASCII letter
 * or digit. Otherwise identifiers such as `useHookForm` or `utility` would be
 * torn apart ("use Form", "ity").
 */
const QUERY_REWRITE_PATTERNS = [
    /^帮我找(找)?(一个|一下)?/g,
    /^有没有(现成的)?/g,
    /^请推荐(一个|一下)?/g,
    /可复用/g,
    /现成的/g,
    /封装好的/g,
    /(组件|函数|工具)(实现)?/g,
    /(?<![A-Za-z0-9])(hooks?|utils?)(?![A-Za-z0-9])(实现)?/gi,
];
/**
 * Produces the list of query variants to try, most literal first.
 *
 * The first variant is the trimmed raw query. A second variant is added when
 * stripping the filler phrases above leaves a different, non-empty query.
 *
 * @param {string} rawQuery Query as supplied by the caller.
 * @returns {string[]} `[]` for blank or non-string input, otherwise one or two variants.
 */
function buildQueryVariants(rawQuery) {
    if (typeof rawQuery !== 'string') {
        return [];
    }
    const base = rawQuery.trim();
    if (!base) {
        return [];
    }
    // String.prototype.replace resets lastIndex for global patterns, so the
    // shared module-level regex objects are safe to reuse here.
    const stripped = QUERY_REWRITE_PATTERNS.reduce((text, pattern) => text.replace(pattern, ' '), base);
    const rewritten = stripped.replace(/\s+/g, ' ').trim();
    if (!rewritten || rewritten === base) {
        return [base];
    }
    // Both entries are trimmed, non-empty and distinct, so no de-duplication is needed.
    return [base, rewritten];
}
|
|
5
90
|
/**
 * Canonical form of a token for comparisons: surrounding whitespace removed
 * and all characters lower-cased.
 *
 * @param {string} value Raw token.
 * @returns {string} Trimmed, lower-cased token.
 */
function normalizeToken(value) {
    const trimmed = value.trim();
    return trimmed.toLowerCase();
}
|
|
@@ -113,12 +198,6 @@ const NON_REUSABLE_PATH_PATTERNS = [
|
|
|
113
198
|
'.mock.',
|
|
114
199
|
];
|
|
115
200
|
const NON_REUSABLE_NAME_TOKENS = ['mock', 'fixture', 'example', 'demo'];
|
|
116
|
-
const MIN_RECOMMENDATION_SCORE = {
|
|
117
|
-
semantic: 0.5,
|
|
118
|
-
keyword: 0.45,
|
|
119
|
-
};
|
|
120
|
-
const MIN_SEMANTIC_TEXT_SCORE = 0.6;
|
|
121
|
-
const MIN_LITERAL_MATCH_SCORE = 0.18;
|
|
122
201
|
function isDemoLikePath(path, strict = false) {
|
|
123
202
|
const normalizedPath = path.toLowerCase();
|
|
124
203
|
const segments = strict
|
|
@@ -198,25 +277,26 @@ function hasStrongLiteralMatch(query, symbol) {
|
|
|
198
277
|
/**
 * Quality gate deciding whether a ranked item may be recommended.
 *
 * Semantic mode passes when the score reaches the semantic minimum and either
 * the text-match score reaches its minimum or all required fields match; it
 * also passes on a strong literal match whose score reaches the literal
 * minimum. Any other mode passes when the score reaches the keyword minimum,
 * or when all required fields match and the score reaches the required-field
 * fallback minimum.
 *
 * @param item Ranked item; `score`, `symbol` and `reason.textMatch.score` are read.
 * @param query Query variant the item was ranked for.
 * @param queriedBy Retrieval mode that produced the item.
 * @param requiredProps Props the symbol must expose.
 * @param requiredHooks Hooks the symbol must use.
 * @returns Truthy when the item is strong enough to recommend.
 */
function isStrongEnoughRecommendation(item, query, queriedBy, requiredProps, requiredHooks) {
    const meetsRequiredFields = hasAllRequiredFields(item.symbol, requiredProps, requiredHooks);
    const literalHit = hasStrongLiteralMatch(query, item.symbol);
    if (queriedBy !== QUERIED_BY.SEMANTIC) {
        const keywordOk = item.score >= MIN_RECOMMENDATION_SCORE.keyword;
        return (keywordOk ||
            (meetsRequiredFields &&
                item.score >= REQUIRED_FIELD_FALLBACK_MIN_SCORE));
    }
    // The text-match score is only read once the overall score clears the bar.
    const semanticOk = item.score >= MIN_RECOMMENDATION_SCORE.semantic &&
        (item.reason.textMatch.score >= MIN_SEMANTIC_TEXT_MATCH_SCORE ||
            meetsRequiredFields);
    return (semanticOk ||
        (literalHit && item.score >= MIN_LITERAL_MATCH_SCORE));
}
|
|
210
290
|
function computeRecommendationPriority(item, query) {
|
|
211
291
|
let score = item.score;
|
|
212
292
|
const notes = [];
|
|
213
293
|
const path = item.symbol.path.toLowerCase();
|
|
214
294
|
if (hasStrongLiteralMatch(query, item.symbol)) {
|
|
215
|
-
score +=
|
|
295
|
+
score += LITERAL_MATCH_PRIORITY_BOOST;
|
|
216
296
|
notes.push('名称或文件名命中查询');
|
|
217
297
|
}
|
|
218
298
|
if (isDemoLikePath(path)) {
|
|
219
|
-
score -=
|
|
299
|
+
score -= DEMO_PATH_PRIORITY_PENALTY;
|
|
220
300
|
notes.push('示例工程路径降权');
|
|
221
301
|
}
|
|
222
302
|
return {
|
|
@@ -231,125 +311,258 @@ export class RecommendationService {
|
|
|
231
311
|
/**
 * @param repository Data-access object kept on the instance; the methods of
 *   this class call its `searchSemanticHits`, `search`, `searchByStructure`
 *   and `getByName` methods.
 */
constructor(repository) {
    this.repository = repository;
}
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
const preferSemantic = input.semantic ?? true;
|
|
244
|
-
const limit = input.limit ?? 5;
|
|
314
|
+
/**
|
|
315
|
+
* 根据查询和提示信息从仓库中获取候选结果,优先语义搜索并在出错时回退关键词搜索,返回搜索结果和相关的调试信息供后续处理使用。
|
|
316
|
+
* @param query 查询字符串
|
|
317
|
+
* @param searchTypes 搜索的符号类型
|
|
318
|
+
* @param preferSemantic 是否优先使用语义搜索
|
|
319
|
+
* @param limit 返回结果的数量限制
|
|
320
|
+
* @returns 包含搜索结果和调试信息的对象
|
|
321
|
+
*/
|
|
322
|
+
async gatherSearchResults(query, searchTypes, preferSemantic, limit) {
|
|
245
323
|
let queriedBy = preferSemantic
|
|
246
|
-
?
|
|
247
|
-
:
|
|
248
|
-
let
|
|
249
|
-
console.error('[code-intelligence-mcp] recommendComponent.searchTypes types=%s', JSON.stringify(searchTypes));
|
|
324
|
+
? QUERIED_BY.SEMANTIC
|
|
325
|
+
: QUERIED_BY.KEYWORD;
|
|
326
|
+
let fallbackReason = null;
|
|
250
327
|
if (preferSemantic) {
|
|
251
328
|
try {
|
|
252
|
-
const semanticGroups = await Promise.all(searchTypes.map((type) => this.repository.searchSemanticHits(
|
|
329
|
+
const semanticGroups = await Promise.all(searchTypes.map((type) => this.repository.searchSemanticHits(query, {
|
|
253
330
|
type,
|
|
254
|
-
limit: Math.max(limit *
|
|
331
|
+
limit: Math.max(limit * STRUCTURE_LIMIT_MULTIPLIER, STRUCTURE_LIMIT_MIN),
|
|
255
332
|
})));
|
|
256
|
-
searchResults = semanticGroups.flat();
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
}))));
|
|
333
|
+
const searchResults = semanticGroups.flat();
|
|
334
|
+
return {
|
|
335
|
+
queriedBy,
|
|
336
|
+
searchResults,
|
|
337
|
+
fallbackReason,
|
|
338
|
+
};
|
|
263
339
|
}
|
|
264
340
|
catch {
|
|
265
|
-
queriedBy =
|
|
266
|
-
|
|
267
|
-
searchResults = keywordGroups
|
|
268
|
-
.flat()
|
|
269
|
-
.map((symbol) => ({ symbol, similarity: 0 }));
|
|
270
|
-
console.error('[code-intelligence-mcp] recommendComponent.semanticFailed fallback=keyword count=%s top=%s', String(searchResults.length), JSON.stringify(searchResults.slice(0, 3).map((item) => ({
|
|
271
|
-
id: item.symbol.id,
|
|
272
|
-
name: item.symbol.name,
|
|
273
|
-
path: item.symbol.path,
|
|
274
|
-
}))));
|
|
341
|
+
queriedBy = QUERIED_BY.KEYWORD;
|
|
342
|
+
fallbackReason = FALLBACK_REASON.SEMANTIC_ERROR;
|
|
275
343
|
}
|
|
276
344
|
}
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
345
|
+
const keywordGroups = await Promise.all(searchTypes.map((type) => this.repository.search(query, type)));
|
|
346
|
+
return {
|
|
347
|
+
queriedBy,
|
|
348
|
+
searchResults: keywordGroups
|
|
280
349
|
.flat()
|
|
281
|
-
.map((symbol) => ({ symbol, similarity: 0 }))
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
350
|
+
.map((symbol) => ({ symbol, similarity: 0 })),
|
|
351
|
+
fallbackReason,
|
|
352
|
+
};
|
|
353
|
+
}
|
|
354
|
+
/**
|
|
355
|
+
* 对排名靠前的候选项进行详情补查
|
|
356
|
+
*/
|
|
357
|
+
async enrichTopCandidatesWithDetail(ranked) {
|
|
358
|
+
const topSymbols = ranked
|
|
359
|
+
.slice(0, ENRICH_TOP_K)
|
|
360
|
+
.map((item) => item.symbol);
|
|
361
|
+
if (topSymbols.length === 0) {
|
|
362
|
+
return { ranked, enrichedCount: 0 };
|
|
363
|
+
}
|
|
364
|
+
const detailMap = new Map();
|
|
365
|
+
await Promise.all(topSymbols.map(async (symbol) => {
|
|
366
|
+
try {
|
|
367
|
+
const detail = await this.repository.getByName(symbol.name);
|
|
368
|
+
if (detail && detail.id === symbol.id) {
|
|
369
|
+
detailMap.set(symbol.id, detail);
|
|
370
|
+
}
|
|
371
|
+
}
|
|
372
|
+
catch {
|
|
373
|
+
// 详情补查失败时继续主流程,避免影响推荐输出。
|
|
374
|
+
}
|
|
375
|
+
}));
|
|
376
|
+
if (detailMap.size === 0) {
|
|
377
|
+
return { ranked, enrichedCount: 0 };
|
|
378
|
+
}
|
|
379
|
+
const enriched = ranked.map((item) => {
|
|
380
|
+
const detail = detailMap.get(item.symbol.id);
|
|
381
|
+
return detail ? { ...item, symbol: detail } : item;
|
|
382
|
+
});
|
|
383
|
+
return {
|
|
384
|
+
ranked: enriched,
|
|
385
|
+
enrichedCount: detailMap.size,
|
|
386
|
+
};
|
|
387
|
+
}
|
|
388
|
+
/**
|
|
389
|
+
* Agent 主循环:根据输入生成 query 变体,依次尝试多轮检索,融合语义/结构/关键词,Top-K 详情补查,质量门控,优先级调整。
|
|
390
|
+
* 命中即返回推荐,否则遍历所有变体,最终输出 debug trace。
|
|
391
|
+
*/
|
|
392
|
+
async recommendComponent(input) {
|
|
393
|
+
this.logStart(input);
|
|
394
|
+
const { requiredProps, requiredHooks, structureFields, searchTypes, preferSemantic, limit, queryVariants, } = this.preprocessInput(input);
|
|
395
|
+
let queriedBy = preferSemantic
|
|
396
|
+
? QUERIED_BY.SEMANTIC
|
|
397
|
+
: QUERIED_BY.KEYWORD;
|
|
398
|
+
let lastRankedCandidates = [];
|
|
399
|
+
let lastCombinedCount = 0;
|
|
400
|
+
let selectedQuery = null;
|
|
401
|
+
let fallbackReason = null;
|
|
402
|
+
const attempts = [];
|
|
403
|
+
this.logSearchTypes(searchTypes);
|
|
404
|
+
for (const queryVariant of queryVariants) {
|
|
405
|
+
const { attempt, combined, searchResults, gathered } = await this.tryQueryVariant({
|
|
406
|
+
queryVariant,
|
|
407
|
+
input,
|
|
408
|
+
searchTypes,
|
|
409
|
+
preferSemantic,
|
|
410
|
+
limit,
|
|
411
|
+
structureFields,
|
|
412
|
+
requiredProps,
|
|
413
|
+
requiredHooks,
|
|
414
|
+
});
|
|
415
|
+
queriedBy = gathered.queriedBy;
|
|
416
|
+
if (!fallbackReason && gathered.fallbackReason) {
|
|
417
|
+
fallbackReason = gathered.fallbackReason;
|
|
418
|
+
}
|
|
419
|
+
lastCombinedCount = combined.length;
|
|
420
|
+
this.logAttemptCheckpoint('attempt.summary', attempt);
|
|
421
|
+
if (combined.length === 0) {
|
|
422
|
+
attempt.skippedReason = SKIPPED_REASON.NO_COMBINED;
|
|
423
|
+
this.logAttemptCheckpoint('attempt.skipped.no_combined', attempt);
|
|
424
|
+
attempts.push(attempt);
|
|
425
|
+
continue;
|
|
426
|
+
}
|
|
427
|
+
const candidates = await this.rankAndEnrichCandidates({
|
|
428
|
+
combined,
|
|
429
|
+
searchResults,
|
|
430
|
+
queryVariant,
|
|
431
|
+
queriedBy,
|
|
432
|
+
requiredProps,
|
|
433
|
+
requiredHooks,
|
|
434
|
+
attempt,
|
|
435
|
+
limit,
|
|
436
|
+
});
|
|
437
|
+
lastRankedCandidates = candidates;
|
|
438
|
+
if (candidates.length > 0) {
|
|
439
|
+
selectedQuery = queryVariant;
|
|
440
|
+
attempts.push(attempt);
|
|
441
|
+
this.logAttemptCheckpoint('attempt.success', attempt);
|
|
442
|
+
this.logAttemptsTrace('recommendComponent.result.found', {
|
|
443
|
+
selectedQuery,
|
|
444
|
+
queriedBy,
|
|
445
|
+
attempts,
|
|
446
|
+
fallbackReason,
|
|
447
|
+
});
|
|
448
|
+
return this.buildResult({
|
|
449
|
+
recommended: candidates[0] ?? null,
|
|
450
|
+
alternatives: candidates.slice(1, limit),
|
|
451
|
+
queriedBy,
|
|
452
|
+
requiredProps,
|
|
453
|
+
requiredHooks,
|
|
454
|
+
attempts,
|
|
455
|
+
selectedQuery,
|
|
456
|
+
fallbackReason,
|
|
457
|
+
});
|
|
458
|
+
}
|
|
459
|
+
this.logAttemptCheckpoint('attempt.no_candidate_after_rank', attempt);
|
|
460
|
+
attempts.push(attempt);
|
|
287
461
|
}
|
|
462
|
+
this.logAttemptsTrace('recommendComponent.result.not_found', {
|
|
463
|
+
selectedQuery,
|
|
464
|
+
queriedBy,
|
|
465
|
+
attempts,
|
|
466
|
+
fallbackReason,
|
|
467
|
+
});
|
|
468
|
+
return this.buildResult({
|
|
469
|
+
recommended: null,
|
|
470
|
+
alternatives: [],
|
|
471
|
+
queriedBy,
|
|
472
|
+
requiredProps,
|
|
473
|
+
requiredHooks,
|
|
474
|
+
attempts,
|
|
475
|
+
selectedQuery,
|
|
476
|
+
fallbackReason,
|
|
477
|
+
});
|
|
478
|
+
}
|
|
479
|
+
logStart(input) {
|
|
480
|
+
console.error('[code-intelligence-mcp] recommendComponent.start query=%s category=%s semantic=%s limit=%s requiredProps=%s requiredHooks=%s', input.query, input.category ?? '', String(input.semantic ?? true), String(input.limit ?? 5), JSON.stringify(input.requiredProps ?? []), JSON.stringify(input.requiredHooks ?? []));
|
|
481
|
+
}
|
|
482
|
+
logSearchTypes(searchTypes) {
|
|
483
|
+
console.error('[code-intelligence-mcp] recommendComponent.searchTypes types=%s', JSON.stringify(searchTypes));
|
|
484
|
+
}
|
|
485
|
+
preprocessInput(input) {
|
|
486
|
+
const requiredProps = uniqueStrings(input.requiredProps);
|
|
487
|
+
const requiredHooks = uniqueStrings(input.requiredHooks);
|
|
488
|
+
const structureFields = uniqueStrings([
|
|
489
|
+
...requiredProps,
|
|
490
|
+
...requiredHooks,
|
|
491
|
+
]);
|
|
492
|
+
const searchTypes = inferSearchTypes(input);
|
|
493
|
+
const preferSemantic = input.semantic ?? true;
|
|
494
|
+
const limit = input.limit ?? 5;
|
|
495
|
+
const queryVariants = buildQueryVariants(input.query).slice(0, MAX_QUERY_VARIANTS);
|
|
496
|
+
const res = {
|
|
497
|
+
requiredProps,
|
|
498
|
+
requiredHooks,
|
|
499
|
+
structureFields,
|
|
500
|
+
searchTypes,
|
|
501
|
+
preferSemantic,
|
|
502
|
+
limit,
|
|
503
|
+
queryVariants,
|
|
504
|
+
};
|
|
505
|
+
console.error('[code-intelligence-mcp] recommendComponent.preprocess queryVariants=%s requiredProps=%s requiredHooks=%s structureFields=%s searchTypes=%s preferSemantic=%s limit=%s', JSON.stringify(queryVariants), JSON.stringify(requiredProps), JSON.stringify(requiredHooks), JSON.stringify(structureFields), JSON.stringify(searchTypes), String(preferSemantic), String(limit));
|
|
506
|
+
return res;
|
|
507
|
+
}
|
|
508
|
+
async tryQueryVariant({ queryVariant, input, searchTypes, preferSemantic, limit, structureFields, requiredProps, requiredHooks, }) {
|
|
509
|
+
const gathered = await this.gatherSearchResults(queryVariant, searchTypes, preferSemantic, limit);
|
|
510
|
+
const searchResults = gathered.searchResults;
|
|
511
|
+
const attempt = {
|
|
512
|
+
query: queryVariant,
|
|
513
|
+
queriedBy: gathered.queriedBy,
|
|
514
|
+
searchCount: searchResults.length,
|
|
515
|
+
structureCount: 0,
|
|
516
|
+
combinedCount: 0,
|
|
517
|
+
qualifiedCount: 0,
|
|
518
|
+
detailEnrichedCount: 0,
|
|
519
|
+
};
|
|
288
520
|
const structureResults = structureFields.length
|
|
289
521
|
? (await Promise.all(searchTypes.map((type) => this.repository.searchByStructure(structureFields, {
|
|
290
522
|
type,
|
|
291
|
-
limit: Math.max(limit *
|
|
523
|
+
limit: Math.max(limit * STRUCTURE_LIMIT_MULTIPLIER, STRUCTURE_LIMIT_MIN),
|
|
292
524
|
})))).flat()
|
|
293
525
|
: [];
|
|
294
|
-
|
|
295
|
-
id: symbol.id,
|
|
296
|
-
name: symbol.name,
|
|
297
|
-
path: symbol.path,
|
|
298
|
-
}))));
|
|
299
|
-
// 合并逻辑:先合并语义搜索(或关键词模糊搜索)和结构搜索结果去重
|
|
526
|
+
attempt.structureCount = structureResults.length;
|
|
300
527
|
const mergedBeforeCategory = mergeCandidates([
|
|
301
528
|
...structureResults,
|
|
302
529
|
...searchResults.map((item) => item.symbol),
|
|
303
530
|
]);
|
|
304
|
-
// 再按 category 过滤(如果有 category 限制)
|
|
305
531
|
let combined = mergedBeforeCategory.filter((symbol) => input.category
|
|
306
532
|
? (symbol.category ?? '')
|
|
307
533
|
.toLowerCase()
|
|
308
534
|
.includes(input.category.toLowerCase())
|
|
309
535
|
: true);
|
|
310
|
-
// LLM 可能把 "input" 之类词误当作 category,导致误筛空;若筛空则回退为不按 category 过滤。
|
|
311
536
|
if (combined.length === 0 &&
|
|
312
537
|
input.category &&
|
|
313
538
|
mergedBeforeCategory.length) {
|
|
314
|
-
console.error('[code-intelligence-mcp] recommendComponent.categoryFallback category=%s merged=%s -> useUnfiltered', input.category, String(mergedBeforeCategory.length));
|
|
315
539
|
combined = mergedBeforeCategory;
|
|
316
540
|
}
|
|
317
541
|
const reusableCandidates = combined.filter(isReusableCandidate);
|
|
318
542
|
if (reusableCandidates.length > 0) {
|
|
319
|
-
console.error('[code-intelligence-mcp] recommendComponent.reusableFilter before=%s after=%s removed=%s', String(combined.length), String(reusableCandidates.length), String(combined.length - reusableCandidates.length));
|
|
320
543
|
combined = reusableCandidates;
|
|
321
544
|
}
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
}))));
|
|
328
|
-
if (combined.length === 0) {
|
|
329
|
-
console.error('[code-intelligence-mcp] recommendComponent.emptyResult query=%s queriedBy=%s requiredProps=%s requiredHooks=%s', input.query, queriedBy, JSON.stringify(requiredProps), JSON.stringify(requiredHooks));
|
|
330
|
-
return {
|
|
331
|
-
recommended: null,
|
|
332
|
-
alternatives: [],
|
|
333
|
-
queriedBy,
|
|
334
|
-
structureFilter: {
|
|
335
|
-
requiredProps,
|
|
336
|
-
requiredHooks,
|
|
337
|
-
},
|
|
338
|
-
message: '未找到符合条件的可复用组件。',
|
|
339
|
-
};
|
|
340
|
-
}
|
|
341
|
-
// 最后排序并切分首选/备选
|
|
342
|
-
const ranked = queriedBy === 'semantic'
|
|
545
|
+
attempt.combinedCount = combined.length;
|
|
546
|
+
return { attempt, combined, searchResults, gathered };
|
|
547
|
+
}
|
|
548
|
+
async rankAndEnrichCandidates({ combined, searchResults, queryVariant, queriedBy, requiredProps, requiredHooks, attempt, limit, }) {
|
|
549
|
+
const ranked = queriedBy === QUERIED_BY.SEMANTIC
|
|
343
550
|
? rankSemanticHits(combined.map((symbol) => ({
|
|
344
551
|
symbol,
|
|
345
552
|
similarity: searchResults.find((item) => item.symbol.id === symbol.id)?.similarity ?? 0.55,
|
|
346
|
-
})),
|
|
347
|
-
: rankSymbols(
|
|
348
|
-
const
|
|
349
|
-
|
|
553
|
+
})), queryVariant)
|
|
554
|
+
: rankSymbols(queryVariant, combined);
|
|
555
|
+
const enriched = await this.enrichTopCandidatesWithDetail(ranked);
|
|
556
|
+
const enrichedRanked = enriched.ranked;
|
|
557
|
+
attempt.detailEnrichedCount = enriched.enrichedCount;
|
|
558
|
+
const qualifiedRanked = enrichedRanked.filter((item) => isStrongEnoughRecommendation(item, queryVariant, queriedBy, requiredProps, requiredHooks));
|
|
559
|
+
attempt.qualifiedCount = qualifiedRanked.length;
|
|
560
|
+
if (qualifiedRanked.length === 0) {
|
|
561
|
+
attempt.skippedReason = SKIPPED_REASON.NO_QUALIFIED;
|
|
562
|
+
}
|
|
350
563
|
const prioritizedRanked = qualifiedRanked
|
|
351
564
|
.map((item) => {
|
|
352
|
-
const adjusted = computeRecommendationPriority(item,
|
|
565
|
+
const adjusted = computeRecommendationPriority(item, queryVariant);
|
|
353
566
|
return {
|
|
354
567
|
item,
|
|
355
568
|
adjustedScore: adjusted.score,
|
|
@@ -358,25 +571,33 @@ export class RecommendationService {
|
|
|
358
571
|
})
|
|
359
572
|
.sort((a, b) => b.adjustedScore - a.adjustedScore);
|
|
360
573
|
const candidates = prioritizedRanked.map((entry) => toCandidate(entry.item.symbol, entry.adjustedScore, entry.adjustedReason, requiredProps, requiredHooks));
|
|
361
|
-
console.error('[code-intelligence-mcp] recommendComponent.
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
574
|
+
console.error('[code-intelligence-mcp] recommendComponent.rank query=%s queriedBy=%s enriched=%s qualified=%s candidates=%s', queryVariant, queriedBy, String(enrichedRanked.length), String(qualifiedRanked.length), String(candidates.length));
|
|
575
|
+
return candidates;
|
|
576
|
+
}
|
|
577
|
+
logAttemptCheckpoint(stage, attempt) {
|
|
578
|
+
console.error('[code-intelligence-mcp] recommendComponent.%s query=%s queriedBy=%s search=%s structure=%s combined=%s qualified=%s enriched=%s skipped=%s', stage, attempt.query, attempt.queriedBy, String(attempt.searchCount), String(attempt.structureCount), String(attempt.combinedCount), String(attempt.qualifiedCount), String(attempt.detailEnrichedCount), attempt.skippedReason ?? 'none');
|
|
579
|
+
}
|
|
580
|
+
logAttemptsTrace(stage, payload) {
|
|
581
|
+
console.error('[code-intelligence-mcp] %s selectedQuery=%s queriedBy=%s attempts=%s fallbackReason=%s', stage, payload.selectedQuery ?? 'none', payload.queriedBy, JSON.stringify(payload.attempts), payload.fallbackReason ?? 'none');
|
|
582
|
+
}
|
|
583
|
+
buildResult({ recommended, alternatives, queriedBy, requiredProps, requiredHooks, attempts, selectedQuery, fallbackReason, }) {
|
|
369
584
|
return {
|
|
370
|
-
recommended
|
|
371
|
-
alternatives
|
|
585
|
+
recommended,
|
|
586
|
+
alternatives,
|
|
372
587
|
queriedBy,
|
|
373
588
|
structureFilter: {
|
|
374
589
|
requiredProps,
|
|
375
590
|
requiredHooks,
|
|
376
591
|
},
|
|
377
|
-
message:
|
|
378
|
-
?
|
|
379
|
-
:
|
|
592
|
+
message: recommended !== null
|
|
593
|
+
? RECOMMENDATION_MESSAGE.FOUND
|
|
594
|
+
: RECOMMENDATION_MESSAGE.NOT_FOUND,
|
|
595
|
+
debug: {
|
|
596
|
+
attempts,
|
|
597
|
+
selectedQuery,
|
|
598
|
+
retryUsed: attempts.length > 1,
|
|
599
|
+
fallbackReason,
|
|
600
|
+
},
|
|
380
601
|
};
|
|
381
602
|
}
|
|
382
603
|
}
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* query_docs MCP 工具:完整的 RAG 检索 + 上下文组装入口。
|
|
3
|
+
*
|
|
4
|
+
* 调用链:
|
|
5
|
+
* query
|
|
6
|
+
* → ChunkRepository.searchSemantic() 向量检索 topK chunk
|
|
7
|
+
* → ContextAssembler.assemble() 邻块扩展 → 去重 → 字符预算截断 → 文本渲染
|
|
8
|
+
* → 返回 contextText + sources 供 LLM 合成最终回答
|
|
9
|
+
*
|
|
10
|
+
* 为什么工具只返回 contextText 而不直接生成回答?
|
|
11
|
+
* 本 MCP server 没有内置 LLM,"合成回答"由调用方(Claude/GPT 等)完成。
|
|
12
|
+
* 工具负责"检索 + 组装",调用方负责"理解 + 生成",职责清晰、可独立测试。
|
|
13
|
+
*/
|
|
14
|
+
import { z } from 'zod';
|
|
15
|
+
import { ChunkRepository } from '../repositories/chunkRepository.js';
|
|
16
|
+
import { ContextAssembler } from '../services/contextAssembler.js';
|
|
17
|
+
import { CHUNK_TOP_K, CONTEXT_ADJACENT_RADIUS, CONTEXT_MAX_CHARS, CONTEXT_MAX_CHUNKS, } from '../config/tuning.js';
|
|
18
|
+
/** Input schema of the `query_docs` tool. */
export const queryDocsInput = z.object({
    /** Natural-language query; it is embedded and used for semantic retrieval. */
    query: z.string().min(1),
    /** Number of candidate chunks fetched by semantic retrieval; the final count is further bounded by the character budget. */
    limit: z
        .number()
        .int()
        .min(1)
        .max(50)
        .optional()
        .default(CHUNK_TOP_K),
    /**
     * Number of neighbouring chunks added before and after every hit.
     * 0 = no expansion (pure vector-search result); 1 = one on each side
     * (recommended); 2 = two on each side (long documents).
     */
    adjacentRadius: z.number().int().min(0).max(3).optional().default(CONTEXT_ADJACENT_RADIUS),
    /** Total character budget of the context; trailing chunks are cut when it is exceeded. */
    maxChars: z
        .number()
        .int()
        .min(500)
        .optional()
        .default(CONTEXT_MAX_CHARS),
    /** Maximum number of chunks kept after expansion. */
    maxChunks: z.number().int().min(1).max(30).optional().default(CONTEXT_MAX_CHUNKS),
    /** Restricts retrieval to chunks of the given document path (exact path filter). */
    path: z.string().optional(),
});
|
|
47
|
+
/**
 * Creates the `query_docs` tool definition: semantic chunk retrieval followed
 * by context assembly.
 *
 * The handler returns a single text content item whose text is a JSON
 * document with `contextText`, `sources`, `hitCount`, `totalChunks` and
 * `truncated` (plus a `message` when nothing was found).
 *
 * @returns `{ name, description, inputSchema, handler }`.
 */
export function createQueryDocsTool() {
    const repo = new ChunkRepository();
    const assembler = new ContextAssembler(repo);
    // Wraps a payload in the one-item text content envelope used by both exits.
    const asTextContent = (payload) => ({
        content: [
            {
                type: 'text',
                text: JSON.stringify(payload),
            },
        ],
    });
    const description = [
        '对文档知识库进行语义检索,返回与查询最相关的文档片段(已组装为可直接注入 prompt 的上下文文本)。\n',
        '使用场景:\n',
        '- 查找 QA 文档、架构说明、设计决策等非代码知识\n',
        '- 需要引用具体文档原文回答时\n',
        '- 回答后请基于返回的 contextText 中的原文进行陈述,不要凭空补充\n',
        '注意:本工具检索文档 chunk,代码符号请使用 search_symbols / recommend_component。',
    ].join('');
    const handler = async (input) => {
        // Stage 1: semantic retrieval.
        const hits = await repo.searchSemantic(input.query, {
            limit: input.limit,
            path: input.path,
        });
        if (hits.length === 0) {
            return asTextContent({
                contextText: '',
                sources: [],
                hitCount: 0,
                totalChunks: 0,
                truncated: false,
                message: '未找到相关文档片段,请尝试调整查询或确认文档已建立索引。',
            });
        }
        // Stage 2: neighbour expansion, de-duplication, budget truncation, rendering.
        const { maxChars, adjacentRadius, maxChunks } = input;
        const assembled = await assembler.assemble(hits, {
            maxChars,
            adjacentRadius,
            maxChunks,
        });
        // Sources let the caller cite where each passage came from.
        const sources = assembled.chunks.map((chunk) => {
            const { path, title, chunkIndex, chunkCount } = chunk;
            return {
                path,
                title,
                chunkIndex,
                chunkCount,
                similarity: chunk.similarity ?? null,
                summary: chunk.summary ?? null,
            };
        });
        return asTextContent({
            contextText: assembled.contextText,
            sources,
            hitCount: assembled.hitCount,
            totalChunks: assembled.totalChunks,
            truncated: assembled.truncated,
        });
    };
    return {
        name: 'query_docs',
        description,
        inputSchema: queryDocsInput.shape,
        handler,
    };
}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
2
|
import { rankSemanticHits, rankSymbols } from '../services/ranking.js';
|
|
3
3
|
import { isReusableCandidate } from '../services/recommendationService.js';
|
|
4
|
+
import { SEARCH_SCORE_THRESHOLD, SEARCH_TOP_K } from '../config/tuning.js';
|
|
4
5
|
export const searchSymbolsInput = z.object({
|
|
5
6
|
query: z.string().min(1),
|
|
6
7
|
type: z
|
|
@@ -11,8 +12,8 @@ export const searchSymbolsInput = z.object({
|
|
|
11
12
|
semantic: z.boolean().optional().default(false),
|
|
12
13
|
limit: z.number().int().min(1).max(100).optional().default(20),
|
|
13
14
|
});
|
|
14
|
-
const SCORE_THRESHOLD_FOR_FINAL =
|
|
15
|
-
const TOP_K_FOR_FINAL_RESULTS =
|
|
15
|
+
const SCORE_THRESHOLD_FOR_FINAL = SEARCH_SCORE_THRESHOLD; // 综合排序分阈值(语义相似度占50%权重,原始0.5相似度≈综合60.35起)
|
|
16
|
+
const TOP_K_FOR_FINAL_RESULTS = SEARCH_TOP_K; // 结果上限,返回相似度高的,保证数据质量
|
|
16
17
|
function toRankedResult(item) {
|
|
17
18
|
return {
|
|
18
19
|
id: item.symbol.id,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|