@lorrylurui/code-intelligence-mcp 2.0.4 → 2.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -0
- package/dist/config/env.js +9 -0
- package/dist/config/tuning.js +114 -0
- package/dist/db/schema.js +37 -0
- package/dist/index.js +1 -0
- package/dist/indexer/babelParser.js +2 -1
- package/dist/indexer/chunkText.js +164 -0
- package/dist/indexer/embedText.js +2 -2
- package/dist/indexer/indexProject.js +193 -22
- package/dist/indexer/jsAstNormalizer.js +36 -6
- package/dist/prompts/reusableCodeAdvisorPrompt.js +63 -34
- package/dist/repositories/chunkRepository.js +181 -0
- package/dist/repositories/symbolRepository.js +108 -15
- package/dist/server/createServer.js +16 -0
- package/dist/services/contextAssembler.js +150 -0
- package/dist/services/ranking.js +109 -58
- package/dist/services/recommendationService.js +515 -46
- package/dist/services/reindex.js +25 -0
- package/dist/tools/getSymbolDetail.js +2 -1
- package/dist/tools/queryDocs.js +113 -0
- package/dist/tools/recommendComponent.js +86 -10
- package/dist/tools/searchByStructure.js +2 -1
- package/dist/tools/searchSymbols.js +57 -21
- package/dist/types/chunk.js +1 -0
- package/dist/workers/embeddingWorker.js +0 -1
- package/package.json +1 -1
|
@@ -3,7 +3,9 @@
|
|
|
3
3
|
* 同时分析调用关系,填充 meta.callers / meta.callees。
|
|
4
4
|
*/
|
|
5
5
|
import fg from 'fast-glob';
|
|
6
|
-
import
|
|
6
|
+
import * as babelParser from '@babel/parser';
|
|
7
|
+
import * as bt from '@babel/types';
|
|
8
|
+
import { dirname, join, resolve } from 'node:path';
|
|
7
9
|
import { Node, Project } from 'ts-morph';
|
|
8
10
|
import { existsSync, readFileSync, statSync } from 'node:fs';
|
|
9
11
|
import { extractInterfaceOrTypeMeta, extractMetaFromCallable, } from './extractMeta.js';
|
|
@@ -12,6 +14,40 @@ import { parseJsFile } from './babelParser.js';
|
|
|
12
14
|
import { isParamPlaceholder } from './paramPlaceholder.js';
|
|
13
15
|
import { computeFileHash, computeSemanticHash } from './tsAstNormalizer.js';
|
|
14
16
|
const CALLERS_LIMIT = 20;
|
|
17
|
+
/**
 * Parser plugins handed to `@babel/parser` when JS/JSX files are parsed for
 * import-relation analysis (see analyzeJsRelations, which spreads this list
 * into its `plugins` option).
 */
const BABEL_PLUGINS = [
    'jsx',
    'typescript',
    'classPrivateMethods',
    'classPrivateProperties',
    'decorators-legacy',
    'doExpressions',
    'exportDefaultFrom',
    'functionBind',
    'logicalAssignment',
    'nullishCoalescingOperator',
    'objectRestSpread',
    'optionalChaining',
    'optionalCatchBinding',
];
|
|
32
|
+
/**
 * Reports whether caller-relation debug logging is switched on.
 *
 * @returns {boolean} true when the DEBUG_CALLERS environment variable is
 *   exactly one of 1 / true / yes / on (case-insensitive); false otherwise,
 *   including when the variable is unset.
 */
function isCallerDebugEnabled() {
    const rawFlag = process.env.DEBUG_CALLERS;
    const flag = rawFlag ?? '';
    const enabledPattern = /^(1|true|yes|on)$/i;
    return enabledPattern.test(flag);
}
|
|
35
|
+
/**
 * Reads the optional debug filter from the DEBUG_CALLERS_MATCH environment
 * variable.
 *
 * @returns {string} the filter, trimmed and lower-cased; '' when unset.
 */
function getCallerDebugMatch() {
    const rawFilter = process.env.DEBUG_CALLERS_MATCH;
    if (rawFilter == null) {
        return '';
    }
    const trimmed = rawFilter.trim();
    return trimmed.toLowerCase();
}
|
|
38
|
+
/**
 * Decides whether a debug record should be printed.
 *
 * @param {Array<string|null|undefined>} parts - strings describing the record
 *   (names, paths, ...); nullish entries are ignored.
 * @returns {boolean} false when debugging is off; true when it is on and no
 *   filter is configured; otherwise true only if at least one part contains
 *   the configured filter (case-insensitive substring match).
 */
function shouldLogCallerDebug(parts) {
    if (!isCallerDebugEnabled()) {
        return false;
    }
    const needle = getCallerDebugMatch();
    if (!needle) {
        // No filter configured: log everything.
        return true;
    }
    for (const part of parts) {
        if (part?.toLowerCase().includes(needle)) {
            return true;
        }
    }
    return false;
}
|
|
46
|
+
/**
 * Writes one caller-analysis debug record to stderr when it passes the
 * debug switch and filter.
 *
 * @param {string} stage - label identifying the analysis step.
 * @param {object} payload - data serialized with JSON.stringify into the line.
 * @param {Array<string|null|undefined>} parts - strings matched against the
 *   debug filter (see shouldLogCallerDebug).
 */
function logCallerDebug(stage, payload, parts) {
    if (shouldLogCallerDebug(parts)) {
        const serialized = JSON.stringify(payload);
        console.error(`[callers.debug] ${stage} ${serialized}`);
    }
}
|
|
15
51
|
/** 判断文件类型 */
|
|
16
52
|
function isJsFile(filePath) {
|
|
17
53
|
return filePath.endsWith('.js') || filePath.endsWith('.jsx');
|
|
@@ -30,9 +66,9 @@ function mergeCallableMeta(symbolType, raw) {
|
|
|
30
66
|
const paramTypeFields = raw.paramTypeFields;
|
|
31
67
|
const { params: _p, paramTypeFields: _f, ...rest } = raw;
|
|
32
68
|
if (symbolType === 'component' && params?.length) {
|
|
33
|
-
const props = [
|
|
34
|
-
|
|
35
|
-
|
|
69
|
+
const props = [...new Set([...(paramTypeFields ?? []), ...params])]
|
|
70
|
+
.filter((name) => name.toLowerCase() !== 'props' && !isParamPlaceholder(name))
|
|
71
|
+
.sort();
|
|
36
72
|
return { ...rest, ...(props.length ? { props } : {}) };
|
|
37
73
|
}
|
|
38
74
|
return { ...rest, ...(params?.length ? { params } : {}) };
|
|
@@ -198,6 +234,142 @@ export const DEFAULT_IGNORE = [
|
|
|
198
234
|
'**/dist-srv/**',
|
|
199
235
|
'**/.turbo/**',
|
|
200
236
|
];
|
|
237
|
+
/**
 * Resolves a relative import specifier to a source file on disk.
 *
 * Candidates are tried in order: the exact path, the path with
 * .ts/.tsx/.js/.jsx appended, then index.ts/.tsx/.js/.jsx inside the path.
 * Only regular files are accepted: a directory that happens to match the
 * bare specifier (e.g. `./components`) must fall through to its index file
 * or to a sibling `components.ts`, not be returned itself.
 *
 * @param {string} currentFilePath - path of the importing file.
 * @param {string} moduleSpecifier - the import source as written.
 * @returns {string|null} the resolved file path, or null for non-relative
 *   specifiers and for specifiers that match no file.
 */
function resolveImportedSourceFile(currentFilePath, moduleSpecifier) {
    if (!moduleSpecifier.startsWith('.'))
        return null;
    const isRegularFile = (candidate) => {
        try {
            return statSync(candidate).isFile();
        }
        catch {
            // Missing path, ENOTDIR, permission error: treat as "not a usable file".
            return false;
        }
    };
    const base = resolve(dirname(currentFilePath), moduleSpecifier);
    const candidates = [
        base,
        `${base}.ts`,
        `${base}.tsx`,
        `${base}.js`,
        `${base}.jsx`,
        join(base, 'index.ts'),
        join(base, 'index.tsx'),
        join(base, 'index.js'),
        join(base, 'index.jsx'),
    ];
    return candidates.find(isRegularFile) ?? null;
}
|
|
258
|
+
/**
 * Writes collected caller/callee relations onto each row's `meta`.
 *
 * Map keys and set members are `name|path` strings. Only the first `|`
 * separates the name; the remainder is re-joined so a path containing `|`
 * survives the round trip.
 *
 * New relations are merged with whatever is already present in
 * `row.meta.callers` / `row.meta.callees` (existing entries first,
 * duplicates by name+path dropped) instead of replacing it, so running a
 * second analysis pass over the same rows does not discard the relations
 * found by the first one. Rows without an entry in a map are left untouched.
 *
 * @param {Array<{name: string, path: string, meta: object}>} rows - rows to annotate in place.
 * @param {Map<string, Set<string>>} callersMap - `name|path` -> keys of its callers.
 * @param {Map<string, Set<string>>} calleesMap - `name|path` -> keys of its callees.
 * @param {number} [limit=CALLERS_LIMIT] - maximum entries kept per list.
 */
function applyRelationMaps(rows, callersMap, calleesMap, limit = CALLERS_LIMIT) {
    const decodeKey = (key) => {
        const [name, ...pathParts] = key.split('|');
        return { name, path: pathParts.join('|') };
    };
    const mergeRelations = (existing, keys) => {
        const previous = Array.isArray(existing) ? existing : [];
        const seen = new Set();
        const merged = [];
        for (const relation of [...previous, ...[...keys].map(decodeKey)]) {
            if (merged.length >= limit)
                break;
            const id = `${relation.name}|${relation.path}`;
            if (seen.has(id))
                continue;
            seen.add(id);
            merged.push(relation);
        }
        return merged;
    };
    for (const row of rows) {
        const key = `${row.name}|${row.path}`;
        const callers = callersMap.get(key);
        const callees = calleesMap.get(key);
        if (callers?.size) {
            row.meta.callers = mergeRelations(row.meta.callers, callers);
        }
        if (callees?.size) {
            row.meta.callees = mergeRelations(row.meta.callees, callees);
        }
    }
}
|
|
277
|
+
/**
 * Derives import-based caller/callee relations for JS/JSX files and writes
 * them onto the rows via applyRelationMaps.
 *
 * For every file that owns at least one row, the file is parsed with Babel
 * and its top-level import declarations are inspected. Each relative import
 * that resolves to a file on disk contributes its named specifiers (by their
 * imported name) and default specifier (by its local name); namespace
 * imports are ignored. Every row of the importing file is then linked as a
 * caller of every imported name found in `symbolMap` under
 * `name|relativePath`. The link is import-level: it does not check which
 * symbol in the file actually uses the import.
 *
 * Files that fail to parse are reported on stderr and skipped.
 *
 * @param {string[]} jsFiles - paths of the JS/JSX files to analyze.
 * @param {Map<string, {name: string, path: string}>} symbolMap - indexed symbols keyed by `name|path`.
 * @param {string} projectRoot - root used to turn file paths into row paths.
 * @param {Array<{name: string, path: string, meta: object}>} rows - all indexed rows; annotated in place.
 */
function analyzeJsRelations(jsFiles, symbolMap, projectRoot, rows) {
    const callersMap = new Map();
    const calleesMap = new Map();
    // Group rows by path once instead of filtering the full row list per file.
    const rowsByPath = new Map();
    for (const row of rows) {
        const bucket = rowsByPath.get(row.path);
        if (bucket) {
            bucket.push(row);
        }
        else {
            rowsByPath.set(row.path, [row]);
        }
    }
    for (const filePath of jsFiles) {
        const relPath = getRelativePathForDisplay(projectRoot, filePath);
        const callerRows = rowsByPath.get(relPath);
        if (!callerRows)
            continue;
        const callerNames = callerRows.map((row) => row.name);
        logCallerDebug('js-file-start', {
            filePath,
            relPath,
            callerRows: callerNames,
        }, [filePath, relPath, ...callerNames]);
        let ast;
        try {
            ast = babelParser.parse(readFileSync(filePath, 'utf-8'), {
                sourceType: 'module',
                plugins: [...BABEL_PLUGINS],
                strictMode: false,
            });
        }
        catch (error) {
            console.error(`[analyzeJsRelations] Failed to parse ${filePath}:`, error);
            continue;
        }
        for (const stmt of ast.program.body) {
            if (!bt.isImportDeclaration(stmt))
                continue;
            const moduleSpecifier = stmt.source.value;
            if (typeof moduleSpecifier !== 'string')
                continue;
            const importedFile = resolveImportedSourceFile(filePath, moduleSpecifier);
            logCallerDebug('js-import-resolve', {
                from: relPath,
                moduleSpecifier,
                importedFile,
            }, [relPath, moduleSpecifier, importedFile]);
            if (!importedFile)
                continue;
            const importedRelPath = getRelativePathForDisplay(projectRoot, importedFile);
            const importedNames = stmt.specifiers
                .map((spec) => {
                if (bt.isImportSpecifier(spec)) {
                    // `imported` is an Identifier, or a StringLiteral for `import { "x" as y }`.
                    return bt.isIdentifier(spec.imported)
                        ? spec.imported.name
                        : spec.imported.value;
                }
                if (bt.isImportDefaultSpecifier(spec)) {
                    return spec.local.name;
                }
                return null;
            })
                .filter((name) => Boolean(name));
            logCallerDebug('js-import-specifiers', {
                from: relPath,
                moduleSpecifier,
                importedRelPath,
                importedNames,
            }, [relPath, moduleSpecifier, importedRelPath, ...importedNames]);
            for (const callerRow of callerRows) {
                const callerKey = `${callerRow.name}|${callerRow.path}`;
                for (const importedName of importedNames) {
                    const targetKey = `${importedName}|${importedRelPath}`;
                    const target = symbolMap.get(targetKey);
                    // The payload scans every symbolMap key, so build it only
                    // when debugging is actually enabled.
                    if (isCallerDebugEnabled()) {
                        logCallerDebug('js-import-target', {
                            callerKey,
                            targetKey,
                            hit: Boolean(target),
                            availableKeysInFile: [...symbolMap.keys()].filter((key) => key.endsWith(`|${importedRelPath}`)),
                        }, [callerKey, targetKey, importedRelPath, importedName]);
                    }
                    if (!target)
                        continue;
                    const calleeKey = `${target.name}|${target.path}`;
                    const calleeSet = calleesMap.get(callerKey) ?? new Set();
                    calleeSet.add(calleeKey);
                    calleesMap.set(callerKey, calleeSet);
                    const callerSet = callersMap.get(calleeKey) ?? new Set();
                    callerSet.add(callerKey);
                    callersMap.set(calleeKey, callerSet);
                    logCallerDebug('js-link-added', {
                        callerKey,
                        calleeKey,
                    }, [callerKey, target.name, target.path]);
                }
            }
        }
    }
    applyRelationMaps(rows, callersMap, calleesMap);
    if (isCallerDebugEnabled()) {
        for (const row of rows) {
            logCallerDebug('js-row-final', {
                row: `${row.name}|${row.path}`,
                callers: row.meta.callers ?? [],
                callees: row.meta.callees ?? [],
            }, [row.name, row.path]);
        }
    }
}
|
|
201
373
|
/**
|
|
202
374
|
* 按 glob 收集文件,用 ts-morph 加载并遍历每个文件的导出,生成全部代码块行。
|
|
203
375
|
* 同时分析调用关系,填充 meta.callers / meta.callees。
|
|
@@ -269,6 +441,10 @@ export async function indexProject(opts) {
|
|
|
269
441
|
});
|
|
270
442
|
}
|
|
271
443
|
symbolMap.get(key).exports.add(exportName);
|
|
444
|
+
logCallerDebug('symbol-map-add-ts', {
|
|
445
|
+
key,
|
|
446
|
+
exportName,
|
|
447
|
+
}, [row.name, row.path, exportName]);
|
|
272
448
|
}
|
|
273
449
|
}
|
|
274
450
|
}
|
|
@@ -291,6 +467,10 @@ export async function indexProject(opts) {
|
|
|
291
467
|
});
|
|
292
468
|
}
|
|
293
469
|
symbolMap.get(key).exports.add(row.name);
|
|
470
|
+
logCallerDebug('symbol-map-add-js', {
|
|
471
|
+
key,
|
|
472
|
+
exportName: row.name,
|
|
473
|
+
}, [row.name, row.path]);
|
|
294
474
|
}
|
|
295
475
|
}
|
|
296
476
|
catch (e) {
|
|
@@ -307,6 +487,9 @@ export async function indexProject(opts) {
|
|
|
307
487
|
project.addSourceFilesAtPaths(tsFiles);
|
|
308
488
|
analyzeRelations(project, symbolMap, projectRoot, out);
|
|
309
489
|
}
|
|
490
|
+
if (jsFiles.length > 0) {
|
|
491
|
+
analyzeJsRelations(jsFiles, symbolMap, projectRoot, out);
|
|
492
|
+
}
|
|
310
493
|
return out;
|
|
311
494
|
}
|
|
312
495
|
/**
|
|
@@ -392,25 +575,13 @@ function analyzeRelations(project, symbolMap, projectRoot, rows) {
|
|
|
392
575
|
}
|
|
393
576
|
});
|
|
394
577
|
}
|
|
395
|
-
|
|
578
|
+
applyRelationMaps(rows, callersMap, calleesMap);
|
|
396
579
|
for (const row of rows) {
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
// ✅ 反序列化回对象
|
|
403
|
-
const [name, ...pathParts] = s.split('|');
|
|
404
|
-
return { name, path: pathParts.join('|') };
|
|
405
|
-
});
|
|
406
|
-
}
|
|
407
|
-
if (callees?.size) {
|
|
408
|
-
row.meta.callees = [...callees].slice(0, CALLERS_LIMIT).map((s) => {
|
|
409
|
-
const [name, ...pathParts] = s.split('|');
|
|
410
|
-
// path:为了防止路径里万一含有 | 字符时截断错误。
|
|
411
|
-
return { name, path: pathParts.join('|') };
|
|
412
|
-
});
|
|
413
|
-
}
|
|
580
|
+
logCallerDebug('ts-row-final', {
|
|
581
|
+
row: `${row.name}|${row.path}`,
|
|
582
|
+
callers: row.meta.callers ?? [],
|
|
583
|
+
callees: row.meta.callees ?? [],
|
|
584
|
+
}, [row.name, row.path]);
|
|
414
585
|
}
|
|
415
586
|
console.error(`[analyzeRelations] processed ${rows.length} symbols`);
|
|
416
587
|
}
|
|
@@ -171,17 +171,32 @@ function traverseNode(node, cb) {
|
|
|
171
171
|
}
|
|
172
172
|
}
|
|
173
173
|
export function inferBehaviorFromJS(node) {
|
|
174
|
-
const behavior =
|
|
174
|
+
const behavior = new Set();
|
|
175
|
+
const add = (label) => {
|
|
176
|
+
behavior.add(label);
|
|
177
|
+
};
|
|
175
178
|
// 遍历 AST
|
|
176
179
|
traverseNode(node, (n) => {
|
|
177
180
|
// fetch / axios
|
|
178
181
|
if (t.isCallExpression(n) && t.isIdentifier(n.callee)) {
|
|
179
182
|
const name = n.callee.name.toLowerCase();
|
|
180
183
|
if (name.includes('fetch') || name.includes('axios')) {
|
|
181
|
-
|
|
184
|
+
add('performs network request');
|
|
182
185
|
}
|
|
183
186
|
if (name.includes('settimeout')) {
|
|
184
|
-
|
|
187
|
+
add('uses timer');
|
|
188
|
+
}
|
|
189
|
+
if (name.includes('getscroll')) {
|
|
190
|
+
add('tracks scroll position');
|
|
191
|
+
}
|
|
192
|
+
if (name.includes('getoffset')) {
|
|
193
|
+
add('computes element offset');
|
|
194
|
+
}
|
|
195
|
+
if (name.includes('throttle')) {
|
|
196
|
+
add('throttles position updates');
|
|
197
|
+
}
|
|
198
|
+
if (name.includes('addeventlistener')) {
|
|
199
|
+
add('listens to viewport events');
|
|
185
200
|
}
|
|
186
201
|
}
|
|
187
202
|
if (t.isCallExpression(n) &&
|
|
@@ -190,17 +205,32 @@ export function inferBehaviorFromJS(node) {
|
|
|
190
205
|
const prop = n.callee.property.name;
|
|
191
206
|
if (prop === 'getBoundingClientRect' ||
|
|
192
207
|
prop === 'getComputedStyle') {
|
|
193
|
-
|
|
208
|
+
add('reads dom layout');
|
|
194
209
|
}
|
|
210
|
+
if (prop === 'addEventListener') {
|
|
211
|
+
add('listens to viewport events');
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
if (t.isIdentifier(n)) {
|
|
215
|
+
const name = n.name.toLowerCase();
|
|
216
|
+
if (name === 'offsettop' || name === 'offsetbottom') {
|
|
217
|
+
add('supports offset positioning');
|
|
218
|
+
}
|
|
219
|
+
if (name === 'scrolltop') {
|
|
220
|
+
add('tracks scroll position');
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
if (t.isStringLiteral(n) && n.value.toLowerCase() === 'fixed') {
|
|
224
|
+
add('uses fixed positioning');
|
|
195
225
|
}
|
|
196
226
|
// localStorage
|
|
197
227
|
if (t.isMemberExpression(n) &&
|
|
198
228
|
t.isIdentifier(n.object) &&
|
|
199
229
|
n.object.name === 'localStorage') {
|
|
200
|
-
|
|
230
|
+
add('uses local storage');
|
|
201
231
|
}
|
|
202
232
|
});
|
|
203
|
-
return
|
|
233
|
+
return [...behavior].sort();
|
|
204
234
|
}
|
|
205
235
|
export function extractNormalizedSignatureJS(node) {
|
|
206
236
|
if (t.isFunctionDeclaration(node) || t.isFunctionExpression(node)) {
|
|
@@ -3,22 +3,30 @@ const REUSABLE_CODE_ADVISOR_DESCRIPTION = '在实现需求时检索并推荐最
|
|
|
3
3
|
/** 与 SKILL.md 中 `# 可复用代码推荐` 起至约束、示例说明为止的正文一致(无 YAML frontmatter)。 */
|
|
4
4
|
const REUSABLE_CODE_ADVISOR_MARKDOWN = `# 可复用代码推荐
|
|
5
5
|
|
|
6
|
-
##
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
6
|
+
## 工作流(三级降级链,严格按序,每级有结果即终止)
|
|
7
|
+
|
|
8
|
+
### 第一级:recommend_component(唯一首选)
|
|
9
|
+
1. 用户询问可复用组件/函数/util/工具 → 必须先调用 \`recommend_component\`
|
|
10
|
+
2. 返回 recommended != null → 将工具返回文本**原样输出**,**完全停止**,不得继续调用任何工具
|
|
11
|
+
3. 返回 recommended = null → 进入第二级
|
|
12
|
+
|
|
13
|
+
### 第二级:search_symbols(仅在第一级无结果时)
|
|
14
|
+
4. 调用 \`search_symbols\`(semantic=true,传入原始 query)
|
|
15
|
+
5. 返回非空结果 → 取第一条,按固定回复结构输出,**完全停止**
|
|
16
|
+
6. 返回空结果 → 进入第三级
|
|
17
|
+
|
|
18
|
+
### 第三级:输出无结果模板
|
|
19
|
+
7. 直接输出无结果固定模板,**完全停止**
|
|
20
|
+
8. 禁止进行 grep、read file、file search 等文件系统操作
|
|
21
|
+
|
|
22
|
+
## 硬性约束
|
|
23
|
+
|
|
24
|
+
- **不得跳过第一级直接调用 search_symbols**:对任何"帮我找 X""有没有 X"类问题,第一步永远是 \`recommend_component\`
|
|
25
|
+
- **工具返回后不得自由发挥**:输出必须以工具返回结果为唯一事实来源,按模板格式输出,禁止改写为散文或追加额外检索过程
|
|
26
|
+
- **禁止文件系统兜底**:MCP 三级链全部无结果后,只输出无结果模板,不读文件、不 grep
|
|
27
|
+
- **禁止工作区外路径**:禁止引用或读取工作区外文件(如 \`/Users/.../not_git_private/...\`)
|
|
28
|
+
- **禁止过程叙述**:不得输出"我先检索""Ran search_symbols""Read file"等过程描述
|
|
29
|
+
- 用户选择"采纳推荐"后,立即调用 \`inc_usage\`(symbolId 从结果 id 字段获取)
|
|
22
30
|
|
|
23
31
|
## 不适用场景
|
|
24
32
|
|
|
@@ -30,32 +38,53 @@ const REUSABLE_CODE_ADVISOR_MARKDOWN = `# 可复用代码推荐
|
|
|
30
38
|
## 搜索结果判断
|
|
31
39
|
|
|
32
40
|
根据 semanticSimilarity 决定推荐置信度:
|
|
33
|
-
- **> 0.85
|
|
34
|
-
- **0.6 – 0.85**:中等置信度,需结合 description
|
|
35
|
-
- **< 0.6
|
|
41
|
+
- **> 0.85**:高置信度,直接推荐
|
|
42
|
+
- **0.6 – 0.85**:中等置信度,需结合 description 综合判断
|
|
43
|
+
- **< 0.6**:低置信度,明确告知用户可能无合适实现
|
|
36
44
|
- **空结果**:明确说"未找到已有实现",不要凭空推荐
|
|
37
45
|
|
|
38
|
-
##
|
|
46
|
+
## 回复结构(固定模板,不得改写)
|
|
47
|
+
|
|
48
|
+
> ⚠️ 以下模板中的所有字段值(symbolId、使用范围、副作用、理由等)**均由 \`recommend_component\` 工具返回文本中已填好**,禁止自行推断或从代码文件中读取。LLM 只需将工具返回的文本**原样复制输出**,不得改写、补全或省略任何字段。
|
|
39
49
|
|
|
40
|
-
|
|
50
|
+
有结果时:
|
|
41
51
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
52
|
+
首选:<符号名> — <文件路径>
|
|
53
|
+
使用范围:<callers 或 "新增">
|
|
54
|
+
副作用:<sideEffects 或 "无">
|
|
55
|
+
理由:
|
|
56
|
+
1. <要点1>
|
|
57
|
+
2. <要点2>
|
|
58
|
+
其他候选:<候选A(副作用)>; <候选B(副作用)>
|
|
59
|
+
用法提示:
|
|
60
|
+
<最小集成示例>
|
|
61
|
+
是否采纳:
|
|
62
|
+
1. 采纳推荐
|
|
63
|
+
2. 取消
|
|
49
64
|
|
|
50
|
-
|
|
65
|
+
> 输出上述模板后**等待用户在聊天框输入回复**,识别规则:
|
|
66
|
+
> - 用户输入 **"1"、"采纳"、"采纳推荐"、"ok"、"好的"** 或类似确认词 → 从上方输出文本中读取 \`symbolId:<id>\` 那一行的值,立即调用 \`inc_usage\` 工具传入该 id,调用成功后回复"✓ 已记录使用,可直接集成"
|
|
67
|
+
> - 用户输入 **"2"、"取消"、"不用了"** 或类似否定词 → 回复"好的,已取消",停止
|
|
68
|
+
> - 用户输入其他内容(如追问细节)→ 正常回答,回答结束后再次展示"是否采纳"选项
|
|
51
69
|
|
|
52
|
-
|
|
53
|
-
- 若无合适代码块,明确说明,并给出最接近的选项及差距。
|
|
54
|
-
- 推理简洁,面向落地实现。
|
|
70
|
+
无结果时:
|
|
55
71
|
|
|
56
|
-
|
|
72
|
+
首选:未找到已有实现
|
|
73
|
+
使用范围:无
|
|
74
|
+
副作用:无
|
|
75
|
+
理由:
|
|
76
|
+
1. 当前索引中没有满足条件的符号
|
|
77
|
+
2. 已尝试可用检索方式,仍无可用候选
|
|
78
|
+
其他候选:无
|
|
79
|
+
用法提示:
|
|
80
|
+
// 可先创建一个最小可复用实现
|
|
81
|
+
是否采纳:
|
|
82
|
+
1. 让我新建一个最小可复用实现
|
|
83
|
+
2. 取消
|
|
57
84
|
|
|
58
|
-
|
|
85
|
+
> 输出上述模板后**等待用户在聊天框输入回复**,识别规则:
|
|
86
|
+
> - 用户输入 **"1"、"新建"、"帮我创建"** 或类似确认词 → 进入新建流程,引导用户确认最小接口设计
|
|
87
|
+
> - 用户输入 **"2"、"取消"、"不用了"** → 回复"好的,已取消",停止
|
|
59
88
|
`;
|
|
60
89
|
export function registerReusableCodeAdvisorPrompt(server) {
|
|
61
90
|
server.prompt('reusable-code-advisor', REUSABLE_CODE_ADVISOR_DESCRIPTION, {
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
import { env } from '../config/env.js';
|
|
2
|
+
import { CHUNK_SIMILARITY_THRESHOLD, CHUNK_TOP_K } from '../config/tuning.js';
|
|
3
|
+
import { SEARCHABLE_STATUS, SYMBOL_STATUS } from '../config/symbolStatus.js';
|
|
4
|
+
import { getPool } from '../db/postgres.js';
|
|
5
|
+
import { getAllChunkTableSQLs } from '../db/schema.js';
|
|
6
|
+
import { buildDocumentChunks } from '../indexer/chunkText.js';
|
|
7
|
+
import { createEmbeddingClient, embedAll, } from '../services/embeddingClient.js';
|
|
8
|
+
/**
 * Normalizes an embedding value read from the database into a number array.
 *
 * Accepts either an array or a JSON string that decodes to one (the string
 * form is what pgvector columns come back as — presumably; verify against
 * the driver configuration). Elements must be finite numbers or non-blank
 * numeric strings. Anything else (null, '', booleans, nested arrays, NaN,
 * Infinity) invalidates the whole vector instead of being coerced to 0/1 by
 * Number(), so a corrupt value is never silently turned into a usable one.
 *
 * @param {unknown} raw - value of the embedding column.
 * @returns {number[]|null} the parsed vector, or null when absent or invalid.
 */
function parseEmbedding(raw) {
    if (raw == null)
        return null;
    if (typeof raw === 'string') {
        try {
            return parseEmbedding(JSON.parse(raw));
        }
        catch {
            return null;
        }
    }
    if (!Array.isArray(raw))
        return null;
    const nums = [];
    for (const item of raw) {
        let value = NaN;
        if (typeof item === 'number') {
            value = item;
        }
        else if (typeof item === 'string' && item.trim() !== '') {
            value = Number(item);
        }
        if (!Number.isFinite(value))
            return null;
        nums.push(value);
    }
    return nums;
}
|
|
27
|
+
/**
 * Maps a database row (snake_case columns) to the chunk object used by the
 * rest of the code (camelCase fields).
 *
 * - `meta` is JSON-parsed when it arrives as text (the queries in this file
 *   select `meta::text`); an already-parsed object is passed through; a
 *   missing/empty value becomes null.
 * - `similarity` is converted with Number() whenever the column is present.
 *   A legitimate score of 0 is preserved rather than dropped by a truthiness
 *   test; absent or non-finite values become undefined.
 *
 * @param {object} row - row returned by a chunk query.
 * @returns {object} the chunk in application shape.
 */
function toStoredChunk(row) {
    let meta = null;
    if (row.meta) {
        meta = typeof row.meta === 'string' ? JSON.parse(row.meta) : row.meta;
    }
    const similarity = row.similarity == null ? NaN : Number(row.similarity);
    return {
        id: row.id,
        sourceId: row.source_id,
        title: row.title,
        path: row.path,
        chunkIndex: row.chunk_index,
        chunkCount: row.chunk_count,
        content: row.content,
        summary: row.summary,
        category: row.category,
        meta,
        semanticHash: row.semantic_hash,
        embedding: parseEmbedding(row.embedding),
        similarity: Number.isFinite(similarity) ? similarity : undefined,
        createdAt: row.created_at ?? null,
        updatedAt: row.updated_at ?? null,
    };
}
|
|
47
|
+
/**
 * Builds the text sent to the embedding service for one chunk: title, path,
 * summary and content, one per line, with empty or missing parts left out.
 *
 * @param {{title: string, path: string, summary?: string|null, content: string}} chunk
 * @returns {string} the newline-joined text.
 */
function chunkToEmbeddingText(chunk) {
    const sections = [];
    for (const section of [chunk.title, chunk.path, chunk.summary ?? '', chunk.content]) {
        if (section) {
            sections.push(section);
        }
    }
    return sections.join('\n');
}
|
|
53
|
+
/**
 * Persistence and vector search for document chunks stored in the table
 * named by `env.chunksTable`.
 *
 * Every method is a no-op (returns nothing / an empty list) when no
 * connection pool is available. The table name is interpolated into SQL
 * text; NOTE(review): it comes from configuration, not request input —
 * confirm it is never user-controlled.
 */
export class ChunkRepository {
    pool;
    constructor() {
        this.pool = getPool();
    }
    /**
     * Runs the chunk table/index DDL statements so the write path can be
     * used on its own.
     */
    async ensureSchema() {
        if (!this.pool)
            return;
        for (const sql of getAllChunkTableSQLs()) {
            await this.pool.query(sql);
        }
    }
    /**
     * Replaces all stored chunks of a document with a freshly built set.
     *
     * The document is split into chunks, embedded in one batch when an
     * embedding service is configured, and then written inside a single
     * transaction: existing rows with the same path are deleted first so
     * stale chunks cannot show up in search results. Chunks written without
     * a vector get status PENDING; chunks with a vector get ONLINE.
     *
     * @param {object} document - document to index; `document.path` identifies it.
     * @param {object} [options] - passed through to buildDocumentChunks.
     * @returns {Promise<object[]>} the inserted chunks; [] when there is no
     *   pool or the document yields no chunks.
     * @throws rethrows any embedding or database error after rolling back.
     */
    async upsertDocument(document, options = {}) {
        if (!this.pool)
            return [];
        await this.ensureSchema();
        const built = buildDocumentChunks(document, options);
        if (built.length === 0)
            return [];
        let embeddings = built.map(() => null);
        if (env.embeddingServiceUrl) {
            // One batched call instead of one request per chunk.
            const client = createEmbeddingClient(env.embeddingServiceUrl);
            embeddings = await embedAll(client, built.map(chunkToEmbeddingText));
        }
        const db = await this.pool.connect();
        try {
            await db.query('BEGIN');
            // DELETE is a no-op when nothing matches, so no prior existence
            // check is needed.
            await db.query(`DELETE FROM ${env.chunksTable} WHERE path = $1`, [document.path]);
            const sql = `
                INSERT INTO ${env.chunksTable}
                    (source_id, title, path, chunk_index, chunk_count, content, summary, category, meta,
                     embedding, semantic_hash, status)
                VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9::jsonb, $10::vector, $11, $12)
                RETURNING id, source_id, title, path, chunk_index, chunk_count, content, summary, category,
                    meta::text AS meta, embedding, semantic_hash, status, created_at, updated_at
            `;
            const inserted = [];
            for (let index = 0; index < built.length; index += 1) {
                const chunk = built[index];
                const embedding = embeddings[index];
                const vecLiteral = Array.isArray(embedding)
                    ? `[${embedding.join(',')}]`
                    : null;
                // Without a vector the row is stored as PENDING so the
                // embedding can be filled in later.
                const { rows } = await db.query(sql, [
                    chunk.sourceId,
                    chunk.title,
                    chunk.path,
                    chunk.chunkIndex,
                    chunk.chunkCount,
                    chunk.content,
                    chunk.summary,
                    chunk.category,
                    JSON.stringify(chunk.meta),
                    vecLiteral,
                    chunk.semanticHash,
                    vecLiteral ? SYMBOL_STATUS.ONLINE : SYMBOL_STATUS.PENDING,
                ]);
                inserted.push(toStoredChunk(rows[0]));
            }
            await db.query('COMMIT');
            return inserted;
        }
        catch (error) {
            try {
                await db.query('ROLLBACK');
            }
            catch (rollbackError) {
                // Keep the original failure as the reported error; a failed
                // ROLLBACK (e.g. broken connection) must not mask it.
                console.error('[ChunkRepository] ROLLBACK failed:', rollbackError);
            }
            throw error;
        }
        finally {
            db.release();
        }
    }
    /**
     * Finds the chunks most similar to a text query.
     *
     * The query is embedded, rows with an embedding and the searchable
     * status are ordered by the `<=>` distance to the query vector, and
     * `limit * 2` candidates are fetched. Candidates whose similarity
     * (1 - distance) is below CHUNK_SIMILARITY_THRESHOLD are dropped and at
     * most `limit` results are returned.
     *
     * @param {string} query - free-text query; trimmed before embedding.
     * @param {{limit?: number, path?: string}} [opts] - `limit` defaults to
     *   CHUNK_TOP_K; `path` restricts the search to one document.
     * @returns {Promise<object[]>} matching chunks, best first; [] without a pool.
     * @throws {Error} when no embedding service is configured or the query
     *   vector comes back empty.
     */
    async searchSemantic(query, opts) {
        if (!env.embeddingServiceUrl) {
            throw new Error('语义 chunk 检索需配置 EMBEDDING_SERVICE_URL');
        }
        if (!this.pool)
            return [];
        const limit = opts?.limit ?? CHUNK_TOP_K;
        const client = createEmbeddingClient(env.embeddingServiceUrl);
        const [queryVec] = await client.embed([query.trim()]);
        if (!queryVec?.length) {
            throw new Error('查询向量为空');
        }
        const params = [
            `[${queryVec.join(',')}]`,
            SEARCHABLE_STATUS,
        ];
        let sql = `
            SELECT id, source_id, title, path, chunk_index, chunk_count, content, summary, category,
                meta::text AS meta, embedding, semantic_hash, status, created_at, updated_at,
                1 - (embedding <=> $1::vector) AS similarity
            FROM ${env.chunksTable}
            WHERE embedding IS NOT NULL
                AND status = $2
        `;
        if (opts?.path) {
            params.push(opts.path);
            sql += ` AND path = $${params.length}`;
        }
        // Over-fetch so the threshold filter below can still fill `limit`.
        params.push(limit * 2);
        sql += ` ORDER BY embedding <=> $1::vector LIMIT $${params.length}`;
        const { rows } = await this.pool.query(sql, params);
        return rows
            .map(toStoredChunk)
            .filter((chunk) => (chunk.similarity ?? 0) >= CHUNK_SIMILARITY_THRESHOLD)
            .slice(0, limit);
    }
    /**
     * Loads a chunk together with its neighbours in the same document, to
     * give a hit more surrounding context.
     *
     * @param {string} path - document path.
     * @param {number} chunkIndex - index of the chunk that was hit.
     * @param {number} [radius=1] - how many chunks to include on each side.
     * @returns {Promise<object[]>} chunks with index in
     *   [max(0, chunkIndex - radius), chunkIndex + radius], ascending;
     *   [] without a pool.
     */
    async getAdjacentChunks(path, chunkIndex, radius = 1) {
        if (!this.pool)
            return [];
        const { rows } = await this.pool.query(`
            SELECT id, source_id, title, path, chunk_index, chunk_count, content, summary, category,
                meta::text AS meta, embedding, semantic_hash, status, created_at, updated_at
            FROM ${env.chunksTable}
            WHERE path = $1 AND chunk_index BETWEEN $2 AND $3
            ORDER BY chunk_index ASC
        `, [path, Math.max(0, chunkIndex - radius), chunkIndex + radius]);
        return rows.map(toStoredChunk);
    }
}
|