@lorrylurui/code-intelligence-mcp 1.1.15 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -3
- package/dist/cli/ci-index-cli.js +66 -0
- package/dist/cli/ci-index.js +80 -0
- package/dist/cli/detect-duplicates.js +1 -6
- package/dist/cli/embedding-worker-cli.js +35 -0
- package/dist/cli/index-codebase.js +5 -4
- package/dist/config/env.js +3 -6
- package/dist/config/symbolStatus.js +8 -0
- package/dist/db/mysql.js +3 -6
- package/dist/db/schema.js +9 -2
- package/dist/indexer/astNormalizer.js +201 -0
- package/dist/indexer/babelParser.js +40 -15
- package/dist/indexer/categoryClassifier.js +129 -0
- package/dist/indexer/embedText.js +9 -7
- package/dist/indexer/heuristics.js +42 -23
- package/dist/indexer/indexProject.js +145 -55
- package/dist/indexer/jsAstNormalizer.js +201 -0
- package/dist/indexer/persistSymbols.js +7 -3
- package/dist/indexer/tsAstNormalizer.js +363 -0
- package/dist/prompts/reusableCodeAdvisorPrompt.js +6 -3
- package/dist/repositories/symbolRepository.js +81 -7
- package/dist/services/embeddingQueue.js +56 -0
- package/dist/services/reindex.js +12 -9
- package/dist/tools/searchByStructure.js +3 -1
- package/dist/tools/searchSymbols.js +14 -3
- package/dist/workers/embeddingWorker.js +100 -0
- package/package.json +7 -4
|
@@ -3,7 +3,9 @@
|
|
|
3
3
|
*/
|
|
4
4
|
import * as babelParser from '@babel/parser';
|
|
5
5
|
import * as bt from '@babel/types';
|
|
6
|
-
import { getRelativePathForDisplay, inferCategoryFromPath, } from './heuristics.js';
|
|
6
|
+
import { getRelativePathForDisplay, inferCategoryFromPath, inferCategoryFromName, } from './heuristics.js';
|
|
7
|
+
import { computeFileHash } from './tsAstNormalizer.js';
|
|
8
|
+
import { computeSemanticHashJs } from './jsAstNormalizer.js';
|
|
7
9
|
/** 从 JS 文件内容解析导出的代码块 */
|
|
8
10
|
export function parseJsFile(filePath, content, projectRoot) {
|
|
9
11
|
const out = [];
|
|
@@ -46,7 +48,23 @@ export function parseJsFile(filePath, content, projectRoot) {
|
|
|
46
48
|
out.push(...rows);
|
|
47
49
|
}
|
|
48
50
|
}
|
|
49
|
-
|
|
51
|
+
// 第三轮:更新content、file_hash、semantic_hash 字段
|
|
52
|
+
const outWithTwoHash = out.map((o) => {
|
|
53
|
+
const [semantic_hash, stableStr] = computeSemanticHashJs(o);
|
|
54
|
+
const { name, type, description, meta, category, path } = o;
|
|
55
|
+
return {
|
|
56
|
+
name,
|
|
57
|
+
type,
|
|
58
|
+
category,
|
|
59
|
+
path,
|
|
60
|
+
description,
|
|
61
|
+
meta,
|
|
62
|
+
content: stableStr,
|
|
63
|
+
file_hash: computeFileHash(content),
|
|
64
|
+
semantic_hash,
|
|
65
|
+
};
|
|
66
|
+
});
|
|
67
|
+
return outWithTwoHash;
|
|
50
68
|
}
|
|
51
69
|
/** 处理导出的声明 */
|
|
52
70
|
function processStatement(stmt, filePath, isJsx, projectRoot) {
|
|
@@ -216,21 +234,19 @@ function arrowToFunction(name, arrow) {
|
|
|
216
234
|
}
|
|
217
235
|
function createRowFromFunction(name, decl, filePath, projectRoot, isJsx) {
|
|
218
236
|
const relPath = getRelativePathForDisplay(projectRoot, filePath);
|
|
219
|
-
const category = inferCategoryFromPath(filePath);
|
|
237
|
+
const category = inferCategoryFromPath(filePath) || inferCategoryFromName(name);
|
|
220
238
|
// 检测是否有 JSX
|
|
221
239
|
const hasJsx = isJsx || containsJsx(decl);
|
|
222
240
|
// 判断类型:
|
|
223
241
|
// 1. 有 JSX = component
|
|
224
|
-
// 2. 名字包含
|
|
242
|
+
// 2. 名字包含 use = hook
|
|
225
243
|
// 3. 大写开头(JSX 组件约定)= component
|
|
226
|
-
// 4. 其他 =
|
|
227
|
-
const type =
|
|
228
|
-
? '
|
|
229
|
-
: name
|
|
230
|
-
? '
|
|
231
|
-
:
|
|
232
|
-
? 'component'
|
|
233
|
-
: 'util';
|
|
244
|
+
// 4. 其他 = function
|
|
245
|
+
const type = name.toLowerCase().includes('use')
|
|
246
|
+
? 'hook'
|
|
247
|
+
: isJsx || /^[A-Z]/.test(name)
|
|
248
|
+
? 'component'
|
|
249
|
+
: 'function';
|
|
234
250
|
const params = decl.params
|
|
235
251
|
.filter((p) => bt.isIdentifier(p))
|
|
236
252
|
.map((p) => p.name);
|
|
@@ -250,21 +266,28 @@ function createRowFromFunction(name, decl, filePath, projectRoot, isJsx) {
|
|
|
250
266
|
...(hooks.length ? { hooks } : {}),
|
|
251
267
|
...(sideEffects.length ? { sideEffects } : {}),
|
|
252
268
|
},
|
|
269
|
+
file_hash: '',
|
|
270
|
+
semantic_hash: '',
|
|
271
|
+
node: decl,
|
|
253
272
|
};
|
|
254
273
|
}
|
|
255
274
|
function createRowFromClass(name, _decl, filePath, projectRoot) {
|
|
256
275
|
const relPath = getRelativePathForDisplay(projectRoot, filePath);
|
|
257
276
|
const category = inferCategoryFromPath(filePath);
|
|
258
277
|
// 大写开头的类视为组件
|
|
259
|
-
const type = /^[A-Z]/.test(name) ? 'component' : '
|
|
278
|
+
const type = /^[A-Z]/.test(name) ? 'component' : 'function';
|
|
260
279
|
return {
|
|
261
280
|
name,
|
|
262
281
|
type,
|
|
263
282
|
category,
|
|
264
283
|
path: relPath,
|
|
265
284
|
description: null,
|
|
285
|
+
// content meta.kind 暂时废弃不用,
|
|
266
286
|
content: null,
|
|
267
|
-
meta: {
|
|
287
|
+
meta: {},
|
|
288
|
+
file_hash: '',
|
|
289
|
+
semantic_hash: '',
|
|
290
|
+
node: _decl,
|
|
268
291
|
};
|
|
269
292
|
}
|
|
270
293
|
/** 简单检测是否包含 JSX */
|
|
@@ -380,7 +403,9 @@ function getNodeText(n) {
|
|
|
380
403
|
if (propNode && typeof propNode === 'object') {
|
|
381
404
|
const propType = propNode.type;
|
|
382
405
|
if (propType === 'Identifier') {
|
|
383
|
-
prop =
|
|
406
|
+
prop =
|
|
407
|
+
propNode.name ||
|
|
408
|
+
'';
|
|
384
409
|
}
|
|
385
410
|
else if (propType === 'Literal') {
|
|
386
411
|
prop = String(propNode.value ?? '');
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* category 分类器:三层融合(规则 + embedding + LLM)
|
|
3
|
+
*/
|
|
4
|
+
import { createHash } from 'node:crypto';
|
|
5
|
+
import { env } from '../config/env.js';
|
|
6
|
+
import { createEmbeddingClient } from '../services/embeddingClient.js';
|
|
7
|
+
import { cosineSimilarity } from '../services/vectorMath.js';
|
|
8
|
+
import { inferCategoryFromPath, inferCategoryFromName } from './heuristics.js';
|
|
9
|
+
// 1. 定义 category label space
|
|
10
|
+
const CATEGORIES = [
|
|
11
|
+
'network',
|
|
12
|
+
'form',
|
|
13
|
+
'validation',
|
|
14
|
+
'format',
|
|
15
|
+
'state',
|
|
16
|
+
'ui',
|
|
17
|
+
'storage',
|
|
18
|
+
'math',
|
|
19
|
+
'hook',
|
|
20
|
+
'utils',
|
|
21
|
+
'component',
|
|
22
|
+
'service',
|
|
23
|
+
];
|
|
24
|
+
// 2. Precomputed category embeddings (module-level cache; null until initialised).
let categoryEmbeddingsCache = null;
/**
 * Embeds every label in CATEGORIES and stores the result in the module cache.
 *
 * Does nothing when `env.embeddingServiceUrl` is not configured. One embed
 * request is issued per label and all requests run concurrently. Labels for
 * which the client returns no usable vector are dropped, so an entry without
 * an embedding can never reach the similarity computation; when no label
 * yields a vector the cache is set to `null`.
 *
 * @returns {Promise<void>} settles once the cache has been replaced; rejects
 *   if any embed request rejects.
 */
export async function initCategoryEmbeddings() {
    if (!env.embeddingServiceUrl)
        return;
    const client = createEmbeddingClient(env.embeddingServiceUrl);
    const embeddings = await Promise.all(CATEGORIES.map(async (name) => {
        // The client is called with a one-element batch; only the first vector is used.
        const [embedding] = (await client.embed([name])) ?? [];
        return { name, embedding };
    }));
    // `length` check instead of Array.isArray so typed-array vectors are kept as well.
    const usable = embeddings.filter(({ embedding }) => embedding != null && embedding.length > 0);
    categoryEmbeddingsCache = usable.length > 0 ? usable : null;
}
|
|
36
|
+
// 3. Embedding layer
const EMBEDDING_THRESHOLD = 0.5;
// TODO: known problem — `embedding` is the vector of a whole semantic template,
// while categoryEmbeddingsCache holds vectors of single words, so the similarity
// is bound to stay below 0.3 and this layer will not clear the threshold.
/**
 * Returns the cached category whose vector is most similar to `embedding`.
 *
 * @param embedding query vector of one symbol; may be missing or empty.
 * @returns the best-scoring category name, or `null` when the cache is not
 *   initialised, the query vector is missing/empty, or the best cosine
 *   similarity is below EMBEDDING_THRESHOLD.
 */
function categoryFromEmbedding(embedding) {
    if (!categoryEmbeddingsCache)
        return null;
    // A row without a vector must not reach cosineSimilarity.
    if (embedding == null || embedding.length === 0)
        return null;
    let best = null;
    let maxScore = -Infinity;
    for (const c of categoryEmbeddingsCache) {
        // Skip cache entries that were stored without a vector.
        if (c.embedding == null)
            continue;
        const score = cosineSimilarity(embedding, c.embedding);
        // A NaN score never compares greater, so it cannot win.
        if (score > maxScore) {
            maxScore = score;
            best = c.name;
        }
    }
    return maxScore < EMBEDDING_THRESHOLD ? null : best;
}
|
|
53
|
+
// 4. LLM layer (with cache)
const LLM_CACHE_TTL = 24 * 60 * 60 * 1000;
const OLLAMA_URL = 'http://127.0.0.1:11434/v1/chat/completions';
// Adjust the model name to the local setup, e.g. 'llama3.2:3b' or 'llama3.1:8b'.
const OLLAMA_MODEL = 'llama3.2:3b';
const llmCategoryCache = new Map();
/**
 * Extracts a known category from the model's reply text.
 *
 * Matching is done on whole words, not substrings: with substring matching a
 * reply of "format" would hit "form" first, and "ui" would match any reply
 * containing "build" or "require".
 *
 * @param reply raw reply content; anything that is not a string yields `null`.
 * @returns the matched entry of CATEGORIES, or `null`.
 */
function matchCategoryInReply(reply) {
    if (typeof reply !== 'string')
        return null;
    const normalized = reply.trim().toLowerCase();
    if (!normalized)
        return null;
    if (CATEGORIES.includes(normalized))
        return normalized;
    const words = new Set(normalized.split(/[^a-z]+/));
    return CATEGORIES.find((c) => words.has(c)) ?? null;
}
/**
 * Asks the local Ollama chat endpoint to pick a category for a code description.
 *
 * Successful answers are cached in memory for LLM_CACHE_TTL, keyed by a
 * truncated SHA-256 of the input. Request failures, non-2xx responses and
 * replies that name no known category are logged (failures only) and
 * reported as `null`; they are not cached.
 *
 * @param stableStr normalised description of the symbol; falsy input returns `null`.
 * @returns {Promise<string|null>} an entry of CATEGORIES, or `null`.
 */
async function categoryFromLLM(stableStr) {
    if (!stableStr)
        return null;
    const cacheKey = createHash('sha256')
        .update(stableStr)
        .digest('hex')
        .slice(0, 16);
    const cached = llmCategoryCache.get(cacheKey);
    if (cached && Date.now() - cached.timestamp < LLM_CACHE_TTL)
        return cached.category;
    const prompt = `给一段代码语义说明,在下面列出来的所有类别中找到最相符的类别,请直接返回命中的原始字符串,没有命中不用返回。
代码语义:${stableStr}
类别合集: ${CATEGORIES.join(', ')}`;
    try {
        const body = {
            model: OLLAMA_MODEL,
            messages: [{ role: 'user', content: prompt }],
            temperature: 0.4,
            stream: false,
        };
        const res = await fetch(OLLAMA_URL, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(body),
        });
        // An error payload has no `choices`; report the status instead of a TypeError.
        if (!res.ok)
            throw new Error(`Ollama responded with HTTP ${res.status}`);
        const data = await res.json();
        const category = matchCategoryInReply(data?.choices?.[0]?.message?.content);
        if (category) {
            llmCategoryCache.set(cacheKey, { category, timestamp: Date.now() });
            return category;
        }
    }
    catch (e) {
        console.error('[categoryFromLLM]', e);
    }
    return null;
}
|
|
99
|
+
// 5. Three-layer fusion
/**
 * Assigns a category to every row, trying three layers in order and stopping
 * at the first that answers: path/name rules, embedding similarity, LLM.
 *
 * @param rows rows to classify; `name`, `path` and `content` are read.
 * @param vecs embedding vectors aligned with `rows` by index. May be absent or
 *   shorter than `rows`; rows without a vector skip the embedding layer.
 * @returns {Promise<object[]>} shallow copies of the rows, in input order,
 *   with `category` set (`null` when no layer produced one).
 */
export async function resolveCategory(rows, vecs) {
    const pros = rows.map(async (r, i) => {
        const { name } = r;
        // Row paths are project-relative, so a marker directory can be the very
        // first segment ("components/Button.js"). The path rule looks for
        // "/<marker>/", hence the leading slash.
        const ruleCategory = inferCategoryFromPath(`/${r.path}`) || inferCategoryFromName(name);
        console.error(`===from ruleCategory`, name, ruleCategory);
        if (ruleCategory) {
            return {
                ...r,
                category: ruleCategory,
            };
        }
        // TODO: known problem — the row vector encodes a whole semantic template
        // while the category vectors encode single words, so the similarity is
        // bound to stay below 0.3.
        const vec = vecs?.[i];
        const emd = vec == null ? null : categoryFromEmbedding(vec);
        console.error(`===from categoryFromEmbedding`, name, emd);
        if (emd) {
            return {
                ...r,
                category: emd,
            };
        }
        const cateLlm = await categoryFromLLM(r.content);
        console.error(`===from categoryFromLLM`, name, cateLlm);
        return {
            ...r,
            category: cateLlm,
        };
    });
    const newRows = await Promise.all(pros);
    return newRows;
}
|
|
@@ -1,15 +1,17 @@
|
|
|
1
|
+
// 仅js类型使用,后续会删掉
|
|
2
|
+
const MAX_CONTENT_LENGTH = 1200;
|
|
1
3
|
function briefMeta(meta) {
|
|
2
|
-
const keys = [
|
|
4
|
+
const keys = ['props', 'params', 'properties', 'hooks'];
|
|
3
5
|
const parts = [];
|
|
4
6
|
for (const k of keys) {
|
|
5
7
|
const v = meta[k];
|
|
6
8
|
if (Array.isArray(v)) {
|
|
7
|
-
const strs = v.filter((x) => typeof x ===
|
|
9
|
+
const strs = v.filter((x) => typeof x === 'string');
|
|
8
10
|
if (strs.length)
|
|
9
|
-
parts.push(`${k}: ${strs.slice(0, 24).join(
|
|
11
|
+
parts.push(`${k}: ${strs.slice(0, 24).join(', ')}`);
|
|
10
12
|
}
|
|
11
13
|
}
|
|
12
|
-
return parts.join(
|
|
14
|
+
return parts.join('; ');
|
|
13
15
|
}
|
|
14
16
|
/**
|
|
15
17
|
* 拼成一段供向量模型编码的文本(名称、路径、注释、meta 摘要、源码片段)。
|
|
@@ -19,10 +21,10 @@ export function indexedRowToEmbedText(row) {
|
|
|
19
21
|
return [
|
|
20
22
|
`${row.type} ${row.name}`,
|
|
21
23
|
row.path,
|
|
22
|
-
row.description ??
|
|
24
|
+
row.description ?? '',
|
|
23
25
|
metaBit,
|
|
24
|
-
(row.content ??
|
|
26
|
+
(row.content ?? '').slice(0, MAX_CONTENT_LENGTH),
|
|
25
27
|
]
|
|
26
28
|
.filter((s) => s.length > 0)
|
|
27
|
-
.join(
|
|
29
|
+
.join('\n');
|
|
28
30
|
}
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* JSX 检测、路径/selector 启发式、JSDoc 摘要、相对路径。
|
|
3
3
|
*/
|
|
4
|
-
import { SyntaxKind } from
|
|
4
|
+
import { SyntaxKind } from 'ts-morph';
|
|
5
5
|
/**
|
|
6
6
|
* 判断源文件路径是否为 TSX(React JSX 语法所在扩展名)。
|
|
7
7
|
* @returns `true` 表示按「可能出现 JSX」处理,用于与 `.ts` 区分组件启发式。
|
|
8
8
|
*/
|
|
9
9
|
export function isTsxFile(filePath) {
|
|
10
|
-
return filePath.toLowerCase().endsWith(
|
|
10
|
+
return filePath.toLowerCase().endsWith('.tsx');
|
|
11
11
|
}
|
|
12
12
|
/**
|
|
13
13
|
* 在 AST 子树中是否出现 JSX 节点(元素、自闭合标签或 Fragment)。
|
|
@@ -27,30 +27,49 @@ export function hasJsxInNode(node) {
|
|
|
27
27
|
return found;
|
|
28
28
|
}
|
|
29
29
|
/**
|
|
30
|
-
* 从文件路径推断业务语义目录名(如 `.../components/
|
|
31
|
-
* @returns 命中 `components|features|...`
|
|
30
|
+
* 从文件路径推断业务语义目录名(如 `.../components/Button.tsx` → `components`)。
|
|
31
|
+
* @returns 命中 `components|features|...`
|
|
32
32
|
*/
|
|
33
33
|
export function inferCategoryFromPath(filePath) {
|
|
34
|
-
const
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
34
|
+
const markers = [
|
|
35
|
+
'components',
|
|
36
|
+
'features',
|
|
37
|
+
'modules',
|
|
38
|
+
'pages',
|
|
39
|
+
'widgets',
|
|
40
|
+
'hooks',
|
|
41
|
+
'utils',
|
|
42
|
+
'services',
|
|
43
|
+
'selectors',
|
|
44
|
+
'types',
|
|
45
|
+
'apis',
|
|
46
|
+
];
|
|
47
|
+
for (const m of markers) {
|
|
48
|
+
if (filePath.toLowerCase().includes(`/${m}/`)) {
|
|
49
|
+
return m;
|
|
41
50
|
}
|
|
42
51
|
}
|
|
43
52
|
return null;
|
|
44
53
|
}
|
|
45
54
|
/**
|
|
46
|
-
*
|
|
47
|
-
* @
|
|
55
|
+
* 从命名推断业务语义类别(如 `useAuth` → `hook`,`formatDate` → `format`,`validateData` → `validation`)。
|
|
56
|
+
* @param name
|
|
57
|
+
* @returns 命中`hook | ...`
|
|
48
58
|
*/
|
|
49
|
-
export function
|
|
50
|
-
const
|
|
51
|
-
if (
|
|
52
|
-
return
|
|
53
|
-
if (
|
|
59
|
+
/**
 * Infers a semantic category from a symbol name
 * (`useAuth` → `hook`, `fetchUser` → `network`, `formatDate` → `format`,
 * `validateData` → `validation`).
 *
 * The hook rule is checked on the original casing: `use` must be the whole
 * name or be followed by an upper-case letter, digit or underscore. Testing a
 * lower-cased prefix would also classify `UserList`, `userService` or
 * `useful` as hooks.
 *
 * @param originName symbol name as written in source.
 * @returns `'hook' | 'network' | 'format' | 'validation'`, or `null` when no
 *   rule matches or the name is not a string.
 */
export function inferCategoryFromName(originName) {
    if (typeof originName !== 'string')
        return null;
    if (/^use(?:[A-Z0-9_]|$)/.test(originName))
        return 'hook';
    // The remaining rules are case-insensitive substring checks.
    const name = originName.toLowerCase();
    if (name.includes('fetch'))
        return 'network';
    if (name.includes('format'))
        return 'format';
    if (name.includes('validate'))
        return 'validation';
    return null;
}
|
|
71
|
+
/**
 * Reports whether an export name follows the hook naming convention: the
 * name is `use`, or starts with `use` followed by an upper-case letter, digit
 * or underscore.
 *
 * The pattern is anchored at the start of the name. Anchoring at the end
 * accepts `mouse` or `pause` and rejects `useState`.
 *
 * @param exportName exported identifier.
 * @returns `true` for hook-style names; `false` otherwise, including for
 *   non-string input.
 */
export function isHookLike(exportName) {
    return typeof exportName === 'string' && /^use(?:[A-Z0-9_]|$)/.test(exportName);
}
|
|
@@ -61,7 +80,7 @@ export function isSelectorLike(filePath, exportName) {
|
|
|
61
80
|
export function getLeadingDocDescription(node) {
|
|
62
81
|
const tryNode = (n) => {
|
|
63
82
|
const jd = n.getJsDocs;
|
|
64
|
-
if (typeof jd !==
|
|
83
|
+
if (typeof jd !== 'function')
|
|
65
84
|
return null;
|
|
66
85
|
const docs = jd.call(n);
|
|
67
86
|
if (!docs?.length)
|
|
@@ -69,15 +88,15 @@ export function getLeadingDocDescription(node) {
|
|
|
69
88
|
const t = docs[0].getDescription().trim();
|
|
70
89
|
return t || null;
|
|
71
90
|
};
|
|
72
|
-
return tryNode(node) ?? (node.getParent() ? tryNode(node.getParent()) : null);
|
|
91
|
+
return (tryNode(node) ?? (node.getParent() ? tryNode(node.getParent()) : null));
|
|
73
92
|
}
|
|
74
93
|
/**
|
|
75
94
|
* 将绝对路径转为相对 `projectRoot` 的路径,作为库中 `symbols.path`(便于跨机器、展示)。
|
|
76
95
|
* @returns 相对路径;若无法裁掉前缀则回退为原始绝对路径。
|
|
77
96
|
*/
|
|
78
97
|
export function getRelativePathForDisplay(projectRoot, absolutePath) {
|
|
79
|
-
const r = projectRoot.replace(/\\/g,
|
|
80
|
-
const a = absolutePath.replace(/\\/g,
|
|
98
|
+
const r = projectRoot.replace(/\\/g, '/');
|
|
99
|
+
const a = absolutePath.replace(/\\/g, '/');
|
|
81
100
|
if (a.startsWith(r)) {
|
|
82
101
|
return a.slice(r.length + 1);
|
|
83
102
|
}
|
|
@@ -92,5 +111,5 @@ export function snippetForNode(node, maxLen = 4000) {
|
|
|
92
111
|
const raw = node.getText();
|
|
93
112
|
if (raw.length <= maxLen)
|
|
94
113
|
return raw;
|
|
95
|
-
return raw.slice(0, maxLen) +
|
|
114
|
+
return raw.slice(0, maxLen) + '\n/* ... truncated ... */';
|
|
96
115
|
}
|