@lorrylurui/code-intelligence-mcp 1.1.15 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -3
- package/dist/cli/ci-index-cli.js +85 -0
- package/dist/cli/ci-index.js +76 -0
- package/dist/cli/detect-duplicates.js +1 -6
- package/dist/cli/embedding-worker-cli.js +35 -0
- package/dist/cli/index-codebase-cli.js +64 -0
- package/dist/cli/index-codebase.js +5 -4
- package/dist/config/env.js +53 -81
- package/dist/config/symbolStatus.js +8 -0
- package/dist/db/mysql.js +3 -6
- package/dist/db/postgres.js +13 -0
- package/dist/db/schema.js +41 -19
- package/dist/indexer/astNormalizer.js +201 -0
- package/dist/indexer/babelParser.js +40 -15
- package/dist/indexer/categoryClassifier.js +129 -0
- package/dist/indexer/embedText.js +9 -7
- package/dist/indexer/heuristics.js +42 -23
- package/dist/indexer/indexProject.js +146 -56
- package/dist/indexer/jsAstNormalizer.js +201 -0
- package/dist/indexer/persistSymbols.js +49 -24
- package/dist/indexer/tsAstNormalizer.js +363 -0
- package/dist/prompts/reusableCodeAdvisorPrompt.js +21 -8
- package/dist/repositories/symbolRepository.js +53 -46
- package/dist/services/embeddingQueue.js +57 -0
- package/dist/services/reindex.js +90 -43
- package/dist/tools/getSymbolDetail.js +3 -1
- package/dist/tools/incUsage.js +12 -3
- package/dist/tools/reindex.js +3 -1
- package/dist/tools/searchByStructure.js +6 -2
- package/dist/tools/searchSymbols.js +18 -4
- package/dist/workers/embeddingWorker.js +145 -0
- package/package.json +10 -5
package/dist/db/schema.js
CHANGED
|
@@ -1,29 +1,51 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* 动态生成数据库表结构 SQL
|
|
2
|
+
* 动态生成数据库表结构 SQL(PostgreSQL + pgvector),表名可通过环境变量配置
|
|
3
3
|
*/
|
|
4
4
|
import { env } from '../config/env.js';
|
|
5
|
+
import { DEFAULT_STATUS_ON_UPSERT } from '../config/symbolStatus.js';
|
|
6
|
+
/**
 * Build the statement that makes sure the `vector` extension is enabled.
 * @returns {string} A `CREATE EXTENSION IF NOT EXISTS` statement (no trailing semicolon).
 */
export function getEnsureExtensionSQL() {
    const statement = 'CREATE EXTENSION IF NOT EXISTS vector';
    return statement;
}
|
|
5
10
|
/**
 * Build the `CREATE TABLE IF NOT EXISTS` statement for the symbols table.
 *
 * The table name comes from `env.symbolsTable` and the default of the
 * `status` column from `DEFAULT_STATUS_ON_UPSERT`; both are interpolated
 * verbatim into the SQL text.
 * @returns {string} The DDL statement (no trailing semicolon).
 */
export function getSymbolsTableSQL() {
    const table = env.symbolsTable;
    // One entry per column / constraint, in the order they appear in the table.
    const definitions = [
        'id SERIAL PRIMARY KEY',
        'name VARCHAR(255) NOT NULL',
        'type VARCHAR(50) NOT NULL',
        'category VARCHAR(255)',
        'path TEXT NOT NULL',
        'description TEXT',
        'content TEXT',
        'meta JSONB',
        'usage_count INT NOT NULL DEFAULT 0',
        'embedding vector(384)',
        "insert_user VARCHAR(255) NOT NULL DEFAULT 'system'",
        "updated_user VARCHAR(255) NOT NULL DEFAULT 'system'",
        'created_at TIMESTAMP NOT NULL DEFAULT NOW()',
        'updated_at TIMESTAMP NOT NULL DEFAULT NOW()',
        'file_hash VARCHAR(64)',
        'semantic_hash VARCHAR(64)',
        `status SMALLINT NOT NULL DEFAULT ${DEFAULT_STATUS_ON_UPSERT}`,
        `CONSTRAINT uk_${table}_path_name UNIQUE (path, name)`,
        `CONSTRAINT chk_${table}_type CHECK (type IN ('component','function','type','class','interface','hook'))`,
    ];
    return `CREATE TABLE IF NOT EXISTS ${table} (\n${definitions.join(',\n')}\n)`;
}
|
|
26
|
-
/**
|
|
35
|
+
/**
 * Build the statements for the basic (non-HNSW) indexes of the symbols table.
 *
 * The HNSW vector index is intentionally not included; the original note
 * recommends creating it manually once the table holds more than ~1000 rows.
 *
 * Index names are scoped by table name (`idx_<table>_<column>`), matching
 * the table-scoped constraint names. PostgreSQL index names are unique per
 * schema, so an unscoped name combined with `IF NOT EXISTS` would silently
 * skip the indexes of a second, differently named symbols table.
 *
 * NOTE(review): databases created with the earlier unscoped names
 * (`idx_file_hash`, `idx_semantic_hash`, `idx_status`) keep those indexes;
 * drop them manually if the duplicates are unwanted.
 * @returns {string[]} One `CREATE INDEX IF NOT EXISTS` statement per indexed column.
 */
export function getSymbolsIndexSQLs() {
    const table = env.symbolsTable;
    const indexedColumns = ['file_hash', 'semantic_hash', 'status'];
    return indexedColumns.map((column) => `CREATE INDEX IF NOT EXISTS idx_${table}_${column} ON ${table}(${column})`);
}
|
|
44
|
+
/**
 * Collect every schema statement in execution order: extension first, then
 * the table, then its indexes. The statements can be executed one by one.
 * @returns {string[]} The ordered list of SQL statements.
 */
export function getAllTableSQLs() {
    const statements = [getEnsureExtensionSQL(), getSymbolsTableSQL()];
    statements.push(...getSymbolsIndexSQLs());
    return statements;
}
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* astNormalizer.ts
|
|
3
|
+
* 对 ts-morph Node 做语义级标准化,生成 semantic_hash。
|
|
4
|
+
*
|
|
5
|
+
* 去掉:参数名、泛型参数名、函数体实现、空白格式、字面量值
|
|
6
|
+
* 保留:参数类型结构、返回类型、sideEffects、hooks
|
|
7
|
+
*/
|
|
8
|
+
import { createHash } from 'node:crypto';
|
|
9
|
+
import { Node, SyntaxKind } from 'ts-morph';
|
|
10
|
+
// ─────────────────────────────────────────────
// Allow-list of built-in type names: these are never replaced with $T
// ─────────────────────────────────────────────
const BUILTIN_TYPES = new Set([
    // Primitive and special type keywords
    ...['string', 'number', 'boolean', 'void', 'null', 'undefined', 'never', 'unknown', 'any', 'object', 'symbol', 'bigint'],
    // Standard containers
    ...['Promise', 'Array', 'Record', 'Map', 'Set', 'WeakMap', 'WeakSet'],
    // TypeScript utility types
    ...['Partial', 'Required', 'Readonly', 'Pick', 'Omit', 'Exclude', 'Extract', 'NonNullable', 'ReturnType', 'InstanceType'],
    // React names
    ...['React', 'ReactNode', 'ReactElement', 'FC'],
    // Event types
    ...['MouseEvent', 'KeyboardEvent', 'ChangeEvent'],
    // DOM element types
    ...['HTMLElement', 'HTMLDivElement', 'HTMLInputElement'],
    // Styling and ref helpers
    ...['CSSProperties', 'RefObject', 'MutableRefObject'],
]);
|
|
57
|
+
/**
 * Normalize a single type identifier.
 *
 * Names on the built-in allow-list are kept as they are. Names that look
 * like generic parameters (`T` followed by an uppercase letter, or a single
 * uppercase letter) become `$T`. Everything else is returned unchanged.
 * @param {string} name - The identifier to normalize.
 * @returns {string} `name` itself or the placeholder `$T`.
 */
function normalizeTypeName(name) {
    const isBuiltin = BUILTIN_TYPES.has(name);
    const looksGeneric = /^T[A-Z]/.test(name) || /^[A-Z]$/.test(name);
    return !isBuiltin && looksGeneric ? '$T' : name;
}
|
|
64
|
+
/**
 * Normalize the text of a type annotation.
 *
 * Every identifier that starts with an uppercase letter is passed through
 * `normalizeTypeName`; afterwards runs of whitespace are collapsed to one
 * space and the result is trimmed.
 * @param {string} typeStr - Source text of a type.
 * @returns {string} The normalized type text.
 */
function normalizeTypeString(typeStr) {
    const capitalizedIdentifier = /\b([A-Z][A-Za-z0-9]*)\b/g;
    const withNormalizedNames = typeStr.replace(capitalizedIdentifier, (identifier) => normalizeTypeName(identifier));
    const singleSpaced = withNormalizedNames.replace(/\s+/g, ' ');
    return singleSpaced.trim();
}
|
|
70
|
+
// ─────────────────────────────────────────────
// normalizeNode: walk the AST recursively and emit a normalized string
// ─────────────────────────────────────────────
/**
 * Render a ts-morph node as a normalized string.
 *
 * Rewrites applied while walking the tree:
 * - a `Block` becomes `{}` (the implementation is ignored);
 * - a parameter becomes `[...]$p:<type>[=$default]`, or `{}` instead of `$p`
 *   when the parameter is an object/array binding pattern; a missing type
 *   annotation is rendered as `$unknown`;
 * - a type parameter becomes `$T` plus its normalized `extends` constraint;
 * - a type reference has its text passed through `normalizeTypeString`;
 * - JSX elements become `<JSX/>`;
 * - string / numeric / bigint / boolean literals become `"$s"`, `$n`, `$b`.
 * Any other node is the concatenation of its children, or its own text when
 * it has no children. The final string has whitespace runs collapsed.
 *
 * Every plain parameter is rendered as the same `$p` placeholder, so the
 * per-name bookkeeping that an earlier revision kept (and never read) is gone.
 * @param {import('ts-morph').Node} node - Root node to normalize.
 * @returns {string} The normalized text.
 */
export function normalizeNode(node) {
    function visit(n) {
        const kind = n.getKind();
        // Function body -> {} (implementation does not matter)
        if (kind === SyntaxKind.Block) {
            return '{}';
        }
        // Parameter: keep only the type, drop the name
        if (Node.isParameterDeclaration(n)) {
            const nameNode = n.getNameNode();
            const typeNode = n.getTypeNode();
            const typeStr = typeNode
                ? normalizeTypeString(typeNode.getText())
                : '$unknown';
            const prefix = n.isRestParameter() ? '...' : '';
            const suffix = n.hasInitializer() ? '=$default' : '';
            // Destructured parameter: { userId, options }: Config -> {}:Config
            const isDestructured = Node.isObjectBindingPattern(nameNode) ||
                Node.isArrayBindingPattern(nameNode);
            return `${prefix}${isDestructured ? '{}' : '$p'}:${typeStr}${suffix}`;
        }
        // Generic parameter: T / TData -> $T
        if (Node.isTypeParameterDeclaration(n)) {
            const constraint = n.getConstraint();
            return `$T${constraint ? ` extends ${normalizeTypeString(constraint.getText())}` : ''}`;
        }
        // Type reference: normalize the names it contains
        if (Node.isTypeReference(n)) {
            return normalizeTypeString(n.getText());
        }
        // JSX: only record that it is present
        if (Node.isJsxElement(n) || Node.isJsxSelfClosingElement(n)) {
            return '<JSX/>';
        }
        // Literals -> placeholders
        if (kind === SyntaxKind.StringLiteral) {
            return '"$s"';
        }
        if (kind === SyntaxKind.NumericLiteral ||
            kind === SyntaxKind.BigIntLiteral) {
            return '$n';
        }
        if (kind === SyntaxKind.TrueKeyword || kind === SyntaxKind.FalseKeyword) {
            return '$b';
        }
        const children = n.getChildren();
        if (children.length === 0) {
            return n.getText();
        }
        return children.map(visit).join('');
    }
    return visit(node).replace(/\s+/g, ' ').trim();
}
|
|
129
|
+
// ─────────────────────────────────────────────
// extractNormalizedSignature: derive a normalized signature from a declaration node
// ─────────────────────────────────────────────
/**
 * Produce a normalized signature string for a declaration node.
 *
 * - Function declarations, arrow functions and function expressions yield
 *   `fn<typeParams>(params)=>returnType`. Type parameters are included for
 *   all three forms, so `function f<T>()` and `const f = <T>() => …` produce
 *   the same signature. A missing return annotation is rendered as `$inferred`.
 * - A variable declaration whose initializer is an arrow function or a
 *   function expression is described by that function; any other variable
 *   declaration is normalized as a whole node.
 * - Interfaces and type aliases are normalized as whole nodes.
 * - A class yields `class{name(params)=>ret;…}` built from its methods only.
 * - Anything else is normalized as a whole node.
 * @param {import('ts-morph').Node} node - The declaration to describe.
 * @returns {string} The normalized signature.
 */
export function extractNormalizedSignature(node) {
    // Function declaration / arrow function / function expression
    if (Node.isFunctionDeclaration(node) ||
        Node.isArrowFunction(node) ||
        Node.isFunctionExpression(node)) {
        // All three node kinds expose getTypeParameters() in ts-morph.
        const typeParams = node.getTypeParameters().map((tp) => normalizeNode(tp));
        const params = node.getParameters().map((p) => normalizeNode(p));
        const retNode = node.getReturnTypeNode?.();
        const returnType = retNode
            ? normalizeTypeString(retNode.getText())
            : '$inferred';
        const tpStr = typeParams.length ? `<${typeParams.join(',')}>` : '';
        return `fn${tpStr}(${params.join(',')})=>${returnType}`;
    }
    // Variable declaration (const foo = () => {})
    if (Node.isVariableDeclaration(node)) {
        const init = node.getInitializer();
        if (init &&
            (Node.isArrowFunction(init) || Node.isFunctionExpression(init))) {
            return extractNormalizedSignature(init);
        }
        return normalizeNode(node);
    }
    // interface / type alias
    if (Node.isInterfaceDeclaration(node) ||
        Node.isTypeAliasDeclaration(node)) {
        return normalizeNode(node);
    }
    // class: only the list of method signatures
    if (Node.isClassDeclaration(node)) {
        const methods = node.getMethods().map((m) => {
            const params = m.getParameters().map((p) => normalizeNode(p));
            const retNode = m.getReturnTypeNode();
            const ret = retNode
                ? normalizeTypeString(retNode.getText())
                : '$inferred';
            return `${m.getName()}(${params.join(',')})=>${ret}`;
        });
        return `class{${methods.join(';')}}`;
    }
    return normalizeNode(node);
}
|
|
176
|
+
// ─────────────────────────────────────────────
// computeSemanticHash
// Included: normalized signature + name + type + description + sideEffects + hooks
// Excluded: parameter names, implementation, formatting, callers/callees
// ─────────────────────────────────────────────
/**
 * Compute the semantic hash of an indexed row.
 *
 * The hash is the SHA-256 hex digest of a JSON document with the keys
 * `name`, `type`, `description`, `signature`, `sideEffects`, `hooks` (in that
 * order). `signature` is empty when the row carries no AST node; the two
 * lists are sorted copies of `meta.sideEffects` / `meta.hooks`.
 * @param {object} row - Row with `name`, `type` and optional `description`, `meta`, `node`.
 * @returns {string} 64-character hex digest.
 */
export function computeSemanticHash(row) {
    const astNode = row.node ? row.node : null;
    const metadata = row.meta ? row.meta : {};
    // Copy before sorting so the caller's arrays are left untouched.
    const sortedCopy = (list) => [...(list ?? [])].sort();
    const signature = astNode ? extractNormalizedSignature(astNode) : '';
    const payload = JSON.stringify({
        name: row.name,
        type: row.type,
        description: row.description ?? null,
        signature,
        sideEffects: sortedCopy(metadata.sideEffects),
        hooks: sortedCopy(metadata.hooks),
    });
    const hasher = createHash('sha256');
    hasher.update(payload);
    return hasher.digest('hex');
}
|
|
196
|
+
// ─────────────────────────────────────────────
// computeFileHash: hash of the raw file content
// ─────────────────────────────────────────────
/**
 * Hash the raw content of a file.
 * @param {string | Buffer} fileContent - The file content to hash.
 * @returns {string} SHA-256 digest as a 64-character hex string.
 */
export function computeFileHash(fileContent) {
    const hasher = createHash('sha256');
    hasher.update(fileContent);
    return hasher.digest('hex');
}
|
|
@@ -3,7 +3,9 @@
|
|
|
3
3
|
*/
|
|
4
4
|
import * as babelParser from '@babel/parser';
|
|
5
5
|
import * as bt from '@babel/types';
|
|
6
|
-
import { getRelativePathForDisplay, inferCategoryFromPath, } from './heuristics.js';
|
|
6
|
+
import { getRelativePathForDisplay, inferCategoryFromPath, inferCategoryFromName, } from './heuristics.js';
|
|
7
|
+
import { computeFileHash } from './tsAstNormalizer.js';
|
|
8
|
+
import { computeSemanticHashJs } from './jsAstNormalizer.js';
|
|
7
9
|
/** 从 JS 文件内容解析导出的代码块 */
|
|
8
10
|
export function parseJsFile(filePath, content, projectRoot) {
|
|
9
11
|
const out = [];
|
|
@@ -46,7 +48,23 @@ export function parseJsFile(filePath, content, projectRoot) {
|
|
|
46
48
|
out.push(...rows);
|
|
47
49
|
}
|
|
48
50
|
}
|
|
49
|
-
|
|
51
|
+
// 第三轮:更新content、file_hash、semantic_hash 字段
|
|
52
|
+
const outWithTwoHash = out.map((o) => {
|
|
53
|
+
const [semantic_hash, stableStr] = computeSemanticHashJs(o);
|
|
54
|
+
const { name, type, description, meta, category, path } = o;
|
|
55
|
+
return {
|
|
56
|
+
name,
|
|
57
|
+
type,
|
|
58
|
+
category,
|
|
59
|
+
path,
|
|
60
|
+
description,
|
|
61
|
+
meta,
|
|
62
|
+
content: stableStr,
|
|
63
|
+
file_hash: computeFileHash(content),
|
|
64
|
+
semantic_hash,
|
|
65
|
+
};
|
|
66
|
+
});
|
|
67
|
+
return outWithTwoHash;
|
|
50
68
|
}
|
|
51
69
|
/** 处理导出的声明 */
|
|
52
70
|
function processStatement(stmt, filePath, isJsx, projectRoot) {
|
|
@@ -216,21 +234,19 @@ function arrowToFunction(name, arrow) {
|
|
|
216
234
|
}
|
|
217
235
|
function createRowFromFunction(name, decl, filePath, projectRoot, isJsx) {
|
|
218
236
|
const relPath = getRelativePathForDisplay(projectRoot, filePath);
|
|
219
|
-
const category = inferCategoryFromPath(filePath);
|
|
237
|
+
const category = inferCategoryFromPath(filePath) || inferCategoryFromName(name);
|
|
220
238
|
// 检测是否有 JSX
|
|
221
239
|
const hasJsx = isJsx || containsJsx(decl);
|
|
222
240
|
// 判断类型:
|
|
223
241
|
// 1. 有 JSX = component
|
|
224
|
-
// 2. 名字包含
|
|
242
|
+
// 2. 名字包含 use = hook
|
|
225
243
|
// 3. 大写开头(JSX 组件约定)= component
|
|
226
|
-
// 4. 其他 =
|
|
227
|
-
const type =
|
|
228
|
-
? '
|
|
229
|
-
: name
|
|
230
|
-
? '
|
|
231
|
-
:
|
|
232
|
-
? 'component'
|
|
233
|
-
: 'util';
|
|
244
|
+
// 4. 其他 = function
|
|
245
|
+
const type = name.toLowerCase().includes('use')
|
|
246
|
+
? 'hook'
|
|
247
|
+
: isJsx || /^[A-Z]/.test(name)
|
|
248
|
+
? 'component'
|
|
249
|
+
: 'function';
|
|
234
250
|
const params = decl.params
|
|
235
251
|
.filter((p) => bt.isIdentifier(p))
|
|
236
252
|
.map((p) => p.name);
|
|
@@ -250,21 +266,28 @@ function createRowFromFunction(name, decl, filePath, projectRoot, isJsx) {
|
|
|
250
266
|
...(hooks.length ? { hooks } : {}),
|
|
251
267
|
...(sideEffects.length ? { sideEffects } : {}),
|
|
252
268
|
},
|
|
269
|
+
file_hash: '',
|
|
270
|
+
semantic_hash: '',
|
|
271
|
+
node: decl,
|
|
253
272
|
};
|
|
254
273
|
}
|
|
255
274
|
/**
 * Build the index row for an exported class.
 *
 * A class whose name starts with an uppercase letter is recorded as a
 * `component`; any other name is recorded as a `function`. The hash fields
 * are left empty here and the declaration is attached as `node`.
 * @param {string} name - Exported class name.
 * @param {object} _decl - The class declaration AST node.
 * @param {string} filePath - Path of the file that contains the class.
 * @param {string} projectRoot - Project root used to shorten `filePath`.
 * @returns {object} The row describing the class.
 */
function createRowFromClass(name, _decl, filePath, projectRoot) {
    const displayPath = getRelativePathForDisplay(projectRoot, filePath);
    const pathCategory = inferCategoryFromPath(filePath);
    const startsUppercase = /^[A-Z]/.test(name);
    return {
        name,
        type: startsUppercase ? 'component' : 'function',
        category: pathCategory,
        path: displayPath,
        description: null,
        // `content` and `meta.kind` are not used for the time being.
        content: null,
        meta: {},
        file_hash: '',
        semantic_hash: '',
        node: _decl,
    };
}
|
|
270
293
|
/** 简单检测是否包含 JSX */
|
|
@@ -380,7 +403,9 @@ function getNodeText(n) {
|
|
|
380
403
|
if (propNode && typeof propNode === 'object') {
|
|
381
404
|
const propType = propNode.type;
|
|
382
405
|
if (propType === 'Identifier') {
|
|
383
|
-
prop =
|
|
406
|
+
prop =
|
|
407
|
+
propNode.name ||
|
|
408
|
+
'';
|
|
384
409
|
}
|
|
385
410
|
else if (propType === 'Literal') {
|
|
386
411
|
prop = String(propNode.value ?? '');
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* category 分类器:三层融合(规则 + embedding + LLM)
|
|
3
|
+
*/
|
|
4
|
+
import { createHash } from 'node:crypto';
|
|
5
|
+
import { env } from '../config/env.js';
|
|
6
|
+
import { createEmbeddingClient } from '../services/embeddingClient.js';
|
|
7
|
+
import { cosineSimilarity } from '../services/vectorMath.js';
|
|
8
|
+
import { inferCategoryFromPath, inferCategoryFromName } from './heuristics.js';
|
|
9
|
+
// 1. The category label space (order matters: lookups return the first match)
const CATEGORIES = [
    'network', 'form', 'validation',
    'format', 'state', 'ui',
    'storage', 'math', 'hook',
    'utils', 'component', 'service',
];
|
|
24
|
+
// 2. Pre-computed category embeddings (cached for the lifetime of the module)
let categoryEmbeddingsCache = null;
/**
 * Embed every category label and store the result in the module cache.
 *
 * Does nothing when `env.embeddingServiceUrl` is not set. Each label is
 * embedded with its own `client.embed([label])` call; the calls run
 * concurrently and the cache is assigned only after all of them resolve.
 * @returns {Promise<void>}
 */
export async function initCategoryEmbeddings() {
    if (!env.embeddingServiceUrl) {
        return;
    }
    const client = createEmbeddingClient(env.embeddingServiceUrl);
    const embedLabel = async (label) => {
        const [vector] = await client.embed([label]);
        return { name: label, embedding: vector };
    };
    const labelled = await Promise.all(CATEGORIES.map(embedLabel));
    categoryEmbeddingsCache = labelled;
}
|
|
36
|
+
// 3. Embedding layer
const EMBEDDING_THRESHOLD = 0.5;
// TODO: known problem (author's note) - the symbol embedding is the vector of a
// semantic template while the cache holds vectors of single words, so the
// similarity is expected to stay below 0.3 and this layer rarely matches.
/**
 * Pick the category whose cached embedding is most similar to `embedding`.
 * @param {number[]} embedding - Embedding vector of the symbol.
 * @returns {string | null} The best category, or `null` when the cache is not
 *   initialized or the best cosine similarity is below `EMBEDDING_THRESHOLD`.
 */
function categoryFromEmbedding(embedding) {
    if (!categoryEmbeddingsCache) {
        return null;
    }
    let winner = 'utils';
    let topScore = -Infinity;
    categoryEmbeddingsCache.forEach((candidate) => {
        const similarity = cosineSimilarity(embedding, candidate.embedding);
        // Strict comparison: on a tie the earlier category is kept.
        if (similarity > topScore) {
            topScore = similarity;
            winner = candidate.name;
        }
    });
    if (topScore < EMBEDDING_THRESHOLD) {
        return null;
    }
    return winner;
}
|
|
53
|
+
// 4. LLM layer (with an in-memory cache)
const LLM_CACHE_TTL = 24 * 60 * 60 * 1000;
const OLLAMA_URL = 'http://127.0.0.1:11434/v1/chat/completions';
// Adjust the model name to the local setup, e.g. 'llama3.2:3b' or 'llama3.1:8b'
const OLLAMA_MODEL = 'llama3.2:3b';
const llmCategoryCache = new Map();
/**
 * Map a free-text model reply onto one of `CATEGORIES`.
 *
 * Matching is done in three steps so that a label which is a prefix of
 * another one (`form` / `format`) cannot shadow it:
 * 1. the whole reply equals a label;
 * 2. the reply contains a label as a whole word (longest label first);
 * 3. the reply contains a label as a substring (longest label first), which
 *    keeps replies such as "formatting" working.
 * @param {unknown} reply - Raw `message.content` of the model response.
 * @returns {string | null} The matched category or `null`.
 */
function pickCategoryFromReply(reply) {
    if (typeof reply !== 'string') {
        return null;
    }
    const text = reply.trim().toLowerCase();
    if (text.length === 0) {
        return null;
    }
    if (CATEGORIES.includes(text)) {
        return text;
    }
    const longestFirst = [...CATEGORIES].sort((a, b) => b.length - a.length);
    const words = new Set(text.split(/[^a-z]+/));
    return (longestFirst.find((c) => words.has(c)) ??
        longestFirst.find((c) => text.includes(c)) ??
        null);
}
/**
 * Ask the local Ollama chat endpoint to classify a semantic description.
 *
 * Successful classifications are cached for `LLM_CACHE_TTL` milliseconds,
 * keyed by the first 16 hex characters of the SHA-256 of `stableStr`.
 * Misses are not cached. Request failures, non-2xx responses and
 * unparseable replies are logged and reported as `null`; this function
 * does not throw.
 * @param {string | null | undefined} stableStr - Semantic description of the symbol.
 * @returns {Promise<string | null>} One of `CATEGORIES`, or `null`.
 */
async function categoryFromLLM(stableStr) {
    if (!stableStr) {
        return null;
    }
    const cacheKey = createHash('sha256')
        .update(stableStr)
        .digest('hex')
        .slice(0, 16);
    const cached = llmCategoryCache.get(cacheKey);
    if (cached && Date.now() - cached.timestamp < LLM_CACHE_TTL) {
        return cached.category;
    }
    const prompt = [
        '给一段代码语义说明,在下面列出来的所有类别中找到最相符的类别,请直接返回命中的原始字符串,没有命中不用返回。',
        `代码语义:${stableStr}`,
        `类别合集: ${CATEGORIES.join(', ')}`,
    ].join('\n');
    try {
        const body = {
            model: OLLAMA_MODEL,
            messages: [{ role: 'user', content: prompt }],
            temperature: 0.4,
            stream: false,
        };
        const res = await fetch(OLLAMA_URL, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(body),
        });
        if (!res.ok) {
            // An error payload has no `choices`; report the status instead of failing later.
            console.error('[categoryFromLLM]', `unexpected HTTP status ${res.status}`);
            return null;
        }
        const data = await res.json();
        const category = pickCategoryFromReply(data?.choices?.[0]?.message?.content);
        if (category) {
            llmCategoryCache.set(cacheKey, { category, timestamp: Date.now() });
            return category;
        }
    }
    catch (e) {
        console.error('[categoryFromLLM]', e);
    }
    return null;
}
|
|
99
|
+
// 5. Fusion of the three layers
/**
 * Assign a category to every row, trying three sources in order:
 * 1. rules (`inferCategoryFromPath`, then `inferCategoryFromName`);
 * 2. similarity between the row's embedding (`vecs[i]`) and the category embeddings;
 * 3. the LLM, fed with the row's `content`.
 * The first source that yields a value wins; when none does, `category` is
 * whatever the LLM layer returned (possibly `null`). All rows are processed
 * concurrently.
 * @param {object[]} rows - Rows to classify; each needs `name`, `path` and `content`.
 * @param {Array<number[]>} vecs - Embedding vectors, index-aligned with `rows`.
 * @returns {Promise<object[]>} Copies of the rows with `category` set.
 */
export async function resolveCategory(rows, vecs) {
    const classify = async (row, index) => {
        const withCategory = (category) => ({ ...row, category });
        const fromRules = inferCategoryFromPath(row.path) || inferCategoryFromName(row.name);
        if (fromRules) {
            return withCategory(fromRules);
        }
        // TODO: known problem (author's note) - the row embedding is the vector of a
        // semantic template while the category cache holds single-word vectors, so
        // the similarity is expected to stay below 0.3.
        const fromEmbedding = categoryFromEmbedding(vecs[index]);
        if (fromEmbedding) {
            return withCategory(fromEmbedding);
        }
        return withCategory(await categoryFromLLM(row.content));
    };
    return Promise.all(rows.map(classify));
}
|
|
@@ -1,15 +1,17 @@
|
|
|
1
|
+
// 仅js类型使用,后续会删掉
|
|
2
|
+
const MAX_CONTENT_LENGTH = 1200;
|
|
1
3
|
/**
 * Summarize selected list-valued meta fields as one line of text.
 *
 * Only `props`, `params`, `properties` and `hooks` are considered, in that
 * order. For each field that is an array, its string entries (at most the
 * first 24) are joined with ", " and prefixed with the field name; fields
 * without string entries are omitted.
 * @param {object} meta - The row's meta object.
 * @returns {string} Segments joined with "; ", or an empty string.
 */
function briefMeta(meta) {
    return ['props', 'params', 'properties', 'hooks']
        .flatMap((key) => {
            const value = meta[key];
            if (!Array.isArray(value)) {
                return [];
            }
            const names = value.filter((item) => typeof item === 'string');
            return names.length ? [`${key}: ${names.slice(0, 24).join(', ')}`] : [];
        })
        .join('; ');
}
|
|
14
16
|
/**
|
|
15
17
|
* 拼成一段供向量模型编码的文本(名称、路径、注释、meta 摘要、源码片段)。
|
|
@@ -19,10 +21,10 @@ export function indexedRowToEmbedText(row) {
|
|
|
19
21
|
return [
|
|
20
22
|
`${row.type} ${row.name}`,
|
|
21
23
|
row.path,
|
|
22
|
-
row.description ??
|
|
24
|
+
row.description ?? '',
|
|
23
25
|
metaBit,
|
|
24
|
-
(row.content ??
|
|
26
|
+
(row.content ?? '').slice(0, MAX_CONTENT_LENGTH),
|
|
25
27
|
]
|
|
26
28
|
.filter((s) => s.length > 0)
|
|
27
|
-
.join(
|
|
29
|
+
.join('\n');
|
|
28
30
|
}
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* JSX 检测、路径/selector 启发式、JSDoc 摘要、相对路径。
|
|
3
3
|
*/
|
|
4
|
-
import { SyntaxKind } from
|
|
4
|
+
import { SyntaxKind } from 'ts-morph';
|
|
5
5
|
/**
|
|
6
6
|
* 判断源文件路径是否为 TSX(React JSX 语法所在扩展名)。
|
|
7
7
|
* @returns `true` 表示按「可能出现 JSX」处理,用于与 `.ts` 区分组件启发式。
|
|
8
8
|
*/
|
|
9
9
|
export function isTsxFile(filePath) {
    // Compare the extension case-insensitively by lowercasing the path first.
    const lowered = filePath.toLowerCase();
    const extension = lowered.slice(-4);
    return extension === '.tsx';
}
|
|
12
12
|
/**
|
|
13
13
|
* 在 AST 子树中是否出现 JSX 节点(元素、自闭合标签或 Fragment)。
|
|
@@ -27,30 +27,49 @@ export function hasJsxInNode(node) {
|
|
|
27
27
|
return found;
|
|
28
28
|
}
|
|
29
29
|
/**
|
|
30
|
-
* 从文件路径推断业务语义目录名(如 `.../components/
|
|
31
|
-
* @returns 命中 `components|features|...`
|
|
30
|
+
* 从文件路径推断业务语义目录名(如 `.../components/Button.tsx` → `components`)。
|
|
31
|
+
* @returns 命中 `components|features|...`
|
|
32
32
|
*/
|
|
33
33
|
export function inferCategoryFromPath(filePath) {
    // Checked in this order; the first marker found among the directories wins,
    // regardless of where it appears in the path.
    const markers = [
        'components',
        'features',
        'modules',
        'pages',
        'widgets',
        'hooks',
        'utils',
        'services',
        'selectors',
        'types',
        'apis',
    ];
    // Compare directory names instead of searching for "/marker/": this also
    // matches relative paths that start with the marker ("components/Button.tsx")
    // and paths written with Windows separators.
    const segments = filePath.toLowerCase().replace(/\\/g, '/').split('/');
    // The last segment is the file name, never a directory.
    const directories = new Set(segments.slice(0, -1));
    return markers.find((m) => directories.has(m)) ?? null;
}
|
|
45
54
|
/**
|
|
46
|
-
*
|
|
47
|
-
* @
|
|
55
|
+
* 从命名推断业务语义类别(如 `useAuth` → `hook`,`formatDate` → `format`,`validateData` → `validation`)。
|
|
56
|
+
* @param name
|
|
57
|
+
* @returns 命中`hook | ...`
|
|
48
58
|
*/
|
|
49
|
-
export function
|
|
50
|
-
const
|
|
51
|
-
if (
|
|
52
|
-
return
|
|
53
|
-
if (
|
|
59
|
+
export function inferCategoryFromName(originName) {
    // Hooks follow the `use` + uppercase/digit convention. The check runs on the
    // original casing so that names like `UserCard`, `user` or `useful` are not
    // mistaken for hooks.
    if (/^use[A-Z0-9]/.test(originName)) {
        return 'hook';
    }
    // The remaining keywords are matched case-insensitively, anywhere in the name.
    const name = originName.toLowerCase();
    if (name.includes('fetch')) {
        return 'network';
    }
    if (name.includes('format')) {
        return 'format';
    }
    if (name.includes('validate')) {
        return 'validation';
    }
    return null;
}
|
|
71
|
+
/**
 * Tell whether an export name follows the React hook naming convention:
 * `use` followed by an uppercase letter or a digit (`useAuth`, `use3D`).
 * @param {string} exportName - The exported identifier.
 * @returns {boolean} `true` for hook-style names, `false` otherwise.
 */
export function isHookLike(exportName) {
    // Anchored at the start: names that merely end in "use" (`reuse`, `pause`)
    // are not hooks.
    return /^use[A-Z0-9]/.test(exportName);
}
|
|
@@ -61,7 +80,7 @@ export function isSelectorLike(filePath, exportName) {
|
|
|
61
80
|
export function getLeadingDocDescription(node) {
|
|
62
81
|
const tryNode = (n) => {
|
|
63
82
|
const jd = n.getJsDocs;
|
|
64
|
-
if (typeof jd !==
|
|
83
|
+
if (typeof jd !== 'function')
|
|
65
84
|
return null;
|
|
66
85
|
const docs = jd.call(n);
|
|
67
86
|
if (!docs?.length)
|
|
@@ -69,15 +88,15 @@ export function getLeadingDocDescription(node) {
|
|
|
69
88
|
const t = docs[0].getDescription().trim();
|
|
70
89
|
return t || null;
|
|
71
90
|
};
|
|
72
|
-
return tryNode(node) ?? (node.getParent() ? tryNode(node.getParent()) : null);
|
|
91
|
+
return (tryNode(node) ?? (node.getParent() ? tryNode(node.getParent()) : null));
|
|
73
92
|
}
|
|
74
93
|
/**
|
|
75
94
|
* 将绝对路径转为相对 `projectRoot` 的路径,作为库中 `symbols.path`(便于跨机器、展示)。
|
|
76
95
|
* @returns 相对路径;若无法裁掉前缀则回退为原始绝对路径。
|
|
77
96
|
*/
|
|
78
97
|
export function getRelativePathForDisplay(projectRoot, absolutePath) {
|
|
79
|
-
const r = projectRoot.replace(/\\/g,
|
|
80
|
-
const a = absolutePath.replace(/\\/g,
|
|
98
|
+
const r = projectRoot.replace(/\\/g, '/');
|
|
99
|
+
const a = absolutePath.replace(/\\/g, '/');
|
|
81
100
|
if (a.startsWith(r)) {
|
|
82
101
|
return a.slice(r.length + 1);
|
|
83
102
|
}
|
|
@@ -92,5 +111,5 @@ export function snippetForNode(node, maxLen = 4000) {
|
|
|
92
111
|
const raw = node.getText();
|
|
93
112
|
if (raw.length <= maxLen)
|
|
94
113
|
return raw;
|
|
95
|
-
return raw.slice(0, maxLen) +
|
|
114
|
+
return raw.slice(0, maxLen) + '\n/* ... truncated ... */';
|
|
96
115
|
}
|