@lorrylurui/code-intelligence-mcp 1.1.14 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +91 -598
- package/dist/cli/ci-index-cli.js +66 -0
- package/dist/cli/ci-index.js +80 -0
- package/dist/cli/detect-duplicates.js +1 -6
- package/dist/cli/embedding-worker-cli.js +35 -0
- package/dist/cli/index-codebase.js +6 -7
- package/dist/config/env.js +3 -102
- package/dist/config/symbolStatus.js +8 -0
- package/dist/db/mysql.js +3 -6
- package/dist/db/schema.js +9 -2
- package/dist/indexer/astNormalizer.js +201 -0
- package/dist/indexer/babelParser.js +257 -28
- package/dist/indexer/categoryClassifier.js +129 -0
- package/dist/indexer/embedText.js +9 -7
- package/dist/indexer/extractMeta.js +7 -2
- package/dist/indexer/heuristics.js +42 -23
- package/dist/indexer/indexProject.js +145 -55
- package/dist/indexer/jsAstNormalizer.js +201 -0
- package/dist/indexer/persistSymbols.js +7 -3
- package/dist/indexer/tsAstNormalizer.js +363 -0
- package/dist/prompts/reusableCodeAdvisorPrompt.js +6 -3
- package/dist/repositories/symbolRepository.js +81 -7
- package/dist/services/embeddingQueue.js +56 -0
- package/dist/services/reindex.js +12 -9
- package/dist/tools/searchByStructure.js +3 -1
- package/dist/tools/searchSymbols.js +14 -3
- package/dist/workers/embeddingWorker.js +100 -0
- package/package.json +7 -4
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* JSX 检测、路径/selector 启发式、JSDoc 摘要、相对路径。
|
|
3
3
|
*/
|
|
4
|
-
import { SyntaxKind } from
|
|
4
|
+
import { SyntaxKind } from 'ts-morph';
|
|
5
5
|
/**
 * Tells whether a source file path carries the `.tsx` extension (compared case-insensitively).
 *
 * @param {string} filePath - Path of the source file.
 * @returns {boolean} `true` when the file should be treated as one that may contain JSX,
 *   which lets the component heuristics tell it apart from a plain `.ts` file.
 */
export function isTsxFile(filePath) {
    const extension = '.tsx';
    const lowered = filePath.toLowerCase();
    return lowered.slice(-extension.length) === extension;
}
|
|
12
12
|
/**
|
|
13
13
|
* 在 AST 子树中是否出现 JSX 节点(元素、自闭合标签或 Fragment)。
|
|
@@ -27,30 +27,49 @@ export function hasJsxInNode(node) {
|
|
|
27
27
|
return found;
|
|
28
28
|
}
|
|
29
29
|
/**
 * Infers a business-semantic category from a directory segment of a file path
 * (e.g. `.../components/Button.tsx` → `components`).
 *
 * Matching is case-insensitive and accepts both `/` and `\` as separators. Markers are
 * tried in the order listed below, so when a path contains several marker directories the
 * one listed first wins, regardless of where it appears in the path.
 *
 * @param {string} filePath - Absolute or relative path of the source file.
 * @returns {string | null} The matched marker (`components|features|...`), or `null` when
 *   no marker directory occurs in the path.
 */
export function inferCategoryFromPath(filePath) {
    const markers = [
        'components',
        'features',
        'modules',
        'pages',
        'widgets',
        'hooks',
        'utils',
        'services',
        'selectors',
        'types',
        'apis',
    ];
    // Lower-case and normalise separators once instead of once per marker, so that
    // Windows-style paths (`src\components\Button.tsx`) are recognised as well.
    const normalizedPath = filePath.toLowerCase().replace(/\\/g, '/');
    const matched = markers.find((marker) => normalizedPath.includes(`/${marker}/`));
    return matched ?? null;
}
|
|
45
54
|
/**
 * Infers a business-semantic category from a symbol name
 * (e.g. `useAuth` → `hook`, `formatDate` → `format`, `validateData` → `validation`).
 *
 * Checks run in order and the first hit wins: hook, network, format, validation.
 *
 * @param {string} originName - Symbol name in its original casing.
 * @returns {'hook' | 'network' | 'format' | 'validation' | null} The inferred category,
 *   or `null` when no rule matches.
 */
export function inferCategoryFromName(originName) {
    // Hook detection must look at the original casing: `use` has to be followed by an
    // upper-case letter or digit (or be the bare name `use`). A case-insensitive prefix
    // test would also label `user`, `userService` or `useful` as hooks.
    if (originName === 'use' || /^use[A-Z0-9]/.test(originName))
        return 'hook';
    const name = originName.toLowerCase();
    if (name.includes('fetch'))
        return 'network';
    if (name.includes('format'))
        return 'format';
    if (name.includes('validate'))
        return 'validation';
    return null;
}
|
|
71
|
+
/**
 * Tells whether an export name follows the hook naming convention: the bare name `use`,
 * or the prefix `use` followed by an upper-case letter or a digit (`useAuth`, `use3d`).
 *
 * The test is anchored at the START of the name; a pattern anchored at the end would
 * accept `pause` / `reuse` and reject every real `useXxx` hook.
 *
 * @param {string} exportName - Name under which the symbol is exported.
 * @returns {boolean} `true` when the name looks like a hook.
 */
export function isHookLike(exportName) {
    return exportName === 'use' || /^use[A-Z0-9]/.test(exportName);
}
|
|
@@ -61,7 +80,7 @@ export function isSelectorLike(filePath, exportName) {
|
|
|
61
80
|
export function getLeadingDocDescription(node) {
|
|
62
81
|
const tryNode = (n) => {
|
|
63
82
|
const jd = n.getJsDocs;
|
|
64
|
-
if (typeof jd !==
|
|
83
|
+
if (typeof jd !== 'function')
|
|
65
84
|
return null;
|
|
66
85
|
const docs = jd.call(n);
|
|
67
86
|
if (!docs?.length)
|
|
@@ -69,15 +88,15 @@ export function getLeadingDocDescription(node) {
|
|
|
69
88
|
const t = docs[0].getDescription().trim();
|
|
70
89
|
return t || null;
|
|
71
90
|
};
|
|
72
|
-
return tryNode(node) ?? (node.getParent() ? tryNode(node.getParent()) : null);
|
|
91
|
+
return (tryNode(node) ?? (node.getParent() ? tryNode(node.getParent()) : null));
|
|
73
92
|
}
|
|
74
93
|
/**
|
|
75
94
|
* 将绝对路径转为相对 `projectRoot` 的路径,作为库中 `symbols.path`(便于跨机器、展示)。
|
|
76
95
|
* @returns 相对路径;若无法裁掉前缀则回退为原始绝对路径。
|
|
77
96
|
*/
|
|
78
97
|
export function getRelativePathForDisplay(projectRoot, absolutePath) {
|
|
79
|
-
const r = projectRoot.replace(/\\/g,
|
|
80
|
-
const a = absolutePath.replace(/\\/g,
|
|
98
|
+
const r = projectRoot.replace(/\\/g, '/');
|
|
99
|
+
const a = absolutePath.replace(/\\/g, '/');
|
|
81
100
|
if (a.startsWith(r)) {
|
|
82
101
|
return a.slice(r.length + 1);
|
|
83
102
|
}
|
|
@@ -92,5 +111,5 @@ export function snippetForNode(node, maxLen = 4000) {
|
|
|
92
111
|
const raw = node.getText();
|
|
93
112
|
if (raw.length <= maxLen)
|
|
94
113
|
return raw;
|
|
95
|
-
return raw.slice(0, maxLen) +
|
|
114
|
+
return raw.slice(0, maxLen) + '\n/* ... truncated ... */';
|
|
96
115
|
}
|
|
@@ -7,8 +7,10 @@ import { join, resolve } from 'node:path';
|
|
|
7
7
|
import { Node, Project } from 'ts-morph';
|
|
8
8
|
import { readFileSync, existsSync } from 'node:fs';
|
|
9
9
|
import { extractInterfaceOrTypeMeta, extractMetaFromCallable, } from './extractMeta.js';
|
|
10
|
-
import { getLeadingDocDescription, getRelativePathForDisplay, hasJsxInNode,
|
|
10
|
+
import { getLeadingDocDescription, getRelativePathForDisplay, hasJsxInNode, isHookLike, isTsxFile, } from './heuristics.js';
|
|
11
11
|
import { parseJsFile } from './babelParser.js';
|
|
12
|
+
import { computeFileHash, computeSemanticHash } from './tsAstNormalizer.js';
|
|
13
|
+
const CALLERS_LIMIT = 20;
|
|
12
14
|
/** 判断文件类型 */
|
|
13
15
|
function isJsFile(filePath) {
|
|
14
16
|
return filePath.endsWith('.js') || filePath.endsWith('.jsx');
|
|
@@ -59,39 +61,57 @@ function resolveExportName(exportName, decl) {
|
|
|
59
61
|
function processDeclaration(exportName, decl, sf, projectRoot) {
|
|
60
62
|
const filePath = sf.getFilePath();
|
|
61
63
|
const relPath = getRelativePathForDisplay(projectRoot, filePath);
|
|
62
|
-
const category = inferCategoryFromPath(filePath);
|
|
63
64
|
const name = resolveExportName(exportName, decl);
|
|
64
65
|
const description = getLeadingDocDescription(decl) ?? null;
|
|
65
66
|
if (Node.isInterfaceDeclaration(decl) ||
|
|
66
67
|
Node.isTypeAliasDeclaration(decl)) {
|
|
67
68
|
const meta = extractInterfaceOrTypeMeta(decl);
|
|
69
|
+
const type = Node.isInterfaceDeclaration(decl) ? 'interface' : 'type';
|
|
70
|
+
const [semantic_hash, stableStr] = computeSemanticHash({
|
|
71
|
+
name,
|
|
72
|
+
type,
|
|
73
|
+
description,
|
|
74
|
+
meta,
|
|
75
|
+
node: decl,
|
|
76
|
+
});
|
|
68
77
|
return {
|
|
69
78
|
name,
|
|
70
|
-
type
|
|
71
|
-
category,
|
|
79
|
+
type,
|
|
80
|
+
category: '',
|
|
72
81
|
path: relPath,
|
|
73
82
|
description,
|
|
74
|
-
content:
|
|
83
|
+
content: stableStr,
|
|
75
84
|
meta,
|
|
85
|
+
file_hash: computeFileHash(sf.getFullText()),
|
|
86
|
+
semantic_hash,
|
|
76
87
|
};
|
|
77
88
|
}
|
|
78
89
|
if (Node.isFunctionDeclaration(decl)) {
|
|
79
90
|
const jsx = isTsxFile(filePath) && hasJsxInNode(decl);
|
|
80
|
-
const type =
|
|
81
|
-
? '
|
|
82
|
-
:
|
|
83
|
-
? '
|
|
84
|
-
: '
|
|
91
|
+
const type = isHookLike(name)
|
|
92
|
+
? 'hook'
|
|
93
|
+
: jsx
|
|
94
|
+
? 'component'
|
|
95
|
+
: 'function';
|
|
85
96
|
const raw = extractMetaFromCallable(decl);
|
|
86
97
|
const meta = mergeCallableMeta(type, raw);
|
|
98
|
+
const [semantic_hash, stableStr] = computeSemanticHash({
|
|
99
|
+
name,
|
|
100
|
+
type,
|
|
101
|
+
description,
|
|
102
|
+
meta,
|
|
103
|
+
node: decl,
|
|
104
|
+
});
|
|
87
105
|
return {
|
|
88
106
|
name,
|
|
89
107
|
type,
|
|
90
|
-
category,
|
|
108
|
+
category: '',
|
|
91
109
|
path: relPath,
|
|
92
110
|
description,
|
|
93
|
-
content:
|
|
111
|
+
content: stableStr,
|
|
94
112
|
meta,
|
|
113
|
+
semantic_hash,
|
|
114
|
+
file_hash: computeFileHash(sf.getFullText()),
|
|
95
115
|
};
|
|
96
116
|
}
|
|
97
117
|
if (Node.isVariableDeclaration(decl)) {
|
|
@@ -112,33 +132,52 @@ function processDeclaration(exportName, decl, sf, projectRoot) {
|
|
|
112
132
|
if (!callable)
|
|
113
133
|
return null;
|
|
114
134
|
const jsx = isTsxFile(filePath) && hasJsxInNode(callable);
|
|
115
|
-
const type =
|
|
116
|
-
? '
|
|
117
|
-
:
|
|
118
|
-
? '
|
|
119
|
-
: '
|
|
135
|
+
const type = isHookLike(name)
|
|
136
|
+
? 'hook'
|
|
137
|
+
: jsx
|
|
138
|
+
? 'component'
|
|
139
|
+
: 'function';
|
|
120
140
|
const raw = extractMetaFromCallable(callable);
|
|
121
141
|
const meta = mergeCallableMeta(type, raw);
|
|
142
|
+
const [semantic_hash, stableStr] = computeSemanticHash({
|
|
143
|
+
name,
|
|
144
|
+
type,
|
|
145
|
+
description,
|
|
146
|
+
meta,
|
|
147
|
+
node: callable,
|
|
148
|
+
});
|
|
122
149
|
return {
|
|
123
150
|
name,
|
|
124
151
|
type,
|
|
125
|
-
category,
|
|
152
|
+
category: '',
|
|
126
153
|
path: relPath,
|
|
127
154
|
description,
|
|
128
|
-
content:
|
|
155
|
+
content: stableStr,
|
|
129
156
|
meta,
|
|
157
|
+
semantic_hash,
|
|
158
|
+
file_hash: computeFileHash(sf.getFullText()),
|
|
130
159
|
};
|
|
131
160
|
}
|
|
132
161
|
if (Node.isClassDeclaration(decl)) {
|
|
133
162
|
// 轻量:仅将 class 记为 util(后续可扩展为带 JSX 的组件类)
|
|
163
|
+
const type = 'class';
|
|
164
|
+
const [semantic_hash, stableStr] = computeSemanticHash({
|
|
165
|
+
name,
|
|
166
|
+
type,
|
|
167
|
+
description,
|
|
168
|
+
meta: { kind: type },
|
|
169
|
+
node: decl,
|
|
170
|
+
});
|
|
134
171
|
return {
|
|
135
172
|
name,
|
|
136
|
-
type
|
|
137
|
-
category,
|
|
173
|
+
type,
|
|
174
|
+
category: '',
|
|
138
175
|
path: relPath,
|
|
139
176
|
description,
|
|
140
|
-
content:
|
|
141
|
-
meta: { kind:
|
|
177
|
+
content: stableStr,
|
|
178
|
+
meta: { kind: type },
|
|
179
|
+
semantic_hash,
|
|
180
|
+
file_hash: computeFileHash(sf.getFullText()),
|
|
142
181
|
};
|
|
143
182
|
}
|
|
144
183
|
return null;
|
|
@@ -197,6 +236,23 @@ export async function indexProject(opts) {
|
|
|
197
236
|
for (const sf of project.getSourceFiles()) {
|
|
198
237
|
const relPath = getRelativePathForDisplay(projectRoot, sf.getFilePath());
|
|
199
238
|
const exported = sf.getExportedDeclarations();
|
|
239
|
+
// 只检索export的代码,减少噪音
|
|
240
|
+
// 如果要检索未export的,需要考虑
|
|
241
|
+
/**
|
|
242
|
+
* 类型一:真正的内部实现,不应该复用
|
|
243
|
+
function _buildSqlFragment() {} // 和模块强耦合,外部用不了
|
|
244
|
+
const __validateInternal = () => {} // 私有约定(下划线前缀)
|
|
245
|
+
→ 索引了也没用,反而是噪音
|
|
246
|
+
|
|
247
|
+
类型二:工具函数,只是作者忘了 export / 懒得 export
|
|
248
|
+
function debounce(fn, ms) {} // 通用,完全可以复用
|
|
249
|
+
function formatCurrency(n) {} // 通用,其他地方也需要
|
|
250
|
+
→ 索引有价值,还能反向提示"这个应该被 export"
|
|
251
|
+
|
|
252
|
+
类型三:文件内共享但跨文件无意义
|
|
253
|
+
function getLocalConfig() {} // 依赖闭包变量,移出去就坏了
|
|
254
|
+
→ 索引意义不大
|
|
255
|
+
*/
|
|
200
256
|
for (const [exportName, decls] of exported) {
|
|
201
257
|
for (const decl of decls) {
|
|
202
258
|
const row = processDeclaration(exportName, decl, sf, projectRoot);
|
|
@@ -205,7 +261,11 @@ export async function indexProject(opts) {
|
|
|
205
261
|
// 建立符号映射
|
|
206
262
|
const key = `${row.name}|${row.path}`;
|
|
207
263
|
if (!symbolMap.has(key)) {
|
|
208
|
-
symbolMap.set(key, {
|
|
264
|
+
symbolMap.set(key, {
|
|
265
|
+
name: row.name,
|
|
266
|
+
path: row.path,
|
|
267
|
+
exports: new Set(),
|
|
268
|
+
});
|
|
209
269
|
}
|
|
210
270
|
symbolMap.get(key).exports.add(exportName);
|
|
211
271
|
}
|
|
@@ -223,7 +283,11 @@ export async function indexProject(opts) {
|
|
|
223
283
|
for (const row of rows) {
|
|
224
284
|
const key = `${row.name}|${row.path}`;
|
|
225
285
|
if (!symbolMap.has(key)) {
|
|
226
|
-
symbolMap.set(key, {
|
|
286
|
+
symbolMap.set(key, {
|
|
287
|
+
name: row.name,
|
|
288
|
+
path: row.path,
|
|
289
|
+
exports: new Set(),
|
|
290
|
+
});
|
|
227
291
|
}
|
|
228
292
|
symbolMap.get(key).exports.add(row.name);
|
|
229
293
|
}
|
|
@@ -248,7 +312,6 @@ export async function indexProject(opts) {
|
|
|
248
312
|
* 分析调用关系,填充每个符号的 meta.callers 和 meta.callees
|
|
249
313
|
*/
|
|
250
314
|
function analyzeRelations(project, symbolMap, projectRoot, rows) {
|
|
251
|
-
// 构造快速查找:exportName -> [ {name, path} ]
|
|
252
315
|
const exportToSymbol = new Map();
|
|
253
316
|
for (const [key, value] of symbolMap) {
|
|
254
317
|
for (const exp of value.exports) {
|
|
@@ -257,52 +320,73 @@ function analyzeRelations(project, symbolMap, projectRoot, rows) {
|
|
|
257
320
|
exportToSymbol.set(exp, list);
|
|
258
321
|
}
|
|
259
322
|
}
|
|
260
|
-
|
|
261
|
-
const
|
|
262
|
-
const calleesMap = new Map(); // key = "name|path" -> set of callees
|
|
323
|
+
const callersMap = new Map();
|
|
324
|
+
const calleesMap = new Map();
|
|
263
325
|
for (const sf of project.getSourceFiles()) {
|
|
264
326
|
const filePath = sf.getFilePath();
|
|
265
327
|
const relPath = getRelativePathForDisplay(projectRoot, filePath);
|
|
266
|
-
// 获取当前文件导出的符号
|
|
267
328
|
const exported = sf.getExportedDeclarations();
|
|
268
|
-
const fileExportNames = new Set();
|
|
269
|
-
for (const [name, decls] of exported) {
|
|
270
|
-
fileExportNames.add(name);
|
|
271
|
-
}
|
|
272
|
-
// 遍历 AST 查找调用
|
|
273
329
|
sf.forEachDescendant((node) => {
|
|
274
|
-
//
|
|
330
|
+
// 函数调用
|
|
275
331
|
if (Node.isCallExpression(node)) {
|
|
276
332
|
const expr = node.getExpression();
|
|
277
333
|
const name = Node.isIdentifier(expr) ? expr.getText() : null;
|
|
278
334
|
if (name && exportToSymbol.has(name)) {
|
|
279
335
|
const targets = exportToSymbol.get(name);
|
|
280
|
-
// 当前文件是谁在调用
|
|
281
336
|
for (const [expName, decls] of exported) {
|
|
282
|
-
for (const
|
|
337
|
+
for (const _decl of decls) {
|
|
283
338
|
const callerKey = `${expName}|${relPath}`;
|
|
284
339
|
for (const target of targets) {
|
|
285
340
|
// callees: 我调用了谁
|
|
286
|
-
const calleeSet = calleesMap.get(callerKey) ||
|
|
287
|
-
|
|
341
|
+
const calleeSet = calleesMap.get(callerKey) ||
|
|
342
|
+
new Set();
|
|
343
|
+
calleeSet.add(`${target.name}|${target.path}`); // ✅ 字符串天然去重
|
|
288
344
|
calleesMap.set(callerKey, calleeSet);
|
|
289
|
-
// callers: 谁调用了我
|
|
290
345
|
const callerSet = callersMap.get(`${target.name}|${target.path}`) || new Set();
|
|
291
|
-
callerSet.add({
|
|
346
|
+
callerSet.add(`${expName}|${relPath}`);
|
|
292
347
|
callersMap.set(`${target.name}|${target.path}`, callerSet);
|
|
293
348
|
}
|
|
294
349
|
}
|
|
295
350
|
}
|
|
296
351
|
}
|
|
297
352
|
}
|
|
298
|
-
//
|
|
353
|
+
// Import 导入关系(含 import type)
|
|
299
354
|
if (Node.isImportDeclaration(node)) {
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
if (
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
355
|
+
// 1. 解析被导入文件的路径
|
|
356
|
+
const importedSf = node.getModuleSpecifierSourceFile();
|
|
357
|
+
if (!importedSf)
|
|
358
|
+
return; // 无法解析(第三方包等)跳过
|
|
359
|
+
const importedRelPath = getRelativePathForDisplay(projectRoot, importedSf.getFilePath());
|
|
360
|
+
// 2. 收集本条 import 引入了哪些具名符号
|
|
361
|
+
const namedBindings = node.getNamedImports(); // { SymbolRepository }
|
|
362
|
+
const defaultImport = node.getDefaultImport(); // import Foo from ...
|
|
363
|
+
const importedNames = [
|
|
364
|
+
...namedBindings.map((n) => n.getName()),
|
|
365
|
+
...(defaultImport ? [defaultImport.getText()] : []),
|
|
366
|
+
];
|
|
367
|
+
if (importedNames.length === 0)
|
|
368
|
+
return;
|
|
369
|
+
// 3. 当前文件的所有导出符号作为 caller
|
|
370
|
+
for (const [expName] of exported) {
|
|
371
|
+
const callerKey = `${expName}|${relPath}`;
|
|
372
|
+
for (const importedName of importedNames) {
|
|
373
|
+
// 4. 在 symbolMap 里找到被导入符号对应的 row
|
|
374
|
+
// key 格式:`name|path`,name 是 resolveExportName 后的真实名
|
|
375
|
+
// exportToSymbol 里存的是 exportName(可能是 'default'),
|
|
376
|
+
// 所以同时按 importedName 和路径在 symbolMap 里直接查
|
|
377
|
+
const targetKey = `${importedName}|${importedRelPath}`;
|
|
378
|
+
const target = symbolMap.get(targetKey);
|
|
379
|
+
if (!target)
|
|
380
|
+
continue;
|
|
381
|
+
// callee:当前文件的导出符号引用了 target
|
|
382
|
+
const calleeSet = calleesMap.get(callerKey) || new Set();
|
|
383
|
+
calleeSet.add(`${target.name}|${target.path}`);
|
|
384
|
+
calleesMap.set(callerKey, calleeSet);
|
|
385
|
+
const callerRefKey = `${target.name}|${target.path}`;
|
|
386
|
+
const callerSet = callersMap.get(callerRefKey) || new Set();
|
|
387
|
+
callerSet.add(`${expName}|${relPath}`);
|
|
388
|
+
callersMap.set(callerRefKey, callerSet);
|
|
389
|
+
}
|
|
306
390
|
}
|
|
307
391
|
}
|
|
308
392
|
});
|
|
@@ -312,13 +396,19 @@ function analyzeRelations(project, symbolMap, projectRoot, rows) {
|
|
|
312
396
|
const key = `${row.name}|${row.path}`;
|
|
313
397
|
const callers = callersMap.get(key);
|
|
314
398
|
const callees = calleesMap.get(key);
|
|
315
|
-
if (callers
|
|
316
|
-
row.meta =
|
|
317
|
-
|
|
399
|
+
if (callers?.size) {
|
|
400
|
+
row.meta.callers = [...callers].slice(0, CALLERS_LIMIT).map((s) => {
|
|
401
|
+
// ✅ 反序列化回对象
|
|
402
|
+
const [name, ...pathParts] = s.split('|');
|
|
403
|
+
return { name, path: pathParts.join('|') };
|
|
404
|
+
});
|
|
318
405
|
}
|
|
319
|
-
if (callees
|
|
320
|
-
row.meta =
|
|
321
|
-
|
|
406
|
+
if (callees?.size) {
|
|
407
|
+
row.meta.callees = [...callees].slice(0, CALLERS_LIMIT).map((s) => {
|
|
408
|
+
const [name, ...pathParts] = s.split('|');
|
|
409
|
+
// path:为了防止路径里万一含有 | 字符时截断错误。
|
|
410
|
+
return { name, path: pathParts.join('|') };
|
|
411
|
+
});
|
|
322
412
|
}
|
|
323
413
|
}
|
|
324
414
|
console.error(`[analyzeRelations] processed ${rows.length} symbols`);
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
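/**
 * astNormalizerJs.ts
 *
 * Semantic-signature extraction for JS/JSX files; the counterpart of the ts-morph based
 * extractNormalizedSignature.
 * A Babel AST carries no type information, so this module compensates as follows:
 *   1. JSDoc present → types are taken from `@param` / `@returns`.
 *   2. No JSDoc      → parameter types are marked as unknown; the signature can still
 *                      tell structural shapes apart.
 *
 * Behaviour meant to be aligned with the TS version:
 *   - Parameter names → dropped, replaced with `$p`.
 *   - Function bodies → replaced with `{}`.
 *   - Default values  → only their presence is kept (`=$default`); the value itself is dropped.
 *   - Rest parameters → keep the `...` prefix.
 *   - Destructured parameters → keep their structural shape (`{}` / `[]`); inner fields
 *     cannot be expanded.
 *
 * NOTE(review): the implementation below differs from this list in places — plain
 * identifier parameters keep their own name, object patterns are expanded into
 * `key:type` entries, and default-value / rest / array-pattern parameters are all emitted
 * as `$p<index>:unknown`. Confirm which side is authoritative.
 */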
|
|
16
|
+
import { createHash } from 'node:crypto';
|
|
17
|
+
import * as t from '@babel/types';
|
|
18
|
+
// ─────────────────────────────────────────────
|
|
19
|
+
// JSDoc 类型提取
|
|
20
|
+
// ─────────────────────────────────────────────
|
|
21
|
+
/**
 * Returns the text of the first leading comment of `node` that carries a JSDoc type tag.
 *
 * @param {{ leadingComments?: Array<{ value: string }> | null }} node - AST node whose
 *   `leadingComments` are inspected.
 * @returns {string | null} The comment text (its `value`), or `null` when the node has no
 *   leading comment containing `@param` or `@return(s)`.
 */
function getJSDoc(node) {
    const comments = node.leadingComments;
    if (!comments || comments.length === 0)
        return null;
    // '@return' also covers '@returns'. parseJSDoc accepts both spellings, so a comment
    // that only documents `@return {T}` must not be filtered out here.
    const tagged = comments.find(({ value }) => value.includes('@param') || value.includes('@return'));
    return tagged ? tagged.value : null;
}
|
|
28
|
+
/**
 * Extracts normalized parameter and return types from the text of a JSDoc comment.
 *
 * Recognised forms: `@param {Type} name`, `@param {Type} [name]`, `@param {Type} [name=default]`
 * and `@return {Type}` / `@returns {Type}`. Every type string is passed through `normalizeType`.
 *
 * @param {string} jsdoc - Raw comment text.
 * @returns {{ params: Record<string, string>, returnType: string | undefined }} `params` maps
 *   a parameter name to its normalized type; `returnType` is `undefined` when no return tag
 *   with a braced type is present.
 */
function parseJSDoc(jsdoc) {
    // Null-prototype map: looking up a parameter called `constructor` or `toString` must
    // not resolve to an inherited Object.prototype member.
    const params = Object.create(null);
    // Groups: 1 = type, 2 = parameter name, 3 = start of a dotted property path (if any).
    const paramRegex = /@param\s+\{([^}]+)\}\s+\[?(\w+)(\.\w)?/g;
    for (const [, type, name, propertyPath] of jsdoc.matchAll(paramRegex)) {
        // `@param {string} options.name` documents a property of `options`; recording it
        // under `options` would overwrite the type of the parameter itself.
        if (propertyPath)
            continue;
        params[name] = normalizeType(type);
    }
    const returnMatch = jsdoc.match(/@returns?\s+\{([^}]+)\}/);
    const returnType = returnMatch ? normalizeType(returnMatch[1]) : undefined;
    return { params, returnType };
}
|
|
44
|
+
/**
 * Collapses a JSDoc type expression into a small vocabulary:
 * `string | number | boolean | array | object | Promise | Promise<…> | unknown`.
 *
 * Container types are recognised by their outer shape BEFORE the substring heuristics run;
 * otherwise `Promise<string>`, `string[]` or `Array<number>` would be reported as their
 * element type because the element's name occurs inside the expression.
 *
 * @param {string} type - Type expression as written between the braces of a JSDoc tag.
 * @returns {string} The normalized type name.
 */
function normalizeType(type) {
    const normalized = type.trim().toLowerCase();
    // 1. Container shapes, decided by how the expression starts or ends.
    if (/^promise\b/.test(normalized)) {
        const inner = normalized.match(/^promise\.?<(.+)>/);
        return inner ? `Promise<${normalizeType(inner[1])}>` : 'Promise';
    }
    if (normalized.endsWith('[]') || /^array\b/.test(normalized))
        return 'array';
    if (/^object\b/.test(normalized))
        return 'object';
    // 2. Substring heuristics for everything else (unions, nullable prefixes, …);
    //    the first keyword found, in this order, wins.
    for (const keyword of ['string', 'number', 'boolean', 'array', 'object']) {
        if (normalized.includes(keyword))
            return keyword;
    }
    if (normalized.includes('promise')) {
        const inner = normalized.match(/promise<(.+)>/);
        return inner ? `Promise<${normalizeType(inner[1])}>` : 'Promise';
    }
    return 'unknown';
}
|
|
62
|
+
/**
 * Renders one function parameter as a signature fragment.
 *
 * - Identifier      → `name:type`
 * - Object pattern  → `{key:type;…}` with the entries sorted; entries without a named key
 *                     (e.g. a rest element) become `unknown`
 * - Anything else   → `<fallbackName>:unknown`
 *   NOTE(review): default-value, rest and array-pattern parameters all take this branch.
 *
 * Types come from the parsed JSDoc and default to `unknown`.
 *
 * @param {{ type: string, name?: string, properties?: Array<{ key?: { name?: string } }> }} param
 *   Parameter node.
 * @param {string} fallbackName - Placeholder name used when the parameter has no usable name.
 * @param {{ params: Record<string, string> } | undefined} jsdoc - Parsed JSDoc, if any.
 * @returns {string} The signature fragment.
 */
function normalizeParam(param, fallbackName, jsdoc) {
    // Own-property lookup only: with a plain-object map, a parameter named `constructor`
    // or `toString` would otherwise pick up the inherited Object.prototype member and
    // leak a function's source text into the signature.
    const documentedType = (name) => {
        const documented = jsdoc?.params;
        const isDocumented = documented != null && Object.prototype.hasOwnProperty.call(documented, name);
        return isDocumented ? (documented[name] ?? 'unknown') : 'unknown';
    };
    if (param.type === 'Identifier') {
        return `${param.name}:${documentedType(param.name)}`;
    }
    if (param.type === 'ObjectPattern') {
        const props = param.properties.map((property) => {
            const keyName = property.key?.name;
            return keyName ? `${keyName}:${documentedType(keyName)}` : 'unknown';
        });
        return `{${props.sort().join(';')}}`;
    }
    return `${fallbackName}:unknown`;
}
|
|
81
|
+
/**
 * Guesses the type of a literal expression from its node kind.
 *
 * @param {object} expr - Expression node.
 * @returns {'string' | 'number' | 'boolean' | 'array' | 'object' | 'unknown'} The label of
 *   the first matching literal kind, or `unknown` for any other expression.
 */
function inferTypeFromExpression(expr) {
    // Each entry pairs a `@babel/types` predicate with the label it yields; checked in order.
    const literalKinds = [
        ['isStringLiteral', 'string'],
        ['isNumericLiteral', 'number'],
        ['isBooleanLiteral', 'boolean'],
        ['isArrayExpression', 'array'],
        ['isObjectExpression', 'object'],
    ];
    for (const [predicate, label] of literalKinds) {
        if (t[predicate](expr))
            return label;
    }
    return 'unknown';
}
|
|
94
|
+
/**
 * Guesses a function's return type from its body when no JSDoc return type is available.
 *
 * - No body                → `unknown`
 * - Expression body        → type inferred from that expression
 * - Block body             → type inferred from the argument of the FIRST `return`
 *                            statement found directly in the block (nested statements are
 *                            not searched); `unknown` when there is none or it is bare.
 *
 * @param {{ body?: object | null }} node - Function-like node.
 * @returns {string} The inferred type label.
 */
function inferReturnType(node) {
    const { body } = node;
    if (!body)
        return 'unknown';
    if (!t.isBlockStatement(body))
        return inferTypeFromExpression(body);
    const firstReturn = body.body.find((statement) => t.isReturnStatement(statement));
    return firstReturn && firstReturn.argument
        ? inferTypeFromExpression(firstReturn.argument)
        : 'unknown';
}
|
|
108
|
+
/**
 * Builds the normalized signature `fn(<params>)=><returnType>` of a function node.
 *
 * Parameter fragments come from `normalizeParam` (placeholder names `$p0`, `$p1`, …).
 * The return type is the JSDoc return type when one was parsed, otherwise the result of
 * `inferReturnType`.
 *
 * @param {{ params: object[] }} node - Function declaration or expression node.
 * @returns {string} The normalized signature.
 */
function normalizeFunction(node) {
    const rawDoc = getJSDoc(node);
    let doc;
    if (rawDoc) {
        doc = parseJSDoc(rawDoc);
    }
    const paramList = node.params
        .map((param, index) => normalizeParam(param, `$p${index}`, doc))
        .join(',');
    const returns = doc?.returnType ?? inferReturnType(node);
    return `fn(${paramList})=>${returns}`;
}
|
|
115
|
+
/**
 * Builds the normalized signature `fn(<params>)=><returnType>` of an arrow function.
 *
 * Arrow functions are normalized exactly like ordinary functions, so this delegates to
 * `normalizeFunction` instead of carrying a second copy of the same steps.
 *
 * @param {{ params: object[] }} node - Arrow function expression node.
 * @returns {string} The normalized signature.
 */
function normalizeArrowFunction(node) {
    return normalizeFunction(node);
}
|
|
123
|
+
/**
 * Builds the normalized signature of a class: `class{<method>():unknown;…}` with the
 * method entries sorted.
 *
 * Only members accepted by `t.isClassMethod` are listed. A method whose key is not an
 * identifier is listed under the name `unknown`. Parameters and return types of methods
 * are not part of the signature.
 *
 * @param {{ body: { body: object[] } }} node - Class declaration node.
 * @returns {string} The normalized class signature.
 */
function normalizeClass(node) {
    const signatures = [];
    for (const member of node.body.body) {
        if (!t.isClassMethod(member))
            continue;
        const methodName = t.isIdentifier(member.key) ? member.key.name : 'unknown';
        signatures.push(`${methodName}():unknown`);
    }
    signatures.sort();
    return `class{${signatures.join(';')}}`;
}
|
|
133
|
+
/**
 * Walks an AST depth-first in pre-order and calls `cb` on every node.
 *
 * A value counts as a child node when it is truthy and has a string `type` property.
 * Children are discovered by enumerating every property of the current node; array-valued
 * properties contribute each of their node-like elements, in order.
 *
 * @param {object} node - Root of the subtree to walk.
 * @param {(node: object) => void} cb - Called once per visited node, parent before children.
 * @returns {void}
 */
function traverseNode(node, cb) {
    const descend = (candidate) => {
        if (candidate && typeof candidate.type === 'string') {
            traverseNode(candidate, cb);
        }
    };
    cb(node);
    for (const key in node) {
        const value = node[key];
        if (Array.isArray(value)) {
            value.forEach((item) => descend(item));
        }
        else {
            descend(value);
        }
    }
}
|
|
149
|
+
/**
 * Scans a subtree for a few recognisable runtime behaviours and returns their labels.
 *
 * - `performs network request` — a call whose callee is a bare identifier containing
 *   `fetch` or `axios` (case-insensitive)
 * - `uses timer`               — a call whose callee is a bare identifier containing
 *   `settimeout` (case-insensitive)
 * - `uses local storage`       — a member expression whose object is the identifier
 *   `localStorage`
 *
 * Only bare-identifier callees are inspected, so member calls such as `axios.get(...)`
 * are not matched by the first two rules.
 *
 * @param {object} node - Root node of the symbol to inspect.
 * @returns {string[]} Distinct labels in order of first occurrence.
 */
export function inferBehaviorFromJS(node) {
    const found = new Set();
    traverseNode(node, (current) => {
        if (t.isCallExpression(current) && t.isIdentifier(current.callee)) {
            const calleeName = current.callee.name.toLowerCase();
            const isNetworkCall = calleeName.includes('fetch') || calleeName.includes('axios');
            if (isNetworkCall) {
                found.add('performs network request');
            }
            if (calleeName.includes('settimeout')) {
                found.add('uses timer');
            }
        }
        const touchesLocalStorage = t.isMemberExpression(current) &&
            t.isIdentifier(current.object) &&
            current.object.name === 'localStorage';
        if (touchesLocalStorage) {
            found.add('uses local storage');
        }
    });
    return [...found];
}
|
|
172
|
+
/**
 * Produces the normalized signature string of a Babel node.
 *
 * Function declarations and function expressions go through `normalizeFunction`, arrow
 * functions through `normalizeArrowFunction`, class declarations through `normalizeClass`.
 *
 * @param {object} node - Babel AST node of the symbol.
 * @returns {string} The normalized signature, or an empty string for any other node kind.
 */
export function extractNormalizedSignatureJS(node) {
    const isPlainFunction = t.isFunctionDeclaration(node) || t.isFunctionExpression(node);
    if (isPlainFunction) {
        return normalizeFunction(node);
    }
    if (t.isArrowFunctionExpression(node)) {
        return normalizeArrowFunction(node);
    }
    return t.isClassDeclaration(node) ? normalizeClass(node) : '';
}
|
|
184
|
+
/**
 * Computes the semantic hash of a JS/JSX symbol.
 *
 * A "stable" object is assembled from the symbol's name, type, description, normalized
 * signature, inferred behaviours and the `sideEffects` / `hooks` lists of its meta
 * (each list sorted), serialized with `JSON.stringify`, and hashed with SHA-256.
 *
 * @param {{ node: object, name: string, type: string, description?: string | null,
 *   meta?: { sideEffects?: string[] | null, hooks?: string[] | null } | null }} row
 *   Symbol row together with its AST node.
 * @returns {[string, string]} A pair `[hexDigest, stableJson]`: the SHA-256 hex digest and
 *   the exact JSON string it was computed from.
 */
export function computeSemanticHashJs(row) {
    const signature = extractNormalizedSignatureJS(row.node);
    const behavior = inferBehaviorFromJS(row.node);
    const meta = row.meta || {};
    // Sort copies of the meta lists so the caller's arrays are left untouched.
    const sortedCopy = (list) => [...(list ?? [])].sort();
    // Key order matters: it determines the serialized string and therefore the digest.
    const payload = JSON.stringify({
        name: row.name,
        type: row.type,
        description: row.description ?? null,
        signature,
        behavior: behavior.sort(),
        sideEffects: sortedCopy(meta.sideEffects),
        hooks: sortedCopy(meta.hooks),
    });
    const digest = createHash('sha256').update(payload).digest('hex');
    return [digest, payload];
}
|
|
@@ -15,8 +15,8 @@ export async function upsertSymbols(pool, rows, embeddings) {
|
|
|
15
15
|
const actor = process.env.GITHUB_USERNAME?.trim() || 'LorryIsLuRui';
|
|
16
16
|
await pool.query(getSymbolsTableSQL()); // 确保表存在
|
|
17
17
|
const sql = `
|
|
18
|
-
INSERT INTO ${env.mysqlSymbolsTable} (name, type, category, path, description, content, meta, insert_user, updated_user, embedding)
|
|
19
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
18
|
+
INSERT INTO ${env.mysqlSymbolsTable} (name, type, category, path, description, content, meta, insert_user, updated_user, embedding, semantic_hash, file_hash)
|
|
19
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
20
20
|
ON DUPLICATE KEY UPDATE
|
|
21
21
|
type = VALUES(type),
|
|
22
22
|
category = VALUES(category),
|
|
@@ -24,7 +24,9 @@ export async function upsertSymbols(pool, rows, embeddings) {
|
|
|
24
24
|
content = VALUES(content),
|
|
25
25
|
meta = VALUES(meta),
|
|
26
26
|
updated_user = VALUES(updated_user),
|
|
27
|
-
embedding = CASE WHEN VALUES(embedding) IS NOT NULL THEN VALUES(embedding) ELSE embedding END
|
|
27
|
+
embedding = CASE WHEN VALUES(embedding) IS NOT NULL THEN VALUES(embedding) ELSE embedding END,
|
|
28
|
+
semantic_hash = VALUES(semantic_hash),
|
|
29
|
+
file_hash = VALUES(file_hash)
|
|
28
30
|
`;
|
|
29
31
|
const conn = await pool.getConnection();
|
|
30
32
|
try {
|
|
@@ -44,6 +46,8 @@ export async function upsertSymbols(pool, rows, embeddings) {
|
|
|
44
46
|
actor,
|
|
45
47
|
actor,
|
|
46
48
|
embJson,
|
|
49
|
+
r.semantic_hash,
|
|
50
|
+
r.file_hash,
|
|
47
51
|
]);
|
|
48
52
|
}
|
|
49
53
|
await conn.commit();
|