@lorrylurui/code-intelligence-mcp 1.1.15 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -3
- package/dist/cli/ci-index-cli.js +85 -0
- package/dist/cli/ci-index.js +76 -0
- package/dist/cli/detect-duplicates.js +1 -6
- package/dist/cli/embedding-worker-cli.js +35 -0
- package/dist/cli/index-codebase-cli.js +64 -0
- package/dist/cli/index-codebase.js +5 -4
- package/dist/config/env.js +53 -81
- package/dist/config/symbolStatus.js +8 -0
- package/dist/db/mysql.js +3 -6
- package/dist/db/postgres.js +13 -0
- package/dist/db/schema.js +41 -19
- package/dist/indexer/astNormalizer.js +201 -0
- package/dist/indexer/babelParser.js +40 -15
- package/dist/indexer/categoryClassifier.js +129 -0
- package/dist/indexer/embedText.js +9 -7
- package/dist/indexer/heuristics.js +42 -23
- package/dist/indexer/indexProject.js +146 -56
- package/dist/indexer/jsAstNormalizer.js +201 -0
- package/dist/indexer/persistSymbols.js +49 -24
- package/dist/indexer/tsAstNormalizer.js +363 -0
- package/dist/prompts/reusableCodeAdvisorPrompt.js +21 -8
- package/dist/repositories/symbolRepository.js +53 -46
- package/dist/services/embeddingQueue.js +57 -0
- package/dist/services/reindex.js +90 -43
- package/dist/tools/getSymbolDetail.js +3 -1
- package/dist/tools/incUsage.js +12 -3
- package/dist/tools/reindex.js +3 -1
- package/dist/tools/searchByStructure.js +6 -2
- package/dist/tools/searchSymbols.js +18 -4
- package/dist/workers/embeddingWorker.js +145 -0
- package/package.json +10 -5
|
@@ -7,8 +7,10 @@ import { join, resolve } from 'node:path';
|
|
|
7
7
|
import { Node, Project } from 'ts-morph';
|
|
8
8
|
import { readFileSync, existsSync } from 'node:fs';
|
|
9
9
|
import { extractInterfaceOrTypeMeta, extractMetaFromCallable, } from './extractMeta.js';
|
|
10
|
-
import { getLeadingDocDescription, getRelativePathForDisplay, hasJsxInNode,
|
|
10
|
+
import { getLeadingDocDescription, getRelativePathForDisplay, hasJsxInNode, isHookLike, isTsxFile, } from './heuristics.js';
|
|
11
11
|
import { parseJsFile } from './babelParser.js';
|
|
12
|
+
import { computeFileHash, computeSemanticHash } from './tsAstNormalizer.js';
|
|
13
|
+
const CALLERS_LIMIT = 20;
|
|
12
14
|
/** 判断文件类型 */
|
|
13
15
|
function isJsFile(filePath) {
|
|
14
16
|
return filePath.endsWith('.js') || filePath.endsWith('.jsx');
|
|
@@ -59,39 +61,57 @@ function resolveExportName(exportName, decl) {
|
|
|
59
61
|
function processDeclaration(exportName, decl, sf, projectRoot) {
|
|
60
62
|
const filePath = sf.getFilePath();
|
|
61
63
|
const relPath = getRelativePathForDisplay(projectRoot, filePath);
|
|
62
|
-
const category = inferCategoryFromPath(filePath);
|
|
63
64
|
const name = resolveExportName(exportName, decl);
|
|
64
65
|
const description = getLeadingDocDescription(decl) ?? null;
|
|
65
66
|
if (Node.isInterfaceDeclaration(decl) ||
|
|
66
67
|
Node.isTypeAliasDeclaration(decl)) {
|
|
67
68
|
const meta = extractInterfaceOrTypeMeta(decl);
|
|
69
|
+
const type = Node.isInterfaceDeclaration(decl) ? 'interface' : 'type';
|
|
70
|
+
const [semantic_hash, stableStr] = computeSemanticHash({
|
|
71
|
+
name,
|
|
72
|
+
type,
|
|
73
|
+
description,
|
|
74
|
+
meta,
|
|
75
|
+
node: decl,
|
|
76
|
+
});
|
|
68
77
|
return {
|
|
69
78
|
name,
|
|
70
|
-
type
|
|
71
|
-
category,
|
|
79
|
+
type,
|
|
80
|
+
category: '',
|
|
72
81
|
path: relPath,
|
|
73
82
|
description,
|
|
74
|
-
content:
|
|
83
|
+
content: stableStr,
|
|
75
84
|
meta,
|
|
85
|
+
file_hash: computeFileHash(sf.getFullText()),
|
|
86
|
+
semantic_hash,
|
|
76
87
|
};
|
|
77
88
|
}
|
|
78
89
|
if (Node.isFunctionDeclaration(decl)) {
|
|
79
90
|
const jsx = isTsxFile(filePath) && hasJsxInNode(decl);
|
|
80
|
-
const type =
|
|
81
|
-
? '
|
|
82
|
-
:
|
|
83
|
-
? '
|
|
84
|
-
: '
|
|
91
|
+
const type = isHookLike(name)
|
|
92
|
+
? 'hook'
|
|
93
|
+
: jsx
|
|
94
|
+
? 'component'
|
|
95
|
+
: 'function';
|
|
85
96
|
const raw = extractMetaFromCallable(decl);
|
|
86
97
|
const meta = mergeCallableMeta(type, raw);
|
|
98
|
+
const [semantic_hash, stableStr] = computeSemanticHash({
|
|
99
|
+
name,
|
|
100
|
+
type,
|
|
101
|
+
description,
|
|
102
|
+
meta,
|
|
103
|
+
node: decl,
|
|
104
|
+
});
|
|
87
105
|
return {
|
|
88
106
|
name,
|
|
89
107
|
type,
|
|
90
|
-
category,
|
|
108
|
+
category: '',
|
|
91
109
|
path: relPath,
|
|
92
110
|
description,
|
|
93
|
-
content:
|
|
111
|
+
content: stableStr,
|
|
94
112
|
meta,
|
|
113
|
+
semantic_hash,
|
|
114
|
+
file_hash: computeFileHash(sf.getFullText()),
|
|
95
115
|
};
|
|
96
116
|
}
|
|
97
117
|
if (Node.isVariableDeclaration(decl)) {
|
|
@@ -112,38 +132,57 @@ function processDeclaration(exportName, decl, sf, projectRoot) {
|
|
|
112
132
|
if (!callable)
|
|
113
133
|
return null;
|
|
114
134
|
const jsx = isTsxFile(filePath) && hasJsxInNode(callable);
|
|
115
|
-
const type =
|
|
116
|
-
? '
|
|
117
|
-
:
|
|
118
|
-
? '
|
|
119
|
-
: '
|
|
135
|
+
const type = isHookLike(name)
|
|
136
|
+
? 'hook'
|
|
137
|
+
: jsx
|
|
138
|
+
? 'component'
|
|
139
|
+
: 'function';
|
|
120
140
|
const raw = extractMetaFromCallable(callable);
|
|
121
141
|
const meta = mergeCallableMeta(type, raw);
|
|
142
|
+
const [semantic_hash, stableStr] = computeSemanticHash({
|
|
143
|
+
name,
|
|
144
|
+
type,
|
|
145
|
+
description,
|
|
146
|
+
meta,
|
|
147
|
+
node: callable,
|
|
148
|
+
});
|
|
122
149
|
return {
|
|
123
150
|
name,
|
|
124
151
|
type,
|
|
125
|
-
category,
|
|
152
|
+
category: '',
|
|
126
153
|
path: relPath,
|
|
127
154
|
description,
|
|
128
|
-
content:
|
|
155
|
+
content: stableStr,
|
|
129
156
|
meta,
|
|
157
|
+
semantic_hash,
|
|
158
|
+
file_hash: computeFileHash(sf.getFullText()),
|
|
130
159
|
};
|
|
131
160
|
}
|
|
132
161
|
if (Node.isClassDeclaration(decl)) {
|
|
133
162
|
// 轻量:仅将 class 记为 util(后续可扩展为带 JSX 的组件类)
|
|
163
|
+
const type = 'class';
|
|
164
|
+
const [semantic_hash, stableStr] = computeSemanticHash({
|
|
165
|
+
name,
|
|
166
|
+
type,
|
|
167
|
+
description,
|
|
168
|
+
meta: { kind: type },
|
|
169
|
+
node: decl,
|
|
170
|
+
});
|
|
134
171
|
return {
|
|
135
172
|
name,
|
|
136
|
-
type
|
|
137
|
-
category,
|
|
173
|
+
type,
|
|
174
|
+
category: '',
|
|
138
175
|
path: relPath,
|
|
139
176
|
description,
|
|
140
|
-
content:
|
|
141
|
-
meta: { kind:
|
|
177
|
+
content: stableStr,
|
|
178
|
+
meta: { kind: type },
|
|
179
|
+
semantic_hash,
|
|
180
|
+
file_hash: computeFileHash(sf.getFullText()),
|
|
142
181
|
};
|
|
143
182
|
}
|
|
144
183
|
return null;
|
|
145
184
|
}
|
|
146
|
-
const DEFAULT_IGNORE = [
|
|
185
|
+
export const DEFAULT_IGNORE = [
|
|
147
186
|
'**/node_modules/**',
|
|
148
187
|
'**/dist/**',
|
|
149
188
|
'**/.git/**',
|
|
@@ -197,6 +236,23 @@ export async function indexProject(opts) {
|
|
|
197
236
|
for (const sf of project.getSourceFiles()) {
|
|
198
237
|
const relPath = getRelativePathForDisplay(projectRoot, sf.getFilePath());
|
|
199
238
|
const exported = sf.getExportedDeclarations();
|
|
239
|
+
// 只检索export的代码,减少噪音
|
|
240
|
+
// 如果要检索未export的,需要考虑
|
|
241
|
+
/**
|
|
242
|
+
* 类型一:真正的内部实现,不应该复用
|
|
243
|
+
function _buildSqlFragment() {} // 和模块强耦合,外部用不了
|
|
244
|
+
const __validateInternal = () => {} // 私有约定(下划线前缀)
|
|
245
|
+
→ 索引了也没用,反而是噪音
|
|
246
|
+
|
|
247
|
+
类型二:工具函数,只是作者忘了 export / 懒得 export
|
|
248
|
+
function debounce(fn, ms) {} // 通用,完全可以复用
|
|
249
|
+
function formatCurrency(n) {} // 通用,其他地方也需要
|
|
250
|
+
→ 索引有价值,还能反向提示"这个应该被 export"
|
|
251
|
+
|
|
252
|
+
类型三:文件内共享但跨文件无意义
|
|
253
|
+
function getLocalConfig() {} // 依赖闭包变量,移出去就坏了
|
|
254
|
+
→ 索引意义不大
|
|
255
|
+
*/
|
|
200
256
|
for (const [exportName, decls] of exported) {
|
|
201
257
|
for (const decl of decls) {
|
|
202
258
|
const row = processDeclaration(exportName, decl, sf, projectRoot);
|
|
@@ -205,7 +261,11 @@ export async function indexProject(opts) {
|
|
|
205
261
|
// 建立符号映射
|
|
206
262
|
const key = `${row.name}|${row.path}`;
|
|
207
263
|
if (!symbolMap.has(key)) {
|
|
208
|
-
symbolMap.set(key, {
|
|
264
|
+
symbolMap.set(key, {
|
|
265
|
+
name: row.name,
|
|
266
|
+
path: row.path,
|
|
267
|
+
exports: new Set(),
|
|
268
|
+
});
|
|
209
269
|
}
|
|
210
270
|
symbolMap.get(key).exports.add(exportName);
|
|
211
271
|
}
|
|
@@ -223,7 +283,11 @@ export async function indexProject(opts) {
|
|
|
223
283
|
for (const row of rows) {
|
|
224
284
|
const key = `${row.name}|${row.path}`;
|
|
225
285
|
if (!symbolMap.has(key)) {
|
|
226
|
-
symbolMap.set(key, {
|
|
286
|
+
symbolMap.set(key, {
|
|
287
|
+
name: row.name,
|
|
288
|
+
path: row.path,
|
|
289
|
+
exports: new Set(),
|
|
290
|
+
});
|
|
227
291
|
}
|
|
228
292
|
symbolMap.get(key).exports.add(row.name);
|
|
229
293
|
}
|
|
@@ -248,7 +312,6 @@ export async function indexProject(opts) {
|
|
|
248
312
|
* 分析调用关系,填充每个符号的 meta.callers 和 meta.callees
|
|
249
313
|
*/
|
|
250
314
|
function analyzeRelations(project, symbolMap, projectRoot, rows) {
|
|
251
|
-
// 构造快速查找:exportName -> [ {name, path} ]
|
|
252
315
|
const exportToSymbol = new Map();
|
|
253
316
|
for (const [key, value] of symbolMap) {
|
|
254
317
|
for (const exp of value.exports) {
|
|
@@ -257,52 +320,73 @@ function analyzeRelations(project, symbolMap, projectRoot, rows) {
|
|
|
257
320
|
exportToSymbol.set(exp, list);
|
|
258
321
|
}
|
|
259
322
|
}
|
|
260
|
-
|
|
261
|
-
const
|
|
262
|
-
const calleesMap = new Map(); // key = "name|path" -> set of callees
|
|
323
|
+
const callersMap = new Map();
|
|
324
|
+
const calleesMap = new Map();
|
|
263
325
|
for (const sf of project.getSourceFiles()) {
|
|
264
326
|
const filePath = sf.getFilePath();
|
|
265
327
|
const relPath = getRelativePathForDisplay(projectRoot, filePath);
|
|
266
|
-
// 获取当前文件导出的符号
|
|
267
328
|
const exported = sf.getExportedDeclarations();
|
|
268
|
-
const fileExportNames = new Set();
|
|
269
|
-
for (const [name, decls] of exported) {
|
|
270
|
-
fileExportNames.add(name);
|
|
271
|
-
}
|
|
272
|
-
// 遍历 AST 查找调用
|
|
273
329
|
sf.forEachDescendant((node) => {
|
|
274
|
-
//
|
|
330
|
+
// 函数调用
|
|
275
331
|
if (Node.isCallExpression(node)) {
|
|
276
332
|
const expr = node.getExpression();
|
|
277
333
|
const name = Node.isIdentifier(expr) ? expr.getText() : null;
|
|
278
334
|
if (name && exportToSymbol.has(name)) {
|
|
279
335
|
const targets = exportToSymbol.get(name);
|
|
280
|
-
// 当前文件是谁在调用
|
|
281
336
|
for (const [expName, decls] of exported) {
|
|
282
|
-
for (const
|
|
337
|
+
for (const _decl of decls) {
|
|
283
338
|
const callerKey = `${expName}|${relPath}`;
|
|
284
339
|
for (const target of targets) {
|
|
285
340
|
// callees: 我调用了谁
|
|
286
|
-
const calleeSet = calleesMap.get(callerKey) ||
|
|
287
|
-
|
|
341
|
+
const calleeSet = calleesMap.get(callerKey) ||
|
|
342
|
+
new Set();
|
|
343
|
+
calleeSet.add(`${target.name}|${target.path}`); // ✅ 字符串天然去重
|
|
288
344
|
calleesMap.set(callerKey, calleeSet);
|
|
289
|
-
// callers: 谁调用了我
|
|
290
345
|
const callerSet = callersMap.get(`${target.name}|${target.path}`) || new Set();
|
|
291
|
-
callerSet.add({
|
|
346
|
+
callerSet.add(`${expName}|${relPath}`);
|
|
292
347
|
callersMap.set(`${target.name}|${target.path}`, callerSet);
|
|
293
348
|
}
|
|
294
349
|
}
|
|
295
350
|
}
|
|
296
351
|
}
|
|
297
352
|
}
|
|
298
|
-
//
|
|
353
|
+
// Import 导入关系(含 import type)
|
|
299
354
|
if (Node.isImportDeclaration(node)) {
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
if (
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
355
|
+
// 1. 解析被导入文件的路径
|
|
356
|
+
const importedSf = node.getModuleSpecifierSourceFile();
|
|
357
|
+
if (!importedSf)
|
|
358
|
+
return; // 无法解析(第三方包等)跳过
|
|
359
|
+
const importedRelPath = getRelativePathForDisplay(projectRoot, importedSf.getFilePath());
|
|
360
|
+
// 2. 收集本条 import 引入了哪些具名符号
|
|
361
|
+
const namedBindings = node.getNamedImports(); // { SymbolRepository }
|
|
362
|
+
const defaultImport = node.getDefaultImport(); // import Foo from ...
|
|
363
|
+
const importedNames = [
|
|
364
|
+
...namedBindings.map((n) => n.getName()),
|
|
365
|
+
...(defaultImport ? [defaultImport.getText()] : []),
|
|
366
|
+
];
|
|
367
|
+
if (importedNames.length === 0)
|
|
368
|
+
return;
|
|
369
|
+
// 3. 当前文件的所有导出符号作为 caller
|
|
370
|
+
for (const [expName] of exported) {
|
|
371
|
+
const callerKey = `${expName}|${relPath}`;
|
|
372
|
+
for (const importedName of importedNames) {
|
|
373
|
+
// 4. 在 symbolMap 里找到被导入符号对应的 row
|
|
374
|
+
// key 格式:`name|path`,name 是 resolveExportName 后的真实名
|
|
375
|
+
// exportToSymbol 里存的是 exportName(可能是 'default'),
|
|
376
|
+
// 所以同时按 importedName 和路径在 symbolMap 里直接查
|
|
377
|
+
const targetKey = `${importedName}|${importedRelPath}`;
|
|
378
|
+
const target = symbolMap.get(targetKey);
|
|
379
|
+
if (!target)
|
|
380
|
+
continue;
|
|
381
|
+
// callee:当前文件的导出符号引用了 target
|
|
382
|
+
const calleeSet = calleesMap.get(callerKey) || new Set();
|
|
383
|
+
calleeSet.add(`${target.name}|${target.path}`);
|
|
384
|
+
calleesMap.set(callerKey, calleeSet);
|
|
385
|
+
const callerRefKey = `${target.name}|${target.path}`;
|
|
386
|
+
const callerSet = callersMap.get(callerRefKey) || new Set();
|
|
387
|
+
callerSet.add(`${expName}|${relPath}`);
|
|
388
|
+
callersMap.set(callerRefKey, callerSet);
|
|
389
|
+
}
|
|
306
390
|
}
|
|
307
391
|
}
|
|
308
392
|
});
|
|
@@ -312,13 +396,19 @@ function analyzeRelations(project, symbolMap, projectRoot, rows) {
|
|
|
312
396
|
const key = `${row.name}|${row.path}`;
|
|
313
397
|
const callers = callersMap.get(key);
|
|
314
398
|
const callees = calleesMap.get(key);
|
|
315
|
-
if (callers
|
|
316
|
-
row.meta =
|
|
317
|
-
|
|
399
|
+
if (callers?.size) {
|
|
400
|
+
row.meta.callers = [...callers].slice(0, CALLERS_LIMIT).map((s) => {
|
|
401
|
+
// ✅ 反序列化回对象
|
|
402
|
+
const [name, ...pathParts] = s.split('|');
|
|
403
|
+
return { name, path: pathParts.join('|') };
|
|
404
|
+
});
|
|
318
405
|
}
|
|
319
|
-
if (callees
|
|
320
|
-
row.meta =
|
|
321
|
-
|
|
406
|
+
if (callees?.size) {
|
|
407
|
+
row.meta.callees = [...callees].slice(0, CALLERS_LIMIT).map((s) => {
|
|
408
|
+
const [name, ...pathParts] = s.split('|');
|
|
409
|
+
// path:为了防止路径里万一含有 | 字符时截断错误。
|
|
410
|
+
return { name, path: pathParts.join('|') };
|
|
411
|
+
});
|
|
322
412
|
}
|
|
323
413
|
}
|
|
324
414
|
console.error(`[analyzeRelations] processed ${rows.length} symbols`);
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
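/**
 * astNormalizerJs.ts
 *
 * Semantic-signature extraction for JS/JSX files; the counterpart of the
 * ts-morph based extractNormalizedSignature.
 * A Babel AST carries no type information, which is compensated for as follows:
 *   1. JSDoc present → types are taken from @param / @returns.
 *   2. No JSDoc      → parameter types are marked $unknown; the signature can
 *                      still distinguish structural shape.
 *
 * Design goals for alignment with the TS version:
 *   - parameter names → dropped and replaced with $p
 *   - function body → replaced with {}
 *   - the presence of a default value is kept (=$default); its content is dropped
 *   - rest parameters keep the ... prefix
 *   - destructured parameters keep their structural shape ({} / []); inner
 *     fields cannot be expanded
 *
 * NOTE(review): normalizeParam below emits the literal `unknown` (not
 * `$unknown`) and keeps the real name of Identifier parameters; `$pN` is only
 * used as a fallback name. Confirm which of the two is the intended contract.
 */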
|
|
16
|
+
import { createHash } from 'node:crypto';
|
|
17
|
+
import * as t from '@babel/types';
|
|
18
|
+
// ─────────────────────────────────────────────
|
|
19
|
+
// JSDoc 类型提取
|
|
20
|
+
// ─────────────────────────────────────────────
|
|
21
|
+
/**
 * Returns the raw text of the first leading comment on `node` that carries
 * JSDoc type tags (`@param`, `@return` or `@returns`).
 *
 * @param {{ leadingComments?: Array<{ value: string }> | null }} node
 *   AST node whose `leadingComments` are inspected.
 * @returns {string | null} The comment's `value`, or `null` when the node has
 *   no leading comment containing one of the tags.
 */
function getJSDoc(node) {
    const comments = node.leadingComments;
    if (!comments || comments.length === 0)
        return null;
    // '@return' is a prefix of '@returns', so this single check covers both
    // spellings. parseJSDoc accepts `@returns?`, so a comment that documents
    // only `@return {T}` must not be discarded here.
    const jsdoc = comments.find((c) => c.value.includes('@param') || c.value.includes('@return'));
    return jsdoc ? jsdoc.value : null;
}
|
|
28
|
+
/**
 * Extracts normalized parameter and return types from raw JSDoc text.
 *
 * Recognized forms:
 *   - `@param {T} name`
 *   - `@param {T} [name]` / `@param {T} [name=default]` (optional parameters)
 *   - `@param {T} opts.field` / `@param {T} items[].field` (stored under the
 *     full dotted path so they do not overwrite the parent's own entry)
 *   - `@returns {T}` / `@return {T}`
 *
 * @param {string} jsdoc Raw comment text.
 * @returns {{ params: Record<string, string>, returnType: string | undefined }}
 *   `params` maps parameter name → normalized type. It is a null-prototype
 *   object, so reading a name that was never documented (even `constructor`
 *   or `toString`) yields `undefined` rather than an inherited member.
 *   `returnType` is `undefined` when no return tag is present.
 */
function parseJSDoc(jsdoc) {
    const params = Object.create(null);
    let returnType;
    // `\[?` skips the bracket of an optional parameter; the name may contain
    // `$` and may be followed by `.field` / `[].field` segments. A dot is only
    // consumed when another name segment follows, so "name. Description"
    // still captures just "name".
    const paramRegex = /@param\s+\{([^}]+)\}\s+\[?([\w$]+(?:(?:\[\])?\.[\w$]+)*)/g;
    const returnRegex = /@returns?\s+\{([^}]+)\}/;
    for (const [, type, name] of jsdoc.matchAll(paramRegex)) {
        params[name] = normalizeType(type);
    }
    const returnMatch = jsdoc.match(returnRegex);
    if (returnMatch) {
        returnType = normalizeType(returnMatch[1]);
    }
    return { params, returnType };
}
|
|
44
|
+
/**
 * Collapses a JSDoc type expression into a coarse, case-insensitive label.
 *
 * Container types are recognized before the primitive substring checks,
 * otherwise `Promise<string>` and `string[]` would both be reported as
 * `string` because their text contains that word.
 *
 * @param {string} type Raw type text taken from between `{` and `}`.
 * @returns {string} One of `string`, `number`, `boolean`, `array`, `object`,
 *   `Promise`, `Promise<inner>` (inner normalized recursively) or `unknown`.
 */
function normalizeType(type) {
    const text = type.trim().toLowerCase();
    // Whole-expression Promise: `Promise`, `Promise<T>` or `Promise.<T>`.
    const wholePromise = text.match(/^promise(?:\.?<(.+)>)?$/);
    if (wholePromise) {
        return wholePromise[1] ? `Promise<${normalizeType(wholePromise[1])}>` : 'Promise';
    }
    // Array notations: `T[]`, `Array`, `Array<T>`, `Array.<T>`.
    if (text.endsWith('[]') || /^array\b/.test(text))
        return 'array';
    // Anything else (unions, nullable prefixes, custom names) keeps the
    // substring heuristic, in its original priority order.
    if (text.includes('string'))
        return 'string';
    if (text.includes('number'))
        return 'number';
    if (text.includes('boolean'))
        return 'boolean';
    if (text.includes('array'))
        return 'array';
    if (text.includes('object'))
        return 'object';
    if (text.includes('promise')) {
        const inner = text.match(/promise<(.+)>/);
        return inner ? `Promise<${normalizeType(inner[1])}>` : 'Promise';
    }
    return 'unknown';
}
|
|
62
|
+
/**
 * Looks up the documented type of `name`, considering only the parameter
 * map's own entries so that names such as `constructor` or `toString` never
 * resolve to members inherited from Object.prototype.
 */
function lookupJSDocParamType(jsdoc, name) {
    const documented = jsdoc?.params;
    if (documented && Object.prototype.hasOwnProperty.call(documented, name)) {
        return documented[name] ?? 'unknown';
    }
    return 'unknown';
}
/**
 * Renders one function parameter as a normalized signature fragment.
 *
 *   - Identifier          → `name:type`
 *   - ObjectPattern       → `{a:type;b:type}` with entries sorted; an entry
 *                           whose key has no `name` is rendered as `unknown`
 *   - AssignmentPattern   → fragment of the left-hand side + `=$default`
 *                           (the default's value is ignored)
 *   - RestElement         → `...` + fragment of the rest argument
 *   - anything else       → `fallbackName:unknown`
 *
 * NOTE(review): Identifier parameters keep their real name in the fragment,
 * so renaming a parameter changes the resulting signature — confirm this is
 * intended.
 *
 * @param {object} param Babel parameter node (only `type` and the fields
 *   named above are read).
 * @param {string} fallbackName Placeholder used when no name can be derived.
 * @param {{ params: Record<string, string> } | undefined} jsdoc Parsed JSDoc,
 *   if any; types default to `unknown` without it.
 * @returns {string} The signature fragment.
 */
function normalizeParam(param, fallbackName, jsdoc) {
    if (param.type === 'AssignmentPattern') {
        return `${normalizeParam(param.left, fallbackName, jsdoc)}=$default`;
    }
    if (param.type === 'RestElement') {
        return `...${normalizeParam(param.argument, fallbackName, jsdoc)}`;
    }
    if (param.type === 'Identifier') {
        const name = param.name;
        return `${name}:${lookupJSDocParamType(jsdoc, name)}`;
    }
    if (param.type === 'ObjectPattern') {
        const props = param.properties.map((p) => {
            if (p.key?.name) {
                const name = p.key.name;
                return `${name}:${lookupJSDocParamType(jsdoc, name)}`;
            }
            return 'unknown';
        });
        // Sorted so that the order of destructured keys does not affect the result.
        return `{${props.sort().join(';')}}`;
    }
    return `${fallbackName}:unknown`;
}
|
|
81
|
+
/**
 * Maps a literal-like expression node to a coarse type label.
 *
 * @param {object} expr Babel expression node.
 * @returns {string} `string`, `number`, `boolean`, `array` or `object` for the
 *   matching literal/array/object expression kinds; `unknown` otherwise.
 */
function inferTypeFromExpression(expr) {
    // Checked top to bottom; the first matching guard decides the label.
    const literalKinds = [
        ['isStringLiteral', 'string'],
        ['isNumericLiteral', 'number'],
        ['isBooleanLiteral', 'boolean'],
        ['isArrayExpression', 'array'],
        ['isObjectExpression', 'object'],
    ];
    const hit = literalKinds.find(([guard]) => t[guard](expr));
    return hit ? hit[1] : 'unknown';
}
|
|
94
|
+
/**
 * Guesses a function's return type from its body when no JSDoc is available.
 *
 * For an expression body (concise arrow function) the expression itself is
 * classified. For a block body only the first `return` statement that is a
 * direct child of the block is considered, and only if it has an argument.
 *
 * @param {object} node Function-like Babel node (reads `node.body`).
 * @returns {string} The label produced by inferTypeFromExpression, or
 *   `unknown` when there is no body or no usable return statement.
 */
function inferReturnType(node) {
    const { body } = node;
    if (!body) {
        return 'unknown';
    }
    if (!t.isBlockStatement(body)) {
        return inferTypeFromExpression(body);
    }
    const firstReturn = body.body.find((stmt) => t.isReturnStatement(stmt));
    return firstReturn && firstReturn.argument
        ? inferTypeFromExpression(firstReturn.argument)
        : 'unknown';
}
|
|
108
|
+
/**
 * Builds the normalized signature `fn(<params>)=><returnType>` for a
 * function node.
 *
 * Parameter fragments come from normalizeParam, with `$p<index>` passed as the
 * fallback name. The return type is the JSDoc return type when the node's
 * leading comment provides one, otherwise the result of inferReturnType.
 *
 * @param {object} node Function-like Babel node (reads `params`, `body` and
 *   `leadingComments`).
 * @returns {string} The normalized signature.
 */
function normalizeFunction(node) {
    const rawDoc = getJSDoc(node);
    const doc = rawDoc ? parseJSDoc(rawDoc) : undefined;
    const fragments = [];
    node.params.forEach((param, index) => {
        fragments.push(normalizeParam(param, `$p${index}`, doc));
    });
    // Body inference only runs when JSDoc supplies no return type.
    const resultType = doc?.returnType ?? inferReturnType(node);
    return `fn(${fragments.join(',')})=>${resultType}`;
}
|
|
115
|
+
/**
 * Builds the normalized signature `fn(<params>)=><returnType>` for an arrow
 * function expression.
 *
 * Arrow functions are normalized exactly like ordinary functions, so this
 * delegates to normalizeFunction instead of keeping a second copy of the same
 * logic that could drift out of sync.
 *
 * NOTE(review): JSDoc is read from the arrow node's own `leadingComments`.
 * For `const f = () => {}` the comment may be attached to the enclosing
 * declaration instead — confirm that callers pass a node carrying the comment.
 *
 * @param {object} node ArrowFunctionExpression node.
 * @returns {string} The normalized signature.
 */
function normalizeArrowFunction(node) {
    return normalizeFunction(node);
}
|
|
123
|
+
/**
 * Builds the normalized signature of a class: `class{<methods>}`.
 *
 * Only members recognized as class methods contribute. Each one is rendered
 * as `<name>():unknown` (parameters and return types are not inspected), using
 * `unknown` as the name when the key is not a plain identifier. Entries are
 * sorted so that declaration order does not affect the result.
 *
 * @param {object} node Class node (reads `node.body.body`).
 * @returns {string} The normalized class signature.
 */
function normalizeClass(node) {
    const signatures = [];
    for (const member of node.body.body) {
        if (!t.isClassMethod(member)) {
            continue;
        }
        const label = t.isIdentifier(member.key) ? member.key.name : 'unknown';
        signatures.push(`${label}():unknown`);
    }
    signatures.sort();
    return `class{${signatures.join(';')}}`;
}
|
|
133
|
+
/**
 * Depth-first, pre-order walk over an AST-like object graph.
 *
 * `cb` is invoked for `node` first, then for every value reachable through
 * its enumerable properties (directly or as an array element) that is an
 * object with a string `type` field. Everything else is skipped.
 *
 * @param {object} node Root of the walk.
 * @param {(node: object) => void} cb Visitor called once per visited node.
 * @returns {void}
 */
function traverseNode(node, cb) {
    // Recurse only into values that look like AST nodes.
    const descend = (candidate) => {
        if (!candidate || typeof candidate.type !== 'string') {
            return;
        }
        traverseNode(candidate, cb);
    };
    cb(node);
    for (const field in node) {
        const child = node[field];
        if (Array.isArray(child)) {
            child.forEach((item) => descend(item));
        }
        else {
            descend(child);
        }
    }
}
|
|
149
|
+
/**
 * Collects the identifier names that describe what a call targets:
 * `foo()` → ['foo']; `obj.method()` → ['obj', 'method'] (each part only when
 * it is a plain, non-computed identifier). Other callee shapes yield [].
 */
function collectCalleeNames(callee) {
    if (t.isIdentifier(callee)) {
        return [callee.name];
    }
    if (!t.isMemberExpression(callee)) {
        return [];
    }
    const names = [];
    if (t.isIdentifier(callee.object)) {
        names.push(callee.object.name);
    }
    if (!callee.computed && t.isIdentifier(callee.property)) {
        names.push(callee.property.name);
    }
    return names;
}
/**
 * Heuristically tags the behaviors exhibited by the code under `node`.
 *
 * Detected tags:
 *   - `performs network request` — a call whose callee name (or, for member
 *     calls such as `axios.get()` / `window.fetch()`, its object or property
 *     name) contains `fetch` or `axios`, case-insensitively
 *   - `uses timer` — same matching for `setTimeout` / `setInterval`
 *   - `uses local storage` — a member expression on `localStorage`, or one
 *     whose non-computed property is `localStorage` (`window.localStorage`)
 *
 * @param {object} node Root AST node to scan.
 * @returns {string[]} Distinct tags in order of first occurrence.
 */
export function inferBehaviorFromJS(node) {
    // A Set both de-duplicates and preserves first-seen order.
    const behavior = new Set();
    traverseNode(node, (n) => {
        if (t.isCallExpression(n)) {
            for (const rawName of collectCalleeNames(n.callee)) {
                const name = rawName.toLowerCase();
                if (name.includes('fetch') || name.includes('axios')) {
                    behavior.add('performs network request');
                }
                if (name.includes('settimeout') || name.includes('setinterval')) {
                    behavior.add('uses timer');
                }
            }
        }
        if (t.isMemberExpression(n)) {
            const onLocalStorage = t.isIdentifier(n.object) && n.object.name === 'localStorage';
            const viaProperty = !n.computed &&
                t.isIdentifier(n.property) &&
                n.property.name === 'localStorage';
            if (onLocalStorage || viaProperty) {
                behavior.add('uses local storage');
            }
        }
    });
    return Array.from(behavior);
}
|
|
172
|
+
/**
 * Produces the normalized signature string for a supported declaration node.
 *
 * Function declarations/expressions, arrow functions and class declarations
 * are dispatched to their respective normalizers.
 *
 * @param {object} node Babel AST node.
 * @returns {string} The normalized signature, or an empty string for any
 *   other node kind.
 */
export function extractNormalizedSignatureJS(node) {
    const isPlainFunction = t.isFunctionDeclaration(node) || t.isFunctionExpression(node);
    if (isPlainFunction) {
        return normalizeFunction(node);
    }
    if (t.isArrowFunctionExpression(node)) {
        return normalizeArrowFunction(node);
    }
    return t.isClassDeclaration(node) ? normalizeClass(node) : '';
}
|
|
184
|
+
/**
 * Computes the semantic hash of a symbol row.
 *
 * The hash covers a canonical JSON document with, in this key order: `name`,
 * `type`, `description` (null when absent), the normalized `signature`, and
 * the sorted `behavior`, `sideEffects` and `hooks` lists. The last two are
 * read from `row.meta` and default to empty.
 *
 * @param {{ name: string, type: string, description?: string | null,
 *   meta?: { sideEffects?: string[], hooks?: string[] }, node: object }} row
 * @returns {[string, string]} `[sha256 hex digest, canonical JSON string]`.
 */
export function computeSemanticHashJs(row) {
    const signature = extractNormalizedSignatureJS(row.node);
    const observedBehavior = inferBehaviorFromJS(row.node);
    const { sideEffects, hooks } = row.meta || {};
    // Key order matters: it determines the serialized text that is hashed.
    const canonical = JSON.stringify({
        name: row.name,
        type: row.type,
        description: row.description ?? null,
        signature,
        behavior: observedBehavior.sort(),
        sideEffects: [...(sideEffects ?? [])].sort(),
        hooks: [...(hooks ?? [])].sort(),
    });
    const digest = createHash('sha256').update(canonical).digest('hex');
    return [digest, canonical];
}
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import { env } from '../config/env.js';
|
|
2
|
-
import {
|
|
2
|
+
import { getAllTableSQLs } from '../db/schema.js';
|
|
3
|
+
import { SYMBOL_STATUS } from '../config/symbolStatus.js';
|
|
3
4
|
/**
|
|
4
5
|
* 依赖表上 `(path, name)` 唯一键:新行插入,已存在则更新类型/描述/内容与 meta;**不**修改 `usage_count`。
|
|
5
6
|
* @param rows 来自 `indexProject`;空数组时立即返回,不开启事务。
|
|
6
7
|
* @param embeddings 与 `rows` 等长;某项为 `null` 表示本行不更新已有 `embedding`(新行则写入 NULL)。
|
|
7
|
-
*
|
|
8
|
+
* - 有值 → status 置为 online(2)
|
|
9
|
+
* - null → 新行写 pending(1),已有行保持原 status
|
|
8
10
|
*/
|
|
9
11
|
export async function upsertSymbols(pool, rows, embeddings) {
|
|
10
12
|
if (rows.length === 0)
|
|
@@ -12,28 +14,48 @@ export async function upsertSymbols(pool, rows, embeddings) {
|
|
|
12
14
|
if (embeddings && embeddings.length !== rows.length) {
|
|
13
15
|
throw new Error('upsertSymbols: embeddings length must match rows');
|
|
14
16
|
}
|
|
15
|
-
const actor = process.env.GITHUB_USERNAME?.trim() || '
|
|
16
|
-
await pool.
|
|
17
|
-
const sql = `
|
|
18
|
-
INSERT INTO ${env.mysqlSymbolsTable} (name, type, category, path, description, content, meta, insert_user, updated_user, embedding)
|
|
19
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
20
|
-
ON DUPLICATE KEY UPDATE
|
|
21
|
-
type = VALUES(type),
|
|
22
|
-
category = VALUES(category),
|
|
23
|
-
description = VALUES(description),
|
|
24
|
-
content = VALUES(content),
|
|
25
|
-
meta = VALUES(meta),
|
|
26
|
-
updated_user = VALUES(updated_user),
|
|
27
|
-
embedding = CASE WHEN VALUES(embedding) IS NOT NULL THEN VALUES(embedding) ELSE embedding END
|
|
28
|
-
`;
|
|
29
|
-
const conn = await pool.getConnection();
|
|
17
|
+
const actor = process.env.GITHUB_USERNAME?.trim() || 'system';
|
|
18
|
+
const client = await pool.connect();
|
|
30
19
|
try {
|
|
31
|
-
|
|
20
|
+
// 确保 extension + 表 + 基础索引存在
|
|
21
|
+
for (const sql of getAllTableSQLs()) {
|
|
22
|
+
await client.query(sql);
|
|
23
|
+
}
|
|
24
|
+
await client.query('BEGIN');
|
|
25
|
+
const t = env.symbolsTable;
|
|
26
|
+
const sql = `
|
|
27
|
+
INSERT INTO ${t}
|
|
28
|
+
(name, type, category, path, description, content, meta,
|
|
29
|
+
insert_user, updated_user, embedding, semantic_hash, file_hash, status)
|
|
30
|
+
VALUES ($1, $2, $3, $4, $5, $6, $7::jsonb, $8, $9, $10::vector, $11, $12, $13)
|
|
31
|
+
ON CONFLICT (path, name) DO UPDATE SET
|
|
32
|
+
type = EXCLUDED.type,
|
|
33
|
+
category = EXCLUDED.category,
|
|
34
|
+
description = EXCLUDED.description,
|
|
35
|
+
content = EXCLUDED.content,
|
|
36
|
+
meta = EXCLUDED.meta,
|
|
37
|
+
updated_user = EXCLUDED.updated_user,
|
|
38
|
+
embedding = CASE
|
|
39
|
+
WHEN EXCLUDED.embedding IS NOT NULL THEN EXCLUDED.embedding
|
|
40
|
+
WHEN EXCLUDED.semantic_hash != ${t}.semantic_hash THEN NULL
|
|
41
|
+
ELSE ${t}.embedding
|
|
42
|
+
END,
|
|
43
|
+
semantic_hash = EXCLUDED.semantic_hash,
|
|
44
|
+
file_hash = EXCLUDED.file_hash,
|
|
45
|
+
status = CASE
|
|
46
|
+
WHEN EXCLUDED.embedding IS NOT NULL THEN ${SYMBOL_STATUS.ONLINE}
|
|
47
|
+
WHEN EXCLUDED.semantic_hash != ${t}.semantic_hash THEN ${SYMBOL_STATUS.PENDING}
|
|
48
|
+
ELSE ${t}.status
|
|
49
|
+
END,
|
|
50
|
+
updated_at = NOW()
|
|
51
|
+
`;
|
|
32
52
|
for (let i = 0; i < rows.length; i++) {
|
|
33
53
|
const r = rows[i];
|
|
34
54
|
const emb = embeddings?.[i];
|
|
35
|
-
|
|
36
|
-
|
|
55
|
+
// pgvector 接受 "[x1,x2,...]" 格式字符串
|
|
56
|
+
const vecStr = emb != null ? `[${emb.join(',')}]` : null;
|
|
57
|
+
const statusVal = vecStr !== null ? SYMBOL_STATUS.ONLINE : SYMBOL_STATUS.PENDING;
|
|
58
|
+
await client.query(sql, [
|
|
37
59
|
r.name,
|
|
38
60
|
r.type,
|
|
39
61
|
r.category,
|
|
@@ -43,16 +65,19 @@ export async function upsertSymbols(pool, rows, embeddings) {
|
|
|
43
65
|
JSON.stringify(r.meta),
|
|
44
66
|
actor,
|
|
45
67
|
actor,
|
|
46
|
-
|
|
68
|
+
vecStr, // $10 → cast as vector, null 时写 NULL
|
|
69
|
+
r.semantic_hash,
|
|
70
|
+
r.file_hash,
|
|
71
|
+
statusVal,
|
|
47
72
|
]);
|
|
48
73
|
}
|
|
49
|
-
await
|
|
74
|
+
await client.query('COMMIT');
|
|
50
75
|
}
|
|
51
76
|
catch (e) {
|
|
52
|
-
await
|
|
77
|
+
await client.query('ROLLBACK');
|
|
53
78
|
throw e;
|
|
54
79
|
}
|
|
55
80
|
finally {
|
|
56
|
-
|
|
81
|
+
client.release();
|
|
57
82
|
}
|
|
58
83
|
}
|