@lorrylurui/code-intelligence-mcp 1.1.14 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +91 -598
- package/dist/cli/ci-index-cli.js +66 -0
- package/dist/cli/ci-index.js +80 -0
- package/dist/cli/detect-duplicates.js +1 -6
- package/dist/cli/embedding-worker-cli.js +35 -0
- package/dist/cli/index-codebase.js +6 -7
- package/dist/config/env.js +3 -102
- package/dist/config/symbolStatus.js +8 -0
- package/dist/db/mysql.js +3 -6
- package/dist/db/schema.js +9 -2
- package/dist/indexer/astNormalizer.js +201 -0
- package/dist/indexer/babelParser.js +257 -28
- package/dist/indexer/categoryClassifier.js +129 -0
- package/dist/indexer/embedText.js +9 -7
- package/dist/indexer/extractMeta.js +7 -2
- package/dist/indexer/heuristics.js +42 -23
- package/dist/indexer/indexProject.js +145 -55
- package/dist/indexer/jsAstNormalizer.js +201 -0
- package/dist/indexer/persistSymbols.js +7 -3
- package/dist/indexer/tsAstNormalizer.js +363 -0
- package/dist/prompts/reusableCodeAdvisorPrompt.js +6 -3
- package/dist/repositories/symbolRepository.js +81 -7
- package/dist/services/embeddingQueue.js +56 -0
- package/dist/services/reindex.js +12 -9
- package/dist/tools/searchByStructure.js +3 -1
- package/dist/tools/searchSymbols.js +14 -3
- package/dist/workers/embeddingWorker.js +100 -0
- package/package.json +7 -4
|
@@ -3,7 +3,9 @@
|
|
|
3
3
|
*/
|
|
4
4
|
import * as babelParser from '@babel/parser';
|
|
5
5
|
import * as bt from '@babel/types';
|
|
6
|
-
import { getRelativePathForDisplay, inferCategoryFromPath } from './heuristics.js';
|
|
6
|
+
import { getRelativePathForDisplay, inferCategoryFromPath, inferCategoryFromName, } from './heuristics.js';
|
|
7
|
+
import { computeFileHash } from './tsAstNormalizer.js';
|
|
8
|
+
import { computeSemanticHashJs } from './jsAstNormalizer.js';
|
|
7
9
|
/** 从 JS 文件内容解析导出的代码块 */
|
|
8
10
|
export function parseJsFile(filePath, content, projectRoot) {
|
|
9
11
|
const out = [];
|
|
@@ -46,7 +48,23 @@ export function parseJsFile(filePath, content, projectRoot) {
|
|
|
46
48
|
out.push(...rows);
|
|
47
49
|
}
|
|
48
50
|
}
|
|
49
|
-
|
|
51
|
+
// 第三轮:更新content、file_hash、semantic_hash 字段
|
|
52
|
+
const outWithTwoHash = out.map((o) => {
|
|
53
|
+
const [semantic_hash, stableStr] = computeSemanticHashJs(o);
|
|
54
|
+
const { name, type, description, meta, category, path } = o;
|
|
55
|
+
return {
|
|
56
|
+
name,
|
|
57
|
+
type,
|
|
58
|
+
category,
|
|
59
|
+
path,
|
|
60
|
+
description,
|
|
61
|
+
meta,
|
|
62
|
+
content: stableStr,
|
|
63
|
+
file_hash: computeFileHash(content),
|
|
64
|
+
semantic_hash,
|
|
65
|
+
};
|
|
66
|
+
});
|
|
67
|
+
return outWithTwoHash;
|
|
50
68
|
}
|
|
51
69
|
/** 处理导出的声明 */
|
|
52
70
|
function processStatement(stmt, filePath, isJsx, projectRoot) {
|
|
@@ -70,10 +88,14 @@ function processStatement(stmt, filePath, isJsx, projectRoot) {
|
|
|
70
88
|
}
|
|
71
89
|
else if (bt.isVariableDeclaration(decl)) {
|
|
72
90
|
for (const declarator of decl.declarations) {
|
|
73
|
-
if (bt.isVariableDeclarator(declarator) &&
|
|
91
|
+
if (bt.isVariableDeclarator(declarator) &&
|
|
92
|
+
declarator.id &&
|
|
93
|
+
bt.isIdentifier(declarator.id)) {
|
|
74
94
|
const name = declarator.id.name;
|
|
75
95
|
const init = declarator.init;
|
|
76
|
-
if (name &&
|
|
96
|
+
if (name &&
|
|
97
|
+
(bt.isArrowFunctionExpression(init) ||
|
|
98
|
+
bt.isFunctionExpression(init))) {
|
|
77
99
|
const fnDecl = arrowToFunction(name, init);
|
|
78
100
|
out.push(createRowFromFunction(name, fnDecl, filePath, projectRoot, isJsx));
|
|
79
101
|
}
|
|
@@ -84,19 +106,26 @@ function processStatement(stmt, filePath, isJsx, projectRoot) {
|
|
|
84
106
|
// 处理 module.exports = xxx
|
|
85
107
|
else if (bt.isExpressionStatement(stmt)) {
|
|
86
108
|
const expr = stmt.expression;
|
|
87
|
-
if (bt.isAssignmentExpression(expr) &&
|
|
109
|
+
if (bt.isAssignmentExpression(expr) &&
|
|
110
|
+
bt.isMemberExpression(expr.left)) {
|
|
88
111
|
const left = expr.left;
|
|
89
112
|
// module.exports = xxx
|
|
90
|
-
if (bt.isIdentifier(left.object) &&
|
|
91
|
-
|
|
113
|
+
if (bt.isIdentifier(left.object) &&
|
|
114
|
+
left.object.name === 'module' &&
|
|
115
|
+
bt.isIdentifier(left.property) &&
|
|
116
|
+
left.property.name === 'exports') {
|
|
92
117
|
const right = expr.right;
|
|
93
118
|
if (bt.isObjectExpression(right)) {
|
|
94
119
|
for (const prop of right.properties) {
|
|
95
|
-
if (bt.isObjectProperty(prop) &&
|
|
120
|
+
if (bt.isObjectProperty(prop) &&
|
|
121
|
+
bt.isIdentifier(prop.key)) {
|
|
96
122
|
const name = prop.key.name;
|
|
97
123
|
const value = prop.value;
|
|
98
|
-
if (bt.isFunctionExpression(value) ||
|
|
99
|
-
|
|
124
|
+
if (bt.isFunctionExpression(value) ||
|
|
125
|
+
bt.isArrowFunctionExpression(value)) {
|
|
126
|
+
const fnDecl = arrowToFunction(name, bt.isArrowFunctionExpression(value)
|
|
127
|
+
? value
|
|
128
|
+
: value);
|
|
100
129
|
out.push(createRowFromFunction(name, fnDecl, filePath, projectRoot, isJsx));
|
|
101
130
|
}
|
|
102
131
|
}
|
|
@@ -112,11 +141,15 @@ function processStatement(stmt, filePath, isJsx, projectRoot) {
|
|
|
112
141
|
}
|
|
113
142
|
}
|
|
114
143
|
// exports.xxx = xxx
|
|
115
|
-
else if (bt.isIdentifier(left.object) &&
|
|
116
|
-
|
|
144
|
+
else if (bt.isIdentifier(left.object) &&
|
|
145
|
+
left.object.name === 'exports') {
|
|
146
|
+
const name = bt.isIdentifier(left.property)
|
|
147
|
+
? left.property.name
|
|
148
|
+
: null;
|
|
117
149
|
if (name) {
|
|
118
150
|
const right = expr.right;
|
|
119
|
-
if (bt.isFunctionExpression(right) ||
|
|
151
|
+
if (bt.isFunctionExpression(right) ||
|
|
152
|
+
bt.isArrowFunctionExpression(right)) {
|
|
120
153
|
const fnDecl = arrowToFunction(name, bt.isArrowFunctionExpression(right) ? right : right);
|
|
121
154
|
out.push(createRowFromFunction(name, fnDecl, filePath, projectRoot, isJsx));
|
|
122
155
|
}
|
|
@@ -166,10 +199,14 @@ function scanAllDeclarations(stmt, filePath, isJsx, projectRoot) {
|
|
|
166
199
|
// 变量声明: const foo = () => {}, const bar = function() {}
|
|
167
200
|
else if (bt.isVariableDeclaration(stmt)) {
|
|
168
201
|
for (const declarator of stmt.declarations) {
|
|
169
|
-
if (bt.isVariableDeclarator(declarator) &&
|
|
202
|
+
if (bt.isVariableDeclarator(declarator) &&
|
|
203
|
+
declarator.id &&
|
|
204
|
+
bt.isIdentifier(declarator.id)) {
|
|
170
205
|
const name = declarator.id.name;
|
|
171
206
|
const init = declarator.init;
|
|
172
|
-
if (name &&
|
|
207
|
+
if (name &&
|
|
208
|
+
(bt.isArrowFunctionExpression(init) ||
|
|
209
|
+
bt.isFunctionExpression(init))) {
|
|
173
210
|
const fnDecl = arrowToFunction(name, init);
|
|
174
211
|
out.push(createRowFromFunction(name, fnDecl, filePath, projectRoot, isJsx));
|
|
175
212
|
}
|
|
@@ -197,24 +234,24 @@ function arrowToFunction(name, arrow) {
|
|
|
197
234
|
}
|
|
198
235
|
function createRowFromFunction(name, decl, filePath, projectRoot, isJsx) {
|
|
199
236
|
const relPath = getRelativePathForDisplay(projectRoot, filePath);
|
|
200
|
-
const category = inferCategoryFromPath(filePath);
|
|
237
|
+
const category = inferCategoryFromPath(filePath) || inferCategoryFromName(name);
|
|
201
238
|
// 检测是否有 JSX
|
|
202
239
|
const hasJsx = isJsx || containsJsx(decl);
|
|
203
240
|
// 判断类型:
|
|
204
241
|
// 1. 有 JSX = component
|
|
205
|
-
// 2. 名字包含
|
|
242
|
+
// 2. 名字包含 use = hook
|
|
206
243
|
// 3. 大写开头(JSX 组件约定)= component
|
|
207
|
-
// 4. 其他 =
|
|
208
|
-
const type =
|
|
209
|
-
? '
|
|
210
|
-
: name
|
|
211
|
-
? '
|
|
212
|
-
:
|
|
213
|
-
? 'component'
|
|
214
|
-
: 'util';
|
|
244
|
+
// 4. 其他 = function
|
|
245
|
+
const type = name.toLowerCase().includes('use')
|
|
246
|
+
? 'hook'
|
|
247
|
+
: isJsx || /^[A-Z]/.test(name)
|
|
248
|
+
? 'component'
|
|
249
|
+
: 'function';
|
|
215
250
|
const params = decl.params
|
|
216
251
|
.filter((p) => bt.isIdentifier(p))
|
|
217
252
|
.map((p) => p.name);
|
|
253
|
+
const hooks = extractHooksFromBody(decl);
|
|
254
|
+
const sideEffects = extractSideEffects(decl);
|
|
218
255
|
return {
|
|
219
256
|
name,
|
|
220
257
|
type,
|
|
@@ -223,24 +260,34 @@ function createRowFromFunction(name, decl, filePath, projectRoot, isJsx) {
|
|
|
223
260
|
description: null,
|
|
224
261
|
content: `function ${decl.id?.name || 'anonymous'}(${params.join(', ')}) { ... }`,
|
|
225
262
|
meta: {
|
|
263
|
+
kind: 'function',
|
|
226
264
|
params,
|
|
227
265
|
returnType: getReturnType(decl),
|
|
266
|
+
...(hooks.length ? { hooks } : {}),
|
|
267
|
+
...(sideEffects.length ? { sideEffects } : {}),
|
|
228
268
|
},
|
|
269
|
+
file_hash: '',
|
|
270
|
+
semantic_hash: '',
|
|
271
|
+
node: decl,
|
|
229
272
|
};
|
|
230
273
|
}
|
|
231
274
|
/**
 * Build an index row for a class declaration.
 *
 * @param {string} name - Class name; also used for the type and category heuristics.
 * @param {object} _decl - Babel class node, stored on the row as `node`.
 * @param {string} filePath - Path of the file that declares the class.
 * @param {string} projectRoot - Project root used to compute the display path.
 * @returns {object} Row with empty `file_hash` / `semantic_hash` placeholders
 *   (parseJsFile fills the hashes in afterwards).
 */
function createRowFromClass(name, _decl, filePath, projectRoot) {
    const relPath = getRelativePathForDisplay(projectRoot, filePath);
    // Same fallback chain as createRowFromFunction: when the path gives no hint,
    // try to infer the category from the symbol name.
    const category = inferCategoryFromPath(filePath) || inferCategoryFromName(name);
    // Classes whose name starts with an upper-case letter are treated as components.
    const type = /^[A-Z]/.test(name) ? 'component' : 'function';
    return {
        name,
        type,
        category,
        path: relPath,
        description: null,
        // `content` and `meta.kind` are intentionally left unused for now.
        content: null,
        meta: {},
        file_hash: '',
        semantic_hash: '',
        node: _decl,
    };
}
|
|
246
293
|
/** 简单检测是否包含 JSX */
|
|
@@ -253,7 +300,17 @@ function containsJsx(node) {
|
|
|
253
300
|
return;
|
|
254
301
|
}
|
|
255
302
|
// 只遍历常见的包含子节点的属性
|
|
256
|
-
const keys = [
|
|
303
|
+
const keys = [
|
|
304
|
+
'body',
|
|
305
|
+
'declarations',
|
|
306
|
+
'arguments',
|
|
307
|
+
'callee',
|
|
308
|
+
'init',
|
|
309
|
+
'left',
|
|
310
|
+
'right',
|
|
311
|
+
'consequent',
|
|
312
|
+
'alternate',
|
|
313
|
+
];
|
|
257
314
|
for (const key of keys) {
|
|
258
315
|
const val = n[key];
|
|
259
316
|
if (Array.isArray(val)) {
|
|
@@ -294,3 +351,175 @@ function getReturnType(fn) {
|
|
|
294
351
|
}
|
|
295
352
|
return undefined;
|
|
296
353
|
}
|
|
354
|
+
/**
 * Walk a Babel AST in depth-first pre-order and invoke `callback` on every node.
 *
 * A value counts as a child node when it is a non-null object with a `type`
 * property; arrays of such values are visited in order. The walk is iterative
 * (explicit stack) so that very deeply nested trees cannot overflow the call
 * stack, and a null / non-object root is ignored instead of throwing.
 *
 * @param {object} node - Root node to start from.
 * @param {(node: object) => void} callback - Called once per visited node.
 * @returns {void}
 */
function visitNodes(node, callback) {
    if (!node || typeof node !== 'object')
        return;
    const isAstNode = (value) => value !== null && typeof value === 'object' && 'type' in value;
    const pending = [node];
    while (pending.length > 0) {
        const current = pending.pop();
        callback(current);
        // Collect children in source order ...
        const children = [];
        for (const key of Object.keys(current)) {
            const val = current[key];
            if (Array.isArray(val)) {
                for (const item of val) {
                    if (isAstNode(item)) {
                        children.push(item);
                    }
                }
            }
            else if (isAstNode(val)) {
                children.push(val);
            }
        }
        // ... and push them reversed so the first child is popped first (pre-order).
        for (let i = children.length - 1; i >= 0; i -= 1) {
            pending.push(children[i]);
        }
    }
}
|
|
373
|
+
/**
 * Collect the names of React Hooks called inside a function body.
 *
 * Only calls whose callee is a plain identifier are considered. A name counts
 * as a Hook when it is exactly `use` or starts with `use` followed by an
 * upper-case letter or digit (the React naming rule), so ordinary functions
 * such as `user()` or `useless()` are not reported. Calls inside nested
 * functions are included because the whole body subtree is walked.
 *
 * @param {object} fn - Babel function node; must have a block-statement body.
 * @returns {string[]} Unique hook names, sorted; empty when there is no block body.
 */
function extractHooksFromBody(fn) {
    const body = fn.body;
    if (!body || !bt.isBlockStatement(body))
        return [];
    const isHookName = (name) => name === 'use' || /^use[A-Z0-9]/.test(name);
    const seen = new Set();
    visitNodes(body, (n) => {
        if (!bt.isCallExpression(n))
            return;
        const callee = n.callee;
        if (bt.isIdentifier(callee) && isHookName(callee.name)) {
            seen.add(callee.name);
        }
    });
    return [...seen].sort();
}
|
|
391
|
+
/**
 * Build a compact textual form of an AST node from its properties (no source
 * text is needed).
 *
 * Supported shapes:
 *  - member access:  `a.b`, `a[b]` (optional chaining is printed the same way)
 *  - identifiers and `this`
 *  - literals: Babel's `StringLiteral` / `NumericLiteral` / `BooleanLiteral` /
 *    `BigIntLiteral` / `NullLiteral`, plus ESTree's generic `Literal`
 *  - calls:          `callee(...)`
 *  - assignments:    `left = ...`
 * Anything else yields an empty string.
 *
 * @param {object|null|undefined} n - AST node.
 * @returns {string} Text form, or '' for unsupported / missing nodes.
 */
function getNodeText(n) {
    if (!n || typeof n !== 'object')
        return '';
    switch (n.type) {
        case 'MemberExpression':
        case 'OptionalMemberExpression': {
            const objectText = getNodeText(n.object);
            // Recurse so literal and nested-member keys are rendered too.
            const propertyText = getNodeText(n.property);
            return objectText + (n.computed ? `[${propertyText}]` : `.${propertyText}`);
        }
        case 'Identifier':
            return n.name || '';
        case 'ThisExpression':
            return 'this';
        case 'NullLiteral':
            // Babel's NullLiteral carries no `value` property.
            return 'null';
        case 'StringLiteral':
        case 'NumericLiteral':
        case 'BooleanLiteral':
        case 'BigIntLiteral':
        case 'Literal':
            return n.value == null ? 'null' : String(n.value);
        case 'CallExpression':
        case 'OptionalCallExpression':
            return `${getNodeText(n.callee)}(...)`;
        case 'AssignmentExpression':
            return `${getNodeText(n.left)} = ...`;
        default:
            return '';
    }
}
|
|
434
|
+
/**
 * Statically scan a function body for side effects.
 *
 * Labels that can be reported:
 *  - 'network'  : calls to fetch / axios / xhr / ajax, `axios.*`, any callee whose
 *                 text contains "request" (timers excluded), or `new XMLHttpRequest()`
 *  - 'timer'    : setTimeout / setInterval / setImmediate / requestAnimationFrame /
 *                 requestIdleCallback (optionally prefixed by window./globalThis./self.)
 *  - 'dom'      : an assignment statement whose target goes through
 *                 document. / window. / navigator. / location.
 *  - 'storage'  : calls whose callee text mentions localStorage, sessionStorage or cookie
 *  - 'mutation' : assignment to a property of a parameter, or a mutating array
 *                 method (push, pop, shift, unshift, splice, sort, reverse, fill)
 *                 called on a parameter
 *
 * Only identifier parameters are tracked for mutation; destructured ones are ignored.
 *
 * @param {object} fn - Babel function node; must have a block-statement body.
 * @returns {string[]} Unique effect labels, sorted; empty when there is no block body.
 */
function extractSideEffects(fn) {
    const body = fn.body;
    if (!body || !bt.isBlockStatement(body))
        return [];
    const effects = new Set();
    const paramNames = fn.params
        .filter((p) => bt.isIdentifier(p))
        .map((p) => p.name);
    const startsAtParam = (text) => paramNames.some((param) => text.startsWith(`${param}.`) || text.startsWith(`${param}[`));
    const timerFunctions = new Set([
        'setTimeout',
        'setInterval',
        'requestAnimationFrame',
        'requestIdleCallback',
        'setImmediate',
    ]);
    const networkFunctions = new Set(['fetch', 'axios', 'xhr', 'ajax']);
    // Callee text has no trailing "(", so anchor the method name at the end.
    const mutatingMethod = /\.(push|pop|shift|unshift|splice|sort|reverse|fill)$/;
    const browserGlobal = /\b(document|window|navigator|location)\.\w+/;
    visitNodes(body, (n) => {
        // `new XMLHttpRequest()` is a NewExpression, not a call.
        if (bt.isNewExpression(n)) {
            if (/(^|\.)XMLHttpRequest$/.test(getNodeText(n.callee))) {
                effects.add('network');
            }
            return;
        }
        if (bt.isCallExpression(n)) {
            // Full dotted callee text, e.g. "axios.get" or "items.push".
            const calleeText = getNodeText(n.callee);
            const bareCallee = calleeText.replace(/^(window|globalThis|self)\./, '');
            const bareLower = bareCallee.toLowerCase();
            // 1. Timers are checked first so that requestAnimationFrame is not
            //    mistaken for a network "request".
            if (timerFunctions.has(bareCallee)) {
                effects.add('timer');
            }
            // 2. Network requests
            else if (networkFunctions.has(bareLower) ||
                bareCallee.startsWith('axios.') ||
                bareLower.includes('request')) {
                effects.add('network');
            }
            // 3. Storage access
            if (calleeText.includes('localStorage') ||
                calleeText.includes('sessionStorage') ||
                calleeText.includes('cookie')) {
                effects.add('storage');
            }
            // 4. Mutating array methods invoked on a parameter
            if (startsAtParam(calleeText) && mutatingMethod.test(calleeText)) {
                effects.add('mutation');
            }
            return;
        }
        // 5. Writes to a property of a parameter
        if (bt.isAssignmentExpression(n)) {
            if (startsAtParam(getNodeText(n.left))) {
                effects.add('mutation');
            }
            return;
        }
        // 6. Writes to DOM / global browser objects (reads are not reported)
        if (bt.isExpressionStatement(n) && bt.isAssignmentExpression(n.expression)) {
            if (browserGlobal.test(getNodeText(n.expression.left))) {
                effects.add('dom');
            }
        }
    });
    return [...effects].sort();
}
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* category 分类器:三层融合(规则 + embedding + LLM)
|
|
3
|
+
*/
|
|
4
|
+
import { createHash } from 'node:crypto';
|
|
5
|
+
import { env } from '../config/env.js';
|
|
6
|
+
import { createEmbeddingClient } from '../services/embeddingClient.js';
|
|
7
|
+
import { cosineSimilarity } from '../services/vectorMath.js';
|
|
8
|
+
import { inferCategoryFromPath, inferCategoryFromName } from './heuristics.js';
|
|
9
|
+
// 1. Category label space. Frozen because the list is shared module state that
//    is only ever read (embedded, joined into the LLM prompt, matched against replies).
const CATEGORIES = Object.freeze([
    'network',
    'form',
    'validation',
    'format',
    'state',
    'ui',
    'storage',
    'math',
    'hook',
    'utils',
    'component',
    'service',
]);
|
|
24
|
+
// 2. Pre-computed category embeddings (module-level cache; null until initialised)
let categoryEmbeddingsCache = null;
/**
 * Embed every category label and store the `{ name, embedding }` pairs in the
 * module-level cache. Does nothing when no embedding service URL is configured.
 * Labels are embedded with one request each, all issued concurrently.
 *
 * @returns {Promise<void>}
 */
export async function initCategoryEmbeddings() {
    if (!env.embeddingServiceUrl)
        return;
    const client = createEmbeddingClient(env.embeddingServiceUrl);
    const embedLabel = async (label) => {
        const [embedding] = await client.embed([label]);
        return { name: label, embedding };
    };
    const pendingEmbeddings = CATEGORIES.map((label) => embedLabel(label));
    categoryEmbeddingsCache = await Promise.all(pendingEmbeddings);
}
|
|
36
|
+
// 3. Embedding layer
const EMBEDDING_THRESHOLD = 0.5;
// TODO: known problem - `embedding` is a vector of a semantic template, whereas
// categoryEmbeddingsCache holds vectors of single words, so the similarity is
// bound to be < 0.3.
/**
 * Pick the category whose cached embedding is most similar to `embedding`.
 *
 * @param {number[]} embedding - Vector of the symbol being classified.
 * @returns {string|null} Best category name, or null when the cache is not
 *   initialised or the best score is below EMBEDDING_THRESHOLD.
 */
function categoryFromEmbedding(embedding) {
    if (!categoryEmbeddingsCache)
        return null;
    const initial = { name: 'utils', score: -Infinity };
    const winner = categoryEmbeddingsCache.reduce((top, candidate) => {
        const score = cosineSimilarity(embedding, candidate.embedding);
        // Strictly greater: on ties the earlier category wins.
        return score > top.score ? { name: candidate.name, score } : top;
    }, initial);
    if (winner.score < EMBEDDING_THRESHOLD) {
        return null;
    }
    return winner.name;
}
|
|
53
|
+
// 4. LLM layer (with cache)
const LLM_CACHE_TTL = 24 * 60 * 60 * 1000;
const OLLAMA_URL = 'http://127.0.0.1:11434/v1/chat/completions';
// Adjust the model name to match the local setup, e.g. 'llama3.2:3b' or 'llama3.1:8b'.
const OLLAMA_MODEL = 'llama3.2:3b';
const llmCategoryCache = new Map();
/**
 * Map a free-form LLM reply onto one of CATEGORIES.
 *
 * The reply is split into lower-case words. An exact word match wins; otherwise
 * the longest category that prefixes a word is used (so "hooks" -> "hook" and
 * "formatting" -> "format", never "form"). Plain substring matching is avoided
 * because short labels such as "ui" occur inside unrelated words.
 *
 * @param {unknown} reply - Raw message content returned by the model.
 * @returns {string|null} Matched category, or null when nothing matches.
 */
function pickCategoryFromReply(reply) {
    if (typeof reply !== 'string')
        return null;
    const words = reply.toLowerCase().split(/[^a-z]+/).filter(Boolean);
    const exact = words.find((word) => CATEGORIES.includes(word));
    if (exact)
        return exact;
    const longestFirst = [...CATEGORIES].sort((a, b) => b.length - a.length);
    for (const word of words) {
        const prefixed = longestFirst.find((category) => word.startsWith(category));
        if (prefixed)
            return prefixed;
    }
    return null;
}
/**
 * Ask the local Ollama chat endpoint to classify a piece of code semantics.
 *
 * Successful answers are cached in memory for LLM_CACHE_TTL, keyed by a
 * truncated SHA-256 of the input. Any failure (network error, non-2xx status,
 * unusable reply) is logged and reported as null; this function never throws.
 *
 * @param {string|null|undefined} stableStr - Semantic description of the code.
 * @returns {Promise<string|null>} One of CATEGORIES, or null when undecided.
 */
async function categoryFromLLM(stableStr) {
    if (!stableStr)
        return null;
    const cacheKey = createHash('sha256')
        .update(stableStr)
        .digest('hex')
        .slice(0, 16);
    const cached = llmCategoryCache.get(cacheKey);
    if (cached) {
        if (Date.now() - cached.timestamp < LLM_CACHE_TTL)
            return cached.category;
        // Drop the stale entry so the cache does not accumulate expired items.
        llmCategoryCache.delete(cacheKey);
    }
    const prompt = `给一段代码语义说明,在下面列出来的所有类别中找到最相符的类别,请直接返回命中的原始字符串,没有命中不用返回。
代码语义:${stableStr}
类别合集: ${CATEGORIES.join(', ')}`;
    try {
        const body = {
            model: OLLAMA_MODEL,
            messages: [{ role: 'user', content: prompt }],
            temperature: 0.4,
            stream: false,
        };
        const res = await fetch(OLLAMA_URL, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(body),
        });
        if (!res.ok) {
            throw new Error(`Ollama responded with HTTP ${res.status}`);
        }
        const data = await res.json();
        // The content may be absent (empty `choices`); the helper tolerates that.
        const category = pickCategoryFromReply(data?.choices?.[0]?.message?.content);
        if (category) {
            llmCategoryCache.set(cacheKey, { category, timestamp: Date.now() });
            return category;
        }
    }
    catch (e) {
        console.error('[categoryFromLLM]', e);
    }
    return null;
}
|
|
99
|
+
// 5. Three-layer fusion
/**
 * Assign a category to every row, trying three sources in order and stopping
 * at the first that yields a value:
 *   1. rules (path first, then name),
 *   2. embedding similarity against the cached category vectors,
 *   3. the LLM (its answer is used as-is, so the category may end up null).
 * Rows are processed concurrently; each step logs its outcome to stderr.
 *
 * @param {object[]} rows - Indexed rows (`name`, `path`, `content` are read).
 * @param {Array} vecs - Embedding vectors, index-aligned with `rows`.
 * @returns {Promise<object[]>} Copies of the rows with `category` replaced.
 */
export async function resolveCategory(rows, vecs) {
    const classifyRow = async (row, index) => {
        const { name: symbolName } = row;
        const withCategory = (category) => ({ ...row, category });
        const byRule = inferCategoryFromPath(row.path) || inferCategoryFromName(symbolName);
        console.error(`===from ruleCategory`, symbolName, byRule);
        if (byRule) {
            return withCategory(byRule);
        }
        // TODO: known problem - the row embedding is a semantic-template vector while
        // the cached category vectors are single-word vectors, so similarity stays < 0.3.
        const byEmbedding = categoryFromEmbedding(vecs[index]);
        console.error(`===from categoryFromEmbedding`, symbolName, byEmbedding);
        if (byEmbedding) {
            return withCategory(byEmbedding);
        }
        const byLlm = await categoryFromLLM(row.content);
        console.error(`===from categoryFromLLM`, symbolName, byLlm);
        return withCategory(byLlm);
    };
    const pending = rows.map((row, index) => classifyRow(row, index));
    return await Promise.all(pending);
}
|
|
@@ -1,15 +1,17 @@
|
|
|
1
|
+
// Only used for JS-type rows; slated for removal later.
const MAX_CONTENT_LENGTH = 1200;
/**
 * Summarise the list-valued fields of a row's meta object as one line of text.
 *
 * Only the `props`, `params`, `properties` and `hooks` keys are read, in that
 * order. Non-array values and non-string items are ignored, and at most 24
 * items per key are kept.
 *
 * @param {object|null|undefined} meta - Row metadata; a missing value yields ''.
 * @returns {string} e.g. "params: a, b; hooks: useState", or '' when nothing applies.
 */
function briefMeta(meta) {
    if (!meta || typeof meta !== 'object')
        return '';
    const keys = ['props', 'params', 'properties', 'hooks'];
    const parts = [];
    for (const k of keys) {
        const v = meta[k];
        if (!Array.isArray(v))
            continue;
        const strs = v.filter((x) => typeof x === 'string');
        if (strs.length)
            parts.push(`${k}: ${strs.slice(0, 24).join(', ')}`);
    }
    return parts.join('; ');
}
|
|
14
16
|
/**
|
|
15
17
|
* 拼成一段供向量模型编码的文本(名称、路径、注释、meta 摘要、源码片段)。
|
|
@@ -19,10 +21,10 @@ export function indexedRowToEmbedText(row) {
|
|
|
19
21
|
return [
|
|
20
22
|
`${row.type} ${row.name}`,
|
|
21
23
|
row.path,
|
|
22
|
-
row.description ??
|
|
24
|
+
row.description ?? '',
|
|
23
25
|
metaBit,
|
|
24
|
-
(row.content ??
|
|
26
|
+
(row.content ?? '').slice(0, MAX_CONTENT_LENGTH),
|
|
25
27
|
]
|
|
26
28
|
.filter((s) => s.length > 0)
|
|
27
|
-
.join(
|
|
29
|
+
.join('\n');
|
|
28
30
|
}
|
|
@@ -87,6 +87,7 @@ export function extractFunctionMeta(fn) {
|
|
|
87
87
|
const returnType = extractReturnTypeText(fn);
|
|
88
88
|
const sideEffects = extractSideEffects(fn);
|
|
89
89
|
return {
|
|
90
|
+
kind: 'function',
|
|
90
91
|
params,
|
|
91
92
|
...(paramTypeFields.length ? { paramTypeFields } : {}),
|
|
92
93
|
...(hooks.length ? { hooks } : {}),
|
|
@@ -116,6 +117,7 @@ export function extractInterfaceOrTypeMeta(node) {
|
|
|
116
117
|
if (Node.isTypeAliasDeclaration(node)) {
|
|
117
118
|
return { kind: 'typeAlias' };
|
|
118
119
|
}
|
|
120
|
+
// 其他类型(如 enum)暂不处理,标记为 unknown 以供后续扩展。
|
|
119
121
|
return { kind: 'unknown' };
|
|
120
122
|
}
|
|
121
123
|
/**
|
|
@@ -166,7 +168,9 @@ export function extractSideEffects(node) {
|
|
|
166
168
|
/\bnavigator\.\w+/.test(text) ||
|
|
167
169
|
/\blocation\.\w+/.test(text)) {
|
|
168
170
|
// 区分读取和写入
|
|
169
|
-
if (/=/.test(text) &&
|
|
171
|
+
if (/=/.test(text) &&
|
|
172
|
+
!text.includes('===') &&
|
|
173
|
+
!text.includes('==')) {
|
|
170
174
|
effects.add('dom');
|
|
171
175
|
}
|
|
172
176
|
}
|
|
@@ -199,7 +203,8 @@ export function extractSideEffects(node) {
|
|
|
199
203
|
const text = n.getText();
|
|
200
204
|
// 检测 param.x = ... 或 param.push/pop/splice 等
|
|
201
205
|
for (const param of paramNames) {
|
|
202
|
-
if (text.includes(`${param}.`) ||
|
|
206
|
+
if (text.includes(`${param}.`) ||
|
|
207
|
+
text.startsWith(`${param} =`)) {
|
|
203
208
|
effects.add('mutation');
|
|
204
209
|
break;
|
|
205
210
|
}
|