@lorrylurui/code-intelligence-mcp 1.1.15 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,9 @@
3
3
  */
4
4
  import * as babelParser from '@babel/parser';
5
5
  import * as bt from '@babel/types';
6
- import { getRelativePathForDisplay, inferCategoryFromPath, } from './heuristics.js';
6
+ import { getRelativePathForDisplay, inferCategoryFromPath, inferCategoryFromName, } from './heuristics.js';
7
+ import { computeFileHash } from './tsAstNormalizer.js';
8
+ import { computeSemanticHashJs } from './jsAstNormalizer.js';
7
9
  /** 从 JS 文件内容解析导出的代码块 */
8
10
  export function parseJsFile(filePath, content, projectRoot) {
9
11
  const out = [];
@@ -46,7 +48,23 @@ export function parseJsFile(filePath, content, projectRoot) {
46
48
  out.push(...rows);
47
49
  }
48
50
  }
49
- return out;
51
+ // 第三轮:更新content、file_hash、semantic_hash 字段
52
+ const outWithTwoHash = out.map((o) => {
53
+ const [semantic_hash, stableStr] = computeSemanticHashJs(o);
54
+ const { name, type, description, meta, category, path } = o;
55
+ return {
56
+ name,
57
+ type,
58
+ category,
59
+ path,
60
+ description,
61
+ meta,
62
+ content: stableStr,
63
+ file_hash: computeFileHash(content),
64
+ semantic_hash,
65
+ };
66
+ });
67
+ return outWithTwoHash;
50
68
  }
51
69
  /** 处理导出的声明 */
52
70
  function processStatement(stmt, filePath, isJsx, projectRoot) {
@@ -216,21 +234,19 @@ function arrowToFunction(name, arrow) {
216
234
  }
217
235
  function createRowFromFunction(name, decl, filePath, projectRoot, isJsx) {
218
236
  const relPath = getRelativePathForDisplay(projectRoot, filePath);
219
- const category = inferCategoryFromPath(filePath);
237
+ const category = inferCategoryFromPath(filePath) || inferCategoryFromName(name);
220
238
  // 检测是否有 JSX
221
239
  const hasJsx = isJsx || containsJsx(decl);
222
240
  // 判断类型:
223
241
  // 1. 有 JSX = component
224
- // 2. 名字包含 selector = selector
242
+ // 2. 名字包含 use = hook
225
243
  // 3. 大写开头(JSX 组件约定)= component
226
- // 4. 其他 = util
227
- const type = hasJsx
228
- ? 'component'
229
- : name.toLowerCase().includes('selector')
230
- ? 'selector'
231
- : isJsx && /^[A-Z]/.test(name)
232
- ? 'component'
233
- : 'util';
244
+ // 4. 其他 = function
245
+ const type = name.toLowerCase().includes('use')
246
+ ? 'hook'
247
+ : isJsx || /^[A-Z]/.test(name)
248
+ ? 'component'
249
+ : 'function';
234
250
  const params = decl.params
235
251
  .filter((p) => bt.isIdentifier(p))
236
252
  .map((p) => p.name);
@@ -250,21 +266,28 @@ function createRowFromFunction(name, decl, filePath, projectRoot, isJsx) {
250
266
  ...(hooks.length ? { hooks } : {}),
251
267
  ...(sideEffects.length ? { sideEffects } : {}),
252
268
  },
269
+ file_hash: '',
270
+ semantic_hash: '',
271
+ node: decl,
253
272
  };
254
273
  }
255
274
  function createRowFromClass(name, _decl, filePath, projectRoot) {
256
275
  const relPath = getRelativePathForDisplay(projectRoot, filePath);
257
276
  const category = inferCategoryFromPath(filePath);
258
277
  // 大写开头的类视为组件
259
- const type = /^[A-Z]/.test(name) ? 'component' : 'util';
278
+ const type = /^[A-Z]/.test(name) ? 'component' : 'function';
260
279
  return {
261
280
  name,
262
281
  type,
263
282
  category,
264
283
  path: relPath,
265
284
  description: null,
285
+ // content meta.kind 暂时废弃不用,
266
286
  content: null,
267
- meta: { kind: 'class' },
287
+ meta: {},
288
+ file_hash: '',
289
+ semantic_hash: '',
290
+ node: _decl,
268
291
  };
269
292
  }
270
293
  /** 简单检测是否包含 JSX */
@@ -380,7 +403,9 @@ function getNodeText(n) {
380
403
  if (propNode && typeof propNode === 'object') {
381
404
  const propType = propNode.type;
382
405
  if (propType === 'Identifier') {
383
- prop = propNode.name || '';
406
+ prop =
407
+ propNode.name ||
408
+ '';
384
409
  }
385
410
  else if (propType === 'Literal') {
386
411
  prop = String(propNode.value ?? '');
@@ -0,0 +1,129 @@
1
+ /**
2
+ * category 分类器:三层融合(规则 + embedding + LLM)
3
+ */
4
+ import { createHash } from 'node:crypto';
5
+ import { env } from '../config/env.js';
6
+ import { createEmbeddingClient } from '../services/embeddingClient.js';
7
+ import { cosineSimilarity } from '../services/vectorMath.js';
8
+ import { inferCategoryFromPath, inferCategoryFromName } from './heuristics.js';
9
+ // 1. Define the category label space (the only labels the embedding and LLM layers may return)
10
+ const CATEGORIES = [
11
+ 'network',
12
+ 'form',
13
+ 'validation',
14
+ 'format',
15
+ 'state',
16
+ 'ui',
17
+ 'storage',
18
+ 'math',
19
+ 'hook',
20
+ 'utils',
21
+ 'component',
22
+ 'service',
23
+ ];
24
+ // 2. Precomputed category embeddings (cache); stays null until initCategoryEmbeddings() assigns it
25
+ let categoryEmbeddingsCache = null;
26
/**
 * Embeds every category label once and stores the result in
 * `categoryEmbeddingsCache` as `{ name, embedding }` entries.
 *
 * Does nothing when `env.embeddingServiceUrl` is not configured. Each label is
 * sent in its own `embed` call and all calls run concurrently; a rejected call
 * rejects the returned promise and leaves the cache untouched.
 */
export async function initCategoryEmbeddings() {
    if (!env.embeddingServiceUrl) {
        return;
    }
    const embedder = createEmbeddingClient(env.embeddingServiceUrl);
    const embedLabel = async (label) => {
        // The client receives a one-element batch; only its first vector is kept.
        const [vector] = await embedder.embed([label]);
        return { name: label, embedding: vector };
    };
    const labelled = await Promise.all(CATEGORIES.map((label) => embedLabel(label)));
    categoryEmbeddingsCache = labelled;
}
36
// 3. Embedding layer
const EMBEDDING_THRESHOLD = 0.5;
// TODO: known problem (per the original author) - `embedding` is the vector of a
// semantic template while categoryEmbeddingsCache holds single-word vectors, so the
// similarity is expected to stay below 0.3 and never reach the threshold.
/**
 * Picks the category whose cached label embedding is most similar to `embedding`.
 *
 * @param embedding - Vector to compare against every cached label vector.
 * @returns The best-scoring label, or null when the cache has not been initialised
 *          or the best cosine similarity is below `EMBEDDING_THRESHOLD`.
 */
function categoryFromEmbedding(embedding) {
    const candidates = categoryEmbeddingsCache;
    if (!candidates) {
        return null;
    }
    let winner = 'utils';
    let topScore = -Infinity;
    candidates.forEach((candidate) => {
        const similarity = cosineSimilarity(embedding, candidate.embedding);
        // Strict comparison: the first label wins ties and NaN scores are ignored.
        if (similarity > topScore) {
            topScore = similarity;
            winner = candidate.name;
        }
    });
    if (topScore < EMBEDDING_THRESHOLD) {
        return null;
    }
    return winner;
}
53
// 4. LLM layer (with cache)
const LLM_CACHE_TTL = 24 * 60 * 60 * 1000;
const OLLAMA_URL = 'http://127.0.0.1:11434/v1/chat/completions';
// Adjust the model name to whatever is installed locally, e.g. 'llama3.2:3b' or 'llama3.1:8b'.
const OLLAMA_MODEL = 'llama3.2:3b';
// cacheKey -> { category, timestamp }
const llmCategoryCache = new Map();

/**
 * Extracts the category label named in an LLM reply.
 *
 * The label that appears earliest in the reply wins; when two labels start at
 * the same position the longer one wins, so a reply of "format" is not
 * mis-read as its prefix "form".
 *
 * @param reply - Raw reply text; anything that is not a string yields null.
 * @param categories - Allowed lowercase labels.
 * @returns The matched label, or null when the reply names none of them.
 */
function pickCategoryFromReply(reply, categories) {
    if (typeof reply !== 'string') {
        return null;
    }
    const text = reply.trim().toLowerCase();
    let best = null;
    let bestIndex = Infinity;
    for (const label of categories) {
        const index = text.indexOf(label);
        if (index === -1) {
            continue;
        }
        const isEarlier = index < bestIndex;
        const isLongerAtSameSpot = index === bestIndex && label.length > best.length;
        if (isEarlier || isLongerAtSameSpot) {
            best = label;
            bestIndex = index;
        }
    }
    return best;
}

/**
 * Asks the local Ollama chat endpoint to classify a semantic description into
 * one of `CATEGORIES`.
 *
 * Successful answers are cached for `LLM_CACHE_TTL` under the first 16 hex
 * characters of the SHA-256 of `stableStr`; misses and failures are not cached.
 *
 * @param stableStr - Semantic description of the code; falsy input returns null
 *                    without calling the model.
 * @returns The matched category, or null when the request fails, the server
 *          answers with a non-2xx status, or the reply names no known category.
 */
async function categoryFromLLM(stableStr) {
    if (!stableStr) {
        return null;
    }
    const cacheKey = createHash('sha256')
        .update(stableStr)
        .digest('hex')
        .slice(0, 16);
    const cached = llmCategoryCache.get(cacheKey);
    if (cached) {
        if (Date.now() - cached.timestamp < LLM_CACHE_TTL) {
            return cached.category;
        }
        // Drop the stale entry so the map does not accumulate expired answers.
        llmCategoryCache.delete(cacheKey);
    }
    const prompt = `给一段代码语义说明,在下面列出来的所有类别中找到最相符的类别,请直接返回命中的原始字符串,没有命中不用返回。
代码语义:${stableStr}
类别合集: ${CATEGORIES.join(', ')}`;
    try {
        const body = {
            model: OLLAMA_MODEL,
            messages: [{ role: 'user', content: prompt }],
            temperature: 0.4,
            stream: false,
        };
        const res = await fetch(OLLAMA_URL, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(body),
        });
        if (!res.ok) {
            // An error payload carries no usable answer; report the status instead of parsing it.
            console.error('[categoryFromLLM]', `unexpected HTTP status ${res.status}`);
            return null;
        }
        const data = await res.json();
        // A payload without choices is treated as "no answer" rather than as an exception.
        const category = pickCategoryFromReply(data?.choices?.[0]?.message?.content, CATEGORIES);
        if (category) {
            llmCategoryCache.set(cacheKey, { category, timestamp: Date.now() });
            return category;
        }
    }
    catch (e) {
        // Best effort: classification must not fail the caller when the model is unreachable.
        console.error('[categoryFromLLM]', e);
    }
    return null;
}
99
// 5. Three-layer fusion
/**
 * Resolves the category of every row by trying, in order: the path/name rules,
 * the embedding similarity, and finally the LLM.
 *
 * Rows are processed concurrently and returned as shallow copies in input
 * order. Each layer's result is logged to stderr.
 *
 * @param rows - Indexed rows; `name`, `path`, `content` and `category` are read.
 * @param vecs - Embedding per row, aligned with `rows` by index. A missing
 *               entry skips the embedding layer for that row.
 * @returns Copies of the rows with `category` set. When no layer produces a
 *          category, the row's existing category is kept (null if it had none).
 */
export async function resolveCategory(rows, vecs) {
    const pros = rows.map(async (r, i) => {
        const { name } = r;
        const ruleCategory = inferCategoryFromPath(r.path) || inferCategoryFromName(name);
        console.error(`===from ruleCategory`, name, ruleCategory);
        if (ruleCategory) {
            return { ...r, category: ruleCategory };
        }
        // TODO: known problem (per the original author) - the row embedding is a semantic
        // template vector while the cached category vectors are single words, so the
        // similarity is expected to stay below 0.3.
        const vec = vecs?.[i];
        const emd = vec ? categoryFromEmbedding(vec) : null;
        console.error(`===from categoryFromEmbedding`, name, emd);
        if (emd) {
            return { ...r, category: emd };
        }
        const cateLlm = await categoryFromLLM(r.content);
        console.error(`===from categoryFromLLM`, name, cateLlm);
        // Do not erase a category the row already carries just because the LLM had no answer.
        return { ...r, category: cateLlm ?? r.category ?? null };
    });
    return Promise.all(pros);
}
@@ -1,15 +1,17 @@
1
+ // Used only for JS-type rows; slated for removal later
2
+ const MAX_CONTENT_LENGTH = 1200;
1
3
  function briefMeta(meta) {
2
- const keys = ["props", "params", "properties", "hooks"];
4
+ const keys = ['props', 'params', 'properties', 'hooks'];
3
5
  const parts = [];
4
6
  for (const k of keys) {
5
7
  const v = meta[k];
6
8
  if (Array.isArray(v)) {
7
- const strs = v.filter((x) => typeof x === "string");
9
+ const strs = v.filter((x) => typeof x === 'string');
8
10
  if (strs.length)
9
- parts.push(`${k}: ${strs.slice(0, 24).join(", ")}`);
11
+ parts.push(`${k}: ${strs.slice(0, 24).join(', ')}`);
10
12
  }
11
13
  }
12
- return parts.join("; ");
14
+ return parts.join('; ');
13
15
  }
14
16
  /**
15
17
  * 拼成一段供向量模型编码的文本(名称、路径、注释、meta 摘要、源码片段)。
@@ -19,10 +21,10 @@ export function indexedRowToEmbedText(row) {
19
21
  return [
20
22
  `${row.type} ${row.name}`,
21
23
  row.path,
22
- row.description ?? "",
24
+ row.description ?? '',
23
25
  metaBit,
24
- (row.content ?? "").slice(0, 1200)
26
+ (row.content ?? '').slice(0, MAX_CONTENT_LENGTH),
25
27
  ]
26
28
  .filter((s) => s.length > 0)
27
- .join("\n");
29
+ .join('\n');
28
30
  }
@@ -1,13 +1,13 @@
1
1
  /**
2
2
  * JSX 检测、路径/selector 启发式、JSDoc 摘要、相对路径。
3
3
  */
4
- import { SyntaxKind } from "ts-morph";
4
+ import { SyntaxKind } from 'ts-morph';
5
5
  /**
6
6
  * 判断源文件路径是否为 TSX(React JSX 语法所在扩展名)。
7
7
  * @returns `true` 表示按「可能出现 JSX」处理,用于与 `.ts` 区分组件启发式。
8
8
  */
9
9
  export function isTsxFile(filePath) {
10
- return filePath.toLowerCase().endsWith(".tsx");
10
+ return filePath.toLowerCase().endsWith('.tsx');
11
11
  }
12
12
  /**
13
13
  * 在 AST 子树中是否出现 JSX 节点(元素、自闭合标签或 Fragment)。
@@ -27,30 +27,49 @@ export function hasJsxInNode(node) {
27
27
  return found;
28
28
  }
29
29
  /**
30
- * 从文件路径推断业务语义目录名(如 `.../components/form/Button.tsx` → `form`)。
31
- * @returns 命中 `components|features|...` 下一级目录名;无法推断时为 `null`,写入 DB 的 `category` 字段。
30
+ * 从文件路径推断业务语义目录名(如 `.../components/Button.tsx` → `components`)。
31
+ * @returns 命中 `components|features|...`
32
32
  */
33
33
  export function inferCategoryFromPath(filePath) {
34
- const norm = filePath.replace(/\\/g, "/");
35
- const parts = norm.split("/");
36
- const markers = ["components", "features", "modules", "pages", "widgets", "hooks"];
37
- for (let i = 0; i < parts.length; i++) {
38
- const m = markers.indexOf(parts[i]);
39
- if (m >= 0 && parts[i + 1]) {
40
- return parts[i + 1];
34
+ const markers = [
35
+ 'components',
36
+ 'features',
37
+ 'modules',
38
+ 'pages',
39
+ 'widgets',
40
+ 'hooks',
41
+ 'utils',
42
+ 'services',
43
+ 'selectors',
44
+ 'types',
45
+ 'apis',
46
+ ];
47
+ for (const m of markers) {
48
+ if (filePath.toLowerCase().includes(`/${m}/`)) {
49
+ return m;
41
50
  }
42
51
  }
43
52
  return null;
44
53
  }
45
54
  /**
46
- * 启发式判断导出是否更像状态选择器(selector)。
47
- * @returns `true` 时索引类型记为 `selector`,否则同文件下函数多为 `util`。
55
+ * 从命名推断业务语义类别(如 `useAuth` → `hook`,`formatDate` → `format`,`validateData` → `validation`)。
56
+ * @param name
57
+ * @returns 命中`hook | ...`
48
58
  */
49
- export function isSelectorLike(filePath, exportName) {
50
- const lowerPath = filePath.toLowerCase();
51
- if (lowerPath.includes("selector"))
52
- return true;
53
- if (/selector$/i.test(exportName))
59
+ export function inferCategoryFromName(originName) {
60
+ const name = originName.toLowerCase();
61
+ if (name.startsWith('use'))
62
+ return 'hook';
63
+ if (name.includes('fetch'))
64
+ return 'network';
65
+ if (name.includes('format'))
66
+ return 'format';
67
+ if (name.includes('validate'))
68
+ return 'validation';
69
+ return null;
70
+ }
71
/**
 * Heuristically decides whether an exported name follows the React hook
 * naming convention: exactly `use`, or `use` followed by an uppercase letter
 * or digit (`useAuth`, `use3D`).
 *
 * The prefix is what identifies a hook; names that only end in "use"
 * (`pause`, `reuse`) or merely start with those letters (`user`) do not match.
 *
 * @param exportName - Exported identifier.
 * @returns `true` when the name looks like a hook, `false` otherwise
 *          (including for non-string input).
 */
export function isHookLike(exportName) {
    if (typeof exportName !== 'string') {
        return false;
    }
    return exportName === 'use' || /^use[A-Z0-9]/.test(exportName);
}
  }
@@ -61,7 +80,7 @@ export function isSelectorLike(filePath, exportName) {
61
80
  export function getLeadingDocDescription(node) {
62
81
  const tryNode = (n) => {
63
82
  const jd = n.getJsDocs;
64
- if (typeof jd !== "function")
83
+ if (typeof jd !== 'function')
65
84
  return null;
66
85
  const docs = jd.call(n);
67
86
  if (!docs?.length)
@@ -69,15 +88,15 @@ export function getLeadingDocDescription(node) {
69
88
  const t = docs[0].getDescription().trim();
70
89
  return t || null;
71
90
  };
72
- return tryNode(node) ?? (node.getParent() ? tryNode(node.getParent()) : null);
91
+ return (tryNode(node) ?? (node.getParent() ? tryNode(node.getParent()) : null));
73
92
  }
74
93
  /**
75
94
  * 将绝对路径转为相对 `projectRoot` 的路径,作为库中 `symbols.path`(便于跨机器、展示)。
76
95
  * @returns 相对路径;若无法裁掉前缀则回退为原始绝对路径。
77
96
  */
78
97
  export function getRelativePathForDisplay(projectRoot, absolutePath) {
79
- const r = projectRoot.replace(/\\/g, "/");
80
- const a = absolutePath.replace(/\\/g, "/");
98
+ const r = projectRoot.replace(/\\/g, '/');
99
+ const a = absolutePath.replace(/\\/g, '/');
81
100
  if (a.startsWith(r)) {
82
101
  return a.slice(r.length + 1);
83
102
  }
@@ -92,5 +111,5 @@ export function snippetForNode(node, maxLen = 4000) {
92
111
  const raw = node.getText();
93
112
  if (raw.length <= maxLen)
94
113
  return raw;
95
- return raw.slice(0, maxLen) + "\n/* ... truncated ... */";
114
+ return raw.slice(0, maxLen) + '\n/* ... truncated ... */';
96
115
  }