@lorrylurui/code-intelligence-mcp 1.1.15 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,8 +7,10 @@ import { join, resolve } from 'node:path';
7
7
  import { Node, Project } from 'ts-morph';
8
8
  import { readFileSync, existsSync } from 'node:fs';
9
9
  import { extractInterfaceOrTypeMeta, extractMetaFromCallable, } from './extractMeta.js';
10
- import { getLeadingDocDescription, getRelativePathForDisplay, hasJsxInNode, inferCategoryFromPath, isSelectorLike, isTsxFile, snippetForNode, } from './heuristics.js';
10
+ import { getLeadingDocDescription, getRelativePathForDisplay, hasJsxInNode, isHookLike, isTsxFile, } from './heuristics.js';
11
11
  import { parseJsFile } from './babelParser.js';
12
+ import { computeFileHash, computeSemanticHash } from './tsAstNormalizer.js';
13
+ const CALLERS_LIMIT = 20;
12
14
  /** 判断文件类型 */
13
15
  function isJsFile(filePath) {
14
16
  return filePath.endsWith('.js') || filePath.endsWith('.jsx');
@@ -59,39 +61,57 @@ function resolveExportName(exportName, decl) {
59
61
  function processDeclaration(exportName, decl, sf, projectRoot) {
60
62
  const filePath = sf.getFilePath();
61
63
  const relPath = getRelativePathForDisplay(projectRoot, filePath);
62
- const category = inferCategoryFromPath(filePath);
63
64
  const name = resolveExportName(exportName, decl);
64
65
  const description = getLeadingDocDescription(decl) ?? null;
65
66
  if (Node.isInterfaceDeclaration(decl) ||
66
67
  Node.isTypeAliasDeclaration(decl)) {
67
68
  const meta = extractInterfaceOrTypeMeta(decl);
69
+ const type = Node.isInterfaceDeclaration(decl) ? 'interface' : 'type';
70
+ const [semantic_hash, stableStr] = computeSemanticHash({
71
+ name,
72
+ type,
73
+ description,
74
+ meta,
75
+ node: decl,
76
+ });
68
77
  return {
69
78
  name,
70
- type: 'type',
71
- category,
79
+ type,
80
+ category: '',
72
81
  path: relPath,
73
82
  description,
74
- content: snippetForNode(decl),
83
+ content: stableStr,
75
84
  meta,
85
+ file_hash: computeFileHash(sf.getFullText()),
86
+ semantic_hash,
76
87
  };
77
88
  }
78
89
  if (Node.isFunctionDeclaration(decl)) {
79
90
  const jsx = isTsxFile(filePath) && hasJsxInNode(decl);
80
- const type = jsx
81
- ? 'component'
82
- : isSelectorLike(filePath, name)
83
- ? 'selector'
84
- : 'util';
91
+ const type = isHookLike(name)
92
+ ? 'hook'
93
+ : jsx
94
+ ? 'component'
95
+ : 'function';
85
96
  const raw = extractMetaFromCallable(decl);
86
97
  const meta = mergeCallableMeta(type, raw);
98
+ const [semantic_hash, stableStr] = computeSemanticHash({
99
+ name,
100
+ type,
101
+ description,
102
+ meta,
103
+ node: decl,
104
+ });
87
105
  return {
88
106
  name,
89
107
  type,
90
- category,
108
+ category: '',
91
109
  path: relPath,
92
110
  description,
93
- content: snippetForNode(decl),
111
+ content: stableStr,
94
112
  meta,
113
+ semantic_hash,
114
+ file_hash: computeFileHash(sf.getFullText()),
95
115
  };
96
116
  }
97
117
  if (Node.isVariableDeclaration(decl)) {
@@ -112,38 +132,57 @@ function processDeclaration(exportName, decl, sf, projectRoot) {
112
132
  if (!callable)
113
133
  return null;
114
134
  const jsx = isTsxFile(filePath) && hasJsxInNode(callable);
115
- const type = jsx
116
- ? 'component'
117
- : isSelectorLike(filePath, name)
118
- ? 'selector'
119
- : 'util';
135
+ const type = isHookLike(name)
136
+ ? 'hook'
137
+ : jsx
138
+ ? 'component'
139
+ : 'function';
120
140
  const raw = extractMetaFromCallable(callable);
121
141
  const meta = mergeCallableMeta(type, raw);
142
+ const [semantic_hash, stableStr] = computeSemanticHash({
143
+ name,
144
+ type,
145
+ description,
146
+ meta,
147
+ node: callable,
148
+ });
122
149
  return {
123
150
  name,
124
151
  type,
125
- category,
152
+ category: '',
126
153
  path: relPath,
127
154
  description,
128
- content: snippetForNode(callable),
155
+ content: stableStr,
129
156
  meta,
157
+ semantic_hash,
158
+ file_hash: computeFileHash(sf.getFullText()),
130
159
  };
131
160
  }
132
161
  if (Node.isClassDeclaration(decl)) {
133
162
  // 轻量:仅将 class 记为 util(后续可扩展为带 JSX 的组件类)
163
+ const type = 'class';
164
+ const [semantic_hash, stableStr] = computeSemanticHash({
165
+ name,
166
+ type,
167
+ description,
168
+ meta: { kind: type },
169
+ node: decl,
170
+ });
134
171
  return {
135
172
  name,
136
- type: 'util',
137
- category,
173
+ type,
174
+ category: '',
138
175
  path: relPath,
139
176
  description,
140
- content: snippetForNode(decl),
141
- meta: { kind: 'class' },
177
+ content: stableStr,
178
+ meta: { kind: type },
179
+ semantic_hash,
180
+ file_hash: computeFileHash(sf.getFullText()),
142
181
  };
143
182
  }
144
183
  return null;
145
184
  }
146
- const DEFAULT_IGNORE = [
185
+ export const DEFAULT_IGNORE = [
147
186
  '**/node_modules/**',
148
187
  '**/dist/**',
149
188
  '**/.git/**',
@@ -197,6 +236,23 @@ export async function indexProject(opts) {
197
236
  for (const sf of project.getSourceFiles()) {
198
237
  const relPath = getRelativePathForDisplay(projectRoot, sf.getFilePath());
199
238
  const exported = sf.getExportedDeclarations();
239
+ // 只检索export的代码,减少噪音
240
+ // 如果要检索未export的,需要考虑
241
+ /**
242
+ * 类型一:真正的内部实现,不应该复用
243
+ function _buildSqlFragment() {} // 和模块强耦合,外部用不了
244
+ const __validateInternal = () => {} // 私有约定(下划线前缀)
245
+ → 索引了也没用,反而是噪音
246
+
247
+ 类型二:工具函数,只是作者忘了 export / 懒得 export
248
+ function debounce(fn, ms) {} // 通用,完全可以复用
249
+ function formatCurrency(n) {} // 通用,其他地方也需要
250
+ → 索引有价值,还能反向提示"这个应该被 export"
251
+
252
+ 类型三:文件内共享但跨文件无意义
253
+ function getLocalConfig() {} // 依赖闭包变量,移出去就坏了
254
+ → 索引意义不大
255
+ */
200
256
  for (const [exportName, decls] of exported) {
201
257
  for (const decl of decls) {
202
258
  const row = processDeclaration(exportName, decl, sf, projectRoot);
@@ -205,7 +261,11 @@ export async function indexProject(opts) {
205
261
  // 建立符号映射
206
262
  const key = `${row.name}|${row.path}`;
207
263
  if (!symbolMap.has(key)) {
208
- symbolMap.set(key, { name: row.name, path: row.path, exports: new Set() });
264
+ symbolMap.set(key, {
265
+ name: row.name,
266
+ path: row.path,
267
+ exports: new Set(),
268
+ });
209
269
  }
210
270
  symbolMap.get(key).exports.add(exportName);
211
271
  }
@@ -223,7 +283,11 @@ export async function indexProject(opts) {
223
283
  for (const row of rows) {
224
284
  const key = `${row.name}|${row.path}`;
225
285
  if (!symbolMap.has(key)) {
226
- symbolMap.set(key, { name: row.name, path: row.path, exports: new Set() });
286
+ symbolMap.set(key, {
287
+ name: row.name,
288
+ path: row.path,
289
+ exports: new Set(),
290
+ });
227
291
  }
228
292
  symbolMap.get(key).exports.add(row.name);
229
293
  }
@@ -248,7 +312,6 @@ export async function indexProject(opts) {
248
312
  * 分析调用关系,填充每个符号的 meta.callers 和 meta.callees
249
313
  */
250
314
  function analyzeRelations(project, symbolMap, projectRoot, rows) {
251
- // 构造快速查找:exportName -> [ {name, path} ]
252
315
  const exportToSymbol = new Map();
253
316
  for (const [key, value] of symbolMap) {
254
317
  for (const exp of value.exports) {
@@ -257,52 +320,73 @@ function analyzeRelations(project, symbolMap, projectRoot, rows) {
257
320
  exportToSymbol.set(exp, list);
258
321
  }
259
322
  }
260
- // 收集所有 callers callees
261
- const callersMap = new Map(); // key = "name|path" -> set of callers
262
- const calleesMap = new Map(); // key = "name|path" -> set of callees
323
+ const callersMap = new Map();
324
+ const calleesMap = new Map();
263
325
  for (const sf of project.getSourceFiles()) {
264
326
  const filePath = sf.getFilePath();
265
327
  const relPath = getRelativePathForDisplay(projectRoot, filePath);
266
- // 获取当前文件导出的符号
267
328
  const exported = sf.getExportedDeclarations();
268
- const fileExportNames = new Set();
269
- for (const [name, decls] of exported) {
270
- fileExportNames.add(name);
271
- }
272
- // 遍历 AST 查找调用
273
329
  sf.forEachDescendant((node) => {
274
- // 1. 函数调用
330
+ // 函数调用
275
331
  if (Node.isCallExpression(node)) {
276
332
  const expr = node.getExpression();
277
333
  const name = Node.isIdentifier(expr) ? expr.getText() : null;
278
334
  if (name && exportToSymbol.has(name)) {
279
335
  const targets = exportToSymbol.get(name);
280
- // 当前文件是谁在调用
281
336
  for (const [expName, decls] of exported) {
282
- for (const decl of decls) {
337
+ for (const _decl of decls) {
283
338
  const callerKey = `${expName}|${relPath}`;
284
339
  for (const target of targets) {
285
340
  // callees: 我调用了谁
286
- const calleeSet = calleesMap.get(callerKey) || new Set();
287
- calleeSet.add({ name: target.name, path: target.path });
341
+ const calleeSet = calleesMap.get(callerKey) ||
342
+ new Set();
343
+ calleeSet.add(`${target.name}|${target.path}`); // ✅ 字符串天然去重
288
344
  calleesMap.set(callerKey, calleeSet);
289
- // callers: 谁调用了我
290
345
  const callerSet = callersMap.get(`${target.name}|${target.path}`) || new Set();
291
- callerSet.add({ name: expName, path: relPath });
346
+ callerSet.add(`${expName}|${relPath}`);
292
347
  callersMap.set(`${target.name}|${target.path}`, callerSet);
293
348
  }
294
349
  }
295
350
  }
296
351
  }
297
352
  }
298
- // 2. Import 导入
353
+ // Import 导入关系(含 import type)
299
354
  if (Node.isImportDeclaration(node)) {
300
- const moduleSpec = node.getModuleSpecifier().getText();
301
- // 简单处理:只处理从 ./ 或 ../ 开始的相对导入
302
- if (moduleSpec.startsWith("'.") || moduleSpec.startsWith('".')) {
303
- const importPath = moduleSpec.slice(1, -1); // 去掉引号
304
- // 尝试匹配已索引的符号
305
- // 这里简化处理,暂不展开
355
+ // 1. 解析被导入文件的路径
356
+ const importedSf = node.getModuleSpecifierSourceFile();
357
+ if (!importedSf)
358
+ return; // 无法解析(第三方包等)跳过
359
+ const importedRelPath = getRelativePathForDisplay(projectRoot, importedSf.getFilePath());
360
+ // 2. 收集本条 import 引入了哪些具名符号
361
+ const namedBindings = node.getNamedImports(); // { SymbolRepository }
362
+ const defaultImport = node.getDefaultImport(); // import Foo from ...
363
+ const importedNames = [
364
+ ...namedBindings.map((n) => n.getName()),
365
+ ...(defaultImport ? [defaultImport.getText()] : []),
366
+ ];
367
+ if (importedNames.length === 0)
368
+ return;
369
+ // 3. 当前文件的所有导出符号作为 caller
370
+ for (const [expName] of exported) {
371
+ const callerKey = `${expName}|${relPath}`;
372
+ for (const importedName of importedNames) {
373
+ // 4. 在 symbolMap 里找到被导入符号对应的 row
374
+ // key 格式:`name|path`,name 是 resolveExportName 后的真实名
375
+ // exportToSymbol 里存的是 exportName(可能是 'default'),
376
+ // 所以同时按 importedName 和路径在 symbolMap 里直接查
377
+ const targetKey = `${importedName}|${importedRelPath}`;
378
+ const target = symbolMap.get(targetKey);
379
+ if (!target)
380
+ continue;
381
+ // callee:当前文件的导出符号引用了 target
382
+ const calleeSet = calleesMap.get(callerKey) || new Set();
383
+ calleeSet.add(`${target.name}|${target.path}`);
384
+ calleesMap.set(callerKey, calleeSet);
385
+ const callerRefKey = `${target.name}|${target.path}`;
386
+ const callerSet = callersMap.get(callerRefKey) || new Set();
387
+ callerSet.add(`${expName}|${relPath}`);
388
+ callersMap.set(callerRefKey, callerSet);
389
+ }
306
390
  }
307
391
  }
308
392
  });
@@ -312,13 +396,19 @@ function analyzeRelations(project, symbolMap, projectRoot, rows) {
312
396
  const key = `${row.name}|${row.path}`;
313
397
  const callers = callersMap.get(key);
314
398
  const callees = calleesMap.get(key);
315
- if (callers && callers.size > 0) {
316
- row.meta = row.meta || {};
317
- row.meta.callers = [...callers].slice(0, 20); // 限制数量
399
+ if (callers?.size) {
400
+ row.meta.callers = [...callers].slice(0, CALLERS_LIMIT).map((s) => {
401
+ // 反序列化回对象
402
+ const [name, ...pathParts] = s.split('|');
403
+ return { name, path: pathParts.join('|') };
404
+ });
318
405
  }
319
- if (callees && callees.size > 0) {
320
- row.meta = row.meta || {};
321
- row.meta.callees = [...callees].slice(0, 20);
406
+ if (callees?.size) {
407
+ row.meta.callees = [...callees].slice(0, CALLERS_LIMIT).map((s) => {
408
+ const [name, ...pathParts] = s.split('|');
409
+ // path:为了防止路径里万一含有 | 字符时截断错误。
410
+ return { name, path: pathParts.join('|') };
411
+ });
322
412
  }
323
413
  }
324
414
  console.error(`[analyzeRelations] processed ${rows.length} symbols`);
@@ -0,0 +1,201 @@
1
+ /**
2
+ * astNormalizerJs.ts
3
+ *
4
+ * JS/JSX 文件的语义签名提取,对应 ts-morph 版的 extractNormalizedSignature。
5
+ * Babel AST 没有类型信息,通过以下策略补偿:
6
+ * 1. 有 JSDoc → 从 @param / @returns 提取类型
7
+ * 2. 无 JSDoc → 参数类型标记为 $unknown,签名仍可区分结构形态
8
+ *
9
+ * 与 TS 版行为对齐:
10
+ * - 参数名 → 丢弃,替换为 $p
11
+ * - 函数体 → 替换为 {}
12
+ * - 默认值存在性保留(=$default),默认值内容丢弃
13
+ * - rest 参数保留 ... 前缀
14
+ * - 解构参数保留结构形态({} / []),无法展开内部字段
15
+ */
16
+ import { createHash } from 'node:crypto';
17
+ import * as t from '@babel/types';
18
+ // ─────────────────────────────────────────────
19
+ // JSDoc 类型提取
20
+ // ─────────────────────────────────────────────
21
/**
 * Return the raw text of the first leading comment on `node` that looks like
 * a JSDoc signature comment, i.e. contains an `@param`, `@return` or
 * `@returns` tag.
 *
 * `@return` is matched on purpose: parseJSDoc accepts both spellings
 * (`@returns?`), so a comment that only carries `@return {T}` must not be
 * discarded here.
 *
 * @param {{ leadingComments?: Array<{ value: string }> | null }} node AST node.
 * @returns {string | null} The comment text, or null when no comment qualifies.
 */
function getJSDoc(node) {
    const comments = node.leadingComments;
    if (!comments || comments.length === 0)
        return null;
    // '@return' is a prefix of '@returns', so one check covers both spellings.
    const jsdoc = comments.find((c) => c.value.includes('@param') || c.value.includes('@return'));
    return jsdoc ? jsdoc.value : null;
}
28
/**
 * Extract parameter and return types from raw JSDoc comment text.
 *
 * Recognised tags:
 *   - `@param {Type} name`
 *   - `@param {Type} [name]` / `@param {Type} [name=default]` (optional form)
 *   - `@return {Type}` / `@returns {Type}` (first occurrence only)
 *
 * Every captured type is passed through normalizeType before being stored.
 *
 * @param {string} jsdoc Comment text.
 * @returns {{ params: Object<string, string>, returnType: (string|undefined) }}
 *   `params` maps parameter name to normalized type; `returnType` stays
 *   undefined when the comment has no return tag.
 */
function parseJSDoc(jsdoc) {
    // Null-prototype map: looking up a parameter that happens to be called
    // `constructor` or `toString` must yield undefined, not an inherited
    // Object.prototype member.
    const params = Object.create(null);
    let returnType;
    // `\[?` lets the optional-parameter syntax `[name]` / `[name=default]` match.
    const paramRegex = /@param\s+\{([^}]+)\}\s+\[?(\w+)/g;
    const returnRegex = /@returns?\s+\{([^}]+)\}/;
    for (const [, type, name] of jsdoc.matchAll(paramRegex)) {
        params[name] = normalizeType(type);
    }
    const returnMatch = jsdoc.match(returnRegex);
    if (returnMatch) {
        returnType = normalizeType(returnMatch[1]);
    }
    return { params, returnType };
}
44
/**
 * Collapse a JSDoc type expression into a coarse, case-insensitive label.
 *
 * Container types are recognised by their leading token before any primitive
 * substring check runs. Otherwise the type arguments would win: for example
 * `Promise<string>`, `Array<string>` and `Object<string, number>` all contain
 * the text "string".
 *
 * Results: `Promise<inner>` / `Promise`, `array`, `object`, `string`,
 * `number`, `boolean`, or `unknown` when nothing matches.
 *
 * @param {string} type Raw text found between the braces of a JSDoc tag.
 * @returns {string} Normalized label.
 */
function normalizeType(type) {
    const text = type.trim().toLowerCase();
    // Containers first, keyed on the outermost type name.
    if (text.startsWith('promise')) {
        // Also accepts the Closure-style `Promise.<T>` spelling.
        const inner = text.match(/promise\.?<(.+)>/);
        return inner ? `Promise<${normalizeType(inner[1])}>` : 'Promise';
    }
    if (text.startsWith('array') || text.endsWith('[]'))
        return 'array';
    if (text.startsWith('object'))
        return 'object';
    // Substring fallbacks keep unions / nullable markers working
    // (`number|null`, `?string`, ...).
    if (text.includes('string'))
        return 'string';
    if (text.includes('number'))
        return 'number';
    if (text.includes('boolean'))
        return 'boolean';
    if (text.includes('array'))
        return 'array';
    if (text.includes('object'))
        return 'object';
    if (text.includes('promise')) {
        const inner = text.match(/promise\.?<(.+)>/);
        return inner ? `Promise<${normalizeType(inner[1])}>` : 'Promise';
    }
    return 'unknown';
}
62
/**
 * Render one function parameter as a signature fragment.
 *
 * Fragments by parameter node type:
 *   - Identifier        -> `name:type`
 *   - ObjectPattern     -> `{key:type;...}` with entries sorted; a property
 *                          without a named key contributes `unknown`
 *   - AssignmentPattern -> fragment of the left side followed by `=$default`
 *                          (the default expression itself is ignored)
 *   - RestElement       -> `...` followed by the fragment of its argument
 *   - ArrayPattern      -> `[]`
 *   - anything else     -> `fallbackName:unknown`
 *
 * Types come from `jsdoc.params` keyed by parameter name; a name without an
 * entry gets `unknown`.
 *
 * @param {object} param Parameter node (only `type` and the fields above are read).
 * @param {string} fallbackName Placeholder used for unsupported node types.
 * @param {{ params: Object<string, string> } | undefined} jsdoc Parsed JSDoc, if any.
 * @returns {string} Signature fragment.
 */
function normalizeParam(param, fallbackName, jsdoc) {
    // Own-property lookup only, so names such as `toString` never resolve to
    // an inherited Object.prototype member.
    const typeOf = (name) => {
        const known = jsdoc?.params;
        const declared = known && Object.prototype.hasOwnProperty.call(known, name)
            ? known[name]
            : undefined;
        return declared ?? 'unknown';
    };
    switch (param.type) {
        case 'Identifier':
            return `${param.name}:${typeOf(param.name)}`;
        case 'ObjectPattern': {
            const props = param.properties.map((p) => (p.key?.name ? `${p.key.name}:${typeOf(p.key.name)}` : 'unknown'));
            return `{${props.sort().join(';')}}`;
        }
        case 'AssignmentPattern':
            // Keep the fact that a default exists, not its value.
            return `${normalizeParam(param.left, fallbackName, jsdoc)}=$default`;
        case 'RestElement':
            return `...${normalizeParam(param.argument, fallbackName, jsdoc)}`;
        case 'ArrayPattern':
            return '[]';
        default:
            return `${fallbackName}:unknown`;
    }
}
81
/**
 * Guess a coarse type label from the syntactic form of an expression.
 *
 * Only literal shapes are recognised (string, numeric and boolean literals,
 * array and object expressions); every other expression yields `unknown`.
 *
 * @param {object} expr Expression node.
 * @returns {string} `string`, `number`, `boolean`, `array`, `object` or `unknown`.
 */
function inferTypeFromExpression(expr) {
    // Checked top to bottom; the first matching validator decides the label.
    const literalKinds = [
        ['isStringLiteral', 'string'],
        ['isNumericLiteral', 'number'],
        ['isBooleanLiteral', 'boolean'],
        ['isArrayExpression', 'array'],
        ['isObjectExpression', 'object'],
    ];
    for (const [validator, label] of literalKinds) {
        if (t[validator](expr)) {
            return label;
        }
    }
    return 'unknown';
}
94
/**
 * Infer a coarse return type for a function-like node without type info.
 *
 *   - no body                       -> `unknown`
 *   - expression body (concise arrow) -> label of that expression
 *   - block body                    -> label of the argument of the first
 *                                      `return` found among the block's
 *                                      top-level statements; `unknown` when
 *                                      there is none or it returns nothing
 *
 * Returns nested inside `if`/loops/etc. are not inspected.
 *
 * @param {object} node Function-like node exposing `body`.
 * @returns {string} Label produced by inferTypeFromExpression, or `unknown`.
 */
function inferReturnType(node) {
    if (!node.body)
        return 'unknown';
    if (!t.isBlockStatement(node.body)) {
        return inferTypeFromExpression(node.body);
    }
    // Wrap the validator: handing it to find() directly would pass the element
    // index as its second (`opts`) argument.
    const returnStmt = node.body.body.find((stmt) => t.isReturnStatement(stmt));
    return returnStmt?.argument ? inferTypeFromExpression(returnStmt.argument) : 'unknown';
}
108
/**
 * Build the normalized signature string `fn(<params>)=><returnType>` for a
 * function node.
 *
 * Parameter fragments come from normalizeParam (placeholder `$p<index>`), in
 * declaration order and joined with commas. The return type is the JSDoc
 * return type when the node has one, otherwise whatever inferReturnType
 * derives from the body.
 *
 * @param {object} node Function node exposing `params` (and optionally
 *   leading comments / a body).
 * @returns {string} Signature string.
 */
function normalizeFunction(node) {
    const rawDoc = getJSDoc(node);
    const doc = rawDoc ? parseJSDoc(rawDoc) : undefined;
    const fragments = [];
    node.params.forEach((param, index) => {
        fragments.push(normalizeParam(param, `$p${index}`, doc));
    });
    const result = doc?.returnType ?? inferReturnType(node);
    return `fn(${fragments.join(',')})=>${result}`;
}
115
/**
 * Build the normalized signature string for an arrow function.
 *
 * Arrow functions are rendered exactly like ordinary functions
 * (`fn(<params>)=><returnType>`), so this delegates to normalizeFunction
 * instead of keeping a second copy of the same steps.
 *
 * NOTE(review): JSDoc is looked up on the node passed in. For
 * `const f = () => {}` the comment may be attached to the enclosing
 * declaration rather than to the arrow expression - confirm what callers pass.
 *
 * @param {object} node Arrow function node.
 * @returns {string} Signature string.
 */
function normalizeArrowFunction(node) {
    return normalizeFunction(node);
}
123
/**
 * Build the normalized signature string for a class:
 * `class{<method>():unknown;...}` with method entries sorted.
 *
 * Only ClassMethod members are listed. A method whose key is not a plain
 * identifier is recorded as `unknown`. Parameters and return types of the
 * methods are not inspected.
 *
 * @param {object} node Class node exposing `body.body` (the member list).
 * @returns {string} Signature string.
 */
function normalizeClass(node) {
    const methods = node.body.body
        // Wrap the validator: passing it to filter() directly would hand the
        // element index in as its second (`opts`) argument.
        .filter((member) => t.isClassMethod(member))
        .map((member) => {
            const name = t.isIdentifier(member.key) ? member.key.name : 'unknown';
            return `${name}():unknown`;
        })
        .sort();
    return `class{${methods.join(';')}}`;
}
133
/**
 * Depth-first, pre-order walk over an AST-like object graph.
 *
 * `cb` is invoked for `node` first, then for every value reachable through
 * its enumerable properties that is truthy and has a string `type` field.
 * Arrays are scanned element by element. Anything else is ignored.
 *
 * @param {object} node Root of the walk.
 * @param {(node: object) => void} cb Visitor called once per visited node.
 * @returns {void}
 */
function traverseNode(node, cb) {
    const looksLikeNode = (candidate) => Boolean(candidate) && typeof candidate.type === 'string';
    cb(node);
    for (const field in node) {
        const held = node[field];
        // Treat a single child like a one-element list so both shapes share a path.
        const children = Array.isArray(held) ? held : [held];
        children.forEach((child) => {
            if (looksLikeNode(child)) {
                traverseNode(child, cb);
            }
        });
    }
}
149
/**
 * Derive coarse behaviour tags for a piece of code by scanning its AST.
 *
 * Tags (each at most once, in order of first occurrence):
 *   - `performs network request` - a call whose callee name contains
 *     `fetch` or `axios`
 *   - `uses timer`               - a call whose callee name contains
 *     `settimeout`
 *   - `uses local storage`       - a member access on `localStorage`, or a
 *     non-computed `.localStorage` property access
 *
 * Callee names are compared case-insensitively by substring. For a member
 * call such as `axios.get()` or `window.fetch()` both the object identifier
 * and the non-computed property identifier are considered, so the common
 * method-call forms are not missed.
 *
 * @param {object} node Root node to scan.
 * @returns {string[]} Behaviour tags.
 */
export function inferBehaviorFromJS(node) {
    const behavior = new Set();
    // Names worth inspecting for a call: `f()` -> [f]; `a.b()` -> [a, b].
    const calleeNames = (callee) => {
        if (t.isIdentifier(callee))
            return [callee.name];
        if (!t.isMemberExpression(callee))
            return [];
        const names = [];
        if (t.isIdentifier(callee.object))
            names.push(callee.object.name);
        if (!callee.computed && t.isIdentifier(callee.property))
            names.push(callee.property.name);
        return names;
    };
    traverseNode(node, (n) => {
        if (t.isCallExpression(n)) {
            for (const rawName of calleeNames(n.callee)) {
                const name = rawName.toLowerCase();
                if (name.includes('fetch') || name.includes('axios')) {
                    behavior.add('performs network request');
                }
                if (name.includes('settimeout')) {
                    behavior.add('uses timer');
                }
            }
        }
        if (t.isMemberExpression(n)) {
            const onStorage = t.isIdentifier(n.object) && n.object.name === 'localStorage';
            // Covers `window.localStorage` / `globalThis.localStorage`.
            const viaProperty = !n.computed && t.isIdentifier(n.property) && n.property.name === 'localStorage';
            if (onStorage || viaProperty) {
                behavior.add('uses local storage');
            }
        }
    });
    return [...behavior];
}
172
/**
 * Produce the normalized signature string for a JS/JSX declaration node.
 *
 * Dispatch by node kind:
 *   - function declaration / function expression -> normalizeFunction
 *   - arrow function expression                  -> normalizeArrowFunction
 *   - class declaration                          -> normalizeClass
 *   - anything else                              -> empty string
 *
 * @param {object} node AST node.
 * @returns {string} Signature string, or `''` for unsupported nodes.
 */
export function extractNormalizedSignatureJS(node) {
    const isPlainFunction = t.isFunctionDeclaration(node) || t.isFunctionExpression(node);
    if (isPlainFunction) {
        return normalizeFunction(node);
    }
    if (t.isArrowFunctionExpression(node)) {
        return normalizeArrowFunction(node);
    }
    return t.isClassDeclaration(node) ? normalizeClass(node) : '';
}
184
/**
 * Compute the semantic hash of an indexed JS/JSX symbol.
 *
 * A stable descriptor is assembled from the row's name, type, description
 * (null when absent), the normalized signature of `row.node`, its behaviour
 * tags, and the `sideEffects` / `hooks` lists from `row.meta`. The three
 * lists are sorted so that ordering does not affect the result. The
 * descriptor is serialized with JSON.stringify and hashed with SHA-256.
 *
 * @param {{ name: string, type: string, description?: (string|null),
 *           meta?: { sideEffects?: string[], hooks?: string[] }, node: object }} row
 * @returns {[string, string]} `[hexDigest, serializedDescriptor]`.
 */
export function computeSemanticHashJs(row) {
    const signature = extractNormalizedSignatureJS(row.node);
    const behavior = inferBehaviorFromJS(row.node);
    const { sideEffects, hooks } = row.meta || {};
    // Key order is part of the serialized form and therefore of the hash.
    const descriptor = {
        name: row.name,
        type: row.type,
        description: row.description ?? null,
        signature,
        behavior: behavior.sort(),
        sideEffects: [...(sideEffects ?? [])].sort(),
        hooks: [...(hooks ?? [])].sort(),
    };
    const serialized = JSON.stringify(descriptor);
    const digest = createHash('sha256').update(serialized).digest('hex');
    return [digest, serialized];
}
@@ -1,10 +1,12 @@
1
1
  import { env } from '../config/env.js';
2
- import { getSymbolsTableSQL } from '../db/schema.js';
2
+ import { getAllTableSQLs } from '../db/schema.js';
3
+ import { SYMBOL_STATUS } from '../config/symbolStatus.js';
3
4
  /**
4
5
  * 依赖表上 `(path, name)` 唯一键:新行插入,已存在则更新类型/描述/内容与 meta;**不**修改 `usage_count`。
5
6
  * @param rows 来自 `indexProject`;空数组时立即返回,不开启事务。
6
7
  * @param embeddings 与 `rows` 等长;某项为 `null` 表示本行不更新已有 `embedding`(新行则写入 NULL)。
7
- * @returns Promise 在提交成功时 resolve;任一行失败则整批回滚并抛出异常。
8
+ * - 有值 status 置为 online(2)
9
+ * - null → 新行写 pending(1),已有行保持原 status
8
10
  */
9
11
  export async function upsertSymbols(pool, rows, embeddings) {
10
12
  if (rows.length === 0)
@@ -12,28 +14,48 @@ export async function upsertSymbols(pool, rows, embeddings) {
12
14
  if (embeddings && embeddings.length !== rows.length) {
13
15
  throw new Error('upsertSymbols: embeddings length must match rows');
14
16
  }
15
- const actor = process.env.GITHUB_USERNAME?.trim() || 'LorryIsLuRui';
16
- await pool.query(getSymbolsTableSQL()); // 确保表存在
17
- const sql = `
18
- INSERT INTO ${env.mysqlSymbolsTable} (name, type, category, path, description, content, meta, insert_user, updated_user, embedding)
19
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
20
- ON DUPLICATE KEY UPDATE
21
- type = VALUES(type),
22
- category = VALUES(category),
23
- description = VALUES(description),
24
- content = VALUES(content),
25
- meta = VALUES(meta),
26
- updated_user = VALUES(updated_user),
27
- embedding = CASE WHEN VALUES(embedding) IS NOT NULL THEN VALUES(embedding) ELSE embedding END
28
- `;
29
- const conn = await pool.getConnection();
17
+ const actor = process.env.GITHUB_USERNAME?.trim() || 'system';
18
+ const client = await pool.connect();
30
19
  try {
31
- await conn.beginTransaction();
20
+ // 确保 extension + 表 + 基础索引存在
21
+ for (const sql of getAllTableSQLs()) {
22
+ await client.query(sql);
23
+ }
24
+ await client.query('BEGIN');
25
+ const t = env.symbolsTable;
26
+ const sql = `
27
+ INSERT INTO ${t}
28
+ (name, type, category, path, description, content, meta,
29
+ insert_user, updated_user, embedding, semantic_hash, file_hash, status)
30
+ VALUES ($1, $2, $3, $4, $5, $6, $7::jsonb, $8, $9, $10::vector, $11, $12, $13)
31
+ ON CONFLICT (path, name) DO UPDATE SET
32
+ type = EXCLUDED.type,
33
+ category = EXCLUDED.category,
34
+ description = EXCLUDED.description,
35
+ content = EXCLUDED.content,
36
+ meta = EXCLUDED.meta,
37
+ updated_user = EXCLUDED.updated_user,
38
+ embedding = CASE
39
+ WHEN EXCLUDED.embedding IS NOT NULL THEN EXCLUDED.embedding
40
+ WHEN EXCLUDED.semantic_hash != ${t}.semantic_hash THEN NULL
41
+ ELSE ${t}.embedding
42
+ END,
43
+ semantic_hash = EXCLUDED.semantic_hash,
44
+ file_hash = EXCLUDED.file_hash,
45
+ status = CASE
46
+ WHEN EXCLUDED.embedding IS NOT NULL THEN ${SYMBOL_STATUS.ONLINE}
47
+ WHEN EXCLUDED.semantic_hash != ${t}.semantic_hash THEN ${SYMBOL_STATUS.PENDING}
48
+ ELSE ${t}.status
49
+ END,
50
+ updated_at = NOW()
51
+ `;
32
52
  for (let i = 0; i < rows.length; i++) {
33
53
  const r = rows[i];
34
54
  const emb = embeddings?.[i];
35
- const embJson = emb !== undefined && emb !== null ? JSON.stringify(emb) : null;
36
- await conn.query(sql, [
55
+ // pgvector 接受 "[x1,x2,...]" 格式字符串
56
+ const vecStr = emb != null ? `[${emb.join(',')}]` : null;
57
+ const statusVal = vecStr !== null ? SYMBOL_STATUS.ONLINE : SYMBOL_STATUS.PENDING;
58
+ await client.query(sql, [
37
59
  r.name,
38
60
  r.type,
39
61
  r.category,
@@ -43,16 +65,19 @@ export async function upsertSymbols(pool, rows, embeddings) {
43
65
  JSON.stringify(r.meta),
44
66
  actor,
45
67
  actor,
46
- embJson,
68
+ vecStr, // $10 → cast as vector, null 时写 NULL
69
+ r.semantic_hash,
70
+ r.file_hash,
71
+ statusVal,
47
72
  ]);
48
73
  }
49
- await conn.commit();
74
+ await client.query('COMMIT');
50
75
  }
51
76
  catch (e) {
52
- await conn.rollback();
77
+ await client.query('ROLLBACK');
53
78
  throw e;
54
79
  }
55
80
  finally {
56
- conn.release();
81
+ client.release();
57
82
  }
58
83
  }