@lorrylurui/code-intelligence-mcp 1.1.15 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,363 @@
1
+ /**
2
+ * tsAstNormalizer.ts
3
+ * 对 ts-morph Node 做语义级标准化,生成 semantic_hash。
4
+ *
5
+ * 去掉:参数名、泛型参数名、函数体实现、空白格式、字面量值
6
+ * 保留:参数类型结构、返回类型、sideEffects、hooks
7
+ */
8
+ import { createHash } from 'node:crypto';
9
+ import { Node, SyntaxKind } from 'ts-morph';
10
// ─────────────────────────────────────────────
// Built-in type whitelist: these names are never replaced with $T
// ─────────────────────────────────────────────
const BUILTIN_TYPES = new Set([
    // Primitive and special types
    'string', 'number', 'boolean', 'void', 'null', 'undefined',
    'never', 'unknown', 'any', 'object', 'symbol', 'bigint',
    // Standard generic containers
    'Promise', 'Array', 'Record', 'Map', 'Set', 'WeakMap', 'WeakSet',
    // Utility types
    'Partial', 'Required', 'Readonly', 'Pick', 'Omit', 'Exclude',
    'Extract', 'NonNullable', 'ReturnType', 'InstanceType',
    // React names
    'React', 'ReactNode', 'ReactElement', 'FC',
    // Event types
    'MouseEvent', 'KeyboardEvent', 'ChangeEvent',
    // DOM element types
    'HTMLElement', 'HTMLDivElement', 'HTMLInputElement',
    // Styling and ref types
    'CSSProperties', 'RefObject', 'MutableRefObject',
]);
57
/**
 * Maps a single type identifier to its normalized form.
 *
 * Whitelisted built-ins are returned untouched. Names that look like generic
 * type parameters (a lone capital letter, or `T` followed by a capital such as
 * `TData`) collapse to `$T`. Every other name is returned unchanged.
 *
 * @param {string} name - The type identifier to normalize.
 * @returns {string} `$T` for generic-looking names, otherwise `name`.
 */
function normalizeTypeName(name) {
    const isBuiltin = BUILTIN_TYPES.has(name);
    const looksGeneric = /^T[A-Z]/.test(name) || (name.length === 1 && /[A-Z]/.test(name));
    return !isBuiltin && looksGeneric ? '$T' : name;
}
64
/**
 * Normalizes the textual form of a type.
 *
 * Every capitalized identifier in the text is passed through
 * `normalizeTypeName`, then runs of whitespace are collapsed to a single
 * space and the result is trimmed.
 *
 * @param {string} typeStr - Source text of a type.
 * @returns {string} The normalized type text.
 */
function normalizeTypeString(typeStr) {
    const withPlaceholders = typeStr.replace(/\b([A-Z][A-Za-z0-9]*)\b/g, (identifier) => normalizeTypeName(identifier));
    const singleSpaced = withPlaceholders.replace(/\s+/g, ' ');
    return singleSpaced.trim();
}
70
/**
 * Extracts the property list of an object-shaped type node.
 *
 * - Type literal `{ a: string; b?: number }`: one `name[?]:type` entry per
 *   property signature (other member kinds are ignored).
 * - Intersection `A & { ... }`: the entries of every constituent that itself
 *   yields properties, collected recursively.
 * - Anything else: an empty list.
 *
 * @param typeNode - ts-morph type node to inspect.
 * @returns {string[]} Entries in declaration order (callers sort if needed).
 */
function extractPropertiesFromType(typeNode) {
    if (Node.isTypeLiteral(typeNode)) {
        return typeNode
            .getMembers()
            .filter((member) => Node.isPropertySignature(member))
            .map((member) => {
                const memberTypeNode = member.getTypeNode();
                const typeStr = memberTypeNode
                    ? normalizeTypeString(memberTypeNode.getText())
                    : '$unknown';
                const optional = member.hasQuestionToken() ? '?' : '';
                return `${member.getName()}${optional}:${typeStr}`;
            });
    }
    if (Node.isIntersectionTypeNode(typeNode)) {
        // getChildren() on an intersection node yields a single SyntaxList
        // wrapper, so the constituents must be read through getTypeNodes();
        // otherwise no type literal is ever found.
        return typeNode
            .getTypeNodes()
            .flatMap((part) => extractPropertiesFromType(part));
    }
    return [];
}
97
// ─────────────────────────────────────────────
// normalizeNode: walk the AST recursively and emit a normalized string
// ─────────────────────────────────────────────
/**
 * Serializes a ts-morph node into a normalized string.
 *
 * Rewrites applied while walking the tree:
 * - a block (function body) becomes `{}`;
 * - a parameter becomes `$p:<type>`, or `{}:<type>` when destructured, with a
 *   `...` prefix for rest parameters and a `=$default` suffix when it has an
 *   initializer;
 * - a type parameter becomes `$T`, plus ` extends <constraint>` if present;
 * - a type reference goes through `normalizeTypeString`;
 * - JSX elements become `<JSX/>`;
 * - string / numeric / bigint / boolean literals become `"$s"` / `$n` / `$b`.
 *
 * Leaf nodes contribute their own text; any other node is the concatenation
 * of its children's results with no separator. Whitespace in the final string
 * is collapsed and trimmed.
 *
 * @param node - ts-morph node to normalize.
 * @returns {string} The normalized representation.
 */
export function normalizeNode(node) {
    function visit(n) {
        const kind = n.getKind();
        // Implementation details are irrelevant to the signature.
        if (kind === SyntaxKind.Block) {
            return '{}';
        }
        // Parameters: keep only the type structure, never the name.
        if (Node.isParameterDeclaration(n)) {
            const nameNode = n.getNameNode();
            const typeNode = n.getTypeNode();
            const typeStr = typeNode
                ? normalizeTypeString(typeNode.getText())
                : '$unknown';
            const prefix = n.isRestParameter() ? '...' : '';
            const suffix = n.hasInitializer() ? '=$default' : '';
            // Destructured parameter: { userId, options }: Config → {}:Config
            const isDestructured = Node.isObjectBindingPattern(nameNode) ||
                Node.isArrayBindingPattern(nameNode);
            return `${prefix}${isDestructured ? '{}' : '$p'}:${typeStr}${suffix}`;
        }
        // Generic parameters: T / TData → $T
        if (Node.isTypeParameterDeclaration(n)) {
            const constraint = n.getConstraint();
            return `$T${constraint ? ` extends ${normalizeTypeString(constraint.getText())}` : ''}`;
        }
        // Type references: normalize the referenced names.
        if (Node.isTypeReference(n)) {
            return normalizeTypeString(n.getText());
        }
        // JSX: only record that it is present.
        if (Node.isJsxElement(n) || Node.isJsxSelfClosingElement(n)) {
            return '<JSX/>';
        }
        // Literals → placeholders.
        if (kind === SyntaxKind.StringLiteral) {
            return '"$s"';
        }
        if (kind === SyntaxKind.NumericLiteral ||
            kind === SyntaxKind.BigIntLiteral) {
            return '$n';
        }
        if (kind === SyntaxKind.TrueKeyword || kind === SyntaxKind.FalseKeyword) {
            return '$b';
        }
        const children = n.getChildren();
        if (children.length === 0) {
            return n.getText();
        }
        return children.map(visit).join('');
    }
    return visit(node).replace(/\s+/g, ' ').trim();
}
156
/**
 * Normalizes one function parameter for the signature string.
 *
 * - Plain parameter: `[...]$p<index>:<type>[=$default]`.
 * - Destructured parameter: `[...]{<name>:<value>,...}:<type>[=$default]`,
 *   where `<value>` is `$d` for an element without a default, or the
 *   normalized form of its default expression. Entries are sorted so that
 *   reordering the destructured names does not change the hash.
 *
 * `<type>` is the structural form of the declared type, or `$unknown` when
 * the parameter has no type annotation.
 *
 * @param param - ts-morph parameter declaration.
 * @param {number} index - Position of the parameter in the parameter list.
 * @returns {string} The normalized parameter.
 */
function normalizeParameter(param, index) {
    const typeNode = param.getTypeNode();
    const typeStr = typeNode
        ? normalizeTypeWithStructure(typeNode)
        : '$unknown';
    const prefix = param.isRestParameter() ? '...' : '';
    const suffix = param.hasInitializer() ? '=$default' : '';
    const nameNode = param.getNameNode();
    const isDestructured = Node.isObjectBindingPattern(nameNode) ||
        Node.isArrayBindingPattern(nameNode);
    if (!isDestructured) {
        return `${prefix}$p${index}:${typeStr}${suffix}`;
    }
    const destrProps = [];
    for (const el of nameNode.getElements()) {
        // Holes in array patterns (`[, b]`) carry no information.
        if (Node.isOmittedExpression(el)) {
            continue;
        }
        const name = el.getName?.() ?? 'unknown';
        // NOTE(review): binding elements are not expected to carry a question
        // token; this check is retained from the original — confirm whether
        // it can ever be true.
        const hasQ = el
            .getChildren()
            .some((c) => c.getKind() === SyntaxKind.QuestionToken);
        const initializer = el.getInitializer();
        // The default is an expression, not a type node. normalizeNode turns
        // its literals into placeholders ("$s" / $n / $b) so the concrete
        // default value does not leak into the semantic hash.
        const valueType = initializer ? normalizeNode(initializer) : '$d';
        destrProps.push(`${name}${hasQ ? '?' : ''}:${valueType}`);
    }
    // Sort keys so that a mere reordering does not make the hash unstable.
    destrProps.sort();
    return `${prefix}{${destrProps.join(',')}}:${typeStr}${suffix}`;
}
196
/**
 * Renders a type node structurally, expanding named object types.
 *
 * - Type literal: `{name[?]:type,...}` with each property type expanded.
 * - Type reference with type arguments: `Name<arg,...>` with each argument
 *   expanded.
 * - Whitelisted built-in reference: the name itself.
 * - Other reference: resolved through the type checker. An interface expands
 *   to its property list and a type alias to its aliased type. If resolution
 *   fails or yields neither, the bare type name is returned.
 * - Anything else (unions, intersections, keywords, ...): the normalized
 *   text of the node.
 *
 * @param typeNode - ts-morph node to render.
 * @param {Set} [seen] - Declarations currently being expanded. Internal
 *   recursion state; external callers omit it.
 * @returns {string} The structural representation.
 */
function normalizeTypeWithStructure(typeNode, seen = new Set()) {
    const renderProp = (prop) => {
        const propTypeNode = prop.getTypeNode();
        const propType = propTypeNode
            ? normalizeTypeWithStructure(propTypeNode, seen)
            : '$unknown';
        return `${prop.getName()}${prop.hasQuestionToken() ? '?' : ''}:${propType}`;
    };
    // Type literal: { name: string, age: number }
    if (Node.isTypeLiteral(typeNode)) {
        const props = typeNode
            .getMembers()
            .filter(Node.isPropertySignature)
            .map((prop) => renderProp(prop));
        return `{${props.join(',')}}`;
    }
    // Everything that is not a reference (unions, intersections, keywords...)
    if (!Node.isTypeReference(typeNode)) {
        return normalizeTypeString(typeNode.getText());
    }
    // Type reference (e.g. Param, Array<string>, Promise<User>)
    const typeName = typeNode.getTypeName().getText();
    // Type arguments take priority, whether or not the name is whitelisted.
    const typeArgs = typeNode.getTypeArguments();
    if (typeArgs.length > 0) {
        const normalizedArgs = typeArgs.map((arg) => normalizeTypeWithStructure(arg, seen));
        return `${typeName}<${normalizedArgs.join(',')}>`;
    }
    if (BUILTIN_TYPES.has(typeName)) {
        return typeName;
    }
    // Non-generic reference: try to resolve and expand the declaration.
    try {
        const decl = typeNode.getType().getSymbol()?.getDeclarations()[0];
        // A declaration already on the expansion path is self-referential
        // (e.g. `interface Tree { next: Tree }`). Expanding it again would
        // recurse until the stack overflows, so fall back to the name.
        if (decl && !seen.has(decl)) {
            seen.add(decl);
            try {
                if (Node.isInterfaceDeclaration(decl)) {
                    const props = decl.getProperties().map((prop) => renderProp(prop));
                    return `{${props.join(',')}}`;
                }
                if (Node.isTypeAliasDeclaration(decl)) {
                    const aliasedType = decl.getTypeNode();
                    if (aliasedType) {
                        return normalizeTypeWithStructure(aliasedType, seen);
                    }
                }
            }
            finally {
                // Only guard the active path: sibling references to the same
                // declaration must still expand.
                seen.delete(decl);
            }
        }
    }
    catch {
        // Best effort: if resolution fails, fall back to the type name.
    }
    return typeName;
}
261
/**
 * Builds the normalized signature string for a declaration node.
 *
 * - Function declaration / arrow function / function expression:
 *   `fn[<typeParams>](params)=>returnType`. Parameters are positional, and the
 *   return type is `$inferred` when not annotated. Type parameters are only
 *   collected for function declarations.
 * - Variable declaration: the signature of its arrow/function initializer,
 *   otherwise the normalized node.
 * - Interface: `interface{prop;...}[ extends ...]` with properties sorted.
 * - Type alias: `type{prop;...}` (sorted) when the aliased type yields
 *   properties, otherwise the normalized node.
 * - Class: `class{method(params)=>ret;...}` with method signatures sorted so
 *   declaration order does not affect the result.
 * - Anything else: the normalized node.
 *
 * @param node - ts-morph declaration node.
 * @returns {string} The normalized signature.
 */
export function extractNormalizedSignature(node) {
    // Function declaration / arrow function / function expression
    if (Node.isFunctionDeclaration(node) ||
        Node.isArrowFunction(node) ||
        Node.isFunctionExpression(node)) {
        const typeParams = Node.isFunctionDeclaration(node)
            ? node.getTypeParameters().map((tp) => normalizeNode(tp))
            : [];
        // Parameter order is part of the signature, so it is kept as
        // declared. (A default-comparator sort on Node objects is a no-op.)
        const params = node
            .getParameters()
            .map((p, i) => normalizeParameter(p, i));
        const retNode = node.getReturnTypeNode?.();
        const returnType = retNode
            ? normalizeTypeString(retNode.getText())
            : '$inferred';
        const tpStr = typeParams.length ? `<${typeParams.join(',')}>` : '';
        return `fn${tpStr}(${params.join(',')})=>${returnType}`;
    }
    // Variable declaration (const foo = () => {})
    if (Node.isVariableDeclaration(node)) {
        const init = node.getInitializer();
        if (init &&
            (Node.isArrowFunction(init) || Node.isFunctionExpression(init))) {
            return extractNormalizedSignature(init);
        }
        return normalizeNode(node);
    }
    // Interface: list every property, with type names templated.
    if (Node.isInterfaceDeclaration(node)) {
        const props = node.getProperties().map((prop) => {
            const typeNode = prop.getTypeNode();
            const typeStr = typeNode
                ? normalizeTypeString(typeNode.getText())
                : '$unknown';
            const optional = prop.hasQuestionToken() ? '?' : '';
            return `${prop.getName()}${optional}:${typeStr}`;
        });
        const extendsClause = node.getExtends();
        const extendsStr = extendsClause.length
            ? ` extends ${extendsClause.map((e) => normalizeTypeString(e.getText())).join(',')}`
            : '';
        return `interface{${props.sort().join(';')}}${extendsStr}`;
    }
    // Type alias of an object type: extract the structure.
    if (Node.isTypeAliasDeclaration(node)) {
        const typeNode = node.getTypeNode();
        if (typeNode) {
            // Handles type Foo = { ... }
            const props = extractPropertiesFromType(typeNode);
            if (props.length > 0) {
                return `type{${props.sort().join(';')}}`;
            }
        }
        return normalizeNode(node);
    }
    // Class: only the list of method signatures.
    if (Node.isClassDeclaration(node)) {
        const methods = node.getMethods().map((m) => {
            // Parameters stay positional: sorting them would make
            // (a: string, b: number) and (a: number, b: string) collide.
            const params = m.getParameters().map((p) => normalizeNode(p));
            const retNode = m.getReturnTypeNode();
            const ret = retNode
                ? normalizeTypeString(retNode.getText())
                : '$inferred';
            return `${m.getName()}(${params.join(',')})=>${ret}`;
        });
        // Sort the rendered signatures (strings), not the Node objects, so
        // the ordering is actually applied.
        return `class{${methods.sort().join(';')}}`;
    }
    return normalizeNode(node);
}
337
// ─────────────────────────────────────────────
// computeSemanticHash
// Included: normalized signature + name + type + description + sideEffects + hooks
// Excluded: parameter names, implementation, formatting, callers/callees
// ─────────────────────────────────────────────
/**
 * Computes the semantic hash of a symbol row.
 *
 * The hashed payload is the JSON serialization of the row's name, type,
 * description (null when absent), normalized signature (empty string when the
 * row has no node), and the sorted `sideEffects` and `hooks` lists from
 * `row.meta`.
 *
 * @param row - Symbol row with `name`, `type`, and optional `description`,
 *   `node`, and `meta`.
 * @returns {[string, string]} `[sha256 hex digest, serialized payload]`.
 */
export function computeSemanticHash(row) {
    const astNode = row.node || null;
    const metadata = row.meta || {};
    const sortedCopy = (list) => [...(list ?? [])].sort();
    const signature = astNode ? extractNormalizedSignature(astNode) : '';
    const payload = JSON.stringify({
        name: row.name,
        type: row.type,
        description: row.description ?? null,
        signature,
        sideEffects: sortedCopy(metadata.sideEffects),
        hooks: sortedCopy(metadata.hooks),
    });
    const digest = createHash('sha256').update(payload).digest('hex');
    return [digest, payload];
}
358
// ─────────────────────────────────────────────
// computeFileHash: hash of the raw file content
// ─────────────────────────────────────────────
/**
 * Returns the SHA-256 digest of a file's raw content.
 *
 * @param {string|Buffer} fileContent - Content to hash.
 * @returns {string} Hex-encoded SHA-256 digest.
 */
export function computeFileHash(fileContent) {
    const hasher = createHash('sha256');
    hasher.update(fileContent);
    return hasher.digest('hex');
}
@@ -7,7 +7,7 @@ const REUSABLE_CODE_ADVISOR_MARKDOWN = `# 可复用代码推荐
7
7
 
8
8
  当用户需要可复用代码或实现类需求时,按顺序执行:
9
9
 
10
- 1. 调用 search_symbols 检索候选,type 根据用户需求传(component/util/selector/type
10
+ 1. 调用 search_symbols 检索候选,type 根据用户需求传(component/function/hook/class/type/interface);描述功能意图时设置 semantic=true
11
11
  2. 如果用户指定了结构过滤条件(props/params/properties/hooks),额外调用 search_by_structure 做结构匹配
12
12
  3. 先 search_symbols(limit=20) 拉候选,再对 Top 3 调用 get_symbol_detail 做深度判断
13
13
  4. 若仅凭签名/摘要无法判断,对最相关的若干候选调用 get_symbol_detail 获取详情
@@ -16,7 +16,25 @@ const REUSABLE_CODE_ADVISOR_MARKDOWN = `# 可复用代码推荐
16
16
  - **API 是否简单**、入参是否合适
17
17
  - **依赖与副作用**风险
18
18
  - **复用安全性**(稳定性、耦合度、是否便于扩展)
19
- 6. 给出**唯一首选**推荐,并说明理由
19
+ 6. 给出**唯一首选**推荐,并说明理由,同时使用 **AskUserQuestion** 工具,提供两个选项:
20
+ - 采纳推荐
21
+ - 取消
22
+ 7. 用户选择"采纳推荐"后,立即调用 inc_usage 工具记录该行为(symbolId 从搜索结果的 id 字段获取),不要遗漏此步骤。
23
+
24
+ ## 不适用场景
25
+
26
+ 以下情况不要调用搜索工具:
27
+ - 用户只是问代码如何写(概念性问题),不需要检索已有实现
28
+ - 用户明确说"新建一个"、"自己实现"、"不用已有的"
29
+ - 查询过于通用(如只说"utils"),先与用户确认具体需求再搜索
30
+
31
+ ## 搜索结果判断
32
+
33
+ 根据 semanticSimilarity 决定推荐置信度:
34
+ - **> 0.85**:高置信度,可直接推荐
35
+ - **0.6 – 0.85**:中等置信度,需结合 description 和 get_symbol_detail 综合判断
36
+ - **< 0.6**:低置信度,说明可能无合适实现,明确告知用户
37
+ - **空结果**:明确说"未找到已有实现",不要凭空推荐
20
38
 
21
39
  ## 回复结构
22
40
 
@@ -28,6 +46,7 @@ const REUSABLE_CODE_ADVISOR_MARKDOWN = `# 可复用代码推荐
28
46
  - **理由:** 1~3 条要点
29
47
  - **其他候选:** 简要列出及取舍(同步标注副作用)
30
48
  - **用法提示:** 结合用户场景的最小集成说明
49
+ - **是否采纳:** 展示两个选项:选项1. 采纳推荐 选项2. 取消。等待用户确认
31
50
 
32
51
  ## 约束
33
52
 
@@ -35,12 +54,6 @@ const REUSABLE_CODE_ADVISOR_MARKDOWN = `# 可复用代码推荐
35
54
  - 若无合适代码块,明确说明,并给出最接近的选项及差距。
36
55
  - 推理简洁,面向落地实现。
37
56
 
38
- ## 使用反馈
39
-
40
- 当最终采纳了某个推荐代码块后,必须调用 inc_usage 工具记录采纳行为:
41
- “请调用 inc_usage({ symbolId: <选中的代码块 id> })”
42
- 其中 symbolId 从 search_symbols 或 search_by_structure 返回结果的 id 字段获取。这条记录会用于后续排序优化。
43
-
44
57
  ## 更多示例
45
58
 
46
59
  与仓库内 \`.cursor/skills/reusable-code-advisor/examples.md\` 中的示例一致(在 Cursor 或本地打开该文件查看)。
@@ -1,7 +1,9 @@
1
1
  import { env } from '../config/env.js';
2
- import { getMySqlPool } from '../db/mysql.js';
2
+ import { getPool } from '../db/postgres.js';
3
3
  import { createEmbeddingClient } from '../services/embeddingClient.js';
4
- import { cosineSimilarity } from '../services/vectorMath.js';
4
+ import { SEARCHABLE_STATUS } from '../config/symbolStatus.js';
5
+ const SIMILARITY_THRESHOLD = 0.5;
6
+ const TOP_K = 20;
5
7
  const inMemorySymbols = [
6
8
  {
7
9
  id: 1,
@@ -18,7 +20,7 @@ const inMemorySymbols = [
18
20
  {
19
21
  id: 2,
20
22
  name: 'formatDate',
21
- type: 'util',
23
+ type: 'function',
22
24
  category: 'date',
23
25
  path: 'src/utils/date.ts',
24
26
  description: 'Format date to YYYY-MM-DD',
@@ -39,6 +41,7 @@ function parseEmbedding(raw) {
39
41
  }
40
42
  if (typeof raw === 'string') {
41
43
  try {
44
+ // pgvector 返回 "[x1,x2,...]",恰好是合法 JSON 数组
42
45
  const j = JSON.parse(raw);
43
46
  if (!Array.isArray(j))
44
47
  return null;
@@ -82,7 +85,7 @@ function getMetaArray(meta, key) {
82
85
  export class SymbolRepository {
83
86
  pool;
84
87
  constructor() {
85
- this.pool = getMySqlPool();
88
+ this.pool = getPool();
86
89
  }
87
90
  async search(query, type) {
88
91
  if (!this.pool) {
@@ -94,23 +97,27 @@ export class SymbolRepository {
94
97
  (s.description ?? '').toLowerCase().includes(q)));
95
98
  });
96
99
  }
97
- const params = [`%${query}%`];
100
+ const params = [
101
+ `%${query}%`,
102
+ SEARCHABLE_STATUS,
103
+ ];
98
104
  let sql = `
99
- SELECT id, name, type, category, path, description, content, CAST(meta AS CHAR) AS meta, usage_count, created_at
100
- FROM ${env.mysqlSymbolsTable}
101
- WHERE (name LIKE ? OR description LIKE ?)
105
+ SELECT id, name, type, category, path, description, content, meta::text AS meta, usage_count, created_at
106
+ FROM ${env.symbolsTable}
107
+ WHERE (name ILIKE $1 OR description ILIKE $1)
108
+ AND status = $2
102
109
  `;
103
- params.push(`%${query}%`);
104
110
  if (type) {
105
- sql += ' AND type = ?';
106
111
  params.push(type);
112
+ sql += ` AND type = $${params.length}`;
107
113
  }
108
114
  sql += ' ORDER BY usage_count DESC LIMIT 20';
109
- const [rows] = await this.pool.query(sql, params);
115
+ const { rows } = await this.pool.query(sql, params);
110
116
  return rows.map((r) => mapRow(r));
111
117
  }
112
118
  /**
113
- * Phase 5:对自然语言查询做向量检索,返回代码块与余弦相似度(已去掉 embedding 列便于 JSON 输出)。
119
+ * 语义向量检索:将 query 嵌入后用 pgvector <=> 运算符(cosine distance)在数据库内完成相似度排序。
120
+ * 不再需要在 Node 拉取全量向量做内存计算。
114
121
  */
115
122
  async searchSemanticHits(query, opts) {
116
123
  if (!env.embeddingServiceUrl) {
@@ -119,47 +126,47 @@ export class SymbolRepository {
119
126
  if (!this.pool) {
120
127
  return [];
121
128
  }
122
- const candidateLimit = opts?.candidateLimit ?? 3000;
123
- const limit = opts?.limit ?? 20;
124
- const type = opts?.type;
129
+ const limit = opts?.limit ?? TOP_K;
125
130
  const client = createEmbeddingClient(env.embeddingServiceUrl);
126
131
  const [queryVec] = await client.embed([query.trim()]);
127
132
  if (!queryVec?.length) {
128
133
  throw new Error('查询向量为空');
129
134
  }
135
+ // pgvector 向量字面量格式:[x1,x2,...]
136
+ const vecLiteral = `[${queryVec.join(',')}]`;
137
+ const params = [vecLiteral, SEARCHABLE_STATUS];
138
+ // 1 - cosine_distance = cosine_similarity;多取一倍候选后在应用层过阈值
130
139
  let sql = `
131
- SELECT id, name, type, category, path, description, content, CAST(meta AS CHAR) AS meta, usage_count, created_at, embedding
132
- FROM ${env.mysqlSymbolsTable}
140
+ SELECT id, name, type, category, path, description, content, meta::text AS meta,
141
+ usage_count, created_at,
142
+ 1 - (embedding <=> $1::vector) AS similarity
143
+ FROM ${env.symbolsTable}
133
144
  WHERE embedding IS NOT NULL
145
+ AND status = $2
134
146
  `;
135
- const params = [];
136
- if (type) {
137
- sql += ' AND type = ?';
138
- params.push(type);
147
+ if (opts?.type) {
148
+ params.push(opts.type);
149
+ sql += ` AND type = $${params.length}`;
139
150
  }
140
- sql += ' ORDER BY usage_count DESC LIMIT ?';
141
- params.push(candidateLimit);
142
- const [rows] = await this.pool.query(sql, params);
143
- const withVec = rows
144
- .map((r) => mapRow(r, { includeEmbedding: true }))
145
- .filter((s) => s.embedding && s.embedding.length === queryVec.length);
146
- return withVec
147
- .map((s) => {
148
- const sim = cosineSimilarity(queryVec, s.embedding);
149
- const { embedding: _, ...rest } = s;
150
- return { symbol: rest, similarity: sim };
151
- })
152
- .sort((a, b) => b.similarity - a.similarity)
151
+ params.push(limit * 2); // 多取一倍以便 SIMILARITY_THRESHOLD 过滤后仍有足量结果
152
+ sql += ` ORDER BY embedding <=> $1::vector LIMIT $${params.length}`;
153
+ const { rows } = await this.pool.query(sql, params);
154
+ return rows
155
+ .map((r) => ({
156
+ symbol: mapRow(r),
157
+ similarity: Number(r.similarity),
158
+ }))
159
+ .filter((x) => x.similarity >= SIMILARITY_THRESHOLD)
153
160
  .slice(0, limit);
154
161
  }
155
162
  async getByName(name) {
156
163
  if (!this.pool) {
157
164
  return (inMemorySymbols.find((s) => s.name.toLowerCase() === name.toLowerCase()) ?? null);
158
165
  }
159
- const [rows] = await this.pool.query(`
160
- SELECT id, name, type, category, path, description, content, CAST(meta AS CHAR) AS meta, usage_count, created_at
161
- FROM ${env.mysqlSymbolsTable}
162
- WHERE name = ?
166
+ const { rows } = await this.pool.query(`
167
+ SELECT id, name, type, category, path, description, content, meta::text AS meta, usage_count, created_at
168
+ FROM ${env.symbolsTable}
169
+ WHERE name = $1
163
170
  LIMIT 1
164
171
  `, [name]);
165
172
  if (rows.length === 0) {
@@ -180,8 +187,8 @@ export class SymbolRepository {
180
187
  }
181
188
  return false;
182
189
  }
183
- const [result] = await this.pool.query(`UPDATE ${env.mysqlSymbolsTable} SET usage_count = usage_count + 1 WHERE id = ?`, [symbolId]);
184
- return result.affectedRows > 0;
190
+ const result = await this.pool.query(`UPDATE ${env.symbolsTable} SET usage_count = usage_count + 1 WHERE id = $1`, [symbolId]);
191
+ return result.rowCount !== null && result.rowCount > 0;
185
192
  }
186
193
  async searchByStructure(fields, opts) {
187
194
  const normalized = fields.map((f) => f.trim()).filter(Boolean);
@@ -212,21 +219,21 @@ export class SymbolRepository {
212
219
  }
213
220
  const params = [];
214
221
  let sql = `
215
- SELECT id, name, type, category, path, description, content, CAST(meta AS CHAR) AS meta, usage_count, created_at
216
- FROM ${env.mysqlSymbolsTable}
222
+ SELECT id, name, type, category, path, description, content, meta::text AS meta, usage_count, created_at
223
+ FROM ${env.symbolsTable}
217
224
  WHERE 1 = 1
218
225
  `;
219
226
  if (type) {
220
- sql += ' AND type = ?';
221
227
  params.push(type);
228
+ sql += ` AND type = $${params.length}`;
222
229
  }
223
230
  if (category) {
224
- sql += ' AND category LIKE ?';
225
231
  params.push(`%${category}%`);
232
+ sql += ` AND category ILIKE $${params.length}`;
226
233
  }
227
- sql += ' ORDER BY usage_count DESC LIMIT ?';
228
234
  params.push(Math.max(limit * 5, 50));
229
- const [rows] = await this.pool.query(sql, params);
235
+ sql += ` ORDER BY usage_count DESC LIMIT $${params.length}`;
236
+ const { rows } = await this.pool.query(sql, params);
230
237
  return rows
231
238
  .map((r) => mapRow(r))
232
239
  .filter(matchesAll)
@@ -0,0 +1,57 @@
1
+ /**
2
+ * BullMQ embedding 队列 producer。
3
+ *
4
+ * 去重策略:
5
+ * - 同一 CI run 内:ci-index.ts 用 new Set(hashes) 去重后再入队,Redis 层无需 jobId 去重
6
+ * - 跨 CI run 的向量缓存:由 worker 查询 DB(status=online AND semantic_hash=?)决定是否调 API
7
+ * - 不使用 jobId,避免 BullMQ completed 状态残留导致后续 run 任务被跳过
8
+ *
9
+ * CI 流程只负责 enqueue,worker 异步消费,CI 不阻塞。
10
+ * 调用方在进程退出前需调用 closeEmbeddingQueue() 释放连接。
11
+ */
12
+ import { Queue } from 'bullmq';
13
+ import { Redis } from 'ioredis';
14
+ import { env } from '../config/env.js';
15
// Lazily created singletons, shared by every producer call in this module.
let _queue = null;
let _connection = null;
/**
 * Returns the shared `embedding` queue, creating it and its Redis connection
 * on first use.
 */
function getQueue() {
    if (_queue) {
        return _queue;
    }
    const redisOptions = {
        maxRetriesPerRequest: null, // BullMQ required
        enableReadyCheck: false,
    };
    _connection = new Redis(env.redisUrl, redisOptions);
    _queue = new Queue('embedding', { connection: _connection });
    return _queue;
}
27
/**
 * Enqueues a single semantic hash as an `embed` job.
 *
 * @param {string} semanticHash - Hash identifying what to embed.
 * @param {string} [symbolsTable] - Target table; defaults to `env.symbolsTable`.
 * @returns {Promise<void>} Resolves once the job has been added.
 */
export async function enqueueEmbedding(semanticHash, symbolsTable) {
    const queue = getQueue();
    const payload = {
        semanticHash,
        symbolsTable: symbolsTable ?? env.symbolsTable,
    };
    const jobOptions = {
        attempts: 5,
        backoff: { type: 'exponential', delay: 5_000 },
    };
    await queue.add('embed', payload, jobOptions);
}
34
/**
 * Enqueues a batch of semantic hashes as `embed` jobs with one bulk call.
 *
 * The caller is expected to have de-duplicated the hashes within a CI run.
 * Per the module header, the worker checks the DB when it consumes a job to
 * decide whether the embedding API actually needs to be called.
 *
 * An empty batch is a no-op: no Redis connection is opened and nothing is
 * sent.
 *
 * @param {string[]} semanticHashes - Hashes to enqueue.
 * @param {string} [symbolsTable] - Target table; defaults to `env.symbolsTable`.
 * @returns {Promise<void>} Resolves once all jobs have been added.
 */
export async function enqueueEmbeddingBatch(semanticHashes, symbolsTable) {
    // Nothing to do; avoid creating a connection that would then have to be
    // closed before the process can exit.
    if (semanticHashes.length === 0) {
        return;
    }
    const table = symbolsTable ?? env.symbolsTable;
    const queue = getQueue();
    const jobs = semanticHashes.map((hash) => ({
        name: 'embed',
        data: { semanticHash: hash, symbolsTable: table },
        opts: {
            attempts: 5,
            backoff: { type: 'exponential', delay: 5_000 },
        },
    }));
    await queue.addBulk(jobs);
}
51
/**
 * Closes the queue and its Redis connection before the process exits.
 * CI scripts must call this, otherwise the process hangs.
 *
 * The module state is reset before anything is awaited, and the connection is
 * quit even if closing the queue rejects, so a failed close cannot leave the
 * connection open. Calling it when nothing was opened is a no-op.
 *
 * @returns {Promise<void>} Resolves once both have been closed.
 */
export async function closeEmbeddingQueue() {
    const queue = _queue;
    const connection = _connection;
    // Reset first so a later getQueue() never reuses a closing instance.
    _queue = null;
    _connection = null;
    try {
        await queue?.close();
    }
    finally {
        await connection?.quit();
    }
}