@lorrylurui/code-intelligence-mcp 1.1.14 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +91 -598
- package/dist/cli/ci-index-cli.js +66 -0
- package/dist/cli/ci-index.js +80 -0
- package/dist/cli/detect-duplicates.js +1 -6
- package/dist/cli/embedding-worker-cli.js +35 -0
- package/dist/cli/index-codebase.js +6 -7
- package/dist/config/env.js +3 -102
- package/dist/config/symbolStatus.js +8 -0
- package/dist/db/mysql.js +3 -6
- package/dist/db/schema.js +9 -2
- package/dist/indexer/astNormalizer.js +201 -0
- package/dist/indexer/babelParser.js +257 -28
- package/dist/indexer/categoryClassifier.js +129 -0
- package/dist/indexer/embedText.js +9 -7
- package/dist/indexer/extractMeta.js +7 -2
- package/dist/indexer/heuristics.js +42 -23
- package/dist/indexer/indexProject.js +145 -55
- package/dist/indexer/jsAstNormalizer.js +201 -0
- package/dist/indexer/persistSymbols.js +7 -3
- package/dist/indexer/tsAstNormalizer.js +363 -0
- package/dist/prompts/reusableCodeAdvisorPrompt.js +6 -3
- package/dist/repositories/symbolRepository.js +81 -7
- package/dist/services/embeddingQueue.js +56 -0
- package/dist/services/reindex.js +12 -9
- package/dist/tools/searchByStructure.js +3 -1
- package/dist/tools/searchSymbols.js +14 -3
- package/dist/workers/embeddingWorker.js +100 -0
- package/package.json +7 -4
|
@@ -2,12 +2,16 @@ import { z } from 'zod';
|
|
|
2
2
|
import { rankSemanticHits, rankSymbols } from '../services/ranking.js';
|
|
3
3
|
export const searchSymbolsInput = z.object({
|
|
4
4
|
query: z.string().min(1),
|
|
5
|
-
type: z
|
|
5
|
+
type: z
|
|
6
|
+
.enum(['component', 'function', 'hook', 'type', 'interface', 'class'])
|
|
7
|
+
.optional(),
|
|
6
8
|
ranked: z.boolean().optional().default(true),
|
|
7
9
|
/** Phase 5:自然语言 / 描述句检索(需 EMBEDDING_SERVICE_URL + 索引已写入 embedding) */
|
|
8
10
|
semantic: z.boolean().optional().default(false),
|
|
9
11
|
limit: z.number().int().min(1).max(100).optional().default(20),
|
|
10
12
|
});
|
|
13
|
+
const THREADHOLD_SIMILARITY_FOR_FINAL = 0.6;
|
|
14
|
+
const TOP_K_FOR_FINAL_RESULTS = 20; // 结果上限,返回相似度高的,保证数据质量
|
|
11
15
|
export function createSearchSymbolsTool(repository) {
|
|
12
16
|
return {
|
|
13
17
|
name: 'search_symbols',
|
|
@@ -21,7 +25,8 @@ export function createSearchSymbolsTool(repository) {
|
|
|
21
25
|
});
|
|
22
26
|
const simById = new Map(hits.map((h) => [h.symbol.id, h.similarity]));
|
|
23
27
|
const resultRows = input.ranked
|
|
24
|
-
? rankSemanticHits(hits)
|
|
28
|
+
? rankSemanticHits(hits)
|
|
29
|
+
.map((item) => ({
|
|
25
30
|
id: item.symbol.id,
|
|
26
31
|
name: item.symbol.name,
|
|
27
32
|
type: item.symbol.type,
|
|
@@ -33,6 +38,9 @@ export function createSearchSymbolsTool(repository) {
|
|
|
33
38
|
reasonDetail: item.reason,
|
|
34
39
|
semanticSimilarity: Number((simById.get(item.symbol.id) ?? 0).toFixed(4)),
|
|
35
40
|
}))
|
|
41
|
+
.filter((x) => x.semanticSimilarity >=
|
|
42
|
+
THREADHOLD_SIMILARITY_FOR_FINAL) // 阈值过滤,去掉明显不相关的结果
|
|
43
|
+
.slice(0, TOP_K_FOR_FINAL_RESULTS)
|
|
36
44
|
: hits.map((h) => ({
|
|
37
45
|
id: h.symbol.id,
|
|
38
46
|
name: h.symbol.name,
|
|
@@ -53,7 +61,8 @@ export function createSearchSymbolsTool(repository) {
|
|
|
53
61
|
}
|
|
54
62
|
const rows = await repository.search(input.query, input.type);
|
|
55
63
|
const resultRows = input.ranked
|
|
56
|
-
? rankSymbols(input.query, rows)
|
|
64
|
+
? rankSymbols(input.query, rows)
|
|
65
|
+
.map((item) => ({
|
|
57
66
|
id: item.symbol.id,
|
|
58
67
|
name: item.symbol.name,
|
|
59
68
|
type: item.symbol.type,
|
|
@@ -64,6 +73,8 @@ export function createSearchSymbolsTool(repository) {
|
|
|
64
73
|
reason: item.reason.summary,
|
|
65
74
|
reasonDetail: item.reason,
|
|
66
75
|
}))
|
|
76
|
+
.filter((x) => x.score >= THREADHOLD_SIMILARITY_FOR_FINAL) // 阈值过滤,去掉明显不相关的结果
|
|
77
|
+
.slice(0, TOP_K_FOR_FINAL_RESULTS)
|
|
67
78
|
: rows.map((r) => ({
|
|
68
79
|
id: r.id,
|
|
69
80
|
name: r.name,
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BullMQ embedding worker(常驻消费进程)。
|
|
3
|
+
*
|
|
4
|
+
* 流程:
|
|
5
|
+
* 1. 收到 job { semanticHash }
|
|
6
|
+
* 2. 查 semantic_hash 缓存:若已有 status=online 的符号带 embedding → 直接复用(0 次 API 调用)
|
|
7
|
+
* 3. 缓存未命中 → 取一条 pending 行构建语义文本 → 调 embedding API
|
|
8
|
+
* 4. 批量 UPDATE:所有 semantic_hash 相同且 status=pending 的行一次性写入向量并置 online
|
|
9
|
+
*
|
|
10
|
+
* 并发/限流:
|
|
11
|
+
* - concurrency 控制同时处理的 job 数(默认 5)
|
|
12
|
+
* - BullMQ limiter 控制全局 RPM(默认 100/min,留 buffer 低于 OpenAI 3000 RPM)
|
|
13
|
+
*
|
|
14
|
+
* 大仓分片:
|
|
15
|
+
* - 直接启动多个 worker 进程(同一 Redis)即可水平扩展,BullMQ 原生分布式协调
|
|
16
|
+
*/
|
|
17
|
+
import { Worker } from 'bullmq';
|
|
18
|
+
import Redis from 'ioredis';
|
|
19
|
+
import { env } from '../config/env.js';
|
|
20
|
+
import { getMySqlPool } from '../db/mysql.js';
|
|
21
|
+
import { createEmbeddingClient } from './embeddingClient.js';
|
|
22
|
+
import { indexedRowToEmbedText } from '../indexer/embedText.js';
|
|
23
|
+
import { SYMBOL_STATUS } from '../config/symbolStatus.js';
|
|
24
|
+
/**
 * Handle one embedding job: obtain a vector for `job.data.semanticHash` and
 * write it to every pending row that shares that hash.
 *
 * Steps:
 *   1. Look for a row with the same semantic_hash that is already ONLINE and
 *      has an embedding; if found, reuse that vector (no embedding call).
 *   2. Otherwise load one PENDING row, build its embed text and call the
 *      embedding client.
 *   3. UPDATE all PENDING rows with that semantic_hash: store the vector and
 *      switch them to ONLINE.
 *
 * @param {{ data: { semanticHash: string } }} job - Queue job carrying the hash.
 * @param {{ query: Function }} pool - MySQL pool; `query(sql, params)` is
 *   expected to resolve to `[rows, ...]`.
 * @returns {Promise<void>} Resolves when the rows are updated, or immediately
 *   when no pending row is left for this hash.
 * @throws {Error} If the job has no semanticHash, or if no usable vector could
 *   be obtained (rows are then left PENDING instead of being marked ONLINE).
 */
async function processEmbedJob(job, pool) {
    const { semanticHash } = job?.data ?? {};
    if (typeof semanticHash !== 'string' || semanticHash.length === 0) {
        throw new Error('[embeddingWorker] job is missing a semanticHash');
    }
    const table = env.mysqlSymbolsTable;
    const hashLabel = semanticHash.slice(0, 10);
    // Step 1: cache lookup — an ONLINE row with the same semantic_hash already has a vector.
    const [cached] = await pool.query(`SELECT embedding FROM ${table}
         WHERE semantic_hash = ? AND status = ? AND embedding IS NOT NULL
         LIMIT 1`, [semanticHash, SYMBOL_STATUS.ONLINE]);
    let vector;
    if (cached.length > 0) {
        // Cache hit: reuse the stored vector. The column may arrive as a JSON
        // string or as an already-parsed value, so handle both.
        const stored = cached[0].embedding;
        vector = typeof stored === 'string' ? JSON.parse(stored) : stored;
        console.error(`[worker] cache hit hash=${hashLabel}…`);
    }
    else {
        // Cache miss: embed one representative PENDING row.
        const [pending] = await pool.query(`SELECT name, type, category, path, description, content, meta
           FROM ${table}
           WHERE semantic_hash = ? AND status = ?
           LIMIT 1`, [semanticHash, SYMBOL_STATUS.PENDING]);
        if (pending.length === 0) {
            // Nothing left to do (e.g. another worker already handled this hash).
            return;
        }
        const row = pending[0];
        const meta = typeof row.meta === 'string'
            ? JSON.parse(row.meta)
            : (row.meta ?? {});
        const doc = indexedRowToEmbedText({ ...row, meta });
        // The client is only needed on a miss, so build it here rather than per job.
        const embedClient = createEmbeddingClient(env.embeddingServiceUrl);
        const vectors = await embedClient.embed([doc]);
        vector = vectors?.[0];
        console.error(`[worker] embedded hash=${hashLabel}… path=${row.path}:${row.name}`);
    }
    // Guard: JSON.stringify(undefined) is undefined, which would flip the rows to
    // ONLINE without a usable embedding. Fail the job so the rows stay PENDING.
    if (!Array.isArray(vector) || vector.length === 0) {
        throw new Error(`[embeddingWorker] no usable embedding vector for hash=${hashLabel}`);
    }
    // Step 2: one UPDATE covers every PENDING row sharing this semantic_hash.
    await pool.query(`UPDATE ${table}
     SET embedding = CAST(? AS JSON), status = ?
     WHERE semantic_hash = ? AND status = ?`, [
        JSON.stringify(vector),
        SYMBOL_STATUS.ONLINE,
        semanticHash,
        SYMBOL_STATUS.PENDING,
    ]);
}
|
|
71
|
+
/**
 * Start the embedding worker and return the Worker instance (the caller can
 * keep it for a graceful shutdown).
 *
 * @param {{ concurrency?: number, rpmLimit?: number }} [opts]
 *   concurrency - number of jobs processed at the same time (default 5).
 *   rpmLimit    - maximum jobs per 60 s window passed to the limiter (default 100).
 * @returns {Worker} The worker consuming the 'embedding' queue.
 * @throws {Error} If the MySQL pool is unavailable.
 */
export function startEmbeddingWorker(opts = {}) {
    const { concurrency = 5, rpmLimit = 100 } = opts;
    // Check MySQL first: throwing after the Redis client has been created would
    // leave an open connection behind that nobody holds a reference to.
    const pool = getMySqlPool();
    if (!pool) {
        throw new Error('[embeddingWorker] MySQL pool unavailable — check env vars');
    }
    const connection = new Redis(env.redisUrl, {
        maxRetriesPerRequest: null,
        enableReadyCheck: false,
    });
    const worker = new Worker('embedding', (job) => processEmbedJob(job, pool), {
        connection,
        concurrency,
        // Rate limit for the queue, intended to keep embedding calls under the provider's limit.
        limiter: { max: rpmLimit, duration: 60_000 },
    });
    worker.on('completed', (job) => {
        // Same defensive access as the 'failed' handler so logging can never throw.
        console.error(`[worker] ✓ job done hash=${job?.data?.semanticHash?.slice(0, 10)}…`);
    });
    worker.on('failed', (job, err) => {
        console.error(`[worker] ✗ job fail hash=${job?.data?.semanticHash?.slice(0, 10)}… err=${err.message}`);
    });
    worker.on('error', (err) => {
        console.error(`[worker] error: ${err.message}`);
    });
    return worker;
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lorrylurui/code-intelligence-mcp",
|
|
3
|
-
"version": "1.1.14",
|
|
3
|
+
"version": "1.2.0",
|
|
4
4
|
"private": false,
|
|
5
5
|
"description": "MCP server 提供仓库内可复用代码块(ts/tsx/js/jsx/css/less)的索引和查询能力,支持基于代码上下文的智能推荐。",
|
|
6
6
|
"type": "module",
|
|
@@ -10,15 +10,16 @@
|
|
|
10
10
|
],
|
|
11
11
|
"bin": {
|
|
12
12
|
"code-intelligence-mcp": "./dist/index.js",
|
|
13
|
-
"code-intelligence-index": "./dist/cli/index-codebase.js"
|
|
13
|
+
"code-intelligence-index": "./dist/cli/index-codebase-cli.js"
|
|
14
14
|
},
|
|
15
15
|
"scripts": {
|
|
16
16
|
"dev": "tsx watch --clear-screen=false --exclude node_modules --exclude dist src/index.ts",
|
|
17
17
|
"dev:mcp": "node ./scripts/mcp-dev-watch.mjs",
|
|
18
18
|
"build": "tsc -p tsconfig.json",
|
|
19
19
|
"start": "node dist/index.js",
|
|
20
|
-
"index": "tsx src/cli/index-codebase.ts",
|
|
21
|
-
"
|
|
20
|
+
"index": "tsx src/cli/index-codebase-cli.ts",
|
|
21
|
+
"ci-index": "tsx src/cli/ci-index-cli.ts",
|
|
22
|
+
"worker:embedding": "tsx src/cli/embedding-worker-cli.ts",
|
|
22
23
|
"embedding:dev": "cd embedding-service && python3 -m uvicorn app:app --host 127.0.0.1 --port 8765",
|
|
23
24
|
"docker:up": "docker compose up -d",
|
|
24
25
|
"docker:down": "docker compose down",
|
|
@@ -29,8 +30,10 @@
|
|
|
29
30
|
"@babel/types": "^7.29.0",
|
|
30
31
|
"@modelcontextprotocol/sdk": "^1.12.3",
|
|
31
32
|
"@types/react": "^19.2.14",
|
|
33
|
+
"bullmq": "^5.74.1",
|
|
32
34
|
"dotenv": "^16.4.5",
|
|
33
35
|
"fast-glob": "^3.3.2",
|
|
36
|
+
"ioredis": "^5.10.1",
|
|
34
37
|
"mysql2": "^3.11.3",
|
|
35
38
|
"react": "^19.2.4",
|
|
36
39
|
"ts-morph": "^25.0.0",
|