@lorrylurui/code-intelligence-mcp 1.1.15 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -3
- package/dist/cli/ci-index-cli.js +85 -0
- package/dist/cli/ci-index.js +76 -0
- package/dist/cli/detect-duplicates.js +1 -6
- package/dist/cli/embedding-worker-cli.js +35 -0
- package/dist/cli/index-codebase-cli.js +64 -0
- package/dist/cli/index-codebase.js +5 -4
- package/dist/config/env.js +53 -81
- package/dist/config/symbolStatus.js +8 -0
- package/dist/db/mysql.js +3 -6
- package/dist/db/postgres.js +13 -0
- package/dist/db/schema.js +41 -19
- package/dist/indexer/astNormalizer.js +201 -0
- package/dist/indexer/babelParser.js +40 -15
- package/dist/indexer/categoryClassifier.js +129 -0
- package/dist/indexer/embedText.js +9 -7
- package/dist/indexer/heuristics.js +42 -23
- package/dist/indexer/indexProject.js +146 -56
- package/dist/indexer/jsAstNormalizer.js +201 -0
- package/dist/indexer/persistSymbols.js +49 -24
- package/dist/indexer/tsAstNormalizer.js +363 -0
- package/dist/prompts/reusableCodeAdvisorPrompt.js +21 -8
- package/dist/repositories/symbolRepository.js +53 -46
- package/dist/services/embeddingQueue.js +57 -0
- package/dist/services/reindex.js +90 -43
- package/dist/tools/getSymbolDetail.js +3 -1
- package/dist/tools/incUsage.js +12 -3
- package/dist/tools/reindex.js +3 -1
- package/dist/tools/searchByStructure.js +6 -2
- package/dist/tools/searchSymbols.js +18 -4
- package/dist/workers/embeddingWorker.js +145 -0
- package/package.json +10 -5
package/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Code Intelligence MCP
|
|
1
|
+
# Code Intelligence MCP
|
|
2
2
|
|
|
3
3
|
- MCP Server(stdio)
|
|
4
4
|
- Tool: `search_symbols`
|
|
@@ -8,7 +8,6 @@
|
|
|
8
8
|
- Tool: `recommend_component`
|
|
9
9
|
- Tool: `incUsage`
|
|
10
10
|
- Prompt: `reusable-code-advisor`
|
|
11
|
-
- MySQL Repository(可选启用)
|
|
12
11
|
- Cursor Skill:`reusable-code-advisor`(`.cursor/skills/reusable-code-advisor/`,
|
|
13
12
|
|
|
14
13
|
## 1) 配置mcp servers
|
|
@@ -32,5 +31,77 @@
|
|
|
32
31
|
|
|
33
32
|
## 3) 项目根目录环境变量
|
|
34
33
|
|
|
34
|
+
<!-- 最小配置 1.表名 2.需要检索的文件路径和类型 -->
|
|
35
|
+
|
|
35
36
|
MYSQL_SYMBOLS_TABLE=frontend_collections_symbols
|
|
36
|
-
INDEX_GLOB=
|
|
37
|
+
INDEX_GLOB=xxx/\*\*/\*.{js,jsx,ts,tsx}
|
|
38
|
+
|
|
39
|
+
# 待优化项
|
|
40
|
+
|
|
41
|
+
修复优先级:
|
|
42
|
+
✅8
|
|
43
|
+
✅5
|
|
44
|
+
✅134 done,但是第二层embedding需要优化,llm fallback太慢+漂移,也需要调整模型
|
|
45
|
+
✅6
|
|
46
|
+
✅27
|
|
47
|
+
|
|
48
|
+
1. meta里面有多个信息,哪些做结构化过滤,哪些做向量检索?
|
|
49
|
+
结论:ast normalizers后拼一个语义模板,用这个模板内容生成向量
|
|
50
|
+
❓做法见qa-doc/semantic-phrase.md
|
|
51
|
+
2. 对于 class类型,content字段保留关键方法或摘要,而不是完全为空
|
|
52
|
+
最新修改:content赋值为语义模板
|
|
53
|
+
3. category过于模糊,
|
|
54
|
+
这三层怎么做:category 优先使用规则和 embedding 分类,
|
|
55
|
+
LLM 只作为 fallback,避免不稳定和成本问题
|
|
56
|
+
❓做法见qa-doc/category.md
|
|
57
|
+
4. type、category、meta.kind 三个字段是否多余了?type只有6个值,
|
|
58
|
+
type表达代码结构、category表达语义结构,kind?
|
|
59
|
+
type: function / component / hook / class / type / interface
|
|
60
|
+
category:最新的三层结构(还没实现,只有文档)
|
|
61
|
+
kind: 现在跟type重叠较多,建议弱化meta.kind → 只保留特殊情况:
|
|
62
|
+
❓改造方法qa-doc/type-category-kind.md
|
|
63
|
+
5. 在 CI 做增量索引时,如果 changed files 达到 1000+ 个文件,性能会爆炸,考虑用 file hash 判断?embedding 也没有做缓存优化?
|
|
64
|
+
❓见qa-doc/ci-hash-solution 方案:🥈 file hash + ast normalizer hash,新增semantic_hash
|
|
65
|
+
- CI 增量(git changed files 触发)
|
|
66
|
+
只需要 semantic_hash
|
|
67
|
+
file_hash 可省,因为文件必然变了
|
|
68
|
+
- 每日全量扫描
|
|
69
|
+
file_hash 用来跳过 AST 解析(CPU 优化)
|
|
70
|
+
semantic_hash 用来跳过 embedding(费用优化)
|
|
71
|
+
content_hash 删掉,职责完全被 semantic_hash 覆盖
|
|
72
|
+
6. 大仓问题:
|
|
73
|
+
❓big-repo.md
|
|
74
|
+
- ci embedding解耦,新增embedding_status, ci时,全量写入status='pending'-> ci finish
|
|
75
|
+
- ci如果检测到文件删除,则对被删除的代码块标记delete(这里需要新增字段)
|
|
76
|
+
- node+redis 消费写embedding job
|
|
77
|
+
- 对语义模板semantic_hash做向量缓存,semantic_hash相同即功能未变
|
|
78
|
+
- 大仓分片并行
|
|
79
|
+
7. content暂时用不到,但也不用删除,目前暴力截取4000字符需要优化:
|
|
80
|
+
content(降级为辅助字段):✔ 不参与 embedding✔ 不参与排序✔ 不参与过滤✔ 用于:1. LLM改造建议 2.debug 3.future rerank
|
|
81
|
+
最简单:只存 signature
|
|
82
|
+
最优:content = {
|
|
83
|
+
signature: "function fetchData(url, options)",
|
|
84
|
+
snippet: "核心逻辑代码(<=300行)",
|
|
85
|
+
keyCalls: ["fetch", "cache"]
|
|
86
|
+
}
|
|
87
|
+
8. TopK???,首先去掉usage过滤,再做两次topk,1.根据余弦相似度选topk 2.对1的结果用现有的usage,updated_at等加权排序
|
|
88
|
+
❓topK.md
|
|
89
|
+
现在:SQL过滤(type) → ORDER BY usage_count DESC LIMIT 3000→ embedding 相似度排序→ 取 top20
|
|
90
|
+
这个逻辑不对,导致query: "debounce function",debounce 使用少 ❌ fetch 很热门 ✅,结果Top3000里全是 fetch / request, debounce 被过滤掉 ❌
|
|
91
|
+
|
|
92
|
+
👉 优点:
|
|
93
|
+
• 不阻塞 CI
|
|
94
|
+
• 可扩展
|
|
95
|
+
|
|
96
|
+
6. 大仓问题呢?
|
|
97
|
+
|
|
98
|
+
# 简历里还没做的优化
|
|
99
|
+
|
|
100
|
+
1. embedding基石 - 语义模板,使用ast数据拼装语义模板
|
|
101
|
+
2. class的content为null
|
|
102
|
+
3. category分层 1.规则 2.预设所有种类,使用embedding召回 3.llm兜底
|
|
103
|
+
4. type meta.kind逻辑优化,现在太重叠了
|
|
104
|
+
5. ci-hash-solution
|
|
105
|
+
6. 大仓问题
|
|
106
|
+
7. content优化
|
|
107
|
+
8. ✅topk优化
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* CI增量索引CLI:处理changed files和deleted files
|
|
4
|
+
*
|
|
5
|
+
* 用法:
|
|
6
|
+
* node src/cli/ci-index-cli.js --changed src/file1.ts,src/file2.ts --deleted src/old.ts --renamed src/old.ts:src/new.ts
|
|
7
|
+
*/
|
|
8
|
+
import { resolve } from 'node:path';
|
|
9
|
+
import { loadProjectDotenv } from '../config/env.js';
|
|
10
|
+
import { runIncrementalIndex } from './ci-index.js';
|
|
11
|
+
/**
 * Split a comma-separated CLI value into trimmed, non-empty entries.
 *
 * @param {string} value - Raw flag value, e.g. `"a.ts, b.ts"`.
 * @returns {string[]} Entries with surrounding whitespace removed; blanks dropped.
 */
function splitList(value) {
    return value
        .split(',')
        .map((s) => s.trim())
        .filter(Boolean);
}

/**
 * Parse the `--changed`, `--deleted` and `--renamed` flags.
 *
 * Both `--key value` and `--key=value` forms are accepted. A flag with no
 * value (or an empty value) is ignored. Rename entries use `from:to`; an entry
 * without both halves is dropped instead of crashing the CLI.
 *
 * @param {string[]} args - Command-line arguments (without node/script path).
 * @returns {{changedFiles: string[], deletedFiles: string[], renamedFiles: {from: string, to: string}[]}}
 */
function parseCiIndexArgs(args) {
    const parsed = { changedFiles: [], deletedFiles: [], renamedFiles: [] };
    const handlers = {
        changed: (val) => {
            parsed.changedFiles = splitList(val);
        },
        deleted: (val) => {
            parsed.deletedFiles = splitList(val);
        },
        renamed: (val) => {
            parsed.renamedFiles = val
                .split(',')
                .map((entry) => {
                    // Default both halves to '' so an entry without ':' is
                    // filtered out below rather than throwing on undefined.
                    const [from = '', to = ''] = entry.split(':');
                    return { from: from.trim(), to: to.trim() };
                })
                .filter((r) => r.from && r.to);
        },
    };
    for (let i = 0; i < args.length; i++) {
        const arg = args[i];
        for (const [key, apply] of Object.entries(handlers)) {
            const flag = `--${key}`;
            let val = null;
            let next = i;
            if (arg.startsWith(`${flag}=`)) {
                val = arg.slice(flag.length + 1);
            }
            else if (arg === flag) {
                if (i + 1 < args.length) {
                    val = args[i + 1];
                    next = i + 1;
                }
            }
            else {
                continue;
            }
            if (val) {
                apply(val);
                // Skip the value token when it was supplied as a separate argument.
                i = next;
            }
            break;
        }
    }
    return parsed;
}

/**
 * CLI entry point: resolve the project root, load its `.env`, parse the file
 * lists from argv and hand them to `runIncrementalIndex`.
 *
 * Exits with code 1 and a usage message when no files were supplied.
 * All progress output goes to stderr.
 */
async function main() {
    const projectRoot = resolve(process.env.INDEX_ROOT ?? process.cwd());
    loadProjectDotenv(projectRoot);
    const { changedFiles, deletedFiles, renamedFiles } = parseCiIndexArgs(process.argv.slice(2));
    if (changedFiles.length === 0 &&
        deletedFiles.length === 0 &&
        renamedFiles.length === 0) {
        console.error('Usage: node ci-index-cli.js --changed file1,file2 --deleted file3 --renamed old:new');
        process.exit(1);
    }
    console.error(`[ci-index-cli] projectRoot=${projectRoot}`);
    console.error(`[ci-index-cli] changed: ${changedFiles.join(', ')}`);
    console.error(`[ci-index-cli] deleted: ${deletedFiles.join(', ')}`);
    console.error(`[ci-index-cli] renamed: ${renamedFiles.map((r) => `${r.from}->${r.to}`).join(', ')}`);
    await runIncrementalIndex({
        projectRoot,
        changedFiles,
        deletedFiles,
        renamedFiles,
    });
    console.error('[ci-index-cli] completed successfully');
}
|
|
82
|
+
// Any rejection from main() is fatal: report it on stderr and exit non-zero.
main().catch(function onFatal(error) {
    console.error('[ci-index-cli] failed:', error);
    process.exit(1);
});
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
// CI增量索引:处理changed files和deleted files
|
|
2
|
+
import { env } from '../config/env.js';
|
|
3
|
+
import { getPool } from '../db/postgres.js';
|
|
4
|
+
import { indexProject } from '../indexer/indexProject.js';
|
|
5
|
+
import { DEFAULT_STATUS_ON_UPSERT, SYMBOL_STATUS, } from '../config/symbolStatus.js';
|
|
6
|
+
import { enqueueEmbeddingBatch, closeEmbeddingQueue, } from '../services/embeddingQueue.js';
|
|
7
|
+
/**
 * Apply an incremental (CI-driven) update to the symbols table.
 *
 * Steps, in order:
 *  1. every path in `deletedFiles` has its rows set to the OFFLINE status;
 *  2. every `{ from, to }` in `renamedFiles` has its rows' `path` rewritten;
 *  3. `changedFiles` are re-indexed via `indexProject` and upserted on
 *     `(path, name)`; the distinct non-empty `semantic_hash` values are then
 *     handed to `enqueueEmbeddingBatch`.
 *
 * The embedding queue and the connection pool are always closed before this
 * function returns or throws, so a failing query cannot leave them open.
 *
 * @param {object} opts
 * @param {string} opts.projectRoot - Root passed through to `indexProject`.
 * @param {string[]} [opts.changedFiles] - Paths/patterns to (re)index.
 * @param {string[]} [opts.deletedFiles] - Paths whose rows are marked offline.
 * @param {{from: string, to: string}[]} [opts.renamedFiles] - Path renames.
 * @returns {Promise<void>}
 */
export async function runIncrementalIndex(opts) {
    const { projectRoot, changedFiles = [], deletedFiles = [], renamedFiles = [], } = opts;
    const pool = getPool();
    // NOTE(review): the table name is interpolated into SQL because identifiers
    // cannot be bound as parameters; it comes from configuration (env.symbolsTable),
    // so it must never be fed from untrusted input.
    const tableName = env.symbolsTable;
    try {
        // 1. Deleted files: mark their rows offline.
        for (const file of deletedFiles) {
            await pool.query(`UPDATE ${tableName} SET status = $1 WHERE path = $2`, [SYMBOL_STATUS.OFFLINE, file]);
            console.error(`[ci-index] marked offline: ${file}`);
        }
        // 2. Renamed files: rewrite the stored path.
        for (const { from, to } of renamedFiles) {
            await pool.query(`UPDATE ${tableName} SET path = $1 WHERE path = $2`, [
                to,
                from,
            ]);
            console.error(`[ci-index] renamed: ${from} -> ${to}`);
        }
        // 3. Changed/added files: re-index and upsert.
        if (changedFiles.length > 0) {
            const rows = await indexProject({
                projectRoot,
                globPatterns: changedFiles,
            });
            for (const row of rows) {
                // Status rule: new rows get the default upsert status. Existing rows
                // keep their current status when semantic_hash is unchanged, and are
                // reset to the default status only when the hash differs.
                await pool.query(`INSERT INTO ${tableName}
                    (name, type, category, path, description, content, meta,
                     file_hash, semantic_hash, status,
                     usage_count, created_at, updated_at)
                   VALUES ($1, $2, $3, $4, $5, $6, $7::jsonb, $8, $9, $10, 0, NOW(), NOW())
                   ON CONFLICT (path, name) DO UPDATE SET
                     type          = EXCLUDED.type,
                     category      = EXCLUDED.category,
                     description   = EXCLUDED.description,
                     content       = EXCLUDED.content,
                     meta          = EXCLUDED.meta,
                     file_hash     = EXCLUDED.file_hash,
                     semantic_hash = EXCLUDED.semantic_hash,
                     status        = CASE WHEN ${tableName}.semantic_hash = EXCLUDED.semantic_hash
                                          THEN ${tableName}.status
                                          ELSE EXCLUDED.status END,
                     updated_at    = NOW()`, [
                    row.name,
                    row.type,
                    row.category ?? null,
                    row.path,
                    row.description ?? null,
                    row.content ?? null,
                    JSON.stringify(row.meta),
                    row.file_hash,
                    row.semantic_hash,
                    DEFAULT_STATUS_ON_UPSERT,
                ]);
                console.error(`[ci-index] upserted: ${row.path}:${row.name}`);
            }
            // Enqueue each distinct semantic hash once; many symbols may share a hash.
            const hashes = [
                ...new Set(rows.map((r) => r.semantic_hash).filter(Boolean)),
            ];
            if (hashes.length > 0) {
                await enqueueEmbeddingBatch(hashes, tableName);
                console.error(`[ci-index] enqueued ${hashes.length} unique semantic hashes for embedding`);
            }
        }
    }
    finally {
        // Release both resources even when a step above failed; the nested
        // finally guarantees the pool is ended if closing the queue throws.
        try {
            await closeEmbeddingQueue();
        }
        finally {
            await pool.end();
        }
    }
    console.error(`[ci-index] processed ${deletedFiles.length} deletions, ${renamedFiles.length} renames, ${changedFiles.length} changes`);
}
|
|
@@ -109,8 +109,7 @@ async function main() {
|
|
|
109
109
|
}
|
|
110
110
|
}
|
|
111
111
|
loadProjectDotenv(projectRoot);
|
|
112
|
-
console.error(`[duplicate-check] projectRoot=${projectRoot}, `
|
|
113
|
-
`MYSQL_ENABLED=${process.env.MYSQL_ENABLED}`);
|
|
112
|
+
console.error(`[duplicate-check] projectRoot=${projectRoot}, `);
|
|
114
113
|
// 3️ 解析命令行参数
|
|
115
114
|
const args = parseArgs(process.argv.slice(2));
|
|
116
115
|
const changedFilesPath = args.get('changed-files') ?? 'changed_files.txt';
|
|
@@ -131,10 +130,6 @@ async function main() {
|
|
|
131
130
|
}
|
|
132
131
|
else {
|
|
133
132
|
validateEnv();
|
|
134
|
-
const pool = getMySqlPool();
|
|
135
|
-
if (!pool || !env.mysqlEnabled) {
|
|
136
|
-
throw new Error('duplicate-check 需要 MYSQL_ENABLED=true 并可连接 MySQL。');
|
|
137
|
-
}
|
|
138
133
|
if (!env.embeddingServiceUrl) {
|
|
139
134
|
throw new Error('duplicate-check 需要 EMBEDDING_SERVICE_URL(embedding service)。');
|
|
140
135
|
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* embedding worker 进程入口。
|
|
3
|
+
*
|
|
4
|
+
* 本地启动:
|
|
5
|
+
* npm run worker:embedding
|
|
6
|
+
*
|
|
7
|
+
* 大仓分片(多进程并行):
|
|
8
|
+
* WORKER_CONCURRENCY=10 npm run worker:embedding &
|
|
9
|
+
* WORKER_CONCURRENCY=10 npm run worker:embedding &
|
|
10
|
+
* # 启动 N 个进程,BullMQ 自动分配任务,无需手动分片
|
|
11
|
+
*
|
|
12
|
+
* 环境变量:
|
|
13
|
+
* REDIS_URL Redis 连接 URL(默认 redis://127.0.0.1:6379)
|
|
14
|
+
* PG_URL PostgreSQL 连接字符串
|
|
15
|
+
* EMBEDDING_SERVICE_URL Python embedding 服务地址
|
|
16
|
+
* WORKER_CONCURRENCY 单进程并发 job 数(默认 5)
|
|
17
|
+
* WORKER_RPM_LIMIT 全局 RPM 上限(默认 100,跨所有 worker 进程)
|
|
18
|
+
* PROJECT_ROOT 项目根目录,用于加载 .env(默认 cwd)
|
|
19
|
+
*/
|
|
20
|
+
import { loadProjectDotenv } from '../config/env.js';
|
|
21
|
+
import { startEmbeddingWorker } from '../workers/embeddingWorker.js';
|
|
22
|
+
/**
 * Read a positive integer from an environment variable.
 *
 * An unset, blank, non-numeric, non-integer or non-positive value falls back
 * to `fallback`, so a typo never reaches the worker as NaN or 0.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is unusable.
 * @returns {number} A positive integer.
 */
function readPositiveIntEnv(name, fallback) {
    const raw = process.env[name];
    if (raw === undefined || raw.trim() === '') {
        return fallback;
    }
    const value = Number(raw);
    if (!Number.isInteger(value) || value <= 0) {
        console.error(`[embedding-worker] invalid ${name}=${raw}, using default ${fallback}`);
        return fallback;
    }
    return value;
}

// Load the project's .env first so the settings below can come from it.
const projectRoot = process.env.PROJECT_ROOT ?? process.cwd();
loadProjectDotenv(projectRoot);
const concurrency = readPositiveIntEnv('WORKER_CONCURRENCY', 5);
const rpmLimit = readPositiveIntEnv('WORKER_RPM_LIMIT', 100);
const { stop } = await startEmbeddingWorker({ concurrency, rpmLimit });
console.error(`[embedding-worker] started concurrency=${concurrency} rpm_limit=${rpmLimit}`);
// On SIGINT/SIGTERM, wait for stop() before exiting; report a failed shutdown
// with a non-zero exit code instead of an unhandled rejection.
for (const sig of ['SIGINT', 'SIGTERM']) {
    process.on(sig, async () => {
        console.error('[embedding-worker] shutting down…');
        let exitCode = 0;
        try {
            await stop();
        }
        catch (err) {
            console.error('[embedding-worker] shutdown failed:', err);
            exitCode = 1;
        }
        process.exit(exitCode);
    });
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Phase 2 CLI:扫描代码库并写入 PostgreSQL `symbols`。
|
|
4
|
+
*
|
|
5
|
+
* 环境变量加载顺序:
|
|
6
|
+
* 1. 命令行参数(最高优先级)
|
|
7
|
+
* 2. INDEX_ROOT 指向的第三方项目 .env(中等优先级,优先使用第三方显式设置的值)
|
|
8
|
+
* 3. 本地的 .env(最低优先级,提供默认值)
|
|
9
|
+
*/
|
|
10
|
+
import { resolve } from 'node:path';
|
|
11
|
+
import { CLI_KEYS, loadProjectDotenv } from '../config/env.js';
|
|
12
|
+
import { runReindex } from '../services/reindex.js';
|
|
13
|
+
/**
|
|
14
|
+
* 入口:加载第三方 .env → 校验环境 → 调用 runReindex。
|
|
15
|
+
* 进度与统计输出到 **stderr**,避免占用 stdout。
|
|
16
|
+
* 进程退出码:成功 `0`,连接失败或异常 `1`。
|
|
17
|
+
*/
|
|
18
|
+
/**
 * Split a delimited environment value into trimmed, non-empty entries.
 *
 * @param {string | undefined} raw - Raw environment value.
 * @param {string | RegExp} separator - Delimiter passed to `String.prototype.split`.
 * @returns {string[] | undefined} Entries, or `undefined` when `raw` is unset or empty.
 */
function splitEnvList(raw, separator) {
    if (!raw) {
        return undefined;
    }
    return raw
        .split(separator)
        .map((s) => s.trim())
        .filter(Boolean);
}

/**
 * CLI entry point: load the target project's `.env` from the current working
 * directory, resolve the project root, read INDEX_GLOB / INDEX_IGNORE and run
 * `runReindex`. Progress is written to stderr.
 */
async function main() {
    // Step 1: always load the third-party .env from cwd. process.env.INDEX_ROOT
    // is not used here because it may have been filled in by the local MCP .env.
    const thirdPartyKeys = loadProjectDotenv(process.cwd());
    // Step 2: trust INDEX_ROOT only when it came from the command line or from
    // the third-party .env; otherwise fall back to cwd.
    const indexRootSource = CLI_KEYS.has('INDEX_ROOT')
        ? 'CLI'
        : thirdPartyKeys.has('INDEX_ROOT')
            ? 'third-party .env'
            : 'cwd fallback';
    const indexRoot = indexRootSource === 'cwd fallback' ? undefined : process.env.INDEX_ROOT;
    const projectRoot = resolve(indexRoot ?? process.cwd());
    // Only report whether PG_URL is set; the two parts are separated by a space.
    console.error(`PG_URL=${process.env.PG_URL ? '(set)' : '(not set)'} ` +
        `[index] projectRoot=${projectRoot} (INDEX_ROOT: ${indexRootSource})`);
    // INDEX_GLOB is whitespace-separated, INDEX_IGNORE is comma-separated;
    // blank entries are dropped from both.
    const globPatterns = splitEnvList(process.env.INDEX_GLOB, /\s+/);
    const ignore = splitEnvList(process.env.INDEX_IGNORE, ',');
    const forceRebuild = process.argv.includes('--force-rebuild');
    const result = await runReindex({
        projectRoot,
        globPatterns,
        ignore,
        dryRun: false,
        forceRebuild,
    });
    console.error(`[index] extracted ${result.extractedCount} symbol(s), enqueued ${result.enqueuedCount} for embedding`);
    console.error('[index] upserted into PostgreSQL, success:', result.upserted);
}
|
|
49
|
+
/**
 * Return a connection URL that is safe to log: the password, if any, is
 * replaced with `***`. A value that cannot be parsed as a URL is not echoed.
 *
 * @param {string} rawUrl - Connection string such as `postgresql://user:pass@host:5432/db`.
 * @returns {string} The URL with its password masked, or a placeholder.
 */
function redactPgUrl(rawUrl) {
    try {
        const url = new URL(rawUrl);
        if (url.password) {
            url.password = '***';
        }
        return url.toString();
    }
    catch {
        return '(unparseable PG_URL)';
    }
}

// Fatal error handler: log the error, add a hint for well-known connection
// failure codes, then exit with status 1.
main().catch((err) => {
    console.error('[index] failed:', err);
    // The rejection value is not guaranteed to be an object.
    const code = err?.code;
    if (code === 'ECONNREFUSED') {
        // Never print the raw PG_URL: it may carry the database password.
        const pgUrl = process.env.PG_URL === undefined
            ? 'postgresql://...@127.0.0.1:5432/...'
            : redactPgUrl(process.env.PG_URL);
        console.error(`[index] 原因: 无法连接 PostgreSQL(连接被拒绝)。当前 PG_URL=${pgUrl}。请确认 docker compose up -d 已启动 pgvector 容器。`);
    }
    else if (code === 'ER_ACCESS_DENIED_ERROR' ||
        code === '28P01') {
        console.error('[index] 原因: 用户名或密码错误,请检查 PG_URL 中的 user/password。');
    }
    else if (code === 'ENOTFOUND' || code === 'ETIMEDOUT') {
        console.error('[index] 原因: 网络不可达或超时,请检查 PG_URL 中的 host 是否可解析。');
    }
    process.exit(1);
});
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
/**
|
|
3
|
-
* Phase 2 CLI:扫描代码库并写入 MySQL `symbols
|
|
3
|
+
* Phase 2 CLI:扫描代码库并写入 MySQL `symbols`。
|
|
4
4
|
*
|
|
5
5
|
* 环境变量加载顺序:
|
|
6
6
|
* 1. 命令行参数(最高优先级)
|
|
@@ -16,10 +16,11 @@ import { runReindex } from '../services/reindex.js';
|
|
|
16
16
|
* 进程退出码:成功 `0`,无 MySQL 或异常 `1`。
|
|
17
17
|
*/
|
|
18
18
|
async function main() {
|
|
19
|
+
// const projectRoot = resolve(process.env.INDEX_ROOT ?? process.cwd());
|
|
20
|
+
loadProjectDotenv(resolve(process.env.INDEX_ROOT ?? process.cwd()));
|
|
19
21
|
const projectRoot = resolve(process.env.INDEX_ROOT ?? process.cwd());
|
|
20
|
-
|
|
21
|
-
console.error(`
|
|
22
|
-
`MYSQL_HOST=${process.env.MYSQL_HOST}` +
|
|
22
|
+
console.error(projectRoot, process.env.INDEX_ROOT);
|
|
23
|
+
console.error(`MYSQL_HOST=${process.env.MYSQL_HOST}` +
|
|
23
24
|
`[index] projectRoot=${projectRoot}`);
|
|
24
25
|
const globPatterns = process.env.INDEX_GLOB
|
|
25
26
|
? process.env.INDEX_GLOB.split(/\s+/)
|
package/dist/config/env.js
CHANGED
|
@@ -3,101 +3,73 @@ import path from 'node:path';
|
|
|
3
3
|
import { fileURLToPath } from 'node:url';
|
|
4
4
|
import { existsSync, readFileSync } from 'node:fs';
|
|
5
5
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
6
|
-
//
|
|
6
|
+
// ─── Priority 1 (highest): Third-party CLI args --KEY=VALUE ───────────────────
|
|
7
|
+
// 记录哪些 key 来自命令行,任何后续加载都不得覆盖
|
|
8
|
+
export const CLI_KEYS = new Set();
|
|
7
9
|
for (const arg of process.argv) {
|
|
8
10
|
const match = arg.match(/^--([A-Z_][A-Z0-9_]*)=(.+)$/);
|
|
9
11
|
if (match) {
|
|
10
12
|
process.env[match[1]] = match[2];
|
|
13
|
+
CLI_KEYS.add(match[1]);
|
|
11
14
|
}
|
|
12
15
|
}
|
|
13
|
-
// MCP
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
});
|
|
16
|
+
// ─── Priority 3 (lowest): Local MCP repo .env ─────────────────────────────────
|
|
17
|
+
// override: false → 只填补尚未设置的 key,不覆盖 CLI 参数
|
|
18
|
+
// dev 模式: __dirname = src/config → ../../ = 项目根目录
|
|
19
|
+
// prod 模式: __dirname = dist/config → ../../ = 项目根目录
|
|
20
|
+
const MCP_SERVER_ENV_PATH = path.resolve(__dirname, '..', '..', '.env');
|
|
21
|
+
if (existsSync(MCP_SERVER_ENV_PATH)) {
|
|
22
|
+
dotenv.config({ path: MCP_SERVER_ENV_PATH, override: false });
|
|
23
|
+
}
|
|
20
24
|
/**
|
|
21
|
-
*
|
|
22
|
-
*
|
|
25
|
+
* 加载第三方项目的 .env 文件(Priority 2)。
|
|
26
|
+
*
|
|
27
|
+
* 优先级规则:
|
|
28
|
+
* 第三方 CLI 参数(P1) > 第三方 .env(P2) > 本地 MCP .env(P3)
|
|
29
|
+
*
|
|
30
|
+
* - CLI 参数在 CLI_KEYS 中已记录,永不覆盖
|
|
31
|
+
* - 第三方 .env 中的 key 覆盖本地 MCP .env(即 P2 > P3)
|
|
32
|
+
*
|
|
33
|
+
* 应在进程启动后、任何 env.xxx 读取前尽早调用一次。
|
|
23
34
|
*/
|
|
24
35
|
export function loadProjectDotenv(projectRoot) {
|
|
25
36
|
const envPath = path.resolve(projectRoot, '.env');
|
|
26
|
-
if (!existsSync(envPath))
|
|
27
|
-
return;
|
|
28
|
-
|
|
29
|
-
const
|
|
30
|
-
|
|
31
|
-
const
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
const eqIdx = trimmed.indexOf('=');
|
|
37
|
-
if (eqIdx === -1)
|
|
38
|
-
continue;
|
|
39
|
-
const key = trimmed.slice(0, eqIdx).trim();
|
|
40
|
-
if (!key)
|
|
41
|
-
continue;
|
|
42
|
-
thirdPartyKeys.add(key);
|
|
43
|
-
}
|
|
44
|
-
// 第二步:如果某个 key 是第三方显式定义的,则覆盖(不管值是什么)
|
|
45
|
-
for (const line of content.split('\n')) {
|
|
46
|
-
const trimmed = line.trim();
|
|
47
|
-
if (!trimmed || trimmed.startsWith('#'))
|
|
48
|
-
continue;
|
|
49
|
-
const eqIdx = trimmed.indexOf('=');
|
|
50
|
-
if (eqIdx === -1)
|
|
51
|
-
continue;
|
|
52
|
-
const key = trimmed.slice(0, eqIdx).trim();
|
|
53
|
-
let value = trimmed.slice(eqIdx + 1).trim();
|
|
54
|
-
value = value.replace(/^["']|["']$/g, '');
|
|
55
|
-
if (!key)
|
|
56
|
-
continue;
|
|
57
|
-
// 只有当第三方显式定义了这个 key 时才覆盖
|
|
58
|
-
if (thirdPartyKeys.has(key)) {
|
|
59
|
-
process.env[key] = value;
|
|
60
|
-
}
|
|
61
|
-
}
|
|
62
|
-
}
|
|
63
|
-
// 外部传入的 env 已在上一步保留,这里确保环境变量已正确设置
|
|
64
|
-
for (const arg of process.argv) {
|
|
65
|
-
const match = arg.match(/^--([A-Z_][A-Z0-9_]*)=(.+)$/);
|
|
66
|
-
if (match) {
|
|
67
|
-
process.env[match[1]] = match[2];
|
|
37
|
+
if (!existsSync(envPath))
|
|
38
|
+
return new Set();
|
|
39
|
+
// dotenv.parse 只解析文件,不写 process.env
|
|
40
|
+
const parsed = dotenv.parse(readFileSync(envPath));
|
|
41
|
+
const loadedKeys = new Set();
|
|
42
|
+
for (const [key, value] of Object.entries(parsed)) {
|
|
43
|
+
if (CLI_KEYS.has(key))
|
|
44
|
+
continue; // P1 CLI args 永不被覆盖
|
|
45
|
+
process.env[key] = value; // P2 第三方 .env 覆盖 P3 本地 .env
|
|
46
|
+
loadedKeys.add(key);
|
|
68
47
|
}
|
|
48
|
+
return loadedKeys;
|
|
69
49
|
}
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
'MYSQL_USER',
|
|
73
|
-
'MYSQL_DATABASE',
|
|
74
|
-
];
|
|
75
|
-
console.error(`[Config] MYSQL_ENABLED: ${process.env.MYSQL_ENABLED},
|
|
76
|
-
MYSQL_HOST: ${process.env.MYSQL_HOST},
|
|
77
|
-
MYSQL_USER: ${process.env.MYSQL_USER},
|
|
78
|
-
MYSQL_DATABASE: ${process.env.MYSQL_DATABASE},
|
|
79
|
-
EMBEDDING_SERVICE_URL: ${process.env.EMBEDDING_SERVICE_URL},
|
|
80
|
-
MYSQL_SYMBOLS_TABLE: ${process.env.MYSQL_SYMBOLS_TABLE}
|
|
81
|
-
`);
|
|
50
|
+
// ─── env 对象:getter 懒读取,确保 loadProjectDotenv() 后立即生效 ────────────
|
|
51
|
+
// 每次访问 env.xxx 都从 process.env 实时读取,避免快照冻结问题
|
|
82
52
|
export const env = {
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
53
|
+
/** PostgreSQL 连接字符串,如 postgresql://user:pass@host:5432/db */
|
|
54
|
+
get pgUrl() {
|
|
55
|
+
return (process.env.PG_URL ??
|
|
56
|
+
'postgresql://postgres:devpassword@127.0.0.1:5432/code_intelligence');
|
|
57
|
+
},
|
|
58
|
+
/** symbols 表名,可通过 SYMBOLS_TABLE 环境变量配置 */
|
|
59
|
+
get symbolsTable() {
|
|
60
|
+
return process.env.SYMBOLS_TABLE ?? 'symbols';
|
|
61
|
+
},
|
|
62
|
+
/** Python FastAPI 嵌入服务根 URL,如 http://127.0.0.1:8765 */
|
|
63
|
+
get embeddingServiceUrl() {
|
|
64
|
+
return (process.env.EMBEDDING_SERVICE_URL ?? '').trim();
|
|
65
|
+
},
|
|
66
|
+
/** Redis 连接 URL,供 BullMQ embedding worker 使用 */
|
|
67
|
+
get redisUrl() {
|
|
68
|
+
return process.env.REDIS_URL ?? 'redis://127.0.0.1:6379';
|
|
69
|
+
},
|
|
93
70
|
};
|
|
94
71
|
export function validateEnv() {
|
|
95
|
-
if (!env.
|
|
96
|
-
|
|
97
|
-
}
|
|
98
|
-
for (const key of requiredWhenEnabled) {
|
|
99
|
-
if (!process.env[key]) {
|
|
100
|
-
throw new Error(`Missing environment variable: ${key}`);
|
|
101
|
-
}
|
|
72
|
+
if (!process.env.PG_URL) {
|
|
73
|
+
console.warn('[Config] PG_URL not set, using default: postgresql://postgres:devpassword@127.0.0.1:5432/code_intelligence');
|
|
102
74
|
}
|
|
103
75
|
}
|
package/dist/db/mysql.js
CHANGED
|
@@ -1,10 +1,7 @@
|
|
|
1
|
-
import mysql from
|
|
2
|
-
import { env } from
|
|
1
|
+
import mysql from 'mysql2/promise';
|
|
2
|
+
import { env } from '../config/env.js';
|
|
3
3
|
let pool = null;
|
|
4
4
|
export function getMySqlPool() {
|
|
5
|
-
if (!env.mysqlEnabled) {
|
|
6
|
-
return null;
|
|
7
|
-
}
|
|
8
5
|
if (!pool) {
|
|
9
6
|
pool = mysql.createPool({
|
|
10
7
|
host: env.mysqlHost,
|
|
@@ -13,7 +10,7 @@ export function getMySqlPool() {
|
|
|
13
10
|
password: env.mysqlPassword,
|
|
14
11
|
database: env.mysqlDatabase,
|
|
15
12
|
waitForConnections: true,
|
|
16
|
-
connectionLimit: 10
|
|
13
|
+
connectionLimit: 10,
|
|
17
14
|
});
|
|
18
15
|
}
|
|
19
16
|
return pool;
|