@lorrylurui/code-intelligence-mcp 1.2.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/ci-index-cli.js +40 -21
- package/dist/cli/ci-index.js +23 -27
- package/dist/cli/embedding-worker-cli.js +4 -4
- package/dist/cli/index-codebase-cli.js +64 -0
- package/dist/config/env.js +52 -77
- package/dist/db/postgres.js +13 -0
- package/dist/db/schema.js +40 -25
- package/dist/indexer/categoryClassifier.js +3 -3
- package/dist/indexer/indexProject.js +1 -1
- package/dist/indexer/persistSymbols.js +47 -26
- package/dist/prompts/reusableCodeAdvisorPrompt.js +19 -9
- package/dist/repositories/symbolRepository.js +52 -119
- package/dist/services/embeddingQueue.js +19 -18
- package/dist/services/reindex.js +92 -48
- package/dist/tools/getSymbolDetail.js +3 -1
- package/dist/tools/incUsage.js +12 -3
- package/dist/tools/reindex.js +3 -1
- package/dist/tools/searchByStructure.js +3 -1
- package/dist/tools/searchSymbols.js +8 -5
- package/dist/workers/embeddingWorker.js +86 -41
- package/package.json +4 -2
package/dist/cli/ci-index-cli.js
CHANGED
|
@@ -15,31 +15,50 @@ async function main() {
|
|
|
15
15
|
let changedFiles = [];
|
|
16
16
|
let deletedFiles = [];
|
|
17
17
|
let renamedFiles = [];
|
|
18
|
+
/** 解析 --key value 和 --key=value 两种格式 */
|
|
19
|
+
function getArgValue(key, idx) {
|
|
20
|
+
const arg = args[idx];
|
|
21
|
+
const prefix = `--${key}=`;
|
|
22
|
+
if (arg.startsWith(prefix))
|
|
23
|
+
return [arg.slice(prefix.length), idx];
|
|
24
|
+
if (arg === `--${key}` && idx + 1 < args.length)
|
|
25
|
+
return [args[idx + 1], idx + 1];
|
|
26
|
+
return [null, idx];
|
|
27
|
+
}
|
|
18
28
|
for (let i = 0; i < args.length; i++) {
|
|
19
29
|
const arg = args[i];
|
|
20
|
-
if (arg === '--changed'
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
30
|
+
if (arg === '--changed' || arg.startsWith('--changed=')) {
|
|
31
|
+
const [val, next] = getArgValue('changed', i);
|
|
32
|
+
if (val) {
|
|
33
|
+
changedFiles = val
|
|
34
|
+
.split(',')
|
|
35
|
+
.map((s) => s.trim())
|
|
36
|
+
.filter(Boolean);
|
|
37
|
+
i = next;
|
|
38
|
+
}
|
|
26
39
|
}
|
|
27
|
-
else if (arg === '--deleted'
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
40
|
+
else if (arg === '--deleted' || arg.startsWith('--deleted=')) {
|
|
41
|
+
const [val, next] = getArgValue('deleted', i);
|
|
42
|
+
if (val) {
|
|
43
|
+
deletedFiles = val
|
|
44
|
+
.split(',')
|
|
45
|
+
.map((s) => s.trim())
|
|
46
|
+
.filter(Boolean);
|
|
47
|
+
i = next;
|
|
48
|
+
}
|
|
33
49
|
}
|
|
34
|
-
else if (arg === '--renamed'
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
50
|
+
else if (arg === '--renamed' || arg.startsWith('--renamed=')) {
|
|
51
|
+
const [val, next] = getArgValue('renamed', i);
|
|
52
|
+
if (val) {
|
|
53
|
+
renamedFiles = val
|
|
54
|
+
.split(',')
|
|
55
|
+
.map((s) => {
|
|
56
|
+
const [from, to] = s.split(':');
|
|
57
|
+
return { from: from.trim(), to: to.trim() };
|
|
58
|
+
})
|
|
59
|
+
.filter((r) => r.from && r.to);
|
|
60
|
+
i = next;
|
|
61
|
+
}
|
|
43
62
|
}
|
|
44
63
|
}
|
|
45
64
|
if (changedFiles.length === 0 &&
|
package/dist/cli/ci-index.js
CHANGED
|
@@ -1,28 +1,21 @@
|
|
|
1
1
|
// CI增量索引:处理changed files和deleted files
|
|
2
|
-
import { env
|
|
3
|
-
import {
|
|
2
|
+
import { env } from '../config/env.js';
|
|
3
|
+
import { getPool } from '../db/postgres.js';
|
|
4
4
|
import { indexProject } from '../indexer/indexProject.js';
|
|
5
5
|
import { DEFAULT_STATUS_ON_UPSERT, SYMBOL_STATUS, } from '../config/symbolStatus.js';
|
|
6
6
|
import { enqueueEmbeddingBatch, closeEmbeddingQueue, } from '../services/embeddingQueue.js';
|
|
7
7
|
export async function runIncrementalIndex(opts) {
|
|
8
8
|
const { projectRoot, changedFiles, deletedFiles, renamedFiles = [] } = opts;
|
|
9
|
-
|
|
10
|
-
const
|
|
11
|
-
if (!pool) {
|
|
12
|
-
throw new Error('Failed to get MySQL pool');
|
|
13
|
-
}
|
|
14
|
-
const tableName = env.mysqlSymbolsTable;
|
|
9
|
+
const pool = getPool();
|
|
10
|
+
const tableName = env.symbolsTable;
|
|
15
11
|
// 1. 删除文件:标记 offline
|
|
16
12
|
for (const file of deletedFiles) {
|
|
17
|
-
await pool.query(`UPDATE ${tableName} SET status =
|
|
18
|
-
SYMBOL_STATUS.OFFLINE,
|
|
19
|
-
file,
|
|
20
|
-
]);
|
|
13
|
+
await pool.query(`UPDATE ${tableName} SET status = $1 WHERE path = $2`, [SYMBOL_STATUS.OFFLINE, file]);
|
|
21
14
|
console.error(`[ci-index] marked offline: ${file}`);
|
|
22
15
|
}
|
|
23
16
|
// 2. 重命名文件:更新path
|
|
24
17
|
for (const { from, to } of renamedFiles) {
|
|
25
|
-
await pool.query(`UPDATE ${tableName} SET path =
|
|
18
|
+
await pool.query(`UPDATE ${tableName} SET path = $1 WHERE path = $2`, [
|
|
26
19
|
to,
|
|
27
20
|
from,
|
|
28
21
|
]);
|
|
@@ -35,21 +28,25 @@ export async function runIncrementalIndex(opts) {
|
|
|
35
28
|
globPatterns: changedFiles,
|
|
36
29
|
});
|
|
37
30
|
for (const row of rows) {
|
|
38
|
-
//
|
|
31
|
+
// 写入结构化数据
|
|
32
|
+
// status 逻辑:新行写 pending;已有行仅在 semantic_hash 发生变化时才重置为 pending,
|
|
33
|
+
// hash 未变说明语义未变,保留原 status(online → 缓存命中,不重复 embedding)
|
|
39
34
|
await pool.query(`INSERT INTO ${tableName}
|
|
40
35
|
(name, type, category, path, description, content, meta,
|
|
41
36
|
file_hash, semantic_hash, status,
|
|
42
37
|
usage_count, created_at, updated_at)
|
|
43
|
-
VALUES (
|
|
44
|
-
ON
|
|
45
|
-
type =
|
|
46
|
-
category =
|
|
47
|
-
description =
|
|
48
|
-
content =
|
|
49
|
-
meta =
|
|
50
|
-
file_hash =
|
|
51
|
-
semantic_hash =
|
|
52
|
-
status =
|
|
38
|
+
VALUES ($1, $2, $3, $4, $5, $6, $7::jsonb, $8, $9, $10, 0, NOW(), NOW())
|
|
39
|
+
ON CONFLICT (path, name) DO UPDATE SET
|
|
40
|
+
type = EXCLUDED.type,
|
|
41
|
+
category = EXCLUDED.category,
|
|
42
|
+
description = EXCLUDED.description,
|
|
43
|
+
content = EXCLUDED.content,
|
|
44
|
+
meta = EXCLUDED.meta,
|
|
45
|
+
file_hash = EXCLUDED.file_hash,
|
|
46
|
+
semantic_hash = EXCLUDED.semantic_hash,
|
|
47
|
+
status = CASE WHEN ${tableName}.semantic_hash = EXCLUDED.semantic_hash
|
|
48
|
+
THEN ${tableName}.status
|
|
49
|
+
ELSE EXCLUDED.status END,
|
|
53
50
|
updated_at = NOW()`, [
|
|
54
51
|
row.name,
|
|
55
52
|
row.type,
|
|
@@ -61,16 +58,15 @@ export async function runIncrementalIndex(opts) {
|
|
|
61
58
|
row.file_hash,
|
|
62
59
|
row.semantic_hash,
|
|
63
60
|
DEFAULT_STATUS_ON_UPSERT,
|
|
64
|
-
DEFAULT_STATUS_ON_UPSERT,
|
|
65
61
|
]);
|
|
66
|
-
console.error(`[ci-index]
|
|
62
|
+
console.error(`[ci-index] upserted: ${row.path}:${row.name}`);
|
|
67
63
|
}
|
|
68
64
|
// 批量入队:jobId = semanticHash,相同 hash 自动去重,1000 个符号可能只产生 N 个唯一 job
|
|
69
65
|
const hashes = [
|
|
70
66
|
...new Set(rows.map((r) => r.semantic_hash).filter(Boolean)),
|
|
71
67
|
];
|
|
72
68
|
if (hashes.length > 0) {
|
|
73
|
-
await enqueueEmbeddingBatch(hashes);
|
|
69
|
+
await enqueueEmbeddingBatch(hashes, env.symbolsTable);
|
|
74
70
|
console.error(`[ci-index] enqueued ${hashes.length} unique semantic hashes for embedding`);
|
|
75
71
|
}
|
|
76
72
|
}
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
*
|
|
12
12
|
* 环境变量:
|
|
13
13
|
* REDIS_URL Redis 连接 URL(默认 redis://127.0.0.1:6379)
|
|
14
|
-
*
|
|
14
|
+
* PG_URL PostgreSQL 连接字符串
|
|
15
15
|
* EMBEDDING_SERVICE_URL Python embedding 服务地址
|
|
16
16
|
* WORKER_CONCURRENCY 单进程并发 job 数(默认 5)
|
|
17
17
|
* WORKER_RPM_LIMIT 全局 RPM 上限(默认 100,跨所有 worker 进程)
|
|
@@ -23,13 +23,13 @@ const projectRoot = process.env.PROJECT_ROOT ?? process.cwd();
|
|
|
23
23
|
loadProjectDotenv(projectRoot);
|
|
24
24
|
const concurrency = Number(process.env.WORKER_CONCURRENCY ?? '5');
|
|
25
25
|
const rpmLimit = Number(process.env.WORKER_RPM_LIMIT ?? '100');
|
|
26
|
-
const worker = startEmbeddingWorker({ concurrency, rpmLimit });
|
|
26
|
+
const { worker, stop } = await startEmbeddingWorker({ concurrency, rpmLimit });
|
|
27
27
|
console.error(`[embedding-worker] started concurrency=${concurrency} rpm_limit=${rpmLimit}`);
|
|
28
|
-
//
|
|
28
|
+
// 当前 job 执行完再退出
|
|
29
29
|
for (const sig of ['SIGINT', 'SIGTERM']) {
|
|
30
30
|
process.on(sig, async () => {
|
|
31
31
|
console.error('[embedding-worker] shutting down…');
|
|
32
|
-
await
|
|
32
|
+
await stop();
|
|
33
33
|
process.exit(0);
|
|
34
34
|
});
|
|
35
35
|
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Phase 2 CLI:扫描代码库并写入 PostgreSQL `symbols`。
|
|
4
|
+
*
|
|
5
|
+
* 环境变量加载顺序:
|
|
6
|
+
* 1. 命令行参数(最高优先级)
|
|
7
|
+
* 2. INDEX_ROOT 指向的第三方项目 .env(中等优先级,优先使用第三方显式设置的值)
|
|
8
|
+
* 3. 本地的 .env(最低优先级,提供默认值)
|
|
9
|
+
*/
|
|
10
|
+
import { resolve } from 'node:path';
|
|
11
|
+
import { CLI_KEYS, loadProjectDotenv } from '../config/env.js';
|
|
12
|
+
import { runReindex } from '../services/reindex.js';
|
|
13
|
+
/**
|
|
14
|
+
* 入口:加载第三方 .env → 校验环境 → 调用 runReindex。
|
|
15
|
+
* 进度与统计输出到 **stderr**,避免占用 stdout。
|
|
16
|
+
* 进程退出码:成功 `0`,连接失败或异常 `1`。
|
|
17
|
+
*/
|
|
18
|
+
async function main() {
|
|
19
|
+
// Step 1: 始终从 cwd 加载第三方 .env(这是 P2,会覆盖本地 MCP .env)
|
|
20
|
+
// 注意:不能用 process.env.INDEX_ROOT,那个值可能已被本地 MCP .env(P3)污染
|
|
21
|
+
const thirdPartyKeys = loadProjectDotenv(process.cwd());
|
|
22
|
+
// Step 2: INDEX_ROOT 只有来自 P1(CLI)或 P2(第三方 .env)时才可信
|
|
23
|
+
// 若只在本地 MCP .env(P3)里设了 INDEX_ROOT,在第三方项目中运行时应忽略它
|
|
24
|
+
const indexRoot = CLI_KEYS.has('INDEX_ROOT') || thirdPartyKeys.has('INDEX_ROOT')
|
|
25
|
+
? process.env.INDEX_ROOT
|
|
26
|
+
: undefined;
|
|
27
|
+
const projectRoot = resolve(indexRoot ?? process.cwd());
|
|
28
|
+
console.error(`PG_URL=${process.env.PG_URL ? '(set)' : '(not set)'}` +
|
|
29
|
+
`[index] projectRoot=${projectRoot} (INDEX_ROOT: ${CLI_KEYS.has('INDEX_ROOT') ? 'CLI' : thirdPartyKeys.has('INDEX_ROOT') ? 'third-party .env' : 'cwd fallback'})`);
|
|
30
|
+
const globPatterns = process.env.INDEX_GLOB
|
|
31
|
+
? process.env.INDEX_GLOB.split(/\s+/)
|
|
32
|
+
.map((s) => s.trim())
|
|
33
|
+
.filter(Boolean)
|
|
34
|
+
: undefined;
|
|
35
|
+
const ignore = process.env.INDEX_IGNORE
|
|
36
|
+
? process.env.INDEX_IGNORE.split(',').map((s) => s.trim())
|
|
37
|
+
: undefined;
|
|
38
|
+
const forceRebuild = process.argv.includes('--force-rebuild');
|
|
39
|
+
const result = await runReindex({
|
|
40
|
+
projectRoot,
|
|
41
|
+
globPatterns,
|
|
42
|
+
ignore,
|
|
43
|
+
dryRun: false,
|
|
44
|
+
forceRebuild,
|
|
45
|
+
});
|
|
46
|
+
console.error(`[index] extracted ${result.extractedCount} symbol(s), enqueued ${result.enqueuedCount} for embedding`);
|
|
47
|
+
console.error('[index] upserted into PostgreSQL, success:', result.upserted);
|
|
48
|
+
}
|
|
49
|
+
main().catch((err) => {
|
|
50
|
+
console.error('[index] failed:', err);
|
|
51
|
+
const anyErr = err;
|
|
52
|
+
if (anyErr.code === 'ECONNREFUSED') {
|
|
53
|
+
const pgUrl = process.env.PG_URL ?? 'postgresql://...@127.0.0.1:5432/...';
|
|
54
|
+
console.error(`[index] 原因: 无法连接 PostgreSQL(连接被拒绝)。当前 PG_URL=${pgUrl}。请确认 docker compose up -d 已启动 pgvector 容器。`);
|
|
55
|
+
}
|
|
56
|
+
else if (anyErr.code === 'ER_ACCESS_DENIED_ERROR' ||
|
|
57
|
+
anyErr.code === '28P01') {
|
|
58
|
+
console.error('[index] 原因: 用户名或密码错误,请检查 PG_URL 中的 user/password。');
|
|
59
|
+
}
|
|
60
|
+
else if (anyErr.code === 'ENOTFOUND' || anyErr.code === 'ETIMEDOUT') {
|
|
61
|
+
console.error('[index] 原因: 网络不可达或超时,请检查 PG_URL 中的 host 是否可解析。');
|
|
62
|
+
}
|
|
63
|
+
process.exit(1);
|
|
64
|
+
});
|
package/dist/config/env.js
CHANGED
|
@@ -3,98 +3,73 @@ import path from 'node:path';
|
|
|
3
3
|
import { fileURLToPath } from 'node:url';
|
|
4
4
|
import { existsSync, readFileSync } from 'node:fs';
|
|
5
5
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
6
|
-
//
|
|
6
|
+
// ─── Priority 1 (highest): Third-party CLI args --KEY=VALUE ───────────────────
|
|
7
|
+
// 记录哪些 key 来自命令行,任何后续加载都不得覆盖
|
|
8
|
+
export const CLI_KEYS = new Set();
|
|
7
9
|
for (const arg of process.argv) {
|
|
8
10
|
const match = arg.match(/^--([A-Z_][A-Z0-9_]*)=(.+)$/);
|
|
9
11
|
if (match) {
|
|
10
12
|
process.env[match[1]] = match[2];
|
|
13
|
+
CLI_KEYS.add(match[1]);
|
|
11
14
|
}
|
|
12
15
|
}
|
|
13
|
-
// MCP
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
});
|
|
16
|
+
// ─── Priority 3 (lowest): Local MCP repo .env ─────────────────────────────────
|
|
17
|
+
// override: false → 只填补尚未设置的 key,不覆盖 CLI 参数
|
|
18
|
+
// dev 模式: __dirname = src/config → ../../ = 项目根目录
|
|
19
|
+
// prod 模式: __dirname = dist/config → ../../ = 项目根目录
|
|
20
|
+
const MCP_SERVER_ENV_PATH = path.resolve(__dirname, '..', '..', '.env');
|
|
21
|
+
if (existsSync(MCP_SERVER_ENV_PATH)) {
|
|
22
|
+
dotenv.config({ path: MCP_SERVER_ENV_PATH, override: false });
|
|
23
|
+
}
|
|
20
24
|
/**
|
|
21
|
-
*
|
|
22
|
-
*
|
|
25
|
+
* 加载第三方项目的 .env 文件(Priority 2)。
|
|
26
|
+
*
|
|
27
|
+
* 优先级规则:
|
|
28
|
+
* 第三方 CLI 参数(P1) > 第三方 .env(P2) > 本地 MCP .env(P3)
|
|
29
|
+
*
|
|
30
|
+
* - CLI 参数在 CLI_KEYS 中已记录,永不覆盖
|
|
31
|
+
* - 第三方 .env 中的 key 覆盖本地 MCP .env(即 P2 > P3)
|
|
32
|
+
*
|
|
33
|
+
* 应在进程启动后、任何 env.xxx 读取前尽早调用一次。
|
|
23
34
|
*/
|
|
24
35
|
export function loadProjectDotenv(projectRoot) {
|
|
25
36
|
const envPath = path.resolve(projectRoot, '.env');
|
|
26
|
-
if (!existsSync(envPath))
|
|
27
|
-
return;
|
|
28
|
-
|
|
29
|
-
const
|
|
30
|
-
|
|
31
|
-
const
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
const eqIdx = trimmed.indexOf('=');
|
|
37
|
-
if (eqIdx === -1)
|
|
38
|
-
continue;
|
|
39
|
-
const key = trimmed.slice(0, eqIdx).trim();
|
|
40
|
-
if (!key)
|
|
41
|
-
continue;
|
|
42
|
-
thirdPartyKeys.add(key);
|
|
43
|
-
}
|
|
44
|
-
// 第二步:如果某个 key 是第三方显式定义的,则覆盖(不管值是什么)
|
|
45
|
-
for (const line of content.split('\n')) {
|
|
46
|
-
const trimmed = line.trim();
|
|
47
|
-
if (!trimmed || trimmed.startsWith('#'))
|
|
48
|
-
continue;
|
|
49
|
-
const eqIdx = trimmed.indexOf('=');
|
|
50
|
-
if (eqIdx === -1)
|
|
51
|
-
continue;
|
|
52
|
-
const key = trimmed.slice(0, eqIdx).trim();
|
|
53
|
-
let value = trimmed.slice(eqIdx + 1).trim();
|
|
54
|
-
value = value.replace(/^["']|["']$/g, '');
|
|
55
|
-
if (!key)
|
|
56
|
-
continue;
|
|
57
|
-
// 只有当第三方显式定义了这个 key 时才覆盖
|
|
58
|
-
if (thirdPartyKeys.has(key)) {
|
|
59
|
-
process.env[key] = value;
|
|
60
|
-
}
|
|
61
|
-
}
|
|
62
|
-
}
|
|
63
|
-
// 外部传入的 env 已在上一步保留,这里确保环境变量已正确设置
|
|
64
|
-
for (const arg of process.argv) {
|
|
65
|
-
const match = arg.match(/^--([A-Z_][A-Z0-9_]*)=(.+)$/);
|
|
66
|
-
if (match) {
|
|
67
|
-
process.env[match[1]] = match[2];
|
|
37
|
+
if (!existsSync(envPath))
|
|
38
|
+
return new Set();
|
|
39
|
+
// dotenv.parse 只解析文件,不写 process.env
|
|
40
|
+
const parsed = dotenv.parse(readFileSync(envPath));
|
|
41
|
+
const loadedKeys = new Set();
|
|
42
|
+
for (const [key, value] of Object.entries(parsed)) {
|
|
43
|
+
if (CLI_KEYS.has(key))
|
|
44
|
+
continue; // P1 CLI args 永不被覆盖
|
|
45
|
+
process.env[key] = value; // P2 第三方 .env 覆盖 P3 本地 .env
|
|
46
|
+
loadedKeys.add(key);
|
|
68
47
|
}
|
|
48
|
+
return loadedKeys;
|
|
69
49
|
}
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
'MYSQL_USER',
|
|
73
|
-
'MYSQL_DATABASE',
|
|
74
|
-
];
|
|
75
|
-
console.error(`[Config] MYSQL_HOST: ${process.env.MYSQL_HOST},
|
|
76
|
-
MYSQL_USER: ${process.env.MYSQL_USER},
|
|
77
|
-
MYSQL_DATABASE: ${process.env.MYSQL_DATABASE},
|
|
78
|
-
EMBEDDING_SERVICE_URL: ${process.env.EMBEDDING_SERVICE_URL},
|
|
79
|
-
MYSQL_SYMBOLS_TABLE: ${process.env.MYSQL_SYMBOLS_TABLE}
|
|
80
|
-
`);
|
|
50
|
+
// ─── env 对象:getter 懒读取,确保 loadProjectDotenv() 后立即生效 ────────────
|
|
51
|
+
// 每次访问 env.xxx 都从 process.env 实时读取,避免快照冻结问题
|
|
81
52
|
export const env = {
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
/** symbols 表名,可通过
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
53
|
+
/** PostgreSQL 连接字符串,如 postgresql://user:pass@host:5432/db */
|
|
54
|
+
get pgUrl() {
|
|
55
|
+
return (process.env.PG_URL ??
|
|
56
|
+
'postgresql://postgres:devpassword@127.0.0.1:5432/code_intelligence');
|
|
57
|
+
},
|
|
58
|
+
/** symbols 表名,可通过 SYMBOLS_TABLE 环境变量配置 */
|
|
59
|
+
get symbolsTable() {
|
|
60
|
+
return process.env.SYMBOLS_TABLE ?? 'symbols';
|
|
61
|
+
},
|
|
62
|
+
/** Python FastAPI 嵌入服务根 URL,如 http://127.0.0.1:8765 */
|
|
63
|
+
get embeddingServiceUrl() {
|
|
64
|
+
return (process.env.EMBEDDING_SERVICE_URL ?? '').trim();
|
|
65
|
+
},
|
|
91
66
|
/** Redis 连接 URL,供 BullMQ embedding worker 使用 */
|
|
92
|
-
redisUrl
|
|
67
|
+
get redisUrl() {
|
|
68
|
+
return process.env.REDIS_URL ?? 'redis://127.0.0.1:6379';
|
|
69
|
+
},
|
|
93
70
|
};
|
|
94
71
|
export function validateEnv() {
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
throw new Error(`Missing environment variable: ${key}`);
|
|
98
|
-
}
|
|
72
|
+
if (!process.env.PG_URL) {
|
|
73
|
+
console.warn('[Config] PG_URL not set, using default: postgresql://postgres:devpassword@127.0.0.1:5432/code_intelligence');
|
|
99
74
|
}
|
|
100
75
|
}
|
package/dist/db/schema.js
CHANGED
|
@@ -1,36 +1,51 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* 动态生成数据库表结构 SQL
|
|
2
|
+
* 动态生成数据库表结构 SQL(PostgreSQL + pgvector),表名可通过环境变量配置
|
|
3
3
|
*/
|
|
4
4
|
import { env } from '../config/env.js';
|
|
5
5
|
import { DEFAULT_STATUS_ON_UPSERT } from '../config/symbolStatus.js';
|
|
6
|
+
/** 确保 vector 扩展已启用 */
|
|
7
|
+
export function getEnsureExtensionSQL() {
|
|
8
|
+
return `CREATE EXTENSION IF NOT EXISTS vector`;
|
|
9
|
+
}
|
|
6
10
|
/** 获取 symbols 表的建表 SQL */
|
|
7
11
|
export function getSymbolsTableSQL() {
|
|
8
|
-
const tableName = env.
|
|
12
|
+
const tableName = env.symbolsTable;
|
|
9
13
|
return `CREATE TABLE IF NOT EXISTS ${tableName} (
|
|
10
|
-
id
|
|
11
|
-
name
|
|
12
|
-
type
|
|
13
|
-
category
|
|
14
|
-
path
|
|
15
|
-
description
|
|
16
|
-
content
|
|
17
|
-
meta
|
|
18
|
-
usage_count
|
|
19
|
-
embedding
|
|
20
|
-
insert_user
|
|
21
|
-
updated_user
|
|
22
|
-
created_at
|
|
23
|
-
updated_at
|
|
24
|
-
file_hash
|
|
25
|
-
semantic_hash VARCHAR(64)
|
|
26
|
-
status
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
INDEX idx_semantic_hash (semantic_hash),
|
|
30
|
-
INDEX idx_status (status)
|
|
14
|
+
id SERIAL PRIMARY KEY,
|
|
15
|
+
name VARCHAR(255) NOT NULL,
|
|
16
|
+
type VARCHAR(50) NOT NULL,
|
|
17
|
+
category VARCHAR(255),
|
|
18
|
+
path TEXT NOT NULL,
|
|
19
|
+
description TEXT,
|
|
20
|
+
content TEXT,
|
|
21
|
+
meta JSONB,
|
|
22
|
+
usage_count INT NOT NULL DEFAULT 0,
|
|
23
|
+
embedding vector(384),
|
|
24
|
+
insert_user VARCHAR(255) NOT NULL DEFAULT 'system',
|
|
25
|
+
updated_user VARCHAR(255) NOT NULL DEFAULT 'system',
|
|
26
|
+
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
|
|
27
|
+
updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
|
|
28
|
+
file_hash VARCHAR(64),
|
|
29
|
+
semantic_hash VARCHAR(64),
|
|
30
|
+
status SMALLINT NOT NULL DEFAULT ${DEFAULT_STATUS_ON_UPSERT},
|
|
31
|
+
CONSTRAINT uk_${tableName}_path_name UNIQUE (path, name),
|
|
32
|
+
CONSTRAINT chk_${tableName}_type CHECK (type IN ('component','function','type','class','interface','hook'))
|
|
31
33
|
)`;
|
|
32
34
|
}
|
|
33
|
-
/**
|
|
35
|
+
/** 获取基础索引 SQL(不含 HNSW,HNSW 建议数据量 > 1000 后手动执行) */
|
|
36
|
+
export function getSymbolsIndexSQLs() {
|
|
37
|
+
const t = env.symbolsTable;
|
|
38
|
+
return [
|
|
39
|
+
`CREATE INDEX IF NOT EXISTS idx_file_hash ON ${t}(file_hash)`,
|
|
40
|
+
`CREATE INDEX IF NOT EXISTS idx_semantic_hash ON ${t}(semantic_hash)`,
|
|
41
|
+
`CREATE INDEX IF NOT EXISTS idx_status ON ${t}(status)`,
|
|
42
|
+
];
|
|
43
|
+
}
|
|
44
|
+
/** 获取所有建表 SQL(extension + table + indexes,可逐条执行) */
|
|
34
45
|
export function getAllTableSQLs() {
|
|
35
|
-
return [
|
|
46
|
+
return [
|
|
47
|
+
getEnsureExtensionSQL(),
|
|
48
|
+
getSymbolsTableSQL(),
|
|
49
|
+
...getSymbolsIndexSQLs(),
|
|
50
|
+
];
|
|
36
51
|
}
|
|
@@ -101,7 +101,7 @@ export async function resolveCategory(rows, vecs) {
|
|
|
101
101
|
const pros = rows.map(async (r, i) => {
|
|
102
102
|
const { name } = r;
|
|
103
103
|
const ruleCategory = inferCategoryFromPath(r.path) || inferCategoryFromName(name);
|
|
104
|
-
console.error(`===from ruleCategory`, name, ruleCategory);
|
|
104
|
+
// console.error(`===from ruleCategory`, name, ruleCategory);
|
|
105
105
|
if (ruleCategory) {
|
|
106
106
|
return {
|
|
107
107
|
...r,
|
|
@@ -110,7 +110,7 @@ export async function resolveCategory(rows, vecs) {
|
|
|
110
110
|
}
|
|
111
111
|
// TODO: 这里有问题,embedding是语义模板向量,categoryEmbeddingsCache是单个词的向量,相似度必然是<0.3
|
|
112
112
|
const emd = categoryFromEmbedding(vecs[i]);
|
|
113
|
-
console.error(`===from categoryFromEmbedding`, name, emd);
|
|
113
|
+
// console.error(`===from categoryFromEmbedding`, name, emd);
|
|
114
114
|
if (emd) {
|
|
115
115
|
return {
|
|
116
116
|
...r,
|
|
@@ -118,7 +118,7 @@ export async function resolveCategory(rows, vecs) {
|
|
|
118
118
|
};
|
|
119
119
|
}
|
|
120
120
|
const cateLlm = await categoryFromLLM(r.content);
|
|
121
|
-
console.error(`===from categoryFromLLM`, name, cateLlm);
|
|
121
|
+
// console.error(`===from categoryFromLLM`, name, cateLlm);
|
|
122
122
|
return {
|
|
123
123
|
...r,
|
|
124
124
|
category: cateLlm,
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import { env } from '../config/env.js';
|
|
2
|
-
import {
|
|
2
|
+
import { getAllTableSQLs } from '../db/schema.js';
|
|
3
|
+
import { SYMBOL_STATUS } from '../config/symbolStatus.js';
|
|
3
4
|
/**
|
|
4
5
|
* 依赖表上 `(path, name)` 唯一键:新行插入,已存在则更新类型/描述/内容与 meta;**不**修改 `usage_count`。
|
|
5
6
|
* @param rows 来自 `indexProject`;空数组时立即返回,不开启事务。
|
|
6
7
|
* @param embeddings 与 `rows` 等长;某项为 `null` 表示本行不更新已有 `embedding`(新行则写入 NULL)。
|
|
7
|
-
*
|
|
8
|
+
* - 有值 → status 置为 online(2)
|
|
9
|
+
* - null → 新行写 pending(1),已有行保持原 status
|
|
8
10
|
*/
|
|
9
11
|
export async function upsertSymbols(pool, rows, embeddings) {
|
|
10
12
|
if (rows.length === 0)
|
|
@@ -12,30 +14,48 @@ export async function upsertSymbols(pool, rows, embeddings) {
|
|
|
12
14
|
if (embeddings && embeddings.length !== rows.length) {
|
|
13
15
|
throw new Error('upsertSymbols: embeddings length must match rows');
|
|
14
16
|
}
|
|
15
|
-
const actor = process.env.GITHUB_USERNAME?.trim() || '
|
|
16
|
-
await pool.
|
|
17
|
-
const sql = `
|
|
18
|
-
INSERT INTO ${env.mysqlSymbolsTable} (name, type, category, path, description, content, meta, insert_user, updated_user, embedding, semantic_hash, file_hash)
|
|
19
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
20
|
-
ON DUPLICATE KEY UPDATE
|
|
21
|
-
type = VALUES(type),
|
|
22
|
-
category = VALUES(category),
|
|
23
|
-
description = VALUES(description),
|
|
24
|
-
content = VALUES(content),
|
|
25
|
-
meta = VALUES(meta),
|
|
26
|
-
updated_user = VALUES(updated_user),
|
|
27
|
-
embedding = CASE WHEN VALUES(embedding) IS NOT NULL THEN VALUES(embedding) ELSE embedding END,
|
|
28
|
-
semantic_hash = VALUES(semantic_hash),
|
|
29
|
-
file_hash = VALUES(file_hash)
|
|
30
|
-
`;
|
|
31
|
-
const conn = await pool.getConnection();
|
|
17
|
+
const actor = process.env.GITHUB_USERNAME?.trim() || 'system';
|
|
18
|
+
const client = await pool.connect();
|
|
32
19
|
try {
|
|
33
|
-
|
|
20
|
+
// 确保 extension + 表 + 基础索引存在
|
|
21
|
+
for (const sql of getAllTableSQLs()) {
|
|
22
|
+
await client.query(sql);
|
|
23
|
+
}
|
|
24
|
+
await client.query('BEGIN');
|
|
25
|
+
const t = env.symbolsTable;
|
|
26
|
+
const sql = `
|
|
27
|
+
INSERT INTO ${t}
|
|
28
|
+
(name, type, category, path, description, content, meta,
|
|
29
|
+
insert_user, updated_user, embedding, semantic_hash, file_hash, status)
|
|
30
|
+
VALUES ($1, $2, $3, $4, $5, $6, $7::jsonb, $8, $9, $10::vector, $11, $12, $13)
|
|
31
|
+
ON CONFLICT (path, name) DO UPDATE SET
|
|
32
|
+
type = EXCLUDED.type,
|
|
33
|
+
category = EXCLUDED.category,
|
|
34
|
+
description = EXCLUDED.description,
|
|
35
|
+
content = EXCLUDED.content,
|
|
36
|
+
meta = EXCLUDED.meta,
|
|
37
|
+
updated_user = EXCLUDED.updated_user,
|
|
38
|
+
embedding = CASE
|
|
39
|
+
WHEN EXCLUDED.embedding IS NOT NULL THEN EXCLUDED.embedding
|
|
40
|
+
WHEN EXCLUDED.semantic_hash != ${t}.semantic_hash THEN NULL
|
|
41
|
+
ELSE ${t}.embedding
|
|
42
|
+
END,
|
|
43
|
+
semantic_hash = EXCLUDED.semantic_hash,
|
|
44
|
+
file_hash = EXCLUDED.file_hash,
|
|
45
|
+
status = CASE
|
|
46
|
+
WHEN EXCLUDED.embedding IS NOT NULL THEN ${SYMBOL_STATUS.ONLINE}
|
|
47
|
+
WHEN EXCLUDED.semantic_hash != ${t}.semantic_hash THEN ${SYMBOL_STATUS.PENDING}
|
|
48
|
+
ELSE ${t}.status
|
|
49
|
+
END,
|
|
50
|
+
updated_at = NOW()
|
|
51
|
+
`;
|
|
34
52
|
for (let i = 0; i < rows.length; i++) {
|
|
35
53
|
const r = rows[i];
|
|
36
54
|
const emb = embeddings?.[i];
|
|
37
|
-
|
|
38
|
-
|
|
55
|
+
// pgvector 接受 "[x1,x2,...]" 格式字符串
|
|
56
|
+
const vecStr = emb != null ? `[${emb.join(',')}]` : null;
|
|
57
|
+
const statusVal = vecStr !== null ? SYMBOL_STATUS.ONLINE : SYMBOL_STATUS.PENDING;
|
|
58
|
+
await client.query(sql, [
|
|
39
59
|
r.name,
|
|
40
60
|
r.type,
|
|
41
61
|
r.category,
|
|
@@ -45,18 +65,19 @@ export async function upsertSymbols(pool, rows, embeddings) {
|
|
|
45
65
|
JSON.stringify(r.meta),
|
|
46
66
|
actor,
|
|
47
67
|
actor,
|
|
48
|
-
|
|
68
|
+
vecStr, // $10 → cast as vector, null 时写 NULL
|
|
49
69
|
r.semantic_hash,
|
|
50
70
|
r.file_hash,
|
|
71
|
+
statusVal,
|
|
51
72
|
]);
|
|
52
73
|
}
|
|
53
|
-
await
|
|
74
|
+
await client.query('COMMIT');
|
|
54
75
|
}
|
|
55
76
|
catch (e) {
|
|
56
|
-
await
|
|
77
|
+
await client.query('ROLLBACK');
|
|
57
78
|
throw e;
|
|
58
79
|
}
|
|
59
80
|
finally {
|
|
60
|
-
|
|
81
|
+
client.release();
|
|
61
82
|
}
|
|
62
83
|
}
|