@lorrylurui/code-intelligence-mcp 1.2.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,31 +15,50 @@ async function main() {
15
15
  let changedFiles = [];
16
16
  let deletedFiles = [];
17
17
  let renamedFiles = [];
18
+ /** 解析 --key value 和 --key=value 两种格式 */
19
+ function getArgValue(key, idx) {
20
+ const arg = args[idx];
21
+ const prefix = `--${key}=`;
22
+ if (arg.startsWith(prefix))
23
+ return [arg.slice(prefix.length), idx];
24
+ if (arg === `--${key}` && idx + 1 < args.length)
25
+ return [args[idx + 1], idx + 1];
26
+ return [null, idx];
27
+ }
18
28
  for (let i = 0; i < args.length; i++) {
19
29
  const arg = args[i];
20
- if (arg === '--changed' && i + 1 < args.length) {
21
- changedFiles = args[i + 1]
22
- .split(',')
23
- .map((s) => s.trim())
24
- .filter(Boolean);
25
- i++;
30
+ if (arg === '--changed' || arg.startsWith('--changed=')) {
31
+ const [val, next] = getArgValue('changed', i);
32
+ if (val) {
33
+ changedFiles = val
34
+ .split(',')
35
+ .map((s) => s.trim())
36
+ .filter(Boolean);
37
+ i = next;
38
+ }
26
39
  }
27
- else if (arg === '--deleted' && i + 1 < args.length) {
28
- deletedFiles = args[i + 1]
29
- .split(',')
30
- .map((s) => s.trim())
31
- .filter(Boolean);
32
- i++;
40
+ else if (arg === '--deleted' || arg.startsWith('--deleted=')) {
41
+ const [val, next] = getArgValue('deleted', i);
42
+ if (val) {
43
+ deletedFiles = val
44
+ .split(',')
45
+ .map((s) => s.trim())
46
+ .filter(Boolean);
47
+ i = next;
48
+ }
33
49
  }
34
- else if (arg === '--renamed' && i + 1 < args.length) {
35
- renamedFiles = args[i + 1]
36
- .split(',')
37
- .map((s) => {
38
- const [from, to] = s.split(':');
39
- return { from: from.trim(), to: to.trim() };
40
- })
41
- .filter((r) => r.from && r.to);
42
- i++;
50
+ else if (arg === '--renamed' || arg.startsWith('--renamed=')) {
51
+ const [val, next] = getArgValue('renamed', i);
52
+ if (val) {
53
+ renamedFiles = val
54
+ .split(',')
55
+ .map((s) => {
56
+ const [from, to] = s.split(':');
57
+ return { from: from.trim(), to: to.trim() };
58
+ })
59
+ .filter((r) => r.from && r.to);
60
+ i = next;
61
+ }
43
62
  }
44
63
  }
45
64
  if (changedFiles.length === 0 &&
@@ -1,28 +1,21 @@
1
1
  // CI增量索引:处理changed files和deleted files
2
- import { env, loadProjectDotenv } from '../config/env.js';
3
- import { getMySqlPool } from '../db/mysql.js';
2
+ import { env } from '../config/env.js';
3
+ import { getPool } from '../db/postgres.js';
4
4
  import { indexProject } from '../indexer/indexProject.js';
5
5
  import { DEFAULT_STATUS_ON_UPSERT, SYMBOL_STATUS, } from '../config/symbolStatus.js';
6
6
  import { enqueueEmbeddingBatch, closeEmbeddingQueue, } from '../services/embeddingQueue.js';
7
7
  export async function runIncrementalIndex(opts) {
8
8
  const { projectRoot, changedFiles, deletedFiles, renamedFiles = [] } = opts;
9
- loadProjectDotenv(projectRoot);
10
- const pool = getMySqlPool();
11
- if (!pool) {
12
- throw new Error('Failed to get MySQL pool');
13
- }
14
- const tableName = env.mysqlSymbolsTable;
9
+ const pool = getPool();
10
+ const tableName = env.symbolsTable;
15
11
  // 1. 删除文件:标记 offline
16
12
  for (const file of deletedFiles) {
17
- await pool.query(`UPDATE ${tableName} SET status = ? WHERE path = ?`, [
18
- SYMBOL_STATUS.OFFLINE,
19
- file,
20
- ]);
13
+ await pool.query(`UPDATE ${tableName} SET status = $1 WHERE path = $2`, [SYMBOL_STATUS.OFFLINE, file]);
21
14
  console.error(`[ci-index] marked offline: ${file}`);
22
15
  }
23
16
  // 2. 重命名文件:更新path
24
17
  for (const { from, to } of renamedFiles) {
25
- await pool.query(`UPDATE ${tableName} SET path = ? WHERE path = ?`, [
18
+ await pool.query(`UPDATE ${tableName} SET path = $1 WHERE path = $2`, [
26
19
  to,
27
20
  from,
28
21
  ]);
@@ -35,21 +28,25 @@ export async function runIncrementalIndex(opts) {
35
28
  globPatterns: changedFiles,
36
29
  });
37
30
  for (const row of rows) {
38
- // 写入结构化数据,标记pending
31
+ // 写入结构化数据
32
+ // status 逻辑:新行写 pending;已有行仅在 semantic_hash 发生变化时才重置为 pending,
33
+ // hash 未变说明语义未变,保留原 status(online → 缓存命中,不重复 embedding)
39
34
  await pool.query(`INSERT INTO ${tableName}
40
35
  (name, type, category, path, description, content, meta,
41
36
  file_hash, semantic_hash, status,
42
37
  usage_count, created_at, updated_at)
43
- VALUES (?, ?, ?, ?, ?, ?, CAST(? AS JSON), ?, ?, ?, 0, NOW(), NOW())
44
- ON DUPLICATE KEY UPDATE
45
- type = VALUES(type),
46
- category = VALUES(category),
47
- description = VALUES(description),
48
- content = VALUES(content),
49
- meta = VALUES(meta),
50
- file_hash = VALUES(file_hash),
51
- semantic_hash = VALUES(semantic_hash),
52
- status = ?,
38
+ VALUES ($1, $2, $3, $4, $5, $6, $7::jsonb, $8, $9, $10, 0, NOW(), NOW())
39
+ ON CONFLICT (path, name) DO UPDATE SET
40
+ type = EXCLUDED.type,
41
+ category = EXCLUDED.category,
42
+ description = EXCLUDED.description,
43
+ content = EXCLUDED.content,
44
+ meta = EXCLUDED.meta,
45
+ file_hash = EXCLUDED.file_hash,
46
+ semantic_hash = EXCLUDED.semantic_hash,
47
+ status = CASE WHEN ${tableName}.semantic_hash = EXCLUDED.semantic_hash
48
+ THEN ${tableName}.status
49
+ ELSE EXCLUDED.status END,
53
50
  updated_at = NOW()`, [
54
51
  row.name,
55
52
  row.type,
@@ -61,16 +58,15 @@ export async function runIncrementalIndex(opts) {
61
58
  row.file_hash,
62
59
  row.semantic_hash,
63
60
  DEFAULT_STATUS_ON_UPSERT,
64
- DEFAULT_STATUS_ON_UPSERT,
65
61
  ]);
66
- console.error(`[ci-index] indexed (pending): ${row.path}:${row.name}`);
62
+ console.error(`[ci-index] upserted: ${row.path}:${row.name}`);
67
63
  }
68
64
  // 批量入队:jobId = semanticHash,相同 hash 自动去重,1000 个符号可能只产生 N 个唯一 job
69
65
  const hashes = [
70
66
  ...new Set(rows.map((r) => r.semantic_hash).filter(Boolean)),
71
67
  ];
72
68
  if (hashes.length > 0) {
73
- await enqueueEmbeddingBatch(hashes);
69
+ await enqueueEmbeddingBatch(hashes, env.symbolsTable);
74
70
  console.error(`[ci-index] enqueued ${hashes.length} unique semantic hashes for embedding`);
75
71
  }
76
72
  }
@@ -11,7 +11,7 @@
11
11
  *
12
12
  * 环境变量:
13
13
  * REDIS_URL Redis 连接 URL(默认 redis://127.0.0.1:6379)
14
- * MYSQL_HOST / ... MySQL 连接配置
14
+ * PG_URL PostgreSQL 连接字符串
15
15
  * EMBEDDING_SERVICE_URL Python embedding 服务地址
16
16
  * WORKER_CONCURRENCY 单进程并发 job 数(默认 5)
17
17
  * WORKER_RPM_LIMIT 全局 RPM 上限(默认 100,跨所有 worker 进程)
@@ -23,13 +23,13 @@ const projectRoot = process.env.PROJECT_ROOT ?? process.cwd();
23
23
  loadProjectDotenv(projectRoot);
24
24
  const concurrency = Number(process.env.WORKER_CONCURRENCY ?? '5');
25
25
  const rpmLimit = Number(process.env.WORKER_RPM_LIMIT ?? '100');
26
- const worker = startEmbeddingWorker({ concurrency, rpmLimit });
26
+ const { worker, stop } = await startEmbeddingWorker({ concurrency, rpmLimit });
27
27
  console.error(`[embedding-worker] started concurrency=${concurrency} rpm_limit=${rpmLimit}`);
28
- // 优雅关闭:等当前 job 执行完再退出
28
+ // 当前 job 执行完再退出
29
29
  for (const sig of ['SIGINT', 'SIGTERM']) {
30
30
  process.on(sig, async () => {
31
31
  console.error('[embedding-worker] shutting down…');
32
- await worker.close();
32
+ await stop();
33
33
  process.exit(0);
34
34
  });
35
35
  }
@@ -0,0 +1,64 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Phase 2 CLI:扫描代码库并写入 PostgreSQL `symbols`。
4
+ *
5
+ * 环境变量加载顺序:
6
+ * 1. 命令行参数(最高优先级)
7
+ * 2. INDEX_ROOT 指向的第三方项目 .env(中等优先级,优先使用第三方显式设置的值)
8
+ * 3. 本地的 .env(最低优先级,提供默认值)
9
+ */
10
+ import { resolve } from 'node:path';
11
+ import { CLI_KEYS, loadProjectDotenv } from '../config/env.js';
12
+ import { runReindex } from '../services/reindex.js';
13
+ /**
14
+ * 入口:加载第三方 .env → 校验环境 → 调用 runReindex。
15
+ * 进度与统计输出到 **stderr**,避免占用 stdout。
16
+ * 进程退出码:成功 `0`,连接失败或异常 `1`。
17
+ */
18
+ async function main() {
19
+ // Step 1: 始终从 cwd 加载第三方 .env(这是 P2,会覆盖本地 MCP .env)
20
+ // 注意:不能用 process.env.INDEX_ROOT,那个值可能已被本地 MCP .env(P3)污染
21
+ const thirdPartyKeys = loadProjectDotenv(process.cwd());
22
+ // Step 2: INDEX_ROOT 只有来自 P1(CLI)或 P2(第三方 .env)时才可信
23
+ // 若只在本地 MCP .env(P3)里设了 INDEX_ROOT,在第三方项目中运行时应忽略它
24
+ const indexRoot = CLI_KEYS.has('INDEX_ROOT') || thirdPartyKeys.has('INDEX_ROOT')
25
+ ? process.env.INDEX_ROOT
26
+ : undefined;
27
+ const projectRoot = resolve(indexRoot ?? process.cwd());
28
+ console.error(`PG_URL=${process.env.PG_URL ? '(set)' : '(not set)'}` +
29
+ `[index] projectRoot=${projectRoot} (INDEX_ROOT: ${CLI_KEYS.has('INDEX_ROOT') ? 'CLI' : thirdPartyKeys.has('INDEX_ROOT') ? 'third-party .env' : 'cwd fallback'})`);
30
+ const globPatterns = process.env.INDEX_GLOB
31
+ ? process.env.INDEX_GLOB.split(/\s+/)
32
+ .map((s) => s.trim())
33
+ .filter(Boolean)
34
+ : undefined;
35
+ const ignore = process.env.INDEX_IGNORE
36
+ ? process.env.INDEX_IGNORE.split(',').map((s) => s.trim())
37
+ : undefined;
38
+ const forceRebuild = process.argv.includes('--force-rebuild');
39
+ const result = await runReindex({
40
+ projectRoot,
41
+ globPatterns,
42
+ ignore,
43
+ dryRun: false,
44
+ forceRebuild,
45
+ });
46
+ console.error(`[index] extracted ${result.extractedCount} symbol(s), enqueued ${result.enqueuedCount} for embedding`);
47
+ console.error('[index] upserted into PostgreSQL, success:', result.upserted);
48
+ }
49
+ main().catch((err) => {
50
+ console.error('[index] failed:', err);
51
+ const anyErr = err;
52
+ if (anyErr.code === 'ECONNREFUSED') {
53
+ const pgUrl = process.env.PG_URL ?? 'postgresql://...@127.0.0.1:5432/...';
54
+ console.error(`[index] 原因: 无法连接 PostgreSQL(连接被拒绝)。当前 PG_URL=${pgUrl}。请确认 docker compose up -d 已启动 pgvector 容器。`);
55
+ }
56
+ else if (anyErr.code === 'ER_ACCESS_DENIED_ERROR' ||
57
+ anyErr.code === '28P01') {
58
+ console.error('[index] 原因: 用户名或密码错误,请检查 PG_URL 中的 user/password。');
59
+ }
60
+ else if (anyErr.code === 'ENOTFOUND' || anyErr.code === 'ETIMEDOUT') {
61
+ console.error('[index] 原因: 网络不可达或超时,请检查 PG_URL 中的 host 是否可解析。');
62
+ }
63
+ process.exit(1);
64
+ });
@@ -3,98 +3,73 @@ import path from 'node:path';
3
3
  import { fileURLToPath } from 'node:url';
4
4
  import { existsSync, readFileSync } from 'node:fs';
5
5
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
6
- // 解析命令行参数 --key=value 格式,注入到 process.env
6
+ // ─── Priority 1 (highest): Third-party CLI args --KEY=VALUE ───────────────────
7
+ // 记录哪些 key 来自命令行,任何后续加载都不得覆盖
8
+ export const CLI_KEYS = new Set();
7
9
  for (const arg of process.argv) {
8
10
  const match = arg.match(/^--([A-Z_][A-Z0-9_]*)=(.+)$/);
9
11
  if (match) {
10
12
  process.env[match[1]] = match[2];
13
+ CLI_KEYS.add(match[1]);
11
14
  }
12
15
  }
13
- // MCP Server 本地 .env 路径(固定指向项目根目录)
14
- const MCP_SERVER_ROOT = path.resolve(__dirname, '..', '..', './dist'); // MCP Server 根目录
15
- const MCP_SERVER_ENV_PATH = path.resolve(MCP_SERVER_ROOT, '.env');
16
- dotenv.config({
17
- path: MCP_SERVER_ENV_PATH,
18
- override: false, // 不覆盖已存在的变量
19
- });
16
+ // ─── Priority 3 (lowest): Local MCP repo .env ─────────────────────────────────
17
+ // override: false 只填补尚未设置的 key,不覆盖 CLI 参数
18
+ // dev 模式: __dirname = src/config → ../../ = 项目根目录
19
+ // prod 模式: __dirname = dist/config → ../../ = 项目根目录
20
+ const MCP_SERVER_ENV_PATH = path.resolve(__dirname, '..', '..', '.env');
21
+ if (existsSync(MCP_SERVER_ENV_PATH)) {
22
+ dotenv.config({ path: MCP_SERVER_ENV_PATH, override: false });
23
+ }
20
24
  /**
21
- * 从指定项目根目录加载 .env process.env
22
- * 行为:优先使用第三方显式设置的值,否则保留 MCP Server 本地配置
25
+ * 加载第三方项目的 .env 文件(Priority 2)。
26
+ *
27
+ * 优先级规则:
28
+ * 第三方 CLI 参数(P1) > 第三方 .env(P2) > 本地 MCP .env(P3)
29
+ *
30
+ * - CLI 参数在 CLI_KEYS 中已记录,永不覆盖
31
+ * - 第三方 .env 中的 key 覆盖本地 MCP .env(即 P2 > P3)
32
+ *
33
+ * 应在进程启动后、任何 env.xxx 读取前尽早调用一次。
23
34
  */
24
35
  export function loadProjectDotenv(projectRoot) {
25
36
  const envPath = path.resolve(projectRoot, '.env');
26
- if (!existsSync(envPath)) {
27
- return;
28
- }
29
- const content = readFileSync(envPath, 'utf-8');
30
- // 第一步:收集第三方 .env 中所有显式定义的 key
31
- const thirdPartyKeys = new Set();
32
- for (const line of content.split('\n')) {
33
- const trimmed = line.trim();
34
- if (!trimmed || trimmed.startsWith('#'))
35
- continue;
36
- const eqIdx = trimmed.indexOf('=');
37
- if (eqIdx === -1)
38
- continue;
39
- const key = trimmed.slice(0, eqIdx).trim();
40
- if (!key)
41
- continue;
42
- thirdPartyKeys.add(key);
43
- }
44
- // 第二步:如果某个 key 是第三方显式定义的,则覆盖(不管值是什么)
45
- for (const line of content.split('\n')) {
46
- const trimmed = line.trim();
47
- if (!trimmed || trimmed.startsWith('#'))
48
- continue;
49
- const eqIdx = trimmed.indexOf('=');
50
- if (eqIdx === -1)
51
- continue;
52
- const key = trimmed.slice(0, eqIdx).trim();
53
- let value = trimmed.slice(eqIdx + 1).trim();
54
- value = value.replace(/^["']|["']$/g, '');
55
- if (!key)
56
- continue;
57
- // 只有当第三方显式定义了这个 key 时才覆盖
58
- if (thirdPartyKeys.has(key)) {
59
- process.env[key] = value;
60
- }
61
- }
62
- }
63
- // 外部传入的 env 已在上一步保留,这里确保环境变量已正确设置
64
- for (const arg of process.argv) {
65
- const match = arg.match(/^--([A-Z_][A-Z0-9_]*)=(.+)$/);
66
- if (match) {
67
- process.env[match[1]] = match[2];
37
+ if (!existsSync(envPath))
38
+ return new Set();
39
+ // dotenv.parse 只解析文件,不写 process.env
40
+ const parsed = dotenv.parse(readFileSync(envPath));
41
+ const loadedKeys = new Set();
42
+ for (const [key, value] of Object.entries(parsed)) {
43
+ if (CLI_KEYS.has(key))
44
+ continue; // P1 CLI args 永不被覆盖
45
+ process.env[key] = value; // P2 第三方 .env 覆盖 P3 本地 .env
46
+ loadedKeys.add(key);
68
47
  }
48
+ return loadedKeys;
69
49
  }
70
- const requiredWhenEnabled = [
71
- 'MYSQL_HOST',
72
- 'MYSQL_USER',
73
- 'MYSQL_DATABASE',
74
- ];
75
- console.error(`[Config] MYSQL_HOST: ${process.env.MYSQL_HOST},
76
- MYSQL_USER: ${process.env.MYSQL_USER},
77
- MYSQL_DATABASE: ${process.env.MYSQL_DATABASE},
78
- EMBEDDING_SERVICE_URL: ${process.env.EMBEDDING_SERVICE_URL},
79
- MYSQL_SYMBOLS_TABLE: ${process.env.MYSQL_SYMBOLS_TABLE}
80
- `);
50
+ // ─── env 对象:getter 懒读取,确保 loadProjectDotenv() 后立即生效 ────────────
51
+ // 每次访问 env.xxx 都从 process.env 实时读取,避免快照冻结问题
81
52
  export const env = {
82
- mysqlHost: process.env.MYSQL_HOST ?? '127.0.0.1',
83
- mysqlPort: Number(process.env.MYSQL_PORT ?? '3306'),
84
- mysqlUser: process.env.MYSQL_USER ?? 'root',
85
- mysqlPassword: process.env.MYSQL_PASSWORD ?? '',
86
- mysqlDatabase: process.env.MYSQL_DATABASE ?? 'code_intelligence',
87
- /** symbols 表名,可通过 MYSQL_SYMBOLS_TABLE 环境变量配置 */
88
- mysqlSymbolsTable: process.env.MYSQL_SYMBOLS_TABLE ?? 'symbols',
89
- /** Phase 5:指向 Python FastAPI 嵌入服务根 URL,如 http://127.0.0.1:8765 */
90
- embeddingServiceUrl: (process.env.EMBEDDING_SERVICE_URL ?? '').trim(),
53
+ /** PostgreSQL 连接字符串,如 postgresql://user:pass@host:5432/db */
54
+ get pgUrl() {
55
+ return (process.env.PG_URL ??
56
+ 'postgresql://postgres:devpassword@127.0.0.1:5432/code_intelligence');
57
+ },
58
+ /** symbols 表名,可通过 SYMBOLS_TABLE 环境变量配置 */
59
+ get symbolsTable() {
60
+ return process.env.SYMBOLS_TABLE ?? 'symbols';
61
+ },
62
+ /** Python FastAPI 嵌入服务根 URL,如 http://127.0.0.1:8765 */
63
+ get embeddingServiceUrl() {
64
+ return (process.env.EMBEDDING_SERVICE_URL ?? '').trim();
65
+ },
91
66
  /** Redis 连接 URL,供 BullMQ embedding worker 使用 */
92
- redisUrl: process.env.REDIS_URL ?? 'redis://127.0.0.1:6379',
67
+ get redisUrl() {
68
+ return process.env.REDIS_URL ?? 'redis://127.0.0.1:6379';
69
+ },
93
70
  };
94
71
  export function validateEnv() {
95
- for (const key of requiredWhenEnabled) {
96
- if (!process.env[key]) {
97
- throw new Error(`Missing environment variable: ${key}`);
98
- }
72
+ if (!process.env.PG_URL) {
73
+ console.warn('[Config] PG_URL not set, using default: postgresql://postgres:devpassword@127.0.0.1:5432/code_intelligence');
99
74
  }
100
75
  }
@@ -0,0 +1,13 @@
1
+ import pg from 'pg';
2
+ import { env } from '../config/env.js';
3
+ const { Pool } = pg;
4
+ let pool = null;
5
+ export function getPool() {
6
+ if (!pool) {
7
+ pool = new Pool({
8
+ connectionString: env.pgUrl,
9
+ max: 10,
10
+ });
11
+ }
12
+ return pool;
13
+ }
package/dist/db/schema.js CHANGED
@@ -1,36 +1,51 @@
1
1
  /**
2
- * 动态生成数据库表结构 SQL,表名可通过环境变量配置
2
+ * 动态生成数据库表结构 SQL(PostgreSQL + pgvector),表名可通过环境变量配置
3
3
  */
4
4
  import { env } from '../config/env.js';
5
5
  import { DEFAULT_STATUS_ON_UPSERT } from '../config/symbolStatus.js';
6
+ /** 确保 vector 扩展已启用 */
7
+ export function getEnsureExtensionSQL() {
8
+ return `CREATE EXTENSION IF NOT EXISTS vector`;
9
+ }
6
10
  /** 获取 symbols 表的建表 SQL */
7
11
  export function getSymbolsTableSQL() {
8
- const tableName = env.mysqlSymbolsTable;
12
+ const tableName = env.symbolsTable;
9
13
  return `CREATE TABLE IF NOT EXISTS ${tableName} (
10
- id INT PRIMARY KEY AUTO_INCREMENT,
11
- name VARCHAR(255) NOT NULL,
12
- type ENUM('component', 'function', 'type', 'class', 'interface', 'hook') NOT NULL,
13
- category VARCHAR(255) NULL,
14
- path TEXT NOT NULL,
15
- description TEXT NULL,
16
- content MEDIUMTEXT NULL,
17
- meta JSON NULL,
18
- usage_count INT NOT NULL DEFAULT 0,
19
- embedding JSON NULL COMMENT 'Phase 5: L2-normalized vector from Python embedding service (e.g. 384-dim MiniLM)',
20
- insert_user VARCHAR(255) NOT NULL DEFAULT 'LorryIsLuRui',
21
- updated_user VARCHAR(255) NOT NULL DEFAULT 'LorryIsLuRui',
22
- created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
23
- updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
24
- file_hash VARCHAR(64) NULL COMMENT '文件内容 SHA256',
25
- semantic_hash VARCHAR(64) NULL COMMENT 'normalized AST 语义模板 SHA256',
26
- status TINYINT NOT NULL DEFAULT ${DEFAULT_STATUS_ON_UPSERT} COMMENT '状态: 0-offline(删除), 1-pending(待处理), 2-online(可用), 3-error(错误)',
27
- UNIQUE KEY uk_symbols_path_name (path(512), name(255)),
28
- INDEX idx_file_hash (file_hash),
29
- INDEX idx_semantic_hash (semantic_hash),
30
- INDEX idx_status (status)
14
+ id SERIAL PRIMARY KEY,
15
+ name VARCHAR(255) NOT NULL,
16
+ type VARCHAR(50) NOT NULL,
17
+ category VARCHAR(255),
18
+ path TEXT NOT NULL,
19
+ description TEXT,
20
+ content TEXT,
21
+ meta JSONB,
22
+ usage_count INT NOT NULL DEFAULT 0,
23
+ embedding vector(384),
24
+ insert_user VARCHAR(255) NOT NULL DEFAULT 'system',
25
+ updated_user VARCHAR(255) NOT NULL DEFAULT 'system',
26
+ created_at TIMESTAMP NOT NULL DEFAULT NOW(),
27
+ updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
28
+ file_hash VARCHAR(64),
29
+ semantic_hash VARCHAR(64),
30
+ status SMALLINT NOT NULL DEFAULT ${DEFAULT_STATUS_ON_UPSERT},
31
+ CONSTRAINT uk_${tableName}_path_name UNIQUE (path, name),
32
+ CONSTRAINT chk_${tableName}_type CHECK (type IN ('component','function','type','class','interface','hook'))
31
33
  )`;
32
34
  }
33
- /** 获取所有建表 SQL(可一次性执行) */
35
+ /** 获取基础索引 SQL(不含 HNSW,HNSW 建议数据量 > 1000 后手动执行) */
36
+ export function getSymbolsIndexSQLs() {
37
+ const t = env.symbolsTable;
38
+ return [
39
+ `CREATE INDEX IF NOT EXISTS idx_file_hash ON ${t}(file_hash)`,
40
+ `CREATE INDEX IF NOT EXISTS idx_semantic_hash ON ${t}(semantic_hash)`,
41
+ `CREATE INDEX IF NOT EXISTS idx_status ON ${t}(status)`,
42
+ ];
43
+ }
44
+ /** 获取所有建表 SQL(extension + table + indexes,可逐条执行) */
34
45
  export function getAllTableSQLs() {
35
- return [getSymbolsTableSQL()];
46
+ return [
47
+ getEnsureExtensionSQL(),
48
+ getSymbolsTableSQL(),
49
+ ...getSymbolsIndexSQLs(),
50
+ ];
36
51
  }
@@ -101,7 +101,7 @@ export async function resolveCategory(rows, vecs) {
101
101
  const pros = rows.map(async (r, i) => {
102
102
  const { name } = r;
103
103
  const ruleCategory = inferCategoryFromPath(r.path) || inferCategoryFromName(name);
104
- console.error(`===from ruleCategory`, name, ruleCategory);
104
+ // console.error(`===from ruleCategory`, name, ruleCategory);
105
105
  if (ruleCategory) {
106
106
  return {
107
107
  ...r,
@@ -110,7 +110,7 @@ export async function resolveCategory(rows, vecs) {
110
110
  }
111
111
  // TODO: 这里有问题,embedding是语义模板向量,categoryEmbeddingsCache是单个词的向量,相似度必然是<0.3
112
112
  const emd = categoryFromEmbedding(vecs[i]);
113
- console.error(`===from categoryFromEmbedding`, name, emd);
113
+ // console.error(`===from categoryFromEmbedding`, name, emd);
114
114
  if (emd) {
115
115
  return {
116
116
  ...r,
@@ -118,7 +118,7 @@ export async function resolveCategory(rows, vecs) {
118
118
  };
119
119
  }
120
120
  const cateLlm = await categoryFromLLM(r.content);
121
- console.error(`===from categoryFromLLM`, name, cateLlm);
121
+ // console.error(`===from categoryFromLLM`, name, cateLlm);
122
122
  return {
123
123
  ...r,
124
124
  category: cateLlm,
@@ -182,7 +182,7 @@ function processDeclaration(exportName, decl, sf, projectRoot) {
182
182
  }
183
183
  return null;
184
184
  }
185
- const DEFAULT_IGNORE = [
185
+ export const DEFAULT_IGNORE = [
186
186
  '**/node_modules/**',
187
187
  '**/dist/**',
188
188
  '**/.git/**',
@@ -1,10 +1,12 @@
1
1
  import { env } from '../config/env.js';
2
- import { getSymbolsTableSQL } from '../db/schema.js';
2
+ import { getAllTableSQLs } from '../db/schema.js';
3
+ import { SYMBOL_STATUS } from '../config/symbolStatus.js';
3
4
  /**
4
5
  * 依赖表上 `(path, name)` 唯一键:新行插入,已存在则更新类型/描述/内容与 meta;**不**修改 `usage_count`。
5
6
  * @param rows 来自 `indexProject`;空数组时立即返回,不开启事务。
6
7
  * @param embeddings 与 `rows` 等长;某项为 `null` 表示本行不更新已有 `embedding`(新行则写入 NULL)。
7
- * @returns Promise 在提交成功时 resolve;任一行失败则整批回滚并抛出异常。
8
+ * - 有值 status 置为 online(2)
9
+ * - null → 新行写 pending(1),已有行保持原 status
8
10
  */
9
11
  export async function upsertSymbols(pool, rows, embeddings) {
10
12
  if (rows.length === 0)
@@ -12,30 +14,48 @@ export async function upsertSymbols(pool, rows, embeddings) {
12
14
  if (embeddings && embeddings.length !== rows.length) {
13
15
  throw new Error('upsertSymbols: embeddings length must match rows');
14
16
  }
15
- const actor = process.env.GITHUB_USERNAME?.trim() || 'LorryIsLuRui';
16
- await pool.query(getSymbolsTableSQL()); // 确保表存在
17
- const sql = `
18
- INSERT INTO ${env.mysqlSymbolsTable} (name, type, category, path, description, content, meta, insert_user, updated_user, embedding, semantic_hash, file_hash)
19
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
20
- ON DUPLICATE KEY UPDATE
21
- type = VALUES(type),
22
- category = VALUES(category),
23
- description = VALUES(description),
24
- content = VALUES(content),
25
- meta = VALUES(meta),
26
- updated_user = VALUES(updated_user),
27
- embedding = CASE WHEN VALUES(embedding) IS NOT NULL THEN VALUES(embedding) ELSE embedding END,
28
- semantic_hash = VALUES(semantic_hash),
29
- file_hash = VALUES(file_hash)
30
- `;
31
- const conn = await pool.getConnection();
17
+ const actor = process.env.GITHUB_USERNAME?.trim() || 'system';
18
+ const client = await pool.connect();
32
19
  try {
33
- await conn.beginTransaction();
20
+ // 确保 extension + 表 + 基础索引存在
21
+ for (const sql of getAllTableSQLs()) {
22
+ await client.query(sql);
23
+ }
24
+ await client.query('BEGIN');
25
+ const t = env.symbolsTable;
26
+ const sql = `
27
+ INSERT INTO ${t}
28
+ (name, type, category, path, description, content, meta,
29
+ insert_user, updated_user, embedding, semantic_hash, file_hash, status)
30
+ VALUES ($1, $2, $3, $4, $5, $6, $7::jsonb, $8, $9, $10::vector, $11, $12, $13)
31
+ ON CONFLICT (path, name) DO UPDATE SET
32
+ type = EXCLUDED.type,
33
+ category = EXCLUDED.category,
34
+ description = EXCLUDED.description,
35
+ content = EXCLUDED.content,
36
+ meta = EXCLUDED.meta,
37
+ updated_user = EXCLUDED.updated_user,
38
+ embedding = CASE
39
+ WHEN EXCLUDED.embedding IS NOT NULL THEN EXCLUDED.embedding
40
+ WHEN EXCLUDED.semantic_hash != ${t}.semantic_hash THEN NULL
41
+ ELSE ${t}.embedding
42
+ END,
43
+ semantic_hash = EXCLUDED.semantic_hash,
44
+ file_hash = EXCLUDED.file_hash,
45
+ status = CASE
46
+ WHEN EXCLUDED.embedding IS NOT NULL THEN ${SYMBOL_STATUS.ONLINE}
47
+ WHEN EXCLUDED.semantic_hash != ${t}.semantic_hash THEN ${SYMBOL_STATUS.PENDING}
48
+ ELSE ${t}.status
49
+ END,
50
+ updated_at = NOW()
51
+ `;
34
52
  for (let i = 0; i < rows.length; i++) {
35
53
  const r = rows[i];
36
54
  const emb = embeddings?.[i];
37
- const embJson = emb !== undefined && emb !== null ? JSON.stringify(emb) : null;
38
- await conn.query(sql, [
55
+ // pgvector 接受 "[x1,x2,...]" 格式字符串
56
+ const vecStr = emb != null ? `[${emb.join(',')}]` : null;
57
+ const statusVal = vecStr !== null ? SYMBOL_STATUS.ONLINE : SYMBOL_STATUS.PENDING;
58
+ await client.query(sql, [
39
59
  r.name,
40
60
  r.type,
41
61
  r.category,
@@ -45,18 +65,19 @@ export async function upsertSymbols(pool, rows, embeddings) {
45
65
  JSON.stringify(r.meta),
46
66
  actor,
47
67
  actor,
48
- embJson,
68
+ vecStr, // $10 → cast as vector, null 时写 NULL
49
69
  r.semantic_hash,
50
70
  r.file_hash,
71
+ statusVal,
51
72
  ]);
52
73
  }
53
- await conn.commit();
74
+ await client.query('COMMIT');
54
75
  }
55
76
  catch (e) {
56
- await conn.rollback();
77
+ await client.query('ROLLBACK');
57
78
  throw e;
58
79
  }
59
80
  finally {
60
- conn.release();
81
+ client.release();
61
82
  }
62
83
  }