@lorrylurui/code-intelligence-mcp 2.0.7 → 2.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/ci-index.js +46 -57
- package/dist/indexer/persistSymbols.js +32 -47
- package/dist/prompts/reusableCodeAdvisorPrompt.js +2 -1
- package/dist/repositories/chunkRepository.js +1 -1
- package/dist/repositories/symbolRepository.js +7 -5
- package/dist/services/recommendationService.js +24 -14
- package/dist/services/reconcileIndexedSymbols.js +40 -0
- package/dist/services/reindex.js +26 -7
- package/dist/workers/embeddingWorker.js +6 -6
- package/package.json +1 -1
- package/dist/.env +0 -24
package/dist/cli/ci-index.js
CHANGED
|
@@ -2,75 +2,64 @@
|
|
|
2
2
|
import { env } from '../config/env.js';
|
|
3
3
|
import { getPool } from '../db/postgres.js';
|
|
4
4
|
import { indexProject } from '../indexer/indexProject.js';
|
|
5
|
-
import {
|
|
5
|
+
import { upsertSymbols } from '../indexer/persistSymbols.js';
|
|
6
6
|
import { enqueueEmbeddingBatch, closeEmbeddingQueue, } from '../services/embeddingQueue.js';
|
|
7
|
+
import { markRemovedSymbolsOffline } from '../services/reconcileIndexedSymbols.js';
|
|
7
8
|
/**
 * Apply a CI change set (deleted, renamed and changed files) to the symbols
 * table, then enqueue the re-indexed symbols' semantic hashes for embedding.
 *
 * All table writes run inside one transaction on a dedicated client, so a
 * failure in any step rolls the whole change set back. After a successful
 * commit the hashes are enqueued, the embedding queue is closed, and the pool
 * is ended even if enqueueing fails.
 *
 * @param opts `{ projectRoot, changedFiles, deletedFiles, renamedFiles }`;
 *   `renamedFiles` is optional (defaults to `[]`) and holds `{ from, to }` pairs.
 */
export async function runIncrementalIndex(opts) {
    const { projectRoot, changedFiles, deletedFiles, renamedFiles = [] } = opts;
    const pool = getPool();
    const tableName = env.symbolsTable;
    // Index before opening the transaction so no DB client is held while parsing.
    const rows = changedFiles.length > 0
        ? await indexProject({
            projectRoot,
            globPatterns: changedFiles,
        })
        : [];
    // Embeddings are passed as null for every row; the hashes are enqueued after commit.
    const nullPayload = rows.map(() => null);
    const hashes = [
        ...new Set(rows.map((r) => r.semantic_hash).filter(Boolean)),
    ];
    const client = await pool.connect();
    try {
        await client.query('BEGIN');
        // 1. Deleted files: reuse the reconcile helper's whole-file offline semantics.
        if (deletedFiles.length > 0) {
            await markRemovedSymbolsOffline(client, deletedFiles, []);
            for (const file of deletedFiles) {
                console.error(`[ci-index] marked offline: ${file}`);
            }
        }
        // 2. Renamed files: rewrite the stored path.
        for (const { from, to } of renamedFiles) {
            await client.query(`UPDATE ${tableName} SET path = $1 WHERE path = $2`, [to, from]);
            console.error(`[ci-index] renamed: ${from} -> ${to}`);
        }
        // 3. Changed/new files: upsert whatever the indexer produced.
        if (rows.length > 0) {
            await upsertSymbols(client, rows, nullPayload);
            for (const row of rows) {
                console.error(`[ci-index] upserted: ${row.path}:${row.name}`);
            }
        }
        // 4. Take symbols that disappeared from the changed files offline.
        //    This must not depend on `rows` being non-empty: a changed file that
        //    no longer yields any symbol still has stale rows to retire.
        //    The helper returns immediately when `changedFiles` is empty.
        await markRemovedSymbolsOffline(client, changedFiles, rows);
        await client.query('COMMIT');
    }
    catch (error) {
        try {
            await client.query('ROLLBACK');
        }
        catch (rollbackError) {
            // Keep the original failure as the thrown error; only report the rollback problem.
            console.error('[ci-index] rollback failed:', rollbackError);
        }
        throw error;
    }
    finally {
        client.release();
    }
    try {
        if (hashes.length > 0) {
            await enqueueEmbeddingBatch(hashes, env.symbolsTable);
            console.error(`[ci-index] enqueued ${hashes.length} unique semantic hashes for embedding`);
        }
        await closeEmbeddingQueue();
    }
    finally {
        await pool.end();
    }
    console.error(`[ci-index] processed ${deletedFiles.length} deletions, ${renamedFiles.length} renames, ${changedFiles.length} changes`);
}
|
|
@@ -1,29 +1,22 @@
|
|
|
1
1
|
import { env } from '../config/env.js';
|
|
2
|
-
import { getAllTableSQLs } from '../db/schema.js';
|
|
3
2
|
import { SYMBOL_STATUS } from '../config/symbolStatus.js';
|
|
4
3
|
/**
|
|
5
4
|
* 依赖表上 `(path, name)` 唯一键:新行插入,已存在则更新类型/描述/内容与 meta;**不**修改 `usage_count`。
|
|
5
|
+
* 事务与连接生命周期由调用方管理。
|
|
6
6
|
* @param rows 来自 `indexProject`;空数组时立即返回,不开启事务。
|
|
7
7
|
* @param embeddings 与 `rows` 等长;某项为 `null` 表示本行不更新已有 `embedding`(新行则写入 NULL)。
|
|
8
8
|
* - 有值 → status 置为 online(2)
|
|
9
9
|
* - null → 新行写 pending(1),已有行保持原 status
|
|
10
10
|
*/
|
|
11
|
-
export async function upsertSymbols(
|
|
11
|
+
export async function upsertSymbols(client, rows, embeddings) {
|
|
12
12
|
if (rows.length === 0)
|
|
13
13
|
return;
|
|
14
14
|
if (embeddings && embeddings.length !== rows.length) {
|
|
15
15
|
throw new Error('upsertSymbols: embeddings length must match rows');
|
|
16
16
|
}
|
|
17
17
|
const actor = process.env.GITHUB_USERNAME?.trim() || 'system';
|
|
18
|
-
const
|
|
19
|
-
|
|
20
|
-
// 确保 extension + 表 + 基础索引存在
|
|
21
|
-
for (const sql of getAllTableSQLs()) {
|
|
22
|
-
await client.query(sql);
|
|
23
|
-
}
|
|
24
|
-
await client.query('BEGIN');
|
|
25
|
-
const t = env.symbolsTable;
|
|
26
|
-
const sql = `
|
|
18
|
+
const t = env.symbolsTable;
|
|
19
|
+
const sql = `
|
|
27
20
|
INSERT INTO ${t}
|
|
28
21
|
(name, type, category, path, description, content, meta,
|
|
29
22
|
insert_user, updated_user, embedding, semantic_hash, file_hash, status)
|
|
@@ -36,48 +29,40 @@ export async function upsertSymbols(pool, rows, embeddings) {
|
|
|
36
29
|
meta = EXCLUDED.meta,
|
|
37
30
|
updated_user = EXCLUDED.updated_user,
|
|
38
31
|
embedding = CASE
|
|
39
|
-
WHEN EXCLUDED.embedding IS NOT NULL THEN EXCLUDED.embedding
|
|
40
|
-
WHEN EXCLUDED.semantic_hash != ${t}.semantic_hash THEN NULL
|
|
41
|
-
ELSE ${t}.embedding
|
|
32
|
+
WHEN EXCLUDED.embedding IS NOT NULL THEN EXCLUDED.embedding -- 本次带了新向量,直接使用
|
|
33
|
+
WHEN EXCLUDED.semantic_hash != ${t}.semantic_hash THEN NULL -- 结构变了,旧向量作废,等重算
|
|
34
|
+
ELSE ${t}.embedding -- 结构未变,复用旧向量
|
|
42
35
|
END,
|
|
43
36
|
semantic_hash = EXCLUDED.semantic_hash,
|
|
44
37
|
file_hash = EXCLUDED.file_hash,
|
|
45
38
|
status = CASE
|
|
46
|
-
WHEN EXCLUDED.embedding IS NOT NULL THEN ${SYMBOL_STATUS.ONLINE}
|
|
47
|
-
WHEN EXCLUDED.semantic_hash != ${t}.semantic_hash THEN ${SYMBOL_STATUS.PENDING}
|
|
48
|
-
|
|
39
|
+
WHEN EXCLUDED.embedding IS NOT NULL THEN ${SYMBOL_STATUS.ONLINE} -- 本次带了新向量 → 直接 online
|
|
40
|
+
WHEN EXCLUDED.semantic_hash != ${t}.semantic_hash THEN ${SYMBOL_STATUS.PENDING} -- 结构变了,需重新 embedding → pending
|
|
41
|
+
WHEN ${t}.embedding IS NOT NULL THEN ${SYMBOL_STATUS.ONLINE} -- 结构未变且已有向量(含 offline 恢复)→ online
|
|
42
|
+
ELSE ${SYMBOL_STATUS.PENDING} -- 结构未变但无向量(首次 or 之前失败)→ pending
|
|
49
43
|
END,
|
|
50
44
|
updated_at = NOW()
|
|
51
45
|
`;
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
}
|
|
74
|
-
await client.query('COMMIT');
|
|
75
|
-
}
|
|
76
|
-
catch (e) {
|
|
77
|
-
await client.query('ROLLBACK');
|
|
78
|
-
throw e;
|
|
79
|
-
}
|
|
80
|
-
finally {
|
|
81
|
-
client.release();
|
|
46
|
+
for (let i = 0; i < rows.length; i++) {
|
|
47
|
+
const r = rows[i];
|
|
48
|
+
const emb = embeddings?.[i];
|
|
49
|
+
// pgvector 接受 "[x1,x2,...]" 格式字符串
|
|
50
|
+
const vecStr = emb != null ? `[${emb.join(',')}]` : null;
|
|
51
|
+
const statusVal = vecStr !== null ? SYMBOL_STATUS.ONLINE : SYMBOL_STATUS.PENDING;
|
|
52
|
+
await client.query(sql, [
|
|
53
|
+
r.name,
|
|
54
|
+
r.type,
|
|
55
|
+
r.category,
|
|
56
|
+
r.path,
|
|
57
|
+
r.description,
|
|
58
|
+
r.content,
|
|
59
|
+
JSON.stringify(r.meta),
|
|
60
|
+
actor,
|
|
61
|
+
actor,
|
|
62
|
+
vecStr, // $10 → cast as vector, null 时写 NULL
|
|
63
|
+
r.semantic_hash,
|
|
64
|
+
r.file_hash,
|
|
65
|
+
statusVal,
|
|
66
|
+
]);
|
|
82
67
|
}
|
|
83
68
|
}
|
|
@@ -65,7 +65,7 @@ const REUSABLE_CODE_ADVISOR_MARKDOWN = `# 可复用代码推荐
|
|
|
65
65
|
> 输出上述模板后**等待用户在聊天框输入回复**,识别规则:
|
|
66
66
|
> - 用户输入 **"1"、"采纳"、"采纳推荐"、"ok"、"好的"** 或类似确认词 → 从上方输出文本中读取 \`symbolId:<id>\` 那一行的值,立即调用 \`inc_usage\` 工具传入该 id,调用成功后回复"✓ 已记录使用,可直接集成"
|
|
67
67
|
> - 用户输入 **"2"、"取消"、"不用了"** 或类似否定词 → 回复"好的,已取消",停止
|
|
68
|
-
> -
|
|
68
|
+
> - 用户输入其他内容(如追问细节、props、最小接入方式)→ 可以继续补充说明,但回答结尾**必须再次原样展示**"是否采纳"的两个选项,不得省略
|
|
69
69
|
|
|
70
70
|
无结果时:
|
|
71
71
|
|
|
@@ -85,6 +85,7 @@ const REUSABLE_CODE_ADVISOR_MARKDOWN = `# 可复用代码推荐
|
|
|
85
85
|
> 输出上述模板后**等待用户在聊天框输入回复**,识别规则:
|
|
86
86
|
> - 用户输入 **"1"、"新建"、"帮我创建"** 或类似确认词 → 进入新建流程,引导用户确认最小接口设计
|
|
87
87
|
> - 用户输入 **"2"、"取消"、"不用了"** → 回复"好的,已取消",停止
|
|
88
|
+
> - 用户输入其他内容(如追问为何没找到、想先看候选)→ 可以继续解释,但回答结尾**必须再次原样展示**"是否采纳"的两个选项,不得省略
|
|
88
89
|
`;
|
|
89
90
|
export function registerReusableCodeAdvisorPrompt(server) {
|
|
90
91
|
server.prompt('reusable-code-advisor', REUSABLE_CODE_ADVISOR_DESCRIPTION, {
|
|
@@ -146,7 +146,7 @@ export class SymbolRepository {
|
|
|
146
146
|
path ILIKE $1 OR
|
|
147
147
|
meta::text ILIKE $1
|
|
148
148
|
)
|
|
149
|
-
AND status = $2
|
|
149
|
+
AND status = $2::smallint
|
|
150
150
|
`;
|
|
151
151
|
if (tokens.length) {
|
|
152
152
|
const tokenClauses = tokens.map((token) => {
|
|
@@ -165,7 +165,7 @@ export class SymbolRepository {
|
|
|
165
165
|
meta::text ILIKE $1 OR
|
|
166
166
|
(${tokenClauses.join(' OR ')})
|
|
167
167
|
)
|
|
168
|
-
AND status = $2
|
|
168
|
+
AND status = $2::smallint
|
|
169
169
|
`;
|
|
170
170
|
}
|
|
171
171
|
if (type) {
|
|
@@ -212,7 +212,7 @@ export class SymbolRepository {
|
|
|
212
212
|
1 - (embedding <=> $1::vector) AS similarity
|
|
213
213
|
FROM ${env.symbolsTable}
|
|
214
214
|
WHERE embedding IS NOT NULL
|
|
215
|
-
|
|
215
|
+
AND status = $2::smallint
|
|
216
216
|
`;
|
|
217
217
|
if (opts?.type) {
|
|
218
218
|
params.push(opts.type);
|
|
@@ -248,8 +248,9 @@ export class SymbolRepository {
|
|
|
248
248
|
SELECT id, name, type, category, path, description, content, meta::text AS meta, usage_count, created_at
|
|
249
249
|
FROM ${env.symbolsTable}
|
|
250
250
|
WHERE name = $1
|
|
251
|
+
AND status = $2::smallint
|
|
251
252
|
LIMIT 1
|
|
252
|
-
`, [name]);
|
|
253
|
+
`, [name, SEARCHABLE_STATUS]);
|
|
253
254
|
console.error('[code-intelligence-mcp] repository.getByName.db table=%s rows=%s', env.symbolsTable, String(rows.length));
|
|
254
255
|
if (rows.length === 0) {
|
|
255
256
|
return null;
|
|
@@ -310,8 +311,9 @@ export class SymbolRepository {
|
|
|
310
311
|
let sql = `
|
|
311
312
|
SELECT id, name, type, category, path, description, content, meta::text AS meta, usage_count, created_at
|
|
312
313
|
FROM ${env.symbolsTable}
|
|
313
|
-
|
|
314
|
+
WHERE status = $1::smallint
|
|
314
315
|
`;
|
|
316
|
+
params.push(SEARCHABLE_STATUS);
|
|
315
317
|
if (type) {
|
|
316
318
|
params.push(type);
|
|
317
319
|
sql += ` AND type = $${params.length}`;
|
|
@@ -552,26 +552,36 @@ export class RecommendationService {
|
|
|
552
552
|
similarity: searchResults.find((item) => item.symbol.id === symbol.id)?.similarity ?? 0.55,
|
|
553
553
|
})), queryVariant)
|
|
554
554
|
: rankSymbols(queryVariant, combined);
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
const
|
|
559
|
-
attempt.qualifiedCount = qualifiedRanked.length;
|
|
560
|
-
if (qualifiedRanked.length === 0) {
|
|
561
|
-
attempt.skippedReason = SKIPPED_REASON.NO_QUALIFIED;
|
|
562
|
-
}
|
|
563
|
-
const prioritizedRanked = qualifiedRanked
|
|
564
|
-
.map((item) => {
|
|
555
|
+
// 优先级预排序:仅依赖 name/path,无需 meta,前置到详情补查之前。
|
|
556
|
+
// 目的:确保补查的 Top-K 是优先级调整后最可能命中的候选,
|
|
557
|
+
// 避免高语义分但字面命中弱的候选占据补查名额,遗漏字面强命中的候选。
|
|
558
|
+
const priorityScored = ranked.map((item) => {
|
|
565
559
|
const adjusted = computeRecommendationPriority(item, queryVariant);
|
|
566
560
|
return {
|
|
567
561
|
item,
|
|
568
562
|
adjustedScore: adjusted.score,
|
|
569
563
|
adjustedReason: adjusted.reason,
|
|
570
564
|
};
|
|
571
|
-
})
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
565
|
+
});
|
|
566
|
+
priorityScored.sort((a, b) => b.adjustedScore - a.adjustedScore);
|
|
567
|
+
// 对优先级预排序后的 Top-K 做详情补查(getByName 补全完整 meta)
|
|
568
|
+
const enriched = await this.enrichTopCandidatesWithDetail(priorityScored.map((e) => e.item));
|
|
569
|
+
attempt.detailEnrichedCount = enriched.enrichedCount;
|
|
570
|
+
// 将补查结果回填到 priorityScored,保持优先级排序
|
|
571
|
+
const enrichedPriorityScored = enriched.ranked.map((item, idx) => ({
|
|
572
|
+
item,
|
|
573
|
+
adjustedScore: priorityScored[idx]?.adjustedScore ?? item.score,
|
|
574
|
+
adjustedReason: priorityScored[idx]?.adjustedReason ?? item.reason.summary,
|
|
575
|
+
}));
|
|
576
|
+
// 质量门控:score 阈值 + requiredProps/Hooks 命中校验(依赖完整 meta,必须在补查之后)
|
|
577
|
+
const qualifiedRanked = enrichedPriorityScored.filter((entry) => isStrongEnoughRecommendation(entry.item, queryVariant, queriedBy, requiredProps, requiredHooks));
|
|
578
|
+
attempt.qualifiedCount = qualifiedRanked.length;
|
|
579
|
+
if (qualifiedRanked.length === 0) {
|
|
580
|
+
attempt.skippedReason = SKIPPED_REASON.NO_QUALIFIED;
|
|
581
|
+
}
|
|
582
|
+
// 已按优先级排序,直接构建候选结果
|
|
583
|
+
const candidates = qualifiedRanked.map((entry) => toCandidate(entry.item.symbol, entry.adjustedScore, entry.adjustedReason, requiredProps, requiredHooks));
|
|
584
|
+
console.error('[code-intelligence-mcp] recommendComponent.rank query=%s queriedBy=%s enriched=%s qualified=%s candidates=%s', queryVariant, queriedBy, String(enrichedPriorityScored.length), String(qualifiedRanked.length), String(candidates.length));
|
|
575
585
|
return candidates;
|
|
576
586
|
}
|
|
577
587
|
logAttemptCheckpoint(stage, attempt) {
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { env } from '../config/env.js';
|
|
2
|
+
import { SYMBOL_STATUS } from '../config/symbolStatus.js';
|
|
3
|
+
/**
 * Group symbol names by the `path` of the row they came from.
 *
 * @param rows iterable of objects exposing `path` and `name`
 * @returns a Map from path to an array of that path's distinct names,
 *   in first-seen order
 */
function buildPathToSymbolNames(rows) {
    const nameSetsByPath = new Map();
    for (const { path, name } of rows) {
        let nameSet = nameSetsByPath.get(path);
        if (nameSet == null) {
            nameSet = new Set();
            nameSetsByPath.set(path, nameSet);
        }
        nameSet.add(name);
    }
    // Callers get plain arrays rather than Sets.
    const namesByPath = new Map();
    for (const [path, nameSet] of nameSetsByPath) {
        namesByPath.set(path, Array.from(nameSet));
    }
    return namesByPath;
}
|
|
12
|
+
/**
 * Set every row stored under `path` that is not already offline to the
 * offline status, clearing its `file_hash` and refreshing `updated_at`.
 *
 * @param db object exposing `query(sql, params)` (a pool or a client)
 * @param path value matched against the table's `path` column
 */
async function markFileSymbolsOffline(db, path) {
    const sql = `UPDATE ${env.symbolsTable}
     SET status = $1::smallint, file_hash = NULL, updated_at = NOW()
     WHERE path = $2 AND status != $1::smallint`;
    const params = [SYMBOL_STATUS.OFFLINE, path];
    await db.query(sql, params);
}
|
|
17
|
+
/**
 * Mark symbols that have disappeared from the given files as offline.
 * - A path with no entry in `rows` (e.g. the whole file was deleted) has all
 *   of its symbols taken offline.
 * - A path that still has rows only loses the symbols whose names are no
 *   longer present in `rows`.
 * - Restoring the status of symbols that reappear is left to `upsertSymbols`
 *   and is not repeated here.
 * For forceRebuild, clearing embedding/status is expected to happen upstream;
 * this function applies no forced-recompute policy.
 *
 * @param db object exposing `query(sql, params)` (a pool or a client)
 * @param filePaths paths to reconcile; an empty array is a no-op
 * @param rows freshly indexed rows exposing `path` and `name`
 */
export async function markRemovedSymbolsOffline(db, filePaths, rows) {
    if (filePaths.length === 0) {
        return;
    }
    const namesByPath = buildPathToSymbolNames(rows);
    for (const path of filePaths) {
        const survivingNames = namesByPath.get(path) ?? [];
        if (survivingNames.length === 0) {
            // Nothing was indexed for this path: take the whole file offline.
            await markFileSymbolsOffline(db, path);
            continue;
        }
        // Only the symbols missing from the current index go offline.
        const params = [SYMBOL_STATUS.OFFLINE, path, survivingNames];
        await db.query(`UPDATE ${env.symbolsTable}
         SET status = $1::smallint, file_hash = NULL, updated_at = NOW()
         WHERE path = $2 AND NOT (name = ANY($3)) AND status != $1::smallint`, params);
    }
}
|
package/dist/services/reindex.js
CHANGED
|
@@ -9,6 +9,7 @@ import { upsertSymbols } from '../indexer/persistSymbols.js';
|
|
|
9
9
|
import { computeFileHash } from '../indexer/tsAstNormalizer.js';
|
|
10
10
|
import { getRelativePathForDisplay } from '../indexer/heuristics.js';
|
|
11
11
|
import { enqueueEmbeddingBatch, closeEmbeddingQueue, } from '../services/embeddingQueue.js';
|
|
12
|
+
import { markRemovedSymbolsOffline } from './reconcileIndexedSymbols.js';
|
|
12
13
|
import { SYMBOL_STATUS } from '../config/symbolStatus.js';
|
|
13
14
|
function isCallerDebugEnabled() {
|
|
14
15
|
return /^(1|true|yes|on)$/i.test(process.env.DEBUG_CALLERS ?? '');
|
|
@@ -108,15 +109,33 @@ export async function runReindex(options = {}) {
|
|
|
108
109
|
const pendingHashes = [
|
|
109
110
|
...new Set(rows.map((r) => r.semantic_hash).filter(Boolean)),
|
|
110
111
|
];
|
|
112
|
+
const relPathsForIndexedFiles = filesToIndex.map((file) => getRelativePathForDisplay(projectRoot, file));
|
|
111
113
|
if (!dryRun) {
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
await
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
114
|
+
const client = await pool.connect();
|
|
115
|
+
try {
|
|
116
|
+
await client.query('BEGIN');
|
|
117
|
+
// forceRebuild:先清空 DB 中已有的 embedding,使 worker cache check 必然 miss;
|
|
118
|
+
// file_hash 一并重置,确保本次重建与后续普通 reindex 都不会复用旧缓存判定。
|
|
119
|
+
if (forceRebuild && pendingHashes.length > 0) {
|
|
120
|
+
await client.query(`UPDATE ${env.symbolsTable}
|
|
121
|
+
SET embedding = NULL, status = $1::smallint, file_hash = NULL
|
|
122
|
+
WHERE semantic_hash = ANY($2)`, [SYMBOL_STATUS.PENDING, pendingHashes]);
|
|
123
|
+
console.error(`[reindex] forceRebuild: cleared embeddings + file_hash for ${pendingHashes.length} semantic_hash(es)`);
|
|
124
|
+
}
|
|
125
|
+
// 能复用 status=online
|
|
126
|
+
// 结构变了,不能复用 status=pending embedding=null
|
|
127
|
+
await upsertSymbols(client, rows, nullPayload);
|
|
128
|
+
// 处理 file内 symbol下线 或 整个file所有symbols下线
|
|
129
|
+
await markRemovedSymbolsOffline(client, relPathsForIndexedFiles, rows);
|
|
130
|
+
await client.query('COMMIT');
|
|
131
|
+
}
|
|
132
|
+
catch (error) {
|
|
133
|
+
await client.query('ROLLBACK');
|
|
134
|
+
throw error;
|
|
135
|
+
}
|
|
136
|
+
finally {
|
|
137
|
+
client.release();
|
|
118
138
|
}
|
|
119
|
-
await upsertSymbols(pool, rows, nullPayload);
|
|
120
139
|
if (pendingHashes.length > 0) {
|
|
121
140
|
await enqueueEmbeddingBatch(pendingHashes, env.symbolsTable);
|
|
122
141
|
console.error(`[reindex] enqueued ${pendingHashes.length} semantic_hash(es) → worker will handle embedding asynchronously`);
|
|
@@ -31,7 +31,7 @@ async function processEmbedJob(job, pool) {
|
|
|
31
31
|
const ts = () => new Date().toISOString();
|
|
32
32
|
// Step 1: 缓存命中检查 —— 相同 semantic_hash 已有 online 向量
|
|
33
33
|
const { rows: cached } = await pool.query(`SELECT embedding FROM ${table}
|
|
34
|
-
WHERE semantic_hash = $1 AND status = $2 AND embedding IS NOT NULL
|
|
34
|
+
WHERE semantic_hash = $1 AND status = $2::smallint AND embedding IS NOT NULL
|
|
35
35
|
LIMIT 1`, [semanticHash, SYMBOL_STATUS.ONLINE]);
|
|
36
36
|
let vector;
|
|
37
37
|
if (cached.length > 0) {
|
|
@@ -43,8 +43,8 @@ async function processEmbedJob(job, pool) {
|
|
|
43
43
|
: cached[0].embedding;
|
|
44
44
|
// cache hit 时只需把 pending 行的向量补齐(有可能是新增的同语义符号)
|
|
45
45
|
const cacheResult = await pool.query(`UPDATE ${table}
|
|
46
|
-
SET embedding = $1::vector, status = $2
|
|
47
|
-
WHERE semantic_hash = $3 AND status = $4`, [
|
|
46
|
+
SET embedding = $1::vector, status = $2::smallint
|
|
47
|
+
WHERE semantic_hash = $3 AND status = $4::smallint`, [
|
|
48
48
|
`[${vector.join(',')}]`,
|
|
49
49
|
SYMBOL_STATUS.ONLINE,
|
|
50
50
|
semanticHash,
|
|
@@ -56,7 +56,7 @@ async function processEmbedJob(job, pool) {
|
|
|
56
56
|
// Cache miss: 取一条 pending 行做 embedding
|
|
57
57
|
const { rows: pending } = await pool.query(`SELECT name, type, category, path, description, content, meta
|
|
58
58
|
FROM ${table}
|
|
59
|
-
WHERE semantic_hash = $1 AND status = $2
|
|
59
|
+
WHERE semantic_hash = $1 AND status = $2::smallint
|
|
60
60
|
LIMIT 1`, [semanticHash, SYMBOL_STATUS.PENDING]);
|
|
61
61
|
if (pending.length === 0) {
|
|
62
62
|
// 所有行已被并发 worker 处理,幂等退出
|
|
@@ -75,8 +75,8 @@ async function processEmbedJob(job, pool) {
|
|
|
75
75
|
const resolvedCategory = resolvedRow.category ?? null;
|
|
76
76
|
// Step 2: 批量写入 —— 覆盖所有相同 semantic_hash 的 pending 行
|
|
77
77
|
const result = await pool.query(`UPDATE ${table}
|
|
78
|
-
SET embedding = $1::vector, status = $2, category = COALESCE($3, category)
|
|
79
|
-
WHERE semantic_hash = $4 AND status = $5`, [
|
|
78
|
+
SET embedding = $1::vector, status = $2::smallint, category = COALESCE($3, category)
|
|
79
|
+
WHERE semantic_hash = $4 AND status = $5::smallint`, [
|
|
80
80
|
`[${vector.join(',')}]`,
|
|
81
81
|
SYMBOL_STATUS.ONLINE,
|
|
82
82
|
resolvedCategory,
|
package/package.json
CHANGED
package/dist/.env
DELETED
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
MYSQL_ENABLED=true
|
|
2
|
-
MYSQL_HOST=127.0.0.1
|
|
3
|
-
MYSQL_PORT=3306
|
|
4
|
-
MYSQL_USER=root
|
|
5
|
-
# 与 docker-compose.yml 中 MYSQL_ROOT_PASSWORD 一致(默认 devpassword,仅本地开发)
|
|
6
|
-
MYSQL_PASSWORD=devpassword
|
|
7
|
-
MYSQL_DATABASE=code_intelligence
|
|
8
|
-
MYSQL_SYMBOLS_TABLE=symbols
|
|
9
|
-
EMBEDDING_SERVICE_URL=http://127.0.0.1:8765
|
|
10
|
-
|
|
11
|
-
# MCP Server 传输模式:stdio | http
|
|
12
|
-
MCP_TRANSPORT=stdio
|
|
13
|
-
# HTTP 模式监听地址,0.0.0.0 支持局域网访问,公网用公网IP
|
|
14
|
-
MCP_HOST=0.0.0.0
|
|
15
|
-
# HTTP 模式端口
|
|
16
|
-
MCP_PORT=3000
|
|
17
|
-
# Docker Compose 可选:与 .env 同步(compose 会读环境变量覆盖默认值)
|
|
18
|
-
# MYSQL_ROOT_PASSWORD=devpassword
|
|
19
|
-
|
|
20
|
-
# Phase 2 索引(npm run index)
|
|
21
|
-
# INDEX_ROOT=
|
|
22
|
-
# INDEX_GLOB=src/**/*.{ts,tsx}
|
|
23
|
-
# INDEX_IGNORE=**/generated/**,**/*.spec.ts
|
|
24
|
-
GIT_CI_MOCK_SERVER=false
|