kiri-mcp-server 0.9.6 → 0.10.0
- package/README.md +144 -13
- package/dist/client/cli.js +68 -0
- package/dist/client/cli.js.map +1 -0
- package/dist/client/index.js +5 -0
- package/dist/client/index.js.map +1 -0
- package/dist/eval/metrics.js +47 -0
- package/dist/eval/metrics.js.map +1 -0
- package/dist/indexer/cli.js +362 -0
- package/dist/indexer/cli.js.map +1 -0
- package/dist/indexer/codeintel.js +182 -0
- package/dist/indexer/codeintel.js.map +1 -0
- package/dist/indexer/git.js +30 -0
- package/dist/indexer/git.js.map +1 -0
- package/dist/indexer/language.js +34 -0
- package/dist/indexer/language.js.map +1 -0
- package/dist/indexer/pipeline/filters/denylist.js +71 -0
- package/dist/indexer/pipeline/filters/denylist.js.map +1 -0
- package/dist/indexer/schema.js +101 -0
- package/dist/indexer/schema.js.map +1 -0
- package/dist/package.json +14 -1
- package/dist/server/bootstrap.js +19 -0
- package/dist/server/bootstrap.js.map +1 -0
- package/dist/server/context.js +1 -0
- package/dist/server/context.js.map +1 -0
- package/dist/server/fallbacks/degradeController.js +69 -0
- package/dist/server/fallbacks/degradeController.js.map +1 -0
- package/dist/server/handlers.js +1268 -0
- package/dist/server/handlers.js.map +1 -0
- package/dist/server/main.js +151 -0
- package/dist/server/main.js.map +1 -0
- package/dist/server/observability/metrics.js +56 -0
- package/dist/server/observability/metrics.js.map +1 -0
- package/dist/server/observability/tracing.js +58 -0
- package/dist/server/observability/tracing.js.map +1 -0
- package/dist/server/rpc.js +477 -0
- package/dist/server/rpc.js.map +1 -0
- package/dist/server/runtime.js +47 -0
- package/dist/server/runtime.js.map +1 -0
- package/dist/server/scoring.js +116 -0
- package/dist/server/scoring.js.map +1 -0
- package/dist/server/stdio.js +76 -0
- package/dist/server/stdio.js.map +1 -0
- package/dist/shared/duckdb.js +119 -0
- package/dist/shared/duckdb.js.map +1 -0
- package/dist/shared/embedding.js +98 -0
- package/dist/shared/embedding.js.map +1 -0
- package/dist/shared/index.js +9 -0
- package/dist/shared/index.js.map +1 -0
- package/dist/shared/security/config.js +64 -0
- package/dist/shared/security/config.js.map +1 -0
- package/dist/shared/security/masker.js +56 -0
- package/dist/shared/security/masker.js.map +1 -0
- package/dist/shared/tokenizer.js +4 -0
- package/dist/shared/tokenizer.js.map +1 -0
- package/dist/shared/utils/simpleYaml.js +89 -0
- package/dist/shared/utils/simpleYaml.js.map +1 -0
- package/dist/src/client/proxy.js +83 -13
- package/dist/src/client/proxy.js.map +1 -1
- package/dist/src/client/start-daemon.d.ts.map +1 -1
- package/dist/src/client/start-daemon.js +2 -1
- package/dist/src/client/start-daemon.js.map +1 -1
- package/dist/src/daemon/daemon.js +97 -18
- package/dist/src/daemon/daemon.js.map +1 -1
- package/dist/src/daemon/socket.d.ts +6 -4
- package/dist/src/daemon/socket.d.ts.map +1 -1
- package/dist/src/daemon/socket.js +62 -18
- package/dist/src/daemon/socket.js.map +1 -1
- package/dist/src/indexer/cli.d.ts +1 -0
- package/dist/src/indexer/cli.d.ts.map +1 -1
- package/dist/src/indexer/cli.js +503 -257
- package/dist/src/indexer/cli.js.map +1 -1
- package/dist/src/indexer/codeintel.d.ts +1 -1
- package/dist/src/indexer/codeintel.d.ts.map +1 -1
- package/dist/src/indexer/codeintel.js +296 -3
- package/dist/src/indexer/codeintel.js.map +1 -1
- package/dist/src/indexer/dart/analyze.d.ts +29 -0
- package/dist/src/indexer/dart/analyze.d.ts.map +1 -0
- package/dist/src/indexer/dart/analyze.js +452 -0
- package/dist/src/indexer/dart/analyze.js.map +1 -0
- package/dist/src/indexer/dart/client.d.ts +113 -0
- package/dist/src/indexer/dart/client.d.ts.map +1 -0
- package/dist/src/indexer/dart/client.js +444 -0
- package/dist/src/indexer/dart/client.js.map +1 -0
- package/dist/src/indexer/dart/config.d.ts +36 -0
- package/dist/src/indexer/dart/config.d.ts.map +1 -0
- package/dist/src/indexer/dart/config.js +62 -0
- package/dist/src/indexer/dart/config.js.map +1 -0
- package/dist/src/indexer/dart/dependencies.d.ts +17 -0
- package/dist/src/indexer/dart/dependencies.d.ts.map +1 -0
- package/dist/src/indexer/dart/dependencies.js +102 -0
- package/dist/src/indexer/dart/dependencies.js.map +1 -0
- package/dist/src/indexer/dart/pathKey.d.ts +40 -0
- package/dist/src/indexer/dart/pathKey.d.ts.map +1 -0
- package/dist/src/indexer/dart/pathKey.js +72 -0
- package/dist/src/indexer/dart/pathKey.js.map +1 -0
- package/dist/src/indexer/dart/poolGate.d.ts +57 -0
- package/dist/src/indexer/dart/poolGate.d.ts.map +1 -0
- package/dist/src/indexer/dart/poolGate.js +87 -0
- package/dist/src/indexer/dart/poolGate.js.map +1 -0
- package/dist/src/indexer/dart/sdk.d.ts +40 -0
- package/dist/src/indexer/dart/sdk.d.ts.map +1 -0
- package/dist/src/indexer/dart/sdk.js +167 -0
- package/dist/src/indexer/dart/sdk.js.map +1 -0
- package/dist/src/indexer/dart/transform.d.ts +17 -0
- package/dist/src/indexer/dart/transform.d.ts.map +1 -0
- package/dist/src/indexer/dart/transform.js +157 -0
- package/dist/src/indexer/dart/transform.js.map +1 -0
- package/dist/src/indexer/dart/types.d.ts +137 -0
- package/dist/src/indexer/dart/types.d.ts.map +1 -0
- package/dist/src/indexer/dart/types.js +5 -0
- package/dist/src/indexer/dart/types.js.map +1 -0
- package/dist/src/indexer/git.d.ts +1 -0
- package/dist/src/indexer/git.d.ts.map +1 -1
- package/dist/src/indexer/git.js +8 -0
- package/dist/src/indexer/git.js.map +1 -1
- package/dist/src/indexer/language.d.ts.map +1 -1
- package/dist/src/indexer/language.js +1 -0
- package/dist/src/indexer/language.js.map +1 -1
- package/dist/src/indexer/queue.d.ts +19 -0
- package/dist/src/indexer/queue.d.ts.map +1 -0
- package/dist/src/indexer/queue.js +50 -0
- package/dist/src/indexer/queue.js.map +1 -0
- package/dist/src/indexer/schema.d.ts +61 -1
- package/dist/src/indexer/schema.d.ts.map +1 -1
- package/dist/src/indexer/schema.js +253 -2
- package/dist/src/indexer/schema.js.map +1 -1
- package/dist/src/indexer/watch.d.ts +21 -0
- package/dist/src/indexer/watch.d.ts.map +1 -1
- package/dist/src/indexer/watch.js +189 -28
- package/dist/src/indexer/watch.js.map +1 -1
- package/dist/src/server/abbreviations.d.ts +47 -0
- package/dist/src/server/abbreviations.d.ts.map +1 -0
- package/dist/src/server/abbreviations.js +71 -0
- package/dist/src/server/abbreviations.js.map +1 -0
- package/dist/src/server/boost-profiles.d.ts +63 -0
- package/dist/src/server/boost-profiles.d.ts.map +1 -0
- package/dist/src/server/boost-profiles.js +86 -0
- package/dist/src/server/boost-profiles.js.map +1 -0
- package/dist/src/server/context.d.ts +7 -0
- package/dist/src/server/context.d.ts.map +1 -1
- package/dist/src/server/handlers.d.ts +3 -2
- package/dist/src/server/handlers.d.ts.map +1 -1
- package/dist/src/server/handlers.js +542 -96
- package/dist/src/server/handlers.js.map +1 -1
- package/dist/src/server/indexBootstrap.d.ts.map +1 -1
- package/dist/src/server/indexBootstrap.js +4 -1
- package/dist/src/server/indexBootstrap.js.map +1 -1
- package/dist/src/server/main.d.ts.map +1 -1
- package/dist/src/server/main.js +112 -30
- package/dist/src/server/main.js.map +1 -1
- package/dist/src/server/rpc.d.ts.map +1 -1
- package/dist/src/server/rpc.js +28 -9
- package/dist/src/server/rpc.js.map +1 -1
- package/dist/src/server/rrf.d.ts +86 -0
- package/dist/src/server/rrf.d.ts.map +1 -0
- package/dist/src/server/rrf.js +108 -0
- package/dist/src/server/rrf.js.map +1 -0
- package/dist/src/server/runtime.d.ts.map +1 -1
- package/dist/src/server/runtime.js +45 -6
- package/dist/src/server/runtime.js.map +1 -1
- package/dist/src/server/scoring.d.ts.map +1 -1
- package/dist/src/server/scoring.js +19 -0
- package/dist/src/server/scoring.js.map +1 -1
- package/dist/src/shared/cli/args.d.ts +70 -0
- package/dist/src/shared/cli/args.d.ts.map +1 -0
- package/dist/src/shared/cli/args.js +84 -0
- package/dist/src/shared/cli/args.js.map +1 -0
- package/dist/src/shared/duckdb.d.ts.map +1 -1
- package/dist/src/shared/duckdb.js +9 -0
- package/dist/src/shared/duckdb.js.map +1 -1
- package/dist/src/shared/embedding/engine.d.ts +38 -0
- package/dist/src/shared/embedding/engine.d.ts.map +1 -0
- package/dist/src/shared/embedding/engine.js +6 -0
- package/dist/src/shared/embedding/engine.js.map +1 -0
- package/dist/src/shared/embedding/lsh-engine.d.ts +11 -0
- package/dist/src/shared/embedding/lsh-engine.d.ts.map +1 -0
- package/dist/src/shared/embedding/lsh-engine.js +14 -0
- package/dist/src/shared/embedding/lsh-engine.js.map +1 -0
- package/dist/src/shared/embedding/registry.d.ts +25 -0
- package/dist/src/shared/embedding/registry.d.ts.map +1 -0
- package/dist/src/shared/embedding/registry.js +50 -0
- package/dist/src/shared/embedding/registry.js.map +1 -0
- package/dist/src/shared/embedding/semantic-engine.d.ts +14 -0
- package/dist/src/shared/embedding/semantic-engine.d.ts.map +1 -0
- package/dist/src/shared/embedding/semantic-engine.js +50 -0
- package/dist/src/shared/embedding/semantic-engine.js.map +1 -0
- package/dist/src/shared/models/model-manager.d.ts +38 -0
- package/dist/src/shared/models/model-manager.d.ts.map +1 -0
- package/dist/src/shared/models/model-manager.js +116 -0
- package/dist/src/shared/models/model-manager.js.map +1 -0
- package/dist/src/shared/models/model-manifest.d.ts +22 -0
- package/dist/src/shared/models/model-manifest.d.ts.map +1 -0
- package/dist/src/shared/models/model-manifest.js +24 -0
- package/dist/src/shared/models/model-manifest.js.map +1 -0
- package/dist/src/shared/utils/path.d.ts +46 -0
- package/dist/src/shared/utils/path.d.ts.map +1 -0
- package/dist/src/shared/utils/path.js +94 -0
- package/dist/src/shared/utils/path.js.map +1 -0
- package/dist/src/shared/utils/socket.d.ts +61 -0
- package/dist/src/shared/utils/socket.d.ts.map +1 -0
- package/dist/src/shared/utils/socket.js +156 -0
- package/dist/src/shared/utils/socket.js.map +1 -0
- package/dist/src/shared/utils/validation.d.ts +14 -0
- package/dist/src/shared/utils/validation.d.ts.map +1 -0
- package/dist/src/shared/utils/validation.js +22 -0
- package/dist/src/shared/utils/validation.js.map +1 -0
- package/package.json +14 -1
package/dist/src/indexer/cli.js
CHANGED
@@ -1,17 +1,54 @@
 import { createHash } from "node:crypto";
+import { existsSync } from "node:fs";
 import { readFile, stat } from "node:fs/promises";
 import { join, resolve, extname } from "node:path";
 import { pathToFileURL } from "node:url";
 import { DuckDBClient } from "../shared/duckdb.js";
 import { generateEmbedding } from "../shared/embedding.js";
+import { acquireLock, releaseLock, LockfileError, getLockOwner } from "../shared/utils/lockfile.js";
+import { normalizeDbPath, ensureDbParentDir, getRepoPathCandidates } from "../shared/utils/path.js";
 import { analyzeSource, buildFallbackSnippet } from "./codeintel.js";
-import { getDefaultBranch, getHeadCommit, gitLsFiles } from "./git.js";
+import { getDefaultBranch, getHeadCommit, gitLsFiles, gitDiffNameOnly } from "./git.js";
 import { detectLanguage } from "./language.js";
-import {
+import { getIndexerQueue } from "./queue.js";
+import { ensureBaseSchema, ensureRepoMetaColumns, rebuildFTSIfNeeded } from "./schema.js";
 import { IndexWatcher } from "./watch.js";
 const MAX_SAMPLE_BYTES = 32_768;
 const MAX_FILE_BYTES = 32 * 1024 * 1024; // 32MB limit to prevent memory exhaustion
 const SCAN_BATCH_SIZE = 100; // Process files in batches to limit memory usage
+/**
+ * Maximum number of SQL placeholders per INSERT statement.
+ *
+ * DuckDB's internal limit is 65535 placeholders, but we use a conservative value of 30000 for:
+ * 1. Safety margin: Prevents stack overflow when building large SQL strings in JavaScript
+ * 2. Performance: Smaller batches reduce memory pressure and provide better error granularity
+ * 3. Compatibility: Works safely across different DuckDB versions and system configurations
+ *
+ * This value has been validated with real-world testing:
+ * - Successfully handles 10000+ files in batch-processing.spec.ts
+ * - Prevents "Maximum call stack size exceeded" errors (Issue #39)
+ * - Balances transaction throughput vs. individual batch size
+ *
+ * Example batch sizes with this limit:
+ * - 4-column table (blob): 7500 records per batch
+ * - 5-column table (dependency): 6000 records per batch
+ * - 9-column table (symbol): 3333 records per batch
+ */
+const MAX_SQL_PLACEHOLDERS = 30000;
+/**
+ * Calculate safe batch size for SQL INSERT operations based on columns per record.
+ * Ensures total placeholders per statement stays under MAX_SQL_PLACEHOLDERS.
+ *
+ * @param columnsPerRecord - Number of columns in the INSERT statement (must be positive)
+ * @returns Safe batch size that won't exceed placeholder limit
+ * @throws {Error} If columnsPerRecord is not a positive integer
+ */
+function calculateBatchSize(columnsPerRecord) {
+    if (columnsPerRecord <= 0 || !Number.isInteger(columnsPerRecord)) {
+        throw new Error(`columnsPerRecord must be a positive integer, got: ${columnsPerRecord}`);
+    }
+    return Math.floor(MAX_SQL_PLACEHOLDERS / columnsPerRecord);
+}
 function countLines(content) {
     if (content.length === 0) {
         return 0;
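The batch sizes listed in the MAX_SQL_PLACEHOLDERS comment follow directly from the floor division in calculateBatchSize. A minimal sketch (re-declaring the constant and function exactly as they appear in this version) that reproduces those figures:

    // Sketch: reproduce the documented per-table batch sizes from the 30000-placeholder cap.
    const MAX_SQL_PLACEHOLDERS = 30000;
    function calculateBatchSize(columnsPerRecord) {
        if (columnsPerRecord <= 0 || !Number.isInteger(columnsPerRecord)) {
            throw new Error(`columnsPerRecord must be a positive integer, got: ${columnsPerRecord}`);
        }
        return Math.floor(MAX_SQL_PLACEHOLDERS / columnsPerRecord);
    }
    console.log(calculateBatchSize(4)); // 7500 -> blob (4 columns), 7500 * 4 = 30000 placeholders
    console.log(calculateBatchSize(5)); // 6000 -> dependency (5 columns), 6000 * 5 = 30000
    console.log(calculateBatchSize(9)); // 3333 -> symbol (9 columns), 3333 * 9 = 29997

Every generated INSERT therefore stays at or below the 30000-placeholder cap, well under the 65535 internal limit cited in the comment.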
@@ -35,137 +72,237 @@ function isBinaryBuffer(buffer) {
  * @param defaultBranch - Default branch name (e.g., "main", "master"), or null if unknown
  * @returns The repository ID (auto-generated on first insert, reused thereafter)
  */
-async function
-
-
-
-
-
-
-
-
+async function mergeLegacyRepoRows(db, canonicalRepoId, legacyRepoIds) {
+    if (legacyRepoIds.length === 0) {
+        return;
+    }
+    const referencingTables = await db.all(`SELECT DISTINCT c.table_name
+        FROM duckdb_columns() AS c
+        JOIN duckdb_tables() AS t
+        ON c.database_name = t.database_name
+        AND c.schema_name = t.schema_name
+        AND c.table_name = t.table_name
+        WHERE c.column_name = 'repo_id'
+        AND c.table_name <> 'repo'
+        AND t.table_type = 'BASE TABLE'`);
+    const safeTables = referencingTables
+        .map((row) => row.table_name)
+        .filter((name) => /^[A-Za-z0-9_]+$/.test(name));
+    await db.transaction(async () => {
+        for (const legacyRepoId of legacyRepoIds) {
+            for (const tableName of safeTables) {
+                await db.run(`UPDATE ${tableName} SET repo_id = ? WHERE repo_id = ?`, [
+                    canonicalRepoId,
+                    legacyRepoId,
+                ]);
+            }
+            await db.run("DELETE FROM repo WHERE id = ?", [legacyRepoId]);
+        }
+    });
+}
+async function ensureRepo(db, repoRoot, defaultBranch, candidateRoots) {
+    const searchRoots = Array.from(new Set([repoRoot, ...(candidateRoots ?? [])]));
+    const placeholders = searchRoots.map(() => "?").join(", ");
+    let rows = await db.all(`SELECT id, root FROM repo WHERE root IN (${placeholders})`, searchRoots);
+    if (rows.length === 0) {
+        await db.run(`INSERT INTO repo (root, default_branch, indexed_at)
+            VALUES (?, ?, CURRENT_TIMESTAMP)
+            ON CONFLICT(root) DO UPDATE SET
+            default_branch = COALESCE(excluded.default_branch, repo.default_branch)`, [repoRoot, defaultBranch]);
+        rows = await db.all(`SELECT id, root FROM repo WHERE root IN (${placeholders})`, searchRoots);
+    }
     if (rows.length === 0) {
         throw new Error("Failed to create or find repository record. Check database constraints and schema.");
     }
-
-if (!
+    let canonicalRow = rows.find((row) => row.root === repoRoot) ?? rows[0];
+    if (!canonicalRow) {
         throw new Error("Failed to retrieve repository record. Database returned empty result.");
     }
-
-
-
-if (blobs.size === 0)
-return;
-// Use bulk insert for better performance
-const blobArray = Array.from(blobs.values());
-const placeholders = blobArray.map(() => "(?, ?, ?, ?)").join(", ");
-const sql = `INSERT OR REPLACE INTO blob (hash, size_bytes, line_count, content) VALUES ${placeholders}`;
-const params = [];
-for (const blob of blobArray) {
-params.push(blob.hash, blob.sizeBytes, blob.lineCount, blob.content);
+    if (canonicalRow.root !== repoRoot) {
+        await db.run("UPDATE repo SET root = ? WHERE id = ?", [repoRoot, canonicalRow.id]);
+        canonicalRow = { ...canonicalRow, root: repoRoot };
     }
-
+    const legacyIds = rows.filter((row) => row.id !== canonicalRow.id).map((row) => row.id);
+    await mergeLegacyRepoRows(db, canonicalRow.id, legacyIds);
+    return canonicalRow.id;
 }
-
+/**
+ * Generic helper function to persist records in batches to prevent stack overflow.
+ * Splits large datasets into smaller batches and executes INSERT statements sequentially.
+ *
+ * IMPORTANT: This function must be called within an active database transaction.
+ * See runIndexer() for transaction management context.
+ *
+ * @param db - Database client (must be within an active transaction)
+ * @param records - Array of records to persist
+ * @param batchSize - Maximum number of records per INSERT statement
+ * @param buildInsert - Function that builds SQL and params for a batch
+ */
+async function persistInBatches(db, records, batchSize, buildInsert) {
     if (records.length === 0)
         return;
-
-
-
-
-
-
+    for (let i = 0; i < records.length; i += batchSize) {
+        const batch = records.slice(i, i + batchSize);
+        const { sql, params } = buildInsert(batch);
+        try {
+            await db.run(sql, params);
+        }
+        catch (error) {
+            // Error message includes the batch index and size (exact 0-indexed range)
+            const batchInfo = `Batch ${Math.floor(i / batchSize) + 1}/${Math.ceil(records.length / batchSize)} (records ${i}-${i + batch.length - 1})`;
+            throw new Error(`Failed to persist batch: ${batchInfo}. Original error: ${error instanceof Error ? error.message : String(error)}`);
+        }
     }
-await db.run(sql, params);
 }
+/**
+ * Persist blob records to database in batches to prevent stack overflow.
+ *
+ * IMPORTANT: This function must be called within an active database transaction.
+ * See runIndexer() for transaction management context.
+ *
+ * @param db - Database client (must be within an active transaction)
+ * @param blobs - Map of blob records to persist
+ */
+async function persistBlobs(db, blobs) {
+    const blobArray = Array.from(blobs.values());
+    const BATCH_SIZE = calculateBatchSize(4); // blob table has 4 columns
+    await persistInBatches(db, blobArray, BATCH_SIZE, (batch) => ({
+        sql: `INSERT OR REPLACE INTO blob (hash, size_bytes, line_count, content) VALUES ${batch.map(() => "(?, ?, ?, ?)").join(", ")}`,
+        params: batch.flatMap((blob) => [blob.hash, blob.sizeBytes, blob.lineCount, blob.content]),
+    }));
+}
+/**
+ * Persist tree records to database in batches to prevent stack overflow.
+ *
+ * IMPORTANT: This function must be called within an active database transaction.
+ * See runIndexer() for transaction management context.
+ *
+ * @param db - Database client (must be within an active transaction)
+ * @param repoId - Repository ID
+ * @param commitHash - Git commit hash
+ * @param records - File records to persist
+ */
+async function persistTrees(db, repoId, commitHash, records) {
+    const BATCH_SIZE = calculateBatchSize(8); // tree table has 8 columns
+    await persistInBatches(db, records, BATCH_SIZE, (batch) => ({
+        sql: `INSERT OR REPLACE INTO tree (repo_id, commit_hash, path, blob_hash, ext, lang, is_binary, mtime) VALUES ${batch.map(() => "(?, ?, ?, ?, ?, ?, ?, ?)").join(", ")}`,
+        params: batch.flatMap((record) => [
+            repoId,
+            commitHash,
+            record.path,
+            record.blobHash,
+            record.ext,
+            record.lang,
+            record.isBinary,
+            record.mtimeIso,
+        ]),
+    }));
+}
+/**
+ * Persist file records to database in batches to prevent stack overflow.
+ *
+ * IMPORTANT: This function must be called within an active database transaction.
+ * See runIndexer() for transaction management context.
+ *
+ * @param db - Database client (must be within an active transaction)
+ * @param repoId - Repository ID
+ * @param records - File records to persist
+ */
 async function persistFiles(db, repoId, records) {
-
-
-
-
-
-
-
-
-
-
+    const BATCH_SIZE = calculateBatchSize(7); // file table has 7 columns
+    await persistInBatches(db, records, BATCH_SIZE, (batch) => ({
+        sql: `INSERT OR REPLACE INTO file (repo_id, path, blob_hash, ext, lang, is_binary, mtime) VALUES ${batch.map(() => "(?, ?, ?, ?, ?, ?, ?)").join(", ")}`,
+        params: batch.flatMap((record) => [
+            repoId,
+            record.path,
+            record.blobHash,
+            record.ext,
+            record.lang,
+            record.isBinary,
+            record.mtimeIso,
+        ]),
+    }));
 }
+/**
+ * Persist symbol records to database in batches to prevent stack overflow.
+ *
+ * IMPORTANT: This function must be called within an active database transaction.
+ * See runIndexer() for transaction management context.
+ *
+ * @param db - Database client (must be within an active transaction)
+ * @param repoId - Repository ID
+ * @param records - Symbol records to persist
+ */
 async function persistSymbols(db, repoId, records) {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-}
-await db.run(sql, params);
-}
+    const BATCH_SIZE = calculateBatchSize(9); // symbol table has 9 columns
+    await persistInBatches(db, records, BATCH_SIZE, (batch) => ({
+        sql: `INSERT OR REPLACE INTO symbol (repo_id, path, symbol_id, name, kind, range_start_line, range_end_line, signature, doc) VALUES ${batch.map(() => "(?, ?, ?, ?, ?, ?, ?, ?, ?)").join(", ")}`,
+        params: batch.flatMap((r) => [
+            repoId,
+            r.path,
+            r.symbolId,
+            r.name,
+            r.kind,
+            r.rangeStartLine,
+            r.rangeEndLine,
+            r.signature,
+            r.doc,
+        ]),
+    }));
 }
+/**
+ * Persist snippet records to database in batches to prevent stack overflow.
+ *
+ * IMPORTANT: This function must be called within an active database transaction.
+ * See runIndexer() for transaction management context.
+ *
+ * @param db - Database client (must be within an active transaction)
+ * @param repoId - Repository ID
+ * @param records - Snippet records to persist
+ */
 async function persistSnippets(db, repoId, records) {
-
-
-
-
-
-const batch = records.slice(i, i + BATCH_SIZE);
-const placeholders = batch.map(() => "(?, ?, ?, ?, ?, ?)").join(", ");
-const sql = `
-INSERT OR REPLACE INTO snippet (
-repo_id, path, snippet_id, start_line, end_line, symbol_id
-) VALUES ${placeholders}
-`;
-const params = [];
-for (const record of batch) {
-params.push(repoId, record.path, record.snippetId, record.startLine, record.endLine, record.symbolId);
-}
-await db.run(sql, params);
-}
+    const BATCH_SIZE = calculateBatchSize(6); // snippet table has 6 columns
+    await persistInBatches(db, records, BATCH_SIZE, (batch) => ({
+        sql: `INSERT OR REPLACE INTO snippet (repo_id, path, snippet_id, start_line, end_line, symbol_id) VALUES ${batch.map(() => "(?, ?, ?, ?, ?, ?)").join(", ")}`,
+        params: batch.flatMap((r) => [repoId, r.path, r.snippetId, r.startLine, r.endLine, r.symbolId]),
+    }));
 }
+/**
+ * Persist file dependency records to database in batches to prevent stack overflow.
+ *
+ * MUST be called within a transaction.
+ * Batch size is dynamically calculated based on MAX_SQL_PLACEHOLDERS.
+ */
 async function persistDependencies(db, repoId, records) {
-
-
-
-
-
-const batch = records.slice(i, i + BATCH_SIZE);
-const placeholders = batch.map(() => "(?, ?, ?, ?, ?)").join(", ");
-const sql = `
-INSERT OR REPLACE INTO dependency (
-repo_id, src_path, dst_kind, dst, rel
-) VALUES ${placeholders}
-`;
-const params = [];
-for (const record of batch) {
-params.push(repoId, record.srcPath, record.dstKind, record.dst, record.rel);
-}
-await db.run(sql, params);
-}
+    const BATCH_SIZE = calculateBatchSize(5); // dependency table has 5 columns
+    await persistInBatches(db, records, BATCH_SIZE, (batch) => ({
+        sql: `INSERT OR REPLACE INTO dependency (repo_id, src_path, dst_kind, dst, rel) VALUES ${batch.map(() => "(?, ?, ?, ?, ?)").join(", ")}`,
+        params: batch.flatMap((r) => [repoId, r.srcPath, r.dstKind, r.dst, r.rel]),
+    }));
 }
+/**
+ * Persist file embedding records to database in batches to prevent stack overflow.
+ *
+ * IMPORTANT: This function must be called within an active database transaction.
+ * See runIndexer() for transaction management context.
+ *
+ * @param db - Database client (must be within an active transaction)
+ * @param repoId - Repository ID
+ * @param records - Embedding records to persist
+ */
 async function persistEmbeddings(db, repoId, records) {
-
-
-
-
-
-
-
-
-
-
-params.push(repoId, record.path, record.dims, JSON.stringify(record.vector));
-}
-await db.run(sql, params);
+    const BATCH_SIZE = calculateBatchSize(4); // file_embedding table has 4 parameterized columns
+    await persistInBatches(db, records, BATCH_SIZE, (batch) => ({
+        sql: `INSERT OR REPLACE INTO file_embedding (repo_id, path, dims, vector_json, updated_at) VALUES ${batch.map(() => "(?, ?, ?, ?, CURRENT_TIMESTAMP)").join(", ")}`,
+        params: batch.flatMap((record) => [
+            repoId,
+            record.path,
+            record.dims,
+            JSON.stringify(record.vector),
+        ]),
+    }));
 }
-function buildCodeIntel(files, blobs) {
+async function buildCodeIntel(files, blobs, workspaceRoot) {
     const fileSet = new Set(files.map((file) => file.path));
     const symbols = [];
     const snippets = [];
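All of the persist* helpers above now share one pattern: compute a batch size, then hand persistInBatches a buildInsert callback that emits one placeholder group per record. A small sketch, using the blob column layout from this file and hypothetical records, of what a single expanded statement looks like:

    // Sketch: how one batch expands into a multi-row INSERT (records are hypothetical).
    const batch = [
        { hash: "a1", sizeBytes: 10, lineCount: 1, content: "x" },
        { hash: "b2", sizeBytes: 20, lineCount: 2, content: "y" },
        { hash: "c3", sizeBytes: 30, lineCount: 3, content: "z" },
    ];
    const sql = `INSERT OR REPLACE INTO blob (hash, size_bytes, line_count, content) VALUES ${batch.map(() => "(?, ?, ?, ?)").join(", ")}`;
    const params = batch.flatMap((b) => [b.hash, b.sizeBytes, b.lineCount, b.content]);
    console.log(sql);           // ...VALUES (?, ?, ?, ?), (?, ?, ?, ?), (?, ?, ?, ?)
    console.log(params.length); // 12 - one parameter per placeholder

Building the statement per batch, rather than for the whole repository at once, is what avoids the "Maximum call stack size exceeded" failure referenced in the MAX_SQL_PLACEHOLDERS comment.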
@@ -178,7 +315,7 @@ function buildCodeIntel(files, blobs) {
         if (!blob || blob.content === null) {
             continue;
         }
-const analysis = analyzeSource(file.path, file.lang, blob.content, fileSet);
+        const analysis = await analyzeSource(file.path, file.lang, blob.content, fileSet, workspaceRoot);
         for (const symbol of analysis.symbols) {
             symbols.push({
                 path: file.path,
@@ -234,9 +371,10 @@ async function scanFilesInBatches(repoRoot, paths) {
     const allBlobs = new Map();
     const allFiles = [];
     const allEmbeddings = [];
+    const allMissingPaths = [];
     for (let i = 0; i < paths.length; i += SCAN_BATCH_SIZE) {
         const batch = paths.slice(i, i + SCAN_BATCH_SIZE);
-const { blobs, files, embeddings } = await scanFiles(repoRoot, batch);
+        const { blobs, files, embeddings, missingPaths } = await scanFiles(repoRoot, batch);
         // Merge: blobs are unique by hash, so deduplicate
         for (const [hash, blob] of blobs) {
             if (!allBlobs.has(hash)) {
@@ -245,15 +383,22 @@ async function scanFilesInBatches(repoRoot, paths) {
         }
         allFiles.push(...files);
         allEmbeddings.push(...embeddings);
+        allMissingPaths.push(...missingPaths);
         // Explicitly clear the batch data to encourage GC
         blobs.clear();
     }
-return {
+    return {
+        blobs: allBlobs,
+        files: allFiles,
+        embeddings: allEmbeddings,
+        missingPaths: allMissingPaths,
+    };
 }
 async function scanFiles(repoRoot, paths) {
     const blobs = new Map();
     const files = [];
     const embeddings = [];
+    const missingPaths = [];
     for (const relativePath of paths) {
         const absolutePath = join(repoRoot, relativePath);
         try {
@@ -302,11 +447,17 @@ async function scanFiles(repoRoot, paths) {
             }
         }
         catch (error) {
+            // Fix #4: Track deleted files (ENOENT) for database cleanup
+            if (error.code === "ENOENT") {
+                missingPaths.push(relativePath);
+                continue;
+            }
+            // Other errors (permissions, etc.) - log and skip
             console.warn(`Cannot read ${relativePath} due to filesystem error. Fix file permissions or remove the file.`);
             console.warn(error);
         }
     }
-return { blobs, files, embeddings };
+    return { blobs, files, embeddings, missingPaths };
 }
 /**
  * Fetch the existing file hashes from the DB (for incremental indexing)
@@ -383,161 +534,242 @@ async function deleteFileRecords(db, repoId, headCommit, path) {
     await db.run("DELETE FROM file WHERE repo_id = ? AND path = ?", [repoId, path]);
 }
 export async function runIndexer(options) {
-const
-const
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    const repoPathCandidates = getRepoPathCandidates(options.repoRoot);
+    const repoRoot = repoPathCandidates[0];
+    if (!repoRoot) {
+        throw new Error(`Unable to resolve repository root for ${options.repoRoot}`);
+    }
+    let databasePath;
+    // Fix #2: Ensure parent directory exists BEFORE normalization
+    // This guarantees consistent path normalization on first and subsequent runs
+    await ensureDbParentDir(options.databasePath);
+    // Critical: Use normalizeDbPath to ensure consistent path across runs
+    // This prevents lock file and queue key bypass when DB is accessed via symlink
+    databasePath = normalizeDbPath(options.databasePath);
+    // DuckDB single-writer constraint: to prevent parallel writes to the same databasePath,
+    // serialize them through a per-databasePath queue
+    return getIndexerQueue(databasePath).add(async () => {
+        // Fix #1 & #2: Add file lock for multi-process safety (unless caller already holds lock)
+        const lockfilePath = `${databasePath}.lock`;
+        let lockAcquired = false;
+        if (!options.skipLocking) {
+            try {
+                acquireLock(lockfilePath);
+                lockAcquired = true;
             }
-
-
-
-
-
-for (const file of files) {
-const existingHash = existingHashes.get(file.path);
-if (existingHash !== file.blobHash) {
-changedFiles.push(file);
-const blob = blobs.get(file.blobHash);
-if (blob) {
-changedBlobs.set(blob.hash, blob);
-}
+            catch (error) {
+                if (error instanceof LockfileError) {
+                    const ownerPid = error.ownerPid ?? getLockOwner(lockfilePath);
+                    const ownerInfo = ownerPid ? ` (PID: ${ownerPid})` : "";
+                    throw new Error(`Another indexing process${ownerInfo} holds the lock for ${databasePath}. Please wait for it to complete.`);
                 }
+                throw error;
             }
-
-
-
-
-
+        }
+        let db = null;
+        try {
+            const dbClient = await DuckDBClient.connect({ databasePath, ensureDirectory: true });
+            db = dbClient;
+            await ensureBaseSchema(dbClient);
+            // Phase 3: Ensure FTS metadata columns exist for existing DBs (migration)
+            await ensureRepoMetaColumns(dbClient);
+            const [headCommit, defaultBranch] = await Promise.all([
+                getHeadCommit(repoRoot),
+                getDefaultBranch(repoRoot),
+            ]);
+            const repoId = await ensureRepo(dbClient, repoRoot, defaultBranch, repoPathCandidates);
+            // Incremental mode: only reindex files in changedPaths (empty array means no-op)
+            if (options.changedPaths) {
+                // First, reconcile deleted files (handle renames/deletions)
+                const deletedPaths = await reconcileDeletedFiles(dbClient, repoId, repoRoot);
+                if (deletedPaths.length > 0) {
+                    console.info(`Removed ${deletedPaths.length} deleted file(s) from index.`);
                 }
-
-
+                const existingHashes = await getExistingFileHashes(dbClient, repoId);
+                const { blobs, files, embeddings, missingPaths } = await scanFilesInBatches(repoRoot, options.changedPaths);
+                // Filter out files that haven't actually changed (same hash)
+                const changedFiles = [];
+                const changedBlobs = new Map();
+                for (const file of files) {
+                    const existingHash = existingHashes.get(file.path);
+                    if (existingHash !== file.blobHash) {
+                        changedFiles.push(file);
+                        const blob = blobs.get(file.blobHash);
+                        if (blob) {
+                            changedBlobs.set(blob.hash, blob);
+                        }
+                    }
                 }
-
-
-
-
-
-
-
-const blob = changedBlobs.get(file.blobHash);
-if (!blob)
-continue;
-// Build code intelligence for this file
-const fileSymbols = [];
-const fileSnippets = [];
-const fileDependencies = [];
-if (!file.isBinary && blob.content) {
-const analysis = analyzeSource(file.path, file.lang, blob.content, fileSet);
-for (const symbol of analysis.symbols) {
-fileSymbols.push({
-path: file.path,
-symbolId: symbol.symbolId,
-name: symbol.name,
-kind: symbol.kind,
-rangeStartLine: symbol.rangeStartLine,
-rangeEndLine: symbol.rangeEndLine,
-signature: symbol.signature,
-doc: symbol.doc,
-});
+                if (changedFiles.length === 0 && missingPaths.length === 0) {
+                    console.info(`No actual changes detected in ${options.changedPaths.length} file(s). Skipping reindex.`);
+                    // Fix #3 & #4: If files were deleted (git or watch mode), still need to dirty FTS and rebuild
+                    if (deletedPaths.length > 0) {
+                        console.info(`${deletedPaths.length} file(s) deleted (git) - marking FTS dirty`);
+                        if (defaultBranch) {
+                            await dbClient.run("UPDATE repo SET indexed_at = CURRENT_TIMESTAMP, default_branch = ?, fts_dirty = true, fts_generation = fts_generation + 1 WHERE id = ?", [defaultBranch, repoId]);
                         }
-
+                        else {
+                            await dbClient.run("UPDATE repo SET indexed_at = CURRENT_TIMESTAMP, fts_dirty = true, fts_generation = fts_generation + 1 WHERE id = ?", [repoId]);
+                        }
+                        await rebuildFTSIfNeeded(dbClient, repoId);
+                    }
+                    else {
+                        // No deletions either - just update timestamp
+                        if (defaultBranch) {
+                            await dbClient.run("UPDATE repo SET indexed_at = CURRENT_TIMESTAMP, default_branch = ? WHERE id = ?", [defaultBranch, repoId]);
+                        }
+                        else {
+                            await dbClient.run("UPDATE repo SET indexed_at = CURRENT_TIMESTAMP WHERE id = ?", [
+                                repoId,
+                            ]);
+                        }
+                    }
+                    return;
+                }
+                // Process all changed files in a single transaction for atomicity
+                const fileSet = new Set(files.map((f) => f.path));
+                const embeddingMap = new Map();
+                for (const embedding of embeddings) {
+                    embeddingMap.set(embedding.path, embedding);
+                }
+                let processedCount = 0;
+                await dbClient.transaction(async () => {
+                    // Fix #5: Handle deleted files from watch mode (uncommitted deletions) INSIDE transaction
+                    // This ensures deletion + FTS dirty flag update are atomic
+                    if (missingPaths.length > 0) {
+                        // Loop through each missing file and delete with headCommit
+                        for (const path of missingPaths) {
+                            await deleteFileRecords(dbClient, repoId, headCommit, path);
+                        }
+                        console.info(`Removed ${missingPaths.length} missing file(s) from index (watch mode deletion).`);
+                    }
+                    // Process changed files
+                    for (const file of changedFiles) {
+                        const blob = changedBlobs.get(file.blobHash);
+                        if (!blob)
+                            continue;
+                        // Build code intelligence for this file
+                        const fileSymbols = [];
+                        const fileSnippets = [];
+                        const fileDependencies = [];
+                        if (!file.isBinary && blob.content) {
+                            const analysis = await analyzeSource(file.path, file.lang, blob.content, fileSet, repoRoot);
+                            for (const symbol of analysis.symbols) {
+                                fileSymbols.push({
+                                    path: file.path,
+                                    symbolId: symbol.symbolId,
+                                    name: symbol.name,
+                                    kind: symbol.kind,
+                                    rangeStartLine: symbol.rangeStartLine,
+                                    rangeEndLine: symbol.rangeEndLine,
+                                    signature: symbol.signature,
+                                    doc: symbol.doc,
+                                });
+                            }
+                            for (const snippet of analysis.snippets) {
+                                fileSnippets.push({
+                                    path: file.path,
+                                    snippetId: snippet.startLine,
+                                    startLine: snippet.startLine,
+                                    endLine: snippet.endLine,
+                                    symbolId: snippet.symbolId,
+                                });
+                            }
+                            for (const dep of analysis.dependencies) {
+                                fileDependencies.push({
+                                    srcPath: file.path,
+                                    dstKind: dep.dstKind,
+                                    dst: dep.dst,
+                                    rel: dep.rel,
+                                });
+                            }
+                        }
+                        else {
+                            // Binary or no content: add fallback snippet
+                            const fallback = buildFallbackSnippet(blob.lineCount ?? 1);
                             fileSnippets.push({
                                 path: file.path,
-snippetId:
-startLine:
-endLine:
-symbolId:
+                                snippetId: fallback.startLine,
+                                startLine: fallback.startLine,
+                                endLine: fallback.endLine,
+                                symbolId: fallback.symbolId,
                             });
                         }
-
-
-
-
-
-
-
+                        const fileEmbedding = embeddingMap.get(file.path) ?? null;
+                        // Delete old records for this file (within main transaction)
+                        await deleteFileRecords(dbClient, repoId, headCommit, file.path);
+                        // Insert new records (within main transaction)
+                        await persistBlobs(dbClient, new Map([[blob.hash, blob]]));
+                        await persistTrees(dbClient, repoId, headCommit, [file]);
+                        await persistFiles(dbClient, repoId, [file]);
+                        await persistSymbols(dbClient, repoId, fileSymbols);
+                        await persistSnippets(dbClient, repoId, fileSnippets);
+                        await persistDependencies(dbClient, repoId, fileDependencies);
+                        if (fileEmbedding) {
+                            await persistEmbeddings(dbClient, repoId, [fileEmbedding]);
                         }
+                        processedCount++;
                     }
-
-
-
-
-path: file.path,
-snippetId: fallback.startLine,
-startLine: fallback.startLine,
-endLine: fallback.endLine,
-symbolId: fallback.symbolId,
-});
+                    // Update timestamp and mark FTS dirty inside transaction for atomicity
+                    // Fix: Increment fts_generation to prevent lost updates during concurrent rebuilds
+                    if (defaultBranch) {
+                        await dbClient.run("UPDATE repo SET indexed_at = CURRENT_TIMESTAMP, default_branch = ?, fts_dirty = true, fts_generation = fts_generation + 1 WHERE id = ?", [defaultBranch, repoId]);
                     }
-
-
-await deleteFileRecords(db, repoId, headCommit, file.path);
-// Insert new records (within main transaction)
-await persistBlobs(db, new Map([[blob.hash, blob]]));
-await persistTrees(db, repoId, headCommit, [file]);
-await persistFiles(db, repoId, [file]);
-await persistSymbols(db, repoId, fileSymbols);
-await persistSnippets(db, repoId, fileSnippets);
-await persistDependencies(db, repoId, fileDependencies);
-if (fileEmbedding) {
-await persistEmbeddings(db, repoId, [fileEmbedding]);
+                    else {
+                        await dbClient.run("UPDATE repo SET indexed_at = CURRENT_TIMESTAMP, fts_dirty = true, fts_generation = fts_generation + 1 WHERE id = ?", [repoId]);
                     }
-
-}
-//
+                });
+                console.info(`Incrementally indexed ${processedCount} changed file(s) for repo ${repoRoot} at ${databasePath} (commit ${headCommit.slice(0, 12)})`);
+                // Phase 2+3: Rebuild FTS index after incremental updates (dirty=true triggers rebuild)
+                await rebuildFTSIfNeeded(dbClient, repoId);
+                return;
+            }
+            // Full mode: reindex entire repository
+            const paths = await gitLsFiles(repoRoot);
+            const { blobs, files, embeddings, missingPaths } = await scanFilesInBatches(repoRoot, paths);
+            // In full mode, missingPaths should be rare (git ls-files returns existing files)
+            // But log them if they occur (race condition: file deleted between ls-files and scan)
+            if (missingPaths.length > 0) {
+                console.warn(`${missingPaths.length} file(s) disappeared during full reindex (race condition)`);
+            }
+            const codeIntel = await buildCodeIntel(files, blobs, repoRoot);
+            await dbClient.transaction(async () => {
+                await dbClient.run("DELETE FROM tree WHERE repo_id = ?", [repoId]);
+                await dbClient.run("DELETE FROM file WHERE repo_id = ?", [repoId]);
+                await dbClient.run("DELETE FROM symbol WHERE repo_id = ?", [repoId]);
+                await dbClient.run("DELETE FROM snippet WHERE repo_id = ?", [repoId]);
+                await dbClient.run("DELETE FROM dependency WHERE repo_id = ?", [repoId]);
+                await dbClient.run("DELETE FROM file_embedding WHERE repo_id = ?", [repoId]);
+                await persistBlobs(dbClient, blobs);
+                await persistTrees(dbClient, repoId, headCommit, files);
+                await persistFiles(dbClient, repoId, files);
+                await persistSymbols(dbClient, repoId, codeIntel.symbols);
+                await persistSnippets(dbClient, repoId, codeIntel.snippets);
+                await persistDependencies(dbClient, repoId, codeIntel.dependencies);
+                await persistEmbeddings(dbClient, repoId, embeddings);
+                // Update timestamp and mark FTS dirty inside transaction to ensure atomicity
+                // Fix: Increment fts_generation to prevent lost updates during concurrent rebuilds
                 if (defaultBranch) {
-await
+                    await dbClient.run("UPDATE repo SET indexed_at = CURRENT_TIMESTAMP, default_branch = ?, fts_dirty = true, fts_generation = fts_generation + 1 WHERE id = ?", [defaultBranch, repoId]);
                 }
                 else {
-await
+                    await dbClient.run("UPDATE repo SET indexed_at = CURRENT_TIMESTAMP, fts_dirty = true, fts_generation = fts_generation + 1 WHERE id = ?", [repoId]);
                 }
             });
-console.info(`
-
+            console.info(`Indexed ${files.length} files for repo ${repoRoot} at ${databasePath} (commit ${headCommit.slice(0, 12)})`);
+            // Phase 2+3: Force rebuild FTS index after full reindex
+            await rebuildFTSIfNeeded(dbClient, repoId, true);
         }
-
-
-
-
-await db.transaction(async () => {
-await db.run("DELETE FROM tree WHERE repo_id = ?", [repoId]);
-await db.run("DELETE FROM file WHERE repo_id = ?", [repoId]);
-await db.run("DELETE FROM symbol WHERE repo_id = ?", [repoId]);
-await db.run("DELETE FROM snippet WHERE repo_id = ?", [repoId]);
-await db.run("DELETE FROM dependency WHERE repo_id = ?", [repoId]);
-await db.run("DELETE FROM file_embedding WHERE repo_id = ?", [repoId]);
-await persistBlobs(db, blobs);
-await persistTrees(db, repoId, headCommit, files);
-await persistFiles(db, repoId, files);
-await persistSymbols(db, repoId, codeIntel.symbols);
-await persistSnippets(db, repoId, codeIntel.snippets);
-await persistDependencies(db, repoId, codeIntel.dependencies);
-await persistEmbeddings(db, repoId, embeddings);
-// Update timestamp inside transaction to ensure atomicity
-if (defaultBranch) {
-await db.run("UPDATE repo SET indexed_at = CURRENT_TIMESTAMP, default_branch = ? WHERE id = ?", [defaultBranch, repoId]);
+        finally {
+            // Fix #2: Ensure lock is released even if DB connection fails
+            if (db) {
+                await db.close();
             }
-
-
+            if (lockAcquired) {
+                releaseLock(lockfilePath);
             }
-}
-
-}
-finally {
-await db.close();
-}
+        }
+    });
 }
 function parseArg(flag) {
     const index = process.argv.indexOf(flag);
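As read from this hunk, runIndexer now serializes all writes through a per-databasePath queue, takes a <databasePath>.lock file unless the caller passes skipLocking, and picks incremental vs. full mode based on whether options.changedPaths is present. A hedged usage sketch; the option names come from this diff, while the import path and concrete values are illustrative assumptions:

    // Sketch: driving the indexer programmatically (illustrative only).
    // runIndexer is the export defined in dist/src/indexer/cli.js above; whether the
    // package exposes this deep path to consumers is an assumption.
    import { runIndexer } from "./dist/src/indexer/cli.js";

    await runIndexer({
        repoRoot: "/path/to/repo",              // canonicalized via getRepoPathCandidates()
        databasePath: "/path/to/index.duckdb",  // normalized via normalizeDbPath() before locking
        changedPaths: ["src/app.ts"],           // presence of this array selects incremental mode
        // skipLocking: true,                   // only if the caller already holds the lock file
    });

Omitting changedPaths (or requesting full mode) falls through to the full-reindex branch, which deletes the repo's rows and repopulates them inside one transaction before forcing an FTS rebuild.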
@@ -554,12 +786,26 @@ if (import.meta.url === pathToFileURL(process.argv[1] ?? "").href) {
     const watch = process.argv.includes("--watch");
     const debounceMs = parseInt(parseArg("--debounce") ?? "500", 10);
     const options = { repoRoot, databasePath, full: full || !since };
-
-
-
-
-
-
+    const main = async () => {
+        if (since) {
+            options.since = since;
+            if (!options.full) {
+                const diffPaths = await gitDiffNameOnly(repoRoot, since);
+                options.changedPaths = diffPaths;
+                if (diffPaths.length === 0) {
+                    console.info(`No tracked changes since ${since}. Skipping incremental scan.`);
+                }
+            }
+        }
+        const dbMissing = !existsSync(databasePath);
+        const shouldIndex = options.full || !options.changedPaths || options.changedPaths.length > 0 || dbMissing;
+        if (shouldIndex) {
+            await runIndexer(options);
+        }
+        else {
+            // No diff results and not running full indexing: keep metadata fresh without DB writes
+            console.info("No files to reindex. Database remains unchanged.");
+        }
         if (watch) {
             // Start watch mode after initial indexing completes
             const abortController = new AbortController();
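The new main() wrapper skips runIndexer in exactly one case: an incremental run whose git diff came back empty while the database file already exists. A small sketch of the same shouldIndex predicate evaluated on hypothetical inputs:

    // Sketch: the shouldIndex decision from main(), with hypothetical inputs.
    const shouldIndex = (options, dbMissing) =>
        options.full || !options.changedPaths || options.changedPaths.length > 0 || dbMissing;

    console.log(shouldIndex({ full: true }, false));                    // true  - full reindex requested
    console.log(shouldIndex({ full: false }, false));                   // true  - no --since diff was computed
    console.log(shouldIndex({ full: false, changedPaths: [] }, false)); // false - empty diff and DB present
    console.log(shouldIndex({ full: false, changedPaths: [] }, true));  // true  - empty diff but DB missing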
@@ -578,8 +824,8 @@ if (import.meta.url === pathToFileURL(process.argv[1] ?? "").href) {
             process.on("SIGTERM", shutdownHandler);
             await watcher.start();
         }
-}
-
+    };
+    main().catch((error) => {
         console.error("Failed to index repository. Retry after resolving the logged error.");
         console.error(error);
         process.exitCode = 1;