kiri-mcp-server 0.9.6 → 0.10.0

This diff shows the publicly released contents of the two package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (207)
  1. package/README.md +144 -13
  2. package/dist/client/cli.js +68 -0
  3. package/dist/client/cli.js.map +1 -0
  4. package/dist/client/index.js +5 -0
  5. package/dist/client/index.js.map +1 -0
  6. package/dist/eval/metrics.js +47 -0
  7. package/dist/eval/metrics.js.map +1 -0
  8. package/dist/indexer/cli.js +362 -0
  9. package/dist/indexer/cli.js.map +1 -0
  10. package/dist/indexer/codeintel.js +182 -0
  11. package/dist/indexer/codeintel.js.map +1 -0
  12. package/dist/indexer/git.js +30 -0
  13. package/dist/indexer/git.js.map +1 -0
  14. package/dist/indexer/language.js +34 -0
  15. package/dist/indexer/language.js.map +1 -0
  16. package/dist/indexer/pipeline/filters/denylist.js +71 -0
  17. package/dist/indexer/pipeline/filters/denylist.js.map +1 -0
  18. package/dist/indexer/schema.js +101 -0
  19. package/dist/indexer/schema.js.map +1 -0
  20. package/dist/package.json +14 -1
  21. package/dist/server/bootstrap.js +19 -0
  22. package/dist/server/bootstrap.js.map +1 -0
  23. package/dist/server/context.js +1 -0
  24. package/dist/server/context.js.map +1 -0
  25. package/dist/server/fallbacks/degradeController.js +69 -0
  26. package/dist/server/fallbacks/degradeController.js.map +1 -0
  27. package/dist/server/handlers.js +1268 -0
  28. package/dist/server/handlers.js.map +1 -0
  29. package/dist/server/main.js +151 -0
  30. package/dist/server/main.js.map +1 -0
  31. package/dist/server/observability/metrics.js +56 -0
  32. package/dist/server/observability/metrics.js.map +1 -0
  33. package/dist/server/observability/tracing.js +58 -0
  34. package/dist/server/observability/tracing.js.map +1 -0
  35. package/dist/server/rpc.js +477 -0
  36. package/dist/server/rpc.js.map +1 -0
  37. package/dist/server/runtime.js +47 -0
  38. package/dist/server/runtime.js.map +1 -0
  39. package/dist/server/scoring.js +116 -0
  40. package/dist/server/scoring.js.map +1 -0
  41. package/dist/server/stdio.js +76 -0
  42. package/dist/server/stdio.js.map +1 -0
  43. package/dist/shared/duckdb.js +119 -0
  44. package/dist/shared/duckdb.js.map +1 -0
  45. package/dist/shared/embedding.js +98 -0
  46. package/dist/shared/embedding.js.map +1 -0
  47. package/dist/shared/index.js +9 -0
  48. package/dist/shared/index.js.map +1 -0
  49. package/dist/shared/security/config.js +64 -0
  50. package/dist/shared/security/config.js.map +1 -0
  51. package/dist/shared/security/masker.js +56 -0
  52. package/dist/shared/security/masker.js.map +1 -0
  53. package/dist/shared/tokenizer.js +4 -0
  54. package/dist/shared/tokenizer.js.map +1 -0
  55. package/dist/shared/utils/simpleYaml.js +89 -0
  56. package/dist/shared/utils/simpleYaml.js.map +1 -0
  57. package/dist/src/client/proxy.js +83 -13
  58. package/dist/src/client/proxy.js.map +1 -1
  59. package/dist/src/client/start-daemon.d.ts.map +1 -1
  60. package/dist/src/client/start-daemon.js +2 -1
  61. package/dist/src/client/start-daemon.js.map +1 -1
  62. package/dist/src/daemon/daemon.js +97 -18
  63. package/dist/src/daemon/daemon.js.map +1 -1
  64. package/dist/src/daemon/socket.d.ts +6 -4
  65. package/dist/src/daemon/socket.d.ts.map +1 -1
  66. package/dist/src/daemon/socket.js +62 -18
  67. package/dist/src/daemon/socket.js.map +1 -1
  68. package/dist/src/indexer/cli.d.ts +1 -0
  69. package/dist/src/indexer/cli.d.ts.map +1 -1
  70. package/dist/src/indexer/cli.js +503 -257
  71. package/dist/src/indexer/cli.js.map +1 -1
  72. package/dist/src/indexer/codeintel.d.ts +1 -1
  73. package/dist/src/indexer/codeintel.d.ts.map +1 -1
  74. package/dist/src/indexer/codeintel.js +296 -3
  75. package/dist/src/indexer/codeintel.js.map +1 -1
  76. package/dist/src/indexer/dart/analyze.d.ts +29 -0
  77. package/dist/src/indexer/dart/analyze.d.ts.map +1 -0
  78. package/dist/src/indexer/dart/analyze.js +452 -0
  79. package/dist/src/indexer/dart/analyze.js.map +1 -0
  80. package/dist/src/indexer/dart/client.d.ts +113 -0
  81. package/dist/src/indexer/dart/client.d.ts.map +1 -0
  82. package/dist/src/indexer/dart/client.js +444 -0
  83. package/dist/src/indexer/dart/client.js.map +1 -0
  84. package/dist/src/indexer/dart/config.d.ts +36 -0
  85. package/dist/src/indexer/dart/config.d.ts.map +1 -0
  86. package/dist/src/indexer/dart/config.js +62 -0
  87. package/dist/src/indexer/dart/config.js.map +1 -0
  88. package/dist/src/indexer/dart/dependencies.d.ts +17 -0
  89. package/dist/src/indexer/dart/dependencies.d.ts.map +1 -0
  90. package/dist/src/indexer/dart/dependencies.js +102 -0
  91. package/dist/src/indexer/dart/dependencies.js.map +1 -0
  92. package/dist/src/indexer/dart/pathKey.d.ts +40 -0
  93. package/dist/src/indexer/dart/pathKey.d.ts.map +1 -0
  94. package/dist/src/indexer/dart/pathKey.js +72 -0
  95. package/dist/src/indexer/dart/pathKey.js.map +1 -0
  96. package/dist/src/indexer/dart/poolGate.d.ts +57 -0
  97. package/dist/src/indexer/dart/poolGate.d.ts.map +1 -0
  98. package/dist/src/indexer/dart/poolGate.js +87 -0
  99. package/dist/src/indexer/dart/poolGate.js.map +1 -0
  100. package/dist/src/indexer/dart/sdk.d.ts +40 -0
  101. package/dist/src/indexer/dart/sdk.d.ts.map +1 -0
  102. package/dist/src/indexer/dart/sdk.js +167 -0
  103. package/dist/src/indexer/dart/sdk.js.map +1 -0
  104. package/dist/src/indexer/dart/transform.d.ts +17 -0
  105. package/dist/src/indexer/dart/transform.d.ts.map +1 -0
  106. package/dist/src/indexer/dart/transform.js +157 -0
  107. package/dist/src/indexer/dart/transform.js.map +1 -0
  108. package/dist/src/indexer/dart/types.d.ts +137 -0
  109. package/dist/src/indexer/dart/types.d.ts.map +1 -0
  110. package/dist/src/indexer/dart/types.js +5 -0
  111. package/dist/src/indexer/dart/types.js.map +1 -0
  112. package/dist/src/indexer/git.d.ts +1 -0
  113. package/dist/src/indexer/git.d.ts.map +1 -1
  114. package/dist/src/indexer/git.js +8 -0
  115. package/dist/src/indexer/git.js.map +1 -1
  116. package/dist/src/indexer/language.d.ts.map +1 -1
  117. package/dist/src/indexer/language.js +1 -0
  118. package/dist/src/indexer/language.js.map +1 -1
  119. package/dist/src/indexer/queue.d.ts +19 -0
  120. package/dist/src/indexer/queue.d.ts.map +1 -0
  121. package/dist/src/indexer/queue.js +50 -0
  122. package/dist/src/indexer/queue.js.map +1 -0
  123. package/dist/src/indexer/schema.d.ts +61 -1
  124. package/dist/src/indexer/schema.d.ts.map +1 -1
  125. package/dist/src/indexer/schema.js +253 -2
  126. package/dist/src/indexer/schema.js.map +1 -1
  127. package/dist/src/indexer/watch.d.ts +21 -0
  128. package/dist/src/indexer/watch.d.ts.map +1 -1
  129. package/dist/src/indexer/watch.js +189 -28
  130. package/dist/src/indexer/watch.js.map +1 -1
  131. package/dist/src/server/abbreviations.d.ts +47 -0
  132. package/dist/src/server/abbreviations.d.ts.map +1 -0
  133. package/dist/src/server/abbreviations.js +71 -0
  134. package/dist/src/server/abbreviations.js.map +1 -0
  135. package/dist/src/server/boost-profiles.d.ts +63 -0
  136. package/dist/src/server/boost-profiles.d.ts.map +1 -0
  137. package/dist/src/server/boost-profiles.js +86 -0
  138. package/dist/src/server/boost-profiles.js.map +1 -0
  139. package/dist/src/server/context.d.ts +7 -0
  140. package/dist/src/server/context.d.ts.map +1 -1
  141. package/dist/src/server/handlers.d.ts +3 -2
  142. package/dist/src/server/handlers.d.ts.map +1 -1
  143. package/dist/src/server/handlers.js +542 -96
  144. package/dist/src/server/handlers.js.map +1 -1
  145. package/dist/src/server/indexBootstrap.d.ts.map +1 -1
  146. package/dist/src/server/indexBootstrap.js +4 -1
  147. package/dist/src/server/indexBootstrap.js.map +1 -1
  148. package/dist/src/server/main.d.ts.map +1 -1
  149. package/dist/src/server/main.js +112 -30
  150. package/dist/src/server/main.js.map +1 -1
  151. package/dist/src/server/rpc.d.ts.map +1 -1
  152. package/dist/src/server/rpc.js +28 -9
  153. package/dist/src/server/rpc.js.map +1 -1
  154. package/dist/src/server/rrf.d.ts +86 -0
  155. package/dist/src/server/rrf.d.ts.map +1 -0
  156. package/dist/src/server/rrf.js +108 -0
  157. package/dist/src/server/rrf.js.map +1 -0
  158. package/dist/src/server/runtime.d.ts.map +1 -1
  159. package/dist/src/server/runtime.js +45 -6
  160. package/dist/src/server/runtime.js.map +1 -1
  161. package/dist/src/server/scoring.d.ts.map +1 -1
  162. package/dist/src/server/scoring.js +19 -0
  163. package/dist/src/server/scoring.js.map +1 -1
  164. package/dist/src/shared/cli/args.d.ts +70 -0
  165. package/dist/src/shared/cli/args.d.ts.map +1 -0
  166. package/dist/src/shared/cli/args.js +84 -0
  167. package/dist/src/shared/cli/args.js.map +1 -0
  168. package/dist/src/shared/duckdb.d.ts.map +1 -1
  169. package/dist/src/shared/duckdb.js +9 -0
  170. package/dist/src/shared/duckdb.js.map +1 -1
  171. package/dist/src/shared/embedding/engine.d.ts +38 -0
  172. package/dist/src/shared/embedding/engine.d.ts.map +1 -0
  173. package/dist/src/shared/embedding/engine.js +6 -0
  174. package/dist/src/shared/embedding/engine.js.map +1 -0
  175. package/dist/src/shared/embedding/lsh-engine.d.ts +11 -0
  176. package/dist/src/shared/embedding/lsh-engine.d.ts.map +1 -0
  177. package/dist/src/shared/embedding/lsh-engine.js +14 -0
  178. package/dist/src/shared/embedding/lsh-engine.js.map +1 -0
  179. package/dist/src/shared/embedding/registry.d.ts +25 -0
  180. package/dist/src/shared/embedding/registry.d.ts.map +1 -0
  181. package/dist/src/shared/embedding/registry.js +50 -0
  182. package/dist/src/shared/embedding/registry.js.map +1 -0
  183. package/dist/src/shared/embedding/semantic-engine.d.ts +14 -0
  184. package/dist/src/shared/embedding/semantic-engine.d.ts.map +1 -0
  185. package/dist/src/shared/embedding/semantic-engine.js +50 -0
  186. package/dist/src/shared/embedding/semantic-engine.js.map +1 -0
  187. package/dist/src/shared/models/model-manager.d.ts +38 -0
  188. package/dist/src/shared/models/model-manager.d.ts.map +1 -0
  189. package/dist/src/shared/models/model-manager.js +116 -0
  190. package/dist/src/shared/models/model-manager.js.map +1 -0
  191. package/dist/src/shared/models/model-manifest.d.ts +22 -0
  192. package/dist/src/shared/models/model-manifest.d.ts.map +1 -0
  193. package/dist/src/shared/models/model-manifest.js +24 -0
  194. package/dist/src/shared/models/model-manifest.js.map +1 -0
  195. package/dist/src/shared/utils/path.d.ts +46 -0
  196. package/dist/src/shared/utils/path.d.ts.map +1 -0
  197. package/dist/src/shared/utils/path.js +94 -0
  198. package/dist/src/shared/utils/path.js.map +1 -0
  199. package/dist/src/shared/utils/socket.d.ts +61 -0
  200. package/dist/src/shared/utils/socket.d.ts.map +1 -0
  201. package/dist/src/shared/utils/socket.js +156 -0
  202. package/dist/src/shared/utils/socket.js.map +1 -0
  203. package/dist/src/shared/utils/validation.d.ts +14 -0
  204. package/dist/src/shared/utils/validation.d.ts.map +1 -0
  205. package/dist/src/shared/utils/validation.js +22 -0
  206. package/dist/src/shared/utils/validation.js.map +1 -0
  207. package/package.json +14 -1
@@ -1,17 +1,54 @@
  import { createHash } from "node:crypto";
+ import { existsSync } from "node:fs";
  import { readFile, stat } from "node:fs/promises";
  import { join, resolve, extname } from "node:path";
  import { pathToFileURL } from "node:url";
  import { DuckDBClient } from "../shared/duckdb.js";
  import { generateEmbedding } from "../shared/embedding.js";
+ import { acquireLock, releaseLock, LockfileError, getLockOwner } from "../shared/utils/lockfile.js";
+ import { normalizeDbPath, ensureDbParentDir, getRepoPathCandidates } from "../shared/utils/path.js";
  import { analyzeSource, buildFallbackSnippet } from "./codeintel.js";
- import { getDefaultBranch, getHeadCommit, gitLsFiles } from "./git.js";
+ import { getDefaultBranch, getHeadCommit, gitLsFiles, gitDiffNameOnly } from "./git.js";
  import { detectLanguage } from "./language.js";
- import { ensureBaseSchema } from "./schema.js";
+ import { getIndexerQueue } from "./queue.js";
+ import { ensureBaseSchema, ensureRepoMetaColumns, rebuildFTSIfNeeded } from "./schema.js";
  import { IndexWatcher } from "./watch.js";
  const MAX_SAMPLE_BYTES = 32_768;
  const MAX_FILE_BYTES = 32 * 1024 * 1024; // 32MB limit to prevent memory exhaustion
  const SCAN_BATCH_SIZE = 100; // Process files in batches to limit memory usage
+ /**
+ * Maximum number of SQL placeholders per INSERT statement.
+ *
+ * DuckDB's internal limit is 65535 placeholders, but we use a conservative value of 30000 for:
+ * 1. Safety margin: Prevents stack overflow when building large SQL strings in JavaScript
+ * 2. Performance: Smaller batches reduce memory pressure and provide better error granularity
+ * 3. Compatibility: Works safely across different DuckDB versions and system configurations
+ *
+ * This value has been validated with real-world testing:
+ * - Successfully handles 10000+ files in batch-processing.spec.ts
+ * - Prevents "Maximum call stack size exceeded" errors (Issue #39)
+ * - Balances transaction throughput vs. individual batch size
+ *
+ * Example batch sizes with this limit:
+ * - 4-column table (blob): 7500 records per batch
+ * - 5-column table (dependency): 6000 records per batch
+ * - 9-column table (symbol): 3333 records per batch
+ */
+ const MAX_SQL_PLACEHOLDERS = 30000;
+ /**
+ * Calculate safe batch size for SQL INSERT operations based on columns per record.
+ * Ensures total placeholders per statement stays under MAX_SQL_PLACEHOLDERS.
+ *
+ * @param columnsPerRecord - Number of columns in the INSERT statement (must be positive)
+ * @returns Safe batch size that won't exceed placeholder limit
+ * @throws {Error} If columnsPerRecord is not a positive integer
+ */
+ function calculateBatchSize(columnsPerRecord) {
+ if (columnsPerRecord <= 0 || !Number.isInteger(columnsPerRecord)) {
+ throw new Error(`columnsPerRecord must be a positive integer, got: ${columnsPerRecord}`);
+ }
+ return Math.floor(MAX_SQL_PLACEHOLDERS / columnsPerRecord);
+ }
  function countLines(content) {
  if (content.length === 0) {
  return 0;
@@ -35,137 +72,237 @@ function isBinaryBuffer(buffer) {
  * @param defaultBranch - Default branch name (e.g., "main", "master"), or null if unknown
  * @returns The repository ID (auto-generated on first insert, reused thereafter)
  */
- async function ensureRepo(db, repoRoot, defaultBranch) {
- // Atomically insert or update using ON CONFLICT to leverage auto-increment
- // This eliminates the TOCTOU race condition present in manual ID generation
- await db.run(`INSERT INTO repo (root, default_branch, indexed_at)
- VALUES (?, ?, CURRENT_TIMESTAMP)
- ON CONFLICT(root) DO UPDATE SET
- default_branch = COALESCE(excluded.default_branch, repo.default_branch)`, [repoRoot, defaultBranch]);
- // Fetch the ID of the existing or newly created repo
- const rows = await db.all("SELECT id FROM repo WHERE root = ?", [repoRoot]);
+ async function mergeLegacyRepoRows(db, canonicalRepoId, legacyRepoIds) {
+ if (legacyRepoIds.length === 0) {
+ return;
+ }
+ const referencingTables = await db.all(`SELECT DISTINCT c.table_name
+ FROM duckdb_columns() AS c
+ JOIN duckdb_tables() AS t
+ ON c.database_name = t.database_name
+ AND c.schema_name = t.schema_name
+ AND c.table_name = t.table_name
+ WHERE c.column_name = 'repo_id'
+ AND c.table_name <> 'repo'
+ AND t.table_type = 'BASE TABLE'`);
+ const safeTables = referencingTables
+ .map((row) => row.table_name)
+ .filter((name) => /^[A-Za-z0-9_]+$/.test(name));
+ await db.transaction(async () => {
+ for (const legacyRepoId of legacyRepoIds) {
+ for (const tableName of safeTables) {
+ await db.run(`UPDATE ${tableName} SET repo_id = ? WHERE repo_id = ?`, [
+ canonicalRepoId,
+ legacyRepoId,
+ ]);
+ }
+ await db.run("DELETE FROM repo WHERE id = ?", [legacyRepoId]);
+ }
+ });
+ }
+ async function ensureRepo(db, repoRoot, defaultBranch, candidateRoots) {
+ const searchRoots = Array.from(new Set([repoRoot, ...(candidateRoots ?? [])]));
+ const placeholders = searchRoots.map(() => "?").join(", ");
+ let rows = await db.all(`SELECT id, root FROM repo WHERE root IN (${placeholders})`, searchRoots);
+ if (rows.length === 0) {
+ await db.run(`INSERT INTO repo (root, default_branch, indexed_at)
+ VALUES (?, ?, CURRENT_TIMESTAMP)
+ ON CONFLICT(root) DO UPDATE SET
+ default_branch = COALESCE(excluded.default_branch, repo.default_branch)`, [repoRoot, defaultBranch]);
+ rows = await db.all(`SELECT id, root FROM repo WHERE root IN (${placeholders})`, searchRoots);
+ }
  if (rows.length === 0) {
  throw new Error("Failed to create or find repository record. Check database constraints and schema.");
  }
- const row = rows[0];
- if (!row) {
+ let canonicalRow = rows.find((row) => row.root === repoRoot) ?? rows[0];
+ if (!canonicalRow) {
  throw new Error("Failed to retrieve repository record. Database returned empty result.");
  }
- return row.id;
- }
- async function persistBlobs(db, blobs) {
- if (blobs.size === 0)
- return;
- // Use bulk insert for better performance
- const blobArray = Array.from(blobs.values());
- const placeholders = blobArray.map(() => "(?, ?, ?, ?)").join(", ");
- const sql = `INSERT OR REPLACE INTO blob (hash, size_bytes, line_count, content) VALUES ${placeholders}`;
- const params = [];
- for (const blob of blobArray) {
- params.push(blob.hash, blob.sizeBytes, blob.lineCount, blob.content);
+ if (canonicalRow.root !== repoRoot) {
+ await db.run("UPDATE repo SET root = ? WHERE id = ?", [repoRoot, canonicalRow.id]);
+ canonicalRow = { ...canonicalRow, root: repoRoot };
  }
- await db.run(sql, params);
+ const legacyIds = rows.filter((row) => row.id !== canonicalRow.id).map((row) => row.id);
+ await mergeLegacyRepoRows(db, canonicalRow.id, legacyIds);
+ return canonicalRow.id;
  }
- async function persistTrees(db, repoId, commitHash, records) {
+ /**
+ * Generic helper function to persist records in batches to prevent stack overflow.
+ * Splits large datasets into smaller batches and executes INSERT statements sequentially.
+ *
+ * IMPORTANT: This function must be called within an active database transaction.
+ * See runIndexer() for transaction management context.
+ *
+ * @param db - Database client (must be within an active transaction)
+ * @param records - Array of records to persist
+ * @param batchSize - Maximum number of records per INSERT statement
+ * @param buildInsert - Function that builds SQL and params for a batch
+ */
+ async function persistInBatches(db, records, batchSize, buildInsert) {
  if (records.length === 0)
  return;
- // Use bulk insert for better performance
- const placeholders = records.map(() => "(?, ?, ?, ?, ?, ?, ?, ?)").join(", ");
- const sql = `INSERT OR REPLACE INTO tree (repo_id, commit_hash, path, blob_hash, ext, lang, is_binary, mtime) VALUES ${placeholders}`;
- const params = [];
- for (const record of records) {
- params.push(repoId, commitHash, record.path, record.blobHash, record.ext, record.lang, record.isBinary, record.mtimeIso);
+ for (let i = 0; i < records.length; i += batchSize) {
+ const batch = records.slice(i, i + batchSize);
+ const { sql, params } = buildInsert(batch);
+ try {
+ await db.run(sql, params);
+ }
+ catch (error) {
+ // Error message includes the batch index and size (exact 0-indexed record range)
+ const batchInfo = `Batch ${Math.floor(i / batchSize) + 1}/${Math.ceil(records.length / batchSize)} (records ${i}-${i + batch.length - 1})`;
+ throw new Error(`Failed to persist batch: ${batchInfo}. Original error: ${error instanceof Error ? error.message : String(error)}`);
+ }
  }
- await db.run(sql, params);
  }
+ /**
+ * Persist blob records to database in batches to prevent stack overflow.
+ *
+ * IMPORTANT: This function must be called within an active database transaction.
+ * See runIndexer() for transaction management context.
+ *
+ * @param db - Database client (must be within an active transaction)
+ * @param blobs - Map of blob records to persist
+ */
+ async function persistBlobs(db, blobs) {
+ const blobArray = Array.from(blobs.values());
+ const BATCH_SIZE = calculateBatchSize(4); // blob table has 4 columns
+ await persistInBatches(db, blobArray, BATCH_SIZE, (batch) => ({
+ sql: `INSERT OR REPLACE INTO blob (hash, size_bytes, line_count, content) VALUES ${batch.map(() => "(?, ?, ?, ?)").join(", ")}`,
+ params: batch.flatMap((blob) => [blob.hash, blob.sizeBytes, blob.lineCount, blob.content]),
+ }));
+ }
+ /**
+ * Persist tree records to database in batches to prevent stack overflow.
+ *
+ * IMPORTANT: This function must be called within an active database transaction.
+ * See runIndexer() for transaction management context.
+ *
+ * @param db - Database client (must be within an active transaction)
+ * @param repoId - Repository ID
+ * @param commitHash - Git commit hash
+ * @param records - File records to persist
+ */
+ async function persistTrees(db, repoId, commitHash, records) {
+ const BATCH_SIZE = calculateBatchSize(8); // tree table has 8 columns
+ await persistInBatches(db, records, BATCH_SIZE, (batch) => ({
+ sql: `INSERT OR REPLACE INTO tree (repo_id, commit_hash, path, blob_hash, ext, lang, is_binary, mtime) VALUES ${batch.map(() => "(?, ?, ?, ?, ?, ?, ?, ?)").join(", ")}`,
+ params: batch.flatMap((record) => [
+ repoId,
+ commitHash,
+ record.path,
+ record.blobHash,
+ record.ext,
+ record.lang,
+ record.isBinary,
+ record.mtimeIso,
+ ]),
+ }));
+ }
+ /**
+ * Persist file records to database in batches to prevent stack overflow.
+ *
+ * IMPORTANT: This function must be called within an active database transaction.
+ * See runIndexer() for transaction management context.
+ *
+ * @param db - Database client (must be within an active transaction)
+ * @param repoId - Repository ID
+ * @param records - File records to persist
+ */
  async function persistFiles(db, repoId, records) {
- if (records.length === 0)
- return;
- // Use bulk insert for better performance
- const placeholders = records.map(() => "(?, ?, ?, ?, ?, ?, ?)").join(", ");
- const sql = `INSERT OR REPLACE INTO file (repo_id, path, blob_hash, ext, lang, is_binary, mtime) VALUES ${placeholders}`;
- const params = [];
- for (const record of records) {
- params.push(repoId, record.path, record.blobHash, record.ext, record.lang, record.isBinary, record.mtimeIso);
- }
- await db.run(sql, params);
+ const BATCH_SIZE = calculateBatchSize(7); // file table has 7 columns
+ await persistInBatches(db, records, BATCH_SIZE, (batch) => ({
+ sql: `INSERT OR REPLACE INTO file (repo_id, path, blob_hash, ext, lang, is_binary, mtime) VALUES ${batch.map(() => "(?, ?, ?, ?, ?, ?, ?)").join(", ")}`,
+ params: batch.flatMap((record) => [
+ repoId,
+ record.path,
+ record.blobHash,
+ record.ext,
+ record.lang,
+ record.isBinary,
+ record.mtimeIso,
+ ]),
+ }));
  }
+ /**
+ * Persist symbol records to database in batches to prevent stack overflow.
+ *
+ * IMPORTANT: This function must be called within an active database transaction.
+ * See runIndexer() for transaction management context.
+ *
+ * @param db - Database client (must be within an active transaction)
+ * @param repoId - Repository ID
+ * @param records - Symbol records to persist
+ */
  async function persistSymbols(db, repoId, records) {
- if (records.length === 0)
- return;
- // Limit batch size to 1000 to prevent stack overflow
- const BATCH_SIZE = 1000;
- for (let i = 0; i < records.length; i += BATCH_SIZE) {
- const batch = records.slice(i, i + BATCH_SIZE);
- const placeholders = batch.map(() => "(?, ?, ?, ?, ?, ?, ?, ?, ?)").join(", ");
- const sql = `
- INSERT OR REPLACE INTO symbol (
- repo_id, path, symbol_id, name, kind, range_start_line, range_end_line, signature, doc
- ) VALUES ${placeholders}
- `;
- const params = [];
- for (const record of batch) {
- params.push(repoId, record.path, record.symbolId, record.name, record.kind, record.rangeStartLine, record.rangeEndLine, record.signature, record.doc);
- }
- await db.run(sql, params);
- }
+ const BATCH_SIZE = calculateBatchSize(9); // symbol table has 9 columns
+ await persistInBatches(db, records, BATCH_SIZE, (batch) => ({
+ sql: `INSERT OR REPLACE INTO symbol (repo_id, path, symbol_id, name, kind, range_start_line, range_end_line, signature, doc) VALUES ${batch.map(() => "(?, ?, ?, ?, ?, ?, ?, ?, ?)").join(", ")}`,
+ params: batch.flatMap((r) => [
+ repoId,
+ r.path,
+ r.symbolId,
+ r.name,
+ r.kind,
+ r.rangeStartLine,
+ r.rangeEndLine,
+ r.signature,
+ r.doc,
+ ]),
+ }));
  }
+ /**
+ * Persist snippet records to database in batches to prevent stack overflow.
+ *
+ * IMPORTANT: This function must be called within an active database transaction.
+ * See runIndexer() for transaction management context.
+ *
+ * @param db - Database client (must be within an active transaction)
+ * @param repoId - Repository ID
+ * @param records - Snippet records to persist
+ */
  async function persistSnippets(db, repoId, records) {
- if (records.length === 0)
- return;
- // Limit batch size to 1000 to prevent stack overflow
- const BATCH_SIZE = 1000;
- for (let i = 0; i < records.length; i += BATCH_SIZE) {
- const batch = records.slice(i, i + BATCH_SIZE);
- const placeholders = batch.map(() => "(?, ?, ?, ?, ?, ?)").join(", ");
- const sql = `
- INSERT OR REPLACE INTO snippet (
- repo_id, path, snippet_id, start_line, end_line, symbol_id
- ) VALUES ${placeholders}
- `;
- const params = [];
- for (const record of batch) {
- params.push(repoId, record.path, record.snippetId, record.startLine, record.endLine, record.symbolId);
- }
- await db.run(sql, params);
- }
+ const BATCH_SIZE = calculateBatchSize(6); // snippet table has 6 columns
+ await persistInBatches(db, records, BATCH_SIZE, (batch) => ({
+ sql: `INSERT OR REPLACE INTO snippet (repo_id, path, snippet_id, start_line, end_line, symbol_id) VALUES ${batch.map(() => "(?, ?, ?, ?, ?, ?)").join(", ")}`,
+ params: batch.flatMap((r) => [repoId, r.path, r.snippetId, r.startLine, r.endLine, r.symbolId]),
+ }));
  }
+ /**
+ * Persist file dependency records to database in batches to prevent stack overflow.
+ *
+ * MUST be called within a transaction.
+ * Batch size is dynamically calculated based on MAX_SQL_PLACEHOLDERS.
+ */
  async function persistDependencies(db, repoId, records) {
- if (records.length === 0)
- return;
- // Limit batch size to 1000 to prevent stack overflow
- const BATCH_SIZE = 1000;
- for (let i = 0; i < records.length; i += BATCH_SIZE) {
- const batch = records.slice(i, i + BATCH_SIZE);
- const placeholders = batch.map(() => "(?, ?, ?, ?, ?)").join(", ");
- const sql = `
- INSERT OR REPLACE INTO dependency (
- repo_id, src_path, dst_kind, dst, rel
- ) VALUES ${placeholders}
- `;
- const params = [];
- for (const record of batch) {
- params.push(repoId, record.srcPath, record.dstKind, record.dst, record.rel);
- }
- await db.run(sql, params);
- }
+ const BATCH_SIZE = calculateBatchSize(5); // dependency table has 5 columns
+ await persistInBatches(db, records, BATCH_SIZE, (batch) => ({
+ sql: `INSERT OR REPLACE INTO dependency (repo_id, src_path, dst_kind, dst, rel) VALUES ${batch.map(() => "(?, ?, ?, ?, ?)").join(", ")}`,
+ params: batch.flatMap((r) => [repoId, r.srcPath, r.dstKind, r.dst, r.rel]),
+ }));
  }
+ /**
+ * Persist file embedding records to database in batches to prevent stack overflow.
+ *
+ * IMPORTANT: This function must be called within an active database transaction.
+ * See runIndexer() for transaction management context.
+ *
+ * @param db - Database client (must be within an active transaction)
+ * @param repoId - Repository ID
+ * @param records - Embedding records to persist
+ */
  async function persistEmbeddings(db, repoId, records) {
- if (records.length === 0)
- return;
- const placeholders = records.map(() => "(?, ?, ?, ?, CURRENT_TIMESTAMP)").join(", ");
- const sql = `
- INSERT OR REPLACE INTO file_embedding (
- repo_id, path, dims, vector_json, updated_at
- ) VALUES ${placeholders}
- `;
- const params = [];
- for (const record of records) {
- params.push(repoId, record.path, record.dims, JSON.stringify(record.vector));
- }
- await db.run(sql, params);
+ const BATCH_SIZE = calculateBatchSize(4); // file_embedding table has 4 parameterized columns
+ await persistInBatches(db, records, BATCH_SIZE, (batch) => ({
+ sql: `INSERT OR REPLACE INTO file_embedding (repo_id, path, dims, vector_json, updated_at) VALUES ${batch.map(() => "(?, ?, ?, ?, CURRENT_TIMESTAMP)").join(", ")}`,
+ params: batch.flatMap((record) => [
+ repoId,
+ record.path,
+ record.dims,
+ JSON.stringify(record.vector),
+ ]),
+ }));
  }
- function buildCodeIntel(files, blobs) {
+ async function buildCodeIntel(files, blobs, workspaceRoot) {
  const fileSet = new Set(files.map((file) => file.path));
  const symbols = [];
  const snippets = [];
@@ -178,7 +315,7 @@ function buildCodeIntel(files, blobs) {
  if (!blob || blob.content === null) {
  continue;
  }
- const analysis = analyzeSource(file.path, file.lang, blob.content, fileSet);
+ const analysis = await analyzeSource(file.path, file.lang, blob.content, fileSet, workspaceRoot);
  for (const symbol of analysis.symbols) {
  symbols.push({
  path: file.path,
@@ -234,9 +371,10 @@ async function scanFilesInBatches(repoRoot, paths) {
  const allBlobs = new Map();
  const allFiles = [];
  const allEmbeddings = [];
+ const allMissingPaths = [];
  for (let i = 0; i < paths.length; i += SCAN_BATCH_SIZE) {
  const batch = paths.slice(i, i + SCAN_BATCH_SIZE);
- const { blobs, files, embeddings } = await scanFiles(repoRoot, batch);
+ const { blobs, files, embeddings, missingPaths } = await scanFiles(repoRoot, batch);
  // Merge: blobs are unique by hash, so skip duplicates
  for (const [hash, blob] of blobs) {
  if (!allBlobs.has(hash)) {
@@ -245,15 +383,22 @@ async function scanFilesInBatches(repoRoot, paths) {
  }
  allFiles.push(...files);
  allEmbeddings.push(...embeddings);
+ allMissingPaths.push(...missingPaths);
  // Explicitly clear batch data to encourage garbage collection
  blobs.clear();
  }
- return { blobs: allBlobs, files: allFiles, embeddings: allEmbeddings };
+ return {
+ blobs: allBlobs,
+ files: allFiles,
+ embeddings: allEmbeddings,
+ missingPaths: allMissingPaths,
+ };
  }
  async function scanFiles(repoRoot, paths) {
  const blobs = new Map();
  const files = [];
  const embeddings = [];
+ const missingPaths = [];
  for (const relativePath of paths) {
  const absolutePath = join(repoRoot, relativePath);
  try {
@@ -302,11 +447,17 @@ async function scanFiles(repoRoot, paths) {
  }
  }
  catch (error) {
+ // Fix #4: Track deleted files (ENOENT) for database cleanup
+ if (error.code === "ENOENT") {
+ missingPaths.push(relativePath);
+ continue;
+ }
+ // Other errors (permissions, etc.) - log and skip
  console.warn(`Cannot read ${relativePath} due to filesystem error. Fix file permissions or remove the file.`);
  console.warn(error);
  }
  }
- return { blobs, files, embeddings };
+ return { blobs, files, embeddings, missingPaths };
  }
  /**
  * Fetch existing file hashes from the DB (for incremental indexing)
@@ -383,161 +534,242 @@ async function deleteFileRecords(db, repoId, headCommit, path) {
  await db.run("DELETE FROM file WHERE repo_id = ? AND path = ?", [repoId, path]);
  }
  export async function runIndexer(options) {
- const repoRoot = resolve(options.repoRoot);
- const databasePath = resolve(options.databasePath);
- const db = await DuckDBClient.connect({ databasePath, ensureDirectory: true });
- try {
- await ensureBaseSchema(db);
- const [headCommit, defaultBranch] = await Promise.all([
- getHeadCommit(repoRoot),
- getDefaultBranch(repoRoot),
- ]);
- const repoId = await ensureRepo(db, repoRoot, defaultBranch);
- // Incremental mode: only reindex files in changedPaths
- if (options.changedPaths && options.changedPaths.length > 0) {
- // First, reconcile deleted files (handle renames/deletions)
- const deletedPaths = await reconcileDeletedFiles(db, repoId, repoRoot);
- if (deletedPaths.length > 0) {
- console.info(`Removed ${deletedPaths.length} deleted file(s) from index.`);
+ const repoPathCandidates = getRepoPathCandidates(options.repoRoot);
+ const repoRoot = repoPathCandidates[0];
+ if (!repoRoot) {
+ throw new Error(`Unable to resolve repository root for ${options.repoRoot}`);
+ }
+ let databasePath;
+ // Fix #2: Ensure parent directory exists BEFORE normalization
+ // This guarantees consistent path normalization on first and subsequent runs
+ await ensureDbParentDir(options.databasePath);
+ // Critical: Use normalizeDbPath to ensure consistent path across runs
+ // This prevents lock file and queue key bypass when DB is accessed via symlink
+ databasePath = normalizeDbPath(options.databasePath);
+ // DuckDB single-writer constraint: to prevent parallel writes to the same databasePath,
+ // serialize them through a per-databasePath queue
+ return getIndexerQueue(databasePath).add(async () => {
+ // Fix #1 & #2: Add file lock for multi-process safety (unless caller already holds lock)
+ const lockfilePath = `${databasePath}.lock`;
+ let lockAcquired = false;
+ if (!options.skipLocking) {
+ try {
+ acquireLock(lockfilePath);
+ lockAcquired = true;
  }
- const existingHashes = await getExistingFileHashes(db, repoId);
- const { blobs, files, embeddings } = await scanFilesInBatches(repoRoot, options.changedPaths);
- // Filter out files that haven't actually changed (same hash)
- const changedFiles = [];
- const changedBlobs = new Map();
- for (const file of files) {
- const existingHash = existingHashes.get(file.path);
- if (existingHash !== file.blobHash) {
- changedFiles.push(file);
- const blob = blobs.get(file.blobHash);
- if (blob) {
- changedBlobs.set(blob.hash, blob);
- }
+ catch (error) {
+ if (error instanceof LockfileError) {
+ const ownerPid = error.ownerPid ?? getLockOwner(lockfilePath);
+ const ownerInfo = ownerPid ? ` (PID: ${ownerPid})` : "";
+ throw new Error(`Another indexing process${ownerInfo} holds the lock for ${databasePath}. Please wait for it to complete.`);
  }
+ throw error;
  }
- if (changedFiles.length === 0) {
- console.info(`No actual changes detected in ${options.changedPaths.length} file(s). Skipping reindex.`);
- // Still update timestamp to indicate we checked
- if (defaultBranch) {
- await db.run("UPDATE repo SET indexed_at = CURRENT_TIMESTAMP, default_branch = ? WHERE id = ?", [defaultBranch, repoId]);
+ }
+ let db = null;
+ try {
+ const dbClient = await DuckDBClient.connect({ databasePath, ensureDirectory: true });
+ db = dbClient;
+ await ensureBaseSchema(dbClient);
+ // Phase 3: Ensure FTS metadata columns exist for existing DBs (migration)
+ await ensureRepoMetaColumns(dbClient);
+ const [headCommit, defaultBranch] = await Promise.all([
+ getHeadCommit(repoRoot),
+ getDefaultBranch(repoRoot),
+ ]);
+ const repoId = await ensureRepo(dbClient, repoRoot, defaultBranch, repoPathCandidates);
+ // Incremental mode: only reindex files in changedPaths (empty array means no-op)
+ if (options.changedPaths) {
+ // First, reconcile deleted files (handle renames/deletions)
+ const deletedPaths = await reconcileDeletedFiles(dbClient, repoId, repoRoot);
+ if (deletedPaths.length > 0) {
+ console.info(`Removed ${deletedPaths.length} deleted file(s) from index.`);
  }
- else {
- await db.run("UPDATE repo SET indexed_at = CURRENT_TIMESTAMP WHERE id = ?", [repoId]);
+ const existingHashes = await getExistingFileHashes(dbClient, repoId);
+ const { blobs, files, embeddings, missingPaths } = await scanFilesInBatches(repoRoot, options.changedPaths);
+ // Filter out files that haven't actually changed (same hash)
+ const changedFiles = [];
+ const changedBlobs = new Map();
+ for (const file of files) {
+ const existingHash = existingHashes.get(file.path);
+ if (existingHash !== file.blobHash) {
+ changedFiles.push(file);
+ const blob = blobs.get(file.blobHash);
+ if (blob) {
+ changedBlobs.set(blob.hash, blob);
+ }
+ }
  }
- return;
- }
- // Process all changed files in a single transaction for atomicity
- const fileSet = new Set(files.map((f) => f.path));
- let processedCount = 0;
- await db.transaction(async () => {
- for (const file of changedFiles) {
- const blob = changedBlobs.get(file.blobHash);
- if (!blob)
- continue;
- // Build code intelligence for this file
- const fileSymbols = [];
- const fileSnippets = [];
- const fileDependencies = [];
- if (!file.isBinary && blob.content) {
- const analysis = analyzeSource(file.path, file.lang, blob.content, fileSet);
- for (const symbol of analysis.symbols) {
- fileSymbols.push({
- path: file.path,
- symbolId: symbol.symbolId,
- name: symbol.name,
- kind: symbol.kind,
- rangeStartLine: symbol.rangeStartLine,
- rangeEndLine: symbol.rangeEndLine,
- signature: symbol.signature,
- doc: symbol.doc,
- });
+ if (changedFiles.length === 0 && missingPaths.length === 0) {
+ console.info(`No actual changes detected in ${options.changedPaths.length} file(s). Skipping reindex.`);
+ // Fix #3 & #4: If files were deleted (git or watch mode), still need to dirty FTS and rebuild
+ if (deletedPaths.length > 0) {
+ console.info(`${deletedPaths.length} file(s) deleted (git) - marking FTS dirty`);
+ if (defaultBranch) {
+ await dbClient.run("UPDATE repo SET indexed_at = CURRENT_TIMESTAMP, default_branch = ?, fts_dirty = true, fts_generation = fts_generation + 1 WHERE id = ?", [defaultBranch, repoId]);
  }
- for (const snippet of analysis.snippets) {
+ else {
+ await dbClient.run("UPDATE repo SET indexed_at = CURRENT_TIMESTAMP, fts_dirty = true, fts_generation = fts_generation + 1 WHERE id = ?", [repoId]);
+ }
+ await rebuildFTSIfNeeded(dbClient, repoId);
+ }
+ else {
+ // No deletions either - just update timestamp
+ if (defaultBranch) {
+ await dbClient.run("UPDATE repo SET indexed_at = CURRENT_TIMESTAMP, default_branch = ? WHERE id = ?", [defaultBranch, repoId]);
+ }
+ else {
+ await dbClient.run("UPDATE repo SET indexed_at = CURRENT_TIMESTAMP WHERE id = ?", [
+ repoId,
+ ]);
+ }
+ }
+ return;
+ }
+ // Process all changed files in a single transaction for atomicity
+ const fileSet = new Set(files.map((f) => f.path));
+ const embeddingMap = new Map();
+ for (const embedding of embeddings) {
+ embeddingMap.set(embedding.path, embedding);
+ }
+ let processedCount = 0;
+ await dbClient.transaction(async () => {
+ // Fix #5: Handle deleted files from watch mode (uncommitted deletions) INSIDE transaction
+ // This ensures deletion + FTS dirty flag update are atomic
+ if (missingPaths.length > 0) {
+ // Loop through each missing file and delete with headCommit
+ for (const path of missingPaths) {
+ await deleteFileRecords(dbClient, repoId, headCommit, path);
+ }
+ console.info(`Removed ${missingPaths.length} missing file(s) from index (watch mode deletion).`);
+ }
+ // Process changed files
+ for (const file of changedFiles) {
+ const blob = changedBlobs.get(file.blobHash);
+ if (!blob)
+ continue;
+ // Build code intelligence for this file
+ const fileSymbols = [];
+ const fileSnippets = [];
+ const fileDependencies = [];
+ if (!file.isBinary && blob.content) {
+ const analysis = await analyzeSource(file.path, file.lang, blob.content, fileSet, repoRoot);
+ for (const symbol of analysis.symbols) {
+ fileSymbols.push({
+ path: file.path,
+ symbolId: symbol.symbolId,
+ name: symbol.name,
+ kind: symbol.kind,
+ rangeStartLine: symbol.rangeStartLine,
+ rangeEndLine: symbol.rangeEndLine,
+ signature: symbol.signature,
+ doc: symbol.doc,
+ });
+ }
+ for (const snippet of analysis.snippets) {
+ fileSnippets.push({
+ path: file.path,
+ snippetId: snippet.startLine,
+ startLine: snippet.startLine,
+ endLine: snippet.endLine,
+ symbolId: snippet.symbolId,
+ });
+ }
+ for (const dep of analysis.dependencies) {
+ fileDependencies.push({
+ srcPath: file.path,
+ dstKind: dep.dstKind,
+ dst: dep.dst,
+ rel: dep.rel,
+ });
+ }
+ }
+ else {
+ // Binary or no content: add fallback snippet
+ const fallback = buildFallbackSnippet(blob.lineCount ?? 1);
  fileSnippets.push({
  path: file.path,
- snippetId: snippet.startLine,
- startLine: snippet.startLine,
- endLine: snippet.endLine,
- symbolId: snippet.symbolId,
+ snippetId: fallback.startLine,
+ startLine: fallback.startLine,
+ endLine: fallback.endLine,
+ symbolId: fallback.symbolId,
  });
  }
- for (const dep of analysis.dependencies) {
- fileDependencies.push({
- srcPath: file.path,
- dstKind: dep.dstKind,
- dst: dep.dst,
- rel: dep.rel,
- });
+ const fileEmbedding = embeddingMap.get(file.path) ?? null;
+ // Delete old records for this file (within main transaction)
+ await deleteFileRecords(dbClient, repoId, headCommit, file.path);
+ // Insert new records (within main transaction)
+ await persistBlobs(dbClient, new Map([[blob.hash, blob]]));
+ await persistTrees(dbClient, repoId, headCommit, [file]);
+ await persistFiles(dbClient, repoId, [file]);
+ await persistSymbols(dbClient, repoId, fileSymbols);
+ await persistSnippets(dbClient, repoId, fileSnippets);
+ await persistDependencies(dbClient, repoId, fileDependencies);
+ if (fileEmbedding) {
+ await persistEmbeddings(dbClient, repoId, [fileEmbedding]);
  }
+ processedCount++;
  }
- else {
- // Binary or no content: add fallback snippet
- const fallback = buildFallbackSnippet(blob.lineCount ?? 1);
- fileSnippets.push({
- path: file.path,
- snippetId: fallback.startLine,
- startLine: fallback.startLine,
- endLine: fallback.endLine,
- symbolId: fallback.symbolId,
- });
+ // Update timestamp and mark FTS dirty inside transaction for atomicity
+ // Fix: Increment fts_generation to prevent lost updates during concurrent rebuilds
+ if (defaultBranch) {
+ await dbClient.run("UPDATE repo SET indexed_at = CURRENT_TIMESTAMP, default_branch = ?, fts_dirty = true, fts_generation = fts_generation + 1 WHERE id = ?", [defaultBranch, repoId]);
  }
- const fileEmbedding = embeddings.find((e) => e.path === file.path) ?? null;
- // Delete old records for this file (within main transaction)
- await deleteFileRecords(db, repoId, headCommit, file.path);
- // Insert new records (within main transaction)
- await persistBlobs(db, new Map([[blob.hash, blob]]));
- await persistTrees(db, repoId, headCommit, [file]);
- await persistFiles(db, repoId, [file]);
- await persistSymbols(db, repoId, fileSymbols);
- await persistSnippets(db, repoId, fileSnippets);
- await persistDependencies(db, repoId, fileDependencies);
- if (fileEmbedding) {
- await persistEmbeddings(db, repoId, [fileEmbedding]);
+ else {
+ await dbClient.run("UPDATE repo SET indexed_at = CURRENT_TIMESTAMP, fts_dirty = true, fts_generation = fts_generation + 1 WHERE id = ?", [repoId]);
  }
- processedCount++;
- }
- // Update timestamp inside main transaction
+ });
+ console.info(`Incrementally indexed ${processedCount} changed file(s) for repo ${repoRoot} at ${databasePath} (commit ${headCommit.slice(0, 12)})`);
+ // Phase 2+3: Rebuild FTS index after incremental updates (dirty=true triggers rebuild)
+ await rebuildFTSIfNeeded(dbClient, repoId);
+ return;
+ }
+ // Full mode: reindex entire repository
+ const paths = await gitLsFiles(repoRoot);
+ const { blobs, files, embeddings, missingPaths } = await scanFilesInBatches(repoRoot, paths);
+ // In full mode, missingPaths should be rare (git ls-files returns existing files)
+ // But log them if they occur (race condition: file deleted between ls-files and scan)
+ if (missingPaths.length > 0) {
+ console.warn(`${missingPaths.length} file(s) disappeared during full reindex (race condition)`);
+ }
+ const codeIntel = await buildCodeIntel(files, blobs, repoRoot);
+ await dbClient.transaction(async () => {
+ await dbClient.run("DELETE FROM tree WHERE repo_id = ?", [repoId]);
+ await dbClient.run("DELETE FROM file WHERE repo_id = ?", [repoId]);
+ await dbClient.run("DELETE FROM symbol WHERE repo_id = ?", [repoId]);
+ await dbClient.run("DELETE FROM snippet WHERE repo_id = ?", [repoId]);
+ await dbClient.run("DELETE FROM dependency WHERE repo_id = ?", [repoId]);
+ await dbClient.run("DELETE FROM file_embedding WHERE repo_id = ?", [repoId]);
+ await persistBlobs(dbClient, blobs);
+ await persistTrees(dbClient, repoId, headCommit, files);
+ await persistFiles(dbClient, repoId, files);
+ await persistSymbols(dbClient, repoId, codeIntel.symbols);
+ await persistSnippets(dbClient, repoId, codeIntel.snippets);
+ await persistDependencies(dbClient, repoId, codeIntel.dependencies);
+ await persistEmbeddings(dbClient, repoId, embeddings);
+ // Update timestamp and mark FTS dirty inside transaction to ensure atomicity
+ // Fix: Increment fts_generation to prevent lost updates during concurrent rebuilds
  if (defaultBranch) {
- await db.run("UPDATE repo SET indexed_at = CURRENT_TIMESTAMP, default_branch = ? WHERE id = ?", [defaultBranch, repoId]);
+ await dbClient.run("UPDATE repo SET indexed_at = CURRENT_TIMESTAMP, default_branch = ?, fts_dirty = true, fts_generation = fts_generation + 1 WHERE id = ?", [defaultBranch, repoId]);
  }
  else {
- await db.run("UPDATE repo SET indexed_at = CURRENT_TIMESTAMP WHERE id = ?", [repoId]);
+ await dbClient.run("UPDATE repo SET indexed_at = CURRENT_TIMESTAMP, fts_dirty = true, fts_generation = fts_generation + 1 WHERE id = ?", [repoId]);
  }
  });
- console.info(`Incrementally indexed ${processedCount} changed file(s) for repo ${repoRoot} at ${databasePath} (commit ${headCommit.slice(0, 12)})`);
- return;
+ console.info(`Indexed ${files.length} files for repo ${repoRoot} at ${databasePath} (commit ${headCommit.slice(0, 12)})`);
+ // Phase 2+3: Force rebuild FTS index after full reindex
+ await rebuildFTSIfNeeded(dbClient, repoId, true);
  }
- // Full mode: reindex entire repository
- const paths = await gitLsFiles(repoRoot);
- const { blobs, files, embeddings } = await scanFilesInBatches(repoRoot, paths);
- const codeIntel = buildCodeIntel(files, blobs);
- await db.transaction(async () => {
- await db.run("DELETE FROM tree WHERE repo_id = ?", [repoId]);
- await db.run("DELETE FROM file WHERE repo_id = ?", [repoId]);
- await db.run("DELETE FROM symbol WHERE repo_id = ?", [repoId]);
- await db.run("DELETE FROM snippet WHERE repo_id = ?", [repoId]);
- await db.run("DELETE FROM dependency WHERE repo_id = ?", [repoId]);
- await db.run("DELETE FROM file_embedding WHERE repo_id = ?", [repoId]);
- await persistBlobs(db, blobs);
- await persistTrees(db, repoId, headCommit, files);
- await persistFiles(db, repoId, files);
- await persistSymbols(db, repoId, codeIntel.symbols);
- await persistSnippets(db, repoId, codeIntel.snippets);
- await persistDependencies(db, repoId, codeIntel.dependencies);
- await persistEmbeddings(db, repoId, embeddings);
- // Update timestamp inside transaction to ensure atomicity
- if (defaultBranch) {
- await db.run("UPDATE repo SET indexed_at = CURRENT_TIMESTAMP, default_branch = ? WHERE id = ?", [defaultBranch, repoId]);
+ finally {
+ // Fix #2: Ensure lock is released even if DB connection fails
+ if (db) {
+ await db.close();
  }
- else {
- await db.run("UPDATE repo SET indexed_at = CURRENT_TIMESTAMP WHERE id = ?", [repoId]);
+ if (lockAcquired) {
+ releaseLock(lockfilePath);
  }
- });
- console.info(`Indexed ${files.length} files for repo ${repoRoot} at ${databasePath} (commit ${headCommit.slice(0, 12)})`);
- }
- finally {
- await db.close();
- }
+ }
+ });
  }
  function parseArg(flag) {
  const index = process.argv.indexOf(flag);
@@ -554,12 +786,26 @@ if (import.meta.url === pathToFileURL(process.argv[1] ?? "").href) {
  const watch = process.argv.includes("--watch");
  const debounceMs = parseInt(parseArg("--debounce") ?? "500", 10);
  const options = { repoRoot, databasePath, full: full || !since };
- if (since) {
- options.since = since;
- }
- // Run initial indexing
- runIndexer(options)
- .then(async () => {
+ const main = async () => {
+ if (since) {
+ options.since = since;
+ if (!options.full) {
+ const diffPaths = await gitDiffNameOnly(repoRoot, since);
+ options.changedPaths = diffPaths;
+ if (diffPaths.length === 0) {
+ console.info(`No tracked changes since ${since}. Skipping incremental scan.`);
+ }
+ }
+ }
+ const dbMissing = !existsSync(databasePath);
+ const shouldIndex = options.full || !options.changedPaths || options.changedPaths.length > 0 || dbMissing;
+ if (shouldIndex) {
+ await runIndexer(options);
+ }
+ else {
+ // No diff results and not running full indexing: keep metadata fresh without DB writes
+ console.info("No files to reindex. Database remains unchanged.");
+ }
  if (watch) {
  // Start watch mode after initial indexing completes
  const abortController = new AbortController();
@@ -578,8 +824,8 @@ if (import.meta.url === pathToFileURL(process.argv[1] ?? "").href) {
  process.on("SIGTERM", shutdownHandler);
  await watcher.start();
  }
- })
- .catch((error) => {
+ };
+ main().catch((error) => {
  console.error("Failed to index repository. Retry after resolving the logged error.");
  console.error(error);
  process.exitCode = 1;
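
Editor's note: the central change in this hunk replaces the old fixed 1000-row INSERT batches with batches capped by total SQL placeholder count (MAX_SQL_PLACEHOLDERS / columns per row). Below is a minimal, self-contained JavaScript sketch of that pattern for readers who want to see it in isolation; it is an illustration only, not the package's code, and the in-memory fakeDb object stands in for the real DuckDB client.

// Illustrative sketch of the placeholder-capped batching pattern shown in the diff above.
// Assumption: fakeDb is a hypothetical stand-in; the real package uses its DuckDBClient.
const MAX_SQL_PLACEHOLDERS = 30000;

function calculateBatchSize(columnsPerRecord) {
  if (columnsPerRecord <= 0 || !Number.isInteger(columnsPerRecord)) {
    throw new Error(`columnsPerRecord must be a positive integer, got: ${columnsPerRecord}`);
  }
  // Each row contributes columnsPerRecord placeholders, so cap rows per statement.
  return Math.floor(MAX_SQL_PLACEHOLDERS / columnsPerRecord);
}

async function persistInBatches(db, records, batchSize, buildInsert) {
  if (records.length === 0) return;
  for (let i = 0; i < records.length; i += batchSize) {
    const batch = records.slice(i, i + batchSize);
    const { sql, params } = buildInsert(batch);
    await db.run(sql, params); // each statement stays under the placeholder cap
  }
}

async function demo() {
  const fakeDb = {
    async run(sql, params) {
      console.log(`${params.length} params in one INSERT`); // 30000 max with 4 columns
    },
  };
  // 20000 blob-like rows, 4 columns each -> 7500 rows per INSERT (30000 / 4), 3 statements.
  const blobs = Array.from({ length: 20000 }, (_, i) => ({ hash: `h${i}`, sizeBytes: 1, lineCount: 1, content: "" }));
  await persistInBatches(fakeDb, blobs, calculateBatchSize(4), (batch) => ({
    sql: `INSERT OR REPLACE INTO blob (hash, size_bytes, line_count, content) VALUES ${batch.map(() => "(?, ?, ?, ?)").join(", ")}`,
    params: batch.flatMap((b) => [b.hash, b.sizeBytes, b.lineCount, b.content]),
  }));
}

demo().catch(console.error);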