@pratik7368patil/anchor-core 0.1.29 → 0.1.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -175,8 +175,11 @@ function canonicalizeText(text) {
175
175
  return text.toLowerCase().replace(/https?:\/\/\S+/g, "").replace(/[^a-z0-9_./ -]/g, " ").replace(/\s+/g, " ").trim();
176
176
  }
177
177
  function tokenizeSearchText(text, maxTokens = 32) {
178
- const tokens = text.toLowerCase().match(/[a-z0-9_./-]{3,}/g);
179
- return uniqueStrings(tokens ?? []).slice(0, maxTokens);
178
+ const shortSignalTokens = /* @__PURE__ */ new Set(["id", "db", "api", "key", "sql", "jwt", "ui", "ux"]);
179
+ const tokens = text.toLowerCase().match(/[a-z0-9_./-]{2,}/g);
180
+ return uniqueStrings(
181
+ (tokens ?? []).filter((token) => token.length >= 3 || shortSignalTokens.has(token))
182
+ ).slice(0, maxTokens);
180
183
  }
181
184
 
182
185
  // src/security/redact-secrets.ts
@@ -379,7 +382,8 @@ CREATE TABLE IF NOT EXISTS code_index_state (
379
382
  last_indexed_at TEXT NOT NULL,
380
383
  indexed_files INTEGER NOT NULL,
381
384
  code_chunks INTEGER NOT NULL,
382
- skipped_files INTEGER NOT NULL
385
+ skipped_files INTEGER NOT NULL,
386
+ last_indexed_commit TEXT
383
387
  );
384
388
 
385
389
  CREATE TABLE IF NOT EXISTS code_imports (
@@ -1710,9 +1714,28 @@ function calculateCoverage(input) {
1710
1714
 
1711
1715
  // src/db/database.ts
1712
1716
  var CODE_WRITE_PROGRESS_INTERVAL = 150;
1717
+ var FTS_DELETE_BATCH_SIZE = 500;
1713
1718
  function shouldEmitCodeWriteProgress(current, total) {
1714
1719
  return current === 0 || current === 1 || current === total || current % CODE_WRITE_PROGRESS_INTERVAL === 0;
1715
1720
  }
1721
+ function shouldEmitFtsDeleteProgress(current, total) {
1722
+ return current === 0 || current === 1 || current === total || current % FTS_DELETE_BATCH_SIZE === 0;
1723
+ }
1724
+ function deleteFtsRowsByRowId(db, ftsTable, rowIds, onProgress) {
1725
+ if (rowIds.length === 0) {
1726
+ onProgress?.(0, 0);
1727
+ return;
1728
+ }
1729
+ const deleteRow = db.prepare(`DELETE FROM ${ftsTable} WHERE rowid = ?`);
1730
+ onProgress?.(0, rowIds.length);
1731
+ for (const [index, rowId] of rowIds.entries()) {
1732
+ deleteRow.run(rowId);
1733
+ const current = index + 1;
1734
+ if (shouldEmitFtsDeleteProgress(current, rowIds.length)) {
1735
+ onProgress?.(current, rowIds.length);
1736
+ }
1737
+ }
1738
+ }
1716
1739
  function defaultDatabasePath(cwd) {
1717
1740
  return path4.join(cwd, ".anchor", "index.sqlite");
1718
1741
  }
@@ -1738,6 +1761,14 @@ function applyPerformancePragmas(db) {
1738
1761
  db.pragma("mmap_size = 268435456");
1739
1762
  db.pragma("temp_store = MEMORY");
1740
1763
  }
1764
+ function runDatabaseMaintenance(db) {
1765
+ try {
1766
+ db.exec("ANALYZE");
1767
+ db.pragma("optimize");
1768
+ db.pragma("wal_checkpoint(TRUNCATE)");
1769
+ } catch {
1770
+ }
1771
+ }
1741
1772
  function initializeSchema(db) {
1742
1773
  db.exec(SCHEMA_SQL);
1743
1774
  ensureColumn(db, "sync_state", "history_coverage", "TEXT");
@@ -1751,6 +1782,7 @@ function initializeSchema(db) {
1751
1782
  ensureColumn(db, "sync_state", "graphql_cursor_reset_at", "TEXT");
1752
1783
  ensureColumn(db, "sync_state", "graphql_cursor_reason", "TEXT");
1753
1784
  ensureColumn(db, "sync_state", "graphql_cursor_updated_at", "TEXT");
1785
+ ensureColumn(db, "code_index_state", "last_indexed_commit", "TEXT");
1754
1786
  }
1755
1787
  function ensureColumn(db, tableName, columnName, definition) {
1756
1788
  const columns = db.prepare(`PRAGMA table_info(${tableName})`).all();
@@ -1800,10 +1832,153 @@ function ensureRepository(db, fullName) {
1800
1832
  if (!row) throw new Error(`Failed to create repository row for ${fullName}`);
1801
1833
  return row.id;
1802
1834
  }
1835
+ function getRepositoryId(db, fullName) {
1836
+ const row = db.prepare("SELECT id FROM repositories WHERE full_name = ?").get(fullName);
1837
+ return row?.id;
1838
+ }
1803
1839
  function getLastSyncTime(db, repo) {
1804
1840
  const row = db.prepare("SELECT last_sync_at FROM sync_state WHERE repo = ?").get(repo);
1805
1841
  return row?.last_sync_at ?? void 0;
1806
1842
  }
1843
+ function getCodeIndexStateForRepo(db, repo) {
1844
+ initializeSchema(db);
1845
+ const row = db.prepare(
1846
+ `SELECT repo, last_indexed_at, indexed_files, code_chunks, skipped_files, last_indexed_commit
1847
+ FROM code_index_state
1848
+ WHERE repo = ?`
1849
+ ).get(repo);
1850
+ if (!row?.repo) return void 0;
1851
+ return {
1852
+ repo: row.repo,
1853
+ lastIndexedAt: row.last_indexed_at ?? void 0,
1854
+ indexedFiles: row.indexed_files ?? 0,
1855
+ codeChunks: row.code_chunks ?? 0,
1856
+ skippedFiles: row.skipped_files ?? 0,
1857
+ lastIndexedCommit: row.last_indexed_commit ?? void 0
1858
+ };
1859
+ }
1860
+ function getRepoCodeFileHashes(db, repo) {
1861
+ initializeSchema(db);
1862
+ const repoId = getRepositoryId(db, repo);
1863
+ if (!repoId) return /* @__PURE__ */ new Map();
1864
+ const rows = db.prepare("SELECT path, content_hash FROM code_files WHERE repo_id = ?").all(repoId);
1865
+ return new Map(rows.map((row) => [row.path, row.content_hash]));
1866
+ }
1867
+ function getRepoCodeFiles(db, repo) {
1868
+ initializeSchema(db);
1869
+ const repoId = getRepositoryId(db, repo);
1870
+ if (!repoId) return [];
1871
+ const rows = db.prepare(
1872
+ `SELECT path, language, size_bytes, content_hash, updated_at
1873
+ FROM code_files
1874
+ WHERE repo_id = ?`
1875
+ ).all(repoId);
1876
+ return rows.map((row) => ({
1877
+ repo,
1878
+ path: row.path,
1879
+ language: row.language ?? void 0,
1880
+ sizeBytes: row.size_bytes,
1881
+ contentHash: row.content_hash,
1882
+ updatedAt: row.updated_at
1883
+ }));
1884
+ }
1885
+ function parseJsonArray3(value) {
1886
+ try {
1887
+ const parsed = JSON.parse(value);
1888
+ return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
1889
+ } catch {
1890
+ return [];
1891
+ }
1892
+ }
1893
+ function getRepoCodeChunkSymbols(db, repo) {
1894
+ initializeSchema(db);
1895
+ const repoId = getRepositoryId(db, repo);
1896
+ if (!repoId) return [];
1897
+ const rows = db.prepare(
1898
+ `SELECT id, file_path, language, start_line, end_line, symbols_json, content_hash, updated_at
1899
+ FROM code_chunks
1900
+ WHERE repo_id = ?`
1901
+ ).all(repoId);
1902
+ return rows.map((row) => ({
1903
+ id: row.id,
1904
+ repo,
1905
+ filePath: row.file_path,
1906
+ language: row.language ?? void 0,
1907
+ startLine: row.start_line,
1908
+ endLine: row.end_line,
1909
+ sanitizedText: "",
1910
+ symbols: parseJsonArray3(row.symbols_json),
1911
+ contentHash: row.content_hash,
1912
+ updatedAt: row.updated_at
1913
+ }));
1914
+ }
1915
+ function getRepoTestChunks(db, repo) {
1916
+ initializeSchema(db);
1917
+ const repoId = getRepositoryId(db, repo);
1918
+ if (!repoId) return [];
1919
+ const rows = db.prepare(
1920
+ `SELECT id, file_path, language, start_line, end_line, sanitized_text, symbols_json, content_hash, updated_at
1921
+ FROM code_chunks
1922
+ WHERE repo_id = ? AND file_path IN (
1923
+ SELECT path FROM test_files WHERE repo_id = ?
1924
+ )`
1925
+ ).all(repoId, repoId);
1926
+ return rows.map((row) => ({
1927
+ id: row.id,
1928
+ repo,
1929
+ filePath: row.file_path,
1930
+ language: row.language ?? void 0,
1931
+ startLine: row.start_line,
1932
+ endLine: row.end_line,
1933
+ sanitizedText: row.sanitized_text,
1934
+ symbols: parseJsonArray3(row.symbols_json),
1935
+ contentHash: row.content_hash,
1936
+ updatedAt: row.updated_at
1937
+ }));
1938
+ }
1939
+ function getRepoCodeImports(db, repo) {
1940
+ initializeSchema(db);
1941
+ const repoId = getRepositoryId(db, repo);
1942
+ if (!repoId) return [];
1943
+ const rows = db.prepare(
1944
+ `SELECT source_path, specifier, imported_path, imported_symbols_json, kind
1945
+ FROM code_imports
1946
+ WHERE repo_id = ?`
1947
+ ).all(repoId);
1948
+ return rows.map((row) => ({
1949
+ repo,
1950
+ sourcePath: row.source_path,
1951
+ specifier: row.specifier,
1952
+ importedPath: row.imported_path ?? void 0,
1953
+ importedSymbols: parseJsonArray3(row.imported_symbols_json),
1954
+ kind: row.kind
1955
+ }));
1956
+ }
1957
+ function getRepoCodeCounts(db, repo) {
1958
+ initializeSchema(db);
1959
+ const repoId = getRepositoryId(db, repo);
1960
+ if (!repoId) return { files: 0, chunks: 0 };
1961
+ const files = db.prepare("SELECT COUNT(*) AS count FROM code_files WHERE repo_id = ?").get(repoId).count;
1962
+ const chunks = db.prepare("SELECT COUNT(*) AS count FROM code_chunks WHERE repo_id = ?").get(repoId).count;
1963
+ return { files, chunks };
1964
+ }
1965
+ function touchCodeIndexState(db, repo, skippedFiles, currentCommit2) {
1966
+ initializeSchema(db);
1967
+ const counts = getRepoCodeCounts(db, repo);
1968
+ const now = (/* @__PURE__ */ new Date()).toISOString();
1969
+ db.prepare(
1970
+ `INSERT INTO code_index_state
1971
+ (repo, last_indexed_at, indexed_files, code_chunks, skipped_files, last_indexed_commit)
1972
+ VALUES (?, ?, ?, ?, ?, ?)
1973
+ ON CONFLICT(repo) DO UPDATE SET
1974
+ last_indexed_at = excluded.last_indexed_at,
1975
+ indexed_files = excluded.indexed_files,
1976
+ code_chunks = excluded.code_chunks,
1977
+ skipped_files = excluded.skipped_files,
1978
+ last_indexed_commit = excluded.last_indexed_commit`
1979
+ ).run(repo, now, counts.files, counts.chunks, skippedFiles, currentCommit2 ?? null);
1980
+ return counts;
1981
+ }
1807
1982
  function updateSyncState(db, repo, lastIndexedPr, metadata = {}) {
1808
1983
  const now = (/* @__PURE__ */ new Date()).toISOString();
1809
1984
  db.prepare(
@@ -1905,9 +2080,12 @@ function clearGraphQLFetchCheckpoint(db, repo, scope) {
1905
2080
  ).run((/* @__PURE__ */ new Date()).toISOString(), repo);
1906
2081
  }
1907
2082
  function deleteExistingPrData(db, prId) {
1908
- db.prepare(
1909
- "DELETE FROM wisdom_units_fts WHERE unitId IN (SELECT id FROM wisdom_units WHERE pr_id = ?)"
1910
- ).run(prId);
2083
+ const wisdomRowIds = db.prepare("SELECT rowid FROM wisdom_units WHERE pr_id = ?").all(prId);
2084
+ deleteFtsRowsByRowId(
2085
+ db,
2086
+ "wisdom_units_fts",
2087
+ wisdomRowIds.map((row) => row.rowid)
2088
+ );
1911
2089
  db.prepare("DELETE FROM regression_events WHERE pr_id = ?").run(prId);
1912
2090
  db.prepare("DELETE FROM wisdom_units WHERE pr_id = ?").run(prId);
1913
2091
  db.prepare("DELETE FROM pr_comments WHERE pr_id = ?").run(prId);
@@ -2019,11 +2197,11 @@ function upsertPullRequest(db, pr, wisdomUnits, regressionEvents = []) {
2019
2197
  );
2020
2198
  const insertFts = db.prepare(
2021
2199
  `INSERT INTO wisdom_units_fts
2022
- (unitId, sanitizedText, filePaths, symbols, prTitle, prBody, category)
2023
- VALUES (?, ?, ?, ?, ?, ?, ?)`
2200
+ (rowid, unitId, sanitizedText, filePaths, symbols, prTitle, prBody, category)
2201
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)`
2024
2202
  );
2025
2203
  for (const unit of wisdomUnits) {
2026
- insertWisdom.run(
2204
+ const wisdomInsert = insertWisdom.run(
2027
2205
  unit.id,
2028
2206
  repoId,
2029
2207
  prRow.id,
@@ -2042,6 +2220,7 @@ function upsertPullRequest(db, pr, wisdomUnits, regressionEvents = []) {
2042
2220
  unit.confidence
2043
2221
  );
2044
2222
  insertFts.run(
2223
+ Number(wisdomInsert.lastInsertRowid),
2045
2224
  unit.id,
2046
2225
  unit.sanitizedText,
2047
2226
  unit.filePaths.join(" "),
@@ -2092,28 +2271,88 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd, ar
2092
2271
  initializeSchema(db);
2093
2272
  const repoId = ensureRepository(db, repo);
2094
2273
  const now = (/* @__PURE__ */ new Date()).toISOString();
2095
- options.onProgress?.({ stage: "writing_code_index", repo, phase: "Inferring test awareness" });
2096
- const testAwareness = inferTestAwareness(repo, codeFiles, codeChunks, {
2274
+ const deletedPaths = options.deletedPaths ?? [];
2275
+ const changedImports = options.changedImports;
2276
+ const testAwareness = options.testAwareness ?? inferTestAwareness(repo, codeFiles, codeChunks, {
2097
2277
  onProgress: options.onProgress
2098
2278
  });
2099
2279
  options.onProgress?.({ stage: "writing_code_index", repo, phase: "Writing code index" });
2280
+ const changedPaths = [...new Set(codeFiles.map((file) => file.path))];
2281
+ const affectedPaths = [.../* @__PURE__ */ new Set([...changedPaths, ...deletedPaths])];
2100
2282
  const transaction = db.transaction(() => {
2101
- const existingChunkCount = db.prepare("SELECT COUNT(*) AS count FROM code_chunks WHERE repo_id = ?").get(repoId).count;
2102
- const existingPatternCount = db.prepare("SELECT COUNT(*) AS count FROM architecture_patterns WHERE repo_id = ?").get(repoId).count;
2283
+ let existingChunkRowIds = [];
2284
+ if (affectedPaths.length > 0) {
2285
+ const placeholders = affectedPaths.map(() => "?").join(", ");
2286
+ existingChunkRowIds = db.prepare(
2287
+ `SELECT rowid
2288
+ FROM code_chunks
2289
+ WHERE repo_id = ? AND file_path IN (${placeholders})`
2290
+ ).all(repoId, ...affectedPaths);
2291
+ }
2292
+ const existingPatternRowIds = db.prepare("SELECT rowid FROM architecture_patterns WHERE repo_id = ?").all(repoId);
2103
2293
  options.onProgress?.({
2104
2294
  stage: "deleting_existing_code_index",
2105
2295
  repo,
2106
- chunks: existingChunkCount,
2107
- patterns: existingPatternCount
2296
+ chunks: existingChunkRowIds.length,
2297
+ patterns: existingPatternRowIds.length
2108
2298
  });
2109
- db.prepare(
2110
- "DELETE FROM code_chunks_fts WHERE chunkId IN (SELECT id FROM code_chunks WHERE repo_id = ?)"
2111
- ).run(repoId);
2112
- db.prepare("DELETE FROM code_chunks WHERE repo_id = ?").run(repoId);
2113
- db.prepare("DELETE FROM code_files WHERE repo_id = ?").run(repoId);
2114
- db.prepare("DELETE FROM test_links WHERE repo_id = ? AND reason != 'PR co-change'").run(repoId);
2115
- db.prepare("DELETE FROM test_files WHERE repo_id = ?").run(repoId);
2116
- deleteExistingArchitectureData(db, repoId);
2299
+ deleteFtsRowsByRowId(
2300
+ db,
2301
+ "code_chunks_fts",
2302
+ existingChunkRowIds.map((row) => row.rowid),
2303
+ (current, total) => options.onProgress?.({
2304
+ stage: "deleting_code_fts",
2305
+ repo,
2306
+ current,
2307
+ total,
2308
+ chunks: existingChunkRowIds.length
2309
+ })
2310
+ );
2311
+ if (affectedPaths.length > 0) {
2312
+ const placeholders = affectedPaths.map(() => "?").join(", ");
2313
+ db.prepare(
2314
+ `DELETE FROM code_chunks
2315
+ WHERE repo_id = ? AND file_path IN (${placeholders})`
2316
+ ).run(repoId, ...affectedPaths);
2317
+ db.prepare(
2318
+ `DELETE FROM code_files
2319
+ WHERE repo_id = ? AND path IN (${placeholders})`
2320
+ ).run(repoId, ...affectedPaths);
2321
+ db.prepare(
2322
+ `DELETE FROM test_links
2323
+ WHERE repo_id = ?
2324
+ AND reason != 'PR co-change'
2325
+ AND (source_path IN (${placeholders}) OR test_path IN (${placeholders}))`
2326
+ ).run(repoId, ...affectedPaths, ...affectedPaths);
2327
+ db.prepare(
2328
+ `DELETE FROM test_files
2329
+ WHERE repo_id = ? AND path IN (${placeholders})`
2330
+ ).run(repoId, ...affectedPaths);
2331
+ if (changedImports) {
2332
+ db.prepare(
2333
+ `DELETE FROM code_imports
2334
+ WHERE repo_id = ? AND source_path IN (${placeholders})`
2335
+ ).run(repoId, ...affectedPaths);
2336
+ }
2337
+ }
2338
+ deleteExistingArchitectureData(db, repoId, repo, existingPatternRowIds, options);
2339
+ if (changedImports) {
2340
+ const insertImport = db.prepare(
2341
+ `INSERT INTO code_imports
2342
+ (repo_id, source_path, specifier, imported_path, imported_symbols_json, kind)
2343
+ VALUES (?, ?, ?, ?, ?, ?)`
2344
+ );
2345
+ for (const item of changedImports) {
2346
+ insertImport.run(
2347
+ repoId,
2348
+ item.sourcePath,
2349
+ item.specifier,
2350
+ item.importedPath ?? null,
2351
+ JSON.stringify(item.importedSymbols),
2352
+ item.kind
2353
+ );
2354
+ }
2355
+ }
2117
2356
  const insertFile = db.prepare(
2118
2357
  `INSERT INTO code_files
2119
2358
  (repo_id, path, language, size_bytes, content_hash, updated_at)
@@ -2155,8 +2394,8 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd, ar
2155
2394
  );
2156
2395
  const insertFts = db.prepare(
2157
2396
  `INSERT INTO code_chunks_fts
2158
- (chunkId, sanitizedText, filePath, symbols, language)
2159
- VALUES (?, ?, ?, ?, ?)`
2397
+ (rowid, chunkId, sanitizedText, filePath, symbols, language)
2398
+ VALUES (?, ?, ?, ?, ?, ?)`
2160
2399
  );
2161
2400
  options.onProgress?.({
2162
2401
  stage: "writing_code_chunks",
@@ -2169,7 +2408,7 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd, ar
2169
2408
  for (const [index, chunk] of codeChunks.entries()) {
2170
2409
  const fileId = fileIds.get(chunk.filePath);
2171
2410
  if (!fileId) continue;
2172
- insertChunk.run(
2411
+ const chunkInsert = insertChunk.run(
2173
2412
  chunk.id,
2174
2413
  repoId,
2175
2414
  fileId,
@@ -2184,6 +2423,7 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd, ar
2184
2423
  chunk.updatedAt
2185
2424
  );
2186
2425
  insertFts.run(
2426
+ Number(chunkInsert.lastInsertRowid),
2187
2427
  chunk.id,
2188
2428
  chunk.sanitizedText,
2189
2429
  chunk.filePath,
@@ -2204,18 +2444,22 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd, ar
2204
2444
  }
2205
2445
  }
2206
2446
  insertTestAwareness(db, repoId, repo, testAwareness.testFiles, testAwareness.testLinks, options);
2207
- insertArchitectureData(db, repoId, repo, architecture, options);
2447
+ insertArchitectureData(db, repoId, repo, architecture, options, !changedImports);
2208
2448
  insertArchitectureMapEdges(db, repoId, repo, architecture, testAwareness.testLinks, options);
2209
2449
  options.onProgress?.({ stage: "writing_code_index", repo, phase: "Updating index state" });
2450
+ const totalFileCount = db.prepare("SELECT COUNT(*) AS count FROM code_files WHERE repo_id = ?").get(repoId).count;
2451
+ const totalChunkCount = db.prepare("SELECT COUNT(*) AS count FROM code_chunks WHERE repo_id = ?").get(repoId).count;
2210
2452
  db.prepare(
2211
- `INSERT INTO code_index_state (repo, last_indexed_at, indexed_files, code_chunks, skipped_files)
2212
- VALUES (?, ?, ?, ?, ?)
2453
+ `INSERT INTO code_index_state
2454
+ (repo, last_indexed_at, indexed_files, code_chunks, skipped_files, last_indexed_commit)
2455
+ VALUES (?, ?, ?, ?, ?, ?)
2213
2456
  ON CONFLICT(repo) DO UPDATE SET
2214
2457
  last_indexed_at = excluded.last_indexed_at,
2215
2458
  indexed_files = excluded.indexed_files,
2216
2459
  code_chunks = excluded.code_chunks,
2217
- skipped_files = excluded.skipped_files`
2218
- ).run(repo, now, codeFiles.length, codeChunks.length, skippedFiles);
2460
+ skipped_files = excluded.skipped_files,
2461
+ last_indexed_commit = excluded.last_indexed_commit`
2462
+ ).run(repo, now, totalFileCount, totalChunkCount, skippedFiles, options.currentCommit ?? null);
2219
2463
  db.prepare(
2220
2464
  `INSERT INTO architecture_index_state (repo, last_indexed_at, components, patterns, imports)
2221
2465
  VALUES (?, ?, ?, ?, ?)
@@ -2233,9 +2477,10 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd, ar
2233
2477
  );
2234
2478
  });
2235
2479
  transaction();
2480
+ const counts = getRepoCodeCounts(db, repo);
2236
2481
  return {
2237
- indexedFiles: codeFiles.length,
2238
- codeChunksCreated: codeChunks.length,
2482
+ indexedFiles: counts.files,
2483
+ codeChunksCreated: counts.chunks,
2239
2484
  testFilesIndexed: testAwareness.testFiles.length,
2240
2485
  testLinksCreated: testAwareness.testLinks.length,
2241
2486
  architectureComponentsIndexed: architecture.components.length,
@@ -2245,46 +2490,56 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd, ar
2245
2490
  databasePath: defaultDatabasePath(cwd)
2246
2491
  };
2247
2492
  }
2248
- function deleteExistingArchitectureData(db, repoId) {
2249
- db.prepare(
2250
- "DELETE FROM architecture_patterns_fts WHERE patternId IN (SELECT id FROM architecture_patterns WHERE repo_id = ?)"
2251
- ).run(repoId);
2493
+ function deleteExistingArchitectureData(db, repoId, repo, patternRowIds, options = {}) {
2494
+ deleteFtsRowsByRowId(
2495
+ db,
2496
+ "architecture_patterns_fts",
2497
+ patternRowIds.map((row) => row.rowid),
2498
+ (current, total) => options.onProgress?.({
2499
+ stage: "deleting_architecture_fts",
2500
+ repo,
2501
+ current,
2502
+ total,
2503
+ patterns: patternRowIds.length
2504
+ })
2505
+ );
2252
2506
  db.prepare("DELETE FROM architecture_patterns WHERE repo_id = ?").run(repoId);
2253
2507
  db.prepare("DELETE FROM architecture_components WHERE repo_id = ?").run(repoId);
2254
- db.prepare("DELETE FROM code_imports WHERE repo_id = ?").run(repoId);
2255
2508
  db.prepare("DELETE FROM architecture_map_edges WHERE repo_id = ?").run(repoId);
2256
2509
  }
2257
- function insertArchitectureData(db, repoId, repo, architecture, options = {}) {
2258
- const insertImport = db.prepare(
2259
- `INSERT INTO code_imports
2260
- (repo_id, source_path, specifier, imported_path, imported_symbols_json, kind)
2261
- VALUES (?, ?, ?, ?, ?, ?)`
2262
- );
2263
- options.onProgress?.({
2264
- stage: "writing_architecture_data",
2265
- repo,
2266
- current: 0,
2267
- total: architecture.imports.length,
2268
- kind: "imports"
2269
- });
2270
- for (const [index, item] of architecture.imports.entries()) {
2271
- insertImport.run(
2272
- repoId,
2273
- item.sourcePath,
2274
- item.specifier,
2275
- item.importedPath ?? null,
2276
- JSON.stringify(item.importedSymbols),
2277
- item.kind
2510
+ function insertArchitectureData(db, repoId, repo, architecture, options = {}, includeImports = true) {
2511
+ if (includeImports) {
2512
+ const insertImport = db.prepare(
2513
+ `INSERT INTO code_imports
2514
+ (repo_id, source_path, specifier, imported_path, imported_symbols_json, kind)
2515
+ VALUES (?, ?, ?, ?, ?, ?)`
2278
2516
  );
2279
- const current = index + 1;
2280
- if (shouldEmitCodeWriteProgress(current, architecture.imports.length)) {
2281
- options.onProgress?.({
2282
- stage: "writing_architecture_data",
2283
- repo,
2284
- current,
2285
- total: architecture.imports.length,
2286
- kind: "imports"
2287
- });
2517
+ options.onProgress?.({
2518
+ stage: "writing_architecture_data",
2519
+ repo,
2520
+ current: 0,
2521
+ total: architecture.imports.length,
2522
+ kind: "imports"
2523
+ });
2524
+ for (const [index, item] of architecture.imports.entries()) {
2525
+ insertImport.run(
2526
+ repoId,
2527
+ item.sourcePath,
2528
+ item.specifier,
2529
+ item.importedPath ?? null,
2530
+ JSON.stringify(item.importedSymbols),
2531
+ item.kind
2532
+ );
2533
+ const current = index + 1;
2534
+ if (shouldEmitCodeWriteProgress(current, architecture.imports.length)) {
2535
+ options.onProgress?.({
2536
+ stage: "writing_architecture_data",
2537
+ repo,
2538
+ current,
2539
+ total: architecture.imports.length,
2540
+ kind: "imports"
2541
+ });
2542
+ }
2288
2543
  }
2289
2544
  }
2290
2545
  const insertComponent = db.prepare(
@@ -2331,8 +2586,8 @@ function insertArchitectureData(db, repoId, repo, architecture, options = {}) {
2331
2586
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
2332
2587
  );
2333
2588
  const insertFts = db.prepare(
2334
- `INSERT INTO architecture_patterns_fts (patternId, summary, area, sourceFiles, symbols)
2335
- VALUES (?, ?, ?, ?, ?)`
2589
+ `INSERT INTO architecture_patterns_fts (rowid, patternId, summary, area, sourceFiles, symbols)
2590
+ VALUES (?, ?, ?, ?, ?, ?)`
2336
2591
  );
2337
2592
  options.onProgress?.({
2338
2593
  stage: "writing_architecture_data",
@@ -2342,7 +2597,7 @@ function insertArchitectureData(db, repoId, repo, architecture, options = {}) {
2342
2597
  kind: "patterns"
2343
2598
  });
2344
2599
  for (const [index, pattern] of architecture.patterns.entries()) {
2345
- insertPattern.run(
2600
+ const patternInsert = insertPattern.run(
2346
2601
  pattern.id,
2347
2602
  repoId,
2348
2603
  pattern.repo,
@@ -2356,6 +2611,7 @@ function insertArchitectureData(db, repoId, repo, architecture, options = {}) {
2356
2611
  pattern.createdAt
2357
2612
  );
2358
2613
  insertFts.run(
2614
+ Number(patternInsert.lastInsertRowid),
2359
2615
  pattern.id,
2360
2616
  pattern.sanitizedSummary,
2361
2617
  pattern.area,
@@ -3014,18 +3270,7 @@ function createPattern(input) {
3014
3270
  };
3015
3271
  }
3016
3272
  function buildArchitectureIndex(repo, files, chunks, options = {}) {
3017
- const allPaths = files.map((file) => file.path);
3018
- const codePaths = new Set(allPaths);
3019
- const relatedTestIndex = buildRelatedTestIndex(allPaths);
3020
- const symbolSetsByPath = /* @__PURE__ */ new Map();
3021
- for (const chunk of chunks) {
3022
- const existing = symbolSetsByPath.get(chunk.filePath) ?? /* @__PURE__ */ new Set();
3023
- for (const symbol of chunk.symbols) {
3024
- if (existing.size >= 40) break;
3025
- existing.add(symbol);
3026
- }
3027
- symbolSetsByPath.set(chunk.filePath, existing);
3028
- }
3273
+ const codePaths = new Set(files.map((file) => file.path));
3029
3274
  const imports = [];
3030
3275
  options.onProgress?.({
3031
3276
  stage: "building_architecture_imports",
@@ -3048,6 +3293,20 @@ function buildArchitectureIndex(repo, files, chunks, options = {}) {
3048
3293
  });
3049
3294
  }
3050
3295
  }
3296
+ return buildArchitectureFromIndexedData(repo, files, chunks, imports, options);
3297
+ }
3298
+ function buildArchitectureFromIndexedData(repo, files, chunks, imports, options = {}) {
3299
+ const allPaths = files.map((file) => file.path);
3300
+ const relatedTestIndex = buildRelatedTestIndex(allPaths);
3301
+ const symbolSetsByPath = /* @__PURE__ */ new Map();
3302
+ for (const chunk of chunks) {
3303
+ const existing = symbolSetsByPath.get(chunk.filePath) ?? /* @__PURE__ */ new Set();
3304
+ for (const symbol of chunk.symbols) {
3305
+ if (existing.size >= 40) break;
3306
+ existing.add(symbol);
3307
+ }
3308
+ symbolSetsByPath.set(chunk.filePath, existing);
3309
+ }
3051
3310
  const importsByPath = /* @__PURE__ */ new Map();
3052
3311
  for (const item of imports) {
3053
3312
  const existing = importsByPath.get(item.sourcePath) ?? [];
@@ -3063,7 +3322,7 @@ function buildArchitectureIndex(repo, files, chunks, options = {}) {
3063
3322
  components: 0
3064
3323
  });
3065
3324
  for (const [index, file] of files.entries()) {
3066
- const area = classifyArchitectureArea(file.path, file.language, file.content);
3325
+ const area = classifyArchitectureArea(file.path, file.language);
3067
3326
  const fileImports = importsByPath.get(file.path) ?? [];
3068
3327
  const symbols = [...symbolSetsByPath.get(file.path) ?? []];
3069
3328
  components.push({
@@ -3277,15 +3536,134 @@ function discoverGitFiles(cwd) {
3277
3536
  });
3278
3537
  return output.split("\n").map((line) => normalizeGitPath(line.trim())).filter(Boolean);
3279
3538
  }
3539
+ function discoverGitUntrackedFiles(cwd) {
3540
+ const output = execFileSync3("git", ["ls-files", "--others", "--exclude-standard"], {
3541
+ cwd,
3542
+ encoding: "utf8",
3543
+ stdio: ["ignore", "pipe", "pipe"]
3544
+ });
3545
+ return output.split("\n").map((line) => normalizeGitPath(line.trim())).filter(Boolean);
3546
+ }
3547
+ function execGitLines(cwd, args) {
3548
+ const output = execFileSync3("git", args, {
3549
+ cwd,
3550
+ encoding: "utf8",
3551
+ stdio: ["ignore", "pipe", "ignore"]
3552
+ });
3553
+ return output.split("\n").map((line) => line.trimEnd()).filter(Boolean);
3554
+ }
3555
+ function readGitHeadCommit(cwd) {
3556
+ try {
3557
+ return execFileSync3("git", ["rev-parse", "HEAD"], {
3558
+ cwd,
3559
+ encoding: "utf8",
3560
+ stdio: ["ignore", "pipe", "ignore"]
3561
+ }).trim();
3562
+ } catch {
3563
+ return void 0;
3564
+ }
3565
+ }
3566
+ function hasDirtyWorkingTree(cwd) {
3567
+ try {
3568
+ const status = execFileSync3("git", ["status", "--porcelain"], {
3569
+ cwd,
3570
+ encoding: "utf8",
3571
+ stdio: ["ignore", "pipe", "ignore"]
3572
+ });
3573
+ return status.trim().length > 0;
3574
+ } catch {
3575
+ return true;
3576
+ }
3577
+ }
3578
+ function parseNameStatusLine(line) {
3579
+ const parts = line.split(" ").map((item) => normalizeGitPath(item));
3580
+ if (parts.length < 2) return void 0;
3581
+ const status = parts[0] ?? "";
3582
+ if (!status) return void 0;
3583
+ if (status.startsWith("R") || status.startsWith("C")) {
3584
+ return { status, previousPath: parts[1], path: parts[2] };
3585
+ }
3586
+ return { status, path: parts[1] };
3587
+ }
3588
+ function planIncrementalCodeIndex(cwd, lastIndexedCommit, existingIndexedPaths) {
3589
+ const currentCommit2 = readGitHeadCommit(cwd);
3590
+ const trackedPaths = discoverGitFiles(cwd);
3591
+ const trackedSet = new Set(trackedPaths);
3592
+ const deletedPaths = /* @__PURE__ */ new Set();
3593
+ const changedPaths = /* @__PURE__ */ new Set();
3594
+ const dirtyWorkingTree = hasDirtyWorkingTree(cwd);
3595
+ if (!lastIndexedCommit) {
3596
+ return {
3597
+ currentCommit: currentCommit2,
3598
+ trackedPaths,
3599
+ changedPaths: trackedPaths,
3600
+ deletedPaths: [...existingIndexedPaths].filter((filePath) => !trackedSet.has(filePath)),
3601
+ dirtyWorkingTree,
3602
+ fallbackToFullHashCompare: true,
3603
+ reason: "No previous commit snapshot; using full hash comparison."
3604
+ };
3605
+ }
3606
+ if (dirtyWorkingTree) {
3607
+ return {
3608
+ currentCommit: currentCommit2,
3609
+ trackedPaths,
3610
+ changedPaths: trackedPaths,
3611
+ deletedPaths: [...existingIndexedPaths].filter((filePath) => !trackedSet.has(filePath)),
3612
+ dirtyWorkingTree,
3613
+ fallbackToFullHashCompare: true,
3614
+ reason: "Working tree is dirty; using full hash comparison for deterministic results."
3615
+ };
3616
+ }
3617
+ try {
3618
+ const lines = execGitLines(cwd, ["diff", "--name-status", `${lastIndexedCommit}..HEAD`]);
3619
+ for (const line of lines) {
3620
+ const parsed = parseNameStatusLine(line);
3621
+ if (!parsed?.path) continue;
3622
+ const statusCode = parsed.status[0];
3623
+ const normalizedPath = normalizeGitPath(parsed.path);
3624
+ if (statusCode === "D") {
3625
+ deletedPaths.add(normalizedPath);
3626
+ continue;
3627
+ }
3628
+ if (trackedSet.has(normalizedPath)) changedPaths.add(normalizedPath);
3629
+ }
3630
+ for (const untrackedPath of discoverGitUntrackedFiles(cwd)) {
3631
+ if (trackedSet.has(untrackedPath)) changedPaths.add(untrackedPath);
3632
+ }
3633
+ for (const existingPath of existingIndexedPaths) {
3634
+ if (!trackedSet.has(existingPath)) deletedPaths.add(existingPath);
3635
+ }
3636
+ return {
3637
+ currentCommit: currentCommit2,
3638
+ trackedPaths,
3639
+ changedPaths: [...changedPaths],
3640
+ deletedPaths: [...deletedPaths],
3641
+ dirtyWorkingTree: false,
3642
+ fallbackToFullHashCompare: false,
3643
+ reason: "Using git diff and untracked files against last indexed commit."
3644
+ };
3645
+ } catch {
3646
+ return {
3647
+ currentCommit: currentCommit2,
3648
+ trackedPaths,
3649
+ changedPaths: trackedPaths,
3650
+ deletedPaths: [...existingIndexedPaths].filter((filePath) => !trackedSet.has(filePath)),
3651
+ dirtyWorkingTree: true,
3652
+ fallbackToFullHashCompare: true,
3653
+ reason: "Unable to compute git diff; falling back to full hash comparison."
3654
+ };
3655
+ }
3656
+ }
3280
3657
  var DISCOVERY_SCAN_INTERVAL = 200;
3281
- function discoverCodeFiles(cwd, repo, options = {}) {
3658
+ function discoverFromPaths(cwd, repo, inputPaths, options = {}) {
3282
3659
  const maxFileBytes = options.maxFileBytes ?? DEFAULT_MAX_CODE_FILE_BYTES;
3660
+ const includeContent = options.includeContent ?? false;
3283
3661
  const rootPath = path7.resolve(cwd);
3284
3662
  const files = [];
3285
3663
  let skippedFiles = 0;
3286
- const gitFiles = discoverGitFiles(cwd);
3287
- const total = gitFiles.length;
3288
- for (const [scanIndex, filePath] of gitFiles.entries()) {
3664
+ const candidatePaths = [...new Set(inputPaths.map((value) => normalizeGitPath(value)).filter(Boolean))];
3665
+ const total = candidatePaths.length;
3666
+ for (const [scanIndex, filePath] of candidatePaths.entries()) {
3289
3667
  const scanned = scanIndex + 1;
3290
3668
  if (scanned % DISCOVERY_SCAN_INTERVAL === 0 || scanned === total) {
3291
3669
  options.onScan?.(scanned, total);
@@ -3316,7 +3694,6 @@ function discoverCodeFiles(cwd, repo, options = {}) {
3316
3694
  skippedFiles += 1;
3317
3695
  continue;
3318
3696
  }
3319
- const content = buffer.toString("utf8");
3320
3697
  files.push({
3321
3698
  repo,
3322
3699
  path: filePath,
@@ -3325,11 +3702,21 @@ function discoverCodeFiles(cwd, repo, options = {}) {
3325
3702
  contentHash: crypto3.createHash("sha256").update(buffer).digest("hex"),
3326
3703
  updatedAt: stat.mtime.toISOString(),
3327
3704
  absolutePath,
3328
- content
3705
+ ...includeContent ? { content: buffer.toString("utf8") } : {}
3329
3706
  });
3330
3707
  }
3331
3708
  return { files, skippedFiles };
3332
3709
  }
3710
/**
 * Discovers code files for a repository using the full path list that
 * discoverGitFiles reports for the working directory.
 *
 * @param {string} cwd - Working directory of the repository.
 * @param {string} repo - Repository identifier stored on each discovered file.
 * @param {object} [options] - Forwarded untouched to discoverFromPaths.
 * @returns {{ files: object[], skippedFiles: number }} The discovery result
 *   produced by discoverFromPaths.
 */
function discoverCodeFiles(cwd, repo, options = {}) {
  const gitListedPaths = discoverGitFiles(cwd);
  return discoverFromPaths(cwd, repo, gitListedPaths, options);
}
3713
/**
 * Discovers code files for an explicit list of paths instead of the whole
 * repository; used by the incremental indexer for its changed-path set.
 *
 * @param {string} cwd - Working directory of the repository.
 * @param {string} repo - Repository identifier stored on each discovered file.
 * @param {string[]} filePaths - Candidate paths to inspect.
 * @param {object} [options] - Forwarded untouched to discoverFromPaths.
 * @returns {{ files: object[], skippedFiles: number }} The discovery result
 *   produced by discoverFromPaths.
 */
function discoverCodeFilesByPaths(cwd, repo, filePaths, options = {}) {
  const discovery = discoverFromPaths(cwd, repo, filePaths, options);
  return discovery;
}
3716
/**
 * Returns the text of a discovered code file.
 *
 * Discovery attaches `content` only when it was asked to include it, so the
 * text is otherwise read from `absolutePath` as UTF-8 on demand.
 *
 * @param {{ content?: string, absolutePath?: string }} file - Discovered file record.
 * @returns {string} The inline content when present, otherwise the file's text on disk.
 * @throws {TypeError} When the record has neither inline content nor an absolutePath.
 * @throws {Error} Any error raised by fs.readFileSync (for example ENOENT).
 */
function readDiscoveredCodeFileContent(file) {
  if (typeof file.content === "string") return file.content;
  // Records rebuilt from the index carry no absolutePath; fail with a message
  // that names the real problem instead of Node's generic "path" argument error.
  if (file.absolutePath == null) {
    throw new TypeError(
      "Cannot read discovered code file: record has neither inline content nor an absolutePath."
    );
  }
  return fs4.readFileSync(file.absolutePath, "utf8");
}
3333
3720
 
3334
3721
  // src/retrieval/test-commands.ts
3335
3722
  import crypto4 from "crypto";
@@ -3563,40 +3950,154 @@ function refreshTestCommands(db, cwd, repo, files = [], options = {}) {
3563
3950
 
3564
3951
  // src/indexer/code-indexer.ts
3565
3952
  function indexCodebase(db, options) {
3953
+ const state = getCodeIndexStateForRepo(db, options.repo);
3954
+ const existingHashes = getRepoCodeFileHashes(db, options.repo);
3955
+ const plan = planIncrementalCodeIndex(
3956
+ options.cwd,
3957
+ state?.lastIndexedCommit,
3958
+ new Set(existingHashes.keys())
3959
+ );
3566
3960
  options.onProgress?.({ stage: "discovering_code_files", repo: options.repo });
3567
- const discovery = discoverCodeFiles(options.cwd, options.repo, {
3961
+ const discovery = plan.fallbackToFullHashCompare ? discoverCodeFiles(options.cwd, options.repo, {
3962
+ maxFileBytes: options.maxFileBytes,
3963
+ onScan: (scanned, total) => options.onProgress?.({
3964
+ stage: "discovering_code_files",
3965
+ repo: options.repo,
3966
+ scanned,
3967
+ total
3968
+ })
3969
+ }) : discoverCodeFilesByPaths(options.cwd, options.repo, plan.changedPaths, {
3568
3970
  maxFileBytes: options.maxFileBytes,
3569
- onScan: (scanned, total) => options.onProgress?.({ stage: "discovering_code_files", repo: options.repo, scanned, total })
3971
+ onScan: (scanned, total) => options.onProgress?.({
3972
+ stage: "discovering_code_files",
3973
+ repo: options.repo,
3974
+ scanned,
3975
+ total
3976
+ })
3570
3977
  });
3978
+ const changedFiles = discovery.files.filter(
3979
+ (file) => existingHashes.get(file.path) !== file.contentHash
3980
+ );
3981
+ const discoveredPaths = new Set(discovery.files.map((file) => file.path));
3982
+ const deletedPaths = plan.fallbackToFullHashCompare ? [...existingHashes.keys()].filter((filePath) => !discoveredPaths.has(filePath)) : plan.deletedPaths;
3571
3983
  options.onProgress?.({
3572
3984
  stage: "discovered_code_files",
3573
3985
  repo: options.repo,
3574
- files: discovery.files.length,
3986
+ files: changedFiles.length,
3575
3987
  skippedFiles: discovery.skippedFiles
3576
3988
  });
3577
- const chunks = [];
3578
- for (const [index, file] of discovery.files.entries()) {
3989
+ if (changedFiles.length === 0 && deletedPaths.length === 0) {
3990
+ const counts = touchCodeIndexState(
3991
+ db,
3992
+ options.repo,
3993
+ discovery.skippedFiles,
3994
+ plan.currentCommit
3995
+ );
3996
+ const repoId = ensureRepository(db, options.repo);
3997
+ const scopedCount = (table) => db.prepare(`SELECT COUNT(*) AS count FROM ${table} WHERE repo_id = ?`).get(repoId).count;
3998
+ const summary2 = {
3999
+ indexedFiles: counts.files,
4000
+ codeChunksCreated: counts.chunks,
4001
+ testFilesIndexed: scopedCount("test_files"),
4002
+ testLinksCreated: scopedCount("test_links"),
4003
+ architectureComponentsIndexed: scopedCount("architecture_components"),
4004
+ architecturePatternsIndexed: scopedCount("architecture_patterns"),
4005
+ architectureImportsIndexed: scopedCount("code_imports"),
4006
+ skippedFiles: discovery.skippedFiles,
4007
+ databasePath: defaultDatabasePath(options.cwd)
4008
+ };
4009
+ options.onProgress?.({
4010
+ stage: "completed_code_index",
4011
+ repo: options.repo,
4012
+ files: summary2.indexedFiles,
4013
+ chunks: summary2.codeChunksCreated,
4014
+ skippedFiles: summary2.skippedFiles,
4015
+ testFiles: summary2.testFilesIndexed,
4016
+ testLinks: summary2.testLinksCreated,
4017
+ architectureComponents: summary2.architectureComponentsIndexed,
4018
+ architecturePatterns: summary2.architecturePatternsIndexed,
4019
+ architectureImports: summary2.architectureImportsIndexed
4020
+ });
4021
+ return summary2;
4022
+ }
4023
+ const changedChunks = [];
4024
+ const changedImports = [];
4025
+ const projectedIndexedPaths = new Set(
4026
+ [...existingHashes.keys()].filter((filePath) => !deletedPaths.includes(filePath))
4027
+ );
4028
+ for (const file of changedFiles) projectedIndexedPaths.add(file.path);
4029
+ for (const [index, file] of changedFiles.entries()) {
3579
4030
  options.onProgress?.({
3580
4031
  stage: "indexing_code_file",
3581
4032
  repo: options.repo,
3582
4033
  current: index + 1,
3583
- total: discovery.files.length,
4034
+ total: changedFiles.length,
3584
4035
  filePath: file.path
3585
4036
  });
3586
- const fileChunks = chunkCodeFile(file);
3587
- chunks.push(...fileChunks);
4037
+ const content = readDiscoveredCodeFileContent(file);
4038
+ const fileWithContent = { ...file, content };
4039
+ const fileChunks = chunkCodeFile(fileWithContent);
4040
+ changedChunks.push(...fileChunks);
4041
+ changedImports.push(
4042
+ ...extractCodeImports(file.path, content, projectedIndexedPaths, options.repo)
4043
+ );
3588
4044
  options.onProgress?.({
3589
4045
  stage: "indexed_code_file",
3590
4046
  repo: options.repo,
3591
4047
  current: index + 1,
3592
- total: discovery.files.length,
4048
+ total: changedFiles.length,
3593
4049
  filePath: file.path,
3594
4050
  chunks: fileChunks.length
3595
4051
  });
3596
4052
  }
3597
- const architecture = buildArchitectureIndex(options.repo, discovery.files, chunks, {
4053
+ const affectedPaths = /* @__PURE__ */ new Set([
4054
+ ...deletedPaths,
4055
+ ...changedFiles.map((file) => file.path)
4056
+ ]);
4057
+ const allFilesByPath = new Map(getRepoCodeFiles(db, options.repo).map((file) => [file.path, file]));
4058
+ for (const filePath of deletedPaths) allFilesByPath.delete(filePath);
4059
+ for (const file of changedFiles) {
4060
+ allFilesByPath.set(file.path, {
4061
+ repo: file.repo,
4062
+ path: file.path,
4063
+ language: file.language,
4064
+ sizeBytes: file.sizeBytes,
4065
+ contentHash: file.contentHash,
4066
+ updatedAt: file.updatedAt
4067
+ });
4068
+ }
4069
+ const allFiles = [...allFilesByPath.values()];
4070
+ const allSymbolChunks = getRepoCodeChunkSymbols(db, options.repo).filter(
4071
+ (chunk) => !affectedPaths.has(chunk.filePath)
4072
+ );
4073
+ allSymbolChunks.push(...changedChunks);
4074
+ const allImports = getRepoCodeImports(db, options.repo).filter(
4075
+ (item) => !affectedPaths.has(item.sourcePath)
4076
+ );
4077
+ allImports.push(...changedImports);
4078
+ const testChunks = getRepoTestChunks(db, options.repo).filter(
4079
+ (chunk) => !affectedPaths.has(chunk.filePath)
4080
+ );
4081
+ for (const chunk of changedChunks) {
4082
+ if (isTestFilePath(chunk.filePath)) testChunks.push(chunk);
4083
+ }
4084
+ const testAwareness = inferTestAwareness(options.repo, allFiles, testChunks, {
3598
4085
  onProgress: options.onProgress
3599
4086
  });
4087
+ options.onProgress?.({
4088
+ stage: "building_architecture_imports",
4089
+ repo: options.repo,
4090
+ current: allFiles.length,
4091
+ total: allFiles.length,
4092
+ imports: allImports.length
4093
+ });
4094
+ const architecture = buildArchitectureFromIndexedData(
4095
+ options.repo,
4096
+ allFiles,
4097
+ allSymbolChunks,
4098
+ allImports,
4099
+ { onProgress: options.onProgress }
4100
+ );
3600
4101
  options.onProgress?.({
3601
4102
  stage: "indexed_architecture",
3602
4103
  repo: options.repo,
@@ -3607,14 +4108,22 @@ function indexCodebase(db, options) {
3607
4108
  const summary = replaceCodeIndex(
3608
4109
  db,
3609
4110
  options.repo,
3610
- discovery.files.map(({ content: _content, absolutePath: _absolutePath, ...file }) => file),
3611
- chunks,
4111
+ changedFiles.map(({ content: _content, absolutePath: _absolutePath, ...file }) => file),
4112
+ changedChunks,
3612
4113
  discovery.skippedFiles,
3613
4114
  options.cwd,
3614
4115
  architecture,
3615
- { onProgress: options.onProgress }
4116
+ {
4117
+ onProgress: options.onProgress,
4118
+ deletedPaths,
4119
+ changedImports,
4120
+ currentCommit: plan.currentCommit,
4121
+ testAwareness
4122
+ }
3616
4123
  );
3617
- refreshTestCommands(db, options.cwd, options.repo, [], { onProgress: options.onProgress });
4124
+ refreshTestCommands(db, options.cwd, options.repo, [], {
4125
+ onProgress: options.onProgress
4126
+ });
3618
4127
  options.onProgress?.({
3619
4128
  stage: "completed_code_index",
3620
4129
  repo: options.repo,
@@ -4053,7 +4562,7 @@ function clampMaxResults(value, defaultValue) {
4053
4562
 
4054
4563
  // src/retrieval/ranker.ts
4055
4564
  import path11 from "path";
4056
- function parseJsonArray3(value) {
4565
+ function parseJsonArray4(value) {
4057
4566
  try {
4058
4567
  const parsed = JSON.parse(value);
4059
4568
  return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
@@ -4071,9 +4580,9 @@ function rowToWisdomUnit(row) {
4071
4580
  category: row.category,
4072
4581
  text: row.text,
4073
4582
  sanitizedText: row.sanitized_text,
4074
- filePaths: parseJsonArray3(row.file_paths_json),
4075
- symbols: parseJsonArray3(row.symbols_json),
4076
- authors: parseJsonArray3(row.authors_json),
4583
+ filePaths: parseJsonArray4(row.file_paths_json),
4584
+ symbols: parseJsonArray4(row.symbols_json),
4585
+ authors: parseJsonArray4(row.authors_json),
4077
4586
  createdAt: row.created_at,
4078
4587
  mergedAt: row.merged_at ?? void 0,
4079
4588
  confidence: row.confidence,
@@ -4136,11 +4645,11 @@ function symbolMatch2(unit, querySymbols) {
4136
4645
  }
4137
4646
  function textMatch2(unit, inputText) {
4138
4647
  const queryTokens = tokenizeSearchText(inputText, 32);
4139
- if (queryTokens.length === 0) return unit.bm25 === void 0 ? 0 : 0.45;
4648
+ if (queryTokens.length === 0) return unit.bm25 === void 0 ? 0 : 0.35;
4140
4649
  const haystack = `${unit.sanitizedText} ${unit.filePaths.join(" ")} ${unit.symbols.join(" ")}`.toLowerCase();
4141
4650
  const overlap = queryTokens.filter((token) => haystack.includes(token.toLowerCase())).length / queryTokens.length;
4142
- const bm25Signal = unit.bm25 === void 0 ? 0 : Math.max(0.25, Math.min(1, 1 / (1 + Math.abs(unit.bm25))));
4143
- return Math.max(overlap, bm25Signal);
4651
+ const bm25Signal = unit.bm25 === void 0 ? 0 : Math.max(0, Math.min(1, 1 / (1 + Math.abs(unit.bm25))));
4652
+ return unit.bm25 === void 0 ? overlap : Number((0.65 * overlap + 0.35 * bm25Signal).toFixed(4));
4144
4653
  }
4145
4654
  function reviewerOrAuthorSignal(unit) {
4146
4655
  if (unit.sourceType === "review_comment" || unit.sourceType === "review_summary") return 0.9;
@@ -4332,7 +4841,7 @@ function rankWisdomUnits(db, input) {
4332
4841
 
4333
4842
  // src/retrieval/code-ranker.ts
4334
4843
  import path12 from "path";
4335
- function parseJsonArray4(value) {
4844
+ function parseJsonArray5(value) {
4336
4845
  try {
4337
4846
  const parsed = JSON.parse(value);
4338
4847
  return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
@@ -4349,7 +4858,7 @@ function rowToCodeChunk(row) {
4349
4858
  startLine: row.start_line,
4350
4859
  endLine: row.end_line,
4351
4860
  sanitizedText: row.sanitized_text,
4352
- symbols: parseJsonArray4(row.symbols_json),
4861
+ symbols: parseJsonArray5(row.symbols_json),
4353
4862
  contentHash: row.content_hash,
4354
4863
  updatedAt: row.updated_at,
4355
4864
  bm25: row.bm25 ?? void 0
@@ -4398,8 +4907,8 @@ function textMatch3(chunk, input) {
4398
4907
  );
4399
4908
  const haystack = `${chunk.sanitizedText} ${chunk.filePath} ${chunk.symbols.join(" ")}`.toLowerCase();
4400
4909
  const overlap = tokens.length ? tokens.filter((token) => haystack.includes(token.toLowerCase())).length / tokens.length : 0;
4401
- const bm25Signal = chunk.bm25 === void 0 ? 0 : Math.max(0.25, Math.min(1, 1 / (1 + Math.abs(chunk.bm25))));
4402
- return Math.max(overlap, bm25Signal);
4910
+ const bm25Signal = chunk.bm25 === void 0 ? 0 : Math.max(0, Math.min(1, 1 / (1 + Math.abs(chunk.bm25))));
4911
+ return chunk.bm25 === void 0 ? overlap : Number((0.65 * overlap + 0.35 * bm25Signal).toFixed(4));
4403
4912
  }
4404
4913
  function recencyScore2(chunk) {
4405
4914
  const timestamp = Date.parse(chunk.updatedAt);
@@ -4497,7 +5006,7 @@ function rankCodeChunks(db, input) {
4497
5006
 
4498
5007
  // src/retrieval/architecture-ranker.ts
4499
5008
  import path13 from "path";
4500
- function parseJsonArray5(value) {
5009
+ function parseJsonArray6(value) {
4501
5010
  try {
4502
5011
  const parsed = JSON.parse(value);
4503
5012
  return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
@@ -4521,8 +5030,8 @@ function rowToPattern(row) {
4521
5030
  name: row.name,
4522
5031
  summary: row.summary_sanitized,
4523
5032
  sanitizedSummary: row.summary_sanitized,
4524
- sourceFiles: parseJsonArray5(row.source_files_json),
4525
- symbols: parseJsonArray5(row.symbols_json),
5033
+ sourceFiles: parseJsonArray6(row.source_files_json),
5034
+ symbols: parseJsonArray6(row.symbols_json),
4526
5035
  evidence: parseEvidence(row.evidence_json),
4527
5036
  confidence: row.confidence,
4528
5037
  createdAt: row.created_at,
@@ -4638,7 +5147,7 @@ function rankArchitecturePatterns(db, input) {
4638
5147
 
4639
5148
  // src/retrieval/test-ranker.ts
4640
5149
  import path14 from "path";
4641
- function parseJsonArray6(value) {
5150
+ function parseJsonArray7(value) {
4642
5151
  if (!value) return [];
4643
5152
  try {
4644
5153
  const parsed = JSON.parse(value);
@@ -4651,7 +5160,7 @@ function baseStem(filePath) {
4651
5160
  return path14.posix.basename(filePath).replace(/\.(test|spec)\.[^.]+$/i, "").replace(/\.[^.]+$/i, "").toLowerCase();
4652
5161
  }
4653
5162
  function rowToRanked(row, input) {
4654
- const symbols = parseJsonArray6(row.symbols_json);
5163
+ const symbols = parseJsonArray7(row.symbols_json);
4655
5164
  const text = row.sanitized_text ?? "";
4656
5165
  const matchedSymbols = (input.symbols ?? []).filter((symbol) => {
4657
5166
  const lower = symbol.toLowerCase();
@@ -4721,7 +5230,7 @@ function rankRelevantTests(db, input) {
4721
5230
 
4722
5231
  // src/retrieval/regression-ranker.ts
4723
5232
  import path15 from "path";
4724
- function parseJsonArray7(value) {
5233
+ function parseJsonArray8(value) {
4725
5234
  try {
4726
5235
  const parsed = JSON.parse(value);
4727
5236
  return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
@@ -4736,12 +5245,12 @@ function rowToEvent(row) {
4736
5245
  prNumber: row.pr_number,
4737
5246
  prUrl: row.pr_url,
4738
5247
  summary: row.summary_sanitized,
4739
- filePaths: parseJsonArray7(row.file_paths_json),
4740
- symbols: parseJsonArray7(row.symbols_json),
4741
- testPaths: parseJsonArray7(row.test_paths_json),
4742
- authors: parseJsonArray7(row.authors_json),
4743
- labels: parseJsonArray7(row.labels_json),
4744
- signals: parseJsonArray7(row.signals_json),
5248
+ filePaths: parseJsonArray8(row.file_paths_json),
5249
+ symbols: parseJsonArray8(row.symbols_json),
5250
+ testPaths: parseJsonArray8(row.test_paths_json),
5251
+ authors: parseJsonArray8(row.authors_json),
5252
+ labels: parseJsonArray8(row.labels_json),
5253
+ signals: parseJsonArray8(row.signals_json),
4745
5254
  createdAt: row.created_at,
4746
5255
  mergedAt: row.merged_at ?? void 0,
4747
5256
  confidence: row.confidence
@@ -6141,7 +6650,7 @@ function syncPlaybooksToDatabase(db, cwd) {
6141
6650
  }
6142
6651
 
6143
6652
  // src/retrieval/onboarding.ts
6144
- function parseJsonArray8(value) {
6653
+ function parseJsonArray9(value) {
6145
6654
  try {
6146
6655
  const parsed = JSON.parse(value);
6147
6656
  return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
@@ -6174,7 +6683,7 @@ function riskyModules(db) {
6174
6683
  ORDER BY confidence DESC, COALESCE(merged_at, created_at) DESC
6175
6684
  LIMIT 20`
6176
6685
  ).all();
6177
- return [...new Set(rows.flatMap((row) => parseJsonArray8(row.file_paths_json)))].slice(0, 10);
6686
+ return [...new Set(rows.flatMap((row) => parseJsonArray9(row.file_paths_json)))].slice(0, 10);
6178
6687
  }
6179
6688
  function relatedTests(db, files) {
6180
6689
  if (files.length === 0) {
@@ -6253,6 +6762,7 @@ import crypto8 from "crypto";
6253
6762
  import fs7 from "fs";
6254
6763
  import path18 from "path";
6255
6764
  var ANCHOR_EVALS_FILE = "anchor.evals.json";
6765
+ var DEFAULT_EVAL_K = 8;
6256
6766
  function evalsPath(cwd) {
6257
6767
  return path18.join(cwd, ANCHOR_EVALS_FILE);
6258
6768
  }
@@ -6351,6 +6861,7 @@ function runRetrievalEvals(db, cwd) {
6351
6861
  initializeSchema(db);
6352
6862
  const filePath = evalsPath(cwd);
6353
6863
  const evalFile = readEvalFile(cwd);
6864
+ const k = DEFAULT_EVAL_K;
6354
6865
  const results = evalFile.evals.map((item) => {
6355
6866
  const context = buildAnchorContextResult(db, cwd, {
6356
6867
  task: item.task,
@@ -6361,6 +6872,9 @@ function runRetrievalEvals(db, cwd) {
6361
6872
  ...Array.isArray(context.metadata.items) ? context.metadata.items : [],
6362
6873
  ...Array.isArray(context.metadata.teamRules) ? context.metadata.teamRules : []
6363
6874
  ];
6875
+ const rankedPrs = uniqueStrings(
6876
+ metadataItems.map((metadata) => metadata.prNumber).filter((prNumber) => typeof prNumber === "number").map(String)
6877
+ ).map(Number);
6364
6878
  const foundPrs = uniqueStrings(
6365
6879
  metadataItems.map((metadata) => metadata.prNumber).filter((prNumber) => typeof prNumber === "number").map(String)
6366
6880
  ).map(Number);
@@ -6371,6 +6885,23 @@ function runRetrievalEvals(db, cwd) {
6371
6885
  const missingCategories = item.expectedCategories.filter(
6372
6886
  (category) => !foundCategories.includes(category)
6373
6887
  );
6888
+ const expectedPrRanks = item.expectedPrs.map((prNumber) => {
6889
+ const index = rankedPrs.indexOf(prNumber);
6890
+ return {
6891
+ prNumber,
6892
+ rank: index >= 0 ? index + 1 : void 0
6893
+ };
6894
+ });
6895
+ const topK = rankedPrs.slice(0, k);
6896
+ const relevantInTopK = item.expectedPrs.filter((prNumber) => topK.includes(prNumber));
6897
+ const precisionAtK2 = k > 0 ? Number((relevantInTopK.length / k).toFixed(4)) : 0;
6898
+ const recallAtK2 = item.expectedPrs.length > 0 ? Number((relevantInTopK.length / item.expectedPrs.length).toFixed(4)) : 0;
6899
+ const reciprocalRank = (() => {
6900
+ if (item.expectedPrs.length === 0) return 0;
6901
+ const firstRank = expectedPrRanks.map((entry) => entry.rank).filter((rank) => typeof rank === "number").sort((a, b) => a - b)[0];
6902
+ if (!firstRank) return 0;
6903
+ return Number((1 / firstRank).toFixed(4));
6904
+ })();
6374
6905
  return {
6375
6906
  id: item.id,
6376
6907
  task: item.task,
@@ -6378,18 +6909,35 @@ function runRetrievalEvals(db, cwd) {
6378
6909
  expectedPrs: item.expectedPrs,
6379
6910
  foundPrs,
6380
6911
  missingPrs,
6912
+ expectedPrRanks,
6381
6913
  expectedCategories: item.expectedCategories,
6382
6914
  foundCategories,
6383
- missingCategories
6915
+ missingCategories,
6916
+ precisionAtK: precisionAtK2,
6917
+ recallAtK: recallAtK2,
6918
+ reciprocalRank
6384
6919
  };
6385
6920
  });
6386
6921
  const passed = results.filter((result) => result.passed).length;
6922
+ const precisionAtK = results.length > 0 ? Number(
6923
+ (results.reduce((sum, result) => sum + result.precisionAtK, 0) / results.length).toFixed(4)
6924
+ ) : 0;
6925
+ const recallAtK = results.length > 0 ? Number(
6926
+ (results.reduce((sum, result) => sum + result.recallAtK, 0) / results.length).toFixed(4)
6927
+ ) : 0;
6928
+ const mrr = results.length > 0 ? Number(
6929
+ (results.reduce((sum, result) => sum + result.reciprocalRank, 0) / results.length).toFixed(4)
6930
+ ) : 0;
6387
6931
  return {
6388
6932
  ok: passed === results.length,
6389
6933
  path: filePath,
6390
6934
  total: results.length,
6391
6935
  passed,
6392
6936
  failed: results.length - passed,
6937
+ precisionAtK,
6938
+ recallAtK,
6939
+ mrr,
6940
+ k,
6393
6941
  results
6394
6942
  };
6395
6943
  }
@@ -8744,7 +9292,7 @@ function packageRootForSpecifier(specifier) {
8744
9292
  if (normalized.startsWith("@") && parts.length >= 2) return `${parts[0]}/${parts[1]}`;
8745
9293
  return parts[0] ?? "";
8746
9294
  }
8747
- function parseJsonArray9(value) {
9295
+ function parseJsonArray10(value) {
8748
9296
  try {
8749
9297
  const parsed = JSON.parse(value);
8750
9298
  return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
@@ -8878,7 +9426,7 @@ function rebuildOrgGraph(db, config, baseDirOrOptions) {
8878
9426
  `imports ${sanitizeHistoricalText(rootSpecifier || item.specifier)}`
8879
9427
  )
8880
9428
  ],
8881
- confidence: parseJsonArray9(item.imported_symbols_json).length > 0 ? 0.88 : 0.76
9429
+ confidence: parseJsonArray10(item.imported_symbols_json).length > 0 ? 0.88 : 0.76
8882
9430
  });
8883
9431
  }
8884
9432
  if (shouldEmitProgress3(index + 1, imports.length)) {
@@ -9214,7 +9762,11 @@ async function indexOrgRepos(db, config, options = {}) {
9214
9762
  command,
9215
9763
  totalRepos: repos.length
9216
9764
  });
9217
- for (const [repoIndex, repo] of repos.entries()) {
9765
+ const maxConcurrency = Math.max(1, Math.min(options.concurrency ?? 3, 4));
9766
+ let nextRepoIndex = 0;
9767
+ const processRepo = async (repoIndex) => {
9768
+ const repo = repos[repoIndex];
9769
+ if (!repo) return;
9218
9770
  const repoPosition = repoIndex + 1;
9219
9771
  const localPath = orgRepoLocalPath(config.org, repo, options.baseDir);
9220
9772
  const repoStartedAt = (/* @__PURE__ */ new Date()).toISOString();
@@ -9465,7 +10017,18 @@ async function indexOrgRepos(db, config, options = {}) {
9465
10017
  error: message
9466
10018
  });
9467
10019
  }
9468
- }
10020
+ };
10021
+ const worker = async () => {
10022
+ while (true) {
10023
+ const repoIndex = nextRepoIndex;
10024
+ nextRepoIndex += 1;
10025
+ if (repoIndex >= repos.length) return;
10026
+ await processRepo(repoIndex);
10027
+ }
10028
+ };
10029
+ await Promise.all(
10030
+ Array.from({ length: Math.min(maxConcurrency, repos.length) }, () => worker())
10031
+ );
9469
10032
  let graph;
9470
10033
  if (options.noGraph) {
9471
10034
  const counts = getOrgGraphCounts(db, config.org);
@@ -9510,6 +10073,7 @@ async function indexOrgRepos(db, config, options = {}) {
9510
10073
  codeFilesIndexed: results.reduce((sum, result) => sum + (result.code?.indexedFiles ?? 0), 0),
9511
10074
  failures: results.map((result) => result.error).concat(graph.error ? [graph.error] : []).filter((error) => Boolean(error))
9512
10075
  });
10076
+ runDatabaseMaintenance(db);
9513
10077
  emit({
9514
10078
  stage: "org_sync_completed",
9515
10079
  org: config.org,
@@ -10397,6 +10961,7 @@ export {
10397
10961
  anchorMcpEntry,
10398
10962
  architectureFilesFromDiff,
10399
10963
  buildAnchorContextResult,
10964
+ buildArchitectureFromIndexedData,
10400
10965
  buildArchitectureIndex,
10401
10966
  buildArchitectureMap,
10402
10967
  buildFtsQuery,
@@ -10436,6 +11001,7 @@ export {
10436
11001
  detectTestCommands,
10437
11002
  detectTestCommandsForFile,
10438
11003
  discoverCodeFiles,
11004
+ discoverCodeFilesByPaths,
10439
11005
  emptyCodeIndexSummary,
10440
11006
  ensureAnchorGitExclude,
10441
11007
  ensureCursorConfig,
@@ -10464,6 +11030,7 @@ export {
10464
11030
  getAnchorIndexHealth,
10465
11031
  getArchitectureContext,
10466
11032
  getArchitectureMapContext,
11033
+ getCodeIndexStateForRepo,
10467
11034
  getGitHubRateLimitDelayMs,
10468
11035
  getGraphQLFetchCheckpoint,
10469
11036
  getIndexStatus,
@@ -10474,12 +11041,19 @@ export {
10474
11041
  getOrgRepoState,
10475
11042
  getOrgStatus,
10476
11043
  getPlaybook,
11044
+ getRepoCodeChunkSymbols,
11045
+ getRepoCodeCounts,
11046
+ getRepoCodeFileHashes,
11047
+ getRepoCodeFiles,
11048
+ getRepoCodeImports,
11049
+ getRepoTestChunks,
10477
11050
  getSemanticStatus,
10478
11051
  getSuggestedPromptTexts,
10479
11052
  getSuggestedPrompts,
10480
11053
  getWisdomCategoryCounts,
10481
11054
  githubAuthFixMessage,
10482
11055
  graphQLFetchCheckpointScope,
11056
+ hasDirtyWorkingTree,
10483
11057
  hasHighSignalLanguage,
10484
11058
  indexCodebase,
10485
11059
  indexOrgRepos,
@@ -10515,6 +11089,7 @@ export {
10515
11089
  orgRoot,
10516
11090
  paginateWithGitHubRateLimit,
10517
11091
  parseGitHubRemote,
11092
+ planIncrementalCodeIndex,
10518
11093
  planTask,
10519
11094
  plannedOrgCloneCommands,
10520
11095
  rankArchitecturePatterns,
@@ -10523,6 +11098,8 @@ export {
10523
11098
  rankRelevantTests,
10524
11099
  rankTeamRules,
10525
11100
  rankWisdomUnits,
11101
+ readDiscoveredCodeFileContent,
11102
+ readGitHeadCommit,
10526
11103
  readOrgHeartbeat,
10527
11104
  rebuildOrgGraph,
10528
11105
  recordFeedback,
@@ -10543,6 +11120,7 @@ export {
10543
11120
  resolvePullRequestFetchLimit,
10544
11121
  reviewDiff,
10545
11122
  runAnchorCi,
11123
+ runDatabaseMaintenance,
10546
11124
  runDoctor,
10547
11125
  runRetrievalEvals,
10548
11126
  sanitizeHistoricalText,
@@ -10557,6 +11135,7 @@ export {
10557
11135
  syncOrgConfigToDatabase,
10558
11136
  syncPlaybooksToDatabase,
10559
11137
  tokenizeSearchText,
11138
+ touchCodeIndexState,
10560
11139
  truncateText,
10561
11140
  uniqueStrings,
10562
11141
  updateGitHubGraphQLRateLimitState,