@pratik7368patil/anchor-core 0.1.30 → 0.1.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -175,8 +175,11 @@ function canonicalizeText(text) {
175
175
  return text.toLowerCase().replace(/https?:\/\/\S+/g, "").replace(/[^a-z0-9_./ -]/g, " ").replace(/\s+/g, " ").trim();
176
176
  }
177
177
  function tokenizeSearchText(text, maxTokens = 32) {
178
- const tokens = text.toLowerCase().match(/[a-z0-9_./-]{3,}/g);
179
- return uniqueStrings(tokens ?? []).slice(0, maxTokens);
178
+ const shortSignalTokens = /* @__PURE__ */ new Set(["id", "db", "api", "key", "sql", "jwt", "ui", "ux"]);
179
+ const tokens = text.toLowerCase().match(/[a-z0-9_./-]{2,}/g);
180
+ return uniqueStrings(
181
+ (tokens ?? []).filter((token) => token.length >= 3 || shortSignalTokens.has(token))
182
+ ).slice(0, maxTokens);
180
183
  }
181
184
 
182
185
  // src/security/redact-secrets.ts
@@ -379,7 +382,8 @@ CREATE TABLE IF NOT EXISTS code_index_state (
379
382
  last_indexed_at TEXT NOT NULL,
380
383
  indexed_files INTEGER NOT NULL,
381
384
  code_chunks INTEGER NOT NULL,
382
- skipped_files INTEGER NOT NULL
385
+ skipped_files INTEGER NOT NULL,
386
+ last_indexed_commit TEXT
383
387
  );
384
388
 
385
389
  CREATE TABLE IF NOT EXISTS code_imports (
@@ -1757,6 +1761,14 @@ function applyPerformancePragmas(db) {
1757
1761
  db.pragma("mmap_size = 268435456");
1758
1762
  db.pragma("temp_store = MEMORY");
1759
1763
  }
1764
+ function runDatabaseMaintenance(db) {
1765
+ try {
1766
+ db.exec("ANALYZE");
1767
+ db.pragma("optimize");
1768
+ db.pragma("wal_checkpoint(TRUNCATE)");
1769
+ } catch {
1770
+ }
1771
+ }
1760
1772
  function initializeSchema(db) {
1761
1773
  db.exec(SCHEMA_SQL);
1762
1774
  ensureColumn(db, "sync_state", "history_coverage", "TEXT");
@@ -1770,6 +1782,7 @@ function initializeSchema(db) {
1770
1782
  ensureColumn(db, "sync_state", "graphql_cursor_reset_at", "TEXT");
1771
1783
  ensureColumn(db, "sync_state", "graphql_cursor_reason", "TEXT");
1772
1784
  ensureColumn(db, "sync_state", "graphql_cursor_updated_at", "TEXT");
1785
+ ensureColumn(db, "code_index_state", "last_indexed_commit", "TEXT");
1773
1786
  }
1774
1787
  function ensureColumn(db, tableName, columnName, definition) {
1775
1788
  const columns = db.prepare(`PRAGMA table_info(${tableName})`).all();
@@ -1819,10 +1832,153 @@ function ensureRepository(db, fullName) {
1819
1832
  if (!row) throw new Error(`Failed to create repository row for ${fullName}`);
1820
1833
  return row.id;
1821
1834
  }
1835
+ function getRepositoryId(db, fullName) {
1836
+ const row = db.prepare("SELECT id FROM repositories WHERE full_name = ?").get(fullName);
1837
+ return row?.id;
1838
+ }
1822
1839
  function getLastSyncTime(db, repo) {
1823
1840
  const row = db.prepare("SELECT last_sync_at FROM sync_state WHERE repo = ?").get(repo);
1824
1841
  return row?.last_sync_at ?? void 0;
1825
1842
  }
1843
+ function getCodeIndexStateForRepo(db, repo) {
1844
+ initializeSchema(db);
1845
+ const row = db.prepare(
1846
+ `SELECT repo, last_indexed_at, indexed_files, code_chunks, skipped_files, last_indexed_commit
1847
+ FROM code_index_state
1848
+ WHERE repo = ?`
1849
+ ).get(repo);
1850
+ if (!row?.repo) return void 0;
1851
+ return {
1852
+ repo: row.repo,
1853
+ lastIndexedAt: row.last_indexed_at ?? void 0,
1854
+ indexedFiles: row.indexed_files ?? 0,
1855
+ codeChunks: row.code_chunks ?? 0,
1856
+ skippedFiles: row.skipped_files ?? 0,
1857
+ lastIndexedCommit: row.last_indexed_commit ?? void 0
1858
+ };
1859
+ }
1860
+ function getRepoCodeFileHashes(db, repo) {
1861
+ initializeSchema(db);
1862
+ const repoId = getRepositoryId(db, repo);
1863
+ if (!repoId) return /* @__PURE__ */ new Map();
1864
+ const rows = db.prepare("SELECT path, content_hash FROM code_files WHERE repo_id = ?").all(repoId);
1865
+ return new Map(rows.map((row) => [row.path, row.content_hash]));
1866
+ }
1867
+ function getRepoCodeFiles(db, repo) {
1868
+ initializeSchema(db);
1869
+ const repoId = getRepositoryId(db, repo);
1870
+ if (!repoId) return [];
1871
+ const rows = db.prepare(
1872
+ `SELECT path, language, size_bytes, content_hash, updated_at
1873
+ FROM code_files
1874
+ WHERE repo_id = ?`
1875
+ ).all(repoId);
1876
+ return rows.map((row) => ({
1877
+ repo,
1878
+ path: row.path,
1879
+ language: row.language ?? void 0,
1880
+ sizeBytes: row.size_bytes,
1881
+ contentHash: row.content_hash,
1882
+ updatedAt: row.updated_at
1883
+ }));
1884
+ }
1885
+ function parseJsonArray3(value) {
1886
+ try {
1887
+ const parsed = JSON.parse(value);
1888
+ return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
1889
+ } catch {
1890
+ return [];
1891
+ }
1892
+ }
1893
+ function getRepoCodeChunkSymbols(db, repo) {
1894
+ initializeSchema(db);
1895
+ const repoId = getRepositoryId(db, repo);
1896
+ if (!repoId) return [];
1897
+ const rows = db.prepare(
1898
+ `SELECT id, file_path, language, start_line, end_line, symbols_json, content_hash, updated_at
1899
+ FROM code_chunks
1900
+ WHERE repo_id = ?`
1901
+ ).all(repoId);
1902
+ return rows.map((row) => ({
1903
+ id: row.id,
1904
+ repo,
1905
+ filePath: row.file_path,
1906
+ language: row.language ?? void 0,
1907
+ startLine: row.start_line,
1908
+ endLine: row.end_line,
1909
+ sanitizedText: "",
1910
+ symbols: parseJsonArray3(row.symbols_json),
1911
+ contentHash: row.content_hash,
1912
+ updatedAt: row.updated_at
1913
+ }));
1914
+ }
1915
+ function getRepoTestChunks(db, repo) {
1916
+ initializeSchema(db);
1917
+ const repoId = getRepositoryId(db, repo);
1918
+ if (!repoId) return [];
1919
+ const rows = db.prepare(
1920
+ `SELECT id, file_path, language, start_line, end_line, sanitized_text, symbols_json, content_hash, updated_at
1921
+ FROM code_chunks
1922
+ WHERE repo_id = ? AND file_path IN (
1923
+ SELECT path FROM test_files WHERE repo_id = ?
1924
+ )`
1925
+ ).all(repoId, repoId);
1926
+ return rows.map((row) => ({
1927
+ id: row.id,
1928
+ repo,
1929
+ filePath: row.file_path,
1930
+ language: row.language ?? void 0,
1931
+ startLine: row.start_line,
1932
+ endLine: row.end_line,
1933
+ sanitizedText: row.sanitized_text,
1934
+ symbols: parseJsonArray3(row.symbols_json),
1935
+ contentHash: row.content_hash,
1936
+ updatedAt: row.updated_at
1937
+ }));
1938
+ }
1939
+ function getRepoCodeImports(db, repo) {
1940
+ initializeSchema(db);
1941
+ const repoId = getRepositoryId(db, repo);
1942
+ if (!repoId) return [];
1943
+ const rows = db.prepare(
1944
+ `SELECT source_path, specifier, imported_path, imported_symbols_json, kind
1945
+ FROM code_imports
1946
+ WHERE repo_id = ?`
1947
+ ).all(repoId);
1948
+ return rows.map((row) => ({
1949
+ repo,
1950
+ sourcePath: row.source_path,
1951
+ specifier: row.specifier,
1952
+ importedPath: row.imported_path ?? void 0,
1953
+ importedSymbols: parseJsonArray3(row.imported_symbols_json),
1954
+ kind: row.kind
1955
+ }));
1956
+ }
1957
+ function getRepoCodeCounts(db, repo) {
1958
+ initializeSchema(db);
1959
+ const repoId = getRepositoryId(db, repo);
1960
+ if (!repoId) return { files: 0, chunks: 0 };
1961
+ const files = db.prepare("SELECT COUNT(*) AS count FROM code_files WHERE repo_id = ?").get(repoId).count;
1962
+ const chunks = db.prepare("SELECT COUNT(*) AS count FROM code_chunks WHERE repo_id = ?").get(repoId).count;
1963
+ return { files, chunks };
1964
+ }
1965
+ function touchCodeIndexState(db, repo, skippedFiles, currentCommit2) {
1966
+ initializeSchema(db);
1967
+ const counts = getRepoCodeCounts(db, repo);
1968
+ const now = (/* @__PURE__ */ new Date()).toISOString();
1969
+ db.prepare(
1970
+ `INSERT INTO code_index_state
1971
+ (repo, last_indexed_at, indexed_files, code_chunks, skipped_files, last_indexed_commit)
1972
+ VALUES (?, ?, ?, ?, ?, ?)
1973
+ ON CONFLICT(repo) DO UPDATE SET
1974
+ last_indexed_at = excluded.last_indexed_at,
1975
+ indexed_files = excluded.indexed_files,
1976
+ code_chunks = excluded.code_chunks,
1977
+ skipped_files = excluded.skipped_files,
1978
+ last_indexed_commit = excluded.last_indexed_commit`
1979
+ ).run(repo, now, counts.files, counts.chunks, skippedFiles, currentCommit2 ?? null);
1980
+ return counts;
1981
+ }
1826
1982
  function updateSyncState(db, repo, lastIndexedPr, metadata = {}) {
1827
1983
  const now = (/* @__PURE__ */ new Date()).toISOString();
1828
1984
  db.prepare(
@@ -2115,13 +2271,24 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd, ar
2115
2271
  initializeSchema(db);
2116
2272
  const repoId = ensureRepository(db, repo);
2117
2273
  const now = (/* @__PURE__ */ new Date()).toISOString();
2118
- options.onProgress?.({ stage: "writing_code_index", repo, phase: "Inferring test awareness" });
2119
- const testAwareness = inferTestAwareness(repo, codeFiles, codeChunks, {
2274
+ const deletedPaths = options.deletedPaths ?? [];
2275
+ const changedImports = options.changedImports;
2276
+ const testAwareness = options.testAwareness ?? inferTestAwareness(repo, codeFiles, codeChunks, {
2120
2277
  onProgress: options.onProgress
2121
2278
  });
2122
2279
  options.onProgress?.({ stage: "writing_code_index", repo, phase: "Writing code index" });
2280
+ const changedPaths = [...new Set(codeFiles.map((file) => file.path))];
2281
+ const affectedPaths = [.../* @__PURE__ */ new Set([...changedPaths, ...deletedPaths])];
2123
2282
  const transaction = db.transaction(() => {
2124
- const existingChunkRowIds = db.prepare("SELECT rowid FROM code_chunks WHERE repo_id = ?").all(repoId);
2283
+ let existingChunkRowIds = [];
2284
+ if (affectedPaths.length > 0) {
2285
+ const placeholders = affectedPaths.map(() => "?").join(", ");
2286
+ existingChunkRowIds = db.prepare(
2287
+ `SELECT rowid
2288
+ FROM code_chunks
2289
+ WHERE repo_id = ? AND file_path IN (${placeholders})`
2290
+ ).all(repoId, ...affectedPaths);
2291
+ }
2125
2292
  const existingPatternRowIds = db.prepare("SELECT rowid FROM architecture_patterns WHERE repo_id = ?").all(repoId);
2126
2293
  options.onProgress?.({
2127
2294
  stage: "deleting_existing_code_index",
@@ -2141,11 +2308,51 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd, ar
2141
2308
  chunks: existingChunkRowIds.length
2142
2309
  })
2143
2310
  );
2144
- db.prepare("DELETE FROM code_chunks WHERE repo_id = ?").run(repoId);
2145
- db.prepare("DELETE FROM code_files WHERE repo_id = ?").run(repoId);
2146
- db.prepare("DELETE FROM test_links WHERE repo_id = ? AND reason != 'PR co-change'").run(repoId);
2147
- db.prepare("DELETE FROM test_files WHERE repo_id = ?").run(repoId);
2311
+ if (affectedPaths.length > 0) {
2312
+ const placeholders = affectedPaths.map(() => "?").join(", ");
2313
+ db.prepare(
2314
+ `DELETE FROM code_chunks
2315
+ WHERE repo_id = ? AND file_path IN (${placeholders})`
2316
+ ).run(repoId, ...affectedPaths);
2317
+ db.prepare(
2318
+ `DELETE FROM code_files
2319
+ WHERE repo_id = ? AND path IN (${placeholders})`
2320
+ ).run(repoId, ...affectedPaths);
2321
+ db.prepare(
2322
+ `DELETE FROM test_links
2323
+ WHERE repo_id = ?
2324
+ AND reason != 'PR co-change'
2325
+ AND (source_path IN (${placeholders}) OR test_path IN (${placeholders}))`
2326
+ ).run(repoId, ...affectedPaths, ...affectedPaths);
2327
+ db.prepare(
2328
+ `DELETE FROM test_files
2329
+ WHERE repo_id = ? AND path IN (${placeholders})`
2330
+ ).run(repoId, ...affectedPaths);
2331
+ if (changedImports) {
2332
+ db.prepare(
2333
+ `DELETE FROM code_imports
2334
+ WHERE repo_id = ? AND source_path IN (${placeholders})`
2335
+ ).run(repoId, ...affectedPaths);
2336
+ }
2337
+ }
2148
2338
  deleteExistingArchitectureData(db, repoId, repo, existingPatternRowIds, options);
2339
+ if (changedImports) {
2340
+ const insertImport = db.prepare(
2341
+ `INSERT INTO code_imports
2342
+ (repo_id, source_path, specifier, imported_path, imported_symbols_json, kind)
2343
+ VALUES (?, ?, ?, ?, ?, ?)`
2344
+ );
2345
+ for (const item of changedImports) {
2346
+ insertImport.run(
2347
+ repoId,
2348
+ item.sourcePath,
2349
+ item.specifier,
2350
+ item.importedPath ?? null,
2351
+ JSON.stringify(item.importedSymbols),
2352
+ item.kind
2353
+ );
2354
+ }
2355
+ }
2149
2356
  const insertFile = db.prepare(
2150
2357
  `INSERT INTO code_files
2151
2358
  (repo_id, path, language, size_bytes, content_hash, updated_at)
@@ -2237,18 +2444,22 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd, ar
2237
2444
  }
2238
2445
  }
2239
2446
  insertTestAwareness(db, repoId, repo, testAwareness.testFiles, testAwareness.testLinks, options);
2240
- insertArchitectureData(db, repoId, repo, architecture, options);
2447
+ insertArchitectureData(db, repoId, repo, architecture, options, !changedImports);
2241
2448
  insertArchitectureMapEdges(db, repoId, repo, architecture, testAwareness.testLinks, options);
2242
2449
  options.onProgress?.({ stage: "writing_code_index", repo, phase: "Updating index state" });
2450
+ const totalFileCount = db.prepare("SELECT COUNT(*) AS count FROM code_files WHERE repo_id = ?").get(repoId).count;
2451
+ const totalChunkCount = db.prepare("SELECT COUNT(*) AS count FROM code_chunks WHERE repo_id = ?").get(repoId).count;
2243
2452
  db.prepare(
2244
- `INSERT INTO code_index_state (repo, last_indexed_at, indexed_files, code_chunks, skipped_files)
2245
- VALUES (?, ?, ?, ?, ?)
2453
+ `INSERT INTO code_index_state
2454
+ (repo, last_indexed_at, indexed_files, code_chunks, skipped_files, last_indexed_commit)
2455
+ VALUES (?, ?, ?, ?, ?, ?)
2246
2456
  ON CONFLICT(repo) DO UPDATE SET
2247
2457
  last_indexed_at = excluded.last_indexed_at,
2248
2458
  indexed_files = excluded.indexed_files,
2249
2459
  code_chunks = excluded.code_chunks,
2250
- skipped_files = excluded.skipped_files`
2251
- ).run(repo, now, codeFiles.length, codeChunks.length, skippedFiles);
2460
+ skipped_files = excluded.skipped_files,
2461
+ last_indexed_commit = excluded.last_indexed_commit`
2462
+ ).run(repo, now, totalFileCount, totalChunkCount, skippedFiles, options.currentCommit ?? null);
2252
2463
  db.prepare(
2253
2464
  `INSERT INTO architecture_index_state (repo, last_indexed_at, components, patterns, imports)
2254
2465
  VALUES (?, ?, ?, ?, ?)
@@ -2266,9 +2477,10 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd, ar
2266
2477
  );
2267
2478
  });
2268
2479
  transaction();
2480
+ const counts = getRepoCodeCounts(db, repo);
2269
2481
  return {
2270
- indexedFiles: codeFiles.length,
2271
- codeChunksCreated: codeChunks.length,
2482
+ indexedFiles: counts.files,
2483
+ codeChunksCreated: counts.chunks,
2272
2484
  testFilesIndexed: testAwareness.testFiles.length,
2273
2485
  testLinksCreated: testAwareness.testLinks.length,
2274
2486
  architectureComponentsIndexed: architecture.components.length,
@@ -2293,40 +2505,41 @@ function deleteExistingArchitectureData(db, repoId, repo, patternRowIds, options
2293
2505
  );
2294
2506
  db.prepare("DELETE FROM architecture_patterns WHERE repo_id = ?").run(repoId);
2295
2507
  db.prepare("DELETE FROM architecture_components WHERE repo_id = ?").run(repoId);
2296
- db.prepare("DELETE FROM code_imports WHERE repo_id = ?").run(repoId);
2297
2508
  db.prepare("DELETE FROM architecture_map_edges WHERE repo_id = ?").run(repoId);
2298
2509
  }
2299
- function insertArchitectureData(db, repoId, repo, architecture, options = {}) {
2300
- const insertImport = db.prepare(
2301
- `INSERT INTO code_imports
2302
- (repo_id, source_path, specifier, imported_path, imported_symbols_json, kind)
2303
- VALUES (?, ?, ?, ?, ?, ?)`
2304
- );
2305
- options.onProgress?.({
2306
- stage: "writing_architecture_data",
2307
- repo,
2308
- current: 0,
2309
- total: architecture.imports.length,
2310
- kind: "imports"
2311
- });
2312
- for (const [index, item] of architecture.imports.entries()) {
2313
- insertImport.run(
2314
- repoId,
2315
- item.sourcePath,
2316
- item.specifier,
2317
- item.importedPath ?? null,
2318
- JSON.stringify(item.importedSymbols),
2319
- item.kind
2510
+ function insertArchitectureData(db, repoId, repo, architecture, options = {}, includeImports = true) {
2511
+ if (includeImports) {
2512
+ const insertImport = db.prepare(
2513
+ `INSERT INTO code_imports
2514
+ (repo_id, source_path, specifier, imported_path, imported_symbols_json, kind)
2515
+ VALUES (?, ?, ?, ?, ?, ?)`
2320
2516
  );
2321
- const current = index + 1;
2322
- if (shouldEmitCodeWriteProgress(current, architecture.imports.length)) {
2323
- options.onProgress?.({
2324
- stage: "writing_architecture_data",
2325
- repo,
2326
- current,
2327
- total: architecture.imports.length,
2328
- kind: "imports"
2329
- });
2517
+ options.onProgress?.({
2518
+ stage: "writing_architecture_data",
2519
+ repo,
2520
+ current: 0,
2521
+ total: architecture.imports.length,
2522
+ kind: "imports"
2523
+ });
2524
+ for (const [index, item] of architecture.imports.entries()) {
2525
+ insertImport.run(
2526
+ repoId,
2527
+ item.sourcePath,
2528
+ item.specifier,
2529
+ item.importedPath ?? null,
2530
+ JSON.stringify(item.importedSymbols),
2531
+ item.kind
2532
+ );
2533
+ const current = index + 1;
2534
+ if (shouldEmitCodeWriteProgress(current, architecture.imports.length)) {
2535
+ options.onProgress?.({
2536
+ stage: "writing_architecture_data",
2537
+ repo,
2538
+ current,
2539
+ total: architecture.imports.length,
2540
+ kind: "imports"
2541
+ });
2542
+ }
2330
2543
  }
2331
2544
  }
2332
2545
  const insertComponent = db.prepare(
@@ -2482,19 +2695,36 @@ function insertPrCochangeTestLinks(db, repoId, filePaths) {
2482
2695
  }
2483
2696
  }
2484
2697
  function insertTestAwareness(db, repoId, repo, testFiles, testLinks, options = {}) {
2698
+ const dedupedTestFilesByPath = /* @__PURE__ */ new Map();
2699
+ for (const file of testFiles) dedupedTestFilesByPath.set(file.path, file);
2700
+ const dedupedTestFiles = [...dedupedTestFilesByPath.values()];
2701
+ const dedupedTestLinksByKey = /* @__PURE__ */ new Map();
2702
+ for (const link of testLinks) {
2703
+ const key = `${link.sourcePath}\0${link.testPath}\0${link.reason}`;
2704
+ const existing = dedupedTestLinksByKey.get(key);
2705
+ if (!existing || link.strength > existing.strength) {
2706
+ dedupedTestLinksByKey.set(key, link);
2707
+ }
2708
+ }
2709
+ const dedupedTestLinks = [...dedupedTestLinksByKey.values()];
2485
2710
  const insertTestFile = db.prepare(
2486
2711
  `INSERT INTO test_files
2487
2712
  (repo_id, path, language, size_bytes, content_hash, updated_at)
2488
- VALUES (?, ?, ?, ?, ?, ?)`
2713
+ VALUES (?, ?, ?, ?, ?, ?)
2714
+ ON CONFLICT(repo_id, path) DO UPDATE SET
2715
+ language = excluded.language,
2716
+ size_bytes = excluded.size_bytes,
2717
+ content_hash = excluded.content_hash,
2718
+ updated_at = excluded.updated_at`
2489
2719
  );
2490
2720
  options.onProgress?.({
2491
2721
  stage: "writing_test_awareness",
2492
2722
  repo,
2493
2723
  current: 0,
2494
- total: testFiles.length,
2724
+ total: dedupedTestFiles.length,
2495
2725
  kind: "test_files"
2496
2726
  });
2497
- for (const [index, file] of testFiles.entries()) {
2727
+ for (const [index, file] of dedupedTestFiles.entries()) {
2498
2728
  insertTestFile.run(
2499
2729
  repoId,
2500
2730
  file.path,
@@ -2504,36 +2734,38 @@ function insertTestAwareness(db, repoId, repo, testFiles, testLinks, options = {
2504
2734
  file.updatedAt
2505
2735
  );
2506
2736
  const current = index + 1;
2507
- if (shouldEmitCodeWriteProgress(current, testFiles.length)) {
2737
+ if (shouldEmitCodeWriteProgress(current, dedupedTestFiles.length)) {
2508
2738
  options.onProgress?.({
2509
2739
  stage: "writing_test_awareness",
2510
2740
  repo,
2511
2741
  current,
2512
- total: testFiles.length,
2742
+ total: dedupedTestFiles.length,
2513
2743
  kind: "test_files"
2514
2744
  });
2515
2745
  }
2516
2746
  }
2517
2747
  const insertTestLink = db.prepare(
2518
2748
  `INSERT INTO test_links (repo_id, source_path, test_path, reason, strength)
2519
- VALUES (?, ?, ?, ?, ?)`
2749
+ VALUES (?, ?, ?, ?, ?)
2750
+ ON CONFLICT(repo_id, source_path, test_path, reason) DO UPDATE SET
2751
+ strength = excluded.strength`
2520
2752
  );
2521
2753
  options.onProgress?.({
2522
2754
  stage: "writing_test_awareness",
2523
2755
  repo,
2524
2756
  current: 0,
2525
- total: testLinks.length,
2757
+ total: dedupedTestLinks.length,
2526
2758
  kind: "test_links"
2527
2759
  });
2528
- for (const [index, link] of testLinks.entries()) {
2760
+ for (const [index, link] of dedupedTestLinks.entries()) {
2529
2761
  insertTestLink.run(repoId, link.sourcePath, link.testPath, link.reason, link.strength);
2530
2762
  const current = index + 1;
2531
- if (shouldEmitCodeWriteProgress(current, testLinks.length)) {
2763
+ if (shouldEmitCodeWriteProgress(current, dedupedTestLinks.length)) {
2532
2764
  options.onProgress?.({
2533
2765
  stage: "writing_test_awareness",
2534
2766
  repo,
2535
2767
  current,
2536
- total: testLinks.length,
2768
+ total: dedupedTestLinks.length,
2537
2769
  kind: "test_links"
2538
2770
  });
2539
2771
  }
@@ -3057,18 +3289,7 @@ function createPattern(input) {
3057
3289
  };
3058
3290
  }
3059
3291
  function buildArchitectureIndex(repo, files, chunks, options = {}) {
3060
- const allPaths = files.map((file) => file.path);
3061
- const codePaths = new Set(allPaths);
3062
- const relatedTestIndex = buildRelatedTestIndex(allPaths);
3063
- const symbolSetsByPath = /* @__PURE__ */ new Map();
3064
- for (const chunk of chunks) {
3065
- const existing = symbolSetsByPath.get(chunk.filePath) ?? /* @__PURE__ */ new Set();
3066
- for (const symbol of chunk.symbols) {
3067
- if (existing.size >= 40) break;
3068
- existing.add(symbol);
3069
- }
3070
- symbolSetsByPath.set(chunk.filePath, existing);
3071
- }
3292
+ const codePaths = new Set(files.map((file) => file.path));
3072
3293
  const imports = [];
3073
3294
  options.onProgress?.({
3074
3295
  stage: "building_architecture_imports",
@@ -3091,6 +3312,20 @@ function buildArchitectureIndex(repo, files, chunks, options = {}) {
3091
3312
  });
3092
3313
  }
3093
3314
  }
3315
+ return buildArchitectureFromIndexedData(repo, files, chunks, imports, options);
3316
+ }
3317
+ function buildArchitectureFromIndexedData(repo, files, chunks, imports, options = {}) {
3318
+ const allPaths = files.map((file) => file.path);
3319
+ const relatedTestIndex = buildRelatedTestIndex(allPaths);
3320
+ const symbolSetsByPath = /* @__PURE__ */ new Map();
3321
+ for (const chunk of chunks) {
3322
+ const existing = symbolSetsByPath.get(chunk.filePath) ?? /* @__PURE__ */ new Set();
3323
+ for (const symbol of chunk.symbols) {
3324
+ if (existing.size >= 40) break;
3325
+ existing.add(symbol);
3326
+ }
3327
+ symbolSetsByPath.set(chunk.filePath, existing);
3328
+ }
3094
3329
  const importsByPath = /* @__PURE__ */ new Map();
3095
3330
  for (const item of imports) {
3096
3331
  const existing = importsByPath.get(item.sourcePath) ?? [];
@@ -3106,7 +3341,7 @@ function buildArchitectureIndex(repo, files, chunks, options = {}) {
3106
3341
  components: 0
3107
3342
  });
3108
3343
  for (const [index, file] of files.entries()) {
3109
- const area = classifyArchitectureArea(file.path, file.language, file.content);
3344
+ const area = classifyArchitectureArea(file.path, file.language);
3110
3345
  const fileImports = importsByPath.get(file.path) ?? [];
3111
3346
  const symbols = [...symbolSetsByPath.get(file.path) ?? []];
3112
3347
  components.push({
@@ -3320,15 +3555,134 @@ function discoverGitFiles(cwd) {
3320
3555
  });
3321
3556
  return output.split("\n").map((line) => normalizeGitPath(line.trim())).filter(Boolean);
3322
3557
  }
3558
+ function discoverGitUntrackedFiles(cwd) {
3559
+ const output = execFileSync3("git", ["ls-files", "--others", "--exclude-standard"], {
3560
+ cwd,
3561
+ encoding: "utf8",
3562
+ stdio: ["ignore", "pipe", "pipe"]
3563
+ });
3564
+ return output.split("\n").map((line) => normalizeGitPath(line.trim())).filter(Boolean);
3565
+ }
3566
+ function execGitLines(cwd, args) {
3567
+ const output = execFileSync3("git", args, {
3568
+ cwd,
3569
+ encoding: "utf8",
3570
+ stdio: ["ignore", "pipe", "ignore"]
3571
+ });
3572
+ return output.split("\n").map((line) => line.trimEnd()).filter(Boolean);
3573
+ }
3574
+ function readGitHeadCommit(cwd) {
3575
+ try {
3576
+ return execFileSync3("git", ["rev-parse", "HEAD"], {
3577
+ cwd,
3578
+ encoding: "utf8",
3579
+ stdio: ["ignore", "pipe", "ignore"]
3580
+ }).trim();
3581
+ } catch {
3582
+ return void 0;
3583
+ }
3584
+ }
3585
+ function hasDirtyWorkingTree(cwd) {
3586
+ try {
3587
+ const status = execFileSync3("git", ["status", "--porcelain"], {
3588
+ cwd,
3589
+ encoding: "utf8",
3590
+ stdio: ["ignore", "pipe", "ignore"]
3591
+ });
3592
+ return status.trim().length > 0;
3593
+ } catch {
3594
+ return true;
3595
+ }
3596
+ }
3597
+ function parseNameStatusLine(line) {
3598
+ const parts = line.split(" ").map((item) => normalizeGitPath(item));
3599
+ if (parts.length < 2) return void 0;
3600
+ const status = parts[0] ?? "";
3601
+ if (!status) return void 0;
3602
+ if (status.startsWith("R") || status.startsWith("C")) {
3603
+ return { status, previousPath: parts[1], path: parts[2] };
3604
+ }
3605
+ return { status, path: parts[1] };
3606
+ }
3607
+ function planIncrementalCodeIndex(cwd, lastIndexedCommit, existingIndexedPaths) {
3608
+ const currentCommit2 = readGitHeadCommit(cwd);
3609
+ const trackedPaths = discoverGitFiles(cwd);
3610
+ const trackedSet = new Set(trackedPaths);
3611
+ const deletedPaths = /* @__PURE__ */ new Set();
3612
+ const changedPaths = /* @__PURE__ */ new Set();
3613
+ const dirtyWorkingTree = hasDirtyWorkingTree(cwd);
3614
+ if (!lastIndexedCommit) {
3615
+ return {
3616
+ currentCommit: currentCommit2,
3617
+ trackedPaths,
3618
+ changedPaths: trackedPaths,
3619
+ deletedPaths: [...existingIndexedPaths].filter((filePath) => !trackedSet.has(filePath)),
3620
+ dirtyWorkingTree,
3621
+ fallbackToFullHashCompare: true,
3622
+ reason: "No previous commit snapshot; using full hash comparison."
3623
+ };
3624
+ }
3625
+ if (dirtyWorkingTree) {
3626
+ return {
3627
+ currentCommit: currentCommit2,
3628
+ trackedPaths,
3629
+ changedPaths: trackedPaths,
3630
+ deletedPaths: [...existingIndexedPaths].filter((filePath) => !trackedSet.has(filePath)),
3631
+ dirtyWorkingTree,
3632
+ fallbackToFullHashCompare: true,
3633
+ reason: "Working tree is dirty; using full hash comparison for deterministic results."
3634
+ };
3635
+ }
3636
+ try {
3637
+ const lines = execGitLines(cwd, ["diff", "--name-status", `${lastIndexedCommit}..HEAD`]);
3638
+ for (const line of lines) {
3639
+ const parsed = parseNameStatusLine(line);
3640
+ if (!parsed?.path) continue;
3641
+ const statusCode = parsed.status[0];
3642
+ const normalizedPath = normalizeGitPath(parsed.path);
3643
+ if (statusCode === "D") {
3644
+ deletedPaths.add(normalizedPath);
3645
+ continue;
3646
+ }
3647
+ if (trackedSet.has(normalizedPath)) changedPaths.add(normalizedPath);
3648
+ }
3649
+ for (const untrackedPath of discoverGitUntrackedFiles(cwd)) {
3650
+ if (trackedSet.has(untrackedPath)) changedPaths.add(untrackedPath);
3651
+ }
3652
+ for (const existingPath of existingIndexedPaths) {
3653
+ if (!trackedSet.has(existingPath)) deletedPaths.add(existingPath);
3654
+ }
3655
+ return {
3656
+ currentCommit: currentCommit2,
3657
+ trackedPaths,
3658
+ changedPaths: [...changedPaths],
3659
+ deletedPaths: [...deletedPaths],
3660
+ dirtyWorkingTree: false,
3661
+ fallbackToFullHashCompare: false,
3662
+ reason: "Using git diff and untracked files against last indexed commit."
3663
+ };
3664
+ } catch {
3665
+ return {
3666
+ currentCommit: currentCommit2,
3667
+ trackedPaths,
3668
+ changedPaths: trackedPaths,
3669
+ deletedPaths: [...existingIndexedPaths].filter((filePath) => !trackedSet.has(filePath)),
3670
+ dirtyWorkingTree: true,
3671
+ fallbackToFullHashCompare: true,
3672
+ reason: "Unable to compute git diff; falling back to full hash comparison."
3673
+ };
3674
+ }
3675
+ }
3323
3676
  var DISCOVERY_SCAN_INTERVAL = 200;
3324
- function discoverCodeFiles(cwd, repo, options = {}) {
3677
+ function discoverFromPaths(cwd, repo, inputPaths, options = {}) {
3325
3678
  const maxFileBytes = options.maxFileBytes ?? DEFAULT_MAX_CODE_FILE_BYTES;
3679
+ const includeContent = options.includeContent ?? false;
3326
3680
  const rootPath = path7.resolve(cwd);
3327
3681
  const files = [];
3328
3682
  let skippedFiles = 0;
3329
- const gitFiles = discoverGitFiles(cwd);
3330
- const total = gitFiles.length;
3331
- for (const [scanIndex, filePath] of gitFiles.entries()) {
3683
+ const candidatePaths = [...new Set(inputPaths.map((value) => normalizeGitPath(value)).filter(Boolean))];
3684
+ const total = candidatePaths.length;
3685
+ for (const [scanIndex, filePath] of candidatePaths.entries()) {
3332
3686
  const scanned = scanIndex + 1;
3333
3687
  if (scanned % DISCOVERY_SCAN_INTERVAL === 0 || scanned === total) {
3334
3688
  options.onScan?.(scanned, total);
@@ -3359,7 +3713,6 @@ function discoverCodeFiles(cwd, repo, options = {}) {
3359
3713
  skippedFiles += 1;
3360
3714
  continue;
3361
3715
  }
3362
- const content = buffer.toString("utf8");
3363
3716
  files.push({
3364
3717
  repo,
3365
3718
  path: filePath,
@@ -3368,11 +3721,21 @@ function discoverCodeFiles(cwd, repo, options = {}) {
3368
3721
  contentHash: crypto3.createHash("sha256").update(buffer).digest("hex"),
3369
3722
  updatedAt: stat.mtime.toISOString(),
3370
3723
  absolutePath,
3371
- content
3724
+ ...includeContent ? { content: buffer.toString("utf8") } : {}
3372
3725
  });
3373
3726
  }
3374
3727
  return { files, skippedFiles };
3375
3728
  }
3729
+ function discoverCodeFiles(cwd, repo, options = {}) {
3730
+ return discoverFromPaths(cwd, repo, discoverGitFiles(cwd), options);
3731
+ }
3732
+ function discoverCodeFilesByPaths(cwd, repo, filePaths, options = {}) {
3733
+ return discoverFromPaths(cwd, repo, filePaths, options);
3734
+ }
3735
+ function readDiscoveredCodeFileContent(file) {
3736
+ if (typeof file.content === "string") return file.content;
3737
+ return fs4.readFileSync(file.absolutePath, "utf8");
3738
+ }
3376
3739
 
3377
3740
  // src/retrieval/test-commands.ts
3378
3741
  import crypto4 from "crypto";
@@ -3606,40 +3969,154 @@ function refreshTestCommands(db, cwd, repo, files = [], options = {}) {
3606
3969
 
3607
3970
  // src/indexer/code-indexer.ts
3608
3971
  function indexCodebase(db, options) {
3972
+ const state = getCodeIndexStateForRepo(db, options.repo);
3973
+ const existingHashes = getRepoCodeFileHashes(db, options.repo);
3974
+ const plan = planIncrementalCodeIndex(
3975
+ options.cwd,
3976
+ state?.lastIndexedCommit,
3977
+ new Set(existingHashes.keys())
3978
+ );
3609
3979
  options.onProgress?.({ stage: "discovering_code_files", repo: options.repo });
3610
- const discovery = discoverCodeFiles(options.cwd, options.repo, {
3980
+ const discovery = plan.fallbackToFullHashCompare ? discoverCodeFiles(options.cwd, options.repo, {
3981
+ maxFileBytes: options.maxFileBytes,
3982
+ onScan: (scanned, total) => options.onProgress?.({
3983
+ stage: "discovering_code_files",
3984
+ repo: options.repo,
3985
+ scanned,
3986
+ total
3987
+ })
3988
+ }) : discoverCodeFilesByPaths(options.cwd, options.repo, plan.changedPaths, {
3611
3989
  maxFileBytes: options.maxFileBytes,
3612
- onScan: (scanned, total) => options.onProgress?.({ stage: "discovering_code_files", repo: options.repo, scanned, total })
3990
+ onScan: (scanned, total) => options.onProgress?.({
3991
+ stage: "discovering_code_files",
3992
+ repo: options.repo,
3993
+ scanned,
3994
+ total
3995
+ })
3613
3996
  });
3997
+ const changedFiles = discovery.files.filter(
3998
+ (file) => existingHashes.get(file.path) !== file.contentHash
3999
+ );
4000
+ const discoveredPaths = new Set(discovery.files.map((file) => file.path));
4001
+ const deletedPaths = plan.fallbackToFullHashCompare ? [...existingHashes.keys()].filter((filePath) => !discoveredPaths.has(filePath)) : plan.deletedPaths;
3614
4002
  options.onProgress?.({
3615
4003
  stage: "discovered_code_files",
3616
4004
  repo: options.repo,
3617
- files: discovery.files.length,
4005
+ files: changedFiles.length,
3618
4006
  skippedFiles: discovery.skippedFiles
3619
4007
  });
3620
- const chunks = [];
3621
- for (const [index, file] of discovery.files.entries()) {
4008
+ if (changedFiles.length === 0 && deletedPaths.length === 0) {
4009
+ const counts = touchCodeIndexState(
4010
+ db,
4011
+ options.repo,
4012
+ discovery.skippedFiles,
4013
+ plan.currentCommit
4014
+ );
4015
+ const repoId = ensureRepository(db, options.repo);
4016
+ const scopedCount = (table) => db.prepare(`SELECT COUNT(*) AS count FROM ${table} WHERE repo_id = ?`).get(repoId).count;
4017
+ const summary2 = {
4018
+ indexedFiles: counts.files,
4019
+ codeChunksCreated: counts.chunks,
4020
+ testFilesIndexed: scopedCount("test_files"),
4021
+ testLinksCreated: scopedCount("test_links"),
4022
+ architectureComponentsIndexed: scopedCount("architecture_components"),
4023
+ architecturePatternsIndexed: scopedCount("architecture_patterns"),
4024
+ architectureImportsIndexed: scopedCount("code_imports"),
4025
+ skippedFiles: discovery.skippedFiles,
4026
+ databasePath: defaultDatabasePath(options.cwd)
4027
+ };
4028
+ options.onProgress?.({
4029
+ stage: "completed_code_index",
4030
+ repo: options.repo,
4031
+ files: summary2.indexedFiles,
4032
+ chunks: summary2.codeChunksCreated,
4033
+ skippedFiles: summary2.skippedFiles,
4034
+ testFiles: summary2.testFilesIndexed,
4035
+ testLinks: summary2.testLinksCreated,
4036
+ architectureComponents: summary2.architectureComponentsIndexed,
4037
+ architecturePatterns: summary2.architecturePatternsIndexed,
4038
+ architectureImports: summary2.architectureImportsIndexed
4039
+ });
4040
+ return summary2;
4041
+ }
4042
+ const changedChunks = [];
4043
+ const changedImports = [];
4044
+ const projectedIndexedPaths = new Set(
4045
+ [...existingHashes.keys()].filter((filePath) => !deletedPaths.includes(filePath))
4046
+ );
4047
+ for (const file of changedFiles) projectedIndexedPaths.add(file.path);
4048
+ for (const [index, file] of changedFiles.entries()) {
3622
4049
  options.onProgress?.({
3623
4050
  stage: "indexing_code_file",
3624
4051
  repo: options.repo,
3625
4052
  current: index + 1,
3626
- total: discovery.files.length,
4053
+ total: changedFiles.length,
3627
4054
  filePath: file.path
3628
4055
  });
3629
- const fileChunks = chunkCodeFile(file);
3630
- chunks.push(...fileChunks);
4056
+ const content = readDiscoveredCodeFileContent(file);
4057
+ const fileWithContent = { ...file, content };
4058
+ const fileChunks = chunkCodeFile(fileWithContent);
4059
+ changedChunks.push(...fileChunks);
4060
+ changedImports.push(
4061
+ ...extractCodeImports(file.path, content, projectedIndexedPaths, options.repo)
4062
+ );
3631
4063
  options.onProgress?.({
3632
4064
  stage: "indexed_code_file",
3633
4065
  repo: options.repo,
3634
4066
  current: index + 1,
3635
- total: discovery.files.length,
4067
+ total: changedFiles.length,
3636
4068
  filePath: file.path,
3637
4069
  chunks: fileChunks.length
3638
4070
  });
3639
4071
  }
3640
- const architecture = buildArchitectureIndex(options.repo, discovery.files, chunks, {
4072
+ const affectedPaths = /* @__PURE__ */ new Set([
4073
+ ...deletedPaths,
4074
+ ...changedFiles.map((file) => file.path)
4075
+ ]);
4076
+ const allFilesByPath = new Map(getRepoCodeFiles(db, options.repo).map((file) => [file.path, file]));
4077
+ for (const filePath of deletedPaths) allFilesByPath.delete(filePath);
4078
+ for (const file of changedFiles) {
4079
+ allFilesByPath.set(file.path, {
4080
+ repo: file.repo,
4081
+ path: file.path,
4082
+ language: file.language,
4083
+ sizeBytes: file.sizeBytes,
4084
+ contentHash: file.contentHash,
4085
+ updatedAt: file.updatedAt
4086
+ });
4087
+ }
4088
+ const allFiles = [...allFilesByPath.values()];
4089
+ const allSymbolChunks = getRepoCodeChunkSymbols(db, options.repo).filter(
4090
+ (chunk) => !affectedPaths.has(chunk.filePath)
4091
+ );
4092
+ allSymbolChunks.push(...changedChunks);
4093
+ const allImports = getRepoCodeImports(db, options.repo).filter(
4094
+ (item) => !affectedPaths.has(item.sourcePath)
4095
+ );
4096
+ allImports.push(...changedImports);
4097
+ const testChunks = getRepoTestChunks(db, options.repo).filter(
4098
+ (chunk) => !affectedPaths.has(chunk.filePath)
4099
+ );
4100
+ for (const chunk of changedChunks) {
4101
+ if (isTestFilePath(chunk.filePath)) testChunks.push(chunk);
4102
+ }
4103
+ const testAwareness = inferTestAwareness(options.repo, allFiles, testChunks, {
3641
4104
  onProgress: options.onProgress
3642
4105
  });
4106
+ options.onProgress?.({
4107
+ stage: "building_architecture_imports",
4108
+ repo: options.repo,
4109
+ current: allFiles.length,
4110
+ total: allFiles.length,
4111
+ imports: allImports.length
4112
+ });
4113
+ const architecture = buildArchitectureFromIndexedData(
4114
+ options.repo,
4115
+ allFiles,
4116
+ allSymbolChunks,
4117
+ allImports,
4118
+ { onProgress: options.onProgress }
4119
+ );
3643
4120
  options.onProgress?.({
3644
4121
  stage: "indexed_architecture",
3645
4122
  repo: options.repo,
@@ -3650,14 +4127,22 @@ function indexCodebase(db, options) {
3650
4127
  const summary = replaceCodeIndex(
3651
4128
  db,
3652
4129
  options.repo,
3653
- discovery.files.map(({ content: _content, absolutePath: _absolutePath, ...file }) => file),
3654
- chunks,
4130
+ changedFiles.map(({ content: _content, absolutePath: _absolutePath, ...file }) => file),
4131
+ changedChunks,
3655
4132
  discovery.skippedFiles,
3656
4133
  options.cwd,
3657
4134
  architecture,
3658
- { onProgress: options.onProgress }
4135
+ {
4136
+ onProgress: options.onProgress,
4137
+ deletedPaths,
4138
+ changedImports,
4139
+ currentCommit: plan.currentCommit,
4140
+ testAwareness
4141
+ }
3659
4142
  );
3660
- refreshTestCommands(db, options.cwd, options.repo, [], { onProgress: options.onProgress });
4143
+ refreshTestCommands(db, options.cwd, options.repo, [], {
4144
+ onProgress: options.onProgress
4145
+ });
3661
4146
  options.onProgress?.({
3662
4147
  stage: "completed_code_index",
3663
4148
  repo: options.repo,
@@ -4096,7 +4581,7 @@ function clampMaxResults(value, defaultValue) {
4096
4581
 
4097
4582
  // src/retrieval/ranker.ts
4098
4583
  import path11 from "path";
4099
- function parseJsonArray3(value) {
4584
+ function parseJsonArray4(value) {
4100
4585
  try {
4101
4586
  const parsed = JSON.parse(value);
4102
4587
  return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
@@ -4114,9 +4599,9 @@ function rowToWisdomUnit(row) {
4114
4599
  category: row.category,
4115
4600
  text: row.text,
4116
4601
  sanitizedText: row.sanitized_text,
4117
- filePaths: parseJsonArray3(row.file_paths_json),
4118
- symbols: parseJsonArray3(row.symbols_json),
4119
- authors: parseJsonArray3(row.authors_json),
4602
+ filePaths: parseJsonArray4(row.file_paths_json),
4603
+ symbols: parseJsonArray4(row.symbols_json),
4604
+ authors: parseJsonArray4(row.authors_json),
4120
4605
  createdAt: row.created_at,
4121
4606
  mergedAt: row.merged_at ?? void 0,
4122
4607
  confidence: row.confidence,
@@ -4179,11 +4664,11 @@ function symbolMatch2(unit, querySymbols) {
4179
4664
  }
4180
4665
  function textMatch2(unit, inputText) {
4181
4666
  const queryTokens = tokenizeSearchText(inputText, 32);
4182
- if (queryTokens.length === 0) return unit.bm25 === void 0 ? 0 : 0.45;
4667
+ if (queryTokens.length === 0) return unit.bm25 === void 0 ? 0 : 0.35;
4183
4668
  const haystack = `${unit.sanitizedText} ${unit.filePaths.join(" ")} ${unit.symbols.join(" ")}`.toLowerCase();
4184
4669
  const overlap = queryTokens.filter((token) => haystack.includes(token.toLowerCase())).length / queryTokens.length;
4185
- const bm25Signal = unit.bm25 === void 0 ? 0 : Math.max(0.25, Math.min(1, 1 / (1 + Math.abs(unit.bm25))));
4186
- return Math.max(overlap, bm25Signal);
4670
+ const bm25Signal = unit.bm25 === void 0 ? 0 : Math.max(0, Math.min(1, 1 / (1 + Math.abs(unit.bm25))));
4671
+ return unit.bm25 === void 0 ? overlap : Number((0.65 * overlap + 0.35 * bm25Signal).toFixed(4));
4187
4672
  }
4188
4673
  function reviewerOrAuthorSignal(unit) {
4189
4674
  if (unit.sourceType === "review_comment" || unit.sourceType === "review_summary") return 0.9;
@@ -4375,7 +4860,7 @@ function rankWisdomUnits(db, input) {
4375
4860
 
4376
4861
  // src/retrieval/code-ranker.ts
4377
4862
  import path12 from "path";
4378
- function parseJsonArray4(value) {
4863
+ function parseJsonArray5(value) {
4379
4864
  try {
4380
4865
  const parsed = JSON.parse(value);
4381
4866
  return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
@@ -4392,7 +4877,7 @@ function rowToCodeChunk(row) {
4392
4877
  startLine: row.start_line,
4393
4878
  endLine: row.end_line,
4394
4879
  sanitizedText: row.sanitized_text,
4395
- symbols: parseJsonArray4(row.symbols_json),
4880
+ symbols: parseJsonArray5(row.symbols_json),
4396
4881
  contentHash: row.content_hash,
4397
4882
  updatedAt: row.updated_at,
4398
4883
  bm25: row.bm25 ?? void 0
@@ -4441,8 +4926,8 @@ function textMatch3(chunk, input) {
4441
4926
  );
4442
4927
  const haystack = `${chunk.sanitizedText} ${chunk.filePath} ${chunk.symbols.join(" ")}`.toLowerCase();
4443
4928
  const overlap = tokens.length ? tokens.filter((token) => haystack.includes(token.toLowerCase())).length / tokens.length : 0;
4444
- const bm25Signal = chunk.bm25 === void 0 ? 0 : Math.max(0.25, Math.min(1, 1 / (1 + Math.abs(chunk.bm25))));
4445
- return Math.max(overlap, bm25Signal);
4929
+ const bm25Signal = chunk.bm25 === void 0 ? 0 : Math.max(0, Math.min(1, 1 / (1 + Math.abs(chunk.bm25))));
4930
+ return chunk.bm25 === void 0 ? overlap : Number((0.65 * overlap + 0.35 * bm25Signal).toFixed(4));
4446
4931
  }
4447
4932
  function recencyScore2(chunk) {
4448
4933
  const timestamp = Date.parse(chunk.updatedAt);
@@ -4540,7 +5025,7 @@ function rankCodeChunks(db, input) {
4540
5025
 
4541
5026
  // src/retrieval/architecture-ranker.ts
4542
5027
  import path13 from "path";
4543
- function parseJsonArray5(value) {
5028
+ function parseJsonArray6(value) {
4544
5029
  try {
4545
5030
  const parsed = JSON.parse(value);
4546
5031
  return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
@@ -4564,8 +5049,8 @@ function rowToPattern(row) {
4564
5049
  name: row.name,
4565
5050
  summary: row.summary_sanitized,
4566
5051
  sanitizedSummary: row.summary_sanitized,
4567
- sourceFiles: parseJsonArray5(row.source_files_json),
4568
- symbols: parseJsonArray5(row.symbols_json),
5052
+ sourceFiles: parseJsonArray6(row.source_files_json),
5053
+ symbols: parseJsonArray6(row.symbols_json),
4569
5054
  evidence: parseEvidence(row.evidence_json),
4570
5055
  confidence: row.confidence,
4571
5056
  createdAt: row.created_at,
@@ -4681,7 +5166,7 @@ function rankArchitecturePatterns(db, input) {
4681
5166
 
4682
5167
  // src/retrieval/test-ranker.ts
4683
5168
  import path14 from "path";
4684
- function parseJsonArray6(value) {
5169
+ function parseJsonArray7(value) {
4685
5170
  if (!value) return [];
4686
5171
  try {
4687
5172
  const parsed = JSON.parse(value);
@@ -4694,7 +5179,7 @@ function baseStem(filePath) {
4694
5179
  return path14.posix.basename(filePath).replace(/\.(test|spec)\.[^.]+$/i, "").replace(/\.[^.]+$/i, "").toLowerCase();
4695
5180
  }
4696
5181
  function rowToRanked(row, input) {
4697
- const symbols = parseJsonArray6(row.symbols_json);
5182
+ const symbols = parseJsonArray7(row.symbols_json);
4698
5183
  const text = row.sanitized_text ?? "";
4699
5184
  const matchedSymbols = (input.symbols ?? []).filter((symbol) => {
4700
5185
  const lower = symbol.toLowerCase();
@@ -4764,7 +5249,7 @@ function rankRelevantTests(db, input) {
4764
5249
 
4765
5250
  // src/retrieval/regression-ranker.ts
4766
5251
  import path15 from "path";
4767
- function parseJsonArray7(value) {
5252
+ function parseJsonArray8(value) {
4768
5253
  try {
4769
5254
  const parsed = JSON.parse(value);
4770
5255
  return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
@@ -4779,12 +5264,12 @@ function rowToEvent(row) {
4779
5264
  prNumber: row.pr_number,
4780
5265
  prUrl: row.pr_url,
4781
5266
  summary: row.summary_sanitized,
4782
- filePaths: parseJsonArray7(row.file_paths_json),
4783
- symbols: parseJsonArray7(row.symbols_json),
4784
- testPaths: parseJsonArray7(row.test_paths_json),
4785
- authors: parseJsonArray7(row.authors_json),
4786
- labels: parseJsonArray7(row.labels_json),
4787
- signals: parseJsonArray7(row.signals_json),
5267
+ filePaths: parseJsonArray8(row.file_paths_json),
5268
+ symbols: parseJsonArray8(row.symbols_json),
5269
+ testPaths: parseJsonArray8(row.test_paths_json),
5270
+ authors: parseJsonArray8(row.authors_json),
5271
+ labels: parseJsonArray8(row.labels_json),
5272
+ signals: parseJsonArray8(row.signals_json),
4788
5273
  createdAt: row.created_at,
4789
5274
  mergedAt: row.merged_at ?? void 0,
4790
5275
  confidence: row.confidence
@@ -6184,7 +6669,7 @@ function syncPlaybooksToDatabase(db, cwd) {
6184
6669
  }
6185
6670
 
6186
6671
  // src/retrieval/onboarding.ts
6187
- function parseJsonArray8(value) {
6672
+ function parseJsonArray9(value) {
6188
6673
  try {
6189
6674
  const parsed = JSON.parse(value);
6190
6675
  return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
@@ -6217,7 +6702,7 @@ function riskyModules(db) {
6217
6702
  ORDER BY confidence DESC, COALESCE(merged_at, created_at) DESC
6218
6703
  LIMIT 20`
6219
6704
  ).all();
6220
- return [...new Set(rows.flatMap((row) => parseJsonArray8(row.file_paths_json)))].slice(0, 10);
6705
+ return [...new Set(rows.flatMap((row) => parseJsonArray9(row.file_paths_json)))].slice(0, 10);
6221
6706
  }
6222
6707
  function relatedTests(db, files) {
6223
6708
  if (files.length === 0) {
@@ -6296,6 +6781,7 @@ import crypto8 from "crypto";
6296
6781
  import fs7 from "fs";
6297
6782
  import path18 from "path";
6298
6783
  var ANCHOR_EVALS_FILE = "anchor.evals.json";
6784
+ var DEFAULT_EVAL_K = 8;
6299
6785
  function evalsPath(cwd) {
6300
6786
  return path18.join(cwd, ANCHOR_EVALS_FILE);
6301
6787
  }
@@ -6394,6 +6880,7 @@ function runRetrievalEvals(db, cwd) {
6394
6880
  initializeSchema(db);
6395
6881
  const filePath = evalsPath(cwd);
6396
6882
  const evalFile = readEvalFile(cwd);
6883
+ const k = DEFAULT_EVAL_K;
6397
6884
  const results = evalFile.evals.map((item) => {
6398
6885
  const context = buildAnchorContextResult(db, cwd, {
6399
6886
  task: item.task,
@@ -6404,6 +6891,9 @@ function runRetrievalEvals(db, cwd) {
6404
6891
  ...Array.isArray(context.metadata.items) ? context.metadata.items : [],
6405
6892
  ...Array.isArray(context.metadata.teamRules) ? context.metadata.teamRules : []
6406
6893
  ];
6894
+ const rankedPrs = uniqueStrings(
6895
+ metadataItems.map((metadata) => metadata.prNumber).filter((prNumber) => typeof prNumber === "number").map(String)
6896
+ ).map(Number);
6407
6897
  const foundPrs = uniqueStrings(
6408
6898
  metadataItems.map((metadata) => metadata.prNumber).filter((prNumber) => typeof prNumber === "number").map(String)
6409
6899
  ).map(Number);
@@ -6414,6 +6904,23 @@ function runRetrievalEvals(db, cwd) {
6414
6904
  const missingCategories = item.expectedCategories.filter(
6415
6905
  (category) => !foundCategories.includes(category)
6416
6906
  );
6907
+ const expectedPrRanks = item.expectedPrs.map((prNumber) => {
6908
+ const index = rankedPrs.indexOf(prNumber);
6909
+ return {
6910
+ prNumber,
6911
+ rank: index >= 0 ? index + 1 : void 0
6912
+ };
6913
+ });
6914
+ const topK = rankedPrs.slice(0, k);
6915
+ const relevantInTopK = item.expectedPrs.filter((prNumber) => topK.includes(prNumber));
6916
+ const precisionAtK2 = k > 0 ? Number((relevantInTopK.length / k).toFixed(4)) : 0;
6917
+ const recallAtK2 = item.expectedPrs.length > 0 ? Number((relevantInTopK.length / item.expectedPrs.length).toFixed(4)) : 0;
6918
+ const reciprocalRank = (() => {
6919
+ if (item.expectedPrs.length === 0) return 0;
6920
+ const firstRank = expectedPrRanks.map((entry) => entry.rank).filter((rank) => typeof rank === "number").sort((a, b) => a - b)[0];
6921
+ if (!firstRank) return 0;
6922
+ return Number((1 / firstRank).toFixed(4));
6923
+ })();
6417
6924
  return {
6418
6925
  id: item.id,
6419
6926
  task: item.task,
@@ -6421,18 +6928,35 @@ function runRetrievalEvals(db, cwd) {
6421
6928
  expectedPrs: item.expectedPrs,
6422
6929
  foundPrs,
6423
6930
  missingPrs,
6931
+ expectedPrRanks,
6424
6932
  expectedCategories: item.expectedCategories,
6425
6933
  foundCategories,
6426
- missingCategories
6934
+ missingCategories,
6935
+ precisionAtK: precisionAtK2,
6936
+ recallAtK: recallAtK2,
6937
+ reciprocalRank
6427
6938
  };
6428
6939
  });
6429
6940
  const passed = results.filter((result) => result.passed).length;
6941
+ const precisionAtK = results.length > 0 ? Number(
6942
+ (results.reduce((sum, result) => sum + result.precisionAtK, 0) / results.length).toFixed(4)
6943
+ ) : 0;
6944
+ const recallAtK = results.length > 0 ? Number(
6945
+ (results.reduce((sum, result) => sum + result.recallAtK, 0) / results.length).toFixed(4)
6946
+ ) : 0;
6947
+ const mrr = results.length > 0 ? Number(
6948
+ (results.reduce((sum, result) => sum + result.reciprocalRank, 0) / results.length).toFixed(4)
6949
+ ) : 0;
6430
6950
  return {
6431
6951
  ok: passed === results.length,
6432
6952
  path: filePath,
6433
6953
  total: results.length,
6434
6954
  passed,
6435
6955
  failed: results.length - passed,
6956
+ precisionAtK,
6957
+ recallAtK,
6958
+ mrr,
6959
+ k,
6436
6960
  results
6437
6961
  };
6438
6962
  }
@@ -8787,7 +9311,7 @@ function packageRootForSpecifier(specifier) {
8787
9311
  if (normalized.startsWith("@") && parts.length >= 2) return `${parts[0]}/${parts[1]}`;
8788
9312
  return parts[0] ?? "";
8789
9313
  }
8790
- function parseJsonArray9(value) {
9314
+ function parseJsonArray10(value) {
8791
9315
  try {
8792
9316
  const parsed = JSON.parse(value);
8793
9317
  return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
@@ -8921,7 +9445,7 @@ function rebuildOrgGraph(db, config, baseDirOrOptions) {
8921
9445
  `imports ${sanitizeHistoricalText(rootSpecifier || item.specifier)}`
8922
9446
  )
8923
9447
  ],
8924
- confidence: parseJsonArray9(item.imported_symbols_json).length > 0 ? 0.88 : 0.76
9448
+ confidence: parseJsonArray10(item.imported_symbols_json).length > 0 ? 0.88 : 0.76
8925
9449
  });
8926
9450
  }
8927
9451
  if (shouldEmitProgress3(index + 1, imports.length)) {
@@ -9257,7 +9781,11 @@ async function indexOrgRepos(db, config, options = {}) {
9257
9781
  command,
9258
9782
  totalRepos: repos.length
9259
9783
  });
9260
- for (const [repoIndex, repo] of repos.entries()) {
9784
+ const maxConcurrency = Math.max(1, Math.min(options.concurrency ?? 3, 4));
9785
+ let nextRepoIndex = 0;
9786
+ const processRepo = async (repoIndex) => {
9787
+ const repo = repos[repoIndex];
9788
+ if (!repo) return;
9261
9789
  const repoPosition = repoIndex + 1;
9262
9790
  const localPath = orgRepoLocalPath(config.org, repo, options.baseDir);
9263
9791
  const repoStartedAt = (/* @__PURE__ */ new Date()).toISOString();
@@ -9508,7 +10036,18 @@ async function indexOrgRepos(db, config, options = {}) {
9508
10036
  error: message
9509
10037
  });
9510
10038
  }
9511
- }
10039
+ };
10040
+ const worker = async () => {
10041
+ while (true) {
10042
+ const repoIndex = nextRepoIndex;
10043
+ nextRepoIndex += 1;
10044
+ if (repoIndex >= repos.length) return;
10045
+ await processRepo(repoIndex);
10046
+ }
10047
+ };
10048
+ await Promise.all(
10049
+ Array.from({ length: Math.min(maxConcurrency, repos.length) }, () => worker())
10050
+ );
9512
10051
  let graph;
9513
10052
  if (options.noGraph) {
9514
10053
  const counts = getOrgGraphCounts(db, config.org);
@@ -9553,6 +10092,7 @@ async function indexOrgRepos(db, config, options = {}) {
9553
10092
  codeFilesIndexed: results.reduce((sum, result) => sum + (result.code?.indexedFiles ?? 0), 0),
9554
10093
  failures: results.map((result) => result.error).concat(graph.error ? [graph.error] : []).filter((error) => Boolean(error))
9555
10094
  });
10095
+ runDatabaseMaintenance(db);
9556
10096
  emit({
9557
10097
  stage: "org_sync_completed",
9558
10098
  org: config.org,
@@ -10440,6 +10980,7 @@ export {
10440
10980
  anchorMcpEntry,
10441
10981
  architectureFilesFromDiff,
10442
10982
  buildAnchorContextResult,
10983
+ buildArchitectureFromIndexedData,
10443
10984
  buildArchitectureIndex,
10444
10985
  buildArchitectureMap,
10445
10986
  buildFtsQuery,
@@ -10479,6 +11020,7 @@ export {
10479
11020
  detectTestCommands,
10480
11021
  detectTestCommandsForFile,
10481
11022
  discoverCodeFiles,
11023
+ discoverCodeFilesByPaths,
10482
11024
  emptyCodeIndexSummary,
10483
11025
  ensureAnchorGitExclude,
10484
11026
  ensureCursorConfig,
@@ -10507,6 +11049,7 @@ export {
10507
11049
  getAnchorIndexHealth,
10508
11050
  getArchitectureContext,
10509
11051
  getArchitectureMapContext,
11052
+ getCodeIndexStateForRepo,
10510
11053
  getGitHubRateLimitDelayMs,
10511
11054
  getGraphQLFetchCheckpoint,
10512
11055
  getIndexStatus,
@@ -10517,12 +11060,19 @@ export {
10517
11060
  getOrgRepoState,
10518
11061
  getOrgStatus,
10519
11062
  getPlaybook,
11063
+ getRepoCodeChunkSymbols,
11064
+ getRepoCodeCounts,
11065
+ getRepoCodeFileHashes,
11066
+ getRepoCodeFiles,
11067
+ getRepoCodeImports,
11068
+ getRepoTestChunks,
10520
11069
  getSemanticStatus,
10521
11070
  getSuggestedPromptTexts,
10522
11071
  getSuggestedPrompts,
10523
11072
  getWisdomCategoryCounts,
10524
11073
  githubAuthFixMessage,
10525
11074
  graphQLFetchCheckpointScope,
11075
+ hasDirtyWorkingTree,
10526
11076
  hasHighSignalLanguage,
10527
11077
  indexCodebase,
10528
11078
  indexOrgRepos,
@@ -10558,6 +11108,7 @@ export {
10558
11108
  orgRoot,
10559
11109
  paginateWithGitHubRateLimit,
10560
11110
  parseGitHubRemote,
11111
+ planIncrementalCodeIndex,
10561
11112
  planTask,
10562
11113
  plannedOrgCloneCommands,
10563
11114
  rankArchitecturePatterns,
@@ -10566,6 +11117,8 @@ export {
10566
11117
  rankRelevantTests,
10567
11118
  rankTeamRules,
10568
11119
  rankWisdomUnits,
11120
+ readDiscoveredCodeFileContent,
11121
+ readGitHeadCommit,
10569
11122
  readOrgHeartbeat,
10570
11123
  rebuildOrgGraph,
10571
11124
  recordFeedback,
@@ -10586,6 +11139,7 @@ export {
10586
11139
  resolvePullRequestFetchLimit,
10587
11140
  reviewDiff,
10588
11141
  runAnchorCi,
11142
+ runDatabaseMaintenance,
10589
11143
  runDoctor,
10590
11144
  runRetrievalEvals,
10591
11145
  sanitizeHistoricalText,
@@ -10600,6 +11154,7 @@ export {
10600
11154
  syncOrgConfigToDatabase,
10601
11155
  syncPlaybooksToDatabase,
10602
11156
  tokenizeSearchText,
11157
+ touchCodeIndexState,
10603
11158
  truncateText,
10604
11159
  uniqueStrings,
10605
11160
  updateGitHubGraphQLRateLimitState,