@pratik7368patil/anchor-core 0.1.30 → 0.1.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -175,8 +175,11 @@ function canonicalizeText(text) {
175
175
  return text.toLowerCase().replace(/https?:\/\/\S+/g, "").replace(/[^a-z0-9_./ -]/g, " ").replace(/\s+/g, " ").trim();
176
176
  }
/**
 * Splits free text into lowercase search tokens.
 *
 * Candidates are runs of two or more characters from `[a-z0-9_./-]`.
 * Two-character candidates are kept only when they appear in the short
 * allow-list below; anything of length three or more is always kept.
 *
 * @param {string} text - Raw text to tokenize.
 * @param {number} [maxTokens=32] - Upper bound on the number of tokens returned.
 * @returns {string[]} De-duplicated tokens (via `uniqueStrings`), truncated to `maxTokens`.
 */
function tokenizeSearchText(text, maxTokens = 32) {
  const shortSignalTokens = /* @__PURE__ */ new Set(["id", "db", "api", "key", "sql", "jwt", "ui", "ux"]);
  const candidates = text.toLowerCase().match(/[a-z0-9_./-]{2,}/g) ?? [];
  const kept = [];
  for (const candidate of candidates) {
    // Short candidates are noise unless they are one of the known signal words.
    if (candidate.length >= 3 || shortSignalTokens.has(candidate)) {
      kept.push(candidate);
    }
  }
  return uniqueStrings(kept).slice(0, maxTokens);
}
181
184
 
182
185
  // src/security/redact-secrets.ts
@@ -379,7 +382,8 @@ CREATE TABLE IF NOT EXISTS code_index_state (
379
382
  last_indexed_at TEXT NOT NULL,
380
383
  indexed_files INTEGER NOT NULL,
381
384
  code_chunks INTEGER NOT NULL,
382
- skipped_files INTEGER NOT NULL
385
+ skipped_files INTEGER NOT NULL,
386
+ last_indexed_commit TEXT
383
387
  );
384
388
 
385
389
  CREATE TABLE IF NOT EXISTS code_imports (
@@ -1757,6 +1761,14 @@ function applyPerformancePragmas(db) {
1757
1761
  db.pragma("mmap_size = 268435456");
1758
1762
  db.pragma("temp_store = MEMORY");
1759
1763
  }
/**
 * Runs best-effort housekeeping on the database: `ANALYZE`, `PRAGMA optimize`
 * and a truncating WAL checkpoint.
 *
 * Each step is attempted independently so that a failure in one (for example
 * `ANALYZE`) does not prevent the remaining steps from running. Failures are
 * intentionally ignored: maintenance must never break the caller.
 *
 * @param {{ exec: Function, pragma: Function }} db - Database handle exposing `exec` and `pragma`.
 * @returns {void}
 */
function runDatabaseMaintenance(db) {
  const steps = [
    () => db.exec("ANALYZE"),
    () => db.pragma("optimize"),
    () => db.pragma("wal_checkpoint(TRUNCATE)")
  ];
  for (const step of steps) {
    try {
      step();
    } catch {
      // Best-effort: ignore the failure and continue with the next step.
    }
  }
}
1760
1772
  function initializeSchema(db) {
1761
1773
  db.exec(SCHEMA_SQL);
1762
1774
  ensureColumn(db, "sync_state", "history_coverage", "TEXT");
@@ -1770,6 +1782,7 @@ function initializeSchema(db) {
1770
1782
  ensureColumn(db, "sync_state", "graphql_cursor_reset_at", "TEXT");
1771
1783
  ensureColumn(db, "sync_state", "graphql_cursor_reason", "TEXT");
1772
1784
  ensureColumn(db, "sync_state", "graphql_cursor_updated_at", "TEXT");
1785
+ ensureColumn(db, "code_index_state", "last_indexed_commit", "TEXT");
1773
1786
  }
1774
1787
  function ensureColumn(db, tableName, columnName, definition) {
1775
1788
  const columns = db.prepare(`PRAGMA table_info(${tableName})`).all();
@@ -1819,10 +1832,153 @@ function ensureRepository(db, fullName) {
1819
1832
  if (!row) throw new Error(`Failed to create repository row for ${fullName}`);
1820
1833
  return row.id;
1821
1834
  }
/**
 * Looks up the row id of a repository by its full name without creating it.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {string} fullName - Value matched against `repositories.full_name`.
 * @returns {*} The `id` column of the matching row, or `undefined` when no row exists.
 */
function getRepositoryId(db, fullName) {
  const lookup = db.prepare("SELECT id FROM repositories WHERE full_name = ?");
  const match = lookup.get(fullName);
  return match?.id;
}
/**
 * Reads the `last_sync_at` value recorded for a repository in `sync_state`.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {string} repo - Value matched against `sync_state.repo`.
 * @returns {*} The stored value, or `undefined` when the row is missing or the column is null.
 */
function getLastSyncTime(db, repo) {
  const statement = db.prepare("SELECT last_sync_at FROM sync_state WHERE repo = ?");
  const record = statement.get(repo);
  const lastSyncAt = record?.last_sync_at;
  return lastSyncAt ?? void 0;
}
/**
 * Loads the code-index bookkeeping row for a repository.
 *
 * Calls `initializeSchema` first, then reads `code_index_state`.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {string} repo - Value matched against `code_index_state.repo`.
 * @returns {object|undefined} Camel-cased state, or `undefined` when no row
 *   (or a row without `repo`) is found. Null counters become `0`; null
 *   `last_indexed_at` / `last_indexed_commit` become `undefined`.
 */
function getCodeIndexStateForRepo(db, repo) {
  initializeSchema(db);
  const statement = db.prepare(
    `SELECT repo, last_indexed_at, indexed_files, code_chunks, skipped_files, last_indexed_commit
     FROM code_index_state
     WHERE repo = ?`
  );
  const record = statement.get(repo);
  if (!record?.repo) {
    return void 0;
  }
  const state = {
    repo: record.repo,
    lastIndexedAt: record.last_indexed_at ?? void 0,
    indexedFiles: record.indexed_files ?? 0,
    codeChunks: record.code_chunks ?? 0,
    skippedFiles: record.skipped_files ?? 0,
    lastIndexedCommit: record.last_indexed_commit ?? void 0
  };
  return state;
}
/**
 * Builds a path → content-hash map for every indexed code file of a repository.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {string} repo - Repository full name, resolved through `getRepositoryId`.
 * @returns {Map<string, string>} Map keyed by `code_files.path`; empty when the
 *   repository has no row yet.
 */
function getRepoCodeFileHashes(db, repo) {
  initializeSchema(db);
  const hashesByPath = /* @__PURE__ */ new Map();
  const repoId = getRepositoryId(db, repo);
  if (!repoId) {
    return hashesByPath;
  }
  const records = db.prepare("SELECT path, content_hash FROM code_files WHERE repo_id = ?").all(repoId);
  for (const record of records) {
    hashesByPath.set(record.path, record.content_hash);
  }
  return hashesByPath;
}
/**
 * Returns the metadata of every indexed code file of a repository.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {string} repo - Repository full name, resolved through `getRepositoryId`.
 * @returns {object[]} One camel-cased entry per `code_files` row (null
 *   `language` becomes `undefined`); empty when the repository has no row yet.
 */
function getRepoCodeFiles(db, repo) {
  initializeSchema(db);
  const repoId = getRepositoryId(db, repo);
  if (!repoId) {
    return [];
  }
  const statement = db.prepare(
    `SELECT path, language, size_bytes, content_hash, updated_at
     FROM code_files
     WHERE repo_id = ?`
  );
  const files = [];
  for (const record of statement.all(repoId)) {
    files.push({
      repo,
      path: record.path,
      language: record.language ?? void 0,
      sizeBytes: record.size_bytes,
      contentHash: record.content_hash,
      updatedAt: record.updated_at
    });
  }
  return files;
}
/**
 * Parses a JSON document that is expected to hold an array of strings.
 *
 * @param {string} value - JSON text.
 * @returns {string[]} The string members of the parsed array; an empty array
 *   when parsing fails or the parsed value is not an array.
 */
function parseJsonArray3(value) {
  let decoded;
  try {
    decoded = JSON.parse(value);
  } catch {
    return [];
  }
  if (!Array.isArray(decoded)) {
    return [];
  }
  return decoded.filter((entry) => typeof entry === "string");
}
/**
 * Loads every code chunk of a repository without its text payload.
 *
 * The query does not select `sanitized_text`; each returned chunk carries an
 * empty string in `sanitizedText` instead.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {string} repo - Repository full name, resolved through `getRepositoryId`.
 * @returns {object[]} One chunk per `code_chunks` row with `symbols` decoded
 *   from `symbols_json`; empty when the repository has no row yet.
 */
function getRepoCodeChunkSymbols(db, repo) {
  initializeSchema(db);
  const repoId = getRepositoryId(db, repo);
  if (!repoId) {
    return [];
  }
  const statement = db.prepare(
    `SELECT id, file_path, language, start_line, end_line, symbols_json, content_hash, updated_at
     FROM code_chunks
     WHERE repo_id = ?`
  );
  const chunks = [];
  for (const record of statement.all(repoId)) {
    chunks.push({
      id: record.id,
      repo,
      filePath: record.file_path,
      language: record.language ?? void 0,
      startLine: record.start_line,
      endLine: record.end_line,
      sanitizedText: "",
      symbols: parseJsonArray3(record.symbols_json),
      contentHash: record.content_hash,
      updatedAt: record.updated_at
    });
  }
  return chunks;
}
/**
 * Loads the code chunks that belong to files listed in `test_files` for a
 * repository, including their sanitized text.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {string} repo - Repository full name, resolved through `getRepositoryId`.
 * @returns {object[]} One chunk per matching `code_chunks` row with `symbols`
 *   decoded from `symbols_json`; empty when the repository has no row yet.
 */
function getRepoTestChunks(db, repo) {
  initializeSchema(db);
  const repoId = getRepositoryId(db, repo);
  if (!repoId) {
    return [];
  }
  const statement = db.prepare(
    `SELECT id, file_path, language, start_line, end_line, sanitized_text, symbols_json, content_hash, updated_at
     FROM code_chunks
     WHERE repo_id = ? AND file_path IN (
       SELECT path FROM test_files WHERE repo_id = ?
     )`
  );
  // The repository id is bound twice: once for the outer query, once for the sub-select.
  const records = statement.all(repoId, repoId);
  const chunks = [];
  for (const record of records) {
    chunks.push({
      id: record.id,
      repo,
      filePath: record.file_path,
      language: record.language ?? void 0,
      startLine: record.start_line,
      endLine: record.end_line,
      sanitizedText: record.sanitized_text,
      symbols: parseJsonArray3(record.symbols_json),
      contentHash: record.content_hash,
      updatedAt: record.updated_at
    });
  }
  return chunks;
}
/**
 * Loads every recorded import edge of a repository from `code_imports`.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {string} repo - Repository full name, resolved through `getRepositoryId`.
 * @returns {object[]} One camel-cased entry per row (null `imported_path`
 *   becomes `undefined`, `imported_symbols_json` is decoded); empty when the
 *   repository has no row yet.
 */
function getRepoCodeImports(db, repo) {
  initializeSchema(db);
  const repoId = getRepositoryId(db, repo);
  if (!repoId) {
    return [];
  }
  const statement = db.prepare(
    `SELECT source_path, specifier, imported_path, imported_symbols_json, kind
     FROM code_imports
     WHERE repo_id = ?`
  );
  const imports = [];
  for (const record of statement.all(repoId)) {
    imports.push({
      repo,
      sourcePath: record.source_path,
      specifier: record.specifier,
      importedPath: record.imported_path ?? void 0,
      importedSymbols: parseJsonArray3(record.imported_symbols_json),
      kind: record.kind
    });
  }
  return imports;
}
/**
 * Counts the indexed files and chunks currently stored for a repository.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {string} repo - Repository full name, resolved through `getRepositoryId`.
 * @returns {{ files: number, chunks: number }} Row counts of `code_files` and
 *   `code_chunks`; both zero when the repository has no row yet.
 */
function getRepoCodeCounts(db, repo) {
  initializeSchema(db);
  const repoId = getRepositoryId(db, repo);
  if (!repoId) {
    return { files: 0, chunks: 0 };
  }
  const fileRow = db.prepare("SELECT COUNT(*) AS count FROM code_files WHERE repo_id = ?").get(repoId);
  const chunkRow = db.prepare("SELECT COUNT(*) AS count FROM code_chunks WHERE repo_id = ?").get(repoId);
  return { files: fileRow.count, chunks: chunkRow.count };
}
/**
 * Upserts the `code_index_state` row of a repository using the counts that
 * are currently stored, stamping it with the current time.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {string} repo - Repository full name (primary conflict key of the upsert).
 * @param {number} skippedFiles - Value written to `skipped_files`.
 * @param {string} [currentCommit2] - Value written to `last_indexed_commit`;
 *   `null` is stored when it is null or undefined.
 * @returns {{ files: number, chunks: number }} The counts obtained from `getRepoCodeCounts`.
 */
function touchCodeIndexState(db, repo, skippedFiles, currentCommit2) {
  initializeSchema(db);
  const counts = getRepoCodeCounts(db, repo);
  const timestamp = (/* @__PURE__ */ new Date()).toISOString();
  const upsert = db.prepare(
    `INSERT INTO code_index_state
       (repo, last_indexed_at, indexed_files, code_chunks, skipped_files, last_indexed_commit)
     VALUES (?, ?, ?, ?, ?, ?)
     ON CONFLICT(repo) DO UPDATE SET
       last_indexed_at = excluded.last_indexed_at,
       indexed_files = excluded.indexed_files,
       code_chunks = excluded.code_chunks,
       skipped_files = excluded.skipped_files,
       last_indexed_commit = excluded.last_indexed_commit`
  );
  const commitValue = currentCommit2 ?? null;
  upsert.run(repo, timestamp, counts.files, counts.chunks, skippedFiles, commitValue);
  return counts;
}
1826
1982
  function updateSyncState(db, repo, lastIndexedPr, metadata = {}) {
1827
1983
  const now = (/* @__PURE__ */ new Date()).toISOString();
1828
1984
  db.prepare(
@@ -2115,13 +2271,24 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd, ar
2115
2271
  initializeSchema(db);
2116
2272
  const repoId = ensureRepository(db, repo);
2117
2273
  const now = (/* @__PURE__ */ new Date()).toISOString();
2118
- options.onProgress?.({ stage: "writing_code_index", repo, phase: "Inferring test awareness" });
2119
- const testAwareness = inferTestAwareness(repo, codeFiles, codeChunks, {
2274
+ const deletedPaths = options.deletedPaths ?? [];
2275
+ const changedImports = options.changedImports;
2276
+ const testAwareness = options.testAwareness ?? inferTestAwareness(repo, codeFiles, codeChunks, {
2120
2277
  onProgress: options.onProgress
2121
2278
  });
2122
2279
  options.onProgress?.({ stage: "writing_code_index", repo, phase: "Writing code index" });
2280
+ const changedPaths = [...new Set(codeFiles.map((file) => file.path))];
2281
+ const affectedPaths = [.../* @__PURE__ */ new Set([...changedPaths, ...deletedPaths])];
2123
2282
  const transaction = db.transaction(() => {
2124
- const existingChunkRowIds = db.prepare("SELECT rowid FROM code_chunks WHERE repo_id = ?").all(repoId);
2283
+ let existingChunkRowIds = [];
2284
+ if (affectedPaths.length > 0) {
2285
+ const placeholders = affectedPaths.map(() => "?").join(", ");
2286
+ existingChunkRowIds = db.prepare(
2287
+ `SELECT rowid
2288
+ FROM code_chunks
2289
+ WHERE repo_id = ? AND file_path IN (${placeholders})`
2290
+ ).all(repoId, ...affectedPaths);
2291
+ }
2125
2292
  const existingPatternRowIds = db.prepare("SELECT rowid FROM architecture_patterns WHERE repo_id = ?").all(repoId);
2126
2293
  options.onProgress?.({
2127
2294
  stage: "deleting_existing_code_index",
@@ -2141,11 +2308,51 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd, ar
2141
2308
  chunks: existingChunkRowIds.length
2142
2309
  })
2143
2310
  );
2144
- db.prepare("DELETE FROM code_chunks WHERE repo_id = ?").run(repoId);
2145
- db.prepare("DELETE FROM code_files WHERE repo_id = ?").run(repoId);
2146
- db.prepare("DELETE FROM test_links WHERE repo_id = ? AND reason != 'PR co-change'").run(repoId);
2147
- db.prepare("DELETE FROM test_files WHERE repo_id = ?").run(repoId);
2311
+ if (affectedPaths.length > 0) {
2312
+ const placeholders = affectedPaths.map(() => "?").join(", ");
2313
+ db.prepare(
2314
+ `DELETE FROM code_chunks
2315
+ WHERE repo_id = ? AND file_path IN (${placeholders})`
2316
+ ).run(repoId, ...affectedPaths);
2317
+ db.prepare(
2318
+ `DELETE FROM code_files
2319
+ WHERE repo_id = ? AND path IN (${placeholders})`
2320
+ ).run(repoId, ...affectedPaths);
2321
+ db.prepare(
2322
+ `DELETE FROM test_links
2323
+ WHERE repo_id = ?
2324
+ AND reason != 'PR co-change'
2325
+ AND (source_path IN (${placeholders}) OR test_path IN (${placeholders}))`
2326
+ ).run(repoId, ...affectedPaths, ...affectedPaths);
2327
+ db.prepare(
2328
+ `DELETE FROM test_files
2329
+ WHERE repo_id = ? AND path IN (${placeholders})`
2330
+ ).run(repoId, ...affectedPaths);
2331
+ if (changedImports) {
2332
+ db.prepare(
2333
+ `DELETE FROM code_imports
2334
+ WHERE repo_id = ? AND source_path IN (${placeholders})`
2335
+ ).run(repoId, ...affectedPaths);
2336
+ }
2337
+ }
2148
2338
  deleteExistingArchitectureData(db, repoId, repo, existingPatternRowIds, options);
2339
+ if (changedImports) {
2340
+ const insertImport = db.prepare(
2341
+ `INSERT INTO code_imports
2342
+ (repo_id, source_path, specifier, imported_path, imported_symbols_json, kind)
2343
+ VALUES (?, ?, ?, ?, ?, ?)`
2344
+ );
2345
+ for (const item of changedImports) {
2346
+ insertImport.run(
2347
+ repoId,
2348
+ item.sourcePath,
2349
+ item.specifier,
2350
+ item.importedPath ?? null,
2351
+ JSON.stringify(item.importedSymbols),
2352
+ item.kind
2353
+ );
2354
+ }
2355
+ }
2149
2356
  const insertFile = db.prepare(
2150
2357
  `INSERT INTO code_files
2151
2358
  (repo_id, path, language, size_bytes, content_hash, updated_at)
@@ -2237,18 +2444,22 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd, ar
2237
2444
  }
2238
2445
  }
2239
2446
  insertTestAwareness(db, repoId, repo, testAwareness.testFiles, testAwareness.testLinks, options);
2240
- insertArchitectureData(db, repoId, repo, architecture, options);
2447
+ insertArchitectureData(db, repoId, repo, architecture, options, !changedImports);
2241
2448
  insertArchitectureMapEdges(db, repoId, repo, architecture, testAwareness.testLinks, options);
2242
2449
  options.onProgress?.({ stage: "writing_code_index", repo, phase: "Updating index state" });
2450
+ const totalFileCount = db.prepare("SELECT COUNT(*) AS count FROM code_files WHERE repo_id = ?").get(repoId).count;
2451
+ const totalChunkCount = db.prepare("SELECT COUNT(*) AS count FROM code_chunks WHERE repo_id = ?").get(repoId).count;
2243
2452
  db.prepare(
2244
- `INSERT INTO code_index_state (repo, last_indexed_at, indexed_files, code_chunks, skipped_files)
2245
- VALUES (?, ?, ?, ?, ?)
2453
+ `INSERT INTO code_index_state
2454
+ (repo, last_indexed_at, indexed_files, code_chunks, skipped_files, last_indexed_commit)
2455
+ VALUES (?, ?, ?, ?, ?, ?)
2246
2456
  ON CONFLICT(repo) DO UPDATE SET
2247
2457
  last_indexed_at = excluded.last_indexed_at,
2248
2458
  indexed_files = excluded.indexed_files,
2249
2459
  code_chunks = excluded.code_chunks,
2250
- skipped_files = excluded.skipped_files`
2251
- ).run(repo, now, codeFiles.length, codeChunks.length, skippedFiles);
2460
+ skipped_files = excluded.skipped_files,
2461
+ last_indexed_commit = excluded.last_indexed_commit`
2462
+ ).run(repo, now, totalFileCount, totalChunkCount, skippedFiles, options.currentCommit ?? null);
2252
2463
  db.prepare(
2253
2464
  `INSERT INTO architecture_index_state (repo, last_indexed_at, components, patterns, imports)
2254
2465
  VALUES (?, ?, ?, ?, ?)
@@ -2266,9 +2477,10 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd, ar
2266
2477
  );
2267
2478
  });
2268
2479
  transaction();
2480
+ const counts = getRepoCodeCounts(db, repo);
2269
2481
  return {
2270
- indexedFiles: codeFiles.length,
2271
- codeChunksCreated: codeChunks.length,
2482
+ indexedFiles: counts.files,
2483
+ codeChunksCreated: counts.chunks,
2272
2484
  testFilesIndexed: testAwareness.testFiles.length,
2273
2485
  testLinksCreated: testAwareness.testLinks.length,
2274
2486
  architectureComponentsIndexed: architecture.components.length,
@@ -2293,40 +2505,41 @@ function deleteExistingArchitectureData(db, repoId, repo, patternRowIds, options
2293
2505
  );
2294
2506
  db.prepare("DELETE FROM architecture_patterns WHERE repo_id = ?").run(repoId);
2295
2507
  db.prepare("DELETE FROM architecture_components WHERE repo_id = ?").run(repoId);
2296
- db.prepare("DELETE FROM code_imports WHERE repo_id = ?").run(repoId);
2297
2508
  db.prepare("DELETE FROM architecture_map_edges WHERE repo_id = ?").run(repoId);
2298
2509
  }
2299
- function insertArchitectureData(db, repoId, repo, architecture, options = {}) {
2300
- const insertImport = db.prepare(
2301
- `INSERT INTO code_imports
2302
- (repo_id, source_path, specifier, imported_path, imported_symbols_json, kind)
2303
- VALUES (?, ?, ?, ?, ?, ?)`
2304
- );
2305
- options.onProgress?.({
2306
- stage: "writing_architecture_data",
2307
- repo,
2308
- current: 0,
2309
- total: architecture.imports.length,
2310
- kind: "imports"
2311
- });
2312
- for (const [index, item] of architecture.imports.entries()) {
2313
- insertImport.run(
2314
- repoId,
2315
- item.sourcePath,
2316
- item.specifier,
2317
- item.importedPath ?? null,
2318
- JSON.stringify(item.importedSymbols),
2319
- item.kind
2510
+ function insertArchitectureData(db, repoId, repo, architecture, options = {}, includeImports = true) {
2511
+ if (includeImports) {
2512
+ const insertImport = db.prepare(
2513
+ `INSERT INTO code_imports
2514
+ (repo_id, source_path, specifier, imported_path, imported_symbols_json, kind)
2515
+ VALUES (?, ?, ?, ?, ?, ?)`
2320
2516
  );
2321
- const current = index + 1;
2322
- if (shouldEmitCodeWriteProgress(current, architecture.imports.length)) {
2323
- options.onProgress?.({
2324
- stage: "writing_architecture_data",
2325
- repo,
2326
- current,
2327
- total: architecture.imports.length,
2328
- kind: "imports"
2329
- });
2517
+ options.onProgress?.({
2518
+ stage: "writing_architecture_data",
2519
+ repo,
2520
+ current: 0,
2521
+ total: architecture.imports.length,
2522
+ kind: "imports"
2523
+ });
2524
+ for (const [index, item] of architecture.imports.entries()) {
2525
+ insertImport.run(
2526
+ repoId,
2527
+ item.sourcePath,
2528
+ item.specifier,
2529
+ item.importedPath ?? null,
2530
+ JSON.stringify(item.importedSymbols),
2531
+ item.kind
2532
+ );
2533
+ const current = index + 1;
2534
+ if (shouldEmitCodeWriteProgress(current, architecture.imports.length)) {
2535
+ options.onProgress?.({
2536
+ stage: "writing_architecture_data",
2537
+ repo,
2538
+ current,
2539
+ total: architecture.imports.length,
2540
+ kind: "imports"
2541
+ });
2542
+ }
2330
2543
  }
2331
2544
  }
2332
2545
  const insertComponent = db.prepare(
@@ -3057,18 +3270,7 @@ function createPattern(input) {
3057
3270
  };
3058
3271
  }
3059
3272
  function buildArchitectureIndex(repo, files, chunks, options = {}) {
3060
- const allPaths = files.map((file) => file.path);
3061
- const codePaths = new Set(allPaths);
3062
- const relatedTestIndex = buildRelatedTestIndex(allPaths);
3063
- const symbolSetsByPath = /* @__PURE__ */ new Map();
3064
- for (const chunk of chunks) {
3065
- const existing = symbolSetsByPath.get(chunk.filePath) ?? /* @__PURE__ */ new Set();
3066
- for (const symbol of chunk.symbols) {
3067
- if (existing.size >= 40) break;
3068
- existing.add(symbol);
3069
- }
3070
- symbolSetsByPath.set(chunk.filePath, existing);
3071
- }
3273
+ const codePaths = new Set(files.map((file) => file.path));
3072
3274
  const imports = [];
3073
3275
  options.onProgress?.({
3074
3276
  stage: "building_architecture_imports",
@@ -3091,6 +3293,20 @@ function buildArchitectureIndex(repo, files, chunks, options = {}) {
3091
3293
  });
3092
3294
  }
3093
3295
  }
3296
+ return buildArchitectureFromIndexedData(repo, files, chunks, imports, options);
3297
+ }
3298
+ function buildArchitectureFromIndexedData(repo, files, chunks, imports, options = {}) {
3299
+ const allPaths = files.map((file) => file.path);
3300
+ const relatedTestIndex = buildRelatedTestIndex(allPaths);
3301
+ const symbolSetsByPath = /* @__PURE__ */ new Map();
3302
+ for (const chunk of chunks) {
3303
+ const existing = symbolSetsByPath.get(chunk.filePath) ?? /* @__PURE__ */ new Set();
3304
+ for (const symbol of chunk.symbols) {
3305
+ if (existing.size >= 40) break;
3306
+ existing.add(symbol);
3307
+ }
3308
+ symbolSetsByPath.set(chunk.filePath, existing);
3309
+ }
3094
3310
  const importsByPath = /* @__PURE__ */ new Map();
3095
3311
  for (const item of imports) {
3096
3312
  const existing = importsByPath.get(item.sourcePath) ?? [];
@@ -3106,7 +3322,7 @@ function buildArchitectureIndex(repo, files, chunks, options = {}) {
3106
3322
  components: 0
3107
3323
  });
3108
3324
  for (const [index, file] of files.entries()) {
3109
- const area = classifyArchitectureArea(file.path, file.language, file.content);
3325
+ const area = classifyArchitectureArea(file.path, file.language);
3110
3326
  const fileImports = importsByPath.get(file.path) ?? [];
3111
3327
  const symbols = [...symbolSetsByPath.get(file.path) ?? []];
3112
3328
  components.push({
@@ -3320,15 +3536,134 @@ function discoverGitFiles(cwd) {
3320
3536
  });
3321
3537
  return output.split("\n").map((line) => normalizeGitPath(line.trim())).filter(Boolean);
3322
3538
  }
/**
 * Lists files that git reports as untracked and not ignored
 * (`git ls-files --others --exclude-standard`).
 *
 * @param {string} cwd - Working directory in which git is executed.
 * @returns {string[]} Non-empty paths, each trimmed and passed through `normalizeGitPath`.
 * @throws Whatever the underlying process call throws when git fails.
 */
function discoverGitUntrackedFiles(cwd) {
  const stdout = execFileSync3("git", ["ls-files", "--others", "--exclude-standard"], {
    cwd,
    encoding: "utf8",
    stdio: ["ignore", "pipe", "pipe"]
  });
  const untracked = [];
  for (const rawLine of stdout.split("\n")) {
    const normalized = normalizeGitPath(rawLine.trim());
    if (normalized) {
      untracked.push(normalized);
    }
  }
  return untracked;
}
/**
 * Runs a git command and returns its standard output as lines.
 *
 * Standard error is discarded. Trailing whitespace is stripped from each
 * line and lines that end up empty are dropped; leading whitespace is kept.
 *
 * @param {string} cwd - Working directory in which git is executed.
 * @param {string[]} args - Arguments passed to `git`.
 * @returns {string[]} Non-empty output lines.
 * @throws Whatever the underlying process call throws when git fails.
 */
function execGitLines(cwd, args) {
  const stdout = execFileSync3("git", args, {
    cwd,
    encoding: "utf8",
    stdio: ["ignore", "pipe", "ignore"]
  });
  const lines = [];
  for (const rawLine of stdout.split("\n")) {
    const line = rawLine.trimEnd();
    if (line) {
      lines.push(line);
    }
  }
  return lines;
}
/**
 * Resolves the commit that `HEAD` points to (`git rev-parse HEAD`).
 *
 * @param {string} cwd - Working directory in which git is executed.
 * @returns {string|undefined} The trimmed command output, or `undefined`
 *   when the command throws.
 */
function readGitHeadCommit(cwd) {
  let stdout;
  try {
    stdout = execFileSync3("git", ["rev-parse", "HEAD"], {
      cwd,
      encoding: "utf8",
      stdio: ["ignore", "pipe", "ignore"]
    });
  } catch {
    return void 0;
  }
  return stdout.trim();
}
/**
 * Reports whether `git status --porcelain` prints anything.
 *
 * @param {string} cwd - Working directory in which git is executed.
 * @returns {boolean} `true` when the command produces non-whitespace output,
 *   and also `true` when the command throws (an unknown state is treated as
 *   dirty); `false` otherwise.
 */
function hasDirtyWorkingTree(cwd) {
  let porcelain;
  try {
    porcelain = execFileSync3("git", ["status", "--porcelain"], {
      cwd,
      encoding: "utf8",
      stdio: ["ignore", "pipe", "ignore"]
    });
  } catch {
    return true;
  }
  return porcelain.trim().length > 0;
}
/**
 * Parses one line of `git diff --name-status` output.
 *
 * Git separates the status code and the path(s) with a TAB character, so the
 * line is split on tabs only; splitting on spaces would break every path that
 * contains a space. Every field is passed through `normalizeGitPath`.
 *
 * @param {string} line - A single output line, e.g. "M<TAB>src/a.ts" or
 *   "R100<TAB>old.ts<TAB>new.ts".
 * @returns {{ status: string, path: string, previousPath?: string }|undefined}
 *   The parsed entry, or `undefined` when the line has fewer than two fields
 *   or an empty status. For rename/copy entries (`R…`/`C…`) `previousPath` is
 *   the second field and `path` the third (which is `undefined` if absent).
 */
function parseNameStatusLine(line) {
  const parts = line.split("\t").map((item) => normalizeGitPath(item));
  if (parts.length < 2) return void 0;
  const status = parts[0] ?? "";
  if (!status) return void 0;
  if (status.startsWith("R") || status.startsWith("C")) {
    // Rename/copy lines carry the old path first and the new path second.
    return { status, previousPath: parts[1], path: parts[2] };
  }
  return { status, path: parts[1] };
}
/**
 * Decides which files an index run has to look at.
 *
 * HEAD, the tracked file list and the dirty flag are always read first. The
 * function then returns one of two plan shapes:
 *
 * - a "full hash comparison" plan (`fallbackToFullHashCompare: true`) in which
 *   every tracked path is a candidate. This is used when there is no previous
 *   commit, when the working tree is dirty, or when computing the diff throws;
 * - an incremental plan built from `git diff --name-status <last>..HEAD`,
 *   the untracked file list, and previously indexed paths that are no longer
 *   tracked.
 *
 * @param {string} cwd - Working directory in which git is executed.
 * @param {string} [lastIndexedCommit] - Commit recorded by the previous index run.
 * @param {Iterable<string>} existingIndexedPaths - Paths currently present in the index.
 * @returns {object} Plan with `currentCommit`, `trackedPaths`, `changedPaths`,
 *   `deletedPaths`, `dirtyWorkingTree`, `fallbackToFullHashCompare` and `reason`.
 */
function planIncrementalCodeIndex(cwd, lastIndexedCommit, existingIndexedPaths) {
  const currentCommit2 = readGitHeadCommit(cwd);
  const trackedPaths = discoverGitFiles(cwd);
  const trackedSet = new Set(trackedPaths);
  const dirtyWorkingTree = hasDirtyWorkingTree(cwd);

  // Shared shape of the three "compare everything by hash" outcomes.
  const fullComparePlan = (dirty, reason) => ({
    currentCommit: currentCommit2,
    trackedPaths,
    changedPaths: trackedPaths,
    deletedPaths: [...existingIndexedPaths].filter((filePath) => !trackedSet.has(filePath)),
    dirtyWorkingTree: dirty,
    fallbackToFullHashCompare: true,
    reason
  });

  if (!lastIndexedCommit) {
    return fullComparePlan(dirtyWorkingTree, "No previous commit snapshot; using full hash comparison.");
  }
  if (dirtyWorkingTree) {
    return fullComparePlan(
      dirtyWorkingTree,
      "Working tree is dirty; using full hash comparison for deterministic results."
    );
  }

  try {
    const removed = /* @__PURE__ */ new Set();
    const modified = /* @__PURE__ */ new Set();
    const diffLines = execGitLines(cwd, ["diff", "--name-status", `${lastIndexedCommit}..HEAD`]);
    for (const diffLine of diffLines) {
      const entry = parseNameStatusLine(diffLine);
      if (!entry?.path) continue;
      const normalizedPath = normalizeGitPath(entry.path);
      if (entry.status[0] === "D") {
        removed.add(normalizedPath);
      } else if (trackedSet.has(normalizedPath)) {
        // Only paths git still tracks are re-indexed.
        modified.add(normalizedPath);
      }
    }
    for (const untrackedPath of discoverGitUntrackedFiles(cwd)) {
      if (trackedSet.has(untrackedPath)) modified.add(untrackedPath);
    }
    // Anything indexed earlier that git no longer tracks must be removed too.
    for (const existingPath of existingIndexedPaths) {
      if (!trackedSet.has(existingPath)) removed.add(existingPath);
    }
    return {
      currentCommit: currentCommit2,
      trackedPaths,
      changedPaths: [...modified],
      deletedPaths: [...removed],
      dirtyWorkingTree: false,
      fallbackToFullHashCompare: false,
      reason: "Using git diff and untracked files against last indexed commit."
    };
  } catch {
    return fullComparePlan(true, "Unable to compute git diff; falling back to full hash comparison.");
  }
}
3323
3657
  var DISCOVERY_SCAN_INTERVAL = 200;
3324
- function discoverCodeFiles(cwd, repo, options = {}) {
3658
+ function discoverFromPaths(cwd, repo, inputPaths, options = {}) {
3325
3659
  const maxFileBytes = options.maxFileBytes ?? DEFAULT_MAX_CODE_FILE_BYTES;
3660
+ const includeContent = options.includeContent ?? false;
3326
3661
  const rootPath = path7.resolve(cwd);
3327
3662
  const files = [];
3328
3663
  let skippedFiles = 0;
3329
- const gitFiles = discoverGitFiles(cwd);
3330
- const total = gitFiles.length;
3331
- for (const [scanIndex, filePath] of gitFiles.entries()) {
3664
+ const candidatePaths = [...new Set(inputPaths.map((value) => normalizeGitPath(value)).filter(Boolean))];
3665
+ const total = candidatePaths.length;
3666
+ for (const [scanIndex, filePath] of candidatePaths.entries()) {
3332
3667
  const scanned = scanIndex + 1;
3333
3668
  if (scanned % DISCOVERY_SCAN_INTERVAL === 0 || scanned === total) {
3334
3669
  options.onScan?.(scanned, total);
@@ -3359,7 +3694,6 @@ function discoverCodeFiles(cwd, repo, options = {}) {
3359
3694
  skippedFiles += 1;
3360
3695
  continue;
3361
3696
  }
3362
- const content = buffer.toString("utf8");
3363
3697
  files.push({
3364
3698
  repo,
3365
3699
  path: filePath,
@@ -3368,11 +3702,21 @@ function discoverCodeFiles(cwd, repo, options = {}) {
3368
3702
  contentHash: crypto3.createHash("sha256").update(buffer).digest("hex"),
3369
3703
  updatedAt: stat.mtime.toISOString(),
3370
3704
  absolutePath,
3371
- content
3705
+ ...includeContent ? { content: buffer.toString("utf8") } : {}
3372
3706
  });
3373
3707
  }
3374
3708
  return { files, skippedFiles };
3375
3709
  }
/**
 * Discovers code files across every path returned by `discoverGitFiles`.
 *
 * @param {string} cwd - Repository working directory.
 * @param {string} repo - Repository full name attached to each discovered file.
 * @param {object} [options] - Forwarded unchanged to `discoverFromPaths`.
 * @returns {*} Whatever `discoverFromPaths` returns.
 */
function discoverCodeFiles(cwd, repo, options = {}) {
  const trackedPaths = discoverGitFiles(cwd);
  return discoverFromPaths(cwd, repo, trackedPaths, options);
}
/**
 * Discovers code files among an explicit list of candidate paths.
 *
 * @param {string} cwd - Repository working directory.
 * @param {string} repo - Repository full name attached to each discovered file.
 * @param {string[]} filePaths - Candidate paths to inspect.
 * @param {object} [options] - Forwarded unchanged to `discoverFromPaths`.
 * @returns {*} Whatever `discoverFromPaths` returns.
 */
function discoverCodeFilesByPaths(cwd, repo, filePaths, options = {}) {
  const discovery = discoverFromPaths(cwd, repo, filePaths, options);
  return discovery;
}
/**
 * Returns the text of a discovered file, reading it from disk only when the
 * discovery step did not already attach it.
 *
 * @param {{ content?: string, absolutePath: string }} file - Discovered file entry.
 * @returns {string} `file.content` when it is a string; otherwise the UTF-8
 *   contents of `file.absolutePath`.
 * @throws Whatever the synchronous file read throws when the file cannot be read.
 */
function readDiscoveredCodeFileContent(file) {
  const { content } = file;
  if (typeof content !== "string") {
    return fs4.readFileSync(file.absolutePath, "utf8");
  }
  return content;
}
3376
3720
 
3377
3721
  // src/retrieval/test-commands.ts
3378
3722
  import crypto4 from "crypto";
@@ -3606,40 +3950,154 @@ function refreshTestCommands(db, cwd, repo, files = [], options = {}) {
3606
3950
 
3607
3951
  // src/indexer/code-indexer.ts
3608
3952
  function indexCodebase(db, options) {
3953
+ const state = getCodeIndexStateForRepo(db, options.repo);
3954
+ const existingHashes = getRepoCodeFileHashes(db, options.repo);
3955
+ const plan = planIncrementalCodeIndex(
3956
+ options.cwd,
3957
+ state?.lastIndexedCommit,
3958
+ new Set(existingHashes.keys())
3959
+ );
3609
3960
  options.onProgress?.({ stage: "discovering_code_files", repo: options.repo });
3610
- const discovery = discoverCodeFiles(options.cwd, options.repo, {
3961
+ const discovery = plan.fallbackToFullHashCompare ? discoverCodeFiles(options.cwd, options.repo, {
3962
+ maxFileBytes: options.maxFileBytes,
3963
+ onScan: (scanned, total) => options.onProgress?.({
3964
+ stage: "discovering_code_files",
3965
+ repo: options.repo,
3966
+ scanned,
3967
+ total
3968
+ })
3969
+ }) : discoverCodeFilesByPaths(options.cwd, options.repo, plan.changedPaths, {
3611
3970
  maxFileBytes: options.maxFileBytes,
3612
- onScan: (scanned, total) => options.onProgress?.({ stage: "discovering_code_files", repo: options.repo, scanned, total })
3971
+ onScan: (scanned, total) => options.onProgress?.({
3972
+ stage: "discovering_code_files",
3973
+ repo: options.repo,
3974
+ scanned,
3975
+ total
3976
+ })
3613
3977
  });
3978
+ const changedFiles = discovery.files.filter(
3979
+ (file) => existingHashes.get(file.path) !== file.contentHash
3980
+ );
3981
+ const discoveredPaths = new Set(discovery.files.map((file) => file.path));
3982
+ const deletedPaths = plan.fallbackToFullHashCompare ? [...existingHashes.keys()].filter((filePath) => !discoveredPaths.has(filePath)) : plan.deletedPaths;
3614
3983
  options.onProgress?.({
3615
3984
  stage: "discovered_code_files",
3616
3985
  repo: options.repo,
3617
- files: discovery.files.length,
3986
+ files: changedFiles.length,
3618
3987
  skippedFiles: discovery.skippedFiles
3619
3988
  });
3620
- const chunks = [];
3621
- for (const [index, file] of discovery.files.entries()) {
3989
+ if (changedFiles.length === 0 && deletedPaths.length === 0) {
3990
+ const counts = touchCodeIndexState(
3991
+ db,
3992
+ options.repo,
3993
+ discovery.skippedFiles,
3994
+ plan.currentCommit
3995
+ );
3996
+ const repoId = ensureRepository(db, options.repo);
3997
+ const scopedCount = (table) => db.prepare(`SELECT COUNT(*) AS count FROM ${table} WHERE repo_id = ?`).get(repoId).count;
3998
+ const summary2 = {
3999
+ indexedFiles: counts.files,
4000
+ codeChunksCreated: counts.chunks,
4001
+ testFilesIndexed: scopedCount("test_files"),
4002
+ testLinksCreated: scopedCount("test_links"),
4003
+ architectureComponentsIndexed: scopedCount("architecture_components"),
4004
+ architecturePatternsIndexed: scopedCount("architecture_patterns"),
4005
+ architectureImportsIndexed: scopedCount("code_imports"),
4006
+ skippedFiles: discovery.skippedFiles,
4007
+ databasePath: defaultDatabasePath(options.cwd)
4008
+ };
4009
+ options.onProgress?.({
4010
+ stage: "completed_code_index",
4011
+ repo: options.repo,
4012
+ files: summary2.indexedFiles,
4013
+ chunks: summary2.codeChunksCreated,
4014
+ skippedFiles: summary2.skippedFiles,
4015
+ testFiles: summary2.testFilesIndexed,
4016
+ testLinks: summary2.testLinksCreated,
4017
+ architectureComponents: summary2.architectureComponentsIndexed,
4018
+ architecturePatterns: summary2.architecturePatternsIndexed,
4019
+ architectureImports: summary2.architectureImportsIndexed
4020
+ });
4021
+ return summary2;
4022
+ }
4023
+ const changedChunks = [];
4024
+ const changedImports = [];
4025
+ const projectedIndexedPaths = new Set(
4026
+ [...existingHashes.keys()].filter((filePath) => !deletedPaths.includes(filePath))
4027
+ );
4028
+ for (const file of changedFiles) projectedIndexedPaths.add(file.path);
4029
+ for (const [index, file] of changedFiles.entries()) {
3622
4030
  options.onProgress?.({
3623
4031
  stage: "indexing_code_file",
3624
4032
  repo: options.repo,
3625
4033
  current: index + 1,
3626
- total: discovery.files.length,
4034
+ total: changedFiles.length,
3627
4035
  filePath: file.path
3628
4036
  });
3629
- const fileChunks = chunkCodeFile(file);
3630
- chunks.push(...fileChunks);
4037
+ const content = readDiscoveredCodeFileContent(file);
4038
+ const fileWithContent = { ...file, content };
4039
+ const fileChunks = chunkCodeFile(fileWithContent);
4040
+ changedChunks.push(...fileChunks);
4041
+ changedImports.push(
4042
+ ...extractCodeImports(file.path, content, projectedIndexedPaths, options.repo)
4043
+ );
3631
4044
  options.onProgress?.({
3632
4045
  stage: "indexed_code_file",
3633
4046
  repo: options.repo,
3634
4047
  current: index + 1,
3635
- total: discovery.files.length,
4048
+ total: changedFiles.length,
3636
4049
  filePath: file.path,
3637
4050
  chunks: fileChunks.length
3638
4051
  });
3639
4052
  }
3640
- const architecture = buildArchitectureIndex(options.repo, discovery.files, chunks, {
4053
+ const affectedPaths = /* @__PURE__ */ new Set([
4054
+ ...deletedPaths,
4055
+ ...changedFiles.map((file) => file.path)
4056
+ ]);
4057
+ const allFilesByPath = new Map(getRepoCodeFiles(db, options.repo).map((file) => [file.path, file]));
4058
+ for (const filePath of deletedPaths) allFilesByPath.delete(filePath);
4059
+ for (const file of changedFiles) {
4060
+ allFilesByPath.set(file.path, {
4061
+ repo: file.repo,
4062
+ path: file.path,
4063
+ language: file.language,
4064
+ sizeBytes: file.sizeBytes,
4065
+ contentHash: file.contentHash,
4066
+ updatedAt: file.updatedAt
4067
+ });
4068
+ }
4069
+ const allFiles = [...allFilesByPath.values()];
4070
+ const allSymbolChunks = getRepoCodeChunkSymbols(db, options.repo).filter(
4071
+ (chunk) => !affectedPaths.has(chunk.filePath)
4072
+ );
4073
+ allSymbolChunks.push(...changedChunks);
4074
+ const allImports = getRepoCodeImports(db, options.repo).filter(
4075
+ (item) => !affectedPaths.has(item.sourcePath)
4076
+ );
4077
+ allImports.push(...changedImports);
4078
+ const testChunks = getRepoTestChunks(db, options.repo).filter(
4079
+ (chunk) => !affectedPaths.has(chunk.filePath)
4080
+ );
4081
+ for (const chunk of changedChunks) {
4082
+ if (isTestFilePath(chunk.filePath)) testChunks.push(chunk);
4083
+ }
4084
+ const testAwareness = inferTestAwareness(options.repo, allFiles, testChunks, {
3641
4085
  onProgress: options.onProgress
3642
4086
  });
4087
+ options.onProgress?.({
4088
+ stage: "building_architecture_imports",
4089
+ repo: options.repo,
4090
+ current: allFiles.length,
4091
+ total: allFiles.length,
4092
+ imports: allImports.length
4093
+ });
4094
+ const architecture = buildArchitectureFromIndexedData(
4095
+ options.repo,
4096
+ allFiles,
4097
+ allSymbolChunks,
4098
+ allImports,
4099
+ { onProgress: options.onProgress }
4100
+ );
3643
4101
  options.onProgress?.({
3644
4102
  stage: "indexed_architecture",
3645
4103
  repo: options.repo,
@@ -3650,14 +4108,22 @@ function indexCodebase(db, options) {
3650
4108
  const summary = replaceCodeIndex(
3651
4109
  db,
3652
4110
  options.repo,
3653
- discovery.files.map(({ content: _content, absolutePath: _absolutePath, ...file }) => file),
3654
- chunks,
4111
+ changedFiles.map(({ content: _content, absolutePath: _absolutePath, ...file }) => file),
4112
+ changedChunks,
3655
4113
  discovery.skippedFiles,
3656
4114
  options.cwd,
3657
4115
  architecture,
3658
- { onProgress: options.onProgress }
4116
+ {
4117
+ onProgress: options.onProgress,
4118
+ deletedPaths,
4119
+ changedImports,
4120
+ currentCommit: plan.currentCommit,
4121
+ testAwareness
4122
+ }
3659
4123
  );
3660
- refreshTestCommands(db, options.cwd, options.repo, [], { onProgress: options.onProgress });
4124
+ refreshTestCommands(db, options.cwd, options.repo, [], {
4125
+ onProgress: options.onProgress
4126
+ });
3661
4127
  options.onProgress?.({
3662
4128
  stage: "completed_code_index",
3663
4129
  repo: options.repo,
@@ -4096,7 +4562,7 @@ function clampMaxResults(value, defaultValue) {
4096
4562
 
4097
4563
  // src/retrieval/ranker.ts
4098
4564
  import path11 from "path";
4099
- function parseJsonArray3(value) {
4565
+ function parseJsonArray4(value) {
4100
4566
  try {
4101
4567
  const parsed = JSON.parse(value);
4102
4568
  return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
@@ -4114,9 +4580,9 @@ function rowToWisdomUnit(row) {
4114
4580
  category: row.category,
4115
4581
  text: row.text,
4116
4582
  sanitizedText: row.sanitized_text,
4117
- filePaths: parseJsonArray3(row.file_paths_json),
4118
- symbols: parseJsonArray3(row.symbols_json),
4119
- authors: parseJsonArray3(row.authors_json),
4583
+ filePaths: parseJsonArray4(row.file_paths_json),
4584
+ symbols: parseJsonArray4(row.symbols_json),
4585
+ authors: parseJsonArray4(row.authors_json),
4120
4586
  createdAt: row.created_at,
4121
4587
  mergedAt: row.merged_at ?? void 0,
4122
4588
  confidence: row.confidence,
@@ -4179,11 +4645,11 @@ function symbolMatch2(unit, querySymbols) {
4179
4645
  }
4180
4646
  /**
   * Scores how well a ranked unit's text matches free-form query text.
   * (Presumably the unit is a wisdom unit as built by rowToWisdomUnit in the
   * same ranker module; confirm against the caller.)
   *
   * This span is a rendered diff hunk: "-" lines are the 0.1.30 body, "+" lines
   * are the 0.1.31 body, and the bare numbers are old/new line-number gutters.
   *
   * Behavior of the 0.1.31 ("+") side:
   *  - Tokenizes inputText via tokenizeSearchText with a cap of 32 tokens.
   *  - With no usable tokens, returns 0 when unit.bm25 is undefined, otherwise a
   *    flat 0.35 (0.45 in 0.1.30).
   *  - overlap is the fraction of query tokens that occur as substrings of the
   *    lowercased "sanitizedText + filePaths + symbols" haystack.
   *  - bm25Signal is 1 / (1 + |unit.bm25|) clamped to [0, 1]; 0.1.30 used a floor
   *    of 0.25 instead of 0.
   *  - Returns overlap alone when unit.bm25 is undefined; otherwise the blend
   *    0.65 * overlap + 0.35 * bm25Signal rounded to 4 decimals. 0.1.30 returned
   *    Math.max(overlap, bm25Signal).
   *
   * NOTE(review): 1 / (1 + |bm25|) shrinks as the magnitude of bm25 grows. If
   * unit.bm25 is the raw SQLite FTS5 bm25() value (more negative = better match),
   * stronger matches receive a smaller signal; confirm the intended direction.
   *
   * @param {object} unit - reads sanitizedText, filePaths, symbols and optional bm25.
   * @param {string} inputText - raw query text to tokenize.
   * @returns {number} text-match score.
   */
  function textMatch2(unit, inputText) {
4181
4647
  const queryTokens = tokenizeSearchText(inputText, 32);
4182
- if (queryTokens.length === 0) return unit.bm25 === void 0 ? 0 : 0.45;
4648
+ if (queryTokens.length === 0) return unit.bm25 === void 0 ? 0 : 0.35;
4183
4649
  const haystack = `${unit.sanitizedText} ${unit.filePaths.join(" ")} ${unit.symbols.join(" ")}`.toLowerCase();
4184
4650
  const overlap = queryTokens.filter((token) => haystack.includes(token.toLowerCase())).length / queryTokens.length;
4185
- const bm25Signal = unit.bm25 === void 0 ? 0 : Math.max(0.25, Math.min(1, 1 / (1 + Math.abs(unit.bm25))));
4186
- return Math.max(overlap, bm25Signal);
4651
+ const bm25Signal = unit.bm25 === void 0 ? 0 : Math.max(0, Math.min(1, 1 / (1 + Math.abs(unit.bm25))));
4652
+ return unit.bm25 === void 0 ? overlap : Number((0.65 * overlap + 0.35 * bm25Signal).toFixed(4));
4187
4653
  }
4188
4654
  function reviewerOrAuthorSignal(unit) {
4189
4655
  if (unit.sourceType === "review_comment" || unit.sourceType === "review_summary") return 0.9;
@@ -4375,7 +4841,7 @@ function rankWisdomUnits(db, input) {
4375
4841
 
4376
4842
  // src/retrieval/code-ranker.ts
4377
4843
  import path12 from "path";
4378
- function parseJsonArray4(value) {
4844
+ function parseJsonArray5(value) {
4379
4845
  try {
4380
4846
  const parsed = JSON.parse(value);
4381
4847
  return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
@@ -4392,7 +4858,7 @@ function rowToCodeChunk(row) {
4392
4858
  startLine: row.start_line,
4393
4859
  endLine: row.end_line,
4394
4860
  sanitizedText: row.sanitized_text,
4395
- symbols: parseJsonArray4(row.symbols_json),
4861
+ symbols: parseJsonArray5(row.symbols_json),
4396
4862
  contentHash: row.content_hash,
4397
4863
  updatedAt: row.updated_at,
4398
4864
  bm25: row.bm25 ?? void 0
@@ -4441,8 +4907,8 @@ function textMatch3(chunk, input) {
4441
4907
  );
4442
4908
  const haystack = `${chunk.sanitizedText} ${chunk.filePath} ${chunk.symbols.join(" ")}`.toLowerCase();
4443
4909
  const overlap = tokens.length ? tokens.filter((token) => haystack.includes(token.toLowerCase())).length / tokens.length : 0;
4444
- const bm25Signal = chunk.bm25 === void 0 ? 0 : Math.max(0.25, Math.min(1, 1 / (1 + Math.abs(chunk.bm25))));
4445
- return Math.max(overlap, bm25Signal);
4910
+ const bm25Signal = chunk.bm25 === void 0 ? 0 : Math.max(0, Math.min(1, 1 / (1 + Math.abs(chunk.bm25))));
4911
+ return chunk.bm25 === void 0 ? overlap : Number((0.65 * overlap + 0.35 * bm25Signal).toFixed(4));
4446
4912
  }
4447
4913
  function recencyScore2(chunk) {
4448
4914
  const timestamp = Date.parse(chunk.updatedAt);
@@ -4540,7 +5006,7 @@ function rankCodeChunks(db, input) {
4540
5006
 
4541
5007
  // src/retrieval/architecture-ranker.ts
4542
5008
  import path13 from "path";
4543
- function parseJsonArray5(value) {
5009
+ function parseJsonArray6(value) {
4544
5010
  try {
4545
5011
  const parsed = JSON.parse(value);
4546
5012
  return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
@@ -4564,8 +5030,8 @@ function rowToPattern(row) {
4564
5030
  name: row.name,
4565
5031
  summary: row.summary_sanitized,
4566
5032
  sanitizedSummary: row.summary_sanitized,
4567
- sourceFiles: parseJsonArray5(row.source_files_json),
4568
- symbols: parseJsonArray5(row.symbols_json),
5033
+ sourceFiles: parseJsonArray6(row.source_files_json),
5034
+ symbols: parseJsonArray6(row.symbols_json),
4569
5035
  evidence: parseEvidence(row.evidence_json),
4570
5036
  confidence: row.confidence,
4571
5037
  createdAt: row.created_at,
@@ -4681,7 +5147,7 @@ function rankArchitecturePatterns(db, input) {
4681
5147
 
4682
5148
  // src/retrieval/test-ranker.ts
4683
5149
  import path14 from "path";
4684
- function parseJsonArray6(value) {
5150
+ function parseJsonArray7(value) {
4685
5151
  if (!value) return [];
4686
5152
  try {
4687
5153
  const parsed = JSON.parse(value);
@@ -4694,7 +5160,7 @@ function baseStem(filePath) {
4694
5160
  return path14.posix.basename(filePath).replace(/\.(test|spec)\.[^.]+$/i, "").replace(/\.[^.]+$/i, "").toLowerCase();
4695
5161
  }
4696
5162
  function rowToRanked(row, input) {
4697
- const symbols = parseJsonArray6(row.symbols_json);
5163
+ const symbols = parseJsonArray7(row.symbols_json);
4698
5164
  const text = row.sanitized_text ?? "";
4699
5165
  const matchedSymbols = (input.symbols ?? []).filter((symbol) => {
4700
5166
  const lower = symbol.toLowerCase();
@@ -4764,7 +5230,7 @@ function rankRelevantTests(db, input) {
4764
5230
 
4765
5231
  // src/retrieval/regression-ranker.ts
4766
5232
  import path15 from "path";
4767
- function parseJsonArray7(value) {
5233
+ function parseJsonArray8(value) {
4768
5234
  try {
4769
5235
  const parsed = JSON.parse(value);
4770
5236
  return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
@@ -4779,12 +5245,12 @@ function rowToEvent(row) {
4779
5245
  prNumber: row.pr_number,
4780
5246
  prUrl: row.pr_url,
4781
5247
  summary: row.summary_sanitized,
4782
- filePaths: parseJsonArray7(row.file_paths_json),
4783
- symbols: parseJsonArray7(row.symbols_json),
4784
- testPaths: parseJsonArray7(row.test_paths_json),
4785
- authors: parseJsonArray7(row.authors_json),
4786
- labels: parseJsonArray7(row.labels_json),
4787
- signals: parseJsonArray7(row.signals_json),
5248
+ filePaths: parseJsonArray8(row.file_paths_json),
5249
+ symbols: parseJsonArray8(row.symbols_json),
5250
+ testPaths: parseJsonArray8(row.test_paths_json),
5251
+ authors: parseJsonArray8(row.authors_json),
5252
+ labels: parseJsonArray8(row.labels_json),
5253
+ signals: parseJsonArray8(row.signals_json),
4788
5254
  createdAt: row.created_at,
4789
5255
  mergedAt: row.merged_at ?? void 0,
4790
5256
  confidence: row.confidence
@@ -6184,7 +6650,7 @@ function syncPlaybooksToDatabase(db, cwd) {
6184
6650
  }
6185
6651
 
6186
6652
  // src/retrieval/onboarding.ts
6187
- function parseJsonArray8(value) {
6653
+ function parseJsonArray9(value) {
6188
6654
  try {
6189
6655
  const parsed = JSON.parse(value);
6190
6656
  return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
@@ -6217,7 +6683,7 @@ function riskyModules(db) {
6217
6683
  ORDER BY confidence DESC, COALESCE(merged_at, created_at) DESC
6218
6684
  LIMIT 20`
6219
6685
  ).all();
6220
- return [...new Set(rows.flatMap((row) => parseJsonArray8(row.file_paths_json)))].slice(0, 10);
6686
+ return [...new Set(rows.flatMap((row) => parseJsonArray9(row.file_paths_json)))].slice(0, 10);
6221
6687
  }
6222
6688
  function relatedTests(db, files) {
6223
6689
  if (files.length === 0) {
@@ -6296,6 +6762,7 @@ import crypto8 from "crypto";
6296
6762
  import fs7 from "fs";
6297
6763
  import path18 from "path";
6298
6764
  var ANCHOR_EVALS_FILE = "anchor.evals.json";
6765
+ var DEFAULT_EVAL_K = 8;
6299
6766
  function evalsPath(cwd) {
6300
6767
  return path18.join(cwd, ANCHOR_EVALS_FILE);
6301
6768
  }
@@ -6394,6 +6861,7 @@ function runRetrievalEvals(db, cwd) {
6394
6861
  initializeSchema(db);
6395
6862
  const filePath = evalsPath(cwd);
6396
6863
  const evalFile = readEvalFile(cwd);
6864
+ const k = DEFAULT_EVAL_K;
6397
6865
  const results = evalFile.evals.map((item) => {
6398
6866
  const context = buildAnchorContextResult(db, cwd, {
6399
6867
  task: item.task,
@@ -6404,6 +6872,9 @@ function runRetrievalEvals(db, cwd) {
6404
6872
  ...Array.isArray(context.metadata.items) ? context.metadata.items : [],
6405
6873
  ...Array.isArray(context.metadata.teamRules) ? context.metadata.teamRules : []
6406
6874
  ];
6875
+ const rankedPrs = uniqueStrings(
6876
+ metadataItems.map((metadata) => metadata.prNumber).filter((prNumber) => typeof prNumber === "number").map(String)
6877
+ ).map(Number);
6407
6878
  const foundPrs = uniqueStrings(
6408
6879
  metadataItems.map((metadata) => metadata.prNumber).filter((prNumber) => typeof prNumber === "number").map(String)
6409
6880
  ).map(Number);
@@ -6414,6 +6885,23 @@ function runRetrievalEvals(db, cwd) {
6414
6885
  const missingCategories = item.expectedCategories.filter(
6415
6886
  (category) => !foundCategories.includes(category)
6416
6887
  );
6888
+ const expectedPrRanks = item.expectedPrs.map((prNumber) => {
6889
+ const index = rankedPrs.indexOf(prNumber);
6890
+ return {
6891
+ prNumber,
6892
+ rank: index >= 0 ? index + 1 : void 0
6893
+ };
6894
+ });
6895
+ const topK = rankedPrs.slice(0, k);
6896
+ const relevantInTopK = item.expectedPrs.filter((prNumber) => topK.includes(prNumber));
6897
+ const precisionAtK2 = k > 0 ? Number((relevantInTopK.length / k).toFixed(4)) : 0;
6898
+ const recallAtK2 = item.expectedPrs.length > 0 ? Number((relevantInTopK.length / item.expectedPrs.length).toFixed(4)) : 0;
6899
+ const reciprocalRank = (() => {
6900
+ if (item.expectedPrs.length === 0) return 0;
6901
+ const firstRank = expectedPrRanks.map((entry) => entry.rank).filter((rank) => typeof rank === "number").sort((a, b) => a - b)[0];
6902
+ if (!firstRank) return 0;
6903
+ return Number((1 / firstRank).toFixed(4));
6904
+ })();
6417
6905
  return {
6418
6906
  id: item.id,
6419
6907
  task: item.task,
@@ -6421,18 +6909,35 @@ function runRetrievalEvals(db, cwd) {
6421
6909
  expectedPrs: item.expectedPrs,
6422
6910
  foundPrs,
6423
6911
  missingPrs,
6912
+ expectedPrRanks,
6424
6913
  expectedCategories: item.expectedCategories,
6425
6914
  foundCategories,
6426
- missingCategories
6915
+ missingCategories,
6916
+ precisionAtK: precisionAtK2,
6917
+ recallAtK: recallAtK2,
6918
+ reciprocalRank
6427
6919
  };
6428
6920
  });
6429
6921
  const passed = results.filter((result) => result.passed).length;
6922
+ const precisionAtK = results.length > 0 ? Number(
6923
+ (results.reduce((sum, result) => sum + result.precisionAtK, 0) / results.length).toFixed(4)
6924
+ ) : 0;
6925
+ const recallAtK = results.length > 0 ? Number(
6926
+ (results.reduce((sum, result) => sum + result.recallAtK, 0) / results.length).toFixed(4)
6927
+ ) : 0;
6928
+ const mrr = results.length > 0 ? Number(
6929
+ (results.reduce((sum, result) => sum + result.reciprocalRank, 0) / results.length).toFixed(4)
6930
+ ) : 0;
6430
6931
  return {
6431
6932
  ok: passed === results.length,
6432
6933
  path: filePath,
6433
6934
  total: results.length,
6434
6935
  passed,
6435
6936
  failed: results.length - passed,
6937
+ precisionAtK,
6938
+ recallAtK,
6939
+ mrr,
6940
+ k,
6436
6941
  results
6437
6942
  };
6438
6943
  }
@@ -8787,7 +9292,7 @@ function packageRootForSpecifier(specifier) {
8787
9292
  if (normalized.startsWith("@") && parts.length >= 2) return `${parts[0]}/${parts[1]}`;
8788
9293
  return parts[0] ?? "";
8789
9294
  }
8790
- function parseJsonArray9(value) {
9295
+ function parseJsonArray10(value) {
8791
9296
  try {
8792
9297
  const parsed = JSON.parse(value);
8793
9298
  return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
@@ -8921,7 +9426,7 @@ function rebuildOrgGraph(db, config, baseDirOrOptions) {
8921
9426
  `imports ${sanitizeHistoricalText(rootSpecifier || item.specifier)}`
8922
9427
  )
8923
9428
  ],
8924
- confidence: parseJsonArray9(item.imported_symbols_json).length > 0 ? 0.88 : 0.76
9429
+ confidence: parseJsonArray10(item.imported_symbols_json).length > 0 ? 0.88 : 0.76
8925
9430
  });
8926
9431
  }
8927
9432
  if (shouldEmitProgress3(index + 1, imports.length)) {
@@ -9257,7 +9762,11 @@ async function indexOrgRepos(db, config, options = {}) {
9257
9762
  command,
9258
9763
  totalRepos: repos.length
9259
9764
  });
9260
- for (const [repoIndex, repo] of repos.entries()) {
9765
+ const maxConcurrency = Math.max(1, Math.min(options.concurrency ?? 3, 4));
9766
+ let nextRepoIndex = 0;
9767
+ const processRepo = async (repoIndex) => {
9768
+ const repo = repos[repoIndex];
9769
+ if (!repo) return;
9261
9770
  const repoPosition = repoIndex + 1;
9262
9771
  const localPath = orgRepoLocalPath(config.org, repo, options.baseDir);
9263
9772
  const repoStartedAt = (/* @__PURE__ */ new Date()).toISOString();
@@ -9508,7 +10017,18 @@ async function indexOrgRepos(db, config, options = {}) {
9508
10017
  error: message
9509
10018
  });
9510
10019
  }
9511
- }
10020
+ };
10021
+ const worker = async () => {
10022
+ while (true) {
10023
+ const repoIndex = nextRepoIndex;
10024
+ nextRepoIndex += 1;
10025
+ if (repoIndex >= repos.length) return;
10026
+ await processRepo(repoIndex);
10027
+ }
10028
+ };
10029
+ await Promise.all(
10030
+ Array.from({ length: Math.min(maxConcurrency, repos.length) }, () => worker())
10031
+ );
9512
10032
  let graph;
9513
10033
  if (options.noGraph) {
9514
10034
  const counts = getOrgGraphCounts(db, config.org);
@@ -9553,6 +10073,7 @@ async function indexOrgRepos(db, config, options = {}) {
9553
10073
  codeFilesIndexed: results.reduce((sum, result) => sum + (result.code?.indexedFiles ?? 0), 0),
9554
10074
  failures: results.map((result) => result.error).concat(graph.error ? [graph.error] : []).filter((error) => Boolean(error))
9555
10075
  });
10076
+ runDatabaseMaintenance(db);
9556
10077
  emit({
9557
10078
  stage: "org_sync_completed",
9558
10079
  org: config.org,
@@ -10440,6 +10961,7 @@ export {
10440
10961
  anchorMcpEntry,
10441
10962
  architectureFilesFromDiff,
10442
10963
  buildAnchorContextResult,
10964
+ buildArchitectureFromIndexedData,
10443
10965
  buildArchitectureIndex,
10444
10966
  buildArchitectureMap,
10445
10967
  buildFtsQuery,
@@ -10479,6 +11001,7 @@ export {
10479
11001
  detectTestCommands,
10480
11002
  detectTestCommandsForFile,
10481
11003
  discoverCodeFiles,
11004
+ discoverCodeFilesByPaths,
10482
11005
  emptyCodeIndexSummary,
10483
11006
  ensureAnchorGitExclude,
10484
11007
  ensureCursorConfig,
@@ -10507,6 +11030,7 @@ export {
10507
11030
  getAnchorIndexHealth,
10508
11031
  getArchitectureContext,
10509
11032
  getArchitectureMapContext,
11033
+ getCodeIndexStateForRepo,
10510
11034
  getGitHubRateLimitDelayMs,
10511
11035
  getGraphQLFetchCheckpoint,
10512
11036
  getIndexStatus,
@@ -10517,12 +11041,19 @@ export {
10517
11041
  getOrgRepoState,
10518
11042
  getOrgStatus,
10519
11043
  getPlaybook,
11044
+ getRepoCodeChunkSymbols,
11045
+ getRepoCodeCounts,
11046
+ getRepoCodeFileHashes,
11047
+ getRepoCodeFiles,
11048
+ getRepoCodeImports,
11049
+ getRepoTestChunks,
10520
11050
  getSemanticStatus,
10521
11051
  getSuggestedPromptTexts,
10522
11052
  getSuggestedPrompts,
10523
11053
  getWisdomCategoryCounts,
10524
11054
  githubAuthFixMessage,
10525
11055
  graphQLFetchCheckpointScope,
11056
+ hasDirtyWorkingTree,
10526
11057
  hasHighSignalLanguage,
10527
11058
  indexCodebase,
10528
11059
  indexOrgRepos,
@@ -10558,6 +11089,7 @@ export {
10558
11089
  orgRoot,
10559
11090
  paginateWithGitHubRateLimit,
10560
11091
  parseGitHubRemote,
11092
+ planIncrementalCodeIndex,
10561
11093
  planTask,
10562
11094
  plannedOrgCloneCommands,
10563
11095
  rankArchitecturePatterns,
@@ -10566,6 +11098,8 @@ export {
10566
11098
  rankRelevantTests,
10567
11099
  rankTeamRules,
10568
11100
  rankWisdomUnits,
11101
+ readDiscoveredCodeFileContent,
11102
+ readGitHeadCommit,
10569
11103
  readOrgHeartbeat,
10570
11104
  rebuildOrgGraph,
10571
11105
  recordFeedback,
@@ -10586,6 +11120,7 @@ export {
10586
11120
  resolvePullRequestFetchLimit,
10587
11121
  reviewDiff,
10588
11122
  runAnchorCi,
11123
+ runDatabaseMaintenance,
10589
11124
  runDoctor,
10590
11125
  runRetrievalEvals,
10591
11126
  sanitizeHistoricalText,
@@ -10600,6 +11135,7 @@ export {
10600
11135
  syncOrgConfigToDatabase,
10601
11136
  syncPlaybooksToDatabase,
10602
11137
  tokenizeSearchText,
11138
+ touchCodeIndexState,
10603
11139
  truncateText,
10604
11140
  uniqueStrings,
10605
11141
  updateGitHubGraphQLRateLimitState,