@pratik7368patil/anchor-core 0.1.29 → 0.1.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +75 -2
- package/dist/index.js +713 -134
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/db/schema.sql +2 -1
package/dist/index.js
CHANGED
|
@@ -175,8 +175,11 @@ function canonicalizeText(text) {
|
|
|
175
175
|
return text.toLowerCase().replace(/https?:\/\/\S+/g, "").replace(/[^a-z0-9_./ -]/g, " ").replace(/\s+/g, " ").trim();
|
|
176
176
|
}
|
|
177
177
|
/**
 * Break free text into unique, lowercase search tokens.
 *
 * Candidate tokens are runs of two or more characters from `[a-z0-9_./-]`.
 * Two-character candidates are only kept when they appear in the short
 * allow-list below; everything of length three or more is kept.
 *
 * @param {string} text - Raw text to tokenize.
 * @param {number} [maxTokens=32] - Upper bound on the number of tokens returned.
 * @returns {string[]} De-duplicated tokens, truncated to `maxTokens`.
 */
function tokenizeSearchText(text, maxTokens = 32) {
  const allowedShortTokens = new Set(["id", "db", "api", "key", "sql", "jwt", "ui", "ux"]);
  const candidates = text.toLowerCase().match(/[a-z0-9_./-]{2,}/g) ?? [];
  const kept = [];
  for (const candidate of candidates) {
    if (candidate.length >= 3 || allowedShortTokens.has(candidate)) {
      kept.push(candidate);
    }
  }
  const distinct = uniqueStrings(kept);
  return distinct.slice(0, maxTokens);
}
|
|
181
184
|
|
|
182
185
|
// src/security/redact-secrets.ts
|
|
@@ -379,7 +382,8 @@ CREATE TABLE IF NOT EXISTS code_index_state (
|
|
|
379
382
|
last_indexed_at TEXT NOT NULL,
|
|
380
383
|
indexed_files INTEGER NOT NULL,
|
|
381
384
|
code_chunks INTEGER NOT NULL,
|
|
382
|
-
skipped_files INTEGER NOT NULL
|
|
385
|
+
skipped_files INTEGER NOT NULL,
|
|
386
|
+
last_indexed_commit TEXT
|
|
383
387
|
);
|
|
384
388
|
|
|
385
389
|
CREATE TABLE IF NOT EXISTS code_imports (
|
|
@@ -1710,9 +1714,28 @@ function calculateCoverage(input) {
|
|
|
1710
1714
|
|
|
1711
1715
|
// src/db/database.ts
|
|
1712
1716
|
/** Number of written items between two consecutive code-write progress events. */
var CODE_WRITE_PROGRESS_INTERVAL = 150;
/** Number of deleted FTS rows between two consecutive FTS-delete progress events. */
var FTS_DELETE_BATCH_SIZE = 500;

/**
 * Shared throttling rule for progress events: always report the start (0),
 * the first item (1), the final item (`total`), and every `interval`-th item.
 *
 * @param {number} current - Items processed so far.
 * @param {number} total - Total number of items.
 * @param {number} interval - Reporting interval.
 * @returns {boolean} Whether a progress event should be emitted.
 */
function shouldEmitProgressAtInterval(current, total, interval) {
  return current === 0 || current === 1 || current === total || current % interval === 0;
}

/**
 * Decide whether a code-write progress event should be emitted.
 *
 * @param {number} current - Items written so far.
 * @param {number} total - Total number of items to write.
 * @returns {boolean} True at 0, 1, `total`, and every CODE_WRITE_PROGRESS_INTERVAL items.
 */
function shouldEmitCodeWriteProgress(current, total) {
  return shouldEmitProgressAtInterval(current, total, CODE_WRITE_PROGRESS_INTERVAL);
}

/**
 * Decide whether an FTS-delete progress event should be emitted.
 *
 * @param {number} current - Rows deleted so far.
 * @param {number} total - Total number of rows to delete.
 * @returns {boolean} True at 0, 1, `total`, and every FTS_DELETE_BATCH_SIZE rows.
 */
function shouldEmitFtsDeleteProgress(current, total) {
  return shouldEmitProgressAtInterval(current, total, FTS_DELETE_BATCH_SIZE);
}
|
|
1724
|
+
/**
 * Delete rows from an FTS table one rowid at a time, reporting throttled progress.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`.
 * @param {string} ftsTable - Name of the FTS table; interpolated into the SQL text,
 *   so it must be a trusted identifier.
 * @param {Array<number>} rowIds - Rowids to delete.
 * @param {(current: number, total: number) => void} [onProgress] - Optional progress callback.
 * @returns {void}
 */
function deleteFtsRowsByRowId(db, ftsTable, rowIds, onProgress) {
  const total = rowIds.length;
  const report = (done) => onProgress?.(done, total);

  // Nothing to delete: still report once so callers see a completed (0/0) phase.
  if (total === 0) {
    report(0);
    return;
  }

  const statement = db.prepare(`DELETE FROM ${ftsTable} WHERE rowid = ?`);
  report(0);

  let done = 0;
  for (const rowId of rowIds) {
    statement.run(rowId);
    done += 1;
    if (shouldEmitFtsDeleteProgress(done, total)) {
      report(done);
    }
  }
}
|
|
1716
1739
|
/**
 * Build the default location of the index database for a working directory.
 *
 * @param {string} cwd - Project working directory.
 * @returns {string} `<cwd>/.anchor/index.sqlite`, joined with the platform separator.
 */
function defaultDatabasePath(cwd) {
  const relativeSegments = [".anchor", "index.sqlite"];
  return path4.join(cwd, ...relativeSegments);
}
|
|
@@ -1738,6 +1761,14 @@ function applyPerformancePragmas(db) {
|
|
|
1738
1761
|
db.pragma("mmap_size = 268435456");
|
|
1739
1762
|
db.pragma("temp_store = MEMORY");
|
|
1740
1763
|
}
|
|
1764
|
+
/**
 * Run best-effort maintenance on the database: `ANALYZE`, `PRAGMA optimize`,
 * and a truncating WAL checkpoint.
 *
 * Each step runs in isolation, so a failure in one (for example a locked
 * database during `ANALYZE`) no longer prevents the remaining steps from
 * running. Failures never throw; they are collected and returned so callers
 * that care can log them, while callers that ignore the return value keep
 * the previous fire-and-forget behavior.
 *
 * @param {object} db - Database handle exposing `exec(sql)` and `pragma(text)`.
 * @returns {Array<{step: string, error: unknown}>} One entry per failed step; empty on success.
 */
function runDatabaseMaintenance(db) {
  const steps = [
    ["ANALYZE", () => db.exec("ANALYZE")],
    ["optimize", () => db.pragma("optimize")],
    ["wal_checkpoint(TRUNCATE)", () => db.pragma("wal_checkpoint(TRUNCATE)")]
  ];
  const failures = [];
  for (const [step, run] of steps) {
    try {
      run();
    } catch (error) {
      // Maintenance is optional: record the failure and keep going.
      failures.push({ step, error });
    }
  }
  return failures;
}
|
|
1741
1772
|
function initializeSchema(db) {
|
|
1742
1773
|
db.exec(SCHEMA_SQL);
|
|
1743
1774
|
ensureColumn(db, "sync_state", "history_coverage", "TEXT");
|
|
@@ -1751,6 +1782,7 @@ function initializeSchema(db) {
|
|
|
1751
1782
|
ensureColumn(db, "sync_state", "graphql_cursor_reset_at", "TEXT");
|
|
1752
1783
|
ensureColumn(db, "sync_state", "graphql_cursor_reason", "TEXT");
|
|
1753
1784
|
ensureColumn(db, "sync_state", "graphql_cursor_updated_at", "TEXT");
|
|
1785
|
+
ensureColumn(db, "code_index_state", "last_indexed_commit", "TEXT");
|
|
1754
1786
|
}
|
|
1755
1787
|
function ensureColumn(db, tableName, columnName, definition) {
|
|
1756
1788
|
const columns = db.prepare(`PRAGMA table_info(${tableName})`).all();
|
|
@@ -1800,10 +1832,153 @@ function ensureRepository(db, fullName) {
|
|
|
1800
1832
|
if (!row) throw new Error(`Failed to create repository row for ${fullName}`);
|
|
1801
1833
|
return row.id;
|
|
1802
1834
|
}
|
|
1835
|
+
/**
 * Look up the numeric id of a repository by its full name without creating it.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`.
 * @param {string} fullName - Repository full name stored in `repositories.full_name`.
 * @returns {number|undefined} The repository id, or undefined when no row matches.
 */
function getRepositoryId(db, fullName) {
  const lookup = db.prepare("SELECT id FROM repositories WHERE full_name = ?");
  const match = lookup.get(fullName);
  if (match === null || match === undefined) {
    return undefined;
  }
  return match.id;
}
|
|
1803
1839
|
/**
 * Read the last sync timestamp recorded for a repository.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`.
 * @param {string} repo - Repository key stored in `sync_state.repo`.
 * @returns {string|undefined} The stored `last_sync_at` value, or undefined when
 *   there is no row or the column is NULL.
 */
function getLastSyncTime(db, repo) {
  const record = db.prepare("SELECT last_sync_at FROM sync_state WHERE repo = ?").get(repo);
  if (record === null || record === undefined) {
    return undefined;
  }
  const lastSyncAt = record.last_sync_at;
  // Normalize SQL NULL to undefined for callers.
  return lastSyncAt === null ? undefined : lastSyncAt;
}
|
|
1843
|
+
/**
 * Load the persisted code-index state for a repository.
 *
 * Ensures the schema exists first, then reads the single `code_index_state`
 * row for `repo`.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`.
 * @param {string} repo - Repository key stored in `code_index_state.repo`.
 * @returns {{repo: string, lastIndexedAt: (string|undefined), indexedFiles: number,
 *   codeChunks: number, skippedFiles: number, lastIndexedCommit: (string|undefined)}|undefined}
 *   The state in camelCase form, or undefined when no row exists.
 */
function getCodeIndexStateForRepo(db, repo) {
  initializeSchema(db);
  const statement = db.prepare(
    `SELECT repo, last_indexed_at, indexed_files, code_chunks, skipped_files, last_indexed_commit
     FROM code_index_state
     WHERE repo = ?`
  );
  const record = statement.get(repo);
  if (!record?.repo) {
    return undefined;
  }
  const {
    last_indexed_at: lastIndexedAt,
    indexed_files: indexedFiles,
    code_chunks: codeChunks,
    skipped_files: skippedFiles,
    last_indexed_commit: lastIndexedCommit
  } = record;
  return {
    repo: record.repo,
    lastIndexedAt: lastIndexedAt ?? undefined,
    indexedFiles: indexedFiles ?? 0,
    codeChunks: codeChunks ?? 0,
    skippedFiles: skippedFiles ?? 0,
    lastIndexedCommit: lastIndexedCommit ?? undefined
  };
}
|
|
1860
|
+
/**
 * Map every indexed file path of a repository to its stored content hash.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`.
 * @param {string} repo - Repository full name.
 * @returns {Map<string, string>} path → content hash; empty when the repository is unknown.
 */
function getRepoCodeFileHashes(db, repo) {
  initializeSchema(db);
  const hashesByPath = new Map();
  const repoId = getRepositoryId(db, repo);
  if (!repoId) {
    return hashesByPath;
  }
  const records = db.prepare("SELECT path, content_hash FROM code_files WHERE repo_id = ?").all(repoId);
  for (const record of records) {
    hashesByPath.set(record.path, record.content_hash);
  }
  return hashesByPath;
}
|
|
1867
|
+
/**
 * List the indexed code files of a repository.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`.
 * @param {string} repo - Repository full name; echoed into every returned record.
 * @returns {Array<{repo: string, path: string, language: (string|undefined),
 *   sizeBytes: number, contentHash: string, updatedAt: string}>}
 *   One record per `code_files` row; empty when the repository is unknown.
 */
function getRepoCodeFiles(db, repo) {
  initializeSchema(db);
  const repoId = getRepositoryId(db, repo);
  if (!repoId) {
    return [];
  }
  const statement = db.prepare(
    `SELECT path, language, size_bytes, content_hash, updated_at
     FROM code_files
     WHERE repo_id = ?`
  );
  const files = [];
  for (const record of statement.all(repoId)) {
    files.push({
      repo,
      path: record.path,
      // SQL NULL language becomes undefined.
      language: record.language ?? undefined,
      sizeBytes: record.size_bytes,
      contentHash: record.content_hash,
      updatedAt: record.updated_at
    });
  }
  return files;
}
|
|
1885
|
+
/**
 * Parse a JSON-encoded array and keep only its string elements.
 *
 * @param {string} value - JSON text expected to encode an array.
 * @returns {string[]} The string items of the parsed array; an empty array when
 *   the text is not valid JSON or does not decode to an array.
 */
function parseJsonArray3(value) {
  let decoded;
  try {
    decoded = JSON.parse(value);
  } catch {
    // Malformed JSON is treated the same as "no items".
    return [];
  }
  if (!Array.isArray(decoded)) {
    return [];
  }
  return decoded.filter((entry) => typeof entry === "string");
}
|
|
1893
|
+
/**
 * List the code chunks of a repository with their symbols but without text.
 *
 * The query does not select `sanitized_text`; every returned record carries
 * an empty `sanitizedText` string instead.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`.
 * @param {string} repo - Repository full name; echoed into every returned record.
 * @returns {Array<object>} One record per `code_chunks` row; empty when the repository is unknown.
 */
function getRepoCodeChunkSymbols(db, repo) {
  initializeSchema(db);
  const repoId = getRepositoryId(db, repo);
  if (!repoId) {
    return [];
  }
  const statement = db.prepare(
    `SELECT id, file_path, language, start_line, end_line, symbols_json, content_hash, updated_at
     FROM code_chunks
     WHERE repo_id = ?`
  );
  const chunks = [];
  for (const record of statement.all(repoId)) {
    chunks.push({
      id: record.id,
      repo,
      filePath: record.file_path,
      language: record.language ?? undefined,
      startLine: record.start_line,
      endLine: record.end_line,
      sanitizedText: "",
      symbols: parseJsonArray3(record.symbols_json),
      contentHash: record.content_hash,
      updatedAt: record.updated_at
    });
  }
  return chunks;
}
|
|
1915
|
+
/**
 * List the code chunks of a repository that belong to files recorded as test files.
 *
 * A chunk qualifies when its `file_path` appears in `test_files.path` for the
 * same repository. Unlike the symbols-only listing, the chunk text is included.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`.
 * @param {string} repo - Repository full name; echoed into every returned record.
 * @returns {Array<object>} One record per matching chunk; empty when the repository is unknown.
 */
function getRepoTestChunks(db, repo) {
  initializeSchema(db);
  const repoId = getRepositoryId(db, repo);
  if (!repoId) {
    return [];
  }
  const statement = db.prepare(
    `SELECT id, file_path, language, start_line, end_line, sanitized_text, symbols_json, content_hash, updated_at
     FROM code_chunks
     WHERE repo_id = ? AND file_path IN (
       SELECT path FROM test_files WHERE repo_id = ?
     )`
  );
  // The repository id is bound twice: once for the outer query, once for the subquery.
  const records = statement.all(repoId, repoId);
  const chunks = [];
  for (const record of records) {
    chunks.push({
      id: record.id,
      repo,
      filePath: record.file_path,
      language: record.language ?? undefined,
      startLine: record.start_line,
      endLine: record.end_line,
      sanitizedText: record.sanitized_text,
      symbols: parseJsonArray3(record.symbols_json),
      contentHash: record.content_hash,
      updatedAt: record.updated_at
    });
  }
  return chunks;
}
|
|
1939
|
+
/**
 * List the import edges recorded for a repository.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`.
 * @param {string} repo - Repository full name; echoed into every returned record.
 * @returns {Array<{repo: string, sourcePath: string, specifier: string,
 *   importedPath: (string|undefined), importedSymbols: string[], kind: string}>}
 *   One record per `code_imports` row; empty when the repository is unknown.
 */
function getRepoCodeImports(db, repo) {
  initializeSchema(db);
  const repoId = getRepositoryId(db, repo);
  if (!repoId) {
    return [];
  }
  const statement = db.prepare(
    `SELECT source_path, specifier, imported_path, imported_symbols_json, kind
     FROM code_imports
     WHERE repo_id = ?`
  );
  const imports = [];
  for (const record of statement.all(repoId)) {
    imports.push({
      repo,
      sourcePath: record.source_path,
      specifier: record.specifier,
      // SQL NULL imported_path becomes undefined.
      importedPath: record.imported_path ?? undefined,
      importedSymbols: parseJsonArray3(record.imported_symbols_json),
      kind: record.kind
    });
  }
  return imports;
}
|
|
1957
|
+
/**
 * Count the indexed files and chunks currently stored for a repository.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`.
 * @param {string} repo - Repository full name.
 * @returns {{files: number, chunks: number}} Row counts from `code_files` and
 *   `code_chunks`; both zero when the repository is unknown.
 */
function getRepoCodeCounts(db, repo) {
  initializeSchema(db);
  const repoId = getRepositoryId(db, repo);
  if (!repoId) {
    return { files: 0, chunks: 0 };
  }
  const countFor = (sql) => db.prepare(sql).get(repoId).count;
  const files = countFor("SELECT COUNT(*) AS count FROM code_files WHERE repo_id = ?");
  const chunks = countFor("SELECT COUNT(*) AS count FROM code_chunks WHERE repo_id = ?");
  return { files, chunks };
}
|
|
1965
|
+
/**
 * Refresh the `code_index_state` row for a repository without rewriting the index.
 *
 * Recounts the stored files and chunks, stamps the current time, and upserts
 * the row together with the supplied skipped-file count and commit.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`.
 * @param {string} repo - Repository full name (primary key of the state row).
 * @param {number} skippedFiles - Number of files skipped during indexing.
 * @param {string} [currentCommit2] - Commit recorded as last indexed; stored as NULL when absent.
 * @returns {{files: number, chunks: number}} The counts that were written.
 */
function touchCodeIndexState(db, repo, skippedFiles, currentCommit2) {
  initializeSchema(db);
  const counts = getRepoCodeCounts(db, repo);
  const indexedAt = new Date().toISOString();
  const upsert = db.prepare(
    `INSERT INTO code_index_state
       (repo, last_indexed_at, indexed_files, code_chunks, skipped_files, last_indexed_commit)
     VALUES (?, ?, ?, ?, ?, ?)
     ON CONFLICT(repo) DO UPDATE SET
       last_indexed_at = excluded.last_indexed_at,
       indexed_files = excluded.indexed_files,
       code_chunks = excluded.code_chunks,
       skipped_files = excluded.skipped_files,
       last_indexed_commit = excluded.last_indexed_commit`
  );
  const commitValue = currentCommit2 ?? null;
  upsert.run(repo, indexedAt, counts.files, counts.chunks, skippedFiles, commitValue);
  return counts;
}
|
|
1807
1982
|
function updateSyncState(db, repo, lastIndexedPr, metadata = {}) {
|
|
1808
1983
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
1809
1984
|
db.prepare(
|
|
@@ -1905,9 +2080,12 @@ function clearGraphQLFetchCheckpoint(db, repo, scope) {
|
|
|
1905
2080
|
).run((/* @__PURE__ */ new Date()).toISOString(), repo);
|
|
1906
2081
|
}
|
|
1907
2082
|
function deleteExistingPrData(db, prId) {
|
|
1908
|
-
db.prepare(
|
|
1909
|
-
|
|
1910
|
-
|
|
2083
|
+
const wisdomRowIds = db.prepare("SELECT rowid FROM wisdom_units WHERE pr_id = ?").all(prId);
|
|
2084
|
+
deleteFtsRowsByRowId(
|
|
2085
|
+
db,
|
|
2086
|
+
"wisdom_units_fts",
|
|
2087
|
+
wisdomRowIds.map((row) => row.rowid)
|
|
2088
|
+
);
|
|
1911
2089
|
db.prepare("DELETE FROM regression_events WHERE pr_id = ?").run(prId);
|
|
1912
2090
|
db.prepare("DELETE FROM wisdom_units WHERE pr_id = ?").run(prId);
|
|
1913
2091
|
db.prepare("DELETE FROM pr_comments WHERE pr_id = ?").run(prId);
|
|
@@ -2019,11 +2197,11 @@ function upsertPullRequest(db, pr, wisdomUnits, regressionEvents = []) {
|
|
|
2019
2197
|
);
|
|
2020
2198
|
const insertFts = db.prepare(
|
|
2021
2199
|
`INSERT INTO wisdom_units_fts
|
|
2022
|
-
(unitId, sanitizedText, filePaths, symbols, prTitle, prBody, category)
|
|
2023
|
-
VALUES (?, ?, ?, ?, ?, ?, ?)`
|
|
2200
|
+
(rowid, unitId, sanitizedText, filePaths, symbols, prTitle, prBody, category)
|
|
2201
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)`
|
|
2024
2202
|
);
|
|
2025
2203
|
for (const unit of wisdomUnits) {
|
|
2026
|
-
insertWisdom.run(
|
|
2204
|
+
const wisdomInsert = insertWisdom.run(
|
|
2027
2205
|
unit.id,
|
|
2028
2206
|
repoId,
|
|
2029
2207
|
prRow.id,
|
|
@@ -2042,6 +2220,7 @@ function upsertPullRequest(db, pr, wisdomUnits, regressionEvents = []) {
|
|
|
2042
2220
|
unit.confidence
|
|
2043
2221
|
);
|
|
2044
2222
|
insertFts.run(
|
|
2223
|
+
Number(wisdomInsert.lastInsertRowid),
|
|
2045
2224
|
unit.id,
|
|
2046
2225
|
unit.sanitizedText,
|
|
2047
2226
|
unit.filePaths.join(" "),
|
|
@@ -2092,28 +2271,88 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd, ar
|
|
|
2092
2271
|
initializeSchema(db);
|
|
2093
2272
|
const repoId = ensureRepository(db, repo);
|
|
2094
2273
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
2095
|
-
|
|
2096
|
-
const
|
|
2274
|
+
const deletedPaths = options.deletedPaths ?? [];
|
|
2275
|
+
const changedImports = options.changedImports;
|
|
2276
|
+
const testAwareness = options.testAwareness ?? inferTestAwareness(repo, codeFiles, codeChunks, {
|
|
2097
2277
|
onProgress: options.onProgress
|
|
2098
2278
|
});
|
|
2099
2279
|
options.onProgress?.({ stage: "writing_code_index", repo, phase: "Writing code index" });
|
|
2280
|
+
const changedPaths = [...new Set(codeFiles.map((file) => file.path))];
|
|
2281
|
+
const affectedPaths = [.../* @__PURE__ */ new Set([...changedPaths, ...deletedPaths])];
|
|
2100
2282
|
const transaction = db.transaction(() => {
|
|
2101
|
-
|
|
2102
|
-
|
|
2283
|
+
let existingChunkRowIds = [];
|
|
2284
|
+
if (affectedPaths.length > 0) {
|
|
2285
|
+
const placeholders = affectedPaths.map(() => "?").join(", ");
|
|
2286
|
+
existingChunkRowIds = db.prepare(
|
|
2287
|
+
`SELECT rowid
|
|
2288
|
+
FROM code_chunks
|
|
2289
|
+
WHERE repo_id = ? AND file_path IN (${placeholders})`
|
|
2290
|
+
).all(repoId, ...affectedPaths);
|
|
2291
|
+
}
|
|
2292
|
+
const existingPatternRowIds = db.prepare("SELECT rowid FROM architecture_patterns WHERE repo_id = ?").all(repoId);
|
|
2103
2293
|
options.onProgress?.({
|
|
2104
2294
|
stage: "deleting_existing_code_index",
|
|
2105
2295
|
repo,
|
|
2106
|
-
chunks:
|
|
2107
|
-
patterns:
|
|
2296
|
+
chunks: existingChunkRowIds.length,
|
|
2297
|
+
patterns: existingPatternRowIds.length
|
|
2108
2298
|
});
|
|
2109
|
-
|
|
2110
|
-
|
|
2111
|
-
|
|
2112
|
-
|
|
2113
|
-
|
|
2114
|
-
|
|
2115
|
-
|
|
2116
|
-
|
|
2299
|
+
deleteFtsRowsByRowId(
|
|
2300
|
+
db,
|
|
2301
|
+
"code_chunks_fts",
|
|
2302
|
+
existingChunkRowIds.map((row) => row.rowid),
|
|
2303
|
+
(current, total) => options.onProgress?.({
|
|
2304
|
+
stage: "deleting_code_fts",
|
|
2305
|
+
repo,
|
|
2306
|
+
current,
|
|
2307
|
+
total,
|
|
2308
|
+
chunks: existingChunkRowIds.length
|
|
2309
|
+
})
|
|
2310
|
+
);
|
|
2311
|
+
if (affectedPaths.length > 0) {
|
|
2312
|
+
const placeholders = affectedPaths.map(() => "?").join(", ");
|
|
2313
|
+
db.prepare(
|
|
2314
|
+
`DELETE FROM code_chunks
|
|
2315
|
+
WHERE repo_id = ? AND file_path IN (${placeholders})`
|
|
2316
|
+
).run(repoId, ...affectedPaths);
|
|
2317
|
+
db.prepare(
|
|
2318
|
+
`DELETE FROM code_files
|
|
2319
|
+
WHERE repo_id = ? AND path IN (${placeholders})`
|
|
2320
|
+
).run(repoId, ...affectedPaths);
|
|
2321
|
+
db.prepare(
|
|
2322
|
+
`DELETE FROM test_links
|
|
2323
|
+
WHERE repo_id = ?
|
|
2324
|
+
AND reason != 'PR co-change'
|
|
2325
|
+
AND (source_path IN (${placeholders}) OR test_path IN (${placeholders}))`
|
|
2326
|
+
).run(repoId, ...affectedPaths, ...affectedPaths);
|
|
2327
|
+
db.prepare(
|
|
2328
|
+
`DELETE FROM test_files
|
|
2329
|
+
WHERE repo_id = ? AND path IN (${placeholders})`
|
|
2330
|
+
).run(repoId, ...affectedPaths);
|
|
2331
|
+
if (changedImports) {
|
|
2332
|
+
db.prepare(
|
|
2333
|
+
`DELETE FROM code_imports
|
|
2334
|
+
WHERE repo_id = ? AND source_path IN (${placeholders})`
|
|
2335
|
+
).run(repoId, ...affectedPaths);
|
|
2336
|
+
}
|
|
2337
|
+
}
|
|
2338
|
+
deleteExistingArchitectureData(db, repoId, repo, existingPatternRowIds, options);
|
|
2339
|
+
if (changedImports) {
|
|
2340
|
+
const insertImport = db.prepare(
|
|
2341
|
+
`INSERT INTO code_imports
|
|
2342
|
+
(repo_id, source_path, specifier, imported_path, imported_symbols_json, kind)
|
|
2343
|
+
VALUES (?, ?, ?, ?, ?, ?)`
|
|
2344
|
+
);
|
|
2345
|
+
for (const item of changedImports) {
|
|
2346
|
+
insertImport.run(
|
|
2347
|
+
repoId,
|
|
2348
|
+
item.sourcePath,
|
|
2349
|
+
item.specifier,
|
|
2350
|
+
item.importedPath ?? null,
|
|
2351
|
+
JSON.stringify(item.importedSymbols),
|
|
2352
|
+
item.kind
|
|
2353
|
+
);
|
|
2354
|
+
}
|
|
2355
|
+
}
|
|
2117
2356
|
const insertFile = db.prepare(
|
|
2118
2357
|
`INSERT INTO code_files
|
|
2119
2358
|
(repo_id, path, language, size_bytes, content_hash, updated_at)
|
|
@@ -2155,8 +2394,8 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd, ar
|
|
|
2155
2394
|
);
|
|
2156
2395
|
const insertFts = db.prepare(
|
|
2157
2396
|
`INSERT INTO code_chunks_fts
|
|
2158
|
-
(chunkId, sanitizedText, filePath, symbols, language)
|
|
2159
|
-
VALUES (?, ?, ?, ?, ?)`
|
|
2397
|
+
(rowid, chunkId, sanitizedText, filePath, symbols, language)
|
|
2398
|
+
VALUES (?, ?, ?, ?, ?, ?)`
|
|
2160
2399
|
);
|
|
2161
2400
|
options.onProgress?.({
|
|
2162
2401
|
stage: "writing_code_chunks",
|
|
@@ -2169,7 +2408,7 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd, ar
|
|
|
2169
2408
|
for (const [index, chunk] of codeChunks.entries()) {
|
|
2170
2409
|
const fileId = fileIds.get(chunk.filePath);
|
|
2171
2410
|
if (!fileId) continue;
|
|
2172
|
-
insertChunk.run(
|
|
2411
|
+
const chunkInsert = insertChunk.run(
|
|
2173
2412
|
chunk.id,
|
|
2174
2413
|
repoId,
|
|
2175
2414
|
fileId,
|
|
@@ -2184,6 +2423,7 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd, ar
|
|
|
2184
2423
|
chunk.updatedAt
|
|
2185
2424
|
);
|
|
2186
2425
|
insertFts.run(
|
|
2426
|
+
Number(chunkInsert.lastInsertRowid),
|
|
2187
2427
|
chunk.id,
|
|
2188
2428
|
chunk.sanitizedText,
|
|
2189
2429
|
chunk.filePath,
|
|
@@ -2204,18 +2444,22 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd, ar
|
|
|
2204
2444
|
}
|
|
2205
2445
|
}
|
|
2206
2446
|
insertTestAwareness(db, repoId, repo, testAwareness.testFiles, testAwareness.testLinks, options);
|
|
2207
|
-
insertArchitectureData(db, repoId, repo, architecture, options);
|
|
2447
|
+
insertArchitectureData(db, repoId, repo, architecture, options, !changedImports);
|
|
2208
2448
|
insertArchitectureMapEdges(db, repoId, repo, architecture, testAwareness.testLinks, options);
|
|
2209
2449
|
options.onProgress?.({ stage: "writing_code_index", repo, phase: "Updating index state" });
|
|
2450
|
+
const totalFileCount = db.prepare("SELECT COUNT(*) AS count FROM code_files WHERE repo_id = ?").get(repoId).count;
|
|
2451
|
+
const totalChunkCount = db.prepare("SELECT COUNT(*) AS count FROM code_chunks WHERE repo_id = ?").get(repoId).count;
|
|
2210
2452
|
db.prepare(
|
|
2211
|
-
`INSERT INTO code_index_state
|
|
2212
|
-
|
|
2453
|
+
`INSERT INTO code_index_state
|
|
2454
|
+
(repo, last_indexed_at, indexed_files, code_chunks, skipped_files, last_indexed_commit)
|
|
2455
|
+
VALUES (?, ?, ?, ?, ?, ?)
|
|
2213
2456
|
ON CONFLICT(repo) DO UPDATE SET
|
|
2214
2457
|
last_indexed_at = excluded.last_indexed_at,
|
|
2215
2458
|
indexed_files = excluded.indexed_files,
|
|
2216
2459
|
code_chunks = excluded.code_chunks,
|
|
2217
|
-
skipped_files = excluded.skipped_files
|
|
2218
|
-
|
|
2460
|
+
skipped_files = excluded.skipped_files,
|
|
2461
|
+
last_indexed_commit = excluded.last_indexed_commit`
|
|
2462
|
+
).run(repo, now, totalFileCount, totalChunkCount, skippedFiles, options.currentCommit ?? null);
|
|
2219
2463
|
db.prepare(
|
|
2220
2464
|
`INSERT INTO architecture_index_state (repo, last_indexed_at, components, patterns, imports)
|
|
2221
2465
|
VALUES (?, ?, ?, ?, ?)
|
|
@@ -2233,9 +2477,10 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd, ar
|
|
|
2233
2477
|
);
|
|
2234
2478
|
});
|
|
2235
2479
|
transaction();
|
|
2480
|
+
const counts = getRepoCodeCounts(db, repo);
|
|
2236
2481
|
return {
|
|
2237
|
-
indexedFiles:
|
|
2238
|
-
codeChunksCreated:
|
|
2482
|
+
indexedFiles: counts.files,
|
|
2483
|
+
codeChunksCreated: counts.chunks,
|
|
2239
2484
|
testFilesIndexed: testAwareness.testFiles.length,
|
|
2240
2485
|
testLinksCreated: testAwareness.testLinks.length,
|
|
2241
2486
|
architectureComponentsIndexed: architecture.components.length,
|
|
@@ -2245,46 +2490,56 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd, ar
|
|
|
2245
2490
|
databasePath: defaultDatabasePath(cwd)
|
|
2246
2491
|
};
|
|
2247
2492
|
}
|
|
2248
|
-
function deleteExistingArchitectureData(db, repoId) {
|
|
2249
|
-
|
|
2250
|
-
|
|
2251
|
-
|
|
2493
|
+
/**
 * Remove all architecture data stored for a repository.
 *
 * First deletes the FTS rows matching the supplied pattern rowids (reporting
 * progress under the "deleting_architecture_fts" stage), then clears the
 * repository's patterns, components, and map edges. Import rows are not
 * touched here.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`.
 * @param {number} repoId - Repository id used in the DELETE statements.
 * @param {string} repo - Repository full name, used only in progress events.
 * @param {Array<{rowid: number}>} patternRowIds - Rows whose `rowid` identifies the FTS entries to delete.
 * @param {{onProgress?: Function}} [options] - Optional progress reporter.
 * @returns {void}
 */
function deleteExistingArchitectureData(db, repoId, repo, patternRowIds, options = {}) {
  const ftsRowIds = patternRowIds.map((entry) => entry.rowid);
  const reportFtsProgress = (current, total) => options.onProgress?.({
    stage: "deleting_architecture_fts",
    repo,
    current,
    total,
    patterns: patternRowIds.length
  });
  deleteFtsRowsByRowId(db, "architecture_patterns_fts", ftsRowIds, reportFtsProgress);

  // Same order as before: patterns, then components, then map edges.
  const tables = ["architecture_patterns", "architecture_components", "architecture_map_edges"];
  for (const table of tables) {
    db.prepare(`DELETE FROM ${table} WHERE repo_id = ?`).run(repoId);
  }
}
|
|
2257
|
-
function insertArchitectureData(db, repoId, repo, architecture, options = {}) {
|
|
2258
|
-
|
|
2259
|
-
|
|
2260
|
-
|
|
2261
|
-
|
|
2262
|
-
|
|
2263
|
-
options.onProgress?.({
|
|
2264
|
-
stage: "writing_architecture_data",
|
|
2265
|
-
repo,
|
|
2266
|
-
current: 0,
|
|
2267
|
-
total: architecture.imports.length,
|
|
2268
|
-
kind: "imports"
|
|
2269
|
-
});
|
|
2270
|
-
for (const [index, item] of architecture.imports.entries()) {
|
|
2271
|
-
insertImport.run(
|
|
2272
|
-
repoId,
|
|
2273
|
-
item.sourcePath,
|
|
2274
|
-
item.specifier,
|
|
2275
|
-
item.importedPath ?? null,
|
|
2276
|
-
JSON.stringify(item.importedSymbols),
|
|
2277
|
-
item.kind
|
|
2510
|
+
function insertArchitectureData(db, repoId, repo, architecture, options = {}, includeImports = true) {
|
|
2511
|
+
if (includeImports) {
|
|
2512
|
+
const insertImport = db.prepare(
|
|
2513
|
+
`INSERT INTO code_imports
|
|
2514
|
+
(repo_id, source_path, specifier, imported_path, imported_symbols_json, kind)
|
|
2515
|
+
VALUES (?, ?, ?, ?, ?, ?)`
|
|
2278
2516
|
);
|
|
2279
|
-
|
|
2280
|
-
|
|
2281
|
-
|
|
2282
|
-
|
|
2283
|
-
|
|
2284
|
-
|
|
2285
|
-
|
|
2286
|
-
|
|
2287
|
-
|
|
2517
|
+
options.onProgress?.({
|
|
2518
|
+
stage: "writing_architecture_data",
|
|
2519
|
+
repo,
|
|
2520
|
+
current: 0,
|
|
2521
|
+
total: architecture.imports.length,
|
|
2522
|
+
kind: "imports"
|
|
2523
|
+
});
|
|
2524
|
+
for (const [index, item] of architecture.imports.entries()) {
|
|
2525
|
+
insertImport.run(
|
|
2526
|
+
repoId,
|
|
2527
|
+
item.sourcePath,
|
|
2528
|
+
item.specifier,
|
|
2529
|
+
item.importedPath ?? null,
|
|
2530
|
+
JSON.stringify(item.importedSymbols),
|
|
2531
|
+
item.kind
|
|
2532
|
+
);
|
|
2533
|
+
const current = index + 1;
|
|
2534
|
+
if (shouldEmitCodeWriteProgress(current, architecture.imports.length)) {
|
|
2535
|
+
options.onProgress?.({
|
|
2536
|
+
stage: "writing_architecture_data",
|
|
2537
|
+
repo,
|
|
2538
|
+
current,
|
|
2539
|
+
total: architecture.imports.length,
|
|
2540
|
+
kind: "imports"
|
|
2541
|
+
});
|
|
2542
|
+
}
|
|
2288
2543
|
}
|
|
2289
2544
|
}
|
|
2290
2545
|
const insertComponent = db.prepare(
|
|
@@ -2331,8 +2586,8 @@ function insertArchitectureData(db, repoId, repo, architecture, options = {}) {
|
|
|
2331
2586
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
|
|
2332
2587
|
);
|
|
2333
2588
|
const insertFts = db.prepare(
|
|
2334
|
-
`INSERT INTO architecture_patterns_fts (patternId, summary, area, sourceFiles, symbols)
|
|
2335
|
-
VALUES (?, ?, ?, ?, ?)`
|
|
2589
|
+
`INSERT INTO architecture_patterns_fts (rowid, patternId, summary, area, sourceFiles, symbols)
|
|
2590
|
+
VALUES (?, ?, ?, ?, ?, ?)`
|
|
2336
2591
|
);
|
|
2337
2592
|
options.onProgress?.({
|
|
2338
2593
|
stage: "writing_architecture_data",
|
|
@@ -2342,7 +2597,7 @@ function insertArchitectureData(db, repoId, repo, architecture, options = {}) {
|
|
|
2342
2597
|
kind: "patterns"
|
|
2343
2598
|
});
|
|
2344
2599
|
for (const [index, pattern] of architecture.patterns.entries()) {
|
|
2345
|
-
insertPattern.run(
|
|
2600
|
+
const patternInsert = insertPattern.run(
|
|
2346
2601
|
pattern.id,
|
|
2347
2602
|
repoId,
|
|
2348
2603
|
pattern.repo,
|
|
@@ -2356,6 +2611,7 @@ function insertArchitectureData(db, repoId, repo, architecture, options = {}) {
|
|
|
2356
2611
|
pattern.createdAt
|
|
2357
2612
|
);
|
|
2358
2613
|
insertFts.run(
|
|
2614
|
+
Number(patternInsert.lastInsertRowid),
|
|
2359
2615
|
pattern.id,
|
|
2360
2616
|
pattern.sanitizedSummary,
|
|
2361
2617
|
pattern.area,
|
|
@@ -3014,18 +3270,7 @@ function createPattern(input) {
|
|
|
3014
3270
|
};
|
|
3015
3271
|
}
|
|
3016
3272
|
function buildArchitectureIndex(repo, files, chunks, options = {}) {
|
|
3017
|
-
const
|
|
3018
|
-
const codePaths = new Set(allPaths);
|
|
3019
|
-
const relatedTestIndex = buildRelatedTestIndex(allPaths);
|
|
3020
|
-
const symbolSetsByPath = /* @__PURE__ */ new Map();
|
|
3021
|
-
for (const chunk of chunks) {
|
|
3022
|
-
const existing = symbolSetsByPath.get(chunk.filePath) ?? /* @__PURE__ */ new Set();
|
|
3023
|
-
for (const symbol of chunk.symbols) {
|
|
3024
|
-
if (existing.size >= 40) break;
|
|
3025
|
-
existing.add(symbol);
|
|
3026
|
-
}
|
|
3027
|
-
symbolSetsByPath.set(chunk.filePath, existing);
|
|
3028
|
-
}
|
|
3273
|
+
const codePaths = new Set(files.map((file) => file.path));
|
|
3029
3274
|
const imports = [];
|
|
3030
3275
|
options.onProgress?.({
|
|
3031
3276
|
stage: "building_architecture_imports",
|
|
@@ -3048,6 +3293,20 @@ function buildArchitectureIndex(repo, files, chunks, options = {}) {
|
|
|
3048
3293
|
});
|
|
3049
3294
|
}
|
|
3050
3295
|
}
|
|
3296
|
+
return buildArchitectureFromIndexedData(repo, files, chunks, imports, options);
|
|
3297
|
+
}
|
|
3298
|
+
function buildArchitectureFromIndexedData(repo, files, chunks, imports, options = {}) {
|
|
3299
|
+
const allPaths = files.map((file) => file.path);
|
|
3300
|
+
const relatedTestIndex = buildRelatedTestIndex(allPaths);
|
|
3301
|
+
const symbolSetsByPath = /* @__PURE__ */ new Map();
|
|
3302
|
+
for (const chunk of chunks) {
|
|
3303
|
+
const existing = symbolSetsByPath.get(chunk.filePath) ?? /* @__PURE__ */ new Set();
|
|
3304
|
+
for (const symbol of chunk.symbols) {
|
|
3305
|
+
if (existing.size >= 40) break;
|
|
3306
|
+
existing.add(symbol);
|
|
3307
|
+
}
|
|
3308
|
+
symbolSetsByPath.set(chunk.filePath, existing);
|
|
3309
|
+
}
|
|
3051
3310
|
const importsByPath = /* @__PURE__ */ new Map();
|
|
3052
3311
|
for (const item of imports) {
|
|
3053
3312
|
const existing = importsByPath.get(item.sourcePath) ?? [];
|
|
@@ -3063,7 +3322,7 @@ function buildArchitectureIndex(repo, files, chunks, options = {}) {
|
|
|
3063
3322
|
components: 0
|
|
3064
3323
|
});
|
|
3065
3324
|
for (const [index, file] of files.entries()) {
|
|
3066
|
-
const area = classifyArchitectureArea(file.path, file.language
|
|
3325
|
+
const area = classifyArchitectureArea(file.path, file.language);
|
|
3067
3326
|
const fileImports = importsByPath.get(file.path) ?? [];
|
|
3068
3327
|
const symbols = [...symbolSetsByPath.get(file.path) ?? []];
|
|
3069
3328
|
components.push({
|
|
@@ -3277,15 +3536,134 @@ function discoverGitFiles(cwd) {
|
|
|
3277
3536
|
});
|
|
3278
3537
|
return output.split("\n").map((line) => normalizeGitPath(line.trim())).filter(Boolean);
|
|
3279
3538
|
}
|
|
3539
|
+
/**
 * List untracked, non-ignored files via `git ls-files --others --exclude-standard`.
 *
 * @param {string} cwd - Directory in which git is executed.
 * @returns {string[]} Normalized paths, one per non-empty output line.
 * @throws {Error} When the git command cannot be run or exits with an error.
 */
function discoverGitUntrackedFiles(cwd) {
  const gitArgs = ["ls-files", "--others", "--exclude-standard"];
  const stdout = execFileSync3("git", gitArgs, {
    cwd,
    encoding: "utf8",
    stdio: ["ignore", "pipe", "pipe"]
  });
  const untracked = [];
  for (const rawLine of stdout.split("\n")) {
    const normalized = normalizeGitPath(rawLine.trim());
    if (normalized) {
      untracked.push(normalized);
    }
  }
  return untracked;
}
|
|
3547
|
+
/**
 * Run a git command and return its output as non-empty lines.
 *
 * Trailing whitespace is stripped from each line; leading whitespace is kept.
 * stderr is discarded.
 *
 * @param {string} cwd - Directory in which git is executed.
 * @param {string[]} args - Arguments passed to git.
 * @returns {string[]} Non-empty output lines.
 * @throws {Error} When the git command cannot be run or exits with an error.
 */
function execGitLines(cwd, args) {
  const stdout = execFileSync3("git", args, {
    cwd,
    encoding: "utf8",
    stdio: ["ignore", "pipe", "ignore"]
  });
  const lines = [];
  for (const rawLine of stdout.split("\n")) {
    const trimmed = rawLine.trimEnd();
    if (trimmed) {
      lines.push(trimmed);
    }
  }
  return lines;
}
|
|
3555
|
+
/**
 * Resolve the commit hash of HEAD via `git rev-parse HEAD`.
 *
 * @param {string} cwd - Directory in which git is executed.
 * @returns {string|undefined} The trimmed command output, or undefined when the
 *   command fails for any reason.
 */
function readGitHeadCommit(cwd) {
  let stdout;
  try {
    stdout = execFileSync3("git", ["rev-parse", "HEAD"], {
      cwd,
      encoding: "utf8",
      stdio: ["ignore", "pipe", "ignore"]
    });
  } catch {
    // Any failure is reported to the caller as "no known commit".
    return undefined;
  }
  return stdout.trim();
}
|
|
3566
|
+
/**
 * Report whether `git status --porcelain` prints anything for the working tree.
 *
 * @param {string} cwd - Directory in which git is executed.
 * @returns {boolean} True when the status output is non-empty, and also true when
 *   the command fails — an unknown state is treated as dirty.
 */
function hasDirtyWorkingTree(cwd) {
  let porcelain;
  try {
    porcelain = execFileSync3("git", ["status", "--porcelain"], {
      cwd,
      encoding: "utf8",
      stdio: ["ignore", "pipe", "ignore"]
    });
  } catch {
    // Conservative default: if status cannot be read, assume there are changes.
    return true;
  }
  return porcelain.trim().length > 0;
}
|
|
3578
|
+
/**
 * Parse one line of `git diff --name-status` output.
 *
 * Git separates the status field and the path(s) with TAB characters, so the
 * line is split on TAB; this also keeps paths that contain spaces intact.
 * Lines without any TAB fall back to the previous space-separated parsing.
 *
 * @param {string} line - A single output line, e.g. "M\tsrc/a.ts" or "R100\told.ts\tnew.ts".
 * @returns {{status: string, path: (string|undefined), previousPath?: (string|undefined)}|undefined}
 *   The parsed entry. Rename/copy entries (status starting with "R" or "C")
 *   carry the old path in `previousPath` and the new path in `path`. Returns
 *   undefined when the line has fewer than two fields or an empty status.
 */
function parseNameStatusLine(line) {
  const separator = line.includes("\t") ? "\t" : " ";
  const parts = line.split(separator).map((item) => normalizeGitPath(item));
  if (parts.length < 2) return undefined;
  const [status = "", firstPath, secondPath] = parts;
  if (!status) return undefined;
  if (status.startsWith("R") || status.startsWith("C")) {
    // Rename/copy lines list the source path first and the destination second.
    return { status, previousPath: firstPath, path: secondPath };
  }
  return { status, path: firstPath };
}
|
|
3588
|
+
/**
 * Decide which files an incremental code-index run has to look at.
 *
 * Falls back to a full hash comparison (every tracked path counts as changed)
 * when there is no previously indexed commit, when the working tree is dirty,
 * or when the git diff cannot be computed. Otherwise the plan is derived from
 * `git diff --name-status <lastIndexedCommit>..HEAD` plus untracked files.
 *
 * @param {string} cwd - Repository working directory.
 * @param {string} [lastIndexedCommit] - Commit recorded by the previous index run.
 * @param {Iterable<string>} existingIndexedPaths - Paths currently present in the index.
 * @returns {{currentCommit: (string|undefined), trackedPaths: string[], changedPaths: string[],
 *   deletedPaths: string[], dirtyWorkingTree: boolean, fallbackToFullHashCompare: boolean,
 *   reason: string}} The indexing plan.
 */
function planIncrementalCodeIndex(cwd, lastIndexedCommit, existingIndexedPaths) {
  const headCommit = readGitHeadCommit(cwd);
  const trackedPaths = discoverGitFiles(cwd);
  const trackedLookup = new Set(trackedPaths);
  const dirtyWorkingTree = hasDirtyWorkingTree(cwd);

  // Shared shape of every "compare all hashes" plan; only the reason and the
  // reported dirty flag differ between the fallback cases.
  const fullComparePlan = (reason, reportedDirty) => ({
    currentCommit: headCommit,
    trackedPaths,
    changedPaths: trackedPaths,
    deletedPaths: [...existingIndexedPaths].filter((filePath) => !trackedLookup.has(filePath)),
    dirtyWorkingTree: reportedDirty,
    fallbackToFullHashCompare: true,
    reason
  });

  if (!lastIndexedCommit) {
    return fullComparePlan("No previous commit snapshot; using full hash comparison.", dirtyWorkingTree);
  }
  if (dirtyWorkingTree) {
    return fullComparePlan(
      "Working tree is dirty; using full hash comparison for deterministic results.",
      dirtyWorkingTree
    );
  }

  const removed = new Set();
  const modified = new Set();
  try {
    const diffLines = execGitLines(cwd, ["diff", "--name-status", `${lastIndexedCommit}..HEAD`]);
    for (const diffLine of diffLines) {
      const entry = parseNameStatusLine(diffLine);
      if (!entry?.path) continue;
      const entryPath = normalizeGitPath(entry.path);
      if (entry.status[0] === "D") {
        removed.add(entryPath);
      } else if (trackedLookup.has(entryPath)) {
        // Only paths that are still tracked count as changed.
        modified.add(entryPath);
      }
    }
    for (const untrackedPath of discoverGitUntrackedFiles(cwd)) {
      if (trackedLookup.has(untrackedPath)) modified.add(untrackedPath);
    }
    // Anything indexed earlier that git no longer tracks must be dropped from the index.
    for (const indexedPath of existingIndexedPaths) {
      if (!trackedLookup.has(indexedPath)) removed.add(indexedPath);
    }
    return {
      currentCommit: headCommit,
      trackedPaths,
      changedPaths: [...modified],
      deletedPaths: [...removed],
      dirtyWorkingTree: false,
      fallbackToFullHashCompare: false,
      reason: "Using git diff and untracked files against last indexed commit."
    };
  } catch {
    // A failed diff is reported as dirty so the caller takes the full-compare path.
    return fullComparePlan("Unable to compute git diff; falling back to full hash comparison.", true);
  }
}
|
|
3280
3657
|
var DISCOVERY_SCAN_INTERVAL = 200;
|
|
3281
|
-
function
|
|
3658
|
+
function discoverFromPaths(cwd, repo, inputPaths, options = {}) {
|
|
3282
3659
|
const maxFileBytes = options.maxFileBytes ?? DEFAULT_MAX_CODE_FILE_BYTES;
|
|
3660
|
+
const includeContent = options.includeContent ?? false;
|
|
3283
3661
|
const rootPath = path7.resolve(cwd);
|
|
3284
3662
|
const files = [];
|
|
3285
3663
|
let skippedFiles = 0;
|
|
3286
|
-
const
|
|
3287
|
-
const total =
|
|
3288
|
-
for (const [scanIndex, filePath] of
|
|
3664
|
+
const candidatePaths = [...new Set(inputPaths.map((value) => normalizeGitPath(value)).filter(Boolean))];
|
|
3665
|
+
const total = candidatePaths.length;
|
|
3666
|
+
for (const [scanIndex, filePath] of candidatePaths.entries()) {
|
|
3289
3667
|
const scanned = scanIndex + 1;
|
|
3290
3668
|
if (scanned % DISCOVERY_SCAN_INTERVAL === 0 || scanned === total) {
|
|
3291
3669
|
options.onScan?.(scanned, total);
|
|
@@ -3316,7 +3694,6 @@ function discoverCodeFiles(cwd, repo, options = {}) {
|
|
|
3316
3694
|
skippedFiles += 1;
|
|
3317
3695
|
continue;
|
|
3318
3696
|
}
|
|
3319
|
-
const content = buffer.toString("utf8");
|
|
3320
3697
|
files.push({
|
|
3321
3698
|
repo,
|
|
3322
3699
|
path: filePath,
|
|
@@ -3325,11 +3702,21 @@ function discoverCodeFiles(cwd, repo, options = {}) {
|
|
|
3325
3702
|
contentHash: crypto3.createHash("sha256").update(buffer).digest("hex"),
|
|
3326
3703
|
updatedAt: stat.mtime.toISOString(),
|
|
3327
3704
|
absolutePath,
|
|
3328
|
-
content
|
|
3705
|
+
...includeContent ? { content: buffer.toString("utf8") } : {}
|
|
3329
3706
|
});
|
|
3330
3707
|
}
|
|
3331
3708
|
return { files, skippedFiles };
|
|
3332
3709
|
}
|
|
3710
|
+
/**
 * Runs discovery over every path returned by `discoverGitFiles(cwd)`.
 *
 * @param {string} cwd - Directory passed to both helpers.
 * @param {*} repo - Repository identifier forwarded unchanged.
 * @param {object} [options={}] - Options forwarded unchanged to
 *   `discoverFromPaths`.
 * @returns {*} Whatever `discoverFromPaths` returns.
 */
function discoverCodeFiles(cwd, repo, options = {}) {
  const gitPaths = discoverGitFiles(cwd);
  return discoverFromPaths(cwd, repo, gitPaths, options);
}
|
|
3713
|
+
/**
 * Runs discovery over an explicit list of paths instead of the full git
 * file listing.
 *
 * @param {string} cwd - Directory forwarded to `discoverFromPaths`.
 * @param {*} repo - Repository identifier forwarded unchanged.
 * @param {string[]} filePaths - Paths to inspect.
 * @param {object} [options={}] - Options forwarded unchanged.
 * @returns {*} Whatever `discoverFromPaths` returns.
 */
function discoverCodeFilesByPaths(cwd, repo, filePaths, options = {}) {
  const discovery = discoverFromPaths(cwd, repo, filePaths, options);
  return discovery;
}
|
|
3716
|
+
/**
 * Returns the text of a discovered code file.
 *
 * When the record already carries a string `content` (including the empty
 * string) that value is returned as is; otherwise the file at
 * `file.absolutePath` is read synchronously as UTF-8.
 *
 * @param {{content?: *, absolutePath: string}} file - Discovered file record.
 * @returns {string} The file's text.
 */
function readDiscoveredCodeFileContent(file) {
  const inlineText = file.content;
  if (typeof inlineText !== "string") {
    return fs4.readFileSync(file.absolutePath, "utf8");
  }
  return inlineText;
}
|
|
3333
3720
|
|
|
3334
3721
|
// src/retrieval/test-commands.ts
|
|
3335
3722
|
import crypto4 from "crypto";
|
|
@@ -3563,40 +3950,154 @@ function refreshTestCommands(db, cwd, repo, files = [], options = {}) {
|
|
|
3563
3950
|
|
|
3564
3951
|
// src/indexer/code-indexer.ts
|
|
3565
3952
|
function indexCodebase(db, options) {
|
|
3953
|
+
const state = getCodeIndexStateForRepo(db, options.repo);
|
|
3954
|
+
const existingHashes = getRepoCodeFileHashes(db, options.repo);
|
|
3955
|
+
const plan = planIncrementalCodeIndex(
|
|
3956
|
+
options.cwd,
|
|
3957
|
+
state?.lastIndexedCommit,
|
|
3958
|
+
new Set(existingHashes.keys())
|
|
3959
|
+
);
|
|
3566
3960
|
options.onProgress?.({ stage: "discovering_code_files", repo: options.repo });
|
|
3567
|
-
const discovery = discoverCodeFiles(options.cwd, options.repo, {
|
|
3961
|
+
const discovery = plan.fallbackToFullHashCompare ? discoverCodeFiles(options.cwd, options.repo, {
|
|
3962
|
+
maxFileBytes: options.maxFileBytes,
|
|
3963
|
+
onScan: (scanned, total) => options.onProgress?.({
|
|
3964
|
+
stage: "discovering_code_files",
|
|
3965
|
+
repo: options.repo,
|
|
3966
|
+
scanned,
|
|
3967
|
+
total
|
|
3968
|
+
})
|
|
3969
|
+
}) : discoverCodeFilesByPaths(options.cwd, options.repo, plan.changedPaths, {
|
|
3568
3970
|
maxFileBytes: options.maxFileBytes,
|
|
3569
|
-
onScan: (scanned, total) => options.onProgress?.({
|
|
3971
|
+
onScan: (scanned, total) => options.onProgress?.({
|
|
3972
|
+
stage: "discovering_code_files",
|
|
3973
|
+
repo: options.repo,
|
|
3974
|
+
scanned,
|
|
3975
|
+
total
|
|
3976
|
+
})
|
|
3570
3977
|
});
|
|
3978
|
+
const changedFiles = discovery.files.filter(
|
|
3979
|
+
(file) => existingHashes.get(file.path) !== file.contentHash
|
|
3980
|
+
);
|
|
3981
|
+
const discoveredPaths = new Set(discovery.files.map((file) => file.path));
|
|
3982
|
+
const deletedPaths = plan.fallbackToFullHashCompare ? [...existingHashes.keys()].filter((filePath) => !discoveredPaths.has(filePath)) : plan.deletedPaths;
|
|
3571
3983
|
options.onProgress?.({
|
|
3572
3984
|
stage: "discovered_code_files",
|
|
3573
3985
|
repo: options.repo,
|
|
3574
|
-
files:
|
|
3986
|
+
files: changedFiles.length,
|
|
3575
3987
|
skippedFiles: discovery.skippedFiles
|
|
3576
3988
|
});
|
|
3577
|
-
|
|
3578
|
-
|
|
3989
|
+
if (changedFiles.length === 0 && deletedPaths.length === 0) {
|
|
3990
|
+
const counts = touchCodeIndexState(
|
|
3991
|
+
db,
|
|
3992
|
+
options.repo,
|
|
3993
|
+
discovery.skippedFiles,
|
|
3994
|
+
plan.currentCommit
|
|
3995
|
+
);
|
|
3996
|
+
const repoId = ensureRepository(db, options.repo);
|
|
3997
|
+
const scopedCount = (table) => db.prepare(`SELECT COUNT(*) AS count FROM ${table} WHERE repo_id = ?`).get(repoId).count;
|
|
3998
|
+
const summary2 = {
|
|
3999
|
+
indexedFiles: counts.files,
|
|
4000
|
+
codeChunksCreated: counts.chunks,
|
|
4001
|
+
testFilesIndexed: scopedCount("test_files"),
|
|
4002
|
+
testLinksCreated: scopedCount("test_links"),
|
|
4003
|
+
architectureComponentsIndexed: scopedCount("architecture_components"),
|
|
4004
|
+
architecturePatternsIndexed: scopedCount("architecture_patterns"),
|
|
4005
|
+
architectureImportsIndexed: scopedCount("code_imports"),
|
|
4006
|
+
skippedFiles: discovery.skippedFiles,
|
|
4007
|
+
databasePath: defaultDatabasePath(options.cwd)
|
|
4008
|
+
};
|
|
4009
|
+
options.onProgress?.({
|
|
4010
|
+
stage: "completed_code_index",
|
|
4011
|
+
repo: options.repo,
|
|
4012
|
+
files: summary2.indexedFiles,
|
|
4013
|
+
chunks: summary2.codeChunksCreated,
|
|
4014
|
+
skippedFiles: summary2.skippedFiles,
|
|
4015
|
+
testFiles: summary2.testFilesIndexed,
|
|
4016
|
+
testLinks: summary2.testLinksCreated,
|
|
4017
|
+
architectureComponents: summary2.architectureComponentsIndexed,
|
|
4018
|
+
architecturePatterns: summary2.architecturePatternsIndexed,
|
|
4019
|
+
architectureImports: summary2.architectureImportsIndexed
|
|
4020
|
+
});
|
|
4021
|
+
return summary2;
|
|
4022
|
+
}
|
|
4023
|
+
const changedChunks = [];
|
|
4024
|
+
const changedImports = [];
|
|
4025
|
+
const projectedIndexedPaths = new Set(
|
|
4026
|
+
[...existingHashes.keys()].filter((filePath) => !deletedPaths.includes(filePath))
|
|
4027
|
+
);
|
|
4028
|
+
for (const file of changedFiles) projectedIndexedPaths.add(file.path);
|
|
4029
|
+
for (const [index, file] of changedFiles.entries()) {
|
|
3579
4030
|
options.onProgress?.({
|
|
3580
4031
|
stage: "indexing_code_file",
|
|
3581
4032
|
repo: options.repo,
|
|
3582
4033
|
current: index + 1,
|
|
3583
|
-
total:
|
|
4034
|
+
total: changedFiles.length,
|
|
3584
4035
|
filePath: file.path
|
|
3585
4036
|
});
|
|
3586
|
-
const
|
|
3587
|
-
|
|
4037
|
+
const content = readDiscoveredCodeFileContent(file);
|
|
4038
|
+
const fileWithContent = { ...file, content };
|
|
4039
|
+
const fileChunks = chunkCodeFile(fileWithContent);
|
|
4040
|
+
changedChunks.push(...fileChunks);
|
|
4041
|
+
changedImports.push(
|
|
4042
|
+
...extractCodeImports(file.path, content, projectedIndexedPaths, options.repo)
|
|
4043
|
+
);
|
|
3588
4044
|
options.onProgress?.({
|
|
3589
4045
|
stage: "indexed_code_file",
|
|
3590
4046
|
repo: options.repo,
|
|
3591
4047
|
current: index + 1,
|
|
3592
|
-
total:
|
|
4048
|
+
total: changedFiles.length,
|
|
3593
4049
|
filePath: file.path,
|
|
3594
4050
|
chunks: fileChunks.length
|
|
3595
4051
|
});
|
|
3596
4052
|
}
|
|
3597
|
-
const
|
|
4053
|
+
const affectedPaths = /* @__PURE__ */ new Set([
|
|
4054
|
+
...deletedPaths,
|
|
4055
|
+
...changedFiles.map((file) => file.path)
|
|
4056
|
+
]);
|
|
4057
|
+
const allFilesByPath = new Map(getRepoCodeFiles(db, options.repo).map((file) => [file.path, file]));
|
|
4058
|
+
for (const filePath of deletedPaths) allFilesByPath.delete(filePath);
|
|
4059
|
+
for (const file of changedFiles) {
|
|
4060
|
+
allFilesByPath.set(file.path, {
|
|
4061
|
+
repo: file.repo,
|
|
4062
|
+
path: file.path,
|
|
4063
|
+
language: file.language,
|
|
4064
|
+
sizeBytes: file.sizeBytes,
|
|
4065
|
+
contentHash: file.contentHash,
|
|
4066
|
+
updatedAt: file.updatedAt
|
|
4067
|
+
});
|
|
4068
|
+
}
|
|
4069
|
+
const allFiles = [...allFilesByPath.values()];
|
|
4070
|
+
const allSymbolChunks = getRepoCodeChunkSymbols(db, options.repo).filter(
|
|
4071
|
+
(chunk) => !affectedPaths.has(chunk.filePath)
|
|
4072
|
+
);
|
|
4073
|
+
allSymbolChunks.push(...changedChunks);
|
|
4074
|
+
const allImports = getRepoCodeImports(db, options.repo).filter(
|
|
4075
|
+
(item) => !affectedPaths.has(item.sourcePath)
|
|
4076
|
+
);
|
|
4077
|
+
allImports.push(...changedImports);
|
|
4078
|
+
const testChunks = getRepoTestChunks(db, options.repo).filter(
|
|
4079
|
+
(chunk) => !affectedPaths.has(chunk.filePath)
|
|
4080
|
+
);
|
|
4081
|
+
for (const chunk of changedChunks) {
|
|
4082
|
+
if (isTestFilePath(chunk.filePath)) testChunks.push(chunk);
|
|
4083
|
+
}
|
|
4084
|
+
const testAwareness = inferTestAwareness(options.repo, allFiles, testChunks, {
|
|
3598
4085
|
onProgress: options.onProgress
|
|
3599
4086
|
});
|
|
4087
|
+
options.onProgress?.({
|
|
4088
|
+
stage: "building_architecture_imports",
|
|
4089
|
+
repo: options.repo,
|
|
4090
|
+
current: allFiles.length,
|
|
4091
|
+
total: allFiles.length,
|
|
4092
|
+
imports: allImports.length
|
|
4093
|
+
});
|
|
4094
|
+
const architecture = buildArchitectureFromIndexedData(
|
|
4095
|
+
options.repo,
|
|
4096
|
+
allFiles,
|
|
4097
|
+
allSymbolChunks,
|
|
4098
|
+
allImports,
|
|
4099
|
+
{ onProgress: options.onProgress }
|
|
4100
|
+
);
|
|
3600
4101
|
options.onProgress?.({
|
|
3601
4102
|
stage: "indexed_architecture",
|
|
3602
4103
|
repo: options.repo,
|
|
@@ -3607,14 +4108,22 @@ function indexCodebase(db, options) {
|
|
|
3607
4108
|
const summary = replaceCodeIndex(
|
|
3608
4109
|
db,
|
|
3609
4110
|
options.repo,
|
|
3610
|
-
|
|
3611
|
-
|
|
4111
|
+
changedFiles.map(({ content: _content, absolutePath: _absolutePath, ...file }) => file),
|
|
4112
|
+
changedChunks,
|
|
3612
4113
|
discovery.skippedFiles,
|
|
3613
4114
|
options.cwd,
|
|
3614
4115
|
architecture,
|
|
3615
|
-
{
|
|
4116
|
+
{
|
|
4117
|
+
onProgress: options.onProgress,
|
|
4118
|
+
deletedPaths,
|
|
4119
|
+
changedImports,
|
|
4120
|
+
currentCommit: plan.currentCommit,
|
|
4121
|
+
testAwareness
|
|
4122
|
+
}
|
|
3616
4123
|
);
|
|
3617
|
-
refreshTestCommands(db, options.cwd, options.repo, [], {
|
|
4124
|
+
refreshTestCommands(db, options.cwd, options.repo, [], {
|
|
4125
|
+
onProgress: options.onProgress
|
|
4126
|
+
});
|
|
3618
4127
|
options.onProgress?.({
|
|
3619
4128
|
stage: "completed_code_index",
|
|
3620
4129
|
repo: options.repo,
|
|
@@ -4053,7 +4562,7 @@ function clampMaxResults(value, defaultValue) {
|
|
|
4053
4562
|
|
|
4054
4563
|
// src/retrieval/ranker.ts
|
|
4055
4564
|
import path11 from "path";
|
|
4056
|
-
function
|
|
4565
|
+
function parseJsonArray4(value) {
|
|
4057
4566
|
try {
|
|
4058
4567
|
const parsed = JSON.parse(value);
|
|
4059
4568
|
return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
|
|
@@ -4071,9 +4580,9 @@ function rowToWisdomUnit(row) {
|
|
|
4071
4580
|
category: row.category,
|
|
4072
4581
|
text: row.text,
|
|
4073
4582
|
sanitizedText: row.sanitized_text,
|
|
4074
|
-
filePaths:
|
|
4075
|
-
symbols:
|
|
4076
|
-
authors:
|
|
4583
|
+
filePaths: parseJsonArray4(row.file_paths_json),
|
|
4584
|
+
symbols: parseJsonArray4(row.symbols_json),
|
|
4585
|
+
authors: parseJsonArray4(row.authors_json),
|
|
4077
4586
|
createdAt: row.created_at,
|
|
4078
4587
|
mergedAt: row.merged_at ?? void 0,
|
|
4079
4588
|
confidence: row.confidence,
|
|
@@ -4136,11 +4645,11 @@ function symbolMatch2(unit, querySymbols) {
|
|
|
4136
4645
|
}
|
|
4137
4646
|
function textMatch2(unit, inputText) {
|
|
4138
4647
|
const queryTokens = tokenizeSearchText(inputText, 32);
|
|
4139
|
-
if (queryTokens.length === 0) return unit.bm25 === void 0 ? 0 : 0.
|
|
4648
|
+
if (queryTokens.length === 0) return unit.bm25 === void 0 ? 0 : 0.35;
|
|
4140
4649
|
const haystack = `${unit.sanitizedText} ${unit.filePaths.join(" ")} ${unit.symbols.join(" ")}`.toLowerCase();
|
|
4141
4650
|
const overlap = queryTokens.filter((token) => haystack.includes(token.toLowerCase())).length / queryTokens.length;
|
|
4142
|
-
const bm25Signal = unit.bm25 === void 0 ? 0 : Math.max(0
|
|
4143
|
-
return
|
|
4651
|
+
const bm25Signal = unit.bm25 === void 0 ? 0 : Math.max(0, Math.min(1, 1 / (1 + Math.abs(unit.bm25))));
|
|
4652
|
+
return unit.bm25 === void 0 ? overlap : Number((0.65 * overlap + 0.35 * bm25Signal).toFixed(4));
|
|
4144
4653
|
}
|
|
4145
4654
|
function reviewerOrAuthorSignal(unit) {
|
|
4146
4655
|
if (unit.sourceType === "review_comment" || unit.sourceType === "review_summary") return 0.9;
|
|
@@ -4332,7 +4841,7 @@ function rankWisdomUnits(db, input) {
|
|
|
4332
4841
|
|
|
4333
4842
|
// src/retrieval/code-ranker.ts
|
|
4334
4843
|
import path12 from "path";
|
|
4335
|
-
function
|
|
4844
|
+
function parseJsonArray5(value) {
|
|
4336
4845
|
try {
|
|
4337
4846
|
const parsed = JSON.parse(value);
|
|
4338
4847
|
return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
|
|
@@ -4349,7 +4858,7 @@ function rowToCodeChunk(row) {
|
|
|
4349
4858
|
startLine: row.start_line,
|
|
4350
4859
|
endLine: row.end_line,
|
|
4351
4860
|
sanitizedText: row.sanitized_text,
|
|
4352
|
-
symbols:
|
|
4861
|
+
symbols: parseJsonArray5(row.symbols_json),
|
|
4353
4862
|
contentHash: row.content_hash,
|
|
4354
4863
|
updatedAt: row.updated_at,
|
|
4355
4864
|
bm25: row.bm25 ?? void 0
|
|
@@ -4398,8 +4907,8 @@ function textMatch3(chunk, input) {
|
|
|
4398
4907
|
);
|
|
4399
4908
|
const haystack = `${chunk.sanitizedText} ${chunk.filePath} ${chunk.symbols.join(" ")}`.toLowerCase();
|
|
4400
4909
|
const overlap = tokens.length ? tokens.filter((token) => haystack.includes(token.toLowerCase())).length / tokens.length : 0;
|
|
4401
|
-
const bm25Signal = chunk.bm25 === void 0 ? 0 : Math.max(0
|
|
4402
|
-
return
|
|
4910
|
+
const bm25Signal = chunk.bm25 === void 0 ? 0 : Math.max(0, Math.min(1, 1 / (1 + Math.abs(chunk.bm25))));
|
|
4911
|
+
return chunk.bm25 === void 0 ? overlap : Number((0.65 * overlap + 0.35 * bm25Signal).toFixed(4));
|
|
4403
4912
|
}
|
|
4404
4913
|
function recencyScore2(chunk) {
|
|
4405
4914
|
const timestamp = Date.parse(chunk.updatedAt);
|
|
@@ -4497,7 +5006,7 @@ function rankCodeChunks(db, input) {
|
|
|
4497
5006
|
|
|
4498
5007
|
// src/retrieval/architecture-ranker.ts
|
|
4499
5008
|
import path13 from "path";
|
|
4500
|
-
function
|
|
5009
|
+
function parseJsonArray6(value) {
|
|
4501
5010
|
try {
|
|
4502
5011
|
const parsed = JSON.parse(value);
|
|
4503
5012
|
return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
|
|
@@ -4521,8 +5030,8 @@ function rowToPattern(row) {
|
|
|
4521
5030
|
name: row.name,
|
|
4522
5031
|
summary: row.summary_sanitized,
|
|
4523
5032
|
sanitizedSummary: row.summary_sanitized,
|
|
4524
|
-
sourceFiles:
|
|
4525
|
-
symbols:
|
|
5033
|
+
sourceFiles: parseJsonArray6(row.source_files_json),
|
|
5034
|
+
symbols: parseJsonArray6(row.symbols_json),
|
|
4526
5035
|
evidence: parseEvidence(row.evidence_json),
|
|
4527
5036
|
confidence: row.confidence,
|
|
4528
5037
|
createdAt: row.created_at,
|
|
@@ -4638,7 +5147,7 @@ function rankArchitecturePatterns(db, input) {
|
|
|
4638
5147
|
|
|
4639
5148
|
// src/retrieval/test-ranker.ts
|
|
4640
5149
|
import path14 from "path";
|
|
4641
|
-
function
|
|
5150
|
+
function parseJsonArray7(value) {
|
|
4642
5151
|
if (!value) return [];
|
|
4643
5152
|
try {
|
|
4644
5153
|
const parsed = JSON.parse(value);
|
|
@@ -4651,7 +5160,7 @@ function baseStem(filePath) {
|
|
|
4651
5160
|
return path14.posix.basename(filePath).replace(/\.(test|spec)\.[^.]+$/i, "").replace(/\.[^.]+$/i, "").toLowerCase();
|
|
4652
5161
|
}
|
|
4653
5162
|
function rowToRanked(row, input) {
|
|
4654
|
-
const symbols =
|
|
5163
|
+
const symbols = parseJsonArray7(row.symbols_json);
|
|
4655
5164
|
const text = row.sanitized_text ?? "";
|
|
4656
5165
|
const matchedSymbols = (input.symbols ?? []).filter((symbol) => {
|
|
4657
5166
|
const lower = symbol.toLowerCase();
|
|
@@ -4721,7 +5230,7 @@ function rankRelevantTests(db, input) {
|
|
|
4721
5230
|
|
|
4722
5231
|
// src/retrieval/regression-ranker.ts
|
|
4723
5232
|
import path15 from "path";
|
|
4724
|
-
function
|
|
5233
|
+
function parseJsonArray8(value) {
|
|
4725
5234
|
try {
|
|
4726
5235
|
const parsed = JSON.parse(value);
|
|
4727
5236
|
return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
|
|
@@ -4736,12 +5245,12 @@ function rowToEvent(row) {
|
|
|
4736
5245
|
prNumber: row.pr_number,
|
|
4737
5246
|
prUrl: row.pr_url,
|
|
4738
5247
|
summary: row.summary_sanitized,
|
|
4739
|
-
filePaths:
|
|
4740
|
-
symbols:
|
|
4741
|
-
testPaths:
|
|
4742
|
-
authors:
|
|
4743
|
-
labels:
|
|
4744
|
-
signals:
|
|
5248
|
+
filePaths: parseJsonArray8(row.file_paths_json),
|
|
5249
|
+
symbols: parseJsonArray8(row.symbols_json),
|
|
5250
|
+
testPaths: parseJsonArray8(row.test_paths_json),
|
|
5251
|
+
authors: parseJsonArray8(row.authors_json),
|
|
5252
|
+
labels: parseJsonArray8(row.labels_json),
|
|
5253
|
+
signals: parseJsonArray8(row.signals_json),
|
|
4745
5254
|
createdAt: row.created_at,
|
|
4746
5255
|
mergedAt: row.merged_at ?? void 0,
|
|
4747
5256
|
confidence: row.confidence
|
|
@@ -6141,7 +6650,7 @@ function syncPlaybooksToDatabase(db, cwd) {
|
|
|
6141
6650
|
}
|
|
6142
6651
|
|
|
6143
6652
|
// src/retrieval/onboarding.ts
|
|
6144
|
-
function
|
|
6653
|
+
function parseJsonArray9(value) {
|
|
6145
6654
|
try {
|
|
6146
6655
|
const parsed = JSON.parse(value);
|
|
6147
6656
|
return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
|
|
@@ -6174,7 +6683,7 @@ function riskyModules(db) {
|
|
|
6174
6683
|
ORDER BY confidence DESC, COALESCE(merged_at, created_at) DESC
|
|
6175
6684
|
LIMIT 20`
|
|
6176
6685
|
).all();
|
|
6177
|
-
return [...new Set(rows.flatMap((row) =>
|
|
6686
|
+
return [...new Set(rows.flatMap((row) => parseJsonArray9(row.file_paths_json)))].slice(0, 10);
|
|
6178
6687
|
}
|
|
6179
6688
|
function relatedTests(db, files) {
|
|
6180
6689
|
if (files.length === 0) {
|
|
@@ -6253,6 +6762,7 @@ import crypto8 from "crypto";
|
|
|
6253
6762
|
import fs7 from "fs";
|
|
6254
6763
|
import path18 from "path";
|
|
6255
6764
|
var ANCHOR_EVALS_FILE = "anchor.evals.json";
|
|
6765
|
+
var DEFAULT_EVAL_K = 8;
|
|
6256
6766
|
function evalsPath(cwd) {
|
|
6257
6767
|
return path18.join(cwd, ANCHOR_EVALS_FILE);
|
|
6258
6768
|
}
|
|
@@ -6351,6 +6861,7 @@ function runRetrievalEvals(db, cwd) {
|
|
|
6351
6861
|
initializeSchema(db);
|
|
6352
6862
|
const filePath = evalsPath(cwd);
|
|
6353
6863
|
const evalFile = readEvalFile(cwd);
|
|
6864
|
+
const k = DEFAULT_EVAL_K;
|
|
6354
6865
|
const results = evalFile.evals.map((item) => {
|
|
6355
6866
|
const context = buildAnchorContextResult(db, cwd, {
|
|
6356
6867
|
task: item.task,
|
|
@@ -6361,6 +6872,9 @@ function runRetrievalEvals(db, cwd) {
|
|
|
6361
6872
|
...Array.isArray(context.metadata.items) ? context.metadata.items : [],
|
|
6362
6873
|
...Array.isArray(context.metadata.teamRules) ? context.metadata.teamRules : []
|
|
6363
6874
|
];
|
|
6875
|
+
const rankedPrs = uniqueStrings(
|
|
6876
|
+
metadataItems.map((metadata) => metadata.prNumber).filter((prNumber) => typeof prNumber === "number").map(String)
|
|
6877
|
+
).map(Number);
|
|
6364
6878
|
const foundPrs = uniqueStrings(
|
|
6365
6879
|
metadataItems.map((metadata) => metadata.prNumber).filter((prNumber) => typeof prNumber === "number").map(String)
|
|
6366
6880
|
).map(Number);
|
|
@@ -6371,6 +6885,23 @@ function runRetrievalEvals(db, cwd) {
|
|
|
6371
6885
|
const missingCategories = item.expectedCategories.filter(
|
|
6372
6886
|
(category) => !foundCategories.includes(category)
|
|
6373
6887
|
);
|
|
6888
|
+
const expectedPrRanks = item.expectedPrs.map((prNumber) => {
|
|
6889
|
+
const index = rankedPrs.indexOf(prNumber);
|
|
6890
|
+
return {
|
|
6891
|
+
prNumber,
|
|
6892
|
+
rank: index >= 0 ? index + 1 : void 0
|
|
6893
|
+
};
|
|
6894
|
+
});
|
|
6895
|
+
const topK = rankedPrs.slice(0, k);
|
|
6896
|
+
const relevantInTopK = item.expectedPrs.filter((prNumber) => topK.includes(prNumber));
|
|
6897
|
+
const precisionAtK2 = k > 0 ? Number((relevantInTopK.length / k).toFixed(4)) : 0;
|
|
6898
|
+
const recallAtK2 = item.expectedPrs.length > 0 ? Number((relevantInTopK.length / item.expectedPrs.length).toFixed(4)) : 0;
|
|
6899
|
+
const reciprocalRank = (() => {
|
|
6900
|
+
if (item.expectedPrs.length === 0) return 0;
|
|
6901
|
+
const firstRank = expectedPrRanks.map((entry) => entry.rank).filter((rank) => typeof rank === "number").sort((a, b) => a - b)[0];
|
|
6902
|
+
if (!firstRank) return 0;
|
|
6903
|
+
return Number((1 / firstRank).toFixed(4));
|
|
6904
|
+
})();
|
|
6374
6905
|
return {
|
|
6375
6906
|
id: item.id,
|
|
6376
6907
|
task: item.task,
|
|
@@ -6378,18 +6909,35 @@ function runRetrievalEvals(db, cwd) {
|
|
|
6378
6909
|
expectedPrs: item.expectedPrs,
|
|
6379
6910
|
foundPrs,
|
|
6380
6911
|
missingPrs,
|
|
6912
|
+
expectedPrRanks,
|
|
6381
6913
|
expectedCategories: item.expectedCategories,
|
|
6382
6914
|
foundCategories,
|
|
6383
|
-
missingCategories
|
|
6915
|
+
missingCategories,
|
|
6916
|
+
precisionAtK: precisionAtK2,
|
|
6917
|
+
recallAtK: recallAtK2,
|
|
6918
|
+
reciprocalRank
|
|
6384
6919
|
};
|
|
6385
6920
|
});
|
|
6386
6921
|
const passed = results.filter((result) => result.passed).length;
|
|
6922
|
+
const precisionAtK = results.length > 0 ? Number(
|
|
6923
|
+
(results.reduce((sum, result) => sum + result.precisionAtK, 0) / results.length).toFixed(4)
|
|
6924
|
+
) : 0;
|
|
6925
|
+
const recallAtK = results.length > 0 ? Number(
|
|
6926
|
+
(results.reduce((sum, result) => sum + result.recallAtK, 0) / results.length).toFixed(4)
|
|
6927
|
+
) : 0;
|
|
6928
|
+
const mrr = results.length > 0 ? Number(
|
|
6929
|
+
(results.reduce((sum, result) => sum + result.reciprocalRank, 0) / results.length).toFixed(4)
|
|
6930
|
+
) : 0;
|
|
6387
6931
|
return {
|
|
6388
6932
|
ok: passed === results.length,
|
|
6389
6933
|
path: filePath,
|
|
6390
6934
|
total: results.length,
|
|
6391
6935
|
passed,
|
|
6392
6936
|
failed: results.length - passed,
|
|
6937
|
+
precisionAtK,
|
|
6938
|
+
recallAtK,
|
|
6939
|
+
mrr,
|
|
6940
|
+
k,
|
|
6393
6941
|
results
|
|
6394
6942
|
};
|
|
6395
6943
|
}
|
|
@@ -8744,7 +9292,7 @@ function packageRootForSpecifier(specifier) {
|
|
|
8744
9292
|
if (normalized.startsWith("@") && parts.length >= 2) return `${parts[0]}/${parts[1]}`;
|
|
8745
9293
|
return parts[0] ?? "";
|
|
8746
9294
|
}
|
|
8747
|
-
function
|
|
9295
|
+
function parseJsonArray10(value) {
|
|
8748
9296
|
try {
|
|
8749
9297
|
const parsed = JSON.parse(value);
|
|
8750
9298
|
return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
|
|
@@ -8878,7 +9426,7 @@ function rebuildOrgGraph(db, config, baseDirOrOptions) {
|
|
|
8878
9426
|
`imports ${sanitizeHistoricalText(rootSpecifier || item.specifier)}`
|
|
8879
9427
|
)
|
|
8880
9428
|
],
|
|
8881
|
-
confidence:
|
|
9429
|
+
confidence: parseJsonArray10(item.imported_symbols_json).length > 0 ? 0.88 : 0.76
|
|
8882
9430
|
});
|
|
8883
9431
|
}
|
|
8884
9432
|
if (shouldEmitProgress3(index + 1, imports.length)) {
|
|
@@ -9214,7 +9762,11 @@ async function indexOrgRepos(db, config, options = {}) {
|
|
|
9214
9762
|
command,
|
|
9215
9763
|
totalRepos: repos.length
|
|
9216
9764
|
});
|
|
9217
|
-
|
|
9765
|
+
const maxConcurrency = Math.max(1, Math.min(options.concurrency ?? 3, 4));
|
|
9766
|
+
let nextRepoIndex = 0;
|
|
9767
|
+
const processRepo = async (repoIndex) => {
|
|
9768
|
+
const repo = repos[repoIndex];
|
|
9769
|
+
if (!repo) return;
|
|
9218
9770
|
const repoPosition = repoIndex + 1;
|
|
9219
9771
|
const localPath = orgRepoLocalPath(config.org, repo, options.baseDir);
|
|
9220
9772
|
const repoStartedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
@@ -9465,7 +10017,18 @@ async function indexOrgRepos(db, config, options = {}) {
|
|
|
9465
10017
|
error: message
|
|
9466
10018
|
});
|
|
9467
10019
|
}
|
|
9468
|
-
}
|
|
10020
|
+
};
|
|
10021
|
+
const worker = async () => {
|
|
10022
|
+
while (true) {
|
|
10023
|
+
const repoIndex = nextRepoIndex;
|
|
10024
|
+
nextRepoIndex += 1;
|
|
10025
|
+
if (repoIndex >= repos.length) return;
|
|
10026
|
+
await processRepo(repoIndex);
|
|
10027
|
+
}
|
|
10028
|
+
};
|
|
10029
|
+
await Promise.all(
|
|
10030
|
+
Array.from({ length: Math.min(maxConcurrency, repos.length) }, () => worker())
|
|
10031
|
+
);
|
|
9469
10032
|
let graph;
|
|
9470
10033
|
if (options.noGraph) {
|
|
9471
10034
|
const counts = getOrgGraphCounts(db, config.org);
|
|
@@ -9510,6 +10073,7 @@ async function indexOrgRepos(db, config, options = {}) {
|
|
|
9510
10073
|
codeFilesIndexed: results.reduce((sum, result) => sum + (result.code?.indexedFiles ?? 0), 0),
|
|
9511
10074
|
failures: results.map((result) => result.error).concat(graph.error ? [graph.error] : []).filter((error) => Boolean(error))
|
|
9512
10075
|
});
|
|
10076
|
+
runDatabaseMaintenance(db);
|
|
9513
10077
|
emit({
|
|
9514
10078
|
stage: "org_sync_completed",
|
|
9515
10079
|
org: config.org,
|
|
@@ -10397,6 +10961,7 @@ export {
|
|
|
10397
10961
|
anchorMcpEntry,
|
|
10398
10962
|
architectureFilesFromDiff,
|
|
10399
10963
|
buildAnchorContextResult,
|
|
10964
|
+
buildArchitectureFromIndexedData,
|
|
10400
10965
|
buildArchitectureIndex,
|
|
10401
10966
|
buildArchitectureMap,
|
|
10402
10967
|
buildFtsQuery,
|
|
@@ -10436,6 +11001,7 @@ export {
|
|
|
10436
11001
|
detectTestCommands,
|
|
10437
11002
|
detectTestCommandsForFile,
|
|
10438
11003
|
discoverCodeFiles,
|
|
11004
|
+
discoverCodeFilesByPaths,
|
|
10439
11005
|
emptyCodeIndexSummary,
|
|
10440
11006
|
ensureAnchorGitExclude,
|
|
10441
11007
|
ensureCursorConfig,
|
|
@@ -10464,6 +11030,7 @@ export {
|
|
|
10464
11030
|
getAnchorIndexHealth,
|
|
10465
11031
|
getArchitectureContext,
|
|
10466
11032
|
getArchitectureMapContext,
|
|
11033
|
+
getCodeIndexStateForRepo,
|
|
10467
11034
|
getGitHubRateLimitDelayMs,
|
|
10468
11035
|
getGraphQLFetchCheckpoint,
|
|
10469
11036
|
getIndexStatus,
|
|
@@ -10474,12 +11041,19 @@ export {
|
|
|
10474
11041
|
getOrgRepoState,
|
|
10475
11042
|
getOrgStatus,
|
|
10476
11043
|
getPlaybook,
|
|
11044
|
+
getRepoCodeChunkSymbols,
|
|
11045
|
+
getRepoCodeCounts,
|
|
11046
|
+
getRepoCodeFileHashes,
|
|
11047
|
+
getRepoCodeFiles,
|
|
11048
|
+
getRepoCodeImports,
|
|
11049
|
+
getRepoTestChunks,
|
|
10477
11050
|
getSemanticStatus,
|
|
10478
11051
|
getSuggestedPromptTexts,
|
|
10479
11052
|
getSuggestedPrompts,
|
|
10480
11053
|
getWisdomCategoryCounts,
|
|
10481
11054
|
githubAuthFixMessage,
|
|
10482
11055
|
graphQLFetchCheckpointScope,
|
|
11056
|
+
hasDirtyWorkingTree,
|
|
10483
11057
|
hasHighSignalLanguage,
|
|
10484
11058
|
indexCodebase,
|
|
10485
11059
|
indexOrgRepos,
|
|
@@ -10515,6 +11089,7 @@ export {
|
|
|
10515
11089
|
orgRoot,
|
|
10516
11090
|
paginateWithGitHubRateLimit,
|
|
10517
11091
|
parseGitHubRemote,
|
|
11092
|
+
planIncrementalCodeIndex,
|
|
10518
11093
|
planTask,
|
|
10519
11094
|
plannedOrgCloneCommands,
|
|
10520
11095
|
rankArchitecturePatterns,
|
|
@@ -10523,6 +11098,8 @@ export {
|
|
|
10523
11098
|
rankRelevantTests,
|
|
10524
11099
|
rankTeamRules,
|
|
10525
11100
|
rankWisdomUnits,
|
|
11101
|
+
readDiscoveredCodeFileContent,
|
|
11102
|
+
readGitHeadCommit,
|
|
10526
11103
|
readOrgHeartbeat,
|
|
10527
11104
|
rebuildOrgGraph,
|
|
10528
11105
|
recordFeedback,
|
|
@@ -10543,6 +11120,7 @@ export {
|
|
|
10543
11120
|
resolvePullRequestFetchLimit,
|
|
10544
11121
|
reviewDiff,
|
|
10545
11122
|
runAnchorCi,
|
|
11123
|
+
runDatabaseMaintenance,
|
|
10546
11124
|
runDoctor,
|
|
10547
11125
|
runRetrievalEvals,
|
|
10548
11126
|
sanitizeHistoricalText,
|
|
@@ -10557,6 +11135,7 @@ export {
|
|
|
10557
11135
|
syncOrgConfigToDatabase,
|
|
10558
11136
|
syncPlaybooksToDatabase,
|
|
10559
11137
|
tokenizeSearchText,
|
|
11138
|
+
touchCodeIndexState,
|
|
10560
11139
|
truncateText,
|
|
10561
11140
|
uniqueStrings,
|
|
10562
11141
|
updateGitHubGraphQLRateLimitState,
|