@pratik7368patil/anchor-core 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +173 -10
- package/dist/index.js +1049 -54
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/db/schema.sql +62 -0
package/dist/index.js
CHANGED
|
@@ -253,7 +253,7 @@ function redactedHistoricalText(text) {
|
|
|
253
253
|
|
|
254
254
|
// src/db/database.ts
|
|
255
255
|
import fs3 from "fs";
|
|
256
|
-
import
|
|
256
|
+
import path4 from "path";
|
|
257
257
|
import Database from "better-sqlite3";
|
|
258
258
|
|
|
259
259
|
// src/db/migrations.ts
|
|
@@ -376,6 +376,63 @@ CREATE TABLE IF NOT EXISTS code_index_state (
|
|
|
376
376
|
skipped_files INTEGER NOT NULL
|
|
377
377
|
);
|
|
378
378
|
|
|
379
|
+
CREATE TABLE IF NOT EXISTS test_files (
|
|
380
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
381
|
+
repo_id INTEGER NOT NULL REFERENCES repositories(id) ON DELETE CASCADE,
|
|
382
|
+
path TEXT NOT NULL,
|
|
383
|
+
language TEXT,
|
|
384
|
+
size_bytes INTEGER NOT NULL,
|
|
385
|
+
content_hash TEXT NOT NULL,
|
|
386
|
+
updated_at TEXT NOT NULL,
|
|
387
|
+
UNIQUE(repo_id, path)
|
|
388
|
+
);
|
|
389
|
+
|
|
390
|
+
CREATE TABLE IF NOT EXISTS test_links (
|
|
391
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
392
|
+
repo_id INTEGER NOT NULL REFERENCES repositories(id) ON DELETE CASCADE,
|
|
393
|
+
source_path TEXT NOT NULL,
|
|
394
|
+
test_path TEXT NOT NULL,
|
|
395
|
+
reason TEXT NOT NULL,
|
|
396
|
+
strength REAL NOT NULL,
|
|
397
|
+
UNIQUE(repo_id, source_path, test_path, reason)
|
|
398
|
+
);
|
|
399
|
+
|
|
400
|
+
CREATE TABLE IF NOT EXISTS regression_events (
|
|
401
|
+
id TEXT PRIMARY KEY,
|
|
402
|
+
repo_id INTEGER NOT NULL REFERENCES repositories(id) ON DELETE CASCADE,
|
|
403
|
+
pr_id INTEGER REFERENCES pull_requests(id) ON DELETE CASCADE,
|
|
404
|
+
repo TEXT NOT NULL,
|
|
405
|
+
pr_number INTEGER NOT NULL,
|
|
406
|
+
pr_url TEXT NOT NULL,
|
|
407
|
+
summary_sanitized TEXT NOT NULL,
|
|
408
|
+
file_paths_json TEXT NOT NULL,
|
|
409
|
+
symbols_json TEXT NOT NULL,
|
|
410
|
+
test_paths_json TEXT NOT NULL,
|
|
411
|
+
authors_json TEXT NOT NULL,
|
|
412
|
+
labels_json TEXT NOT NULL,
|
|
413
|
+
signals_json TEXT NOT NULL,
|
|
414
|
+
created_at TEXT NOT NULL,
|
|
415
|
+
merged_at TEXT,
|
|
416
|
+
confidence REAL NOT NULL
|
|
417
|
+
);
|
|
418
|
+
|
|
419
|
+
CREATE TABLE IF NOT EXISTS index_runs (
|
|
420
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
421
|
+
command TEXT NOT NULL,
|
|
422
|
+
repo TEXT,
|
|
423
|
+
started_at TEXT NOT NULL,
|
|
424
|
+
finished_at TEXT,
|
|
425
|
+
history_coverage TEXT,
|
|
426
|
+
history_limit INTEGER,
|
|
427
|
+
prs_fetched INTEGER,
|
|
428
|
+
prs_skipped INTEGER,
|
|
429
|
+
comments_indexed INTEGER,
|
|
430
|
+
code_files_indexed INTEGER,
|
|
431
|
+
test_files_indexed INTEGER,
|
|
432
|
+
failures_json TEXT NOT NULL DEFAULT '[]',
|
|
433
|
+
status TEXT NOT NULL
|
|
434
|
+
);
|
|
435
|
+
|
|
379
436
|
CREATE TABLE IF NOT EXISTS sync_state (
|
|
380
437
|
repo TEXT PRIMARY KEY,
|
|
381
438
|
last_sync_at TEXT,
|
|
@@ -393,6 +450,11 @@ CREATE INDEX IF NOT EXISTS idx_wisdom_units_category ON wisdom_units(category);
|
|
|
393
450
|
CREATE INDEX IF NOT EXISTS idx_wisdom_units_pr ON wisdom_units(pr_id);
|
|
394
451
|
CREATE INDEX IF NOT EXISTS idx_code_files_path ON code_files(path);
|
|
395
452
|
CREATE INDEX IF NOT EXISTS idx_code_chunks_file_path ON code_chunks(file_path);
|
|
453
|
+
CREATE INDEX IF NOT EXISTS idx_test_files_path ON test_files(path);
|
|
454
|
+
CREATE INDEX IF NOT EXISTS idx_test_links_source ON test_links(source_path);
|
|
455
|
+
CREATE INDEX IF NOT EXISTS idx_test_links_test ON test_links(test_path);
|
|
456
|
+
CREATE INDEX IF NOT EXISTS idx_regression_events_pr ON regression_events(pr_id);
|
|
457
|
+
CREATE INDEX IF NOT EXISTS idx_index_runs_started ON index_runs(started_at);
|
|
396
458
|
`;
|
|
397
459
|
|
|
398
460
|
// src/rules/team-rules.ts
|
|
@@ -678,6 +740,80 @@ function validateTeamRulesFile(cwd) {
|
|
|
678
740
|
rules: loaded.rules
|
|
679
741
|
};
|
|
680
742
|
}
|
|
743
|
+
/**
 * Appends a new team rule to the Anchor rules file and re-validates the file.
 *
 * The rule is written with a single evidence entry built from `input.prNumber`,
 * `input.prUrl` and `input.sourceType` (defaulting to "pr_body"), and its
 * confidence level is always "strong".
 *
 * If validation rejects the updated file, or the new rule cannot be found in
 * the validated result, the previous file content is restored before the error
 * is thrown so that a rejected rule never stays on disk.
 *
 * @param {string} cwd - Directory used to locate the rules file.
 * @param {object} input - Rule fields: id, category, text, optional filePaths,
 *   optional symbols, prNumber, prUrl and optional sourceType.
 * @returns {{ path: string, rule: object }} The rules file path and the rule as
 *   returned by validation.
 * @throws {Error} When the updated rules file fails validation or the rule is
 *   missing from the validated result.
 */
function addTeamRule(cwd, input) {
  ensureTeamRulesFile(cwd);
  const filePath = rulesPath(cwd);
  // Keep the exact previous content so a failed validation can be rolled back.
  const originalText = fs2.readFileSync(filePath, "utf8");
  const raw = JSON.parse(originalText);
  const nextRule = {
    id: input.id,
    category: input.category,
    text: input.text,
    filePaths: input.filePaths ?? [],
    symbols: input.symbols ?? [],
    evidence: [
      {
        prNumber: input.prNumber,
        prUrl: input.prUrl,
        sourceType: input.sourceType ?? "pr_body"
      }
    ],
    confidenceLevel: "strong"
  };
  const next = { version: 1, rules: [...raw.rules ?? [], nextRule] };
  fs2.writeFileSync(filePath, `${JSON.stringify(next, null, 2)}\n`);
  try {
    const validation = validateTeamRulesFile(cwd);
    if (!validation.ok) {
      throw new Error(`Invalid Anchor rule: ${validation.errors.join("; ")}`);
    }
    const rule = validation.rules.find((item) => item.id === input.id);
    if (!rule) throw new Error(`Failed to add Anchor rule ${input.id}`);
    return { path: filePath, rule };
  } catch (error) {
    // Validation runs against the file on disk, so the write has to happen
    // first; undo it here rather than leaving an invalid rules file behind.
    fs2.writeFileSync(filePath, originalText);
    throw error;
  }
}
|
|
773
|
+
/**
 * Checks that every evidence entry of every valid team rule refers to a pull
 * request number present in the local Anchor database.
 *
 * @param {string} cwd - Directory used to locate the rules file and, via the
 *   detected git root (falling back to `cwd`), the database.
 * @returns {{ ok: boolean, path: string, checked: number,
 *   missing: Array<{ ruleId: string, prNumber: number }>, errors: string[] }}
 *   `ok` is false when the rules file is invalid, the database file does not
 *   exist, or at least one evidence PR number is not found.
 */
function checkTeamRuleEvidence(cwd) {
  const validation = validateTeamRulesFile(cwd);
  if (!validation.ok) {
    return {
      ok: false,
      path: validation.path,
      checked: 0,
      missing: [],
      errors: validation.errors
    };
  }
  // Resolve the root once; it is needed for both the path and the connection.
  const root = detectGitRoot(cwd) ?? cwd;
  const databasePath = defaultDatabasePath(root);
  if (!fs2.existsSync(databasePath)) {
    return {
      ok: false,
      path: validation.path,
      checked: 0,
      missing: [],
      errors: [`Anchor database not found at ${databasePath}. Run anchor index first.`]
    };
  }
  const db = openAnchorDatabase(root, databasePath);
  try {
    initializeSchema(db);
    // Prepared once and reused for every evidence entry.
    // NOTE(review): the lookup matches on PR number only, not on repository —
    // confirm that is intended when several repositories share one database.
    const findPullRequest = db.prepare("SELECT 1 FROM pull_requests WHERE number = ? LIMIT 1");
    const missing = [];
    let checked = 0;
    for (const rule of validation.rules) {
      for (const evidence of rule.evidence) {
        checked += 1;
        const row = findPullRequest.get(evidence.prNumber);
        if (!row) missing.push({ ruleId: rule.id, prNumber: evidence.prNumber });
      }
    }
    return {
      ok: missing.length === 0,
      path: validation.path,
      checked,
      missing,
      errors: []
    };
  } finally {
    db.close();
  }
}
|
|
681
817
|
function pathMatch(rulePaths, queryFiles) {
|
|
682
818
|
if (rulePaths.length === 0 || queryFiles.length === 0) return 0;
|
|
683
819
|
let best = 0;
|
|
@@ -733,6 +869,15 @@ function confidenceReasons(rule) {
|
|
|
733
869
|
...rule.symbols.length > 0 ? ["symbol-associated"] : []
|
|
734
870
|
];
|
|
735
871
|
}
|
|
872
|
+
/**
 * Translates numeric rank signals into short human-readable reasons.
 *
 * The list always starts with "team-approved rule". File-path and symbol
 * signals each add at most one reason (>= 0.9 is "exact", >= 0.45 is the
 * weaker label), and a text signal >= 0.35 adds one more.
 *
 * @param {{ filePathMatch: number, symbolMatch: number, textMatch: number }} parts
 * @returns {string[]} At most five reasons, in a fixed order.
 */
function matchReasons(parts) {
  const { filePathMatch, symbolMatch, textMatch } = parts;
  // Picks the label for a two-tier signal, or nothing when it is too weak.
  const tiered = (value, exactLabel, relatedLabel) => {
    if (value >= 0.9) return [exactLabel];
    if (value >= 0.45) return [relatedLabel];
    return [];
  };
  const collected = [
    "team-approved rule",
    ...tiered(filePathMatch, "exact file path match", "related file path match"),
    ...tiered(symbolMatch, "exact symbol match", "symbol-associated rule"),
    ...(textMatch >= 0.35 ? ["text matched task or diff terms"] : [])
  ];
  return collected.slice(0, 5);
}
|
|
736
881
|
function passesStrictMode(rule, input) {
|
|
737
882
|
if (!input.strict) return true;
|
|
738
883
|
if (rule.freshnessStatus === "stale") return false;
|
|
@@ -744,13 +889,21 @@ function rankTeamRules(db, cwd, input) {
|
|
|
744
889
|
const codeSnapshot = loadCurrentCodeSnapshot(db);
|
|
745
890
|
return loaded.rules.map((rule) => {
|
|
746
891
|
const freshness = evaluateFreshness(rule, codeSnapshot);
|
|
747
|
-
const
|
|
892
|
+
const parts = {
|
|
893
|
+
filePathMatch: pathMatch(rule.filePaths, input.files ?? []),
|
|
894
|
+
symbolMatch: symbolMatch(rule, input.symbols ?? []),
|
|
895
|
+
textMatch: textMatch(rule, input),
|
|
896
|
+
confidence: confidenceScore(rule.confidenceLevel)
|
|
897
|
+
};
|
|
898
|
+
const score = 1 + 0.35 * parts.filePathMatch + 0.25 * parts.symbolMatch + 0.25 * parts.textMatch + 0.15 * parts.confidence;
|
|
748
899
|
return {
|
|
749
900
|
...rule,
|
|
750
901
|
score: Number(score.toFixed(4)),
|
|
751
902
|
freshnessStatus: freshness.status,
|
|
752
903
|
freshnessReason: freshness.reason,
|
|
753
|
-
confidenceReasons: confidenceReasons(rule)
|
|
904
|
+
confidenceReasons: confidenceReasons(rule),
|
|
905
|
+
matchReasons: matchReasons(parts),
|
|
906
|
+
rankSignals: parts
|
|
754
907
|
};
|
|
755
908
|
}).filter((rule) => passesStrictMode(rule, input)).sort((a, b) => b.score - a.score).slice(0, 4);
|
|
756
909
|
}
|
|
@@ -761,12 +914,102 @@ function countValidTeamRules(cwd) {
|
|
|
761
914
|
return { count: loaded.rules.length, lastRuleIndexTime: stat.mtime.toISOString() };
|
|
762
915
|
}
|
|
763
916
|
|
|
917
|
+
// src/indexer/test-awareness.ts
|
|
918
|
+
import path3 from "path";
|
|
919
|
+
/**
 * Converts backslashes to forward slashes and removes a leading "./"
 * (including any extra slashes directly after the dot).
 *
 * @param {string} filePath
 * @returns {string}
 */
function normalizePath(filePath) {
  const forwardSlashed = filePath.split("\\").join("/");
  return forwardSlashed.replace(/^\.\/+/, "");
}
|
|
922
|
+
/**
 * Splits a normalized path on "/" and drops empty segments.
 *
 * @param {string} filePath
 * @returns {string[]}
 */
function pathSegments(filePath) {
  const pieces = normalizePath(filePath).split("/");
  return pieces.filter((piece) => piece.length > 0);
}
|
|
925
|
+
/**
 * Returns the file name without its extension, treating a trailing
 * ".test.<ext>" / ".spec.<ext>" as a single extension.
 *
 * Exactly one suffix is removed. Stripping a second extension after the test
 * suffix would turn "user.service.spec.ts" into "user" while the source
 * "user.service.ts" yields "user.service", so the two could never be paired
 * by basename.
 *
 * @param {string} filePath
 * @returns {string}
 */
function basenameWithoutExtensions(filePath) {
  const base = path3.posix.basename(normalizePath(filePath));
  const withoutTestSuffix = base.replace(/\.(test|spec)\.[^.]+$/i, "");
  // A test suffix was present and is now gone: do not strip anything else.
  if (withoutTestSuffix !== base) return withoutTestSuffix;
  return base.replace(/\.[^.]+$/i, "");
}
|
|
929
|
+
/**
 * Returns the directory segments of a path with test-only directory names
 * ("__tests__", "test", "tests", "spec") removed.
 *
 * Names are compared case-insensitively so the result agrees with
 * isTestFilePath, which lowercases segments before matching them.
 *
 * @param {string} filePath
 * @returns {string[]}
 */
function sourceLikeDir(filePath) {
  const testDirNames = new Set(["__tests__", "test", "tests", "spec"]);
  const segments = pathSegments(path3.posix.dirname(normalizePath(filePath)));
  return segments.filter((segment) => !testDirNames.has(segment.toLowerCase()));
}
|
|
933
|
+
/**
 * Decides whether a path looks like a test file.
 *
 * A path qualifies when its file name ends in ".test.<ext>" or ".spec.<ext>",
 * or when any path segment (compared in lowercase) is "__tests__", "test",
 * "tests" or "spec".
 *
 * @param {string} filePath
 * @returns {boolean}
 */
function isTestFilePath(filePath) {
  const normalized = normalizePath(filePath);
  const lowerSegments = pathSegments(normalized).map((segment) => segment.toLowerCase());
  const fileName = path3.posix.basename(normalized).toLowerCase();
  if (/\.(test|spec)\.[^.]+$/i.test(fileName)) return true;
  const testDirNames = ["__tests__", "test", "tests", "spec"];
  return testDirNames.some((name) => lowerSegments.includes(name));
}
|
|
939
|
+
/**
 * Copies the fields stored for a test file out of a code-file record,
 * dropping any other properties.
 *
 * @param {object} file - Record with repo, path, language, sizeBytes,
 *   contentHash and updatedAt.
 * @returns {object} A new object holding only those six fields.
 */
function testRecord(file) {
  const { repo, path, language, sizeBytes, contentHash, updatedAt } = file;
  return { repo, path, language, sizeBytes, contentHash, updatedAt };
}
|
|
949
|
+
/**
 * Maps a link reason to its strength score.
 *
 * @param {string} reason
 * @returns {number} 1 for "same basename", 0.9 for "imported source path",
 *   0.7 for "same directory", and 0.5 for any other reason.
 */
function strengthFor(reason) {
  switch (reason) {
    case "same basename":
      return 1;
    case "imported source path":
      return 0.9;
    case "same directory":
      return 0.7;
    default:
      return 0.5;
  }
}
|
|
955
|
+
/**
 * Reports whether the indexed text of a test file mentions a source file.
 *
 * The sanitized text of the test's chunks is joined and searched for, in order:
 * the full source path, the source path without its extension, and a
 * `from "<...>/<basename>"` or `require("<...>/<basename>")` specifier.
 *
 * Compared with a plain suffix match this guards two false-positive cases:
 * - Dotfiles such as ".env" no longer reduce to an empty needle (an empty
 *   string is contained in every text, and an empty basename turns the
 *   specifier pattern into "any import").
 * - The basename must start the specifier or follow a "/", so importing
 *   "./superuser" does not count as a mention of "user".
 *
 * @param {string} testPath - Key into `chunksByFile` for the test file.
 * @param {string} sourcePath - Path of the candidate source file.
 * @param {Map<string, Array<{ sanitizedText: string }>>} chunksByFile
 * @returns {boolean}
 */
function pathMentionedInTest(testPath, sourcePath, chunksByFile) {
  const text = (chunksByFile.get(testPath) ?? []).map((chunk) => chunk.sanitizedText).join("\n");
  if (!text) return false;
  if (text.includes(sourcePath)) return true;
  // Strip only a real extension: the dot must not begin the file name and the
  // extension must not span a "/".
  const sourceNoExt = sourcePath.replace(/(?<=[^/])\.[^./]+$/, "");
  if (sourceNoExt && text.includes(sourceNoExt)) return true;
  const sourceBase = basenameWithoutExtensions(sourcePath);
  if (!sourceBase) return false;
  const specifier = `["'](?:[^"']*/)?${escapeRegExp(sourceBase)}["']`;
  return new RegExp(`from\\s+${specifier}`, "i").test(text) || new RegExp(`require\\(${specifier}\\)`, "i").test(text);
}
|
|
962
|
+
/**
 * Backslash-escapes every regular-expression metacharacter in a string so it
 * can be embedded literally in a RegExp source.
 *
 * @param {string} value
 * @returns {string}
 */
function escapeRegExp(value) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metacharacters, (character) => `\\${character}`);
}
|
|
965
|
+
/**
 * Splits indexed code files into test files and source files and infers links
 * between them.
 *
 * For every (test, source) pair a link is recorded when:
 * - their basenames match case-insensitively ("same basename"), otherwise
 * - their directories, with test directories removed, are non-empty and equal
 *   ("same directory");
 * and, independently, when the test's text mentions the source
 * ("imported source path"). Links are keyed by source path, test path and
 * reason, so each combination appears once.
 *
 * @param {string} repo - Repository identifier copied onto every link.
 * @param {Array<object>} codeFiles - Indexed files; `path` decides test vs source.
 * @param {Array<object>} codeChunks - Indexed chunks; grouped by `filePath`.
 * @returns {{ testFiles: object[], testLinks: object[] }}
 */
function inferTestAwareness(repo, codeFiles, codeChunks) {
  const testFiles = [];
  const sourceFiles = [];
  for (const file of codeFiles) {
    (isTestFilePath(file.path) ? testFiles : sourceFiles).push(file);
  }
  const chunksByFile = /* @__PURE__ */ new Map();
  for (const chunk of codeChunks) {
    const chunks = chunksByFile.get(chunk.filePath) ?? [];
    chunks.push(chunk);
    chunksByFile.set(chunk.filePath, chunks);
  }
  // Per-source values do not depend on the test being compared, so compute
  // them once instead of once per (test, source) pair.
  const sources = sourceFiles.map((file) => ({
    path: file.path,
    base: basenameWithoutExtensions(file.path).toLowerCase(),
    dir: sourceLikeDir(file.path).join("/")
  }));
  const linkMap = /* @__PURE__ */ new Map();
  const addLink = (sourcePath, testPath, reason) => {
    const key = `${sourcePath}\0${testPath}\0${reason}`;
    linkMap.set(key, {
      repo,
      sourcePath,
      testPath,
      reason,
      strength: strengthFor(reason)
    });
  };
  for (const test of testFiles) {
    const testBase = basenameWithoutExtensions(test.path).toLowerCase();
    const testDir = sourceLikeDir(test.path).join("/");
    for (const source of sources) {
      if (testBase === source.base) {
        addLink(source.path, test.path, "same basename");
      } else if (testDir && source.dir && testDir === source.dir) {
        addLink(source.path, test.path, "same directory");
      }
      if (pathMentionedInTest(test.path, source.path, chunksByFile)) {
        addLink(source.path, test.path, "imported source path");
      }
    }
  }
  return {
    testFiles: testFiles.map(testRecord),
    // Map keys are already unique, so the values are the de-duplicated links.
    testLinks: [...linkMap.values()]
  };
}
|
|
1006
|
+
|
|
764
1007
|
// src/db/database.ts
|
|
765
1008
|
function defaultDatabasePath(cwd) {
|
|
766
|
-
return
|
|
1009
|
+
return path4.join(cwd, ".anchor", "index.sqlite");
|
|
767
1010
|
}
|
|
768
1011
|
function openAnchorDatabase(cwd, databasePath = defaultDatabasePath(cwd)) {
|
|
769
|
-
fs3.mkdirSync(
|
|
1012
|
+
fs3.mkdirSync(path4.dirname(databasePath), { recursive: true });
|
|
770
1013
|
const db = new Database(databasePath);
|
|
771
1014
|
db.pragma("journal_mode = WAL");
|
|
772
1015
|
db.pragma("foreign_keys = ON");
|
|
@@ -790,7 +1033,9 @@ function checkSchema(db) {
|
|
|
790
1033
|
const codeTables = db.prepare("SELECT name FROM sqlite_master WHERE type IN ('table', 'virtual') AND name = ?").all("code_chunks_fts");
|
|
791
1034
|
const wisdom = db.prepare("SELECT name FROM sqlite_master WHERE name = ?").all("wisdom_units");
|
|
792
1035
|
const code = db.prepare("SELECT name FROM sqlite_master WHERE name = ?").all("code_chunks");
|
|
793
|
-
|
|
1036
|
+
const tests = db.prepare("SELECT name FROM sqlite_master WHERE name = ?").all("test_files");
|
|
1037
|
+
const regressions = db.prepare("SELECT name FROM sqlite_master WHERE name = ?").all("regression_events");
|
|
1038
|
+
return tables.length > 0 && wisdom.length > 0 && codeTables.length > 0 && code.length > 0 && tests.length > 0 && regressions.length > 0;
|
|
794
1039
|
} catch {
|
|
795
1040
|
return false;
|
|
796
1041
|
}
|
|
@@ -837,14 +1082,15 @@ function deleteExistingPrData(db, prId) {
|
|
|
837
1082
|
const unitRows = db.prepare("SELECT id FROM wisdom_units WHERE pr_id = ?").all(prId);
|
|
838
1083
|
const deleteFts = db.prepare("DELETE FROM wisdom_units_fts WHERE unitId = ?");
|
|
839
1084
|
for (const row of unitRows) deleteFts.run(row.id);
|
|
1085
|
+
db.prepare("DELETE FROM regression_events WHERE pr_id = ?").run(prId);
|
|
840
1086
|
db.prepare("DELETE FROM wisdom_units WHERE pr_id = ?").run(prId);
|
|
841
1087
|
db.prepare("DELETE FROM pr_comments WHERE pr_id = ?").run(prId);
|
|
842
1088
|
db.prepare("DELETE FROM pr_files WHERE pr_id = ?").run(prId);
|
|
843
1089
|
}
|
|
844
|
-
function upsertPullRequest(db, pr, wisdomUnits) {
|
|
1090
|
+
function upsertPullRequest(db, pr, wisdomUnits, regressionEvents = []) {
|
|
845
1091
|
const repoId = ensureRepository(db, pr.repo);
|
|
846
1092
|
const author = pr.user?.login ?? "unknown";
|
|
847
|
-
const
|
|
1093
|
+
const labels2 = (pr.labels ?? []).map((label) => typeof label === "string" ? label : label.name).filter(Boolean);
|
|
848
1094
|
const titleText = redactedHistoricalText(pr.title);
|
|
849
1095
|
const bodyText = redactedHistoricalText(pr.body ?? "");
|
|
850
1096
|
const bodySanitized = sanitizeHistoricalText(pr.body ?? "");
|
|
@@ -871,7 +1117,7 @@ function upsertPullRequest(db, pr, wisdomUnits) {
|
|
|
871
1117
|
bodyText,
|
|
872
1118
|
bodySanitized,
|
|
873
1119
|
author,
|
|
874
|
-
JSON.stringify(
|
|
1120
|
+
JSON.stringify(labels2),
|
|
875
1121
|
pr.created_at,
|
|
876
1122
|
pr.merged_at ?? null,
|
|
877
1123
|
pr.updated_at ?? null
|
|
@@ -891,6 +1137,7 @@ function upsertPullRequest(db, pr, wisdomUnits) {
|
|
|
891
1137
|
file.patch ? sanitizeHistoricalText(file.patch) : null
|
|
892
1138
|
);
|
|
893
1139
|
}
|
|
1140
|
+
insertPrCochangeTestLinks(db, repoId, pr.files.map((file) => file.filename));
|
|
894
1141
|
const insertComment = db.prepare(
|
|
895
1142
|
`INSERT INTO pr_comments
|
|
896
1143
|
(pr_id, source_type, author, body_text, sanitized_text, file_path, created_at, is_reviewer)
|
|
@@ -974,21 +1221,56 @@ function upsertPullRequest(db, pr, wisdomUnits) {
|
|
|
974
1221
|
unit.category
|
|
975
1222
|
);
|
|
976
1223
|
}
|
|
1224
|
+
const insertRegression = db.prepare(
|
|
1225
|
+
`INSERT INTO regression_events
|
|
1226
|
+
(id, repo_id, pr_id, repo, pr_number, pr_url, summary_sanitized, file_paths_json,
|
|
1227
|
+
symbols_json, test_paths_json, authors_json, labels_json, signals_json, created_at,
|
|
1228
|
+
merged_at, confidence)
|
|
1229
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
|
|
1230
|
+
);
|
|
1231
|
+
for (const event of regressionEvents) {
|
|
1232
|
+
insertRegression.run(
|
|
1233
|
+
event.id,
|
|
1234
|
+
repoId,
|
|
1235
|
+
prRow.id,
|
|
1236
|
+
event.repo,
|
|
1237
|
+
event.prNumber,
|
|
1238
|
+
event.prUrl,
|
|
1239
|
+
event.summary,
|
|
1240
|
+
JSON.stringify(event.filePaths),
|
|
1241
|
+
JSON.stringify(event.symbols),
|
|
1242
|
+
JSON.stringify(event.testPaths),
|
|
1243
|
+
JSON.stringify(event.authors),
|
|
1244
|
+
JSON.stringify(event.labels),
|
|
1245
|
+
JSON.stringify(event.signals),
|
|
1246
|
+
event.createdAt,
|
|
1247
|
+
event.mergedAt ?? null,
|
|
1248
|
+
event.confidence
|
|
1249
|
+
);
|
|
1250
|
+
}
|
|
977
1251
|
});
|
|
978
1252
|
transaction();
|
|
979
1253
|
const comments = (pr.reviews?.length ?? 0) + (pr.reviewComments?.length ?? 0) + (pr.issueComments?.length ?? 0);
|
|
980
|
-
return {
|
|
1254
|
+
return {
|
|
1255
|
+
files: pr.files.length,
|
|
1256
|
+
comments,
|
|
1257
|
+
wisdom: wisdomUnits.length,
|
|
1258
|
+
regressions: regressionEvents.length
|
|
1259
|
+
};
|
|
981
1260
|
}
|
|
982
1261
|
function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd) {
|
|
983
1262
|
initializeSchema(db);
|
|
984
1263
|
const repoId = ensureRepository(db, repo);
|
|
985
1264
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
1265
|
+
const testAwareness = inferTestAwareness(repo, codeFiles, codeChunks);
|
|
986
1266
|
const transaction = db.transaction(() => {
|
|
987
1267
|
const existingChunks = db.prepare("SELECT id FROM code_chunks WHERE repo_id = ?").all(repoId);
|
|
988
1268
|
const deleteFts = db.prepare("DELETE FROM code_chunks_fts WHERE chunkId = ?");
|
|
989
1269
|
for (const row of existingChunks) deleteFts.run(row.id);
|
|
990
1270
|
db.prepare("DELETE FROM code_chunks WHERE repo_id = ?").run(repoId);
|
|
991
1271
|
db.prepare("DELETE FROM code_files WHERE repo_id = ?").run(repoId);
|
|
1272
|
+
db.prepare("DELETE FROM test_links WHERE repo_id = ? AND reason != 'PR co-change'").run(repoId);
|
|
1273
|
+
db.prepare("DELETE FROM test_files WHERE repo_id = ?").run(repoId);
|
|
992
1274
|
const insertFile = db.prepare(
|
|
993
1275
|
`INSERT INTO code_files
|
|
994
1276
|
(repo_id, path, language, size_bytes, content_hash, updated_at)
|
|
@@ -1042,6 +1324,7 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd) {
|
|
|
1042
1324
|
chunk.language ?? ""
|
|
1043
1325
|
);
|
|
1044
1326
|
}
|
|
1327
|
+
insertTestAwareness(db, repoId, testAwareness.testFiles, testAwareness.testLinks);
|
|
1045
1328
|
db.prepare(
|
|
1046
1329
|
`INSERT INTO code_index_state (repo, last_indexed_at, indexed_files, code_chunks, skipped_files)
|
|
1047
1330
|
VALUES (?, ?, ?, ?, ?)
|
|
@@ -1056,10 +1339,72 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd) {
|
|
|
1056
1339
|
return {
|
|
1057
1340
|
indexedFiles: codeFiles.length,
|
|
1058
1341
|
codeChunksCreated: codeChunks.length,
|
|
1342
|
+
testFilesIndexed: testAwareness.testFiles.length,
|
|
1343
|
+
testLinksCreated: testAwareness.testLinks.length,
|
|
1059
1344
|
skippedFiles,
|
|
1060
1345
|
databasePath: defaultDatabasePath(cwd)
|
|
1061
1346
|
};
|
|
1062
1347
|
}
|
|
1348
|
+
/**
 * Records a "PR co-change" link (strength 0.75) between every source file and
 * every test file changed in the same pull request.
 *
 * Does nothing unless the pull request touches at least one test file and at
 * least one non-test file. An existing link with the same repository, paths
 * and reason has its strength overwritten rather than being duplicated.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {number} repoId - Repository row id.
 * @param {string[]} filePaths - Paths changed by the pull request.
 * @returns {void}
 */
function insertPrCochangeTestLinks(db, repoId, filePaths) {
  const changedTests = [];
  const changedSources = [];
  for (const changedPath of filePaths) {
    if (isTestFilePath(changedPath)) changedTests.push(changedPath);
    else changedSources.push(changedPath);
  }
  if (changedTests.length === 0 || changedSources.length === 0) return;
  const upsertLink = db.prepare(
    `INSERT INTO test_links (repo_id, source_path, test_path, reason, strength)
VALUES (?, ?, ?, 'PR co-change', 0.75)
ON CONFLICT(repo_id, source_path, test_path, reason) DO UPDATE SET strength = excluded.strength`
  );
  changedSources.forEach((sourcePath) => {
    changedTests.forEach((testPath) => {
      upsertLink.run(repoId, sourcePath, testPath);
    });
  });
}
|
|
1361
|
+
/**
 * Inserts test-file rows and test-link rows for a repository.
 *
 * Both statements are plain INSERTs, so rows that collide with the tables'
 * UNIQUE constraints are not tolerated here.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {number} repoId - Repository row id stored on every row.
 * @param {Array<object>} testFiles - Records with path, language, sizeBytes,
 *   contentHash and updatedAt.
 * @param {Array<object>} testLinks - Records with sourcePath, testPath, reason
 *   and strength.
 * @returns {void}
 */
function insertTestAwareness(db, repoId, testFiles, testLinks) {
  const fileStatement = db.prepare(
    `INSERT INTO test_files
(repo_id, path, language, size_bytes, content_hash, updated_at)
VALUES (?, ?, ?, ?, ?, ?)`
  );
  testFiles.forEach((file) => {
    // A missing language is stored as NULL.
    const language = file.language ?? null;
    fileStatement.run(repoId, file.path, language, file.sizeBytes, file.contentHash, file.updatedAt);
  });
  const linkStatement = db.prepare(
    `INSERT INTO test_links (repo_id, source_path, test_path, reason, strength)
VALUES (?, ?, ?, ?, ?)`
  );
  testLinks.forEach((link) => {
    linkStatement.run(repoId, link.sourcePath, link.testPath, link.reason, link.strength);
  });
}
|
|
1385
|
+
/**
 * Appends one row to `index_runs` describing an indexing command.
 *
 * The schema is initialized first. Optional fields that are absent are stored
 * as NULL, except `finishedAt`, which defaults to the current time, and
 * `failures`, which defaults to an empty list serialized as JSON.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {object} run - Run details: command, startedAt and status, plus
 *   optional repo, finishedAt, historyCoverage, historyLimit, prsFetched,
 *   prsSkipped, commentsIndexed, codeFilesIndexed, testFilesIndexed, failures.
 * @returns {void}
 */
function recordIndexRun(db, run) {
  initializeSchema(db);
  const insertRun = db.prepare(
    `INSERT INTO index_runs
(command, repo, started_at, finished_at, history_coverage, history_limit, prs_fetched,
prs_skipped, comments_indexed, code_files_indexed, test_files_indexed, failures_json, status)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
  );
  // Values are listed in the same order as the column list above.
  const values = [
    run.command,
    run.repo ?? null,
    run.startedAt,
    run.finishedAt ?? (/* @__PURE__ */ new Date()).toISOString(),
    run.historyCoverage ?? null,
    run.historyLimit ?? null,
    run.prsFetched ?? null,
    run.prsSkipped ?? null,
    run.commentsIndexed ?? null,
    run.codeFilesIndexed ?? null,
    run.testFilesIndexed ?? null,
    JSON.stringify(run.failures ?? []),
    run.status
  ];
  insertRun.run(...values);
}
|
|
1063
1408
|
function getIndexStatus(cwd, githubTokenConfigured = Boolean(resolveGitHubToken({ cwd }).token), databasePath = defaultDatabasePath(cwd)) {
|
|
1064
1409
|
if (!fs3.existsSync(databasePath)) {
|
|
1065
1410
|
const rules = countValidTeamRules(cwd);
|
|
@@ -1071,6 +1416,9 @@ function getIndexStatus(cwd, githubTokenConfigured = Boolean(resolveGitHubToken(
|
|
|
1071
1416
|
wisdomUnitCount: 0,
|
|
1072
1417
|
codeFileCount: 0,
|
|
1073
1418
|
codeChunkCount: 0,
|
|
1419
|
+
testFileCount: 0,
|
|
1420
|
+
testLinkCount: 0,
|
|
1421
|
+
regressionEventCount: 0,
|
|
1074
1422
|
historyCoverage: "unknown",
|
|
1075
1423
|
staleEvidenceCount: 0,
|
|
1076
1424
|
teamRuleCount: rules.count,
|
|
@@ -1092,6 +1440,9 @@ function getIndexStatus(cwd, githubTokenConfigured = Boolean(resolveGitHubToken(
|
|
|
1092
1440
|
wisdomUnitCount: 0,
|
|
1093
1441
|
codeFileCount: 0,
|
|
1094
1442
|
codeChunkCount: 0,
|
|
1443
|
+
testFileCount: 0,
|
|
1444
|
+
testLinkCount: 0,
|
|
1445
|
+
regressionEventCount: 0,
|
|
1095
1446
|
historyCoverage: "unknown",
|
|
1096
1447
|
staleEvidenceCount: 0,
|
|
1097
1448
|
teamRuleCount: rules2.count,
|
|
@@ -1108,6 +1459,13 @@ function getIndexStatus(cwd, githubTokenConfigured = Boolean(resolveGitHubToken(
|
|
|
1108
1459
|
const codeIndexRow = db.prepare("SELECT last_indexed_at FROM code_index_state ORDER BY last_indexed_at DESC LIMIT 1").get();
|
|
1109
1460
|
const wisdomUnitCount = count("wisdom_units");
|
|
1110
1461
|
const codeChunkCount = count("code_chunks");
|
|
1462
|
+
const lastSuccessfulRun = db.prepare(
|
|
1463
|
+
"SELECT finished_at, failures_json FROM index_runs WHERE status = 'success' ORDER BY finished_at DESC LIMIT 1"
|
|
1464
|
+
).get();
|
|
1465
|
+
const lastFailedRun = db.prepare(
|
|
1466
|
+
"SELECT finished_at, failures_json FROM index_runs WHERE status = 'failed' ORDER BY finished_at DESC LIMIT 1"
|
|
1467
|
+
).get();
|
|
1468
|
+
const staleCodeIndex = isCodeIndexStale(codeIndexRow?.last_indexed_at ?? void 0);
|
|
1111
1469
|
const rules = countValidTeamRules(cwd);
|
|
1112
1470
|
return {
|
|
1113
1471
|
repo: repoRow?.full_name,
|
|
@@ -1118,6 +1476,9 @@ function getIndexStatus(cwd, githubTokenConfigured = Boolean(resolveGitHubToken(
|
|
|
1118
1476
|
wisdomUnitCount,
|
|
1119
1477
|
codeFileCount: count("code_files"),
|
|
1120
1478
|
codeChunkCount,
|
|
1479
|
+
testFileCount: count("test_files"),
|
|
1480
|
+
testLinkCount: count("test_links"),
|
|
1481
|
+
regressionEventCount: count("regression_events"),
|
|
1121
1482
|
historyCoverage: syncRow?.history_coverage ?? "unknown",
|
|
1122
1483
|
historyLimit: syncRow?.history_limit ?? void 0,
|
|
1123
1484
|
staleEvidenceCount: countStaleEvidence(db),
|
|
@@ -1125,6 +1486,16 @@ function getIndexStatus(cwd, githubTokenConfigured = Boolean(resolveGitHubToken(
|
|
|
1125
1486
|
lastSyncTime: syncRow?.last_sync_at ?? void 0,
|
|
1126
1487
|
lastCodeIndexTime: codeIndexRow?.last_indexed_at ?? void 0,
|
|
1127
1488
|
lastRuleIndexTime: rules.lastRuleIndexTime,
|
|
1489
|
+
lastSuccessfulRun: lastSuccessfulRun?.finished_at ?? void 0,
|
|
1490
|
+
lastFailedRun: lastFailedRun?.finished_at ?? void 0,
|
|
1491
|
+
staleCodeIndex,
|
|
1492
|
+
suggestedNextCommand: suggestedNextCommand({
|
|
1493
|
+
prCount: count("pull_requests"),
|
|
1494
|
+
wisdomUnitCount,
|
|
1495
|
+
codeChunkCount,
|
|
1496
|
+
staleCodeIndex,
|
|
1497
|
+
historyCoverage: syncRow?.history_coverage ?? "unknown"
|
|
1498
|
+
}),
|
|
1128
1499
|
githubTokenConfigured,
|
|
1129
1500
|
health: wisdomUnitCount > 0 || codeChunkCount > 0 ? "ok" : "empty_index"
|
|
1130
1501
|
};
|
|
@@ -1132,6 +1503,18 @@ function getIndexStatus(cwd, githubTokenConfigured = Boolean(resolveGitHubToken(
|
|
|
1132
1503
|
db.close();
|
|
1133
1504
|
}
|
|
1134
1505
|
}
|
|
1506
|
+
/**
 * Reports whether the code index is considered stale.
 *
 * A missing or unparseable timestamp counts as stale. Otherwise the index is
 * stale when more than `maxAgeMs` milliseconds have passed since it was built.
 *
 * @param {string | undefined} lastIndexedAt - Timestamp accepted by `Date.parse`.
 * @param {number} [maxAgeMs] - Maximum age before the index is stale; defaults
 *   to seven days, the window that was previously hard-coded.
 * @returns {boolean}
 */
function isCodeIndexStale(lastIndexedAt, maxAgeMs = 7 * 24 * 60 * 60 * 1e3) {
  if (!lastIndexedAt) return true;
  const timestamp = Date.parse(lastIndexedAt);
  if (Number.isNaN(timestamp)) return true;
  return Date.now() - timestamp > maxAgeMs;
}
|
|
1512
|
+
/**
 * Chooses the next CLI command to suggest from the current index state.
 *
 * Checks run in priority order: nothing indexed at all, then a missing or
 * stale code index, then history coverage other than "all".
 *
 * @param {{ prCount: number, wisdomUnitCount: number, codeChunkCount: number,
 *   staleCodeIndex: boolean, historyCoverage: string }} input
 * @returns {string | undefined} The suggested command, or undefined when no
 *   follow-up applies.
 */
function suggestedNextCommand(input) {
  const { prCount, wisdomUnitCount, codeChunkCount, staleCodeIndex, historyCoverage } = input;
  const nothingIndexed = prCount === 0 && wisdomUnitCount === 0;
  if (nothingIndexed) return "anchor index";
  const codeIndexNeedsWork = codeChunkCount === 0 || staleCodeIndex;
  if (codeIndexNeedsWork) return "anchor index-code";
  return historyCoverage !== "all" ? "anchor index-all" : void 0;
}
|
|
1135
1518
|
function countStaleEvidence(db) {
|
|
1136
1519
|
const codeFiles = new Set(
|
|
1137
1520
|
db.prepare("SELECT path FROM code_files").all().map(
|
|
@@ -1186,7 +1569,7 @@ function chunkHistoricalText(text, maxChunkLength = 700) {
|
|
|
1186
1569
|
|
|
1187
1570
|
// src/indexer/code-chunker.ts
|
|
1188
1571
|
import crypto from "crypto";
|
|
1189
|
-
import
|
|
1572
|
+
import path5 from "path";
|
|
1190
1573
|
var DEFAULT_CHUNK_LINES = 80;
|
|
1191
1574
|
var DEFAULT_OVERLAP_LINES = 8;
|
|
1192
1575
|
var FUNCTION_CALL_STOP_WORDS = /* @__PURE__ */ new Set([
|
|
@@ -1219,7 +1602,7 @@ function extractCodeSymbols(text, filePath) {
|
|
|
1219
1602
|
const candidate = match[1] ?? "";
|
|
1220
1603
|
if (!FUNCTION_CALL_STOP_WORDS.has(candidate)) symbols.push(candidate);
|
|
1221
1604
|
}
|
|
1222
|
-
const basename =
|
|
1605
|
+
const basename = path5.basename(filePath).replace(/\.[^.]+$/, "");
|
|
1223
1606
|
if (/^[A-Za-z_$][\w$-]*$/.test(basename)) symbols.push(basename);
|
|
1224
1607
|
return uniqueStrings(symbols).slice(0, 40);
|
|
1225
1608
|
}
|
|
@@ -1259,7 +1642,7 @@ function chunkCodeFile(file, options = {}) {
|
|
|
1259
1642
|
import { execFileSync as execFileSync3 } from "child_process";
|
|
1260
1643
|
import crypto2 from "crypto";
|
|
1261
1644
|
import fs4 from "fs";
|
|
1262
|
-
import
|
|
1645
|
+
import path6 from "path";
|
|
1263
1646
|
var DEFAULT_MAX_CODE_FILE_BYTES = 512 * 1024;
|
|
1264
1647
|
var HARD_EXCLUDED_SEGMENTS = /* @__PURE__ */ new Set([
|
|
1265
1648
|
".git",
|
|
@@ -1307,7 +1690,7 @@ function isHardExcludedCodePath(filePath) {
|
|
|
1307
1690
|
const normalized = normalizeGitPath(filePath);
|
|
1308
1691
|
const segments = normalized.split("/");
|
|
1309
1692
|
if (segments.some((segment) => HARD_EXCLUDED_SEGMENTS.has(segment))) return true;
|
|
1310
|
-
const basename =
|
|
1693
|
+
const basename = path6.posix.basename(normalized).toLowerCase();
|
|
1311
1694
|
if ([".netrc", ".npmrc", ".pypirc", ".yarnrc"].includes(basename)) return true;
|
|
1312
1695
|
if (basename === ".env" || basename.startsWith(".env.")) return true;
|
|
1313
1696
|
if (basename === "id_rsa" || basename === "id_rsa.pub" || basename === "id_dsa" || basename === "id_ecdsa" || basename === "id_ed25519") {
|
|
@@ -1317,7 +1700,7 @@ function isHardExcludedCodePath(filePath) {
|
|
|
1317
1700
|
return false;
|
|
1318
1701
|
}
|
|
1319
1702
|
function languageForPath(filePath) {
|
|
1320
|
-
const extension =
|
|
1703
|
+
const extension = path6.extname(filePath).toLowerCase();
|
|
1321
1704
|
return LANGUAGE_BY_EXTENSION[extension];
|
|
1322
1705
|
}
|
|
1323
1706
|
function isProbablyBinary(buffer) {
|
|
@@ -1340,7 +1723,7 @@ function discoverGitFiles(cwd) {
|
|
|
1340
1723
|
}
|
|
1341
1724
|
function discoverCodeFiles(cwd, repo, options = {}) {
|
|
1342
1725
|
const maxFileBytes = options.maxFileBytes ?? DEFAULT_MAX_CODE_FILE_BYTES;
|
|
1343
|
-
const rootPath =
|
|
1726
|
+
const rootPath = path6.resolve(cwd);
|
|
1344
1727
|
const files = [];
|
|
1345
1728
|
let skippedFiles = 0;
|
|
1346
1729
|
for (const filePath of discoverGitFiles(cwd)) {
|
|
@@ -1348,9 +1731,9 @@ function discoverCodeFiles(cwd, repo, options = {}) {
|
|
|
1348
1731
|
skippedFiles += 1;
|
|
1349
1732
|
continue;
|
|
1350
1733
|
}
|
|
1351
|
-
const absolutePath =
|
|
1352
|
-
const relativeToRoot =
|
|
1353
|
-
if (relativeToRoot.startsWith("..") ||
|
|
1734
|
+
const absolutePath = path6.resolve(cwd, filePath);
|
|
1735
|
+
const relativeToRoot = path6.relative(rootPath, absolutePath);
|
|
1736
|
+
if (relativeToRoot.startsWith("..") || path6.isAbsolute(relativeToRoot)) {
|
|
1354
1737
|
skippedFiles += 1;
|
|
1355
1738
|
continue;
|
|
1356
1739
|
}
|
|
@@ -1430,14 +1813,19 @@ function emptyCodeIndexSummary(cwd) {
|
|
|
1430
1813
|
return {
|
|
1431
1814
|
indexedFiles: 0,
|
|
1432
1815
|
codeChunksCreated: 0,
|
|
1816
|
+
testFilesIndexed: 0,
|
|
1817
|
+
testLinksCreated: 0,
|
|
1433
1818
|
skippedFiles: 0,
|
|
1434
1819
|
databasePath: defaultDatabasePath(cwd)
|
|
1435
1820
|
};
|
|
1436
1821
|
}
|
|
1437
1822
|
|
|
1823
|
+
// src/indexer/regression-extractor.ts
|
|
1824
|
+
import crypto4 from "crypto";
|
|
1825
|
+
|
|
1438
1826
|
// src/indexer/wisdom-extractor.ts
|
|
1439
1827
|
import crypto3 from "crypto";
|
|
1440
|
-
import
|
|
1828
|
+
import path7 from "path";
|
|
1441
1829
|
var CATEGORY_KEYWORDS = [
|
|
1442
1830
|
["security_note", /\b(security|secret|token|bearer|oauth|credential|xss|csrf|injection|sanitize|redact)\b/i],
|
|
1443
1831
|
["architecture_decision", /\b(architecture decision|architectural|we intentionally|design decision)\b/i],
|
|
@@ -1469,7 +1857,7 @@ function extractSymbols(text, filePaths) {
|
|
|
1469
1857
|
}
|
|
1470
1858
|
}
|
|
1471
1859
|
for (const filePath of filePaths) {
|
|
1472
|
-
const basename =
|
|
1860
|
+
const basename = path7.basename(filePath).replace(/\.[^.]+$/, "");
|
|
1473
1861
|
if (/^[A-Za-z_$][\w$]*$/.test(basename)) symbols.push(basename);
|
|
1474
1862
|
}
|
|
1475
1863
|
return uniqueStrings(symbols).slice(0, 30);
|
|
@@ -1618,6 +2006,76 @@ ${filePaths.join("\n")}`, filePaths);
|
|
|
1618
2006
|
return units;
|
|
1619
2007
|
}
|
|
1620
2008
|
|
|
2009
|
+
// src/indexer/regression-extractor.ts
|
|
2010
|
+
var REGRESSION_SIGNALS = [
|
|
2011
|
+
["regression", /\bregression\b/i],
|
|
2012
|
+
["revert", /\b(revert|reverted)\b/i],
|
|
2013
|
+
["rollback", /\brollback\b/i],
|
|
2014
|
+
["hotfix", /\bhotfix\b/i],
|
|
2015
|
+
["incident", /\bincident\b/i],
|
|
2016
|
+
["root cause", /\broot cause\b/i],
|
|
2017
|
+
["this broke", /\b(this broke|broke)\b/i],
|
|
2018
|
+
["fixed by", /\bfixed by\b/i]
|
|
2019
|
+
];
|
|
2020
|
+
function labels(pr) {
|
|
2021
|
+
return (pr.labels ?? []).map((label) => typeof label === "string" ? label : label.name).filter((label) => Boolean(label));
|
|
2022
|
+
}
|
|
2023
|
+
function sourceTexts(pr) {
|
|
2024
|
+
return [
|
|
2025
|
+
pr.title,
|
|
2026
|
+
pr.body ?? "",
|
|
2027
|
+
...labels(pr),
|
|
2028
|
+
...(pr.reviews ?? []).map((item) => item.body ?? ""),
|
|
2029
|
+
...(pr.reviewComments ?? []).map((item) => item.body ?? ""),
|
|
2030
|
+
...(pr.issueComments ?? []).map((item) => item.body ?? ""),
|
|
2031
|
+
...(pr.commits ?? []).map((item) => item.commit?.message ?? "")
|
|
2032
|
+
].filter((text) => text.trim());
|
|
2033
|
+
}
|
|
2034
|
+
function stableRegressionId(pr, summary, signals) {
|
|
2035
|
+
const hash = crypto4.createHash("sha256").update([pr.repo, pr.number, canonicalizeText(summary), signals.join("|")].join("\0")).digest("hex").slice(0, 24);
|
|
2036
|
+
return `re_${hash}`;
|
|
2037
|
+
}
|
|
2038
|
+
function extractRegressionEvents(pr) {
|
|
2039
|
+
const allText = sourceTexts(pr).join("\n");
|
|
2040
|
+
const signals = REGRESSION_SIGNALS.filter(([, pattern]) => pattern.test(allText)).map(
|
|
2041
|
+
([signal]) => signal
|
|
2042
|
+
);
|
|
2043
|
+
if (signals.length === 0) return [];
|
|
2044
|
+
const files = uniqueStrings(pr.files.map((file) => file.filename));
|
|
2045
|
+
const testPaths = files.filter(isTestFilePath);
|
|
2046
|
+
const sanitizedSummary = sanitizeHistoricalText(
|
|
2047
|
+
clipSentence(`${pr.title}. ${pr.body ?? ""}`, 420)
|
|
2048
|
+
);
|
|
2049
|
+
if (!sanitizedSummary) return [];
|
|
2050
|
+
const reviewerCount = (pr.reviews ?? []).length + (pr.reviewComments ?? []).length;
|
|
2051
|
+
const confidence = Math.min(
|
|
2052
|
+
1,
|
|
2053
|
+
Number((0.58 + signals.length * 0.06 + (reviewerCount > 0 ? 0.08 : 0)).toFixed(2))
|
|
2054
|
+
);
|
|
2055
|
+
const authors = uniqueStrings([
|
|
2056
|
+
pr.user?.login ?? "unknown",
|
|
2057
|
+
...(pr.reviewComments ?? []).map((comment) => comment.user?.login ?? "unknown")
|
|
2058
|
+
]);
|
|
2059
|
+
const event = {
|
|
2060
|
+
id: stableRegressionId(pr, sanitizedSummary, signals),
|
|
2061
|
+
repo: pr.repo,
|
|
2062
|
+
prNumber: pr.number,
|
|
2063
|
+
prUrl: pr.html_url,
|
|
2064
|
+
summary: sanitizedSummary,
|
|
2065
|
+
filePaths: files,
|
|
2066
|
+
symbols: extractSymbols(`${sanitizedSummary}
|
|
2067
|
+
${files.join("\n")}`, files),
|
|
2068
|
+
testPaths,
|
|
2069
|
+
authors,
|
|
2070
|
+
labels: labels(pr),
|
|
2071
|
+
signals: uniqueStrings(signals),
|
|
2072
|
+
createdAt: pr.created_at,
|
|
2073
|
+
mergedAt: pr.merged_at ?? void 0,
|
|
2074
|
+
confidence
|
|
2075
|
+
};
|
|
2076
|
+
return [event];
|
|
2077
|
+
}
|
|
2078
|
+
|
|
1621
2079
|
// src/indexer/normalize-pr.ts
|
|
1622
2080
|
function normalizePullRequest(input) {
|
|
1623
2081
|
return {
|
|
@@ -1640,6 +2098,7 @@ function indexPullRequests(db, pullRequests, options) {
|
|
|
1640
2098
|
let indexedFiles = 0;
|
|
1641
2099
|
let indexedComments = 0;
|
|
1642
2100
|
let wisdomUnitsCreated = 0;
|
|
2101
|
+
let regressionEventsCreated = 0;
|
|
1643
2102
|
let skippedItems = 0;
|
|
1644
2103
|
let lastPr;
|
|
1645
2104
|
for (const [index, rawPr] of pullRequests.entries()) {
|
|
@@ -1656,10 +2115,12 @@ function indexPullRequests(db, pullRequests, options) {
|
|
|
1656
2115
|
continue;
|
|
1657
2116
|
}
|
|
1658
2117
|
const wisdomUnits = extractWisdomUnits(pr);
|
|
1659
|
-
const
|
|
2118
|
+
const regressionEvents = extractRegressionEvents(pr);
|
|
2119
|
+
const result = upsertPullRequest(db, pr, wisdomUnits, regressionEvents);
|
|
1660
2120
|
indexedFiles += result.files;
|
|
1661
2121
|
indexedComments += result.comments;
|
|
1662
2122
|
wisdomUnitsCreated += result.wisdom;
|
|
2123
|
+
regressionEventsCreated += result.regressions;
|
|
1663
2124
|
lastPr = pr.number;
|
|
1664
2125
|
options.onProgress?.({
|
|
1665
2126
|
stage: "indexed_pull_request",
|
|
@@ -1682,6 +2143,7 @@ function indexPullRequests(db, pullRequests, options) {
|
|
|
1682
2143
|
indexedFiles,
|
|
1683
2144
|
indexedComments,
|
|
1684
2145
|
wisdomUnitsCreated,
|
|
2146
|
+
regressionEventsCreated,
|
|
1685
2147
|
skippedItems,
|
|
1686
2148
|
databasePath: defaultDatabasePath(options.cwd)
|
|
1687
2149
|
};
|
|
@@ -1693,7 +2155,7 @@ function shouldSyncSince(db, repo, fallbackSince) {
|
|
|
1693
2155
|
}
|
|
1694
2156
|
|
|
1695
2157
|
// src/retrieval/query-builder.ts
|
|
1696
|
-
import
|
|
2158
|
+
import path8 from "path";
|
|
1697
2159
|
var CATEGORY_HINTS = [
|
|
1698
2160
|
"security",
|
|
1699
2161
|
"regression",
|
|
@@ -1709,7 +2171,29 @@ function ftsToken(token) {
|
|
|
1709
2171
|
if (clean.length < 3) return void 0;
|
|
1710
2172
|
return `${clean}*`;
|
|
1711
2173
|
}
|
|
1712
|
-
function
|
|
2174
|
+
function testFilenameHints(filePath) {
|
|
2175
|
+
const parsed = path8.parse(filePath);
|
|
2176
|
+
const base = parsed.name.replace(/\.(test|spec)$/i, "");
|
|
2177
|
+
return [`${base}.test${parsed.ext}`, `${base}.spec${parsed.ext}`];
|
|
2178
|
+
}
|
|
2179
|
+
function diffHunkTerms(diff) {
|
|
2180
|
+
if (!diff) return [];
|
|
2181
|
+
const terms = [];
|
|
2182
|
+
const truncated = truncateText(diff, 5e3) ?? "";
|
|
2183
|
+
for (const line of truncated.split("\n")) {
|
|
2184
|
+
if (line.startsWith("diff --git")) {
|
|
2185
|
+
terms.push(...line.split(/[\\/]/).slice(-4));
|
|
2186
|
+
}
|
|
2187
|
+
if (line.startsWith("@@")) {
|
|
2188
|
+
terms.push(line.replace(/^@@[^@]*@@/, ""));
|
|
2189
|
+
}
|
|
2190
|
+
if (/^[+-]\s*(?:export\s+)?(?:class|function|const|let|var|type|interface)\s+/.test(line)) {
|
|
2191
|
+
terms.push(line);
|
|
2192
|
+
}
|
|
2193
|
+
}
|
|
2194
|
+
return terms;
|
|
2195
|
+
}
|
|
2196
|
+
function buildQueryTerms(input) {
|
|
1713
2197
|
const files = input.files ?? [];
|
|
1714
2198
|
const symbols = "symbols" in input ? input.symbols ?? [] : [];
|
|
1715
2199
|
const categories = "categories" in input ? input.categories ?? [] : [];
|
|
@@ -1718,18 +2202,24 @@ function buildFtsQuery(input) {
|
|
|
1718
2202
|
const baseText = "task" in input ? input.task : input.query;
|
|
1719
2203
|
const fileTerms = files.flatMap((file) => [
|
|
1720
2204
|
file,
|
|
1721
|
-
|
|
1722
|
-
...
|
|
2205
|
+
path8.basename(file),
|
|
2206
|
+
...testFilenameHints(file),
|
|
2207
|
+
...path8.dirname(file).split(/[\\/]/).filter(Boolean)
|
|
1723
2208
|
]);
|
|
1724
|
-
|
|
2209
|
+
return uniqueStrings([
|
|
1725
2210
|
...tokenizeSearchText(baseText, 24),
|
|
1726
2211
|
...tokenizeSearchText(fileTerms.join(" "), 24),
|
|
1727
2212
|
...tokenizeSearchText(symbols.join(" "), 24),
|
|
1728
2213
|
...tokenizeSearchText(categories.join(" "), 12),
|
|
1729
2214
|
...tokenizeSearchText(diff ?? "", 18),
|
|
1730
2215
|
...tokenizeSearchText(currentCode ?? "", 18),
|
|
2216
|
+
...tokenizeSearchText(diffHunkTerms(diff).join(" "), 18),
|
|
2217
|
+
...CATEGORY_HINTS,
|
|
1731
2218
|
...CATEGORY_HINTS.filter((hint) => baseText.toLowerCase().includes(hint))
|
|
1732
|
-
]).
|
|
2219
|
+
]).slice(0, 80);
|
|
2220
|
+
}
|
|
2221
|
+
function buildFtsQuery(input) {
|
|
2222
|
+
const tokens = buildQueryTerms(input).map(ftsToken).filter((token) => Boolean(token)).slice(0, 48);
|
|
1733
2223
|
return tokens.join(" OR ");
|
|
1734
2224
|
}
|
|
1735
2225
|
function clampMaxResults(value, defaultValue) {
|
|
@@ -1738,7 +2228,7 @@ function clampMaxResults(value, defaultValue) {
|
|
|
1738
2228
|
}
|
|
1739
2229
|
|
|
1740
2230
|
// src/retrieval/ranker.ts
|
|
1741
|
-
import
|
|
2231
|
+
import path9 from "path";
|
|
1742
2232
|
function parseJsonArray2(value) {
|
|
1743
2233
|
try {
|
|
1744
2234
|
const parsed = JSON.parse(value);
|
|
@@ -1785,11 +2275,11 @@ function filePathMatch(unitPaths, queryFiles) {
|
|
|
1785
2275
|
if (queryFiles.length === 0 || unitPaths.length === 0) return 0;
|
|
1786
2276
|
let best = 0;
|
|
1787
2277
|
for (const queryFile of queryFiles) {
|
|
1788
|
-
const queryBase =
|
|
1789
|
-
const queryDir =
|
|
2278
|
+
const queryBase = path9.basename(queryFile).toLowerCase();
|
|
2279
|
+
const queryDir = path9.dirname(queryFile).toLowerCase();
|
|
1790
2280
|
for (const unitPath of unitPaths) {
|
|
1791
|
-
const unitBase =
|
|
1792
|
-
const unitDir =
|
|
2281
|
+
const unitBase = path9.basename(unitPath).toLowerCase();
|
|
2282
|
+
const unitDir = path9.dirname(unitPath).toLowerCase();
|
|
1793
2283
|
const q = queryFile.toLowerCase();
|
|
1794
2284
|
const u = unitPath.toLowerCase();
|
|
1795
2285
|
if (q === u) best = Math.max(best, 1);
|
|
@@ -1813,7 +2303,7 @@ function symbolMatch2(unit, querySymbols) {
|
|
|
1813
2303
|
const lower = symbol.toLowerCase();
|
|
1814
2304
|
if (unitSymbols.includes(lower)) best = Math.max(best, 1);
|
|
1815
2305
|
else if (text.includes(`\`${lower}\``)) best = Math.max(best, 1);
|
|
1816
|
-
else if (new RegExp(`\\b${
|
|
2306
|
+
else if (new RegExp(`\\b${escapeRegExp2(lower)}\\b`, "i").test(text))
|
|
1817
2307
|
best = Math.max(best, 0.66);
|
|
1818
2308
|
else if (unitSymbols.some((candidate) => candidate.includes(lower) || lower.includes(candidate))) {
|
|
1819
2309
|
best = Math.max(best, 0.35);
|
|
@@ -1850,6 +2340,19 @@ function freshnessMultiplier(status) {
|
|
|
1850
2340
|
if (status === "possibly_stale") return 0.85;
|
|
1851
2341
|
return 0.55;
|
|
1852
2342
|
}
|
|
2343
|
+
function matchReasons2(parts, unit) {
|
|
2344
|
+
const reasons = [];
|
|
2345
|
+
if (parts.filePathMatch >= 0.9) reasons.push("exact file path match");
|
|
2346
|
+
else if (parts.filePathMatch >= 0.45) reasons.push("related file path match");
|
|
2347
|
+
if (parts.symbolMatch >= 0.9) reasons.push("exact symbol match");
|
|
2348
|
+
else if (parts.symbolMatch >= 0.45) reasons.push("symbol mentioned in evidence");
|
|
2349
|
+
if (parts.textMatch >= 0.45) reasons.push("text matched task or diff terms");
|
|
2350
|
+
if (parts.reviewerOrAuthorSignal >= 0.85) reasons.push("reviewer evidence");
|
|
2351
|
+
if (unit.category === "security_note" || unit.category === "bug_regression") {
|
|
2352
|
+
reasons.push(`${unit.category.replace(/_/g, " ")} priority`);
|
|
2353
|
+
}
|
|
2354
|
+
return reasons.slice(0, 5);
|
|
2355
|
+
}
|
|
1853
2356
|
function scoreUnit(unit, input, duplicateCount, repeatedEvidenceCount, freshness) {
|
|
1854
2357
|
const queryFiles = input.files ?? [];
|
|
1855
2358
|
const querySymbols = "symbols" in input ? input.symbols ?? [] : [];
|
|
@@ -1876,10 +2379,12 @@ function scoreUnit(unit, input, duplicateCount, repeatedEvidenceCount, freshness
|
|
|
1876
2379
|
confidenceReasons: confidenceReasonsFor(unit, repeatedEvidenceCount),
|
|
1877
2380
|
freshnessStatus: freshness.status,
|
|
1878
2381
|
freshnessReason: freshness.reason,
|
|
1879
|
-
evidence: evidenceForWisdom(unit)
|
|
2382
|
+
evidence: evidenceForWisdom(unit),
|
|
2383
|
+
matchReasons: matchReasons2(parts, unit),
|
|
2384
|
+
rankSignals: parts
|
|
1880
2385
|
};
|
|
1881
2386
|
}
|
|
1882
|
-
function
|
|
2387
|
+
function escapeRegExp2(value) {
|
|
1883
2388
|
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
1884
2389
|
}
|
|
1885
2390
|
function loadCandidates(db, input) {
|
|
@@ -1968,7 +2473,7 @@ function rankWisdomUnits(db, input) {
|
|
|
1968
2473
|
}
|
|
1969
2474
|
|
|
1970
2475
|
// src/retrieval/code-ranker.ts
|
|
1971
|
-
import
|
|
2476
|
+
import path10 from "path";
|
|
1972
2477
|
function parseJsonArray3(value) {
|
|
1973
2478
|
try {
|
|
1974
2479
|
const parsed = JSON.parse(value);
|
|
@@ -1995,13 +2500,13 @@ function rowToCodeChunk(row) {
|
|
|
1995
2500
|
function filePathMatch2(filePath, queryFiles) {
|
|
1996
2501
|
if (queryFiles.length === 0) return 0;
|
|
1997
2502
|
let best = 0;
|
|
1998
|
-
const unitBase =
|
|
1999
|
-
const unitDir =
|
|
2503
|
+
const unitBase = path10.basename(filePath).toLowerCase();
|
|
2504
|
+
const unitDir = path10.dirname(filePath).toLowerCase();
|
|
2000
2505
|
const unit = filePath.toLowerCase();
|
|
2001
2506
|
for (const queryFile of queryFiles) {
|
|
2002
2507
|
const query = queryFile.toLowerCase();
|
|
2003
|
-
const queryBase =
|
|
2004
|
-
const queryDir =
|
|
2508
|
+
const queryBase = path10.basename(queryFile).toLowerCase();
|
|
2509
|
+
const queryDir = path10.dirname(queryFile).toLowerCase();
|
|
2005
2510
|
if (query === unit) best = Math.max(best, 1);
|
|
2006
2511
|
else if (queryBase === unitBase) best = Math.max(best, 0.72);
|
|
2007
2512
|
else if (queryDir === unitDir) best = Math.max(best, 0.62);
|
|
@@ -2021,7 +2526,7 @@ function symbolMatch3(chunk, querySymbols) {
|
|
|
2021
2526
|
for (const symbol of querySymbols) {
|
|
2022
2527
|
const lower = symbol.toLowerCase();
|
|
2023
2528
|
if (chunkSymbols.includes(lower)) best = Math.max(best, 1);
|
|
2024
|
-
else if (new RegExp(`\\b${
|
|
2529
|
+
else if (new RegExp(`\\b${escapeRegExp3(lower)}\\b`, "i").test(text)) best = Math.max(best, 0.7);
|
|
2025
2530
|
else if (chunkSymbols.some((candidate) => candidate.includes(lower) || lower.includes(candidate))) {
|
|
2026
2531
|
best = Math.max(best, 0.42);
|
|
2027
2532
|
}
|
|
@@ -2047,7 +2552,17 @@ function recencyScore2(chunk) {
|
|
|
2047
2552
|
if (ageDays < 730) return 0.45;
|
|
2048
2553
|
return 0.25;
|
|
2049
2554
|
}
|
|
2050
|
-
function
|
|
2555
|
+
function matchReasons3(parts) {
|
|
2556
|
+
const reasons = [];
|
|
2557
|
+
if (parts.filePathMatch >= 0.9) reasons.push("exact file path match");
|
|
2558
|
+
else if (parts.filePathMatch >= 0.45) reasons.push("related file path match");
|
|
2559
|
+
if (parts.symbolMatch >= 0.9) reasons.push("exact symbol match");
|
|
2560
|
+
else if (parts.symbolMatch >= 0.45) reasons.push("symbol mentioned in current code");
|
|
2561
|
+
if (parts.textMatch >= 0.45) reasons.push("text matched task or diff terms");
|
|
2562
|
+
if (parts.recency >= 0.75) reasons.push("recent code file");
|
|
2563
|
+
return reasons.slice(0, 5);
|
|
2564
|
+
}
|
|
2565
|
+
function escapeRegExp3(value) {
|
|
2051
2566
|
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
2052
2567
|
}
|
|
2053
2568
|
function escapeLike(value) {
|
|
@@ -2071,7 +2586,7 @@ function loadCodeCandidates(db, input) {
|
|
|
2071
2586
|
}
|
|
2072
2587
|
}
|
|
2073
2588
|
for (const file of input.files ?? []) {
|
|
2074
|
-
const basename =
|
|
2589
|
+
const basename = path10.basename(file);
|
|
2075
2590
|
const rows = db.prepare(
|
|
2076
2591
|
`SELECT cc.*, NULL AS bm25
|
|
2077
2592
|
FROM code_chunks cc
|
|
@@ -2113,13 +2628,206 @@ function rankCodeChunks(db, input) {
|
|
|
2113
2628
|
...chunk,
|
|
2114
2629
|
symbols: uniqueStrings(chunk.symbols),
|
|
2115
2630
|
score: Number(score.toFixed(4)),
|
|
2116
|
-
scoreParts: parts
|
|
2631
|
+
scoreParts: parts,
|
|
2632
|
+
matchReasons: matchReasons3(parts),
|
|
2633
|
+
rankSignals: parts
|
|
2117
2634
|
};
|
|
2118
2635
|
}).sort((a, b) => b.score - a.score || b.startLine - a.startLine);
|
|
2119
2636
|
const limit = Math.min(5, clampMaxResults(input.maxResults, 5));
|
|
2120
2637
|
return ranked.slice(0, limit);
|
|
2121
2638
|
}
|
|
2122
2639
|
|
|
2640
|
+
// src/retrieval/test-ranker.ts
|
|
2641
|
+
import path11 from "path";
|
|
2642
|
+
function parseJsonArray4(value) {
|
|
2643
|
+
if (!value) return [];
|
|
2644
|
+
try {
|
|
2645
|
+
const parsed = JSON.parse(value);
|
|
2646
|
+
return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
|
|
2647
|
+
} catch {
|
|
2648
|
+
return [];
|
|
2649
|
+
}
|
|
2650
|
+
}
|
|
2651
|
+
function baseStem(filePath) {
|
|
2652
|
+
return path11.posix.basename(filePath).replace(/\.(test|spec)\.[^.]+$/i, "").replace(/\.[^.]+$/i, "").toLowerCase();
|
|
2653
|
+
}
|
|
2654
|
+
function rowToRanked(row, input) {
|
|
2655
|
+
const symbols = parseJsonArray4(row.symbols_json);
|
|
2656
|
+
const text = row.sanitized_text ?? "";
|
|
2657
|
+
const matchedSymbols = (input.symbols ?? []).filter((symbol) => {
|
|
2658
|
+
const lower = symbol.toLowerCase();
|
|
2659
|
+
return symbols.some((candidate) => candidate.toLowerCase() === lower) || new RegExp(`\\b${escapeRegExp4(symbol)}\\b`, "i").test(text);
|
|
2660
|
+
});
|
|
2661
|
+
const exactFile = (input.files ?? []).some((file) => row.source_path === file);
|
|
2662
|
+
const basenameMatch = (input.files ?? []).some((file) => baseStem(file) === baseStem(row.path));
|
|
2663
|
+
const symbolScore = matchedSymbols.length > 0 ? 0.25 : 0;
|
|
2664
|
+
const score = (exactFile ? 0.55 : 0) + (basenameMatch ? 0.25 : 0) + (row.strength ?? 0.35) * 0.3 + symbolScore;
|
|
2665
|
+
return {
|
|
2666
|
+
repo: "",
|
|
2667
|
+
path: row.path,
|
|
2668
|
+
language: row.language ?? void 0,
|
|
2669
|
+
sizeBytes: row.size_bytes,
|
|
2670
|
+
contentHash: row.content_hash,
|
|
2671
|
+
updatedAt: row.updated_at,
|
|
2672
|
+
sourcePath: row.source_path ?? void 0,
|
|
2673
|
+
reason: row.reason ?? (basenameMatch ? "same basename" : "test file match"),
|
|
2674
|
+
strength: row.strength ?? 0.35,
|
|
2675
|
+
score: Number(score.toFixed(4)),
|
|
2676
|
+
matchedSymbols
|
|
2677
|
+
};
|
|
2678
|
+
}
|
|
2679
|
+
function escapeRegExp4(value) {
|
|
2680
|
+
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
2681
|
+
}
|
|
2682
|
+
function rankRelevantTests(db, input) {
|
|
2683
|
+
const candidates = /* @__PURE__ */ new Map();
|
|
2684
|
+
for (const file of input.files ?? []) {
|
|
2685
|
+
const linkedRows = db.prepare(
|
|
2686
|
+
`SELECT tf.path, tf.language, tf.size_bytes, tf.content_hash, tf.updated_at,
|
|
2687
|
+
tl.source_path, tl.reason, tl.strength, cc.symbols_json, cc.sanitized_text
|
|
2688
|
+
FROM test_links tl
|
|
2689
|
+
JOIN test_files tf ON tf.repo_id = tl.repo_id AND tf.path = tl.test_path
|
|
2690
|
+
LEFT JOIN code_chunks cc ON cc.repo_id = tl.repo_id AND cc.file_path = tf.path
|
|
2691
|
+
WHERE tl.source_path = ?
|
|
2692
|
+
ORDER BY tl.strength DESC
|
|
2693
|
+
LIMIT 40`
|
|
2694
|
+
).all(file);
|
|
2695
|
+
for (const row of linkedRows) candidates.set(row.path, row);
|
|
2696
|
+
const basename = baseStem(file);
|
|
2697
|
+
const basenameRows = db.prepare(
|
|
2698
|
+
`SELECT tf.path, tf.language, tf.size_bytes, tf.content_hash, tf.updated_at,
|
|
2699
|
+
NULL AS source_path, 'same basename' AS reason, 0.7 AS strength,
|
|
2700
|
+
cc.symbols_json, cc.sanitized_text
|
|
2701
|
+
FROM test_files tf
|
|
2702
|
+
LEFT JOIN code_chunks cc ON cc.file_path = tf.path
|
|
2703
|
+
WHERE lower(tf.path) LIKE ?
|
|
2704
|
+
LIMIT 25`
|
|
2705
|
+
).all(`%${basename}%`);
|
|
2706
|
+
for (const row of basenameRows) candidates.set(row.path, row);
|
|
2707
|
+
}
|
|
2708
|
+
if (candidates.size === 0) {
|
|
2709
|
+
const rows = db.prepare(
|
|
2710
|
+
`SELECT tf.path, tf.language, tf.size_bytes, tf.content_hash, tf.updated_at,
|
|
2711
|
+
NULL AS source_path, 'recent test file' AS reason, 0.25 AS strength,
|
|
2712
|
+
cc.symbols_json, cc.sanitized_text
|
|
2713
|
+
FROM test_files tf
|
|
2714
|
+
LEFT JOIN code_chunks cc ON cc.file_path = tf.path
|
|
2715
|
+
ORDER BY tf.updated_at DESC
|
|
2716
|
+
LIMIT 20`
|
|
2717
|
+
).all();
|
|
2718
|
+
for (const row of rows) candidates.set(row.path, row);
|
|
2719
|
+
}
|
|
2720
|
+
return [...candidates.values()].map((row) => rowToRanked(row, input)).sort((a, b) => b.score - a.score || a.path.localeCompare(b.path)).slice(0, Math.min(5, clampMaxResults(input.maxResults, 5)));
|
|
2721
|
+
}
|
|
2722
|
+
|
|
2723
|
+
// src/retrieval/regression-ranker.ts
|
|
2724
|
+
import path12 from "path";
|
|
2725
|
+
function parseJsonArray5(value) {
|
|
2726
|
+
try {
|
|
2727
|
+
const parsed = JSON.parse(value);
|
|
2728
|
+
return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
|
|
2729
|
+
} catch {
|
|
2730
|
+
return [];
|
|
2731
|
+
}
|
|
2732
|
+
}
|
|
2733
|
+
function rowToEvent(row) {
|
|
2734
|
+
return {
|
|
2735
|
+
id: row.id,
|
|
2736
|
+
repo: row.repo,
|
|
2737
|
+
prNumber: row.pr_number,
|
|
2738
|
+
prUrl: row.pr_url,
|
|
2739
|
+
summary: row.summary_sanitized,
|
|
2740
|
+
filePaths: parseJsonArray5(row.file_paths_json),
|
|
2741
|
+
symbols: parseJsonArray5(row.symbols_json),
|
|
2742
|
+
testPaths: parseJsonArray5(row.test_paths_json),
|
|
2743
|
+
authors: parseJsonArray5(row.authors_json),
|
|
2744
|
+
labels: parseJsonArray5(row.labels_json),
|
|
2745
|
+
signals: parseJsonArray5(row.signals_json),
|
|
2746
|
+
createdAt: row.created_at,
|
|
2747
|
+
mergedAt: row.merged_at ?? void 0,
|
|
2748
|
+
confidence: row.confidence
|
|
2749
|
+
};
|
|
2750
|
+
}
|
|
2751
|
+
function filePathMatch3(eventPaths, queryFiles) {
|
|
2752
|
+
let best = 0;
|
|
2753
|
+
for (const queryFile of queryFiles) {
|
|
2754
|
+
const queryBase = path12.posix.basename(queryFile).toLowerCase();
|
|
2755
|
+
const queryDir = path12.posix.dirname(queryFile).toLowerCase();
|
|
2756
|
+
for (const eventPath of eventPaths) {
|
|
2757
|
+
const eventBase = path12.posix.basename(eventPath).toLowerCase();
|
|
2758
|
+
const eventDir = path12.posix.dirname(eventPath).toLowerCase();
|
|
2759
|
+
if (queryFile.toLowerCase() === eventPath.toLowerCase()) best = Math.max(best, 1);
|
|
2760
|
+
else if (queryBase === eventBase) best = Math.max(best, 0.7);
|
|
2761
|
+
else if (queryDir === eventDir) best = Math.max(best, 0.55);
|
|
2762
|
+
}
|
|
2763
|
+
}
|
|
2764
|
+
return best;
|
|
2765
|
+
}
|
|
2766
|
+
function symbolMatch4(event, querySymbols) {
|
|
2767
|
+
const eventSymbols = event.symbols.map((symbol) => symbol.toLowerCase());
|
|
2768
|
+
let best = 0;
|
|
2769
|
+
for (const symbol of querySymbols) {
|
|
2770
|
+
const lower = symbol.toLowerCase();
|
|
2771
|
+
if (eventSymbols.includes(lower)) best = Math.max(best, 1);
|
|
2772
|
+
else if (event.summary.toLowerCase().includes(lower)) best = Math.max(best, 0.65);
|
|
2773
|
+
}
|
|
2774
|
+
return best;
|
|
2775
|
+
}
|
|
2776
|
+
function textMatch4(event, inputText) {
|
|
2777
|
+
const tokens = tokenizeSearchText(inputText, 32);
|
|
2778
|
+
if (tokens.length === 0) return 0;
|
|
2779
|
+
const haystack = `${event.summary} ${event.filePaths.join(" ")} ${event.symbols.join(" ")} ${event.signals.join(" ")}`.toLowerCase();
|
|
2780
|
+
return tokens.filter((token) => haystack.includes(token.toLowerCase())).length / tokens.length;
|
|
2781
|
+
}
|
|
2782
|
+
function recencyScore3(event) {
|
|
2783
|
+
const timestamp = Date.parse(event.mergedAt ?? event.createdAt);
|
|
2784
|
+
if (Number.isNaN(timestamp)) return 0.25;
|
|
2785
|
+
const ageDays = Math.max(0, (Date.now() - timestamp) / (1e3 * 60 * 60 * 24));
|
|
2786
|
+
if (ageDays < 180) return 1;
|
|
2787
|
+
if (ageDays < 730) return 0.7;
|
|
2788
|
+
return 0.35;
|
|
2789
|
+
}
|
|
2790
|
+
function matchReasons4(parts, event) {
|
|
2791
|
+
const reasons = [];
|
|
2792
|
+
if ((parts.filePathMatch ?? 0) >= 0.9) reasons.push("exact file path match");
|
|
2793
|
+
else if ((parts.filePathMatch ?? 0) >= 0.45) reasons.push("related file path match");
|
|
2794
|
+
if ((parts.symbolMatch ?? 0) >= 0.9) reasons.push("exact symbol match");
|
|
2795
|
+
if ((parts.textMatch ?? 0) >= 0.35) reasons.push("text matched task or diff terms");
|
|
2796
|
+
if (event.signals.length > 0)
|
|
2797
|
+
reasons.push(`regression signals: ${event.signals.slice(0, 3).join(", ")}`);
|
|
2798
|
+
return reasons.slice(0, 5);
|
|
2799
|
+
}
|
|
2800
|
+
function loadRegressionEvents(db) {
|
|
2801
|
+
const rows = db.prepare(
|
|
2802
|
+
"SELECT * FROM regression_events ORDER BY COALESCE(merged_at, created_at) DESC LIMIT 200"
|
|
2803
|
+
).all();
|
|
2804
|
+
return rows.map(rowToEvent);
|
|
2805
|
+
}
|
|
2806
|
+
function rankRegressionEvents(db, input) {
|
|
2807
|
+
const queryFiles = input.files ?? [];
|
|
2808
|
+
const querySymbols = "symbols" in input ? input.symbols ?? [] : [];
|
|
2809
|
+
const inputText = "task" in input ? `${input.task} ${input.diff ?? ""} ${input.currentCode ?? ""}` : input.query;
|
|
2810
|
+
const ranked = loadRegressionEvents(db).map((event) => {
|
|
2811
|
+
const parts = {
|
|
2812
|
+
filePathMatch: filePathMatch3(event.filePaths, queryFiles),
|
|
2813
|
+
symbolMatch: symbolMatch4(event, querySymbols),
|
|
2814
|
+
textMatch: textMatch4(event, inputText),
|
|
2815
|
+
recency: recencyScore3(event),
|
|
2816
|
+
confidence: event.confidence
|
|
2817
|
+
};
|
|
2818
|
+
const score = 0.35 * parts.filePathMatch + 0.2 * parts.symbolMatch + 0.2 * parts.textMatch + 0.15 * parts.recency + 0.1 * parts.confidence;
|
|
2819
|
+
return {
|
|
2820
|
+
...event,
|
|
2821
|
+
filePaths: uniqueStrings(event.filePaths),
|
|
2822
|
+
symbols: uniqueStrings(event.symbols),
|
|
2823
|
+
score: Number(score.toFixed(4)),
|
|
2824
|
+
matchReasons: matchReasons4(parts, event),
|
|
2825
|
+
rankSignals: parts
|
|
2826
|
+
};
|
|
2827
|
+
}).filter((event) => event.score > 0 || "regressionsOnly" in input && input.regressionsOnly).sort((a, b) => b.score - a.score || b.confidence - a.confidence);
|
|
2828
|
+
return ranked.slice(0, Math.min(5, clampMaxResults(input.maxResults, 5)));
|
|
2829
|
+
}
|
|
2830
|
+
|
|
2123
2831
|
// src/retrieval/formatter.ts
|
|
2124
2832
|
function evidenceLine(unit) {
|
|
2125
2833
|
const author = unit.authors[0] ? ` by @${unit.authors[0]}` : "";
|
|
@@ -2166,7 +2874,7 @@ function riskLines(units) {
|
|
|
2166
2874
|
}
|
|
2167
2875
|
return [...risks].slice(0, 4);
|
|
2168
2876
|
}
|
|
2169
|
-
function formatAnchorContext(units, input, codeChunks = [], teamRules = [], warnings = []) {
|
|
2877
|
+
function formatAnchorContext(units, input, codeChunks = [], teamRules = [], warnings = [], relevantTests = [], regressionEvents = [], extraMetadata = {}) {
|
|
2170
2878
|
const lines = ["# Anchor Context", ""];
|
|
2171
2879
|
if (warnings.length > 0) {
|
|
2172
2880
|
lines.push("## Warnings", "");
|
|
@@ -2216,6 +2924,33 @@ function formatAnchorContext(units, input, codeChunks = [], teamRules = [], warn
|
|
|
2216
2924
|
lines.push("");
|
|
2217
2925
|
});
|
|
2218
2926
|
}
|
|
2927
|
+
lines.push("## Relevant tests", "");
|
|
2928
|
+
if (relevantTests.length === 0) {
|
|
2929
|
+
lines.push("No directly related tests found in the local index.", "");
|
|
2930
|
+
} else {
|
|
2931
|
+
relevantTests.forEach((test, index) => {
|
|
2932
|
+
const symbolText = test.matchedSymbols.length ? `; symbols: ${test.matchedSymbols.slice(0, 6).join(", ")}` : "";
|
|
2933
|
+
lines.push(`${index + 1}. ${test.path}${symbolText}`);
|
|
2934
|
+
lines.push(` Why it matters: ${test.reason} (${test.strength.toFixed(2)} link strength).`);
|
|
2935
|
+
if (test.sourcePath) lines.push(` Source: ${test.sourcePath}`);
|
|
2936
|
+
lines.push("");
|
|
2937
|
+
});
|
|
2938
|
+
}
|
|
2939
|
+
lines.push("## Regression memory", "");
|
|
2940
|
+
if (regressionEvents.length === 0) {
|
|
2941
|
+
lines.push("No related regression events found in the local index.", "");
|
|
2942
|
+
} else {
|
|
2943
|
+
regressionEvents.forEach((event, index) => {
|
|
2944
|
+
lines.push(`${index + 1}. ${clipSentence(event.summary, 220)}`);
|
|
2945
|
+
lines.push(` Evidence: PR #${event.prNumber}, signals: ${event.signals.join(", ")}`);
|
|
2946
|
+
lines.push(` Files: ${event.filePaths.slice(0, 5).join(", ") || "n/a"}`);
|
|
2947
|
+
if (event.testPaths.length > 0) {
|
|
2948
|
+
lines.push(` Tests: ${event.testPaths.slice(0, 5).join(", ")}`);
|
|
2949
|
+
}
|
|
2950
|
+
lines.push(` Link: ${event.prUrl}`);
|
|
2951
|
+
lines.push("");
|
|
2952
|
+
});
|
|
2953
|
+
}
|
|
2219
2954
|
lines.push("## Risks", "");
|
|
2220
2955
|
const risks = riskLines(units);
|
|
2221
2956
|
if (risks.length === 0) {
|
|
@@ -2248,7 +2983,9 @@ function formatAnchorContext(units, input, codeChunks = [], teamRules = [], warn
|
|
|
2248
2983
|
sourceType: unit.sourceType,
|
|
2249
2984
|
filePaths: unit.filePaths,
|
|
2250
2985
|
symbols: unit.symbols,
|
|
2251
|
-
duplicateCount: unit.duplicateCount
|
|
2986
|
+
duplicateCount: unit.duplicateCount,
|
|
2987
|
+
matchReasons: unit.matchReasons,
|
|
2988
|
+
rankSignals: unit.rankSignals
|
|
2252
2989
|
})),
|
|
2253
2990
|
teamRules: teamRules.map((rule) => ({
|
|
2254
2991
|
id: rule.id,
|
|
@@ -2260,7 +2997,9 @@ function formatAnchorContext(units, input, codeChunks = [], teamRules = [], warn
|
|
|
2260
2997
|
category: rule.category,
|
|
2261
2998
|
filePaths: rule.filePaths,
|
|
2262
2999
|
symbols: rule.symbols,
|
|
2263
|
-
evidence: rule.evidence
|
|
3000
|
+
evidence: rule.evidence,
|
|
3001
|
+
matchReasons: rule.matchReasons,
|
|
3002
|
+
rankSignals: rule.rankSignals
|
|
2264
3003
|
})),
|
|
2265
3004
|
codeEvidence: codeChunks.map((chunk) => ({
|
|
2266
3005
|
id: chunk.id,
|
|
@@ -2269,8 +3008,32 @@ function formatAnchorContext(units, input, codeChunks = [], teamRules = [], warn
|
|
|
2269
3008
|
language: chunk.language,
|
|
2270
3009
|
startLine: chunk.startLine,
|
|
2271
3010
|
endLine: chunk.endLine,
|
|
2272
|
-
symbols: chunk.symbols
|
|
2273
|
-
|
|
3011
|
+
symbols: chunk.symbols,
|
|
3012
|
+
matchReasons: chunk.matchReasons,
|
|
3013
|
+
rankSignals: chunk.rankSignals
|
|
3014
|
+
})),
|
|
3015
|
+
relevantTests: relevantTests.map((test) => ({
|
|
3016
|
+
path: test.path,
|
|
3017
|
+
sourcePath: test.sourcePath,
|
|
3018
|
+
reason: test.reason,
|
|
3019
|
+
strength: test.strength,
|
|
3020
|
+
score: test.score,
|
|
3021
|
+
matchedSymbols: test.matchedSymbols
|
|
3022
|
+
})),
|
|
3023
|
+
regressionEvents: regressionEvents.map((event) => ({
|
|
3024
|
+
id: event.id,
|
|
3025
|
+
score: event.score,
|
|
3026
|
+
prNumber: event.prNumber,
|
|
3027
|
+
prUrl: event.prUrl,
|
|
3028
|
+
filePaths: event.filePaths,
|
|
3029
|
+
symbols: event.symbols,
|
|
3030
|
+
testPaths: event.testPaths,
|
|
3031
|
+
summary: clipSentence(event.summary, 260),
|
|
3032
|
+
matchReasons: event.matchReasons,
|
|
3033
|
+
rankSignals: event.rankSignals
|
|
3034
|
+
})),
|
|
3035
|
+
queryTerms: buildQueryTerms(input),
|
|
3036
|
+
...extraMetadata
|
|
2274
3037
|
}
|
|
2275
3038
|
};
|
|
2276
3039
|
}
|
|
@@ -2303,7 +3066,9 @@ function formatSearchHistory(units) {
|
|
|
2303
3066
|
sourceType: unit.sourceType,
|
|
2304
3067
|
sanitizedSnippet: clipSentence(unit.sanitizedText, 260),
|
|
2305
3068
|
matchedFiles: unit.filePaths,
|
|
2306
|
-
matchedSymbols: unit.symbols
|
|
3069
|
+
matchedSymbols: unit.symbols,
|
|
3070
|
+
matchReasons: unit.matchReasons,
|
|
3071
|
+
rankSignals: unit.rankSignals
|
|
2307
3072
|
}))
|
|
2308
3073
|
}
|
|
2309
3074
|
};
|
|
@@ -2320,6 +3085,9 @@ function formatIndexStatus(status) {
|
|
|
2320
3085
|
`- Wisdom units: ${status.wisdomUnitCount}`,
|
|
2321
3086
|
`- Code files: ${status.codeFileCount}`,
|
|
2322
3087
|
`- Code chunks: ${status.codeChunkCount}`,
|
|
3088
|
+
`- Test files: ${status.testFileCount}`,
|
|
3089
|
+
`- Test links: ${status.testLinkCount}`,
|
|
3090
|
+
`- Regression events: ${status.regressionEventCount}`,
|
|
2323
3091
|
`- History coverage: ${status.historyCoverage ?? "unknown"}`,
|
|
2324
3092
|
`- History limit: ${status.historyLimit ?? "n/a"}`,
|
|
2325
3093
|
`- Stale evidence: ${status.staleEvidenceCount}`,
|
|
@@ -2327,12 +3095,192 @@ function formatIndexStatus(status) {
|
|
|
2327
3095
|
`- Last sync: ${status.lastSyncTime ?? "never"}`,
|
|
2328
3096
|
`- Last code index: ${status.lastCodeIndexTime ?? "never"}`,
|
|
2329
3097
|
`- Last rule index: ${status.lastRuleIndexTime ?? "never"}`,
|
|
3098
|
+
`- Last successful index run: ${status.lastSuccessfulRun ?? "never"}`,
|
|
3099
|
+
`- Last failed index run: ${status.lastFailedRun ?? "never"}`,
|
|
3100
|
+
`- Stale code index: ${status.staleCodeIndex ? "yes" : "no"}`,
|
|
3101
|
+
`- Suggested next command: ${status.suggestedNextCommand ?? "n/a"}`,
|
|
2330
3102
|
`- GitHub token configured: ${status.githubTokenConfigured ? "yes" : "no"}`,
|
|
2331
3103
|
`- Health: ${status.health}`
|
|
2332
3104
|
];
|
|
2333
3105
|
return { markdown: lines.join("\n"), metadata: status };
|
|
2334
3106
|
}
|
|
2335
3107
|
|
|
3108
|
+
// src/retrieval/semantic.ts
|
|
3109
|
+
/**
 * Describes whether semantic (embedding-based) search is in effect.
 *
 * @param {object} [env=process.env] - Environment map; only `ANCHOR_SEMANTIC` is read.
 * @param {{ name: string, isAvailable: () => boolean }} [provider] - Optional local
 *   embedding provider; its `isAvailable()` result decides availability.
 * @returns {{ enabled: boolean, mode: string, available: boolean, reason: string }}
 *   Status record with a human-readable `reason`.
 */
function getSemanticStatus(env = process.env, provider) {
  const localRequested = env.ANCHOR_SEMANTIC === "local";
  if (!localRequested) {
    return {
      enabled: false,
      mode: "disabled",
      available: false,
      reason: "Semantic search is disabled; SQLite FTS is active."
    };
  }

  // Local mode was requested; the only open question is whether a provider can serve it.
  const providerReady = provider ? Boolean(provider.isAvailable()) : false;
  const reason = providerReady
    ? `Using local embedding provider: ${provider.name}.`
    : "Local semantic search requested, but no local embedding provider is available; falling back to SQLite FTS.";

  return {
    enabled: true,
    mode: "local",
    available: providerReady,
    reason
  };
}
|
|
3133
|
+
|
|
3134
|
+
// src/retrieval/context.ts
|
|
3135
|
+
/**
 * Gathers every ranked evidence source for `input` and hands them to
 * `formatAnchorContext`, together with index-health and semantic-status metadata.
 *
 * @param {object} db - Database handle passed through to the rankers.
 * @param {string} cwd - Working directory used for team rules and index status.
 * @param {object} input - Context request; `input.strict` enables the coverage warning.
 * @param {string[]} [warnings=[]] - Caller-supplied warnings, emitted before any added here.
 * @returns {*} Whatever `formatAnchorContext` returns.
 */
function buildAnchorContextResult(db, cwd, input, warnings = []) {
  // Rankers run in a fixed order: history, code, rules, tests, regressions.
  const rankedHistory = rankWisdomUnits(db, input);
  const rankedCode = rankCodeChunks(db, input);
  const rankedRules = rankTeamRules(db, cwd, input);
  const rankedTests = rankRelevantTests(db, input);
  const rankedRegressions = rankRegressionEvents(db, input);

  const indexStatus = getIndexStatus(cwd);
  const semanticStatus = getSemanticStatus();
  const { historyCoverage, staleCodeIndex, lastSuccessfulRun, lastFailedRun } = indexStatus;
  const coverageLabel = historyCoverage ?? "unknown";

  const allWarnings = [...warnings];
  if (input.strict && historyCoverage !== "all") {
    // Strict callers are told when the indexed PR history is not complete.
    allWarnings.push(
      `Strict mode is using ${coverageLabel} PR history coverage; run anchor index-all for broader evidence.`
    );
  }

  const extraMetadata = {
    indexHealth: {
      historyCoverage: coverageLabel,
      staleCodeIndex: Boolean(staleCodeIndex),
      lastSuccessfulRun,
      lastFailedRun
    },
    semanticStatus
  };

  return formatAnchorContext(
    rankedHistory,
    input,
    rankedCode,
    rankedRules,
    allWarnings,
    rankedTests,
    rankedRegressions,
    extraMetadata
  );
}
|
|
3165
|
+
|
|
3166
|
+
// src/retrieval/explain-file.ts
|
|
3167
|
+
/**
 * Builds an "Anchor File Explain" report for one file: a short header
 * (ownership hint and important symbols) followed by the regular Anchor
 * context body for that file.
 *
 * @param {object} db - Database handle passed to the rankers.
 * @param {string} cwd - Working directory passed to `buildAnchorContextResult`.
 * @param {{ file: string, symbols?: string[], strict?: boolean, maxResults?: number }} input
 * @returns {{ markdown: string, metadata: object }} Report text plus the context
 *   metadata, overridden with `mode`, `file` and `importantSymbols`.
 */
function explainFile(db, cwd, input) {
  const { file, symbols, strict, maxResults } = input;
  const contextInput = {
    task: `Explain ${file}: ownership, constraints, regressions, tests, and important symbols.`,
    files: [file],
    symbols,
    strict,
    maxResults
  };

  const chunks = rankCodeChunks(db, contextInput);

  // De-duplicate symbols across chunks, keeping first-seen order, capped at ten.
  const symbolSet = new Set(chunks.flatMap((chunk) => chunk.symbols));
  const importantSymbols = Array.from(symbolSet).slice(0, 10);

  // The top-ranked chunk's text stands in for "what this file owns".
  const leadText = chunks[0]?.sanitizedText;
  const ownership = leadText
    ? clipSentence(leadText, 220)
    : "No indexed code chunk found for this file.";

  const context = buildAnchorContextResult(db, cwd, contextInput);
  // Drop the context's own title so the report has a single heading.
  const contextBody = context.markdown.replace(/^# Anchor Context\n\n/, "");

  const header = [
    "# Anchor File Explain",
    "",
    `File: ${file}`,
    `Appears to own: ${ownership}`,
    `Important symbols: ${importantSymbols.join(", ") || "n/a"}`,
    ""
  ];
  const markdown = header.concat(contextBody).join("\n");

  const metadata = {
    ...context.metadata,
    mode: "explain_file",
    file,
    importantSymbols
  };
  return { markdown, metadata };
}
|
|
3198
|
+
|
|
3199
|
+
// src/retrieval/review-diff.ts
|
|
3200
|
+
/**
 * Extracts the post-change file paths mentioned in a git-style unified diff.
 *
 * Paths are taken from `diff --git a/<old> b/<new>` headers and from
 * `+++ b/<new>` lines, then de-duplicated with `uniqueStrings`.
 *
 * Differences from a naive line scan:
 * - Accepts LF and CRLF line endings. A trailing `\r` would otherwise stop
 *   every pattern from matching, because `.` does not consume `\r`.
 * - Returns an empty list for a missing or non-string diff instead of throwing.
 * - Ignores a trailing TAB on `+++` lines, so a path that ends with one is not
 *   reported as a second, different file.
 * - When the header names the same path on both sides, matches it with a
 *   back-reference so paths that themselves contain " b/" are not mis-split.
 *
 * @param {string} diff - Unified diff text.
 * @returns {string[]} Unique file paths in first-seen order.
 */
function filesFromDiff(diff) {
  if (typeof diff !== "string" || diff === "") return [];

  const samePathHeader = /^diff --git a\/(.+) b\/\1$/;
  const renameHeader = /^diff --git a\/(.+?) b\/(.+)$/;
  const newFileLine = /^\+\+\+ b\/(.+?)\t?$/;

  const files = [];
  const addPath = (candidate) => {
    if (candidate && candidate !== "/dev/null") files.push(candidate);
  };

  for (const line of diff.split(/\r?\n/)) {
    const same = line.match(samePathHeader);
    if (same) {
      addPath(same[1]);
    } else {
      // Renames/copies: old and new differ, so fall back to the lazy split.
      addPath(line.match(renameHeader)?.[2]);
    }
    addPath(line.match(newFileLine)?.[1]);
  }
  return uniqueStrings(files);
}
|
|
3210
|
+
/**
 * Returns `value` unchanged when it is an array, otherwise a new empty array.
 *
 * @param {*} value - Any value, typically untyped metadata.
 * @returns {Array} The same array instance, or `[]`.
 */
function asArray(value) {
  if (!Array.isArray(value)) {
    return [];
  }
  return value;
}
|
|
3213
|
+
/**
 * Produces an "Anchor Diff Review" report: blockers, risks, historical
 * constraints, regression checks and recommended tests for a change.
 *
 * The changed-file list comes from `input.files` when non-empty, otherwise it
 * is parsed from `input.diff`. A missing or non-string diff is treated as an
 * empty diff, so a request carrying neither field yields an empty report
 * rather than throwing.
 *
 * @param {object} db - Database handle passed to `buildAnchorContextResult`.
 * @param {string} cwd - Working directory passed to `buildAnchorContextResult`.
 * @param {{ files?: string[], diff?: string, strict?: boolean, maxResults?: number }} input
 * @returns {{ markdown: string, metadata: object }} Report text plus the context
 *   metadata, overridden with `mode` and `changedFiles`.
 */
function reviewDiff(db, cwd, input) {
  const diffText = typeof input.diff === "string" ? input.diff : "";
  const files = input.files?.length ? input.files : filesFromDiff(diffText);
  const contextInput = {
    task: "Review this diff against Anchor history, team rules, regressions, and tests.",
    files,
    diff: input.diff,
    strict: input.strict,
    maxResults: input.maxResults
  };
  const context = buildAnchorContextResult(db, cwd, contextInput);

  const items = asArray(context.metadata.items);
  const regressions = asArray(context.metadata.regressionEvents);
  const tests = asArray(context.metadata.relevantTests);
  const ruleItems = asArray(context.metadata.teamRules);

  const inCategories = (categories) => (item) => categories.includes(item.category ?? "");
  // Only rules that are neither stale nor weak are surfaced as blockers.
  const blockerRules = ruleItems.filter(
    (item) => item.freshnessStatus !== "stale" && item.confidenceLevel !== "weak"
  );
  const riskItems = items.filter(inCategories(["security_note", "bug_regression", "api_contract"]));
  const historicalConstraints = items.filter(
    inCategories(["constraint", "api_contract", "security_note", "architecture_decision"])
  );

  // Each section: heading, entries, max entries shown, fallback line, entry renderer.
  const sections = [
    {
      heading: "## Blockers",
      entries: blockerRules,
      limit: 4,
      emptyLine: "- No evidence-backed blockers found.",
      render: (rule) => `- Team rule evidence may block this change: ${rule.category ?? "rule"}.`
    },
    {
      heading: "## Risks",
      entries: riskItems,
      limit: 5,
      emptyLine: "- No specific historical risks found.",
      render: (item) => `- [${item.category}] PR #${item.prNumber}: preserve cited behavior.`
    },
    {
      heading: "## Historical constraints",
      entries: historicalConstraints,
      limit: 5,
      emptyLine: "- No matching constraints found.",
      render: (item) => `- PR #${item.prNumber}: ${item.category} (${item.confidenceLevel}).`
    },
    {
      heading: "## Regression checks",
      entries: regressions,
      limit: 5,
      emptyLine: "- No related regression memory found.",
      render: (event) => `- PR #${event.prNumber}: ${clipSentence(event.summary ?? "", 180)}`
    },
    {
      heading: "## Recommended tests",
      entries: tests,
      limit: 6,
      emptyLine: "- No related tests found in the local index.",
      render: (test) => `- ${test.path ?? "unknown test"} (${test.reason ?? "related"})`
    }
  ];

  const lines = ["# Anchor Diff Review", "", `Changed files: ${files.join(", ") || "n/a"}`, ""];
  sections.forEach((section, index) => {
    // The preamble already ends with a blank line; later sections need their own separator.
    if (index > 0) lines.push("");
    lines.push(section.heading, "");
    if (section.entries.length === 0) {
      lines.push(section.emptyLine);
      return;
    }
    for (const entry of section.entries.slice(0, section.limit)) {
      lines.push(section.render(entry));
    }
  });

  return {
    markdown: lines.join("\n"),
    metadata: {
      ...context.metadata,
      mode: "review_diff",
      changedFiles: files
    }
  };
}
|
|
3283
|
+
|
|
2336
3284
|
// src/github/client.ts
|
|
2337
3285
|
import { Octokit } from "@octokit/rest";
|
|
2338
3286
|
function createGitHubClient(token) {
|
|
@@ -2523,7 +3471,7 @@ async function fetchMergedPullRequests(options) {
|
|
|
2523
3471
|
|
|
2524
3472
|
// src/doctor.ts
|
|
2525
3473
|
import fs5 from "fs";
|
|
2526
|
-
import
|
|
3474
|
+
import path13 from "path";
|
|
2527
3475
|
/**
 * Builds one doctor check record.
 *
 * @param {string} name - Check title.
 * @param {boolean} ok - Whether the check passed.
 * @param {string} message - Human-readable outcome.
 * @param {string} [fix] - Remediation hint; only kept when the check failed.
 * @returns {{ name: string, ok: boolean, message: string, fix: (string|undefined) }}
 */
function check(name, ok, message, fix) {
  const result = { name, ok, message, fix: undefined };
  if (!ok) {
    // A passing check has nothing to fix, so the hint is only attached on failure.
    result.fix = fix;
  }
  return result;
}
|
|
@@ -2584,7 +3532,7 @@ async function runDoctor(options) {
|
|
|
2584
3532
|
)
|
|
2585
3533
|
);
|
|
2586
3534
|
}
|
|
2587
|
-
const cursorConfigPath =
|
|
3535
|
+
const cursorConfigPath = path13.join(gitRoot ?? cwd, ".cursor", "mcp.json");
|
|
2588
3536
|
let cursorConfig;
|
|
2589
3537
|
let cursorConfigValid = false;
|
|
2590
3538
|
if (fs5.existsSync(cursorConfigPath)) {
|
|
@@ -2659,7 +3607,7 @@ async function runDoctor(options) {
|
|
|
2659
3607
|
"Run pnpm build, then try anchor serve from the repository."
|
|
2660
3608
|
)
|
|
2661
3609
|
);
|
|
2662
|
-
const rulePath =
|
|
3610
|
+
const rulePath = path13.join(gitRoot ?? cwd, ".cursor", "rules", "anchor.mdc");
|
|
2663
3611
|
checks.push(
|
|
2664
3612
|
check(
|
|
2665
3613
|
"Cursor rule file exists",
|
|
@@ -2670,16 +3618,51 @@ async function runDoctor(options) {
|
|
|
2670
3618
|
);
|
|
2671
3619
|
return { ok: checks.every((item) => item.ok), checks };
|
|
2672
3620
|
}
|
|
3621
|
+
|
|
3622
|
+
// src/health.ts
|
|
3623
|
+
/**
 * Turns an index status record into an overall health verdict plus warnings.
 *
 * The verdict is "error" when the database is missing or its schema is
 * invalid, "warning" when any warning was collected, and "ok" otherwise.
 *
 * @param {object} status - Index status; reads `health`, `historyCoverage`,
 *   `staleCodeIndex`, `lastSuccessfulRun`, `lastFailedRun`, `suggestedNextCommand`.
 * @param {boolean} rulesOk - Whether the team rules file validated.
 * @returns {{ status: string, warnings: string[], suggestedNextCommand: *,
 *   historyCoverage: string, staleCodeIndex: boolean, lastSuccessfulRun: *,
 *   lastFailedRun: * }}
 */
function evaluateIndexHealth(status, rulesOk) {
  const {
    health,
    historyCoverage,
    staleCodeIndex,
    lastSuccessfulRun,
    lastFailedRun,
    suggestedNextCommand
  } = status;

  // [applies, message] pairs; array order is the order warnings are reported in.
  const candidates = [
    [health === "missing_database", "Anchor database is missing."],
    [health === "schema_invalid", "Anchor SQLite schema is invalid."],
    [health === "empty_index", "Anchor index is empty."],
    [historyCoverage !== "all", "PR history coverage is partial."],
    [Boolean(staleCodeIndex), "Code index is older than 7 days or has never run."],
    [!rulesOk, "Team rules file is missing or invalid."],
    [Boolean(lastFailedRun), `Last failed index run: ${lastFailedRun}.`]
  ];
  const warnings = candidates.filter(([applies]) => applies).map(([, message]) => message);

  let healthStatus = "ok";
  if (health === "missing_database" || health === "schema_invalid") {
    healthStatus = "error";
  } else if (warnings.length > 0) {
    healthStatus = "warning";
  }

  return {
    status: healthStatus,
    warnings,
    suggestedNextCommand,
    historyCoverage: historyCoverage ?? "unknown",
    staleCodeIndex: Boolean(staleCodeIndex),
    lastSuccessfulRun,
    lastFailedRun
  };
}
|
|
3644
|
+
/**
 * Reads the index status and team-rules validation for `cwd` and combines
 * them into a single health report.
 *
 * @param {string} cwd - Working directory to inspect.
 * @returns {object} The fields produced by `evaluateIndexHealth`, plus the raw
 *   `indexStatus` record.
 */
function getAnchorIndexHealth(cwd) {
  const indexStatus = getIndexStatus(cwd);
  const { ok: rulesOk } = validateTeamRulesFile(cwd);
  const verdict = evaluateIndexHealth(indexStatus, rulesOk);
  return { ...verdict, indexStatus };
}
|
|
2673
3652
|
export {
|
|
2674
3653
|
ANCHOR_CURSOR_RULE,
|
|
2675
3654
|
DEFAULT_MAX_CODE_FILE_BYTES,
|
|
2676
3655
|
SCHEMA_SQL,
|
|
2677
3656
|
TEAM_RULES_FILE,
|
|
3657
|
+
addTeamRule,
|
|
2678
3658
|
anchorMcpEntry,
|
|
3659
|
+
buildAnchorContextResult,
|
|
2679
3660
|
buildFtsQuery,
|
|
3661
|
+
buildQueryTerms,
|
|
2680
3662
|
canonicalizeText,
|
|
2681
3663
|
categorizeWisdom,
|
|
2682
3664
|
checkSchema,
|
|
3665
|
+
checkTeamRuleEvidence,
|
|
2683
3666
|
chunkCodeFile,
|
|
2684
3667
|
chunkHistoricalText,
|
|
2685
3668
|
claimKeyFor,
|
|
@@ -2702,23 +3685,31 @@ export {
|
|
|
2702
3685
|
ensureRepository,
|
|
2703
3686
|
ensureTeamRulesFile,
|
|
2704
3687
|
evaluateFreshness,
|
|
3688
|
+
evaluateIndexHealth,
|
|
2705
3689
|
evidenceForWisdom,
|
|
3690
|
+
explainFile,
|
|
2706
3691
|
extractCodeSymbols,
|
|
3692
|
+
extractRegressionEvents,
|
|
2707
3693
|
extractSymbols,
|
|
2708
3694
|
extractWisdomUnits,
|
|
2709
3695
|
fetchMergedPullRequests,
|
|
2710
3696
|
fetchPullRequestDetails,
|
|
3697
|
+
filesFromDiff,
|
|
2711
3698
|
formatAnchorContext,
|
|
2712
3699
|
formatIndexStatus,
|
|
2713
3700
|
formatSearchHistory,
|
|
3701
|
+
getAnchorIndexHealth,
|
|
2714
3702
|
getIndexStatus,
|
|
2715
3703
|
getLastSyncTime,
|
|
3704
|
+
getSemanticStatus,
|
|
2716
3705
|
githubAuthFixMessage,
|
|
2717
3706
|
hasHighSignalLanguage,
|
|
2718
3707
|
indexCodebase,
|
|
2719
3708
|
indexPullRequests,
|
|
3709
|
+
inferTestAwareness,
|
|
2720
3710
|
initializeSchema,
|
|
2721
3711
|
isHardExcludedCodePath,
|
|
3712
|
+
isTestFilePath,
|
|
2722
3713
|
loadCurrentCodeSnapshot,
|
|
2723
3714
|
loadTeamRulesFile,
|
|
2724
3715
|
mergeAnchorMcpConfig,
|
|
@@ -2726,14 +3717,18 @@ export {
|
|
|
2726
3717
|
openAnchorDatabase,
|
|
2727
3718
|
parseGitHubRemote,
|
|
2728
3719
|
rankCodeChunks,
|
|
3720
|
+
rankRegressionEvents,
|
|
3721
|
+
rankRelevantTests,
|
|
2729
3722
|
rankTeamRules,
|
|
2730
3723
|
rankWisdomUnits,
|
|
3724
|
+
recordIndexRun,
|
|
2731
3725
|
redactSecrets,
|
|
2732
3726
|
redactedHistoricalText,
|
|
2733
3727
|
replaceCodeIndex,
|
|
2734
3728
|
resolveGitHubToken,
|
|
2735
3729
|
resolvePullRequestDetailConcurrency,
|
|
2736
3730
|
resolvePullRequestFetchLimit,
|
|
3731
|
+
reviewDiff,
|
|
2737
3732
|
runDoctor,
|
|
2738
3733
|
sanitizeHistoricalText,
|
|
2739
3734
|
shouldSyncSince,
|