@pratik7368patil/anchor-core 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +259 -4
- package/dist/index.js +1643 -98
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/db/schema.sql +65 -0
package/dist/index.js
CHANGED
|
@@ -252,8 +252,8 @@ function redactedHistoricalText(text) {
|
|
|
252
252
|
}
|
|
253
253
|
|
|
254
254
|
// src/db/database.ts
|
|
255
|
-
import
|
|
256
|
-
import
|
|
255
|
+
import fs3 from "fs";
|
|
256
|
+
import path4 from "path";
|
|
257
257
|
import Database from "better-sqlite3";
|
|
258
258
|
|
|
259
259
|
// src/db/migrations.ts
|
|
@@ -376,10 +376,70 @@ CREATE TABLE IF NOT EXISTS code_index_state (
|
|
|
376
376
|
skipped_files INTEGER NOT NULL
|
|
377
377
|
);
|
|
378
378
|
|
|
379
|
+
CREATE TABLE IF NOT EXISTS test_files (
|
|
380
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
381
|
+
repo_id INTEGER NOT NULL REFERENCES repositories(id) ON DELETE CASCADE,
|
|
382
|
+
path TEXT NOT NULL,
|
|
383
|
+
language TEXT,
|
|
384
|
+
size_bytes INTEGER NOT NULL,
|
|
385
|
+
content_hash TEXT NOT NULL,
|
|
386
|
+
updated_at TEXT NOT NULL,
|
|
387
|
+
UNIQUE(repo_id, path)
|
|
388
|
+
);
|
|
389
|
+
|
|
390
|
+
CREATE TABLE IF NOT EXISTS test_links (
|
|
391
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
392
|
+
repo_id INTEGER NOT NULL REFERENCES repositories(id) ON DELETE CASCADE,
|
|
393
|
+
source_path TEXT NOT NULL,
|
|
394
|
+
test_path TEXT NOT NULL,
|
|
395
|
+
reason TEXT NOT NULL,
|
|
396
|
+
strength REAL NOT NULL,
|
|
397
|
+
UNIQUE(repo_id, source_path, test_path, reason)
|
|
398
|
+
);
|
|
399
|
+
|
|
400
|
+
CREATE TABLE IF NOT EXISTS regression_events (
|
|
401
|
+
id TEXT PRIMARY KEY,
|
|
402
|
+
repo_id INTEGER NOT NULL REFERENCES repositories(id) ON DELETE CASCADE,
|
|
403
|
+
pr_id INTEGER REFERENCES pull_requests(id) ON DELETE CASCADE,
|
|
404
|
+
repo TEXT NOT NULL,
|
|
405
|
+
pr_number INTEGER NOT NULL,
|
|
406
|
+
pr_url TEXT NOT NULL,
|
|
407
|
+
summary_sanitized TEXT NOT NULL,
|
|
408
|
+
file_paths_json TEXT NOT NULL,
|
|
409
|
+
symbols_json TEXT NOT NULL,
|
|
410
|
+
test_paths_json TEXT NOT NULL,
|
|
411
|
+
authors_json TEXT NOT NULL,
|
|
412
|
+
labels_json TEXT NOT NULL,
|
|
413
|
+
signals_json TEXT NOT NULL,
|
|
414
|
+
created_at TEXT NOT NULL,
|
|
415
|
+
merged_at TEXT,
|
|
416
|
+
confidence REAL NOT NULL
|
|
417
|
+
);
|
|
418
|
+
|
|
419
|
+
CREATE TABLE IF NOT EXISTS index_runs (
|
|
420
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
421
|
+
command TEXT NOT NULL,
|
|
422
|
+
repo TEXT,
|
|
423
|
+
started_at TEXT NOT NULL,
|
|
424
|
+
finished_at TEXT,
|
|
425
|
+
history_coverage TEXT,
|
|
426
|
+
history_limit INTEGER,
|
|
427
|
+
prs_fetched INTEGER,
|
|
428
|
+
prs_skipped INTEGER,
|
|
429
|
+
comments_indexed INTEGER,
|
|
430
|
+
code_files_indexed INTEGER,
|
|
431
|
+
test_files_indexed INTEGER,
|
|
432
|
+
failures_json TEXT NOT NULL DEFAULT '[]',
|
|
433
|
+
status TEXT NOT NULL
|
|
434
|
+
);
|
|
435
|
+
|
|
379
436
|
CREATE TABLE IF NOT EXISTS sync_state (
|
|
380
437
|
repo TEXT PRIMARY KEY,
|
|
381
438
|
last_sync_at TEXT,
|
|
382
439
|
last_indexed_pr INTEGER,
|
|
440
|
+
history_coverage TEXT,
|
|
441
|
+
history_limit INTEGER,
|
|
442
|
+
history_since TEXT,
|
|
383
443
|
updated_at TEXT NOT NULL
|
|
384
444
|
);
|
|
385
445
|
|
|
@@ -390,14 +450,566 @@ CREATE INDEX IF NOT EXISTS idx_wisdom_units_category ON wisdom_units(category);
|
|
|
390
450
|
CREATE INDEX IF NOT EXISTS idx_wisdom_units_pr ON wisdom_units(pr_id);
|
|
391
451
|
CREATE INDEX IF NOT EXISTS idx_code_files_path ON code_files(path);
|
|
392
452
|
CREATE INDEX IF NOT EXISTS idx_code_chunks_file_path ON code_chunks(file_path);
|
|
453
|
+
CREATE INDEX IF NOT EXISTS idx_test_files_path ON test_files(path);
|
|
454
|
+
CREATE INDEX IF NOT EXISTS idx_test_links_source ON test_links(source_path);
|
|
455
|
+
CREATE INDEX IF NOT EXISTS idx_test_links_test ON test_links(test_path);
|
|
456
|
+
CREATE INDEX IF NOT EXISTS idx_regression_events_pr ON regression_events(pr_id);
|
|
457
|
+
CREATE INDEX IF NOT EXISTS idx_index_runs_started ON index_runs(started_at);
|
|
393
458
|
`;
|
|
394
459
|
|
|
460
|
+
// src/rules/team-rules.ts
|
|
461
|
+
import fs2 from "fs";
|
|
462
|
+
import path2 from "path";
|
|
463
|
+
import { z } from "zod";
|
|
464
|
+
|
|
465
|
+
// src/retrieval/evidence.ts
|
|
466
|
+
/**
 * Builds the key used to recognise the same claim across evidence items.
 *
 * @param {string} category - Category the claim belongs to.
 * @param {string} sanitizedText - Sanitized claim text.
 * @returns {string} `category`, a colon, and at most the first 180 characters
 *   of the canonicalized text.
 */
function claimKeyFor(category, sanitizedText) {
  const canonicalPrefix = canonicalizeText(sanitizedText).slice(0, 180);
  return `${category}:${canonicalPrefix}`;
}
|
|
469
|
+
/**
 * Buckets a numeric confidence into a named level.
 *
 * @param {number} confidence - Numeric confidence score.
 * @returns {"strong"|"moderate"|"weak"} "strong" from 0.75, "moderate" from
 *   0.55, otherwise "weak" (including for NaN, which fails every comparison).
 */
function confidenceLevelFor(confidence) {
  // Ordered from the highest threshold down; the first one reached wins.
  const thresholds = [
    [0.75, "strong"],
    [0.55, "moderate"],
  ];
  for (const [minimum, level] of thresholds) {
    if (confidence >= minimum) return level;
  }
  return "weak";
}
|
|
474
|
+
/**
 * Maps a confidence level name to its ordinal rank.
 *
 * A Map is used instead of a plain object so that names inherited from
 * Object.prototype (for example "constructor" or "toString") are treated as
 * unknown levels rather than resolving to a function.
 *
 * @param {string} level - "weak", "moderate" or "strong".
 * @returns {number|undefined} 1, 2 or 3; `undefined` for any other value.
 */
function confidenceRank(level) {
  const ranks = new Map([
    ["weak", 1],
    ["moderate", 2],
    ["strong", 3],
  ]);
  return ranks.get(level);
}
|
|
482
|
+
/**
 * Tests whether a confidence level meets a required minimum.
 *
 * @param {string} level - Level being checked.
 * @param {string} minimum - Lowest acceptable level.
 * @returns {boolean} True when the rank of `level` is greater than or equal
 *   to the rank of `minimum`, as ranked by `confidenceRank`.
 */
function confidenceAtLeast(level, minimum) {
  const actualRank = confidenceRank(level);
  const requiredRank = confidenceRank(minimum);
  return actualRank >= requiredRank;
}
|
|
485
|
+
/**
 * Reduces a wisdom unit to a compact evidence reference.
 *
 * @param {object} unit - Wisdom unit with `prNumber`, `prUrl`, `sourceType`,
 *   `authors` and `filePaths`.
 * @returns {object} Evidence reference carrying the PR fields plus the first
 *   author and first file path (each `undefined` when its list is empty).
 */
function evidenceForWisdom(unit) {
  const { prNumber, prUrl, sourceType } = unit;
  const author = unit.authors[0];
  const filePath = unit.filePaths[0];
  return { prNumber, prUrl, sourceType, author, filePath };
}
|
|
494
|
+
/**
 * Lists the human-readable reasons behind a wisdom unit's confidence.
 *
 * @param {object} unit - Wisdom unit with `sourceType`, `filePaths`,
 *   `symbols` and `sanitizedText`.
 * @param {number} repeatedEvidenceCount - Count reported in the
 *   "repeated across N PRs" reason.
 * @returns {string[]} Reasons in a fixed order: source, file association,
 *   symbol association, regression wording, constraint wording, repetition.
 */
function confidenceReasonsFor(unit, repeatedEvidenceCount) {
  const { sourceType, sanitizedText } = unit;

  let sourceReason;
  switch (sourceType) {
    case "review_comment":
    case "review_summary":
      sourceReason = "reviewer evidence";
      break;
    case "pr_body":
      sourceReason = "PR description evidence";
      break;
    case "commit_message":
      sourceReason = "commit message evidence";
      break;
    default:
      // Any other source type is shown using its own name.
      sourceReason = sourceTypeLabel(sourceType);
  }

  const reasons = [sourceReason];
  if (unit.filePaths.length > 0) {
    reasons.push("file-associated");
  }
  if (unit.symbols.length > 0) {
    reasons.push("symbol-associated");
  }

  const mentionsRegression = /\b(regression|this broke|broke|root cause)\b/i.test(sanitizedText);
  if (mentionsRegression) {
    reasons.push("regression language");
  }

  const statesConstraint = /\b(do not|don't|must|should not|avoid|invariant|contract)\b/i.test(sanitizedText);
  if (statesConstraint) {
    reasons.push("constraint language");
  }

  if (repeatedEvidenceCount > 1) {
    reasons.push(`repeated across ${repeatedEvidenceCount} PRs`);
  }
  return reasons;
}
|
|
518
|
+
/**
 * Turns a snake_case source type into a spaced label.
 *
 * @param {string} sourceType - For example "issue_comment".
 * @returns {string} The same text with every underscore replaced by a space.
 */
function sourceTypeLabel(sourceType) {
  return sourceType.split("_").join(" ");
}
|
|
521
|
+
/**
 * Parses a JSON-encoded array and keeps only its string members.
 *
 * @param {string} value - JSON text expected to encode an array.
 * @returns {string[]} The string items; an empty array when the text is not
 *   valid JSON or does not decode to an array.
 */
function parseJsonArray(value) {
  let decoded;
  try {
    decoded = JSON.parse(value);
  } catch {
    // Malformed JSON is deliberately treated as "no items".
    return [];
  }
  if (!Array.isArray(decoded)) return [];
  return decoded.filter((entry) => typeof entry === "string");
}
|
|
529
|
+
/**
 * Reads the current code index into in-memory lookup structures.
 *
 * @param {object} db - Database handle exposing `prepare(sql).all()`.
 * @returns {{hasCodeIndex: boolean, filePaths: Set<string>,
 *   symbolsByFile: Map<string, Set<string>>, allSymbols: Set<string>}}
 *   `hasCodeIndex` is true when either table returned a row; all symbols are
 *   stored lower-cased.
 */
function loadCurrentCodeSnapshot(db) {
  const fileRows = db.prepare("SELECT path FROM code_files").all();
  const chunkRows = db.prepare("SELECT file_path, symbols_json FROM code_chunks").all();

  const filePaths = new Set();
  for (const fileRow of fileRows) {
    filePaths.add(fileRow.path);
  }

  const symbolsByFile = new Map();
  const allSymbols = new Set();
  chunkRows.forEach((chunkRow) => {
    const lowered = parseJsonArray(chunkRow.symbols_json).map((name) => name.toLowerCase());
    // Every chunk's file gets an entry, even when the chunk lists no symbols.
    let perFile = symbolsByFile.get(chunkRow.file_path);
    if (perFile == null) {
      perFile = new Set();
    }
    lowered.forEach((name) => {
      perFile.add(name);
      allSymbols.add(name);
    });
    symbolsByFile.set(chunkRow.file_path, perFile);
  });

  const hasCodeIndex = fileRows.length > 0 || chunkRows.length > 0;
  return { hasCodeIndex, filePaths, symbolsByFile, allSymbols };
}
|
|
551
|
+
/**
 * Judges whether historical evidence still applies to the indexed code.
 *
 * @param {{filePaths: string[], symbols: string[]}} subject - Evidence (or
 *   rule) whose file paths and symbols are checked.
 * @param {{hasCodeIndex: boolean, filePaths: Set<string>,
 *   symbolsByFile: Map<string, Set<string>>, allSymbols: Set<string>}} snapshot
 *   - Current code index; symbols in it are expected to be lower-case.
 * @returns {{status: "current"|"possibly_stale"|"stale", reason: string}}
 */
function evaluateFreshness(subject, snapshot) {
  const verdict = (status, reason) => ({ status, reason });

  if (!snapshot.hasCodeIndex) {
    return verdict("possibly_stale", "No current code index is available to verify this evidence.");
  }

  const historicalPaths = subject.filePaths.filter(Boolean);
  const wantedSymbols = subject.symbols.map((name) => name.toLowerCase()).filter(Boolean);
  const knownSomewhere = () => wantedSymbols.some((name) => snapshot.allSymbols.has(name));

  // Without file paths, only a symbol hit anywhere in the index can confirm.
  if (historicalPaths.length === 0) {
    return knownSomewhere()
      ? verdict("current", "Referenced symbol exists in the current code index.")
      : verdict("possibly_stale", "Evidence has no exact current file path to verify.");
  }

  const survivingPaths = historicalPaths.filter((candidate) => snapshot.filePaths.has(candidate));
  if (survivingPaths.length === 0) {
    return verdict("stale", "None of the historical file paths exist in the current code index.");
  }
  if (wantedSymbols.length === 0) {
    return verdict("current", "At least one historical file path exists in the current code index.");
  }

  const foundInPlace = survivingPaths.some((candidate) => {
    const symbolsHere = snapshot.symbolsByFile.get(candidate);
    return wantedSymbols.some((name) => symbolsHere?.has(name));
  });
  if (foundInPlace) {
    return verdict("current", "Historical file and symbol are present in the current code index.");
  }

  return knownSomewhere()
    ? verdict("possibly_stale", "The historical file exists, but the referenced symbol appears elsewhere or moved.")
    : verdict("possibly_stale", "The historical file exists, but referenced symbols were not found there.");
}
|
|
605
|
+
|
|
606
|
+
// src/rules/team-rules.ts
|
|
607
|
+
// File name of the team rules document; rulesPath() joins it onto the
// repository root (or the working directory when no git root is detected).
var TEAM_RULES_FILE = "anchor.rules.json";
// Accepted values for an evidence item's `sourceType`.
var SourceTypeSchema = z.enum([
  "pr_body",
  "review_comment",
  "issue_comment",
  "review_summary",
  "commit_message",
  "diff_context"
]);
// Accepted values for a rule's `category`.
var WisdomCategorySchema = z.enum([
  "architecture_decision",
  "constraint",
  "rejected_approach",
  "bug_regression",
  "testing_rule",
  "api_contract",
  "performance_note",
  "security_note",
  "style_convention",
  "unknown"
]);
// Accepted confidence level names.
var ConfidenceLevelSchema = z.enum(["strong", "moderate", "weak"]);
// One pull-request reference backing a rule. `author`, `filePath` and `note`
// are optional; `note` is capped at 500 characters.
var EvidenceRefSchema = z.object({
  prNumber: z.number().int().positive(),
  prUrl: z.string().url(),
  sourceType: SourceTypeSchema,
  author: z.string().min(1).optional(),
  filePath: z.string().min(1).optional(),
  note: z.string().min(1).max(500).optional()
});
// One team rule. The id must start with a letter or digit and may continue
// with letters, digits, ".", "_" or "-" (case-insensitive). At least one
// evidence item is required; `confidenceLevel` defaults to "strong".
var TeamRuleSchema = z.object({
  id: z.string().min(1).max(120).regex(/^[a-z0-9][a-z0-9._-]*$/i),
  category: WisdomCategorySchema,
  text: z.string().min(1).max(1e3),
  filePaths: z.array(z.string().min(1)).max(50).default([]),
  symbols: z.array(z.string().min(1)).max(100).default([]),
  evidence: z.array(EvidenceRefSchema).min(1),
  confidenceLevel: ConfidenceLevelSchema.default("strong")
});
// Whole-file shape: only version 1 is accepted, and `rules` defaults to an
// empty list when omitted.
var TeamRulesFileSchema = z.object({
  version: z.literal(1),
  rules: z.array(TeamRuleSchema).default([])
});
|
|
650
|
+
/**
 * Resolves where the team rules file lives for a working directory.
 *
 * @param {string} cwd - Directory the command is running from.
 * @returns {string} `TEAM_RULES_FILE` joined onto the detected git root, or
 *   onto `cwd` when `detectGitRoot` yields null/undefined.
 */
function rulesPath(cwd) {
  const baseDir = detectGitRoot(cwd) ?? cwd;
  return path2.join(baseDir, TEAM_RULES_FILE);
}
|
|
653
|
+
/**
 * Produces the contents of a freshly initialised rules file.
 *
 * @returns {string} Two-space-indented JSON for a version 1 file with no
 *   rules, terminated by a newline.
 */
function defaultRulesFile() {
  const emptyRules = { version: 1, rules: [] };
  return JSON.stringify(emptyRules, null, 2) + "\n";
}
|
|
657
|
+
/**
 * Creates the team rules file with default contents when it is missing.
 *
 * @param {string} cwd - Directory used to resolve the rules file location.
 * @returns {{path: string, created: boolean}} The rules file path and
 *   whether this call wrote it.
 */
function ensureTeamRulesFile(cwd) {
  const target = rulesPath(cwd);
  const created = !fs2.existsSync(target);
  if (created) {
    fs2.writeFileSync(target, defaultRulesFile());
  }
  return { path: target, created };
}
|
|
663
|
+
/**
 * Copies evidence items, passing each note through the sanitizer.
 *
 * @param {object[]} evidence - Evidence items, each optionally with a `note`.
 * @returns {object[]} New items; `note` is the sanitized text, or an explicit
 *   `undefined` when the original note was missing or empty.
 */
function sanitizeEvidence(evidence) {
  return evidence.map((item) => {
    const note = item.note ? sanitizeHistoricalText(item.note) : undefined;
    return { ...item, note };
  });
}
|
|
669
|
+
/**
 * Loads, validates and sanitizes the team rules file.
 *
 * @param {string} cwd - Directory used to resolve the rules file location.
 * @returns {{ok: boolean, exists: boolean, path: string, errors: string[],
 *   rules: object[]}} A missing file is reported as `ok: true,
 *   exists: false`. Unreadable or invalid JSON, schema violations and
 *   duplicate rule ids yield `ok: false` with messages in `errors` and no
 *   rules.
 */
function loadTeamRulesFile(cwd) {
  const filePath = rulesPath(cwd);
  const failed = (errors) => ({ ok: false, exists: true, path: filePath, errors, rules: [] });

  if (!fs2.existsSync(filePath)) {
    return { ok: true, exists: false, path: filePath, errors: [], rules: [] };
  }

  let parsedJson;
  try {
    parsedJson = JSON.parse(fs2.readFileSync(filePath, "utf8"));
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    return failed([`Invalid JSON: ${detail}`]);
  }

  const parsed = TeamRulesFileSchema.safeParse(parsedJson);
  if (!parsed.success) {
    return failed(parsed.error.issues.map((issue) => `${issue.path.join(".")}: ${issue.message}`));
  }

  // Record every second-or-later occurrence of an id.
  const seenIds = new Set();
  const duplicateIds = [];
  for (const { id } of parsed.data.rules) {
    if (seenIds.has(id)) {
      duplicateIds.push(id);
    } else {
      seenIds.add(id);
    }
  }
  if (duplicateIds.length > 0) {
    return failed([`Duplicate rule ids: ${uniqueStrings(duplicateIds).join(", ")}`]);
  }

  const rules = [];
  for (const rule of parsed.data.rules) {
    const sanitizedText = sanitizeHistoricalText(rule.text);
    rules.push({
      id: rule.id,
      category: rule.category,
      // `text` carries the sanitized form too; the raw text is not returned.
      text: sanitizedText,
      sanitizedText,
      filePaths: uniqueStrings(rule.filePaths),
      symbols: uniqueStrings(rule.symbols),
      evidence: sanitizeEvidence(rule.evidence),
      confidenceLevel: rule.confidenceLevel,
    });
  }
  return { ok: true, exists: true, path: filePath, errors: [], rules };
}
|
|
726
|
+
/**
 * Validates the team rules file, treating a missing file as an error.
 *
 * @param {string} cwd - Directory used to resolve the rules file location.
 * @returns {{ok: boolean, path: string, errors: string[], rules: object[]}}
 *   The load result without its `exists` flag, or a failure telling the user
 *   to run `anchor rules init` when the file does not exist.
 */
function validateTeamRulesFile(cwd) {
  const { exists, ok, path: filePath, errors, rules } = loadTeamRulesFile(cwd);
  if (exists) {
    return { ok, path: filePath, errors, rules };
  }
  return {
    ok: false,
    path: filePath,
    errors: [`${TEAM_RULES_FILE} does not exist. Run anchor rules init.`],
    rules: [],
  };
}
|
|
743
|
+
/**
 * Appends a rule to the team rules file, creating the file when needed.
 *
 * The previous file contents are restored if the updated file fails
 * validation, so a rejected rule (bad id, duplicate id, invalid URL, ...)
 * never leaves the rules file in a state that later loads would reject.
 *
 * @param {string} cwd - Directory used to resolve the rules file location.
 * @param {object} input - Rule fields: `id`, `category`, `text`, `prNumber`,
 *   `prUrl`, and optionally `filePaths`, `symbols`, `sourceType`
 *   (defaults to "pr_body").
 * @returns {{path: string, rule: object}} The rules file path and the stored
 *   rule as returned by validation.
 * @throws {Error} When the existing file is not a JSON object with an array
 *   (or absent) `rules`, or when the resulting file fails validation.
 */
function addTeamRule(cwd, input) {
  ensureTeamRulesFile(cwd);
  const filePath = rulesPath(cwd);
  const previousContent = fs2.readFileSync(filePath, "utf8");

  let raw;
  try {
    raw = JSON.parse(previousContent);
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    throw new Error(`Cannot add Anchor rule: ${filePath} is not valid JSON: ${detail}`, { cause: error });
  }
  // Refuse to overwrite content we do not understand instead of silently
  // discarding it or crashing on a property access.
  if (raw === null || typeof raw !== "object" || Array.isArray(raw)) {
    throw new Error(`Cannot add Anchor rule: ${filePath} must contain a JSON object.`);
  }
  const existingRules = raw.rules ?? [];
  if (!Array.isArray(existingRules)) {
    throw new Error(`Cannot add Anchor rule: "rules" in ${filePath} must be an array.`);
  }

  const nextRule = {
    id: input.id,
    category: input.category,
    text: input.text,
    filePaths: input.filePaths ?? [],
    symbols: input.symbols ?? [],
    evidence: [
      {
        prNumber: input.prNumber,
        prUrl: input.prUrl,
        sourceType: input.sourceType ?? "pr_body",
      },
    ],
    confidenceLevel: "strong",
  };
  const next = { version: 1, rules: [...existingRules, nextRule] };
  fs2.writeFileSync(filePath, `${JSON.stringify(next, null, 2)}\n`);

  const validation = validateTeamRulesFile(cwd);
  const rule = validation.ok ? validation.rules.find((item) => item.id === input.id) : undefined;
  if (!rule) {
    // Roll back so the rejected rule is not persisted.
    fs2.writeFileSync(filePath, previousContent);
    if (!validation.ok) {
      throw new Error(`Invalid Anchor rule: ${validation.errors.join("; ")}`);
    }
    throw new Error(`Failed to add Anchor rule ${input.id}`);
  }
  return { path: filePath, rule };
}
|
|
773
|
+
/**
 * Checks that every evidence PR cited by the team rules exists in the index.
 *
 * @param {string} cwd - Directory used to resolve the rules file and database.
 * @returns {{ok: boolean, path: string, checked: number,
 *   missing: {ruleId: string, prNumber: number}[], errors: string[]}}
 *   `ok` is false when the rules file is invalid, the database file is
 *   absent, or any cited PR number is not found.
 */
function checkTeamRuleEvidence(cwd) {
  const validation = validateTeamRulesFile(cwd);
  if (!validation.ok) {
    return {
      ok: false,
      path: validation.path,
      checked: 0,
      missing: [],
      errors: validation.errors,
    };
  }

  // Resolve the root once; it is needed for both the path and the open call.
  const root = detectGitRoot(cwd) ?? cwd;
  const databasePath = defaultDatabasePath(root);
  if (!fs2.existsSync(databasePath)) {
    return {
      ok: false,
      path: validation.path,
      checked: 0,
      missing: [],
      errors: [`Anchor database not found at ${databasePath}. Run anchor index first.`],
    };
  }

  const db = openAnchorDatabase(root, databasePath);
  try {
    initializeSchema(db);
    // Prepared once and reused for every evidence row.
    // NOTE(review): the lookup matches on PR number only, not on repository -
    // confirm that is intended when several repositories share one database.
    const findPullRequest = db.prepare("SELECT 1 FROM pull_requests WHERE number = ? LIMIT 1");
    const missing = [];
    let checked = 0;
    for (const rule of validation.rules) {
      for (const evidence of rule.evidence) {
        checked += 1;
        if (!findPullRequest.get(evidence.prNumber)) {
          missing.push({ ruleId: rule.id, prNumber: evidence.prNumber });
        }
      }
    }
    return {
      ok: missing.length === 0,
      path: validation.path,
      checked,
      missing,
      errors: [],
    };
  } finally {
    db.close();
  }
}
|
|
817
|
+
/**
 * Scores how closely any rule path relates to any queried file.
 *
 * Comparison is case-insensitive. The best pairing wins:
 * 1 for the same path, 0.72 for the same file name, 0.6 for the same
 * directory, 0.35 when one directory is nested inside the other, else 0.
 *
 * Nesting is tested on a path-separator boundary, so "src/db" is an ancestor
 * of "src/db/migrations" but not of "src/dbutils", and the root directory
 * "." is not treated as an ancestor of ".github".
 *
 * @param {string[]} rulePaths - File paths attached to the rule.
 * @param {string[]} queryFiles - File paths from the query.
 * @returns {number} Best score in [0, 1]; 0 when either list is empty.
 */
function pathMatch(rulePaths, queryFiles) {
  if (rulePaths.length === 0 || queryFiles.length === 0) return 0;

  const isNestedIn = (inner, outer) => inner.startsWith(`${outer}/`) || inner.startsWith(`${outer}\\`);

  // Lower-case and split each query file once rather than once per rule path.
  const queries = queryFiles.map((queryFile) => ({
    full: queryFile.toLowerCase(),
    base: path2.basename(queryFile).toLowerCase(),
    dir: path2.dirname(queryFile).toLowerCase(),
  }));

  let best = 0;
  for (const rulePath of rulePaths) {
    const ruleFull = rulePath.toLowerCase();
    const ruleBase = path2.basename(rulePath).toLowerCase();
    const ruleDir = path2.dirname(rulePath).toLowerCase();
    for (const query of queries) {
      let score = 0;
      if (ruleFull === query.full) score = 1;
      else if (ruleBase === query.base) score = 0.72;
      else if (ruleDir === query.dir) score = 0.6;
      else if (isNestedIn(ruleDir, query.dir) || isNestedIn(query.dir, ruleDir)) score = 0.35;
      best = Math.max(best, score);
    }
  }
  return best;
}
|
|
836
|
+
/**
 * Scores how well the queried symbols match a rule's symbols.
 *
 * Comparison is case-insensitive: 1 for an exact name match, 0.45 when one
 * name contains the other, else 0. Empty query symbols are ignored, because
 * an empty string is a substring of every name and would otherwise score 0.45
 * against any rule.
 *
 * @param {{symbols: string[]}} rule - Rule whose symbols are compared.
 * @param {string[]} querySymbols - Symbols from the query.
 * @returns {number} Best score across all query symbols; 0 when either list
 *   is empty.
 */
function symbolMatch(rule, querySymbols) {
  if (rule.symbols.length === 0 || querySymbols.length === 0) return 0;
  const ruleSymbols = rule.symbols.map((symbol) => symbol.toLowerCase());
  const exactNames = new Set(ruleSymbols);
  let best = 0;
  for (const symbol of querySymbols) {
    const lower = symbol.toLowerCase();
    if (lower === "") continue;
    if (exactNames.has(lower)) {
      best = Math.max(best, 1);
    } else if (ruleSymbols.some((candidate) => candidate.includes(lower) || lower.includes(candidate))) {
      best = Math.max(best, 0.45);
    }
  }
  return best;
}
|
|
849
|
+
/**
 * Measures the share of query tokens that appear in a rule.
 *
 * @param {object} rule - Rule with `sanitizedText`, `filePaths`, `symbols`.
 * @param {object} input - Query with `task` and optional `diff`,
 *   `currentCode`.
 * @returns {number} Fraction (0 to 1) of tokens from `tokenizeSearchText`
 *   found, case-insensitively, in the rule's text, paths and symbols; 0 when
 *   there are no tokens.
 */
function textMatch(rule, input) {
  const queryText = `${input.task} ${input.diff ?? ""} ${input.currentCode ?? ""}`;
  const tokens = tokenizeSearchText(queryText, 32);
  if (tokens.length === 0) return 0;

  const haystack = `${rule.sanitizedText} ${rule.filePaths.join(" ")} ${rule.symbols.join(" ")}`.toLowerCase();
  let hits = 0;
  for (const token of tokens) {
    if (haystack.includes(token.toLowerCase())) hits += 1;
  }
  return hits / tokens.length;
}
|
|
858
|
+
/**
 * Converts a confidence level name into a ranking weight.
 *
 * @param {string} level - Confidence level name.
 * @returns {number} 1 for "strong", 0.7 for "moderate", 0.4 for anything
 *   else.
 */
function confidenceScore(level) {
  switch (level) {
    case "strong":
      return 1;
    case "moderate":
      return 0.7;
    default:
      return 0.4;
  }
}
|
|
863
|
+
/**
 * Lists why a team rule is considered trustworthy.
 *
 * @param {object} rule - Rule with `evidence`, `filePaths` and `symbols`.
 * @returns {string[]} "team-approved rule", then a label for the first
 *   evidence item's source type (or "source evidence" when there is none),
 *   then file/symbol association markers where applicable.
 */
function confidenceReasons(rule) {
  const firstEvidence = rule.evidence[0];
  const reasons = ["team-approved rule"];
  reasons.push(firstEvidence ? `${sourceTypeLabel(firstEvidence.sourceType)} evidence` : "source evidence");
  if (rule.filePaths.length > 0) {
    reasons.push("file-associated");
  }
  if (rule.symbols.length > 0) {
    reasons.push("symbol-associated");
  }
  return reasons;
}
|
|
872
|
+
/**
 * Explains which ranking signals caused a rule to match.
 *
 * @param {{filePathMatch: number, symbolMatch: number, textMatch: number}} parts
 *   - Individual match scores.
 * @returns {string[]} "team-approved rule" followed by the file, symbol and
 *   text reasons whose thresholds were reached (at most five entries).
 */
function matchReasons(parts) {
  const { filePathMatch: fileScore, symbolMatch: symbolScore, textMatch: textScore } = parts;
  const reasons = ["team-approved rule"];

  if (fileScore >= 0.9) {
    reasons.push("exact file path match");
  } else if (fileScore >= 0.45) {
    reasons.push("related file path match");
  }

  if (symbolScore >= 0.9) {
    reasons.push("exact symbol match");
  } else if (symbolScore >= 0.45) {
    reasons.push("symbol-associated rule");
  }

  if (textScore >= 0.35) {
    reasons.push("text matched task or diff terms");
  }
  return reasons.slice(0, 5);
}
|
|
881
|
+
/**
 * Applies strict-mode filtering to a ranked rule.
 *
 * @param {object} rule - Rule with `freshnessStatus` and `confidenceLevel`.
 * @param {object} input - Query options `strict` and optional
 *   `minConfidence` (defaults to "strong").
 * @returns {boolean} Always true outside strict mode; in strict mode, true
 *   only for non-stale rules that reach the minimum confidence.
 */
function passesStrictMode(rule, input) {
  return (
    !input.strict ||
    (rule.freshnessStatus !== "stale" &&
      confidenceAtLeast(rule.confidenceLevel, input.minConfidence ?? "strong"))
  );
}
|
|
886
|
+
/**
 * Ranks the team rules against a query and returns the best few.
 *
 * @param {object} db - Database handle used to read the current code index.
 * @param {string} cwd - Directory used to resolve the rules file location.
 * @param {object} input - Query: `task`, optional `files`, `symbols`, `diff`,
 *   `currentCode`, `strict`, `minConfidence`.
 * @returns {object[]} Up to four rules, highest score first, each extended
 *   with `score`, freshness fields, reason lists and `rankSignals`. Empty
 *   when the rules file fails to load or holds no rules.
 */
function rankTeamRules(db, cwd, input) {
  const loaded = loadTeamRulesFile(cwd);
  if (!loaded.ok || loaded.rules.length === 0) return [];

  const codeSnapshot = loadCurrentCodeSnapshot(db);
  const ranked = [];
  for (const rule of loaded.rules) {
    const freshness = evaluateFreshness(rule, codeSnapshot);
    const rankSignals = {
      filePathMatch: pathMatch(rule.filePaths, input.files ?? []),
      symbolMatch: symbolMatch(rule, input.symbols ?? []),
      textMatch: textMatch(rule, input),
      confidence: confidenceScore(rule.confidenceLevel),
    };
    // The weights sum to 1 on top of a base of 1, so team rules score 1 to 2.
    const weighted =
      1 + 0.35 * rankSignals.filePathMatch + 0.25 * rankSignals.symbolMatch + 0.25 * rankSignals.textMatch + 0.15 * rankSignals.confidence;
    const candidate = {
      ...rule,
      score: Number(weighted.toFixed(4)),
      freshnessStatus: freshness.status,
      freshnessReason: freshness.reason,
      confidenceReasons: confidenceReasons(rule),
      matchReasons: matchReasons(rankSignals),
      rankSignals,
    };
    if (passesStrictMode(candidate, input)) {
      ranked.push(candidate);
    }
  }

  ranked.sort((left, right) => right.score - left.score);
  return ranked.slice(0, 4);
}
|
|
910
|
+
/**
 * Counts the rules in a valid team rules file.
 *
 * @param {string} cwd - Directory used to resolve the rules file location.
 * @returns {{count: number, lastRuleIndexTime?: string}} `{ count: 0 }` when
 *   the file is missing or invalid; otherwise the rule count plus the file's
 *   modification time as an ISO string.
 */
function countValidTeamRules(cwd) {
  const loaded = loadTeamRulesFile(cwd);
  const usable = loaded.exists && loaded.ok;
  if (!usable) return { count: 0 };

  const { mtime } = fs2.statSync(loaded.path);
  return { count: loaded.rules.length, lastRuleIndexTime: mtime.toISOString() };
}
|
|
916
|
+
|
|
917
|
+
// src/indexer/test-awareness.ts
|
|
918
|
+
import path3 from "path";
|
|
919
|
+
/**
 * Normalizes a file path to forward slashes without a leading "./".
 *
 * @param {string} filePath - Path using either slash style.
 * @returns {string} The path with backslashes converted to "/" and a single
 *   leading "./" (with any extra slashes after it) removed.
 */
function normalizePath(filePath) {
  const forwardSlashed = filePath.split("\\").join("/");
  return forwardSlashed.replace(/^\.\/+/, "");
}
|
|
922
|
+
/**
 * Splits a path into its non-empty segments.
 *
 * @param {string} filePath - Path using either slash style.
 * @returns {string[]} Segments of the normalized path, with empty segments
 *   dropped.
 */
function pathSegments(filePath) {
  const parts = normalizePath(filePath).split("/");
  return parts.filter((part) => part.length > 0);
}
|
|
925
|
+
/**
 * Returns a file's base name without its test suffix and extension.
 *
 * @param {string} filePath - Path using either slash style.
 * @returns {string} The file name with a trailing ".test.<ext>" or
 *   ".spec.<ext>" removed, then one remaining extension removed. A name
 *   consisting only of a dot and an extension becomes an empty string.
 */
function basenameWithoutExtensions(filePath) {
  const fileName = path3.posix.basename(normalizePath(filePath));
  const withoutTestSuffix = fileName.replace(/\.(test|spec)\.[^.]+$/i, "");
  return withoutTestSuffix.replace(/\.[^.]+$/i, "");
}
|
|
929
|
+
/**
 * Returns a file's directory segments with test-directory names removed.
 *
 * Test-directory names are matched case-insensitively so that this agrees
 * with `isTestFilePath`, which lower-cases segments before checking them;
 * otherwise a file under "Tests/" would be classified as a test but keep
 * "Tests" in its comparison directory.
 *
 * @param {string} filePath - Path using either slash style.
 * @returns {string[]} Directory segments, in their original case, excluding
 *   "__tests__", "test", "tests" and "spec".
 */
function sourceLikeDir(filePath) {
  const testDirNames = new Set(["__tests__", "test", "tests", "spec"]);
  const segments = pathSegments(path3.posix.dirname(normalizePath(filePath)));
  return segments.filter((segment) => !testDirNames.has(segment.toLowerCase()));
}
|
|
933
|
+
/**
 * Decides whether a path looks like a test file.
 *
 * @param {string} filePath - Path using either slash style.
 * @returns {boolean} True when the file name ends in ".test.<ext>" or
 *   ".spec.<ext>", or when any path segment (case-insensitive) is
 *   "__tests__", "test", "tests" or "spec".
 */
function isTestFilePath(filePath) {
  const normalized = normalizePath(filePath);
  const fileName = path3.posix.basename(normalized).toLowerCase();
  if (/\.(test|spec)\.[^.]+$/i.test(fileName)) return true;

  const testSegmentNames = new Set(["__tests__", "test", "tests", "spec"]);
  return pathSegments(normalized).some((segment) => testSegmentNames.has(segment.toLowerCase()));
}
|
|
939
|
+
/**
 * Projects an indexed code file onto the fields stored for a test file.
 *
 * @param {object} file - Indexed code file.
 * @returns {object} A new object holding only `repo`, `path`, `language`,
 *   `sizeBytes`, `contentHash` and `updatedAt`.
 */
function testRecord(file) {
  const { repo, path, language, sizeBytes, contentHash, updatedAt } = file;
  return { repo, path, language, sizeBytes, contentHash, updatedAt };
}
|
|
949
|
+
/**
 * Assigns a link strength to a source/test link reason.
 *
 * @param {string} reason - Why the source and test files were linked.
 * @returns {number} 1 for "same basename", 0.9 for "imported source path",
 *   0.7 for "same directory", 0.5 for any other reason.
 */
function strengthFor(reason) {
  switch (reason) {
    case "same basename":
      return 1;
    case "imported source path":
      return 0.9;
    case "same directory":
      return 0.7;
    default:
      return 0.5;
  }
}
|
|
955
|
+
/**
 * Checks whether a test file's indexed text refers to a source file.
 *
 * A reference is the source path (with or without its extension) appearing
 * verbatim, or an `import ... from "..."` / `require("...")` specifier whose
 * final path segment is the source file's base name.
 *
 * Two guards prevent false positives:
 * - Empty needles are never searched for. A dotfile source such as
 *   ".gitignore" has an empty extension-less form and base name, and an
 *   empty string is "contained" in every text.
 * - The base name must be a whole specifier segment, so "utils" matches
 *   "./utils" and "../lib/utils" but not "./my-utils".
 *
 * @param {string} testPath - Path of the test file.
 * @param {string} sourcePath - Path of the candidate source file.
 * @param {Map<string, object[]>} chunksByFile - Chunks (each with
 *   `sanitizedText`) keyed by file path.
 * @returns {boolean} True when the test text mentions the source file.
 */
function pathMentionedInTest(testPath, sourcePath, chunksByFile) {
  const text = (chunksByFile.get(testPath) ?? []).map((chunk) => chunk.sanitizedText).join("\n");
  if (!text) return false;

  const sourceNoExt = sourcePath.replace(/\.[^.]+$/i, "");
  const literalNeedles = [sourcePath, sourceNoExt].filter((needle) => needle !== "");
  if (literalNeedles.some((needle) => text.includes(needle))) return true;

  const sourceBase = basenameWithoutExtensions(sourcePath);
  if (sourceBase === "") return false;

  // Quoted specifier: optional leading directories, then exactly the base name.
  const specifier = `["'](?:[^"']*/)?${escapeRegExp(sourceBase)}["']`;
  return (
    new RegExp(`from\\s+${specifier}`, "i").test(text) ||
    new RegExp(`require\\(${specifier}\\)`, "i").test(text)
  );
}
|
|
962
|
+
/**
 * Escapes regular-expression metacharacters in a string.
 *
 * @param {string} value - Text to embed literally in a RegExp source.
 * @returns {string} The text with each of . * + ? ^ $ { } ( ) | [ ] \
 *   preceded by a backslash.
 */
function escapeRegExp(value) {
  return value.replace(/[.*+?^${}()|[\]\\]/g, (metaChar) => "\\" + metaChar);
}
|
|
965
|
+
/**
 * Infers which indexed files are tests and which sources they cover.
 *
 * Each test file is compared with every non-test file and linked when they
 * share a base name, otherwise when they share a directory (ignoring test
 * directory names), and additionally when the test text mentions the source.
 *
 * Base names are only compared when non-empty: dotfiles such as ".gitignore"
 * reduce to an empty base name and would otherwise all be linked to each
 * other as "same basename".
 *
 * @param {string} repo - Repository identifier copied onto each link.
 * @param {object[]} codeFiles - Indexed files (each with `path`).
 * @param {object[]} codeChunks - Indexed chunks (each with `filePath` and
 *   `sanitizedText`).
 * @returns {{testFiles: object[], testLinks: object[]}} Test file records
 *   and links, one link per (source, test, reason) combination.
 */
function inferTestAwareness(repo, codeFiles, codeChunks) {
  const testFiles = [];
  const sourceFiles = [];
  for (const file of codeFiles) {
    (isTestFilePath(file.path) ? testFiles : sourceFiles).push(file);
  }

  const chunksByFile = new Map();
  for (const chunk of codeChunks) {
    const chunks = chunksByFile.get(chunk.filePath) ?? [];
    chunks.push(chunk);
    chunksByFile.set(chunk.filePath, chunks);
  }

  // Derived once per source file instead of once per (test, source) pair.
  const sources = sourceFiles.map((file) => ({
    path: file.path,
    base: basenameWithoutExtensions(file.path).toLowerCase(),
    dir: sourceLikeDir(file.path).join("/"),
  }));

  const linkMap = new Map();
  const addLink = (sourcePath, testPath, reason) => {
    // The NUL separator keeps the composite key unambiguous.
    const key = `${sourcePath}\0${testPath}\0${reason}`;
    linkMap.set(key, {
      repo,
      sourcePath,
      testPath,
      reason,
      strength: strengthFor(reason),
    });
  };

  for (const test of testFiles) {
    const testBase = basenameWithoutExtensions(test.path).toLowerCase();
    const testDir = sourceLikeDir(test.path).join("/");
    for (const source of sources) {
      if (testBase !== "" && testBase === source.base) {
        addLink(source.path, test.path, "same basename");
      } else if (testDir && source.dir && testDir === source.dir) {
        addLink(source.path, test.path, "same directory");
      }
      if (pathMentionedInTest(test.path, source.path, chunksByFile)) {
        addLink(source.path, test.path, "imported source path");
      }
    }
  }

  return {
    testFiles: testFiles.map((file) => testRecord(file)),
    testLinks: uniqueStrings([...linkMap.keys()]).map((key) => linkMap.get(key)),
  };
}
|
|
1006
|
+
|
|
395
1007
|
// src/db/database.ts
|
|
396
1008
|
/**
 * Returns the default location of the Anchor SQLite database.
 *
 * @param {string} cwd - Base directory (callers pass the repository root).
 * @returns {string} `<cwd>/.anchor/index.sqlite`.
 */
function defaultDatabasePath(cwd) {
  const anchorDir = path4.join(cwd, ".anchor");
  return path4.join(anchorDir, "index.sqlite");
}
|
|
399
1011
|
function openAnchorDatabase(cwd, databasePath = defaultDatabasePath(cwd)) {
|
|
400
|
-
|
|
1012
|
+
fs3.mkdirSync(path4.dirname(databasePath), { recursive: true });
|
|
401
1013
|
const db = new Database(databasePath);
|
|
402
1014
|
db.pragma("journal_mode = WAL");
|
|
403
1015
|
db.pragma("foreign_keys = ON");
|
|
@@ -405,6 +1017,15 @@ function openAnchorDatabase(cwd, databasePath = defaultDatabasePath(cwd)) {
|
|
|
405
1017
|
}
|
|
406
1018
|
/**
 * Creates the schema and adds any missing `sync_state` history columns.
 *
 * @param {object} db - Database handle exposing `exec` and `prepare`.
 * @returns {void}
 */
function initializeSchema(db) {
  db.exec(SCHEMA_SQL);
  // CREATE TABLE IF NOT EXISTS leaves an existing sync_state table unchanged,
  // so these columns are added separately when absent.
  const historyColumns = [
    ["history_coverage", "TEXT"],
    ["history_limit", "INTEGER"],
    ["history_since", "TEXT"],
  ];
  for (const [columnName, definition] of historyColumns) {
    ensureColumn(db, "sync_state", columnName, definition);
  }
}
|
|
1024
|
+
/**
 * Adds a column to a table when the table does not already have it.
 *
 * The names are interpolated directly into SQL, so they must be trusted
 * identifiers, never user input.
 *
 * @param {object} db - Database handle exposing `prepare(sql).all()` and
 *   `exec(sql)`.
 * @param {string} tableName - Table to inspect and alter.
 * @param {string} columnName - Column that must exist.
 * @param {string} definition - Column definition used when adding it.
 * @returns {void}
 */
function ensureColumn(db, tableName, columnName, definition) {
  const existingColumns = db.prepare(`PRAGMA table_info(${tableName})`).all();
  const alreadyPresent = existingColumns.some((column) => column.name === columnName);
  if (alreadyPresent) return;
  db.exec(`ALTER TABLE ${tableName} ADD COLUMN ${columnName} ${definition}`);
}
|
|
409
1030
|
function checkSchema(db) {
|
|
410
1031
|
try {
|
|
@@ -412,7 +1033,9 @@ function checkSchema(db) {
|
|
|
412
1033
|
const codeTables = db.prepare("SELECT name FROM sqlite_master WHERE type IN ('table', 'virtual') AND name = ?").all("code_chunks_fts");
|
|
413
1034
|
const wisdom = db.prepare("SELECT name FROM sqlite_master WHERE name = ?").all("wisdom_units");
|
|
414
1035
|
const code = db.prepare("SELECT name FROM sqlite_master WHERE name = ?").all("code_chunks");
|
|
415
|
-
|
|
1036
|
+
const tests = db.prepare("SELECT name FROM sqlite_master WHERE name = ?").all("test_files");
|
|
1037
|
+
const regressions = db.prepare("SELECT name FROM sqlite_master WHERE name = ?").all("regression_events");
|
|
1038
|
+
return tables.length > 0 && wisdom.length > 0 && codeTables.length > 0 && code.length > 0 && tests.length > 0 && regressions.length > 0;
|
|
416
1039
|
} catch {
|
|
417
1040
|
return false;
|
|
418
1041
|
}
|
|
@@ -432,29 +1055,42 @@ function getLastSyncTime(db, repo) {
|
|
|
432
1055
|
const row = db.prepare("SELECT last_sync_at FROM sync_state WHERE repo = ?").get(repo);
|
|
433
1056
|
return row?.last_sync_at ?? void 0;
|
|
434
1057
|
}
|
|
435
|
-
/**
 * Inserts or updates the sync bookkeeping row for a repository.
 *
 * @param {object} db - Database handle exposing `prepare(sql).run(...)`.
 * @param {string} repo - Repository key of the `sync_state` row.
 * @param {number} [lastIndexedPr] - Last indexed PR number; stored as NULL
 *   when null or undefined.
 * @param {object} [metadata] - Optional `historyCoverage` (stored as
 *   "unknown" when absent), `historyLimit` and `historySince` (stored as NULL
 *   when absent).
 * @returns {void}
 */
function updateSyncState(db, repo, lastIndexedPr, metadata = {}) {
  // One timestamp serves as both last_sync_at and updated_at.
  const timestamp = new Date().toISOString();
  const upsert = db.prepare(
    `INSERT INTO sync_state
       (repo, last_sync_at, last_indexed_pr, history_coverage, history_limit, history_since, updated_at)
     VALUES (?, ?, ?, ?, ?, ?, ?)
     ON CONFLICT(repo) DO UPDATE SET
       last_sync_at = excluded.last_sync_at,
       last_indexed_pr = excluded.last_indexed_pr,
       history_coverage = excluded.history_coverage,
       history_limit = excluded.history_limit,
       history_since = excluded.history_since,
       updated_at = excluded.updated_at`
  );
  const values = [
    repo,
    timestamp,
    lastIndexedPr ?? null,
    metadata.historyCoverage ?? "unknown",
    metadata.historyLimit ?? null,
    metadata.historySince ?? null,
    timestamp,
  ];
  upsert.run(...values);
}
|
|
446
1081
|
/**
 * Removes everything previously indexed for one pull request.
 *
 * @param {object} db - Database handle exposing `prepare(sql)` with `all`
 *   and `run`.
 * @param {number} prId - Row id of the pull request.
 * @returns {void}
 */
function deleteExistingPrData(db, prId) {
  // FTS rows are keyed by unit id, so they are removed one by one before the
  // wisdom units themselves are deleted.
  const staleUnits = db.prepare("SELECT id FROM wisdom_units WHERE pr_id = ?").all(prId);
  const removeFtsEntry = db.prepare("DELETE FROM wisdom_units_fts WHERE unitId = ?");
  staleUnits.forEach((unit) => {
    removeFtsEntry.run(unit.id);
  });

  const dependentTables = ["regression_events", "wisdom_units", "pr_comments", "pr_files"];
  for (const table of dependentTables) {
    db.prepare(`DELETE FROM ${table} WHERE pr_id = ?`).run(prId);
  }
}
|
|
454
|
-
function upsertPullRequest(db, pr, wisdomUnits) {
|
|
1090
|
+
function upsertPullRequest(db, pr, wisdomUnits, regressionEvents = []) {
|
|
455
1091
|
const repoId = ensureRepository(db, pr.repo);
|
|
456
1092
|
const author = pr.user?.login ?? "unknown";
|
|
457
|
-
const
|
|
1093
|
+
const labels2 = (pr.labels ?? []).map((label) => typeof label === "string" ? label : label.name).filter(Boolean);
|
|
458
1094
|
const titleText = redactedHistoricalText(pr.title);
|
|
459
1095
|
const bodyText = redactedHistoricalText(pr.body ?? "");
|
|
460
1096
|
const bodySanitized = sanitizeHistoricalText(pr.body ?? "");
|
|
@@ -481,7 +1117,7 @@ function upsertPullRequest(db, pr, wisdomUnits) {
|
|
|
481
1117
|
bodyText,
|
|
482
1118
|
bodySanitized,
|
|
483
1119
|
author,
|
|
484
|
-
JSON.stringify(
|
|
1120
|
+
JSON.stringify(labels2),
|
|
485
1121
|
pr.created_at,
|
|
486
1122
|
pr.merged_at ?? null,
|
|
487
1123
|
pr.updated_at ?? null
|
|
@@ -501,6 +1137,7 @@ function upsertPullRequest(db, pr, wisdomUnits) {
|
|
|
501
1137
|
file.patch ? sanitizeHistoricalText(file.patch) : null
|
|
502
1138
|
);
|
|
503
1139
|
}
|
|
1140
|
+
insertPrCochangeTestLinks(db, repoId, pr.files.map((file) => file.filename));
|
|
504
1141
|
const insertComment = db.prepare(
|
|
505
1142
|
`INSERT INTO pr_comments
|
|
506
1143
|
(pr_id, source_type, author, body_text, sanitized_text, file_path, created_at, is_reviewer)
|
|
@@ -584,21 +1221,56 @@ function upsertPullRequest(db, pr, wisdomUnits) {
|
|
|
584
1221
|
unit.category
|
|
585
1222
|
);
|
|
586
1223
|
}
|
|
1224
|
+
const insertRegression = db.prepare(
|
|
1225
|
+
`INSERT INTO regression_events
|
|
1226
|
+
(id, repo_id, pr_id, repo, pr_number, pr_url, summary_sanitized, file_paths_json,
|
|
1227
|
+
symbols_json, test_paths_json, authors_json, labels_json, signals_json, created_at,
|
|
1228
|
+
merged_at, confidence)
|
|
1229
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
|
|
1230
|
+
);
|
|
1231
|
+
for (const event of regressionEvents) {
|
|
1232
|
+
insertRegression.run(
|
|
1233
|
+
event.id,
|
|
1234
|
+
repoId,
|
|
1235
|
+
prRow.id,
|
|
1236
|
+
event.repo,
|
|
1237
|
+
event.prNumber,
|
|
1238
|
+
event.prUrl,
|
|
1239
|
+
event.summary,
|
|
1240
|
+
JSON.stringify(event.filePaths),
|
|
1241
|
+
JSON.stringify(event.symbols),
|
|
1242
|
+
JSON.stringify(event.testPaths),
|
|
1243
|
+
JSON.stringify(event.authors),
|
|
1244
|
+
JSON.stringify(event.labels),
|
|
1245
|
+
JSON.stringify(event.signals),
|
|
1246
|
+
event.createdAt,
|
|
1247
|
+
event.mergedAt ?? null,
|
|
1248
|
+
event.confidence
|
|
1249
|
+
);
|
|
1250
|
+
}
|
|
587
1251
|
});
|
|
588
1252
|
transaction();
|
|
589
1253
|
const comments = (pr.reviews?.length ?? 0) + (pr.reviewComments?.length ?? 0) + (pr.issueComments?.length ?? 0);
|
|
590
|
-
return {
|
|
1254
|
+
return {
|
|
1255
|
+
files: pr.files.length,
|
|
1256
|
+
comments,
|
|
1257
|
+
wisdom: wisdomUnits.length,
|
|
1258
|
+
regressions: regressionEvents.length
|
|
1259
|
+
};
|
|
591
1260
|
}
|
|
592
1261
|
function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd) {
|
|
593
1262
|
initializeSchema(db);
|
|
594
1263
|
const repoId = ensureRepository(db, repo);
|
|
595
1264
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
1265
|
+
const testAwareness = inferTestAwareness(repo, codeFiles, codeChunks);
|
|
596
1266
|
const transaction = db.transaction(() => {
|
|
597
1267
|
const existingChunks = db.prepare("SELECT id FROM code_chunks WHERE repo_id = ?").all(repoId);
|
|
598
1268
|
const deleteFts = db.prepare("DELETE FROM code_chunks_fts WHERE chunkId = ?");
|
|
599
1269
|
for (const row of existingChunks) deleteFts.run(row.id);
|
|
600
1270
|
db.prepare("DELETE FROM code_chunks WHERE repo_id = ?").run(repoId);
|
|
601
1271
|
db.prepare("DELETE FROM code_files WHERE repo_id = ?").run(repoId);
|
|
1272
|
+
db.prepare("DELETE FROM test_links WHERE repo_id = ? AND reason != 'PR co-change'").run(repoId);
|
|
1273
|
+
db.prepare("DELETE FROM test_files WHERE repo_id = ?").run(repoId);
|
|
602
1274
|
const insertFile = db.prepare(
|
|
603
1275
|
`INSERT INTO code_files
|
|
604
1276
|
(repo_id, path, language, size_bytes, content_hash, updated_at)
|
|
@@ -652,6 +1324,7 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd) {
|
|
|
652
1324
|
chunk.language ?? ""
|
|
653
1325
|
);
|
|
654
1326
|
}
|
|
1327
|
+
insertTestAwareness(db, repoId, testAwareness.testFiles, testAwareness.testLinks);
|
|
655
1328
|
db.prepare(
|
|
656
1329
|
`INSERT INTO code_index_state (repo, last_indexed_at, indexed_files, code_chunks, skipped_files)
|
|
657
1330
|
VALUES (?, ?, ?, ?, ?)
|
|
@@ -666,12 +1339,75 @@ function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd) {
|
|
|
666
1339
|
return {
|
|
667
1340
|
indexedFiles: codeFiles.length,
|
|
668
1341
|
codeChunksCreated: codeChunks.length,
|
|
1342
|
+
testFilesIndexed: testAwareness.testFiles.length,
|
|
1343
|
+
testLinksCreated: testAwareness.testLinks.length,
|
|
669
1344
|
skippedFiles,
|
|
670
1345
|
databasePath: defaultDatabasePath(cwd)
|
|
671
1346
|
};
|
|
672
1347
|
}
|
|
1348
|
+
/**
 * Link every non-test file of a PR to every test file changed in the same PR.
 *
 * Paths are classified with `isTestFilePath`. Nothing is written unless the PR
 * touches at least one test file and at least one non-test file. Each pair is
 * upserted into `test_links` with reason 'PR co-change' and strength 0.75.
 *
 * @param {object} db - Database handle exposing `prepare(sql).run(...params)`.
 * @param {number} repoId - Value stored in `repo_id`.
 * @param {string[]} filePaths - File paths changed by the pull request.
 */
function insertPrCochangeTestLinks(db, repoId, filePaths) {
  const changedTests = filePaths.filter(isTestFilePath);
  const changedSources = filePaths.filter((candidate) => !isTestFilePath(candidate));
  const nothingToLink = changedTests.length === 0 || changedSources.length === 0;
  if (nothingToLink) return;

  const upsertLink = db.prepare(
    `INSERT INTO test_links (repo_id, source_path, test_path, reason, strength)
     VALUES (?, ?, ?, 'PR co-change', 0.75)
     ON CONFLICT(repo_id, source_path, test_path, reason) DO UPDATE SET strength = excluded.strength`
  );
  changedSources.forEach((sourcePath) => {
    changedTests.forEach((testPath) => {
      upsertLink.run(repoId, sourcePath, testPath);
    });
  });
}
|
|
1361
|
+
/**
 * Write test files and source-to-test links for a repository.
 *
 * Inserts one `test_files` row per entry of `testFiles` (a missing `language`
 * is stored as NULL), then one `test_links` row per entry of `testLinks`.
 * These are plain INSERTs with no conflict clause.
 *
 * @param {object} db - Database handle exposing `prepare(sql).run(...params)`.
 * @param {number} repoId - Value stored in `repo_id` of every row.
 * @param {Array<{path: string, language?: string, sizeBytes: number, contentHash: string, updatedAt: string}>} testFiles
 * @param {Array<{sourcePath: string, testPath: string, reason: string, strength: number}>} testLinks
 */
function insertTestAwareness(db, repoId, testFiles, testLinks) {
  const fileStatement = db.prepare(
    `INSERT INTO test_files
       (repo_id, path, language, size_bytes, content_hash, updated_at)
     VALUES (?, ?, ?, ?, ?, ?)`
  );
  testFiles.forEach((testFile) => {
    const row = [
      repoId,
      testFile.path,
      testFile.language ?? null,
      testFile.sizeBytes,
      testFile.contentHash,
      testFile.updatedAt,
    ];
    fileStatement.run(...row);
  });

  const linkStatement = db.prepare(
    `INSERT INTO test_links (repo_id, source_path, test_path, reason, strength)
     VALUES (?, ?, ?, ?, ?)`
  );
  testLinks.forEach(({ sourcePath, testPath, reason, strength }) => {
    linkStatement.run(repoId, sourcePath, testPath, reason, strength);
  });
}
|
|
1385
|
+
/**
 * Append one row to `index_runs` describing an indexing command.
 *
 * Calls `initializeSchema(db)` before inserting. Optional fields missing from
 * `run` are stored as NULL, `finishedAt` falls back to the current time, and
 * `failures` is stored as JSON (an empty array when absent).
 *
 * @param {object} db - Database handle exposing `prepare(sql).run(...params)`.
 * @param {object} run - Run description; `command`, `startedAt` and `status`
 *   are stored as given, every other field is optional.
 */
function recordIndexRun(db, run) {
  initializeSchema(db);
  const orNull = (value) => value ?? null;
  const values = [
    run.command,
    orNull(run.repo),
    run.startedAt,
    run.finishedAt ?? new Date().toISOString(),
    orNull(run.historyCoverage),
    orNull(run.historyLimit),
    orNull(run.prsFetched),
    orNull(run.prsSkipped),
    orNull(run.commentsIndexed),
    orNull(run.codeFilesIndexed),
    orNull(run.testFilesIndexed),
    JSON.stringify(run.failures ?? []),
    run.status,
  ];
  const insertRun = db.prepare(
    `INSERT INTO index_runs
       (command, repo, started_at, finished_at, history_coverage, history_limit, prs_fetched,
        prs_skipped, comments_indexed, code_files_indexed, test_files_indexed, failures_json, status)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
  );
  insertRun.run(...values);
}
|
|
673
1408
|
function getIndexStatus(cwd, githubTokenConfigured = Boolean(resolveGitHubToken({ cwd }).token), databasePath = defaultDatabasePath(cwd)) {
|
|
674
|
-
if (!
|
|
1409
|
+
if (!fs3.existsSync(databasePath)) {
|
|
1410
|
+
const rules = countValidTeamRules(cwd);
|
|
675
1411
|
return {
|
|
676
1412
|
databasePath,
|
|
677
1413
|
prCount: 0,
|
|
@@ -680,6 +1416,13 @@ function getIndexStatus(cwd, githubTokenConfigured = Boolean(resolveGitHubToken(
|
|
|
680
1416
|
wisdomUnitCount: 0,
|
|
681
1417
|
codeFileCount: 0,
|
|
682
1418
|
codeChunkCount: 0,
|
|
1419
|
+
testFileCount: 0,
|
|
1420
|
+
testLinkCount: 0,
|
|
1421
|
+
regressionEventCount: 0,
|
|
1422
|
+
historyCoverage: "unknown",
|
|
1423
|
+
staleEvidenceCount: 0,
|
|
1424
|
+
teamRuleCount: rules.count,
|
|
1425
|
+
lastRuleIndexTime: rules.lastRuleIndexTime,
|
|
683
1426
|
githubTokenConfigured,
|
|
684
1427
|
health: "missing_database"
|
|
685
1428
|
};
|
|
@@ -688,6 +1431,7 @@ function getIndexStatus(cwd, githubTokenConfigured = Boolean(resolveGitHubToken(
|
|
|
688
1431
|
try {
|
|
689
1432
|
initializeSchema(db);
|
|
690
1433
|
if (!checkSchema(db)) {
|
|
1434
|
+
const rules2 = countValidTeamRules(cwd);
|
|
691
1435
|
return {
|
|
692
1436
|
databasePath,
|
|
693
1437
|
prCount: 0,
|
|
@@ -696,16 +1440,33 @@ function getIndexStatus(cwd, githubTokenConfigured = Boolean(resolveGitHubToken(
|
|
|
696
1440
|
wisdomUnitCount: 0,
|
|
697
1441
|
codeFileCount: 0,
|
|
698
1442
|
codeChunkCount: 0,
|
|
1443
|
+
testFileCount: 0,
|
|
1444
|
+
testLinkCount: 0,
|
|
1445
|
+
regressionEventCount: 0,
|
|
1446
|
+
historyCoverage: "unknown",
|
|
1447
|
+
staleEvidenceCount: 0,
|
|
1448
|
+
teamRuleCount: rules2.count,
|
|
1449
|
+
lastRuleIndexTime: rules2.lastRuleIndexTime,
|
|
699
1450
|
githubTokenConfigured,
|
|
700
1451
|
health: "schema_invalid"
|
|
701
1452
|
};
|
|
702
1453
|
}
|
|
703
1454
|
const count = (table) => db.prepare(`SELECT COUNT(*) AS count FROM ${table}`).get().count;
|
|
704
1455
|
const repoRow = db.prepare("SELECT full_name FROM repositories ORDER BY id LIMIT 1").get();
|
|
705
|
-
const syncRow = db.prepare(
|
|
1456
|
+
const syncRow = db.prepare(
|
|
1457
|
+
"SELECT last_sync_at, history_coverage, history_limit FROM sync_state ORDER BY updated_at DESC LIMIT 1"
|
|
1458
|
+
).get();
|
|
706
1459
|
const codeIndexRow = db.prepare("SELECT last_indexed_at FROM code_index_state ORDER BY last_indexed_at DESC LIMIT 1").get();
|
|
707
1460
|
const wisdomUnitCount = count("wisdom_units");
|
|
708
1461
|
const codeChunkCount = count("code_chunks");
|
|
1462
|
+
const lastSuccessfulRun = db.prepare(
|
|
1463
|
+
"SELECT finished_at, failures_json FROM index_runs WHERE status = 'success' ORDER BY finished_at DESC LIMIT 1"
|
|
1464
|
+
).get();
|
|
1465
|
+
const lastFailedRun = db.prepare(
|
|
1466
|
+
"SELECT finished_at, failures_json FROM index_runs WHERE status = 'failed' ORDER BY finished_at DESC LIMIT 1"
|
|
1467
|
+
).get();
|
|
1468
|
+
const staleCodeIndex = isCodeIndexStale(codeIndexRow?.last_indexed_at ?? void 0);
|
|
1469
|
+
const rules = countValidTeamRules(cwd);
|
|
709
1470
|
return {
|
|
710
1471
|
repo: repoRow?.full_name,
|
|
711
1472
|
databasePath,
|
|
@@ -715,8 +1476,26 @@ function getIndexStatus(cwd, githubTokenConfigured = Boolean(resolveGitHubToken(
|
|
|
715
1476
|
wisdomUnitCount,
|
|
716
1477
|
codeFileCount: count("code_files"),
|
|
717
1478
|
codeChunkCount,
|
|
1479
|
+
testFileCount: count("test_files"),
|
|
1480
|
+
testLinkCount: count("test_links"),
|
|
1481
|
+
regressionEventCount: count("regression_events"),
|
|
1482
|
+
historyCoverage: syncRow?.history_coverage ?? "unknown",
|
|
1483
|
+
historyLimit: syncRow?.history_limit ?? void 0,
|
|
1484
|
+
staleEvidenceCount: countStaleEvidence(db),
|
|
1485
|
+
teamRuleCount: rules.count,
|
|
718
1486
|
lastSyncTime: syncRow?.last_sync_at ?? void 0,
|
|
719
1487
|
lastCodeIndexTime: codeIndexRow?.last_indexed_at ?? void 0,
|
|
1488
|
+
lastRuleIndexTime: rules.lastRuleIndexTime,
|
|
1489
|
+
lastSuccessfulRun: lastSuccessfulRun?.finished_at ?? void 0,
|
|
1490
|
+
lastFailedRun: lastFailedRun?.finished_at ?? void 0,
|
|
1491
|
+
staleCodeIndex,
|
|
1492
|
+
suggestedNextCommand: suggestedNextCommand({
|
|
1493
|
+
prCount: count("pull_requests"),
|
|
1494
|
+
wisdomUnitCount,
|
|
1495
|
+
codeChunkCount,
|
|
1496
|
+
staleCodeIndex,
|
|
1497
|
+
historyCoverage: syncRow?.history_coverage ?? "unknown"
|
|
1498
|
+
}),
|
|
720
1499
|
githubTokenConfigured,
|
|
721
1500
|
health: wisdomUnitCount > 0 || codeChunkCount > 0 ? "ok" : "empty_index"
|
|
722
1501
|
};
|
|
@@ -724,6 +1503,39 @@ function getIndexStatus(cwd, githubTokenConfigured = Boolean(resolveGitHubToken(
|
|
|
724
1503
|
db.close();
|
|
725
1504
|
}
|
|
726
1505
|
}
|
|
1506
|
+
/**
 * Decide whether the code index is too old to trust.
 *
 * A missing or unparseable timestamp counts as stale. Otherwise the index is
 * stale when it is strictly older than `maxAgeMs`.
 *
 * @param {string} [lastIndexedAt] - Timestamp string understood by `Date.parse`.
 * @param {number} [maxAgeMs] - Freshness window in milliseconds; defaults to
 *   seven days, the threshold that was previously hard-coded.
 * @returns {boolean} `true` when the index should be rebuilt.
 */
function isCodeIndexStale(lastIndexedAt, maxAgeMs = 7 * 24 * 60 * 60 * 1000) {
  if (!lastIndexedAt) return true;
  const indexedAtMs = Date.parse(lastIndexedAt);
  if (Number.isNaN(indexedAtMs)) return true;
  return Date.now() - indexedAtMs > maxAgeMs;
}
|
|
1512
|
+
/**
 * Pick the CLI command to recommend next, based on index counters.
 *
 * Rules are checked in order and the first match wins:
 *   1. no PRs and no wisdom units        -> "anchor index"
 *   2. no code chunks or stale code index -> "anchor index-code"
 *   3. history coverage is not "all"      -> "anchor index-all"
 *
 * @param {{prCount: number, wisdomUnitCount: number, codeChunkCount: number,
 *          staleCodeIndex: boolean, historyCoverage: string}} input
 * @returns {string|undefined} The suggested command, or `undefined` when no rule applies.
 */
function suggestedNextCommand(input) {
  const { prCount, wisdomUnitCount, codeChunkCount, staleCodeIndex, historyCoverage } = input;
  const rules = [
    ["anchor index", prCount === 0 && wisdomUnitCount === 0],
    ["anchor index-code", codeChunkCount === 0 || staleCodeIndex],
    ["anchor index-all", historyCoverage !== "all"],
  ];
  const firstMatch = rules.find(([, applies]) => applies);
  return firstMatch?.[0];
}
|
|
1518
|
+
/**
 * Count wisdom units whose recorded file paths are all absent from `code_files`.
 *
 * Returns 0 when `code_files` is empty. A unit is counted only if its
 * `file_paths_json` parses to an array with at least one string and none of
 * those strings is an indexed path. Malformed JSON, non-array values and
 * non-string items are treated as "no paths".
 *
 * @param {object} db - Database handle exposing `prepare(sql).all()`.
 * @returns {number} Number of wisdom units with no surviving file path.
 */
function countStaleEvidence(db) {
  const indexedPaths = new Set();
  for (const { path } of db.prepare("SELECT path FROM code_files").all()) {
    indexedPaths.add(path);
  }
  if (indexedPaths.size === 0) return 0;

  // Tolerant decoder: anything that is not a JSON array yields an empty list.
  const readPaths = (json) => {
    try {
      const decoded = JSON.parse(json);
      return Array.isArray(decoded) ? decoded.filter((item) => typeof item === "string") : [];
    } catch {
      return [];
    }
  };

  return db
    .prepare("SELECT file_paths_json FROM wisdom_units")
    .all()
    .reduce((staleCount, row) => {
      const referenced = readPaths(row.file_paths_json);
      const allMissing = referenced.length > 0 && referenced.every((filePath) => !indexedPaths.has(filePath));
      return allMissing ? staleCount + 1 : staleCount;
    }, 0);
}
|
|
727
1539
|
|
|
728
1540
|
// src/indexer/chunker.ts
|
|
729
1541
|
var HIGH_SIGNAL_PATTERN = /\b(because|we intentionally|do not|don't|must|should not|avoid|rejected|regression|breaking|contract|invariant|performance|security|secret|token|migration|compatibility|lazy|eager|thread-safe|race|deadlock|deprecated|backward compatible|do not change|this broke|root cause|architecture decision)\b/i;
|
|
@@ -757,7 +1569,7 @@ function chunkHistoricalText(text, maxChunkLength = 700) {
|
|
|
757
1569
|
|
|
758
1570
|
// src/indexer/code-chunker.ts
|
|
759
1571
|
import crypto from "crypto";
|
|
760
|
-
import
|
|
1572
|
+
import path5 from "path";
|
|
761
1573
|
var DEFAULT_CHUNK_LINES = 80;
|
|
762
1574
|
var DEFAULT_OVERLAP_LINES = 8;
|
|
763
1575
|
var FUNCTION_CALL_STOP_WORDS = /* @__PURE__ */ new Set([
|
|
@@ -790,7 +1602,7 @@ function extractCodeSymbols(text, filePath) {
|
|
|
790
1602
|
const candidate = match[1] ?? "";
|
|
791
1603
|
if (!FUNCTION_CALL_STOP_WORDS.has(candidate)) symbols.push(candidate);
|
|
792
1604
|
}
|
|
793
|
-
const basename =
|
|
1605
|
+
const basename = path5.basename(filePath).replace(/\.[^.]+$/, "");
|
|
794
1606
|
if (/^[A-Za-z_$][\w$-]*$/.test(basename)) symbols.push(basename);
|
|
795
1607
|
return uniqueStrings(symbols).slice(0, 40);
|
|
796
1608
|
}
|
|
@@ -829,8 +1641,8 @@ function chunkCodeFile(file, options = {}) {
|
|
|
829
1641
|
// src/indexer/code-file-discovery.ts
|
|
830
1642
|
import { execFileSync as execFileSync3 } from "child_process";
|
|
831
1643
|
import crypto2 from "crypto";
|
|
832
|
-
import
|
|
833
|
-
import
|
|
1644
|
+
import fs4 from "fs";
|
|
1645
|
+
import path6 from "path";
|
|
834
1646
|
var DEFAULT_MAX_CODE_FILE_BYTES = 512 * 1024;
|
|
835
1647
|
var HARD_EXCLUDED_SEGMENTS = /* @__PURE__ */ new Set([
|
|
836
1648
|
".git",
|
|
@@ -878,7 +1690,7 @@ function isHardExcludedCodePath(filePath) {
|
|
|
878
1690
|
const normalized = normalizeGitPath(filePath);
|
|
879
1691
|
const segments = normalized.split("/");
|
|
880
1692
|
if (segments.some((segment) => HARD_EXCLUDED_SEGMENTS.has(segment))) return true;
|
|
881
|
-
const basename =
|
|
1693
|
+
const basename = path6.posix.basename(normalized).toLowerCase();
|
|
882
1694
|
if ([".netrc", ".npmrc", ".pypirc", ".yarnrc"].includes(basename)) return true;
|
|
883
1695
|
if (basename === ".env" || basename.startsWith(".env.")) return true;
|
|
884
1696
|
if (basename === "id_rsa" || basename === "id_rsa.pub" || basename === "id_dsa" || basename === "id_ecdsa" || basename === "id_ed25519") {
|
|
@@ -888,7 +1700,7 @@ function isHardExcludedCodePath(filePath) {
|
|
|
888
1700
|
return false;
|
|
889
1701
|
}
|
|
890
1702
|
/**
 * Look up the language registered for a file's extension.
 *
 * The extension is lower-cased before the lookup in `LANGUAGE_BY_EXTENSION`.
 *
 * @param {string} filePath - Path whose extension is inspected.
 * @returns {string|undefined} The mapped language, or `undefined` when the
 *   extension has no entry.
 */
function languageForPath(filePath) {
  return LANGUAGE_BY_EXTENSION[path6.extname(filePath).toLowerCase()];
}
|
|
894
1706
|
function isProbablyBinary(buffer) {
|
|
@@ -911,7 +1723,7 @@ function discoverGitFiles(cwd) {
|
|
|
911
1723
|
}
|
|
912
1724
|
function discoverCodeFiles(cwd, repo, options = {}) {
|
|
913
1725
|
const maxFileBytes = options.maxFileBytes ?? DEFAULT_MAX_CODE_FILE_BYTES;
|
|
914
|
-
const rootPath =
|
|
1726
|
+
const rootPath = path6.resolve(cwd);
|
|
915
1727
|
const files = [];
|
|
916
1728
|
let skippedFiles = 0;
|
|
917
1729
|
for (const filePath of discoverGitFiles(cwd)) {
|
|
@@ -919,15 +1731,15 @@ function discoverCodeFiles(cwd, repo, options = {}) {
|
|
|
919
1731
|
skippedFiles += 1;
|
|
920
1732
|
continue;
|
|
921
1733
|
}
|
|
922
|
-
const absolutePath =
|
|
923
|
-
const relativeToRoot =
|
|
924
|
-
if (relativeToRoot.startsWith("..") ||
|
|
1734
|
+
const absolutePath = path6.resolve(cwd, filePath);
|
|
1735
|
+
const relativeToRoot = path6.relative(rootPath, absolutePath);
|
|
1736
|
+
if (relativeToRoot.startsWith("..") || path6.isAbsolute(relativeToRoot)) {
|
|
925
1737
|
skippedFiles += 1;
|
|
926
1738
|
continue;
|
|
927
1739
|
}
|
|
928
1740
|
let stat;
|
|
929
1741
|
try {
|
|
930
|
-
stat =
|
|
1742
|
+
stat = fs4.statSync(absolutePath);
|
|
931
1743
|
} catch {
|
|
932
1744
|
skippedFiles += 1;
|
|
933
1745
|
continue;
|
|
@@ -936,7 +1748,7 @@ function discoverCodeFiles(cwd, repo, options = {}) {
|
|
|
936
1748
|
skippedFiles += 1;
|
|
937
1749
|
continue;
|
|
938
1750
|
}
|
|
939
|
-
const buffer =
|
|
1751
|
+
const buffer = fs4.readFileSync(absolutePath);
|
|
940
1752
|
if (isProbablyBinary(buffer)) {
|
|
941
1753
|
skippedFiles += 1;
|
|
942
1754
|
continue;
|
|
@@ -1001,14 +1813,19 @@ function emptyCodeIndexSummary(cwd) {
|
|
|
1001
1813
|
return {
|
|
1002
1814
|
indexedFiles: 0,
|
|
1003
1815
|
codeChunksCreated: 0,
|
|
1816
|
+
testFilesIndexed: 0,
|
|
1817
|
+
testLinksCreated: 0,
|
|
1004
1818
|
skippedFiles: 0,
|
|
1005
1819
|
databasePath: defaultDatabasePath(cwd)
|
|
1006
1820
|
};
|
|
1007
1821
|
}
|
|
1008
1822
|
|
|
1823
|
+
// src/indexer/regression-extractor.ts
|
|
1824
|
+
import crypto4 from "crypto";
|
|
1825
|
+
|
|
1009
1826
|
// src/indexer/wisdom-extractor.ts
|
|
1010
1827
|
import crypto3 from "crypto";
|
|
1011
|
-
import
|
|
1828
|
+
import path7 from "path";
|
|
1012
1829
|
var CATEGORY_KEYWORDS = [
|
|
1013
1830
|
["security_note", /\b(security|secret|token|bearer|oauth|credential|xss|csrf|injection|sanitize|redact)\b/i],
|
|
1014
1831
|
["architecture_decision", /\b(architecture decision|architectural|we intentionally|design decision)\b/i],
|
|
@@ -1040,7 +1857,7 @@ function extractSymbols(text, filePaths) {
|
|
|
1040
1857
|
}
|
|
1041
1858
|
}
|
|
1042
1859
|
for (const filePath of filePaths) {
|
|
1043
|
-
const basename =
|
|
1860
|
+
const basename = path7.basename(filePath).replace(/\.[^.]+$/, "");
|
|
1044
1861
|
if (/^[A-Za-z_$][\w$]*$/.test(basename)) symbols.push(basename);
|
|
1045
1862
|
}
|
|
1046
1863
|
return uniqueStrings(symbols).slice(0, 30);
|
|
@@ -1189,6 +2006,76 @@ ${filePaths.join("\n")}`, filePaths);
|
|
|
1189
2006
|
return units;
|
|
1190
2007
|
}
|
|
1191
2008
|
|
|
2009
|
+
// src/indexer/regression-extractor.ts
|
|
2010
|
+
// Ordered [signal name, detector] pairs used to spot regression-related PRs.
// All detectors are case-insensitive whole-word matches. Note that the
// "this broke" signal also fires on a bare "broke".
var REGRESSION_SIGNALS = Object.entries({
  "regression": /\bregression\b/i,
  "revert": /\b(revert|reverted)\b/i,
  "rollback": /\brollback\b/i,
  "hotfix": /\bhotfix\b/i,
  "incident": /\bincident\b/i,
  "root cause": /\broot cause\b/i,
  "this broke": /\b(this broke|broke)\b/i,
  "fixed by": /\bfixed by\b/i,
});
|
|
2020
|
+
/**
 * Collect the label names attached to a pull request.
 *
 * Each entry of `pr.labels` may be a plain string or an object with a `name`.
 * Entries that yield no usable name are dropped: `null`/`undefined` entries
 * (which previously threw on `.name`), objects without a name, empty names and
 * non-string names. Callers can therefore treat every result as a non-empty string.
 *
 * @param {{labels?: Array<string|{name?: string}|null>}} pr - Pull request record.
 * @returns {string[]} Label names in their original order.
 */
function labels(pr) {
  return (pr.labels ?? [])
    .map((label) => (typeof label === "string" ? label : label?.name))
    .filter((name) => typeof name === "string" && name.length > 0);
}
|
|
2023
|
+
/**
 * Gather every piece of text on a pull request that may carry a regression signal.
 *
 * Order of the result: title, body, label names, review bodies, review-comment
 * bodies, issue-comment bodies, commit messages. Missing bodies and messages
 * count as empty strings, and entries that are empty after trimming are removed.
 *
 * @param {object} pr - Pull request record; only `title` is read unconditionally.
 * @returns {string[]} Non-blank texts in the order described above.
 */
function sourceTexts(pr) {
  const texts = [pr.title, pr.body ?? ""];
  texts.push(...labels(pr));
  for (const discussion of [pr.reviews, pr.reviewComments, pr.issueComments]) {
    for (const item of discussion ?? []) {
      texts.push(item.body ?? "");
    }
  }
  for (const item of pr.commits ?? []) {
    texts.push(item.commit?.message ?? "");
  }
  return texts.filter((text) => text.trim());
}
|
|
2034
|
+
/**
 * Derive a deterministic identifier for a regression event.
 *
 * The id is "re_" followed by the first 24 hex characters of the SHA-256 of
 * the repo, PR number, canonicalised summary and "|"-joined signals, with
 * those four parts separated by NUL characters.
 *
 * @param {{repo: string, number: number}} pr - Pull request the event belongs to.
 * @param {string} summary - Event summary; passed through `canonicalizeText`.
 * @param {string[]} signals - Signal names detected for the event.
 * @returns {string} Identifier of the form `re_<24 hex chars>`.
 */
function stableRegressionId(pr, summary, signals) {
  const parts = [pr.repo, pr.number, canonicalizeText(summary), signals.join("|")];
  const fingerprint = parts.join("\0");
  const digest = crypto4.createHash("sha256").update(fingerprint).digest("hex");
  return `re_${digest.slice(0, 24)}`;
}
|
|
2038
|
+
/**
 * Build at most one regression event for a pull request.
 *
 * All PR texts (see `sourceTexts`) are scanned with `REGRESSION_SIGNALS`. With
 * no matching signal, or when the sanitised summary is empty, the result is an
 * empty array. Otherwise a single event is returned.
 *
 * Confidence starts at 0.58, gains 0.06 per matched signal and 0.08 when the PR
 * has any review or review comment. It is rounded to two decimals and capped at 1.
 *
 * @param {object} pr - Normalised pull request; `files` must be an array.
 * @returns {object[]} Empty array or a one-element array holding the event.
 */
function extractRegressionEvents(pr) {
  const haystack = sourceTexts(pr).join("\n");
  const signals = [];
  for (const [signal, pattern] of REGRESSION_SIGNALS) {
    if (pattern.test(haystack)) signals.push(signal);
  }
  if (signals.length === 0) return [];

  const files = uniqueStrings(pr.files.map((file) => file.filename));
  const testPaths = files.filter(isTestFilePath);

  // The summary is derived from title and body only, clipped before sanitising.
  const summary = sanitizeHistoricalText(clipSentence(`${pr.title}. ${pr.body ?? ""}`, 420));
  if (!summary) return [];

  const reviewActivity = (pr.reviews ?? []).length + (pr.reviewComments ?? []).length;
  const rawConfidence = 0.58 + signals.length * 0.06 + (reviewActivity > 0 ? 0.08 : 0);
  const confidence = Math.min(1, Number(rawConfidence.toFixed(2)));

  // Authors are the PR author plus review-comment authors only.
  const commenterLogins = (pr.reviewComments ?? []).map((comment) => comment.user?.login ?? "unknown");
  const authors = uniqueStrings([pr.user?.login ?? "unknown", ...commenterLogins]);

  return [
    {
      id: stableRegressionId(pr, summary, signals),
      repo: pr.repo,
      prNumber: pr.number,
      prUrl: pr.html_url,
      summary,
      filePaths: files,
      symbols: extractSymbols(`${summary}\n${files.join("\n")}`, files),
      testPaths,
      authors,
      labels: labels(pr),
      signals: uniqueStrings(signals),
      createdAt: pr.created_at,
      mergedAt: pr.merged_at ?? undefined,
      confidence,
    },
  ];
}
|
|
2078
|
+
|
|
1192
2079
|
// src/indexer/normalize-pr.ts
|
|
1193
2080
|
function normalizePullRequest(input) {
|
|
1194
2081
|
return {
|
|
@@ -1211,6 +2098,7 @@ function indexPullRequests(db, pullRequests, options) {
|
|
|
1211
2098
|
let indexedFiles = 0;
|
|
1212
2099
|
let indexedComments = 0;
|
|
1213
2100
|
let wisdomUnitsCreated = 0;
|
|
2101
|
+
let regressionEventsCreated = 0;
|
|
1214
2102
|
let skippedItems = 0;
|
|
1215
2103
|
let lastPr;
|
|
1216
2104
|
for (const [index, rawPr] of pullRequests.entries()) {
|
|
@@ -1227,10 +2115,12 @@ function indexPullRequests(db, pullRequests, options) {
|
|
|
1227
2115
|
continue;
|
|
1228
2116
|
}
|
|
1229
2117
|
const wisdomUnits = extractWisdomUnits(pr);
|
|
1230
|
-
const
|
|
2118
|
+
const regressionEvents = extractRegressionEvents(pr);
|
|
2119
|
+
const result = upsertPullRequest(db, pr, wisdomUnits, regressionEvents);
|
|
1231
2120
|
indexedFiles += result.files;
|
|
1232
2121
|
indexedComments += result.comments;
|
|
1233
2122
|
wisdomUnitsCreated += result.wisdom;
|
|
2123
|
+
regressionEventsCreated += result.regressions;
|
|
1234
2124
|
lastPr = pr.number;
|
|
1235
2125
|
options.onProgress?.({
|
|
1236
2126
|
stage: "indexed_pull_request",
|
|
@@ -1242,13 +2132,18 @@ function indexPullRequests(db, pullRequests, options) {
|
|
|
1242
2132
|
});
|
|
1243
2133
|
}
|
|
1244
2134
|
if (options.updateSyncStateAfter !== false) {
|
|
1245
|
-
updateSyncState(db, options.repo, lastPr
|
|
2135
|
+
updateSyncState(db, options.repo, lastPr, {
|
|
2136
|
+
historyCoverage: options.historyCoverage,
|
|
2137
|
+
historyLimit: options.historyLimit,
|
|
2138
|
+
historySince: options.historySince
|
|
2139
|
+
});
|
|
1246
2140
|
}
|
|
1247
2141
|
return {
|
|
1248
2142
|
indexedPrs: pullRequests.length - skippedItems,
|
|
1249
2143
|
indexedFiles,
|
|
1250
2144
|
indexedComments,
|
|
1251
2145
|
wisdomUnitsCreated,
|
|
2146
|
+
regressionEventsCreated,
|
|
1252
2147
|
skippedItems,
|
|
1253
2148
|
databasePath: defaultDatabasePath(options.cwd)
|
|
1254
2149
|
};
|
|
@@ -1260,7 +2155,7 @@ function shouldSyncSince(db, repo, fallbackSince) {
|
|
|
1260
2155
|
}
|
|
1261
2156
|
|
|
1262
2157
|
// src/retrieval/query-builder.ts
|
|
1263
|
-
import
|
|
2158
|
+
import path8 from "path";
|
|
1264
2159
|
var CATEGORY_HINTS = [
|
|
1265
2160
|
"security",
|
|
1266
2161
|
"regression",
|
|
@@ -1276,7 +2171,29 @@ function ftsToken(token) {
|
|
|
1276
2171
|
if (clean.length < 3) return void 0;
|
|
1277
2172
|
return `${clean}*`;
|
|
1278
2173
|
}
|
|
1279
|
-
function
|
|
2174
|
+
/**
 * Guess the conventional test-file names for a path.
 *
 * Takes the file name without its directory, removes a trailing ".test" or
 * ".spec" from the stem (case-insensitive), and returns the ".test" and
 * ".spec" variants with the original extension.
 *
 * @param {string} filePath - Source or test file path.
 * @returns {string[]} Two bare file names: `<stem>.test<ext>` and `<stem>.spec<ext>`.
 */
function testFilenameHints(filePath) {
  const { name, ext } = path8.parse(filePath);
  const stem = name.replace(/\.(test|spec)$/i, "");
  return ["test", "spec"].map((kind) => `${stem}.${kind}${ext}`);
}
|
|
2179
|
+
/**
 * Pull search-worthy fragments out of a unified diff.
 *
 * Only the first 5000 characters are inspected (via `truncateText`). For each
 * line, in order:
 *   - "diff --git" headers contribute the last four segments after splitting
 *     the line on "/" or "\\";
 *   - "@@" hunk headers contribute whatever follows the closing "@@";
 *   - added or removed lines that begin a class/function/const/let/var/type/
 *     interface declaration (optionally exported) contribute the whole line.
 *
 * @param {string} [diff] - Unified diff text.
 * @returns {string[]} Raw fragments, not yet tokenised; empty for a falsy diff.
 */
function diffHunkTerms(diff) {
  if (!diff) return [];
  const declarationLine = /^[+-]\s*(?:export\s+)?(?:class|function|const|let|var|type|interface)\s+/;
  const clipped = truncateText(diff, 5e3) ?? "";
  return clipped.split("\n").flatMap((line) => {
    const found = [];
    if (line.startsWith("diff --git")) found.push(...line.split(/[\\/]/).slice(-4));
    if (line.startsWith("@@")) found.push(line.replace(/^@@[^@]*@@/, ""));
    if (declarationLine.test(line)) found.push(line);
    return found;
  });
}
|
|
2196
|
+
function buildQueryTerms(input) {
|
|
1280
2197
|
const files = input.files ?? [];
|
|
1281
2198
|
const symbols = "symbols" in input ? input.symbols ?? [] : [];
|
|
1282
2199
|
const categories = "categories" in input ? input.categories ?? [] : [];
|
|
@@ -1285,18 +2202,24 @@ function buildFtsQuery(input) {
|
|
|
1285
2202
|
const baseText = "task" in input ? input.task : input.query;
|
|
1286
2203
|
const fileTerms = files.flatMap((file) => [
|
|
1287
2204
|
file,
|
|
1288
|
-
|
|
1289
|
-
...
|
|
2205
|
+
path8.basename(file),
|
|
2206
|
+
...testFilenameHints(file),
|
|
2207
|
+
...path8.dirname(file).split(/[\\/]/).filter(Boolean)
|
|
1290
2208
|
]);
|
|
1291
|
-
|
|
2209
|
+
return uniqueStrings([
|
|
1292
2210
|
...tokenizeSearchText(baseText, 24),
|
|
1293
2211
|
...tokenizeSearchText(fileTerms.join(" "), 24),
|
|
1294
2212
|
...tokenizeSearchText(symbols.join(" "), 24),
|
|
1295
2213
|
...tokenizeSearchText(categories.join(" "), 12),
|
|
1296
2214
|
...tokenizeSearchText(diff ?? "", 18),
|
|
1297
2215
|
...tokenizeSearchText(currentCode ?? "", 18),
|
|
2216
|
+
...tokenizeSearchText(diffHunkTerms(diff).join(" "), 18),
|
|
2217
|
+
...CATEGORY_HINTS,
|
|
1298
2218
|
...CATEGORY_HINTS.filter((hint) => baseText.toLowerCase().includes(hint))
|
|
1299
|
-
]).
|
|
2219
|
+
]).slice(0, 80);
|
|
2220
|
+
}
|
|
2221
|
+
/**
 * Build the full-text-search expression for a retrieval request.
 *
 * Query terms from `buildQueryTerms` are converted with `ftsToken`; terms it
 * rejects are dropped. At most 48 converted terms are kept, joined with " OR ".
 *
 * @param {object} input - Retrieval request forwarded to `buildQueryTerms`.
 * @returns {string} The OR-joined expression; an empty string when no term survives.
 */
function buildFtsQuery(input) {
  const prefixTerms = [];
  for (const term of buildQueryTerms(input)) {
    const prefixTerm = ftsToken(term);
    if (prefixTerm) prefixTerms.push(prefixTerm);
    if (prefixTerms.length === 48) break;
  }
  return prefixTerms.join(" OR ");
}
|
|
1302
2225
|
function clampMaxResults(value, defaultValue) {
|
|
@@ -1305,8 +2228,8 @@ function clampMaxResults(value, defaultValue) {
|
|
|
1305
2228
|
}
|
|
1306
2229
|
|
|
1307
2230
|
// src/retrieval/ranker.ts
|
|
1308
|
-
import
|
|
1309
|
-
function
|
|
2231
|
+
import path9 from "path";
|
|
2232
|
+
function parseJsonArray2(value) {
|
|
1310
2233
|
try {
|
|
1311
2234
|
const parsed = JSON.parse(value);
|
|
1312
2235
|
return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
|
|
@@ -1324,9 +2247,9 @@ function rowToWisdomUnit(row) {
|
|
|
1324
2247
|
category: row.category,
|
|
1325
2248
|
text: row.text,
|
|
1326
2249
|
sanitizedText: row.sanitized_text,
|
|
1327
|
-
filePaths:
|
|
1328
|
-
symbols:
|
|
1329
|
-
authors:
|
|
2250
|
+
filePaths: parseJsonArray2(row.file_paths_json),
|
|
2251
|
+
symbols: parseJsonArray2(row.symbols_json),
|
|
2252
|
+
authors: parseJsonArray2(row.authors_json),
|
|
1330
2253
|
createdAt: row.created_at,
|
|
1331
2254
|
mergedAt: row.merged_at ?? void 0,
|
|
1332
2255
|
confidence: row.confidence,
|
|
@@ -1352,17 +2275,18 @@ function filePathMatch(unitPaths, queryFiles) {
|
|
|
1352
2275
|
if (queryFiles.length === 0 || unitPaths.length === 0) return 0;
|
|
1353
2276
|
let best = 0;
|
|
1354
2277
|
for (const queryFile of queryFiles) {
|
|
1355
|
-
const queryBase =
|
|
1356
|
-
const queryDir =
|
|
2278
|
+
const queryBase = path9.basename(queryFile).toLowerCase();
|
|
2279
|
+
const queryDir = path9.dirname(queryFile).toLowerCase();
|
|
1357
2280
|
for (const unitPath of unitPaths) {
|
|
1358
|
-
const unitBase =
|
|
1359
|
-
const unitDir =
|
|
2281
|
+
const unitBase = path9.basename(unitPath).toLowerCase();
|
|
2282
|
+
const unitDir = path9.dirname(unitPath).toLowerCase();
|
|
1360
2283
|
const q = queryFile.toLowerCase();
|
|
1361
2284
|
const u = unitPath.toLowerCase();
|
|
1362
2285
|
if (q === u) best = Math.max(best, 1);
|
|
1363
2286
|
else if (queryBase === unitBase) best = Math.max(best, 0.68);
|
|
1364
2287
|
else if (queryDir === unitDir) best = Math.max(best, 0.62);
|
|
1365
|
-
else if (unitDir.startsWith(queryDir) || queryDir.startsWith(unitDir))
|
|
2288
|
+
else if (unitDir.startsWith(queryDir) || queryDir.startsWith(unitDir))
|
|
2289
|
+
best = Math.max(best, 0.38);
|
|
1366
2290
|
else if (queryBase && unitBase && queryBase.split(".")[0] === unitBase.split(".")[0]) {
|
|
1367
2291
|
best = Math.max(best, 0.48);
|
|
1368
2292
|
}
|
|
@@ -1370,7 +2294,7 @@ function filePathMatch(unitPaths, queryFiles) {
|
|
|
1370
2294
|
}
|
|
1371
2295
|
return best;
|
|
1372
2296
|
}
|
|
1373
|
-
function
|
|
2297
|
+
function symbolMatch2(unit, querySymbols) {
|
|
1374
2298
|
if (querySymbols.length === 0) return 0;
|
|
1375
2299
|
const unitSymbols = unit.symbols.map((symbol) => symbol.toLowerCase());
|
|
1376
2300
|
const text = unit.sanitizedText.toLowerCase();
|
|
@@ -1379,14 +2303,15 @@ function symbolMatch(unit, querySymbols) {
|
|
|
1379
2303
|
const lower = symbol.toLowerCase();
|
|
1380
2304
|
if (unitSymbols.includes(lower)) best = Math.max(best, 1);
|
|
1381
2305
|
else if (text.includes(`\`${lower}\``)) best = Math.max(best, 1);
|
|
1382
|
-
else if (new RegExp(`\\b${
|
|
2306
|
+
else if (new RegExp(`\\b${escapeRegExp2(lower)}\\b`, "i").test(text))
|
|
2307
|
+
best = Math.max(best, 0.66);
|
|
1383
2308
|
else if (unitSymbols.some((candidate) => candidate.includes(lower) || lower.includes(candidate))) {
|
|
1384
2309
|
best = Math.max(best, 0.35);
|
|
1385
2310
|
}
|
|
1386
2311
|
}
|
|
1387
2312
|
return best;
|
|
1388
2313
|
}
|
|
1389
|
-
function
|
|
2314
|
+
function textMatch2(unit, inputText) {
|
|
1390
2315
|
const queryTokens = tokenizeSearchText(inputText, 32);
|
|
1391
2316
|
if (queryTokens.length === 0) return unit.bm25 === void 0 ? 0 : 0.45;
|
|
1392
2317
|
const haystack = `${unit.sanitizedText} ${unit.filePaths.join(" ")} ${unit.symbols.join(" ")}`.toLowerCase();
|
|
@@ -1410,28 +2335,56 @@ function recencyScore(unit) {
|
|
|
1410
2335
|
if (ageDays < 1460) return 0.45;
|
|
1411
2336
|
return 0.25;
|
|
1412
2337
|
}
|
|
1413
|
-
function
|
|
2338
|
+
function freshnessMultiplier(status) {
|
|
2339
|
+
if (status === "current") return 1;
|
|
2340
|
+
if (status === "possibly_stale") return 0.85;
|
|
2341
|
+
return 0.55;
|
|
2342
|
+
}
|
|
2343
|
+
function matchReasons2(parts, unit) {
|
|
2344
|
+
const reasons = [];
|
|
2345
|
+
if (parts.filePathMatch >= 0.9) reasons.push("exact file path match");
|
|
2346
|
+
else if (parts.filePathMatch >= 0.45) reasons.push("related file path match");
|
|
2347
|
+
if (parts.symbolMatch >= 0.9) reasons.push("exact symbol match");
|
|
2348
|
+
else if (parts.symbolMatch >= 0.45) reasons.push("symbol mentioned in evidence");
|
|
2349
|
+
if (parts.textMatch >= 0.45) reasons.push("text matched task or diff terms");
|
|
2350
|
+
if (parts.reviewerOrAuthorSignal >= 0.85) reasons.push("reviewer evidence");
|
|
2351
|
+
if (unit.category === "security_note" || unit.category === "bug_regression") {
|
|
2352
|
+
reasons.push(`${unit.category.replace(/_/g, " ")} priority`);
|
|
2353
|
+
}
|
|
2354
|
+
return reasons.slice(0, 5);
|
|
2355
|
+
}
|
|
2356
|
+
function scoreUnit(unit, input, duplicateCount, repeatedEvidenceCount, freshness) {
|
|
1414
2357
|
const queryFiles = input.files ?? [];
|
|
1415
2358
|
const querySymbols = "symbols" in input ? input.symbols ?? [] : [];
|
|
1416
2359
|
const inputText = "task" in input ? `${input.task} ${input.diff ?? ""} ${input.currentCode ?? ""}` : input.query;
|
|
1417
|
-
const repetition = Math.min(1, duplicateCount / 3);
|
|
2360
|
+
const repetition = Math.min(1, Math.max(duplicateCount, repeatedEvidenceCount) / 3);
|
|
2361
|
+
const claimKey = claimKeyFor(unit.category, unit.sanitizedText);
|
|
1418
2362
|
const parts = {
|
|
1419
2363
|
filePathMatch: filePathMatch(unit.filePaths, queryFiles),
|
|
1420
|
-
symbolMatch:
|
|
1421
|
-
textMatch:
|
|
2364
|
+
symbolMatch: symbolMatch2(unit, querySymbols),
|
|
2365
|
+
textMatch: textMatch2(unit, inputText),
|
|
1422
2366
|
reviewerOrAuthorSignal: reviewerOrAuthorSignal(unit),
|
|
1423
2367
|
recencyOrRepetition: Math.max(recencyScore(unit), repetition),
|
|
1424
2368
|
categoryPriority: categoryPriority(unit.category)
|
|
1425
2369
|
};
|
|
1426
|
-
const score = 0.35 * parts.filePathMatch + 0.2 * parts.symbolMatch + 0.2 * parts.textMatch + 0.1 * parts.reviewerOrAuthorSignal + 0.1 * parts.recencyOrRepetition + 0.05 * parts.categoryPriority;
|
|
2370
|
+
const score = (0.35 * parts.filePathMatch + 0.2 * parts.symbolMatch + 0.2 * parts.textMatch + 0.1 * parts.reviewerOrAuthorSignal + 0.1 * parts.recencyOrRepetition + 0.05 * parts.categoryPriority) * freshnessMultiplier(freshness.status);
|
|
1427
2371
|
return {
|
|
1428
2372
|
...unit,
|
|
1429
2373
|
score: Number(score.toFixed(4)),
|
|
1430
2374
|
scoreParts: parts,
|
|
1431
|
-
duplicateCount
|
|
2375
|
+
duplicateCount,
|
|
2376
|
+
claimKey,
|
|
2377
|
+
repeatedEvidenceCount,
|
|
2378
|
+
confidenceLevel: confidenceLevelFor(unit.confidence),
|
|
2379
|
+
confidenceReasons: confidenceReasonsFor(unit, repeatedEvidenceCount),
|
|
2380
|
+
freshnessStatus: freshness.status,
|
|
2381
|
+
freshnessReason: freshness.reason,
|
|
2382
|
+
evidence: evidenceForWisdom(unit),
|
|
2383
|
+
matchReasons: matchReasons2(parts, unit),
|
|
2384
|
+
rankSignals: parts
|
|
1432
2385
|
};
|
|
1433
2386
|
}
|
|
1434
|
-
function
|
|
2387
|
+
function escapeRegExp2(value) {
|
|
1435
2388
|
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
1436
2389
|
}
|
|
1437
2390
|
function loadCandidates(db, input) {
|
|
@@ -1458,20 +2411,48 @@ function loadCandidates(db, input) {
|
|
|
1458
2411
|
).all(...categories);
|
|
1459
2412
|
return rows.map(rowToWisdomUnit);
|
|
1460
2413
|
}
|
|
2414
|
+
function loadClaimRepetitionCounts(db) {
|
|
2415
|
+
const rows = db.prepare("SELECT category, sanitized_text, pr_number FROM wisdom_units").all();
|
|
2416
|
+
const grouped = /* @__PURE__ */ new Map();
|
|
2417
|
+
for (const row of rows) {
|
|
2418
|
+
const key = claimKeyFor(row.category, row.sanitized_text);
|
|
2419
|
+
const prs = grouped.get(key) ?? /* @__PURE__ */ new Set();
|
|
2420
|
+
prs.add(row.pr_number);
|
|
2421
|
+
grouped.set(key, prs);
|
|
2422
|
+
}
|
|
2423
|
+
return new Map([...grouped.entries()].map(([key, prs]) => [key, prs.size]));
|
|
2424
|
+
}
|
|
2425
|
+
function minConfidence(input) {
|
|
2426
|
+
if ("minConfidence" in input && input.minConfidence) return input.minConfidence;
|
|
2427
|
+
return "strong";
|
|
2428
|
+
}
|
|
2429
|
+
function passesStrictMode2(unit, input) {
|
|
2430
|
+
if (!("strict" in input) || !input.strict) return true;
|
|
2431
|
+
if (unit.freshnessStatus === "stale") return false;
|
|
2432
|
+
return confidenceAtLeast(unit.confidenceLevel, minConfidence(input));
|
|
2433
|
+
}
|
|
1461
2434
|
function rankWisdomUnits(db, input) {
|
|
1462
2435
|
const candidates = loadCandidates(db, input);
|
|
2436
|
+
const codeSnapshot = loadCurrentCodeSnapshot(db);
|
|
2437
|
+
const repetitionCounts = loadClaimRepetitionCounts(db);
|
|
1463
2438
|
const duplicates = /* @__PURE__ */ new Map();
|
|
1464
2439
|
for (const unit of candidates) {
|
|
1465
|
-
const key =
|
|
2440
|
+
const key = claimKeyFor(unit.category, unit.sanitizedText);
|
|
1466
2441
|
duplicates.set(key, (duplicates.get(key) ?? 0) + 1);
|
|
1467
2442
|
}
|
|
1468
2443
|
const ranked = candidates.map((unit) => {
|
|
1469
|
-
const key =
|
|
1470
|
-
return scoreUnit(
|
|
1471
|
-
|
|
2444
|
+
const key = claimKeyFor(unit.category, unit.sanitizedText);
|
|
2445
|
+
return scoreUnit(
|
|
2446
|
+
unit,
|
|
2447
|
+
input,
|
|
2448
|
+
duplicates.get(key) ?? 1,
|
|
2449
|
+
repetitionCounts.get(key) ?? 1,
|
|
2450
|
+
evaluateFreshness(unit, codeSnapshot)
|
|
2451
|
+
);
|
|
2452
|
+
}).filter((unit) => passesStrictMode2(unit, input)).sort((a, b) => b.score - a.score || b.confidence - a.confidence);
|
|
1472
2453
|
const grouped = /* @__PURE__ */ new Map();
|
|
1473
2454
|
for (const unit of ranked) {
|
|
1474
|
-
const key =
|
|
2455
|
+
const key = unit.claimKey;
|
|
1475
2456
|
const existing = grouped.get(key);
|
|
1476
2457
|
if (!existing || unit.score > existing.score) {
|
|
1477
2458
|
grouped.set(key, {
|
|
@@ -1479,7 +2460,11 @@ function rankWisdomUnits(db, input) {
|
|
|
1479
2460
|
filePaths: uniqueStrings([...existing?.filePaths ?? [], ...unit.filePaths]),
|
|
1480
2461
|
symbols: uniqueStrings([...existing?.symbols ?? [], ...unit.symbols]),
|
|
1481
2462
|
authors: uniqueStrings([...existing?.authors ?? [], ...unit.authors]),
|
|
1482
|
-
duplicateCount: Math.max(unit.duplicateCount, existing?.duplicateCount ?? 1)
|
|
2463
|
+
duplicateCount: Math.max(unit.duplicateCount, existing?.duplicateCount ?? 1),
|
|
2464
|
+
repeatedEvidenceCount: Math.max(
|
|
2465
|
+
unit.repeatedEvidenceCount,
|
|
2466
|
+
existing?.repeatedEvidenceCount ?? 1
|
|
2467
|
+
)
|
|
1483
2468
|
});
|
|
1484
2469
|
}
|
|
1485
2470
|
}
|
|
@@ -1488,8 +2473,8 @@ function rankWisdomUnits(db, input) {
|
|
|
1488
2473
|
}
|
|
1489
2474
|
|
|
1490
2475
|
// src/retrieval/code-ranker.ts
|
|
1491
|
-
import
|
|
1492
|
-
function
|
|
2476
|
+
import path10 from "path";
|
|
2477
|
+
function parseJsonArray3(value) {
|
|
1493
2478
|
try {
|
|
1494
2479
|
const parsed = JSON.parse(value);
|
|
1495
2480
|
return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
|
|
@@ -1506,7 +2491,7 @@ function rowToCodeChunk(row) {
|
|
|
1506
2491
|
startLine: row.start_line,
|
|
1507
2492
|
endLine: row.end_line,
|
|
1508
2493
|
sanitizedText: row.sanitized_text,
|
|
1509
|
-
symbols:
|
|
2494
|
+
symbols: parseJsonArray3(row.symbols_json),
|
|
1510
2495
|
contentHash: row.content_hash,
|
|
1511
2496
|
updatedAt: row.updated_at,
|
|
1512
2497
|
bm25: row.bm25 ?? void 0
|
|
@@ -1515,13 +2500,13 @@ function rowToCodeChunk(row) {
|
|
|
1515
2500
|
function filePathMatch2(filePath, queryFiles) {
|
|
1516
2501
|
if (queryFiles.length === 0) return 0;
|
|
1517
2502
|
let best = 0;
|
|
1518
|
-
const unitBase =
|
|
1519
|
-
const unitDir =
|
|
2503
|
+
const unitBase = path10.basename(filePath).toLowerCase();
|
|
2504
|
+
const unitDir = path10.dirname(filePath).toLowerCase();
|
|
1520
2505
|
const unit = filePath.toLowerCase();
|
|
1521
2506
|
for (const queryFile of queryFiles) {
|
|
1522
2507
|
const query = queryFile.toLowerCase();
|
|
1523
|
-
const queryBase =
|
|
1524
|
-
const queryDir =
|
|
2508
|
+
const queryBase = path10.basename(queryFile).toLowerCase();
|
|
2509
|
+
const queryDir = path10.dirname(queryFile).toLowerCase();
|
|
1525
2510
|
if (query === unit) best = Math.max(best, 1);
|
|
1526
2511
|
else if (queryBase === unitBase) best = Math.max(best, 0.72);
|
|
1527
2512
|
else if (queryDir === unitDir) best = Math.max(best, 0.62);
|
|
@@ -1533,7 +2518,7 @@ function filePathMatch2(filePath, queryFiles) {
|
|
|
1533
2518
|
}
|
|
1534
2519
|
return best;
|
|
1535
2520
|
}
|
|
1536
|
-
function
|
|
2521
|
+
function symbolMatch3(chunk, querySymbols) {
|
|
1537
2522
|
if (querySymbols.length === 0) return 0;
|
|
1538
2523
|
const chunkSymbols = chunk.symbols.map((symbol) => symbol.toLowerCase());
|
|
1539
2524
|
const text = chunk.sanitizedText.toLowerCase();
|
|
@@ -1541,14 +2526,14 @@ function symbolMatch2(chunk, querySymbols) {
|
|
|
1541
2526
|
for (const symbol of querySymbols) {
|
|
1542
2527
|
const lower = symbol.toLowerCase();
|
|
1543
2528
|
if (chunkSymbols.includes(lower)) best = Math.max(best, 1);
|
|
1544
|
-
else if (new RegExp(`\\b${
|
|
2529
|
+
else if (new RegExp(`\\b${escapeRegExp3(lower)}\\b`, "i").test(text)) best = Math.max(best, 0.7);
|
|
1545
2530
|
else if (chunkSymbols.some((candidate) => candidate.includes(lower) || lower.includes(candidate))) {
|
|
1546
2531
|
best = Math.max(best, 0.42);
|
|
1547
2532
|
}
|
|
1548
2533
|
}
|
|
1549
2534
|
return best;
|
|
1550
2535
|
}
|
|
1551
|
-
function
|
|
2536
|
+
function textMatch3(chunk, input) {
|
|
1552
2537
|
const tokens = tokenizeSearchText(
|
|
1553
2538
|
`${input.task} ${input.diff ?? ""} ${input.currentCode ?? ""}`,
|
|
1554
2539
|
40
|
|
@@ -1567,7 +2552,17 @@ function recencyScore2(chunk) {
|
|
|
1567
2552
|
if (ageDays < 730) return 0.45;
|
|
1568
2553
|
return 0.25;
|
|
1569
2554
|
}
|
|
1570
|
-
function
|
|
2555
|
+
function matchReasons3(parts) {
|
|
2556
|
+
const reasons = [];
|
|
2557
|
+
if (parts.filePathMatch >= 0.9) reasons.push("exact file path match");
|
|
2558
|
+
else if (parts.filePathMatch >= 0.45) reasons.push("related file path match");
|
|
2559
|
+
if (parts.symbolMatch >= 0.9) reasons.push("exact symbol match");
|
|
2560
|
+
else if (parts.symbolMatch >= 0.45) reasons.push("symbol mentioned in current code");
|
|
2561
|
+
if (parts.textMatch >= 0.45) reasons.push("text matched task or diff terms");
|
|
2562
|
+
if (parts.recency >= 0.75) reasons.push("recent code file");
|
|
2563
|
+
return reasons.slice(0, 5);
|
|
2564
|
+
}
|
|
2565
|
+
function escapeRegExp3(value) {
|
|
1571
2566
|
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
1572
2567
|
}
|
|
1573
2568
|
function escapeLike(value) {
|
|
@@ -1591,7 +2586,7 @@ function loadCodeCandidates(db, input) {
|
|
|
1591
2586
|
}
|
|
1592
2587
|
}
|
|
1593
2588
|
for (const file of input.files ?? []) {
|
|
1594
|
-
const basename =
|
|
2589
|
+
const basename = path10.basename(file);
|
|
1595
2590
|
const rows = db.prepare(
|
|
1596
2591
|
`SELECT cc.*, NULL AS bm25
|
|
1597
2592
|
FROM code_chunks cc
|
|
@@ -1624,8 +2619,8 @@ function rankCodeChunks(db, input) {
|
|
|
1624
2619
|
const ranked = loadCodeCandidates(db, input).map((chunk) => {
|
|
1625
2620
|
const parts = {
|
|
1626
2621
|
filePathMatch: filePathMatch2(chunk.filePath, queryFiles),
|
|
1627
|
-
symbolMatch:
|
|
1628
|
-
textMatch:
|
|
2622
|
+
symbolMatch: symbolMatch3(chunk, querySymbols),
|
|
2623
|
+
textMatch: textMatch3(chunk, input),
|
|
1629
2624
|
recency: recencyScore2(chunk)
|
|
1630
2625
|
};
|
|
1631
2626
|
const score = 0.4 * parts.filePathMatch + 0.25 * parts.symbolMatch + 0.25 * parts.textMatch + 0.1 * parts.recency;
|
|
@@ -1633,21 +2628,221 @@ function rankCodeChunks(db, input) {
|
|
|
1633
2628
|
...chunk,
|
|
1634
2629
|
symbols: uniqueStrings(chunk.symbols),
|
|
1635
2630
|
score: Number(score.toFixed(4)),
|
|
1636
|
-
scoreParts: parts
|
|
2631
|
+
scoreParts: parts,
|
|
2632
|
+
matchReasons: matchReasons3(parts),
|
|
2633
|
+
rankSignals: parts
|
|
1637
2634
|
};
|
|
1638
2635
|
}).sort((a, b) => b.score - a.score || b.startLine - a.startLine);
|
|
1639
2636
|
const limit = Math.min(5, clampMaxResults(input.maxResults, 5));
|
|
1640
2637
|
return ranked.slice(0, limit);
|
|
1641
2638
|
}
|
|
1642
2639
|
|
|
2640
|
+
// src/retrieval/test-ranker.ts
|
|
2641
|
+
import path11 from "path";
|
|
2642
|
+
function parseJsonArray4(value) {
|
|
2643
|
+
if (!value) return [];
|
|
2644
|
+
try {
|
|
2645
|
+
const parsed = JSON.parse(value);
|
|
2646
|
+
return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
|
|
2647
|
+
} catch {
|
|
2648
|
+
return [];
|
|
2649
|
+
}
|
|
2650
|
+
}
|
|
2651
|
+
function baseStem(filePath) {
|
|
2652
|
+
return path11.posix.basename(filePath).replace(/\.(test|spec)\.[^.]+$/i, "").replace(/\.[^.]+$/i, "").toLowerCase();
|
|
2653
|
+
}
|
|
2654
|
+
function rowToRanked(row, input) {
|
|
2655
|
+
const symbols = parseJsonArray4(row.symbols_json);
|
|
2656
|
+
const text = row.sanitized_text ?? "";
|
|
2657
|
+
const matchedSymbols = (input.symbols ?? []).filter((symbol) => {
|
|
2658
|
+
const lower = symbol.toLowerCase();
|
|
2659
|
+
return symbols.some((candidate) => candidate.toLowerCase() === lower) || new RegExp(`\\b${escapeRegExp4(symbol)}\\b`, "i").test(text);
|
|
2660
|
+
});
|
|
2661
|
+
const exactFile = (input.files ?? []).some((file) => row.source_path === file);
|
|
2662
|
+
const basenameMatch = (input.files ?? []).some((file) => baseStem(file) === baseStem(row.path));
|
|
2663
|
+
const symbolScore = matchedSymbols.length > 0 ? 0.25 : 0;
|
|
2664
|
+
const score = (exactFile ? 0.55 : 0) + (basenameMatch ? 0.25 : 0) + (row.strength ?? 0.35) * 0.3 + symbolScore;
|
|
2665
|
+
return {
|
|
2666
|
+
repo: "",
|
|
2667
|
+
path: row.path,
|
|
2668
|
+
language: row.language ?? void 0,
|
|
2669
|
+
sizeBytes: row.size_bytes,
|
|
2670
|
+
contentHash: row.content_hash,
|
|
2671
|
+
updatedAt: row.updated_at,
|
|
2672
|
+
sourcePath: row.source_path ?? void 0,
|
|
2673
|
+
reason: row.reason ?? (basenameMatch ? "same basename" : "test file match"),
|
|
2674
|
+
strength: row.strength ?? 0.35,
|
|
2675
|
+
score: Number(score.toFixed(4)),
|
|
2676
|
+
matchedSymbols
|
|
2677
|
+
};
|
|
2678
|
+
}
|
|
2679
|
+
function escapeRegExp4(value) {
|
|
2680
|
+
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
2681
|
+
}
|
|
2682
|
+
function rankRelevantTests(db, input) {
|
|
2683
|
+
const candidates = /* @__PURE__ */ new Map();
|
|
2684
|
+
for (const file of input.files ?? []) {
|
|
2685
|
+
const linkedRows = db.prepare(
|
|
2686
|
+
`SELECT tf.path, tf.language, tf.size_bytes, tf.content_hash, tf.updated_at,
|
|
2687
|
+
tl.source_path, tl.reason, tl.strength, cc.symbols_json, cc.sanitized_text
|
|
2688
|
+
FROM test_links tl
|
|
2689
|
+
JOIN test_files tf ON tf.repo_id = tl.repo_id AND tf.path = tl.test_path
|
|
2690
|
+
LEFT JOIN code_chunks cc ON cc.repo_id = tl.repo_id AND cc.file_path = tf.path
|
|
2691
|
+
WHERE tl.source_path = ?
|
|
2692
|
+
ORDER BY tl.strength DESC
|
|
2693
|
+
LIMIT 40`
|
|
2694
|
+
).all(file);
|
|
2695
|
+
for (const row of linkedRows) candidates.set(row.path, row);
|
|
2696
|
+
const basename = baseStem(file);
|
|
2697
|
+
const basenameRows = db.prepare(
|
|
2698
|
+
`SELECT tf.path, tf.language, tf.size_bytes, tf.content_hash, tf.updated_at,
|
|
2699
|
+
NULL AS source_path, 'same basename' AS reason, 0.7 AS strength,
|
|
2700
|
+
cc.symbols_json, cc.sanitized_text
|
|
2701
|
+
FROM test_files tf
|
|
2702
|
+
LEFT JOIN code_chunks cc ON cc.file_path = tf.path
|
|
2703
|
+
WHERE lower(tf.path) LIKE ?
|
|
2704
|
+
LIMIT 25`
|
|
2705
|
+
).all(`%${basename}%`);
|
|
2706
|
+
for (const row of basenameRows) candidates.set(row.path, row);
|
|
2707
|
+
}
|
|
2708
|
+
if (candidates.size === 0) {
|
|
2709
|
+
const rows = db.prepare(
|
|
2710
|
+
`SELECT tf.path, tf.language, tf.size_bytes, tf.content_hash, tf.updated_at,
|
|
2711
|
+
NULL AS source_path, 'recent test file' AS reason, 0.25 AS strength,
|
|
2712
|
+
cc.symbols_json, cc.sanitized_text
|
|
2713
|
+
FROM test_files tf
|
|
2714
|
+
LEFT JOIN code_chunks cc ON cc.file_path = tf.path
|
|
2715
|
+
ORDER BY tf.updated_at DESC
|
|
2716
|
+
LIMIT 20`
|
|
2717
|
+
).all();
|
|
2718
|
+
for (const row of rows) candidates.set(row.path, row);
|
|
2719
|
+
}
|
|
2720
|
+
return [...candidates.values()].map((row) => rowToRanked(row, input)).sort((a, b) => b.score - a.score || a.path.localeCompare(b.path)).slice(0, Math.min(5, clampMaxResults(input.maxResults, 5)));
|
|
2721
|
+
}
|
|
2722
|
+
|
|
2723
|
+
// src/retrieval/regression-ranker.ts
|
|
2724
|
+
import path12 from "path";
|
|
2725
|
+
function parseJsonArray5(value) {
|
|
2726
|
+
try {
|
|
2727
|
+
const parsed = JSON.parse(value);
|
|
2728
|
+
return Array.isArray(parsed) ? parsed.filter((item) => typeof item === "string") : [];
|
|
2729
|
+
} catch {
|
|
2730
|
+
return [];
|
|
2731
|
+
}
|
|
2732
|
+
}
|
|
2733
|
+
function rowToEvent(row) {
|
|
2734
|
+
return {
|
|
2735
|
+
id: row.id,
|
|
2736
|
+
repo: row.repo,
|
|
2737
|
+
prNumber: row.pr_number,
|
|
2738
|
+
prUrl: row.pr_url,
|
|
2739
|
+
summary: row.summary_sanitized,
|
|
2740
|
+
filePaths: parseJsonArray5(row.file_paths_json),
|
|
2741
|
+
symbols: parseJsonArray5(row.symbols_json),
|
|
2742
|
+
testPaths: parseJsonArray5(row.test_paths_json),
|
|
2743
|
+
authors: parseJsonArray5(row.authors_json),
|
|
2744
|
+
labels: parseJsonArray5(row.labels_json),
|
|
2745
|
+
signals: parseJsonArray5(row.signals_json),
|
|
2746
|
+
createdAt: row.created_at,
|
|
2747
|
+
mergedAt: row.merged_at ?? void 0,
|
|
2748
|
+
confidence: row.confidence
|
|
2749
|
+
};
|
|
2750
|
+
}
|
|
2751
|
+
function filePathMatch3(eventPaths, queryFiles) {
|
|
2752
|
+
let best = 0;
|
|
2753
|
+
for (const queryFile of queryFiles) {
|
|
2754
|
+
const queryBase = path12.posix.basename(queryFile).toLowerCase();
|
|
2755
|
+
const queryDir = path12.posix.dirname(queryFile).toLowerCase();
|
|
2756
|
+
for (const eventPath of eventPaths) {
|
|
2757
|
+
const eventBase = path12.posix.basename(eventPath).toLowerCase();
|
|
2758
|
+
const eventDir = path12.posix.dirname(eventPath).toLowerCase();
|
|
2759
|
+
if (queryFile.toLowerCase() === eventPath.toLowerCase()) best = Math.max(best, 1);
|
|
2760
|
+
else if (queryBase === eventBase) best = Math.max(best, 0.7);
|
|
2761
|
+
else if (queryDir === eventDir) best = Math.max(best, 0.55);
|
|
2762
|
+
}
|
|
2763
|
+
}
|
|
2764
|
+
return best;
|
|
2765
|
+
}
|
|
2766
|
+
function symbolMatch4(event, querySymbols) {
|
|
2767
|
+
const eventSymbols = event.symbols.map((symbol) => symbol.toLowerCase());
|
|
2768
|
+
let best = 0;
|
|
2769
|
+
for (const symbol of querySymbols) {
|
|
2770
|
+
const lower = symbol.toLowerCase();
|
|
2771
|
+
if (eventSymbols.includes(lower)) best = Math.max(best, 1);
|
|
2772
|
+
else if (event.summary.toLowerCase().includes(lower)) best = Math.max(best, 0.65);
|
|
2773
|
+
}
|
|
2774
|
+
return best;
|
|
2775
|
+
}
|
|
2776
|
+
function textMatch4(event, inputText) {
|
|
2777
|
+
const tokens = tokenizeSearchText(inputText, 32);
|
|
2778
|
+
if (tokens.length === 0) return 0;
|
|
2779
|
+
const haystack = `${event.summary} ${event.filePaths.join(" ")} ${event.symbols.join(" ")} ${event.signals.join(" ")}`.toLowerCase();
|
|
2780
|
+
return tokens.filter((token) => haystack.includes(token.toLowerCase())).length / tokens.length;
|
|
2781
|
+
}
|
|
2782
|
+
function recencyScore3(event) {
|
|
2783
|
+
const timestamp = Date.parse(event.mergedAt ?? event.createdAt);
|
|
2784
|
+
if (Number.isNaN(timestamp)) return 0.25;
|
|
2785
|
+
const ageDays = Math.max(0, (Date.now() - timestamp) / (1e3 * 60 * 60 * 24));
|
|
2786
|
+
if (ageDays < 180) return 1;
|
|
2787
|
+
if (ageDays < 730) return 0.7;
|
|
2788
|
+
return 0.35;
|
|
2789
|
+
}
|
|
2790
|
+
function matchReasons4(parts, event) {
|
|
2791
|
+
const reasons = [];
|
|
2792
|
+
if ((parts.filePathMatch ?? 0) >= 0.9) reasons.push("exact file path match");
|
|
2793
|
+
else if ((parts.filePathMatch ?? 0) >= 0.45) reasons.push("related file path match");
|
|
2794
|
+
if ((parts.symbolMatch ?? 0) >= 0.9) reasons.push("exact symbol match");
|
|
2795
|
+
if ((parts.textMatch ?? 0) >= 0.35) reasons.push("text matched task or diff terms");
|
|
2796
|
+
if (event.signals.length > 0)
|
|
2797
|
+
reasons.push(`regression signals: ${event.signals.slice(0, 3).join(", ")}`);
|
|
2798
|
+
return reasons.slice(0, 5);
|
|
2799
|
+
}
|
|
2800
|
+
function loadRegressionEvents(db) {
|
|
2801
|
+
const rows = db.prepare(
|
|
2802
|
+
"SELECT * FROM regression_events ORDER BY COALESCE(merged_at, created_at) DESC LIMIT 200"
|
|
2803
|
+
).all();
|
|
2804
|
+
return rows.map(rowToEvent);
|
|
2805
|
+
}
|
|
2806
|
+
function rankRegressionEvents(db, input) {
|
|
2807
|
+
const queryFiles = input.files ?? [];
|
|
2808
|
+
const querySymbols = "symbols" in input ? input.symbols ?? [] : [];
|
|
2809
|
+
const inputText = "task" in input ? `${input.task} ${input.diff ?? ""} ${input.currentCode ?? ""}` : input.query;
|
|
2810
|
+
const ranked = loadRegressionEvents(db).map((event) => {
|
|
2811
|
+
const parts = {
|
|
2812
|
+
filePathMatch: filePathMatch3(event.filePaths, queryFiles),
|
|
2813
|
+
symbolMatch: symbolMatch4(event, querySymbols),
|
|
2814
|
+
textMatch: textMatch4(event, inputText),
|
|
2815
|
+
recency: recencyScore3(event),
|
|
2816
|
+
confidence: event.confidence
|
|
2817
|
+
};
|
|
2818
|
+
const score = 0.35 * parts.filePathMatch + 0.2 * parts.symbolMatch + 0.2 * parts.textMatch + 0.15 * parts.recency + 0.1 * parts.confidence;
|
|
2819
|
+
return {
|
|
2820
|
+
...event,
|
|
2821
|
+
filePaths: uniqueStrings(event.filePaths),
|
|
2822
|
+
symbols: uniqueStrings(event.symbols),
|
|
2823
|
+
score: Number(score.toFixed(4)),
|
|
2824
|
+
matchReasons: matchReasons4(parts, event),
|
|
2825
|
+
rankSignals: parts
|
|
2826
|
+
};
|
|
2827
|
+
}).filter((event) => event.score > 0 || "regressionsOnly" in input && input.regressionsOnly).sort((a, b) => b.score - a.score || b.confidence - a.confidence);
|
|
2828
|
+
return ranked.slice(0, Math.min(5, clampMaxResults(input.maxResults, 5)));
|
|
2829
|
+
}
|
|
2830
|
+
|
|
1643
2831
|
// src/retrieval/formatter.ts
|
|
1644
2832
|
function evidenceLine(unit) {
|
|
1645
2833
|
const author = unit.authors[0] ? ` by @${unit.authors[0]}` : "";
|
|
1646
2834
|
const file = unit.filePaths[0] ? `, ${unit.filePaths[0]}` : "";
|
|
1647
2835
|
return `PR #${unit.prNumber}${author}, ${unit.sourceType}${file}`;
|
|
1648
2836
|
}
|
|
2837
|
+
function confidenceLine(unit) {
|
|
2838
|
+
const reasons = unit.confidenceReasons.length ? ` (${unit.confidenceReasons.join(", ")})` : "";
|
|
2839
|
+
return `${unit.confidenceLevel}${reasons}`;
|
|
2840
|
+
}
|
|
2841
|
+
function currentCodeCheckLine(unit) {
|
|
2842
|
+
return `${unit.freshnessStatus.replace(/_/g, " ")} - ${unit.freshnessReason}`;
|
|
2843
|
+
}
|
|
1649
2844
|
function whyItMatters(unit, input) {
|
|
1650
|
-
const prefix = unit.
|
|
2845
|
+
const prefix = unit.confidenceLevel === "weak" ? "Historical evidence suggests " : "";
|
|
1651
2846
|
const target = input.files?.[0] ? ` when editing ${input.files[0]}` : " for this change";
|
|
1652
2847
|
const categoryReasons = {
|
|
1653
2848
|
security_note: `${prefix}there is a security-sensitive constraint to preserve${target}.`,
|
|
@@ -1679,15 +2874,39 @@ function riskLines(units) {
|
|
|
1679
2874
|
}
|
|
1680
2875
|
return [...risks].slice(0, 4);
|
|
1681
2876
|
}
|
|
1682
|
-
function formatAnchorContext(units, input, codeChunks = []) {
|
|
1683
|
-
const lines = ["# Anchor Context", ""
|
|
2877
|
+
function formatAnchorContext(units, input, codeChunks = [], teamRules = [], warnings = [], relevantTests = [], regressionEvents = [], extraMetadata = {}) {
|
|
2878
|
+
const lines = ["# Anchor Context", ""];
|
|
2879
|
+
if (warnings.length > 0) {
|
|
2880
|
+
lines.push("## Warnings", "");
|
|
2881
|
+
for (const warning of warnings) lines.push(`- ${warning}`);
|
|
2882
|
+
lines.push("");
|
|
2883
|
+
}
|
|
2884
|
+
if (teamRules.length > 0) {
|
|
2885
|
+
lines.push("## Team-approved rules", "");
|
|
2886
|
+
teamRules.forEach((rule, index) => {
|
|
2887
|
+
const evidence = rule.evidence[0];
|
|
2888
|
+
const evidenceText = evidence ? `PR #${evidence.prNumber}, ${evidence.sourceType}${evidence.filePath ? `, ${evidence.filePath}` : ""}` : "No evidence";
|
|
2889
|
+
lines.push(`${index + 1}. [${rule.category}] ${clipSentence(rule.sanitizedText)}`);
|
|
2890
|
+
lines.push(` Evidence: ${evidenceText}`);
|
|
2891
|
+
lines.push(` Confidence: ${confidenceLine(rule)}`);
|
|
2892
|
+
lines.push(` Current code check: ${currentCodeCheckLine(rule)}`);
|
|
2893
|
+
if (evidence?.prUrl) lines.push(` Link: ${evidence.prUrl}`);
|
|
2894
|
+
lines.push("");
|
|
2895
|
+
});
|
|
2896
|
+
}
|
|
2897
|
+
lines.push("## Must know", "");
|
|
1684
2898
|
if (units.length === 0) {
|
|
1685
|
-
lines.push(
|
|
2899
|
+
lines.push(
|
|
2900
|
+
input.strict ? "No reliable historical evidence found." : "No directly relevant indexed PR history found.",
|
|
2901
|
+
""
|
|
2902
|
+
);
|
|
1686
2903
|
} else {
|
|
1687
2904
|
units.forEach((unit, index) => {
|
|
1688
|
-
const statement = unit.
|
|
2905
|
+
const statement = unit.confidenceLevel === "weak" ? `Historical evidence suggests ${clipSentence(unit.sanitizedText)}` : clipSentence(unit.sanitizedText);
|
|
1689
2906
|
lines.push(`${index + 1}. [${unit.category}] ${statement}`);
|
|
1690
2907
|
lines.push(` Evidence: ${evidenceLine(unit)}`);
|
|
2908
|
+
lines.push(` Confidence: ${confidenceLine(unit)}`);
|
|
2909
|
+
lines.push(` Current code check: ${currentCodeCheckLine(unit)}`);
|
|
1691
2910
|
lines.push(` Why it matters: ${whyItMatters(unit, input)}`);
|
|
1692
2911
|
lines.push(` Link: ${unit.prUrl}`);
|
|
1693
2912
|
lines.push("");
|
|
@@ -1705,6 +2924,33 @@ function formatAnchorContext(units, input, codeChunks = []) {
|
|
|
1705
2924
|
lines.push("");
|
|
1706
2925
|
});
|
|
1707
2926
|
}
|
|
2927
|
+
lines.push("## Relevant tests", "");
|
|
2928
|
+
if (relevantTests.length === 0) {
|
|
2929
|
+
lines.push("No directly related tests found in the local index.", "");
|
|
2930
|
+
} else {
|
|
2931
|
+
relevantTests.forEach((test, index) => {
|
|
2932
|
+
const symbolText = test.matchedSymbols.length ? `; symbols: ${test.matchedSymbols.slice(0, 6).join(", ")}` : "";
|
|
2933
|
+
lines.push(`${index + 1}. ${test.path}${symbolText}`);
|
|
2934
|
+
lines.push(` Why it matters: ${test.reason} (${test.strength.toFixed(2)} link strength).`);
|
|
2935
|
+
if (test.sourcePath) lines.push(` Source: ${test.sourcePath}`);
|
|
2936
|
+
lines.push("");
|
|
2937
|
+
});
|
|
2938
|
+
}
|
|
2939
|
+
lines.push("## Regression memory", "");
|
|
2940
|
+
if (regressionEvents.length === 0) {
|
|
2941
|
+
lines.push("No related regression events found in the local index.", "");
|
|
2942
|
+
} else {
|
|
2943
|
+
regressionEvents.forEach((event, index) => {
|
|
2944
|
+
lines.push(`${index + 1}. ${clipSentence(event.summary, 220)}`);
|
|
2945
|
+
lines.push(` Evidence: PR #${event.prNumber}, signals: ${event.signals.join(", ")}`);
|
|
2946
|
+
lines.push(` Files: ${event.filePaths.slice(0, 5).join(", ") || "n/a"}`);
|
|
2947
|
+
if (event.testPaths.length > 0) {
|
|
2948
|
+
lines.push(` Tests: ${event.testPaths.slice(0, 5).join(", ")}`);
|
|
2949
|
+
}
|
|
2950
|
+
lines.push(` Link: ${event.prUrl}`);
|
|
2951
|
+
lines.push("");
|
|
2952
|
+
});
|
|
2953
|
+
}
|
|
1708
2954
|
lines.push("## Risks", "");
|
|
1709
2955
|
const risks = riskLines(units);
|
|
1710
2956
|
if (risks.length === 0) {
|
|
@@ -1724,13 +2970,36 @@ function formatAnchorContext(units, input, codeChunks = []) {
|
|
|
1724
2970
|
id: unit.id,
|
|
1725
2971
|
score: unit.score,
|
|
1726
2972
|
confidence: unit.confidence,
|
|
2973
|
+
confidenceLevel: unit.confidenceLevel,
|
|
2974
|
+
confidenceReasons: unit.confidenceReasons,
|
|
2975
|
+
freshnessStatus: unit.freshnessStatus,
|
|
2976
|
+
freshnessReason: unit.freshnessReason,
|
|
2977
|
+
evidence: unit.evidence,
|
|
2978
|
+
claimKey: unit.claimKey,
|
|
2979
|
+
repeatedEvidenceCount: unit.repeatedEvidenceCount,
|
|
1727
2980
|
category: unit.category,
|
|
1728
2981
|
prNumber: unit.prNumber,
|
|
1729
2982
|
prUrl: unit.prUrl,
|
|
1730
2983
|
sourceType: unit.sourceType,
|
|
1731
2984
|
filePaths: unit.filePaths,
|
|
1732
2985
|
symbols: unit.symbols,
|
|
1733
|
-
duplicateCount: unit.duplicateCount
|
|
2986
|
+
duplicateCount: unit.duplicateCount,
|
|
2987
|
+
matchReasons: unit.matchReasons,
|
|
2988
|
+
rankSignals: unit.rankSignals
|
|
2989
|
+
})),
|
|
2990
|
+
teamRules: teamRules.map((rule) => ({
|
|
2991
|
+
id: rule.id,
|
|
2992
|
+
score: rule.score,
|
|
2993
|
+
confidenceLevel: rule.confidenceLevel,
|
|
2994
|
+
confidenceReasons: rule.confidenceReasons,
|
|
2995
|
+
freshnessStatus: rule.freshnessStatus,
|
|
2996
|
+
freshnessReason: rule.freshnessReason,
|
|
2997
|
+
category: rule.category,
|
|
2998
|
+
filePaths: rule.filePaths,
|
|
2999
|
+
symbols: rule.symbols,
|
|
3000
|
+
evidence: rule.evidence,
|
|
3001
|
+
matchReasons: rule.matchReasons,
|
|
3002
|
+
rankSignals: rule.rankSignals
|
|
1734
3003
|
})),
|
|
1735
3004
|
codeEvidence: codeChunks.map((chunk) => ({
|
|
1736
3005
|
id: chunk.id,
|
|
@@ -1739,8 +3008,32 @@ function formatAnchorContext(units, input, codeChunks = []) {
|
|
|
1739
3008
|
language: chunk.language,
|
|
1740
3009
|
startLine: chunk.startLine,
|
|
1741
3010
|
endLine: chunk.endLine,
|
|
1742
|
-
symbols: chunk.symbols
|
|
1743
|
-
|
|
3011
|
+
symbols: chunk.symbols,
|
|
3012
|
+
matchReasons: chunk.matchReasons,
|
|
3013
|
+
rankSignals: chunk.rankSignals
|
|
3014
|
+
})),
|
|
3015
|
+
relevantTests: relevantTests.map((test) => ({
|
|
3016
|
+
path: test.path,
|
|
3017
|
+
sourcePath: test.sourcePath,
|
|
3018
|
+
reason: test.reason,
|
|
3019
|
+
strength: test.strength,
|
|
3020
|
+
score: test.score,
|
|
3021
|
+
matchedSymbols: test.matchedSymbols
|
|
3022
|
+
})),
|
|
3023
|
+
regressionEvents: regressionEvents.map((event) => ({
|
|
3024
|
+
id: event.id,
|
|
3025
|
+
score: event.score,
|
|
3026
|
+
prNumber: event.prNumber,
|
|
3027
|
+
prUrl: event.prUrl,
|
|
3028
|
+
filePaths: event.filePaths,
|
|
3029
|
+
symbols: event.symbols,
|
|
3030
|
+
testPaths: event.testPaths,
|
|
3031
|
+
summary: clipSentence(event.summary, 260),
|
|
3032
|
+
matchReasons: event.matchReasons,
|
|
3033
|
+
rankSignals: event.rankSignals
|
|
3034
|
+
})),
|
|
3035
|
+
queryTerms: buildQueryTerms(input),
|
|
3036
|
+
...extraMetadata
|
|
1744
3037
|
}
|
|
1745
3038
|
};
|
|
1746
3039
|
}
|
|
@@ -1773,7 +3066,9 @@ function formatSearchHistory(units) {
|
|
|
1773
3066
|
sourceType: unit.sourceType,
|
|
1774
3067
|
sanitizedSnippet: clipSentence(unit.sanitizedText, 260),
|
|
1775
3068
|
matchedFiles: unit.filePaths,
|
|
1776
|
-
matchedSymbols: unit.symbols
|
|
3069
|
+
matchedSymbols: unit.symbols,
|
|
3070
|
+
matchReasons: unit.matchReasons,
|
|
3071
|
+
rankSignals: unit.rankSignals
|
|
1777
3072
|
}))
|
|
1778
3073
|
}
|
|
1779
3074
|
};
|
|
@@ -1790,14 +3085,202 @@ function formatIndexStatus(status) {
|
|
|
1790
3085
|
`- Wisdom units: ${status.wisdomUnitCount}`,
|
|
1791
3086
|
`- Code files: ${status.codeFileCount}`,
|
|
1792
3087
|
`- Code chunks: ${status.codeChunkCount}`,
|
|
3088
|
+
`- Test files: ${status.testFileCount}`,
|
|
3089
|
+
`- Test links: ${status.testLinkCount}`,
|
|
3090
|
+
`- Regression events: ${status.regressionEventCount}`,
|
|
3091
|
+
`- History coverage: ${status.historyCoverage ?? "unknown"}`,
|
|
3092
|
+
`- History limit: ${status.historyLimit ?? "n/a"}`,
|
|
3093
|
+
`- Stale evidence: ${status.staleEvidenceCount}`,
|
|
3094
|
+
`- Team rules: ${status.teamRuleCount}`,
|
|
1793
3095
|
`- Last sync: ${status.lastSyncTime ?? "never"}`,
|
|
1794
3096
|
`- Last code index: ${status.lastCodeIndexTime ?? "never"}`,
|
|
3097
|
+
`- Last rule index: ${status.lastRuleIndexTime ?? "never"}`,
|
|
3098
|
+
`- Last successful index run: ${status.lastSuccessfulRun ?? "never"}`,
|
|
3099
|
+
`- Last failed index run: ${status.lastFailedRun ?? "never"}`,
|
|
3100
|
+
`- Stale code index: ${status.staleCodeIndex ? "yes" : "no"}`,
|
|
3101
|
+
`- Suggested next command: ${status.suggestedNextCommand ?? "n/a"}`,
|
|
1795
3102
|
`- GitHub token configured: ${status.githubTokenConfigured ? "yes" : "no"}`,
|
|
1796
3103
|
`- Health: ${status.health}`
|
|
1797
3104
|
];
|
|
1798
3105
|
return { markdown: lines.join("\n"), metadata: status };
|
|
1799
3106
|
}
|
|
1800
3107
|
|
|
3108
|
+
// src/retrieval/semantic.ts
|
|
3109
|
+
/**
 * Reports whether semantic (embedding-based) search is switched on and usable.
 *
 * Semantic search is only considered when `env.ANCHOR_SEMANTIC` equals
 * `"local"`; in that case it is reported as available only if a provider was
 * passed and its `isAvailable()` returns a truthy value.
 *
 * @param {object} [env=process.env] - Environment map; only `ANCHOR_SEMANTIC` is read.
 * @param {{ name: string, isAvailable: () => boolean }} [provider] - Optional local embedding provider.
 * @returns {{ enabled: boolean, mode: string, available: boolean, reason: string }}
 */
function getSemanticStatus(env = process.env, provider) {
  const localRequested = env.ANCHOR_SEMANTIC === "local";
  if (!localRequested) {
    return {
      enabled: false,
      mode: "disabled",
      available: false,
      reason: "Semantic search is disabled; SQLite FTS is active."
    };
  }

  // The provider is only probed when one was actually supplied.
  const providerReady = provider ? Boolean(provider.isAvailable()) : false;
  const reason = providerReady
    ? `Using local embedding provider: ${provider.name}.`
    : "Local semantic search requested, but no local embedding provider is available; falling back to SQLite FTS.";

  return {
    enabled: true,
    mode: "local",
    available: providerReady,
    reason
  };
}
|
|
3133
|
+
|
|
3134
|
+
// src/retrieval/context.ts
|
|
3135
|
+
/**
 * Collects every ranked evidence source for a request and hands them to
 * `formatAnchorContext` together with index-health and semantic-search
 * metadata.
 *
 * @param {object} db - Database handle forwarded to the ranking helpers.
 * @param {string} cwd - Working directory used for team rules and index status.
 * @param {object} input - Context request; `input.strict` is read here.
 * @param {string[]} [warnings=[]] - Caller-supplied warnings to include in the output.
 * @returns {*} Whatever `formatAnchorContext` returns.
 */
function buildAnchorContextResult(db, cwd, input, warnings = []) {
  const rankedHistory = rankWisdomUnits(db, input);
  const rankedCode = rankCodeChunks(db, input);
  const rankedRules = rankTeamRules(db, cwd, input);
  const rankedTests = rankRelevantTests(db, input);
  const rankedRegressions = rankRegressionEvents(db, input);
  const status = getIndexStatus(cwd);
  // Called without arguments: reads process.env and reports no provider.
  const semanticStatus = getSemanticStatus();
  const coverage = status.historyCoverage ?? "unknown";

  const combinedWarnings = [...warnings];
  // Strict requests get a warning whenever PR history coverage is not "all".
  if (input.strict && status.historyCoverage !== "all") {
    combinedWarnings.push(
      `Strict mode is using ${coverage} PR history coverage; run anchor index-all for broader evidence.`
    );
  }

  const extraMetadata = {
    indexHealth: {
      historyCoverage: coverage,
      staleCodeIndex: Boolean(status.staleCodeIndex),
      lastSuccessfulRun: status.lastSuccessfulRun,
      lastFailedRun: status.lastFailedRun
    },
    semanticStatus
  };

  return formatAnchorContext(
    rankedHistory,
    input,
    rankedCode,
    rankedRules,
    combinedWarnings,
    rankedTests,
    rankedRegressions,
    extraMetadata
  );
}
|
|
3165
|
+
|
|
3166
|
+
// src/retrieval/explain-file.ts
|
|
3167
|
+
/**
 * Builds an "explain this file" report: a short header derived from the
 * top-ranked code chunk for the file, followed by the regular Anchor context
 * for that file.
 *
 * @param {object} db - Database handle forwarded to the ranking helpers.
 * @param {string} cwd - Working directory forwarded to `buildAnchorContextResult`.
 * @param {{ file: string, symbols?: string[], strict?: boolean, maxResults?: number }} input
 * @returns {{ markdown: string, metadata: object }} Report text plus the context
 *   metadata extended with `mode`, `file` and `importantSymbols`.
 */
function explainFile(db, cwd, input) {
  const contextInput = {
    task: `Explain ${input.file}: ownership, constraints, regressions, tests, and important symbols.`,
    files: [input.file],
    symbols: input.symbols,
    strict: input.strict,
    maxResults: input.maxResults
  };

  const chunks = rankCodeChunks(db, contextInput);
  // De-duplicated symbols in first-seen order, capped at ten.
  const distinctSymbols = new Set(chunks.flatMap((chunk) => chunk.symbols));
  const importantSymbols = Array.from(distinctSymbols).slice(0, 10);

  const leadingText = chunks[0]?.sanitizedText;
  const ownership = leadingText
    ? clipSentence(leadingText, 220)
    : "No indexed code chunk found for this file.";

  const context = buildAnchorContextResult(db, cwd, contextInput);
  // Drop the generic context title; this report supplies its own heading.
  const contextBody = context.markdown.replace(/^# Anchor Context\n\n/, "");

  const header = [
    "# Anchor File Explain",
    "",
    `File: ${input.file}`,
    `Appears to own: ${ownership}`,
    `Important symbols: ${importantSymbols.join(", ") || "n/a"}`,
    ""
  ];

  return {
    markdown: [...header, contextBody].join("\n"),
    metadata: {
      ...context.metadata,
      mode: "explain_file",
      file: input.file,
      importantSymbols
    }
  };
}
|
|
3198
|
+
|
|
3199
|
+
// src/retrieval/review-diff.ts
|
|
3200
|
+
/**
 * Extracts the changed file paths from a unified git diff.
 *
 * Paths are taken from `diff --git a/... b/...` headers and from `+++ b/...`
 * lines, then de-duplicated with `uniqueStrings`.
 *
 * @param {string} [diff] - Raw diff text. A non-string value yields `[]`
 *   instead of throwing, so callers may pass an absent diff through.
 * @returns {string[]} Paths found in the diff.
 */
function filesFromDiff(diff) {
  if (typeof diff !== "string") return [];
  const files = [];
  // Split on LF or CRLF: `.` and `$` never match across a trailing "\r", so a
  // CRLF diff split on "\n" alone would produce no matches at all.
  for (const line of diff.split(/\r?\n/)) {
    const header = line.match(/^diff --git a\/(.+?) b\/(.+)$/);
    if (header?.[2] && header[2] !== "/dev/null") files.push(header[2]);
    const added = line.match(/^\+\+\+ b\/(.+)$/);
    // git appends a TAB after `+++` paths that contain spaces; drop it so the
    // path matches the one taken from the `diff --git` header.
    const addedPath = added?.[1]?.replace(/\t$/, "");
    if (addedPath && addedPath !== "/dev/null") files.push(addedPath);
  }
  return uniqueStrings(files);
}
|
|
3210
|
+
/**
 * Returns `value` unchanged when it is an array, otherwise a new empty array.
 *
 * @param {*} value - Any value.
 * @returns {Array} The same array instance, or `[]`.
 */
function asArray(value) {
  if (!Array.isArray(value)) {
    return [];
  }
  return value;
}
|
|
3213
|
+
/**
 * Produces a markdown review of a diff from the Anchor context for the
 * changed files: blockers (team rules), risks, historical constraints,
 * regression checks and recommended tests.
 *
 * @param {object} db - Database handle forwarded to `buildAnchorContextResult`.
 * @param {string} cwd - Working directory forwarded to `buildAnchorContextResult`.
 * @param {{ files?: string[], diff?: string, strict?: boolean, maxResults?: number }} input
 *   When `files` is empty or absent, the paths are derived from `diff`.
 * @returns {{ markdown: string, metadata: object }} Review text plus the context
 *   metadata extended with `mode` and `changedFiles`.
 */
function reviewDiff(db, cwd, input) {
  const files = input.files?.length ? input.files : filesFromDiff(input.diff);
  const context = buildAnchorContextResult(db, cwd, {
    task: "Review this diff against Anchor history, team rules, regressions, and tests.",
    files,
    diff: input.diff,
    strict: input.strict,
    maxResults: input.maxResults
  });

  const { metadata } = context;
  const items = asArray(metadata.items);
  const regressions = asArray(metadata.regressionEvents);
  const tests = asArray(metadata.relevantTests);
  const ruleItems = asArray(metadata.teamRules);

  const constraintCategories = new Set([
    "constraint",
    "api_contract",
    "security_note",
    "architecture_decision"
  ]);
  const riskCategories = new Set(["security_note", "bug_regression", "api_contract"]);

  // Only rules that are neither stale nor weakly supported count as blockers.
  const blockerRules = ruleItems.filter(
    (rule) => !(rule.freshnessStatus === "stale" || rule.confidenceLevel === "weak")
  );
  const historicalConstraints = items.filter((item) => constraintCategories.has(item.category ?? ""));
  const riskItems = items.filter((item) => riskCategories.has(item.category ?? ""));

  const out = ["# Anchor Diff Review", "", `Changed files: ${files.join(", ") || "n/a"}`];

  // Every section is: blank line, heading, blank line, then either the
  // fallback bullet or up to `limit` rendered bullets.
  const addSection = (heading, entries, limit, fallback, render) => {
    out.push("", heading, "");
    if (entries.length === 0) {
      out.push(fallback);
      return;
    }
    for (const entry of entries.slice(0, limit)) {
      out.push(render(entry));
    }
  };

  addSection(
    "## Blockers",
    blockerRules,
    4,
    "- No evidence-backed blockers found.",
    (rule) => `- Team rule evidence may block this change: ${rule.category ?? "rule"}.`
  );
  addSection(
    "## Risks",
    riskItems,
    5,
    "- No specific historical risks found.",
    (item) => `- [${item.category}] PR #${item.prNumber}: preserve cited behavior.`
  );
  addSection(
    "## Historical constraints",
    historicalConstraints,
    5,
    "- No matching constraints found.",
    (item) => `- PR #${item.prNumber}: ${item.category} (${item.confidenceLevel}).`
  );
  addSection(
    "## Regression checks",
    regressions,
    5,
    "- No related regression memory found.",
    (event) => `- PR #${event.prNumber}: ${clipSentence(event.summary ?? "", 180)}`
  );
  addSection(
    "## Recommended tests",
    tests,
    6,
    "- No related tests found in the local index.",
    (test) => `- ${test.path ?? "unknown test"} (${test.reason ?? "related"})`
  );

  return {
    markdown: out.join("\n"),
    metadata: {
      ...metadata,
      mode: "review_diff",
      changedFiles: files
    }
  };
}
|
|
3283
|
+
|
|
1801
3284
|
// src/github/client.ts
|
|
1802
3285
|
import { Octokit } from "@octokit/rest";
|
|
1803
3286
|
function createGitHubClient(token) {
|
|
@@ -1987,8 +3470,8 @@ async function fetchMergedPullRequests(options) {
|
|
|
1987
3470
|
}
|
|
1988
3471
|
|
|
1989
3472
|
// src/doctor.ts
|
|
1990
|
-
import
|
|
1991
|
-
import
|
|
3473
|
+
import fs5 from "fs";
|
|
3474
|
+
import path13 from "path";
|
|
1992
3475
|
/**
 * Builds one doctor check record.
 *
 * @param {string} name - Check label.
 * @param {boolean} ok - Whether the check passed.
 * @param {string} message - Human-readable outcome.
 * @param {string} [fix] - Remediation hint; only kept when the check failed.
 * @returns {{ name: string, ok: boolean, message: string, fix: (string|undefined) }}
 */
function check(name, ok, message, fix) {
  // A passing check carries no remediation hint.
  const remedy = ok ? undefined : fix;
  return { name, ok, message, fix: remedy };
}
|
|
@@ -2049,12 +3532,12 @@ async function runDoctor(options) {
|
|
|
2049
3532
|
)
|
|
2050
3533
|
);
|
|
2051
3534
|
}
|
|
2052
|
-
const cursorConfigPath =
|
|
3535
|
+
const cursorConfigPath = path13.join(gitRoot ?? cwd, ".cursor", "mcp.json");
|
|
2053
3536
|
let cursorConfig;
|
|
2054
3537
|
let cursorConfigValid = false;
|
|
2055
|
-
if (
|
|
3538
|
+
if (fs5.existsSync(cursorConfigPath)) {
|
|
2056
3539
|
try {
|
|
2057
|
-
cursorConfig = JSON.parse(
|
|
3540
|
+
cursorConfig = JSON.parse(fs5.readFileSync(cursorConfigPath, "utf8"));
|
|
2058
3541
|
cursorConfigValid = true;
|
|
2059
3542
|
} catch {
|
|
2060
3543
|
cursorConfigValid = false;
|
|
@@ -2063,7 +3546,7 @@ async function runDoctor(options) {
|
|
|
2063
3546
|
checks.push(
|
|
2064
3547
|
check(
|
|
2065
3548
|
".cursor/mcp.json valid",
|
|
2066
|
-
|
|
3549
|
+
fs5.existsSync(cursorConfigPath) && cursorConfigValid,
|
|
2067
3550
|
cursorConfigValid ? ".cursor/mcp.json exists and is valid JSON." : ".cursor/mcp.json is missing or invalid.",
|
|
2068
3551
|
"Run anchor init. If the file is malformed, fix the JSON and rerun anchor init."
|
|
2069
3552
|
)
|
|
@@ -2080,7 +3563,7 @@ async function runDoctor(options) {
|
|
|
2080
3563
|
)
|
|
2081
3564
|
);
|
|
2082
3565
|
const dbPath = defaultDatabasePath(gitRoot ?? cwd);
|
|
2083
|
-
const dbExists =
|
|
3566
|
+
const dbExists = fs5.existsSync(dbPath);
|
|
2084
3567
|
checks.push(
|
|
2085
3568
|
check(
|
|
2086
3569
|
".anchor/index.sqlite exists",
|
|
@@ -2124,30 +3607,72 @@ async function runDoctor(options) {
|
|
|
2124
3607
|
"Run pnpm build, then try anchor serve from the repository."
|
|
2125
3608
|
)
|
|
2126
3609
|
);
|
|
2127
|
-
const rulePath =
|
|
3610
|
+
const rulePath = path13.join(gitRoot ?? cwd, ".cursor", "rules", "anchor.mdc");
|
|
2128
3611
|
checks.push(
|
|
2129
3612
|
check(
|
|
2130
3613
|
"Cursor rule file exists",
|
|
2131
|
-
|
|
2132
|
-
|
|
3614
|
+
fs5.existsSync(rulePath),
|
|
3615
|
+
fs5.existsSync(rulePath) ? "Cursor rule file exists." : "Cursor rule file is missing.",
|
|
2133
3616
|
"Run anchor init to create .cursor/rules/anchor.mdc."
|
|
2134
3617
|
)
|
|
2135
3618
|
);
|
|
2136
3619
|
return { ok: checks.every((item) => item.ok), checks };
|
|
2137
3620
|
}
|
|
3621
|
+
|
|
3622
|
+
// src/health.ts
|
|
3623
|
+
/**
 * Turns an index status record into an overall health verdict with warnings.
 *
 * The verdict is `"error"` when the database is missing or its schema is
 * invalid, `"warning"` when any warning was collected, and `"ok"` otherwise.
 *
 * @param {object} status - Index status; reads `health`, `historyCoverage`,
 *   `staleCodeIndex`, `lastFailedRun`, `lastSuccessfulRun` and `suggestedNextCommand`.
 * @param {boolean} rulesOk - Whether the team rules file validated.
 * @returns {{ status: string, warnings: string[], suggestedNextCommand: *,
 *   historyCoverage: string, staleCodeIndex: boolean, lastSuccessfulRun: *, lastFailedRun: * }}
 */
function evaluateIndexHealth(status, rulesOk) {
  const { health } = status;

  // [applies, message] pairs, listed in the order warnings are reported.
  const candidates = [
    [health === "missing_database", "Anchor database is missing."],
    [health === "schema_invalid", "Anchor SQLite schema is invalid."],
    [health === "empty_index", "Anchor index is empty."],
    [status.historyCoverage !== "all", "PR history coverage is partial."],
    [status.staleCodeIndex, "Code index is older than 7 days or has never run."],
    [!rulesOk, "Team rules file is missing or invalid."],
    [status.lastFailedRun, `Last failed index run: ${status.lastFailedRun}.`]
  ];
  const warnings = candidates
    .filter(([applies]) => applies)
    .map(([, message]) => message);

  let healthStatus = "ok";
  if (["missing_database", "schema_invalid"].includes(health)) {
    healthStatus = "error";
  } else if (warnings.length > 0) {
    healthStatus = "warning";
  }

  return {
    status: healthStatus,
    warnings,
    suggestedNextCommand: status.suggestedNextCommand,
    historyCoverage: status.historyCoverage ?? "unknown",
    staleCodeIndex: Boolean(status.staleCodeIndex),
    lastSuccessfulRun: status.lastSuccessfulRun,
    lastFailedRun: status.lastFailedRun
  };
}
|
|
3644
|
+
/**
 * Computes the index health verdict for a working directory and returns it
 * together with the raw index status it was derived from.
 *
 * @param {string} cwd - Directory passed to `getIndexStatus` and `validateTeamRulesFile`.
 * @returns {object} The fields of `evaluateIndexHealth(...)` plus `indexStatus`.
 */
function getAnchorIndexHealth(cwd) {
  const indexStatus = getIndexStatus(cwd);
  const { ok: rulesOk } = validateTeamRulesFile(cwd);
  const verdict = evaluateIndexHealth(indexStatus, rulesOk);
  return { ...verdict, indexStatus };
}
|
|
2138
3652
|
export {
|
|
2139
3653
|
ANCHOR_CURSOR_RULE,
|
|
2140
3654
|
DEFAULT_MAX_CODE_FILE_BYTES,
|
|
2141
3655
|
SCHEMA_SQL,
|
|
3656
|
+
TEAM_RULES_FILE,
|
|
3657
|
+
addTeamRule,
|
|
2142
3658
|
anchorMcpEntry,
|
|
3659
|
+
buildAnchorContextResult,
|
|
2143
3660
|
buildFtsQuery,
|
|
3661
|
+
buildQueryTerms,
|
|
2144
3662
|
canonicalizeText,
|
|
2145
3663
|
categorizeWisdom,
|
|
2146
3664
|
checkSchema,
|
|
3665
|
+
checkTeamRuleEvidence,
|
|
2147
3666
|
chunkCodeFile,
|
|
2148
3667
|
chunkHistoricalText,
|
|
3668
|
+
claimKeyFor,
|
|
2149
3669
|
clampMaxResults,
|
|
2150
3670
|
clipSentence,
|
|
3671
|
+
confidenceAtLeast,
|
|
3672
|
+
confidenceLevelFor,
|
|
3673
|
+
confidenceRank,
|
|
3674
|
+
confidenceReasonsFor,
|
|
3675
|
+
countValidTeamRules,
|
|
2151
3676
|
createGitHubClient,
|
|
2152
3677
|
defaultDatabasePath,
|
|
2153
3678
|
detectGitHubRepo,
|
|
@@ -2158,42 +3683,62 @@ export {
|
|
|
2158
3683
|
ensureCursorConfig,
|
|
2159
3684
|
ensureCursorRule,
|
|
2160
3685
|
ensureRepository,
|
|
3686
|
+
ensureTeamRulesFile,
|
|
3687
|
+
evaluateFreshness,
|
|
3688
|
+
evaluateIndexHealth,
|
|
3689
|
+
evidenceForWisdom,
|
|
3690
|
+
explainFile,
|
|
2161
3691
|
extractCodeSymbols,
|
|
3692
|
+
extractRegressionEvents,
|
|
2162
3693
|
extractSymbols,
|
|
2163
3694
|
extractWisdomUnits,
|
|
2164
3695
|
fetchMergedPullRequests,
|
|
2165
3696
|
fetchPullRequestDetails,
|
|
3697
|
+
filesFromDiff,
|
|
2166
3698
|
formatAnchorContext,
|
|
2167
3699
|
formatIndexStatus,
|
|
2168
3700
|
formatSearchHistory,
|
|
3701
|
+
getAnchorIndexHealth,
|
|
2169
3702
|
getIndexStatus,
|
|
2170
3703
|
getLastSyncTime,
|
|
3704
|
+
getSemanticStatus,
|
|
2171
3705
|
githubAuthFixMessage,
|
|
2172
3706
|
hasHighSignalLanguage,
|
|
2173
3707
|
indexCodebase,
|
|
2174
3708
|
indexPullRequests,
|
|
3709
|
+
inferTestAwareness,
|
|
2175
3710
|
initializeSchema,
|
|
2176
3711
|
isHardExcludedCodePath,
|
|
3712
|
+
isTestFilePath,
|
|
3713
|
+
loadCurrentCodeSnapshot,
|
|
3714
|
+
loadTeamRulesFile,
|
|
2177
3715
|
mergeAnchorMcpConfig,
|
|
2178
3716
|
normalizePullRequest,
|
|
2179
3717
|
openAnchorDatabase,
|
|
2180
3718
|
parseGitHubRemote,
|
|
2181
3719
|
rankCodeChunks,
|
|
3720
|
+
rankRegressionEvents,
|
|
3721
|
+
rankRelevantTests,
|
|
3722
|
+
rankTeamRules,
|
|
2182
3723
|
rankWisdomUnits,
|
|
3724
|
+
recordIndexRun,
|
|
2183
3725
|
redactSecrets,
|
|
2184
3726
|
redactedHistoricalText,
|
|
2185
3727
|
replaceCodeIndex,
|
|
2186
3728
|
resolveGitHubToken,
|
|
2187
3729
|
resolvePullRequestDetailConcurrency,
|
|
2188
3730
|
resolvePullRequestFetchLimit,
|
|
3731
|
+
reviewDiff,
|
|
2189
3732
|
runDoctor,
|
|
2190
3733
|
sanitizeHistoricalText,
|
|
2191
3734
|
shouldSyncSince,
|
|
3735
|
+
sourceTypeLabel,
|
|
2192
3736
|
stripPromptInjection,
|
|
2193
3737
|
tokenizeSearchText,
|
|
2194
3738
|
truncateText,
|
|
2195
3739
|
uniqueStrings,
|
|
2196
3740
|
updateSyncState,
|
|
2197
|
-
upsertPullRequest
|
|
3741
|
+
upsertPullRequest,
|
|
3742
|
+
validateTeamRulesFile
|
|
2198
3743
|
};
|
|
2199
3744
|
//# sourceMappingURL=index.js.map
|