@pratik7368patil/anchor-core 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +120 -4
- package/dist/index.js +687 -47
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/db/schema.sql +44 -0
package/dist/index.js
CHANGED
|
@@ -334,6 +334,48 @@ CREATE VIRTUAL TABLE IF NOT EXISTS wisdom_units_fts USING fts5(
|
|
|
334
334
|
category
|
|
335
335
|
);
|
|
336
336
|
|
|
337
|
+
CREATE TABLE IF NOT EXISTS code_files (
|
|
338
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
339
|
+
repo_id INTEGER NOT NULL REFERENCES repositories(id) ON DELETE CASCADE,
|
|
340
|
+
path TEXT NOT NULL,
|
|
341
|
+
language TEXT,
|
|
342
|
+
size_bytes INTEGER NOT NULL,
|
|
343
|
+
content_hash TEXT NOT NULL,
|
|
344
|
+
updated_at TEXT NOT NULL,
|
|
345
|
+
UNIQUE(repo_id, path)
|
|
346
|
+
);
|
|
347
|
+
|
|
348
|
+
CREATE TABLE IF NOT EXISTS code_chunks (
|
|
349
|
+
id TEXT PRIMARY KEY,
|
|
350
|
+
repo_id INTEGER NOT NULL REFERENCES repositories(id) ON DELETE CASCADE,
|
|
351
|
+
file_id INTEGER NOT NULL REFERENCES code_files(id) ON DELETE CASCADE,
|
|
352
|
+
repo TEXT NOT NULL,
|
|
353
|
+
file_path TEXT NOT NULL,
|
|
354
|
+
language TEXT,
|
|
355
|
+
start_line INTEGER NOT NULL,
|
|
356
|
+
end_line INTEGER NOT NULL,
|
|
357
|
+
sanitized_text TEXT NOT NULL,
|
|
358
|
+
symbols_json TEXT NOT NULL,
|
|
359
|
+
content_hash TEXT NOT NULL,
|
|
360
|
+
updated_at TEXT NOT NULL
|
|
361
|
+
);
|
|
362
|
+
|
|
363
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS code_chunks_fts USING fts5(
|
|
364
|
+
chunkId UNINDEXED,
|
|
365
|
+
sanitizedText,
|
|
366
|
+
filePath,
|
|
367
|
+
symbols,
|
|
368
|
+
language
|
|
369
|
+
);
|
|
370
|
+
|
|
371
|
+
CREATE TABLE IF NOT EXISTS code_index_state (
|
|
372
|
+
repo TEXT PRIMARY KEY,
|
|
373
|
+
last_indexed_at TEXT NOT NULL,
|
|
374
|
+
indexed_files INTEGER NOT NULL,
|
|
375
|
+
code_chunks INTEGER NOT NULL,
|
|
376
|
+
skipped_files INTEGER NOT NULL
|
|
377
|
+
);
|
|
378
|
+
|
|
337
379
|
CREATE TABLE IF NOT EXISTS sync_state (
|
|
338
380
|
repo TEXT PRIMARY KEY,
|
|
339
381
|
last_sync_at TEXT,
|
|
@@ -346,6 +388,8 @@ CREATE INDEX IF NOT EXISTS idx_pr_files_path ON pr_files(path);
|
|
|
346
388
|
CREATE INDEX IF NOT EXISTS idx_pr_comments_source ON pr_comments(source_type);
|
|
347
389
|
CREATE INDEX IF NOT EXISTS idx_wisdom_units_category ON wisdom_units(category);
|
|
348
390
|
CREATE INDEX IF NOT EXISTS idx_wisdom_units_pr ON wisdom_units(pr_id);
|
|
391
|
+
CREATE INDEX IF NOT EXISTS idx_code_files_path ON code_files(path);
|
|
392
|
+
CREATE INDEX IF NOT EXISTS idx_code_chunks_file_path ON code_chunks(file_path);
|
|
349
393
|
`;
|
|
350
394
|
|
|
351
395
|
// src/db/database.ts
|
|
@@ -365,8 +409,10 @@ function initializeSchema(db) {
|
|
|
365
409
|
function checkSchema(db) {
|
|
366
410
|
try {
|
|
367
411
|
const tables = db.prepare("SELECT name FROM sqlite_master WHERE type IN ('table', 'virtual') AND name = ?").all("wisdom_units_fts");
|
|
412
|
+
const codeTables = db.prepare("SELECT name FROM sqlite_master WHERE type IN ('table', 'virtual') AND name = ?").all("code_chunks_fts");
|
|
368
413
|
const wisdom = db.prepare("SELECT name FROM sqlite_master WHERE name = ?").all("wisdom_units");
|
|
369
|
-
|
|
414
|
+
const code = db.prepare("SELECT name FROM sqlite_master WHERE name = ?").all("code_chunks");
|
|
415
|
+
return tables.length > 0 && wisdom.length > 0 && codeTables.length > 0 && code.length > 0;
|
|
370
416
|
} catch {
|
|
371
417
|
return false;
|
|
372
418
|
}
|
|
@@ -543,6 +589,87 @@ function upsertPullRequest(db, pr, wisdomUnits) {
|
|
|
543
589
|
const comments = (pr.reviews?.length ?? 0) + (pr.reviewComments?.length ?? 0) + (pr.issueComments?.length ?? 0);
|
|
544
590
|
return { files: pr.files.length, comments, wisdom: wisdomUnits.length };
|
|
545
591
|
}
|
|
592
|
+
/**
 * Replace the stored code index for one repository with a fresh snapshot.
 *
 * Inside a single `db.transaction` callback this removes the repository's
 * existing FTS rows, chunks and files, inserts the supplied files and chunks
 * (plus one FTS row per chunk), and upserts the `code_index_state` row.
 *
 * @param {object} db - Database handle exposing `prepare` and `transaction`.
 * @param {string} repo - Repository identifier used as the state-table key.
 * @param {Array<object>} codeFiles - File records (`path`, `language`,
 *   `sizeBytes`, `contentHash`, `updatedAt`).
 * @param {Array<object>} codeChunks - Chunk records produced for those files.
 * @param {number} skippedFiles - Number of files skipped during discovery.
 * @param {string} cwd - Working directory used to resolve the database path.
 * @returns {{indexedFiles: number, codeChunksCreated: number, skippedFiles: number, databasePath: string}}
 *   Summary of what was written. `codeChunksCreated` counts the chunks that
 *   were actually inserted.
 */
function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd) {
  initializeSchema(db);
  const repoId = ensureRepository(db, repo);
  const now = new Date().toISOString();
  // Chunks whose file row cannot be found are skipped below, so the number
  // reported and persisted must be the number really inserted.
  let insertedChunks = 0;
  const transaction = db.transaction(() => {
    insertedChunks = 0;

    // The FTS table is keyed only by chunkId, so its rows are removed
    // explicitly before the chunks they refer to are deleted.
    const existingChunks = db.prepare("SELECT id FROM code_chunks WHERE repo_id = ?").all(repoId);
    const deleteFts = db.prepare("DELETE FROM code_chunks_fts WHERE chunkId = ?");
    for (const row of existingChunks) deleteFts.run(row.id);
    db.prepare("DELETE FROM code_chunks WHERE repo_id = ?").run(repoId);
    db.prepare("DELETE FROM code_files WHERE repo_id = ?").run(repoId);

    const insertFile = db.prepare(
      `INSERT INTO code_files
(repo_id, path, language, size_bytes, content_hash, updated_at)
VALUES (?, ?, ?, ?, ?, ?)`
    );
    for (const file of codeFiles) {
      insertFile.run(
        repoId,
        file.path,
        file.language ?? null,
        file.sizeBytes,
        file.contentHash,
        file.updatedAt
      );
    }

    // Map each stored path to its generated row id so chunks can reference it.
    const fileRows = db.prepare("SELECT id, path FROM code_files WHERE repo_id = ?").all(repoId);
    const fileIds = new Map(fileRows.map((row) => [row.path, row.id]));

    const insertChunk = db.prepare(
      `INSERT INTO code_chunks
(id, repo_id, file_id, repo, file_path, language, start_line, end_line, sanitized_text,
symbols_json, content_hash, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
    );
    const insertFts = db.prepare(
      `INSERT INTO code_chunks_fts
(chunkId, sanitizedText, filePath, symbols, language)
VALUES (?, ?, ?, ?, ?)`
    );
    for (const chunk of codeChunks) {
      const fileId = fileIds.get(chunk.filePath);
      // A chunk without a matching file row cannot satisfy the file_id
      // reference, so it is left out of the index.
      if (fileId === undefined) continue;
      insertChunk.run(
        chunk.id,
        repoId,
        fileId,
        chunk.repo,
        chunk.filePath,
        chunk.language ?? null,
        chunk.startLine,
        chunk.endLine,
        chunk.sanitizedText,
        JSON.stringify(chunk.symbols),
        chunk.contentHash,
        chunk.updatedAt
      );
      insertFts.run(
        chunk.id,
        chunk.sanitizedText,
        chunk.filePath,
        chunk.symbols.join(" "),
        chunk.language ?? ""
      );
      insertedChunks += 1;
    }

    db.prepare(
      `INSERT INTO code_index_state (repo, last_indexed_at, indexed_files, code_chunks, skipped_files)
VALUES (?, ?, ?, ?, ?)
ON CONFLICT(repo) DO UPDATE SET
last_indexed_at = excluded.last_indexed_at,
indexed_files = excluded.indexed_files,
code_chunks = excluded.code_chunks,
skipped_files = excluded.skipped_files`
    ).run(repo, now, codeFiles.length, insertedChunks, skippedFiles);
  });
  transaction();
  return {
    indexedFiles: codeFiles.length,
    codeChunksCreated: insertedChunks,
    skippedFiles,
    databasePath: defaultDatabasePath(cwd)
  };
}
|
|
546
673
|
function getIndexStatus(cwd, githubTokenConfigured = Boolean(resolveGitHubToken({ cwd }).token), databasePath = defaultDatabasePath(cwd)) {
|
|
547
674
|
if (!fs2.existsSync(databasePath)) {
|
|
548
675
|
return {
|
|
@@ -551,12 +678,15 @@ function getIndexStatus(cwd, githubTokenConfigured = Boolean(resolveGitHubToken(
|
|
|
551
678
|
fileCount: 0,
|
|
552
679
|
commentCount: 0,
|
|
553
680
|
wisdomUnitCount: 0,
|
|
681
|
+
codeFileCount: 0,
|
|
682
|
+
codeChunkCount: 0,
|
|
554
683
|
githubTokenConfigured,
|
|
555
684
|
health: "missing_database"
|
|
556
685
|
};
|
|
557
686
|
}
|
|
558
687
|
const db = openAnchorDatabase(cwd, databasePath);
|
|
559
688
|
try {
|
|
689
|
+
initializeSchema(db);
|
|
560
690
|
if (!checkSchema(db)) {
|
|
561
691
|
return {
|
|
562
692
|
databasePath,
|
|
@@ -564,6 +694,8 @@ function getIndexStatus(cwd, githubTokenConfigured = Boolean(resolveGitHubToken(
|
|
|
564
694
|
fileCount: 0,
|
|
565
695
|
commentCount: 0,
|
|
566
696
|
wisdomUnitCount: 0,
|
|
697
|
+
codeFileCount: 0,
|
|
698
|
+
codeChunkCount: 0,
|
|
567
699
|
githubTokenConfigured,
|
|
568
700
|
health: "schema_invalid"
|
|
569
701
|
};
|
|
@@ -571,7 +703,9 @@ function getIndexStatus(cwd, githubTokenConfigured = Boolean(resolveGitHubToken(
|
|
|
571
703
|
const count = (table) => db.prepare(`SELECT COUNT(*) AS count FROM ${table}`).get().count;
|
|
572
704
|
const repoRow = db.prepare("SELECT full_name FROM repositories ORDER BY id LIMIT 1").get();
|
|
573
705
|
const syncRow = db.prepare("SELECT last_sync_at FROM sync_state ORDER BY updated_at DESC LIMIT 1").get();
|
|
706
|
+
const codeIndexRow = db.prepare("SELECT last_indexed_at FROM code_index_state ORDER BY last_indexed_at DESC LIMIT 1").get();
|
|
574
707
|
const wisdomUnitCount = count("wisdom_units");
|
|
708
|
+
const codeChunkCount = count("code_chunks");
|
|
575
709
|
return {
|
|
576
710
|
repo: repoRow?.full_name,
|
|
577
711
|
databasePath,
|
|
@@ -579,9 +713,12 @@ function getIndexStatus(cwd, githubTokenConfigured = Boolean(resolveGitHubToken(
|
|
|
579
713
|
fileCount: count("pr_files"),
|
|
580
714
|
commentCount: count("pr_comments"),
|
|
581
715
|
wisdomUnitCount,
|
|
716
|
+
codeFileCount: count("code_files"),
|
|
717
|
+
codeChunkCount,
|
|
582
718
|
lastSyncTime: syncRow?.last_sync_at ?? void 0,
|
|
719
|
+
lastCodeIndexTime: codeIndexRow?.last_indexed_at ?? void 0,
|
|
583
720
|
githubTokenConfigured,
|
|
584
|
-
health: wisdomUnitCount > 0 ? "ok" : "empty_index"
|
|
721
|
+
health: wisdomUnitCount > 0 || codeChunkCount > 0 ? "ok" : "empty_index"
|
|
585
722
|
};
|
|
586
723
|
} finally {
|
|
587
724
|
db.close();
|
|
@@ -618,9 +755,260 @@ function chunkHistoricalText(text, maxChunkLength = 700) {
|
|
|
618
755
|
return expanded.filter((chunk) => chunk.length >= 12 && hasHighSignalLanguage(chunk));
|
|
619
756
|
}
|
|
620
757
|
|
|
621
|
-
// src/indexer/
|
|
758
|
+
// src/indexer/code-chunker.ts
|
|
622
759
|
import crypto from "crypto";
|
|
623
760
|
import path3 from "path";
|
|
761
|
+
// Number of source lines per chunk when the caller gives no `chunkLines`.
var DEFAULT_CHUNK_LINES = 80;

// Number of lines shared between consecutive chunks when the caller gives no
// `overlapLines`.
var DEFAULT_OVERLAP_LINES = 8;

// Identifiers that may be followed by "(" but are not recorded as called
// symbols by extractCodeSymbols.
var FUNCTION_CALL_STOP_WORDS = /* @__PURE__ */ new Set(
  ["catch", "describe", "for", "if", "it", "return", "switch", "test", "while"]
);
|
|
774
|
+
/**
 * Derive a deterministic chunk id from the file identity and line range.
 *
 * @param {{repo: string, path: string, contentHash: string}} file - File the chunk belongs to.
 * @param {number} startLine - First line of the chunk.
 * @param {number} endLine - Last line of the chunk.
 * @returns {string} `cc_` followed by the first 24 hex characters of a SHA-256 digest.
 */
function stableCodeChunkId(file, startLine, endLine) {
  // NUL-join the parts so adjacent fields cannot run into one another.
  const identity = [file.repo, file.path, file.contentHash, startLine, endLine].join("\0");
  const hasher = crypto.createHash("sha256");
  hasher.update(identity);
  const shortDigest = hasher.digest("hex").slice(0, 24);
  return `cc_${shortDigest}`;
}
|
|
778
|
+
/**
 * Collect candidate symbol names from a chunk of source text.
 *
 * Names are gathered in this order: declarations, arrow functions assigned
 * with `:` or `=`, called identifiers (minus FUNCTION_CALL_STOP_WORDS), and
 * finally the file's basename without its last extension when it looks like
 * an identifier.
 *
 * @param {string} text - Chunk text to scan.
 * @param {string} filePath - Path of the file the text came from.
 * @returns {string[]} At most 40 names after `uniqueStrings` is applied.
 */
function extractCodeSymbols(text, filePath) {
  const declarationPattern =
    /\b(?:export\s+)?(?:async\s+)?(?:class|function|interface|type|enum|const|let|var)\s+([A-Za-z_$][\w$]*)/g;
  const arrowMemberPattern = /\b([A-Za-z_$][\w$]{2,})\s*[:=]\s*(?:async\s*)?\([^)]*\)\s*=>/g;
  const callPattern = /\b([A-Za-z_$][\w$]{2,})\s*\(/g;
  const capturedName = (match) => match[1] ?? "";

  const found = [
    ...Array.from(text.matchAll(declarationPattern), capturedName),
    ...Array.from(text.matchAll(arrowMemberPattern), capturedName),
    ...Array.from(text.matchAll(callPattern), capturedName).filter(
      (name) => !FUNCTION_CALL_STOP_WORDS.has(name)
    )
  ];

  // The file stem is appended last so names found in the text come first.
  const stem = path3.basename(filePath).replace(/\.[^.]+$/, "");
  if (/^[A-Za-z_$][\w$-]*$/.test(stem)) {
    found.push(stem);
  }
  return uniqueStrings(found).slice(0, 40);
}
|
|
797
|
+
/**
 * Split a file's content into overlapping, line-based chunks.
 *
 * Each chunk covers up to `chunkLines` lines; consecutive chunks share
 * `overlapLines` lines (clamped to the range 0..chunkLines - 1). Chunks whose
 * sanitized text is empty are dropped. Line numbers in the result are
 * 1-based and inclusive.
 *
 * @param {object} file - File record with `content`, `repo`, `path`,
 *   `language`, `contentHash` and `updatedAt`.
 * @param {{chunkLines?: number, overlapLines?: number}} [options] - Chunk sizing overrides.
 * @returns {Array<object>} Chunk records ready to be stored.
 */
function chunkCodeFile(file, options = {}) {
  const chunkLines = options.chunkLines ?? DEFAULT_CHUNK_LINES;
  const overlapLines = Math.max(
    0,
    Math.min(options.overlapLines ?? DEFAULT_OVERLAP_LINES, chunkLines - 1)
  );
  const lines = file.content.replace(/\r\n/g, "\n").split("\n");
  // A terminating newline makes split() produce a phantom empty last element.
  // Dropping it keeps endLine equal to the real last line number and avoids an
  // extra tail chunk that exists only because of that phantom line.
  if (lines.length > 1 && lines[lines.length - 1] === "") {
    lines.pop();
  }
  const chunks = [];
  for (let startIndex = 0; startIndex < lines.length; ) {
    const endIndex = Math.min(lines.length, startIndex + chunkLines);
    const rawText = lines.slice(startIndex, endIndex).join("\n");
    const sanitizedText = sanitizeHistoricalText(rawText);
    if (sanitizedText) {
      chunks.push({
        id: stableCodeChunkId(file, startIndex + 1, endIndex),
        repo: file.repo,
        filePath: file.path,
        language: file.language,
        startLine: startIndex + 1,
        endLine: endIndex,
        sanitizedText,
        symbols: extractCodeSymbols(sanitizedText, file.path),
        contentHash: file.contentHash,
        updatedAt: file.updatedAt
      });
    }
    if (endIndex >= lines.length) break;
    // Always advance by at least one line so the loop cannot stall.
    startIndex = Math.max(startIndex + 1, endIndex - overlapLines);
  }
  return chunks;
}
|
|
828
|
+
|
|
829
|
+
// src/indexer/code-file-discovery.ts
|
|
830
|
+
import { execFileSync as execFileSync3 } from "child_process";
|
|
831
|
+
import crypto2 from "crypto";
|
|
832
|
+
import fs3 from "fs";
|
|
833
|
+
import path4 from "path";
|
|
834
|
+
// Largest file size (512 KiB) indexed when the caller gives no `maxFileBytes`.
var DEFAULT_MAX_CODE_FILE_BYTES = 512 * 1024;

// A path is never indexed if any of its segments equals one of these names.
var HARD_EXCLUDED_SEGMENTS = /* @__PURE__ */ new Set([
  // tool and credential directories
  ".git", ".anchor", ".cursor", ".codex", ".aws", ".ssh",
  // dependencies and generated output
  "node_modules", ".nuxt", ".next", "dist", "build", "coverage", ".turbo"
]);

// Language label for each recognised (lower-case) file extension.
var LANGUAGE_BY_EXTENSION = {
  ".cjs": "javascript",
  ".css": "css",
  ".go": "go",
  ".html": "html",
  ".java": "java",
  ".js": "javascript",
  ".json": "json",
  ".jsx": "javascript",
  ".md": "markdown",
  ".mjs": "javascript",
  ".py": "python",
  ".rb": "ruby",
  ".rs": "rust",
  ".scss": "scss",
  ".sh": "shell",
  ".sql": "sql",
  ".svelte": "svelte",
  ".ts": "typescript",
  ".tsx": "typescript",
  ".vue": "vue",
  ".yaml": "yaml",
  ".yml": "yaml"
};
|
|
874
|
+
/**
 * Normalise a repository-relative path: backslashes become forward slashes
 * and a leading "./" (with any extra slashes) is removed.
 *
 * @param {string} value - Path to normalise.
 * @returns {string} The normalised path.
 */
function normalizeGitPath(value) {
  const forwardSlashed = value.split("\\").join("/");
  return forwardSlashed.replace(/^\.\/+/, "");
}
|
|
877
|
+
/**
 * Decide whether a path must never be indexed.
 *
 * A path is excluded when any segment is listed in HARD_EXCLUDED_SEGMENTS, or
 * when its lower-cased basename is a known credential/config file, a `.env`
 * file or variant, one of the listed SSH key file names, or ends in
 * `.pem`, `.key`, `.p12` or `.pfx`.
 *
 * @param {string} filePath - Repository-relative path.
 * @returns {boolean} True when the path is excluded.
 */
function isHardExcludedCodePath(filePath) {
  const normalized = normalizeGitPath(filePath);
  for (const segment of normalized.split("/")) {
    if (HARD_EXCLUDED_SEGMENTS.has(segment)) return true;
  }

  const name = path4.posix.basename(normalized).toLowerCase();
  const credentialConfigNames = [".netrc", ".npmrc", ".pypirc", ".yarnrc"];
  const sshKeyNames = ["id_rsa", "id_rsa.pub", "id_dsa", "id_ecdsa", "id_ed25519"];

  return (
    credentialConfigNames.includes(name) ||
    name === ".env" ||
    name.startsWith(".env.") ||
    sshKeyNames.includes(name) ||
    /\.(pem|key|p12|pfx)$/i.test(name)
  );
}
|
|
890
|
+
/**
 * Look up the language label for a file from its extension.
 *
 * @param {string} filePath - Path whose extension is inspected (case-insensitive).
 * @returns {string | undefined} The label from LANGUAGE_BY_EXTENSION, or
 *   undefined when the extension is not listed.
 */
function languageForPath(filePath) {
  const extensionKey = path4.extname(filePath).toLowerCase();
  const language = LANGUAGE_BY_EXTENSION[extensionKey];
  return language;
}
|
|
894
|
+
/**
 * Heuristically decide whether a buffer holds binary data.
 *
 * Any NUL byte marks the buffer as binary. Otherwise it is binary when more
 * than 1% of its bytes are control characters below 32 other than tab, line
 * feed and carriage return. An empty buffer is not binary.
 *
 * @param {Buffer} buffer - Raw file contents.
 * @returns {boolean} True when the content looks binary.
 */
function isProbablyBinary(buffer) {
  if (buffer.includes(0)) return true;

  const total = buffer.length;
  if (total === 0) return false;

  const TAB = 9;
  const LINE_FEED = 10;
  const CARRIAGE_RETURN = 13;

  let controlBytes = 0;
  for (let i = 0; i < total; i += 1) {
    const value = buffer[i];
    if (value >= 32) continue;
    if (value !== TAB && value !== LINE_FEED && value !== CARRIAGE_RETURN) {
      controlBytes += 1;
    }
  }
  return controlBytes / total > 0.01;
}
|
|
904
|
+
/**
 * List tracked and untracked-but-not-ignored files of the repository at `cwd`.
 *
 * Runs `git ls-files --cached --others --exclude-standard` with `-z`, so
 * paths come back NUL-separated and verbatim. The newline-delimited form may
 * quote or escape unusual file names, and trimming each line would alter
 * names with leading or trailing whitespace.
 *
 * @param {string} cwd - Directory inside the repository to run git in.
 * @returns {string[]} Unique, normalised repository-relative paths in git's output order.
 * @throws {Error} When git cannot be executed or exits with a failure.
 */
function discoverGitFiles(cwd) {
  const output = execFileSync3(
    "git",
    ["ls-files", "-z", "--cached", "--others", "--exclude-standard"],
    {
      cwd,
      encoding: "utf8",
      stdio: ["ignore", "pipe", "pipe"],
      // Large repositories can list more path data than the default output
      // cap allows; raise it so discovery does not abort.
      maxBuffer: 64 * 1024 * 1024
    }
  );
  const paths = output
    .split("\0")
    .map((entry) => normalizeGitPath(entry))
    .filter(Boolean);
  // An unmerged path can be listed once per index stage; the code_files table
  // requires (repo_id, path) to be unique, so keep one entry per path.
  return [...new Set(paths)];
}
|
|
912
|
+
/**
 * Discover indexable source files under `cwd` and load their contents.
 *
 * A file is skipped (and counted in `skippedFiles`) when its path is hard
 * excluded, when it resolves outside the root, when it cannot be inspected or
 * read, when it is not a regular file, when it exceeds `maxFileBytes`, or
 * when it looks binary.
 *
 * @param {string} cwd - Repository root to scan.
 * @param {string} repo - Repository identifier copied onto every file record.
 * @param {{maxFileBytes?: number}} [options] - Size limit override.
 * @returns {{files: Array<object>, skippedFiles: number}} Loaded file records and the skip count.
 */
function discoverCodeFiles(cwd, repo, options = {}) {
  const maxFileBytes = options.maxFileBytes ?? DEFAULT_MAX_CODE_FILE_BYTES;
  const rootPath = path4.resolve(cwd);
  // True when a root-relative path points outside the root. Comparing whole
  // segments keeps names such as "..notes/file.txt" from being rejected.
  const escapesRoot = (relativePath) =>
    relativePath === ".." ||
    relativePath.startsWith(`..${path4.sep}`) ||
    path4.isAbsolute(relativePath);

  const files = [];
  let skippedFiles = 0;
  const discoveredPaths = discoverGitFiles(cwd);

  // Symlinks are resolved against the real root so a link cannot smuggle in a
  // file from outside the repository or hide an excluded name.
  let realRootPath = rootPath;
  try {
    realRootPath = fs3.realpathSync(rootPath);
  } catch {
    // Keep the lexical root; per-file resolution below still applies.
  }

  for (const filePath of discoveredPaths) {
    if (isHardExcludedCodePath(filePath)) {
      skippedFiles += 1;
      continue;
    }
    const absolutePath = path4.resolve(cwd, filePath);
    const relativeToRoot = path4.relative(rootPath, absolutePath);
    if (escapesRoot(relativeToRoot)) {
      skippedFiles += 1;
      continue;
    }
    let stat;
    let buffer;
    try {
      const realRelative = path4.relative(realRootPath, fs3.realpathSync(absolutePath));
      if (escapesRoot(realRelative) || isHardExcludedCodePath(realRelative)) {
        skippedFiles += 1;
        continue;
      }
      stat = fs3.statSync(absolutePath);
      if (!stat.isFile() || stat.size > maxFileBytes) {
        skippedFiles += 1;
        continue;
      }
      // Reading stays inside the try: a file that vanishes or becomes
      // unreadable after the stat is skipped instead of aborting the run.
      buffer = fs3.readFileSync(absolutePath);
    } catch {
      skippedFiles += 1;
      continue;
    }
    if (isProbablyBinary(buffer)) {
      skippedFiles += 1;
      continue;
    }
    const content = buffer.toString("utf8");
    files.push({
      repo,
      path: filePath,
      language: languageForPath(filePath),
      sizeBytes: stat.size,
      contentHash: crypto2.createHash("sha256").update(buffer).digest("hex"),
      updatedAt: stat.mtime.toISOString(),
      absolutePath,
      content
    });
  }
  return { files, skippedFiles };
}
|
|
958
|
+
|
|
959
|
+
// src/indexer/code-indexer.ts
|
|
960
|
+
/**
 * Discover, chunk and store the code index for one repository.
 *
 * Progress is reported through the optional `options.onProgress` callback at
 * these stages: `discovering_code_files`, `discovered_code_files`, and for
 * every file `indexing_code_file` followed by `indexed_code_file`.
 *
 * @param {object} db - Database handle passed through to replaceCodeIndex.
 * @param {{cwd: string, repo: string, maxFileBytes?: number, onProgress?: Function}} options
 *   Indexing options.
 * @returns {object} The summary returned by replaceCodeIndex.
 */
function indexCodebase(db, options) {
  // Call through `options` so the callback keeps its original receiver.
  const report = (event) => options.onProgress?.(event);

  report({ stage: "discovering_code_files", repo: options.repo });
  const discovery = discoverCodeFiles(options.cwd, options.repo, {
    maxFileBytes: options.maxFileBytes
  });
  report({
    stage: "discovered_code_files",
    repo: options.repo,
    files: discovery.files.length,
    skippedFiles: discovery.skippedFiles
  });

  const chunks = [];
  discovery.files.forEach((file, index) => {
    const position = index + 1;
    report({
      stage: "indexing_code_file",
      repo: options.repo,
      current: position,
      total: discovery.files.length,
      filePath: file.path
    });
    const fileChunks = chunkCodeFile(file);
    for (const fileChunk of fileChunks) {
      chunks.push(fileChunk);
    }
    report({
      stage: "indexed_code_file",
      repo: options.repo,
      current: position,
      total: discovery.files.length,
      filePath: file.path,
      chunks: fileChunks.length
    });
  });

  // File content and absolute paths are only needed for chunking; drop them
  // before the records are handed over for storage.
  const storedFiles = discovery.files.map((file) => {
    const { content, absolutePath, ...stored } = file;
    return stored;
  });
  return replaceCodeIndex(
    db,
    options.repo,
    storedFiles,
    chunks,
    discovery.skippedFiles,
    options.cwd
  );
}
|
|
1000
|
+
/**
 * Build a code-index summary with every counter at zero.
 *
 * @param {string} cwd - Working directory used to resolve the database path.
 * @returns {{indexedFiles: number, codeChunksCreated: number, skippedFiles: number, databasePath: string}}
 *   Zeroed summary.
 */
function emptyCodeIndexSummary(cwd) {
  const databasePath = defaultDatabasePath(cwd);
  return { indexedFiles: 0, codeChunksCreated: 0, skippedFiles: 0, databasePath };
}
|
|
1008
|
+
|
|
1009
|
+
// src/indexer/wisdom-extractor.ts
|
|
1010
|
+
import crypto3 from "crypto";
|
|
1011
|
+
import path5 from "path";
|
|
624
1012
|
var CATEGORY_KEYWORDS = [
|
|
625
1013
|
["security_note", /\b(security|secret|token|bearer|oauth|credential|xss|csrf|injection|sanitize|redact)\b/i],
|
|
626
1014
|
["architecture_decision", /\b(architecture decision|architectural|we intentionally|design decision)\b/i],
|
|
@@ -652,7 +1040,7 @@ function extractSymbols(text, filePaths) {
|
|
|
652
1040
|
}
|
|
653
1041
|
}
|
|
654
1042
|
for (const filePath of filePaths) {
|
|
655
|
-
const basename =
|
|
1043
|
+
const basename = path5.basename(filePath).replace(/\.[^.]+$/, "");
|
|
656
1044
|
if (/^[A-Za-z_$][\w$]*$/.test(basename)) symbols.push(basename);
|
|
657
1045
|
}
|
|
658
1046
|
return uniqueStrings(symbols).slice(0, 30);
|
|
@@ -676,7 +1064,7 @@ function confidenceFor(entry, text, category, duplicateCount) {
|
|
|
676
1064
|
return Math.max(0, Math.min(1, Number(confidence.toFixed(2))));
|
|
677
1065
|
}
|
|
678
1066
|
function stableWisdomId(pr, sourceType, text, filePaths, createdAt, authors) {
|
|
679
|
-
const hash =
|
|
1067
|
+
const hash = crypto3.createHash("sha256").update(
|
|
680
1068
|
[pr.repo, pr.number, sourceType, canonicalizeText(text), filePaths.join("|"), createdAt, authors.join("|")].join(
|
|
681
1069
|
"\0"
|
|
682
1070
|
)
|
|
@@ -872,7 +1260,7 @@ function shouldSyncSince(db, repo, fallbackSince) {
|
|
|
872
1260
|
}
|
|
873
1261
|
|
|
874
1262
|
// src/retrieval/query-builder.ts
|
|
875
|
-
import
|
|
1263
|
+
import path6 from "path";
|
|
876
1264
|
var CATEGORY_HINTS = [
|
|
877
1265
|
"security",
|
|
878
1266
|
"regression",
|
|
@@ -897,8 +1285,8 @@ function buildFtsQuery(input) {
|
|
|
897
1285
|
const baseText = "task" in input ? input.task : input.query;
|
|
898
1286
|
const fileTerms = files.flatMap((file) => [
|
|
899
1287
|
file,
|
|
900
|
-
|
|
901
|
-
...
|
|
1288
|
+
path6.basename(file),
|
|
1289
|
+
...path6.dirname(file).split(/[\\/]/).filter(Boolean)
|
|
902
1290
|
]);
|
|
903
1291
|
const tokens = uniqueStrings([
|
|
904
1292
|
...tokenizeSearchText(baseText, 24),
|
|
@@ -917,7 +1305,7 @@ function clampMaxResults(value, defaultValue) {
|
|
|
917
1305
|
}
|
|
918
1306
|
|
|
919
1307
|
// src/retrieval/ranker.ts
|
|
920
|
-
import
|
|
1308
|
+
import path7 from "path";
|
|
921
1309
|
function parseJsonArray(value) {
|
|
922
1310
|
try {
|
|
923
1311
|
const parsed = JSON.parse(value);
|
|
@@ -964,11 +1352,11 @@ function filePathMatch(unitPaths, queryFiles) {
|
|
|
964
1352
|
if (queryFiles.length === 0 || unitPaths.length === 0) return 0;
|
|
965
1353
|
let best = 0;
|
|
966
1354
|
for (const queryFile of queryFiles) {
|
|
967
|
-
const queryBase =
|
|
968
|
-
const queryDir =
|
|
1355
|
+
const queryBase = path7.basename(queryFile).toLowerCase();
|
|
1356
|
+
const queryDir = path7.dirname(queryFile).toLowerCase();
|
|
969
1357
|
for (const unitPath of unitPaths) {
|
|
970
|
-
const unitBase =
|
|
971
|
-
const unitDir =
|
|
1358
|
+
const unitBase = path7.basename(unitPath).toLowerCase();
|
|
1359
|
+
const unitDir = path7.dirname(unitPath).toLowerCase();
|
|
972
1360
|
const q = queryFile.toLowerCase();
|
|
973
1361
|
const u = unitPath.toLowerCase();
|
|
974
1362
|
if (q === u) best = Math.max(best, 1);
|
|
@@ -1099,6 +1487,159 @@ function rankWisdomUnits(db, input) {
|
|
|
1099
1487
|
return [...grouped.values()].sort((a, b) => b.score - a.score || b.confidence - a.confidence).slice(0, limit);
|
|
1100
1488
|
}
|
|
1101
1489
|
|
|
1490
|
+
// src/retrieval/code-ranker.ts
|
|
1491
|
+
import path8 from "path";
|
|
1492
|
+
/**
 * Parse a JSON string and keep only the string elements of a top-level array.
 *
 * @param {string} value - JSON text.
 * @returns {string[]} The string elements, or an empty array when the text is
 *   not valid JSON or is not an array.
 */
function parseJsonArray2(value) {
  let decoded;
  try {
    decoded = JSON.parse(value);
  } catch {
    return [];
  }
  if (!Array.isArray(decoded)) return [];
  return decoded.filter((entry) => typeof entry === "string");
}
|
|
1500
|
+
/**
 * Convert a `code_chunks` database row (snake_case columns) into a chunk
 * object with camelCase fields.
 *
 * @param {object} row - Row including an optional `bm25` column.
 * @returns {object} Chunk with `symbols` parsed from `symbols_json`; a null
 *   `language` or `bm25` becomes undefined.
 */
function rowToCodeChunk(row) {
  const {
    id,
    repo,
    file_path: filePath,
    language,
    start_line: startLine,
    end_line: endLine,
    sanitized_text: sanitizedText,
    symbols_json: symbolsJson,
    content_hash: contentHash,
    updated_at: updatedAt,
    bm25
  } = row;
  return {
    id,
    repo,
    filePath,
    language: language ?? void 0,
    startLine,
    endLine,
    sanitizedText,
    symbols: parseJsonArray2(symbolsJson),
    contentHash,
    updatedAt,
    bm25: bm25 ?? void 0
  };
}
|
|
1515
|
+
/**
 * Score how closely an indexed file path relates to any of the query files.
 *
 * For each query file the strongest applicable signal is used, and the best
 * score over all query files is returned (comparison is case-insensitive):
 *   1.00  identical path
 *   0.72  same basename
 *   0.62  same directory
 *   0.48  same non-empty file stem (basename text before the first ".")
 *   0.38  one directory is nested inside the other
 *
 * @param {string} filePath - Path of the indexed chunk's file.
 * @param {string[]} queryFiles - Files named in the query.
 * @returns {number} Score between 0 and 1; 0 when nothing relates.
 */
function filePathMatch2(filePath, queryFiles) {
  if (queryFiles.length === 0) return 0;

  const stemOf = (base) => base.split(".")[0];
  // Compare whole segments so "src" is not treated as a parent of "srcx".
  // "." (a top-level file) is not treated as a parent of anything.
  const isAncestorDir = (ancestor, descendant) =>
    ancestor !== "." &&
    (descendant.startsWith(`${ancestor}/`) || descendant.startsWith(`${ancestor}\\`));

  const unit = filePath.toLowerCase();
  const unitBase = path8.basename(filePath).toLowerCase();
  const unitDir = path8.dirname(filePath).toLowerCase();
  const unitStem = stemOf(unitBase);

  let best = 0;
  for (const queryFile of queryFiles) {
    const query = queryFile.toLowerCase();
    const queryBase = path8.basename(queryFile).toLowerCase();
    const queryDir = path8.dirname(queryFile).toLowerCase();

    // Branches run from strongest to weakest signal so a weaker match can
    // never hide a stronger one.
    let score = 0;
    if (query === unit) {
      score = 1;
    } else if (queryBase === unitBase) {
      score = 0.72;
    } else if (queryDir === unitDir) {
      score = 0.62;
    } else if (unitStem !== "" && stemOf(queryBase) === unitStem) {
      // Dotfiles have an empty stem and must not all match each other.
      score = 0.48;
    } else if (isAncestorDir(queryDir, unitDir) || isAncestorDir(unitDir, queryDir)) {
      score = 0.38;
    }
    best = Math.max(best, score);
  }
  return best;
}
|
|
1536
|
+
/**
 * Score how well a chunk matches the symbols named in the query.
 *
 * Per query symbol (case-insensitive): 1 when it is one of the chunk's
 * symbols, 0.7 when it appears in the chunk text as a whole word, 0.42 when
 * it contains or is contained in one of the chunk's symbols, otherwise 0.
 * The highest score over all query symbols is returned.
 *
 * @param {{symbols: string[], sanitizedText: string}} chunk - Candidate chunk.
 * @param {string[]} querySymbols - Symbols named in the query.
 * @returns {number} Score between 0 and 1.
 */
function symbolMatch2(chunk, querySymbols) {
  if (querySymbols.length === 0) return 0;

  const knownSymbols = chunk.symbols.map((symbol) => symbol.toLowerCase());
  const body = chunk.sanitizedText.toLowerCase();

  const scoreFor = (needle) => {
    if (knownSymbols.includes(needle)) return 1;
    if (new RegExp(`\\b${escapeRegExp2(needle)}\\b`, "i").test(body)) return 0.7;
    const overlaps = knownSymbols.some(
      (candidate) => candidate.includes(needle) || needle.includes(candidate)
    );
    return overlaps ? 0.42 : 0;
  };

  return querySymbols.reduce(
    (best, symbol) => Math.max(best, scoreFor(symbol.toLowerCase())),
    0
  );
}
|
|
1551
|
+
/**
 * Score textual relevance of a chunk to the query.
 *
 * The result is the larger of (a) the fraction of query tokens found in the
 * chunk's text, path and symbols, and (b) a signal derived from the chunk's
 * `bm25` value, clamped to the range 0.25..1 (0 when `bm25` is absent).
 *
 * @param {object} chunk - Candidate chunk (`sanitizedText`, `filePath`, `symbols`, optional `bm25`).
 * @param {{task: string, diff?: string, currentCode?: string}} input - Query input.
 * @returns {number} Score between 0 and 1.
 */
function textMatch2(chunk, input) {
  const queryText = `${input.task} ${input.diff ?? ""} ${input.currentCode ?? ""}`;
  const tokens = tokenizeSearchText(queryText, 40);
  const haystack = `${chunk.sanitizedText} ${chunk.filePath} ${chunk.symbols.join(" ")}`.toLowerCase();

  let overlap = 0;
  if (tokens.length) {
    let hits = 0;
    for (const token of tokens) {
      if (haystack.includes(token.toLowerCase())) hits += 1;
    }
    overlap = hits / tokens.length;
  }

  let bm25Signal = 0;
  if (chunk.bm25 !== void 0) {
    const inverse = 1 / (1 + Math.abs(chunk.bm25));
    bm25Signal = Math.max(0.25, Math.min(1, inverse));
  }

  return Math.max(overlap, bm25Signal);
}
|
|
1561
|
+
/**
 * Score a chunk by how recently its file was updated.
 *
 * @param {{updatedAt: string}} chunk - Chunk whose `updatedAt` is parsed with Date.parse.
 * @returns {number} 1 under 30 days old, 0.75 under 180 days, 0.45 under 730
 *   days, otherwise 0.25 (also 0.25 when the timestamp cannot be parsed).
 */
function recencyScore2(chunk) {
  const MS_PER_DAY = 1e3 * 60 * 60 * 24;
  const updatedMs = Date.parse(chunk.updatedAt);
  if (Number.isNaN(updatedMs)) return 0.25;

  // Timestamps in the future count as age zero.
  const ageDays = Math.max(0, (Date.now() - updatedMs) / MS_PER_DAY);
  const tiers = [
    [30, 1],
    [180, 0.75],
    [730, 0.45]
  ];
  for (const [maxAgeDays, score] of tiers) {
    if (ageDays < maxAgeDays) return score;
  }
  return 0.25;
}
|
|
1570
|
+
/**
 * Escape regular-expression metacharacters so the text matches literally.
 *
 * @param {string} value - Text to embed in a RegExp source.
 * @returns {string} Text with each metacharacter prefixed by a backslash.
 */
function escapeRegExp2(value) {
  return value.replace(/[.*+?^${}()|[\]\\]/g, (metaChar) => `\\${metaChar}`);
}
|
|
1573
|
+
/**
 * Escape SQL LIKE wildcards (`%`, `_`) and the backslash escape character
 * itself by prefixing each with a backslash.
 *
 * @param {string} value - Text to use literally inside a LIKE pattern.
 * @returns {string} Escaped text.
 */
function escapeLike(value) {
  // "$&" in the replacement stands for the matched character.
  return value.replace(/[\\%_]/g, "\\$&");
}
|
|
1576
|
+
/**
 * Gather candidate code chunks for ranking.
 *
 * Candidates come from up to three sources and are keyed by chunk id:
 *   1. a full-text match on `code_chunks_fts` (up to 150 rows, with bm25),
 *   2. chunks whose file path equals a query file or ends with "/" plus its
 *      basename (up to 80 rows per query file),
 *   3. only when nothing was found, the 80 most recently updated chunks.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {object} input - Query input; `input.files` is optional.
 * @returns {Array<object>} Candidate chunks in insertion order.
 */
function loadCodeCandidates(db, input) {
  const candidates = /* @__PURE__ */ new Map();

  const ftsQuery = buildFtsQuery(input);
  if (ftsQuery) {
    const ftsRows = db.prepare(
      `SELECT cc.*, bm25(code_chunks_fts) AS bm25
FROM code_chunks_fts
JOIN code_chunks cc ON cc.id = code_chunks_fts.chunkId
WHERE code_chunks_fts MATCH ?
ORDER BY bm25(code_chunks_fts)
LIMIT 150`
    ).all(ftsQuery);
    ftsRows.forEach((row) => {
      const chunk = rowToCodeChunk(row);
      candidates.set(chunk.id, chunk);
    });
  }

  for (const file of input.files ?? []) {
    const suffixPattern = `%/${escapeLike(path8.basename(file))}`;
    const pathRows = db.prepare(
      `SELECT cc.*, NULL AS bm25
FROM code_chunks cc
WHERE cc.file_path = ?
OR cc.file_path LIKE ? ESCAPE '\\'
LIMIT 80`
    ).all(file, suffixPattern);
    pathRows.forEach((row) => {
      const chunk = rowToCodeChunk(row);
      // Keep a bm25 value already recorded from the full-text pass.
      const bm25 = candidates.get(chunk.id)?.bm25 ?? chunk.bm25;
      candidates.set(chunk.id, { ...chunk, bm25 });
    });
  }

  if (candidates.size === 0) {
    const recentRows = db.prepare(
      `SELECT cc.*, NULL AS bm25
FROM code_chunks cc
ORDER BY updated_at DESC
LIMIT 80`
    ).all();
    recentRows.forEach((row) => {
      const chunk = rowToCodeChunk(row);
      candidates.set(chunk.id, chunk);
    });
  }

  return Array.from(candidates.values());
}
|
|
1621
|
+
/**
 * Rank candidate code chunks for a query and return the best few.
 *
 * The score is a weighted sum: 0.4 file-path match, 0.25 symbol match,
 * 0.25 text match and 0.1 recency, rounded to four decimals. Ties are ordered
 * by descending start line. At most 5 chunks are returned.
 *
 * @param {object} db - Database handle passed to loadCodeCandidates.
 * @param {object} input - Query input (`files`, `symbols`, `maxResults`, ...).
 * @returns {Array<object>} Chunks extended with `score` and `scoreParts`.
 */
function rankCodeChunks(db, input) {
  const queryFiles = input.files ?? [];
  const querySymbols = input.symbols ?? [];

  const scored = [];
  for (const chunk of loadCodeCandidates(db, input)) {
    const scoreParts = {
      filePathMatch: filePathMatch2(chunk.filePath, queryFiles),
      symbolMatch: symbolMatch2(chunk, querySymbols),
      textMatch: textMatch2(chunk, input),
      recency: recencyScore2(chunk)
    };
    const weighted =
      0.4 * scoreParts.filePathMatch +
      0.25 * scoreParts.symbolMatch +
      0.25 * scoreParts.textMatch +
      0.1 * scoreParts.recency;
    scored.push({
      ...chunk,
      symbols: uniqueStrings(chunk.symbols),
      score: Number(weighted.toFixed(4)),
      scoreParts
    });
  }

  scored.sort((left, right) => right.score - left.score || right.startLine - left.startLine);
  const limit = Math.min(5, clampMaxResults(input.maxResults, 5));
  return scored.slice(0, limit);
}
|
|
1642
|
+
|
|
1102
1643
|
// src/retrieval/formatter.ts
|
|
1103
1644
|
function evidenceLine(unit) {
|
|
1104
1645
|
const author = unit.authors[0] ? ` by @${unit.authors[0]}` : "";
|
|
@@ -1125,14 +1666,20 @@ function whyItMatters(unit, input) {
|
|
|
1125
1666
|
/**
 * Derive up to four distinct risk reminders from the categories of the given
 * wisdom units, in order of first appearance.
 *
 * @param {Array<{category: string}>} units - Ranked wisdom units.
 * @returns {string[]} Unique risk messages, at most 4.
 */
function riskLines(units) {
  const messageByCategory = /* @__PURE__ */ new Map([
    ["security_note", "Avoid logging, exposing, or weakening security-sensitive values."],
    ["bug_regression", "Check for regressions similar to the cited PR history."],
    ["api_contract", "Preserve documented API and backward-compatibility contracts."],
    [
      "constraint",
      "Do not remove constraints without verifying the original reason no longer applies."
    ]
  ]);

  const risks = /* @__PURE__ */ new Set();
  for (const { category } of units) {
    const message = messageByCategory.get(category);
    if (message !== undefined) risks.add(message);
  }
  return Array.from(risks).slice(0, 4);
}
|
|
1135
|
-
function formatAnchorContext(units, input) {
|
|
1682
|
+
function formatAnchorContext(units, input, codeChunks = []) {
|
|
1136
1683
|
const lines = ["# Anchor Context", "", "## Must know", ""];
|
|
1137
1684
|
if (units.length === 0) {
|
|
1138
1685
|
lines.push("No directly relevant indexed PR history found.", "");
|
|
@@ -1146,6 +1693,18 @@ function formatAnchorContext(units, input) {
|
|
|
1146
1693
|
lines.push("");
|
|
1147
1694
|
});
|
|
1148
1695
|
}
|
|
1696
|
+
lines.push("## Codebase Evidence", "");
|
|
1697
|
+
if (codeChunks.length === 0) {
|
|
1698
|
+
lines.push("No directly relevant indexed codebase context found.", "");
|
|
1699
|
+
} else {
|
|
1700
|
+
codeChunks.forEach((chunk, index) => {
|
|
1701
|
+
const symbols = chunk.symbols.length ? `; symbols: ${chunk.symbols.slice(0, 6).join(", ")}` : "";
|
|
1702
|
+
lines.push(`${index + 1}. ${chunk.filePath}:${chunk.startLine}-${chunk.endLine}${symbols}`);
|
|
1703
|
+
lines.push(` Why it matters: Current code near this match may affect the requested edit.`);
|
|
1704
|
+
lines.push(` Snippet: ${clipSentence(chunk.sanitizedText, 260)}`);
|
|
1705
|
+
lines.push("");
|
|
1706
|
+
});
|
|
1707
|
+
}
|
|
1149
1708
|
lines.push("## Risks", "");
|
|
1150
1709
|
const risks = riskLines(units);
|
|
1151
1710
|
if (risks.length === 0) {
|
|
@@ -1172,6 +1731,15 @@ function formatAnchorContext(units, input) {
|
|
|
1172
1731
|
filePaths: unit.filePaths,
|
|
1173
1732
|
symbols: unit.symbols,
|
|
1174
1733
|
duplicateCount: unit.duplicateCount
|
|
1734
|
+
})),
|
|
1735
|
+
codeEvidence: codeChunks.map((chunk) => ({
|
|
1736
|
+
id: chunk.id,
|
|
1737
|
+
score: chunk.score,
|
|
1738
|
+
filePath: chunk.filePath,
|
|
1739
|
+
language: chunk.language,
|
|
1740
|
+
startLine: chunk.startLine,
|
|
1741
|
+
endLine: chunk.endLine,
|
|
1742
|
+
symbols: chunk.symbols
|
|
1175
1743
|
}))
|
|
1176
1744
|
}
|
|
1177
1745
|
};
|
|
@@ -1220,7 +1788,10 @@ function formatIndexStatus(status) {
|
|
|
1220
1788
|
`- Files: ${status.fileCount}`,
|
|
1221
1789
|
`- Comments: ${status.commentCount}`,
|
|
1222
1790
|
`- Wisdom units: ${status.wisdomUnitCount}`,
|
|
1791
|
+
`- Code files: ${status.codeFileCount}`,
|
|
1792
|
+
`- Code chunks: ${status.codeChunkCount}`,
|
|
1223
1793
|
`- Last sync: ${status.lastSyncTime ?? "never"}`,
|
|
1794
|
+
`- Last code index: ${status.lastCodeIndexTime ?? "never"}`,
|
|
1224
1795
|
`- GitHub token configured: ${status.githubTokenConfigured ? "yes" : "no"}`,
|
|
1225
1796
|
`- Health: ${status.health}`
|
|
1226
1797
|
];
|
|
@@ -1306,16 +1877,67 @@ async function fetchPullRequestDetails(octokit, repoFullName, pullNumber) {
|
|
|
1306
1877
|
}
|
|
1307
1878
|
|
|
1308
1879
|
// src/github/fetch-prs.ts
|
|
1880
|
+
/**
 * Resolve how many merged pull requests a fetch should collect.
 *
 * @param {{ all?: boolean, limit?: number }} options - Fetch options.
 * @returns {number | undefined} `undefined` when `options.all` is truthy (no cap);
 *   otherwise a whole number clamped to [1, 1000], defaulting to 200.
 */
function resolvePullRequestFetchLimit(options) {
  if (options.all) return void 0;
  const requested = Number(options.limit ?? 200);
  // NaN passes straight through Math.min/Math.max, and a NaN limit makes every
  // `count >= limit` comparison false, which would silently remove the cap.
  const wholeLimit = Number.isNaN(requested) ? 200 : Math.trunc(requested);
  return Math.max(1, Math.min(wholeLimit, 1e3));
}
|
|
1883
|
+
/**
 * Resolve how many pull-request detail requests may run at once.
 *
 * @param {{ detailConcurrency?: number }} options - Fetch options.
 * @returns {number} A whole number in [1, 10]; 5 when the option is absent or not a finite number.
 */
function resolvePullRequestDetailConcurrency(options) {
  const fallback = 5;
  const ceiling = 10;
  const requested = options.detailConcurrency ?? fallback;
  if (!Number.isFinite(requested)) {
    return fallback;
  }
  const whole = Math.trunc(requested);
  if (whole < 1) {
    return 1;
  }
  return whole > ceiling ? ceiling : whole;
}
|
|
1888
|
+
/**
 * Fetch details for every listed pull request using a bounded pool of workers.
 *
 * Workers pull the next unclaimed index from a shared cursor, so results are
 * stored by input position and the returned array matches `pullNumbers` order.
 *
 * @param {object} options
 * @param {object} options.octokit - Client passed through to `fetchPullRequestDetails`.
 * @param {string} options.repo - Repository identifier passed through to `fetchPullRequestDetails`.
 * @param {number[]} options.pullNumbers - Pull request numbers to fetch.
 * @param {number} options.detailConcurrency - Maximum number of concurrent workers;
 *   a missing, non-finite, or sub-1 value falls back to a single worker.
 * @param {(event: object) => void} [options.onProgress] - Called before and after each fetch.
 * @returns {Promise<object[]>} One details object per entry of `pullNumbers`, in input order.
 * @throws {Error} When any position ends up without a result (including an `undefined` pull number).
 */
async function fetchPullRequestDetailsConcurrently(options) {
  const total = options.pullNumbers.length;
  const results = new Array(total);
  let nextIndex = 0;
  let completed = 0;
  // Without this guard an undefined/NaN/0 concurrency would start zero workers and fetch nothing.
  const requestedWorkers = Number.isFinite(options.detailConcurrency) ? Math.max(1, Math.trunc(options.detailConcurrency)) : 1;
  const workerCount = Math.min(requestedWorkers, total);
  async function worker() {
    // The claim (read + increment) happens with no await in between, so two workers never take the same index.
    while (nextIndex < options.pullNumbers.length) {
      const index = nextIndex;
      nextIndex += 1;
      const pullNumber = options.pullNumbers[index];
      if (pullNumber === void 0) continue;
      options.onProgress?.({
        stage: "fetching_pull_request_details",
        repo: options.repo,
        current: index + 1,
        total: options.pullNumbers.length,
        prNumber: pullNumber,
        detailConcurrency: options.detailConcurrency
      });
      results[index] = await fetchPullRequestDetails(options.octokit, options.repo, pullNumber);
      completed += 1;
      options.onProgress?.({
        stage: "fetched_pull_request_details",
        repo: options.repo,
        current: completed,
        total: options.pullNumbers.length,
        prNumber: pullNumber,
        detailConcurrency: options.detailConcurrency
      });
    }
  }
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  // Array.from visits empty slots as undefined; Array.prototype.map would skip them,
  // letting unfilled positions slip past this check and returning a sparse array.
  return Array.from(results, (result, index) => {
    if (!result) {
      throw new Error(`Failed to fetch PR details at index ${index}.`);
    }
    return result;
  });
}
|
|
1309
1927
|
async function fetchMergedPullRequests(options) {
|
|
1310
1928
|
const [owner, repo] = options.repo.split("/");
|
|
1311
1929
|
if (!owner || !repo) throw new Error(`Invalid repo '${options.repo}'. Expected owner/name.`);
|
|
1312
1930
|
const octokit = createGitHubClient(options.token);
|
|
1313
|
-
const limit =
|
|
1931
|
+
const limit = resolvePullRequestFetchLimit(options);
|
|
1932
|
+
const detailConcurrency = resolvePullRequestDetailConcurrency(options);
|
|
1314
1933
|
const sinceTime = options.since ? Date.parse(options.since) : void 0;
|
|
1315
1934
|
const pullNumbers = [];
|
|
1935
|
+
let scannedPullRequests = 0;
|
|
1936
|
+
let reachedSinceBoundary = false;
|
|
1316
1937
|
options.onProgress?.({
|
|
1317
1938
|
stage: "discovering_pull_requests",
|
|
1318
1939
|
repo: options.repo,
|
|
1940
|
+
all: limit === void 0,
|
|
1319
1941
|
limit,
|
|
1320
1942
|
since: options.since
|
|
1321
1943
|
});
|
|
@@ -1325,41 +1947,48 @@ async function fetchMergedPullRequests(options) {
|
|
|
1325
1947
|
state: "closed",
|
|
1326
1948
|
sort: "updated",
|
|
1327
1949
|
direction: "desc",
|
|
1328
|
-
per_page:
|
|
1950
|
+
per_page: 100
|
|
1329
1951
|
})) {
|
|
1952
|
+
scannedPullRequests += response.data.length;
|
|
1330
1953
|
for (const pull of response.data) {
|
|
1331
|
-
if (!pull.merged_at) continue;
|
|
1332
1954
|
if (sinceTime && Date.parse(pull.updated_at) < sinceTime) {
|
|
1333
|
-
|
|
1955
|
+
reachedSinceBoundary = true;
|
|
1956
|
+
break;
|
|
1334
1957
|
}
|
|
1958
|
+
if (!pull.merged_at) continue;
|
|
1335
1959
|
pullNumbers.push(pull.number);
|
|
1336
|
-
if (pullNumbers.length >= limit) break;
|
|
1960
|
+
if (limit !== void 0 && pullNumbers.length >= limit) break;
|
|
1337
1961
|
}
|
|
1338
|
-
|
|
1962
|
+
options.onProgress?.({
|
|
1963
|
+
stage: "scanned_pull_request_page",
|
|
1964
|
+
repo: options.repo,
|
|
1965
|
+
all: limit === void 0,
|
|
1966
|
+
limit,
|
|
1967
|
+
scannedPullRequests,
|
|
1968
|
+
matchedMergedPullRequests: pullNumbers.length
|
|
1969
|
+
});
|
|
1970
|
+
if (reachedSinceBoundary || limit !== void 0 && pullNumbers.length >= limit) break;
|
|
1339
1971
|
}
|
|
1340
1972
|
options.onProgress?.({
|
|
1341
1973
|
stage: "discovered_pull_requests",
|
|
1342
1974
|
repo: options.repo,
|
|
1975
|
+
all: limit === void 0,
|
|
1343
1976
|
total: pullNumbers.length,
|
|
1344
|
-
limit
|
|
1977
|
+
limit,
|
|
1978
|
+
detailConcurrency
|
|
1979
|
+
});
|
|
1980
|
+
return fetchPullRequestDetailsConcurrently({
|
|
1981
|
+
octokit,
|
|
1982
|
+
repo: options.repo,
|
|
1983
|
+
pullNumbers,
|
|
1984
|
+
detailConcurrency,
|
|
1985
|
+
onProgress: options.onProgress
|
|
1345
1986
|
});
|
|
1346
|
-
const details = [];
|
|
1347
|
-
for (const [index, pullNumber] of pullNumbers.entries()) {
|
|
1348
|
-
options.onProgress?.({
|
|
1349
|
-
stage: "fetching_pull_request_details",
|
|
1350
|
-
repo: options.repo,
|
|
1351
|
-
current: index + 1,
|
|
1352
|
-
total: pullNumbers.length,
|
|
1353
|
-
prNumber: pullNumber
|
|
1354
|
-
});
|
|
1355
|
-
details.push(await fetchPullRequestDetails(octokit, options.repo, pullNumber));
|
|
1356
|
-
}
|
|
1357
|
-
return details;
|
|
1358
1987
|
}
|
|
1359
1988
|
|
|
1360
1989
|
// src/doctor.ts
|
|
1361
|
-
import
|
|
1362
|
-
import
|
|
1990
|
+
import fs4 from "fs";
|
|
1991
|
+
import path9 from "path";
|
|
1363
1992
|
function check(name, ok, message, fix) {
|
|
1364
1993
|
return { name, ok, message, fix: ok ? void 0 : fix };
|
|
1365
1994
|
}
|
|
@@ -1420,12 +2049,12 @@ async function runDoctor(options) {
|
|
|
1420
2049
|
)
|
|
1421
2050
|
);
|
|
1422
2051
|
}
|
|
1423
|
-
const cursorConfigPath =
|
|
2052
|
+
const cursorConfigPath = path9.join(gitRoot ?? cwd, ".cursor", "mcp.json");
|
|
1424
2053
|
let cursorConfig;
|
|
1425
2054
|
let cursorConfigValid = false;
|
|
1426
|
-
if (
|
|
2055
|
+
if (fs4.existsSync(cursorConfigPath)) {
|
|
1427
2056
|
try {
|
|
1428
|
-
cursorConfig = JSON.parse(
|
|
2057
|
+
cursorConfig = JSON.parse(fs4.readFileSync(cursorConfigPath, "utf8"));
|
|
1429
2058
|
cursorConfigValid = true;
|
|
1430
2059
|
} catch {
|
|
1431
2060
|
cursorConfigValid = false;
|
|
@@ -1434,7 +2063,7 @@ async function runDoctor(options) {
|
|
|
1434
2063
|
checks.push(
|
|
1435
2064
|
check(
|
|
1436
2065
|
".cursor/mcp.json valid",
|
|
1437
|
-
|
|
2066
|
+
fs4.existsSync(cursorConfigPath) && cursorConfigValid,
|
|
1438
2067
|
cursorConfigValid ? ".cursor/mcp.json exists and is valid JSON." : ".cursor/mcp.json is missing or invalid.",
|
|
1439
2068
|
"Run anchor init. If the file is malformed, fix the JSON and rerun anchor init."
|
|
1440
2069
|
)
|
|
@@ -1451,7 +2080,7 @@ async function runDoctor(options) {
|
|
|
1451
2080
|
)
|
|
1452
2081
|
);
|
|
1453
2082
|
const dbPath = defaultDatabasePath(gitRoot ?? cwd);
|
|
1454
|
-
const dbExists =
|
|
2083
|
+
const dbExists = fs4.existsSync(dbPath);
|
|
1455
2084
|
checks.push(
|
|
1456
2085
|
check(
|
|
1457
2086
|
".anchor/index.sqlite exists",
|
|
@@ -1495,12 +2124,12 @@ async function runDoctor(options) {
|
|
|
1495
2124
|
"Run pnpm build, then try anchor serve from the repository."
|
|
1496
2125
|
)
|
|
1497
2126
|
);
|
|
1498
|
-
const rulePath =
|
|
2127
|
+
const rulePath = path9.join(gitRoot ?? cwd, ".cursor", "rules", "anchor.mdc");
|
|
1499
2128
|
checks.push(
|
|
1500
2129
|
check(
|
|
1501
2130
|
"Cursor rule file exists",
|
|
1502
|
-
|
|
1503
|
-
|
|
2131
|
+
fs4.existsSync(rulePath),
|
|
2132
|
+
fs4.existsSync(rulePath) ? "Cursor rule file exists." : "Cursor rule file is missing.",
|
|
1504
2133
|
"Run anchor init to create .cursor/rules/anchor.mdc."
|
|
1505
2134
|
)
|
|
1506
2135
|
);
|
|
@@ -1508,12 +2137,14 @@ async function runDoctor(options) {
|
|
|
1508
2137
|
}
|
|
1509
2138
|
export {
|
|
1510
2139
|
ANCHOR_CURSOR_RULE,
|
|
2140
|
+
DEFAULT_MAX_CODE_FILE_BYTES,
|
|
1511
2141
|
SCHEMA_SQL,
|
|
1512
2142
|
anchorMcpEntry,
|
|
1513
2143
|
buildFtsQuery,
|
|
1514
2144
|
canonicalizeText,
|
|
1515
2145
|
categorizeWisdom,
|
|
1516
2146
|
checkSchema,
|
|
2147
|
+
chunkCodeFile,
|
|
1517
2148
|
chunkHistoricalText,
|
|
1518
2149
|
clampMaxResults,
|
|
1519
2150
|
clipSentence,
|
|
@@ -1521,10 +2152,13 @@ export {
|
|
|
1521
2152
|
defaultDatabasePath,
|
|
1522
2153
|
detectGitHubRepo,
|
|
1523
2154
|
detectGitRoot,
|
|
2155
|
+
discoverCodeFiles,
|
|
2156
|
+
emptyCodeIndexSummary,
|
|
1524
2157
|
ensureAnchorGitExclude,
|
|
1525
2158
|
ensureCursorConfig,
|
|
1526
2159
|
ensureCursorRule,
|
|
1527
2160
|
ensureRepository,
|
|
2161
|
+
extractCodeSymbols,
|
|
1528
2162
|
extractSymbols,
|
|
1529
2163
|
extractWisdomUnits,
|
|
1530
2164
|
fetchMergedPullRequests,
|
|
@@ -1536,16 +2170,22 @@ export {
|
|
|
1536
2170
|
getLastSyncTime,
|
|
1537
2171
|
githubAuthFixMessage,
|
|
1538
2172
|
hasHighSignalLanguage,
|
|
2173
|
+
indexCodebase,
|
|
1539
2174
|
indexPullRequests,
|
|
1540
2175
|
initializeSchema,
|
|
2176
|
+
isHardExcludedCodePath,
|
|
1541
2177
|
mergeAnchorMcpConfig,
|
|
1542
2178
|
normalizePullRequest,
|
|
1543
2179
|
openAnchorDatabase,
|
|
1544
2180
|
parseGitHubRemote,
|
|
2181
|
+
rankCodeChunks,
|
|
1545
2182
|
rankWisdomUnits,
|
|
1546
2183
|
redactSecrets,
|
|
1547
2184
|
redactedHistoricalText,
|
|
2185
|
+
replaceCodeIndex,
|
|
1548
2186
|
resolveGitHubToken,
|
|
2187
|
+
resolvePullRequestDetailConcurrency,
|
|
2188
|
+
resolvePullRequestFetchLimit,
|
|
1549
2189
|
runDoctor,
|
|
1550
2190
|
sanitizeHistoricalText,
|
|
1551
2191
|
shouldSyncSince,
|