@pratik7368patil/anchor-core 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +96 -2
- package/dist/index.js +608 -28
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/db/schema.sql +44 -0
package/dist/index.js
CHANGED
|
@@ -334,6 +334,48 @@ CREATE VIRTUAL TABLE IF NOT EXISTS wisdom_units_fts USING fts5(
|
|
|
334
334
|
category
|
|
335
335
|
);
|
|
336
336
|
|
|
337
|
+
CREATE TABLE IF NOT EXISTS code_files (
|
|
338
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
339
|
+
repo_id INTEGER NOT NULL REFERENCES repositories(id) ON DELETE CASCADE,
|
|
340
|
+
path TEXT NOT NULL,
|
|
341
|
+
language TEXT,
|
|
342
|
+
size_bytes INTEGER NOT NULL,
|
|
343
|
+
content_hash TEXT NOT NULL,
|
|
344
|
+
updated_at TEXT NOT NULL,
|
|
345
|
+
UNIQUE(repo_id, path)
|
|
346
|
+
);
|
|
347
|
+
|
|
348
|
+
CREATE TABLE IF NOT EXISTS code_chunks (
|
|
349
|
+
id TEXT PRIMARY KEY,
|
|
350
|
+
repo_id INTEGER NOT NULL REFERENCES repositories(id) ON DELETE CASCADE,
|
|
351
|
+
file_id INTEGER NOT NULL REFERENCES code_files(id) ON DELETE CASCADE,
|
|
352
|
+
repo TEXT NOT NULL,
|
|
353
|
+
file_path TEXT NOT NULL,
|
|
354
|
+
language TEXT,
|
|
355
|
+
start_line INTEGER NOT NULL,
|
|
356
|
+
end_line INTEGER NOT NULL,
|
|
357
|
+
sanitized_text TEXT NOT NULL,
|
|
358
|
+
symbols_json TEXT NOT NULL,
|
|
359
|
+
content_hash TEXT NOT NULL,
|
|
360
|
+
updated_at TEXT NOT NULL
|
|
361
|
+
);
|
|
362
|
+
|
|
363
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS code_chunks_fts USING fts5(
|
|
364
|
+
chunkId UNINDEXED,
|
|
365
|
+
sanitizedText,
|
|
366
|
+
filePath,
|
|
367
|
+
symbols,
|
|
368
|
+
language
|
|
369
|
+
);
|
|
370
|
+
|
|
371
|
+
CREATE TABLE IF NOT EXISTS code_index_state (
|
|
372
|
+
repo TEXT PRIMARY KEY,
|
|
373
|
+
last_indexed_at TEXT NOT NULL,
|
|
374
|
+
indexed_files INTEGER NOT NULL,
|
|
375
|
+
code_chunks INTEGER NOT NULL,
|
|
376
|
+
skipped_files INTEGER NOT NULL
|
|
377
|
+
);
|
|
378
|
+
|
|
337
379
|
CREATE TABLE IF NOT EXISTS sync_state (
|
|
338
380
|
repo TEXT PRIMARY KEY,
|
|
339
381
|
last_sync_at TEXT,
|
|
@@ -346,6 +388,8 @@ CREATE INDEX IF NOT EXISTS idx_pr_files_path ON pr_files(path);
|
|
|
346
388
|
CREATE INDEX IF NOT EXISTS idx_pr_comments_source ON pr_comments(source_type);
|
|
347
389
|
CREATE INDEX IF NOT EXISTS idx_wisdom_units_category ON wisdom_units(category);
|
|
348
390
|
CREATE INDEX IF NOT EXISTS idx_wisdom_units_pr ON wisdom_units(pr_id);
|
|
391
|
+
CREATE INDEX IF NOT EXISTS idx_code_files_path ON code_files(path);
|
|
392
|
+
CREATE INDEX IF NOT EXISTS idx_code_chunks_file_path ON code_chunks(file_path);
|
|
349
393
|
`;
|
|
350
394
|
|
|
351
395
|
// src/db/database.ts
|
|
@@ -365,8 +409,10 @@ function initializeSchema(db) {
|
|
|
365
409
|
function checkSchema(db) {
|
|
366
410
|
try {
|
|
367
411
|
const tables = db.prepare("SELECT name FROM sqlite_master WHERE type IN ('table', 'virtual') AND name = ?").all("wisdom_units_fts");
|
|
412
|
+
const codeTables = db.prepare("SELECT name FROM sqlite_master WHERE type IN ('table', 'virtual') AND name = ?").all("code_chunks_fts");
|
|
368
413
|
const wisdom = db.prepare("SELECT name FROM sqlite_master WHERE name = ?").all("wisdom_units");
|
|
369
|
-
|
|
414
|
+
const code = db.prepare("SELECT name FROM sqlite_master WHERE name = ?").all("code_chunks");
|
|
415
|
+
return tables.length > 0 && wisdom.length > 0 && codeTables.length > 0 && code.length > 0;
|
|
370
416
|
} catch {
|
|
371
417
|
return false;
|
|
372
418
|
}
|
|
@@ -543,6 +589,87 @@ function upsertPullRequest(db, pr, wisdomUnits) {
|
|
|
543
589
|
const comments = (pr.reviews?.length ?? 0) + (pr.reviewComments?.length ?? 0) + (pr.issueComments?.length ?? 0);
|
|
544
590
|
return { files: pr.files.length, comments, wisdom: wisdomUnits.length };
|
|
545
591
|
}
|
|
592
|
+
/**
 * Replaces the stored code index of one repository with a fresh snapshot.
 *
 * Inside a single transaction this removes the repository's full-text rows,
 * chunks and files, inserts the supplied files and chunks, and upserts the
 * repository's row in `code_index_state`.
 *
 * @param {object} db - Database handle exposing `prepare()` and `transaction()`.
 * @param {string} repo - Repository identifier; also the `code_index_state` key.
 * @param {Array<object>} codeFiles - File records with `path`, `language`,
 *   `sizeBytes`, `contentHash` and `updatedAt`.
 * @param {Array<object>} codeChunks - Chunk records with `id`, `repo`,
 *   `filePath`, `language`, `startLine`, `endLine`, `sanitizedText`,
 *   `symbols`, `contentHash` and `updatedAt`.
 * @param {number} skippedFiles - Number of files discovery chose not to index.
 * @param {string} cwd - Working directory used to resolve the database path.
 * @returns {{indexedFiles: number, codeChunksCreated: number, skippedFiles: number, databasePath: string}}
 *   Summary of what was stored. `codeChunksCreated` counts only the chunks
 *   actually inserted.
 */
function replaceCodeIndex(db, repo, codeFiles, codeChunks, skippedFiles, cwd) {
  initializeSchema(db);
  const repoId = ensureRepository(db, repo);
  const now = new Date().toISOString();
  let storedChunks = 0;

  const transaction = db.transaction(() => {
    // Reset in case the transaction body is ever re-run.
    storedChunks = 0;

    // One statement clears every FTS row of this repository instead of
    // issuing a separate DELETE per previously stored chunk.
    db.prepare(
      "DELETE FROM code_chunks_fts WHERE chunkId IN (SELECT id FROM code_chunks WHERE repo_id = ?)"
    ).run(repoId);
    db.prepare("DELETE FROM code_chunks WHERE repo_id = ?").run(repoId);
    db.prepare("DELETE FROM code_files WHERE repo_id = ?").run(repoId);

    const insertFile = db.prepare(
      `INSERT INTO code_files
        (repo_id, path, language, size_bytes, content_hash, updated_at)
       VALUES (?, ?, ?, ?, ?, ?)`
    );
    for (const file of codeFiles) {
      insertFile.run(
        repoId,
        file.path,
        file.language ?? null,
        file.sizeBytes,
        file.contentHash,
        file.updatedAt
      );
    }

    // Map each stored path to its generated row id so chunks can reference it.
    const fileRows = db.prepare("SELECT id, path FROM code_files WHERE repo_id = ?").all(repoId);
    const fileIds = new Map(fileRows.map((row) => [row.path, row.id]));

    const insertChunk = db.prepare(
      `INSERT INTO code_chunks
        (id, repo_id, file_id, repo, file_path, language, start_line, end_line, sanitized_text,
         symbols_json, content_hash, updated_at)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
    );
    const insertFts = db.prepare(
      `INSERT INTO code_chunks_fts
        (chunkId, sanitizedText, filePath, symbols, language)
       VALUES (?, ?, ?, ?, ?)`
    );
    for (const chunk of codeChunks) {
      const fileId = fileIds.get(chunk.filePath);
      // A chunk whose file was not stored cannot satisfy the file_id reference.
      if (!fileId) continue;
      insertChunk.run(
        chunk.id,
        repoId,
        fileId,
        chunk.repo,
        chunk.filePath,
        chunk.language ?? null,
        chunk.startLine,
        chunk.endLine,
        chunk.sanitizedText,
        JSON.stringify(chunk.symbols),
        chunk.contentHash,
        chunk.updatedAt
      );
      insertFts.run(
        chunk.id,
        chunk.sanitizedText,
        chunk.filePath,
        chunk.symbols.join(" "),
        chunk.language ?? ""
      );
      storedChunks += 1;
    }

    db.prepare(
      `INSERT INTO code_index_state (repo, last_indexed_at, indexed_files, code_chunks, skipped_files)
       VALUES (?, ?, ?, ?, ?)
       ON CONFLICT(repo) DO UPDATE SET
         last_indexed_at = excluded.last_indexed_at,
         indexed_files = excluded.indexed_files,
         code_chunks = excluded.code_chunks,
         skipped_files = excluded.skipped_files`
    ).run(repo, now, codeFiles.length, storedChunks, skippedFiles);
  });
  transaction();

  return {
    indexedFiles: codeFiles.length,
    codeChunksCreated: storedChunks,
    skippedFiles,
    databasePath: defaultDatabasePath(cwd)
  };
}
|
|
546
673
|
function getIndexStatus(cwd, githubTokenConfigured = Boolean(resolveGitHubToken({ cwd }).token), databasePath = defaultDatabasePath(cwd)) {
|
|
547
674
|
if (!fs2.existsSync(databasePath)) {
|
|
548
675
|
return {
|
|
@@ -551,12 +678,15 @@ function getIndexStatus(cwd, githubTokenConfigured = Boolean(resolveGitHubToken(
|
|
|
551
678
|
fileCount: 0,
|
|
552
679
|
commentCount: 0,
|
|
553
680
|
wisdomUnitCount: 0,
|
|
681
|
+
codeFileCount: 0,
|
|
682
|
+
codeChunkCount: 0,
|
|
554
683
|
githubTokenConfigured,
|
|
555
684
|
health: "missing_database"
|
|
556
685
|
};
|
|
557
686
|
}
|
|
558
687
|
const db = openAnchorDatabase(cwd, databasePath);
|
|
559
688
|
try {
|
|
689
|
+
initializeSchema(db);
|
|
560
690
|
if (!checkSchema(db)) {
|
|
561
691
|
return {
|
|
562
692
|
databasePath,
|
|
@@ -564,6 +694,8 @@ function getIndexStatus(cwd, githubTokenConfigured = Boolean(resolveGitHubToken(
|
|
|
564
694
|
fileCount: 0,
|
|
565
695
|
commentCount: 0,
|
|
566
696
|
wisdomUnitCount: 0,
|
|
697
|
+
codeFileCount: 0,
|
|
698
|
+
codeChunkCount: 0,
|
|
567
699
|
githubTokenConfigured,
|
|
568
700
|
health: "schema_invalid"
|
|
569
701
|
};
|
|
@@ -571,7 +703,9 @@ function getIndexStatus(cwd, githubTokenConfigured = Boolean(resolveGitHubToken(
|
|
|
571
703
|
const count = (table) => db.prepare(`SELECT COUNT(*) AS count FROM ${table}`).get().count;
|
|
572
704
|
const repoRow = db.prepare("SELECT full_name FROM repositories ORDER BY id LIMIT 1").get();
|
|
573
705
|
const syncRow = db.prepare("SELECT last_sync_at FROM sync_state ORDER BY updated_at DESC LIMIT 1").get();
|
|
706
|
+
const codeIndexRow = db.prepare("SELECT last_indexed_at FROM code_index_state ORDER BY last_indexed_at DESC LIMIT 1").get();
|
|
574
707
|
const wisdomUnitCount = count("wisdom_units");
|
|
708
|
+
const codeChunkCount = count("code_chunks");
|
|
575
709
|
return {
|
|
576
710
|
repo: repoRow?.full_name,
|
|
577
711
|
databasePath,
|
|
@@ -579,9 +713,12 @@ function getIndexStatus(cwd, githubTokenConfigured = Boolean(resolveGitHubToken(
|
|
|
579
713
|
fileCount: count("pr_files"),
|
|
580
714
|
commentCount: count("pr_comments"),
|
|
581
715
|
wisdomUnitCount,
|
|
716
|
+
codeFileCount: count("code_files"),
|
|
717
|
+
codeChunkCount,
|
|
582
718
|
lastSyncTime: syncRow?.last_sync_at ?? void 0,
|
|
719
|
+
lastCodeIndexTime: codeIndexRow?.last_indexed_at ?? void 0,
|
|
583
720
|
githubTokenConfigured,
|
|
584
|
-
health: wisdomUnitCount > 0 ? "ok" : "empty_index"
|
|
721
|
+
health: wisdomUnitCount > 0 || codeChunkCount > 0 ? "ok" : "empty_index"
|
|
585
722
|
};
|
|
586
723
|
} finally {
|
|
587
724
|
db.close();
|
|
@@ -618,9 +755,260 @@ function chunkHistoricalText(text, maxChunkLength = 700) {
|
|
|
618
755
|
return expanded.filter((chunk) => chunk.length >= 12 && hasHighSignalLanguage(chunk));
|
|
619
756
|
}
|
|
620
757
|
|
|
621
|
-
// src/indexer/
|
|
758
|
+
// src/indexer/code-chunker.ts
|
|
622
759
|
import crypto from "crypto";
|
|
623
760
|
import path3 from "path";
|
|
761
|
+
var DEFAULT_CHUNK_LINES = 80;
|
|
762
|
+
var DEFAULT_OVERLAP_LINES = 8;
|
|
763
|
+
var FUNCTION_CALL_STOP_WORDS = /* @__PURE__ */ new Set([
|
|
764
|
+
"catch",
|
|
765
|
+
"describe",
|
|
766
|
+
"for",
|
|
767
|
+
"if",
|
|
768
|
+
"it",
|
|
769
|
+
"return",
|
|
770
|
+
"switch",
|
|
771
|
+
"test",
|
|
772
|
+
"while"
|
|
773
|
+
]);
|
|
774
|
+
/**
 * Derives a deterministic identifier for a chunk of a file.
 *
 * The id is `cc_` followed by the first 24 hex characters of the SHA-256 of
 * the file's repo, path and content hash plus the chunk's line range, joined
 * with NUL characters.
 *
 * @param {{repo: string, path: string, contentHash: string}} file - File the chunk belongs to.
 * @param {number} startLine - First line of the chunk.
 * @param {number} endLine - Last line of the chunk.
 * @returns {string} Identifier of the form `cc_<24 hex chars>`.
 */
function stableCodeChunkId(file, startLine, endLine) {
  const identityParts = [file.repo, file.path, file.contentHash, startLine, endLine];
  const hasher = crypto.createHash("sha256");
  hasher.update(identityParts.join("\0"));
  const digest = hasher.digest("hex");
  return `cc_${digest.slice(0, 24)}`;
}
|
|
778
|
+
/**
 * Collects identifier-like symbols from a piece of source text.
 *
 * Symbols are gathered in this order: declared names (class, function,
 * interface, type, enum, const, let, var), names bound to arrow functions,
 * names followed by an opening parenthesis (minus the stop-word list), and
 * finally the file's basename without its last extension when it looks like
 * an identifier. The list is de-duplicated and capped at 40 entries.
 *
 * @param {string} text - Source text to scan.
 * @param {string} filePath - Path of the file the text came from.
 * @returns {string[]} Up to 40 symbols.
 */
function extractCodeSymbols(text, filePath) {
  const declarationPattern =
    /\b(?:export\s+)?(?:async\s+)?(?:class|function|interface|type|enum|const|let|var)\s+([A-Za-z_$][\w$]*)/g;
  const arrowBindingPattern = /\b([A-Za-z_$][\w$]{2,})\s*[:=]\s*(?:async\s*)?\([^)]*\)\s*=>/g;
  const callPattern = /\b([A-Za-z_$][\w$]{2,})\s*\(/g;
  const captured = (pattern) => Array.from(text.matchAll(pattern), (match) => match[1] ?? "");

  const collected = [
    ...captured(declarationPattern),
    ...captured(arrowBindingPattern),
    // Keywords and test helpers look like calls but are not useful symbols.
    ...captured(callPattern).filter((name) => !FUNCTION_CALL_STOP_WORDS.has(name))
  ];

  const stem = path3.basename(filePath).replace(/\.[^.]+$/, "");
  if (/^[A-Za-z_$][\w$-]*$/.test(stem)) {
    collected.push(stem);
  }
  return uniqueStrings(collected).slice(0, 40);
}
|
|
797
|
+
/**
 * Splits a file's content into overlapping line-based chunks.
 *
 * Each window's text is passed through `sanitizeHistoricalText`; windows
 * whose sanitized text is empty are dropped. Line numbers are 1-based and
 * inclusive.
 *
 * @param {object} file - File record with `repo`, `path`, `language`,
 *   `contentHash`, `updatedAt` and `content`.
 * @param {{chunkLines?: number, overlapLines?: number}} [options] - Window
 *   size and overlap in lines. Values that are not usable numbers fall back
 *   to the module defaults.
 * @returns {Array<object>} Chunk records for the file.
 */
function chunkCodeFile(file, options = {}) {
  // A window smaller than one line (or NaN) can never make progress sensibly.
  const requestedLines = options.chunkLines ?? DEFAULT_CHUNK_LINES;
  const chunkLines =
    typeof requestedLines === "number" && requestedLines >= 1
      ? Math.floor(requestedLines)
      : DEFAULT_CHUNK_LINES;

  // A NaN overlap would poison the next start index and end the loop early.
  const requestedOverlap = options.overlapLines ?? DEFAULT_OVERLAP_LINES;
  const usableOverlap =
    typeof requestedOverlap === "number" && !Number.isNaN(requestedOverlap)
      ? Math.floor(requestedOverlap)
      : DEFAULT_OVERLAP_LINES;
  const overlapLines = Math.max(0, Math.min(usableOverlap, chunkLines - 1));

  const lines = file.content.replace(/\r\n/g, "\n").split("\n");
  // A terminating newline produces an empty trailing element that is not a
  // real line; dropping it keeps endLine within the file's line count.
  if (lines.length > 1 && lines[lines.length - 1] === "") {
    lines.pop();
  }

  const chunks = [];
  for (let startIndex = 0; startIndex < lines.length; ) {
    const endIndex = Math.min(lines.length, startIndex + chunkLines);
    const rawText = lines.slice(startIndex, endIndex).join("\n");
    const sanitizedText = sanitizeHistoricalText(rawText);
    if (sanitizedText) {
      chunks.push({
        id: stableCodeChunkId(file, startIndex + 1, endIndex),
        repo: file.repo,
        filePath: file.path,
        language: file.language,
        startLine: startIndex + 1,
        endLine: endIndex,
        sanitizedText,
        symbols: extractCodeSymbols(sanitizedText, file.path),
        contentHash: file.contentHash,
        updatedAt: file.updatedAt
      });
    }
    if (endIndex >= lines.length) break;
    // Step back by the overlap, but always advance by at least one line.
    startIndex = Math.max(startIndex + 1, endIndex - overlapLines);
  }
  return chunks;
}
|
|
828
|
+
|
|
829
|
+
// src/indexer/code-file-discovery.ts
|
|
830
|
+
import { execFileSync as execFileSync3 } from "child_process";
|
|
831
|
+
import crypto2 from "crypto";
|
|
832
|
+
import fs3 from "fs";
|
|
833
|
+
import path4 from "path";
|
|
834
|
+
var DEFAULT_MAX_CODE_FILE_BYTES = 512 * 1024;
|
|
835
|
+
var HARD_EXCLUDED_SEGMENTS = /* @__PURE__ */ new Set([
|
|
836
|
+
".git",
|
|
837
|
+
".anchor",
|
|
838
|
+
".cursor",
|
|
839
|
+
".codex",
|
|
840
|
+
".aws",
|
|
841
|
+
".ssh",
|
|
842
|
+
"node_modules",
|
|
843
|
+
".nuxt",
|
|
844
|
+
".next",
|
|
845
|
+
"dist",
|
|
846
|
+
"build",
|
|
847
|
+
"coverage",
|
|
848
|
+
".turbo"
|
|
849
|
+
]);
|
|
850
|
+
var LANGUAGE_BY_EXTENSION = {
|
|
851
|
+
".cjs": "javascript",
|
|
852
|
+
".css": "css",
|
|
853
|
+
".go": "go",
|
|
854
|
+
".html": "html",
|
|
855
|
+
".java": "java",
|
|
856
|
+
".js": "javascript",
|
|
857
|
+
".json": "json",
|
|
858
|
+
".jsx": "javascript",
|
|
859
|
+
".md": "markdown",
|
|
860
|
+
".mjs": "javascript",
|
|
861
|
+
".py": "python",
|
|
862
|
+
".rb": "ruby",
|
|
863
|
+
".rs": "rust",
|
|
864
|
+
".scss": "scss",
|
|
865
|
+
".sh": "shell",
|
|
866
|
+
".sql": "sql",
|
|
867
|
+
".svelte": "svelte",
|
|
868
|
+
".ts": "typescript",
|
|
869
|
+
".tsx": "typescript",
|
|
870
|
+
".vue": "vue",
|
|
871
|
+
".yaml": "yaml",
|
|
872
|
+
".yml": "yaml"
|
|
873
|
+
};
|
|
874
|
+
/**
 * Normalizes a path to forward slashes without a leading `./`.
 *
 * @param {string} value - Path as reported by git or supplied by a caller.
 * @returns {string} The path with every backslash replaced by `/` and one
 *   leading `.` followed by one or more slashes removed.
 */
function normalizeGitPath(value) {
  const forwardSlashed = value.split("\\").join("/");
  return forwardSlashed.replace(/^\.\/+/, "");
}
|
|
877
|
+
/**
 * Decides whether a path must never be indexed.
 *
 * A path is excluded when any of its segments is in `HARD_EXCLUDED_SEGMENTS`,
 * or when its lower-cased basename is a known credential file, a `.env`
 * file or variant, a well-known SSH key name, or ends in a key/certificate
 * extension.
 *
 * @param {string} filePath - Repository-relative path.
 * @returns {boolean} `true` when the file must be skipped.
 */
function isHardExcludedCodePath(filePath) {
  const normalized = normalizeGitPath(filePath);
  for (const segment of normalized.split("/")) {
    if (HARD_EXCLUDED_SEGMENTS.has(segment)) return true;
  }

  const name = path4.posix.basename(normalized).toLowerCase();
  const credentialFiles = [".netrc", ".npmrc", ".pypirc", ".yarnrc"];
  const sshKeyFiles = ["id_rsa", "id_rsa.pub", "id_dsa", "id_ecdsa", "id_ed25519"];

  return (
    credentialFiles.includes(name) ||
    name === ".env" ||
    name.startsWith(".env.") ||
    sshKeyFiles.includes(name) ||
    /\.(pem|key|p12|pfx)$/i.test(name)
  );
}
|
|
890
|
+
/**
 * Looks up the language label for a file based on its extension.
 *
 * @param {string} filePath - Path whose extension is inspected (case-insensitive).
 * @returns {string|undefined} The label from `LANGUAGE_BY_EXTENSION`, or
 *   `undefined` when the extension is not listed.
 */
function languageForPath(filePath) {
  const suffix = path4.extname(filePath).toLowerCase();
  const language = LANGUAGE_BY_EXTENSION[suffix];
  return language;
}
|
|
894
|
+
/**
 * Heuristically classifies a buffer as binary.
 *
 * Any NUL byte marks the buffer as binary. Otherwise it is binary when more
 * than 1% of its bytes are control characters below 32 other than tab, line
 * feed and carriage return. An empty buffer is not binary.
 *
 * @param {Buffer} buffer - File contents.
 * @returns {boolean} `true` when the content looks binary.
 */
function isProbablyBinary(buffer) {
  if (buffer.includes(0)) return true;

  const total = buffer.length;
  if (total === 0) return false;

  const TAB = 9;
  const LINE_FEED = 10;
  const CARRIAGE_RETURN = 13;

  let controlBytes = 0;
  for (let index = 0; index < total; index += 1) {
    const byte = buffer[index];
    if (byte >= 32) continue;
    if (byte === TAB || byte === LINE_FEED || byte === CARRIAGE_RETURN) continue;
    controlBytes += 1;
  }
  return controlBytes / total > 0.01;
}
|
|
904
|
+
/**
 * Lists tracked and untracked-but-not-ignored files of a git working tree.
 *
 * Runs `git ls-files --cached --others --exclude-standard` in `cwd`. The
 * `-z` flag makes git emit NUL-terminated, unquoted paths, so names with
 * spaces, quotes or non-ASCII characters arrive verbatim.
 *
 * @param {string} cwd - Directory inside the repository to list.
 * @returns {string[]} Unique, normalized repository-relative paths.
 * @throws {Error} When the git command fails (error from `execFileSync`).
 */
function discoverGitFiles(cwd) {
  const output = execFileSync3(
    "git",
    ["ls-files", "-z", "--cached", "--others", "--exclude-standard"],
    {
      cwd,
      encoding: "utf8",
      stdio: ["ignore", "pipe", "pipe"],
      // Large repositories can exceed the default output buffer limit.
      maxBuffer: 256 * 1024 * 1024
    }
  );
  const paths = output
    .split("\0")
    .map((entry) => normalizeGitPath(entry))
    .filter(Boolean);
  // Unmerged paths may be listed more than once; callers expect unique paths.
  return [...new Set(paths)];
}
|
|
912
|
+
/**
 * Reads every indexable file of a repository working tree.
 *
 * A candidate from `discoverGitFiles` is skipped (and counted) when it is
 * hard-excluded, resolves outside the root, is not a regular file, exceeds
 * the size limit, cannot be read, or looks binary.
 *
 * @param {string} cwd - Repository working directory.
 * @param {string} repo - Repository identifier copied onto each record.
 * @param {{maxFileBytes?: number}} [options] - Size limit override; defaults
 *   to `DEFAULT_MAX_CODE_FILE_BYTES`.
 * @returns {{files: Array<object>, skippedFiles: number}} File records
 *   (including `absolutePath` and `content`) and the number skipped.
 */
function discoverCodeFiles(cwd, repo, options = {}) {
  const maxFileBytes = options.maxFileBytes ?? DEFAULT_MAX_CODE_FILE_BYTES;
  const rootPath = path4.resolve(cwd);
  const files = [];
  let skippedFiles = 0;

  for (const filePath of discoverGitFiles(cwd)) {
    if (isHardExcludedCodePath(filePath)) {
      skippedFiles += 1;
      continue;
    }

    const absolutePath = path4.resolve(cwd, filePath);
    const relativeToRoot = path4.relative(rootPath, absolutePath);
    // Compare whole segments so names such as "..config" are not mistaken
    // for a parent-directory escape.
    const escapesRoot =
      relativeToRoot === ".." ||
      relativeToRoot.startsWith(`..${path4.sep}`) ||
      path4.isAbsolute(relativeToRoot);
    if (escapesRoot) {
      skippedFiles += 1;
      continue;
    }

    let stat;
    let buffer;
    try {
      // lstat does not follow symlinks, so a link is reported as a
      // non-regular file and skipped instead of exposing its target.
      stat = fs3.lstatSync(absolutePath);
      if (!stat.isFile() || stat.size > maxFileBytes) {
        skippedFiles += 1;
        continue;
      }
      buffer = fs3.readFileSync(absolutePath);
    } catch {
      // Missing or unreadable files are skipped rather than aborting the run.
      skippedFiles += 1;
      continue;
    }

    if (isProbablyBinary(buffer)) {
      skippedFiles += 1;
      continue;
    }

    files.push({
      repo,
      path: filePath,
      language: languageForPath(filePath),
      sizeBytes: stat.size,
      contentHash: crypto2.createHash("sha256").update(buffer).digest("hex"),
      updatedAt: stat.mtime.toISOString(),
      absolutePath,
      content: buffer.toString("utf8")
    });
  }
  return { files, skippedFiles };
}
|
|
958
|
+
|
|
959
|
+
// src/indexer/code-indexer.ts
|
|
960
|
+
/**
 * Discovers, chunks and stores the code of a repository.
 *
 * Progress events are sent to `options.onProgress` (when provided) before
 * and after discovery and before and after each file is chunked.
 *
 * @param {object} db - Database handle passed on to `replaceCodeIndex`.
 * @param {object} options - `cwd`, `repo`, optional `maxFileBytes` and
 *   optional `onProgress` callback.
 * @returns {object} The summary returned by `replaceCodeIndex`.
 */
function indexCodebase(db, options) {
  const notify = (event) => options.onProgress?.(event);

  notify({ stage: "discovering_code_files", repo: options.repo });
  const discovery = discoverCodeFiles(options.cwd, options.repo, {
    maxFileBytes: options.maxFileBytes
  });
  notify({
    stage: "discovered_code_files",
    repo: options.repo,
    files: discovery.files.length,
    skippedFiles: discovery.skippedFiles
  });

  const allChunks = [];
  discovery.files.forEach((file, position) => {
    const current = position + 1;
    notify({
      stage: "indexing_code_file",
      repo: options.repo,
      current,
      total: discovery.files.length,
      filePath: file.path
    });
    const fileChunks = chunkCodeFile(file);
    for (const chunk of fileChunks) {
      allChunks.push(chunk);
    }
    notify({
      stage: "indexed_code_file",
      repo: options.repo,
      current,
      total: discovery.files.length,
      filePath: file.path,
      chunks: fileChunks.length
    });
  });

  // Content and absolute path are only needed for chunking, not for storage.
  const storedFiles = discovery.files.map((file) => {
    const { content: _content, absolutePath: _absolutePath, ...record } = file;
    return record;
  });

  return replaceCodeIndex(
    db,
    options.repo,
    storedFiles,
    allChunks,
    discovery.skippedFiles,
    options.cwd
  );
}
|
|
1000
|
+
/**
 * Builds the summary that describes an index run which stored nothing.
 *
 * @param {string} cwd - Working directory used to resolve the database path.
 * @returns {{indexedFiles: number, codeChunksCreated: number, skippedFiles: number, databasePath: string}}
 *   Summary with all counters at zero.
 */
function emptyCodeIndexSummary(cwd) {
  const databasePath = defaultDatabasePath(cwd);
  return { indexedFiles: 0, codeChunksCreated: 0, skippedFiles: 0, databasePath };
}
|
|
1008
|
+
|
|
1009
|
+
// src/indexer/wisdom-extractor.ts
|
|
1010
|
+
import crypto3 from "crypto";
|
|
1011
|
+
import path5 from "path";
|
|
624
1012
|
var CATEGORY_KEYWORDS = [
|
|
625
1013
|
["security_note", /\b(security|secret|token|bearer|oauth|credential|xss|csrf|injection|sanitize|redact)\b/i],
|
|
626
1014
|
["architecture_decision", /\b(architecture decision|architectural|we intentionally|design decision)\b/i],
|
|
@@ -652,7 +1040,7 @@ function extractSymbols(text, filePaths) {
|
|
|
652
1040
|
}
|
|
653
1041
|
}
|
|
654
1042
|
for (const filePath of filePaths) {
|
|
655
|
-
const basename =
|
|
1043
|
+
const basename = path5.basename(filePath).replace(/\.[^.]+$/, "");
|
|
656
1044
|
if (/^[A-Za-z_$][\w$]*$/.test(basename)) symbols.push(basename);
|
|
657
1045
|
}
|
|
658
1046
|
return uniqueStrings(symbols).slice(0, 30);
|
|
@@ -676,7 +1064,7 @@ function confidenceFor(entry, text, category, duplicateCount) {
|
|
|
676
1064
|
return Math.max(0, Math.min(1, Number(confidence.toFixed(2))));
|
|
677
1065
|
}
|
|
678
1066
|
function stableWisdomId(pr, sourceType, text, filePaths, createdAt, authors) {
|
|
679
|
-
const hash =
|
|
1067
|
+
const hash = crypto3.createHash("sha256").update(
|
|
680
1068
|
[pr.repo, pr.number, sourceType, canonicalizeText(text), filePaths.join("|"), createdAt, authors.join("|")].join(
|
|
681
1069
|
"\0"
|
|
682
1070
|
)
|
|
@@ -872,7 +1260,7 @@ function shouldSyncSince(db, repo, fallbackSince) {
|
|
|
872
1260
|
}
|
|
873
1261
|
|
|
874
1262
|
// src/retrieval/query-builder.ts
|
|
875
|
-
import
|
|
1263
|
+
import path6 from "path";
|
|
876
1264
|
var CATEGORY_HINTS = [
|
|
877
1265
|
"security",
|
|
878
1266
|
"regression",
|
|
@@ -897,8 +1285,8 @@ function buildFtsQuery(input) {
|
|
|
897
1285
|
const baseText = "task" in input ? input.task : input.query;
|
|
898
1286
|
const fileTerms = files.flatMap((file) => [
|
|
899
1287
|
file,
|
|
900
|
-
|
|
901
|
-
...
|
|
1288
|
+
path6.basename(file),
|
|
1289
|
+
...path6.dirname(file).split(/[\\/]/).filter(Boolean)
|
|
902
1290
|
]);
|
|
903
1291
|
const tokens = uniqueStrings([
|
|
904
1292
|
...tokenizeSearchText(baseText, 24),
|
|
@@ -917,7 +1305,7 @@ function clampMaxResults(value, defaultValue) {
|
|
|
917
1305
|
}
|
|
918
1306
|
|
|
919
1307
|
// src/retrieval/ranker.ts
|
|
920
|
-
import
|
|
1308
|
+
import path7 from "path";
|
|
921
1309
|
function parseJsonArray(value) {
|
|
922
1310
|
try {
|
|
923
1311
|
const parsed = JSON.parse(value);
|
|
@@ -964,11 +1352,11 @@ function filePathMatch(unitPaths, queryFiles) {
|
|
|
964
1352
|
if (queryFiles.length === 0 || unitPaths.length === 0) return 0;
|
|
965
1353
|
let best = 0;
|
|
966
1354
|
for (const queryFile of queryFiles) {
|
|
967
|
-
const queryBase =
|
|
968
|
-
const queryDir =
|
|
1355
|
+
const queryBase = path7.basename(queryFile).toLowerCase();
|
|
1356
|
+
const queryDir = path7.dirname(queryFile).toLowerCase();
|
|
969
1357
|
for (const unitPath of unitPaths) {
|
|
970
|
-
const unitBase =
|
|
971
|
-
const unitDir =
|
|
1358
|
+
const unitBase = path7.basename(unitPath).toLowerCase();
|
|
1359
|
+
const unitDir = path7.dirname(unitPath).toLowerCase();
|
|
972
1360
|
const q = queryFile.toLowerCase();
|
|
973
1361
|
const u = unitPath.toLowerCase();
|
|
974
1362
|
if (q === u) best = Math.max(best, 1);
|
|
@@ -1099,6 +1487,159 @@ function rankWisdomUnits(db, input) {
|
|
|
1099
1487
|
return [...grouped.values()].sort((a, b) => b.score - a.score || b.confidence - a.confidence).slice(0, limit);
|
|
1100
1488
|
}
|
|
1101
1489
|
|
|
1490
|
+
// src/retrieval/code-ranker.ts
|
|
1491
|
+
import path8 from "path";
|
|
1492
|
+
/**
 * Parses a JSON string that is expected to hold an array of strings.
 *
 * @param {string} value - JSON text.
 * @returns {string[]} The string elements of the parsed array; an empty
 *   array when parsing fails or the parsed value is not an array.
 */
function parseJsonArray2(value) {
  let parsed;
  try {
    parsed = JSON.parse(value);
  } catch {
    return [];
  }
  if (!Array.isArray(parsed)) {
    return [];
  }
  return parsed.filter((item) => typeof item === "string");
}
|
|
1500
|
+
/**
 * Converts a `code_chunks` result row into a chunk object.
 *
 * Snake-case columns become camel-case properties, `symbols_json` is parsed
 * into an array, and null `language` / `bm25` values become `undefined`.
 *
 * @param {object} row - Row selected from `code_chunks`, optionally with a `bm25` column.
 * @returns {object} Chunk object.
 */
function rowToCodeChunk(row) {
  const {
    id,
    repo,
    file_path: filePath,
    start_line: startLine,
    end_line: endLine,
    sanitized_text: sanitizedText,
    content_hash: contentHash,
    updated_at: updatedAt
  } = row;
  const language = row.language ?? void 0;
  const symbols = parseJsonArray2(row.symbols_json);
  const bm25 = row.bm25 ?? void 0;
  return {
    id,
    repo,
    filePath,
    language,
    startLine,
    endLine,
    sanitizedText,
    symbols,
    contentHash,
    updatedAt,
    bm25
  };
}
|
|
1515
|
+
/**
 * Scores how closely a chunk's file path relates to the queried files.
 *
 * For each query file the strongest applicable relation is taken
 * (comparisons are case-insensitive), and the best score over all query
 * files is returned:
 *   1.00 identical path, 0.72 same basename, 0.62 same directory,
 *   0.48 same basename stem (text before the first dot),
 *   0.38 one directory contains the other.
 *
 * @param {string} filePath - Path of the indexed chunk's file.
 * @param {string[]} queryFiles - Paths mentioned by the query.
 * @returns {number} Score between 0 and 1; 0 when nothing relates.
 */
function filePathMatch2(filePath, queryFiles) {
  if (queryFiles.length === 0) return 0;

  // True when `child` lies strictly below `parent`, compared on whole path
  // segments. "." (no directory part) is never treated as a parent.
  const isInside = (child, parent) => {
    if (parent === "." || child.length <= parent.length || !child.startsWith(parent)) {
      return false;
    }
    const separator = child[parent.length];
    return separator === "/" || separator === "\\";
  };
  const stemOf = (base) => base.split(".")[0];

  const unit = filePath.toLowerCase();
  const unitBase = path8.basename(filePath).toLowerCase();
  const unitDir = path8.dirname(filePath).toLowerCase();
  const unitStem = stemOf(unitBase);

  let best = 0;
  for (const queryFile of queryFiles) {
    const query = queryFile.toLowerCase();
    const queryBase = path8.basename(queryFile).toLowerCase();
    const queryDir = path8.dirname(queryFile).toLowerCase();
    const queryStem = stemOf(queryBase);

    // Branches are ordered from strongest to weakest relation so a weaker
    // match can never shadow a stronger one.
    let score = 0;
    if (query === unit) score = 1;
    else if (queryBase === unitBase) score = 0.72;
    else if (queryDir === unitDir) score = 0.62;
    // Dotfiles have an empty stem; do not let them all match each other.
    else if (queryStem !== "" && queryStem === unitStem) score = 0.48;
    else if (isInside(unitDir, queryDir) || isInside(queryDir, unitDir)) score = 0.38;

    best = Math.max(best, score);
  }
  return best;
}
|
|
1536
|
+
/**
 * Scores how well a chunk matches the queried symbols.
 *
 * Per query symbol (case-insensitive): 1 when it equals one of the chunk's
 * symbols, 0.7 when it appears as a whole word in the chunk text, 0.42 when
 * it contains or is contained in one of the chunk's symbols. The best score
 * over all query symbols is returned.
 *
 * @param {{symbols: string[], sanitizedText: string}} chunk - Candidate chunk.
 * @param {string[]} querySymbols - Symbols mentioned by the query.
 * @returns {number} Score between 0 and 1.
 */
function symbolMatch2(chunk, querySymbols) {
  if (querySymbols.length === 0) return 0;

  // Blank symbols would match everything through includes(""), so drop them.
  const chunkSymbols = chunk.symbols.map((symbol) => symbol.toLowerCase()).filter(Boolean);
  const exactSymbols = new Set(chunkSymbols);
  const text = chunk.sanitizedText.toLowerCase();

  let best = 0;
  for (const symbol of querySymbols) {
    const lower = symbol.toLowerCase();
    // A blank query symbol carries no signal and must not score.
    if (!lower) continue;

    if (exactSymbols.has(lower)) {
      // Nothing can beat an exact match.
      return 1;
    }
    if (new RegExp(`\\b${escapeRegExp2(lower)}\\b`, "i").test(text)) {
      best = Math.max(best, 0.7);
    } else if (chunkSymbols.some((candidate) => candidate.includes(lower) || lower.includes(candidate))) {
      best = Math.max(best, 0.42);
    }
  }
  return best;
}
|
|
1551
|
+
/**
 * Scores the textual similarity between a chunk and the query.
 *
 * The score is the larger of (a) the fraction of query tokens found in the
 * chunk's text, path and symbols, and (b) a signal derived from the chunk's
 * bm25 value, clamped to the range 0.25..1 (0 when the chunk has no bm25).
 *
 * @param {object} chunk - Candidate chunk (`sanitizedText`, `filePath`,
 *   `symbols`, optional `bm25`).
 * @param {object} input - Query input; its `task` (or `query`), `diff` and
 *   `currentCode` fields are tokenized.
 * @returns {number} Score between 0 and 1.
 */
function textMatch2(chunk, input) {
  // Inputs may carry the free text as `task` or as `query`; without this
  // fallback a query-style input would be tokenized as the word "undefined".
  const baseText = input.task ?? input.query ?? "";
  const tokens = tokenizeSearchText(
    `${baseText} ${input.diff ?? ""} ${input.currentCode ?? ""}`,
    40
  );
  const haystack = `${chunk.sanitizedText} ${chunk.filePath} ${chunk.symbols.join(" ")}`.toLowerCase();
  const overlap = tokens.length
    ? tokens.filter((token) => haystack.includes(token.toLowerCase())).length / tokens.length
    : 0;
  const bm25Signal =
    chunk.bm25 === void 0 ? 0 : Math.max(0.25, Math.min(1, 1 / (1 + Math.abs(chunk.bm25))));
  return Math.max(overlap, bm25Signal);
}
|
|
1561
|
+
/**
 * Scores a chunk by how recently its file was updated.
 *
 * @param {{updatedAt: string}} chunk - Chunk whose `updatedAt` is parsed with `Date.parse`.
 * @returns {number} 1 for under 30 days, 0.75 for under 180 days, 0.45 for
 *   under 730 days, otherwise 0.25. Unparseable timestamps score 0.25;
 *   timestamps in the future count as age zero.
 */
function recencyScore2(chunk) {
  const MILLISECONDS_PER_DAY = 1e3 * 60 * 60 * 24;
  const updatedAtMs = Date.parse(chunk.updatedAt);
  if (Number.isNaN(updatedAtMs)) return 0.25;

  const ageDays = Math.max(0, (Date.now() - updatedAtMs) / MILLISECONDS_PER_DAY);
  const tiers = [
    [30, 1],
    [180, 0.75],
    [730, 0.45]
  ];
  for (const [maxAgeDays, score] of tiers) {
    if (ageDays < maxAgeDays) return score;
  }
  return 0.25;
}
|
|
1570
|
+
/**
 * Escapes regular-expression metacharacters so the text matches literally.
 *
 * @param {string} value - Text to embed in a regular expression.
 * @returns {string} The text with each metacharacter prefixed by a backslash.
 */
function escapeRegExp2(value) {
  const metaCharacter = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaCharacter, (character) => `\\${character}`);
}
|
|
1573
|
+
/**
 * Escapes SQL LIKE wildcards for use with a backslash ESCAPE clause.
 *
 * @param {string} value - Text to embed in a LIKE pattern.
 * @returns {string} The text with `\`, `%` and `_` each prefixed by a backslash.
 */
function escapeLike(value) {
  const likeSpecial = /[\\%_]/g;
  return value.replace(likeSpecial, "\\$&");
}
|
|
1576
|
+
/**
 * Loads the candidate chunks that ranking will score.
 *
 * Candidates come from (1) a full-text MATCH on `code_chunks_fts` when an
 * FTS query can be built, and (2) chunks whose file path equals a queried
 * file or shares its basename. When neither yields anything, the 80 most
 * recently updated chunks are used instead.
 *
 * @param {object} db - Database handle exposing `prepare()`.
 * @param {object} input - Query input; `files` is optional.
 * @returns {Array<object>} Unique candidate chunks keyed by chunk id.
 */
function loadCodeCandidates(db, input) {
  const candidates = new Map();

  const ftsQuery = buildFtsQuery(input);
  if (ftsQuery) {
    const rows = db.prepare(
      `SELECT cc.*, bm25(code_chunks_fts) AS bm25
       FROM code_chunks_fts
       JOIN code_chunks cc ON cc.id = code_chunks_fts.chunkId
       WHERE code_chunks_fts MATCH ?
       ORDER BY bm25(code_chunks_fts)
       LIMIT 150`
    ).all(ftsQuery);
    for (const row of rows) {
      const chunk = rowToCodeChunk(row);
      candidates.set(chunk.id, chunk);
    }
  }

  const queryFiles = input.files ?? [];
  if (queryFiles.length > 0) {
    // Prepared once and reused for every queried file. The second equality
    // covers root-level files, which the "%/basename" pattern cannot match.
    const selectByPath = db.prepare(
      `SELECT cc.*, NULL AS bm25
       FROM code_chunks cc
       WHERE cc.file_path = ?
          OR cc.file_path = ?
          OR cc.file_path LIKE ? ESCAPE '\\'
       LIMIT 80`
    );
    for (const file of queryFiles) {
      const basename = path8.basename(file);
      const rows = selectByPath.all(file, basename, `%/${escapeLike(basename)}`);
      for (const row of rows) {
        const chunk = rowToCodeChunk(row);
        // Keep the bm25 value of an earlier full-text hit for the same chunk.
        candidates.set(chunk.id, { ...chunk, bm25: candidates.get(chunk.id)?.bm25 ?? chunk.bm25 });
      }
    }
  }

  if (candidates.size === 0) {
    const rows = db.prepare(
      `SELECT cc.*, NULL AS bm25
       FROM code_chunks cc
       ORDER BY updated_at DESC
       LIMIT 80`
    ).all();
    for (const row of rows) {
      const chunk = rowToCodeChunk(row);
      candidates.set(chunk.id, chunk);
    }
  }
  return [...candidates.values()];
}
|
|
1621
|
+
/**
 * Ranks indexed code chunks against a query and returns the best few.
 *
 * Each candidate's score is the weighted sum of file-path match (0.4),
 * symbol match (0.25), text match (0.25) and recency (0.1), rounded to four
 * decimals. Results are ordered by score, then by descending start line, and
 * limited to at most five entries.
 *
 * @param {object} db - Database handle passed to `loadCodeCandidates`.
 * @param {object} input - Query input; `files`, `symbols` and `maxResults` are optional.
 * @returns {Array<object>} Chunks extended with `score` and `scoreParts`.
 */
function rankCodeChunks(db, input) {
  const queryFiles = input.files ?? [];
  const querySymbols = input.symbols ?? [];

  const scored = [];
  for (const chunk of loadCodeCandidates(db, input)) {
    const scoreParts = {
      filePathMatch: filePathMatch2(chunk.filePath, queryFiles),
      symbolMatch: symbolMatch2(chunk, querySymbols),
      textMatch: textMatch2(chunk, input),
      recency: recencyScore2(chunk)
    };
    const weighted =
      0.4 * scoreParts.filePathMatch +
      0.25 * scoreParts.symbolMatch +
      0.25 * scoreParts.textMatch +
      0.1 * scoreParts.recency;
    scored.push({
      ...chunk,
      symbols: uniqueStrings(chunk.symbols),
      score: Number(weighted.toFixed(4)),
      scoreParts
    });
  }

  scored.sort((left, right) => right.score - left.score || right.startLine - left.startLine);
  const limit = Math.min(5, clampMaxResults(input.maxResults, 5));
  return scored.slice(0, limit);
}
|
|
1642
|
+
|
|
1102
1643
|
// src/retrieval/formatter.ts
|
|
1103
1644
|
function evidenceLine(unit) {
|
|
1104
1645
|
const author = unit.authors[0] ? ` by @${unit.authors[0]}` : "";
|
|
@@ -1125,14 +1666,20 @@ function whyItMatters(unit, input) {
|
|
|
1125
1666
|
/**
 * Derives up to four risk reminders from the categories of the given units.
 *
 * Each recognised category contributes one fixed sentence; sentences are
 * de-duplicated and kept in the order their category first appears.
 *
 * @param {Array<{category: string}>} units - Ranked wisdom units.
 * @returns {string[]} At most four distinct risk sentences.
 */
function riskLines(units) {
  const riskByCategory = new Map([
    ["security_note", "Avoid logging, exposing, or weakening security-sensitive values."],
    ["bug_regression", "Check for regressions similar to the cited PR history."],
    ["api_contract", "Preserve documented API and backward-compatibility contracts."],
    [
      "constraint",
      "Do not remove constraints without verifying the original reason no longer applies."
    ]
  ]);

  const collected = new Set();
  for (const unit of units) {
    const risk = riskByCategory.get(unit.category);
    if (risk !== undefined) {
      collected.add(risk);
    }
  }
  return Array.from(collected).slice(0, 4);
}
|
|
1135
|
-
function formatAnchorContext(units, input) {
|
|
1682
|
+
function formatAnchorContext(units, input, codeChunks = []) {
|
|
1136
1683
|
const lines = ["# Anchor Context", "", "## Must know", ""];
|
|
1137
1684
|
if (units.length === 0) {
|
|
1138
1685
|
lines.push("No directly relevant indexed PR history found.", "");
|
|
@@ -1146,6 +1693,18 @@ function formatAnchorContext(units, input) {
|
|
|
1146
1693
|
lines.push("");
|
|
1147
1694
|
});
|
|
1148
1695
|
}
|
|
1696
|
+
lines.push("## Codebase Evidence", "");
|
|
1697
|
+
if (codeChunks.length === 0) {
|
|
1698
|
+
lines.push("No directly relevant indexed codebase context found.", "");
|
|
1699
|
+
} else {
|
|
1700
|
+
codeChunks.forEach((chunk, index) => {
|
|
1701
|
+
const symbols = chunk.symbols.length ? `; symbols: ${chunk.symbols.slice(0, 6).join(", ")}` : "";
|
|
1702
|
+
lines.push(`${index + 1}. ${chunk.filePath}:${chunk.startLine}-${chunk.endLine}${symbols}`);
|
|
1703
|
+
lines.push(` Why it matters: Current code near this match may affect the requested edit.`);
|
|
1704
|
+
lines.push(` Snippet: ${clipSentence(chunk.sanitizedText, 260)}`);
|
|
1705
|
+
lines.push("");
|
|
1706
|
+
});
|
|
1707
|
+
}
|
|
1149
1708
|
lines.push("## Risks", "");
|
|
1150
1709
|
const risks = riskLines(units);
|
|
1151
1710
|
if (risks.length === 0) {
|
|
@@ -1172,6 +1731,15 @@ function formatAnchorContext(units, input) {
|
|
|
1172
1731
|
filePaths: unit.filePaths,
|
|
1173
1732
|
symbols: unit.symbols,
|
|
1174
1733
|
duplicateCount: unit.duplicateCount
|
|
1734
|
+
})),
|
|
1735
|
+
codeEvidence: codeChunks.map((chunk) => ({
|
|
1736
|
+
id: chunk.id,
|
|
1737
|
+
score: chunk.score,
|
|
1738
|
+
filePath: chunk.filePath,
|
|
1739
|
+
language: chunk.language,
|
|
1740
|
+
startLine: chunk.startLine,
|
|
1741
|
+
endLine: chunk.endLine,
|
|
1742
|
+
symbols: chunk.symbols
|
|
1175
1743
|
}))
|
|
1176
1744
|
}
|
|
1177
1745
|
};
|
|
@@ -1220,7 +1788,10 @@ function formatIndexStatus(status) {
|
|
|
1220
1788
|
`- Files: ${status.fileCount}`,
|
|
1221
1789
|
`- Comments: ${status.commentCount}`,
|
|
1222
1790
|
`- Wisdom units: ${status.wisdomUnitCount}`,
|
|
1791
|
+
`- Code files: ${status.codeFileCount}`,
|
|
1792
|
+
`- Code chunks: ${status.codeChunkCount}`,
|
|
1223
1793
|
`- Last sync: ${status.lastSyncTime ?? "never"}`,
|
|
1794
|
+
`- Last code index: ${status.lastCodeIndexTime ?? "never"}`,
|
|
1224
1795
|
`- GitHub token configured: ${status.githubTokenConfigured ? "yes" : "no"}`,
|
|
1225
1796
|
`- Health: ${status.health}`
|
|
1226
1797
|
];
|
|
@@ -1416,8 +1987,8 @@ async function fetchMergedPullRequests(options) {
|
|
|
1416
1987
|
}
|
|
1417
1988
|
|
|
1418
1989
|
// src/doctor.ts
|
|
1419
|
-
import
|
|
1420
|
-
import
|
|
1990
|
+
import fs4 from "fs";
|
|
1991
|
+
import path9 from "path";
|
|
1421
1992
|
function check(name, ok, message, fix) {
|
|
1422
1993
|
return { name, ok, message, fix: ok ? void 0 : fix };
|
|
1423
1994
|
}
|
|
@@ -1478,12 +2049,12 @@ async function runDoctor(options) {
|
|
|
1478
2049
|
)
|
|
1479
2050
|
);
|
|
1480
2051
|
}
|
|
1481
|
-
const cursorConfigPath =
|
|
2052
|
+
const cursorConfigPath = path9.join(gitRoot ?? cwd, ".cursor", "mcp.json");
|
|
1482
2053
|
let cursorConfig;
|
|
1483
2054
|
let cursorConfigValid = false;
|
|
1484
|
-
if (
|
|
2055
|
+
if (fs4.existsSync(cursorConfigPath)) {
|
|
1485
2056
|
try {
|
|
1486
|
-
cursorConfig = JSON.parse(
|
|
2057
|
+
cursorConfig = JSON.parse(fs4.readFileSync(cursorConfigPath, "utf8"));
|
|
1487
2058
|
cursorConfigValid = true;
|
|
1488
2059
|
} catch {
|
|
1489
2060
|
cursorConfigValid = false;
|
|
@@ -1492,7 +2063,7 @@ async function runDoctor(options) {
|
|
|
1492
2063
|
checks.push(
|
|
1493
2064
|
check(
|
|
1494
2065
|
".cursor/mcp.json valid",
|
|
1495
|
-
|
|
2066
|
+
fs4.existsSync(cursorConfigPath) && cursorConfigValid,
|
|
1496
2067
|
cursorConfigValid ? ".cursor/mcp.json exists and is valid JSON." : ".cursor/mcp.json is missing or invalid.",
|
|
1497
2068
|
"Run anchor init. If the file is malformed, fix the JSON and rerun anchor init."
|
|
1498
2069
|
)
|
|
@@ -1509,7 +2080,7 @@ async function runDoctor(options) {
|
|
|
1509
2080
|
)
|
|
1510
2081
|
);
|
|
1511
2082
|
const dbPath = defaultDatabasePath(gitRoot ?? cwd);
|
|
1512
|
-
const dbExists =
|
|
2083
|
+
const dbExists = fs4.existsSync(dbPath);
|
|
1513
2084
|
checks.push(
|
|
1514
2085
|
check(
|
|
1515
2086
|
".anchor/index.sqlite exists",
|
|
@@ -1553,12 +2124,12 @@ async function runDoctor(options) {
|
|
|
1553
2124
|
"Run pnpm build, then try anchor serve from the repository."
|
|
1554
2125
|
)
|
|
1555
2126
|
);
|
|
1556
|
-
const rulePath =
|
|
2127
|
+
const rulePath = path9.join(gitRoot ?? cwd, ".cursor", "rules", "anchor.mdc");
|
|
1557
2128
|
checks.push(
|
|
1558
2129
|
check(
|
|
1559
2130
|
"Cursor rule file exists",
|
|
1560
|
-
|
|
1561
|
-
|
|
2131
|
+
fs4.existsSync(rulePath),
|
|
2132
|
+
fs4.existsSync(rulePath) ? "Cursor rule file exists." : "Cursor rule file is missing.",
|
|
1562
2133
|
"Run anchor init to create .cursor/rules/anchor.mdc."
|
|
1563
2134
|
)
|
|
1564
2135
|
);
|
|
@@ -1566,12 +2137,14 @@ async function runDoctor(options) {
|
|
|
1566
2137
|
}
|
|
1567
2138
|
export {
|
|
1568
2139
|
ANCHOR_CURSOR_RULE,
|
|
2140
|
+
DEFAULT_MAX_CODE_FILE_BYTES,
|
|
1569
2141
|
SCHEMA_SQL,
|
|
1570
2142
|
anchorMcpEntry,
|
|
1571
2143
|
buildFtsQuery,
|
|
1572
2144
|
canonicalizeText,
|
|
1573
2145
|
categorizeWisdom,
|
|
1574
2146
|
checkSchema,
|
|
2147
|
+
chunkCodeFile,
|
|
1575
2148
|
chunkHistoricalText,
|
|
1576
2149
|
clampMaxResults,
|
|
1577
2150
|
clipSentence,
|
|
@@ -1579,10 +2152,13 @@ export {
|
|
|
1579
2152
|
defaultDatabasePath,
|
|
1580
2153
|
detectGitHubRepo,
|
|
1581
2154
|
detectGitRoot,
|
|
2155
|
+
discoverCodeFiles,
|
|
2156
|
+
emptyCodeIndexSummary,
|
|
1582
2157
|
ensureAnchorGitExclude,
|
|
1583
2158
|
ensureCursorConfig,
|
|
1584
2159
|
ensureCursorRule,
|
|
1585
2160
|
ensureRepository,
|
|
2161
|
+
extractCodeSymbols,
|
|
1586
2162
|
extractSymbols,
|
|
1587
2163
|
extractWisdomUnits,
|
|
1588
2164
|
fetchMergedPullRequests,
|
|
@@ -1594,15 +2170,19 @@ export {
|
|
|
1594
2170
|
getLastSyncTime,
|
|
1595
2171
|
githubAuthFixMessage,
|
|
1596
2172
|
hasHighSignalLanguage,
|
|
2173
|
+
indexCodebase,
|
|
1597
2174
|
indexPullRequests,
|
|
1598
2175
|
initializeSchema,
|
|
2176
|
+
isHardExcludedCodePath,
|
|
1599
2177
|
mergeAnchorMcpConfig,
|
|
1600
2178
|
normalizePullRequest,
|
|
1601
2179
|
openAnchorDatabase,
|
|
1602
2180
|
parseGitHubRemote,
|
|
2181
|
+
rankCodeChunks,
|
|
1603
2182
|
rankWisdomUnits,
|
|
1604
2183
|
redactSecrets,
|
|
1605
2184
|
redactedHistoricalText,
|
|
2185
|
+
replaceCodeIndex,
|
|
1606
2186
|
resolveGitHubToken,
|
|
1607
2187
|
resolvePullRequestDetailConcurrency,
|
|
1608
2188
|
resolvePullRequestFetchLimit,
|