opencode-codebase-index 0.1.10 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +108 -36
- package/dist/index.cjs +525 -136
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +527 -138
- package/dist/index.js.map +1 -1
- package/native/codebase-index-native.darwin-arm64.node +0 -0
- package/native/codebase-index-native.darwin-x64.node +0 -0
- package/native/codebase-index-native.linux-arm64-gnu.node +0 -0
- package/native/codebase-index-native.linux-x64-gnu.node +0 -0
- package/native/codebase-index-native.win32-x64-msvc.node +0 -0
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -693,6 +693,8 @@ function getDefaultIndexingConfig() {
|
|
|
693
693
|
autoIndex: false,
|
|
694
694
|
watchFiles: true,
|
|
695
695
|
maxFileSize: 1048576,
|
|
696
|
+
maxChunksPerFile: 100,
|
|
697
|
+
semanticOnly: false,
|
|
696
698
|
retries: 3,
|
|
697
699
|
retryDelayMs: 1e3
|
|
698
700
|
};
|
|
@@ -726,6 +728,8 @@ function parseConfig(raw) {
|
|
|
726
728
|
autoIndex: typeof rawIndexing.autoIndex === "boolean" ? rawIndexing.autoIndex : defaultIndexing.autoIndex,
|
|
727
729
|
watchFiles: typeof rawIndexing.watchFiles === "boolean" ? rawIndexing.watchFiles : defaultIndexing.watchFiles,
|
|
728
730
|
maxFileSize: typeof rawIndexing.maxFileSize === "number" ? rawIndexing.maxFileSize : defaultIndexing.maxFileSize,
|
|
731
|
+
maxChunksPerFile: typeof rawIndexing.maxChunksPerFile === "number" ? Math.max(1, rawIndexing.maxChunksPerFile) : defaultIndexing.maxChunksPerFile,
|
|
732
|
+
semanticOnly: typeof rawIndexing.semanticOnly === "boolean" ? rawIndexing.semanticOnly : defaultIndexing.semanticOnly,
|
|
729
733
|
retries: typeof rawIndexing.retries === "number" ? rawIndexing.retries : defaultIndexing.retries,
|
|
730
734
|
retryDelayMs: typeof rawIndexing.retryDelayMs === "number" ? rawIndexing.retryDelayMs : defaultIndexing.retryDelayMs
|
|
731
735
|
};
|
|
@@ -2189,7 +2193,10 @@ function shouldIncludeFile(filePath, projectRoot, includePatterns, excludePatter
|
|
|
2189
2193
|
return false;
|
|
2190
2194
|
}
|
|
2191
2195
|
// Test whether `filePath` matches the glob `pattern`.
//
// Supported glob syntax:
//   - a double star matches any characters, including "/"
//   - a single star matches any run of characters inside one path segment
//   - "?" matches exactly one character inside one path segment
//   - "{a,b}" matches either alternative
// A pattern that starts with a double star followed by "/" also matches paths
// that have no directory prefix at all.
//
// @param {string} filePath - path to test (expected to use "/" separators)
// @param {string} pattern  - glob pattern
// @returns {boolean} true when the whole path matches
function matchGlob(filePath, pattern) {
  let regexPattern = pattern
    // Escape regex metacharacters that carry no glob meaning; otherwise the
    // "." in "*.ts" would match any character. Brackets, "^" and "\" are left
    // untouched so existing character classes / escapes keep working.
    .replace(/[.+$()|]/g, "\\$&")
    .replace(/\*\*/g, "<<<DOUBLESTAR>>>")
    .replace(/\*/g, "[^/]*")
    .replace(/<<<DOUBLESTAR>>>/g, ".*")
    // "?" is a single-character wildcard that must not cross a "/".
    .replace(/\?/g, "[^/]")
    .replace(/\{([^}]+)\}/g, (_, alternatives) => `(${alternatives.split(",").join("|")})`);
  if (regexPattern.startsWith(".*/")) {
    // Make the leading directory part optional so root-level files match too.
    regexPattern = `(.*\\/)?${regexPattern.slice(3)}`;
  }
  return new RegExp(`^${regexPattern}$`).test(filePath);
}
|
|
@@ -2626,158 +2633,204 @@ function generateChunkId(filePath, chunk) {
|
|
|
2626
2633
|
// Hash a chunk by its `content` field, using the shared `hashContent` helper.
function generateChunkHash(chunk) {
  const { content } = chunk;
  return hashContent(content);
}
|
|
2629
|
-
|
|
2630
|
-
// src/indexer/inverted-index.ts
|
|
2631
|
-
var import_fs3 = require("fs");
|
|
2632
|
-
var path4 = __toESM(require("path"), 1);
|
|
2633
2636
|
// Keyword index facade. Every operation is forwarded to the
// `native.InvertedIndex` instance stored in `inner`.
var InvertedIndex = class {
  inner;

  // `indexPath` is handed to the native constructor unchanged.
  constructor(indexPath) {
    this.inner = new native.InvertedIndex(indexPath);
  }

  load() {
    this.inner.load();
  }

  save() {
    this.inner.save();
  }

  addChunk(chunkId, content) {
    this.inner.addChunk(chunkId, content);
  }

  // Returns whatever the native `removeChunk` returns.
  removeChunk(chunkId) {
    return this.inner.removeChunk(chunkId);
  }

  // Runs the native search (limit defaults to 100 when nullish) and returns a
  // Map of chunkId -> score; a later duplicate chunkId overwrites an earlier one.
  search(query, limit) {
    const hits = this.inner.search(query, limit ?? 100);
    return new Map(Array.from(hits, (hit) => [hit.chunkId, hit.score]));
  }

  hasChunk(chunkId) {
    return this.inner.hasChunk(chunkId);
  }

  clear() {
    this.inner.clear();
  }

  getDocumentCount() {
    return this.inner.documentCount();
  }
};
|
|
2671
|
+
// Facade over `native.Database`. Each method forwards its arguments to the
// native instance in `inner`; the lookups getEmbedding / getChunk /
// getMetadata turn a nullish native result into `null`.
var Database = class {
  inner;

  constructor(dbPath) {
    this.inner = new native.Database(dbPath);
  }

  // --- embeddings -----------------------------------------------------------
  embeddingExists(contentHash) {
    return this.inner.embeddingExists(contentHash);
  }

  getEmbedding(contentHash) {
    const stored = this.inner.getEmbedding(contentHash);
    return stored ?? null;
  }

  upsertEmbedding(contentHash, embedding, chunkText, model) {
    this.inner.upsertEmbedding(contentHash, embedding, chunkText, model);
  }

  getMissingEmbeddings(contentHashes) {
    return this.inner.getMissingEmbeddings(contentHashes);
  }

  // --- chunks ---------------------------------------------------------------
  upsertChunk(chunk) {
    this.inner.upsertChunk(chunk);
  }

  getChunk(chunkId) {
    const stored = this.inner.getChunk(chunkId);
    return stored ?? null;
  }

  getChunksByFile(filePath) {
    return this.inner.getChunksByFile(filePath);
  }

  deleteChunksByFile(filePath) {
    return this.inner.deleteChunksByFile(filePath);
  }

  // --- branches -------------------------------------------------------------
  addChunksToBranch(branch, chunkIds) {
    this.inner.addChunksToBranch(branch, chunkIds);
  }

  clearBranch(branch) {
    return this.inner.clearBranch(branch);
  }

  getBranchChunkIds(branch) {
    return this.inner.getBranchChunkIds(branch);
  }

  getBranchDelta(branch, baseBranch) {
    return this.inner.getBranchDelta(branch, baseBranch);
  }

  chunkExistsOnBranch(branch, chunkId) {
    return this.inner.chunkExistsOnBranch(branch, chunkId);
  }

  getAllBranches() {
    return this.inner.getAllBranches();
  }

  // --- metadata -------------------------------------------------------------
  getMetadata(key) {
    const stored = this.inner.getMetadata(key);
    return stored ?? null;
  }

  setMetadata(key, value) {
    this.inner.setMetadata(key, value);
  }

  deleteMetadata(key) {
    return this.inner.deleteMetadata(key);
  }

  // --- maintenance ----------------------------------------------------------
  gcOrphanEmbeddings() {
    return this.inner.gcOrphanEmbeddings();
  }

  gcOrphanChunks() {
    return this.inner.gcOrphanChunks();
  }

  getStats() {
    return this.inner.getStats();
  }
};
|
|
2764
2737
|
|
|
2738
|
+
// src/git/index.ts
|
|
2739
|
+
var import_fs3 = require("fs");
|
|
2740
|
+
var path4 = __toESM(require("path"), 1);
|
|
2741
|
+
var import_child_process = require("child_process");
|
|
2742
|
+
// True when `dir` contains an entry named ".git".
function isGitRepo(dir) {
  const gitEntry = path4.join(dir, ".git");
  return (0, import_fs3.existsSync)(gitEntry);
}
|
|
2745
|
+
/**
 * Read `<repoRoot>/.git/HEAD` and derive a branch label from it.
 *
 * @param {string} repoRoot - Directory expected to contain `.git/HEAD`.
 * @returns {string|null} The branch name when HEAD is `ref: refs/heads/<name>`;
 *   the first 7 characters of the hash when HEAD is a bare object id
 *   (40 hex chars, or 64 for SHA-256 repositories); `null` when the file is
 *   missing, unreadable, or has any other content.
 */
function getCurrentBranch(repoRoot) {
  let headContent;
  try {
    // Reading directly (instead of existsSync + read) handles the missing-file
    // case through the same catch and avoids a check-then-read race.
    headContent = (0, import_fs3.readFileSync)(path4.join(repoRoot, ".git", "HEAD"), "utf-8").trim();
  } catch {
    return null;
  }
  const symbolicRef = headContent.match(/^ref: refs\/heads\/(.+)$/);
  if (symbolicRef) {
    return symbolicRef[1];
  }
  // Detached HEAD: the file holds a raw object id.
  if (/^(?:[0-9a-f]{40}|[0-9a-f]{64})$/i.test(headContent)) {
    return headContent.slice(0, 7);
  }
  return null;
}
|
|
2764
|
+
/**
 * Guess the repository's base branch.
 *
 * Resolution order:
 *  1. the first of "main", "master", "develop", "trunk" that exists as a loose
 *     ref under `.git/refs/heads` or as an exact entry in `.git/packed-refs`;
 *  2. the "HEAD branch" reported by `git remote show origin`;
 *  3. the current branch (via `getCurrentBranch`), or "main" as a last resort.
 *
 * @param {string} repoRoot - Repository root directory.
 * @returns {string} Name of the presumed base branch.
 */
function getBaseBranch(repoRoot) {
  const candidates = ["main", "master", "develop", "trunk"];
  const gitDir = path4.join(repoRoot, ".git");

  // Parse packed-refs once. Entries look like "<hash> refs/heads/<name>", so
  // comparing whole ref names avoids treating "refs/heads/mainline" as "main".
  const packedBranches = /* @__PURE__ */ new Set();
  try {
    const packedRefs = (0, import_fs3.readFileSync)(path4.join(gitDir, "packed-refs"), "utf-8");
    for (const line of packedRefs.split(/\r?\n/)) {
      const [, refName] = line.split(" ");
      if (refName?.startsWith("refs/heads/")) {
        packedBranches.add(refName.slice("refs/heads/".length));
      }
    }
  } catch {
    // packed-refs is optional; when absent or unreadable only loose refs count.
  }

  for (const candidate of candidates) {
    const looseRef = path4.join(gitDir, "refs", "heads", candidate);
    if ((0, import_fs3.existsSync)(looseRef) || packedBranches.has(candidate)) {
      return candidate;
    }
  }

  try {
    const result = (0, import_child_process.execSync)("git remote show origin", {
      cwd: repoRoot,
      encoding: "utf-8",
      stdio: ["pipe", "pipe", "pipe"]
    });
    const match = result.match(/HEAD branch: (.+)/);
    if (match) {
      return match[1].trim();
    }
  } catch {
    // Best effort: git missing, no "origin" remote, or the command failed.
  }
  return getCurrentBranch(repoRoot) ?? "main";
}
|
|
2796
|
+
// Branch label for `repoRoot`: the result of getCurrentBranch when the
// directory is a git repo and a branch could be determined, else "default".
function getBranchOrDefault(repoRoot) {
  const branch = isGitRepo(repoRoot) ? getCurrentBranch(repoRoot) : null;
  return branch ?? "default";
}
|
|
2802
|
+
// Path of the HEAD file inside the repository's ".git" directory.
function getHeadPath(repoRoot) {
  const segments = [repoRoot, ".git", "HEAD"];
  return path4.join(...segments);
}
|
|
2805
|
+
|
|
2765
2806
|
// src/indexer/index.ts
|
|
2807
|
+
// Copy `arr` into a fresh Float32Array and return a Buffer over that copy's
// bytes (4 bytes per element, platform byte order).
function float32ArrayToBuffer(arr) {
  const { buffer } = new Float32Array(arr);
  return Buffer.from(buffer);
}
|
|
2811
|
+
/**
 * Interpret the bytes of `buf` as 32-bit floats (platform byte order).
 *
 * When `buf.byteOffset` is 4-byte aligned the result is a view over the same
 * memory. A Float32Array cannot start at an unaligned offset (the constructor
 * throws RangeError), so in that case the bytes are copied to fresh storage.
 * Trailing bytes that do not fill a whole float are ignored.
 *
 * @param {Buffer} buf - Raw float data.
 * @returns {Float32Array} The decoded values.
 */
function bufferToFloat32Array(buf) {
  const bytesPerFloat = Float32Array.BYTES_PER_ELEMENT;
  const length = Math.floor(buf.byteLength / bytesPerFloat);
  if (buf.byteOffset % bytesPerFloat === 0) {
    return new Float32Array(buf.buffer, buf.byteOffset, length);
  }
  const aligned = new Uint8Array(length * bytesPerFloat);
  aligned.set(new Uint8Array(buf.buffer, buf.byteOffset, length * bytesPerFloat));
  return new Float32Array(aligned.buffer);
}
|
|
2766
2814
|
var Indexer = class {
|
|
2767
2815
|
config;
|
|
2768
2816
|
projectRoot;
|
|
2769
2817
|
indexPath;
|
|
2770
2818
|
store = null;
|
|
2771
2819
|
invertedIndex = null;
|
|
2820
|
+
database = null;
|
|
2772
2821
|
provider = null;
|
|
2773
2822
|
detectedProvider = null;
|
|
2774
2823
|
fileHashCache = /* @__PURE__ */ new Map();
|
|
2775
2824
|
fileHashCachePath = "";
|
|
2825
|
+
failedBatchesPath = "";
|
|
2826
|
+
currentBranch = "default";
|
|
2827
|
+
baseBranch = "main";
|
|
2776
2828
|
constructor(projectRoot, config) {
|
|
2777
2829
|
this.projectRoot = projectRoot;
|
|
2778
2830
|
this.config = config;
|
|
2779
2831
|
this.indexPath = this.getIndexPath();
|
|
2780
2832
|
this.fileHashCachePath = path5.join(this.indexPath, "file-hashes.json");
|
|
2833
|
+
this.failedBatchesPath = path5.join(this.indexPath, "failed-batches.json");
|
|
2781
2834
|
}
|
|
2782
2835
|
getIndexPath() {
|
|
2783
2836
|
if (this.config.scope === "global") {
|
|
@@ -2804,6 +2857,37 @@ var Indexer = class {
|
|
|
2804
2857
|
}
|
|
2805
2858
|
(0, import_fs4.writeFileSync)(this.fileHashCachePath, JSON.stringify(obj));
|
|
2806
2859
|
}
|
|
2860
|
+
loadFailedBatches() {
|
|
2861
|
+
try {
|
|
2862
|
+
if ((0, import_fs4.existsSync)(this.failedBatchesPath)) {
|
|
2863
|
+
const data = (0, import_fs4.readFileSync)(this.failedBatchesPath, "utf-8");
|
|
2864
|
+
return JSON.parse(data);
|
|
2865
|
+
}
|
|
2866
|
+
} catch {
|
|
2867
|
+
return [];
|
|
2868
|
+
}
|
|
2869
|
+
return [];
|
|
2870
|
+
}
|
|
2871
|
+
saveFailedBatches(batches) {
|
|
2872
|
+
if (batches.length === 0) {
|
|
2873
|
+
if ((0, import_fs4.existsSync)(this.failedBatchesPath)) {
|
|
2874
|
+
import_fs4.promises.unlink(this.failedBatchesPath).catch(() => {
|
|
2875
|
+
});
|
|
2876
|
+
}
|
|
2877
|
+
return;
|
|
2878
|
+
}
|
|
2879
|
+
(0, import_fs4.writeFileSync)(this.failedBatchesPath, JSON.stringify(batches, null, 2));
|
|
2880
|
+
}
|
|
2881
|
+
// Append one record (chunks, error text, attemptCount 1, ISO timestamp) to the
// stored failed batches and write the list back.
addFailedBatch(batch, error) {
  const recorded = this.loadFailedBatches();
  const entry = {
    chunks: batch,
    error,
    attemptCount: 1,
    lastAttempt: (/* @__PURE__ */ new Date()).toISOString()
  };
  recorded.push(entry);
  this.saveFailedBatches(recorded);
}
|
|
2807
2891
|
async initialize() {
|
|
2808
2892
|
this.detectedProvider = await detectEmbeddingProvider(this.config.embeddingProvider);
|
|
2809
2893
|
if (!this.detectedProvider) {
|
|
@@ -2823,18 +2907,60 @@ var Indexer = class {
|
|
|
2823
2907
|
if ((0, import_fs4.existsSync)(indexFilePath)) {
|
|
2824
2908
|
this.store.load();
|
|
2825
2909
|
}
|
|
2826
|
-
|
|
2827
|
-
this.invertedIndex
|
|
2910
|
+
const invertedIndexPath = path5.join(this.indexPath, "inverted-index.json");
|
|
2911
|
+
this.invertedIndex = new InvertedIndex(invertedIndexPath);
|
|
2912
|
+
try {
|
|
2913
|
+
this.invertedIndex.load();
|
|
2914
|
+
} catch {
|
|
2915
|
+
if ((0, import_fs4.existsSync)(invertedIndexPath)) {
|
|
2916
|
+
await import_fs4.promises.unlink(invertedIndexPath);
|
|
2917
|
+
}
|
|
2918
|
+
this.invertedIndex = new InvertedIndex(invertedIndexPath);
|
|
2919
|
+
}
|
|
2920
|
+
const dbPath = path5.join(this.indexPath, "codebase.db");
|
|
2921
|
+
const dbIsNew = !(0, import_fs4.existsSync)(dbPath);
|
|
2922
|
+
this.database = new Database(dbPath);
|
|
2923
|
+
if (dbIsNew && this.store.count() > 0) {
|
|
2924
|
+
this.migrateFromLegacyIndex();
|
|
2925
|
+
}
|
|
2926
|
+
if (isGitRepo(this.projectRoot)) {
|
|
2927
|
+
this.currentBranch = getBranchOrDefault(this.projectRoot);
|
|
2928
|
+
this.baseBranch = getBaseBranch(this.projectRoot);
|
|
2929
|
+
} else {
|
|
2930
|
+
this.currentBranch = "default";
|
|
2931
|
+
this.baseBranch = "default";
|
|
2932
|
+
}
|
|
2933
|
+
}
|
|
2934
|
+
migrateFromLegacyIndex() {
|
|
2935
|
+
if (!this.store || !this.database) return;
|
|
2936
|
+
const allMetadata = this.store.getAllMetadata();
|
|
2937
|
+
const chunkIds = [];
|
|
2938
|
+
for (const { key, metadata } of allMetadata) {
|
|
2939
|
+
const chunkData = {
|
|
2940
|
+
chunkId: key,
|
|
2941
|
+
contentHash: metadata.hash,
|
|
2942
|
+
filePath: metadata.filePath,
|
|
2943
|
+
startLine: metadata.startLine,
|
|
2944
|
+
endLine: metadata.endLine,
|
|
2945
|
+
nodeType: metadata.chunkType,
|
|
2946
|
+
name: metadata.name,
|
|
2947
|
+
language: metadata.language
|
|
2948
|
+
};
|
|
2949
|
+
this.database.upsertChunk(chunkData);
|
|
2950
|
+
chunkIds.push(key);
|
|
2951
|
+
}
|
|
2952
|
+
this.database.addChunksToBranch(this.currentBranch || "default", chunkIds);
|
|
2828
2953
|
}
|
|
2829
2954
|
async ensureInitialized() {
|
|
2830
|
-
if (!this.store || !this.provider || !this.invertedIndex || !this.detectedProvider) {
|
|
2955
|
+
if (!this.store || !this.provider || !this.invertedIndex || !this.detectedProvider || !this.database) {
|
|
2831
2956
|
await this.initialize();
|
|
2832
2957
|
}
|
|
2833
2958
|
return {
|
|
2834
2959
|
store: this.store,
|
|
2835
2960
|
provider: this.provider,
|
|
2836
2961
|
invertedIndex: this.invertedIndex,
|
|
2837
|
-
detectedProvider: this.detectedProvider
|
|
2962
|
+
detectedProvider: this.detectedProvider,
|
|
2963
|
+
database: this.database
|
|
2838
2964
|
};
|
|
2839
2965
|
}
|
|
2840
2966
|
async estimateCost() {
|
|
@@ -2848,7 +2974,7 @@ var Indexer = class {
|
|
|
2848
2974
|
return createCostEstimate(files, detectedProvider);
|
|
2849
2975
|
}
|
|
2850
2976
|
async index(onProgress) {
|
|
2851
|
-
const { store, provider, invertedIndex } = await this.ensureInitialized();
|
|
2977
|
+
const { store, provider, invertedIndex, database, detectedProvider } = await this.ensureInitialized();
|
|
2852
2978
|
const startTime = Date.now();
|
|
2853
2979
|
const stats = {
|
|
2854
2980
|
totalFiles: 0,
|
|
@@ -2925,11 +3051,30 @@ var Indexer = class {
|
|
|
2925
3051
|
const relativePath = path5.relative(this.projectRoot, parsed.path);
|
|
2926
3052
|
stats.parseFailures.push(relativePath);
|
|
2927
3053
|
}
|
|
3054
|
+
let fileChunkCount = 0;
|
|
2928
3055
|
for (const chunk of parsed.chunks) {
|
|
3056
|
+
if (fileChunkCount >= this.config.indexing.maxChunksPerFile) {
|
|
3057
|
+
break;
|
|
3058
|
+
}
|
|
3059
|
+
if (this.config.indexing.semanticOnly && chunk.chunkType === "other") {
|
|
3060
|
+
continue;
|
|
3061
|
+
}
|
|
2929
3062
|
const id = generateChunkId(parsed.path, chunk);
|
|
2930
3063
|
const contentHash = generateChunkHash(chunk);
|
|
2931
3064
|
currentChunkIds.add(id);
|
|
3065
|
+
const chunkData = {
|
|
3066
|
+
chunkId: id,
|
|
3067
|
+
contentHash,
|
|
3068
|
+
filePath: parsed.path,
|
|
3069
|
+
startLine: chunk.startLine,
|
|
3070
|
+
endLine: chunk.endLine,
|
|
3071
|
+
nodeType: chunk.chunkType,
|
|
3072
|
+
name: chunk.name,
|
|
3073
|
+
language: chunk.language
|
|
3074
|
+
};
|
|
3075
|
+
database.upsertChunk(chunkData);
|
|
2932
3076
|
if (existingChunks.get(id) === contentHash) {
|
|
3077
|
+
fileChunkCount++;
|
|
2933
3078
|
continue;
|
|
2934
3079
|
}
|
|
2935
3080
|
const text = createEmbeddingText(chunk, parsed.path);
|
|
@@ -2942,7 +3087,8 @@ var Indexer = class {
|
|
|
2942
3087
|
language: chunk.language,
|
|
2943
3088
|
hash: contentHash
|
|
2944
3089
|
};
|
|
2945
|
-
pendingChunks.push({ id, text, content: chunk.content, metadata });
|
|
3090
|
+
pendingChunks.push({ id, text, content: chunk.content, contentHash, metadata });
|
|
3091
|
+
fileChunkCount++;
|
|
2946
3092
|
}
|
|
2947
3093
|
}
|
|
2948
3094
|
let removedCount = 0;
|
|
@@ -2957,6 +3103,8 @@ var Indexer = class {
|
|
|
2957
3103
|
stats.existingChunks = currentChunkIds.size - pendingChunks.length;
|
|
2958
3104
|
stats.removedChunks = removedCount;
|
|
2959
3105
|
if (pendingChunks.length === 0 && removedCount === 0) {
|
|
3106
|
+
database.clearBranch(this.currentBranch);
|
|
3107
|
+
database.addChunksToBranch(this.currentBranch, Array.from(currentChunkIds));
|
|
2960
3108
|
this.fileHashCache = currentFileHashes;
|
|
2961
3109
|
this.saveFileHashCache();
|
|
2962
3110
|
stats.durationMs = Date.now() - startTime;
|
|
@@ -2970,6 +3118,8 @@ var Indexer = class {
|
|
|
2970
3118
|
return stats;
|
|
2971
3119
|
}
|
|
2972
3120
|
if (pendingChunks.length === 0) {
|
|
3121
|
+
database.clearBranch(this.currentBranch);
|
|
3122
|
+
database.addChunksToBranch(this.currentBranch, Array.from(currentChunkIds));
|
|
2973
3123
|
store.save();
|
|
2974
3124
|
invertedIndex.save();
|
|
2975
3125
|
this.fileHashCache = currentFileHashes;
|
|
@@ -2991,8 +3141,22 @@ var Indexer = class {
|
|
|
2991
3141
|
chunksProcessed: 0,
|
|
2992
3142
|
totalChunks: pendingChunks.length
|
|
2993
3143
|
});
|
|
3144
|
+
const allContentHashes = pendingChunks.map((c) => c.contentHash);
|
|
3145
|
+
const missingHashes = new Set(database.getMissingEmbeddings(allContentHashes));
|
|
3146
|
+
const chunksNeedingEmbedding = pendingChunks.filter((c) => missingHashes.has(c.contentHash));
|
|
3147
|
+
const chunksWithExistingEmbedding = pendingChunks.filter((c) => !missingHashes.has(c.contentHash));
|
|
3148
|
+
for (const chunk of chunksWithExistingEmbedding) {
|
|
3149
|
+
const embeddingBuffer = database.getEmbedding(chunk.contentHash);
|
|
3150
|
+
if (embeddingBuffer) {
|
|
3151
|
+
const vector = bufferToFloat32Array(embeddingBuffer);
|
|
3152
|
+
store.add(chunk.id, Array.from(vector), chunk.metadata);
|
|
3153
|
+
invertedIndex.removeChunk(chunk.id);
|
|
3154
|
+
invertedIndex.addChunk(chunk.id, chunk.content);
|
|
3155
|
+
stats.indexedChunks++;
|
|
3156
|
+
}
|
|
3157
|
+
}
|
|
2994
3158
|
const queue = new PQueue({ concurrency: 3 });
|
|
2995
|
-
const dynamicBatches = createDynamicBatches(
|
|
3159
|
+
const dynamicBatches = createDynamicBatches(chunksNeedingEmbedding);
|
|
2996
3160
|
for (const batch of dynamicBatches) {
|
|
2997
3161
|
queue.add(async () => {
|
|
2998
3162
|
try {
|
|
@@ -3017,7 +3181,15 @@ var Indexer = class {
|
|
|
3017
3181
|
metadata: chunk.metadata
|
|
3018
3182
|
}));
|
|
3019
3183
|
store.addBatch(items);
|
|
3020
|
-
for (
|
|
3184
|
+
for (let i = 0; i < batch.length; i++) {
|
|
3185
|
+
const chunk = batch[i];
|
|
3186
|
+
const embedding = result.embeddings[i];
|
|
3187
|
+
database.upsertEmbedding(
|
|
3188
|
+
chunk.contentHash,
|
|
3189
|
+
float32ArrayToBuffer(embedding),
|
|
3190
|
+
chunk.text,
|
|
3191
|
+
detectedProvider.modelInfo.model
|
|
3192
|
+
);
|
|
3021
3193
|
invertedIndex.removeChunk(chunk.id);
|
|
3022
3194
|
invertedIndex.addChunk(chunk.id, chunk.content);
|
|
3023
3195
|
}
|
|
@@ -3032,6 +3204,7 @@ var Indexer = class {
|
|
|
3032
3204
|
});
|
|
3033
3205
|
} catch (error) {
|
|
3034
3206
|
stats.failedChunks += batch.length;
|
|
3207
|
+
this.addFailedBatch(batch, String(error));
|
|
3035
3208
|
console.error(`Failed to embed batch after retries: ${error}`);
|
|
3036
3209
|
}
|
|
3037
3210
|
});
|
|
@@ -3044,11 +3217,16 @@ var Indexer = class {
|
|
|
3044
3217
|
chunksProcessed: stats.indexedChunks,
|
|
3045
3218
|
totalChunks: pendingChunks.length
|
|
3046
3219
|
});
|
|
3220
|
+
database.clearBranch(this.currentBranch);
|
|
3221
|
+
database.addChunksToBranch(this.currentBranch, Array.from(currentChunkIds));
|
|
3047
3222
|
store.save();
|
|
3048
3223
|
invertedIndex.save();
|
|
3049
3224
|
this.fileHashCache = currentFileHashes;
|
|
3050
3225
|
this.saveFileHashCache();
|
|
3051
3226
|
stats.durationMs = Date.now() - startTime;
|
|
3227
|
+
if (stats.failedChunks > 0) {
|
|
3228
|
+
stats.failedBatchesPath = this.failedBatchesPath;
|
|
3229
|
+
}
|
|
3052
3230
|
onProgress?.({
|
|
3053
3231
|
phase: "complete",
|
|
3054
3232
|
filesProcessed: files.length,
|
|
@@ -3059,18 +3237,24 @@ var Indexer = class {
|
|
|
3059
3237
|
return stats;
|
|
3060
3238
|
}
|
|
3061
3239
|
async search(query, limit, options) {
|
|
3062
|
-
const { store, provider } = await this.ensureInitialized();
|
|
3240
|
+
const { store, provider, database } = await this.ensureInitialized();
|
|
3063
3241
|
if (store.count() === 0) {
|
|
3064
3242
|
return [];
|
|
3065
3243
|
}
|
|
3066
3244
|
const maxResults = limit ?? this.config.search.maxResults;
|
|
3067
3245
|
const hybridWeight = options?.hybridWeight ?? this.config.search.hybridWeight;
|
|
3246
|
+
const filterByBranch = options?.filterByBranch ?? true;
|
|
3068
3247
|
const { embedding } = await provider.embed(query);
|
|
3069
3248
|
const semanticResults = store.search(embedding, maxResults * 4);
|
|
3070
3249
|
const keywordResults = await this.keywordSearch(query, maxResults * 4);
|
|
3071
3250
|
const combined = this.fuseResults(semanticResults, keywordResults, hybridWeight, maxResults * 4);
|
|
3251
|
+
let branchChunkIds = null;
|
|
3252
|
+
if (filterByBranch && this.currentBranch !== "default") {
|
|
3253
|
+
branchChunkIds = new Set(database.getBranchChunkIds(this.currentBranch));
|
|
3254
|
+
}
|
|
3072
3255
|
const filtered = combined.filter((r) => {
|
|
3073
3256
|
if (r.score < this.config.search.minScore) return false;
|
|
3257
|
+
if (branchChunkIds && !branchChunkIds.has(r.id)) return false;
|
|
3074
3258
|
if (options?.fileType) {
|
|
3075
3259
|
const ext = r.metadata.filePath.split(".").pop()?.toLowerCase();
|
|
3076
3260
|
if (ext !== options.fileType.toLowerCase().replace(/^\./, "")) return false;
|
|
@@ -3172,7 +3356,9 @@ var Indexer = class {
|
|
|
3172
3356
|
vectorCount: store.count(),
|
|
3173
3357
|
provider: detectedProvider.provider,
|
|
3174
3358
|
model: detectedProvider.modelInfo.model,
|
|
3175
|
-
indexPath: this.indexPath
|
|
3359
|
+
indexPath: this.indexPath,
|
|
3360
|
+
currentBranch: this.currentBranch,
|
|
3361
|
+
baseBranch: this.baseBranch
|
|
3176
3362
|
};
|
|
3177
3363
|
}
|
|
3178
3364
|
async clearIndex() {
|
|
@@ -3183,7 +3369,7 @@ var Indexer = class {
|
|
|
3183
3369
|
invertedIndex.save();
|
|
3184
3370
|
}
|
|
3185
3371
|
async healthCheck() {
|
|
3186
|
-
const { store, invertedIndex } = await this.ensureInitialized();
|
|
3372
|
+
const { store, invertedIndex, database } = await this.ensureInitialized();
|
|
3187
3373
|
const allMetadata = store.getAllMetadata();
|
|
3188
3374
|
const filePathsToChunkKeys = /* @__PURE__ */ new Map();
|
|
3189
3375
|
for (const { key, metadata } of allMetadata) {
|
|
@@ -3200,6 +3386,7 @@ var Indexer = class {
|
|
|
3200
3386
|
invertedIndex.removeChunk(key);
|
|
3201
3387
|
removedCount++;
|
|
3202
3388
|
}
|
|
3389
|
+
database.deleteChunksByFile(filePath);
|
|
3203
3390
|
removedFilePaths.push(filePath);
|
|
3204
3391
|
}
|
|
3205
3392
|
}
|
|
@@ -3207,7 +3394,77 @@ var Indexer = class {
|
|
|
3207
3394
|
store.save();
|
|
3208
3395
|
invertedIndex.save();
|
|
3209
3396
|
}
|
|
3210
|
-
|
|
3397
|
+
const gcOrphanEmbeddings = database.gcOrphanEmbeddings();
|
|
3398
|
+
const gcOrphanChunks = database.gcOrphanChunks();
|
|
3399
|
+
return { removed: removedCount, filePaths: removedFilePaths, gcOrphanEmbeddings, gcOrphanChunks };
|
|
3400
|
+
}
|
|
3401
|
+
async retryFailedBatches() {
|
|
3402
|
+
const { store, provider, invertedIndex } = await this.ensureInitialized();
|
|
3403
|
+
const failedBatches = this.loadFailedBatches();
|
|
3404
|
+
if (failedBatches.length === 0) {
|
|
3405
|
+
return { succeeded: 0, failed: 0, remaining: 0 };
|
|
3406
|
+
}
|
|
3407
|
+
let succeeded = 0;
|
|
3408
|
+
let failed = 0;
|
|
3409
|
+
const stillFailing = [];
|
|
3410
|
+
for (const batch of failedBatches) {
|
|
3411
|
+
try {
|
|
3412
|
+
const result = await pRetry(
|
|
3413
|
+
async () => {
|
|
3414
|
+
const texts = batch.chunks.map((c) => c.text);
|
|
3415
|
+
return provider.embedBatch(texts);
|
|
3416
|
+
},
|
|
3417
|
+
{
|
|
3418
|
+
retries: this.config.indexing.retries,
|
|
3419
|
+
minTimeout: this.config.indexing.retryDelayMs
|
|
3420
|
+
}
|
|
3421
|
+
);
|
|
3422
|
+
const items = batch.chunks.map((chunk, idx) => ({
|
|
3423
|
+
id: chunk.id,
|
|
3424
|
+
vector: result.embeddings[idx],
|
|
3425
|
+
metadata: chunk.metadata
|
|
3426
|
+
}));
|
|
3427
|
+
store.addBatch(items);
|
|
3428
|
+
for (const chunk of batch.chunks) {
|
|
3429
|
+
invertedIndex.removeChunk(chunk.id);
|
|
3430
|
+
invertedIndex.addChunk(chunk.id, chunk.content);
|
|
3431
|
+
}
|
|
3432
|
+
succeeded += batch.chunks.length;
|
|
3433
|
+
} catch (error) {
|
|
3434
|
+
failed += batch.chunks.length;
|
|
3435
|
+
stillFailing.push({
|
|
3436
|
+
...batch,
|
|
3437
|
+
attemptCount: batch.attemptCount + 1,
|
|
3438
|
+
lastAttempt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
3439
|
+
error: String(error)
|
|
3440
|
+
});
|
|
3441
|
+
}
|
|
3442
|
+
}
|
|
3443
|
+
this.saveFailedBatches(stillFailing);
|
|
3444
|
+
if (succeeded > 0) {
|
|
3445
|
+
store.save();
|
|
3446
|
+
invertedIndex.save();
|
|
3447
|
+
}
|
|
3448
|
+
return { succeeded, failed, remaining: stillFailing.length };
|
|
3449
|
+
}
|
|
3450
|
+
getFailedBatchesCount() {
|
|
3451
|
+
return this.loadFailedBatches().length;
|
|
3452
|
+
}
|
|
3453
|
+
getCurrentBranch() {
|
|
3454
|
+
return this.currentBranch;
|
|
3455
|
+
}
|
|
3456
|
+
getBaseBranch() {
|
|
3457
|
+
return this.baseBranch;
|
|
3458
|
+
}
|
|
3459
|
+
refreshBranchInfo() {
|
|
3460
|
+
if (isGitRepo(this.projectRoot)) {
|
|
3461
|
+
this.currentBranch = getBranchOrDefault(this.projectRoot);
|
|
3462
|
+
this.baseBranch = getBaseBranch(this.projectRoot);
|
|
3463
|
+
}
|
|
3464
|
+
}
|
|
3465
|
+
async getDatabaseStats() {
|
|
3466
|
+
const { database } = await this.ensureInitialized();
|
|
3467
|
+
return database.getStats();
|
|
3211
3468
|
}
|
|
3212
3469
|
};
|
|
3213
3470
|
|
|
@@ -5034,9 +5291,82 @@ var FileWatcher = class {
|
|
|
5034
5291
|
return this.watcher !== null;
|
|
5035
5292
|
}
|
|
5036
5293
|
};
|
|
5294
|
+
/**
 * Watches a git repository for branch switches.
 *
 * The watcher observes the path returned by `getHeadPath(projectRoot)` and
 * the `.git/refs/heads` directory under the project root. Every "change" or
 * "add" event is debounced; once the debounce elapses the current branch is
 * re-read and, if it differs from the remembered one, the registered handler
 * is invoked with `(oldBranch, newBranch)`.
 */
var GitHeadWatcher = class {
  watcher = null;
  projectRoot;
  currentBranch = null;
  onBranchChange = null;
  debounceTimer = null;
  // Short debounce for git operations
  debounceMs = 100;

  /**
   * @param {string} projectRoot Root directory of the project to observe.
   */
  constructor(projectRoot) {
    this.projectRoot = projectRoot;
  }

  /**
   * Begin watching. Does nothing if already running or if `projectRoot`
   * is not a git repository.
   *
   * @param {(oldBranch: string|null, newBranch: string) => (void|Promise<void>)} handler
   *   Called when the checked-out branch changes.
   */
  start(handler) {
    if (this.watcher) {
      return;
    }
    if (!isGitRepo(this.projectRoot)) {
      return;
    }
    this.onBranchChange = handler;
    this.currentBranch = getCurrentBranch(this.projectRoot);
    const headPath = getHeadPath(this.projectRoot);
    const refsPath = path6.join(this.projectRoot, ".git", "refs", "heads");
    this.watcher = chokidar_default.watch([headPath, refsPath], {
      persistent: true,
      ignoreInitial: true,
      awaitWriteFinish: {
        stabilityThreshold: 50,
        pollInterval: 10
      }
    });
    this.watcher.on("change", () => this.handleHeadChange());
    this.watcher.on("add", () => this.handleHeadChange());
    // An EventEmitter with no "error" listener throws when an error event is
    // emitted; log instead so a filesystem hiccup cannot take the host down.
    this.watcher.on("error", (error) => {
      console.error("Git HEAD watcher error:", error);
    });
  }

  /**
   * Collapse bursts of filesystem events into one branch check.
   */
  handleHeadChange() {
    if (this.debounceTimer) {
      clearTimeout(this.debounceTimer);
    }
    this.debounceTimer = setTimeout(() => {
      // The timer has fired; drop the stale handle.
      this.debounceTimer = null;
      // checkBranchChange is async and runs detached from any caller, so a
      // failure while reading the branch must be caught here or it becomes
      // an unhandled promise rejection.
      this.checkBranchChange().catch((error) => {
        console.error("Error checking branch change:", error);
      });
    }, this.debounceMs);
  }

  /**
   * Re-read the current branch and notify the handler when it changed.
   * Errors thrown by the handler are logged, not propagated.
   *
   * @returns {Promise<void>}
   */
  async checkBranchChange() {
    const newBranch = getCurrentBranch(this.projectRoot);
    if (newBranch && newBranch !== this.currentBranch && this.onBranchChange) {
      const oldBranch = this.currentBranch;
      // Update before awaiting so a re-entrant check sees the new branch.
      this.currentBranch = newBranch;
      try {
        await this.onBranchChange(oldBranch, newBranch);
      } catch (error) {
        console.error("Error handling branch change:", error);
      }
    } else if (newBranch) {
      this.currentBranch = newBranch;
    }
  }

  /**
   * @returns {string|null} Branch remembered from the most recent check.
   */
  getCurrentBranch() {
    return this.currentBranch;
  }

  /**
   * Stop watching: cancel any pending debounce, close the watcher and
   * forget the handler. Safe to call when not running.
   */
  stop() {
    if (this.debounceTimer) {
      clearTimeout(this.debounceTimer);
      this.debounceTimer = null;
    }
    if (this.watcher) {
      const closeResult = this.watcher.close();
      this.watcher = null;
      // close() may return a promise; wrap the result so any rejection is
      // logged rather than left floating. Non-promise results pass through.
      Promise.resolve(closeResult).catch((error) => {
        console.error("Error closing git HEAD watcher:", error);
      });
    }
    this.onBranchChange = null;
  }

  /**
   * @returns {boolean} True while a watcher instance is held.
   */
  isRunning() {
    return this.watcher !== null;
  }
};
|
|
5037
5367
|
function createWatcherWithIndexer(indexer, projectRoot, config) {
|
|
5038
|
-
const
|
|
5039
|
-
|
|
5368
|
+
const fileWatcher = new FileWatcher(projectRoot, config);
|
|
5369
|
+
fileWatcher.start(async (changes) => {
|
|
5040
5370
|
const hasAddOrChange = changes.some(
|
|
5041
5371
|
(c) => c.type === "add" || c.type === "change"
|
|
5042
5372
|
);
|
|
@@ -5045,7 +5375,22 @@ function createWatcherWithIndexer(indexer, projectRoot, config) {
|
|
|
5045
5375
|
await indexer.index();
|
|
5046
5376
|
}
|
|
5047
5377
|
});
|
|
5048
|
-
|
|
5378
|
+
let gitWatcher = null;
|
|
5379
|
+
if (isGitRepo(projectRoot)) {
|
|
5380
|
+
gitWatcher = new GitHeadWatcher(projectRoot);
|
|
5381
|
+
gitWatcher.start(async (oldBranch, newBranch) => {
|
|
5382
|
+
console.log(`Branch changed: ${oldBranch ?? "(none)"} -> ${newBranch}`);
|
|
5383
|
+
await indexer.index();
|
|
5384
|
+
});
|
|
5385
|
+
}
|
|
5386
|
+
return {
|
|
5387
|
+
fileWatcher,
|
|
5388
|
+
gitWatcher,
|
|
5389
|
+
stop() {
|
|
5390
|
+
fileWatcher.stop();
|
|
5391
|
+
gitWatcher?.stop();
|
|
5392
|
+
}
|
|
5393
|
+
};
|
|
5049
5394
|
}
|
|
5050
5395
|
|
|
5051
5396
|
// src/tools/index.ts
|
|
@@ -5129,13 +5474,19 @@ var index_health_check = (0, import_plugin.tool)({
|
|
|
5129
5474
|
async execute() {
|
|
5130
5475
|
const indexer = getIndexer();
|
|
5131
5476
|
const result = await indexer.healthCheck();
|
|
5132
|
-
if (result.removed === 0) {
|
|
5477
|
+
if (result.removed === 0 && result.gcOrphanEmbeddings === 0 && result.gcOrphanChunks === 0) {
|
|
5133
5478
|
return "Index is healthy. No stale entries found.";
|
|
5134
5479
|
}
|
|
5135
|
-
const lines = [
|
|
5136
|
-
|
|
5137
|
-
` Removed stale entries: ${result.removed}`
|
|
5138
|
-
|
|
5480
|
+
const lines = [`Health check complete:`];
|
|
5481
|
+
if (result.removed > 0) {
|
|
5482
|
+
lines.push(` Removed stale entries: ${result.removed}`);
|
|
5483
|
+
}
|
|
5484
|
+
if (result.gcOrphanEmbeddings > 0) {
|
|
5485
|
+
lines.push(` Garbage collected orphan embeddings: ${result.gcOrphanEmbeddings}`);
|
|
5486
|
+
}
|
|
5487
|
+
if (result.gcOrphanChunks > 0) {
|
|
5488
|
+
lines.push(` Garbage collected orphan chunks: ${result.gcOrphanChunks}`);
|
|
5489
|
+
}
|
|
5139
5490
|
if (result.filePaths.length > 0) {
|
|
5140
5491
|
lines.push(` Cleaned paths: ${result.filePaths.join(", ")}`);
|
|
5141
5492
|
}
|
|
@@ -5190,13 +5541,18 @@ function formatStatus(status) {
|
|
|
5190
5541
|
if (!status.indexed) {
|
|
5191
5542
|
return "Codebase is not indexed. Run index_codebase to create an index.";
|
|
5192
5543
|
}
|
|
5193
|
-
|
|
5544
|
+
const lines = [
|
|
5194
5545
|
`Index status:`,
|
|
5195
5546
|
` Indexed chunks: ${status.vectorCount.toLocaleString()}`,
|
|
5196
5547
|
` Provider: ${status.provider}`,
|
|
5197
5548
|
` Model: ${status.model}`,
|
|
5198
5549
|
` Location: ${status.indexPath}`
|
|
5199
|
-
]
|
|
5550
|
+
];
|
|
5551
|
+
if (status.currentBranch !== "default") {
|
|
5552
|
+
lines.push(` Current branch: ${status.currentBranch}`);
|
|
5553
|
+
lines.push(` Base branch: ${status.baseBranch}`);
|
|
5554
|
+
}
|
|
5555
|
+
return lines.join("\n");
|
|
5200
5556
|
}
|
|
5201
5557
|
|
|
5202
5558
|
// src/index.ts
|
|
@@ -5233,6 +5589,39 @@ var plugin = async ({ directory }) => {
|
|
|
5233
5589
|
index_codebase,
|
|
5234
5590
|
index_status,
|
|
5235
5591
|
index_health_check
|
|
5592
|
+
},
|
|
5593
|
+
async config(cfg) {
|
|
5594
|
+
cfg.command = cfg.command ?? {};
|
|
5595
|
+
cfg.command["search"] = {
|
|
5596
|
+
description: "Search codebase by meaning using semantic search",
|
|
5597
|
+
template: `Use the \`codebase_search\` tool to find code related to: $ARGUMENTS
|
|
5598
|
+
|
|
5599
|
+
If the index doesn't exist yet, run \`index_codebase\` first.
|
|
5600
|
+
|
|
5601
|
+
Return the most relevant results with file paths and line numbers.`
|
|
5602
|
+
};
|
|
5603
|
+
cfg.command["find"] = {
|
|
5604
|
+
description: "Find code using hybrid approach (semantic + grep)",
|
|
5605
|
+
template: `Find code related to: $ARGUMENTS
|
|
5606
|
+
|
|
5607
|
+
Strategy:
|
|
5608
|
+
1. First use \`codebase_search\` to find semantically related code
|
|
5609
|
+
2. From the results, identify specific function/class names
|
|
5610
|
+
3. Use grep to find all occurrences of those identifiers
|
|
5611
|
+
4. Combine findings into a comprehensive answer
|
|
5612
|
+
|
|
5613
|
+
If the semantic index doesn't exist, run \`index_codebase\` first.`
|
|
5614
|
+
};
|
|
5615
|
+
cfg.command["index"] = {
|
|
5616
|
+
description: "Index the codebase for semantic search",
|
|
5617
|
+
template: `Run the \`index_codebase\` tool to create or update the semantic search index.
|
|
5618
|
+
|
|
5619
|
+
Show progress and final statistics including:
|
|
5620
|
+
- Number of files processed
|
|
5621
|
+
- Number of chunks indexed
|
|
5622
|
+
- Tokens used
|
|
5623
|
+
- Duration`
|
|
5624
|
+
};
|
|
5236
5625
|
}
|
|
5237
5626
|
};
|
|
5238
5627
|
};
|