opencode-codebase-index 0.1.10 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +108 -36
- package/dist/index.cjs +525 -136
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +527 -138
- package/dist/index.js.map +1 -1
- package/native/codebase-index-native.darwin-arm64.node +0 -0
- package/native/codebase-index-native.darwin-x64.node +0 -0
- package/native/codebase-index-native.linux-arm64-gnu.node +0 -0
- package/native/codebase-index-native.linux-x64-gnu.node +0 -0
- package/native/codebase-index-native.win32-x64-msvc.node +0 -0
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -688,6 +688,8 @@ function getDefaultIndexingConfig() {
|
|
|
688
688
|
autoIndex: false,
|
|
689
689
|
watchFiles: true,
|
|
690
690
|
maxFileSize: 1048576,
|
|
691
|
+
maxChunksPerFile: 100,
|
|
692
|
+
semanticOnly: false,
|
|
691
693
|
retries: 3,
|
|
692
694
|
retryDelayMs: 1e3
|
|
693
695
|
};
|
|
@@ -721,6 +723,8 @@ function parseConfig(raw) {
|
|
|
721
723
|
autoIndex: typeof rawIndexing.autoIndex === "boolean" ? rawIndexing.autoIndex : defaultIndexing.autoIndex,
|
|
722
724
|
watchFiles: typeof rawIndexing.watchFiles === "boolean" ? rawIndexing.watchFiles : defaultIndexing.watchFiles,
|
|
723
725
|
maxFileSize: typeof rawIndexing.maxFileSize === "number" ? rawIndexing.maxFileSize : defaultIndexing.maxFileSize,
|
|
726
|
+
maxChunksPerFile: typeof rawIndexing.maxChunksPerFile === "number" ? Math.max(1, rawIndexing.maxChunksPerFile) : defaultIndexing.maxChunksPerFile,
|
|
727
|
+
semanticOnly: typeof rawIndexing.semanticOnly === "boolean" ? rawIndexing.semanticOnly : defaultIndexing.semanticOnly,
|
|
724
728
|
retries: typeof rawIndexing.retries === "number" ? rawIndexing.retries : defaultIndexing.retries,
|
|
725
729
|
retryDelayMs: typeof rawIndexing.retryDelayMs === "number" ? rawIndexing.retryDelayMs : defaultIndexing.retryDelayMs
|
|
726
730
|
};
|
|
@@ -802,7 +806,7 @@ function getDefaultModelForProvider(provider) {
|
|
|
802
806
|
}
|
|
803
807
|
|
|
804
808
|
// src/indexer/index.ts
|
|
805
|
-
import { existsSync as existsSync4, readFileSync as readFileSync4, writeFileSync
|
|
809
|
+
import { existsSync as existsSync4, readFileSync as readFileSync4, writeFileSync, promises as fsPromises2 } from "fs";
|
|
806
810
|
import * as path5 from "path";
|
|
807
811
|
|
|
808
812
|
// node_modules/eventemitter3/index.mjs
|
|
@@ -2184,7 +2188,10 @@ function shouldIncludeFile(filePath, projectRoot, includePatterns, excludePatter
|
|
|
2184
2188
|
return false;
|
|
2185
2189
|
}
|
|
2186
2190
|
function matchGlob(filePath, pattern) {
|
|
2187
|
-
|
|
2191
|
+
let regexPattern = pattern.replace(/\*\*/g, "<<<DOUBLESTAR>>>").replace(/\*/g, "[^/]*").replace(/<<<DOUBLESTAR>>>/g, ".*").replace(/\?/g, ".").replace(/\{([^}]+)\}/g, (_, p1) => `(${p1.split(",").join("|")})`);
|
|
2192
|
+
if (regexPattern.startsWith(".*/")) {
|
|
2193
|
+
regexPattern = `(.*\\/)?${regexPattern.slice(3)}`;
|
|
2194
|
+
}
|
|
2188
2195
|
const regex = new RegExp(`^${regexPattern}$`);
|
|
2189
2196
|
return regex.test(filePath);
|
|
2190
2197
|
}
|
|
@@ -2620,158 +2627,204 @@ function generateChunkId(filePath, chunk) {
|
|
|
2620
2627
|
function generateChunkHash(chunk) {
|
|
2621
2628
|
return hashContent(chunk.content);
|
|
2622
2629
|
}
|
|
2623
|
-
|
|
2624
|
-
// src/indexer/inverted-index.ts
|
|
2625
|
-
import { existsSync as existsSync3, readFileSync as readFileSync3, writeFileSync } from "fs";
|
|
2626
|
-
import * as path4 from "path";
|
|
2627
2630
|
var InvertedIndex = class {
|
|
2628
|
-
|
|
2629
|
-
termToChunks = /* @__PURE__ */ new Map();
|
|
2630
|
-
chunkTokens = /* @__PURE__ */ new Map();
|
|
2631
|
-
totalTokenCount = 0;
|
|
2631
|
+
inner;
|
|
2632
2632
|
constructor(indexPath) {
|
|
2633
|
-
this.
|
|
2633
|
+
this.inner = new native.InvertedIndex(indexPath);
|
|
2634
2634
|
}
|
|
2635
2635
|
load() {
|
|
2636
|
-
|
|
2637
|
-
return;
|
|
2638
|
-
}
|
|
2639
|
-
try {
|
|
2640
|
-
const content = readFileSync3(this.indexPath, "utf-8");
|
|
2641
|
-
const data = JSON.parse(content);
|
|
2642
|
-
for (const [term, chunkIds] of Object.entries(data.termToChunks)) {
|
|
2643
|
-
this.termToChunks.set(term, new Set(chunkIds));
|
|
2644
|
-
}
|
|
2645
|
-
for (const [chunkId, tokens] of Object.entries(data.chunkTokens)) {
|
|
2646
|
-
const tokenMap = new Map(Object.entries(tokens).map(([k, v]) => [k, v]));
|
|
2647
|
-
this.chunkTokens.set(chunkId, tokenMap);
|
|
2648
|
-
for (const count of tokenMap.values()) {
|
|
2649
|
-
this.totalTokenCount += count;
|
|
2650
|
-
}
|
|
2651
|
-
}
|
|
2652
|
-
} catch {
|
|
2653
|
-
this.termToChunks.clear();
|
|
2654
|
-
this.chunkTokens.clear();
|
|
2655
|
-
this.totalTokenCount = 0;
|
|
2656
|
-
}
|
|
2636
|
+
this.inner.load();
|
|
2657
2637
|
}
|
|
2658
2638
|
save() {
|
|
2659
|
-
|
|
2660
|
-
termToChunks: {},
|
|
2661
|
-
chunkTokens: {},
|
|
2662
|
-
avgDocLength: this.getAvgDocLength()
|
|
2663
|
-
};
|
|
2664
|
-
for (const [term, chunkIds] of this.termToChunks) {
|
|
2665
|
-
data.termToChunks[term] = Array.from(chunkIds);
|
|
2666
|
-
}
|
|
2667
|
-
for (const [chunkId, tokens] of this.chunkTokens) {
|
|
2668
|
-
data.chunkTokens[chunkId] = Object.fromEntries(tokens);
|
|
2669
|
-
}
|
|
2670
|
-
writeFileSync(this.indexPath, JSON.stringify(data));
|
|
2639
|
+
this.inner.save();
|
|
2671
2640
|
}
|
|
2672
2641
|
addChunk(chunkId, content) {
|
|
2673
|
-
|
|
2674
|
-
const termFreq = /* @__PURE__ */ new Map();
|
|
2675
|
-
for (const token of tokens) {
|
|
2676
|
-
termFreq.set(token, (termFreq.get(token) || 0) + 1);
|
|
2677
|
-
const chunks = this.termToChunks.get(token) || /* @__PURE__ */ new Set();
|
|
2678
|
-
chunks.add(chunkId);
|
|
2679
|
-
this.termToChunks.set(token, chunks);
|
|
2680
|
-
}
|
|
2681
|
-
this.chunkTokens.set(chunkId, termFreq);
|
|
2682
|
-
this.totalTokenCount += tokens.length;
|
|
2642
|
+
this.inner.addChunk(chunkId, content);
|
|
2683
2643
|
}
|
|
2684
2644
|
removeChunk(chunkId) {
|
|
2685
|
-
|
|
2686
|
-
if (!tokens) return;
|
|
2687
|
-
for (const [token, count] of tokens) {
|
|
2688
|
-
this.totalTokenCount -= count;
|
|
2689
|
-
const chunks = this.termToChunks.get(token);
|
|
2690
|
-
if (chunks) {
|
|
2691
|
-
chunks.delete(chunkId);
|
|
2692
|
-
if (chunks.size === 0) {
|
|
2693
|
-
this.termToChunks.delete(token);
|
|
2694
|
-
}
|
|
2695
|
-
}
|
|
2696
|
-
}
|
|
2697
|
-
this.chunkTokens.delete(chunkId);
|
|
2645
|
+
return this.inner.removeChunk(chunkId);
|
|
2698
2646
|
}
|
|
2699
|
-
search(query) {
|
|
2700
|
-
const
|
|
2701
|
-
|
|
2702
|
-
|
|
2703
|
-
|
|
2704
|
-
const candidateChunks = /* @__PURE__ */ new Set();
|
|
2705
|
-
for (const token of queryTokens) {
|
|
2706
|
-
const chunks = this.termToChunks.get(token);
|
|
2707
|
-
if (chunks) {
|
|
2708
|
-
for (const chunkId of chunks) {
|
|
2709
|
-
candidateChunks.add(chunkId);
|
|
2710
|
-
}
|
|
2711
|
-
}
|
|
2712
|
-
}
|
|
2713
|
-
const scores = /* @__PURE__ */ new Map();
|
|
2714
|
-
const k1 = 1.2;
|
|
2715
|
-
const b = 0.75;
|
|
2716
|
-
const N = this.chunkTokens.size;
|
|
2717
|
-
const avgDocLength = this.getAvgDocLength();
|
|
2718
|
-
for (const chunkId of candidateChunks) {
|
|
2719
|
-
const termFreq = this.chunkTokens.get(chunkId);
|
|
2720
|
-
if (!termFreq) continue;
|
|
2721
|
-
const docLength = Array.from(termFreq.values()).reduce((a, b2) => a + b2, 0);
|
|
2722
|
-
let score = 0;
|
|
2723
|
-
for (const term of queryTokens) {
|
|
2724
|
-
const tf = termFreq.get(term) || 0;
|
|
2725
|
-
if (tf === 0) continue;
|
|
2726
|
-
const df = this.termToChunks.get(term)?.size || 0;
|
|
2727
|
-
const idf = Math.log((N - df + 0.5) / (df + 0.5) + 1);
|
|
2728
|
-
const tfNorm = tf * (k1 + 1) / (tf + k1 * (1 - b + b * (docLength / avgDocLength)));
|
|
2729
|
-
score += idf * tfNorm;
|
|
2730
|
-
}
|
|
2731
|
-
scores.set(chunkId, score);
|
|
2732
|
-
}
|
|
2733
|
-
const maxScore = Math.max(...scores.values(), 1);
|
|
2734
|
-
for (const [chunkId, score] of scores) {
|
|
2735
|
-
scores.set(chunkId, score / maxScore);
|
|
2647
|
+
search(query, limit) {
|
|
2648
|
+
const results = this.inner.search(query, limit ?? 100);
|
|
2649
|
+
const map = /* @__PURE__ */ new Map();
|
|
2650
|
+
for (const r of results) {
|
|
2651
|
+
map.set(r.chunkId, r.score);
|
|
2736
2652
|
}
|
|
2737
|
-
return
|
|
2653
|
+
return map;
|
|
2738
2654
|
}
|
|
2739
2655
|
hasChunk(chunkId) {
|
|
2740
|
-
return this.
|
|
2656
|
+
return this.inner.hasChunk(chunkId);
|
|
2741
2657
|
}
|
|
2742
2658
|
clear() {
|
|
2743
|
-
this.
|
|
2744
|
-
this.chunkTokens.clear();
|
|
2745
|
-
this.totalTokenCount = 0;
|
|
2659
|
+
this.inner.clear();
|
|
2746
2660
|
}
|
|
2747
2661
|
getDocumentCount() {
|
|
2748
|
-
return this.
|
|
2662
|
+
return this.inner.documentCount();
|
|
2663
|
+
}
|
|
2664
|
+
};
|
|
2665
|
+
var Database = class {
|
|
2666
|
+
inner;
|
|
2667
|
+
constructor(dbPath) {
|
|
2668
|
+
this.inner = new native.Database(dbPath);
|
|
2669
|
+
}
|
|
2670
|
+
embeddingExists(contentHash) {
|
|
2671
|
+
return this.inner.embeddingExists(contentHash);
|
|
2672
|
+
}
|
|
2673
|
+
getEmbedding(contentHash) {
|
|
2674
|
+
return this.inner.getEmbedding(contentHash) ?? null;
|
|
2675
|
+
}
|
|
2676
|
+
upsertEmbedding(contentHash, embedding, chunkText, model) {
|
|
2677
|
+
this.inner.upsertEmbedding(contentHash, embedding, chunkText, model);
|
|
2678
|
+
}
|
|
2679
|
+
getMissingEmbeddings(contentHashes) {
|
|
2680
|
+
return this.inner.getMissingEmbeddings(contentHashes);
|
|
2681
|
+
}
|
|
2682
|
+
upsertChunk(chunk) {
|
|
2683
|
+
this.inner.upsertChunk(chunk);
|
|
2684
|
+
}
|
|
2685
|
+
getChunk(chunkId) {
|
|
2686
|
+
return this.inner.getChunk(chunkId) ?? null;
|
|
2749
2687
|
}
|
|
2750
|
-
|
|
2751
|
-
|
|
2752
|
-
return count > 0 ? this.totalTokenCount / count : 100;
|
|
2688
|
+
getChunksByFile(filePath) {
|
|
2689
|
+
return this.inner.getChunksByFile(filePath);
|
|
2753
2690
|
}
|
|
2754
|
-
|
|
2755
|
-
return
|
|
2691
|
+
deleteChunksByFile(filePath) {
|
|
2692
|
+
return this.inner.deleteChunksByFile(filePath);
|
|
2693
|
+
}
|
|
2694
|
+
addChunksToBranch(branch, chunkIds) {
|
|
2695
|
+
this.inner.addChunksToBranch(branch, chunkIds);
|
|
2696
|
+
}
|
|
2697
|
+
clearBranch(branch) {
|
|
2698
|
+
return this.inner.clearBranch(branch);
|
|
2699
|
+
}
|
|
2700
|
+
getBranchChunkIds(branch) {
|
|
2701
|
+
return this.inner.getBranchChunkIds(branch);
|
|
2702
|
+
}
|
|
2703
|
+
getBranchDelta(branch, baseBranch) {
|
|
2704
|
+
return this.inner.getBranchDelta(branch, baseBranch);
|
|
2705
|
+
}
|
|
2706
|
+
chunkExistsOnBranch(branch, chunkId) {
|
|
2707
|
+
return this.inner.chunkExistsOnBranch(branch, chunkId);
|
|
2708
|
+
}
|
|
2709
|
+
getAllBranches() {
|
|
2710
|
+
return this.inner.getAllBranches();
|
|
2711
|
+
}
|
|
2712
|
+
getMetadata(key) {
|
|
2713
|
+
return this.inner.getMetadata(key) ?? null;
|
|
2714
|
+
}
|
|
2715
|
+
setMetadata(key, value) {
|
|
2716
|
+
this.inner.setMetadata(key, value);
|
|
2717
|
+
}
|
|
2718
|
+
deleteMetadata(key) {
|
|
2719
|
+
return this.inner.deleteMetadata(key);
|
|
2720
|
+
}
|
|
2721
|
+
gcOrphanEmbeddings() {
|
|
2722
|
+
return this.inner.gcOrphanEmbeddings();
|
|
2723
|
+
}
|
|
2724
|
+
gcOrphanChunks() {
|
|
2725
|
+
return this.inner.gcOrphanChunks();
|
|
2726
|
+
}
|
|
2727
|
+
getStats() {
|
|
2728
|
+
return this.inner.getStats();
|
|
2756
2729
|
}
|
|
2757
2730
|
};
|
|
2758
2731
|
|
|
2732
|
+
// src/git/index.ts
|
|
2733
|
+
import { existsSync as existsSync3, readFileSync as readFileSync3, readdirSync, statSync } from "fs";
|
|
2734
|
+
import * as path4 from "path";
|
|
2735
|
+
import { execSync } from "child_process";
|
|
2736
|
+
function isGitRepo(dir) {
|
|
2737
|
+
return existsSync3(path4.join(dir, ".git"));
|
|
2738
|
+
}
|
|
2739
|
+
function getCurrentBranch(repoRoot) {
|
|
2740
|
+
const headPath = path4.join(repoRoot, ".git", "HEAD");
|
|
2741
|
+
if (!existsSync3(headPath)) {
|
|
2742
|
+
return null;
|
|
2743
|
+
}
|
|
2744
|
+
try {
|
|
2745
|
+
const headContent = readFileSync3(headPath, "utf-8").trim();
|
|
2746
|
+
const match = headContent.match(/^ref: refs\/heads\/(.+)$/);
|
|
2747
|
+
if (match) {
|
|
2748
|
+
return match[1];
|
|
2749
|
+
}
|
|
2750
|
+
if (/^[0-9a-f]{40}$/i.test(headContent)) {
|
|
2751
|
+
return headContent.slice(0, 7);
|
|
2752
|
+
}
|
|
2753
|
+
return null;
|
|
2754
|
+
} catch {
|
|
2755
|
+
return null;
|
|
2756
|
+
}
|
|
2757
|
+
}
|
|
2758
|
+
function getBaseBranch(repoRoot) {
|
|
2759
|
+
const candidates = ["main", "master", "develop", "trunk"];
|
|
2760
|
+
for (const candidate of candidates) {
|
|
2761
|
+
const refPath = path4.join(repoRoot, ".git", "refs", "heads", candidate);
|
|
2762
|
+
if (existsSync3(refPath)) {
|
|
2763
|
+
return candidate;
|
|
2764
|
+
}
|
|
2765
|
+
const packedRefsPath = path4.join(repoRoot, ".git", "packed-refs");
|
|
2766
|
+
if (existsSync3(packedRefsPath)) {
|
|
2767
|
+
try {
|
|
2768
|
+
const content = readFileSync3(packedRefsPath, "utf-8");
|
|
2769
|
+
if (content.includes(`refs/heads/${candidate}`)) {
|
|
2770
|
+
return candidate;
|
|
2771
|
+
}
|
|
2772
|
+
} catch {
|
|
2773
|
+
}
|
|
2774
|
+
}
|
|
2775
|
+
}
|
|
2776
|
+
try {
|
|
2777
|
+
const result = execSync("git remote show origin", {
|
|
2778
|
+
cwd: repoRoot,
|
|
2779
|
+
encoding: "utf-8",
|
|
2780
|
+
stdio: ["pipe", "pipe", "pipe"]
|
|
2781
|
+
});
|
|
2782
|
+
const match = result.match(/HEAD branch: (.+)/);
|
|
2783
|
+
if (match) {
|
|
2784
|
+
return match[1].trim();
|
|
2785
|
+
}
|
|
2786
|
+
} catch {
|
|
2787
|
+
}
|
|
2788
|
+
return getCurrentBranch(repoRoot) ?? "main";
|
|
2789
|
+
}
|
|
2790
|
+
function getBranchOrDefault(repoRoot) {
|
|
2791
|
+
if (!isGitRepo(repoRoot)) {
|
|
2792
|
+
return "default";
|
|
2793
|
+
}
|
|
2794
|
+
return getCurrentBranch(repoRoot) ?? "default";
|
|
2795
|
+
}
|
|
2796
|
+
function getHeadPath(repoRoot) {
|
|
2797
|
+
return path4.join(repoRoot, ".git", "HEAD");
|
|
2798
|
+
}
|
|
2799
|
+
|
|
2759
2800
|
// src/indexer/index.ts
|
|
2801
|
+
function float32ArrayToBuffer(arr) {
|
|
2802
|
+
const float32 = new Float32Array(arr);
|
|
2803
|
+
return Buffer.from(float32.buffer);
|
|
2804
|
+
}
|
|
2805
|
+
function bufferToFloat32Array(buf) {
|
|
2806
|
+
return new Float32Array(buf.buffer, buf.byteOffset, buf.byteLength / 4);
|
|
2807
|
+
}
|
|
2760
2808
|
var Indexer = class {
|
|
2761
2809
|
config;
|
|
2762
2810
|
projectRoot;
|
|
2763
2811
|
indexPath;
|
|
2764
2812
|
store = null;
|
|
2765
2813
|
invertedIndex = null;
|
|
2814
|
+
database = null;
|
|
2766
2815
|
provider = null;
|
|
2767
2816
|
detectedProvider = null;
|
|
2768
2817
|
fileHashCache = /* @__PURE__ */ new Map();
|
|
2769
2818
|
fileHashCachePath = "";
|
|
2819
|
+
failedBatchesPath = "";
|
|
2820
|
+
currentBranch = "default";
|
|
2821
|
+
baseBranch = "main";
|
|
2770
2822
|
constructor(projectRoot, config) {
|
|
2771
2823
|
this.projectRoot = projectRoot;
|
|
2772
2824
|
this.config = config;
|
|
2773
2825
|
this.indexPath = this.getIndexPath();
|
|
2774
2826
|
this.fileHashCachePath = path5.join(this.indexPath, "file-hashes.json");
|
|
2827
|
+
this.failedBatchesPath = path5.join(this.indexPath, "failed-batches.json");
|
|
2775
2828
|
}
|
|
2776
2829
|
getIndexPath() {
|
|
2777
2830
|
if (this.config.scope === "global") {
|
|
@@ -2796,7 +2849,38 @@ var Indexer = class {
|
|
|
2796
2849
|
for (const [k, v] of this.fileHashCache) {
|
|
2797
2850
|
obj[k] = v;
|
|
2798
2851
|
}
|
|
2799
|
-
|
|
2852
|
+
writeFileSync(this.fileHashCachePath, JSON.stringify(obj));
|
|
2853
|
+
}
|
|
2854
|
+
loadFailedBatches() {
|
|
2855
|
+
try {
|
|
2856
|
+
if (existsSync4(this.failedBatchesPath)) {
|
|
2857
|
+
const data = readFileSync4(this.failedBatchesPath, "utf-8");
|
|
2858
|
+
return JSON.parse(data);
|
|
2859
|
+
}
|
|
2860
|
+
} catch {
|
|
2861
|
+
return [];
|
|
2862
|
+
}
|
|
2863
|
+
return [];
|
|
2864
|
+
}
|
|
2865
|
+
saveFailedBatches(batches) {
|
|
2866
|
+
if (batches.length === 0) {
|
|
2867
|
+
if (existsSync4(this.failedBatchesPath)) {
|
|
2868
|
+
fsPromises2.unlink(this.failedBatchesPath).catch(() => {
|
|
2869
|
+
});
|
|
2870
|
+
}
|
|
2871
|
+
return;
|
|
2872
|
+
}
|
|
2873
|
+
writeFileSync(this.failedBatchesPath, JSON.stringify(batches, null, 2));
|
|
2874
|
+
}
|
|
2875
|
+
addFailedBatch(batch, error) {
|
|
2876
|
+
const existing = this.loadFailedBatches();
|
|
2877
|
+
existing.push({
|
|
2878
|
+
chunks: batch,
|
|
2879
|
+
error,
|
|
2880
|
+
attemptCount: 1,
|
|
2881
|
+
lastAttempt: (/* @__PURE__ */ new Date()).toISOString()
|
|
2882
|
+
});
|
|
2883
|
+
this.saveFailedBatches(existing);
|
|
2800
2884
|
}
|
|
2801
2885
|
async initialize() {
|
|
2802
2886
|
this.detectedProvider = await detectEmbeddingProvider(this.config.embeddingProvider);
|
|
@@ -2817,18 +2901,60 @@ var Indexer = class {
|
|
|
2817
2901
|
if (existsSync4(indexFilePath)) {
|
|
2818
2902
|
this.store.load();
|
|
2819
2903
|
}
|
|
2820
|
-
|
|
2821
|
-
this.invertedIndex
|
|
2904
|
+
const invertedIndexPath = path5.join(this.indexPath, "inverted-index.json");
|
|
2905
|
+
this.invertedIndex = new InvertedIndex(invertedIndexPath);
|
|
2906
|
+
try {
|
|
2907
|
+
this.invertedIndex.load();
|
|
2908
|
+
} catch {
|
|
2909
|
+
if (existsSync4(invertedIndexPath)) {
|
|
2910
|
+
await fsPromises2.unlink(invertedIndexPath);
|
|
2911
|
+
}
|
|
2912
|
+
this.invertedIndex = new InvertedIndex(invertedIndexPath);
|
|
2913
|
+
}
|
|
2914
|
+
const dbPath = path5.join(this.indexPath, "codebase.db");
|
|
2915
|
+
const dbIsNew = !existsSync4(dbPath);
|
|
2916
|
+
this.database = new Database(dbPath);
|
|
2917
|
+
if (dbIsNew && this.store.count() > 0) {
|
|
2918
|
+
this.migrateFromLegacyIndex();
|
|
2919
|
+
}
|
|
2920
|
+
if (isGitRepo(this.projectRoot)) {
|
|
2921
|
+
this.currentBranch = getBranchOrDefault(this.projectRoot);
|
|
2922
|
+
this.baseBranch = getBaseBranch(this.projectRoot);
|
|
2923
|
+
} else {
|
|
2924
|
+
this.currentBranch = "default";
|
|
2925
|
+
this.baseBranch = "default";
|
|
2926
|
+
}
|
|
2927
|
+
}
|
|
2928
|
+
migrateFromLegacyIndex() {
|
|
2929
|
+
if (!this.store || !this.database) return;
|
|
2930
|
+
const allMetadata = this.store.getAllMetadata();
|
|
2931
|
+
const chunkIds = [];
|
|
2932
|
+
for (const { key, metadata } of allMetadata) {
|
|
2933
|
+
const chunkData = {
|
|
2934
|
+
chunkId: key,
|
|
2935
|
+
contentHash: metadata.hash,
|
|
2936
|
+
filePath: metadata.filePath,
|
|
2937
|
+
startLine: metadata.startLine,
|
|
2938
|
+
endLine: metadata.endLine,
|
|
2939
|
+
nodeType: metadata.chunkType,
|
|
2940
|
+
name: metadata.name,
|
|
2941
|
+
language: metadata.language
|
|
2942
|
+
};
|
|
2943
|
+
this.database.upsertChunk(chunkData);
|
|
2944
|
+
chunkIds.push(key);
|
|
2945
|
+
}
|
|
2946
|
+
this.database.addChunksToBranch(this.currentBranch || "default", chunkIds);
|
|
2822
2947
|
}
|
|
2823
2948
|
async ensureInitialized() {
|
|
2824
|
-
if (!this.store || !this.provider || !this.invertedIndex || !this.detectedProvider) {
|
|
2949
|
+
if (!this.store || !this.provider || !this.invertedIndex || !this.detectedProvider || !this.database) {
|
|
2825
2950
|
await this.initialize();
|
|
2826
2951
|
}
|
|
2827
2952
|
return {
|
|
2828
2953
|
store: this.store,
|
|
2829
2954
|
provider: this.provider,
|
|
2830
2955
|
invertedIndex: this.invertedIndex,
|
|
2831
|
-
detectedProvider: this.detectedProvider
|
|
2956
|
+
detectedProvider: this.detectedProvider,
|
|
2957
|
+
database: this.database
|
|
2832
2958
|
};
|
|
2833
2959
|
}
|
|
2834
2960
|
async estimateCost() {
|
|
@@ -2842,7 +2968,7 @@ var Indexer = class {
|
|
|
2842
2968
|
return createCostEstimate(files, detectedProvider);
|
|
2843
2969
|
}
|
|
2844
2970
|
async index(onProgress) {
|
|
2845
|
-
const { store, provider, invertedIndex } = await this.ensureInitialized();
|
|
2971
|
+
const { store, provider, invertedIndex, database, detectedProvider } = await this.ensureInitialized();
|
|
2846
2972
|
const startTime = Date.now();
|
|
2847
2973
|
const stats = {
|
|
2848
2974
|
totalFiles: 0,
|
|
@@ -2919,11 +3045,30 @@ var Indexer = class {
|
|
|
2919
3045
|
const relativePath = path5.relative(this.projectRoot, parsed.path);
|
|
2920
3046
|
stats.parseFailures.push(relativePath);
|
|
2921
3047
|
}
|
|
3048
|
+
let fileChunkCount = 0;
|
|
2922
3049
|
for (const chunk of parsed.chunks) {
|
|
3050
|
+
if (fileChunkCount >= this.config.indexing.maxChunksPerFile) {
|
|
3051
|
+
break;
|
|
3052
|
+
}
|
|
3053
|
+
if (this.config.indexing.semanticOnly && chunk.chunkType === "other") {
|
|
3054
|
+
continue;
|
|
3055
|
+
}
|
|
2923
3056
|
const id = generateChunkId(parsed.path, chunk);
|
|
2924
3057
|
const contentHash = generateChunkHash(chunk);
|
|
2925
3058
|
currentChunkIds.add(id);
|
|
3059
|
+
const chunkData = {
|
|
3060
|
+
chunkId: id,
|
|
3061
|
+
contentHash,
|
|
3062
|
+
filePath: parsed.path,
|
|
3063
|
+
startLine: chunk.startLine,
|
|
3064
|
+
endLine: chunk.endLine,
|
|
3065
|
+
nodeType: chunk.chunkType,
|
|
3066
|
+
name: chunk.name,
|
|
3067
|
+
language: chunk.language
|
|
3068
|
+
};
|
|
3069
|
+
database.upsertChunk(chunkData);
|
|
2926
3070
|
if (existingChunks.get(id) === contentHash) {
|
|
3071
|
+
fileChunkCount++;
|
|
2927
3072
|
continue;
|
|
2928
3073
|
}
|
|
2929
3074
|
const text = createEmbeddingText(chunk, parsed.path);
|
|
@@ -2936,7 +3081,8 @@ var Indexer = class {
|
|
|
2936
3081
|
language: chunk.language,
|
|
2937
3082
|
hash: contentHash
|
|
2938
3083
|
};
|
|
2939
|
-
pendingChunks.push({ id, text, content: chunk.content, metadata });
|
|
3084
|
+
pendingChunks.push({ id, text, content: chunk.content, contentHash, metadata });
|
|
3085
|
+
fileChunkCount++;
|
|
2940
3086
|
}
|
|
2941
3087
|
}
|
|
2942
3088
|
let removedCount = 0;
|
|
@@ -2951,6 +3097,8 @@ var Indexer = class {
|
|
|
2951
3097
|
stats.existingChunks = currentChunkIds.size - pendingChunks.length;
|
|
2952
3098
|
stats.removedChunks = removedCount;
|
|
2953
3099
|
if (pendingChunks.length === 0 && removedCount === 0) {
|
|
3100
|
+
database.clearBranch(this.currentBranch);
|
|
3101
|
+
database.addChunksToBranch(this.currentBranch, Array.from(currentChunkIds));
|
|
2954
3102
|
this.fileHashCache = currentFileHashes;
|
|
2955
3103
|
this.saveFileHashCache();
|
|
2956
3104
|
stats.durationMs = Date.now() - startTime;
|
|
@@ -2964,6 +3112,8 @@ var Indexer = class {
|
|
|
2964
3112
|
return stats;
|
|
2965
3113
|
}
|
|
2966
3114
|
if (pendingChunks.length === 0) {
|
|
3115
|
+
database.clearBranch(this.currentBranch);
|
|
3116
|
+
database.addChunksToBranch(this.currentBranch, Array.from(currentChunkIds));
|
|
2967
3117
|
store.save();
|
|
2968
3118
|
invertedIndex.save();
|
|
2969
3119
|
this.fileHashCache = currentFileHashes;
|
|
@@ -2985,8 +3135,22 @@ var Indexer = class {
|
|
|
2985
3135
|
chunksProcessed: 0,
|
|
2986
3136
|
totalChunks: pendingChunks.length
|
|
2987
3137
|
});
|
|
3138
|
+
const allContentHashes = pendingChunks.map((c) => c.contentHash);
|
|
3139
|
+
const missingHashes = new Set(database.getMissingEmbeddings(allContentHashes));
|
|
3140
|
+
const chunksNeedingEmbedding = pendingChunks.filter((c) => missingHashes.has(c.contentHash));
|
|
3141
|
+
const chunksWithExistingEmbedding = pendingChunks.filter((c) => !missingHashes.has(c.contentHash));
|
|
3142
|
+
for (const chunk of chunksWithExistingEmbedding) {
|
|
3143
|
+
const embeddingBuffer = database.getEmbedding(chunk.contentHash);
|
|
3144
|
+
if (embeddingBuffer) {
|
|
3145
|
+
const vector = bufferToFloat32Array(embeddingBuffer);
|
|
3146
|
+
store.add(chunk.id, Array.from(vector), chunk.metadata);
|
|
3147
|
+
invertedIndex.removeChunk(chunk.id);
|
|
3148
|
+
invertedIndex.addChunk(chunk.id, chunk.content);
|
|
3149
|
+
stats.indexedChunks++;
|
|
3150
|
+
}
|
|
3151
|
+
}
|
|
2988
3152
|
const queue = new PQueue({ concurrency: 3 });
|
|
2989
|
-
const dynamicBatches = createDynamicBatches(
|
|
3153
|
+
const dynamicBatches = createDynamicBatches(chunksNeedingEmbedding);
|
|
2990
3154
|
for (const batch of dynamicBatches) {
|
|
2991
3155
|
queue.add(async () => {
|
|
2992
3156
|
try {
|
|
@@ -3011,7 +3175,15 @@ var Indexer = class {
|
|
|
3011
3175
|
metadata: chunk.metadata
|
|
3012
3176
|
}));
|
|
3013
3177
|
store.addBatch(items);
|
|
3014
|
-
for (
|
|
3178
|
+
for (let i = 0; i < batch.length; i++) {
|
|
3179
|
+
const chunk = batch[i];
|
|
3180
|
+
const embedding = result.embeddings[i];
|
|
3181
|
+
database.upsertEmbedding(
|
|
3182
|
+
chunk.contentHash,
|
|
3183
|
+
float32ArrayToBuffer(embedding),
|
|
3184
|
+
chunk.text,
|
|
3185
|
+
detectedProvider.modelInfo.model
|
|
3186
|
+
);
|
|
3015
3187
|
invertedIndex.removeChunk(chunk.id);
|
|
3016
3188
|
invertedIndex.addChunk(chunk.id, chunk.content);
|
|
3017
3189
|
}
|
|
@@ -3026,6 +3198,7 @@ var Indexer = class {
|
|
|
3026
3198
|
});
|
|
3027
3199
|
} catch (error) {
|
|
3028
3200
|
stats.failedChunks += batch.length;
|
|
3201
|
+
this.addFailedBatch(batch, String(error));
|
|
3029
3202
|
console.error(`Failed to embed batch after retries: ${error}`);
|
|
3030
3203
|
}
|
|
3031
3204
|
});
|
|
@@ -3038,11 +3211,16 @@ var Indexer = class {
|
|
|
3038
3211
|
chunksProcessed: stats.indexedChunks,
|
|
3039
3212
|
totalChunks: pendingChunks.length
|
|
3040
3213
|
});
|
|
3214
|
+
database.clearBranch(this.currentBranch);
|
|
3215
|
+
database.addChunksToBranch(this.currentBranch, Array.from(currentChunkIds));
|
|
3041
3216
|
store.save();
|
|
3042
3217
|
invertedIndex.save();
|
|
3043
3218
|
this.fileHashCache = currentFileHashes;
|
|
3044
3219
|
this.saveFileHashCache();
|
|
3045
3220
|
stats.durationMs = Date.now() - startTime;
|
|
3221
|
+
if (stats.failedChunks > 0) {
|
|
3222
|
+
stats.failedBatchesPath = this.failedBatchesPath;
|
|
3223
|
+
}
|
|
3046
3224
|
onProgress?.({
|
|
3047
3225
|
phase: "complete",
|
|
3048
3226
|
filesProcessed: files.length,
|
|
@@ -3053,18 +3231,24 @@ var Indexer = class {
|
|
|
3053
3231
|
return stats;
|
|
3054
3232
|
}
|
|
3055
3233
|
async search(query, limit, options) {
|
|
3056
|
-
const { store, provider } = await this.ensureInitialized();
|
|
3234
|
+
const { store, provider, database } = await this.ensureInitialized();
|
|
3057
3235
|
if (store.count() === 0) {
|
|
3058
3236
|
return [];
|
|
3059
3237
|
}
|
|
3060
3238
|
const maxResults = limit ?? this.config.search.maxResults;
|
|
3061
3239
|
const hybridWeight = options?.hybridWeight ?? this.config.search.hybridWeight;
|
|
3240
|
+
const filterByBranch = options?.filterByBranch ?? true;
|
|
3062
3241
|
const { embedding } = await provider.embed(query);
|
|
3063
3242
|
const semanticResults = store.search(embedding, maxResults * 4);
|
|
3064
3243
|
const keywordResults = await this.keywordSearch(query, maxResults * 4);
|
|
3065
3244
|
const combined = this.fuseResults(semanticResults, keywordResults, hybridWeight, maxResults * 4);
|
|
3245
|
+
let branchChunkIds = null;
|
|
3246
|
+
if (filterByBranch && this.currentBranch !== "default") {
|
|
3247
|
+
branchChunkIds = new Set(database.getBranchChunkIds(this.currentBranch));
|
|
3248
|
+
}
|
|
3066
3249
|
const filtered = combined.filter((r) => {
|
|
3067
3250
|
if (r.score < this.config.search.minScore) return false;
|
|
3251
|
+
if (branchChunkIds && !branchChunkIds.has(r.id)) return false;
|
|
3068
3252
|
if (options?.fileType) {
|
|
3069
3253
|
const ext = r.metadata.filePath.split(".").pop()?.toLowerCase();
|
|
3070
3254
|
if (ext !== options.fileType.toLowerCase().replace(/^\./, "")) return false;
|
|
@@ -3166,7 +3350,9 @@ var Indexer = class {
|
|
|
3166
3350
|
vectorCount: store.count(),
|
|
3167
3351
|
provider: detectedProvider.provider,
|
|
3168
3352
|
model: detectedProvider.modelInfo.model,
|
|
3169
|
-
indexPath: this.indexPath
|
|
3353
|
+
indexPath: this.indexPath,
|
|
3354
|
+
currentBranch: this.currentBranch,
|
|
3355
|
+
baseBranch: this.baseBranch
|
|
3170
3356
|
};
|
|
3171
3357
|
}
|
|
3172
3358
|
async clearIndex() {
|
|
@@ -3177,7 +3363,7 @@ var Indexer = class {
|
|
|
3177
3363
|
invertedIndex.save();
|
|
3178
3364
|
}
|
|
3179
3365
|
async healthCheck() {
|
|
3180
|
-
const { store, invertedIndex } = await this.ensureInitialized();
|
|
3366
|
+
const { store, invertedIndex, database } = await this.ensureInitialized();
|
|
3181
3367
|
const allMetadata = store.getAllMetadata();
|
|
3182
3368
|
const filePathsToChunkKeys = /* @__PURE__ */ new Map();
|
|
3183
3369
|
for (const { key, metadata } of allMetadata) {
|
|
@@ -3194,6 +3380,7 @@ var Indexer = class {
|
|
|
3194
3380
|
invertedIndex.removeChunk(key);
|
|
3195
3381
|
removedCount++;
|
|
3196
3382
|
}
|
|
3383
|
+
database.deleteChunksByFile(filePath);
|
|
3197
3384
|
removedFilePaths.push(filePath);
|
|
3198
3385
|
}
|
|
3199
3386
|
}
|
|
@@ -3201,7 +3388,77 @@ var Indexer = class {
|
|
|
3201
3388
|
store.save();
|
|
3202
3389
|
invertedIndex.save();
|
|
3203
3390
|
}
|
|
3204
|
-
|
|
3391
|
+
const gcOrphanEmbeddings = database.gcOrphanEmbeddings();
|
|
3392
|
+
const gcOrphanChunks = database.gcOrphanChunks();
|
|
3393
|
+
return { removed: removedCount, filePaths: removedFilePaths, gcOrphanEmbeddings, gcOrphanChunks };
|
|
3394
|
+
}
|
|
3395
|
+
async retryFailedBatches() {
|
|
3396
|
+
const { store, provider, invertedIndex } = await this.ensureInitialized();
|
|
3397
|
+
const failedBatches = this.loadFailedBatches();
|
|
3398
|
+
if (failedBatches.length === 0) {
|
|
3399
|
+
return { succeeded: 0, failed: 0, remaining: 0 };
|
|
3400
|
+
}
|
|
3401
|
+
let succeeded = 0;
|
|
3402
|
+
let failed = 0;
|
|
3403
|
+
const stillFailing = [];
|
|
3404
|
+
for (const batch of failedBatches) {
|
|
3405
|
+
try {
|
|
3406
|
+
const result = await pRetry(
|
|
3407
|
+
async () => {
|
|
3408
|
+
const texts = batch.chunks.map((c) => c.text);
|
|
3409
|
+
return provider.embedBatch(texts);
|
|
3410
|
+
},
|
|
3411
|
+
{
|
|
3412
|
+
retries: this.config.indexing.retries,
|
|
3413
|
+
minTimeout: this.config.indexing.retryDelayMs
|
|
3414
|
+
}
|
|
3415
|
+
);
|
|
3416
|
+
const items = batch.chunks.map((chunk, idx) => ({
|
|
3417
|
+
id: chunk.id,
|
|
3418
|
+
vector: result.embeddings[idx],
|
|
3419
|
+
metadata: chunk.metadata
|
|
3420
|
+
}));
|
|
3421
|
+
store.addBatch(items);
|
|
3422
|
+
for (const chunk of batch.chunks) {
|
|
3423
|
+
invertedIndex.removeChunk(chunk.id);
|
|
3424
|
+
invertedIndex.addChunk(chunk.id, chunk.content);
|
|
3425
|
+
}
|
|
3426
|
+
succeeded += batch.chunks.length;
|
|
3427
|
+
} catch (error) {
|
|
3428
|
+
failed += batch.chunks.length;
|
|
3429
|
+
stillFailing.push({
|
|
3430
|
+
...batch,
|
|
3431
|
+
attemptCount: batch.attemptCount + 1,
|
|
3432
|
+
lastAttempt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
3433
|
+
error: String(error)
|
|
3434
|
+
});
|
|
3435
|
+
}
|
|
3436
|
+
}
|
|
3437
|
+
this.saveFailedBatches(stillFailing);
|
|
3438
|
+
if (succeeded > 0) {
|
|
3439
|
+
store.save();
|
|
3440
|
+
invertedIndex.save();
|
|
3441
|
+
}
|
|
3442
|
+
return { succeeded, failed, remaining: stillFailing.length };
|
|
3443
|
+
}
|
|
3444
|
+
getFailedBatchesCount() {
|
|
3445
|
+
return this.loadFailedBatches().length;
|
|
3446
|
+
}
|
|
3447
|
+
getCurrentBranch() {
|
|
3448
|
+
return this.currentBranch;
|
|
3449
|
+
}
|
|
3450
|
+
getBaseBranch() {
|
|
3451
|
+
return this.baseBranch;
|
|
3452
|
+
}
|
|
3453
|
+
refreshBranchInfo() {
|
|
3454
|
+
if (isGitRepo(this.projectRoot)) {
|
|
3455
|
+
this.currentBranch = getBranchOrDefault(this.projectRoot);
|
|
3456
|
+
this.baseBranch = getBaseBranch(this.projectRoot);
|
|
3457
|
+
}
|
|
3458
|
+
}
|
|
3459
|
+
async getDatabaseStats() {
|
|
3460
|
+
const { database } = await this.ensureInitialized();
|
|
3461
|
+
return database.getStats();
|
|
3205
3462
|
}
|
|
3206
3463
|
};
|
|
3207
3464
|
|
|
@@ -5028,9 +5285,82 @@ var FileWatcher = class {
|
|
|
5028
5285
|
return this.watcher !== null;
|
|
5029
5286
|
}
|
|
5030
5287
|
};
|
|
5288
|
+
// Watches .git/HEAD (and .git/refs/heads) and invokes a callback when the
// checked-out branch changes. Changes are debounced so a burst of git
// operations (checkout, rebase, branch -m) triggers at most one check.
var GitHeadWatcher = class {
  watcher = null;
  projectRoot;
  currentBranch = null;
  onBranchChange = null;
  debounceTimer = null;
  debounceMs = 100;
  // Short debounce for git operations
  constructor(projectRoot) {
    this.projectRoot = projectRoot;
  }
  /**
   * Begin watching HEAD and branch refs. No-op when already running or when
   * the project root is not a git repository.
   * @param {(oldBranch: string|null, newBranch: string) => void|Promise<void>} handler
   */
  start(handler) {
    if (this.watcher) {
      return;
    }
    if (!isGitRepo(this.projectRoot)) {
      return;
    }
    this.onBranchChange = handler;
    this.currentBranch = getCurrentBranch(this.projectRoot);
    const headPath = getHeadPath(this.projectRoot);
    const refsPath = path6.join(this.projectRoot, ".git", "refs", "heads");
    this.watcher = chokidar_default.watch([headPath, refsPath], {
      persistent: true,
      ignoreInitial: true,
      // Wait for writes to settle so we do not read a half-written HEAD
      // in the middle of a checkout.
      awaitWriteFinish: {
        stabilityThreshold: 50,
        pollInterval: 10
      }
    });
    this.watcher.on("change", () => this.handleHeadChange());
    this.watcher.on("add", () => this.handleHeadChange());
  }
  // Coalesce bursts of HEAD/ref changes into a single branch check.
  handleHeadChange() {
    if (this.debounceTimer) {
      clearTimeout(this.debounceTimer);
    }
    this.debounceTimer = setTimeout(() => {
      this.debounceTimer = null;
      // checkBranchChange is async; a rejection inside a timer callback
      // would otherwise surface as an unhandled promise rejection (fatal on
      // modern Node), so catch and log it here.
      this.checkBranchChange().catch((error) => {
        console.error("Error checking branch change:", error);
      });
    }, this.debounceMs);
  }
  // Re-read the current branch and fire the handler only on a real change.
  async checkBranchChange() {
    const newBranch = getCurrentBranch(this.projectRoot);
    if (newBranch && newBranch !== this.currentBranch && this.onBranchChange) {
      const oldBranch = this.currentBranch;
      this.currentBranch = newBranch;
      try {
        await this.onBranchChange(oldBranch, newBranch);
      } catch (error) {
        console.error("Error handling branch change:", error);
      }
    } else if (newBranch) {
      this.currentBranch = newBranch;
    }
  }
  /** Last branch observed by this watcher, or null before start(). */
  getCurrentBranch() {
    return this.currentBranch;
  }
  /** Stop watching and release the timer, watcher, and handler. */
  stop() {
    if (this.debounceTimer) {
      clearTimeout(this.debounceTimer);
      this.debounceTimer = null;
    }
    if (this.watcher) {
      // NOTE(review): chokidar's close() returns a Promise that is not
      // awaited here — presumably intentional so stop() stays synchronous.
      this.watcher.close();
      this.watcher = null;
    }
    this.onBranchChange = null;
  }
  /** True while a watcher is active (between start() and stop()). */
  isRunning() {
    return this.watcher !== null;
  }
};
|
|
5031
5361
|
function createWatcherWithIndexer(indexer, projectRoot, config) {
|
|
5032
|
-
const
|
|
5033
|
-
|
|
5362
|
+
const fileWatcher = new FileWatcher(projectRoot, config);
|
|
5363
|
+
fileWatcher.start(async (changes) => {
|
|
5034
5364
|
const hasAddOrChange = changes.some(
|
|
5035
5365
|
(c) => c.type === "add" || c.type === "change"
|
|
5036
5366
|
);
|
|
@@ -5039,7 +5369,22 @@ function createWatcherWithIndexer(indexer, projectRoot, config) {
|
|
|
5039
5369
|
await indexer.index();
|
|
5040
5370
|
}
|
|
5041
5371
|
});
|
|
5042
|
-
|
|
5372
|
+
let gitWatcher = null;
|
|
5373
|
+
if (isGitRepo(projectRoot)) {
|
|
5374
|
+
gitWatcher = new GitHeadWatcher(projectRoot);
|
|
5375
|
+
gitWatcher.start(async (oldBranch, newBranch) => {
|
|
5376
|
+
console.log(`Branch changed: ${oldBranch ?? "(none)"} -> ${newBranch}`);
|
|
5377
|
+
await indexer.index();
|
|
5378
|
+
});
|
|
5379
|
+
}
|
|
5380
|
+
return {
|
|
5381
|
+
fileWatcher,
|
|
5382
|
+
gitWatcher,
|
|
5383
|
+
stop() {
|
|
5384
|
+
fileWatcher.stop();
|
|
5385
|
+
gitWatcher?.stop();
|
|
5386
|
+
}
|
|
5387
|
+
};
|
|
5043
5388
|
}
|
|
5044
5389
|
|
|
5045
5390
|
// src/tools/index.ts
|
|
@@ -5123,13 +5468,19 @@ var index_health_check = tool({
|
|
|
5123
5468
|
async execute() {
|
|
5124
5469
|
const indexer = getIndexer();
|
|
5125
5470
|
const result = await indexer.healthCheck();
|
|
5126
|
-
if (result.removed === 0) {
|
|
5471
|
+
if (result.removed === 0 && result.gcOrphanEmbeddings === 0 && result.gcOrphanChunks === 0) {
|
|
5127
5472
|
return "Index is healthy. No stale entries found.";
|
|
5128
5473
|
}
|
|
5129
|
-
const lines = [
|
|
5130
|
-
|
|
5131
|
-
` Removed stale entries: ${result.removed}`
|
|
5132
|
-
|
|
5474
|
+
const lines = [`Health check complete:`];
|
|
5475
|
+
if (result.removed > 0) {
|
|
5476
|
+
lines.push(` Removed stale entries: ${result.removed}`);
|
|
5477
|
+
}
|
|
5478
|
+
if (result.gcOrphanEmbeddings > 0) {
|
|
5479
|
+
lines.push(` Garbage collected orphan embeddings: ${result.gcOrphanEmbeddings}`);
|
|
5480
|
+
}
|
|
5481
|
+
if (result.gcOrphanChunks > 0) {
|
|
5482
|
+
lines.push(` Garbage collected orphan chunks: ${result.gcOrphanChunks}`);
|
|
5483
|
+
}
|
|
5133
5484
|
if (result.filePaths.length > 0) {
|
|
5134
5485
|
lines.push(` Cleaned paths: ${result.filePaths.join(", ")}`);
|
|
5135
5486
|
}
|
|
@@ -5184,13 +5535,18 @@ function formatStatus(status) {
|
|
|
5184
5535
|
if (!status.indexed) {
|
|
5185
5536
|
return "Codebase is not indexed. Run index_codebase to create an index.";
|
|
5186
5537
|
}
|
|
5187
|
-
|
|
5538
|
+
const lines = [
|
|
5188
5539
|
`Index status:`,
|
|
5189
5540
|
` Indexed chunks: ${status.vectorCount.toLocaleString()}`,
|
|
5190
5541
|
` Provider: ${status.provider}`,
|
|
5191
5542
|
` Model: ${status.model}`,
|
|
5192
5543
|
` Location: ${status.indexPath}`
|
|
5193
|
-
]
|
|
5544
|
+
];
|
|
5545
|
+
if (status.currentBranch !== "default") {
|
|
5546
|
+
lines.push(` Current branch: ${status.currentBranch}`);
|
|
5547
|
+
lines.push(` Base branch: ${status.baseBranch}`);
|
|
5548
|
+
}
|
|
5549
|
+
return lines.join("\n");
|
|
5194
5550
|
}
|
|
5195
5551
|
|
|
5196
5552
|
// src/index.ts
|
|
@@ -5227,6 +5583,39 @@ var plugin = async ({ directory }) => {
|
|
|
5227
5583
|
index_codebase,
|
|
5228
5584
|
index_status,
|
|
5229
5585
|
index_health_check
|
|
5586
|
+
},
|
|
5587
|
+
async config(cfg) {
|
|
5588
|
+
cfg.command = cfg.command ?? {};
|
|
5589
|
+
cfg.command["search"] = {
|
|
5590
|
+
description: "Search codebase by meaning using semantic search",
|
|
5591
|
+
template: `Use the \`codebase_search\` tool to find code related to: $ARGUMENTS
|
|
5592
|
+
|
|
5593
|
+
If the index doesn't exist yet, run \`index_codebase\` first.
|
|
5594
|
+
|
|
5595
|
+
Return the most relevant results with file paths and line numbers.`
|
|
5596
|
+
};
|
|
5597
|
+
cfg.command["find"] = {
|
|
5598
|
+
description: "Find code using hybrid approach (semantic + grep)",
|
|
5599
|
+
template: `Find code related to: $ARGUMENTS
|
|
5600
|
+
|
|
5601
|
+
Strategy:
|
|
5602
|
+
1. First use \`codebase_search\` to find semantically related code
|
|
5603
|
+
2. From the results, identify specific function/class names
|
|
5604
|
+
3. Use grep to find all occurrences of those identifiers
|
|
5605
|
+
4. Combine findings into a comprehensive answer
|
|
5606
|
+
|
|
5607
|
+
If the semantic index doesn't exist, run \`index_codebase\` first.`
|
|
5608
|
+
};
|
|
5609
|
+
cfg.command["index"] = {
|
|
5610
|
+
description: "Index the codebase for semantic search",
|
|
5611
|
+
template: `Run the \`index_codebase\` tool to create or update the semantic search index.
|
|
5612
|
+
|
|
5613
|
+
Show progress and final statistics including:
|
|
5614
|
+
- Number of files processed
|
|
5615
|
+
- Number of chunks indexed
|
|
5616
|
+
- Tokens used
|
|
5617
|
+
- Duration`
|
|
5618
|
+
};
|
|
5230
5619
|
}
|
|
5231
5620
|
};
|
|
5232
5621
|
};
|