raggrep 0.8.4 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/main.js +204 -40
- package/dist/cli/main.js.map +15 -15
- package/dist/domain/entities/fileIndex.d.ts +5 -0
- package/dist/domain/services/bm25.d.ts +23 -0
- package/dist/index.js +203 -39
- package/dist/index.js.map +14 -14
- package/dist/infrastructure/storage/symbolicIndex.d.ts +25 -5
- package/dist/modules/data/json/index.d.ts +2 -1
- package/dist/modules/language/typescript/index.d.ts +2 -1
- package/package.json +1 -1
package/dist/cli/main.js
CHANGED
|
@@ -614,6 +614,33 @@ class BM25Index {
|
|
|
614
614
|
addDocument(id, tokens) {
|
|
615
615
|
this.addDocuments([{ id, content: "", tokens }]);
|
|
616
616
|
}
|
|
617
|
+
removeDocument(id) {
|
|
618
|
+
const doc = this.documents.get(id);
|
|
619
|
+
if (!doc)
|
|
620
|
+
return false;
|
|
621
|
+
const tokens = doc.tokens;
|
|
622
|
+
const uniqueTerms = new Set(tokens);
|
|
623
|
+
for (const term of uniqueTerms) {
|
|
624
|
+
const count = this.documentFrequencies.get(term) || 0;
|
|
625
|
+
if (count <= 1) {
|
|
626
|
+
this.documentFrequencies.delete(term);
|
|
627
|
+
} else {
|
|
628
|
+
this.documentFrequencies.set(term, count - 1);
|
|
629
|
+
}
|
|
630
|
+
}
|
|
631
|
+
const totalLength = this.avgDocLength * this.totalDocs - tokens.length;
|
|
632
|
+
this.totalDocs--;
|
|
633
|
+
this.avgDocLength = this.totalDocs > 0 ? totalLength / this.totalDocs : 0;
|
|
634
|
+
this.documents.delete(id);
|
|
635
|
+
return true;
|
|
636
|
+
}
|
|
637
|
+
updateDocument(id, newTokens) {
|
|
638
|
+
this.removeDocument(id);
|
|
639
|
+
this.addDocument(id, newTokens);
|
|
640
|
+
}
|
|
641
|
+
hasDocument(id) {
|
|
642
|
+
return this.documents.has(id);
|
|
643
|
+
}
|
|
617
644
|
serialize() {
|
|
618
645
|
const documents = {};
|
|
619
646
|
for (const [id, { tokens }] of this.documents) {
|
|
@@ -3787,18 +3814,41 @@ class SymbolicIndex {
|
|
|
3787
3814
|
addFile(summary) {
|
|
3788
3815
|
this.fileSummaries.set(summary.filepath, summary);
|
|
3789
3816
|
}
|
|
3817
|
+
addFileIncremental(summary) {
|
|
3818
|
+
const filepath = summary.filepath;
|
|
3819
|
+
const oldSummary = this.fileSummaries.get(filepath);
|
|
3820
|
+
this.fileSummaries.set(filepath, summary);
|
|
3821
|
+
if (this.bm25Index) {
|
|
3822
|
+
if (oldSummary) {
|
|
3823
|
+
this.bm25Index.removeDocument(filepath);
|
|
3824
|
+
}
|
|
3825
|
+
const tokens = this.getTokensForSummary(filepath, summary);
|
|
3826
|
+
this.bm25Index.addDocument(filepath, tokens);
|
|
3827
|
+
}
|
|
3828
|
+
}
|
|
3790
3829
|
removeFile(filepath) {
|
|
3791
3830
|
return this.fileSummaries.delete(filepath);
|
|
3792
3831
|
}
|
|
3832
|
+
removeFileIncremental(filepath) {
|
|
3833
|
+
const existed = this.fileSummaries.delete(filepath);
|
|
3834
|
+
if (existed && this.bm25Index) {
|
|
3835
|
+
this.bm25Index.removeDocument(filepath);
|
|
3836
|
+
}
|
|
3837
|
+
return existed;
|
|
3838
|
+
}
|
|
3839
|
+
getTokensForSummary(filepath, summary) {
|
|
3840
|
+
const content = [
|
|
3841
|
+
...summary.keywords,
|
|
3842
|
+
...summary.exports,
|
|
3843
|
+
...extractPathKeywords(filepath)
|
|
3844
|
+
].join(" ");
|
|
3845
|
+
return tokenize(content);
|
|
3846
|
+
}
|
|
3793
3847
|
buildBM25Index() {
|
|
3794
3848
|
this.bm25Index = new BM25Index;
|
|
3795
3849
|
for (const [filepath, summary] of this.fileSummaries) {
|
|
3796
|
-
const
|
|
3797
|
-
|
|
3798
|
-
...summary.exports,
|
|
3799
|
-
...extractPathKeywords(filepath)
|
|
3800
|
-
].join(" ");
|
|
3801
|
-
this.bm25Index.addDocuments([{ id: filepath, content }]);
|
|
3850
|
+
const tokens = this.getTokensForSummary(filepath, summary);
|
|
3851
|
+
this.bm25Index.addDocument(filepath, tokens);
|
|
3802
3852
|
}
|
|
3803
3853
|
if (this.meta) {
|
|
3804
3854
|
this.meta.fileCount = this.fileSummaries.size;
|
|
@@ -3823,6 +3873,9 @@ class SymbolicIndex {
|
|
|
3823
3873
|
throw new Error("Index not initialized");
|
|
3824
3874
|
this.meta.lastUpdated = new Date().toISOString();
|
|
3825
3875
|
this.meta.fileCount = this.fileSummaries.size;
|
|
3876
|
+
if (this.bm25Index) {
|
|
3877
|
+
this.meta.bm25Serialized = this.bm25Index.serialize();
|
|
3878
|
+
}
|
|
3826
3879
|
await fs3.mkdir(this.symbolicPath, { recursive: true });
|
|
3827
3880
|
const metaPath = path8.join(this.symbolicPath, "_meta.json");
|
|
3828
3881
|
await fs3.writeFile(metaPath, JSON.stringify(this.meta, null, 2));
|
|
@@ -3832,13 +3885,37 @@ class SymbolicIndex {
|
|
|
3832
3885
|
await fs3.writeFile(summaryPath, JSON.stringify(summary, null, 2));
|
|
3833
3886
|
}
|
|
3834
3887
|
}
|
|
3888
|
+
async saveIncremental(filepaths) {
|
|
3889
|
+
if (!this.meta)
|
|
3890
|
+
throw new Error("Index not initialized");
|
|
3891
|
+
this.meta.lastUpdated = new Date().toISOString();
|
|
3892
|
+
this.meta.fileCount = this.fileSummaries.size;
|
|
3893
|
+
if (this.bm25Index) {
|
|
3894
|
+
this.meta.bm25Serialized = this.bm25Index.serialize();
|
|
3895
|
+
}
|
|
3896
|
+
await fs3.mkdir(this.symbolicPath, { recursive: true });
|
|
3897
|
+
const metaPath = path8.join(this.symbolicPath, "_meta.json");
|
|
3898
|
+
await fs3.writeFile(metaPath, JSON.stringify(this.meta, null, 2));
|
|
3899
|
+
for (const filepath of filepaths) {
|
|
3900
|
+
const summary = this.fileSummaries.get(filepath);
|
|
3901
|
+
if (summary) {
|
|
3902
|
+
const summaryPath = this.getFileSummaryPath(filepath);
|
|
3903
|
+
await fs3.mkdir(path8.dirname(summaryPath), { recursive: true });
|
|
3904
|
+
await fs3.writeFile(summaryPath, JSON.stringify(summary, null, 2));
|
|
3905
|
+
}
|
|
3906
|
+
}
|
|
3907
|
+
}
|
|
3835
3908
|
async load() {
|
|
3836
3909
|
const metaPath = path8.join(this.symbolicPath, "_meta.json");
|
|
3837
3910
|
const metaContent = await fs3.readFile(metaPath, "utf-8");
|
|
3838
3911
|
this.meta = JSON.parse(metaContent);
|
|
3839
3912
|
this.fileSummaries.clear();
|
|
3840
3913
|
await this.loadFileSummariesRecursive(this.symbolicPath);
|
|
3841
|
-
this.
|
|
3914
|
+
if (this.meta?.bm25Serialized) {
|
|
3915
|
+
this.bm25Index = BM25Index.deserialize(this.meta.bm25Serialized);
|
|
3916
|
+
} else {
|
|
3917
|
+
this.buildBM25Index();
|
|
3918
|
+
}
|
|
3842
3919
|
}
|
|
3843
3920
|
async loadFileSummariesRecursive(dir) {
|
|
3844
3921
|
try {
|
|
@@ -4222,11 +4299,14 @@ class TypeScriptModule {
|
|
|
4222
4299
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
4223
4300
|
this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
4224
4301
|
await this.symbolicIndex.initialize();
|
|
4302
|
+
const updatedFilepaths = [];
|
|
4225
4303
|
for (const [filepath, summary] of this.pendingSummaries) {
|
|
4226
|
-
this.symbolicIndex.
|
|
4304
|
+
this.symbolicIndex.addFileIncremental(summary);
|
|
4305
|
+
updatedFilepaths.push(filepath);
|
|
4306
|
+
}
|
|
4307
|
+
if (updatedFilepaths.length > 0) {
|
|
4308
|
+
await this.symbolicIndex.saveIncremental(updatedFilepaths);
|
|
4227
4309
|
}
|
|
4228
|
-
this.symbolicIndex.buildBM25Index();
|
|
4229
|
-
await this.symbolicIndex.save();
|
|
4230
4310
|
this.literalIndex = new LiteralIndex(indexDir, this.id);
|
|
4231
4311
|
await this.literalIndex.initialize();
|
|
4232
4312
|
const indexedFilepaths = new Set;
|
|
@@ -4578,11 +4658,14 @@ class JsonModule {
|
|
|
4578
4658
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
4579
4659
|
this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
4580
4660
|
await this.symbolicIndex.initialize();
|
|
4661
|
+
const updatedFilepaths = [];
|
|
4581
4662
|
for (const [filepath, summary] of this.pendingSummaries) {
|
|
4582
|
-
this.symbolicIndex.
|
|
4663
|
+
this.symbolicIndex.addFileIncremental(summary);
|
|
4664
|
+
updatedFilepaths.push(filepath);
|
|
4665
|
+
}
|
|
4666
|
+
if (updatedFilepaths.length > 0) {
|
|
4667
|
+
await this.symbolicIndex.saveIncremental(updatedFilepaths);
|
|
4583
4668
|
}
|
|
4584
|
-
this.symbolicIndex.buildBM25Index();
|
|
4585
|
-
await this.symbolicIndex.save();
|
|
4586
4669
|
this.literalIndex = new LiteralIndex(indexDir, this.id);
|
|
4587
4670
|
await this.literalIndex.initialize();
|
|
4588
4671
|
const indexedFilepaths = new Set;
|
|
@@ -4909,11 +4992,14 @@ ${section.content}` : section.content,
|
|
|
4909
4992
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
4910
4993
|
this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
4911
4994
|
await this.symbolicIndex.initialize();
|
|
4995
|
+
const updatedFilepaths = [];
|
|
4912
4996
|
for (const [filepath, summary] of this.pendingSummaries) {
|
|
4913
|
-
this.symbolicIndex.
|
|
4997
|
+
this.symbolicIndex.addFileIncremental(summary);
|
|
4998
|
+
updatedFilepaths.push(filepath);
|
|
4999
|
+
}
|
|
5000
|
+
if (updatedFilepaths.length > 0) {
|
|
5001
|
+
await this.symbolicIndex.saveIncremental(updatedFilepaths);
|
|
4914
5002
|
}
|
|
4915
|
-
this.symbolicIndex.buildBM25Index();
|
|
4916
|
-
await this.symbolicIndex.save();
|
|
4917
5003
|
this.pendingSummaries.clear();
|
|
4918
5004
|
}
|
|
4919
5005
|
async search(query, ctx, options = {}) {
|
|
@@ -5500,6 +5586,10 @@ import { glob } from "glob";
|
|
|
5500
5586
|
import * as fs7 from "fs/promises";
|
|
5501
5587
|
import * as path16 from "path";
|
|
5502
5588
|
import * as os3 from "os";
|
|
5589
|
+
import * as crypto2 from "crypto";
|
|
5590
|
+
/**
 * Computes the SHA-256 digest of a file's content.
 *
 * @param content - Text to hash; fed to the hash as UTF-8.
 * @returns {string} Lowercase hexadecimal digest.
 */
function computeContentHash(content) {
  const hasher = crypto2.createHash("sha256");
  hasher.update(content, "utf-8");
  return hasher.digest("hex");
}
|
|
5503
5593
|
async function parallelMap(items, processor, concurrency) {
|
|
5504
5594
|
const results = new Array(items.length);
|
|
5505
5595
|
let nextIndex = 0;
|
|
@@ -5749,42 +5839,93 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
5749
5839
|
getIntrospection: (filepath) => introspection.getFile(filepath)
|
|
5750
5840
|
};
|
|
5751
5841
|
const totalFiles = currentFiles.length;
|
|
5752
|
-
|
|
5753
|
-
|
|
5842
|
+
let completedCount = 0;
|
|
5843
|
+
const processIncrementalFile = async (filepath) => {
|
|
5754
5844
|
const relativePath = path16.relative(rootDir, filepath);
|
|
5755
|
-
const progress = `[${i + 1}/${totalFiles}]`;
|
|
5756
5845
|
try {
|
|
5757
5846
|
const stats = await fs7.stat(filepath);
|
|
5758
5847
|
const lastModified = stats.mtime.toISOString();
|
|
5759
5848
|
const existingEntry = manifest.files[relativePath];
|
|
5760
5849
|
if (existingEntry && existingEntry.lastModified === lastModified) {
|
|
5761
|
-
|
|
5762
|
-
|
|
5850
|
+
completedCount++;
|
|
5851
|
+
return { relativePath, status: "unchanged" };
|
|
5763
5852
|
}
|
|
5764
|
-
logger.progress(` ${progress} Indexing: ${relativePath}`);
|
|
5765
5853
|
const content = await fs7.readFile(filepath, "utf-8");
|
|
5766
|
-
|
|
5767
|
-
|
|
5768
|
-
|
|
5769
|
-
|
|
5770
|
-
|
|
5854
|
+
const contentHash = computeContentHash(content);
|
|
5855
|
+
if (existingEntry?.contentHash && existingEntry.contentHash === contentHash) {
|
|
5856
|
+
completedCount++;
|
|
5857
|
+
return {
|
|
5858
|
+
relativePath,
|
|
5859
|
+
status: "mtime_updated",
|
|
5771
5860
|
lastModified,
|
|
5772
|
-
|
|
5861
|
+
contentHash
|
|
5773
5862
|
};
|
|
5774
|
-
totalIndexed++;
|
|
5775
5863
|
}
|
|
5864
|
+
completedCount++;
|
|
5865
|
+
logger.progress(` [${completedCount}/${totalFiles}] Indexing: ${relativePath}`);
|
|
5866
|
+
introspection.addFile(relativePath, content);
|
|
5867
|
+
const fileIndex = await module.indexFile(relativePath, content, ctx);
|
|
5868
|
+
if (!fileIndex) {
|
|
5869
|
+
return { relativePath, status: "unchanged" };
|
|
5870
|
+
}
|
|
5871
|
+
await writeFileIndex(rootDir, module.id, relativePath, fileIndex, config);
|
|
5872
|
+
return {
|
|
5873
|
+
relativePath,
|
|
5874
|
+
status: "indexed",
|
|
5875
|
+
lastModified,
|
|
5876
|
+
chunkCount: fileIndex.chunks.length,
|
|
5877
|
+
contentHash
|
|
5878
|
+
};
|
|
5776
5879
|
} catch (error) {
|
|
5777
|
-
|
|
5778
|
-
|
|
5880
|
+
completedCount++;
|
|
5881
|
+
return { relativePath, status: "error", error };
|
|
5779
5882
|
}
|
|
5780
|
-
}
|
|
5883
|
+
};
|
|
5884
|
+
const concurrency = options.concurrency ?? DEFAULT_CONCURRENCY;
|
|
5885
|
+
const results = await parallelMap(currentFiles, processIncrementalFile, concurrency);
|
|
5781
5886
|
logger.clearProgress();
|
|
5782
|
-
|
|
5887
|
+
let mtimeUpdates = 0;
|
|
5888
|
+
for (const item of results) {
|
|
5889
|
+
if (!item.success) {
|
|
5890
|
+
continue;
|
|
5891
|
+
}
|
|
5892
|
+
const fileResult = item.value;
|
|
5893
|
+
switch (fileResult.status) {
|
|
5894
|
+
case "indexed":
|
|
5895
|
+
manifest.files[fileResult.relativePath] = {
|
|
5896
|
+
lastModified: fileResult.lastModified,
|
|
5897
|
+
chunkCount: fileResult.chunkCount,
|
|
5898
|
+
contentHash: fileResult.contentHash
|
|
5899
|
+
};
|
|
5900
|
+
totalIndexed++;
|
|
5901
|
+
break;
|
|
5902
|
+
case "mtime_updated":
|
|
5903
|
+
if (manifest.files[fileResult.relativePath]) {
|
|
5904
|
+
manifest.files[fileResult.relativePath] = {
|
|
5905
|
+
...manifest.files[fileResult.relativePath],
|
|
5906
|
+
lastModified: fileResult.lastModified,
|
|
5907
|
+
contentHash: fileResult.contentHash
|
|
5908
|
+
};
|
|
5909
|
+
mtimeUpdates++;
|
|
5910
|
+
}
|
|
5911
|
+
totalUnchanged++;
|
|
5912
|
+
break;
|
|
5913
|
+
case "unchanged":
|
|
5914
|
+
totalUnchanged++;
|
|
5915
|
+
break;
|
|
5916
|
+
case "error":
|
|
5917
|
+
logger.error(` Error indexing ${fileResult.relativePath}: ${fileResult.error}`);
|
|
5918
|
+
break;
|
|
5919
|
+
}
|
|
5920
|
+
}
|
|
5921
|
+
const hasManifestChanges = totalIndexed > 0 || totalRemoved > 0 || mtimeUpdates > 0;
|
|
5922
|
+
if (hasManifestChanges) {
|
|
5783
5923
|
manifest.lastUpdated = new Date().toISOString();
|
|
5784
5924
|
await writeModuleManifest(rootDir, module.id, manifest, config);
|
|
5785
|
-
|
|
5786
|
-
|
|
5787
|
-
|
|
5925
|
+
}
|
|
5926
|
+
const hasContentChanges = totalIndexed > 0 || totalRemoved > 0;
|
|
5927
|
+
if (hasContentChanges && module.finalize) {
|
|
5928
|
+
await module.finalize(ctx);
|
|
5788
5929
|
}
|
|
5789
5930
|
if (totalRemoved > 0) {
|
|
5790
5931
|
await cleanupEmptyDirectories(indexPath);
|
|
@@ -5862,6 +6003,17 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
|
|
|
5862
6003
|
return { relativePath, status: "skipped" };
|
|
5863
6004
|
}
|
|
5864
6005
|
const content = await fs7.readFile(filepath, "utf-8");
|
|
6006
|
+
const contentHash = computeContentHash(content);
|
|
6007
|
+
if (existingEntry?.contentHash && existingEntry.contentHash === contentHash) {
|
|
6008
|
+
completedCount++;
|
|
6009
|
+
logger.debug(` [${completedCount}/${totalFiles}] Skipped ${relativePath} (content unchanged)`);
|
|
6010
|
+
return {
|
|
6011
|
+
relativePath,
|
|
6012
|
+
status: "skipped",
|
|
6013
|
+
lastModified,
|
|
6014
|
+
contentHash
|
|
6015
|
+
};
|
|
6016
|
+
}
|
|
5865
6017
|
introspection.addFile(relativePath, content);
|
|
5866
6018
|
completedCount++;
|
|
5867
6019
|
logger.progress(` [${completedCount}/${totalFiles}] Processing: ${relativePath}`);
|
|
@@ -5875,7 +6027,8 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
|
|
|
5875
6027
|
relativePath,
|
|
5876
6028
|
status: "indexed",
|
|
5877
6029
|
lastModified,
|
|
5878
|
-
chunkCount: fileIndex.chunks.length
|
|
6030
|
+
chunkCount: fileIndex.chunks.length,
|
|
6031
|
+
contentHash
|
|
5879
6032
|
};
|
|
5880
6033
|
} catch (error) {
|
|
5881
6034
|
completedCount++;
|
|
@@ -5895,11 +6048,22 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
|
|
|
5895
6048
|
case "indexed":
|
|
5896
6049
|
manifest.files[fileResult.relativePath] = {
|
|
5897
6050
|
lastModified: fileResult.lastModified,
|
|
5898
|
-
chunkCount: fileResult.chunkCount
|
|
6051
|
+
chunkCount: fileResult.chunkCount,
|
|
6052
|
+
contentHash: fileResult.contentHash
|
|
5899
6053
|
};
|
|
5900
6054
|
result.indexed++;
|
|
5901
6055
|
break;
|
|
5902
6056
|
case "skipped":
|
|
6057
|
+
if (fileResult.lastModified && fileResult.contentHash) {
|
|
6058
|
+
const existingEntry = manifest.files[fileResult.relativePath];
|
|
6059
|
+
if (existingEntry) {
|
|
6060
|
+
manifest.files[fileResult.relativePath] = {
|
|
6061
|
+
...existingEntry,
|
|
6062
|
+
lastModified: fileResult.lastModified,
|
|
6063
|
+
contentHash: fileResult.contentHash
|
|
6064
|
+
};
|
|
6065
|
+
}
|
|
6066
|
+
}
|
|
5903
6067
|
result.skipped++;
|
|
5904
6068
|
break;
|
|
5905
6069
|
case "error":
|
|
@@ -7674,7 +7838,7 @@ init_logger();
|
|
|
7674
7838
|
// package.json
|
|
7675
7839
|
var package_default = {
|
|
7676
7840
|
name: "raggrep",
|
|
7677
|
-
version: "0.
|
|
7841
|
+
version: "0.9.0",
|
|
7678
7842
|
description: "Local filesystem-based RAG system for codebases - semantic search using local embeddings",
|
|
7679
7843
|
type: "module",
|
|
7680
7844
|
main: "./dist/index.js",
|
|
@@ -8267,4 +8431,4 @@ Run 'raggrep <command> --help' for more information.
|
|
|
8267
8431
|
}
|
|
8268
8432
|
main();
|
|
8269
8433
|
|
|
8270
|
-
//# debugId=
|
|
8434
|
+
//# debugId=94148A85E332DDBC64756E2164756E21
|