raggrep 0.8.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/main.js CHANGED
@@ -614,6 +614,33 @@ class BM25Index {
614
614
  addDocument(id, tokens) {
615
615
  this.addDocuments([{ id, content: "", tokens }]);
616
616
  }
617
+ removeDocument(id) {
618
+ const doc = this.documents.get(id);
619
+ if (!doc)
620
+ return false;
621
+ const tokens = doc.tokens;
622
+ const uniqueTerms = new Set(tokens);
623
+ for (const term of uniqueTerms) {
624
+ const count = this.documentFrequencies.get(term) || 0;
625
+ if (count <= 1) {
626
+ this.documentFrequencies.delete(term);
627
+ } else {
628
+ this.documentFrequencies.set(term, count - 1);
629
+ }
630
+ }
631
+ const totalLength = this.avgDocLength * this.totalDocs - tokens.length;
632
+ this.totalDocs--;
633
+ this.avgDocLength = this.totalDocs > 0 ? totalLength / this.totalDocs : 0;
634
+ this.documents.delete(id);
635
+ return true;
636
+ }
637
+ updateDocument(id, newTokens) {
638
+ this.removeDocument(id);
639
+ this.addDocument(id, newTokens);
640
+ }
641
+ hasDocument(id) {
642
+ return this.documents.has(id);
643
+ }
617
644
  serialize() {
618
645
  const documents = {};
619
646
  for (const [id, { tokens }] of this.documents) {
@@ -3787,18 +3814,41 @@ class SymbolicIndex {
3787
3814
  addFile(summary) {
3788
3815
  this.fileSummaries.set(summary.filepath, summary);
3789
3816
  }
3817
+ addFileIncremental(summary) {
3818
+ const filepath = summary.filepath;
3819
+ const oldSummary = this.fileSummaries.get(filepath);
3820
+ this.fileSummaries.set(filepath, summary);
3821
+ if (this.bm25Index) {
3822
+ if (oldSummary) {
3823
+ this.bm25Index.removeDocument(filepath);
3824
+ }
3825
+ const tokens = this.getTokensForSummary(filepath, summary);
3826
+ this.bm25Index.addDocument(filepath, tokens);
3827
+ }
3828
+ }
3790
3829
  removeFile(filepath) {
3791
3830
  return this.fileSummaries.delete(filepath);
3792
3831
  }
3832
+ removeFileIncremental(filepath) {
3833
+ const existed = this.fileSummaries.delete(filepath);
3834
+ if (existed && this.bm25Index) {
3835
+ this.bm25Index.removeDocument(filepath);
3836
+ }
3837
+ return existed;
3838
+ }
3839
+ getTokensForSummary(filepath, summary) {
3840
+ const content = [
3841
+ ...summary.keywords,
3842
+ ...summary.exports,
3843
+ ...extractPathKeywords(filepath)
3844
+ ].join(" ");
3845
+ return tokenize(content);
3846
+ }
3793
3847
  buildBM25Index() {
3794
3848
  this.bm25Index = new BM25Index;
3795
3849
  for (const [filepath, summary] of this.fileSummaries) {
3796
- const content = [
3797
- ...summary.keywords,
3798
- ...summary.exports,
3799
- ...extractPathKeywords(filepath)
3800
- ].join(" ");
3801
- this.bm25Index.addDocuments([{ id: filepath, content }]);
3850
+ const tokens = this.getTokensForSummary(filepath, summary);
3851
+ this.bm25Index.addDocument(filepath, tokens);
3802
3852
  }
3803
3853
  if (this.meta) {
3804
3854
  this.meta.fileCount = this.fileSummaries.size;
@@ -3823,6 +3873,9 @@ class SymbolicIndex {
3823
3873
  throw new Error("Index not initialized");
3824
3874
  this.meta.lastUpdated = new Date().toISOString();
3825
3875
  this.meta.fileCount = this.fileSummaries.size;
3876
+ if (this.bm25Index) {
3877
+ this.meta.bm25Serialized = this.bm25Index.serialize();
3878
+ }
3826
3879
  await fs3.mkdir(this.symbolicPath, { recursive: true });
3827
3880
  const metaPath = path8.join(this.symbolicPath, "_meta.json");
3828
3881
  await fs3.writeFile(metaPath, JSON.stringify(this.meta, null, 2));
@@ -3832,13 +3885,37 @@ class SymbolicIndex {
3832
3885
  await fs3.writeFile(summaryPath, JSON.stringify(summary, null, 2));
3833
3886
  }
3834
3887
  }
3888
+ async saveIncremental(filepaths) {
3889
+ if (!this.meta)
3890
+ throw new Error("Index not initialized");
3891
+ this.meta.lastUpdated = new Date().toISOString();
3892
+ this.meta.fileCount = this.fileSummaries.size;
3893
+ if (this.bm25Index) {
3894
+ this.meta.bm25Serialized = this.bm25Index.serialize();
3895
+ }
3896
+ await fs3.mkdir(this.symbolicPath, { recursive: true });
3897
+ const metaPath = path8.join(this.symbolicPath, "_meta.json");
3898
+ await fs3.writeFile(metaPath, JSON.stringify(this.meta, null, 2));
3899
+ for (const filepath of filepaths) {
3900
+ const summary = this.fileSummaries.get(filepath);
3901
+ if (summary) {
3902
+ const summaryPath = this.getFileSummaryPath(filepath);
3903
+ await fs3.mkdir(path8.dirname(summaryPath), { recursive: true });
3904
+ await fs3.writeFile(summaryPath, JSON.stringify(summary, null, 2));
3905
+ }
3906
+ }
3907
+ }
3835
3908
  async load() {
3836
3909
  const metaPath = path8.join(this.symbolicPath, "_meta.json");
3837
3910
  const metaContent = await fs3.readFile(metaPath, "utf-8");
3838
3911
  this.meta = JSON.parse(metaContent);
3839
3912
  this.fileSummaries.clear();
3840
3913
  await this.loadFileSummariesRecursive(this.symbolicPath);
3841
- this.buildBM25Index();
3914
+ if (this.meta?.bm25Serialized) {
3915
+ this.bm25Index = BM25Index.deserialize(this.meta.bm25Serialized);
3916
+ } else {
3917
+ this.buildBM25Index();
3918
+ }
3842
3919
  }
3843
3920
  async loadFileSummariesRecursive(dir) {
3844
3921
  try {
@@ -4222,11 +4299,14 @@ class TypeScriptModule {
4222
4299
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
4223
4300
  this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
4224
4301
  await this.symbolicIndex.initialize();
4302
+ const updatedFilepaths = [];
4225
4303
  for (const [filepath, summary] of this.pendingSummaries) {
4226
- this.symbolicIndex.addFile(summary);
4304
+ this.symbolicIndex.addFileIncremental(summary);
4305
+ updatedFilepaths.push(filepath);
4306
+ }
4307
+ if (updatedFilepaths.length > 0) {
4308
+ await this.symbolicIndex.saveIncremental(updatedFilepaths);
4227
4309
  }
4228
- this.symbolicIndex.buildBM25Index();
4229
- await this.symbolicIndex.save();
4230
4310
  this.literalIndex = new LiteralIndex(indexDir, this.id);
4231
4311
  await this.literalIndex.initialize();
4232
4312
  const indexedFilepaths = new Set;
@@ -4578,11 +4658,14 @@ class JsonModule {
4578
4658
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
4579
4659
  this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
4580
4660
  await this.symbolicIndex.initialize();
4661
+ const updatedFilepaths = [];
4581
4662
  for (const [filepath, summary] of this.pendingSummaries) {
4582
- this.symbolicIndex.addFile(summary);
4663
+ this.symbolicIndex.addFileIncremental(summary);
4664
+ updatedFilepaths.push(filepath);
4665
+ }
4666
+ if (updatedFilepaths.length > 0) {
4667
+ await this.symbolicIndex.saveIncremental(updatedFilepaths);
4583
4668
  }
4584
- this.symbolicIndex.buildBM25Index();
4585
- await this.symbolicIndex.save();
4586
4669
  this.literalIndex = new LiteralIndex(indexDir, this.id);
4587
4670
  await this.literalIndex.initialize();
4588
4671
  const indexedFilepaths = new Set;
@@ -4909,11 +4992,14 @@ ${section.content}` : section.content,
4909
4992
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
4910
4993
  this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
4911
4994
  await this.symbolicIndex.initialize();
4995
+ const updatedFilepaths = [];
4912
4996
  for (const [filepath, summary] of this.pendingSummaries) {
4913
- this.symbolicIndex.addFile(summary);
4997
+ this.symbolicIndex.addFileIncremental(summary);
4998
+ updatedFilepaths.push(filepath);
4999
+ }
5000
+ if (updatedFilepaths.length > 0) {
5001
+ await this.symbolicIndex.saveIncremental(updatedFilepaths);
4914
5002
  }
4915
- this.symbolicIndex.buildBM25Index();
4916
- await this.symbolicIndex.save();
4917
5003
  this.pendingSummaries.clear();
4918
5004
  }
4919
5005
  async search(query, ctx, options = {}) {
@@ -5500,6 +5586,10 @@ import { glob } from "glob";
5500
5586
  import * as fs7 from "fs/promises";
5501
5587
  import * as path16 from "path";
5502
5588
  import * as os3 from "os";
5589
+ import * as crypto2 from "crypto";
5590
+ function computeContentHash(content) {
5591
+ return crypto2.createHash("sha256").update(content, "utf-8").digest("hex");
5592
+ }
5503
5593
  async function parallelMap(items, processor, concurrency) {
5504
5594
  const results = new Array(items.length);
5505
5595
  let nextIndex = 0;
@@ -5749,42 +5839,93 @@ async function ensureIndexFresh(rootDir, options = {}) {
5749
5839
  getIntrospection: (filepath) => introspection.getFile(filepath)
5750
5840
  };
5751
5841
  const totalFiles = currentFiles.length;
5752
- for (let i = 0;i < currentFiles.length; i++) {
5753
- const filepath = currentFiles[i];
5842
+ let completedCount = 0;
5843
+ const processIncrementalFile = async (filepath) => {
5754
5844
  const relativePath = path16.relative(rootDir, filepath);
5755
- const progress = `[${i + 1}/${totalFiles}]`;
5756
5845
  try {
5757
5846
  const stats = await fs7.stat(filepath);
5758
5847
  const lastModified = stats.mtime.toISOString();
5759
5848
  const existingEntry = manifest.files[relativePath];
5760
5849
  if (existingEntry && existingEntry.lastModified === lastModified) {
5761
- totalUnchanged++;
5762
- continue;
5850
+ completedCount++;
5851
+ return { relativePath, status: "unchanged" };
5763
5852
  }
5764
- logger.progress(` ${progress} Indexing: ${relativePath}`);
5765
5853
  const content = await fs7.readFile(filepath, "utf-8");
5766
- introspection.addFile(relativePath, content);
5767
- const fileIndex = await module.indexFile(relativePath, content, ctx);
5768
- if (fileIndex) {
5769
- await writeFileIndex(rootDir, module.id, relativePath, fileIndex, config);
5770
- manifest.files[relativePath] = {
5854
+ const contentHash = computeContentHash(content);
5855
+ if (existingEntry?.contentHash && existingEntry.contentHash === contentHash) {
5856
+ completedCount++;
5857
+ return {
5858
+ relativePath,
5859
+ status: "mtime_updated",
5771
5860
  lastModified,
5772
- chunkCount: fileIndex.chunks.length
5861
+ contentHash
5773
5862
  };
5774
- totalIndexed++;
5775
5863
  }
5864
+ completedCount++;
5865
+ logger.progress(` [${completedCount}/${totalFiles}] Indexing: ${relativePath}`);
5866
+ introspection.addFile(relativePath, content);
5867
+ const fileIndex = await module.indexFile(relativePath, content, ctx);
5868
+ if (!fileIndex) {
5869
+ return { relativePath, status: "unchanged" };
5870
+ }
5871
+ await writeFileIndex(rootDir, module.id, relativePath, fileIndex, config);
5872
+ return {
5873
+ relativePath,
5874
+ status: "indexed",
5875
+ lastModified,
5876
+ chunkCount: fileIndex.chunks.length,
5877
+ contentHash
5878
+ };
5776
5879
  } catch (error) {
5777
- logger.clearProgress();
5778
- logger.error(` ${progress} Error indexing ${relativePath}: ${error}`);
5880
+ completedCount++;
5881
+ return { relativePath, status: "error", error };
5779
5882
  }
5780
- }
5883
+ };
5884
+ const concurrency = options.concurrency ?? DEFAULT_CONCURRENCY;
5885
+ const results = await parallelMap(currentFiles, processIncrementalFile, concurrency);
5781
5886
  logger.clearProgress();
5782
- if (totalIndexed > 0 || totalRemoved > 0) {
5887
+ let mtimeUpdates = 0;
5888
+ for (const item of results) {
5889
+ if (!item.success) {
5890
+ continue;
5891
+ }
5892
+ const fileResult = item.value;
5893
+ switch (fileResult.status) {
5894
+ case "indexed":
5895
+ manifest.files[fileResult.relativePath] = {
5896
+ lastModified: fileResult.lastModified,
5897
+ chunkCount: fileResult.chunkCount,
5898
+ contentHash: fileResult.contentHash
5899
+ };
5900
+ totalIndexed++;
5901
+ break;
5902
+ case "mtime_updated":
5903
+ if (manifest.files[fileResult.relativePath]) {
5904
+ manifest.files[fileResult.relativePath] = {
5905
+ ...manifest.files[fileResult.relativePath],
5906
+ lastModified: fileResult.lastModified,
5907
+ contentHash: fileResult.contentHash
5908
+ };
5909
+ mtimeUpdates++;
5910
+ }
5911
+ totalUnchanged++;
5912
+ break;
5913
+ case "unchanged":
5914
+ totalUnchanged++;
5915
+ break;
5916
+ case "error":
5917
+ logger.error(` Error indexing ${fileResult.relativePath}: ${fileResult.error}`);
5918
+ break;
5919
+ }
5920
+ }
5921
+ const hasManifestChanges = totalIndexed > 0 || totalRemoved > 0 || mtimeUpdates > 0;
5922
+ if (hasManifestChanges) {
5783
5923
  manifest.lastUpdated = new Date().toISOString();
5784
5924
  await writeModuleManifest(rootDir, module.id, manifest, config);
5785
- if (module.finalize) {
5786
- await module.finalize(ctx);
5787
- }
5925
+ }
5926
+ const hasContentChanges = totalIndexed > 0 || totalRemoved > 0;
5927
+ if (hasContentChanges && module.finalize) {
5928
+ await module.finalize(ctx);
5788
5929
  }
5789
5930
  if (totalRemoved > 0) {
5790
5931
  await cleanupEmptyDirectories(indexPath);
@@ -5862,6 +6003,17 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
5862
6003
  return { relativePath, status: "skipped" };
5863
6004
  }
5864
6005
  const content = await fs7.readFile(filepath, "utf-8");
6006
+ const contentHash = computeContentHash(content);
6007
+ if (existingEntry?.contentHash && existingEntry.contentHash === contentHash) {
6008
+ completedCount++;
6009
+ logger.debug(` [${completedCount}/${totalFiles}] Skipped ${relativePath} (content unchanged)`);
6010
+ return {
6011
+ relativePath,
6012
+ status: "skipped",
6013
+ lastModified,
6014
+ contentHash
6015
+ };
6016
+ }
5865
6017
  introspection.addFile(relativePath, content);
5866
6018
  completedCount++;
5867
6019
  logger.progress(` [${completedCount}/${totalFiles}] Processing: ${relativePath}`);
@@ -5875,7 +6027,8 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
5875
6027
  relativePath,
5876
6028
  status: "indexed",
5877
6029
  lastModified,
5878
- chunkCount: fileIndex.chunks.length
6030
+ chunkCount: fileIndex.chunks.length,
6031
+ contentHash
5879
6032
  };
5880
6033
  } catch (error) {
5881
6034
  completedCount++;
@@ -5895,11 +6048,22 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
5895
6048
  case "indexed":
5896
6049
  manifest.files[fileResult.relativePath] = {
5897
6050
  lastModified: fileResult.lastModified,
5898
- chunkCount: fileResult.chunkCount
6051
+ chunkCount: fileResult.chunkCount,
6052
+ contentHash: fileResult.contentHash
5899
6053
  };
5900
6054
  result.indexed++;
5901
6055
  break;
5902
6056
  case "skipped":
6057
+ if (fileResult.lastModified && fileResult.contentHash) {
6058
+ const existingEntry = manifest.files[fileResult.relativePath];
6059
+ if (existingEntry) {
6060
+ manifest.files[fileResult.relativePath] = {
6061
+ ...existingEntry,
6062
+ lastModified: fileResult.lastModified,
6063
+ contentHash: fileResult.contentHash
6064
+ };
6065
+ }
6066
+ }
5903
6067
  result.skipped++;
5904
6068
  break;
5905
6069
  case "error":
@@ -7674,7 +7838,7 @@ init_logger();
7674
7838
  // package.json
7675
7839
  var package_default = {
7676
7840
  name: "raggrep",
7677
- version: "0.8.3",
7841
+ version: "0.9.0",
7678
7842
  description: "Local filesystem-based RAG system for codebases - semantic search using local embeddings",
7679
7843
  type: "module",
7680
7844
  main: "./dist/index.js",
@@ -8207,13 +8371,18 @@ export default tool({
8207
8371
  try {
8208
8372
  await fs9.mkdir(toolDir, { recursive: true });
8209
8373
  let action = "Installed";
8374
+ const backupPath = toolPath + ".backup";
8210
8375
  try {
8211
8376
  await fs9.access(toolPath);
8377
+ await fs9.copyFile(toolPath, backupPath);
8212
8378
  action = "Updated";
8213
8379
  } catch {}
8214
8380
  await fs9.writeFile(toolPath, toolContent, "utf-8");
8215
8381
  console.log(`${action} raggrep tool for opencode.`);
8216
8382
  console.log(` Location: ${toolPath}`);
8383
+ if (action === "Updated") {
8384
+ console.log(` Backup: ${backupPath}`);
8385
+ }
8217
8386
  console.log(`
8218
8387
  The raggrep tool is now available in opencode.`);
8219
8388
  } catch (error) {
@@ -8262,4 +8431,4 @@ Run 'raggrep <command> --help' for more information.
8262
8431
  }
8263
8432
  main();
8264
8433
 
8265
- //# debugId=D0960428F32C3CB564756E2164756E21
8434
+ //# debugId=94148A85E332DDBC64756E2164756E21