raggrep 0.8.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31,6 +31,11 @@ export interface FileManifestEntry {
31
31
  lastModified: string;
32
32
  /** Number of chunks in the file */
33
33
  chunkCount: number;
34
+ /**
35
+ * SHA-256 hash of file content for reliable change detection.
36
+ * This prevents false positives when git updates mtime on branch switches.
37
+ */
38
+ contentHash?: string;
34
39
  }
35
40
  /**
36
41
  * Manifest tracking all indexed files for a specific module.
@@ -78,6 +78,29 @@ export declare class BM25Index {
78
78
  * @param tokens - Pre-computed tokens
79
79
  */
80
80
  addDocument(id: string, tokens: string[]): void;
81
+ /**
82
+ * Remove a document from the index.
83
+ * Updates document frequencies and average document length.
84
+ *
85
+ * @param id - Document identifier to remove
86
+ * @returns true if document was removed, false if not found
87
+ */
88
+ removeDocument(id: string): boolean;
89
+ /**
90
+ * Update a document in the index (remove + add).
91
+ * More efficient than separate remove/add as it batches the operations.
92
+ *
93
+ * @param id - Document identifier
94
+ * @param newTokens - New tokens for the document
95
+ */
96
+ updateDocument(id: string, newTokens: string[]): void;
97
+ /**
98
+ * Check if a document exists in the index.
99
+ *
100
+ * @param id - Document identifier
101
+ * @returns true if document exists
102
+ */
103
+ hasDocument(id: string): boolean;
81
104
  /**
82
105
  * Serialize the index to a JSON-compatible object.
83
106
  */
package/dist/index.js CHANGED
@@ -314,6 +314,33 @@ class BM25Index {
314
314
  addDocument(id, tokens) {
315
315
  this.addDocuments([{ id, content: "", tokens }]);
316
316
  }
317
+ removeDocument(id) {
318
+ const doc = this.documents.get(id);
319
+ if (!doc)
320
+ return false;
321
+ const tokens = doc.tokens;
322
+ const uniqueTerms = new Set(tokens);
323
+ for (const term of uniqueTerms) {
324
+ const count = this.documentFrequencies.get(term) || 0;
325
+ if (count <= 1) {
326
+ this.documentFrequencies.delete(term);
327
+ } else {
328
+ this.documentFrequencies.set(term, count - 1);
329
+ }
330
+ }
331
+ const totalLength = this.avgDocLength * this.totalDocs - tokens.length;
332
+ this.totalDocs--;
333
+ this.avgDocLength = this.totalDocs > 0 ? totalLength / this.totalDocs : 0;
334
+ this.documents.delete(id);
335
+ return true;
336
+ }
337
+ updateDocument(id, newTokens) {
338
+ this.removeDocument(id);
339
+ this.addDocument(id, newTokens);
340
+ }
341
+ hasDocument(id) {
342
+ return this.documents.has(id);
343
+ }
317
344
  serialize() {
318
345
  const documents = {};
319
346
  for (const [id, { tokens }] of this.documents) {
@@ -3693,18 +3720,41 @@ class SymbolicIndex {
3693
3720
  addFile(summary) {
3694
3721
  this.fileSummaries.set(summary.filepath, summary);
3695
3722
  }
3723
+ addFileIncremental(summary) {
3724
+ const filepath = summary.filepath;
3725
+ const oldSummary = this.fileSummaries.get(filepath);
3726
+ this.fileSummaries.set(filepath, summary);
3727
+ if (this.bm25Index) {
3728
+ if (oldSummary) {
3729
+ this.bm25Index.removeDocument(filepath);
3730
+ }
3731
+ const tokens = this.getTokensForSummary(filepath, summary);
3732
+ this.bm25Index.addDocument(filepath, tokens);
3733
+ }
3734
+ }
3696
3735
  removeFile(filepath) {
3697
3736
  return this.fileSummaries.delete(filepath);
3698
3737
  }
3738
+ removeFileIncremental(filepath) {
3739
+ const existed = this.fileSummaries.delete(filepath);
3740
+ if (existed && this.bm25Index) {
3741
+ this.bm25Index.removeDocument(filepath);
3742
+ }
3743
+ return existed;
3744
+ }
3745
+ getTokensForSummary(filepath, summary) {
3746
+ const content = [
3747
+ ...summary.keywords,
3748
+ ...summary.exports,
3749
+ ...extractPathKeywords(filepath)
3750
+ ].join(" ");
3751
+ return tokenize(content);
3752
+ }
3699
3753
  buildBM25Index() {
3700
3754
  this.bm25Index = new BM25Index;
3701
3755
  for (const [filepath, summary] of this.fileSummaries) {
3702
- const content = [
3703
- ...summary.keywords,
3704
- ...summary.exports,
3705
- ...extractPathKeywords(filepath)
3706
- ].join(" ");
3707
- this.bm25Index.addDocuments([{ id: filepath, content }]);
3756
+ const tokens = this.getTokensForSummary(filepath, summary);
3757
+ this.bm25Index.addDocument(filepath, tokens);
3708
3758
  }
3709
3759
  if (this.meta) {
3710
3760
  this.meta.fileCount = this.fileSummaries.size;
@@ -3729,6 +3779,9 @@ class SymbolicIndex {
3729
3779
  throw new Error("Index not initialized");
3730
3780
  this.meta.lastUpdated = new Date().toISOString();
3731
3781
  this.meta.fileCount = this.fileSummaries.size;
3782
+ if (this.bm25Index) {
3783
+ this.meta.bm25Serialized = this.bm25Index.serialize();
3784
+ }
3732
3785
  await fs3.mkdir(this.symbolicPath, { recursive: true });
3733
3786
  const metaPath = path8.join(this.symbolicPath, "_meta.json");
3734
3787
  await fs3.writeFile(metaPath, JSON.stringify(this.meta, null, 2));
@@ -3738,13 +3791,37 @@ class SymbolicIndex {
3738
3791
  await fs3.writeFile(summaryPath, JSON.stringify(summary, null, 2));
3739
3792
  }
3740
3793
  }
3794
+ async saveIncremental(filepaths) {
3795
+ if (!this.meta)
3796
+ throw new Error("Index not initialized");
3797
+ this.meta.lastUpdated = new Date().toISOString();
3798
+ this.meta.fileCount = this.fileSummaries.size;
3799
+ if (this.bm25Index) {
3800
+ this.meta.bm25Serialized = this.bm25Index.serialize();
3801
+ }
3802
+ await fs3.mkdir(this.symbolicPath, { recursive: true });
3803
+ const metaPath = path8.join(this.symbolicPath, "_meta.json");
3804
+ await fs3.writeFile(metaPath, JSON.stringify(this.meta, null, 2));
3805
+ for (const filepath of filepaths) {
3806
+ const summary = this.fileSummaries.get(filepath);
3807
+ if (summary) {
3808
+ const summaryPath = this.getFileSummaryPath(filepath);
3809
+ await fs3.mkdir(path8.dirname(summaryPath), { recursive: true });
3810
+ await fs3.writeFile(summaryPath, JSON.stringify(summary, null, 2));
3811
+ }
3812
+ }
3813
+ }
3741
3814
  async load() {
3742
3815
  const metaPath = path8.join(this.symbolicPath, "_meta.json");
3743
3816
  const metaContent = await fs3.readFile(metaPath, "utf-8");
3744
3817
  this.meta = JSON.parse(metaContent);
3745
3818
  this.fileSummaries.clear();
3746
3819
  await this.loadFileSummariesRecursive(this.symbolicPath);
3747
- this.buildBM25Index();
3820
+ if (this.meta?.bm25Serialized) {
3821
+ this.bm25Index = BM25Index.deserialize(this.meta.bm25Serialized);
3822
+ } else {
3823
+ this.buildBM25Index();
3824
+ }
3748
3825
  }
3749
3826
  async loadFileSummariesRecursive(dir) {
3750
3827
  try {
@@ -4128,11 +4205,14 @@ class TypeScriptModule {
4128
4205
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
4129
4206
  this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
4130
4207
  await this.symbolicIndex.initialize();
4208
+ const updatedFilepaths = [];
4131
4209
  for (const [filepath, summary] of this.pendingSummaries) {
4132
- this.symbolicIndex.addFile(summary);
4210
+ this.symbolicIndex.addFileIncremental(summary);
4211
+ updatedFilepaths.push(filepath);
4212
+ }
4213
+ if (updatedFilepaths.length > 0) {
4214
+ await this.symbolicIndex.saveIncremental(updatedFilepaths);
4133
4215
  }
4134
- this.symbolicIndex.buildBM25Index();
4135
- await this.symbolicIndex.save();
4136
4216
  this.literalIndex = new LiteralIndex(indexDir, this.id);
4137
4217
  await this.literalIndex.initialize();
4138
4218
  const indexedFilepaths = new Set;
@@ -4484,11 +4564,14 @@ class JsonModule {
4484
4564
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
4485
4565
  this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
4486
4566
  await this.symbolicIndex.initialize();
4567
+ const updatedFilepaths = [];
4487
4568
  for (const [filepath, summary] of this.pendingSummaries) {
4488
- this.symbolicIndex.addFile(summary);
4569
+ this.symbolicIndex.addFileIncremental(summary);
4570
+ updatedFilepaths.push(filepath);
4571
+ }
4572
+ if (updatedFilepaths.length > 0) {
4573
+ await this.symbolicIndex.saveIncremental(updatedFilepaths);
4489
4574
  }
4490
- this.symbolicIndex.buildBM25Index();
4491
- await this.symbolicIndex.save();
4492
4575
  this.literalIndex = new LiteralIndex(indexDir, this.id);
4493
4576
  await this.literalIndex.initialize();
4494
4577
  const indexedFilepaths = new Set;
@@ -4815,11 +4898,14 @@ ${section.content}` : section.content,
4815
4898
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
4816
4899
  this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
4817
4900
  await this.symbolicIndex.initialize();
4901
+ const updatedFilepaths = [];
4818
4902
  for (const [filepath, summary] of this.pendingSummaries) {
4819
- this.symbolicIndex.addFile(summary);
4903
+ this.symbolicIndex.addFileIncremental(summary);
4904
+ updatedFilepaths.push(filepath);
4905
+ }
4906
+ if (updatedFilepaths.length > 0) {
4907
+ await this.symbolicIndex.saveIncremental(updatedFilepaths);
4820
4908
  }
4821
- this.symbolicIndex.buildBM25Index();
4822
- await this.symbolicIndex.save();
4823
4909
  this.pendingSummaries.clear();
4824
4910
  }
4825
4911
  async search(query, ctx, options = {}) {
@@ -5135,6 +5221,7 @@ import { glob } from "glob";
5135
5221
  import * as fs7 from "fs/promises";
5136
5222
  import * as path15 from "path";
5137
5223
  import * as os3 from "os";
5224
+ import * as crypto2 from "crypto";
5138
5225
 
5139
5226
  // src/modules/registry.ts
5140
5227
  class ModuleRegistryImpl {
@@ -5511,6 +5598,9 @@ import { watch } from "chokidar";
5511
5598
  init_config2();
5512
5599
 
5513
5600
  // src/app/indexer/index.ts
5601
+ function computeContentHash(content) {
5602
+ return crypto2.createHash("sha256").update(content, "utf-8").digest("hex");
5603
+ }
5514
5604
  async function parallelMap(items, processor, concurrency) {
5515
5605
  const results = new Array(items.length);
5516
5606
  let nextIndex = 0;
@@ -5762,42 +5852,93 @@ async function ensureIndexFresh(rootDir, options = {}) {
5762
5852
  getIntrospection: (filepath) => introspection.getFile(filepath)
5763
5853
  };
5764
5854
  const totalFiles = currentFiles.length;
5765
- for (let i = 0;i < currentFiles.length; i++) {
5766
- const filepath = currentFiles[i];
5855
+ let completedCount = 0;
5856
+ const processIncrementalFile = async (filepath) => {
5767
5857
  const relativePath = path15.relative(rootDir, filepath);
5768
- const progress = `[${i + 1}/${totalFiles}]`;
5769
5858
  try {
5770
5859
  const stats = await fs7.stat(filepath);
5771
5860
  const lastModified = stats.mtime.toISOString();
5772
5861
  const existingEntry = manifest.files[relativePath];
5773
5862
  if (existingEntry && existingEntry.lastModified === lastModified) {
5774
- totalUnchanged++;
5775
- continue;
5863
+ completedCount++;
5864
+ return { relativePath, status: "unchanged" };
5776
5865
  }
5777
- logger.progress(` ${progress} Indexing: ${relativePath}`);
5778
5866
  const content = await fs7.readFile(filepath, "utf-8");
5779
- introspection.addFile(relativePath, content);
5780
- const fileIndex = await module.indexFile(relativePath, content, ctx);
5781
- if (fileIndex) {
5782
- await writeFileIndex(rootDir, module.id, relativePath, fileIndex, config);
5783
- manifest.files[relativePath] = {
5867
+ const contentHash = computeContentHash(content);
5868
+ if (existingEntry?.contentHash && existingEntry.contentHash === contentHash) {
5869
+ completedCount++;
5870
+ return {
5871
+ relativePath,
5872
+ status: "mtime_updated",
5784
5873
  lastModified,
5785
- chunkCount: fileIndex.chunks.length
5874
+ contentHash
5786
5875
  };
5787
- totalIndexed++;
5788
5876
  }
5877
+ completedCount++;
5878
+ logger.progress(` [${completedCount}/${totalFiles}] Indexing: ${relativePath}`);
5879
+ introspection.addFile(relativePath, content);
5880
+ const fileIndex = await module.indexFile(relativePath, content, ctx);
5881
+ if (!fileIndex) {
5882
+ return { relativePath, status: "unchanged" };
5883
+ }
5884
+ await writeFileIndex(rootDir, module.id, relativePath, fileIndex, config);
5885
+ return {
5886
+ relativePath,
5887
+ status: "indexed",
5888
+ lastModified,
5889
+ chunkCount: fileIndex.chunks.length,
5890
+ contentHash
5891
+ };
5789
5892
  } catch (error) {
5790
- logger.clearProgress();
5791
- logger.error(` ${progress} Error indexing ${relativePath}: ${error}`);
5893
+ completedCount++;
5894
+ return { relativePath, status: "error", error };
5792
5895
  }
5793
- }
5896
+ };
5897
+ const concurrency = options.concurrency ?? DEFAULT_CONCURRENCY;
5898
+ const results = await parallelMap(currentFiles, processIncrementalFile, concurrency);
5794
5899
  logger.clearProgress();
5795
- if (totalIndexed > 0 || totalRemoved > 0) {
5900
+ let mtimeUpdates = 0;
5901
+ for (const item of results) {
5902
+ if (!item.success) {
5903
+ continue;
5904
+ }
5905
+ const fileResult = item.value;
5906
+ switch (fileResult.status) {
5907
+ case "indexed":
5908
+ manifest.files[fileResult.relativePath] = {
5909
+ lastModified: fileResult.lastModified,
5910
+ chunkCount: fileResult.chunkCount,
5911
+ contentHash: fileResult.contentHash
5912
+ };
5913
+ totalIndexed++;
5914
+ break;
5915
+ case "mtime_updated":
5916
+ if (manifest.files[fileResult.relativePath]) {
5917
+ manifest.files[fileResult.relativePath] = {
5918
+ ...manifest.files[fileResult.relativePath],
5919
+ lastModified: fileResult.lastModified,
5920
+ contentHash: fileResult.contentHash
5921
+ };
5922
+ mtimeUpdates++;
5923
+ }
5924
+ totalUnchanged++;
5925
+ break;
5926
+ case "unchanged":
5927
+ totalUnchanged++;
5928
+ break;
5929
+ case "error":
5930
+ logger.error(` Error indexing ${fileResult.relativePath}: ${fileResult.error}`);
5931
+ break;
5932
+ }
5933
+ }
5934
+ const hasManifestChanges = totalIndexed > 0 || totalRemoved > 0 || mtimeUpdates > 0;
5935
+ if (hasManifestChanges) {
5796
5936
  manifest.lastUpdated = new Date().toISOString();
5797
5937
  await writeModuleManifest(rootDir, module.id, manifest, config);
5798
- if (module.finalize) {
5799
- await module.finalize(ctx);
5800
- }
5938
+ }
5939
+ const hasContentChanges = totalIndexed > 0 || totalRemoved > 0;
5940
+ if (hasContentChanges && module.finalize) {
5941
+ await module.finalize(ctx);
5801
5942
  }
5802
5943
  if (totalRemoved > 0) {
5803
5944
  await cleanupEmptyDirectories(indexPath);
@@ -5875,6 +6016,17 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
5875
6016
  return { relativePath, status: "skipped" };
5876
6017
  }
5877
6018
  const content = await fs7.readFile(filepath, "utf-8");
6019
+ const contentHash = computeContentHash(content);
6020
+ if (existingEntry?.contentHash && existingEntry.contentHash === contentHash) {
6021
+ completedCount++;
6022
+ logger.debug(` [${completedCount}/${totalFiles}] Skipped ${relativePath} (content unchanged)`);
6023
+ return {
6024
+ relativePath,
6025
+ status: "skipped",
6026
+ lastModified,
6027
+ contentHash
6028
+ };
6029
+ }
5878
6030
  introspection.addFile(relativePath, content);
5879
6031
  completedCount++;
5880
6032
  logger.progress(` [${completedCount}/${totalFiles}] Processing: ${relativePath}`);
@@ -5888,7 +6040,8 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
5888
6040
  relativePath,
5889
6041
  status: "indexed",
5890
6042
  lastModified,
5891
- chunkCount: fileIndex.chunks.length
6043
+ chunkCount: fileIndex.chunks.length,
6044
+ contentHash
5892
6045
  };
5893
6046
  } catch (error) {
5894
6047
  completedCount++;
@@ -5908,11 +6061,22 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
5908
6061
  case "indexed":
5909
6062
  manifest.files[fileResult.relativePath] = {
5910
6063
  lastModified: fileResult.lastModified,
5911
- chunkCount: fileResult.chunkCount
6064
+ chunkCount: fileResult.chunkCount,
6065
+ contentHash: fileResult.contentHash
5912
6066
  };
5913
6067
  result.indexed++;
5914
6068
  break;
5915
6069
  case "skipped":
6070
+ if (fileResult.lastModified && fileResult.contentHash) {
6071
+ const existingEntry = manifest.files[fileResult.relativePath];
6072
+ if (existingEntry) {
6073
+ manifest.files[fileResult.relativePath] = {
6074
+ ...existingEntry,
6075
+ lastModified: fileResult.lastModified,
6076
+ contentHash: fileResult.contentHash
6077
+ };
6078
+ }
6079
+ }
5916
6080
  result.skipped++;
5917
6081
  break;
5918
6082
  case "error":
@@ -7515,4 +7679,4 @@ export {
7515
7679
  ConsoleLogger
7516
7680
  };
7517
7681
 
7518
- //# debugId=70C95E2CED98827164756E2164756E21
7682
+ //# debugId=0AC0D39FF430E09564756E2164756E21