sweet-search 2.5.2 → 2.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. package/core/cli.js +24 -3
  2. package/core/graph/graph-expansion.js +215 -36
  3. package/core/graph/graph-extractor.js +196 -11
  4. package/core/graph/graph-search.js +395 -92
  5. package/core/graph/hcgs-generator.js +2 -1
  6. package/core/graph/index.js +2 -0
  7. package/core/graph/repo-map.js +28 -6
  8. package/core/graph/structural-answer-cues.js +168 -0
  9. package/core/graph/structural-callsite-hints.js +40 -0
  10. package/core/graph/structural-context-format.js +40 -0
  11. package/core/graph/structural-context.js +450 -0
  12. package/core/graph/structural-forward-push.js +156 -0
  13. package/core/graph/structural-header-context.js +19 -0
  14. package/core/graph/structural-importance.js +148 -0
  15. package/core/graph/structural-pagerank.js +197 -0
  16. package/core/graph/summary-manager.js +13 -9
  17. package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
  18. package/core/incremental-indexing/application/file-watcher.mjs +197 -0
  19. package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
  20. package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
  21. package/core/incremental-indexing/application/operator-cli.mjs +554 -0
  22. package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
  23. package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
  24. package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
  25. package/core/incremental-indexing/application/reconciler.mjs +477 -0
  26. package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
  27. package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
  28. package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
  29. package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
  30. package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
  31. package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
  32. package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
  33. package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
  34. package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
  35. package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
  36. package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
  37. package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
  38. package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
  39. package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
  40. package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
  41. package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
  42. package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
  43. package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
  44. package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
  45. package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
  46. package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
  47. package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
  48. package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
  49. package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
  50. package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
  51. package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
  52. package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
  53. package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
  54. package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
  55. package/core/indexing/admission-policy.js +139 -0
  56. package/core/indexing/artifact-builder.js +29 -12
  57. package/core/indexing/ast-chunker.js +107 -30
  58. package/core/indexing/dedup/exemplar-selector.js +19 -1
  59. package/core/indexing/gitignore-filter.js +223 -0
  60. package/core/indexing/incremental-tracker.js +99 -30
  61. package/core/indexing/index-codebase-v21.js +6 -5
  62. package/core/indexing/index-maintainer.mjs +698 -6
  63. package/core/indexing/indexer-ann.js +99 -15
  64. package/core/indexing/indexer-build.js +158 -45
  65. package/core/indexing/indexer-empty-baseline.js +80 -0
  66. package/core/indexing/indexer-manifest.js +66 -0
  67. package/core/indexing/indexer-phases.js +56 -23
  68. package/core/indexing/indexer-sparse-gram.js +54 -13
  69. package/core/indexing/indexer-utils.js +26 -208
  70. package/core/indexing/indexing-file-policy.js +32 -7
  71. package/core/indexing/maintainer-launcher.mjs +137 -0
  72. package/core/indexing/merkle-tracker.js +251 -244
  73. package/core/indexing/model-pool.js +46 -5
  74. package/core/infrastructure/code-graph-repository.js +758 -6
  75. package/core/infrastructure/code-graph-visibility.js +157 -0
  76. package/core/infrastructure/codebase-repository.js +100 -13
  77. package/core/infrastructure/config/search.js +1 -1
  78. package/core/infrastructure/db-utils.js +118 -0
  79. package/core/infrastructure/dedup-hashing.js +10 -13
  80. package/core/infrastructure/hardware-capability.js +17 -7
  81. package/core/infrastructure/index.js +8 -2
  82. package/core/infrastructure/language-patterns/maps.js +4 -1
  83. package/core/infrastructure/language-patterns/registry-core.js +56 -17
  84. package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
  85. package/core/infrastructure/language-patterns.js +69 -0
  86. package/core/infrastructure/model-registry.js +20 -0
  87. package/core/infrastructure/native-inference.js +7 -12
  88. package/core/infrastructure/native-resolver.js +52 -37
  89. package/core/infrastructure/native-sparse-gram.js +261 -20
  90. package/core/infrastructure/native-tokenizer.js +6 -15
  91. package/core/infrastructure/simd-distance.js +10 -16
  92. package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
  93. package/core/infrastructure/structural-alias-resolver.js +122 -0
  94. package/core/infrastructure/structural-candidate-ranker.js +34 -0
  95. package/core/infrastructure/structural-context-repository.js +472 -0
  96. package/core/infrastructure/structural-context-utils.js +51 -0
  97. package/core/infrastructure/structural-graph-signals.js +121 -0
  98. package/core/infrastructure/structural-qualified-resolution.js +15 -0
  99. package/core/infrastructure/structural-source-definitions.js +100 -0
  100. package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
  101. package/core/infrastructure/tree-sitter-provider.js +811 -37
  102. package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
  103. package/core/query/query-router.js +55 -5
  104. package/core/ranking/file-kind-ranking.js +2192 -15
  105. package/core/ranking/late-interaction-index.js +87 -12
  106. package/core/search/cli-decoration.js +290 -0
  107. package/core/search/context-expander.js +988 -78
  108. package/core/search/index.js +1 -0
  109. package/core/search/output-policy.js +275 -0
  110. package/core/search/search-anchor.js +499 -0
  111. package/core/search/search-boost.js +93 -1
  112. package/core/search/search-cli.js +61 -204
  113. package/core/search/search-hybrid.js +250 -10
  114. package/core/search/search-pattern-chunks.js +57 -8
  115. package/core/search/search-pattern-planner.js +68 -9
  116. package/core/search/search-pattern-prefilter.js +30 -10
  117. package/core/search/search-pattern-ripgrep.js +40 -4
  118. package/core/search/search-pattern-sparse-overlay.js +256 -0
  119. package/core/search/search-pattern.js +117 -29
  120. package/core/search/search-postprocess.js +479 -5
  121. package/core/search/search-read-semantic.js +260 -23
  122. package/core/search/search-read.js +82 -64
  123. package/core/search/search-reader-pin.js +71 -0
  124. package/core/search/search-rrf.js +279 -0
  125. package/core/search/search-semantic.js +110 -5
  126. package/core/search/search-server.js +130 -57
  127. package/core/search/search-trace.js +107 -0
  128. package/core/search/server-identity.js +93 -0
  129. package/core/search/session-daemon-prewarm.mjs +33 -10
  130. package/core/search/sweet-search.js +399 -7
  131. package/core/skills/sweet-index/SKILL.md +8 -6
  132. package/core/vector-store/binary-hnsw-index.js +194 -30
  133. package/core/vector-store/float-vector-store.js +96 -6
  134. package/core/vector-store/hnsw-index.js +220 -49
  135. package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
  136. package/eval/agent-read-workflows/bin/ss-find +15 -0
  137. package/eval/agent-read-workflows/bin/ss-grep +12 -0
  138. package/eval/agent-read-workflows/bin/ss-read +14 -0
  139. package/eval/agent-read-workflows/bin/ss-search +18 -0
  140. package/eval/agent-read-workflows/bin/ss-semantic +12 -0
  141. package/eval/agent-read-workflows/bin/ss-trace +11 -0
  142. package/mcp/read-tool.js +109 -0
  143. package/mcp/server.js +55 -15
  144. package/mcp/tool-handlers.js +14 -124
  145. package/mcp/trace-tool.js +81 -0
  146. package/package.json +25 -10
  147. package/scripts/hooks/intercept-read.mjs +55 -0
  148. package/scripts/hooks/remind-tools.mjs +40 -0
  149. package/scripts/init.js +698 -54
  150. package/scripts/inject-agent-instructions.js +431 -0
  151. package/scripts/install-prompt-reminders.js +188 -0
  152. package/scripts/install-tool-enforcement.js +220 -0
  153. package/scripts/smoke-test.js +12 -9
  154. package/scripts/uninstall.js +276 -18
  155. package/scripts/write-claude-rules.js +110 -0
@@ -59,9 +59,8 @@ export function resolveSearchSymbolFilter(options = {}) {
59
59
  export function buildChunkLocationMap(liIndex) {
60
60
  const map = new Map();
61
61
 
62
- for (const [id, doc] of liIndex.documents) {
63
- const meta = doc.metadata;
64
- if (!meta?.file || meta.startLine == null || meta.endLine == null) continue;
62
+ const addInterval = (id, meta) => {
63
+ if (!meta?.file || meta.startLine == null || meta.endLine == null) return;
65
64
 
66
65
  let bucket = map.get(meta.file);
67
66
  if (!bucket) {
@@ -75,11 +74,24 @@ export function buildChunkLocationMap(liIndex) {
75
74
  type: meta.type || null,
76
75
  name: meta.name || null,
77
76
  });
77
+ };
78
+
79
+ for (const [id, doc] of liIndex.documents) {
80
+ addInterval(id, doc.metadata);
81
+ }
82
+
83
+ for (const [id, ptr] of liIndex.aliasPointers || []) {
84
+ addInterval(id, ptr.metadata);
78
85
  }
79
86
 
80
87
  // Sort each file's intervals by startLine for binary search
81
88
  for (const bucket of map.values()) {
82
89
  bucket.sort((a, b) => a.startLine - b.startLine);
90
+ let maxEnd = -Infinity;
91
+ for (const interval of bucket) {
92
+ maxEnd = Math.max(maxEnd, interval.endLine);
93
+ interval._maxEndSoFar = maxEnd;
94
+ }
83
95
  }
84
96
 
85
97
  return map;
@@ -90,6 +102,7 @@ export function findChunkIntervalForLine(intervals, lineNumber) {
90
102
 
91
103
  let lo = 0;
92
104
  let hi = intervals.length - 1;
105
+ let candidateIdx = -1;
93
106
 
94
107
  while (lo <= hi) {
95
108
  const mid = (lo + hi) >>> 1;
@@ -97,13 +110,38 @@ export function findChunkIntervalForLine(intervals, lineNumber) {
97
110
 
98
111
  if (lineNumber < iv.startLine) {
99
112
  hi = mid - 1;
100
- } else if (lineNumber > iv.endLine) {
101
- lo = mid + 1;
102
113
  } else {
103
- return { interval: iv, index: mid };
114
+ candidateIdx = mid;
115
+ lo = mid + 1;
104
116
  }
105
117
  }
106
118
 
119
+ if (candidateIdx < 0) return null;
120
+
121
+ let best = null;
122
+ let bestIdx = -1;
123
+ for (let i = candidateIdx; i >= 0; i--) {
124
+ const iv = intervals[i];
125
+ if (iv.startLine > lineNumber) continue;
126
+ if (iv._maxEndSoFar != null && iv._maxEndSoFar < lineNumber) break;
127
+ if (iv.endLine < lineNumber) continue;
128
+
129
+ // Prefer the tightest containing interval; if spans tie, prefer the
130
+ // later-starting interval because it is usually the nested symbol chunk.
131
+ if (
132
+ !best ||
133
+ (iv.endLine - iv.startLine) < (best.endLine - best.startLine) ||
134
+ ((iv.endLine - iv.startLine) === (best.endLine - best.startLine) && iv.startLine > best.startLine)
135
+ ) {
136
+ best = iv;
137
+ bestIdx = i;
138
+ }
139
+ }
140
+
141
+ if (best) {
142
+ return { interval: best, index: bestIdx };
143
+ }
144
+
107
145
  return null;
108
146
  }
109
147
 
@@ -270,12 +308,18 @@ export function buildBareGrepResults(matches, options = {}) {
270
308
  * Rebuilds when the LI index instance is replaced or its combined document + alias-pointer count changes (re-index).
271
309
  */
272
310
export function getChunkLocationMap() {
  // Invoked with `this` bound to the object that owns `lateInteractionIndex`
  // and the three `_chunkLocationMap*` cache fields.
  const liIndex = this.lateInteractionIndex;
  const documentCount = liIndex.documents.size;
  const aliasCount = liIndex.aliasPointers?.size || 0;
  const fingerprint = documentCount + aliasCount;

  // The cached map is reused only while the same index object is installed
  // and its combined document + alias-pointer count is unchanged.
  const stale = !this._chunkLocationMap
    || this._chunkLocationMapSize !== fingerprint
    || this._chunkLocationMapIndex !== liIndex;

  if (stale) {
    this._chunkLocationMap = buildChunkLocationMap(liIndex);
    this._chunkLocationMapSize = fingerprint;
    this._chunkLocationMapIndex = liIndex;
  }
  return this._chunkLocationMap;
}
281
325
 
@@ -314,6 +358,11 @@ export function getCodebaseChunkTypeMap(searcher) {
314
358
 
315
359
  for (const bucket of map.values()) {
316
360
  bucket.sort((a, b) => a.startLine - b.startLine);
361
+ let maxEnd = -Infinity;
362
+ for (const interval of bucket) {
363
+ maxEnd = Math.max(maxEnd, interval.endLine);
364
+ interval._maxEndSoFar = maxEnd;
365
+ }
317
366
  }
318
367
 
319
368
  searcher._codebaseChunkTypeMap = map;
@@ -13,9 +13,10 @@
13
13
  import {
14
14
  extractLiteralClauses, runLiteralPrefilterClauses, querySparseGramCandidates,
15
15
  ensureSparseGramIndex,
16
+ sparseDeltaOverlayHasChanges, getSparseGramAllFilesWithOverlay,
16
17
  hasCaseInsensitiveRegexFlag, nativeGrepFilesWithMatches,
17
18
  nativeGrepFilesWithMatchesFixed, nativeGrepLines, nativeGrepFull,
18
- getSparseGramAllFiles, queryAndGrepLines, queryAndGrepFull,
19
+ queryAndGrepLines, queryAndGrepFull,
19
20
  searchLines, searchFull, resolveSparseSymbolMask,
20
21
  } from './search-pattern-prefilter.js';
21
22
  import { CODE_FILE_EXTENSIONS } from '../infrastructure/constants.js';
@@ -30,10 +31,12 @@ import { _getRgCapabilities, runRipgrepFilesWithMatches, runRipgrepJson, normali
30
31
  * Native grep bypasses ripgrep, so we must normalize here.
31
32
  */
32
33
function normalizeNativeMatches(matches, searchDir) {
  // Build a fresh array of fresh match objects; the caller's input is not mutated.
  const normalizedMatches = [];
  for (const match of matches || []) {
    const relativeFile = normalizeSearchPath(searchDir, match.file);
    // A falsy result means the path could not be expressed relative to
    // searchDir, so the match is dropped rather than reported.
    if (!relativeFile) continue;
    normalizedMatches.push({ ...match, file: relativeFile });
  }
  return normalizedMatches;
}
38
41
 
39
42
  // Cached once at module load — passed to Rust for code extension filtering.
@@ -61,7 +64,8 @@ export async function generateRegexMatches(searcher, regex, searchDir, options =
61
64
  // Rust internally: tries gram narrowing → if eligible, greps candidates; if not, greps all files.
62
65
  // Eliminates the JS planner, separate getSparseGramAllFiles call, and multiple NAPI crossings.
63
66
  const useGramIndex = options.useGramIndex ?? options.gramIndex ?? true;
64
- const canUseUnifiedSearch = !fixedString && globs.length === 0;
67
+ const hasSparseDeltaOverlay = sparseDeltaOverlayHasChanges(searcher, options);
68
+ const canUseUnifiedSearch = !fixedString && globs.length === 0 && !hasSparseDeltaOverlay;
65
69
  if (canUseUnifiedSearch) {
66
70
  const sparseGramIndex = ensureSparseGramIndex(searcher, options);
67
71
  if (sparseGramIndex) {
@@ -173,6 +177,62 @@ export async function generateRegexMatches(searcher, regex, searchDir, options =
173
177
  const gramCandidateFiles = gramLookupResult?.candidateFiles || 0;
174
178
  const gramTotalFiles = gramLookupResult?.totalFiles || 0;
175
179
  const gramSelectivity = gramTotalFiles > 0 ? gramCandidateFiles / gramTotalFiles : null;
180
+ const narrowedThreshold = options.narrowedJsonThreshold ?? 300;
181
+ const directJsonThreshold = options.directJsonFileThreshold ?? 4096;
182
+
183
+ if (gramLookupResult?.eligible === true && Array.isArray(gramLookupResult.files) && gramLookupResult.files.length === 0) {
184
+ return {
185
+ indexedMatches: [],
186
+ overlayMatches: [],
187
+ matchingFiles: [],
188
+ stats: {
189
+ nativeGrepUsed: false,
190
+ candidateGenTime_ms: Math.round(performance.now() - start),
191
+ grepTime_ms: 0,
192
+ literalFilterTime_ms: 0,
193
+ gramLookupTime_ms: Math.round(gramLookupTime),
194
+ filesConsidered: gramTotalFiles,
195
+ filesScanned: 0,
196
+ filesSkipped: 0,
197
+ dirtyOverlayFiles: 0,
198
+ candidateFilesBeforeFilter: 0,
199
+ candidateFilesAfterFilter: 0,
200
+ candidateReductionRatio: 0,
201
+ literalExtractionHit: literalPlan.clauses.length > 0,
202
+ literalExtractionSource: literalPlan.source,
203
+ gramLookupReason: gramLookupResult.reason || 'ok',
204
+ prefilterDiscarded: false,
205
+ prefilterDiscardedCount: 0,
206
+ denseGramsTouched: gramLookupResult.denseGramsTouched || 0,
207
+ sparseGramsTouched: gramLookupResult.sparseGramsTouched || 0,
208
+ gramFalsePositiveRatio: 0,
209
+ grepStrategy: 'none',
210
+ plannerRoute: 'empty_gram_candidates',
211
+ gramSelectivity,
212
+ plannerInputs: {
213
+ narrowedFileCount: 0,
214
+ gramCandidateFiles,
215
+ gramTotalFiles,
216
+ narrowedThreshold,
217
+ directJsonThreshold,
218
+ skipLiteralPrefilter: true,
219
+ },
220
+ symbolTypeFilter,
221
+ trackerLastIndex: null,
222
+ grepMatches: 0,
223
+ stageTiming: {
224
+ literalExtractionTime_ms: +literalExtractionTime.toFixed(3),
225
+ gramQueryTime_ms: +gramLookupTime.toFixed(3),
226
+ regexBuildTime_ms: 0,
227
+ literalPrefilterTime_ms: 0,
228
+ plannerTime_ms: 0,
229
+ grepVerifyTime_ms: 0,
230
+ napiOverheadTime_ms: 0,
231
+ resultMaterializationTime_ms: 0,
232
+ },
233
+ },
234
+ };
235
+ }
176
236
 
177
237
  const fileGramTooBroad = gramLookupResult?.eligible === false && gramLookupResult?.reason === 'too_broad';
178
238
 
@@ -190,7 +250,9 @@ export async function generateRegexMatches(searcher, regex, searchDir, options =
190
250
  const sparseForAllFiles = (!fixedString && globs.length === 0)
191
251
  ? ensureSparseGramIndex(searcher, options)
192
252
  : null;
193
- const allIndexedFiles = sparseForAllFiles ? getSparseGramAllFiles(sparseForAllFiles) : null;
253
+ const allIndexedFiles = sparseForAllFiles
254
+ ? getSparseGramAllFilesWithOverlay(searcher, sparseForAllFiles, options)
255
+ : null;
194
256
  const canNativeGrepAll = Array.isArray(allIndexedFiles) && allIndexedFiles.length > 0;
195
257
 
196
258
  const skipLiteralPrefilter = gramTooBroad || gramSaysBroad || canNativeGrepAll;
@@ -247,9 +309,6 @@ export async function generateRegexMatches(searcher, regex, searchDir, options =
247
309
  // Cost-model query planner
248
310
  // ==========================================================================
249
311
 
250
- const narrowedThreshold = options.narrowedJsonThreshold ?? 300;
251
- const directJsonThreshold = options.directJsonFileThreshold ?? 4096;
252
-
253
312
  let plannerRoute;
254
313
  let grepStrategy;
255
314
 
@@ -23,6 +23,7 @@ import {
23
23
  searchLines as _searchLines,
24
24
  searchFull as _searchFull,
25
25
  } from '../infrastructure/native-sparse-gram.js';
26
+ import { applySparseDeltaOverlay, liveOverlayFiles, loadSparseDeltaOverlay } from './search-pattern-sparse-overlay.js';
26
27
 
27
28
  // Re-export for search-pattern.js (avoids circular import through native-sparse-gram.js)
28
29
  export const resolveSparseSymbolMask = _resolveSparseSymbolMask;
@@ -35,8 +36,13 @@ export const queryAndGrepLines = _queryAndGrepLines;
35
36
  export const queryAndGrepFull = _queryAndGrepFull;
36
37
  export const searchLines = _searchLines;
37
38
  export const searchFull = _searchFull;
38
- import { DB_PATHS, PROJECT_ROOT } from '../infrastructure/config/index.js';
39
- import { CODE_FILE_EXTENSIONS } from '../infrastructure/constants.js';
39
+ export {
40
+ applySparseDeltaOverlay,
41
+ getSparseGramAllFilesWithOverlay,
42
+ loadSparseDeltaOverlay,
43
+ sparseDeltaOverlayHasChanges,
44
+ } from './search-pattern-sparse-overlay.js';
45
+ import { DB_PATHS } from '../infrastructure/config/index.js';
40
46
  import { isRipgrepCodePath, resolveSearchSymbolFilter } from './search-pattern-chunks.js';
41
47
 
42
48
  // =============================================================================
@@ -251,12 +257,18 @@ export function ensureSparseGramIndex(searcher, options = {}) {
251
257
  if (!searcher) return null;
252
258
  const useGramIndex = options.useGramIndex ?? options.gramIndex ?? true;
253
259
  if (!useGramIndex) return null;
254
- if (searcher.sparseGramIndex) return searcher.sparseGramIndex;
255
-
256
260
  const indexPath = options.sparseGramIndexPath || searcher.sparseGramIndexPath || DB_PATHS.sparseGramIndex;
261
+ if (searcher.sparseGramIndex) {
262
+ if (!searcher._sparseGramLoadedPath || searcher._sparseGramLoadedPath === indexPath) {
263
+ return searcher.sparseGramIndex;
264
+ }
265
+ searcher.sparseGramIndex = null;
266
+ searcher._sparseGramLoadedPath = null;
267
+ }
257
268
  const loaded = loadSparseGramIndex(indexPath);
258
269
  if (loaded) {
259
270
  searcher.sparseGramIndex = loaded;
271
+ searcher._sparseGramLoadedPath = indexPath;
260
272
  }
261
273
  return loaded;
262
274
  }
@@ -289,6 +301,8 @@ export function querySparseGramCandidates(searcher, literalClauses, options = {}
289
301
  }
290
302
 
291
303
  try {
304
+ const symbolMask = _resolveSparseSymbolMask(resolveSearchSymbolFilter(options));
305
+ const overlay = loadSparseDeltaOverlay(searcher, options);
292
306
  const sparseGramIndex = ensureSparseGramIndex(searcher, options);
293
307
  if (!sparseGramIndex) {
294
308
  return {
@@ -305,7 +319,6 @@ export function querySparseGramCandidates(searcher, literalClauses, options = {}
305
319
 
306
320
  const maxCandidateFiles = options.maxGramCandidateFiles ?? 100000;
307
321
  const maxCandidateRatio = options.maxGramCandidateRatio ?? 1.0;
308
- const symbolMask = _resolveSparseSymbolMask(resolveSearchSymbolFilter(options));
309
322
  const combined = new Set();
310
323
  let totalFiles = 0;
311
324
  let gramsUsed = 0;
@@ -346,13 +359,22 @@ export function querySparseGramCandidates(searcher, literalClauses, options = {}
346
359
  gramsUsed += result.gramsUsed || 0;
347
360
  denseGramsTouched += result.denseGramsTouched || 0;
348
361
  sparseGramsTouched += result.sparseGramsTouched || 0;
349
- const clauseFiles = Array.isArray(result.files)
362
+ const baseClauseFiles = Array.isArray(result.files)
350
363
  ? result.files.filter(isRipgrepCodePath)
351
364
  : [];
365
+ const clauseFiles = applySparseDeltaOverlay(
366
+ baseClauseFiles,
367
+ overlay,
368
+ symbolMask || 0,
369
+ searcher?.projectRoot || options.projectRoot,
370
+ clause,
371
+ sparseGramIndex,
372
+ );
373
+ const clauseTotalFiles = (result.totalFiles || 0) + liveOverlayFiles(overlay, symbolMask || 0).length;
374
+ totalFiles = Math.max(totalFiles, clauseTotalFiles);
352
375
  if (
353
- clauseFiles.length === 0 ||
354
376
  clauseFiles.length > maxCandidateFiles ||
355
- (result.totalFiles > 0 && (clauseFiles.length / result.totalFiles) > maxCandidateRatio)
377
+ (clauseTotalFiles > 0 && (clauseFiles.length / clauseTotalFiles) > maxCandidateRatio)
356
378
  ) {
357
379
  return {
358
380
  eligible: false,
@@ -370,7 +392,6 @@ export function querySparseGramCandidates(searcher, literalClauses, options = {}
370
392
 
371
393
  const files = [...combined];
372
394
  if (
373
- files.length === 0 ||
374
395
  files.length > maxCandidateFiles ||
375
396
  (totalFiles > 0 && (files.length / totalFiles) > maxCandidateRatio)
376
397
  ) {
@@ -409,4 +430,3 @@ export function querySparseGramCandidates(searcher, literalClauses, options = {}
409
430
  };
410
431
  }
411
432
  }
412
-
@@ -10,7 +10,7 @@
10
10
  */
11
11
 
12
12
  import { spawn, execFileSync } from 'child_process';
13
- import { existsSync, readdirSync } from 'fs';
13
+ import { existsSync, realpathSync, readdirSync } from 'fs';
14
14
  import { StringDecoder } from 'string_decoder';
15
15
  import path from 'path';
16
16
  import { RIPGREP_CODE_TYPE_GLOB } from '../infrastructure/constants.js';
@@ -134,7 +134,28 @@ export function normalizeSearchPath(searchDir, filePath) {
134
134
  const relative = path.isAbsolute(filePath)
135
135
  ? path.relative(searchDir, filePath)
136
136
  : filePath;
137
- return relative.replace(/\\/g, '/').replace(/^\.\//, '');
137
+ const normalized = normalizeRelativeSearchPath(relative);
138
+ if (normalized) return normalized;
139
+ if (path.isAbsolute(filePath)) {
140
+ try {
141
+ const realRelative = path.relative(
142
+ realpathSync.native(searchDir),
143
+ realpathSync.native(filePath),
144
+ );
145
+ return normalizeRelativeSearchPath(realRelative);
146
+ } catch {
147
+ return null;
148
+ }
149
+ }
150
+ return null;
151
+ }
152
+
153
/**
 * Canonicalise a path that is supposed to be relative to the search directory.
 *
 * Backslashes become forward slashes and one leading `./` is stripped.
 *
 * @param {string} relative - Candidate relative path.
 * @returns {string|null} The forward-slashed path, or null when it is empty,
 *   climbs out of the search directory (`..`, `../…`, `…/../…`), or is
 *   actually absolute.
 */
function normalizeRelativeSearchPath(relative) {
  const normalized = relative.replace(/\\/g, '/').replace(/^\.\//, '');
  if (!normalized) return null;

  const climbsOut = normalized === '..'
    || normalized.startsWith('../')
    || normalized.includes('/../');
  if (climbsOut) return null;

  // path.relative() returns an absolute path when the two operands have no
  // common root (e.g. different Windows drives); such a value is not a path
  // inside the search directory and must not be reported as one.
  if (path.isAbsolute(normalized)) return null;

  return normalized;
}
139
160
 
140
161
  export function chunkRipgrepFiles(files) {
@@ -197,6 +218,10 @@ async function executeRipgrep({
197
218
  outputMode === 'json' ? '--json' : '--files-with-matches',
198
219
  '--type-add', RIPGREP_CODE_TYPE,
199
220
  '--type', 'code',
221
+ // Suppress I/O error messages (e.g. a candidate file deleted during the
222
+ // reconcile window). ripgrep still exits 2 on such errors but stderr
223
+ // stays empty; pattern-syntax errors still surface on stderr.
224
+ '--no-messages',
200
225
  ];
201
226
 
202
227
  if (maxCount > 0) args.push('--max-count', String(maxCount));
@@ -238,7 +263,11 @@ async function executeRipgrep({
238
263
  proc.stderr.on('data', (chunk) => { stderr += chunk; });
239
264
 
240
265
  proc.on('close', (code) => {
241
- if (code !== 0 && code !== 1) {
266
+ // code 2 with empty stderr = benign I/O error (a candidate file vanished
267
+ // mid-flight under concurrent reconcile); use whatever matched rather
268
+ // than failing the whole query. Real errors (bad regex) keep stderr.
269
+ const benignIoError = code === 2 && stderr.trim() === '';
270
+ if (code !== 0 && code !== 1 && !benignIoError) {
242
271
  reject(new Error(`ripgrep failed (code ${code}): ${stderr.slice(0, 200)}`));
243
272
  return;
244
273
  }
@@ -371,6 +400,10 @@ async function executeRipgrepStreaming({
371
400
  '--json',
372
401
  '--type-add', RIPGREP_CODE_TYPE,
373
402
  '--type', 'code',
403
+ // Suppress I/O error messages (e.g. a candidate file deleted during the
404
+ // reconcile window). ripgrep still exits 2 on such errors but stderr
405
+ // stays empty; pattern-syntax errors still surface on stderr.
406
+ '--no-messages',
374
407
  ];
375
408
 
376
409
  if (maxCount > 0) args.push('--max-count', String(maxCount));
@@ -472,7 +505,10 @@ async function executeRipgrepStreaming({
472
505
  }
473
506
  }
474
507
 
475
- if (killed || code === 0 || code === 1) {
508
+ // code 2 with empty stderr = benign I/O error (a candidate file vanished
509
+ // mid-flight under concurrent reconcile); resolve with what matched.
510
+ const benignIoError = code === 2 && stderr.trim() === '';
511
+ if (killed || code === 0 || code === 1 || benignIoError) {
476
512
  resolve(matches);
477
513
  return;
478
514
  }
@@ -0,0 +1,256 @@
1
+ import fs from 'fs';
2
+ import path from 'path';
3
+ import {
4
+ extractSparseGramRequiredGrams,
5
+ getSparseGramAllFiles as _getSparseGramAllFiles,
6
+ resolveSparseSymbolMask as _resolveSparseSymbolMask,
7
+ } from '../infrastructure/native-sparse-gram.js';
8
+ import { resolveLatestSparseGramDeltaRecords } from '../infrastructure/sparse-gram-delta-reader.js';
9
+ import { DB_PATHS, PROJECT_ROOT } from '../infrastructure/config/index.js';
10
+ import { isRipgrepCodePath, resolveSearchSymbolFilter } from './search-pattern-chunks.js';
11
+
12
// Basename of the reconcile manifest; readSparseManifestFromDir joins it onto
// a state directory to locate the JSON file it parses.
const RECONCILE_MANIFEST_FILENAME = 'reconcile-manifest.json';
13
+
14
/**
 * Choose the sparse-gram index path to use.
 *
 * Precedence: a truthy `options.sparseGramIndexPath`, then a truthy
 * `searcher.sparseGramIndexPath`, then the configured `DB_PATHS.sparseGramIndex`.
 *
 * @param {object|null|undefined} searcher - Searcher that may carry its own index path.
 * @param {object} [options] - Per-call overrides.
 * @returns {*} The first truthy candidate, or whatever DB_PATHS provides.
 */
function sparseGramIndexPath(searcher, options = {}) {
  const fromOptions = options.sparseGramIndexPath;
  if (fromOptions) return fromOptions;

  const fromSearcher = searcher?.sparseGramIndexPath;
  if (fromSearcher) return fromSearcher;

  return DB_PATHS.sparseGramIndex;
}
17
+
18
/**
 * Normalize a delta-record file path to a forward-slash, project-relative path.
 *
 * Absolute paths are delegated to `relativeInsideRoot`. Relative paths have
 * backslashes converted to `/` and a single leading `./` removed.
 *
 * @param {*} filePath - Path taken from a delta record or a base file list.
 * @param {string} [projectRoot=PROJECT_ROOT] - Root used to relativize absolute paths.
 * @returns {string|null} The normalized relative path, or null when the input
 *   is not a non-empty string, is empty after normalization, or points outside
 *   the project (`..` or anything starting with `../`).
 */
function normalizeDeltaPath(filePath, projectRoot = PROJECT_ROOT) {
  if (!filePath || typeof filePath !== 'string') return null;

  if (path.isAbsolute(filePath)) {
    return relativeInsideRoot(projectRoot, filePath) || null;
  }

  const normalized = filePath.replace(/\\/g, '/').replace(/^\.\//, '');
  if (!normalized) return null;
  // A bare '..' escapes the root just like '../x' does; the prefix check alone
  // would let it through.
  if (normalized === '..' || normalized.startsWith('../')) return null;
  return normalized;
}
28
+
29
/**
 * Express `absolutePath` relative to `projectRoot`, or return null when it is
 * not inside the root.
 *
 * A purely lexical comparison of the resolved paths is tried first. Only when
 * that fails are both sides resolved through the filesystem
 * (`realpathOrNull` for the root, `materializedRealpath` for the candidate)
 * and compared again.
 *
 * @param {string} projectRoot - Project root directory.
 * @param {string} absolutePath - Path to relativize.
 * @returns {string|null} Forward-slash relative path, or null.
 */
function relativeInsideRoot(projectRoot, absolutePath) {
  const resolvedRoot = path.resolve(projectRoot);
  const resolvedTarget = path.resolve(absolutePath);

  const lexicalRel = safeRelative(resolvedRoot, resolvedTarget);
  if (lexicalRel !== null) {
    return lexicalRel;
  }

  const realRoot = realpathOrNull(resolvedRoot);
  const realTarget = materializedRealpath(resolvedTarget);
  return realRoot && realTarget ? safeRelative(realRoot, realTarget) : null;
}
40
+
41
/**
 * Compute the forward-slash path of `candidate` relative to `root`, rejecting
 * anything that is not strictly inside `root`.
 *
 * @param {string} root - Directory treated as the containment boundary.
 * @param {string} candidate - Path to relativize.
 * @returns {string|null} The relative path, or null when `candidate` equals
 *   `root`, lies outside it, or cannot be expressed relatively.
 */
function safeRelative(root, candidate) {
  const rel = path.relative(root, candidate).replace(/\\/g, '/').replace(/^\.\//, '');
  if (!rel || path.isAbsolute(rel)) return null;
  // path.relative yields exactly '..' when candidate is root's parent; that
  // string has no trailing slash, so it must be rejected explicitly.
  if (rel === '..' || rel.startsWith('../')) return null;
  return rel;
}
46
+
47
/**
 * Resolve `filePath` with `fs.realpathSync.native`, reporting failure as null
 * instead of throwing.
 *
 * @param {string} filePath - Path to resolve.
 * @returns {string|null} The resolved path, or null if resolution threw.
 */
function realpathOrNull(filePath) {
  let resolved = null;
  try {
    resolved = fs.realpathSync.native(filePath);
  } catch {
    // Any resolution failure is reported to the caller as null.
  }
  return resolved;
}
54
+
55
/**
 * Resolve a path that may not fully exist.
 *
 * Walks upward from `filePath` until `realpathOrNull` succeeds on some
 * ancestor, then re-appends the trailing components that could not be
 * resolved. If the walk reaches the top without success, the top-level
 * directory is tried as a last resort.
 *
 * @param {string} filePath - Path to resolve.
 * @returns {string|null} Resolved ancestor joined with the unresolved tail,
 *   or null when nothing could be resolved.
 */
function materializedRealpath(filePath) {
  // Unresolved trailing components, kept in root-to-leaf order.
  const missingTail = [];
  let cursor = filePath;

  for (;;) {
    if (!cursor) break;
    const parent = path.dirname(cursor);
    // dirname is a fixed point at the filesystem root (or '.').
    if (cursor === parent) break;

    const resolved = realpathOrNull(cursor);
    if (resolved) {
      return missingTail.length === 0 ? resolved : path.join(resolved, ...missingTail);
    }
    missingTail.unshift(path.basename(cursor));
    cursor = parent;
  }

  const anchor = realpathOrNull(cursor);
  if (!anchor || missingTail.length === 0) return anchor;
  return path.join(anchor, ...missingTail);
}
67
+
68
/**
 * List the directories to probe for a reconcile manifest, in priority order
 * and without duplicates.
 *
 * Candidates: `options.manifestStateDir`, `searcher._manifestStateDir`, and
 * the directory containing `indexPath`. Only non-empty strings are kept.
 *
 * @param {object|null|undefined} searcher
 * @param {object} [options]
 * @param {string} [indexPath] - Sparse-gram index path.
 * @returns {string[]} Unique candidate directories.
 */
function sparseManifestStateDirs(searcher, options = {}, indexPath) {
  const candidates = [
    options.manifestStateDir,
    searcher?._manifestStateDir,
    indexPath ? path.dirname(indexPath) : null,
  ];

  const unique = [];
  for (const dir of candidates) {
    if (typeof dir !== 'string' || !dir) continue;
    if (!unique.includes(dir)) unique.push(dir);
  }
  return unique;
}
75
+
76
// How long (ms) a "no manifest here" result is trusted before the directory is
// probed again. Kept short so a reconcile that starts publishing a manifest is
// picked up within about a second.
const SPARSE_MANIFEST_ABSENT_TTL_MS = 1000;

// Negative cache: stateDir -> Date.now() timestamp of the last lookup that
// produced no usable reconcile-manifest.json for that directory.
const _sparseManifestAbsentAt = new Map();

/**
 * Forget every negative-cache entry so the next manifest lookup probes the
 * filesystem again.
 */
export function _resetSparseManifestAbsentCache() {
  _sparseManifestAbsentAt.clear();
}
84
+
85
/**
 * Return the first manifest found among the candidate state directories.
 *
 * Directories recorded as manifest-less within the last
 * SPARSE_MANIFEST_ABSENT_TTL_MS are skipped without touching the filesystem.
 * A successful read clears that directory's negative-cache entry; a failed
 * read records (or refreshes) it.
 *
 * @param {object|null|undefined} searcher
 * @param {object} options
 * @param {string} indexPath - Sparse-gram index path.
 * @returns {object|null} Manifest info from readSparseManifestFromDir, or null.
 */
function readSparseManifest(searcher, options, indexPath) {
  const candidateDirs = sparseManifestStateDirs(searcher, options, indexPath);
  const startedAt = Date.now();

  for (const stateDir of candidateDirs) {
    const missSeenAt = _sparseManifestAbsentAt.get(stateDir);
    const recentlyMissing = missSeenAt !== undefined
      && startedAt - missSeenAt < SPARSE_MANIFEST_ABSENT_TTL_MS;
    if (recentlyMissing) continue;

    const found = readSparseManifestFromDir(stateDir);
    if (!found) {
      _sparseManifestAbsentAt.set(stateDir, startedAt);
      continue;
    }
    _sparseManifestAbsentAt.delete(stateDir);
    return found;
  }
  return null;
}
102
+
103
/**
 * Read and summarize the reconcile manifest stored in `dir`.
 *
 * @param {string} dir - State directory to look in.
 * @returns {{epoch: number|null, weightsId: string|null, deltas: string[]|null, stateDir: string}|null}
 *   Summary of the manifest's sparse-gram section, or null when the file is
 *   missing or any read/parse step throws. `epoch` prefers `sparseGram.epoch`
 *   and falls back to the top-level `epoch`; non-integer values become null.
 *   `deltas` keeps only string entries.
 */
function readSparseManifestFromDir(dir) {
  try {
    const manifestPath = path.join(dir, RECONCILE_MANIFEST_FILENAME);
    if (!fs.existsSync(manifestPath)) return null;

    const parsed = JSON.parse(fs.readFileSync(manifestPath, 'utf-8'));
    const sparse = parsed?.sparseGram;

    const rawEpoch = sparse?.epoch ?? parsed?.epoch;
    const rawWeightsId = sparse?.weightsId;
    const rawDeltas = sparse?.deltas;

    let deltas = null;
    if (Array.isArray(rawDeltas)) {
      deltas = rawDeltas.filter((entry) => typeof entry === 'string');
    }

    return {
      epoch: Number.isInteger(rawEpoch) ? rawEpoch : null,
      weightsId: typeof rawWeightsId === 'string' ? rawWeightsId : null,
      deltas,
      stateDir: dir,
    };
  } catch {
    // Best-effort read: an unreadable or malformed manifest counts as absent.
    return null;
  }
}
123
+
124
/**
 * Expand delta segment names into the concrete path candidates handed to the
 * delta reader.
 *
 * Absolute segments are passed through. Each relative segment contributes its
 * `stateDir`-joined form (when `stateDir` is given) followed by the segment as
 * written. Entries that are not non-empty strings are skipped: segment lists
 * supplied through options or the searcher are not pre-filtered, and
 * `path.isAbsolute` throws on non-strings.
 *
 * @param {*} segments - Segment list; anything other than an array yields null.
 * @param {string} [stateDir] - Directory relative segments are resolved against.
 * @returns {string[]|null} De-duplicated candidates in first-seen order, or null.
 */
function resolveDeltaSegments(segments, stateDir) {
  if (!Array.isArray(segments)) return null;

  const resolved = new Set();
  for (const segment of segments) {
    if (typeof segment !== 'string' || segment.length === 0) continue;
    if (path.isAbsolute(segment)) {
      resolved.add(segment);
      continue;
    }
    if (stateDir) resolved.add(path.join(stateDir, segment));
    resolved.add(segment);
  }
  return [...resolved];
}
137
+
138
/**
 * Determine the manifest epoch to use.
 *
 * The first integer wins, checked in this order: `options.manifestEpoch`,
 * `searcher.manifestEpoch`, `searcher.codebaseRepo.getManifestEpoch()`,
 * `searcher.graphSearch.getManifestEpoch()`. If none is an integer, the
 * manifest's own `epoch` is returned (null when absent).
 *
 * @param {object|null|undefined} searcher
 * @param {object} options
 * @param {object|null|undefined} manifestInfo
 * @returns {number|null}
 */
function sparseManifestEpoch(searcher, options, manifestInfo) {
  // Thunks keep the getter calls lazy: later sources are not consulted once an
  // earlier one produced an integer.
  const sources = [
    () => options.manifestEpoch,
    () => searcher?.manifestEpoch,
    () => searcher?.codebaseRepo?.getManifestEpoch?.(),
    () => searcher?.graphSearch?.getManifestEpoch?.(),
  ];

  for (const read of sources) {
    const epoch = read();
    if (Number.isInteger(epoch)) return epoch;
  }
  return manifestInfo?.epoch ?? null;
}
147
+
148
/**
 * Determine the expected sparse-gram weights id.
 *
 * A string `options.sparseGramWeightsId` wins, then a string
 * `searcher.sparseGramWeightsId`, then the manifest's `weightsId`.
 *
 * @param {object|null|undefined} searcher
 * @param {object} options
 * @param {object|null|undefined} manifestInfo
 * @returns {string|null} Null when no source supplies one.
 */
function sparseWeightsId(searcher, options, manifestInfo) {
  const overrides = [options.sparseGramWeightsId, searcher?.sparseGramWeightsId];
  for (const candidate of overrides) {
    if (typeof candidate === 'string') return candidate;
  }
  return manifestInfo?.weightsId ?? null;
}
153
+
154
/**
 * Pick the list of sparse-gram delta segments.
 *
 * The first array wins among `options.sparseGramDeltas`,
 * `searcher.sparseGramDeltas`, and the manifest's `deltas`.
 *
 * @param {object|null|undefined} searcher
 * @param {object} options
 * @param {object|null|undefined} manifestInfo
 * @returns {Array|null} The chosen array (same reference), or null when none is an array.
 */
function sparseDeltaSegments(searcher, options, manifestInfo) {
  const candidates = [
    options.sparseGramDeltas,
    searcher?.sparseGramDeltas,
    manifestInfo?.deltas,
  ];
  const chosen = candidates.find((candidate) => Array.isArray(candidate));
  return chosen ?? null;
}
159
+
160
/**
 * Convert a record's gram list into a Set of gram strings.
 *
 * Each entry is either a gram itself or an array whose first element is the
 * gram. Only non-empty strings are kept.
 *
 * @param {*} grams - Raw gram list from a delta record.
 * @returns {Set<string>|null} The set of grams, or null when the input is not
 *   a non-empty array or contains no usable gram.
 */
function normalizeRecordGrams(grams) {
  if (!Array.isArray(grams) || grams.length === 0) return null;

  const names = grams
    .map((entry) => (Array.isArray(entry) ? entry[0] : entry))
    .filter((gram) => typeof gram === 'string' && gram.length > 0);

  return names.length > 0 ? new Set(names) : null;
}
169
+
170
/**
 * Decide whether an overlay record can satisfy a clause of literals.
 *
 * The record is accepted without further checks when `literals` is not an
 * array, when the record carries no gram set, or when
 * `extractSparseGramRequiredGrams` returns nothing. Otherwise the extraction
 * result must be eligible with a non-empty gram list, and every required gram
 * must be present in `record.grams`.
 *
 * @param {{grams: Set<string>|null}} record - Live overlay record.
 * @param {*} literals - Clause literals, or a non-array to disable filtering.
 * @param {*} sparseGramIndex - Index handle forwarded to the gram extractor.
 * @returns {boolean}
 */
function recordMatchesClause(record, literals, sparseGramIndex) {
  if (!Array.isArray(literals) || !record.grams) return true;

  const required = extractSparseGramRequiredGrams(sparseGramIndex, literals);
  if (!required) return true;

  const needed = required.grams;
  const usable = required.eligible && Array.isArray(needed) && needed.length > 0;
  if (!usable) return false;

  for (const gram of needed) {
    if (!record.grams.has(gram)) return false;
  }
  return true;
}
178
+
179
/**
 * Build the sparse-gram delta overlay for a searcher.
 *
 * Reads the reconcile manifest (if any), resolves the delta segments, and asks
 * `resolveLatestSparseGramDeltaRecords` for the latest record per file. When
 * an integer epoch is known it is passed as `maxEpoch`.
 *
 * Records whose `weightsId` differs from the expected id (when one is known)
 * or whose path cannot be normalized are ignored. Every remaining record's
 * path is added to `hidden`; records not marked `deleted` are also added to
 * `live`.
 *
 * @param {object|null|undefined} searcher
 * @param {object} [options]
 * @returns {{hidden: Set<string>, live: Array<{filePath: string, symbolMask: number, grams: Set<string>|null}>, maxEpoch: number|null}|null}
 *   The overlay, or null when there is no index path, no segment list, no
 *   resolved segments, no latest records, or nothing survived filtering.
 */
export function loadSparseDeltaOverlay(searcher, options = {}) {
  const indexPath = sparseGramIndexPath(searcher, options);
  if (!indexPath) return null;

  const manifestInfo = readSparseManifest(searcher, options, indexPath);
  const maxEpoch = sparseManifestEpoch(searcher, options, manifestInfo);

  const declaredSegments = sparseDeltaSegments(searcher, options, manifestInfo);
  if (!Array.isArray(declaredSegments)) return null;

  // Relative segments resolve against the directory the manifest came from,
  // falling back to the highest-priority candidate state directory.
  const baseDir = manifestInfo?.stateDir
    || sparseManifestStateDirs(searcher, options, indexPath)[0];
  const segments = resolveDeltaSegments(declaredSegments, baseDir);
  if (!Array.isArray(segments) || segments.length === 0) return null;

  const readerOptions = Number.isInteger(maxEpoch) ? { maxEpoch, segments } : { segments };
  const latest = resolveLatestSparseGramDeltaRecords(indexPath, readerOptions);
  if (latest.size === 0) return null;

  const hidden = new Set();
  const live = [];
  const projectRoot = searcher?.projectRoot || options.projectRoot || PROJECT_ROOT;
  const expectedWeightsId = sparseWeightsId(searcher, options, manifestInfo);

  for (const entry of latest.values()) {
    const record = entry.record;
    if (expectedWeightsId && record.weightsId !== expectedWeightsId) continue;

    const filePath = normalizeDeltaPath(record.filePath, projectRoot);
    if (!filePath) continue;

    hidden.add(filePath);
    if (record.deleted) continue;

    const symbolMask = Number.isInteger(record.symbolMask) ? record.symbolMask : 0;
    live.push({ filePath, symbolMask, grams: normalizeRecordGrams(record.grams) });
  }

  const hasContent = hidden.size > 0 || live.length > 0;
  return hasContent ? { hidden, live, maxEpoch } : null;
}
217
+
218
/**
 * Report whether the delta overlay for this searcher contains anything.
 *
 * @param {object|null|undefined} searcher
 * @param {object} [options]
 * @returns {boolean} True when an overlay loads and has at least one hidden
 *   path or live record.
 */
export function sparseDeltaOverlayHasChanges(searcher, options = {}) {
  const overlay = loadSparseDeltaOverlay(searcher, options);
  if (!overlay) return false;
  return overlay.hidden.size > 0 || overlay.live.length > 0;
}
222
+
223
/**
 * List the file paths of live overlay records that pass all filters.
 *
 * A record is kept when its path satisfies `isRipgrepCodePath`, its symbol
 * mask intersects `symbolMask` (only checked when both masks are non-zero),
 * and `recordMatchesClause` accepts it.
 *
 * @param {{live: Iterable<object>}|null|undefined} overlay
 * @param {number} [symbolMask=0] - Bitmask of wanted symbol kinds; 0 disables the check.
 * @param {Array|null} [literals=null] - Clause literals forwarded to recordMatchesClause.
 * @param {*} [sparseGramIndex=null] - Index handle forwarded to recordMatchesClause.
 * @returns {string[]} Matching paths in overlay order; empty when there is no overlay.
 */
export function liveOverlayFiles(overlay, symbolMask = 0, literals = null, sparseGramIndex = null) {
  if (!overlay) return [];

  const passesMask = (record) => (
    !(symbolMask && record.symbolMask) || (record.symbolMask & symbolMask) !== 0
  );

  const paths = [];
  for (const record of overlay.live) {
    const keep = isRipgrepCodePath(record.filePath)
      && passesMask(record)
      && recordMatchesClause(record, literals, sparseGramIndex);
    if (keep) paths.push(record.filePath);
  }
  return paths;
}
234
+
235
/**
 * Merge a base file list with the delta overlay.
 *
 * Without an overlay the input array is returned as-is (or an empty array when
 * `files` is not an array). With an overlay, base files are normalized via
 * `normalizeDeltaPath`, dropped when hidden by the overlay or rejected by
 * `isRipgrepCodePath`, and then the overlay's live files (from
 * `liveOverlayFiles`) are appended. Duplicates are removed.
 *
 * @param {*} files - Base file list.
 * @param {object|null|undefined} overlay - Result of loadSparseDeltaOverlay.
 * @param {number} [symbolMask=0]
 * @param {string} [projectRoot=PROJECT_ROOT]
 * @param {Array|null} [literals=null]
 * @param {*} [sparseGramIndex=null]
 * @returns {string[]} Merged, de-duplicated file list.
 */
export function applySparseDeltaOverlay(files, overlay, symbolMask = 0, projectRoot = PROJECT_ROOT, literals = null, sparseGramIndex = null) {
  const baseFiles = Array.isArray(files) ? files : [];
  if (!overlay) return baseFiles;

  const merged = new Set();
  for (const file of baseFiles) {
    const normalized = normalizeDeltaPath(file, projectRoot);
    if (!normalized) continue;
    if (overlay.hidden.has(normalized)) continue;
    if (!isRipgrepCodePath(normalized)) continue;
    merged.add(normalized);
  }

  liveOverlayFiles(overlay, symbolMask, literals, sparseGramIndex)
    .forEach((file) => merged.add(file));

  return Array.from(merged);
}
249
+
250
/**
 * Return every file known to the sparse-gram index, adjusted by the delta
 * overlay.
 *
 * If the underlying lookup does not return an array, its result is passed
 * through untouched. Otherwise the overlay is loaded and applied with the
 * symbol mask derived from `options`.
 *
 * @param {object|null|undefined} searcher
 * @param {*} sparseGramIndex - Index handle forwarded to the native lookup.
 * @param {object} [options]
 * @returns {*} Merged file list, or the raw non-array lookup result.
 */
export function getSparseGramAllFilesWithOverlay(searcher, sparseGramIndex, options = {}) {
  const baseFiles = _getSparseGramAllFiles(sparseGramIndex);
  if (!Array.isArray(baseFiles)) {
    return baseFiles;
  }

  const resolvedMask = _resolveSparseSymbolMask(resolveSearchSymbolFilter(options));
  const projectRoot = searcher?.projectRoot || options.projectRoot || PROJECT_ROOT;
  const overlay = loadSparseDeltaOverlay(searcher, options);

  return applySparseDeltaOverlay(baseFiles, overlay, resolvedMask || 0, projectRoot);
}