sweet-search 2.5.2 → 2.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. package/core/cli.js +24 -3
  2. package/core/graph/graph-expansion.js +215 -36
  3. package/core/graph/graph-extractor.js +196 -11
  4. package/core/graph/graph-search.js +395 -92
  5. package/core/graph/hcgs-generator.js +2 -1
  6. package/core/graph/index.js +2 -0
  7. package/core/graph/repo-map.js +28 -6
  8. package/core/graph/structural-answer-cues.js +168 -0
  9. package/core/graph/structural-callsite-hints.js +40 -0
  10. package/core/graph/structural-context-format.js +40 -0
  11. package/core/graph/structural-context.js +450 -0
  12. package/core/graph/structural-forward-push.js +156 -0
  13. package/core/graph/structural-header-context.js +19 -0
  14. package/core/graph/structural-importance.js +148 -0
  15. package/core/graph/structural-pagerank.js +197 -0
  16. package/core/graph/summary-manager.js +13 -9
  17. package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
  18. package/core/incremental-indexing/application/file-watcher.mjs +197 -0
  19. package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
  20. package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
  21. package/core/incremental-indexing/application/operator-cli.mjs +554 -0
  22. package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
  23. package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
  24. package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
  25. package/core/incremental-indexing/application/reconciler.mjs +477 -0
  26. package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
  27. package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
  28. package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
  29. package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
  30. package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
  31. package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
  32. package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
  33. package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
  34. package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
  35. package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
  36. package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
  37. package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
  38. package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
  39. package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
  40. package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
  41. package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
  42. package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
  43. package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
  44. package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
  45. package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
  46. package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
  47. package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
  48. package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
  49. package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
  50. package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
  51. package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
  52. package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
  53. package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
  54. package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
  55. package/core/indexing/admission-policy.js +139 -0
  56. package/core/indexing/artifact-builder.js +29 -12
  57. package/core/indexing/ast-chunker.js +107 -30
  58. package/core/indexing/dedup/exemplar-selector.js +19 -1
  59. package/core/indexing/gitignore-filter.js +223 -0
  60. package/core/indexing/incremental-tracker.js +99 -30
  61. package/core/indexing/index-codebase-v21.js +6 -5
  62. package/core/indexing/index-maintainer.mjs +698 -6
  63. package/core/indexing/indexer-ann.js +99 -15
  64. package/core/indexing/indexer-build.js +158 -45
  65. package/core/indexing/indexer-empty-baseline.js +80 -0
  66. package/core/indexing/indexer-manifest.js +66 -0
  67. package/core/indexing/indexer-phases.js +56 -23
  68. package/core/indexing/indexer-sparse-gram.js +54 -13
  69. package/core/indexing/indexer-utils.js +26 -208
  70. package/core/indexing/indexing-file-policy.js +32 -7
  71. package/core/indexing/maintainer-launcher.mjs +137 -0
  72. package/core/indexing/merkle-tracker.js +251 -244
  73. package/core/indexing/model-pool.js +46 -5
  74. package/core/infrastructure/code-graph-repository.js +758 -6
  75. package/core/infrastructure/code-graph-visibility.js +157 -0
  76. package/core/infrastructure/codebase-repository.js +100 -13
  77. package/core/infrastructure/config/search.js +1 -1
  78. package/core/infrastructure/db-utils.js +118 -0
  79. package/core/infrastructure/dedup-hashing.js +10 -13
  80. package/core/infrastructure/hardware-capability.js +17 -7
  81. package/core/infrastructure/index.js +8 -2
  82. package/core/infrastructure/language-patterns/maps.js +4 -1
  83. package/core/infrastructure/language-patterns/registry-core.js +56 -17
  84. package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
  85. package/core/infrastructure/language-patterns.js +69 -0
  86. package/core/infrastructure/model-registry.js +20 -0
  87. package/core/infrastructure/native-inference.js +7 -12
  88. package/core/infrastructure/native-resolver.js +52 -37
  89. package/core/infrastructure/native-sparse-gram.js +261 -20
  90. package/core/infrastructure/native-tokenizer.js +6 -15
  91. package/core/infrastructure/simd-distance.js +10 -16
  92. package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
  93. package/core/infrastructure/structural-alias-resolver.js +122 -0
  94. package/core/infrastructure/structural-candidate-ranker.js +34 -0
  95. package/core/infrastructure/structural-context-repository.js +472 -0
  96. package/core/infrastructure/structural-context-utils.js +51 -0
  97. package/core/infrastructure/structural-graph-signals.js +121 -0
  98. package/core/infrastructure/structural-qualified-resolution.js +15 -0
  99. package/core/infrastructure/structural-source-definitions.js +100 -0
  100. package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
  101. package/core/infrastructure/tree-sitter-provider.js +811 -37
  102. package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
  103. package/core/query/query-router.js +55 -5
  104. package/core/ranking/file-kind-ranking.js +2192 -15
  105. package/core/ranking/late-interaction-index.js +87 -12
  106. package/core/search/cli-decoration.js +290 -0
  107. package/core/search/context-expander.js +988 -78
  108. package/core/search/index.js +1 -0
  109. package/core/search/output-policy.js +275 -0
  110. package/core/search/search-anchor.js +499 -0
  111. package/core/search/search-boost.js +93 -1
  112. package/core/search/search-cli.js +61 -204
  113. package/core/search/search-hybrid.js +250 -10
  114. package/core/search/search-pattern-chunks.js +57 -8
  115. package/core/search/search-pattern-planner.js +68 -9
  116. package/core/search/search-pattern-prefilter.js +30 -10
  117. package/core/search/search-pattern-ripgrep.js +40 -4
  118. package/core/search/search-pattern-sparse-overlay.js +256 -0
  119. package/core/search/search-pattern.js +117 -29
  120. package/core/search/search-postprocess.js +479 -5
  121. package/core/search/search-read-semantic.js +260 -23
  122. package/core/search/search-read.js +82 -64
  123. package/core/search/search-reader-pin.js +71 -0
  124. package/core/search/search-rrf.js +279 -0
  125. package/core/search/search-semantic.js +110 -5
  126. package/core/search/search-server.js +130 -57
  127. package/core/search/search-trace.js +107 -0
  128. package/core/search/server-identity.js +93 -0
  129. package/core/search/session-daemon-prewarm.mjs +33 -10
  130. package/core/search/sweet-search.js +399 -7
  131. package/core/skills/sweet-index/SKILL.md +8 -6
  132. package/core/vector-store/binary-hnsw-index.js +194 -30
  133. package/core/vector-store/float-vector-store.js +96 -6
  134. package/core/vector-store/hnsw-index.js +220 -49
  135. package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
  136. package/eval/agent-read-workflows/bin/ss-find +15 -0
  137. package/eval/agent-read-workflows/bin/ss-grep +12 -0
  138. package/eval/agent-read-workflows/bin/ss-read +14 -0
  139. package/eval/agent-read-workflows/bin/ss-search +18 -0
  140. package/eval/agent-read-workflows/bin/ss-semantic +12 -0
  141. package/eval/agent-read-workflows/bin/ss-trace +11 -0
  142. package/mcp/read-tool.js +109 -0
  143. package/mcp/server.js +55 -15
  144. package/mcp/tool-handlers.js +14 -124
  145. package/mcp/trace-tool.js +81 -0
  146. package/package.json +25 -10
  147. package/scripts/hooks/intercept-read.mjs +55 -0
  148. package/scripts/hooks/remind-tools.mjs +40 -0
  149. package/scripts/init.js +698 -54
  150. package/scripts/inject-agent-instructions.js +431 -0
  151. package/scripts/install-prompt-reminders.js +188 -0
  152. package/scripts/install-tool-enforcement.js +220 -0
  153. package/scripts/smoke-test.js +12 -9
  154. package/scripts/uninstall.js +276 -18
  155. package/scripts/write-claude-rules.js +110 -0
@@ -17,7 +17,9 @@ import { PROJECT_ROOT } from '../infrastructure/config/index.js';
17
17
  import { generateRegexMatches } from './search-pattern-planner.js';
18
18
  import { buildBareGrepResults, filterMatchesBySymbolType, resolveSearchSymbolFilter, mapMatchesToChunks, readFileRange } from './search-pattern-chunks.js';
19
19
  import { isRipgrepAvailable, runRipgrepJson } from './search-pattern-ripgrep.js';
20
+ import { ensureSparseGramIndex } from './search-pattern-prefilter.js';
20
21
  import { packageForAgent } from './context-expander.js';
22
+ import { applyFileKindRanking, applyResultDemotions } from '../ranking/file-kind-ranking.js';
21
23
 
22
24
  // =============================================================================
23
25
  // Ripgrep runner (thin wrapper for external callers)
@@ -74,11 +76,61 @@ export function mergeRegexIntoQuery(query, regex) {
74
76
  return `${query} ${novel.join(' ')}`;
75
77
  }
76
78
 
79
+ // =============================================================================
80
+ // Grep engine selection — native in-process grep vs ripgrep fallback
81
+ // =============================================================================
82
+
83
+ /**
84
+ * Whether native in-process grep can serve this request without ripgrep.
85
+ *
86
+ * Native unified/narrowed grep (searchFull/searchLines + nativeGrep*) covers
87
+ * plain-regex queries whenever a sparse-gram index is loaded. The loaded index
88
+ * object only exposes searchFull/searchLines when the native addon built it, so
89
+ * their presence is a reliable signal that native grep is available — and it
90
+ * keeps the check deterministic for tests that supply a mock index.
91
+ *
92
+ * Fixed-string and glob queries are gated off the native path in
93
+ * generateRegexMatches, so they still require ripgrep as the fallback engine.
94
+ */
95
+ function nativeGrepCanServe(searcher, options = {}) {
96
+ const fixedString = options.fixedString ?? false;
97
+ const globs = options.globs ?? [];
98
+ if (fixedString || (Array.isArray(globs) && globs.length > 0)) return false;
99
+ const index = ensureSparseGramIndex(searcher, options);
100
+ return !!(index && typeof index.searchFull === 'function' && typeof index.searchLines === 'function');
101
+ }
102
+
103
+ /**
104
+ * Fail fast when no grep engine can serve the request. Native in-process grep
105
+ * is preferred and needs no external binary; ripgrep is an optional fallback
106
+ * (required only for fixed-string/glob queries, or when the native addon/index
107
+ * is absent). Throws a single actionable error naming both engines when neither
108
+ * is available — generateRegexMatches itself prefers native and only reaches a
109
+ * ripgrep call in the fallback branches this guard protects.
110
+ *
111
+ * @returns {Promise<boolean>} true when native grep will serve (ripgrep unused)
112
+ */
113
+ async function ensureGrepEngineAvailable(searcher, options, label) {
114
+ if (nativeGrepCanServe(searcher, options)) return true;
115
+ if (await isRipgrepAvailable()) return false;
116
+
117
+ const fixedString = options.fixedString ?? false;
118
+ const globs = options.globs ?? [];
119
+ const reason = (fixedString || (Array.isArray(globs) && globs.length > 0))
120
+ ? 'fixed-string and glob queries use the ripgrep fallback, which is not installed'
121
+ : 'native grep is unavailable (no sparse-gram index built, or the native addon is missing) and ripgrep is not installed';
122
+ throw new Error(
123
+ `${label} needs an in-process grep engine, but none is available: ${reason}. ` +
124
+ 'Re-index to build the native sparse-gram index, or install ripgrep (brew install ripgrep).'
125
+ );
126
+ }
127
+
77
128
  // =============================================================================
78
129
  // Bare grep (wired onto SweetSearch.prototype)
79
130
  // =============================================================================
80
131
 
81
132
  export async function bareGrep(query, routing, options = {}) {
133
+ await this?._refreshManifestPins?.({ reloadScope: 'grep' });
82
134
  const regex = options.regex || query;
83
135
  const searchDir = this?.projectRoot || options.projectRoot || PROJECT_ROOT;
84
136
  const maxMatches = options.maxMatches ?? 0;
@@ -91,9 +143,10 @@ export async function bareGrep(query, routing, options = {}) {
91
143
  throw new Error('Bare grep requires a regex or fixed-string pattern.');
92
144
  }
93
145
 
94
- if (!await isRipgrepAvailable()) {
95
- throw new Error('Bare grep requires ripgrep (rg). Install: brew install ripgrep');
96
- }
146
+ // Native in-process grep serves this when a sparse-gram index is loaded;
147
+ // ripgrep is only required for fixed-string/glob queries or when native is
148
+ // unavailable. Throws a clear error only when neither engine can run.
149
+ await ensureGrepEngineAvailable(this, options, 'Bare grep');
97
150
 
98
151
  // Disable chunk gram for bare grep — bare grep uses file:line matches, not chunk IDs.
99
152
  const candidateResult = await generateRegexMatches(this || {}, regex, searchDir, options);
@@ -161,6 +214,7 @@ export async function bareGrep(query, routing, options = {}) {
161
214
  * Uses `this` — must be wired onto SweetSearch.prototype.
162
215
  */
163
216
  export async function patternSearch(query, routing, options = {}) {
217
+ await this?._refreshManifestPins?.({ reloadScope: 'all' });
164
218
  const {
165
219
  regex,
166
220
  k = 10,
@@ -180,9 +234,10 @@ export async function patternSearch(query, routing, options = {}) {
180
234
  throw new Error('Pattern search requires a late interaction index. Re-index with late interaction enabled.');
181
235
  }
182
236
 
183
- if (!await isRipgrepAvailable()) {
184
- throw new Error('Pattern search requires ripgrep (rg). Install: brew install ripgrep');
185
- }
237
+ // Native in-process grep serves candidate generation when a sparse-gram index
238
+ // is loaded; ripgrep is only required for fixed-string/glob queries or when
239
+ // native is unavailable. Throws a clear error only when neither engine runs.
240
+ await ensureGrepEngineAvailable(this, options, 'Pattern search');
186
241
 
187
242
  await this.lateInteractionIndex.init();
188
243
 
@@ -310,32 +365,15 @@ export async function patternSearch(query, routing, options = {}) {
310
365
  s.grepDensity = matchCount;
311
366
  s.lateInteractionScore = s.lateInteractionScore * (1 + GREP_DENSITY_ALPHA * Math.log(matchCount));
312
367
  }
313
- const TEST_DEMOTION = options.testDemotion ?? 0.05;
314
- if (TEST_DEMOTION > 0) {
315
- const queryLower = query.toLowerCase();
316
- const queryMentionsTest = /\btest|spec|describe|it\b/.test(queryLower);
317
- if (!queryMentionsTest) {
318
- for (const s of scored) {
319
- const doc = this.lateInteractionIndex.documents.get(s.id);
320
- const file = doc?.metadata?.file || '';
321
- const name = doc?.metadata?.name || '';
322
- if (/test|spec|__test__|\.test\.|\.spec\./.test(file) ||
323
- /test|spec/i.test(name)) {
324
- s.lateInteractionScore -= TEST_DEMOTION;
325
- }
326
- }
327
- }
328
- }
329
-
330
368
  scored.sort((a, b) => b.lateInteractionScore - a.lateInteractionScore);
331
369
  log(`MaxSim rerank: ${scored.length} candidates in ${rerankTime.toFixed(1)}ms`);
332
370
  }
333
371
 
334
372
  const fileCache = new Map();
335
373
 
336
- const results = scored.slice(0, k).map((s, rank) => {
374
+ let rankedResults = scored.map((s, rank) => {
337
375
  const doc = this.lateInteractionIndex.documents.get(s.id);
338
- const meta = doc?.metadata || {};
376
+ const meta = doc?.metadata || this.lateInteractionIndex.aliasPointers?.get(s.id)?.metadata || {};
339
377
  const text = readFileRange(fileCache, meta.file, meta.startLine, meta.endLine, this.projectRoot);
340
378
 
341
379
  return {
@@ -355,6 +393,56 @@ export async function patternSearch(query, routing, options = {}) {
355
393
  metadata: meta,
356
394
  };
357
395
  });
396
+ // Mode E (2026-05-13): for agent-format ss-find queries, apply file-kind
397
+ // demotion before result demotions. The regex+symbol shape of ss-find is
398
+ // implementation-intent by construction (agents using patternSearch are
399
+ // hunting for code, not docs), so we force intent='implementation' rather
400
+ // than inferring it from the NL query — `classifyFileKindIntent` requires
401
+ // explicit "show me the function/class" verbs that ss-find prompts don't
402
+ // always include.
403
+ //
404
+ // CRITICAL — format-gated to agent variants only. Same gate as the BM25F
405
+ // boosts in applyResultDemotions (round-1/2 lessons: -27.57pp GCSN if
406
+ // ungated structural signals fire on benchmark NL traffic). Probes use
407
+ // format='agent', so behaviour matches expectations; GCSN bench uses
408
+ // mode='auto' with no format, so this skip preserves the 86.92% baseline.
409
+ //
410
+ // Targets stage3-taxonomy.md Mode E failures:
411
+ // - JS-005: index.d.ts: interface AxiosHeaders outranks lib/core/AxiosHeaders.js
412
+ // - TSL-004/008: packages/docs/content/packages/core.mdx outranks schemas.ts
413
+ // The .d.ts (types kind) and .mdx (docs kind) factors mirror hybrid's
414
+ // tuned defaults (typeFactor 0.70, docFactor 0.35).
415
+ const ssFindIsAgentFormat = options?.format === 'agent'
416
+ || options?.format === 'agent_full'
417
+ || options?.format === 'agent_full_xl'
418
+ || options?.format === 'agent_preview';
419
+ const skipPatternFileKind = Array.isArray(ablations)
420
+ ? ablations.includes('no-pattern-file-kind-ranking')
421
+ : (ablations instanceof Set ? ablations.has('no-pattern-file-kind-ranking') : false);
422
+ if (ssFindIsAgentFormat && !skipPatternFileKind) {
423
+ rankedResults = applyFileKindRanking(rankedResults, {
424
+ intent: 'implementation',
425
+ window: options.fileKindWindow ?? 100,
426
+ docFactor: options.patternDocFactor ?? 0.35,
427
+ testFactor: options.patternTestFactor ?? 0.35,
428
+ typeFactor: options.patternTypeFactor ?? 0.70,
429
+ ancillaryFactor: options.patternAncillaryFactor ?? 0.15,
430
+ tinyAncillaryFactor: options.patternTinyAncillaryFactor ?? 0.05,
431
+ });
432
+ }
433
+ rankedResults = applyResultDemotions(rankedResults, {
434
+ query,
435
+ ablations,
436
+ format: options?.format,
437
+ projectRoot: this.projectRoot,
438
+ codeGraphRepo: this.codeGraphRepo,
439
+ }).map((result, rank) => ({
440
+ ...result,
441
+ rank: rank + 1,
442
+ lateInteractionScore: result.score,
443
+ }));
444
+
445
+ const results = rankedResults.slice(0, k);
358
446
 
359
447
  const remaining = Math.max(0, k - results.length);
360
448
  if (remaining > 0 && unindexedMatches.length > 0) {
@@ -365,7 +453,7 @@ export async function patternSearch(query, routing, options = {}) {
365
453
  const key = `${m.file}:${m.line}`;
366
454
  if (seen.has(key)) continue;
367
455
  seen.add(key);
368
-
456
+ const content = m.content || readFileRange(fileCache, m.file, m.line, m.line, this.projectRoot) || m.matchText || '';
369
457
  results.push({
370
458
  id: `unindexed:${m.file}:${m.line}`,
371
459
  file: m.file,
@@ -373,8 +461,8 @@ export async function patternSearch(query, routing, options = {}) {
373
461
  type: 'code',
374
462
  startLine: m.line,
375
463
  endLine: m.line,
376
- text: m.content,
377
- content: m.content,
464
+ text: content,
465
+ content,
378
466
  score: 0,
379
467
  lateInteractionScore: 0,
380
468
  rank: results.length + 1,
@@ -461,4 +549,4 @@ export { generateRegexMatches } from './search-pattern-planner.js';
461
549
  export { hasCaseInsensitiveRegexFlag, extractRequiredLiteralsHeuristic, extractLiteralClausesHeuristic, extractLiteralClauses, normalizeLiteralClauses, querySparseGramCandidates, ensureSparseGramIndex, nativeGrepFilesWithMatches, nativeGrepLines, getSparseGramAllFiles } from './search-pattern-prefilter.js';
462
550
  export { buildChunkLocationMap, findChunkForLine, findChunkIntervalForLine, mapMatchesToChunks, readFileRange, getChunkLocationMap, getCodebaseChunkTypeMap, normalizeSearchSymbolType, resolveSearchSymbolFilter, isRipgrepCodePath, buildBareGrepResults, filterMatchesBySymbolType } from './search-pattern-chunks.js';
463
551
  export { isRipgrepAvailable, _resetRgCache, normalizeSearchPath, chunkRipgrepFiles } from './search-pattern-ripgrep.js';
464
- export { packageForAgent, estimateTokens, computeConfidence, computeSufficiency, allocateBudget, expandToSymbol, expandBySyntax, extractHeaderContext, truncateToTokenCap, findEnclosingEntity, checkStaleness } from './context-expander.js';
552
+ export { packageForAgent, estimateTokens, computeConfidence, computeSufficiency, allocateBudget, expandToSymbol, expandBySyntax, expandLeadingTrivia, extractHeaderContext, truncateToTokenCap, findEnclosingEntity, checkStaleness, renderGraphNeighbors } from './context-expander.js';