sweet-search 2.5.2 → 2.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. package/core/cli.js +24 -3
  2. package/core/graph/graph-expansion.js +215 -36
  3. package/core/graph/graph-extractor.js +196 -11
  4. package/core/graph/graph-search.js +395 -92
  5. package/core/graph/hcgs-generator.js +2 -1
  6. package/core/graph/index.js +2 -0
  7. package/core/graph/repo-map.js +28 -6
  8. package/core/graph/structural-answer-cues.js +168 -0
  9. package/core/graph/structural-callsite-hints.js +40 -0
  10. package/core/graph/structural-context-format.js +40 -0
  11. package/core/graph/structural-context.js +450 -0
  12. package/core/graph/structural-forward-push.js +156 -0
  13. package/core/graph/structural-header-context.js +19 -0
  14. package/core/graph/structural-importance.js +148 -0
  15. package/core/graph/structural-pagerank.js +197 -0
  16. package/core/graph/summary-manager.js +13 -9
  17. package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
  18. package/core/incremental-indexing/application/file-watcher.mjs +197 -0
  19. package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
  20. package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
  21. package/core/incremental-indexing/application/operator-cli.mjs +554 -0
  22. package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
  23. package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
  24. package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
  25. package/core/incremental-indexing/application/reconciler.mjs +477 -0
  26. package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
  27. package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
  28. package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
  29. package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
  30. package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
  31. package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
  32. package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
  33. package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
  34. package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
  35. package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
  36. package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
  37. package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
  38. package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
  39. package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
  40. package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
  41. package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
  42. package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
  43. package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
  44. package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
  45. package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
  46. package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
  47. package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
  48. package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
  49. package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
  50. package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
  51. package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
  52. package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
  53. package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
  54. package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
  55. package/core/indexing/admission-policy.js +139 -0
  56. package/core/indexing/artifact-builder.js +29 -12
  57. package/core/indexing/ast-chunker.js +107 -30
  58. package/core/indexing/dedup/exemplar-selector.js +19 -1
  59. package/core/indexing/gitignore-filter.js +223 -0
  60. package/core/indexing/incremental-tracker.js +99 -30
  61. package/core/indexing/index-codebase-v21.js +6 -5
  62. package/core/indexing/index-maintainer.mjs +698 -6
  63. package/core/indexing/indexer-ann.js +99 -15
  64. package/core/indexing/indexer-build.js +158 -45
  65. package/core/indexing/indexer-empty-baseline.js +80 -0
  66. package/core/indexing/indexer-manifest.js +66 -0
  67. package/core/indexing/indexer-phases.js +56 -23
  68. package/core/indexing/indexer-sparse-gram.js +54 -13
  69. package/core/indexing/indexer-utils.js +26 -208
  70. package/core/indexing/indexing-file-policy.js +32 -7
  71. package/core/indexing/maintainer-launcher.mjs +137 -0
  72. package/core/indexing/merkle-tracker.js +251 -244
  73. package/core/indexing/model-pool.js +46 -5
  74. package/core/infrastructure/code-graph-repository.js +758 -6
  75. package/core/infrastructure/code-graph-visibility.js +157 -0
  76. package/core/infrastructure/codebase-repository.js +100 -13
  77. package/core/infrastructure/config/search.js +1 -1
  78. package/core/infrastructure/db-utils.js +118 -0
  79. package/core/infrastructure/dedup-hashing.js +10 -13
  80. package/core/infrastructure/hardware-capability.js +17 -7
  81. package/core/infrastructure/index.js +8 -2
  82. package/core/infrastructure/language-patterns/maps.js +4 -1
  83. package/core/infrastructure/language-patterns/registry-core.js +56 -17
  84. package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
  85. package/core/infrastructure/language-patterns.js +69 -0
  86. package/core/infrastructure/model-registry.js +20 -0
  87. package/core/infrastructure/native-inference.js +7 -12
  88. package/core/infrastructure/native-resolver.js +52 -37
  89. package/core/infrastructure/native-sparse-gram.js +261 -20
  90. package/core/infrastructure/native-tokenizer.js +6 -15
  91. package/core/infrastructure/simd-distance.js +10 -16
  92. package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
  93. package/core/infrastructure/structural-alias-resolver.js +122 -0
  94. package/core/infrastructure/structural-candidate-ranker.js +34 -0
  95. package/core/infrastructure/structural-context-repository.js +472 -0
  96. package/core/infrastructure/structural-context-utils.js +51 -0
  97. package/core/infrastructure/structural-graph-signals.js +121 -0
  98. package/core/infrastructure/structural-qualified-resolution.js +15 -0
  99. package/core/infrastructure/structural-source-definitions.js +100 -0
  100. package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
  101. package/core/infrastructure/tree-sitter-provider.js +811 -37
  102. package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
  103. package/core/query/query-router.js +55 -5
  104. package/core/ranking/file-kind-ranking.js +2192 -15
  105. package/core/ranking/late-interaction-index.js +87 -12
  106. package/core/search/cli-decoration.js +290 -0
  107. package/core/search/context-expander.js +988 -78
  108. package/core/search/index.js +1 -0
  109. package/core/search/output-policy.js +275 -0
  110. package/core/search/search-anchor.js +499 -0
  111. package/core/search/search-boost.js +93 -1
  112. package/core/search/search-cli.js +61 -204
  113. package/core/search/search-hybrid.js +250 -10
  114. package/core/search/search-pattern-chunks.js +57 -8
  115. package/core/search/search-pattern-planner.js +68 -9
  116. package/core/search/search-pattern-prefilter.js +30 -10
  117. package/core/search/search-pattern-ripgrep.js +40 -4
  118. package/core/search/search-pattern-sparse-overlay.js +256 -0
  119. package/core/search/search-pattern.js +117 -29
  120. package/core/search/search-postprocess.js +479 -5
  121. package/core/search/search-read-semantic.js +260 -23
  122. package/core/search/search-read.js +82 -64
  123. package/core/search/search-reader-pin.js +71 -0
  124. package/core/search/search-rrf.js +279 -0
  125. package/core/search/search-semantic.js +110 -5
  126. package/core/search/search-server.js +130 -57
  127. package/core/search/search-trace.js +107 -0
  128. package/core/search/server-identity.js +93 -0
  129. package/core/search/session-daemon-prewarm.mjs +33 -10
  130. package/core/search/sweet-search.js +399 -7
  131. package/core/skills/sweet-index/SKILL.md +8 -6
  132. package/core/vector-store/binary-hnsw-index.js +194 -30
  133. package/core/vector-store/float-vector-store.js +96 -6
  134. package/core/vector-store/hnsw-index.js +220 -49
  135. package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
  136. package/eval/agent-read-workflows/bin/ss-find +15 -0
  137. package/eval/agent-read-workflows/bin/ss-grep +12 -0
  138. package/eval/agent-read-workflows/bin/ss-read +14 -0
  139. package/eval/agent-read-workflows/bin/ss-search +18 -0
  140. package/eval/agent-read-workflows/bin/ss-semantic +12 -0
  141. package/eval/agent-read-workflows/bin/ss-trace +11 -0
  142. package/mcp/read-tool.js +109 -0
  143. package/mcp/server.js +55 -15
  144. package/mcp/tool-handlers.js +14 -124
  145. package/mcp/trace-tool.js +81 -0
  146. package/package.json +25 -10
  147. package/scripts/hooks/intercept-read.mjs +55 -0
  148. package/scripts/hooks/remind-tools.mjs +40 -0
  149. package/scripts/init.js +698 -54
  150. package/scripts/inject-agent-instructions.js +431 -0
  151. package/scripts/install-prompt-reminders.js +188 -0
  152. package/scripts/install-tool-enforcement.js +220 -0
  153. package/scripts/smoke-test.js +12 -9
  154. package/scripts/uninstall.js +276 -18
  155. package/scripts/write-claude-rules.js +110 -0
@@ -0,0 +1,50 @@
1
+ ---
2
+ run_id: p7-v1-mpp
3
+ score_sonnet: 0.993
4
+ score_gpt5_5: 0.988
5
+ joint_maximin: 0.988
6
+ eas_factor: null
7
+ length_penalty: null
8
+ final_score: null
9
+ token_count: 1066
10
+ scs_cwSCS: 0.931
11
+ scs_min_paraphrase_accuracy: 1
12
+ homp_family_pass: true
13
+ homp_reasoning_pass: true
14
+ ood_maximin_sonnet: 0.96
15
+ ood_maximin_gpt: 0.952
16
+ ood_pass: true
17
+ vault_maximin: 0.963
18
+ vault_within_15pct_of_heldout: true
19
+ ---
20
+ # Sweet-search — code search tool guide
21
+
22
+ Sweet-search indexes the working tree (uncommitted edits too) and searches it faster and cheaper than raw shell. Use the `ss-*` tools for all code search and navigation. Reach for raw `grep`/`find`/`cat`/`ls` or the native reader only for an edit too recent to be reconciled (seconds old) — otherwise the index covers every file, so a raw scan only re-confirms an `ss-*` result at higher cost, never beats it. Never re-run an `ss-*` hit as raw grep. Any sub-agent you delegate to must use these `ss-*` tools, with this system prompt verbatim.
23
+
24
+ ## Tools (invoke via Bash)
25
+ `ss-search`/`ss-find` return ranked code blocks; `ss-grep` is file:line only; `ss-semantic` returns the top ranked spans in one file; `ss-trace` returns a symbol's callers, callees and impact. `ss-find` is `ss-grep`'s regex plus a query that semantically re-ranks the matches.
26
+ - `ss-search "<query>" [-k N]` — semantic; default when you have no exact symbol
27
+ - `ss-find "<query>" --regex "<regex>" [-k N]`
28
+ - `ss-grep "<regex>" [-k N]` — exact literals
29
+ - `ss-semantic <file> "<query>"` — top ranked spans in one known file (semantic query)
30
+ - `ss-trace <symbol> [callers|callees|impact] [--in <file>]`
31
+ - `ss-read <file> [start] [end]` — a narrow range
32
+
33
+ ## Open with the cheapest tool for what you hold
34
+ - **An exact token** (identifier, function/class/constant, error string, config key, path you could copy-paste): ONE `ss-grep` on that literal (rarest token, escaped) or `ss-find` `\b<symbol>\b`. Trust the top hit and stop — no `ss-search` first, no confirming re-search. One exception: if the top hit is an autogenerated file (a "do not edit" or "@generated" header, or a name like `schema11`/`validateN`), it is a generated copy, not where the value is authored — follow it to the real source it is generated from.
35
+ - **Only a behavior or concept**: one `ss-search` in natural language for what you're looking for, then anchor on the symbol that surfaces. Shape it lightly by the target language — short and interrogative for JS/TS/Dart, a touch longer with a domain keyword otherwise.
36
+ - **How something flows / dispatches / is called / what a change impacts**: anchor one symbol (a literal, or `ss-search`), then `ss-trace` it — one call returns callers, callees and impact. Prefer callees over impact (especially Python/Ruby/PHP). If a trace is sparse or empty, anchor the downstream symbol with `ss-find`/`ss-search` rather than retrying or hand-crawling; never make `ss-trace` the spine of a multi-file search.
37
+
38
+ Trust the top ranked result; confirm with at most one narrow `ss-read`, never a re-run of a matching hit.
39
+
40
+ ## Multi-file
41
+ Chain inside the tools: land the entry file, `ss-semantic` it for the import or handoff symbol, then `ss-search`/`ss-find` the downstream module. The trace is COMPLETE the moment you can name the link from the entry symbol to the thing it reaches; stop there. Leaf bodies, macro expansions, and the next hop down are not the answer unless asked, and chasing them — or dropping to raw `cat`/`grep` to "just look" — is the main multi-file cost trap.
42
+
43
+ ## A confirmed absence is a complete answer
44
+ When what you're looking for may not exist, absence is settled once TWO complementary index probes come back empty for the same concept: one `ss-search` in natural language and one broad `ss-grep` on its likeliest identifier (a short substring/prefix). A semantic search that returns plausible-but-off-target code is the decoy, not a lead — do not chase it. Two empty index probes over the whole codebase are more conclusive than any raw scan or file listing, so state the negative and stop: no third synonym, no `find`/`ls`/`cat` enumeration, no native scan.
45
+
46
+ ## Before the third probe
47
+ Before your third sweet-search probe in the current search iteration — or before your final answer, whichever comes first — output a `<state_summary>` block with exactly: (1) one sentence on what you've established, (2) one sentence on your current blind spot.
48
+
49
+ ## Output
50
+ Stop the instant your evidence answers what you're looking for — one confirmed file+symbol, or one named cross-file link, is enough; gather no corroboration you were not asked for. Name the file(s) and symbol(s) and how they answer what you need, or `no-match`.
@@ -128,9 +128,13 @@ export class QueryRouter {
128
128
  }
129
129
 
130
130
  // === FILE PATH CHECK (~0.1μs) ===
131
- // File extensions and paths are always lexical
132
- if (/\.(java|js|jsx|ts|tsx|py|go|rs|kt|swift|rb|php|c|cpp|h|proto|json|xml|yml|yaml|md|sql)$/i.test(trimmed) ||
133
- /[/\\]/.test(trimmed)) {
131
+ // File extensions and paths route to lexical via the file_pattern
132
+ // fast-path. The previous heuristic (`/[/\\]/.test(trimmed)`) fired on
133
+ // ANY slash, which mis-routed natural-language phrases like
134
+ // "HTTP/2 server setup" to lexical. The new rule (looksLikePath)
135
+ // requires either an extension anchor (`.js`, `.json`, ...) OR a slash
136
+ // with NO whitespace anywhere — true paths never contain whitespace.
137
+ if (looksLikePath(trimmed)) {
134
138
  return {
135
139
  mode: 'lexical',
136
140
  confidence: 0.95,
@@ -149,10 +153,16 @@ export class QueryRouter {
149
153
  const confidence = result.confidence;
150
154
  const rejected = result.rejected;
151
155
 
156
+ // Collapse semantic → hybrid: empirically hybrid >= semantic on MRR
157
+ // across both gencodesearchnet and fastify/gin/ripgrep at ~+1ms p50.
158
+ const collapsedMode = (mode === 'semantic') ? 'hybrid' : mode;
152
159
  return {
153
- mode: rejected ? 'hybrid' : mode,
160
+ mode: rejected ? 'hybrid' : collapsedMode,
161
+ rawMode: mode,
154
162
  confidence,
155
- method: rejected ? 'wasm_rejected' : 'wasm_catboost',
163
+ method: rejected
164
+ ? 'wasm_rejected'
165
+ : (mode === 'semantic' ? 'wasm_collapsed_semantic' : 'wasm_catboost'),
156
166
  routingLatency_us: Math.round((performance.now() - start) * 1000),
157
167
  };
158
168
  } catch (err) {
@@ -177,6 +187,46 @@ export class QueryRouter {
177
187
  }
178
188
  }
179
189
 
190
+ // =============================================================================
191
+ // PATH-LIKENESS HEURISTIC
192
+ // =============================================================================
193
+
194
+ const FILE_EXT_RE = /\.(java|js|jsx|ts|tsx|mjs|cjs|py|go|rs|kt|swift|rb|php|c|cpp|h|hpp|proto|json|xml|yml|yaml|md|sql|toml|ini|conf|cfg|sh|bash|zsh|env|lock|gitignore|gitattributes|dockerfile|makefile|rake|gemspec|cargo)$/i;
195
+
196
+ /**
197
+ * Decide whether a query is a "real" file path / glob the user wants
198
+ * routed verbatim through lexical search, vs a natural-language phrase
199
+ * that just happens to contain a slash.
200
+ *
201
+ * Path-likeness rules, checked in this order (model-agnostic):
202
+ * 1. ends with a known extension (`*.test.js`, `package.json`, `README.md`)
203
+ * → looks like a path (regardless of slashes or whitespace).
204
+ * 2. contains `/` or `\` AND has NO whitespace anywhere
205
+ * → looks like a path. True paths never contain whitespace.
206
+ * 3. relative-path prefixes (`./`, `../`, `~/`) have no dedicated check:
207
+ * they match via rule 2 (slash) or rule 1 (`.env`); bare `.`/`..` do not.
208
+ * 4. anything else (plain identifiers, NL phrases including ones that
209
+ * contain slashes like "HTTP/2 server setup", "TCP/IP stack")
210
+ * → NOT a path; let the WASM router decide.
211
+ *
212
+ * @param {string} query - raw query text; trimmed before matching.
213
+ * @returns {boolean} true when the query looks like a path; false for non-strings and empty input.
214
+ */
215
+ export function looksLikePath(query) {
216
+ if (typeof query !== 'string') return false;
217
+ const trimmed = query.trim();
218
+ if (!trimmed) return false;
219
+ if (FILE_EXT_RE.test(trimmed)) return true;
220
+ // Past the extension check, whitespace disqualifies — even with a slash this
221
+ // is natural language ("HTTP/2 server setup", "client/server architecture").
222
+ if (/\s/.test(trimmed)) return false;
223
+ // No-whitespace + slash/backslash → true path or glob.
224
+ if (/[/\\]/.test(trimmed)) return true;
225
+ // Slash-less dotted names (e.g. `.env`) already returned above if they end in a
226
+ // known extension; bare `.`/`..` and plain identifiers fall through to false.
227
+ return false;
228
+ }
229
+
180
230
  // =============================================================================
181
231
  // CONVENIENCE EXPORTS
182
232
  // =============================================================================