sweet-search 2.5.2 → 2.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. package/core/cli.js +24 -3
  2. package/core/graph/graph-expansion.js +215 -36
  3. package/core/graph/graph-extractor.js +196 -11
  4. package/core/graph/graph-search.js +395 -92
  5. package/core/graph/hcgs-generator.js +2 -1
  6. package/core/graph/index.js +2 -0
  7. package/core/graph/repo-map.js +28 -6
  8. package/core/graph/structural-answer-cues.js +168 -0
  9. package/core/graph/structural-callsite-hints.js +40 -0
  10. package/core/graph/structural-context-format.js +40 -0
  11. package/core/graph/structural-context.js +450 -0
  12. package/core/graph/structural-forward-push.js +156 -0
  13. package/core/graph/structural-header-context.js +19 -0
  14. package/core/graph/structural-importance.js +148 -0
  15. package/core/graph/structural-pagerank.js +197 -0
  16. package/core/graph/summary-manager.js +13 -9
  17. package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
  18. package/core/incremental-indexing/application/file-watcher.mjs +197 -0
  19. package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
  20. package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
  21. package/core/incremental-indexing/application/operator-cli.mjs +554 -0
  22. package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
  23. package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
  24. package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
  25. package/core/incremental-indexing/application/reconciler.mjs +477 -0
  26. package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
  27. package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
  28. package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
  29. package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
  30. package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
  31. package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
  32. package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
  33. package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
  34. package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
  35. package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
  36. package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
  37. package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
  38. package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
  39. package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
  40. package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
  41. package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
  42. package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
  43. package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
  44. package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
  45. package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
  46. package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
  47. package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
  48. package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
  49. package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
  50. package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
  51. package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
  52. package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
  53. package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
  54. package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
  55. package/core/indexing/admission-policy.js +139 -0
  56. package/core/indexing/artifact-builder.js +29 -12
  57. package/core/indexing/ast-chunker.js +107 -30
  58. package/core/indexing/dedup/exemplar-selector.js +19 -1
  59. package/core/indexing/gitignore-filter.js +223 -0
  60. package/core/indexing/incremental-tracker.js +99 -30
  61. package/core/indexing/index-codebase-v21.js +6 -5
  62. package/core/indexing/index-maintainer.mjs +698 -6
  63. package/core/indexing/indexer-ann.js +99 -15
  64. package/core/indexing/indexer-build.js +158 -45
  65. package/core/indexing/indexer-empty-baseline.js +80 -0
  66. package/core/indexing/indexer-manifest.js +66 -0
  67. package/core/indexing/indexer-phases.js +56 -23
  68. package/core/indexing/indexer-sparse-gram.js +54 -13
  69. package/core/indexing/indexer-utils.js +26 -208
  70. package/core/indexing/indexing-file-policy.js +32 -7
  71. package/core/indexing/maintainer-launcher.mjs +137 -0
  72. package/core/indexing/merkle-tracker.js +251 -244
  73. package/core/indexing/model-pool.js +46 -5
  74. package/core/infrastructure/code-graph-repository.js +758 -6
  75. package/core/infrastructure/code-graph-visibility.js +157 -0
  76. package/core/infrastructure/codebase-repository.js +100 -13
  77. package/core/infrastructure/config/search.js +1 -1
  78. package/core/infrastructure/db-utils.js +118 -0
  79. package/core/infrastructure/dedup-hashing.js +10 -13
  80. package/core/infrastructure/hardware-capability.js +17 -7
  81. package/core/infrastructure/index.js +8 -2
  82. package/core/infrastructure/language-patterns/maps.js +4 -1
  83. package/core/infrastructure/language-patterns/registry-core.js +56 -17
  84. package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
  85. package/core/infrastructure/language-patterns.js +69 -0
  86. package/core/infrastructure/model-registry.js +20 -0
  87. package/core/infrastructure/native-inference.js +7 -12
  88. package/core/infrastructure/native-resolver.js +52 -37
  89. package/core/infrastructure/native-sparse-gram.js +261 -20
  90. package/core/infrastructure/native-tokenizer.js +6 -15
  91. package/core/infrastructure/simd-distance.js +10 -16
  92. package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
  93. package/core/infrastructure/structural-alias-resolver.js +122 -0
  94. package/core/infrastructure/structural-candidate-ranker.js +34 -0
  95. package/core/infrastructure/structural-context-repository.js +472 -0
  96. package/core/infrastructure/structural-context-utils.js +51 -0
  97. package/core/infrastructure/structural-graph-signals.js +121 -0
  98. package/core/infrastructure/structural-qualified-resolution.js +15 -0
  99. package/core/infrastructure/structural-source-definitions.js +100 -0
  100. package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
  101. package/core/infrastructure/tree-sitter-provider.js +811 -37
  102. package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
  103. package/core/query/query-router.js +55 -5
  104. package/core/ranking/file-kind-ranking.js +2192 -15
  105. package/core/ranking/late-interaction-index.js +87 -12
  106. package/core/search/cli-decoration.js +290 -0
  107. package/core/search/context-expander.js +988 -78
  108. package/core/search/index.js +1 -0
  109. package/core/search/output-policy.js +275 -0
  110. package/core/search/search-anchor.js +499 -0
  111. package/core/search/search-boost.js +93 -1
  112. package/core/search/search-cli.js +61 -204
  113. package/core/search/search-hybrid.js +250 -10
  114. package/core/search/search-pattern-chunks.js +57 -8
  115. package/core/search/search-pattern-planner.js +68 -9
  116. package/core/search/search-pattern-prefilter.js +30 -10
  117. package/core/search/search-pattern-ripgrep.js +40 -4
  118. package/core/search/search-pattern-sparse-overlay.js +256 -0
  119. package/core/search/search-pattern.js +117 -29
  120. package/core/search/search-postprocess.js +479 -5
  121. package/core/search/search-read-semantic.js +260 -23
  122. package/core/search/search-read.js +82 -64
  123. package/core/search/search-reader-pin.js +71 -0
  124. package/core/search/search-rrf.js +279 -0
  125. package/core/search/search-semantic.js +110 -5
  126. package/core/search/search-server.js +130 -57
  127. package/core/search/search-trace.js +107 -0
  128. package/core/search/server-identity.js +93 -0
  129. package/core/search/session-daemon-prewarm.mjs +33 -10
  130. package/core/search/sweet-search.js +399 -7
  131. package/core/skills/sweet-index/SKILL.md +8 -6
  132. package/core/vector-store/binary-hnsw-index.js +194 -30
  133. package/core/vector-store/float-vector-store.js +96 -6
  134. package/core/vector-store/hnsw-index.js +220 -49
  135. package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
  136. package/eval/agent-read-workflows/bin/ss-find +15 -0
  137. package/eval/agent-read-workflows/bin/ss-grep +12 -0
  138. package/eval/agent-read-workflows/bin/ss-read +14 -0
  139. package/eval/agent-read-workflows/bin/ss-search +18 -0
  140. package/eval/agent-read-workflows/bin/ss-semantic +12 -0
  141. package/eval/agent-read-workflows/bin/ss-trace +11 -0
  142. package/mcp/read-tool.js +109 -0
  143. package/mcp/server.js +55 -15
  144. package/mcp/tool-handlers.js +14 -124
  145. package/mcp/trace-tool.js +81 -0
  146. package/package.json +25 -10
  147. package/scripts/hooks/intercept-read.mjs +55 -0
  148. package/scripts/hooks/remind-tools.mjs +40 -0
  149. package/scripts/init.js +698 -54
  150. package/scripts/inject-agent-instructions.js +431 -0
  151. package/scripts/install-prompt-reminders.js +188 -0
  152. package/scripts/install-tool-enforcement.js +220 -0
  153. package/scripts/smoke-test.js +12 -9
  154. package/scripts/uninstall.js +276 -18
  155. package/scripts/write-claude-rules.js +110 -0
@@ -1,8 +1,9 @@
1
1
  /**
2
2
  * Search CLI Module
3
3
  *
4
- * Extracted from sweet-search.js (SOLID refactor).
5
- * Contains all CLI/terminal code: styling, pixel art, argument parsing, runCli.
4
+ * Extracted from sweet-search.js (SOLID refactor). Argument parsing + runCli.
5
+ * Decoration rendering lives in cli-decoration.js; the where-to-emit decision
6
+ * lives in output-policy.js.
6
7
  *
7
8
  * IMPORTANT: Uses dynamic import() for sweet-search.js and search-server.js
8
9
  * references to avoid circular dependencies.
@@ -17,177 +18,8 @@ import {
17
18
  formatSummaryFirst,
18
19
  formatMiddleRes,
19
20
  } from './search-format.js';
20
-
21
- // =============================================================================
22
- // CLI STYLING (ANSI truecolor w/ fallback)
23
- // =============================================================================
24
-
25
- export const STYLE = (() => {
26
- // 5 shades of dark blue (edge -> center)
27
- const colors = {
28
- darkest: { r: 6, g: 10, b: 31 },
29
- darker: { r: 10, g: 17, b: 52 },
30
- dark: { r: 14, g: 24, b: 73 },
31
- lightDark: { r: 18, g: 32, b: 95 },
32
- lightestDark: { r: 22, g: 40, b: 116 },
33
- border: { r: 90, g: 115, b: 220 },
34
- white: { r: 255, g: 255, b: 255 },
35
- };
36
-
37
- const reset = '\x1b[0m';
38
- const bold = '\x1b[1m';
39
-
40
- const lerp = (c1, c2, t) => ({
41
- r: Math.round(c1.r + (c2.r - c1.r) * t),
42
- g: Math.round(c1.g + (c2.g - c1.g) * t),
43
- b: Math.round(c1.b + (c2.b - c1.b) * t),
44
- });
45
-
46
- // Convert RGB to xterm-256 color code (fallback for terminals without truecolor)
47
- const rgbToAnsi256 = (r, g, b) => {
48
- // Grayscale range
49
- if (r === g && g === b) {
50
- if (r < 8) return 16;
51
- if (r > 248) return 231;
52
- return Math.round(((r - 8) / 247) * 24) + 232;
53
- }
54
-
55
- const to6 = (v) => Math.round((v / 255) * 5);
56
- const rr = to6(r);
57
- const gg = to6(g);
58
- const bb = to6(b);
59
- return 16 + (36 * rr) + (6 * gg) + bb;
60
- };
61
-
62
- const fg24 = (c) => `\x1b[38;2;${c.r};${c.g};${c.b}m`;
63
- const bg24 = (c) => `\x1b[48;2;${c.r};${c.g};${c.b}m`;
64
-
65
- const fg256 = (c) => `\x1b[38;5;${rgbToAnsi256(c.r, c.g, c.b)}m`;
66
- const bg256 = (c) => `\x1b[48;5;${rgbToAnsi256(c.r, c.g, c.b)}m`;
67
-
68
- const detectColorMode = () => {
69
- const forced = (process.env.SWEET_SEARCH_COLOR_MODE || process.env.SMART_SEARCH_COLOR_MODE || '').trim().toLowerCase();
70
- if (forced === 'none' || forced === '0' || forced === 'off') return 'none';
71
- if (forced === '256' || forced === 'ansi256' || forced === 'xterm256') return 'ansi256';
72
- if (forced === 'truecolor' || forced === '24bit' || forced === 'rgb') return 'truecolor';
73
-
74
- if (process.env.NO_COLOR) return 'none';
75
-
76
- const colorterm = process.env.COLORTERM || '';
77
- if (/truecolor|24bit/i.test(colorterm)) return 'truecolor';
78
-
79
- // Windows Terminal + VS Code terminals are typically truecolor-capable.
80
- if (process.env.WT_SESSION || process.env.TERM_PROGRAM === 'vscode') return 'truecolor';
81
-
82
- const term = process.env.TERM || '';
83
- if (/256color/i.test(term)) return 'ansi256';
84
-
85
- return 'none';
86
- };
87
-
88
- const colorMode = detectColorMode(); // 'truecolor' | 'ansi256' | 'none'
89
- const fg = colorMode === 'truecolor' ? fg24 : colorMode === 'ansi256' ? fg256 : () => '';
90
- const bg = colorMode === 'truecolor' ? bg24 : colorMode === 'ansi256' ? bg256 : () => '';
91
-
92
- const headerStyleEnv = (process.env.SWEET_SEARCH_HEADER_STYLE || process.env.SMART_SEARCH_HEADER_STYLE || '').trim().toLowerCase();
93
- const headerStyle =
94
- headerStyleEnv === 'zones' || headerStyleEnv === 'gradient'
95
- ? headerStyleEnv
96
- : (colorMode === 'truecolor' ? 'gradient' : 'zones');
97
-
98
- return {
99
- colors,
100
- fg,
101
- bg,
102
- reset: colorMode === 'none' ? '' : reset,
103
- bold: colorMode === 'none' ? '' : bold,
104
- lerp,
105
- colorMode,
106
- headerStyle,
107
- };
108
- })();
109
-
110
- // 2-line pixel art using half-blocks - SWEET SEARCH
111
- const SWEET_SEARCH_L1 = '█▀▀ █ █ █ █▀▀ █▀▀ ▀█▀ █▀▀ █▀▀ ▄▀▄ █▀▄ █▀▀ █▄█';
112
- const SWEET_SEARCH_L2 = '▄▄█ ▀▄█▄▀ ██▄ ██▄ █ ▄▄█ ██▄ █▀█ ██▄ █▄▄ █▀█';
113
-
114
- /**
115
- * Print styled header - 2-line pixel art with query on right = 2 content lines.
116
- */
117
- export function printStyledHeader(query) {
118
- const width = Math.min(process.stdout.columns || 80, 80);
119
- const { colors, fg, bg, reset, bold } = STYLE;
120
-
121
- const artLen = SWEET_SEARCH_L2.length;
122
- const maxQueryLen = width - artLen - 8;
123
- const displayQuery = query.length > maxQueryLen
124
- ? query.slice(0, maxQueryLen - 3) + '...'
125
- : query;
126
-
127
- const palette = [
128
- colors.darkest,
129
- colors.darker,
130
- colors.dark,
131
- colors.lightDark,
132
- colors.lightestDark,
133
- ];
134
-
135
- const getBgColor = (i, w) => {
136
- const pos = i / Math.max(1, w - 1);
137
- const t = 1 - Math.abs(0.5 - pos) * 2;
138
- const zone = Math.min(Math.floor(t * palette.length), palette.length - 1);
139
- return palette[zone];
140
- };
141
-
142
- const buildLine = (leftContent, rightContent = null, isArt = false) => {
143
- let result = '';
144
- const leftPad = 2;
145
- const rightPad = 2;
146
- const rightStart = rightContent ? width - rightPad - rightContent.length : width;
147
-
148
- for (let i = 0; i < width; i++) {
149
- const bgColor = getBgColor(i, width);
150
- const leftCharIdx = i - leftPad;
151
- const rightCharIdx = i - rightStart;
152
-
153
- if (rightContent && rightCharIdx >= 0 && rightCharIdx < rightContent.length) {
154
- result += bold + fg(colors.white) + bg(bgColor) + rightContent[rightCharIdx];
155
- } else if (leftCharIdx >= 0 && leftCharIdx < leftContent.length) {
156
- const fgColor = isArt ? colors.border : colors.white;
157
- result += bold + fg(fgColor) + bg(bgColor) + leftContent[leftCharIdx];
158
- } else {
159
- result += bg(bgColor) + ' ';
160
- }
161
- }
162
- return result + reset;
163
- };
164
-
165
- const queryStr = `"${displayQuery}"`;
166
-
167
- console.log('');
168
- console.log(buildLine(SWEET_SEARCH_L1, null, true));
169
- console.log(buildLine(SWEET_SEARCH_L2, queryStr, true));
170
- }
171
-
172
- /**
173
- * Print styled stats line
174
- */
175
- export function printStyledStats(stats, isWarm = false) {
176
- const { colors, fg, reset } = STYLE;
177
- const mode = stats.routing?.mode || stats.mode || 'auto';
178
- const pathType = stats.path || 'hybrid';
179
- const timeMs = stats.server_ms || stats.total_ms || 0;
180
-
181
- const modeIcon = { lexical: '⚡', semantic: '🧠', hybrid: '⚗️', structural: '🔗', auto: '✨' }[mode] || '◆';
182
- const warmIcon = isWarm ? `${fg(colors.border)}●${reset}` : `${fg(colors.darker)}○${reset}`;
183
-
184
- console.log(
185
- ` ${modeIcon} ${fg(colors.white)}${mode}${reset} ` +
186
- `${fg(colors.dark)}│${reset} ${fg(colors.border)}${pathType}${reset} ` +
187
- `${fg(colors.dark)}│${reset} ${fg(colors.white)}${timeMs}ms${reset} ${warmIcon}`
188
- );
189
- console.log('');
190
- }
21
+ import { detectOutputPolicy } from './output-policy.js';
22
+ import { emitDecoration } from './cli-decoration.js';
191
23
 
192
24
  // =============================================================================
193
25
  // CLI entry point
@@ -196,9 +28,9 @@ export function printStyledStats(stats, isWarm = false) {
196
28
  export async function runCli(args) {
197
29
  // Dynamic imports to avoid circular dependencies
198
30
  const { default: SweetSearch } = await import('./sweet-search.js');
199
- const { startServer, queryServer, isServerRunning, autoSpawnServer,
200
- SEARCH_SERVER_SOCKET, SEARCH_SERVER_SOCKET_LEGACY, SEARCH_SERVER_PORT
31
+ const { startServer, queryServer, isServerRunning, autoSpawnServer
201
32
  } = await import('./search-server.js');
33
+ const { projectSocketPath } = await import('./server-identity.js');
202
34
 
203
35
  if (args.length === 0 || args[0] === '--help' || args[0] === '-h') {
204
36
  console.log(`
@@ -220,13 +52,25 @@ Options:
220
52
  --fusion <type> Legacy: cc or rrf (ignored for hybrid - always uses robust CC fusion)
221
53
  --late-interaction Enable late interaction reranking (if index available)
222
54
  --late-interaction-model=ID Use specific model (lateon-code or lateon-code-edge)
223
- --agent Agent mode: return self-contained code blocks (ColGrep context packaging)
224
- --agent-full Agent mode with full expansion for top-3 results (budget: 8000)
225
- --budget <n> Agent mode token budget (default: 4000 preview, 8000 full)
55
+ --agent Agent mode: self-contained code blocks. Auto-picks 4k/8k/12k
56
+ tier from score-distribution signals (top-1 dominance,
57
+ entropy, candidate-pool breadth) no need to choose a tier.
58
+ --agent-preview Force the 4k preview tier (rarely needed; --agent auto-picks)
59
+ --agent-full Force the 8k full tier (rarely needed; --agent auto-picks)
60
+ --agent-full-xl Force the 12k XL tier; gated on top-1 dominance ≥ 2× top-2
61
+ --budget <n> Explicit total token budget (overrides auto-tier; --agent
62
+ keeps it as the format)
226
63
  --summary HCGS summary-first output (10x token reduction)
227
64
  --mid Middle-res view: signature + docstring (5x token reduction)
228
65
  --json Output as JSON
66
+ --format <fmt> Output format: plain (no banner/color), json
67
+ --no-banner Suppress the decorative banner (keep text results)
229
68
  --verbose, -v Enable verbose logging
69
+
70
+ Decoration is auto by default (shown only where it is token-free). Override with
71
+ SWEET_SEARCH_DECORATION=never (always plain) or =always (force the banner onto
72
+ stdout even when captured — you accept the added token cost). NO_COLOR disables
73
+ ANSI color.
230
74
  --cold Force cold start (skip auto-start server)
231
75
  --serve Manually start server (usually not needed)
232
76
 
@@ -271,28 +115,15 @@ Examples:
271
115
  req.end();
272
116
  });
273
117
 
274
- // F-06: Stop via Unix socket first so CLI behavior matches server policy.
118
+ // Stop this project's server via its per-project Unix socket (C3). No
119
+ // legacy/global fallback — that would risk stopping another project's
120
+ // server.
121
+ const socketPath = projectSocketPath();
275
122
  let stopResponse = null;
276
- if (existsSync(SEARCH_SERVER_SOCKET)) {
277
- stopResponse = await requestStop({
278
- socketPath: SEARCH_SERVER_SOCKET,
279
- path: '/stop',
280
- method: 'GET',
281
- });
282
- } else if (existsSync(SEARCH_SERVER_SOCKET_LEGACY)) {
283
- stopResponse = await requestStop({
284
- socketPath: SEARCH_SERVER_SOCKET_LEGACY,
285
- path: '/stop',
286
- method: 'GET',
287
- });
123
+ if (existsSync(socketPath)) {
124
+ stopResponse = await requestStop({ socketPath, path: '/stop', method: 'GET' });
288
125
  } else {
289
- // Backward-compatible fallback for older servers without Unix socket.
290
- stopResponse = await requestStop({
291
- hostname: 'localhost',
292
- port: SEARCH_SERVER_PORT,
293
- path: '/stop',
294
- method: 'GET',
295
- });
126
+ stopResponse = { statusCode: 0, body: 'no server running for this project' };
296
127
  }
297
128
 
298
129
  if (stopResponse.statusCode === 200) {
@@ -324,6 +155,8 @@ Examples:
324
155
  let fusion = 'cc';
325
156
  let useLateInteraction = LATE_INTERACTION_CONFIG.enabled;
326
157
  let json = false;
158
+ let outputFormat = null; // e.g. 'plain' — disables decoration but keeps text results
159
+ let noBanner = false;
327
160
  let verbose = false;
328
161
  let summaryFirst = false;
329
162
  let middleRes = false;
@@ -373,6 +206,14 @@ Examples:
373
206
  LATE_INTERACTION_CONFIG.model = arg.split('=')[1];
374
207
  } else if (arg === '--json') {
375
208
  json = true;
209
+ } else if (arg === '--no-banner') {
210
+ noBanner = true;
211
+ } else if (arg === '--format' && args[i + 1]) {
212
+ outputFormat = args[++i];
213
+ if (outputFormat === 'json') json = true;
214
+ } else if (arg.startsWith('--format=')) {
215
+ outputFormat = arg.slice('--format='.length);
216
+ if (outputFormat === 'json') json = true;
376
217
  } else if (arg === '--summary') {
377
218
  summaryFirst = true;
378
219
  } else if (arg === '--mid') {
@@ -382,14 +223,22 @@ Examples:
382
223
  } else if (arg === '--cold') {
383
224
  forceCold = true;
384
225
  } else if (arg === '--agent') {
385
- agentFormat = agentFormat || 'agent_preview';
226
+ // 'agent' triggers auto-tier selection (preview/full/xl picked from
227
+ // top-1 dominance, entropy, and candidate-pool breadth). Power users
228
+ // can still force a specific tier with --agent-preview / --agent-full
229
+ // / --agent-full-xl.
230
+ agentFormat = agentFormat || 'agent';
231
+ } else if (arg === '--agent-preview') {
232
+ agentFormat = 'agent_preview';
386
233
  } else if (arg === '--agent-full') {
387
234
  agentFormat = 'agent_full';
388
235
  } else if (arg === '--agent-full-xl') {
389
236
  agentFormat = 'agent_full_xl';
390
237
  } else if (arg === '--budget' && args[i + 1]) {
391
238
  agentBudget = parseInt(args[++i], 10);
392
- agentFormat = agentFormat || 'agent_preview';
239
+ // Pair an explicit numeric budget with auto-tier so the response's
240
+ // budgetReason reflects 'explicit_budget' rather than a tier override.
241
+ agentFormat = agentFormat || 'agent';
393
242
  } else if (!arg.startsWith('--')) {
394
243
  query = arg;
395
244
  }
@@ -407,6 +256,16 @@ Examples:
407
256
  process.exit(1);
408
257
  }
409
258
 
259
+ // Decide where decoration may go (token-free channels only). Computed once;
260
+ // both the warm and cold paths consult it.
261
+ const outputPolicy = detectOutputPolicy({
262
+ json,
263
+ format: outputFormat,
264
+ noBanner,
265
+ env: process.env,
266
+ stream: process.stdout,
267
+ });
268
+
410
269
  // Check if warm server is running (unless --cold)
411
270
  let serverRunning = !forceCold && await isServerRunning();
412
271
 
@@ -452,8 +311,7 @@ Examples:
452
311
  if (json) {
453
312
  console.log(JSON.stringify({ results, stats }, null, 2));
454
313
  } else {
455
- printStyledHeader(query);
456
- printStyledStats(stats, true);
314
+ emitDecoration(outputPolicy, query, stats, true);
457
315
 
458
316
  // Use pure formatting helpers (no full SweetSearch instantiation needed).
459
317
  // Contract note: formatResults currently only depends on `this` for
@@ -521,8 +379,7 @@ Examples:
521
379
  if (json) {
522
380
  console.log(JSON.stringify({ results, stats }, null, 2));
523
381
  } else {
524
- printStyledHeader(query);
525
- printStyledStats(stats, false);
382
+ emitDecoration(outputPolicy, query, stats, false);
526
383
 
527
384
  if (stats.path === 'structural') {
528
385
  console.log(searcher.formatStructuralResults(results, stats));
@@ -10,6 +10,83 @@
10
10
 
11
11
  import { routeQuery } from '../query/query-router.js';
12
12
  import { applyMMR, shouldApplyMMR, getLambdaForIntent } from '../ranking/mmr.js';
13
+ import { applyFileKindRanking, applyResultDemotions, classifyFileKindIntent, detectFileKind } from '../ranking/file-kind-ranking.js';
14
+ import { injectAnchorCandidates } from './search-anchor.js';
15
+ import { runRRFFallback } from './search-rrf.js';
16
+
17
+ const QUERY_SCAFFOLD_RE = /^(?:where|when|how)\s+(?:does|do|did|is|are|was|were|can|could|should)?\s*/i;
18
+ const IMPLEMENTATION_VERB_RE = /^(?:abort|bind|build|call|compute|create|decode|decide|detect|encode|handle|load|parse|parsed|redirect|register|run|search|skip|transform|validate|write)s?\b/i;
19
+ const QUERY_STOPWORDS = new Set([
20
+ 'a', 'an', 'are', 'can', 'could', 'did', 'do', 'does', 'for', 'from',
21
+ 'how', 'in', 'into', 'is', 'of', 'on', 'should', 'the', 'to', 'was',
22
+ 'were', 'when', 'where', 'with',
23
+ ]);
24
+
25
+ function hasAblation(ablations, name) {
26
+ return ablations instanceof Set
27
+ ? ablations.has(name)
28
+ : Array.isArray(ablations) && ablations.includes(name);
29
+ }
30
+
31
+ // Per-stage profiling hooks. No-op unless `globalThis.__stageTimings` is set
32
+ // by scripts/profile-search-stages.mjs. See search-postprocess.js for the
33
+ // matching helpers — same convention for consistency.
34
+ function __ptStart() {
35
+ return globalThis.__stageTimings ? performance.now() : null;
36
+ }
37
+ function __ptEnd(stage, t0) {
38
+ if (t0 == null || !globalThis.__stageTimings) return;
39
+ const ms = performance.now() - t0;
40
+ const buf = globalThis.__stageTimings;
41
+ (buf[stage] = buf[stage] || []).push(ms);
42
+ }
43
+
44
+ function envFloat(name, fallback, min = 0, max = 1) {
45
+ const raw = process.env[name];
46
+ if (raw == null || raw === '') return fallback;
47
+ const parsed = Number.parseFloat(raw);
48
+ return Number.isFinite(parsed) && parsed >= min && parsed <= max ? parsed : fallback;
49
+ }
50
+
51
+ export function rewriteImplementationQuery(query) {
52
+ if (classifyFileKindIntent(query) !== 'implementation') return null;
53
+ const stripped = String(query || '').trim().replace(QUERY_SCAFFOLD_RE, '').trim();
54
+ if (!stripped) return null;
55
+
56
+ const words = stripped.split(/\s+/).filter(Boolean);
57
+ if (words.length >= 3 && /^[A-Z][A-Za-z0-9_.-]*$/.test(words[0]) && IMPLEMENTATION_VERB_RE.test(words[1])) {
58
+ words.shift();
59
+ }
60
+
61
+ const compact = words.filter(word => {
62
+ const normalized = word.toLowerCase().replace(/^[^\w]+|[^\w/.-]+$/g, '');
63
+ return normalized && !QUERY_STOPWORDS.has(normalized);
64
+ }).join(' ');
65
+
66
+ return compact && compact !== query ? compact : null;
67
+ }
68
+
69
+ function resultFileKind(result) {
70
+ return detectFileKind(
71
+ result?.file
72
+ || result?.file_path
73
+ || result?.path
74
+ || result?.metadata?.file
75
+ || result?.metadata?.file_path
76
+ || result?.metadata?.path
77
+ || ''
78
+ );
79
+ }
80
+
81
+ function implementationRetryReason(query, results) {
82
+ if (classifyFileKindIntent(query) !== 'implementation') return null;
83
+ if (!Array.isArray(results) || results.length === 0) return 'empty_results';
84
+
85
+ const window = results.slice(0, Math.min(5, results.length));
86
+ const hasImplementation = window.some(result => resultFileKind(result) === 'implementation');
87
+ const hasDemotable = window.some(result => resultFileKind(result) !== 'implementation');
88
+ return !hasImplementation && hasDemotable ? 'no_implementation_in_top_results' : null;
89
+ }
13
90
 
14
91
  // =============================================================================
15
92
  // Hybrid Search V2
@@ -35,7 +112,13 @@ export async function hybridSearchV2(query, options = {}) {
35
112
  // without synthetic graph-expansion scores polluting the distribution.
36
113
  const [lexicalSearchResult, semanticSearchResult] = await Promise.all([
37
114
  this.graphSearch.bm25SearchRaw(query, 50),
38
- this.semanticSearch(query, { k: 50, rerank: false, useLateInteraction }),
115
+ this.semanticSearch(query, {
116
+ k: 50,
117
+ rerank: false,
118
+ useLateInteraction,
119
+ format: options.format,
120
+ ablations: options.ablations,
121
+ }),
39
122
  ]);
40
123
 
41
124
  const lexicalResults = lexicalSearchResult.results.map(r => ({
@@ -48,26 +131,97 @@ export async function hybridSearchV2(query, options = {}) {
48
131
  const lexicalLatencyMs = lexicalSearchResult.latency ?? null;
49
132
 
50
133
  // Step 2: Robust CC fusion with RRF fallback for edge cases
134
+ const fusionAlpha = options.fusionAlpha ?? (
135
+ routing.rawMode === 'semantic'
136
+ ? envFloat('SWEET_SEARCH_COLLAPSED_SEMANTIC_ALPHA', undefined)
137
+ : undefined
138
+ );
51
139
  const { results: fused, method, fallbackReason } = this.robustCCFusion(
52
140
  lexicalResults,
53
141
  semanticResults,
54
- routeType
142
+ routeType,
143
+ fusionAlpha == null ? undefined : { alpha: fusionAlpha }
144
+ );
145
+
146
+ // Step 2.5: Identifier-Anchored Retrieval (IAR).
147
+ // Couples dense fusion with an exact-name symbol lookup so abstract
148
+ // natural-language queries that mention a real entity name can land on
149
+ // that entity even when the encoder ranked something tangentially-similar
150
+ // higher. Mirrors the Aider repo-map / Cody+SCIP / Cursor recipe.
151
+ // Purely additive: only surfaces entities that exist in the index, deduped
152
+ // against the fused set. Disable via ablations 'no-anchor-injection' or
153
+ // env SWEET_SEARCH_DISABLE_IAR=1 (kill switch — overrides the format-based
154
+ // default for ablation experiments).
155
+ const iarKilled = process.env.SWEET_SEARCH_DISABLE_IAR === '1'
156
+ || hasAblation(options.ablations, 'no-anchor-injection');
157
+ const shouldInjectAnchors = !iarKilled && (
158
+ options.anchorInjection === true
159
+ || options.format === 'agent'
160
+ || process.env.SWEET_SEARCH_ANCHOR_INJECTION === '1'
55
161
  );
162
+ const __t_iar = __ptStart();
163
+ const { results: anchored, stats: anchorStats } = shouldInjectAnchors
164
+ ? injectAnchorCandidates(fused, query, {
165
+ codeGraphRepo: this.codeGraphRepo,
166
+ lateInteractionIndex: this.lateInteractionIndex,
167
+ ablations: options.ablations,
168
+ allowPlainTitlecase: options.format === 'agent',
169
+ })
170
+ : { results: fused, stats: { skipped: true, reason: 'disabled_for_non_agent_search' } };
171
+ __ptEnd('hybrid:injectAnchorCandidates', __t_iar);
56
172
 
57
173
  // Step 3: Apply post-fusion boosts uniformly (both paths benefit equally)
58
- const boosted = this.applyPostFusionBoosts(fused, query, routing.mode, routing.confidence);
174
+ const boosted = this.applyPostFusionBoosts(anchored, query, routing.mode, routing.confidence, {
175
+ format: options.format,
176
+ });
177
+
178
+ // Step 3.5: Apply source-vs-doc/test/config preference before the top-k cut.
179
+ // The post-retrieval pass has the same guard, but hybrid used to slice first,
180
+ // so docs/tests/tiny YAML could occupy top-1 and hide implementation chunks.
181
+ const fileKindIntent = classifyFileKindIntent(query);
182
+ const __t_fk = __ptStart();
183
+ const rankedByFileKind = applyFileKindRanking(boosted, {
184
+ intent: fileKindIntent,
185
+ window: options.fileKindWindow ?? 100,
186
+ docFactor: options.hybridDocFactor ?? 0.35,
187
+ testFactor: options.hybridTestFactor ?? 0.35,
188
+ typeFactor: options.hybridTypeFactor ?? 0.70,
189
+ ancillaryFactor: options.hybridAncillaryFactor ?? 0.15,
190
+ tinyAncillaryFactor: options.hybridTinyAncillaryFactor ?? 0.05,
191
+ _fileKindCache: options._fileKindCache,
192
+ });
193
+ __ptEnd('hybrid:applyFileKindRanking', __t_fk);
194
+ const __t_dem = __ptStart();
195
+ const demoted = applyResultDemotions(rankedByFileKind, {
196
+ query,
197
+ window: options.resultDemotionWindow ?? 100,
198
+ ablations: options.ablations,
199
+ format: options.format,
200
+ projectRoot: this.projectRoot,
201
+ codeGraphRepo: this.codeGraphRepo,
202
+ _entityKindCache: options._entityKindCache,
203
+ _entityNameCache: options._entityNameCache,
204
+ _resultTextCache: options._resultTextCache,
205
+ _fullFileTextCache: options._fullFileTextCache,
206
+ _isTestSupportCache: options._isTestSupportCache,
207
+ _isTestChunkCache: options._isTestChunkCache,
208
+ _fileKindCache: options._fileKindCache,
209
+ });
210
+ __ptEnd('hybrid:applyResultDemotions', __t_dem);
59
211
 
60
212
  // Step 4: MMR Diversification (replaces flood control)
61
- let diversified = boosted;
213
+ let diversified = demoted;
62
214
  let mmrStats = null;
63
215
 
64
- const useMMR = options.useMMR ?? true; // Enable by default
65
- if (useMMR && shouldApplyMMR(boosted)) {
216
+ const useMMR = (options.useMMR ?? true) && !hasAblation(options.ablations, 'no-mmr');
217
+ if (useMMR && shouldApplyMMR(demoted)) {
66
218
  const lambda = getLambdaForIntent(routing.mode, routing.confidence);
67
- const mmrResult = applyMMR(boosted, {
68
- k: Math.min(k * 2, boosted.length), // Get more candidates for diversity
219
+ const __t_mmr = __ptStart();
220
+ const mmrResult = applyMMR(demoted, {
221
+ k: Math.min(k * 2, demoted.length), // Get more candidates for diversity
69
222
  lambda,
70
223
  });
224
+ __ptEnd('hybrid:applyMMR', __t_mmr);
71
225
  diversified = mmrResult.results;
72
226
  mmrStats = mmrResult.stats;
73
227
 
@@ -83,10 +237,91 @@ export async function hybridSearchV2(query, options = {}) {
83
237
  fusionMethod: method,
84
238
  }));
85
239
 
86
- this.log(`Hybrid V2 (${method}, alpha=${results[0]?.alpha?.toFixed(2) || '?'}): ${lexicalResults.length} lex + ${semanticResults.length} sem -> ${results.length} final`);
240
+ const retryReason = implementationRetryReason(query, results);
241
+ if (retryReason && options.allowQueryRewrite !== false && !hasAblation(options.ablations, 'no-query-rewrite')) {
242
+ const rewrittenQuery = rewriteImplementationQuery(query);
243
+ if (rewrittenQuery && rewrittenQuery !== query) {
244
+ this.log(`Hybrid rewrite retry: "${query}" -> "${rewrittenQuery}"`);
245
+ const retry = await hybridSearchV2.call(this, rewrittenQuery, {
246
+ ...options,
247
+ allowQueryRewrite: false,
248
+ });
249
+ retry.fusionStats = {
250
+ ...(retry.fusionStats || {}),
251
+ queryRewrite: {
252
+ from: query,
253
+ to: rewrittenQuery,
254
+ reason: retryReason,
255
+ },
256
+ };
257
+ return retry;
258
+ }
259
+ }
260
+
261
+ // Step 6: Multi-query BM25F + RRF tail fallback.
262
+ // Last-resort safety net: when hybrid + IAR + rewrite-retry STILL leaves
263
+ // top-1 below the confidence floor, the candidate list empty, or no
264
+ // source file in top-3, fire one BM25F query per content keyword
265
+ // and fuse with reciprocal-rank-fusion. RRF (Cormack 2009) is the
266
+ // SOTA pattern used by SWE-grep, Polarity Omnigrep, and Cody Deep
267
+ // Search; corpus-agnostic (no stopword denylist) and naturally
268
+ // demotes single-keyword noise hits via rank-position weighting.
269
+ // Disable via ablations 'no-rrf-fallback'.
270
+ let finalResults = results;
271
+ let fallbackStats = null;
272
+ if (options.allowKeywordFallback !== false) {
273
+ const __t_rrf = __ptStart();
274
+ const fb = await runRRFFallback(results, query, {
275
+ searcher: this,
276
+ ablations: options.ablations,
277
+ confidenceFloor: options.confidenceFloor,
278
+ });
279
+ __ptEnd('hybrid:runRRFFallback', __t_rrf);
280
+ fallbackStats = fb.stats;
281
+ if (fb.results !== results) {
282
+ // Re-run BOTH file-kind ranking AND content demotions on the merged
283
+ // set so doc/test/example demotion AND tiny/test-name/entity-kind
284
+ // rules apply to the RRF-injected chunks. Without the file-kind
285
+ // re-pass, a test-file chunk that RRF-fused on a couple of keywords
286
+ // would slip past the primary doc/test demotion.
287
+ const reRanked = applyFileKindRanking(fb.results, {
288
+ intent: fileKindIntent,
289
+ window: options.fileKindWindow ?? 100,
290
+ docFactor: options.hybridDocFactor ?? 0.35,
291
+ testFactor: options.hybridTestFactor ?? 0.35,
292
+ typeFactor: options.hybridTypeFactor ?? 0.70,
293
+ ancillaryFactor: options.hybridAncillaryFactor ?? 0.15,
294
+ tinyAncillaryFactor: options.hybridTinyAncillaryFactor ?? 0.05,
295
+ });
296
+ const remerged = applyResultDemotions(reRanked, {
297
+ query,
298
+ window: options.resultDemotionWindow ?? 100,
299
+ ablations: options.ablations,
300
+ format: options.format,
301
+ projectRoot: this.projectRoot,
302
+ codeGraphRepo: this.codeGraphRepo,
303
+ _entityKindCache: options._entityKindCache,
304
+ _entityNameCache: options._entityNameCache,
305
+ _resultTextCache: options._resultTextCache,
306
+ _fullFileTextCache: options._fullFileTextCache,
307
+ _isTestSupportCache: options._isTestSupportCache,
308
+ _isTestChunkCache: options._isTestChunkCache,
309
+ _fileKindCache: options._fileKindCache,
310
+ });
311
+ finalResults = remerged.slice(0, k).map(r => ({
312
+ ...r,
313
+ searchPath: r.searchPath || 'hybrid',
314
+ hybridScore: r.score,
315
+ fusionMethod: method,
316
+ }));
317
+ this.log(`Hybrid RRF fallback (${fb.stats.reason}, ${fb.stats.keywords.length}kw, ${fb.stats.fusedCount} fused): +${fb.stats.injected} new, +${fb.stats.boosted} boosted`);
318
+ }
319
+ }
320
+
321
+ this.log(`Hybrid V2 (${method}, alpha=${finalResults[0]?.alpha?.toFixed(2) || '?'}): ${lexicalResults.length} lex + ${semanticResults.length} sem -> ${finalResults.length} final`);
87
322
 
88
323
  return {
89
- results,
324
+ results: finalResults,
90
325
  semanticStats,
91
326
  fusionStats: {
92
327
  method,
@@ -95,6 +330,11 @@ export async function hybridSearchV2(query, options = {}) {
95
330
  routerMode: routing.mode,
96
331
  routerConfidence: routing.confidence,
97
332
  lexicalLatencyMs,
333
+ fileKindIntent,
334
+ fileKindRankingApplied: rankedByFileKind !== boosted,
335
+ resultDemotionsApplied: demoted !== rankedByFileKind,
336
+ anchorInjection: anchorStats,
337
+ keywordFallback: fallbackStats,
98
338
  },
99
339
  };
100
340
  }