sweet-search 2.4.2 → 2.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/core/cli.js +43 -5
  2. package/core/embedding/embedding-cache.js +266 -18
  3. package/core/embedding/embedding-service.js +45 -9
  4. package/core/graph/graph-expansion.js +52 -12
  5. package/core/graph/graph-extractor.js +30 -1
  6. package/core/indexing/ast-chunker.js +331 -16
  7. package/core/indexing/chunking/chunk-builder.js +34 -1
  8. package/core/indexing/index-codebase-v21.js +31 -2
  9. package/core/indexing/index.js +6 -3
  10. package/core/indexing/indexer-ann.js +45 -6
  11. package/core/indexing/indexer-build.js +9 -1
  12. package/core/indexing/indexer-phases.js +6 -4
  13. package/core/indexing/indexing-file-policy.js +140 -0
  14. package/core/indexing/li-skip-policy.js +11 -220
  15. package/core/infrastructure/codebase-repository.js +21 -0
  16. package/core/infrastructure/config/embedding.js +20 -1
  17. package/core/infrastructure/config/graph.js +2 -2
  18. package/core/infrastructure/config/ranking.js +10 -0
  19. package/core/infrastructure/config/vector-store.js +1 -1
  20. package/core/infrastructure/coreml-cascade.js +236 -30
  21. package/core/infrastructure/coreml-cascade.json +25 -0
  22. package/core/infrastructure/index.js +17 -0
  23. package/core/infrastructure/init-config.js +216 -0
  24. package/core/infrastructure/language-patterns/registry-core.js +18 -0
  25. package/core/infrastructure/model-registry.js +12 -0
  26. package/core/infrastructure/native-inference.js +143 -51
  27. package/core/infrastructure/tree-sitter-provider.js +92 -2
  28. package/core/ranking/cascaded-scorer.js +6 -2
  29. package/core/ranking/file-kind-ranking.js +264 -0
  30. package/core/ranking/late-interaction-index.js +10 -4
  31. package/core/ranking/late-interaction-policy.js +304 -0
  32. package/core/search/context-expander.js +267 -28
  33. package/core/search/index.js +4 -0
  34. package/core/search/search-cli.js +3 -1
  35. package/core/search/search-pattern.js +4 -3
  36. package/core/search/search-postprocess.js +189 -8
  37. package/core/search/search-read-semantic.js +734 -0
  38. package/core/search/search-read.js +481 -0
  39. package/core/search/search-server.js +153 -5
  40. package/core/search/sweet-search.js +133 -16
  41. package/core/start-server.js +13 -2
  42. package/mcp/server.js +41 -0
  43. package/mcp/tool-handlers.js +117 -6
  44. package/package.json +9 -7
  45. package/scripts/init.js +386 -5
  46. package/scripts/uninstall.js +152 -6
@@ -38,6 +38,10 @@ export function estimateTokens(text) {
38
38
 
39
39
  const DEFAULT_TOKEN_BUDGET = 4000;
40
40
  const AGENT_FULL_TOKEN_BUDGET = 8000;
41
+ // Stretch budget — opt-in only via subMode 'agent_full_xl'. Gated on top-1
42
+ // dominance (>=2× top-2). Default remains compact 4k; this is for the
43
+ // "explicit, single dominant answer fits" case only.
44
+ const AGENT_FULL_XL_TOKEN_BUDGET = 12000;
41
45
  const DEFAULT_PER_RESULT_CAPS = [2000, 800, 400]; // rank 1, 2, 3+
42
46
  const MAX_HEADER_TOKENS = 200;
43
47
 
@@ -106,11 +110,15 @@ export function findEnclosingEntity(codeGraphRepo, filePath, startLine, endLine)
106
110
  /**
107
111
  * Expand a result to symbol-complete boundaries.
108
112
  *
109
- * Decision tree (from plan §4.1):
110
- * 1. Is chunk already a complete symbol? → return as-is
111
- * 2. Look up enclosing entity in code graph → expand to entity boundaries
113
+ * Decision tree:
114
+ * 1. Is chunk already a complete symbol? → return chunk
115
+ * 2. Look up enclosing entity:
116
+ * a. fits in cap → expand to entity boundaries (kind: 'full')
117
+ * b. too large → build symbol sandwich (kind: 'sandwich')
118
+ * c. sandwich infeasible → bare chunk with entity name (kind: 'chunk')
112
119
  * 3. Merge contiguous sibling chunks → stop at next symbol boundary
113
- * 4. Fall back: return chunk as-is
120
+ * 4. Syntax-aware brace/indent expansion (kind: 'syntax')
121
+ * 5. Fall back: chunk as-is (kind: 'chunk')
114
122
  *
115
123
  * @param {object} result - Ranked result with file, startLine, endLine, metadata
116
124
  * @param {object} opts
@@ -119,7 +127,16 @@ export function findEnclosingEntity(codeGraphRepo, filePath, startLine, endLine)
119
127
  * @param {Map} opts.fileCache - Shared file cache for readFileRange
120
128
  * @param {string} opts.projectRoot
121
129
  * @param {number} opts.tokenCap - Max tokens for this result
122
- * @returns {{ startLine: number, endLine: number, expanded: boolean, expandedFrom: string|null, symbol: string|null, symbolType: string|null }}
130
+ * @returns {{
131
+ * startLine: number,
132
+ * endLine: number,
133
+ * expanded: boolean,
134
+ * expandedFrom: string|null,
135
+ * symbol: string|null,
136
+ * symbolType: string|null,
137
+ * kind: 'full'|'sandwich'|'syntax'|'chunk',
138
+ * sandwich?: { parts: Array<{kind:'signature'|'gold'|'closing', startLine:number, endLine:number}>, elidedHead:number, elidedTail:number, elisionMarkers:number }
139
+ * }}
123
140
  */
124
141
  export function expandToSymbol(result, opts) {
125
142
  const { codeGraphRepo, locationMap, tokenCap } = opts;
@@ -139,6 +156,7 @@ export function expandToSymbol(result, opts) {
139
156
  expandedFrom: null,
140
157
  symbol: meta.name,
141
158
  symbolType: meta.type || null,
159
+ kind: 'chunk',
142
160
  };
143
161
  }
144
162
 
@@ -158,9 +176,29 @@ export function expandToSymbol(result, opts) {
158
176
  expandedFrom: origRange,
159
177
  symbol: entity.name,
160
178
  symbolType: entity.type,
179
+ kind: 'full',
161
180
  };
162
181
  }
163
- // Entity too large — still use its name but keep original range
182
+ // Entity too large for full expansion. Try a "symbol sandwich":
183
+ // signature + elision marker + gold chunk + elision marker + closing brace.
184
+ // Goal: preserve gold evidence + ground the agent in the enclosing symbol
185
+ // without dumping the whole function (which causes context rot).
186
+ if (!opts.ablations?.has('no-sandwich')) {
187
+ const sandwich = buildSandwichExpansion(entity, origStart, origEnd, tokenCap);
188
+ if (sandwich) {
189
+ return {
190
+ startLine: entity.startLine,
191
+ endLine: entity.endLine,
192
+ expanded: true,
193
+ expandedFrom: origRange,
194
+ symbol: entity.name,
195
+ symbolType: entity.type,
196
+ kind: 'sandwich',
197
+ sandwich,
198
+ };
199
+ }
200
+ }
201
+ // Sandwich infeasible (cap too tight). Fall back to bare chunk + entity name.
164
202
  return {
165
203
  startLine: origStart,
166
204
  endLine: origEnd,
@@ -168,6 +206,7 @@ export function expandToSymbol(result, opts) {
168
206
  expandedFrom: null,
169
207
  symbol: entity.name,
170
208
  symbolType: entity.type,
209
+ kind: 'chunk',
171
210
  };
172
211
  }
173
212
 
@@ -183,6 +222,7 @@ export function expandToSymbol(result, opts) {
183
222
  expandedFrom: origRange,
184
223
  symbol: meta.name || null,
185
224
  symbolType: meta.type || null,
225
+ kind: 'syntax',
186
226
  };
187
227
  }
188
228
  }
@@ -195,6 +235,7 @@ export function expandToSymbol(result, opts) {
195
235
  startLine: origStart, endLine: origEnd,
196
236
  expanded: false, expandedFrom: null,
197
237
  symbol: meta.name || null, symbolType: meta.type || null,
238
+ kind: 'chunk',
198
239
  };
199
240
  }
200
241
  const { fileCache, projectRoot } = opts;
@@ -209,6 +250,7 @@ export function expandToSymbol(result, opts) {
209
250
  expandedFrom: origRange,
210
251
  symbol: meta.name || null,
211
252
  symbolType: meta.type || null,
253
+ kind: 'syntax',
212
254
  };
213
255
  }
214
256
 
@@ -220,9 +262,139 @@ export function expandToSymbol(result, opts) {
220
262
  expandedFrom: null,
221
263
  symbol: meta.name || null,
222
264
  symbolType: meta.type || null,
265
+ kind: 'chunk',
223
266
  };
224
267
  }
225
268
 
/**
 * Build a "symbol sandwich" expansion when the enclosing entity is too large
 * to fit in the token cap as a whole.
 *
 * The sandwich preserves:
 *   - the gold/matched chunk verbatim (the actual evidence — never dropped)
 *   - the function/class signature (small, high-leverage anchor)
 *   - the closing brace line (cheap, helps the agent know the symbol bounds)
 * separated by explicit `// ... (N lines elided) ...` markers.
 *
 * Sizing uses a flat 10-tokens-per-line estimate plus 10 tokens per elision
 * marker. Optional parts are shed in this order until the estimate fits:
 *   1. drop the closing line;
 *   2. drop the signature — and, because the closing line is far cheaper than
 *      the signature window, re-check whether gold + closing alone now fits.
 * Returns null when the gold range is invalid, when gold alone exceeds the
 * cap, or when no optional part can accompany gold (a gold-only "sandwich"
 * adds nothing over the bare chunk, so the caller uses its bare-chunk path).
 *
 * @param {{ name:string, type:string, startLine:number, endLine:number }} entity
 * @param {number} origStart - gold chunk start line
 * @param {number} origEnd - gold chunk end line
 * @param {number} tokenCap - hard cap for the assembled sandwich
 * @returns {{ parts: Array<{kind:'signature'|'gold'|'closing', startLine:number, endLine:number}>, elidedHead:number, elidedTail:number, elisionMarkers:number }|null}
 */
function buildSandwichExpansion(entity, origStart, origEnd, tokenCap) {
  const SIG_MAX_LINES = 4; // signature window
  const ELISION_TOKENS = 10; // approx cost of one `// ... (N lines elided) ...` line
  const TOKENS_PER_LINE = 10; // flat per-line estimate

  // Gold: the original chunk. An inverted or NaN range cannot be rendered.
  const goldLines = origEnd - origStart + 1;
  if (!(goldLines > 0)) {
    return null;
  }
  const goldTokens = goldLines * TOKENS_PER_LINE;
  if (goldTokens > tokenCap) {
    // Even gold alone doesn't fit; the caller falls back to the bare chunk.
    return null;
  }

  // Signature: the first lines of the entity, stopping just before gold.
  // A candidate exists only when gold starts strictly after the entity start.
  const sigStart = entity.startLine;
  const sigEndCandidate = Math.min(sigStart + SIG_MAX_LINES - 1, origStart - 1);
  const sigEnd = sigEndCandidate >= sigStart ? sigEndCandidate : null;
  const sigTokens = sigEnd != null ? (sigEnd - sigStart + 1) * TOKENS_PER_LINE : 0;

  // Closing: only the entity's last line, and only if it lies strictly after gold.
  const closeLine = entity.endLine > origEnd ? entity.endLine : null;
  const closingTokens = closeLine != null ? TOKENS_PER_LINE : 0;

  // Gaps between parts; a marker is only emitted (and paid for) for a real gap.
  const headGap = sigEnd != null && origStart > sigEnd + 1 ? origStart - sigEnd - 1 : 0;
  const tailGap = closeLine != null && closeLine > origEnd + 1 ? closeLine - origEnd - 1 : 0;

  // Estimated token cost of gold plus the selected optional parts.
  const costOf = (withSignature, withClosing) => {
    let tokens = goldTokens;
    if (withSignature) {
      tokens += sigTokens + (headGap > 0 ? ELISION_TOKENS : 0);
    }
    if (withClosing) {
      tokens += closingTokens + (tailGap > 0 ? ELISION_TOKENS : 0);
    }
    return tokens;
  };

  let includeSignature = sigEnd != null;
  let includeClosing = closeLine != null;

  if (includeClosing && costOf(includeSignature, includeClosing) > tokenCap) {
    includeClosing = false;
  }
  if (includeSignature && costOf(includeSignature, includeClosing) > tokenCap) {
    includeSignature = false;
    // The signature was the expensive part; with it gone the single closing
    // line may fit again, which still tells the reader where the symbol ends.
    includeClosing = closeLine != null && costOf(false, true) <= tokenCap;
  }

  // If neither signature nor closing fits, a sandwich gives no value over the bare chunk.
  if (!includeSignature && !includeClosing) {
    return null;
  }

  const parts = [];
  if (includeSignature) {
    parts.push({ kind: 'signature', startLine: sigStart, endLine: sigEnd });
  }
  parts.push({ kind: 'gold', startLine: origStart, endLine: origEnd });
  if (includeClosing) {
    parts.push({ kind: 'closing', startLine: closeLine, endLine: closeLine });
  }

  const elidedHead = includeSignature ? headGap : 0;
  const elidedTail = includeClosing ? tailGap : 0;
  const elisionMarkers = (elidedHead > 0 ? 1 : 0) + (elidedTail > 0 ? 1 : 0);

  return { parts, elidedHead, elidedTail, elisionMarkers };
}
373
+
/**
 * Render a sandwich expansion into a single code string with elision markers.
 *
 * Each part is read through `readFileRange`; consecutive rendered parts that
 * are not line-adjacent are separated by a `// ... (N lines elided) ...`
 * marker, where N is the number of lines skipped between them.
 *
 * Returns '' when there is nothing to render: no sandwich / no parts, no
 * readable part at all, or the gold part itself cannot be read. A sandwich
 * without its gold chunk would present framing lines with no evidence, so in
 * that case the empty string lets the caller take its own fallback path.
 * Unreadable signature/closing parts are simply omitted.
 *
 * @param {Map} fileCache - Shared file cache handed to readFileRange
 * @param {string} filePath - File the parts belong to
 * @param {{ parts: Array<{kind:string, startLine:number, endLine:number}> }} sandwich
 * @param {string} projectRoot - Forwarded to readFileRange
 * @returns {string} Assembled code, or '' if it cannot be assembled
 */
function assembleSandwichCode(fileCache, filePath, sandwich, projectRoot) {
  const parts = sandwich?.parts;
  if (!parts || parts.length === 0) return '';

  const out = [];
  let prevEnd = null; // end line of the last part that was actually rendered
  for (const part of parts) {
    const text = readFileRange(fileCache, filePath, part.startLine, part.endLine, projectRoot);
    if (!text) {
      // Losing the gold chunk defeats the purpose of the sandwich; bail out.
      if (part.kind === 'gold') return '';
      continue;
    }
    if (prevEnd != null) {
      const gap = part.startLine - prevEnd - 1;
      if (gap > 0) out.push(`// ... (${gap} lines elided) ...`);
    }
    out.push(text);
    prevEnd = part.endLine;
  }
  return out.join('\n');
}
397
+
226
398
  /**
227
399
  * Merge contiguous sibling chunks around the target range.
228
400
  * Stops at the next non-contiguous gap or when token cap would be exceeded.
@@ -708,28 +880,37 @@ export function computeSufficiency(topResult, confidenceInfo) {
708
880
  *
709
881
  * Base split: 60/20/20 (preview) or 40/30/30 (full).
710
882
  * Adaptations:
711
- * - When grepMatches > 200 (broad regex): concentrate on top-1 (70/15/15)
883
+ * - High retrieval breadth (broad regex / large candidate pool): sharpen top-1 (70/15/15)
712
884
  * - In agent_full: only expand rank 2/3 to full if score gap < 2× from top-1
713
885
  * - Unused top-1 cap is redistributed to top-2/3 when they are distinct
714
886
  *
887
+ * Breadth signal generalization (for non-grep retrieval modes):
888
+ * - colgrep / pattern: uses `grepMatches` (existing behavior)
889
+ * - lexical / semantic / hybrid: uses `candidatePoolSize` if provided
890
+ * - falls back to 0 (no sharpening) if neither is set
891
+ *
715
892
  * @param {number} totalBudget - Total token budget for all results
716
893
  * @param {number} numResults - Number of results
717
- * @param {string} subMode - 'agent_preview' | 'agent_full'
718
- * @param {object} [context] - Search context for adaptive decisions
719
- * @param {number} [context.grepMatches] - Number of grep matches (broad vs selective)
894
+ * @param {string} subMode - 'agent_preview' | 'agent_full' | 'agent_full_xl'
895
+ * @param {object} [context]
896
+ * @param {number} [context.grepMatches] - Number of grep matches (colgrep)
897
+ * @param {number} [context.candidatePoolSize] - Generic candidate pool (lexical/semantic/hybrid)
720
898
  * @param {Array<{score: number, file: string}>} [context.results] - Ranked results for score-gap gating
721
899
  * @returns {Array<{ presentation: 'full'|'preview'|'summary', tokenCap: number }>}
722
900
  */
723
901
  export function allocateBudget(totalBudget, numResults, subMode = 'agent_preview', context = {}) {
724
902
  const allocations = [];
725
- const isFullMode = subMode === 'agent_full';
726
- const grepMatches = context.grepMatches || 0;
903
+ const isFullMode = subMode === 'agent_full' || subMode === 'agent_full_xl';
904
+ const isXlMode = subMode === 'agent_full_xl';
905
+ // Generalized breadth signal: prefer `grepMatches` for backwards compatibility,
906
+ // fall back to `candidatePoolSize` for non-grep retrieval modes (lexical/semantic/hybrid).
907
+ const breadthHint = context.grepMatches ?? context.candidatePoolSize ?? 0;
727
908
  const results = context.results || [];
728
909
 
729
- // Adaptive split based on regex breadth
910
+ // Adaptive split based on retrieval breadth
730
911
  let top1Share, top23Share;
731
- if (grepMatches > 200) {
732
- // Broad regex: sharpen top-1, reduce previews
912
+ if (breadthHint > 200) {
913
+ // Broad retrieval: sharpen top-1, reduce previews
733
914
  top1Share = 0.70;
734
915
  top23Share = 0.15;
735
916
  } else if (isFullMode) {
@@ -740,9 +921,22 @@ export function allocateBudget(totalBudget, numResults, subMode = 'agent_preview
740
921
  top23Share = 0.20;
741
922
  }
742
923
 
924
+ // Stretch budget (agent_full_xl): allow per-result caps up to 8000 for top-1
925
+ // when the gate fires (top1 >= 2 * top2). This is opt-in via subMode only.
926
+ const xlPerResultCap = 8000;
927
+ const baselinePerResultCap = DEFAULT_PER_RESULT_CAPS[0]; // 2000
928
+ let xlGateActive = false;
929
+ if (isXlMode && results.length > 0) {
930
+ const top1Score = results[0]?.score || 0;
931
+ const top2Score = results[1]?.score || 0;
932
+ // Gate fires when top-1 dominates: 2× top-2 OR there is no top-2.
933
+ xlGateActive = top1Score > 0 && (top2Score === 0 || top1Score >= 2 * top2Score);
934
+ }
935
+ const top1HardCap = xlGateActive ? xlPerResultCap : baselinePerResultCap;
936
+
743
937
  for (let i = 0; i < numResults; i++) {
744
938
  if (i === 0) {
745
- const cap = Math.min(Math.floor(totalBudget * top1Share), DEFAULT_PER_RESULT_CAPS[0]);
939
+ const cap = Math.min(Math.floor(totalBudget * top1Share), top1HardCap);
746
940
  allocations.push({ presentation: 'full', tokenCap: cap });
747
941
  } else if (i <= 2) {
748
942
  // In agent_full: gate full expansion on score gap from top-1.
@@ -875,9 +1069,14 @@ function compressToPreview(code, tokenCap) {
875
1069
 
876
1070
  /**
877
1071
  * Resolve the effective sub-mode from the format string.
878
- * 'agent' → 'agent_preview' (default), 'agent_preview', 'agent_full'.
1072
+ * 'agent' / 'agent_preview' → 'agent_preview' (compact 4k budget)
1073
+ * 'agent_full' → 'agent_full' (8k budget)
1074
+ * 'agent_full_xl' → 'agent_full_xl' (12k budget, opt-in only;
1075
+ * falls back to agent_full at allocation time
1076
+ * when the dominance gate fails)
879
1077
  */
880
1078
  function resolveSubMode(format) {
1079
+ if (format === 'agent_full_xl') return 'agent_full_xl';
881
1080
  if (format === 'agent_full') return 'agent_full';
882
1081
  return 'agent_preview'; // 'agent' and 'agent_preview' both map here
883
1082
  }
@@ -912,6 +1111,7 @@ export function packageForAgent(rankedResults, searchStats, opts) {
912
1111
  const {
913
1112
  query,
914
1113
  regex,
1114
+ mode: modeOpt = null,
915
1115
  format: formatOpt = 'agent',
916
1116
  codeGraphRepo = null,
917
1117
  locationMap = null,
@@ -920,7 +1120,9 @@ export function packageForAgent(rankedResults, searchStats, opts) {
920
1120
  const ablations = opts.ablations || new Set();
921
1121
 
922
1122
  const subMode = resolveSubMode(formatOpt);
923
- const defaultBudget = subMode === 'agent_full' ? AGENT_FULL_TOKEN_BUDGET : DEFAULT_TOKEN_BUDGET;
1123
+ const defaultBudget = subMode === 'agent_full_xl' ? AGENT_FULL_XL_TOKEN_BUDGET
1124
+ : subMode === 'agent_full' ? AGENT_FULL_TOKEN_BUDGET
1125
+ : DEFAULT_TOKEN_BUDGET;
924
1126
  const tokenBudget = opts.tokenBudget ?? defaultBudget;
925
1127
 
926
1128
  const start = performance.now();
@@ -954,7 +1156,11 @@ export function packageForAgent(rankedResults, searchStats, opts) {
954
1156
  // When 'no-adaptive-budget' ablation is active, use fixed splits (no context param)
955
1157
  const budgetContext = ablations.has('no-adaptive-budget')
956
1158
  ? {}
957
- : { grepMatches: searchStats?.grepMatches || 0, results: rankedResults };
1159
+ : {
1160
+ ...(searchStats?.grepMatches != null ? { grepMatches: searchStats.grepMatches } : {}),
1161
+ ...(searchStats?.candidatePoolSize != null ? { candidatePoolSize: searchStats.candidatePoolSize } : {}),
1162
+ results: rankedResults,
1163
+ };
958
1164
  const allocations = allocateBudget(tokenBudget, rankedResults.length, subMode, budgetContext);
959
1165
 
960
1166
  // Compute confidence from ranked results (Fix #4: regex selectivity included)
@@ -1010,14 +1216,21 @@ export function packageForAgent(rankedResults, searchStats, opts) {
1010
1216
  ablations,
1011
1217
  });
1012
1218
 
1013
- // Phase 1: Load code via readFileRange
1014
- let code = readFileRange(
1015
- fileCache,
1016
- filePath,
1017
- expansion.startLine,
1018
- expansion.endLine,
1019
- projectRoot
1020
- );
1219
+ // Phase 1: Load code via readFileRange.
1220
+ // For sandwich expansions, assemble from parts with explicit elision markers
1221
+ // so the gold chunk is preserved even when the enclosing entity is huge.
1222
+ let code;
1223
+ if (expansion.kind === 'sandwich' && expansion.sandwich) {
1224
+ code = assembleSandwichCode(fileCache, filePath, expansion.sandwich, projectRoot);
1225
+ } else {
1226
+ code = readFileRange(
1227
+ fileCache,
1228
+ filePath,
1229
+ expansion.startLine,
1230
+ expansion.endLine,
1231
+ projectRoot
1232
+ );
1233
+ }
1021
1234
 
1022
1235
  if (!code) {
1023
1236
  // Fallback: try with ±20 lines padding (plan §13, step 3)
@@ -1061,6 +1274,21 @@ export function packageForAgent(rankedResults, searchStats, opts) {
1061
1274
  if (resultTokenCap <= 0) {
1062
1275
  code = '';
1063
1276
  codeTokens = 0;
1277
+ } else if (expansion.kind === 'sandwich') {
1278
+ // Sandwich is pre-sized via 10-tokens/line estimate. If actual content
1279
+ // happens to overshoot (very long lines), do NOT call truncateToTokenCap
1280
+ // here — that truncates from the start and would drop the gold tail.
1281
+ // Instead, fall back to gold-only chunk + truncate (agent keeps the
1282
+ // evidence; loses signature, but not the match itself).
1283
+ codeTokens = estimateTokens(code);
1284
+ if (codeTokens > resultTokenCap) {
1285
+ const goldStart = meta.startLine || result.startLine;
1286
+ const goldEnd = meta.endLine || result.endLine;
1287
+ const goldOnly = readFileRange(fileCache, filePath, goldStart, goldEnd, projectRoot) || '';
1288
+ const trunc = truncateToTokenCap(goldOnly, resultTokenCap);
1289
+ code = trunc.code;
1290
+ codeTokens = estimateTokens(code);
1291
+ }
1064
1292
  } else if (allocation.presentation === 'full') {
1065
1293
  const truncResult = truncateToTokenCap(code, resultTokenCap);
1066
1294
  code = truncResult.code;
@@ -1104,6 +1332,17 @@ export function packageForAgent(rankedResults, searchStats, opts) {
1104
1332
  score: result.score || result.lateInteractionScore || 0,
1105
1333
  expanded: expansion.expanded,
1106
1334
  expandedFrom: expansion.expandedFrom,
1335
+ expansionKind: expansion.kind || null,
1336
+ ...(expansion.kind === 'sandwich' && expansion.sandwich
1337
+ ? {
1338
+ sandwich: {
1339
+ partKinds: expansion.sandwich.parts.map(p => p.kind),
1340
+ elidedHead: expansion.sandwich.elidedHead,
1341
+ elidedTail: expansion.sandwich.elidedTail,
1342
+ elisionMarkers: expansion.sandwich.elisionMarkers,
1343
+ },
1344
+ }
1345
+ : {}),
1107
1346
  presentation: allocation.presentation,
1108
1347
  stale,
1109
1348
  indexedAt,
@@ -1148,7 +1387,7 @@ export function packageForAgent(rankedResults, searchStats, opts) {
1148
1387
  return {
1149
1388
  query,
1150
1389
  regex,
1151
- mode: 'pattern',
1390
+ mode: modeOpt || searchStats?.path || 'pattern',
1152
1391
  totalResults: rankedResults.length,
1153
1392
  latencyMs: searchStats?.total_ms || 0,
1154
1393
  packagingMs,
@@ -14,3 +14,7 @@ export * from './search-pattern.js';
14
14
  export * from './search-semantic.js';
15
15
  export { formatResults, formatGrepResults, formatStructuralResults, enrichWithSummaries, formatSummaryFirst, formatMiddleRes } from './search-format.js';
16
16
  export { hybridSearch, hybridSearchV2 } from './search-hybrid.js';
17
+
18
+ // Read tools — filesystem-grounded exact reads + hybrid semantic span selection
19
+ export { readFile, readFiles, formatReadResults, handleReadCli } from './search-read.js';
20
+ export { readSemantic, formatReadSemanticResult, handleReadSemanticCli } from './search-read-semantic.js';
@@ -328,7 +328,7 @@ Examples:
328
328
  let summaryFirst = false;
329
329
  let middleRes = false;
330
330
  let forceCold = false;
331
- let agentFormat = null; // null | 'agent_preview' | 'agent_full'
331
+ let agentFormat = null; // null | 'agent_preview' | 'agent_full' | 'agent_full_xl'
332
332
  let agentBudget = null;
333
333
 
334
334
  for (let i = isGrepCommand ? 1 : 0; i < args.length; i++) {
@@ -385,6 +385,8 @@ Examples:
385
385
  agentFormat = agentFormat || 'agent_preview';
386
386
  } else if (arg === '--agent-full') {
387
387
  agentFormat = 'agent_full';
388
+ } else if (arg === '--agent-full-xl') {
389
+ agentFormat = 'agent_full_xl';
388
390
  } else if (arg === '--budget' && args[i + 1]) {
389
391
  agentBudget = parseInt(args[++i], 10);
390
392
  agentFormat = agentFormat || 'agent_preview';
@@ -260,9 +260,9 @@ export async function patternSearch(query, routing, options = {}) {
260
260
  };
261
261
 
262
262
  // Agent mode: return proper agent schema even for zero results
263
- if (format === 'agent' || format === 'agent_preview' || format === 'agent_full') {
263
+ if (format === 'agent' || format === 'agent_preview' || format === 'agent_full' || format === 'agent_full_xl') {
264
264
  const agentResponse = packageForAgent([], emptyStats, {
265
- query, regex, format, tokenBudget, ablations, projectRoot: this.projectRoot || PROJECT_ROOT,
265
+ query, regex, mode: 'pattern', format, tokenBudget, ablations, projectRoot: this.projectRoot || PROJECT_ROOT,
266
266
  });
267
267
  agentResponse.stats = emptyStats;
268
268
  return agentResponse;
@@ -432,11 +432,12 @@ export async function patternSearch(query, routing, options = {}) {
432
432
 
433
433
  // Agent mode: post-ranking context packaging (Phases 1-5)
434
434
  // Ranking is frozen — agent mode only transforms presentation.
435
- if (format === 'agent' || format === 'agent_preview' || format === 'agent_full') {
435
+ if (format === 'agent' || format === 'agent_preview' || format === 'agent_full' || format === 'agent_full_xl') {
436
436
  const searchDir = this.projectRoot || PROJECT_ROOT;
437
437
  const agentResponse = packageForAgent(results, stats, {
438
438
  query,
439
439
  regex,
440
+ mode: 'pattern',
440
441
  format,
441
442
  tokenBudget,
442
443
  codeGraphRepo: this.codeGraphRepo || null,