sweet-search 2.4.2 → 2.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core/cli.js +43 -5
- package/core/embedding/embedding-cache.js +266 -18
- package/core/embedding/embedding-service.js +45 -9
- package/core/graph/graph-expansion.js +52 -12
- package/core/graph/graph-extractor.js +30 -1
- package/core/indexing/ast-chunker.js +331 -16
- package/core/indexing/chunking/chunk-builder.js +34 -1
- package/core/indexing/index-codebase-v21.js +31 -2
- package/core/indexing/index.js +6 -3
- package/core/indexing/indexer-ann.js +45 -6
- package/core/indexing/indexer-build.js +9 -1
- package/core/indexing/indexer-phases.js +6 -4
- package/core/indexing/indexing-file-policy.js +140 -0
- package/core/indexing/li-skip-policy.js +11 -220
- package/core/infrastructure/codebase-repository.js +21 -0
- package/core/infrastructure/config/embedding.js +20 -1
- package/core/infrastructure/config/graph.js +2 -2
- package/core/infrastructure/config/ranking.js +10 -0
- package/core/infrastructure/config/vector-store.js +1 -1
- package/core/infrastructure/coreml-cascade.js +236 -30
- package/core/infrastructure/coreml-cascade.json +25 -0
- package/core/infrastructure/index.js +17 -0
- package/core/infrastructure/init-config.js +216 -0
- package/core/infrastructure/language-patterns/registry-core.js +18 -0
- package/core/infrastructure/model-registry.js +12 -0
- package/core/infrastructure/native-inference.js +143 -51
- package/core/infrastructure/tree-sitter-provider.js +92 -2
- package/core/ranking/cascaded-scorer.js +6 -2
- package/core/ranking/file-kind-ranking.js +264 -0
- package/core/ranking/late-interaction-index.js +10 -4
- package/core/ranking/late-interaction-policy.js +304 -0
- package/core/search/context-expander.js +267 -28
- package/core/search/index.js +4 -0
- package/core/search/search-cli.js +3 -1
- package/core/search/search-pattern.js +4 -3
- package/core/search/search-postprocess.js +189 -8
- package/core/search/search-read-semantic.js +734 -0
- package/core/search/search-read.js +481 -0
- package/core/search/search-server.js +153 -5
- package/core/search/sweet-search.js +133 -16
- package/core/start-server.js +13 -2
- package/mcp/server.js +41 -0
- package/mcp/tool-handlers.js +117 -6
- package/package.json +9 -7
- package/scripts/init.js +386 -5
- package/scripts/uninstall.js +152 -6
|
@@ -38,6 +38,10 @@ export function estimateTokens(text) {
|
|
|
38
38
|
|
|
39
39
|
const DEFAULT_TOKEN_BUDGET = 4000;
|
|
40
40
|
const AGENT_FULL_TOKEN_BUDGET = 8000;
|
|
41
|
+
// Stretch budget — opt-in only via subMode 'agent_full_xl'. Gated on top-1
|
|
42
|
+
// dominance (>=2× top-2). Default remains compact 4k; this is for the
|
|
43
|
+
// "explicit, single dominant answer fits" case only.
|
|
44
|
+
const AGENT_FULL_XL_TOKEN_BUDGET = 12000;
|
|
41
45
|
const DEFAULT_PER_RESULT_CAPS = [2000, 800, 400]; // rank 1, 2, 3+
|
|
42
46
|
const MAX_HEADER_TOKENS = 200;
|
|
43
47
|
|
|
@@ -106,11 +110,15 @@ export function findEnclosingEntity(codeGraphRepo, filePath, startLine, endLine)
|
|
|
106
110
|
/**
|
|
107
111
|
* Expand a result to symbol-complete boundaries.
|
|
108
112
|
*
|
|
109
|
-
* Decision tree
|
|
110
|
-
* 1. Is chunk already a complete symbol? → return
|
|
111
|
-
* 2. Look up enclosing entity
|
|
113
|
+
* Decision tree:
|
|
114
|
+
* 1. Is chunk already a complete symbol? → return chunk
|
|
115
|
+
* 2. Look up enclosing entity:
|
|
116
|
+
* a. fits in cap → expand to entity boundaries (kind: 'full')
|
|
117
|
+
* b. too large → build symbol sandwich (kind: 'sandwich')
|
|
118
|
+
* c. sandwich infeasible → bare chunk with entity name (kind: 'chunk')
|
|
112
119
|
* 3. Merge contiguous sibling chunks → stop at next symbol boundary
|
|
113
|
-
* 4.
|
|
120
|
+
* 4. Syntax-aware brace/indent expansion (kind: 'syntax')
|
|
121
|
+
* 5. Fall back: chunk as-is (kind: 'chunk')
|
|
114
122
|
*
|
|
115
123
|
* @param {object} result - Ranked result with file, startLine, endLine, metadata
|
|
116
124
|
* @param {object} opts
|
|
@@ -119,7 +127,16 @@ export function findEnclosingEntity(codeGraphRepo, filePath, startLine, endLine)
|
|
|
119
127
|
* @param {Map} opts.fileCache - Shared file cache for readFileRange
|
|
120
128
|
* @param {string} opts.projectRoot
|
|
121
129
|
* @param {number} opts.tokenCap - Max tokens for this result
|
|
122
|
-
* @returns {{
|
|
130
|
+
* @returns {{
|
|
131
|
+
* startLine: number,
|
|
132
|
+
* endLine: number,
|
|
133
|
+
* expanded: boolean,
|
|
134
|
+
* expandedFrom: string|null,
|
|
135
|
+
* symbol: string|null,
|
|
136
|
+
* symbolType: string|null,
|
|
137
|
+
* kind: 'full'|'sandwich'|'syntax'|'chunk',
|
|
138
|
+
* sandwich?: { parts: Array<{kind:'signature'|'gold'|'closing', startLine:number, endLine:number}>, elidedHead:number, elidedTail:number, elisionMarkers:number }
|
|
139
|
+
* }}
|
|
123
140
|
*/
|
|
124
141
|
export function expandToSymbol(result, opts) {
|
|
125
142
|
const { codeGraphRepo, locationMap, tokenCap } = opts;
|
|
@@ -139,6 +156,7 @@ export function expandToSymbol(result, opts) {
|
|
|
139
156
|
expandedFrom: null,
|
|
140
157
|
symbol: meta.name,
|
|
141
158
|
symbolType: meta.type || null,
|
|
159
|
+
kind: 'chunk',
|
|
142
160
|
};
|
|
143
161
|
}
|
|
144
162
|
|
|
@@ -158,9 +176,29 @@ export function expandToSymbol(result, opts) {
|
|
|
158
176
|
expandedFrom: origRange,
|
|
159
177
|
symbol: entity.name,
|
|
160
178
|
symbolType: entity.type,
|
|
179
|
+
kind: 'full',
|
|
161
180
|
};
|
|
162
181
|
}
|
|
163
|
-
// Entity too large
|
|
182
|
+
// Entity too large for full expansion. Try a "symbol sandwich":
|
|
183
|
+
// signature + elision marker + gold chunk + elision marker + closing brace.
|
|
184
|
+
// Goal: preserve gold evidence + ground the agent in the enclosing symbol
|
|
185
|
+
// without dumping the whole function (which causes context rot).
|
|
186
|
+
if (!opts.ablations?.has('no-sandwich')) {
|
|
187
|
+
const sandwich = buildSandwichExpansion(entity, origStart, origEnd, tokenCap);
|
|
188
|
+
if (sandwich) {
|
|
189
|
+
return {
|
|
190
|
+
startLine: entity.startLine,
|
|
191
|
+
endLine: entity.endLine,
|
|
192
|
+
expanded: true,
|
|
193
|
+
expandedFrom: origRange,
|
|
194
|
+
symbol: entity.name,
|
|
195
|
+
symbolType: entity.type,
|
|
196
|
+
kind: 'sandwich',
|
|
197
|
+
sandwich,
|
|
198
|
+
};
|
|
199
|
+
}
|
|
200
|
+
}
|
|
201
|
+
// Sandwich infeasible (cap too tight). Fall back to bare chunk + entity name.
|
|
164
202
|
return {
|
|
165
203
|
startLine: origStart,
|
|
166
204
|
endLine: origEnd,
|
|
@@ -168,6 +206,7 @@ export function expandToSymbol(result, opts) {
|
|
|
168
206
|
expandedFrom: null,
|
|
169
207
|
symbol: entity.name,
|
|
170
208
|
symbolType: entity.type,
|
|
209
|
+
kind: 'chunk',
|
|
171
210
|
};
|
|
172
211
|
}
|
|
173
212
|
|
|
@@ -183,6 +222,7 @@ export function expandToSymbol(result, opts) {
|
|
|
183
222
|
expandedFrom: origRange,
|
|
184
223
|
symbol: meta.name || null,
|
|
185
224
|
symbolType: meta.type || null,
|
|
225
|
+
kind: 'syntax',
|
|
186
226
|
};
|
|
187
227
|
}
|
|
188
228
|
}
|
|
@@ -195,6 +235,7 @@ export function expandToSymbol(result, opts) {
|
|
|
195
235
|
startLine: origStart, endLine: origEnd,
|
|
196
236
|
expanded: false, expandedFrom: null,
|
|
197
237
|
symbol: meta.name || null, symbolType: meta.type || null,
|
|
238
|
+
kind: 'chunk',
|
|
198
239
|
};
|
|
199
240
|
}
|
|
200
241
|
const { fileCache, projectRoot } = opts;
|
|
@@ -209,6 +250,7 @@ export function expandToSymbol(result, opts) {
|
|
|
209
250
|
expandedFrom: origRange,
|
|
210
251
|
symbol: meta.name || null,
|
|
211
252
|
symbolType: meta.type || null,
|
|
253
|
+
kind: 'syntax',
|
|
212
254
|
};
|
|
213
255
|
}
|
|
214
256
|
|
|
@@ -220,9 +262,139 @@ export function expandToSymbol(result, opts) {
|
|
|
220
262
|
expandedFrom: null,
|
|
221
263
|
symbol: meta.name || null,
|
|
222
264
|
symbolType: meta.type || null,
|
|
265
|
+
kind: 'chunk',
|
|
223
266
|
};
|
|
224
267
|
}
|
|
225
268
|
|
|
269
|
+
/**
 * Plan a "symbol sandwich" for a matched chunk whose enclosing entity is too
 * large to be returned whole.
 *
 * A sandwich is an ordered list of line ranges taken from the same file:
 *   - `signature`: up to SIG_MAX_LINES lines starting at `entity.startLine`,
 *     clipped so it always ends before the gold chunk begins
 *   - `gold`: the matched chunk itself (`origStart`..`origEnd`), always present
 *   - `closing`: the entity's last line, only when it lies strictly after gold
 *
 * Sizing is an estimate, not a measurement: every source line is costed at
 * TOKENS_PER_LINE and every elision marker at ELISION_TOKENS. A marker is only
 * costed (and reported) where an included part leaves a real gap next to gold.
 *
 * Optional parts are chosen in this priority order, taking the first
 * combination whose estimate does not exceed `tokenCap`:
 *   1. signature + gold + closing
 *   2. signature + gold
 *   3. gold + closing
 * Option 3 is what makes a tight cap still yield a sandwich when the signature
 * is too expensive but the single closing line is affordable.
 *
 * @param {{ name:string, type:string, startLine:number, endLine:number }} entity
 * @param {number} origStart - gold chunk start line
 * @param {number} origEnd - gold chunk end line
 * @param {number} tokenCap - hard cap for the assembled sandwich
 * @returns {{ parts: Array<{kind:'signature'|'gold'|'closing', startLine:number, endLine:number}>, elidedHead:number, elidedTail:number, elisionMarkers:number }|null}
 *   `null` when gold alone exceeds the cap, or when no optional part can be
 *   added (a gold-only "sandwich" is no better than the bare chunk).
 */
function buildSandwichExpansion(entity, origStart, origEnd, tokenCap) {
  const SIG_MAX_LINES = 4; // signature window
  const ELISION_TOKENS = 10; // approx cost of one `// ... (N lines elided) ...` line
  const TOKENS_PER_LINE = 10; // pessimistic per-line estimate

  // Gold is mandatory; if it cannot fit on its own there is nothing to plan.
  const goldTokens = (origEnd - origStart + 1) * TOKENS_PER_LINE;
  if (goldTokens > tokenCap) {
    return null;
  }

  // Signature window: starts at the entity, never overlaps the gold chunk.
  const sigStart = entity.startLine;
  const sigEnd = Math.min(sigStart + SIG_MAX_LINES - 1, origStart - 1);
  const hasSignature = sigEnd >= sigStart;
  const sigTokens = hasSignature ? (sigEnd - sigStart + 1) * TOKENS_PER_LINE : 0;
  const headGap = hasSignature && origStart > sigEnd + 1 ? origStart - sigEnd - 1 : 0;

  // Closing line: only meaningful when the entity continues past gold.
  const hasClosing = entity.endLine > origEnd;
  const closingLine = hasClosing ? entity.endLine : null;
  const closingTokens = hasClosing ? TOKENS_PER_LINE : 0;
  const tailGap = hasClosing && closingLine > origEnd + 1 ? closingLine - origEnd - 1 : 0;

  // Pure estimator for one combination of optional parts.
  const estimate = (withSignature, withClosing) => {
    let tokens = goldTokens;
    if (withSignature) {
      tokens += sigTokens + (headGap > 0 ? ELISION_TOKENS : 0);
    }
    if (withClosing) {
      tokens += closingTokens + (tailGap > 0 ? ELISION_TOKENS : 0);
    }
    return tokens;
  };

  // Priority order: keep the signature as long as possible, but fall back to
  // closing-only before giving up entirely.
  const candidates = [
    [hasSignature, hasClosing],
    [hasSignature, false],
    [false, hasClosing],
  ];
  const chosen = candidates.find(
    ([withSignature, withClosing]) =>
      (withSignature || withClosing) && !(estimate(withSignature, withClosing) > tokenCap),
  );
  if (!chosen) {
    return null;
  }
  const [includeSignature, includeClosing] = chosen;

  const parts = [];
  if (includeSignature) {
    parts.push({ kind: 'signature', startLine: sigStart, endLine: sigEnd });
  }
  parts.push({ kind: 'gold', startLine: origStart, endLine: origEnd });
  if (includeClosing) {
    parts.push({ kind: 'closing', startLine: closingLine, endLine: closingLine });
  }

  // Gaps are only reported for the parts that actually made it in.
  const elidedHead = includeSignature ? headGap : 0;
  const elidedTail = includeClosing ? tailGap : 0;
  const elisionMarkers = (elidedHead > 0 ? 1 : 0) + (elidedTail > 0 ? 1 : 0);

  return { parts, elidedHead, elidedTail, elisionMarkers };
}
|
|
373
|
+
|
|
374
|
+
/**
 * Render a sandwich expansion into a single code string.
 *
 * Each part is read with `readFileRange` and the pieces are joined with
 * newlines. Wherever two emitted parts are not adjacent in the file, an
 * explicit `// ... (N lines elided) ...` marker is inserted between them,
 * where N is the number of lines skipped between the previous emitted part
 * and the current one.
 *
 * The gold part is the actual evidence: if it cannot be read, the whole
 * sandwich is abandoned and '' is returned, rather than emitting a signature
 * and closing line with nothing in between. Unreadable signature or closing
 * parts are simply skipped.
 *
 * @param {Map} fileCache - Shared file cache handed through to readFileRange
 * @param {string} filePath - File the parts are read from
 * @param {{ parts: Array<{kind:string, startLine:number, endLine:number}> }|null} sandwich
 * @param {string} projectRoot - Handed through to readFileRange
 * @returns {string} The assembled code, or '' when the sandwich is missing or
 *   empty, when gold is unreadable, or when no part could be read. An empty
 *   result lets the caller take its "no code" fallback path.
 */
function assembleSandwichCode(fileCache, filePath, sandwich, projectRoot) {
  const parts = sandwich?.parts;
  if (!parts || parts.length === 0) return '';

  const segments = [];
  let prevEnd = null;
  for (const part of parts) {
    const text = readFileRange(fileCache, filePath, part.startLine, part.endLine, projectRoot);
    if (!text) {
      // Without the gold chunk the sandwich carries no evidence; bail out so
      // the caller falls back instead of presenting a hollow result.
      if (part.kind === 'gold') return '';
      continue;
    }
    if (prevEnd !== null) {
      const gap = part.startLine - prevEnd - 1;
      if (gap > 0) segments.push(`// ... (${gap} lines elided) ...`);
    }
    segments.push(text);
    prevEnd = part.endLine;
  }
  return segments.join('\n');
}
|
|
397
|
+
|
|
226
398
|
/**
|
|
227
399
|
* Merge contiguous sibling chunks around the target range.
|
|
228
400
|
* Stops at the next non-contiguous gap or when token cap would be exceeded.
|
|
@@ -708,28 +880,37 @@ export function computeSufficiency(topResult, confidenceInfo) {
|
|
|
708
880
|
*
|
|
709
881
|
* Base split: 60/20/20 (preview) or 40/30/30 (full).
|
|
710
882
|
* Adaptations:
|
|
711
|
-
* -
|
|
883
|
+
* - High retrieval breadth (broad regex / large candidate pool): sharpen top-1 (70/15/15)
|
|
712
884
|
* - In agent_full: only expand rank 2/3 to full if score gap < 2× from top-1
|
|
713
885
|
* - Unused top-1 cap is redistributed to top-2/3 when they are distinct
|
|
714
886
|
*
|
|
887
|
+
* Breadth signal generalization (for non-grep retrieval modes):
|
|
888
|
+
* - colgrep / pattern: uses `grepMatches` (existing behavior)
|
|
889
|
+
* - lexical / semantic / hybrid: uses `candidatePoolSize` if provided
|
|
890
|
+
* - falls back to 0 (no sharpening) if neither is set
|
|
891
|
+
*
|
|
715
892
|
* @param {number} totalBudget - Total token budget for all results
|
|
716
893
|
* @param {number} numResults - Number of results
|
|
717
|
-
* @param {string} subMode - 'agent_preview' | 'agent_full'
|
|
718
|
-
* @param {object} [context]
|
|
719
|
-
* @param {number} [context.grepMatches] - Number of grep matches (
|
|
894
|
+
* @param {string} subMode - 'agent_preview' | 'agent_full' | 'agent_full_xl'
|
|
895
|
+
* @param {object} [context]
|
|
896
|
+
* @param {number} [context.grepMatches] - Number of grep matches (colgrep)
|
|
897
|
+
* @param {number} [context.candidatePoolSize] - Generic candidate pool (lexical/semantic/hybrid)
|
|
720
898
|
* @param {Array<{score: number, file: string}>} [context.results] - Ranked results for score-gap gating
|
|
721
899
|
* @returns {Array<{ presentation: 'full'|'preview'|'summary', tokenCap: number }>}
|
|
722
900
|
*/
|
|
723
901
|
export function allocateBudget(totalBudget, numResults, subMode = 'agent_preview', context = {}) {
|
|
724
902
|
const allocations = [];
|
|
725
|
-
const isFullMode = subMode === 'agent_full';
|
|
726
|
-
const
|
|
903
|
+
const isFullMode = subMode === 'agent_full' || subMode === 'agent_full_xl';
|
|
904
|
+
const isXlMode = subMode === 'agent_full_xl';
|
|
905
|
+
// Generalized breadth signal: prefer `grepMatches` for backwards compatibility,
|
|
906
|
+
// fall back to `candidatePoolSize` for non-grep retrieval modes (lexical/semantic/hybrid).
|
|
907
|
+
const breadthHint = context.grepMatches ?? context.candidatePoolSize ?? 0;
|
|
727
908
|
const results = context.results || [];
|
|
728
909
|
|
|
729
|
-
// Adaptive split based on
|
|
910
|
+
// Adaptive split based on retrieval breadth
|
|
730
911
|
let top1Share, top23Share;
|
|
731
|
-
if (
|
|
732
|
-
// Broad
|
|
912
|
+
if (breadthHint > 200) {
|
|
913
|
+
// Broad retrieval: sharpen top-1, reduce previews
|
|
733
914
|
top1Share = 0.70;
|
|
734
915
|
top23Share = 0.15;
|
|
735
916
|
} else if (isFullMode) {
|
|
@@ -740,9 +921,22 @@ export function allocateBudget(totalBudget, numResults, subMode = 'agent_preview
|
|
|
740
921
|
top23Share = 0.20;
|
|
741
922
|
}
|
|
742
923
|
|
|
924
|
+
// Stretch budget (agent_full_xl): allow per-result caps up to 8000 for top-1
|
|
925
|
+
// when the gate fires (top1 >= 2 * top2). This is opt-in via subMode only.
|
|
926
|
+
const xlPerResultCap = 8000;
|
|
927
|
+
const baselinePerResultCap = DEFAULT_PER_RESULT_CAPS[0]; // 2000
|
|
928
|
+
let xlGateActive = false;
|
|
929
|
+
if (isXlMode && results.length > 0) {
|
|
930
|
+
const top1Score = results[0]?.score || 0;
|
|
931
|
+
const top2Score = results[1]?.score || 0;
|
|
932
|
+
// Gate fires when top-1 dominates: 2× top-2 OR there is no top-2.
|
|
933
|
+
xlGateActive = top1Score > 0 && (top2Score === 0 || top1Score >= 2 * top2Score);
|
|
934
|
+
}
|
|
935
|
+
const top1HardCap = xlGateActive ? xlPerResultCap : baselinePerResultCap;
|
|
936
|
+
|
|
743
937
|
for (let i = 0; i < numResults; i++) {
|
|
744
938
|
if (i === 0) {
|
|
745
|
-
const cap = Math.min(Math.floor(totalBudget * top1Share),
|
|
939
|
+
const cap = Math.min(Math.floor(totalBudget * top1Share), top1HardCap);
|
|
746
940
|
allocations.push({ presentation: 'full', tokenCap: cap });
|
|
747
941
|
} else if (i <= 2) {
|
|
748
942
|
// In agent_full: gate full expansion on score gap from top-1.
|
|
@@ -875,9 +1069,14 @@ function compressToPreview(code, tokenCap) {
|
|
|
875
1069
|
|
|
876
1070
|
/**
 * Normalise a caller-supplied format string into one of the three agent
 * sub-modes.
 *
 *   'agent_full_xl' → 'agent_full_xl'  (opt-in stretch mode)
 *   'agent_full'    → 'agent_full'
 *   anything else   → 'agent_preview'  (covers 'agent' and 'agent_preview')
 *
 * This function only picks the sub-mode name. The token budget that goes with
 * it is chosen by the caller, and the larger top-1 cap of 'agent_full_xl' is
 * applied at allocation time only when the top-1 dominance gate fires.
 *
 * @param {string} format - Requested output format
 * @returns {'agent_preview'|'agent_full'|'agent_full_xl'}
 */
function resolveSubMode(format) {
  switch (format) {
    case 'agent_full_xl':
    case 'agent_full':
      // Both full variants pass through unchanged.
      return format;
    default:
      // 'agent', 'agent_preview' and any unrecognised value share the compact mode.
      return 'agent_preview';
  }
}
|
|
@@ -912,6 +1111,7 @@ export function packageForAgent(rankedResults, searchStats, opts) {
|
|
|
912
1111
|
const {
|
|
913
1112
|
query,
|
|
914
1113
|
regex,
|
|
1114
|
+
mode: modeOpt = null,
|
|
915
1115
|
format: formatOpt = 'agent',
|
|
916
1116
|
codeGraphRepo = null,
|
|
917
1117
|
locationMap = null,
|
|
@@ -920,7 +1120,9 @@ export function packageForAgent(rankedResults, searchStats, opts) {
|
|
|
920
1120
|
const ablations = opts.ablations || new Set();
|
|
921
1121
|
|
|
922
1122
|
const subMode = resolveSubMode(formatOpt);
|
|
923
|
-
const defaultBudget = subMode === '
|
|
1123
|
+
const defaultBudget = subMode === 'agent_full_xl' ? AGENT_FULL_XL_TOKEN_BUDGET
|
|
1124
|
+
: subMode === 'agent_full' ? AGENT_FULL_TOKEN_BUDGET
|
|
1125
|
+
: DEFAULT_TOKEN_BUDGET;
|
|
924
1126
|
const tokenBudget = opts.tokenBudget ?? defaultBudget;
|
|
925
1127
|
|
|
926
1128
|
const start = performance.now();
|
|
@@ -954,7 +1156,11 @@ export function packageForAgent(rankedResults, searchStats, opts) {
|
|
|
954
1156
|
// When 'no-adaptive-budget' ablation is active, use fixed splits (no context param)
|
|
955
1157
|
const budgetContext = ablations.has('no-adaptive-budget')
|
|
956
1158
|
? {}
|
|
957
|
-
: {
|
|
1159
|
+
: {
|
|
1160
|
+
...(searchStats?.grepMatches != null ? { grepMatches: searchStats.grepMatches } : {}),
|
|
1161
|
+
...(searchStats?.candidatePoolSize != null ? { candidatePoolSize: searchStats.candidatePoolSize } : {}),
|
|
1162
|
+
results: rankedResults,
|
|
1163
|
+
};
|
|
958
1164
|
const allocations = allocateBudget(tokenBudget, rankedResults.length, subMode, budgetContext);
|
|
959
1165
|
|
|
960
1166
|
// Compute confidence from ranked results (Fix #4: regex selectivity included)
|
|
@@ -1010,14 +1216,21 @@ export function packageForAgent(rankedResults, searchStats, opts) {
|
|
|
1010
1216
|
ablations,
|
|
1011
1217
|
});
|
|
1012
1218
|
|
|
1013
|
-
// Phase 1: Load code via readFileRange
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
expansion.
|
|
1019
|
-
|
|
1020
|
-
|
|
1219
|
+
// Phase 1: Load code via readFileRange.
|
|
1220
|
+
// For sandwich expansions, assemble from parts with explicit elision markers
|
|
1221
|
+
// so the gold chunk is preserved even when the enclosing entity is huge.
|
|
1222
|
+
let code;
|
|
1223
|
+
if (expansion.kind === 'sandwich' && expansion.sandwich) {
|
|
1224
|
+
code = assembleSandwichCode(fileCache, filePath, expansion.sandwich, projectRoot);
|
|
1225
|
+
} else {
|
|
1226
|
+
code = readFileRange(
|
|
1227
|
+
fileCache,
|
|
1228
|
+
filePath,
|
|
1229
|
+
expansion.startLine,
|
|
1230
|
+
expansion.endLine,
|
|
1231
|
+
projectRoot
|
|
1232
|
+
);
|
|
1233
|
+
}
|
|
1021
1234
|
|
|
1022
1235
|
if (!code) {
|
|
1023
1236
|
// Fallback: try with ±20 lines padding (plan §13, step 3)
|
|
@@ -1061,6 +1274,21 @@ export function packageForAgent(rankedResults, searchStats, opts) {
|
|
|
1061
1274
|
if (resultTokenCap <= 0) {
|
|
1062
1275
|
code = '';
|
|
1063
1276
|
codeTokens = 0;
|
|
1277
|
+
} else if (expansion.kind === 'sandwich') {
|
|
1278
|
+
// Sandwich is pre-sized via 10-tokens/line estimate. If actual content
|
|
1279
|
+
// happens to overshoot (very long lines), do NOT call truncateToTokenCap
|
|
1280
|
+
// here — that truncates from the start and would drop the gold tail.
|
|
1281
|
+
// Instead, fall back to gold-only chunk + truncate (agent keeps the
|
|
1282
|
+
// evidence; loses signature, but not the match itself).
|
|
1283
|
+
codeTokens = estimateTokens(code);
|
|
1284
|
+
if (codeTokens > resultTokenCap) {
|
|
1285
|
+
const goldStart = meta.startLine || result.startLine;
|
|
1286
|
+
const goldEnd = meta.endLine || result.endLine;
|
|
1287
|
+
const goldOnly = readFileRange(fileCache, filePath, goldStart, goldEnd, projectRoot) || '';
|
|
1288
|
+
const trunc = truncateToTokenCap(goldOnly, resultTokenCap);
|
|
1289
|
+
code = trunc.code;
|
|
1290
|
+
codeTokens = estimateTokens(code);
|
|
1291
|
+
}
|
|
1064
1292
|
} else if (allocation.presentation === 'full') {
|
|
1065
1293
|
const truncResult = truncateToTokenCap(code, resultTokenCap);
|
|
1066
1294
|
code = truncResult.code;
|
|
@@ -1104,6 +1332,17 @@ export function packageForAgent(rankedResults, searchStats, opts) {
|
|
|
1104
1332
|
score: result.score || result.lateInteractionScore || 0,
|
|
1105
1333
|
expanded: expansion.expanded,
|
|
1106
1334
|
expandedFrom: expansion.expandedFrom,
|
|
1335
|
+
expansionKind: expansion.kind || null,
|
|
1336
|
+
...(expansion.kind === 'sandwich' && expansion.sandwich
|
|
1337
|
+
? {
|
|
1338
|
+
sandwich: {
|
|
1339
|
+
partKinds: expansion.sandwich.parts.map(p => p.kind),
|
|
1340
|
+
elidedHead: expansion.sandwich.elidedHead,
|
|
1341
|
+
elidedTail: expansion.sandwich.elidedTail,
|
|
1342
|
+
elisionMarkers: expansion.sandwich.elisionMarkers,
|
|
1343
|
+
},
|
|
1344
|
+
}
|
|
1345
|
+
: {}),
|
|
1107
1346
|
presentation: allocation.presentation,
|
|
1108
1347
|
stale,
|
|
1109
1348
|
indexedAt,
|
|
@@ -1148,7 +1387,7 @@ export function packageForAgent(rankedResults, searchStats, opts) {
|
|
|
1148
1387
|
return {
|
|
1149
1388
|
query,
|
|
1150
1389
|
regex,
|
|
1151
|
-
mode: 'pattern',
|
|
1390
|
+
mode: modeOpt || searchStats?.path || 'pattern',
|
|
1152
1391
|
totalResults: rankedResults.length,
|
|
1153
1392
|
latencyMs: searchStats?.total_ms || 0,
|
|
1154
1393
|
packagingMs,
|
package/core/search/index.js
CHANGED
|
@@ -14,3 +14,7 @@ export * from './search-pattern.js';
|
|
|
14
14
|
export * from './search-semantic.js';
|
|
15
15
|
export { formatResults, formatGrepResults, formatStructuralResults, enrichWithSummaries, formatSummaryFirst, formatMiddleRes } from './search-format.js';
|
|
16
16
|
export { hybridSearch, hybridSearchV2 } from './search-hybrid.js';
|
|
17
|
+
|
|
18
|
+
// Read tools — filesystem-grounded exact reads + hybrid semantic span selection
|
|
19
|
+
export { readFile, readFiles, formatReadResults, handleReadCli } from './search-read.js';
|
|
20
|
+
export { readSemantic, formatReadSemanticResult, handleReadSemanticCli } from './search-read-semantic.js';
|
|
@@ -328,7 +328,7 @@ Examples:
|
|
|
328
328
|
let summaryFirst = false;
|
|
329
329
|
let middleRes = false;
|
|
330
330
|
let forceCold = false;
|
|
331
|
-
let agentFormat = null; // null | 'agent_preview' | 'agent_full'
|
|
331
|
+
let agentFormat = null; // null | 'agent_preview' | 'agent_full' | 'agent_full_xl'
|
|
332
332
|
let agentBudget = null;
|
|
333
333
|
|
|
334
334
|
for (let i = isGrepCommand ? 1 : 0; i < args.length; i++) {
|
|
@@ -385,6 +385,8 @@ Examples:
|
|
|
385
385
|
agentFormat = agentFormat || 'agent_preview';
|
|
386
386
|
} else if (arg === '--agent-full') {
|
|
387
387
|
agentFormat = 'agent_full';
|
|
388
|
+
} else if (arg === '--agent-full-xl') {
|
|
389
|
+
agentFormat = 'agent_full_xl';
|
|
388
390
|
} else if (arg === '--budget' && args[i + 1]) {
|
|
389
391
|
agentBudget = parseInt(args[++i], 10);
|
|
390
392
|
agentFormat = agentFormat || 'agent_preview';
|
|
@@ -260,9 +260,9 @@ export async function patternSearch(query, routing, options = {}) {
|
|
|
260
260
|
};
|
|
261
261
|
|
|
262
262
|
// Agent mode: return proper agent schema even for zero results
|
|
263
|
-
if (format === 'agent' || format === 'agent_preview' || format === 'agent_full') {
|
|
263
|
+
if (format === 'agent' || format === 'agent_preview' || format === 'agent_full' || format === 'agent_full_xl') {
|
|
264
264
|
const agentResponse = packageForAgent([], emptyStats, {
|
|
265
|
-
query, regex, format, tokenBudget, ablations, projectRoot: this.projectRoot || PROJECT_ROOT,
|
|
265
|
+
query, regex, mode: 'pattern', format, tokenBudget, ablations, projectRoot: this.projectRoot || PROJECT_ROOT,
|
|
266
266
|
});
|
|
267
267
|
agentResponse.stats = emptyStats;
|
|
268
268
|
return agentResponse;
|
|
@@ -432,11 +432,12 @@ export async function patternSearch(query, routing, options = {}) {
|
|
|
432
432
|
|
|
433
433
|
// Agent mode: post-ranking context packaging (Phases 1-5)
|
|
434
434
|
// Ranking is frozen — agent mode only transforms presentation.
|
|
435
|
-
if (format === 'agent' || format === 'agent_preview' || format === 'agent_full') {
|
|
435
|
+
if (format === 'agent' || format === 'agent_preview' || format === 'agent_full' || format === 'agent_full_xl') {
|
|
436
436
|
const searchDir = this.projectRoot || PROJECT_ROOT;
|
|
437
437
|
const agentResponse = packageForAgent(results, stats, {
|
|
438
438
|
query,
|
|
439
439
|
regex,
|
|
440
|
+
mode: 'pattern',
|
|
440
441
|
format,
|
|
441
442
|
tokenBudget,
|
|
442
443
|
codeGraphRepo: this.codeGraphRepo || null,
|