@rce-mcp/retrieval-core 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -105,6 +105,15 @@ export interface RetrievalRerankConfig {
105
105
  max_chunks_per_path_file_lookup: number;
106
106
  same_directory_penalty: number;
107
107
  same_extension_penalty: number;
108
+ merge_overlapping_chunks_enabled: boolean;
109
+ merge_gap_lines: number;
110
+ merge_max_span_lines: number;
111
+ smart_cutoff_enabled: boolean;
112
+ smart_cutoff_min_k: number;
113
+ smart_cutoff_max_k: number;
114
+ smart_cutoff_min_score: number;
115
+ smart_cutoff_top_ratio: number;
116
+ smart_cutoff_delta_abs: number;
108
117
  }
109
118
 
110
119
  export interface RetrievalScoringConfig {
@@ -133,8 +142,33 @@ export interface RetrievalChunkingConfig {
133
142
  fallback_strategy: "sliding";
134
143
  target_chunk_tokens: number;
135
144
  chunk_overlap_tokens: number;
145
+ budget_tokenizer: "ranking" | "lightweight";
146
+ boundary_strictness: "legacy" | "semantic_js_ts";
136
147
  parse_timeout_ms: number;
137
148
  enabled_languages: string[];
149
+ recursive_semantic_chunking_enabled: boolean;
150
+ semantic_merge_gap_lines: number;
151
+ semantic_merge_max_span_lines: number;
152
+ comment_forward_absorb_enabled: boolean;
153
+ embedding_context_prefix_enabled: boolean;
154
+ }
155
+
156
+ export interface RetrievalContextPackingConfig {
157
+ enabled: boolean;
158
+ max_spans_per_result: number;
159
+ max_gap_lines: number;
160
+ max_snippet_chars: number;
161
+ enhancer_snippet_char_limit: number;
162
+ }
163
+
164
+ export interface RetrievalSnippetIntegrityConfig {
165
+ enabled: boolean;
166
+ target_languages: string[];
167
+ max_contiguous_gap_lines: number;
168
+ marker_template_version: "v1";
169
+ repair_enabled: boolean;
170
+ repair_max_envelope_lines: number;
171
+ repair_max_snippet_chars: number;
138
172
  }
139
173
 
140
174
  export type RetrievalScoringConfigInput = Partial<{
@@ -150,8 +184,25 @@ export type RetrievalChunkingConfigInput = Partial<{
150
184
  fallback_strategy: "sliding";
151
185
  target_chunk_tokens: number;
152
186
  chunk_overlap_tokens: number;
187
+ budget_tokenizer: "ranking" | "lightweight";
188
+ boundary_strictness: "legacy" | "semantic_js_ts";
153
189
  parse_timeout_ms: number;
154
190
  enabled_languages: string[];
191
+ recursive_semantic_chunking_enabled: boolean;
192
+ semantic_merge_gap_lines: number;
193
+ semantic_merge_max_span_lines: number;
194
+ comment_forward_absorb_enabled: boolean;
195
+ embedding_context_prefix_enabled: boolean;
196
+ }>;
197
+ export type RetrievalContextPackingConfigInput = Partial<RetrievalContextPackingConfig>;
198
+ export type RetrievalSnippetIntegrityConfigInput = Partial<{
199
+ enabled: boolean;
200
+ target_languages: string[];
201
+ max_contiguous_gap_lines: number;
202
+ marker_template_version: "v1";
203
+ repair_enabled: boolean;
204
+ repair_max_envelope_lines: number;
205
+ repair_max_snippet_chars: number;
155
206
  }>;
156
207
 
157
208
  export const BASELINE_RETRIEVAL_SCORING_CONFIG: RetrievalScoringConfig = {
@@ -196,7 +247,16 @@ export const BASELINE_RETRIEVAL_SCORING_CONFIG: RetrievalScoringConfig = {
196
247
  max_chunks_per_path_default: 2,
197
248
  max_chunks_per_path_file_lookup: 1,
198
249
  same_directory_penalty: 0,
199
- same_extension_penalty: 0
250
+ same_extension_penalty: 0,
251
+ merge_overlapping_chunks_enabled: true,
252
+ merge_gap_lines: 6,
253
+ merge_max_span_lines: 220,
254
+ smart_cutoff_enabled: false,
255
+ smart_cutoff_min_k: 2,
256
+ smart_cutoff_max_k: 8,
257
+ smart_cutoff_min_score: 0.25,
258
+ smart_cutoff_top_ratio: 0.5,
259
+ smart_cutoff_delta_abs: 0.25
200
260
  }
201
261
  };
202
262
 
@@ -248,7 +308,16 @@ export const CONSERVATIVE_RETRIEVAL_SCORING_CONFIG: RetrievalScoringConfig = {
248
308
  max_chunks_per_path_default: 2,
249
309
  max_chunks_per_path_file_lookup: 1,
250
310
  same_directory_penalty: 0,
251
- same_extension_penalty: 0
311
+ same_extension_penalty: 0,
312
+ merge_overlapping_chunks_enabled: true,
313
+ merge_gap_lines: 6,
314
+ merge_max_span_lines: 220,
315
+ smart_cutoff_enabled: false,
316
+ smart_cutoff_min_k: 2,
317
+ smart_cutoff_max_k: 8,
318
+ smart_cutoff_min_score: 0.25,
319
+ smart_cutoff_top_ratio: 0.5,
320
+ smart_cutoff_delta_abs: 0.25
252
321
  }
253
322
  };
254
323
 
@@ -270,8 +339,33 @@ export const DEFAULT_RETRIEVAL_CHUNKING_CONFIG: RetrievalChunkingConfig = {
270
339
  fallback_strategy: "sliding",
271
340
  target_chunk_tokens: DEFAULT_TARGET_CHUNK_TOKENS,
272
341
  chunk_overlap_tokens: DEFAULT_CHUNK_OVERLAP_TOKENS,
342
+ budget_tokenizer: "ranking",
343
+ boundary_strictness: "legacy",
273
344
  parse_timeout_ms: 80,
274
- enabled_languages: ["typescript", "javascript", "python", "go"]
345
+ enabled_languages: ["typescript", "javascript", "python", "go"],
346
+ recursive_semantic_chunking_enabled: false,
347
+ semantic_merge_gap_lines: 6,
348
+ semantic_merge_max_span_lines: 220,
349
+ comment_forward_absorb_enabled: true,
350
+ embedding_context_prefix_enabled: true
351
+ };
352
+
353
+ export const DEFAULT_RETRIEVAL_CONTEXT_PACKING_CONFIG: RetrievalContextPackingConfig = {
354
+ enabled: false,
355
+ max_spans_per_result: 3,
356
+ max_gap_lines: 120,
357
+ max_snippet_chars: 3_200,
358
+ enhancer_snippet_char_limit: 2_200
359
+ };
360
+
361
+ export const DEFAULT_RETRIEVAL_SNIPPET_INTEGRITY_CONFIG: RetrievalSnippetIntegrityConfig = {
362
+ enabled: false,
363
+ target_languages: ["typescript", "tsx", "javascript", "jsx"],
364
+ max_contiguous_gap_lines: 6,
365
+ marker_template_version: "v1",
366
+ repair_enabled: false,
367
+ repair_max_envelope_lines: 260,
368
+ repair_max_snippet_chars: 3_600
275
369
  };
276
370
 
277
371
  const BUILTIN_RETRIEVAL_SCORING_PROFILES = {
@@ -337,6 +431,36 @@ function validateScoringConfig(config: RetrievalScoringConfig): void {
337
431
  if (rerank.same_extension_penalty < 0) {
338
432
  throw new Error("invalid retrieval scoring config: rerank.same_extension_penalty must be >= 0");
339
433
  }
434
+ if (typeof rerank.merge_overlapping_chunks_enabled !== "boolean") {
435
+ throw new Error("invalid retrieval scoring config: rerank.merge_overlapping_chunks_enabled must be boolean");
436
+ }
437
+ if (!Number.isInteger(rerank.merge_gap_lines) || rerank.merge_gap_lines < 0) {
438
+ throw new Error("invalid retrieval scoring config: rerank.merge_gap_lines must be an integer >= 0");
439
+ }
440
+ if (!Number.isInteger(rerank.merge_max_span_lines) || rerank.merge_max_span_lines <= 0) {
441
+ throw new Error("invalid retrieval scoring config: rerank.merge_max_span_lines must be a positive integer");
442
+ }
443
+ if (typeof rerank.smart_cutoff_enabled !== "boolean") {
444
+ throw new Error("invalid retrieval scoring config: rerank.smart_cutoff_enabled must be boolean");
445
+ }
446
+ if (!Number.isInteger(rerank.smart_cutoff_min_k) || rerank.smart_cutoff_min_k <= 0) {
447
+ throw new Error("invalid retrieval scoring config: rerank.smart_cutoff_min_k must be a positive integer");
448
+ }
449
+ if (!Number.isInteger(rerank.smart_cutoff_max_k) || rerank.smart_cutoff_max_k <= 0) {
450
+ throw new Error("invalid retrieval scoring config: rerank.smart_cutoff_max_k must be a positive integer");
451
+ }
452
+ if (rerank.smart_cutoff_max_k < rerank.smart_cutoff_min_k) {
453
+ throw new Error("invalid retrieval scoring config: rerank.smart_cutoff_max_k must be >= smart_cutoff_min_k");
454
+ }
455
+ assertFiniteNumber(rerank.smart_cutoff_min_score, "rerank.smart_cutoff_min_score");
456
+ assertFiniteNumber(rerank.smart_cutoff_top_ratio, "rerank.smart_cutoff_top_ratio");
457
+ assertFiniteNumber(rerank.smart_cutoff_delta_abs, "rerank.smart_cutoff_delta_abs");
458
+ if (rerank.smart_cutoff_top_ratio <= 0 || rerank.smart_cutoff_top_ratio > 1) {
459
+ throw new Error("invalid retrieval scoring config: rerank.smart_cutoff_top_ratio must be in (0, 1]");
460
+ }
461
+ if (rerank.smart_cutoff_delta_abs < 0) {
462
+ throw new Error("invalid retrieval scoring config: rerank.smart_cutoff_delta_abs must be >= 0");
463
+ }
340
464
  }
341
465
 
342
466
  export function resolveRetrievalScoringProfile(profile_id: string | undefined): {
@@ -466,6 +590,12 @@ function validateChunkingConfig(config: RetrievalChunkingConfig): void {
466
590
  if (!Number.isInteger(config.parse_timeout_ms) || config.parse_timeout_ms <= 0) {
467
591
  throw new Error("invalid retrieval chunking config: parse_timeout_ms must be a positive integer");
468
592
  }
593
+ if (config.budget_tokenizer !== "ranking" && config.budget_tokenizer !== "lightweight") {
594
+ throw new Error("invalid retrieval chunking config: budget_tokenizer must be ranking|lightweight");
595
+ }
596
+ if (config.boundary_strictness !== "legacy" && config.boundary_strictness !== "semantic_js_ts") {
597
+ throw new Error("invalid retrieval chunking config: boundary_strictness must be legacy|semantic_js_ts");
598
+ }
469
599
  if (!Array.isArray(config.enabled_languages) || config.enabled_languages.length === 0) {
470
600
  throw new Error("invalid retrieval chunking config: enabled_languages must include at least one language");
471
601
  }
@@ -474,6 +604,21 @@ function validateChunkingConfig(config: RetrievalChunkingConfig): void {
474
604
  throw new Error("invalid retrieval chunking config: enabled_languages must contain non-empty strings");
475
605
  }
476
606
  }
607
+ if (typeof config.recursive_semantic_chunking_enabled !== "boolean") {
608
+ throw new Error("invalid retrieval chunking config: recursive_semantic_chunking_enabled must be boolean");
609
+ }
610
+ if (!Number.isInteger(config.semantic_merge_gap_lines) || config.semantic_merge_gap_lines < 0) {
611
+ throw new Error("invalid retrieval chunking config: semantic_merge_gap_lines must be a non-negative integer");
612
+ }
613
+ if (!Number.isInteger(config.semantic_merge_max_span_lines) || config.semantic_merge_max_span_lines <= 0) {
614
+ throw new Error("invalid retrieval chunking config: semantic_merge_max_span_lines must be a positive integer");
615
+ }
616
+ if (typeof config.comment_forward_absorb_enabled !== "boolean") {
617
+ throw new Error("invalid retrieval chunking config: comment_forward_absorb_enabled must be boolean");
618
+ }
619
+ if (typeof config.embedding_context_prefix_enabled !== "boolean") {
620
+ throw new Error("invalid retrieval chunking config: embedding_context_prefix_enabled must be boolean");
621
+ }
477
622
  }
478
623
 
479
624
  export function mergeRetrievalChunkingConfig(
@@ -489,6 +634,110 @@ export function mergeRetrievalChunkingConfig(
489
634
  return next;
490
635
  }
491
636
 
637
/**
 * Checks every field of a resolved context packing config and throws an
 * `Error` describing the first field that is invalid.
 *
 * Checks run in declaration order: `enabled` must be a boolean,
 * `max_spans_per_result`, `max_snippet_chars` and `enhancer_snippet_char_limit`
 * must be positive integers, and `max_gap_lines` must be a non-negative integer.
 *
 * @param config - the config to validate; it is not modified.
 * @throws Error when any field fails its check.
 */
function validateContextPackingConfig(config: RetrievalContextPackingConfig): void {
  function reject(detail: string): never {
    throw new Error(`invalid retrieval context packing config: ${detail}`);
  }
  function isPositiveInteger(value: number): boolean {
    return Number.isInteger(value) && !(value <= 0);
  }

  if (typeof config.enabled !== "boolean") {
    reject("enabled must be boolean");
  }
  if (!isPositiveInteger(config.max_spans_per_result)) {
    reject("max_spans_per_result must be a positive integer");
  }
  // Zero is allowed here, unlike the other numeric fields.
  if (!(Number.isInteger(config.max_gap_lines) && !(config.max_gap_lines < 0))) {
    reject("max_gap_lines must be a non-negative integer");
  }
  if (!isPositiveInteger(config.max_snippet_chars)) {
    reject("max_snippet_chars must be a positive integer");
  }
  if (!isPositiveInteger(config.enhancer_snippet_char_limit)) {
    reject("enhancer_snippet_char_limit must be a positive integer");
  }
}
654
+
655
/**
 * Builds a context packing config by layering `overrides` on top of `base`
 * and validating the result.
 *
 * @param base - the config supplying every field not present in `overrides`.
 * @param overrides - optional partial config; its own properties replace those of `base`.
 * @returns a new config object; neither argument is mutated.
 * @throws Error when the combined config fails validation.
 */
export function mergeRetrievalContextPackingConfig(
  base: RetrievalContextPackingConfig,
  overrides?: RetrievalContextPackingConfigInput
): RetrievalContextPackingConfig {
  // Object.assign skips an undefined source, matching a spread of `overrides ?? {}`.
  const merged: RetrievalContextPackingConfig = Object.assign({}, base, overrides ?? {});
  validateContextPackingConfig(merged);
  return merged;
}
666
+
667
/**
 * Maps a language name or alias to its canonical snippet-integrity language id.
 *
 * Input is trimmed and lowercased before matching. Recognised ids are
 * "typescript" (aliases ts, mts, cts), "tsx", "javascript" (aliases js, mjs, cjs)
 * and "jsx".
 *
 * @param value - language name or alias.
 * @returns the canonical id, or `undefined` when the value is not recognised.
 */
function normalizeSnippetIntegrityLanguage(value: string): string | undefined {
  switch (value.trim().toLowerCase()) {
    case "typescript":
    case "ts":
    case "mts":
    case "cts":
      return "typescript";
    case "tsx":
      return "tsx";
    case "javascript":
    case "js":
    case "mjs":
    case "cjs":
      return "javascript";
    case "jsx":
      return "jsx";
    default:
      return undefined;
  }
}
683
+
684
/**
 * Normalises a list of language names: trims and lowercases each entry, drops
 * entries that are empty after trimming, replaces known aliases with their
 * canonical id, and removes duplicates while keeping first-seen order.
 *
 * Entries that are not recognised are kept in their trimmed, lowercased form.
 *
 * @param value - raw language names.
 * @returns a new de-duplicated array.
 */
function normalizeSnippetIntegrityLanguageList(value: string[]): string[] {
  const canonical = value
    .map((language) => language.trim().toLowerCase())
    .filter((language) => language.length !== 0)
    .map((language) => normalizeSnippetIntegrityLanguage(language) ?? language);
  // A Set iterates in insertion order, so the first occurrence of each id wins.
  return Array.from(new Set<string>(canonical));
}
695
+
696
/**
 * Checks every field of a resolved snippet integrity config and throws an
 * `Error` describing the first field that is invalid.
 *
 * `target_languages` must be a non-empty array of non-blank strings, each of
 * which `normalizeSnippetIntegrityLanguage` recognises. The two `repair_max_*`
 * limits must be positive integers, `max_contiguous_gap_lines` a non-negative
 * integer, and `marker_template_version` exactly "v1".
 *
 * @param config - the config to validate; it is not modified.
 * @throws Error when any field fails its check.
 */
function validateSnippetIntegrityConfig(config: RetrievalSnippetIntegrityConfig): void {
  function reject(detail: string): never {
    throw new Error(`invalid retrieval snippet integrity config: ${detail}`);
  }
  function isPositiveInteger(value: number): boolean {
    return Number.isInteger(value) && !(value <= 0);
  }

  if (typeof config.enabled !== "boolean") {
    reject("enabled must be boolean");
  }

  const languages = config.target_languages;
  if (!Array.isArray(languages) || languages.length === 0) {
    reject("target_languages must include at least one language");
  }
  for (const language of languages) {
    if (typeof language !== "string" || language.trim().length === 0) {
      reject("target_languages must contain non-empty strings");
    }
    if (!normalizeSnippetIntegrityLanguage(language)) {
      reject("unsupported target language");
    }
  }

  if (!(Number.isInteger(config.max_contiguous_gap_lines) && !(config.max_contiguous_gap_lines < 0))) {
    reject("max_contiguous_gap_lines must be a non-negative integer");
  }
  if (config.marker_template_version !== "v1") {
    reject("marker_template_version must be v1");
  }
  if (typeof config.repair_enabled !== "boolean") {
    reject("repair_enabled must be boolean");
  }
  if (!isPositiveInteger(config.repair_max_envelope_lines)) {
    reject("repair_max_envelope_lines must be a positive integer");
  }
  if (!isPositiveInteger(config.repair_max_snippet_chars)) {
    reject("repair_max_snippet_chars must be a positive integer");
  }
}
727
+
728
/**
 * Builds a snippet integrity config by layering `overrides` on top of `base`,
 * normalising the language list, and validating the result.
 *
 * The language list comes from `overrides.target_languages` when provided,
 * otherwise from `base.target_languages`; either way it is passed through
 * `normalizeSnippetIntegrityLanguageList` before validation.
 *
 * @param base - the config supplying every field not present in `overrides`.
 * @param overrides - optional partial config.
 * @returns a new config object.
 * @throws Error when the combined config fails validation.
 */
export function mergeRetrievalSnippetIntegrityConfig(
  base: RetrievalSnippetIntegrityConfig,
  overrides?: RetrievalSnippetIntegrityConfigInput
): RetrievalSnippetIntegrityConfig {
  const layered = { ...base, ...(overrides ?? {}) };
  const requestedLanguages = overrides?.target_languages ?? base.target_languages;
  const next: RetrievalSnippetIntegrityConfig = {
    ...layered,
    target_languages: normalizeSnippetIntegrityLanguageList(requestedLanguages)
  };
  validateSnippetIntegrityConfig(next);
  return next;
}
740
+
492
741
  function stableSerialize(value: unknown): string {
493
742
  if (Array.isArray(value)) {
494
743
  return `[${value.map((entry) => stableSerialize(entry)).join(",")}]`;
@@ -621,6 +870,8 @@ export interface RetrievalCoreOptions {
621
870
  enhancerConfig?: RetrievalEnhancerConfigInput;
622
871
  enhancerGenerationConfig?: RetrievalEnhancerGenerationConfigInput;
623
872
  chunkingConfig?: RetrievalChunkingConfigInput;
873
+ contextPackingConfig?: RetrievalContextPackingConfigInput;
874
+ snippetIntegrityConfig?: RetrievalSnippetIntegrityConfigInput;
624
875
  enhancerDecisionTraceEnabled?: boolean;
625
876
  }
626
877
 
@@ -849,10 +1100,25 @@ function singularizeToken(token: string): string | undefined {
849
1100
  return undefined;
850
1101
  }
851
1102
 
1103
/**
 * Cheap tokenizer: NFKC-normalises the text, extracts every maximal run of
 * ASCII letters, digits and underscores, and lowercases each run.
 *
 * @param text - text to tokenize.
 * @returns the lowercased tokens in order of appearance (empty when none).
 */
function tokenizeLightweight(text: string): string[] {
  const runs = text.normalize("NFKC").match(/[A-Za-z0-9_]+/g);
  if (runs === null) {
    return [];
  }
  // Lowercasing happens after extraction so it cannot change which characters count as token characters.
  return runs.map((run) => run.toLowerCase());
}
1110
+
852
1111
  function tokenize(text: string): string[] {
853
1112
  return tokenizeForRanking(text);
854
1113
  }
855
1114
 
1115
/**
 * Tokenizes text with the tokenizer selected by `mode`: `tokenizeLightweight`
 * for "lightweight", otherwise `tokenize`.
 *
 * @param text - text to tokenize.
 * @param mode - which tokenizer to use.
 * @returns the tokens produced by the selected tokenizer.
 */
function chunkBudgetTokenize(text: string, mode: "ranking" | "lightweight"): string[] {
  return mode === "lightweight" ? tokenizeLightweight(text) : tokenize(text);
}
1121
+
856
1122
  function lexicalScore(query: string, haystack: string): number {
857
1123
  const q = new Set(tokenize(query));
858
1124
  if (q.size === 0) {
@@ -1356,6 +1622,7 @@ interface ChunkBuildReport {
1356
1622
  language_aware_attempt_latency_ms?: number;
1357
1623
  fallback_path_latency_ms?: number;
1358
1624
  language?: string;
1625
+ recursive_semantic_chunking_used?: boolean;
1359
1626
  }
1360
1627
 
1361
1628
  function buildChunks(file: RawFile, chunkingConfig: RetrievalChunkingConfig): ChunkBuildReport {
@@ -1371,11 +1638,17 @@ function buildChunks(file: RawFile, chunkingConfig: RetrievalChunkingConfig): Ch
1371
1638
  fallback_strategy: chunkingConfig.fallback_strategy,
1372
1639
  target_chunk_tokens: chunkingConfig.target_chunk_tokens,
1373
1640
  chunk_overlap_tokens: chunkingConfig.chunk_overlap_tokens,
1641
+ budget_tokenizer: chunkingConfig.budget_tokenizer,
1642
+ boundary_strictness: chunkingConfig.boundary_strictness,
1374
1643
  max_chunks_per_file: MAX_CHUNKS_PER_FILE,
1375
1644
  parse_timeout_ms: chunkingConfig.parse_timeout_ms,
1376
- enabled_languages: chunkingConfig.enabled_languages
1645
+ enabled_languages: chunkingConfig.enabled_languages,
1646
+ recursive_semantic_chunking_enabled: chunkingConfig.recursive_semantic_chunking_enabled,
1647
+ semantic_merge_gap_lines: chunkingConfig.semantic_merge_gap_lines,
1648
+ semantic_merge_max_span_lines: chunkingConfig.semantic_merge_max_span_lines,
1649
+ comment_forward_absorb_enabled: chunkingConfig.comment_forward_absorb_enabled
1377
1650
  },
1378
- tokenize
1651
+ tokenize: (text: string) => chunkBudgetTokenize(text, chunkingConfig.budget_tokenizer)
1379
1652
  });
1380
1653
 
1381
1654
  return {
@@ -1394,10 +1667,38 @@ function buildChunks(file: RawFile, chunkingConfig: RetrievalChunkingConfig): Ch
1394
1667
  parse_latency_ms: chunkingResult.parse_latency_ms,
1395
1668
  language_aware_attempt_latency_ms: chunkingResult.language_aware_attempt_latency_ms,
1396
1669
  fallback_path_latency_ms: chunkingResult.fallback_path_latency_ms,
1397
- language: chunkingResult.language
1670
+ language: chunkingResult.language,
1671
+ recursive_semantic_chunking_used: chunkingResult.recursive_semantic_chunking_used
1398
1672
  };
1399
1673
  }
1400
1674
 
1675
/**
 * Produces the text that is embedded for a chunk.
 *
 * Returns the bare snippet when `embedding_context_prefix_enabled` is off or
 * when the provider id (trimmed, case-insensitive) is "deterministic".
 * Otherwise the snippet is preceded by a one-line header of the form
 * `<path>:<start>-<end>[ > <symbol>]`, where `<path>` is the last two segments
 * of the normalised path (or the whole normalised path when it has two
 * segments or fewer) and `<symbol>` is whatever `detectSnippetSymbolName`
 * finds in the snippet.
 *
 * @param chunk - the indexed chunk.
 * @param config - chunking config; only `embedding_context_prefix_enabled` is read.
 * @param embeddingProviderId - id of the embedding provider in use.
 * @returns the text to embed.
 */
function buildChunkEmbeddingText(
  chunk: IndexedChunk,
  config: RetrievalChunkingConfig,
  embeddingProviderId: string
): string {
  const providerIsDeterministic = embeddingProviderId.trim().toLowerCase() === "deterministic";
  const prefixWanted = config.embedding_context_prefix_enabled && !providerIsDeterministic;
  if (!prefixWanted) {
    return chunk.snippet;
  }

  const fullPath = normalizePath(chunk.path);
  const segments = fullPath.split("/").filter(Boolean);
  let shortPath = fullPath;
  if (segments.length > 2) {
    shortPath = segments.slice(-2).join("/");
  }

  const symbolName = detectSnippetSymbolName(chunk.snippet);
  let header = `${shortPath}:${chunk.start_line}-${chunk.end_line}`;
  if (symbolName) {
    header += ` > ${symbolName}`;
  }
  return `${header}\n${chunk.snippet}`;
}
1693
+
1694
/**
 * Applies `buildChunkEmbeddingText` to every chunk, preserving order.
 *
 * @param chunks - chunks to convert.
 * @param config - chunking config forwarded to `buildChunkEmbeddingText`.
 * @param embeddingProviderId - provider id forwarded to `buildChunkEmbeddingText`.
 * @returns one embedding text per chunk.
 */
function buildChunkEmbeddingTexts(
  chunks: IndexedChunk[],
  config: RetrievalChunkingConfig,
  embeddingProviderId: string
): string[] {
  const texts: string[] = [];
  for (const chunk of chunks) {
    texts.push(buildChunkEmbeddingText(chunk, config, embeddingProviderId));
  }
  return texts;
}
1701
+
1401
1702
  function pseudoEmbedding(input: string, dimensions = 24): number[] {
1402
1703
  const safeDimensions = Math.max(1, dimensions);
1403
1704
  let source = sha256(input);
@@ -3858,6 +4159,827 @@ function compareSearchResults(a: SearchContextOutput["results"][number], b: Sear
3858
4159
  return a.end_line - b.end_line;
3859
4160
  }
3860
4161
 
4162
/**
 * Sort comparator that orders results by ascending `start_line`, then by
 * ascending `end_line`, and falls back to `compareSearchResults` when both
 * line numbers are equal.
 *
 * @returns a negative number when `a` sorts first, positive when `b` does.
 */
function compareSearchResultsByLineRange(
  a: SearchContextOutput["results"][number],
  b: SearchContextOutput["results"][number]
): number {
  for (const key of ["start_line", "end_line"] as const) {
    if (a[key] !== b[key]) {
      return a[key] - b[key];
    }
  }
  return compareSearchResults(a, b);
}
4174
+
4175
/**
 * Stitches the snippets of a cluster into a single snippet that spans
 * `mergedStartLine..mergedEndLine` (inclusive).
 *
 * Each candidate's snippet is split into lines and mapped onto absolute line
 * numbers starting at its `start_line`; at most `end_line - start_line + 1`
 * lines are used. When several candidates supply the same line, the one with
 * the higher score wins; scores within 1e-9 of each other are treated as a tie
 * and the candidate that sorts earlier under `compareSearchResults` wins.
 *
 * Lines that no candidate supplies become empty strings. If more than
 * max(2, 20% of the span) lines are missing, the stitched text is discarded
 * and the top-ranked candidate's snippet is returned unchanged.
 *
 * @param cluster - results to stitch together.
 * @param mergedStartLine - first line of the merged span.
 * @param mergedEndLine - last line of the merged span.
 * @returns the stitched snippet, or "" for an empty cluster.
 */
function mergeSnippetCluster(
  cluster: SearchContextOutput["results"],
  mergedStartLine: number,
  mergedEndLine: number
): string {
  const ranked = [...cluster].sort(compareSearchResults);
  const primary = ranked[0];
  if (!primary) {
    return "";
  }

  const chosenByLine = new Map<number, { text: string; score: number; rank: number }>();
  ranked.forEach((candidate, rank) => {
    if (!candidate) {
      return;
    }
    const snippetLines = candidate.snippet.replace(/\r\n/g, "\n").split("\n");
    const declaredLineCount = Math.max(1, candidate.end_line - candidate.start_line + 1);
    const usableLineCount = Math.min(snippetLines.length, declaredLineCount);

    for (let offset = 0; offset < usableLineCount; offset += 1) {
      const lineNumber = candidate.start_line + offset;
      const outsideMergedSpan = lineNumber < mergedStartLine || lineNumber > mergedEndLine;
      const text = snippetLines[offset];
      if (outsideMergedSpan || typeof text !== "string") {
        continue;
      }

      const current = chosenByLine.get(lineNumber);
      let replace = current === undefined;
      if (current !== undefined) {
        const clearlyBetter = candidate.score > current.score + 1e-9;
        const tiedButEarlier = Math.abs(candidate.score - current.score) <= 1e-9 && rank < current.rank;
        replace = clearlyBetter || tiedButEarlier;
      }
      if (replace) {
        chosenByLine.set(lineNumber, { text, score: candidate.score, rank });
      }
    }
  });

  const stitched: string[] = [];
  let gapCount = 0;
  for (let line = mergedStartLine; line <= mergedEndLine; line += 1) {
    const choice = chosenByLine.get(line);
    if (choice) {
      stitched.push(choice.text);
    } else {
      gapCount += 1;
      stitched.push("");
    }
  }

  const spanLineCount = Math.max(1, mergedEndLine - mergedStartLine + 1);
  const allowedGaps = Math.max(2, Math.floor(spanLineCount * 0.2));
  return gapCount > allowedGaps ? primary.snippet : stitched.join("\n");
}
4230
+
4231
/**
 * Collapses a cluster of results into a single result.
 *
 * A one-element cluster is returned as-is. Otherwise the result is a copy of
 * the candidate that sorts first under `compareSearchResults`, with its line
 * range widened to cover every candidate and its snippet replaced by the
 * output of `mergeSnippetCluster` (the primary snippet is kept if that output
 * is empty).
 *
 * NOTE(review): `mergeSnippetCluster` can itself fall back to the primary
 * snippet; in that case the widened line range and the snippet no longer
 * describe the same lines — confirm callers tolerate this.
 *
 * @param cluster - non-empty list of results to merge.
 * @returns the merged result.
 * @throws Error when `cluster` is empty.
 */
function mergeCandidateCluster(cluster: SearchContextOutput["results"]): SearchContextOutput["results"][number] {
  switch (cluster.length) {
    case 0:
      throw new Error("mergeCandidateCluster requires at least one candidate");
    case 1:
      return cluster[0]!;
    default:
      break;
  }

  const primary = [...cluster].sort(compareSearchResults)[0]!;

  let mergedStartLine = Infinity;
  let mergedEndLine = -Infinity;
  for (const candidate of cluster) {
    mergedStartLine = Math.min(mergedStartLine, candidate.start_line);
    mergedEndLine = Math.max(mergedEndLine, candidate.end_line);
  }

  const stitchedSnippet = mergeSnippetCluster(cluster, mergedStartLine, mergedEndLine);
  const snippet = stitchedSnippet.length > 0 ? stitchedSnippet : primary.snippet;

  return { ...primary, start_line: mergedStartLine, end_line: mergedEndLine, snippet };
}
4252
+
4253
+ const HEAVY_LINE_RANGE_OVERLAP_RATIO = 0.2;
4254
+
4255
/**
 * Number of lines in the inclusive range `startLine..endLine`, never less than 1
 * (an inverted range counts as a single line).
 */
function lineRangeLength(startLine: number, endLine: number): number {
  const inclusiveSpan = endLine - startLine + 1;
  return Math.max(inclusiveSpan, 1);
}
4258
+
4259
/**
 * Number of lines shared by two inclusive line ranges.
 *
 * @returns the size of the intersection, or 0 when the ranges do not intersect.
 */
function lineRangeOverlapLength(
  aStartLine: number,
  aEndLine: number,
  bStartLine: number,
  bEndLine: number
): number {
  const firstShared = Math.max(aStartLine, bStartLine);
  const lastShared = Math.min(aEndLine, bEndLine);
  return lastShared < firstShared ? 0 : lastShared - firstShared + 1;
}
4272
+
4273
/**
 * Reports whether `candidate` overlaps any already-selected range heavily.
 *
 * For each selected range the overlap (in lines) is divided by the length of
 * the shorter of the two ranges; the candidate counts as heavily overlapping
 * as soon as one ratio reaches `HEAVY_LINE_RANGE_OVERLAP_RATIO`.
 *
 * @param candidate - the result being considered.
 * @param selectedRanges - line ranges that were already accepted.
 * @returns true when at least one selected range is heavily overlapped.
 */
function isHeavilyOverlappingLineRange(
  candidate: SearchContextOutput["results"][number],
  selectedRanges: Array<{ start_line: number; end_line: number }>
): boolean {
  return selectedRanges.some((selected) => {
    const sharedLines = lineRangeOverlapLength(
      selected.start_line,
      selected.end_line,
      candidate.start_line,
      candidate.end_line
    );
    if (sharedLines <= 0) {
      return false;
    }
    const shorterLength = Math.min(
      lineRangeLength(selected.start_line, selected.end_line),
      lineRangeLength(candidate.start_line, candidate.end_line)
    );
    return sharedLines / Math.max(1, shorterLength) >= HEAVY_LINE_RANGE_OVERLAP_RATIO;
  });
}
4298
+
4299
/**
 * Collapses line spans into a sorted list of spans that neither overlap nor
 * touch each other.
 *
 * Spans whose `end_line` precedes their `start_line` are discarded. The rest
 * are sorted by start (then end) and fused whenever the next span starts at or
 * before `previous.end_line + 1`.
 *
 * Inputs of every length go through the same path, so a single inverted span
 * is dropped and the returned objects are always fresh copies — neither the
 * input array nor its span objects are mutated or shared with the result.
 *
 * @param spans - spans to merge, in any order.
 * @returns a new array of merged spans in ascending order.
 */
function mergeLineSpans(spans: Array<{ start_line: number; end_line: number }>): Array<{ start_line: number; end_line: number }> {
  // filter() returns a new array, so the in-place sort never touches the caller's array.
  const ordered = spans
    .filter((span) => span.end_line >= span.start_line)
    .sort((a, b) => a.start_line - b.start_line || a.end_line - b.end_line);

  const merged: Array<{ start_line: number; end_line: number }> = [];
  for (const span of ordered) {
    const last = merged[merged.length - 1];
    if (!last || span.start_line > last.end_line + 1) {
      // Copy so that later widening of `last` cannot leak into the caller's object.
      merged.push({ ...span });
      continue;
    }
    last.end_line = Math.max(last.end_line, span.end_line);
  }
  return merged;
}
4317
+
4318
/**
 * Number of lines strictly between two line ranges.
 *
 * @returns 0 when the ranges overlap or are directly adjacent, otherwise the
 *   count of lines separating them (regardless of which range comes first).
 */
function lineRangeGap(anchor: { start_line: number; end_line: number }, candidate: { start_line: number; end_line: number }): number {
  const candidateIsBelow = candidate.start_line > anchor.end_line;
  if (candidateIsBelow) {
    return candidate.start_line - anchor.end_line - 1;
  }
  const candidateIsAbove = anchor.start_line > candidate.end_line;
  return candidateIsAbove ? anchor.start_line - candidate.end_line - 1 : 0;
}
4327
+
4328
/**
 * Builds a map from absolute line number to the preferred text for that line.
 *
 * Each candidate's snippet is split into lines and mapped onto line numbers
 * starting at its `start_line`; at most `end_line - start_line + 1` lines are
 * used. When several candidates cover the same line, the higher score wins;
 * scores within 1e-9 are a tie and the candidate that sorts earlier under
 * `compareSearchResults` wins.
 *
 * @param candidates - results whose snippets contribute lines.
 * @returns a new map of line number to line text.
 */
function buildPreferredLineMap(candidates: SearchContextOutput["results"]): Map<number, string> {
  const ranked = [...candidates].sort(compareSearchResults);
  const chosenByLine = new Map<number, { text: string; score: number; rank: number }>();

  ranked.forEach((candidate, rank) => {
    if (!candidate) {
      return;
    }
    const snippetLines = candidate.snippet.replace(/\r\n/g, "\n").split("\n");
    const declaredLineCount = Math.max(1, candidate.end_line - candidate.start_line + 1);
    const usableLineCount = Math.min(snippetLines.length, declaredLineCount);

    for (let offset = 0; offset < usableLineCount; offset += 1) {
      const text = snippetLines[offset];
      if (typeof text !== "string") {
        continue;
      }
      const lineNumber = candidate.start_line + offset;
      const current = chosenByLine.get(lineNumber);
      let replace = current === undefined;
      if (current !== undefined) {
        const clearlyBetter = candidate.score > current.score + 1e-9;
        const tiedButEarlier = Math.abs(candidate.score - current.score) <= 1e-9 && rank < current.rank;
        replace = clearlyBetter || tiedButEarlier;
      }
      if (replace) {
        chosenByLine.set(lineNumber, { text, score: candidate.score, rank });
      }
    }
  });

  const textByLine = new Map<number, string>();
  for (const [lineNumber, choice] of chosenByLine) {
    textByLine.set(lineNumber, choice.text);
  }
  return textByLine;
}
4353
+
4354
/**
 * Shortens a snippet to at most `maxChars` UTF-16 code units.
 *
 * Snippets that already fit are returned unchanged. Otherwise the snippet is
 * cut at `maxChars`; if the cut text contains a newline beyond index 80 the
 * result ends at the last such newline, so the final line is not left
 * half-written. Trailing whitespace is removed from the clipped result.
 *
 * The cut never lands between the two halves of a surrogate pair: when it
 * would, the cut moves back by one code unit so the result contains no lone
 * high surrogate.
 *
 * @param snippet - text to clip.
 * @param maxChars - maximum length in UTF-16 code units; values below 0 are treated as 0.
 * @returns the clipped snippet.
 */
function clipSnippetToMaxChars(snippet: string, maxChars: number): string {
  if (snippet.length <= maxChars) {
    return snippet;
  }

  let cutIndex = Math.floor(Math.max(0, maxChars));
  if (cutIndex > 0 && cutIndex < snippet.length) {
    const before = snippet.charCodeAt(cutIndex - 1);
    const after = snippet.charCodeAt(cutIndex);
    const splitsSurrogatePair =
      before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    if (splitsSurrogatePair) {
      cutIndex -= 1;
    }
  }

  const clipped = snippet.slice(0, cutIndex);
  const lastNewline = clipped.lastIndexOf("\n");
  // Only snap back to a line boundary when enough text (more than 80 chars) remains.
  if (lastNewline > 80) {
    return clipped.slice(0, lastNewline).trimEnd();
  }
  return clipped.trimEnd();
}
4365
+
4366
/**
 * Derives the snippet-integrity language id from a file path's extension, as
 * reported by `fileExtension`.
 *
 * @param path - file path.
 * @returns "typescript" for .ts/.mts/.cts, "tsx" for .tsx, "javascript" for
 *   .js/.mjs/.cjs, "jsx" for .jsx, and `undefined` for anything else.
 */
function snippetIntegrityLanguageFromPath(path: string): string | undefined {
  switch (fileExtension(path)) {
    case ".ts":
    case ".mts":
    case ".cts":
      return "typescript";
    case ".tsx":
      return "tsx";
    case ".js":
    case ".mjs":
    case ".cjs":
      return "javascript";
    case ".jsx":
      return "jsx";
    default:
      return undefined;
  }
}
4382
+
4383
/**
 * Returns the first line of the snippet that is not blank, with surrounding
 * whitespace removed, or "" when every line is blank.
 */
function firstNonEmptyLine(snippet: string): string {
  const firstMeaningful = snippet
    .replace(/\r\n/g, "\n")
    .split("\n")
    .map((line) => line.trim())
    .find((line) => line.length > 0);
  return firstMeaningful ?? "";
}
4393
+
4394
/**
 * Returns the last line of the snippet that is not blank, with surrounding
 * whitespace removed, or "" when every line is blank.
 */
function lastNonEmptyLine(snippet: string): string {
  const remaining = snippet.replace(/\r\n/g, "\n").split("\n");
  // Consume lines from the end until one has visible content.
  while (remaining.length > 0) {
    const candidate = (remaining.pop() ?? "").trim();
    if (candidate.length > 0) {
      return candidate;
    }
  }
  return "";
}
4404
+
4405
/**
 * Counts `{` minus `}` across the whole snippet.
 *
 * Every brace character is counted, including any that appear inside string
 * literals or comments.
 *
 * @returns a positive number when more braces open than close, negative for
 *   the opposite, 0 when balanced.
 */
function curlyBraceDelta(snippet: string): number {
  let balance = 0;
  for (const char of snippet) {
    switch (char) {
      case "{":
        balance += 1;
        break;
      case "}":
        balance -= 1;
        break;
      default:
        break;
    }
  }
  return balance;
}
4419
+
4420
/**
 * Patterns that mark a trimmed line as the start of a declaration. Built once
 * at module load instead of on every call.
 */
const DECLARATION_START_PATTERNS: readonly RegExp[] = [
  // function declarations, including `export default [async] function name(`
  /^(?:export\s+)?(?:default\s+)?(?:async\s+)?function\s+[A-Za-z_$][\w$]*\s*\(/u,
  /^(?:export\s+)?(?:default\s+)?class\s+[A-Za-z_$][\w$]*/u,
  /^(?:export\s+)?(?:const|let|var)\s+[A-Za-z_$][\w$]*\s*=/u,
  // class members with one or more modifiers, e.g. `public async load(`
  /^(?:(?:public|private|protected|static|readonly|async)\s+)+[A-Za-z_$][\w$]*\s*\(/u,
  /^(?:[A-Za-z_$][\w$]*)\s*\([^)]*\)\s*\{/u
];

/**
 * Heuristically decides whether a line opens a declaration.
 *
 * A line qualifies when it starts with `@` (decorator) or matches one of the
 * declaration patterns: a named function (optionally `export`, `default`,
 * `async`), a class, a `const`/`let`/`var` assignment, a member preceded by
 * one or more modifiers, or a `name(...) {` header.
 *
 * @param line - a single line, expected to be trimmed already.
 * @returns true when the line looks like a declaration start; false for "".
 */
function looksLikeDeclarationStart(line: string): boolean {
  if (line.length === 0) {
    return false;
  }
  if (line.startsWith("@")) {
    return true;
  }
  return DECLARATION_START_PATTERNS.some((pattern) => pattern.test(line));
}
4435
+
4436
/**
 * Heuristically decides whether a line closes a construct, i.e. whether it
 * ends with one of `}`, `};`, `);`, `]` or `];`.
 *
 * @param line - a single line, expected to be trimmed already.
 * @returns true when the line ends with a closing token; false for "".
 */
function looksLikeSnippetTerminalBoundary(line: string): boolean {
  if (line.length === 0) {
    return false;
  }
  const closingSuffixes = ["}", "};", ");", "]", "];"];
  return closingSuffixes.some((suffix) => line.endsWith(suffix));
}
4448
+
4449
/**
 * Patterns whose first capture group is the declared symbol name. Tried in
 * order against each trimmed line. Built once at module load.
 */
const SNIPPET_SYMBOL_PATTERNS: readonly RegExp[] = [
  // function declarations, including `export default [async] function name(`
  /^(?:export\s+)?(?:default\s+)?(?:async\s+)?function\s+([A-Za-z_$][\w$]*)\s*\(/u,
  /^(?:export\s+)?(?:default\s+)?class\s+([A-Za-z_$][\w$]*)\b/u,
  // arrow functions with a parenthesised parameter list
  /^(?:export\s+)?(?:const|let|var)\s+([A-Za-z_$][\w$]*)\s*=\s*(?:async\s*)?\([^)]*\)\s*=>/u,
  // arrow functions with a single bare parameter
  /^(?:export\s+)?(?:const|let|var)\s+([A-Za-z_$][\w$]*)\s*=\s*(?:async\s*)?[A-Za-z_$][\w$]*\s*=>/u,
  // class members with one or more modifiers, e.g. `private static create(`
  /^(?:(?:public|private|protected|static|readonly|async)\s+)+([A-Za-z_$][\w$]*)\s*\(/u,
  /^([A-Za-z_$][\w$]*)\s*\([^)]*\)\s*\{/u
];

/**
 * Keywords that the generic `name(...) {` pattern can capture but that are not
 * symbol names. `function` covers anonymous `function (...) {` expressions.
 */
const SNIPPET_SYMBOL_DISALLOWED: ReadonlySet<string> = new Set([
  "if",
  "for",
  "while",
  "switch",
  "catch",
  "return",
  "function"
]);

/**
 * Finds the name of the first function, class, arrow-function binding or
 * method declared within the first 40 lines of a snippet.
 *
 * Each non-blank line is trimmed and tested against the symbol patterns in
 * order; the first captured name that is not a control-flow keyword (or the
 * bare word `function`) is returned.
 *
 * @param snippet - source text to scan.
 * @returns the detected symbol name, or `undefined` when none is found.
 */
function detectSnippetSymbolName(snippet: string): string | undefined {
  const lines = snippet.replace(/\r\n/g, "\n").split("\n").slice(0, 40);
  for (const line of lines) {
    const trimmed = line.trim();
    if (trimmed.length === 0) {
      continue;
    }
    for (const pattern of SNIPPET_SYMBOL_PATTERNS) {
      const symbol = trimmed.match(pattern)?.[1];
      if (symbol && !SNIPPET_SYMBOL_DISALLOWED.has(symbol)) {
        return symbol;
      }
    }
  }
  return undefined;
}
4475
+
4476
/**
 * Decides whether a result's snippet should be flagged as truncated, given
 * how many lines of its estimated envelope lie outside the snippet.
 *
 * A snippet is flagged when any of the following holds:
 * - lines are omitted before it and its first non-empty line does not look
 *   like a declaration start;
 * - lines are omitted after it and it either has more `{` than `}` or its
 *   last non-empty line does not look like a terminal boundary;
 * - lines are omitted on both sides.
 *
 * @param result - The search result whose `snippet` is inspected.
 * @param omittedBefore - Envelope lines preceding the snippet.
 * @param omittedAfter - Envelope lines following the snippet.
 * @returns `true` when the snippet should carry a truncation annotation.
 */
function shouldAnnotateSnippetAsTruncated(
  result: SearchResultRow,
  omittedBefore: number,
  omittedAfter: number
): boolean {
  // Nothing omitted on either side: the snippet already covers its envelope.
  if (omittedBefore <= 0 && omittedAfter <= 0) {
    return false;
  }

  const { snippet } = result;
  const openingLine = firstNonEmptyLine(snippet);
  const closingLine = lastNonEmptyLine(snippet);
  const hasLeadingGap = omittedBefore > 0;
  const hasTrailingGap = omittedAfter > 0;

  const startsMidDeclaration = hasLeadingGap && !looksLikeDeclarationStart(openingLine);
  if (startsMidDeclaration) {
    return true;
  }

  const endsMidBody =
    hasTrailingGap && (curlyBraceDelta(snippet) > 0 || !looksLikeSnippetTerminalBoundary(closingLine));
  return endsMidBody || (hasLeadingGap && hasTrailingGap);
}
4499
+
4500
/**
 * Grows a line range outward from the anchor by absorbing every candidate
 * whose gap to the current range (as measured by `lineRangeGap`) does not
 * exceed `maxGapLines`.
 *
 * Candidates are re-scanned until a full pass leaves the range unchanged, so
 * a candidate that is initially too far away can still be absorbed once the
 * range has been widened by another candidate.
 *
 * @param input.anchor - Result whose range seeds the envelope.
 * @param input.candidates - Rows considered for absorption.
 * @param input.maxGapLines - Largest gap that still counts as contiguous.
 * @returns The widened `{ start_line, end_line }` range.
 */
function estimateContiguousEnvelope(input: {
  anchor: SearchResultRow;
  candidates: SearchResultRow[];
  maxGapLines: number;
}): { start_line: number; end_line: number } {
  const envelope = { start_line: input.anchor.start_line, end_line: input.anchor.end_line };
  let grew: boolean;
  do {
    grew = false;
    for (const candidate of input.candidates) {
      if (lineRangeGap({ ...envelope }, candidate) > input.maxGapLines) {
        continue;
      }
      const widenedStart = Math.min(envelope.start_line, candidate.start_line);
      const widenedEnd = Math.max(envelope.end_line, candidate.end_line);
      if (widenedStart === envelope.start_line && widenedEnd === envelope.end_line) {
        continue;
      }
      envelope.start_line = widenedStart;
      envelope.end_line = widenedEnd;
      grew = true;
    }
  } while (grew);
  return envelope;
}
4526
+
4527
/**
 * Attempts to rebuild a result's snippet so that it covers its whole
 * contiguous envelope, stitching the text together from the other rows of
 * the same file.
 *
 * Steps:
 * 1. Reject envelopes longer than `config.repair_max_envelope_lines`.
 * 2. Collect the same-path rows intersecting the envelope and build a
 *    line-number → text map from them (`buildPreferredLineMap`).
 * 3. Render every envelope line; lines absent from the map become empty
 *    strings and are counted. More than `max(2, floor(20% of span))` missing
 *    lines aborts the repair.
 * 4. Clip the rendered text to `config.repair_max_snippet_chars`, counting one
 *    extra character per line break. Clipping keeps whole lines, except that
 *    an over-long first line is cut mid-line.
 * 5. Reject a clipped result that stops before the anchor's first line:
 *    such a snippet would keep the anchor's score and reason while showing
 *    none of the code that actually matched.
 *
 * @param input.anchor - Result being repaired; its non-range fields are kept.
 * @param input.envelope - Line range the repaired snippet should cover.
 * @param input.samePathCandidates - Rows from the anchor's file.
 * @param input.config - Supplies the envelope-line and snippet-char caps.
 * @returns `repaired` on success (with `clipped` telling whether the char
 *   cap cut the text short); otherwise `reason` names why repair was skipped
 *   and `clipped` is `false`.
 */
function repairSnippetFromEnvelope(input: {
  anchor: SearchResultRow;
  envelope: { start_line: number; end_line: number };
  samePathCandidates: SearchResultRow[];
  config: RetrievalSnippetIntegrityConfig;
}): { repaired?: SearchResultRow; reason?: string; clipped: boolean } {
  const envelopeSpan = input.envelope.end_line - input.envelope.start_line + 1;
  if (envelopeSpan > input.config.repair_max_envelope_lines) {
    return { reason: "envelope_cap_exceeded", clipped: false };
  }

  const envelopeCandidates = input.samePathCandidates
    .filter((candidate) => candidate.end_line >= input.envelope.start_line && candidate.start_line <= input.envelope.end_line)
    .sort(compareSearchResultsByLineRange);
  if (envelopeCandidates.length === 0) {
    return { reason: "no_envelope_candidates", clipped: false };
  }

  const lineMap = buildPreferredLineMap(envelopeCandidates);
  const renderedLines: string[] = [];
  let missingLines = 0;
  for (let line = input.envelope.start_line; line <= input.envelope.end_line; line += 1) {
    const text = lineMap.get(line);
    if (typeof text !== "string") {
      // Keep the slot so later lines stay aligned with their line numbers.
      missingLines += 1;
      renderedLines.push("");
      continue;
    }
    renderedLines.push(text);
  }

  const maxMissingLines = Math.max(2, Math.floor(envelopeSpan * 0.2));
  if (missingLines > maxMissingLines) {
    return { reason: "missing_line_density_too_high", clipped: false };
  }

  const clippedLines: string[] = [];
  let usedChars = 0;
  let clipped = false;
  for (let index = 0; index < renderedLines.length; index += 1) {
    const line = renderedLines[index] ?? "";
    // Every line after the first costs one extra character for its "\n".
    const additionalChars = index === 0 ? line.length : line.length + 1;
    if (clippedLines.length > 0 && usedChars + additionalChars > input.config.repair_max_snippet_chars) {
      clipped = true;
      break;
    }
    if (clippedLines.length === 0 && line.length > input.config.repair_max_snippet_chars) {
      // The very first line alone is over budget: cut it mid-line.
      const clippedLine = line.slice(0, input.config.repair_max_snippet_chars);
      if (clippedLine.length === 0) {
        return { reason: "snippet_char_cap_exceeded", clipped: false };
      }
      clippedLines.push(clippedLine);
      usedChars = clippedLine.length;
      clipped = true;
      break;
    }
    clippedLines.push(line);
    usedChars += additionalChars;
  }

  if (clippedLines.length === 0) {
    return { reason: "snippet_char_cap_exceeded", clipped: false };
  }

  const repairedSnippet = clippedLines.join("\n").trimEnd();
  if (repairedSnippet.length === 0) {
    return { reason: "empty_repaired_snippet", clipped: false };
  }

  const repairedEndLine = input.envelope.start_line + clippedLines.length - 1;
  if (clipped && repairedEndLine < input.anchor.start_line) {
    // Clipping always keeps the head of the envelope. When the anchor sits
    // further down, the clipped text contains none of the anchor's lines, so
    // the original snippet is the better result.
    return { reason: "anchor_clipped_out", clipped: false };
  }

  return {
    repaired: {
      ...input.anchor,
      start_line: input.envelope.start_line,
      end_line: repairedEndLine,
      snippet: repairedSnippet
    },
    clipped
  };
}
4607
+
4608
/**
 * Formats the single-line comment appended to a snippet that is known to be
 * incomplete.
 *
 * The marker has the shape
 * `// [truncated:<version> symbol=<name> estimated_span=<start>-<end>
 * estimated_total_lines=<n> omitted_before=<n> omitted_after=<n>
 * through_line=<n>]` (all on one line, fields separated by single spaces).
 *
 * @param input.result - Result whose `start_line` / `end_line` are reported.
 * @param input.symbolName - Detected symbol; rendered as `unknown` if absent.
 * @param input.envelope_start_line - First line of the estimated full span.
 * @param input.envelope_end_line - Last line of the estimated full span.
 * @param input.marker_template_version - Version tag embedded in the marker.
 * @returns The marker comment text.
 */
function buildSnippetTruncationMarker(input: {
  result: SearchResultRow;
  symbolName?: string;
  envelope_start_line: number;
  envelope_end_line: number;
  marker_template_version: "v1";
}): string {
  const { result, envelope_start_line: spanStart, envelope_end_line: spanEnd } = input;
  const fields = [
    `truncated:${input.marker_template_version}`,
    `symbol=${input.symbolName ?? "unknown"}`,
    `estimated_span=${spanStart}-${spanEnd}`,
    // Counts are clamped so an inverted or undersized envelope never yields
    // a non-positive total or a negative omission.
    `estimated_total_lines=${Math.max(1, spanEnd - spanStart + 1)}`,
    `omitted_before=${Math.max(0, result.start_line - spanStart)}`,
    `omitted_after=${Math.max(0, spanEnd - result.end_line)}`,
    `through_line=${result.end_line}`
  ];
  return `// [${fields.join(" ")}]`;
}
4622
+
4623
/**
 * Post-processes selected search results so that snippets which appear to be
 * cut mid-construct are either repaired from neighbouring rows or tagged with
 * a truncation marker comment.
 *
 * Per result:
 * 1. Skip it unless its path maps to one of the configured target languages
 *    and more than one source row exists for that path.
 * 2. Estimate the contiguous envelope around it; skip it when the snippet
 *    does not look truncated relative to that envelope.
 * 3. When `config.repair_enabled`, try `repairSnippetFromEnvelope`, counting
 *    the attempt, counting a success, and logging the decision either way.
 * 4. If the (possibly repaired) snippet still looks truncated, append a
 *    marker line built by `buildSnippetTruncationMarker` and record the
 *    marker metrics.
 *
 * @param input.selected - Results to post-process; returned unchanged (as a
 *   shallow copy of the array) when the feature is disabled, the list is
 *   empty, or no target language remains after normalisation.
 * @param input.sourceCandidates - Rows used to estimate envelopes and to
 *   source repair text, grouped by `path`.
 * @param input.config - Snippet-integrity settings.
 * @param input.observability - Metrics and logger sinks.
 * @param input.retrievalProfileId - Label attached to every metric and log.
 * @returns One row per selected result, in the same order.
 */
function annotateSearchResultsWithSnippetIntegrity(input: {
  selected: SearchResultRow[];
  sourceCandidates: SearchResultRow[];
  config: RetrievalSnippetIntegrityConfig;
  observability: Observability;
  retrievalProfileId: string;
}): SearchResultRow[] {
  const { selected, sourceCandidates, config, observability, retrievalProfileId } = input;
  if (!config.enabled || selected.length === 0) {
    return [...selected];
  }

  const targetLanguages = new Set(normalizeSnippetIntegrityLanguageList(config.target_languages));
  if (targetLanguages.size === 0) {
    return [...selected];
  }

  // Group the source rows by file so each result only sees its own file.
  const rowsByPath = new Map<string, SearchResultRow[]>();
  for (const row of sourceCandidates) {
    const bucket = rowsByPath.get(row.path);
    if (bucket === undefined) {
      rowsByPath.set(row.path, [row]);
    } else {
      bucket.push(row);
    }
  }

  const annotate = (result: SearchResultRow): SearchResultRow => {
    const language = snippetIntegrityLanguageFromPath(result.path);
    if (!language || !targetLanguages.has(language)) {
      return result;
    }
    const samePath = rowsByPath.get(result.path) ?? [result];
    if (samePath.length <= 1) {
      return result;
    }

    const envelope = estimateContiguousEnvelope({
      anchor: result,
      candidates: samePath,
      maxGapLines: config.max_contiguous_gap_lines
    });
    const linesOmittedBefore = (row: SearchResultRow): number => Math.max(0, row.start_line - envelope.start_line);
    const linesOmittedAfter = (row: SearchResultRow): number => Math.max(0, envelope.end_line - row.end_line);

    if (!shouldAnnotateSnippetAsTruncated(result, linesOmittedBefore(result), linesOmittedAfter(result))) {
      return result;
    }

    const envelopeCandidates = samePath
      .filter((candidate) => candidate.end_line >= envelope.start_line && candidate.start_line <= envelope.end_line)
      .sort(compareSearchResultsByLineRange);

    let assembled = result;
    if (config.repair_enabled) {
      observability.metrics.increment("retrieval_snippet_repair_attempt_total", 1, {
        retrieval_profile_id: retrievalProfileId,
        language
      });
      const outcome = repairSnippetFromEnvelope({
        anchor: result,
        envelope,
        samePathCandidates: samePath,
        config
      });
      // Fields shared by the "repaired" and "repair_skipped" log entries.
      const decisionFields = {
        retrieval_profile_id: retrievalProfileId,
        path: result.path,
        language,
        envelope_start_line: envelope.start_line,
        envelope_end_line: envelope.end_line,
        envelope_span_lines: envelope.end_line - envelope.start_line + 1
      };
      if (outcome.repaired) {
        assembled = outcome.repaired;
        observability.metrics.increment("retrieval_snippet_repair_success_total", 1, {
          retrieval_profile_id: retrievalProfileId,
          language,
          clipped: outcome.clipped ? "true" : "false"
        });
        observability.logger.info("snippet integrity repair decision", {
          ...decisionFields,
          status: "repaired",
          clipped: outcome.clipped
        });
      } else {
        observability.logger.info("snippet integrity repair decision", {
          ...decisionFields,
          status: "repair_skipped",
          reason: outcome.reason ?? "unknown"
        });
      }
    }

    // Re-evaluate against the (possibly repaired) row: a full repair needs no marker.
    const omittedBefore = linesOmittedBefore(assembled);
    const omittedAfter = linesOmittedAfter(assembled);
    if (!shouldAnnotateSnippetAsTruncated(assembled, omittedBefore, omittedAfter)) {
      return assembled;
    }

    // Prefer a symbol from the snippet itself, then fall back to the
    // envelope rows in line order.
    let symbolName = detectSnippetSymbolName(assembled.snippet);
    for (const candidate of envelopeCandidates) {
      if (symbolName) {
        break;
      }
      symbolName = detectSnippetSymbolName(candidate.snippet);
    }

    const marker = buildSnippetTruncationMarker({
      result: assembled,
      symbolName,
      envelope_start_line: envelope.start_line,
      envelope_end_line: envelope.end_line,
      marker_template_version: config.marker_template_version
    });
    observability.metrics.increment("retrieval_snippet_repair_fallback_marker_total", 1, {
      retrieval_profile_id: retrievalProfileId,
      language
    });
    observability.metrics.increment("retrieval_snippet_truncation_marker_total", 1, {
      retrieval_profile_id: retrievalProfileId,
      language,
      symbol_detected: symbolName ? "true" : "false",
      marker_template_version: config.marker_template_version
    });
    observability.metrics.observe("retrieval_snippet_omitted_after_lines", omittedAfter, {
      retrieval_profile_id: retrievalProfileId,
      language
    });

    const baseSnippet = assembled.snippet.trimEnd();
    return {
      ...assembled,
      snippet: [baseSnippet, marker].filter((part) => part.length > 0).join("\n")
    };
  };

  return selected.map((result) => annotate(result));
}
4760
+
4761
/**
 * Enriches each selected result with nearby spans from the same file,
 * producing one multi-span snippet in which skipped regions are shown as a
 * `...` line.
 *
 * Per anchor:
 * 1. Neighbours are the same-path rows that do not share the anchor's exact
 *    range, are not heavily overlapping it, and lie within
 *    `config.max_gap_lines` of it; they are ordered by `compareSearchResults`
 *    and then by distance from the anchor.
 * 2. Neighbours are added greedily, up to `config.max_spans_per_result`
 *    spans, skipping any whose addition would leave a gap larger than
 *    `config.max_gap_lines` between consecutive merged spans.
 * 3. The anchor is returned untouched when only one merged span results,
 *    when `...` lines would exceed 25% of the rendered lines, or when the
 *    clipped snippet is empty.
 *
 * NOTE(review): a `...` line is emitted whenever the next span starts after
 * the previous one ends, which includes directly adjacent spans unless
 * `mergeLineSpans` already coalesces those — confirm against its definition.
 *
 * @param input.selected - Results to enrich.
 * @param input.sourceCandidates - Rows that may contribute context spans.
 * @param input.config - Context-packing settings.
 * @returns One row per selected result, in order; packed rows get the merged
 *   line range, the packed snippet and ` + contextual spans` appended to
 *   their reason.
 */
function packSearchResultsWithContext(input: {
  selected: SearchContextOutput["results"];
  sourceCandidates: SearchContextOutput["results"];
  config: RetrievalContextPackingConfig;
}): SearchContextOutput["results"] {
  const { selected, sourceCandidates, config } = input;
  if (!config.enabled || selected.length === 0) {
    return [...selected];
  }

  const rowsByPath = new Map<string, SearchContextOutput["results"]>();
  for (const row of sourceCandidates) {
    const bucket = rowsByPath.get(row.path);
    if (bucket === undefined) {
      rowsByPath.set(row.path, [row]);
    } else {
      bucket.push(row);
    }
  }

  // True when any two consecutive spans are separated by more than the allowed gap.
  const leavesOversizedGap = (ordered: Array<{ start_line: number; end_line: number }>): boolean =>
    ordered.some((span, position) => {
      if (position === 0) {
        return false;
      }
      const priorEnd = ordered[position - 1]?.end_line ?? span.start_line;
      return span.start_line - priorEnd - 1 > config.max_gap_lines;
    });

  return selected.map((anchor) => {
    const samePath = rowsByPath.get(anchor.path) ?? [anchor];
    if (samePath.length <= 1 || config.max_spans_per_result <= 1) {
      return anchor;
    }

    const anchorRange = { start_line: anchor.start_line, end_line: anchor.end_line };
    const neighbours = samePath
      .filter((row) => {
        const sharesAnchorRange = row.start_line === anchor.start_line && row.end_line === anchor.end_line;
        if (sharesAnchorRange) {
          return false;
        }
        if (isHeavilyOverlappingLineRange(row, [anchorRange])) {
          return false;
        }
        return lineRangeGap(anchorRange, row) <= config.max_gap_lines;
      })
      .sort((a, b) => {
        const byRelevance = compareSearchResults(a, b);
        return byRelevance !== 0 ? byRelevance : lineRangeGap(anchorRange, a) - lineRangeGap(anchorRange, b);
      });

    const spans: Array<{ start_line: number; end_line: number }> = [{ ...anchorRange }];
    for (const row of neighbours) {
      if (spans.length >= config.max_spans_per_result) {
        break;
      }
      const addition = { start_line: row.start_line, end_line: row.end_line };
      if (!leavesOversizedGap(mergeLineSpans([...spans, addition]))) {
        spans.push(addition);
      }
    }

    const mergedSpans = mergeLineSpans(spans);
    if (mergedSpans.length <= 1) {
      return anchor;
    }

    const lineMap = buildPreferredLineMap([anchor, ...samePath]);
    const renderedLines: string[] = [];
    let contentLineCount = 0;
    let elisionCount = 0;
    mergedSpans.forEach((span, position) => {
      if (!span) {
        return;
      }
      const prior = position > 0 ? mergedSpans[position - 1] : undefined;
      if (prior && span.start_line - prior.end_line > 0) {
        renderedLines.push("...");
        elisionCount += 1;
      }
      for (let line = span.start_line; line <= span.end_line; line += 1) {
        // Lines absent from the map are rendered as empty strings.
        renderedLines.push(lineMap.get(line) ?? "");
        contentLineCount += 1;
      }
    });
    if (renderedLines.length === 0) {
      return anchor;
    }

    const elisionDensity = elisionCount / Math.max(1, contentLineCount + elisionCount);
    if (elisionDensity > 0.25) {
      return anchor;
    }

    const packedSnippet = clipSnippetToMaxChars(renderedLines.join("\n"), config.max_snippet_chars);
    if (packedSnippet.length === 0) {
      return anchor;
    }

    const firstSpan = mergedSpans[0];
    const lastSpan = mergedSpans[mergedSpans.length - 1];
    return {
      ...anchor,
      start_line: firstSpan?.start_line ?? anchor.start_line,
      end_line: lastSpan?.end_line ?? anchor.end_line,
      snippet: packedSnippet,
      reason: `${anchor.reason} + contextual spans`
    };
  });
}
4868
+
4869
/**
 * Consolidates candidates from the same file whose line ranges overlap or
 * sit close together.
 *
 * Within each path, rows are ordered by `compareSearchResultsByLineRange`
 * and swept once. A row joins the currently open cluster when the gap to the
 * cluster's end is at most `config.merge_gap_lines` and the combined span
 * stays within `config.merge_max_span_lines`; otherwise it opens a new
 * cluster. Each cluster is collapsed with `mergeCandidateCluster`, and the
 * combined list is ordered by `compareSearchResults`.
 *
 * @param candidates - Rows to consolidate.
 * @param config - Rerank settings supplying the merge switch and limits.
 * @returns A new array; a plain shallow copy of `candidates` when merging is
 *   disabled or there is at most one candidate.
 */
function mergeOverlappingCandidates(
  candidates: SearchContextOutput["results"],
  config: RetrievalRerankConfig
): SearchContextOutput["results"] {
  if (!config.merge_overlapping_chunks_enabled || candidates.length <= 1) {
    return [...candidates];
  }

  const rowsByPath = new Map<string, SearchContextOutput["results"]>();
  for (const row of candidates) {
    const bucket = rowsByPath.get(row.path);
    if (bucket === undefined) {
      rowsByPath.set(row.path, [row]);
    } else {
      bucket.push(row);
    }
  }

  const consolidated: SearchContextOutput["results"] = [];
  for (const rows of rowsByPath.values()) {
    const ordered = [...rows].sort(compareSearchResultsByLineRange);
    const clusters: Array<{ members: SearchContextOutput["results"]; start: number; end: number }> = [];

    for (const row of ordered) {
      const open = clusters[clusters.length - 1];
      if (open !== undefined) {
        const widenedStart = Math.min(open.start, row.start_line);
        const widenedEnd = Math.max(open.end, row.end_line);
        // Overlapping rows yield a negative raw gap, which is clamped to zero.
        const gapLines = Math.max(0, row.start_line - open.end - 1);
        const fitsOpenCluster =
          gapLines <= config.merge_gap_lines && widenedEnd - widenedStart + 1 <= config.merge_max_span_lines;
        if (fitsOpenCluster) {
          open.members.push(row);
          open.start = widenedStart;
          open.end = widenedEnd;
          continue;
        }
      }
      clusters.push({ members: [row], start: row.start_line, end: row.end_line });
    }

    for (const cluster of clusters) {
      consolidated.push(mergeCandidateCluster(cluster.members));
    }
  }

  return consolidated.sort(compareSearchResults);
}
4934
+
4935
/**
 * Trims the tail of a ranked candidate list once scores fall away.
 *
 * Candidates are ordered by `compareSearchResults`. The first
 * `max(1, smart_cutoff_min_k)` are always kept. After that, scanning stops at
 * the first candidate that
 * - scores below `smart_cutoff_min_score`, or
 * - scores below `smart_cutoff_top_ratio` times the top score, or
 * - trails the candidate ranked immediately before it by more than
 *   `smart_cutoff_delta_abs`.
 * At most `max(min_k, smart_cutoff_max_k)` candidates are returned.
 *
 * @param candidates - Rows to filter; the input array is not modified.
 * @param config - Rerank settings supplying the cutoff switch and limits.
 * @returns The kept candidates in ranked order, or a shallow copy of
 *   `candidates` (original order) when the cutoff is disabled or the list is
 *   empty.
 */
function applySmartCutoffCandidates(
  candidates: SearchContextOutput["results"],
  config: RetrievalRerankConfig
): SearchContextOutput["results"] {
  if (!config.smart_cutoff_enabled || candidates.length === 0) {
    return [...candidates];
  }

  const ranked = [...candidates].sort(compareSearchResults);
  const guaranteedCount = Math.max(1, config.smart_cutoff_min_k);
  const ceilingCount = Math.max(guaranteedCount, config.smart_cutoff_max_k);
  const bestScore = ranked[0]?.score ?? Number.NEGATIVE_INFINITY;
  const survivors: SearchContextOutput["results"] = [];

  for (let position = 0; position < ranked.length && survivors.length < ceilingCount; position += 1) {
    const row = ranked[position];
    if (!row) {
      continue;
    }
    // The cutoff rules only apply once the guaranteed minimum has been kept.
    if (survivors.length >= guaranteedCount) {
      const prior = ranked[position - 1];
      const belowAbsoluteFloor = row.score < config.smart_cutoff_min_score;
      const belowRelativeFloor = row.score < bestScore * config.smart_cutoff_top_ratio;
      const dropsTooSteeply = prior ? prior.score - row.score > config.smart_cutoff_delta_abs : false;
      if (belowAbsoluteFloor || belowRelativeFloor || dropsTooSteeply) {
        break;
      }
    }
    survivors.push(row);
  }

  return survivors;
}
4975
+
4976
/**
 * Test-only entry point that forwards to `applySmartCutoffCandidates`,
 * exposing the otherwise module-private cutoff logic.
 *
 * @param input.candidates - Rows handed to the cutoff.
 * @param input.config - Rerank settings handed to the cutoff.
 * @returns Whatever `applySmartCutoffCandidates` returns for these arguments.
 */
export function __applySmartCutoffCandidatesForTests(input: {
  candidates: SearchContextOutput["results"];
  config: RetrievalRerankConfig;
}): SearchContextOutput["results"] {
  const { candidates, config } = input;
  return applySmartCutoffCandidates(candidates, config);
}
4982
+
3861
4983
  function dedupeEnhancerCandidatesByPath(results: SearchContextOutput["results"]): SearchContextOutput["results"] {
3862
4984
  const byPath = new Map<string, SearchContextOutput["results"][number]>();
3863
4985
  for (const result of results) {
@@ -4248,11 +5370,14 @@ function deterministicEnhancerFallbackRanking(input: {
4248
5370
  return [...preferred, ...tolerated, ...avoided];
4249
5371
  }
4250
5372
 
4251
- function trimToContextBudget(results: SearchContextOutput["results"]): SearchContextOutput["results"] {
5373
+ function trimToContextBudget(
5374
+ results: SearchContextOutput["results"],
5375
+ budgetTokenizerMode: "ranking" | "lightweight"
5376
+ ): SearchContextOutput["results"] {
4252
5377
  let total = 0;
4253
5378
  const out: SearchContextOutput["results"] = [];
4254
5379
  for (const result of results) {
4255
- total += tokenize(result.snippet).length;
5380
+ total += chunkBudgetTokenize(result.snippet, budgetTokenizerMode).length;
4256
5381
  if (total > MAX_CONTEXT_BUDGET_TOKENS) {
4257
5382
  break;
4258
5383
  }
@@ -4857,6 +5982,8 @@ export class RetrievalCore {
4857
5982
  private readonly enhancerConfig: RetrievalEnhancerConfig;
4858
5983
  private readonly enhancerGenerationConfig: RetrievalEnhancerGenerationConfig;
4859
5984
  private readonly chunkingConfig: RetrievalChunkingConfig;
5985
+ private readonly contextPackingConfig: RetrievalContextPackingConfig;
5986
+ private readonly snippetIntegrityConfig: RetrievalSnippetIntegrityConfig;
4860
5987
  private readonly enhancerDecisionTraceEnabled: boolean;
4861
5988
  private cacheHits = 0;
4862
5989
  private cacheMisses = 0;
@@ -4898,6 +6025,14 @@ export class RetrievalCore {
4898
6025
  options?.enhancerGenerationConfig
4899
6026
  );
4900
6027
  this.chunkingConfig = mergeRetrievalChunkingConfig(DEFAULT_RETRIEVAL_CHUNKING_CONFIG, options?.chunkingConfig);
6028
+ this.contextPackingConfig = mergeRetrievalContextPackingConfig(
6029
+ DEFAULT_RETRIEVAL_CONTEXT_PACKING_CONFIG,
6030
+ options?.contextPackingConfig
6031
+ );
6032
+ this.snippetIntegrityConfig = mergeRetrievalSnippetIntegrityConfig(
6033
+ DEFAULT_RETRIEVAL_SNIPPET_INTEGRITY_CONFIG,
6034
+ options?.snippetIntegrityConfig
6035
+ );
4901
6036
  this.enhancerDecisionTraceEnabled = Boolean(options?.enhancerDecisionTraceEnabled);
4902
6037
  }
4903
6038
 
@@ -5092,6 +6227,12 @@ export class RetrievalCore {
5092
6227
  language: chunkLanguage,
5093
6228
  reason: chunkBuild.fallback_reason ?? "none"
5094
6229
  });
6230
+ if (chunkBuild.recursive_semantic_chunking_used) {
6231
+ this.observability.metrics.increment("index_recursive_semantic_chunking_used_total", 1, {
6232
+ tenant_id: artifact.tenant_id,
6233
+ language: chunkLanguage
6234
+ });
6235
+ }
5095
6236
  if (chunkBuild.fallback_reason) {
5096
6237
  this.observability.metrics.increment("index_chunking_fallback_total", 1, {
5097
6238
  tenant_id: artifact.tenant_id,
@@ -5123,7 +6264,8 @@ export class RetrievalCore {
5123
6264
  reason: chunkBuild.fallback_reason
5124
6265
  });
5125
6266
  }
5126
- const estimatedEmbeddingTokens = chunks.reduce((sum, chunk) => sum + tokenize(chunk.snippet).length, 0);
6267
+ const embeddingTexts = buildChunkEmbeddingTexts(chunks, this.chunkingConfig, this.embeddingDescriptor.provider);
6268
+ const estimatedEmbeddingTokens = embeddingTexts.reduce((sum, text) => sum + tokenize(text).length, 0);
5127
6269
  this.observability.metrics.increment("index_embedding_tokens_total", estimatedEmbeddingTokens, {
5128
6270
  tenant_id: artifact.tenant_id
5129
6271
  });
@@ -5131,7 +6273,7 @@ export class RetrievalCore {
5131
6273
  chunks.length === 0
5132
6274
  ? []
5133
6275
  : await this.embeddingProvider.embed({
5134
- texts: chunks.map((chunk) => chunk.snippet),
6276
+ texts: embeddingTexts,
5135
6277
  purpose: "index"
5136
6278
  });
5137
6279
  if (embeddings.length !== chunks.length) {
@@ -5449,6 +6591,12 @@ export class RetrievalCore {
5449
6591
  language: chunkLanguage,
5450
6592
  reason: chunkBuild.fallback_reason ?? "none"
5451
6593
  });
6594
+ if (chunkBuild.recursive_semantic_chunking_used) {
6595
+ this.observability.metrics.increment("index_recursive_semantic_chunking_used_total", 1, {
6596
+ tenant_id: artifact.tenant_id,
6597
+ language: chunkLanguage
6598
+ });
6599
+ }
5452
6600
  if (chunkBuild.fallback_reason) {
5453
6601
  this.observability.metrics.increment("index_chunking_fallback_total", 1, {
5454
6602
  tenant_id: artifact.tenant_id,
@@ -5480,7 +6628,8 @@ export class RetrievalCore {
5480
6628
  reason: chunkBuild.fallback_reason
5481
6629
  });
5482
6630
  }
5483
- const estimatedEmbeddingTokens = chunks.reduce((sum, chunk) => sum + tokenize(chunk.snippet).length, 0);
6631
+ const embeddingTexts = buildChunkEmbeddingTexts(chunks, this.chunkingConfig, this.embeddingDescriptor.provider);
6632
+ const estimatedEmbeddingTokens = embeddingTexts.reduce((sum, text) => sum + tokenize(text).length, 0);
5484
6633
  this.observability.metrics.increment("index_embedding_tokens_total", estimatedEmbeddingTokens, {
5485
6634
  tenant_id: artifact.tenant_id
5486
6635
  });
@@ -5488,7 +6637,7 @@ export class RetrievalCore {
5488
6637
  chunks.length === 0
5489
6638
  ? []
5490
6639
  : await this.embeddingProvider.embed({
5491
- texts: chunks.map((chunk) => chunk.snippet),
6640
+ texts: embeddingTexts,
5492
6641
  purpose: "index"
5493
6642
  });
5494
6643
  if (embeddings.length !== chunks.length) {
@@ -5823,7 +6972,27 @@ export class RetrievalCore {
5823
6972
  query,
5824
6973
  top_k: topK,
5825
6974
  filters: input.request.filters,
5826
- retrieval_variant: this.rerankerCacheVariant
6975
+ retrieval_variant: `${this.rerankerCacheVariant}|context_pack:${
6976
+ this.contextPackingConfig.enabled ? "on" : "off"
6977
+ }|context_pack_spans:${this.contextPackingConfig.max_spans_per_result}|context_pack_gap:${this.contextPackingConfig.max_gap_lines}|snippet_integrity:${
6978
+ this.snippetIntegrityConfig.enabled ? "on" : "off"
6979
+ }|snippet_integrity_gap:${this.snippetIntegrityConfig.max_contiguous_gap_lines}|snippet_integrity_langs:${this.snippetIntegrityConfig.target_languages.join(
6980
+ ","
6981
+ )}|snippet_repair:${this.snippetIntegrityConfig.repair_enabled ? "on" : "off"}|snippet_repair_env:${
6982
+ this.snippetIntegrityConfig.repair_max_envelope_lines
6983
+ }|snippet_repair_chars:${this.snippetIntegrityConfig.repair_max_snippet_chars}|chunk_recursive:${
6984
+ this.chunkingConfig.recursive_semantic_chunking_enabled ? "on" : "off"
6985
+ }|chunk_semantic_gap:${this.chunkingConfig.semantic_merge_gap_lines}|chunk_semantic_span:${
6986
+ this.chunkingConfig.semantic_merge_max_span_lines
6987
+ }|chunk_comment_absorb:${this.chunkingConfig.comment_forward_absorb_enabled ? "on" : "off"}|chunk_embed_prefix:${
6988
+ this.chunkingConfig.embedding_context_prefix_enabled ? "on" : "off"
6989
+ }|smart_cutoff:${this.scoringConfig.rerank.smart_cutoff_enabled ? "on" : "off"}|smart_cutoff_min_k:${
6990
+ this.scoringConfig.rerank.smart_cutoff_min_k
6991
+ }|smart_cutoff_max_k:${this.scoringConfig.rerank.smart_cutoff_max_k}|smart_cutoff_min_score:${
6992
+ this.scoringConfig.rerank.smart_cutoff_min_score
6993
+ }|smart_cutoff_top_ratio:${this.scoringConfig.rerank.smart_cutoff_top_ratio}|smart_cutoff_delta_abs:${
6994
+ this.scoringConfig.rerank.smart_cutoff_delta_abs
6995
+ }`
5827
6996
  });
5828
6997
 
5829
6998
  const cached = await this.cache.get(cacheKey);
@@ -5988,17 +7157,47 @@ export class RetrievalCore {
5988
7157
  })
5989
7158
  );
5990
7159
 
7160
+ const consolidatedCandidates = await this.observability.tracing.withSpan(
7161
+ "retrieval.overlap_merge",
7162
+ { trace_id: input.trace_id },
7163
+ async () => mergeOverlappingCandidates(rerankedCandidates, this.scoringConfig.rerank)
7164
+ );
7165
+ this.observability.metrics.observe("retrieval_candidates_post_overlap_merge_count", consolidatedCandidates.length, {
7166
+ retrieval_profile_id: this.scoringProfileId
7167
+ });
7168
+ const mergedCandidateCount = Math.max(0, rerankedCandidates.length - consolidatedCandidates.length);
7169
+ if (mergedCandidateCount > 0) {
7170
+ this.observability.metrics.increment("retrieval_overlap_candidates_merged_total", mergedCandidateCount, {
7171
+ retrieval_profile_id: this.scoringProfileId
7172
+ });
7173
+ }
7174
+ const cutoffCandidates = await this.observability.tracing.withSpan(
7175
+ "retrieval.smart_cutoff",
7176
+ { trace_id: input.trace_id },
7177
+ async () => applySmartCutoffCandidates(consolidatedCandidates, this.scoringConfig.rerank)
7178
+ );
7179
+ if (this.scoringConfig.rerank.smart_cutoff_enabled) {
7180
+ this.observability.metrics.increment("retrieval_smart_cutoff_applied_total", 1, {
7181
+ retrieval_profile_id: this.scoringProfileId
7182
+ });
7183
+ const droppedCount = Math.max(0, consolidatedCandidates.length - cutoffCandidates.length);
7184
+ this.observability.metrics.increment("retrieval_smart_cutoff_drop_count", droppedCount, {
7185
+ retrieval_profile_id: this.scoringProfileId
7186
+ });
7187
+ }
7188
+
5991
7189
  const deduped = await this.observability.tracing.withSpan("retrieval.rerank", { trace_id: input.trace_id }, async () => {
5992
7190
  const output: SearchContextOutput["results"] = [];
5993
7191
  const seen = new Set<string>();
5994
7192
  const pathCounts = new Map<string, number>();
7193
+ const selectedRangesByPath = new Map<string, Array<{ start_line: number; end_line: number }>>();
5995
7194
  const directoryCounts = new Map<string, number>();
5996
7195
  const extensionCounts = new Map<string, number>();
5997
7196
  const maxChunksPerPath = hasFileLookupIntent(queryTokens)
5998
7197
  ? this.scoringConfig.rerank.max_chunks_per_path_file_lookup
5999
7198
  : this.scoringConfig.rerank.max_chunks_per_path_default;
6000
7199
 
6001
- const available = [...rerankedCandidates];
7200
+ const available = [...cutoffCandidates];
6002
7201
  while (output.length < topK && available.length > 0) {
6003
7202
  let bestIndex = -1;
6004
7203
  let bestAdjustedScore = Number.NEGATIVE_INFINITY;
@@ -6018,6 +7217,12 @@ export class RetrievalCore {
6018
7217
  if (pathCount >= maxChunksPerPath) {
6019
7218
  continue;
6020
7219
  }
7220
+ if (this.scoringConfig.rerank.merge_overlapping_chunks_enabled && pathCount > 0) {
7221
+ const selectedRanges = selectedRangesByPath.get(candidate.path) ?? [];
7222
+ if (isHeavilyOverlappingLineRange(candidate, selectedRanges)) {
7223
+ continue;
7224
+ }
7225
+ }
6021
7226
 
6022
7227
  const directoryKey = parentDirectory(candidate.path).toLowerCase();
6023
7228
  const extensionKey = fileExtension(candidate.path);
@@ -6057,6 +7262,12 @@ export class RetrievalCore {
6057
7262
  const selectedKey = `${selected.path}:${selected.start_line}:${selected.end_line}`;
6058
7263
  seen.add(selectedKey);
6059
7264
  pathCounts.set(selected.path, (pathCounts.get(selected.path) ?? 0) + 1);
7265
+ const selectedRanges = selectedRangesByPath.get(selected.path);
7266
+ if (selectedRanges) {
7267
+ selectedRanges.push({ start_line: selected.start_line, end_line: selected.end_line });
7268
+ } else {
7269
+ selectedRangesByPath.set(selected.path, [{ start_line: selected.start_line, end_line: selected.end_line }]);
7270
+ }
6060
7271
  const selectedDirectory = parentDirectory(selected.path).toLowerCase();
6061
7272
  const selectedExtension = fileExtension(selected.path);
6062
7273
  directoryCounts.set(selectedDirectory, (directoryCounts.get(selectedDirectory) ?? 0) + 1);
@@ -6067,8 +7278,8 @@ export class RetrievalCore {
6067
7278
  });
6068
7279
 
6069
7280
  const candidateRankByKey = new Map<string, number>();
6070
- for (let index = 0; index < rerankedCandidates.length; index += 1) {
6071
- const candidate = rerankedCandidates[index];
7281
+ for (let index = 0; index < cutoffCandidates.length; index += 1) {
7282
+ const candidate = cutoffCandidates[index];
6072
7283
  if (!candidate) {
6073
7284
  continue;
6074
7285
  }
@@ -6100,10 +7311,22 @@ export class RetrievalCore {
6100
7311
  this.observability.metrics.observe("retrieval_literal_matches_topk", literalMatchesInTopK, {
6101
7312
  retrieval_profile_id: this.scoringProfileId
6102
7313
  });
7314
+ const packedResults = packSearchResultsWithContext({
7315
+ selected: deduped,
7316
+ sourceCandidates: cutoffCandidates,
7317
+ config: this.contextPackingConfig
7318
+ });
7319
+ const assembledResults = annotateSearchResultsWithSnippetIntegrity({
7320
+ selected: packedResults,
7321
+ sourceCandidates: cutoffCandidates,
7322
+ config: this.snippetIntegrityConfig,
7323
+ observability: this.observability,
7324
+ retrievalProfileId: this.scoringProfileId
7325
+ });
6103
7326
 
6104
7327
  const output: SearchContextOutput = {
6105
7328
  trace_id: input.trace_id,
6106
- results: deduped,
7329
+ results: assembledResults,
6107
7330
  search_metadata: {
6108
7331
  latency_ms: Date.now() - searchStartedAt,
6109
7332
  retrieval_mode: "hybrid" satisfies RetrievalMode,
@@ -6111,7 +7334,7 @@ export class RetrievalCore {
6111
7334
  }
6112
7335
  };
6113
7336
 
6114
- this.observability.metrics.observe("retrieval_topk_hit_proxy", deduped.length > 0 ? 1 : 0, {
7337
+ this.observability.metrics.observe("retrieval_topk_hit_proxy", assembledResults.length > 0 ? 1 : 0, {
6115
7338
  retrieval_profile_id: this.scoringProfileId
6116
7339
  });
6117
7340
  this.observability.logger.info("search_context completed", {
@@ -6138,6 +7361,7 @@ export class RetrievalCore {
6138
7361
 
6139
7362
  private buildEnhancerContextSnippets(results: SearchContextOutput["results"]): EnhancerContextSnippet[] {
6140
7363
  const maxSnippets = this.enhancerGenerationConfig.max_context_snippets;
7364
+ const snippetCharLimit = this.contextPackingConfig.enabled ? this.contextPackingConfig.enhancer_snippet_char_limit : 1_600;
6141
7365
  const snippets: EnhancerContextSnippet[] = [];
6142
7366
  for (const result of results.slice(0, maxSnippets)) {
6143
7367
  snippets.push({
@@ -6145,7 +7369,7 @@ export class RetrievalCore {
6145
7369
  start_line: result.start_line,
6146
7370
  end_line: result.end_line,
6147
7371
  reason: result.reason,
6148
- snippet: result.snippet.slice(0, 1_600),
7372
+ snippet: result.snippet.slice(0, snippetCharLimit),
6149
7373
  score: result.score
6150
7374
  });
6151
7375
  }
@@ -6320,7 +7544,10 @@ export class RetrievalCore {
6320
7544
  top_k: MAX_TOP_K
6321
7545
  }
6322
7546
  });
6323
- const budgetedResults = trimToContextBudget(retrieval.results);
7547
+ const budgetedResults = trimToContextBudget(
7548
+ retrieval.results,
7549
+ this.contextPackingConfig.enabled ? "lightweight" : "ranking"
7550
+ );
6324
7551
  const dedupedByPath = dedupeEnhancerCandidatesByPath(budgetedResults);
6325
7552
  const collapsedByDirectory = collapseEnhancerCandidatesByDirectory(
6326
7553
  dedupedByPath,