@rce-mcp/retrieval-core 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -78,7 +78,16 @@ export const BASELINE_RETRIEVAL_SCORING_CONFIG = {
78
78
  max_chunks_per_path_default: 2,
79
79
  max_chunks_per_path_file_lookup: 1,
80
80
  same_directory_penalty: 0,
81
- same_extension_penalty: 0
81
+ same_extension_penalty: 0,
82
+ merge_overlapping_chunks_enabled: true,
83
+ merge_gap_lines: 6,
84
+ merge_max_span_lines: 220,
85
+ smart_cutoff_enabled: false,
86
+ smart_cutoff_min_k: 2,
87
+ smart_cutoff_max_k: 8,
88
+ smart_cutoff_min_score: 0.25,
89
+ smart_cutoff_top_ratio: 0.5,
90
+ smart_cutoff_delta_abs: 0.25
82
91
  }
83
92
  };
84
93
  export const CONSERVATIVE_RETRIEVAL_SCORING_CONFIG = {
@@ -129,7 +138,16 @@ export const CONSERVATIVE_RETRIEVAL_SCORING_CONFIG = {
129
138
  max_chunks_per_path_default: 2,
130
139
  max_chunks_per_path_file_lookup: 1,
131
140
  same_directory_penalty: 0,
132
- same_extension_penalty: 0
141
+ same_extension_penalty: 0,
142
+ merge_overlapping_chunks_enabled: true,
143
+ merge_gap_lines: 6,
144
+ merge_max_span_lines: 220,
145
+ smart_cutoff_enabled: false,
146
+ smart_cutoff_min_k: 2,
147
+ smart_cutoff_max_k: 8,
148
+ smart_cutoff_min_score: 0.25,
149
+ smart_cutoff_top_ratio: 0.5,
150
+ smart_cutoff_delta_abs: 0.25
133
151
  }
134
152
  };
135
153
  export const DEFAULT_RETRIEVAL_ENHANCER_CONFIG = {
@@ -148,8 +166,31 @@ export const DEFAULT_RETRIEVAL_CHUNKING_CONFIG = {
148
166
  fallback_strategy: "sliding",
149
167
  target_chunk_tokens: DEFAULT_TARGET_CHUNK_TOKENS,
150
168
  chunk_overlap_tokens: DEFAULT_CHUNK_OVERLAP_TOKENS,
169
+ budget_tokenizer: "ranking",
170
+ boundary_strictness: "legacy",
151
171
  parse_timeout_ms: 80,
152
- enabled_languages: ["typescript", "javascript", "python", "go"]
172
+ enabled_languages: ["typescript", "javascript", "python", "go"],
173
+ recursive_semantic_chunking_enabled: false,
174
+ semantic_merge_gap_lines: 6,
175
+ semantic_merge_max_span_lines: 220,
176
+ comment_forward_absorb_enabled: true,
177
+ embedding_context_prefix_enabled: true
178
+ };
179
/**
 * Default context-packing configuration.
 *
 * The feature is off by default (`enabled: false`). Every field here is
 * checked by `validateContextPackingConfig`: `enabled` must be a boolean,
 * `max_gap_lines` a non-negative integer, and the remaining limits
 * positive integers.
 */
export const DEFAULT_RETRIEVAL_CONTEXT_PACKING_CONFIG = {
    enabled: false,
    max_spans_per_result: 3,
    max_gap_lines: 120,
    max_snippet_chars: 3_200,
    enhancer_snippet_char_limit: 2_200
};
186
/**
 * Default snippet-integrity configuration.
 *
 * Both the feature (`enabled`) and its repair step (`repair_enabled`) are
 * off by default. `marker_template_version` is "v1", the only value that
 * `validateSnippetIntegrityConfig` accepts.
 */
export const DEFAULT_RETRIEVAL_SNIPPET_INTEGRITY_CONFIG = {
    enabled: false,
    target_languages: ["typescript", "tsx", "javascript", "jsx"],
    max_contiguous_gap_lines: 6,
    marker_template_version: "v1",
    repair_enabled: false,
    repair_max_envelope_lines: 260,
    repair_max_snippet_chars: 3_600
};
154
195
  const BUILTIN_RETRIEVAL_SCORING_PROFILES = {
155
196
  baseline: BASELINE_RETRIEVAL_SCORING_CONFIG,
@@ -202,6 +243,36 @@ function validateScoringConfig(config) {
202
243
  if (rerank.same_extension_penalty < 0) {
203
244
  throw new Error("invalid retrieval scoring config: rerank.same_extension_penalty must be >= 0");
204
245
  }
246
+ if (typeof rerank.merge_overlapping_chunks_enabled !== "boolean") {
247
+ throw new Error("invalid retrieval scoring config: rerank.merge_overlapping_chunks_enabled must be boolean");
248
+ }
249
+ if (!Number.isInteger(rerank.merge_gap_lines) || rerank.merge_gap_lines < 0) {
250
+ throw new Error("invalid retrieval scoring config: rerank.merge_gap_lines must be an integer >= 0");
251
+ }
252
+ if (!Number.isInteger(rerank.merge_max_span_lines) || rerank.merge_max_span_lines <= 0) {
253
+ throw new Error("invalid retrieval scoring config: rerank.merge_max_span_lines must be a positive integer");
254
+ }
255
+ if (typeof rerank.smart_cutoff_enabled !== "boolean") {
256
+ throw new Error("invalid retrieval scoring config: rerank.smart_cutoff_enabled must be boolean");
257
+ }
258
+ if (!Number.isInteger(rerank.smart_cutoff_min_k) || rerank.smart_cutoff_min_k <= 0) {
259
+ throw new Error("invalid retrieval scoring config: rerank.smart_cutoff_min_k must be a positive integer");
260
+ }
261
+ if (!Number.isInteger(rerank.smart_cutoff_max_k) || rerank.smart_cutoff_max_k <= 0) {
262
+ throw new Error("invalid retrieval scoring config: rerank.smart_cutoff_max_k must be a positive integer");
263
+ }
264
+ if (rerank.smart_cutoff_max_k < rerank.smart_cutoff_min_k) {
265
+ throw new Error("invalid retrieval scoring config: rerank.smart_cutoff_max_k must be >= smart_cutoff_min_k");
266
+ }
267
+ assertFiniteNumber(rerank.smart_cutoff_min_score, "rerank.smart_cutoff_min_score");
268
+ assertFiniteNumber(rerank.smart_cutoff_top_ratio, "rerank.smart_cutoff_top_ratio");
269
+ assertFiniteNumber(rerank.smart_cutoff_delta_abs, "rerank.smart_cutoff_delta_abs");
270
+ if (rerank.smart_cutoff_top_ratio <= 0 || rerank.smart_cutoff_top_ratio > 1) {
271
+ throw new Error("invalid retrieval scoring config: rerank.smart_cutoff_top_ratio must be in (0, 1]");
272
+ }
273
+ if (rerank.smart_cutoff_delta_abs < 0) {
274
+ throw new Error("invalid retrieval scoring config: rerank.smart_cutoff_delta_abs must be >= 0");
275
+ }
205
276
  }
206
277
  export function resolveRetrievalScoringProfile(profile_id) {
207
278
  const normalized = (profile_id ?? "baseline").trim().toLowerCase();
@@ -307,6 +378,12 @@ function validateChunkingConfig(config) {
307
378
  if (!Number.isInteger(config.parse_timeout_ms) || config.parse_timeout_ms <= 0) {
308
379
  throw new Error("invalid retrieval chunking config: parse_timeout_ms must be a positive integer");
309
380
  }
381
+ if (config.budget_tokenizer !== "ranking" && config.budget_tokenizer !== "lightweight") {
382
+ throw new Error("invalid retrieval chunking config: budget_tokenizer must be ranking|lightweight");
383
+ }
384
+ if (config.boundary_strictness !== "legacy" && config.boundary_strictness !== "semantic_js_ts") {
385
+ throw new Error("invalid retrieval chunking config: boundary_strictness must be legacy|semantic_js_ts");
386
+ }
310
387
  if (!Array.isArray(config.enabled_languages) || config.enabled_languages.length === 0) {
311
388
  throw new Error("invalid retrieval chunking config: enabled_languages must include at least one language");
312
389
  }
@@ -315,6 +392,21 @@ function validateChunkingConfig(config) {
315
392
  throw new Error("invalid retrieval chunking config: enabled_languages must contain non-empty strings");
316
393
  }
317
394
  }
395
+ if (typeof config.recursive_semantic_chunking_enabled !== "boolean") {
396
+ throw new Error("invalid retrieval chunking config: recursive_semantic_chunking_enabled must be boolean");
397
+ }
398
+ if (!Number.isInteger(config.semantic_merge_gap_lines) || config.semantic_merge_gap_lines < 0) {
399
+ throw new Error("invalid retrieval chunking config: semantic_merge_gap_lines must be a non-negative integer");
400
+ }
401
+ if (!Number.isInteger(config.semantic_merge_max_span_lines) || config.semantic_merge_max_span_lines <= 0) {
402
+ throw new Error("invalid retrieval chunking config: semantic_merge_max_span_lines must be a positive integer");
403
+ }
404
+ if (typeof config.comment_forward_absorb_enabled !== "boolean") {
405
+ throw new Error("invalid retrieval chunking config: comment_forward_absorb_enabled must be boolean");
406
+ }
407
+ if (typeof config.embedding_context_prefix_enabled !== "boolean") {
408
+ throw new Error("invalid retrieval chunking config: embedding_context_prefix_enabled must be boolean");
409
+ }
318
410
  }
319
411
  export function mergeRetrievalChunkingConfig(base, overrides) {
320
412
  const next = {
@@ -325,6 +417,98 @@ export function mergeRetrievalChunkingConfig(base, overrides) {
325
417
  validateChunkingConfig(next);
326
418
  return next;
327
419
  }
420
/**
 * Validates a context-packing configuration object.
 *
 * Checks run in a fixed order and the first failing field is reported.
 *
 * @param {object} config - Candidate configuration.
 * @throws {Error} When a field has the wrong type or is out of range.
 */
function validateContextPackingConfig(config) {
    const fail = (detail) => {
        throw new Error(`invalid retrieval context packing config: ${detail}`);
    };
    if (typeof config.enabled !== "boolean") {
        fail("enabled must be boolean");
    }
    if (!Number.isInteger(config.max_spans_per_result) || config.max_spans_per_result <= 0) {
        fail("max_spans_per_result must be a positive integer");
    }
    // Zero is allowed here, unlike the other numeric limits.
    if (!Number.isInteger(config.max_gap_lines) || config.max_gap_lines < 0) {
        fail("max_gap_lines must be a non-negative integer");
    }
    if (!Number.isInteger(config.max_snippet_chars) || config.max_snippet_chars <= 0) {
        fail("max_snippet_chars must be a positive integer");
    }
    if (!Number.isInteger(config.enhancer_snippet_char_limit) || config.enhancer_snippet_char_limit <= 0) {
        fail("enhancer_snippet_char_limit must be a positive integer");
    }
}
437
/**
 * Layers `overrides` on top of `base` and validates the result.
 *
 * @param {object} base - Complete context-packing configuration.
 * @param {object} [overrides] - Partial configuration; null/undefined means "no overrides".
 * @returns {object} A new merged configuration object.
 * @throws {Error} When the merged configuration fails `validateContextPackingConfig`.
 */
export function mergeRetrievalContextPackingConfig(base, overrides) {
    // Later spreads win, so any key present in `overrides` replaces the base value.
    const next = {
        ...base,
        ...(overrides ?? {})
    };
    validateContextPackingConfig(next);
    return next;
}
445
/** Maps each accepted language name or extension alias to its canonical id. */
const SNIPPET_INTEGRITY_LANGUAGE_ALIASES = new Map([
    ["typescript", "typescript"],
    ["ts", "typescript"],
    ["mts", "typescript"],
    ["cts", "typescript"],
    ["tsx", "tsx"],
    ["javascript", "javascript"],
    ["js", "javascript"],
    ["mjs", "javascript"],
    ["cjs", "javascript"],
    ["jsx", "jsx"]
]);
/**
 * Canonicalises a snippet-integrity language name.
 *
 * Matching ignores case and surrounding whitespace.
 *
 * @param {string} value - Language name or alias (e.g. "TS", "mjs").
 * @returns {"typescript"|"tsx"|"javascript"|"jsx"|undefined} The canonical id,
 *   or undefined when the value is not a supported language.
 */
function normalizeSnippetIntegrityLanguage(value) {
    // A Map (not a plain object) so keys like "constructor" cannot match.
    return SNIPPET_INTEGRITY_LANGUAGE_ALIASES.get(value.trim().toLowerCase());
}
461
/**
 * Normalises and de-duplicates a list of language names.
 *
 * Each entry is trimmed and lower-cased. Blank entries are dropped.
 * Supported names are replaced by their canonical id; unsupported names are
 * kept (lower-cased) so a later validation step can reject them.
 *
 * @param {Iterable<string>} value - Language names.
 * @returns {string[]} Unique normalised names in first-seen order.
 */
function normalizeSnippetIntegrityLanguageList(value) {
    const unique = new Set();
    for (const language of value) {
        const raw = language.trim().toLowerCase();
        if (raw.length === 0) {
            continue;
        }
        unique.add(normalizeSnippetIntegrityLanguage(raw) ?? raw);
    }
    return [...unique];
}
472
/**
 * Validates a snippet-integrity configuration object.
 *
 * Checks run in a fixed order and the first failing field is reported.
 *
 * @param {object} config - Candidate configuration.
 * @throws {Error} When a field has the wrong type, is out of range, or names
 *   an unsupported target language.
 */
function validateSnippetIntegrityConfig(config) {
    const fail = (detail) => {
        throw new Error(`invalid retrieval snippet integrity config: ${detail}`);
    };
    if (typeof config.enabled !== "boolean") {
        fail("enabled must be boolean");
    }
    if (!Array.isArray(config.target_languages) || config.target_languages.length === 0) {
        fail("target_languages must include at least one language");
    }
    for (const language of config.target_languages) {
        if (typeof language !== "string" || language.trim().length === 0) {
            fail("target_languages must contain non-empty strings");
        }
        if (!normalizeSnippetIntegrityLanguage(language)) {
            fail("unsupported target language");
        }
    }
    if (!Number.isInteger(config.max_contiguous_gap_lines) || config.max_contiguous_gap_lines < 0) {
        fail("max_contiguous_gap_lines must be a non-negative integer");
    }
    // Only one marker template exists so far.
    if (config.marker_template_version !== "v1") {
        fail("marker_template_version must be v1");
    }
    if (typeof config.repair_enabled !== "boolean") {
        fail("repair_enabled must be boolean");
    }
    if (!Number.isInteger(config.repair_max_envelope_lines) || config.repair_max_envelope_lines <= 0) {
        fail("repair_max_envelope_lines must be a positive integer");
    }
    if (!Number.isInteger(config.repair_max_snippet_chars) || config.repair_max_snippet_chars <= 0) {
        fail("repair_max_snippet_chars must be a positive integer");
    }
}
503
/**
 * Layers `overrides` on top of `base`, normalises `target_languages`, and
 * validates the result.
 *
 * @param {object} base - Complete snippet-integrity configuration.
 * @param {object} [overrides] - Partial configuration; null/undefined means "no overrides".
 * @returns {object} A new merged configuration with a normalised, de-duplicated
 *   `target_languages` list.
 * @throws {Error} When the merged configuration fails `validateSnippetIntegrityConfig`.
 */
export function mergeRetrievalSnippetIntegrityConfig(base, overrides) {
    const requestedLanguages = overrides?.target_languages ?? base.target_languages;
    // Normalisation calls .trim() on every entry. An array holding a non-string
    // would crash there with a raw TypeError, so pass such input through
    // untouched and let the validator report the descriptive error instead.
    const hasNonStringEntry = Array.isArray(requestedLanguages) &&
        requestedLanguages.some((language) => typeof language !== "string");
    const next = {
        ...base,
        ...(overrides ?? {}),
        target_languages: hasNonStringEntry
            ? requestedLanguages
            : normalizeSnippetIntegrityLanguageList(requestedLanguages)
    };
    validateSnippetIntegrityConfig(next);
    return next;
}
328
512
  function stableSerialize(value) {
329
513
  if (Array.isArray(value)) {
330
514
  return `[${value.map((entry) => stableSerialize(entry)).join(",")}]`;
@@ -422,9 +606,22 @@ function singularizeToken(token) {
422
606
  }
423
607
  return undefined;
424
608
  }
609
/**
 * Splits text into lower-cased ASCII word tokens.
 *
 * The text is NFKC-normalised first, then split on every run of characters
 * outside `[A-Za-z0-9_]`. Empty pieces are discarded.
 *
 * @param {string} text - Text to tokenize.
 * @returns {string[]} Tokens in order of appearance.
 */
function tokenizeLightweight(text) {
    const tokens = [];
    for (const piece of text.normalize("NFKC").split(/[^A-Za-z0-9_]+/)) {
        // Pieces contain only [A-Za-z0-9_], so no trimming is needed.
        if (piece.length > 0) {
            tokens.push(piece.toLowerCase());
        }
    }
    return tokens;
}
425
616
  function tokenize(text) {
426
617
  return tokenizeForRanking(text);
427
618
  }
619
/**
 * Tokenizes text with the tokenizer selected for chunk budgeting.
 *
 * @param {string} text - Text to tokenize.
 * @param {string} mode - "lightweight" selects `tokenizeLightweight`; any
 *   other value falls back to `tokenize`.
 * @returns The tokens produced by the selected tokenizer.
 */
function chunkBudgetTokenize(text, mode) {
    return mode === "lightweight" ? tokenizeLightweight(text) : tokenize(text);
}
428
625
  function lexicalScore(query, haystack) {
429
626
  const q = new Set(tokenize(query));
430
627
  if (q.size === 0) {
@@ -848,11 +1045,17 @@ function buildChunks(file, chunkingConfig) {
848
1045
  fallback_strategy: chunkingConfig.fallback_strategy,
849
1046
  target_chunk_tokens: chunkingConfig.target_chunk_tokens,
850
1047
  chunk_overlap_tokens: chunkingConfig.chunk_overlap_tokens,
1048
+ budget_tokenizer: chunkingConfig.budget_tokenizer,
1049
+ boundary_strictness: chunkingConfig.boundary_strictness,
851
1050
  max_chunks_per_file: MAX_CHUNKS_PER_FILE,
852
1051
  parse_timeout_ms: chunkingConfig.parse_timeout_ms,
853
- enabled_languages: chunkingConfig.enabled_languages
1052
+ enabled_languages: chunkingConfig.enabled_languages,
1053
+ recursive_semantic_chunking_enabled: chunkingConfig.recursive_semantic_chunking_enabled,
1054
+ semantic_merge_gap_lines: chunkingConfig.semantic_merge_gap_lines,
1055
+ semantic_merge_max_span_lines: chunkingConfig.semantic_merge_max_span_lines,
1056
+ comment_forward_absorb_enabled: chunkingConfig.comment_forward_absorb_enabled
854
1057
  },
855
- tokenize
1058
+ tokenize: (text) => chunkBudgetTokenize(text, chunkingConfig.budget_tokenizer)
856
1059
  });
857
1060
  return {
858
1061
  chunks: chunkingResult.chunks.map((chunk) => ({
@@ -870,9 +1073,27 @@ function buildChunks(file, chunkingConfig) {
870
1073
  parse_latency_ms: chunkingResult.parse_latency_ms,
871
1074
  language_aware_attempt_latency_ms: chunkingResult.language_aware_attempt_latency_ms,
872
1075
  fallback_path_latency_ms: chunkingResult.fallback_path_latency_ms,
873
- language: chunkingResult.language
1076
+ language: chunkingResult.language,
1077
+ recursive_semantic_chunking_used: chunkingResult.recursive_semantic_chunking_used
874
1078
  };
875
1079
  }
1080
/**
 * Builds the text that is embedded for a chunk.
 *
 * The bare snippet is returned when the context prefix is disabled or the
 * provider id is "deterministic" (case- and whitespace-insensitive).
 * Otherwise a one-line header is prepended:
 * `<path tail>:<start>-<end>[ > <symbol>]`.
 *
 * @param {{path: string, snippet: string, start_line: number, end_line: number}} chunk
 * @param {{embedding_context_prefix_enabled: boolean}} config
 * @param {string} embeddingProviderId
 * @returns {string} The snippet, optionally preceded by the header line.
 */
function buildChunkEmbeddingText(chunk, config, embeddingProviderId) {
    const isDeterministicProvider = embeddingProviderId.trim().toLowerCase() === "deterministic";
    if (!config.embedding_context_prefix_enabled || isDeterministicProvider) {
        return chunk.snippet;
    }
    const normalizedPath = normalizePath(chunk.path);
    const segments = normalizedPath.split("/").filter(Boolean);
    // Keep only the last two path segments when the path is deeper than that.
    const contextPath = segments.length > 2 ? segments.slice(-2).join("/") : normalizedPath;
    const symbol = detectSnippetSymbolName(chunk.snippet);
    const symbolLabel = symbol ? ` > ${symbol}` : "";
    const header = `${contextPath}:${chunk.start_line}-${chunk.end_line}${symbolLabel}`;
    return `${header}\n${chunk.snippet}`;
}
1094
/**
 * Builds the embedding text for every chunk, preserving order.
 *
 * @param {object[]} chunks - Chunks to convert.
 * @param {object} config - Passed through to `buildChunkEmbeddingText`.
 * @param {string} embeddingProviderId - Passed through to `buildChunkEmbeddingText`.
 * @returns {string[]} One embedding text per chunk.
 */
function buildChunkEmbeddingTexts(chunks, config, embeddingProviderId) {
    return chunks.map((chunk) => buildChunkEmbeddingText(chunk, config, embeddingProviderId));
}
876
1097
  function pseudoEmbedding(input, dimensions = 24) {
877
1098
  const safeDimensions = Math.max(1, dimensions);
878
1099
  let source = sha256(input);
@@ -2978,6 +3199,693 @@ function compareSearchResults(a, b) {
2978
3199
  }
2979
3200
  return a.end_line - b.end_line;
2980
3201
  }
3202
/**
 * Comparator that orders results by `start_line`, then `end_line`, and
 * falls back to `compareSearchResults` for identical line ranges.
 *
 * @param {{start_line: number, end_line: number}} a
 * @param {{start_line: number, end_line: number}} b
 * @returns {number} Negative when `a` sorts first, positive when `b` does.
 */
function compareSearchResultsByLineRange(a, b) {
    const startDelta = a.start_line - b.start_line;
    if (a.start_line !== b.start_line) {
        return startDelta;
    }
    const endDelta = a.end_line - b.end_line;
    if (a.end_line !== b.end_line) {
        return endDelta;
    }
    return compareSearchResults(a, b);
}
3211
+ function mergeSnippetCluster(cluster, mergedStartLine, mergedEndLine) {
3212
+ const byRelevance = [...cluster].sort(compareSearchResults);
3213
+ const primary = byRelevance[0];
3214
+ if (!primary) {
3215
+ return "";
3216
+ }
3217
+ const lineMap = new Map();
3218
+ for (let rank = 0; rank < byRelevance.length; rank += 1) {
3219
+ const candidate = byRelevance[rank];
3220
+ if (!candidate) {
3221
+ continue;
3222
+ }
3223
+ const lines = candidate.snippet.replace(/\r\n/g, "\n").split("\n");
3224
+ const expectedLineCount = Math.max(1, candidate.end_line - candidate.start_line + 1);
3225
+ const maxLines = Math.min(lines.length, expectedLineCount);
3226
+ for (let offset = 0; offset < maxLines; offset += 1) {
3227
+ const lineNumber = candidate.start_line + offset;
3228
+ if (lineNumber < mergedStartLine || lineNumber > mergedEndLine) {
3229
+ continue;
3230
+ }
3231
+ const text = lines[offset];
3232
+ if (typeof text !== "string") {
3233
+ continue;
3234
+ }
3235
+ const existing = lineMap.get(lineNumber);
3236
+ if (!existing || candidate.score > existing.score + 1e-9 || (Math.abs(candidate.score - existing.score) <= 1e-9 && rank < existing.rank)) {
3237
+ lineMap.set(lineNumber, { text, score: candidate.score, rank });
3238
+ }
3239
+ }
3240
+ }
3241
+ const mergedLines = [];
3242
+ let missingLines = 0;
3243
+ for (let line = mergedStartLine; line <= mergedEndLine; line += 1) {
3244
+ const entry = lineMap.get(line);
3245
+ if (!entry) {
3246
+ missingLines += 1;
3247
+ mergedLines.push("");
3248
+ continue;
3249
+ }
3250
+ mergedLines.push(entry.text);
3251
+ }
3252
+ const totalLines = Math.max(1, mergedEndLine - mergedStartLine + 1);
3253
+ const maxMissingLines = Math.max(2, Math.floor(totalLines * 0.2));
3254
+ if (missingLines > maxMissingLines) {
3255
+ return primary.snippet;
3256
+ }
3257
+ return mergedLines.join("\n");
3258
+ }
3259
/**
 * Collapses a cluster of candidates into one result.
 *
 * A single-candidate cluster is returned as-is (same object). Otherwise the
 * result copies the candidate that sorts first under `compareSearchResults`,
 * widened to the union line range of the cluster, with the snippet produced
 * by `mergeSnippetCluster` (falling back to the primary's snippet when the
 * stitched text is empty).
 *
 * @param {object[]} cluster - Non-empty list of candidates.
 * @returns {object} The merged candidate.
 * @throws {Error} When `cluster` is empty.
 */
function mergeCandidateCluster(cluster) {
    if (cluster.length === 0) {
        throw new Error("mergeCandidateCluster requires at least one candidate");
    }
    if (cluster.length === 1) {
        return cluster[0];
    }
    const primary = [...cluster].sort(compareSearchResults)[0];
    // A plain loop instead of Math.min(...array): spreading a very large
    // array into call arguments can exceed the engine's argument limit.
    let mergedStartLine = Infinity;
    let mergedEndLine = -Infinity;
    for (const candidate of cluster) {
        mergedStartLine = Math.min(mergedStartLine, candidate.start_line);
        mergedEndLine = Math.max(mergedEndLine, candidate.end_line);
    }
    const stitchedSnippet = mergeSnippetCluster(cluster, mergedStartLine, mergedEndLine);
    return {
        ...primary,
        start_line: mergedStartLine,
        end_line: mergedEndLine,
        snippet: stitchedSnippet.length > 0 ? stitchedSnippet : primary.snippet
    };
}
3278
/**
 * Overlap ratio (relative to the smaller of two ranges) at or above which
 * `isHeavilyOverlappingLineRange` reports a heavy overlap.
 */
const HEAVY_LINE_RANGE_OVERLAP_RATIO = 0.2;
/**
 * Number of lines in an inclusive line range, never less than 1.
 *
 * @param {number} startLine - First line (inclusive).
 * @param {number} endLine - Last line (inclusive).
 * @returns {number} `endLine - startLine + 1`, clamped to a minimum of 1.
 */
function lineRangeLength(startLine, endLine) {
    const length = endLine - startLine + 1;
    return Math.max(1, length);
}
3282
/**
 * Number of lines shared by two inclusive line ranges.
 *
 * @param {number} aStartLine
 * @param {number} aEndLine
 * @param {number} bStartLine
 * @param {number} bEndLine
 * @returns {number} Shared line count, or 0 when the ranges do not intersect.
 */
function lineRangeOverlapLength(aStartLine, aEndLine, bStartLine, bEndLine) {
    const sharedStart = Math.max(aStartLine, bStartLine);
    const sharedEnd = Math.min(aEndLine, bEndLine);
    return sharedEnd < sharedStart ? 0 : sharedEnd - sharedStart + 1;
}
3290
/**
 * Reports whether `candidate` heavily overlaps any already-selected range.
 *
 * For each selected range, the overlap is measured as shared lines divided
 * by the length of the smaller of the two ranges; a ratio at or above
 * `HEAVY_LINE_RANGE_OVERLAP_RATIO` counts as heavy.
 *
 * @param {{start_line: number, end_line: number}} candidate
 * @param {Iterable<{start_line: number, end_line: number}>} selectedRanges
 * @returns {boolean} True when at least one selected range overlaps heavily.
 */
function isHeavilyOverlappingLineRange(candidate, selectedRanges) {
    const candidateLength = lineRangeLength(candidate.start_line, candidate.end_line);
    for (const selected of selectedRanges) {
        const overlapLength = lineRangeOverlapLength(selected.start_line, selected.end_line, candidate.start_line, candidate.end_line);
        if (overlapLength <= 0) {
            continue;
        }
        const smallerRange = Math.min(lineRangeLength(selected.start_line, selected.end_line), candidateLength);
        const overlapRatio = overlapLength / Math.max(1, smallerRange);
        if (overlapRatio >= HEAVY_LINE_RANGE_OVERLAP_RATIO) {
            return true;
        }
    }
    return false;
}
3304
/**
 * Merges overlapping or directly adjacent line spans.
 *
 * Spans whose `end_line` is before their `start_line` are discarded. The
 * rest are sorted by `start_line` (then `end_line`), and a span that starts
 * no later than one line after the previous span's end is folded into it.
 * The input array and its span objects are never mutated; every returned
 * span is a fresh shallow copy.
 *
 * @param {Array<{start_line: number, end_line: number}>} spans
 * @returns {Array<{start_line: number, end_line: number}>} Merged spans in ascending order.
 */
function mergeLineSpans(spans) {
    // No shortcut for 0 or 1 spans: a lone span must go through the same
    // validity filter and copy as every other input.
    const ordered = spans
        .filter((span) => span.end_line >= span.start_line)
        .sort((a, b) => a.start_line - b.start_line || a.end_line - b.end_line);
    const merged = [];
    for (const span of ordered) {
        const last = merged[merged.length - 1];
        if (!last || span.start_line > last.end_line + 1) {
            merged.push({ ...span });
            continue;
        }
        last.end_line = Math.max(last.end_line, span.end_line);
    }
    return merged;
}
3322
/**
 * Number of lines strictly between two line ranges.
 *
 * @param {{start_line: number, end_line: number}} anchor
 * @param {{start_line: number, end_line: number}} candidate
 * @returns {number} Lines separating the ranges; 0 when they touch or overlap.
 */
function lineRangeGap(anchor, candidate) {
    const candidateIsAfter = candidate.start_line > anchor.end_line;
    if (candidateIsAfter) {
        return candidate.start_line - anchor.end_line - 1;
    }
    const candidateIsBefore = anchor.start_line > candidate.end_line;
    if (candidateIsBefore) {
        return anchor.start_line - candidate.end_line - 1;
    }
    return 0;
}
3331
+ function buildPreferredLineMap(candidates) {
3332
+ const byRelevance = [...candidates].sort(compareSearchResults);
3333
+ const lineMap = new Map();
3334
+ for (let rank = 0; rank < byRelevance.length; rank += 1) {
3335
+ const candidate = byRelevance[rank];
3336
+ if (!candidate) {
3337
+ continue;
3338
+ }
3339
+ const lines = candidate.snippet.replace(/\r\n/g, "\n").split("\n");
3340
+ const expectedLineCount = Math.max(1, candidate.end_line - candidate.start_line + 1);
3341
+ const maxLines = Math.min(lines.length, expectedLineCount);
3342
+ for (let offset = 0; offset < maxLines; offset += 1) {
3343
+ const lineNumber = candidate.start_line + offset;
3344
+ const text = lines[offset];
3345
+ if (typeof text !== "string") {
3346
+ continue;
3347
+ }
3348
+ const existing = lineMap.get(lineNumber);
3349
+ if (!existing || candidate.score > existing.score + 1e-9 || (Math.abs(candidate.score - existing.score) <= 1e-9 && rank < existing.rank)) {
3350
+ lineMap.set(lineNumber, { text, score: candidate.score, rank });
3351
+ }
3352
+ }
3353
+ }
3354
+ return new Map([...lineMap.entries()].map(([line, value]) => [line, value.text]));
3355
+ }
3356
/**
 * Shortens a snippet to at most `maxChars` UTF-16 code units.
 *
 * A snippet that already fits is returned unchanged. Otherwise it is cut at
 * `maxChars`; if the cut contains a newline past index 80, the text is cut
 * back to that last newline so no partial line is left. Trailing
 * whitespace is removed from the clipped result.
 *
 * @param {string} snippet - Text to clip.
 * @param {number} maxChars - Maximum length; negative values are treated as 0.
 * @returns {string} The clipped snippet.
 */
function clipSnippetToMaxChars(snippet, maxChars) {
    if (snippet.length <= maxChars) {
        return snippet;
    }
    let clipped = snippet.slice(0, Math.max(0, maxChars));
    // slice() counts UTF-16 code units, so the cut can land between the two
    // halves of a surrogate pair. Drop the orphaned high surrogate rather
    // than emit an invalid character.
    const lastUnit = clipped.charCodeAt(clipped.length - 1);
    const nextUnit = snippet.charCodeAt(clipped.length);
    const splitsSurrogatePair = lastUnit >= 0xd800 && lastUnit <= 0xdbff &&
        nextUnit >= 0xdc00 && nextUnit <= 0xdfff;
    if (splitsSurrogatePair) {
        clipped = clipped.slice(0, -1);
    }
    const lastNewline = clipped.lastIndexOf("\n");
    if (lastNewline > 80) {
        return clipped.slice(0, lastNewline).trimEnd();
    }
    return clipped.trimEnd();
}
3367
/** Maps a file extension (as returned by `fileExtension`) to a snippet-integrity language id. */
const SNIPPET_INTEGRITY_LANGUAGE_BY_EXTENSION = new Map([
    [".ts", "typescript"],
    [".mts", "typescript"],
    [".cts", "typescript"],
    [".tsx", "tsx"],
    [".js", "javascript"],
    [".mjs", "javascript"],
    [".cjs", "javascript"],
    [".jsx", "jsx"]
]);
/**
 * Derives the snippet-integrity language from a file path's extension.
 *
 * @param {string} path - File path.
 * @returns {"typescript"|"tsx"|"javascript"|"jsx"|undefined} The language id,
 *   or undefined for any other extension.
 */
function snippetIntegrityLanguageFromPath(path) {
    return SNIPPET_INTEGRITY_LANGUAGE_BY_EXTENSION.get(fileExtension(path));
}
3383
/**
 * Returns the first line of a snippet that is not blank, trimmed.
 *
 * @param {string} snippet - Text with "\n" or "\r\n" line endings.
 * @returns {string} The trimmed line, or "" when every line is blank.
 */
function firstNonEmptyLine(snippet) {
    for (const line of snippet.replace(/\r\n/g, "\n").split("\n")) {
        const trimmed = line.trim();
        if (trimmed.length > 0) {
            return trimmed;
        }
    }
    return "";
}
3393
/**
 * Returns the last line of a snippet that is not blank, trimmed.
 *
 * @param {string} snippet - Text with "\n" or "\r\n" line endings.
 * @returns {string} The trimmed line, or "" when every line is blank.
 */
function lastNonEmptyLine(snippet) {
    const lines = snippet.replace(/\r\n/g, "\n").split("\n");
    for (let index = lines.length - 1; index >= 0; index -= 1) {
        const trimmed = (lines[index] ?? "").trim();
        if (trimmed.length > 0) {
            return trimmed;
        }
    }
    return "";
}
3403
/**
 * Counts `{` minus `}` over the whole snippet.
 *
 * This is a plain character count: braces inside string literals or
 * comments are counted like any other.
 *
 * @param {string} snippet - Text to scan.
 * @returns {number} Positive when more braces open than close.
 */
function curlyBraceDelta(snippet) {
    let balance = 0;
    for (const char of snippet) {
        if (char === "{") {
            balance += 1;
        }
        else if (char === "}") {
            balance -= 1;
        }
    }
    return balance;
}
3417
/**
 * Heuristically decides whether a (trimmed) line starts a declaration.
 *
 * Recognised forms: a line starting with `@`, function declarations,
 * class declarations, `const`/`let`/`var` assignments, methods preceded by
 * one or more member modifiers, and bare `name(...) {` method heads.
 *
 * @param {string} line - A single line with leading whitespace already removed.
 * @returns {boolean} True when the line matches one of the declaration shapes.
 */
function looksLikeDeclarationStart(line) {
    if (line.length === 0) {
        return false;
    }
    if (line.startsWith("@")) {
        return true;
    }
    const declarationPatterns = [
        // `default` is optional here so `export default function f(` matches,
        // mirroring the class pattern below.
        /^(?:export\s+)?(?:default\s+)?(?:async\s+)?function\s+[A-Za-z_$][\w$]*\s*\(/u,
        /^(?:export\s+)?(?:default\s+)?class\s+[A-Za-z_$][\w$]*/u,
        /^(?:export\s+)?(?:const|let|var)\s+[A-Za-z_$][\w$]*\s*=/u,
        // One or more modifiers, so `public static async load(` matches too.
        /^(?:(?:public|private|protected|static|readonly|async)\s+)+[A-Za-z_$][\w$]*\s*\(/u,
        /^(?:[A-Za-z_$][\w$]*)\s*\([^)]*\)\s*\{/u
    ];
    return declarationPatterns.some((pattern) => pattern.test(line));
}
3430
/**
 * Heuristically decides whether a (trimmed) line looks like the end of a
 * complete construct.
 *
 * @param {string} line - A single line with trailing whitespace already removed.
 * @returns {boolean} True when the line ends with `}`, `};`, `);`, `]` or `];`.
 */
function looksLikeSnippetTerminalBoundary(line) {
    if (line.length === 0) {
        return false;
    }
    const terminalSuffixes = ["}", "};", ");", "]", "];"];
    return terminalSuffixes.some((suffix) => line.endsWith(suffix));
}
3440
/**
 * Heuristically extracts the name of the first declared symbol in a snippet.
 *
 * Only the first 40 lines are scanned. Each non-blank line is trimmed and
 * tried against the declaration patterns in order; the first captured name
 * that is not a control-flow keyword is returned.
 *
 * @param {string} snippet - Source text with "\n" or "\r\n" line endings.
 * @returns {string|undefined} The symbol name, or undefined when none is found.
 */
function detectSnippetSymbolName(snippet) {
    const lines = snippet.replace(/\r\n/g, "\n").split("\n").slice(0, 40);
    const patterns = [
        // `default` is optional so `export default function f(` is recognised.
        /^(?:export\s+)?(?:default\s+)?(?:async\s+)?function\s+([A-Za-z_$][\w$]*)\s*\(/u,
        /^(?:export\s+)?(?:default\s+)?class\s+([A-Za-z_$][\w$]*)\b/u,
        /^(?:export\s+)?(?:const|let|var)\s+([A-Za-z_$][\w$]*)\s*=\s*(?:async\s*)?\([^)]*\)\s*=>/u,
        /^(?:export\s+)?(?:const|let|var)\s+([A-Za-z_$][\w$]*)\s*=\s*(?:async\s*)?[A-Za-z_$][\w$]*\s*=>/u,
        // One or more modifiers, so `private static async load(` yields "load".
        /^(?:(?:public|private|protected|static|readonly|async)\s+)+([A-Za-z_$][\w$]*)\s*\(/u,
        /^([A-Za-z_$][\w$]*)\s*\([^)]*\)\s*\{/u
    ];
    // `if (x) {` and friends match the bare `name(...) {` pattern; skip them.
    const disallowed = new Set(["if", "for", "while", "switch", "catch", "return"]);
    for (const line of lines) {
        const trimmed = line.trim();
        if (trimmed.length === 0) {
            continue;
        }
        for (const pattern of patterns) {
            const symbol = trimmed.match(pattern)?.[1];
            if (symbol && !disallowed.has(symbol)) {
                return symbol;
            }
        }
    }
    return undefined;
}
3466
/**
 * Decides whether a result's snippet should be marked as truncated.
 *
 * Nothing omitted on either side means not truncated. Otherwise the snippet
 * is truncated when any of these hold:
 *  - lines were omitted before it and its first non-blank line does not
 *    look like a declaration start;
 *  - lines were omitted after it and it has more `{` than `}`;
 *  - lines were omitted after it and its last non-blank line does not look
 *    like a terminal boundary;
 *  - lines were omitted on both sides.
 *
 * @param {{snippet: string}} result - The search result being inspected.
 * @param {number} omittedBefore - Lines omitted before the snippet.
 * @param {number} omittedAfter - Lines omitted after the snippet.
 * @returns {boolean} True when the snippet should carry a truncation marker.
 */
function shouldAnnotateSnippetAsTruncated(result, omittedBefore, omittedAfter) {
    const hasOmittedBefore = omittedBefore > 0;
    const hasOmittedAfter = omittedAfter > 0;
    if (!hasOmittedBefore && !hasOmittedAfter) {
        return false;
    }
    const firstLine = firstNonEmptyLine(result.snippet);
    const lastLine = lastNonEmptyLine(result.snippet);
    if (hasOmittedBefore && !looksLikeDeclarationStart(firstLine)) {
        return true;
    }
    if (hasOmittedAfter) {
        if (curlyBraceDelta(result.snippet) > 0) {
            return true;
        }
        if (!looksLikeSnippetTerminalBoundary(lastLine)) {
            return true;
        }
    }
    return hasOmittedBefore && hasOmittedAfter;
}
3485
/**
 * Grows the anchor's line range to take in every candidate reachable
 * through gaps of at most `maxGapLines` lines.
 *
 * The candidate list is rescanned until a full pass adds nothing, because
 * each widening can bring further candidates within reach.
 *
 * @param {{anchor: {start_line: number, end_line: number},
 *          candidates: Iterable<{start_line: number, end_line: number}>,
 *          maxGapLines: number}} input
 * @returns {{start_line: number, end_line: number}} The resulting envelope.
 */
function estimateContiguousEnvelope(input) {
    const envelope = {
        start_line: input.anchor.start_line,
        end_line: input.anchor.end_line
    };
    let grew;
    do {
        grew = false;
        for (const candidate of input.candidates) {
            if (lineRangeGap(envelope, candidate) > input.maxGapLines) {
                continue;
            }
            const widenedStart = Math.min(envelope.start_line, candidate.start_line);
            const widenedEnd = Math.max(envelope.end_line, candidate.end_line);
            if (widenedStart !== envelope.start_line || widenedEnd !== envelope.end_line) {
                envelope.start_line = widenedStart;
                envelope.end_line = widenedEnd;
                grew = true;
            }
        }
    } while (grew);
    return envelope;
}
3507
+ function repairSnippetFromEnvelope(input) {
3508
+ const envelopeSpan = input.envelope.end_line - input.envelope.start_line + 1;
3509
+ if (envelopeSpan > input.config.repair_max_envelope_lines) {
3510
+ return { reason: "envelope_cap_exceeded", clipped: false };
3511
+ }
3512
+ const envelopeCandidates = input.samePathCandidates
3513
+ .filter((candidate) => candidate.end_line >= input.envelope.start_line && candidate.start_line <= input.envelope.end_line)
3514
+ .sort(compareSearchResultsByLineRange);
3515
+ if (envelopeCandidates.length === 0) {
3516
+ return { reason: "no_envelope_candidates", clipped: false };
3517
+ }
3518
+ const lineMap = buildPreferredLineMap(envelopeCandidates);
3519
+ const renderedLines = [];
3520
+ let missingLines = 0;
3521
+ for (let line = input.envelope.start_line; line <= input.envelope.end_line; line += 1) {
3522
+ const text = lineMap.get(line);
3523
+ if (typeof text !== "string") {
3524
+ missingLines += 1;
3525
+ renderedLines.push("");
3526
+ continue;
3527
+ }
3528
+ renderedLines.push(text);
3529
+ }
3530
+ const maxMissingLines = Math.max(2, Math.floor(envelopeSpan * 0.2));
3531
+ if (missingLines > maxMissingLines) {
3532
+ return { reason: "missing_line_density_too_high", clipped: false };
3533
+ }
3534
+ const clippedLines = [];
3535
+ let usedChars = 0;
3536
+ let clipped = false;
3537
+ for (let index = 0; index < renderedLines.length; index += 1) {
3538
+ const line = renderedLines[index] ?? "";
3539
+ const additionalChars = index === 0 ? line.length : line.length + 1;
3540
+ if (clippedLines.length > 0 && usedChars + additionalChars > input.config.repair_max_snippet_chars) {
3541
+ clipped = true;
3542
+ break;
3543
+ }
3544
+ if (clippedLines.length === 0 && line.length > input.config.repair_max_snippet_chars) {
3545
+ const clippedLine = line.slice(0, input.config.repair_max_snippet_chars);
3546
+ if (clippedLine.length === 0) {
3547
+ return { reason: "snippet_char_cap_exceeded", clipped: false };
3548
+ }
3549
+ clippedLines.push(clippedLine);
3550
+ usedChars = clippedLine.length;
3551
+ clipped = true;
3552
+ break;
3553
+ }
3554
+ clippedLines.push(line);
3555
+ usedChars += additionalChars;
3556
+ }
3557
+ if (clippedLines.length === 0) {
3558
+ return { reason: "snippet_char_cap_exceeded", clipped: false };
3559
+ }
3560
+ const repairedSnippet = clippedLines.join("\n").trimEnd();
3561
+ if (repairedSnippet.length === 0) {
3562
+ return { reason: "empty_repaired_snippet", clipped: false };
3563
+ }
3564
+ const repairedEndLine = input.envelope.start_line + clippedLines.length - 1;
3565
+ return {
3566
+ repaired: {
3567
+ ...input.anchor,
3568
+ start_line: input.envelope.start_line,
3569
+ end_line: repairedEndLine,
3570
+ snippet: repairedSnippet
3571
+ },
3572
+ clipped
3573
+ };
3574
+ }
3575
/**
 * Renders the one-line `// [truncated:...]` marker comment for a result.
 *
 * @param {{result: {start_line: number, end_line: number},
 *          envelope_start_line: number,
 *          envelope_end_line: number,
 *          marker_template_version: string,
 *          symbolName?: string}} input
 * @returns {string} The marker; a missing `symbolName` is rendered as "unknown".
 */
function buildSnippetTruncationMarker(input) {
    const { result, envelope_start_line: envelopeStart, envelope_end_line: envelopeEnd } = input;
    const estimatedTotalLines = Math.max(1, envelopeEnd - envelopeStart + 1);
    const omittedBefore = Math.max(0, result.start_line - envelopeStart);
    const omittedAfter = Math.max(0, envelopeEnd - result.end_line);
    const fields = [
        `symbol=${input.symbolName ?? "unknown"}`,
        `estimated_span=${envelopeStart}-${envelopeEnd}`,
        `estimated_total_lines=${estimatedTotalLines}`,
        `omitted_before=${omittedBefore}`,
        `omitted_after=${omittedAfter}`,
        `through_line=${result.end_line}`
    ];
    return `// [truncated:${input.marker_template_version} ${fields.join(" ")}]`;
}
3581
/**
 * Checks each selected result against the contiguous envelope estimated from
 * same-path source candidates. When a result looks truncated it optionally
 * attempts a repair and, if the (possibly repaired) snippet still looks
 * truncated, appends a truncation marker line to the snippet.
 *
 * @param {object} input
 * @param {Array<object>} input.selected - Results to inspect; never mutated.
 * @param {Array<object>} input.sourceCandidates - Candidates grouped by `path` to estimate envelopes.
 * @param {object} input.config - Snippet integrity configuration.
 * @param {object} input.observability - Provides `metrics` and `logger`.
 * @param {string} input.retrievalProfileId - Label attached to metrics and log entries.
 * @returns {Array<object>} A new array; untouched results are returned by reference.
 */
function annotateSearchResultsWithSnippetIntegrity(input) {
    const { config, selected, sourceCandidates, observability, retrievalProfileId } = input;
    if (!config.enabled || selected.length === 0) {
        return [...selected];
    }
    const enabledLanguages = new Set(normalizeSnippetIntegrityLanguageList(config.target_languages));
    if (enabledLanguages.size === 0) {
        return [...selected];
    }
    // Index the source candidates by file path.
    const sourceByPath = new Map();
    for (const candidate of sourceCandidates) {
        const bucket = sourceByPath.get(candidate.path);
        if (bucket === undefined) {
            sourceByPath.set(candidate.path, [candidate]);
            continue;
        }
        bucket.push(candidate);
    }
    const annotateOne = (result) => {
        const language = snippetIntegrityLanguageFromPath(result.path);
        if (!language || !enabledLanguages.has(language)) {
            return result;
        }
        const samePath = sourceByPath.get(result.path) ?? [result];
        if (samePath.length <= 1) {
            return result;
        }
        const envelope = estimateContiguousEnvelope({
            anchor: result,
            candidates: samePath,
            maxGapLines: config.max_contiguous_gap_lines
        });
        const looksTruncated = shouldAnnotateSnippetAsTruncated(
            result,
            Math.max(0, result.start_line - envelope.start_line),
            Math.max(0, envelope.end_line - result.end_line)
        );
        if (!looksTruncated) {
            return result;
        }
        const envelopeCandidates = samePath
            .filter((candidate) => candidate.end_line >= envelope.start_line && candidate.start_line <= envelope.end_line)
            .sort(compareSearchResultsByLineRange);
        // A fresh label object per call, exactly as many objects as before.
        const profileLabels = () => ({
            retrieval_profile_id: retrievalProfileId,
            language
        });
        // Fields shared by both repair-decision log entries.
        const decisionFields = () => ({
            retrieval_profile_id: retrievalProfileId,
            path: result.path,
            language,
            envelope_start_line: envelope.start_line,
            envelope_end_line: envelope.end_line,
            envelope_span_lines: envelope.end_line - envelope.start_line + 1
        });
        let assembled = result;
        if (config.repair_enabled) {
            observability.metrics.increment("retrieval_snippet_repair_attempt_total", 1, profileLabels());
            const repairOutcome = repairSnippetFromEnvelope({
                anchor: result,
                envelope,
                samePathCandidates: samePath,
                config
            });
            if (repairOutcome.repaired) {
                assembled = repairOutcome.repaired;
                observability.metrics.increment("retrieval_snippet_repair_success_total", 1, {
                    ...profileLabels(),
                    clipped: repairOutcome.clipped ? "true" : "false"
                });
                observability.logger.info("snippet integrity repair decision", {
                    ...decisionFields(),
                    status: "repaired",
                    clipped: repairOutcome.clipped
                });
            }
            else {
                observability.logger.info("snippet integrity repair decision", {
                    ...decisionFields(),
                    status: "repair_skipped",
                    reason: repairOutcome.reason ?? "unknown"
                });
            }
        }
        // Re-evaluate against the envelope: a successful repair may have closed the gap.
        const omittedBefore = Math.max(0, assembled.start_line - envelope.start_line);
        const omittedAfter = Math.max(0, envelope.end_line - assembled.end_line);
        if (!shouldAnnotateSnippetAsTruncated(assembled, omittedBefore, omittedAfter)) {
            return assembled;
        }
        // Prefer a symbol found in the snippet itself, then fall back to envelope candidates in line order.
        let symbolName = detectSnippetSymbolName(assembled.snippet);
        for (const candidate of envelopeCandidates) {
            if (symbolName) {
                break;
            }
            symbolName = detectSnippetSymbolName(candidate.snippet);
        }
        const marker = buildSnippetTruncationMarker({
            result: assembled,
            symbolName,
            envelope_start_line: envelope.start_line,
            envelope_end_line: envelope.end_line,
            marker_template_version: config.marker_template_version
        });
        observability.metrics.increment("retrieval_snippet_repair_fallback_marker_total", 1, profileLabels());
        observability.metrics.increment("retrieval_snippet_truncation_marker_total", 1, {
            ...profileLabels(),
            symbol_detected: symbolName ? "true" : "false",
            marker_template_version: config.marker_template_version
        });
        observability.metrics.observe("retrieval_snippet_omitted_after_lines", omittedAfter, profileLabels());
        const baseSnippet = assembled.snippet.trimEnd();
        const annotatedSnippet = baseSnippet.length > 0 ? `${baseSnippet}\n${marker}` : marker;
        return { ...assembled, snippet: annotatedSnippet };
    };
    return selected.map((result) => annotateOne(result));
}
3707
/**
 * Expands each selected result ("anchor") with nearby spans from the same
 * file, rendering them as one snippet with `...` separators where lines are
 * skipped between spans.
 *
 * An anchor is returned unchanged when packing is not applicable: there are
 * no other same-path candidates, `max_spans_per_result` is 1 or less, no extra
 * span qualifies, the rendered text would be too elision-heavy, or the
 * clipped snippet is empty.
 *
 * @param {object} input
 * @param {Array<object>} input.selected - Anchors to pack; never mutated.
 * @param {Array<object>} input.sourceCandidates - Pool of candidates, grouped here by `path`.
 * @param {object} input.config - Reads `enabled`, `max_spans_per_result`,
 *   `max_gap_lines` and `max_snippet_chars`.
 * @returns {Array<object>} A new array of packed or original results.
 */
function packSearchResultsWithContext(input) {
    if (!input.config.enabled || input.selected.length === 0) {
        return [...input.selected];
    }
    // Group the candidate pool by file path so each anchor only sees its own file.
    const sourceByPath = new Map();
    for (const candidate of input.sourceCandidates) {
        const rows = sourceByPath.get(candidate.path);
        if (rows) {
            rows.push(candidate);
        }
        else {
            sourceByPath.set(candidate.path, [candidate]);
        }
    }
    return input.selected.map((anchor) => {
        const samePath = sourceByPath.get(anchor.path) ?? [anchor];
        if (samePath.length <= 1 || input.config.max_spans_per_result <= 1) {
            return anchor;
        }
        const anchorRange = { start_line: anchor.start_line, end_line: anchor.end_line };
        // Eligible context: not the anchor itself, not heavily overlapping it, and close enough.
        // Ordered by relevance first, then by distance from the anchor.
        const candidates = samePath
            .filter((candidate) => !(candidate.start_line === anchor.start_line && candidate.end_line === anchor.end_line) &&
            !isHeavilyOverlappingLineRange(candidate, [anchorRange]) &&
            lineRangeGap(anchorRange, candidate) <= input.config.max_gap_lines)
            .sort((a, b) => {
            const relevanceDiff = compareSearchResults(a, b);
            if (relevanceDiff !== 0) {
                return relevanceDiff;
            }
            return lineRangeGap(anchorRange, a) - lineRangeGap(anchorRange, b);
        });
        const spans = [{ ...anchorRange }];
        for (const candidate of candidates) {
            if (spans.length >= input.config.max_spans_per_result) {
                break;
            }
            const nextSpan = { start_line: candidate.start_line, end_line: candidate.end_line };
            const nextEnvelope = mergeLineSpans([...spans, nextSpan]);
            // Reject a span that would leave any hole wider than the configured gap.
            if (nextEnvelope.some((span, idx) => idx > 0 && span.start_line - (nextEnvelope[idx - 1]?.end_line ?? span.start_line) - 1 > input.config.max_gap_lines)) {
                continue;
            }
            spans.push(nextSpan);
        }
        const mergedSpans = mergeLineSpans(spans);
        if (mergedSpans.length <= 1) {
            return anchor;
        }
        const lineMap = buildPreferredLineMap([anchor, ...samePath]);
        const renderedLines = [];
        let contentLineCount = 0;
        let elisionCount = 0;
        for (let index = 0; index < mergedSpans.length; index += 1) {
            const span = mergedSpans[index];
            if (!span) {
                continue;
            }
            if (index > 0) {
                const previous = mergedSpans[index - 1];
                // Only mark an elision when at least one line is actually skipped.
                // Directly adjacent spans (gap of zero lines) are contiguous text, so the
                // gap is measured as `start - end - 1`, matching the gap check above.
                if (previous && span.start_line - previous.end_line - 1 > 0) {
                    renderedLines.push("...");
                    elisionCount += 1;
                }
            }
            for (let line = span.start_line; line <= span.end_line; line += 1) {
                // Lines missing from the map are rendered as empty strings.
                renderedLines.push(lineMap.get(line) ?? "");
                contentLineCount += 1;
            }
        }
        if (renderedLines.length === 0) {
            return anchor;
        }
        // Fall back to the plain anchor when more than a quarter of the rendered lines are separators.
        const elisionDensity = elisionCount / Math.max(1, contentLineCount + elisionCount);
        if (elisionDensity > 0.25) {
            return anchor;
        }
        const packedSnippet = clipSnippetToMaxChars(renderedLines.join("\n"), input.config.max_snippet_chars);
        if (packedSnippet.length === 0) {
            return anchor;
        }
        const packedStart = mergedSpans[0]?.start_line ?? anchor.start_line;
        const packedEnd = mergedSpans[mergedSpans.length - 1]?.end_line ?? anchor.end_line;
        return {
            ...anchor,
            start_line: packedStart,
            end_line: packedEnd,
            snippet: packedSnippet,
            reason: `${anchor.reason} + contextual spans`
        };
    });
}
3797
/**
 * Consolidates candidates from the same file whose line ranges overlap or sit
 * within `merge_gap_lines` of each other, as long as the combined range stays
 * within `merge_max_span_lines`. Each cluster is collapsed with
 * `mergeCandidateCluster`, and the final list is ordered with
 * `compareSearchResults`.
 *
 * @param {Array<object>} candidates - Candidates with `path`, `start_line`, `end_line`.
 * @param {object} config - Reads `merge_overlapping_chunks_enabled`,
 *   `merge_gap_lines` and `merge_max_span_lines`.
 * @returns {Array<object>} A new array; a shallow copy when merging is disabled
 *   or there is at most one candidate.
 */
function mergeOverlappingCandidates(candidates, config) {
    if (!config.merge_overlapping_chunks_enabled || candidates.length <= 1) {
        return [...candidates];
    }
    // Bucket candidates by file path, keeping first-seen path order.
    const byPath = new Map();
    for (const candidate of candidates) {
        const bucket = byPath.get(candidate.path);
        if (bucket === undefined) {
            byPath.set(candidate.path, [candidate]);
        }
        else {
            bucket.push(candidate);
        }
    }
    const merged = [];
    for (const group of byPath.values()) {
        const ordered = [...group].sort(compareSearchResultsByLineRange);
        // The cluster currently being grown, or null before the first candidate.
        let open = null;
        for (const candidate of ordered) {
            if (open !== null) {
                const widenedStart = Math.min(open.start, candidate.start_line);
                const widenedEnd = Math.max(open.end, candidate.end_line);
                const widenedSpan = widenedEnd - widenedStart + 1;
                const gapLines = Math.max(0, candidate.start_line - open.end - 1);
                const fits = gapLines <= config.merge_gap_lines && widenedSpan <= config.merge_max_span_lines;
                if (fits) {
                    open.members.push(candidate);
                    open.start = widenedStart;
                    open.end = widenedEnd;
                    continue;
                }
                // Candidate does not fit: close the current cluster before starting a new one.
                merged.push(mergeCandidateCluster(open.members));
            }
            open = {
                members: [candidate],
                start: candidate.start_line,
                end: candidate.end_line
            };
        }
        if (open !== null) {
            merged.push(mergeCandidateCluster(open.members));
        }
    }
    return merged.sort(compareSearchResults);
}
3851
/**
 * Trims a candidate list at the first sharp drop in quality.
 *
 * Candidates are ordered with `compareSearchResults`. The first
 * `smart_cutoff_min_k` (at least 1) are always kept. After that, the list is
 * cut at the first candidate that falls below `smart_cutoff_min_score`, below
 * `smart_cutoff_top_ratio` times the leading score, or more than
 * `smart_cutoff_delta_abs` below its predecessor. At most
 * `smart_cutoff_max_k` candidates are kept.
 *
 * @param {Array<object>} candidates - Candidates carrying a numeric `score`.
 * @param {object} config - Rerank config holding the `smart_cutoff_*` settings.
 * @returns {Array<object>} A new array; a shallow copy when the cutoff is
 *   disabled or the input is empty.
 */
function applySmartCutoffCandidates(candidates, config) {
    if (!config.smart_cutoff_enabled || candidates.length === 0) {
        return [...candidates];
    }
    const ranked = [...candidates].sort(compareSearchResults);
    const guaranteed = Math.max(1, config.smart_cutoff_min_k);
    const ceiling = Math.max(guaranteed, config.smart_cutoff_max_k);
    const leadingScore = ranked[0]?.score ?? Number.NEGATIVE_INFINITY;
    // True when the candidate at `position` marks the point where the list should end.
    const isCutoffPoint = (candidate, position) => {
        if (candidate.score < config.smart_cutoff_min_score) {
            return true;
        }
        if (candidate.score < leadingScore * config.smart_cutoff_top_ratio) {
            return true;
        }
        const predecessor = ranked[position - 1];
        return Boolean(predecessor) && predecessor.score - candidate.score > config.smart_cutoff_delta_abs;
    };
    const kept = [];
    for (const [position, candidate] of ranked.entries()) {
        if (!candidate) {
            continue;
        }
        if (kept.length >= ceiling) {
            break;
        }
        // Quality checks only apply once the guaranteed minimum has been collected.
        if (kept.length < guaranteed) {
            kept.push(candidate);
            continue;
        }
        if (isCutoffPoint(candidate, position)) {
            break;
        }
        kept.push(candidate);
    }
    return kept;
}
3886
/**
 * Test-only entry point that forwards to `applySmartCutoffCandidates`.
 *
 * @param {{candidates: Array<object>, config: object}} input
 * @returns {Array<object>} Whatever `applySmartCutoffCandidates` returns.
 */
export function __applySmartCutoffCandidatesForTests(input) {
    const { candidates, config } = input;
    return applySmartCutoffCandidates(candidates, config);
}
2981
3889
  function dedupeEnhancerCandidatesByPath(results) {
2982
3890
  const byPath = new Map();
2983
3891
  for (const result of results) {
@@ -3281,11 +4189,11 @@ function deterministicEnhancerFallbackRanking(input) {
3281
4189
  const avoided = input.results.filter((result) => !preferred.includes(result) && !tolerated.includes(result));
3282
4190
  return [...preferred, ...tolerated, ...avoided];
3283
4191
  }
3284
- function trimToContextBudget(results) {
4192
+ function trimToContextBudget(results, budgetTokenizerMode) {
3285
4193
  let total = 0;
3286
4194
  const out = [];
3287
4195
  for (const result of results) {
3288
- total += tokenize(result.snippet).length;
4196
+ total += chunkBudgetTokenize(result.snippet, budgetTokenizerMode).length;
3289
4197
  if (total > MAX_CONTEXT_BUDGET_TOKENS) {
3290
4198
  break;
3291
4199
  }
@@ -3731,6 +4639,8 @@ export class RetrievalCore {
3731
4639
  enhancerConfig;
3732
4640
  enhancerGenerationConfig;
3733
4641
  chunkingConfig;
4642
+ contextPackingConfig;
4643
+ snippetIntegrityConfig;
3734
4644
  enhancerDecisionTraceEnabled;
3735
4645
  cacheHits = 0;
3736
4646
  cacheMisses = 0;
@@ -3764,6 +4674,8 @@ export class RetrievalCore {
3764
4674
  this.enhancerConfig = mergeRetrievalEnhancerConfig(DEFAULT_RETRIEVAL_ENHANCER_CONFIG, options?.enhancerConfig);
3765
4675
  this.enhancerGenerationConfig = mergeRetrievalEnhancerGenerationConfig(DEFAULT_RETRIEVAL_ENHANCER_GENERATION_CONFIG, options?.enhancerGenerationConfig);
3766
4676
  this.chunkingConfig = mergeRetrievalChunkingConfig(DEFAULT_RETRIEVAL_CHUNKING_CONFIG, options?.chunkingConfig);
4677
+ this.contextPackingConfig = mergeRetrievalContextPackingConfig(DEFAULT_RETRIEVAL_CONTEXT_PACKING_CONFIG, options?.contextPackingConfig);
4678
+ this.snippetIntegrityConfig = mergeRetrievalSnippetIntegrityConfig(DEFAULT_RETRIEVAL_SNIPPET_INTEGRITY_CONFIG, options?.snippetIntegrityConfig);
3767
4679
  this.enhancerDecisionTraceEnabled = Boolean(options?.enhancerDecisionTraceEnabled);
3768
4680
  }
3769
4681
  async indexArtifact(artifact) {
@@ -3937,6 +4849,12 @@ export class RetrievalCore {
3937
4849
  language: chunkLanguage,
3938
4850
  reason: chunkBuild.fallback_reason ?? "none"
3939
4851
  });
4852
+ if (chunkBuild.recursive_semantic_chunking_used) {
4853
+ this.observability.metrics.increment("index_recursive_semantic_chunking_used_total", 1, {
4854
+ tenant_id: artifact.tenant_id,
4855
+ language: chunkLanguage
4856
+ });
4857
+ }
3940
4858
  if (chunkBuild.fallback_reason) {
3941
4859
  this.observability.metrics.increment("index_chunking_fallback_total", 1, {
3942
4860
  tenant_id: artifact.tenant_id,
@@ -3964,14 +4882,15 @@ export class RetrievalCore {
3964
4882
  reason: chunkBuild.fallback_reason
3965
4883
  });
3966
4884
  }
3967
- const estimatedEmbeddingTokens = chunks.reduce((sum, chunk) => sum + tokenize(chunk.snippet).length, 0);
4885
+ const embeddingTexts = buildChunkEmbeddingTexts(chunks, this.chunkingConfig, this.embeddingDescriptor.provider);
4886
+ const estimatedEmbeddingTokens = embeddingTexts.reduce((sum, text) => sum + tokenize(text).length, 0);
3968
4887
  this.observability.metrics.increment("index_embedding_tokens_total", estimatedEmbeddingTokens, {
3969
4888
  tenant_id: artifact.tenant_id
3970
4889
  });
3971
4890
  const embeddings = chunks.length === 0
3972
4891
  ? []
3973
4892
  : await this.embeddingProvider.embed({
3974
- texts: chunks.map((chunk) => chunk.snippet),
4893
+ texts: embeddingTexts,
3975
4894
  purpose: "index"
3976
4895
  });
3977
4896
  if (embeddings.length !== chunks.length) {
@@ -4246,6 +5165,12 @@ export class RetrievalCore {
4246
5165
  language: chunkLanguage,
4247
5166
  reason: chunkBuild.fallback_reason ?? "none"
4248
5167
  });
5168
+ if (chunkBuild.recursive_semantic_chunking_used) {
5169
+ this.observability.metrics.increment("index_recursive_semantic_chunking_used_total", 1, {
5170
+ tenant_id: artifact.tenant_id,
5171
+ language: chunkLanguage
5172
+ });
5173
+ }
4249
5174
  if (chunkBuild.fallback_reason) {
4250
5175
  this.observability.metrics.increment("index_chunking_fallback_total", 1, {
4251
5176
  tenant_id: artifact.tenant_id,
@@ -4273,14 +5198,15 @@ export class RetrievalCore {
4273
5198
  reason: chunkBuild.fallback_reason
4274
5199
  });
4275
5200
  }
4276
- const estimatedEmbeddingTokens = chunks.reduce((sum, chunk) => sum + tokenize(chunk.snippet).length, 0);
5201
+ const embeddingTexts = buildChunkEmbeddingTexts(chunks, this.chunkingConfig, this.embeddingDescriptor.provider);
5202
+ const estimatedEmbeddingTokens = embeddingTexts.reduce((sum, text) => sum + tokenize(text).length, 0);
4277
5203
  this.observability.metrics.increment("index_embedding_tokens_total", estimatedEmbeddingTokens, {
4278
5204
  tenant_id: artifact.tenant_id
4279
5205
  });
4280
5206
  const embeddings = chunks.length === 0
4281
5207
  ? []
4282
5208
  : await this.embeddingProvider.embed({
4283
- texts: chunks.map((chunk) => chunk.snippet),
5209
+ texts: embeddingTexts,
4284
5210
  purpose: "index"
4285
5211
  });
4286
5212
  if (embeddings.length !== chunks.length) {
@@ -4556,7 +5482,7 @@ export class RetrievalCore {
4556
5482
  query,
4557
5483
  top_k: topK,
4558
5484
  filters: input.request.filters,
4559
- retrieval_variant: this.rerankerCacheVariant
5485
+ retrieval_variant: `${this.rerankerCacheVariant}|context_pack:${this.contextPackingConfig.enabled ? "on" : "off"}|context_pack_spans:${this.contextPackingConfig.max_spans_per_result}|context_pack_gap:${this.contextPackingConfig.max_gap_lines}|snippet_integrity:${this.snippetIntegrityConfig.enabled ? "on" : "off"}|snippet_integrity_gap:${this.snippetIntegrityConfig.max_contiguous_gap_lines}|snippet_integrity_langs:${this.snippetIntegrityConfig.target_languages.join(",")}|snippet_repair:${this.snippetIntegrityConfig.repair_enabled ? "on" : "off"}|snippet_repair_env:${this.snippetIntegrityConfig.repair_max_envelope_lines}|snippet_repair_chars:${this.snippetIntegrityConfig.repair_max_snippet_chars}|chunk_recursive:${this.chunkingConfig.recursive_semantic_chunking_enabled ? "on" : "off"}|chunk_semantic_gap:${this.chunkingConfig.semantic_merge_gap_lines}|chunk_semantic_span:${this.chunkingConfig.semantic_merge_max_span_lines}|chunk_comment_absorb:${this.chunkingConfig.comment_forward_absorb_enabled ? "on" : "off"}|chunk_embed_prefix:${this.chunkingConfig.embedding_context_prefix_enabled ? "on" : "off"}|smart_cutoff:${this.scoringConfig.rerank.smart_cutoff_enabled ? "on" : "off"}|smart_cutoff_min_k:${this.scoringConfig.rerank.smart_cutoff_min_k}|smart_cutoff_max_k:${this.scoringConfig.rerank.smart_cutoff_max_k}|smart_cutoff_min_score:${this.scoringConfig.rerank.smart_cutoff_min_score}|smart_cutoff_top_ratio:${this.scoringConfig.rerank.smart_cutoff_top_ratio}|smart_cutoff_delta_abs:${this.scoringConfig.rerank.smart_cutoff_delta_abs}`
4560
5486
  });
4561
5487
  const cached = await this.cache.get(cacheKey);
4562
5488
  if (cached) {
@@ -4700,16 +5626,37 @@ export class RetrievalCore {
4700
5626
  query,
4701
5627
  candidates
4702
5628
  }));
5629
+ const consolidatedCandidates = await this.observability.tracing.withSpan("retrieval.overlap_merge", { trace_id: input.trace_id }, async () => mergeOverlappingCandidates(rerankedCandidates, this.scoringConfig.rerank));
5630
+ this.observability.metrics.observe("retrieval_candidates_post_overlap_merge_count", consolidatedCandidates.length, {
5631
+ retrieval_profile_id: this.scoringProfileId
5632
+ });
5633
+ const mergedCandidateCount = Math.max(0, rerankedCandidates.length - consolidatedCandidates.length);
5634
+ if (mergedCandidateCount > 0) {
5635
+ this.observability.metrics.increment("retrieval_overlap_candidates_merged_total", mergedCandidateCount, {
5636
+ retrieval_profile_id: this.scoringProfileId
5637
+ });
5638
+ }
5639
+ const cutoffCandidates = await this.observability.tracing.withSpan("retrieval.smart_cutoff", { trace_id: input.trace_id }, async () => applySmartCutoffCandidates(consolidatedCandidates, this.scoringConfig.rerank));
5640
+ if (this.scoringConfig.rerank.smart_cutoff_enabled) {
5641
+ this.observability.metrics.increment("retrieval_smart_cutoff_applied_total", 1, {
5642
+ retrieval_profile_id: this.scoringProfileId
5643
+ });
5644
+ const droppedCount = Math.max(0, consolidatedCandidates.length - cutoffCandidates.length);
5645
+ this.observability.metrics.increment("retrieval_smart_cutoff_drop_count", droppedCount, {
5646
+ retrieval_profile_id: this.scoringProfileId
5647
+ });
5648
+ }
4703
5649
  const deduped = await this.observability.tracing.withSpan("retrieval.rerank", { trace_id: input.trace_id }, async () => {
4704
5650
  const output = [];
4705
5651
  const seen = new Set();
4706
5652
  const pathCounts = new Map();
5653
+ const selectedRangesByPath = new Map();
4707
5654
  const directoryCounts = new Map();
4708
5655
  const extensionCounts = new Map();
4709
5656
  const maxChunksPerPath = hasFileLookupIntent(queryTokens)
4710
5657
  ? this.scoringConfig.rerank.max_chunks_per_path_file_lookup
4711
5658
  : this.scoringConfig.rerank.max_chunks_per_path_default;
4712
- const available = [...rerankedCandidates];
5659
+ const available = [...cutoffCandidates];
4713
5660
  while (output.length < topK && available.length > 0) {
4714
5661
  let bestIndex = -1;
4715
5662
  let bestAdjustedScore = Number.NEGATIVE_INFINITY;
@@ -4727,6 +5674,12 @@ export class RetrievalCore {
4727
5674
  if (pathCount >= maxChunksPerPath) {
4728
5675
  continue;
4729
5676
  }
5677
+ if (this.scoringConfig.rerank.merge_overlapping_chunks_enabled && pathCount > 0) {
5678
+ const selectedRanges = selectedRangesByPath.get(candidate.path) ?? [];
5679
+ if (isHeavilyOverlappingLineRange(candidate, selectedRanges)) {
5680
+ continue;
5681
+ }
5682
+ }
4730
5683
  const directoryKey = parentDirectory(candidate.path).toLowerCase();
4731
5684
  const extensionKey = fileExtension(candidate.path);
4732
5685
  const adjustedScore = candidate.score -
@@ -4759,6 +5712,13 @@ export class RetrievalCore {
4759
5712
  const selectedKey = `${selected.path}:${selected.start_line}:${selected.end_line}`;
4760
5713
  seen.add(selectedKey);
4761
5714
  pathCounts.set(selected.path, (pathCounts.get(selected.path) ?? 0) + 1);
5715
+ const selectedRanges = selectedRangesByPath.get(selected.path);
5716
+ if (selectedRanges) {
5717
+ selectedRanges.push({ start_line: selected.start_line, end_line: selected.end_line });
5718
+ }
5719
+ else {
5720
+ selectedRangesByPath.set(selected.path, [{ start_line: selected.start_line, end_line: selected.end_line }]);
5721
+ }
4762
5722
  const selectedDirectory = parentDirectory(selected.path).toLowerCase();
4763
5723
  const selectedExtension = fileExtension(selected.path);
4764
5724
  directoryCounts.set(selectedDirectory, (directoryCounts.get(selectedDirectory) ?? 0) + 1);
@@ -4768,8 +5728,8 @@ export class RetrievalCore {
4768
5728
  return output;
4769
5729
  });
4770
5730
  const candidateRankByKey = new Map();
4771
- for (let index = 0; index < rerankedCandidates.length; index += 1) {
4772
- const candidate = rerankedCandidates[index];
5731
+ for (let index = 0; index < cutoffCandidates.length; index += 1) {
5732
+ const candidate = cutoffCandidates[index];
4773
5733
  if (!candidate) {
4774
5734
  continue;
4775
5735
  }
@@ -4800,16 +5760,28 @@ export class RetrievalCore {
4800
5760
  this.observability.metrics.observe("retrieval_literal_matches_topk", literalMatchesInTopK, {
4801
5761
  retrieval_profile_id: this.scoringProfileId
4802
5762
  });
5763
+ const packedResults = packSearchResultsWithContext({
5764
+ selected: deduped,
5765
+ sourceCandidates: cutoffCandidates,
5766
+ config: this.contextPackingConfig
5767
+ });
5768
+ const assembledResults = annotateSearchResultsWithSnippetIntegrity({
5769
+ selected: packedResults,
5770
+ sourceCandidates: cutoffCandidates,
5771
+ config: this.snippetIntegrityConfig,
5772
+ observability: this.observability,
5773
+ retrievalProfileId: this.scoringProfileId
5774
+ });
4803
5775
  const output = {
4804
5776
  trace_id: input.trace_id,
4805
- results: deduped,
5777
+ results: assembledResults,
4806
5778
  search_metadata: {
4807
5779
  latency_ms: Date.now() - searchStartedAt,
4808
5780
  retrieval_mode: "hybrid",
4809
5781
  index_version: index.index_version
4810
5782
  }
4811
5783
  };
4812
- this.observability.metrics.observe("retrieval_topk_hit_proxy", deduped.length > 0 ? 1 : 0, {
5784
+ this.observability.metrics.observe("retrieval_topk_hit_proxy", assembledResults.length > 0 ? 1 : 0, {
4813
5785
  retrieval_profile_id: this.scoringProfileId
4814
5786
  });
4815
5787
  this.observability.logger.info("search_context completed", {
@@ -4833,6 +5805,7 @@ export class RetrievalCore {
4833
5805
  }
4834
5806
  buildEnhancerContextSnippets(results) {
4835
5807
  const maxSnippets = this.enhancerGenerationConfig.max_context_snippets;
5808
+ const snippetCharLimit = this.contextPackingConfig.enabled ? this.contextPackingConfig.enhancer_snippet_char_limit : 1_600;
4836
5809
  const snippets = [];
4837
5810
  for (const result of results.slice(0, maxSnippets)) {
4838
5811
  snippets.push({
@@ -4840,7 +5813,7 @@ export class RetrievalCore {
4840
5813
  start_line: result.start_line,
4841
5814
  end_line: result.end_line,
4842
5815
  reason: result.reason,
4843
- snippet: result.snippet.slice(0, 1_600),
5816
+ snippet: result.snippet.slice(0, snippetCharLimit),
4844
5817
  score: result.score
4845
5818
  });
4846
5819
  }
@@ -4981,7 +5954,7 @@ export class RetrievalCore {
4981
5954
  top_k: MAX_TOP_K
4982
5955
  }
4983
5956
  });
4984
- const budgetedResults = trimToContextBudget(retrieval.results);
5957
+ const budgetedResults = trimToContextBudget(retrieval.results, this.contextPackingConfig.enabled ? "lightweight" : "ranking");
4985
5958
  const dedupedByPath = dedupeEnhancerCandidatesByPath(budgetedResults);
4986
5959
  const collapsedByDirectory = collapseEnhancerCandidatesByDirectory(dedupedByPath, intentPolicy.max_candidates_per_directory_pre_rerank);
4987
5960
  const filteredCandidates = applyEnhancerIntentPathFiltering(collapsedByDirectory, {