@rce-mcp/retrieval-core 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +7 -0
- package/dist/.tsbuildinfo +1 -1
- package/dist/chunking.d.ts +13 -0
- package/dist/chunking.js +488 -77
- package/dist/index.d.ts +61 -0
- package/dist/index.js +993 -20
- package/dist/remote-sync.js +2 -1
- package/package.json +2 -2
- package/scripts/poc-parser-availability-benchmark.ts +2 -0
- package/src/chunking.ts +573 -80
- package/src/index.ts +1247 -20
- package/src/remote-sync.ts +3 -1
- package/test/benchmark.thresholds.test.ts +8 -0
- package/test/chunking.config.test.ts +47 -1
- package/test/chunking.language-aware.test.ts +227 -0
- package/test/embedding-context-prefix.test.ts +101 -0
- package/test/enhance-confidence.test.ts +4 -4
- package/test/mcp-search-quality.regression.test.ts +691 -4
- package/test/remote-sync.integration.test.ts +5 -1
- package/test/smart-cutoff.config.test.ts +86 -0
- package/test/snippet-integrity.config.test.ts +59 -0
package/src/index.ts
CHANGED
|
@@ -105,6 +105,15 @@ export interface RetrievalRerankConfig {
|
|
|
105
105
|
max_chunks_per_path_file_lookup: number;
|
|
106
106
|
same_directory_penalty: number;
|
|
107
107
|
same_extension_penalty: number;
|
|
108
|
+
merge_overlapping_chunks_enabled: boolean;
|
|
109
|
+
merge_gap_lines: number;
|
|
110
|
+
merge_max_span_lines: number;
|
|
111
|
+
smart_cutoff_enabled: boolean;
|
|
112
|
+
smart_cutoff_min_k: number;
|
|
113
|
+
smart_cutoff_max_k: number;
|
|
114
|
+
smart_cutoff_min_score: number;
|
|
115
|
+
smart_cutoff_top_ratio: number;
|
|
116
|
+
smart_cutoff_delta_abs: number;
|
|
108
117
|
}
|
|
109
118
|
|
|
110
119
|
export interface RetrievalScoringConfig {
|
|
@@ -133,8 +142,33 @@ export interface RetrievalChunkingConfig {
|
|
|
133
142
|
fallback_strategy: "sliding";
|
|
134
143
|
target_chunk_tokens: number;
|
|
135
144
|
chunk_overlap_tokens: number;
|
|
145
|
+
budget_tokenizer: "ranking" | "lightweight";
|
|
146
|
+
boundary_strictness: "legacy" | "semantic_js_ts";
|
|
136
147
|
parse_timeout_ms: number;
|
|
137
148
|
enabled_languages: string[];
|
|
149
|
+
recursive_semantic_chunking_enabled: boolean;
|
|
150
|
+
semantic_merge_gap_lines: number;
|
|
151
|
+
semantic_merge_max_span_lines: number;
|
|
152
|
+
comment_forward_absorb_enabled: boolean;
|
|
153
|
+
embedding_context_prefix_enabled: boolean;
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
/**
 * Fully-resolved context-packing settings.
 *
 * The per-field constraints below are the ones `validateContextPackingConfig`
 * enforces; what each limit is applied to is decided at the use sites.
 */
export interface RetrievalContextPackingConfig {
  /** Feature switch; must be a real boolean. */
  enabled: boolean;
  /** Positive integer. */
  max_spans_per_result: number;
  /** Integer >= 0. */
  max_gap_lines: number;
  /** Positive integer (a character count, going by the name). */
  max_snippet_chars: number;
  /** Positive integer (a character count, going by the name). */
  enhancer_snippet_char_limit: number;
}
|
|
163
|
+
|
|
164
|
+
/**
 * Fully-resolved snippet-integrity settings.
 *
 * The per-field constraints below are the ones `validateSnippetIntegrityConfig`
 * enforces.
 */
export interface RetrievalSnippetIntegrityConfig {
  /** Feature switch; must be a real boolean. */
  enabled: boolean;
  /**
   * Non-empty list of languages. Every entry has to be a name or alias that
   * `normalizeSnippetIntegrityLanguage` recognizes.
   */
  target_languages: string[];
  /** Integer >= 0. */
  max_contiguous_gap_lines: number;
  /** Only "v1" is accepted. */
  marker_template_version: "v1";
  /** Must be a real boolean. */
  repair_enabled: boolean;
  /** Positive integer. */
  repair_max_envelope_lines: number;
  /** Positive integer. */
  repair_max_snippet_chars: number;
}
|
|
139
173
|
|
|
140
174
|
export type RetrievalScoringConfigInput = Partial<{
|
|
@@ -150,8 +184,25 @@ export type RetrievalChunkingConfigInput = Partial<{
|
|
|
150
184
|
fallback_strategy: "sliding";
|
|
151
185
|
target_chunk_tokens: number;
|
|
152
186
|
chunk_overlap_tokens: number;
|
|
187
|
+
budget_tokenizer: "ranking" | "lightweight";
|
|
188
|
+
boundary_strictness: "legacy" | "semantic_js_ts";
|
|
153
189
|
parse_timeout_ms: number;
|
|
154
190
|
enabled_languages: string[];
|
|
191
|
+
recursive_semantic_chunking_enabled: boolean;
|
|
192
|
+
semantic_merge_gap_lines: number;
|
|
193
|
+
semantic_merge_max_span_lines: number;
|
|
194
|
+
comment_forward_absorb_enabled: boolean;
|
|
195
|
+
embedding_context_prefix_enabled: boolean;
|
|
196
|
+
}>;
|
|
197
|
+
/** Caller-supplied context-packing overrides; every field is optional. */
export type RetrievalContextPackingConfigInput = Partial<RetrievalContextPackingConfig>;
|
|
198
|
+
/**
 * Caller-supplied snippet-integrity overrides; every field is optional.
 *
 * Derived from `RetrievalSnippetIntegrityConfig` so the two shapes cannot
 * drift apart when a field is added or renamed.
 */
export type RetrievalSnippetIntegrityConfigInput = Partial<RetrievalSnippetIntegrityConfig>;
|
|
156
207
|
|
|
157
208
|
export const BASELINE_RETRIEVAL_SCORING_CONFIG: RetrievalScoringConfig = {
|
|
@@ -196,7 +247,16 @@ export const BASELINE_RETRIEVAL_SCORING_CONFIG: RetrievalScoringConfig = {
|
|
|
196
247
|
max_chunks_per_path_default: 2,
|
|
197
248
|
max_chunks_per_path_file_lookup: 1,
|
|
198
249
|
same_directory_penalty: 0,
|
|
199
|
-
same_extension_penalty: 0
|
|
250
|
+
same_extension_penalty: 0,
|
|
251
|
+
merge_overlapping_chunks_enabled: true,
|
|
252
|
+
merge_gap_lines: 6,
|
|
253
|
+
merge_max_span_lines: 220,
|
|
254
|
+
smart_cutoff_enabled: false,
|
|
255
|
+
smart_cutoff_min_k: 2,
|
|
256
|
+
smart_cutoff_max_k: 8,
|
|
257
|
+
smart_cutoff_min_score: 0.25,
|
|
258
|
+
smart_cutoff_top_ratio: 0.5,
|
|
259
|
+
smart_cutoff_delta_abs: 0.25
|
|
200
260
|
}
|
|
201
261
|
};
|
|
202
262
|
|
|
@@ -248,7 +308,16 @@ export const CONSERVATIVE_RETRIEVAL_SCORING_CONFIG: RetrievalScoringConfig = {
|
|
|
248
308
|
max_chunks_per_path_default: 2,
|
|
249
309
|
max_chunks_per_path_file_lookup: 1,
|
|
250
310
|
same_directory_penalty: 0,
|
|
251
|
-
same_extension_penalty: 0
|
|
311
|
+
same_extension_penalty: 0,
|
|
312
|
+
merge_overlapping_chunks_enabled: true,
|
|
313
|
+
merge_gap_lines: 6,
|
|
314
|
+
merge_max_span_lines: 220,
|
|
315
|
+
smart_cutoff_enabled: false,
|
|
316
|
+
smart_cutoff_min_k: 2,
|
|
317
|
+
smart_cutoff_max_k: 8,
|
|
318
|
+
smart_cutoff_min_score: 0.25,
|
|
319
|
+
smart_cutoff_top_ratio: 0.5,
|
|
320
|
+
smart_cutoff_delta_abs: 0.25
|
|
252
321
|
}
|
|
253
322
|
};
|
|
254
323
|
|
|
@@ -270,8 +339,33 @@ export const DEFAULT_RETRIEVAL_CHUNKING_CONFIG: RetrievalChunkingConfig = {
|
|
|
270
339
|
fallback_strategy: "sliding",
|
|
271
340
|
target_chunk_tokens: DEFAULT_TARGET_CHUNK_TOKENS,
|
|
272
341
|
chunk_overlap_tokens: DEFAULT_CHUNK_OVERLAP_TOKENS,
|
|
342
|
+
budget_tokenizer: "ranking",
|
|
343
|
+
boundary_strictness: "legacy",
|
|
273
344
|
parse_timeout_ms: 80,
|
|
274
|
-
enabled_languages: ["typescript", "javascript", "python", "go"]
|
|
345
|
+
enabled_languages: ["typescript", "javascript", "python", "go"],
|
|
346
|
+
recursive_semantic_chunking_enabled: false,
|
|
347
|
+
semantic_merge_gap_lines: 6,
|
|
348
|
+
semantic_merge_max_span_lines: 220,
|
|
349
|
+
comment_forward_absorb_enabled: true,
|
|
350
|
+
embedding_context_prefix_enabled: true
|
|
351
|
+
};
|
|
352
|
+
|
|
353
|
+
/** Default context-packing settings. Packing is disabled unless a caller enables it. */
export const DEFAULT_RETRIEVAL_CONTEXT_PACKING_CONFIG: RetrievalContextPackingConfig = {
  enabled: false,
  max_spans_per_result: 3,
  max_gap_lines: 120,
  max_snippet_chars: 3200,
  enhancer_snippet_char_limit: 2200
};
|
|
360
|
+
|
|
361
|
+
/**
 * Default snippet-integrity settings. Both the feature itself and its repair
 * step are disabled unless a caller enables them.
 */
export const DEFAULT_RETRIEVAL_SNIPPET_INTEGRITY_CONFIG: RetrievalSnippetIntegrityConfig = {
  enabled: false,
  target_languages: ["typescript", "tsx", "javascript", "jsx"],
  max_contiguous_gap_lines: 6,
  marker_template_version: "v1",
  repair_enabled: false,
  repair_max_envelope_lines: 260,
  repair_max_snippet_chars: 3600
};
|
|
276
370
|
|
|
277
371
|
const BUILTIN_RETRIEVAL_SCORING_PROFILES = {
|
|
@@ -337,6 +431,36 @@ function validateScoringConfig(config: RetrievalScoringConfig): void {
|
|
|
337
431
|
if (rerank.same_extension_penalty < 0) {
|
|
338
432
|
throw new Error("invalid retrieval scoring config: rerank.same_extension_penalty must be >= 0");
|
|
339
433
|
}
|
|
434
|
+
if (typeof rerank.merge_overlapping_chunks_enabled !== "boolean") {
|
|
435
|
+
throw new Error("invalid retrieval scoring config: rerank.merge_overlapping_chunks_enabled must be boolean");
|
|
436
|
+
}
|
|
437
|
+
if (!Number.isInteger(rerank.merge_gap_lines) || rerank.merge_gap_lines < 0) {
|
|
438
|
+
throw new Error("invalid retrieval scoring config: rerank.merge_gap_lines must be an integer >= 0");
|
|
439
|
+
}
|
|
440
|
+
if (!Number.isInteger(rerank.merge_max_span_lines) || rerank.merge_max_span_lines <= 0) {
|
|
441
|
+
throw new Error("invalid retrieval scoring config: rerank.merge_max_span_lines must be a positive integer");
|
|
442
|
+
}
|
|
443
|
+
if (typeof rerank.smart_cutoff_enabled !== "boolean") {
|
|
444
|
+
throw new Error("invalid retrieval scoring config: rerank.smart_cutoff_enabled must be boolean");
|
|
445
|
+
}
|
|
446
|
+
if (!Number.isInteger(rerank.smart_cutoff_min_k) || rerank.smart_cutoff_min_k <= 0) {
|
|
447
|
+
throw new Error("invalid retrieval scoring config: rerank.smart_cutoff_min_k must be a positive integer");
|
|
448
|
+
}
|
|
449
|
+
if (!Number.isInteger(rerank.smart_cutoff_max_k) || rerank.smart_cutoff_max_k <= 0) {
|
|
450
|
+
throw new Error("invalid retrieval scoring config: rerank.smart_cutoff_max_k must be a positive integer");
|
|
451
|
+
}
|
|
452
|
+
if (rerank.smart_cutoff_max_k < rerank.smart_cutoff_min_k) {
|
|
453
|
+
throw new Error("invalid retrieval scoring config: rerank.smart_cutoff_max_k must be >= smart_cutoff_min_k");
|
|
454
|
+
}
|
|
455
|
+
assertFiniteNumber(rerank.smart_cutoff_min_score, "rerank.smart_cutoff_min_score");
|
|
456
|
+
assertFiniteNumber(rerank.smart_cutoff_top_ratio, "rerank.smart_cutoff_top_ratio");
|
|
457
|
+
assertFiniteNumber(rerank.smart_cutoff_delta_abs, "rerank.smart_cutoff_delta_abs");
|
|
458
|
+
if (rerank.smart_cutoff_top_ratio <= 0 || rerank.smart_cutoff_top_ratio > 1) {
|
|
459
|
+
throw new Error("invalid retrieval scoring config: rerank.smart_cutoff_top_ratio must be in (0, 1]");
|
|
460
|
+
}
|
|
461
|
+
if (rerank.smart_cutoff_delta_abs < 0) {
|
|
462
|
+
throw new Error("invalid retrieval scoring config: rerank.smart_cutoff_delta_abs must be >= 0");
|
|
463
|
+
}
|
|
340
464
|
}
|
|
341
465
|
|
|
342
466
|
export function resolveRetrievalScoringProfile(profile_id: string | undefined): {
|
|
@@ -466,6 +590,12 @@ function validateChunkingConfig(config: RetrievalChunkingConfig): void {
|
|
|
466
590
|
if (!Number.isInteger(config.parse_timeout_ms) || config.parse_timeout_ms <= 0) {
|
|
467
591
|
throw new Error("invalid retrieval chunking config: parse_timeout_ms must be a positive integer");
|
|
468
592
|
}
|
|
593
|
+
if (config.budget_tokenizer !== "ranking" && config.budget_tokenizer !== "lightweight") {
|
|
594
|
+
throw new Error("invalid retrieval chunking config: budget_tokenizer must be ranking|lightweight");
|
|
595
|
+
}
|
|
596
|
+
if (config.boundary_strictness !== "legacy" && config.boundary_strictness !== "semantic_js_ts") {
|
|
597
|
+
throw new Error("invalid retrieval chunking config: boundary_strictness must be legacy|semantic_js_ts");
|
|
598
|
+
}
|
|
469
599
|
if (!Array.isArray(config.enabled_languages) || config.enabled_languages.length === 0) {
|
|
470
600
|
throw new Error("invalid retrieval chunking config: enabled_languages must include at least one language");
|
|
471
601
|
}
|
|
@@ -474,6 +604,21 @@ function validateChunkingConfig(config: RetrievalChunkingConfig): void {
|
|
|
474
604
|
throw new Error("invalid retrieval chunking config: enabled_languages must contain non-empty strings");
|
|
475
605
|
}
|
|
476
606
|
}
|
|
607
|
+
if (typeof config.recursive_semantic_chunking_enabled !== "boolean") {
|
|
608
|
+
throw new Error("invalid retrieval chunking config: recursive_semantic_chunking_enabled must be boolean");
|
|
609
|
+
}
|
|
610
|
+
if (!Number.isInteger(config.semantic_merge_gap_lines) || config.semantic_merge_gap_lines < 0) {
|
|
611
|
+
throw new Error("invalid retrieval chunking config: semantic_merge_gap_lines must be a non-negative integer");
|
|
612
|
+
}
|
|
613
|
+
if (!Number.isInteger(config.semantic_merge_max_span_lines) || config.semantic_merge_max_span_lines <= 0) {
|
|
614
|
+
throw new Error("invalid retrieval chunking config: semantic_merge_max_span_lines must be a positive integer");
|
|
615
|
+
}
|
|
616
|
+
if (typeof config.comment_forward_absorb_enabled !== "boolean") {
|
|
617
|
+
throw new Error("invalid retrieval chunking config: comment_forward_absorb_enabled must be boolean");
|
|
618
|
+
}
|
|
619
|
+
if (typeof config.embedding_context_prefix_enabled !== "boolean") {
|
|
620
|
+
throw new Error("invalid retrieval chunking config: embedding_context_prefix_enabled must be boolean");
|
|
621
|
+
}
|
|
477
622
|
}
|
|
478
623
|
|
|
479
624
|
export function mergeRetrievalChunkingConfig(
|
|
@@ -489,6 +634,110 @@ export function mergeRetrievalChunkingConfig(
|
|
|
489
634
|
return next;
|
|
490
635
|
}
|
|
491
636
|
|
|
637
|
+
/**
 * Checks a fully-merged context-packing config at runtime.
 *
 * Fields are checked in declaration order and the first violation wins.
 *
 * @throws Error prefixed with "invalid retrieval context packing config: "
 *   naming the offending field.
 */
function validateContextPackingConfig(config: RetrievalContextPackingConfig): void {
  const invalid = (detail: string): Error =>
    new Error(`invalid retrieval context packing config: ${detail}`);
  const isPositiveInteger = (value: number): boolean => Number.isInteger(value) && value > 0;
  const isNonNegativeInteger = (value: number): boolean => Number.isInteger(value) && value >= 0;

  if (typeof config.enabled !== "boolean") {
    throw invalid("enabled must be boolean");
  }
  if (!isPositiveInteger(config.max_spans_per_result)) {
    throw invalid("max_spans_per_result must be a positive integer");
  }
  if (!isNonNegativeInteger(config.max_gap_lines)) {
    throw invalid("max_gap_lines must be a non-negative integer");
  }
  if (!isPositiveInteger(config.max_snippet_chars)) {
    throw invalid("max_snippet_chars must be a positive integer");
  }
  if (!isPositiveInteger(config.enhancer_snippet_char_limit)) {
    throw invalid("enhancer_snippet_char_limit must be a positive integer");
  }
}
|
|
654
|
+
|
|
655
|
+
/**
 * Layers `overrides` on top of `base` and validates the result.
 *
 * Neither argument is mutated; a new object is returned.
 *
 * @param base Complete config to start from.
 * @param overrides Optional partial config; its own properties replace the
 *   matching ones from `base`.
 * @returns The merged, validated config.
 * @throws Error from `validateContextPackingConfig` when the merged config is invalid.
 */
export function mergeRetrievalContextPackingConfig(
  base: RetrievalContextPackingConfig,
  overrides?: RetrievalContextPackingConfigInput
): RetrievalContextPackingConfig {
  const merged: RetrievalContextPackingConfig = { ...base };
  if (overrides) {
    Object.assign(merged, overrides);
  }
  validateContextPackingConfig(merged);
  return merged;
}
|
|
666
|
+
|
|
667
|
+
/**
 * Maps a language name or extension-style alias to its canonical
 * snippet-integrity language id.
 *
 * Input is trimmed and lower-cased before matching.
 *
 * @returns "typescript", "tsx", "javascript" or "jsx"; `undefined` when the
 *   value is not recognized.
 */
function normalizeSnippetIntegrityLanguage(value: string): string | undefined {
  switch (value.trim().toLowerCase()) {
    case "typescript":
    case "ts":
    case "mts":
    case "cts":
      return "typescript";
    case "tsx":
      return "tsx";
    case "javascript":
    case "js":
    case "mjs":
    case "cjs":
      return "javascript";
    case "jsx":
      return "jsx";
    default:
      return undefined;
  }
}
|
|
683
|
+
|
|
684
|
+
/**
 * Canonicalizes and de-duplicates a list of language names.
 *
 * Each entry is trimmed and lower-cased. Blank entries are dropped, recognized
 * aliases are replaced by their canonical id, and unrecognized names are kept
 * (lower-cased) so that later validation can reject them. First-seen order is
 * preserved.
 */
function normalizeSnippetIntegrityLanguageList(value: string[]): string[] {
  const seen = new Set<string>();
  for (const entry of value) {
    const lowered = entry.trim().toLowerCase();
    if (lowered.length > 0) {
      seen.add(normalizeSnippetIntegrityLanguage(lowered) ?? lowered);
    }
  }
  return Array.from(seen);
}
|
|
695
|
+
|
|
696
|
+
/**
 * Checks a fully-merged snippet-integrity config at runtime.
 *
 * Fields are checked in declaration order and the first violation wins. Each
 * target language must be a non-blank string that
 * `normalizeSnippetIntegrityLanguage` recognizes.
 *
 * @throws Error prefixed with "invalid retrieval snippet integrity config: ".
 */
function validateSnippetIntegrityConfig(config: RetrievalSnippetIntegrityConfig): void {
  const invalid = (detail: string): Error =>
    new Error(`invalid retrieval snippet integrity config: ${detail}`);
  const isPositiveInteger = (value: number): boolean => Number.isInteger(value) && value > 0;

  if (typeof config.enabled !== "boolean") {
    throw invalid("enabled must be boolean");
  }

  const languages = config.target_languages;
  if (!Array.isArray(languages) || languages.length === 0) {
    throw invalid("target_languages must include at least one language");
  }
  for (const language of languages) {
    const isBlankOrNonString = typeof language !== "string" || language.trim().length === 0;
    if (isBlankOrNonString) {
      throw invalid("target_languages must contain non-empty strings");
    }
    if (!normalizeSnippetIntegrityLanguage(language)) {
      throw invalid("unsupported target language");
    }
  }

  const gapLines = config.max_contiguous_gap_lines;
  if (!Number.isInteger(gapLines) || gapLines < 0) {
    throw invalid("max_contiguous_gap_lines must be a non-negative integer");
  }
  if (config.marker_template_version !== "v1") {
    throw invalid("marker_template_version must be v1");
  }
  if (typeof config.repair_enabled !== "boolean") {
    throw invalid("repair_enabled must be boolean");
  }
  if (!isPositiveInteger(config.repair_max_envelope_lines)) {
    throw invalid("repair_max_envelope_lines must be a positive integer");
  }
  if (!isPositiveInteger(config.repair_max_snippet_chars)) {
    throw invalid("repair_max_snippet_chars must be a positive integer");
  }
}
|
|
727
|
+
|
|
728
|
+
/**
 * Layers `overrides` on top of `base`, canonicalizes the language list and
 * validates the result.
 *
 * `target_languages` is taken from `overrides` when it is set there, otherwise
 * from `base`, and is always passed through
 * `normalizeSnippetIntegrityLanguageList`. Neither argument is mutated.
 *
 * @returns The merged, validated config.
 * @throws Error from `validateSnippetIntegrityConfig` when the merged config is invalid.
 */
export function mergeRetrievalSnippetIntegrityConfig(
  base: RetrievalSnippetIntegrityConfig,
  overrides?: RetrievalSnippetIntegrityConfigInput
): RetrievalSnippetIntegrityConfig {
  const requestedLanguages = overrides?.target_languages ?? base.target_languages;
  const merged: RetrievalSnippetIntegrityConfig = { ...base };
  if (overrides) {
    Object.assign(merged, overrides);
  }
  merged.target_languages = normalizeSnippetIntegrityLanguageList(requestedLanguages);
  validateSnippetIntegrityConfig(merged);
  return merged;
}
|
|
740
|
+
|
|
492
741
|
function stableSerialize(value: unknown): string {
|
|
493
742
|
if (Array.isArray(value)) {
|
|
494
743
|
return `[${value.map((entry) => stableSerialize(entry)).join(",")}]`;
|
|
@@ -621,6 +870,8 @@ export interface RetrievalCoreOptions {
|
|
|
621
870
|
enhancerConfig?: RetrievalEnhancerConfigInput;
|
|
622
871
|
enhancerGenerationConfig?: RetrievalEnhancerGenerationConfigInput;
|
|
623
872
|
chunkingConfig?: RetrievalChunkingConfigInput;
|
|
873
|
+
contextPackingConfig?: RetrievalContextPackingConfigInput;
|
|
874
|
+
snippetIntegrityConfig?: RetrievalSnippetIntegrityConfigInput;
|
|
624
875
|
enhancerDecisionTraceEnabled?: boolean;
|
|
625
876
|
}
|
|
626
877
|
|
|
@@ -849,10 +1100,25 @@ function singularizeToken(token: string): string | undefined {
|
|
|
849
1100
|
return undefined;
|
|
850
1101
|
}
|
|
851
1102
|
|
|
1103
|
+
/**
 * Cheap tokenizer: NFKC-normalizes the text and returns every maximal run of
 * ASCII letters, digits and underscores, lower-cased.
 *
 * Any other character (including non-ASCII letters) acts as a separator.
 */
function tokenizeLightweight(text: string): string[] {
  const wordRuns = text.normalize("NFKC").match(/[A-Za-z0-9_]+/g) ?? [];
  return wordRuns.map((run) => run.toLowerCase());
}
|
|
1110
|
+
|
|
852
1111
|
/** Default tokenizer for this module; delegates to `tokenizeForRanking`. */
function tokenize(text: string): string[] {
  const rankingTokens = tokenizeForRanking(text);
  return rankingTokens;
}
|
|
855
1114
|
|
|
1115
|
+
/**
 * Tokenizes `text` with the tokenizer selected by `mode`.
 *
 * "lightweight" uses `tokenizeLightweight`; any other value uses `tokenize`.
 */
function chunkBudgetTokenize(text: string, mode: "ranking" | "lightweight"): string[] {
  return mode === "lightweight" ? tokenizeLightweight(text) : tokenize(text);
}
|
|
1121
|
+
|
|
856
1122
|
function lexicalScore(query: string, haystack: string): number {
|
|
857
1123
|
const q = new Set(tokenize(query));
|
|
858
1124
|
if (q.size === 0) {
|
|
@@ -1356,6 +1622,7 @@ interface ChunkBuildReport {
|
|
|
1356
1622
|
language_aware_attempt_latency_ms?: number;
|
|
1357
1623
|
fallback_path_latency_ms?: number;
|
|
1358
1624
|
language?: string;
|
|
1625
|
+
recursive_semantic_chunking_used?: boolean;
|
|
1359
1626
|
}
|
|
1360
1627
|
|
|
1361
1628
|
function buildChunks(file: RawFile, chunkingConfig: RetrievalChunkingConfig): ChunkBuildReport {
|
|
@@ -1371,11 +1638,17 @@ function buildChunks(file: RawFile, chunkingConfig: RetrievalChunkingConfig): Ch
|
|
|
1371
1638
|
fallback_strategy: chunkingConfig.fallback_strategy,
|
|
1372
1639
|
target_chunk_tokens: chunkingConfig.target_chunk_tokens,
|
|
1373
1640
|
chunk_overlap_tokens: chunkingConfig.chunk_overlap_tokens,
|
|
1641
|
+
budget_tokenizer: chunkingConfig.budget_tokenizer,
|
|
1642
|
+
boundary_strictness: chunkingConfig.boundary_strictness,
|
|
1374
1643
|
max_chunks_per_file: MAX_CHUNKS_PER_FILE,
|
|
1375
1644
|
parse_timeout_ms: chunkingConfig.parse_timeout_ms,
|
|
1376
|
-
enabled_languages: chunkingConfig.enabled_languages
|
|
1645
|
+
enabled_languages: chunkingConfig.enabled_languages,
|
|
1646
|
+
recursive_semantic_chunking_enabled: chunkingConfig.recursive_semantic_chunking_enabled,
|
|
1647
|
+
semantic_merge_gap_lines: chunkingConfig.semantic_merge_gap_lines,
|
|
1648
|
+
semantic_merge_max_span_lines: chunkingConfig.semantic_merge_max_span_lines,
|
|
1649
|
+
comment_forward_absorb_enabled: chunkingConfig.comment_forward_absorb_enabled
|
|
1377
1650
|
},
|
|
1378
|
-
tokenize
|
|
1651
|
+
tokenize: (text: string) => chunkBudgetTokenize(text, chunkingConfig.budget_tokenizer)
|
|
1379
1652
|
});
|
|
1380
1653
|
|
|
1381
1654
|
return {
|
|
@@ -1394,10 +1667,38 @@ function buildChunks(file: RawFile, chunkingConfig: RetrievalChunkingConfig): Ch
|
|
|
1394
1667
|
parse_latency_ms: chunkingResult.parse_latency_ms,
|
|
1395
1668
|
language_aware_attempt_latency_ms: chunkingResult.language_aware_attempt_latency_ms,
|
|
1396
1669
|
fallback_path_latency_ms: chunkingResult.fallback_path_latency_ms,
|
|
1397
|
-
language: chunkingResult.language
|
|
1670
|
+
language: chunkingResult.language,
|
|
1671
|
+
recursive_semantic_chunking_used: chunkingResult.recursive_semantic_chunking_used
|
|
1398
1672
|
};
|
|
1399
1673
|
}
|
|
1400
1674
|
|
|
1675
|
+
/**
 * Produces the text that is embedded for a chunk.
 *
 * The bare snippet is returned when the context prefix is disabled in `config`
 * or when the provider id is "deterministic" (compared after trimming and
 * lower-casing). Otherwise the snippet is preceded by one header line of the
 * form `<path>:<start>-<end>[ > <symbol>]`, where `<path>` is the last two
 * segments of the normalized path (the whole path when it has two segments or
 * fewer) and `<symbol>` is whatever `detectSnippetSymbolName` finds, if anything.
 */
function buildChunkEmbeddingText(
  chunk: IndexedChunk,
  config: RetrievalChunkingConfig,
  embeddingProviderId: string
): string {
  const providerKey = embeddingProviderId.trim().toLowerCase();
  const prefixWanted = config.embedding_context_prefix_enabled && providerKey !== "deterministic";
  if (!prefixWanted) {
    return chunk.snippet;
  }

  const fullPath = normalizePath(chunk.path);
  const segments = fullPath.split("/").filter((segment) => segment.length > 0);
  const shortPath = segments.length > 2 ? segments.slice(-2).join("/") : fullPath;

  const symbolName = detectSnippetSymbolName(chunk.snippet);
  const symbolSuffix = symbolName ? ` > ${symbolName}` : "";

  return `${shortPath}:${chunk.start_line}-${chunk.end_line}${symbolSuffix}\n${chunk.snippet}`;
}
|
|
1693
|
+
|
|
1694
|
+
/**
 * Applies `buildChunkEmbeddingText` to every chunk.
 *
 * @returns One embedding text per input chunk, in the same order.
 */
function buildChunkEmbeddingTexts(
  chunks: IndexedChunk[],
  config: RetrievalChunkingConfig,
  embeddingProviderId: string
): string[] {
  const toEmbeddingText = (chunk: IndexedChunk): string =>
    buildChunkEmbeddingText(chunk, config, embeddingProviderId);
  return chunks.map(toEmbeddingText);
}
|
|
1701
|
+
|
|
1401
1702
|
function pseudoEmbedding(input: string, dimensions = 24): number[] {
|
|
1402
1703
|
const safeDimensions = Math.max(1, dimensions);
|
|
1403
1704
|
let source = sha256(input);
|
|
@@ -3858,6 +4159,827 @@ function compareSearchResults(a: SearchContextOutput["results"][number], b: Sear
|
|
|
3858
4159
|
return a.end_line - b.end_line;
|
|
3859
4160
|
}
|
|
3860
4161
|
|
|
4162
|
+
/**
 * Sort comparator that orders results by position in the file: ascending
 * `start_line`, then ascending `end_line`. Results with identical ranges fall
 * back to `compareSearchResults`.
 */
function compareSearchResultsByLineRange(
  a: SearchContextOutput["results"][number],
  b: SearchContextOutput["results"][number]
): number {
  const differsAtStart = a.start_line !== b.start_line;
  if (differsAtStart) {
    return a.start_line - b.start_line;
  }
  return a.end_line === b.end_line ? compareSearchResults(a, b) : a.end_line - b.end_line;
}
|
|
4174
|
+
|
|
4175
|
+
/**
 * Stitches the snippets of a cluster into one text covering
 * `mergedStartLine..mergedEndLine` (inclusive).
 *
 * Snippet line `i` of a candidate is attributed to file line `start_line + i`;
 * at most `end_line - start_line + 1` lines are taken from each snippet. When
 * several candidates supply the same file line, the higher-scoring one wins,
 * and scores within 1e-9 of each other are decided by sort position under
 * `compareSearchResults`. File lines that no candidate supplies are emitted as
 * empty lines.
 *
 * @returns The stitched text; "" for an empty cluster; or the first-sorted
 *   candidate's own snippet when more than max(2, 20% of the range) lines are
 *   missing.
 */
function mergeSnippetCluster(
  cluster: SearchContextOutput["results"],
  mergedStartLine: number,
  mergedEndLine: number
): string {
  type LineClaim = { text: string; score: number; rank: number };

  const ranked = [...cluster].sort(compareSearchResults);
  const primary = ranked[0];
  if (!primary) {
    return "";
  }

  // Pass 1: decide, per file line, which candidate's text to use.
  const claims = new Map<number, LineClaim>();
  ranked.forEach((candidate, rank) => {
    if (!candidate) {
      return;
    }
    const snippetLines = candidate.snippet.replace(/\r\n/g, "\n").split("\n");
    const declaredLines = Math.max(1, candidate.end_line - candidate.start_line + 1);
    const usable = snippetLines.slice(0, Math.min(snippetLines.length, declaredLines));
    usable.forEach((text, offset) => {
      const lineNumber = candidate.start_line + offset;
      if (lineNumber < mergedStartLine || lineNumber > mergedEndLine) {
        return;
      }
      const current = claims.get(lineNumber);
      const wins =
        !current ||
        candidate.score > current.score + 1e-9 ||
        (Math.abs(candidate.score - current.score) <= 1e-9 && rank < current.rank);
      if (wins) {
        claims.set(lineNumber, { text, score: candidate.score, rank });
      }
    });
  });

  // Pass 2: lay the chosen lines out in order, padding gaps with blanks.
  const stitched: string[] = [];
  let gapCount = 0;
  for (let lineNumber = mergedStartLine; lineNumber <= mergedEndLine; lineNumber += 1) {
    const claim = claims.get(lineNumber);
    if (claim) {
      stitched.push(claim.text);
    } else {
      gapCount += 1;
      stitched.push("");
    }
  }

  // Too many holes means the stitched text would be mostly padding.
  const spanLines = Math.max(1, mergedEndLine - mergedStartLine + 1);
  const tolerableGaps = Math.max(2, Math.floor(spanLines * 0.2));
  return gapCount > tolerableGaps ? primary.snippet : stitched.join("\n");
}
|
|
4230
|
+
|
|
4231
|
+
/**
 * Collapses a cluster of results into a single result.
 *
 * A one-element cluster is returned as-is. Otherwise the result is a copy of
 * the first candidate under `compareSearchResults`, with `start_line` and
 * `end_line` widened to the union of the cluster and `snippet` replaced by the
 * output of `mergeSnippetCluster` (or kept when that output is empty).
 *
 * NOTE(review): when `mergeSnippetCluster` falls back to the primary snippet,
 * the returned range is still the widened one, so the range and the snippet
 * may disagree. Confirm whether callers rely on that.
 *
 * @throws Error when `cluster` is empty.
 */
function mergeCandidateCluster(cluster: SearchContextOutput["results"]): SearchContextOutput["results"][number] {
  if (cluster.length === 0) {
    throw new Error("mergeCandidateCluster requires at least one candidate");
  }
  if (cluster.length === 1) {
    return cluster[0]!;
  }

  const primary = [...cluster].sort(compareSearchResults)[0]!;

  let firstLine = Infinity;
  let lastLine = -Infinity;
  for (const candidate of cluster) {
    firstLine = Math.min(firstLine, candidate.start_line);
    lastLine = Math.max(lastLine, candidate.end_line);
  }

  const stitched = mergeSnippetCluster(cluster, firstLine, lastLine);
  const snippet = stitched.length > 0 ? stitched : primary.snippet;

  return { ...primary, start_line: firstLine, end_line: lastLine, snippet };
}
|
|
4252
|
+
|
|
4253
|
+
/**
 * Overlap threshold used by `isHeavilyOverlappingLineRange`: two line ranges
 * count as heavily overlapping when the shared lines make up at least this
 * fraction of the shorter range.
 */
const HEAVY_LINE_RANGE_OVERLAP_RATIO = 0.2;
|
|
4254
|
+
|
|
4255
|
+
/**
 * Number of lines in the inclusive range `startLine..endLine`, never less
 * than 1 (an inverted range counts as a single line).
 */
function lineRangeLength(startLine: number, endLine: number): number {
  const inclusiveSpan = endLine - startLine + 1;
  return inclusiveSpan < 1 ? 1 : inclusiveSpan;
}
|
|
4258
|
+
|
|
4259
|
+
/**
 * Number of lines shared by two inclusive line ranges.
 *
 * @returns 0 when the ranges do not intersect.
 */
function lineRangeOverlapLength(
  aStartLine: number,
  aEndLine: number,
  bStartLine: number,
  bEndLine: number
): number {
  const sharedStart = Math.max(aStartLine, bStartLine);
  const sharedEnd = Math.min(aEndLine, bEndLine);
  return sharedEnd < sharedStart ? 0 : sharedEnd - sharedStart + 1;
}
|
|
4272
|
+
|
|
4273
|
+
/**
 * Tells whether `candidate` heavily overlaps any already-selected range.
 *
 * For each selected range, the overlap is measured in lines and divided by the
 * length of the shorter of the two ranges. Reaching
 * `HEAVY_LINE_RANGE_OVERLAP_RATIO` against any single range is enough.
 *
 * @returns `true` on the first heavy overlap; `false` when there is none
 *   (including when `selectedRanges` is empty).
 */
function isHeavilyOverlappingLineRange(
  candidate: SearchContextOutput["results"][number],
  selectedRanges: Array<{ start_line: number; end_line: number }>
): boolean {
  return selectedRanges.some((selected) => {
    const sharedLines = lineRangeOverlapLength(
      selected.start_line,
      selected.end_line,
      candidate.start_line,
      candidate.end_line
    );
    if (sharedLines <= 0) {
      return false;
    }
    const shorterRange = Math.min(
      lineRangeLength(selected.start_line, selected.end_line),
      lineRangeLength(candidate.start_line, candidate.end_line)
    );
    return sharedLines / Math.max(1, shorterRange) >= HEAVY_LINE_RANGE_OVERLAP_RATIO;
  });
}
|
|
4298
|
+
|
|
4299
|
+
/**
 * Merges overlapping or directly adjacent line spans.
 *
 * Spans whose `end_line` is below their `start_line` are discarded. The rest
 * are sorted by `start_line` (then `end_line`) and coalesced whenever a span
 * starts no later than one line after the previous merged span ends.
 *
 * The input array and its span objects are never mutated, and the returned
 * spans are always fresh objects. That holds for zero or one input span too,
 * which previously skipped the validity filter and handed back the caller's
 * own object.
 *
 * @param spans Inclusive line spans in any order.
 * @returns Non-overlapping, non-adjacent spans in ascending order.
 */
function mergeLineSpans(spans: Array<{ start_line: number; end_line: number }>): Array<{ start_line: number; end_line: number }> {
  // filter() yields a new array, so the in-place sort cannot reorder the caller's input.
  const ordered = spans
    .filter((span) => span.end_line >= span.start_line)
    .sort((a, b) => a.start_line - b.start_line || a.end_line - b.end_line);

  const merged: Array<{ start_line: number; end_line: number }> = [];
  for (const span of ordered) {
    const last = merged[merged.length - 1];
    if (!last || span.start_line > last.end_line + 1) {
      // Copy so that extending `last` below never writes into an input span.
      merged.push({ ...span });
      continue;
    }
    last.end_line = Math.max(last.end_line, span.end_line);
  }
  return merged;
}
|
|
4317
|
+
|
|
4318
|
+
/**
 * Number of lines lying strictly between two inclusive line ranges.
 *
 * @returns 0 when the ranges overlap or touch; otherwise the count of lines
 *   separating them, whichever of the two comes first.
 */
function lineRangeGap(anchor: { start_line: number; end_line: number }, candidate: { start_line: number; end_line: number }): number {
  const candidateIsAfter = candidate.start_line > anchor.end_line;
  if (candidateIsAfter) {
    return candidate.start_line - anchor.end_line - 1;
  }
  const candidateIsBefore = anchor.start_line > candidate.end_line;
  return candidateIsBefore ? anchor.start_line - candidate.end_line - 1 : 0;
}
|
|
4327
|
+
|
|
4328
|
+
/**
 * Builds a map from file line number to the preferred text for that line.
 *
 * Snippet line `i` of a candidate is attributed to file line `start_line + i`;
 * at most `end_line - start_line + 1` lines are taken from each snippet. When
 * several candidates supply the same line, the higher-scoring one wins, and
 * scores within 1e-9 of each other are decided by sort position under
 * `compareSearchResults`.
 */
function buildPreferredLineMap(candidates: SearchContextOutput["results"]): Map<number, string> {
  type LineClaim = { text: string; score: number; rank: number };

  const claims = new Map<number, LineClaim>();
  const ranked = [...candidates].sort(compareSearchResults);
  ranked.forEach((candidate, rank) => {
    if (!candidate) {
      return;
    }
    const snippetLines = candidate.snippet.replace(/\r\n/g, "\n").split("\n");
    const declaredLines = Math.max(1, candidate.end_line - candidate.start_line + 1);
    const usable = snippetLines.slice(0, Math.min(snippetLines.length, declaredLines));
    usable.forEach((text, offset) => {
      const lineNumber = candidate.start_line + offset;
      const current = claims.get(lineNumber);
      const wins =
        !current ||
        candidate.score > current.score + 1e-9 ||
        (Math.abs(candidate.score - current.score) <= 1e-9 && rank < current.rank);
      if (wins) {
        claims.set(lineNumber, { text, score: candidate.score, rank });
      }
    });
  });

  // Drop the bookkeeping fields; callers only need the chosen text.
  const preferred = new Map<number, string>();
  for (const [lineNumber, claim] of claims) {
    preferred.set(lineNumber, claim.text);
  }
  return preferred;
}
|
|
4353
|
+
|
|
4354
|
+
/**
 * Shortens a snippet to at most `maxChars` UTF-16 code units.
 *
 * Snippets that already fit are returned untouched. Otherwise the snippet is cut at `maxChars`
 * (negative values are treated as `0`), and:
 * - if the cut would fall between the two halves of a surrogate pair, it is moved back one
 *   code unit so the result never ends in a lone high surrogate;
 * - if the clipped text contains a newline after index 80, everything from the last newline on
 *   is dropped so the snippet ends on a whole line;
 * - trailing whitespace is removed.
 *
 * @param snippet Text to clip.
 * @param maxChars Maximum length of the result, in UTF-16 code units.
 * @returns The original snippet when it fits, otherwise the clipped text (never longer than `maxChars`).
 */
function clipSnippetToMaxChars(snippet: string, maxChars: number): string {
  if (snippet.length <= maxChars) {
    return snippet;
  }

  let cutIndex = Math.max(0, maxChars);
  if (cutIndex > 0 && cutIndex < snippet.length) {
    const before = snippet.charCodeAt(cutIndex - 1);
    const after = snippet.charCodeAt(cutIndex);
    const splitsSurrogatePair = before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    if (splitsSurrogatePair) {
      // Keep the pair together by excluding its first half as well.
      cutIndex -= 1;
    }
  }

  const clipped = snippet.slice(0, cutIndex);
  const lastNewline = clipped.lastIndexOf("\n");
  // Only snap back to a line boundary when more than 80 characters precede it.
  if (lastNewline > 80) {
    return clipped.slice(0, lastNewline).trimEnd();
  }
  return clipped.trimEnd();
}
|
|
4365
|
+
|
|
4366
|
+
/**
 * Maps a file path to the snippet-integrity language identifier for its extension.
 *
 * The extension comes from `fileExtension` (defined elsewhere in this file) and is compared
 * against dotted literals such as `".ts"`.
 *
 * @param path File path of a search result.
 * @returns `"typescript"`, `"tsx"`, `"javascript"` or `"jsx"`; `undefined` for any other extension.
 */
function snippetIntegrityLanguageFromPath(path: string): string | undefined {
  switch (fileExtension(path)) {
    case ".ts":
    case ".mts":
    case ".cts":
      return "typescript";
    case ".tsx":
      return "tsx";
    case ".js":
    case ".mjs":
    case ".cjs":
      return "javascript";
    case ".jsx":
      return "jsx";
    default:
      return undefined;
  }
}
|
|
4382
|
+
|
|
4383
|
+
/**
 * Returns the first line of a snippet that still has content after trimming.
 *
 * @param snippet Multi-line text; CRLF line endings are normalised before splitting.
 * @returns The trimmed line, or `""` when every line is blank.
 */
function firstNonEmptyLine(snippet: string): string {
  const firstWithContent = snippet
    .replace(/\r\n/g, "\n")
    .split("\n")
    .map((line) => line.trim())
    .find((line) => line.length > 0);
  return firstWithContent ?? "";
}
|
|
4393
|
+
|
|
4394
|
+
/**
 * Returns the last line of a snippet that still has content after trimming.
 *
 * @param snippet Multi-line text; CRLF line endings are normalised before splitting.
 * @returns The trimmed line, or `""` when every line is blank.
 */
function lastNonEmptyLine(snippet: string): string {
  const trimmedLines = snippet
    .replace(/\r\n/g, "\n")
    .split("\n")
    .map((line) => line.trim());
  // `map` produced a fresh array, so reversing it in place is safe.
  const lastWithContent = trimmedLines.reverse().find((line) => line.length > 0);
  return lastWithContent ?? "";
}
|
|
4404
|
+
|
|
4405
|
+
/**
 * Computes how many more `{` than `}` characters a snippet contains.
 *
 * Every brace character is counted, wherever it appears in the text.
 *
 * @param snippet Text to scan.
 * @returns Count of `{` minus count of `}`; positive when more braces open than close.
 */
function curlyBraceDelta(snippet: string): number {
  let balance = 0;
  for (let position = 0; position < snippet.length; position += 1) {
    switch (snippet[position]) {
      case "{":
        balance += 1;
        break;
      case "}":
        balance -= 1;
        break;
      default:
        break;
    }
  }
  return balance;
}
|
|
4419
|
+
|
|
4420
|
+
/**
 * Heuristically decides whether a trimmed source line opens a declaration.
 *
 * Recognised shapes:
 * - a line starting with `@` (decorator);
 * - `[export] [async] function name(`;
 * - `[export] [default] class Name`;
 * - `[export] const|let|var name =`;
 * - a member with a leading modifier (`public`, `private`, `protected`, `static`,
 *   `readonly`, `async`) followed by `name(`;
 * - a method shorthand `name(...) {`.
 *
 * The method-shorthand shape has the same surface form as control-flow headers such as
 * `if (x) {` or `for (...) {`, so the keywords `if`, `for`, `while`, `switch`, `catch` and
 * `return` are explicitly excluded there. This matches the keyword list that
 * `detectSnippetSymbolName` refuses to report as a symbol.
 *
 * @param line A single line, expected to be already trimmed.
 * @returns `true` when the line matches one of the declaration shapes above.
 */
function looksLikeDeclarationStart(line: string): boolean {
  if (line.length === 0) {
    return false;
  }
  if (line.startsWith("@")) {
    return true;
  }
  return (
    /^(?:export\s+)?(?:async\s+)?function\s+[A-Za-z_$][\w$]*\s*\(/u.test(line) ||
    /^(?:export\s+)?(?:default\s+)?class\s+[A-Za-z_$][\w$]*/u.test(line) ||
    /^(?:export\s+)?(?:const|let|var)\s+[A-Za-z_$][\w$]*\s*=/u.test(line) ||
    /^(?:public|private|protected|static|readonly|async)\s+[A-Za-z_$][\w$]*\s*\(/u.test(line) ||
    // The lookahead rejects a bare control-flow keyword but still allows identifiers that
    // merely begin with one (e.g. `iffy(...) {` or `if$x(...) {`).
    /^(?!(?:if|for|while|switch|catch|return)(?![\w$]))[A-Za-z_$][\w$]*\s*\([^)]*\)\s*\{/u.test(line)
  );
}
|
|
4435
|
+
|
|
4436
|
+
/**
 * Heuristically decides whether a trimmed line looks like the closing line of a construct.
 *
 * @param line A single line, expected to be already trimmed.
 * @returns `true` when the line is non-empty and ends with `}`, `};`, `);`, `]` or `];`.
 */
function looksLikeSnippetTerminalBoundary(line: string): boolean {
  if (line.length === 0) {
    return false;
  }
  const closingSuffixes = ["}", "};", ");", "]", "];"];
  return closingSuffixes.some((suffix) => line.endsWith(suffix));
}
|
|
4448
|
+
|
|
4449
|
+
/**
 * Extracts the name of the first declaration found near the top of a snippet.
 *
 * Only the first 40 lines are inspected (blank lines count towards that limit). Each
 * non-blank line is trimmed and tried against the declaration patterns in order: named
 * function, class, arrow function assigned to `const`/`let`/`var` (parenthesised or
 * single-parameter), member with a leading modifier, and method shorthand. A captured name
 * that is one of the control-flow keywords `if`, `for`, `while`, `switch`, `catch` or
 * `return` is ignored and matching continues.
 *
 * @param snippet Source text to scan.
 * @returns The first acceptable symbol name, or `undefined` when none is found.
 */
function detectSnippetSymbolName(snippet: string): string | undefined {
  const declarationPatterns = [
    /^(?:export\s+)?(?:async\s+)?function\s+([A-Za-z_$][\w$]*)\s*\(/u,
    /^(?:export\s+)?(?:default\s+)?class\s+([A-Za-z_$][\w$]*)\b/u,
    /^(?:export\s+)?(?:const|let|var)\s+([A-Za-z_$][\w$]*)\s*=\s*(?:async\s*)?\([^)]*\)\s*=>/u,
    /^(?:export\s+)?(?:const|let|var)\s+([A-Za-z_$][\w$]*)\s*=\s*(?:async\s*)?[A-Za-z_$][\w$]*\s*=>/u,
    /^(?:public|private|protected|static|readonly|async)\s+([A-Za-z_$][\w$]*)\s*\(/u,
    /^([A-Za-z_$][\w$]*)\s*\([^)]*\)\s*\{/u
  ];
  const controlFlowKeywords = new Set(["if", "for", "while", "switch", "catch", "return"]);
  const isUsableName = (name: string | undefined): name is string => Boolean(name) && !controlFlowKeywords.has(name as string);

  const leadingLines = snippet.replace(/\r\n/g, "\n").split("\n").slice(0, 40);
  for (const rawLine of leadingLines) {
    const text = rawLine.trim();
    if (text === "") {
      continue;
    }
    for (const pattern of declarationPatterns) {
      const captured = pattern.exec(text)?.[1];
      if (isUsableName(captured)) {
        return captured;
      }
    }
  }
  return undefined;
}
|
|
4475
|
+
|
|
4476
|
+
/**
 * Decides whether a result's snippet should be flagged as truncated.
 *
 * With nothing omitted on either side the answer is always `false`. Otherwise the snippet is
 * considered truncated when any of the following holds:
 * - lines are omitted before it and its first non-empty line does not look like a declaration start;
 * - lines are omitted after it and it opens more `{` than it closes;
 * - lines are omitted after it and its last non-empty line does not look like a closing boundary;
 * - lines are omitted on both sides.
 *
 * @param result Search result whose `snippet` is inspected.
 * @param omittedBefore Number of envelope lines missing before the snippet.
 * @param omittedAfter Number of envelope lines missing after the snippet.
 * @returns `true` when a truncation marker is warranted.
 */
function shouldAnnotateSnippetAsTruncated(
  result: SearchResultRow,
  omittedBefore: number,
  omittedAfter: number
): boolean {
  const missingHead = omittedBefore > 0;
  const missingTail = omittedAfter > 0;
  if (!missingHead && !missingTail) {
    return false;
  }

  const startsMidConstruct = missingHead && !looksLikeDeclarationStart(firstNonEmptyLine(result.snippet));
  if (startsMidConstruct) {
    return true;
  }

  if (missingTail) {
    const leavesBlockOpen = curlyBraceDelta(result.snippet) > 0;
    if (leavesBlockOpen || !looksLikeSnippetTerminalBoundary(lastNonEmptyLine(result.snippet))) {
      return true;
    }
  }

  return missingHead && missingTail;
}
|
|
4499
|
+
|
|
4500
|
+
/**
 * Grows the anchor's line range to cover every candidate reachable through small gaps.
 *
 * Starting from the anchor's range, any candidate whose `lineRangeGap` to the current range is
 * at most `maxGapLines` widens the range to include it. Passes over the candidates repeat
 * until a full pass widens nothing, so candidates that only become reachable after an earlier
 * widening are still picked up.
 *
 * @param input.anchor Result whose range seeds the envelope.
 * @param input.candidates Rows considered for absorption into the envelope.
 * @param input.maxGapLines Largest gap, in lines, that may be bridged.
 * @returns The final inclusive line range.
 */
function estimateContiguousEnvelope(input: {
  anchor: SearchResultRow;
  candidates: SearchResultRow[];
  maxGapLines: number;
}): { start_line: number; end_line: number } {
  let low = input.anchor.start_line;
  let high = input.anchor.end_line;
  let widened: boolean;

  do {
    widened = false;
    for (const row of input.candidates) {
      if (lineRangeGap({ start_line: low, end_line: high }, row) > input.maxGapLines) {
        continue;
      }
      const nextLow = Math.min(low, row.start_line);
      const nextHigh = Math.max(high, row.end_line);
      if (nextLow === low && nextHigh === high) {
        continue;
      }
      low = nextLow;
      high = nextHigh;
      widened = true;
    }
  } while (widened);

  return { start_line: low, end_line: high };
}
|
|
4526
|
+
|
|
4527
|
+
/**
 * Tries to rebuild an anchor's snippet so that it covers the whole envelope, using line text
 * gathered from the same-path candidates.
 *
 * The attempt is abandoned (no `repaired` row, with a `reason`) when:
 * - the envelope spans more than `config.repair_max_envelope_lines` lines (`envelope_cap_exceeded`);
 * - no same-path candidate intersects the envelope (`no_envelope_candidates`);
 * - more than `max(2, floor(20% of the envelope span))` lines have no known text
 *   (`missing_line_density_too_high`);
 * - nothing fits within `config.repair_max_snippet_chars` (`snippet_char_cap_exceeded`);
 * - the assembled text is empty after trimming trailing whitespace (`empty_repaired_snippet`).
 *
 * Lines with no known text are rendered as empty strings. Whole lines are appended until the
 * character budget would be exceeded; if even the first line exceeds the budget, it alone is
 * kept, cut to the budget.
 *
 * @param input.anchor Result being repaired; its other fields are carried over unchanged.
 * @param input.envelope Inclusive line range to reconstruct.
 * @param input.samePathCandidates Rows from the same file that may supply line text.
 * @param input.config Snippet-integrity limits.
 * @returns `repaired` (starting at the envelope start, ending at the last kept line) and
 *          whether it was `clipped`; or a `reason` with `clipped: false` when the repair failed.
 */
function repairSnippetFromEnvelope(input: {
  anchor: SearchResultRow;
  envelope: { start_line: number; end_line: number };
  samePathCandidates: SearchResultRow[];
  config: RetrievalSnippetIntegrityConfig;
}): { repaired?: SearchResultRow; reason?: string; clipped: boolean } {
  const { start_line: envelopeStart, end_line: envelopeEnd } = input.envelope;
  const charBudget = input.config.repair_max_snippet_chars;

  const envelopeSpan = envelopeEnd - envelopeStart + 1;
  if (envelopeSpan > input.config.repair_max_envelope_lines) {
    return { reason: "envelope_cap_exceeded", clipped: false };
  }

  const contributors = input.samePathCandidates
    .filter((row) => row.end_line >= envelopeStart && row.start_line <= envelopeEnd)
    .sort(compareSearchResultsByLineRange);
  if (contributors.length === 0) {
    return { reason: "no_envelope_candidates", clipped: false };
  }

  // Render every envelope line, substituting "" where no contributor supplied text.
  const textByLine = buildPreferredLineMap(contributors);
  const rendered: string[] = [];
  let unknownLines = 0;
  for (let lineNumber = envelopeStart; lineNumber <= envelopeEnd; lineNumber += 1) {
    const known = textByLine.get(lineNumber);
    if (typeof known === "string") {
      rendered.push(known);
    } else {
      unknownLines += 1;
      rendered.push("");
    }
  }

  const unknownLineLimit = Math.max(2, Math.floor(envelopeSpan * 0.2));
  if (unknownLines > unknownLineLimit) {
    return { reason: "missing_line_density_too_high", clipped: false };
  }

  // Keep whole lines while they fit; every line after the first also costs one joining "\n".
  const kept: string[] = [];
  let charsUsed = 0;
  let clipped = false;
  for (const [position, rawLine] of rendered.entries()) {
    const line = rawLine ?? "";
    const cost = position === 0 ? line.length : line.length + 1;
    if (kept.length === 0) {
      if (line.length > charBudget) {
        // Even the first line is too long: keep only its leading part.
        const head = line.slice(0, charBudget);
        if (head.length === 0) {
          return { reason: "snippet_char_cap_exceeded", clipped: false };
        }
        kept.push(head);
        charsUsed = head.length;
        clipped = true;
        break;
      }
    } else if (charsUsed + cost > charBudget) {
      clipped = true;
      break;
    }
    kept.push(line);
    charsUsed += cost;
  }

  if (kept.length === 0) {
    return { reason: "snippet_char_cap_exceeded", clipped: false };
  }

  const snippet = kept.join("\n").trimEnd();
  if (snippet.length === 0) {
    return { reason: "empty_repaired_snippet", clipped: false };
  }

  return {
    repaired: {
      ...input.anchor,
      start_line: envelopeStart,
      end_line: envelopeStart + kept.length - 1,
      snippet
    },
    clipped
  };
}
|
|
4607
|
+
|
|
4608
|
+
/**
 * Formats the single-line comment that marks a snippet as truncated.
 *
 * The marker has the form
 * `// [truncated:<version> symbol=<name|unknown> estimated_span=<start>-<end> estimated_total_lines=<n> omitted_before=<n> omitted_after=<n> through_line=<result end line>]`.
 * The omitted counts are the distance between the result's range and the envelope on each
 * side, floored at zero; the total line count is floored at one.
 *
 * @param input.result Result the marker describes.
 * @param input.symbolName Detected symbol name, if any; rendered as `unknown` when absent.
 * @param input.envelope_start_line First line of the estimated full span.
 * @param input.envelope_end_line Last line of the estimated full span.
 * @param input.marker_template_version Template version tag embedded in the marker.
 * @returns The marker text, without a trailing newline.
 */
function buildSnippetTruncationMarker(input: {
  result: SearchResultRow;
  symbolName?: string;
  envelope_start_line: number;
  envelope_end_line: number;
  marker_template_version: "v1";
}): string {
  const { result, envelope_start_line: spanStart, envelope_end_line: spanEnd } = input;
  const fields = [
    `truncated:${input.marker_template_version}`,
    `symbol=${input.symbolName ?? "unknown"}`,
    `estimated_span=${spanStart}-${spanEnd}`,
    `estimated_total_lines=${Math.max(1, spanEnd - spanStart + 1)}`,
    `omitted_before=${Math.max(0, result.start_line - spanStart)}`,
    `omitted_after=${Math.max(0, spanEnd - result.end_line)}`,
    `through_line=${result.end_line}`
  ];
  return `// [${fields.join(" ")}]`;
}
|
|
4622
|
+
|
|
4623
|
+
/**
 * Post-processes the selected results so that snippets which appear cut off are either
 * repaired from neighbouring chunks or tagged with a truncation marker.
 *
 * Results pass through unchanged when the feature is disabled, when no target language is
 * configured, when the result's file extension maps to no enabled language, when there is no
 * other candidate for the same path, or when the snippet does not look truncated relative to
 * its estimated envelope.
 *
 * For a snippet that does look truncated:
 * 1. if `config.repair_enabled`, a repair is attempted (counted in
 *    `retrieval_snippet_repair_attempt_total`); success is counted and logged, failure is
 *    logged with the reason;
 * 2. the (possibly repaired) snippet is re-checked; if it no longer looks truncated it is
 *    returned as is;
 * 3. otherwise a truncation marker line is appended, using a symbol name detected from the
 *    snippet or, failing that, from the other candidates inside the envelope.
 *
 * @param input.selected Results chosen for output.
 * @param input.sourceCandidates Candidate pool used to find same-path neighbours.
 * @param input.config Snippet-integrity settings.
 * @param input.observability Metrics and logger sinks.
 * @param input.retrievalProfileId Identifier attached to every metric and log entry.
 * @returns A new array, in the same order as `selected`.
 */
function annotateSearchResultsWithSnippetIntegrity(input: {
  selected: SearchResultRow[];
  sourceCandidates: SearchResultRow[];
  config: RetrievalSnippetIntegrityConfig;
  observability: Observability;
  retrievalProfileId: string;
}): SearchResultRow[] {
  const { config, observability, retrievalProfileId } = input;

  if (!config.enabled || input.selected.length === 0) {
    return [...input.selected];
  }
  const enabledLanguages = new Set(normalizeSnippetIntegrityLanguageList(config.target_languages));
  if (enabledLanguages.size === 0) {
    return [...input.selected];
  }

  // Group the candidate pool by file so each result can find its neighbours quickly.
  const sourceByPath = new Map<string, SearchResultRow[]>();
  for (const candidate of input.sourceCandidates) {
    const bucket = sourceByPath.get(candidate.path);
    if (bucket) {
      bucket.push(candidate);
      continue;
    }
    sourceByPath.set(candidate.path, [candidate]);
  }

  return input.selected.map((result) => {
    const language = snippetIntegrityLanguageFromPath(result.path);
    if (!language || !enabledLanguages.has(language)) {
      return result;
    }
    const samePath = sourceByPath.get(result.path) ?? [result];
    if (samePath.length <= 1) {
      return result;
    }

    const envelope = estimateContiguousEnvelope({
      anchor: result,
      candidates: samePath,
      maxGapLines: config.max_contiguous_gap_lines
    });
    const initiallyTruncated = shouldAnnotateSnippetAsTruncated(
      result,
      Math.max(0, result.start_line - envelope.start_line),
      Math.max(0, envelope.end_line - result.end_line)
    );
    if (!initiallyTruncated) {
      return result;
    }

    const envelopeCandidates = samePath
      .filter((candidate) => candidate.end_line >= envelope.start_line && candidate.start_line <= envelope.end_line)
      .sort(compareSearchResultsByLineRange);
    let assembled = result;

    if (config.repair_enabled) {
      observability.metrics.increment("retrieval_snippet_repair_attempt_total", 1, {
        retrieval_profile_id: retrievalProfileId,
        language
      });
      const repairOutcome = repairSnippetFromEnvelope({
        anchor: result,
        envelope,
        samePathCandidates: samePath,
        config
      });
      // Fields shared by both the "repaired" and the "repair_skipped" log entries.
      const decisionContext = {
        retrieval_profile_id: retrievalProfileId,
        path: result.path,
        language,
        envelope_start_line: envelope.start_line,
        envelope_end_line: envelope.end_line,
        envelope_span_lines: envelope.end_line - envelope.start_line + 1
      };
      if (repairOutcome.repaired) {
        assembled = repairOutcome.repaired;
        observability.metrics.increment("retrieval_snippet_repair_success_total", 1, {
          retrieval_profile_id: retrievalProfileId,
          language,
          clipped: repairOutcome.clipped ? "true" : "false"
        });
        observability.logger.info("snippet integrity repair decision", {
          ...decisionContext,
          status: "repaired",
          clipped: repairOutcome.clipped
        });
      } else {
        observability.logger.info("snippet integrity repair decision", {
          ...decisionContext,
          status: "repair_skipped",
          reason: repairOutcome.reason ?? "unknown"
        });
      }
    }

    // Re-evaluate against the envelope: a successful repair may have closed the gaps.
    const omittedBefore = Math.max(0, assembled.start_line - envelope.start_line);
    const omittedAfter = Math.max(0, envelope.end_line - assembled.end_line);
    if (!shouldAnnotateSnippetAsTruncated(assembled, omittedBefore, omittedAfter)) {
      return assembled;
    }

    let symbolName = detectSnippetSymbolName(assembled.snippet);
    for (const neighbour of envelopeCandidates) {
      if (symbolName) {
        break;
      }
      symbolName = detectSnippetSymbolName(neighbour.snippet);
    }

    const marker = buildSnippetTruncationMarker({
      result: assembled,
      symbolName,
      envelope_start_line: envelope.start_line,
      envelope_end_line: envelope.end_line,
      marker_template_version: config.marker_template_version
    });
    observability.metrics.increment("retrieval_snippet_repair_fallback_marker_total", 1, {
      retrieval_profile_id: retrievalProfileId,
      language
    });
    observability.metrics.increment("retrieval_snippet_truncation_marker_total", 1, {
      retrieval_profile_id: retrievalProfileId,
      language,
      symbol_detected: symbolName ? "true" : "false",
      marker_template_version: config.marker_template_version
    });
    observability.metrics.observe("retrieval_snippet_omitted_after_lines", omittedAfter, {
      retrieval_profile_id: retrievalProfileId,
      language
    });

    const body = assembled.snippet.trimEnd();
    return {
      ...assembled,
      snippet: body.length > 0 ? `${body}\n${marker}` : marker
    };
  });
}
|
|
4760
|
+
|
|
4761
|
+
/**
 * Enriches each selected result with nearby spans from the same file, rendered as one snippet
 * with `...` lines between non-contiguous spans.
 *
 * A result is returned unchanged when packing is disabled, when it has no same-path
 * neighbours, when `max_spans_per_result <= 1`, when no neighbour ends up as a separate span,
 * when elision lines would make up more than 25% of the rendered lines, or when the clipped
 * snippet is empty.
 *
 * Neighbours are eligible when they are not the anchor's exact range, do not heavily overlap
 * it (per `isHeavilyOverlappingLineRange`), and lie within `max_gap_lines` of it. They are
 * tried in `compareSearchResults` order, ties broken by smaller gap to the anchor, and a
 * neighbour is skipped if adding it would leave any gap between merged spans larger than
 * `max_gap_lines`.
 *
 * @param input.selected Results chosen for output.
 * @param input.sourceCandidates Candidate pool that supplies neighbours and line text.
 * @param input.config Context-packing settings.
 * @returns A new array in the same order as `selected`; packed entries span the first merged
 *          span's start to the last merged span's end and have ` + contextual spans`
 *          appended to `reason`.
 */
function packSearchResultsWithContext(input: {
  selected: SearchContextOutput["results"];
  sourceCandidates: SearchContextOutput["results"];
  config: RetrievalContextPackingConfig;
}): SearchContextOutput["results"] {
  const { config } = input;
  if (!config.enabled || input.selected.length === 0) {
    return [...input.selected];
  }

  const sourceByPath = new Map<string, SearchContextOutput["results"]>();
  for (const candidate of input.sourceCandidates) {
    const bucket = sourceByPath.get(candidate.path);
    if (bucket) {
      bucket.push(candidate);
      continue;
    }
    sourceByPath.set(candidate.path, [candidate]);
  }

  return input.selected.map((anchor) => {
    const samePath = sourceByPath.get(anchor.path) ?? [anchor];
    if (samePath.length <= 1 || config.max_spans_per_result <= 1) {
      return anchor;
    }

    const anchorRange = { start_line: anchor.start_line, end_line: anchor.end_line };
    const neighbours = samePath
      .filter((row) => {
        const isAnchorRange = row.start_line === anchor.start_line && row.end_line === anchor.end_line;
        return (
          !isAnchorRange &&
          !isHeavilyOverlappingLineRange(row, [anchorRange]) &&
          lineRangeGap(anchorRange, row) <= config.max_gap_lines
        );
      })
      .sort((left, right) => {
        const byRelevance = compareSearchResults(left, right);
        return byRelevance !== 0 ? byRelevance : lineRangeGap(anchorRange, left) - lineRangeGap(anchorRange, right);
      });

    // Greedily accept neighbours, rejecting any that would leave an over-wide hole.
    const spans: Array<{ start_line: number; end_line: number }> = [{ ...anchorRange }];
    for (const neighbour of neighbours) {
      if (spans.length >= config.max_spans_per_result) {
        break;
      }
      const addition = { start_line: neighbour.start_line, end_line: neighbour.end_line };
      const trial = mergeLineSpans([...spans, addition]);
      const leavesWideGap = trial.some((span, position) => {
        if (position === 0) {
          return false;
        }
        const previousEnd = trial[position - 1]?.end_line ?? span.start_line;
        return span.start_line - previousEnd - 1 > config.max_gap_lines;
      });
      if (!leavesWideGap) {
        spans.push(addition);
      }
    }

    const mergedSpans = mergeLineSpans(spans);
    if (mergedSpans.length <= 1) {
      return anchor;
    }

    const textByLine = buildPreferredLineMap([anchor, ...samePath]);
    const rendered: string[] = [];
    let contentLines = 0;
    let elisions = 0;
    mergedSpans.forEach((span, position) => {
      if (!span) {
        return;
      }
      const previous = position > 0 ? mergedSpans[position - 1] : undefined;
      if (previous && span.start_line - previous.end_line > 0) {
        rendered.push("...");
        elisions += 1;
      }
      for (let lineNumber = span.start_line; lineNumber <= span.end_line; lineNumber += 1) {
        rendered.push(textByLine.get(lineNumber) ?? "");
        contentLines += 1;
      }
    });
    if (rendered.length === 0) {
      return anchor;
    }

    // Give up when the packed snippet would be dominated by "..." separators.
    if (elisions / Math.max(1, contentLines + elisions) > 0.25) {
      return anchor;
    }

    const packedSnippet = clipSnippetToMaxChars(rendered.join("\n"), config.max_snippet_chars);
    if (packedSnippet.length === 0) {
      return anchor;
    }

    return {
      ...anchor,
      start_line: mergedSpans[0]?.start_line ?? anchor.start_line,
      end_line: mergedSpans[mergedSpans.length - 1]?.end_line ?? anchor.end_line,
      snippet: packedSnippet,
      reason: `${anchor.reason} + contextual spans`
    };
  });
}
|
|
4868
|
+
|
|
4869
|
+
/**
 * Merges candidates from the same file whose line ranges overlap or lie close together.
 *
 * Candidates are grouped by `path` and each group is ordered with
 * `compareSearchResultsByLineRange`. Walking that order, a candidate joins the current cluster
 * when the gap after the cluster's end is at most `merge_gap_lines` and the combined span
 * stays within `merge_max_span_lines`; otherwise the cluster is emitted through
 * `mergeCandidateCluster` and a new one starts. The merged rows are finally sorted with
 * `compareSearchResults`.
 *
 * @param candidates Rows to merge.
 * @param config Rerank settings; merging is skipped when `merge_overlapping_chunks_enabled` is off.
 * @returns A new array; a shallow copy of `candidates` when merging is disabled or there is
 *          at most one candidate.
 */
function mergeOverlappingCandidates(
  candidates: SearchContextOutput["results"],
  config: RetrievalRerankConfig
): SearchContextOutput["results"] {
  if (!config.merge_overlapping_chunks_enabled || candidates.length <= 1) {
    return [...candidates];
  }

  const byPath = new Map<string, SearchContextOutput["results"]>();
  for (const candidate of candidates) {
    const bucket = byPath.get(candidate.path);
    if (bucket) {
      bucket.push(candidate);
      continue;
    }
    byPath.set(candidate.path, [candidate]);
  }

  const merged: SearchContextOutput["results"] = [];
  for (const group of byPath.values()) {
    let cluster: SearchContextOutput["results"] = [];
    let clusterStart = 0;
    let clusterEnd = 0;

    for (const candidate of [...group].sort(compareSearchResultsByLineRange)) {
      if (cluster.length > 0) {
        const widenedStart = Math.min(clusterStart, candidate.start_line);
        const widenedEnd = Math.max(clusterEnd, candidate.end_line);
        const gapLines = Math.max(0, candidate.start_line - clusterEnd - 1);
        const fits = gapLines <= config.merge_gap_lines && widenedEnd - widenedStart + 1 <= config.merge_max_span_lines;
        if (fits) {
          cluster.push(candidate);
          clusterStart = widenedStart;
          clusterEnd = widenedEnd;
          continue;
        }
        // The candidate does not fit: close the current cluster before starting a new one.
        merged.push(mergeCandidateCluster(cluster));
      }
      cluster = [candidate];
      clusterStart = candidate.start_line;
      clusterEnd = candidate.end_line;
    }

    if (cluster.length > 0) {
      merged.push(mergeCandidateCluster(cluster));
    }
  }

  return merged.sort(compareSearchResults);
}
|
|
4934
|
+
|
|
4935
|
+
/**
 * Trims a ranked candidate list at the first clear drop in quality.
 *
 * Candidates are ordered with `compareSearchResults`. The first `smart_cutoff_min_k` (at least
 * one) are always kept. After that, the list is cut at the first candidate that
 * - scores below `smart_cutoff_min_score`, or
 * - scores below the top score multiplied by `smart_cutoff_top_ratio`, or
 * - scores more than `smart_cutoff_delta_abs` below the candidate ranked just before it.
 * No more than `max(min_k, smart_cutoff_max_k)` candidates are ever kept.
 *
 * @param candidates Rows to filter.
 * @param config Rerank settings; the cutoff is skipped when `smart_cutoff_enabled` is off.
 * @returns A new array; a shallow copy of `candidates` (original order) when the cutoff is
 *          disabled or the list is empty.
 */
function applySmartCutoffCandidates(
  candidates: SearchContextOutput["results"],
  config: RetrievalRerankConfig
): SearchContextOutput["results"] {
  if (!config.smart_cutoff_enabled || candidates.length === 0) {
    return [...candidates];
  }

  const ranked = [...candidates].sort(compareSearchResults);
  const minK = Math.max(1, config.smart_cutoff_min_k);
  const maxK = Math.max(minK, config.smart_cutoff_max_k);
  const topScore = ranked[0]?.score ?? Number.NEGATIVE_INFINITY;

  const kept: SearchContextOutput["results"] = [];
  for (const [position, row] of ranked.entries()) {
    if (!row) {
      continue;
    }
    if (kept.length >= maxK) {
      break;
    }
    if (kept.length >= minK) {
      // Past the guaranteed minimum: stop at the first sign of a quality drop.
      if (row.score < config.smart_cutoff_min_score) {
        break;
      }
      if (row.score < topScore * config.smart_cutoff_top_ratio) {
        break;
      }
      const predecessor = ranked[position - 1];
      if (predecessor && predecessor.score - row.score > config.smart_cutoff_delta_abs) {
        break;
      }
    }
    kept.push(row);
  }

  return kept;
}
|
|
4975
|
+
|
|
4976
|
+
/**
 * Test-only entry point that forwards to `applySmartCutoffCandidates`.
 *
 * @param input.candidates Rows to filter.
 * @param input.config Rerank settings passed through unchanged.
 * @returns Whatever `applySmartCutoffCandidates` returns for the same arguments.
 */
export function __applySmartCutoffCandidatesForTests(input: {
  candidates: SearchContextOutput["results"];
  config: RetrievalRerankConfig;
}): SearchContextOutput["results"] {
  const { candidates, config } = input;
  return applySmartCutoffCandidates(candidates, config);
}
|
|
4982
|
+
|
|
3861
4983
|
function dedupeEnhancerCandidatesByPath(results: SearchContextOutput["results"]): SearchContextOutput["results"] {
|
|
3862
4984
|
const byPath = new Map<string, SearchContextOutput["results"][number]>();
|
|
3863
4985
|
for (const result of results) {
|
|
@@ -4248,11 +5370,14 @@ function deterministicEnhancerFallbackRanking(input: {
|
|
|
4248
5370
|
return [...preferred, ...tolerated, ...avoided];
|
|
4249
5371
|
}
|
|
4250
5372
|
|
|
4251
|
-
function trimToContextBudget(
|
|
5373
|
+
function trimToContextBudget(
|
|
5374
|
+
results: SearchContextOutput["results"],
|
|
5375
|
+
budgetTokenizerMode: "ranking" | "lightweight"
|
|
5376
|
+
): SearchContextOutput["results"] {
|
|
4252
5377
|
let total = 0;
|
|
4253
5378
|
const out: SearchContextOutput["results"] = [];
|
|
4254
5379
|
for (const result of results) {
|
|
4255
|
-
total +=
|
|
5380
|
+
total += chunkBudgetTokenize(result.snippet, budgetTokenizerMode).length;
|
|
4256
5381
|
if (total > MAX_CONTEXT_BUDGET_TOKENS) {
|
|
4257
5382
|
break;
|
|
4258
5383
|
}
|
|
@@ -4857,6 +5982,8 @@ export class RetrievalCore {
|
|
|
4857
5982
|
private readonly enhancerConfig: RetrievalEnhancerConfig;
|
|
4858
5983
|
private readonly enhancerGenerationConfig: RetrievalEnhancerGenerationConfig;
|
|
4859
5984
|
private readonly chunkingConfig: RetrievalChunkingConfig;
|
|
5985
|
+
private readonly contextPackingConfig: RetrievalContextPackingConfig;
|
|
5986
|
+
private readonly snippetIntegrityConfig: RetrievalSnippetIntegrityConfig;
|
|
4860
5987
|
private readonly enhancerDecisionTraceEnabled: boolean;
|
|
4861
5988
|
private cacheHits = 0;
|
|
4862
5989
|
private cacheMisses = 0;
|
|
@@ -4898,6 +6025,14 @@ export class RetrievalCore {
|
|
|
4898
6025
|
options?.enhancerGenerationConfig
|
|
4899
6026
|
);
|
|
4900
6027
|
this.chunkingConfig = mergeRetrievalChunkingConfig(DEFAULT_RETRIEVAL_CHUNKING_CONFIG, options?.chunkingConfig);
|
|
6028
|
+
this.contextPackingConfig = mergeRetrievalContextPackingConfig(
|
|
6029
|
+
DEFAULT_RETRIEVAL_CONTEXT_PACKING_CONFIG,
|
|
6030
|
+
options?.contextPackingConfig
|
|
6031
|
+
);
|
|
6032
|
+
this.snippetIntegrityConfig = mergeRetrievalSnippetIntegrityConfig(
|
|
6033
|
+
DEFAULT_RETRIEVAL_SNIPPET_INTEGRITY_CONFIG,
|
|
6034
|
+
options?.snippetIntegrityConfig
|
|
6035
|
+
);
|
|
4901
6036
|
this.enhancerDecisionTraceEnabled = Boolean(options?.enhancerDecisionTraceEnabled);
|
|
4902
6037
|
}
|
|
4903
6038
|
|
|
@@ -5092,6 +6227,12 @@ export class RetrievalCore {
|
|
|
5092
6227
|
language: chunkLanguage,
|
|
5093
6228
|
reason: chunkBuild.fallback_reason ?? "none"
|
|
5094
6229
|
});
|
|
6230
|
+
if (chunkBuild.recursive_semantic_chunking_used) {
|
|
6231
|
+
this.observability.metrics.increment("index_recursive_semantic_chunking_used_total", 1, {
|
|
6232
|
+
tenant_id: artifact.tenant_id,
|
|
6233
|
+
language: chunkLanguage
|
|
6234
|
+
});
|
|
6235
|
+
}
|
|
5095
6236
|
if (chunkBuild.fallback_reason) {
|
|
5096
6237
|
this.observability.metrics.increment("index_chunking_fallback_total", 1, {
|
|
5097
6238
|
tenant_id: artifact.tenant_id,
|
|
@@ -5123,7 +6264,8 @@ export class RetrievalCore {
|
|
|
5123
6264
|
reason: chunkBuild.fallback_reason
|
|
5124
6265
|
});
|
|
5125
6266
|
}
|
|
5126
|
-
const
|
|
6267
|
+
const embeddingTexts = buildChunkEmbeddingTexts(chunks, this.chunkingConfig, this.embeddingDescriptor.provider);
|
|
6268
|
+
const estimatedEmbeddingTokens = embeddingTexts.reduce((sum, text) => sum + tokenize(text).length, 0);
|
|
5127
6269
|
this.observability.metrics.increment("index_embedding_tokens_total", estimatedEmbeddingTokens, {
|
|
5128
6270
|
tenant_id: artifact.tenant_id
|
|
5129
6271
|
});
|
|
@@ -5131,7 +6273,7 @@ export class RetrievalCore {
|
|
|
5131
6273
|
chunks.length === 0
|
|
5132
6274
|
? []
|
|
5133
6275
|
: await this.embeddingProvider.embed({
|
|
5134
|
-
texts:
|
|
6276
|
+
texts: embeddingTexts,
|
|
5135
6277
|
purpose: "index"
|
|
5136
6278
|
});
|
|
5137
6279
|
if (embeddings.length !== chunks.length) {
|
|
@@ -5449,6 +6591,12 @@ export class RetrievalCore {
|
|
|
5449
6591
|
language: chunkLanguage,
|
|
5450
6592
|
reason: chunkBuild.fallback_reason ?? "none"
|
|
5451
6593
|
});
|
|
6594
|
+
if (chunkBuild.recursive_semantic_chunking_used) {
|
|
6595
|
+
this.observability.metrics.increment("index_recursive_semantic_chunking_used_total", 1, {
|
|
6596
|
+
tenant_id: artifact.tenant_id,
|
|
6597
|
+
language: chunkLanguage
|
|
6598
|
+
});
|
|
6599
|
+
}
|
|
5452
6600
|
if (chunkBuild.fallback_reason) {
|
|
5453
6601
|
this.observability.metrics.increment("index_chunking_fallback_total", 1, {
|
|
5454
6602
|
tenant_id: artifact.tenant_id,
|
|
@@ -5480,7 +6628,8 @@ export class RetrievalCore {
|
|
|
5480
6628
|
reason: chunkBuild.fallback_reason
|
|
5481
6629
|
});
|
|
5482
6630
|
}
|
|
5483
|
-
const
|
|
6631
|
+
const embeddingTexts = buildChunkEmbeddingTexts(chunks, this.chunkingConfig, this.embeddingDescriptor.provider);
|
|
6632
|
+
const estimatedEmbeddingTokens = embeddingTexts.reduce((sum, text) => sum + tokenize(text).length, 0);
|
|
5484
6633
|
this.observability.metrics.increment("index_embedding_tokens_total", estimatedEmbeddingTokens, {
|
|
5485
6634
|
tenant_id: artifact.tenant_id
|
|
5486
6635
|
});
|
|
@@ -5488,7 +6637,7 @@ export class RetrievalCore {
|
|
|
5488
6637
|
chunks.length === 0
|
|
5489
6638
|
? []
|
|
5490
6639
|
: await this.embeddingProvider.embed({
|
|
5491
|
-
texts:
|
|
6640
|
+
texts: embeddingTexts,
|
|
5492
6641
|
purpose: "index"
|
|
5493
6642
|
});
|
|
5494
6643
|
if (embeddings.length !== chunks.length) {
|
|
@@ -5823,7 +6972,27 @@ export class RetrievalCore {
|
|
|
5823
6972
|
query,
|
|
5824
6973
|
top_k: topK,
|
|
5825
6974
|
filters: input.request.filters,
|
|
5826
|
-
retrieval_variant: this.rerankerCacheVariant
|
|
6975
|
+
retrieval_variant: `${this.rerankerCacheVariant}|context_pack:${
|
|
6976
|
+
this.contextPackingConfig.enabled ? "on" : "off"
|
|
6977
|
+
}|context_pack_spans:${this.contextPackingConfig.max_spans_per_result}|context_pack_gap:${this.contextPackingConfig.max_gap_lines}|snippet_integrity:${
|
|
6978
|
+
this.snippetIntegrityConfig.enabled ? "on" : "off"
|
|
6979
|
+
}|snippet_integrity_gap:${this.snippetIntegrityConfig.max_contiguous_gap_lines}|snippet_integrity_langs:${this.snippetIntegrityConfig.target_languages.join(
|
|
6980
|
+
","
|
|
6981
|
+
)}|snippet_repair:${this.snippetIntegrityConfig.repair_enabled ? "on" : "off"}|snippet_repair_env:${
|
|
6982
|
+
this.snippetIntegrityConfig.repair_max_envelope_lines
|
|
6983
|
+
}|snippet_repair_chars:${this.snippetIntegrityConfig.repair_max_snippet_chars}|chunk_recursive:${
|
|
6984
|
+
this.chunkingConfig.recursive_semantic_chunking_enabled ? "on" : "off"
|
|
6985
|
+
}|chunk_semantic_gap:${this.chunkingConfig.semantic_merge_gap_lines}|chunk_semantic_span:${
|
|
6986
|
+
this.chunkingConfig.semantic_merge_max_span_lines
|
|
6987
|
+
}|chunk_comment_absorb:${this.chunkingConfig.comment_forward_absorb_enabled ? "on" : "off"}|chunk_embed_prefix:${
|
|
6988
|
+
this.chunkingConfig.embedding_context_prefix_enabled ? "on" : "off"
|
|
6989
|
+
}|smart_cutoff:${this.scoringConfig.rerank.smart_cutoff_enabled ? "on" : "off"}|smart_cutoff_min_k:${
|
|
6990
|
+
this.scoringConfig.rerank.smart_cutoff_min_k
|
|
6991
|
+
}|smart_cutoff_max_k:${this.scoringConfig.rerank.smart_cutoff_max_k}|smart_cutoff_min_score:${
|
|
6992
|
+
this.scoringConfig.rerank.smart_cutoff_min_score
|
|
6993
|
+
}|smart_cutoff_top_ratio:${this.scoringConfig.rerank.smart_cutoff_top_ratio}|smart_cutoff_delta_abs:${
|
|
6994
|
+
this.scoringConfig.rerank.smart_cutoff_delta_abs
|
|
6995
|
+
}`
|
|
5827
6996
|
});
|
|
5828
6997
|
|
|
5829
6998
|
const cached = await this.cache.get(cacheKey);
|
|
@@ -5988,17 +7157,47 @@ export class RetrievalCore {
|
|
|
5988
7157
|
})
|
|
5989
7158
|
);
|
|
5990
7159
|
|
|
7160
|
+
const consolidatedCandidates = await this.observability.tracing.withSpan(
|
|
7161
|
+
"retrieval.overlap_merge",
|
|
7162
|
+
{ trace_id: input.trace_id },
|
|
7163
|
+
async () => mergeOverlappingCandidates(rerankedCandidates, this.scoringConfig.rerank)
|
|
7164
|
+
);
|
|
7165
|
+
this.observability.metrics.observe("retrieval_candidates_post_overlap_merge_count", consolidatedCandidates.length, {
|
|
7166
|
+
retrieval_profile_id: this.scoringProfileId
|
|
7167
|
+
});
|
|
7168
|
+
const mergedCandidateCount = Math.max(0, rerankedCandidates.length - consolidatedCandidates.length);
|
|
7169
|
+
if (mergedCandidateCount > 0) {
|
|
7170
|
+
this.observability.metrics.increment("retrieval_overlap_candidates_merged_total", mergedCandidateCount, {
|
|
7171
|
+
retrieval_profile_id: this.scoringProfileId
|
|
7172
|
+
});
|
|
7173
|
+
}
|
|
7174
|
+
const cutoffCandidates = await this.observability.tracing.withSpan(
|
|
7175
|
+
"retrieval.smart_cutoff",
|
|
7176
|
+
{ trace_id: input.trace_id },
|
|
7177
|
+
async () => applySmartCutoffCandidates(consolidatedCandidates, this.scoringConfig.rerank)
|
|
7178
|
+
);
|
|
7179
|
+
if (this.scoringConfig.rerank.smart_cutoff_enabled) {
|
|
7180
|
+
this.observability.metrics.increment("retrieval_smart_cutoff_applied_total", 1, {
|
|
7181
|
+
retrieval_profile_id: this.scoringProfileId
|
|
7182
|
+
});
|
|
7183
|
+
const droppedCount = Math.max(0, consolidatedCandidates.length - cutoffCandidates.length);
|
|
7184
|
+
this.observability.metrics.increment("retrieval_smart_cutoff_drop_count", droppedCount, {
|
|
7185
|
+
retrieval_profile_id: this.scoringProfileId
|
|
7186
|
+
});
|
|
7187
|
+
}
|
|
7188
|
+
|
|
5991
7189
|
const deduped = await this.observability.tracing.withSpan("retrieval.rerank", { trace_id: input.trace_id }, async () => {
|
|
5992
7190
|
const output: SearchContextOutput["results"] = [];
|
|
5993
7191
|
const seen = new Set<string>();
|
|
5994
7192
|
const pathCounts = new Map<string, number>();
|
|
7193
|
+
const selectedRangesByPath = new Map<string, Array<{ start_line: number; end_line: number }>>();
|
|
5995
7194
|
const directoryCounts = new Map<string, number>();
|
|
5996
7195
|
const extensionCounts = new Map<string, number>();
|
|
5997
7196
|
const maxChunksPerPath = hasFileLookupIntent(queryTokens)
|
|
5998
7197
|
? this.scoringConfig.rerank.max_chunks_per_path_file_lookup
|
|
5999
7198
|
: this.scoringConfig.rerank.max_chunks_per_path_default;
|
|
6000
7199
|
|
|
6001
|
-
const available = [...
|
|
7200
|
+
const available = [...cutoffCandidates];
|
|
6002
7201
|
while (output.length < topK && available.length > 0) {
|
|
6003
7202
|
let bestIndex = -1;
|
|
6004
7203
|
let bestAdjustedScore = Number.NEGATIVE_INFINITY;
|
|
@@ -6018,6 +7217,12 @@ export class RetrievalCore {
|
|
|
6018
7217
|
if (pathCount >= maxChunksPerPath) {
|
|
6019
7218
|
continue;
|
|
6020
7219
|
}
|
|
7220
|
+
if (this.scoringConfig.rerank.merge_overlapping_chunks_enabled && pathCount > 0) {
|
|
7221
|
+
const selectedRanges = selectedRangesByPath.get(candidate.path) ?? [];
|
|
7222
|
+
if (isHeavilyOverlappingLineRange(candidate, selectedRanges)) {
|
|
7223
|
+
continue;
|
|
7224
|
+
}
|
|
7225
|
+
}
|
|
6021
7226
|
|
|
6022
7227
|
const directoryKey = parentDirectory(candidate.path).toLowerCase();
|
|
6023
7228
|
const extensionKey = fileExtension(candidate.path);
|
|
@@ -6057,6 +7262,12 @@ export class RetrievalCore {
|
|
|
6057
7262
|
const selectedKey = `${selected.path}:${selected.start_line}:${selected.end_line}`;
|
|
6058
7263
|
seen.add(selectedKey);
|
|
6059
7264
|
pathCounts.set(selected.path, (pathCounts.get(selected.path) ?? 0) + 1);
|
|
7265
|
+
const selectedRanges = selectedRangesByPath.get(selected.path);
|
|
7266
|
+
if (selectedRanges) {
|
|
7267
|
+
selectedRanges.push({ start_line: selected.start_line, end_line: selected.end_line });
|
|
7268
|
+
} else {
|
|
7269
|
+
selectedRangesByPath.set(selected.path, [{ start_line: selected.start_line, end_line: selected.end_line }]);
|
|
7270
|
+
}
|
|
6060
7271
|
const selectedDirectory = parentDirectory(selected.path).toLowerCase();
|
|
6061
7272
|
const selectedExtension = fileExtension(selected.path);
|
|
6062
7273
|
directoryCounts.set(selectedDirectory, (directoryCounts.get(selectedDirectory) ?? 0) + 1);
|
|
@@ -6067,8 +7278,8 @@ export class RetrievalCore {
|
|
|
6067
7278
|
});
|
|
6068
7279
|
|
|
6069
7280
|
const candidateRankByKey = new Map<string, number>();
|
|
6070
|
-
for (let index = 0; index <
|
|
6071
|
-
const candidate =
|
|
7281
|
+
for (let index = 0; index < cutoffCandidates.length; index += 1) {
|
|
7282
|
+
const candidate = cutoffCandidates[index];
|
|
6072
7283
|
if (!candidate) {
|
|
6073
7284
|
continue;
|
|
6074
7285
|
}
|
|
@@ -6100,10 +7311,22 @@ export class RetrievalCore {
|
|
|
6100
7311
|
this.observability.metrics.observe("retrieval_literal_matches_topk", literalMatchesInTopK, {
|
|
6101
7312
|
retrieval_profile_id: this.scoringProfileId
|
|
6102
7313
|
});
|
|
7314
|
+
const packedResults = packSearchResultsWithContext({
|
|
7315
|
+
selected: deduped,
|
|
7316
|
+
sourceCandidates: cutoffCandidates,
|
|
7317
|
+
config: this.contextPackingConfig
|
|
7318
|
+
});
|
|
7319
|
+
const assembledResults = annotateSearchResultsWithSnippetIntegrity({
|
|
7320
|
+
selected: packedResults,
|
|
7321
|
+
sourceCandidates: cutoffCandidates,
|
|
7322
|
+
config: this.snippetIntegrityConfig,
|
|
7323
|
+
observability: this.observability,
|
|
7324
|
+
retrievalProfileId: this.scoringProfileId
|
|
7325
|
+
});
|
|
6103
7326
|
|
|
6104
7327
|
const output: SearchContextOutput = {
|
|
6105
7328
|
trace_id: input.trace_id,
|
|
6106
|
-
results:
|
|
7329
|
+
results: assembledResults,
|
|
6107
7330
|
search_metadata: {
|
|
6108
7331
|
latency_ms: Date.now() - searchStartedAt,
|
|
6109
7332
|
retrieval_mode: "hybrid" satisfies RetrievalMode,
|
|
@@ -6111,7 +7334,7 @@ export class RetrievalCore {
|
|
|
6111
7334
|
}
|
|
6112
7335
|
};
|
|
6113
7336
|
|
|
6114
|
-
this.observability.metrics.observe("retrieval_topk_hit_proxy",
|
|
7337
|
+
this.observability.metrics.observe("retrieval_topk_hit_proxy", assembledResults.length > 0 ? 1 : 0, {
|
|
6115
7338
|
retrieval_profile_id: this.scoringProfileId
|
|
6116
7339
|
});
|
|
6117
7340
|
this.observability.logger.info("search_context completed", {
|
|
@@ -6138,6 +7361,7 @@ export class RetrievalCore {
|
|
|
6138
7361
|
|
|
6139
7362
|
private buildEnhancerContextSnippets(results: SearchContextOutput["results"]): EnhancerContextSnippet[] {
|
|
6140
7363
|
const maxSnippets = this.enhancerGenerationConfig.max_context_snippets;
|
|
7364
|
+
const snippetCharLimit = this.contextPackingConfig.enabled ? this.contextPackingConfig.enhancer_snippet_char_limit : 1_600;
|
|
6141
7365
|
const snippets: EnhancerContextSnippet[] = [];
|
|
6142
7366
|
for (const result of results.slice(0, maxSnippets)) {
|
|
6143
7367
|
snippets.push({
|
|
@@ -6145,7 +7369,7 @@ export class RetrievalCore {
|
|
|
6145
7369
|
start_line: result.start_line,
|
|
6146
7370
|
end_line: result.end_line,
|
|
6147
7371
|
reason: result.reason,
|
|
6148
|
-
snippet: result.snippet.slice(0,
|
|
7372
|
+
snippet: result.snippet.slice(0, snippetCharLimit),
|
|
6149
7373
|
score: result.score
|
|
6150
7374
|
});
|
|
6151
7375
|
}
|
|
@@ -6320,7 +7544,10 @@ export class RetrievalCore {
|
|
|
6320
7544
|
top_k: MAX_TOP_K
|
|
6321
7545
|
}
|
|
6322
7546
|
});
|
|
6323
|
-
const budgetedResults = trimToContextBudget(
|
|
7547
|
+
const budgetedResults = trimToContextBudget(
|
|
7548
|
+
retrieval.results,
|
|
7549
|
+
this.contextPackingConfig.enabled ? "lightweight" : "ranking"
|
|
7550
|
+
);
|
|
6324
7551
|
const dedupedByPath = dedupeEnhancerCandidatesByPath(budgetedResults);
|
|
6325
7552
|
const collapsedByDirectory = collapseEnhancerCandidatesByDirectory(
|
|
6326
7553
|
dedupedByPath,
|