@rce-mcp/retrieval-core 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +7 -0
- package/dist/.tsbuildinfo +1 -1
- package/dist/chunking.d.ts +13 -0
- package/dist/chunking.js +488 -77
- package/dist/index.d.ts +61 -0
- package/dist/index.js +993 -20
- package/dist/remote-sync.js +2 -1
- package/package.json +2 -2
- package/scripts/poc-parser-availability-benchmark.ts +2 -0
- package/src/chunking.ts +573 -80
- package/src/index.ts +1247 -20
- package/src/remote-sync.ts +3 -1
- package/test/benchmark.thresholds.test.ts +8 -0
- package/test/chunking.config.test.ts +47 -1
- package/test/chunking.language-aware.test.ts +227 -0
- package/test/embedding-context-prefix.test.ts +101 -0
- package/test/enhance-confidence.test.ts +4 -4
- package/test/mcp-search-quality.regression.test.ts +691 -4
- package/test/remote-sync.integration.test.ts +5 -1
- package/test/smart-cutoff.config.test.ts +86 -0
- package/test/snippet-integrity.config.test.ts +59 -0
package/dist/index.js
CHANGED
|
@@ -78,7 +78,16 @@ export const BASELINE_RETRIEVAL_SCORING_CONFIG = {
|
|
|
78
78
|
max_chunks_per_path_default: 2,
|
|
79
79
|
max_chunks_per_path_file_lookup: 1,
|
|
80
80
|
same_directory_penalty: 0,
|
|
81
|
-
same_extension_penalty: 0
|
|
81
|
+
same_extension_penalty: 0,
|
|
82
|
+
merge_overlapping_chunks_enabled: true,
|
|
83
|
+
merge_gap_lines: 6,
|
|
84
|
+
merge_max_span_lines: 220,
|
|
85
|
+
smart_cutoff_enabled: false,
|
|
86
|
+
smart_cutoff_min_k: 2,
|
|
87
|
+
smart_cutoff_max_k: 8,
|
|
88
|
+
smart_cutoff_min_score: 0.25,
|
|
89
|
+
smart_cutoff_top_ratio: 0.5,
|
|
90
|
+
smart_cutoff_delta_abs: 0.25
|
|
82
91
|
}
|
|
83
92
|
};
|
|
84
93
|
export const CONSERVATIVE_RETRIEVAL_SCORING_CONFIG = {
|
|
@@ -129,7 +138,16 @@ export const CONSERVATIVE_RETRIEVAL_SCORING_CONFIG = {
|
|
|
129
138
|
max_chunks_per_path_default: 2,
|
|
130
139
|
max_chunks_per_path_file_lookup: 1,
|
|
131
140
|
same_directory_penalty: 0,
|
|
132
|
-
same_extension_penalty: 0
|
|
141
|
+
same_extension_penalty: 0,
|
|
142
|
+
merge_overlapping_chunks_enabled: true,
|
|
143
|
+
merge_gap_lines: 6,
|
|
144
|
+
merge_max_span_lines: 220,
|
|
145
|
+
smart_cutoff_enabled: false,
|
|
146
|
+
smart_cutoff_min_k: 2,
|
|
147
|
+
smart_cutoff_max_k: 8,
|
|
148
|
+
smart_cutoff_min_score: 0.25,
|
|
149
|
+
smart_cutoff_top_ratio: 0.5,
|
|
150
|
+
smart_cutoff_delta_abs: 0.25
|
|
133
151
|
}
|
|
134
152
|
};
|
|
135
153
|
export const DEFAULT_RETRIEVAL_ENHANCER_CONFIG = {
|
|
@@ -148,8 +166,31 @@ export const DEFAULT_RETRIEVAL_CHUNKING_CONFIG = {
|
|
|
148
166
|
fallback_strategy: "sliding",
|
|
149
167
|
target_chunk_tokens: DEFAULT_TARGET_CHUNK_TOKENS,
|
|
150
168
|
chunk_overlap_tokens: DEFAULT_CHUNK_OVERLAP_TOKENS,
|
|
169
|
+
budget_tokenizer: "ranking",
|
|
170
|
+
boundary_strictness: "legacy",
|
|
151
171
|
parse_timeout_ms: 80,
|
|
152
|
-
enabled_languages: ["typescript", "javascript", "python", "go"]
|
|
172
|
+
enabled_languages: ["typescript", "javascript", "python", "go"],
|
|
173
|
+
recursive_semantic_chunking_enabled: false,
|
|
174
|
+
semantic_merge_gap_lines: 6,
|
|
175
|
+
semantic_merge_max_span_lines: 220,
|
|
176
|
+
comment_forward_absorb_enabled: true,
|
|
177
|
+
embedding_context_prefix_enabled: true
|
|
178
|
+
};
|
|
179
|
+
/**
 * Default context packing settings. Packing is disabled by default.
 * Field constraints (boolean flag, integer limits) are the ones enforced by
 * validateContextPackingConfig.
 */
export const DEFAULT_RETRIEVAL_CONTEXT_PACKING_CONFIG = {
    enabled: false,
    max_spans_per_result: 3,
    max_gap_lines: 120,
    max_snippet_chars: 3200,
    enhancer_snippet_char_limit: 2200
};
|
|
186
|
+
/**
 * Default snippet integrity settings. Both the feature and its repair step
 * are disabled by default; "v1" is the only marker template version accepted
 * by validateSnippetIntegrityConfig.
 */
export const DEFAULT_RETRIEVAL_SNIPPET_INTEGRITY_CONFIG = {
    enabled: false,
    target_languages: ["typescript", "tsx", "javascript", "jsx"],
    max_contiguous_gap_lines: 6,
    marker_template_version: "v1",
    repair_enabled: false,
    repair_max_envelope_lines: 260,
    repair_max_snippet_chars: 3600
};
|
|
154
195
|
const BUILTIN_RETRIEVAL_SCORING_PROFILES = {
|
|
155
196
|
baseline: BASELINE_RETRIEVAL_SCORING_CONFIG,
|
|
@@ -202,6 +243,36 @@ function validateScoringConfig(config) {
|
|
|
202
243
|
if (rerank.same_extension_penalty < 0) {
|
|
203
244
|
throw new Error("invalid retrieval scoring config: rerank.same_extension_penalty must be >= 0");
|
|
204
245
|
}
|
|
246
|
+
if (typeof rerank.merge_overlapping_chunks_enabled !== "boolean") {
|
|
247
|
+
throw new Error("invalid retrieval scoring config: rerank.merge_overlapping_chunks_enabled must be boolean");
|
|
248
|
+
}
|
|
249
|
+
if (!Number.isInteger(rerank.merge_gap_lines) || rerank.merge_gap_lines < 0) {
|
|
250
|
+
throw new Error("invalid retrieval scoring config: rerank.merge_gap_lines must be an integer >= 0");
|
|
251
|
+
}
|
|
252
|
+
if (!Number.isInteger(rerank.merge_max_span_lines) || rerank.merge_max_span_lines <= 0) {
|
|
253
|
+
throw new Error("invalid retrieval scoring config: rerank.merge_max_span_lines must be a positive integer");
|
|
254
|
+
}
|
|
255
|
+
if (typeof rerank.smart_cutoff_enabled !== "boolean") {
|
|
256
|
+
throw new Error("invalid retrieval scoring config: rerank.smart_cutoff_enabled must be boolean");
|
|
257
|
+
}
|
|
258
|
+
if (!Number.isInteger(rerank.smart_cutoff_min_k) || rerank.smart_cutoff_min_k <= 0) {
|
|
259
|
+
throw new Error("invalid retrieval scoring config: rerank.smart_cutoff_min_k must be a positive integer");
|
|
260
|
+
}
|
|
261
|
+
if (!Number.isInteger(rerank.smart_cutoff_max_k) || rerank.smart_cutoff_max_k <= 0) {
|
|
262
|
+
throw new Error("invalid retrieval scoring config: rerank.smart_cutoff_max_k must be a positive integer");
|
|
263
|
+
}
|
|
264
|
+
if (rerank.smart_cutoff_max_k < rerank.smart_cutoff_min_k) {
|
|
265
|
+
throw new Error("invalid retrieval scoring config: rerank.smart_cutoff_max_k must be >= smart_cutoff_min_k");
|
|
266
|
+
}
|
|
267
|
+
assertFiniteNumber(rerank.smart_cutoff_min_score, "rerank.smart_cutoff_min_score");
|
|
268
|
+
assertFiniteNumber(rerank.smart_cutoff_top_ratio, "rerank.smart_cutoff_top_ratio");
|
|
269
|
+
assertFiniteNumber(rerank.smart_cutoff_delta_abs, "rerank.smart_cutoff_delta_abs");
|
|
270
|
+
if (rerank.smart_cutoff_top_ratio <= 0 || rerank.smart_cutoff_top_ratio > 1) {
|
|
271
|
+
throw new Error("invalid retrieval scoring config: rerank.smart_cutoff_top_ratio must be in (0, 1]");
|
|
272
|
+
}
|
|
273
|
+
if (rerank.smart_cutoff_delta_abs < 0) {
|
|
274
|
+
throw new Error("invalid retrieval scoring config: rerank.smart_cutoff_delta_abs must be >= 0");
|
|
275
|
+
}
|
|
205
276
|
}
|
|
206
277
|
export function resolveRetrievalScoringProfile(profile_id) {
|
|
207
278
|
const normalized = (profile_id ?? "baseline").trim().toLowerCase();
|
|
@@ -307,6 +378,12 @@ function validateChunkingConfig(config) {
|
|
|
307
378
|
if (!Number.isInteger(config.parse_timeout_ms) || config.parse_timeout_ms <= 0) {
|
|
308
379
|
throw new Error("invalid retrieval chunking config: parse_timeout_ms must be a positive integer");
|
|
309
380
|
}
|
|
381
|
+
if (config.budget_tokenizer !== "ranking" && config.budget_tokenizer !== "lightweight") {
|
|
382
|
+
throw new Error("invalid retrieval chunking config: budget_tokenizer must be ranking|lightweight");
|
|
383
|
+
}
|
|
384
|
+
if (config.boundary_strictness !== "legacy" && config.boundary_strictness !== "semantic_js_ts") {
|
|
385
|
+
throw new Error("invalid retrieval chunking config: boundary_strictness must be legacy|semantic_js_ts");
|
|
386
|
+
}
|
|
310
387
|
if (!Array.isArray(config.enabled_languages) || config.enabled_languages.length === 0) {
|
|
311
388
|
throw new Error("invalid retrieval chunking config: enabled_languages must include at least one language");
|
|
312
389
|
}
|
|
@@ -315,6 +392,21 @@ function validateChunkingConfig(config) {
|
|
|
315
392
|
throw new Error("invalid retrieval chunking config: enabled_languages must contain non-empty strings");
|
|
316
393
|
}
|
|
317
394
|
}
|
|
395
|
+
if (typeof config.recursive_semantic_chunking_enabled !== "boolean") {
|
|
396
|
+
throw new Error("invalid retrieval chunking config: recursive_semantic_chunking_enabled must be boolean");
|
|
397
|
+
}
|
|
398
|
+
if (!Number.isInteger(config.semantic_merge_gap_lines) || config.semantic_merge_gap_lines < 0) {
|
|
399
|
+
throw new Error("invalid retrieval chunking config: semantic_merge_gap_lines must be a non-negative integer");
|
|
400
|
+
}
|
|
401
|
+
if (!Number.isInteger(config.semantic_merge_max_span_lines) || config.semantic_merge_max_span_lines <= 0) {
|
|
402
|
+
throw new Error("invalid retrieval chunking config: semantic_merge_max_span_lines must be a positive integer");
|
|
403
|
+
}
|
|
404
|
+
if (typeof config.comment_forward_absorb_enabled !== "boolean") {
|
|
405
|
+
throw new Error("invalid retrieval chunking config: comment_forward_absorb_enabled must be boolean");
|
|
406
|
+
}
|
|
407
|
+
if (typeof config.embedding_context_prefix_enabled !== "boolean") {
|
|
408
|
+
throw new Error("invalid retrieval chunking config: embedding_context_prefix_enabled must be boolean");
|
|
409
|
+
}
|
|
318
410
|
}
|
|
319
411
|
export function mergeRetrievalChunkingConfig(base, overrides) {
|
|
320
412
|
const next = {
|
|
@@ -325,6 +417,98 @@ export function mergeRetrievalChunkingConfig(base, overrides) {
|
|
|
325
417
|
validateChunkingConfig(next);
|
|
326
418
|
return next;
|
|
327
419
|
}
|
|
420
|
+
/**
 * Checks a context packing config and throws on the first invalid field.
 * Fields are checked in a fixed order: enabled, max_spans_per_result,
 * max_gap_lines, max_snippet_chars, enhancer_snippet_char_limit.
 *
 * @param {object} config - Candidate context packing config.
 * @throws {Error} When a field has the wrong type or is out of range.
 */
function validateContextPackingConfig(config) {
    const isPositiveInteger = (candidate) => Number.isInteger(candidate) && candidate > 0;
    const isNonNegativeInteger = (candidate) => Number.isInteger(candidate) && candidate >= 0;
    // Each rule is evaluated lazily so the first failing field decides the message.
    const rules = [
        [
            () => typeof config.enabled === "boolean",
            "invalid retrieval context packing config: enabled must be boolean"
        ],
        [
            () => isPositiveInteger(config.max_spans_per_result),
            "invalid retrieval context packing config: max_spans_per_result must be a positive integer"
        ],
        [
            () => isNonNegativeInteger(config.max_gap_lines),
            "invalid retrieval context packing config: max_gap_lines must be a non-negative integer"
        ],
        [
            () => isPositiveInteger(config.max_snippet_chars),
            "invalid retrieval context packing config: max_snippet_chars must be a positive integer"
        ],
        [
            () => isPositiveInteger(config.enhancer_snippet_char_limit),
            "invalid retrieval context packing config: enhancer_snippet_char_limit must be a positive integer"
        ]
    ];
    for (const [passes, message] of rules) {
        if (!passes()) {
            throw new Error(message);
        }
    }
}
|
|
437
|
+
/**
 * Layers `overrides` on top of `base` and validates the result.
 *
 * @param {object} base - Config supplying every field not overridden.
 * @param {object} [overrides] - Partial config; null/undefined means no overrides.
 * @returns {object} A new, validated config object.
 * @throws {Error} When the merged config fails validateContextPackingConfig.
 */
export function mergeRetrievalContextPackingConfig(base, overrides) {
    const overlay = overrides ?? {};
    const merged = { ...base, ...overlay };
    validateContextPackingConfig(merged);
    return merged;
}
|
|
445
|
+
/**
 * Maps a language name or extension alias to one of the four canonical
 * snippet integrity languages.
 *
 * @param {string} value - Language name or alias; trimmed and lower-cased first.
 * @returns {("typescript"|"tsx"|"javascript"|"jsx"|undefined)} The canonical
 *   name, or undefined when the value is not a recognised alias.
 */
function normalizeSnippetIntegrityLanguage(value) {
    switch (value.trim().toLowerCase()) {
        case "typescript":
        case "ts":
        case "mts":
        case "cts":
            return "typescript";
        case "tsx":
            return "tsx";
        case "javascript":
        case "js":
        case "mjs":
        case "cjs":
            return "javascript";
        case "jsx":
            return "jsx";
        default:
            return undefined;
    }
}
|
|
461
|
+
/**
 * Canonicalises and de-duplicates a list of language names.
 * Blank entries are dropped; unrecognised names are kept in trimmed,
 * lower-cased form.
 *
 * @param {Iterable<string>} value - Language names or aliases.
 * @returns {string[]} Unique names in first-seen order.
 */
function normalizeSnippetIntegrityLanguageList(value) {
    const seen = new Set();
    for (const entry of value) {
        const cleaned = entry.trim().toLowerCase();
        if (cleaned.length > 0) {
            seen.add(normalizeSnippetIntegrityLanguage(cleaned) ?? cleaned);
        }
    }
    return Array.from(seen);
}
|
|
472
|
+
/**
 * Checks a snippet integrity config and throws on the first invalid field.
 *
 * @param {object} config - Candidate snippet integrity config.
 * @throws {Error} When a field has the wrong type, is out of range, or names
 *   a language that normalizeSnippetIntegrityLanguage does not recognise.
 */
function validateSnippetIntegrityConfig(config) {
    const requireThat = (condition, message) => {
        if (!condition) {
            throw new Error(message);
        }
    };
    const isPositiveInteger = (candidate) => Number.isInteger(candidate) && candidate > 0;

    requireThat(
        typeof config.enabled === "boolean",
        "invalid retrieval snippet integrity config: enabled must be boolean"
    );
    requireThat(
        Array.isArray(config.target_languages) && config.target_languages.length !== 0,
        "invalid retrieval snippet integrity config: target_languages must include at least one language"
    );
    for (const language of config.target_languages) {
        requireThat(
            typeof language === "string" && language.trim().length !== 0,
            "invalid retrieval snippet integrity config: target_languages must contain non-empty strings"
        );
        requireThat(
            Boolean(normalizeSnippetIntegrityLanguage(language)),
            "invalid retrieval snippet integrity config: unsupported target language"
        );
    }
    requireThat(
        Number.isInteger(config.max_contiguous_gap_lines) && config.max_contiguous_gap_lines >= 0,
        "invalid retrieval snippet integrity config: max_contiguous_gap_lines must be a non-negative integer"
    );
    requireThat(
        config.marker_template_version === "v1",
        "invalid retrieval snippet integrity config: marker_template_version must be v1"
    );
    requireThat(
        typeof config.repair_enabled === "boolean",
        "invalid retrieval snippet integrity config: repair_enabled must be boolean"
    );
    requireThat(
        isPositiveInteger(config.repair_max_envelope_lines),
        "invalid retrieval snippet integrity config: repair_max_envelope_lines must be a positive integer"
    );
    requireThat(
        isPositiveInteger(config.repair_max_snippet_chars),
        "invalid retrieval snippet integrity config: repair_max_snippet_chars must be a positive integer"
    );
}
|
|
503
|
+
/**
 * Layers `overrides` on top of `base`, canonicalises `target_languages`, and
 * validates the result.
 *
 * The language list comes from the overrides when present, otherwise from the
 * base. It is only normalised when it is an array of strings. Any other shape
 * is passed through untouched so that validateSnippetIntegrityConfig rejects
 * it with a descriptive config error, instead of normalisation failing with a
 * raw TypeError or iterating a plain string character by character.
 *
 * @param {object} base - Config supplying every field not overridden.
 * @param {object} [overrides] - Partial config; null/undefined means no overrides.
 * @returns {object} A new, validated config with normalised target_languages.
 * @throws {Error} When the merged config fails validation.
 */
export function mergeRetrievalSnippetIntegrityConfig(base, overrides) {
    const rawLanguages = overrides?.target_languages ?? base.target_languages;
    const isStringList = Array.isArray(rawLanguages) && rawLanguages.every((language) => typeof language === "string");
    const next = {
        ...base,
        ...(overrides ?? {}),
        target_languages: isStringList ? normalizeSnippetIntegrityLanguageList(rawLanguages) : rawLanguages
    };
    validateSnippetIntegrityConfig(next);
    return next;
}
|
|
328
512
|
function stableSerialize(value) {
|
|
329
513
|
if (Array.isArray(value)) {
|
|
330
514
|
return `[${value.map((entry) => stableSerialize(entry)).join(",")}]`;
|
|
@@ -422,9 +606,22 @@ function singularizeToken(token) {
|
|
|
422
606
|
}
|
|
423
607
|
return undefined;
|
|
424
608
|
}
|
|
609
|
+
/**
 * Splits text into lower-cased runs of ASCII letters, digits and underscores
 * after NFKC normalisation. Every other character acts as a separator.
 *
 * @param {string} text - Text to tokenize.
 * @returns {string[]} Tokens in order of appearance; empty when none are found.
 */
function tokenizeLightweight(text) {
    const runs = text.normalize("NFKC").match(/[A-Za-z0-9_]+/g);
    if (runs === null) {
        return [];
    }
    return runs.map((run) => run.toLowerCase());
}
|
|
425
616
|
/**
 * Tokenizes text by delegating to tokenizeForRanking.
 *
 * @param {string} text - Text to tokenize.
 * @returns {*} Whatever tokenizeForRanking returns for the text.
 */
function tokenize(text) {
    const tokens = tokenizeForRanking(text);
    return tokens;
}
|
|
619
|
+
/**
 * Picks the tokenizer used for chunk budget accounting.
 *
 * @param {string} text - Text to tokenize.
 * @param {string} mode - "lightweight" selects tokenizeLightweight; any other
 *   value falls back to tokenize.
 * @returns {*} The selected tokenizer's output.
 */
function chunkBudgetTokenize(text, mode) {
    return mode === "lightweight" ? tokenizeLightweight(text) : tokenize(text);
}
|
|
428
625
|
function lexicalScore(query, haystack) {
|
|
429
626
|
const q = new Set(tokenize(query));
|
|
430
627
|
if (q.size === 0) {
|
|
@@ -848,11 +1045,17 @@ function buildChunks(file, chunkingConfig) {
|
|
|
848
1045
|
fallback_strategy: chunkingConfig.fallback_strategy,
|
|
849
1046
|
target_chunk_tokens: chunkingConfig.target_chunk_tokens,
|
|
850
1047
|
chunk_overlap_tokens: chunkingConfig.chunk_overlap_tokens,
|
|
1048
|
+
budget_tokenizer: chunkingConfig.budget_tokenizer,
|
|
1049
|
+
boundary_strictness: chunkingConfig.boundary_strictness,
|
|
851
1050
|
max_chunks_per_file: MAX_CHUNKS_PER_FILE,
|
|
852
1051
|
parse_timeout_ms: chunkingConfig.parse_timeout_ms,
|
|
853
|
-
enabled_languages: chunkingConfig.enabled_languages
|
|
1052
|
+
enabled_languages: chunkingConfig.enabled_languages,
|
|
1053
|
+
recursive_semantic_chunking_enabled: chunkingConfig.recursive_semantic_chunking_enabled,
|
|
1054
|
+
semantic_merge_gap_lines: chunkingConfig.semantic_merge_gap_lines,
|
|
1055
|
+
semantic_merge_max_span_lines: chunkingConfig.semantic_merge_max_span_lines,
|
|
1056
|
+
comment_forward_absorb_enabled: chunkingConfig.comment_forward_absorb_enabled
|
|
854
1057
|
},
|
|
855
|
-
tokenize
|
|
1058
|
+
tokenize: (text) => chunkBudgetTokenize(text, chunkingConfig.budget_tokenizer)
|
|
856
1059
|
});
|
|
857
1060
|
return {
|
|
858
1061
|
chunks: chunkingResult.chunks.map((chunk) => ({
|
|
@@ -870,9 +1073,27 @@ function buildChunks(file, chunkingConfig) {
|
|
|
870
1073
|
parse_latency_ms: chunkingResult.parse_latency_ms,
|
|
871
1074
|
language_aware_attempt_latency_ms: chunkingResult.language_aware_attempt_latency_ms,
|
|
872
1075
|
fallback_path_latency_ms: chunkingResult.fallback_path_latency_ms,
|
|
873
|
-
language: chunkingResult.language
|
|
1076
|
+
language: chunkingResult.language,
|
|
1077
|
+
recursive_semantic_chunking_used: chunkingResult.recursive_semantic_chunking_used
|
|
874
1078
|
};
|
|
875
1079
|
}
|
|
1080
|
+
/**
 * Builds the text sent to the embedding provider for one chunk.
 *
 * Returns the bare snippet when the context prefix is disabled or the
 * provider id is "deterministic" (case-insensitive, trimmed). Otherwise a
 * header line `<path>:<start>-<end>[ > <symbol>]` is prepended, where the path
 * is shortened to its last two segments when it has more than two.
 *
 * @param {object} chunk - Chunk with path, snippet, start_line and end_line.
 * @param {object} config - Chunking config; reads embedding_context_prefix_enabled.
 * @param {string} embeddingProviderId - Identifier of the embedding provider.
 * @returns {string} Snippet, optionally preceded by the context header line.
 */
function buildChunkEmbeddingText(chunk, config, embeddingProviderId) {
    const providerKey = embeddingProviderId.trim().toLowerCase();
    const usesDeterministicProvider = providerKey === "deterministic";
    if (!config.embedding_context_prefix_enabled || usesDeterministicProvider) {
        return chunk.snippet;
    }
    const fullPath = normalizePath(chunk.path);
    const segments = fullPath.split("/").filter(Boolean);
    let shortPath = fullPath;
    if (segments.length > 2) {
        shortPath = segments.slice(-2).join("/");
    }
    const symbolName = detectSnippetSymbolName(chunk.snippet);
    const symbolSuffix = symbolName ? ` > ${symbolName}` : "";
    const header = `${shortPath}:${chunk.start_line}-${chunk.end_line}${symbolSuffix}`;
    return `${header}\n${chunk.snippet}`;
}
|
|
1094
|
+
/**
 * Builds the embedding text for every chunk, preserving order.
 *
 * @param {object[]} chunks - Chunks to convert.
 * @param {object} config - Chunking config forwarded to buildChunkEmbeddingText.
 * @param {string} embeddingProviderId - Identifier of the embedding provider.
 * @returns {string[]} One embedding text per chunk.
 */
function buildChunkEmbeddingTexts(chunks, config, embeddingProviderId) {
    const toEmbeddingText = (chunk) => buildChunkEmbeddingText(chunk, config, embeddingProviderId);
    return chunks.map(toEmbeddingText);
}
|
|
876
1097
|
function pseudoEmbedding(input, dimensions = 24) {
|
|
877
1098
|
const safeDimensions = Math.max(1, dimensions);
|
|
878
1099
|
let source = sha256(input);
|
|
@@ -2978,6 +3199,693 @@ function compareSearchResults(a, b) {
|
|
|
2978
3199
|
}
|
|
2979
3200
|
return a.end_line - b.end_line;
|
|
2980
3201
|
}
|
|
3202
|
+
/**
 * Comparator ordering results by start_line, then end_line, and finally by
 * compareSearchResults when both line bounds are equal.
 *
 * @param {object} a - First result.
 * @param {object} b - Second result.
 * @returns {number} Negative, zero or positive, as expected by Array.prototype.sort.
 */
function compareSearchResultsByLineRange(a, b) {
    if (a.start_line === b.start_line) {
        return a.end_line === b.end_line
            ? compareSearchResults(a, b)
            : a.end_line - b.end_line;
    }
    return a.start_line - b.start_line;
}
|
|
3211
|
+
/**
 * Stitches the snippets of a cluster of same-file candidates into one snippet
 * covering `mergedStartLine..mergedEndLine`.
 *
 * Where several candidates cover the same line, the text is chosen by
 * buildPreferredLineMap (higher score wins, ties go to the better-ranked
 * candidate). This function previously carried its own copy of that logic.
 * Lines no candidate covers become empty lines. If more than
 * max(2, 20% of the span) lines are missing, the most relevant candidate's
 * own snippet is returned unchanged instead of a gappy stitch.
 *
 * @param {object[]} cluster - Candidates with snippet, start_line, end_line, score.
 * @param {number} mergedStartLine - First line of the merged range (inclusive).
 * @param {number} mergedEndLine - Last line of the merged range (inclusive).
 * @returns {string} The stitched snippet, the primary snippet, or "" when the
 *   cluster has no usable candidate.
 */
function mergeSnippetCluster(cluster, mergedStartLine, mergedEndLine) {
    const primary = [...cluster].sort(compareSearchResults)[0];
    if (!primary) {
        return "";
    }
    const lineMap = buildPreferredLineMap(cluster);
    const mergedLines = [];
    let missingLines = 0;
    for (let line = mergedStartLine; line <= mergedEndLine; line += 1) {
        const text = lineMap.get(line);
        if (typeof text !== "string") {
            // Keep line positions aligned by emitting a blank for uncovered lines.
            missingLines += 1;
            mergedLines.push("");
            continue;
        }
        mergedLines.push(text);
    }
    const totalLines = Math.max(1, mergedEndLine - mergedStartLine + 1);
    const maxMissingLines = Math.max(2, Math.floor(totalLines * 0.2));
    if (missingLines > maxMissingLines) {
        return primary.snippet;
    }
    return mergedLines.join("\n");
}
|
|
3259
|
+
/**
 * Collapses a cluster of candidates into a single result.
 *
 * The most relevant candidate (per compareSearchResults) supplies every field
 * except the line range, which becomes the union of the cluster's ranges, and
 * the snippet, which is stitched by mergeSnippetCluster.
 *
 * @param {object[]} cluster - Non-empty list of candidates.
 * @returns {object} The lone candidate when the cluster has one entry,
 *   otherwise a new merged result object.
 * @throws {Error} When the cluster is empty.
 */
function mergeCandidateCluster(cluster) {
    if (cluster.length === 0) {
        throw new Error("mergeCandidateCluster requires at least one candidate");
    }
    if (cluster.length === 1) {
        return cluster[0];
    }
    const [primary] = [...cluster].sort(compareSearchResults);
    let mergedStartLine = Infinity;
    let mergedEndLine = -Infinity;
    for (const candidate of cluster) {
        mergedStartLine = Math.min(mergedStartLine, candidate.start_line);
        mergedEndLine = Math.max(mergedEndLine, candidate.end_line);
    }
    const stitched = mergeSnippetCluster(cluster, mergedStartLine, mergedEndLine);
    // An empty stitch falls back to the primary candidate's own snippet.
    const snippet = stitched.length > 0 ? stitched : primary.snippet;
    return {
        ...primary,
        start_line: mergedStartLine,
        end_line: mergedEndLine,
        snippet
    };
}
|
|
3278
|
+
// Overlap, as a fraction of the shorter range, at or above which two line
// ranges are treated as heavily overlapping (see isHeavilyOverlappingLineRange).
const HEAVY_LINE_RANGE_OVERLAP_RATIO = 0.2;
|
|
3279
|
+
/**
 * Number of lines in an inclusive line range, never less than 1.
 *
 * @param {number} startLine - First line of the range.
 * @param {number} endLine - Last line of the range.
 * @returns {number} Line count, floored at 1 for empty or inverted ranges.
 */
function lineRangeLength(startLine, endLine) {
    const span = endLine - startLine + 1;
    return span < 1 ? 1 : span;
}
|
|
3282
|
+
/**
 * Number of lines shared by two inclusive line ranges.
 *
 * @param {number} aStartLine - First line of range A.
 * @param {number} aEndLine - Last line of range A.
 * @param {number} bStartLine - First line of range B.
 * @param {number} bEndLine - Last line of range B.
 * @returns {number} Shared line count, or 0 when the ranges do not intersect.
 */
function lineRangeOverlapLength(aStartLine, aEndLine, bStartLine, bEndLine) {
    const sharedStart = Math.max(aStartLine, bStartLine);
    const sharedEnd = Math.min(aEndLine, bEndLine);
    return sharedEnd < sharedStart ? 0 : sharedEnd - sharedStart + 1;
}
|
|
3290
|
+
/**
 * Tells whether a candidate's line range heavily overlaps any already
 * selected range. The overlap is measured against the shorter of the two
 * ranges and compared with HEAVY_LINE_RANGE_OVERLAP_RATIO.
 *
 * @param {object} candidate - Object with start_line and end_line.
 * @param {Iterable<object>} selectedRanges - Ranges with start_line and end_line.
 * @returns {boolean} True as soon as one selected range overlaps heavily.
 */
function isHeavilyOverlappingLineRange(candidate, selectedRanges) {
    for (const selected of selectedRanges) {
        const shared = lineRangeOverlapLength(selected.start_line, selected.end_line, candidate.start_line, candidate.end_line);
        if (shared <= 0) {
            continue;
        }
        const selectedLength = lineRangeLength(selected.start_line, selected.end_line);
        const candidateLength = lineRangeLength(candidate.start_line, candidate.end_line);
        const shorter = Math.min(selectedLength, candidateLength);
        if (shared / Math.max(1, shorter) >= HEAVY_LINE_RANGE_OVERLAP_RATIO) {
            return true;
        }
    }
    return false;
}
|
|
3304
|
+
/**
 * Merges overlapping or directly adjacent line spans.
 *
 * Inverted spans (end_line < start_line) are dropped, the rest are sorted by
 * start then end, and any span starting no later than one line after the
 * previous span's end is folded into it. Every returned span is a fresh copy,
 * so callers can mutate the result without touching their input. The same
 * rules now apply to zero- and one-element inputs, which previously bypassed
 * both the filter and the copy.
 *
 * @param {object[]} spans - Spans with start_line and end_line.
 * @returns {object[]} New, sorted, non-overlapping spans.
 */
function mergeLineSpans(spans) {
    // filter() yields a new array, so the in-place sort never reorders the input.
    const ordered = spans
        .filter((span) => span.end_line >= span.start_line)
        .sort((a, b) => a.start_line - b.start_line || a.end_line - b.end_line);
    const merged = [];
    for (const span of ordered) {
        const last = merged[merged.length - 1];
        if (!last || span.start_line > last.end_line + 1) {
            merged.push({ ...span });
            continue;
        }
        last.end_line = Math.max(last.end_line, span.end_line);
    }
    return merged;
}
|
|
3322
|
+
/**
 * Number of lines strictly between two line ranges.
 *
 * @param {object} anchor - Range with start_line and end_line.
 * @param {object} candidate - Range with start_line and end_line.
 * @returns {number} Lines separating the ranges; 0 when they touch or overlap.
 */
function lineRangeGap(anchor, candidate) {
    const candidateIsAfter = candidate.start_line > anchor.end_line;
    if (candidateIsAfter) {
        return candidate.start_line - anchor.end_line - 1;
    }
    const candidateIsBefore = anchor.start_line > candidate.end_line;
    return candidateIsBefore ? anchor.start_line - candidate.end_line - 1 : 0;
}
|
|
3331
|
+
/**
 * Builds a line-number → text map from candidate snippets.
 *
 * Each snippet is split into lines and aligned to its start_line; at most
 * (end_line - start_line + 1) lines are used per candidate. When several
 * candidates cover a line, the text from the higher-scoring candidate wins,
 * and scores within 1e-9 of each other are settled by relevance rank.
 *
 * @param {object[]} candidates - Candidates with snippet, start_line, end_line, score.
 * @returns {Map<number, string>} Preferred text for every covered line.
 */
function buildPreferredLineMap(candidates) {
    const ranked = [...candidates].sort(compareSearchResults);
    const best = new Map();
    for (const [rank, candidate] of ranked.entries()) {
        if (!candidate) {
            continue;
        }
        const snippetLines = candidate.snippet.replace(/\r\n/g, "\n").split("\n");
        const declaredSpan = Math.max(1, candidate.end_line - candidate.start_line + 1);
        const usable = Math.min(snippetLines.length, declaredSpan);
        for (const [offset, text] of snippetLines.slice(0, usable).entries()) {
            if (typeof text !== "string") {
                continue;
            }
            const lineNumber = candidate.start_line + offset;
            const current = best.get(lineNumber);
            if (current) {
                const clearlyBetter = candidate.score > current.score + 1e-9;
                const tiedButEarlier = Math.abs(candidate.score - current.score) <= 1e-9 && rank < current.rank;
                if (!clearlyBetter && !tiedButEarlier) {
                    continue;
                }
            }
            best.set(lineNumber, { text, score: candidate.score, rank });
        }
    }
    const textByLine = new Map();
    for (const [lineNumber, entry] of best) {
        textByLine.set(lineNumber, entry.text);
    }
    return textByLine;
}
|
|
3356
|
+
/**
 * Truncates a snippet to at most `maxChars` UTF-16 code units.
 *
 * Snippets already within the limit are returned unchanged. Otherwise the
 * snippet is cut at the limit; when the cut text contains a newline past
 * index 80 it is shortened further to end on that line break, so the result
 * finishes on a complete line. Trailing whitespace is removed in both cases.
 *
 * The cut never leaves half of a surrogate pair behind: if the limit falls
 * between a high and a low surrogate, the dangling high surrogate is dropped
 * so the result stays well-formed.
 *
 * @param {string} snippet - Text to clip.
 * @param {number} maxChars - Maximum length; negative values are treated as 0.
 * @returns {string} The clipped snippet.
 */
function clipSnippetToMaxChars(snippet, maxChars) {
    if (snippet.length <= maxChars) {
        return snippet;
    }
    let clipped = snippet.slice(0, Math.max(0, maxChars));
    const lastUnit = clipped.charCodeAt(clipped.length - 1);
    const nextUnit = snippet.charCodeAt(clipped.length);
    const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
    const nextIsLowSurrogate = nextUnit >= 0xdc00 && nextUnit <= 0xdfff;
    if (endsOnHighSurrogate && nextIsLowSurrogate) {
        clipped = clipped.slice(0, -1);
    }
    const lastNewline = clipped.lastIndexOf("\n");
    // Only back up to a line break when enough text remains before it.
    if (lastNewline > 80) {
        return clipped.slice(0, lastNewline).trimEnd();
    }
    return clipped.trimEnd();
}
|
|
3367
|
+
/**
 * Derives the snippet integrity language from a file path's extension, as
 * reported by fileExtension.
 *
 * @param {string} path - File path.
 * @returns {("typescript"|"tsx"|"javascript"|"jsx"|undefined)} The language,
 *   or undefined for any other extension.
 */
function snippetIntegrityLanguageFromPath(path) {
    switch (fileExtension(path)) {
        case ".ts":
        case ".mts":
        case ".cts":
            return "typescript";
        case ".tsx":
            return "tsx";
        case ".js":
        case ".mjs":
        case ".cjs":
            return "javascript";
        case ".jsx":
            return "jsx";
        default:
            return undefined;
    }
}
|
|
3383
|
+
/**
 * Returns the first line of a snippet that is not blank, trimmed.
 *
 * @param {string} snippet - Snippet text; CRLF line endings are accepted.
 * @returns {string} The trimmed line, or "" when every line is blank.
 */
function firstNonEmptyLine(snippet) {
    const firstWithContent = snippet
        .replace(/\r\n/g, "\n")
        .split("\n")
        .map((line) => line.trim())
        .find((line) => line.length > 0);
    return firstWithContent ?? "";
}
|
|
3393
|
+
/**
 * Returns the last line of a snippet that is not blank, trimmed.
 *
 * @param {string} snippet - Snippet text; CRLF line endings are accepted.
 * @returns {string} The trimmed line, or "" when every line is blank.
 */
function lastNonEmptyLine(snippet) {
    const trimmedLines = snippet
        .replace(/\r\n/g, "\n")
        .split("\n")
        .map((line) => line.trim());
    // map() produced a fresh array, so reversing it in place is safe.
    for (const line of trimmedLines.reverse()) {
        if (line.length > 0) {
            return line;
        }
    }
    return "";
}
|
|
3403
|
+
/**
 * Counts `{` minus `}` across the whole snippet. Every brace character is
 * counted, including ones inside strings or comments.
 *
 * @param {string} snippet - Snippet text.
 * @returns {number} Positive when more braces open than close.
 */
function curlyBraceDelta(snippet) {
    let balance = 0;
    for (const char of snippet) {
        if (char === "{") {
            balance += 1;
        } else if (char === "}") {
            balance -= 1;
        }
    }
    return balance;
}
|
|
3417
|
+
/**
 * Heuristically decides whether a (trimmed) line begins a declaration:
 * a decorator, function, class, variable binding, or method.
 *
 * Compared with the earlier patterns this version
 *  - accepts `export default function name(`,
 *  - accepts methods with several stacked modifiers (`private async load(`),
 *  - rejects control-flow headers such as `if (x) {` or `for (...) {`, which
 *    otherwise match the bare `name(...) {` method pattern.
 *
 * @param {string} line - Trimmed source line.
 * @returns {boolean} True when the line looks like the start of a declaration.
 */
function looksLikeDeclarationStart(line) {
    if (line.length === 0) {
        return false;
    }
    if (line.startsWith("@")) {
        return true;
    }
    if (/^(?:export\s+)?(?:default\s+)?(?:async\s+)?function\s+[A-Za-z_$][\w$]*\s*\(/u.test(line) ||
        /^(?:export\s+)?(?:default\s+)?class\s+[A-Za-z_$][\w$]*/u.test(line) ||
        /^(?:export\s+)?(?:const|let|var)\s+[A-Za-z_$][\w$]*\s*=/u.test(line) ||
        /^(?:(?:public|private|protected|static|readonly|async)\s+)+[A-Za-z_$][\w$]*\s*\(/u.test(line)) {
        return true;
    }
    // Bare `name(...) {` is a method shorthand unless the name is a control keyword.
    const bareCall = line.match(/^([A-Za-z_$][\w$]*)\s*\([^)]*\)\s*\{/u);
    if (!bareCall) {
        return false;
    }
    const controlKeywords = new Set(["if", "for", "while", "switch", "catch", "return"]);
    return !controlKeywords.has(bareCall[1]);
}
|
|
3430
|
+
/**
 * Heuristically decides whether a (trimmed) line closes a construct, i.e.
 * ends with `}`, `};`, `);`, `]` or `];`.
 *
 * @param {string} line - Trimmed source line.
 * @returns {boolean} True when the line ends with one of those terminators.
 */
function looksLikeSnippetTerminalBoundary(line) {
    const terminators = ["}", "};", ");", "]", "];"];
    return line.length !== 0 && terminators.some((suffix) => line.endsWith(suffix));
}
|
|
3440
|
+
/**
 * Finds the name of the first declaration within the first 40 lines of a
 * snippet: a function, class, arrow-function binding, or method.
 *
 * Compared with the earlier patterns this version also recognises
 * `export default function name(` and methods carrying several stacked
 * modifiers (`private async load(`), both of which previously went undetected.
 * Control-flow keywords that happen to match a method pattern are ignored.
 *
 * @param {string} snippet - Snippet text; CRLF line endings are accepted.
 * @returns {string|undefined} The detected symbol name, or undefined.
 */
function detectSnippetSymbolName(snippet) {
    const lines = snippet.replace(/\r\n/g, "\n").split("\n").slice(0, 40);
    const patterns = [
        /^(?:export\s+)?(?:default\s+)?(?:async\s+)?function\s+([A-Za-z_$][\w$]*)\s*\(/u,
        /^(?:export\s+)?(?:default\s+)?class\s+([A-Za-z_$][\w$]*)\b/u,
        /^(?:export\s+)?(?:const|let|var)\s+([A-Za-z_$][\w$]*)\s*=\s*(?:async\s*)?\([^)]*\)\s*=>/u,
        /^(?:export\s+)?(?:const|let|var)\s+([A-Za-z_$][\w$]*)\s*=\s*(?:async\s*)?[A-Za-z_$][\w$]*\s*=>/u,
        /^(?:(?:public|private|protected|static|readonly|async)\s+)+([A-Za-z_$][\w$]*)\s*\(/u,
        /^([A-Za-z_$][\w$]*)\s*\([^)]*\)\s*\{/u
    ];
    // Statements like `if (x) {` match the bare method pattern but are not symbols.
    const disallowed = new Set(["if", "for", "while", "switch", "catch", "return"]);
    for (const line of lines) {
        const trimmed = line.trim();
        if (trimmed.length === 0) {
            continue;
        }
        for (const pattern of patterns) {
            const symbol = trimmed.match(pattern)?.[1];
            if (symbol && !disallowed.has(symbol)) {
                return symbol;
            }
        }
    }
    return undefined;
}
|
|
3466
|
+
/**
 * Decides whether a result's snippet should be marked as truncated.
 *
 * Nothing is annotated when no lines were omitted on either side. Otherwise
 * the snippet counts as truncated when lines were omitted before it and its
 * first non-blank line does not look like a declaration start, or when lines
 * were omitted after it and it either leaves braces open or does not end on a
 * terminal boundary. Failing those, it is truncated only if lines were
 * omitted on both sides.
 *
 * @param {object} result - Search result; reads `snippet`.
 * @param {number} omittedBefore - Lines omitted before the snippet.
 * @param {number} omittedAfter - Lines omitted after the snippet.
 * @returns {boolean} True when the snippet should carry a truncation marker.
 */
function shouldAnnotateSnippetAsTruncated(result, omittedBefore, omittedAfter) {
    if (omittedBefore <= 0 && omittedAfter <= 0) {
        return false;
    }
    const openingLine = firstNonEmptyLine(result.snippet);
    const closingLine = lastNonEmptyLine(result.snippet);
    const startsMidConstruct = omittedBefore > 0 && !looksLikeDeclarationStart(openingLine);
    if (startsMidConstruct) {
        return true;
    }
    const endsMidConstruct = omittedAfter > 0 &&
        (curlyBraceDelta(result.snippet) > 0 || !looksLikeSnippetTerminalBoundary(closingLine));
    if (endsMidConstruct) {
        return true;
    }
    return omittedBefore > 0 && omittedAfter > 0;
}
|
|
3485
|
+
/**
 * Grows a line range outward from an anchor by absorbing every candidate
 * whose gap to the current range is at most `maxGapLines`. Passes over the
 * candidates repeat until one completes without widening the range.
 *
 * @param {object} input
 * @param {object} input.anchor - Starting range with start_line and end_line.
 * @param {Iterable<object>} input.candidates - Ranges that may be absorbed.
 * @param {number} input.maxGapLines - Largest gap (per lineRangeGap) still absorbed.
 * @returns {{start_line: number, end_line: number}} The grown range.
 */
function estimateContiguousEnvelope(input) {
    let start = input.anchor.start_line;
    let end = input.anchor.end_line;
    let grew;
    do {
        grew = false;
        for (const candidate of input.candidates) {
            if (lineRangeGap({ start_line: start, end_line: end }, candidate) > input.maxGapLines) {
                continue;
            }
            const widenedStart = Math.min(start, candidate.start_line);
            const widenedEnd = Math.max(end, candidate.end_line);
            if (widenedStart === start && widenedEnd === end) {
                continue;
            }
            start = widenedStart;
            end = widenedEnd;
            grew = true;
        }
    } while (grew);
    return { start_line: start, end_line: end };
}
|
|
3507
|
+
/**
 * Tries to rebuild a snippet that covers the whole envelope by stitching
 * together line text from same-path candidates.
 *
 * @param {{anchor: object,
 *          envelope: {start_line: number, end_line: number},
 *          samePathCandidates: Array<object>,
 *          config: {repair_max_envelope_lines: number, repair_max_snippet_chars: number}}} input
 * @returns {{repaired: object, clipped: boolean} | {reason: string, clipped: false}}
 *   On success, `repaired` is the anchor with replaced `start_line`, `end_line`
 *   and `snippet`, and `clipped` tells whether the character cap cut it short.
 *   Otherwise `reason` names why no repair was produced.
 */
function repairSnippetFromEnvelope(input) {
    const { envelope, config } = input;
    const firstLine = envelope.start_line;
    const lastLine = envelope.end_line;
    const envelopeSpan = lastLine - firstLine + 1;
    if (envelopeSpan > config.repair_max_envelope_lines) {
        return { reason: "envelope_cap_exceeded", clipped: false };
    }
    // Only candidates that intersect the envelope can contribute line text.
    const touchingEnvelope = input.samePathCandidates.filter((candidate) => candidate.end_line >= firstLine && candidate.start_line <= lastLine);
    const envelopeCandidates = touchingEnvelope.sort(compareSearchResultsByLineRange);
    if (envelopeCandidates.length === 0) {
        return { reason: "no_envelope_candidates", clipped: false };
    }
    const textByLine = buildPreferredLineMap(envelopeCandidates);
    const rendered = [];
    let gapCount = 0;
    for (let lineNumber = firstLine; lineNumber <= lastLine; lineNumber += 1) {
        const text = textByLine.get(lineNumber);
        const known = typeof text === "string";
        if (!known) {
            gapCount += 1;
        }
        // Unknown lines are kept as blanks so later lines keep their position.
        rendered.push(known ? text : "");
    }
    // Tolerate a few unknown lines (at least 2, otherwise 20% of the span).
    const gapAllowance = Math.max(2, Math.floor(envelopeSpan * 0.2));
    if (gapCount > gapAllowance) {
        return { reason: "missing_line_density_too_high", clipped: false };
    }
    const charCap = config.repair_max_snippet_chars;
    const kept = [];
    let spentChars = 0;
    let clipped = false;
    for (const [position, text] of rendered.entries()) {
        // Every line after the first also pays for the joining newline.
        const cost = position === 0 ? text.length : text.length + 1;
        if (kept.length > 0) {
            if (spentChars + cost > charCap) {
                clipped = true;
                break;
            }
        }
        else if (text.length > charCap) {
            // The very first line alone overflows the cap: keep its head only.
            const head = text.slice(0, charCap);
            if (head.length === 0) {
                return { reason: "snippet_char_cap_exceeded", clipped: false };
            }
            kept.push(head);
            spentChars = head.length;
            clipped = true;
            break;
        }
        kept.push(text);
        spentChars += cost;
    }
    if (kept.length === 0) {
        return { reason: "snippet_char_cap_exceeded", clipped: false };
    }
    const snippet = kept.join("\n").trimEnd();
    if (snippet.length === 0) {
        return { reason: "empty_repaired_snippet", clipped: false };
    }
    const repaired = {
        ...input.anchor,
        start_line: firstLine,
        end_line: firstLine + kept.length - 1,
        snippet
    };
    return { repaired, clipped };
}
|
|
3575
|
+
/**
 * Formats the single-line `// [truncated:...]` marker describing how a result
 * relates to its estimated envelope.
 *
 * @param {{result: {start_line: number, end_line: number},
 *          symbolName?: string,
 *          envelope_start_line: number,
 *          envelope_end_line: number,
 *          marker_template_version: string}} input
 * @returns {string} The marker comment; `symbol` falls back to "unknown" when
 *   `symbolName` is null or undefined.
 */
function buildSnippetTruncationMarker(input) {
    const { result, envelope_start_line: spanStart, envelope_end_line: spanEnd } = input;
    // Counts are clamped so an inconsistent envelope never yields negatives.
    const fields = [
        `symbol=${input.symbolName ?? "unknown"}`,
        `estimated_span=${spanStart}-${spanEnd}`,
        `estimated_total_lines=${Math.max(1, spanEnd - spanStart + 1)}`,
        `omitted_before=${Math.max(0, result.start_line - spanStart)}`,
        `omitted_after=${Math.max(0, spanEnd - result.end_line)}`,
        `through_line=${result.end_line}`
    ];
    return `// [truncated:${input.marker_template_version} ${fields.join(" ")}]`;
}
|
|
3581
|
+
/**
 * Post-processes selected search results: for results in a targeted language
 * whose snippet looks truncated relative to neighbouring same-path candidates,
 * optionally attempts a repair and, if the outcome still looks truncated,
 * appends a truncation marker line to the snippet.
 *
 * @param {{selected: Array<object>,
 *          sourceCandidates: Array<object>,
 *          config: object,
 *          observability: {metrics: object, logger: object},
 *          retrievalProfileId: string}} input
 * @returns {Array<object>} A new array, one entry per selected result; entries
 *   that need no change are returned as the original objects.
 */
function annotateSearchResultsWithSnippetIntegrity(input) {
    const { config, observability, retrievalProfileId } = input;
    if (!config.enabled || input.selected.length === 0) {
        return [...input.selected];
    }
    const enabledLanguages = new Set(normalizeSnippetIntegrityLanguageList(config.target_languages));
    if (enabledLanguages.size === 0) {
        return [...input.selected];
    }
    // Bucket every source candidate under its file path.
    const candidatesByPath = new Map();
    for (const candidate of input.sourceCandidates) {
        let bucket = candidatesByPath.get(candidate.path);
        if (!bucket) {
            bucket = [];
            candidatesByPath.set(candidate.path, bucket);
        }
        bucket.push(candidate);
    }
    const annotateOne = (result) => {
        const language = snippetIntegrityLanguageFromPath(result.path);
        if (!language || !enabledLanguages.has(language)) {
            return result;
        }
        const samePath = candidatesByPath.get(result.path) ?? [result];
        // A lone candidate gives no neighbours to estimate an envelope from.
        if (samePath.length <= 1) {
            return result;
        }
        const envelope = estimateContiguousEnvelope({
            anchor: result,
            candidates: samePath,
            maxGapLines: config.max_contiguous_gap_lines
        });
        const initiallyTruncated = shouldAnnotateSnippetAsTruncated(result, Math.max(0, result.start_line - envelope.start_line), Math.max(0, envelope.end_line - result.end_line));
        if (!initiallyTruncated) {
            return result;
        }
        const envelopeCandidates = samePath
            .filter((candidate) => candidate.end_line >= envelope.start_line && candidate.start_line <= envelope.end_line)
            .sort(compareSearchResultsByLineRange);
        let assembled = result;
        if (config.repair_enabled) {
            observability.metrics.increment("retrieval_snippet_repair_attempt_total", 1, {
                retrieval_profile_id: retrievalProfileId,
                language
            });
            const outcome = repairSnippetFromEnvelope({
                anchor: result,
                envelope,
                samePathCandidates: samePath,
                config
            });
            // Fields shared by both the "repaired" and "repair_skipped" log entries.
            const decision = {
                retrieval_profile_id: retrievalProfileId,
                path: result.path,
                language,
                envelope_start_line: envelope.start_line,
                envelope_end_line: envelope.end_line,
                envelope_span_lines: envelope.end_line - envelope.start_line + 1
            };
            if (outcome.repaired) {
                assembled = outcome.repaired;
                observability.metrics.increment("retrieval_snippet_repair_success_total", 1, {
                    retrieval_profile_id: retrievalProfileId,
                    language,
                    clipped: outcome.clipped ? "true" : "false"
                });
                observability.logger.info("snippet integrity repair decision", {
                    ...decision,
                    status: "repaired",
                    clipped: outcome.clipped
                });
            }
            else {
                observability.logger.info("snippet integrity repair decision", {
                    ...decision,
                    status: "repair_skipped",
                    reason: outcome.reason ?? "unknown"
                });
            }
        }
        // Re-evaluate against whatever snippet we ended up with.
        const omittedBefore = Math.max(0, assembled.start_line - envelope.start_line);
        const omittedAfter = Math.max(0, envelope.end_line - assembled.end_line);
        if (!shouldAnnotateSnippetAsTruncated(assembled, omittedBefore, omittedAfter)) {
            return assembled;
        }
        // Prefer a symbol from the final snippet; otherwise take the first one
        // detected among the envelope's candidates, in line order.
        let symbolName = detectSnippetSymbolName(assembled.snippet);
        for (let position = 0; !symbolName && position < envelopeCandidates.length; position += 1) {
            symbolName = detectSnippetSymbolName(envelopeCandidates[position].snippet);
        }
        const marker = buildSnippetTruncationMarker({
            result: assembled,
            symbolName,
            envelope_start_line: envelope.start_line,
            envelope_end_line: envelope.end_line,
            marker_template_version: config.marker_template_version
        });
        observability.metrics.increment("retrieval_snippet_repair_fallback_marker_total", 1, {
            retrieval_profile_id: retrievalProfileId,
            language
        });
        observability.metrics.increment("retrieval_snippet_truncation_marker_total", 1, {
            retrieval_profile_id: retrievalProfileId,
            language,
            symbol_detected: symbolName ? "true" : "false",
            marker_template_version: config.marker_template_version
        });
        observability.metrics.observe("retrieval_snippet_omitted_after_lines", omittedAfter, {
            retrieval_profile_id: retrievalProfileId,
            language
        });
        const body = assembled.snippet.trimEnd();
        const snippet = body.length > 0 ? `${body}\n${marker}` : marker;
        return { ...assembled, snippet };
    };
    return input.selected.map((result) => annotateOne(result));
}
|
|
3707
|
+
/**
 * Enriches each selected result with nearby same-path spans, rendering them as
 * one snippet in which an "..." line separates the merged spans.
 *
 * @param {{selected: Array<object>,
 *          sourceCandidates: Array<object>,
 *          config: {enabled: boolean, max_spans_per_result: number,
 *                   max_gap_lines: number, max_snippet_chars: number}}} input
 * @returns {Array<object>} A new array; a result is returned unchanged whenever
 *   packing is not applicable or would not produce a useful snippet.
 */
function packSearchResultsWithContext(input) {
    const { config } = input;
    if (!config.enabled || input.selected.length === 0) {
        return [...input.selected];
    }
    // Bucket every source candidate under its file path.
    const candidatesByPath = new Map();
    for (const candidate of input.sourceCandidates) {
        let bucket = candidatesByPath.get(candidate.path);
        if (!bucket) {
            bucket = [];
            candidatesByPath.set(candidate.path, bucket);
        }
        bucket.push(candidate);
    }
    const packOne = (anchor) => {
        const samePath = candidatesByPath.get(anchor.path) ?? [anchor];
        if (samePath.length <= 1 || config.max_spans_per_result <= 1) {
            return anchor;
        }
        const anchorRange = { start_line: anchor.start_line, end_line: anchor.end_line };
        // Neighbours: a different range than the anchor, not heavily overlapping
        // it, and within the configured gap.
        const neighbours = samePath.filter((candidate) => {
            const sameRange = candidate.start_line === anchor.start_line && candidate.end_line === anchor.end_line;
            if (sameRange) {
                return false;
            }
            if (isHeavilyOverlappingLineRange(candidate, [anchorRange])) {
                return false;
            }
            return lineRangeGap(anchorRange, candidate) <= config.max_gap_lines;
        });
        // Order by relevance, breaking ties by closeness to the anchor.
        neighbours.sort((left, right) => {
            const byRelevance = compareSearchResults(left, right);
            return byRelevance !== 0
                ? byRelevance
                : lineRangeGap(anchorRange, left) - lineRangeGap(anchorRange, right);
        });
        const spans = [{ ...anchorRange }];
        for (const neighbour of neighbours) {
            if (spans.length >= config.max_spans_per_result) {
                break;
            }
            const proposed = { start_line: neighbour.start_line, end_line: neighbour.end_line };
            const trial = mergeLineSpans([...spans, proposed]);
            // Reject a span that would leave an over-wide hole in the merged layout.
            const leavesWideHole = trial.some((span, position) => position > 0 && span.start_line - (trial[position - 1]?.end_line ?? span.start_line) - 1 > config.max_gap_lines);
            if (!leavesWideHole) {
                spans.push(proposed);
            }
        }
        const mergedSpans = mergeLineSpans(spans);
        if (mergedSpans.length <= 1) {
            return anchor;
        }
        const textByLine = buildPreferredLineMap([anchor, ...samePath]);
        const rendered = [];
        let elisionCount = 0;
        for (const [position, span] of mergedSpans.entries()) {
            if (!span) {
                continue;
            }
            const earlier = position > 0 ? mergedSpans[position - 1] : undefined;
            if (earlier && span.start_line - earlier.end_line > 0) {
                rendered.push("...");
                elisionCount += 1;
            }
            for (let lineNumber = span.start_line; lineNumber <= span.end_line; lineNumber += 1) {
                rendered.push(textByLine.get(lineNumber) ?? "");
            }
        }
        if (rendered.length === 0) {
            return anchor;
        }
        // Every rendered line is either content or an elision, so the rendered
        // length is the sum of both counts.
        const elisionDensity = elisionCount / Math.max(1, rendered.length);
        if (elisionDensity > 0.25) {
            return anchor;
        }
        const packedSnippet = clipSnippetToMaxChars(rendered.join("\n"), config.max_snippet_chars);
        if (packedSnippet.length === 0) {
            return anchor;
        }
        return {
            ...anchor,
            start_line: mergedSpans[0]?.start_line ?? anchor.start_line,
            end_line: mergedSpans[mergedSpans.length - 1]?.end_line ?? anchor.end_line,
            snippet: packedSnippet,
            reason: `${anchor.reason} + contextual spans`
        };
    };
    return input.selected.map((anchor) => packOne(anchor));
}
|
|
3797
|
+
/**
 * Consolidates candidates from the same path whose line ranges are close
 * together into single merged candidates.
 *
 * @param {Array<object>} candidates - Candidates with `path`, `start_line`, `end_line`.
 * @param {{merge_overlapping_chunks_enabled: boolean, merge_gap_lines: number,
 *          merge_max_span_lines: number}} config
 * @returns {Array<object>} A new array. When merging is disabled or there is at
 *   most one candidate it is a plain copy; otherwise the merged candidates
 *   sorted with `compareSearchResults`.
 */
function mergeOverlappingCandidates(candidates, config) {
    if (!config.merge_overlapping_chunks_enabled || candidates.length <= 1) {
        return [...candidates];
    }
    const groupsByPath = new Map();
    for (const candidate of candidates) {
        let group = groupsByPath.get(candidate.path);
        if (!group) {
            group = [];
            groupsByPath.set(candidate.path, group);
        }
        group.push(candidate);
    }
    const merged = [];
    for (const group of groupsByPath.values()) {
        const clusters = [];
        let open = null;
        // Walk the path's candidates in line order, extending the open cluster
        // while both the gap and the total span stay within the limits.
        for (const candidate of [...group].sort(compareSearchResultsByLineRange)) {
            if (open !== null) {
                const widenedStart = Math.min(open.start, candidate.start_line);
                const widenedEnd = Math.max(open.end, candidate.end_line);
                const gapLines = Math.max(0, candidate.start_line - open.end - 1);
                const withinGap = gapLines <= config.merge_gap_lines;
                const withinSpan = widenedEnd - widenedStart + 1 <= config.merge_max_span_lines;
                if (withinGap && withinSpan) {
                    open.members.push(candidate);
                    open.start = widenedStart;
                    open.end = widenedEnd;
                    continue;
                }
            }
            // First candidate of the path, or one that cannot join: start a new cluster.
            open = { members: [candidate], start: candidate.start_line, end: candidate.end_line };
            clusters.push(open);
        }
        for (const cluster of clusters) {
            merged.push(mergeCandidateCluster(cluster.members));
        }
    }
    return merged.sort(compareSearchResults);
}
|
|
3851
|
+
/**
 * Truncates a candidate list at the first sign that relevance has fallen off.
 *
 * Candidates are sorted with `compareSearchResults`. The first `min_k`
 * (at least 1) are always kept; after that, scanning stops at the first
 * candidate that scores below `smart_cutoff_min_score`, below
 * `topScore * smart_cutoff_top_ratio`, or more than `smart_cutoff_delta_abs`
 * below its predecessor. At most `max_k` candidates are kept.
 *
 * @param {Array<{score: number}>} candidates
 * @param {object} config - Rerank config carrying the `smart_cutoff_*` fields.
 * @returns {Array<object>} A new array; a plain copy of the input when the
 *   cutoff is disabled or the input is empty.
 */
function applySmartCutoffCandidates(candidates, config) {
    if (!config.smart_cutoff_enabled || candidates.length === 0) {
        return [...candidates];
    }
    const ranked = [...candidates].sort(compareSearchResults);
    const guaranteed = Math.max(1, config.smart_cutoff_min_k);
    const ceiling = Math.max(guaranteed, config.smart_cutoff_max_k);
    const bestScore = ranked[0]?.score ?? Number.NEGATIVE_INFINITY;
    const survivors = [];
    for (const [rank, candidate] of ranked.entries()) {
        if (!candidate) {
            continue;
        }
        if (survivors.length >= ceiling) {
            break;
        }
        // The quality checks only apply once the guaranteed quota is filled.
        if (!(survivors.length < guaranteed)) {
            const predecessor = ranked[rank - 1];
            const fellOff = candidate.score < config.smart_cutoff_min_score ||
                candidate.score < bestScore * config.smart_cutoff_top_ratio ||
                (predecessor && predecessor.score - candidate.score > config.smart_cutoff_delta_abs);
            if (fellOff) {
                break;
            }
        }
        survivors.push(candidate);
    }
    return survivors;
}
|
|
3886
|
+
/**
 * Test-only entry point that forwards to `applySmartCutoffCandidates`.
 *
 * @param {{candidates: Array<object>, config: object}} input
 * @returns {Array<object>} Whatever `applySmartCutoffCandidates` returns.
 */
export function __applySmartCutoffCandidatesForTests(input) {
    const { candidates, config } = input;
    return applySmartCutoffCandidates(candidates, config);
}
|
|
2981
3889
|
function dedupeEnhancerCandidatesByPath(results) {
|
|
2982
3890
|
const byPath = new Map();
|
|
2983
3891
|
for (const result of results) {
|
|
@@ -3281,11 +4189,11 @@ function deterministicEnhancerFallbackRanking(input) {
|
|
|
3281
4189
|
const avoided = input.results.filter((result) => !preferred.includes(result) && !tolerated.includes(result));
|
|
3282
4190
|
return [...preferred, ...tolerated, ...avoided];
|
|
3283
4191
|
}
|
|
3284
|
-
function trimToContextBudget(results) {
|
|
4192
|
+
function trimToContextBudget(results, budgetTokenizerMode) {
|
|
3285
4193
|
let total = 0;
|
|
3286
4194
|
const out = [];
|
|
3287
4195
|
for (const result of results) {
|
|
3288
|
-
total +=
|
|
4196
|
+
total += chunkBudgetTokenize(result.snippet, budgetTokenizerMode).length;
|
|
3289
4197
|
if (total > MAX_CONTEXT_BUDGET_TOKENS) {
|
|
3290
4198
|
break;
|
|
3291
4199
|
}
|
|
@@ -3731,6 +4639,8 @@ export class RetrievalCore {
|
|
|
3731
4639
|
enhancerConfig;
|
|
3732
4640
|
enhancerGenerationConfig;
|
|
3733
4641
|
chunkingConfig;
|
|
4642
|
+
contextPackingConfig;
|
|
4643
|
+
snippetIntegrityConfig;
|
|
3734
4644
|
enhancerDecisionTraceEnabled;
|
|
3735
4645
|
cacheHits = 0;
|
|
3736
4646
|
cacheMisses = 0;
|
|
@@ -3764,6 +4674,8 @@ export class RetrievalCore {
|
|
|
3764
4674
|
this.enhancerConfig = mergeRetrievalEnhancerConfig(DEFAULT_RETRIEVAL_ENHANCER_CONFIG, options?.enhancerConfig);
|
|
3765
4675
|
this.enhancerGenerationConfig = mergeRetrievalEnhancerGenerationConfig(DEFAULT_RETRIEVAL_ENHANCER_GENERATION_CONFIG, options?.enhancerGenerationConfig);
|
|
3766
4676
|
this.chunkingConfig = mergeRetrievalChunkingConfig(DEFAULT_RETRIEVAL_CHUNKING_CONFIG, options?.chunkingConfig);
|
|
4677
|
+
this.contextPackingConfig = mergeRetrievalContextPackingConfig(DEFAULT_RETRIEVAL_CONTEXT_PACKING_CONFIG, options?.contextPackingConfig);
|
|
4678
|
+
this.snippetIntegrityConfig = mergeRetrievalSnippetIntegrityConfig(DEFAULT_RETRIEVAL_SNIPPET_INTEGRITY_CONFIG, options?.snippetIntegrityConfig);
|
|
3767
4679
|
this.enhancerDecisionTraceEnabled = Boolean(options?.enhancerDecisionTraceEnabled);
|
|
3768
4680
|
}
|
|
3769
4681
|
async indexArtifact(artifact) {
|
|
@@ -3937,6 +4849,12 @@ export class RetrievalCore {
|
|
|
3937
4849
|
language: chunkLanguage,
|
|
3938
4850
|
reason: chunkBuild.fallback_reason ?? "none"
|
|
3939
4851
|
});
|
|
4852
|
+
if (chunkBuild.recursive_semantic_chunking_used) {
|
|
4853
|
+
this.observability.metrics.increment("index_recursive_semantic_chunking_used_total", 1, {
|
|
4854
|
+
tenant_id: artifact.tenant_id,
|
|
4855
|
+
language: chunkLanguage
|
|
4856
|
+
});
|
|
4857
|
+
}
|
|
3940
4858
|
if (chunkBuild.fallback_reason) {
|
|
3941
4859
|
this.observability.metrics.increment("index_chunking_fallback_total", 1, {
|
|
3942
4860
|
tenant_id: artifact.tenant_id,
|
|
@@ -3964,14 +4882,15 @@ export class RetrievalCore {
|
|
|
3964
4882
|
reason: chunkBuild.fallback_reason
|
|
3965
4883
|
});
|
|
3966
4884
|
}
|
|
3967
|
-
const
|
|
4885
|
+
const embeddingTexts = buildChunkEmbeddingTexts(chunks, this.chunkingConfig, this.embeddingDescriptor.provider);
|
|
4886
|
+
const estimatedEmbeddingTokens = embeddingTexts.reduce((sum, text) => sum + tokenize(text).length, 0);
|
|
3968
4887
|
this.observability.metrics.increment("index_embedding_tokens_total", estimatedEmbeddingTokens, {
|
|
3969
4888
|
tenant_id: artifact.tenant_id
|
|
3970
4889
|
});
|
|
3971
4890
|
const embeddings = chunks.length === 0
|
|
3972
4891
|
? []
|
|
3973
4892
|
: await this.embeddingProvider.embed({
|
|
3974
|
-
texts:
|
|
4893
|
+
texts: embeddingTexts,
|
|
3975
4894
|
purpose: "index"
|
|
3976
4895
|
});
|
|
3977
4896
|
if (embeddings.length !== chunks.length) {
|
|
@@ -4246,6 +5165,12 @@ export class RetrievalCore {
|
|
|
4246
5165
|
language: chunkLanguage,
|
|
4247
5166
|
reason: chunkBuild.fallback_reason ?? "none"
|
|
4248
5167
|
});
|
|
5168
|
+
if (chunkBuild.recursive_semantic_chunking_used) {
|
|
5169
|
+
this.observability.metrics.increment("index_recursive_semantic_chunking_used_total", 1, {
|
|
5170
|
+
tenant_id: artifact.tenant_id,
|
|
5171
|
+
language: chunkLanguage
|
|
5172
|
+
});
|
|
5173
|
+
}
|
|
4249
5174
|
if (chunkBuild.fallback_reason) {
|
|
4250
5175
|
this.observability.metrics.increment("index_chunking_fallback_total", 1, {
|
|
4251
5176
|
tenant_id: artifact.tenant_id,
|
|
@@ -4273,14 +5198,15 @@ export class RetrievalCore {
|
|
|
4273
5198
|
reason: chunkBuild.fallback_reason
|
|
4274
5199
|
});
|
|
4275
5200
|
}
|
|
4276
|
-
const
|
|
5201
|
+
const embeddingTexts = buildChunkEmbeddingTexts(chunks, this.chunkingConfig, this.embeddingDescriptor.provider);
|
|
5202
|
+
const estimatedEmbeddingTokens = embeddingTexts.reduce((sum, text) => sum + tokenize(text).length, 0);
|
|
4277
5203
|
this.observability.metrics.increment("index_embedding_tokens_total", estimatedEmbeddingTokens, {
|
|
4278
5204
|
tenant_id: artifact.tenant_id
|
|
4279
5205
|
});
|
|
4280
5206
|
const embeddings = chunks.length === 0
|
|
4281
5207
|
? []
|
|
4282
5208
|
: await this.embeddingProvider.embed({
|
|
4283
|
-
texts:
|
|
5209
|
+
texts: embeddingTexts,
|
|
4284
5210
|
purpose: "index"
|
|
4285
5211
|
});
|
|
4286
5212
|
if (embeddings.length !== chunks.length) {
|
|
@@ -4556,7 +5482,7 @@ export class RetrievalCore {
|
|
|
4556
5482
|
query,
|
|
4557
5483
|
top_k: topK,
|
|
4558
5484
|
filters: input.request.filters,
|
|
4559
|
-
retrieval_variant: this.rerankerCacheVariant
|
|
5485
|
+
retrieval_variant: `${this.rerankerCacheVariant}|context_pack:${this.contextPackingConfig.enabled ? "on" : "off"}|context_pack_spans:${this.contextPackingConfig.max_spans_per_result}|context_pack_gap:${this.contextPackingConfig.max_gap_lines}|snippet_integrity:${this.snippetIntegrityConfig.enabled ? "on" : "off"}|snippet_integrity_gap:${this.snippetIntegrityConfig.max_contiguous_gap_lines}|snippet_integrity_langs:${this.snippetIntegrityConfig.target_languages.join(",")}|snippet_repair:${this.snippetIntegrityConfig.repair_enabled ? "on" : "off"}|snippet_repair_env:${this.snippetIntegrityConfig.repair_max_envelope_lines}|snippet_repair_chars:${this.snippetIntegrityConfig.repair_max_snippet_chars}|chunk_recursive:${this.chunkingConfig.recursive_semantic_chunking_enabled ? "on" : "off"}|chunk_semantic_gap:${this.chunkingConfig.semantic_merge_gap_lines}|chunk_semantic_span:${this.chunkingConfig.semantic_merge_max_span_lines}|chunk_comment_absorb:${this.chunkingConfig.comment_forward_absorb_enabled ? "on" : "off"}|chunk_embed_prefix:${this.chunkingConfig.embedding_context_prefix_enabled ? "on" : "off"}|smart_cutoff:${this.scoringConfig.rerank.smart_cutoff_enabled ? "on" : "off"}|smart_cutoff_min_k:${this.scoringConfig.rerank.smart_cutoff_min_k}|smart_cutoff_max_k:${this.scoringConfig.rerank.smart_cutoff_max_k}|smart_cutoff_min_score:${this.scoringConfig.rerank.smart_cutoff_min_score}|smart_cutoff_top_ratio:${this.scoringConfig.rerank.smart_cutoff_top_ratio}|smart_cutoff_delta_abs:${this.scoringConfig.rerank.smart_cutoff_delta_abs}`
|
|
4560
5486
|
});
|
|
4561
5487
|
const cached = await this.cache.get(cacheKey);
|
|
4562
5488
|
if (cached) {
|
|
@@ -4700,16 +5626,37 @@ export class RetrievalCore {
|
|
|
4700
5626
|
query,
|
|
4701
5627
|
candidates
|
|
4702
5628
|
}));
|
|
5629
|
+
const consolidatedCandidates = await this.observability.tracing.withSpan("retrieval.overlap_merge", { trace_id: input.trace_id }, async () => mergeOverlappingCandidates(rerankedCandidates, this.scoringConfig.rerank));
|
|
5630
|
+
this.observability.metrics.observe("retrieval_candidates_post_overlap_merge_count", consolidatedCandidates.length, {
|
|
5631
|
+
retrieval_profile_id: this.scoringProfileId
|
|
5632
|
+
});
|
|
5633
|
+
const mergedCandidateCount = Math.max(0, rerankedCandidates.length - consolidatedCandidates.length);
|
|
5634
|
+
if (mergedCandidateCount > 0) {
|
|
5635
|
+
this.observability.metrics.increment("retrieval_overlap_candidates_merged_total", mergedCandidateCount, {
|
|
5636
|
+
retrieval_profile_id: this.scoringProfileId
|
|
5637
|
+
});
|
|
5638
|
+
}
|
|
5639
|
+
const cutoffCandidates = await this.observability.tracing.withSpan("retrieval.smart_cutoff", { trace_id: input.trace_id }, async () => applySmartCutoffCandidates(consolidatedCandidates, this.scoringConfig.rerank));
|
|
5640
|
+
if (this.scoringConfig.rerank.smart_cutoff_enabled) {
|
|
5641
|
+
this.observability.metrics.increment("retrieval_smart_cutoff_applied_total", 1, {
|
|
5642
|
+
retrieval_profile_id: this.scoringProfileId
|
|
5643
|
+
});
|
|
5644
|
+
const droppedCount = Math.max(0, consolidatedCandidates.length - cutoffCandidates.length);
|
|
5645
|
+
this.observability.metrics.increment("retrieval_smart_cutoff_drop_count", droppedCount, {
|
|
5646
|
+
retrieval_profile_id: this.scoringProfileId
|
|
5647
|
+
});
|
|
5648
|
+
}
|
|
4703
5649
|
const deduped = await this.observability.tracing.withSpan("retrieval.rerank", { trace_id: input.trace_id }, async () => {
|
|
4704
5650
|
const output = [];
|
|
4705
5651
|
const seen = new Set();
|
|
4706
5652
|
const pathCounts = new Map();
|
|
5653
|
+
const selectedRangesByPath = new Map();
|
|
4707
5654
|
const directoryCounts = new Map();
|
|
4708
5655
|
const extensionCounts = new Map();
|
|
4709
5656
|
const maxChunksPerPath = hasFileLookupIntent(queryTokens)
|
|
4710
5657
|
? this.scoringConfig.rerank.max_chunks_per_path_file_lookup
|
|
4711
5658
|
: this.scoringConfig.rerank.max_chunks_per_path_default;
|
|
4712
|
-
const available = [...
|
|
5659
|
+
const available = [...cutoffCandidates];
|
|
4713
5660
|
while (output.length < topK && available.length > 0) {
|
|
4714
5661
|
let bestIndex = -1;
|
|
4715
5662
|
let bestAdjustedScore = Number.NEGATIVE_INFINITY;
|
|
@@ -4727,6 +5674,12 @@ export class RetrievalCore {
|
|
|
4727
5674
|
if (pathCount >= maxChunksPerPath) {
|
|
4728
5675
|
continue;
|
|
4729
5676
|
}
|
|
5677
|
+
if (this.scoringConfig.rerank.merge_overlapping_chunks_enabled && pathCount > 0) {
|
|
5678
|
+
const selectedRanges = selectedRangesByPath.get(candidate.path) ?? [];
|
|
5679
|
+
if (isHeavilyOverlappingLineRange(candidate, selectedRanges)) {
|
|
5680
|
+
continue;
|
|
5681
|
+
}
|
|
5682
|
+
}
|
|
4730
5683
|
const directoryKey = parentDirectory(candidate.path).toLowerCase();
|
|
4731
5684
|
const extensionKey = fileExtension(candidate.path);
|
|
4732
5685
|
const adjustedScore = candidate.score -
|
|
@@ -4759,6 +5712,13 @@ export class RetrievalCore {
|
|
|
4759
5712
|
const selectedKey = `${selected.path}:${selected.start_line}:${selected.end_line}`;
|
|
4760
5713
|
seen.add(selectedKey);
|
|
4761
5714
|
pathCounts.set(selected.path, (pathCounts.get(selected.path) ?? 0) + 1);
|
|
5715
|
+
const selectedRanges = selectedRangesByPath.get(selected.path);
|
|
5716
|
+
if (selectedRanges) {
|
|
5717
|
+
selectedRanges.push({ start_line: selected.start_line, end_line: selected.end_line });
|
|
5718
|
+
}
|
|
5719
|
+
else {
|
|
5720
|
+
selectedRangesByPath.set(selected.path, [{ start_line: selected.start_line, end_line: selected.end_line }]);
|
|
5721
|
+
}
|
|
4762
5722
|
const selectedDirectory = parentDirectory(selected.path).toLowerCase();
|
|
4763
5723
|
const selectedExtension = fileExtension(selected.path);
|
|
4764
5724
|
directoryCounts.set(selectedDirectory, (directoryCounts.get(selectedDirectory) ?? 0) + 1);
|
|
@@ -4768,8 +5728,8 @@ export class RetrievalCore {
|
|
|
4768
5728
|
return output;
|
|
4769
5729
|
});
|
|
4770
5730
|
const candidateRankByKey = new Map();
|
|
4771
|
-
for (let index = 0; index <
|
|
4772
|
-
const candidate =
|
|
5731
|
+
for (let index = 0; index < cutoffCandidates.length; index += 1) {
|
|
5732
|
+
const candidate = cutoffCandidates[index];
|
|
4773
5733
|
if (!candidate) {
|
|
4774
5734
|
continue;
|
|
4775
5735
|
}
|
|
@@ -4800,16 +5760,28 @@ export class RetrievalCore {
|
|
|
4800
5760
|
this.observability.metrics.observe("retrieval_literal_matches_topk", literalMatchesInTopK, {
|
|
4801
5761
|
retrieval_profile_id: this.scoringProfileId
|
|
4802
5762
|
});
|
|
5763
|
+
const packedResults = packSearchResultsWithContext({
|
|
5764
|
+
selected: deduped,
|
|
5765
|
+
sourceCandidates: cutoffCandidates,
|
|
5766
|
+
config: this.contextPackingConfig
|
|
5767
|
+
});
|
|
5768
|
+
const assembledResults = annotateSearchResultsWithSnippetIntegrity({
|
|
5769
|
+
selected: packedResults,
|
|
5770
|
+
sourceCandidates: cutoffCandidates,
|
|
5771
|
+
config: this.snippetIntegrityConfig,
|
|
5772
|
+
observability: this.observability,
|
|
5773
|
+
retrievalProfileId: this.scoringProfileId
|
|
5774
|
+
});
|
|
4803
5775
|
const output = {
|
|
4804
5776
|
trace_id: input.trace_id,
|
|
4805
|
-
results:
|
|
5777
|
+
results: assembledResults,
|
|
4806
5778
|
search_metadata: {
|
|
4807
5779
|
latency_ms: Date.now() - searchStartedAt,
|
|
4808
5780
|
retrieval_mode: "hybrid",
|
|
4809
5781
|
index_version: index.index_version
|
|
4810
5782
|
}
|
|
4811
5783
|
};
|
|
4812
|
-
this.observability.metrics.observe("retrieval_topk_hit_proxy",
|
|
5784
|
+
this.observability.metrics.observe("retrieval_topk_hit_proxy", assembledResults.length > 0 ? 1 : 0, {
|
|
4813
5785
|
retrieval_profile_id: this.scoringProfileId
|
|
4814
5786
|
});
|
|
4815
5787
|
this.observability.logger.info("search_context completed", {
|
|
@@ -4833,6 +5805,7 @@ export class RetrievalCore {
|
|
|
4833
5805
|
}
|
|
4834
5806
|
buildEnhancerContextSnippets(results) {
|
|
4835
5807
|
const maxSnippets = this.enhancerGenerationConfig.max_context_snippets;
|
|
5808
|
+
const snippetCharLimit = this.contextPackingConfig.enabled ? this.contextPackingConfig.enhancer_snippet_char_limit : 1_600;
|
|
4836
5809
|
const snippets = [];
|
|
4837
5810
|
for (const result of results.slice(0, maxSnippets)) {
|
|
4838
5811
|
snippets.push({
|
|
@@ -4840,7 +5813,7 @@ export class RetrievalCore {
|
|
|
4840
5813
|
start_line: result.start_line,
|
|
4841
5814
|
end_line: result.end_line,
|
|
4842
5815
|
reason: result.reason,
|
|
4843
|
-
snippet: result.snippet.slice(0,
|
|
5816
|
+
snippet: result.snippet.slice(0, snippetCharLimit),
|
|
4844
5817
|
score: result.score
|
|
4845
5818
|
});
|
|
4846
5819
|
}
|
|
@@ -4981,7 +5954,7 @@ export class RetrievalCore {
|
|
|
4981
5954
|
top_k: MAX_TOP_K
|
|
4982
5955
|
}
|
|
4983
5956
|
});
|
|
4984
|
-
const budgetedResults = trimToContextBudget(retrieval.results);
|
|
5957
|
+
const budgetedResults = trimToContextBudget(retrieval.results, this.contextPackingConfig.enabled ? "lightweight" : "ranking");
|
|
4985
5958
|
const dedupedByPath = dedupeEnhancerCandidatesByPath(budgetedResults);
|
|
4986
5959
|
const collapsedByDirectory = collapseEnhancerCandidatesByDirectory(dedupedByPath, intentPolicy.max_candidates_per_directory_pre_rerank);
|
|
4987
5960
|
const filteredCandidates = applyEnhancerIntentPathFiltering(collapsedByDirectory, {
|