@rce-mcp/retrieval-core 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -1,11 +1,18 @@
1
1
  import { createHash, randomUUID } from "node:crypto";
2
- import type { EnhancePromptInput, EnhancePromptOutput, SearchContextInput, SearchContextOutput } from "@rce-mcp/contracts";
2
+ import type {
3
+ EnhancePromptInput,
4
+ EnhancePromptOutput,
5
+ EnhancePromptStyle,
6
+ SearchContextInput,
7
+ SearchContextOutput
8
+ } from "@rce-mcp/contracts";
3
9
  import {
4
10
  buildQueryCacheKey,
5
11
  type CandidateScoreWeights,
6
12
  type IndexRepository,
7
13
  type QueryCache,
8
14
  type RankedChunkCandidate,
15
+ tokenizeForRanking,
9
16
  type WorkspaceRecord
10
17
  } from "@rce-mcp/data-plane";
11
18
  import { InMemoryQueryCache } from "@rce-mcp/data-plane";
@@ -19,12 +26,16 @@ import {
19
26
 
20
27
  type RetrievalMode = SearchContextOutput["search_metadata"]["retrieval_mode"];
21
28
  type ContextRef = EnhancePromptOutput["context_refs"][number];
29
+ type SearchResultRow = SearchContextOutput["results"][number];
22
30
 
23
31
  const MAX_FILE_SIZE_BYTES = 1_000_000;
24
32
  const MAX_CHUNKS_PER_FILE = 300;
25
- const TARGET_CHUNK_TOKENS = 220;
26
- const CHUNK_OVERLAP_TOKENS = 40;
33
+ const DEFAULT_TARGET_CHUNK_TOKENS = 420;
34
+ const DEFAULT_CHUNK_OVERLAP_TOKENS = 90;
27
35
  const MAX_TOP_K = 20;
36
+ const DEFAULT_INTERNAL_CANDIDATE_DEPTH = 100;
37
+ const MIN_INTERNAL_CANDIDATE_DEPTH = 20;
38
+ const MAX_INTERNAL_CANDIDATE_DEPTH = 200;
28
39
  const MAX_CONTEXT_BUDGET_TOKENS = 12_000;
29
40
  export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_BASE_URL = "https://router.tumuer.me/v1";
30
41
  export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_MODEL = "Qwen/Qwen3-Embedding-4B";
@@ -32,6 +43,17 @@ export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_DIMENSIONS = 2560;
32
43
  export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_TIMEOUT_MS = 10_000;
33
44
  export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_BATCH_SIZE = 64;
34
45
  export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_MAX_RETRIES = 2;
46
+ export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_TRANSIENT_403_MAX_RETRIES = 4;
47
+ export const DEFAULT_OPENAI_COMPATIBLE_RERANKER_BASE_URL = "https://router.tumuer.me/v1";
48
+ export const DEFAULT_OPENAI_COMPATIBLE_RERANKER_MODEL = "Qwen/Qwen3-Reranker-4B";
49
+ export const DEFAULT_OPENAI_COMPATIBLE_RERANKER_TIMEOUT_MS = 2_500;
50
+ export const DEFAULT_SEARCH_RERANKER_TOP_N = 30;
51
+ export const DEFAULT_PROVIDER_MAX_REQUESTS_PER_MINUTE = 90;
52
+ export const DEFAULT_PROVIDER_LIMIT_INDEX_MAX_WAIT_MS = 120_000;
53
+ export const DEFAULT_PROVIDER_LIMIT_QUERY_MAX_WAIT_MS = 1_000;
54
+ export const DEFAULT_PROVIDER_LIMIT_RERANK_MAX_WAIT_MS = 500;
55
+ export const DEFAULT_CLAUDE_ENHANCER_MODEL = "claude-3-5-sonnet-latest";
56
+ const DEFAULT_CLAUDE_ENHANCER_MAX_TURNS = 3;
35
57
 
36
58
  const DEFAULT_CANDIDATE_SCORE_WEIGHTS: CandidateScoreWeights = {
37
59
  lexical_weight: 0.6,
@@ -69,6 +91,10 @@ export interface RetrievalPathBiasConfig {
69
91
  negation_avoid_tests_penalty: number;
70
92
  negation_avoid_examples_penalty: number;
71
93
  negation_avoid_archive_penalty: number;
94
+ security_trace_meta_penalty: number;
95
+ literal_path_boost: number;
96
+ literal_snippet_boost: number;
97
+ literal_max_boost: number;
72
98
  min_total_bias: number;
73
99
  max_total_bias: number;
74
100
  }
@@ -93,9 +119,20 @@ export interface RetrievalEnhancerConfig {
93
119
  rerank_timeout_ms: number;
94
120
  }
95
121
 
122
+ export type EnhancerToolMode = "none" | "read_only";
123
+
124
/**
 * Settings for the enhancer's prompt-generation step.
 *
 * validateEnhancerGenerationConfig (same file) requires timeout_ms and
 * max_context_snippets to be positive integers, max_retries to be a
 * non-negative integer, and tool_mode to be "none" or "read_only".
 */
+ export interface RetrievalEnhancerGenerationConfig {
125
+ timeout_ms: number;
126
+ max_retries: number;
127
+ tool_mode: EnhancerToolMode;
128
+ max_context_snippets: number;
129
+ }
130
+
96
131
  export interface RetrievalChunkingConfig {
97
132
  strategy: ChunkingStrategy;
98
133
  fallback_strategy: "sliding";
134
+ target_chunk_tokens: number;
135
+ chunk_overlap_tokens: number;
99
136
  parse_timeout_ms: number;
100
137
  enabled_languages: string[];
101
138
  }
@@ -107,9 +144,12 @@ export type RetrievalScoringConfigInput = Partial<{
107
144
  }>;
108
145
 
109
146
  export type RetrievalEnhancerConfigInput = Partial<RetrievalEnhancerConfig>;
147
+ export type RetrievalEnhancerGenerationConfigInput = Partial<RetrievalEnhancerGenerationConfig>;
110
148
  export type RetrievalChunkingConfigInput = Partial<{
111
149
  strategy: ChunkingStrategy;
112
150
  fallback_strategy: "sliding";
151
+ target_chunk_tokens: number;
152
+ chunk_overlap_tokens: number;
113
153
  parse_timeout_ms: number;
114
154
  enabled_languages: string[];
115
155
  }>;
@@ -144,6 +184,10 @@ export const BASELINE_RETRIEVAL_SCORING_CONFIG: RetrievalScoringConfig = {
144
184
  negation_avoid_tests_penalty: 0.35,
145
185
  negation_avoid_examples_penalty: 0.3,
146
186
  negation_avoid_archive_penalty: 0.35,
187
+ security_trace_meta_penalty: 0.22,
188
+ literal_path_boost: 0.3,
189
+ literal_snippet_boost: 0.18,
190
+ literal_max_boost: 0.5,
147
191
  min_total_bias: -0.45,
148
192
  max_total_bias: 0.35
149
193
  },
@@ -192,6 +236,10 @@ export const CONSERVATIVE_RETRIEVAL_SCORING_CONFIG: RetrievalScoringConfig = {
192
236
  negation_avoid_tests_penalty: 0.2,
193
237
  negation_avoid_examples_penalty: 0.16,
194
238
  negation_avoid_archive_penalty: 0.2,
239
+ security_trace_meta_penalty: 0.14,
240
+ literal_path_boost: 0.18,
241
+ literal_snippet_boost: 0.1,
242
+ literal_max_boost: 0.28,
195
243
  min_total_bias: -0.25,
196
244
  max_total_bias: 0.2
197
245
  },
@@ -210,9 +258,18 @@ export const DEFAULT_RETRIEVAL_ENHANCER_CONFIG: RetrievalEnhancerConfig = {
210
258
  rerank_timeout_ms: 40
211
259
  };
212
260
 
261
/**
 * Default enhancer generation settings: 18 000 ms timeout, one retry,
 * "read_only" tool mode and at most six context snippets.
 */
+ export const DEFAULT_RETRIEVAL_ENHANCER_GENERATION_CONFIG: RetrievalEnhancerGenerationConfig = {
262
+ timeout_ms: 18_000,
263
+ max_retries: 1,
264
+ tool_mode: "read_only",
265
+ max_context_snippets: 6
266
+ };
267
+
213
268
  export const DEFAULT_RETRIEVAL_CHUNKING_CONFIG: RetrievalChunkingConfig = {
214
269
  strategy: "sliding",
215
270
  fallback_strategy: "sliding",
271
+ target_chunk_tokens: DEFAULT_TARGET_CHUNK_TOKENS,
272
+ chunk_overlap_tokens: DEFAULT_CHUNK_OVERLAP_TOKENS,
216
273
  parse_timeout_ms: 80,
217
274
  enabled_languages: ["typescript", "javascript", "python", "go"]
218
275
  };
@@ -351,6 +408,33 @@ export function mergeRetrievalEnhancerConfig(
351
408
  return next;
352
409
  }
353
410
 
411
/**
 * Checks that every field of an enhancer generation config is well-formed.
 *
 * @param config - Fully populated config to validate.
 * @throws Error when timeout_ms or max_context_snippets is not a positive
 *   integer, max_retries is not a non-negative integer, or tool_mode is not
 *   "none" / "read_only".
 */
function validateEnhancerGenerationConfig(config: RetrievalEnhancerGenerationConfig): void {
  if (!Number.isInteger(config.timeout_ms) || config.timeout_ms <= 0) {
    throw new Error("invalid retrieval enhancer generation config: timeout_ms must be a positive integer");
  }
  if (!Number.isInteger(config.max_retries) || config.max_retries < 0) {
    throw new Error("invalid retrieval enhancer generation config: max_retries must be a non-negative integer");
  }
  if (config.tool_mode !== "none" && config.tool_mode !== "read_only") {
    throw new Error("invalid retrieval enhancer generation config: tool_mode must be none|read_only");
  }
  if (!Number.isInteger(config.max_context_snippets) || config.max_context_snippets <= 0) {
    throw new Error("invalid retrieval enhancer generation config: max_context_snippets must be a positive integer");
  }
}

/**
 * Layers `overrides` on top of `base` and validates the result.
 *
 * Override fields whose value is `undefined` are ignored, so a caller can pass
 * `{ timeout_ms: maybeUndefined }` and still inherit the base value instead of
 * failing validation. Neither argument is mutated.
 *
 * @param base - Complete config supplying every field.
 * @param overrides - Optional partial config; only defined fields are applied.
 * @returns A new, validated config object.
 * @throws Error when the merged config fails validation.
 */
export function mergeRetrievalEnhancerGenerationConfig(
  base: RetrievalEnhancerGenerationConfig,
  overrides?: RetrievalEnhancerGenerationConfigInput
): RetrievalEnhancerGenerationConfig {
  // Partial<> permits explicit `undefined`; a plain spread would copy it over
  // the base value, so drop those entries first.
  const definedOverrides = Object.fromEntries(
    Object.entries(overrides ?? {}).filter(([, value]) => value !== undefined)
  ) as RetrievalEnhancerGenerationConfigInput;

  const next: RetrievalEnhancerGenerationConfig = {
    ...base,
    ...definedOverrides
  };
  validateEnhancerGenerationConfig(next);
  return next;
}
437
+
354
438
  function normalizeChunkingLanguageList(value: string[]): string[] {
355
439
  const deduped = new Set<string>();
356
440
  for (const language of value) {
@@ -370,6 +454,15 @@ function validateChunkingConfig(config: RetrievalChunkingConfig): void {
370
454
  if (config.fallback_strategy !== "sliding") {
371
455
  throw new Error("invalid retrieval chunking config: fallback_strategy must be sliding");
372
456
  }
457
+ if (!Number.isInteger(config.target_chunk_tokens) || config.target_chunk_tokens <= 0) {
458
+ throw new Error("invalid retrieval chunking config: target_chunk_tokens must be a positive integer");
459
+ }
460
+ if (!Number.isInteger(config.chunk_overlap_tokens) || config.chunk_overlap_tokens <= 0) {
461
+ throw new Error("invalid retrieval chunking config: chunk_overlap_tokens must be a positive integer");
462
+ }
463
+ if (config.chunk_overlap_tokens >= config.target_chunk_tokens) {
464
+ throw new Error("invalid retrieval chunking config: chunk_overlap_tokens must be less than target_chunk_tokens");
465
+ }
373
466
  if (!Number.isInteger(config.parse_timeout_ms) || config.parse_timeout_ms <= 0) {
374
467
  throw new Error("invalid retrieval chunking config: parse_timeout_ms must be a positive integer");
375
468
  }
@@ -411,19 +504,24 @@ function scoringConfigChecksum(config: RetrievalScoringConfig): string {
411
504
  return sha256(stableSerialize(config)).slice(0, 12);
412
505
  }
413
506
 
507
/**
 * Normalizes the configured internal candidate depth.
 *
 * Undefined or non-finite input falls back to DEFAULT_INTERNAL_CANDIDATE_DEPTH;
 * finite input is truncated toward zero. The result is then clamped to the
 * inclusive range [MIN_INTERNAL_CANDIDATE_DEPTH, MAX_INTERNAL_CANDIDATE_DEPTH].
 */
+ function clampInternalCandidateDepth(value: number | undefined): number {
508
+ const raw = Number.isFinite(value) ? Math.trunc(value ?? DEFAULT_INTERNAL_CANDIDATE_DEPTH) : DEFAULT_INTERNAL_CANDIDATE_DEPTH;
509
+ return Math.max(MIN_INTERNAL_CANDIDATE_DEPTH, Math.min(MAX_INTERNAL_CANDIDATE_DEPTH, raw));
510
+ }
511
+
414
512
  // Closed set of reason labels; the RetrievalReason type is derived from this tuple.
  // This release renames "exact symbol match" to "exact literal match" and
  // "path and token overlap" to "path token overlap".
  const REASON_STRINGS = [
415
513
  "semantic match",
416
- "exact symbol match",
417
- "path and token overlap",
514
+ "exact literal match",
515
+ "path token overlap",
418
516
  "recently modified relevant module"
419
517
  ] as const;
420
518
 
421
519
  export type RetrievalReason = (typeof REASON_STRINGS)[number];
422
520
 
423
521
  export class RetrievalError extends Error {
424
- readonly code: "INVALID_ARGUMENT" | "NOT_FOUND" | "UPSTREAM_FAILURE";
522
+ readonly code: "INVALID_ARGUMENT" | "NOT_FOUND" | "RATE_LIMITED" | "UPSTREAM_FAILURE";
425
523
 
426
- constructor(code: "INVALID_ARGUMENT" | "NOT_FOUND" | "UPSTREAM_FAILURE", message: string) {
524
+ constructor(code: "INVALID_ARGUMENT" | "NOT_FOUND" | "RATE_LIMITED" | "UPSTREAM_FAILURE", message: string) {
427
525
  super(message);
428
526
  this.code = code;
429
527
  }
@@ -510,13 +608,18 @@ export interface IndexingReport {
510
608
 
511
609
  export interface RetrievalCoreOptions {
512
610
  cacheTtlSeconds?: number;
611
+ internalCandidateDepth?: number;
513
612
  embeddingProvider?: EmbeddingProvider;
514
613
  embeddingDescriptor?: EmbeddingDescriptor;
614
+ rerankerProvider?: RerankerProvider;
615
+ rerankerTopN?: number;
616
+ enhancerProvider?: EnhancerGenerationProvider;
515
617
  observability?: Observability;
516
618
  scoringProfile?: BuiltinRetrievalScoringProfileId;
517
619
  scoringProfileId?: string;
518
620
  scoringConfig?: RetrievalScoringConfigInput;
519
621
  enhancerConfig?: RetrievalEnhancerConfigInput;
622
+ enhancerGenerationConfig?: RetrievalEnhancerGenerationConfigInput;
520
623
  chunkingConfig?: RetrievalChunkingConfigInput;
521
624
  enhancerDecisionTraceEnabled?: boolean;
522
625
  }
@@ -535,6 +638,67 @@ export interface EmbeddingProvider {
535
638
  describe?(): EmbeddingDescriptor;
536
639
  }
537
640
 
641
/** Identifies a reranker backend: a provider name and, optionally, the model it uses. */
+ export interface RerankerDescriptor {
642
+ provider: string;
643
+ model?: string;
644
+ }
645
+
646
/**
 * One scored entry returned by a reranker.
 * NOTE(review): `index` presumably refers to a position in the `documents`
 * array passed to rerank() — confirm against the provider implementation.
 */
+ export interface RerankerResult {
647
+ index: number;
648
+ relevance_score: number;
649
+ }
650
+
651
/**
 * Pluggable reranker: takes a query, candidate documents and a `top_n` value,
 * and resolves to scored results. `describe` is optional.
 */
+ export interface RerankerProvider {
652
+ rerank(input: { query: string; documents: string[]; top_n: number }): Promise<RerankerResult[]>;
653
+ describe?(): RerankerDescriptor;
654
+ }
655
+
656
+ export type EnhancerIntent = "bugfix" | "feature" | "refactor" | "docs" | "tests" | "unknown";
657
+ export type EnhancerOutputLanguage = "en" | "es" | "zh";
658
+ type ResolvedEnhancerPromptStyle = Exclude<EnhancePromptStyle, "auto">;
659
+
660
+ export interface EnhancerContextSnippet {
661
+ path: string;
662
+ start_line: number;
663
+ end_line: number;
664
+ reason: string;
665
+ snippet: string;
666
+ score: number;
667
+ }
668
+
669
+ export interface EnhancerGenerationRequest {
670
+ trace_id: string;
671
+ tenant_id: string;
672
+ workspace_id?: string;
673
+ request: EnhancePromptInput;
674
+ style_requested: EnhancePromptStyle;
675
+ style_resolved: ResolvedEnhancerPromptStyle;
676
+ intent: EnhancerIntent;
677
+ query_intent: "symbol-heavy" | "impl-focused" | "conceptual";
678
+ language: EnhancerOutputLanguage;
679
+ context_refs: ContextRef[];
680
+ context_snippets: EnhancerContextSnippet[];
681
+ warnings: string[];
682
+ questions: string[];
683
+ tool_mode: EnhancerToolMode;
684
+ abort_signal?: AbortSignal;
685
+ on_progress?: () => void;
686
+ }
687
+
688
+ export interface EnhancerGenerationResult {
689
+ enhanced_prompt: string;
690
+ }
691
+
692
+ export interface EnhancerProviderDescriptor {
693
+ provider: string;
694
+ model?: string;
695
+ }
696
+
697
+ export interface EnhancerGenerationProvider {
698
+ generate(input: EnhancerGenerationRequest): Promise<EnhancerGenerationResult>;
699
+ describe?(): EnhancerProviderDescriptor;
700
+ }
701
+
538
702
  export interface DeterministicEmbeddingProviderOptions {
539
703
  dimensions?: number;
540
704
  model?: string;
@@ -549,13 +713,88 @@ export interface OpenAICompatibleEmbeddingProviderOptions {
549
713
  timeout_ms?: number;
550
714
  batch_size?: number;
551
715
  max_retries?: number;
716
+ transient_forbidden_max_retries?: number;
717
+ request_limiter?: ProviderRequestLimiter;
718
+ request_limit_scope_id?: string;
719
+ max_requests_per_minute?: number;
720
+ index_max_wait_ms?: number;
721
+ query_max_wait_ms?: number;
722
+ observability?: Observability;
723
+ }
724
+
725
+ export interface OpenAICompatibleRerankerProviderOptions {
726
+ base_url: string;
727
+ api_key: string;
728
+ model?: string;
729
+ timeout_ms?: number;
730
+ request_limiter?: ProviderRequestLimiter;
731
+ request_limit_scope_id?: string;
732
+ max_requests_per_minute?: number;
733
+ rerank_max_wait_ms?: number;
552
734
  observability?: Observability;
553
735
  }
554
736
 
737
+ export interface ClaudeAgentEnhancerProviderOptions {
738
+ api_key: string;
739
+ model?: string;
740
+ base_url?: string;
741
+ max_tokens?: number;
742
+ path_to_claude_code_executable?: string;
743
+ permission_mode?: ClaudeCodePermissionMode;
744
+ }
745
+
746
+ export type ClaudeCodePermissionMode = "default" | "acceptEdits" | "bypassPermissions" | "plan";
747
+
748
/**
 * Arguments for one limiter acquisition. The limiters in this file require
 * max_requests_per_minute to be a positive integer and max_wait_ms to be a
 * non-negative integer; `scope` keys the bucket / Redis counter.
 */
+ export interface ProviderRateLimitAcquireInput {
749
+ scope: string;
750
+ max_requests_per_minute: number;
751
+ max_wait_ms: number;
752
+ }
753
+
754
/** Result of a successful acquisition; the limiters in this file report the total time slept in wait_ms. */
+ export interface ProviderRateLimitAcquireResult {
755
+ wait_ms: number;
756
+ }
757
+
758
/**
 * Contract for a provider request limiter. `mode` is an optional tag naming
 * the implementation kind; `acquire` resolves once a request slot is granted.
 */
+ export interface ProviderRequestLimiter {
759
+ readonly mode?: "local" | "redis" | "custom";
760
+ acquire(input: ProviderRateLimitAcquireInput): Promise<ProviderRateLimitAcquireResult>;
761
+ }
762
+
763
+ export interface RedisProviderRequestLimiterClient {
764
+ eval(script: string, numKeys: number, ...args: Array<string | number>): Promise<unknown>;
765
+ }
766
+
767
+ export interface RedisProviderRequestLimiterOptions {
768
+ redis: RedisProviderRequestLimiterClient;
769
+ key_prefix?: string;
770
+ window_ms?: number;
771
+ now?: () => number;
772
+ sleeper?: (ms: number) => Promise<void>;
773
+ }
774
+
555
775
  /**
   * Error for a failed embedding-provider request. Carries a `reason` string,
   * a `retryable` flag and, new in this release, an optional `retry_after_ms`.
   * `message` is forwarded to Error.
   */
  class EmbeddingProviderRequestError extends Error {
556
776
  constructor(
557
777
  readonly reason: string,
558
778
  readonly retryable: boolean,
779
+ message: string,
780
+ readonly retry_after_ms?: number
781
+ ) {
782
+ super(message);
783
+ }
784
+ }
785
+
786
/** Error for a failed reranker request; `reason` is a free-form string and `message` is forwarded to Error. */
+ class RerankerProviderRequestError extends Error {
787
+ constructor(
788
+ readonly reason: string,
789
+ message: string
790
+ ) {
791
+ super(message);
792
+ }
793
+ }
794
+
795
+ class EnhancerProviderRequestError extends Error {
796
+ constructor(
797
+ readonly reason: "timeout" | "rate_limited" | "schema_error" | "invalid_response" | "upstream_error",
559
798
  message: string
560
799
  ) {
561
800
  super(message);
@@ -611,44 +850,7 @@ function singularizeToken(token: string): string | undefined {
611
850
  }
612
851
 
613
852
  /**
   * Tokenizes text by delegating to tokenizeForRanking, which this file now
   * imports from "@rce-mcp/data-plane".
   *
   * The removed lines below are the previous inline implementation: split on
   * characters outside [a-z0-9_./-], then add each token, its path/underscore/
   * hyphen parts and camelCase segments, plus singular and plural variants.
   */
  function tokenize(text: string): string[] {
614
- const coarseTokens = text
615
- .split(/[^a-z0-9_./-]+/)
616
- .map((token) => token.trim())
617
- .filter(Boolean);
618
-
619
- const expandedTokens = new Set<string>();
620
- const addToken = (value: string): void => {
621
- const normalized = value.trim().toLowerCase();
622
- if (!normalized) {
623
- return;
624
- }
625
- expandedTokens.add(normalized);
626
- const singular = singularizeToken(normalized);
627
- if (singular) {
628
- expandedTokens.add(singular);
629
- }
630
- const plural = pluralizeToken(normalized);
631
- if (plural) {
632
- expandedTokens.add(plural);
633
- }
634
- };
635
-
636
- for (const token of coarseTokens) {
637
- addToken(token);
638
- for (const part of token.split(/[./_-]+/).filter(Boolean)) {
639
- addToken(part);
640
- const camelSplit = part
641
- .replace(/([a-z0-9])([A-Z])/g, "$1 $2")
642
- .split(/\s+/)
643
- .map((segment) => segment.trim().toLowerCase())
644
- .filter(Boolean);
645
- for (const segment of camelSplit) {
646
- addToken(segment);
647
- }
648
- }
649
- }
650
-
651
- return [...expandedTokens];
853
+ return tokenizeForRanking(text);
652
854
  }
653
855
 
654
856
  function lexicalScore(query: string, haystack: string): number {
@@ -699,21 +901,130 @@ function looksLowInformation(snippet: string): boolean {
699
901
 
700
902
  /**
   * Picks the single reason label reported for a search result.
   *
   * Priority order in the new version: literal_match -> "exact literal match";
   * path_match -> "path token overlap"; recency_boosted -> "recently modified
   * relevant module"; lexical > 0.3 -> "path token overlap"; otherwise
   * "semantic match".
   */
  function chooseReason(input: {
701
903
  lexical: number;
904
+ literal_match: boolean;
702
905
  path_match: boolean;
703
906
  recency_boosted: boolean;
704
907
  }): RetrievalReason {
908
+ if (input.literal_match) {
909
+ return "exact literal match";
910
+ }
705
911
  if (input.path_match) {
706
- return "exact symbol match";
912
+ return "path token overlap";
707
913
  }
708
914
  if (input.recency_boosted) {
709
915
  return "recently modified relevant module";
710
916
  }
711
917
  if (input.lexical > 0.3) {
712
- return "path and token overlap";
918
+ return "path token overlap";
713
919
  }
714
920
  return "semantic match";
715
921
  }
716
922
 
923
/**
 * True when `reason` is the current "exact literal match" label or the
 * pre-rename "exact symbol match" label.
 */
+ function isExactLiteralReason(reason: string): boolean {
924
+ return reason === "exact literal match" || reason === "exact symbol match";
925
+ }
926
+
927
/**
 * Outcome of applyLiteralBoost: the clamped boost, whether any boost was
 * applied, and how many literals matched in the path versus the snippet.
 */
+ interface LiteralBoostResult {
928
+ boost: number;
929
+ matched: boolean;
930
+ path_matches: number;
931
+ snippet_matches: number;
932
+ }
933
+
934
/**
 * Extracts up to 24 distinct, lower-cased "specific" literals from a query.
 *
 * Candidates come from four sources, in order: extractLikelyCodeSymbols(query, 24),
 * extractPathLikeSymbols(query) (each path plus its final segment), RCE_* style
 * names, and file names ending in ts/tsx/js/jsx/mjs/cjs/py/go/json/md.
 *
 * A candidate is trimmed and stripped of leading quote/open-bracket characters
 * and trailing quote/close-bracket/punctuation characters. It is kept only if
 * it is at least 3 characters long, not already seen, and looks specific:
 * an upper-case name with at least two underscores, contains "/" or ".",
 * has camelCase/PascalCase casing, or contains a hyphen.
 */
+ function extractSearchLiterals(query: string): string[] {
935
+ const literals: string[] = [];
936
+ const seen = new Set<string>();
937
+
938
+ const addLiteral = (raw: string): void => {
939
+ const cleaned = raw.trim().replace(/^[`"'([{]+|[`"')\]}:;,.]+$/g, "");
940
+ const normalized = cleaned.toLowerCase();
941
+ if (!normalized || seen.has(normalized)) {
942
+ return;
943
+ }
944
+ if (normalized.length < 3) {
945
+ return;
946
+ }
947
+ const looksEnvLiteral = /^[A-Z0-9]+(?:_[A-Z0-9]+){2,}$/.test(cleaned);
948
+ const looksPathOrFileLiteral = /[/.]/.test(cleaned);
949
+ const looksCamelLiteral = /[a-z][A-Z]/.test(cleaned) || /[A-Z][a-z]+[A-Z]/.test(cleaned);
950
+ const looksHyphenLiteral = cleaned.includes("-");
951
+ const isSpecificLiteral = looksEnvLiteral || looksPathOrFileLiteral || looksCamelLiteral || looksHyphenLiteral;
952
+ if (!isSpecificLiteral) {
953
+ return;
954
+ }
955
+ seen.add(normalized);
956
+ literals.push(normalized);
957
+ };
958
+
959
+ for (const symbol of extractLikelyCodeSymbols(query, 24)) {
960
+ addLiteral(symbol);
961
+ }
962
+
963
+ for (const pathSymbol of extractPathLikeSymbols(query)) {
964
+ addLiteral(pathSymbol);
965
+ const leaf = normalizePath(pathSymbol).split("/").pop();
966
+ if (leaf) {
967
+ addLiteral(leaf);
968
+ }
969
+ }
970
+
971
+ for (const envMatch of query.matchAll(/\bRCE_[A-Z0-9_]{4,}\b/g)) {
972
+ addLiteral(envMatch[0] ?? "");
973
+ }
974
+
975
+ for (const fileName of query.matchAll(/\b[A-Za-z0-9_.-]+\.(?:ts|tsx|js|jsx|mjs|cjs|py|go|json|md)\b/g)) {
976
+ addLiteral(fileName[0] ?? "");
977
+ }
978
+
979
+ return literals.slice(0, 24);
980
+ }
981
+
982
/**
 * Computes a score boost for a candidate whose path or snippet contains one
 * of the query literals (case-insensitive substring match on the candidate;
 * presumably the literals are already lower-cased, as extractSearchLiterals
 * produces them — verify against the caller).
 *
 * For each literal, a path hit adds literal_path_boost and skips the snippet
 * check; otherwise a snippet hit adds literal_snippet_boost. The returned
 * boost is clamped to literal_max_boost, and `matched` is true when the
 * clamped boost is positive. An empty literal list yields a zero result.
 *
 * NOTE(review): the early-exit cap check sits after the `continue`, so it is
 * skipped on path hits; path_matches can therefore keep counting after the
 * cap is reached, while snippet hits stop at the cap. The returned boost is
 * unaffected because of the final clamp.
 */
+ function applyLiteralBoost(input: {
983
+ path: string;
984
+ snippet: string;
985
+ literals: string[];
986
+ path_bias: RetrievalPathBiasConfig;
987
+ }): LiteralBoostResult {
988
+ if (input.literals.length === 0) {
989
+ return {
990
+ boost: 0,
991
+ matched: false,
992
+ path_matches: 0,
993
+ snippet_matches: 0
994
+ };
995
+ }
996
+
997
+ const normalizedPath = input.path.toLowerCase();
998
+ const normalizedSnippet = input.snippet.toLowerCase();
999
+ const pathBias = input.path_bias;
1000
+ let boost = 0;
1001
+ let pathMatches = 0;
1002
+ let snippetMatches = 0;
1003
+
1004
+ for (const literal of input.literals) {
1005
+ if (normalizedPath.includes(literal)) {
1006
+ boost += pathBias.literal_path_boost;
1007
+ pathMatches += 1;
1008
+ continue;
1009
+ }
1010
+ if (normalizedSnippet.includes(literal)) {
1011
+ boost += pathBias.literal_snippet_boost;
1012
+ snippetMatches += 1;
1013
+ }
1014
+ if (boost >= pathBias.literal_max_boost) {
1015
+ break;
1016
+ }
1017
+ }
1018
+
1019
+ const clampedBoost = Math.min(pathBias.literal_max_boost, boost);
1020
+ return {
1021
+ boost: clampedBoost,
1022
+ matched: clampedBoost > 0,
1023
+ path_matches: pathMatches,
1024
+ snippet_matches: snippetMatches
1025
+ };
1026
+ }
1027
+
717
1028
  const DOC_INTENT_TOKENS = new Set([
718
1029
  "adr",
719
1030
  "architecture",
@@ -769,6 +1080,22 @@ const UI_COMPONENT_TOKENS = new Set(["component", "layout", "react", "tsx", "ui"
769
1080
  const FILE_LOOKUP_TOKENS = new Set(["entrypoint", "file", "locate", "path", "where", "which"]);
770
1081
  const TEST_INTENT_TOKENS = new Set(["assert", "coverage", "e2e", "integration", "spec", "test", "tests", "unit"]);
771
1082
  const EXAMPLE_INTENT_TOKENS = new Set(["demo", "example", "examples", "sample", "tutorial"]);
1083
// Query tokens that hasSecurityTraceIntent treats as a sign of a
// security/tenancy-tracing query; membership is by exact token match.
+ const SECURITY_TRACE_INTENT_TOKENS = new Set([
1084
+ "auth",
1085
+ "authorization",
1086
+ "binding",
1087
+ "config",
1088
+ "enforce",
1089
+ "mcp",
1090
+ "project_root_path",
1091
+ "security",
1092
+ "session",
1093
+ "stdio",
1094
+ "tenant",
1095
+ "token",
1096
+ "workspace",
1097
+ "workspace_id"
1098
+ ]);
772
1099
 
773
1100
  const SOURCE_PATH_PREFIXES = ["src/", "app/", "apps/", "crates/", "internal/", "lib/", "package/", "packages/"];
774
1101
  const LOW_PRIORITY_PATH_PREFIXES = [
@@ -852,6 +1179,24 @@ function hasExampleIntent(tokens: string[]): boolean {
852
1179
  return tokens.some((token) => EXAMPLE_INTENT_TOKENS.has(token));
853
1180
  }
854
1181
 
1182
/**
 * Detects a security-trace style query.
 *
 * Returns true when any token is in SECURITY_TRACE_INTENT_TOKENS, or when the
 * raw query text contains tenant_id, workspace_id, project_root_path, or an
 * upper-case RCE_-prefixed name (case-sensitive, whole-word matches).
 */
+ function hasSecurityTraceIntent(tokens: string[], queryText: string): boolean {
1183
+ if (tokens.some((token) => SECURITY_TRACE_INTENT_TOKENS.has(token))) {
1184
+ return true;
1185
+ }
1186
+ return /\btenant_id\b|\bworkspace_id\b|\bproject_root_path\b|\bRCE_[A-Z0-9_]{4,}\b/.test(queryText);
1187
+ }
1188
+
1189
/**
 * Reports whether a path points at guidance or meta material rather than
 * implementation code.
 *
 * A path qualifies when it contains "tool-guidance" anywhere (this covers
 * files such as mcp-tool-guidance.ts), or when any of its directory segments
 * is exactly "guidance", "meta" or "_meta". Matching is case-insensitive.
 * Directory segments are compared whole, so a root-level "guidance/" directory
 * in a repo-relative path matches, while "metadata/" does not.
 *
 * @param path - Forward-slash separated file path.
 * @returns True for guidance/meta paths.
 */
function isGuidanceOrMetaPath(path: string): boolean {
  const normalized = path.toLowerCase();
  if (normalized.includes("tool-guidance")) {
    return true;
  }
  // The last segment is the file name; only the directories above it count.
  const directories = normalized.split("/").slice(0, -1);
  return directories.some(
    (segment) => segment === "guidance" || segment === "meta" || segment === "_meta"
  );
}
1199
+
855
1200
  function pathQualityBias(path: string, queryTokens: string[], config: RetrievalScoringConfig, queryText?: string): number {
856
1201
  const normalizedPath = path.toLowerCase();
857
1202
  const docIntent = hasDocIntent(queryTokens);
@@ -862,6 +1207,7 @@ function pathQualityBias(path: string, queryTokens: string[], config: RetrievalS
862
1207
  const uiComponentIntent = hasUiComponentIntent(queryTokens);
863
1208
  const testIntent = hasTestIntent(queryTokens);
864
1209
  const exampleIntent = hasExampleIntent(queryTokens);
1210
+ const securityTraceIntent = hasSecurityTraceIntent(queryTokens, queryText ?? queryTokens.join(" "));
865
1211
  let bias = 0;
866
1212
  const pathBias = config.path_bias;
867
1213
 
@@ -934,6 +1280,9 @@ function pathQualityBias(path: string, queryTokens: string[], config: RetrievalS
934
1280
  if (docsPreferred && isSourcePath) {
935
1281
  bias -= pathBias.doc_intent_source_penalty;
936
1282
  }
1283
+ if (securityTraceIntent && !docsPreferred && isGuidanceOrMetaPath(normalizedPath)) {
1284
+ bias -= pathBias.security_trace_meta_penalty;
1285
+ }
937
1286
 
938
1287
  if (workspaceManifestIntent && normalizedPath === "cargo.toml") {
939
1288
  bias += pathBias.workspace_manifest_root_boost;
@@ -1020,8 +1369,8 @@ function buildChunks(file: RawFile, chunkingConfig: RetrievalChunkingConfig): Ch
1020
1369
  config: {
1021
1370
  strategy: chunkingConfig.strategy,
1022
1371
  fallback_strategy: chunkingConfig.fallback_strategy,
1023
- target_chunk_tokens: TARGET_CHUNK_TOKENS,
1024
- chunk_overlap_tokens: CHUNK_OVERLAP_TOKENS,
1372
+ target_chunk_tokens: chunkingConfig.target_chunk_tokens,
1373
+ chunk_overlap_tokens: chunkingConfig.chunk_overlap_tokens,
1025
1374
  max_chunks_per_file: MAX_CHUNKS_PER_FILE,
1026
1375
  parse_timeout_ms: chunkingConfig.parse_timeout_ms,
1027
1376
  enabled_languages: chunkingConfig.enabled_languages
@@ -1076,6 +1425,184 @@ function sleep(ms: number): Promise<void> {
1076
1425
  });
1077
1426
  }
1078
1427
 
1428
/**
 * Thrown by a provider request limiter when a request slot cannot be obtained
 * within the caller's wait budget.
 *
 * `retry_after_ms` is the limiter's estimate of how long to wait before the
 * next attempt. `name` is set explicitly so logs and `String(err)` identify
 * the error type instead of showing a generic "Error".
 */
export class ProviderRateLimitExceededError extends Error {
  readonly retry_after_ms: number;

  constructor(message: string, retry_after_ms: number) {
    super(message);
    this.name = "ProviderRateLimitExceededError";
    this.retry_after_ms = retry_after_ms;
  }
}
1436
+
1437
/** Per-scope state for LocalProviderRequestLimiter: tokens currently available and the time of the last refill. */
+ interface LocalProviderRateBucket {
1438
+ tokens: number;
1439
+ last_refill_ms: number;
1440
+ }
1441
+
1442
/**
 * In-process token-bucket limiter keyed by scope.
 *
 * Each scope gets a bucket that starts full (max_requests_per_minute tokens)
 * and refills continuously at max_requests_per_minute / 60 000 tokens per
 * millisecond, capped at max_requests_per_minute. `now` and `sleeper` can be
 * injected through the constructor; they default to Date.now and the module's
 * sleep helper.
 *
 * acquire() takes one token if available. Otherwise it sleeps until a token
 * should be available and retries, as long as that wait fits in the remaining
 * max_wait_ms budget; if not, it throws ProviderRateLimitExceededError with
 * the estimated retry delay. The returned wait_ms is the sum of the requested
 * sleep durations. Non-integer or out-of-range inputs throw a plain Error.
 */
+ export class LocalProviderRequestLimiter implements ProviderRequestLimiter {
1443
+ readonly mode = "local" as const;
1444
+ private readonly buckets = new Map<string, LocalProviderRateBucket>();
1445
+ private readonly now: () => number;
1446
+ private readonly sleeper: (ms: number) => Promise<void>;
1447
+
1448
+ constructor(options?: {
1449
+ now?: () => number;
1450
+ sleeper?: (ms: number) => Promise<void>;
1451
+ }) {
1452
+ this.now = options?.now ?? (() => Date.now());
1453
+ this.sleeper = options?.sleeper ?? sleep;
1454
+ }
1455
+
1456
+ async acquire(input: ProviderRateLimitAcquireInput): Promise<ProviderRateLimitAcquireResult> {
1457
+ if (!Number.isInteger(input.max_requests_per_minute) || input.max_requests_per_minute <= 0) {
1458
+ throw new Error("provider limiter requires max_requests_per_minute to be a positive integer");
1459
+ }
1460
+ if (!Number.isInteger(input.max_wait_ms) || input.max_wait_ms < 0) {
1461
+ throw new Error("provider limiter requires max_wait_ms to be a non-negative integer");
1462
+ }
1463
+
1464
+ const refillPerMs = input.max_requests_per_minute / 60_000;
1465
+ let waitedMs = 0;
1466
+ const deadline = this.now() + input.max_wait_ms;
1467
+
1468
+ while (true) {
1469
+ const nowMs = this.now();
1470
+ let bucket = this.buckets.get(input.scope);
1471
+ if (!bucket) {
1472
+ bucket = {
1473
+ tokens: input.max_requests_per_minute,
1474
+ last_refill_ms: nowMs
1475
+ };
1476
+ this.buckets.set(input.scope, bucket);
1477
+ }
1478
+
// Refill only when the clock has moved forward since the last refill.
1479
+ if (nowMs > bucket.last_refill_ms) {
1480
+ const elapsedMs = nowMs - bucket.last_refill_ms;
1481
+ bucket.tokens = Math.min(input.max_requests_per_minute, bucket.tokens + elapsedMs * refillPerMs);
1482
+ bucket.last_refill_ms = nowMs;
1483
+ }
1484
+
1485
+ if (bucket.tokens >= 1) {
1486
+ bucket.tokens -= 1;
1487
+ return { wait_ms: waitedMs };
1488
+ }
1489
+
// Time until the bucket holds one whole token again, at least 1 ms.
1490
+ const retryAfterMs = Math.max(1, Math.ceil((1 - bucket.tokens) / refillPerMs));
1491
+ const remainingMs = deadline - nowMs;
1492
+ if (remainingMs <= 0 || retryAfterMs > remainingMs) {
1493
+ throw new ProviderRateLimitExceededError(
1494
+ `provider request rate limit exceeded for scope "${input.scope}"`,
1495
+ Math.max(1, retryAfterMs)
1496
+ );
1497
+ }
1498
+
1499
+ const sleepMs = Math.max(1, Math.min(retryAfterMs, remainingMs));
1500
+ await this.sleeper(sleepMs);
1501
+ waitedMs += sleepMs;
1502
+ }
1503
+ }
1504
+ }
1505
+
1506
/** Parsed outcome of one Redis limiter script call: whether the request was allowed and, if not, how long to wait. */
+ interface RedisLimiterAttemptResult {
1507
+ allowed: boolean;
1508
+ retry_after_ms: number;
1509
+ }
1510
+
1511
// Lua script for a fixed-window counter. KEYS[1] is the counter key, ARGV[1]
// the limit and ARGV[2] the window length in ms. It increments the counter,
// sets the expiry when the counter is first created, and returns {1, 0} while
// the count is within the limit. Otherwise it returns {0, ttl}, where ttl is
// the key's remaining lifetime (or the full window if the key has no expiry).
// Rejected calls still increment the counter.
+ const REDIS_PROVIDER_LIMITER_SCRIPT = `
1512
+ local key = KEYS[1]
1513
+ local limit = tonumber(ARGV[1])
1514
+ local window_ms = tonumber(ARGV[2])
1515
+ local count = redis.call("INCR", key)
1516
+ if count == 1 then
1517
+ redis.call("PEXPIRE", key, window_ms)
1518
+ end
1519
+ if count <= limit then
1520
+ return {1, 0}
1521
+ end
1522
+ local ttl = redis.call("PTTL", key)
1523
+ if ttl < 0 then
1524
+ ttl = window_ms
1525
+ end
1526
+ return {0, ttl}
1527
+ `;
1528
+
1529
/**
 * Redis-backed limiter that counts requests per scope with
 * REDIS_PROVIDER_LIMITER_SCRIPT.
 *
 * The constructor requires a client exposing eval(), and accepts an optional
 * key prefix (default "rce:provider_rate_limit"), window length in ms
 * (default 60 000; must be a positive integer), clock and sleeper.
 *
 * acquire() runs the script for the key "<prefix>:<scope>". When the attempt
 * is rejected it sleeps for the reported retry delay and tries again, as long
 * as that delay fits in the remaining max_wait_ms budget; otherwise it throws
 * ProviderRateLimitExceededError. The returned wait_ms is the sum of the
 * requested sleep durations. An eval() reply that is not an array throws.
 *
 * NOTE(review): max_requests_per_minute is passed to the script as the limit
 * per window_ms, so with a non-default window the effective rate is not
 * "per minute" — confirm this is intended.
 */
+ export class RedisProviderRequestLimiter implements ProviderRequestLimiter {
1530
+ readonly mode = "redis" as const;
1531
+ private readonly redis: RedisProviderRequestLimiterClient;
1532
+ private readonly keyPrefix: string;
1533
+ private readonly windowMs: number;
1534
+ private readonly now: () => number;
1535
+ private readonly sleeper: (ms: number) => Promise<void>;
1536
+
1537
+ constructor(options: RedisProviderRequestLimiterOptions) {
1538
+ if (!options.redis || typeof options.redis.eval !== "function") {
1539
+ throw new Error("invalid redis provider limiter config: redis client with eval() is required");
1540
+ }
1541
+ this.redis = options.redis;
1542
+ this.keyPrefix = options.key_prefix?.trim() || "rce:provider_rate_limit";
1543
+ this.windowMs = options.window_ms ?? 60_000;
1544
+ this.now = options.now ?? (() => Date.now());
1545
+ this.sleeper = options.sleeper ?? sleep;
1546
+
1547
+ if (!Number.isInteger(this.windowMs) || this.windowMs <= 0) {
1548
+ throw new Error("invalid redis provider limiter config: window_ms must be a positive integer");
1549
+ }
1550
+ }
1551
+
1552
+ async acquire(input: ProviderRateLimitAcquireInput): Promise<ProviderRateLimitAcquireResult> {
1553
+ if (!Number.isInteger(input.max_requests_per_minute) || input.max_requests_per_minute <= 0) {
1554
+ throw new Error("provider limiter requires max_requests_per_minute to be a positive integer");
1555
+ }
1556
+ if (!Number.isInteger(input.max_wait_ms) || input.max_wait_ms < 0) {
1557
+ throw new Error("provider limiter requires max_wait_ms to be a non-negative integer");
1558
+ }
1559
+
1560
+ let waitedMs = 0;
1561
+ const deadline = this.now() + input.max_wait_ms;
1562
+
1563
+ while (true) {
1564
+ const attempt = await this.reserveAttempt(input.scope, input.max_requests_per_minute);
1565
+ if (attempt.allowed) {
1566
+ return { wait_ms: waitedMs };
1567
+ }
1568
+
1569
+ const nowMs = this.now();
1570
+ const remainingMs = deadline - nowMs;
1571
+ const retryAfterMs = Math.max(1, attempt.retry_after_ms);
1572
+ if (remainingMs <= 0 || retryAfterMs > remainingMs) {
1573
+ throw new ProviderRateLimitExceededError(
1574
+ `provider request rate limit exceeded for scope "${input.scope}"`,
1575
+ retryAfterMs
1576
+ );
1577
+ }
1578
+
1579
+ const sleepMs = Math.max(1, Math.min(retryAfterMs, remainingMs));
1580
+ await this.sleeper(sleepMs);
1581
+ waitedMs += sleepMs;
1582
+ }
1583
+ }
1584
+
// Runs the limiter script once and converts its two-element reply into a typed result.
1585
+ private async reserveAttempt(scope: string, maxRequestsPerMinute: number): Promise<RedisLimiterAttemptResult> {
1586
+ const key = `${this.keyPrefix}:${scope}`;
1587
+ const raw = await this.redis.eval(
1588
+ REDIS_PROVIDER_LIMITER_SCRIPT,
1589
+ 1,
1590
+ key,
1591
+ maxRequestsPerMinute,
1592
+ this.windowMs
1593
+ );
1594
+ if (Array.isArray(raw)) {
1595
+ const allowed = Number(raw[0] ?? 0) === 1;
1596
+ const retryAfterMs = Number(raw[1] ?? 0);
1597
+ return {
1598
+ allowed,
// A non-numeric delay falls back to the full window length.
1599
+ retry_after_ms: Number.isFinite(retryAfterMs) ? Math.max(0, Math.trunc(retryAfterMs)) : this.windowMs
1600
+ };
1601
+ }
1602
+ throw new Error("redis provider limiter returned unexpected eval() payload");
1603
+ }
1604
+ }
1605
+
1079
1606
  export class DeterministicEmbeddingProvider implements EmbeddingProvider {
1080
1607
  private readonly dimensions: number;
1081
1608
  private readonly model: string;
@@ -1110,6 +1637,12 @@ export class OpenAICompatibleEmbeddingProvider implements EmbeddingProvider {
1110
1637
  private readonly timeoutMs: number;
1111
1638
  private readonly batchSize: number;
1112
1639
  private readonly maxRetries: number;
1640
+ private readonly transientForbiddenMaxRetries: number;
1641
+ private readonly requestLimiter?: ProviderRequestLimiter;
1642
+ private readonly requestLimitScope: string;
1643
+ private readonly maxRequestsPerMinute: number;
1644
+ private readonly indexMaxWaitMs: number;
1645
+ private readonly queryMaxWaitMs: number;
1113
1646
  private readonly observability: Observability;
1114
1647
 
1115
1648
  constructor(options: OpenAICompatibleEmbeddingProviderOptions) {
@@ -1130,6 +1663,17 @@ export class OpenAICompatibleEmbeddingProvider implements EmbeddingProvider {
1130
1663
  this.timeoutMs = options.timeout_ms ?? DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_TIMEOUT_MS;
1131
1664
  this.batchSize = options.batch_size ?? DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_BATCH_SIZE;
1132
1665
  this.maxRetries = options.max_retries ?? DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_MAX_RETRIES;
1666
+ this.transientForbiddenMaxRetries =
1667
+ options.transient_forbidden_max_retries ?? DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_TRANSIENT_403_MAX_RETRIES;
1668
+ this.requestLimiter = options.request_limiter;
1669
+ this.requestLimitScope = resolveProviderLimiterScope({
1670
+ provider: "openai_compatible",
1671
+ apiKey,
1672
+ overrideScopeId: options.request_limit_scope_id
1673
+ });
1674
+ this.maxRequestsPerMinute = options.max_requests_per_minute ?? DEFAULT_PROVIDER_MAX_REQUESTS_PER_MINUTE;
1675
+ this.indexMaxWaitMs = options.index_max_wait_ms ?? DEFAULT_PROVIDER_LIMIT_INDEX_MAX_WAIT_MS;
1676
+ this.queryMaxWaitMs = options.query_max_wait_ms ?? DEFAULT_PROVIDER_LIMIT_QUERY_MAX_WAIT_MS;
1133
1677
  this.observability = options.observability ?? getObservability("retrieval-core");
1134
1678
 
1135
1679
  if (!Number.isInteger(this.dimensions) || this.dimensions <= 0) {
@@ -1144,6 +1688,20 @@ export class OpenAICompatibleEmbeddingProvider implements EmbeddingProvider {
1144
1688
  if (!Number.isInteger(this.maxRetries) || this.maxRetries < 0) {
1145
1689
  throw new Error("invalid openai-compatible embedding config: max_retries must be a non-negative integer");
1146
1690
  }
1691
+ if (!Number.isInteger(this.transientForbiddenMaxRetries) || this.transientForbiddenMaxRetries < 0) {
1692
+ throw new Error(
1693
+ "invalid openai-compatible embedding config: transient_forbidden_max_retries must be a non-negative integer"
1694
+ );
1695
+ }
1696
+ if (!Number.isInteger(this.maxRequestsPerMinute) || this.maxRequestsPerMinute <= 0) {
1697
+ throw new Error("invalid openai-compatible embedding config: max_requests_per_minute must be a positive integer");
1698
+ }
1699
+ if (!Number.isInteger(this.indexMaxWaitMs) || this.indexMaxWaitMs < 0) {
1700
+ throw new Error("invalid openai-compatible embedding config: index_max_wait_ms must be a non-negative integer");
1701
+ }
1702
+ if (!Number.isInteger(this.queryMaxWaitMs) || this.queryMaxWaitMs < 0) {
1703
+ throw new Error("invalid openai-compatible embedding config: query_max_wait_ms must be a non-negative integer");
1704
+ }
1147
1705
  }
1148
1706
 
1149
1707
  describe(): EmbeddingDescriptor {
@@ -1175,11 +1733,12 @@ export class OpenAICompatibleEmbeddingProvider implements EmbeddingProvider {
1175
1733
  purpose
1176
1734
  } as const;
1177
1735
 
1178
- for (let attempt = 0; attempt <= this.maxRetries; attempt += 1) {
1736
+ let attempt = 0;
1737
+ while (true) {
1179
1738
  const startedAt = Date.now();
1180
1739
  this.observability.metrics.increment("retrieval_embedding_provider_requests_total", 1, labels);
1181
1740
  try {
1182
- return await this.embedBatchOnce(texts);
1741
+ return await this.embedBatchOnce(texts, purpose);
1183
1742
  } catch (error) {
1184
1743
  const failure = this.toProviderFailure(error);
1185
1744
  this.observability.metrics.increment("retrieval_embedding_provider_failures_total", 1, {
@@ -1187,23 +1746,31 @@ export class OpenAICompatibleEmbeddingProvider implements EmbeddingProvider {
1187
1746
  reason: failure.reason
1188
1747
  });
1189
1748
 
1190
- const shouldRetry = failure.retryable && attempt < this.maxRetries;
1749
+ const maxRetriesForFailure = this.maxRetriesForReason(failure.reason);
1750
+ const shouldRetry = failure.retryable && attempt < maxRetriesForFailure;
1191
1751
  this.observability.logger.warn("embedding provider request failed", {
1192
1752
  provider: "openai_compatible",
1193
1753
  model: this.model,
1194
1754
  purpose,
1195
1755
  reason: failure.reason,
1756
+ provider_message: failure.message,
1196
1757
  retryable: failure.retryable,
1197
1758
  retrying: shouldRetry,
1198
1759
  attempt: attempt + 1,
1199
- max_attempts: this.maxRetries + 1
1760
+ max_attempts: maxRetriesForFailure + 1,
1761
+ retry_after_ms: failure.retry_after_ms
1200
1762
  });
1201
1763
 
1202
1764
  if (shouldRetry) {
1203
- await sleep(this.retryDelayMs(attempt));
1765
+ await sleep(this.retryDelayMs(attempt, failure));
1766
+ attempt += 1;
1204
1767
  continue;
1205
1768
  }
1206
1769
 
1770
+ if (failure.reason === "client_rate_limited" || failure.reason === "rate_limited") {
1771
+ throw new RetrievalError("RATE_LIMITED", `embedding provider rate limited; ${failure.message}`);
1772
+ }
1773
+
1207
1774
  throw new RetrievalError(
1208
1775
  "UPSTREAM_FAILURE",
1209
1776
  `embedding provider request failed (${failure.reason}); ${failure.message}`
@@ -1212,11 +1779,50 @@ export class OpenAICompatibleEmbeddingProvider implements EmbeddingProvider {
1212
1779
  this.observability.metrics.observe("retrieval_embedding_provider_latency_ms", Date.now() - startedAt, labels);
1213
1780
  }
1214
1781
  }
1782
+ }
1783
+
1784
/**
 * Acquires a slot from the configured provider request limiter before an
 * embedding request is sent. Does nothing when no limiter is configured.
 *
 * The wait budget is `indexMaxWaitMs` for purpose "index" and `queryMaxWaitMs`
 * for every other purpose. On success the wait time and a shaped-request
 * counter are recorded.
 *
 * @param purpose Embedding purpose; selects the wait budget and labels metrics.
 * @throws EmbeddingProviderRequestError with reason "client_rate_limited" when
 *         the limiter raises ProviderRateLimitExceededError. It is marked
 *         retryable only for purpose "index" and carries the limiter's
 *         `retry_after_ms`.
 * @throws Any other limiter error, rethrown unchanged.
 */
private async enforceRequestLimit(purpose: EmbeddingPurpose): Promise<void> {
  const limiter = this.requestLimiter;
  if (!limiter) {
    return;
  }

  const maxWaitMs = purpose === "index" ? this.indexMaxWaitMs : this.queryMaxWaitMs;
  const labels = {
    provider: "openai_compatible",
    model: this.model,
    purpose,
    limiter_mode: limiter.mode ?? "custom"
  } as const;

  try {
    const acquired = await limiter.acquire({
      scope: this.requestLimitScope,
      max_requests_per_minute: this.maxRequestsPerMinute,
      max_wait_ms: maxWaitMs
    });
    this.observability.metrics.observe("retrieval_provider_limiter_wait_ms", acquired.wait_ms, labels);
    this.observability.metrics.increment("retrieval_provider_requests_shaped_total", 1, labels);
  } catch (error) {
    if (error instanceof ProviderRateLimitExceededError) {
      this.observability.metrics.increment("retrieval_provider_limiter_block_total", 1, {
        ...labels,
        reason: "wait_timeout"
      });
      // Only indexing is allowed to retry after a client-side limiter block.
      const retryable = purpose === "index";
      throw new EmbeddingProviderRequestError(
        "client_rate_limited",
        retryable,
        `${error.message}; retry_after_ms=${error.retry_after_ms}`,
        error.retry_after_ms
      );
    }

    // Anything else is a limiter failure, not an exhausted wait budget, so it
    // must not be reported under the "wait_timeout" reason.
    this.observability.metrics.increment("retrieval_provider_limiter_block_total", 1, {
      ...labels,
      reason: "limiter_error"
    });
    throw error;
  }
}
1218
1822
 
1219
- private async embedBatchOnce(texts: string[]): Promise<number[][]> {
1823
+ private async embedBatchOnce(texts: string[], purpose: EmbeddingPurpose): Promise<number[][]> {
1824
+ await this.enforceRequestLimit(purpose);
1825
+
1220
1826
  const controller = new AbortController();
1221
1827
  const timeoutId = setTimeout(() => {
1222
1828
  controller.abort();
@@ -1253,13 +1859,28 @@ export class OpenAICompatibleEmbeddingProvider implements EmbeddingProvider {
1253
1859
  if (!response.ok) {
1254
1860
  const details = await safeResponseText(response);
1255
1861
  if (response.status === 429) {
1256
- throw new EmbeddingProviderRequestError("rate_limited", true, `HTTP 429 ${details}`.trim());
1862
+ throw new EmbeddingProviderRequestError(
1863
+ "rate_limited",
1864
+ true,
1865
+ `HTTP 429 ${details}`.trim(),
1866
+ parseRetryAfterMs(response.headers.get("retry-after"))
1867
+ );
1257
1868
  }
1258
1869
  if (response.status >= 500) {
1259
1870
  throw new EmbeddingProviderRequestError("http_5xx", true, `HTTP ${response.status} ${details}`.trim());
1260
1871
  }
1261
- if (response.status === 401 || response.status === 403) {
1262
- throw new EmbeddingProviderRequestError("auth_error", false, `HTTP ${response.status} ${details}`.trim());
1872
+ if (response.status === 401) {
1873
+ throw new EmbeddingProviderRequestError("auth_error", false, `HTTP 401 ${details}`.trim());
1874
+ }
1875
+ if (response.status === 403) {
1876
+ const retryAfterMs = parseRetryAfterMs(response.headers.get("retry-after"));
1877
+ const retryable = this.isTransientForbidden(details, retryAfterMs);
1878
+ throw new EmbeddingProviderRequestError(
1879
+ retryable ? "forbidden_transient" : "auth_error",
1880
+ retryable,
1881
+ `HTTP 403 ${details}`.trim(),
1882
+ retryAfterMs
1883
+ );
1263
1884
  }
1264
1885
  if (response.status === 404) {
1265
1886
  throw new EmbeddingProviderRequestError("endpoint_not_found", false, `HTTP 404 ${details}`.trim());
@@ -1328,25 +1949,951 @@ export class OpenAICompatibleEmbeddingProvider implements EmbeddingProvider {
1328
1949
  return vectors;
1329
1950
  }
1330
1951
 
1331
- private retryDelayMs(attempt: number): number {
1332
- const base = 100 * (attempt + 1);
1333
- const jitter = Math.floor(Math.random() * 75);
1334
- return base + jitter;
1952
/**
 * Returns the retry budget that applies to a failure reason.
 *
 * "forbidden_transient" failures use the larger of the general budget and the
 * transient-403 budget; every other reason uses the general budget.
 */
private maxRetriesForReason(reason: string): number {
  return reason === "forbidden_transient"
    ? Math.max(this.maxRetries, this.transientForbiddenMaxRetries)
    : this.maxRetries;
}
1958
+
1959
/**
 * Computes how long to sleep before the next retry.
 *
 * "forbidden_transient" failures back off exponentially (250ms doubling per
 * attempt, capped at 2500ms) with up to 150ms of jitter. All other failures
 * back off linearly (100ms per attempt) with up to 75ms of jitter. When the
 * failure carries a `retry_after_ms` hint, the result is never shorter than
 * that hint (and never shorter than 1ms).
 *
 * @param attempt Zero-based count of retries already performed.
 * @param failure The classified provider failure being retried.
 */
private retryDelayMs(attempt: number, failure: EmbeddingProviderRequestError): number {
  const transientForbidden = failure.reason === "forbidden_transient";
  const backoffMs = transientForbidden ? Math.min(2_500, 250 * 2 ** attempt) : 100 * (attempt + 1);
  const jitterRangeMs = transientForbidden ? 150 : 75;
  const delayMs = backoffMs + Math.floor(Math.random() * jitterRangeMs);

  const hintedMs = failure.retry_after_ms;
  if (hintedMs === undefined) {
    return delayMs;
  }
  return Math.max(delayMs, 1, hintedMs);
}
1336
1972
 
1337
- private toProviderFailure(error: unknown): EmbeddingProviderRequestError {
1338
- if (error instanceof EmbeddingProviderRequestError) {
1339
- return error;
1973
/**
 * Decides whether an HTTP 403 response should be treated as transient.
 *
 * A parsed retry hint always counts as transient. Otherwise the lower-cased,
 * trimmed response body is scanned for phrases that suggest a temporary
 * condition. An empty body is never transient.
 *
 * @param details Response body text.
 * @param retryAfterMs Parsed retry hint, if the response carried one.
 */
private isTransientForbidden(details: string, retryAfterMs?: number): boolean {
  if (retryAfterMs !== undefined) {
    return true;
  }

  const body = details.trim().toLowerCase();
  if (body.length === 0) {
    return false;
  }

  const mentionsAny = (phrases: readonly string[]): boolean => phrases.some((phrase) => body.includes(phrase));

  const transientSignals = [
    "rate limit",
    "too many requests",
    "temporar",
    "try again",
    "upstream",
    "timeout",
    "busy",
    "capacity",
    "bad_response_status_code"
  ];
  if (mentionsAny(transientSignals)) {
    return true;
  }

  // NOTE(review): this list cannot change the outcome today. It is consulted
  // after the transient phrases, and an unmatched body is also non-transient.
  // Confirm whether hard failures were meant to take precedence.
  const hardFailureSignals = [
    "invalid api key",
    "incorrect api key",
    "authentication",
    "unauthorized",
    "insufficient permissions",
    "insufficient scope",
    "permission denied",
    "organization not found",
    "account disabled",
    "insufficient quota",
    "quota exceeded",
    "billing",
    "credit",
    "payment required",
    "model not found",
    "unknown model",
    "unsupported model",
    "not allowed"
  ];
  if (mentionsAny(hardFailureSignals)) {
    return false;
  }

  return false;
}
2024
+
2025
/**
 * Normalizes anything thrown during an embedding request into an
 * EmbeddingProviderRequestError so the retry loop can inspect one shape.
 *
 * - An EmbeddingProviderRequestError is returned as-is.
 * - ProviderRateLimitExceededError becomes a non-retryable "client_rate_limited"
 *   failure that keeps the limiter's `retry_after_ms`.
 * - RetrievalError becomes "client_rate_limited" when its code is
 *   "RATE_LIMITED", otherwise "upstream_failure"; both are non-retryable.
 * - Every other value becomes a non-retryable "unknown_error".
 *
 * @param error The caught value.
 * @returns The classified failure.
 */
private toProviderFailure(error: unknown): EmbeddingProviderRequestError {
  if (error instanceof EmbeddingProviderRequestError) {
    return error;
  }
  if (error instanceof ProviderRateLimitExceededError) {
    // Forward the hint as a field too, not only inside the message, so the
    // structured `retry_after_ms` log value is populated for this path.
    return new EmbeddingProviderRequestError(
      "client_rate_limited",
      false,
      `${error.message}; retry_after_ms=${error.retry_after_ms}`,
      error.retry_after_ms
    );
  }
  if (error instanceof RetrievalError) {
    const reason = error.code === "RATE_LIMITED" ? "client_rate_limited" : "upstream_failure";
    return new EmbeddingProviderRequestError(reason, false, error.message);
  }
  const message = error instanceof Error ? error.message : String(error);
  return new EmbeddingProviderRequestError("unknown_error", false, message);
}
2047
+ }
2048
+
2049
/**
 * Reranker that calls an OpenAI-compatible `POST {base_url}/rerank` endpoint.
 *
 * Configuration is validated in the constructor, which throws a plain `Error`
 * on invalid input. When a request limiter is supplied, every non-empty rerank
 * call acquires a slot from it before the HTTP request is sent.
 */
export class OpenAICompatibleRerankerProvider implements RerankerProvider {
  private readonly endpoint: string;
  private readonly apiKey: string;
  private readonly model: string;
  private readonly timeoutMs: number;
  private readonly requestLimiter?: ProviderRequestLimiter;
  private readonly requestLimitScope: string;
  private readonly maxRequestsPerMinute: number;
  private readonly rerankMaxWaitMs: number;
  private readonly observability: Observability;

  /**
   * @param options Provider configuration. `base_url` and `api_key` must be
   *        non-empty after trimming; the numeric options fall back to the
   *        module defaults when omitted.
   * @throws Error when `base_url` or `api_key` is empty, or when `timeout_ms`,
   *         `max_requests_per_minute` or `rerank_max_wait_ms` is out of range.
   */
  constructor(options: OpenAICompatibleRerankerProviderOptions) {
    // Trailing slashes are dropped so the endpoint never contains "//rerank".
    const baseUrl = options.base_url.trim().replace(/\/+$/, "");
    if (baseUrl.length === 0) {
      throw new Error("invalid openai-compatible reranker config: base_url must be non-empty");
    }
    const apiKey = options.api_key.trim();
    if (apiKey.length === 0) {
      throw new Error("invalid openai-compatible reranker config: api_key must be non-empty");
    }

    this.endpoint = `${baseUrl}/rerank`;
    this.apiKey = apiKey;
    this.model = options.model?.trim() || DEFAULT_OPENAI_COMPATIBLE_RERANKER_MODEL;
    this.timeoutMs = options.timeout_ms ?? DEFAULT_OPENAI_COMPATIBLE_RERANKER_TIMEOUT_MS;
    this.requestLimiter = options.request_limiter;
    this.requestLimitScope = resolveProviderLimiterScope({
      provider: "openai_compatible",
      apiKey,
      overrideScopeId: options.request_limit_scope_id
    });
    this.maxRequestsPerMinute = options.max_requests_per_minute ?? DEFAULT_PROVIDER_MAX_REQUESTS_PER_MINUTE;
    this.rerankMaxWaitMs = options.rerank_max_wait_ms ?? DEFAULT_PROVIDER_LIMIT_RERANK_MAX_WAIT_MS;
    this.observability = options.observability ?? getObservability("retrieval-core");

    if (!Number.isInteger(this.timeoutMs) || this.timeoutMs <= 0) {
      throw new Error("invalid openai-compatible reranker config: timeout_ms must be a positive integer");
    }
    if (!Number.isInteger(this.maxRequestsPerMinute) || this.maxRequestsPerMinute <= 0) {
      throw new Error("invalid openai-compatible reranker config: max_requests_per_minute must be a positive integer");
    }
    if (!Number.isInteger(this.rerankMaxWaitMs) || this.rerankMaxWaitMs < 0) {
      throw new Error("invalid openai-compatible reranker config: rerank_max_wait_ms must be a non-negative integer");
    }
  }

  /** Identifies this provider and the configured model. */
  describe(): RerankerDescriptor {
    return {
      provider: "openai_compatible",
      model: this.model
    };
  }

  /**
   * Scores `documents` against `query` and returns the best `top_n` rows.
   *
   * Rows are sorted by descending score (ties broken by ascending index),
   * de-duplicated by index, and truncated to the effective `top_n`.
   *
   * @param input.query Query text sent to the provider.
   * @param input.documents Candidate documents; an empty list returns `[]`
   *        without calling the limiter or the provider.
   * @param input.top_n Requested result count. It is truncated to an integer
   *        and clamped to `[1, documents.length]`; a non-finite value means
   *        "all documents".
   * @throws RerankerProviderRequestError for limiter blocks, timeouts, network
   *         errors, non-2xx responses and malformed payloads.
   */
  async rerank(input: { query: string; documents: string[]; top_n: number }): Promise<RerankerResult[]> {
    if (input.documents.length === 0) {
      return [];
    }

    await this.enforceRequestLimit();

    // Normalize first so the request body and the final slice agree on one
    // integer. NaN would otherwise serialize as null and slice to zero rows.
    const requestedTopN = Number.isFinite(input.top_n) ? Math.trunc(input.top_n) : input.documents.length;
    const topN = Math.max(1, Math.min(requestedTopN, input.documents.length));
    const controller = new AbortController();
    const timeoutId = setTimeout(() => {
      controller.abort();
    }, this.timeoutMs);

    let response: Response;
    try {
      response = await fetch(this.endpoint, {
        method: "POST",
        headers: {
          authorization: `Bearer ${this.apiKey}`,
          "content-type": "application/json"
        },
        body: JSON.stringify({
          model: this.model,
          query: input.query,
          documents: input.documents,
          top_n: topN
        }),
        signal: controller.signal
      });
    } catch (error) {
      if (error && typeof error === "object" && "name" in error && (error as { name?: string }).name === "AbortError") {
        throw new RerankerProviderRequestError("timeout", `request timed out after ${this.timeoutMs}ms`);
      }
      throw new RerankerProviderRequestError(
        "network_error",
        error instanceof Error ? error.message : String(error)
      );
    } finally {
      // NOTE(review): the timer is cleared as soon as fetch() settles, so the
      // body reads below are not covered by timeout_ms.
      clearTimeout(timeoutId);
    }

    if (!response.ok) {
      const details = await safeResponseText(response);
      if (response.status === 429) {
        throw new RerankerProviderRequestError("rate_limited", `HTTP 429 ${details}`.trim());
      }
      if (response.status === 401 || response.status === 403) {
        throw new RerankerProviderRequestError("auth_error", `HTTP ${response.status} ${details}`.trim());
      }
      if (response.status === 404) {
        throw new RerankerProviderRequestError("endpoint_not_found", `HTTP 404 ${details}`.trim());
      }
      if (response.status >= 500) {
        throw new RerankerProviderRequestError("http_5xx", `HTTP ${response.status} ${details}`.trim());
      }
      throw new RerankerProviderRequestError("http_4xx", `HTTP ${response.status} ${details}`.trim());
    }

    let payload: unknown;
    try {
      payload = await response.json();
    } catch {
      throw new RerankerProviderRequestError("invalid_json", "provider returned non-JSON response");
    }

    if (!payload || typeof payload !== "object") {
      throw new RerankerProviderRequestError("invalid_response", "provider response must be an object");
    }

    // Rows are read from `results` when that key exists, otherwise from `data`.
    const maybeResults = "results" in payload ? (payload as { results?: unknown }).results : (payload as { data?: unknown }).data;
    if (!Array.isArray(maybeResults)) {
      throw new RerankerProviderRequestError("invalid_response", "provider response missing results array");
    }

    const output: RerankerResult[] = [];
    for (const row of maybeResults) {
      if (!row || typeof row !== "object") {
        throw new RerankerProviderRequestError("invalid_response", "rerank row must be an object");
      }
      const rawIndex = (row as { index?: unknown }).index;
      if (!Number.isInteger(rawIndex)) {
        throw new RerankerProviderRequestError("invalid_response", "rerank row index must be an integer");
      }
      const index = rawIndex as number;
      if (index < 0 || index >= input.documents.length) {
        throw new RerankerProviderRequestError("invalid_response", "rerank row index out of range");
      }
      // `relevance_score` is preferred; `score` is the fallback field name.
      const rawScore = (row as { relevance_score?: unknown; score?: unknown }).relevance_score ?? (row as { score?: unknown }).score;
      if (typeof rawScore !== "number" || !Number.isFinite(rawScore)) {
        throw new RerankerProviderRequestError("invalid_response", "rerank row score must be finite");
      }
      output.push({
        index,
        relevance_score: rawScore
      });
    }

    // Sorting before de-duplicating keeps the highest-scoring row per index.
    const seen = new Set<number>();
    const ordered = [...output]
      .sort((a, b) => b.relevance_score - a.relevance_score || a.index - b.index)
      .filter((row) => {
        if (seen.has(row.index)) {
          return false;
        }
        seen.add(row.index);
        return true;
      })
      .slice(0, topN);

    if (ordered.length === 0) {
      throw new RerankerProviderRequestError("invalid_response", "provider returned zero rerank results");
    }
    return ordered;
  }

  /**
   * Acquires a slot from the configured request limiter, waiting at most
   * `rerankMaxWaitMs`. Does nothing when no limiter is configured.
   *
   * @throws RerankerProviderRequestError with reason "rate_limited" when the
   *         limiter raises ProviderRateLimitExceededError.
   * @throws Any other limiter error, rethrown unchanged.
   */
  private async enforceRequestLimit(): Promise<void> {
    const limiter = this.requestLimiter;
    if (!limiter) {
      return;
    }
    const labels = {
      provider: "openai_compatible",
      model: this.model,
      purpose: "rerank",
      limiter_mode: limiter.mode ?? "custom"
    } as const;

    try {
      const acquired = await limiter.acquire({
        scope: this.requestLimitScope,
        max_requests_per_minute: this.maxRequestsPerMinute,
        max_wait_ms: this.rerankMaxWaitMs
      });
      this.observability.metrics.observe("retrieval_provider_limiter_wait_ms", acquired.wait_ms, labels);
      this.observability.metrics.increment("retrieval_provider_requests_shaped_total", 1, labels);
    } catch (error) {
      if (error instanceof ProviderRateLimitExceededError) {
        this.observability.metrics.increment("retrieval_provider_limiter_block_total", 1, {
          ...labels,
          reason: "wait_timeout"
        });
        throw new RerankerProviderRequestError(
          "rate_limited",
          `${error.message}; retry_after_ms=${error.retry_after_ms}`
        );
      }

      // Anything else is a limiter failure, not an exhausted wait budget, so it
      // must not be reported under the "wait_timeout" reason.
      this.observability.metrics.increment("retrieval_provider_limiter_block_total", 1, {
        ...labels,
        reason: "limiter_error"
      });
      throw error;
    }
  }
}
2251
+
2252
/**
 * Builds the single-line system instruction for the prompt-enhancement model.
 *
 * The instruction is a fixed list of rules joined by single spaces. Only the
 * language rule and the style rule vary with the arguments.
 *
 * @param language "zh" selects Simplified Chinese and "es" Spanish; any other
 *        value selects English.
 * @param style "lean" or "deep" select their own rule; any other value selects
 *        the standard rule.
 */
function buildClaudeEnhancerSystemInstruction(
  language: EnhancerOutputLanguage,
  style: ResolvedEnhancerPromptStyle
): string {
  let languageRule = "Output language must be English.";
  if (language === "zh") {
    languageRule = "Output language must be Simplified Chinese.";
  } else if (language === "es") {
    languageRule = "Output language must be Spanish.";
  }

  let styleRule: string;
  switch (style) {
    case "lean":
      styleRule =
        "Style is lean: keep the response compact (roughly 90-180 words), avoid extra headings, and include only essential steps.";
      break;
    case "deep":
      styleRule =
        "Style is deep: provide comprehensive but grounded guidance (roughly 260-420 words) with concrete constraints, edge cases, and validation.";
      break;
    default:
      styleRule =
        "Style is standard: provide balanced depth (roughly 160-300 words) with clear scope, steps, and validation.";
      break;
  }

  const rules = [
    "You are a high-precision prompt enhancement agent for software engineering tasks.",
    languageRule,
    styleRule,
    "Return plain text only: the final enhanced prompt.",
    "Do not include markdown code fences.",
    "Preserve user intent exactly; do not add unrelated features.",
    "Do not invent file paths or symbols that are not present in provided context.",
    "Produce concise execution-ready prompts, not long generic templates.",
    "Prefer practical sections only: objective, scoped constraints, codebase anchors, implementation steps, validation.",
    "Use concrete file/symbol anchors when context exists.",
    "Avoid repeating generic process advice, broad deliverables lists, or organizational boilerplate."
  ];
  return rules.join(" ");
}
2282
+
2283
/** Passes `path` through `normalizePath` and lower-cases the result. */
function normalizeEnhancerContextPath(path: string): string {
  const canonical = normalizePath(path);
  return canonical.toLowerCase();
}
2286
+
2287
/**
 * Reports whether `path` names one of the files treated as a source of project
 * conventions: agents.md, claude.md, readme.md or contributing.md, either as
 * the whole normalized path or as its final segment.
 */
function looksLikeEnhancerConventionsFile(path: string): boolean {
  const candidate = normalizeEnhancerContextPath(path);
  const conventionFileNames = ["agents.md", "claude.md", "readme.md", "contributing.md"];
  return conventionFileNames.some(
    (fileName) => candidate === fileName || candidate.endsWith(`/${fileName}`)
  );
}
2300
+
2301
/**
 * Collects up to eight convention-like lines from snippets that come from
 * convention files (see `looksLikeEnhancerConventionsFile`).
 *
 * A line is kept when, after stripping a leading bullet or numbered-list
 * marker and trimming, it is 16-180 characters long, contains one of the
 * signal words, and does not start with a code keyword. Lines are
 * de-duplicated case-insensitively and returned in first-seen order.
 */
function extractProjectConventionsFromEnhancerContext(snippets: EnhancerContextSnippet[]): string[] {
  const conventionSnippets = snippets.filter((snippet) => looksLikeEnhancerConventionsFile(snippet.path));
  if (conventionSnippets.length === 0) {
    return [];
  }

  const maxConventions = 8;
  const conventionSignal =
    /\b(always|never|must|should|avoid|prefer|preserve|keep|strict|isolation|tenant|workspace|contract|schema|backward|compatibility|regression|test|typecheck|bun)\b/i;
  const codeLineStart = /^(import|export|const|let|var|if|for|while|return)\b/i;
  const bulletMarker = /^\s*[-*+]\s+/u;
  const numberedMarker = /^\s*\d+\.\s+/u;

  // Keyed by the lower-cased text; Map iteration keeps first-seen order.
  const collected = new Map<string, string>();
  for (const { snippet } of conventionSnippets) {
    for (const line of snippet.split(/\r?\n/u)) {
      const text = line.replace(bulletMarker, "").replace(numberedMarker, "").trim();
      const usable =
        text.length >= 16 && text.length <= 180 && conventionSignal.test(text) && !codeLineStart.test(text);
      if (!usable) {
        continue;
      }
      const dedupeKey = text.toLowerCase();
      if (collected.has(dedupeKey)) {
        continue;
      }
      collected.set(dedupeKey, text);
      if (collected.size >= maxConventions) {
        return [...collected.values()];
      }
    }
  }
  return [...collected.values()];
}
2339
+
2340
/**
 * Derives hard requirements from the user's prompt and conversation history.
 *
 * The prompt and every history entry's content are joined with newlines and
 * lower-cased. Each rule whose pattern matches contributes its requirement
 * sentence, in rule order. At most six requirements are returned.
 */
function extractEnhancerNonNegotiables(input: {
  prompt: string;
  history: EnhancePromptInput["conversation_history"];
}): string[] {
  const historyText = input.history.map((entry) => entry.content).join("\n");
  const haystack = `${input.prompt}\n${historyText}`.toLowerCase();

  const rules: ReadonlyArray<readonly [RegExp, string]> = [
    [
      /keep (?:behavior|behaviour) stable|preserve (?:existing )?(?:behavior|behaviour)|backward.?compat|no breaking changes|without breaking/i,
      "Preserve existing behavior and avoid breaking API/contract semantics."
    ],
    [/regression tests?|add tests?|test coverage|boundary tests?/i, "Include regression tests for any changed behavior."],
    [
      /tenant|workspace|authorization|auth boundaries?|scope enforcement|isolation/i,
      "Maintain strict tenant/workspace isolation and authorization boundaries."
    ],
    [/no docs|avoid docs|exclude docs/i, "Do not prioritize documentation-only changes unless explicitly requested."],
    [/no refactor|minimal changes?|smallest safe change/i, "Prefer the smallest safe change set."]
  ];

  const requirements: string[] = [];
  for (const [pattern, requirement] of rules) {
    if (pattern.test(haystack) && !requirements.includes(requirement)) {
      requirements.push(requirement);
    }
  }
  return requirements.slice(0, 6);
}
2373
+
2374
/**
 * Chooses the output contract (target style, word budget, preferred sections
 * and patterns to avoid) for an enhancement request.
 *
 * The contract depends on the resolved style and on whether the request is
 * test-oriented (`intent === "tests"`) or spec-oriented (`intent === "docs"`
 * or a conceptual query). For "lean" and "deep" the test check wins when both
 * apply. For any other style the spec check wins. The word budget is larger
 * when context is available.
 */
function buildEnhancerOutputContract(input: {
  style: ResolvedEnhancerPromptStyle;
  intent: EnhancerIntent;
  query_intent: "symbol-heavy" | "impl-focused" | "conceptual";
  has_context: boolean;
}): {
  target_style: string;
  max_words: number;
  preferred_sections: string[];
  avoid_patterns: string[];
} {
  const wantsTests = input.intent === "tests";
  const wantsSpec = input.intent === "docs" || input.query_intent === "conceptual";
  const wordBudget = (withContext: number, withoutContext: number): number =>
    input.has_context ? withContext : withoutContext;

  switch (input.style) {
    case "lean": {
      if (wantsTests) {
        return {
          target_style: "lean_test_plan",
          max_words: wordBudget(220, 170),
          preferred_sections: ["Goal", "Key test cases", "Validation"],
          avoid_patterns: ["long checklists", "broad architecture proposals", "generic deliverables blocks"]
        };
      }
      if (wantsSpec) {
        return {
          target_style: "lean_spec",
          max_words: wordBudget(220, 170),
          preferred_sections: ["Goal", "Scope", "Validation"],
          avoid_patterns: ["verbose outlines", "boilerplate context blocks", "generic process advice"]
        };
      }
      return {
        target_style: "lean_implementation_plan",
        max_words: wordBudget(230, 180),
        preferred_sections: ["Goal", "Constraints", "Action steps", "Validation"],
        avoid_patterns: ["deep background sections", "broad deliverables lists", "repeated boilerplate"]
      };
    }

    case "deep": {
      if (wantsTests) {
        return {
          target_style: "deep_test_plan",
          max_words: wordBudget(420, 340),
          preferred_sections: ["Goal", "Behavior under test", "Test matrix", "Edge cases", "Validation"],
          avoid_patterns: ["vague test advice", "non-test deliverables", "ungrounded file guesses"]
        };
      }
      if (wantsSpec) {
        return {
          target_style: "deep_spec",
          max_words: wordBudget(420, 340),
          preferred_sections: ["Goal", "Scope", "Relevant sources", "Proposed outline", "Risks", "Validation"],
          avoid_patterns: ["implementation-only checklists", "generic organizational boilerplate", "speculation"]
        };
      }
      return {
        target_style: "deep_implementation_plan",
        max_words: wordBudget(420, 360),
        preferred_sections: [
          "Goal",
          "Scope and constraints",
          "Codebase anchors",
          "Implementation plan",
          "Edge cases",
          "Validation"
        ],
        avoid_patterns: ["security theater", "repeated compliance boilerplate", "invented file/symbol references"]
      };
    }

    default: {
      // Standard style: the spec check runs before the test check here.
      if (wantsSpec) {
        return {
          target_style: "concise_spec",
          max_words: wordBudget(320, 260),
          preferred_sections: ["Goal", "Scope", "Relevant sources", "Proposed outline", "Validation"],
          avoid_patterns: ["long implementation checklists", "generic deliverables sections", "repeated boilerplate"]
        };
      }
      if (wantsTests) {
        return {
          target_style: "test_plan",
          max_words: wordBudget(320, 260),
          preferred_sections: ["Goal", "Behavior under test", "Test matrix", "Implementation notes", "Validation"],
          avoid_patterns: ["broad architecture rewrites", "non-test deliverables", "generic process bullets"]
        };
      }
      return {
        target_style: "implementation_plan",
        max_words: wordBudget(360, 300),
        preferred_sections: ["Goal", "Scope and constraints", "Codebase anchors", "Implementation plan", "Validation"],
        avoid_patterns: ["broad security theater", "repeated compliance boilerplate", "vague deliverables lists"]
      };
    }
  }
}
2466
+
2467
/**
 * Renders the user message sent to the prompt-enhancement model: three fixed
 * instruction lines, an "Input JSON:" marker, and the request serialized as
 * pretty-printed JSON (two-space indent).
 *
 * Besides the request fields, the JSON carries the derived output contract,
 * the non-negotiables found in the prompt and history, and the project
 * conventions found in the context snippets. Snippet scores are rounded to
 * four decimal places, and a missing workspace id is written as "none".
 */
function buildClaudeEnhancerUserPayload(input: EnhancerGenerationRequest): string {
  const { request } = input;

  const snippetsForModel = input.context_snippets.map((entry) => ({
    path: entry.path,
    start_line: entry.start_line,
    end_line: entry.end_line,
    reason: entry.reason,
    score: Number(entry.score.toFixed(4)),
    snippet: entry.snippet
  }));

  // Key order is part of the rendered JSON, so it mirrors the field list above.
  const body = {
    trace_id: input.trace_id,
    tenant_id: input.tenant_id,
    workspace_id: input.workspace_id ?? "none",
    tool_mode: input.tool_mode,
    style_requested: input.style_requested,
    style_resolved: input.style_resolved,
    intent: input.intent,
    query_intent: input.query_intent,
    language: input.language,
    original_prompt: request.prompt,
    conversation_history: request.conversation_history,
    context_refs: input.context_refs,
    context_snippets: snippetsForModel,
    output_contract: buildEnhancerOutputContract({
      style: input.style_resolved,
      intent: input.intent,
      query_intent: input.query_intent,
      has_context: input.context_refs.length > 0
    }),
    non_negotiables: extractEnhancerNonNegotiables({
      prompt: request.prompt,
      history: request.conversation_history
    }),
    project_conventions: extractProjectConventionsFromEnhancerContext(input.context_snippets)
  };

  const preamble = [
    "Enhance the following request into a concise, implementation-ready prompt.",
    "Prioritize user intent fidelity, concrete repo anchors, and verifiable validation steps.",
    "Honor the requested enhancement style while avoiding invented details.",
    "Input JSON:"
  ];
  return [...preamble, JSON.stringify(body, null, 2)].join("\n");
}
2512
+
2513
/**
 * Strips one leading and one trailing Markdown code fence from a model reply.
 *
 * Only an opening fence tagged `json`, `markdown`, `md` (case-insensitive) or
 * untagged is consumed together with its following whitespace; any other tag
 * is left in the text after the three backticks are removed.
 *
 * @param text - Raw provider output.
 * @returns The text without surrounding fences, trimmed.
 */
function removeEnhancerCodeFences(text: string): string {
  const openingFence = /^```(?:json|markdown|md)?\s*/iu;
  const closingFence = /\s*```$/u;
  const withoutOpening = text.trim().replace(openingFence, "");
  const withoutClosing = withoutOpening.replace(closingFence, "");
  return withoutClosing.trim();
}
2516
+
2517
/**
 * Normalizes raw provider output into the final enhanced prompt text.
 *
 * Steps: remove surrounding code fences, convert CRLF to LF, drop trailing
 * spaces/tabs per line, collapse runs of three or more newlines to two, trim.
 * If the cleaned text is a JSON object with a string `enhanced_prompt` field,
 * that field (trimmed) is returned instead of the raw text.
 *
 * @param text - Raw text returned by the provider.
 * @returns The cleaned prompt; may be empty when the input had no content.
 */
function normalizeProviderEnhancedPrompt(text: string): string {
  const unfenced = removeEnhancerCodeFences(text).replace(/\r\n/g, "\n");
  const rightTrimmedLines = unfenced.split("\n").map((line) => line.replace(/[ \t]+$/u, ""));
  const cleaned = rightTrimmedLines
    .join("\n")
    .replace(/\n{3,}/g, "\n\n")
    .trim();
  if (cleaned.length === 0) {
    return cleaned;
  }

  // Providers sometimes wrap the answer as {"enhanced_prompt": "..."}; unwrap when possible.
  let parsed: unknown;
  try {
    parsed = JSON.parse(cleaned);
  } catch {
    // Not JSON: the cleaned text itself is the prompt.
    return cleaned;
  }
  if (parsed !== null && typeof parsed === "object") {
    const candidate = (parsed as { enhanced_prompt?: unknown }).enhanced_prompt;
    if (typeof candidate === "string") {
      return candidate.trim();
    }
  }
  return cleaned;
}
2538
+
2539
+ type ClaudeAgentSdkQueryFn = (input: {
2540
+ prompt: string;
2541
+ options?: Record<string, unknown>;
2542
+ }) => AsyncIterable<unknown>;
2543
+
2544
+ let cachedClaudeAgentSdkQueryFn: ClaudeAgentSdkQueryFn | undefined;
2545
+
2546
/**
 * Type guard for plain keyed objects: true for any non-null, non-array value
 * whose `typeof` is "object".
 *
 * @param value - Value of unknown shape.
 * @returns Whether `value` can be read as `Record<string, unknown>`.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return !Array.isArray(value);
}
2549
+
2550
/**
 * Resolves the `query()` entry point of the Claude agent SDK.
 *
 * Candidate packages are imported dynamically in order; the first one that
 * exports a `query` function wins and is cached in
 * `cachedClaudeAgentSdkQueryFn` for later calls.
 *
 * @returns The SDK `query` function.
 * @throws EnhancerProviderRequestError with reason "upstream_error" when no
 *   candidate package provides `query()`; the message carries the reason of
 *   the last failed attempt.
 */
async function loadClaudeAgentSdkQueryFn(): Promise<ClaudeAgentSdkQueryFn> {
  if (cachedClaudeAgentSdkQueryFn) {
    return cachedClaudeAgentSdkQueryFn;
  }

  const candidateModules = ["@anthropic-ai/claude-agent-sdk", "@anthropic-ai/claude-code"];
  let lastError: unknown;
  for (const moduleName of candidateModules) {
    try {
      const loaded = (await import(moduleName)) as { query?: unknown };
      if (typeof loaded.query !== "function") {
        lastError = new Error(`${moduleName} does not export query()`);
        continue;
      }
      cachedClaudeAgentSdkQueryFn = loaded.query as ClaudeAgentSdkQueryFn;
      return cachedClaudeAgentSdkQueryFn;
    } catch (error) {
      // Import failed (for example the package is not installed); try the next candidate.
      lastError = error;
    }
  }

  let reason: string;
  if (lastError instanceof Error) {
    reason = lastError.message;
  } else {
    reason = String(lastError ?? "unknown error");
  }
  throw new EnhancerProviderRequestError(
    "upstream_error",
    `claude agent sdk is not available; install @anthropic-ai/claude-agent-sdk (${reason})`
  );
}
2576
+
2577
/**
 * Pulls display text out of a message `content` value.
 *
 * A string is returned trimmed. An array is scanned for record items with a
 * string `text` field; their trimmed, non-empty texts are joined with
 * newlines. Anything else, or a result with no text, yields `undefined`.
 *
 * @param content - The `content` field of an SDK message (unknown shape).
 * @returns Non-empty text, or `undefined` when none is present.
 */
function extractTextFromClaudeMessageContent(content: unknown): string | undefined {
  if (typeof content === "string") {
    const whole = content.trim();
    return whole.length > 0 ? whole : undefined;
  }
  if (!Array.isArray(content)) {
    return undefined;
  }

  const pieces = content.flatMap((block: unknown): string[] => {
    if (!isRecord(block)) {
      return [];
    }
    const blockText = block.text;
    if (typeof blockText !== "string") {
      return [];
    }
    const cleaned = blockText.trim();
    return cleaned.length > 0 ? [cleaned] : [];
  });
  return pieces.length > 0 ? pieces.join("\n") : undefined;
}
2604
+
2605
/**
 * Finds the first usable text in an SDK message.
 *
 * Lookup order: top-level `summary`, `result`, `text` strings, then top-level
 * `content`, then the nested `message.text` string and `message.content`.
 * Empty or whitespace-only candidates are skipped.
 *
 * @param message - One item yielded by the SDK stream (unknown shape).
 * @returns Trimmed text, or `undefined` when the message carries none.
 */
function extractTextFromClaudeSdkMessage(message: unknown): string | undefined {
  if (!isRecord(message)) {
    return undefined;
  }

  const nonEmptyString = (value: unknown): string | undefined => {
    if (typeof value !== "string") {
      return undefined;
    }
    const cleaned = value.trim();
    return cleaned.length > 0 ? cleaned : undefined;
  };

  for (const field of ["summary", "result", "text"] as const) {
    const found = nonEmptyString(message[field]);
    if (found !== undefined) {
      return found;
    }
  }

  const topLevelContent = extractTextFromClaudeMessageContent(message.content);
  if (topLevelContent) {
    return topLevelContent;
  }

  const inner = message.message;
  if (!isRecord(inner)) {
    return undefined;
  }
  const innerText = nonEmptyString(inner.text);
  if (innerText !== undefined) {
    return innerText;
  }
  const innerContent = extractTextFromClaudeMessageContent(inner.content);
  return innerContent ? innerContent : undefined;
}
2645
+
2646
/**
 * Returns the raw text fragment carried by a partial `stream_event` message.
 *
 * Only `content_block_start` (text from `content_block.text`) and
 * `content_block_delta` (text from `delta.text`) events are read. The text is
 * returned untrimmed so fragments can be concatenated by the caller.
 *
 * @param message - One item yielded by the SDK stream (unknown shape).
 * @returns The fragment, or `undefined` for any other message or event.
 */
function extractTextChunkFromClaudeSdkStreamEvent(message: unknown): string | undefined {
  if (!isRecord(message) || message.type !== "stream_event") {
    return undefined;
  }
  const streamEvent = message.event;
  if (!isRecord(streamEvent)) {
    return undefined;
  }

  switch (streamEvent.type) {
    case "content_block_start": {
      const block = streamEvent.content_block;
      return isRecord(block) && typeof block.text === "string" ? block.text : undefined;
    }
    case "content_block_delta": {
      const delta = streamEvent.delta;
      return isRecord(delta) && typeof delta.text === "string" ? delta.text : undefined;
    }
    default:
      return undefined;
  }
}
2674
+
2675
/**
 * Reads a structured result from an SDK message.
 *
 * Succeeds only when `message.structured_output` is a record whose
 * `enhanced_prompt` is a string with non-whitespace content.
 *
 * @param message - One item yielded by the SDK stream (unknown shape).
 * @returns The generation result with the trimmed prompt, or `undefined`.
 */
function extractStructuredOutputFromClaudeSdkMessage(message: unknown): EnhancerGenerationResult | undefined {
  const structured = isRecord(message) ? message.structured_output : undefined;
  if (!isRecord(structured)) {
    return undefined;
  }
  const candidate = structured.enhanced_prompt;
  if (typeof candidate !== "string") {
    return undefined;
  }
  const cleaned = candidate.trim();
  if (cleaned.length === 0) {
    return undefined;
  }
  return { enhanced_prompt: cleaned };
}
2691
+
2692
/**
 * Detects a failed `result` message.
 *
 * A message counts as a failure when its `type` is "result" and its `subtype`
 * is a string other than "success". String entries of `errors` are trimmed
 * and empty ones dropped; non-string entries are ignored.
 *
 * @param message - One item yielded by the SDK stream (unknown shape).
 * @returns The failure subtype and cleaned error list, or `undefined`.
 */
function extractResultFailureFromClaudeSdkMessage(message: unknown): {
  subtype: string;
  errors: string[];
} | undefined {
  if (!isRecord(message) || message.type !== "result") {
    return undefined;
  }
  const { subtype } = message;
  if (typeof subtype !== "string" || subtype === "success") {
    return undefined;
  }

  const errors: string[] = [];
  const reported: unknown[] = Array.isArray(message.errors) ? message.errors : [];
  for (const entry of reported) {
    if (typeof entry !== "string") {
      continue;
    }
    const cleaned = entry.trim();
    if (cleaned.length > 0) {
      errors.push(cleaned);
    }
  }
  return { subtype, errors };
}
2713
+
2714
/**
 * Produces a short diagnostic label for an SDK message.
 *
 * Non-record values are labelled with their `typeof`. Records are labelled
 * `type` or `type:subtype`, with "unknown" standing in for a missing or
 * non-string `type`.
 *
 * @param message - One item yielded by the SDK stream (unknown shape).
 * @returns The label used when reporting which message kinds were seen.
 */
function describeClaudeSdkMessage(message: unknown): string {
  if (!isRecord(message)) {
    return typeof message;
  }
  const kind = typeof message.type === "string" ? message.type : "unknown";
  if (typeof message.subtype === "string" && message.subtype) {
    return `${kind}:${message.subtype}`;
  }
  return kind;
}
2722
+
2723
/**
 * Converts any thrown value into an `EnhancerProviderRequestError`.
 *
 * Existing `EnhancerProviderRequestError` instances pass through untouched.
 * For other `Error`s the message is matched, in this order, against timeout /
 * abort wording, rate-limit wording, and "file not found" wording that also
 * mentions claude; everything else becomes "upstream_error". Non-Error values
 * are stringified.
 *
 * @param error - Value caught from the provider call.
 * @returns A classified provider error.
 */
function classifyEnhancerProviderError(error: unknown): EnhancerProviderRequestError {
  if (error instanceof EnhancerProviderRequestError) {
    return error;
  }
  if (!(error instanceof Error)) {
    return new EnhancerProviderRequestError("upstream_error", String(error));
  }

  const message = error.message || "unknown enhancer provider error";
  const looksLikeTimeout = /(timeout|timed out|abort)/i.test(message);
  if (looksLikeTimeout) {
    return new EnhancerProviderRequestError("timeout", message);
  }
  const looksRateLimited = /(rate.?limit|too many requests|429)/i.test(message);
  if (looksRateLimited) {
    return new EnhancerProviderRequestError("rate_limited", message);
  }
  const missingFile = /(no such file|not found|ENOENT)/i.test(message);
  if (missingFile && /claude/i.test(message)) {
    return new EnhancerProviderRequestError("upstream_error", `claude code executable not found: ${message}`);
  }
  return new EnhancerProviderRequestError("upstream_error", message);
}
2742
+
2743
/**
 * Enhancer generation provider backed by the Claude agent SDK `query()` stream.
 *
 * Configuration is validated once in the constructor. `generate()` streams SDK
 * messages and returns, in order of preference: a structured output, the last
 * message text, or the concatenation of partial stream fragments.
 */
export class ClaudeAgentEnhancerProvider implements EnhancerGenerationProvider {
  private readonly apiKey: string;
  private readonly model: string;
  private readonly maxTokens: number;
  private readonly baseUrl?: string;
  private readonly pathToClaudeCodeExecutable?: string;
  private readonly permissionMode: ClaudeCodePermissionMode;

  /**
   * @param options - Provider configuration.
   * @throws Error when `api_key` or `model` is empty after trimming, when
   *   `max_tokens` is not a positive integer, or when `permission_mode` is not
   *   one of default|acceptEdits|bypassPermissions|plan.
   */
  constructor(options: ClaudeAgentEnhancerProviderOptions) {
    const apiKey = options.api_key.trim();
    if (apiKey.length === 0) {
      throw new Error("invalid claude enhancer config: api_key must be non-empty");
    }
    // An explicitly supplied but blank model is rejected rather than defaulted.
    const model = options.model?.trim() ?? DEFAULT_CLAUDE_ENHANCER_MODEL;
    if (model.length === 0) {
      throw new Error("invalid claude enhancer config: model must be non-empty");
    }
    const maxTokens = options.max_tokens ?? 1_200;
    if (!Number.isInteger(maxTokens) || maxTokens <= 0) {
      throw new Error("invalid claude enhancer config: max_tokens must be a positive integer");
    }
    const permissionMode = options.permission_mode ?? "default";
    if (
      permissionMode !== "default" &&
      permissionMode !== "acceptEdits" &&
      permissionMode !== "bypassPermissions" &&
      permissionMode !== "plan"
    ) {
      throw new Error("invalid claude enhancer config: permission_mode must be default|acceptEdits|bypassPermissions|plan");
    }

    this.apiKey = apiKey;
    this.model = model;
    this.maxTokens = maxTokens;
    this.baseUrl = options.base_url?.trim();
    const executablePath = options.path_to_claude_code_executable?.trim();
    this.pathToClaudeCodeExecutable = executablePath && executablePath.length > 0 ? executablePath : undefined;
    this.permissionMode = permissionMode;
  }

  /** Identifies this provider and the configured model. */
  describe(): EnhancerProviderDescriptor {
    return {
      provider: "claude_agent",
      model: this.model
    };
  }

  /**
   * Runs one enhancement request through the SDK stream.
   *
   * @param input - Generation request; `abort_signal` (if any) is forwarded to
   *   the SDK through a local AbortController and `on_progress` is invoked for
   *   every streamed message.
   * @returns The enhanced prompt.
   * @throws EnhancerProviderRequestError for stream failures (classified via
   *   `classifyEnhancerProviderError`), for non-success result messages, for
   *   max-turns exhaustion without output, and when no text was produced.
   */
  async generate(input: EnhancerGenerationRequest): Promise<EnhancerGenerationResult> {
    const query = await loadClaudeAgentSdkQueryFn();
    const prompt = buildClaudeEnhancerUserPayload(input);
    const abortController = new AbortController();
    const upstreamAbortSignal = input.abort_signal;
    const upstreamAbortHandler = (): void => {
      abortController.abort();
    };

    let structured: EnhancerGenerationResult | undefined;
    let lastText: string | undefined;
    const streamTextParts: string[] = [];
    const seenMessageKinds = new Set<string>();
    let maxTurnsFailure: { subtype: string; errors: string[] } | undefined;

    // The outer try/finally guarantees the upstream listener is detached even
    // when building the options (e.g. the system instruction) throws before
    // the stream starts.
    try {
      if (upstreamAbortSignal) {
        if (upstreamAbortSignal.aborted) {
          abortController.abort();
        } else {
          upstreamAbortSignal.addEventListener("abort", upstreamAbortHandler, { once: true });
        }
      }

      const options: Record<string, unknown> = {
        model: this.model,
        maxThinkingTokens: this.maxTokens,
        maxTurns: DEFAULT_CLAUDE_ENHANCER_MAX_TURNS,
        includePartialMessages: true,
        thinking: {
          type: "disabled"
        },
        permissionMode: this.permissionMode,
        systemPrompt: buildClaudeEnhancerSystemInstruction(input.language, input.style_resolved),
        // Enhancer already receives scoped context snippets; keep Claude Code tools disabled to avoid long tool loops.
        tools: [],
        allowedTools: [],
        // NOTE(review): only the API key (and optional base URL) are passed here. If the SDK treats
        // `env` as the complete child environment rather than merging it with process.env, PATH and
        // similar variables would be missing for the spawned process — confirm against the SDK docs.
        env: {
          ANTHROPIC_API_KEY: this.apiKey,
          ...(this.baseUrl ? { ANTHROPIC_BASE_URL: this.baseUrl } : {})
        },
        abortController,
        ...(this.pathToClaudeCodeExecutable ? { pathToClaudeCodeExecutable: this.pathToClaudeCodeExecutable } : {}),
        ...(input.request.project_root_path ? { cwd: input.request.project_root_path } : {})
      };

      // Only failures of the stream itself are classified into provider errors.
      try {
        for await (const message of query({ prompt, options })) {
          input.on_progress?.();
          seenMessageKinds.add(describeClaudeSdkMessage(message));
          const partialChunk = extractTextChunkFromClaudeSdkStreamEvent(message);
          if (typeof partialChunk === "string" && partialChunk.length > 0) {
            streamTextParts.push(partialChunk);
          }
          const resultFailure = extractResultFailureFromClaudeSdkMessage(message);
          if (resultFailure) {
            if (resultFailure.subtype === "error_max_turns") {
              // Max-turns is tolerated when some text was already produced; decided after the loop.
              maxTurnsFailure = resultFailure;
              continue;
            }
            const details = resultFailure.errors.length > 0 ? `: ${resultFailure.errors.join(" | ")}` : "";
            throw new EnhancerProviderRequestError(
              "upstream_error",
              `claude agent sdk result error (${resultFailure.subtype})${details}`
            );
          }
          const maybeStructured = extractStructuredOutputFromClaudeSdkMessage(message);
          if (maybeStructured) {
            structured = maybeStructured;
          }
          const maybeText = extractTextFromClaudeSdkMessage(message);
          if (maybeText) {
            lastText = maybeText;
          }
          if (isRecord(message) && message.type === "assistant" && typeof message.error === "string") {
            throw new EnhancerProviderRequestError(
              "upstream_error",
              `claude agent sdk assistant error: ${message.error}`
            );
          }
        }
      } catch (error) {
        throw classifyEnhancerProviderError(error);
      }
    } finally {
      if (upstreamAbortSignal) {
        upstreamAbortSignal.removeEventListener("abort", upstreamAbortHandler);
      }
    }

    if (structured) {
      return structured;
    }
    // Fall back to the concatenated partial fragments when no full message text arrived.
    if (!lastText && streamTextParts.length > 0) {
      lastText = streamTextParts.join("").trim();
    }
    if (maxTurnsFailure && !lastText) {
      const details = maxTurnsFailure.errors.length > 0 ? `: ${maxTurnsFailure.errors.join(" | ")}` : "";
      throw new EnhancerProviderRequestError(
        "upstream_error",
        `claude agent sdk hit max turns before returning output${details}`
      );
    }
    if (!lastText) {
      const seenKinds = [...seenMessageKinds].join(", ") || "none";
      throw new EnhancerProviderRequestError(
        "invalid_response",
        `claude agent sdk returned no text output (messages=${seenKinds})`
      );
    }
    return { enhanced_prompt: normalizeProviderEnhancedPrompt(lastText) };
  }
}
1350
2897
 
1351
2898
  async function safeResponseText(response: Response): Promise<string> {
1352
2899
  try {
@@ -1357,6 +2904,37 @@ async function safeResponseText(response: Response): Promise<string> {
1357
2904
  }
1358
2905
  }
1359
2906
 
2907
/**
 * Parses a `Retry-After` header value into a delay in milliseconds.
 *
 * Two forms are accepted: a non-negative number of seconds (rounded up to
 * whole milliseconds) and a date string understood by `Date.parse` (converted
 * to the remaining time from now, never below zero).
 *
 * A value that parses as a number is never re-read as a date: a negative or
 * non-finite number yields `undefined` instead of being handed to
 * `Date.parse`, whose treatment of non-date strings is implementation-defined.
 *
 * @param headerValue - Raw header value, or `null` when the header is absent.
 * @returns Delay in milliseconds, or `undefined` when the value is missing or unusable.
 */
function parseRetryAfterMs(headerValue: string | null): number | undefined {
  const trimmed = headerValue?.trim();
  if (!trimmed) {
    return undefined;
  }

  const seconds = Number(trimmed);
  if (!Number.isNaN(seconds)) {
    // Numeric token: valid only when finite and non-negative.
    return Number.isFinite(seconds) && seconds >= 0 ? Math.ceil(seconds * 1000) : undefined;
  }

  const dateMs = Date.parse(trimmed);
  if (Number.isNaN(dateMs)) {
    return undefined;
  }
  // A date in the past means "retry now".
  return Math.max(0, dateMs - Date.now());
}
2925
+
2926
/**
 * Builds the rate-limiter scope key for a provider credential.
 *
 * The key has the form `provider:<provider>|credential:<id>`. The id is the
 * trimmed `overrideScopeId` when one is given and non-empty; otherwise it is
 * the first 16 characters of `sha256(apiKey)`, so the raw key never appears
 * in the scope string.
 *
 * @param input - Provider name, API key and optional explicit scope id.
 * @returns The scope key.
 */
function resolveProviderLimiterScope(input: {
  provider: string;
  apiKey: string;
  overrideScopeId?: string;
}): string {
  const explicitId = input.overrideScopeId?.trim();
  const credentialId = explicitId ? explicitId : sha256(input.apiKey).slice(0, 16);
  return `provider:${input.provider}|credential:${credentialId}`;
}
2937
+
1360
2938
  function resolveEmbeddingDescriptor(provider: EmbeddingProvider): EmbeddingDescriptor {
1361
2939
  const described = provider.describe?.();
1362
2940
  if (!described) {
@@ -1373,23 +2951,120 @@ function resolveEmbeddingDescriptor(provider: EmbeddingProvider): EmbeddingDescr
1373
2951
  };
1374
2952
  }
1375
2953
 
1376
- function normalizeEmbeddingDescriptor(descriptor: EmbeddingDescriptor): EmbeddingDescriptor {
1377
- const provider = descriptor.provider.trim();
1378
- if (provider.length === 0) {
1379
- throw new Error("invalid embedding descriptor: provider must be non-empty");
2954
/**
 * Obtains a descriptor for a reranker provider.
 *
 * Uses the provider's optional `describe()`; when it is absent or returns
 * nothing, the provider is reported as "custom". `model` is included only
 * when the description carries a truthy one.
 *
 * @param provider - Reranker provider instance.
 * @returns Descriptor with `provider` and, when known, `model`.
 */
function resolveRerankerDescriptor(provider: RerankerProvider): RerankerDescriptor {
  const description = provider.describe?.();
  if (!description) {
    return { provider: "custom" };
  }
  if (description.model) {
    return { provider: description.provider, model: description.model };
  }
  return { provider: description.provider };
}
2966
+
2967
/**
 * Obtains a descriptor for an enhancer generation provider.
 *
 * Uses the provider's optional `describe()`; when it is absent or returns
 * nothing, the provider is reported as "custom". `model` is included only
 * when the description carries a truthy one.
 *
 * @param provider - Enhancer generation provider instance.
 * @returns Descriptor with `provider` and, when known, `model`.
 */
function resolveEnhancerProviderDescriptor(provider: EnhancerGenerationProvider): EnhancerProviderDescriptor {
  const description = provider.describe?.();
  if (!description) {
    return { provider: "custom" };
  }
  if (description.model) {
    return { provider: description.provider, model: description.model };
  }
  return { provider: description.provider };
}
2979
+
2980
/**
 * Validates and canonicalizes an embedding descriptor.
 *
 * The provider name is trimmed and lower-cased; `model` and `version` are
 * trimmed and omitted when nothing remains, matching how the reranker and
 * enhancer descriptor normalizers treat `model` (previously a
 * whitespace-only value was emitted as an empty string).
 *
 * @param descriptor - Descriptor reported by an embedding provider.
 * @returns The normalized descriptor.
 * @throws Error when the provider name is empty after trimming or when
 *   `dimensions` is not a positive integer.
 */
function normalizeEmbeddingDescriptor(descriptor: EmbeddingDescriptor): EmbeddingDescriptor {
  const provider = descriptor.provider.trim().toLowerCase();
  if (provider.length === 0) {
    throw new Error("invalid embedding descriptor: provider must be non-empty");
  }
  if (!Number.isInteger(descriptor.dimensions) || descriptor.dimensions <= 0) {
    throw new Error("invalid embedding descriptor: dimensions must be a positive integer");
  }
  // Trim before deciding whether to include the optional fields.
  const model = descriptor.model?.trim();
  const version = descriptor.version?.trim();
  return {
    provider,
    ...(model ? { model } : {}),
    dimensions: descriptor.dimensions,
    ...(version ? { version } : {})
  };
}
2995
+
2996
/**
 * Validates and canonicalizes a reranker descriptor.
 *
 * The provider name is trimmed and lower-cased; `model` is trimmed and left
 * out when empty or absent.
 *
 * @param descriptor - Descriptor reported by a reranker provider.
 * @returns The normalized descriptor.
 * @throws Error when the provider name is empty after trimming.
 */
function normalizeRerankerDescriptor(descriptor: RerankerDescriptor): RerankerDescriptor {
  const providerName = descriptor.provider.trim().toLowerCase();
  if (providerName.length === 0) {
    throw new Error("invalid reranker descriptor: provider must be non-empty");
  }
  const modelName = descriptor.model?.trim();
  if (modelName) {
    return { provider: providerName, model: modelName };
  }
  return { provider: providerName };
}
3007
+
3008
/**
 * Validates and canonicalizes an enhancer provider descriptor.
 *
 * The provider name is trimmed and lower-cased; `model` is trimmed and left
 * out when empty or absent.
 *
 * @param descriptor - Descriptor reported by an enhancer provider.
 * @returns The normalized descriptor.
 * @throws Error when the provider name is empty after trimming.
 */
function normalizeEnhancerProviderDescriptor(descriptor: EnhancerProviderDescriptor): EnhancerProviderDescriptor {
  const providerName = descriptor.provider.trim().toLowerCase();
  if (providerName.length === 0) {
    throw new Error("invalid enhancer descriptor: provider must be non-empty");
  }
  const modelName = descriptor.model?.trim();
  if (modelName) {
    return { provider: providerName, model: modelName };
  }
  return { provider: providerName };
}
3019
+
3020
/**
 * Renders a search result as the document text given to the reranker: the
 * file path on the first line, followed by the snippet.
 *
 * @param candidate - Search result row.
 * @returns `path`, a newline, then `snippet`.
 */
function buildRerankerDocument(candidate: SearchResultRow): string {
  const { path, snippet } = candidate;
  return `${path}\n${snippet}`;
}
3023
+
3024
/**
 * Maps a reranker failure onto a coarse reason label.
 *
 * `RerankerProviderRequestError` is classified by its `reason`
 * ("invalid_json" and "invalid_response" both become "schema_error"). Other
 * `Error`s are classified by message: rate-limit wording is checked before
 * timeout wording. Anything else is "upstream_error".
 *
 * @param error - Value caught from the reranker call.
 * @returns The failure reason label.
 */
function classifyRerankerFailureReason(error: unknown): "timeout" | "schema_error" | "rate_limited" | "upstream_error" {
  if (error instanceof RerankerProviderRequestError) {
    switch (error.reason) {
      case "timeout":
        return "timeout";
      case "rate_limited":
        return "rate_limited";
      case "invalid_json":
      case "invalid_response":
        return "schema_error";
      default:
        return "upstream_error";
    }
  }
  if (!(error instanceof Error)) {
    return "upstream_error";
  }
  const { message } = error;
  if (/(rate.?limit|too many requests|429)/i.test(message)) {
    return "rate_limited";
  }
  return /(timeout|timed out)/i.test(message) ? "timeout" : "upstream_error";
}
3048
+
3049
+ function classifyEnhancerGenerationFailureReason(
3050
+ error: unknown
3051
+ ): "timeout" | "schema_error" | "rate_limited" | "invalid_response" | "upstream_error" {
3052
+ if (error instanceof EnhancerProviderRequestError) {
3053
+ return error.reason;
1380
3054
  }
1381
- if (!Number.isInteger(descriptor.dimensions) || descriptor.dimensions <= 0) {
1382
- throw new Error("invalid embedding descriptor: dimensions must be a positive integer");
3055
+ if (error instanceof Error) {
3056
+ if (/(timeout|timed out)/i.test(error.message)) {
3057
+ return "timeout";
3058
+ }
3059
+ if (/(rate.?limit|too many requests|429)/i.test(error.message)) {
3060
+ return "rate_limited";
3061
+ }
3062
+ return "upstream_error";
1383
3063
  }
1384
- return {
1385
- provider: provider.toLowerCase(),
1386
- ...(descriptor.model ? { model: descriptor.model.trim() } : {}),
1387
- dimensions: descriptor.dimensions,
1388
- ...(descriptor.version ? { version: descriptor.version.trim() } : {})
1389
- };
3064
+ return "upstream_error";
1390
3065
  }
1391
3066
 
1392
- function classifyIntent(prompt: string): "bugfix" | "feature" | "refactor" | "docs" | "tests" | "unknown" {
3067
+ function classifyIntent(prompt: string): EnhancerIntent {
1393
3068
  const p = prompt.toLowerCase();
1394
3069
  if (/fix|bug|error|crash|regression/.test(p)) {
1395
3070
  return "bugfix";
@@ -1409,7 +3084,54 @@ function classifyIntent(prompt: string): "bugfix" | "feature" | "refactor" | "do
1409
3084
  return "unknown";
1410
3085
  }
1411
3086
 
1412
- function detectDominantLanguage(prompt: string, history: EnhancePromptInput["conversation_history"]): "en" | "es" | "zh" {
3087
/**
 * Resolves the enhancement style to use for a request.
 *
 * A missing style defaults to "standard"; any explicit style other than
 * "auto" is used as-is. For "auto" the prompt plus history text is inspected:
 * - "lean" when the user asks for brevity, or the request is short (at most
 *   18 tokens and at most one history entry) with no high-risk or depth cues;
 * - "deep" when depth is requested, high-risk terms appear, at least two of
 *   {has context, 32+ tokens, 3+ history entries} hold, the query intent is
 *   "symbol-heavy", or the intent is "tests";
 * - "standard" otherwise.
 *
 * @param input - Requested style plus the signals used for auto resolution.
 * @returns The effective requested style and the resolved concrete style.
 */
function resolveEnhancerPromptStyle(input: {
  requested?: EnhancePromptStyle;
  intent: EnhancerIntent;
  query_intent: EnhancerQueryIntent;
  prompt: string;
  history: EnhancePromptInput["conversation_history"];
  has_context: boolean;
}): {
  requested: EnhancePromptStyle;
  resolved: ResolvedEnhancerPromptStyle;
} {
  const requested = input.requested ?? "standard";
  if (requested !== "auto") {
    return { requested, resolved: requested };
  }

  const historyText = input.history.map((entry) => entry.content).join("\n");
  const combined = `${input.prompt}\n${historyText}`.trim();
  const tokenCount = tokenize(combined).length;
  const turnCount = input.history.length;

  const wantsBrevity = /\b(concise|brief|short|minimal|quick)\b/i.test(combined);
  const wantsDepth = /\b(detailed|comprehensive|thorough|step-by-step|checklist)\b/i.test(combined);
  const mentionsRisk = /\b(security|auth|authorization|tenant|workspace|migration|data loss|rollback|incident|compliance|backward)\b/i.test(
    combined
  );
  const shortRequest = tokenCount <= 18 && turnCount <= 1;

  // Brevity requests win over every other signal.
  if (wantsBrevity || (shortRequest && !mentionsRisk && !wantsDepth)) {
    return { requested, resolved: "lean" };
  }

  // Count how many complexity indicators are present.
  const complexitySignals = [input.has_context, tokenCount >= 32, turnCount >= 3];
  const complexityScore = complexitySignals.filter(Boolean).length;
  const needsDepth =
    wantsDepth ||
    mentionsRisk ||
    complexityScore >= 2 ||
    input.query_intent === "symbol-heavy" ||
    input.intent === "tests";
  if (needsDepth) {
    return { requested, resolved: "deep" };
  }
  return { requested, resolved: "standard" };
}
3133
+
3134
+ function detectDominantLanguage(prompt: string, history: EnhancePromptInput["conversation_history"]): EnhancerOutputLanguage {
1413
3135
  const latestUser = [...history].reverse().find((m) => m.role === "user")?.content ?? prompt;
1414
3136
  const sample = `${prompt}\n${latestUser}`.toLowerCase();
1415
3137
  if (/[\u3400-\u9fff]/.test(sample)) {
@@ -1917,7 +3639,7 @@ function buildEnhancerRetrievalQuery(
1917
3639
  };
1918
3640
  }
1919
3641
 
1920
- const ENHANCER_LOW_CONFIDENCE_WARNING = "Low retrieval confidence; narrowed context refs and added clarification questions.";
3642
+ const ENHANCER_LOW_CONFIDENCE_WARNING = "Low retrieval confidence; narrowed context refs.";
1921
3643
 
1922
3644
  const ENHANCER_CONFIDENCE_OVERLAP_STOPWORDS = new Set([
1923
3645
  "a",
@@ -2203,7 +3925,7 @@ function hasStrongEnhancerAnchorMatch(input: {
2203
3925
  const topScore = top[0]?.score ?? 0;
2204
3926
  const runnerUpScore = top[1]?.score ?? Number.NEGATIVE_INFINITY;
2205
3927
  const strongScoreMargin = top.length === 1 || topScore - runnerUpScore >= 0.08;
2206
- const hasTopExactSymbolMatch = top.some((result) => result.reason === "exact symbol match");
3928
+ const hasTopExactSymbolMatch = top.some((result) => isExactLiteralReason(result.reason));
2207
3929
  if (hasTopExactSymbolMatch && strongScoreMargin && topScore >= 0.55) {
2208
3930
  return true;
2209
3931
  }
@@ -2328,7 +4050,7 @@ function evaluateEnhancerConfidence(input: {
2328
4050
  if (diversityStrength < confidenceThreshold) {
2329
4051
  failedSignals.push("path_diversity");
2330
4052
  }
2331
- const strongSymbolOrPathSignal = top.some((result) => result.reason === "exact symbol match") && topOverlap >= 0.16;
4053
+ const strongSymbolOrPathSignal = top.some((result) => isExactLiteralReason(result.reason)) && topOverlap >= 0.16;
2332
4054
  const lowConfidence = !strongSymbolOrPathSignal && confidenceScore + 0.01 < confidenceThreshold;
2333
4055
 
2334
4056
  return {
@@ -2355,7 +4077,7 @@ function rankEnhancerResultsForConfidence(input: {
2355
4077
  const anchorScore = (result: SearchContextOutput["results"][number]): number => {
2356
4078
  const normalizedPath = normalizePath(result.path).toLowerCase();
2357
4079
  const normalizedSnippet = result.snippet.toLowerCase();
2358
- let score = result.reason === "exact symbol match" ? 2 : 0;
4080
+ let score = isExactLiteralReason(result.reason) ? 2 : 0;
2359
4081
  for (const anchor of anchors) {
2360
4082
  if (normalizedPath.includes(anchor)) {
2361
4083
  score += 2;
@@ -2415,7 +4137,11 @@ function rankEnhancerResultsForConfidence(input: {
2415
4137
  });
2416
4138
  }
2417
4139
 
2418
- async function runWithTimeout<T>(input: { timeout_ms: number; fn: () => Promise<T> | T }): Promise<T> {
4140
+ async function runWithTimeout<T>(input: {
4141
+ timeout_ms: number;
4142
+ fn: () => Promise<T> | T;
4143
+ on_timeout?: () => void;
4144
+ }): Promise<T> {
2419
4145
  return await new Promise<T>((resolve, reject) => {
2420
4146
  let settled = false;
2421
4147
  const timer = setTimeout(() => {
@@ -2423,6 +4149,7 @@ async function runWithTimeout<T>(input: { timeout_ms: number; fn: () => Promise<
2423
4149
  return;
2424
4150
  }
2425
4151
  settled = true;
4152
+ input.on_timeout?.();
2426
4153
  reject(new Error(`timeout_after_${input.timeout_ms}ms`));
2427
4154
  }, input.timeout_ms);
2428
4155
 
@@ -2447,6 +4174,65 @@ async function runWithTimeout<T>(input: { timeout_ms: number; fn: () => Promise<
2447
4174
  });
2448
4175
  }
2449
4176
 
4177
/**
 * Runs `fn` under an inactivity watchdog.
 *
 * The watchdog timer starts immediately and is restarted every time `fn`
 * calls `touch()`. If it fires before `fn` settles, the supplied `signal` is
 * aborted and the returned promise rejects with `timeout_after_<ms>ms`.
 * Whichever happens first (result, error or timeout) wins; later outcomes are
 * ignored.
 *
 * @param input - `timeout_ms` is the allowed idle time; `fn` receives the
 *   `touch` callback and the abort `signal`.
 * @returns The value produced by `fn`.
 */
async function runWithInactivityTimeout<T>(input: {
  timeout_ms: number;
  fn: (helpers: { touch: () => void; signal: AbortSignal }) => Promise<T> | T;
}): Promise<T> {
  const abortController = new AbortController();
  let timer: ReturnType<typeof setTimeout> | undefined;
  let settled = false;

  return await new Promise<T>((resolve, reject) => {
    // Runs `finish` only for the first outcome.
    const settleOnce = (finish: () => void): void => {
      if (settled) {
        return;
      }
      settled = true;
      finish();
    };

    const stopTimer = (): void => {
      if (timer) {
        clearTimeout(timer);
      }
    };

    const expire = (): void => {
      settleOnce(() => {
        abortController.abort();
        reject(new Error(`timeout_after_${input.timeout_ms}ms`));
      });
    };

    const touch = (): void => {
      if (settled) {
        return;
      }
      stopTimer();
      timer = setTimeout(expire, input.timeout_ms);
    };

    // Arm the watchdog before the work starts.
    touch();

    // Deferring through a resolved promise turns a synchronous throw from `fn` into a rejection.
    Promise.resolve()
      .then(() => input.fn({ touch, signal: abortController.signal }))
      .then((value) => {
        settleOnce(() => {
          stopTimer();
          resolve(value);
        });
      })
      .catch((error) => {
        settleOnce(() => {
          stopTimer();
          reject(error);
        });
      });
  });
}
4235
+
2450
4236
  function deterministicEnhancerFallbackRanking(input: {
2451
4237
  results: SearchContextOutput["results"];
2452
4238
  intent: ReturnType<typeof classifyIntent>;
@@ -2462,46 +4248,6 @@ function deterministicEnhancerFallbackRanking(input: {
2462
4248
  return [...preferred, ...tolerated, ...avoided];
2463
4249
  }
2464
4250
 
2465
- function localizeLowConfidenceQuestion(input: {
2466
- language: "en" | "es" | "zh";
2467
- kind: "scope" | "symbol" | "source_priority";
2468
- symbol?: string;
2469
- }): string {
2470
- if (input.kind === "symbol") {
2471
- if (input.language === "es") {
2472
- return input.symbol
2473
- ? `¿Puedes confirmar si el cambio debe centrarse en el símbolo "${input.symbol}"?`
2474
- : "¿Qué función, clase o archivo exacto debe modificarse primero?";
2475
- }
2476
- if (input.language === "zh") {
2477
- return input.symbol
2478
- ? `请确认这次改动是否应优先围绕符号“${input.symbol}”展开?`
2479
- : "请明确首先要修改的函数、类或文件路径。";
2480
- }
2481
- return input.symbol
2482
- ? `Can you confirm whether "${input.symbol}" is the primary symbol to change?`
2483
- : "Which exact function, class, or file should be edited first?";
2484
- }
2485
-
2486
- if (input.kind === "source_priority") {
2487
- if (input.language === "es") {
2488
- return "¿Debemos priorizar archivos de implementación en src/lib y dejar docs/tests/examples fuera de alcance?";
2489
- }
2490
- if (input.language === "zh") {
2491
- return "是否应优先修改 src/lib 下的实现代码,并排除 docs/tests/examples?";
2492
- }
2493
- return "Should we prioritize runtime implementation files (src/lib) and exclude docs/tests/examples from scope?";
2494
- }
2495
-
2496
- if (input.language === "es") {
2497
- return "¿Cuál es el alcance mínimo y el comportamiento que no debe cambiar?";
2498
- }
2499
- if (input.language === "zh") {
2500
- return "这次改动的最小范围是什么?哪些行为必须保持不变?";
2501
- }
2502
- return "What is the minimal scope, and which behavior must remain unchanged?";
2503
- }
2504
-
2505
4251
  function trimToContextBudget(results: SearchContextOutput["results"]): SearchContextOutput["results"] {
2506
4252
  let total = 0;
2507
4253
  const out: SearchContextOutput["results"] = [];
@@ -2516,7 +4262,7 @@ function trimToContextBudget(results: SearchContextOutput["results"]): SearchCon
2516
4262
  }
2517
4263
 
2518
4264
  function formatEnhancedPrompt(input: {
2519
- intent: ReturnType<typeof classifyIntent>;
4265
+ style: ResolvedEnhancerPromptStyle;
2520
4266
  language: "en" | "es" | "zh";
2521
4267
  original_prompt: string;
2522
4268
  refs: ContextRef[];
@@ -2530,62 +4276,175 @@ function formatEnhancedPrompt(input: {
2530
4276
  input.refs.length > 0 ? input.refs.map((r) => `- ${r.path}:${r.start_line}`).join("\n") : emptyRefsByLanguage[input.language];
2531
4277
 
2532
4278
  if (input.language === "zh") {
4279
+ if (input.style === "lean") {
4280
+ return [
4281
+ "目标",
4282
+ input.original_prompt,
4283
+ "",
4284
+ "约束",
4285
+ "- 保持现有行为与合约兼容。",
4286
+ "- 优先最小且安全的改动。",
4287
+ "",
4288
+ "行动步骤",
4289
+ "- 先确认当前行为与目标范围。",
4290
+ "- 在必要位置完成最小实现并补充回归测试。",
4291
+ "",
4292
+ "验证",
4293
+ "- 运行相关测试并确认无回归。"
4294
+ ].join("\n");
4295
+ }
4296
+ if (input.style === "deep") {
4297
+ return [
4298
+ "目标",
4299
+ input.original_prompt,
4300
+ "",
4301
+ "范围与约束",
4302
+ "- 保持现有行为与 API/合约语义稳定。",
4303
+ "- 仅在必要边界内调整实现,避免扩散改动。",
4304
+ "- 发现风险路径时优先失败安全(deny-by-default)。",
4305
+ "",
4306
+ "代码锚点",
4307
+ likelyFiles,
4308
+ "",
4309
+ "实施步骤",
4310
+ "- 基线确认:先验证当前行为与关键路径。",
4311
+ "- 变更实现:对关键分支做最小、安全、可回退的改动。",
4312
+ "- 回归测试:覆盖正向、跨边界、异常与空输入场景。",
4313
+ "",
4314
+ "边界情况",
4315
+ "- 缺失上下文、无索引或空结果时,保持行为可解释且可回退。",
4316
+ "- 异步/并发路径中避免上下文泄漏与跨租户访问。",
4317
+ "",
4318
+ "验证",
4319
+ "- 运行 typecheck 与目标测试集;确认关键路径稳定无回归。"
4320
+ ].join("\n");
4321
+ }
2533
4322
  return [
2534
4323
  "目标",
2535
4324
  input.original_prompt,
2536
4325
  "",
2537
- "当前状态",
2538
- `- 识别意图: ${input.intent}`,
2539
- "",
2540
4326
  "约束",
2541
4327
  "- 保持 v1 合约兼容和严格校验。",
2542
4328
  "",
2543
- "可能涉及的文件",
4329
+ "代码锚点",
2544
4330
  likelyFiles,
2545
4331
  "",
2546
4332
  "实现清单",
2547
4333
  "- 在改动前确认请求/响应合约。",
2548
4334
  "- 最小化改动并保持 tenant/workspace 隔离。",
2549
4335
  "",
2550
- "边界情况",
2551
- "- Workspace 没有可用索引。",
2552
- "- 搜索过滤后结果为空。",
2553
- "",
2554
4336
  "验证与测试",
2555
4337
  "- 运行 typecheck 和合约/工具测试。",
2556
- "",
2557
- "完成定义",
2558
- "- 测试通过且行为符合 v1 规范。"
2559
4338
  ].join("\n");
2560
4339
  }
2561
4340
 
2562
4341
  if (input.language === "es") {
4342
+ if (input.style === "lean") {
4343
+ return [
4344
+ "Objetivo",
4345
+ input.original_prompt,
4346
+ "",
4347
+ "Restricciones",
4348
+ "- Mantener compatibilidad de comportamiento y contratos.",
4349
+ "- Priorizar cambios mínimos y seguros.",
4350
+ "",
4351
+ "Pasos",
4352
+ "- Confirmar alcance y comportamiento actual antes de editar.",
4353
+ "- Implementar el cambio mínimo necesario y añadir regresiones.",
4354
+ "",
4355
+ "Validación",
4356
+ "- Ejecutar pruebas relevantes y confirmar que no hay regresiones."
4357
+ ].join("\n");
4358
+ }
4359
+ if (input.style === "deep") {
4360
+ return [
4361
+ "Objetivo",
4362
+ input.original_prompt,
4363
+ "",
4364
+ "Alcance y restricciones",
4365
+ "- Preservar comportamiento existente y contratos/API vigentes.",
4366
+ "- Limitar cambios al alcance mínimo necesario.",
4367
+ "- Aplicar defaults de seguridad (deny-by-default) cuando aplique.",
4368
+ "",
4369
+ "Anclas del código",
4370
+ likelyFiles,
4371
+ "",
4372
+ "Plan de implementación",
4373
+ "- Establecer línea base del comportamiento actual.",
4374
+ "- Aplicar cambios mínimos y reversibles en rutas críticas.",
4375
+ "- Añadir pruebas de regresión para casos positivos, negativos y límites.",
4376
+ "",
4377
+ "Casos límite",
4378
+ "- Contexto faltante o resultados vacíos no deben romper el flujo.",
4379
+ "- Evitar fuga de contexto entre tenants/workspaces.",
4380
+ "",
4381
+ "Validación",
4382
+ "- Ejecutar typecheck y pruebas objetivo; confirmar estabilidad."
4383
+ ].join("\n");
4384
+ }
2563
4385
  return [
2564
4386
  "Objetivo",
2565
4387
  input.original_prompt,
2566
4388
  "",
2567
- "Estado actual",
2568
- `- Intención clasificada: ${input.intent}`,
2569
- "",
2570
4389
  "Restricciones",
2571
4390
  "- Mantener compatibilidad con contratos v1 y validación estricta.",
2572
4391
  "",
2573
- "Archivos probables a editar",
4392
+ "Anclas del código",
2574
4393
  likelyFiles,
2575
4394
  "",
2576
4395
  "Checklist de implementación",
2577
4396
  "- Confirmar entradas/salidas del contrato antes de modificar lógica.",
2578
4397
  "- Aplicar cambios mínimos y mantener aislamiento por tenant/workspace.",
2579
4398
  "",
2580
- "Casos límite",
2581
- "- Workspace sin índice listo.",
2582
- "- Filtros de búsqueda que no devuelven resultados.",
2583
- "",
2584
4399
  "Validación y pruebas",
2585
- "- Ejecutar typecheck y pruebas de contratos/herramientas.",
4400
+ "- Ejecutar typecheck y pruebas de contratos/herramientas."
4401
+ ].join("\n");
4402
+ }
4403
+
4404
+ if (input.style === "lean") {
4405
+ const anchors = input.refs.length > 0 ? `- Anchors: ${input.refs.slice(0, 2).map((ref) => `${ref.path}:${ref.start_line}`).join(", ")}` : "";
4406
+ return [
4407
+ "Goal",
4408
+ input.original_prompt,
4409
+ "",
4410
+ "Constraints",
4411
+ "- Preserve existing behavior and contract compatibility.",
4412
+ "- Keep changes minimal and safe.",
4413
+ ...(anchors ? ["", anchors] : []),
4414
+ "",
4415
+ "Action steps",
4416
+ "- Confirm current behavior and target scope.",
4417
+ "- Implement the smallest safe change and add regression coverage.",
4418
+ "",
4419
+ "Validation",
4420
+ "- Run relevant tests and confirm no regressions."
4421
+ ].join("\n");
4422
+ }
4423
+
4424
+ if (input.style === "deep") {
4425
+ return [
4426
+ "Goal",
4427
+ input.original_prompt,
4428
+ "",
4429
+ "Scope and constraints",
4430
+ "- Preserve current behavior and API/contract semantics.",
4431
+ "- Limit changes to the required scope and keep them reversible.",
4432
+ "- Prefer fail-secure defaults where policy boundaries are involved.",
4433
+ "",
4434
+ "Codebase anchors",
4435
+ likelyFiles,
4436
+ "",
4437
+ "Implementation plan",
4438
+ "- Establish baseline behavior and invariants before edits.",
4439
+ "- Apply minimal, safe changes on critical paths only.",
4440
+ "- Add regression coverage for positive, negative, and boundary scenarios.",
2586
4441
  "",
2587
- "Definición de terminado",
2588
- "- Los tests pasan y el comportamiento coincide con el spec."
4442
+ "Edge cases",
4443
+ "- Missing context, empty retrieval results, and async boundary leakage.",
4444
+ "- Cross-tenant/workspace access paths and authorization bypass attempts.",
4445
+ "",
4446
+ "Validation",
4447
+ "- Run typecheck and focused test suites; verify no behavioral regressions."
2589
4448
  ].join("\n");
2590
4449
  }
2591
4450
 
@@ -2593,28 +4452,18 @@ function formatEnhancedPrompt(input: {
2593
4452
  "Goal",
2594
4453
  input.original_prompt,
2595
4454
  "",
2596
- "Current state",
2597
- `- Classified intent: ${input.intent}`,
2598
- "",
2599
4455
  "Constraints",
2600
4456
  "- Keep v1 contract compatibility and strict schema validation.",
2601
4457
  "",
2602
- "Likely files to edit",
4458
+ "Codebase anchors",
2603
4459
  likelyFiles,
2604
4460
  "",
2605
- "Implementation checklist",
4461
+ "Implementation plan",
2606
4462
  "- Confirm request/response contract assumptions before code edits.",
2607
4463
  "- Apply smallest safe changes while preserving tenant/workspace isolation.",
2608
4464
  "",
2609
- "Edge cases",
2610
- "- Workspace has no ready index.",
2611
- "- Search filters produce empty result sets.",
2612
- "",
2613
4465
  "Validation and tests",
2614
- "- Run typecheck and contract/tool tests.",
2615
- "",
2616
- "Definition of done",
2617
- "- Tests pass and behavior matches the v1 spec."
4466
+ "- Run typecheck and contract/tool tests."
2618
4467
  ].join("\n");
2619
4468
  }
2620
4469
 
@@ -2992,13 +4841,21 @@ function compileGlob(glob: string): RegExp {
2992
4841
 
2993
4842
  export class RetrievalCore {
2994
4843
  private readonly cacheTtlSeconds: number;
4844
+ private readonly internalCandidateDepth: number;
2995
4845
  private readonly embeddingProvider: EmbeddingProvider;
2996
4846
  private readonly embeddingDescriptor: EmbeddingDescriptor;
4847
+ private readonly rerankerProvider?: RerankerProvider;
4848
+ private readonly rerankerDescriptor?: RerankerDescriptor;
4849
+ private readonly rerankerTopN: number;
4850
+ private readonly rerankerCacheVariant: string;
2997
4851
  private readonly observability: Observability;
2998
4852
  private readonly scoringConfig: RetrievalScoringConfig;
2999
4853
  private readonly scoringProfileId: string;
3000
4854
  private readonly scoringConfigChecksum: string;
4855
+ private readonly enhancerProvider?: EnhancerGenerationProvider;
4856
+ private readonly enhancerProviderDescriptor?: EnhancerProviderDescriptor;
3001
4857
  private readonly enhancerConfig: RetrievalEnhancerConfig;
4858
+ private readonly enhancerGenerationConfig: RetrievalEnhancerGenerationConfig;
3002
4859
  private readonly chunkingConfig: RetrievalChunkingConfig;
3003
4860
  private readonly enhancerDecisionTraceEnabled: boolean;
3004
4861
  private cacheHits = 0;
@@ -3010,16 +4867,36 @@ export class RetrievalCore {
3010
4867
  options?: RetrievalCoreOptions
3011
4868
  ) {
3012
4869
  this.cacheTtlSeconds = options?.cacheTtlSeconds ?? 60;
4870
+ this.internalCandidateDepth = clampInternalCandidateDepth(options?.internalCandidateDepth);
3013
4871
  this.embeddingProvider = options?.embeddingProvider ?? new DeterministicEmbeddingProvider();
3014
4872
  this.embeddingDescriptor = normalizeEmbeddingDescriptor(
3015
4873
  options?.embeddingDescriptor ?? resolveEmbeddingDescriptor(this.embeddingProvider)
3016
4874
  );
4875
+ this.rerankerProvider = options?.rerankerProvider;
4876
+ this.rerankerTopN = options?.rerankerTopN ?? DEFAULT_SEARCH_RERANKER_TOP_N;
4877
+ if (!Number.isInteger(this.rerankerTopN) || this.rerankerTopN <= 0) {
4878
+ throw new Error("invalid retrieval reranker config: rerankerTopN must be a positive integer");
4879
+ }
4880
+ this.rerankerDescriptor = this.rerankerProvider
4881
+ ? normalizeRerankerDescriptor(resolveRerankerDescriptor(this.rerankerProvider))
4882
+ : undefined;
4883
+ this.rerankerCacheVariant = this.rerankerDescriptor
4884
+ ? `provider:${this.rerankerDescriptor.provider}|model:${this.rerankerDescriptor.model ?? "unknown"}|top_n:${this.rerankerTopN}`
4885
+ : "provider:disabled";
3017
4886
  this.observability = options?.observability ?? getObservability("retrieval-core");
3018
4887
  const baseProfile = resolveRetrievalScoringProfile(options?.scoringProfile);
3019
4888
  this.scoringConfig = mergeRetrievalScoringConfig(baseProfile.config, options?.scoringConfig);
3020
4889
  this.scoringProfileId = options?.scoringProfileId ?? baseProfile.profile_id;
3021
4890
  this.scoringConfigChecksum = scoringConfigChecksum(this.scoringConfig);
4891
+ this.enhancerProvider = options?.enhancerProvider;
4892
+ this.enhancerProviderDescriptor = this.enhancerProvider
4893
+ ? normalizeEnhancerProviderDescriptor(resolveEnhancerProviderDescriptor(this.enhancerProvider))
4894
+ : undefined;
3022
4895
  this.enhancerConfig = mergeRetrievalEnhancerConfig(DEFAULT_RETRIEVAL_ENHANCER_CONFIG, options?.enhancerConfig);
4896
+ this.enhancerGenerationConfig = mergeRetrievalEnhancerGenerationConfig(
4897
+ DEFAULT_RETRIEVAL_ENHANCER_GENERATION_CONFIG,
4898
+ options?.enhancerGenerationConfig
4899
+ );
3023
4900
  this.chunkingConfig = mergeRetrievalChunkingConfig(DEFAULT_RETRIEVAL_CHUNKING_CONFIG, options?.chunkingConfig);
3024
4901
  this.enhancerDecisionTraceEnabled = Boolean(options?.enhancerDecisionTraceEnabled);
3025
4902
  }
@@ -3736,6 +5613,106 @@ export class RetrievalCore {
3736
5613
  };
3737
5614
  }
3738
5615
 
5616
/**
 * Re-orders the leading search candidates using the configured learned reranker.
 *
 * Only the first `rerankerTopN` candidates (the "head") are sent to the provider; the
 * remaining "tail" keeps its order and scores. Head rows that the provider does not
 * mention are appended after the ranked ones in their original order, so no candidate
 * is dropped. Re-ordered rows get synthetic, strictly descending scores anchored one
 * point above the larger of the first head score and the first tail score.
 *
 * A failure while calling the provider or validating its response is counted in the
 * failure/fallback metrics, logged at warn level, and answered with the untouched
 * input list. Latency is observed whether the attempt succeeded or not.
 *
 * @param input.trace_id Trace identifier written to the fallback log entry.
 * @param input.query Query text forwarded to the reranker.
 * @param input.candidates Candidate rows in their current order.
 * @returns The head re-ordered and re-scored followed by the tail, or
 *   `input.candidates` itself when no reranker is configured, fewer than two rows are
 *   eligible, or the rerank attempt failed.
 */
private async applyLearnedReranker(input: {
  trace_id: string;
  query: string;
  candidates: SearchResultRow[];
}): Promise<SearchResultRow[]> {
  const provider = this.rerankerProvider;
  const descriptor = this.rerankerDescriptor;
  if (!provider || !descriptor) {
    return input.candidates;
  }

  const { candidates, query, trace_id: traceId } = input;
  const headSize = Math.min(this.rerankerTopN, candidates.length);
  if (headSize <= 1) {
    // Nothing to re-order with zero or one eligible row.
    return candidates;
  }

  const head = candidates.slice(0, headSize);
  const tail = candidates.slice(headSize);
  const metricLabels = {
    provider: descriptor.provider,
    model: descriptor.model ?? "unknown"
  } as const;
  const metrics = this.observability.metrics;

  metrics.increment("retrieval_reranker_requests_total", 1, metricLabels);
  const startedAt = Date.now();
  try {
    const ranking = await provider.rerank({
      query,
      documents: head.map((row) => buildRerankerDocument(row)),
      top_n: headSize
    });

    if (!Array.isArray(ranking) || ranking.length === 0) {
      throw new RerankerProviderRequestError("invalid_response", "reranker response must contain at least one result");
    }

    const picked = new Set<number>();
    const reordered: SearchResultRow[] = [];
    for (const entry of ranking) {
      const position = entry.index;
      if (!Number.isInteger(position)) {
        throw new RerankerProviderRequestError("invalid_response", "reranker result index must be an integer");
      }
      if (!(position >= 0 && position < head.length)) {
        throw new RerankerProviderRequestError("invalid_response", "reranker result index out of range");
      }
      // Repeated indexes are ignored; the first occurrence wins.
      const row = picked.has(position) ? undefined : head[position];
      if (!row) {
        continue;
      }
      picked.add(position);
      reordered.push(row);
    }

    // Keep every head row the provider left out, preserving the original relative order.
    head.forEach((row, position) => {
      if (!picked.has(position) && row) {
        reordered.push(row);
      }
    });

    if (reordered.length === 0) {
      throw new RerankerProviderRequestError("invalid_response", "reranker did not return usable indexes");
    }

    const firstTailScore = tail[0]?.score ?? Number.NEGATIVE_INFINITY;
    const firstHeadScore = head[0]?.score ?? 0;
    const anchor = Math.max(firstHeadScore, firstTailScore) + 1;
    const step = 1e-6;
    const rescored = reordered.map((row, rank) => ({
      ...row,
      score: anchor - rank * step
    }));
    return [...rescored, ...tail];
  } catch (error) {
    const reason = classifyRerankerFailureReason(error);
    metrics.increment("retrieval_reranker_failures_total", 1, {
      ...metricLabels,
      reason
    });
    metrics.increment("retrieval_reranker_fallback_total", 1, {
      reason
    });
    this.observability.logger.warn("search_context reranker fallback applied", {
      trace_id: traceId,
      provider: metricLabels.provider,
      model: metricLabels.model,
      reason,
      top_n: headSize,
      error_message: error instanceof Error ? error.message : String(error)
    });
    return candidates;
  } finally {
    metrics.observe("retrieval_reranker_latency_ms", Date.now() - startedAt, metricLabels);
  }
}
5715
+
3739
5716
  async searchContext(input: {
3740
5717
  trace_id: string;
3741
5718
  tenant_id: string;
@@ -3757,9 +5734,9 @@ export class RetrievalCore {
3757
5734
  index_id: index.index_id
3758
5735
  });
3759
5736
 
3760
- const topK = Math.min(input.request.top_k ?? 8, MAX_TOP_K);
3761
- const candidatePoolTopK = Math.min(MAX_TOP_K, Math.max(topK * 4, 12));
3762
5737
  const query = normalizeQuery(input.request.query);
5738
+ const topK = Math.min(input.request.top_k ?? 8, MAX_TOP_K);
5739
+ const candidatePoolTopK = Math.max(Math.max(topK * 4, 12), this.internalCandidateDepth);
3763
5740
 
3764
5741
  if (!indexMetadata) {
3765
5742
  this.observability.metrics.increment("retrieval_embedding_metadata_mismatch_total", 1, {
@@ -3831,13 +5808,22 @@ export class RetrievalCore {
3831
5808
  );
3832
5809
  }
3833
5810
  const queryTokens = tokenize(query);
5811
+ const searchLiterals = extractSearchLiterals(query);
5812
+
5813
+ this.observability.metrics.observe("retrieval_candidate_depth_requested", topK, {
5814
+ retrieval_profile_id: this.scoringProfileId
5815
+ });
5816
+ this.observability.metrics.observe("retrieval_candidate_depth_effective", candidatePoolTopK, {
5817
+ retrieval_profile_id: this.scoringProfileId
5818
+ });
3834
5819
 
3835
5820
  const cacheKey = buildQueryCacheKey({
3836
5821
  workspace_id: input.workspace_id,
3837
5822
  index_version: index.index_version,
3838
5823
  query,
3839
5824
  top_k: topK,
3840
- filters: input.request.filters
5825
+ filters: input.request.filters,
5826
+ retrieval_variant: this.rerankerCacheVariant
3841
5827
  });
3842
5828
 
3843
5829
  const cached = await this.cache.get(cacheKey);
@@ -3859,6 +5845,8 @@ export class RetrievalCore {
3859
5845
  workspace_id: input.workspace_id
3860
5846
  },
3861
5847
  async () => {
5848
+ let literalPathMatchCount = 0;
5849
+ let literalSnippetMatchCount = 0;
3862
5850
  let ranked: RankedChunkCandidate[] | undefined;
3863
5851
  if (this.store.rankChunksByIndex) {
3864
5852
  ranked = await this.store.rankChunksByIndex({
@@ -3879,11 +5867,21 @@ export class RetrievalCore {
3879
5867
  .map((candidate) => {
3880
5868
  let score = candidate.score;
3881
5869
  score += pathQualityBias(candidate.path, queryTokens, this.scoringConfig, query);
5870
+ const literalBoost = applyLiteralBoost({
5871
+ path: candidate.path,
5872
+ snippet: candidate.snippet,
5873
+ literals: searchLiterals,
5874
+ path_bias: this.scoringConfig.path_bias
5875
+ });
5876
+ score += literalBoost.boost;
5877
+ literalPathMatchCount += literalBoost.path_matches;
5878
+ literalSnippetMatchCount += literalBoost.snippet_matches;
3882
5879
  if (looksLowInformation(candidate.snippet)) {
3883
5880
  score -= this.scoringConfig.rerank.low_information_penalty;
3884
5881
  }
3885
5882
  const reason = chooseReason({
3886
5883
  lexical: candidate.lexical_score,
5884
+ literal_match: literalBoost.matched,
3887
5885
  path_match: candidate.path_match,
3888
5886
  recency_boosted: candidate.recency_boosted
3889
5887
  });
@@ -3924,11 +5922,25 @@ export class RetrievalCore {
3924
5922
  score -= candidateWeights.generated_penalty;
3925
5923
  }
3926
5924
  score += pathQualityBias(chunk.path, queryTokens, this.scoringConfig, query);
5925
+ const literalBoost = applyLiteralBoost({
5926
+ path: chunk.path,
5927
+ snippet: chunk.snippet,
5928
+ literals: searchLiterals,
5929
+ path_bias: this.scoringConfig.path_bias
5930
+ });
5931
+ score += literalBoost.boost;
5932
+ literalPathMatchCount += literalBoost.path_matches;
5933
+ literalSnippetMatchCount += literalBoost.snippet_matches;
3927
5934
  if (looksLowInformation(chunk.snippet)) {
3928
5935
  score -= this.scoringConfig.rerank.low_information_penalty;
3929
5936
  }
3930
5937
 
3931
- const reason = chooseReason({ lexical: l, path_match: pathMatch, recency_boosted: recencyBoost });
5938
+ const reason = chooseReason({
5939
+ lexical: l,
5940
+ literal_match: literalBoost.matched,
5941
+ path_match: pathMatch,
5942
+ recency_boosted: recencyBoost
5943
+ });
3932
5944
 
3933
5945
  return {
3934
5946
  path: chunk.path,
@@ -3946,10 +5958,36 @@ export class RetrievalCore {
3946
5958
  channel: "hybrid",
3947
5959
  retrieval_profile_id: this.scoringProfileId
3948
5960
  });
5961
+ this.observability.metrics.observe("retrieval_candidates_pre_rerank_count", output.length, {
5962
+ retrieval_profile_id: this.scoringProfileId
5963
+ });
5964
+ if (literalPathMatchCount > 0) {
5965
+ this.observability.metrics.increment("retrieval_literal_boost_applied_total", literalPathMatchCount, {
5966
+ retrieval_profile_id: this.scoringProfileId,
5967
+ channel: "path"
5968
+ });
5969
+ }
5970
+ if (literalSnippetMatchCount > 0) {
5971
+ this.observability.metrics.increment("retrieval_literal_boost_applied_total", literalSnippetMatchCount, {
5972
+ retrieval_profile_id: this.scoringProfileId,
5973
+ channel: "snippet"
5974
+ });
5975
+ }
3949
5976
  return output;
3950
5977
  }
3951
5978
  );
3952
5979
 
5980
+ const rerankedCandidates = await this.observability.tracing.withSpan(
5981
+ "retrieval.learned_rerank",
5982
+ { trace_id: input.trace_id },
5983
+ async () =>
5984
+ this.applyLearnedReranker({
5985
+ trace_id: input.trace_id,
5986
+ query,
5987
+ candidates
5988
+ })
5989
+ );
5990
+
3953
5991
  const deduped = await this.observability.tracing.withSpan("retrieval.rerank", { trace_id: input.trace_id }, async () => {
3954
5992
  const output: SearchContextOutput["results"] = [];
3955
5993
  const seen = new Set<string>();
@@ -3960,7 +5998,7 @@ export class RetrievalCore {
3960
5998
  ? this.scoringConfig.rerank.max_chunks_per_path_file_lookup
3961
5999
  : this.scoringConfig.rerank.max_chunks_per_path_default;
3962
6000
 
3963
- const available = [...candidates];
6001
+ const available = [...rerankedCandidates];
3964
6002
  while (output.length < topK && available.length > 0) {
3965
6003
  let bestIndex = -1;
3966
6004
  let bestAdjustedScore = Number.NEGATIVE_INFINITY;
@@ -4028,6 +6066,41 @@ export class RetrievalCore {
4028
6066
  return output;
4029
6067
  });
4030
6068
 
6069
+ const candidateRankByKey = new Map<string, number>();
6070
+ for (let index = 0; index < rerankedCandidates.length; index += 1) {
6071
+ const candidate = rerankedCandidates[index];
6072
+ if (!candidate) {
6073
+ continue;
6074
+ }
6075
+ const key = `${candidate.path}:${candidate.start_line}:${candidate.end_line}`;
6076
+ if (!candidateRankByKey.has(key)) {
6077
+ candidateRankByKey.set(key, index + 1);
6078
+ }
6079
+ }
6080
+
6081
+ let literalMatchesInTopK = 0;
6082
+ for (let postRank = 0; postRank < deduped.length; postRank += 1) {
6083
+ const row = deduped[postRank];
6084
+ if (!row) {
6085
+ continue;
6086
+ }
6087
+ if (isExactLiteralReason(row.reason)) {
6088
+ literalMatchesInTopK += 1;
6089
+ }
6090
+ this.observability.metrics.increment("retrieval_reason_topk_total", 1, {
6091
+ retrieval_profile_id: this.scoringProfileId,
6092
+ reason: row.reason
6093
+ });
6094
+ const key = `${row.path}:${row.start_line}:${row.end_line}`;
6095
+ const preRank = candidateRankByKey.get(key) ?? postRank + 1;
6096
+ this.observability.metrics.observe("retrieval_rank_shift_delta", preRank - (postRank + 1), {
6097
+ retrieval_profile_id: this.scoringProfileId
6098
+ });
6099
+ }
6100
+ this.observability.metrics.observe("retrieval_literal_matches_topk", literalMatchesInTopK, {
6101
+ retrieval_profile_id: this.scoringProfileId
6102
+ });
6103
+
4031
6104
  const output: SearchContextOutput = {
4032
6105
  trace_id: input.trace_id,
4033
6106
  results: deduped,
@@ -4055,6 +6128,127 @@ export class RetrievalCore {
4055
6128
  return output;
4056
6129
  }
4057
6130
 
6131
/**
 * Builds the label set attached to enhancer-provider metrics.
 *
 * Uses provider "template" and model "n/a" when the descriptor (or the respective
 * field) is absent; `tool_mode` always comes from the generation config.
 */
private enhancerProviderLabels(): Record<string, string> {
  const descriptor = this.enhancerProviderDescriptor;
  const labels: Record<string, string> = {
    provider: descriptor?.provider ?? "template",
    model: descriptor?.model ?? "n/a",
    tool_mode: this.enhancerGenerationConfig.tool_mode
  };
  return labels;
}
6138
+
6139
/**
 * Converts the leading search results into context snippets for the enhancer provider.
 *
 * At most `enhancerGenerationConfig.max_context_snippets` results are used, and each
 * snippet body is truncated to its first 1,600 characters.
 *
 * @param results Search results in the order they should be offered to the provider.
 * @returns One snippet per retained result, in the same order.
 */
private buildEnhancerContextSnippets(results: SearchContextOutput["results"]): EnhancerContextSnippet[] {
  const limit = this.enhancerGenerationConfig.max_context_snippets;
  return results.slice(0, limit).map((row) => ({
    path: row.path,
    start_line: row.start_line,
    end_line: row.end_line,
    reason: row.reason,
    snippet: row.snippet.slice(0, 1_600),
    score: row.score
  }));
}
6154
+
6155
/**
 * Produces the enhanced prompt text, either from the built-in template or from the
 * configured enhancer provider.
 *
 * Without a provider the result comes straight from `formatEnhancedPrompt`. With a
 * provider, up to `max_retries + 1` attempts are made, each wrapped in
 * `runWithInactivityTimeout` using the configured `timeout_ms`. Every attempt increments
 * the request counter; latency is observed only for attempts whose provider call
 * resolved. A prompt that is empty after normalization is raised as an
 * `EnhancerProviderRequestError` tagged `invalid_response`.
 *
 * Failures are counted and logged per attempt. Failure reasons `timeout`,
 * `schema_error` and `invalid_response` stop the loop immediately; any other reason
 * is retried until the attempt limit is reached.
 *
 * @param input Request context forwarded field-by-field to the provider.
 * @returns The normalized, non-empty enhanced prompt.
 * @throws RetrievalError with code `UPSTREAM_FAILURE` when no attempt succeeded.
 */
private async generateEnhancedPrompt(input: {
  trace_id: string;
  tenant_id: string;
  workspace_id?: string;
  request: EnhancePromptInput;
  style_requested: EnhancePromptStyle;
  style_resolved: ResolvedEnhancerPromptStyle;
  intent: EnhancerIntent;
  query_intent: "symbol-heavy" | "impl-focused" | "conceptual";
  language: EnhancerOutputLanguage;
  context_refs: ContextRef[];
  context_snippets: EnhancerContextSnippet[];
  warnings: string[];
  questions: string[];
}): Promise<string> {
  const provider = this.enhancerProvider;
  if (!provider) {
    // No generation provider configured: fall back to the static template.
    return formatEnhancedPrompt({
      style: input.style_resolved,
      language: input.language,
      original_prompt: input.request.prompt,
      refs: input.context_refs
    });
  }

  const generation = this.enhancerGenerationConfig;
  const attemptLimit = generation.max_retries + 1;
  let lastFailure: EnhancerProviderRequestError | undefined;

  let attempt = 0;
  while (attempt < attemptLimit) {
    attempt += 1;
    const startedAt = Date.now();
    this.observability.metrics.increment("enhancer_provider_requests_total", 1, this.enhancerProviderLabels());
    try {
      const generated = await runWithInactivityTimeout({
        timeout_ms: generation.timeout_ms,
        fn: ({ touch, signal }) =>
          provider.generate({
            trace_id: input.trace_id,
            tenant_id: input.tenant_id,
            workspace_id: input.workspace_id,
            request: input.request,
            style_requested: input.style_requested,
            style_resolved: input.style_resolved,
            intent: input.intent,
            query_intent: input.query_intent,
            language: input.language,
            context_refs: input.context_refs,
            context_snippets: input.context_snippets,
            warnings: input.warnings,
            questions: input.questions,
            tool_mode: generation.tool_mode,
            abort_signal: signal,
            on_progress: touch
          })
      });
      this.observability.metrics.observe(
        "enhancer_provider_latency_ms",
        Date.now() - startedAt,
        this.enhancerProviderLabels()
      );
      const prompt = normalizeProviderEnhancedPrompt(generated.enhanced_prompt);
      if (prompt.length === 0) {
        throw new EnhancerProviderRequestError("invalid_response", "enhancer provider returned an empty enhanced_prompt");
      }
      return prompt;
    } catch (error) {
      const reason = classifyEnhancerGenerationFailureReason(error);
      const failure =
        error instanceof EnhancerProviderRequestError ? error : new EnhancerProviderRequestError(reason, String(error));
      lastFailure = failure;
      // These reasons are treated as final: another attempt is not made for them.
      const nonRetryable = reason === "timeout" || reason === "schema_error" || reason === "invalid_response";
      this.observability.metrics.increment("enhancer_provider_failures_total", 1, {
        ...this.enhancerProviderLabels(),
        reason
      });
      this.observability.logger.warn("enhancer provider generation failed", {
        trace_id: input.trace_id,
        attempt,
        max_attempts: attemptLimit,
        reason,
        retrying: attempt < attemptLimit && !nonRetryable,
        style_requested: input.style_requested,
        style_resolved: input.style_resolved,
        provider: this.enhancerProviderDescriptor?.provider ?? "custom",
        model: this.enhancerProviderDescriptor?.model ?? "unknown",
        error: failure.message
      });
      if (nonRetryable) {
        break;
      }
    }
  }

  const message = lastFailure?.message ?? "enhancer provider failed";
  throw new RetrievalError("UPSTREAM_FAILURE", `enhancer provider failed after retries: ${message}`);
}
6251
+
4058
6252
  async enhancePrompt(input: {
4059
6253
  trace_id: string;
4060
6254
  tenant_id: string;
@@ -4063,16 +6257,18 @@ export class RetrievalCore {
4063
6257
  }): Promise<EnhancePromptOutput> {
4064
6258
  const startedAt = Date.now();
4065
6259
  const warnings: string[] = [];
4066
- const questions: string[] = [];
4067
- const addQuestion = (value: string): void => {
4068
- if (!questions.includes(value)) {
4069
- questions.push(value);
4070
- }
4071
- };
4072
6260
 
4073
6261
  const intent = classifyIntent(input.request.prompt);
4074
6262
  const queryIntent = classifyEnhancerQueryIntent(input.request.prompt, input.request.conversation_history);
4075
6263
  const language = detectDominantLanguage(input.request.prompt, input.request.conversation_history);
6264
+ const style = resolveEnhancerPromptStyle({
6265
+ requested: input.request.style,
6266
+ intent,
6267
+ query_intent: queryIntent,
6268
+ prompt: input.request.prompt,
6269
+ history: input.request.conversation_history,
6270
+ has_context: Boolean(input.request.project_root_path && input.workspace_id)
6271
+ });
4076
6272
  const negativePreferences = detectNegativePathPreferences(
4077
6273
  `${input.request.prompt}\n${input.request.conversation_history.map((entry) => entry.content).join("\n")}`
4078
6274
  );
@@ -4198,20 +6394,6 @@ export class RetrievalCore {
4198
6394
  searchResults,
4199
6395
  intentPolicy.max_candidates_per_directory_pre_rerank
4200
6396
  ).slice(0, intentPolicy.max_candidates_pre_rerank);
4201
-
4202
- const symbolCandidates = extractLikelyCodeSymbols(
4203
- `${input.request.prompt}\n${input.request.conversation_history.map((entry) => entry.content).join("\n")}`,
4204
- 3
4205
- );
4206
- if (confidenceSignals.failed_signals.includes("score_spread")) {
4207
- addQuestion(localizeLowConfidenceQuestion({ language, kind: "scope" }));
4208
- }
4209
- if (confidenceSignals.failed_signals.includes("token_overlap")) {
4210
- addQuestion(localizeLowConfidenceQuestion({ language, kind: "symbol", symbol: symbolCandidates[0] }));
4211
- }
4212
- if (confidenceSignals.failed_signals.includes("path_diversity")) {
4213
- addQuestion(localizeLowConfidenceQuestion({ language, kind: "source_priority" }));
4214
- }
4215
6397
  } else {
4216
6398
  searchResults = dedupeEnhancerCandidatesByPath(searchResults);
4217
6399
  searchResults = collapseEnhancerCandidatesByDirectory(
@@ -4221,6 +6403,9 @@ export class RetrievalCore {
4221
6403
  }
4222
6404
  candidateCountPostRerank = searchResults.length;
4223
6405
  } catch (error) {
6406
+ if (error instanceof RetrievalError && error.code === "RATE_LIMITED") {
6407
+ throw error;
6408
+ }
4224
6409
  warnings.push("Context retrieval unavailable; enhancement generated with limited confidence.");
4225
6410
  fallbackTriggered = true;
4226
6411
  fallbackReason = "context_retrieval_unavailable";
@@ -4231,16 +6416,6 @@ export class RetrievalCore {
4231
6416
  }
4232
6417
  }
4233
6418
 
4234
- if (intent === "unknown") {
4235
- addQuestion(
4236
- language === "es"
4237
- ? "¿Cuál es el resultado esperado exacto y el alcance del cambio?"
4238
- : language === "zh"
4239
- ? "这次变更的精确目标和范围是什么?"
4240
- : "What exact outcome and scope should this change target?"
4241
- );
4242
- }
4243
-
4244
6419
  const contextRefs: ContextRef[] = searchResults.map((result) => ({
4245
6420
  path: result.path,
4246
6421
  start_line: result.start_line,
@@ -4248,19 +6423,29 @@ export class RetrievalCore {
4248
6423
  reason: result.reason
4249
6424
  }));
4250
6425
 
4251
- const enhancedPrompt = formatEnhancedPrompt({
6426
+ const contextSnippets = this.buildEnhancerContextSnippets(searchResults);
6427
+ const enhancedPrompt = await this.generateEnhancedPrompt({
6428
+ trace_id: input.trace_id,
6429
+ tenant_id: input.tenant_id,
6430
+ workspace_id: input.workspace_id,
6431
+ request: input.request,
6432
+ style_requested: style.requested,
6433
+ style_resolved: style.resolved,
4252
6434
  intent,
6435
+ query_intent: queryIntent,
4253
6436
  language,
4254
- original_prompt: input.request.prompt,
4255
- refs: contextRefs
6437
+ context_refs: contextRefs,
6438
+ context_snippets: contextSnippets,
6439
+ warnings: [],
6440
+ questions: []
4256
6441
  });
4257
6442
 
4258
6443
  const output: EnhancePromptOutput = {
4259
6444
  trace_id: input.trace_id,
4260
6445
  enhanced_prompt: enhancedPrompt,
4261
6446
  context_refs: contextRefs,
4262
- warnings,
4263
- questions
6447
+ warnings: [],
6448
+ questions: []
4264
6449
  };
4265
6450
 
4266
6451
  const latency_ms = Date.now() - startedAt;
@@ -4310,6 +6495,11 @@ export class RetrievalCore {
4310
6495
  fallback_triggered: fallbackTriggered,
4311
6496
  fallback_reason: fallbackReason,
4312
6497
  query_intent: queryIntent,
6498
+ style_requested: style.requested,
6499
+ style_resolved: style.resolved,
6500
+ enhancer_provider: this.enhancerProviderDescriptor?.provider ?? "template",
6501
+ enhancer_model: this.enhancerProviderDescriptor?.model ?? null,
6502
+ enhancer_tool_mode: this.enhancerGenerationConfig.tool_mode,
4313
6503
  confidence_score_spread: confidenceSignals?.score_spread ?? null,
4314
6504
  confidence_token_overlap: confidenceSignals?.token_overlap ?? null,
4315
6505
  confidence_path_diversity: confidenceSignals?.path_diversity ?? null,