npm - @rce-mcp/retrieval-core - Versions diffs - 0.1.1 → 0.1.2 - Mend

@rce-mcp/retrieval-core 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

package/AGENTS.md +1 -0
package/dist/.tsbuildinfo +1 -1
package/dist/chunking.js +5 -4
package/dist/index.d.ts +219 -4
package/dist/index.js +1928 -176
package/dist/remote-sync.js +2 -1
package/package.json +8 -6
package/src/chunking.ts +5 -4
package/src/index.ts +2398 -208
package/src/remote-sync.ts +3 -1
package/test/benchmark.thresholds.test.ts +55 -0
package/test/chunking.config.test.ts +28 -0
package/test/chunking.language-aware.test.ts +23 -4
package/test/chunking.parser-availability.poc.test.ts +3 -3
package/test/claude-agent-provider.test.ts +209 -0
package/test/embedding-provider.test.ts +450 -1
package/test/enhance-confidence.test.ts +275 -3
package/test/integration.test.ts +185 -1
package/test/mcp-search-quality.regression.test.ts +322 -0
package/test/remote-sync.integration.test.ts +11 -0

package/src/index.ts (changed)

@@ -1,11 +1,18 @@
 import { createHash, randomUUID } from "node:crypto";
-import type { EnhancePromptInput, EnhancePromptOutput, SearchContextInput, SearchContextOutput } from "@rce-mcp/contracts";
+import type {
+  EnhancePromptInput,
+  EnhancePromptOutput,
+  EnhancePromptStyle,
+  SearchContextInput,
+  SearchContextOutput
+} from "@rce-mcp/contracts";
 import {
   buildQueryCacheKey,
   type CandidateScoreWeights,
   type IndexRepository,
   type QueryCache,
   type RankedChunkCandidate,
+  tokenizeForRanking,
   type WorkspaceRecord
 } from "@rce-mcp/data-plane";
 import { InMemoryQueryCache } from "@rce-mcp/data-plane";
@@ -19,12 +26,16 @@ import {
 type RetrievalMode = SearchContextOutput["search_metadata"]["retrieval_mode"];
 type ContextRef = EnhancePromptOutput["context_refs"][number];
+type SearchResultRow = SearchContextOutput["results"][number];
 const MAX_FILE_SIZE_BYTES = 1_000_000;
 const MAX_CHUNKS_PER_FILE = 300;
-const TARGET_CHUNK_TOKENS = 220;
-const CHUNK_OVERLAP_TOKENS = 40;
+const DEFAULT_TARGET_CHUNK_TOKENS = 420;
+const DEFAULT_CHUNK_OVERLAP_TOKENS = 90;
 const MAX_TOP_K = 20;
+const DEFAULT_INTERNAL_CANDIDATE_DEPTH = 100;
+const MIN_INTERNAL_CANDIDATE_DEPTH = 20;
+const MAX_INTERNAL_CANDIDATE_DEPTH = 200;
 const MAX_CONTEXT_BUDGET_TOKENS = 12_000;
 export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_BASE_URL = "https://router.tumuer.me/v1";
 export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_MODEL = "Qwen/Qwen3-Embedding-4B";
@@ -32,6 +43,17 @@ export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_DIMENSIONS = 2560;
 export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_TIMEOUT_MS = 10_000;
 export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_BATCH_SIZE = 64;
 export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_MAX_RETRIES = 2;
+/** Fallback for `transient_forbidden_max_retries` on the OpenAI-compatible embedding provider. */
+export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_TRANSIENT_403_MAX_RETRIES = 4;
+// Reranker defaults. NOTE(review): their consumers are outside this excerpt.
+export const DEFAULT_OPENAI_COMPATIBLE_RERANKER_BASE_URL = "https://router.tumuer.me/v1";
+export const DEFAULT_OPENAI_COMPATIBLE_RERANKER_MODEL = "Qwen/Qwen3-Reranker-4B";
+export const DEFAULT_OPENAI_COMPATIBLE_RERANKER_TIMEOUT_MS = 2_500;
+export const DEFAULT_SEARCH_RERANKER_TOP_N = 30;
+// Provider rate-limit defaults. The embedding provider falls back to the
+// requests-per-minute, index-wait and query-wait values when the matching
+// option is omitted.
+export const DEFAULT_PROVIDER_MAX_REQUESTS_PER_MINUTE = 90;
+export const DEFAULT_PROVIDER_LIMIT_INDEX_MAX_WAIT_MS = 120_000;
+export const DEFAULT_PROVIDER_LIMIT_QUERY_MAX_WAIT_MS = 1_000;
+export const DEFAULT_PROVIDER_LIMIT_RERANK_MAX_WAIT_MS = 500;
+// Claude enhancer defaults. NOTE(review): their consumers are outside this excerpt.
+export const DEFAULT_CLAUDE_ENHANCER_MODEL = "claude-3-5-sonnet-latest";
+const DEFAULT_CLAUDE_ENHANCER_MAX_TURNS = 3;
 const DEFAULT_CANDIDATE_SCORE_WEIGHTS: CandidateScoreWeights = {
   lexical_weight: 0.6,
@@ -69,6 +91,10 @@ export interface RetrievalPathBiasConfig {
   negation_avoid_tests_penalty: number;
   negation_avoid_examples_penalty: number;
   negation_avoid_archive_penalty: number;
+  security_trace_meta_penalty: number;
+  literal_path_boost: number;
+  literal_snippet_boost: number;
+  literal_max_boost: number;
   min_total_bias: number;
   max_total_bias: number;
 }
@@ -93,9 +119,20 @@ export interface RetrievalEnhancerConfig {
   rerank_timeout_ms: number;
 }
+/** Tool access mode for enhancer generation; these are the only two values that pass config validation. */
+export type EnhancerToolMode = "none" | "read_only";
+/**
+ * Settings for enhancer prompt generation.
+ * The ranges noted on each field are the ones enforced by
+ * validateEnhancerGenerationConfig.
+ */
+export interface RetrievalEnhancerGenerationConfig {
+  /** Positive integer (milliseconds, per the field name). */
+  timeout_ms: number;
+  /** Non-negative integer. */
+  max_retries: number;
+  /** Must be "none" or "read_only". */
+  tool_mode: EnhancerToolMode;
+  /** Positive integer. */
+  max_context_snippets: number;
+}
 export interface RetrievalChunkingConfig {
   strategy: ChunkingStrategy;
   fallback_strategy: "sliding";
+  target_chunk_tokens: number;
+  chunk_overlap_tokens: number;
   parse_timeout_ms: number;
   enabled_languages: string[];
 }
@@ -107,9 +144,12 @@ export type RetrievalScoringConfigInput = Partial<{
 }>;
 export type RetrievalEnhancerConfigInput = Partial<RetrievalEnhancerConfig>;
+export type RetrievalEnhancerGenerationConfigInput = Partial<RetrievalEnhancerGenerationConfig>;
 export type RetrievalChunkingConfigInput = Partial<{
   strategy: ChunkingStrategy;
   fallback_strategy: "sliding";
+  target_chunk_tokens: number;
+  chunk_overlap_tokens: number;
   parse_timeout_ms: number;
   enabled_languages: string[];
 }>;
@@ -144,6 +184,10 @@ export const BASELINE_RETRIEVAL_SCORING_CONFIG: RetrievalScoringConfig = {
     negation_avoid_tests_penalty: 0.35,
     negation_avoid_examples_penalty: 0.3,
     negation_avoid_archive_penalty: 0.35,
+    security_trace_meta_penalty: 0.22,
+    literal_path_boost: 0.3,
+    literal_snippet_boost: 0.18,
+    literal_max_boost: 0.5,
     min_total_bias: -0.45,
     max_total_bias: 0.35
   },
@@ -192,6 +236,10 @@ export const CONSERVATIVE_RETRIEVAL_SCORING_CONFIG: RetrievalScoringConfig = {
     negation_avoid_tests_penalty: 0.2,
     negation_avoid_examples_penalty: 0.16,
     negation_avoid_archive_penalty: 0.2,
+    security_trace_meta_penalty: 0.14,
+    literal_path_boost: 0.18,
+    literal_snippet_boost: 0.1,
+    literal_max_boost: 0.28,
     min_total_bias: -0.25,
     max_total_bias: 0.2
   },
@@ -210,9 +258,18 @@ export const DEFAULT_RETRIEVAL_ENHANCER_CONFIG: RetrievalEnhancerConfig = {
   rerank_timeout_ms: 40
 };
+/**
+ * Built-in enhancer generation settings: an 18_000 ms timeout, one retry,
+ * read-only tool mode, and at most six context snippets.
+ */
+export const DEFAULT_RETRIEVAL_ENHANCER_GENERATION_CONFIG: RetrievalEnhancerGenerationConfig = {
+  timeout_ms: 18_000,
+  max_retries: 1,
+  tool_mode: "read_only",
+  max_context_snippets: 6
+};
 export const DEFAULT_RETRIEVAL_CHUNKING_CONFIG: RetrievalChunkingConfig = {
   strategy: "sliding",
   fallback_strategy: "sliding",
+  target_chunk_tokens: DEFAULT_TARGET_CHUNK_TOKENS,
+  chunk_overlap_tokens: DEFAULT_CHUNK_OVERLAP_TOKENS,
   parse_timeout_ms: 80,
   enabled_languages: ["typescript", "javascript", "python", "go"]
 };
@@ -351,6 +408,33 @@ export function mergeRetrievalEnhancerConfig(
   return next;
 }
+/**
+ * Rejects an enhancer generation config whose fields are out of range.
+ *
+ * Checks run in a fixed order (timeout_ms, max_retries, tool_mode,
+ * max_context_snippets) and the first failing one determines the error.
+ *
+ * @param config Fully merged config to validate.
+ * @throws Error describing the first invalid field.
+ */
+function validateEnhancerGenerationConfig(config: RetrievalEnhancerGenerationConfig): void {
+  const rules: Array<[boolean, string]> = [
+    [
+      Number.isInteger(config.timeout_ms) && config.timeout_ms > 0,
+      "invalid retrieval enhancer generation config: timeout_ms must be a positive integer"
+    ],
+    [
+      Number.isInteger(config.max_retries) && config.max_retries >= 0,
+      "invalid retrieval enhancer generation config: max_retries must be a non-negative integer"
+    ],
+    [
+      config.tool_mode === "none" || config.tool_mode === "read_only",
+      "invalid retrieval enhancer generation config: tool_mode must be none|read_only"
+    ],
+    [
+      Number.isInteger(config.max_context_snippets) && config.max_context_snippets > 0,
+      "invalid retrieval enhancer generation config: max_context_snippets must be a positive integer"
+    ]
+  ];
+  for (const [passes, failureMessage] of rules) {
+    if (!passes) {
+      throw new Error(failureMessage);
+    }
+  }
+}
+/**
+ * Layers optional overrides on top of a base enhancer generation config and
+ * validates the result.
+ *
+ * An override that is present but `undefined` (for example an unset
+ * environment value forwarded by a caller) is treated as "not provided" and
+ * keeps the base value. Any other value, including `null`, is applied as-is
+ * and left for validation to accept or reject.
+ *
+ * @param base Complete config used for every field that is not overridden.
+ * @param overrides Optional partial config.
+ * @returns A new, validated config; neither argument is mutated.
+ * @throws Error when the merged config fails validation.
+ */
+export function mergeRetrievalEnhancerGenerationConfig(
+  base: RetrievalEnhancerGenerationConfig,
+  overrides?: RetrievalEnhancerGenerationConfigInput
+): RetrievalEnhancerGenerationConfig {
+  // A blind `{ ...base, ...overrides }` would let `{ timeout_ms: undefined }`
+  // erase a valid base value, so each field is resolved explicitly.
+  const pick = <T>(override: T | undefined, fallback: T): T => (override === undefined ? fallback : override);
+  const next: RetrievalEnhancerGenerationConfig = {
+    ...base,
+    timeout_ms: pick(overrides?.timeout_ms, base.timeout_ms),
+    max_retries: pick(overrides?.max_retries, base.max_retries),
+    tool_mode: pick(overrides?.tool_mode, base.tool_mode),
+    max_context_snippets: pick(overrides?.max_context_snippets, base.max_context_snippets)
+  };
+  validateEnhancerGenerationConfig(next);
+  return next;
+}
 function normalizeChunkingLanguageList(value: string[]): string[] {
   const deduped = new Set<string>();
   for (const language of value) {
@@ -370,6 +454,15 @@ function validateChunkingConfig(config: RetrievalChunkingConfig): void {
   if (config.fallback_strategy !== "sliding") {
     throw new Error("invalid retrieval chunking config: fallback_strategy must be sliding");
   }
+  if (!Number.isInteger(config.target_chunk_tokens) || config.target_chunk_tokens <= 0) {
+    throw new Error("invalid retrieval chunking config: target_chunk_tokens must be a positive integer");
+  }
+  if (!Number.isInteger(config.chunk_overlap_tokens) || config.chunk_overlap_tokens <= 0) {
+    throw new Error("invalid retrieval chunking config: chunk_overlap_tokens must be a positive integer");
+  }
+  if (config.chunk_overlap_tokens >= config.target_chunk_tokens) {
+    throw new Error("invalid retrieval chunking config: chunk_overlap_tokens must be less than target_chunk_tokens");
+  }
   if (!Number.isInteger(config.parse_timeout_ms) || config.parse_timeout_ms <= 0) {
     throw new Error("invalid retrieval chunking config: parse_timeout_ms must be a positive integer");
   }
@@ -411,19 +504,24 @@ function scoringConfigChecksum(config: RetrievalScoringConfig): string {
   return sha256(stableSerialize(config)).slice(0, 12);
 }
+/**
+ * Resolves the internal candidate depth.
+ *
+ * Missing or non-finite input falls back to DEFAULT_INTERNAL_CANDIDATE_DEPTH.
+ * Finite input is truncated toward zero. The result is then limited to the
+ * range [MIN_INTERNAL_CANDIDATE_DEPTH, MAX_INTERNAL_CANDIDATE_DEPTH].
+ */
+function clampInternalCandidateDepth(value: number | undefined): number {
+  let depth = DEFAULT_INTERNAL_CANDIDATE_DEPTH;
+  if (value !== undefined && Number.isFinite(value)) {
+    depth = Math.trunc(value);
+  }
+  const cappedAbove = Math.min(MAX_INTERNAL_CANDIDATE_DEPTH, depth);
+  return Math.max(MIN_INTERNAL_CANDIDATE_DEPTH, cappedAbove);
+}
 const REASON_STRINGS = [
   "semantic match",
-  "exact symbol match",
-  "path and token overlap",
+  "exact literal match",
+  "path token overlap",
   "recently modified relevant module"
 ] as const;
 export type RetrievalReason = (typeof REASON_STRINGS)[number];
 export class RetrievalError extends Error {
-  readonly code: "INVALID_ARGUMENT" | "NOT_FOUND" | "UPSTREAM_FAILURE";
+  readonly code: "INVALID_ARGUMENT" | "NOT_FOUND" | "RATE_LIMITED" | "UPSTREAM_FAILURE";
-  constructor(code: "INVALID_ARGUMENT" | "NOT_FOUND" | "UPSTREAM_FAILURE", message: string) {
+  constructor(code: "INVALID_ARGUMENT" | "NOT_FOUND" | "RATE_LIMITED" | "UPSTREAM_FAILURE", message: string) {
     super(message);
     this.code = code;
   }
@@ -510,13 +608,18 @@ export interface IndexingReport {
 export interface RetrievalCoreOptions {
   cacheTtlSeconds?: number;
+  internalCandidateDepth?: number;
   embeddingProvider?: EmbeddingProvider;
   embeddingDescriptor?: EmbeddingDescriptor;
+  rerankerProvider?: RerankerProvider;
+  rerankerTopN?: number;
+  enhancerProvider?: EnhancerGenerationProvider;
   observability?: Observability;
   scoringProfile?: BuiltinRetrievalScoringProfileId;
   scoringProfileId?: string;
   scoringConfig?: RetrievalScoringConfigInput;
   enhancerConfig?: RetrievalEnhancerConfigInput;
+  enhancerGenerationConfig?: RetrievalEnhancerGenerationConfigInput;
   chunkingConfig?: RetrievalChunkingConfigInput;
   enhancerDecisionTraceEnabled?: boolean;
 }
@@ -535,6 +638,67 @@ export interface EmbeddingProvider {
   describe?(): EmbeddingDescriptor;
 }
+/** Identifies a reranker backend: a provider name and, optionally, a model name. */
+export interface RerankerDescriptor {
+  provider: string;
+  model?: string;
+}
+/** One scored entry returned by a reranker. */
+export interface RerankerResult {
+  // NOTE(review): presumably the position of the scored document in the
+  // `documents` array passed to rerank(); confirm against the implementation.
+  index: number;
+  relevance_score: number;
+}
+/** Contract for pluggable rerankers. */
+export interface RerankerProvider {
+  /** Scores `documents` against `query`; `top_n` is forwarded to the provider. */
+  rerank(input: { query: string; documents: string[]; top_n: number }): Promise<RerankerResult[]>;
+  /** Optional self-description of the backend. */
+  describe?(): RerankerDescriptor;
+}
+/** Intent labels carried on an enhancer generation request. */
+export type EnhancerIntent = "bugfix" | "feature" | "refactor" | "docs" | "tests" | "unknown";
+/** Output language codes carried on an enhancer generation request. */
+export type EnhancerOutputLanguage = "en" | "es" | "zh";
+// Every prompt style except "auto", i.e. a style that has already been resolved.
+type ResolvedEnhancerPromptStyle = Exclude<EnhancePromptStyle, "auto">;
+/** A located piece of source text handed to the enhancer as context. */
+export interface EnhancerContextSnippet {
+  path: string;
+  start_line: number;
+  end_line: number;
+  reason: string;
+  snippet: string;
+  score: number;
+}
+/**
+ * Everything passed to EnhancerGenerationProvider.generate().
+ * NOTE(review): how these fields are populated is outside this excerpt.
+ */
+export interface EnhancerGenerationRequest {
+  trace_id: string;
+  tenant_id: string;
+  workspace_id?: string;
+  /** The original enhance-prompt input. */
+  request: EnhancePromptInput;
+  /** Style as requested; may still be "auto". */
+  style_requested: EnhancePromptStyle;
+  /** Concrete style; never "auto". */
+  style_resolved: ResolvedEnhancerPromptStyle;
+  intent: EnhancerIntent;
+  query_intent: "symbol-heavy" | "impl-focused" | "conceptual";
+  language: EnhancerOutputLanguage;
+  context_refs: ContextRef[];
+  context_snippets: EnhancerContextSnippet[];
+  warnings: string[];
+  questions: string[];
+  tool_mode: EnhancerToolMode;
+  /** Optional cancellation signal supplied by the caller. */
+  abort_signal?: AbortSignal;
+  /** Optional callback; presumably invoked as generation makes progress (TODO confirm). */
+  on_progress?: () => void;
+}
+/** Result of a generation call: the rewritten prompt text. */
+export interface EnhancerGenerationResult {
+  enhanced_prompt: string;
+}
+/** Identifies an enhancer backend: a provider name and, optionally, a model name. */
+export interface EnhancerProviderDescriptor {
+  provider: string;
+  model?: string;
+}
+/** Contract for pluggable enhancer generation backends. */
+export interface EnhancerGenerationProvider {
+  generate(input: EnhancerGenerationRequest): Promise<EnhancerGenerationResult>;
+  /** Optional self-description of the backend. */
+  describe?(): EnhancerProviderDescriptor;
+}
 export interface DeterministicEmbeddingProviderOptions {
   dimensions?: number;
   model?: string;
@@ -549,13 +713,88 @@ export interface OpenAICompatibleEmbeddingProviderOptions {
   timeout_ms?: number;
   batch_size?: number;
   max_retries?: number;
+  transient_forbidden_max_retries?: number;
+  request_limiter?: ProviderRequestLimiter;
+  request_limit_scope_id?: string;
+  max_requests_per_minute?: number;
+  index_max_wait_ms?: number;
+  query_max_wait_ms?: number;
+  observability?: Observability;
+}
+export interface OpenAICompatibleRerankerProviderOptions {
+  base_url: string;
+  api_key: string;
+  model?: string;
+  timeout_ms?: number;
+  request_limiter?: ProviderRequestLimiter;
+  request_limit_scope_id?: string;
+  max_requests_per_minute?: number;
+  rerank_max_wait_ms?: number;
   observability?: Observability;
 }
+/**
+ * Construction options for the Claude agent enhancer provider.
+ * Only `api_key` is required.
+ * NOTE(review): defaults for the optional fields are applied outside this excerpt.
+ */
+export interface ClaudeAgentEnhancerProviderOptions {
+  api_key: string;
+  model?: string;
+  base_url?: string;
+  max_tokens?: number;
+  path_to_claude_code_executable?: string;
+  permission_mode?: ClaudeCodePermissionMode;
+}
+/** Permission modes accepted for `permission_mode`. */
+export type ClaudeCodePermissionMode = "default" | "acceptEdits" | "bypassPermissions" | "plan";
+/** Arguments for one rate-limit acquisition. */
+export interface ProviderRateLimitAcquireInput {
+  /** Identifies the budget being drawn from; the built-in limiters key their state by it. */
+  scope: string;
+  /** The built-in limiters require a positive integer. */
+  max_requests_per_minute: number;
+  /** Longest the caller will wait for a slot; the built-in limiters require a non-negative integer. */
+  max_wait_ms: number;
+}
+/** Outcome of a successful acquisition. */
+export interface ProviderRateLimitAcquireResult {
+  /** Total milliseconds the built-in limiters slept before granting the slot (0 when granted at once). */
+  wait_ms: number;
+}
+/** Contract for limiters that gate outbound provider requests. */
+export interface ProviderRequestLimiter {
+  /** Optional tag naming the implementation family. */
+  readonly mode?: "local" | "redis" | "custom";
+  /**
+   * Resolves once a request slot is granted. The built-in limiters reject
+   * with ProviderRateLimitExceededError when no slot can be granted within
+   * `max_wait_ms`.
+   */
+  acquire(input: ProviderRateLimitAcquireInput): Promise<ProviderRateLimitAcquireResult>;
+}
+/** Minimal Redis client surface the limiter needs: the ability to run a Lua script via eval(). */
+export interface RedisProviderRequestLimiterClient {
+  eval(script: string, numKeys: number, ...args: Array<string | number>): Promise<unknown>;
+}
+/** Construction options for RedisProviderRequestLimiter. */
+export interface RedisProviderRequestLimiterOptions {
+  /** Required; the constructor rejects a client without an eval() function. */
+  redis: RedisProviderRequestLimiterClient;
+  /** Key namespace; a blank or missing value falls back to "rce:provider_rate_limit". */
+  key_prefix?: string;
+  /** Counting window in milliseconds; defaults to 60_000 and must be a positive integer. */
+  window_ms?: number;
+  /** Clock override; defaults to Date.now. */
+  now?: () => number;
+  /** Sleep override; defaults to the module's sleep helper. */
+  sleeper?: (ms: number) => Promise<void>;
+}
 class EmbeddingProviderRequestError extends Error {
   constructor(
     readonly reason: string,
     readonly retryable: boolean,
+    message: string,
+    readonly retry_after_ms?: number
+  ) {
+    super(message);
+  }
+}
+/** Error raised for a failed reranker request; `reason` carries a short failure category. */
+class RerankerProviderRequestError extends Error {
+  readonly reason: string;
+  constructor(reason: string, message: string) {
+    super(message);
+    this.reason = reason;
+  }
+}
+class EnhancerProviderRequestError extends Error {
+  constructor(
+    readonly reason: "timeout" | "rate_limited" | "schema_error" | "invalid_response" | "upstream_error",
     message: string
   ) {
     super(message);
@@ -611,44 +850,7 @@ function singularizeToken(token: string): string | undefined {
 }
 function tokenize(text: string): string[] {
-  const coarseTokens = text
-    .split(/[^a-z0-9_./-]+/)
-    .map((token) => token.trim())
-    .filter(Boolean);
-  const expandedTokens = new Set<string>();
-  const addToken = (value: string): void => {
-    const normalized = value.trim().toLowerCase();
-    if (!normalized) {
-      return;
-    }
-    expandedTokens.add(normalized);
-    const singular = singularizeToken(normalized);
-    if (singular) {
-      expandedTokens.add(singular);
-    }
-    const plural = pluralizeToken(normalized);
-    if (plural) {
-      expandedTokens.add(plural);
-    }
-  };
-  for (const token of coarseTokens) {
-    addToken(token);
-    for (const part of token.split(/[./_-]+/).filter(Boolean)) {
-      addToken(part);
-      const camelSplit = part
-        .replace(/([a-z0-9])([A-Z])/g, "$1 $2")
-        .split(/\s+/)
-        .map((segment) => segment.trim().toLowerCase())
-        .filter(Boolean);
-      for (const segment of camelSplit) {
-        addToken(segment);
-      }
-    }
-  }
-  return [...expandedTokens];
+  return tokenizeForRanking(text);
 }
 function lexicalScore(query: string, haystack: string): number {
@@ -699,21 +901,130 @@ function looksLowInformation(snippet: string): boolean {
 function chooseReason(input: {
   lexical: number;
+  literal_match: boolean;
   path_match: boolean;
   recency_boosted: boolean;
 }): RetrievalReason {
+  if (input.literal_match) {
+    return "exact literal match";
+  }
   if (input.path_match) {
-    return "exact symbol match";
+    return "path token overlap";
   }
   if (input.recency_boosted) {
     return "recently modified relevant module";
   }
   if (input.lexical > 0.3) {
-    return "path and token overlap";
+    return "path token overlap";
   }
   return "semantic match";
 }
+/**
+ * Reports whether a reason string denotes an exact match.
+ * Accepts the current label and also the label "exact symbol match".
+ */
+function isExactLiteralReason(reason: string): boolean {
+  const exactLabels = ["exact literal match", "exact symbol match"];
+  return exactLabels.includes(reason);
+}
+/** Outcome of applyLiteralBoost for one candidate. */
+interface LiteralBoostResult {
+  /** Accumulated boost, capped at the configured literal_max_boost. */
+  boost: number;
+  /** True when the capped boost is greater than zero. */
+  matched: boolean;
+  /** Number of literals found in the candidate path. */
+  path_matches: number;
+  /** Number of literals found in the snippet but not in the path. */
+  snippet_matches: number;
+}
+/**
+ * Collects distinctive literals from a search query (code symbols, paths,
+ * file names, RCE_* environment names) for exact-match boosting.
+ *
+ * @param query Raw query text.
+ * @returns Up to 24 lowercased, de-duplicated literals in discovery order.
+ */
+function extractSearchLiterals(query: string): string[] {
+  const literals: string[] = [];
+  const seen = new Set<string>();
+  // Normalizes one candidate and keeps it only if it looks specific enough.
+  const addLiteral = (raw: string): void => {
+    // Strip wrapping quotes/brackets at the start and trailing punctuation at the end.
+    const cleaned = raw.trim().replace(/^[`"'([{]+|[`"')\]}:;,.]+$/g, "");
+    const normalized = cleaned.toLowerCase();
+    if (!normalized || seen.has(normalized)) {
+      return;
+    }
+    if (normalized.length < 3) {
+      return;
+    }
+    // Shape tests run on the case-preserved text, since casing is the signal.
+    // Env-like means at least three upper-case/digit segments joined by underscores.
+    const looksEnvLiteral = /^[A-Z0-9]+(?:_[A-Z0-9]+){2,}$/.test(cleaned);
+    const looksPathOrFileLiteral = /[/.]/.test(cleaned);
+    const looksCamelLiteral = /[a-z][A-Z]/.test(cleaned) || /[A-Z][a-z]+[A-Z]/.test(cleaned);
+    const looksHyphenLiteral = cleaned.includes("-");
+    const isSpecificLiteral = looksEnvLiteral || looksPathOrFileLiteral || looksCamelLiteral || looksHyphenLiteral;
+    if (!isSpecificLiteral) {
+      return;
+    }
+    seen.add(normalized);
+    literals.push(normalized);
+  };
+  // Source 1: symbols reported by extractLikelyCodeSymbols (called with 24).
+  for (const symbol of extractLikelyCodeSymbols(query, 24)) {
+    addLiteral(symbol);
+  }
+  // Source 2: path-like symbols, plus each path's final segment on its own.
+  for (const pathSymbol of extractPathLikeSymbols(query)) {
+    addLiteral(pathSymbol);
+    const leaf = normalizePath(pathSymbol).split("/").pop();
+    if (leaf) {
+      addLiteral(leaf);
+    }
+  }
+  // Source 3: RCE_-prefixed environment-style names.
+  for (const envMatch of query.matchAll(/\bRCE_[A-Z0-9_]{4,}\b/g)) {
+    addLiteral(envMatch[0] ?? "");
+  }
+  // Source 4: bare file names with a recognized extension.
+  for (const fileName of query.matchAll(/\b[A-Za-z0-9_.-]+\.(?:ts|tsx|js|jsx|mjs|cjs|py|go|json|md)\b/g)) {
+    addLiteral(fileName[0] ?? "");
+  }
+  return literals.slice(0, 24);
+}
+/**
+ * Computes the boost a candidate earns for containing query literals verbatim.
+ *
+ * Matching is case-insensitive on the candidate side (the literals are used
+ * as given). A literal found in the path earns literal_path_boost and is not
+ * checked against the snippet. Otherwise a literal found in the snippet earns
+ * literal_snippet_boost. The total is capped at literal_max_boost.
+ *
+ * @returns The capped boost, whether it is positive, and the match counts.
+ */
+function applyLiteralBoost(input: {
+  path: string;
+  snippet: string;
+  literals: string[];
+  path_bias: RetrievalPathBiasConfig;
+}): LiteralBoostResult {
+  const { literals, path_bias: bias } = input;
+  if (literals.length === 0) {
+    return { boost: 0, matched: false, path_matches: 0, snippet_matches: 0 };
+  }
+  const pathHaystack = input.path.toLowerCase();
+  const snippetHaystack = input.snippet.toLowerCase();
+  let total = 0;
+  let pathHits = 0;
+  let snippetHits = 0;
+  for (const needle of literals) {
+    if (pathHaystack.includes(needle)) {
+      // A path hit moves straight to the next literal without the cap check.
+      total += bias.literal_path_boost;
+      pathHits += 1;
+    } else {
+      if (snippetHaystack.includes(needle)) {
+        total += bias.literal_snippet_boost;
+        snippetHits += 1;
+      }
+      if (total >= bias.literal_max_boost) {
+        break;
+      }
+    }
+  }
+  const capped = Math.min(bias.literal_max_boost, total);
+  return {
+    boost: capped,
+    matched: capped > 0,
+    path_matches: pathHits,
+    snippet_matches: snippetHits
+  };
+}
 const DOC_INTENT_TOKENS = new Set([
   "adr",
   "architecture",
@@ -769,6 +1080,22 @@ const UI_COMPONENT_TOKENS = new Set(["component", "layout", "react", "tsx", "ui"
 const FILE_LOOKUP_TOKENS = new Set(["entrypoint", "file", "locate", "path", "where", "which"]);
 const TEST_INTENT_TOKENS = new Set(["assert", "coverage", "e2e", "integration", "spec", "test", "tests", "unit"]);
 const EXAMPLE_INTENT_TOKENS = new Set(["demo", "example", "examples", "sample", "tutorial"]);
+// Query tokens that mark a query as security- or tenancy-tracing;
+// hasSecurityTraceIntent returns true when any query token is in this set.
+const SECURITY_TRACE_INTENT_TOKENS = new Set([
+  "auth",
+  "authorization",
+  "binding",
+  "config",
+  "enforce",
+  "mcp",
+  "project_root_path",
+  "security",
+  "session",
+  "stdio",
+  "tenant",
+  "token",
+  "workspace",
+  "workspace_id"
+]);
 const SOURCE_PATH_PREFIXES = ["src/", "app/", "apps/", "crates/", "internal/", "lib/", "package/", "packages/"];
 const LOW_PRIORITY_PATH_PREFIXES = [
@@ -852,6 +1179,24 @@ function hasExampleIntent(tokens: string[]): boolean {
   return tokens.some((token) => EXAMPLE_INTENT_TOKENS.has(token));
 }
+/**
+ * Decides whether a query is tracing security or tenancy plumbing.
+ *
+ * True when any token is in SECURITY_TRACE_INTENT_TOKENS, or when the raw
+ * query text contains tenant_id, workspace_id, project_root_path, or an
+ * RCE_* environment-style name.
+ */
+function hasSecurityTraceIntent(tokens: string[], queryText: string): boolean {
+  for (const candidate of tokens) {
+    if (SECURITY_TRACE_INTENT_TOKENS.has(candidate)) {
+      return true;
+    }
+  }
+  const identifierPattern = /\btenant_id\b|\bworkspace_id\b|\bproject_root_path\b|\bRCE_[A-Z0-9_]{4,}\b/;
+  return identifierPattern.test(queryText);
+}
+/**
+ * Reports whether a path points at guidance or meta material rather than
+ * implementation code. Matching is case-insensitive.
+ */
+function isGuidanceOrMetaPath(path: string): boolean {
+  const lowered = path.toLowerCase();
+  if (lowered.endsWith("mcp-tool-guidance.ts")) {
+    return true;
+  }
+  const markers = ["/guidance/", "/meta/", "/_meta/", "tool-guidance"];
+  return markers.some((marker) => lowered.includes(marker));
+}
 function pathQualityBias(path: string, queryTokens: string[], config: RetrievalScoringConfig, queryText?: string): number {
   const normalizedPath = path.toLowerCase();
   const docIntent = hasDocIntent(queryTokens);
@@ -862,6 +1207,7 @@ function pathQualityBias(path: string, queryTokens: string[], config: RetrievalS
   const uiComponentIntent = hasUiComponentIntent(queryTokens);
   const testIntent = hasTestIntent(queryTokens);
   const exampleIntent = hasExampleIntent(queryTokens);
+  const securityTraceIntent = hasSecurityTraceIntent(queryTokens, queryText ?? queryTokens.join(" "));
   let bias = 0;
   const pathBias = config.path_bias;
@@ -934,6 +1280,9 @@ function pathQualityBias(path: string, queryTokens: string[], config: RetrievalS
   if (docsPreferred && isSourcePath) {
     bias -= pathBias.doc_intent_source_penalty;
   }
+  if (securityTraceIntent && !docsPreferred && isGuidanceOrMetaPath(normalizedPath)) {
+    bias -= pathBias.security_trace_meta_penalty;
+  }
   if (workspaceManifestIntent && normalizedPath === "cargo.toml") {
     bias += pathBias.workspace_manifest_root_boost;
@@ -1020,8 +1369,8 @@ function buildChunks(file: RawFile, chunkingConfig: RetrievalChunkingConfig): Ch
     config: {
       strategy: chunkingConfig.strategy,
       fallback_strategy: chunkingConfig.fallback_strategy,
-      target_chunk_tokens: TARGET_CHUNK_TOKENS,
-      chunk_overlap_tokens: CHUNK_OVERLAP_TOKENS,
+      target_chunk_tokens: chunkingConfig.target_chunk_tokens,
+      chunk_overlap_tokens: chunkingConfig.chunk_overlap_tokens,
       max_chunks_per_file: MAX_CHUNKS_PER_FILE,
       parse_timeout_ms: chunkingConfig.parse_timeout_ms,
       enabled_languages: chunkingConfig.enabled_languages
@@ -1076,6 +1425,184 @@ function sleep(ms: number): Promise<void> {
   });
 }
+/**
+ * Thrown by the provider limiters when a request slot cannot be granted
+ * within the caller's wait budget. `retry_after_ms` carries the limiter's
+ * estimate of how long to wait before trying again.
+ */
+export class ProviderRateLimitExceededError extends Error {
+  constructor(
+    message: string,
+    readonly retry_after_ms: number
+  ) {
+    super(message);
+  }
+}
+/** Per-scope state kept by LocalProviderRequestLimiter. */
+interface LocalProviderRateBucket {
+  /** Currently available request tokens; fractional while refilling. */
+  tokens: number;
+  /** Clock reading (ms) at which `tokens` was last topped up. */
+  last_refill_ms: number;
+}
+/**
+ * In-memory token-bucket limiter. It keeps one bucket per scope in a Map
+ * owned by this instance.
+ *
+ * A bucket starts full (max_requests_per_minute tokens), refills
+ * continuously at max_requests_per_minute / 60_000 tokens per millisecond,
+ * and each granted request consumes one token.
+ */
+export class LocalProviderRequestLimiter implements ProviderRequestLimiter {
+  readonly mode = "local" as const;
+  private readonly buckets = new Map<string, LocalProviderRateBucket>();
+  private readonly now: () => number;
+  private readonly sleeper: (ms: number) => Promise<void>;
+  /**
+   * @param options Optional clock and sleep overrides; defaults are Date.now
+   *   and the module's sleep helper.
+   */
+  constructor(options?: {
+    now?: () => number;
+    sleeper?: (ms: number) => Promise<void>;
+  }) {
+    this.now = options?.now ?? (() => Date.now());
+    this.sleeper = options?.sleeper ?? sleep;
+  }
+  /**
+   * Waits for a token in the scope's bucket.
+   *
+   * @returns wait_ms: the sum of the sleep durations requested before the token was granted.
+   * @throws Error when max_requests_per_minute is not a positive integer or
+   *   max_wait_ms is not a non-negative integer.
+   * @throws ProviderRateLimitExceededError when the next token would arrive
+   *   after the max_wait_ms deadline.
+   */
+  async acquire(input: ProviderRateLimitAcquireInput): Promise<ProviderRateLimitAcquireResult> {
+    if (!Number.isInteger(input.max_requests_per_minute) || input.max_requests_per_minute <= 0) {
+      throw new Error("provider limiter requires max_requests_per_minute to be a positive integer");
+    }
+    if (!Number.isInteger(input.max_wait_ms) || input.max_wait_ms < 0) {
+      throw new Error("provider limiter requires max_wait_ms to be a non-negative integer");
+    }
+    const refillPerMs = input.max_requests_per_minute / 60_000;
+    let waitedMs = 0;
+    const deadline = this.now() + input.max_wait_ms;
+    while (true) {
+      const nowMs = this.now();
+      let bucket = this.buckets.get(input.scope);
+      if (!bucket) {
+        // First use of this scope: start with a full bucket.
+        bucket = {
+          tokens: input.max_requests_per_minute,
+          last_refill_ms: nowMs
+        };
+        this.buckets.set(input.scope, bucket);
+      }
+      // Refill only when the clock has advanced since the last top-up.
+      if (nowMs > bucket.last_refill_ms) {
+        const elapsedMs = nowMs - bucket.last_refill_ms;
+        bucket.tokens = Math.min(input.max_requests_per_minute, bucket.tokens + elapsedMs * refillPerMs);
+        bucket.last_refill_ms = nowMs;
+      }
+      if (bucket.tokens >= 1) {
+        bucket.tokens -= 1;
+        return { wait_ms: waitedMs };
+      }
+      // Time until the bucket holds one whole token again (at least 1 ms).
+      const retryAfterMs = Math.max(1, Math.ceil((1 - bucket.tokens) / refillPerMs));
+      const remainingMs = deadline - nowMs;
+      // Fail now rather than sleep when the token cannot arrive before the deadline.
+      if (remainingMs <= 0 || retryAfterMs > remainingMs) {
+        throw new ProviderRateLimitExceededError(
+          `provider request rate limit exceeded for scope "${input.scope}"`,
+          Math.max(1, retryAfterMs)
+        );
+      }
+      const sleepMs = Math.max(1, Math.min(retryAfterMs, remainingMs));
+      await this.sleeper(sleepMs);
+      // Tracks requested sleep time, not elapsed clock time.
+      waitedMs += sleepMs;
+    }
+  }
+}
+/** Decoded reply of one limiter script run. */
+interface RedisLimiterAttemptResult {
+  /** True when the script granted the request. */
+  allowed: boolean;
+  /** Milliseconds until the current window expires, as reported by the script; 0 when allowed. */
+  retry_after_ms: number;
+}
+/**
+ * Lua script implementing a fixed-window request counter.
+ *
+ * KEYS[1] is the counter key, ARGV[1] the request limit, and ARGV[2] the
+ * window length in milliseconds. The script increments the counter and
+ * starts the window's expiry on the first increment. It returns {1, 0} when
+ * the request is within the limit, otherwise {0, ms_until_window_expires}.
+ *
+ * If an over-limit counter is found without an expiry (PTTL below zero),
+ * the script re-arms the expiry before reporting a full window. Without
+ * that, such a key would never expire and the scope would be rejected
+ * indefinitely.
+ */
+const REDIS_PROVIDER_LIMITER_SCRIPT = `
+local key = KEYS[1]
+local limit = tonumber(ARGV[1])
+local window_ms = tonumber(ARGV[2])
+local count = redis.call("INCR", key)
+if count == 1 then
+  redis.call("PEXPIRE", key, window_ms)
+end
+if count <= limit then
+  return {1, 0}
+end
+local ttl = redis.call("PTTL", key)
+if ttl < 0 then
+  redis.call("PEXPIRE", key, window_ms)
+  ttl = window_ms
+end
+return {0, ttl}
+`;
+/**
+ * Limiter backed by Redis. Each attempt runs REDIS_PROVIDER_LIMITER_SCRIPT
+ * against the key "<key_prefix>:<scope>".
+ *
+ * NOTE(review): the limit passed as max_requests_per_minute is applied per
+ * window_ms, so it is only truly "per minute" at the default window of 60_000 ms.
+ */
+export class RedisProviderRequestLimiter implements ProviderRequestLimiter {
+  readonly mode = "redis" as const;
+  private readonly redis: RedisProviderRequestLimiterClient;
+  private readonly keyPrefix: string;
+  private readonly windowMs: number;
+  private readonly now: () => number;
+  private readonly sleeper: (ms: number) => Promise<void>;
+  /**
+   * @throws Error when the redis client lacks eval() or window_ms is not a
+   *   positive integer.
+   */
+  constructor(options: RedisProviderRequestLimiterOptions) {
+    if (!options.redis || typeof options.redis.eval !== "function") {
+      throw new Error("invalid redis provider limiter config: redis client with eval() is required");
+    }
+    this.redis = options.redis;
+    // A blank prefix is treated as missing and replaced with the default.
+    this.keyPrefix = options.key_prefix?.trim() || "rce:provider_rate_limit";
+    this.windowMs = options.window_ms ?? 60_000;
+    this.now = options.now ?? (() => Date.now());
+    this.sleeper = options.sleeper ?? sleep;
+    if (!Number.isInteger(this.windowMs) || this.windowMs <= 0) {
+      throw new Error("invalid redis provider limiter config: window_ms must be a positive integer");
+    }
+  }
+  /**
+   * Retries the limiter script until a request is granted or the wait budget runs out.
+   *
+   * @returns wait_ms: the sum of the sleep durations requested before the grant.
+   * @throws Error when max_requests_per_minute is not a positive integer or
+   *   max_wait_ms is not a non-negative integer.
+   * @throws ProviderRateLimitExceededError when the reported retry delay
+   *   exceeds the time left before the max_wait_ms deadline.
+   */
+  async acquire(input: ProviderRateLimitAcquireInput): Promise<ProviderRateLimitAcquireResult> {
+    if (!Number.isInteger(input.max_requests_per_minute) || input.max_requests_per_minute <= 0) {
+      throw new Error("provider limiter requires max_requests_per_minute to be a positive integer");
+    }
+    if (!Number.isInteger(input.max_wait_ms) || input.max_wait_ms < 0) {
+      throw new Error("provider limiter requires max_wait_ms to be a non-negative integer");
+    }
+    let waitedMs = 0;
+    const deadline = this.now() + input.max_wait_ms;
+    while (true) {
+      const attempt = await this.reserveAttempt(input.scope, input.max_requests_per_minute);
+      if (attempt.allowed) {
+        return { wait_ms: waitedMs };
+      }
+      const nowMs = this.now();
+      const remainingMs = deadline - nowMs;
+      const retryAfterMs = Math.max(1, attempt.retry_after_ms);
+      // Fail now rather than sleep when the window cannot reset before the deadline.
+      if (remainingMs <= 0 || retryAfterMs > remainingMs) {
+        throw new ProviderRateLimitExceededError(
+          `provider request rate limit exceeded for scope "${input.scope}"`,
+          retryAfterMs
+        );
+      }
+      const sleepMs = Math.max(1, Math.min(retryAfterMs, remainingMs));
+      await this.sleeper(sleepMs);
+      // Tracks requested sleep time, not elapsed clock time.
+      waitedMs += sleepMs;
+    }
+  }
+  /**
+   * Runs the limiter script once for the scope and decodes its two-element reply.
+   *
+   * @throws Error when eval() returns anything other than an array.
+   */
+  private async reserveAttempt(scope: string, maxRequestsPerMinute: number): Promise<RedisLimiterAttemptResult> {
+    const key = `${this.keyPrefix}:${scope}`;
+    const raw = await this.redis.eval(
+      REDIS_PROVIDER_LIMITER_SCRIPT,
+      1,
+      key,
+      maxRequestsPerMinute,
+      this.windowMs
+    );
+    if (Array.isArray(raw)) {
+      const allowed = Number(raw[0] ?? 0) === 1;
+      const retryAfterMs = Number(raw[1] ?? 0);
+      return {
+        allowed,
+        // A non-numeric delay falls back to a full window.
+        retry_after_ms: Number.isFinite(retryAfterMs) ? Math.max(0, Math.trunc(retryAfterMs)) : this.windowMs
+      };
+    }
+    throw new Error("redis provider limiter returned unexpected eval() payload");
+  }
+}
 export class DeterministicEmbeddingProvider implements EmbeddingProvider {
   private readonly dimensions: number;
   private readonly model: string;
@@ -1110,6 +1637,12 @@ export class OpenAICompatibleEmbeddingProvider implements EmbeddingProvider {
   private readonly timeoutMs: number;
   private readonly batchSize: number;
   private readonly maxRetries: number;
+  private readonly transientForbiddenMaxRetries: number;
+  private readonly requestLimiter?: ProviderRequestLimiter;
+  private readonly requestLimitScope: string;
+  private readonly maxRequestsPerMinute: number;
+  private readonly indexMaxWaitMs: number;
+  private readonly queryMaxWaitMs: number;
   private readonly observability: Observability;
   constructor(options: OpenAICompatibleEmbeddingProviderOptions) {
@@ -1130,6 +1663,17 @@ export class OpenAICompatibleEmbeddingProvider implements EmbeddingProvider {
     this.timeoutMs = options.timeout_ms ?? DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_TIMEOUT_MS;
     this.batchSize = options.batch_size ?? DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_BATCH_SIZE;
     this.maxRetries = options.max_retries ?? DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_MAX_RETRIES;
+    this.transientForbiddenMaxRetries =
+      options.transient_forbidden_max_retries ?? DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_TRANSIENT_403_MAX_RETRIES;
+    this.requestLimiter = options.request_limiter;
+    this.requestLimitScope = resolveProviderLimiterScope({
+      provider: "openai_compatible",
+      apiKey,
+      overrideScopeId: options.request_limit_scope_id
+    });
+    this.maxRequestsPerMinute = options.max_requests_per_minute ?? DEFAULT_PROVIDER_MAX_REQUESTS_PER_MINUTE;
+    this.indexMaxWaitMs = options.index_max_wait_ms ?? DEFAULT_PROVIDER_LIMIT_INDEX_MAX_WAIT_MS;
+    this.queryMaxWaitMs = options.query_max_wait_ms ?? DEFAULT_PROVIDER_LIMIT_QUERY_MAX_WAIT_MS;
     this.observability = options.observability ?? getObservability("retrieval-core");
     if (!Number.isInteger(this.dimensions) || this.dimensions <= 0) {
@@ -1144,6 +1688,20 @@ export class OpenAICompatibleEmbeddingProvider implements EmbeddingProvider {
     if (!Number.isInteger(this.maxRetries) || this.maxRetries < 0) {
       throw new Error("invalid openai-compatible embedding config: max_retries must be a non-negative integer");
     }
+    if (!Number.isInteger(this.transientForbiddenMaxRetries) || this.transientForbiddenMaxRetries < 0) {
+      throw new Error(
+        "invalid openai-compatible embedding config: transient_forbidden_max_retries must be a non-negative integer"
+      );
+    }
+    if (!Number.isInteger(this.maxRequestsPerMinute) || this.maxRequestsPerMinute <= 0) {
+      throw new Error("invalid openai-compatible embedding config: max_requests_per_minute must be a positive integer");
+    }
+    if (!Number.isInteger(this.indexMaxWaitMs) || this.indexMaxWaitMs < 0) {
+      throw new Error("invalid openai-compatible embedding config: index_max_wait_ms must be a non-negative integer");
+    }
+    if (!Number.isInteger(this.queryMaxWaitMs) || this.queryMaxWaitMs < 0) {
+      throw new Error("invalid openai-compatible embedding config: query_max_wait_ms must be a non-negative integer");
+    }
   }
   describe(): EmbeddingDescriptor {
@@ -1175,11 +1733,12 @@ export class OpenAICompatibleEmbeddingProvider implements EmbeddingProvider {
       purpose
     } as const;
-    for (let attempt = 0; attempt <= this.maxRetries; attempt += 1) {
+    let attempt = 0;
+    while (true) {
       const startedAt = Date.now();
       this.observability.metrics.increment("retrieval_embedding_provider_requests_total", 1, labels);
       try {
-        return await this.embedBatchOnce(texts);
+        return await this.embedBatchOnce(texts, purpose);
       } catch (error) {
         const failure = this.toProviderFailure(error);
         this.observability.metrics.increment("retrieval_embedding_provider_failures_total", 1, {
@@ -1187,23 +1746,31 @@ export class OpenAICompatibleEmbeddingProvider implements EmbeddingProvider {
           reason: failure.reason
         });
-        const shouldRetry = failure.retryable && attempt < this.maxRetries;
+        const maxRetriesForFailure = this.maxRetriesForReason(failure.reason);
+        const shouldRetry = failure.retryable && attempt < maxRetriesForFailure;
         this.observability.logger.warn("embedding provider request failed", {
           provider: "openai_compatible",
           model: this.model,
           purpose,
           reason: failure.reason,
+          provider_message: failure.message,
           retryable: failure.retryable,
           retrying: shouldRetry,
           attempt: attempt + 1,
-          max_attempts: this.maxRetries + 1
+          max_attempts: maxRetriesForFailure + 1,
+          retry_after_ms: failure.retry_after_ms
         });
         if (shouldRetry) {
-          await sleep(this.retryDelayMs(attempt));
+          await sleep(this.retryDelayMs(attempt, failure));
+          attempt += 1;
           continue;
         }
+        if (failure.reason === "client_rate_limited" || failure.reason === "rate_limited") {
+          throw new RetrievalError("RATE_LIMITED", `embedding provider rate limited; ${failure.message}`);
+        }
         throw new RetrievalError(
           "UPSTREAM_FAILURE",
           `embedding provider request failed (${failure.reason}); ${failure.message}`
@@ -1212,11 +1779,50 @@ export class OpenAICompatibleEmbeddingProvider implements EmbeddingProvider {
         this.observability.metrics.observe("retrieval_embedding_provider_latency_ms", Date.now() - startedAt, labels);
       }
     }
+  }
+  private async enforceRequestLimit(purpose: EmbeddingPurpose): Promise<void> {
+    if (!this.requestLimiter) {
+      return;
+    }
+    const maxWaitMs = purpose === "index" ? this.indexMaxWaitMs : this.queryMaxWaitMs;
+    const labels = {
+      provider: "openai_compatible",
+      model: this.model,
+      purpose,
+      limiter_mode: this.requestLimiter.mode ?? "custom"
+    } as const;
-    throw new RetrievalError("UPSTREAM_FAILURE", "embedding provider retries exhausted");
+    try {
+      const acquired = await this.requestLimiter.acquire({
+        scope: this.requestLimitScope,
+        max_requests_per_minute: this.maxRequestsPerMinute,
+        max_wait_ms: maxWaitMs
+      });
+      this.observability.metrics.observe("retrieval_provider_limiter_wait_ms", acquired.wait_ms, labels);
+      this.observability.metrics.increment("retrieval_provider_requests_shaped_total", 1, labels);
+    } catch (error) {
+      this.observability.metrics.increment("retrieval_provider_limiter_block_total", 1, {
+        ...labels,
+        reason: "wait_timeout"
+      });
+      if (error instanceof ProviderRateLimitExceededError) {
+        const retryable = purpose === "index";
+        throw new EmbeddingProviderRequestError(
+          "client_rate_limited",
+          retryable,
+          `${error.message}; retry_after_ms=${error.retry_after_ms}`,
+          error.retry_after_ms
+        );
+      }
+      throw error;
+    }
   }
-  private async embedBatchOnce(texts: string[]): Promise<number[][]> {
+  private async embedBatchOnce(texts: string[], purpose: EmbeddingPurpose): Promise<number[][]> {
+    await this.enforceRequestLimit(purpose);
     const controller = new AbortController();
     const timeoutId = setTimeout(() => {
       controller.abort();
@@ -1253,13 +1859,28 @@ export class OpenAICompatibleEmbeddingProvider implements EmbeddingProvider {
     if (!response.ok) {
       const details = await safeResponseText(response);
       if (response.status === 429) {
-        throw new EmbeddingProviderRequestError("rate_limited", true, `HTTP 429 ${details}`.trim());
+        throw new EmbeddingProviderRequestError(
+          "rate_limited",
+          true,
+          `HTTP 429 ${details}`.trim(),
+          parseRetryAfterMs(response.headers.get("retry-after"))
+        );
       }
       if (response.status >= 500) {
         throw new EmbeddingProviderRequestError("http_5xx", true, `HTTP ${response.status} ${details}`.trim());
       }
-      if (response.status === 401 || response.status === 403) {
-        throw new EmbeddingProviderRequestError("auth_error", false, `HTTP ${response.status} ${details}`.trim());
+      if (response.status === 401) {
+        throw new EmbeddingProviderRequestError("auth_error", false, `HTTP 401 ${details}`.trim());
+      }
+      if (response.status === 403) {
+        const retryAfterMs = parseRetryAfterMs(response.headers.get("retry-after"));
+        const retryable = this.isTransientForbidden(details, retryAfterMs);
+        throw new EmbeddingProviderRequestError(
+          retryable ? "forbidden_transient" : "auth_error",
+          retryable,
+          `HTTP 403 ${details}`.trim(),
+          retryAfterMs
+        );
       }
       if (response.status === 404) {
         throw new EmbeddingProviderRequestError("endpoint_not_found", false, `HTTP 404 ${details}`.trim());
@@ -1328,25 +1949,951 @@ export class OpenAICompatibleEmbeddingProvider implements EmbeddingProvider {
     return vectors;
   }
-  private retryDelayMs(attempt: number): number {
-    const base = 100 * (attempt + 1);
-    const jitter = Math.floor(Math.random() * 75);
-    return base + jitter;
+  /**
+   * Retry budget for a given failure reason. "forbidden_transient" failures get
+   * the larger of the general budget and the transient-403 budget; every other
+   * reason uses the general `maxRetries` budget.
+   */
+  private maxRetriesForReason(reason: string): number {
+    const generalBudget = this.maxRetries;
+    return reason === "forbidden_transient"
+      ? Math.max(generalBudget, this.transientForbiddenMaxRetries)
+      : generalBudget;
+  }
+  /**
+   * Delay before the next retry, in milliseconds.
+   *
+   * Transient 403s back off exponentially (250ms doubling per attempt, capped at
+   * 2500ms) with up to 149ms of jitter; all other failures back off linearly
+   * (100ms per attempt) with up to 74ms of jitter. When the failure carries a
+   * `retry_after_ms` hint, the result is never shorter than that hint (hint is
+   * floored at 1ms).
+   *
+   * @param attempt zero-based index of the attempt that just failed
+   * @param failure normalized provider failure, possibly carrying `retry_after_ms`
+   */
+  private retryDelayMs(attempt: number, failure: EmbeddingProviderRequestError): number {
+    const isTransientForbidden = failure.reason === "forbidden_transient";
+    const backoffMs = isTransientForbidden ? Math.min(2_500, 250 * 2 ** attempt) : 100 * (attempt + 1);
+    const jitterCeilingMs = isTransientForbidden ? 150 : 75;
+    const localDelayMs = backoffMs + Math.floor(Math.random() * jitterCeilingMs);
+    const hintedMs = failure.retry_after_ms;
+    return hintedMs === undefined ? localDelayMs : Math.max(localDelayMs, Math.max(1, hintedMs));
   }
-  private toProviderFailure(error: unknown): EmbeddingProviderRequestError {
-    if (error instanceof EmbeddingProviderRequestError) {
-      return error;
+  private isTransientForbidden(details: string, retryAfterMs?: number): boolean {
+    if (retryAfterMs !== undefined) {
+      return true;
     }
-    if (error instanceof RetrievalError) {
-      return new EmbeddingProviderRequestError("upstream_failure", false, error.message);
+    const normalized = details.trim().toLowerCase();
+    if (normalized.length === 0) {
+      return false;
     }
-    if (error instanceof Error) {
-      return new EmbeddingProviderRequestError("unknown_error", false, error.message);
+    const transientSignals = [
+      "rate limit",
+      "too many requests",
+      "temporar",
+      "try again",
+      "upstream",
+      "timeout",
+      "busy",
+      "capacity",
+      "bad_response_status_code"
+    ];
+    if (transientSignals.some((signal) => normalized.includes(signal))) {
+      return true;
     }
-    return new EmbeddingProviderRequestError("unknown_error", false, String(error));
-  }
-}
+    const hardFailureSignals = [
+      "invalid api key",
+      "incorrect api key",
+      "authentication",
+      "unauthorized",
+      "insufficient permissions",
+      "insufficient scope",
+      "permission denied",
+      "organization not found",
+      "account disabled",
+      "insufficient quota",
+      "quota exceeded",
+      "billing",
+      "credit",
+      "payment required",
+      "model not found",
+      "unknown model",
+      "unsupported model",
+      "not allowed"
+    ];
+    if (hardFailureSignals.some((signal) => normalized.includes(signal))) {
+      return false;
+    }
+    return false;
+  }
+  /**
+   * Normalizes anything thrown while embedding into an EmbeddingProviderRequestError.
+   *
+   * - EmbeddingProviderRequestError: returned unchanged.
+   * - ProviderRateLimitExceededError: non-retryable "client_rate_limited"; the
+   *   limiter's `retry_after_ms` is kept both in the message and as the structured
+   *   hint, matching what enforceRequestLimit() produces, so the failure log can
+   *   report it.
+   * - RetrievalError: "client_rate_limited" when its code is "RATE_LIMITED",
+   *   otherwise "upstream_failure"; never retryable.
+   * - Any other Error or thrown value: non-retryable "unknown_error".
+   *
+   * @param error the value caught from the request attempt
+   * @returns a failure object the retry loop can inspect
+   */
+  private toProviderFailure(error: unknown): EmbeddingProviderRequestError {
+    if (error instanceof EmbeddingProviderRequestError) {
+      return error;
+    }
+    if (error instanceof ProviderRateLimitExceededError) {
+      return new EmbeddingProviderRequestError(
+        "client_rate_limited",
+        false,
+        `${error.message}; retry_after_ms=${error.retry_after_ms}`,
+        error.retry_after_ms
+      );
+    }
+    if (error instanceof RetrievalError) {
+      // A RATE_LIMITED retrieval error is reported under the same reason as a local limiter block.
+      const reason = error.code === "RATE_LIMITED" ? "client_rate_limited" : "upstream_failure";
+      return new EmbeddingProviderRequestError(reason, false, error.message);
+    }
+    const message = error instanceof Error ? error.message : String(error);
+    return new EmbeddingProviderRequestError("unknown_error", false, message);
+  }
+}
+/**
+ * Reranker client for an OpenAI-compatible HTTP API. It POSTs the query and
+ * candidate documents to `<base_url>/rerank`, validates the response rows, and
+ * returns them ordered by score. Requests can be gated by an injected request
+ * limiter.
+ */
+export class OpenAICompatibleRerankerProvider implements RerankerProvider {
+  private readonly endpoint: string;
+  private readonly apiKey: string;
+  private readonly model: string;
+  private readonly timeoutMs: number;
+  private readonly requestLimiter?: ProviderRequestLimiter;
+  private readonly requestLimitScope: string;
+  private readonly maxRequestsPerMinute: number;
+  private readonly rerankMaxWaitMs: number;
+  private readonly observability: Observability;
+  /**
+   * @param options provider configuration; unset numeric options fall back to the module defaults
+   * @throws Error when `base_url` or `api_key` is blank after trimming, when
+   *   `timeout_ms` or `max_requests_per_minute` is not a positive integer, or when
+   *   `rerank_max_wait_ms` is not a non-negative integer
+   */
+  constructor(options: OpenAICompatibleRerankerProviderOptions) {
+    // Trailing slashes are dropped so the endpoint below never contains "//rerank".
+    const baseUrl = options.base_url.trim().replace(/\/+$/, "");
+    if (baseUrl.length === 0) {
+      throw new Error("invalid openai-compatible reranker config: base_url must be non-empty");
+    }
+    const apiKey = options.api_key.trim();
+    if (apiKey.length === 0) {
+      throw new Error("invalid openai-compatible reranker config: api_key must be non-empty");
+    }
+    this.endpoint = `${baseUrl}/rerank`;
+    this.apiKey = apiKey;
+    // `||` (not `??`) so a blank model string also falls back to the default.
+    this.model = options.model?.trim() || DEFAULT_OPENAI_COMPATIBLE_RERANKER_MODEL;
+    this.timeoutMs = options.timeout_ms ?? DEFAULT_OPENAI_COMPATIBLE_RERANKER_TIMEOUT_MS;
+    this.requestLimiter = options.request_limiter;
+    this.requestLimitScope = resolveProviderLimiterScope({
+      provider: "openai_compatible",
+      apiKey,
+      overrideScopeId: options.request_limit_scope_id
+    });
+    this.maxRequestsPerMinute = options.max_requests_per_minute ?? DEFAULT_PROVIDER_MAX_REQUESTS_PER_MINUTE;
+    this.rerankMaxWaitMs = options.rerank_max_wait_ms ?? DEFAULT_PROVIDER_LIMIT_RERANK_MAX_WAIT_MS;
+    this.observability = options.observability ?? getObservability("retrieval-core");
+    // Numeric options are validated after defaults are applied.
+    if (!Number.isInteger(this.timeoutMs) || this.timeoutMs <= 0) {
+      throw new Error("invalid openai-compatible reranker config: timeout_ms must be a positive integer");
+    }
+    if (!Number.isInteger(this.maxRequestsPerMinute) || this.maxRequestsPerMinute <= 0) {
+      throw new Error("invalid openai-compatible reranker config: max_requests_per_minute must be a positive integer");
+    }
+    if (!Number.isInteger(this.rerankMaxWaitMs) || this.rerankMaxWaitMs < 0) {
+      throw new Error("invalid openai-compatible reranker config: rerank_max_wait_ms must be a non-negative integer");
+    }
+  }
+  /** Identifies this provider and the model name it sends with each request. */
+  describe(): RerankerDescriptor {
+    return {
+      provider: "openai_compatible",
+      model: this.model
+    };
+  }
+  /**
+   * Scores `documents` against `query` via the remote rerank endpoint.
+   *
+   * @param input query text, candidate documents, and the number of results wanted
+   * @returns at most `top_n` rows (clamped to 1..documents.length), one per document
+   *   index, ordered by descending score with ties broken by ascending index;
+   *   an empty array when no documents are given
+   * @throws RerankerProviderRequestError on limiter rejection, timeout, network
+   *   failure, non-2xx status, non-JSON body, or a malformed or empty result set
+   */
+  async rerank(input: { query: string; documents: string[]; top_n: number }): Promise<RerankerResult[]> {
+    // Nothing to rank: return before taking a limiter slot or touching the network.
+    if (input.documents.length === 0) {
+      return [];
+    }
+    await this.enforceRequestLimit();
+    const topN = Math.max(1, Math.min(input.top_n, input.documents.length));
+    const controller = new AbortController();
+    const timeoutId = setTimeout(() => {
+      controller.abort();
+    }, this.timeoutMs);
+    let response: Response;
+    try {
+      response = await fetch(this.endpoint, {
+        method: "POST",
+        headers: {
+          authorization: `Bearer ${this.apiKey}`,
+          "content-type": "application/json"
+        },
+        body: JSON.stringify({
+          model: this.model,
+          query: input.query,
+          documents: input.documents,
+          top_n: topN
+        }),
+        signal: controller.signal
+      });
+    } catch (error) {
+      // An abort triggered by the timer above surfaces as an error named "AbortError".
+      if (error && typeof error === "object" && "name" in error && (error as { name?: string }).name === "AbortError") {
+        throw new RerankerProviderRequestError("timeout", `request timed out after ${this.timeoutMs}ms`);
+      }
+      throw new RerankerProviderRequestError(
+        "network_error",
+        error instanceof Error ? error.message : String(error)
+      );
+    } finally {
+      // NOTE(review): the abort timer is cleared as soon as fetch() settles, so the
+      // body reads below (safeResponseText / response.json) are not bounded by timeout_ms.
+      clearTimeout(timeoutId);
+    }
+    if (!response.ok) {
+      // Map the HTTP status onto a failure reason; the response text is appended for diagnostics.
+      const details = await safeResponseText(response);
+      if (response.status === 429) {
+        throw new RerankerProviderRequestError("rate_limited", `HTTP 429 ${details}`.trim());
+      }
+      if (response.status === 401 || response.status === 403) {
+        throw new RerankerProviderRequestError("auth_error", `HTTP ${response.status} ${details}`.trim());
+      }
+      if (response.status === 404) {
+        throw new RerankerProviderRequestError("endpoint_not_found", `HTTP 404 ${details}`.trim());
+      }
+      if (response.status >= 500) {
+        throw new RerankerProviderRequestError("http_5xx", `HTTP ${response.status} ${details}`.trim());
+      }
+      throw new RerankerProviderRequestError("http_4xx", `HTTP ${response.status} ${details}`.trim());
+    }
+    let payload: unknown;
+    try {
+      payload = await response.json();
+    } catch {
+      throw new RerankerProviderRequestError("invalid_json", "provider returned non-JSON response");
+    }
+    if (!payload || typeof payload !== "object") {
+      throw new RerankerProviderRequestError("invalid_response", "provider response must be an object");
+    }
+    // The row list is read from `results` when that key exists, otherwise from `data`.
+    const maybeResults = "results" in payload ? (payload as { results?: unknown }).results : (payload as { data?: unknown }).data;
+    if (!Array.isArray(maybeResults)) {
+      throw new RerankerProviderRequestError("invalid_response", "provider response missing results array");
+    }
+    const output: RerankerResult[] = [];
+    // Any single malformed row rejects the whole response.
+    for (const row of maybeResults) {
+      if (!row || typeof row !== "object") {
+        throw new RerankerProviderRequestError("invalid_response", "rerank row must be an object");
+      }
+      const rawIndex = (row as { index?: unknown }).index;
+      if (!Number.isInteger(rawIndex)) {
+        throw new RerankerProviderRequestError("invalid_response", "rerank row index must be an integer");
+      }
+      const index = rawIndex as number;
+      if (index < 0 || index >= input.documents.length) {
+        throw new RerankerProviderRequestError("invalid_response", "rerank row index out of range");
+      }
+      // `relevance_score` is preferred; `score` is accepted as a fallback field name.
+      const rawScore = (row as { relevance_score?: unknown; score?: unknown }).relevance_score ?? (row as { score?: unknown }).score;
+      if (typeof rawScore !== "number" || !Number.isFinite(rawScore)) {
+        throw new RerankerProviderRequestError("invalid_response", "rerank row score must be finite");
+      }
+      output.push({
+        index,
+        relevance_score: rawScore
+      });
+    }
+    // Sort first, then drop repeated indices, so the highest-scoring row for each document wins.
+    const seen = new Set<number>();
+    const ordered = [...output]
+      .sort((a, b) => b.relevance_score - a.relevance_score || a.index - b.index)
+      .filter((row) => {
+        if (seen.has(row.index)) {
+          return false;
+        }
+        seen.add(row.index);
+        return true;
+      })
+      .slice(0, topN);
+    if (ordered.length === 0) {
+      throw new RerankerProviderRequestError("invalid_response", "provider returned zero rerank results");
+    }
+    return ordered;
+  }
+  /**
+   * Waits for a limiter slot before a rerank request; a no-op when no limiter
+   * was configured. Records the wait time and a shaped-request count on success.
+   *
+   * @throws RerankerProviderRequestError ("rate_limited") when the limiter raises
+   *   ProviderRateLimitExceededError; any other limiter error is rethrown as-is
+   */
+  private async enforceRequestLimit(): Promise<void> {
+    if (!this.requestLimiter) {
+      return;
+    }
+    const labels = {
+      provider: "openai_compatible",
+      model: this.model,
+      purpose: "rerank",
+      limiter_mode: this.requestLimiter.mode ?? "custom"
+    } as const;
+    try {
+      const acquired = await this.requestLimiter.acquire({
+        scope: this.requestLimitScope,
+        max_requests_per_minute: this.maxRequestsPerMinute,
+        max_wait_ms: this.rerankMaxWaitMs
+      });
+      this.observability.metrics.observe("retrieval_provider_limiter_wait_ms", acquired.wait_ms, labels);
+      this.observability.metrics.increment("retrieval_provider_requests_shaped_total", 1, labels);
+    } catch (error) {
+      // NOTE(review): the block counter is incremented with reason "wait_timeout" for every
+      // error caught here, including ones that are not ProviderRateLimitExceededError.
+      this.observability.metrics.increment("retrieval_provider_limiter_block_total", 1, {
+        ...labels,
+        reason: "wait_timeout"
+      });
+      if (error instanceof ProviderRateLimitExceededError) {
+        throw new RerankerProviderRequestError(
+          "rate_limited",
+          `${error.message}; retry_after_ms=${error.retry_after_ms}`
+        );
+      }
+      throw error;
+    }
+  }
+}
+/**
+ * Builds the single-line system instruction for the Claude prompt enhancer.
+ *
+ * @param language "zh" and "es" select Simplified Chinese and Spanish; any other value selects English
+ * @param style "lean" and "deep" select their own length guidance; any other value selects the standard guidance
+ * @returns all rules joined with single spaces
+ */
+function buildClaudeEnhancerSystemInstruction(
+  language: EnhancerOutputLanguage,
+  style: ResolvedEnhancerPromptStyle
+): string {
+  let languageRule = "Output language must be English.";
+  if (language === "zh") {
+    languageRule = "Output language must be Simplified Chinese.";
+  } else if (language === "es") {
+    languageRule = "Output language must be Spanish.";
+  }
+
+  let styleRule =
+    "Style is standard: provide balanced depth (roughly 160-300 words) with clear scope, steps, and validation.";
+  if (style === "lean") {
+    styleRule =
+      "Style is lean: keep the response compact (roughly 90-180 words), avoid extra headings, and include only essential steps.";
+  } else if (style === "deep") {
+    styleRule =
+      "Style is deep: provide comprehensive but grounded guidance (roughly 260-420 words) with concrete constraints, edge cases, and validation.";
+  }
+
+  // Rules that apply regardless of language or style.
+  const fixedRules = [
+    "Return plain text only: the final enhanced prompt.",
+    "Do not include markdown code fences.",
+    "Preserve user intent exactly; do not add unrelated features.",
+    "Do not invent file paths or symbols that are not present in provided context.",
+    "Produce concise execution-ready prompts, not long generic templates.",
+    "Prefer practical sections only: objective, scoped constraints, codebase anchors, implementation steps, validation.",
+    "Use concrete file/symbol anchors when context exists.",
+    "Avoid repeating generic process advice, broad deliverables lists, or organizational boilerplate."
+  ];
+  const role = "You are a high-precision prompt enhancement agent for software engineering tasks.";
+  return [role, languageRule, styleRule, ...fixedRules].join(" ");
+}
+/** Comparison key for a context path: the result of `normalizePath`, lower-cased. */
+function normalizeEnhancerContextPath(path: string): string {
+  const canonical = normalizePath(path);
+  return canonical.toLowerCase();
+}
+/**
+ * True when the path names one of the project-convention documents
+ * (AGENTS.md, CLAUDE.md, README.md, CONTRIBUTING.md), either at the root or in
+ * any directory. Matching is done on the normalized, lower-cased path.
+ */
+function looksLikeEnhancerConventionsFile(path: string): boolean {
+  const candidate = normalizeEnhancerContextPath(path);
+  const conventionFileNames = ["agents.md", "claude.md", "readme.md", "contributing.md"];
+  return conventionFileNames.some(
+    (fileName) => candidate === fileName || candidate.endsWith(`/${fileName}`)
+  );
+}
+/**
+ * Collects up to eight distinct convention-like lines from snippets that come
+ * from convention documents (see looksLikeEnhancerConventionsFile).
+ *
+ * A line qualifies when, after stripping a leading bullet or "N." marker, it is
+ * 16-180 characters long, contains one of the directive keywords, and does not
+ * start with a code keyword. Duplicates are detected case-insensitively; the
+ * first spelling seen is kept, in encounter order.
+ */
+function extractProjectConventionsFromEnhancerContext(snippets: EnhancerContextSnippet[]): string[] {
+  const maxConventions = 8;
+  const conventionSources = snippets.filter(({ path }) => looksLikeEnhancerConventionsFile(path));
+  const collected: string[] = [];
+  if (conventionSources.length === 0) {
+    return collected;
+  }
+  const directivePattern =
+    /\b(always|never|must|should|avoid|prefer|preserve|keep|strict|isolation|tenant|workspace|contract|schema|backward|compatibility|regression|test|typecheck|bun)\b/i;
+  const codeLinePattern = /^(import|export|const|let|var|if|for|while|return)\b/i;
+  const fingerprints = new Set<string>();
+  for (const source of conventionSources) {
+    for (const rawLine of source.snippet.split(/\r?\n/u)) {
+      // Drop list markers ("- ", "* ", "+ ", "1. ") before judging the line.
+      const text = rawLine
+        .replace(/^\s*[-*+]\s+/u, "")
+        .replace(/^\s*\d+\.\s+/u, "")
+        .trim();
+      const hasPlausibleLength = text.length >= 16 && text.length <= 180;
+      if (!hasPlausibleLength || !directivePattern.test(text) || codeLinePattern.test(text)) {
+        continue;
+      }
+      const fingerprint = text.toLowerCase();
+      if (fingerprints.has(fingerprint)) {
+        continue;
+      }
+      fingerprints.add(fingerprint);
+      collected.push(text);
+      if (collected.length >= maxConventions) {
+        return collected;
+      }
+    }
+  }
+  return collected;
+}
+/**
+ * Derives hard requirements from the prompt and conversation history by keyword
+ * matching on the lower-cased, newline-joined text.
+ *
+ * @returns the matching requirement sentences in rule order, without
+ *   duplicates, truncated to six entries
+ */
+function extractEnhancerNonNegotiables(input: {
+  prompt: string;
+  history: EnhancePromptInput["conversation_history"];
+}): string[] {
+  const historyText = input.history.map((entry) => entry.content).join("\n");
+  const haystack = `${input.prompt}\n${historyText}`.toLowerCase();
+  // Each rule pairs a trigger pattern with the requirement it implies; order is significant.
+  const rules: Array<[RegExp, string]> = [
+    [
+      /keep (?:behavior|behaviour) stable|preserve (?:existing )?(?:behavior|behaviour)|backward.?compat|no breaking changes|without breaking/i,
+      "Preserve existing behavior and avoid breaking API/contract semantics."
+    ],
+    [/regression tests?|add tests?|test coverage|boundary tests?/i, "Include regression tests for any changed behavior."],
+    [
+      /tenant|workspace|authorization|auth boundaries?|scope enforcement|isolation/i,
+      "Maintain strict tenant/workspace isolation and authorization boundaries."
+    ],
+    [/no docs|avoid docs|exclude docs/i, "Do not prioritize documentation-only changes unless explicitly requested."],
+    [/no refactor|minimal changes?|smallest safe change/i, "Prefer the smallest safe change set."]
+  ];
+  const requirements: string[] = [];
+  for (const [trigger, requirement] of rules) {
+    if (trigger.test(haystack) && !requirements.includes(requirement)) {
+      requirements.push(requirement);
+    }
+  }
+  return requirements.slice(0, 6);
+}
+/**
+ * Chooses the output contract (target style, word budget, preferred sections,
+ * patterns to avoid) for an enhancement request.
+ *
+ * The contract depends on the resolved style ("lean", "deep", anything else =
+ * standard), on whether the request is test-oriented (`intent === "tests"`) or
+ * spec-oriented (`intent === "docs"` or a conceptual query), and on whether
+ * repository context is available (larger word budget when it is).
+ *
+ * Precedence differs by style: lean and deep check the test case first, while
+ * the standard style checks the spec case first.
+ */
+function buildEnhancerOutputContract(input: {
+  style: ResolvedEnhancerPromptStyle;
+  intent: EnhancerIntent;
+  query_intent: "symbol-heavy" | "impl-focused" | "conceptual";
+  has_context: boolean;
+}): {
+  target_style: string;
+  max_words: number;
+  preferred_sections: string[];
+  avoid_patterns: string[];
+} {
+  const wantsTests = input.intent === "tests";
+  const wantsSpec = input.intent === "docs" || input.query_intent === "conceptual";
+  // First figure applies when context is available, second when it is not.
+  const budget = (withContext: number, withoutContext: number): number =>
+    input.has_context ? withContext : withoutContext;
+
+  switch (input.style) {
+    case "lean": {
+      if (wantsTests) {
+        return {
+          target_style: "lean_test_plan",
+          max_words: budget(220, 170),
+          preferred_sections: ["Goal", "Key test cases", "Validation"],
+          avoid_patterns: ["long checklists", "broad architecture proposals", "generic deliverables blocks"]
+        };
+      }
+      if (wantsSpec) {
+        return {
+          target_style: "lean_spec",
+          max_words: budget(220, 170),
+          preferred_sections: ["Goal", "Scope", "Validation"],
+          avoid_patterns: ["verbose outlines", "boilerplate context blocks", "generic process advice"]
+        };
+      }
+      return {
+        target_style: "lean_implementation_plan",
+        max_words: budget(230, 180),
+        preferred_sections: ["Goal", "Constraints", "Action steps", "Validation"],
+        avoid_patterns: ["deep background sections", "broad deliverables lists", "repeated boilerplate"]
+      };
+    }
+    case "deep": {
+      if (wantsTests) {
+        return {
+          target_style: "deep_test_plan",
+          max_words: budget(420, 340),
+          preferred_sections: ["Goal", "Behavior under test", "Test matrix", "Edge cases", "Validation"],
+          avoid_patterns: ["vague test advice", "non-test deliverables", "ungrounded file guesses"]
+        };
+      }
+      if (wantsSpec) {
+        return {
+          target_style: "deep_spec",
+          max_words: budget(420, 340),
+          preferred_sections: ["Goal", "Scope", "Relevant sources", "Proposed outline", "Risks", "Validation"],
+          avoid_patterns: ["implementation-only checklists", "generic organizational boilerplate", "speculation"]
+        };
+      }
+      return {
+        target_style: "deep_implementation_plan",
+        max_words: budget(420, 360),
+        preferred_sections: [
+          "Goal",
+          "Scope and constraints",
+          "Codebase anchors",
+          "Implementation plan",
+          "Edge cases",
+          "Validation"
+        ],
+        avoid_patterns: ["security theater", "repeated compliance boilerplate", "invented file/symbol references"]
+      };
+    }
+    default: {
+      // Standard style: the spec case is checked before the test case.
+      if (wantsSpec) {
+        return {
+          target_style: "concise_spec",
+          max_words: budget(320, 260),
+          preferred_sections: ["Goal", "Scope", "Relevant sources", "Proposed outline", "Validation"],
+          avoid_patterns: ["long implementation checklists", "generic deliverables sections", "repeated boilerplate"]
+        };
+      }
+      if (wantsTests) {
+        return {
+          target_style: "test_plan",
+          max_words: budget(320, 260),
+          preferred_sections: ["Goal", "Behavior under test", "Test matrix", "Implementation notes", "Validation"],
+          avoid_patterns: ["broad architecture rewrites", "non-test deliverables", "generic process bullets"]
+        };
+      }
+      return {
+        target_style: "implementation_plan",
+        max_words: budget(360, 300),
+        preferred_sections: ["Goal", "Scope and constraints", "Codebase anchors", "Implementation plan", "Validation"],
+        avoid_patterns: ["broad security theater", "repeated compliance boilerplate", "vague deliverables lists"]
+      };
+    }
+  }
+}
+/**
+ * Renders the user message for the Claude enhancer: three fixed instruction
+ * lines, an "Input JSON:" marker, and the request serialized as pretty-printed
+ * JSON (2-space indent). The JSON includes the derived output contract,
+ * non-negotiables, and project conventions; snippet scores are rounded to four
+ * decimal places and a missing workspace id is serialized as "none".
+ */
+function buildClaudeEnhancerUserPayload(input: EnhancerGenerationRequest): string {
+  const { request } = input;
+  const project_conventions = extractProjectConventionsFromEnhancerContext(input.context_snippets);
+  const output_contract = buildEnhancerOutputContract({
+    style: input.style_resolved,
+    intent: input.intent,
+    query_intent: input.query_intent,
+    has_context: input.context_refs.length > 0
+  });
+  const non_negotiables = extractEnhancerNonNegotiables({
+    prompt: request.prompt,
+    history: request.conversation_history
+  });
+  const context_snippets = input.context_snippets.map(
+    ({ path, start_line, end_line, reason, score, snippet }) => ({
+      path,
+      start_line,
+      end_line,
+      reason,
+      score: Number(score.toFixed(4)),
+      snippet
+    })
+  );
+  // Key order below is the serialization order of the JSON document.
+  const body = JSON.stringify(
+    {
+      trace_id: input.trace_id,
+      tenant_id: input.tenant_id,
+      workspace_id: input.workspace_id ?? "none",
+      tool_mode: input.tool_mode,
+      style_requested: input.style_requested,
+      style_resolved: input.style_resolved,
+      intent: input.intent,
+      query_intent: input.query_intent,
+      language: input.language,
+      original_prompt: request.prompt,
+      conversation_history: request.conversation_history,
+      context_refs: input.context_refs,
+      context_snippets,
+      output_contract,
+      non_negotiables,
+      project_conventions
+    },
+    null,
+    2
+  );
+  const preamble = [
+    "Enhance the following request into a concise, implementation-ready prompt.",
+    "Prioritize user intent fidelity, concrete repo anchors, and verifiable validation steps.",
+    "Honor the requested enhancement style while avoiding invented details.",
+    "Input JSON:"
+  ];
+  return [...preamble, body].join("\n");
+}
+/**
+ * Strips one wrapping Markdown code fence from provider output.
+ *
+ * The opening fence is removed together with its info string when that info
+ * string is a single token ending the first line (e.g. "```text", "```mdx",
+ * "```jsonc"). Previously only `json`, `markdown` and `md` were recognized, and
+ * they were matched as prefixes, so "```text" left "text" in the prompt and
+ * "```mdx" left "x". The same-line forms "```json{...}" are still handled for
+ * the three known tags, and a bare "```" before inline text is removed without
+ * touching the text.
+ *
+ * @param text raw provider output
+ * @returns the text without the leading/trailing fence, trimmed
+ */
+function removeEnhancerCodeFences(text: string): string {
+  // Alternative 1: any single-token info string that ends the first line.
+  // Alternative 2: a known tag used inline; the lookahead stops "md" matching inside "mdx".
+  const openingFence = /^```(?:[^\s`]*[ \t]*\r?\n|(?:json|markdown|md)(?![\w+\-]))?\s*/iu;
+  const closingFence = /\s*```$/u;
+  return text.trim().replace(openingFence, "").replace(closingFence, "").trim();
+}
+/**
+ * Cleans provider output into the final enhanced prompt: removes a wrapping
+ * code fence, converts CRLF to LF, strips trailing spaces/tabs from each line,
+ * collapses runs of three or more newlines to two, and trims. If the cleaned
+ * text is a JSON object with a string `enhanced_prompt` field, that field
+ * (trimmed) is returned instead; otherwise the cleaned text is returned.
+ */
+function normalizeProviderEnhancedPrompt(text: string): string {
+  const unixText = removeEnhancerCodeFences(text).replace(/\r\n/g, "\n");
+  const rightTrimmedLines = unixText.split("\n").map((line) => line.replace(/[ \t]+$/u, ""));
+  const cleaned = rightTrimmedLines.join("\n").replace(/\n{3,}/g, "\n\n").trim();
+  if (cleaned.length === 0) {
+    return cleaned;
+  }
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(cleaned);
+  } catch {
+    // Not JSON: the cleaned text itself is the prompt.
+    return cleaned;
+  }
+  if (parsed !== null && typeof parsed === "object") {
+    const candidate = (parsed as { enhanced_prompt?: unknown }).enhanced_prompt;
+    if (typeof candidate === "string") {
+      return candidate.trim();
+    }
+  }
+  return cleaned;
+}
+/**
+ * Shape this module expects of the Claude SDK's `query` export: it takes a
+ * prompt plus an optional options bag and yields messages asynchronously.
+ * Messages are typed `unknown` and are inspected structurally by the extractors.
+ */
+type ClaudeAgentSdkQueryFn = (input: {
+  prompt: string;
+  options?: Record<string, unknown>;
+}) => AsyncIterable<unknown>;
+// Memoized `query` export; assigned by the first successful loadClaudeAgentSdkQueryFn() call.
+let cachedClaudeAgentSdkQueryFn: ClaudeAgentSdkQueryFn | undefined;
+/** Type guard: true for non-null objects that are not arrays. */
+function isRecord(value: unknown): value is Record<string, unknown> {
+  if (typeof value !== "object" || value === null) {
+    return false;
+  }
+  return !Array.isArray(value);
+}
+/**
+ * Resolves the Claude SDK `query` function, importing it dynamically on first
+ * use and caching it afterwards. "@anthropic-ai/claude-agent-sdk" is tried
+ * first, then "@anthropic-ai/claude-code".
+ *
+ * @throws EnhancerProviderRequestError ("upstream_error") when neither module
+ *   can be imported or exports a `query` function; the message carries the
+ *   last failure's text
+ */
+async function loadClaudeAgentSdkQueryFn(): Promise<ClaudeAgentSdkQueryFn> {
+  if (cachedClaudeAgentSdkQueryFn) {
+    return cachedClaudeAgentSdkQueryFn;
+  }
+  let lastFailure: unknown;
+  for (const candidate of ["@anthropic-ai/claude-agent-sdk", "@anthropic-ai/claude-code"]) {
+    try {
+      const { query } = (await import(candidate)) as { query?: unknown };
+      if (typeof query !== "function") {
+        lastFailure = new Error(`${candidate} does not export query()`);
+        continue;
+      }
+      cachedClaudeAgentSdkQueryFn = query as ClaudeAgentSdkQueryFn;
+      return cachedClaudeAgentSdkQueryFn;
+    } catch (error) {
+      // Import failed (for example, package not installed); remember why and try the next name.
+      lastFailure = error;
+    }
+  }
+  const reason = lastFailure instanceof Error ? lastFailure.message : String(lastFailure ?? "unknown error");
+  throw new EnhancerProviderRequestError(
+    "upstream_error",
+    `claude agent sdk is not available; install @anthropic-ai/claude-agent-sdk (${reason})`
+  );
+}
+/**
+ * Pulls plain text out of a message `content` value.
+ *
+ * A string is returned trimmed. An array is scanned for object items with a
+ * string `text` field; their trimmed, non-empty texts are joined with "\n".
+ *
+ * @returns the extracted text, or undefined when there is none (blank string,
+ *   no usable array items, or any other kind of value)
+ */
+function extractTextFromClaudeMessageContent(content: unknown): string | undefined {
+  if (typeof content === "string") {
+    return content.trim() || undefined;
+  }
+  if (!Array.isArray(content)) {
+    return undefined;
+  }
+  const fragments = content
+    .filter(isRecord)
+    .map((block) => block.text)
+    .filter((text): text is string => typeof text === "string")
+    .map((text) => text.trim())
+    .filter((text) => text.length > 0);
+  return fragments.length > 0 ? fragments.join("\n") : undefined;
+}
+/**
+ * Finds the first non-empty text carried by an SDK message.
+ *
+ * Lookup order: top-level `summary`, `result`, `text`, then top-level `content`,
+ * then the nested `message.text`, then the nested `message.content`.
+ * Returns `undefined` when the value is not a record or none of those hold text.
+ */
+function extractTextFromClaudeSdkMessage(message: unknown): string | undefined {
+  if (!isRecord(message)) {
+    return undefined;
+  }
+  for (const field of ["summary", "result", "text"] as const) {
+    const candidate = message[field];
+    if (typeof candidate === "string") {
+      const cleaned = candidate.trim();
+      if (cleaned.length > 0) {
+        return cleaned;
+      }
+    }
+  }
+  const topLevel = extractTextFromClaudeMessageContent(message.content);
+  if (topLevel) {
+    return topLevel;
+  }
+  const inner = message.message;
+  if (!isRecord(inner)) {
+    return undefined;
+  }
+  if (typeof inner.text === "string") {
+    const cleaned = inner.text.trim();
+    if (cleaned.length > 0) {
+      return cleaned;
+    }
+  }
+  return extractTextFromClaudeMessageContent(inner.content) || undefined;
+}
+/**
+ * Returns the raw (untrimmed) text fragment carried by a `stream_event` message.
+ *
+ * Only two event types contribute text: `content_block_start` (via `content_block.text`)
+ * and `content_block_delta` (via `delta.text`). Everything else yields `undefined`.
+ */
+function extractTextChunkFromClaudeSdkStreamEvent(message: unknown): string | undefined {
+  if (!isRecord(message) || message.type !== "stream_event") {
+    return undefined;
+  }
+  const event = message.event;
+  if (!isRecord(event)) {
+    return undefined;
+  }
+  switch (event.type) {
+    case "content_block_start": {
+      const block = event.content_block;
+      return isRecord(block) && typeof block.text === "string" ? block.text : undefined;
+    }
+    case "content_block_delta": {
+      const delta = event.delta;
+      return isRecord(delta) && typeof delta.text === "string" ? delta.text : undefined;
+    }
+    default:
+      return undefined;
+  }
+}
+/**
+ * Reads `structured_output.enhanced_prompt` from an SDK message.
+ *
+ * Returns a result holding the trimmed prompt, or `undefined` when the message or its
+ * `structured_output` is not a record, or when the prompt is missing, non-string or blank.
+ */
+function extractStructuredOutputFromClaudeSdkMessage(message: unknown): EnhancerGenerationResult | undefined {
+  const payload = isRecord(message) ? message.structured_output : undefined;
+  if (!isRecord(payload)) {
+    return undefined;
+  }
+  const candidate = payload.enhanced_prompt;
+  if (typeof candidate !== "string") {
+    return undefined;
+  }
+  const cleaned = candidate.trim();
+  return cleaned.length === 0 ? undefined : { enhanced_prompt: cleaned };
+}
+/**
+ * Detects a failed `result` message.
+ *
+ * Returns the failure subtype together with the trimmed, non-empty string entries of
+ * `errors`, or `undefined` when the message is not a `result`, has no string subtype,
+ * or its subtype is "success".
+ */
+function extractResultFailureFromClaudeSdkMessage(message: unknown): {
+  subtype: string;
+  errors: string[];
+} | undefined {
+  if (!isRecord(message) || message.type !== "result") {
+    return undefined;
+  }
+  const { subtype, errors: reported } = message;
+  if (typeof subtype !== "string" || subtype === "success") {
+    return undefined;
+  }
+  const errors: string[] = [];
+  if (Array.isArray(reported)) {
+    for (const entry of reported) {
+      if (typeof entry !== "string") {
+        continue;
+      }
+      const cleaned = entry.trim();
+      if (cleaned.length > 0) {
+        errors.push(cleaned);
+      }
+    }
+  }
+  return { subtype, errors };
+}
+/**
+ * Produces a short label for an SDK message, used when reporting which message kinds
+ * were seen: `type:subtype`, just `type`, "unknown" when `type` is not a string, or the
+ * JavaScript `typeof` of a non-record value.
+ */
+function describeClaudeSdkMessage(message: unknown): string {
+  if (!isRecord(message)) {
+    return typeof message;
+  }
+  const { type, subtype } = message;
+  const kind = typeof type === "string" ? type : "unknown";
+  // An empty subtype is treated the same as a missing one.
+  return typeof subtype === "string" && subtype ? `${kind}:${subtype}` : kind;
+}
+/**
+ * Normalizes any thrown value into an EnhancerProviderRequestError.
+ *
+ * Existing EnhancerProviderRequestError instances are returned unchanged. Other `Error`
+ * objects are classified from their message text, checked in this order: timeout/abort,
+ * rate limiting, missing Claude executable, then a generic "upstream_error". Non-Error
+ * values are stringified into an "upstream_error".
+ *
+ * @param error - The value caught from the provider call.
+ * @returns The classified error; this function never throws.
+ */
+function classifyEnhancerProviderError(error: unknown): EnhancerProviderRequestError {
+  if (error instanceof EnhancerProviderRequestError) {
+    return error;
+  }
+  if (error instanceof Error) {
+    const message = error.message || "unknown enhancer provider error";
+    if (/(timeout|timed out|abort)/i.test(message)) {
+      return new EnhancerProviderRequestError("timeout", message);
+    }
+    // `\b429\b` matches the status code as a standalone number only, so digit runs that
+    // merely contain 429 (ports, ids, durations) are not reported as rate limiting.
+    if (/(rate.?limit|too many requests|\b429\b)/i.test(message)) {
+      return new EnhancerProviderRequestError("rate_limited", message);
+    }
+    if (/(no such file|not found|ENOENT)/i.test(message) && /claude/i.test(message)) {
+      return new EnhancerProviderRequestError("upstream_error", `claude code executable not found: ${message}`);
+    }
+    return new EnhancerProviderRequestError("upstream_error", message);
+  }
+  return new EnhancerProviderRequestError("upstream_error", String(error));
+}
+/**
+ * Enhancer generation provider that runs a request through the SDK `query()` stream and
+ * reduces the emitted messages to a single enhanced prompt.
+ *
+ * The constructor validates and normalizes the options; `generate()` performs the call.
+ */
+export class ClaudeAgentEnhancerProvider implements EnhancerGenerationProvider {
+  private readonly apiKey: string;
+  private readonly model: string;
+  private readonly maxTokens: number;
+  private readonly baseUrl?: string;
+  private readonly pathToClaudeCodeExecutable?: string;
+  private readonly permissionMode: ClaudeCodePermissionMode;
+  /**
+   * @param options - Provider configuration. String fields are trimmed before use;
+   *   `max_tokens` defaults to 1200 and `permission_mode` to "default".
+   * @throws Error when `api_key` or `model` is empty after trimming, when `max_tokens`
+   *   is not a positive integer, or when `permission_mode` is not an accepted literal.
+   */
+  constructor(options: ClaudeAgentEnhancerProviderOptions) {
+    const apiKey = options.api_key.trim();
+    if (apiKey.length === 0) {
+      throw new Error("invalid claude enhancer config: api_key must be non-empty");
+    }
+    const model = options.model?.trim() ?? DEFAULT_CLAUDE_ENHANCER_MODEL;
+    if (model.length === 0) {
+      throw new Error("invalid claude enhancer config: model must be non-empty");
+    }
+    const maxTokens = options.max_tokens ?? 1_200;
+    if (!Number.isInteger(maxTokens) || maxTokens <= 0) {
+      throw new Error("invalid claude enhancer config: max_tokens must be a positive integer");
+    }
+    const permissionMode = options.permission_mode ?? "default";
+    if (
+      permissionMode !== "default" &&
+      permissionMode !== "acceptEdits" &&
+      permissionMode !== "bypassPermissions" &&
+      permissionMode !== "plan"
+    ) {
+      throw new Error("invalid claude enhancer config: permission_mode must be default|acceptEdits|bypassPermissions|plan");
+    }
+    this.apiKey = apiKey;
+    this.model = model;
+    this.maxTokens = maxTokens;
+    this.baseUrl = options.base_url?.trim();
+    const executablePath = options.path_to_claude_code_executable?.trim();
+    this.pathToClaudeCodeExecutable = executablePath && executablePath.length > 0 ? executablePath : undefined;
+    this.permissionMode = permissionMode;
+  }
+  /** Reports the provider id ("claude_agent") and the configured model. */
+  describe(): EnhancerProviderDescriptor {
+    return {
+      provider: "claude_agent",
+      model: this.model
+    };
+  }
+  /**
+   * Streams one enhancement request through the SDK and returns the resulting prompt.
+   *
+   * Result precedence: the last `structured_output.enhanced_prompt` seen wins; otherwise
+   * the last message-level text; otherwise the concatenated stream-event text chunks
+   * (the latter two pass through normalizeProviderEnhancedPrompt).
+   * `input.on_progress` is invoked once per received message, and `input.abort_signal`,
+   * when given, is forwarded to the SDK through an internal AbortController.
+   *
+   * @throws EnhancerProviderRequestError for SDK result or assistant errors, for a
+   *   max-turns result that produced no text, and (reason "invalid_response") when no
+   *   text was produced at all. Anything else thrown while streaming is converted by
+   *   classifyEnhancerProviderError.
+   */
+  async generate(input: EnhancerGenerationRequest): Promise<EnhancerGenerationResult> {
+    const query = await loadClaudeAgentSdkQueryFn();
+    const prompt = buildClaudeEnhancerUserPayload(input);
+    const abortController = new AbortController();
+    // Everything that can throw is built BEFORE the abort listener is attached, so a
+    // failure here cannot leave a listener registered on the caller's signal.
+    const options: Record<string, unknown> = {
+      model: this.model,
+      // NOTE(review): max_tokens is forwarded as maxThinkingTokens while thinking is
+      // disabled below; confirm this is the intended SDK option.
+      maxThinkingTokens: this.maxTokens,
+      maxTurns: DEFAULT_CLAUDE_ENHANCER_MAX_TURNS,
+      includePartialMessages: true,
+      thinking: {
+        type: "disabled"
+      },
+      permissionMode: this.permissionMode,
+      systemPrompt: buildClaudeEnhancerSystemInstruction(input.language, input.style_resolved),
+      // Enhancer already receives scoped context snippets; keep Claude Code tools disabled to avoid long tool loops.
+      tools: [],
+      allowedTools: [],
+      // NOTE(review): only the Anthropic variables are passed here; confirm how the SDK
+      // combines this with the parent process environment.
+      env: {
+        ANTHROPIC_API_KEY: this.apiKey,
+        ...(this.baseUrl ? { ANTHROPIC_BASE_URL: this.baseUrl } : {})
+      },
+      abortController,
+      ...(this.pathToClaudeCodeExecutable ? { pathToClaudeCodeExecutable: this.pathToClaudeCodeExecutable } : {}),
+      ...(input.request.project_root_path ? { cwd: input.request.project_root_path } : {})
+    };
+    let structured: EnhancerGenerationResult | undefined;
+    let lastText: string | undefined;
+    const streamTextParts: string[] = [];
+    const seenMessageKinds = new Set<string>();
+    let maxTurnsFailure: { subtype: string; errors: string[] } | undefined;
+    const upstreamAbortSignal = input.abort_signal;
+    const upstreamAbortHandler = (): void => {
+      abortController.abort();
+    };
+    if (upstreamAbortSignal) {
+      if (upstreamAbortSignal.aborted) {
+        abortController.abort();
+      } else {
+        upstreamAbortSignal.addEventListener("abort", upstreamAbortHandler, { once: true });
+      }
+    }
+    try {
+      for await (const message of query({ prompt, options })) {
+        input.on_progress?.();
+        seenMessageKinds.add(describeClaudeSdkMessage(message));
+        const partialChunk = extractTextChunkFromClaudeSdkStreamEvent(message);
+        if (typeof partialChunk === "string" && partialChunk.length > 0) {
+          streamTextParts.push(partialChunk);
+        }
+        const resultFailure = extractResultFailureFromClaudeSdkMessage(message);
+        if (resultFailure) {
+          if (resultFailure.subtype === "error_max_turns") {
+            // Deferred: text collected so far may still be usable, so decide after the loop.
+            maxTurnsFailure = resultFailure;
+            continue;
+          }
+          const details = resultFailure.errors.length > 0 ? `: ${resultFailure.errors.join(" | ")}` : "";
+          throw new EnhancerProviderRequestError(
+            "upstream_error",
+            `claude agent sdk result error (${resultFailure.subtype})${details}`
+          );
+        }
+        const maybeStructured = extractStructuredOutputFromClaudeSdkMessage(message);
+        if (maybeStructured) {
+          structured = maybeStructured;
+        }
+        const maybeText = extractTextFromClaudeSdkMessage(message);
+        if (maybeText) {
+          lastText = maybeText;
+        }
+        if (isRecord(message) && message.type === "assistant" && typeof message.error === "string") {
+          throw new EnhancerProviderRequestError(
+            "upstream_error",
+            `claude agent sdk assistant error: ${message.error}`
+          );
+        }
+      }
+    } catch (error) {
+      throw classifyEnhancerProviderError(error);
+    } finally {
+      if (upstreamAbortSignal) {
+        upstreamAbortSignal.removeEventListener("abort", upstreamAbortHandler);
+      }
+    }
+    if (structured) {
+      return structured;
+    }
+    if (!lastText && streamTextParts.length > 0) {
+      // Fall back to the partial stream chunks when no complete message carried text.
+      lastText = streamTextParts.join("").trim();
+    }
+    if (maxTurnsFailure && !lastText) {
+      const details = maxTurnsFailure.errors.length > 0 ? `: ${maxTurnsFailure.errors.join(" | ")}` : "";
+      throw new EnhancerProviderRequestError(
+        "upstream_error",
+        `claude agent sdk hit max turns before returning output${details}`
+      );
+    }
+    if (!lastText) {
+      const seenKinds = [...seenMessageKinds].join(", ") || "none";
+      throw new EnhancerProviderRequestError(
+        "invalid_response",
+        `claude agent sdk returned no text output (messages=${seenKinds})`
+      );
+    }
+    return { enhanced_prompt: normalizeProviderEnhancedPrompt(lastText) };
+  }
+}
 async function safeResponseText(response: Response): Promise<string> {
   try {
@@ -1357,6 +2904,37 @@ async function safeResponseText(response: Response): Promise<string> {
   }
 }
+/**
+ * Converts a `Retry-After` header value into a delay in milliseconds.
+ *
+ * A non-negative finite number is read as seconds (rounded up to whole milliseconds).
+ * Otherwise the value is parsed as a date and the remaining time from now is returned,
+ * clamped at 0. Missing, blank or unparseable values yield `undefined`.
+ */
+function parseRetryAfterMs(headerValue: string | null): number | undefined {
+  const raw = headerValue ? headerValue.trim() : "";
+  if (raw.length === 0) {
+    return undefined;
+  }
+  const asSeconds = Number(raw);
+  if (Number.isFinite(asSeconds) && asSeconds >= 0) {
+    return Math.ceil(asSeconds * 1000);
+  }
+  const retryAt = Date.parse(raw);
+  return Number.isNaN(retryAt) ? undefined : Math.max(0, retryAt - Date.now());
+}
+/**
+ * Builds the scope key `provider:<provider>|credential:<id>`.
+ *
+ * The credential id is the trimmed `overrideScopeId` when it is non-empty; otherwise it
+ * is the first 16 characters of `sha256(apiKey)`, so the raw key never appears in the scope.
+ */
+function resolveProviderLimiterScope(input: {
+  provider: string;
+  apiKey: string;
+  overrideScopeId?: string;
+}): string {
+  const credential = input.overrideScopeId?.trim() || sha256(input.apiKey).slice(0, 16);
+  return `provider:${input.provider}|credential:${credential}`;
+}
 function resolveEmbeddingDescriptor(provider: EmbeddingProvider): EmbeddingDescriptor {
   const described = provider.describe?.();
   if (!described) {
@@ -1373,23 +2951,120 @@ function resolveEmbeddingDescriptor(provider: EmbeddingProvider): EmbeddingDescr
   };
 }
-function normalizeEmbeddingDescriptor(descriptor: EmbeddingDescriptor): EmbeddingDescriptor {
-  const provider = descriptor.provider.trim();
-  if (provider.length === 0) {
-    throw new Error("invalid embedding descriptor: provider must be non-empty");
+/**
+ * Asks a reranker provider to describe itself. Providers without a `describe()` result
+ * are reported as "custom"; `model` is included only when the provider reports one.
+ */
+function resolveRerankerDescriptor(provider: RerankerProvider): RerankerDescriptor {
+  const info = provider.describe?.();
+  if (!info) {
+    return { provider: "custom" };
+  }
+  return info.model ? { provider: info.provider, model: info.model } : { provider: info.provider };
+}
+/**
+ * Asks an enhancer provider to describe itself. Providers without a `describe()` result
+ * are reported as "custom"; `model` is included only when the provider reports one.
+ */
+function resolveEnhancerProviderDescriptor(provider: EnhancerGenerationProvider): EnhancerProviderDescriptor {
+  const info = provider.describe?.();
+  if (!info) {
+    return { provider: "custom" };
+  }
+  return info.model ? { provider: info.provider, model: info.model } : { provider: info.provider };
+}
+/**
+ * Validates and canonicalizes an embedding descriptor.
+ *
+ * The provider is trimmed and lower-cased; `model` and `version` are trimmed and left
+ * out entirely when they are missing or blank.
+ *
+ * @param descriptor - Descriptor supplied by configuration or by the provider itself.
+ * @returns A new normalized descriptor; the input is not modified.
+ * @throws Error when the provider is empty after trimming or `dimensions` is not a
+ *   positive integer.
+ */
+function normalizeEmbeddingDescriptor(descriptor: EmbeddingDescriptor): EmbeddingDescriptor {
+  const provider = descriptor.provider.trim();
+  if (provider.length === 0) {
+    throw new Error("invalid embedding descriptor: provider must be non-empty");
+  }
+  if (!Number.isInteger(descriptor.dimensions) || descriptor.dimensions <= 0) {
+    throw new Error("invalid embedding descriptor: dimensions must be a positive integer");
+  }
+  // Trim BEFORE the presence check so whitespace-only values are omitted instead of
+  // being emitted as "", matching the reranker/enhancer descriptor normalizers.
+  const model = descriptor.model?.trim();
+  const version = descriptor.version?.trim();
+  return {
+    provider: provider.toLowerCase(),
+    ...(model ? { model } : {}),
+    dimensions: descriptor.dimensions,
+    ...(version ? { version } : {})
+  };
+}
+/**
+ * Canonicalizes a reranker descriptor: the provider is trimmed and lower-cased, and
+ * `model` is trimmed and dropped when blank.
+ *
+ * @throws Error when the provider is empty after trimming.
+ */
+function normalizeRerankerDescriptor(descriptor: RerankerDescriptor): RerankerDescriptor {
+  const provider = descriptor.provider.trim().toLowerCase();
+  if (!provider) {
+    throw new Error("invalid reranker descriptor: provider must be non-empty");
+  }
+  const model = descriptor.model?.trim();
+  return model ? { provider, model } : { provider };
+}
+/**
+ * Canonicalizes an enhancer provider descriptor: the provider is trimmed and
+ * lower-cased, and `model` is trimmed and dropped when blank.
+ *
+ * @throws Error when the provider is empty after trimming.
+ */
+function normalizeEnhancerProviderDescriptor(descriptor: EnhancerProviderDescriptor): EnhancerProviderDescriptor {
+  const provider = descriptor.provider.trim().toLowerCase();
+  if (!provider) {
+    throw new Error("invalid enhancer descriptor: provider must be non-empty");
+  }
+  const model = descriptor.model?.trim();
+  return model ? { provider, model } : { provider };
+}
+/** Renders a candidate as the text handed to the reranker: its path, a newline, then its snippet. */
+function buildRerankerDocument(candidate: SearchResultRow): string {
+  const { path, snippet } = candidate;
+  return `${path}\n${snippet}`;
+}
+/**
+ * Maps a reranker failure onto one of the reported failure reasons.
+ *
+ * A RerankerProviderRequestError is classified from its `reason` field
+ * ("invalid_json" and "invalid_response" both become "schema_error"). Any other `Error`
+ * is classified from its message text; everything else is "upstream_error".
+ *
+ * @param error - The value caught from the reranker call.
+ */
+function classifyRerankerFailureReason(error: unknown): "timeout" | "schema_error" | "rate_limited" | "upstream_error" {
+  if (error instanceof RerankerProviderRequestError) {
+    if (error.reason === "timeout") {
+      return "timeout";
+    }
+    if (error.reason === "rate_limited") {
+      return "rate_limited";
+    }
+    if (error.reason === "invalid_json" || error.reason === "invalid_response") {
+      return "schema_error";
+    }
+    return "upstream_error";
+  }
+  if (error instanceof Error) {
+    // `\b429\b` matches the status code as a standalone number only. Without the
+    // boundaries, a timeout message such as "timeout_after_4290ms" was caught here
+    // first and reported as rate limiting.
+    if (/(rate.?limit|too many requests|\b429\b)/i.test(error.message)) {
+      return "rate_limited";
+    }
+    if (/(timeout|timed out)/i.test(error.message)) {
+      return "timeout";
+    }
+    return "upstream_error";
+  }
+  return "upstream_error";
+}
+function classifyEnhancerGenerationFailureReason(
+  error: unknown
+): "timeout" | "schema_error" | "rate_limited" | "invalid_response" | "upstream_error" {
+  if (error instanceof EnhancerProviderRequestError) {
+    return error.reason;
   }
-  if (!Number.isInteger(descriptor.dimensions) || descriptor.dimensions <= 0) {
-    throw new Error("invalid embedding descriptor: dimensions must be a positive integer");
+  if (error instanceof Error) {
+    if (/(timeout|timed out)/i.test(error.message)) {
+      return "timeout";
+    }
+    if (/(rate.?limit|too many requests|429)/i.test(error.message)) {
+      return "rate_limited";
+    }
+    return "upstream_error";
   }
-  return {
-    provider: provider.toLowerCase(),
-    ...(descriptor.model ? { model: descriptor.model.trim() } : {}),
-    dimensions: descriptor.dimensions,
-    ...(descriptor.version ? { version: descriptor.version.trim() } : {})
-  };
+  return "upstream_error";
 }
-function classifyIntent(prompt: string): "bugfix" | "feature" | "refactor" | "docs" | "tests" | "unknown" {
+function classifyIntent(prompt: string): EnhancerIntent {
   const p = prompt.toLowerCase();
   if (/fix|bug|error|crash|regression/.test(p)) {
     return "bugfix";
@@ -1409,7 +3084,54 @@ function classifyIntent(prompt: string): "bugfix" | "feature" | "refactor" | "do
   return "unknown";
 }
-function detectDominantLanguage(prompt: string, history: EnhancePromptInput["conversation_history"]): "en" | "es" | "zh" {
+/**
+ * Resolves the prompt style to use for an enhancement request.
+ *
+ * A missing `requested` style defaults to "standard", and any explicit style other than
+ * "auto" is used as-is. For "auto" the prompt and history text are inspected:
+ * - "lean" when the text asks for brevity, or when it is short (at most 18 tokens and at
+ *   most one history entry) and neither risk keywords nor a request for depth appear;
+ * - "deep" when depth is requested, risk keywords appear, at least two complexity
+ *   signals hold (context available, 32+ tokens, 3+ history entries), the query intent
+ *   is "symbol-heavy", or the intent is "tests";
+ * - "standard" otherwise.
+ */
+function resolveEnhancerPromptStyle(input: {
+  requested?: EnhancePromptStyle;
+  intent: EnhancerIntent;
+  query_intent: EnhancerQueryIntent;
+  prompt: string;
+  history: EnhancePromptInput["conversation_history"];
+  has_context: boolean;
+}): {
+  requested: EnhancePromptStyle;
+  resolved: ResolvedEnhancerPromptStyle;
+} {
+  const requested = input.requested ?? "standard";
+  if (requested !== "auto") {
+    return { requested, resolved: requested };
+  }
+  const historyText = input.history.map((entry) => entry.content).join("\n");
+  const combined = `${input.prompt}\n${historyText}`.trim();
+  const wordCount = tokenize(combined).length;
+  const turnCount = input.history.length;
+  const wantsBrevity = /\b(concise|brief|short|minimal|quick)\b/i.test(combined);
+  const wantsDepth = /\b(detailed|comprehensive|thorough|step-by-step|checklist)\b/i.test(combined);
+  const touchesRiskyArea = /\b(security|auth|authorization|tenant|workspace|migration|data loss|rollback|incident|compliance|backward)\b/i.test(
+    combined
+  );
+  const complexitySignals = [input.has_context, wordCount >= 32, turnCount >= 3].filter(Boolean).length;
+  const isShortRequest = wordCount <= 18 && turnCount <= 1;
+  let resolved: ResolvedEnhancerPromptStyle = "standard";
+  if (wantsBrevity || (isShortRequest && !touchesRiskyArea && !wantsDepth)) {
+    resolved = "lean";
+  } else if (
+    wantsDepth ||
+    touchesRiskyArea ||
+    complexitySignals >= 2 ||
+    input.query_intent === "symbol-heavy" ||
+    input.intent === "tests"
+  ) {
+    resolved = "deep";
+  }
+  return { requested, resolved };
+}
+function detectDominantLanguage(prompt: string, history: EnhancePromptInput["conversation_history"]): EnhancerOutputLanguage {
   const latestUser = [...history].reverse().find((m) => m.role === "user")?.content ?? prompt;
   const sample = `${prompt}\n${latestUser}`.toLowerCase();
   if (/[\u3400-\u9fff]/.test(sample)) {
@@ -1917,7 +3639,7 @@ function buildEnhancerRetrievalQuery(
   };
 }
-const ENHANCER_LOW_CONFIDENCE_WARNING = "Low retrieval confidence; narrowed context refs and added clarification questions.";
+const ENHANCER_LOW_CONFIDENCE_WARNING = "Low retrieval confidence; narrowed context refs.";
 const ENHANCER_CONFIDENCE_OVERLAP_STOPWORDS = new Set([
   "a",
@@ -2203,7 +3925,7 @@ function hasStrongEnhancerAnchorMatch(input: {
   const topScore = top[0]?.score ?? 0;
   const runnerUpScore = top[1]?.score ?? Number.NEGATIVE_INFINITY;
   const strongScoreMargin = top.length === 1 || topScore - runnerUpScore >= 0.08;
-  const hasTopExactSymbolMatch = top.some((result) => result.reason === "exact symbol match");
+  const hasTopExactSymbolMatch = top.some((result) => isExactLiteralReason(result.reason));
   if (hasTopExactSymbolMatch && strongScoreMargin && topScore >= 0.55) {
     return true;
   }
@@ -2328,7 +4050,7 @@ function evaluateEnhancerConfidence(input: {
   if (diversityStrength < confidenceThreshold) {
     failedSignals.push("path_diversity");
   }
-  const strongSymbolOrPathSignal = top.some((result) => result.reason === "exact symbol match") && topOverlap >= 0.16;
+  const strongSymbolOrPathSignal = top.some((result) => isExactLiteralReason(result.reason)) && topOverlap >= 0.16;
   const lowConfidence = !strongSymbolOrPathSignal && confidenceScore + 0.01 < confidenceThreshold;
   return {
@@ -2355,7 +4077,7 @@ function rankEnhancerResultsForConfidence(input: {
   const anchorScore = (result: SearchContextOutput["results"][number]): number => {
     const normalizedPath = normalizePath(result.path).toLowerCase();
     const normalizedSnippet = result.snippet.toLowerCase();
-    let score = result.reason === "exact symbol match" ? 2 : 0;
+    let score = isExactLiteralReason(result.reason) ? 2 : 0;
     for (const anchor of anchors) {
       if (normalizedPath.includes(anchor)) {
         score += 2;
@@ -2415,7 +4137,11 @@ function rankEnhancerResultsForConfidence(input: {
   });
 }
-async function runWithTimeout<T>(input: { timeout_ms: number; fn: () => Promise<T> | T }): Promise<T> {
+async function runWithTimeout<T>(input: {
+  timeout_ms: number;
+  fn: () => Promise<T> | T;
+  on_timeout?: () => void;
+}): Promise<T> {
   return await new Promise<T>((resolve, reject) => {
     let settled = false;
     const timer = setTimeout(() => {
@@ -2423,6 +4149,7 @@ async function runWithTimeout<T>(input: { timeout_ms: number; fn: () => Promise<
         return;
       }
       settled = true;
+      input.on_timeout?.();
       reject(new Error(`timeout_after_${input.timeout_ms}ms`));
     }, input.timeout_ms);
@@ -2447,6 +4174,65 @@ async function runWithTimeout<T>(input: { timeout_ms: number; fn: () => Promise<
   });
 }
+/**
+ * Runs `fn` under an inactivity watchdog.
+ *
+ * The watchdog starts at `timeout_ms` and is restarted every time `fn` calls the
+ * supplied `touch()`. If it ever fires, the supplied `signal` is aborted and the
+ * returned promise rejects with `timeout_after_<timeout_ms>ms`. Otherwise the promise
+ * settles with whatever `fn` returns or throws. Only the first outcome counts; anything
+ * that happens after settlement is ignored.
+ */
+async function runWithInactivityTimeout<T>(input: {
+  timeout_ms: number;
+  fn: (helpers: { touch: () => void; signal: AbortSignal }) => Promise<T> | T;
+}): Promise<T> {
+  return await new Promise<T>((resolve, reject) => {
+    const controller = new AbortController();
+    let finished = false;
+    let watchdog: ReturnType<typeof setTimeout> | undefined;
+    // Marks the run as finished and stops the watchdog; returns false if already finished.
+    const claimOutcome = (): boolean => {
+      if (finished) {
+        return false;
+      }
+      finished = true;
+      if (watchdog) {
+        clearTimeout(watchdog);
+      }
+      return true;
+    };
+    const expire = (): void => {
+      if (finished) {
+        return;
+      }
+      finished = true;
+      controller.abort();
+      reject(new Error(`timeout_after_${input.timeout_ms}ms`));
+    };
+    const touch = (): void => {
+      if (finished) {
+        return;
+      }
+      if (watchdog) {
+        clearTimeout(watchdog);
+      }
+      watchdog = setTimeout(expire, input.timeout_ms);
+    };
+    // Arm the watchdog before the work starts so a stalled start is also caught.
+    touch();
+    Promise.resolve()
+      .then(() => input.fn({ touch, signal: controller.signal }))
+      .then((value) => {
+        if (claimOutcome()) {
+          resolve(value);
+        }
+      })
+      .catch((error) => {
+        if (claimOutcome()) {
+          reject(error);
+        }
+      });
+  });
+}
 function deterministicEnhancerFallbackRanking(input: {
   results: SearchContextOutput["results"];
   intent: ReturnType<typeof classifyIntent>;
@@ -2462,46 +4248,6 @@ function deterministicEnhancerFallbackRanking(input: {
   return [...preferred, ...tolerated, ...avoided];
 }
-function localizeLowConfidenceQuestion(input: {
-  language: "en" | "es" | "zh";
-  kind: "scope" | "symbol" | "source_priority";
-  symbol?: string;
-}): string {
-  if (input.kind === "symbol") {
-    if (input.language === "es") {
-      return input.symbol
-        ? `¿Puedes confirmar si el cambio debe centrarse en el símbolo "${input.symbol}"?`
-        : "¿Qué función, clase o archivo exacto debe modificarse primero?";
-    }
-    if (input.language === "zh") {
-      return input.symbol
-        ? `请确认这次改动是否应优先围绕符号“${input.symbol}”展开？`
-        : "请明确首先要修改的函数、类或文件路径。";
-    }
-    return input.symbol
-      ? `Can you confirm whether "${input.symbol}" is the primary symbol to change?`
-      : "Which exact function, class, or file should be edited first?";
-  }
-  if (input.kind === "source_priority") {
-    if (input.language === "es") {
-      return "¿Debemos priorizar archivos de implementación en src/lib y dejar docs/tests/examples fuera de alcance?";
-    }
-    if (input.language === "zh") {
-      return "是否应优先修改 src/lib 下的实现代码，并排除 docs/tests/examples？";
-    }
-    return "Should we prioritize runtime implementation files (src/lib) and exclude docs/tests/examples from scope?";
-  }
-  if (input.language === "es") {
-    return "¿Cuál es el alcance mínimo y el comportamiento que no debe cambiar?";
-  }
-  if (input.language === "zh") {
-    return "这次改动的最小范围是什么？哪些行为必须保持不变？";
-  }
-  return "What is the minimal scope, and which behavior must remain unchanged?";
-}
 function trimToContextBudget(results: SearchContextOutput["results"]): SearchContextOutput["results"] {
   let total = 0;
   const out: SearchContextOutput["results"] = [];
@@ -2516,7 +4262,7 @@ function trimToContextBudget(results: SearchContextOutput["results"]): SearchCon
 }
 function formatEnhancedPrompt(input: {
-  intent: ReturnType<typeof classifyIntent>;
+  style: ResolvedEnhancerPromptStyle;
   language: "en" | "es" | "zh";
   original_prompt: string;
   refs: ContextRef[];
@@ -2530,62 +4276,175 @@ function formatEnhancedPrompt(input: {
     input.refs.length > 0 ? input.refs.map((r) => `- ${r.path}:${r.start_line}`).join("\n") : emptyRefsByLanguage[input.language];
   if (input.language === "zh") {
+    if (input.style === "lean") {
+      return [
+        "目标",
+        input.original_prompt,
+        "",
+        "约束",
+        "- 保持现有行为与合约兼容。",
+        "- 优先最小且安全的改动。",
+        "",
+        "行动步骤",
+        "- 先确认当前行为与目标范围。",
+        "- 在必要位置完成最小实现并补充回归测试。",
+        "",
+        "验证",
+        "- 运行相关测试并确认无回归。"
+      ].join("\n");
+    }
+    if (input.style === "deep") {
+      return [
+        "目标",
+        input.original_prompt,
+        "",
+        "范围与约束",
+        "- 保持现有行为与 API/合约语义稳定。",
+        "- 仅在必要边界内调整实现，避免扩散改动。",
+        "- 发现风险路径时优先失败安全（deny-by-default）。",
+        "",
+        "代码锚点",
+        likelyFiles,
+        "",
+        "实施步骤",
+        "- 基线确认：先验证当前行为与关键路径。",
+        "- 变更实现：对关键分支做最小、安全、可回退的改动。",
+        "- 回归测试：覆盖正向、跨边界、异常与空输入场景。",
+        "",
+        "边界情况",
+        "- 缺失上下文、无索引或空结果时，保持行为可解释且可回退。",
+        "- 异步/并发路径中避免上下文泄漏与跨租户访问。",
+        "",
+        "验证",
+        "- 运行 typecheck 与目标测试集；确认关键路径稳定无回归。"
+      ].join("\n");
+    }
     return [
       "目标",
       input.original_prompt,
       "",
-      "当前状态",
-      `- 识别意图: ${input.intent}`,
-      "",
       "约束",
       "- 保持 v1 合约兼容和严格校验。",
       "",
-      "可能涉及的文件",
+      "代码锚点",
       likelyFiles,
       "",
       "实现清单",
       "- 在改动前确认请求/响应合约。",
       "- 最小化改动并保持 tenant/workspace 隔离。",
       "",
-      "边界情况",
-      "- Workspace 没有可用索引。",
-      "- 搜索过滤后结果为空。",
-      "",
       "验证与测试",
       "- 运行 typecheck 和合约/工具测试。",
-      "",
-      "完成定义",
-      "- 测试通过且行为符合 v1 规范。"
     ].join("\n");
   }
   if (input.language === "es") {
+    if (input.style === "lean") {
+      return [
+        "Objetivo",
+        input.original_prompt,
+        "",
+        "Restricciones",
+        "- Mantener compatibilidad de comportamiento y contratos.",
+        "- Priorizar cambios mínimos y seguros.",
+        "",
+        "Pasos",
+        "- Confirmar alcance y comportamiento actual antes de editar.",
+        "- Implementar el cambio mínimo necesario y añadir regresiones.",
+        "",
+        "Validación",
+        "- Ejecutar pruebas relevantes y confirmar que no hay regresiones."
+      ].join("\n");
+    }
+    if (input.style === "deep") {
+      return [
+        "Objetivo",
+        input.original_prompt,
+        "",
+        "Alcance y restricciones",
+        "- Preservar comportamiento existente y contratos/API vigentes.",
+        "- Limitar cambios al alcance mínimo necesario.",
+        "- Aplicar defaults de seguridad (deny-by-default) cuando aplique.",
+        "",
+        "Anclas del código",
+        likelyFiles,
+        "",
+        "Plan de implementación",
+        "- Establecer línea base del comportamiento actual.",
+        "- Aplicar cambios mínimos y reversibles en rutas críticas.",
+        "- Añadir pruebas de regresión para casos positivos, negativos y límites.",
+        "",
+        "Casos límite",
+        "- Contexto faltante o resultados vacíos no deben romper el flujo.",
+        "- Evitar fuga de contexto entre tenants/workspaces.",
+        "",
+        "Validación",
+        "- Ejecutar typecheck y pruebas objetivo; confirmar estabilidad."
+      ].join("\n");
+    }
     return [
       "Objetivo",
       input.original_prompt,
       "",
-      "Estado actual",
-      `- Intención clasificada: ${input.intent}`,
-      "",
       "Restricciones",
       "- Mantener compatibilidad con contratos v1 y validación estricta.",
       "",
-      "Archivos probables a editar",
+      "Anclas del código",
       likelyFiles,
       "",
       "Checklist de implementación",
       "- Confirmar entradas/salidas del contrato antes de modificar lógica.",
       "- Aplicar cambios mínimos y mantener aislamiento por tenant/workspace.",
       "",
-      "Casos límite",
-      "- Workspace sin índice listo.",
-      "- Filtros de búsqueda que no devuelven resultados.",
-      "",
       "Validación y pruebas",
-      "- Ejecutar typecheck y pruebas de contratos/herramientas.",
+      "- Ejecutar typecheck y pruebas de contratos/herramientas."
+    ].join("\n");
+  }
+  if (input.style === "lean") {
+    const anchors = input.refs.length > 0 ? `- Anchors: ${input.refs.slice(0, 2).map((ref) => `${ref.path}:${ref.start_line}`).join(", ")}` : "";
+    return [
+      "Goal",
+      input.original_prompt,
+      "",
+      "Constraints",
+      "- Preserve existing behavior and contract compatibility.",
+      "- Keep changes minimal and safe.",
+      ...(anchors ? ["", anchors] : []),
+      "",
+      "Action steps",
+      "- Confirm current behavior and target scope.",
+      "- Implement the smallest safe change and add regression coverage.",
+      "",
+      "Validation",
+      "- Run relevant tests and confirm no regressions."
+    ].join("\n");
+  }
+  if (input.style === "deep") {
+    return [
+      "Goal",
+      input.original_prompt,
+      "",
+      "Scope and constraints",
+      "- Preserve current behavior and API/contract semantics.",
+      "- Limit changes to the required scope and keep them reversible.",
+      "- Prefer fail-secure defaults where policy boundaries are involved.",
+      "",
+      "Codebase anchors",
+      likelyFiles,
+      "",
+      "Implementation plan",
+      "- Establish baseline behavior and invariants before edits.",
+      "- Apply minimal, safe changes on critical paths only.",
+      "- Add regression coverage for positive, negative, and boundary scenarios.",
       "",
-      "Definición de terminado",
-      "- Los tests pasan y el comportamiento coincide con el spec."
+      "Edge cases",
+      "- Missing context, empty retrieval results, and async boundary leakage.",
+      "- Cross-tenant/workspace access paths and authorization bypass attempts.",
+      "",
+      "Validation",
+      "- Run typecheck and focused test suites; verify no behavioral regressions."
     ].join("\n");
   }
@@ -2593,28 +4452,18 @@ function formatEnhancedPrompt(input: {
     "Goal",
     input.original_prompt,
     "",
-    "Current state",
-    `- Classified intent: ${input.intent}`,
-    "",
     "Constraints",
     "- Keep v1 contract compatibility and strict schema validation.",
     "",
-    "Likely files to edit",
+    "Codebase anchors",
     likelyFiles,
     "",
-    "Implementation checklist",
+    "Implementation plan",
     "- Confirm request/response contract assumptions before code edits.",
     "- Apply smallest safe changes while preserving tenant/workspace isolation.",
     "",
-    "Edge cases",
-    "- Workspace has no ready index.",
-    "- Search filters produce empty result sets.",
-    "",
     "Validation and tests",
-    "- Run typecheck and contract/tool tests.",
-    "",
-    "Definition of done",
-    "- Tests pass and behavior matches the v1 spec."
+    "- Run typecheck and contract/tool tests."
   ].join("\n");
 }
@@ -2992,13 +4841,21 @@ function compileGlob(glob: string): RegExp {
 export class RetrievalCore {
   private readonly cacheTtlSeconds: number;
+  private readonly internalCandidateDepth: number;
   private readonly embeddingProvider: EmbeddingProvider;
   private readonly embeddingDescriptor: EmbeddingDescriptor;
+  private readonly rerankerProvider?: RerankerProvider;
+  private readonly rerankerDescriptor?: RerankerDescriptor;
+  private readonly rerankerTopN: number;
+  private readonly rerankerCacheVariant: string;
   private readonly observability: Observability;
   private readonly scoringConfig: RetrievalScoringConfig;
   private readonly scoringProfileId: string;
   private readonly scoringConfigChecksum: string;
+  private readonly enhancerProvider?: EnhancerGenerationProvider;
+  private readonly enhancerProviderDescriptor?: EnhancerProviderDescriptor;
   private readonly enhancerConfig: RetrievalEnhancerConfig;
+  private readonly enhancerGenerationConfig: RetrievalEnhancerGenerationConfig;
   private readonly chunkingConfig: RetrievalChunkingConfig;
   private readonly enhancerDecisionTraceEnabled: boolean;
   private cacheHits = 0;
@@ -3010,16 +4867,36 @@ export class RetrievalCore {
     options?: RetrievalCoreOptions
   ) {
     this.cacheTtlSeconds = options?.cacheTtlSeconds ?? 60;
+    this.internalCandidateDepth = clampInternalCandidateDepth(options?.internalCandidateDepth);
     this.embeddingProvider = options?.embeddingProvider ?? new DeterministicEmbeddingProvider();
     this.embeddingDescriptor = normalizeEmbeddingDescriptor(
       options?.embeddingDescriptor ?? resolveEmbeddingDescriptor(this.embeddingProvider)
     );
+    this.rerankerProvider = options?.rerankerProvider;
+    this.rerankerTopN = options?.rerankerTopN ?? DEFAULT_SEARCH_RERANKER_TOP_N;
+    if (!Number.isInteger(this.rerankerTopN) || this.rerankerTopN <= 0) {
+      throw new Error("invalid retrieval reranker config: rerankerTopN must be a positive integer");
+    }
+    this.rerankerDescriptor = this.rerankerProvider
+      ? normalizeRerankerDescriptor(resolveRerankerDescriptor(this.rerankerProvider))
+      : undefined;
+    this.rerankerCacheVariant = this.rerankerDescriptor
+      ? `provider:${this.rerankerDescriptor.provider}|model:${this.rerankerDescriptor.model ?? "unknown"}|top_n:${this.rerankerTopN}`
+      : "provider:disabled";
     this.observability = options?.observability ?? getObservability("retrieval-core");
     const baseProfile = resolveRetrievalScoringProfile(options?.scoringProfile);
     this.scoringConfig = mergeRetrievalScoringConfig(baseProfile.config, options?.scoringConfig);
     this.scoringProfileId = options?.scoringProfileId ?? baseProfile.profile_id;
     this.scoringConfigChecksum = scoringConfigChecksum(this.scoringConfig);
+    this.enhancerProvider = options?.enhancerProvider;
+    this.enhancerProviderDescriptor = this.enhancerProvider
+      ? normalizeEnhancerProviderDescriptor(resolveEnhancerProviderDescriptor(this.enhancerProvider))
+      : undefined;
     this.enhancerConfig = mergeRetrievalEnhancerConfig(DEFAULT_RETRIEVAL_ENHANCER_CONFIG, options?.enhancerConfig);
+    this.enhancerGenerationConfig = mergeRetrievalEnhancerGenerationConfig(
+      DEFAULT_RETRIEVAL_ENHANCER_GENERATION_CONFIG,
+      options?.enhancerGenerationConfig
+    );
     this.chunkingConfig = mergeRetrievalChunkingConfig(DEFAULT_RETRIEVAL_CHUNKING_CONFIG, options?.chunkingConfig);
     this.enhancerDecisionTraceEnabled = Boolean(options?.enhancerDecisionTraceEnabled);
   }
@@ -3736,6 +5613,106 @@ export class RetrievalCore {
     };
   }
+  /**
+   * Reorders the leading candidates using the configured learned reranker.
+   *
+   * Only the first `rerankerTopN` candidates (the "head") are sent to the
+   * provider; the remaining "tail" is appended unchanged. Head entries the
+   * provider does not mention keep their original relative order after the
+   * ones it did return. Reordered entries receive fresh, strictly decreasing
+   * scores anchored one above the larger of the first head score and the
+   * first tail score.
+   * NOTE(review): the anchor only guarantees "head outranks tail" if
+   * `candidates` arrives sorted by descending score - confirm with the caller.
+   *
+   * Any error (provider failure or an invalid response) is counted, logged as
+   * a warning, and the input candidates are returned untouched. Latency is
+   * observed for every provider attempt, successful or not.
+   *
+   * @param input.trace_id Trace identifier attached to the fallback log entry.
+   * @param input.query Query text forwarded to the reranker.
+   * @param input.candidates Candidates to reorder.
+   * @returns The reranked head followed by the tail, or `input.candidates`
+   *   when reranking is disabled, not applicable, or has failed.
+   */
+  private async applyLearnedReranker(input: {
+    trace_id: string;
+    query: string;
+    candidates: SearchResultRow[];
+  }): Promise<SearchResultRow[]> {
+    const provider = this.rerankerProvider;
+    const descriptor = this.rerankerDescriptor;
+    if (!provider || !descriptor) {
+      return input.candidates;
+    }
+    // Reordering needs at least two head entries to have any effect.
+    const windowSize = Math.min(this.rerankerTopN, input.candidates.length);
+    if (windowSize <= 1) {
+      return input.candidates;
+    }
+    const head = input.candidates.slice(0, windowSize);
+    const tail = input.candidates.slice(windowSize);
+    const labels = {
+      provider: descriptor.provider,
+      model: descriptor.model ?? "unknown"
+    } as const;
+    this.observability.metrics.increment("retrieval_reranker_requests_total", 1, labels);
+    const startedAt = Date.now();
+    try {
+      const documents = head.map((candidate) => buildRerankerDocument(candidate));
+      const response = await provider.rerank({
+        query: input.query,
+        documents,
+        top_n: windowSize
+      });
+      if (!Array.isArray(response) || response.length === 0) {
+        throw new RerankerProviderRequestError("invalid_response", "reranker response must contain at least one result");
+      }
+      const usedPositions = new Set<number>();
+      const ordered: SearchResultRow[] = [];
+      // First pass: honour the provider's ordering, rejecting malformed indexes
+      // and silently dropping repeats.
+      for (const row of response) {
+        const position = row.index;
+        if (!Number.isInteger(position)) {
+          throw new RerankerProviderRequestError("invalid_response", "reranker result index must be an integer");
+        }
+        if (position < 0 || position >= head.length) {
+          throw new RerankerProviderRequestError("invalid_response", "reranker result index out of range");
+        }
+        const picked = head[position];
+        if (usedPositions.has(position) || !picked) {
+          continue;
+        }
+        usedPositions.add(position);
+        ordered.push(picked);
+      }
+      // Second pass: append head entries the provider left out, in original order.
+      head.forEach((candidate, position) => {
+        if (candidate && !usedPositions.has(position)) {
+          ordered.push(candidate);
+        }
+      });
+      if (ordered.length === 0) {
+        throw new RerankerProviderRequestError("invalid_response", "reranker did not return usable indexes");
+      }
+      const firstTailScore = tail[0]?.score ?? Number.NEGATIVE_INFINITY;
+      const firstHeadScore = head[0]?.score ?? 0;
+      const anchor = Math.max(firstHeadScore, firstTailScore) + 1;
+      const step = 1e-6;
+      // Replace scores so the new order is reflected by score-based consumers.
+      const rescored = ordered.map((candidate, rank) => ({
+        ...candidate,
+        score: anchor - rank * step
+      }));
+      return [...rescored, ...tail];
+    } catch (error) {
+      const reason = classifyRerankerFailureReason(error);
+      const errorMessage = error instanceof Error ? error.message : String(error);
+      this.observability.metrics.increment("retrieval_reranker_failures_total", 1, {
+        ...labels,
+        reason
+      });
+      this.observability.metrics.increment("retrieval_reranker_fallback_total", 1, {
+        reason
+      });
+      this.observability.logger.warn("search_context reranker fallback applied", {
+        trace_id: input.trace_id,
+        provider: labels.provider,
+        model: labels.model,
+        reason,
+        top_n: windowSize,
+        error_message: errorMessage
+      });
+      // Fail open: keep the pre-rerank ordering.
+      return input.candidates;
+    } finally {
+      this.observability.metrics.observe("retrieval_reranker_latency_ms", Date.now() - startedAt, labels);
+    }
+  }
   async searchContext(input: {
     trace_id: string;
     tenant_id: string;
@@ -3757,9 +5734,9 @@ export class RetrievalCore {
       index_id: index.index_id
     });
-    const topK = Math.min(input.request.top_k ?? 8, MAX_TOP_K);
-    const candidatePoolTopK = Math.min(MAX_TOP_K, Math.max(topK * 4, 12));
     const query = normalizeQuery(input.request.query);
+    const topK = Math.min(input.request.top_k ?? 8, MAX_TOP_K);
+    const candidatePoolTopK = Math.max(Math.max(topK * 4, 12), this.internalCandidateDepth);
     if (!indexMetadata) {
       this.observability.metrics.increment("retrieval_embedding_metadata_mismatch_total", 1, {
@@ -3831,13 +5808,22 @@ export class RetrievalCore {
       );
     }
     const queryTokens = tokenize(query);
+    const searchLiterals = extractSearchLiterals(query);
+    this.observability.metrics.observe("retrieval_candidate_depth_requested", topK, {
+      retrieval_profile_id: this.scoringProfileId
+    });
+    this.observability.metrics.observe("retrieval_candidate_depth_effective", candidatePoolTopK, {
+      retrieval_profile_id: this.scoringProfileId
+    });
     const cacheKey = buildQueryCacheKey({
       workspace_id: input.workspace_id,
       index_version: index.index_version,
       query,
       top_k: topK,
-      filters: input.request.filters
+      filters: input.request.filters,
+      retrieval_variant: this.rerankerCacheVariant
     });
     const cached = await this.cache.get(cacheKey);
@@ -3859,6 +5845,8 @@ export class RetrievalCore {
         workspace_id: input.workspace_id
       },
       async () => {
+        let literalPathMatchCount = 0;
+        let literalSnippetMatchCount = 0;
         let ranked: RankedChunkCandidate[] | undefined;
         if (this.store.rankChunksByIndex) {
           ranked = await this.store.rankChunksByIndex({
@@ -3879,11 +5867,21 @@ export class RetrievalCore {
                 .map((candidate) => {
                   let score = candidate.score;
                   score += pathQualityBias(candidate.path, queryTokens, this.scoringConfig, query);
+                  const literalBoost = applyLiteralBoost({
+                    path: candidate.path,
+                    snippet: candidate.snippet,
+                    literals: searchLiterals,
+                    path_bias: this.scoringConfig.path_bias
+                  });
+                  score += literalBoost.boost;
+                  literalPathMatchCount += literalBoost.path_matches;
+                  literalSnippetMatchCount += literalBoost.snippet_matches;
                   if (looksLowInformation(candidate.snippet)) {
                     score -= this.scoringConfig.rerank.low_information_penalty;
                   }
                   const reason = chooseReason({
                     lexical: candidate.lexical_score,
+                    literal_match: literalBoost.matched,
                     path_match: candidate.path_match,
                     recency_boosted: candidate.recency_boosted
                   });
@@ -3924,11 +5922,25 @@ export class RetrievalCore {
                     score -= candidateWeights.generated_penalty;
                   }
                   score += pathQualityBias(chunk.path, queryTokens, this.scoringConfig, query);
+                  const literalBoost = applyLiteralBoost({
+                    path: chunk.path,
+                    snippet: chunk.snippet,
+                    literals: searchLiterals,
+                    path_bias: this.scoringConfig.path_bias
+                  });
+                  score += literalBoost.boost;
+                  literalPathMatchCount += literalBoost.path_matches;
+                  literalSnippetMatchCount += literalBoost.snippet_matches;
                   if (looksLowInformation(chunk.snippet)) {
                     score -= this.scoringConfig.rerank.low_information_penalty;
                   }
-                  const reason = chooseReason({ lexical: l, path_match: pathMatch, recency_boosted: recencyBoost });
+                  const reason = chooseReason({
+                    lexical: l,
+                    literal_match: literalBoost.matched,
+                    path_match: pathMatch,
+                    recency_boosted: recencyBoost
+                  });
                   return {
                     path: chunk.path,
@@ -3946,10 +5958,36 @@ export class RetrievalCore {
           channel: "hybrid",
           retrieval_profile_id: this.scoringProfileId
         });
+        this.observability.metrics.observe("retrieval_candidates_pre_rerank_count", output.length, {
+          retrieval_profile_id: this.scoringProfileId
+        });
+        if (literalPathMatchCount > 0) {
+          this.observability.metrics.increment("retrieval_literal_boost_applied_total", literalPathMatchCount, {
+            retrieval_profile_id: this.scoringProfileId,
+            channel: "path"
+          });
+        }
+        if (literalSnippetMatchCount > 0) {
+          this.observability.metrics.increment("retrieval_literal_boost_applied_total", literalSnippetMatchCount, {
+            retrieval_profile_id: this.scoringProfileId,
+            channel: "snippet"
+          });
+        }
         return output;
       }
     );
+    const rerankedCandidates = await this.observability.tracing.withSpan(
+      "retrieval.learned_rerank",
+      { trace_id: input.trace_id },
+      async () =>
+        this.applyLearnedReranker({
+          trace_id: input.trace_id,
+          query,
+          candidates
+        })
+    );
     const deduped = await this.observability.tracing.withSpan("retrieval.rerank", { trace_id: input.trace_id }, async () => {
       const output: SearchContextOutput["results"] = [];
       const seen = new Set<string>();
@@ -3960,7 +5998,7 @@ export class RetrievalCore {
         ? this.scoringConfig.rerank.max_chunks_per_path_file_lookup
         : this.scoringConfig.rerank.max_chunks_per_path_default;
-      const available = [...candidates];
+      const available = [...rerankedCandidates];
       while (output.length < topK && available.length > 0) {
         let bestIndex = -1;
         let bestAdjustedScore = Number.NEGATIVE_INFINITY;
@@ -4028,6 +6066,41 @@ export class RetrievalCore {
       return output;
     });
+    const candidateRankByKey = new Map<string, number>();
+    for (let index = 0; index < rerankedCandidates.length; index += 1) {
+      const candidate = rerankedCandidates[index];
+      if (!candidate) {
+        continue;
+      }
+      const key = `${candidate.path}:${candidate.start_line}:${candidate.end_line}`;
+      if (!candidateRankByKey.has(key)) {
+        candidateRankByKey.set(key, index + 1);
+      }
+    }
+    let literalMatchesInTopK = 0;
+    for (let postRank = 0; postRank < deduped.length; postRank += 1) {
+      const row = deduped[postRank];
+      if (!row) {
+        continue;
+      }
+      if (isExactLiteralReason(row.reason)) {
+        literalMatchesInTopK += 1;
+      }
+      this.observability.metrics.increment("retrieval_reason_topk_total", 1, {
+        retrieval_profile_id: this.scoringProfileId,
+        reason: row.reason
+      });
+      const key = `${row.path}:${row.start_line}:${row.end_line}`;
+      const preRank = candidateRankByKey.get(key) ?? postRank + 1;
+      this.observability.metrics.observe("retrieval_rank_shift_delta", preRank - (postRank + 1), {
+        retrieval_profile_id: this.scoringProfileId
+      });
+    }
+    this.observability.metrics.observe("retrieval_literal_matches_topk", literalMatchesInTopK, {
+      retrieval_profile_id: this.scoringProfileId
+    });
     const output: SearchContextOutput = {
       trace_id: input.trace_id,
       results: deduped,
@@ -4055,6 +6128,127 @@ export class RetrievalCore {
     return output;
   }
+  /**
+   * Builds the label set attached to enhancer provider metrics.
+   *
+   * Falls back to provider "template" and model "n/a" when no provider
+   * descriptor is available (or the descriptor has no model).
+   */
+  private enhancerProviderLabels(): Record<string, string> {
+    const descriptor = this.enhancerProviderDescriptor;
+    const provider = descriptor?.provider ?? "template";
+    const model = descriptor?.model ?? "n/a";
+    const toolMode = this.enhancerGenerationConfig.tool_mode;
+    return { provider, model, tool_mode: toolMode };
+  }
+  /**
+   * Converts search results into the snippet records handed to the enhancer provider.
+   *
+   * Keeps at most `enhancerGenerationConfig.max_context_snippets` results, in
+   * their existing order, and truncates each snippet body to 1,600 characters.
+   *
+   * @param results Search results to convert.
+   * @returns One snippet record per retained result.
+   */
+  private buildEnhancerContextSnippets(results: SearchContextOutput["results"]): EnhancerContextSnippet[] {
+    const limit = this.enhancerGenerationConfig.max_context_snippets;
+    return results.slice(0, limit).map(
+      (entry): EnhancerContextSnippet => ({
+        path: entry.path,
+        start_line: entry.start_line,
+        end_line: entry.end_line,
+        reason: entry.reason,
+        snippet: entry.snippet.slice(0, 1_600),
+        score: entry.score
+      })
+    );
+  }
+  /**
+   * Produces the enhanced prompt text for an enhance request.
+   *
+   * Without a configured enhancer provider the prompt is rendered locally via
+   * `formatEnhancedPrompt`. With a provider, `generate` is called through
+   * `runWithInactivityTimeout` and attempted up to `max_retries + 1` times.
+   * Failures classified as `timeout`, `schema_error` or `invalid_response`
+   * stop the loop immediately; any other failure reason is retried while
+   * attempts remain.
+   *
+   * Each attempt increments the request counter and records one latency
+   * observation whether it succeeds or fails, so timed-out and rejected
+   * calls are visible in the latency metric. Each failed attempt also
+   * increments the failure counter and emits a warning log entry.
+   *
+   * @returns The normalized, non-empty enhanced prompt.
+   * @throws RetrievalError with code `UPSTREAM_FAILURE` when no attempt
+   *   produced a usable prompt.
+   */
+  private async generateEnhancedPrompt(input: {
+    trace_id: string;
+    tenant_id: string;
+    workspace_id?: string;
+    request: EnhancePromptInput;
+    style_requested: EnhancePromptStyle;
+    style_resolved: ResolvedEnhancerPromptStyle;
+    intent: EnhancerIntent;
+    query_intent: "symbol-heavy" | "impl-focused" | "conceptual";
+    language: EnhancerOutputLanguage;
+    context_refs: ContextRef[];
+    context_snippets: EnhancerContextSnippet[];
+    warnings: string[];
+    questions: string[];
+  }): Promise<string> {
+    // Narrowed local avoids a non-null assertion inside the timeout callback.
+    const provider = this.enhancerProvider;
+    if (!provider) {
+      return formatEnhancedPrompt({
+        style: input.style_resolved,
+        language: input.language,
+        original_prompt: input.request.prompt,
+        refs: input.context_refs
+      });
+    }
+    const maxAttempts = this.enhancerGenerationConfig.max_retries + 1;
+    let lastFailure: EnhancerProviderRequestError | undefined;
+    for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
+      const startedAt = Date.now();
+      this.observability.metrics.increment("enhancer_provider_requests_total", 1, this.enhancerProviderLabels());
+      try {
+        const generated = await runWithInactivityTimeout({
+          timeout_ms: this.enhancerGenerationConfig.timeout_ms,
+          fn: ({ touch, signal }) =>
+            provider.generate({
+              trace_id: input.trace_id,
+              tenant_id: input.tenant_id,
+              workspace_id: input.workspace_id,
+              request: input.request,
+              style_requested: input.style_requested,
+              style_resolved: input.style_resolved,
+              intent: input.intent,
+              query_intent: input.query_intent,
+              language: input.language,
+              context_refs: input.context_refs,
+              context_snippets: input.context_snippets,
+              warnings: input.warnings,
+              questions: input.questions,
+              tool_mode: this.enhancerGenerationConfig.tool_mode,
+              abort_signal: signal,
+              on_progress: touch
+            })
+        });
+        const enhancedPrompt = normalizeProviderEnhancedPrompt(generated.enhanced_prompt);
+        if (enhancedPrompt.length === 0) {
+          throw new EnhancerProviderRequestError("invalid_response", "enhancer provider returned an empty enhanced_prompt");
+        }
+        return enhancedPrompt;
+      } catch (error) {
+        const reason = classifyEnhancerGenerationFailureReason(error);
+        const failure =
+          error instanceof EnhancerProviderRequestError ? error : new EnhancerProviderRequestError(reason, String(error));
+        lastFailure = failure;
+        // Single source of truth for both the logged "retrying" flag and the loop exit.
+        const retryable = reason !== "timeout" && reason !== "schema_error" && reason !== "invalid_response";
+        this.observability.metrics.increment("enhancer_provider_failures_total", 1, {
+          ...this.enhancerProviderLabels(),
+          reason
+        });
+        this.observability.logger.warn("enhancer provider generation failed", {
+          trace_id: input.trace_id,
+          attempt,
+          max_attempts: maxAttempts,
+          reason,
+          retrying: attempt < maxAttempts && retryable,
+          style_requested: input.style_requested,
+          style_resolved: input.style_resolved,
+          provider: this.enhancerProviderDescriptor?.provider ?? "custom",
+          model: this.enhancerProviderDescriptor?.model ?? "unknown",
+          error: failure.message
+        });
+        if (!retryable) {
+          break;
+        }
+      } finally {
+        // Recorded for every attempt, including failures and timeouts.
+        this.observability.metrics.observe(
+          "enhancer_provider_latency_ms",
+          Date.now() - startedAt,
+          this.enhancerProviderLabels()
+        );
+      }
+    }
+    const message = lastFailure?.message ?? "enhancer provider failed";
+    throw new RetrievalError("UPSTREAM_FAILURE", `enhancer provider failed after retries: ${message}`);
+  }
   async enhancePrompt(input: {
     trace_id: string;
     tenant_id: string;
@@ -4063,16 +6257,18 @@ export class RetrievalCore {
   }): Promise<EnhancePromptOutput> {
     const startedAt = Date.now();
     const warnings: string[] = [];
-    const questions: string[] = [];
-    const addQuestion = (value: string): void => {
-      if (!questions.includes(value)) {
-        questions.push(value);
-      }
-    };
     const intent = classifyIntent(input.request.prompt);
     const queryIntent = classifyEnhancerQueryIntent(input.request.prompt, input.request.conversation_history);
     const language = detectDominantLanguage(input.request.prompt, input.request.conversation_history);
+    const style = resolveEnhancerPromptStyle({
+      requested: input.request.style,
+      intent,
+      query_intent: queryIntent,
+      prompt: input.request.prompt,
+      history: input.request.conversation_history,
+      has_context: Boolean(input.request.project_root_path && input.workspace_id)
+    });
     const negativePreferences = detectNegativePathPreferences(
       `${input.request.prompt}\n${input.request.conversation_history.map((entry) => entry.content).join("\n")}`
     );
@@ -4198,20 +6394,6 @@ export class RetrievalCore {
             searchResults,
             intentPolicy.max_candidates_per_directory_pre_rerank
           ).slice(0, intentPolicy.max_candidates_pre_rerank);
-          const symbolCandidates = extractLikelyCodeSymbols(
-            `${input.request.prompt}\n${input.request.conversation_history.map((entry) => entry.content).join("\n")}`,
-            3
-          );
-          if (confidenceSignals.failed_signals.includes("score_spread")) {
-            addQuestion(localizeLowConfidenceQuestion({ language, kind: "scope" }));
-          }
-          if (confidenceSignals.failed_signals.includes("token_overlap")) {
-            addQuestion(localizeLowConfidenceQuestion({ language, kind: "symbol", symbol: symbolCandidates[0] }));
-          }
-          if (confidenceSignals.failed_signals.includes("path_diversity")) {
-            addQuestion(localizeLowConfidenceQuestion({ language, kind: "source_priority" }));
-          }
         } else {
           searchResults = dedupeEnhancerCandidatesByPath(searchResults);
           searchResults = collapseEnhancerCandidatesByDirectory(
@@ -4221,6 +6403,9 @@ export class RetrievalCore {
         }
         candidateCountPostRerank = searchResults.length;
       } catch (error) {
+        if (error instanceof RetrievalError && error.code === "RATE_LIMITED") {
+          throw error;
+        }
         warnings.push("Context retrieval unavailable; enhancement generated with limited confidence.");
         fallbackTriggered = true;
         fallbackReason = "context_retrieval_unavailable";
@@ -4231,16 +6416,6 @@ export class RetrievalCore {
       }
     }
-    if (intent === "unknown") {
-      addQuestion(
-        language === "es"
-          ? "¿Cuál es el resultado esperado exacto y el alcance del cambio?"
-          : language === "zh"
-            ? "这次变更的精确目标和范围是什么？"
-            : "What exact outcome and scope should this change target?"
-      );
-    }
     const contextRefs: ContextRef[] = searchResults.map((result) => ({
       path: result.path,
       start_line: result.start_line,
@@ -4248,19 +6423,29 @@ export class RetrievalCore {
       reason: result.reason
     }));
-    const enhancedPrompt = formatEnhancedPrompt({
+    const contextSnippets = this.buildEnhancerContextSnippets(searchResults);
+    const enhancedPrompt = await this.generateEnhancedPrompt({
+      trace_id: input.trace_id,
+      tenant_id: input.tenant_id,
+      workspace_id: input.workspace_id,
+      request: input.request,
+      style_requested: style.requested,
+      style_resolved: style.resolved,
       intent,
+      query_intent: queryIntent,
       language,
-      original_prompt: input.request.prompt,
-      refs: contextRefs
+      context_refs: contextRefs,
+      context_snippets: contextSnippets,
+      warnings: [],
+      questions: []
     });
     const output: EnhancePromptOutput = {
       trace_id: input.trace_id,
       enhanced_prompt: enhancedPrompt,
       context_refs: contextRefs,
-      warnings,
-      questions
+      warnings: [],
+      questions: []
     };
     const latency_ms = Date.now() - startedAt;
@@ -4310,6 +6495,11 @@ export class RetrievalCore {
       fallback_triggered: fallbackTriggered,
       fallback_reason: fallbackReason,
       query_intent: queryIntent,
+      style_requested: style.requested,
+      style_resolved: style.resolved,
+      enhancer_provider: this.enhancerProviderDescriptor?.provider ?? "template",
+      enhancer_model: this.enhancerProviderDescriptor?.model ?? null,
+      enhancer_tool_mode: this.enhancerGenerationConfig.tool_mode,
       confidence_score_spread: confidenceSignals?.score_spread ?? null,
       confidence_token_overlap: confidenceSignals?.token_overlap ?? null,
       confidence_path_diversity: confidenceSignals?.path_diversity ?? null,