@rce-mcp/retrieval-core 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,13 +1,16 @@
1
1
  import { createHash, randomUUID } from "node:crypto";
2
- import { buildQueryCacheKey } from "@rce-mcp/data-plane";
2
+ import { buildQueryCacheKey, tokenizeForRanking } from "@rce-mcp/data-plane";
3
3
  import { InMemoryQueryCache } from "@rce-mcp/data-plane";
4
4
  import { getObservability } from "@rce-mcp/observability";
5
5
  import { buildChunksForFile, getChunkingParserAvailabilitySnapshot } from "./chunking.js";
6
6
  const MAX_FILE_SIZE_BYTES = 1_000_000;
7
7
  const MAX_CHUNKS_PER_FILE = 300;
8
- const TARGET_CHUNK_TOKENS = 220;
9
- const CHUNK_OVERLAP_TOKENS = 40;
8
+ const DEFAULT_TARGET_CHUNK_TOKENS = 420;
9
+ const DEFAULT_CHUNK_OVERLAP_TOKENS = 90;
10
10
  const MAX_TOP_K = 20;
11
+ const DEFAULT_INTERNAL_CANDIDATE_DEPTH = 100;
12
+ const MIN_INTERNAL_CANDIDATE_DEPTH = 20;
13
+ const MAX_INTERNAL_CANDIDATE_DEPTH = 200;
11
14
  const MAX_CONTEXT_BUDGET_TOKENS = 12_000;
12
15
  export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_BASE_URL = "https://router.tumuer.me/v1";
13
16
  export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_MODEL = "Qwen/Qwen3-Embedding-4B";
@@ -15,6 +18,17 @@ export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_DIMENSIONS = 2560;
15
18
  export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_TIMEOUT_MS = 10_000;
16
19
  export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_BATCH_SIZE = 64;
17
20
  export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_MAX_RETRIES = 2;
21
+ export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_TRANSIENT_403_MAX_RETRIES = 4;
22
+ export const DEFAULT_OPENAI_COMPATIBLE_RERANKER_BASE_URL = "https://router.tumuer.me/v1";
23
+ export const DEFAULT_OPENAI_COMPATIBLE_RERANKER_MODEL = "Qwen/Qwen3-Reranker-4B";
24
+ export const DEFAULT_OPENAI_COMPATIBLE_RERANKER_TIMEOUT_MS = 2_500;
25
+ export const DEFAULT_SEARCH_RERANKER_TOP_N = 30;
26
+ export const DEFAULT_PROVIDER_MAX_REQUESTS_PER_MINUTE = 90;
27
+ export const DEFAULT_PROVIDER_LIMIT_INDEX_MAX_WAIT_MS = 120_000;
28
+ export const DEFAULT_PROVIDER_LIMIT_QUERY_MAX_WAIT_MS = 1_000;
29
+ export const DEFAULT_PROVIDER_LIMIT_RERANK_MAX_WAIT_MS = 500;
30
+ export const DEFAULT_CLAUDE_ENHANCER_MODEL = "claude-3-5-sonnet-latest";
31
+ const DEFAULT_CLAUDE_ENHANCER_MAX_TURNS = 3;
18
32
  const DEFAULT_CANDIDATE_SCORE_WEIGHTS = {
19
33
  lexical_weight: 0.6,
20
34
  vector_weight: 0.4,
@@ -52,6 +66,10 @@ export const BASELINE_RETRIEVAL_SCORING_CONFIG = {
52
66
  negation_avoid_tests_penalty: 0.35,
53
67
  negation_avoid_examples_penalty: 0.3,
54
68
  negation_avoid_archive_penalty: 0.35,
69
+ security_trace_meta_penalty: 0.22,
70
+ literal_path_boost: 0.3,
71
+ literal_snippet_boost: 0.18,
72
+ literal_max_boost: 0.5,
55
73
  min_total_bias: -0.45,
56
74
  max_total_bias: 0.35
57
75
  },
@@ -99,6 +117,10 @@ export const CONSERVATIVE_RETRIEVAL_SCORING_CONFIG = {
99
117
  negation_avoid_tests_penalty: 0.2,
100
118
  negation_avoid_examples_penalty: 0.16,
101
119
  negation_avoid_archive_penalty: 0.2,
120
+ security_trace_meta_penalty: 0.14,
121
+ literal_path_boost: 0.18,
122
+ literal_snippet_boost: 0.1,
123
+ literal_max_boost: 0.28,
102
124
  min_total_bias: -0.25,
103
125
  max_total_bias: 0.2
104
126
  },
@@ -115,9 +137,17 @@ export const DEFAULT_RETRIEVAL_ENHANCER_CONFIG = {
115
137
  max_candidates_pre_rerank: 4,
116
138
  rerank_timeout_ms: 40
117
139
  };
140
+ export const DEFAULT_RETRIEVAL_ENHANCER_GENERATION_CONFIG = {
141
+ timeout_ms: 18_000,
142
+ max_retries: 1,
143
+ tool_mode: "read_only",
144
+ max_context_snippets: 6
145
+ };
118
146
  export const DEFAULT_RETRIEVAL_CHUNKING_CONFIG = {
119
147
  strategy: "sliding",
120
148
  fallback_strategy: "sliding",
149
+ target_chunk_tokens: DEFAULT_TARGET_CHUNK_TOKENS,
150
+ chunk_overlap_tokens: DEFAULT_CHUNK_OVERLAP_TOKENS,
121
151
  parse_timeout_ms: 80,
122
152
  enabled_languages: ["typescript", "javascript", "python", "go"]
123
153
  };
@@ -225,6 +255,28 @@ export function mergeRetrievalEnhancerConfig(base, overrides) {
225
255
  validateEnhancerConfig(next);
226
256
  return next;
227
257
  }
258
/**
 * Checks a retrieval enhancer generation config and throws on the first invalid field.
 *
 * Fields are checked in this order: `timeout_ms` (positive integer), `max_retries`
 * (non-negative integer), `tool_mode` ("none" or "read_only"), `max_context_snippets`
 * (positive integer).
 *
 * @param {object} config - Candidate config object.
 * @returns {void}
 * @throws {Error} When any field fails its check.
 */
function validateEnhancerGenerationConfig(config) {
    const isPositiveInteger = (value) => Number.isInteger(value) && value > 0;
    const fail = (detail) => {
        throw new Error(`invalid retrieval enhancer generation config: ${detail}`);
    };
    if (!isPositiveInteger(config.timeout_ms)) {
        fail("timeout_ms must be a positive integer");
    }
    const retriesValid = Number.isInteger(config.max_retries) && config.max_retries >= 0;
    if (!retriesValid) {
        fail("max_retries must be a non-negative integer");
    }
    if (!["none", "read_only"].includes(config.tool_mode)) {
        fail("tool_mode must be none|read_only");
    }
    if (!isPositiveInteger(config.max_context_snippets)) {
        fail("max_context_snippets must be a positive integer");
    }
}
272
/**
 * Builds a new enhancer generation config by layering `overrides` on top of `base`
 * and validating the merged result.
 *
 * @param {object} base - Starting config; not mutated.
 * @param {object} [overrides] - Optional fields that replace the ones in `base`.
 * @returns {object} A fresh, validated config object.
 * @throws {Error} When the merged config fails validation.
 */
export function mergeRetrievalEnhancerGenerationConfig(base, overrides) {
    // Object spread ignores null/undefined sources, so a missing `overrides` is a no-op.
    const merged = { ...base, ...overrides };
    validateEnhancerGenerationConfig(merged);
    return merged;
}
228
280
  function normalizeChunkingLanguageList(value) {
229
281
  const deduped = new Set();
230
282
  for (const language of value) {
@@ -243,6 +295,15 @@ function validateChunkingConfig(config) {
243
295
  if (config.fallback_strategy !== "sliding") {
244
296
  throw new Error("invalid retrieval chunking config: fallback_strategy must be sliding");
245
297
  }
298
+ if (!Number.isInteger(config.target_chunk_tokens) || config.target_chunk_tokens <= 0) {
299
+ throw new Error("invalid retrieval chunking config: target_chunk_tokens must be a positive integer");
300
+ }
301
+ if (!Number.isInteger(config.chunk_overlap_tokens) || config.chunk_overlap_tokens <= 0) {
302
+ throw new Error("invalid retrieval chunking config: chunk_overlap_tokens must be a positive integer");
303
+ }
304
+ if (config.chunk_overlap_tokens >= config.target_chunk_tokens) {
305
+ throw new Error("invalid retrieval chunking config: chunk_overlap_tokens must be less than target_chunk_tokens");
306
+ }
246
307
  if (!Number.isInteger(config.parse_timeout_ms) || config.parse_timeout_ms <= 0) {
247
308
  throw new Error("invalid retrieval chunking config: parse_timeout_ms must be a positive integer");
248
309
  }
@@ -277,10 +338,14 @@ function stableSerialize(value) {
277
338
  function scoringConfigChecksum(config) {
278
339
  return sha256(stableSerialize(config)).slice(0, 12);
279
340
  }
341
/**
 * Normalizes a requested internal candidate depth.
 *
 * Finite numbers are truncated toward zero; anything else falls back to
 * DEFAULT_INTERNAL_CANDIDATE_DEPTH. The result is then limited to the
 * [MIN_INTERNAL_CANDIDATE_DEPTH, MAX_INTERNAL_CANDIDATE_DEPTH] range.
 *
 * @param {unknown} value - Requested depth.
 * @returns {number} Depth inside the allowed range.
 */
function clampInternalCandidateDepth(value) {
    let depth = DEFAULT_INTERNAL_CANDIDATE_DEPTH;
    if (Number.isFinite(value)) {
        depth = Math.trunc(value);
    }
    const cappedAbove = Math.min(MAX_INTERNAL_CANDIDATE_DEPTH, depth);
    return Math.max(MIN_INTERNAL_CANDIDATE_DEPTH, cappedAbove);
}
280
345
  const REASON_STRINGS = [
281
346
  "semantic match",
282
- "exact symbol match",
283
- "path and token overlap",
347
+ "exact literal match",
348
+ "path token overlap",
284
349
  "recently modified relevant module"
285
350
  ];
286
351
  export class RetrievalError extends Error {
@@ -293,10 +358,26 @@ export class RetrievalError extends Error {
293
358
  class EmbeddingProviderRequestError extends Error {
294
359
  reason;
295
360
  retryable;
296
- constructor(reason, retryable, message) {
361
+ retry_after_ms;
362
+ constructor(reason, retryable, message, retry_after_ms) {
297
363
  super(message);
298
364
  this.reason = reason;
299
365
  this.retryable = retryable;
366
+ this.retry_after_ms = retry_after_ms;
367
+ }
368
+ }
369
+ class RerankerProviderRequestError extends Error {
370
+ reason;
371
+ constructor(reason, message) {
372
+ super(message);
373
+ this.reason = reason;
374
+ }
375
+ }
376
+ class EnhancerProviderRequestError extends Error {
377
+ reason;
378
+ constructor(reason, message) {
379
+ super(message);
380
+ this.reason = reason;
300
381
  }
301
382
  }
302
383
  const SECRET_PATTERNS = [
@@ -342,41 +423,7 @@ function singularizeToken(token) {
342
423
  return undefined;
343
424
  }
344
425
  function tokenize(text) {
345
- const coarseTokens = text
346
- .split(/[^a-z0-9_./-]+/)
347
- .map((token) => token.trim())
348
- .filter(Boolean);
349
- const expandedTokens = new Set();
350
- const addToken = (value) => {
351
- const normalized = value.trim().toLowerCase();
352
- if (!normalized) {
353
- return;
354
- }
355
- expandedTokens.add(normalized);
356
- const singular = singularizeToken(normalized);
357
- if (singular) {
358
- expandedTokens.add(singular);
359
- }
360
- const plural = pluralizeToken(normalized);
361
- if (plural) {
362
- expandedTokens.add(plural);
363
- }
364
- };
365
- for (const token of coarseTokens) {
366
- addToken(token);
367
- for (const part of token.split(/[./_-]+/).filter(Boolean)) {
368
- addToken(part);
369
- const camelSplit = part
370
- .replace(/([a-z0-9])([A-Z])/g, "$1 $2")
371
- .split(/\s+/)
372
- .map((segment) => segment.trim().toLowerCase())
373
- .filter(Boolean);
374
- for (const segment of camelSplit) {
375
- addToken(segment);
376
- }
377
- }
378
- }
379
- return [...expandedTokens];
426
+ return tokenizeForRanking(text);
380
427
  }
381
428
  function lexicalScore(query, haystack) {
382
429
  const q = new Set(tokenize(query));
@@ -419,17 +466,101 @@ function looksLowInformation(snippet) {
419
466
  return /^(.)\1+$/.test(noWhitespace);
420
467
  }
421
468
  function chooseReason(input) {
469
+ if (input.literal_match) {
470
+ return "exact literal match";
471
+ }
422
472
  if (input.path_match) {
423
- return "exact symbol match";
473
+ return "path token overlap";
424
474
  }
425
475
  if (input.recency_boosted) {
426
476
  return "recently modified relevant module";
427
477
  }
428
478
  if (input.lexical > 0.3) {
429
- return "path and token overlap";
479
+ return "path token overlap";
430
480
  }
431
481
  return "semantic match";
432
482
  }
483
/**
 * Tells whether a reason string denotes an exact match, accepting both the
 * "exact literal match" label and the "exact symbol match" label.
 *
 * @param {string} reason - Reason label attached to a search result.
 * @returns {boolean} True for either exact-match label.
 */
function isExactLiteralReason(reason) {
    const exactLabels = ["exact literal match", "exact symbol match"];
    return exactLabels.includes(reason);
}
486
/**
 * Pulls "specific" literals out of a search query: env-style names, paths or
 * file names, camelCase identifiers and hyphenated terms.
 *
 * Candidates come, in order, from extractLikelyCodeSymbols, extractPathLikeSymbols
 * (plus each path's last segment), `RCE_…` env-style names and file names with a
 * known extension. Each candidate is trimmed of surrounding quotes/brackets and
 * trailing punctuation, lower-cased, de-duplicated, and dropped when shorter than
 * three characters or not specific enough.
 *
 * @param {string} query - Raw query text.
 * @returns {string[]} Up to 24 lower-cased literals in first-seen order.
 */
function extractSearchLiterals(query) {
    // A Set keeps insertion order, so it serves as both the "seen" index and the result list.
    const collected = new Set();
    const isSpecific = (candidate) => {
        if (/^[A-Z0-9]+(?:_[A-Z0-9]+){2,}$/.test(candidate)) {
            return true; // env-style name
        }
        if (/[/.]/.test(candidate)) {
            return true; // path or file name
        }
        if (/[a-z][A-Z]/.test(candidate) || /[A-Z][a-z]+[A-Z]/.test(candidate)) {
            return true; // camelCase / PascalCase
        }
        return candidate.includes("-");
    };
    const consider = (raw) => {
        const cleaned = raw.trim().replace(/^[`"'([{]+|[`"')\]}:;,.]+$/g, "");
        const key = cleaned.toLowerCase();
        if (key.length < 3 || collected.has(key)) {
            return;
        }
        // Specificity is judged on the original casing, before lower-casing.
        if (isSpecific(cleaned)) {
            collected.add(key);
        }
    };
    extractLikelyCodeSymbols(query, 24).forEach((symbol) => consider(symbol));
    for (const pathSymbol of extractPathLikeSymbols(query)) {
        consider(pathSymbol);
        const lastSegment = normalizePath(pathSymbol).split("/").at(-1);
        if (lastSegment) {
            consider(lastSegment);
        }
    }
    for (const [envName] of query.matchAll(/\bRCE_[A-Z0-9_]{4,}\b/g)) {
        consider(envName ?? "");
    }
    for (const [fileName] of query.matchAll(/\b[A-Za-z0-9_.-]+\.(?:ts|tsx|js|jsx|mjs|cjs|py|go|json|md)\b/g)) {
        consider(fileName ?? "");
    }
    return [...collected].slice(0, 24);
}
527
/**
 * Computes the score boost a candidate earns for containing query literals.
 *
 * Each literal is looked up case-insensitively in the candidate path first and,
 * only if absent there, in the snippet. A path hit adds `literal_path_boost`, a
 * snippet hit adds `literal_snippet_boost`, and the total is capped at
 * `literal_max_boost`. The cap is only checked after literals that did not hit
 * the path, so path hits keep being counted even once the cap is reached.
 *
 * @param {{literals: string[], path: string, snippet: string, path_bias: object}} input
 *   Lower-cased literals, the candidate path and snippet, and the bias config
 *   carrying the three `literal_*` values.
 * @returns {{boost: number, matched: boolean, path_matches: number, snippet_matches: number}}
 */
function applyLiteralBoost(input) {
    const { literals, path_bias: bias } = input;
    const result = { boost: 0, matched: false, path_matches: 0, snippet_matches: 0 };
    if (literals.length === 0) {
        return result;
    }
    const lowerPath = input.path.toLowerCase();
    const lowerSnippet = input.snippet.toLowerCase();
    let total = 0;
    for (const literal of literals) {
        if (lowerPath.includes(literal)) {
            total += bias.literal_path_boost;
            result.path_matches += 1;
        }
        else {
            if (lowerSnippet.includes(literal)) {
                total += bias.literal_snippet_boost;
                result.snippet_matches += 1;
            }
            if (total >= bias.literal_max_boost) {
                break;
            }
        }
    }
    result.boost = Math.min(bias.literal_max_boost, total);
    result.matched = result.boost > 0;
    return result;
}
433
564
  const DOC_INTENT_TOKENS = new Set([
434
565
  "adr",
435
566
  "architecture",
@@ -483,6 +614,22 @@ const UI_COMPONENT_TOKENS = new Set(["component", "layout", "react", "tsx", "ui"
483
614
  const FILE_LOOKUP_TOKENS = new Set(["entrypoint", "file", "locate", "path", "where", "which"]);
484
615
  const TEST_INTENT_TOKENS = new Set(["assert", "coverage", "e2e", "integration", "spec", "test", "tests", "unit"]);
485
616
  const EXAMPLE_INTENT_TOKENS = new Set(["demo", "example", "examples", "sample", "tutorial"]);
617
+ const SECURITY_TRACE_INTENT_TOKENS = new Set([
618
+ "auth",
619
+ "authorization",
620
+ "binding",
621
+ "config",
622
+ "enforce",
623
+ "mcp",
624
+ "project_root_path",
625
+ "security",
626
+ "session",
627
+ "stdio",
628
+ "tenant",
629
+ "token",
630
+ "workspace",
631
+ "workspace_id"
632
+ ]);
486
633
  const SOURCE_PATH_PREFIXES = ["src/", "app/", "apps/", "crates/", "internal/", "lib/", "package/", "packages/"];
487
634
  const LOW_PRIORITY_PATH_PREFIXES = [
488
635
  ".next/",
@@ -543,6 +690,20 @@ function hasTestIntent(tokens) {
543
690
  function hasExampleIntent(tokens) {
544
691
  return tokens.some((token) => EXAMPLE_INTENT_TOKENS.has(token));
545
692
  }
693
/**
 * Decides whether a query reads like a security / tenancy trace request.
 *
 * True when any token is listed in SECURITY_TRACE_INTENT_TOKENS, or when the raw
 * query text mentions `tenant_id`, `workspace_id`, `project_root_path` or an
 * `RCE_…` upper-case identifier.
 *
 * @param {string[]} tokens - Tokens derived from the query.
 * @param {string} queryText - Raw query text, matched case-sensitively.
 * @returns {boolean}
 */
function hasSecurityTraceIntent(tokens, queryText) {
    const tokenHit = tokens.some((token) => SECURITY_TRACE_INTENT_TOKENS.has(token));
    return tokenHit || /\btenant_id\b|\bworkspace_id\b|\bproject_root_path\b|\bRCE_[A-Z0-9_]{4,}\b/.test(queryText);
}
699
/**
 * Flags paths that look like tool-guidance or meta files rather than
 * implementation code. Matching is case-insensitive.
 *
 * @param {string} path - File path to classify.
 * @returns {boolean} True when the path ends with "mcp-tool-guidance.ts" or
 *   contains "/guidance/", "/meta/", "/_meta/" or "tool-guidance".
 */
function isGuidanceOrMetaPath(path) {
    const lowered = path.toLowerCase();
    if (lowered.endsWith("mcp-tool-guidance.ts")) {
        return true;
    }
    const markers = ["/guidance/", "/meta/", "/_meta/", "tool-guidance"];
    return markers.some((marker) => lowered.includes(marker));
}
546
707
  function pathQualityBias(path, queryTokens, config, queryText) {
547
708
  const normalizedPath = path.toLowerCase();
548
709
  const docIntent = hasDocIntent(queryTokens);
@@ -553,6 +714,7 @@ function pathQualityBias(path, queryTokens, config, queryText) {
553
714
  const uiComponentIntent = hasUiComponentIntent(queryTokens);
554
715
  const testIntent = hasTestIntent(queryTokens);
555
716
  const exampleIntent = hasExampleIntent(queryTokens);
717
+ const securityTraceIntent = hasSecurityTraceIntent(queryTokens, queryText ?? queryTokens.join(" "));
556
718
  let bias = 0;
557
719
  const pathBias = config.path_bias;
558
720
  const isSourcePath = SOURCE_PATH_PREFIXES.some((prefix) => normalizedPath.startsWith(prefix) || normalizedPath.includes(`/${prefix}`));
@@ -616,6 +778,9 @@ function pathQualityBias(path, queryTokens, config, queryText) {
616
778
  if (docsPreferred && isSourcePath) {
617
779
  bias -= pathBias.doc_intent_source_penalty;
618
780
  }
781
+ if (securityTraceIntent && !docsPreferred && isGuidanceOrMetaPath(normalizedPath)) {
782
+ bias -= pathBias.security_trace_meta_penalty;
783
+ }
619
784
  if (workspaceManifestIntent && normalizedPath === "cargo.toml") {
620
785
  bias += pathBias.workspace_manifest_root_boost;
621
786
  }
@@ -681,8 +846,8 @@ function buildChunks(file, chunkingConfig) {
681
846
  config: {
682
847
  strategy: chunkingConfig.strategy,
683
848
  fallback_strategy: chunkingConfig.fallback_strategy,
684
- target_chunk_tokens: TARGET_CHUNK_TOKENS,
685
- chunk_overlap_tokens: CHUNK_OVERLAP_TOKENS,
849
+ target_chunk_tokens: chunkingConfig.target_chunk_tokens,
850
+ chunk_overlap_tokens: chunkingConfig.chunk_overlap_tokens,
686
851
  max_chunks_per_file: MAX_CHUNKS_PER_FILE,
687
852
  parse_timeout_ms: chunkingConfig.parse_timeout_ms,
688
853
  enabled_languages: chunkingConfig.enabled_languages
@@ -732,6 +897,138 @@ function sleep(ms) {
732
897
  setTimeout(resolve, ms);
733
898
  });
734
899
  }
900
/**
 * Error raised by the provider request limiters when a request cannot be
 * admitted within the caller's wait budget.
 *
 * `name` is set explicitly so logs, stack traces and `String(err)` identify the
 * error type instead of showing a generic "Error".
 */
export class ProviderRateLimitExceededError extends Error {
    /** Suggested wait, in milliseconds, before trying again. */
    retry_after_ms;
    /**
     * @param {string} message - Human-readable description.
     * @param {number} retry_after_ms - Suggested wait before retrying, in milliseconds.
     */
    constructor(message, retry_after_ms) {
        super(message);
        this.name = "ProviderRateLimitExceededError";
        this.retry_after_ms = retry_after_ms;
    }
}
907
/**
 * In-process request limiter that keeps one token bucket per scope.
 *
 * A bucket starts full (`max_requests_per_minute` tokens) and refills
 * continuously at `max_requests_per_minute / 60000` tokens per millisecond.
 * `acquire` takes one token, sleeping for the refill time when the bucket is
 * empty, and gives up once the needed wait no longer fits in `max_wait_ms`.
 */
export class LocalProviderRequestLimiter {
    mode = "local";
    buckets = new Map();
    now;
    sleeper;
    /**
     * @param {{now?: () => number, sleeper?: (ms: number) => Promise<unknown>}} [options]
     *   Optional clock and sleep overrides; defaults are `Date.now` and `sleep`.
     */
    constructor(options) {
        this.now = options?.now ?? (() => Date.now());
        this.sleeper = options?.sleeper ?? sleep;
    }
    /**
     * Takes one token for `input.scope`, waiting if necessary.
     *
     * @param {{scope: string, max_requests_per_minute: number, max_wait_ms: number}} input
     * @returns {Promise<{wait_ms: number}>} Total time spent sleeping before admission.
     * @throws {Error} When the numeric inputs are not valid integers.
     * @throws {ProviderRateLimitExceededError} When the token would arrive after the wait budget.
     */
    async acquire(input) {
        const limit = input.max_requests_per_minute;
        if (!(Number.isInteger(limit) && limit > 0)) {
            throw new Error("provider limiter requires max_requests_per_minute to be a positive integer");
        }
        if (!(Number.isInteger(input.max_wait_ms) && input.max_wait_ms >= 0)) {
            throw new Error("provider limiter requires max_wait_ms to be a non-negative integer");
        }
        const tokensPerMs = limit / 60_000;
        const deadlineMs = this.now() + input.max_wait_ms;
        let totalSleptMs = 0;
        for (;;) {
            const currentMs = this.now();
            let bucket = this.buckets.get(input.scope);
            if (!bucket) {
                // First request for this scope: start with a full bucket.
                bucket = { tokens: limit, last_refill_ms: currentMs };
                this.buckets.set(input.scope, bucket);
            }
            const sinceRefillMs = currentMs - bucket.last_refill_ms;
            if (sinceRefillMs > 0) {
                bucket.tokens = Math.min(limit, bucket.tokens + sinceRefillMs * tokensPerMs);
                bucket.last_refill_ms = currentMs;
            }
            if (bucket.tokens >= 1) {
                bucket.tokens -= 1;
                return { wait_ms: totalSleptMs };
            }
            // Time until the bucket holds one whole token again.
            const untilTokenMs = Math.max(1, Math.ceil((1 - bucket.tokens) / tokensPerMs));
            const budgetLeftMs = deadlineMs - currentMs;
            const fitsInBudget = budgetLeftMs > 0 && untilTokenMs <= budgetLeftMs;
            if (!fitsInBudget) {
                throw new ProviderRateLimitExceededError(`provider request rate limit exceeded for scope "${input.scope}"`, Math.max(1, untilTokenMs));
            }
            const napMs = Math.max(1, Math.min(untilTokenMs, budgetLeftMs));
            await this.sleeper(napMs);
            totalSleptMs += napMs;
        }
    }
}
956
/**
 * Lua script implementing a fixed-window request counter.
 *
 * KEYS[1] is the counter key; ARGV[1] is the request limit and ARGV[2] the
 * window length in milliseconds. Returns `{1, 0}` when the request is admitted
 * and `{0, ttl_ms}` when the limit is already used up for the current window.
 *
 * If an over-limit key is found without an expiry (PTTL < 0), the expiry is
 * re-armed before reporting `window_ms`. Without this the counter would never
 * reset and the scope would be rejected forever.
 */
const REDIS_PROVIDER_LIMITER_SCRIPT = `
local key = KEYS[1]
local limit = tonumber(ARGV[1])
local window_ms = tonumber(ARGV[2])
local count = redis.call("INCR", key)
if count == 1 then
  redis.call("PEXPIRE", key, window_ms)
end
if count <= limit then
  return {1, 0}
end
local ttl = redis.call("PTTL", key)
if ttl < 0 then
  redis.call("PEXPIRE", key, window_ms)
  ttl = window_ms
end
return {0, ttl}
`;
973
/**
 * Request limiter backed by a Redis counter, evaluated through
 * REDIS_PROVIDER_LIMITER_SCRIPT under the key `<key_prefix>:<scope>`.
 *
 * `acquire` keeps reserving attempts until one is admitted, sleeping for the
 * retry delay reported by the script, and gives up once that delay no longer
 * fits in the caller's `max_wait_ms`.
 */
export class RedisProviderRequestLimiter {
    mode = "redis";
    redis;
    keyPrefix;
    windowMs;
    now;
    sleeper;
    /**
     * @param {{redis: {eval: Function}, key_prefix?: string, window_ms?: number,
     *   now?: () => number, sleeper?: (ms: number) => Promise<unknown>}} options
     *   `key_prefix` defaults to "rce:provider_rate_limit" (also used when blank)
     *   and `window_ms` to 60000.
     * @throws {Error} When the client lacks `eval()` or `window_ms` is not a positive integer.
     */
    constructor(options) {
        const client = options.redis;
        if (!client || typeof client.eval !== "function") {
            throw new Error("invalid redis provider limiter config: redis client with eval() is required");
        }
        this.redis = client;
        this.keyPrefix = options.key_prefix?.trim() || "rce:provider_rate_limit";
        this.windowMs = options.window_ms ?? 60_000;
        this.now = options.now ?? (() => Date.now());
        this.sleeper = options.sleeper ?? sleep;
        const windowValid = Number.isInteger(this.windowMs) && this.windowMs > 0;
        if (!windowValid) {
            throw new Error("invalid redis provider limiter config: window_ms must be a positive integer");
        }
    }
    /**
     * Waits until a request for `input.scope` is admitted.
     *
     * @param {{scope: string, max_requests_per_minute: number, max_wait_ms: number}} input
     * @returns {Promise<{wait_ms: number}>} Total time spent sleeping before admission.
     * @throws {Error} When the numeric inputs are not valid integers.
     * @throws {ProviderRateLimitExceededError} When the retry delay exceeds the remaining wait budget.
     */
    async acquire(input) {
        const limit = input.max_requests_per_minute;
        if (!(Number.isInteger(limit) && limit > 0)) {
            throw new Error("provider limiter requires max_requests_per_minute to be a positive integer");
        }
        if (!(Number.isInteger(input.max_wait_ms) && input.max_wait_ms >= 0)) {
            throw new Error("provider limiter requires max_wait_ms to be a non-negative integer");
        }
        const deadlineMs = this.now() + input.max_wait_ms;
        let totalSleptMs = 0;
        for (;;) {
            const outcome = await this.reserveAttempt(input.scope, limit);
            if (outcome.allowed) {
                return { wait_ms: totalSleptMs };
            }
            const budgetLeftMs = deadlineMs - this.now();
            const delayMs = Math.max(1, outcome.retry_after_ms);
            const fitsInBudget = budgetLeftMs > 0 && delayMs <= budgetLeftMs;
            if (!fitsInBudget) {
                throw new ProviderRateLimitExceededError(`provider request rate limit exceeded for scope "${input.scope}"`, delayMs);
            }
            const napMs = Math.max(1, Math.min(delayMs, budgetLeftMs));
            await this.sleeper(napMs);
            totalSleptMs += napMs;
        }
    }
    /**
     * Runs the limiter script once and decodes its `[allowed, retry_after_ms]` reply.
     *
     * @param {string} scope - Limiter scope, appended to the key prefix.
     * @param {number} maxRequestsPerMinute - Limit passed to the script.
     * @returns {Promise<{allowed: boolean, retry_after_ms: number}>}
     * @throws {Error} When the reply is not an array.
     */
    async reserveAttempt(scope, maxRequestsPerMinute) {
        const reply = await this.redis.eval(REDIS_PROVIDER_LIMITER_SCRIPT, 1, `${this.keyPrefix}:${scope}`, maxRequestsPerMinute, this.windowMs);
        if (!Array.isArray(reply)) {
            throw new Error("redis provider limiter returned unexpected eval() payload");
        }
        const reportedDelay = Number(reply[1] ?? 0);
        // A non-numeric delay falls back to a full window.
        const retry_after_ms = Number.isFinite(reportedDelay)
            ? Math.max(0, Math.trunc(reportedDelay))
            : this.windowMs;
        return {
            allowed: Number(reply[0] ?? 0) === 1,
            retry_after_ms
        };
    }
}
735
1032
  export class DeterministicEmbeddingProvider {
736
1033
  dimensions;
737
1034
  model;
@@ -762,6 +1059,12 @@ export class OpenAICompatibleEmbeddingProvider {
762
1059
  timeoutMs;
763
1060
  batchSize;
764
1061
  maxRetries;
1062
+ transientForbiddenMaxRetries;
1063
+ requestLimiter;
1064
+ requestLimitScope;
1065
+ maxRequestsPerMinute;
1066
+ indexMaxWaitMs;
1067
+ queryMaxWaitMs;
765
1068
  observability;
766
1069
  constructor(options) {
767
1070
  const baseUrl = options.base_url.trim().replace(/\/+$/, "");
@@ -780,6 +1083,17 @@ export class OpenAICompatibleEmbeddingProvider {
780
1083
  this.timeoutMs = options.timeout_ms ?? DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_TIMEOUT_MS;
781
1084
  this.batchSize = options.batch_size ?? DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_BATCH_SIZE;
782
1085
  this.maxRetries = options.max_retries ?? DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_MAX_RETRIES;
1086
+ this.transientForbiddenMaxRetries =
1087
+ options.transient_forbidden_max_retries ?? DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_TRANSIENT_403_MAX_RETRIES;
1088
+ this.requestLimiter = options.request_limiter;
1089
+ this.requestLimitScope = resolveProviderLimiterScope({
1090
+ provider: "openai_compatible",
1091
+ apiKey,
1092
+ overrideScopeId: options.request_limit_scope_id
1093
+ });
1094
+ this.maxRequestsPerMinute = options.max_requests_per_minute ?? DEFAULT_PROVIDER_MAX_REQUESTS_PER_MINUTE;
1095
+ this.indexMaxWaitMs = options.index_max_wait_ms ?? DEFAULT_PROVIDER_LIMIT_INDEX_MAX_WAIT_MS;
1096
+ this.queryMaxWaitMs = options.query_max_wait_ms ?? DEFAULT_PROVIDER_LIMIT_QUERY_MAX_WAIT_MS;
783
1097
  this.observability = options.observability ?? getObservability("retrieval-core");
784
1098
  if (!Number.isInteger(this.dimensions) || this.dimensions <= 0) {
785
1099
  throw new Error("invalid openai-compatible embedding config: dimensions must be a positive integer");
@@ -793,6 +1107,18 @@ export class OpenAICompatibleEmbeddingProvider {
793
1107
  if (!Number.isInteger(this.maxRetries) || this.maxRetries < 0) {
794
1108
  throw new Error("invalid openai-compatible embedding config: max_retries must be a non-negative integer");
795
1109
  }
1110
+ if (!Number.isInteger(this.transientForbiddenMaxRetries) || this.transientForbiddenMaxRetries < 0) {
1111
+ throw new Error("invalid openai-compatible embedding config: transient_forbidden_max_retries must be a non-negative integer");
1112
+ }
1113
+ if (!Number.isInteger(this.maxRequestsPerMinute) || this.maxRequestsPerMinute <= 0) {
1114
+ throw new Error("invalid openai-compatible embedding config: max_requests_per_minute must be a positive integer");
1115
+ }
1116
+ if (!Number.isInteger(this.indexMaxWaitMs) || this.indexMaxWaitMs < 0) {
1117
+ throw new Error("invalid openai-compatible embedding config: index_max_wait_ms must be a non-negative integer");
1118
+ }
1119
+ if (!Number.isInteger(this.queryMaxWaitMs) || this.queryMaxWaitMs < 0) {
1120
+ throw new Error("invalid openai-compatible embedding config: query_max_wait_ms must be a non-negative integer");
1121
+ }
796
1122
  }
797
1123
  describe() {
798
1124
  return {
@@ -819,11 +1145,12 @@ export class OpenAICompatibleEmbeddingProvider {
819
1145
  model: this.model,
820
1146
  purpose
821
1147
  };
822
- for (let attempt = 0; attempt <= this.maxRetries; attempt += 1) {
1148
+ let attempt = 0;
1149
+ while (true) {
823
1150
  const startedAt = Date.now();
824
1151
  this.observability.metrics.increment("retrieval_embedding_provider_requests_total", 1, labels);
825
1152
  try {
826
- return await this.embedBatchOnce(texts);
1153
+ return await this.embedBatchOnce(texts, purpose);
827
1154
  }
828
1155
  catch (error) {
829
1156
  const failure = this.toProviderFailure(error);
@@ -831,30 +1158,69 @@ export class OpenAICompatibleEmbeddingProvider {
831
1158
  ...labels,
832
1159
  reason: failure.reason
833
1160
  });
834
- const shouldRetry = failure.retryable && attempt < this.maxRetries;
1161
+ const maxRetriesForFailure = this.maxRetriesForReason(failure.reason);
1162
+ const shouldRetry = failure.retryable && attempt < maxRetriesForFailure;
835
1163
  this.observability.logger.warn("embedding provider request failed", {
836
1164
  provider: "openai_compatible",
837
1165
  model: this.model,
838
1166
  purpose,
839
1167
  reason: failure.reason,
1168
+ provider_message: failure.message,
840
1169
  retryable: failure.retryable,
841
1170
  retrying: shouldRetry,
842
1171
  attempt: attempt + 1,
843
- max_attempts: this.maxRetries + 1
1172
+ max_attempts: maxRetriesForFailure + 1,
1173
+ retry_after_ms: failure.retry_after_ms
844
1174
  });
845
1175
  if (shouldRetry) {
846
- await sleep(this.retryDelayMs(attempt));
1176
+ await sleep(this.retryDelayMs(attempt, failure));
1177
+ attempt += 1;
847
1178
  continue;
848
1179
  }
1180
+ if (failure.reason === "client_rate_limited" || failure.reason === "rate_limited") {
1181
+ throw new RetrievalError("RATE_LIMITED", `embedding provider rate limited; ${failure.message}`);
1182
+ }
849
1183
  throw new RetrievalError("UPSTREAM_FAILURE", `embedding provider request failed (${failure.reason}); ${failure.message}`);
850
1184
  }
851
1185
  finally {
852
1186
  this.observability.metrics.observe("retrieval_embedding_provider_latency_ms", Date.now() - startedAt, labels);
853
1187
  }
854
1188
  }
855
- throw new RetrievalError("UPSTREAM_FAILURE", "embedding provider retries exhausted");
856
1189
  }
857
- async embedBatchOnce(texts) {
1190
+ async enforceRequestLimit(purpose) {
1191
+ if (!this.requestLimiter) {
1192
+ return;
1193
+ }
1194
+ const maxWaitMs = purpose === "index" ? this.indexMaxWaitMs : this.queryMaxWaitMs;
1195
+ const labels = {
1196
+ provider: "openai_compatible",
1197
+ model: this.model,
1198
+ purpose,
1199
+ limiter_mode: this.requestLimiter.mode ?? "custom"
1200
+ };
1201
+ try {
1202
+ const acquired = await this.requestLimiter.acquire({
1203
+ scope: this.requestLimitScope,
1204
+ max_requests_per_minute: this.maxRequestsPerMinute,
1205
+ max_wait_ms: maxWaitMs
1206
+ });
1207
+ this.observability.metrics.observe("retrieval_provider_limiter_wait_ms", acquired.wait_ms, labels);
1208
+ this.observability.metrics.increment("retrieval_provider_requests_shaped_total", 1, labels);
1209
+ }
1210
+ catch (error) {
1211
+ this.observability.metrics.increment("retrieval_provider_limiter_block_total", 1, {
1212
+ ...labels,
1213
+ reason: "wait_timeout"
1214
+ });
1215
+ if (error instanceof ProviderRateLimitExceededError) {
1216
+ const retryable = purpose === "index";
1217
+ throw new EmbeddingProviderRequestError("client_rate_limited", retryable, `${error.message}; retry_after_ms=${error.retry_after_ms}`, error.retry_after_ms);
1218
+ }
1219
+ throw error;
1220
+ }
1221
+ }
1222
+ async embedBatchOnce(texts, purpose) {
1223
+ await this.enforceRequestLimit(purpose);
858
1224
  const controller = new AbortController();
859
1225
  const timeoutId = setTimeout(() => {
860
1226
  controller.abort();
@@ -887,13 +1253,18 @@ export class OpenAICompatibleEmbeddingProvider {
887
1253
  if (!response.ok) {
888
1254
  const details = await safeResponseText(response);
889
1255
  if (response.status === 429) {
890
- throw new EmbeddingProviderRequestError("rate_limited", true, `HTTP 429 ${details}`.trim());
1256
+ throw new EmbeddingProviderRequestError("rate_limited", true, `HTTP 429 ${details}`.trim(), parseRetryAfterMs(response.headers.get("retry-after")));
891
1257
  }
892
1258
  if (response.status >= 500) {
893
1259
  throw new EmbeddingProviderRequestError("http_5xx", true, `HTTP ${response.status} ${details}`.trim());
894
1260
  }
895
- if (response.status === 401 || response.status === 403) {
896
- throw new EmbeddingProviderRequestError("auth_error", false, `HTTP ${response.status} ${details}`.trim());
1261
+ if (response.status === 401) {
1262
+ throw new EmbeddingProviderRequestError("auth_error", false, `HTTP 401 ${details}`.trim());
1263
+ }
1264
+ if (response.status === 403) {
1265
+ const retryAfterMs = parseRetryAfterMs(response.headers.get("retry-after"));
1266
+ const retryable = this.isTransientForbidden(details, retryAfterMs);
1267
+ throw new EmbeddingProviderRequestError(retryable ? "forbidden_transient" : "auth_error", retryable, `HTTP 403 ${details}`.trim(), retryAfterMs);
897
1268
  }
898
1269
  if (response.status === 404) {
899
1270
  throw new EmbeddingProviderRequestError("endpoint_not_found", false, `HTTP 404 ${details}`.trim());
@@ -941,26 +1312,843 @@ export class OpenAICompatibleEmbeddingProvider {
941
1312
  if (embedding.length !== this.dimensions) {
942
1313
  throw new EmbeddingProviderRequestError("dimension_mismatch", false, `expected ${this.dimensions} dimensions, received ${embedding.length}`);
943
1314
  }
944
- vectors.push([...embedding]);
1315
+ vectors.push([...embedding]);
1316
+ }
1317
+ return vectors;
1318
+ }
1319
+ maxRetriesForReason(reason) {
1320
+ if (reason === "forbidden_transient") {
1321
+ return Math.max(this.maxRetries, this.transientForbiddenMaxRetries);
1322
+ }
1323
+ return this.maxRetries;
1324
+ }
1325
+ retryDelayMs(attempt, failure) {
1326
+ const baseBackoffMs = failure.reason === "forbidden_transient"
1327
+ ? Math.min(2_500, 250 * 2 ** attempt)
1328
+ : 100 * (attempt + 1);
1329
+ const jitterMs = failure.reason === "forbidden_transient" ? Math.floor(Math.random() * 150) : Math.floor(Math.random() * 75);
1330
+ const computedDelayMs = baseBackoffMs + jitterMs;
1331
+ if (failure.retry_after_ms === undefined) {
1332
+ return computedDelayMs;
1333
+ }
1334
+ return Math.max(computedDelayMs, Math.max(1, failure.retry_after_ms));
1335
+ }
1336
+ isTransientForbidden(details, retryAfterMs) {
1337
+ if (retryAfterMs !== undefined) {
1338
+ return true;
1339
+ }
1340
+ const normalized = details.trim().toLowerCase();
1341
+ if (normalized.length === 0) {
1342
+ return false;
1343
+ }
1344
+ const transientSignals = [
1345
+ "rate limit",
1346
+ "too many requests",
1347
+ "temporar",
1348
+ "try again",
1349
+ "upstream",
1350
+ "timeout",
1351
+ "busy",
1352
+ "capacity",
1353
+ "bad_response_status_code"
1354
+ ];
1355
+ if (transientSignals.some((signal) => normalized.includes(signal))) {
1356
+ return true;
1357
+ }
1358
+ const hardFailureSignals = [
1359
+ "invalid api key",
1360
+ "incorrect api key",
1361
+ "authentication",
1362
+ "unauthorized",
1363
+ "insufficient permissions",
1364
+ "insufficient scope",
1365
+ "permission denied",
1366
+ "organization not found",
1367
+ "account disabled",
1368
+ "insufficient quota",
1369
+ "quota exceeded",
1370
+ "billing",
1371
+ "credit",
1372
+ "payment required",
1373
+ "model not found",
1374
+ "unknown model",
1375
+ "unsupported model",
1376
+ "not allowed"
1377
+ ];
1378
+ if (hardFailureSignals.some((signal) => normalized.includes(signal))) {
1379
+ return false;
1380
+ }
1381
+ return false;
1382
+ }
1383
+ toProviderFailure(error) {
1384
+ if (error instanceof EmbeddingProviderRequestError) {
1385
+ return error;
1386
+ }
1387
+ if (error instanceof ProviderRateLimitExceededError) {
1388
+ return new EmbeddingProviderRequestError("client_rate_limited", false, `${error.message}; retry_after_ms=${error.retry_after_ms}`);
1389
+ }
1390
+ if (error instanceof RetrievalError) {
1391
+ if (error.code === "RATE_LIMITED") {
1392
+ return new EmbeddingProviderRequestError("client_rate_limited", false, error.message);
1393
+ }
1394
+ return new EmbeddingProviderRequestError("upstream_failure", false, error.message);
1395
+ }
1396
+ if (error instanceof Error) {
1397
+ return new EmbeddingProviderRequestError("unknown_error", false, error.message);
1398
+ }
1399
+ return new EmbeddingProviderRequestError("unknown_error", false, String(error));
1400
+ }
1401
+ }
1402
/**
 * Reranker client for OpenAI-compatible `POST {base_url}/rerank` endpoints.
 *
 * Sends the query and candidate documents to the provider, validates the
 * response rows, and returns the best rows ordered by descending relevance.
 * An optional request limiter can be supplied to shape outgoing traffic.
 */
export class OpenAICompatibleRerankerProvider {
    endpoint;
    apiKey;
    model;
    timeoutMs;
    requestLimiter;
    requestLimitScope;
    maxRequestsPerMinute;
    rerankMaxWaitMs;
    observability;
    /**
     * @param {object} options - provider configuration.
     * @param {string} options.base_url - API base URL; trailing slashes are stripped.
     * @param {string} options.api_key - bearer token; must be non-empty after trimming.
     * @param {string} [options.model] - model id; falls back to the default reranker model when blank.
     * @param {number} [options.timeout_ms] - request timeout, positive integer.
     * @param {object} [options.request_limiter] - optional limiter exposing `acquire()`.
     * @param {string} [options.request_limit_scope_id] - overrides the credential part of the limiter scope.
     * @param {number} [options.max_requests_per_minute] - positive integer.
     * @param {number} [options.rerank_max_wait_ms] - non-negative integer.
     * @param {object} [options.observability] - metrics sink; defaults to the "retrieval-core" observability.
     * @throws {Error} when any option fails validation.
     */
    constructor(options) {
        const baseUrl = options.base_url.trim().replace(/\/+$/, "");
        if (baseUrl.length === 0) {
            throw new Error("invalid openai-compatible reranker config: base_url must be non-empty");
        }
        const apiKey = options.api_key.trim();
        if (apiKey.length === 0) {
            throw new Error("invalid openai-compatible reranker config: api_key must be non-empty");
        }
        this.endpoint = `${baseUrl}/rerank`;
        this.apiKey = apiKey;
        this.model = options.model?.trim() || DEFAULT_OPENAI_COMPATIBLE_RERANKER_MODEL;
        this.timeoutMs = options.timeout_ms ?? DEFAULT_OPENAI_COMPATIBLE_RERANKER_TIMEOUT_MS;
        this.requestLimiter = options.request_limiter;
        this.requestLimitScope = resolveProviderLimiterScope({
            provider: "openai_compatible",
            apiKey,
            overrideScopeId: options.request_limit_scope_id
        });
        this.maxRequestsPerMinute = options.max_requests_per_minute ?? DEFAULT_PROVIDER_MAX_REQUESTS_PER_MINUTE;
        this.rerankMaxWaitMs = options.rerank_max_wait_ms ?? DEFAULT_PROVIDER_LIMIT_RERANK_MAX_WAIT_MS;
        this.observability = options.observability ?? getObservability("retrieval-core");
        if (!Number.isInteger(this.timeoutMs) || this.timeoutMs <= 0) {
            throw new Error("invalid openai-compatible reranker config: timeout_ms must be a positive integer");
        }
        if (!Number.isInteger(this.maxRequestsPerMinute) || this.maxRequestsPerMinute <= 0) {
            throw new Error("invalid openai-compatible reranker config: max_requests_per_minute must be a positive integer");
        }
        if (!Number.isInteger(this.rerankMaxWaitMs) || this.rerankMaxWaitMs < 0) {
            throw new Error("invalid openai-compatible reranker config: rerank_max_wait_ms must be a non-negative integer");
        }
    }
    /**
     * @returns {{ provider: string, model: string }} provider/model identity.
     */
    describe() {
        return {
            provider: "openai_compatible",
            model: this.model
        };
    }
    /**
     * Rerank `input.documents` against `input.query`.
     *
     * @param {{ query: string, documents: string[], top_n?: number }} input -
     *   rerank request. `top_n` is clamped to `[1, documents.length]`; when it
     *   is missing or not numeric, all documents are requested.
     * @returns {Promise<Array<{ index: number, relevance_score: number }>>}
     *   unique rows sorted by descending score (ties broken by ascending
     *   index), at most `top_n` entries. Empty when no documents were given.
     * @throws {RerankerProviderRequestError} on limiter rejection, timeout,
     *   network failure, non-2xx status, or a malformed response.
     */
    async rerank(input) {
        const documentCount = input.documents.length;
        if (documentCount === 0) {
            return [];
        }
        await this.enforceRequestLimit();
        // A missing/non-numeric top_n used to turn into NaN, which was sent as
        // `null` and made every valid response look like "zero results".
        const boundedTopN = Math.min(input.top_n, documentCount);
        const topN = Number.isNaN(boundedTopN) ? documentCount : Math.max(1, Math.floor(boundedTopN));
        const controller = new AbortController();
        const timeoutId = setTimeout(() => {
            controller.abort();
        }, this.timeoutMs);
        let response;
        try {
            response = await fetch(this.endpoint, {
                method: "POST",
                headers: {
                    authorization: `Bearer ${this.apiKey}`,
                    "content-type": "application/json"
                },
                body: JSON.stringify({
                    model: this.model,
                    query: input.query,
                    documents: input.documents,
                    top_n: topN
                }),
                signal: controller.signal
            });
        }
        catch (error) {
            if (error && typeof error === "object" && "name" in error && error.name === "AbortError") {
                throw new RerankerProviderRequestError("timeout", `request timed out after ${this.timeoutMs}ms`);
            }
            throw new RerankerProviderRequestError("network_error", error instanceof Error ? error.message : String(error));
        }
        finally {
            clearTimeout(timeoutId);
        }
        if (!response.ok) {
            const details = await safeResponseText(response);
            if (response.status === 429) {
                throw new RerankerProviderRequestError("rate_limited", `HTTP 429 ${details}`.trim());
            }
            if (response.status === 401 || response.status === 403) {
                throw new RerankerProviderRequestError("auth_error", `HTTP ${response.status} ${details}`.trim());
            }
            if (response.status === 404) {
                throw new RerankerProviderRequestError("endpoint_not_found", `HTTP 404 ${details}`.trim());
            }
            if (response.status >= 500) {
                throw new RerankerProviderRequestError("http_5xx", `HTTP ${response.status} ${details}`.trim());
            }
            throw new RerankerProviderRequestError("http_4xx", `HTTP ${response.status} ${details}`.trim());
        }
        let payload;
        try {
            payload = await response.json();
        }
        catch {
            throw new RerankerProviderRequestError("invalid_json", "provider returned non-JSON response");
        }
        if (!payload || typeof payload !== "object") {
            throw new RerankerProviderRequestError("invalid_response", "provider response must be an object");
        }
        // Providers disagree on the envelope key: accept `results` or `data`.
        const maybeResults = "results" in payload ? payload.results : payload.data;
        if (!Array.isArray(maybeResults)) {
            throw new RerankerProviderRequestError("invalid_response", "provider response missing results array");
        }
        const output = [];
        for (const row of maybeResults) {
            if (!row || typeof row !== "object") {
                throw new RerankerProviderRequestError("invalid_response", "rerank row must be an object");
            }
            const index = row.index;
            if (!Number.isInteger(index)) {
                throw new RerankerProviderRequestError("invalid_response", "rerank row index must be an integer");
            }
            if (index < 0 || index >= documentCount) {
                throw new RerankerProviderRequestError("invalid_response", "rerank row index out of range");
            }
            // Providers disagree on the score key as well.
            const rawScore = row.relevance_score ?? row.score;
            if (typeof rawScore !== "number" || !Number.isFinite(rawScore)) {
                throw new RerankerProviderRequestError("invalid_response", "rerank row score must be finite");
            }
            output.push({
                index,
                relevance_score: rawScore
            });
        }
        // Sort first, then de-duplicate, so the best-scoring row wins per index.
        const seen = new Set();
        const ordered = [...output]
            .sort((a, b) => b.relevance_score - a.relevance_score || a.index - b.index)
            .filter((row) => {
            if (seen.has(row.index)) {
                return false;
            }
            seen.add(row.index);
            return true;
        })
            .slice(0, topN);
        if (ordered.length === 0) {
            throw new RerankerProviderRequestError("invalid_response", "provider returned zero rerank results");
        }
        return ordered;
    }
    /**
     * Acquire a slot from the configured request limiter, if any, and record
     * limiter metrics. Does nothing when no limiter was supplied.
     *
     * @returns {Promise<void>}
     * @throws {RerankerProviderRequestError} with reason "rate_limited" when
     *   the limiter rejects with ProviderRateLimitExceededError; any other
     *   error is rethrown unchanged.
     */
    async enforceRequestLimit() {
        if (!this.requestLimiter) {
            return;
        }
        const labels = {
            provider: "openai_compatible",
            model: this.model,
            purpose: "rerank",
            limiter_mode: this.requestLimiter.mode ?? "custom"
        };
        try {
            const acquired = await this.requestLimiter.acquire({
                scope: this.requestLimitScope,
                max_requests_per_minute: this.maxRequestsPerMinute,
                max_wait_ms: this.rerankMaxWaitMs
            });
            this.observability.metrics.observe("retrieval_provider_limiter_wait_ms", acquired.wait_ms, labels);
            this.observability.metrics.increment("retrieval_provider_requests_shaped_total", 1, labels);
        }
        catch (error) {
            this.observability.metrics.increment("retrieval_provider_limiter_block_total", 1, {
                ...labels,
                reason: "wait_timeout"
            });
            if (error instanceof ProviderRateLimitExceededError) {
                throw new RerankerProviderRequestError("rate_limited", `${error.message}; retry_after_ms=${error.retry_after_ms}`);
            }
            throw error;
        }
    }
}
1585
/**
 * Build the system prompt for the Claude prompt-enhancer.
 *
 * @param {string} language - "zh" selects Simplified Chinese, "es" Spanish; anything else English.
 * @param {string} style - "lean" or "deep"; anything else selects the standard style.
 * @returns {string} all instruction sentences joined by single spaces.
 */
function buildClaudeEnhancerSystemInstruction(language, style) {
    let languageRule;
    switch (language) {
        case "zh":
            languageRule = "Output language must be Simplified Chinese.";
            break;
        case "es":
            languageRule = "Output language must be Spanish.";
            break;
        default:
            languageRule = "Output language must be English.";
    }
    let styleRule;
    switch (style) {
        case "lean":
            styleRule = "Style is lean: keep the response compact (roughly 90-180 words), avoid extra headings, and include only essential steps.";
            break;
        case "deep":
            styleRule = "Style is deep: provide comprehensive but grounded guidance (roughly 260-420 words) with concrete constraints, edge cases, and validation.";
            break;
        default:
            styleRule = "Style is standard: provide balanced depth (roughly 160-300 words) with clear scope, steps, and validation.";
    }
    const fixedRules = [
        "Return plain text only: the final enhanced prompt.",
        "Do not include markdown code fences.",
        "Preserve user intent exactly; do not add unrelated features.",
        "Do not invent file paths or symbols that are not present in provided context.",
        "Produce concise execution-ready prompts, not long generic templates.",
        "Prefer practical sections only: objective, scoped constraints, codebase anchors, implementation steps, validation.",
        "Use concrete file/symbol anchors when context exists.",
        "Avoid repeating generic process advice, broad deliverables lists, or organizational boilerplate."
    ];
    const sentences = [
        "You are a high-precision prompt enhancement agent for software engineering tasks.",
        languageRule,
        styleRule,
        ...fixedRules
    ];
    return sentences.join(" ");
}
1610
/**
 * Produce the comparison form of a context path: the result of
 * `normalizePath`, lower-cased.
 *
 * @param {string} path - path of a context snippet.
 * @returns {string} normalized, lower-cased path.
 */
function normalizeEnhancerContextPath(path) {
    const canonicalPath = normalizePath(path);
    return canonicalPath.toLowerCase();
}
1613
/**
 * Tell whether a path points at a file that typically holds project
 * conventions (AGENTS.md, CLAUDE.md, README.md, CONTRIBUTING.md), at the
 * repository root or in any directory. Matching is case-insensitive.
 *
 * @param {string} path - path of a context snippet.
 * @returns {boolean} true when the file name is one of the convention files.
 */
function looksLikeEnhancerConventionsFile(path) {
    const normalized = normalizeEnhancerContextPath(path);
    const conventionFileNames = ["agents.md", "claude.md", "readme.md", "contributing.md"];
    return conventionFileNames.some((fileName) => normalized === fileName || normalized.endsWith(`/${fileName}`));
}
1624
/**
 * Collect up to eight convention-like sentences from snippets that come
 * from convention files (see `looksLikeEnhancerConventionsFile`).
 *
 * A line qualifies when, after stripping a leading bullet or list number,
 * it is 16-180 characters long, contains one of the signal keywords, does
 * not start with a code keyword, and has not been seen before
 * (case-insensitively).
 *
 * @param {Array<{ path: string, snippet: string }>} snippets - context snippets.
 * @returns {string[]} qualifying lines in encounter order, at most eight.
 */
function extractProjectConventionsFromEnhancerContext(snippets) {
    const conventionKeyword = /\b(always|never|must|should|avoid|prefer|preserve|keep|strict|isolation|tenant|workspace|contract|schema|backward|compatibility|regression|test|typecheck|bun)\b/i;
    const codeStatementPrefix = /^(import|export|const|let|var|if|for|while|return)\b/i;
    const conventions = [];
    const seenLowercased = new Set();
    const conventionSnippets = snippets.filter((entry) => looksLikeEnhancerConventionsFile(entry.path));
    for (const entry of conventionSnippets) {
        for (const rawLine of entry.snippet.split(/\r?\n/u)) {
            // Drop markdown list markers ("- ", "* ", "+ ", "1. ") before judging the line.
            const text = rawLine
                .replace(/^\s*[-*+]\s+/u, "")
                .replace(/^\s*\d+\.\s+/u, "")
                .trim();
            const hasUsableLength = text.length >= 16 && text.length <= 180;
            if (!hasUsableLength || !conventionKeyword.test(text) || codeStatementPrefix.test(text)) {
                continue;
            }
            const dedupeKey = text.toLowerCase();
            if (seenLowercased.has(dedupeKey)) {
                continue;
            }
            seenLowercased.add(dedupeKey);
            conventions.push(text);
            if (conventions.length >= 8) {
                return conventions;
            }
        }
    }
    return conventions;
}
1661
/**
 * Derive hard constraints ("non-negotiables") from the user's prompt and
 * conversation history by keyword matching.
 *
 * @param {{ prompt: string, history?: Array<{ content?: string }> }} input -
 *   prompt text plus prior conversation entries. A missing history (or a
 *   missing entry/content) is treated as empty text instead of throwing.
 * @returns {string[]} matched constraint sentences, in rule order, without
 *   duplicates.
 */
function extractEnhancerNonNegotiables(input) {
    const historyText = (input.history ?? []).map((entry) => entry?.content ?? "").join("\n");
    const haystack = `${input.prompt ?? ""}\n${historyText}`.toLowerCase();
    // Each rule pairs a trigger pattern with the constraint it implies.
    const rules = [
        [
            /keep (?:behavior|behaviour) stable|preserve (?:existing )?(?:behavior|behaviour)|backward.?compat|no breaking changes|without breaking/i,
            "Preserve existing behavior and avoid breaking API/contract semantics."
        ],
        [
            /regression tests?|add tests?|test coverage|boundary tests?/i,
            "Include regression tests for any changed behavior."
        ],
        [
            /tenant|workspace|authorization|auth boundaries?|scope enforcement|isolation/i,
            "Maintain strict tenant/workspace isolation and authorization boundaries."
        ],
        [
            /no docs|avoid docs|exclude docs/i,
            "Do not prioritize documentation-only changes unless explicitly requested."
        ],
        [
            /no refactor|minimal changes?|smallest safe change/i,
            "Prefer the smallest safe change set."
        ]
    ];
    const constraints = [];
    for (const [pattern, constraint] of rules) {
        if (pattern.test(haystack) && !constraints.includes(constraint)) {
            constraints.push(constraint);
        }
    }
    return constraints.slice(0, 6);
}
1687
/**
 * Choose the output contract (target style, word budget, preferred sections
 * and patterns to avoid) for an enhanced prompt.
 *
 * The contract depends on the resolved style ("lean", "deep", anything else
 * is standard), on whether the request is about tests or about docs /
 * conceptual questions, and on whether context references are available
 * (which raises the word budget).
 *
 * Precedence note: for "lean" and "deep" the tests contract is checked
 * before the docs/conceptual one; for the standard style it is the other
 * way round.
 *
 * @param {{ style: string, intent: string, query_intent: string, has_context: boolean }} input
 * @returns {{ target_style: string, max_words: number, preferred_sections: string[], avoid_patterns: string[] }}
 */
function buildEnhancerOutputContract(input) {
    const wantsTests = input.intent === "tests";
    const wantsSpec = input.intent === "docs" || input.query_intent === "conceptual";
    const contract = (target_style, wordsWithContext, wordsWithoutContext, preferred_sections, avoid_patterns) => ({
        target_style,
        max_words: input.has_context ? wordsWithContext : wordsWithoutContext,
        preferred_sections,
        avoid_patterns
    });
    switch (input.style) {
        case "lean":
            if (wantsTests) {
                return contract("lean_test_plan", 220, 170, ["Goal", "Key test cases", "Validation"], ["long checklists", "broad architecture proposals", "generic deliverables blocks"]);
            }
            if (wantsSpec) {
                return contract("lean_spec", 220, 170, ["Goal", "Scope", "Validation"], ["verbose outlines", "boilerplate context blocks", "generic process advice"]);
            }
            return contract("lean_implementation_plan", 230, 180, ["Goal", "Constraints", "Action steps", "Validation"], ["deep background sections", "broad deliverables lists", "repeated boilerplate"]);
        case "deep":
            if (wantsTests) {
                return contract("deep_test_plan", 420, 340, ["Goal", "Behavior under test", "Test matrix", "Edge cases", "Validation"], ["vague test advice", "non-test deliverables", "ungrounded file guesses"]);
            }
            if (wantsSpec) {
                return contract("deep_spec", 420, 340, ["Goal", "Scope", "Relevant sources", "Proposed outline", "Risks", "Validation"], ["implementation-only checklists", "generic organizational boilerplate", "speculation"]);
            }
            return contract("deep_implementation_plan", 420, 360, [
                "Goal",
                "Scope and constraints",
                "Codebase anchors",
                "Implementation plan",
                "Edge cases",
                "Validation"
            ], ["security theater", "repeated compliance boilerplate", "invented file/symbol references"]);
        default:
            // Standard style: docs/conceptual wins over tests here.
            if (wantsSpec) {
                return contract("concise_spec", 320, 260, ["Goal", "Scope", "Relevant sources", "Proposed outline", "Validation"], ["long implementation checklists", "generic deliverables sections", "repeated boilerplate"]);
            }
            if (wantsTests) {
                return contract("test_plan", 320, 260, ["Goal", "Behavior under test", "Test matrix", "Implementation notes", "Validation"], ["broad architecture rewrites", "non-test deliverables", "generic process bullets"]);
            }
            return contract("implementation_plan", 360, 300, ["Goal", "Scope and constraints", "Codebase anchors", "Implementation plan", "Validation"], ["broad security theater", "repeated compliance boilerplate", "vague deliverables lists"]);
    }
}
1767
/**
 * Build the user message sent to the Claude enhancer: three instruction
 * lines, an "Input JSON:" marker, and a pretty-printed JSON payload that
 * describes the request, its context snippets and the derived guidance
 * (output contract, non-negotiables, project conventions).
 *
 * @param {object} input - enhancer generation input (request, context
 *   refs/snippets, resolved style, intents, language and tenancy ids).
 * @returns {string} newline-joined prompt text.
 */
function buildClaudeEnhancerUserPayload(input) {
    const { request } = input;
    const project_conventions = extractProjectConventionsFromEnhancerContext(input.context_snippets);
    const output_contract = buildEnhancerOutputContract({
        style: input.style_resolved,
        intent: input.intent,
        query_intent: input.query_intent,
        has_context: input.context_refs.length > 0
    });
    const non_negotiables = extractEnhancerNonNegotiables({
        prompt: request.prompt,
        history: request.conversation_history
    });
    // Scores are rounded to four decimals to keep the payload compact.
    const context_snippets = input.context_snippets.map(({ path, start_line, end_line, reason, score, snippet }) => ({
        path,
        start_line,
        end_line,
        reason,
        score: Number(score.toFixed(4)),
        snippet
    }));
    const payload = {
        trace_id: input.trace_id,
        tenant_id: input.tenant_id,
        workspace_id: input.workspace_id ?? "none",
        tool_mode: input.tool_mode,
        style_requested: input.style_requested,
        style_resolved: input.style_resolved,
        intent: input.intent,
        query_intent: input.query_intent,
        language: input.language,
        original_prompt: request.prompt,
        conversation_history: request.conversation_history,
        context_refs: input.context_refs,
        context_snippets,
        output_contract,
        non_negotiables,
        project_conventions
    };
    const preamble = [
        "Enhance the following request into a concise, implementation-ready prompt.",
        "Prioritize user intent fidelity, concrete repo anchors, and verifiable validation steps.",
        "Honor the requested enhancement style while avoiding invented details.",
        "Input JSON:"
    ];
    return [...preamble, JSON.stringify(payload, null, 2)].join("\n");
}
1812
/**
 * Strip one leading and one trailing markdown code fence from model output.
 * The opening fence may carry a `json`, `markdown` or `md` tag.
 *
 * @param {string} text - raw model output.
 * @returns {string} trimmed text without the surrounding fence markers.
 */
function removeEnhancerCodeFences(text) {
    const openingFence = /^```(?:json|markdown|md)?\s*/iu;
    const closingFence = /\s*```$/u;
    const withoutOpening = text.trim().replace(openingFence, "");
    return withoutOpening.replace(closingFence, "").trim();
}
1815
/**
 * Clean up the enhancer's text output.
 *
 * Removes surrounding code fences, converts CRLF to LF, strips trailing
 * spaces/tabs per line and collapses runs of three or more newlines into a
 * blank line. If the cleaned text is a JSON object with a string
 * `enhanced_prompt` field, that field (trimmed) is returned instead.
 *
 * @param {string} text - raw provider output.
 * @returns {string} normalized prompt text (possibly empty).
 */
function normalizeProviderEnhancedPrompt(text) {
    const unfenced = removeEnhancerCodeFences(text).replace(/\r\n/g, "\n");
    const tidied = unfenced
        .split("\n")
        .map((line) => line.replace(/[ \t]+$/u, ""))
        .join("\n")
        .replace(/\n{3,}/g, "\n\n")
        .trim();
    if (!tidied) {
        return tidied;
    }
    let parsed;
    try {
        parsed = JSON.parse(tidied);
    }
    catch {
        // Not JSON: the text itself is the prompt.
        return tidied;
    }
    const isEnvelope = parsed && typeof parsed === "object" && typeof parsed.enhanced_prompt === "string";
    return isEnvelope ? parsed.enhanced_prompt.trim() : tidied;
}
1837
+ let cachedClaudeAgentSdkQueryFn;
1838
/**
 * Check that a value is a non-null, non-array object.
 *
 * @param {unknown} value - value to inspect.
 * @returns {boolean} true for plain objects and other non-array objects.
 */
function isRecord(value) {
    if (value === null || typeof value !== "object") {
        return false;
    }
    return !Array.isArray(value);
}
1841
/**
 * Lazily load the `query` function of the Claude agent SDK.
 *
 * Tries `@anthropic-ai/claude-agent-sdk` first and `@anthropic-ai/claude-code`
 * second, caching the first `query` export found in the module-level
 * `cachedClaudeAgentSdkQueryFn` so later calls skip the import.
 *
 * @returns {Promise<Function>} the SDK `query` function.
 * @throws {EnhancerProviderRequestError} ("upstream_error") when neither
 *   package can be imported or exports `query`; the message carries the last
 *   failure reason.
 */
async function loadClaudeAgentSdkQueryFn() {
    if (cachedClaudeAgentSdkQueryFn) {
        return cachedClaudeAgentSdkQueryFn;
    }
    let lastError;
    const candidates = ["@anthropic-ai/claude-agent-sdk", "@anthropic-ai/claude-code"];
    for (const candidate of candidates) {
        try {
            const { query } = await import(candidate);
            if (typeof query === "function") {
                cachedClaudeAgentSdkQueryFn = query;
                return cachedClaudeAgentSdkQueryFn;
            }
            lastError = new Error(`${candidate} does not export query()`);
        }
        catch (error) {
            lastError = error;
        }
    }
    const reason = lastError instanceof Error ? lastError.message : String(lastError ?? "unknown error");
    throw new EnhancerProviderRequestError("upstream_error", `claude agent sdk is not available; install @anthropic-ai/claude-agent-sdk (${reason})`);
}
1863
/**
 * Pull plain text out of a message `content` value, which may be a string
 * or an array of content items.
 *
 * For arrays, the trimmed `text` of every record item with a string `text`
 * is collected and the non-empty pieces are joined with newlines.
 *
 * @param {unknown} content - message content.
 * @returns {string | undefined} non-empty text, or undefined when there is none.
 */
function extractTextFromClaudeMessageContent(content) {
    if (typeof content === "string") {
        return content.trim() || undefined;
    }
    if (!Array.isArray(content)) {
        return undefined;
    }
    const pieces = content
        .map((item) => (isRecord(item) && typeof item.text === "string" ? item.text.trim() : ""))
        .filter((piece) => piece.length > 0);
    return pieces.length > 0 ? pieces.join("\n") : undefined;
}
1890
/**
 * Find the text carried by an SDK message.
 *
 * Lookup order: `summary`, `result`, `text`, `content`, then the nested
 * `message.text` and `message.content`. The first non-empty (trimmed)
 * candidate wins.
 *
 * @param {unknown} message - message yielded by the SDK.
 * @returns {string | undefined} trimmed text, or undefined when none is found.
 */
function extractTextFromClaudeSdkMessage(message) {
    if (!isRecord(message)) {
        return undefined;
    }
    // First non-empty trimmed string among the given keys of `holder`.
    const firstNonEmptyString = (holder, keys) => {
        for (const key of keys) {
            const candidate = holder[key];
            if (typeof candidate !== "string") {
                continue;
            }
            const trimmed = candidate.trim();
            if (trimmed.length > 0) {
                return trimmed;
            }
        }
        return undefined;
    };
    const topLevelText = firstNonEmptyString(message, ["summary", "result", "text"])
        ?? extractTextFromClaudeMessageContent(message.content);
    if (topLevelText) {
        return topLevelText;
    }
    const inner = message.message;
    if (!isRecord(inner)) {
        return undefined;
    }
    return firstNonEmptyString(inner, ["text"]) ?? extractTextFromClaudeMessageContent(inner.content);
}
1930
/**
 * Extract the partial text carried by a `stream_event` message.
 *
 * Only `content_block_start` (text in `content_block.text`) and
 * `content_block_delta` (text in `delta.text`) events yield text. The text
 * is returned untrimmed so chunks can be concatenated.
 *
 * @param {unknown} message - message yielded by the SDK.
 * @returns {string | undefined} the text chunk, or undefined for any other message.
 */
function extractTextChunkFromClaudeSdkStreamEvent(message) {
    if (!isRecord(message) || message.type !== "stream_event") {
        return undefined;
    }
    const event = message.event;
    if (!isRecord(event)) {
        return undefined;
    }
    const textOf = (holder) => (isRecord(holder) && typeof holder.text === "string" ? holder.text : undefined);
    switch (event.type) {
        case "content_block_start":
            return textOf(event.content_block);
        case "content_block_delta":
            return textOf(event.delta);
        default:
            return undefined;
    }
}
1955
/**
 * Read a structured enhancer result from an SDK message.
 *
 * @param {unknown} message - message yielded by the SDK.
 * @returns {{ enhanced_prompt: string } | undefined} the trimmed prompt when
 *   `message.structured_output.enhanced_prompt` is a non-blank string;
 *   undefined otherwise.
 */
function extractStructuredOutputFromClaudeSdkMessage(message) {
    const structured = isRecord(message) ? message.structured_output : undefined;
    if (!isRecord(structured)) {
        return undefined;
    }
    const candidate = structured.enhanced_prompt;
    if (typeof candidate !== "string") {
        return undefined;
    }
    const enhanced_prompt = candidate.trim();
    return enhanced_prompt.length === 0 ? undefined : { enhanced_prompt };
}
1971
/**
 * Detect a failed `result` message from the SDK.
 *
 * @param {unknown} message - message yielded by the SDK.
 * @returns {{ subtype: string, errors: string[] } | undefined} the failure
 *   subtype plus its non-blank, trimmed string errors; undefined when the
 *   message is not a `result`, has no string subtype, or reports "success".
 */
function extractResultFailureFromClaudeSdkMessage(message) {
    if (!isRecord(message) || message.type !== "result") {
        return undefined;
    }
    const { subtype } = message;
    if (typeof subtype !== "string" || subtype === "success") {
        return undefined;
    }
    const errors = [];
    if (Array.isArray(message.errors)) {
        message.errors.forEach((entry) => {
            if (typeof entry !== "string") {
                return;
            }
            const trimmed = entry.trim();
            if (trimmed.length > 0) {
                errors.push(trimmed);
            }
        });
    }
    return { subtype, errors };
}
1989
/**
 * Produce a short label for an SDK message, used in diagnostics.
 *
 * @param {unknown} message - message yielded by the SDK.
 * @returns {string} `type:subtype`, or just `type` ("unknown" when absent);
 *   for non-record values, the result of `typeof`.
 */
function describeClaudeSdkMessage(message) {
    if (!isRecord(message)) {
        return typeof message;
    }
    const label = typeof message.type === "string" ? message.type : "unknown";
    if (typeof message.subtype === "string" && message.subtype) {
        return `${label}:${message.subtype}`;
    }
    return label;
}
1997
/**
 * Convert any thrown value into an EnhancerProviderRequestError.
 *
 * Existing provider errors pass through. Other errors are classified by
 * their message: timeout/abort wording -> "timeout", rate-limit wording ->
 * "rate_limited", everything else -> "upstream_error" (with a clearer
 * message when the text suggests the Claude executable is missing).
 *
 * @param {unknown} error - value caught while talking to the enhancer.
 * @returns {EnhancerProviderRequestError} classified error.
 */
function classifyEnhancerProviderError(error) {
    if (error instanceof EnhancerProviderRequestError) {
        return error;
    }
    if (!(error instanceof Error)) {
        return new EnhancerProviderRequestError("upstream_error", String(error));
    }
    const message = error.message || "unknown enhancer provider error";
    if (/(timeout|timed out|abort)/i.test(message)) {
        return new EnhancerProviderRequestError("timeout", message);
    }
    if (/(rate.?limit|too many requests|429)/i.test(message)) {
        return new EnhancerProviderRequestError("rate_limited", message);
    }
    const mentionsMissingFile = /(no such file|not found|ENOENT)/i.test(message);
    const executableMissing = mentionsMissingFile && /claude/i.test(message);
    return new EnhancerProviderRequestError("upstream_error", executableMissing ? `claude code executable not found: ${message}` : message);
}
2016
/**
 * Prompt-enhancer provider backed by the Claude agent SDK `query()` stream.
 *
 * The provider sends a single prompt (built from the request and its
 * context snippets) with all tools disabled, then returns either the
 * structured output or the last text the SDK produced.
 */
export class ClaudeAgentEnhancerProvider {
    apiKey;
    model;
    maxTokens;
    baseUrl;
    pathToClaudeCodeExecutable;
    permissionMode;
    /**
     * @param {object} options - provider configuration.
     * @param {string} options.api_key - API key; must be non-empty after trimming.
     * @param {string} [options.model] - model id; defaults to the enhancer default model.
     * @param {number} [options.max_tokens] - positive integer, default 1200.
     * @param {string} [options.permission_mode] - default|acceptEdits|bypassPermissions|plan.
     * @param {string} [options.base_url] - optional API base URL.
     * @param {string} [options.path_to_claude_code_executable] - optional executable path; blank is ignored.
     * @throws {Error} when any option fails validation.
     */
    constructor(options) {
        const trimmedKey = options.api_key.trim();
        if (trimmedKey.length === 0) {
            throw new Error("invalid claude enhancer config: api_key must be non-empty");
        }
        const resolvedModel = options.model?.trim() ?? DEFAULT_CLAUDE_ENHANCER_MODEL;
        if (resolvedModel.length === 0) {
            throw new Error("invalid claude enhancer config: model must be non-empty");
        }
        const tokenBudget = options.max_tokens ?? 1_200;
        if (!Number.isInteger(tokenBudget) || tokenBudget <= 0) {
            throw new Error("invalid claude enhancer config: max_tokens must be a positive integer");
        }
        const mode = options.permission_mode ?? "default";
        const supportedModes = ["default", "acceptEdits", "bypassPermissions", "plan"];
        if (!supportedModes.includes(mode)) {
            throw new Error("invalid claude enhancer config: permission_mode must be default|acceptEdits|bypassPermissions|plan");
        }
        this.apiKey = trimmedKey;
        this.model = resolvedModel;
        this.maxTokens = tokenBudget;
        this.baseUrl = options.base_url?.trim();
        this.pathToClaudeCodeExecutable = options.path_to_claude_code_executable?.trim() || undefined;
        this.permissionMode = mode;
    }
    /**
     * @returns {{ provider: string, model: string }} provider/model identity.
     */
    describe() {
        return {
            provider: "claude_agent",
            model: this.model
        };
    }
    /**
     * Run the enhancer and return the enhanced prompt.
     *
     * @param {object} input - generation input; besides the fields consumed
     *   by the payload builder it may carry `abort_signal` (forwarded to the
     *   SDK) and `on_progress` (called once per SDK message).
     * @returns {Promise<{ enhanced_prompt: string }>} structured output when
     *   the SDK provided one, otherwise the normalized last text.
     * @throws {EnhancerProviderRequestError} on SDK/stream errors, when the
     *   turn limit is hit without output, or when no text was produced.
     */
    async generate(input) {
        const query = await loadClaudeAgentSdkQueryFn();
        const prompt = buildClaudeEnhancerUserPayload(input);
        const abortController = new AbortController();
        const upstreamAbortSignal = input.abort_signal;
        const forwardUpstreamAbort = () => {
            abortController.abort();
        };
        // Mirror the caller's abort signal onto the controller handed to the SDK.
        if (upstreamAbortSignal?.aborted) {
            abortController.abort();
        }
        else if (upstreamAbortSignal) {
            upstreamAbortSignal.addEventListener("abort", forwardUpstreamAbort, { once: true });
        }
        // NOTE(review): `env` lists only the API key/base URL; confirm how the
        // SDK combines it with the parent process environment.
        const env = { ANTHROPIC_API_KEY: this.apiKey };
        if (this.baseUrl) {
            env.ANTHROPIC_BASE_URL = this.baseUrl;
        }
        const options = {
            model: this.model,
            maxThinkingTokens: this.maxTokens,
            maxTurns: DEFAULT_CLAUDE_ENHANCER_MAX_TURNS,
            includePartialMessages: true,
            thinking: {
                type: "disabled"
            },
            permissionMode: this.permissionMode,
            systemPrompt: buildClaudeEnhancerSystemInstruction(input.language, input.style_resolved),
            // Enhancer already receives scoped context snippets; keep Claude Code tools disabled to avoid long tool loops.
            tools: [],
            allowedTools: [],
            env,
            abortController
        };
        if (this.pathToClaudeCodeExecutable) {
            options.pathToClaudeCodeExecutable = this.pathToClaudeCodeExecutable;
        }
        if (input.request.project_root_path) {
            options.cwd = input.request.project_root_path;
        }
        const describeErrors = (failure) => (failure.errors.length > 0 ? `: ${failure.errors.join(" | ")}` : "");
        const streamedChunks = [];
        const observedKinds = new Set();
        let structured;
        let lastText;
        let maxTurnsFailure;
        try {
            for await (const message of query({ prompt, options })) {
                input.on_progress?.();
                observedKinds.add(describeClaudeSdkMessage(message));
                const chunk = extractTextChunkFromClaudeSdkStreamEvent(message);
                if (typeof chunk === "string" && chunk.length > 0) {
                    streamedChunks.push(chunk);
                }
                const failure = extractResultFailureFromClaudeSdkMessage(message);
                if (failure) {
                    if (failure.subtype !== "error_max_turns") {
                        throw new EnhancerProviderRequestError("upstream_error", `claude agent sdk result error (${failure.subtype})${describeErrors(failure)}`);
                    }
                    // Hitting the turn limit is only fatal if no text arrived; decide after the stream ends.
                    maxTurnsFailure = failure;
                    continue;
                }
                structured = extractStructuredOutputFromClaudeSdkMessage(message) ?? structured;
                lastText = extractTextFromClaudeSdkMessage(message) || lastText;
                if (isRecord(message) && message.type === "assistant" && typeof message.error === "string") {
                    throw new EnhancerProviderRequestError("upstream_error", `claude agent sdk assistant error: ${message.error}`);
                }
            }
        }
        catch (error) {
            throw classifyEnhancerProviderError(error);
        }
        finally {
            upstreamAbortSignal?.removeEventListener("abort", forwardUpstreamAbort);
        }
        if (structured) {
            return structured;
        }
        // Fall back to the concatenated partial stream when no complete message carried text.
        if (!lastText && streamedChunks.length > 0) {
            lastText = streamedChunks.join("").trim();
        }
        if (!lastText) {
            if (maxTurnsFailure) {
                throw new EnhancerProviderRequestError("upstream_error", `claude agent sdk hit max turns before returning output${describeErrors(maxTurnsFailure)}`);
            }
            const seenKinds = [...observedKinds].join(", ") || "none";
            throw new EnhancerProviderRequestError("invalid_response", `claude agent sdk returned no text output (messages=${seenKinds})`);
        }
        return { enhanced_prompt: normalizeProviderEnhancedPrompt(lastText) };
    }
}
966
2154
  async function safeResponseText(response) {
@@ -972,6 +2160,31 @@ async function safeResponseText(response) {
972
2160
  return "";
973
2161
  }
974
2162
  }
2163
+ function parseRetryAfterMs(headerValue) {
2164
+ if (!headerValue) {
2165
+ return undefined;
2166
+ }
2167
+ const trimmed = headerValue.trim();
2168
+ if (!trimmed) {
2169
+ return undefined;
2170
+ }
2171
+ const seconds = Number(trimmed);
2172
+ if (Number.isFinite(seconds) && seconds >= 0) {
2173
+ return Math.ceil(seconds * 1000);
2174
+ }
2175
+ const dateMs = Date.parse(trimmed);
2176
+ if (!Number.isNaN(dateMs)) {
2177
+ return Math.max(0, dateMs - Date.now());
2178
+ }
2179
+ return undefined;
2180
+ }
2181
+ function resolveProviderLimiterScope(input) {
2182
+ const override = input.overrideScopeId?.trim();
2183
+ if (override) {
2184
+ return `provider:${input.provider}|credential:${override}`;
2185
+ }
2186
+ return `provider:${input.provider}|credential:${sha256(input.apiKey).slice(0, 16)}`;
2187
+ }
975
2188
  function resolveEmbeddingDescriptor(provider) {
976
2189
  const described = provider.describe?.();
977
2190
  if (!described) {
@@ -987,6 +2200,30 @@ function resolveEmbeddingDescriptor(provider) {
987
2200
  ...(described.version ? { version: described.version } : {})
988
2201
  };
989
2202
  }
2203
+ function resolveRerankerDescriptor(provider) {
2204
+ const described = provider.describe?.();
2205
+ if (!described) {
2206
+ return {
2207
+ provider: "custom"
2208
+ };
2209
+ }
2210
+ return {
2211
+ provider: described.provider,
2212
+ ...(described.model ? { model: described.model } : {})
2213
+ };
2214
+ }
2215
+ function resolveEnhancerProviderDescriptor(provider) {
2216
+ const described = provider.describe?.();
2217
+ if (!described) {
2218
+ return {
2219
+ provider: "custom"
2220
+ };
2221
+ }
2222
+ return {
2223
+ provider: described.provider,
2224
+ ...(described.model ? { model: described.model } : {})
2225
+ };
2226
+ }
990
2227
  function normalizeEmbeddingDescriptor(descriptor) {
991
2228
  const provider = descriptor.provider.trim();
992
2229
  if (provider.length === 0) {
@@ -1002,6 +2239,70 @@ function normalizeEmbeddingDescriptor(descriptor) {
1002
2239
  ...(descriptor.version ? { version: descriptor.version.trim() } : {})
1003
2240
  };
1004
2241
  }
2242
+ function normalizeRerankerDescriptor(descriptor) {
2243
+ const provider = descriptor.provider.trim().toLowerCase();
2244
+ if (provider.length === 0) {
2245
+ throw new Error("invalid reranker descriptor: provider must be non-empty");
2246
+ }
2247
+ const model = descriptor.model?.trim();
2248
+ return {
2249
+ provider,
2250
+ ...(model ? { model } : {})
2251
+ };
2252
+ }
2253
+ function normalizeEnhancerProviderDescriptor(descriptor) {
2254
+ const provider = descriptor.provider.trim().toLowerCase();
2255
+ if (provider.length === 0) {
2256
+ throw new Error("invalid enhancer descriptor: provider must be non-empty");
2257
+ }
2258
+ const model = descriptor.model?.trim();
2259
+ return {
2260
+ provider,
2261
+ ...(model ? { model } : {})
2262
+ };
2263
+ }
2264
+ function buildRerankerDocument(candidate) {
2265
+ return `${candidate.path}\n${candidate.snippet}`;
2266
+ }
2267
+ function classifyRerankerFailureReason(error) {
2268
+ if (error instanceof RerankerProviderRequestError) {
2269
+ if (error.reason === "timeout") {
2270
+ return "timeout";
2271
+ }
2272
+ if (error.reason === "rate_limited") {
2273
+ return "rate_limited";
2274
+ }
2275
+ if (error.reason === "invalid_json" || error.reason === "invalid_response") {
2276
+ return "schema_error";
2277
+ }
2278
+ return "upstream_error";
2279
+ }
2280
+ if (error instanceof Error) {
2281
+ if (/(rate.?limit|too many requests|429)/i.test(error.message)) {
2282
+ return "rate_limited";
2283
+ }
2284
+ if (/(timeout|timed out)/i.test(error.message)) {
2285
+ return "timeout";
2286
+ }
2287
+ return "upstream_error";
2288
+ }
2289
+ return "upstream_error";
2290
+ }
2291
+ function classifyEnhancerGenerationFailureReason(error) {
2292
+ if (error instanceof EnhancerProviderRequestError) {
2293
+ return error.reason;
2294
+ }
2295
+ if (error instanceof Error) {
2296
+ if (/(timeout|timed out)/i.test(error.message)) {
2297
+ return "timeout";
2298
+ }
2299
+ if (/(rate.?limit|too many requests|429)/i.test(error.message)) {
2300
+ return "rate_limited";
2301
+ }
2302
+ return "upstream_error";
2303
+ }
2304
+ return "upstream_error";
2305
+ }
1005
2306
  function classifyIntent(prompt) {
1006
2307
  const p = prompt.toLowerCase();
1007
2308
  if (/fix|bug|error|crash|regression/.test(p)) {
@@ -1021,6 +2322,38 @@ function classifyIntent(prompt) {
1021
2322
  }
1022
2323
  return "unknown";
1023
2324
  }
2325
+ function resolveEnhancerPromptStyle(input) {
2326
+ const requested = input.requested ?? "standard";
2327
+ if (requested !== "auto") {
2328
+ return {
2329
+ requested,
2330
+ resolved: requested
2331
+ };
2332
+ }
2333
+ const combined = `${input.prompt}\n${input.history.map((entry) => entry.content).join("\n")}`.trim();
2334
+ const words = tokenize(combined);
2335
+ const isShort = words.length <= 18 && input.history.length <= 1;
2336
+ const asksConcise = /\b(concise|brief|short|minimal|quick)\b/i.test(combined);
2337
+ const asksDepth = /\b(detailed|comprehensive|thorough|step-by-step|checklist)\b/i.test(combined);
2338
+ const highRisk = /\b(security|auth|authorization|tenant|workspace|migration|data loss|rollback|incident|compliance|backward)\b/i.test(combined);
2339
+ const complexityScore = Number(input.has_context) + Number(words.length >= 32) + Number(input.history.length >= 3);
2340
+ if (asksConcise || (isShort && !highRisk && !asksDepth)) {
2341
+ return {
2342
+ requested,
2343
+ resolved: "lean"
2344
+ };
2345
+ }
2346
+ if (asksDepth || highRisk || complexityScore >= 2 || input.query_intent === "symbol-heavy" || input.intent === "tests") {
2347
+ return {
2348
+ requested,
2349
+ resolved: "deep"
2350
+ };
2351
+ }
2352
+ return {
2353
+ requested,
2354
+ resolved: "standard"
2355
+ };
2356
+ }
1024
2357
  function detectDominantLanguage(prompt, history) {
1025
2358
  const latestUser = [...history].reverse().find((m) => m.role === "user")?.content ?? prompt;
1026
2359
  const sample = `${prompt}\n${latestUser}`.toLowerCase();
@@ -1464,7 +2797,7 @@ function buildEnhancerRetrievalQuery(prompt, history, options) {
1464
2797
  query_intent: queryIntent
1465
2798
  };
1466
2799
  }
1467
- const ENHANCER_LOW_CONFIDENCE_WARNING = "Low retrieval confidence; narrowed context refs and added clarification questions.";
2800
+ const ENHANCER_LOW_CONFIDENCE_WARNING = "Low retrieval confidence; narrowed context refs.";
1468
2801
  const ENHANCER_CONFIDENCE_OVERLAP_STOPWORDS = new Set([
1469
2802
  "a",
1470
2803
  "about",
@@ -1701,7 +3034,7 @@ function hasStrongEnhancerAnchorMatch(input) {
1701
3034
  const topScore = top[0]?.score ?? 0;
1702
3035
  const runnerUpScore = top[1]?.score ?? Number.NEGATIVE_INFINITY;
1703
3036
  const strongScoreMargin = top.length === 1 || topScore - runnerUpScore >= 0.08;
1704
- const hasTopExactSymbolMatch = top.some((result) => result.reason === "exact symbol match");
3037
+ const hasTopExactSymbolMatch = top.some((result) => isExactLiteralReason(result.reason));
1705
3038
  if (hasTopExactSymbolMatch && strongScoreMargin && topScore >= 0.55) {
1706
3039
  return true;
1707
3040
  }
@@ -1792,7 +3125,7 @@ function evaluateEnhancerConfidence(input) {
1792
3125
  if (diversityStrength < confidenceThreshold) {
1793
3126
  failedSignals.push("path_diversity");
1794
3127
  }
1795
- const strongSymbolOrPathSignal = top.some((result) => result.reason === "exact symbol match") && topOverlap >= 0.16;
3128
+ const strongSymbolOrPathSignal = top.some((result) => isExactLiteralReason(result.reason)) && topOverlap >= 0.16;
1796
3129
  const lowConfidence = !strongSymbolOrPathSignal && confidenceScore + 0.01 < confidenceThreshold;
1797
3130
  return {
1798
3131
  score_spread: scoreSpread,
@@ -1810,7 +3143,7 @@ function rankEnhancerResultsForConfidence(input) {
1810
3143
  const anchorScore = (result) => {
1811
3144
  const normalizedPath = normalizePath(result.path).toLowerCase();
1812
3145
  const normalizedSnippet = result.snippet.toLowerCase();
1813
- let score = result.reason === "exact symbol match" ? 2 : 0;
3146
+ let score = isExactLiteralReason(result.reason) ? 2 : 0;
1814
3147
  for (const anchor of anchors) {
1815
3148
  if (normalizedPath.includes(anchor)) {
1816
3149
  score += 2;
@@ -1869,6 +3202,7 @@ async function runWithTimeout(input) {
1869
3202
  return;
1870
3203
  }
1871
3204
  settled = true;
3205
+ input.on_timeout?.();
1872
3206
  reject(new Error(`timeout_after_${input.timeout_ms}ms`));
1873
3207
  }, input.timeout_ms);
1874
3208
  Promise.resolve()
@@ -1891,45 +3225,62 @@ async function runWithTimeout(input) {
1891
3225
  });
1892
3226
  });
1893
3227
  }
3228
+ async function runWithInactivityTimeout(input) {
3229
+ return await new Promise((resolve, reject) => {
3230
+ let settled = false;
3231
+ const abortController = new AbortController();
3232
+ let timer;
3233
+ const onTimeout = () => {
3234
+ if (settled) {
3235
+ return;
3236
+ }
3237
+ settled = true;
3238
+ abortController.abort();
3239
+ reject(new Error(`timeout_after_${input.timeout_ms}ms`));
3240
+ };
3241
+ const touch = () => {
3242
+ if (settled) {
3243
+ return;
3244
+ }
3245
+ if (timer) {
3246
+ clearTimeout(timer);
3247
+ }
3248
+ timer = setTimeout(onTimeout, input.timeout_ms);
3249
+ };
3250
+ touch();
3251
+ Promise.resolve()
3252
+ .then(() => input.fn({
3253
+ touch,
3254
+ signal: abortController.signal
3255
+ }))
3256
+ .then((value) => {
3257
+ if (settled) {
3258
+ return;
3259
+ }
3260
+ settled = true;
3261
+ if (timer) {
3262
+ clearTimeout(timer);
3263
+ }
3264
+ resolve(value);
3265
+ })
3266
+ .catch((error) => {
3267
+ if (settled) {
3268
+ return;
3269
+ }
3270
+ settled = true;
3271
+ if (timer) {
3272
+ clearTimeout(timer);
3273
+ }
3274
+ reject(error);
3275
+ });
3276
+ });
3277
+ }
1894
3278
  function deterministicEnhancerFallbackRanking(input) {
1895
3279
  const preferred = input.results.filter((result) => !isRiskyEnhancerPath(result.path, input.intent) && !shouldAvoidPathFromNegation(result.path, input.negative_preferences));
1896
3280
  const tolerated = input.results.filter((result) => !preferred.includes(result) && !shouldAvoidPathFromNegation(result.path, input.negative_preferences));
1897
3281
  const avoided = input.results.filter((result) => !preferred.includes(result) && !tolerated.includes(result));
1898
3282
  return [...preferred, ...tolerated, ...avoided];
1899
3283
  }
1900
- function localizeLowConfidenceQuestion(input) {
1901
- if (input.kind === "symbol") {
1902
- if (input.language === "es") {
1903
- return input.symbol
1904
- ? `¿Puedes confirmar si el cambio debe centrarse en el símbolo "${input.symbol}"?`
1905
- : "¿Qué función, clase o archivo exacto debe modificarse primero?";
1906
- }
1907
- if (input.language === "zh") {
1908
- return input.symbol
1909
- ? `请确认这次改动是否应优先围绕符号“${input.symbol}”展开?`
1910
- : "请明确首先要修改的函数、类或文件路径。";
1911
- }
1912
- return input.symbol
1913
- ? `Can you confirm whether "${input.symbol}" is the primary symbol to change?`
1914
- : "Which exact function, class, or file should be edited first?";
1915
- }
1916
- if (input.kind === "source_priority") {
1917
- if (input.language === "es") {
1918
- return "¿Debemos priorizar archivos de implementación en src/lib y dejar docs/tests/examples fuera de alcance?";
1919
- }
1920
- if (input.language === "zh") {
1921
- return "是否应优先修改 src/lib 下的实现代码,并排除 docs/tests/examples?";
1922
- }
1923
- return "Should we prioritize runtime implementation files (src/lib) and exclude docs/tests/examples from scope?";
1924
- }
1925
- if (input.language === "es") {
1926
- return "¿Cuál es el alcance mínimo y el comportamiento que no debe cambiar?";
1927
- }
1928
- if (input.language === "zh") {
1929
- return "这次改动的最小范围是什么?哪些行为必须保持不变?";
1930
- }
1931
- return "What is the minimal scope, and which behavior must remain unchanged?";
1932
- }
1933
3284
  function trimToContextBudget(results) {
1934
3285
  let total = 0;
1935
3286
  const out = [];
@@ -1950,89 +3301,190 @@ function formatEnhancedPrompt(input) {
1950
3301
  };
1951
3302
  const likelyFiles = input.refs.length > 0 ? input.refs.map((r) => `- ${r.path}:${r.start_line}`).join("\n") : emptyRefsByLanguage[input.language];
1952
3303
  if (input.language === "zh") {
3304
+ if (input.style === "lean") {
3305
+ return [
3306
+ "目标",
3307
+ input.original_prompt,
3308
+ "",
3309
+ "约束",
3310
+ "- 保持现有行为与合约兼容。",
3311
+ "- 优先最小且安全的改动。",
3312
+ "",
3313
+ "行动步骤",
3314
+ "- 先确认当前行为与目标范围。",
3315
+ "- 在必要位置完成最小实现并补充回归测试。",
3316
+ "",
3317
+ "验证",
3318
+ "- 运行相关测试并确认无回归。"
3319
+ ].join("\n");
3320
+ }
3321
+ if (input.style === "deep") {
3322
+ return [
3323
+ "目标",
3324
+ input.original_prompt,
3325
+ "",
3326
+ "范围与约束",
3327
+ "- 保持现有行为与 API/合约语义稳定。",
3328
+ "- 仅在必要边界内调整实现,避免扩散改动。",
3329
+ "- 发现风险路径时优先失败安全(deny-by-default)。",
3330
+ "",
3331
+ "代码锚点",
3332
+ likelyFiles,
3333
+ "",
3334
+ "实施步骤",
3335
+ "- 基线确认:先验证当前行为与关键路径。",
3336
+ "- 变更实现:对关键分支做最小、安全、可回退的改动。",
3337
+ "- 回归测试:覆盖正向、跨边界、异常与空输入场景。",
3338
+ "",
3339
+ "边界情况",
3340
+ "- 缺失上下文、无索引或空结果时,保持行为可解释且可回退。",
3341
+ "- 异步/并发路径中避免上下文泄漏与跨租户访问。",
3342
+ "",
3343
+ "验证",
3344
+ "- 运行 typecheck 与目标测试集;确认关键路径稳定无回归。"
3345
+ ].join("\n");
3346
+ }
1953
3347
  return [
1954
3348
  "目标",
1955
3349
  input.original_prompt,
1956
3350
  "",
1957
- "当前状态",
1958
- `- 识别意图: ${input.intent}`,
1959
- "",
1960
3351
  "约束",
1961
3352
  "- 保持 v1 合约兼容和严格校验。",
1962
3353
  "",
1963
- "可能涉及的文件",
3354
+ "代码锚点",
1964
3355
  likelyFiles,
1965
3356
  "",
1966
3357
  "实现清单",
1967
3358
  "- 在改动前确认请求/响应合约。",
1968
3359
  "- 最小化改动并保持 tenant/workspace 隔离。",
1969
3360
  "",
1970
- "边界情况",
1971
- "- Workspace 没有可用索引。",
1972
- "- 搜索过滤后结果为空。",
1973
- "",
1974
3361
  "验证与测试",
1975
3362
  "- 运行 typecheck 和合约/工具测试。",
1976
- "",
1977
- "完成定义",
1978
- "- 测试通过且行为符合 v1 规范。"
1979
3363
  ].join("\n");
1980
3364
  }
1981
3365
  if (input.language === "es") {
3366
+ if (input.style === "lean") {
3367
+ return [
3368
+ "Objetivo",
3369
+ input.original_prompt,
3370
+ "",
3371
+ "Restricciones",
3372
+ "- Mantener compatibilidad de comportamiento y contratos.",
3373
+ "- Priorizar cambios mínimos y seguros.",
3374
+ "",
3375
+ "Pasos",
3376
+ "- Confirmar alcance y comportamiento actual antes de editar.",
3377
+ "- Implementar el cambio mínimo necesario y añadir regresiones.",
3378
+ "",
3379
+ "Validación",
3380
+ "- Ejecutar pruebas relevantes y confirmar que no hay regresiones."
3381
+ ].join("\n");
3382
+ }
3383
+ if (input.style === "deep") {
3384
+ return [
3385
+ "Objetivo",
3386
+ input.original_prompt,
3387
+ "",
3388
+ "Alcance y restricciones",
3389
+ "- Preservar comportamiento existente y contratos/API vigentes.",
3390
+ "- Limitar cambios al alcance mínimo necesario.",
3391
+ "- Aplicar defaults de seguridad (deny-by-default) cuando aplique.",
3392
+ "",
3393
+ "Anclas del código",
3394
+ likelyFiles,
3395
+ "",
3396
+ "Plan de implementación",
3397
+ "- Establecer línea base del comportamiento actual.",
3398
+ "- Aplicar cambios mínimos y reversibles en rutas críticas.",
3399
+ "- Añadir pruebas de regresión para casos positivos, negativos y límites.",
3400
+ "",
3401
+ "Casos límite",
3402
+ "- Contexto faltante o resultados vacíos no deben romper el flujo.",
3403
+ "- Evitar fuga de contexto entre tenants/workspaces.",
3404
+ "",
3405
+ "Validación",
3406
+ "- Ejecutar typecheck y pruebas objetivo; confirmar estabilidad."
3407
+ ].join("\n");
3408
+ }
1982
3409
  return [
1983
3410
  "Objetivo",
1984
3411
  input.original_prompt,
1985
3412
  "",
1986
- "Estado actual",
1987
- `- Intención clasificada: ${input.intent}`,
1988
- "",
1989
3413
  "Restricciones",
1990
3414
  "- Mantener compatibilidad con contratos v1 y validación estricta.",
1991
3415
  "",
1992
- "Archivos probables a editar",
3416
+ "Anclas del código",
1993
3417
  likelyFiles,
1994
3418
  "",
1995
3419
  "Checklist de implementación",
1996
3420
  "- Confirmar entradas/salidas del contrato antes de modificar lógica.",
1997
3421
  "- Aplicar cambios mínimos y mantener aislamiento por tenant/workspace.",
1998
3422
  "",
1999
- "Casos límite",
2000
- "- Workspace sin índice listo.",
2001
- "- Filtros de búsqueda que no devuelven resultados.",
2002
- "",
2003
3423
  "Validación y pruebas",
2004
- "- Ejecutar typecheck y pruebas de contratos/herramientas.",
3424
+ "- Ejecutar typecheck y pruebas de contratos/herramientas."
3425
+ ].join("\n");
3426
+ }
3427
+ if (input.style === "lean") {
3428
+ const anchors = input.refs.length > 0 ? `- Anchors: ${input.refs.slice(0, 2).map((ref) => `${ref.path}:${ref.start_line}`).join(", ")}` : "";
3429
+ return [
3430
+ "Goal",
3431
+ input.original_prompt,
3432
+ "",
3433
+ "Constraints",
3434
+ "- Preserve existing behavior and contract compatibility.",
3435
+ "- Keep changes minimal and safe.",
3436
+ ...(anchors ? ["", anchors] : []),
3437
+ "",
3438
+ "Action steps",
3439
+ "- Confirm current behavior and target scope.",
3440
+ "- Implement the smallest safe change and add regression coverage.",
3441
+ "",
3442
+ "Validation",
3443
+ "- Run relevant tests and confirm no regressions."
3444
+ ].join("\n");
3445
+ }
3446
+ if (input.style === "deep") {
3447
+ return [
3448
+ "Goal",
3449
+ input.original_prompt,
2005
3450
  "",
2006
- "Definición de terminado",
2007
- "- Los tests pasan y el comportamiento coincide con el spec."
3451
+ "Scope and constraints",
3452
+ "- Preserve current behavior and API/contract semantics.",
3453
+ "- Limit changes to the required scope and keep them reversible.",
3454
+ "- Prefer fail-secure defaults where policy boundaries are involved.",
3455
+ "",
3456
+ "Codebase anchors",
3457
+ likelyFiles,
3458
+ "",
3459
+ "Implementation plan",
3460
+ "- Establish baseline behavior and invariants before edits.",
3461
+ "- Apply minimal, safe changes on critical paths only.",
3462
+ "- Add regression coverage for positive, negative, and boundary scenarios.",
3463
+ "",
3464
+ "Edge cases",
3465
+ "- Missing context, empty retrieval results, and async boundary leakage.",
3466
+ "- Cross-tenant/workspace access paths and authorization bypass attempts.",
3467
+ "",
3468
+ "Validation",
3469
+ "- Run typecheck and focused test suites; verify no behavioral regressions."
2008
3470
  ].join("\n");
2009
3471
  }
2010
3472
  return [
2011
3473
  "Goal",
2012
3474
  input.original_prompt,
2013
3475
  "",
2014
- "Current state",
2015
- `- Classified intent: ${input.intent}`,
2016
- "",
2017
3476
  "Constraints",
2018
3477
  "- Keep v1 contract compatibility and strict schema validation.",
2019
3478
  "",
2020
- "Likely files to edit",
3479
+ "Codebase anchors",
2021
3480
  likelyFiles,
2022
3481
  "",
2023
- "Implementation checklist",
3482
+ "Implementation plan",
2024
3483
  "- Confirm request/response contract assumptions before code edits.",
2025
3484
  "- Apply smallest safe changes while preserving tenant/workspace isolation.",
2026
3485
  "",
2027
- "Edge cases",
2028
- "- Workspace has no ready index.",
2029
- "- Search filters produce empty result sets.",
2030
- "",
2031
3486
  "Validation and tests",
2032
- "- Run typecheck and contract/tool tests.",
2033
- "",
2034
- "Definition of done",
2035
- "- Tests pass and behavior matches the v1 spec."
3487
+ "- Run typecheck and contract/tool tests."
2036
3488
  ].join("\n");
2037
3489
  }
2038
3490
  function detectSecretMatches(content) {
@@ -2263,13 +3715,21 @@ export class RetrievalCore {
2263
3715
  store;
2264
3716
  cache;
2265
3717
  cacheTtlSeconds;
3718
+ internalCandidateDepth;
2266
3719
  embeddingProvider;
2267
3720
  embeddingDescriptor;
3721
+ rerankerProvider;
3722
+ rerankerDescriptor;
3723
+ rerankerTopN;
3724
+ rerankerCacheVariant;
2268
3725
  observability;
2269
3726
  scoringConfig;
2270
3727
  scoringProfileId;
2271
3728
  scoringConfigChecksum;
3729
+ enhancerProvider;
3730
+ enhancerProviderDescriptor;
2272
3731
  enhancerConfig;
3732
+ enhancerGenerationConfig;
2273
3733
  chunkingConfig;
2274
3734
  enhancerDecisionTraceEnabled;
2275
3735
  cacheHits = 0;
@@ -2278,14 +3738,31 @@ export class RetrievalCore {
2278
3738
  this.store = store;
2279
3739
  this.cache = cache;
2280
3740
  this.cacheTtlSeconds = options?.cacheTtlSeconds ?? 60;
3741
+ this.internalCandidateDepth = clampInternalCandidateDepth(options?.internalCandidateDepth);
2281
3742
  this.embeddingProvider = options?.embeddingProvider ?? new DeterministicEmbeddingProvider();
2282
3743
  this.embeddingDescriptor = normalizeEmbeddingDescriptor(options?.embeddingDescriptor ?? resolveEmbeddingDescriptor(this.embeddingProvider));
3744
+ this.rerankerProvider = options?.rerankerProvider;
3745
+ this.rerankerTopN = options?.rerankerTopN ?? DEFAULT_SEARCH_RERANKER_TOP_N;
3746
+ if (!Number.isInteger(this.rerankerTopN) || this.rerankerTopN <= 0) {
3747
+ throw new Error("invalid retrieval reranker config: rerankerTopN must be a positive integer");
3748
+ }
3749
+ this.rerankerDescriptor = this.rerankerProvider
3750
+ ? normalizeRerankerDescriptor(resolveRerankerDescriptor(this.rerankerProvider))
3751
+ : undefined;
3752
+ this.rerankerCacheVariant = this.rerankerDescriptor
3753
+ ? `provider:${this.rerankerDescriptor.provider}|model:${this.rerankerDescriptor.model ?? "unknown"}|top_n:${this.rerankerTopN}`
3754
+ : "provider:disabled";
2283
3755
  this.observability = options?.observability ?? getObservability("retrieval-core");
2284
3756
  const baseProfile = resolveRetrievalScoringProfile(options?.scoringProfile);
2285
3757
  this.scoringConfig = mergeRetrievalScoringConfig(baseProfile.config, options?.scoringConfig);
2286
3758
  this.scoringProfileId = options?.scoringProfileId ?? baseProfile.profile_id;
2287
3759
  this.scoringConfigChecksum = scoringConfigChecksum(this.scoringConfig);
3760
+ this.enhancerProvider = options?.enhancerProvider;
3761
+ this.enhancerProviderDescriptor = this.enhancerProvider
3762
+ ? normalizeEnhancerProviderDescriptor(resolveEnhancerProviderDescriptor(this.enhancerProvider))
3763
+ : undefined;
2288
3764
  this.enhancerConfig = mergeRetrievalEnhancerConfig(DEFAULT_RETRIEVAL_ENHANCER_CONFIG, options?.enhancerConfig);
3765
+ this.enhancerGenerationConfig = mergeRetrievalEnhancerGenerationConfig(DEFAULT_RETRIEVAL_ENHANCER_GENERATION_CONFIG, options?.enhancerGenerationConfig);
2289
3766
  this.chunkingConfig = mergeRetrievalChunkingConfig(DEFAULT_RETRIEVAL_CHUNKING_CONFIG, options?.chunkingConfig);
2290
3767
  this.enhancerDecisionTraceEnabled = Boolean(options?.enhancerDecisionTraceEnabled);
2291
3768
  }
@@ -2902,6 +4379,95 @@ export class RetrievalCore {
2902
4379
  status: existing.status
2903
4380
  };
2904
4381
  }
4382
+ async applyLearnedReranker(input) {
4383
+ if (!this.rerankerProvider || !this.rerankerDescriptor) {
4384
+ return input.candidates;
4385
+ }
4386
+ const cappedTopN = Math.min(this.rerankerTopN, input.candidates.length);
4387
+ if (cappedTopN <= 1) {
4388
+ return input.candidates;
4389
+ }
4390
+ const head = input.candidates.slice(0, cappedTopN);
4391
+ const tail = input.candidates.slice(cappedTopN);
4392
+ const labels = {
4393
+ provider: this.rerankerDescriptor.provider,
4394
+ model: this.rerankerDescriptor.model ?? "unknown"
4395
+ };
4396
+ this.observability.metrics.increment("retrieval_reranker_requests_total", 1, labels);
4397
+ const startedAt = Date.now();
4398
+ try {
4399
+ const reranked = await this.rerankerProvider.rerank({
4400
+ query: input.query,
4401
+ documents: head.map((candidate) => buildRerankerDocument(candidate)),
4402
+ top_n: cappedTopN
4403
+ });
4404
+ if (!Array.isArray(reranked) || reranked.length === 0) {
4405
+ throw new RerankerProviderRequestError("invalid_response", "reranker response must contain at least one result");
4406
+ }
4407
+ const seen = new Set();
4408
+ const reordered = [];
4409
+ for (const row of reranked) {
4410
+ if (!Number.isInteger(row.index)) {
4411
+ throw new RerankerProviderRequestError("invalid_response", "reranker result index must be an integer");
4412
+ }
4413
+ if (row.index < 0 || row.index >= head.length) {
4414
+ throw new RerankerProviderRequestError("invalid_response", "reranker result index out of range");
4415
+ }
4416
+ if (seen.has(row.index)) {
4417
+ continue;
4418
+ }
4419
+ const candidate = head[row.index];
4420
+ if (!candidate) {
4421
+ continue;
4422
+ }
4423
+ seen.add(row.index);
4424
+ reordered.push(candidate);
4425
+ }
4426
+ for (let index = 0; index < head.length; index += 1) {
4427
+ if (seen.has(index)) {
4428
+ continue;
4429
+ }
4430
+ const candidate = head[index];
4431
+ if (candidate) {
4432
+ reordered.push(candidate);
4433
+ }
4434
+ }
4435
+ if (reordered.length === 0) {
4436
+ throw new RerankerProviderRequestError("invalid_response", "reranker did not return usable indexes");
4437
+ }
4438
+ const maxTailScore = tail[0]?.score ?? Number.NEGATIVE_INFINITY;
4439
+ const maxHeadScore = head[0]?.score ?? 0;
4440
+ const scoreAnchor = Math.max(maxHeadScore, maxTailScore) + 1;
4441
+ const scoreStep = 1e-6;
4442
+ const adjusted = reordered.map((candidate, index) => ({
4443
+ ...candidate,
4444
+ score: scoreAnchor - index * scoreStep
4445
+ }));
4446
+ return [...adjusted, ...tail];
4447
+ }
4448
+ catch (error) {
4449
+ const reason = classifyRerankerFailureReason(error);
4450
+ this.observability.metrics.increment("retrieval_reranker_failures_total", 1, {
4451
+ ...labels,
4452
+ reason
4453
+ });
4454
+ this.observability.metrics.increment("retrieval_reranker_fallback_total", 1, {
4455
+ reason
4456
+ });
4457
+ this.observability.logger.warn("search_context reranker fallback applied", {
4458
+ trace_id: input.trace_id,
4459
+ provider: labels.provider,
4460
+ model: labels.model,
4461
+ reason,
4462
+ top_n: cappedTopN,
4463
+ error_message: error instanceof Error ? error.message : String(error)
4464
+ });
4465
+ return input.candidates;
4466
+ }
4467
+ finally {
4468
+ this.observability.metrics.observe("retrieval_reranker_latency_ms", Date.now() - startedAt, labels);
4469
+ }
4470
+ }
2905
4471
  async searchContext(input) {
2906
4472
  const searchStartedAt = Date.now();
2907
4473
  const index = await this.store.getLatestReadyIndex({
@@ -2915,9 +4481,9 @@ export class RetrievalCore {
2915
4481
  tenant_id: input.tenant_id,
2916
4482
  index_id: index.index_id
2917
4483
  });
2918
- const topK = Math.min(input.request.top_k ?? 8, MAX_TOP_K);
2919
- const candidatePoolTopK = Math.min(MAX_TOP_K, Math.max(topK * 4, 12));
2920
4484
  const query = normalizeQuery(input.request.query);
4485
+ const topK = Math.min(input.request.top_k ?? 8, MAX_TOP_K);
4486
+ const candidatePoolTopK = Math.max(Math.max(topK * 4, 12), this.internalCandidateDepth);
2921
4487
  if (!indexMetadata) {
2922
4488
  this.observability.metrics.increment("retrieval_embedding_metadata_mismatch_total", 1, {
2923
4489
  reason: "metadata_missing"
@@ -2977,12 +4543,20 @@ export class RetrievalCore {
2977
4543
  throw new RetrievalError("UPSTREAM_FAILURE", `embedding provider returned query embedding dimensions ${queryEmbedding.length}; expected ${this.embeddingDescriptor.dimensions}`);
2978
4544
  }
2979
4545
  const queryTokens = tokenize(query);
4546
+ const searchLiterals = extractSearchLiterals(query);
4547
+ this.observability.metrics.observe("retrieval_candidate_depth_requested", topK, {
4548
+ retrieval_profile_id: this.scoringProfileId
4549
+ });
4550
+ this.observability.metrics.observe("retrieval_candidate_depth_effective", candidatePoolTopK, {
4551
+ retrieval_profile_id: this.scoringProfileId
4552
+ });
2980
4553
  const cacheKey = buildQueryCacheKey({
2981
4554
  workspace_id: input.workspace_id,
2982
4555
  index_version: index.index_version,
2983
4556
  query,
2984
4557
  top_k: topK,
2985
- filters: input.request.filters
4558
+ filters: input.request.filters,
4559
+ retrieval_variant: this.rerankerCacheVariant
2986
4560
  });
2987
4561
  const cached = await this.cache.get(cacheKey);
2988
4562
  if (cached) {
@@ -2999,6 +4573,8 @@ export class RetrievalCore {
2999
4573
  tenant_id: input.tenant_id,
3000
4574
  workspace_id: input.workspace_id
3001
4575
  }, async () => {
4576
+ let literalPathMatchCount = 0;
4577
+ let literalSnippetMatchCount = 0;
3002
4578
  let ranked;
3003
4579
  if (this.store.rankChunksByIndex) {
3004
4580
  ranked = await this.store.rankChunksByIndex({
@@ -3017,11 +4593,21 @@ export class RetrievalCore {
3017
4593
  .map((candidate) => {
3018
4594
  let score = candidate.score;
3019
4595
  score += pathQualityBias(candidate.path, queryTokens, this.scoringConfig, query);
4596
+ const literalBoost = applyLiteralBoost({
4597
+ path: candidate.path,
4598
+ snippet: candidate.snippet,
4599
+ literals: searchLiterals,
4600
+ path_bias: this.scoringConfig.path_bias
4601
+ });
4602
+ score += literalBoost.boost;
4603
+ literalPathMatchCount += literalBoost.path_matches;
4604
+ literalSnippetMatchCount += literalBoost.snippet_matches;
3020
4605
  if (looksLowInformation(candidate.snippet)) {
3021
4606
  score -= this.scoringConfig.rerank.low_information_penalty;
3022
4607
  }
3023
4608
  const reason = chooseReason({
3024
4609
  lexical: candidate.lexical_score,
4610
+ literal_match: literalBoost.matched,
3025
4611
  path_match: candidate.path_match,
3026
4612
  recency_boosted: candidate.recency_boosted
3027
4613
  });
@@ -3059,10 +4645,24 @@ export class RetrievalCore {
3059
4645
  score -= candidateWeights.generated_penalty;
3060
4646
  }
3061
4647
  score += pathQualityBias(chunk.path, queryTokens, this.scoringConfig, query);
4648
+ const literalBoost = applyLiteralBoost({
4649
+ path: chunk.path,
4650
+ snippet: chunk.snippet,
4651
+ literals: searchLiterals,
4652
+ path_bias: this.scoringConfig.path_bias
4653
+ });
4654
+ score += literalBoost.boost;
4655
+ literalPathMatchCount += literalBoost.path_matches;
4656
+ literalSnippetMatchCount += literalBoost.snippet_matches;
3062
4657
  if (looksLowInformation(chunk.snippet)) {
3063
4658
  score -= this.scoringConfig.rerank.low_information_penalty;
3064
4659
  }
3065
- const reason = chooseReason({ lexical: l, path_match: pathMatch, recency_boosted: recencyBoost });
4660
+ const reason = chooseReason({
4661
+ lexical: l,
4662
+ literal_match: literalBoost.matched,
4663
+ path_match: pathMatch,
4664
+ recency_boosted: recencyBoost
4665
+ });
3066
4666
  return {
3067
4667
  path: chunk.path,
3068
4668
  start_line: chunk.start_line,
@@ -3078,8 +4678,28 @@ export class RetrievalCore {
3078
4678
  channel: "hybrid",
3079
4679
  retrieval_profile_id: this.scoringProfileId
3080
4680
  });
4681
+ this.observability.metrics.observe("retrieval_candidates_pre_rerank_count", output.length, {
4682
+ retrieval_profile_id: this.scoringProfileId
4683
+ });
4684
+ if (literalPathMatchCount > 0) {
4685
+ this.observability.metrics.increment("retrieval_literal_boost_applied_total", literalPathMatchCount, {
4686
+ retrieval_profile_id: this.scoringProfileId,
4687
+ channel: "path"
4688
+ });
4689
+ }
4690
+ if (literalSnippetMatchCount > 0) {
4691
+ this.observability.metrics.increment("retrieval_literal_boost_applied_total", literalSnippetMatchCount, {
4692
+ retrieval_profile_id: this.scoringProfileId,
4693
+ channel: "snippet"
4694
+ });
4695
+ }
3081
4696
  return output;
3082
4697
  });
4698
+ const rerankedCandidates = await this.observability.tracing.withSpan("retrieval.learned_rerank", { trace_id: input.trace_id }, async () => this.applyLearnedReranker({
4699
+ trace_id: input.trace_id,
4700
+ query,
4701
+ candidates
4702
+ }));
3083
4703
  const deduped = await this.observability.tracing.withSpan("retrieval.rerank", { trace_id: input.trace_id }, async () => {
3084
4704
  const output = [];
3085
4705
  const seen = new Set();
@@ -3089,7 +4709,7 @@ export class RetrievalCore {
3089
4709
  const maxChunksPerPath = hasFileLookupIntent(queryTokens)
3090
4710
  ? this.scoringConfig.rerank.max_chunks_per_path_file_lookup
3091
4711
  : this.scoringConfig.rerank.max_chunks_per_path_default;
3092
- const available = [...candidates];
4712
+ const available = [...rerankedCandidates];
3093
4713
  while (output.length < topK && available.length > 0) {
3094
4714
  let bestIndex = -1;
3095
4715
  let bestAdjustedScore = Number.NEGATIVE_INFINITY;
@@ -3147,6 +4767,39 @@ export class RetrievalCore {
3147
4767
  }
3148
4768
  return output;
3149
4769
  });
4770
+ const candidateRankByKey = new Map();
4771
+ for (let index = 0; index < rerankedCandidates.length; index += 1) {
4772
+ const candidate = rerankedCandidates[index];
4773
+ if (!candidate) {
4774
+ continue;
4775
+ }
4776
+ const key = `${candidate.path}:${candidate.start_line}:${candidate.end_line}`;
4777
+ if (!candidateRankByKey.has(key)) {
4778
+ candidateRankByKey.set(key, index + 1);
4779
+ }
4780
+ }
4781
+ let literalMatchesInTopK = 0;
4782
+ for (let postRank = 0; postRank < deduped.length; postRank += 1) {
4783
+ const row = deduped[postRank];
4784
+ if (!row) {
4785
+ continue;
4786
+ }
4787
+ if (isExactLiteralReason(row.reason)) {
4788
+ literalMatchesInTopK += 1;
4789
+ }
4790
+ this.observability.metrics.increment("retrieval_reason_topk_total", 1, {
4791
+ retrieval_profile_id: this.scoringProfileId,
4792
+ reason: row.reason
4793
+ });
4794
+ const key = `${row.path}:${row.start_line}:${row.end_line}`;
4795
+ const preRank = candidateRankByKey.get(key) ?? postRank + 1;
4796
+ this.observability.metrics.observe("retrieval_rank_shift_delta", preRank - (postRank + 1), {
4797
+ retrieval_profile_id: this.scoringProfileId
4798
+ });
4799
+ }
4800
+ this.observability.metrics.observe("retrieval_literal_matches_topk", literalMatchesInTopK, {
4801
+ retrieval_profile_id: this.scoringProfileId
4802
+ });
3150
4803
  const output = {
3151
4804
  trace_id: input.trace_id,
3152
4805
  results: deduped,
@@ -3171,18 +4824,116 @@ export class RetrievalCore {
3171
4824
  await this.cache.set(cacheKey, output, this.cacheTtlSeconds);
3172
4825
  return output;
3173
4826
  }
4827
+ enhancerProviderLabels() {
4828
+ return {
4829
+ provider: this.enhancerProviderDescriptor?.provider ?? "template",
4830
+ model: this.enhancerProviderDescriptor?.model ?? "n/a",
4831
+ tool_mode: this.enhancerGenerationConfig.tool_mode
4832
+ };
4833
+ }
4834
+ buildEnhancerContextSnippets(results) {
4835
+ const maxSnippets = this.enhancerGenerationConfig.max_context_snippets;
4836
+ const snippets = [];
4837
+ for (const result of results.slice(0, maxSnippets)) {
4838
+ snippets.push({
4839
+ path: result.path,
4840
+ start_line: result.start_line,
4841
+ end_line: result.end_line,
4842
+ reason: result.reason,
4843
+ snippet: result.snippet.slice(0, 1_600),
4844
+ score: result.score
4845
+ });
4846
+ }
4847
+ return snippets;
4848
+ }
4849
+ async generateEnhancedPrompt(input) {
4850
+ if (!this.enhancerProvider) {
4851
+ return formatEnhancedPrompt({
4852
+ style: input.style_resolved,
4853
+ language: input.language,
4854
+ original_prompt: input.request.prompt,
4855
+ refs: input.context_refs
4856
+ });
4857
+ }
4858
+ const maxAttempts = this.enhancerGenerationConfig.max_retries + 1;
4859
+ let lastFailure;
4860
+ for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
4861
+ const startedAt = Date.now();
4862
+ this.observability.metrics.increment("enhancer_provider_requests_total", 1, this.enhancerProviderLabels());
4863
+ try {
4864
+ const generated = await runWithInactivityTimeout({
4865
+ timeout_ms: this.enhancerGenerationConfig.timeout_ms,
4866
+ fn: ({ touch, signal }) => this.enhancerProvider.generate({
4867
+ trace_id: input.trace_id,
4868
+ tenant_id: input.tenant_id,
4869
+ workspace_id: input.workspace_id,
4870
+ request: input.request,
4871
+ style_requested: input.style_requested,
4872
+ style_resolved: input.style_resolved,
4873
+ intent: input.intent,
4874
+ query_intent: input.query_intent,
4875
+ language: input.language,
4876
+ context_refs: input.context_refs,
4877
+ context_snippets: input.context_snippets,
4878
+ warnings: input.warnings,
4879
+ questions: input.questions,
4880
+ tool_mode: this.enhancerGenerationConfig.tool_mode,
4881
+ abort_signal: signal,
4882
+ on_progress: touch
4883
+ })
4884
+ });
4885
+ this.observability.metrics.observe("enhancer_provider_latency_ms", Date.now() - startedAt, this.enhancerProviderLabels());
4886
+ const enhancedPrompt = normalizeProviderEnhancedPrompt(generated.enhanced_prompt);
4887
+ if (enhancedPrompt.length === 0) {
4888
+ throw new EnhancerProviderRequestError("invalid_response", "enhancer provider returned an empty enhanced_prompt");
4889
+ }
4890
+ return enhancedPrompt;
4891
+ }
4892
+ catch (error) {
4893
+ const reason = classifyEnhancerGenerationFailureReason(error);
4894
+ const failure = error instanceof EnhancerProviderRequestError ? error : new EnhancerProviderRequestError(reason, String(error));
4895
+ lastFailure = failure;
4896
+ this.observability.metrics.increment("enhancer_provider_failures_total", 1, {
4897
+ ...this.enhancerProviderLabels(),
4898
+ reason
4899
+ });
4900
+ this.observability.logger.warn("enhancer provider generation failed", {
4901
+ trace_id: input.trace_id,
4902
+ attempt,
4903
+ max_attempts: maxAttempts,
4904
+ reason,
4905
+ retrying: attempt < maxAttempts &&
4906
+ reason !== "timeout" &&
4907
+ reason !== "schema_error" &&
4908
+ reason !== "invalid_response",
4909
+ style_requested: input.style_requested,
4910
+ style_resolved: input.style_resolved,
4911
+ provider: this.enhancerProviderDescriptor?.provider ?? "custom",
4912
+ model: this.enhancerProviderDescriptor?.model ?? "unknown",
4913
+ error: failure.message
4914
+ });
4915
+ if (reason === "timeout" || reason === "schema_error" || reason === "invalid_response") {
4916
+ break;
4917
+ }
4918
+ }
4919
+ }
4920
+ const message = lastFailure?.message ?? "enhancer provider failed";
4921
+ throw new RetrievalError("UPSTREAM_FAILURE", `enhancer provider failed after retries: ${message}`);
4922
+ }
3174
4923
  async enhancePrompt(input) {
3175
4924
  const startedAt = Date.now();
3176
4925
  const warnings = [];
3177
- const questions = [];
3178
- const addQuestion = (value) => {
3179
- if (!questions.includes(value)) {
3180
- questions.push(value);
3181
- }
3182
- };
3183
4926
  const intent = classifyIntent(input.request.prompt);
3184
4927
  const queryIntent = classifyEnhancerQueryIntent(input.request.prompt, input.request.conversation_history);
3185
4928
  const language = detectDominantLanguage(input.request.prompt, input.request.conversation_history);
4929
+ const style = resolveEnhancerPromptStyle({
4930
+ requested: input.request.style,
4931
+ intent,
4932
+ query_intent: queryIntent,
4933
+ prompt: input.request.prompt,
4934
+ history: input.request.conversation_history,
4935
+ has_context: Boolean(input.request.project_root_path && input.workspace_id)
4936
+ });
3186
4937
  const negativePreferences = detectNegativePathPreferences(`${input.request.prompt}\n${input.request.conversation_history.map((entry) => entry.content).join("\n")}`);
3187
4938
  const intentPolicy = resolveEnhancerIntentPolicy({
3188
4939
  query_intent: queryIntent,
@@ -3292,16 +5043,6 @@ export class RetrievalCore {
3292
5043
  strict_impl_only_filtering: intentPolicy.strict_impl_only_filtering
3293
5044
  }));
3294
5045
  searchResults = collapseEnhancerCandidatesByDirectory(searchResults, intentPolicy.max_candidates_per_directory_pre_rerank).slice(0, intentPolicy.max_candidates_pre_rerank);
3295
- const symbolCandidates = extractLikelyCodeSymbols(`${input.request.prompt}\n${input.request.conversation_history.map((entry) => entry.content).join("\n")}`, 3);
3296
- if (confidenceSignals.failed_signals.includes("score_spread")) {
3297
- addQuestion(localizeLowConfidenceQuestion({ language, kind: "scope" }));
3298
- }
3299
- if (confidenceSignals.failed_signals.includes("token_overlap")) {
3300
- addQuestion(localizeLowConfidenceQuestion({ language, kind: "symbol", symbol: symbolCandidates[0] }));
3301
- }
3302
- if (confidenceSignals.failed_signals.includes("path_diversity")) {
3303
- addQuestion(localizeLowConfidenceQuestion({ language, kind: "source_priority" }));
3304
- }
3305
5046
  }
3306
5047
  else {
3307
5048
  searchResults = dedupeEnhancerCandidatesByPath(searchResults);
@@ -3310,6 +5051,9 @@ export class RetrievalCore {
3310
5051
  candidateCountPostRerank = searchResults.length;
3311
5052
  }
3312
5053
  catch (error) {
5054
+ if (error instanceof RetrievalError && error.code === "RATE_LIMITED") {
5055
+ throw error;
5056
+ }
3313
5057
  warnings.push("Context retrieval unavailable; enhancement generated with limited confidence.");
3314
5058
  fallbackTriggered = true;
3315
5059
  fallbackReason = "context_retrieval_unavailable";
@@ -3319,31 +5063,34 @@ export class RetrievalCore {
3319
5063
  }
3320
5064
  }
3321
5065
  }
3322
- if (intent === "unknown") {
3323
- addQuestion(language === "es"
3324
- ? "¿Cuál es el resultado esperado exacto y el alcance del cambio?"
3325
- : language === "zh"
3326
- ? "这次变更的精确目标和范围是什么?"
3327
- : "What exact outcome and scope should this change target?");
3328
- }
3329
5066
  const contextRefs = searchResults.map((result) => ({
3330
5067
  path: result.path,
3331
5068
  start_line: result.start_line,
3332
5069
  end_line: result.end_line,
3333
5070
  reason: result.reason
3334
5071
  }));
3335
- const enhancedPrompt = formatEnhancedPrompt({
5072
+ const contextSnippets = this.buildEnhancerContextSnippets(searchResults);
5073
+ const enhancedPrompt = await this.generateEnhancedPrompt({
5074
+ trace_id: input.trace_id,
5075
+ tenant_id: input.tenant_id,
5076
+ workspace_id: input.workspace_id,
5077
+ request: input.request,
5078
+ style_requested: style.requested,
5079
+ style_resolved: style.resolved,
3336
5080
  intent,
5081
+ query_intent: queryIntent,
3337
5082
  language,
3338
- original_prompt: input.request.prompt,
3339
- refs: contextRefs
5083
+ context_refs: contextRefs,
5084
+ context_snippets: contextSnippets,
5085
+ warnings: [],
5086
+ questions: []
3340
5087
  });
3341
5088
  const output = {
3342
5089
  trace_id: input.trace_id,
3343
5090
  enhanced_prompt: enhancedPrompt,
3344
5091
  context_refs: contextRefs,
3345
- warnings,
3346
- questions
5092
+ warnings: [],
5093
+ questions: []
3347
5094
  };
3348
5095
  const latency_ms = Date.now() - startedAt;
3349
5096
  this.observability.metrics.observe("enhancer_latency_ms", latency_ms, {});
@@ -3392,6 +5139,11 @@ export class RetrievalCore {
3392
5139
  fallback_triggered: fallbackTriggered,
3393
5140
  fallback_reason: fallbackReason,
3394
5141
  query_intent: queryIntent,
5142
+ style_requested: style.requested,
5143
+ style_resolved: style.resolved,
5144
+ enhancer_provider: this.enhancerProviderDescriptor?.provider ?? "template",
5145
+ enhancer_model: this.enhancerProviderDescriptor?.model ?? null,
5146
+ enhancer_tool_mode: this.enhancerGenerationConfig.tool_mode,
3395
5147
  confidence_score_spread: confidenceSignals?.score_spread ?? null,
3396
5148
  confidence_token_overlap: confidenceSignals?.token_overlap ?? null,
3397
5149
  confidence_path_diversity: confidenceSignals?.path_diversity ?? null,
@@ -3415,3 +5167,4 @@ export async function seedWorkspaceIndex(core, artifact) {
3415
5167
  return core.indexArtifact(artifact);
3416
5168
  }
3417
5169
  export * from "./remote-sync.js";
5170
+ export * from "./indexing-ignore.js";