@rce-mcp/retrieval-core 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +1 -0
- package/dist/.tsbuildinfo +1 -1
- package/dist/chunking.js +5 -4
- package/dist/index.d.ts +219 -4
- package/dist/index.js +1928 -176
- package/dist/remote-sync.js +2 -1
- package/package.json +8 -6
- package/src/chunking.ts +5 -4
- package/src/index.ts +2398 -208
- package/src/remote-sync.ts +3 -1
- package/test/benchmark.thresholds.test.ts +55 -0
- package/test/chunking.config.test.ts +28 -0
- package/test/chunking.language-aware.test.ts +23 -4
- package/test/chunking.parser-availability.poc.test.ts +3 -3
- package/test/claude-agent-provider.test.ts +209 -0
- package/test/embedding-provider.test.ts +450 -1
- package/test/enhance-confidence.test.ts +275 -3
- package/test/integration.test.ts +185 -1
- package/test/mcp-search-quality.regression.test.ts +322 -0
- package/test/remote-sync.integration.test.ts +11 -0
package/src/index.ts
CHANGED
|
@@ -1,11 +1,18 @@
|
|
|
1
1
|
import { createHash, randomUUID } from "node:crypto";
|
|
2
|
-
import type {
|
|
2
|
+
import type {
|
|
3
|
+
EnhancePromptInput,
|
|
4
|
+
EnhancePromptOutput,
|
|
5
|
+
EnhancePromptStyle,
|
|
6
|
+
SearchContextInput,
|
|
7
|
+
SearchContextOutput
|
|
8
|
+
} from "@rce-mcp/contracts";
|
|
3
9
|
import {
|
|
4
10
|
buildQueryCacheKey,
|
|
5
11
|
type CandidateScoreWeights,
|
|
6
12
|
type IndexRepository,
|
|
7
13
|
type QueryCache,
|
|
8
14
|
type RankedChunkCandidate,
|
|
15
|
+
tokenizeForRanking,
|
|
9
16
|
type WorkspaceRecord
|
|
10
17
|
} from "@rce-mcp/data-plane";
|
|
11
18
|
import { InMemoryQueryCache } from "@rce-mcp/data-plane";
|
|
@@ -19,12 +26,16 @@ import {
|
|
|
19
26
|
|
|
20
27
|
type RetrievalMode = SearchContextOutput["search_metadata"]["retrieval_mode"];
|
|
21
28
|
type ContextRef = EnhancePromptOutput["context_refs"][number];
|
|
29
|
+
type SearchResultRow = SearchContextOutput["results"][number];
|
|
22
30
|
|
|
23
31
|
const MAX_FILE_SIZE_BYTES = 1_000_000;
|
|
24
32
|
const MAX_CHUNKS_PER_FILE = 300;
|
|
25
|
-
const
|
|
26
|
-
const
|
|
33
|
+
const DEFAULT_TARGET_CHUNK_TOKENS = 420;
|
|
34
|
+
const DEFAULT_CHUNK_OVERLAP_TOKENS = 90;
|
|
27
35
|
const MAX_TOP_K = 20;
|
|
36
|
+
const DEFAULT_INTERNAL_CANDIDATE_DEPTH = 100;
|
|
37
|
+
const MIN_INTERNAL_CANDIDATE_DEPTH = 20;
|
|
38
|
+
const MAX_INTERNAL_CANDIDATE_DEPTH = 200;
|
|
28
39
|
const MAX_CONTEXT_BUDGET_TOKENS = 12_000;
|
|
29
40
|
export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_BASE_URL = "https://router.tumuer.me/v1";
|
|
30
41
|
export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_MODEL = "Qwen/Qwen3-Embedding-4B";
|
|
@@ -32,6 +43,17 @@ export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_DIMENSIONS = 2560;
|
|
|
32
43
|
export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_TIMEOUT_MS = 10_000;
|
|
33
44
|
export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_BATCH_SIZE = 64;
|
|
34
45
|
export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_MAX_RETRIES = 2;
|
|
46
|
+
export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_TRANSIENT_403_MAX_RETRIES = 4;
|
|
47
|
+
export const DEFAULT_OPENAI_COMPATIBLE_RERANKER_BASE_URL = "https://router.tumuer.me/v1";
|
|
48
|
+
export const DEFAULT_OPENAI_COMPATIBLE_RERANKER_MODEL = "Qwen/Qwen3-Reranker-4B";
|
|
49
|
+
export const DEFAULT_OPENAI_COMPATIBLE_RERANKER_TIMEOUT_MS = 2_500;
|
|
50
|
+
export const DEFAULT_SEARCH_RERANKER_TOP_N = 30;
|
|
51
|
+
export const DEFAULT_PROVIDER_MAX_REQUESTS_PER_MINUTE = 90;
|
|
52
|
+
export const DEFAULT_PROVIDER_LIMIT_INDEX_MAX_WAIT_MS = 120_000;
|
|
53
|
+
export const DEFAULT_PROVIDER_LIMIT_QUERY_MAX_WAIT_MS = 1_000;
|
|
54
|
+
export const DEFAULT_PROVIDER_LIMIT_RERANK_MAX_WAIT_MS = 500;
|
|
55
|
+
export const DEFAULT_CLAUDE_ENHANCER_MODEL = "claude-3-5-sonnet-latest";
|
|
56
|
+
const DEFAULT_CLAUDE_ENHANCER_MAX_TURNS = 3;
|
|
35
57
|
|
|
36
58
|
const DEFAULT_CANDIDATE_SCORE_WEIGHTS: CandidateScoreWeights = {
|
|
37
59
|
lexical_weight: 0.6,
|
|
@@ -69,6 +91,10 @@ export interface RetrievalPathBiasConfig {
|
|
|
69
91
|
negation_avoid_tests_penalty: number;
|
|
70
92
|
negation_avoid_examples_penalty: number;
|
|
71
93
|
negation_avoid_archive_penalty: number;
|
|
94
|
+
security_trace_meta_penalty: number;
|
|
95
|
+
literal_path_boost: number;
|
|
96
|
+
literal_snippet_boost: number;
|
|
97
|
+
literal_max_boost: number;
|
|
72
98
|
min_total_bias: number;
|
|
73
99
|
max_total_bias: number;
|
|
74
100
|
}
|
|
@@ -93,9 +119,20 @@ export interface RetrievalEnhancerConfig {
|
|
|
93
119
|
rerank_timeout_ms: number;
|
|
94
120
|
}
|
|
95
121
|
|
|
122
|
+
export type EnhancerToolMode = "none" | "read_only";
|
|
123
|
+
|
|
124
|
+
export interface RetrievalEnhancerGenerationConfig {
|
|
125
|
+
timeout_ms: number;
|
|
126
|
+
max_retries: number;
|
|
127
|
+
tool_mode: EnhancerToolMode;
|
|
128
|
+
max_context_snippets: number;
|
|
129
|
+
}
|
|
130
|
+
|
|
96
131
|
export interface RetrievalChunkingConfig {
|
|
97
132
|
strategy: ChunkingStrategy;
|
|
98
133
|
fallback_strategy: "sliding";
|
|
134
|
+
target_chunk_tokens: number;
|
|
135
|
+
chunk_overlap_tokens: number;
|
|
99
136
|
parse_timeout_ms: number;
|
|
100
137
|
enabled_languages: string[];
|
|
101
138
|
}
|
|
@@ -107,9 +144,12 @@ export type RetrievalScoringConfigInput = Partial<{
|
|
|
107
144
|
}>;
|
|
108
145
|
|
|
109
146
|
export type RetrievalEnhancerConfigInput = Partial<RetrievalEnhancerConfig>;
|
|
147
|
+
export type RetrievalEnhancerGenerationConfigInput = Partial<RetrievalEnhancerGenerationConfig>;
|
|
110
148
|
export type RetrievalChunkingConfigInput = Partial<{
|
|
111
149
|
strategy: ChunkingStrategy;
|
|
112
150
|
fallback_strategy: "sliding";
|
|
151
|
+
target_chunk_tokens: number;
|
|
152
|
+
chunk_overlap_tokens: number;
|
|
113
153
|
parse_timeout_ms: number;
|
|
114
154
|
enabled_languages: string[];
|
|
115
155
|
}>;
|
|
@@ -144,6 +184,10 @@ export const BASELINE_RETRIEVAL_SCORING_CONFIG: RetrievalScoringConfig = {
|
|
|
144
184
|
negation_avoid_tests_penalty: 0.35,
|
|
145
185
|
negation_avoid_examples_penalty: 0.3,
|
|
146
186
|
negation_avoid_archive_penalty: 0.35,
|
|
187
|
+
security_trace_meta_penalty: 0.22,
|
|
188
|
+
literal_path_boost: 0.3,
|
|
189
|
+
literal_snippet_boost: 0.18,
|
|
190
|
+
literal_max_boost: 0.5,
|
|
147
191
|
min_total_bias: -0.45,
|
|
148
192
|
max_total_bias: 0.35
|
|
149
193
|
},
|
|
@@ -192,6 +236,10 @@ export const CONSERVATIVE_RETRIEVAL_SCORING_CONFIG: RetrievalScoringConfig = {
|
|
|
192
236
|
negation_avoid_tests_penalty: 0.2,
|
|
193
237
|
negation_avoid_examples_penalty: 0.16,
|
|
194
238
|
negation_avoid_archive_penalty: 0.2,
|
|
239
|
+
security_trace_meta_penalty: 0.14,
|
|
240
|
+
literal_path_boost: 0.18,
|
|
241
|
+
literal_snippet_boost: 0.1,
|
|
242
|
+
literal_max_boost: 0.28,
|
|
195
243
|
min_total_bias: -0.25,
|
|
196
244
|
max_total_bias: 0.2
|
|
197
245
|
},
|
|
@@ -210,9 +258,18 @@ export const DEFAULT_RETRIEVAL_ENHANCER_CONFIG: RetrievalEnhancerConfig = {
|
|
|
210
258
|
rerank_timeout_ms: 40
|
|
211
259
|
};
|
|
212
260
|
|
|
261
|
+
export const DEFAULT_RETRIEVAL_ENHANCER_GENERATION_CONFIG: RetrievalEnhancerGenerationConfig = {
|
|
262
|
+
timeout_ms: 18_000,
|
|
263
|
+
max_retries: 1,
|
|
264
|
+
tool_mode: "read_only",
|
|
265
|
+
max_context_snippets: 6
|
|
266
|
+
};
|
|
267
|
+
|
|
213
268
|
export const DEFAULT_RETRIEVAL_CHUNKING_CONFIG: RetrievalChunkingConfig = {
|
|
214
269
|
strategy: "sliding",
|
|
215
270
|
fallback_strategy: "sliding",
|
|
271
|
+
target_chunk_tokens: DEFAULT_TARGET_CHUNK_TOKENS,
|
|
272
|
+
chunk_overlap_tokens: DEFAULT_CHUNK_OVERLAP_TOKENS,
|
|
216
273
|
parse_timeout_ms: 80,
|
|
217
274
|
enabled_languages: ["typescript", "javascript", "python", "go"]
|
|
218
275
|
};
|
|
@@ -351,6 +408,33 @@ export function mergeRetrievalEnhancerConfig(
|
|
|
351
408
|
return next;
|
|
352
409
|
}
|
|
353
410
|
|
|
411
|
+
/**
 * Validates a complete enhancer generation config, throwing on the first
 * field that is out of range.
 *
 * Checks run in a fixed order: `timeout_ms`, `max_retries`, `tool_mode`,
 * `max_context_snippets`.
 *
 * @param config - Fully populated config (defaults already merged in).
 * @throws Error when any field fails its constraint.
 */
function validateEnhancerGenerationConfig(config: RetrievalEnhancerGenerationConfig): void {
  const timeoutIsValid = Number.isInteger(config.timeout_ms) && config.timeout_ms > 0;
  if (!timeoutIsValid) {
    throw new Error("invalid retrieval enhancer generation config: timeout_ms must be a positive integer");
  }

  // Zero retries is allowed; only negatives and non-integers are rejected.
  const retriesAreValid = Number.isInteger(config.max_retries) && config.max_retries >= 0;
  if (!retriesAreValid) {
    throw new Error("invalid retrieval enhancer generation config: max_retries must be a non-negative integer");
  }

  switch (config.tool_mode) {
    case "none":
    case "read_only":
      break;
    default:
      throw new Error("invalid retrieval enhancer generation config: tool_mode must be none|read_only");
  }

  const snippetLimitIsValid = Number.isInteger(config.max_context_snippets) && config.max_context_snippets > 0;
  if (!snippetLimitIsValid) {
    throw new Error("invalid retrieval enhancer generation config: max_context_snippets must be a positive integer");
  }
}
|
|
425
|
+
|
|
426
|
+
/**
 * Layers optional overrides on top of a base enhancer generation config and
 * validates the combined result before returning it.
 *
 * @param base - Config supplying every field.
 * @param overrides - Optional partial config; its fields win over `base`.
 * @returns A new config object; neither argument is mutated.
 * @throws Error when the merged config fails validation.
 */
export function mergeRetrievalEnhancerGenerationConfig(
  base: RetrievalEnhancerGenerationConfig,
  overrides?: RetrievalEnhancerGenerationConfigInput
): RetrievalEnhancerGenerationConfig {
  const patch: RetrievalEnhancerGenerationConfigInput = overrides ?? {};
  const merged: RetrievalEnhancerGenerationConfig = { ...base, ...patch };
  validateEnhancerGenerationConfig(merged);
  return merged;
}
|
|
437
|
+
|
|
354
438
|
function normalizeChunkingLanguageList(value: string[]): string[] {
|
|
355
439
|
const deduped = new Set<string>();
|
|
356
440
|
for (const language of value) {
|
|
@@ -370,6 +454,15 @@ function validateChunkingConfig(config: RetrievalChunkingConfig): void {
|
|
|
370
454
|
if (config.fallback_strategy !== "sliding") {
|
|
371
455
|
throw new Error("invalid retrieval chunking config: fallback_strategy must be sliding");
|
|
372
456
|
}
|
|
457
|
+
if (!Number.isInteger(config.target_chunk_tokens) || config.target_chunk_tokens <= 0) {
|
|
458
|
+
throw new Error("invalid retrieval chunking config: target_chunk_tokens must be a positive integer");
|
|
459
|
+
}
|
|
460
|
+
if (!Number.isInteger(config.chunk_overlap_tokens) || config.chunk_overlap_tokens <= 0) {
|
|
461
|
+
throw new Error("invalid retrieval chunking config: chunk_overlap_tokens must be a positive integer");
|
|
462
|
+
}
|
|
463
|
+
if (config.chunk_overlap_tokens >= config.target_chunk_tokens) {
|
|
464
|
+
throw new Error("invalid retrieval chunking config: chunk_overlap_tokens must be less than target_chunk_tokens");
|
|
465
|
+
}
|
|
373
466
|
if (!Number.isInteger(config.parse_timeout_ms) || config.parse_timeout_ms <= 0) {
|
|
374
467
|
throw new Error("invalid retrieval chunking config: parse_timeout_ms must be a positive integer");
|
|
375
468
|
}
|
|
@@ -411,19 +504,24 @@ function scoringConfigChecksum(config: RetrievalScoringConfig): string {
|
|
|
411
504
|
return sha256(stableSerialize(config)).slice(0, 12);
|
|
412
505
|
}
|
|
413
506
|
|
|
507
|
+
/**
 * Resolves the internal candidate depth used for retrieval.
 *
 * A missing or non-finite value falls back to
 * `DEFAULT_INTERNAL_CANDIDATE_DEPTH`; a finite value is truncated toward zero.
 * The result is then clamped between `MIN_INTERNAL_CANDIDATE_DEPTH` and
 * `MAX_INTERNAL_CANDIDATE_DEPTH`.
 */
function clampInternalCandidateDepth(value: number | undefined): number {
  let depth = DEFAULT_INTERNAL_CANDIDATE_DEPTH;
  // Number.isFinite is false for undefined, NaN and +/-Infinity alike.
  if (value !== undefined && Number.isFinite(value)) {
    depth = Math.trunc(value);
  }
  const cappedAbove = Math.min(MAX_INTERNAL_CANDIDATE_DEPTH, depth);
  return Math.max(MIN_INTERNAL_CANDIDATE_DEPTH, cappedAbove);
}
|
|
511
|
+
|
|
414
512
|
const REASON_STRINGS = [
|
|
415
513
|
"semantic match",
|
|
416
|
-
"exact
|
|
417
|
-
"path
|
|
514
|
+
"exact literal match",
|
|
515
|
+
"path token overlap",
|
|
418
516
|
"recently modified relevant module"
|
|
419
517
|
] as const;
|
|
420
518
|
|
|
421
519
|
export type RetrievalReason = (typeof REASON_STRINGS)[number];
|
|
422
520
|
|
|
423
521
|
export class RetrievalError extends Error {
|
|
424
|
-
readonly code: "INVALID_ARGUMENT" | "NOT_FOUND" | "UPSTREAM_FAILURE";
|
|
522
|
+
readonly code: "INVALID_ARGUMENT" | "NOT_FOUND" | "RATE_LIMITED" | "UPSTREAM_FAILURE";
|
|
425
523
|
|
|
426
|
-
constructor(code: "INVALID_ARGUMENT" | "NOT_FOUND" | "UPSTREAM_FAILURE", message: string) {
|
|
524
|
+
constructor(code: "INVALID_ARGUMENT" | "NOT_FOUND" | "RATE_LIMITED" | "UPSTREAM_FAILURE", message: string) {
|
|
427
525
|
super(message);
|
|
428
526
|
this.code = code;
|
|
429
527
|
}
|
|
@@ -510,13 +608,18 @@ export interface IndexingReport {
|
|
|
510
608
|
|
|
511
609
|
export interface RetrievalCoreOptions {
|
|
512
610
|
cacheTtlSeconds?: number;
|
|
611
|
+
internalCandidateDepth?: number;
|
|
513
612
|
embeddingProvider?: EmbeddingProvider;
|
|
514
613
|
embeddingDescriptor?: EmbeddingDescriptor;
|
|
614
|
+
rerankerProvider?: RerankerProvider;
|
|
615
|
+
rerankerTopN?: number;
|
|
616
|
+
enhancerProvider?: EnhancerGenerationProvider;
|
|
515
617
|
observability?: Observability;
|
|
516
618
|
scoringProfile?: BuiltinRetrievalScoringProfileId;
|
|
517
619
|
scoringProfileId?: string;
|
|
518
620
|
scoringConfig?: RetrievalScoringConfigInput;
|
|
519
621
|
enhancerConfig?: RetrievalEnhancerConfigInput;
|
|
622
|
+
enhancerGenerationConfig?: RetrievalEnhancerGenerationConfigInput;
|
|
520
623
|
chunkingConfig?: RetrievalChunkingConfigInput;
|
|
521
624
|
enhancerDecisionTraceEnabled?: boolean;
|
|
522
625
|
}
|
|
@@ -535,6 +638,67 @@ export interface EmbeddingProvider {
|
|
|
535
638
|
describe?(): EmbeddingDescriptor;
|
|
536
639
|
}
|
|
537
640
|
|
|
641
|
+
export interface RerankerDescriptor {
|
|
642
|
+
provider: string;
|
|
643
|
+
model?: string;
|
|
644
|
+
}
|
|
645
|
+
|
|
646
|
+
export interface RerankerResult {
|
|
647
|
+
index: number;
|
|
648
|
+
relevance_score: number;
|
|
649
|
+
}
|
|
650
|
+
|
|
651
|
+
export interface RerankerProvider {
|
|
652
|
+
rerank(input: { query: string; documents: string[]; top_n: number }): Promise<RerankerResult[]>;
|
|
653
|
+
describe?(): RerankerDescriptor;
|
|
654
|
+
}
|
|
655
|
+
|
|
656
|
+
export type EnhancerIntent = "bugfix" | "feature" | "refactor" | "docs" | "tests" | "unknown";
|
|
657
|
+
export type EnhancerOutputLanguage = "en" | "es" | "zh";
|
|
658
|
+
type ResolvedEnhancerPromptStyle = Exclude<EnhancePromptStyle, "auto">;
|
|
659
|
+
|
|
660
|
+
export interface EnhancerContextSnippet {
|
|
661
|
+
path: string;
|
|
662
|
+
start_line: number;
|
|
663
|
+
end_line: number;
|
|
664
|
+
reason: string;
|
|
665
|
+
snippet: string;
|
|
666
|
+
score: number;
|
|
667
|
+
}
|
|
668
|
+
|
|
669
|
+
export interface EnhancerGenerationRequest {
|
|
670
|
+
trace_id: string;
|
|
671
|
+
tenant_id: string;
|
|
672
|
+
workspace_id?: string;
|
|
673
|
+
request: EnhancePromptInput;
|
|
674
|
+
style_requested: EnhancePromptStyle;
|
|
675
|
+
style_resolved: ResolvedEnhancerPromptStyle;
|
|
676
|
+
intent: EnhancerIntent;
|
|
677
|
+
query_intent: "symbol-heavy" | "impl-focused" | "conceptual";
|
|
678
|
+
language: EnhancerOutputLanguage;
|
|
679
|
+
context_refs: ContextRef[];
|
|
680
|
+
context_snippets: EnhancerContextSnippet[];
|
|
681
|
+
warnings: string[];
|
|
682
|
+
questions: string[];
|
|
683
|
+
tool_mode: EnhancerToolMode;
|
|
684
|
+
abort_signal?: AbortSignal;
|
|
685
|
+
on_progress?: () => void;
|
|
686
|
+
}
|
|
687
|
+
|
|
688
|
+
export interface EnhancerGenerationResult {
|
|
689
|
+
enhanced_prompt: string;
|
|
690
|
+
}
|
|
691
|
+
|
|
692
|
+
export interface EnhancerProviderDescriptor {
|
|
693
|
+
provider: string;
|
|
694
|
+
model?: string;
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
export interface EnhancerGenerationProvider {
|
|
698
|
+
generate(input: EnhancerGenerationRequest): Promise<EnhancerGenerationResult>;
|
|
699
|
+
describe?(): EnhancerProviderDescriptor;
|
|
700
|
+
}
|
|
701
|
+
|
|
538
702
|
export interface DeterministicEmbeddingProviderOptions {
|
|
539
703
|
dimensions?: number;
|
|
540
704
|
model?: string;
|
|
@@ -549,13 +713,88 @@ export interface OpenAICompatibleEmbeddingProviderOptions {
|
|
|
549
713
|
timeout_ms?: number;
|
|
550
714
|
batch_size?: number;
|
|
551
715
|
max_retries?: number;
|
|
716
|
+
transient_forbidden_max_retries?: number;
|
|
717
|
+
request_limiter?: ProviderRequestLimiter;
|
|
718
|
+
request_limit_scope_id?: string;
|
|
719
|
+
max_requests_per_minute?: number;
|
|
720
|
+
index_max_wait_ms?: number;
|
|
721
|
+
query_max_wait_ms?: number;
|
|
722
|
+
observability?: Observability;
|
|
723
|
+
}
|
|
724
|
+
|
|
725
|
+
export interface OpenAICompatibleRerankerProviderOptions {
|
|
726
|
+
base_url: string;
|
|
727
|
+
api_key: string;
|
|
728
|
+
model?: string;
|
|
729
|
+
timeout_ms?: number;
|
|
730
|
+
request_limiter?: ProviderRequestLimiter;
|
|
731
|
+
request_limit_scope_id?: string;
|
|
732
|
+
max_requests_per_minute?: number;
|
|
733
|
+
rerank_max_wait_ms?: number;
|
|
552
734
|
observability?: Observability;
|
|
553
735
|
}
|
|
554
736
|
|
|
737
|
+
export interface ClaudeAgentEnhancerProviderOptions {
|
|
738
|
+
api_key: string;
|
|
739
|
+
model?: string;
|
|
740
|
+
base_url?: string;
|
|
741
|
+
max_tokens?: number;
|
|
742
|
+
path_to_claude_code_executable?: string;
|
|
743
|
+
permission_mode?: ClaudeCodePermissionMode;
|
|
744
|
+
}
|
|
745
|
+
|
|
746
|
+
export type ClaudeCodePermissionMode = "default" | "acceptEdits" | "bypassPermissions" | "plan";
|
|
747
|
+
|
|
748
|
+
export interface ProviderRateLimitAcquireInput {
|
|
749
|
+
scope: string;
|
|
750
|
+
max_requests_per_minute: number;
|
|
751
|
+
max_wait_ms: number;
|
|
752
|
+
}
|
|
753
|
+
|
|
754
|
+
export interface ProviderRateLimitAcquireResult {
|
|
755
|
+
wait_ms: number;
|
|
756
|
+
}
|
|
757
|
+
|
|
758
|
+
export interface ProviderRequestLimiter {
|
|
759
|
+
readonly mode?: "local" | "redis" | "custom";
|
|
760
|
+
acquire(input: ProviderRateLimitAcquireInput): Promise<ProviderRateLimitAcquireResult>;
|
|
761
|
+
}
|
|
762
|
+
|
|
763
|
+
export interface RedisProviderRequestLimiterClient {
|
|
764
|
+
eval(script: string, numKeys: number, ...args: Array<string | number>): Promise<unknown>;
|
|
765
|
+
}
|
|
766
|
+
|
|
767
|
+
export interface RedisProviderRequestLimiterOptions {
|
|
768
|
+
redis: RedisProviderRequestLimiterClient;
|
|
769
|
+
key_prefix?: string;
|
|
770
|
+
window_ms?: number;
|
|
771
|
+
now?: () => number;
|
|
772
|
+
sleeper?: (ms: number) => Promise<void>;
|
|
773
|
+
}
|
|
774
|
+
|
|
555
775
|
class EmbeddingProviderRequestError extends Error {
|
|
556
776
|
constructor(
|
|
557
777
|
readonly reason: string,
|
|
558
778
|
readonly retryable: boolean,
|
|
779
|
+
message: string,
|
|
780
|
+
readonly retry_after_ms?: number
|
|
781
|
+
) {
|
|
782
|
+
super(message);
|
|
783
|
+
}
|
|
784
|
+
}
|
|
785
|
+
|
|
786
|
+
class RerankerProviderRequestError extends Error {
|
|
787
|
+
constructor(
|
|
788
|
+
readonly reason: string,
|
|
789
|
+
message: string
|
|
790
|
+
) {
|
|
791
|
+
super(message);
|
|
792
|
+
}
|
|
793
|
+
}
|
|
794
|
+
|
|
795
|
+
class EnhancerProviderRequestError extends Error {
|
|
796
|
+
constructor(
|
|
797
|
+
readonly reason: "timeout" | "rate_limited" | "schema_error" | "invalid_response" | "upstream_error",
|
|
559
798
|
message: string
|
|
560
799
|
) {
|
|
561
800
|
super(message);
|
|
@@ -611,44 +850,7 @@ function singularizeToken(token: string): string | undefined {
|
|
|
611
850
|
}
|
|
612
851
|
|
|
613
852
|
function tokenize(text: string): string[] {
|
|
614
|
-
|
|
615
|
-
.split(/[^a-z0-9_./-]+/)
|
|
616
|
-
.map((token) => token.trim())
|
|
617
|
-
.filter(Boolean);
|
|
618
|
-
|
|
619
|
-
const expandedTokens = new Set<string>();
|
|
620
|
-
const addToken = (value: string): void => {
|
|
621
|
-
const normalized = value.trim().toLowerCase();
|
|
622
|
-
if (!normalized) {
|
|
623
|
-
return;
|
|
624
|
-
}
|
|
625
|
-
expandedTokens.add(normalized);
|
|
626
|
-
const singular = singularizeToken(normalized);
|
|
627
|
-
if (singular) {
|
|
628
|
-
expandedTokens.add(singular);
|
|
629
|
-
}
|
|
630
|
-
const plural = pluralizeToken(normalized);
|
|
631
|
-
if (plural) {
|
|
632
|
-
expandedTokens.add(plural);
|
|
633
|
-
}
|
|
634
|
-
};
|
|
635
|
-
|
|
636
|
-
for (const token of coarseTokens) {
|
|
637
|
-
addToken(token);
|
|
638
|
-
for (const part of token.split(/[./_-]+/).filter(Boolean)) {
|
|
639
|
-
addToken(part);
|
|
640
|
-
const camelSplit = part
|
|
641
|
-
.replace(/([a-z0-9])([A-Z])/g, "$1 $2")
|
|
642
|
-
.split(/\s+/)
|
|
643
|
-
.map((segment) => segment.trim().toLowerCase())
|
|
644
|
-
.filter(Boolean);
|
|
645
|
-
for (const segment of camelSplit) {
|
|
646
|
-
addToken(segment);
|
|
647
|
-
}
|
|
648
|
-
}
|
|
649
|
-
}
|
|
650
|
-
|
|
651
|
-
return [...expandedTokens];
|
|
853
|
+
return tokenizeForRanking(text);
|
|
652
854
|
}
|
|
653
855
|
|
|
654
856
|
function lexicalScore(query: string, haystack: string): number {
|
|
@@ -699,21 +901,130 @@ function looksLowInformation(snippet: string): boolean {
|
|
|
699
901
|
|
|
700
902
|
/**
 * Picks the single reason string reported for a search result.
 *
 * Signals are checked in strict priority order: literal match, path match,
 * recency boost, then a lexical score above 0.3. When none applies the result
 * is labelled a semantic match.
 */
function chooseReason(input: {
  lexical: number;
  literal_match: boolean;
  path_match: boolean;
  recency_boosted: boolean;
}): RetrievalReason {
  return input.literal_match
    ? "exact literal match"
    : input.path_match
      ? "path token overlap"
      : input.recency_boosted
        ? "recently modified relevant module"
        : input.lexical > 0.3
          ? "path token overlap"
          : "semantic match";
}
|
|
716
922
|
|
|
923
|
+
/**
 * Reports whether a reason string denotes an exact match: either
 * "exact literal match" or "exact symbol match".
 */
function isExactLiteralReason(reason: string): boolean {
  switch (reason) {
    case "exact literal match":
    case "exact symbol match":
      return true;
    default:
      return false;
  }
}
|
|
926
|
+
|
|
927
|
+
interface LiteralBoostResult {
|
|
928
|
+
boost: number;
|
|
929
|
+
matched: boolean;
|
|
930
|
+
path_matches: number;
|
|
931
|
+
snippet_matches: number;
|
|
932
|
+
}
|
|
933
|
+
|
|
934
|
+
/**
 * Collects up to 24 distinct, lower-cased "specific" literals from a query.
 *
 * Candidates come from four sources, visited in this order: likely code
 * symbols, path-like symbols (plus each path's last segment), `RCE_`-prefixed
 * upper-case identifiers, and file names with a recognised extension.
 *
 * A candidate is kept only when, after stripping wrapping punctuation, it is
 * at least three characters long and looks specific: an upper-case
 * underscore-separated identifier with three or more segments, something
 * containing `/` or `.`, a camel-case word, or something containing a hyphen.
 */
function extractSearchLiterals(query: string): string[] {
  // A Set keeps insertion order, so it serves as both the dedupe index and the result list.
  const accepted = new Set<string>();

  const consider = (candidate: string): void => {
    const stripped = candidate.trim().replace(/^[`"'([{]+|[`"')\]}:;,.]+$/g, "");
    const lowered = stripped.toLowerCase();
    if (lowered.length < 3 || accepted.has(lowered)) {
      return;
    }
    // Shape tests run on the case-preserved text; only the stored form is lower-cased.
    const specific =
      /^[A-Z0-9]+(?:_[A-Z0-9]+){2,}$/.test(stripped) ||
      /[/.]/.test(stripped) ||
      /[a-z][A-Z]/.test(stripped) ||
      /[A-Z][a-z]+[A-Z]/.test(stripped) ||
      stripped.includes("-");
    if (specific) {
      accepted.add(lowered);
    }
  };

  for (const codeSymbol of extractLikelyCodeSymbols(query, 24)) {
    consider(codeSymbol);
  }

  for (const pathLike of extractPathLikeSymbols(query)) {
    consider(pathLike);
    const segments = normalizePath(pathLike).split("/");
    const lastSegment = segments[segments.length - 1];
    if (lastSegment) {
      consider(lastSegment);
    }
  }

  for (const envHit of query.matchAll(/\bRCE_[A-Z0-9_]{4,}\b/g)) {
    consider(envHit[0] ?? "");
  }

  for (const fileHit of query.matchAll(/\b[A-Za-z0-9_.-]+\.(?:ts|tsx|js|jsx|mjs|cjs|py|go|json|md)\b/g)) {
    consider(fileHit[0] ?? "");
  }

  return Array.from(accepted).slice(0, 24);
}
|
|
981
|
+
|
|
982
|
+
/**
 * Computes the score boost a candidate earns for containing query literals.
 *
 * Each literal is matched case-insensitively against the candidate; literals
 * are expected to be lower-cased already. A literal found in the path earns
 * `literal_path_boost`; otherwise, if found in the snippet, it earns
 * `literal_snippet_boost`. A literal never earns both.
 *
 * Scanning stops as soon as the accumulated boost reaches `literal_max_boost`,
 * whichever kind of match got it there, so the match counters describe only
 * the literals that contributed up to the cap. Empty literals are ignored
 * because `includes("")` is true for every string.
 *
 * @param input.path - Candidate file path.
 * @param input.snippet - Candidate snippet text.
 * @param input.literals - Literals extracted from the query.
 * @param input.path_bias - Source of the boost weights and the cap.
 * @returns The boost clamped to `literal_max_boost`, whether any boost was
 *   earned, and the per-kind match counts.
 */
function applyLiteralBoost(input: {
  path: string;
  snippet: string;
  literals: string[];
  path_bias: RetrievalPathBiasConfig;
}): LiteralBoostResult {
  if (input.literals.length === 0) {
    return {
      boost: 0,
      matched: false,
      path_matches: 0,
      snippet_matches: 0
    };
  }

  const normalizedPath = input.path.toLowerCase();
  const normalizedSnippet = input.snippet.toLowerCase();
  const pathBias = input.path_bias;
  let boost = 0;
  let pathMatches = 0;
  let snippetMatches = 0;

  for (const literal of input.literals) {
    if (!literal) {
      // An empty literal would "match" every path; skip it.
      continue;
    }
    if (normalizedPath.includes(literal)) {
      boost += pathBias.literal_path_boost;
      pathMatches += 1;
    } else if (normalizedSnippet.includes(literal)) {
      boost += pathBias.literal_snippet_boost;
      snippetMatches += 1;
    }
    // The cap check must run after path matches too, not only after snippet matches.
    if (boost >= pathBias.literal_max_boost) {
      break;
    }
  }

  const clampedBoost = Math.min(pathBias.literal_max_boost, boost);
  return {
    boost: clampedBoost,
    matched: clampedBoost > 0,
    path_matches: pathMatches,
    snippet_matches: snippetMatches
  };
}
|
|
1027
|
+
|
|
717
1028
|
const DOC_INTENT_TOKENS = new Set([
|
|
718
1029
|
"adr",
|
|
719
1030
|
"architecture",
|
|
@@ -769,6 +1080,22 @@ const UI_COMPONENT_TOKENS = new Set(["component", "layout", "react", "tsx", "ui"
|
|
|
769
1080
|
const FILE_LOOKUP_TOKENS = new Set(["entrypoint", "file", "locate", "path", "where", "which"]);
|
|
770
1081
|
const TEST_INTENT_TOKENS = new Set(["assert", "coverage", "e2e", "integration", "spec", "test", "tests", "unit"]);
|
|
771
1082
|
const EXAMPLE_INTENT_TOKENS = new Set(["demo", "example", "examples", "sample", "tutorial"]);
|
|
1083
|
+
// Query tokens that mark a search as a security/tenancy trace; consulted by hasSecurityTraceIntent.
const SECURITY_TRACE_INTENT_TOKENS = new Set<string>([
  "auth", "authorization", "binding", "config", "enforce",
  "mcp", "project_root_path", "security", "session", "stdio",
  "tenant", "token", "workspace", "workspace_id"
]);
|
|
772
1099
|
|
|
773
1100
|
const SOURCE_PATH_PREFIXES = ["src/", "app/", "apps/", "crates/", "internal/", "lib/", "package/", "packages/"];
|
|
774
1101
|
const LOW_PRIORITY_PATH_PREFIXES = [
|
|
@@ -852,6 +1179,24 @@ function hasExampleIntent(tokens: string[]): boolean {
|
|
|
852
1179
|
return tokens.some((token) => EXAMPLE_INTENT_TOKENS.has(token));
|
|
853
1180
|
}
|
|
854
1181
|
|
|
1182
|
+
/**
 * Decides whether a query is tracing security or tenancy behaviour.
 *
 * True when any token is in `SECURITY_TRACE_INTENT_TOKENS`, or when the raw
 * query text mentions `tenant_id`, `workspace_id`, `project_root_path`, or an
 * upper-case `RCE_`-prefixed identifier.
 */
function hasSecurityTraceIntent(tokens: string[], queryText: string): boolean {
  for (const candidate of tokens) {
    if (SECURITY_TRACE_INTENT_TOKENS.has(candidate)) {
      return true;
    }
  }
  // The raw text is checked as well, separately from the token list.
  const identifierPattern = /\btenant_id\b|\bworkspace_id\b|\bproject_root_path\b|\bRCE_[A-Z0-9_]{4,}\b/;
  return identifierPattern.test(queryText);
}
|
|
1188
|
+
|
|
1189
|
+
/**
 * Reports whether a path points at guidance or meta material rather than
 * implementation code. Matching is case-insensitive.
 *
 * A path qualifies when it ends with `mcp-tool-guidance.ts` or contains any
 * of `/guidance/`, `/meta/`, `/_meta/`, or `tool-guidance`.
 */
function isGuidanceOrMetaPath(path: string): boolean {
  const lowered = path.toLowerCase();
  if (lowered.endsWith("mcp-tool-guidance.ts")) {
    return true;
  }
  const markers = ["/guidance/", "/meta/", "/_meta/", "tool-guidance"];
  return markers.some((marker) => lowered.includes(marker));
}
|
|
1199
|
+
|
|
855
1200
|
function pathQualityBias(path: string, queryTokens: string[], config: RetrievalScoringConfig, queryText?: string): number {
|
|
856
1201
|
const normalizedPath = path.toLowerCase();
|
|
857
1202
|
const docIntent = hasDocIntent(queryTokens);
|
|
@@ -862,6 +1207,7 @@ function pathQualityBias(path: string, queryTokens: string[], config: RetrievalS
|
|
|
862
1207
|
const uiComponentIntent = hasUiComponentIntent(queryTokens);
|
|
863
1208
|
const testIntent = hasTestIntent(queryTokens);
|
|
864
1209
|
const exampleIntent = hasExampleIntent(queryTokens);
|
|
1210
|
+
const securityTraceIntent = hasSecurityTraceIntent(queryTokens, queryText ?? queryTokens.join(" "));
|
|
865
1211
|
let bias = 0;
|
|
866
1212
|
const pathBias = config.path_bias;
|
|
867
1213
|
|
|
@@ -934,6 +1280,9 @@ function pathQualityBias(path: string, queryTokens: string[], config: RetrievalS
|
|
|
934
1280
|
if (docsPreferred && isSourcePath) {
|
|
935
1281
|
bias -= pathBias.doc_intent_source_penalty;
|
|
936
1282
|
}
|
|
1283
|
+
if (securityTraceIntent && !docsPreferred && isGuidanceOrMetaPath(normalizedPath)) {
|
|
1284
|
+
bias -= pathBias.security_trace_meta_penalty;
|
|
1285
|
+
}
|
|
937
1286
|
|
|
938
1287
|
if (workspaceManifestIntent && normalizedPath === "cargo.toml") {
|
|
939
1288
|
bias += pathBias.workspace_manifest_root_boost;
|
|
@@ -1020,8 +1369,8 @@ function buildChunks(file: RawFile, chunkingConfig: RetrievalChunkingConfig): Ch
|
|
|
1020
1369
|
config: {
|
|
1021
1370
|
strategy: chunkingConfig.strategy,
|
|
1022
1371
|
fallback_strategy: chunkingConfig.fallback_strategy,
|
|
1023
|
-
target_chunk_tokens:
|
|
1024
|
-
chunk_overlap_tokens:
|
|
1372
|
+
target_chunk_tokens: chunkingConfig.target_chunk_tokens,
|
|
1373
|
+
chunk_overlap_tokens: chunkingConfig.chunk_overlap_tokens,
|
|
1025
1374
|
max_chunks_per_file: MAX_CHUNKS_PER_FILE,
|
|
1026
1375
|
parse_timeout_ms: chunkingConfig.parse_timeout_ms,
|
|
1027
1376
|
enabled_languages: chunkingConfig.enabled_languages
|
|
@@ -1076,6 +1425,184 @@ function sleep(ms: number): Promise<void> {
|
|
|
1076
1425
|
});
|
|
1077
1426
|
}
|
|
1078
1427
|
|
|
1428
|
+
/**
 * Error carrying a retry hint for a provider request rejected by a rate limit.
 *
 * `name` is set explicitly so logs and `error.name` checks identify the error
 * type instead of reporting the generic "Error".
 */
export class ProviderRateLimitExceededError extends Error {
  /** Suggested delay, in milliseconds, before the caller retries. */
  readonly retry_after_ms: number;

  /**
   * @param message - Human-readable description of the rejection.
   * @param retry_after_ms - Suggested delay in milliseconds before retrying.
   */
  constructor(message: string, retry_after_ms: number) {
    super(message);
    this.name = "ProviderRateLimitExceededError";
    this.retry_after_ms = retry_after_ms;
  }
}
|
|
1436
|
+
|
|
1437
|
+
interface LocalProviderRateBucket {
|
|
1438
|
+
tokens: number;
|
|
1439
|
+
last_refill_ms: number;
|
|
1440
|
+
}
|
|
1441
|
+
|
|
1442
|
+
/**
 * In-process request limiter that keeps one token bucket per scope.
 *
 * A bucket starts full at `max_requests_per_minute` tokens and refills
 * continuously at that rate. Each `acquire` consumes one token, sleeping for
 * the token to become available as long as the wait fits in `max_wait_ms`.
 * The clock and the sleep function can be injected through the constructor.
 */
export class LocalProviderRequestLimiter implements ProviderRequestLimiter {
  readonly mode = "local" as const;
  private readonly buckets = new Map<string, LocalProviderRateBucket>();
  private readonly now: () => number;
  private readonly sleeper: (ms: number) => Promise<void>;

  /**
   * @param options.now - Clock returning milliseconds; defaults to `Date.now`.
   * @param options.sleeper - Async delay function; defaults to the module's `sleep`.
   */
  constructor(options?: {
    now?: () => number;
    sleeper?: (ms: number) => Promise<void>;
  }) {
    this.now = options?.now ?? (() => Date.now());
    this.sleeper = options?.sleeper ?? sleep;
  }

  /**
   * Takes one token from the bucket for `input.scope`, waiting if necessary.
   *
   * @returns The total number of milliseconds slept before the token was granted.
   * @throws Error when `max_requests_per_minute` is not a positive integer or
   *   `max_wait_ms` is not a non-negative integer.
   * @throws ProviderRateLimitExceededError when the next token would arrive
   *   after the wait budget has run out.
   */
  async acquire(input: ProviderRateLimitAcquireInput): Promise<ProviderRateLimitAcquireResult> {
    const { scope, max_requests_per_minute: capacity, max_wait_ms: maxWaitMs } = input;

    if (!(Number.isInteger(capacity) && capacity > 0)) {
      throw new Error("provider limiter requires max_requests_per_minute to be a positive integer");
    }
    if (!(Number.isInteger(maxWaitMs) && maxWaitMs >= 0)) {
      throw new Error("provider limiter requires max_wait_ms to be a non-negative integer");
    }

    const tokensPerMs = capacity / 60_000;
    let totalSleptMs = 0;
    const giveUpAtMs = this.now() + maxWaitMs;

    for (;;) {
      const currentMs = this.now();

      // First use of a scope creates a full bucket.
      const existing = this.buckets.get(scope);
      const bucket: LocalProviderRateBucket = existing ?? { tokens: capacity, last_refill_ms: currentMs };
      if (existing === undefined) {
        this.buckets.set(scope, bucket);
      }

      // Refill only when the clock has moved forward since the last refill.
      const sinceRefillMs = currentMs - bucket.last_refill_ms;
      if (sinceRefillMs > 0) {
        bucket.tokens = Math.min(capacity, bucket.tokens + sinceRefillMs * tokensPerMs);
        bucket.last_refill_ms = currentMs;
      }

      if (bucket.tokens >= 1) {
        bucket.tokens -= 1;
        return { wait_ms: totalSleptMs };
      }

      const msUntilToken = Math.max(1, Math.ceil((1 - bucket.tokens) / tokensPerMs));
      const budgetLeftMs = giveUpAtMs - currentMs;
      const budgetExhausted = budgetLeftMs <= 0 || msUntilToken > budgetLeftMs;
      if (budgetExhausted) {
        throw new ProviderRateLimitExceededError(
          `provider request rate limit exceeded for scope "${scope}"`,
          Math.max(1, msUntilToken)
        );
      }

      const napMs = Math.max(1, Math.min(msUntilToken, budgetLeftMs));
      await this.sleeper(napMs);
      totalSleptMs += napMs;
    }
  }
}
|
|
1505
|
+
|
|
1506
|
+
/** Decoded reply of one run of the Redis limiter script. */
interface RedisLimiterAttemptResult {
  /** True when the script admitted the request (first element of the reply equals 1). */
  allowed: boolean;
  /** Milliseconds the caller should wait before trying again; 0 when the request was admitted. */
  retry_after_ms: number;
}
|
|
1510
|
+
|
|
1511
|
+
/**
 * Fixed-window counter evaluated atomically inside Redis.
 *
 * KEYS[1] is the counter key, ARGV[1] the request limit for the window and
 * ARGV[2] the window length in milliseconds. The reply is `{1, 0}` when the
 * request is admitted and `{0, <ms until the window expires>}` otherwise.
 *
 * The expiry is (re)armed whenever the key has no TTL, not only on the first
 * increment. A counter that lost its expiry would otherwise never reset and
 * would block the scope indefinitely once it passed the limit.
 */
const REDIS_PROVIDER_LIMITER_SCRIPT = `
local key = KEYS[1]
local limit = tonumber(ARGV[1])
local window_ms = tonumber(ARGV[2])
local count = redis.call("INCR", key)
local ttl = redis.call("PTTL", key)
if count == 1 or ttl < 0 then
  redis.call("PEXPIRE", key, window_ms)
  ttl = window_ms
end
if count <= limit then
  return {1, 0}
end
return {0, ttl}
`;
|
|
1528
|
+
|
|
1529
|
+
/**
 * Limiter backed by a Redis counter, evaluated through the limiter Lua script.
 *
 * Every `acquire` attempt runs the script against `<key_prefix>:<scope>`. When
 * the script denies the request, the caller sleeps for the suggested delay as
 * long as that still fits inside `max_wait_ms`.
 */
export class RedisProviderRequestLimiter implements ProviderRequestLimiter {
  readonly mode = "redis" as const;
  private readonly redis: RedisProviderRequestLimiterClient;
  private readonly keyPrefix: string;
  private readonly windowMs: number;
  private readonly now: () => number;
  private readonly sleeper: (ms: number) => Promise<void>;

  /**
   * @throws Error when no client exposing `eval()` is supplied or `window_ms`
   *   is not a positive integer.
   */
  constructor(options: RedisProviderRequestLimiterOptions) {
    const client = options.redis;
    if (!client || typeof client.eval !== "function") {
      throw new Error("invalid redis provider limiter config: redis client with eval() is required");
    }
    this.redis = client;
    // A blank or missing prefix falls back to the default namespace.
    this.keyPrefix = options.key_prefix?.trim() || "rce:provider_rate_limit";
    this.windowMs = options.window_ms ?? 60_000;
    this.now = options.now ?? (() => Date.now());
    this.sleeper = options.sleeper ?? sleep;

    if (!(Number.isInteger(this.windowMs) && this.windowMs > 0)) {
      throw new Error("invalid redis provider limiter config: window_ms must be a positive integer");
    }
  }

  /**
   * Reserves one request slot for `input.scope`, retrying after the delay the
   * script reports while the wait budget lasts.
   *
   * @returns The total requested sleep time as `wait_ms`.
   * @throws Error on invalid limits or an unexpected script reply.
   * @throws ProviderRateLimitExceededError when the suggested delay does not
   *   fit in the remaining wait budget.
   */
  async acquire(input: ProviderRateLimitAcquireInput): Promise<ProviderRateLimitAcquireResult> {
    const limit = input.max_requests_per_minute;
    const maxWaitMs = input.max_wait_ms;
    if (!(Number.isInteger(limit) && limit > 0)) {
      throw new Error("provider limiter requires max_requests_per_minute to be a positive integer");
    }
    if (!(Number.isInteger(maxWaitMs) && maxWaitMs >= 0)) {
      throw new Error("provider limiter requires max_wait_ms to be a non-negative integer");
    }

    let totalSleptMs = 0;
    const deadlineMs = this.now() + maxWaitMs;

    for (;;) {
      const outcome = await this.reserveAttempt(input.scope, limit);
      if (outcome.allowed) {
        return { wait_ms: totalSleptMs };
      }

      const budgetMs = deadlineMs - this.now();
      const suggestedMs = Math.max(1, outcome.retry_after_ms);
      if (budgetMs <= 0 || suggestedMs > budgetMs) {
        throw new ProviderRateLimitExceededError(
          `provider request rate limit exceeded for scope "${input.scope}"`,
          suggestedMs
        );
      }

      const napMs = Math.max(1, Math.min(suggestedMs, budgetMs));
      await this.sleeper(napMs);
      totalSleptMs += napMs;
    }
  }

  /** Runs the limiter script once and decodes its `[allowed, retry_after_ms]` reply. */
  private async reserveAttempt(scope: string, maxRequestsPerMinute: number): Promise<RedisLimiterAttemptResult> {
    const reply = await this.redis.eval(
      REDIS_PROVIDER_LIMITER_SCRIPT,
      1,
      `${this.keyPrefix}:${scope}`,
      maxRequestsPerMinute,
      this.windowMs
    );
    if (!Array.isArray(reply)) {
      throw new Error("redis provider limiter returned unexpected eval() payload");
    }

    const admitted = Number(reply[0] ?? 0) === 1;
    const delayMs = Number(reply[1] ?? 0);
    // A non-numeric delay falls back to a full window.
    const retryAfterMs = Number.isFinite(delayMs) ? Math.max(0, Math.trunc(delayMs)) : this.windowMs;
    return { allowed: admitted, retry_after_ms: retryAfterMs };
  }
}
|
|
1605
|
+
|
|
1079
1606
|
export class DeterministicEmbeddingProvider implements EmbeddingProvider {
|
|
1080
1607
|
private readonly dimensions: number;
|
|
1081
1608
|
private readonly model: string;
|
|
@@ -1110,6 +1637,12 @@ export class OpenAICompatibleEmbeddingProvider implements EmbeddingProvider {
|
|
|
1110
1637
|
private readonly timeoutMs: number;
|
|
1111
1638
|
private readonly batchSize: number;
|
|
1112
1639
|
private readonly maxRetries: number;
|
|
1640
|
+
private readonly transientForbiddenMaxRetries: number;
|
|
1641
|
+
private readonly requestLimiter?: ProviderRequestLimiter;
|
|
1642
|
+
private readonly requestLimitScope: string;
|
|
1643
|
+
private readonly maxRequestsPerMinute: number;
|
|
1644
|
+
private readonly indexMaxWaitMs: number;
|
|
1645
|
+
private readonly queryMaxWaitMs: number;
|
|
1113
1646
|
private readonly observability: Observability;
|
|
1114
1647
|
|
|
1115
1648
|
constructor(options: OpenAICompatibleEmbeddingProviderOptions) {
|
|
@@ -1130,6 +1663,17 @@ export class OpenAICompatibleEmbeddingProvider implements EmbeddingProvider {
|
|
|
1130
1663
|
this.timeoutMs = options.timeout_ms ?? DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_TIMEOUT_MS;
|
|
1131
1664
|
this.batchSize = options.batch_size ?? DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_BATCH_SIZE;
|
|
1132
1665
|
this.maxRetries = options.max_retries ?? DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_MAX_RETRIES;
|
|
1666
|
+
this.transientForbiddenMaxRetries =
|
|
1667
|
+
options.transient_forbidden_max_retries ?? DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_TRANSIENT_403_MAX_RETRIES;
|
|
1668
|
+
this.requestLimiter = options.request_limiter;
|
|
1669
|
+
this.requestLimitScope = resolveProviderLimiterScope({
|
|
1670
|
+
provider: "openai_compatible",
|
|
1671
|
+
apiKey,
|
|
1672
|
+
overrideScopeId: options.request_limit_scope_id
|
|
1673
|
+
});
|
|
1674
|
+
this.maxRequestsPerMinute = options.max_requests_per_minute ?? DEFAULT_PROVIDER_MAX_REQUESTS_PER_MINUTE;
|
|
1675
|
+
this.indexMaxWaitMs = options.index_max_wait_ms ?? DEFAULT_PROVIDER_LIMIT_INDEX_MAX_WAIT_MS;
|
|
1676
|
+
this.queryMaxWaitMs = options.query_max_wait_ms ?? DEFAULT_PROVIDER_LIMIT_QUERY_MAX_WAIT_MS;
|
|
1133
1677
|
this.observability = options.observability ?? getObservability("retrieval-core");
|
|
1134
1678
|
|
|
1135
1679
|
if (!Number.isInteger(this.dimensions) || this.dimensions <= 0) {
|
|
@@ -1144,6 +1688,20 @@ export class OpenAICompatibleEmbeddingProvider implements EmbeddingProvider {
|
|
|
1144
1688
|
if (!Number.isInteger(this.maxRetries) || this.maxRetries < 0) {
|
|
1145
1689
|
throw new Error("invalid openai-compatible embedding config: max_retries must be a non-negative integer");
|
|
1146
1690
|
}
|
|
1691
|
+
if (!Number.isInteger(this.transientForbiddenMaxRetries) || this.transientForbiddenMaxRetries < 0) {
|
|
1692
|
+
throw new Error(
|
|
1693
|
+
"invalid openai-compatible embedding config: transient_forbidden_max_retries must be a non-negative integer"
|
|
1694
|
+
);
|
|
1695
|
+
}
|
|
1696
|
+
if (!Number.isInteger(this.maxRequestsPerMinute) || this.maxRequestsPerMinute <= 0) {
|
|
1697
|
+
throw new Error("invalid openai-compatible embedding config: max_requests_per_minute must be a positive integer");
|
|
1698
|
+
}
|
|
1699
|
+
if (!Number.isInteger(this.indexMaxWaitMs) || this.indexMaxWaitMs < 0) {
|
|
1700
|
+
throw new Error("invalid openai-compatible embedding config: index_max_wait_ms must be a non-negative integer");
|
|
1701
|
+
}
|
|
1702
|
+
if (!Number.isInteger(this.queryMaxWaitMs) || this.queryMaxWaitMs < 0) {
|
|
1703
|
+
throw new Error("invalid openai-compatible embedding config: query_max_wait_ms must be a non-negative integer");
|
|
1704
|
+
}
|
|
1147
1705
|
}
|
|
1148
1706
|
|
|
1149
1707
|
describe(): EmbeddingDescriptor {
|
|
@@ -1175,11 +1733,12 @@ export class OpenAICompatibleEmbeddingProvider implements EmbeddingProvider {
|
|
|
1175
1733
|
purpose
|
|
1176
1734
|
} as const;
|
|
1177
1735
|
|
|
1178
|
-
|
|
1736
|
+
let attempt = 0;
|
|
1737
|
+
while (true) {
|
|
1179
1738
|
const startedAt = Date.now();
|
|
1180
1739
|
this.observability.metrics.increment("retrieval_embedding_provider_requests_total", 1, labels);
|
|
1181
1740
|
try {
|
|
1182
|
-
return await this.embedBatchOnce(texts);
|
|
1741
|
+
return await this.embedBatchOnce(texts, purpose);
|
|
1183
1742
|
} catch (error) {
|
|
1184
1743
|
const failure = this.toProviderFailure(error);
|
|
1185
1744
|
this.observability.metrics.increment("retrieval_embedding_provider_failures_total", 1, {
|
|
@@ -1187,23 +1746,31 @@ export class OpenAICompatibleEmbeddingProvider implements EmbeddingProvider {
|
|
|
1187
1746
|
reason: failure.reason
|
|
1188
1747
|
});
|
|
1189
1748
|
|
|
1190
|
-
const
|
|
1749
|
+
const maxRetriesForFailure = this.maxRetriesForReason(failure.reason);
|
|
1750
|
+
const shouldRetry = failure.retryable && attempt < maxRetriesForFailure;
|
|
1191
1751
|
this.observability.logger.warn("embedding provider request failed", {
|
|
1192
1752
|
provider: "openai_compatible",
|
|
1193
1753
|
model: this.model,
|
|
1194
1754
|
purpose,
|
|
1195
1755
|
reason: failure.reason,
|
|
1756
|
+
provider_message: failure.message,
|
|
1196
1757
|
retryable: failure.retryable,
|
|
1197
1758
|
retrying: shouldRetry,
|
|
1198
1759
|
attempt: attempt + 1,
|
|
1199
|
-
max_attempts:
|
|
1760
|
+
max_attempts: maxRetriesForFailure + 1,
|
|
1761
|
+
retry_after_ms: failure.retry_after_ms
|
|
1200
1762
|
});
|
|
1201
1763
|
|
|
1202
1764
|
if (shouldRetry) {
|
|
1203
|
-
await sleep(this.retryDelayMs(attempt));
|
|
1765
|
+
await sleep(this.retryDelayMs(attempt, failure));
|
|
1766
|
+
attempt += 1;
|
|
1204
1767
|
continue;
|
|
1205
1768
|
}
|
|
1206
1769
|
|
|
1770
|
+
if (failure.reason === "client_rate_limited" || failure.reason === "rate_limited") {
|
|
1771
|
+
throw new RetrievalError("RATE_LIMITED", `embedding provider rate limited; ${failure.message}`);
|
|
1772
|
+
}
|
|
1773
|
+
|
|
1207
1774
|
throw new RetrievalError(
|
|
1208
1775
|
"UPSTREAM_FAILURE",
|
|
1209
1776
|
`embedding provider request failed (${failure.reason}); ${failure.message}`
|
|
@@ -1212,11 +1779,50 @@ export class OpenAICompatibleEmbeddingProvider implements EmbeddingProvider {
|
|
|
1212
1779
|
this.observability.metrics.observe("retrieval_embedding_provider_latency_ms", Date.now() - startedAt, labels);
|
|
1213
1780
|
}
|
|
1214
1781
|
}
|
|
1782
|
+
}
|
|
1783
|
+
|
|
1784
|
+
/**
 * Reserves a request slot from the optional limiter before an embedding call.
 *
 * The wait budget depends on the purpose: `index` requests use
 * `indexMaxWaitMs`, everything else uses `queryMaxWaitMs`. Does nothing when
 * no limiter is configured.
 *
 * @throws EmbeddingProviderRequestError with reason `client_rate_limited` when
 *   the limiter reports that the wait budget is exhausted; marked retryable
 *   only for `index` requests. Any other limiter error is rethrown unchanged.
 */
private async enforceRequestLimit(purpose: EmbeddingPurpose): Promise<void> {
  if (!this.requestLimiter) {
    return;
  }

  const maxWaitMs = purpose === "index" ? this.indexMaxWaitMs : this.queryMaxWaitMs;
  const labels = {
    provider: "openai_compatible",
    model: this.model,
    purpose,
    limiter_mode: this.requestLimiter.mode ?? "custom"
  } as const;

  try {
    const acquired = await this.requestLimiter.acquire({
      scope: this.requestLimitScope,
      max_requests_per_minute: this.maxRequestsPerMinute,
      max_wait_ms: maxWaitMs
    });
    this.observability.metrics.observe("retrieval_provider_limiter_wait_ms", acquired.wait_ms, labels);
    this.observability.metrics.increment("retrieval_provider_requests_shaped_total", 1, labels);
  } catch (error) {
    // Only a budget overrun is a "wait_timeout"; unexpected limiter failures
    // get their own label so they are not counted as rate-limit blocks.
    this.observability.metrics.increment("retrieval_provider_limiter_block_total", 1, {
      ...labels,
      reason: error instanceof ProviderRateLimitExceededError ? "wait_timeout" : "limiter_error"
    });
    if (error instanceof ProviderRateLimitExceededError) {
      const retryable = purpose === "index";
      throw new EmbeddingProviderRequestError(
        "client_rate_limited",
        retryable,
        `${error.message}; retry_after_ms=${error.retry_after_ms}`,
        error.retry_after_ms
      );
    }
    throw error;
  }
}
|
|
1218
1822
|
|
|
1219
|
-
private async embedBatchOnce(texts: string[]): Promise<number[][]> {
|
|
1823
|
+
private async embedBatchOnce(texts: string[], purpose: EmbeddingPurpose): Promise<number[][]> {
|
|
1824
|
+
await this.enforceRequestLimit(purpose);
|
|
1825
|
+
|
|
1220
1826
|
const controller = new AbortController();
|
|
1221
1827
|
const timeoutId = setTimeout(() => {
|
|
1222
1828
|
controller.abort();
|
|
@@ -1253,13 +1859,28 @@ export class OpenAICompatibleEmbeddingProvider implements EmbeddingProvider {
|
|
|
1253
1859
|
if (!response.ok) {
|
|
1254
1860
|
const details = await safeResponseText(response);
|
|
1255
1861
|
if (response.status === 429) {
|
|
1256
|
-
throw new EmbeddingProviderRequestError(
|
|
1862
|
+
throw new EmbeddingProviderRequestError(
|
|
1863
|
+
"rate_limited",
|
|
1864
|
+
true,
|
|
1865
|
+
`HTTP 429 ${details}`.trim(),
|
|
1866
|
+
parseRetryAfterMs(response.headers.get("retry-after"))
|
|
1867
|
+
);
|
|
1257
1868
|
}
|
|
1258
1869
|
if (response.status >= 500) {
|
|
1259
1870
|
throw new EmbeddingProviderRequestError("http_5xx", true, `HTTP ${response.status} ${details}`.trim());
|
|
1260
1871
|
}
|
|
1261
|
-
if (response.status === 401
|
|
1262
|
-
throw new EmbeddingProviderRequestError("auth_error", false, `HTTP
|
|
1872
|
+
if (response.status === 401) {
|
|
1873
|
+
throw new EmbeddingProviderRequestError("auth_error", false, `HTTP 401 ${details}`.trim());
|
|
1874
|
+
}
|
|
1875
|
+
if (response.status === 403) {
|
|
1876
|
+
const retryAfterMs = parseRetryAfterMs(response.headers.get("retry-after"));
|
|
1877
|
+
const retryable = this.isTransientForbidden(details, retryAfterMs);
|
|
1878
|
+
throw new EmbeddingProviderRequestError(
|
|
1879
|
+
retryable ? "forbidden_transient" : "auth_error",
|
|
1880
|
+
retryable,
|
|
1881
|
+
`HTTP 403 ${details}`.trim(),
|
|
1882
|
+
retryAfterMs
|
|
1883
|
+
);
|
|
1263
1884
|
}
|
|
1264
1885
|
if (response.status === 404) {
|
|
1265
1886
|
throw new EmbeddingProviderRequestError("endpoint_not_found", false, `HTTP 404 ${details}`.trim());
|
|
@@ -1328,25 +1949,951 @@ export class OpenAICompatibleEmbeddingProvider implements EmbeddingProvider {
|
|
|
1328
1949
|
return vectors;
|
|
1329
1950
|
}
|
|
1330
1951
|
|
|
1331
|
-
private
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1952
|
+
/**
 * Retry budget for a failure reason. Transient 403s use the larger of the
 * general budget and the dedicated transient-forbidden budget; every other
 * reason uses the general budget.
 */
private maxRetriesForReason(reason: string): number {
  return reason === "forbidden_transient"
    ? Math.max(this.maxRetries, this.transientForbiddenMaxRetries)
    : this.maxRetries;
}
|
|
1958
|
+
|
|
1959
|
+
/**
 * Delay before the next retry.
 *
 * Transient 403s back off exponentially (250ms doubling, capped at 2500ms)
 * with up to 150ms of jitter; other failures back off linearly (100ms per
 * attempt) with up to 75ms of jitter. A `retry_after_ms` hint on the failure
 * acts as a lower bound of at least 1ms.
 *
 * @param attempt Zero-based count of retries already performed.
 */
private retryDelayMs(attempt: number, failure: EmbeddingProviderRequestError): number {
  let backoffMs: number;
  let jitterCeilingMs: number;
  if (failure.reason === "forbidden_transient") {
    backoffMs = Math.min(2_500, 250 * 2 ** attempt);
    jitterCeilingMs = 150;
  } else {
    backoffMs = 100 * (attempt + 1);
    jitterCeilingMs = 75;
  }

  const delayMs = backoffMs + Math.floor(Math.random() * jitterCeilingMs);
  const hintMs = failure.retry_after_ms;
  return hintMs === undefined ? delayMs : Math.max(delayMs, Math.max(1, hintMs));
}
|
|
1336
1972
|
|
|
1337
|
-
private
|
|
1338
|
-
if (
|
|
1339
|
-
return
|
|
1973
|
+
/**
 * Decides whether an HTTP 403 should be retried instead of being reported as
 * an authentication failure.
 *
 * @param details Response body text of the 403.
 * @param retryAfterMs Retry hint accompanying the response, if any.
 * @returns `true` when a retry hint is present or the body contains one of the
 *   transient markers below; `false` for an empty body or any other text.
 */
private isTransientForbidden(details: string, retryAfterMs?: number): boolean {
  // A retry hint alone is enough to treat the 403 as temporary.
  if (retryAfterMs !== undefined) {
    return true;
  }

  const normalized = details.trim().toLowerCase();
  if (normalized.length === 0) {
    return false;
  }

  const transientSignals = [
    "rate limit",
    "too many requests",
    "temporar",
    "try again",
    "upstream",
    "timeout",
    "busy",
    "capacity",
    "bad_response_status_code"
  ];

  // Anything without a transient marker is a hard failure (invalid or
  // incorrect API key, missing permissions, exhausted quota, billing, unknown
  // model, ...). The previous explicit hard-failure list returned `false`
  // exactly like this fall-through, so it never changed the result.
  // NOTE(review): a body carrying both a transient and a hard-failure marker
  // is still classified as transient — confirm that precedence is intended.
  return transientSignals.some((signal) => normalized.includes(signal));
}
|
|
2024
|
+
|
|
2025
|
+
/**
 * Normalizes anything thrown during an embedding request into an
 * `EmbeddingProviderRequestError`.
 *
 * Existing provider errors pass through untouched. Limiter overruns and
 * `RATE_LIMITED` retrieval errors map to a non-retryable `client_rate_limited`
 * failure, other retrieval errors to `upstream_failure`, and everything else
 * to `unknown_error`.
 */
private toProviderFailure(error: unknown): EmbeddingProviderRequestError {
  if (error instanceof EmbeddingProviderRequestError) {
    return error;
  }
  if (error instanceof ProviderRateLimitExceededError) {
    // Keep the limiter's retry hint on the failure so it shows up in the
    // failure log, matching what enforceRequestLimit attaches.
    return new EmbeddingProviderRequestError(
      "client_rate_limited",
      false,
      `${error.message}; retry_after_ms=${error.retry_after_ms}`,
      error.retry_after_ms
    );
  }
  if (error instanceof RetrievalError) {
    if (error.code === "RATE_LIMITED") {
      return new EmbeddingProviderRequestError("client_rate_limited", false, error.message);
    }
    return new EmbeddingProviderRequestError("upstream_failure", false, error.message);
  }
  if (error instanceof Error) {
    return new EmbeddingProviderRequestError("unknown_error", false, error.message);
  }
  return new EmbeddingProviderRequestError("unknown_error", false, String(error));
}
|
|
2047
|
+
}
|
|
2048
|
+
|
|
2049
|
+
/**
 * Reranker that posts to `<base_url>/rerank` on an OpenAI-compatible service.
 *
 * Requests are optionally shaped by a `ProviderRequestLimiter`. Every failure
 * surfaces as a `RerankerProviderRequestError` carrying a short reason code
 * (`timeout`, `network_error`, `rate_limited`, `auth_error`,
 * `endpoint_not_found`, `http_5xx`, `http_4xx`, `invalid_json`,
 * `invalid_response`), except unexpected limiter errors, which are rethrown
 * unchanged.
 */
export class OpenAICompatibleRerankerProvider implements RerankerProvider {
  private readonly endpoint: string;
  private readonly apiKey: string;
  private readonly model: string;
  private readonly timeoutMs: number;
  private readonly requestLimiter?: ProviderRequestLimiter;
  private readonly requestLimitScope: string;
  private readonly maxRequestsPerMinute: number;
  private readonly rerankMaxWaitMs: number;
  private readonly observability: Observability;

  /**
   * @throws Error when `base_url` or `api_key` is blank, or when `timeout_ms`,
   *   `max_requests_per_minute` or `rerank_max_wait_ms` is out of range.
   */
  constructor(options: OpenAICompatibleRerankerProviderOptions) {
    // Trailing slashes are stripped so the endpoint never contains "//rerank".
    const baseUrl = options.base_url.trim().replace(/\/+$/, "");
    if (baseUrl.length === 0) {
      throw new Error("invalid openai-compatible reranker config: base_url must be non-empty");
    }
    const apiKey = options.api_key.trim();
    if (apiKey.length === 0) {
      throw new Error("invalid openai-compatible reranker config: api_key must be non-empty");
    }

    this.endpoint = `${baseUrl}/rerank`;
    this.apiKey = apiKey;
    this.model = options.model?.trim() || DEFAULT_OPENAI_COMPATIBLE_RERANKER_MODEL;
    this.timeoutMs = options.timeout_ms ?? DEFAULT_OPENAI_COMPATIBLE_RERANKER_TIMEOUT_MS;
    this.requestLimiter = options.request_limiter;
    this.requestLimitScope = resolveProviderLimiterScope({
      provider: "openai_compatible",
      apiKey,
      overrideScopeId: options.request_limit_scope_id
    });
    this.maxRequestsPerMinute = options.max_requests_per_minute ?? DEFAULT_PROVIDER_MAX_REQUESTS_PER_MINUTE;
    this.rerankMaxWaitMs = options.rerank_max_wait_ms ?? DEFAULT_PROVIDER_LIMIT_RERANK_MAX_WAIT_MS;
    this.observability = options.observability ?? getObservability("retrieval-core");

    if (!Number.isInteger(this.timeoutMs) || this.timeoutMs <= 0) {
      throw new Error("invalid openai-compatible reranker config: timeout_ms must be a positive integer");
    }
    if (!Number.isInteger(this.maxRequestsPerMinute) || this.maxRequestsPerMinute <= 0) {
      throw new Error("invalid openai-compatible reranker config: max_requests_per_minute must be a positive integer");
    }
    if (!Number.isInteger(this.rerankMaxWaitMs) || this.rerankMaxWaitMs < 0) {
      throw new Error("invalid openai-compatible reranker config: rerank_max_wait_ms must be a non-negative integer");
    }
  }

  /** Identifies the provider and the model in use. */
  describe(): RerankerDescriptor {
    return {
      provider: "openai_compatible",
      model: this.model
    };
  }

  /**
   * Scores `input.documents` against `input.query`.
   *
   * @returns Up to `top_n` rows (clamped to `1..documents.length`), sorted by
   *   descending score with ties broken by ascending index and duplicate
   *   indices removed. An empty document list returns `[]` without contacting
   *   the limiter or the provider.
   * @throws RerankerProviderRequestError for limiter overruns, transport
   *   failures, non-2xx statuses and malformed responses.
   */
  async rerank(input: { query: string; documents: string[]; top_n: number }): Promise<RerankerResult[]> {
    if (input.documents.length === 0) {
      return [];
    }

    await this.enforceRequestLimit();

    const topN = Math.max(1, Math.min(input.top_n, input.documents.length));
    const controller = new AbortController();
    const timeoutId = setTimeout(() => {
      controller.abort();
    }, this.timeoutMs);

    let response: Response;
    try {
      response = await fetch(this.endpoint, {
        method: "POST",
        headers: {
          authorization: `Bearer ${this.apiKey}`,
          "content-type": "application/json"
        },
        body: JSON.stringify({
          model: this.model,
          query: input.query,
          documents: input.documents,
          top_n: topN
        }),
        signal: controller.signal
      });
    } catch (error) {
      // An abort triggered by the timer above is reported as a timeout.
      if (error && typeof error === "object" && "name" in error && (error as { name?: string }).name === "AbortError") {
        throw new RerankerProviderRequestError("timeout", `request timed out after ${this.timeoutMs}ms`);
      }
      throw new RerankerProviderRequestError(
        "network_error",
        error instanceof Error ? error.message : String(error)
      );
    } finally {
      clearTimeout(timeoutId);
    }

    if (!response.ok) {
      const details = await safeResponseText(response);
      if (response.status === 429) {
        throw new RerankerProviderRequestError("rate_limited", `HTTP 429 ${details}`.trim());
      }
      if (response.status === 401 || response.status === 403) {
        throw new RerankerProviderRequestError("auth_error", `HTTP ${response.status} ${details}`.trim());
      }
      if (response.status === 404) {
        throw new RerankerProviderRequestError("endpoint_not_found", `HTTP 404 ${details}`.trim());
      }
      if (response.status >= 500) {
        throw new RerankerProviderRequestError("http_5xx", `HTTP ${response.status} ${details}`.trim());
      }
      throw new RerankerProviderRequestError("http_4xx", `HTTP ${response.status} ${details}`.trim());
    }

    let payload: unknown;
    try {
      payload = await response.json();
    } catch {
      throw new RerankerProviderRequestError("invalid_json", "provider returned non-JSON response");
    }

    if (!payload || typeof payload !== "object") {
      throw new RerankerProviderRequestError("invalid_response", "provider response must be an object");
    }

    // Rows are read from `results` when that key exists, otherwise from `data`.
    const maybeResults = "results" in payload ? (payload as { results?: unknown }).results : (payload as { data?: unknown }).data;
    if (!Array.isArray(maybeResults)) {
      throw new RerankerProviderRequestError("invalid_response", "provider response missing results array");
    }

    const output: RerankerResult[] = [];
    for (const row of maybeResults) {
      if (!row || typeof row !== "object") {
        throw new RerankerProviderRequestError("invalid_response", "rerank row must be an object");
      }
      const rawIndex = (row as { index?: unknown }).index;
      if (!Number.isInteger(rawIndex)) {
        throw new RerankerProviderRequestError("invalid_response", "rerank row index must be an integer");
      }
      const index = rawIndex as number;
      if (index < 0 || index >= input.documents.length) {
        throw new RerankerProviderRequestError("invalid_response", "rerank row index out of range");
      }
      // `relevance_score` wins; `score` is the fallback field name.
      const rawScore = (row as { relevance_score?: unknown; score?: unknown }).relevance_score ?? (row as { score?: unknown }).score;
      if (typeof rawScore !== "number" || !Number.isFinite(rawScore)) {
        throw new RerankerProviderRequestError("invalid_response", "rerank row score must be finite");
      }
      output.push({
        index,
        relevance_score: rawScore
      });
    }

    // Sorting first means the highest-scoring row survives when an index repeats.
    const seen = new Set<number>();
    const ordered = [...output]
      .sort((a, b) => b.relevance_score - a.relevance_score || a.index - b.index)
      .filter((row) => {
        if (seen.has(row.index)) {
          return false;
        }
        seen.add(row.index);
        return true;
      })
      .slice(0, topN);

    if (ordered.length === 0) {
      throw new RerankerProviderRequestError("invalid_response", "provider returned zero rerank results");
    }
    return ordered;
  }

  /**
   * Reserves a request slot from the optional limiter, waiting at most
   * `rerankMaxWaitMs`. Does nothing when no limiter is configured.
   *
   * @throws RerankerProviderRequestError with reason `rate_limited` when the
   *   wait budget is exhausted; other limiter errors are rethrown unchanged.
   */
  private async enforceRequestLimit(): Promise<void> {
    if (!this.requestLimiter) {
      return;
    }
    const labels = {
      provider: "openai_compatible",
      model: this.model,
      purpose: "rerank",
      limiter_mode: this.requestLimiter.mode ?? "custom"
    } as const;

    try {
      const acquired = await this.requestLimiter.acquire({
        scope: this.requestLimitScope,
        max_requests_per_minute: this.maxRequestsPerMinute,
        max_wait_ms: this.rerankMaxWaitMs
      });
      this.observability.metrics.observe("retrieval_provider_limiter_wait_ms", acquired.wait_ms, labels);
      this.observability.metrics.increment("retrieval_provider_requests_shaped_total", 1, labels);
    } catch (error) {
      // Only a budget overrun is a "wait_timeout"; unexpected limiter failures
      // get their own label so they are not counted as rate-limit blocks.
      this.observability.metrics.increment("retrieval_provider_limiter_block_total", 1, {
        ...labels,
        reason: error instanceof ProviderRateLimitExceededError ? "wait_timeout" : "limiter_error"
      });
      if (error instanceof ProviderRateLimitExceededError) {
        throw new RerankerProviderRequestError(
          "rate_limited",
          `${error.message}; retry_after_ms=${error.retry_after_ms}`
        );
      }
      throw error;
    }
  }
}
|
|
2251
|
+
|
|
2252
|
+
/**
 * Assembles the single-line system instruction for the Claude prompt enhancer.
 *
 * The result is a fixed list of rules joined by spaces, with one rule for the
 * output language ("zh" → Simplified Chinese, "es" → Spanish, anything else →
 * English) and one for the style ("lean", "deep", anything else → standard).
 */
function buildClaudeEnhancerSystemInstruction(
  language: EnhancerOutputLanguage,
  style: ResolvedEnhancerPromptStyle
): string {
  let languageRule: string;
  switch (language) {
    case "zh":
      languageRule = "Output language must be Simplified Chinese.";
      break;
    case "es":
      languageRule = "Output language must be Spanish.";
      break;
    default:
      languageRule = "Output language must be English.";
  }

  let styleRule: string;
  switch (style) {
    case "lean":
      styleRule =
        "Style is lean: keep the response compact (roughly 90-180 words), avoid extra headings, and include only essential steps.";
      break;
    case "deep":
      styleRule =
        "Style is deep: provide comprehensive but grounded guidance (roughly 260-420 words) with concrete constraints, edge cases, and validation.";
      break;
    default:
      styleRule =
        "Style is standard: provide balanced depth (roughly 160-300 words) with clear scope, steps, and validation.";
  }

  const rules: string[] = [
    "You are a high-precision prompt enhancement agent for software engineering tasks.",
    languageRule,
    styleRule,
    "Return plain text only: the final enhanced prompt.",
    "Do not include markdown code fences.",
    "Preserve user intent exactly; do not add unrelated features.",
    "Do not invent file paths or symbols that are not present in provided context.",
    "Produce concise execution-ready prompts, not long generic templates.",
    "Prefer practical sections only: objective, scoped constraints, codebase anchors, implementation steps, validation.",
    "Use concrete file/symbol anchors when context exists.",
    "Avoid repeating generic process advice, broad deliverables lists, or organizational boilerplate."
  ];
  return rules.join(" ");
}
|
|
2282
|
+
|
|
2283
|
+
/** Runs `path` through `normalizePath` and lower-cases the result for case-insensitive comparisons. */
function normalizeEnhancerContextPath(path: string): string {
  const canonical = normalizePath(path);
  return canonical.toLowerCase();
}
|
|
2286
|
+
|
|
2287
|
+
/**
 * Reports whether `path` names a conventions document: `agents.md`,
 * `claude.md`, `readme.md` or `contributing.md`, at the root or inside any
 * directory, compared on the normalized lower-cased path.
 */
function looksLikeEnhancerConventionsFile(path: string): boolean {
  const candidate = normalizeEnhancerContextPath(path);
  const conventionFileNames = ["agents.md", "claude.md", "readme.md", "contributing.md"];
  return conventionFileNames.some((fileName) => candidate === fileName || candidate.endsWith(`/${fileName}`));
}
|
|
2300
|
+
|
|
2301
|
+
/**
 * Collects up to eight convention-like lines from snippets that come from
 * conventions files.
 *
 * A line qualifies when, after stripping a leading bullet or list number and
 * trimming, it is 16-180 characters long, contains one of the rule keywords,
 * and does not begin with a code keyword. Duplicates are dropped
 * case-insensitively; the first spelling seen is kept and input order is
 * preserved.
 */
function extractProjectConventionsFromEnhancerContext(snippets: EnhancerContextSnippet[]): string[] {
  const ruleKeyword =
    /\b(always|never|must|should|avoid|prefer|preserve|keep|strict|isolation|tenant|workspace|contract|schema|backward|compatibility|regression|test|typecheck|bun)\b/i;
  const codeStatementStart = /^(import|export|const|let|var|if|for|while|return)\b/i;
  const collected: string[] = [];
  const alreadyCollected = new Set<string>();

  for (const entry of snippets) {
    if (!looksLikeEnhancerConventionsFile(entry.path)) {
      continue;
    }
    for (const line of entry.snippet.split(/\r?\n/u)) {
      const candidate = line
        .replace(/^\s*[-*+]\s+/u, "")
        .replace(/^\s*\d+\.\s+/u, "")
        .trim();
      const withinLength = candidate.length >= 16 && candidate.length <= 180;
      if (!withinLength || !ruleKeyword.test(candidate) || codeStatementStart.test(candidate)) {
        continue;
      }
      const dedupeKey = candidate.toLowerCase();
      if (alreadyCollected.has(dedupeKey)) {
        continue;
      }
      alreadyCollected.add(dedupeKey);
      collected.push(candidate);
      if (collected.length >= 8) {
        return collected;
      }
    }
  }
  return collected;
}
|
|
2339
|
+
|
|
2340
|
+
/**
 * Derives fixed "non-negotiable" constraint sentences from the prompt and the
 * conversation history by matching a small set of phrase patterns against the
 * lowercased combined text.
 *
 * @param input.prompt - The user's original prompt.
 * @param input.history - Conversation entries; only `content` is read.
 * @returns The constraint sentences whose pattern matched, in rule order
 *          (capped at six entries).
 */
function extractEnhancerNonNegotiables(input: {
  prompt: string;
  history: EnhancePromptInput["conversation_history"];
}): string[] {
  const historyText = input.history.map((entry) => entry.content).join("\n");
  const haystack = [input.prompt, historyText].join("\n").toLowerCase();

  // Each rule pairs a trigger pattern with the sentence it contributes.
  const rules: Array<{ trigger: RegExp; sentence: string }> = [
    {
      trigger:
        /keep (?:behavior|behaviour) stable|preserve (?:existing )?(?:behavior|behaviour)|backward.?compat|no breaking changes|without breaking/i,
      sentence: "Preserve existing behavior and avoid breaking API/contract semantics."
    },
    {
      trigger: /regression tests?|add tests?|test coverage|boundary tests?/i,
      sentence: "Include regression tests for any changed behavior."
    },
    {
      trigger: /tenant|workspace|authorization|auth boundaries?|scope enforcement|isolation/i,
      sentence: "Maintain strict tenant/workspace isolation and authorization boundaries."
    },
    {
      trigger: /no docs|avoid docs|exclude docs/i,
      sentence: "Do not prioritize documentation-only changes unless explicitly requested."
    },
    {
      trigger: /no refactor|minimal changes?|smallest safe change/i,
      sentence: "Prefer the smallest safe change set."
    }
  ];

  return rules
    .filter((rule) => rule.trigger.test(haystack))
    .map((rule) => rule.sentence)
    .slice(0, 6);
}
|
|
2373
|
+
|
|
2374
|
+
/**
 * Chooses the output contract (style label, word budget, preferred sections,
 * patterns to avoid) that is sent to the model alongside the request.
 *
 * Selection depends on the resolved style ("lean", "deep", anything else is
 * treated as the standard style), the classified intent and whether the query
 * is conceptual. The word budget is larger when context references exist.
 *
 * NOTE(review): for "lean" and "deep" a `tests` intent wins over a conceptual
 * query, whereas for the standard style the docs/conceptual check runs first.
 * This precedence is reproduced exactly as found.
 *
 * @returns A freshly allocated contract object on every call.
 */
function buildEnhancerOutputContract(input: {
  style: ResolvedEnhancerPromptStyle;
  intent: EnhancerIntent;
  query_intent: "symbol-heavy" | "impl-focused" | "conceptual";
  has_context: boolean;
}): {
  target_style: string;
  max_words: number;
  preferred_sections: string[];
  avoid_patterns: string[];
} {
  const wantsTests = input.intent === "tests";
  const wantsSpec = input.intent === "docs" || input.query_intent === "conceptual";

  // Builds one contract; the budget pair is (with context, without context).
  const contract = (
    targetStyle: string,
    budget: [number, number],
    sections: string[],
    avoid: string[]
  ): { target_style: string; max_words: number; preferred_sections: string[]; avoid_patterns: string[] } => ({
    target_style: targetStyle,
    max_words: input.has_context ? budget[0] : budget[1],
    preferred_sections: sections,
    avoid_patterns: avoid
  });

  switch (input.style) {
    case "lean": {
      if (wantsTests) {
        return contract(
          "lean_test_plan",
          [220, 170],
          ["Goal", "Key test cases", "Validation"],
          ["long checklists", "broad architecture proposals", "generic deliverables blocks"]
        );
      }
      if (wantsSpec) {
        return contract(
          "lean_spec",
          [220, 170],
          ["Goal", "Scope", "Validation"],
          ["verbose outlines", "boilerplate context blocks", "generic process advice"]
        );
      }
      return contract(
        "lean_implementation_plan",
        [230, 180],
        ["Goal", "Constraints", "Action steps", "Validation"],
        ["deep background sections", "broad deliverables lists", "repeated boilerplate"]
      );
    }
    case "deep": {
      if (wantsTests) {
        return contract(
          "deep_test_plan",
          [420, 340],
          ["Goal", "Behavior under test", "Test matrix", "Edge cases", "Validation"],
          ["vague test advice", "non-test deliverables", "ungrounded file guesses"]
        );
      }
      if (wantsSpec) {
        return contract(
          "deep_spec",
          [420, 340],
          ["Goal", "Scope", "Relevant sources", "Proposed outline", "Risks", "Validation"],
          ["implementation-only checklists", "generic organizational boilerplate", "speculation"]
        );
      }
      return contract(
        "deep_implementation_plan",
        [420, 360],
        ["Goal", "Scope and constraints", "Codebase anchors", "Implementation plan", "Edge cases", "Validation"],
        ["security theater", "repeated compliance boilerplate", "invented file/symbol references"]
      );
    }
    default: {
      // Standard style: the spec check intentionally precedes the tests check here.
      if (wantsSpec) {
        return contract(
          "concise_spec",
          [320, 260],
          ["Goal", "Scope", "Relevant sources", "Proposed outline", "Validation"],
          ["long implementation checklists", "generic deliverables sections", "repeated boilerplate"]
        );
      }
      if (wantsTests) {
        return contract(
          "test_plan",
          [320, 260],
          ["Goal", "Behavior under test", "Test matrix", "Implementation notes", "Validation"],
          ["broad architecture rewrites", "non-test deliverables", "generic process bullets"]
        );
      }
      return contract(
        "implementation_plan",
        [360, 300],
        ["Goal", "Scope and constraints", "Codebase anchors", "Implementation plan", "Validation"],
        ["broad security theater", "repeated compliance boilerplate", "vague deliverables lists"]
      );
    }
  }
}
|
|
2466
|
+
|
|
2467
|
+
/**
 * Renders the user-turn text sent to the model: three fixed instruction lines,
 * an "Input JSON:" marker, and a pretty-printed JSON document describing the
 * request (identifiers, style/intent metadata, original prompt, history,
 * context refs and snippets, output contract, non-negotiables, conventions).
 *
 * Snippet scores are rounded to four decimal places; a missing workspace id is
 * serialized as "none".
 *
 * @param input - The enhancer generation request.
 * @returns The newline-joined instruction block followed by the JSON payload.
 */
function buildClaudeEnhancerUserPayload(input: EnhancerGenerationRequest): string {
  const { request, context_refs: contextRefs, context_snippets: contextSnippets } = input;

  const conventions = extractProjectConventionsFromEnhancerContext(contextSnippets);
  const contract = buildEnhancerOutputContract({
    style: input.style_resolved,
    intent: input.intent,
    query_intent: input.query_intent,
    has_context: contextRefs.length > 0
  });
  const constraints = extractEnhancerNonNegotiables({
    prompt: request.prompt,
    history: request.conversation_history
  });

  const snippetsForModel = contextSnippets.map((entry) => ({
    path: entry.path,
    start_line: entry.start_line,
    end_line: entry.end_line,
    reason: entry.reason,
    score: Number(entry.score.toFixed(4)),
    snippet: entry.snippet
  }));

  // Key order is part of the serialized output; keep it stable.
  const serialized = JSON.stringify(
    {
      trace_id: input.trace_id,
      tenant_id: input.tenant_id,
      workspace_id: input.workspace_id ?? "none",
      tool_mode: input.tool_mode,
      style_requested: input.style_requested,
      style_resolved: input.style_resolved,
      intent: input.intent,
      query_intent: input.query_intent,
      language: input.language,
      original_prompt: request.prompt,
      conversation_history: request.conversation_history,
      context_refs: contextRefs,
      context_snippets: snippetsForModel,
      output_contract: contract,
      non_negotiables: constraints,
      project_conventions: conventions
    },
    null,
    2
  );

  const instructionLines = [
    "Enhance the following request into a concise, implementation-ready prompt.",
    "Prioritize user intent fidelity, concrete repo anchors, and verifiable validation steps.",
    "Honor the requested enhancement style while avoiding invented details.",
    "Input JSON:"
  ];
  return [...instructionLines, serialized].join("\n");
}
|
|
2512
|
+
|
|
2513
|
+
/**
 * Strips one wrapping markdown code fence from model output.
 *
 * The opening fence may carry any info string (e.g. `json`, `text`,
 * `plaintext`) when that info string is followed by a line break; previously
 * only `json`, `markdown` and `md` were recognised, so a fence such as
 * "```text" left the word "text" at the start of the prompt. The historic
 * same-line form ("```json {...}```") is still accepted for those three tags.
 *
 * @param text - Raw model output.
 * @returns The trimmed text without the leading/trailing fence markers.
 */
function removeEnhancerCodeFences(text: string): string {
  // First alternative: any info string terminated by a newline.
  // Second alternative: legacy behaviour (known tag or no tag, same line allowed).
  const openingFence = /^```(?:[\w+-]*(?=[ \t]*\r?\n)|(?:json|markdown|md)?)\s*/iu;
  const closingFence = /\s*```$/u;
  return text.trim().replace(openingFence, "").replace(closingFence, "").trim();
}
|
|
2516
|
+
|
|
2517
|
+
/**
 * Cleans up provider output before it is returned as the enhanced prompt.
 *
 * Steps: remove a wrapping code fence, convert CRLF to LF, strip trailing
 * spaces/tabs from every line, collapse runs of three or more newlines to a
 * single blank line, and trim. If the cleaned text parses as a JSON object
 * with a string `enhanced_prompt` field, that field (trimmed) is returned
 * instead; otherwise the cleaned text is returned as-is.
 *
 * @param text - Raw text produced by the provider.
 * @returns The normalized prompt text (possibly empty).
 */
function normalizeProviderEnhancedPrompt(text: string): string {
  const unfenced = removeEnhancerCodeFences(text).replace(/\r\n/g, "\n");
  const tidied = unfenced
    .split("\n")
    .map((line) => line.replace(/[ \t]+$/u, ""))
    .join("\n")
    .replace(/\n{3,}/g, "\n\n")
    .trim();
  if (tidied.length === 0) {
    return tidied;
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(tidied);
  } catch {
    // Not JSON: the cleaned text itself is the prompt.
    return tidied;
  }
  if (typeof parsed === "object" && parsed !== null) {
    const candidate = (parsed as { enhanced_prompt?: unknown }).enhanced_prompt;
    if (typeof candidate === "string") {
      return candidate.trim();
    }
  }
  return tidied;
}
|
|
2538
|
+
|
|
2539
|
+
/** Argument object accepted by the SDK `query` export as used in this file. */
type ClaudeAgentSdkQueryInput = {
  prompt: string;
  options?: Record<string, unknown>;
};

/**
 * Minimal shape of the SDK `query` function relied on here: it takes a prompt
 * plus an options bag and yields messages of unknown shape asynchronously.
 */
type ClaudeAgentSdkQueryFn = (input: ClaudeAgentSdkQueryInput) => AsyncIterable<unknown>;

/** Module-level cache for the resolved `query` function; unset until first load. */
let cachedClaudeAgentSdkQueryFn: ClaudeAgentSdkQueryFn | undefined;
|
|
2545
|
+
|
|
2546
|
+
/**
 * Type guard for plain object-like values: anything whose `typeof` is
 * "object", excluding `null` and arrays.
 *
 * @param value - Value of unknown shape.
 * @returns `true` when `value` can be indexed as a string-keyed record.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return !Array.isArray(value);
}
|
|
2549
|
+
|
|
2550
|
+
/**
 * Resolves the SDK `query` function, trying `@anthropic-ai/claude-agent-sdk`
 * first and `@anthropic-ai/claude-code` second. The first module that exports
 * a `query` function wins and is cached for subsequent calls.
 *
 * When neither module is usable, the thrown error now lists the failure for
 * every attempted module. Previously only the last failure was reported, so a
 * broken primary SDK was masked by "module not found" for the fallback.
 *
 * @returns The cached or freshly imported `query` function.
 * @throws EnhancerProviderRequestError with reason "upstream_error" when no
 *         candidate module provides `query`.
 */
async function loadClaudeAgentSdkQueryFn(): Promise<ClaudeAgentSdkQueryFn> {
  if (cachedClaudeAgentSdkQueryFn) {
    return cachedClaudeAgentSdkQueryFn;
  }

  const moduleNames = ["@anthropic-ai/claude-agent-sdk", "@anthropic-ai/claude-code"];
  const failures: string[] = [];
  for (const moduleName of moduleNames) {
    try {
      const sdkModule = (await import(moduleName)) as { query?: unknown };
      if (typeof sdkModule.query === "function") {
        cachedClaudeAgentSdkQueryFn = sdkModule.query as ClaudeAgentSdkQueryFn;
        return cachedClaudeAgentSdkQueryFn;
      }
      failures.push(`${moduleName} does not export query()`);
    } catch (error) {
      // Keep every module's failure so the final message explains each attempt.
      const detail = error instanceof Error ? error.message : String(error);
      failures.push(`${moduleName}: ${detail}`);
    }
  }

  const reason = failures.length > 0 ? failures.join("; ") : "unknown error";
  throw new EnhancerProviderRequestError(
    "upstream_error",
    `claude agent sdk is not available; install @anthropic-ai/claude-agent-sdk (${reason})`
  );
}
|
|
2576
|
+
|
|
2577
|
+
/**
 * Pulls text out of a message `content` value.
 *
 * A string is trimmed and returned when non-empty. An array is scanned for
 * record items carrying a string `text` field; their trimmed, non-empty
 * values are joined with newlines. Anything else yields `undefined`.
 *
 * @param content - The `content` value of an SDK message (shape unknown).
 * @returns The extracted text, or `undefined` when there is none.
 */
function extractTextFromClaudeMessageContent(content: unknown): string | undefined {
  if (typeof content === "string") {
    const whole = content.trim();
    return whole.length > 0 ? whole : undefined;
  }
  if (!Array.isArray(content)) {
    return undefined;
  }

  const pieces = content
    .map((item: unknown): string => (isRecord(item) && typeof item.text === "string" ? item.text.trim() : ""))
    .filter((piece) => piece.length > 0);
  return pieces.length > 0 ? pieces.join("\n") : undefined;
}
|
|
2604
|
+
|
|
2605
|
+
/**
 * Finds the first usable text in an SDK message.
 *
 * Lookup order: top-level `summary`, `result`, `text` strings; then top-level
 * `content`; then, when `message` is a record, its `text` string followed by
 * its `content`. String candidates are trimmed and skipped when empty.
 *
 * @param message - One item yielded by the SDK stream (shape unknown).
 * @returns The first non-empty text found, or `undefined`.
 */
function extractTextFromClaudeSdkMessage(message: unknown): string | undefined {
  if (!isRecord(message)) {
    return undefined;
  }

  const pickText = (value: unknown): string | undefined => {
    if (typeof value !== "string") {
      return undefined;
    }
    const text = value.trim();
    return text.length > 0 ? text : undefined;
  };

  for (const field of ["summary", "result", "text"]) {
    const hit = pickText(message[field]);
    if (hit) {
      return hit;
    }
  }

  const topLevelContent = extractTextFromClaudeMessageContent(message.content);
  if (topLevelContent) {
    return topLevelContent;
  }

  const inner = message.message;
  if (!isRecord(inner)) {
    return undefined;
  }
  return pickText(inner.text) ?? extractTextFromClaudeMessageContent(inner.content);
}
|
|
2645
|
+
|
|
2646
|
+
/**
 * Extracts the incremental text carried by a `stream_event` message.
 *
 * Only two event types are inspected: `content_block_start` (text taken from
 * `content_block.text`) and `content_block_delta` (text taken from
 * `delta.text`). The text is returned untrimmed so chunks can be concatenated.
 *
 * @param message - One item yielded by the SDK stream (shape unknown).
 * @returns The chunk text, or `undefined` for any other message or event.
 */
function extractTextChunkFromClaudeSdkStreamEvent(message: unknown): string | undefined {
  if (!isRecord(message) || message.type !== "stream_event") {
    return undefined;
  }
  const streamEvent = message.event;
  if (!isRecord(streamEvent)) {
    return undefined;
  }

  switch (streamEvent.type) {
    case "content_block_start": {
      const block = streamEvent.content_block;
      return isRecord(block) && typeof block.text === "string" ? block.text : undefined;
    }
    case "content_block_delta": {
      const delta = streamEvent.delta;
      return isRecord(delta) && typeof delta.text === "string" ? delta.text : undefined;
    }
    default:
      return undefined;
  }
}
|
|
2674
|
+
|
|
2675
|
+
/**
 * Reads `structured_output.enhanced_prompt` from an SDK message when present.
 *
 * @param message - One item yielded by the SDK stream (shape unknown).
 * @returns A result holding the trimmed prompt, or `undefined` when the
 *          message has no record-shaped `structured_output` with a non-blank
 *          string `enhanced_prompt`.
 */
function extractStructuredOutputFromClaudeSdkMessage(message: unknown): EnhancerGenerationResult | undefined {
  const structured = isRecord(message) ? message.structured_output : undefined;
  if (!isRecord(structured)) {
    return undefined;
  }
  const rawPrompt = structured.enhanced_prompt;
  if (typeof rawPrompt !== "string") {
    return undefined;
  }
  const prompt = rawPrompt.trim();
  return prompt.length > 0 ? { enhanced_prompt: prompt } : undefined;
}
|
|
2691
|
+
|
|
2692
|
+
/**
 * Detects a failed `result` message.
 *
 * A message counts as a failure when its `type` is "result" and its `subtype`
 * is a string other than "success". String entries of `errors` are trimmed
 * and kept when non-empty; non-string entries are ignored.
 *
 * @param message - One item yielded by the SDK stream (shape unknown).
 * @returns The failure subtype with its error strings, or `undefined` when
 *          the message is not a failed result.
 */
function extractResultFailureFromClaudeSdkMessage(message: unknown): {
  subtype: string;
  errors: string[];
} | undefined {
  if (!isRecord(message) || message.type !== "result") {
    return undefined;
  }
  const { subtype } = message;
  if (typeof subtype !== "string" || subtype === "success") {
    return undefined;
  }

  const errors: string[] = [];
  const reported: unknown = message.errors;
  if (Array.isArray(reported)) {
    for (const entry of reported) {
      if (typeof entry !== "string") {
        continue;
      }
      const text = entry.trim();
      if (text.length > 0) {
        errors.push(text);
      }
    }
  }
  return { subtype, errors };
}
|
|
2713
|
+
|
|
2714
|
+
/**
 * Produces a short label for an SDK message, used for diagnostics.
 *
 * Non-record values are labelled with their `typeof`. Records are labelled
 * `type` (or "unknown" when `type` is not a string), with `:subtype` appended
 * when `subtype` is a non-empty string.
 *
 * @param message - One item yielded by the SDK stream (shape unknown).
 * @returns The label string.
 */
function describeClaudeSdkMessage(message: unknown): string {
  if (!isRecord(message)) {
    return typeof message;
  }
  const { type, subtype } = message;
  const kind = typeof type === "string" ? type : "unknown";
  if (typeof subtype === "string" && subtype) {
    return `${kind}:${subtype}`;
  }
  return kind;
}
|
|
2722
|
+
|
|
2723
|
+
/**
 * Converts any thrown value into an `EnhancerProviderRequestError`.
 *
 * Existing `EnhancerProviderRequestError` instances pass through untouched.
 * For other `Error`s the message is pattern-matched: timeout/abort wording
 * maps to "timeout", rate-limit wording (or "429") to "rate_limited", and a
 * missing-file message mentioning "claude" to an "upstream_error" with an
 * explanatory prefix. Everything else becomes "upstream_error".
 *
 * @param error - The caught value.
 * @returns The classified provider error.
 */
function classifyEnhancerProviderError(error: unknown): EnhancerProviderRequestError {
  if (error instanceof EnhancerProviderRequestError) {
    return error;
  }
  if (!(error instanceof Error)) {
    return new EnhancerProviderRequestError("upstream_error", String(error));
  }

  const detail = error.message || "unknown enhancer provider error";
  if (/(timeout|timed out|abort)/i.test(detail)) {
    return new EnhancerProviderRequestError("timeout", detail);
  }
  if (/(rate.?limit|too many requests|429)/i.test(detail)) {
    return new EnhancerProviderRequestError("rate_limited", detail);
  }
  const mentionsMissingFile = /(no such file|not found|ENOENT)/i.test(detail);
  if (mentionsMissingFile && /claude/i.test(detail)) {
    return new EnhancerProviderRequestError("upstream_error", `claude code executable not found: ${detail}`);
  }
  return new EnhancerProviderRequestError("upstream_error", detail);
}
|
|
2742
|
+
|
|
2743
|
+
/**
 * Enhancer generation provider backed by the SDK `query()` stream.
 *
 * The constructor validates and stores configuration. `generate` builds the
 * prompt, runs the query with tools disabled, and returns either the
 * structured output reported by the SDK or the last text it produced.
 */
export class ClaudeAgentEnhancerProvider implements EnhancerGenerationProvider {
  private readonly apiKey: string;
  private readonly model: string;
  private readonly maxTokens: number;
  private readonly baseUrl?: string;
  private readonly pathToClaudeCodeExecutable?: string;
  private readonly permissionMode: ClaudeCodePermissionMode;

  /**
   * @param options - Provider configuration; string options are trimmed.
   * @throws Error when `api_key` or `model` is blank, `max_tokens` is not a
   *         positive integer, or `permission_mode` is not a known mode.
   */
  constructor(options: ClaudeAgentEnhancerProviderOptions) {
    const trimmedKey = options.api_key.trim();
    if (!trimmedKey) {
      throw new Error("invalid claude enhancer config: api_key must be non-empty");
    }

    const resolvedModel = options.model?.trim() ?? DEFAULT_CLAUDE_ENHANCER_MODEL;
    if (!resolvedModel) {
      throw new Error("invalid claude enhancer config: model must be non-empty");
    }

    const tokenBudget = options.max_tokens ?? 1_200;
    if (!(Number.isInteger(tokenBudget) && tokenBudget > 0)) {
      throw new Error("invalid claude enhancer config: max_tokens must be a positive integer");
    }

    const mode = options.permission_mode ?? "default";
    const knownModes: readonly string[] = ["default", "acceptEdits", "bypassPermissions", "plan"];
    if (!knownModes.includes(mode)) {
      throw new Error("invalid claude enhancer config: permission_mode must be default|acceptEdits|bypassPermissions|plan");
    }

    const executablePath = options.path_to_claude_code_executable?.trim();

    this.apiKey = trimmedKey;
    this.model = resolvedModel;
    this.maxTokens = tokenBudget;
    this.baseUrl = options.base_url?.trim();
    // A blank executable path is treated the same as "not configured".
    this.pathToClaudeCodeExecutable = executablePath ? executablePath : undefined;
    this.permissionMode = mode;
  }

  /** Reports the provider id ("claude_agent") and the configured model. */
  describe(): EnhancerProviderDescriptor {
    return { provider: "claude_agent", model: this.model };
  }

  /**
   * Runs one enhancement request through the SDK.
   *
   * Result precedence after the stream ends: structured output, then the last
   * extracted message text, then the concatenation of streamed text chunks.
   *
   * @param input - The generation request; `abort_signal` is forwarded to the
   *                SDK and `on_progress` is invoked once per streamed message.
   * @returns The enhanced prompt.
   * @throws EnhancerProviderRequestError for SDK failures, a max-turns stop
   *         without output, or a stream that produced no text.
   */
  async generate(input: EnhancerGenerationRequest): Promise<EnhancerGenerationResult> {
    const runQuery = await loadClaudeAgentSdkQueryFn();
    const userPrompt = buildClaudeEnhancerUserPayload(input);

    // Bridge the caller's abort signal onto a controller owned by this call.
    const localAbort = new AbortController();
    const callerSignal = input.abort_signal;
    const forwardAbort = (): void => {
      localAbort.abort();
    };
    if (callerSignal?.aborted) {
      localAbort.abort();
    } else if (callerSignal) {
      callerSignal.addEventListener("abort", forwardAbort, { once: true });
    }

    const sdkOptions: Record<string, unknown> = {
      model: this.model,
      maxThinkingTokens: this.maxTokens,
      maxTurns: DEFAULT_CLAUDE_ENHANCER_MAX_TURNS,
      includePartialMessages: true,
      thinking: {
        type: "disabled"
      },
      permissionMode: this.permissionMode,
      systemPrompt: buildClaudeEnhancerSystemInstruction(input.language, input.style_resolved),
      // Enhancer already receives scoped context snippets; keep Claude Code tools disabled to avoid long tool loops.
      tools: [],
      allowedTools: [],
      env: {
        ANTHROPIC_API_KEY: this.apiKey,
        ...(this.baseUrl ? { ANTHROPIC_BASE_URL: this.baseUrl } : {})
      },
      abortController: localAbort,
      ...(this.pathToClaudeCodeExecutable ? { pathToClaudeCodeExecutable: this.pathToClaudeCodeExecutable } : {}),
      ...(input.request.project_root_path ? { cwd: input.request.project_root_path } : {})
    };

    const describeErrors = (errors: string[]): string => (errors.length > 0 ? `: ${errors.join(" | ")}` : "");

    let structuredResult: EnhancerGenerationResult | undefined;
    let latestText: string | undefined;
    let turnLimitFailure: { subtype: string; errors: string[] } | undefined;
    const streamedChunks: string[] = [];
    const observedKinds = new Set<string>();

    try {
      for await (const sdkMessage of runQuery({ prompt: userPrompt, options: sdkOptions })) {
        input.on_progress?.();
        observedKinds.add(describeClaudeSdkMessage(sdkMessage));

        const chunk = extractTextChunkFromClaudeSdkStreamEvent(sdkMessage);
        if (typeof chunk === "string" && chunk.length > 0) {
          streamedChunks.push(chunk);
        }

        const failure = extractResultFailureFromClaudeSdkMessage(sdkMessage);
        if (failure) {
          if (failure.subtype === "error_max_turns") {
            // Remember it; text gathered so far may still be usable after the loop.
            turnLimitFailure = failure;
            continue;
          }
          throw new EnhancerProviderRequestError(
            "upstream_error",
            `claude agent sdk result error (${failure.subtype})${describeErrors(failure.errors)}`
          );
        }

        structuredResult = extractStructuredOutputFromClaudeSdkMessage(sdkMessage) ?? structuredResult;
        latestText = extractTextFromClaudeSdkMessage(sdkMessage) ?? latestText;

        if (isRecord(sdkMessage) && sdkMessage.type === "assistant" && typeof sdkMessage.error === "string") {
          throw new EnhancerProviderRequestError(
            "upstream_error",
            `claude agent sdk assistant error: ${sdkMessage.error}`
          );
        }
      }
    } catch (error) {
      throw classifyEnhancerProviderError(error);
    } finally {
      callerSignal?.removeEventListener("abort", forwardAbort);
    }

    if (structuredResult) {
      return structuredResult;
    }
    if (!latestText && streamedChunks.length > 0) {
      latestText = streamedChunks.join("").trim();
    }
    if (!latestText) {
      if (turnLimitFailure) {
        throw new EnhancerProviderRequestError(
          "upstream_error",
          `claude agent sdk hit max turns before returning output${describeErrors(turnLimitFailure.errors)}`
        );
      }
      const seenKinds = [...observedKinds].join(", ") || "none";
      throw new EnhancerProviderRequestError(
        "invalid_response",
        `claude agent sdk returned no text output (messages=${seenKinds})`
      );
    }
    return { enhanced_prompt: normalizeProviderEnhancedPrompt(latestText) };
  }
}
|
|
1350
2897
|
|
|
1351
2898
|
async function safeResponseText(response: Response): Promise<string> {
|
|
1352
2899
|
try {
|
|
@@ -1357,6 +2904,37 @@ async function safeResponseText(response: Response): Promise<string> {
|
|
|
1357
2904
|
}
|
|
1358
2905
|
}
|
|
1359
2906
|
|
|
2907
|
+
/**
 * Parses an HTTP `Retry-After` header value into a delay in milliseconds.
 *
 * Two forms are accepted: a non-negative number of seconds (rounded up to
 * whole milliseconds) and an HTTP date (delay until that instant, never
 * negative).
 *
 * A value that is numeric but unusable (negative or non-finite) now yields
 * `undefined` directly. Previously it fell through to `Date.parse`, whose
 * lenient handling of bare numbers is engine-dependent and could turn an
 * invalid header such as "-5" into a 0 ms delay.
 *
 * @param headerValue - Raw header value, or `null` when the header is absent.
 * @returns The delay in milliseconds, or `undefined` when absent or unparseable.
 */
function parseRetryAfterMs(headerValue: string | null): number | undefined {
  const trimmed = headerValue?.trim();
  if (!trimmed) {
    return undefined;
  }

  const seconds = Number(trimmed);
  if (!Number.isNaN(seconds)) {
    // Numeric form: decide here and never hand numbers to Date.parse.
    return Number.isFinite(seconds) && seconds >= 0 ? Math.ceil(seconds * 1000) : undefined;
  }

  const dateMs = Date.parse(trimmed);
  if (Number.isNaN(dateMs)) {
    return undefined;
  }
  return Math.max(0, dateMs - Date.now());
}
|
|
2925
|
+
|
|
2926
|
+
/**
 * Builds the limiter scope key `provider:<provider>|credential:<tag>`.
 *
 * The tag is the trimmed override scope id when one is given and non-blank;
 * otherwise it is the first 16 characters of `sha256(apiKey)`.
 *
 * @param input.provider - Provider identifier placed in the key.
 * @param input.apiKey - Credential hashed when no override is supplied.
 * @param input.overrideScopeId - Optional explicit credential tag.
 * @returns The scope key string.
 */
function resolveProviderLimiterScope(input: {
  provider: string;
  apiKey: string;
  overrideScopeId?: string;
}): string {
  const explicitScope = input.overrideScopeId?.trim();
  const credentialTag = explicitScope ? explicitScope : sha256(input.apiKey).slice(0, 16);
  return `provider:${input.provider}|credential:${credentialTag}`;
}
|
|
2937
|
+
|
|
1360
2938
|
function resolveEmbeddingDescriptor(provider: EmbeddingProvider): EmbeddingDescriptor {
|
|
1361
2939
|
const described = provider.describe?.();
|
|
1362
2940
|
if (!described) {
|
|
@@ -1373,23 +2951,120 @@ function resolveEmbeddingDescriptor(provider: EmbeddingProvider): EmbeddingDescr
|
|
|
1373
2951
|
};
|
|
1374
2952
|
}
|
|
1375
2953
|
|
|
1376
|
-
function
|
|
1377
|
-
const
|
|
1378
|
-
if (
|
|
1379
|
-
|
|
2954
|
+
/**
 * Obtains a reranker descriptor from a provider.
 *
 * Falls back to `{ provider: "custom" }` when the provider has no `describe`
 * method or it returns nothing. The `model` key is only included when the
 * described model is truthy.
 *
 * @param provider - The reranker provider to describe.
 * @returns The descriptor reported by the provider, or the "custom" fallback.
 */
function resolveRerankerDescriptor(provider: RerankerProvider): RerankerDescriptor {
  const info = provider.describe?.();
  if (!info) {
    return { provider: "custom" };
  }
  return info.model ? { provider: info.provider, model: info.model } : { provider: info.provider };
}
|
|
2966
|
+
|
|
2967
|
+
/**
 * Obtains an enhancer provider descriptor from a provider.
 *
 * Falls back to `{ provider: "custom" }` when the provider has no `describe`
 * method or it returns nothing. The `model` key is only included when the
 * described model is truthy.
 *
 * @param provider - The enhancer generation provider to describe.
 * @returns The descriptor reported by the provider, or the "custom" fallback.
 */
function resolveEnhancerProviderDescriptor(provider: EnhancerGenerationProvider): EnhancerProviderDescriptor {
  const info = provider.describe?.();
  if (!info) {
    return { provider: "custom" };
  }
  return info.model ? { provider: info.provider, model: info.model } : { provider: info.provider };
}
|
|
2979
|
+
|
|
2980
|
+
/**
 * Validates and canonicalizes an embedding descriptor.
 *
 * The provider is trimmed and lowercased; `model` and `version` are trimmed.
 * A `model` or `version` that is blank after trimming is now omitted, which
 * matches how the reranker and enhancer descriptor normalizers treat a blank
 * model. Previously a whitespace-only value was emitted as an empty string.
 *
 * @param descriptor - Descriptor as reported by a provider.
 * @returns A new normalized descriptor.
 * @throws Error when the provider is blank or `dimensions` is not a positive integer.
 */
function normalizeEmbeddingDescriptor(descriptor: EmbeddingDescriptor): EmbeddingDescriptor {
  const provider = descriptor.provider.trim();
  if (provider.length === 0) {
    throw new Error("invalid embedding descriptor: provider must be non-empty");
  }
  if (!Number.isInteger(descriptor.dimensions) || descriptor.dimensions <= 0) {
    throw new Error("invalid embedding descriptor: dimensions must be a positive integer");
  }
  // Trim before the presence check so blank strings are dropped, not kept as "".
  const model = descriptor.model?.trim();
  const version = descriptor.version?.trim();
  return {
    provider: provider.toLowerCase(),
    ...(model ? { model } : {}),
    dimensions: descriptor.dimensions,
    ...(version ? { version } : {})
  };
}
|
|
2995
|
+
|
|
2996
|
+
/**
 * Validates and canonicalizes a reranker descriptor: the provider is trimmed
 * and lowercased, the model is trimmed and omitted when blank.
 *
 * @param descriptor - Descriptor as reported by a provider.
 * @returns A new normalized descriptor.
 * @throws Error when the provider is blank.
 */
function normalizeRerankerDescriptor(descriptor: RerankerDescriptor): RerankerDescriptor {
  const providerName = descriptor.provider.trim().toLowerCase();
  if (!providerName) {
    throw new Error("invalid reranker descriptor: provider must be non-empty");
  }
  const modelName = descriptor.model?.trim();
  return modelName ? { provider: providerName, model: modelName } : { provider: providerName };
}
|
|
3007
|
+
|
|
3008
|
+
/**
 * Validates and canonicalizes an enhancer provider descriptor: the provider is
 * trimmed and lowercased, the model is trimmed and omitted when blank.
 *
 * @param descriptor - Descriptor as reported by a provider.
 * @returns A new normalized descriptor.
 * @throws Error when the provider is blank.
 */
function normalizeEnhancerProviderDescriptor(descriptor: EnhancerProviderDescriptor): EnhancerProviderDescriptor {
  const providerName = descriptor.provider.trim().toLowerCase();
  if (!providerName) {
    throw new Error("invalid enhancer descriptor: provider must be non-empty");
  }
  const modelName = descriptor.model?.trim();
  return modelName ? { provider: providerName, model: modelName } : { provider: providerName };
}
|
|
3019
|
+
|
|
3020
|
+
/**
 * Builds the text submitted to the reranker for one candidate: the candidate
 * path on the first line, followed by its snippet.
 *
 * @param candidate - Search result row; `path` and `snippet` are read.
 * @returns `path`, a newline, then `snippet`.
 */
function buildRerankerDocument(candidate: SearchResultRow): string {
  const { path, snippet } = candidate;
  return `${path}\n${snippet}`;
}
|
|
3023
|
+
|
|
3024
|
+
/**
 * Maps a reranker failure onto one of the reported failure reasons.
 *
 * For `RerankerProviderRequestError` the error's own `reason` decides:
 * "timeout" and "rate_limited" pass through, "invalid_json" and
 * "invalid_response" become "schema_error", anything else "upstream_error".
 * For other `Error`s the message is pattern-matched, rate-limit wording being
 * checked before timeout wording. Non-errors map to "upstream_error".
 *
 * @param error - The caught value.
 * @returns The failure reason label.
 */
function classifyRerankerFailureReason(error: unknown): "timeout" | "schema_error" | "rate_limited" | "upstream_error" {
  if (error instanceof RerankerProviderRequestError) {
    switch (error.reason) {
      case "timeout":
        return "timeout";
      case "rate_limited":
        return "rate_limited";
      case "invalid_json":
      case "invalid_response":
        return "schema_error";
      default:
        return "upstream_error";
    }
  }
  if (!(error instanceof Error)) {
    return "upstream_error";
  }
  const detail = error.message;
  if (/(rate.?limit|too many requests|429)/i.test(detail)) {
    return "rate_limited";
  }
  return /(timeout|timed out)/i.test(detail) ? "timeout" : "upstream_error";
}
|
|
3048
|
+
|
|
3049
|
+
/**
 * Maps an error raised during enhancer generation onto a failure-reason label.
 *
 * - `EnhancerProviderRequestError`: its own `reason` is returned unchanged.
 * - Any other `Error`: the message text is inspected for timeout wording
 *   first, then rate-limit wording; anything else is `upstream_error`.
 * - Non-`Error` values: `upstream_error`.
 *
 * @param error - The caught value; may be anything.
 * @returns The failure-reason label.
 */
function classifyEnhancerGenerationFailureReason(
  error: unknown
): "timeout" | "schema_error" | "rate_limited" | "invalid_response" | "upstream_error" {
  if (error instanceof EnhancerProviderRequestError) {
    return error.reason;
  }

  if (error instanceof Error) {
    if (/(timeout|timed out)/i.test(error.message)) {
      return "timeout";
    }
    // `429` must be a standalone token; an unanchored match would treat any
    // message containing those digits (e.g. "returned 14290 bytes") as a
    // rate-limit response.
    if (/(rate.?limit|too many requests|\b429\b)/i.test(error.message)) {
      return "rate_limited";
    }
  }

  return "upstream_error";
}
|
|
1391
3066
|
|
|
1392
|
-
function classifyIntent(prompt: string):
|
|
3067
|
+
function classifyIntent(prompt: string): EnhancerIntent {
|
|
1393
3068
|
const p = prompt.toLowerCase();
|
|
1394
3069
|
if (/fix|bug|error|crash|regression/.test(p)) {
|
|
1395
3070
|
return "bugfix";
|
|
@@ -1409,7 +3084,54 @@ function classifyIntent(prompt: string): "bugfix" | "feature" | "refactor" | "do
|
|
|
1409
3084
|
return "unknown";
|
|
1410
3085
|
}
|
|
1411
3086
|
|
|
1412
|
-
function
|
|
3087
|
+
/**
 * Chooses the prompt style the enhancer will use.
 *
 * A missing request defaults to "standard". Any explicit request other than
 * "auto" is honoured as-is. For "auto", the prompt plus all conversation
 * history content is inspected:
 *   - "lean" when the text asks for brevity, or when it is short (at most 18
 *     tokens and at most one history entry) and neither risky nor asking for depth;
 *   - "deep" when it asks for depth, mentions a high-risk topic, scores at
 *     least 2 complexity signals (context present, 32+ tokens, 3+ history
 *     entries), or the query intent is "symbol-heavy" or the intent is "tests";
 *   - "standard" otherwise.
 *
 * @returns The effective requested style and the style resolved from it.
 */
function resolveEnhancerPromptStyle(input: {
  requested?: EnhancePromptStyle;
  intent: EnhancerIntent;
  query_intent: EnhancerQueryIntent;
  prompt: string;
  history: EnhancePromptInput["conversation_history"];
  has_context: boolean;
}): {
  requested: EnhancePromptStyle;
  resolved: ResolvedEnhancerPromptStyle;
} {
  const requested = input.requested ?? "standard";
  if (requested !== "auto") {
    return { requested, resolved: requested };
  }

  const { prompt, history, has_context: hasContext } = input;
  const historyText = history.map((turn) => turn.content).join("\n");
  const corpus = `${prompt}\n${historyText}`.trim();
  const tokenCount = tokenize(corpus).length;
  const mentions = (pattern: RegExp): boolean => pattern.test(corpus);
  const outcome = (resolved: ResolvedEnhancerPromptStyle) => ({ requested, resolved });

  const wantsBrevity = mentions(/\b(concise|brief|short|minimal|quick)\b/i);
  const wantsDetail = mentions(/\b(detailed|comprehensive|thorough|step-by-step|checklist)\b/i);
  const touchesRiskyArea = mentions(
    /\b(security|auth|authorization|tenant|workspace|migration|data loss|rollback|incident|compliance|backward)\b/i
  );
  const looksSmall = tokenCount <= 18 && history.length <= 1;

  // An explicit request for brevity wins over every other signal.
  if (wantsBrevity || (looksSmall && !touchesRiskyArea && !wantsDetail)) {
    return outcome("lean");
  }

  const complexitySignals = Number(hasContext) + Number(tokenCount >= 32) + Number(history.length >= 3);
  const needsDepth =
    wantsDetail ||
    touchesRiskyArea ||
    complexitySignals >= 2 ||
    input.query_intent === "symbol-heavy" ||
    input.intent === "tests";

  return outcome(needsDepth ? "deep" : "standard");
}
|
|
3133
|
+
|
|
3134
|
+
function detectDominantLanguage(prompt: string, history: EnhancePromptInput["conversation_history"]): EnhancerOutputLanguage {
|
|
1413
3135
|
const latestUser = [...history].reverse().find((m) => m.role === "user")?.content ?? prompt;
|
|
1414
3136
|
const sample = `${prompt}\n${latestUser}`.toLowerCase();
|
|
1415
3137
|
if (/[\u3400-\u9fff]/.test(sample)) {
|
|
@@ -1917,7 +3639,7 @@ function buildEnhancerRetrievalQuery(
|
|
|
1917
3639
|
};
|
|
1918
3640
|
}
|
|
1919
3641
|
|
|
1920
|
-
const ENHANCER_LOW_CONFIDENCE_WARNING = "Low retrieval confidence; narrowed context refs
|
|
3642
|
+
// Warning text for the low-retrieval-confidence case; the code that emits it is outside this excerpt.
const ENHANCER_LOW_CONFIDENCE_WARNING = "Low retrieval confidence; narrowed context refs.";
|
|
1921
3643
|
|
|
1922
3644
|
const ENHANCER_CONFIDENCE_OVERLAP_STOPWORDS = new Set([
|
|
1923
3645
|
"a",
|
|
@@ -2203,7 +3925,7 @@ function hasStrongEnhancerAnchorMatch(input: {
|
|
|
2203
3925
|
const topScore = top[0]?.score ?? 0;
|
|
2204
3926
|
const runnerUpScore = top[1]?.score ?? Number.NEGATIVE_INFINITY;
|
|
2205
3927
|
const strongScoreMargin = top.length === 1 || topScore - runnerUpScore >= 0.08;
|
|
2206
|
-
const hasTopExactSymbolMatch = top.some((result) => result.reason
|
|
3928
|
+
const hasTopExactSymbolMatch = top.some((result) => isExactLiteralReason(result.reason));
|
|
2207
3929
|
if (hasTopExactSymbolMatch && strongScoreMargin && topScore >= 0.55) {
|
|
2208
3930
|
return true;
|
|
2209
3931
|
}
|
|
@@ -2328,7 +4050,7 @@ function evaluateEnhancerConfidence(input: {
|
|
|
2328
4050
|
if (diversityStrength < confidenceThreshold) {
|
|
2329
4051
|
failedSignals.push("path_diversity");
|
|
2330
4052
|
}
|
|
2331
|
-
const strongSymbolOrPathSignal = top.some((result) => result.reason
|
|
4053
|
+
const strongSymbolOrPathSignal = top.some((result) => isExactLiteralReason(result.reason)) && topOverlap >= 0.16;
|
|
2332
4054
|
const lowConfidence = !strongSymbolOrPathSignal && confidenceScore + 0.01 < confidenceThreshold;
|
|
2333
4055
|
|
|
2334
4056
|
return {
|
|
@@ -2355,7 +4077,7 @@ function rankEnhancerResultsForConfidence(input: {
|
|
|
2355
4077
|
const anchorScore = (result: SearchContextOutput["results"][number]): number => {
|
|
2356
4078
|
const normalizedPath = normalizePath(result.path).toLowerCase();
|
|
2357
4079
|
const normalizedSnippet = result.snippet.toLowerCase();
|
|
2358
|
-
let score = result.reason
|
|
4080
|
+
let score = isExactLiteralReason(result.reason) ? 2 : 0;
|
|
2359
4081
|
for (const anchor of anchors) {
|
|
2360
4082
|
if (normalizedPath.includes(anchor)) {
|
|
2361
4083
|
score += 2;
|
|
@@ -2415,7 +4137,11 @@ function rankEnhancerResultsForConfidence(input: {
|
|
|
2415
4137
|
});
|
|
2416
4138
|
}
|
|
2417
4139
|
|
|
2418
|
-
async function runWithTimeout<T>(input: {
|
|
4140
|
+
async function runWithTimeout<T>(input: {
|
|
4141
|
+
timeout_ms: number;
|
|
4142
|
+
fn: () => Promise<T> | T;
|
|
4143
|
+
on_timeout?: () => void;
|
|
4144
|
+
}): Promise<T> {
|
|
2419
4145
|
return await new Promise<T>((resolve, reject) => {
|
|
2420
4146
|
let settled = false;
|
|
2421
4147
|
const timer = setTimeout(() => {
|
|
@@ -2423,6 +4149,7 @@ async function runWithTimeout<T>(input: { timeout_ms: number; fn: () => Promise<
|
|
|
2423
4149
|
return;
|
|
2424
4150
|
}
|
|
2425
4151
|
settled = true;
|
|
4152
|
+
input.on_timeout?.();
|
|
2426
4153
|
reject(new Error(`timeout_after_${input.timeout_ms}ms`));
|
|
2427
4154
|
}, input.timeout_ms);
|
|
2428
4155
|
|
|
@@ -2447,6 +4174,65 @@ async function runWithTimeout<T>(input: { timeout_ms: number; fn: () => Promise<
|
|
|
2447
4174
|
});
|
|
2448
4175
|
}
|
|
2449
4176
|
|
|
4177
|
+
/**
 * Runs `input.fn` under an inactivity watchdog.
 *
 * The watchdog is armed for `timeout_ms` before `fn` is invoked and re-armed
 * every time `fn` calls the supplied `touch()` helper. If it ever fires before
 * `fn` settles, the supplied `signal` is aborted and the returned promise
 * rejects with `Error("timeout_after_<timeout_ms>ms")`. Once the outcome is
 * decided (value, error, or timeout) later events are ignored.
 *
 * @param input.timeout_ms - Maximum quiet period, in milliseconds, between touches.
 * @param input.fn - Work to run; receives `touch` and an `AbortSignal`.
 * @returns The value produced by `fn`.
 */
async function runWithInactivityTimeout<T>(input: {
  timeout_ms: number;
  fn: (helpers: { touch: () => void; signal: AbortSignal }) => Promise<T> | T;
}): Promise<T> {
  return await new Promise<T>((resolve, reject) => {
    const controller = new AbortController();
    let inactivityTimer: ReturnType<typeof setTimeout> | undefined;
    let finished = false;

    function cancelTimer(): void {
      if (inactivityTimer) {
        clearTimeout(inactivityTimer);
      }
    }

    // Watchdog fired: abort the work's signal first, then surface the timeout.
    function expire(): void {
      if (finished) {
        return;
      }
      finished = true;
      controller.abort();
      reject(new Error(`timeout_after_${input.timeout_ms}ms`));
    }

    // Re-arms the watchdog; a no-op once the outcome has been decided.
    function touch(): void {
      if (finished) {
        return;
      }
      cancelTimer();
      inactivityTimer = setTimeout(expire, input.timeout_ms);
    }

    function succeed(value: T): void {
      if (finished) {
        return;
      }
      finished = true;
      cancelTimer();
      resolve(value);
    }

    function fail(error: unknown): void {
      if (finished) {
        return;
      }
      finished = true;
      cancelTimer();
      reject(error);
    }

    touch();
    // Starting from an already-resolved promise defers the call to `fn` to a
    // microtask and turns a synchronous throw into a rejection handled by `fail`.
    Promise.resolve()
      .then(() => input.fn({ touch, signal: controller.signal }))
      .then(succeed)
      .catch(fail);
  });
}
|
|
4235
|
+
|
|
2450
4236
|
function deterministicEnhancerFallbackRanking(input: {
|
|
2451
4237
|
results: SearchContextOutput["results"];
|
|
2452
4238
|
intent: ReturnType<typeof classifyIntent>;
|
|
@@ -2462,46 +4248,6 @@ function deterministicEnhancerFallbackRanking(input: {
|
|
|
2462
4248
|
return [...preferred, ...tolerated, ...avoided];
|
|
2463
4249
|
}
|
|
2464
4250
|
|
|
2465
|
-
function localizeLowConfidenceQuestion(input: {
|
|
2466
|
-
language: "en" | "es" | "zh";
|
|
2467
|
-
kind: "scope" | "symbol" | "source_priority";
|
|
2468
|
-
symbol?: string;
|
|
2469
|
-
}): string {
|
|
2470
|
-
if (input.kind === "symbol") {
|
|
2471
|
-
if (input.language === "es") {
|
|
2472
|
-
return input.symbol
|
|
2473
|
-
? `¿Puedes confirmar si el cambio debe centrarse en el símbolo "${input.symbol}"?`
|
|
2474
|
-
: "¿Qué función, clase o archivo exacto debe modificarse primero?";
|
|
2475
|
-
}
|
|
2476
|
-
if (input.language === "zh") {
|
|
2477
|
-
return input.symbol
|
|
2478
|
-
? `请确认这次改动是否应优先围绕符号“${input.symbol}”展开?`
|
|
2479
|
-
: "请明确首先要修改的函数、类或文件路径。";
|
|
2480
|
-
}
|
|
2481
|
-
return input.symbol
|
|
2482
|
-
? `Can you confirm whether "${input.symbol}" is the primary symbol to change?`
|
|
2483
|
-
: "Which exact function, class, or file should be edited first?";
|
|
2484
|
-
}
|
|
2485
|
-
|
|
2486
|
-
if (input.kind === "source_priority") {
|
|
2487
|
-
if (input.language === "es") {
|
|
2488
|
-
return "¿Debemos priorizar archivos de implementación en src/lib y dejar docs/tests/examples fuera de alcance?";
|
|
2489
|
-
}
|
|
2490
|
-
if (input.language === "zh") {
|
|
2491
|
-
return "是否应优先修改 src/lib 下的实现代码,并排除 docs/tests/examples?";
|
|
2492
|
-
}
|
|
2493
|
-
return "Should we prioritize runtime implementation files (src/lib) and exclude docs/tests/examples from scope?";
|
|
2494
|
-
}
|
|
2495
|
-
|
|
2496
|
-
if (input.language === "es") {
|
|
2497
|
-
return "¿Cuál es el alcance mínimo y el comportamiento que no debe cambiar?";
|
|
2498
|
-
}
|
|
2499
|
-
if (input.language === "zh") {
|
|
2500
|
-
return "这次改动的最小范围是什么?哪些行为必须保持不变?";
|
|
2501
|
-
}
|
|
2502
|
-
return "What is the minimal scope, and which behavior must remain unchanged?";
|
|
2503
|
-
}
|
|
2504
|
-
|
|
2505
4251
|
function trimToContextBudget(results: SearchContextOutput["results"]): SearchContextOutput["results"] {
|
|
2506
4252
|
let total = 0;
|
|
2507
4253
|
const out: SearchContextOutput["results"] = [];
|
|
@@ -2516,7 +4262,7 @@ function trimToContextBudget(results: SearchContextOutput["results"]): SearchCon
|
|
|
2516
4262
|
}
|
|
2517
4263
|
|
|
2518
4264
|
function formatEnhancedPrompt(input: {
|
|
2519
|
-
|
|
4265
|
+
style: ResolvedEnhancerPromptStyle;
|
|
2520
4266
|
language: "en" | "es" | "zh";
|
|
2521
4267
|
original_prompt: string;
|
|
2522
4268
|
refs: ContextRef[];
|
|
@@ -2530,62 +4276,175 @@ function formatEnhancedPrompt(input: {
|
|
|
2530
4276
|
input.refs.length > 0 ? input.refs.map((r) => `- ${r.path}:${r.start_line}`).join("\n") : emptyRefsByLanguage[input.language];
|
|
2531
4277
|
|
|
2532
4278
|
if (input.language === "zh") {
|
|
4279
|
+
if (input.style === "lean") {
|
|
4280
|
+
return [
|
|
4281
|
+
"目标",
|
|
4282
|
+
input.original_prompt,
|
|
4283
|
+
"",
|
|
4284
|
+
"约束",
|
|
4285
|
+
"- 保持现有行为与合约兼容。",
|
|
4286
|
+
"- 优先最小且安全的改动。",
|
|
4287
|
+
"",
|
|
4288
|
+
"行动步骤",
|
|
4289
|
+
"- 先确认当前行为与目标范围。",
|
|
4290
|
+
"- 在必要位置完成最小实现并补充回归测试。",
|
|
4291
|
+
"",
|
|
4292
|
+
"验证",
|
|
4293
|
+
"- 运行相关测试并确认无回归。"
|
|
4294
|
+
].join("\n");
|
|
4295
|
+
}
|
|
4296
|
+
if (input.style === "deep") {
|
|
4297
|
+
return [
|
|
4298
|
+
"目标",
|
|
4299
|
+
input.original_prompt,
|
|
4300
|
+
"",
|
|
4301
|
+
"范围与约束",
|
|
4302
|
+
"- 保持现有行为与 API/合约语义稳定。",
|
|
4303
|
+
"- 仅在必要边界内调整实现,避免扩散改动。",
|
|
4304
|
+
"- 发现风险路径时优先失败安全(deny-by-default)。",
|
|
4305
|
+
"",
|
|
4306
|
+
"代码锚点",
|
|
4307
|
+
likelyFiles,
|
|
4308
|
+
"",
|
|
4309
|
+
"实施步骤",
|
|
4310
|
+
"- 基线确认:先验证当前行为与关键路径。",
|
|
4311
|
+
"- 变更实现:对关键分支做最小、安全、可回退的改动。",
|
|
4312
|
+
"- 回归测试:覆盖正向、跨边界、异常与空输入场景。",
|
|
4313
|
+
"",
|
|
4314
|
+
"边界情况",
|
|
4315
|
+
"- 缺失上下文、无索引或空结果时,保持行为可解释且可回退。",
|
|
4316
|
+
"- 异步/并发路径中避免上下文泄漏与跨租户访问。",
|
|
4317
|
+
"",
|
|
4318
|
+
"验证",
|
|
4319
|
+
"- 运行 typecheck 与目标测试集;确认关键路径稳定无回归。"
|
|
4320
|
+
].join("\n");
|
|
4321
|
+
}
|
|
2533
4322
|
return [
|
|
2534
4323
|
"目标",
|
|
2535
4324
|
input.original_prompt,
|
|
2536
4325
|
"",
|
|
2537
|
-
"当前状态",
|
|
2538
|
-
`- 识别意图: ${input.intent}`,
|
|
2539
|
-
"",
|
|
2540
4326
|
"约束",
|
|
2541
4327
|
"- 保持 v1 合约兼容和严格校验。",
|
|
2542
4328
|
"",
|
|
2543
|
-
"
|
|
4329
|
+
"代码锚点",
|
|
2544
4330
|
likelyFiles,
|
|
2545
4331
|
"",
|
|
2546
4332
|
"实现清单",
|
|
2547
4333
|
"- 在改动前确认请求/响应合约。",
|
|
2548
4334
|
"- 最小化改动并保持 tenant/workspace 隔离。",
|
|
2549
4335
|
"",
|
|
2550
|
-
"边界情况",
|
|
2551
|
-
"- Workspace 没有可用索引。",
|
|
2552
|
-
"- 搜索过滤后结果为空。",
|
|
2553
|
-
"",
|
|
2554
4336
|
"验证与测试",
|
|
2555
4337
|
"- 运行 typecheck 和合约/工具测试。",
|
|
2556
|
-
"",
|
|
2557
|
-
"完成定义",
|
|
2558
|
-
"- 测试通过且行为符合 v1 规范。"
|
|
2559
4338
|
].join("\n");
|
|
2560
4339
|
}
|
|
2561
4340
|
|
|
2562
4341
|
if (input.language === "es") {
|
|
4342
|
+
if (input.style === "lean") {
|
|
4343
|
+
return [
|
|
4344
|
+
"Objetivo",
|
|
4345
|
+
input.original_prompt,
|
|
4346
|
+
"",
|
|
4347
|
+
"Restricciones",
|
|
4348
|
+
"- Mantener compatibilidad de comportamiento y contratos.",
|
|
4349
|
+
"- Priorizar cambios mínimos y seguros.",
|
|
4350
|
+
"",
|
|
4351
|
+
"Pasos",
|
|
4352
|
+
"- Confirmar alcance y comportamiento actual antes de editar.",
|
|
4353
|
+
"- Implementar el cambio mínimo necesario y añadir regresiones.",
|
|
4354
|
+
"",
|
|
4355
|
+
"Validación",
|
|
4356
|
+
"- Ejecutar pruebas relevantes y confirmar que no hay regresiones."
|
|
4357
|
+
].join("\n");
|
|
4358
|
+
}
|
|
4359
|
+
if (input.style === "deep") {
|
|
4360
|
+
return [
|
|
4361
|
+
"Objetivo",
|
|
4362
|
+
input.original_prompt,
|
|
4363
|
+
"",
|
|
4364
|
+
"Alcance y restricciones",
|
|
4365
|
+
"- Preservar comportamiento existente y contratos/API vigentes.",
|
|
4366
|
+
"- Limitar cambios al alcance mínimo necesario.",
|
|
4367
|
+
"- Aplicar defaults de seguridad (deny-by-default) cuando aplique.",
|
|
4368
|
+
"",
|
|
4369
|
+
"Anclas del código",
|
|
4370
|
+
likelyFiles,
|
|
4371
|
+
"",
|
|
4372
|
+
"Plan de implementación",
|
|
4373
|
+
"- Establecer línea base del comportamiento actual.",
|
|
4374
|
+
"- Aplicar cambios mínimos y reversibles en rutas críticas.",
|
|
4375
|
+
"- Añadir pruebas de regresión para casos positivos, negativos y límites.",
|
|
4376
|
+
"",
|
|
4377
|
+
"Casos límite",
|
|
4378
|
+
"- Contexto faltante o resultados vacíos no deben romper el flujo.",
|
|
4379
|
+
"- Evitar fuga de contexto entre tenants/workspaces.",
|
|
4380
|
+
"",
|
|
4381
|
+
"Validación",
|
|
4382
|
+
"- Ejecutar typecheck y pruebas objetivo; confirmar estabilidad."
|
|
4383
|
+
].join("\n");
|
|
4384
|
+
}
|
|
2563
4385
|
return [
|
|
2564
4386
|
"Objetivo",
|
|
2565
4387
|
input.original_prompt,
|
|
2566
4388
|
"",
|
|
2567
|
-
"Estado actual",
|
|
2568
|
-
`- Intención clasificada: ${input.intent}`,
|
|
2569
|
-
"",
|
|
2570
4389
|
"Restricciones",
|
|
2571
4390
|
"- Mantener compatibilidad con contratos v1 y validación estricta.",
|
|
2572
4391
|
"",
|
|
2573
|
-
"
|
|
4392
|
+
"Anclas del código",
|
|
2574
4393
|
likelyFiles,
|
|
2575
4394
|
"",
|
|
2576
4395
|
"Checklist de implementación",
|
|
2577
4396
|
"- Confirmar entradas/salidas del contrato antes de modificar lógica.",
|
|
2578
4397
|
"- Aplicar cambios mínimos y mantener aislamiento por tenant/workspace.",
|
|
2579
4398
|
"",
|
|
2580
|
-
"Casos límite",
|
|
2581
|
-
"- Workspace sin índice listo.",
|
|
2582
|
-
"- Filtros de búsqueda que no devuelven resultados.",
|
|
2583
|
-
"",
|
|
2584
4399
|
"Validación y pruebas",
|
|
2585
|
-
"- Ejecutar typecheck y pruebas de contratos/herramientas."
|
|
4400
|
+
"- Ejecutar typecheck y pruebas de contratos/herramientas."
|
|
4401
|
+
].join("\n");
|
|
4402
|
+
}
|
|
4403
|
+
|
|
4404
|
+
if (input.style === "lean") {
|
|
4405
|
+
const anchors = input.refs.length > 0 ? `- Anchors: ${input.refs.slice(0, 2).map((ref) => `${ref.path}:${ref.start_line}`).join(", ")}` : "";
|
|
4406
|
+
return [
|
|
4407
|
+
"Goal",
|
|
4408
|
+
input.original_prompt,
|
|
4409
|
+
"",
|
|
4410
|
+
"Constraints",
|
|
4411
|
+
"- Preserve existing behavior and contract compatibility.",
|
|
4412
|
+
"- Keep changes minimal and safe.",
|
|
4413
|
+
...(anchors ? ["", anchors] : []),
|
|
4414
|
+
"",
|
|
4415
|
+
"Action steps",
|
|
4416
|
+
"- Confirm current behavior and target scope.",
|
|
4417
|
+
"- Implement the smallest safe change and add regression coverage.",
|
|
4418
|
+
"",
|
|
4419
|
+
"Validation",
|
|
4420
|
+
"- Run relevant tests and confirm no regressions."
|
|
4421
|
+
].join("\n");
|
|
4422
|
+
}
|
|
4423
|
+
|
|
4424
|
+
if (input.style === "deep") {
|
|
4425
|
+
return [
|
|
4426
|
+
"Goal",
|
|
4427
|
+
input.original_prompt,
|
|
4428
|
+
"",
|
|
4429
|
+
"Scope and constraints",
|
|
4430
|
+
"- Preserve current behavior and API/contract semantics.",
|
|
4431
|
+
"- Limit changes to the required scope and keep them reversible.",
|
|
4432
|
+
"- Prefer fail-secure defaults where policy boundaries are involved.",
|
|
4433
|
+
"",
|
|
4434
|
+
"Codebase anchors",
|
|
4435
|
+
likelyFiles,
|
|
4436
|
+
"",
|
|
4437
|
+
"Implementation plan",
|
|
4438
|
+
"- Establish baseline behavior and invariants before edits.",
|
|
4439
|
+
"- Apply minimal, safe changes on critical paths only.",
|
|
4440
|
+
"- Add regression coverage for positive, negative, and boundary scenarios.",
|
|
2586
4441
|
"",
|
|
2587
|
-
"
|
|
2588
|
-
"-
|
|
4442
|
+
"Edge cases",
|
|
4443
|
+
"- Missing context, empty retrieval results, and async boundary leakage.",
|
|
4444
|
+
"- Cross-tenant/workspace access paths and authorization bypass attempts.",
|
|
4445
|
+
"",
|
|
4446
|
+
"Validation",
|
|
4447
|
+
"- Run typecheck and focused test suites; verify no behavioral regressions."
|
|
2589
4448
|
].join("\n");
|
|
2590
4449
|
}
|
|
2591
4450
|
|
|
@@ -2593,28 +4452,18 @@ function formatEnhancedPrompt(input: {
|
|
|
2593
4452
|
"Goal",
|
|
2594
4453
|
input.original_prompt,
|
|
2595
4454
|
"",
|
|
2596
|
-
"Current state",
|
|
2597
|
-
`- Classified intent: ${input.intent}`,
|
|
2598
|
-
"",
|
|
2599
4455
|
"Constraints",
|
|
2600
4456
|
"- Keep v1 contract compatibility and strict schema validation.",
|
|
2601
4457
|
"",
|
|
2602
|
-
"
|
|
4458
|
+
"Codebase anchors",
|
|
2603
4459
|
likelyFiles,
|
|
2604
4460
|
"",
|
|
2605
|
-
"Implementation
|
|
4461
|
+
"Implementation plan",
|
|
2606
4462
|
"- Confirm request/response contract assumptions before code edits.",
|
|
2607
4463
|
"- Apply smallest safe changes while preserving tenant/workspace isolation.",
|
|
2608
4464
|
"",
|
|
2609
|
-
"Edge cases",
|
|
2610
|
-
"- Workspace has no ready index.",
|
|
2611
|
-
"- Search filters produce empty result sets.",
|
|
2612
|
-
"",
|
|
2613
4465
|
"Validation and tests",
|
|
2614
|
-
"- Run typecheck and contract/tool tests."
|
|
2615
|
-
"",
|
|
2616
|
-
"Definition of done",
|
|
2617
|
-
"- Tests pass and behavior matches the v1 spec."
|
|
4466
|
+
"- Run typecheck and contract/tool tests."
|
|
2618
4467
|
].join("\n");
|
|
2619
4468
|
}
|
|
2620
4469
|
|
|
@@ -2992,13 +4841,21 @@ function compileGlob(glob: string): RegExp {
|
|
|
2992
4841
|
|
|
2993
4842
|
export class RetrievalCore {
|
|
2994
4843
|
private readonly cacheTtlSeconds: number;
|
|
4844
|
+
private readonly internalCandidateDepth: number;
|
|
2995
4845
|
private readonly embeddingProvider: EmbeddingProvider;
|
|
2996
4846
|
private readonly embeddingDescriptor: EmbeddingDescriptor;
|
|
4847
|
+
private readonly rerankerProvider?: RerankerProvider;
|
|
4848
|
+
private readonly rerankerDescriptor?: RerankerDescriptor;
|
|
4849
|
+
private readonly rerankerTopN: number;
|
|
4850
|
+
private readonly rerankerCacheVariant: string;
|
|
2997
4851
|
private readonly observability: Observability;
|
|
2998
4852
|
private readonly scoringConfig: RetrievalScoringConfig;
|
|
2999
4853
|
private readonly scoringProfileId: string;
|
|
3000
4854
|
private readonly scoringConfigChecksum: string;
|
|
4855
|
+
private readonly enhancerProvider?: EnhancerGenerationProvider;
|
|
4856
|
+
private readonly enhancerProviderDescriptor?: EnhancerProviderDescriptor;
|
|
3001
4857
|
private readonly enhancerConfig: RetrievalEnhancerConfig;
|
|
4858
|
+
private readonly enhancerGenerationConfig: RetrievalEnhancerGenerationConfig;
|
|
3002
4859
|
private readonly chunkingConfig: RetrievalChunkingConfig;
|
|
3003
4860
|
private readonly enhancerDecisionTraceEnabled: boolean;
|
|
3004
4861
|
private cacheHits = 0;
|
|
@@ -3010,16 +4867,36 @@ export class RetrievalCore {
|
|
|
3010
4867
|
options?: RetrievalCoreOptions
|
|
3011
4868
|
) {
|
|
3012
4869
|
this.cacheTtlSeconds = options?.cacheTtlSeconds ?? 60;
|
|
4870
|
+
this.internalCandidateDepth = clampInternalCandidateDepth(options?.internalCandidateDepth);
|
|
3013
4871
|
this.embeddingProvider = options?.embeddingProvider ?? new DeterministicEmbeddingProvider();
|
|
3014
4872
|
this.embeddingDescriptor = normalizeEmbeddingDescriptor(
|
|
3015
4873
|
options?.embeddingDescriptor ?? resolveEmbeddingDescriptor(this.embeddingProvider)
|
|
3016
4874
|
);
|
|
4875
|
+
this.rerankerProvider = options?.rerankerProvider;
|
|
4876
|
+
this.rerankerTopN = options?.rerankerTopN ?? DEFAULT_SEARCH_RERANKER_TOP_N;
|
|
4877
|
+
if (!Number.isInteger(this.rerankerTopN) || this.rerankerTopN <= 0) {
|
|
4878
|
+
throw new Error("invalid retrieval reranker config: rerankerTopN must be a positive integer");
|
|
4879
|
+
}
|
|
4880
|
+
this.rerankerDescriptor = this.rerankerProvider
|
|
4881
|
+
? normalizeRerankerDescriptor(resolveRerankerDescriptor(this.rerankerProvider))
|
|
4882
|
+
: undefined;
|
|
4883
|
+
this.rerankerCacheVariant = this.rerankerDescriptor
|
|
4884
|
+
? `provider:${this.rerankerDescriptor.provider}|model:${this.rerankerDescriptor.model ?? "unknown"}|top_n:${this.rerankerTopN}`
|
|
4885
|
+
: "provider:disabled";
|
|
3017
4886
|
this.observability = options?.observability ?? getObservability("retrieval-core");
|
|
3018
4887
|
const baseProfile = resolveRetrievalScoringProfile(options?.scoringProfile);
|
|
3019
4888
|
this.scoringConfig = mergeRetrievalScoringConfig(baseProfile.config, options?.scoringConfig);
|
|
3020
4889
|
this.scoringProfileId = options?.scoringProfileId ?? baseProfile.profile_id;
|
|
3021
4890
|
this.scoringConfigChecksum = scoringConfigChecksum(this.scoringConfig);
|
|
4891
|
+
this.enhancerProvider = options?.enhancerProvider;
|
|
4892
|
+
this.enhancerProviderDescriptor = this.enhancerProvider
|
|
4893
|
+
? normalizeEnhancerProviderDescriptor(resolveEnhancerProviderDescriptor(this.enhancerProvider))
|
|
4894
|
+
: undefined;
|
|
3022
4895
|
this.enhancerConfig = mergeRetrievalEnhancerConfig(DEFAULT_RETRIEVAL_ENHANCER_CONFIG, options?.enhancerConfig);
|
|
4896
|
+
this.enhancerGenerationConfig = mergeRetrievalEnhancerGenerationConfig(
|
|
4897
|
+
DEFAULT_RETRIEVAL_ENHANCER_GENERATION_CONFIG,
|
|
4898
|
+
options?.enhancerGenerationConfig
|
|
4899
|
+
);
|
|
3023
4900
|
this.chunkingConfig = mergeRetrievalChunkingConfig(DEFAULT_RETRIEVAL_CHUNKING_CONFIG, options?.chunkingConfig);
|
|
3024
4901
|
this.enhancerDecisionTraceEnabled = Boolean(options?.enhancerDecisionTraceEnabled);
|
|
3025
4902
|
}
|
|
@@ -3736,6 +5613,106 @@ export class RetrievalCore {
|
|
|
3736
5613
|
};
|
|
3737
5614
|
}
|
|
3738
5615
|
|
|
5616
|
+
/**
 * Re-orders the leading candidates using the configured reranker provider.
 *
 * Returns `input.candidates` untouched when no reranker provider/descriptor is
 * configured or when fewer than two candidates fall inside the top-N window.
 * Otherwise the first `min(rerankerTopN, candidates.length)` candidates are
 * sent to the provider as documents and placed in the order it returns:
 * duplicate indexes are ignored, and candidates the provider did not mention
 * are appended in their original order. Reranked candidates receive new,
 * strictly descending scores starting one above the larger of the first head
 * and first tail score (assumes candidates arrive sorted by descending
 * score; confirm against the caller), and the untouched tail follows them.
 *
 * Any failure (provider error, non-integer or out-of-range index, empty
 * response) is counted in the failure/fallback metrics, logged as a warning,
 * and answered with the original candidates. Latency is always observed.
 */
private async applyLearnedReranker(input: {
  trace_id: string;
  query: string;
  candidates: SearchResultRow[];
}): Promise<SearchResultRow[]> {
  const provider = this.rerankerProvider;
  const descriptor = this.rerankerDescriptor;
  if (!provider || !descriptor) {
    return input.candidates;
  }

  const cappedTopN = Math.min(this.rerankerTopN, input.candidates.length);
  if (cappedTopN <= 1) {
    // Nothing to reorder with zero or one candidate in the window.
    return input.candidates;
  }

  const head = input.candidates.slice(0, cappedTopN);
  const tail = input.candidates.slice(cappedTopN);
  const labels = {
    provider: descriptor.provider,
    model: descriptor.model ?? "unknown"
  } as const;

  this.observability.metrics.increment("retrieval_reranker_requests_total", 1, labels);
  const startedAt = Date.now();
  try {
    const reranked = await provider.rerank({
      query: input.query,
      documents: head.map((candidate) => buildRerankerDocument(candidate)),
      top_n: cappedTopN
    });

    if (!Array.isArray(reranked) || reranked.length === 0) {
      throw new RerankerProviderRequestError("invalid_response", "reranker response must contain at least one result");
    }

    const placed = new Set<number>();
    const reordered: SearchResultRow[] = [];
    for (const row of reranked) {
      const position = row.index;
      if (!Number.isInteger(position)) {
        throw new RerankerProviderRequestError("invalid_response", "reranker result index must be an integer");
      }
      if (position < 0 || position >= head.length) {
        throw new RerankerProviderRequestError("invalid_response", "reranker result index out of range");
      }
      if (placed.has(position)) {
        continue;
      }
      const chosen = head[position];
      if (!chosen) {
        continue;
      }
      placed.add(position);
      reordered.push(chosen);
    }

    // Candidates the provider left out keep their original relative order.
    head.forEach((candidate, position) => {
      if (!placed.has(position) && candidate) {
        reordered.push(candidate);
      }
    });

    if (reordered.length === 0) {
      throw new RerankerProviderRequestError("invalid_response", "reranker did not return usable indexes");
    }

    // Rewrite scores so the reranked order sits above the first head and tail scores.
    const scoreAnchor = Math.max(head[0]?.score ?? 0, tail[0]?.score ?? Number.NEGATIVE_INFINITY) + 1;
    const scoreStep = 1e-6;
    const adjusted = reordered.map((candidate, rank) => ({
      ...candidate,
      score: scoreAnchor - rank * scoreStep
    }));
    return adjusted.concat(tail);
  } catch (error) {
    const reason = classifyRerankerFailureReason(error);
    this.observability.metrics.increment("retrieval_reranker_failures_total", 1, {
      ...labels,
      reason
    });
    this.observability.metrics.increment("retrieval_reranker_fallback_total", 1, {
      reason
    });
    this.observability.logger.warn("search_context reranker fallback applied", {
      trace_id: input.trace_id,
      provider: labels.provider,
      model: labels.model,
      reason,
      top_n: cappedTopN,
      error_message: error instanceof Error ? error.message : String(error)
    });
    return input.candidates;
  } finally {
    this.observability.metrics.observe("retrieval_reranker_latency_ms", Date.now() - startedAt, labels);
  }
}
|
|
5715
|
+
|
|
3739
5716
|
async searchContext(input: {
|
|
3740
5717
|
trace_id: string;
|
|
3741
5718
|
tenant_id: string;
|
|
@@ -3757,9 +5734,9 @@ export class RetrievalCore {
|
|
|
3757
5734
|
index_id: index.index_id
|
|
3758
5735
|
});
|
|
3759
5736
|
|
|
3760
|
-
const topK = Math.min(input.request.top_k ?? 8, MAX_TOP_K);
|
|
3761
|
-
const candidatePoolTopK = Math.min(MAX_TOP_K, Math.max(topK * 4, 12));
|
|
3762
5737
|
const query = normalizeQuery(input.request.query);
|
|
5738
|
+
const topK = Math.min(input.request.top_k ?? 8, MAX_TOP_K);
|
|
5739
|
+
const candidatePoolTopK = Math.max(Math.max(topK * 4, 12), this.internalCandidateDepth);
|
|
3763
5740
|
|
|
3764
5741
|
if (!indexMetadata) {
|
|
3765
5742
|
this.observability.metrics.increment("retrieval_embedding_metadata_mismatch_total", 1, {
|
|
@@ -3831,13 +5808,22 @@ export class RetrievalCore {
|
|
|
3831
5808
|
);
|
|
3832
5809
|
}
|
|
3833
5810
|
const queryTokens = tokenize(query);
|
|
5811
|
+
const searchLiterals = extractSearchLiterals(query);
|
|
5812
|
+
|
|
5813
|
+
this.observability.metrics.observe("retrieval_candidate_depth_requested", topK, {
|
|
5814
|
+
retrieval_profile_id: this.scoringProfileId
|
|
5815
|
+
});
|
|
5816
|
+
this.observability.metrics.observe("retrieval_candidate_depth_effective", candidatePoolTopK, {
|
|
5817
|
+
retrieval_profile_id: this.scoringProfileId
|
|
5818
|
+
});
|
|
3834
5819
|
|
|
3835
5820
|
const cacheKey = buildQueryCacheKey({
|
|
3836
5821
|
workspace_id: input.workspace_id,
|
|
3837
5822
|
index_version: index.index_version,
|
|
3838
5823
|
query,
|
|
3839
5824
|
top_k: topK,
|
|
3840
|
-
filters: input.request.filters
|
|
5825
|
+
filters: input.request.filters,
|
|
5826
|
+
retrieval_variant: this.rerankerCacheVariant
|
|
3841
5827
|
});
|
|
3842
5828
|
|
|
3843
5829
|
const cached = await this.cache.get(cacheKey);
|
|
@@ -3859,6 +5845,8 @@ export class RetrievalCore {
|
|
|
3859
5845
|
workspace_id: input.workspace_id
|
|
3860
5846
|
},
|
|
3861
5847
|
async () => {
|
|
5848
|
+
let literalPathMatchCount = 0;
|
|
5849
|
+
let literalSnippetMatchCount = 0;
|
|
3862
5850
|
let ranked: RankedChunkCandidate[] | undefined;
|
|
3863
5851
|
if (this.store.rankChunksByIndex) {
|
|
3864
5852
|
ranked = await this.store.rankChunksByIndex({
|
|
@@ -3879,11 +5867,21 @@ export class RetrievalCore {
|
|
|
3879
5867
|
.map((candidate) => {
|
|
3880
5868
|
let score = candidate.score;
|
|
3881
5869
|
score += pathQualityBias(candidate.path, queryTokens, this.scoringConfig, query);
|
|
5870
|
+
const literalBoost = applyLiteralBoost({
|
|
5871
|
+
path: candidate.path,
|
|
5872
|
+
snippet: candidate.snippet,
|
|
5873
|
+
literals: searchLiterals,
|
|
5874
|
+
path_bias: this.scoringConfig.path_bias
|
|
5875
|
+
});
|
|
5876
|
+
score += literalBoost.boost;
|
|
5877
|
+
literalPathMatchCount += literalBoost.path_matches;
|
|
5878
|
+
literalSnippetMatchCount += literalBoost.snippet_matches;
|
|
3882
5879
|
if (looksLowInformation(candidate.snippet)) {
|
|
3883
5880
|
score -= this.scoringConfig.rerank.low_information_penalty;
|
|
3884
5881
|
}
|
|
3885
5882
|
const reason = chooseReason({
|
|
3886
5883
|
lexical: candidate.lexical_score,
|
|
5884
|
+
literal_match: literalBoost.matched,
|
|
3887
5885
|
path_match: candidate.path_match,
|
|
3888
5886
|
recency_boosted: candidate.recency_boosted
|
|
3889
5887
|
});
|
|
@@ -3924,11 +5922,25 @@ export class RetrievalCore {
|
|
|
3924
5922
|
score -= candidateWeights.generated_penalty;
|
|
3925
5923
|
}
|
|
3926
5924
|
score += pathQualityBias(chunk.path, queryTokens, this.scoringConfig, query);
|
|
5925
|
+
const literalBoost = applyLiteralBoost({
|
|
5926
|
+
path: chunk.path,
|
|
5927
|
+
snippet: chunk.snippet,
|
|
5928
|
+
literals: searchLiterals,
|
|
5929
|
+
path_bias: this.scoringConfig.path_bias
|
|
5930
|
+
});
|
|
5931
|
+
score += literalBoost.boost;
|
|
5932
|
+
literalPathMatchCount += literalBoost.path_matches;
|
|
5933
|
+
literalSnippetMatchCount += literalBoost.snippet_matches;
|
|
3927
5934
|
if (looksLowInformation(chunk.snippet)) {
|
|
3928
5935
|
score -= this.scoringConfig.rerank.low_information_penalty;
|
|
3929
5936
|
}
|
|
3930
5937
|
|
|
3931
|
-
const reason = chooseReason({
|
|
5938
|
+
const reason = chooseReason({
|
|
5939
|
+
lexical: l,
|
|
5940
|
+
literal_match: literalBoost.matched,
|
|
5941
|
+
path_match: pathMatch,
|
|
5942
|
+
recency_boosted: recencyBoost
|
|
5943
|
+
});
|
|
3932
5944
|
|
|
3933
5945
|
return {
|
|
3934
5946
|
path: chunk.path,
|
|
@@ -3946,10 +5958,36 @@ export class RetrievalCore {
|
|
|
3946
5958
|
channel: "hybrid",
|
|
3947
5959
|
retrieval_profile_id: this.scoringProfileId
|
|
3948
5960
|
});
|
|
5961
|
+
this.observability.metrics.observe("retrieval_candidates_pre_rerank_count", output.length, {
|
|
5962
|
+
retrieval_profile_id: this.scoringProfileId
|
|
5963
|
+
});
|
|
5964
|
+
if (literalPathMatchCount > 0) {
|
|
5965
|
+
this.observability.metrics.increment("retrieval_literal_boost_applied_total", literalPathMatchCount, {
|
|
5966
|
+
retrieval_profile_id: this.scoringProfileId,
|
|
5967
|
+
channel: "path"
|
|
5968
|
+
});
|
|
5969
|
+
}
|
|
5970
|
+
if (literalSnippetMatchCount > 0) {
|
|
5971
|
+
this.observability.metrics.increment("retrieval_literal_boost_applied_total", literalSnippetMatchCount, {
|
|
5972
|
+
retrieval_profile_id: this.scoringProfileId,
|
|
5973
|
+
channel: "snippet"
|
|
5974
|
+
});
|
|
5975
|
+
}
|
|
3949
5976
|
return output;
|
|
3950
5977
|
}
|
|
3951
5978
|
);
|
|
3952
5979
|
|
|
5980
|
+
const rerankedCandidates = await this.observability.tracing.withSpan(
|
|
5981
|
+
"retrieval.learned_rerank",
|
|
5982
|
+
{ trace_id: input.trace_id },
|
|
5983
|
+
async () =>
|
|
5984
|
+
this.applyLearnedReranker({
|
|
5985
|
+
trace_id: input.trace_id,
|
|
5986
|
+
query,
|
|
5987
|
+
candidates
|
|
5988
|
+
})
|
|
5989
|
+
);
|
|
5990
|
+
|
|
3953
5991
|
const deduped = await this.observability.tracing.withSpan("retrieval.rerank", { trace_id: input.trace_id }, async () => {
|
|
3954
5992
|
const output: SearchContextOutput["results"] = [];
|
|
3955
5993
|
const seen = new Set<string>();
|
|
@@ -3960,7 +5998,7 @@ export class RetrievalCore {
|
|
|
3960
5998
|
? this.scoringConfig.rerank.max_chunks_per_path_file_lookup
|
|
3961
5999
|
: this.scoringConfig.rerank.max_chunks_per_path_default;
|
|
3962
6000
|
|
|
3963
|
-
const available = [...
|
|
6001
|
+
const available = [...rerankedCandidates];
|
|
3964
6002
|
while (output.length < topK && available.length > 0) {
|
|
3965
6003
|
let bestIndex = -1;
|
|
3966
6004
|
let bestAdjustedScore = Number.NEGATIVE_INFINITY;
|
|
@@ -4028,6 +6066,41 @@ export class RetrievalCore {
|
|
|
4028
6066
|
return output;
|
|
4029
6067
|
});
|
|
4030
6068
|
|
|
6069
|
+
const candidateRankByKey = new Map<string, number>();
|
|
6070
|
+
for (let index = 0; index < rerankedCandidates.length; index += 1) {
|
|
6071
|
+
const candidate = rerankedCandidates[index];
|
|
6072
|
+
if (!candidate) {
|
|
6073
|
+
continue;
|
|
6074
|
+
}
|
|
6075
|
+
const key = `${candidate.path}:${candidate.start_line}:${candidate.end_line}`;
|
|
6076
|
+
if (!candidateRankByKey.has(key)) {
|
|
6077
|
+
candidateRankByKey.set(key, index + 1);
|
|
6078
|
+
}
|
|
6079
|
+
}
|
|
6080
|
+
|
|
6081
|
+
let literalMatchesInTopK = 0;
|
|
6082
|
+
for (let postRank = 0; postRank < deduped.length; postRank += 1) {
|
|
6083
|
+
const row = deduped[postRank];
|
|
6084
|
+
if (!row) {
|
|
6085
|
+
continue;
|
|
6086
|
+
}
|
|
6087
|
+
if (isExactLiteralReason(row.reason)) {
|
|
6088
|
+
literalMatchesInTopK += 1;
|
|
6089
|
+
}
|
|
6090
|
+
this.observability.metrics.increment("retrieval_reason_topk_total", 1, {
|
|
6091
|
+
retrieval_profile_id: this.scoringProfileId,
|
|
6092
|
+
reason: row.reason
|
|
6093
|
+
});
|
|
6094
|
+
const key = `${row.path}:${row.start_line}:${row.end_line}`;
|
|
6095
|
+
const preRank = candidateRankByKey.get(key) ?? postRank + 1;
|
|
6096
|
+
this.observability.metrics.observe("retrieval_rank_shift_delta", preRank - (postRank + 1), {
|
|
6097
|
+
retrieval_profile_id: this.scoringProfileId
|
|
6098
|
+
});
|
|
6099
|
+
}
|
|
6100
|
+
this.observability.metrics.observe("retrieval_literal_matches_topk", literalMatchesInTopK, {
|
|
6101
|
+
retrieval_profile_id: this.scoringProfileId
|
|
6102
|
+
});
|
|
6103
|
+
|
|
4031
6104
|
const output: SearchContextOutput = {
|
|
4032
6105
|
trace_id: input.trace_id,
|
|
4033
6106
|
results: deduped,
|
|
@@ -4055,6 +6128,127 @@ export class RetrievalCore {
|
|
|
4055
6128
|
return output;
|
|
4056
6129
|
}
|
|
4057
6130
|
|
|
6131
|
+
/**
 * Builds the label set attached to enhancer provider metrics.
 *
 * When no provider descriptor is configured, `provider` falls back to
 * "template" and `model` to "n/a". `tool_mode` is always taken from the
 * enhancer generation config.
 *
 * @returns A flat string-to-string map with `provider`, `model` and `tool_mode`.
 */
private enhancerProviderLabels(): Record<string, string> {
  const descriptor = this.enhancerProviderDescriptor;
  const provider = descriptor?.provider ?? "template";
  const model = descriptor?.model ?? "n/a";
  const tool_mode = this.enhancerGenerationConfig.tool_mode;
  return { provider, model, tool_mode };
}
|
|
6138
|
+
|
|
6139
|
+
/**
 * Converts search results into the snippet payload handed to the enhancer.
 *
 * Only the first `max_context_snippets` results (from the enhancer generation
 * config) are kept, in their incoming order. Each snippet body is cut to at
 * most 1,600 UTF-16 code units.
 *
 * @param results Search results to convert.
 * @returns One context snippet per retained result.
 */
private buildEnhancerContextSnippets(results: SearchContextOutput["results"]): EnhancerContextSnippet[] {
  const limit = this.enhancerGenerationConfig.max_context_snippets;
  return results.slice(0, limit).map((entry) => ({
    path: entry.path,
    start_line: entry.start_line,
    end_line: entry.end_line,
    reason: entry.reason,
    snippet: entry.snippet.slice(0, 1_600),
    score: entry.score
  }));
}
|
|
6154
|
+
|
|
6155
|
+
/**
 * Produces the enhanced prompt text for an enhance request.
 *
 * With no enhancer provider configured, the prompt is rendered locally via
 * `formatEnhancedPrompt`. Otherwise the provider is called up to
 * `max_retries + 1` times, each call wrapped in `runWithInactivityTimeout`.
 * Failures classified as "timeout", "schema_error" or "invalid_response" stop
 * the loop immediately; any other failure reason is retried until attempts
 * run out.
 *
 * Metrics emitted per attempt: `enhancer_provider_requests_total` before the
 * call, `enhancer_provider_latency_ms` once the call resolves, and
 * `enhancer_provider_failures_total` (with `reason`) on failure.
 *
 * @param input Request data plus the resolved style, intent, language and
 *   retrieval context forwarded to the provider.
 * @returns The normalized, non-empty enhanced prompt.
 * @throws RetrievalError with code "UPSTREAM_FAILURE" when every attempt
 *   fails or a non-retryable failure occurs.
 */
private async generateEnhancedPrompt(input: {
  trace_id: string;
  tenant_id: string;
  workspace_id?: string;
  request: EnhancePromptInput;
  style_requested: EnhancePromptStyle;
  style_resolved: ResolvedEnhancerPromptStyle;
  intent: EnhancerIntent;
  query_intent: "symbol-heavy" | "impl-focused" | "conceptual";
  language: EnhancerOutputLanguage;
  context_refs: ContextRef[];
  context_snippets: EnhancerContextSnippet[];
  warnings: string[];
  questions: string[];
}): Promise<string> {
  // Narrow once into a local so the closure below needs no non-null assertion.
  const provider = this.enhancerProvider;
  if (!provider) {
    return formatEnhancedPrompt({
      style: input.style_resolved,
      language: input.language,
      original_prompt: input.request.prompt,
      refs: input.context_refs
    });
  }

  const maxAttempts = this.enhancerGenerationConfig.max_retries + 1;
  let lastFailure: EnhancerProviderRequestError | undefined;
  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    const startedAt = Date.now();
    this.observability.metrics.increment("enhancer_provider_requests_total", 1, this.enhancerProviderLabels());
    try {
      const generated = await runWithInactivityTimeout({
        timeout_ms: this.enhancerGenerationConfig.timeout_ms,
        fn: ({ touch, signal }) =>
          provider.generate({
            trace_id: input.trace_id,
            tenant_id: input.tenant_id,
            workspace_id: input.workspace_id,
            request: input.request,
            style_requested: input.style_requested,
            style_resolved: input.style_resolved,
            intent: input.intent,
            query_intent: input.query_intent,
            language: input.language,
            context_refs: input.context_refs,
            context_snippets: input.context_snippets,
            warnings: input.warnings,
            questions: input.questions,
            tool_mode: this.enhancerGenerationConfig.tool_mode,
            abort_signal: signal,
            on_progress: touch
          })
      });
      // Latency is recorded for every call that resolves, even if the payload is rejected below.
      this.observability.metrics.observe(
        "enhancer_provider_latency_ms",
        Date.now() - startedAt,
        this.enhancerProviderLabels()
      );
      const enhancedPrompt = normalizeProviderEnhancedPrompt(generated.enhanced_prompt);
      if (enhancedPrompt.length === 0) {
        // Thrown inside the try on purpose: it goes through the same failure accounting as provider errors.
        throw new EnhancerProviderRequestError("invalid_response", "enhancer provider returned an empty enhanced_prompt");
      }
      return enhancedPrompt;
    } catch (error) {
      const reason = classifyEnhancerGenerationFailureReason(error);
      const failure =
        error instanceof EnhancerProviderRequestError ? error : new EnhancerProviderRequestError(reason, String(error));
      lastFailure = failure;
      // Single source of truth for "do not retry", shared by the log field and the loop exit.
      const nonRetryable = reason === "timeout" || reason === "schema_error" || reason === "invalid_response";
      this.observability.metrics.increment("enhancer_provider_failures_total", 1, {
        ...this.enhancerProviderLabels(),
        reason
      });
      this.observability.logger.warn("enhancer provider generation failed", {
        trace_id: input.trace_id,
        attempt,
        max_attempts: maxAttempts,
        reason,
        retrying: attempt < maxAttempts && !nonRetryable,
        style_requested: input.style_requested,
        style_resolved: input.style_resolved,
        provider: this.enhancerProviderDescriptor?.provider ?? "custom",
        model: this.enhancerProviderDescriptor?.model ?? "unknown",
        error: failure.message
      });
      if (nonRetryable) {
        break;
      }
    }
  }

  const message = lastFailure?.message ?? "enhancer provider failed";
  throw new RetrievalError("UPSTREAM_FAILURE", `enhancer provider failed after retries: ${message}`);
}
|
|
6251
|
+
|
|
4058
6252
|
async enhancePrompt(input: {
|
|
4059
6253
|
trace_id: string;
|
|
4060
6254
|
tenant_id: string;
|
|
@@ -4063,16 +6257,18 @@ export class RetrievalCore {
|
|
|
4063
6257
|
}): Promise<EnhancePromptOutput> {
|
|
4064
6258
|
const startedAt = Date.now();
|
|
4065
6259
|
const warnings: string[] = [];
|
|
4066
|
-
const questions: string[] = [];
|
|
4067
|
-
const addQuestion = (value: string): void => {
|
|
4068
|
-
if (!questions.includes(value)) {
|
|
4069
|
-
questions.push(value);
|
|
4070
|
-
}
|
|
4071
|
-
};
|
|
4072
6260
|
|
|
4073
6261
|
const intent = classifyIntent(input.request.prompt);
|
|
4074
6262
|
const queryIntent = classifyEnhancerQueryIntent(input.request.prompt, input.request.conversation_history);
|
|
4075
6263
|
const language = detectDominantLanguage(input.request.prompt, input.request.conversation_history);
|
|
6264
|
+
const style = resolveEnhancerPromptStyle({
|
|
6265
|
+
requested: input.request.style,
|
|
6266
|
+
intent,
|
|
6267
|
+
query_intent: queryIntent,
|
|
6268
|
+
prompt: input.request.prompt,
|
|
6269
|
+
history: input.request.conversation_history,
|
|
6270
|
+
has_context: Boolean(input.request.project_root_path && input.workspace_id)
|
|
6271
|
+
});
|
|
4076
6272
|
const negativePreferences = detectNegativePathPreferences(
|
|
4077
6273
|
`${input.request.prompt}\n${input.request.conversation_history.map((entry) => entry.content).join("\n")}`
|
|
4078
6274
|
);
|
|
@@ -4198,20 +6394,6 @@ export class RetrievalCore {
|
|
|
4198
6394
|
searchResults,
|
|
4199
6395
|
intentPolicy.max_candidates_per_directory_pre_rerank
|
|
4200
6396
|
).slice(0, intentPolicy.max_candidates_pre_rerank);
|
|
4201
|
-
|
|
4202
|
-
const symbolCandidates = extractLikelyCodeSymbols(
|
|
4203
|
-
`${input.request.prompt}\n${input.request.conversation_history.map((entry) => entry.content).join("\n")}`,
|
|
4204
|
-
3
|
|
4205
|
-
);
|
|
4206
|
-
if (confidenceSignals.failed_signals.includes("score_spread")) {
|
|
4207
|
-
addQuestion(localizeLowConfidenceQuestion({ language, kind: "scope" }));
|
|
4208
|
-
}
|
|
4209
|
-
if (confidenceSignals.failed_signals.includes("token_overlap")) {
|
|
4210
|
-
addQuestion(localizeLowConfidenceQuestion({ language, kind: "symbol", symbol: symbolCandidates[0] }));
|
|
4211
|
-
}
|
|
4212
|
-
if (confidenceSignals.failed_signals.includes("path_diversity")) {
|
|
4213
|
-
addQuestion(localizeLowConfidenceQuestion({ language, kind: "source_priority" }));
|
|
4214
|
-
}
|
|
4215
6397
|
} else {
|
|
4216
6398
|
searchResults = dedupeEnhancerCandidatesByPath(searchResults);
|
|
4217
6399
|
searchResults = collapseEnhancerCandidatesByDirectory(
|
|
@@ -4221,6 +6403,9 @@ export class RetrievalCore {
|
|
|
4221
6403
|
}
|
|
4222
6404
|
candidateCountPostRerank = searchResults.length;
|
|
4223
6405
|
} catch (error) {
|
|
6406
|
+
if (error instanceof RetrievalError && error.code === "RATE_LIMITED") {
|
|
6407
|
+
throw error;
|
|
6408
|
+
}
|
|
4224
6409
|
warnings.push("Context retrieval unavailable; enhancement generated with limited confidence.");
|
|
4225
6410
|
fallbackTriggered = true;
|
|
4226
6411
|
fallbackReason = "context_retrieval_unavailable";
|
|
@@ -4231,16 +6416,6 @@ export class RetrievalCore {
|
|
|
4231
6416
|
}
|
|
4232
6417
|
}
|
|
4233
6418
|
|
|
4234
|
-
if (intent === "unknown") {
|
|
4235
|
-
addQuestion(
|
|
4236
|
-
language === "es"
|
|
4237
|
-
? "¿Cuál es el resultado esperado exacto y el alcance del cambio?"
|
|
4238
|
-
: language === "zh"
|
|
4239
|
-
? "这次变更的精确目标和范围是什么?"
|
|
4240
|
-
: "What exact outcome and scope should this change target?"
|
|
4241
|
-
);
|
|
4242
|
-
}
|
|
4243
|
-
|
|
4244
6419
|
const contextRefs: ContextRef[] = searchResults.map((result) => ({
|
|
4245
6420
|
path: result.path,
|
|
4246
6421
|
start_line: result.start_line,
|
|
@@ -4248,19 +6423,29 @@ export class RetrievalCore {
|
|
|
4248
6423
|
reason: result.reason
|
|
4249
6424
|
}));
|
|
4250
6425
|
|
|
4251
|
-
const
|
|
6426
|
+
const contextSnippets = this.buildEnhancerContextSnippets(searchResults);
|
|
6427
|
+
const enhancedPrompt = await this.generateEnhancedPrompt({
|
|
6428
|
+
trace_id: input.trace_id,
|
|
6429
|
+
tenant_id: input.tenant_id,
|
|
6430
|
+
workspace_id: input.workspace_id,
|
|
6431
|
+
request: input.request,
|
|
6432
|
+
style_requested: style.requested,
|
|
6433
|
+
style_resolved: style.resolved,
|
|
4252
6434
|
intent,
|
|
6435
|
+
query_intent: queryIntent,
|
|
4253
6436
|
language,
|
|
4254
|
-
|
|
4255
|
-
|
|
6437
|
+
context_refs: contextRefs,
|
|
6438
|
+
context_snippets: contextSnippets,
|
|
6439
|
+
warnings: [],
|
|
6440
|
+
questions: []
|
|
4256
6441
|
});
|
|
4257
6442
|
|
|
4258
6443
|
const output: EnhancePromptOutput = {
|
|
4259
6444
|
trace_id: input.trace_id,
|
|
4260
6445
|
enhanced_prompt: enhancedPrompt,
|
|
4261
6446
|
context_refs: contextRefs,
|
|
4262
|
-
warnings,
|
|
4263
|
-
questions
|
|
6447
|
+
warnings: [],
|
|
6448
|
+
questions: []
|
|
4264
6449
|
};
|
|
4265
6450
|
|
|
4266
6451
|
const latency_ms = Date.now() - startedAt;
|
|
@@ -4310,6 +6495,11 @@ export class RetrievalCore {
|
|
|
4310
6495
|
fallback_triggered: fallbackTriggered,
|
|
4311
6496
|
fallback_reason: fallbackReason,
|
|
4312
6497
|
query_intent: queryIntent,
|
|
6498
|
+
style_requested: style.requested,
|
|
6499
|
+
style_resolved: style.resolved,
|
|
6500
|
+
enhancer_provider: this.enhancerProviderDescriptor?.provider ?? "template",
|
|
6501
|
+
enhancer_model: this.enhancerProviderDescriptor?.model ?? null,
|
|
6502
|
+
enhancer_tool_mode: this.enhancerGenerationConfig.tool_mode,
|
|
4313
6503
|
confidence_score_spread: confidenceSignals?.score_spread ?? null,
|
|
4314
6504
|
confidence_token_overlap: confidenceSignals?.token_overlap ?? null,
|
|
4315
6505
|
confidence_path_diversity: confidenceSignals?.path_diversity ?? null,
|