@juspay/neurolink 9.54.1 → 9.54.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/browser/neurolink.min.js +288 -288
- package/dist/cli/factories/commandFactory.js +43 -4
- package/dist/cli/utils/abortHandler.d.ts +22 -0
- package/dist/cli/utils/abortHandler.js +53 -0
- package/dist/core/baseProvider.d.ts +7 -1
- package/dist/core/baseProvider.js +19 -0
- package/dist/lib/core/baseProvider.d.ts +7 -1
- package/dist/lib/core/baseProvider.js +19 -0
- package/dist/lib/neurolink.js +17 -1
- package/dist/lib/providers/anthropic.js +1 -0
- package/dist/lib/providers/anthropicBaseProvider.js +1 -0
- package/dist/lib/providers/azureOpenai.js +1 -0
- package/dist/lib/providers/googleAiStudio.js +1 -0
- package/dist/lib/providers/googleVertex.d.ts +14 -0
- package/dist/lib/providers/googleVertex.js +51 -12
- package/dist/lib/providers/huggingFace.js +1 -0
- package/dist/lib/providers/litellm.js +1 -0
- package/dist/lib/providers/mistral.js +1 -0
- package/dist/lib/providers/openAI.js +1 -0
- package/dist/lib/providers/openRouter.js +1 -0
- package/dist/lib/providers/openaiCompatible.js +1 -0
- package/dist/lib/proxy/routingPolicy.d.ts +27 -17
- package/dist/lib/proxy/routingPolicy.js +53 -209
- package/dist/lib/server/routes/claudeProxyRoutes.js +35 -73
- package/dist/lib/types/proxyTypes.d.ts +9 -50
- package/dist/lib/types/streamTypes.d.ts +6 -0
- package/dist/lib/utils/messageBuilder.js +39 -6
- package/dist/lib/utils/toolCallRepair.d.ts +21 -0
- package/dist/lib/utils/toolCallRepair.js +298 -0
- package/dist/neurolink.js +17 -1
- package/dist/providers/anthropic.js +1 -0
- package/dist/providers/anthropicBaseProvider.js +1 -0
- package/dist/providers/azureOpenai.js +1 -0
- package/dist/providers/googleAiStudio.js +1 -0
- package/dist/providers/googleVertex.d.ts +14 -0
- package/dist/providers/googleVertex.js +51 -12
- package/dist/providers/huggingFace.js +1 -0
- package/dist/providers/litellm.js +1 -0
- package/dist/providers/mistral.js +1 -0
- package/dist/providers/openAI.js +1 -0
- package/dist/providers/openRouter.js +1 -0
- package/dist/providers/openaiCompatible.js +1 -0
- package/dist/proxy/routingPolicy.d.ts +27 -17
- package/dist/proxy/routingPolicy.js +53 -209
- package/dist/server/routes/claudeProxyRoutes.js +35 -73
- package/dist/types/proxyTypes.d.ts +9 -50
- package/dist/types/streamTypes.d.ts +6 -0
- package/dist/utils/messageBuilder.js +39 -6
- package/dist/utils/toolCallRepair.d.ts +21 -0
- package/dist/utils/toolCallRepair.js +297 -0
- package/package.json +1 -1
|
@@ -1,9 +1,4 @@
|
|
|
1
|
-
const STREAMING_CONVERSATIONAL_TOOL_THRESHOLD = 4;
|
|
2
|
-
const STRONG_TOOL_FIDELITY_THRESHOLD = 8;
|
|
3
|
-
const HIGH_TOOL_COUNT_THRESHOLD = 24;
|
|
4
1
|
const DEFAULT_COOLDOWN_FLOOR_MS = 1_000;
|
|
5
|
-
const HIGH_TOOL_COUNT_COOLDOWN_FLOOR_MS = 10_000;
|
|
6
|
-
const HIGH_FIDELITY_COOLDOWN_FLOOR_MS = 300_000;
|
|
7
2
|
export function inferClaudeProxyModelTier(modelName) {
|
|
8
3
|
const normalized = modelName.toLowerCase();
|
|
9
4
|
if (normalized.includes("opus")) {
|
|
@@ -17,101 +12,13 @@ export function inferClaudeProxyModelTier(modelName) {
|
|
|
17
12
|
}
|
|
18
13
|
return "other";
|
|
19
14
|
}
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
export function
|
|
27
|
-
const toolCount = Object.keys(parsed.tools).length;
|
|
28
|
-
const hasImages = parsed.images.length > 0;
|
|
29
|
-
const hasThinking = !!parsed.thinkingConfig?.enabled;
|
|
30
|
-
const hasToolHistory = detectToolHistory(parsed);
|
|
31
|
-
const requiresSpecificTool = !!parsed.toolChoiceName;
|
|
32
|
-
const requiresToolUse = parsed.toolChoice === "required" || requiresSpecificTool || hasToolHistory;
|
|
33
|
-
const requiresStrongToolFidelity = toolCount >= STRONG_TOOL_FIDELITY_THRESHOLD ||
|
|
34
|
-
requiresSpecificTool ||
|
|
35
|
-
hasToolHistory;
|
|
36
|
-
const isHighToolCountNonStream = !parsed.stream && toolCount >= HIGH_TOOL_COUNT_THRESHOLD;
|
|
37
|
-
const isStreamingConversational = parsed.stream &&
|
|
38
|
-
!hasImages &&
|
|
39
|
-
toolCount <= STREAMING_CONVERSATIONAL_TOOL_THRESHOLD &&
|
|
40
|
-
!requiresStrongToolFidelity;
|
|
41
|
-
const classes = [];
|
|
42
|
-
if (hasImages) {
|
|
43
|
-
classes.push("multimodal");
|
|
44
|
-
}
|
|
45
|
-
if (isHighToolCountNonStream) {
|
|
46
|
-
classes.push("high-tool-count-non-stream-structured");
|
|
47
|
-
}
|
|
48
|
-
if (requiresStrongToolFidelity) {
|
|
49
|
-
classes.push("strong-tool-fidelity");
|
|
50
|
-
}
|
|
51
|
-
if (isStreamingConversational) {
|
|
52
|
-
classes.push("streaming-conversational");
|
|
53
|
-
}
|
|
54
|
-
if (classes.length === 0) {
|
|
55
|
-
classes.push("standard");
|
|
56
|
-
}
|
|
57
|
-
return {
|
|
58
|
-
requestedModel,
|
|
59
|
-
modelTier: inferClaudeProxyModelTier(requestedModel),
|
|
60
|
-
primaryClass: classes[0],
|
|
61
|
-
classes,
|
|
62
|
-
stream: parsed.stream,
|
|
63
|
-
toolCount,
|
|
64
|
-
hasImages,
|
|
65
|
-
hasThinking,
|
|
66
|
-
hasToolHistory,
|
|
67
|
-
requiresToolUse,
|
|
68
|
-
requiresSpecificTool,
|
|
69
|
-
requiresStrongToolFidelity,
|
|
70
|
-
isHighToolCountNonStream,
|
|
71
|
-
isStreamingConversational,
|
|
72
|
-
isMultimodal: hasImages,
|
|
73
|
-
};
|
|
74
|
-
}
|
|
75
|
-
export function getRequestClassCooldownKey(profile) {
|
|
76
|
-
return `${profile.primaryClass}:${profile.requestedModel.toLowerCase()}`;
|
|
77
|
-
}
|
|
78
|
-
export function getModelTierCooldownKey(profile) {
|
|
79
|
-
return profile.modelTier;
|
|
80
|
-
}
|
|
81
|
-
function getQualityGuardReason(profile, provider, _model) {
|
|
82
|
-
// Only gate auto-provider fallback (no explicit provider).
|
|
83
|
-
// Configured fallback-chain entries are always allowed through —
|
|
84
|
-
// let them attempt the request and fail naturally if the provider
|
|
85
|
-
// cannot handle it.
|
|
86
|
-
if (!provider) {
|
|
87
|
-
if (profile.modelTier === "opus" ||
|
|
88
|
-
profile.requiresStrongToolFidelity ||
|
|
89
|
-
profile.isHighToolCountNonStream) {
|
|
90
|
-
return "auto-provider fallback is disabled for requests that require contract preservation";
|
|
91
|
-
}
|
|
92
|
-
return null;
|
|
93
|
-
}
|
|
94
|
-
return null;
|
|
95
|
-
}
|
|
96
|
-
export function evaluateFallbackEligibility(profile, candidate) {
|
|
97
|
-
const policyBlockReason = getQualityGuardReason(profile, candidate.provider, candidate.model);
|
|
98
|
-
if (policyBlockReason) {
|
|
99
|
-
return {
|
|
100
|
-
provider: candidate.provider,
|
|
101
|
-
model: candidate.model,
|
|
102
|
-
eligible: false,
|
|
103
|
-
reason: policyBlockReason,
|
|
104
|
-
};
|
|
105
|
-
}
|
|
106
|
-
return {
|
|
107
|
-
provider: candidate.provider,
|
|
108
|
-
model: candidate.model,
|
|
109
|
-
eligible: true,
|
|
110
|
-
reason: "eligible",
|
|
111
|
-
};
|
|
112
|
-
}
|
|
113
|
-
export function buildProxyTranslationPlan(primary, fallbackChain, requestedModel, parsed) {
|
|
114
|
-
const profile = classifyClaudeProxyRequest(requestedModel, parsed);
|
|
15
|
+
/**
|
|
16
|
+
* Build a translation plan for a Claude-compatible proxy request.
|
|
17
|
+
* The plan lists the primary provider followed by eligible fallback targets.
|
|
18
|
+
* All configured fallback entries are always eligible — no contract-based gating.
|
|
19
|
+
* When no fallback chain is configured, an "auto-provider" entry is appended.
|
|
20
|
+
*/
|
|
21
|
+
export function buildProxyTranslationPlan(primary, fallbackChain, requestedModel, _parsed) {
|
|
115
22
|
const attempts = [
|
|
116
23
|
{
|
|
117
24
|
provider: primary.provider,
|
|
@@ -119,142 +26,79 @@ export function buildProxyTranslationPlan(primary, fallbackChain, requestedModel
|
|
|
119
26
|
label: `${primary.provider}/${primary.model ?? "unknown"}`,
|
|
120
27
|
},
|
|
121
28
|
];
|
|
122
|
-
const skipped = [];
|
|
123
29
|
for (const fallback of fallbackChain) {
|
|
124
30
|
if (fallback.provider === primary.provider &&
|
|
125
31
|
fallback.model === primary.model) {
|
|
126
32
|
continue;
|
|
127
33
|
}
|
|
128
|
-
const decision = evaluateFallbackEligibility(profile, fallback);
|
|
129
|
-
if (!decision.eligible) {
|
|
130
|
-
skipped.push(decision);
|
|
131
|
-
continue;
|
|
132
|
-
}
|
|
133
34
|
attempts.push({
|
|
134
35
|
provider: fallback.provider,
|
|
135
36
|
model: fallback.model,
|
|
136
37
|
label: `${fallback.provider}/${fallback.model}`,
|
|
137
38
|
});
|
|
138
39
|
}
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
}
|
|
144
|
-
else {
|
|
145
|
-
skipped.push(autoDecision);
|
|
146
|
-
}
|
|
40
|
+
// Append auto-provider when no configured fallback chain exists,
|
|
41
|
+
// or when all configured entries were deduped (same as primary).
|
|
42
|
+
if (fallbackChain.length === 0 || attempts.length === 1) {
|
|
43
|
+
attempts.push({ label: "auto-provider" });
|
|
147
44
|
}
|
|
148
45
|
return {
|
|
149
|
-
|
|
46
|
+
requestedModel,
|
|
47
|
+
modelTier: inferClaudeProxyModelTier(requestedModel),
|
|
150
48
|
attempts,
|
|
151
|
-
skipped,
|
|
49
|
+
skipped: [],
|
|
152
50
|
};
|
|
153
51
|
}
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
return
|
|
164
|
-
})
|
|
165
|
-
.join("; ");
|
|
166
|
-
return `Fallback policy preserved the requested ${plan.profile.primaryClass} contract by skipping ineligible targets. ${summary}`;
|
|
167
|
-
}
|
|
168
|
-
export function getActiveCooldownScope(state, profile, now = Date.now()) {
|
|
169
|
-
let longest = null;
|
|
170
|
-
const requestClassKey = getRequestClassCooldownKey(profile);
|
|
171
|
-
const requestClassUntil = state.requestClassCooldowns?.[requestClassKey] ?? undefined;
|
|
172
|
-
if (requestClassUntil && requestClassUntil > now) {
|
|
173
|
-
longest = {
|
|
174
|
-
scope: "request_class",
|
|
175
|
-
key: requestClassKey,
|
|
176
|
-
until: requestClassUntil,
|
|
177
|
-
};
|
|
178
|
-
}
|
|
179
|
-
const modelTierKey = getModelTierCooldownKey(profile);
|
|
180
|
-
const modelTierUntil = state.modelTierCooldowns?.[modelTierKey] ?? undefined;
|
|
181
|
-
if (modelTierUntil &&
|
|
182
|
-
modelTierUntil > now &&
|
|
183
|
-
modelTierUntil > (longest?.until ?? 0)) {
|
|
184
|
-
longest = {
|
|
185
|
-
scope: "model_tier",
|
|
186
|
-
key: modelTierKey,
|
|
187
|
-
until: modelTierUntil,
|
|
188
|
-
};
|
|
189
|
-
}
|
|
190
|
-
if (state.coolingUntil &&
|
|
191
|
-
state.coolingUntil > now &&
|
|
192
|
-
state.coolingUntil > (longest?.until ?? 0)) {
|
|
193
|
-
longest = {
|
|
194
|
-
scope: "generic",
|
|
195
|
-
key: "generic",
|
|
196
|
-
until: state.coolingUntil,
|
|
197
|
-
};
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
// Simple per-account cooldown
|
|
54
|
+
// ---------------------------------------------------------------------------
|
|
55
|
+
/**
|
|
56
|
+
* Check whether an account is currently cooling down.
|
|
57
|
+
* Returns the cooldown timestamp if active, null otherwise.
|
|
58
|
+
*/
|
|
59
|
+
export function getAccountCooldownUntil(state, now = Date.now()) {
|
|
60
|
+
if (state.coolingUntil && state.coolingUntil > now) {
|
|
61
|
+
return state.coolingUntil;
|
|
198
62
|
}
|
|
199
|
-
return
|
|
63
|
+
return null;
|
|
200
64
|
}
|
|
201
|
-
|
|
65
|
+
/**
|
|
66
|
+
* Partition accounts into eligible (no cooldown) and skipped (cooling down).
|
|
67
|
+
*/
|
|
68
|
+
export function partitionAccountsByCooldown(accounts, getState, now = Date.now()) {
|
|
202
69
|
const eligible = [];
|
|
203
70
|
const skipped = [];
|
|
204
71
|
for (const account of accounts) {
|
|
205
|
-
const
|
|
206
|
-
|
|
207
|
-
|
|
72
|
+
const state = getState(account);
|
|
73
|
+
const until = getAccountCooldownUntil(state, now);
|
|
74
|
+
if (until !== null) {
|
|
75
|
+
skipped.push({
|
|
76
|
+
account,
|
|
77
|
+
cooldown: { until, backoffLevel: state.backoffLevel },
|
|
78
|
+
});
|
|
208
79
|
continue;
|
|
209
80
|
}
|
|
210
81
|
eligible.push(account);
|
|
211
82
|
}
|
|
212
|
-
return {
|
|
213
|
-
eligible,
|
|
214
|
-
skipped,
|
|
215
|
-
};
|
|
83
|
+
return { eligible, skipped };
|
|
216
84
|
}
|
|
217
|
-
|
|
85
|
+
/**
|
|
86
|
+
* Apply a rate-limit cooldown to an account.
|
|
87
|
+
* Uses simple exponential backoff with a floor and cap.
|
|
88
|
+
*/
|
|
89
|
+
export function applyRateLimitCooldown(args) {
|
|
218
90
|
const now = args.now ?? Date.now();
|
|
219
|
-
const
|
|
220
|
-
const
|
|
221
|
-
|
|
222
|
-
const mtBackoffLevels = args.state.modelTierBackoffLevels ?? {};
|
|
223
|
-
const scopedBackoffLevel = Math.max(rcBackoffLevels[requestClassKey] ?? 0, mtBackoffLevels[modelTierKey] ?? 0);
|
|
224
|
-
// High-tool-count-non-stream gets its own (lower) floor so that requests
|
|
225
|
-
// recover faster once proper OAuth betas are forwarded. Check it first
|
|
226
|
-
// because every >=24-tool request also satisfies requiresStrongToolFidelity
|
|
227
|
-
// (threshold 8), which would otherwise shadow this branch.
|
|
228
|
-
const floorMs = args.profile.isHighToolCountNonStream
|
|
229
|
-
? HIGH_TOOL_COUNT_COOLDOWN_FLOOR_MS
|
|
230
|
-
: args.profile.modelTier === "opus" ||
|
|
231
|
-
args.profile.requiresStrongToolFidelity
|
|
232
|
-
? HIGH_FIDELITY_COOLDOWN_FLOOR_MS
|
|
233
|
-
: DEFAULT_COOLDOWN_FLOOR_MS;
|
|
234
|
-
const baseCooldownMs = Math.max(args.retryAfterMs ?? 0, floorMs);
|
|
235
|
-
const backoffMs = Math.min(baseCooldownMs * 2 ** scopedBackoffLevel, args.capMs);
|
|
236
|
-
const until = now + backoffMs;
|
|
237
|
-
args.state.requestClassCooldowns = {
|
|
238
|
-
...(args.state.requestClassCooldowns ?? {}),
|
|
239
|
-
[requestClassKey]: Math.max(args.state.requestClassCooldowns?.[requestClassKey] ?? 0, until),
|
|
240
|
-
};
|
|
241
|
-
args.state.modelTierCooldowns = {
|
|
242
|
-
...(args.state.modelTierCooldowns ?? {}),
|
|
243
|
-
[modelTierKey]: Math.max(args.state.modelTierCooldowns?.[modelTierKey] ?? 0, until),
|
|
244
|
-
};
|
|
245
|
-
args.state.requestClassBackoffLevels = {
|
|
246
|
-
...rcBackoffLevels,
|
|
247
|
-
[requestClassKey]: (rcBackoffLevels[requestClassKey] ?? 0) + 1,
|
|
248
|
-
};
|
|
249
|
-
args.state.modelTierBackoffLevels = {
|
|
250
|
-
...mtBackoffLevels,
|
|
251
|
-
[modelTierKey]: (mtBackoffLevels[modelTierKey] ?? 0) + 1,
|
|
252
|
-
};
|
|
91
|
+
const baseCooldownMs = Math.max(args.retryAfterMs ?? 0, DEFAULT_COOLDOWN_FLOOR_MS);
|
|
92
|
+
const backoffMs = Math.min(baseCooldownMs * 2 ** args.state.backoffLevel, args.capMs);
|
|
93
|
+
args.state.coolingUntil = now + backoffMs;
|
|
253
94
|
args.state.backoffLevel += 1;
|
|
254
|
-
return {
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
95
|
+
return { backoffMs };
|
|
96
|
+
}
|
|
97
|
+
/**
|
|
98
|
+
* Clear cooldown state for an account after a successful request.
|
|
99
|
+
*/
|
|
100
|
+
export function clearAccountCooldown(state) {
|
|
101
|
+
state.coolingUntil = undefined;
|
|
102
|
+
state.backoffLevel = 0;
|
|
259
103
|
}
|
|
260
104
|
//# sourceMappingURL=routingPolicy.js.map
|
|
@@ -20,7 +20,7 @@ import { createRawStreamCapture } from "../../proxy/rawStreamCapture.js";
|
|
|
20
20
|
import { logBodyCapture, logRequest, logRequestAttempt, logStreamError, } from "../../proxy/requestLogger.js";
|
|
21
21
|
import { createSSEInterceptor } from "../../proxy/sseInterceptor.js";
|
|
22
22
|
import { needsRefresh, persistTokens, refreshToken, } from "../../proxy/tokenRefresh.js";
|
|
23
|
-
import {
|
|
23
|
+
import { applyRateLimitCooldown, buildProxyTranslationPlan, clearAccountCooldown, getAccountCooldownUntil, partitionAccountsByCooldown, } from "../../proxy/routingPolicy.js";
|
|
24
24
|
import { writeJsonSnapshotAtomically } from "../../proxy/snapshotPersistence.js";
|
|
25
25
|
import { recordAttempt, recordAttemptError, recordCooldown, recordFinalError, recordFinalSuccess, } from "../../proxy/usageStats.js";
|
|
26
26
|
import { logger } from "../../utils/logger.js";
|
|
@@ -440,9 +440,7 @@ function logProxyRoutingPlan(logProxyBody, stage, plan) {
|
|
|
440
440
|
contentType: "application/json",
|
|
441
441
|
body: {
|
|
442
442
|
stage,
|
|
443
|
-
requestProfile: plan.profile,
|
|
444
443
|
attempts: plan.attempts,
|
|
445
|
-
skipped: plan.skipped,
|
|
446
444
|
},
|
|
447
445
|
});
|
|
448
446
|
}
|
|
@@ -1347,30 +1345,21 @@ async function executeClaudeFallbackTranslation(args) {
|
|
|
1347
1345
|
return clientResponse;
|
|
1348
1346
|
}
|
|
1349
1347
|
async function tryConfiguredClaudeFallbackChain(args) {
|
|
1350
|
-
const { ctx, body, parsedFallbackRequest,
|
|
1348
|
+
const { ctx, body, parsedFallbackRequest, modelRouter, tracer, requestStartTime, logProxyBody, logFinalRequest, } = args;
|
|
1351
1349
|
const chain = modelRouter?.getFallbackChain() ?? [];
|
|
1352
1350
|
const fallbackPlan = buildProxyTranslationPlan({ provider: "anthropic", model: body.model }, chain, body.model, parsedFallbackRequest);
|
|
1353
|
-
const fallbackPolicyReason = summarizeSkippedFallbacks(fallbackPlan);
|
|
1354
1351
|
logProxyBody({
|
|
1355
1352
|
phase: "routing_decision",
|
|
1356
1353
|
contentType: "application/json",
|
|
1357
1354
|
body: {
|
|
1358
1355
|
stage: "anthropic_fallback",
|
|
1359
|
-
requestProfile,
|
|
1360
1356
|
attempts: fallbackPlan.attempts.slice(1),
|
|
1361
|
-
skipped: fallbackPlan.skipped,
|
|
1362
1357
|
},
|
|
1363
1358
|
});
|
|
1364
|
-
for (const skipped of fallbackPlan.skipped) {
|
|
1365
|
-
const label = skipped.provider
|
|
1366
|
-
? `${skipped.provider}/${skipped.model ?? "unknown"}`
|
|
1367
|
-
: "auto-provider";
|
|
1368
|
-
logger.always(`[proxy] skipping fallback ${label}: ${skipped.reason}`);
|
|
1369
|
-
}
|
|
1370
1359
|
tracer?.setFallbackInfo({
|
|
1371
1360
|
triggered: true,
|
|
1372
1361
|
attemptCount: fallbackPlan.attempts.slice(1).length,
|
|
1373
|
-
reason:
|
|
1362
|
+
reason: "all_anthropic_accounts_exhausted",
|
|
1374
1363
|
});
|
|
1375
1364
|
for (const fallback of fallbackPlan.attempts.slice(1)) {
|
|
1376
1365
|
if (!fallback.provider || !fallback.model) {
|
|
@@ -1410,10 +1399,7 @@ async function tryConfiguredClaudeFallbackChain(args) {
|
|
|
1410
1399
|
attemptCount: fallbackPlan.attempts.slice(1).length,
|
|
1411
1400
|
reason: "fallback_success",
|
|
1412
1401
|
});
|
|
1413
|
-
return {
|
|
1414
|
-
response,
|
|
1415
|
-
fallbackPolicyReason,
|
|
1416
|
-
};
|
|
1402
|
+
return { response };
|
|
1417
1403
|
}
|
|
1418
1404
|
catch (fallbackErr) {
|
|
1419
1405
|
const errMsg = fallbackErr instanceof Error
|
|
@@ -1450,10 +1436,7 @@ async function tryConfiguredClaudeFallbackChain(args) {
|
|
|
1450
1436
|
});
|
|
1451
1437
|
}
|
|
1452
1438
|
}
|
|
1453
|
-
return {
|
|
1454
|
-
response: null,
|
|
1455
|
-
fallbackPolicyReason,
|
|
1456
|
-
};
|
|
1439
|
+
return { response: null };
|
|
1457
1440
|
}
|
|
1458
1441
|
async function tryAutoClaudeFallback(args) {
|
|
1459
1442
|
const { ctx, body, tracer, requestStartTime, logProxyBody, logFinalRequest } = args;
|
|
@@ -1484,7 +1467,7 @@ async function tryAutoClaudeFallback(args) {
|
|
|
1484
1467
|
}
|
|
1485
1468
|
}
|
|
1486
1469
|
function buildClaudeAnthropicFailureResponse(args) {
|
|
1487
|
-
const { tracer, requestStartTime, authFailureMessage, invalidRequestFailure, sawNetworkError, sawTransientFailure, sawRateLimit, lastError, orderedAccounts,
|
|
1470
|
+
const { tracer, requestStartTime, authFailureMessage, invalidRequestFailure, sawNetworkError, sawTransientFailure, sawRateLimit, lastError, orderedAccounts, buildLoggedClaudeError, logProxyBody, logFinalRequest, } = args;
|
|
1488
1471
|
if (authFailureMessage && !sawRateLimit) {
|
|
1489
1472
|
tracer?.setError("authentication_error", authFailureMessage);
|
|
1490
1473
|
tracer?.end(401, Date.now() - requestStartTime);
|
|
@@ -1531,21 +1514,21 @@ function buildClaudeAnthropicFailureResponse(args) {
|
|
|
1531
1514
|
return buildLoggedClaudeError(502, msg);
|
|
1532
1515
|
}
|
|
1533
1516
|
const earliestRecovery = orderedAccounts.reduce((min, account) => {
|
|
1534
|
-
const
|
|
1535
|
-
return
|
|
1517
|
+
const until = getAccountCooldownUntil(getOrCreateRuntimeState(account.key));
|
|
1518
|
+
return until !== null ? Math.min(min, until) : min;
|
|
1536
1519
|
}, Infinity);
|
|
1520
|
+
// If no active cooldown remains (expired while retries ran), use 1s
|
|
1521
|
+
// instead of fabricating a long retry-after.
|
|
1537
1522
|
const retryAfterSec = Number.isFinite(earliestRecovery)
|
|
1538
1523
|
? Math.max(1, Math.ceil((earliestRecovery - Date.now()) / 1000))
|
|
1539
|
-
:
|
|
1540
|
-
const
|
|
1541
|
-
|
|
1542
|
-
|
|
1543
|
-
|
|
1544
|
-
const errorBody = buildClaudeError(429, `All accounts rate-limited. Earliest recovery in ${retryAfterSec}s.${contractMessage}`, "overloaded_error");
|
|
1545
|
-
tracer?.setError("rate_limit_error", `All accounts rate-limited. Retry in ${retryAfterSec}s.${contractMessage}`);
|
|
1524
|
+
: 1;
|
|
1525
|
+
const errorMessage = `All accounts rate-limited. Earliest recovery in ${retryAfterSec}s.`;
|
|
1526
|
+
logger.always(`[proxy] all accounts rate-limited, retry in ${retryAfterSec}s`);
|
|
1527
|
+
const errorBody = buildClaudeError(429, errorMessage, "overloaded_error");
|
|
1528
|
+
tracer?.setError("rate_limit_error", errorMessage);
|
|
1546
1529
|
tracer?.end(429, Date.now() - requestStartTime);
|
|
1547
1530
|
recordFinalError(429);
|
|
1548
|
-
logFinalRequest(429, "", "final", "rate_limit_error",
|
|
1531
|
+
logFinalRequest(429, "", "final", "rate_limit_error", errorMessage);
|
|
1549
1532
|
const errorBodyText = JSON.stringify(errorBody);
|
|
1550
1533
|
logProxyBody({
|
|
1551
1534
|
phase: "client_response",
|
|
@@ -1568,22 +1551,9 @@ function buildClaudeAnthropicFailureResponse(args) {
|
|
|
1568
1551
|
});
|
|
1569
1552
|
}
|
|
1570
1553
|
async function handleAnthropicSuccessfulResponse(args) {
|
|
1571
|
-
const { ctx, body, account, accountState,
|
|
1572
|
-
accountState
|
|
1573
|
-
accountState.coolingUntil = undefined;
|
|
1554
|
+
const { ctx, body, account, accountState, response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest, } = args;
|
|
1555
|
+
clearAccountCooldown(accountState);
|
|
1574
1556
|
accountState.consecutiveRefreshFailures = 0;
|
|
1575
|
-
if (accountState.requestClassCooldowns) {
|
|
1576
|
-
delete accountState.requestClassCooldowns[`${requestProfile.primaryClass}:${requestProfile.requestedModel.toLowerCase()}`];
|
|
1577
|
-
}
|
|
1578
|
-
if (accountState.modelTierCooldowns) {
|
|
1579
|
-
delete accountState.modelTierCooldowns[requestProfile.modelTier];
|
|
1580
|
-
}
|
|
1581
|
-
if (accountState.requestClassBackoffLevels) {
|
|
1582
|
-
delete accountState.requestClassBackoffLevels[`${requestProfile.primaryClass}:${requestProfile.requestedModel.toLowerCase()}`];
|
|
1583
|
-
}
|
|
1584
|
-
if (accountState.modelTierBackoffLevels) {
|
|
1585
|
-
delete accountState.modelTierBackoffLevels[requestProfile.modelTier];
|
|
1586
|
-
}
|
|
1587
1557
|
logger.always(`[proxy] ← ${response.status} account=${account.label}`);
|
|
1588
1558
|
const quota = parseQuotaHeaders(response.headers);
|
|
1589
1559
|
if (quota) {
|
|
@@ -2173,7 +2143,7 @@ async function handleAnthropicSuccessfulRetryResponse(args) {
|
|
|
2173
2143
|
return retryJson;
|
|
2174
2144
|
}
|
|
2175
2145
|
async function handleAnthropicAuthRetry(args) {
|
|
2176
|
-
const { ctx, body, account, accountState,
|
|
2146
|
+
const { ctx, body, account, accountState, headers, buildUpstreamBody, enabledAccounts, orderedAccounts, response: _response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logAttempt, logProxyBody, logFinalRequest, lastError, authFailureMessage, sawRateLimit, sawTransientFailure, sawNetworkError, } = args;
|
|
2177
2147
|
recordAttemptError(account.label, account.type, 401);
|
|
2178
2148
|
let currentLastError = lastError;
|
|
2179
2149
|
let currentAuthFailureMessage = authFailureMessage;
|
|
@@ -2257,9 +2227,8 @@ async function handleAnthropicAuthRetry(args) {
|
|
|
2257
2227
|
const cooldownMs = Number.isNaN(parsedRetryAfter)
|
|
2258
2228
|
? 60_000
|
|
2259
2229
|
: Math.max(1, parsedRetryAfter) * 1000;
|
|
2260
|
-
const cooldown =
|
|
2230
|
+
const cooldown = applyRateLimitCooldown({
|
|
2261
2231
|
state: accountState,
|
|
2262
|
-
profile: requestProfile,
|
|
2263
2232
|
retryAfterMs: cooldownMs,
|
|
2264
2233
|
capMs: RATE_LIMIT_BACKOFF_CAP_MS,
|
|
2265
2234
|
});
|
|
@@ -2836,7 +2805,7 @@ async function prepareAnthropicAccountAttempt(args) {
|
|
|
2836
2805
|
};
|
|
2837
2806
|
}
|
|
2838
2807
|
async function fetchAnthropicAccountResponse(args) {
|
|
2839
|
-
const { url, headers, finalBodyStr, account, accountState,
|
|
2808
|
+
const { url, headers, finalBodyStr, account, accountState, enabledAccounts, orderedAccounts, tracer, logAttempt, currentLastError, currentSawRateLimit, currentSawNetworkError, upstreamSpan, } = args;
|
|
2840
2809
|
let lastError = currentLastError;
|
|
2841
2810
|
let sawRateLimit = currentSawRateLimit;
|
|
2842
2811
|
let sawNetworkError = currentSawNetworkError;
|
|
@@ -2889,9 +2858,8 @@ async function fetchAnthropicAccountResponse(args) {
|
|
|
2889
2858
|
}
|
|
2890
2859
|
}
|
|
2891
2860
|
}
|
|
2892
|
-
const cooldown =
|
|
2861
|
+
const cooldown = applyRateLimitCooldown({
|
|
2893
2862
|
state: accountState,
|
|
2894
|
-
profile: requestProfile,
|
|
2895
2863
|
retryAfterMs: cooldownMs > 0 ? cooldownMs : undefined,
|
|
2896
2864
|
capMs: RATE_LIMIT_BACKOFF_CAP_MS,
|
|
2897
2865
|
});
|
|
@@ -2899,7 +2867,7 @@ async function fetchAnthropicAccountResponse(args) {
|
|
|
2899
2867
|
recordAttemptError(account.label, account.type, 429);
|
|
2900
2868
|
recordCooldown(account.label, account.type, Date.now() + cooldown.backoffMs, accountState.backoffLevel);
|
|
2901
2869
|
lastError = await response.text();
|
|
2902
|
-
logger.always(`[proxy] ← 429 account=${account.label} backoff-level=${accountState.backoffLevel} cooldown=${Math.round(cooldown.backoffMs / 1000)}s
|
|
2870
|
+
logger.always(`[proxy] ← 429 account=${account.label} backoff-level=${accountState.backoffLevel} cooldown=${Math.round(cooldown.backoffMs / 1000)}s`);
|
|
2903
2871
|
logAttempt(429, "rate_limit_error", String(lastError));
|
|
2904
2872
|
tracer?.setError("rate_limit_error", String(lastError).slice(0, 500));
|
|
2905
2873
|
tracer?.recordRetry(account.label, "rate_limit");
|
|
@@ -2924,7 +2892,6 @@ async function fetchAnthropicAccountResponse(args) {
|
|
|
2924
2892
|
async function handleAnthropicRoutedClaudeRequest(args) {
|
|
2925
2893
|
const { ctx, body, modelRouter, tracer, requestStartTime, accountStrategy, buildLoggedClaudeError, logProxyBody, logFinalRequest, } = args;
|
|
2926
2894
|
const parsedRequest = parseClaudeRequest(body);
|
|
2927
|
-
const requestProfile = classifyClaudeProxyRequest(body.model, parsedRequest);
|
|
2928
2895
|
const loadedAccounts = await loadClaudeProxyAccounts({
|
|
2929
2896
|
ctx,
|
|
2930
2897
|
body,
|
|
@@ -2947,14 +2914,16 @@ async function handleAnthropicRoutedClaudeRequest(args) {
|
|
|
2947
2914
|
attemptNumber: 0,
|
|
2948
2915
|
};
|
|
2949
2916
|
const acctSelectionSpan = tracer?.startAccountSelection();
|
|
2950
|
-
const accountPartition = partitionAccountsByCooldown(orderedAccounts, (account) => getOrCreateRuntimeState(account.key)
|
|
2917
|
+
const accountPartition = partitionAccountsByCooldown(orderedAccounts, (account) => getOrCreateRuntimeState(account.key));
|
|
2951
2918
|
for (const skippedAccount of accountPartition.skipped) {
|
|
2952
|
-
|
|
2953
|
-
|
|
2954
|
-
|
|
2955
|
-
|
|
2956
|
-
|
|
2957
|
-
|
|
2919
|
+
logger.always(`[proxy] skipping account=${skippedAccount.account.label} cooldown remaining=${Math.max(1, Math.ceil((skippedAccount.cooldown.until - Date.now()) / 1000))}s`);
|
|
2920
|
+
}
|
|
2921
|
+
// Only flag rate-limit when ALL accounts are cooling — if some are eligible,
|
|
2922
|
+
// let the actual attempt results determine sawRateLimit via real 429 responses.
|
|
2923
|
+
if (accountPartition.skipped.length > 0 &&
|
|
2924
|
+
accountPartition.eligible.length === 0) {
|
|
2925
|
+
loopState.sawRateLimit = true;
|
|
2926
|
+
loopState.lastError = `All ${accountPartition.skipped.length} accounts are cooling down`;
|
|
2958
2927
|
}
|
|
2959
2928
|
accountLoop: for (const account of accountPartition.eligible) {
|
|
2960
2929
|
const accountState = getOrCreateRuntimeState(account.key);
|
|
@@ -3009,7 +2978,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
|
|
|
3009
2978
|
finalBodyStr: preparedAttempt.finalBodyStr,
|
|
3010
2979
|
account,
|
|
3011
2980
|
accountState,
|
|
3012
|
-
requestProfile,
|
|
3013
2981
|
enabledAccounts,
|
|
3014
2982
|
orderedAccounts,
|
|
3015
2983
|
tracer,
|
|
@@ -3046,7 +3014,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
|
|
|
3046
3014
|
body,
|
|
3047
3015
|
account,
|
|
3048
3016
|
accountState,
|
|
3049
|
-
requestProfile,
|
|
3050
3017
|
headers: preparedAttempt.headers,
|
|
3051
3018
|
buildUpstreamBody: preparedAttempt.buildUpstreamBody,
|
|
3052
3019
|
enabledAccounts,
|
|
@@ -3126,7 +3093,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
|
|
|
3126
3093
|
body,
|
|
3127
3094
|
account,
|
|
3128
3095
|
accountState,
|
|
3129
|
-
requestProfile,
|
|
3130
3096
|
response,
|
|
3131
3097
|
tracer,
|
|
3132
3098
|
requestStartTime,
|
|
@@ -3150,7 +3116,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
|
|
|
3150
3116
|
ctx,
|
|
3151
3117
|
body,
|
|
3152
3118
|
parsedFallbackRequest: parsedRequest,
|
|
3153
|
-
requestProfile,
|
|
3154
3119
|
modelRouter,
|
|
3155
3120
|
tracer,
|
|
3156
3121
|
requestStartTime,
|
|
@@ -3160,8 +3125,9 @@ async function handleAnthropicRoutedClaudeRequest(args) {
|
|
|
3160
3125
|
if (configuredFallbackResult.response) {
|
|
3161
3126
|
return configuredFallbackResult.response;
|
|
3162
3127
|
}
|
|
3163
|
-
|
|
3164
|
-
|
|
3128
|
+
// Try auto-provider fallback when the configured chain didn't produce a
|
|
3129
|
+
// response (either no chain configured, or all entries failed/deduped).
|
|
3130
|
+
if (!loopState.sawRateLimit) {
|
|
3165
3131
|
const autoFallbackResponse = await tryAutoClaudeFallback({
|
|
3166
3132
|
ctx,
|
|
3167
3133
|
body,
|
|
@@ -3184,8 +3150,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
|
|
|
3184
3150
|
sawRateLimit: loopState.sawRateLimit,
|
|
3185
3151
|
lastError: loopState.lastError,
|
|
3186
3152
|
orderedAccounts,
|
|
3187
|
-
requestProfile,
|
|
3188
|
-
fallbackPolicyReason: configuredFallbackResult.fallbackPolicyReason,
|
|
3189
3153
|
buildLoggedClaudeError,
|
|
3190
3154
|
logProxyBody,
|
|
3191
3155
|
logFinalRequest,
|
|
@@ -3406,8 +3370,6 @@ function getOrCreateRuntimeState(accountKey) {
|
|
|
3406
3370
|
backoffLevel: 0,
|
|
3407
3371
|
consecutiveRefreshFailures: 0,
|
|
3408
3372
|
permanentlyDisabled: false,
|
|
3409
|
-
requestClassCooldowns: {},
|
|
3410
|
-
modelTierCooldowns: {},
|
|
3411
3373
|
};
|
|
3412
3374
|
accountRuntimeState.set(accountKey, initial);
|
|
3413
3375
|
return initial;
|
|
@@ -641,10 +641,6 @@ export type RuntimeAccountState = {
|
|
|
641
641
|
backoffLevel: number;
|
|
642
642
|
consecutiveRefreshFailures: number;
|
|
643
643
|
permanentlyDisabled: boolean;
|
|
644
|
-
requestClassCooldowns?: Record<string, number>;
|
|
645
|
-
modelTierCooldowns?: Record<string, number>;
|
|
646
|
-
requestClassBackoffLevels?: Record<string, number>;
|
|
647
|
-
modelTierBackoffLevels?: Record<string, number>;
|
|
648
644
|
lastToken?: string;
|
|
649
645
|
lastRefreshToken?: string;
|
|
650
646
|
};
|
|
@@ -697,63 +693,26 @@ export type CachedSession = {
|
|
|
697
693
|
};
|
|
698
694
|
/** Model tier classification for proxy routing decisions. */
|
|
699
695
|
export type ClaudeProxyModelTier = "opus" | "sonnet" | "haiku" | "other";
|
|
700
|
-
/** Request class for proxy routing policy. */
|
|
701
|
-
export type ClaudeProxyRequestClass = "multimodal" | "high-tool-count-non-stream-structured" | "strong-tool-fidelity" | "streaming-conversational" | "standard";
|
|
702
|
-
/** Full classification profile for a proxy request. */
|
|
703
|
-
export type ClaudeProxyRequestProfile = {
|
|
704
|
-
requestedModel: string;
|
|
705
|
-
modelTier: ClaudeProxyModelTier;
|
|
706
|
-
primaryClass: ClaudeProxyRequestClass;
|
|
707
|
-
classes: ClaudeProxyRequestClass[];
|
|
708
|
-
stream: boolean;
|
|
709
|
-
toolCount: number;
|
|
710
|
-
hasImages: boolean;
|
|
711
|
-
hasThinking: boolean;
|
|
712
|
-
hasToolHistory: boolean;
|
|
713
|
-
requiresToolUse: boolean;
|
|
714
|
-
requiresSpecificTool: boolean;
|
|
715
|
-
requiresStrongToolFidelity: boolean;
|
|
716
|
-
isHighToolCountNonStream: boolean;
|
|
717
|
-
isStreamingConversational: boolean;
|
|
718
|
-
isMultimodal: boolean;
|
|
719
|
-
};
|
|
720
|
-
/** Outcome of evaluating a single fallback candidate. */
|
|
721
|
-
export type FallbackEligibilityDecision = {
|
|
722
|
-
provider?: string;
|
|
723
|
-
model?: string;
|
|
724
|
-
eligible: boolean;
|
|
725
|
-
reason: string;
|
|
726
|
-
};
|
|
727
696
|
/** A single provider attempt in the proxy translation plan. */
|
|
728
697
|
export type ProxyTranslationAttempt = {
|
|
729
698
|
provider?: string;
|
|
730
699
|
model?: string;
|
|
731
700
|
label: string;
|
|
732
701
|
};
|
|
733
|
-
/** Ordered plan of provider attempts
|
|
702
|
+
/** Ordered plan of provider attempts for a proxy request. */
|
|
734
703
|
export type ProxyTranslationPlan = {
|
|
735
|
-
|
|
704
|
+
requestedModel: string;
|
|
705
|
+
modelTier: ClaudeProxyModelTier;
|
|
736
706
|
attempts: ProxyTranslationAttempt[];
|
|
737
|
-
skipped:
|
|
707
|
+
skipped: never[];
|
|
738
708
|
};
|
|
739
|
-
/**
|
|
740
|
-
export type CooldownScope = {
|
|
741
|
-
scope: "request_class";
|
|
742
|
-
key: string;
|
|
743
|
-
until: number;
|
|
744
|
-
} | {
|
|
745
|
-
scope: "model_tier";
|
|
746
|
-
key: string;
|
|
747
|
-
until: number;
|
|
748
|
-
} | {
|
|
749
|
-
scope: "generic";
|
|
750
|
-
key: "generic";
|
|
751
|
-
until: number;
|
|
752
|
-
};
|
|
753
|
-
/** An account skipped during partitioning, with the cooldown that caused it. */
|
|
709
|
+
/** An account skipped during partitioning, with its cooldown info. */
|
|
754
710
|
export type CooldownSkippedAccount<T> = {
|
|
755
711
|
account: T;
|
|
756
|
-
cooldown:
|
|
712
|
+
cooldown: {
|
|
713
|
+
until: number;
|
|
714
|
+
backoffLevel: number;
|
|
715
|
+
};
|
|
757
716
|
};
|
|
758
717
|
/** Mutable readiness state tracked by the proxy process. */
|
|
759
718
|
export type ProxyReadinessState = {
|