@juspay/neurolink 9.54.1 → 9.54.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/CHANGELOG.md +12 -0
  2. package/dist/browser/neurolink.min.js +288 -288
  3. package/dist/cli/factories/commandFactory.js +43 -4
  4. package/dist/cli/utils/abortHandler.d.ts +22 -0
  5. package/dist/cli/utils/abortHandler.js +53 -0
  6. package/dist/core/baseProvider.d.ts +7 -1
  7. package/dist/core/baseProvider.js +19 -0
  8. package/dist/lib/core/baseProvider.d.ts +7 -1
  9. package/dist/lib/core/baseProvider.js +19 -0
  10. package/dist/lib/neurolink.js +17 -1
  11. package/dist/lib/providers/anthropic.js +1 -0
  12. package/dist/lib/providers/anthropicBaseProvider.js +1 -0
  13. package/dist/lib/providers/azureOpenai.js +1 -0
  14. package/dist/lib/providers/googleAiStudio.js +1 -0
  15. package/dist/lib/providers/googleVertex.d.ts +14 -0
  16. package/dist/lib/providers/googleVertex.js +51 -12
  17. package/dist/lib/providers/huggingFace.js +1 -0
  18. package/dist/lib/providers/litellm.js +1 -0
  19. package/dist/lib/providers/mistral.js +1 -0
  20. package/dist/lib/providers/openAI.js +1 -0
  21. package/dist/lib/providers/openRouter.js +1 -0
  22. package/dist/lib/providers/openaiCompatible.js +1 -0
  23. package/dist/lib/proxy/routingPolicy.d.ts +27 -17
  24. package/dist/lib/proxy/routingPolicy.js +53 -209
  25. package/dist/lib/server/routes/claudeProxyRoutes.js +35 -73
  26. package/dist/lib/types/proxyTypes.d.ts +9 -50
  27. package/dist/lib/types/streamTypes.d.ts +6 -0
  28. package/dist/lib/utils/messageBuilder.js +39 -6
  29. package/dist/lib/utils/toolCallRepair.d.ts +21 -0
  30. package/dist/lib/utils/toolCallRepair.js +298 -0
  31. package/dist/neurolink.js +17 -1
  32. package/dist/providers/anthropic.js +1 -0
  33. package/dist/providers/anthropicBaseProvider.js +1 -0
  34. package/dist/providers/azureOpenai.js +1 -0
  35. package/dist/providers/googleAiStudio.js +1 -0
  36. package/dist/providers/googleVertex.d.ts +14 -0
  37. package/dist/providers/googleVertex.js +51 -12
  38. package/dist/providers/huggingFace.js +1 -0
  39. package/dist/providers/litellm.js +1 -0
  40. package/dist/providers/mistral.js +1 -0
  41. package/dist/providers/openAI.js +1 -0
  42. package/dist/providers/openRouter.js +1 -0
  43. package/dist/providers/openaiCompatible.js +1 -0
  44. package/dist/proxy/routingPolicy.d.ts +27 -17
  45. package/dist/proxy/routingPolicy.js +53 -209
  46. package/dist/server/routes/claudeProxyRoutes.js +35 -73
  47. package/dist/types/proxyTypes.d.ts +9 -50
  48. package/dist/types/streamTypes.d.ts +6 -0
  49. package/dist/utils/messageBuilder.js +39 -6
  50. package/dist/utils/toolCallRepair.d.ts +21 -0
  51. package/dist/utils/toolCallRepair.js +297 -0
  52. package/package.json +1 -1
@@ -1,9 +1,4 @@
1
- const STREAMING_CONVERSATIONAL_TOOL_THRESHOLD = 4;
2
- const STRONG_TOOL_FIDELITY_THRESHOLD = 8;
3
- const HIGH_TOOL_COUNT_THRESHOLD = 24;
4
1
  const DEFAULT_COOLDOWN_FLOOR_MS = 1_000;
5
- const HIGH_TOOL_COUNT_COOLDOWN_FLOOR_MS = 10_000;
6
- const HIGH_FIDELITY_COOLDOWN_FLOOR_MS = 300_000;
7
2
  export function inferClaudeProxyModelTier(modelName) {
8
3
  const normalized = modelName.toLowerCase();
9
4
  if (normalized.includes("opus")) {
@@ -17,101 +12,13 @@ export function inferClaudeProxyModelTier(modelName) {
17
12
  }
18
13
  return "other";
19
14
  }
20
- function detectToolHistory(parsed) {
21
- return parsed.conversationMessages.some((message) => {
22
- return (message.content.includes("[tool_use:") ||
23
- message.content.includes("[tool_result:"));
24
- });
25
- }
26
- export function classifyClaudeProxyRequest(requestedModel, parsed) {
27
- const toolCount = Object.keys(parsed.tools).length;
28
- const hasImages = parsed.images.length > 0;
29
- const hasThinking = !!parsed.thinkingConfig?.enabled;
30
- const hasToolHistory = detectToolHistory(parsed);
31
- const requiresSpecificTool = !!parsed.toolChoiceName;
32
- const requiresToolUse = parsed.toolChoice === "required" || requiresSpecificTool || hasToolHistory;
33
- const requiresStrongToolFidelity = toolCount >= STRONG_TOOL_FIDELITY_THRESHOLD ||
34
- requiresSpecificTool ||
35
- hasToolHistory;
36
- const isHighToolCountNonStream = !parsed.stream && toolCount >= HIGH_TOOL_COUNT_THRESHOLD;
37
- const isStreamingConversational = parsed.stream &&
38
- !hasImages &&
39
- toolCount <= STREAMING_CONVERSATIONAL_TOOL_THRESHOLD &&
40
- !requiresStrongToolFidelity;
41
- const classes = [];
42
- if (hasImages) {
43
- classes.push("multimodal");
44
- }
45
- if (isHighToolCountNonStream) {
46
- classes.push("high-tool-count-non-stream-structured");
47
- }
48
- if (requiresStrongToolFidelity) {
49
- classes.push("strong-tool-fidelity");
50
- }
51
- if (isStreamingConversational) {
52
- classes.push("streaming-conversational");
53
- }
54
- if (classes.length === 0) {
55
- classes.push("standard");
56
- }
57
- return {
58
- requestedModel,
59
- modelTier: inferClaudeProxyModelTier(requestedModel),
60
- primaryClass: classes[0],
61
- classes,
62
- stream: parsed.stream,
63
- toolCount,
64
- hasImages,
65
- hasThinking,
66
- hasToolHistory,
67
- requiresToolUse,
68
- requiresSpecificTool,
69
- requiresStrongToolFidelity,
70
- isHighToolCountNonStream,
71
- isStreamingConversational,
72
- isMultimodal: hasImages,
73
- };
74
- }
75
- export function getRequestClassCooldownKey(profile) {
76
- return `${profile.primaryClass}:${profile.requestedModel.toLowerCase()}`;
77
- }
78
- export function getModelTierCooldownKey(profile) {
79
- return profile.modelTier;
80
- }
81
- function getQualityGuardReason(profile, provider, _model) {
82
- // Only gate auto-provider fallback (no explicit provider).
83
- // Configured fallback-chain entries are always allowed through —
84
- // let them attempt the request and fail naturally if the provider
85
- // cannot handle it.
86
- if (!provider) {
87
- if (profile.modelTier === "opus" ||
88
- profile.requiresStrongToolFidelity ||
89
- profile.isHighToolCountNonStream) {
90
- return "auto-provider fallback is disabled for requests that require contract preservation";
91
- }
92
- return null;
93
- }
94
- return null;
95
- }
96
- export function evaluateFallbackEligibility(profile, candidate) {
97
- const policyBlockReason = getQualityGuardReason(profile, candidate.provider, candidate.model);
98
- if (policyBlockReason) {
99
- return {
100
- provider: candidate.provider,
101
- model: candidate.model,
102
- eligible: false,
103
- reason: policyBlockReason,
104
- };
105
- }
106
- return {
107
- provider: candidate.provider,
108
- model: candidate.model,
109
- eligible: true,
110
- reason: "eligible",
111
- };
112
- }
113
- export function buildProxyTranslationPlan(primary, fallbackChain, requestedModel, parsed) {
114
- const profile = classifyClaudeProxyRequest(requestedModel, parsed);
15
+ /**
16
+ * Build a translation plan for a Claude-compatible proxy request.
17
+ * The plan lists the primary provider followed by eligible fallback targets.
18
+ * All configured fallback entries are always eligible — no contract-based gating.
19
+ * When no fallback chain is configured, an "auto-provider" entry is appended.
20
+ */
21
+ export function buildProxyTranslationPlan(primary, fallbackChain, requestedModel, _parsed) {
115
22
  const attempts = [
116
23
  {
117
24
  provider: primary.provider,
@@ -119,142 +26,79 @@ export function buildProxyTranslationPlan(primary, fallbackChain, requestedModel
119
26
  label: `${primary.provider}/${primary.model ?? "unknown"}`,
120
27
  },
121
28
  ];
122
- const skipped = [];
123
29
  for (const fallback of fallbackChain) {
124
30
  if (fallback.provider === primary.provider &&
125
31
  fallback.model === primary.model) {
126
32
  continue;
127
33
  }
128
- const decision = evaluateFallbackEligibility(profile, fallback);
129
- if (!decision.eligible) {
130
- skipped.push(decision);
131
- continue;
132
- }
133
34
  attempts.push({
134
35
  provider: fallback.provider,
135
36
  model: fallback.model,
136
37
  label: `${fallback.provider}/${fallback.model}`,
137
38
  });
138
39
  }
139
- if (fallbackChain.length === 0) {
140
- const autoDecision = evaluateFallbackEligibility(profile, {});
141
- if (autoDecision.eligible) {
142
- attempts.push({ label: "auto-provider" });
143
- }
144
- else {
145
- skipped.push(autoDecision);
146
- }
40
+ // Append auto-provider when no configured fallback chain exists,
41
+ // or when all configured entries were deduped (same as primary).
42
+ if (fallbackChain.length === 0 || attempts.length === 1) {
43
+ attempts.push({ label: "auto-provider" });
147
44
  }
148
45
  return {
149
- profile,
46
+ requestedModel,
47
+ modelTier: inferClaudeProxyModelTier(requestedModel),
150
48
  attempts,
151
- skipped,
49
+ skipped: [],
152
50
  };
153
51
  }
154
- export function summarizeSkippedFallbacks(plan) {
155
- if (plan.skipped.length === 0) {
156
- return null;
157
- }
158
- const summary = plan.skipped
159
- .map((decision) => {
160
- const label = decision.provider
161
- ? `${decision.provider}/${decision.model ?? "unknown"}`
162
- : "auto-provider";
163
- return `${label}: ${decision.reason}`;
164
- })
165
- .join("; ");
166
- return `Fallback policy preserved the requested ${plan.profile.primaryClass} contract by skipping ineligible targets. ${summary}`;
167
- }
168
- export function getActiveCooldownScope(state, profile, now = Date.now()) {
169
- let longest = null;
170
- const requestClassKey = getRequestClassCooldownKey(profile);
171
- const requestClassUntil = state.requestClassCooldowns?.[requestClassKey] ?? undefined;
172
- if (requestClassUntil && requestClassUntil > now) {
173
- longest = {
174
- scope: "request_class",
175
- key: requestClassKey,
176
- until: requestClassUntil,
177
- };
178
- }
179
- const modelTierKey = getModelTierCooldownKey(profile);
180
- const modelTierUntil = state.modelTierCooldowns?.[modelTierKey] ?? undefined;
181
- if (modelTierUntil &&
182
- modelTierUntil > now &&
183
- modelTierUntil > (longest?.until ?? 0)) {
184
- longest = {
185
- scope: "model_tier",
186
- key: modelTierKey,
187
- until: modelTierUntil,
188
- };
189
- }
190
- if (state.coolingUntil &&
191
- state.coolingUntil > now &&
192
- state.coolingUntil > (longest?.until ?? 0)) {
193
- longest = {
194
- scope: "generic",
195
- key: "generic",
196
- until: state.coolingUntil,
197
- };
52
+ // ---------------------------------------------------------------------------
53
+ // Simple per-account cooldown
54
+ // ---------------------------------------------------------------------------
55
+ /**
56
+ * Check whether an account is currently cooling down.
57
+ * Returns the cooldown timestamp if active, null otherwise.
58
+ */
59
+ export function getAccountCooldownUntil(state, now = Date.now()) {
60
+ if (state.coolingUntil && state.coolingUntil > now) {
61
+ return state.coolingUntil;
198
62
  }
199
- return longest;
63
+ return null;
200
64
  }
201
- export function partitionAccountsByCooldown(accounts, getState, profile, now = Date.now()) {
65
+ /**
66
+ * Partition accounts into eligible (no cooldown) and skipped (cooling down).
67
+ */
68
+ export function partitionAccountsByCooldown(accounts, getState, now = Date.now()) {
202
69
  const eligible = [];
203
70
  const skipped = [];
204
71
  for (const account of accounts) {
205
- const cooldown = getActiveCooldownScope(getState(account), profile, now);
206
- if (cooldown) {
207
- skipped.push({ account, cooldown });
72
+ const state = getState(account);
73
+ const until = getAccountCooldownUntil(state, now);
74
+ if (until !== null) {
75
+ skipped.push({
76
+ account,
77
+ cooldown: { until, backoffLevel: state.backoffLevel },
78
+ });
208
79
  continue;
209
80
  }
210
81
  eligible.push(account);
211
82
  }
212
- return {
213
- eligible,
214
- skipped,
215
- };
83
+ return { eligible, skipped };
216
84
  }
217
- export function applyRateLimitCooldownScope(args) {
85
+ /**
86
+ * Apply a rate-limit cooldown to an account.
87
+ * Uses simple exponential backoff with a floor and cap.
88
+ */
89
+ export function applyRateLimitCooldown(args) {
218
90
  const now = args.now ?? Date.now();
219
- const requestClassKey = getRequestClassCooldownKey(args.profile);
220
- const modelTierKey = getModelTierCooldownKey(args.profile);
221
- const rcBackoffLevels = args.state.requestClassBackoffLevels ?? {};
222
- const mtBackoffLevels = args.state.modelTierBackoffLevels ?? {};
223
- const scopedBackoffLevel = Math.max(rcBackoffLevels[requestClassKey] ?? 0, mtBackoffLevels[modelTierKey] ?? 0);
224
- // High-tool-count-non-stream gets its own (lower) floor so that requests
225
- // recover faster once proper OAuth betas are forwarded. Check it first
226
- // because every >=24-tool request also satisfies requiresStrongToolFidelity
227
- // (threshold 8), which would otherwise shadow this branch.
228
- const floorMs = args.profile.isHighToolCountNonStream
229
- ? HIGH_TOOL_COUNT_COOLDOWN_FLOOR_MS
230
- : args.profile.modelTier === "opus" ||
231
- args.profile.requiresStrongToolFidelity
232
- ? HIGH_FIDELITY_COOLDOWN_FLOOR_MS
233
- : DEFAULT_COOLDOWN_FLOOR_MS;
234
- const baseCooldownMs = Math.max(args.retryAfterMs ?? 0, floorMs);
235
- const backoffMs = Math.min(baseCooldownMs * 2 ** scopedBackoffLevel, args.capMs);
236
- const until = now + backoffMs;
237
- args.state.requestClassCooldowns = {
238
- ...(args.state.requestClassCooldowns ?? {}),
239
- [requestClassKey]: Math.max(args.state.requestClassCooldowns?.[requestClassKey] ?? 0, until),
240
- };
241
- args.state.modelTierCooldowns = {
242
- ...(args.state.modelTierCooldowns ?? {}),
243
- [modelTierKey]: Math.max(args.state.modelTierCooldowns?.[modelTierKey] ?? 0, until),
244
- };
245
- args.state.requestClassBackoffLevels = {
246
- ...rcBackoffLevels,
247
- [requestClassKey]: (rcBackoffLevels[requestClassKey] ?? 0) + 1,
248
- };
249
- args.state.modelTierBackoffLevels = {
250
- ...mtBackoffLevels,
251
- [modelTierKey]: (mtBackoffLevels[modelTierKey] ?? 0) + 1,
252
- };
91
+ const baseCooldownMs = Math.max(args.retryAfterMs ?? 0, DEFAULT_COOLDOWN_FLOOR_MS);
92
+ const backoffMs = Math.min(baseCooldownMs * 2 ** args.state.backoffLevel, args.capMs);
93
+ args.state.coolingUntil = now + backoffMs;
253
94
  args.state.backoffLevel += 1;
254
- return {
255
- backoffMs,
256
- requestClassKey,
257
- modelTierKey,
258
- };
95
+ return { backoffMs };
96
+ }
97
+ /**
98
+ * Clear cooldown state for an account after a successful request.
99
+ */
100
+ export function clearAccountCooldown(state) {
101
+ state.coolingUntil = undefined;
102
+ state.backoffLevel = 0;
259
103
  }
260
104
  //# sourceMappingURL=routingPolicy.js.map
@@ -20,7 +20,7 @@ import { createRawStreamCapture } from "../../proxy/rawStreamCapture.js";
20
20
  import { logBodyCapture, logRequest, logRequestAttempt, logStreamError, } from "../../proxy/requestLogger.js";
21
21
  import { createSSEInterceptor } from "../../proxy/sseInterceptor.js";
22
22
  import { needsRefresh, persistTokens, refreshToken, } from "../../proxy/tokenRefresh.js";
23
- import { applyRateLimitCooldownScope, buildProxyTranslationPlan, classifyClaudeProxyRequest, getActiveCooldownScope, partitionAccountsByCooldown, summarizeSkippedFallbacks, } from "../../proxy/routingPolicy.js";
23
+ import { applyRateLimitCooldown, buildProxyTranslationPlan, clearAccountCooldown, getAccountCooldownUntil, partitionAccountsByCooldown, } from "../../proxy/routingPolicy.js";
24
24
  import { writeJsonSnapshotAtomically } from "../../proxy/snapshotPersistence.js";
25
25
  import { recordAttempt, recordAttemptError, recordCooldown, recordFinalError, recordFinalSuccess, } from "../../proxy/usageStats.js";
26
26
  import { logger } from "../../utils/logger.js";
@@ -440,9 +440,7 @@ function logProxyRoutingPlan(logProxyBody, stage, plan) {
440
440
  contentType: "application/json",
441
441
  body: {
442
442
  stage,
443
- requestProfile: plan.profile,
444
443
  attempts: plan.attempts,
445
- skipped: plan.skipped,
446
444
  },
447
445
  });
448
446
  }
@@ -1347,30 +1345,21 @@ async function executeClaudeFallbackTranslation(args) {
1347
1345
  return clientResponse;
1348
1346
  }
1349
1347
  async function tryConfiguredClaudeFallbackChain(args) {
1350
- const { ctx, body, parsedFallbackRequest, requestProfile, modelRouter, tracer, requestStartTime, logProxyBody, logFinalRequest, } = args;
1348
+ const { ctx, body, parsedFallbackRequest, modelRouter, tracer, requestStartTime, logProxyBody, logFinalRequest, } = args;
1351
1349
  const chain = modelRouter?.getFallbackChain() ?? [];
1352
1350
  const fallbackPlan = buildProxyTranslationPlan({ provider: "anthropic", model: body.model }, chain, body.model, parsedFallbackRequest);
1353
- const fallbackPolicyReason = summarizeSkippedFallbacks(fallbackPlan);
1354
1351
  logProxyBody({
1355
1352
  phase: "routing_decision",
1356
1353
  contentType: "application/json",
1357
1354
  body: {
1358
1355
  stage: "anthropic_fallback",
1359
- requestProfile,
1360
1356
  attempts: fallbackPlan.attempts.slice(1),
1361
- skipped: fallbackPlan.skipped,
1362
1357
  },
1363
1358
  });
1364
- for (const skipped of fallbackPlan.skipped) {
1365
- const label = skipped.provider
1366
- ? `${skipped.provider}/${skipped.model ?? "unknown"}`
1367
- : "auto-provider";
1368
- logger.always(`[proxy] skipping fallback ${label}: ${skipped.reason}`);
1369
- }
1370
1359
  tracer?.setFallbackInfo({
1371
1360
  triggered: true,
1372
1361
  attemptCount: fallbackPlan.attempts.slice(1).length,
1373
- reason: fallbackPolicyReason ?? "all_anthropic_accounts_exhausted",
1362
+ reason: "all_anthropic_accounts_exhausted",
1374
1363
  });
1375
1364
  for (const fallback of fallbackPlan.attempts.slice(1)) {
1376
1365
  if (!fallback.provider || !fallback.model) {
@@ -1410,10 +1399,7 @@ async function tryConfiguredClaudeFallbackChain(args) {
1410
1399
  attemptCount: fallbackPlan.attempts.slice(1).length,
1411
1400
  reason: "fallback_success",
1412
1401
  });
1413
- return {
1414
- response,
1415
- fallbackPolicyReason,
1416
- };
1402
+ return { response };
1417
1403
  }
1418
1404
  catch (fallbackErr) {
1419
1405
  const errMsg = fallbackErr instanceof Error
@@ -1450,10 +1436,7 @@ async function tryConfiguredClaudeFallbackChain(args) {
1450
1436
  });
1451
1437
  }
1452
1438
  }
1453
- return {
1454
- response: null,
1455
- fallbackPolicyReason,
1456
- };
1439
+ return { response: null };
1457
1440
  }
1458
1441
  async function tryAutoClaudeFallback(args) {
1459
1442
  const { ctx, body, tracer, requestStartTime, logProxyBody, logFinalRequest } = args;
@@ -1484,7 +1467,7 @@ async function tryAutoClaudeFallback(args) {
1484
1467
  }
1485
1468
  }
1486
1469
  function buildClaudeAnthropicFailureResponse(args) {
1487
- const { tracer, requestStartTime, authFailureMessage, invalidRequestFailure, sawNetworkError, sawTransientFailure, sawRateLimit, lastError, orderedAccounts, requestProfile, fallbackPolicyReason, buildLoggedClaudeError, logProxyBody, logFinalRequest, } = args;
1470
+ const { tracer, requestStartTime, authFailureMessage, invalidRequestFailure, sawNetworkError, sawTransientFailure, sawRateLimit, lastError, orderedAccounts, buildLoggedClaudeError, logProxyBody, logFinalRequest, } = args;
1488
1471
  if (authFailureMessage && !sawRateLimit) {
1489
1472
  tracer?.setError("authentication_error", authFailureMessage);
1490
1473
  tracer?.end(401, Date.now() - requestStartTime);
@@ -1531,21 +1514,21 @@ function buildClaudeAnthropicFailureResponse(args) {
1531
1514
  return buildLoggedClaudeError(502, msg);
1532
1515
  }
1533
1516
  const earliestRecovery = orderedAccounts.reduce((min, account) => {
1534
- const cooldown = getActiveCooldownScope(getOrCreateRuntimeState(account.key), requestProfile);
1535
- return cooldown ? Math.min(min, cooldown.until) : min;
1517
+ const until = getAccountCooldownUntil(getOrCreateRuntimeState(account.key));
1518
+ return until !== null ? Math.min(min, until) : min;
1536
1519
  }, Infinity);
1520
+ // If no active cooldown remains (expired while retries ran), use 1s
1521
+ // instead of fabricating a long retry-after.
1537
1522
  const retryAfterSec = Number.isFinite(earliestRecovery)
1538
1523
  ? Math.max(1, Math.ceil((earliestRecovery - Date.now()) / 1000))
1539
- : 60;
1540
- const contractMessage = fallbackPolicyReason
1541
- ? ` ${fallbackPolicyReason}`
1542
- : "";
1543
- logger.always(`[proxy] all accounts rate-limited for request-class=${requestProfile.primaryClass}, retry in ${retryAfterSec}s`);
1544
- const errorBody = buildClaudeError(429, `All accounts rate-limited. Earliest recovery in ${retryAfterSec}s.${contractMessage}`, "overloaded_error");
1545
- tracer?.setError("rate_limit_error", `All accounts rate-limited. Retry in ${retryAfterSec}s.${contractMessage}`);
1524
+ : 1;
1525
+ const errorMessage = `All accounts rate-limited. Earliest recovery in ${retryAfterSec}s.`;
1526
+ logger.always(`[proxy] all accounts rate-limited, retry in ${retryAfterSec}s`);
1527
+ const errorBody = buildClaudeError(429, errorMessage, "overloaded_error");
1528
+ tracer?.setError("rate_limit_error", errorMessage);
1546
1529
  tracer?.end(429, Date.now() - requestStartTime);
1547
1530
  recordFinalError(429);
1548
- logFinalRequest(429, "", "final", "rate_limit_error", `All accounts rate-limited. Retry in ${retryAfterSec}s.${contractMessage}`);
1531
+ logFinalRequest(429, "", "final", "rate_limit_error", errorMessage);
1549
1532
  const errorBodyText = JSON.stringify(errorBody);
1550
1533
  logProxyBody({
1551
1534
  phase: "client_response",
@@ -1568,22 +1551,9 @@ function buildClaudeAnthropicFailureResponse(args) {
1568
1551
  });
1569
1552
  }
1570
1553
  async function handleAnthropicSuccessfulResponse(args) {
1571
- const { ctx, body, account, accountState, requestProfile, response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest, } = args;
1572
- accountState.backoffLevel = 0;
1573
- accountState.coolingUntil = undefined;
1554
+ const { ctx, body, account, accountState, response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest, } = args;
1555
+ clearAccountCooldown(accountState);
1574
1556
  accountState.consecutiveRefreshFailures = 0;
1575
- if (accountState.requestClassCooldowns) {
1576
- delete accountState.requestClassCooldowns[`${requestProfile.primaryClass}:${requestProfile.requestedModel.toLowerCase()}`];
1577
- }
1578
- if (accountState.modelTierCooldowns) {
1579
- delete accountState.modelTierCooldowns[requestProfile.modelTier];
1580
- }
1581
- if (accountState.requestClassBackoffLevels) {
1582
- delete accountState.requestClassBackoffLevels[`${requestProfile.primaryClass}:${requestProfile.requestedModel.toLowerCase()}`];
1583
- }
1584
- if (accountState.modelTierBackoffLevels) {
1585
- delete accountState.modelTierBackoffLevels[requestProfile.modelTier];
1586
- }
1587
1557
  logger.always(`[proxy] ← ${response.status} account=${account.label}`);
1588
1558
  const quota = parseQuotaHeaders(response.headers);
1589
1559
  if (quota) {
@@ -2173,7 +2143,7 @@ async function handleAnthropicSuccessfulRetryResponse(args) {
2173
2143
  return retryJson;
2174
2144
  }
2175
2145
  async function handleAnthropicAuthRetry(args) {
2176
- const { ctx, body, account, accountState, requestProfile, headers, buildUpstreamBody, enabledAccounts, orderedAccounts, response: _response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logAttempt, logProxyBody, logFinalRequest, lastError, authFailureMessage, sawRateLimit, sawTransientFailure, sawNetworkError, } = args;
2146
+ const { ctx, body, account, accountState, headers, buildUpstreamBody, enabledAccounts, orderedAccounts, response: _response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logAttempt, logProxyBody, logFinalRequest, lastError, authFailureMessage, sawRateLimit, sawTransientFailure, sawNetworkError, } = args;
2177
2147
  recordAttemptError(account.label, account.type, 401);
2178
2148
  let currentLastError = lastError;
2179
2149
  let currentAuthFailureMessage = authFailureMessage;
@@ -2257,9 +2227,8 @@ async function handleAnthropicAuthRetry(args) {
2257
2227
  const cooldownMs = Number.isNaN(parsedRetryAfter)
2258
2228
  ? 60_000
2259
2229
  : Math.max(1, parsedRetryAfter) * 1000;
2260
- const cooldown = applyRateLimitCooldownScope({
2230
+ const cooldown = applyRateLimitCooldown({
2261
2231
  state: accountState,
2262
- profile: requestProfile,
2263
2232
  retryAfterMs: cooldownMs,
2264
2233
  capMs: RATE_LIMIT_BACKOFF_CAP_MS,
2265
2234
  });
@@ -2836,7 +2805,7 @@ async function prepareAnthropicAccountAttempt(args) {
2836
2805
  };
2837
2806
  }
2838
2807
  async function fetchAnthropicAccountResponse(args) {
2839
- const { url, headers, finalBodyStr, account, accountState, requestProfile, enabledAccounts, orderedAccounts, tracer, logAttempt, currentLastError, currentSawRateLimit, currentSawNetworkError, upstreamSpan, } = args;
2808
+ const { url, headers, finalBodyStr, account, accountState, enabledAccounts, orderedAccounts, tracer, logAttempt, currentLastError, currentSawRateLimit, currentSawNetworkError, upstreamSpan, } = args;
2840
2809
  let lastError = currentLastError;
2841
2810
  let sawRateLimit = currentSawRateLimit;
2842
2811
  let sawNetworkError = currentSawNetworkError;
@@ -2889,9 +2858,8 @@ async function fetchAnthropicAccountResponse(args) {
2889
2858
  }
2890
2859
  }
2891
2860
  }
2892
- const cooldown = applyRateLimitCooldownScope({
2861
+ const cooldown = applyRateLimitCooldown({
2893
2862
  state: accountState,
2894
- profile: requestProfile,
2895
2863
  retryAfterMs: cooldownMs > 0 ? cooldownMs : undefined,
2896
2864
  capMs: RATE_LIMIT_BACKOFF_CAP_MS,
2897
2865
  });
@@ -2899,7 +2867,7 @@ async function fetchAnthropicAccountResponse(args) {
2899
2867
  recordAttemptError(account.label, account.type, 429);
2900
2868
  recordCooldown(account.label, account.type, Date.now() + cooldown.backoffMs, accountState.backoffLevel);
2901
2869
  lastError = await response.text();
2902
- logger.always(`[proxy] ← 429 account=${account.label} backoff-level=${accountState.backoffLevel} cooldown=${Math.round(cooldown.backoffMs / 1000)}s request-class=${cooldown.requestClassKey} model-tier=${cooldown.modelTierKey}`);
2870
+ logger.always(`[proxy] ← 429 account=${account.label} backoff-level=${accountState.backoffLevel} cooldown=${Math.round(cooldown.backoffMs / 1000)}s`);
2903
2871
  logAttempt(429, "rate_limit_error", String(lastError));
2904
2872
  tracer?.setError("rate_limit_error", String(lastError).slice(0, 500));
2905
2873
  tracer?.recordRetry(account.label, "rate_limit");
@@ -2924,7 +2892,6 @@ async function fetchAnthropicAccountResponse(args) {
2924
2892
  async function handleAnthropicRoutedClaudeRequest(args) {
2925
2893
  const { ctx, body, modelRouter, tracer, requestStartTime, accountStrategy, buildLoggedClaudeError, logProxyBody, logFinalRequest, } = args;
2926
2894
  const parsedRequest = parseClaudeRequest(body);
2927
- const requestProfile = classifyClaudeProxyRequest(body.model, parsedRequest);
2928
2895
  const loadedAccounts = await loadClaudeProxyAccounts({
2929
2896
  ctx,
2930
2897
  body,
@@ -2947,14 +2914,16 @@ async function handleAnthropicRoutedClaudeRequest(args) {
2947
2914
  attemptNumber: 0,
2948
2915
  };
2949
2916
  const acctSelectionSpan = tracer?.startAccountSelection();
2950
- const accountPartition = partitionAccountsByCooldown(orderedAccounts, (account) => getOrCreateRuntimeState(account.key), requestProfile);
2917
+ const accountPartition = partitionAccountsByCooldown(orderedAccounts, (account) => getOrCreateRuntimeState(account.key));
2951
2918
  for (const skippedAccount of accountPartition.skipped) {
2952
- if (skippedAccount.cooldown.scope === "request_class" ||
2953
- skippedAccount.cooldown.scope === "model_tier") {
2954
- loopState.sawRateLimit = true;
2955
- }
2956
- loopState.lastError = `Skipped account=${skippedAccount.account.label} due to ${skippedAccount.cooldown.scope} cooldown ${skippedAccount.cooldown.key}`;
2957
- logger.always(`[proxy] skipping account=${skippedAccount.account.label} due to ${skippedAccount.cooldown.scope} cooldown=${skippedAccount.cooldown.key} remaining=${Math.max(1, Math.ceil((skippedAccount.cooldown.until - Date.now()) / 1000))}s`);
2919
+ logger.always(`[proxy] skipping account=${skippedAccount.account.label} cooldown remaining=${Math.max(1, Math.ceil((skippedAccount.cooldown.until - Date.now()) / 1000))}s`);
2920
+ }
2921
+ // Only flag rate-limit when ALL accounts are cooling — if some are eligible,
2922
+ // let the actual attempt results determine sawRateLimit via real 429 responses.
2923
+ if (accountPartition.skipped.length > 0 &&
2924
+ accountPartition.eligible.length === 0) {
2925
+ loopState.sawRateLimit = true;
2926
+ loopState.lastError = `All ${accountPartition.skipped.length} accounts are cooling down`;
2958
2927
  }
2959
2928
  accountLoop: for (const account of accountPartition.eligible) {
2960
2929
  const accountState = getOrCreateRuntimeState(account.key);
@@ -3009,7 +2978,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
3009
2978
  finalBodyStr: preparedAttempt.finalBodyStr,
3010
2979
  account,
3011
2980
  accountState,
3012
- requestProfile,
3013
2981
  enabledAccounts,
3014
2982
  orderedAccounts,
3015
2983
  tracer,
@@ -3046,7 +3014,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
3046
3014
  body,
3047
3015
  account,
3048
3016
  accountState,
3049
- requestProfile,
3050
3017
  headers: preparedAttempt.headers,
3051
3018
  buildUpstreamBody: preparedAttempt.buildUpstreamBody,
3052
3019
  enabledAccounts,
@@ -3126,7 +3093,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
3126
3093
  body,
3127
3094
  account,
3128
3095
  accountState,
3129
- requestProfile,
3130
3096
  response,
3131
3097
  tracer,
3132
3098
  requestStartTime,
@@ -3150,7 +3116,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
3150
3116
  ctx,
3151
3117
  body,
3152
3118
  parsedFallbackRequest: parsedRequest,
3153
- requestProfile,
3154
3119
  modelRouter,
3155
3120
  tracer,
3156
3121
  requestStartTime,
@@ -3160,8 +3125,9 @@ async function handleAnthropicRoutedClaudeRequest(args) {
3160
3125
  if (configuredFallbackResult.response) {
3161
3126
  return configuredFallbackResult.response;
3162
3127
  }
3163
- const configuredChain = modelRouter?.getFallbackChain() ?? [];
3164
- if (configuredChain.length === 0 && !loopState.sawRateLimit) {
3128
+ // Try auto-provider fallback when the configured chain didn't produce a
3129
+ // response (either no chain configured, or all entries failed/deduped).
3130
+ if (!loopState.sawRateLimit) {
3165
3131
  const autoFallbackResponse = await tryAutoClaudeFallback({
3166
3132
  ctx,
3167
3133
  body,
@@ -3184,8 +3150,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
3184
3150
  sawRateLimit: loopState.sawRateLimit,
3185
3151
  lastError: loopState.lastError,
3186
3152
  orderedAccounts,
3187
- requestProfile,
3188
- fallbackPolicyReason: configuredFallbackResult.fallbackPolicyReason,
3189
3153
  buildLoggedClaudeError,
3190
3154
  logProxyBody,
3191
3155
  logFinalRequest,
@@ -3406,8 +3370,6 @@ function getOrCreateRuntimeState(accountKey) {
3406
3370
  backoffLevel: 0,
3407
3371
  consecutiveRefreshFailures: 0,
3408
3372
  permanentlyDisabled: false,
3409
- requestClassCooldowns: {},
3410
- modelTierCooldowns: {},
3411
3373
  };
3412
3374
  accountRuntimeState.set(accountKey, initial);
3413
3375
  return initial;
@@ -641,10 +641,6 @@ export type RuntimeAccountState = {
641
641
  backoffLevel: number;
642
642
  consecutiveRefreshFailures: number;
643
643
  permanentlyDisabled: boolean;
644
- requestClassCooldowns?: Record<string, number>;
645
- modelTierCooldowns?: Record<string, number>;
646
- requestClassBackoffLevels?: Record<string, number>;
647
- modelTierBackoffLevels?: Record<string, number>;
648
644
  lastToken?: string;
649
645
  lastRefreshToken?: string;
650
646
  };
@@ -697,63 +693,26 @@ export type CachedSession = {
697
693
  };
698
694
  /** Model tier classification for proxy routing decisions. */
699
695
  export type ClaudeProxyModelTier = "opus" | "sonnet" | "haiku" | "other";
700
- /** Request class for proxy routing policy. */
701
- export type ClaudeProxyRequestClass = "multimodal" | "high-tool-count-non-stream-structured" | "strong-tool-fidelity" | "streaming-conversational" | "standard";
702
- /** Full classification profile for a proxy request. */
703
- export type ClaudeProxyRequestProfile = {
704
- requestedModel: string;
705
- modelTier: ClaudeProxyModelTier;
706
- primaryClass: ClaudeProxyRequestClass;
707
- classes: ClaudeProxyRequestClass[];
708
- stream: boolean;
709
- toolCount: number;
710
- hasImages: boolean;
711
- hasThinking: boolean;
712
- hasToolHistory: boolean;
713
- requiresToolUse: boolean;
714
- requiresSpecificTool: boolean;
715
- requiresStrongToolFidelity: boolean;
716
- isHighToolCountNonStream: boolean;
717
- isStreamingConversational: boolean;
718
- isMultimodal: boolean;
719
- };
720
- /** Outcome of evaluating a single fallback candidate. */
721
- export type FallbackEligibilityDecision = {
722
- provider?: string;
723
- model?: string;
724
- eligible: boolean;
725
- reason: string;
726
- };
727
696
  /** A single provider attempt in the proxy translation plan. */
728
697
  export type ProxyTranslationAttempt = {
729
698
  provider?: string;
730
699
  model?: string;
731
700
  label: string;
732
701
  };
733
- /** Ordered plan of provider attempts and skipped candidates. */
702
+ /** Ordered plan of provider attempts for a proxy request. */
734
703
  export type ProxyTranslationPlan = {
735
- profile: ClaudeProxyRequestProfile;
704
+ requestedModel: string;
705
+ modelTier: ClaudeProxyModelTier;
736
706
  attempts: ProxyTranslationAttempt[];
737
- skipped: FallbackEligibilityDecision[];
707
+ skipped: never[];
738
708
  };
739
- /** Discriminated union describing why a cooldown is active. */
740
- export type CooldownScope = {
741
- scope: "request_class";
742
- key: string;
743
- until: number;
744
- } | {
745
- scope: "model_tier";
746
- key: string;
747
- until: number;
748
- } | {
749
- scope: "generic";
750
- key: "generic";
751
- until: number;
752
- };
753
- /** An account skipped during partitioning, with the cooldown that caused it. */
709
+ /** An account skipped during partitioning, with its cooldown info. */
754
710
  export type CooldownSkippedAccount<T> = {
755
711
  account: T;
756
- cooldown: CooldownScope;
712
+ cooldown: {
713
+ until: number;
714
+ backoffLevel: number;
715
+ };
757
716
  };
758
717
  /** Mutable readiness state tracked by the proxy process. */
759
718
  export type ProxyReadinessState = {