@juspay/neurolink 9.42.1 → 9.44.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/CHANGELOG.md +12 -0
  2. package/dist/browser/neurolink.min.js +300 -300
  3. package/dist/cli/commands/mcp.js +15 -3
  4. package/dist/cli/commands/proxy.js +29 -6
  5. package/dist/core/baseProvider.js +12 -3
  6. package/dist/core/factory.js +4 -4
  7. package/dist/core/modules/ToolsManager.d.ts +1 -0
  8. package/dist/core/modules/ToolsManager.js +40 -42
  9. package/dist/core/toolEvents.d.ts +3 -0
  10. package/dist/core/toolEvents.js +7 -0
  11. package/dist/evaluation/scorers/scorerRegistry.js +3 -2
  12. package/dist/lib/core/baseProvider.js +12 -3
  13. package/dist/lib/core/factory.js +4 -4
  14. package/dist/lib/core/modules/ToolsManager.d.ts +1 -0
  15. package/dist/lib/core/modules/ToolsManager.js +40 -42
  16. package/dist/lib/core/toolEvents.d.ts +3 -0
  17. package/dist/lib/core/toolEvents.js +8 -0
  18. package/dist/lib/evaluation/scorers/scorerRegistry.js +3 -2
  19. package/dist/lib/neurolink.js +33 -19
  20. package/dist/lib/providers/googleNativeGemini3.d.ts +4 -0
  21. package/dist/lib/providers/googleNativeGemini3.js +39 -1
  22. package/dist/lib/providers/googleVertex.js +10 -2
  23. package/dist/lib/proxy/claudeFormat.js +2 -1
  24. package/dist/lib/proxy/proxyHealth.d.ts +17 -0
  25. package/dist/lib/proxy/proxyHealth.js +55 -0
  26. package/dist/lib/proxy/requestLogger.js +8 -3
  27. package/dist/lib/proxy/routingPolicy.d.ts +33 -0
  28. package/dist/lib/proxy/routingPolicy.js +255 -0
  29. package/dist/lib/proxy/snapshotPersistence.d.ts +2 -0
  30. package/dist/lib/proxy/snapshotPersistence.js +41 -0
  31. package/dist/lib/server/routes/claudeProxyRoutes.d.ts +1 -9
  32. package/dist/lib/server/routes/claudeProxyRoutes.js +304 -219
  33. package/dist/lib/tasks/store/redisTaskStore.js +34 -16
  34. package/dist/lib/types/cli.d.ts +4 -0
  35. package/dist/lib/types/proxyTypes.d.ts +87 -0
  36. package/dist/lib/types/tools.d.ts +18 -0
  37. package/dist/lib/utils/schemaConversion.d.ts +1 -0
  38. package/dist/lib/utils/schemaConversion.js +3 -0
  39. package/dist/neurolink.js +33 -19
  40. package/dist/providers/googleNativeGemini3.d.ts +4 -0
  41. package/dist/providers/googleNativeGemini3.js +39 -1
  42. package/dist/providers/googleVertex.js +10 -2
  43. package/dist/proxy/claudeFormat.js +2 -1
  44. package/dist/proxy/proxyHealth.d.ts +17 -0
  45. package/dist/proxy/proxyHealth.js +54 -0
  46. package/dist/proxy/requestLogger.js +8 -3
  47. package/dist/proxy/routingPolicy.d.ts +33 -0
  48. package/dist/proxy/routingPolicy.js +254 -0
  49. package/dist/proxy/snapshotPersistence.d.ts +2 -0
  50. package/dist/proxy/snapshotPersistence.js +40 -0
  51. package/dist/server/routes/claudeProxyRoutes.d.ts +1 -9
  52. package/dist/server/routes/claudeProxyRoutes.js +304 -219
  53. package/dist/tasks/store/redisTaskStore.js +34 -16
  54. package/dist/types/cli.d.ts +4 -0
  55. package/dist/types/proxyTypes.d.ts +87 -0
  56. package/dist/types/tools.d.ts +18 -0
  57. package/dist/utils/schemaConversion.d.ts +1 -0
  58. package/dist/utils/schemaConversion.js +3 -0
  59. package/package.json +1 -1
@@ -9,8 +9,7 @@
9
9
  * provider/model pairs (e.g. "claude-sonnet-4-20250514" -> vertex/gemini-2.5-pro).
10
10
  * Without a router, models are passed through to the Anthropic provider.
11
11
  */
12
- import { randomUUID } from "node:crypto";
13
- import { access, mkdir, readFile, rename, writeFile } from "node:fs/promises";
12
+ import { access, readFile } from "node:fs/promises";
14
13
  import { homedir } from "node:os";
15
14
  import { join } from "node:path";
16
15
  import { buildStableClaudeCodeBillingHeader, CLAUDE_CLI_USER_AGENT, CLAUDE_CODE_OAUTH_BETAS, getOrCreateClaudeCodeIdentity, parseClaudeCodeUserId, } from "../../auth/anthropicOAuth.js";
@@ -21,6 +20,8 @@ import { createRawStreamCapture } from "../../proxy/rawStreamCapture.js";
21
20
  import { logBodyCapture, logRequest, logRequestAttempt, logStreamError, } from "../../proxy/requestLogger.js";
22
21
  import { createSSEInterceptor } from "../../proxy/sseInterceptor.js";
23
22
  import { needsRefresh, persistTokens, refreshToken, } from "../../proxy/tokenRefresh.js";
23
+ import { applyRateLimitCooldownScope, buildProxyTranslationPlan, classifyClaudeProxyRequest, getActiveCooldownScope, partitionAccountsByCooldown, summarizeSkippedFallbacks, } from "../../proxy/routingPolicy.js";
24
+ import { writeJsonSnapshotAtomically } from "../../proxy/snapshotPersistence.js";
24
25
  import { recordAttempt, recordAttemptError, recordCooldown, recordFinalError, recordFinalSuccess, } from "../../proxy/usageStats.js";
25
26
  import { logger } from "../../utils/logger.js";
26
27
  import { ProviderHealthChecker } from "../../utils/providerHealth.js";
@@ -46,9 +47,10 @@ let primaryAccountIndex = 0;
46
47
  let lastKnownAccountCount = 0;
47
48
  const MAX_AUTH_RETRIES = 5;
48
49
  const MAX_CONSECUTIVE_REFRESH_FAILURES = 15;
50
+ const MAX_TRANSIENT_SAME_ACCOUNT_RETRIES = 2;
51
+ const TRANSIENT_SAME_ACCOUNT_RETRY_DELAYS_MS = [250, 1_000];
49
52
  /** Decision 8: Cooldowns only for 401 and 429. */
50
53
  const AUTH_COOLDOWN_MS = 5 * 60 * 1000; // 5 minutes for 401
51
- const RATE_LIMIT_BACKOFF_BASE_MS = 1000; // 1 second base for 429
52
54
  const RATE_LIMIT_BACKOFF_CAP_MS = 10 * 60 * 1000; // 10 minute cap for 429
53
55
  /** Timeout for upstream requests to Anthropic. Must be generous enough
54
56
  * to cover the full lifecycle of streaming responses, including extended
@@ -220,11 +222,7 @@ function snapshotsMatch(existing, next) {
220
222
  }
221
223
  async function persistClaudeSnapshot(accountLabel, snapshot) {
222
224
  const snapshotPath = getSnapshotPath(accountLabel);
223
- const dirPath = join(homedir(), ".neurolink", "header-snapshots");
224
- await mkdir(dirPath, { recursive: true });
225
- const tmpPath = `${snapshotPath}.${process.pid}.${randomUUID()}.tmp`;
226
- await writeFile(tmpPath, JSON.stringify(snapshot, null, 2), { mode: 0o600 });
227
- await rename(tmpPath, snapshotPath);
225
+ await writeJsonSnapshotAtomically(snapshotPath, snapshot, 0o600);
228
226
  snapshotCache.set(getSnapshotSafeLabel(accountLabel), {
229
227
  snapshot,
230
228
  loadedAt: Date.now(),
@@ -259,7 +257,19 @@ async function maybeRefreshClaudeSnapshot(accountLabel, accountKey, headers, bod
259
257
  if (snapshotsMatch(existing, next)) {
260
258
  return existing;
261
259
  }
262
- await persistClaudeSnapshot(accountLabel, next);
260
+ try {
261
+ await persistClaudeSnapshot(accountLabel, next);
262
+ }
263
+ catch (error) {
264
+ logger.warn("[proxy] failed to persist Claude snapshot", {
265
+ accountLabel,
266
+ error: error instanceof Error ? error.message : String(error),
267
+ });
268
+ snapshotCache.set(getSnapshotSafeLabel(accountLabel), {
269
+ snapshot: next,
270
+ loadedAt: Date.now(),
271
+ });
272
+ }
263
273
  return next;
264
274
  }
265
275
  /**
@@ -398,10 +408,12 @@ async function handleTranslatedClaudeRequest(args) {
398
408
  const { ctx, body, route, modelRouter, tracer, requestStartTime, logProxyBody, } = args;
399
409
  tracer?.setMode("full");
400
410
  const parsed = parseClaudeRequest(body);
401
- const attempts = buildProxyTranslationAttempts({
411
+ const plan = buildProxyTranslationPlan({
402
412
  provider: route.provider,
403
413
  model: route.model,
404
- }, modelRouter, parsed);
414
+ }, modelRouter?.getFallbackChain() ?? [], body.model, parsed);
415
+ logProxyRoutingPlan(logProxyBody, "translated_request", plan);
416
+ const attempts = plan.attempts;
405
417
  if (body.stream) {
406
418
  return handleTranslatedClaudeStreamRequest({
407
419
  ctx,
@@ -422,6 +434,18 @@ async function handleTranslatedClaudeRequest(args) {
422
434
  logProxyBody,
423
435
  });
424
436
  }
437
+ function logProxyRoutingPlan(logProxyBody, stage, plan) {
438
+ logProxyBody({
439
+ phase: "routing_decision",
440
+ contentType: "application/json",
441
+ body: {
442
+ stage,
443
+ requestProfile: plan.profile,
444
+ attempts: plan.attempts,
445
+ skipped: plan.skipped,
446
+ },
447
+ });
448
+ }
425
449
  async function handleTranslatedClaudeStreamRequest(args) {
426
450
  const { ctx, body, attempts, parsed, tracer, requestStartTime } = args;
427
451
  const serializer = new ClaudeStreamSerializer(body.model, 0);
@@ -1302,18 +1326,33 @@ async function executeClaudeFallbackTranslation(args) {
1302
1326
  return clientResponse;
1303
1327
  }
1304
1328
  async function tryConfiguredClaudeFallbackChain(args) {
1305
- const { ctx, body, modelRouter, tracer, requestStartTime, logProxyBody, logFinalRequest, } = args;
1306
- const parsedFallbackRequest = parseClaudeRequest(body);
1329
+ const { ctx, body, parsedFallbackRequest, requestProfile, modelRouter, tracer, requestStartTime, logProxyBody, logFinalRequest, } = args;
1307
1330
  const chain = modelRouter?.getFallbackChain() ?? [];
1308
- for (const fallback of chain) {
1309
- if (shouldSkipTranslationTarget(fallback.provider, fallback.model, parsedFallbackRequest)) {
1310
- logger.debug(`[proxy] skipping fallback ${fallback.provider}/${fallback.model}: incompatible with request shape`);
1331
+ const fallbackPlan = buildProxyTranslationPlan({ provider: "anthropic", model: body.model }, chain, body.model, parsedFallbackRequest);
1332
+ const fallbackPolicyReason = summarizeSkippedFallbacks(fallbackPlan);
1333
+ logProxyBody({
1334
+ phase: "routing_decision",
1335
+ contentType: "application/json",
1336
+ body: {
1337
+ stage: "anthropic_fallback",
1338
+ requestProfile,
1339
+ attempts: fallbackPlan.attempts.slice(1),
1340
+ skipped: fallbackPlan.skipped,
1341
+ },
1342
+ });
1343
+ for (const skipped of fallbackPlan.skipped) {
1344
+ const label = skipped.provider
1345
+ ? `${skipped.provider}/${skipped.model ?? "unknown"}`
1346
+ : "auto-provider";
1347
+ logger.always(`[proxy] skipping fallback ${label}: ${skipped.reason}`);
1348
+ }
1349
+ for (const fallback of fallbackPlan.attempts.slice(1)) {
1350
+ if (!fallback.provider || !fallback.model) {
1311
1351
  continue;
1312
1352
  }
1313
1353
  const availability = await ProviderHealthChecker.checkFallbackProviderAvailability(fallback.provider, fallback.model);
1314
1354
  if (!availability.available) {
1315
- logger.debug(`[proxy] skipping fallback ${fallback.provider}/${fallback.model}: ${availability.reason ?? "provider unavailable"}`);
1316
- continue;
1355
+ logger.always(`[proxy] fallback ${fallback.provider}/${fallback.model} health-check failed (${availability.reason ?? "provider unavailable"}), attempting anyway`);
1317
1356
  }
1318
1357
  try {
1319
1358
  logger.always(`[proxy] fallback → ${fallback.provider}/${fallback.model}`);
@@ -1321,7 +1360,7 @@ async function tryConfiguredClaudeFallbackChain(args) {
1321
1360
  provider: fallback.provider,
1322
1361
  model: fallback.model,
1323
1362
  });
1324
- return await executeClaudeFallbackTranslation({
1363
+ const response = await executeClaudeFallbackTranslation({
1325
1364
  ctx,
1326
1365
  body,
1327
1366
  tracer,
@@ -1331,18 +1370,31 @@ async function tryConfiguredClaudeFallbackChain(args) {
1331
1370
  options: options,
1332
1371
  providerLabel: fallback.provider,
1333
1372
  });
1373
+ return {
1374
+ response,
1375
+ fallbackPolicyReason,
1376
+ };
1334
1377
  }
1335
1378
  catch (fallbackErr) {
1336
- logger.debug(`[proxy] fallback ${fallback.provider}/${fallback.model} failed: ${fallbackErr instanceof Error ? fallbackErr.message : String(fallbackErr)}`);
1379
+ logger.always(`[proxy] fallback ${fallback.provider}/${fallback.model} failed: ${fallbackErr instanceof Error ? fallbackErr.message : String(fallbackErr)}`);
1337
1380
  }
1338
1381
  }
1339
- return null;
1382
+ return {
1383
+ response: null,
1384
+ fallbackPolicyReason,
1385
+ };
1340
1386
  }
1341
1387
  async function tryAutoClaudeFallback(args) {
1342
1388
  const { ctx, body, tracer, requestStartTime, logProxyBody, logFinalRequest } = args;
1343
1389
  try {
1344
- logger.always("[proxy] fallback → auto-provider");
1345
1390
  const parsed = parseClaudeRequest(body);
1391
+ const plan = buildProxyTranslationPlan({ provider: "anthropic", model: body.model }, [], body.model, parsed);
1392
+ logProxyRoutingPlan(logProxyBody, "auto_fallback", plan);
1393
+ const autoAttempt = plan.attempts.find((attempt) => attempt.label === "auto-provider");
1394
+ if (!autoAttempt) {
1395
+ return null;
1396
+ }
1397
+ logger.always("[proxy] fallback → auto-provider");
1346
1398
  const options = buildProxyFallbackOptions(parsed);
1347
1399
  return await executeClaudeFallbackTranslation({
1348
1400
  ctx,
@@ -1361,7 +1413,7 @@ async function tryAutoClaudeFallback(args) {
1361
1413
  }
1362
1414
  }
1363
1415
  function buildClaudeAnthropicFailureResponse(args) {
1364
- const { tracer, requestStartTime, authFailureMessage, invalidRequestFailure, sawNetworkError, sawTransientFailure, sawRateLimit, lastError, orderedAccounts, buildLoggedClaudeError, logProxyBody, logFinalRequest, } = args;
1416
+ const { tracer, requestStartTime, authFailureMessage, invalidRequestFailure, sawNetworkError, sawTransientFailure, sawRateLimit, lastError, orderedAccounts, requestProfile, fallbackPolicyReason, buildLoggedClaudeError, logProxyBody, logFinalRequest, } = args;
1365
1417
  if (authFailureMessage && !sawRateLimit) {
1366
1418
  tracer?.setError("authentication_error", authFailureMessage);
1367
1419
  tracer?.end(401, Date.now() - requestStartTime);
@@ -1408,18 +1460,21 @@ function buildClaudeAnthropicFailureResponse(args) {
1408
1460
  return buildLoggedClaudeError(502, msg);
1409
1461
  }
1410
1462
  const earliestRecovery = orderedAccounts.reduce((min, account) => {
1411
- const coolingUntil = getOrCreateRuntimeState(account.key).coolingUntil;
1412
- return coolingUntil ? Math.min(min, coolingUntil) : min;
1463
+ const cooldown = getActiveCooldownScope(getOrCreateRuntimeState(account.key), requestProfile);
1464
+ return cooldown ? Math.min(min, cooldown.until) : min;
1413
1465
  }, Infinity);
1414
1466
  const retryAfterSec = Number.isFinite(earliestRecovery)
1415
1467
  ? Math.max(1, Math.ceil((earliestRecovery - Date.now()) / 1000))
1416
1468
  : 60;
1417
- logger.always(`[proxy] all accounts rate-limited, retry in ${retryAfterSec}s`);
1418
- const errorBody = buildClaudeError(429, `All accounts rate-limited. Earliest recovery in ${retryAfterSec}s.`, "overloaded_error");
1419
- tracer?.setError("rate_limit_error", `All accounts rate-limited. Retry in ${retryAfterSec}s.`);
1469
+ const contractMessage = fallbackPolicyReason
1470
+ ? ` ${fallbackPolicyReason}`
1471
+ : "";
1472
+ logger.always(`[proxy] all accounts rate-limited for request-class=${requestProfile.primaryClass}, retry in ${retryAfterSec}s`);
1473
+ const errorBody = buildClaudeError(429, `All accounts rate-limited. Earliest recovery in ${retryAfterSec}s.${contractMessage}`, "overloaded_error");
1474
+ tracer?.setError("rate_limit_error", `All accounts rate-limited. Retry in ${retryAfterSec}s.${contractMessage}`);
1420
1475
  tracer?.end(429, Date.now() - requestStartTime);
1421
1476
  recordFinalError(429);
1422
- logFinalRequest(429, "", "final", "rate_limit_error", `All accounts rate-limited. Retry in ${retryAfterSec}s.`);
1477
+ logFinalRequest(429, "", "final", "rate_limit_error", `All accounts rate-limited. Retry in ${retryAfterSec}s.${contractMessage}`);
1423
1478
  const errorBodyText = JSON.stringify(errorBody);
1424
1479
  logProxyBody({
1425
1480
  phase: "client_response",
@@ -1442,10 +1497,22 @@ function buildClaudeAnthropicFailureResponse(args) {
1442
1497
  });
1443
1498
  }
1444
1499
  async function handleAnthropicSuccessfulResponse(args) {
1445
- const { ctx, body, account, accountState, response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest, } = args;
1500
+ const { ctx, body, account, accountState, requestProfile, response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest, } = args;
1446
1501
  accountState.backoffLevel = 0;
1447
1502
  accountState.coolingUntil = undefined;
1448
1503
  accountState.consecutiveRefreshFailures = 0;
1504
+ if (accountState.requestClassCooldowns) {
1505
+ delete accountState.requestClassCooldowns[`${requestProfile.primaryClass}:${requestProfile.requestedModel.toLowerCase()}`];
1506
+ }
1507
+ if (accountState.modelTierCooldowns) {
1508
+ delete accountState.modelTierCooldowns[requestProfile.modelTier];
1509
+ }
1510
+ if (accountState.requestClassBackoffLevels) {
1511
+ delete accountState.requestClassBackoffLevels[`${requestProfile.primaryClass}:${requestProfile.requestedModel.toLowerCase()}`];
1512
+ }
1513
+ if (accountState.modelTierBackoffLevels) {
1514
+ delete accountState.modelTierBackoffLevels[requestProfile.modelTier];
1515
+ }
1449
1516
  logger.always(`[proxy] ← ${response.status} account=${account.label}`);
1450
1517
  const quota = parseQuotaHeaders(response.headers);
1451
1518
  if (quota) {
@@ -2035,7 +2102,7 @@ async function handleAnthropicSuccessfulRetryResponse(args) {
2035
2102
  return retryJson;
2036
2103
  }
2037
2104
  async function handleAnthropicAuthRetry(args) {
2038
- const { ctx, body, account, accountState, headers, buildUpstreamBody, enabledAccounts, orderedAccounts, response: _response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logAttempt, logProxyBody, logFinalRequest, lastError, authFailureMessage, sawRateLimit, sawTransientFailure, sawNetworkError, } = args;
2105
+ const { ctx, body, account, accountState, requestProfile, headers, buildUpstreamBody, enabledAccounts, orderedAccounts, response: _response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logAttempt, logProxyBody, logFinalRequest, lastError, authFailureMessage, sawRateLimit, sawTransientFailure, sawNetworkError, } = args;
2039
2106
  recordAttemptError(account.label, account.type, 401);
2040
2107
  let currentLastError = lastError;
2041
2108
  let currentAuthFailureMessage = authFailureMessage;
@@ -2119,9 +2186,14 @@ async function handleAnthropicAuthRetry(args) {
2119
2186
  const cooldownMs = Number.isNaN(parsedRetryAfter)
2120
2187
  ? 60_000
2121
2188
  : Math.max(1, parsedRetryAfter) * 1000;
2122
- accountState.coolingUntil = Date.now() + cooldownMs;
2189
+ const cooldown = applyRateLimitCooldownScope({
2190
+ state: accountState,
2191
+ profile: requestProfile,
2192
+ retryAfterMs: cooldownMs,
2193
+ capMs: RATE_LIMIT_BACKOFF_CAP_MS,
2194
+ });
2123
2195
  advancePrimaryIfCurrent(account.key, enabledAccounts.length, orderedAccounts[0]?.key);
2124
- recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
2196
+ recordCooldown(account.label, account.type, Date.now() + cooldown.backoffMs, accountState.backoffLevel);
2125
2197
  break;
2126
2198
  }
2127
2199
  if (retryStatus === 401 || retryStatus === 402 || retryStatus === 403) {
@@ -2364,13 +2436,14 @@ async function handleAnthropicNonOkResponse(args) {
2364
2436
  if (isTransientHttpFailure(response.status, errBody)) {
2365
2437
  recordAttemptError(account.label, account.type, response.status);
2366
2438
  currentSawTransientFailure = true;
2367
- logger.always(`[proxy] ← ${response.status} account=${account.label} (transient, rotating)`);
2439
+ logger.always(`[proxy] ← ${response.status} account=${account.label} (transient)`);
2368
2440
  currentLastError = errBody;
2369
2441
  logAttempt(response.status, "api_error", summarizeErrorMessage(errBody));
2370
2442
  tracer?.setError("transient_error", summarizeErrorMessage(errBody));
2371
2443
  tracer?.recordRetry(account.label, "transient");
2372
2444
  return {
2373
2445
  continueLoop: true,
2446
+ retrySameAccount: true,
2374
2447
  lastError: currentLastError,
2375
2448
  authFailureMessage: currentAuthFailureMessage,
2376
2449
  sawTransientFailure: currentSawTransientFailure,
@@ -2692,7 +2765,7 @@ async function prepareAnthropicAccountAttempt(args) {
2692
2765
  };
2693
2766
  }
2694
2767
  async function fetchAnthropicAccountResponse(args) {
2695
- const { url, headers, finalBodyStr, account, accountState, enabledAccounts, orderedAccounts, tracer, logAttempt, currentLastError, currentSawRateLimit, currentSawNetworkError, upstreamSpan, } = args;
2768
+ const { url, headers, finalBodyStr, account, accountState, requestProfile, enabledAccounts, orderedAccounts, tracer, logAttempt, currentLastError, currentSawRateLimit, currentSawNetworkError, upstreamSpan, } = args;
2696
2769
  let lastError = currentLastError;
2697
2770
  let sawRateLimit = currentSawRateLimit;
2698
2771
  let sawNetworkError = currentSawNetworkError;
@@ -2715,13 +2788,14 @@ async function fetchAnthropicAccountResponse(args) {
2715
2788
  const errorCode = getErrorCode(fetchErr) ?? "unknown";
2716
2789
  const errorMessage = fetchErr instanceof Error ? fetchErr.message : String(fetchErr);
2717
2790
  lastError = errorMessage;
2718
- logger.always(`[proxy] fetch error account=${account.label} code=${errorCode} (rotating): ${errorMessage}`);
2791
+ logger.always(`[proxy] fetch error account=${account.label} code=${errorCode} (retryable): ${errorMessage}`);
2719
2792
  logAttempt(502, "network_error", errorMessage);
2720
2793
  tracer?.setError("network_error", errorMessage);
2721
2794
  tracer?.recordRetry(account.label, "network_error");
2722
2795
  currentUpstreamSpan?.end();
2723
2796
  return {
2724
2797
  continueLoop: true,
2798
+ retrySameAccount: true,
2725
2799
  lastError,
2726
2800
  sawRateLimit,
2727
2801
  sawNetworkError,
@@ -2744,16 +2818,17 @@ async function fetchAnthropicAccountResponse(args) {
2744
2818
  }
2745
2819
  }
2746
2820
  }
2747
- const level = accountState.backoffLevel;
2748
- const baseCooldown = cooldownMs > 0 ? cooldownMs : RATE_LIMIT_BACKOFF_BASE_MS;
2749
- const backoffMs = Math.min(baseCooldown * 2 ** level, RATE_LIMIT_BACKOFF_CAP_MS);
2750
- accountState.coolingUntil = Date.now() + backoffMs;
2751
- accountState.backoffLevel += 1;
2821
+ const cooldown = applyRateLimitCooldownScope({
2822
+ state: accountState,
2823
+ profile: requestProfile,
2824
+ retryAfterMs: cooldownMs > 0 ? cooldownMs : undefined,
2825
+ capMs: RATE_LIMIT_BACKOFF_CAP_MS,
2826
+ });
2752
2827
  advancePrimaryIfCurrent(account.key, enabledAccounts.length, orderedAccounts[0]?.key);
2753
2828
  recordAttemptError(account.label, account.type, 429);
2754
- recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
2829
+ recordCooldown(account.label, account.type, Date.now() + cooldown.backoffMs, accountState.backoffLevel);
2755
2830
  lastError = await response.text();
2756
- logger.always(`[proxy] ← 429 account=${account.label} backoff-level=${accountState.backoffLevel} cooldown=${Math.round(backoffMs / 1000)}s`);
2831
+ logger.always(`[proxy] ← 429 account=${account.label} backoff-level=${accountState.backoffLevel} cooldown=${Math.round(cooldown.backoffMs / 1000)}s request-class=${cooldown.requestClassKey} model-tier=${cooldown.modelTierKey}`);
2757
2832
  logAttempt(429, "rate_limit_error", String(lastError));
2758
2833
  tracer?.setError("rate_limit_error", String(lastError).slice(0, 500));
2759
2834
  tracer?.recordRetry(account.label, "rate_limit");
@@ -2777,6 +2852,8 @@ async function fetchAnthropicAccountResponse(args) {
2777
2852
  }
2778
2853
  async function handleAnthropicRoutedClaudeRequest(args) {
2779
2854
  const { ctx, body, modelRouter, tracer, requestStartTime, accountStrategy, buildLoggedClaudeError, logProxyBody, logFinalRequest, } = args;
2855
+ const parsedRequest = parseClaudeRequest(body);
2856
+ const requestProfile = classifyClaudeProxyRequest(body.model, parsedRequest);
2780
2857
  const loadedAccounts = await loadClaudeProxyAccounts({
2781
2858
  ctx,
2782
2859
  body,
@@ -2799,182 +2876,218 @@ async function handleAnthropicRoutedClaudeRequest(args) {
2799
2876
  attemptNumber: 0,
2800
2877
  };
2801
2878
  const acctSelectionSpan = tracer?.startAccountSelection();
2802
- for (const account of orderedAccounts) {
2803
- const accountState = getOrCreateRuntimeState(account.key);
2804
- if (accountState.coolingUntil && accountState.coolingUntil > Date.now()) {
2805
- continue;
2879
+ const accountPartition = partitionAccountsByCooldown(orderedAccounts, (account) => getOrCreateRuntimeState(account.key), requestProfile);
2880
+ for (const skippedAccount of accountPartition.skipped) {
2881
+ if (skippedAccount.cooldown.scope === "request_class" ||
2882
+ skippedAccount.cooldown.scope === "model_tier") {
2883
+ loopState.sawRateLimit = true;
2806
2884
  }
2807
- loopState.attemptNumber += 1;
2808
- if (tracer && loopState.attemptNumber === 1 && acctSelectionSpan) {
2809
- tracer.setAccountSelection({
2810
- strategy: accountStrategy,
2811
- accountsTotal: accounts.length,
2812
- accountsHealthy: enabledAccounts.length,
2813
- selectedAccount: account.label,
2814
- accountType: account.type,
2815
- });
2816
- acctSelectionSpan.end();
2817
- }
2818
- const logAttempt = createAnthropicAttemptLogger({
2819
- ctx,
2820
- body,
2821
- toolCount,
2822
- requestStart,
2823
- tracer,
2824
- account,
2825
- attemptNumber: loopState.attemptNumber,
2826
- });
2827
- const preparedAttempt = await prepareAnthropicAccountAttempt({
2828
- account,
2829
- accountState,
2830
- bodyStr,
2831
- clientHeaders,
2832
- isClaudeClientRequest,
2833
- url,
2834
- tracer,
2835
- attemptNumber: loopState.attemptNumber,
2836
- currentLastError: loopState.lastError,
2837
- currentAuthFailureMessage: loopState.authFailureMessage,
2838
- logAttempt,
2839
- logProxyBody,
2840
- });
2841
- loopState.lastError = preparedAttempt.lastError;
2842
- loopState.authFailureMessage = preparedAttempt.authFailureMessage;
2843
- if (preparedAttempt.continueLoop ||
2844
- !preparedAttempt.headers ||
2845
- !preparedAttempt.buildUpstreamBody ||
2846
- !preparedAttempt.finalBodyStr ||
2847
- preparedAttempt.fetchStartMs === undefined) {
2848
- continue;
2849
- }
2850
- const fetchResult = await fetchAnthropicAccountResponse({
2851
- url,
2852
- headers: preparedAttempt.headers,
2853
- finalBodyStr: preparedAttempt.finalBodyStr,
2854
- account,
2855
- accountState,
2856
- enabledAccounts,
2857
- orderedAccounts,
2858
- tracer,
2859
- logAttempt,
2860
- currentLastError: loopState.lastError,
2861
- currentSawRateLimit: loopState.sawRateLimit,
2862
- currentSawNetworkError: loopState.sawNetworkError,
2863
- upstreamSpan: preparedAttempt.upstreamSpan,
2864
- });
2865
- loopState.lastError = fetchResult.lastError;
2866
- loopState.sawRateLimit = fetchResult.sawRateLimit;
2867
- loopState.sawNetworkError = fetchResult.sawNetworkError;
2868
- if (fetchResult.continueLoop || !fetchResult.response) {
2869
- continue;
2870
- }
2871
- let upstreamSpan = fetchResult.upstreamSpan;
2872
- const response = fetchResult.response;
2873
- if (response.status === 401 &&
2874
- account.type === "oauth" &&
2875
- account.refreshToken) {
2876
- const authRetryResult = await handleAnthropicAuthRetry({
2885
+ loopState.lastError = `Skipped account=${skippedAccount.account.label} due to ${skippedAccount.cooldown.scope} cooldown ${skippedAccount.cooldown.key}`;
2886
+ logger.always(`[proxy] skipping account=${skippedAccount.account.label} due to ${skippedAccount.cooldown.scope} cooldown=${skippedAccount.cooldown.key} remaining=${Math.max(1, Math.ceil((skippedAccount.cooldown.until - Date.now()) / 1000))}s`);
2887
+ }
2888
+ accountLoop: for (const account of accountPartition.eligible) {
2889
+ const accountState = getOrCreateRuntimeState(account.key);
2890
+ let transientSameAccountRetries = 0;
2891
+ while (true) {
2892
+ loopState.attemptNumber += 1;
2893
+ if (tracer && loopState.attemptNumber === 1 && acctSelectionSpan) {
2894
+ tracer.setAccountSelection({
2895
+ strategy: accountStrategy,
2896
+ accountsTotal: accounts.length,
2897
+ accountsHealthy: enabledAccounts.length,
2898
+ selectedAccount: account.label,
2899
+ accountType: account.type,
2900
+ });
2901
+ acctSelectionSpan.end();
2902
+ }
2903
+ const logAttempt = createAnthropicAttemptLogger({
2877
2904
  ctx,
2878
2905
  body,
2906
+ toolCount,
2907
+ requestStart,
2908
+ tracer,
2909
+ account,
2910
+ attemptNumber: loopState.attemptNumber,
2911
+ });
2912
+ const preparedAttempt = await prepareAnthropicAccountAttempt({
2879
2913
  account,
2880
2914
  accountState,
2915
+ bodyStr,
2916
+ clientHeaders,
2917
+ isClaudeClientRequest,
2918
+ url,
2919
+ tracer,
2920
+ attemptNumber: loopState.attemptNumber,
2921
+ currentLastError: loopState.lastError,
2922
+ currentAuthFailureMessage: loopState.authFailureMessage,
2923
+ logAttempt,
2924
+ logProxyBody,
2925
+ });
2926
+ loopState.lastError = preparedAttempt.lastError;
2927
+ loopState.authFailureMessage = preparedAttempt.authFailureMessage;
2928
+ if (preparedAttempt.continueLoop ||
2929
+ !preparedAttempt.headers ||
2930
+ !preparedAttempt.buildUpstreamBody ||
2931
+ !preparedAttempt.finalBodyStr ||
2932
+ preparedAttempt.fetchStartMs === undefined) {
2933
+ continue accountLoop;
2934
+ }
2935
+ const fetchResult = await fetchAnthropicAccountResponse({
2936
+ url,
2881
2937
  headers: preparedAttempt.headers,
2882
- buildUpstreamBody: preparedAttempt.buildUpstreamBody,
2938
+ finalBodyStr: preparedAttempt.finalBodyStr,
2939
+ account,
2940
+ accountState,
2941
+ requestProfile,
2883
2942
  enabledAccounts,
2884
2943
  orderedAccounts,
2885
- response,
2886
2944
  tracer,
2887
- requestStartTime,
2888
- fetchStartMs: preparedAttempt.fetchStartMs,
2889
- attemptNumber: loopState.attemptNumber,
2890
- finalBodyStr: preparedAttempt.finalBodyStr,
2891
- upstreamSpan,
2892
2945
  logAttempt,
2893
- logProxyBody,
2894
- logFinalRequest,
2895
- lastError: loopState.lastError,
2896
- authFailureMessage: loopState.authFailureMessage,
2897
- sawRateLimit: loopState.sawRateLimit,
2898
- sawTransientFailure: loopState.sawTransientFailure,
2899
- sawNetworkError: loopState.sawNetworkError,
2946
+ currentLastError: loopState.lastError,
2947
+ currentSawRateLimit: loopState.sawRateLimit,
2948
+ currentSawNetworkError: loopState.sawNetworkError,
2949
+ upstreamSpan: preparedAttempt.upstreamSpan,
2900
2950
  });
2901
- loopState.lastError = authRetryResult.lastError;
2902
- loopState.authFailureMessage = authRetryResult.authFailureMessage;
2903
- loopState.sawRateLimit = authRetryResult.sawRateLimit;
2904
- loopState.sawTransientFailure = authRetryResult.sawTransientFailure;
2905
- loopState.sawNetworkError = authRetryResult.sawNetworkError;
2906
- upstreamSpan = authRetryResult.upstreamSpan;
2907
- if (authRetryResult.response !== undefined) {
2908
- return authRetryResult.response;
2951
+ loopState.lastError = fetchResult.lastError;
2952
+ loopState.sawRateLimit = fetchResult.sawRateLimit;
2953
+ loopState.sawNetworkError = fetchResult.sawNetworkError;
2954
+ if (fetchResult.continueLoop || !fetchResult.response) {
2955
+ if (fetchResult.retrySameAccount &&
2956
+ transientSameAccountRetries < MAX_TRANSIENT_SAME_ACCOUNT_RETRIES) {
2957
+ transientSameAccountRetries += 1;
2958
+ const delayMs = getTransientSameAccountRetryDelayMs(transientSameAccountRetries);
2959
+ logger.always(`[proxy] retrying same account=${account.label} after transient network error (${transientSameAccountRetries}/${MAX_TRANSIENT_SAME_ACCOUNT_RETRIES}) in ${delayMs}ms`);
2960
+ await sleep(delayMs);
2961
+ continue;
2962
+ }
2963
+ if (fetchResult.retrySameAccount) {
2964
+ logger.always(`[proxy] exhausted transient same-account retries for account=${account.label}; rotating`);
2965
+ }
2966
+ continue accountLoop;
2909
2967
  }
2910
- if (authRetryResult.continueLoop) {
2911
- continue;
2968
+ let upstreamSpan = fetchResult.upstreamSpan;
2969
+ const response = fetchResult.response;
2970
+ if (response.status === 401 &&
2971
+ account.type === "oauth" &&
2972
+ account.refreshToken) {
2973
+ const authRetryResult = await handleAnthropicAuthRetry({
2974
+ ctx,
2975
+ body,
2976
+ account,
2977
+ accountState,
2978
+ requestProfile,
2979
+ headers: preparedAttempt.headers,
2980
+ buildUpstreamBody: preparedAttempt.buildUpstreamBody,
2981
+ enabledAccounts,
2982
+ orderedAccounts,
2983
+ response,
2984
+ tracer,
2985
+ requestStartTime,
2986
+ fetchStartMs: preparedAttempt.fetchStartMs,
2987
+ attemptNumber: loopState.attemptNumber,
2988
+ finalBodyStr: preparedAttempt.finalBodyStr,
2989
+ upstreamSpan,
2990
+ logAttempt,
2991
+ logProxyBody,
2992
+ logFinalRequest,
2993
+ lastError: loopState.lastError,
2994
+ authFailureMessage: loopState.authFailureMessage,
2995
+ sawRateLimit: loopState.sawRateLimit,
2996
+ sawTransientFailure: loopState.sawTransientFailure,
2997
+ sawNetworkError: loopState.sawNetworkError,
2998
+ });
2999
+ loopState.lastError = authRetryResult.lastError;
3000
+ loopState.authFailureMessage = authRetryResult.authFailureMessage;
3001
+ loopState.sawRateLimit = authRetryResult.sawRateLimit;
3002
+ loopState.sawTransientFailure = authRetryResult.sawTransientFailure;
3003
+ loopState.sawNetworkError = authRetryResult.sawNetworkError;
3004
+ upstreamSpan = authRetryResult.upstreamSpan;
3005
+ if (authRetryResult.response !== undefined) {
3006
+ return authRetryResult.response;
3007
+ }
3008
+ if (authRetryResult.continueLoop) {
3009
+ continue accountLoop;
3010
+ }
2912
3011
  }
2913
- }
2914
- if (!response.ok) {
2915
- const nonOkResult = await handleAnthropicNonOkResponse({
2916
- response,
3012
+ if (!response.ok) {
3013
+ const nonOkResult = await handleAnthropicNonOkResponse({
3014
+ response,
3015
+ account,
3016
+ accountState,
3017
+ tracer,
3018
+ requestStartTime,
3019
+ fetchStartMs: preparedAttempt.fetchStartMs,
3020
+ attemptNumber: loopState.attemptNumber,
3021
+ logAttempt,
3022
+ logProxyBody,
3023
+ logFinalRequest,
3024
+ lastError: loopState.lastError,
3025
+ authFailureMessage: loopState.authFailureMessage,
3026
+ sawTransientFailure: loopState.sawTransientFailure,
3027
+ invalidRequestFailure: loopState.invalidRequestFailure,
3028
+ maxConsecutiveRefreshFailures: MAX_CONSECUTIVE_REFRESH_FAILURES,
3029
+ });
3030
+ loopState.lastError = nonOkResult.lastError;
3031
+ loopState.authFailureMessage = nonOkResult.authFailureMessage;
3032
+ loopState.sawTransientFailure = nonOkResult.sawTransientFailure;
3033
+ loopState.invalidRequestFailure = nonOkResult.invalidRequestFailure;
3034
+ if (nonOkResult.response !== undefined) {
3035
+ return nonOkResult.response;
3036
+ }
3037
+ if (nonOkResult.continueLoop) {
3038
+ if (nonOkResult.retrySameAccount &&
3039
+ transientSameAccountRetries < MAX_TRANSIENT_SAME_ACCOUNT_RETRIES) {
3040
+ transientSameAccountRetries += 1;
3041
+ const delayMs = getTransientSameAccountRetryDelayMs(transientSameAccountRetries);
3042
+ logger.always(`[proxy] retrying same account=${account.label} after transient upstream ${response.status} (${transientSameAccountRetries}/${MAX_TRANSIENT_SAME_ACCOUNT_RETRIES}) in ${delayMs}ms`);
3043
+ await sleep(delayMs);
3044
+ continue;
3045
+ }
3046
+ if (nonOkResult.retrySameAccount) {
3047
+ logger.always(`[proxy] exhausted transient same-account retries for account=${account.label}; rotating`);
3048
+ }
3049
+ continue accountLoop;
3050
+ }
3051
+ break accountLoop;
3052
+ }
3053
+ const successResult = await handleAnthropicSuccessfulResponse({
3054
+ ctx,
3055
+ body,
2917
3056
  account,
2918
3057
  accountState,
3058
+ requestProfile,
3059
+ response,
2919
3060
  tracer,
2920
3061
  requestStartTime,
2921
3062
  fetchStartMs: preparedAttempt.fetchStartMs,
2922
3063
  attemptNumber: loopState.attemptNumber,
2923
- logAttempt,
3064
+ finalBodyStr: preparedAttempt.finalBodyStr,
3065
+ upstreamSpan,
2924
3066
  logProxyBody,
2925
3067
  logFinalRequest,
2926
- lastError: loopState.lastError,
2927
- authFailureMessage: loopState.authFailureMessage,
2928
- sawTransientFailure: loopState.sawTransientFailure,
2929
- invalidRequestFailure: loopState.invalidRequestFailure,
2930
- maxConsecutiveRefreshFailures: MAX_CONSECUTIVE_REFRESH_FAILURES,
2931
3068
  });
2932
- loopState.lastError = nonOkResult.lastError;
2933
- loopState.authFailureMessage = nonOkResult.authFailureMessage;
2934
- loopState.sawTransientFailure = nonOkResult.sawTransientFailure;
2935
- loopState.invalidRequestFailure = nonOkResult.invalidRequestFailure;
2936
- if (nonOkResult.response !== undefined) {
2937
- return nonOkResult.response;
2938
- }
2939
- if (nonOkResult.continueLoop) {
2940
- continue;
3069
+ if ("retryNextAccount" in successResult) {
3070
+ continue accountLoop;
2941
3071
  }
2942
- break;
2943
- }
2944
- const successResult = await handleAnthropicSuccessfulResponse({
2945
- ctx,
2946
- body,
2947
- account,
2948
- accountState,
2949
- response,
2950
- tracer,
2951
- requestStartTime,
2952
- fetchStartMs: preparedAttempt.fetchStartMs,
2953
- attemptNumber: loopState.attemptNumber,
2954
- finalBodyStr: preparedAttempt.finalBodyStr,
2955
- upstreamSpan,
2956
- logProxyBody,
2957
- logFinalRequest,
2958
- });
2959
- if ("retryNextAccount" in successResult) {
2960
- continue;
3072
+ return successResult.response;
2961
3073
  }
2962
- return successResult.response;
2963
3074
  }
2964
3075
  if (loopState.attemptNumber === 0) {
2965
3076
  acctSelectionSpan?.end();
2966
3077
  }
2967
- const configuredFallbackResponse = await tryConfiguredClaudeFallbackChain({
3078
+ const configuredFallbackResult = await tryConfiguredClaudeFallbackChain({
2968
3079
  ctx,
2969
3080
  body,
3081
+ parsedFallbackRequest: parsedRequest,
3082
+ requestProfile,
2970
3083
  modelRouter,
2971
3084
  tracer,
2972
3085
  requestStartTime,
2973
3086
  logProxyBody,
2974
3087
  logFinalRequest,
2975
3088
  });
2976
- if (configuredFallbackResponse) {
2977
- return configuredFallbackResponse;
3089
+ if (configuredFallbackResult.response) {
3090
+ return configuredFallbackResult.response;
2978
3091
  }
2979
3092
  const configuredChain = modelRouter?.getFallbackChain() ?? [];
2980
3093
  if (configuredChain.length === 0 && !loopState.sawRateLimit) {
@@ -3000,6 +3113,8 @@ async function handleAnthropicRoutedClaudeRequest(args) {
3000
3113
  sawRateLimit: loopState.sawRateLimit,
3001
3114
  lastError: loopState.lastError,
3002
3115
  orderedAccounts,
3116
+ requestProfile,
3117
+ fallbackPolicyReason: configuredFallbackResult.fallbackPolicyReason,
3003
3118
  buildLoggedClaudeError,
3004
3119
  logProxyBody,
3005
3120
  logFinalRequest,
@@ -3220,6 +3335,8 @@ function getOrCreateRuntimeState(accountKey) {
3220
3335
  backoffLevel: 0,
3221
3336
  consecutiveRefreshFailures: 0,
3222
3337
  permanentlyDisabled: false,
3338
+ requestClassCooldowns: {},
3339
+ modelTierCooldowns: {},
3223
3340
  };
3224
3341
  accountRuntimeState.set(accountKey, initial);
3225
3342
  return initial;
@@ -3249,6 +3366,10 @@ function summarizeErrorMessage(message, maxLength = 180) {
3249
3366
  }
3250
3367
  return `${compact.slice(0, maxLength)}...`;
3251
3368
  }
3369
+ export function getTransientSameAccountRetryDelayMs(retryNumber) {
3370
+ const index = Math.min(Math.max(retryNumber - 1, 0), TRANSIENT_SAME_ACCOUNT_RETRY_DELAYS_MS.length - 1);
3371
+ return TRANSIENT_SAME_ACCOUNT_RETRY_DELAYS_MS[index] ?? 0;
3372
+ }
3252
3373
  async function sleep(ms) {
3253
3374
  await new Promise((resolve) => setTimeout(resolve, ms));
3254
3375
  }
@@ -3409,34 +3530,6 @@ export function buildProxyFallbackOptions(parsed, overrides = {}) {
3409
3530
  maxSteps: 1,
3410
3531
  };
3411
3532
  }
3412
- export function buildProxyTranslationAttempts(primary, modelRouter, parsed) {
3413
- const attempts = [
3414
- {
3415
- provider: primary.provider,
3416
- model: primary.model,
3417
- label: `${primary.provider}/${primary.model ?? "unknown"}`,
3418
- },
3419
- ];
3420
- const chain = modelRouter?.getFallbackChain() ?? [];
3421
- for (const fallback of chain) {
3422
- if (fallback.provider === primary.provider &&
3423
- fallback.model === primary.model) {
3424
- continue;
3425
- }
3426
- if (shouldSkipTranslationTarget(fallback.provider, fallback.model, parsed)) {
3427
- continue;
3428
- }
3429
- attempts.push({
3430
- provider: fallback.provider,
3431
- model: fallback.model,
3432
- label: `${fallback.provider}/${fallback.model}`,
3433
- });
3434
- }
3435
- if (chain.length === 0) {
3436
- attempts.push({ label: "auto-provider" });
3437
- }
3438
- return attempts;
3439
- }
3440
3533
  function hasTranslatedOutput(collectedText, toolCalls) {
3441
3534
  return collectedText.trim().length > 0 || (toolCalls?.length ?? 0) > 0;
3442
3535
  }
@@ -3450,14 +3543,6 @@ function shouldOmitImagesForTarget(provider, model) {
3450
3543
  function shouldOmitThinkingConfigForTarget(provider, model) {
3451
3544
  return provider === "vertex" && model === "gemini-2.5-flash";
3452
3545
  }
3453
- function shouldSkipTranslationTarget(provider, model, parsed) {
3454
- if (provider === "ollama" &&
3455
- model === "qwen2.5:0.5b" &&
3456
- (parsed?.images.length ?? 0) > 0) {
3457
- return true;
3458
- }
3459
- return false;
3460
- }
3461
3546
  function extractToolArgs(toolCall) {
3462
3547
  return (toolCall.args ??
3463
3548
  toolCall.parameters ??