@juspay/neurolink 9.42.1 → 9.44.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/browser/neurolink.min.js +300 -300
- package/dist/cli/commands/mcp.js +15 -3
- package/dist/cli/commands/proxy.js +29 -6
- package/dist/core/baseProvider.js +12 -3
- package/dist/core/factory.js +4 -4
- package/dist/core/modules/ToolsManager.d.ts +1 -0
- package/dist/core/modules/ToolsManager.js +40 -42
- package/dist/core/toolEvents.d.ts +3 -0
- package/dist/core/toolEvents.js +7 -0
- package/dist/evaluation/scorers/scorerRegistry.js +3 -2
- package/dist/lib/core/baseProvider.js +12 -3
- package/dist/lib/core/factory.js +4 -4
- package/dist/lib/core/modules/ToolsManager.d.ts +1 -0
- package/dist/lib/core/modules/ToolsManager.js +40 -42
- package/dist/lib/core/toolEvents.d.ts +3 -0
- package/dist/lib/core/toolEvents.js +8 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.js +3 -2
- package/dist/lib/neurolink.js +33 -19
- package/dist/lib/providers/googleNativeGemini3.d.ts +4 -0
- package/dist/lib/providers/googleNativeGemini3.js +39 -1
- package/dist/lib/providers/googleVertex.js +10 -2
- package/dist/lib/proxy/claudeFormat.js +2 -1
- package/dist/lib/proxy/proxyHealth.d.ts +17 -0
- package/dist/lib/proxy/proxyHealth.js +55 -0
- package/dist/lib/proxy/requestLogger.js +8 -3
- package/dist/lib/proxy/routingPolicy.d.ts +33 -0
- package/dist/lib/proxy/routingPolicy.js +255 -0
- package/dist/lib/proxy/snapshotPersistence.d.ts +2 -0
- package/dist/lib/proxy/snapshotPersistence.js +41 -0
- package/dist/lib/server/routes/claudeProxyRoutes.d.ts +1 -9
- package/dist/lib/server/routes/claudeProxyRoutes.js +304 -219
- package/dist/lib/tasks/store/redisTaskStore.js +34 -16
- package/dist/lib/types/cli.d.ts +4 -0
- package/dist/lib/types/proxyTypes.d.ts +87 -0
- package/dist/lib/types/tools.d.ts +18 -0
- package/dist/lib/utils/schemaConversion.d.ts +1 -0
- package/dist/lib/utils/schemaConversion.js +3 -0
- package/dist/neurolink.js +33 -19
- package/dist/providers/googleNativeGemini3.d.ts +4 -0
- package/dist/providers/googleNativeGemini3.js +39 -1
- package/dist/providers/googleVertex.js +10 -2
- package/dist/proxy/claudeFormat.js +2 -1
- package/dist/proxy/proxyHealth.d.ts +17 -0
- package/dist/proxy/proxyHealth.js +54 -0
- package/dist/proxy/requestLogger.js +8 -3
- package/dist/proxy/routingPolicy.d.ts +33 -0
- package/dist/proxy/routingPolicy.js +254 -0
- package/dist/proxy/snapshotPersistence.d.ts +2 -0
- package/dist/proxy/snapshotPersistence.js +40 -0
- package/dist/server/routes/claudeProxyRoutes.d.ts +1 -9
- package/dist/server/routes/claudeProxyRoutes.js +304 -219
- package/dist/tasks/store/redisTaskStore.js +34 -16
- package/dist/types/cli.d.ts +4 -0
- package/dist/types/proxyTypes.d.ts +87 -0
- package/dist/types/tools.d.ts +18 -0
- package/dist/utils/schemaConversion.d.ts +1 -0
- package/dist/utils/schemaConversion.js +3 -0
- package/package.json +1 -1
|
@@ -9,8 +9,7 @@
|
|
|
9
9
|
* provider/model pairs (e.g. "claude-sonnet-4-20250514" -> vertex/gemini-2.5-pro).
|
|
10
10
|
* Without a router, models are passed through to the Anthropic provider.
|
|
11
11
|
*/
|
|
12
|
-
import {
|
|
13
|
-
import { access, mkdir, readFile, rename, writeFile } from "node:fs/promises";
|
|
12
|
+
import { access, readFile } from "node:fs/promises";
|
|
14
13
|
import { homedir } from "node:os";
|
|
15
14
|
import { join } from "node:path";
|
|
16
15
|
import { buildStableClaudeCodeBillingHeader, CLAUDE_CLI_USER_AGENT, CLAUDE_CODE_OAUTH_BETAS, getOrCreateClaudeCodeIdentity, parseClaudeCodeUserId, } from "../../auth/anthropicOAuth.js";
|
|
@@ -21,6 +20,8 @@ import { createRawStreamCapture } from "../../proxy/rawStreamCapture.js";
|
|
|
21
20
|
import { logBodyCapture, logRequest, logRequestAttempt, logStreamError, } from "../../proxy/requestLogger.js";
|
|
22
21
|
import { createSSEInterceptor } from "../../proxy/sseInterceptor.js";
|
|
23
22
|
import { needsRefresh, persistTokens, refreshToken, } from "../../proxy/tokenRefresh.js";
|
|
23
|
+
import { applyRateLimitCooldownScope, buildProxyTranslationPlan, classifyClaudeProxyRequest, getActiveCooldownScope, partitionAccountsByCooldown, summarizeSkippedFallbacks, } from "../../proxy/routingPolicy.js";
|
|
24
|
+
import { writeJsonSnapshotAtomically } from "../../proxy/snapshotPersistence.js";
|
|
24
25
|
import { recordAttempt, recordAttemptError, recordCooldown, recordFinalError, recordFinalSuccess, } from "../../proxy/usageStats.js";
|
|
25
26
|
import { logger } from "../../utils/logger.js";
|
|
26
27
|
import { ProviderHealthChecker } from "../../utils/providerHealth.js";
|
|
@@ -46,9 +47,10 @@ let primaryAccountIndex = 0;
|
|
|
46
47
|
let lastKnownAccountCount = 0;
|
|
47
48
|
const MAX_AUTH_RETRIES = 5;
|
|
48
49
|
const MAX_CONSECUTIVE_REFRESH_FAILURES = 15;
|
|
50
|
+
const MAX_TRANSIENT_SAME_ACCOUNT_RETRIES = 2;
|
|
51
|
+
const TRANSIENT_SAME_ACCOUNT_RETRY_DELAYS_MS = [250, 1_000];
|
|
49
52
|
/** Decision 8: Cooldowns only for 401 and 429. */
|
|
50
53
|
const AUTH_COOLDOWN_MS = 5 * 60 * 1000; // 5 minutes for 401
|
|
51
|
-
const RATE_LIMIT_BACKOFF_BASE_MS = 1000; // 1 second base for 429
|
|
52
54
|
const RATE_LIMIT_BACKOFF_CAP_MS = 10 * 60 * 1000; // 10 minute cap for 429
|
|
53
55
|
/** Timeout for upstream requests to Anthropic. Must be generous enough
|
|
54
56
|
* to cover the full lifecycle of streaming responses, including extended
|
|
@@ -220,11 +222,7 @@ function snapshotsMatch(existing, next) {
|
|
|
220
222
|
}
|
|
221
223
|
async function persistClaudeSnapshot(accountLabel, snapshot) {
|
|
222
224
|
const snapshotPath = getSnapshotPath(accountLabel);
|
|
223
|
-
|
|
224
|
-
await mkdir(dirPath, { recursive: true });
|
|
225
|
-
const tmpPath = `${snapshotPath}.${process.pid}.${randomUUID()}.tmp`;
|
|
226
|
-
await writeFile(tmpPath, JSON.stringify(snapshot, null, 2), { mode: 0o600 });
|
|
227
|
-
await rename(tmpPath, snapshotPath);
|
|
225
|
+
await writeJsonSnapshotAtomically(snapshotPath, snapshot, 0o600);
|
|
228
226
|
snapshotCache.set(getSnapshotSafeLabel(accountLabel), {
|
|
229
227
|
snapshot,
|
|
230
228
|
loadedAt: Date.now(),
|
|
@@ -259,7 +257,19 @@ async function maybeRefreshClaudeSnapshot(accountLabel, accountKey, headers, bod
|
|
|
259
257
|
if (snapshotsMatch(existing, next)) {
|
|
260
258
|
return existing;
|
|
261
259
|
}
|
|
262
|
-
|
|
260
|
+
try {
|
|
261
|
+
await persistClaudeSnapshot(accountLabel, next);
|
|
262
|
+
}
|
|
263
|
+
catch (error) {
|
|
264
|
+
logger.warn("[proxy] failed to persist Claude snapshot", {
|
|
265
|
+
accountLabel,
|
|
266
|
+
error: error instanceof Error ? error.message : String(error),
|
|
267
|
+
});
|
|
268
|
+
snapshotCache.set(getSnapshotSafeLabel(accountLabel), {
|
|
269
|
+
snapshot: next,
|
|
270
|
+
loadedAt: Date.now(),
|
|
271
|
+
});
|
|
272
|
+
}
|
|
263
273
|
return next;
|
|
264
274
|
}
|
|
265
275
|
/**
|
|
@@ -398,10 +408,12 @@ async function handleTranslatedClaudeRequest(args) {
|
|
|
398
408
|
const { ctx, body, route, modelRouter, tracer, requestStartTime, logProxyBody, } = args;
|
|
399
409
|
tracer?.setMode("full");
|
|
400
410
|
const parsed = parseClaudeRequest(body);
|
|
401
|
-
const
|
|
411
|
+
const plan = buildProxyTranslationPlan({
|
|
402
412
|
provider: route.provider,
|
|
403
413
|
model: route.model,
|
|
404
|
-
}, modelRouter, parsed);
|
|
414
|
+
}, modelRouter?.getFallbackChain() ?? [], body.model, parsed);
|
|
415
|
+
logProxyRoutingPlan(logProxyBody, "translated_request", plan);
|
|
416
|
+
const attempts = plan.attempts;
|
|
405
417
|
if (body.stream) {
|
|
406
418
|
return handleTranslatedClaudeStreamRequest({
|
|
407
419
|
ctx,
|
|
@@ -422,6 +434,18 @@ async function handleTranslatedClaudeRequest(args) {
|
|
|
422
434
|
logProxyBody,
|
|
423
435
|
});
|
|
424
436
|
}
|
|
437
|
+
function logProxyRoutingPlan(logProxyBody, stage, plan) {
|
|
438
|
+
logProxyBody({
|
|
439
|
+
phase: "routing_decision",
|
|
440
|
+
contentType: "application/json",
|
|
441
|
+
body: {
|
|
442
|
+
stage,
|
|
443
|
+
requestProfile: plan.profile,
|
|
444
|
+
attempts: plan.attempts,
|
|
445
|
+
skipped: plan.skipped,
|
|
446
|
+
},
|
|
447
|
+
});
|
|
448
|
+
}
|
|
425
449
|
async function handleTranslatedClaudeStreamRequest(args) {
|
|
426
450
|
const { ctx, body, attempts, parsed, tracer, requestStartTime } = args;
|
|
427
451
|
const serializer = new ClaudeStreamSerializer(body.model, 0);
|
|
@@ -1302,18 +1326,33 @@ async function executeClaudeFallbackTranslation(args) {
|
|
|
1302
1326
|
return clientResponse;
|
|
1303
1327
|
}
|
|
1304
1328
|
async function tryConfiguredClaudeFallbackChain(args) {
|
|
1305
|
-
const { ctx, body, modelRouter, tracer, requestStartTime, logProxyBody, logFinalRequest, } = args;
|
|
1306
|
-
const parsedFallbackRequest = parseClaudeRequest(body);
|
|
1329
|
+
const { ctx, body, parsedFallbackRequest, requestProfile, modelRouter, tracer, requestStartTime, logProxyBody, logFinalRequest, } = args;
|
|
1307
1330
|
const chain = modelRouter?.getFallbackChain() ?? [];
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1331
|
+
const fallbackPlan = buildProxyTranslationPlan({ provider: "anthropic", model: body.model }, chain, body.model, parsedFallbackRequest);
|
|
1332
|
+
const fallbackPolicyReason = summarizeSkippedFallbacks(fallbackPlan);
|
|
1333
|
+
logProxyBody({
|
|
1334
|
+
phase: "routing_decision",
|
|
1335
|
+
contentType: "application/json",
|
|
1336
|
+
body: {
|
|
1337
|
+
stage: "anthropic_fallback",
|
|
1338
|
+
requestProfile,
|
|
1339
|
+
attempts: fallbackPlan.attempts.slice(1),
|
|
1340
|
+
skipped: fallbackPlan.skipped,
|
|
1341
|
+
},
|
|
1342
|
+
});
|
|
1343
|
+
for (const skipped of fallbackPlan.skipped) {
|
|
1344
|
+
const label = skipped.provider
|
|
1345
|
+
? `${skipped.provider}/${skipped.model ?? "unknown"}`
|
|
1346
|
+
: "auto-provider";
|
|
1347
|
+
logger.always(`[proxy] skipping fallback ${label}: ${skipped.reason}`);
|
|
1348
|
+
}
|
|
1349
|
+
for (const fallback of fallbackPlan.attempts.slice(1)) {
|
|
1350
|
+
if (!fallback.provider || !fallback.model) {
|
|
1311
1351
|
continue;
|
|
1312
1352
|
}
|
|
1313
1353
|
const availability = await ProviderHealthChecker.checkFallbackProviderAvailability(fallback.provider, fallback.model);
|
|
1314
1354
|
if (!availability.available) {
|
|
1315
|
-
logger.
|
|
1316
|
-
continue;
|
|
1355
|
+
logger.always(`[proxy] fallback ${fallback.provider}/${fallback.model} health-check failed (${availability.reason ?? "provider unavailable"}), attempting anyway`);
|
|
1317
1356
|
}
|
|
1318
1357
|
try {
|
|
1319
1358
|
logger.always(`[proxy] fallback → ${fallback.provider}/${fallback.model}`);
|
|
@@ -1321,7 +1360,7 @@ async function tryConfiguredClaudeFallbackChain(args) {
|
|
|
1321
1360
|
provider: fallback.provider,
|
|
1322
1361
|
model: fallback.model,
|
|
1323
1362
|
});
|
|
1324
|
-
|
|
1363
|
+
const response = await executeClaudeFallbackTranslation({
|
|
1325
1364
|
ctx,
|
|
1326
1365
|
body,
|
|
1327
1366
|
tracer,
|
|
@@ -1331,18 +1370,31 @@ async function tryConfiguredClaudeFallbackChain(args) {
|
|
|
1331
1370
|
options: options,
|
|
1332
1371
|
providerLabel: fallback.provider,
|
|
1333
1372
|
});
|
|
1373
|
+
return {
|
|
1374
|
+
response,
|
|
1375
|
+
fallbackPolicyReason,
|
|
1376
|
+
};
|
|
1334
1377
|
}
|
|
1335
1378
|
catch (fallbackErr) {
|
|
1336
|
-
logger.
|
|
1379
|
+
logger.always(`[proxy] fallback ${fallback.provider}/${fallback.model} failed: ${fallbackErr instanceof Error ? fallbackErr.message : String(fallbackErr)}`);
|
|
1337
1380
|
}
|
|
1338
1381
|
}
|
|
1339
|
-
return
|
|
1382
|
+
return {
|
|
1383
|
+
response: null,
|
|
1384
|
+
fallbackPolicyReason,
|
|
1385
|
+
};
|
|
1340
1386
|
}
|
|
1341
1387
|
async function tryAutoClaudeFallback(args) {
|
|
1342
1388
|
const { ctx, body, tracer, requestStartTime, logProxyBody, logFinalRequest } = args;
|
|
1343
1389
|
try {
|
|
1344
|
-
logger.always("[proxy] fallback → auto-provider");
|
|
1345
1390
|
const parsed = parseClaudeRequest(body);
|
|
1391
|
+
const plan = buildProxyTranslationPlan({ provider: "anthropic", model: body.model }, [], body.model, parsed);
|
|
1392
|
+
logProxyRoutingPlan(logProxyBody, "auto_fallback", plan);
|
|
1393
|
+
const autoAttempt = plan.attempts.find((attempt) => attempt.label === "auto-provider");
|
|
1394
|
+
if (!autoAttempt) {
|
|
1395
|
+
return null;
|
|
1396
|
+
}
|
|
1397
|
+
logger.always("[proxy] fallback → auto-provider");
|
|
1346
1398
|
const options = buildProxyFallbackOptions(parsed);
|
|
1347
1399
|
return await executeClaudeFallbackTranslation({
|
|
1348
1400
|
ctx,
|
|
@@ -1361,7 +1413,7 @@ async function tryAutoClaudeFallback(args) {
|
|
|
1361
1413
|
}
|
|
1362
1414
|
}
|
|
1363
1415
|
function buildClaudeAnthropicFailureResponse(args) {
|
|
1364
|
-
const { tracer, requestStartTime, authFailureMessage, invalidRequestFailure, sawNetworkError, sawTransientFailure, sawRateLimit, lastError, orderedAccounts, buildLoggedClaudeError, logProxyBody, logFinalRequest, } = args;
|
|
1416
|
+
const { tracer, requestStartTime, authFailureMessage, invalidRequestFailure, sawNetworkError, sawTransientFailure, sawRateLimit, lastError, orderedAccounts, requestProfile, fallbackPolicyReason, buildLoggedClaudeError, logProxyBody, logFinalRequest, } = args;
|
|
1365
1417
|
if (authFailureMessage && !sawRateLimit) {
|
|
1366
1418
|
tracer?.setError("authentication_error", authFailureMessage);
|
|
1367
1419
|
tracer?.end(401, Date.now() - requestStartTime);
|
|
@@ -1408,18 +1460,21 @@ function buildClaudeAnthropicFailureResponse(args) {
|
|
|
1408
1460
|
return buildLoggedClaudeError(502, msg);
|
|
1409
1461
|
}
|
|
1410
1462
|
const earliestRecovery = orderedAccounts.reduce((min, account) => {
|
|
1411
|
-
const
|
|
1412
|
-
return
|
|
1463
|
+
const cooldown = getActiveCooldownScope(getOrCreateRuntimeState(account.key), requestProfile);
|
|
1464
|
+
return cooldown ? Math.min(min, cooldown.until) : min;
|
|
1413
1465
|
}, Infinity);
|
|
1414
1466
|
const retryAfterSec = Number.isFinite(earliestRecovery)
|
|
1415
1467
|
? Math.max(1, Math.ceil((earliestRecovery - Date.now()) / 1000))
|
|
1416
1468
|
: 60;
|
|
1417
|
-
|
|
1418
|
-
|
|
1419
|
-
|
|
1469
|
+
const contractMessage = fallbackPolicyReason
|
|
1470
|
+
? ` ${fallbackPolicyReason}`
|
|
1471
|
+
: "";
|
|
1472
|
+
logger.always(`[proxy] all accounts rate-limited for request-class=${requestProfile.primaryClass}, retry in ${retryAfterSec}s`);
|
|
1473
|
+
const errorBody = buildClaudeError(429, `All accounts rate-limited. Earliest recovery in ${retryAfterSec}s.${contractMessage}`, "overloaded_error");
|
|
1474
|
+
tracer?.setError("rate_limit_error", `All accounts rate-limited. Retry in ${retryAfterSec}s.${contractMessage}`);
|
|
1420
1475
|
tracer?.end(429, Date.now() - requestStartTime);
|
|
1421
1476
|
recordFinalError(429);
|
|
1422
|
-
logFinalRequest(429, "", "final", "rate_limit_error", `All accounts rate-limited. Retry in ${retryAfterSec}s
|
|
1477
|
+
logFinalRequest(429, "", "final", "rate_limit_error", `All accounts rate-limited. Retry in ${retryAfterSec}s.${contractMessage}`);
|
|
1423
1478
|
const errorBodyText = JSON.stringify(errorBody);
|
|
1424
1479
|
logProxyBody({
|
|
1425
1480
|
phase: "client_response",
|
|
@@ -1442,10 +1497,22 @@ function buildClaudeAnthropicFailureResponse(args) {
|
|
|
1442
1497
|
});
|
|
1443
1498
|
}
|
|
1444
1499
|
async function handleAnthropicSuccessfulResponse(args) {
|
|
1445
|
-
const { ctx, body, account, accountState, response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest, } = args;
|
|
1500
|
+
const { ctx, body, account, accountState, requestProfile, response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest, } = args;
|
|
1446
1501
|
accountState.backoffLevel = 0;
|
|
1447
1502
|
accountState.coolingUntil = undefined;
|
|
1448
1503
|
accountState.consecutiveRefreshFailures = 0;
|
|
1504
|
+
if (accountState.requestClassCooldowns) {
|
|
1505
|
+
delete accountState.requestClassCooldowns[`${requestProfile.primaryClass}:${requestProfile.requestedModel.toLowerCase()}`];
|
|
1506
|
+
}
|
|
1507
|
+
if (accountState.modelTierCooldowns) {
|
|
1508
|
+
delete accountState.modelTierCooldowns[requestProfile.modelTier];
|
|
1509
|
+
}
|
|
1510
|
+
if (accountState.requestClassBackoffLevels) {
|
|
1511
|
+
delete accountState.requestClassBackoffLevels[`${requestProfile.primaryClass}:${requestProfile.requestedModel.toLowerCase()}`];
|
|
1512
|
+
}
|
|
1513
|
+
if (accountState.modelTierBackoffLevels) {
|
|
1514
|
+
delete accountState.modelTierBackoffLevels[requestProfile.modelTier];
|
|
1515
|
+
}
|
|
1449
1516
|
logger.always(`[proxy] ← ${response.status} account=${account.label}`);
|
|
1450
1517
|
const quota = parseQuotaHeaders(response.headers);
|
|
1451
1518
|
if (quota) {
|
|
@@ -2035,7 +2102,7 @@ async function handleAnthropicSuccessfulRetryResponse(args) {
|
|
|
2035
2102
|
return retryJson;
|
|
2036
2103
|
}
|
|
2037
2104
|
async function handleAnthropicAuthRetry(args) {
|
|
2038
|
-
const { ctx, body, account, accountState, headers, buildUpstreamBody, enabledAccounts, orderedAccounts, response: _response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logAttempt, logProxyBody, logFinalRequest, lastError, authFailureMessage, sawRateLimit, sawTransientFailure, sawNetworkError, } = args;
|
|
2105
|
+
const { ctx, body, account, accountState, requestProfile, headers, buildUpstreamBody, enabledAccounts, orderedAccounts, response: _response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logAttempt, logProxyBody, logFinalRequest, lastError, authFailureMessage, sawRateLimit, sawTransientFailure, sawNetworkError, } = args;
|
|
2039
2106
|
recordAttemptError(account.label, account.type, 401);
|
|
2040
2107
|
let currentLastError = lastError;
|
|
2041
2108
|
let currentAuthFailureMessage = authFailureMessage;
|
|
@@ -2119,9 +2186,14 @@ async function handleAnthropicAuthRetry(args) {
|
|
|
2119
2186
|
const cooldownMs = Number.isNaN(parsedRetryAfter)
|
|
2120
2187
|
? 60_000
|
|
2121
2188
|
: Math.max(1, parsedRetryAfter) * 1000;
|
|
2122
|
-
|
|
2189
|
+
const cooldown = applyRateLimitCooldownScope({
|
|
2190
|
+
state: accountState,
|
|
2191
|
+
profile: requestProfile,
|
|
2192
|
+
retryAfterMs: cooldownMs,
|
|
2193
|
+
capMs: RATE_LIMIT_BACKOFF_CAP_MS,
|
|
2194
|
+
});
|
|
2123
2195
|
advancePrimaryIfCurrent(account.key, enabledAccounts.length, orderedAccounts[0]?.key);
|
|
2124
|
-
recordCooldown(account.label, account.type,
|
|
2196
|
+
recordCooldown(account.label, account.type, Date.now() + cooldown.backoffMs, accountState.backoffLevel);
|
|
2125
2197
|
break;
|
|
2126
2198
|
}
|
|
2127
2199
|
if (retryStatus === 401 || retryStatus === 402 || retryStatus === 403) {
|
|
@@ -2364,13 +2436,14 @@ async function handleAnthropicNonOkResponse(args) {
|
|
|
2364
2436
|
if (isTransientHttpFailure(response.status, errBody)) {
|
|
2365
2437
|
recordAttemptError(account.label, account.type, response.status);
|
|
2366
2438
|
currentSawTransientFailure = true;
|
|
2367
|
-
logger.always(`[proxy] ← ${response.status} account=${account.label} (transient
|
|
2439
|
+
logger.always(`[proxy] ← ${response.status} account=${account.label} (transient)`);
|
|
2368
2440
|
currentLastError = errBody;
|
|
2369
2441
|
logAttempt(response.status, "api_error", summarizeErrorMessage(errBody));
|
|
2370
2442
|
tracer?.setError("transient_error", summarizeErrorMessage(errBody));
|
|
2371
2443
|
tracer?.recordRetry(account.label, "transient");
|
|
2372
2444
|
return {
|
|
2373
2445
|
continueLoop: true,
|
|
2446
|
+
retrySameAccount: true,
|
|
2374
2447
|
lastError: currentLastError,
|
|
2375
2448
|
authFailureMessage: currentAuthFailureMessage,
|
|
2376
2449
|
sawTransientFailure: currentSawTransientFailure,
|
|
@@ -2692,7 +2765,7 @@ async function prepareAnthropicAccountAttempt(args) {
|
|
|
2692
2765
|
};
|
|
2693
2766
|
}
|
|
2694
2767
|
async function fetchAnthropicAccountResponse(args) {
|
|
2695
|
-
const { url, headers, finalBodyStr, account, accountState, enabledAccounts, orderedAccounts, tracer, logAttempt, currentLastError, currentSawRateLimit, currentSawNetworkError, upstreamSpan, } = args;
|
|
2768
|
+
const { url, headers, finalBodyStr, account, accountState, requestProfile, enabledAccounts, orderedAccounts, tracer, logAttempt, currentLastError, currentSawRateLimit, currentSawNetworkError, upstreamSpan, } = args;
|
|
2696
2769
|
let lastError = currentLastError;
|
|
2697
2770
|
let sawRateLimit = currentSawRateLimit;
|
|
2698
2771
|
let sawNetworkError = currentSawNetworkError;
|
|
@@ -2715,13 +2788,14 @@ async function fetchAnthropicAccountResponse(args) {
|
|
|
2715
2788
|
const errorCode = getErrorCode(fetchErr) ?? "unknown";
|
|
2716
2789
|
const errorMessage = fetchErr instanceof Error ? fetchErr.message : String(fetchErr);
|
|
2717
2790
|
lastError = errorMessage;
|
|
2718
|
-
logger.always(`[proxy] fetch error account=${account.label} code=${errorCode} (
|
|
2791
|
+
logger.always(`[proxy] fetch error account=${account.label} code=${errorCode} (retryable): ${errorMessage}`);
|
|
2719
2792
|
logAttempt(502, "network_error", errorMessage);
|
|
2720
2793
|
tracer?.setError("network_error", errorMessage);
|
|
2721
2794
|
tracer?.recordRetry(account.label, "network_error");
|
|
2722
2795
|
currentUpstreamSpan?.end();
|
|
2723
2796
|
return {
|
|
2724
2797
|
continueLoop: true,
|
|
2798
|
+
retrySameAccount: true,
|
|
2725
2799
|
lastError,
|
|
2726
2800
|
sawRateLimit,
|
|
2727
2801
|
sawNetworkError,
|
|
@@ -2744,16 +2818,17 @@ async function fetchAnthropicAccountResponse(args) {
|
|
|
2744
2818
|
}
|
|
2745
2819
|
}
|
|
2746
2820
|
}
|
|
2747
|
-
const
|
|
2748
|
-
|
|
2749
|
-
|
|
2750
|
-
|
|
2751
|
-
|
|
2821
|
+
const cooldown = applyRateLimitCooldownScope({
|
|
2822
|
+
state: accountState,
|
|
2823
|
+
profile: requestProfile,
|
|
2824
|
+
retryAfterMs: cooldownMs > 0 ? cooldownMs : undefined,
|
|
2825
|
+
capMs: RATE_LIMIT_BACKOFF_CAP_MS,
|
|
2826
|
+
});
|
|
2752
2827
|
advancePrimaryIfCurrent(account.key, enabledAccounts.length, orderedAccounts[0]?.key);
|
|
2753
2828
|
recordAttemptError(account.label, account.type, 429);
|
|
2754
|
-
recordCooldown(account.label, account.type,
|
|
2829
|
+
recordCooldown(account.label, account.type, Date.now() + cooldown.backoffMs, accountState.backoffLevel);
|
|
2755
2830
|
lastError = await response.text();
|
|
2756
|
-
logger.always(`[proxy] ← 429 account=${account.label} backoff-level=${accountState.backoffLevel} cooldown=${Math.round(backoffMs / 1000)}s`);
|
|
2831
|
+
logger.always(`[proxy] ← 429 account=${account.label} backoff-level=${accountState.backoffLevel} cooldown=${Math.round(cooldown.backoffMs / 1000)}s request-class=${cooldown.requestClassKey} model-tier=${cooldown.modelTierKey}`);
|
|
2757
2832
|
logAttempt(429, "rate_limit_error", String(lastError));
|
|
2758
2833
|
tracer?.setError("rate_limit_error", String(lastError).slice(0, 500));
|
|
2759
2834
|
tracer?.recordRetry(account.label, "rate_limit");
|
|
@@ -2777,6 +2852,8 @@ async function fetchAnthropicAccountResponse(args) {
|
|
|
2777
2852
|
}
|
|
2778
2853
|
async function handleAnthropicRoutedClaudeRequest(args) {
|
|
2779
2854
|
const { ctx, body, modelRouter, tracer, requestStartTime, accountStrategy, buildLoggedClaudeError, logProxyBody, logFinalRequest, } = args;
|
|
2855
|
+
const parsedRequest = parseClaudeRequest(body);
|
|
2856
|
+
const requestProfile = classifyClaudeProxyRequest(body.model, parsedRequest);
|
|
2780
2857
|
const loadedAccounts = await loadClaudeProxyAccounts({
|
|
2781
2858
|
ctx,
|
|
2782
2859
|
body,
|
|
@@ -2799,182 +2876,218 @@ async function handleAnthropicRoutedClaudeRequest(args) {
|
|
|
2799
2876
|
attemptNumber: 0,
|
|
2800
2877
|
};
|
|
2801
2878
|
const acctSelectionSpan = tracer?.startAccountSelection();
|
|
2802
|
-
|
|
2803
|
-
|
|
2804
|
-
if (
|
|
2805
|
-
|
|
2879
|
+
const accountPartition = partitionAccountsByCooldown(orderedAccounts, (account) => getOrCreateRuntimeState(account.key), requestProfile);
|
|
2880
|
+
for (const skippedAccount of accountPartition.skipped) {
|
|
2881
|
+
if (skippedAccount.cooldown.scope === "request_class" ||
|
|
2882
|
+
skippedAccount.cooldown.scope === "model_tier") {
|
|
2883
|
+
loopState.sawRateLimit = true;
|
|
2806
2884
|
}
|
|
2807
|
-
loopState.
|
|
2808
|
-
|
|
2809
|
-
|
|
2810
|
-
|
|
2811
|
-
|
|
2812
|
-
|
|
2813
|
-
|
|
2814
|
-
|
|
2815
|
-
|
|
2816
|
-
|
|
2817
|
-
|
|
2818
|
-
|
|
2819
|
-
|
|
2820
|
-
|
|
2821
|
-
|
|
2822
|
-
|
|
2823
|
-
|
|
2824
|
-
|
|
2825
|
-
|
|
2826
|
-
});
|
|
2827
|
-
const preparedAttempt = await prepareAnthropicAccountAttempt({
|
|
2828
|
-
account,
|
|
2829
|
-
accountState,
|
|
2830
|
-
bodyStr,
|
|
2831
|
-
clientHeaders,
|
|
2832
|
-
isClaudeClientRequest,
|
|
2833
|
-
url,
|
|
2834
|
-
tracer,
|
|
2835
|
-
attemptNumber: loopState.attemptNumber,
|
|
2836
|
-
currentLastError: loopState.lastError,
|
|
2837
|
-
currentAuthFailureMessage: loopState.authFailureMessage,
|
|
2838
|
-
logAttempt,
|
|
2839
|
-
logProxyBody,
|
|
2840
|
-
});
|
|
2841
|
-
loopState.lastError = preparedAttempt.lastError;
|
|
2842
|
-
loopState.authFailureMessage = preparedAttempt.authFailureMessage;
|
|
2843
|
-
if (preparedAttempt.continueLoop ||
|
|
2844
|
-
!preparedAttempt.headers ||
|
|
2845
|
-
!preparedAttempt.buildUpstreamBody ||
|
|
2846
|
-
!preparedAttempt.finalBodyStr ||
|
|
2847
|
-
preparedAttempt.fetchStartMs === undefined) {
|
|
2848
|
-
continue;
|
|
2849
|
-
}
|
|
2850
|
-
const fetchResult = await fetchAnthropicAccountResponse({
|
|
2851
|
-
url,
|
|
2852
|
-
headers: preparedAttempt.headers,
|
|
2853
|
-
finalBodyStr: preparedAttempt.finalBodyStr,
|
|
2854
|
-
account,
|
|
2855
|
-
accountState,
|
|
2856
|
-
enabledAccounts,
|
|
2857
|
-
orderedAccounts,
|
|
2858
|
-
tracer,
|
|
2859
|
-
logAttempt,
|
|
2860
|
-
currentLastError: loopState.lastError,
|
|
2861
|
-
currentSawRateLimit: loopState.sawRateLimit,
|
|
2862
|
-
currentSawNetworkError: loopState.sawNetworkError,
|
|
2863
|
-
upstreamSpan: preparedAttempt.upstreamSpan,
|
|
2864
|
-
});
|
|
2865
|
-
loopState.lastError = fetchResult.lastError;
|
|
2866
|
-
loopState.sawRateLimit = fetchResult.sawRateLimit;
|
|
2867
|
-
loopState.sawNetworkError = fetchResult.sawNetworkError;
|
|
2868
|
-
if (fetchResult.continueLoop || !fetchResult.response) {
|
|
2869
|
-
continue;
|
|
2870
|
-
}
|
|
2871
|
-
let upstreamSpan = fetchResult.upstreamSpan;
|
|
2872
|
-
const response = fetchResult.response;
|
|
2873
|
-
if (response.status === 401 &&
|
|
2874
|
-
account.type === "oauth" &&
|
|
2875
|
-
account.refreshToken) {
|
|
2876
|
-
const authRetryResult = await handleAnthropicAuthRetry({
|
|
2885
|
+
loopState.lastError = `Skipped account=${skippedAccount.account.label} due to ${skippedAccount.cooldown.scope} cooldown ${skippedAccount.cooldown.key}`;
|
|
2886
|
+
logger.always(`[proxy] skipping account=${skippedAccount.account.label} due to ${skippedAccount.cooldown.scope} cooldown=${skippedAccount.cooldown.key} remaining=${Math.max(1, Math.ceil((skippedAccount.cooldown.until - Date.now()) / 1000))}s`);
|
|
2887
|
+
}
|
|
2888
|
+
accountLoop: for (const account of accountPartition.eligible) {
|
|
2889
|
+
const accountState = getOrCreateRuntimeState(account.key);
|
|
2890
|
+
let transientSameAccountRetries = 0;
|
|
2891
|
+
while (true) {
|
|
2892
|
+
loopState.attemptNumber += 1;
|
|
2893
|
+
if (tracer && loopState.attemptNumber === 1 && acctSelectionSpan) {
|
|
2894
|
+
tracer.setAccountSelection({
|
|
2895
|
+
strategy: accountStrategy,
|
|
2896
|
+
accountsTotal: accounts.length,
|
|
2897
|
+
accountsHealthy: enabledAccounts.length,
|
|
2898
|
+
selectedAccount: account.label,
|
|
2899
|
+
accountType: account.type,
|
|
2900
|
+
});
|
|
2901
|
+
acctSelectionSpan.end();
|
|
2902
|
+
}
|
|
2903
|
+
const logAttempt = createAnthropicAttemptLogger({
|
|
2877
2904
|
ctx,
|
|
2878
2905
|
body,
|
|
2906
|
+
toolCount,
|
|
2907
|
+
requestStart,
|
|
2908
|
+
tracer,
|
|
2909
|
+
account,
|
|
2910
|
+
attemptNumber: loopState.attemptNumber,
|
|
2911
|
+
});
|
|
2912
|
+
const preparedAttempt = await prepareAnthropicAccountAttempt({
|
|
2879
2913
|
account,
|
|
2880
2914
|
accountState,
|
|
2915
|
+
bodyStr,
|
|
2916
|
+
clientHeaders,
|
|
2917
|
+
isClaudeClientRequest,
|
|
2918
|
+
url,
|
|
2919
|
+
tracer,
|
|
2920
|
+
attemptNumber: loopState.attemptNumber,
|
|
2921
|
+
currentLastError: loopState.lastError,
|
|
2922
|
+
currentAuthFailureMessage: loopState.authFailureMessage,
|
|
2923
|
+
logAttempt,
|
|
2924
|
+
logProxyBody,
|
|
2925
|
+
});
|
|
2926
|
+
loopState.lastError = preparedAttempt.lastError;
|
|
2927
|
+
loopState.authFailureMessage = preparedAttempt.authFailureMessage;
|
|
2928
|
+
if (preparedAttempt.continueLoop ||
|
|
2929
|
+
!preparedAttempt.headers ||
|
|
2930
|
+
!preparedAttempt.buildUpstreamBody ||
|
|
2931
|
+
!preparedAttempt.finalBodyStr ||
|
|
2932
|
+
preparedAttempt.fetchStartMs === undefined) {
|
|
2933
|
+
continue accountLoop;
|
|
2934
|
+
}
|
|
2935
|
+
const fetchResult = await fetchAnthropicAccountResponse({
|
|
2936
|
+
url,
|
|
2881
2937
|
headers: preparedAttempt.headers,
|
|
2882
|
-
|
|
2938
|
+
finalBodyStr: preparedAttempt.finalBodyStr,
|
|
2939
|
+
account,
|
|
2940
|
+
accountState,
|
|
2941
|
+
requestProfile,
|
|
2883
2942
|
enabledAccounts,
|
|
2884
2943
|
orderedAccounts,
|
|
2885
|
-
response,
|
|
2886
2944
|
tracer,
|
|
2887
|
-
requestStartTime,
|
|
2888
|
-
fetchStartMs: preparedAttempt.fetchStartMs,
|
|
2889
|
-
attemptNumber: loopState.attemptNumber,
|
|
2890
|
-
finalBodyStr: preparedAttempt.finalBodyStr,
|
|
2891
|
-
upstreamSpan,
|
|
2892
2945
|
logAttempt,
|
|
2893
|
-
|
|
2894
|
-
|
|
2895
|
-
|
|
2896
|
-
|
|
2897
|
-
sawRateLimit: loopState.sawRateLimit,
|
|
2898
|
-
sawTransientFailure: loopState.sawTransientFailure,
|
|
2899
|
-
sawNetworkError: loopState.sawNetworkError,
|
|
2946
|
+
currentLastError: loopState.lastError,
|
|
2947
|
+
currentSawRateLimit: loopState.sawRateLimit,
|
|
2948
|
+
currentSawNetworkError: loopState.sawNetworkError,
|
|
2949
|
+
upstreamSpan: preparedAttempt.upstreamSpan,
|
|
2900
2950
|
});
|
|
2901
|
-
loopState.lastError =
|
|
2902
|
-
loopState.
|
|
2903
|
-
loopState.
|
|
2904
|
-
|
|
2905
|
-
|
|
2906
|
-
|
|
2907
|
-
|
|
2908
|
-
|
|
2951
|
+
loopState.lastError = fetchResult.lastError;
|
|
2952
|
+
loopState.sawRateLimit = fetchResult.sawRateLimit;
|
|
2953
|
+
loopState.sawNetworkError = fetchResult.sawNetworkError;
|
|
2954
|
+
if (fetchResult.continueLoop || !fetchResult.response) {
|
|
2955
|
+
if (fetchResult.retrySameAccount &&
|
|
2956
|
+
transientSameAccountRetries < MAX_TRANSIENT_SAME_ACCOUNT_RETRIES) {
|
|
2957
|
+
transientSameAccountRetries += 1;
|
|
2958
|
+
const delayMs = getTransientSameAccountRetryDelayMs(transientSameAccountRetries);
|
|
2959
|
+
logger.always(`[proxy] retrying same account=${account.label} after transient network error (${transientSameAccountRetries}/${MAX_TRANSIENT_SAME_ACCOUNT_RETRIES}) in ${delayMs}ms`);
|
|
2960
|
+
await sleep(delayMs);
|
|
2961
|
+
continue;
|
|
2962
|
+
}
|
|
2963
|
+
if (fetchResult.retrySameAccount) {
|
|
2964
|
+
logger.always(`[proxy] exhausted transient same-account retries for account=${account.label}; rotating`);
|
|
2965
|
+
}
|
|
2966
|
+
continue accountLoop;
|
|
2909
2967
|
}
|
|
2910
|
-
|
|
2911
|
-
|
|
2968
|
+
let upstreamSpan = fetchResult.upstreamSpan;
|
|
2969
|
+
const response = fetchResult.response;
|
|
2970
|
+
if (response.status === 401 &&
|
|
2971
|
+
account.type === "oauth" &&
|
|
2972
|
+
account.refreshToken) {
|
|
2973
|
+
const authRetryResult = await handleAnthropicAuthRetry({
|
|
2974
|
+
ctx,
|
|
2975
|
+
body,
|
|
2976
|
+
account,
|
|
2977
|
+
accountState,
|
|
2978
|
+
requestProfile,
|
|
2979
|
+
headers: preparedAttempt.headers,
|
|
2980
|
+
buildUpstreamBody: preparedAttempt.buildUpstreamBody,
|
|
2981
|
+
enabledAccounts,
|
|
2982
|
+
orderedAccounts,
|
|
2983
|
+
response,
|
|
2984
|
+
tracer,
|
|
2985
|
+
requestStartTime,
|
|
2986
|
+
fetchStartMs: preparedAttempt.fetchStartMs,
|
|
2987
|
+
attemptNumber: loopState.attemptNumber,
|
|
2988
|
+
finalBodyStr: preparedAttempt.finalBodyStr,
|
|
2989
|
+
upstreamSpan,
|
|
2990
|
+
logAttempt,
|
|
2991
|
+
logProxyBody,
|
|
2992
|
+
logFinalRequest,
|
|
2993
|
+
lastError: loopState.lastError,
|
|
2994
|
+
authFailureMessage: loopState.authFailureMessage,
|
|
2995
|
+
sawRateLimit: loopState.sawRateLimit,
|
|
2996
|
+
sawTransientFailure: loopState.sawTransientFailure,
|
|
2997
|
+
sawNetworkError: loopState.sawNetworkError,
|
|
2998
|
+
});
|
|
2999
|
+
loopState.lastError = authRetryResult.lastError;
|
|
3000
|
+
loopState.authFailureMessage = authRetryResult.authFailureMessage;
|
|
3001
|
+
loopState.sawRateLimit = authRetryResult.sawRateLimit;
|
|
3002
|
+
loopState.sawTransientFailure = authRetryResult.sawTransientFailure;
|
|
3003
|
+
loopState.sawNetworkError = authRetryResult.sawNetworkError;
|
|
3004
|
+
upstreamSpan = authRetryResult.upstreamSpan;
|
|
3005
|
+
if (authRetryResult.response !== undefined) {
|
|
3006
|
+
return authRetryResult.response;
|
|
3007
|
+
}
|
|
3008
|
+
if (authRetryResult.continueLoop) {
|
|
3009
|
+
continue accountLoop;
|
|
3010
|
+
}
|
|
2912
3011
|
}
|
|
2913
|
-
|
|
2914
|
-
|
|
2915
|
-
|
|
2916
|
-
|
|
3012
|
+
if (!response.ok) {
|
|
3013
|
+
const nonOkResult = await handleAnthropicNonOkResponse({
|
|
3014
|
+
response,
|
|
3015
|
+
account,
|
|
3016
|
+
accountState,
|
|
3017
|
+
tracer,
|
|
3018
|
+
requestStartTime,
|
|
3019
|
+
fetchStartMs: preparedAttempt.fetchStartMs,
|
|
3020
|
+
attemptNumber: loopState.attemptNumber,
|
|
3021
|
+
logAttempt,
|
|
3022
|
+
logProxyBody,
|
|
3023
|
+
logFinalRequest,
|
|
3024
|
+
lastError: loopState.lastError,
|
|
3025
|
+
authFailureMessage: loopState.authFailureMessage,
|
|
3026
|
+
sawTransientFailure: loopState.sawTransientFailure,
|
|
3027
|
+
invalidRequestFailure: loopState.invalidRequestFailure,
|
|
3028
|
+
maxConsecutiveRefreshFailures: MAX_CONSECUTIVE_REFRESH_FAILURES,
|
|
3029
|
+
});
|
|
3030
|
+
loopState.lastError = nonOkResult.lastError;
|
|
3031
|
+
loopState.authFailureMessage = nonOkResult.authFailureMessage;
|
|
3032
|
+
loopState.sawTransientFailure = nonOkResult.sawTransientFailure;
|
|
3033
|
+
loopState.invalidRequestFailure = nonOkResult.invalidRequestFailure;
|
|
3034
|
+
if (nonOkResult.response !== undefined) {
|
|
3035
|
+
return nonOkResult.response;
|
|
3036
|
+
}
|
|
3037
|
+
if (nonOkResult.continueLoop) {
|
|
3038
|
+
if (nonOkResult.retrySameAccount &&
|
|
3039
|
+
transientSameAccountRetries < MAX_TRANSIENT_SAME_ACCOUNT_RETRIES) {
|
|
3040
|
+
transientSameAccountRetries += 1;
|
|
3041
|
+
const delayMs = getTransientSameAccountRetryDelayMs(transientSameAccountRetries);
|
|
3042
|
+
logger.always(`[proxy] retrying same account=${account.label} after transient upstream ${response.status} (${transientSameAccountRetries}/${MAX_TRANSIENT_SAME_ACCOUNT_RETRIES}) in ${delayMs}ms`);
|
|
3043
|
+
await sleep(delayMs);
|
|
3044
|
+
continue;
|
|
3045
|
+
}
|
|
3046
|
+
if (nonOkResult.retrySameAccount) {
|
|
3047
|
+
logger.always(`[proxy] exhausted transient same-account retries for account=${account.label}; rotating`);
|
|
3048
|
+
}
|
|
3049
|
+
continue accountLoop;
|
|
3050
|
+
}
|
|
3051
|
+
break accountLoop;
|
|
3052
|
+
}
|
|
3053
|
+
const successResult = await handleAnthropicSuccessfulResponse({
|
|
3054
|
+
ctx,
|
|
3055
|
+
body,
|
|
2917
3056
|
account,
|
|
2918
3057
|
accountState,
|
|
3058
|
+
requestProfile,
|
|
3059
|
+
response,
|
|
2919
3060
|
tracer,
|
|
2920
3061
|
requestStartTime,
|
|
2921
3062
|
fetchStartMs: preparedAttempt.fetchStartMs,
|
|
2922
3063
|
attemptNumber: loopState.attemptNumber,
|
|
2923
|
-
|
|
3064
|
+
finalBodyStr: preparedAttempt.finalBodyStr,
|
|
3065
|
+
upstreamSpan,
|
|
2924
3066
|
logProxyBody,
|
|
2925
3067
|
logFinalRequest,
|
|
2926
|
-
lastError: loopState.lastError,
|
|
2927
|
-
authFailureMessage: loopState.authFailureMessage,
|
|
2928
|
-
sawTransientFailure: loopState.sawTransientFailure,
|
|
2929
|
-
invalidRequestFailure: loopState.invalidRequestFailure,
|
|
2930
|
-
maxConsecutiveRefreshFailures: MAX_CONSECUTIVE_REFRESH_FAILURES,
|
|
2931
3068
|
});
|
|
2932
|
-
|
|
2933
|
-
|
|
2934
|
-
loopState.sawTransientFailure = nonOkResult.sawTransientFailure;
|
|
2935
|
-
loopState.invalidRequestFailure = nonOkResult.invalidRequestFailure;
|
|
2936
|
-
if (nonOkResult.response !== undefined) {
|
|
2937
|
-
return nonOkResult.response;
|
|
2938
|
-
}
|
|
2939
|
-
if (nonOkResult.continueLoop) {
|
|
2940
|
-
continue;
|
|
3069
|
+
if ("retryNextAccount" in successResult) {
|
|
3070
|
+
continue accountLoop;
|
|
2941
3071
|
}
|
|
2942
|
-
|
|
2943
|
-
}
|
|
2944
|
-
const successResult = await handleAnthropicSuccessfulResponse({
|
|
2945
|
-
ctx,
|
|
2946
|
-
body,
|
|
2947
|
-
account,
|
|
2948
|
-
accountState,
|
|
2949
|
-
response,
|
|
2950
|
-
tracer,
|
|
2951
|
-
requestStartTime,
|
|
2952
|
-
fetchStartMs: preparedAttempt.fetchStartMs,
|
|
2953
|
-
attemptNumber: loopState.attemptNumber,
|
|
2954
|
-
finalBodyStr: preparedAttempt.finalBodyStr,
|
|
2955
|
-
upstreamSpan,
|
|
2956
|
-
logProxyBody,
|
|
2957
|
-
logFinalRequest,
|
|
2958
|
-
});
|
|
2959
|
-
if ("retryNextAccount" in successResult) {
|
|
2960
|
-
continue;
|
|
3072
|
+
return successResult.response;
|
|
2961
3073
|
}
|
|
2962
|
-
return successResult.response;
|
|
2963
3074
|
}
|
|
2964
3075
|
if (loopState.attemptNumber === 0) {
|
|
2965
3076
|
acctSelectionSpan?.end();
|
|
2966
3077
|
}
|
|
2967
|
-
const
|
|
3078
|
+
const configuredFallbackResult = await tryConfiguredClaudeFallbackChain({
|
|
2968
3079
|
ctx,
|
|
2969
3080
|
body,
|
|
3081
|
+
parsedFallbackRequest: parsedRequest,
|
|
3082
|
+
requestProfile,
|
|
2970
3083
|
modelRouter,
|
|
2971
3084
|
tracer,
|
|
2972
3085
|
requestStartTime,
|
|
2973
3086
|
logProxyBody,
|
|
2974
3087
|
logFinalRequest,
|
|
2975
3088
|
});
|
|
2976
|
-
if (
|
|
2977
|
-
return
|
|
3089
|
+
if (configuredFallbackResult.response) {
|
|
3090
|
+
return configuredFallbackResult.response;
|
|
2978
3091
|
}
|
|
2979
3092
|
const configuredChain = modelRouter?.getFallbackChain() ?? [];
|
|
2980
3093
|
if (configuredChain.length === 0 && !loopState.sawRateLimit) {
|
|
@@ -3000,6 +3113,8 @@ async function handleAnthropicRoutedClaudeRequest(args) {
|
|
|
3000
3113
|
sawRateLimit: loopState.sawRateLimit,
|
|
3001
3114
|
lastError: loopState.lastError,
|
|
3002
3115
|
orderedAccounts,
|
|
3116
|
+
requestProfile,
|
|
3117
|
+
fallbackPolicyReason: configuredFallbackResult.fallbackPolicyReason,
|
|
3003
3118
|
buildLoggedClaudeError,
|
|
3004
3119
|
logProxyBody,
|
|
3005
3120
|
logFinalRequest,
|
|
@@ -3220,6 +3335,8 @@ function getOrCreateRuntimeState(accountKey) {
|
|
|
3220
3335
|
backoffLevel: 0,
|
|
3221
3336
|
consecutiveRefreshFailures: 0,
|
|
3222
3337
|
permanentlyDisabled: false,
|
|
3338
|
+
requestClassCooldowns: {},
|
|
3339
|
+
modelTierCooldowns: {},
|
|
3223
3340
|
};
|
|
3224
3341
|
accountRuntimeState.set(accountKey, initial);
|
|
3225
3342
|
return initial;
|
|
@@ -3249,6 +3366,10 @@ function summarizeErrorMessage(message, maxLength = 180) {
|
|
|
3249
3366
|
}
|
|
3250
3367
|
return `${compact.slice(0, maxLength)}...`;
|
|
3251
3368
|
}
|
|
3369
|
+
export function getTransientSameAccountRetryDelayMs(retryNumber) {
|
|
3370
|
+
const index = Math.min(Math.max(retryNumber - 1, 0), TRANSIENT_SAME_ACCOUNT_RETRY_DELAYS_MS.length - 1);
|
|
3371
|
+
return TRANSIENT_SAME_ACCOUNT_RETRY_DELAYS_MS[index] ?? 0;
|
|
3372
|
+
}
|
|
3252
3373
|
async function sleep(ms) {
|
|
3253
3374
|
await new Promise((resolve) => setTimeout(resolve, ms));
|
|
3254
3375
|
}
|
|
@@ -3409,34 +3530,6 @@ export function buildProxyFallbackOptions(parsed, overrides = {}) {
|
|
|
3409
3530
|
maxSteps: 1,
|
|
3410
3531
|
};
|
|
3411
3532
|
}
|
|
3412
|
-
export function buildProxyTranslationAttempts(primary, modelRouter, parsed) {
|
|
3413
|
-
const attempts = [
|
|
3414
|
-
{
|
|
3415
|
-
provider: primary.provider,
|
|
3416
|
-
model: primary.model,
|
|
3417
|
-
label: `${primary.provider}/${primary.model ?? "unknown"}`,
|
|
3418
|
-
},
|
|
3419
|
-
];
|
|
3420
|
-
const chain = modelRouter?.getFallbackChain() ?? [];
|
|
3421
|
-
for (const fallback of chain) {
|
|
3422
|
-
if (fallback.provider === primary.provider &&
|
|
3423
|
-
fallback.model === primary.model) {
|
|
3424
|
-
continue;
|
|
3425
|
-
}
|
|
3426
|
-
if (shouldSkipTranslationTarget(fallback.provider, fallback.model, parsed)) {
|
|
3427
|
-
continue;
|
|
3428
|
-
}
|
|
3429
|
-
attempts.push({
|
|
3430
|
-
provider: fallback.provider,
|
|
3431
|
-
model: fallback.model,
|
|
3432
|
-
label: `${fallback.provider}/${fallback.model}`,
|
|
3433
|
-
});
|
|
3434
|
-
}
|
|
3435
|
-
if (chain.length === 0) {
|
|
3436
|
-
attempts.push({ label: "auto-provider" });
|
|
3437
|
-
}
|
|
3438
|
-
return attempts;
|
|
3439
|
-
}
|
|
3440
3533
|
function hasTranslatedOutput(collectedText, toolCalls) {
|
|
3441
3534
|
return collectedText.trim().length > 0 || (toolCalls?.length ?? 0) > 0;
|
|
3442
3535
|
}
|
|
@@ -3450,14 +3543,6 @@ function shouldOmitImagesForTarget(provider, model) {
|
|
|
3450
3543
|
function shouldOmitThinkingConfigForTarget(provider, model) {
|
|
3451
3544
|
return provider === "vertex" && model === "gemini-2.5-flash";
|
|
3452
3545
|
}
|
|
3453
|
-
function shouldSkipTranslationTarget(provider, model, parsed) {
|
|
3454
|
-
if (provider === "ollama" &&
|
|
3455
|
-
model === "qwen2.5:0.5b" &&
|
|
3456
|
-
(parsed?.images.length ?? 0) > 0) {
|
|
3457
|
-
return true;
|
|
3458
|
-
}
|
|
3459
|
-
return false;
|
|
3460
|
-
}
|
|
3461
3546
|
function extractToolArgs(toolCall) {
|
|
3462
3547
|
return (toolCall.args ??
|
|
3463
3548
|
toolCall.parameters ??
|