@juspay/neurolink 9.54.1 → 9.54.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/browser/neurolink.min.js +296 -296
- package/dist/lib/providers/googleVertex.d.ts +14 -0
- package/dist/lib/providers/googleVertex.js +50 -12
- package/dist/lib/proxy/routingPolicy.d.ts +27 -17
- package/dist/lib/proxy/routingPolicy.js +53 -209
- package/dist/lib/server/routes/claudeProxyRoutes.js +35 -73
- package/dist/lib/types/proxyTypes.d.ts +9 -50
- package/dist/lib/utils/messageBuilder.js +39 -6
- package/dist/providers/googleVertex.d.ts +14 -0
- package/dist/providers/googleVertex.js +50 -12
- package/dist/proxy/routingPolicy.d.ts +27 -17
- package/dist/proxy/routingPolicy.js +53 -209
- package/dist/server/routes/claudeProxyRoutes.js +35 -73
- package/dist/types/proxyTypes.d.ts +9 -50
- package/dist/utils/messageBuilder.js +39 -6
- package/package.json +1 -1
|
@@ -20,7 +20,7 @@ import { createRawStreamCapture } from "../../proxy/rawStreamCapture.js";
|
|
|
20
20
|
import { logBodyCapture, logRequest, logRequestAttempt, logStreamError, } from "../../proxy/requestLogger.js";
|
|
21
21
|
import { createSSEInterceptor } from "../../proxy/sseInterceptor.js";
|
|
22
22
|
import { needsRefresh, persistTokens, refreshToken, } from "../../proxy/tokenRefresh.js";
|
|
23
|
-
import {
|
|
23
|
+
import { applyRateLimitCooldown, buildProxyTranslationPlan, clearAccountCooldown, getAccountCooldownUntil, partitionAccountsByCooldown, } from "../../proxy/routingPolicy.js";
|
|
24
24
|
import { writeJsonSnapshotAtomically } from "../../proxy/snapshotPersistence.js";
|
|
25
25
|
import { recordAttempt, recordAttemptError, recordCooldown, recordFinalError, recordFinalSuccess, } from "../../proxy/usageStats.js";
|
|
26
26
|
import { logger } from "../../utils/logger.js";
|
|
@@ -440,9 +440,7 @@ function logProxyRoutingPlan(logProxyBody, stage, plan) {
|
|
|
440
440
|
contentType: "application/json",
|
|
441
441
|
body: {
|
|
442
442
|
stage,
|
|
443
|
-
requestProfile: plan.profile,
|
|
444
443
|
attempts: plan.attempts,
|
|
445
|
-
skipped: plan.skipped,
|
|
446
444
|
},
|
|
447
445
|
});
|
|
448
446
|
}
|
|
@@ -1347,30 +1345,21 @@ async function executeClaudeFallbackTranslation(args) {
|
|
|
1347
1345
|
return clientResponse;
|
|
1348
1346
|
}
|
|
1349
1347
|
async function tryConfiguredClaudeFallbackChain(args) {
|
|
1350
|
-
const { ctx, body, parsedFallbackRequest,
|
|
1348
|
+
const { ctx, body, parsedFallbackRequest, modelRouter, tracer, requestStartTime, logProxyBody, logFinalRequest, } = args;
|
|
1351
1349
|
const chain = modelRouter?.getFallbackChain() ?? [];
|
|
1352
1350
|
const fallbackPlan = buildProxyTranslationPlan({ provider: "anthropic", model: body.model }, chain, body.model, parsedFallbackRequest);
|
|
1353
|
-
const fallbackPolicyReason = summarizeSkippedFallbacks(fallbackPlan);
|
|
1354
1351
|
logProxyBody({
|
|
1355
1352
|
phase: "routing_decision",
|
|
1356
1353
|
contentType: "application/json",
|
|
1357
1354
|
body: {
|
|
1358
1355
|
stage: "anthropic_fallback",
|
|
1359
|
-
requestProfile,
|
|
1360
1356
|
attempts: fallbackPlan.attempts.slice(1),
|
|
1361
|
-
skipped: fallbackPlan.skipped,
|
|
1362
1357
|
},
|
|
1363
1358
|
});
|
|
1364
|
-
for (const skipped of fallbackPlan.skipped) {
|
|
1365
|
-
const label = skipped.provider
|
|
1366
|
-
? `${skipped.provider}/${skipped.model ?? "unknown"}`
|
|
1367
|
-
: "auto-provider";
|
|
1368
|
-
logger.always(`[proxy] skipping fallback ${label}: ${skipped.reason}`);
|
|
1369
|
-
}
|
|
1370
1359
|
tracer?.setFallbackInfo({
|
|
1371
1360
|
triggered: true,
|
|
1372
1361
|
attemptCount: fallbackPlan.attempts.slice(1).length,
|
|
1373
|
-
reason:
|
|
1362
|
+
reason: "all_anthropic_accounts_exhausted",
|
|
1374
1363
|
});
|
|
1375
1364
|
for (const fallback of fallbackPlan.attempts.slice(1)) {
|
|
1376
1365
|
if (!fallback.provider || !fallback.model) {
|
|
@@ -1410,10 +1399,7 @@ async function tryConfiguredClaudeFallbackChain(args) {
|
|
|
1410
1399
|
attemptCount: fallbackPlan.attempts.slice(1).length,
|
|
1411
1400
|
reason: "fallback_success",
|
|
1412
1401
|
});
|
|
1413
|
-
return {
|
|
1414
|
-
response,
|
|
1415
|
-
fallbackPolicyReason,
|
|
1416
|
-
};
|
|
1402
|
+
return { response };
|
|
1417
1403
|
}
|
|
1418
1404
|
catch (fallbackErr) {
|
|
1419
1405
|
const errMsg = fallbackErr instanceof Error
|
|
@@ -1450,10 +1436,7 @@ async function tryConfiguredClaudeFallbackChain(args) {
|
|
|
1450
1436
|
});
|
|
1451
1437
|
}
|
|
1452
1438
|
}
|
|
1453
|
-
return {
|
|
1454
|
-
response: null,
|
|
1455
|
-
fallbackPolicyReason,
|
|
1456
|
-
};
|
|
1439
|
+
return { response: null };
|
|
1457
1440
|
}
|
|
1458
1441
|
async function tryAutoClaudeFallback(args) {
|
|
1459
1442
|
const { ctx, body, tracer, requestStartTime, logProxyBody, logFinalRequest } = args;
|
|
@@ -1484,7 +1467,7 @@ async function tryAutoClaudeFallback(args) {
|
|
|
1484
1467
|
}
|
|
1485
1468
|
}
|
|
1486
1469
|
function buildClaudeAnthropicFailureResponse(args) {
|
|
1487
|
-
const { tracer, requestStartTime, authFailureMessage, invalidRequestFailure, sawNetworkError, sawTransientFailure, sawRateLimit, lastError, orderedAccounts,
|
|
1470
|
+
const { tracer, requestStartTime, authFailureMessage, invalidRequestFailure, sawNetworkError, sawTransientFailure, sawRateLimit, lastError, orderedAccounts, buildLoggedClaudeError, logProxyBody, logFinalRequest, } = args;
|
|
1488
1471
|
if (authFailureMessage && !sawRateLimit) {
|
|
1489
1472
|
tracer?.setError("authentication_error", authFailureMessage);
|
|
1490
1473
|
tracer?.end(401, Date.now() - requestStartTime);
|
|
@@ -1531,21 +1514,21 @@ function buildClaudeAnthropicFailureResponse(args) {
|
|
|
1531
1514
|
return buildLoggedClaudeError(502, msg);
|
|
1532
1515
|
}
|
|
1533
1516
|
const earliestRecovery = orderedAccounts.reduce((min, account) => {
|
|
1534
|
-
const
|
|
1535
|
-
return
|
|
1517
|
+
const until = getAccountCooldownUntil(getOrCreateRuntimeState(account.key));
|
|
1518
|
+
return until !== null ? Math.min(min, until) : min;
|
|
1536
1519
|
}, Infinity);
|
|
1520
|
+
// If no active cooldown remains (expired while retries ran), use 1s
|
|
1521
|
+
// instead of fabricating a long retry-after.
|
|
1537
1522
|
const retryAfterSec = Number.isFinite(earliestRecovery)
|
|
1538
1523
|
? Math.max(1, Math.ceil((earliestRecovery - Date.now()) / 1000))
|
|
1539
|
-
:
|
|
1540
|
-
const
|
|
1541
|
-
|
|
1542
|
-
|
|
1543
|
-
|
|
1544
|
-
const errorBody = buildClaudeError(429, `All accounts rate-limited. Earliest recovery in ${retryAfterSec}s.${contractMessage}`, "overloaded_error");
|
|
1545
|
-
tracer?.setError("rate_limit_error", `All accounts rate-limited. Retry in ${retryAfterSec}s.${contractMessage}`);
|
|
1524
|
+
: 1;
|
|
1525
|
+
const errorMessage = `All accounts rate-limited. Earliest recovery in ${retryAfterSec}s.`;
|
|
1526
|
+
logger.always(`[proxy] all accounts rate-limited, retry in ${retryAfterSec}s`);
|
|
1527
|
+
const errorBody = buildClaudeError(429, errorMessage, "overloaded_error");
|
|
1528
|
+
tracer?.setError("rate_limit_error", errorMessage);
|
|
1546
1529
|
tracer?.end(429, Date.now() - requestStartTime);
|
|
1547
1530
|
recordFinalError(429);
|
|
1548
|
-
logFinalRequest(429, "", "final", "rate_limit_error",
|
|
1531
|
+
logFinalRequest(429, "", "final", "rate_limit_error", errorMessage);
|
|
1549
1532
|
const errorBodyText = JSON.stringify(errorBody);
|
|
1550
1533
|
logProxyBody({
|
|
1551
1534
|
phase: "client_response",
|
|
@@ -1568,22 +1551,9 @@ function buildClaudeAnthropicFailureResponse(args) {
|
|
|
1568
1551
|
});
|
|
1569
1552
|
}
|
|
1570
1553
|
async function handleAnthropicSuccessfulResponse(args) {
|
|
1571
|
-
const { ctx, body, account, accountState,
|
|
1572
|
-
accountState
|
|
1573
|
-
accountState.coolingUntil = undefined;
|
|
1554
|
+
const { ctx, body, account, accountState, response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest, } = args;
|
|
1555
|
+
clearAccountCooldown(accountState);
|
|
1574
1556
|
accountState.consecutiveRefreshFailures = 0;
|
|
1575
|
-
if (accountState.requestClassCooldowns) {
|
|
1576
|
-
delete accountState.requestClassCooldowns[`${requestProfile.primaryClass}:${requestProfile.requestedModel.toLowerCase()}`];
|
|
1577
|
-
}
|
|
1578
|
-
if (accountState.modelTierCooldowns) {
|
|
1579
|
-
delete accountState.modelTierCooldowns[requestProfile.modelTier];
|
|
1580
|
-
}
|
|
1581
|
-
if (accountState.requestClassBackoffLevels) {
|
|
1582
|
-
delete accountState.requestClassBackoffLevels[`${requestProfile.primaryClass}:${requestProfile.requestedModel.toLowerCase()}`];
|
|
1583
|
-
}
|
|
1584
|
-
if (accountState.modelTierBackoffLevels) {
|
|
1585
|
-
delete accountState.modelTierBackoffLevels[requestProfile.modelTier];
|
|
1586
|
-
}
|
|
1587
1557
|
logger.always(`[proxy] ← ${response.status} account=${account.label}`);
|
|
1588
1558
|
const quota = parseQuotaHeaders(response.headers);
|
|
1589
1559
|
if (quota) {
|
|
@@ -2173,7 +2143,7 @@ async function handleAnthropicSuccessfulRetryResponse(args) {
|
|
|
2173
2143
|
return retryJson;
|
|
2174
2144
|
}
|
|
2175
2145
|
async function handleAnthropicAuthRetry(args) {
|
|
2176
|
-
const { ctx, body, account, accountState,
|
|
2146
|
+
const { ctx, body, account, accountState, headers, buildUpstreamBody, enabledAccounts, orderedAccounts, response: _response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logAttempt, logProxyBody, logFinalRequest, lastError, authFailureMessage, sawRateLimit, sawTransientFailure, sawNetworkError, } = args;
|
|
2177
2147
|
recordAttemptError(account.label, account.type, 401);
|
|
2178
2148
|
let currentLastError = lastError;
|
|
2179
2149
|
let currentAuthFailureMessage = authFailureMessage;
|
|
@@ -2257,9 +2227,8 @@ async function handleAnthropicAuthRetry(args) {
|
|
|
2257
2227
|
const cooldownMs = Number.isNaN(parsedRetryAfter)
|
|
2258
2228
|
? 60_000
|
|
2259
2229
|
: Math.max(1, parsedRetryAfter) * 1000;
|
|
2260
|
-
const cooldown =
|
|
2230
|
+
const cooldown = applyRateLimitCooldown({
|
|
2261
2231
|
state: accountState,
|
|
2262
|
-
profile: requestProfile,
|
|
2263
2232
|
retryAfterMs: cooldownMs,
|
|
2264
2233
|
capMs: RATE_LIMIT_BACKOFF_CAP_MS,
|
|
2265
2234
|
});
|
|
@@ -2836,7 +2805,7 @@ async function prepareAnthropicAccountAttempt(args) {
|
|
|
2836
2805
|
};
|
|
2837
2806
|
}
|
|
2838
2807
|
async function fetchAnthropicAccountResponse(args) {
|
|
2839
|
-
const { url, headers, finalBodyStr, account, accountState,
|
|
2808
|
+
const { url, headers, finalBodyStr, account, accountState, enabledAccounts, orderedAccounts, tracer, logAttempt, currentLastError, currentSawRateLimit, currentSawNetworkError, upstreamSpan, } = args;
|
|
2840
2809
|
let lastError = currentLastError;
|
|
2841
2810
|
let sawRateLimit = currentSawRateLimit;
|
|
2842
2811
|
let sawNetworkError = currentSawNetworkError;
|
|
@@ -2889,9 +2858,8 @@ async function fetchAnthropicAccountResponse(args) {
|
|
|
2889
2858
|
}
|
|
2890
2859
|
}
|
|
2891
2860
|
}
|
|
2892
|
-
const cooldown =
|
|
2861
|
+
const cooldown = applyRateLimitCooldown({
|
|
2893
2862
|
state: accountState,
|
|
2894
|
-
profile: requestProfile,
|
|
2895
2863
|
retryAfterMs: cooldownMs > 0 ? cooldownMs : undefined,
|
|
2896
2864
|
capMs: RATE_LIMIT_BACKOFF_CAP_MS,
|
|
2897
2865
|
});
|
|
@@ -2899,7 +2867,7 @@ async function fetchAnthropicAccountResponse(args) {
|
|
|
2899
2867
|
recordAttemptError(account.label, account.type, 429);
|
|
2900
2868
|
recordCooldown(account.label, account.type, Date.now() + cooldown.backoffMs, accountState.backoffLevel);
|
|
2901
2869
|
lastError = await response.text();
|
|
2902
|
-
logger.always(`[proxy] ← 429 account=${account.label} backoff-level=${accountState.backoffLevel} cooldown=${Math.round(cooldown.backoffMs / 1000)}s
|
|
2870
|
+
logger.always(`[proxy] ← 429 account=${account.label} backoff-level=${accountState.backoffLevel} cooldown=${Math.round(cooldown.backoffMs / 1000)}s`);
|
|
2903
2871
|
logAttempt(429, "rate_limit_error", String(lastError));
|
|
2904
2872
|
tracer?.setError("rate_limit_error", String(lastError).slice(0, 500));
|
|
2905
2873
|
tracer?.recordRetry(account.label, "rate_limit");
|
|
@@ -2924,7 +2892,6 @@ async function fetchAnthropicAccountResponse(args) {
|
|
|
2924
2892
|
async function handleAnthropicRoutedClaudeRequest(args) {
|
|
2925
2893
|
const { ctx, body, modelRouter, tracer, requestStartTime, accountStrategy, buildLoggedClaudeError, logProxyBody, logFinalRequest, } = args;
|
|
2926
2894
|
const parsedRequest = parseClaudeRequest(body);
|
|
2927
|
-
const requestProfile = classifyClaudeProxyRequest(body.model, parsedRequest);
|
|
2928
2895
|
const loadedAccounts = await loadClaudeProxyAccounts({
|
|
2929
2896
|
ctx,
|
|
2930
2897
|
body,
|
|
@@ -2947,14 +2914,16 @@ async function handleAnthropicRoutedClaudeRequest(args) {
|
|
|
2947
2914
|
attemptNumber: 0,
|
|
2948
2915
|
};
|
|
2949
2916
|
const acctSelectionSpan = tracer?.startAccountSelection();
|
|
2950
|
-
const accountPartition = partitionAccountsByCooldown(orderedAccounts, (account) => getOrCreateRuntimeState(account.key)
|
|
2917
|
+
const accountPartition = partitionAccountsByCooldown(orderedAccounts, (account) => getOrCreateRuntimeState(account.key));
|
|
2951
2918
|
for (const skippedAccount of accountPartition.skipped) {
|
|
2952
|
-
|
|
2953
|
-
|
|
2954
|
-
|
|
2955
|
-
|
|
2956
|
-
|
|
2957
|
-
|
|
2919
|
+
logger.always(`[proxy] skipping account=${skippedAccount.account.label} cooldown remaining=${Math.max(1, Math.ceil((skippedAccount.cooldown.until - Date.now()) / 1000))}s`);
|
|
2920
|
+
}
|
|
2921
|
+
// Only flag rate-limit when ALL accounts are cooling — if some are eligible,
|
|
2922
|
+
// let the actual attempt results determine sawRateLimit via real 429 responses.
|
|
2923
|
+
if (accountPartition.skipped.length > 0 &&
|
|
2924
|
+
accountPartition.eligible.length === 0) {
|
|
2925
|
+
loopState.sawRateLimit = true;
|
|
2926
|
+
loopState.lastError = `All ${accountPartition.skipped.length} accounts are cooling down`;
|
|
2958
2927
|
}
|
|
2959
2928
|
accountLoop: for (const account of accountPartition.eligible) {
|
|
2960
2929
|
const accountState = getOrCreateRuntimeState(account.key);
|
|
@@ -3009,7 +2978,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
|
|
|
3009
2978
|
finalBodyStr: preparedAttempt.finalBodyStr,
|
|
3010
2979
|
account,
|
|
3011
2980
|
accountState,
|
|
3012
|
-
requestProfile,
|
|
3013
2981
|
enabledAccounts,
|
|
3014
2982
|
orderedAccounts,
|
|
3015
2983
|
tracer,
|
|
@@ -3046,7 +3014,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
|
|
|
3046
3014
|
body,
|
|
3047
3015
|
account,
|
|
3048
3016
|
accountState,
|
|
3049
|
-
requestProfile,
|
|
3050
3017
|
headers: preparedAttempt.headers,
|
|
3051
3018
|
buildUpstreamBody: preparedAttempt.buildUpstreamBody,
|
|
3052
3019
|
enabledAccounts,
|
|
@@ -3126,7 +3093,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
|
|
|
3126
3093
|
body,
|
|
3127
3094
|
account,
|
|
3128
3095
|
accountState,
|
|
3129
|
-
requestProfile,
|
|
3130
3096
|
response,
|
|
3131
3097
|
tracer,
|
|
3132
3098
|
requestStartTime,
|
|
@@ -3150,7 +3116,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
|
|
|
3150
3116
|
ctx,
|
|
3151
3117
|
body,
|
|
3152
3118
|
parsedFallbackRequest: parsedRequest,
|
|
3153
|
-
requestProfile,
|
|
3154
3119
|
modelRouter,
|
|
3155
3120
|
tracer,
|
|
3156
3121
|
requestStartTime,
|
|
@@ -3160,8 +3125,9 @@ async function handleAnthropicRoutedClaudeRequest(args) {
|
|
|
3160
3125
|
if (configuredFallbackResult.response) {
|
|
3161
3126
|
return configuredFallbackResult.response;
|
|
3162
3127
|
}
|
|
3163
|
-
|
|
3164
|
-
|
|
3128
|
+
// Try auto-provider fallback when the configured chain didn't produce a
|
|
3129
|
+
// response (either no chain configured, or all entries failed/deduped).
|
|
3130
|
+
if (!loopState.sawRateLimit) {
|
|
3165
3131
|
const autoFallbackResponse = await tryAutoClaudeFallback({
|
|
3166
3132
|
ctx,
|
|
3167
3133
|
body,
|
|
@@ -3184,8 +3150,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
|
|
|
3184
3150
|
sawRateLimit: loopState.sawRateLimit,
|
|
3185
3151
|
lastError: loopState.lastError,
|
|
3186
3152
|
orderedAccounts,
|
|
3187
|
-
requestProfile,
|
|
3188
|
-
fallbackPolicyReason: configuredFallbackResult.fallbackPolicyReason,
|
|
3189
3153
|
buildLoggedClaudeError,
|
|
3190
3154
|
logProxyBody,
|
|
3191
3155
|
logFinalRequest,
|
|
@@ -3406,8 +3370,6 @@ function getOrCreateRuntimeState(accountKey) {
|
|
|
3406
3370
|
backoffLevel: 0,
|
|
3407
3371
|
consecutiveRefreshFailures: 0,
|
|
3408
3372
|
permanentlyDisabled: false,
|
|
3409
|
-
requestClassCooldowns: {},
|
|
3410
|
-
modelTierCooldowns: {},
|
|
3411
3373
|
};
|
|
3412
3374
|
accountRuntimeState.set(accountKey, initial);
|
|
3413
3375
|
return initial;
|
|
@@ -641,10 +641,6 @@ export type RuntimeAccountState = {
|
|
|
641
641
|
backoffLevel: number;
|
|
642
642
|
consecutiveRefreshFailures: number;
|
|
643
643
|
permanentlyDisabled: boolean;
|
|
644
|
-
requestClassCooldowns?: Record<string, number>;
|
|
645
|
-
modelTierCooldowns?: Record<string, number>;
|
|
646
|
-
requestClassBackoffLevels?: Record<string, number>;
|
|
647
|
-
modelTierBackoffLevels?: Record<string, number>;
|
|
648
644
|
lastToken?: string;
|
|
649
645
|
lastRefreshToken?: string;
|
|
650
646
|
};
|
|
@@ -697,63 +693,26 @@ export type CachedSession = {
|
|
|
697
693
|
};
|
|
698
694
|
/** Model tier classification for proxy routing decisions. */
|
|
699
695
|
export type ClaudeProxyModelTier = "opus" | "sonnet" | "haiku" | "other";
|
|
700
|
-
/** Request class for proxy routing policy. */
|
|
701
|
-
export type ClaudeProxyRequestClass = "multimodal" | "high-tool-count-non-stream-structured" | "strong-tool-fidelity" | "streaming-conversational" | "standard";
|
|
702
|
-
/** Full classification profile for a proxy request. */
|
|
703
|
-
export type ClaudeProxyRequestProfile = {
|
|
704
|
-
requestedModel: string;
|
|
705
|
-
modelTier: ClaudeProxyModelTier;
|
|
706
|
-
primaryClass: ClaudeProxyRequestClass;
|
|
707
|
-
classes: ClaudeProxyRequestClass[];
|
|
708
|
-
stream: boolean;
|
|
709
|
-
toolCount: number;
|
|
710
|
-
hasImages: boolean;
|
|
711
|
-
hasThinking: boolean;
|
|
712
|
-
hasToolHistory: boolean;
|
|
713
|
-
requiresToolUse: boolean;
|
|
714
|
-
requiresSpecificTool: boolean;
|
|
715
|
-
requiresStrongToolFidelity: boolean;
|
|
716
|
-
isHighToolCountNonStream: boolean;
|
|
717
|
-
isStreamingConversational: boolean;
|
|
718
|
-
isMultimodal: boolean;
|
|
719
|
-
};
|
|
720
|
-
/** Outcome of evaluating a single fallback candidate. */
|
|
721
|
-
export type FallbackEligibilityDecision = {
|
|
722
|
-
provider?: string;
|
|
723
|
-
model?: string;
|
|
724
|
-
eligible: boolean;
|
|
725
|
-
reason: string;
|
|
726
|
-
};
|
|
727
696
|
/** A single provider attempt in the proxy translation plan. */
|
|
728
697
|
export type ProxyTranslationAttempt = {
|
|
729
698
|
provider?: string;
|
|
730
699
|
model?: string;
|
|
731
700
|
label: string;
|
|
732
701
|
};
|
|
733
|
-
/** Ordered plan of provider attempts
|
|
702
|
+
/** Ordered plan of provider attempts for a proxy request. */
|
|
734
703
|
export type ProxyTranslationPlan = {
|
|
735
|
-
|
|
704
|
+
requestedModel: string;
|
|
705
|
+
modelTier: ClaudeProxyModelTier;
|
|
736
706
|
attempts: ProxyTranslationAttempt[];
|
|
737
|
-
skipped:
|
|
707
|
+
skipped: never[];
|
|
738
708
|
};
|
|
739
|
-
/**
|
|
740
|
-
export type CooldownScope = {
|
|
741
|
-
scope: "request_class";
|
|
742
|
-
key: string;
|
|
743
|
-
until: number;
|
|
744
|
-
} | {
|
|
745
|
-
scope: "model_tier";
|
|
746
|
-
key: string;
|
|
747
|
-
until: number;
|
|
748
|
-
} | {
|
|
749
|
-
scope: "generic";
|
|
750
|
-
key: "generic";
|
|
751
|
-
until: number;
|
|
752
|
-
};
|
|
753
|
-
/** An account skipped during partitioning, with the cooldown that caused it. */
|
|
709
|
+
/** An account skipped during partitioning, with its cooldown info. */
|
|
754
710
|
export type CooldownSkippedAccount<T> = {
|
|
755
711
|
account: T;
|
|
756
|
-
cooldown:
|
|
712
|
+
cooldown: {
|
|
713
|
+
until: number;
|
|
714
|
+
backoffLevel: number;
|
|
715
|
+
};
|
|
757
716
|
};
|
|
758
717
|
/** Mutable readiness state tracked by the proxy process. */
|
|
759
718
|
export type ProxyReadinessState = {
|
|
@@ -350,11 +350,9 @@ export function convertToModelMessages(messages) {
|
|
|
350
350
|
// Assistant messages only support text content, filter out images
|
|
351
351
|
const textOnlyContent = validContent.filter((item) => item.type === "text");
|
|
352
352
|
if (textOnlyContent.length === 0) {
|
|
353
|
-
//
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
content: "",
|
|
357
|
-
};
|
|
353
|
+
// No text content (e.g., only images/files) — skip message
|
|
354
|
+
// to avoid sending empty content to providers like Claude
|
|
355
|
+
return null;
|
|
358
356
|
}
|
|
359
357
|
else if (textOnlyContent.length === 1) {
|
|
360
358
|
// Single text item, use string content
|
|
@@ -1073,9 +1071,44 @@ export async function buildMultimodalMessagesArray(options, provider, model) {
|
|
|
1073
1071
|
msg.role === "assistant" ||
|
|
1074
1072
|
msg.role === "system") {
|
|
1075
1073
|
const providerOptions = msg.providerOptions;
|
|
1074
|
+
// Sanitize assistant array content: strip tool_use/tool_result blocks
|
|
1075
|
+
// that providers cannot handle. If an assistant message ends up empty
|
|
1076
|
+
// after stripping, skip it to avoid sending content: "" to Claude.
|
|
1077
|
+
// Only assistant messages need this — user messages may contain valid
|
|
1078
|
+
// image/file blocks that must pass through unchanged.
|
|
1079
|
+
let sanitizedContent = msg.content;
|
|
1080
|
+
if (msg.role === "assistant" && Array.isArray(msg.content)) {
|
|
1081
|
+
const textParts = msg.content.filter((item) => !!item &&
|
|
1082
|
+
typeof item === "object" &&
|
|
1083
|
+
item.type === "text" &&
|
|
1084
|
+
typeof item.text === "string");
|
|
1085
|
+
if (textParts.length === 0) {
|
|
1086
|
+
// All content was tool_use/tool_result/non-text — skip message
|
|
1087
|
+
continue;
|
|
1088
|
+
}
|
|
1089
|
+
// Check if any retained text part carries providerOptions
|
|
1090
|
+
// (e.g. Anthropic cache_control). If so, preserve them as
|
|
1091
|
+
// array content to avoid losing per-block metadata.
|
|
1092
|
+
const hasItemProviderOptions = textParts.some((item) => !!item.providerOptions);
|
|
1093
|
+
if (hasItemProviderOptions) {
|
|
1094
|
+
sanitizedContent = textParts;
|
|
1095
|
+
}
|
|
1096
|
+
else {
|
|
1097
|
+
sanitizedContent =
|
|
1098
|
+
textParts.length === 1
|
|
1099
|
+
? textParts[0].text
|
|
1100
|
+
: textParts
|
|
1101
|
+
.map((p) => p.text)
|
|
1102
|
+
.join(" ");
|
|
1103
|
+
}
|
|
1104
|
+
}
|
|
1105
|
+
// Skip empty string content to avoid Claude API rejection
|
|
1106
|
+
if (sanitizedContent === "") {
|
|
1107
|
+
continue;
|
|
1108
|
+
}
|
|
1076
1109
|
messages.push({
|
|
1077
1110
|
role: msg.role,
|
|
1078
|
-
content:
|
|
1111
|
+
content: sanitizedContent,
|
|
1079
1112
|
...(providerOptions && { providerOptions }),
|
|
1080
1113
|
});
|
|
1081
1114
|
}
|
|
@@ -5,6 +5,20 @@ import { BaseProvider } from "../core/baseProvider.js";
|
|
|
5
5
|
import type { EnhancedGenerateResult, TextGenerationOptions } from "../types/generateTypes.js";
|
|
6
6
|
import type { NeurolinkCredentials } from "../types/providers.js";
|
|
7
7
|
import type { StreamOptions, StreamResult } from "../types/streamTypes.js";
|
|
8
|
+
/**
|
|
9
|
+
* Resolve the correct Vertex AI location for a given model.
|
|
10
|
+
*
|
|
11
|
+
* Google-published models (gemini-*) require the global endpoint
|
|
12
|
+
* (`aiplatform.googleapis.com`), not regional endpoints like
|
|
13
|
+
* `us-east5-aiplatform.googleapis.com`. Regional endpoints return
|
|
14
|
+
* "model not found" for these models.
|
|
15
|
+
*
|
|
16
|
+
* Anthropic-on-Vertex models (claude-*) require regional endpoints
|
|
17
|
+
* and are handled separately by `createVertexAnthropicSettings`.
|
|
18
|
+
*
|
|
19
|
+
* Embedding models and custom models use the configured location as-is.
|
|
20
|
+
*/
|
|
21
|
+
export declare const resolveVertexLocation: (modelName: string | undefined, configuredLocation: string) => string;
|
|
8
22
|
/**
|
|
9
23
|
* Vertex Model Aliases
|
|
10
24
|
*
|
|
@@ -79,7 +79,36 @@ const getVertexLocation = () => {
|
|
|
79
79
|
return (process.env.GOOGLE_CLOUD_LOCATION ||
|
|
80
80
|
process.env.VERTEX_LOCATION ||
|
|
81
81
|
process.env.GOOGLE_VERTEX_LOCATION ||
|
|
82
|
-
"
|
|
82
|
+
"global");
|
|
83
|
+
};
|
|
84
|
+
/**
|
|
85
|
+
* Resolve the correct Vertex AI location for a given model.
|
|
86
|
+
*
|
|
87
|
+
* Google-published models (gemini-*) require the global endpoint
|
|
88
|
+
* (`aiplatform.googleapis.com`), not regional endpoints like
|
|
89
|
+
* `us-east5-aiplatform.googleapis.com`. Regional endpoints return
|
|
90
|
+
* "model not found" for these models.
|
|
91
|
+
*
|
|
92
|
+
* Anthropic-on-Vertex models (claude-*) require regional endpoints
|
|
93
|
+
* and are handled separately by `createVertexAnthropicSettings`.
|
|
94
|
+
*
|
|
95
|
+
* Embedding models and custom models use the configured location as-is.
|
|
96
|
+
*/
|
|
97
|
+
export const resolveVertexLocation = (modelName, configuredLocation) => {
|
|
98
|
+
if (!modelName) {
|
|
99
|
+
return configuredLocation;
|
|
100
|
+
}
|
|
101
|
+
const normalized = modelName.toLowerCase();
|
|
102
|
+
// Google-published models always use the global endpoint.
|
|
103
|
+
// Hardcoded because Google's Vertex AI serves Gemini models exclusively
|
|
104
|
+
// from the global endpoint — regional endpoints like us-east5 return
|
|
105
|
+
// "Publisher Model was not found" errors. The env var GOOGLE_VERTEX_LOCATION
|
|
106
|
+
// is typically set for Anthropic-on-Vertex (which needs regional), so we
|
|
107
|
+
// cannot rely on it for Gemini routing.
|
|
108
|
+
if (normalized.startsWith("gemini-")) {
|
|
109
|
+
return "global";
|
|
110
|
+
}
|
|
111
|
+
return configuredLocation;
|
|
83
112
|
};
|
|
84
113
|
const getDefaultVertexModel = () => {
|
|
85
114
|
// Use gemini-2.5-flash as default - latest and best price-performance model
|
|
@@ -96,8 +125,9 @@ const hasGoogleCredentials = () => {
|
|
|
96
125
|
// Module-level cache for runtime-created credentials file to avoid per-request writes
|
|
97
126
|
let cachedCredentialsPath = null;
|
|
98
127
|
// Enhanced Vertex settings creation with authentication fallback and proxy support
|
|
99
|
-
const createVertexSettings = async (region, credentials) => {
|
|
100
|
-
const
|
|
128
|
+
const createVertexSettings = async (region, credentials, modelName) => {
|
|
129
|
+
const configuredLocation = credentials?.location || region || getVertexLocation();
|
|
130
|
+
const location = resolveVertexLocation(modelName, configuredLocation);
|
|
101
131
|
const project = credentials?.projectId || getVertexProjectId();
|
|
102
132
|
const baseSettings = {
|
|
103
133
|
project,
|
|
@@ -326,7 +356,12 @@ const createVertexAnthropicSettings = async (region, credentials) => {
|
|
|
326
356
|
// which is invalid. The correct global endpoint omits the region prefix entirely.
|
|
327
357
|
// Since the SDK doesn't handle this, redirect "global" to "us-east5" for Anthropic.
|
|
328
358
|
const anthropicRegion = !region || region === "global" ? "us-east5" : region;
|
|
329
|
-
|
|
359
|
+
// Override credentials.location so it cannot conflict with the redirected
|
|
360
|
+
// region — createVertexSettings checks credentials.location first.
|
|
361
|
+
const anthropicCredentials = credentials?.location
|
|
362
|
+
? { ...credentials, location: anthropicRegion }
|
|
363
|
+
: credentials;
|
|
364
|
+
const baseVertexSettings = await createVertexSettings(anthropicRegion, anthropicCredentials);
|
|
330
365
|
// GoogleVertexAnthropicProviderSettings extends GoogleVertexProviderSettings
|
|
331
366
|
// so we can use the same settings with proper typing
|
|
332
367
|
return {
|
|
@@ -570,7 +605,9 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
570
605
|
networkConfig: {
|
|
571
606
|
projectId: this.projectId,
|
|
572
607
|
location: this.location,
|
|
573
|
-
expectedEndpoint:
|
|
608
|
+
expectedEndpoint: this.location === "global"
|
|
609
|
+
? "https://aiplatform.googleapis.com"
|
|
610
|
+
: `https://${this.location}-aiplatform.googleapis.com`,
|
|
574
611
|
httpProxy: process.env.HTTP_PROXY || process.env.http_proxy,
|
|
575
612
|
httpsProxy: process.env.HTTPS_PROXY || process.env.https_proxy,
|
|
576
613
|
noProxy: process.env.NO_PROXY || process.env.no_proxy,
|
|
@@ -582,7 +619,7 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
582
619
|
message: "Starting Vertex settings creation with network configuration analysis",
|
|
583
620
|
});
|
|
584
621
|
try {
|
|
585
|
-
const vertexSettings = await createVertexSettings(this.location, this.credentials);
|
|
622
|
+
const vertexSettings = await createVertexSettings(this.location, this.credentials, modelName);
|
|
586
623
|
const vertexSettingsEndTime = process.hrtime.bigint();
|
|
587
624
|
const vertexSettingsDurationNs = vertexSettingsEndTime - vertexSettingsStartTime;
|
|
588
625
|
logger.debug(`[GoogleVertexProvider] ✅ LOG_POINT_V009_VERTEX_SETTINGS_SUCCESS`, {
|
|
@@ -1116,12 +1153,13 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
1116
1153
|
/**
|
|
1117
1154
|
* Create @google/genai client configured for Vertex AI
|
|
1118
1155
|
*/
|
|
1119
|
-
async createVertexGenAIClient(regionOverride) {
|
|
1156
|
+
async createVertexGenAIClient(regionOverride, modelName) {
|
|
1120
1157
|
const project = this.credentials?.projectId || getVertexProjectId();
|
|
1121
|
-
const
|
|
1158
|
+
const configuredLocation = this.credentials?.location ||
|
|
1122
1159
|
regionOverride ||
|
|
1123
1160
|
this.location ||
|
|
1124
1161
|
getVertexLocation();
|
|
1162
|
+
const location = resolveVertexLocation(modelName, configuredLocation);
|
|
1125
1163
|
const mod = await import("@google/genai");
|
|
1126
1164
|
const ctor = mod.GoogleGenAI;
|
|
1127
1165
|
if (!ctor) {
|
|
@@ -1308,8 +1346,8 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
1308
1346
|
}, (span) => this.executeNativeGemini3StreamWithSpan(options, modelName, span));
|
|
1309
1347
|
}
|
|
1310
1348
|
async executeNativeGemini3StreamWithSpan(options, modelName, span) {
|
|
1311
|
-
const client = await this.createVertexGenAIClient(options.region);
|
|
1312
|
-
const effectiveLocation = options.region || this.location || getVertexLocation();
|
|
1349
|
+
const client = await this.createVertexGenAIClient(options.region, modelName);
|
|
1350
|
+
const effectiveLocation = resolveVertexLocation(modelName, options.region || this.location || getVertexLocation());
|
|
1313
1351
|
logger.debug("[GoogleVertex] Using native @google/genai for Gemini 3", {
|
|
1314
1352
|
model: modelName,
|
|
1315
1353
|
hasTools: !!options.tools && Object.keys(options.tools).length > 0,
|
|
@@ -1503,8 +1541,8 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
1503
1541
|
[ATTR.NL_PROVIDER]: this.providerName,
|
|
1504
1542
|
},
|
|
1505
1543
|
}, async (span) => {
|
|
1506
|
-
const client = await this.createVertexGenAIClient(options.region);
|
|
1507
|
-
const effectiveLocation = options.region || this.location || getVertexLocation();
|
|
1544
|
+
const client = await this.createVertexGenAIClient(options.region, modelName);
|
|
1545
|
+
const effectiveLocation = resolveVertexLocation(modelName, options.region || this.location || getVertexLocation());
|
|
1508
1546
|
logger.debug("[GoogleVertex] Using native @google/genai for Gemini 3 generate", {
|
|
1509
1547
|
model: modelName,
|
|
1510
1548
|
project: this.projectId,
|
|
@@ -1,33 +1,43 @@
|
|
|
1
|
-
import type { ClaudeProxyModelTier,
|
|
2
|
-
export type { ClaudeProxyModelTier,
|
|
1
|
+
import type { ClaudeProxyModelTier, CooldownSkippedAccount, FallbackEntry, ParsedClaudeRequest, ProxyTranslationAttempt, ProxyTranslationPlan, RuntimeAccountState } from "../types/index.js";
|
|
2
|
+
export type { ClaudeProxyModelTier, ProxyTranslationAttempt, ProxyTranslationPlan, };
|
|
3
3
|
export declare function inferClaudeProxyModelTier(modelName: string): ClaudeProxyModelTier;
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
}): FallbackEligibilityDecision;
|
|
4
|
+
/**
|
|
5
|
+
* Build a translation plan for a Claude-compatible proxy request.
|
|
6
|
+
* The plan lists the primary provider followed by eligible fallback targets.
|
|
7
|
+
* All configured fallback entries are always eligible — no contract-based gating.
|
|
8
|
+
* When no fallback chain is configured, an "auto-provider" entry is appended.
|
|
9
|
+
*/
|
|
11
10
|
export declare function buildProxyTranslationPlan(primary: {
|
|
12
11
|
provider: string;
|
|
13
12
|
model?: string;
|
|
14
|
-
}, fallbackChain: FallbackEntry[], requestedModel: string,
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
}, fallbackChain: FallbackEntry[], requestedModel: string, _parsed: ParsedClaudeRequest): ProxyTranslationPlan;
|
|
14
|
+
/**
|
|
15
|
+
* Check whether an account is currently cooling down.
|
|
16
|
+
* Returns the cooldown timestamp if active, null otherwise.
|
|
17
|
+
*/
|
|
18
|
+
export declare function getAccountCooldownUntil(state: RuntimeAccountState, now?: number): number | null;
|
|
19
|
+
/**
|
|
20
|
+
* Partition accounts into eligible (no cooldown) and skipped (cooling down).
|
|
21
|
+
*/
|
|
17
22
|
export declare function partitionAccountsByCooldown<T extends {
|
|
18
23
|
key: string;
|
|
19
|
-
}>(accounts: T[], getState: (account: T) => RuntimeAccountState,
|
|
24
|
+
}>(accounts: T[], getState: (account: T) => RuntimeAccountState, now?: number): {
|
|
20
25
|
eligible: T[];
|
|
21
26
|
skipped: CooldownSkippedAccount<T>[];
|
|
22
27
|
};
|
|
23
|
-
|
|
28
|
+
/**
|
|
29
|
+
* Apply a rate-limit cooldown to an account.
|
|
30
|
+
* Uses simple exponential backoff with a floor and cap.
|
|
31
|
+
*/
|
|
32
|
+
export declare function applyRateLimitCooldown(args: {
|
|
24
33
|
state: RuntimeAccountState;
|
|
25
|
-
profile: ClaudeProxyRequestProfile;
|
|
26
34
|
retryAfterMs?: number;
|
|
27
35
|
now?: number;
|
|
28
36
|
capMs: number;
|
|
29
37
|
}): {
|
|
30
38
|
backoffMs: number;
|
|
31
|
-
requestClassKey: string;
|
|
32
|
-
modelTierKey: string;
|
|
33
39
|
};
|
|
40
|
+
/**
|
|
41
|
+
* Clear cooldown state for an account after a successful request.
|
|
42
|
+
*/
|
|
43
|
+
export declare function clearAccountCooldown(state: RuntimeAccountState): void;
|