@juspay/neurolink 9.54.0 → 9.54.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,7 +20,7 @@ import { createRawStreamCapture } from "../../proxy/rawStreamCapture.js";
20
20
  import { logBodyCapture, logRequest, logRequestAttempt, logStreamError, } from "../../proxy/requestLogger.js";
21
21
  import { createSSEInterceptor } from "../../proxy/sseInterceptor.js";
22
22
  import { needsRefresh, persistTokens, refreshToken, } from "../../proxy/tokenRefresh.js";
23
- import { applyRateLimitCooldownScope, buildProxyTranslationPlan, classifyClaudeProxyRequest, getActiveCooldownScope, partitionAccountsByCooldown, summarizeSkippedFallbacks, } from "../../proxy/routingPolicy.js";
23
+ import { applyRateLimitCooldown, buildProxyTranslationPlan, clearAccountCooldown, getAccountCooldownUntil, partitionAccountsByCooldown, } from "../../proxy/routingPolicy.js";
24
24
  import { writeJsonSnapshotAtomically } from "../../proxy/snapshotPersistence.js";
25
25
  import { recordAttempt, recordAttemptError, recordCooldown, recordFinalError, recordFinalSuccess, } from "../../proxy/usageStats.js";
26
26
  import { logger } from "../../utils/logger.js";
@@ -440,9 +440,7 @@ function logProxyRoutingPlan(logProxyBody, stage, plan) {
440
440
  contentType: "application/json",
441
441
  body: {
442
442
  stage,
443
- requestProfile: plan.profile,
444
443
  attempts: plan.attempts,
445
- skipped: plan.skipped,
446
444
  },
447
445
  });
448
446
  }
@@ -1347,30 +1345,21 @@ async function executeClaudeFallbackTranslation(args) {
1347
1345
  return clientResponse;
1348
1346
  }
1349
1347
  async function tryConfiguredClaudeFallbackChain(args) {
1350
- const { ctx, body, parsedFallbackRequest, requestProfile, modelRouter, tracer, requestStartTime, logProxyBody, logFinalRequest, } = args;
1348
+ const { ctx, body, parsedFallbackRequest, modelRouter, tracer, requestStartTime, logProxyBody, logFinalRequest, } = args;
1351
1349
  const chain = modelRouter?.getFallbackChain() ?? [];
1352
1350
  const fallbackPlan = buildProxyTranslationPlan({ provider: "anthropic", model: body.model }, chain, body.model, parsedFallbackRequest);
1353
- const fallbackPolicyReason = summarizeSkippedFallbacks(fallbackPlan);
1354
1351
  logProxyBody({
1355
1352
  phase: "routing_decision",
1356
1353
  contentType: "application/json",
1357
1354
  body: {
1358
1355
  stage: "anthropic_fallback",
1359
- requestProfile,
1360
1356
  attempts: fallbackPlan.attempts.slice(1),
1361
- skipped: fallbackPlan.skipped,
1362
1357
  },
1363
1358
  });
1364
- for (const skipped of fallbackPlan.skipped) {
1365
- const label = skipped.provider
1366
- ? `${skipped.provider}/${skipped.model ?? "unknown"}`
1367
- : "auto-provider";
1368
- logger.always(`[proxy] skipping fallback ${label}: ${skipped.reason}`);
1369
- }
1370
1359
  tracer?.setFallbackInfo({
1371
1360
  triggered: true,
1372
1361
  attemptCount: fallbackPlan.attempts.slice(1).length,
1373
- reason: fallbackPolicyReason ?? "all_anthropic_accounts_exhausted",
1362
+ reason: "all_anthropic_accounts_exhausted",
1374
1363
  });
1375
1364
  for (const fallback of fallbackPlan.attempts.slice(1)) {
1376
1365
  if (!fallback.provider || !fallback.model) {
@@ -1410,10 +1399,7 @@ async function tryConfiguredClaudeFallbackChain(args) {
1410
1399
  attemptCount: fallbackPlan.attempts.slice(1).length,
1411
1400
  reason: "fallback_success",
1412
1401
  });
1413
- return {
1414
- response,
1415
- fallbackPolicyReason,
1416
- };
1402
+ return { response };
1417
1403
  }
1418
1404
  catch (fallbackErr) {
1419
1405
  const errMsg = fallbackErr instanceof Error
@@ -1450,10 +1436,7 @@ async function tryConfiguredClaudeFallbackChain(args) {
1450
1436
  });
1451
1437
  }
1452
1438
  }
1453
- return {
1454
- response: null,
1455
- fallbackPolicyReason,
1456
- };
1439
+ return { response: null };
1457
1440
  }
1458
1441
  async function tryAutoClaudeFallback(args) {
1459
1442
  const { ctx, body, tracer, requestStartTime, logProxyBody, logFinalRequest } = args;
@@ -1484,7 +1467,7 @@ async function tryAutoClaudeFallback(args) {
1484
1467
  }
1485
1468
  }
1486
1469
  function buildClaudeAnthropicFailureResponse(args) {
1487
- const { tracer, requestStartTime, authFailureMessage, invalidRequestFailure, sawNetworkError, sawTransientFailure, sawRateLimit, lastError, orderedAccounts, requestProfile, fallbackPolicyReason, buildLoggedClaudeError, logProxyBody, logFinalRequest, } = args;
1470
+ const { tracer, requestStartTime, authFailureMessage, invalidRequestFailure, sawNetworkError, sawTransientFailure, sawRateLimit, lastError, orderedAccounts, buildLoggedClaudeError, logProxyBody, logFinalRequest, } = args;
1488
1471
  if (authFailureMessage && !sawRateLimit) {
1489
1472
  tracer?.setError("authentication_error", authFailureMessage);
1490
1473
  tracer?.end(401, Date.now() - requestStartTime);
@@ -1531,21 +1514,21 @@ function buildClaudeAnthropicFailureResponse(args) {
1531
1514
  return buildLoggedClaudeError(502, msg);
1532
1515
  }
1533
1516
  const earliestRecovery = orderedAccounts.reduce((min, account) => {
1534
- const cooldown = getActiveCooldownScope(getOrCreateRuntimeState(account.key), requestProfile);
1535
- return cooldown ? Math.min(min, cooldown.until) : min;
1517
+ const until = getAccountCooldownUntil(getOrCreateRuntimeState(account.key));
1518
+ return until !== null ? Math.min(min, until) : min;
1536
1519
  }, Infinity);
1520
+ // If no active cooldown remains (expired while retries ran), use 1s
1521
+ // instead of fabricating a long retry-after.
1537
1522
  const retryAfterSec = Number.isFinite(earliestRecovery)
1538
1523
  ? Math.max(1, Math.ceil((earliestRecovery - Date.now()) / 1000))
1539
- : 60;
1540
- const contractMessage = fallbackPolicyReason
1541
- ? ` ${fallbackPolicyReason}`
1542
- : "";
1543
- logger.always(`[proxy] all accounts rate-limited for request-class=${requestProfile.primaryClass}, retry in ${retryAfterSec}s`);
1544
- const errorBody = buildClaudeError(429, `All accounts rate-limited. Earliest recovery in ${retryAfterSec}s.${contractMessage}`, "overloaded_error");
1545
- tracer?.setError("rate_limit_error", `All accounts rate-limited. Retry in ${retryAfterSec}s.${contractMessage}`);
1524
+ : 1;
1525
+ const errorMessage = `All accounts rate-limited. Earliest recovery in ${retryAfterSec}s.`;
1526
+ logger.always(`[proxy] all accounts rate-limited, retry in ${retryAfterSec}s`);
1527
+ const errorBody = buildClaudeError(429, errorMessage, "overloaded_error");
1528
+ tracer?.setError("rate_limit_error", errorMessage);
1546
1529
  tracer?.end(429, Date.now() - requestStartTime);
1547
1530
  recordFinalError(429);
1548
- logFinalRequest(429, "", "final", "rate_limit_error", `All accounts rate-limited. Retry in ${retryAfterSec}s.${contractMessage}`);
1531
+ logFinalRequest(429, "", "final", "rate_limit_error", errorMessage);
1549
1532
  const errorBodyText = JSON.stringify(errorBody);
1550
1533
  logProxyBody({
1551
1534
  phase: "client_response",
@@ -1568,22 +1551,9 @@ function buildClaudeAnthropicFailureResponse(args) {
1568
1551
  });
1569
1552
  }
1570
1553
  async function handleAnthropicSuccessfulResponse(args) {
1571
- const { ctx, body, account, accountState, requestProfile, response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest, } = args;
1572
- accountState.backoffLevel = 0;
1573
- accountState.coolingUntil = undefined;
1554
+ const { ctx, body, account, accountState, response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest, } = args;
1555
+ clearAccountCooldown(accountState);
1574
1556
  accountState.consecutiveRefreshFailures = 0;
1575
- if (accountState.requestClassCooldowns) {
1576
- delete accountState.requestClassCooldowns[`${requestProfile.primaryClass}:${requestProfile.requestedModel.toLowerCase()}`];
1577
- }
1578
- if (accountState.modelTierCooldowns) {
1579
- delete accountState.modelTierCooldowns[requestProfile.modelTier];
1580
- }
1581
- if (accountState.requestClassBackoffLevels) {
1582
- delete accountState.requestClassBackoffLevels[`${requestProfile.primaryClass}:${requestProfile.requestedModel.toLowerCase()}`];
1583
- }
1584
- if (accountState.modelTierBackoffLevels) {
1585
- delete accountState.modelTierBackoffLevels[requestProfile.modelTier];
1586
- }
1587
1557
  logger.always(`[proxy] ← ${response.status} account=${account.label}`);
1588
1558
  const quota = parseQuotaHeaders(response.headers);
1589
1559
  if (quota) {
@@ -2173,7 +2143,7 @@ async function handleAnthropicSuccessfulRetryResponse(args) {
2173
2143
  return retryJson;
2174
2144
  }
2175
2145
  async function handleAnthropicAuthRetry(args) {
2176
- const { ctx, body, account, accountState, requestProfile, headers, buildUpstreamBody, enabledAccounts, orderedAccounts, response: _response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logAttempt, logProxyBody, logFinalRequest, lastError, authFailureMessage, sawRateLimit, sawTransientFailure, sawNetworkError, } = args;
2146
+ const { ctx, body, account, accountState, headers, buildUpstreamBody, enabledAccounts, orderedAccounts, response: _response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logAttempt, logProxyBody, logFinalRequest, lastError, authFailureMessage, sawRateLimit, sawTransientFailure, sawNetworkError, } = args;
2177
2147
  recordAttemptError(account.label, account.type, 401);
2178
2148
  let currentLastError = lastError;
2179
2149
  let currentAuthFailureMessage = authFailureMessage;
@@ -2257,9 +2227,8 @@ async function handleAnthropicAuthRetry(args) {
2257
2227
  const cooldownMs = Number.isNaN(parsedRetryAfter)
2258
2228
  ? 60_000
2259
2229
  : Math.max(1, parsedRetryAfter) * 1000;
2260
- const cooldown = applyRateLimitCooldownScope({
2230
+ const cooldown = applyRateLimitCooldown({
2261
2231
  state: accountState,
2262
- profile: requestProfile,
2263
2232
  retryAfterMs: cooldownMs,
2264
2233
  capMs: RATE_LIMIT_BACKOFF_CAP_MS,
2265
2234
  });
@@ -2836,7 +2805,7 @@ async function prepareAnthropicAccountAttempt(args) {
2836
2805
  };
2837
2806
  }
2838
2807
  async function fetchAnthropicAccountResponse(args) {
2839
- const { url, headers, finalBodyStr, account, accountState, requestProfile, enabledAccounts, orderedAccounts, tracer, logAttempt, currentLastError, currentSawRateLimit, currentSawNetworkError, upstreamSpan, } = args;
2808
+ const { url, headers, finalBodyStr, account, accountState, enabledAccounts, orderedAccounts, tracer, logAttempt, currentLastError, currentSawRateLimit, currentSawNetworkError, upstreamSpan, } = args;
2840
2809
  let lastError = currentLastError;
2841
2810
  let sawRateLimit = currentSawRateLimit;
2842
2811
  let sawNetworkError = currentSawNetworkError;
@@ -2889,9 +2858,8 @@ async function fetchAnthropicAccountResponse(args) {
2889
2858
  }
2890
2859
  }
2891
2860
  }
2892
- const cooldown = applyRateLimitCooldownScope({
2861
+ const cooldown = applyRateLimitCooldown({
2893
2862
  state: accountState,
2894
- profile: requestProfile,
2895
2863
  retryAfterMs: cooldownMs > 0 ? cooldownMs : undefined,
2896
2864
  capMs: RATE_LIMIT_BACKOFF_CAP_MS,
2897
2865
  });
@@ -2899,7 +2867,7 @@ async function fetchAnthropicAccountResponse(args) {
2899
2867
  recordAttemptError(account.label, account.type, 429);
2900
2868
  recordCooldown(account.label, account.type, Date.now() + cooldown.backoffMs, accountState.backoffLevel);
2901
2869
  lastError = await response.text();
2902
- logger.always(`[proxy] ← 429 account=${account.label} backoff-level=${accountState.backoffLevel} cooldown=${Math.round(cooldown.backoffMs / 1000)}s request-class=${cooldown.requestClassKey} model-tier=${cooldown.modelTierKey}`);
2870
+ logger.always(`[proxy] ← 429 account=${account.label} backoff-level=${accountState.backoffLevel} cooldown=${Math.round(cooldown.backoffMs / 1000)}s`);
2903
2871
  logAttempt(429, "rate_limit_error", String(lastError));
2904
2872
  tracer?.setError("rate_limit_error", String(lastError).slice(0, 500));
2905
2873
  tracer?.recordRetry(account.label, "rate_limit");
@@ -2924,7 +2892,6 @@ async function fetchAnthropicAccountResponse(args) {
2924
2892
  async function handleAnthropicRoutedClaudeRequest(args) {
2925
2893
  const { ctx, body, modelRouter, tracer, requestStartTime, accountStrategy, buildLoggedClaudeError, logProxyBody, logFinalRequest, } = args;
2926
2894
  const parsedRequest = parseClaudeRequest(body);
2927
- const requestProfile = classifyClaudeProxyRequest(body.model, parsedRequest);
2928
2895
  const loadedAccounts = await loadClaudeProxyAccounts({
2929
2896
  ctx,
2930
2897
  body,
@@ -2947,14 +2914,16 @@ async function handleAnthropicRoutedClaudeRequest(args) {
2947
2914
  attemptNumber: 0,
2948
2915
  };
2949
2916
  const acctSelectionSpan = tracer?.startAccountSelection();
2950
- const accountPartition = partitionAccountsByCooldown(orderedAccounts, (account) => getOrCreateRuntimeState(account.key), requestProfile);
2917
+ const accountPartition = partitionAccountsByCooldown(orderedAccounts, (account) => getOrCreateRuntimeState(account.key));
2951
2918
  for (const skippedAccount of accountPartition.skipped) {
2952
- if (skippedAccount.cooldown.scope === "request_class" ||
2953
- skippedAccount.cooldown.scope === "model_tier") {
2954
- loopState.sawRateLimit = true;
2955
- }
2956
- loopState.lastError = `Skipped account=${skippedAccount.account.label} due to ${skippedAccount.cooldown.scope} cooldown ${skippedAccount.cooldown.key}`;
2957
- logger.always(`[proxy] skipping account=${skippedAccount.account.label} due to ${skippedAccount.cooldown.scope} cooldown=${skippedAccount.cooldown.key} remaining=${Math.max(1, Math.ceil((skippedAccount.cooldown.until - Date.now()) / 1000))}s`);
2919
+ logger.always(`[proxy] skipping account=${skippedAccount.account.label} cooldown remaining=${Math.max(1, Math.ceil((skippedAccount.cooldown.until - Date.now()) / 1000))}s`);
2920
+ }
2921
+ // Only flag rate-limit when ALL accounts are cooling — if some are eligible,
2922
+ // let the actual attempt results determine sawRateLimit via real 429 responses.
2923
+ if (accountPartition.skipped.length > 0 &&
2924
+ accountPartition.eligible.length === 0) {
2925
+ loopState.sawRateLimit = true;
2926
+ loopState.lastError = `All ${accountPartition.skipped.length} accounts are cooling down`;
2958
2927
  }
2959
2928
  accountLoop: for (const account of accountPartition.eligible) {
2960
2929
  const accountState = getOrCreateRuntimeState(account.key);
@@ -3009,7 +2978,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
3009
2978
  finalBodyStr: preparedAttempt.finalBodyStr,
3010
2979
  account,
3011
2980
  accountState,
3012
- requestProfile,
3013
2981
  enabledAccounts,
3014
2982
  orderedAccounts,
3015
2983
  tracer,
@@ -3046,7 +3014,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
3046
3014
  body,
3047
3015
  account,
3048
3016
  accountState,
3049
- requestProfile,
3050
3017
  headers: preparedAttempt.headers,
3051
3018
  buildUpstreamBody: preparedAttempt.buildUpstreamBody,
3052
3019
  enabledAccounts,
@@ -3126,7 +3093,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
3126
3093
  body,
3127
3094
  account,
3128
3095
  accountState,
3129
- requestProfile,
3130
3096
  response,
3131
3097
  tracer,
3132
3098
  requestStartTime,
@@ -3150,7 +3116,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
3150
3116
  ctx,
3151
3117
  body,
3152
3118
  parsedFallbackRequest: parsedRequest,
3153
- requestProfile,
3154
3119
  modelRouter,
3155
3120
  tracer,
3156
3121
  requestStartTime,
@@ -3160,8 +3125,9 @@ async function handleAnthropicRoutedClaudeRequest(args) {
3160
3125
  if (configuredFallbackResult.response) {
3161
3126
  return configuredFallbackResult.response;
3162
3127
  }
3163
- const configuredChain = modelRouter?.getFallbackChain() ?? [];
3164
- if (configuredChain.length === 0 && !loopState.sawRateLimit) {
3128
+ // Try auto-provider fallback when the configured chain didn't produce a
3129
+ // response (either no chain configured, or all entries failed/deduped).
3130
+ if (!loopState.sawRateLimit) {
3165
3131
  const autoFallbackResponse = await tryAutoClaudeFallback({
3166
3132
  ctx,
3167
3133
  body,
@@ -3184,8 +3150,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
3184
3150
  sawRateLimit: loopState.sawRateLimit,
3185
3151
  lastError: loopState.lastError,
3186
3152
  orderedAccounts,
3187
- requestProfile,
3188
- fallbackPolicyReason: configuredFallbackResult.fallbackPolicyReason,
3189
3153
  buildLoggedClaudeError,
3190
3154
  logProxyBody,
3191
3155
  logFinalRequest,
@@ -3406,8 +3370,6 @@ function getOrCreateRuntimeState(accountKey) {
3406
3370
  backoffLevel: 0,
3407
3371
  consecutiveRefreshFailures: 0,
3408
3372
  permanentlyDisabled: false,
3409
- requestClassCooldowns: {},
3410
- modelTierCooldowns: {},
3411
3373
  };
3412
3374
  accountRuntimeState.set(accountKey, initial);
3413
3375
  return initial;
@@ -641,10 +641,6 @@ export type RuntimeAccountState = {
641
641
  backoffLevel: number;
642
642
  consecutiveRefreshFailures: number;
643
643
  permanentlyDisabled: boolean;
644
- requestClassCooldowns?: Record<string, number>;
645
- modelTierCooldowns?: Record<string, number>;
646
- requestClassBackoffLevels?: Record<string, number>;
647
- modelTierBackoffLevels?: Record<string, number>;
648
644
  lastToken?: string;
649
645
  lastRefreshToken?: string;
650
646
  };
@@ -697,63 +693,26 @@ export type CachedSession = {
697
693
  };
698
694
  /** Model tier classification for proxy routing decisions. */
699
695
  export type ClaudeProxyModelTier = "opus" | "sonnet" | "haiku" | "other";
700
- /** Request class for proxy routing policy. */
701
- export type ClaudeProxyRequestClass = "multimodal" | "high-tool-count-non-stream-structured" | "strong-tool-fidelity" | "streaming-conversational" | "standard";
702
- /** Full classification profile for a proxy request. */
703
- export type ClaudeProxyRequestProfile = {
704
- requestedModel: string;
705
- modelTier: ClaudeProxyModelTier;
706
- primaryClass: ClaudeProxyRequestClass;
707
- classes: ClaudeProxyRequestClass[];
708
- stream: boolean;
709
- toolCount: number;
710
- hasImages: boolean;
711
- hasThinking: boolean;
712
- hasToolHistory: boolean;
713
- requiresToolUse: boolean;
714
- requiresSpecificTool: boolean;
715
- requiresStrongToolFidelity: boolean;
716
- isHighToolCountNonStream: boolean;
717
- isStreamingConversational: boolean;
718
- isMultimodal: boolean;
719
- };
720
- /** Outcome of evaluating a single fallback candidate. */
721
- export type FallbackEligibilityDecision = {
722
- provider?: string;
723
- model?: string;
724
- eligible: boolean;
725
- reason: string;
726
- };
727
696
  /** A single provider attempt in the proxy translation plan. */
728
697
  export type ProxyTranslationAttempt = {
729
698
  provider?: string;
730
699
  model?: string;
731
700
  label: string;
732
701
  };
733
- /** Ordered plan of provider attempts and skipped candidates. */
702
+ /** Ordered plan of provider attempts for a proxy request. */
734
703
  export type ProxyTranslationPlan = {
735
- profile: ClaudeProxyRequestProfile;
704
+ requestedModel: string;
705
+ modelTier: ClaudeProxyModelTier;
736
706
  attempts: ProxyTranslationAttempt[];
737
- skipped: FallbackEligibilityDecision[];
707
+ skipped: never[];
738
708
  };
739
- /** Discriminated union describing why a cooldown is active. */
740
- export type CooldownScope = {
741
- scope: "request_class";
742
- key: string;
743
- until: number;
744
- } | {
745
- scope: "model_tier";
746
- key: string;
747
- until: number;
748
- } | {
749
- scope: "generic";
750
- key: "generic";
751
- until: number;
752
- };
753
- /** An account skipped during partitioning, with the cooldown that caused it. */
709
+ /** An account skipped during partitioning, with its cooldown info. */
754
710
  export type CooldownSkippedAccount<T> = {
755
711
  account: T;
756
- cooldown: CooldownScope;
712
+ cooldown: {
713
+ until: number;
714
+ backoffLevel: number;
715
+ };
757
716
  };
758
717
  /** Mutable readiness state tracked by the proxy process. */
759
718
  export type ProxyReadinessState = {
@@ -350,11 +350,9 @@ export function convertToModelMessages(messages) {
350
350
  // Assistant messages only support text content, filter out images
351
351
  const textOnlyContent = validContent.filter((item) => item.type === "text");
352
352
  if (textOnlyContent.length === 0) {
353
- // If no text content, convert to empty string
354
- return {
355
- role: "assistant",
356
- content: "",
357
- };
353
+ // No text content (e.g., only images/files) — skip message
354
+ // to avoid sending empty content to providers like Claude
355
+ return null;
358
356
  }
359
357
  else if (textOnlyContent.length === 1) {
360
358
  // Single text item, use string content
@@ -1073,9 +1071,44 @@ export async function buildMultimodalMessagesArray(options, provider, model) {
1073
1071
  msg.role === "assistant" ||
1074
1072
  msg.role === "system") {
1075
1073
  const providerOptions = msg.providerOptions;
1074
+ // Sanitize assistant array content: strip tool_use/tool_result blocks
1075
+ // that providers cannot handle. If an assistant message ends up empty
1076
+ // after stripping, skip it to avoid sending content: "" to Claude.
1077
+ // Only assistant messages need this — user messages may contain valid
1078
+ // image/file blocks that must pass through unchanged.
1079
+ let sanitizedContent = msg.content;
1080
+ if (msg.role === "assistant" && Array.isArray(msg.content)) {
1081
+ const textParts = msg.content.filter((item) => !!item &&
1082
+ typeof item === "object" &&
1083
+ item.type === "text" &&
1084
+ typeof item.text === "string");
1085
+ if (textParts.length === 0) {
1086
+ // All content was tool_use/tool_result/non-text — skip message
1087
+ continue;
1088
+ }
1089
+ // Check if any retained text part carries providerOptions
1090
+ // (e.g. Anthropic cache_control). If so, preserve them as
1091
+ // array content to avoid losing per-block metadata.
1092
+ const hasItemProviderOptions = textParts.some((item) => !!item.providerOptions);
1093
+ if (hasItemProviderOptions) {
1094
+ sanitizedContent = textParts;
1095
+ }
1096
+ else {
1097
+ sanitizedContent =
1098
+ textParts.length === 1
1099
+ ? textParts[0].text
1100
+ : textParts
1101
+ .map((p) => p.text)
1102
+ .join(" ");
1103
+ }
1104
+ }
1105
+ // Skip empty string content to avoid Claude API rejection
1106
+ if (sanitizedContent === "") {
1107
+ continue;
1108
+ }
1076
1109
  messages.push({
1077
1110
  role: msg.role,
1078
- content: msg.content,
1111
+ content: sanitizedContent,
1079
1112
  ...(providerOptions && { providerOptions }),
1080
1113
  });
1081
1114
  }
@@ -5,6 +5,20 @@ import { BaseProvider } from "../core/baseProvider.js";
5
5
  import type { EnhancedGenerateResult, TextGenerationOptions } from "../types/generateTypes.js";
6
6
  import type { NeurolinkCredentials } from "../types/providers.js";
7
7
  import type { StreamOptions, StreamResult } from "../types/streamTypes.js";
8
+ /**
9
+ * Resolve the correct Vertex AI location for a given model.
10
+ *
11
+ * Google-published models (gemini-*) require the global endpoint
12
+ * (`aiplatform.googleapis.com`), not regional endpoints like
13
+ * `us-east5-aiplatform.googleapis.com`. Regional endpoints return
14
+ * "model not found" for these models.
15
+ *
16
+ * Anthropic-on-Vertex models (claude-*) require regional endpoints
17
+ * and are handled separately by `createVertexAnthropicSettings`.
18
+ *
19
+ * Embedding models and custom models use the configured location as-is.
20
+ */
21
+ export declare const resolveVertexLocation: (modelName: string | undefined, configuredLocation: string) => string;
8
22
  /**
9
23
  * Vertex Model Aliases
10
24
  *
@@ -79,7 +79,36 @@ const getVertexLocation = () => {
79
79
  return (process.env.GOOGLE_CLOUD_LOCATION ||
80
80
  process.env.VERTEX_LOCATION ||
81
81
  process.env.GOOGLE_VERTEX_LOCATION ||
82
- "us-central1");
82
+ "global");
83
+ };
84
+ /**
85
+ * Resolve the correct Vertex AI location for a given model.
86
+ *
87
+ * Google-published models (gemini-*) require the global endpoint
88
+ * (`aiplatform.googleapis.com`), not regional endpoints like
89
+ * `us-east5-aiplatform.googleapis.com`. Regional endpoints return
90
+ * "model not found" for these models.
91
+ *
92
+ * Anthropic-on-Vertex models (claude-*) require regional endpoints
93
+ * and are handled separately by `createVertexAnthropicSettings`.
94
+ *
95
+ * Embedding models and custom models use the configured location as-is.
96
+ */
97
+ export const resolveVertexLocation = (modelName, configuredLocation) => {
98
+ if (!modelName) {
99
+ return configuredLocation;
100
+ }
101
+ const normalized = modelName.toLowerCase();
102
+ // Google-published models always use the global endpoint.
103
+ // Hardcoded because Google's Vertex AI serves Gemini models exclusively
104
+ // from the global endpoint — regional endpoints like us-east5 return
105
+ // "Publisher Model was not found" errors. The env var GOOGLE_VERTEX_LOCATION
106
+ // is typically set for Anthropic-on-Vertex (which needs regional), so we
107
+ // cannot rely on it for Gemini routing.
108
+ if (normalized.startsWith("gemini-")) {
109
+ return "global";
110
+ }
111
+ return configuredLocation;
83
112
  };
84
113
  const getDefaultVertexModel = () => {
85
114
  // Use gemini-2.5-flash as default - latest and best price-performance model
@@ -96,8 +125,9 @@ const hasGoogleCredentials = () => {
96
125
  // Module-level cache for runtime-created credentials file to avoid per-request writes
97
126
  let cachedCredentialsPath = null;
98
127
  // Enhanced Vertex settings creation with authentication fallback and proxy support
99
- const createVertexSettings = async (region, credentials) => {
100
- const location = credentials?.location || region || getVertexLocation();
128
+ const createVertexSettings = async (region, credentials, modelName) => {
129
+ const configuredLocation = credentials?.location || region || getVertexLocation();
130
+ const location = resolveVertexLocation(modelName, configuredLocation);
101
131
  const project = credentials?.projectId || getVertexProjectId();
102
132
  const baseSettings = {
103
133
  project,
@@ -326,7 +356,12 @@ const createVertexAnthropicSettings = async (region, credentials) => {
326
356
  // which is invalid. The correct global endpoint omits the region prefix entirely.
327
357
  // Since the SDK doesn't handle this, redirect "global" to "us-east5" for Anthropic.
328
358
  const anthropicRegion = !region || region === "global" ? "us-east5" : region;
329
- const baseVertexSettings = await createVertexSettings(anthropicRegion, credentials);
359
+ // Override credentials.location so it cannot conflict with the redirected
360
+ // region — createVertexSettings checks credentials.location first.
361
+ const anthropicCredentials = credentials?.location
362
+ ? { ...credentials, location: anthropicRegion }
363
+ : credentials;
364
+ const baseVertexSettings = await createVertexSettings(anthropicRegion, anthropicCredentials);
330
365
  // GoogleVertexAnthropicProviderSettings extends GoogleVertexProviderSettings
331
366
  // so we can use the same settings with proper typing
332
367
  return {
@@ -570,7 +605,9 @@ export class GoogleVertexProvider extends BaseProvider {
570
605
  networkConfig: {
571
606
  projectId: this.projectId,
572
607
  location: this.location,
573
- expectedEndpoint: `https://${this.location}-aiplatform.googleapis.com`,
608
+ expectedEndpoint: this.location === "global"
609
+ ? "https://aiplatform.googleapis.com"
610
+ : `https://${this.location}-aiplatform.googleapis.com`,
574
611
  httpProxy: process.env.HTTP_PROXY || process.env.http_proxy,
575
612
  httpsProxy: process.env.HTTPS_PROXY || process.env.https_proxy,
576
613
  noProxy: process.env.NO_PROXY || process.env.no_proxy,
@@ -582,7 +619,7 @@ export class GoogleVertexProvider extends BaseProvider {
582
619
  message: "Starting Vertex settings creation with network configuration analysis",
583
620
  });
584
621
  try {
585
- const vertexSettings = await createVertexSettings(this.location, this.credentials);
622
+ const vertexSettings = await createVertexSettings(this.location, this.credentials, modelName);
586
623
  const vertexSettingsEndTime = process.hrtime.bigint();
587
624
  const vertexSettingsDurationNs = vertexSettingsEndTime - vertexSettingsStartTime;
588
625
  logger.debug(`[GoogleVertexProvider] ✅ LOG_POINT_V009_VERTEX_SETTINGS_SUCCESS`, {
@@ -1116,12 +1153,13 @@ export class GoogleVertexProvider extends BaseProvider {
1116
1153
  /**
1117
1154
  * Create @google/genai client configured for Vertex AI
1118
1155
  */
1119
- async createVertexGenAIClient(regionOverride) {
1156
+ async createVertexGenAIClient(regionOverride, modelName) {
1120
1157
  const project = this.credentials?.projectId || getVertexProjectId();
1121
- const location = this.credentials?.location ||
1158
+ const configuredLocation = this.credentials?.location ||
1122
1159
  regionOverride ||
1123
1160
  this.location ||
1124
1161
  getVertexLocation();
1162
+ const location = resolveVertexLocation(modelName, configuredLocation);
1125
1163
  const mod = await import("@google/genai");
1126
1164
  const ctor = mod.GoogleGenAI;
1127
1165
  if (!ctor) {
@@ -1308,8 +1346,8 @@ export class GoogleVertexProvider extends BaseProvider {
1308
1346
  }, (span) => this.executeNativeGemini3StreamWithSpan(options, modelName, span));
1309
1347
  }
1310
1348
  async executeNativeGemini3StreamWithSpan(options, modelName, span) {
1311
- const client = await this.createVertexGenAIClient(options.region);
1312
- const effectiveLocation = options.region || this.location || getVertexLocation();
1349
+ const client = await this.createVertexGenAIClient(options.region, modelName);
1350
+ const effectiveLocation = resolveVertexLocation(modelName, options.region || this.location || getVertexLocation());
1313
1351
  logger.debug("[GoogleVertex] Using native @google/genai for Gemini 3", {
1314
1352
  model: modelName,
1315
1353
  hasTools: !!options.tools && Object.keys(options.tools).length > 0,
@@ -1503,8 +1541,8 @@ export class GoogleVertexProvider extends BaseProvider {
1503
1541
  [ATTR.NL_PROVIDER]: this.providerName,
1504
1542
  },
1505
1543
  }, async (span) => {
1506
- const client = await this.createVertexGenAIClient(options.region);
1507
- const effectiveLocation = options.region || this.location || getVertexLocation();
1544
+ const client = await this.createVertexGenAIClient(options.region, modelName);
1545
+ const effectiveLocation = resolveVertexLocation(modelName, options.region || this.location || getVertexLocation());
1508
1546
  logger.debug("[GoogleVertex] Using native @google/genai for Gemini 3 generate", {
1509
1547
  model: modelName,
1510
1548
  project: this.projectId,
@@ -1,33 +1,43 @@
1
- import type { ClaudeProxyModelTier, ClaudeProxyRequestClass, ClaudeProxyRequestProfile, CooldownScope, CooldownSkippedAccount, FallbackEligibilityDecision, FallbackEntry, ParsedClaudeRequest, ProxyTranslationAttempt, ProxyTranslationPlan, RuntimeAccountState } from "../types/index.js";
2
- export type { ClaudeProxyModelTier, ClaudeProxyRequestClass, ClaudeProxyRequestProfile, CooldownScope, CooldownSkippedAccount, FallbackEligibilityDecision, ProxyTranslationAttempt, ProxyTranslationPlan, };
1
+ import type { ClaudeProxyModelTier, CooldownSkippedAccount, FallbackEntry, ParsedClaudeRequest, ProxyTranslationAttempt, ProxyTranslationPlan, RuntimeAccountState } from "../types/index.js";
2
+ export type { ClaudeProxyModelTier, ProxyTranslationAttempt, ProxyTranslationPlan, };
3
3
  export declare function inferClaudeProxyModelTier(modelName: string): ClaudeProxyModelTier;
4
- export declare function classifyClaudeProxyRequest(requestedModel: string, parsed: ParsedClaudeRequest): ClaudeProxyRequestProfile;
5
- export declare function getRequestClassCooldownKey(profile: ClaudeProxyRequestProfile): string;
6
- export declare function getModelTierCooldownKey(profile: ClaudeProxyRequestProfile): string;
7
- export declare function evaluateFallbackEligibility(profile: ClaudeProxyRequestProfile, candidate: {
8
- provider?: string;
9
- model?: string;
10
- }): FallbackEligibilityDecision;
4
+ /**
5
+ * Build a translation plan for a Claude-compatible proxy request.
6
+ * The plan lists the primary provider followed by eligible fallback targets.
7
+ * All configured fallback entries are always eligible — no contract-based gating.
8
+ * When no fallback chain is configured, an "auto-provider" entry is appended.
9
+ */
11
10
  export declare function buildProxyTranslationPlan(primary: {
12
11
  provider: string;
13
12
  model?: string;
14
- }, fallbackChain: FallbackEntry[], requestedModel: string, parsed: ParsedClaudeRequest): ProxyTranslationPlan;
15
- export declare function summarizeSkippedFallbacks(plan: Pick<ProxyTranslationPlan, "profile" | "skipped">): string | null;
16
- export declare function getActiveCooldownScope(state: RuntimeAccountState, profile: ClaudeProxyRequestProfile, now?: number): CooldownScope | null;
13
+ }, fallbackChain: FallbackEntry[], requestedModel: string, _parsed: ParsedClaudeRequest): ProxyTranslationPlan;
14
+ /**
15
+ * Check whether an account is currently cooling down.
16
+ * Returns the cooldown timestamp if active, null otherwise.
17
+ */
18
+ export declare function getAccountCooldownUntil(state: RuntimeAccountState, now?: number): number | null;
19
+ /**
20
+ * Partition accounts into eligible (no cooldown) and skipped (cooling down).
21
+ */
17
22
  export declare function partitionAccountsByCooldown<T extends {
18
23
  key: string;
19
- }>(accounts: T[], getState: (account: T) => RuntimeAccountState, profile: ClaudeProxyRequestProfile, now?: number): {
24
+ }>(accounts: T[], getState: (account: T) => RuntimeAccountState, now?: number): {
20
25
  eligible: T[];
21
26
  skipped: CooldownSkippedAccount<T>[];
22
27
  };
23
- export declare function applyRateLimitCooldownScope(args: {
28
+ /**
29
+ * Apply a rate-limit cooldown to an account.
30
+ * Uses simple exponential backoff with a floor and cap.
31
+ */
32
+ export declare function applyRateLimitCooldown(args: {
24
33
  state: RuntimeAccountState;
25
- profile: ClaudeProxyRequestProfile;
26
34
  retryAfterMs?: number;
27
35
  now?: number;
28
36
  capMs: number;
29
37
  }): {
30
38
  backoffMs: number;
31
- requestClassKey: string;
32
- modelTierKey: string;
33
39
  };
40
+ /**
41
+ * Clear cooldown state for an account after a successful request.
42
+ */
43
+ export declare function clearAccountCooldown(state: RuntimeAccountState): void;