npm - @juspay/neurolink - Versions diffs - 9.54.1 → 9.54.2 - Mend

@juspay/neurolink 9.54.1 → 9.54.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (17) hide show

package/CHANGELOG.md +6 -0
package/dist/browser/neurolink.min.js +296 -296
package/dist/lib/providers/googleVertex.d.ts +14 -0
package/dist/lib/providers/googleVertex.js +50 -12
package/dist/lib/proxy/routingPolicy.d.ts +27 -17
package/dist/lib/proxy/routingPolicy.js +53 -209
package/dist/lib/server/routes/claudeProxyRoutes.js +35 -73
package/dist/lib/types/proxyTypes.d.ts +9 -50
package/dist/lib/utils/messageBuilder.js +39 -6
package/dist/providers/googleVertex.d.ts +14 -0
package/dist/providers/googleVertex.js +50 -12
package/dist/proxy/routingPolicy.d.ts +27 -17
package/dist/proxy/routingPolicy.js +53 -209
package/dist/server/routes/claudeProxyRoutes.js +35 -73
package/dist/types/proxyTypes.d.ts +9 -50
package/dist/utils/messageBuilder.js +39 -6
package/package.json +1 -1

package/dist/lib/server/routes/claudeProxyRoutes.js CHANGED Viewed

@@ -20,7 +20,7 @@ import { createRawStreamCapture } from "../../proxy/rawStreamCapture.js";
 import { logBodyCapture, logRequest, logRequestAttempt, logStreamError, } from "../../proxy/requestLogger.js";
 import { createSSEInterceptor } from "../../proxy/sseInterceptor.js";
 import { needsRefresh, persistTokens, refreshToken, } from "../../proxy/tokenRefresh.js";
-import { applyRateLimitCooldownScope, buildProxyTranslationPlan, classifyClaudeProxyRequest, getActiveCooldownScope, partitionAccountsByCooldown, summarizeSkippedFallbacks, } from "../../proxy/routingPolicy.js";
+import { applyRateLimitCooldown, buildProxyTranslationPlan, clearAccountCooldown, getAccountCooldownUntil, partitionAccountsByCooldown, } from "../../proxy/routingPolicy.js";
 import { writeJsonSnapshotAtomically } from "../../proxy/snapshotPersistence.js";
 import { recordAttempt, recordAttemptError, recordCooldown, recordFinalError, recordFinalSuccess, } from "../../proxy/usageStats.js";
 import { logger } from "../../utils/logger.js";
@@ -440,9 +440,7 @@ function logProxyRoutingPlan(logProxyBody, stage, plan) {
         contentType: "application/json",
         body: {
             stage,
-            requestProfile: plan.profile,
             attempts: plan.attempts,
-            skipped: plan.skipped,
         },
     });
 }
@@ -1347,30 +1345,21 @@ async function executeClaudeFallbackTranslation(args) {
     return clientResponse;
 }
 async function tryConfiguredClaudeFallbackChain(args) {
-    const { ctx, body, parsedFallbackRequest, requestProfile, modelRouter, tracer, requestStartTime, logProxyBody, logFinalRequest, } = args;
+    const { ctx, body, parsedFallbackRequest, modelRouter, tracer, requestStartTime, logProxyBody, logFinalRequest, } = args;
     const chain = modelRouter?.getFallbackChain() ?? [];
     const fallbackPlan = buildProxyTranslationPlan({ provider: "anthropic", model: body.model }, chain, body.model, parsedFallbackRequest);
-    const fallbackPolicyReason = summarizeSkippedFallbacks(fallbackPlan);
     logProxyBody({
         phase: "routing_decision",
         contentType: "application/json",
         body: {
             stage: "anthropic_fallback",
-            requestProfile,
             attempts: fallbackPlan.attempts.slice(1),
-            skipped: fallbackPlan.skipped,
         },
     });
-    for (const skipped of fallbackPlan.skipped) {
-        const label = skipped.provider
-            ? `${skipped.provider}/${skipped.model ?? "unknown"}`
-            : "auto-provider";
-        logger.always(`[proxy] skipping fallback ${label}: ${skipped.reason}`);
-    }
     tracer?.setFallbackInfo({
         triggered: true,
         attemptCount: fallbackPlan.attempts.slice(1).length,
-        reason: fallbackPolicyReason ?? "all_anthropic_accounts_exhausted",
+        reason: "all_anthropic_accounts_exhausted",
     });
     for (const fallback of fallbackPlan.attempts.slice(1)) {
         if (!fallback.provider || !fallback.model) {
@@ -1410,10 +1399,7 @@ async function tryConfiguredClaudeFallbackChain(args) {
                 attemptCount: fallbackPlan.attempts.slice(1).length,
                 reason: "fallback_success",
             });
-            return {
-                response,
-                fallbackPolicyReason,
-            };
+            return { response };
         }
         catch (fallbackErr) {
             const errMsg = fallbackErr instanceof Error
@@ -1450,10 +1436,7 @@ async function tryConfiguredClaudeFallbackChain(args) {
             });
         }
     }
-    return {
-        response: null,
-        fallbackPolicyReason,
-    };
+    return { response: null };
 }
 async function tryAutoClaudeFallback(args) {
     const { ctx, body, tracer, requestStartTime, logProxyBody, logFinalRequest } = args;
@@ -1484,7 +1467,7 @@ async function tryAutoClaudeFallback(args) {
     }
 }
 function buildClaudeAnthropicFailureResponse(args) {
-    const { tracer, requestStartTime, authFailureMessage, invalidRequestFailure, sawNetworkError, sawTransientFailure, sawRateLimit, lastError, orderedAccounts, requestProfile, fallbackPolicyReason, buildLoggedClaudeError, logProxyBody, logFinalRequest, } = args;
+    const { tracer, requestStartTime, authFailureMessage, invalidRequestFailure, sawNetworkError, sawTransientFailure, sawRateLimit, lastError, orderedAccounts, buildLoggedClaudeError, logProxyBody, logFinalRequest, } = args;
     if (authFailureMessage && !sawRateLimit) {
         tracer?.setError("authentication_error", authFailureMessage);
         tracer?.end(401, Date.now() - requestStartTime);
@@ -1531,21 +1514,21 @@ function buildClaudeAnthropicFailureResponse(args) {
         return buildLoggedClaudeError(502, msg);
     }
     const earliestRecovery = orderedAccounts.reduce((min, account) => {
-        const cooldown = getActiveCooldownScope(getOrCreateRuntimeState(account.key), requestProfile);
-        return cooldown ? Math.min(min, cooldown.until) : min;
+        const until = getAccountCooldownUntil(getOrCreateRuntimeState(account.key));
+        return until !== null ? Math.min(min, until) : min;
     }, Infinity);
+    // If no active cooldown remains (expired while retries ran), use 1s
+    // instead of fabricating a long retry-after.
     const retryAfterSec = Number.isFinite(earliestRecovery)
         ? Math.max(1, Math.ceil((earliestRecovery - Date.now()) / 1000))
-        : 60;
-    const contractMessage = fallbackPolicyReason
-        ? ` ${fallbackPolicyReason}`
-        : "";
-    logger.always(`[proxy] all accounts rate-limited for request-class=${requestProfile.primaryClass}, retry in ${retryAfterSec}s`);
-    const errorBody = buildClaudeError(429, `All accounts rate-limited. Earliest recovery in ${retryAfterSec}s.${contractMessage}`, "overloaded_error");
-    tracer?.setError("rate_limit_error", `All accounts rate-limited. Retry in ${retryAfterSec}s.${contractMessage}`);
+        : 1;
+    const errorMessage = `All accounts rate-limited. Earliest recovery in ${retryAfterSec}s.`;
+    logger.always(`[proxy] all accounts rate-limited, retry in ${retryAfterSec}s`);
+    const errorBody = buildClaudeError(429, errorMessage, "overloaded_error");
+    tracer?.setError("rate_limit_error", errorMessage);
     tracer?.end(429, Date.now() - requestStartTime);
     recordFinalError(429);
-    logFinalRequest(429, "", "final", "rate_limit_error", `All accounts rate-limited. Retry in ${retryAfterSec}s.${contractMessage}`);
+    logFinalRequest(429, "", "final", "rate_limit_error", errorMessage);
     const errorBodyText = JSON.stringify(errorBody);
     logProxyBody({
         phase: "client_response",
@@ -1568,22 +1551,9 @@ function buildClaudeAnthropicFailureResponse(args) {
     });
 }
 async function handleAnthropicSuccessfulResponse(args) {
-    const { ctx, body, account, accountState, requestProfile, response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest, } = args;
-    accountState.backoffLevel = 0;
-    accountState.coolingUntil = undefined;
+    const { ctx, body, account, accountState, response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest, } = args;
+    clearAccountCooldown(accountState);
     accountState.consecutiveRefreshFailures = 0;
-    if (accountState.requestClassCooldowns) {
-        delete accountState.requestClassCooldowns[`${requestProfile.primaryClass}:${requestProfile.requestedModel.toLowerCase()}`];
-    }
-    if (accountState.modelTierCooldowns) {
-        delete accountState.modelTierCooldowns[requestProfile.modelTier];
-    }
-    if (accountState.requestClassBackoffLevels) {
-        delete accountState.requestClassBackoffLevels[`${requestProfile.primaryClass}:${requestProfile.requestedModel.toLowerCase()}`];
-    }
-    if (accountState.modelTierBackoffLevels) {
-        delete accountState.modelTierBackoffLevels[requestProfile.modelTier];
-    }
     logger.always(`[proxy] ← ${response.status} account=${account.label}`);
     const quota = parseQuotaHeaders(response.headers);
     if (quota) {
@@ -2173,7 +2143,7 @@ async function handleAnthropicSuccessfulRetryResponse(args) {
     return retryJson;
 }
 async function handleAnthropicAuthRetry(args) {
-    const { ctx, body, account, accountState, requestProfile, headers, buildUpstreamBody, enabledAccounts, orderedAccounts, response: _response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logAttempt, logProxyBody, logFinalRequest, lastError, authFailureMessage, sawRateLimit, sawTransientFailure, sawNetworkError, } = args;
+    const { ctx, body, account, accountState, headers, buildUpstreamBody, enabledAccounts, orderedAccounts, response: _response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logAttempt, logProxyBody, logFinalRequest, lastError, authFailureMessage, sawRateLimit, sawTransientFailure, sawNetworkError, } = args;
     recordAttemptError(account.label, account.type, 401);
     let currentLastError = lastError;
     let currentAuthFailureMessage = authFailureMessage;
@@ -2257,9 +2227,8 @@ async function handleAnthropicAuthRetry(args) {
                 const cooldownMs = Number.isNaN(parsedRetryAfter)
                     ? 60_000
                     : Math.max(1, parsedRetryAfter) * 1000;
-                const cooldown = applyRateLimitCooldownScope({
+                const cooldown = applyRateLimitCooldown({
                     state: accountState,
-                    profile: requestProfile,
                     retryAfterMs: cooldownMs,
                     capMs: RATE_LIMIT_BACKOFF_CAP_MS,
                 });
@@ -2836,7 +2805,7 @@ async function prepareAnthropicAccountAttempt(args) {
     };
 }
 async function fetchAnthropicAccountResponse(args) {
-    const { url, headers, finalBodyStr, account, accountState, requestProfile, enabledAccounts, orderedAccounts, tracer, logAttempt, currentLastError, currentSawRateLimit, currentSawNetworkError, upstreamSpan, } = args;
+    const { url, headers, finalBodyStr, account, accountState, enabledAccounts, orderedAccounts, tracer, logAttempt, currentLastError, currentSawRateLimit, currentSawNetworkError, upstreamSpan, } = args;
     let lastError = currentLastError;
     let sawRateLimit = currentSawRateLimit;
     let sawNetworkError = currentSawNetworkError;
@@ -2889,9 +2858,8 @@ async function fetchAnthropicAccountResponse(args) {
                 }
             }
         }
-        const cooldown = applyRateLimitCooldownScope({
+        const cooldown = applyRateLimitCooldown({
             state: accountState,
-            profile: requestProfile,
             retryAfterMs: cooldownMs > 0 ? cooldownMs : undefined,
             capMs: RATE_LIMIT_BACKOFF_CAP_MS,
         });
@@ -2899,7 +2867,7 @@ async function fetchAnthropicAccountResponse(args) {
         recordAttemptError(account.label, account.type, 429);
         recordCooldown(account.label, account.type, Date.now() + cooldown.backoffMs, accountState.backoffLevel);
         lastError = await response.text();
-        logger.always(`[proxy] ← 429 account=${account.label} backoff-level=${accountState.backoffLevel} cooldown=${Math.round(cooldown.backoffMs / 1000)}s request-class=${cooldown.requestClassKey} model-tier=${cooldown.modelTierKey}`);
+        logger.always(`[proxy] ← 429 account=${account.label} backoff-level=${accountState.backoffLevel} cooldown=${Math.round(cooldown.backoffMs / 1000)}s`);
         logAttempt(429, "rate_limit_error", String(lastError));
         tracer?.setError("rate_limit_error", String(lastError).slice(0, 500));
         tracer?.recordRetry(account.label, "rate_limit");
@@ -2924,7 +2892,6 @@ async function fetchAnthropicAccountResponse(args) {
 async function handleAnthropicRoutedClaudeRequest(args) {
     const { ctx, body, modelRouter, tracer, requestStartTime, accountStrategy, buildLoggedClaudeError, logProxyBody, logFinalRequest, } = args;
     const parsedRequest = parseClaudeRequest(body);
-    const requestProfile = classifyClaudeProxyRequest(body.model, parsedRequest);
     const loadedAccounts = await loadClaudeProxyAccounts({
         ctx,
         body,
@@ -2947,14 +2914,16 @@ async function handleAnthropicRoutedClaudeRequest(args) {
         attemptNumber: 0,
     };
     const acctSelectionSpan = tracer?.startAccountSelection();
-    const accountPartition = partitionAccountsByCooldown(orderedAccounts, (account) => getOrCreateRuntimeState(account.key), requestProfile);
+    const accountPartition = partitionAccountsByCooldown(orderedAccounts, (account) => getOrCreateRuntimeState(account.key));
     for (const skippedAccount of accountPartition.skipped) {
-        if (skippedAccount.cooldown.scope === "request_class" ||
-            skippedAccount.cooldown.scope === "model_tier") {
-            loopState.sawRateLimit = true;
-        }
-        loopState.lastError = `Skipped account=${skippedAccount.account.label} due to ${skippedAccount.cooldown.scope} cooldown ${skippedAccount.cooldown.key}`;
-        logger.always(`[proxy] skipping account=${skippedAccount.account.label} due to ${skippedAccount.cooldown.scope} cooldown=${skippedAccount.cooldown.key} remaining=${Math.max(1, Math.ceil((skippedAccount.cooldown.until - Date.now()) / 1000))}s`);
+        logger.always(`[proxy] skipping account=${skippedAccount.account.label} cooldown remaining=${Math.max(1, Math.ceil((skippedAccount.cooldown.until - Date.now()) / 1000))}s`);
+    }
+    // Only flag rate-limit when ALL accounts are cooling — if some are eligible,
+    // let the actual attempt results determine sawRateLimit via real 429 responses.
+    if (accountPartition.skipped.length > 0 &&
+        accountPartition.eligible.length === 0) {
+        loopState.sawRateLimit = true;
+        loopState.lastError = `All ${accountPartition.skipped.length} accounts are cooling down`;
     }
     accountLoop: for (const account of accountPartition.eligible) {
         const accountState = getOrCreateRuntimeState(account.key);
@@ -3009,7 +2978,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
                 finalBodyStr: preparedAttempt.finalBodyStr,
                 account,
                 accountState,
-                requestProfile,
                 enabledAccounts,
                 orderedAccounts,
                 tracer,
@@ -3046,7 +3014,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
                     body,
                     account,
                     accountState,
-                    requestProfile,
                     headers: preparedAttempt.headers,
                     buildUpstreamBody: preparedAttempt.buildUpstreamBody,
                     enabledAccounts,
@@ -3126,7 +3093,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
                 body,
                 account,
                 accountState,
-                requestProfile,
                 response,
                 tracer,
                 requestStartTime,
@@ -3150,7 +3116,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
         ctx,
         body,
         parsedFallbackRequest: parsedRequest,
-        requestProfile,
         modelRouter,
         tracer,
         requestStartTime,
@@ -3160,8 +3125,9 @@ async function handleAnthropicRoutedClaudeRequest(args) {
     if (configuredFallbackResult.response) {
         return configuredFallbackResult.response;
     }
-    const configuredChain = modelRouter?.getFallbackChain() ?? [];
-    if (configuredChain.length === 0 && !loopState.sawRateLimit) {
+    // Try auto-provider fallback when the configured chain didn't produce a
+    // response (either no chain configured, or all entries failed/deduped).
+    if (!loopState.sawRateLimit) {
         const autoFallbackResponse = await tryAutoClaudeFallback({
             ctx,
             body,
@@ -3184,8 +3150,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
         sawRateLimit: loopState.sawRateLimit,
         lastError: loopState.lastError,
         orderedAccounts,
-        requestProfile,
-        fallbackPolicyReason: configuredFallbackResult.fallbackPolicyReason,
         buildLoggedClaudeError,
         logProxyBody,
         logFinalRequest,
@@ -3406,8 +3370,6 @@ function getOrCreateRuntimeState(accountKey) {
         backoffLevel: 0,
         consecutiveRefreshFailures: 0,
         permanentlyDisabled: false,
-        requestClassCooldowns: {},
-        modelTierCooldowns: {},
     };
     accountRuntimeState.set(accountKey, initial);
     return initial;

package/dist/lib/types/proxyTypes.d.ts CHANGED Viewed

@@ -641,10 +641,6 @@ export type RuntimeAccountState = {
     backoffLevel: number;
     consecutiveRefreshFailures: number;
     permanentlyDisabled: boolean;
-    requestClassCooldowns?: Record<string, number>;
-    modelTierCooldowns?: Record<string, number>;
-    requestClassBackoffLevels?: Record<string, number>;
-    modelTierBackoffLevels?: Record<string, number>;
     lastToken?: string;
     lastRefreshToken?: string;
 };
@@ -697,63 +693,26 @@ export type CachedSession = {
 };
 /** Model tier classification for proxy routing decisions. */
 export type ClaudeProxyModelTier = "opus" | "sonnet" | "haiku" | "other";
-/** Request class for proxy routing policy. */
-export type ClaudeProxyRequestClass = "multimodal" | "high-tool-count-non-stream-structured" | "strong-tool-fidelity" | "streaming-conversational" | "standard";
-/** Full classification profile for a proxy request. */
-export type ClaudeProxyRequestProfile = {
-    requestedModel: string;
-    modelTier: ClaudeProxyModelTier;
-    primaryClass: ClaudeProxyRequestClass;
-    classes: ClaudeProxyRequestClass[];
-    stream: boolean;
-    toolCount: number;
-    hasImages: boolean;
-    hasThinking: boolean;
-    hasToolHistory: boolean;
-    requiresToolUse: boolean;
-    requiresSpecificTool: boolean;
-    requiresStrongToolFidelity: boolean;
-    isHighToolCountNonStream: boolean;
-    isStreamingConversational: boolean;
-    isMultimodal: boolean;
-};
-/** Outcome of evaluating a single fallback candidate. */
-export type FallbackEligibilityDecision = {
-    provider?: string;
-    model?: string;
-    eligible: boolean;
-    reason: string;
-};
 /** A single provider attempt in the proxy translation plan. */
 export type ProxyTranslationAttempt = {
     provider?: string;
     model?: string;
     label: string;
 };
-/** Ordered plan of provider attempts and skipped candidates. */
+/** Ordered plan of provider attempts for a proxy request. */
 export type ProxyTranslationPlan = {
-    profile: ClaudeProxyRequestProfile;
+    requestedModel: string;
+    modelTier: ClaudeProxyModelTier;
     attempts: ProxyTranslationAttempt[];
-    skipped: FallbackEligibilityDecision[];
+    skipped: never[];
 };
-/** Discriminated union describing why a cooldown is active. */
-export type CooldownScope = {
-    scope: "request_class";
-    key: string;
-    until: number;
-} | {
-    scope: "model_tier";
-    key: string;
-    until: number;
-} | {
-    scope: "generic";
-    key: "generic";
-    until: number;
-};
-/** An account skipped during partitioning, with the cooldown that caused it. */
+/** An account skipped during partitioning, with its cooldown info. */
 export type CooldownSkippedAccount<T> = {
     account: T;
-    cooldown: CooldownScope;
+    cooldown: {
+        until: number;
+        backoffLevel: number;
+    };
 };
 /** Mutable readiness state tracked by the proxy process. */
 export type ProxyReadinessState = {

package/dist/lib/utils/messageBuilder.js CHANGED Viewed

@@ -350,11 +350,9 @@ export function convertToModelMessages(messages) {
                 // Assistant messages only support text content, filter out images
                 const textOnlyContent = validContent.filter((item) => item.type === "text");
                 if (textOnlyContent.length === 0) {
-                    // If no text content, convert to empty string
-                    return {
-                        role: "assistant",
-                        content: "",
-                    };
+                    // No text content (e.g., only images/files) — skip message
+                    // to avoid sending empty content to providers like Claude
+                    return null;
                 }
                 else if (textOnlyContent.length === 1) {
                     // Single text item, use string content
@@ -1073,9 +1071,44 @@ export async function buildMultimodalMessagesArray(options, provider, model) {
                 msg.role === "assistant" ||
                 msg.role === "system") {
                 const providerOptions = msg.providerOptions;
+                // Sanitize assistant array content: strip tool_use/tool_result blocks
+                // that providers cannot handle. If an assistant message ends up empty
+                // after stripping, skip it to avoid sending content: "" to Claude.
+                // Only assistant messages need this — user messages may contain valid
+                // image/file blocks that must pass through unchanged.
+                let sanitizedContent = msg.content;
+                if (msg.role === "assistant" && Array.isArray(msg.content)) {
+                    const textParts = msg.content.filter((item) => !!item &&
+                        typeof item === "object" &&
+                        item.type === "text" &&
+                        typeof item.text === "string");
+                    if (textParts.length === 0) {
+                        // All content was tool_use/tool_result/non-text — skip message
+                        continue;
+                    }
+                    // Check if any retained text part carries providerOptions
+                    // (e.g. Anthropic cache_control). If so, preserve them as
+                    // array content to avoid losing per-block metadata.
+                    const hasItemProviderOptions = textParts.some((item) => !!item.providerOptions);
+                    if (hasItemProviderOptions) {
+                        sanitizedContent = textParts;
+                    }
+                    else {
+                        sanitizedContent =
+                            textParts.length === 1
+                                ? textParts[0].text
+                                : textParts
+                                    .map((p) => p.text)
+                                    .join(" ");
+                    }
+                }
+                // Skip empty string content to avoid Claude API rejection
+                if (sanitizedContent === "") {
+                    continue;
+                }
                 messages.push({
                     role: msg.role,
-                    content: msg.content,
+                    content: sanitizedContent,
                     ...(providerOptions && { providerOptions }),
                 });
             }

package/dist/providers/googleVertex.d.ts CHANGED Viewed

@@ -5,6 +5,20 @@ import { BaseProvider } from "../core/baseProvider.js";
 import type { EnhancedGenerateResult, TextGenerationOptions } from "../types/generateTypes.js";
 import type { NeurolinkCredentials } from "../types/providers.js";
 import type { StreamOptions, StreamResult } from "../types/streamTypes.js";
+/**
+ * Resolve the correct Vertex AI location for a given model.
+ *
+ * Google-published models (gemini-*) require the global endpoint
+ * (`aiplatform.googleapis.com`), not regional endpoints like
+ * `us-east5-aiplatform.googleapis.com`. Regional endpoints return
+ * "model not found" for these models.
+ *
+ * Anthropic-on-Vertex models (claude-*) require regional endpoints
+ * and are handled separately by `createVertexAnthropicSettings`.
+ *
+ * Embedding models and custom models use the configured location as-is.
+ */
+export declare const resolveVertexLocation: (modelName: string | undefined, configuredLocation: string) => string;
 /**
  * Vertex Model Aliases
  *

package/dist/providers/googleVertex.js CHANGED Viewed

@@ -79,7 +79,36 @@ const getVertexLocation = () => {
     return (process.env.GOOGLE_CLOUD_LOCATION ||
         process.env.VERTEX_LOCATION ||
         process.env.GOOGLE_VERTEX_LOCATION ||
-        "us-central1");
+        "global");
+};
+/**
+ * Resolve the correct Vertex AI location for a given model.
+ *
+ * Google-published models (gemini-*) require the global endpoint
+ * (`aiplatform.googleapis.com`), not regional endpoints like
+ * `us-east5-aiplatform.googleapis.com`. Regional endpoints return
+ * "model not found" for these models.
+ *
+ * Anthropic-on-Vertex models (claude-*) require regional endpoints
+ * and are handled separately by `createVertexAnthropicSettings`.
+ *
+ * Embedding models and custom models use the configured location as-is.
+ */
+export const resolveVertexLocation = (modelName, configuredLocation) => {
+    if (!modelName) {
+        return configuredLocation;
+    }
+    const normalized = modelName.toLowerCase();
+    // Google-published models always use the global endpoint.
+    // Hardcoded because Google's Vertex AI serves Gemini models exclusively
+    // from the global endpoint — regional endpoints like us-east5 return
+    // "Publisher Model was not found" errors. The env var GOOGLE_VERTEX_LOCATION
+    // is typically set for Anthropic-on-Vertex (which needs regional), so we
+    // cannot rely on it for Gemini routing.
+    if (normalized.startsWith("gemini-")) {
+        return "global";
+    }
+    return configuredLocation;
 };
 const getDefaultVertexModel = () => {
     // Use gemini-2.5-flash as default - latest and best price-performance model
@@ -96,8 +125,9 @@ const hasGoogleCredentials = () => {
 // Module-level cache for runtime-created credentials file to avoid per-request writes
 let cachedCredentialsPath = null;
 // Enhanced Vertex settings creation with authentication fallback and proxy support
-const createVertexSettings = async (region, credentials) => {
-    const location = credentials?.location || region || getVertexLocation();
+const createVertexSettings = async (region, credentials, modelName) => {
+    const configuredLocation = credentials?.location || region || getVertexLocation();
+    const location = resolveVertexLocation(modelName, configuredLocation);
     const project = credentials?.projectId || getVertexProjectId();
     const baseSettings = {
         project,
@@ -326,7 +356,12 @@ const createVertexAnthropicSettings = async (region, credentials) => {
     // which is invalid. The correct global endpoint omits the region prefix entirely.
     // Since the SDK doesn't handle this, redirect "global" to "us-east5" for Anthropic.
     const anthropicRegion = !region || region === "global" ? "us-east5" : region;
-    const baseVertexSettings = await createVertexSettings(anthropicRegion, credentials);
+    // Override credentials.location so it cannot conflict with the redirected
+    // region — createVertexSettings checks credentials.location first.
+    const anthropicCredentials = credentials?.location
+        ? { ...credentials, location: anthropicRegion }
+        : credentials;
+    const baseVertexSettings = await createVertexSettings(anthropicRegion, anthropicCredentials);
     // GoogleVertexAnthropicProviderSettings extends GoogleVertexProviderSettings
     // so we can use the same settings with proper typing
     return {
@@ -570,7 +605,9 @@ export class GoogleVertexProvider extends BaseProvider {
             networkConfig: {
                 projectId: this.projectId,
                 location: this.location,
-                expectedEndpoint: `https://${this.location}-aiplatform.googleapis.com`,
+                expectedEndpoint: this.location === "global"
+                    ? "https://aiplatform.googleapis.com"
+                    : `https://${this.location}-aiplatform.googleapis.com`,
                 httpProxy: process.env.HTTP_PROXY || process.env.http_proxy,
                 httpsProxy: process.env.HTTPS_PROXY || process.env.https_proxy,
                 noProxy: process.env.NO_PROXY || process.env.no_proxy,
@@ -582,7 +619,7 @@ export class GoogleVertexProvider extends BaseProvider {
             message: "Starting Vertex settings creation with network configuration analysis",
         });
         try {
-            const vertexSettings = await createVertexSettings(this.location, this.credentials);
+            const vertexSettings = await createVertexSettings(this.location, this.credentials, modelName);
             const vertexSettingsEndTime = process.hrtime.bigint();
             const vertexSettingsDurationNs = vertexSettingsEndTime - vertexSettingsStartTime;
             logger.debug(`[GoogleVertexProvider] ✅ LOG_POINT_V009_VERTEX_SETTINGS_SUCCESS`, {
@@ -1116,12 +1153,13 @@ export class GoogleVertexProvider extends BaseProvider {
     /**
      * Create @google/genai client configured for Vertex AI
      */
-    async createVertexGenAIClient(regionOverride) {
+    async createVertexGenAIClient(regionOverride, modelName) {
         const project = this.credentials?.projectId || getVertexProjectId();
-        const location = this.credentials?.location ||
+        const configuredLocation = this.credentials?.location ||
             regionOverride ||
             this.location ||
             getVertexLocation();
+        const location = resolveVertexLocation(modelName, configuredLocation);
         const mod = await import("@google/genai");
         const ctor = mod.GoogleGenAI;
         if (!ctor) {
@@ -1308,8 +1346,8 @@ export class GoogleVertexProvider extends BaseProvider {
         }, (span) => this.executeNativeGemini3StreamWithSpan(options, modelName, span));
     }
     async executeNativeGemini3StreamWithSpan(options, modelName, span) {
-        const client = await this.createVertexGenAIClient(options.region);
-        const effectiveLocation = options.region || this.location || getVertexLocation();
+        const client = await this.createVertexGenAIClient(options.region, modelName);
+        const effectiveLocation = resolveVertexLocation(modelName, options.region || this.location || getVertexLocation());
         logger.debug("[GoogleVertex] Using native @google/genai for Gemini 3", {
             model: modelName,
             hasTools: !!options.tools && Object.keys(options.tools).length > 0,
@@ -1503,8 +1541,8 @@ export class GoogleVertexProvider extends BaseProvider {
                 [ATTR.NL_PROVIDER]: this.providerName,
             },
         }, async (span) => {
-            const client = await this.createVertexGenAIClient(options.region);
-            const effectiveLocation = options.region || this.location || getVertexLocation();
+            const client = await this.createVertexGenAIClient(options.region, modelName);
+            const effectiveLocation = resolveVertexLocation(modelName, options.region || this.location || getVertexLocation());
             logger.debug("[GoogleVertex] Using native @google/genai for Gemini 3 generate", {
                 model: modelName,
                 project: this.projectId,

package/dist/proxy/routingPolicy.d.ts CHANGED Viewed

@@ -1,33 +1,43 @@
-import type { ClaudeProxyModelTier, ClaudeProxyRequestClass, ClaudeProxyRequestProfile, CooldownScope, CooldownSkippedAccount, FallbackEligibilityDecision, FallbackEntry, ParsedClaudeRequest, ProxyTranslationAttempt, ProxyTranslationPlan, RuntimeAccountState } from "../types/index.js";
-export type { ClaudeProxyModelTier, ClaudeProxyRequestClass, ClaudeProxyRequestProfile, CooldownScope, CooldownSkippedAccount, FallbackEligibilityDecision, ProxyTranslationAttempt, ProxyTranslationPlan, };
+import type { ClaudeProxyModelTier, CooldownSkippedAccount, FallbackEntry, ParsedClaudeRequest, ProxyTranslationAttempt, ProxyTranslationPlan, RuntimeAccountState } from "../types/index.js";
+export type { ClaudeProxyModelTier, ProxyTranslationAttempt, ProxyTranslationPlan, };
 export declare function inferClaudeProxyModelTier(modelName: string): ClaudeProxyModelTier;
-export declare function classifyClaudeProxyRequest(requestedModel: string, parsed: ParsedClaudeRequest): ClaudeProxyRequestProfile;
-export declare function getRequestClassCooldownKey(profile: ClaudeProxyRequestProfile): string;
-export declare function getModelTierCooldownKey(profile: ClaudeProxyRequestProfile): string;
-export declare function evaluateFallbackEligibility(profile: ClaudeProxyRequestProfile, candidate: {
-    provider?: string;
-    model?: string;
-}): FallbackEligibilityDecision;
+/**
+ * Build a translation plan for a Claude-compatible proxy request.
+ * The plan lists the primary provider followed by eligible fallback targets.
+ * All configured fallback entries are always eligible — no contract-based gating.
+ * When no fallback chain is configured, an "auto-provider" entry is appended.
+ */
 export declare function buildProxyTranslationPlan(primary: {
     provider: string;
     model?: string;
-}, fallbackChain: FallbackEntry[], requestedModel: string, parsed: ParsedClaudeRequest): ProxyTranslationPlan;
-export declare function summarizeSkippedFallbacks(plan: Pick<ProxyTranslationPlan, "profile" | "skipped">): string | null;
-export declare function getActiveCooldownScope(state: RuntimeAccountState, profile: ClaudeProxyRequestProfile, now?: number): CooldownScope | null;
+}, fallbackChain: FallbackEntry[], requestedModel: string, _parsed: ParsedClaudeRequest): ProxyTranslationPlan;
+/**
+ * Check whether an account is currently cooling down.
+ * Returns the cooldown timestamp if active, null otherwise.
+ */
+export declare function getAccountCooldownUntil(state: RuntimeAccountState, now?: number): number | null;
+/**
+ * Partition accounts into eligible (no cooldown) and skipped (cooling down).
+ */
 export declare function partitionAccountsByCooldown<T extends {
     key: string;
-}>(accounts: T[], getState: (account: T) => RuntimeAccountState, profile: ClaudeProxyRequestProfile, now?: number): {
+}>(accounts: T[], getState: (account: T) => RuntimeAccountState, now?: number): {
     eligible: T[];
     skipped: CooldownSkippedAccount<T>[];
 };
-export declare function applyRateLimitCooldownScope(args: {
+/**
+ * Apply a rate-limit cooldown to an account.
+ * Uses simple exponential backoff with a floor and cap.
+ */
+export declare function applyRateLimitCooldown(args: {
     state: RuntimeAccountState;
-    profile: ClaudeProxyRequestProfile;
     retryAfterMs?: number;
     now?: number;
     capMs: number;
 }): {
     backoffMs: number;
-    requestClassKey: string;
-    modelTierKey: string;
 };
+/**
+ * Clear cooldown state for an account after a successful request.
+ */
+export declare function clearAccountCooldown(state: RuntimeAccountState): void;