npm - @juspay/neurolink - Versions diffs - 9.54.1 → 9.54.3 - Mend

@juspay/neurolink 9.54.1 → 9.54.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52)

package/CHANGELOG.md +12 -0
package/dist/browser/neurolink.min.js +288 -288
package/dist/cli/factories/commandFactory.js +43 -4
package/dist/cli/utils/abortHandler.d.ts +22 -0
package/dist/cli/utils/abortHandler.js +53 -0
package/dist/core/baseProvider.d.ts +7 -1
package/dist/core/baseProvider.js +19 -0
package/dist/lib/core/baseProvider.d.ts +7 -1
package/dist/lib/core/baseProvider.js +19 -0
package/dist/lib/neurolink.js +17 -1
package/dist/lib/providers/anthropic.js +1 -0
package/dist/lib/providers/anthropicBaseProvider.js +1 -0
package/dist/lib/providers/azureOpenai.js +1 -0
package/dist/lib/providers/googleAiStudio.js +1 -0
package/dist/lib/providers/googleVertex.d.ts +14 -0
package/dist/lib/providers/googleVertex.js +51 -12
package/dist/lib/providers/huggingFace.js +1 -0
package/dist/lib/providers/litellm.js +1 -0
package/dist/lib/providers/mistral.js +1 -0
package/dist/lib/providers/openAI.js +1 -0
package/dist/lib/providers/openRouter.js +1 -0
package/dist/lib/providers/openaiCompatible.js +1 -0
package/dist/lib/proxy/routingPolicy.d.ts +27 -17
package/dist/lib/proxy/routingPolicy.js +53 -209
package/dist/lib/server/routes/claudeProxyRoutes.js +35 -73
package/dist/lib/types/proxyTypes.d.ts +9 -50
package/dist/lib/types/streamTypes.d.ts +6 -0
package/dist/lib/utils/messageBuilder.js +39 -6
package/dist/lib/utils/toolCallRepair.d.ts +21 -0
package/dist/lib/utils/toolCallRepair.js +298 -0
package/dist/neurolink.js +17 -1
package/dist/providers/anthropic.js +1 -0
package/dist/providers/anthropicBaseProvider.js +1 -0
package/dist/providers/azureOpenai.js +1 -0
package/dist/providers/googleAiStudio.js +1 -0
package/dist/providers/googleVertex.d.ts +14 -0
package/dist/providers/googleVertex.js +51 -12
package/dist/providers/huggingFace.js +1 -0
package/dist/providers/litellm.js +1 -0
package/dist/providers/mistral.js +1 -0
package/dist/providers/openAI.js +1 -0
package/dist/providers/openRouter.js +1 -0
package/dist/providers/openaiCompatible.js +1 -0
package/dist/proxy/routingPolicy.d.ts +27 -17
package/dist/proxy/routingPolicy.js +53 -209
package/dist/server/routes/claudeProxyRoutes.js +35 -73
package/dist/types/proxyTypes.d.ts +9 -50
package/dist/types/streamTypes.d.ts +6 -0
package/dist/utils/messageBuilder.js +39 -6
package/dist/utils/toolCallRepair.d.ts +21 -0
package/dist/utils/toolCallRepair.js +297 -0
package/package.json +1 -1

package/dist/lib/proxy/routingPolicy.js CHANGED

@@ -1,9 +1,4 @@
-const STREAMING_CONVERSATIONAL_TOOL_THRESHOLD = 4;
-const STRONG_TOOL_FIDELITY_THRESHOLD = 8;
-const HIGH_TOOL_COUNT_THRESHOLD = 24;
 const DEFAULT_COOLDOWN_FLOOR_MS = 1_000;
-const HIGH_TOOL_COUNT_COOLDOWN_FLOOR_MS = 10_000;
-const HIGH_FIDELITY_COOLDOWN_FLOOR_MS = 300_000;
 export function inferClaudeProxyModelTier(modelName) {
     const normalized = modelName.toLowerCase();
     if (normalized.includes("opus")) {
@@ -17,101 +12,13 @@ export function inferClaudeProxyModelTier(modelName) {
     }
     return "other";
 }
-function detectToolHistory(parsed) {
-    return parsed.conversationMessages.some((message) => {
-        return (message.content.includes("[tool_use:") ||
-            message.content.includes("[tool_result:"));
-    });
-}
-export function classifyClaudeProxyRequest(requestedModel, parsed) {
-    const toolCount = Object.keys(parsed.tools).length;
-    const hasImages = parsed.images.length > 0;
-    const hasThinking = !!parsed.thinkingConfig?.enabled;
-    const hasToolHistory = detectToolHistory(parsed);
-    const requiresSpecificTool = !!parsed.toolChoiceName;
-    const requiresToolUse = parsed.toolChoice === "required" || requiresSpecificTool || hasToolHistory;
-    const requiresStrongToolFidelity = toolCount >= STRONG_TOOL_FIDELITY_THRESHOLD ||
-        requiresSpecificTool ||
-        hasToolHistory;
-    const isHighToolCountNonStream = !parsed.stream && toolCount >= HIGH_TOOL_COUNT_THRESHOLD;
-    const isStreamingConversational = parsed.stream &&
-        !hasImages &&
-        toolCount <= STREAMING_CONVERSATIONAL_TOOL_THRESHOLD &&
-        !requiresStrongToolFidelity;
-    const classes = [];
-    if (hasImages) {
-        classes.push("multimodal");
-    }
-    if (isHighToolCountNonStream) {
-        classes.push("high-tool-count-non-stream-structured");
-    }
-    if (requiresStrongToolFidelity) {
-        classes.push("strong-tool-fidelity");
-    }
-    if (isStreamingConversational) {
-        classes.push("streaming-conversational");
-    }
-    if (classes.length === 0) {
-        classes.push("standard");
-    }
-    return {
-        requestedModel,
-        modelTier: inferClaudeProxyModelTier(requestedModel),
-        primaryClass: classes[0],
-        classes,
-        stream: parsed.stream,
-        toolCount,
-        hasImages,
-        hasThinking,
-        hasToolHistory,
-        requiresToolUse,
-        requiresSpecificTool,
-        requiresStrongToolFidelity,
-        isHighToolCountNonStream,
-        isStreamingConversational,
-        isMultimodal: hasImages,
-    };
-}
-export function getRequestClassCooldownKey(profile) {
-    return `${profile.primaryClass}:${profile.requestedModel.toLowerCase()}`;
-}
-export function getModelTierCooldownKey(profile) {
-    return profile.modelTier;
-}
-function getQualityGuardReason(profile, provider, _model) {
-    // Only gate auto-provider fallback (no explicit provider).
-    // Configured fallback-chain entries are always allowed through —
-    // let them attempt the request and fail naturally if the provider
-    // cannot handle it.
-    if (!provider) {
-        if (profile.modelTier === "opus" ||
-            profile.requiresStrongToolFidelity ||
-            profile.isHighToolCountNonStream) {
-            return "auto-provider fallback is disabled for requests that require contract preservation";
-        }
-        return null;
-    }
-    return null;
-}
-export function evaluateFallbackEligibility(profile, candidate) {
-    const policyBlockReason = getQualityGuardReason(profile, candidate.provider, candidate.model);
-    if (policyBlockReason) {
-        return {
-            provider: candidate.provider,
-            model: candidate.model,
-            eligible: false,
-            reason: policyBlockReason,
-        };
-    }
-    return {
-        provider: candidate.provider,
-        model: candidate.model,
-        eligible: true,
-        reason: "eligible",
-    };
-}
-export function buildProxyTranslationPlan(primary, fallbackChain, requestedModel, parsed) {
-    const profile = classifyClaudeProxyRequest(requestedModel, parsed);
+/**
+ * Build a translation plan for a Claude-compatible proxy request.
+ * The plan lists the primary provider followed by eligible fallback targets.
+ * All configured fallback entries are always eligible — no contract-based gating.
+ * When no fallback chain is configured, an "auto-provider" entry is appended.
+ */
+export function buildProxyTranslationPlan(primary, fallbackChain, requestedModel, _parsed) {
     const attempts = [
         {
             provider: primary.provider,
@@ -119,142 +26,79 @@ export function buildProxyTranslationPlan(primary, fallbackChain, requestedModel
             label: `${primary.provider}/${primary.model ?? "unknown"}`,
         },
     ];
-    const skipped = [];
     for (const fallback of fallbackChain) {
         if (fallback.provider === primary.provider &&
             fallback.model === primary.model) {
             continue;
         }
-        const decision = evaluateFallbackEligibility(profile, fallback);
-        if (!decision.eligible) {
-            skipped.push(decision);
-            continue;
-        }
         attempts.push({
             provider: fallback.provider,
             model: fallback.model,
             label: `${fallback.provider}/${fallback.model}`,
         });
     }
-    if (fallbackChain.length === 0) {
-        const autoDecision = evaluateFallbackEligibility(profile, {});
-        if (autoDecision.eligible) {
-            attempts.push({ label: "auto-provider" });
-        }
-        else {
-            skipped.push(autoDecision);
-        }
+    // Append auto-provider when no configured fallback chain exists,
+    // or when all configured entries were deduped (same as primary).
+    if (fallbackChain.length === 0 || attempts.length === 1) {
+        attempts.push({ label: "auto-provider" });
     }
     return {
-        profile,
+        requestedModel,
+        modelTier: inferClaudeProxyModelTier(requestedModel),
         attempts,
-        skipped,
+        skipped: [],
     };
 }
-export function summarizeSkippedFallbacks(plan) {
-    if (plan.skipped.length === 0) {
-        return null;
-    }
-    const summary = plan.skipped
-        .map((decision) => {
-        const label = decision.provider
-            ? `${decision.provider}/${decision.model ?? "unknown"}`
-            : "auto-provider";
-        return `${label}: ${decision.reason}`;
-    })
-        .join("; ");
-    return `Fallback policy preserved the requested ${plan.profile.primaryClass} contract by skipping ineligible targets. ${summary}`;
-}
-export function getActiveCooldownScope(state, profile, now = Date.now()) {
-    let longest = null;
-    const requestClassKey = getRequestClassCooldownKey(profile);
-    const requestClassUntil = state.requestClassCooldowns?.[requestClassKey] ?? undefined;
-    if (requestClassUntil && requestClassUntil > now) {
-        longest = {
-            scope: "request_class",
-            key: requestClassKey,
-            until: requestClassUntil,
-        };
-    }
-    const modelTierKey = getModelTierCooldownKey(profile);
-    const modelTierUntil = state.modelTierCooldowns?.[modelTierKey] ?? undefined;
-    if (modelTierUntil &&
-        modelTierUntil > now &&
-        modelTierUntil > (longest?.until ?? 0)) {
-        longest = {
-            scope: "model_tier",
-            key: modelTierKey,
-            until: modelTierUntil,
-        };
-    }
-    if (state.coolingUntil &&
-        state.coolingUntil > now &&
-        state.coolingUntil > (longest?.until ?? 0)) {
-        longest = {
-            scope: "generic",
-            key: "generic",
-            until: state.coolingUntil,
-        };
+// ---------------------------------------------------------------------------
+// Simple per-account cooldown
+// ---------------------------------------------------------------------------
+/**
+ * Check whether an account is currently cooling down.
+ * Returns the cooldown timestamp if active, null otherwise.
+ */
+export function getAccountCooldownUntil(state, now = Date.now()) {
+    if (state.coolingUntil && state.coolingUntil > now) {
+        return state.coolingUntil;
     }
-    return longest;
+    return null;
 }
-export function partitionAccountsByCooldown(accounts, getState, profile, now = Date.now()) {
+/**
+ * Partition accounts into eligible (no cooldown) and skipped (cooling down).
+ */
+export function partitionAccountsByCooldown(accounts, getState, now = Date.now()) {
     const eligible = [];
     const skipped = [];
     for (const account of accounts) {
-        const cooldown = getActiveCooldownScope(getState(account), profile, now);
-        if (cooldown) {
-            skipped.push({ account, cooldown });
+        const state = getState(account);
+        const until = getAccountCooldownUntil(state, now);
+        if (until !== null) {
+            skipped.push({
+                account,
+                cooldown: { until, backoffLevel: state.backoffLevel },
+            });
             continue;
         }
         eligible.push(account);
     }
-    return {
-        eligible,
-        skipped,
-    };
+    return { eligible, skipped };
 }
-export function applyRateLimitCooldownScope(args) {
+/**
+ * Apply a rate-limit cooldown to an account.
+ * Uses simple exponential backoff with a floor and cap.
+ */
+export function applyRateLimitCooldown(args) {
     const now = args.now ?? Date.now();
-    const requestClassKey = getRequestClassCooldownKey(args.profile);
-    const modelTierKey = getModelTierCooldownKey(args.profile);
-    const rcBackoffLevels = args.state.requestClassBackoffLevels ?? {};
-    const mtBackoffLevels = args.state.modelTierBackoffLevels ?? {};
-    const scopedBackoffLevel = Math.max(rcBackoffLevels[requestClassKey] ?? 0, mtBackoffLevels[modelTierKey] ?? 0);
-    // High-tool-count-non-stream gets its own (lower) floor so that requests
-    // recover faster once proper OAuth betas are forwarded. Check it first
-    // because every >=24-tool request also satisfies requiresStrongToolFidelity
-    // (threshold 8), which would otherwise shadow this branch.
-    const floorMs = args.profile.isHighToolCountNonStream
-        ? HIGH_TOOL_COUNT_COOLDOWN_FLOOR_MS
-        : args.profile.modelTier === "opus" ||
-            args.profile.requiresStrongToolFidelity
-            ? HIGH_FIDELITY_COOLDOWN_FLOOR_MS
-            : DEFAULT_COOLDOWN_FLOOR_MS;
-    const baseCooldownMs = Math.max(args.retryAfterMs ?? 0, floorMs);
-    const backoffMs = Math.min(baseCooldownMs * 2 ** scopedBackoffLevel, args.capMs);
-    const until = now + backoffMs;
-    args.state.requestClassCooldowns = {
-        ...(args.state.requestClassCooldowns ?? {}),
-        [requestClassKey]: Math.max(args.state.requestClassCooldowns?.[requestClassKey] ?? 0, until),
-    };
-    args.state.modelTierCooldowns = {
-        ...(args.state.modelTierCooldowns ?? {}),
-        [modelTierKey]: Math.max(args.state.modelTierCooldowns?.[modelTierKey] ?? 0, until),
-    };
-    args.state.requestClassBackoffLevels = {
-        ...rcBackoffLevels,
-        [requestClassKey]: (rcBackoffLevels[requestClassKey] ?? 0) + 1,
-    };
-    args.state.modelTierBackoffLevels = {
-        ...mtBackoffLevels,
-        [modelTierKey]: (mtBackoffLevels[modelTierKey] ?? 0) + 1,
-    };
+    const baseCooldownMs = Math.max(args.retryAfterMs ?? 0, DEFAULT_COOLDOWN_FLOOR_MS);
+    const backoffMs = Math.min(baseCooldownMs * 2 ** args.state.backoffLevel, args.capMs);
+    args.state.coolingUntil = now + backoffMs;
     args.state.backoffLevel += 1;
-    return {
-        backoffMs,
-        requestClassKey,
-        modelTierKey,
-    };
+    return { backoffMs };
+}
+/**
+ * Clear cooldown state for an account after a successful request.
+ */
+export function clearAccountCooldown(state) {
+    state.coolingUntil = undefined;
+    state.backoffLevel = 0;
 }
 //# sourceMappingURL=routingPolicy.js.map

package/dist/lib/server/routes/claudeProxyRoutes.js CHANGED

@@ -20,7 +20,7 @@ import { createRawStreamCapture } from "../../proxy/rawStreamCapture.js";
 import { logBodyCapture, logRequest, logRequestAttempt, logStreamError, } from "../../proxy/requestLogger.js";
 import { createSSEInterceptor } from "../../proxy/sseInterceptor.js";
 import { needsRefresh, persistTokens, refreshToken, } from "../../proxy/tokenRefresh.js";
-import { applyRateLimitCooldownScope, buildProxyTranslationPlan, classifyClaudeProxyRequest, getActiveCooldownScope, partitionAccountsByCooldown, summarizeSkippedFallbacks, } from "../../proxy/routingPolicy.js";
+import { applyRateLimitCooldown, buildProxyTranslationPlan, clearAccountCooldown, getAccountCooldownUntil, partitionAccountsByCooldown, } from "../../proxy/routingPolicy.js";
 import { writeJsonSnapshotAtomically } from "../../proxy/snapshotPersistence.js";
 import { recordAttempt, recordAttemptError, recordCooldown, recordFinalError, recordFinalSuccess, } from "../../proxy/usageStats.js";
 import { logger } from "../../utils/logger.js";
@@ -440,9 +440,7 @@ function logProxyRoutingPlan(logProxyBody, stage, plan) {
         contentType: "application/json",
         body: {
             stage,
-            requestProfile: plan.profile,
             attempts: plan.attempts,
-            skipped: plan.skipped,
         },
     });
 }
@@ -1347,30 +1345,21 @@ async function executeClaudeFallbackTranslation(args) {
     return clientResponse;
 }
 async function tryConfiguredClaudeFallbackChain(args) {
-    const { ctx, body, parsedFallbackRequest, requestProfile, modelRouter, tracer, requestStartTime, logProxyBody, logFinalRequest, } = args;
+    const { ctx, body, parsedFallbackRequest, modelRouter, tracer, requestStartTime, logProxyBody, logFinalRequest, } = args;
     const chain = modelRouter?.getFallbackChain() ?? [];
     const fallbackPlan = buildProxyTranslationPlan({ provider: "anthropic", model: body.model }, chain, body.model, parsedFallbackRequest);
-    const fallbackPolicyReason = summarizeSkippedFallbacks(fallbackPlan);
     logProxyBody({
         phase: "routing_decision",
         contentType: "application/json",
         body: {
             stage: "anthropic_fallback",
-            requestProfile,
             attempts: fallbackPlan.attempts.slice(1),
-            skipped: fallbackPlan.skipped,
         },
     });
-    for (const skipped of fallbackPlan.skipped) {
-        const label = skipped.provider
-            ? `${skipped.provider}/${skipped.model ?? "unknown"}`
-            : "auto-provider";
-        logger.always(`[proxy] skipping fallback ${label}: ${skipped.reason}`);
-    }
     tracer?.setFallbackInfo({
         triggered: true,
         attemptCount: fallbackPlan.attempts.slice(1).length,
-        reason: fallbackPolicyReason ?? "all_anthropic_accounts_exhausted",
+        reason: "all_anthropic_accounts_exhausted",
     });
     for (const fallback of fallbackPlan.attempts.slice(1)) {
         if (!fallback.provider || !fallback.model) {
@@ -1410,10 +1399,7 @@ async function tryConfiguredClaudeFallbackChain(args) {
                 attemptCount: fallbackPlan.attempts.slice(1).length,
                 reason: "fallback_success",
             });
-            return {
-                response,
-                fallbackPolicyReason,
-            };
+            return { response };
         }
         catch (fallbackErr) {
             const errMsg = fallbackErr instanceof Error
@@ -1450,10 +1436,7 @@ async function tryConfiguredClaudeFallbackChain(args) {
             });
         }
     }
-    return {
-        response: null,
-        fallbackPolicyReason,
-    };
+    return { response: null };
 }
 async function tryAutoClaudeFallback(args) {
     const { ctx, body, tracer, requestStartTime, logProxyBody, logFinalRequest } = args;
@@ -1484,7 +1467,7 @@ async function tryAutoClaudeFallback(args) {
     }
 }
 function buildClaudeAnthropicFailureResponse(args) {
-    const { tracer, requestStartTime, authFailureMessage, invalidRequestFailure, sawNetworkError, sawTransientFailure, sawRateLimit, lastError, orderedAccounts, requestProfile, fallbackPolicyReason, buildLoggedClaudeError, logProxyBody, logFinalRequest, } = args;
+    const { tracer, requestStartTime, authFailureMessage, invalidRequestFailure, sawNetworkError, sawTransientFailure, sawRateLimit, lastError, orderedAccounts, buildLoggedClaudeError, logProxyBody, logFinalRequest, } = args;
     if (authFailureMessage && !sawRateLimit) {
         tracer?.setError("authentication_error", authFailureMessage);
         tracer?.end(401, Date.now() - requestStartTime);
@@ -1531,21 +1514,21 @@ function buildClaudeAnthropicFailureResponse(args) {
         return buildLoggedClaudeError(502, msg);
     }
     const earliestRecovery = orderedAccounts.reduce((min, account) => {
-        const cooldown = getActiveCooldownScope(getOrCreateRuntimeState(account.key), requestProfile);
-        return cooldown ? Math.min(min, cooldown.until) : min;
+        const until = getAccountCooldownUntil(getOrCreateRuntimeState(account.key));
+        return until !== null ? Math.min(min, until) : min;
     }, Infinity);
+    // If no active cooldown remains (expired while retries ran), use 1s
+    // instead of fabricating a long retry-after.
     const retryAfterSec = Number.isFinite(earliestRecovery)
         ? Math.max(1, Math.ceil((earliestRecovery - Date.now()) / 1000))
-        : 60;
-    const contractMessage = fallbackPolicyReason
-        ? ` ${fallbackPolicyReason}`
-        : "";
-    logger.always(`[proxy] all accounts rate-limited for request-class=${requestProfile.primaryClass}, retry in ${retryAfterSec}s`);
-    const errorBody = buildClaudeError(429, `All accounts rate-limited. Earliest recovery in ${retryAfterSec}s.${contractMessage}`, "overloaded_error");
-    tracer?.setError("rate_limit_error", `All accounts rate-limited. Retry in ${retryAfterSec}s.${contractMessage}`);
+        : 1;
+    const errorMessage = `All accounts rate-limited. Earliest recovery in ${retryAfterSec}s.`;
+    logger.always(`[proxy] all accounts rate-limited, retry in ${retryAfterSec}s`);
+    const errorBody = buildClaudeError(429, errorMessage, "overloaded_error");
+    tracer?.setError("rate_limit_error", errorMessage);
     tracer?.end(429, Date.now() - requestStartTime);
     recordFinalError(429);
-    logFinalRequest(429, "", "final", "rate_limit_error", `All accounts rate-limited. Retry in ${retryAfterSec}s.${contractMessage}`);
+    logFinalRequest(429, "", "final", "rate_limit_error", errorMessage);
     const errorBodyText = JSON.stringify(errorBody);
     logProxyBody({
         phase: "client_response",
@@ -1568,22 +1551,9 @@ function buildClaudeAnthropicFailureResponse(args) {
     });
 }
 async function handleAnthropicSuccessfulResponse(args) {
-    const { ctx, body, account, accountState, requestProfile, response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest, } = args;
-    accountState.backoffLevel = 0;
-    accountState.coolingUntil = undefined;
+    const { ctx, body, account, accountState, response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest, } = args;
+    clearAccountCooldown(accountState);
     accountState.consecutiveRefreshFailures = 0;
-    if (accountState.requestClassCooldowns) {
-        delete accountState.requestClassCooldowns[`${requestProfile.primaryClass}:${requestProfile.requestedModel.toLowerCase()}`];
-    }
-    if (accountState.modelTierCooldowns) {
-        delete accountState.modelTierCooldowns[requestProfile.modelTier];
-    }
-    if (accountState.requestClassBackoffLevels) {
-        delete accountState.requestClassBackoffLevels[`${requestProfile.primaryClass}:${requestProfile.requestedModel.toLowerCase()}`];
-    }
-    if (accountState.modelTierBackoffLevels) {
-        delete accountState.modelTierBackoffLevels[requestProfile.modelTier];
-    }
     logger.always(`[proxy] ← ${response.status} account=${account.label}`);
     const quota = parseQuotaHeaders(response.headers);
     if (quota) {
@@ -2173,7 +2143,7 @@ async function handleAnthropicSuccessfulRetryResponse(args) {
     return retryJson;
 }
 async function handleAnthropicAuthRetry(args) {
-    const { ctx, body, account, accountState, requestProfile, headers, buildUpstreamBody, enabledAccounts, orderedAccounts, response: _response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logAttempt, logProxyBody, logFinalRequest, lastError, authFailureMessage, sawRateLimit, sawTransientFailure, sawNetworkError, } = args;
+    const { ctx, body, account, accountState, headers, buildUpstreamBody, enabledAccounts, orderedAccounts, response: _response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logAttempt, logProxyBody, logFinalRequest, lastError, authFailureMessage, sawRateLimit, sawTransientFailure, sawNetworkError, } = args;
     recordAttemptError(account.label, account.type, 401);
     let currentLastError = lastError;
     let currentAuthFailureMessage = authFailureMessage;
@@ -2257,9 +2227,8 @@ async function handleAnthropicAuthRetry(args) {
                 const cooldownMs = Number.isNaN(parsedRetryAfter)
                     ? 60_000
                     : Math.max(1, parsedRetryAfter) * 1000;
-                const cooldown = applyRateLimitCooldownScope({
+                const cooldown = applyRateLimitCooldown({
                     state: accountState,
-                    profile: requestProfile,
                     retryAfterMs: cooldownMs,
                     capMs: RATE_LIMIT_BACKOFF_CAP_MS,
                 });
@@ -2836,7 +2805,7 @@ async function prepareAnthropicAccountAttempt(args) {
     };
 }
 async function fetchAnthropicAccountResponse(args) {
-    const { url, headers, finalBodyStr, account, accountState, requestProfile, enabledAccounts, orderedAccounts, tracer, logAttempt, currentLastError, currentSawRateLimit, currentSawNetworkError, upstreamSpan, } = args;
+    const { url, headers, finalBodyStr, account, accountState, enabledAccounts, orderedAccounts, tracer, logAttempt, currentLastError, currentSawRateLimit, currentSawNetworkError, upstreamSpan, } = args;
     let lastError = currentLastError;
     let sawRateLimit = currentSawRateLimit;
     let sawNetworkError = currentSawNetworkError;
@@ -2889,9 +2858,8 @@ async function fetchAnthropicAccountResponse(args) {
                 }
             }
         }
-        const cooldown = applyRateLimitCooldownScope({
+        const cooldown = applyRateLimitCooldown({
             state: accountState,
-            profile: requestProfile,
             retryAfterMs: cooldownMs > 0 ? cooldownMs : undefined,
             capMs: RATE_LIMIT_BACKOFF_CAP_MS,
         });
@@ -2899,7 +2867,7 @@ async function fetchAnthropicAccountResponse(args) {
         recordAttemptError(account.label, account.type, 429);
         recordCooldown(account.label, account.type, Date.now() + cooldown.backoffMs, accountState.backoffLevel);
         lastError = await response.text();
-        logger.always(`[proxy] ← 429 account=${account.label} backoff-level=${accountState.backoffLevel} cooldown=${Math.round(cooldown.backoffMs / 1000)}s request-class=${cooldown.requestClassKey} model-tier=${cooldown.modelTierKey}`);
+        logger.always(`[proxy] ← 429 account=${account.label} backoff-level=${accountState.backoffLevel} cooldown=${Math.round(cooldown.backoffMs / 1000)}s`);
         logAttempt(429, "rate_limit_error", String(lastError));
         tracer?.setError("rate_limit_error", String(lastError).slice(0, 500));
         tracer?.recordRetry(account.label, "rate_limit");
@@ -2924,7 +2892,6 @@ async function fetchAnthropicAccountResponse(args) {
 async function handleAnthropicRoutedClaudeRequest(args) {
     const { ctx, body, modelRouter, tracer, requestStartTime, accountStrategy, buildLoggedClaudeError, logProxyBody, logFinalRequest, } = args;
     const parsedRequest = parseClaudeRequest(body);
-    const requestProfile = classifyClaudeProxyRequest(body.model, parsedRequest);
     const loadedAccounts = await loadClaudeProxyAccounts({
         ctx,
         body,
@@ -2947,14 +2914,16 @@ async function handleAnthropicRoutedClaudeRequest(args) {
         attemptNumber: 0,
     };
     const acctSelectionSpan = tracer?.startAccountSelection();
-    const accountPartition = partitionAccountsByCooldown(orderedAccounts, (account) => getOrCreateRuntimeState(account.key), requestProfile);
+    const accountPartition = partitionAccountsByCooldown(orderedAccounts, (account) => getOrCreateRuntimeState(account.key));
     for (const skippedAccount of accountPartition.skipped) {
-        if (skippedAccount.cooldown.scope === "request_class" ||
-            skippedAccount.cooldown.scope === "model_tier") {
-            loopState.sawRateLimit = true;
-        }
-        loopState.lastError = `Skipped account=${skippedAccount.account.label} due to ${skippedAccount.cooldown.scope} cooldown ${skippedAccount.cooldown.key}`;
-        logger.always(`[proxy] skipping account=${skippedAccount.account.label} due to ${skippedAccount.cooldown.scope} cooldown=${skippedAccount.cooldown.key} remaining=${Math.max(1, Math.ceil((skippedAccount.cooldown.until - Date.now()) / 1000))}s`);
+        logger.always(`[proxy] skipping account=${skippedAccount.account.label} cooldown remaining=${Math.max(1, Math.ceil((skippedAccount.cooldown.until - Date.now()) / 1000))}s`);
+    }
+    // Only flag rate-limit when ALL accounts are cooling — if some are eligible,
+    // let the actual attempt results determine sawRateLimit via real 429 responses.
+    if (accountPartition.skipped.length > 0 &&
+        accountPartition.eligible.length === 0) {
+        loopState.sawRateLimit = true;
+        loopState.lastError = `All ${accountPartition.skipped.length} accounts are cooling down`;
     }
     accountLoop: for (const account of accountPartition.eligible) {
         const accountState = getOrCreateRuntimeState(account.key);
@@ -3009,7 +2978,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
                 finalBodyStr: preparedAttempt.finalBodyStr,
                 account,
                 accountState,
-                requestProfile,
                 enabledAccounts,
                 orderedAccounts,
                 tracer,
@@ -3046,7 +3014,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
                     body,
                     account,
                     accountState,
-                    requestProfile,
                     headers: preparedAttempt.headers,
                     buildUpstreamBody: preparedAttempt.buildUpstreamBody,
                     enabledAccounts,
@@ -3126,7 +3093,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
                 body,
                 account,
                 accountState,
-                requestProfile,
                 response,
                 tracer,
                 requestStartTime,
@@ -3150,7 +3116,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
         ctx,
         body,
         parsedFallbackRequest: parsedRequest,
-        requestProfile,
         modelRouter,
         tracer,
         requestStartTime,
@@ -3160,8 +3125,9 @@ async function handleAnthropicRoutedClaudeRequest(args) {
     if (configuredFallbackResult.response) {
         return configuredFallbackResult.response;
     }
-    const configuredChain = modelRouter?.getFallbackChain() ?? [];
-    if (configuredChain.length === 0 && !loopState.sawRateLimit) {
+    // Try auto-provider fallback when the configured chain didn't produce a
+    // response (either no chain configured, or all entries failed/deduped).
+    if (!loopState.sawRateLimit) {
         const autoFallbackResponse = await tryAutoClaudeFallback({
             ctx,
             body,
@@ -3184,8 +3150,6 @@ async function handleAnthropicRoutedClaudeRequest(args) {
         sawRateLimit: loopState.sawRateLimit,
         lastError: loopState.lastError,
         orderedAccounts,
-        requestProfile,
-        fallbackPolicyReason: configuredFallbackResult.fallbackPolicyReason,
         buildLoggedClaudeError,
         logProxyBody,
         logFinalRequest,
@@ -3406,8 +3370,6 @@ function getOrCreateRuntimeState(accountKey) {
         backoffLevel: 0,
         consecutiveRefreshFailures: 0,
         permanentlyDisabled: false,
-        requestClassCooldowns: {},
-        modelTierCooldowns: {},
     };
     accountRuntimeState.set(accountKey, initial);
     return initial;

package/dist/lib/types/proxyTypes.d.ts CHANGED

@@ -641,10 +641,6 @@ export type RuntimeAccountState = {
     backoffLevel: number;
     consecutiveRefreshFailures: number;
     permanentlyDisabled: boolean;
-    requestClassCooldowns?: Record<string, number>;
-    modelTierCooldowns?: Record<string, number>;
-    requestClassBackoffLevels?: Record<string, number>;
-    modelTierBackoffLevels?: Record<string, number>;
     lastToken?: string;
     lastRefreshToken?: string;
 };
@@ -697,63 +693,26 @@ export type CachedSession = {
 };
 /** Model tier classification for proxy routing decisions. */
 export type ClaudeProxyModelTier = "opus" | "sonnet" | "haiku" | "other";
-/** Request class for proxy routing policy. */
-export type ClaudeProxyRequestClass = "multimodal" | "high-tool-count-non-stream-structured" | "strong-tool-fidelity" | "streaming-conversational" | "standard";
-/** Full classification profile for a proxy request. */
-export type ClaudeProxyRequestProfile = {
-    requestedModel: string;
-    modelTier: ClaudeProxyModelTier;
-    primaryClass: ClaudeProxyRequestClass;
-    classes: ClaudeProxyRequestClass[];
-    stream: boolean;
-    toolCount: number;
-    hasImages: boolean;
-    hasThinking: boolean;
-    hasToolHistory: boolean;
-    requiresToolUse: boolean;
-    requiresSpecificTool: boolean;
-    requiresStrongToolFidelity: boolean;
-    isHighToolCountNonStream: boolean;
-    isStreamingConversational: boolean;
-    isMultimodal: boolean;
-};
-/** Outcome of evaluating a single fallback candidate. */
-export type FallbackEligibilityDecision = {
-    provider?: string;
-    model?: string;
-    eligible: boolean;
-    reason: string;
-};
 /** A single provider attempt in the proxy translation plan. */
 export type ProxyTranslationAttempt = {
     provider?: string;
     model?: string;
     label: string;
 };
-/** Ordered plan of provider attempts and skipped candidates. */
+/** Ordered plan of provider attempts for a proxy request. */
 export type ProxyTranslationPlan = {
-    profile: ClaudeProxyRequestProfile;
+    requestedModel: string;
+    modelTier: ClaudeProxyModelTier;
     attempts: ProxyTranslationAttempt[];
-    skipped: FallbackEligibilityDecision[];
+    skipped: never[];
 };
-/** Discriminated union describing why a cooldown is active. */
-export type CooldownScope = {
-    scope: "request_class";
-    key: string;
-    until: number;
-} | {
-    scope: "model_tier";
-    key: string;
-    until: number;
-} | {
-    scope: "generic";
-    key: "generic";
-    until: number;
-};
-/** An account skipped during partitioning, with the cooldown that caused it. */
+/** An account skipped during partitioning, with its cooldown info. */
 export type CooldownSkippedAccount<T> = {
     account: T;
-    cooldown: CooldownScope;
+    cooldown: {
+        until: number;
+        backoffLevel: number;
+    };
 };
 /** Mutable readiness state tracked by the proxy process. */
 export type ProxyReadinessState = {