@juspay/neurolink 9.54.1 → 9.54.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,20 @@ import { BaseProvider } from "../core/baseProvider.js";
5
5
  import type { EnhancedGenerateResult, TextGenerationOptions } from "../types/generateTypes.js";
6
6
  import type { NeurolinkCredentials } from "../types/providers.js";
7
7
  import type { StreamOptions, StreamResult } from "../types/streamTypes.js";
8
+ /**
9
+ * Resolve the correct Vertex AI location for a given model.
10
+ *
11
+ * Google-published models (gemini-*) require the global endpoint
12
+ * (`aiplatform.googleapis.com`), not regional endpoints like
13
+ * `us-east5-aiplatform.googleapis.com`. Regional endpoints return
14
+ * "model not found" for these models.
15
+ *
16
+ * Anthropic-on-Vertex models (claude-*) require regional endpoints
17
+ * and are handled separately by `createVertexAnthropicSettings`.
18
+ *
19
+ * Embedding models and custom models use the configured location as-is.
20
+ */
21
+ export declare const resolveVertexLocation: (modelName: string | undefined, configuredLocation: string) => string;
8
22
  /**
9
23
  * Vertex Model Aliases
10
24
  *
@@ -79,7 +79,36 @@ const getVertexLocation = () => {
79
79
  return (process.env.GOOGLE_CLOUD_LOCATION ||
80
80
  process.env.VERTEX_LOCATION ||
81
81
  process.env.GOOGLE_VERTEX_LOCATION ||
82
- "us-central1");
82
+ "global");
83
+ };
84
+ /**
85
+ * Resolve the correct Vertex AI location for a given model.
86
+ *
87
+ * Google-published models (gemini-*) require the global endpoint
88
+ * (`aiplatform.googleapis.com`), not regional endpoints like
89
+ * `us-east5-aiplatform.googleapis.com`. Regional endpoints return
90
+ * "model not found" for these models.
91
+ *
92
+ * Anthropic-on-Vertex models (claude-*) require regional endpoints
93
+ * and are handled separately by `createVertexAnthropicSettings`.
94
+ *
95
+ * Embedding models and custom models use the configured location as-is.
96
+ */
97
+ export const resolveVertexLocation = (modelName, configuredLocation) => {
98
+ if (!modelName) {
99
+ return configuredLocation;
100
+ }
101
+ const normalized = modelName.toLowerCase();
102
+ // Google-published models always use the global endpoint.
103
+ // Hardcoded because Google's Vertex AI serves Gemini models exclusively
104
+ // from the global endpoint — regional endpoints like us-east5 return
105
+ // "Publisher Model was not found" errors. The env var GOOGLE_VERTEX_LOCATION
106
+ // is typically set for Anthropic-on-Vertex (which needs regional), so we
107
+ // cannot rely on it for Gemini routing.
108
+ if (normalized.startsWith("gemini-")) {
109
+ return "global";
110
+ }
111
+ return configuredLocation;
83
112
  };
84
113
  const getDefaultVertexModel = () => {
85
114
  // Use gemini-2.5-flash as default - latest and best price-performance model
@@ -96,8 +125,9 @@ const hasGoogleCredentials = () => {
96
125
  // Module-level cache for runtime-created credentials file to avoid per-request writes
97
126
  let cachedCredentialsPath = null;
98
127
  // Enhanced Vertex settings creation with authentication fallback and proxy support
99
- const createVertexSettings = async (region, credentials) => {
100
- const location = credentials?.location || region || getVertexLocation();
128
+ const createVertexSettings = async (region, credentials, modelName) => {
129
+ const configuredLocation = credentials?.location || region || getVertexLocation();
130
+ const location = resolveVertexLocation(modelName, configuredLocation);
101
131
  const project = credentials?.projectId || getVertexProjectId();
102
132
  const baseSettings = {
103
133
  project,
@@ -326,7 +356,12 @@ const createVertexAnthropicSettings = async (region, credentials) => {
326
356
  // which is invalid. The correct global endpoint omits the region prefix entirely.
327
357
  // Since the SDK doesn't handle this, redirect "global" to "us-east5" for Anthropic.
328
358
  const anthropicRegion = !region || region === "global" ? "us-east5" : region;
329
- const baseVertexSettings = await createVertexSettings(anthropicRegion, credentials);
359
+ // Override credentials.location so it cannot conflict with the redirected
360
+ // region — createVertexSettings checks credentials.location first.
361
+ const anthropicCredentials = credentials?.location
362
+ ? { ...credentials, location: anthropicRegion }
363
+ : credentials;
364
+ const baseVertexSettings = await createVertexSettings(anthropicRegion, anthropicCredentials);
330
365
  // GoogleVertexAnthropicProviderSettings extends GoogleVertexProviderSettings
331
366
  // so we can use the same settings with proper typing
332
367
  return {
@@ -570,7 +605,9 @@ export class GoogleVertexProvider extends BaseProvider {
570
605
  networkConfig: {
571
606
  projectId: this.projectId,
572
607
  location: this.location,
573
- expectedEndpoint: `https://${this.location}-aiplatform.googleapis.com`,
608
+ expectedEndpoint: this.location === "global"
609
+ ? "https://aiplatform.googleapis.com"
610
+ : `https://${this.location}-aiplatform.googleapis.com`,
574
611
  httpProxy: process.env.HTTP_PROXY || process.env.http_proxy,
575
612
  httpsProxy: process.env.HTTPS_PROXY || process.env.https_proxy,
576
613
  noProxy: process.env.NO_PROXY || process.env.no_proxy,
@@ -582,7 +619,7 @@ export class GoogleVertexProvider extends BaseProvider {
582
619
  message: "Starting Vertex settings creation with network configuration analysis",
583
620
  });
584
621
  try {
585
- const vertexSettings = await createVertexSettings(this.location, this.credentials);
622
+ const vertexSettings = await createVertexSettings(this.location, this.credentials, modelName);
586
623
  const vertexSettingsEndTime = process.hrtime.bigint();
587
624
  const vertexSettingsDurationNs = vertexSettingsEndTime - vertexSettingsStartTime;
588
625
  logger.debug(`[GoogleVertexProvider] ✅ LOG_POINT_V009_VERTEX_SETTINGS_SUCCESS`, {
@@ -1116,12 +1153,13 @@ export class GoogleVertexProvider extends BaseProvider {
1116
1153
  /**
1117
1154
  * Create @google/genai client configured for Vertex AI
1118
1155
  */
1119
- async createVertexGenAIClient(regionOverride) {
1156
+ async createVertexGenAIClient(regionOverride, modelName) {
1120
1157
  const project = this.credentials?.projectId || getVertexProjectId();
1121
- const location = this.credentials?.location ||
1158
+ const configuredLocation = this.credentials?.location ||
1122
1159
  regionOverride ||
1123
1160
  this.location ||
1124
1161
  getVertexLocation();
1162
+ const location = resolveVertexLocation(modelName, configuredLocation);
1125
1163
  const mod = await import("@google/genai");
1126
1164
  const ctor = mod.GoogleGenAI;
1127
1165
  if (!ctor) {
@@ -1308,8 +1346,8 @@ export class GoogleVertexProvider extends BaseProvider {
1308
1346
  }, (span) => this.executeNativeGemini3StreamWithSpan(options, modelName, span));
1309
1347
  }
1310
1348
  async executeNativeGemini3StreamWithSpan(options, modelName, span) {
1311
- const client = await this.createVertexGenAIClient(options.region);
1312
- const effectiveLocation = options.region || this.location || getVertexLocation();
1349
+ const client = await this.createVertexGenAIClient(options.region, modelName);
1350
+ const effectiveLocation = resolveVertexLocation(modelName, options.region || this.location || getVertexLocation());
1313
1351
  logger.debug("[GoogleVertex] Using native @google/genai for Gemini 3", {
1314
1352
  model: modelName,
1315
1353
  hasTools: !!options.tools && Object.keys(options.tools).length > 0,
@@ -1503,8 +1541,8 @@ export class GoogleVertexProvider extends BaseProvider {
1503
1541
  [ATTR.NL_PROVIDER]: this.providerName,
1504
1542
  },
1505
1543
  }, async (span) => {
1506
- const client = await this.createVertexGenAIClient(options.region);
1507
- const effectiveLocation = options.region || this.location || getVertexLocation();
1544
+ const client = await this.createVertexGenAIClient(options.region, modelName);
1545
+ const effectiveLocation = resolveVertexLocation(modelName, options.region || this.location || getVertexLocation());
1508
1546
  logger.debug("[GoogleVertex] Using native @google/genai for Gemini 3 generate", {
1509
1547
  model: modelName,
1510
1548
  project: this.projectId,
@@ -1,33 +1,43 @@
1
- import type { ClaudeProxyModelTier, ClaudeProxyRequestClass, ClaudeProxyRequestProfile, CooldownScope, CooldownSkippedAccount, FallbackEligibilityDecision, FallbackEntry, ParsedClaudeRequest, ProxyTranslationAttempt, ProxyTranslationPlan, RuntimeAccountState } from "../types/index.js";
2
- export type { ClaudeProxyModelTier, ClaudeProxyRequestClass, ClaudeProxyRequestProfile, CooldownScope, CooldownSkippedAccount, FallbackEligibilityDecision, ProxyTranslationAttempt, ProxyTranslationPlan, };
1
+ import type { ClaudeProxyModelTier, CooldownSkippedAccount, FallbackEntry, ParsedClaudeRequest, ProxyTranslationAttempt, ProxyTranslationPlan, RuntimeAccountState } from "../types/index.js";
2
+ export type { ClaudeProxyModelTier, ProxyTranslationAttempt, ProxyTranslationPlan, };
3
3
  export declare function inferClaudeProxyModelTier(modelName: string): ClaudeProxyModelTier;
4
- export declare function classifyClaudeProxyRequest(requestedModel: string, parsed: ParsedClaudeRequest): ClaudeProxyRequestProfile;
5
- export declare function getRequestClassCooldownKey(profile: ClaudeProxyRequestProfile): string;
6
- export declare function getModelTierCooldownKey(profile: ClaudeProxyRequestProfile): string;
7
- export declare function evaluateFallbackEligibility(profile: ClaudeProxyRequestProfile, candidate: {
8
- provider?: string;
9
- model?: string;
10
- }): FallbackEligibilityDecision;
4
+ /**
5
+ * Build a translation plan for a Claude-compatible proxy request.
6
+ * The plan lists the primary provider followed by eligible fallback targets.
7
+ * All configured fallback entries are always eligible — no contract-based gating.
8
+ * When no fallback chain is configured, an "auto-provider" entry is appended.
9
+ */
11
10
  export declare function buildProxyTranslationPlan(primary: {
12
11
  provider: string;
13
12
  model?: string;
14
- }, fallbackChain: FallbackEntry[], requestedModel: string, parsed: ParsedClaudeRequest): ProxyTranslationPlan;
15
- export declare function summarizeSkippedFallbacks(plan: Pick<ProxyTranslationPlan, "profile" | "skipped">): string | null;
16
- export declare function getActiveCooldownScope(state: RuntimeAccountState, profile: ClaudeProxyRequestProfile, now?: number): CooldownScope | null;
13
+ }, fallbackChain: FallbackEntry[], requestedModel: string, _parsed: ParsedClaudeRequest): ProxyTranslationPlan;
14
+ /**
15
+ * Check whether an account is currently cooling down.
16
+ * Returns the cooldown timestamp if active, null otherwise.
17
+ */
18
+ export declare function getAccountCooldownUntil(state: RuntimeAccountState, now?: number): number | null;
19
+ /**
20
+ * Partition accounts into eligible (no cooldown) and skipped (cooling down).
21
+ */
17
22
  export declare function partitionAccountsByCooldown<T extends {
18
23
  key: string;
19
- }>(accounts: T[], getState: (account: T) => RuntimeAccountState, profile: ClaudeProxyRequestProfile, now?: number): {
24
+ }>(accounts: T[], getState: (account: T) => RuntimeAccountState, now?: number): {
20
25
  eligible: T[];
21
26
  skipped: CooldownSkippedAccount<T>[];
22
27
  };
23
- export declare function applyRateLimitCooldownScope(args: {
28
+ /**
29
+ * Apply a rate-limit cooldown to an account.
30
+ * Uses simple exponential backoff with a floor and cap.
31
+ */
32
+ export declare function applyRateLimitCooldown(args: {
24
33
  state: RuntimeAccountState;
25
- profile: ClaudeProxyRequestProfile;
26
34
  retryAfterMs?: number;
27
35
  now?: number;
28
36
  capMs: number;
29
37
  }): {
30
38
  backoffMs: number;
31
- requestClassKey: string;
32
- modelTierKey: string;
33
39
  };
40
+ /**
41
+ * Clear cooldown state for an account after a successful request.
42
+ */
43
+ export declare function clearAccountCooldown(state: RuntimeAccountState): void;
@@ -1,9 +1,4 @@
1
- const STREAMING_CONVERSATIONAL_TOOL_THRESHOLD = 4;
2
- const STRONG_TOOL_FIDELITY_THRESHOLD = 8;
3
- const HIGH_TOOL_COUNT_THRESHOLD = 24;
4
1
  const DEFAULT_COOLDOWN_FLOOR_MS = 1_000;
5
- const HIGH_TOOL_COUNT_COOLDOWN_FLOOR_MS = 10_000;
6
- const HIGH_FIDELITY_COOLDOWN_FLOOR_MS = 300_000;
7
2
  export function inferClaudeProxyModelTier(modelName) {
8
3
  const normalized = modelName.toLowerCase();
9
4
  if (normalized.includes("opus")) {
@@ -17,101 +12,13 @@ export function inferClaudeProxyModelTier(modelName) {
17
12
  }
18
13
  return "other";
19
14
  }
20
- function detectToolHistory(parsed) {
21
- return parsed.conversationMessages.some((message) => {
22
- return (message.content.includes("[tool_use:") ||
23
- message.content.includes("[tool_result:"));
24
- });
25
- }
26
- export function classifyClaudeProxyRequest(requestedModel, parsed) {
27
- const toolCount = Object.keys(parsed.tools).length;
28
- const hasImages = parsed.images.length > 0;
29
- const hasThinking = !!parsed.thinkingConfig?.enabled;
30
- const hasToolHistory = detectToolHistory(parsed);
31
- const requiresSpecificTool = !!parsed.toolChoiceName;
32
- const requiresToolUse = parsed.toolChoice === "required" || requiresSpecificTool || hasToolHistory;
33
- const requiresStrongToolFidelity = toolCount >= STRONG_TOOL_FIDELITY_THRESHOLD ||
34
- requiresSpecificTool ||
35
- hasToolHistory;
36
- const isHighToolCountNonStream = !parsed.stream && toolCount >= HIGH_TOOL_COUNT_THRESHOLD;
37
- const isStreamingConversational = parsed.stream &&
38
- !hasImages &&
39
- toolCount <= STREAMING_CONVERSATIONAL_TOOL_THRESHOLD &&
40
- !requiresStrongToolFidelity;
41
- const classes = [];
42
- if (hasImages) {
43
- classes.push("multimodal");
44
- }
45
- if (isHighToolCountNonStream) {
46
- classes.push("high-tool-count-non-stream-structured");
47
- }
48
- if (requiresStrongToolFidelity) {
49
- classes.push("strong-tool-fidelity");
50
- }
51
- if (isStreamingConversational) {
52
- classes.push("streaming-conversational");
53
- }
54
- if (classes.length === 0) {
55
- classes.push("standard");
56
- }
57
- return {
58
- requestedModel,
59
- modelTier: inferClaudeProxyModelTier(requestedModel),
60
- primaryClass: classes[0],
61
- classes,
62
- stream: parsed.stream,
63
- toolCount,
64
- hasImages,
65
- hasThinking,
66
- hasToolHistory,
67
- requiresToolUse,
68
- requiresSpecificTool,
69
- requiresStrongToolFidelity,
70
- isHighToolCountNonStream,
71
- isStreamingConversational,
72
- isMultimodal: hasImages,
73
- };
74
- }
75
- export function getRequestClassCooldownKey(profile) {
76
- return `${profile.primaryClass}:${profile.requestedModel.toLowerCase()}`;
77
- }
78
- export function getModelTierCooldownKey(profile) {
79
- return profile.modelTier;
80
- }
81
- function getQualityGuardReason(profile, provider, _model) {
82
- // Only gate auto-provider fallback (no explicit provider).
83
- // Configured fallback-chain entries are always allowed through —
84
- // let them attempt the request and fail naturally if the provider
85
- // cannot handle it.
86
- if (!provider) {
87
- if (profile.modelTier === "opus" ||
88
- profile.requiresStrongToolFidelity ||
89
- profile.isHighToolCountNonStream) {
90
- return "auto-provider fallback is disabled for requests that require contract preservation";
91
- }
92
- return null;
93
- }
94
- return null;
95
- }
96
- export function evaluateFallbackEligibility(profile, candidate) {
97
- const policyBlockReason = getQualityGuardReason(profile, candidate.provider, candidate.model);
98
- if (policyBlockReason) {
99
- return {
100
- provider: candidate.provider,
101
- model: candidate.model,
102
- eligible: false,
103
- reason: policyBlockReason,
104
- };
105
- }
106
- return {
107
- provider: candidate.provider,
108
- model: candidate.model,
109
- eligible: true,
110
- reason: "eligible",
111
- };
112
- }
113
- export function buildProxyTranslationPlan(primary, fallbackChain, requestedModel, parsed) {
114
- const profile = classifyClaudeProxyRequest(requestedModel, parsed);
15
+ /**
16
+ * Build a translation plan for a Claude-compatible proxy request.
17
+ * The plan lists the primary provider followed by eligible fallback targets.
18
+ * All configured fallback entries are always eligible — no contract-based gating.
19
+ * When no fallback chain is configured, an "auto-provider" entry is appended.
20
+ */
21
+ export function buildProxyTranslationPlan(primary, fallbackChain, requestedModel, _parsed) {
115
22
  const attempts = [
116
23
  {
117
24
  provider: primary.provider,
@@ -119,142 +26,79 @@ export function buildProxyTranslationPlan(primary, fallbackChain, requestedModel
119
26
  label: `${primary.provider}/${primary.model ?? "unknown"}`,
120
27
  },
121
28
  ];
122
- const skipped = [];
123
29
  for (const fallback of fallbackChain) {
124
30
  if (fallback.provider === primary.provider &&
125
31
  fallback.model === primary.model) {
126
32
  continue;
127
33
  }
128
- const decision = evaluateFallbackEligibility(profile, fallback);
129
- if (!decision.eligible) {
130
- skipped.push(decision);
131
- continue;
132
- }
133
34
  attempts.push({
134
35
  provider: fallback.provider,
135
36
  model: fallback.model,
136
37
  label: `${fallback.provider}/${fallback.model}`,
137
38
  });
138
39
  }
139
- if (fallbackChain.length === 0) {
140
- const autoDecision = evaluateFallbackEligibility(profile, {});
141
- if (autoDecision.eligible) {
142
- attempts.push({ label: "auto-provider" });
143
- }
144
- else {
145
- skipped.push(autoDecision);
146
- }
40
+ // Append auto-provider when no configured fallback chain exists,
41
+ // or when all configured entries were deduped (same as primary).
42
+ if (fallbackChain.length === 0 || attempts.length === 1) {
43
+ attempts.push({ label: "auto-provider" });
147
44
  }
148
45
  return {
149
- profile,
46
+ requestedModel,
47
+ modelTier: inferClaudeProxyModelTier(requestedModel),
150
48
  attempts,
151
- skipped,
49
+ skipped: [],
152
50
  };
153
51
  }
154
- export function summarizeSkippedFallbacks(plan) {
155
- if (plan.skipped.length === 0) {
156
- return null;
157
- }
158
- const summary = plan.skipped
159
- .map((decision) => {
160
- const label = decision.provider
161
- ? `${decision.provider}/${decision.model ?? "unknown"}`
162
- : "auto-provider";
163
- return `${label}: ${decision.reason}`;
164
- })
165
- .join("; ");
166
- return `Fallback policy preserved the requested ${plan.profile.primaryClass} contract by skipping ineligible targets. ${summary}`;
167
- }
168
- export function getActiveCooldownScope(state, profile, now = Date.now()) {
169
- let longest = null;
170
- const requestClassKey = getRequestClassCooldownKey(profile);
171
- const requestClassUntil = state.requestClassCooldowns?.[requestClassKey] ?? undefined;
172
- if (requestClassUntil && requestClassUntil > now) {
173
- longest = {
174
- scope: "request_class",
175
- key: requestClassKey,
176
- until: requestClassUntil,
177
- };
178
- }
179
- const modelTierKey = getModelTierCooldownKey(profile);
180
- const modelTierUntil = state.modelTierCooldowns?.[modelTierKey] ?? undefined;
181
- if (modelTierUntil &&
182
- modelTierUntil > now &&
183
- modelTierUntil > (longest?.until ?? 0)) {
184
- longest = {
185
- scope: "model_tier",
186
- key: modelTierKey,
187
- until: modelTierUntil,
188
- };
189
- }
190
- if (state.coolingUntil &&
191
- state.coolingUntil > now &&
192
- state.coolingUntil > (longest?.until ?? 0)) {
193
- longest = {
194
- scope: "generic",
195
- key: "generic",
196
- until: state.coolingUntil,
197
- };
52
+ // ---------------------------------------------------------------------------
53
+ // Simple per-account cooldown
54
+ // ---------------------------------------------------------------------------
55
+ /**
56
+ * Check whether an account is currently cooling down.
57
+ * Returns the cooldown timestamp if active, null otherwise.
58
+ */
59
+ export function getAccountCooldownUntil(state, now = Date.now()) {
60
+ if (state.coolingUntil && state.coolingUntil > now) {
61
+ return state.coolingUntil;
198
62
  }
199
- return longest;
63
+ return null;
200
64
  }
201
- export function partitionAccountsByCooldown(accounts, getState, profile, now = Date.now()) {
65
+ /**
66
+ * Partition accounts into eligible (no cooldown) and skipped (cooling down).
67
+ */
68
+ export function partitionAccountsByCooldown(accounts, getState, now = Date.now()) {
202
69
  const eligible = [];
203
70
  const skipped = [];
204
71
  for (const account of accounts) {
205
- const cooldown = getActiveCooldownScope(getState(account), profile, now);
206
- if (cooldown) {
207
- skipped.push({ account, cooldown });
72
+ const state = getState(account);
73
+ const until = getAccountCooldownUntil(state, now);
74
+ if (until !== null) {
75
+ skipped.push({
76
+ account,
77
+ cooldown: { until, backoffLevel: state.backoffLevel },
78
+ });
208
79
  continue;
209
80
  }
210
81
  eligible.push(account);
211
82
  }
212
- return {
213
- eligible,
214
- skipped,
215
- };
83
+ return { eligible, skipped };
216
84
  }
217
- export function applyRateLimitCooldownScope(args) {
85
+ /**
86
+ * Apply a rate-limit cooldown to an account.
87
+ * Uses simple exponential backoff with a floor and cap.
88
+ */
89
+ export function applyRateLimitCooldown(args) {
218
90
  const now = args.now ?? Date.now();
219
- const requestClassKey = getRequestClassCooldownKey(args.profile);
220
- const modelTierKey = getModelTierCooldownKey(args.profile);
221
- const rcBackoffLevels = args.state.requestClassBackoffLevels ?? {};
222
- const mtBackoffLevels = args.state.modelTierBackoffLevels ?? {};
223
- const scopedBackoffLevel = Math.max(rcBackoffLevels[requestClassKey] ?? 0, mtBackoffLevels[modelTierKey] ?? 0);
224
- // High-tool-count-non-stream gets its own (lower) floor so that requests
225
- // recover faster once proper OAuth betas are forwarded. Check it first
226
- // because every >=24-tool request also satisfies requiresStrongToolFidelity
227
- // (threshold 8), which would otherwise shadow this branch.
228
- const floorMs = args.profile.isHighToolCountNonStream
229
- ? HIGH_TOOL_COUNT_COOLDOWN_FLOOR_MS
230
- : args.profile.modelTier === "opus" ||
231
- args.profile.requiresStrongToolFidelity
232
- ? HIGH_FIDELITY_COOLDOWN_FLOOR_MS
233
- : DEFAULT_COOLDOWN_FLOOR_MS;
234
- const baseCooldownMs = Math.max(args.retryAfterMs ?? 0, floorMs);
235
- const backoffMs = Math.min(baseCooldownMs * 2 ** scopedBackoffLevel, args.capMs);
236
- const until = now + backoffMs;
237
- args.state.requestClassCooldowns = {
238
- ...(args.state.requestClassCooldowns ?? {}),
239
- [requestClassKey]: Math.max(args.state.requestClassCooldowns?.[requestClassKey] ?? 0, until),
240
- };
241
- args.state.modelTierCooldowns = {
242
- ...(args.state.modelTierCooldowns ?? {}),
243
- [modelTierKey]: Math.max(args.state.modelTierCooldowns?.[modelTierKey] ?? 0, until),
244
- };
245
- args.state.requestClassBackoffLevels = {
246
- ...rcBackoffLevels,
247
- [requestClassKey]: (rcBackoffLevels[requestClassKey] ?? 0) + 1,
248
- };
249
- args.state.modelTierBackoffLevels = {
250
- ...mtBackoffLevels,
251
- [modelTierKey]: (mtBackoffLevels[modelTierKey] ?? 0) + 1,
252
- };
91
+ const baseCooldownMs = Math.max(args.retryAfterMs ?? 0, DEFAULT_COOLDOWN_FLOOR_MS);
92
+ const backoffMs = Math.min(baseCooldownMs * 2 ** args.state.backoffLevel, args.capMs);
93
+ args.state.coolingUntil = now + backoffMs;
253
94
  args.state.backoffLevel += 1;
254
- return {
255
- backoffMs,
256
- requestClassKey,
257
- modelTierKey,
258
- };
95
+ return { backoffMs };
96
+ }
97
+ /**
98
+ * Clear cooldown state for an account after a successful request.
99
+ */
100
+ export function clearAccountCooldown(state) {
101
+ state.coolingUntil = undefined;
102
+ state.backoffLevel = 0;
259
103
  }
260
104
  //# sourceMappingURL=routingPolicy.js.map