@juspay/neurolink 9.54.0 → 9.54.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/browser/neurolink.min.js +296 -296
- package/dist/cli/commands/auth.js +6 -0
- package/dist/lib/providers/googleVertex.d.ts +14 -0
- package/dist/lib/providers/googleVertex.js +50 -12
- package/dist/lib/proxy/routingPolicy.d.ts +27 -17
- package/dist/lib/proxy/routingPolicy.js +53 -209
- package/dist/lib/server/routes/claudeProxyRoutes.js +35 -73
- package/dist/lib/types/proxyTypes.d.ts +9 -50
- package/dist/lib/utils/messageBuilder.js +39 -6
- package/dist/providers/googleVertex.d.ts +14 -0
- package/dist/providers/googleVertex.js +50 -12
- package/dist/proxy/routingPolicy.d.ts +27 -17
- package/dist/proxy/routingPolicy.js +53 -209
- package/dist/server/routes/claudeProxyRoutes.js +35 -73
- package/dist/types/proxyTypes.d.ts +9 -50
- package/dist/utils/messageBuilder.js +39 -6
- package/package.json +1 -1
|
@@ -996,6 +996,9 @@ async function handleCreateApiKeyOAuth(provider) {
|
|
|
996
996
|
logger.always("3. Copy the authorization code shown on the page");
|
|
997
997
|
logger.always("4. Paste the code below");
|
|
998
998
|
logger.always("");
|
|
999
|
+
logger.always(chalk.dim(" Authentication URL:"));
|
|
1000
|
+
logger.always(chalk.cyan(` ${authUrl.toString()}`));
|
|
1001
|
+
logger.always("");
|
|
999
1002
|
// Prompt user to enter the authorization code
|
|
1000
1003
|
const { authCode } = await inquirer.prompt([
|
|
1001
1004
|
{
|
|
@@ -1158,6 +1161,9 @@ async function handleOAuthAuth(provider) {
|
|
|
1158
1161
|
logger.always("3. Copy the authorization code shown on the page");
|
|
1159
1162
|
logger.always("4. Paste the code below");
|
|
1160
1163
|
logger.always("");
|
|
1164
|
+
logger.always(chalk.dim(" Authentication URL:"));
|
|
1165
|
+
logger.always(chalk.cyan(` ${authUrl.toString()}`));
|
|
1166
|
+
logger.always("");
|
|
1161
1167
|
// Prompt user to enter the authorization code
|
|
1162
1168
|
const { authCode } = await inquirer.prompt([
|
|
1163
1169
|
{
|
|
@@ -5,6 +5,20 @@ import { BaseProvider } from "../core/baseProvider.js";
|
|
|
5
5
|
import type { EnhancedGenerateResult, TextGenerationOptions } from "../types/generateTypes.js";
|
|
6
6
|
import type { NeurolinkCredentials } from "../types/providers.js";
|
|
7
7
|
import type { StreamOptions, StreamResult } from "../types/streamTypes.js";
|
|
8
|
+
/**
|
|
9
|
+
* Resolve the correct Vertex AI location for a given model.
|
|
10
|
+
*
|
|
11
|
+
* Google-published models (gemini-*) require the global endpoint
|
|
12
|
+
* (`aiplatform.googleapis.com`), not regional endpoints like
|
|
13
|
+
* `us-east5-aiplatform.googleapis.com`. Regional endpoints return
|
|
14
|
+
* "model not found" for these models.
|
|
15
|
+
*
|
|
16
|
+
* Anthropic-on-Vertex models (claude-*) require regional endpoints
|
|
17
|
+
* and are handled separately by `createVertexAnthropicSettings`.
|
|
18
|
+
*
|
|
19
|
+
* Embedding models and custom models use the configured location as-is.
|
|
20
|
+
*/
|
|
21
|
+
export declare const resolveVertexLocation: (modelName: string | undefined, configuredLocation: string) => string;
|
|
8
22
|
/**
|
|
9
23
|
* Vertex Model Aliases
|
|
10
24
|
*
|
|
@@ -79,7 +79,36 @@ const getVertexLocation = () => {
|
|
|
79
79
|
return (process.env.GOOGLE_CLOUD_LOCATION ||
|
|
80
80
|
process.env.VERTEX_LOCATION ||
|
|
81
81
|
process.env.GOOGLE_VERTEX_LOCATION ||
|
|
82
|
-
"
|
|
82
|
+
"global");
|
|
83
|
+
};
|
|
84
|
+
/**
|
|
85
|
+
* Resolve the correct Vertex AI location for a given model.
|
|
86
|
+
*
|
|
87
|
+
* Google-published models (gemini-*) require the global endpoint
|
|
88
|
+
* (`aiplatform.googleapis.com`), not regional endpoints like
|
|
89
|
+
* `us-east5-aiplatform.googleapis.com`. Regional endpoints return
|
|
90
|
+
* "model not found" for these models.
|
|
91
|
+
*
|
|
92
|
+
* Anthropic-on-Vertex models (claude-*) require regional endpoints
|
|
93
|
+
* and are handled separately by `createVertexAnthropicSettings`.
|
|
94
|
+
*
|
|
95
|
+
* Embedding models and custom models use the configured location as-is.
|
|
96
|
+
*/
|
|
97
|
+
export const resolveVertexLocation = (modelName, configuredLocation) => {
|
|
98
|
+
if (!modelName) {
|
|
99
|
+
return configuredLocation;
|
|
100
|
+
}
|
|
101
|
+
const normalized = modelName.toLowerCase();
|
|
102
|
+
// Google-published models always use the global endpoint.
|
|
103
|
+
// Hardcoded because Google's Vertex AI serves Gemini models exclusively
|
|
104
|
+
// from the global endpoint — regional endpoints like us-east5 return
|
|
105
|
+
// "Publisher Model was not found" errors. The env var GOOGLE_VERTEX_LOCATION
|
|
106
|
+
// is typically set for Anthropic-on-Vertex (which needs regional), so we
|
|
107
|
+
// cannot rely on it for Gemini routing.
|
|
108
|
+
if (normalized.startsWith("gemini-")) {
|
|
109
|
+
return "global";
|
|
110
|
+
}
|
|
111
|
+
return configuredLocation;
|
|
83
112
|
};
|
|
84
113
|
const getDefaultVertexModel = () => {
|
|
85
114
|
// Use gemini-2.5-flash as default - latest and best price-performance model
|
|
@@ -96,8 +125,9 @@ const hasGoogleCredentials = () => {
|
|
|
96
125
|
// Module-level cache for runtime-created credentials file to avoid per-request writes
|
|
97
126
|
let cachedCredentialsPath = null;
|
|
98
127
|
// Enhanced Vertex settings creation with authentication fallback and proxy support
|
|
99
|
-
const createVertexSettings = async (region, credentials) => {
|
|
100
|
-
const
|
|
128
|
+
const createVertexSettings = async (region, credentials, modelName) => {
|
|
129
|
+
const configuredLocation = credentials?.location || region || getVertexLocation();
|
|
130
|
+
const location = resolveVertexLocation(modelName, configuredLocation);
|
|
101
131
|
const project = credentials?.projectId || getVertexProjectId();
|
|
102
132
|
const baseSettings = {
|
|
103
133
|
project,
|
|
@@ -326,7 +356,12 @@ const createVertexAnthropicSettings = async (region, credentials) => {
|
|
|
326
356
|
// which is invalid. The correct global endpoint omits the region prefix entirely.
|
|
327
357
|
// Since the SDK doesn't handle this, redirect "global" to "us-east5" for Anthropic.
|
|
328
358
|
const anthropicRegion = !region || region === "global" ? "us-east5" : region;
|
|
329
|
-
|
|
359
|
+
// Override credentials.location so it cannot conflict with the redirected
|
|
360
|
+
// region — createVertexSettings checks credentials.location first.
|
|
361
|
+
const anthropicCredentials = credentials?.location
|
|
362
|
+
? { ...credentials, location: anthropicRegion }
|
|
363
|
+
: credentials;
|
|
364
|
+
const baseVertexSettings = await createVertexSettings(anthropicRegion, anthropicCredentials);
|
|
330
365
|
// GoogleVertexAnthropicProviderSettings extends GoogleVertexProviderSettings
|
|
331
366
|
// so we can use the same settings with proper typing
|
|
332
367
|
return {
|
|
@@ -570,7 +605,9 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
570
605
|
networkConfig: {
|
|
571
606
|
projectId: this.projectId,
|
|
572
607
|
location: this.location,
|
|
573
|
-
expectedEndpoint:
|
|
608
|
+
expectedEndpoint: this.location === "global"
|
|
609
|
+
? "https://aiplatform.googleapis.com"
|
|
610
|
+
: `https://${this.location}-aiplatform.googleapis.com`,
|
|
574
611
|
httpProxy: process.env.HTTP_PROXY || process.env.http_proxy,
|
|
575
612
|
httpsProxy: process.env.HTTPS_PROXY || process.env.https_proxy,
|
|
576
613
|
noProxy: process.env.NO_PROXY || process.env.no_proxy,
|
|
@@ -582,7 +619,7 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
582
619
|
message: "Starting Vertex settings creation with network configuration analysis",
|
|
583
620
|
});
|
|
584
621
|
try {
|
|
585
|
-
const vertexSettings = await createVertexSettings(this.location, this.credentials);
|
|
622
|
+
const vertexSettings = await createVertexSettings(this.location, this.credentials, modelName);
|
|
586
623
|
const vertexSettingsEndTime = process.hrtime.bigint();
|
|
587
624
|
const vertexSettingsDurationNs = vertexSettingsEndTime - vertexSettingsStartTime;
|
|
588
625
|
logger.debug(`[GoogleVertexProvider] ✅ LOG_POINT_V009_VERTEX_SETTINGS_SUCCESS`, {
|
|
@@ -1116,12 +1153,13 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
1116
1153
|
/**
|
|
1117
1154
|
* Create @google/genai client configured for Vertex AI
|
|
1118
1155
|
*/
|
|
1119
|
-
async createVertexGenAIClient(regionOverride) {
|
|
1156
|
+
async createVertexGenAIClient(regionOverride, modelName) {
|
|
1120
1157
|
const project = this.credentials?.projectId || getVertexProjectId();
|
|
1121
|
-
const
|
|
1158
|
+
const configuredLocation = this.credentials?.location ||
|
|
1122
1159
|
regionOverride ||
|
|
1123
1160
|
this.location ||
|
|
1124
1161
|
getVertexLocation();
|
|
1162
|
+
const location = resolveVertexLocation(modelName, configuredLocation);
|
|
1125
1163
|
const mod = await import("@google/genai");
|
|
1126
1164
|
const ctor = mod.GoogleGenAI;
|
|
1127
1165
|
if (!ctor) {
|
|
@@ -1308,8 +1346,8 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
1308
1346
|
}, (span) => this.executeNativeGemini3StreamWithSpan(options, modelName, span));
|
|
1309
1347
|
}
|
|
1310
1348
|
async executeNativeGemini3StreamWithSpan(options, modelName, span) {
|
|
1311
|
-
const client = await this.createVertexGenAIClient(options.region);
|
|
1312
|
-
const effectiveLocation = options.region || this.location || getVertexLocation();
|
|
1349
|
+
const client = await this.createVertexGenAIClient(options.region, modelName);
|
|
1350
|
+
const effectiveLocation = resolveVertexLocation(modelName, options.region || this.location || getVertexLocation());
|
|
1313
1351
|
logger.debug("[GoogleVertex] Using native @google/genai for Gemini 3", {
|
|
1314
1352
|
model: modelName,
|
|
1315
1353
|
hasTools: !!options.tools && Object.keys(options.tools).length > 0,
|
|
@@ -1503,8 +1541,8 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
1503
1541
|
[ATTR.NL_PROVIDER]: this.providerName,
|
|
1504
1542
|
},
|
|
1505
1543
|
}, async (span) => {
|
|
1506
|
-
const client = await this.createVertexGenAIClient(options.region);
|
|
1507
|
-
const effectiveLocation = options.region || this.location || getVertexLocation();
|
|
1544
|
+
const client = await this.createVertexGenAIClient(options.region, modelName);
|
|
1545
|
+
const effectiveLocation = resolveVertexLocation(modelName, options.region || this.location || getVertexLocation());
|
|
1508
1546
|
logger.debug("[GoogleVertex] Using native @google/genai for Gemini 3 generate", {
|
|
1509
1547
|
model: modelName,
|
|
1510
1548
|
project: this.projectId,
|
|
@@ -1,33 +1,43 @@
|
|
|
1
|
-
import type { ClaudeProxyModelTier,
|
|
2
|
-
export type { ClaudeProxyModelTier,
|
|
1
|
+
import type { ClaudeProxyModelTier, CooldownSkippedAccount, FallbackEntry, ParsedClaudeRequest, ProxyTranslationAttempt, ProxyTranslationPlan, RuntimeAccountState } from "../types/index.js";
|
|
2
|
+
export type { ClaudeProxyModelTier, ProxyTranslationAttempt, ProxyTranslationPlan, };
|
|
3
3
|
export declare function inferClaudeProxyModelTier(modelName: string): ClaudeProxyModelTier;
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
}): FallbackEligibilityDecision;
|
|
4
|
+
/**
|
|
5
|
+
* Build a translation plan for a Claude-compatible proxy request.
|
|
6
|
+
* The plan lists the primary provider followed by eligible fallback targets.
|
|
7
|
+
* All configured fallback entries are always eligible — no contract-based gating.
|
|
8
|
+
* When no fallback chain is configured, an "auto-provider" entry is appended.
|
|
9
|
+
*/
|
|
11
10
|
export declare function buildProxyTranslationPlan(primary: {
|
|
12
11
|
provider: string;
|
|
13
12
|
model?: string;
|
|
14
|
-
}, fallbackChain: FallbackEntry[], requestedModel: string,
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
}, fallbackChain: FallbackEntry[], requestedModel: string, _parsed: ParsedClaudeRequest): ProxyTranslationPlan;
|
|
14
|
+
/**
|
|
15
|
+
* Check whether an account is currently cooling down.
|
|
16
|
+
* Returns the cooldown timestamp if active, null otherwise.
|
|
17
|
+
*/
|
|
18
|
+
export declare function getAccountCooldownUntil(state: RuntimeAccountState, now?: number): number | null;
|
|
19
|
+
/**
|
|
20
|
+
* Partition accounts into eligible (no cooldown) and skipped (cooling down).
|
|
21
|
+
*/
|
|
17
22
|
export declare function partitionAccountsByCooldown<T extends {
|
|
18
23
|
key: string;
|
|
19
|
-
}>(accounts: T[], getState: (account: T) => RuntimeAccountState,
|
|
24
|
+
}>(accounts: T[], getState: (account: T) => RuntimeAccountState, now?: number): {
|
|
20
25
|
eligible: T[];
|
|
21
26
|
skipped: CooldownSkippedAccount<T>[];
|
|
22
27
|
};
|
|
23
|
-
|
|
28
|
+
/**
|
|
29
|
+
* Apply a rate-limit cooldown to an account.
|
|
30
|
+
* Uses simple exponential backoff with a floor and cap.
|
|
31
|
+
*/
|
|
32
|
+
export declare function applyRateLimitCooldown(args: {
|
|
24
33
|
state: RuntimeAccountState;
|
|
25
|
-
profile: ClaudeProxyRequestProfile;
|
|
26
34
|
retryAfterMs?: number;
|
|
27
35
|
now?: number;
|
|
28
36
|
capMs: number;
|
|
29
37
|
}): {
|
|
30
38
|
backoffMs: number;
|
|
31
|
-
requestClassKey: string;
|
|
32
|
-
modelTierKey: string;
|
|
33
39
|
};
|
|
40
|
+
/**
|
|
41
|
+
* Clear cooldown state for an account after a successful request.
|
|
42
|
+
*/
|
|
43
|
+
export declare function clearAccountCooldown(state: RuntimeAccountState): void;
|
|
@@ -1,9 +1,4 @@
|
|
|
1
|
-
const STREAMING_CONVERSATIONAL_TOOL_THRESHOLD = 4;
|
|
2
|
-
const STRONG_TOOL_FIDELITY_THRESHOLD = 8;
|
|
3
|
-
const HIGH_TOOL_COUNT_THRESHOLD = 24;
|
|
4
1
|
const DEFAULT_COOLDOWN_FLOOR_MS = 1_000;
|
|
5
|
-
const HIGH_TOOL_COUNT_COOLDOWN_FLOOR_MS = 10_000;
|
|
6
|
-
const HIGH_FIDELITY_COOLDOWN_FLOOR_MS = 300_000;
|
|
7
2
|
export function inferClaudeProxyModelTier(modelName) {
|
|
8
3
|
const normalized = modelName.toLowerCase();
|
|
9
4
|
if (normalized.includes("opus")) {
|
|
@@ -17,101 +12,13 @@ export function inferClaudeProxyModelTier(modelName) {
|
|
|
17
12
|
}
|
|
18
13
|
return "other";
|
|
19
14
|
}
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
export function
|
|
27
|
-
const toolCount = Object.keys(parsed.tools).length;
|
|
28
|
-
const hasImages = parsed.images.length > 0;
|
|
29
|
-
const hasThinking = !!parsed.thinkingConfig?.enabled;
|
|
30
|
-
const hasToolHistory = detectToolHistory(parsed);
|
|
31
|
-
const requiresSpecificTool = !!parsed.toolChoiceName;
|
|
32
|
-
const requiresToolUse = parsed.toolChoice === "required" || requiresSpecificTool || hasToolHistory;
|
|
33
|
-
const requiresStrongToolFidelity = toolCount >= STRONG_TOOL_FIDELITY_THRESHOLD ||
|
|
34
|
-
requiresSpecificTool ||
|
|
35
|
-
hasToolHistory;
|
|
36
|
-
const isHighToolCountNonStream = !parsed.stream && toolCount >= HIGH_TOOL_COUNT_THRESHOLD;
|
|
37
|
-
const isStreamingConversational = parsed.stream &&
|
|
38
|
-
!hasImages &&
|
|
39
|
-
toolCount <= STREAMING_CONVERSATIONAL_TOOL_THRESHOLD &&
|
|
40
|
-
!requiresStrongToolFidelity;
|
|
41
|
-
const classes = [];
|
|
42
|
-
if (hasImages) {
|
|
43
|
-
classes.push("multimodal");
|
|
44
|
-
}
|
|
45
|
-
if (isHighToolCountNonStream) {
|
|
46
|
-
classes.push("high-tool-count-non-stream-structured");
|
|
47
|
-
}
|
|
48
|
-
if (requiresStrongToolFidelity) {
|
|
49
|
-
classes.push("strong-tool-fidelity");
|
|
50
|
-
}
|
|
51
|
-
if (isStreamingConversational) {
|
|
52
|
-
classes.push("streaming-conversational");
|
|
53
|
-
}
|
|
54
|
-
if (classes.length === 0) {
|
|
55
|
-
classes.push("standard");
|
|
56
|
-
}
|
|
57
|
-
return {
|
|
58
|
-
requestedModel,
|
|
59
|
-
modelTier: inferClaudeProxyModelTier(requestedModel),
|
|
60
|
-
primaryClass: classes[0],
|
|
61
|
-
classes,
|
|
62
|
-
stream: parsed.stream,
|
|
63
|
-
toolCount,
|
|
64
|
-
hasImages,
|
|
65
|
-
hasThinking,
|
|
66
|
-
hasToolHistory,
|
|
67
|
-
requiresToolUse,
|
|
68
|
-
requiresSpecificTool,
|
|
69
|
-
requiresStrongToolFidelity,
|
|
70
|
-
isHighToolCountNonStream,
|
|
71
|
-
isStreamingConversational,
|
|
72
|
-
isMultimodal: hasImages,
|
|
73
|
-
};
|
|
74
|
-
}
|
|
75
|
-
export function getRequestClassCooldownKey(profile) {
|
|
76
|
-
return `${profile.primaryClass}:${profile.requestedModel.toLowerCase()}`;
|
|
77
|
-
}
|
|
78
|
-
export function getModelTierCooldownKey(profile) {
|
|
79
|
-
return profile.modelTier;
|
|
80
|
-
}
|
|
81
|
-
function getQualityGuardReason(profile, provider, _model) {
|
|
82
|
-
// Only gate auto-provider fallback (no explicit provider).
|
|
83
|
-
// Configured fallback-chain entries are always allowed through —
|
|
84
|
-
// let them attempt the request and fail naturally if the provider
|
|
85
|
-
// cannot handle it.
|
|
86
|
-
if (!provider) {
|
|
87
|
-
if (profile.modelTier === "opus" ||
|
|
88
|
-
profile.requiresStrongToolFidelity ||
|
|
89
|
-
profile.isHighToolCountNonStream) {
|
|
90
|
-
return "auto-provider fallback is disabled for requests that require contract preservation";
|
|
91
|
-
}
|
|
92
|
-
return null;
|
|
93
|
-
}
|
|
94
|
-
return null;
|
|
95
|
-
}
|
|
96
|
-
export function evaluateFallbackEligibility(profile, candidate) {
|
|
97
|
-
const policyBlockReason = getQualityGuardReason(profile, candidate.provider, candidate.model);
|
|
98
|
-
if (policyBlockReason) {
|
|
99
|
-
return {
|
|
100
|
-
provider: candidate.provider,
|
|
101
|
-
model: candidate.model,
|
|
102
|
-
eligible: false,
|
|
103
|
-
reason: policyBlockReason,
|
|
104
|
-
};
|
|
105
|
-
}
|
|
106
|
-
return {
|
|
107
|
-
provider: candidate.provider,
|
|
108
|
-
model: candidate.model,
|
|
109
|
-
eligible: true,
|
|
110
|
-
reason: "eligible",
|
|
111
|
-
};
|
|
112
|
-
}
|
|
113
|
-
export function buildProxyTranslationPlan(primary, fallbackChain, requestedModel, parsed) {
|
|
114
|
-
const profile = classifyClaudeProxyRequest(requestedModel, parsed);
|
|
15
|
+
/**
|
|
16
|
+
* Build a translation plan for a Claude-compatible proxy request.
|
|
17
|
+
* The plan lists the primary provider followed by eligible fallback targets.
|
|
18
|
+
* All configured fallback entries are always eligible — no contract-based gating.
|
|
19
|
+
* When no fallback chain is configured, an "auto-provider" entry is appended.
|
|
20
|
+
*/
|
|
21
|
+
export function buildProxyTranslationPlan(primary, fallbackChain, requestedModel, _parsed) {
|
|
115
22
|
const attempts = [
|
|
116
23
|
{
|
|
117
24
|
provider: primary.provider,
|
|
@@ -119,142 +26,79 @@ export function buildProxyTranslationPlan(primary, fallbackChain, requestedModel
|
|
|
119
26
|
label: `${primary.provider}/${primary.model ?? "unknown"}`,
|
|
120
27
|
},
|
|
121
28
|
];
|
|
122
|
-
const skipped = [];
|
|
123
29
|
for (const fallback of fallbackChain) {
|
|
124
30
|
if (fallback.provider === primary.provider &&
|
|
125
31
|
fallback.model === primary.model) {
|
|
126
32
|
continue;
|
|
127
33
|
}
|
|
128
|
-
const decision = evaluateFallbackEligibility(profile, fallback);
|
|
129
|
-
if (!decision.eligible) {
|
|
130
|
-
skipped.push(decision);
|
|
131
|
-
continue;
|
|
132
|
-
}
|
|
133
34
|
attempts.push({
|
|
134
35
|
provider: fallback.provider,
|
|
135
36
|
model: fallback.model,
|
|
136
37
|
label: `${fallback.provider}/${fallback.model}`,
|
|
137
38
|
});
|
|
138
39
|
}
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
}
|
|
144
|
-
else {
|
|
145
|
-
skipped.push(autoDecision);
|
|
146
|
-
}
|
|
40
|
+
// Append auto-provider when no configured fallback chain exists,
|
|
41
|
+
// or when all configured entries were deduped (same as primary).
|
|
42
|
+
if (fallbackChain.length === 0 || attempts.length === 1) {
|
|
43
|
+
attempts.push({ label: "auto-provider" });
|
|
147
44
|
}
|
|
148
45
|
return {
|
|
149
|
-
|
|
46
|
+
requestedModel,
|
|
47
|
+
modelTier: inferClaudeProxyModelTier(requestedModel),
|
|
150
48
|
attempts,
|
|
151
|
-
skipped,
|
|
49
|
+
skipped: [],
|
|
152
50
|
};
|
|
153
51
|
}
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
return
|
|
164
|
-
})
|
|
165
|
-
.join("; ");
|
|
166
|
-
return `Fallback policy preserved the requested ${plan.profile.primaryClass} contract by skipping ineligible targets. ${summary}`;
|
|
167
|
-
}
|
|
168
|
-
export function getActiveCooldownScope(state, profile, now = Date.now()) {
|
|
169
|
-
let longest = null;
|
|
170
|
-
const requestClassKey = getRequestClassCooldownKey(profile);
|
|
171
|
-
const requestClassUntil = state.requestClassCooldowns?.[requestClassKey] ?? undefined;
|
|
172
|
-
if (requestClassUntil && requestClassUntil > now) {
|
|
173
|
-
longest = {
|
|
174
|
-
scope: "request_class",
|
|
175
|
-
key: requestClassKey,
|
|
176
|
-
until: requestClassUntil,
|
|
177
|
-
};
|
|
178
|
-
}
|
|
179
|
-
const modelTierKey = getModelTierCooldownKey(profile);
|
|
180
|
-
const modelTierUntil = state.modelTierCooldowns?.[modelTierKey] ?? undefined;
|
|
181
|
-
if (modelTierUntil &&
|
|
182
|
-
modelTierUntil > now &&
|
|
183
|
-
modelTierUntil > (longest?.until ?? 0)) {
|
|
184
|
-
longest = {
|
|
185
|
-
scope: "model_tier",
|
|
186
|
-
key: modelTierKey,
|
|
187
|
-
until: modelTierUntil,
|
|
188
|
-
};
|
|
189
|
-
}
|
|
190
|
-
if (state.coolingUntil &&
|
|
191
|
-
state.coolingUntil > now &&
|
|
192
|
-
state.coolingUntil > (longest?.until ?? 0)) {
|
|
193
|
-
longest = {
|
|
194
|
-
scope: "generic",
|
|
195
|
-
key: "generic",
|
|
196
|
-
until: state.coolingUntil,
|
|
197
|
-
};
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
// Simple per-account cooldown
|
|
54
|
+
// ---------------------------------------------------------------------------
|
|
55
|
+
/**
|
|
56
|
+
* Check whether an account is currently cooling down.
|
|
57
|
+
* Returns the cooldown timestamp if active, null otherwise.
|
|
58
|
+
*/
|
|
59
|
+
export function getAccountCooldownUntil(state, now = Date.now()) {
|
|
60
|
+
if (state.coolingUntil && state.coolingUntil > now) {
|
|
61
|
+
return state.coolingUntil;
|
|
198
62
|
}
|
|
199
|
-
return
|
|
63
|
+
return null;
|
|
200
64
|
}
|
|
201
|
-
|
|
65
|
+
/**
|
|
66
|
+
* Partition accounts into eligible (no cooldown) and skipped (cooling down).
|
|
67
|
+
*/
|
|
68
|
+
export function partitionAccountsByCooldown(accounts, getState, now = Date.now()) {
|
|
202
69
|
const eligible = [];
|
|
203
70
|
const skipped = [];
|
|
204
71
|
for (const account of accounts) {
|
|
205
|
-
const
|
|
206
|
-
|
|
207
|
-
|
|
72
|
+
const state = getState(account);
|
|
73
|
+
const until = getAccountCooldownUntil(state, now);
|
|
74
|
+
if (until !== null) {
|
|
75
|
+
skipped.push({
|
|
76
|
+
account,
|
|
77
|
+
cooldown: { until, backoffLevel: state.backoffLevel },
|
|
78
|
+
});
|
|
208
79
|
continue;
|
|
209
80
|
}
|
|
210
81
|
eligible.push(account);
|
|
211
82
|
}
|
|
212
|
-
return {
|
|
213
|
-
eligible,
|
|
214
|
-
skipped,
|
|
215
|
-
};
|
|
83
|
+
return { eligible, skipped };
|
|
216
84
|
}
|
|
217
|
-
|
|
85
|
+
/**
|
|
86
|
+
* Apply a rate-limit cooldown to an account.
|
|
87
|
+
* Uses simple exponential backoff with a floor and cap.
|
|
88
|
+
*/
|
|
89
|
+
export function applyRateLimitCooldown(args) {
|
|
218
90
|
const now = args.now ?? Date.now();
|
|
219
|
-
const
|
|
220
|
-
const
|
|
221
|
-
|
|
222
|
-
const mtBackoffLevels = args.state.modelTierBackoffLevels ?? {};
|
|
223
|
-
const scopedBackoffLevel = Math.max(rcBackoffLevels[requestClassKey] ?? 0, mtBackoffLevels[modelTierKey] ?? 0);
|
|
224
|
-
// High-tool-count-non-stream gets its own (lower) floor so that requests
|
|
225
|
-
// recover faster once proper OAuth betas are forwarded. Check it first
|
|
226
|
-
// because every >=24-tool request also satisfies requiresStrongToolFidelity
|
|
227
|
-
// (threshold 8), which would otherwise shadow this branch.
|
|
228
|
-
const floorMs = args.profile.isHighToolCountNonStream
|
|
229
|
-
? HIGH_TOOL_COUNT_COOLDOWN_FLOOR_MS
|
|
230
|
-
: args.profile.modelTier === "opus" ||
|
|
231
|
-
args.profile.requiresStrongToolFidelity
|
|
232
|
-
? HIGH_FIDELITY_COOLDOWN_FLOOR_MS
|
|
233
|
-
: DEFAULT_COOLDOWN_FLOOR_MS;
|
|
234
|
-
const baseCooldownMs = Math.max(args.retryAfterMs ?? 0, floorMs);
|
|
235
|
-
const backoffMs = Math.min(baseCooldownMs * 2 ** scopedBackoffLevel, args.capMs);
|
|
236
|
-
const until = now + backoffMs;
|
|
237
|
-
args.state.requestClassCooldowns = {
|
|
238
|
-
...(args.state.requestClassCooldowns ?? {}),
|
|
239
|
-
[requestClassKey]: Math.max(args.state.requestClassCooldowns?.[requestClassKey] ?? 0, until),
|
|
240
|
-
};
|
|
241
|
-
args.state.modelTierCooldowns = {
|
|
242
|
-
...(args.state.modelTierCooldowns ?? {}),
|
|
243
|
-
[modelTierKey]: Math.max(args.state.modelTierCooldowns?.[modelTierKey] ?? 0, until),
|
|
244
|
-
};
|
|
245
|
-
args.state.requestClassBackoffLevels = {
|
|
246
|
-
...rcBackoffLevels,
|
|
247
|
-
[requestClassKey]: (rcBackoffLevels[requestClassKey] ?? 0) + 1,
|
|
248
|
-
};
|
|
249
|
-
args.state.modelTierBackoffLevels = {
|
|
250
|
-
...mtBackoffLevels,
|
|
251
|
-
[modelTierKey]: (mtBackoffLevels[modelTierKey] ?? 0) + 1,
|
|
252
|
-
};
|
|
91
|
+
const baseCooldownMs = Math.max(args.retryAfterMs ?? 0, DEFAULT_COOLDOWN_FLOOR_MS);
|
|
92
|
+
const backoffMs = Math.min(baseCooldownMs * 2 ** args.state.backoffLevel, args.capMs);
|
|
93
|
+
args.state.coolingUntil = now + backoffMs;
|
|
253
94
|
args.state.backoffLevel += 1;
|
|
254
|
-
return {
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
95
|
+
return { backoffMs };
|
|
96
|
+
}
|
|
97
|
+
/**
|
|
98
|
+
* Clear cooldown state for an account after a successful request.
|
|
99
|
+
*/
|
|
100
|
+
export function clearAccountCooldown(state) {
|
|
101
|
+
state.coolingUntil = undefined;
|
|
102
|
+
state.backoffLevel = 0;
|
|
259
103
|
}
|
|
260
104
|
//# sourceMappingURL=routingPolicy.js.map
|