@khanglvm/llm-router 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.test-suite.example +19 -0
- package/README.md +230 -0
- package/package.json +26 -0
- package/src/cli/router-module.js +3987 -0
- package/src/cli-entry.js +144 -0
- package/src/index.js +18 -0
- package/src/node/config-store.js +74 -0
- package/src/node/config-workflows.js +245 -0
- package/src/node/instance-state.js +206 -0
- package/src/node/local-server.js +294 -0
- package/src/node/provider-probe.js +905 -0
- package/src/node/start-command.js +498 -0
- package/src/node/startup-manager.js +369 -0
- package/src/runtime/config.js +655 -0
- package/src/runtime/handler/auth.js +32 -0
- package/src/runtime/handler/config-loading.js +45 -0
- package/src/runtime/handler/fallback.js +424 -0
- package/src/runtime/handler/http.js +71 -0
- package/src/runtime/handler/network-guards.js +137 -0
- package/src/runtime/handler/provider-call.js +245 -0
- package/src/runtime/handler/provider-translation.js +232 -0
- package/src/runtime/handler/request.js +194 -0
- package/src/runtime/handler/utils.js +41 -0
- package/src/runtime/handler.js +301 -0
- package/src/translator/formats.js +7 -0
- package/src/translator/index.js +73 -0
- package/src/translator/request/claude-to-openai.js +228 -0
- package/src/translator/request/openai-to-claude.js +241 -0
- package/src/translator/response/claude-to-openai.js +204 -0
- package/src/translator/response/openai-to-claude.js +197 -0
- package/wrangler.toml +20 -0
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import {
|
|
2
|
+
listConfiguredModels,
|
|
3
|
+
normalizeRuntimeConfig
|
|
4
|
+
} from "../config.js";
|
|
5
|
+
|
|
6
|
+
// Memo used by getCachedModelList: config object -> Map(format key -> model rows).
// Keyed weakly, so an entry does not keep its config object alive.
const modelListCache = new WeakMap();
|
|
7
|
+
|
|
8
|
+
/**
 * Cheap shape check used to decide whether a config still needs normalizing.
 *
 * @param {unknown} config - Value returned by the config loader.
 * @returns {boolean} True when `config` is an object with an array `providers`
 *   and a finite numeric `version`.
 */
function looksNormalizedConfig(config) {
  if (!config || typeof config !== "object") {
    return false;
  }
  return Array.isArray(config.providers) && Number.isFinite(config.version);
}
|
|
16
|
+
|
|
17
|
+
/**
 * Loads the runtime config through the supplied loader and normalizes it
 * unless it already has the normalized shape.
 *
 * @param {(env: unknown) => unknown} getConfig - Loader; its result is awaited.
 * @param {unknown} env - Passed through to `getConfig` unchanged.
 * @returns {Promise<unknown>} The loaded value itself when it looks normalized,
 *   otherwise the result of `normalizeRuntimeConfig`.
 */
export async function loadRuntimeConfig(getConfig, env) {
  const loaded = await getConfig(env);
  if (looksNormalizedConfig(loaded)) {
    return loaded;
  }
  return normalizeRuntimeConfig(loaded);
}
|
|
21
|
+
|
|
22
|
+
/**
 * Returns the configured model list for a config, memoized per config object
 * and per endpoint format.
 *
 * Non-object configs cannot be WeakMap keys, so they are listed without caching.
 *
 * @param {object} config - Runtime config; used as the cache identity.
 * @param {string} [endpointFormat] - Forwarded to `listConfiguredModels`; any
 *   falsy value shares the "__auto__" cache slot.
 * @returns {unknown} Whatever `listConfiguredModels` returned (the same
 *   reference on cache hits).
 */
export function getCachedModelList(config, endpointFormat) {
  const isCacheable = Boolean(config) && typeof config === "object";
  if (!isCacheable) {
    return listConfiguredModels(config, { endpointFormat });
  }

  let perFormat = modelListCache.get(config);
  if (!perFormat) {
    perFormat = new Map();
    modelListCache.set(config, perFormat);
  }

  const formatKey = endpointFormat || "__auto__";
  if (!perFormat.has(formatKey)) {
    perFormat.set(formatKey, listConfiguredModels(config, { endpointFormat }));
  }
  return perFormat.get(formatKey);
}
|
|
@@ -0,0 +1,424 @@
|
|
|
1
|
+
import {
|
|
2
|
+
parseJsonSafely,
|
|
3
|
+
parseRetryAfterMs,
|
|
4
|
+
toBoolean,
|
|
5
|
+
toNonNegativeInteger
|
|
6
|
+
} from "./utils.js";
|
|
7
|
+
|
|
8
|
+
// Fallback circuit defaults (used when the matching LLM_ROUTER_FALLBACK_CIRCUIT_* value is absent).
const DEFAULT_FALLBACK_CIRCUIT_FAILURES = 2;
const DEFAULT_FALLBACK_CIRCUIT_COOLDOWN_MS = 30 * 1_000;

// Origin retry defaults: attempt count plus backoff base and ceiling, in milliseconds.
const DEFAULT_ORIGIN_RETRY_ATTEMPTS = 3;
const DEFAULT_ORIGIN_RETRY_BASE_DELAY_MS = 250;
const DEFAULT_ORIGIN_RETRY_MAX_DELAY_MS = 3 * 1_000;

// Default origin cooldowns per failure category, in milliseconds.
const DEFAULT_ORIGIN_FALLBACK_COOLDOWN_MS = 45 * 1_000;
const DEFAULT_ORIGIN_RATE_LIMIT_COOLDOWN_MS = 30 * 1_000;
const DEFAULT_ORIGIN_BILLING_COOLDOWN_MS = 15 * 60 * 1_000;
const DEFAULT_ORIGIN_AUTH_COOLDOWN_MS = 10 * 60 * 1_000;
const DEFAULT_ORIGIN_POLICY_COOLDOWN_MS = 2 * 60 * 1_000;

// Maximum number of characters of an upstream error body that is scanned for hints.
const ERROR_TEXT_SCAN_LIMIT = 4 * 1_024;
|
|
19
|
+
// Lowercase substrings that mark a 403 error body as a billing/quota problem.
const BILLING_HINTS = [
  "insufficient_quota", "insufficient quota", "insufficient balance",
  "insufficient credits", "not enough credits", "out of credits",
  "payment required", "billing hard limit", "quota exceeded"
];
|
|
30
|
+
// Lowercase substrings that mark a 403 error body as an authentication/authorization problem.
const AUTH_HINTS = [
  "invalid api key", "incorrect api key", "api key not valid",
  "authentication", "unauthorized", "permission denied", "forbidden"
];
|
|
39
|
+
// Lowercase substrings that mark a 403 error body as a content-policy block.
const POLICY_HINTS = [
  "moderation", "policy_violation", "content policy",
  "safety", "unsafe", "flagged"
];
|
|
47
|
+
// Module-level circuit state: candidate circuit key -> { consecutiveRetryableFailures,
// openUntil, lastFailureAt, lastFailureStatus }. Written by markCandidateFailure and
// setCandidateCooldown, cleared per key by markCandidateSuccess.
const fallbackCircuitState = new Map();
|
|
48
|
+
|
|
49
|
+
/**
 * Tells whether an HTTP status is treated as retryable.
 *
 * @param {number} status - HTTP status code.
 * @returns {boolean} True for 408, 409, 429 and anything >= 500.
 */
export function shouldRetryStatus(status) {
  if (status >= 500) {
    return true;
  }
  return [408, 409, 429].includes(status);
}
|
|
52
|
+
|
|
53
|
+
/**
 * Builds the origin retry/cooldown policy from LLM_ROUTER_ORIGIN_* settings,
 * falling back to the module defaults.
 *
 * @param {object} [env] - Environment-like bag of settings.
 * @returns {object} Policy with `originRetryAttempts` clamped to 1..10,
 *   `originRetryMaxDelayMs` never below `originRetryBaseDelayMs`, the per-category
 *   cooldowns, and `allowPolicyFallback` (default false).
 */
export function resolveRetryPolicy(env = {}) {
  const readInt = (name, fallback) => toNonNegativeInteger(env?.[name], fallback);

  const requestedAttempts = readInt("LLM_ROUTER_ORIGIN_RETRY_ATTEMPTS", DEFAULT_ORIGIN_RETRY_ATTEMPTS);
  const originRetryAttempts = Math.min(Math.max(requestedAttempts, 1), 10);

  const originRetryBaseDelayMs = Math.max(
    0,
    readInt("LLM_ROUTER_ORIGIN_RETRY_BASE_DELAY_MS", DEFAULT_ORIGIN_RETRY_BASE_DELAY_MS)
  );
  // The ceiling may not undercut the base delay.
  const originRetryMaxDelayMs = Math.max(
    originRetryBaseDelayMs,
    readInt("LLM_ROUTER_ORIGIN_RETRY_MAX_DELAY_MS", DEFAULT_ORIGIN_RETRY_MAX_DELAY_MS)
  );

  const originFallbackCooldownMs = readInt("LLM_ROUTER_ORIGIN_FALLBACK_COOLDOWN_MS", DEFAULT_ORIGIN_FALLBACK_COOLDOWN_MS);
  const originRateLimitCooldownMs = readInt("LLM_ROUTER_ORIGIN_RATE_LIMIT_COOLDOWN_MS", DEFAULT_ORIGIN_RATE_LIMIT_COOLDOWN_MS);
  const originBillingCooldownMs = readInt("LLM_ROUTER_ORIGIN_BILLING_COOLDOWN_MS", DEFAULT_ORIGIN_BILLING_COOLDOWN_MS);
  const originAuthCooldownMs = readInt("LLM_ROUTER_ORIGIN_AUTH_COOLDOWN_MS", DEFAULT_ORIGIN_AUTH_COOLDOWN_MS);
  const originPolicyCooldownMs = readInt("LLM_ROUTER_ORIGIN_POLICY_COOLDOWN_MS", DEFAULT_ORIGIN_POLICY_COOLDOWN_MS);
  const allowPolicyFallback = toBoolean(env?.LLM_ROUTER_ALLOW_POLICY_FALLBACK, false);

  return {
    originRetryAttempts,
    originRetryBaseDelayMs,
    originRetryMaxDelayMs,
    originFallbackCooldownMs,
    originRateLimitCooldownMs,
    originBillingCooldownMs,
    originAuthCooldownMs,
    originPolicyCooldownMs,
    allowPolicyFallback
  };
}
|
|
102
|
+
|
|
103
|
+
/**
 * Computes a jittered exponential backoff delay.
 *
 * The base delay doubles with each attempt after the first, is capped at the
 * policy maximum, and is then scaled by a random factor in [0.5, 1).
 *
 * @param {number} attemptNumber - 1-based attempt number; values below 1 behave like 1.
 * @param {{originRetryBaseDelayMs: number, originRetryMaxDelayMs: number}} policy
 * @returns {number} Non-negative delay in whole milliseconds.
 */
export function computeRetryDelayMs(attemptNumber, policy) {
  const { originRetryBaseDelayMs: baseDelay, originRetryMaxDelayMs: ceiling } = policy;
  const doublings = Math.max(0, attemptNumber - 1);
  const backoff = Math.min(baseDelay * Math.pow(2, doublings), ceiling);
  const jitter = 0.5 + Math.random() * 0.5;
  return Math.max(0, Math.round(backoff * jitter));
}
|
|
110
|
+
|
|
111
|
+
/**
 * Checks whether `text` contains at least one of the given substrings.
 *
 * @param {string} text - Text to search; falsy text never matches.
 * @param {Iterable<string>} hints - Substrings to look for.
 * @returns {boolean}
 */
function hasAnyHint(text, hints) {
  return Boolean(text) && Array.from(hints).some((hint) => text.includes(hint));
}
|
|
118
|
+
|
|
119
|
+
/**
 * Builds the fallback circuit policy from LLM_ROUTER_FALLBACK_CIRCUIT_* settings,
 * falling back to the module defaults.
 *
 * @param {object} [env] - Environment-like bag of settings.
 * @returns {{failureThreshold: number, cooldownMs: number}}
 */
export function resolveFallbackCircuitPolicy(env = {}) {
  const failureThreshold = toNonNegativeInteger(
    env?.LLM_ROUTER_FALLBACK_CIRCUIT_FAILURES,
    DEFAULT_FALLBACK_CIRCUIT_FAILURES
  );
  const cooldownMs = toNonNegativeInteger(
    env?.LLM_ROUTER_FALLBACK_CIRCUIT_COOLDOWN_MS,
    DEFAULT_FALLBACK_CIRCUIT_COOLDOWN_MS
  );
  return { failureThreshold, cooldownMs };
}
|
|
131
|
+
|
|
132
|
+
/**
 * The circuit is active only when both the failure threshold and the cooldown
 * are finite numbers greater than zero.
 *
 * @param {{failureThreshold?: number, cooldownMs?: number}} policy
 * @returns {boolean}
 */
function isFallbackCircuitEnabled(policy) {
  const threshold = policy?.failureThreshold;
  const cooldown = policy?.cooldownMs;
  if (!Number.isFinite(threshold) || !Number.isFinite(cooldown)) {
    return false;
  }
  return threshold > 0 && cooldown > 0;
}
|
|
138
|
+
|
|
139
|
+
/**
 * Derives the circuit-state key for a candidate: "<model>@<format>".
 *
 * The model part is `requestModelId` when set, otherwise "<providerId>/<modelId>";
 * every missing piece is replaced by "unknown".
 *
 * @param {object} candidate
 * @returns {string}
 */
function candidateCircuitKey(candidate) {
  let modelPart = candidate?.requestModelId;
  if (!modelPart) {
    const provider = candidate?.providerId || "unknown";
    const model = candidate?.modelId || "unknown";
    modelPart = `${provider}/${model}`;
  }
  const formatPart = candidate?.targetFormat || "unknown";
  return `${modelPart}@${formatPart}`;
}
|
|
144
|
+
|
|
145
|
+
/**
 * Reads the current circuit state for a candidate.
 *
 * @param {object} candidate
 * @param {number} [now] - Reference time in ms; defaults to `Date.now()`.
 * @returns {{key: string, isOpen: boolean, openUntil: number}} `isOpen` is true
 *   while `openUntil` is later than `now`; a candidate without state is closed.
 */
function getCandidateCircuitSnapshot(candidate, now = Date.now()) {
  const key = candidateCircuitKey(candidate);
  const entry = fallbackCircuitState.get(key);
  if (!entry) {
    return { key, isOpen: false, openUntil: 0 };
  }

  // Non-finite deadlines are treated as "not open".
  let openUntil = 0;
  if (Number.isFinite(entry.openUntil)) {
    openUntil = Number(entry.openUntil);
  }
  return { key, isOpen: openUntil > now, openUntil };
}
|
|
158
|
+
|
|
159
|
+
/**
 * Wraps each candidate with its circuit snapshot and, when the circuit is
 * enabled, moves candidates with an open circuit behind the healthy ones.
 *
 * Ordering: closed circuits first (original order), then open circuits by
 * earliest `openUntil`, ties broken by original position.
 *
 * @param {Array<object>} candidates - May be null/undefined (treated as empty).
 * @param {object} policy - Fallback circuit policy.
 * @param {number} [now] - Reference time in ms; defaults to `Date.now()`.
 * @returns {Array<{candidate: object, originalIndex: number, circuit: object}>}
 */
export function orderCandidatesByCircuit(candidates, policy, now = Date.now()) {
  const ranked = (candidates || []).map((candidate, originalIndex) => {
    const circuit = getCandidateCircuitSnapshot(candidate, now);
    return { candidate, originalIndex, circuit };
  });

  const compareByCircuit = (a, b) => {
    const aOpen = a.circuit.isOpen;
    if (aOpen !== b.circuit.isOpen) {
      return aOpen ? 1 : -1;
    }
    // Here both sides share the same open/closed state.
    if (aOpen && a.circuit.openUntil !== b.circuit.openUntil) {
      return a.circuit.openUntil - b.circuit.openUntil;
    }
    return a.originalIndex - b.originalIndex;
  };

  if (isFallbackCircuitEnabled(policy) && ranked.length > 1) {
    ranked.sort(compareByCircuit);
  }
  return ranked;
}
|
|
181
|
+
|
|
182
|
+
/**
 * Clears any recorded circuit state for a candidate after a successful call.
 *
 * @param {object} candidate
 * @returns {void}
 */
export function markCandidateSuccess(candidate) {
  const key = candidateCircuitKey(candidate);
  fallbackCircuitState.delete(key);
}
|
|
185
|
+
|
|
186
|
+
/**
 * Records a failed call for a candidate and opens its circuit once the number
 * of consecutive retryable failures reaches the policy threshold.
 *
 * Any existing state for the candidate is cleared instead when tracking is
 * switched off (`options.trackFailure === false`), the circuit is disabled by
 * policy, or the failure is not retryable.
 *
 * @param {object} candidate - Candidate whose circuit key is updated.
 * @param {{retryable?: boolean, status?: number}} result - Failure outcome.
 * @param {{failureThreshold: number, cooldownMs: number}} policy
 * @param {{now?: number, trackFailure?: boolean}} [options] - `now` overrides
 *   the clock (ms); `trackFailure` defaults to true.
 * @returns {void}
 */
export function markCandidateFailure(candidate, result, policy, options = {}) {
  const now = options?.now ?? Date.now();
  const trackFailure = options?.trackFailure !== false;
  const key = candidateCircuitKey(candidate);

  if (!trackFailure || !isFallbackCircuitEnabled(policy) || !result?.retryable) {
    fallbackCircuitState.delete(key);
    return;
  }

  const prior = fallbackCircuitState.get(key);
  const priorOpenUntil = Number.isFinite(prior?.openUntil) ? prior.openUntil : 0;
  // openUntil === 0 means the circuit has not been opened yet. It must not be
  // mistaken for an elapsed cooldown, otherwise the streak restarts on every
  // failure and a threshold above 1 can never be reached.
  const cooldownElapsed = priorOpenUntil > 0 && priorOpenUntil <= now;
  const previousFailures = cooldownElapsed ? 0 : (prior?.consecutiveRetryableFailures || 0);
  const consecutiveRetryableFailures = previousFailures + 1;
  const thresholdReached = consecutiveRetryableFailures >= policy.failureThreshold;

  fallbackCircuitState.set(key, {
    consecutiveRetryableFailures,
    openUntil: thresholdReached ? now + policy.cooldownMs : 0,
    lastFailureAt: now,
    lastFailureStatus: result.status
  });
}
|
|
219
|
+
|
|
220
|
+
/**
 * Forces a candidate's circuit open for at least `cooldownMs` from `now`.
 *
 * An already later deadline is kept, and the stored consecutive-failure count
 * is carried over. Does nothing when the circuit is disabled or the cooldown
 * is not a positive finite number.
 *
 * @param {object} candidate
 * @param {number} cooldownMs
 * @param {object} policy - Fallback circuit policy.
 * @param {number} status - Stored as `lastFailureStatus`.
 * @param {number} [now] - Reference time in ms; defaults to `Date.now()`.
 * @returns {void}
 */
export function setCandidateCooldown(candidate, cooldownMs, policy, status, now = Date.now()) {
  const hasUsableCooldown = Number.isFinite(cooldownMs) && cooldownMs > 0;
  if (!isFallbackCircuitEnabled(policy) || !hasUsableCooldown) {
    return;
  }

  const key = candidateCircuitKey(candidate);
  const existing = fallbackCircuitState.get(key);
  const existingOpenUntil = Number.isFinite(existing?.openUntil) ? Number(existing.openUntil) : 0;

  fallbackCircuitState.set(key, {
    consecutiveRetryableFailures: existing?.consecutiveRetryableFailures || 0,
    openUntil: Math.max(existingOpenUntil, now + cooldownMs),
    lastFailureAt: now,
    lastFailureStatus: status
  });
}
|
|
236
|
+
|
|
237
|
+
/**
 * Extracts lowercase text from an upstream error response for hint matching.
 *
 * Reads a clone of the response (the original body stays unread), keeps at most
 * ERROR_TEXT_SCAN_LIMIT characters, and joins the common error fields of the
 * parsed JSON (when present) with the raw text, separated by spaces.
 *
 * @param {{upstreamResponse?: Response}} result
 * @returns {Promise<string>} Lowercased hint text; "" when there is no Response,
 *   the body is empty, or reading fails (best effort, errors are not surfaced).
 */
async function readProviderErrorHint(result) {
  const upstream = result?.upstreamResponse;
  if (!(upstream instanceof Response)) {
    return "";
  }

  try {
    const body = await upstream.clone().text();
    if (!body) {
      return "";
    }

    const scanned = body.slice(0, ERROR_TEXT_SCAN_LIMIT);
    const payload = parseJsonSafely(scanned);
    const candidates = [
      payload?.error?.code,
      payload?.error?.type,
      payload?.error?.message,
      payload?.error,
      payload?.code,
      payload?.type,
      payload?.message,
      scanned
    ];

    const pieces = [];
    for (const piece of candidates) {
      if (piece !== undefined && piece !== null) {
        pieces.push(String(piece).toLowerCase());
      }
    }
    return pieces.join(" ");
  } catch {
    return "";
  }
}
|
|
262
|
+
|
|
263
|
+
/**
 * Classifies a failed provider call into a category plus retry/fallback advice.
 *
 * Decision order: explicit `errorKind` values first, then HTTP status. For 403
 * the upstream error body is scanned for billing, policy and auth hints (in
 * that order) to refine the category.
 *
 * @param {{status?: number, errorKind?: string, upstreamResponse?: Response}} result
 * @param {object} retryPolicy - Policy as produced by `resolveRetryPolicy`.
 * @returns {Promise<{category: string, retryable: boolean, retryOrigin: boolean,
 *   allowFallback: boolean, originCooldownMs: number}>}
 */
export async function classifyFailureResult(result, retryPolicy) {
  const status = Number.isFinite(result?.status) ? Number(result.status) : 0;
  const retryAfterMs = parseRetryAfterMs(result?.upstreamResponse?.headers?.get("retry-after"));

  // Single place that fixes the shape (and key order) of every verdict.
  const verdict = (category, retryable, retryOrigin, allowFallback, originCooldownMs) => ({
    category,
    retryable,
    retryOrigin,
    allowFallback,
    originCooldownMs
  });
  // Verdict for failures that are neither retryable nor retried against the origin.
  const terminal = (category, originCooldownMs, allowFallback = true) =>
    verdict(category, false, false, allowFallback, originCooldownMs);

  switch (result?.errorKind) {
    case "configuration_error":
      return terminal("configuration_error", retryPolicy.originFallbackCooldownMs);
    case "not_supported_error":
      return terminal("not_supported_error", 0);
    case "network_error":
      return verdict("network_error", true, true, true, 0);
    default:
      break;
  }

  if (status === 429) {
    // Prefer the upstream Retry-After value over the configured cooldown.
    const rateLimitCooldown = retryAfterMs > 0 ? retryAfterMs : retryPolicy.originRateLimitCooldownMs;
    return verdict("rate_limited", true, false, true, rateLimitCooldown);
  }

  if (status === 402) {
    return terminal("billing_exhausted", retryPolicy.originBillingCooldownMs);
  }

  if (status === 401) {
    return terminal("auth_failed", retryPolicy.originAuthCooldownMs);
  }

  if (status === 403) {
    const hintText = await readProviderErrorHint(result);
    if (hasAnyHint(hintText, BILLING_HINTS)) {
      return terminal("billing_exhausted", retryPolicy.originBillingCooldownMs);
    }
    if (hasAnyHint(hintText, POLICY_HINTS)) {
      // Fallback after a policy block is opt-in via the retry policy.
      return terminal("policy_blocked", retryPolicy.originPolicyCooldownMs, retryPolicy.allowPolicyFallback);
    }
    const category = hasAnyHint(hintText, AUTH_HINTS) ? "auth_failed" : "forbidden";
    return terminal(category, retryPolicy.originAuthCooldownMs);
  }

  if (status === 404 || status === 410) {
    return terminal("not_found", retryPolicy.originFallbackCooldownMs);
  }

  if (status === 408 || status === 409 || status >= 500) {
    return verdict("temporary_error", true, true, true, retryAfterMs);
  }

  if (status >= 400 && status < 500) {
    // Remaining 4xx responses: no retry and no fallback.
    const category = [400, 413, 422].includes(status) ? "invalid_request" : "client_error";
    return terminal(category, 0, false);
  }

  return terminal("unknown_error", 0);
}
|
|
417
|
+
|
|
418
|
+
/**
 * Prefixes an error message with the provider/model that produced it.
 *
 * @param {unknown} error - Message (or error) to embed; stringified by interpolation.
 * @param {{providerId: string, modelId: string}} candidate
 * @param {boolean} isFallback - Adds a "fallback" marker to the prefix when truthy.
 * @returns {string} "[provider/model] msg" or "[fallback provider/model] msg".
 */
export function enrichErrorMessage(error, candidate, isFallback) {
  const label = `${candidate.providerId}/${candidate.modelId}`;
  const tag = isFallback ? `fallback ${label}` : label;
  return `[${tag}] ${error}`;
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
// Connection-level header names that are removed from upstream responses
// before they are passed through to the client.
const HOP_BY_HOP_HEADERS = [
  "connection", "keep-alive",
  "proxy-authenticate", "proxy-authorization",
  "te", "trailer", "transfer-encoding", "upgrade"
];
|
|
11
|
+
|
|
12
|
+
/**
 * Returns a shallow copy of the given header bag.
 *
 * Note: despite the name, no CORS headers are added here.
 *
 * @param {object} [headers] - Plain header-name -> value object.
 * @returns {object} New object with the same own enumerable entries.
 */
export function withCorsHeaders(headers = {}) {
  const copy = { ...headers };
  return copy;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Strips headers that must not be forwarded from an upstream response.
 * Mutates `headers` in place.
 *
 * @param {Headers} headers
 * @returns {void}
 */
function sanitizePassthroughHeaders(headers) {
  // Node fetch/undici hands back an already-decoded body but leaves the
  // content-encoding/content-length headers in place; forwarding them would
  // make clients try to decompress the payload a second time.
  const unsafeNames = ["content-encoding", "content-length", ...HOP_BY_HOP_HEADERS];
  unsafeNames.forEach((name) => {
    headers.delete(name);
  });
}
|
|
26
|
+
|
|
27
|
+
/**
 * Builds a JSON response that is not cacheable and not content-sniffable.
 *
 * @param {unknown} data - Serialized with `JSON.stringify`.
 * @param {number} [status=200] - HTTP status code.
 * @returns {Response}
 */
export function jsonResponse(data, status = 200) {
  const body = JSON.stringify(data);
  const headers = withCorsHeaders({
    "Content-Type": "application/json",
    "Cache-Control": "no-store",
    "X-Content-Type-Options": "nosniff"
  });
  return new Response(body, { status, headers });
}
|
|
37
|
+
|
|
38
|
+
/**
 * Builds the body-less response used to answer CORS preflight requests.
 * It advertises the allowed methods and request headers; no
 * Access-Control-Allow-Origin header is set here.
 *
 * @returns {Response}
 */
export function corsResponse() {
  const headers = withCorsHeaders({
    "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
    "Access-Control-Allow-Headers": "Content-Type, Authorization, x-api-key, anthropic-version"
  });
  return new Response(null, { headers });
}
|
|
46
|
+
|
|
47
|
+
/**
 * Re-wraps an upstream response for forwarding: same body, status and status
 * text, with unsafe headers stripped and optional overrides applied.
 *
 * @param {Response} response - Upstream response; its body is handed on unread.
 * @param {object} [overrideHeaders] - Header name -> value; null/undefined
 *   values are skipped, others are stringified. Applied after sanitizing, so
 *   an override can deliberately set a header that was stripped.
 * @returns {Response}
 */
export function passthroughResponseWithCors(response, overrideHeaders = undefined) {
  const headers = new Headers(response.headers);
  sanitizePassthroughHeaders(headers);

  const hasOverrides = overrideHeaders && typeof overrideHeaders === "object";
  const overrides = hasOverrides ? Object.entries(overrideHeaders) : [];
  for (const [name, value] of overrides) {
    if (value !== undefined && value !== null) {
      headers.set(name, String(value));
    }
  }

  const { body, status, statusText } = response;
  return new Response(body, { status, statusText, headers });
}
|
|
64
|
+
|
|
65
|
+
/**
 * Adds an entry to a Vary header value unless it is already listed
 * (compared case-insensitively).
 *
 * @param {string|null|undefined} existingValue - Current Vary value, if any.
 * @param {string} entry - Header name to add.
 * @returns {string} `entry` alone when there was no value, the trimmed existing
 *   value when `entry` is already present, otherwise "<existing>, <entry>".
 */
export function appendVaryHeader(existingValue, entry) {
  const current = String(existingValue || "").trim();
  if (!current) {
    return entry;
  }

  const listed = current
    .split(",")
    .map((item) => item.trim())
    .filter(Boolean);
  for (const item of listed) {
    if (item.toLowerCase() === entry.toLowerCase()) {
      return current;
    }
  }
  return `${current}, ${entry}`;
}
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import { appendVaryHeader } from "./http.js";
|
|
2
|
+
import { toBoolean } from "./utils.js";
|
|
3
|
+
|
|
4
|
+
// Memo for parseCsvSetCached: raw CSV string -> array of unique trimmed entries.
const csvSetCache = new Map();
|
|
5
|
+
// Memo for parseCsvLowerSetCached: raw CSV string -> array of unique entries
// normalized through normalizeIpCandidate.
const csvLowerSetCache = new Map();
|
|
6
|
+
|
|
7
|
+
/**
 * Stores a value in a Map-based cache, evicting the oldest inserted key first
 * when the cache already holds `maxEntries` entries.
 *
 * @param {Map} cache
 * @param {unknown} key
 * @param {unknown} value
 * @param {number} [maxEntries=64]
 * @returns {unknown} The stored `value`.
 */
function putBoundedCache(cache, key, value, maxEntries = 64) {
  const isFull = cache.size >= maxEntries;
  if (isFull) {
    // Maps iterate in insertion order, so the first key is the oldest one.
    const [oldestKey] = cache.keys();
    if (oldestKey !== undefined) {
      cache.delete(oldestKey);
    }
  }
  cache.set(key, value);
  return value;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Splits a comma-separated string into unique, trimmed, non-empty entries and
 * memoizes the result per raw string.
 *
 * @param {unknown} rawValue - null/undefined are treated as "".
 * @returns {string[]} Entries in first-seen order (the same array on cache hits).
 */
function parseCsvSetCached(rawValue) {
  const key = String(rawValue ?? "");
  const hit = csvSetCache.get(key);
  if (hit) {
    return hit;
  }

  const unique = new Set();
  for (const part of key.split(",")) {
    const entry = part.trim();
    if (entry) {
      unique.add(entry);
    }
  }
  return putBoundedCache(csvSetCache, key, Array.from(unique));
}
|
|
28
|
+
|
|
29
|
+
/**
 * Splits a comma-separated string into unique entries, each passed through
 * `normalizeIpCandidate`, and memoizes the result per raw string.
 *
 * @param {unknown} rawValue - null/undefined are treated as "".
 * @returns {string[]} Normalized, non-empty entries in first-seen order (the
 *   same array on cache hits).
 */
function parseCsvLowerSetCached(rawValue) {
  const key = String(rawValue ?? "");
  const hit = csvLowerSetCache.get(key);
  if (hit) {
    return hit;
  }

  const unique = new Set();
  for (const part of key.split(",")) {
    const entry = normalizeIpCandidate(part);
    if (entry) {
      unique.add(entry);
    }
  }
  return putBoundedCache(csvLowerSetCache, key, Array.from(unique));
}
|
|
41
|
+
|
|
42
|
+
/**
 * Reads the CORS origin allow-list from the environment.
 * LLM_ROUTER_CORS_ALLOWED_ORIGINS wins over LLM_ROUTER_ALLOWED_ORIGINS; only
 * null/undefined fall through to the next source.
 *
 * @param {object} [env]
 * @returns {string[]} Parsed list (empty when nothing is configured).
 */
function parseAllowedOrigins(env = {}) {
  const configured = env?.LLM_ROUTER_CORS_ALLOWED_ORIGINS ?? env?.LLM_ROUTER_ALLOWED_ORIGINS;
  return parseCsvSetCached(configured ?? "");
}
|
|
49
|
+
|
|
50
|
+
/**
 * Reads the client IP allow-list from the environment.
 * LLM_ROUTER_ALLOWED_IPS wins over LLM_ROUTER_IP_ALLOWLIST; only
 * null/undefined fall through to the next source.
 *
 * @param {object} [env]
 * @returns {string[]} Parsed, normalized list (empty when nothing is configured).
 */
function parseAllowedIps(env = {}) {
  const configured = env?.LLM_ROUTER_ALLOWED_IPS ?? env?.LLM_ROUTER_IP_ALLOWLIST;
  return parseCsvLowerSetCached(configured ?? "");
}
|
|
57
|
+
|
|
58
|
+
/**
 * Normalizes an IP-like string so differently written forms compare equal.
 *
 * Steps: trim and lowercase; unwrap "[v6]" / "[v6]:port"; drop the port from
 * "a.b.c.d:port"; unwrap IPv4-mapped IPv6 ("::ffff:a.b.c.d"); drop a "%zone"
 * suffix. No validation is performed.
 *
 * @param {unknown} value - Falsy values yield "".
 * @returns {string}
 */
export function normalizeIpCandidate(value) {
  const cleaned = String(value || "").trim().toLowerCase();
  if (cleaned === "") {
    return "";
  }

  let ip = cleaned;
  const closingBracket = ip.indexOf("]");
  if (ip.startsWith("[") && closingBracket !== -1) {
    ip = ip.slice(1, closingBracket);
  } else if (/^\d+\.\d+\.\d+\.\d+:\d+$/.test(ip)) {
    // IPv4 followed by a port, as seen in forwarded headers.
    ip = ip.replace(/:\d+$/, "");
  }

  const mappedPrefix = "::ffff:";
  if (ip.startsWith(mappedPrefix) && ip.includes(".")) {
    ip = ip.slice(mappedPrefix.length);
  }

  const zoneAt = ip.indexOf("%");
  return zoneAt > -1 ? ip.slice(0, zoneAt) : ip;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Determines the client IP from request headers, checked in this order:
 * cf-connecting-ip, x-real-ip, then the first entry of x-forwarded-for.
 *
 * NOTE(review): all three headers are taken at face value. Unless a trusted
 * proxy in front of this service overwrites them, a client can set them
 * itself — confirm the deployment before relying on this for access control.
 *
 * @param {{headers: Headers}} request
 * @returns {string} Normalized IP, or "" when none of the headers yields one.
 */
function getClientIp(request) {
  for (const headerName of ["cf-connecting-ip", "x-real-ip"]) {
    const candidate = normalizeIpCandidate(request.headers.get(headerName));
    if (candidate) {
      return candidate;
    }
  }

  const forwardedFor = String(request.headers.get("x-forwarded-for") || "").trim();
  if (!forwardedFor) {
    return "";
  }
  const [firstHop] = forwardedFor.split(",");
  return normalizeIpCandidate(firstHop);
}
|
|
94
|
+
|
|
95
|
+
/**
 * Checks the request's client IP against the configured allow-list.
 *
 * @param {{headers: Headers}} request
 * @param {object} [env]
 * @returns {boolean} True when no allow-list is configured, when it contains
 *   "*", or when the client IP is listed; false when the client IP is unknown
 *   or not listed.
 */
export function isRequestFromAllowedIp(request, env = {}) {
  const allowList = parseAllowedIps(env);
  const isUnrestricted = allowList.length === 0 || allowList.includes("*");
  if (isUnrestricted) {
    return true;
  }

  const clientIp = getClientIp(request);
  return clientIp ? allowList.includes(clientIp) : false;
}
|
|
102
|
+
|
|
103
|
+
/**
 * Decides which Access-Control-Allow-Origin value, if any, applies to a request.
 *
 * @param {{headers: Headers}} request
 * @param {object} [env]
 * @returns {string} "" when the request has no Origin header or the origin is
 *   not allowed; "*" when LLM_ROUTER_CORS_ALLOW_ALL is on or the allow-list
 *   contains "*"; otherwise the request's origin (exact string match).
 */
export function resolveAllowedOrigin(request, env = {}) {
  const origin = request.headers.get("origin");
  if (!origin) {
    return "";
  }

  if (toBoolean(env?.LLM_ROUTER_CORS_ALLOW_ALL, false)) {
    return "*";
  }

  const allowList = parseAllowedOrigins(env);
  if (allowList.includes("*")) {
    return "*";
  }
  if (allowList.includes(origin)) {
    return origin;
  }
  return "";
}
|
|
112
|
+
|
|
113
|
+
/**
 * Adds CORS headers to a response when the request's origin is allowed.
 *
 * @param {Response} response - Returned unchanged when no origin is allowed.
 * @param {{headers: Headers}} request - Only consulted when `allowedOrigin`
 *   is not supplied.
 * @param {object} [env]
 * @param {{isPreflight?: boolean, allowedOrigin?: string}} [options] -
 *   `allowedOrigin` skips origin resolution when provided (any value other
 *   than undefined); `isPreflight` additionally sets the allow-methods,
 *   allow-headers and max-age headers.
 * @returns {Response} The original response, or a new one sharing its body,
 *   status and status text with the CORS headers added.
 */
export function withRequestCors(response, request, env = {}, { isPreflight = false, allowedOrigin } = {}) {
  const resolved = allowedOrigin === undefined ? resolveAllowedOrigin(request, env) : allowedOrigin;
  const allowed = resolved || "";
  if (!allowed) {
    return response;
  }

  const headers = new Headers(response.headers);
  headers.set("Access-Control-Allow-Origin", allowed);

  // An echoed origin makes the response origin-dependent, so caches must vary on it.
  const echoesSpecificOrigin = allowed !== "*";
  if (echoesSpecificOrigin) {
    headers.set("Vary", appendVaryHeader(headers.get("Vary"), "Origin"));
  }

  if (isPreflight) {
    const preflightHeaders = [
      ["Access-Control-Allow-Methods", "GET, POST, OPTIONS"],
      ["Access-Control-Allow-Headers", "Content-Type, Authorization, x-api-key, anthropic-version"],
      ["Access-Control-Max-Age", "600"]
    ];
    for (const [name, value] of preflightHeaders) {
      headers.set(name, value);
    }
  }

  const { body, status, statusText } = response;
  return new Response(body, { status, statusText, headers });
}
|