@khanglvm/llm-router 1.0.8 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/README.md +7 -2
- package/package.json +1 -1
- package/src/cli/cloudflare-api.js +267 -0
- package/src/cli/router-module.js +45 -568
- package/src/cli/wrangler-toml.js +324 -0
- package/src/index.js +3 -1
- package/src/node/port-reclaim.js +224 -0
- package/src/node/start-command.js +2 -128
- package/src/runtime/handler/provider-call.js +8 -2
- package/src/runtime/handler/route-debug.js +104 -0
- package/src/runtime/handler/runtime-policy.js +161 -0
- package/src/runtime/handler.js +43 -236
- package/src/shared/timeout-signal.js +23 -0
|
@@ -15,6 +15,7 @@ import { applyCachingMapping, mergeCachingHeaders } from "./cache-mapping.js";
|
|
|
15
15
|
import { applyReasoningEffortMapping } from "./reasoning-effort.js";
|
|
16
16
|
import { resolveUpstreamTimeoutMs } from "./request.js";
|
|
17
17
|
import { parseJsonSafely } from "./utils.js";
|
|
18
|
+
import { buildTimeoutSignal } from "../../shared/timeout-signal.js";
|
|
18
19
|
|
|
19
20
|
async function toProviderError(response) {
|
|
20
21
|
const raw = await response.text();
|
|
@@ -136,15 +137,18 @@ export async function makeProviderCall({
|
|
|
136
137
|
}
|
|
137
138
|
|
|
138
139
|
let response;
|
|
140
|
+
let cleanupTimeout = () => {};
|
|
139
141
|
try {
|
|
140
142
|
const timeoutMs = resolveUpstreamTimeoutMs(env);
|
|
143
|
+
const timeoutControl = buildTimeoutSignal(timeoutMs);
|
|
144
|
+
cleanupTimeout = timeoutControl.cleanup;
|
|
141
145
|
const init = {
|
|
142
146
|
method: "POST",
|
|
143
147
|
headers,
|
|
144
148
|
body: JSON.stringify(providerBody)
|
|
145
149
|
};
|
|
146
|
-
if (
|
|
147
|
-
init.signal =
|
|
150
|
+
if (timeoutControl.signal) {
|
|
151
|
+
init.signal = timeoutControl.signal;
|
|
148
152
|
}
|
|
149
153
|
|
|
150
154
|
response = await fetch(providerUrl, {
|
|
@@ -164,6 +168,8 @@ export async function makeProviderCall({
|
|
|
164
168
|
}
|
|
165
169
|
}, 503)
|
|
166
170
|
};
|
|
171
|
+
} finally {
|
|
172
|
+
cleanupTimeout();
|
|
167
173
|
}
|
|
168
174
|
|
|
169
175
|
if (!response.ok) {
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import { toBoolean } from "./utils.js";
|
|
2
|
+
|
|
3
|
+
const ROUTE_DEBUG_MAX_LIST_ITEMS = 8;
|
|
4
|
+
const ROUTE_DEBUG_MAX_HEADER_VALUE_LENGTH = 512;
|
|
5
|
+
|
|
6
|
+
/**
 * Builds a short human-readable label for a routing candidate.
 *
 * Preference order: the candidate's `requestModelId`, then
 * `providerId/modelId` when both are present, then `backend`, and finally
 * the placeholder "unknown/unknown".
 *
 * @param {object|null|undefined} candidate - Candidate descriptor; may be missing.
 * @returns {string} Label used in the route-debug lists.
 */
function candidateRef(candidate) {
  const { requestModelId, providerId, modelId, backend } = candidate ?? {};

  if (requestModelId) {
    return requestModelId;
  }
  if (providerId && modelId) {
    return `${providerId}/${modelId}`;
  }
  return backend || "unknown/unknown";
}
|
|
12
|
+
|
|
13
|
+
/**
 * Appends `value` to `list` unless the list is already at capacity.
 *
 * Nothing happens when `list` is not an array, when `value` is falsy, or when
 * the list already holds `maxItems` entries.
 *
 * @param {Array} list - Target array, mutated in place.
 * @param {*} value - Entry to append; falsy values are ignored.
 * @param {number} [maxItems=ROUTE_DEBUG_MAX_LIST_ITEMS] - Capacity limit.
 * @returns {void}
 */
function pushBounded(list, value, maxItems = ROUTE_DEBUG_MAX_LIST_ITEMS) {
  const isFull = () => list.length >= maxItems;
  const accepted = Array.isArray(list) && Boolean(value) && !isFull();

  if (accepted) {
    list.push(value);
  }
}
|
|
18
|
+
|
|
19
|
+
/**
 * Converts an arbitrary value into text that is safe to pass to
 * `Headers.set`.
 *
 * - Falsy input becomes the empty string.
 * - Runs of CR, LF or NUL are collapsed into a single space; these bytes are
 *   not allowed inside an HTTP header value.
 * - Code points above U+00FF are replaced with "?". Header values are byte
 *   strings, and `Headers.set` throws a TypeError for such characters, which
 *   would otherwise turn a debug header into a failed response.
 * - The result is trimmed and capped at ROUTE_DEBUG_MAX_HEADER_VALUE_LENGTH
 *   characters.
 *
 * @param {*} value - Raw value (typically a model/candidate label).
 * @returns {string} Sanitised header value, possibly empty.
 */
function toSafeHeaderValue(value) {
  const text = String(value || "")
    .replace(/[\r\n\0]+/g, " ")
    // The `u` flag makes an astral character count as one code point, so it
    // is replaced by a single "?" rather than one per surrogate.
    .replace(/[^\u0000-\u00ff]/gu, "?")
    .trim();
  if (!text) return "";
  return text.length > ROUTE_DEBUG_MAX_HEADER_VALUE_LENGTH
    ? text.slice(0, ROUTE_DEBUG_MAX_HEADER_VALUE_LENGTH)
    : text;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Reports whether route-debug output is switched on for this environment.
 *
 * `LLM_ROUTER_DEBUG_ROUTING` is consulted first; the value derived from
 * `LLM_ROUTER_DEBUG` (itself defaulting to `false`) is handed to `toBoolean`
 * as its second argument.
 * NOTE(review): `toBoolean`'s second argument is presumably the fallback used
 * when the first is not a recognisable boolean - confirm in ./utils.js.
 *
 * @param {object} [env={}] - Environment bindings / variables.
 * @returns {boolean} Whatever `toBoolean` resolves for the routing-debug flag.
 */
export function isRoutingDebugEnabled(env = {}) {
  const generalDebugFlag = toBoolean(env?.LLM_ROUTER_DEBUG, false);
  return toBoolean(env?.LLM_ROUTER_DEBUG_ROUTING, generalDebugFlag);
}
|
|
33
|
+
|
|
34
|
+
/**
 * Creates the mutable state object that collects route-debug information for
 * one request.
 *
 * Missing fields on `resolved` fall back to "smart" (requested model and
 * route reference), "direct" (route type) and "ordered" (strategy). The
 * candidate lists start empty and are fresh arrays on every call.
 *
 * @param {boolean} enabled - Whether debug recording is active.
 * @param {object|null|undefined} resolved - Resolved route description.
 * @returns {{enabled: boolean, requestedModel: string, routeType: string,
 *   routeRef: string, strategy: string, selectedCandidate: string,
 *   skippedCandidates: string[], attempts: string[]}} New debug state.
 */
export function buildRouteDebugState(enabled, resolved) {
  const requestedModel = resolved?.requestedModel || "smart";
  const routeType = resolved?.routeType || "direct";
  // The route reference falls back through the resolved model to the
  // (already defaulted) requested model.
  const routeRef = resolved?.routeRef || resolved?.resolvedModel || requestedModel;
  const strategy = resolved?.routeStrategy || "ordered";

  const state = {
    enabled,
    requestedModel,
    routeType,
    routeRef,
    strategy,
    selectedCandidate: "",
    skippedCandidates: [],
    attempts: []
  };
  return state;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Records that a candidate was skipped, together with the reason(s).
 *
 * Does nothing unless `debugState.enabled` is truthy. An array of reasons is
 * filtered for truthy entries and joined with "+"; any other value is
 * stringified and trimmed. An empty reason is recorded as "skipped". The
 * entry has the form `<candidate label>:<reason>` and is appended through
 * `pushBounded`.
 *
 * @param {object} debugState - State created by `buildRouteDebugState`.
 * @param {object} candidate - Candidate that was skipped.
 * @param {string|string[]} reasons - One reason or a list of reasons.
 * @returns {void}
 */
export function recordRouteSkip(debugState, candidate, reasons) {
  if (!debugState?.enabled) return;

  let reasonText;
  if (Array.isArray(reasons)) {
    reasonText = reasons.filter(Boolean).join("+");
  } else {
    reasonText = String(reasons || "").trim();
  }

  const label = reasonText || "skipped";
  const entry = `${candidateRef(candidate)}:${label}`;
  pushBounded(debugState.skippedCandidates, entry);
}
|
|
57
|
+
|
|
58
|
+
/**
 * Records one upstream attempt for a candidate.
 *
 * Does nothing unless `debugState.enabled` is truthy. The entry has the form
 * `<candidate label>:<status>/<category>#<attempt>`, where a non-finite
 * status is written as "error". The category comes from
 * `classification.category`; without one it is "ok" for a truthy status
 * below 400 and "unknown" otherwise.
 *
 * @param {object} debugState - State created by `buildRouteDebugState`.
 * @param {object} candidate - Candidate that was tried.
 * @param {number} status - HTTP status of the attempt, if any.
 * @param {object} [classification] - Failure classification, if any.
 * @param {number} attempt - Attempt counter written after the "#".
 * @returns {void}
 */
export function recordRouteAttempt(debugState, candidate, status, classification, attempt) {
  if (!debugState?.enabled) return;

  let category = classification?.category;
  if (!category) {
    category = status && status < 400 ? "ok" : "unknown";
  }

  const statusLabel = Number.isFinite(status) ? status : "error";
  const entry = `${candidateRef(candidate)}:${statusLabel}/${category}#${attempt}`;
  pushBounded(debugState.attempts, entry);
}
|
|
66
|
+
|
|
67
|
+
/**
 * Stores the label of the selected candidate on the debug state.
 *
 * Does nothing when debugging is disabled or no candidate is given. An
 * existing selection is kept unless `overwrite` is true.
 *
 * @param {object} debugState - State created by `buildRouteDebugState`.
 * @param {object} candidate - Candidate that was selected.
 * @param {{overwrite?: boolean}} [options] - Pass `overwrite: true` to replace
 *   an existing selection.
 * @returns {void}
 */
export function setRouteSelectedCandidate(debugState, candidate, { overwrite = false } = {}) {
  if (!debugState?.enabled || !candidate) return;

  const mayWrite = overwrite || !debugState.selectedCandidate;
  if (mayWrite) {
    debugState.selectedCandidate = candidateRef(candidate);
  }
}
|
|
72
|
+
|
|
73
|
+
/**
 * Returns a copy of `response` carrying the `x-llm-router-*` debug headers.
 *
 * The input is returned untouched when debugging is disabled or when it is
 * not a `Response`. Otherwise a new `Response` is built around the same body,
 * status and status text. The requested-model, route-type, route-ref and
 * route-strategy headers are always set; the selected-candidate,
 * skipped-candidates and attempts headers are set only when their sanitised
 * value is non-empty.
 *
 * @param {Response} response - Response about to be returned to the client.
 * @param {object} debugState - State created by `buildRouteDebugState`.
 * @returns {Response} The original response or a header-augmented copy.
 */
export function withRouteDebugHeaders(response, debugState) {
  const applicable = Boolean(debugState?.enabled) && response instanceof Response;
  if (!applicable) {
    return response;
  }

  const headers = new Headers(response.headers);

  // Headers that are written unconditionally.
  const alwaysSet = [
    ["x-llm-router-requested-model", debugState.requestedModel],
    ["x-llm-router-route-type", debugState.routeType],
    ["x-llm-router-route-ref", debugState.routeRef],
    ["x-llm-router-route-strategy", debugState.strategy]
  ];
  for (const [name, rawValue] of alwaysSet) {
    headers.set(name, toSafeHeaderValue(rawValue));
  }

  // Headers that are written only when there is something to report.
  const setWhenPresent = [
    ["x-llm-router-selected-candidate", debugState.selectedCandidate],
    ["x-llm-router-skipped-candidates", debugState.skippedCandidates.join(",")],
    ["x-llm-router-attempts", debugState.attempts.join(",")]
  ];
  for (const [name, rawValue] of setWhenPresent) {
    const safeValue = toSafeHeaderValue(rawValue);
    if (safeValue) {
      headers.set(name, safeValue);
    }
  }

  const { body, status, statusText } = response;
  return new Response(body, { status, statusText, headers });
}
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
import { normalizeStateStoreBackend } from "../state-store.js";
|
|
2
|
+
import { toBoolean, toNonNegativeInteger } from "./utils.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Maps a free-form runtime name onto one of two canonical values.
 *
 * "worker", "cloudflare-worker" and "cloudflare" (case-insensitive, ignoring
 * surrounding whitespace) yield "worker"; everything else, including missing
 * input, yields "node".
 *
 * @param {*} value - Runtime name as configured by the caller.
 * @returns {"worker"|"node"} Canonical runtime name.
 */
function normalizeRuntimeName(value) {
  const normalized = String(value || "").trim().toLowerCase();

  switch (normalized) {
    case "worker":
    case "cloudflare-worker":
    case "cloudflare":
      return "worker";
    default:
      return "node";
  }
}
|
|
11
|
+
|
|
12
|
+
/**
 * Derives the runtime-dependent feature flags for the handler.
 *
 * Outside the worker runtime, safe mode and best-effort stateful routing are
 * both off and stateful routing is enabled. In the worker runtime:
 * - `workerSafeMode` comes from `env.LLM_ROUTER_WORKER_SAFE_MODE`, then
 *   `options.workerSafeMode`, then `true`;
 * - `allowBestEffortStatefulRouting` comes from
 *   `env.LLM_ROUTER_WORKER_ALLOW_BEST_EFFORT_STATEFUL_ROUTING`, then
 *   `options.allowWorkerBestEffortStatefulRouting`, then `false`;
 * - stateful routing is enabled when safe mode is off or best-effort routing
 *   is allowed.
 * NOTE(review): the precedence above assumes `toBoolean(value, fallback)`
 * returns `fallback` for unrecognised input - confirm in ./utils.js.
 *
 * @param {object} [options={}] - Handler options (`runtime`, `workerSafeMode`,
 *   `allowWorkerBestEffortStatefulRouting`).
 * @param {object} [env={}] - Environment bindings / variables.
 * @returns {{runtime: string, workerRuntime: boolean, workerSafeMode: boolean,
 *   allowBestEffortStatefulRouting: boolean, statefulRoutingEnabled: boolean}}
 */
export function resolveRuntimeFlags(options = {}, env = {}) {
  const runtime = normalizeRuntimeName(options.runtime);

  if (runtime !== "worker") {
    return {
      runtime,
      workerRuntime: false,
      workerSafeMode: false,
      allowBestEffortStatefulRouting: false,
      statefulRoutingEnabled: true
    };
  }

  const safeModeDefault = toBoolean(options.workerSafeMode, true);
  const workerSafeMode = toBoolean(env?.LLM_ROUTER_WORKER_SAFE_MODE, safeModeDefault);

  const bestEffortDefault = toBoolean(options.allowWorkerBestEffortStatefulRouting, false);
  const allowBestEffortStatefulRouting = toBoolean(
    env?.LLM_ROUTER_WORKER_ALLOW_BEST_EFFORT_STATEFUL_ROUTING,
    bestEffortDefault
  );

  return {
    runtime,
    workerRuntime: true,
    workerSafeMode,
    allowBestEffortStatefulRouting,
    statefulRoutingEnabled: !workerSafeMode || allowBestEffortStatefulRouting
  };
}
|
|
33
|
+
|
|
34
|
+
/**
 * Restricts same-origin retries when the worker runs in safe mode without
 * stateful routing.
 *
 * The policy is returned unchanged (same object) unless
 * `runtimeFlags.workerSafeMode` is truthy and
 * `runtimeFlags.statefulRoutingEnabled` is falsy. In that case a copy is
 * returned with `originRetryAttempts` set to 1 and both origin retry delays
 * set to 0; the input object is not modified.
 *
 * @param {object} retryPolicy - Policy as resolved from the environment.
 * @param {object} [runtimeFlags] - Flags from `resolveRuntimeFlags`.
 * @returns {object} The original policy or a guarded copy.
 */
export function applyRuntimeRetryPolicyGuards(retryPolicy, runtimeFlags) {
  const guardsApply = Boolean(runtimeFlags?.workerSafeMode) && !runtimeFlags.statefulRoutingEnabled;
  if (!guardsApply) {
    return retryPolicy;
  }

  const singleAttemptOverrides = {
    originRetryAttempts: 1,
    originRetryBaseDelayMs: 0,
    originRetryMaxDelayMs: 0
  };
  return { ...retryPolicy, ...singleAttemptOverrides };
}
|
|
46
|
+
|
|
47
|
+
/**
 * Coerces a value to a non-negative whole number.
 *
 * The value is converted with `Number()`. A finite, non-negative result is
 * rounded down; anything else yields `fallback`.
 *
 * @param {*} value - Stored or configured value.
 * @param {number} [fallback=0] - Returned when the value is unusable.
 * @returns {number} Floored number, or `fallback`.
 */
function normalizeTimestamp(value, fallback = 0) {
  const numeric = Number(value);
  const usable = Number.isFinite(numeric) && numeric >= 0;
  return usable ? Math.floor(numeric) : fallback;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Coerces a value to a non-negative whole count.
 *
 * The value is converted with `Number()`. A result that is not finite or is
 * negative yields `fallback`; otherwise the result is rounded down.
 *
 * @param {*} value - Stored counter value.
 * @param {number} [fallback=0] - Returned when the value is unusable.
 * @returns {number} Floored count, or `fallback`.
 */
function normalizeCount(value, fallback = 0) {
  const count = Number(value);

  if (!Number.isFinite(count)) {
    return fallback;
  }
  if (count < 0) {
    return fallback;
  }
  return Math.floor(count);
}
|
|
58
|
+
|
|
59
|
+
/**
 * Reports whether a fallback circuit policy is usable.
 *
 * The policy counts as enabled only when both `failureThreshold` and
 * `cooldownMs` are finite numbers greater than zero.
 *
 * @param {object|null|undefined} policy - Fallback circuit policy.
 * @returns {boolean} True when both limits are positive finite numbers.
 */
function isFallbackCircuitTrackingEnabled(policy) {
  const isPositiveFinite = (candidate) => Number.isFinite(candidate) && candidate > 0;

  return isPositiveFinite(policy?.failureThreshold) && isPositiveFinite(policy?.cooldownMs);
}
|
|
65
|
+
|
|
66
|
+
/**
 * Decides whether a failure should be written to the candidate's routing
 * state.
 *
 * Failures without a classification, and those in the "invalid_request",
 * "client_error" or "not_supported_error" categories, are never tracked. Any
 * other failure is tracked when it is retryable or carries a positive
 * `originCooldownMs`.
 *
 * @param {object|null|undefined} classification - Failure classification.
 * @returns {boolean} True when the failure should update candidate state.
 */
function shouldTrackCandidateFailure(classification) {
  if (!classification) return false;

  const untrackedCategories = ["invalid_request", "client_error", "not_supported_error"];
  if (untrackedCategories.includes(classification.category)) {
    return false;
  }

  return Boolean(
    classification.retryable || normalizeTimestamp(classification.originCooldownMs) > 0
  );
}
|
|
76
|
+
|
|
77
|
+
/**
 * Resets the stored routing state of a candidate by writing `null` for its
 * key.
 *
 * Nothing is written when the store or the key is missing.
 *
 * @param {object|null} stateStore - Store exposing `setCandidateState(key, state)`.
 * @param {string} candidateKey - Key of the candidate to clear.
 * @returns {Promise<void>} Resolves after the store call has completed.
 */
export async function clearCandidateRoutingState(stateStore, candidateKey) {
  if (stateStore && candidateKey) {
    await stateStore.setCandidateState(candidateKey, null);
  }
}
|
|
81
|
+
|
|
82
|
+
/**
 * Updates a candidate's stored routing state after a failed attempt.
 *
 * Nothing is written when the store or key is missing, or when
 * `shouldTrackCandidateFailure` rejects the classification. Otherwise the
 * prior state is read, merged with the fields below and written back:
 * - `consecutiveRetryableFailures`: prior count (read from
 *   `consecutiveRetryableFailures`, else `consecutiveFailures`) plus one for
 *   a retryable failure; reset to 0 for a non-retryable one.
 * - `openUntil`: a prior value still in the future is kept. When the failure
 *   is retryable, the circuit policy is usable and the failure count has
 *   reached `failureThreshold`, it is extended to at least
 *   `now + cooldownMs`.
 * - `cooldownUntil`: with a positive `classification.originCooldownMs`, the
 *   later of the prior value and `now + originCooldownMs`; otherwise a prior
 *   value still in the future is kept, else 0.
 * - `lastFailureAt`, `lastFailureStatus` (0 for a non-finite status),
 *   `lastFailureCategory` and `updatedAt`.
 *
 * @param {object|null} stateStore - Store exposing `getCandidateState` and
 *   `setCandidateState`.
 * @param {string} candidateKey - Key of the failed candidate.
 * @param {object} classification - Failure classification (`retryable`,
 *   `originCooldownMs`, `category`).
 * @param {object} fallbackCircuitPolicy - Policy with `failureThreshold` and
 *   `cooldownMs`.
 * @param {number} status - HTTP status of the failed attempt, if any.
 * @param {number} [now=Date.now()] - Reference time used for all comparisons.
 * @returns {Promise<void>} Resolves once the state has been written.
 */
export async function applyCandidateFailureState(
  stateStore,
  candidateKey,
  classification,
  fallbackCircuitPolicy,
  status,
  now = Date.now()
) {
  const trackable = Boolean(stateStore) &&
    Boolean(candidateKey) &&
    shouldTrackCandidateFailure(classification);
  if (!trackable) {
    return;
  }

  const prior = (await stateStore.getCandidateState(candidateKey)) || {};
  const isRetryable = Boolean(classification.retryable);

  // Failure streak: grows on retryable failures, resets otherwise.
  const priorFailures = normalizeCount(
    prior.consecutiveRetryableFailures ?? prior.consecutiveFailures
  );
  const consecutiveRetryableFailures = isRetryable ? priorFailures + 1 : 0;

  // Circuit-open deadline: keep one that is still pending, extend on a trip.
  const priorOpenUntil = normalizeTimestamp(prior.openUntil);
  let openUntil = priorOpenUntil > now ? priorOpenUntil : 0;
  const circuitTrips = isRetryable &&
    isFallbackCircuitTrackingEnabled(fallbackCircuitPolicy) &&
    consecutiveRetryableFailures >= fallbackCircuitPolicy.failureThreshold;
  if (circuitTrips) {
    openUntil = Math.max(openUntil, now + fallbackCircuitPolicy.cooldownMs);
  }

  // Origin cooldown deadline.
  const priorCooldownUntil = normalizeTimestamp(prior.cooldownUntil);
  const cooldownMs = normalizeTimestamp(classification.originCooldownMs);
  let cooldownUntil;
  if (cooldownMs > 0) {
    cooldownUntil = Math.max(priorCooldownUntil, now + cooldownMs);
  } else if (priorCooldownUntil > now) {
    cooldownUntil = priorCooldownUntil;
  } else {
    cooldownUntil = 0;
  }

  const nextState = {
    ...prior,
    cooldownUntil,
    openUntil,
    consecutiveRetryableFailures,
    lastFailureAt: now,
    lastFailureStatus: Number.isFinite(status) ? Number(status) : 0,
    lastFailureCategory: classification.category,
    updatedAt: now
  };
  await stateStore.setCandidateState(candidateKey, nextState);
}
|
|
130
|
+
|
|
131
|
+
/**
 * Assembles the options object passed to the state-store factory.
 *
 * - Starts from a shallow copy of `options.stateStoreOptions` when that is an
 *   object.
 * - Backend: `options.stateStoreBackend`, then
 *   `env.LLM_ROUTER_STATE_BACKEND`, then the base options' `backend`, each
 *   passed through `normalizeStateStoreBackend`. The default is derived from
 *   `options.defaultStateStoreBackend` or the base backend, with "memory" as
 *   the last resort.
 * - In the worker runtime a "file" backend is replaced by "memory".
 * - `candidateStateTtlMs` is taken from
 *   `env.LLM_ROUTER_CANDIDATE_STATE_TTL_MS`, then
 *   `options.stateStoreCandidateStateTtlMs`, then the base options, and is
 *   set only when the result is not `undefined`.
 * - `filePath` (trimmed) is set only for the "file" backend.
 * NOTE(review): the precedence described for backend and TTL assumes the
 * helpers return their second argument for unusable input - confirm in
 * ../state-store.js and ./utils.js.
 *
 * @param {object} [options={}] - Handler options.
 * @param {object} [env={}] - Environment bindings / variables.
 * @param {object} [runtimeFlags={}] - Flags from `resolveRuntimeFlags`.
 * @returns {object} Options for creating the state store.
 */
export function resolveStateStoreOptions(options = {}, env = {}, runtimeFlags = {}) {
  const hasBaseOptions = options.stateStoreOptions && typeof options.stateStoreOptions === "object";
  const baseOptions = hasBaseOptions ? { ...options.stateStoreOptions } : {};

  const defaultBackend = normalizeStateStoreBackend(
    options.defaultStateStoreBackend || baseOptions.backend,
    "memory"
  );
  const requestedBackend = options.stateStoreBackend ||
    env?.LLM_ROUTER_STATE_BACKEND ||
    baseOptions.backend;
  const backend = normalizeStateStoreBackend(requestedBackend, defaultBackend);

  let effectiveBackend = backend;
  if (runtimeFlags?.workerRuntime && backend === "file") {
    effectiveBackend = "memory";
  }

  const optionLevelTtl = toNonNegativeInteger(
    options.stateStoreCandidateStateTtlMs,
    baseOptions.candidateStateTtlMs
  );
  const candidateStateTtlMs = toNonNegativeInteger(
    env?.LLM_ROUTER_CANDIDATE_STATE_TTL_MS,
    optionLevelTtl
  );

  const rawFilePath = options.stateStoreFilePath || env?.LLM_ROUTER_STATE_FILE_PATH || baseOptions.filePath;
  let filePath;
  if (typeof rawFilePath === "string" && rawFilePath.trim()) {
    filePath = rawFilePath.trim();
  }

  const resolvedOptions = { ...baseOptions, backend: effectiveBackend };
  if (candidateStateTtlMs !== undefined) {
    resolvedOptions.candidateStateTtlMs = candidateStateTtlMs;
  }
  if (effectiveBackend === "file" && filePath) {
    resolvedOptions.filePath = filePath;
  }
  return resolvedOptions;
}
|
package/src/runtime/handler.js
CHANGED
|
@@ -11,8 +11,7 @@ import {
|
|
|
11
11
|
import { consumeCandidateRateLimits } from "./rate-limits.js";
|
|
12
12
|
import {
|
|
13
13
|
buildRouteKey,
|
|
14
|
-
createStateStore
|
|
15
|
-
normalizeStateStoreBackend
|
|
14
|
+
createStateStore
|
|
16
15
|
} from "./state-store.js";
|
|
17
16
|
import { FORMATS } from "../translator/index.js";
|
|
18
17
|
import { shouldEnforceWorkerAuth, validateAuth } from "./handler/auth.js";
|
|
@@ -43,52 +42,27 @@ import {
|
|
|
43
42
|
resolveFallbackCircuitPolicy,
|
|
44
43
|
resolveRetryPolicy
|
|
45
44
|
} from "./handler/fallback.js";
|
|
46
|
-
import { sleep
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
}
|
|
62
|
-
|
|
63
|
-
function isFallbackCircuitTrackingEnabled(policy) {
|
|
64
|
-
return Number.isFinite(policy?.failureThreshold) &&
|
|
65
|
-
Number.isFinite(policy?.cooldownMs) &&
|
|
66
|
-
policy.failureThreshold > 0 &&
|
|
67
|
-
policy.cooldownMs > 0;
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
function shouldTrackCandidateFailure(classification) {
|
|
71
|
-
if (!classification) return false;
|
|
72
|
-
if (classification.category === "invalid_request" || classification.category === "client_error") {
|
|
73
|
-
return false;
|
|
74
|
-
}
|
|
75
|
-
if (classification.category === "not_supported_error") {
|
|
76
|
-
return false;
|
|
77
|
-
}
|
|
78
|
-
return Boolean(classification.retryable || normalizeTimestamp(classification.originCooldownMs) > 0);
|
|
79
|
-
}
|
|
45
|
+
import { sleep } from "./handler/utils.js";
|
|
46
|
+
import {
|
|
47
|
+
applyCandidateFailureState,
|
|
48
|
+
applyRuntimeRetryPolicyGuards,
|
|
49
|
+
clearCandidateRoutingState,
|
|
50
|
+
resolveRuntimeFlags,
|
|
51
|
+
resolveStateStoreOptions
|
|
52
|
+
} from "./handler/runtime-policy.js";
|
|
53
|
+
import {
|
|
54
|
+
buildRouteDebugState,
|
|
55
|
+
isRoutingDebugEnabled,
|
|
56
|
+
recordRouteAttempt,
|
|
57
|
+
recordRouteSkip,
|
|
58
|
+
setRouteSelectedCandidate,
|
|
59
|
+
withRouteDebugHeaders
|
|
60
|
+
} from "./handler/route-debug.js";
|
|
80
61
|
|
|
81
62
|
function shouldConsumeQuotaFromResult(result) {
|
|
82
63
|
return Boolean(result?.ok || result?.upstreamResponse instanceof Response);
|
|
83
64
|
}
|
|
84
65
|
|
|
85
|
-
function candidateRef(candidate) {
|
|
86
|
-
return candidate?.requestModelId ||
|
|
87
|
-
(candidate?.providerId && candidate?.modelId
|
|
88
|
-
? `${candidate.providerId}/${candidate.modelId}`
|
|
89
|
-
: candidate?.backend || "unknown/unknown");
|
|
90
|
-
}
|
|
91
|
-
|
|
92
66
|
function filterCandidatesByFormat(candidates) {
|
|
93
67
|
const eligible = [];
|
|
94
68
|
const skipped = [];
|
|
@@ -115,182 +89,6 @@ function hasNextEligibleCandidate(entries, startIndex) {
|
|
|
115
89
|
return false;
|
|
116
90
|
}
|
|
117
91
|
|
|
118
|
-
function isRoutingDebugEnabled(env = {}) {
|
|
119
|
-
return toBoolean(
|
|
120
|
-
env?.LLM_ROUTER_DEBUG_ROUTING,
|
|
121
|
-
toBoolean(env?.LLM_ROUTER_DEBUG, false)
|
|
122
|
-
);
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
function pushBounded(list, value, maxItems = ROUTE_DEBUG_MAX_LIST_ITEMS) {
|
|
126
|
-
if (!Array.isArray(list) || !value) return;
|
|
127
|
-
if (list.length >= maxItems) return;
|
|
128
|
-
list.push(value);
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
function buildRouteDebugState(enabled, resolved) {
|
|
132
|
-
return {
|
|
133
|
-
enabled,
|
|
134
|
-
requestedModel: resolved?.requestedModel || "smart",
|
|
135
|
-
routeType: resolved?.routeType || "direct",
|
|
136
|
-
routeRef: resolved?.routeRef || resolved?.resolvedModel || resolved?.requestedModel || "smart",
|
|
137
|
-
strategy: resolved?.routeStrategy || "ordered",
|
|
138
|
-
selectedCandidate: "",
|
|
139
|
-
skippedCandidates: [],
|
|
140
|
-
attempts: []
|
|
141
|
-
};
|
|
142
|
-
}
|
|
143
|
-
|
|
144
|
-
function recordRouteSkip(debugState, candidate, reasons) {
|
|
145
|
-
if (!debugState?.enabled) return;
|
|
146
|
-
const reasonText = Array.isArray(reasons)
|
|
147
|
-
? reasons.filter(Boolean).join("+")
|
|
148
|
-
: String(reasons || "").trim();
|
|
149
|
-
pushBounded(
|
|
150
|
-
debugState.skippedCandidates,
|
|
151
|
-
`${candidateRef(candidate)}:${reasonText || "skipped"}`
|
|
152
|
-
);
|
|
153
|
-
}
|
|
154
|
-
|
|
155
|
-
function recordRouteAttempt(debugState, candidate, status, classification, attempt) {
|
|
156
|
-
if (!debugState?.enabled) return;
|
|
157
|
-
const category = classification?.category || (status && status < 400 ? "ok" : "unknown");
|
|
158
|
-
pushBounded(
|
|
159
|
-
debugState.attempts,
|
|
160
|
-
`${candidateRef(candidate)}:${Number.isFinite(status) ? status : "error"}/${category}#${attempt}`
|
|
161
|
-
);
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
function setRouteSelectedCandidate(debugState, candidate, { overwrite = false } = {}) {
|
|
165
|
-
if (!debugState?.enabled || !candidate) return;
|
|
166
|
-
if (debugState.selectedCandidate && !overwrite) return;
|
|
167
|
-
debugState.selectedCandidate = candidateRef(candidate);
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
function toSafeHeaderValue(value) {
|
|
171
|
-
const text = String(value || "").replace(/[\r\n]+/g, " ").trim();
|
|
172
|
-
if (!text) return "";
|
|
173
|
-
return text.length > ROUTE_DEBUG_MAX_HEADER_VALUE_LENGTH
|
|
174
|
-
? text.slice(0, ROUTE_DEBUG_MAX_HEADER_VALUE_LENGTH)
|
|
175
|
-
: text;
|
|
176
|
-
}
|
|
177
|
-
|
|
178
|
-
function withRouteDebugHeaders(response, debugState) {
|
|
179
|
-
if (!debugState?.enabled || !(response instanceof Response)) {
|
|
180
|
-
return response;
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
const headers = new Headers(response.headers);
|
|
184
|
-
headers.set("x-llm-router-requested-model", toSafeHeaderValue(debugState.requestedModel));
|
|
185
|
-
headers.set("x-llm-router-route-type", toSafeHeaderValue(debugState.routeType));
|
|
186
|
-
headers.set("x-llm-router-route-ref", toSafeHeaderValue(debugState.routeRef));
|
|
187
|
-
headers.set("x-llm-router-route-strategy", toSafeHeaderValue(debugState.strategy));
|
|
188
|
-
|
|
189
|
-
const selectedCandidate = toSafeHeaderValue(debugState.selectedCandidate);
|
|
190
|
-
if (selectedCandidate) {
|
|
191
|
-
headers.set("x-llm-router-selected-candidate", selectedCandidate);
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
const skippedCandidates = toSafeHeaderValue(debugState.skippedCandidates.join(","));
|
|
195
|
-
if (skippedCandidates) {
|
|
196
|
-
headers.set("x-llm-router-skipped-candidates", skippedCandidates);
|
|
197
|
-
}
|
|
198
|
-
|
|
199
|
-
const attempts = toSafeHeaderValue(debugState.attempts.join(","));
|
|
200
|
-
if (attempts) {
|
|
201
|
-
headers.set("x-llm-router-attempts", attempts);
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
return new Response(response.body, {
|
|
205
|
-
status: response.status,
|
|
206
|
-
statusText: response.statusText,
|
|
207
|
-
headers
|
|
208
|
-
});
|
|
209
|
-
}
|
|
210
|
-
|
|
211
|
-
async function clearCandidateRoutingState(stateStore, candidateKey) {
|
|
212
|
-
if (!stateStore || !candidateKey) return;
|
|
213
|
-
await stateStore.setCandidateState(candidateKey, null);
|
|
214
|
-
}
|
|
215
|
-
|
|
216
|
-
async function applyCandidateFailureState(
|
|
217
|
-
stateStore,
|
|
218
|
-
candidateKey,
|
|
219
|
-
classification,
|
|
220
|
-
fallbackCircuitPolicy,
|
|
221
|
-
status,
|
|
222
|
-
now = Date.now()
|
|
223
|
-
) {
|
|
224
|
-
if (!stateStore || !candidateKey || !shouldTrackCandidateFailure(classification)) {
|
|
225
|
-
return;
|
|
226
|
-
}
|
|
227
|
-
|
|
228
|
-
const prior = await stateStore.getCandidateState(candidateKey) || {};
|
|
229
|
-
const priorCooldownUntil = normalizeTimestamp(prior.cooldownUntil);
|
|
230
|
-
const priorOpenUntil = normalizeTimestamp(prior.openUntil);
|
|
231
|
-
const priorFailures = normalizeCount(
|
|
232
|
-
prior.consecutiveRetryableFailures ?? prior.consecutiveFailures
|
|
233
|
-
);
|
|
234
|
-
|
|
235
|
-
const consecutiveRetryableFailures = classification.retryable
|
|
236
|
-
? priorFailures + 1
|
|
237
|
-
: 0;
|
|
238
|
-
|
|
239
|
-
let openUntil = priorOpenUntil > now ? priorOpenUntil : 0;
|
|
240
|
-
if (
|
|
241
|
-
classification.retryable &&
|
|
242
|
-
isFallbackCircuitTrackingEnabled(fallbackCircuitPolicy) &&
|
|
243
|
-
consecutiveRetryableFailures >= fallbackCircuitPolicy.failureThreshold
|
|
244
|
-
) {
|
|
245
|
-
openUntil = Math.max(openUntil, now + fallbackCircuitPolicy.cooldownMs);
|
|
246
|
-
}
|
|
247
|
-
|
|
248
|
-
const cooldownMs = normalizeTimestamp(classification.originCooldownMs);
|
|
249
|
-
const cooldownUntil = cooldownMs > 0
|
|
250
|
-
? Math.max(priorCooldownUntil, now + cooldownMs)
|
|
251
|
-
: (priorCooldownUntil > now ? priorCooldownUntil : 0);
|
|
252
|
-
|
|
253
|
-
await stateStore.setCandidateState(candidateKey, {
|
|
254
|
-
...prior,
|
|
255
|
-
cooldownUntil,
|
|
256
|
-
openUntil,
|
|
257
|
-
consecutiveRetryableFailures,
|
|
258
|
-
lastFailureAt: now,
|
|
259
|
-
lastFailureStatus: Number.isFinite(status) ? Number(status) : 0,
|
|
260
|
-
lastFailureCategory: classification.category,
|
|
261
|
-
updatedAt: now
|
|
262
|
-
});
|
|
263
|
-
}
|
|
264
|
-
|
|
265
|
-
function resolveStateStoreOptions(options = {}, env = {}) {
|
|
266
|
-
const baseOptions = options.stateStoreOptions && typeof options.stateStoreOptions === "object"
|
|
267
|
-
? { ...options.stateStoreOptions }
|
|
268
|
-
: {};
|
|
269
|
-
const defaultBackend = normalizeStateStoreBackend(
|
|
270
|
-
options.defaultStateStoreBackend || baseOptions.backend,
|
|
271
|
-
"memory"
|
|
272
|
-
);
|
|
273
|
-
const backend = normalizeStateStoreBackend(
|
|
274
|
-
options.stateStoreBackend || env?.LLM_ROUTER_STATE_BACKEND || baseOptions.backend,
|
|
275
|
-
defaultBackend
|
|
276
|
-
);
|
|
277
|
-
const candidateStateTtlMs = toNonNegativeInteger(
|
|
278
|
-
env?.LLM_ROUTER_CANDIDATE_STATE_TTL_MS,
|
|
279
|
-
toNonNegativeInteger(options.stateStoreCandidateStateTtlMs, baseOptions.candidateStateTtlMs)
|
|
280
|
-
);
|
|
281
|
-
const rawFilePath = options.stateStoreFilePath || env?.LLM_ROUTER_STATE_FILE_PATH || baseOptions.filePath;
|
|
282
|
-
const filePath = typeof rawFilePath === "string" && rawFilePath.trim()
|
|
283
|
-
? rawFilePath.trim()
|
|
284
|
-
: undefined;
|
|
285
|
-
|
|
286
|
-
return {
|
|
287
|
-
...baseOptions,
|
|
288
|
-
backend,
|
|
289
|
-
...(candidateStateTtlMs !== undefined ? { candidateStateTtlMs } : {}),
|
|
290
|
-
...(backend === "file" && filePath ? { filePath } : {})
|
|
291
|
-
};
|
|
292
|
-
}
|
|
293
|
-
|
|
294
92
|
async function handleRouteRequest(request, env, getConfig, sourceFormatHint, options = {}) {
|
|
295
93
|
let config;
|
|
296
94
|
try {
|
|
@@ -353,9 +151,12 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
|
|
|
353
151
|
}, 400);
|
|
354
152
|
}
|
|
355
153
|
|
|
154
|
+
const runtimeFlags = options.runtimeFlags || resolveRuntimeFlags(options, env);
|
|
356
155
|
const fallbackCircuitPolicy = resolveFallbackCircuitPolicy(env);
|
|
357
|
-
const retryPolicy = resolveRetryPolicy(env);
|
|
358
|
-
const stateStore =
|
|
156
|
+
const retryPolicy = applyRuntimeRetryPolicyGuards(resolveRetryPolicy(env), runtimeFlags);
|
|
157
|
+
const stateStore = runtimeFlags.statefulRoutingEnabled
|
|
158
|
+
? (options.stateStore || null)
|
|
159
|
+
: null;
|
|
359
160
|
const routeDebug = buildRouteDebugState(isRoutingDebugEnabled(env), resolved);
|
|
360
161
|
const now = Date.now();
|
|
361
162
|
|
|
@@ -392,7 +193,9 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
|
|
|
392
193
|
ranking = await rankRouteCandidates({
|
|
393
194
|
route: routePlan,
|
|
394
195
|
routeKey: buildRouteKey(routePlan, { sourceFormat }),
|
|
395
|
-
strategy: resolved.routeType === "alias"
|
|
196
|
+
strategy: runtimeFlags.statefulRoutingEnabled && resolved.routeType === "alias"
|
|
197
|
+
? resolved.routeStrategy
|
|
198
|
+
: "ordered",
|
|
396
199
|
candidates: formatFiltered.eligible,
|
|
397
200
|
stateStore,
|
|
398
201
|
config,
|
|
@@ -531,10 +334,10 @@ export function createFetchHandler(options) {
|
|
|
531
334
|
let stateStoreRef = options.stateStore || null;
|
|
532
335
|
let stateStorePromise = null;
|
|
533
336
|
|
|
534
|
-
async function ensureStateStore(env = {}) {
|
|
337
|
+
async function ensureStateStore(env = {}, runtimeFlags = {}) {
|
|
535
338
|
if (stateStoreRef) return stateStoreRef;
|
|
536
339
|
if (!stateStorePromise) {
|
|
537
|
-
stateStorePromise = createStateStore(resolveStateStoreOptions(options, env))
|
|
340
|
+
stateStorePromise = createStateStore(resolveStateStoreOptions(options, env, runtimeFlags))
|
|
538
341
|
.then((store) => {
|
|
539
342
|
stateStoreRef = store;
|
|
540
343
|
return store;
|
|
@@ -557,6 +360,7 @@ export function createFetchHandler(options) {
|
|
|
557
360
|
const fetchHandler = async function fetchHandler(request, env = {}, ctx) {
|
|
558
361
|
const url = new URL(request.url);
|
|
559
362
|
const respond = (response, corsOptions = {}) => withRequestCors(response, request, env, corsOptions);
|
|
363
|
+
const runtimeFlags = resolveRuntimeFlags(options, env);
|
|
560
364
|
let preloadedConfig = null;
|
|
561
365
|
let authValidated = false;
|
|
562
366
|
|
|
@@ -641,24 +445,27 @@ export function createFetchHandler(options) {
|
|
|
641
445
|
}
|
|
642
446
|
|
|
643
447
|
if (route?.type === "route") {
|
|
644
|
-
let stateStore;
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
448
|
+
let stateStore = null;
|
|
449
|
+
if (runtimeFlags.statefulRoutingEnabled) {
|
|
450
|
+
try {
|
|
451
|
+
stateStore = await ensureStateStore(env, runtimeFlags);
|
|
452
|
+
} catch (error) {
|
|
453
|
+
return respond(jsonResponse({
|
|
454
|
+
type: "error",
|
|
455
|
+
error: {
|
|
456
|
+
type: "configuration_error",
|
|
457
|
+
message: `Failed initializing routing state: ${error instanceof Error ? error.message : String(error)}`
|
|
458
|
+
}
|
|
459
|
+
}, 500));
|
|
460
|
+
}
|
|
655
461
|
}
|
|
656
462
|
|
|
657
463
|
const routeResponse = await handleRouteRequest(request, env, options.getConfig, route.sourceFormat, {
|
|
658
464
|
...options,
|
|
659
465
|
preloadedConfig,
|
|
660
466
|
authValidated,
|
|
661
|
-
stateStore
|
|
467
|
+
stateStore,
|
|
468
|
+
runtimeFlags
|
|
662
469
|
});
|
|
663
470
|
return respond(routeResponse);
|
|
664
471
|
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
 * Creates an abort signal that fires after `timeoutMs` milliseconds, plus a
 * cleanup callback for the caller to invoke once the guarded operation has
 * settled.
 *
 * - A non-finite or non-positive timeout, or a runtime without
 *   `AbortController`, yields `{ signal: undefined }` and a no-op cleanup.
 * - `AbortSignal.timeout` is used when available; its cleanup is a no-op.
 * - Otherwise an `AbortController` is aborted from a `setTimeout`, and
 *   cleanup clears that timer.
 *
 * The delay is rounded up to a whole millisecond and capped at 2^31 - 1.
 * Timer-based implementations treat larger delays as roughly 1 ms, which
 * would abort the request almost immediately, and some runtimes reject
 * fractional or oversized values passed to `AbortSignal.timeout`.
 *
 * @param {number} timeoutMs - Timeout in milliseconds.
 * @returns {{signal: AbortSignal|undefined, cleanup: function(): void}}
 *   Signal to pass to `fetch` (if any) and the matching cleanup callback.
 */
export function buildTimeoutSignal(timeoutMs) {
  if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) {
    return { signal: undefined, cleanup: () => {} };
  }

  // Largest delay a 32-bit timer can represent without overflowing.
  const maxTimerDelayMs = 2 ** 31 - 1;
  const delayMs = Math.min(Math.ceil(timeoutMs), maxTimerDelayMs);

  if (typeof AbortSignal !== "undefined" && typeof AbortSignal.timeout === "function") {
    return {
      signal: AbortSignal.timeout(delayMs),
      cleanup: () => {}
    };
  }

  if (typeof AbortController === "undefined") {
    return { signal: undefined, cleanup: () => {} };
  }

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(`timeout:${delayMs}`), delayMs);
  return {
    signal: controller.signal,
    cleanup: () => clearTimeout(timer)
  };
}
|