@khanglvm/llm-router 2.0.0 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -0
- package/README.md +1 -1
- package/package.json +1 -1
- package/src/node/startup-manager.js +66 -11
- package/src/runtime/balancer.js +33 -4
package/CHANGELOG.md
CHANGED
|
@@ -10,6 +10,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
10
10
|
### Fixed
|
|
11
11
|
- Raised the default inbound JSON body limit for OpenAI `/responses` requests from `1 MiB` to `8 MiB` while keeping other JSON routes at `1 MiB`. This prevents local `413 Request body too large` failures for Codex CLI and other Responses API clients carrying larger conversation state.
|
|
12
12
|
|
|
13
|
+
## [2.0.1] - 2026-03-15
|
|
14
|
+
|
|
15
|
+
### Fixed
|
|
16
|
+
- Fixed alias-route failover after transient upstream failures. When every candidate on a route was only in cooldown, the balancer now retries the earliest-recovering candidate instead of returning `No eligible providers remain for route ...`.
|
|
17
|
+
|
|
13
18
|
## [2.0.0] - 2026-03-15
|
|
14
19
|
|
|
15
20
|
### Changed
|
package/README.md
CHANGED
|
@@ -14,7 +14,7 @@ The primary CLI command is now:
|
|
|
14
14
|
llr
|
|
15
15
|
```
|
|
16
16
|
|
|
17
|
-
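`2.0.0` is the current public release. It includes the Web UI, AMP routing, and coding-tool integrations introduced in the 2.x line.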
|
|
17
|
+
`2.0.1` is the current public release. It includes the Web UI, AMP routing, and coding-tool integrations introduced in the 2.x line.
|
|
18
18
|
|
|
19
19
|
## Install
|
|
20
20
|
|
package/package.json
CHANGED
|
package/src/node/startup-manager.js
CHANGED
|
@@ -11,6 +11,20 @@ import { FIXED_LOCAL_ROUTER_HOST, FIXED_LOCAL_ROUTER_PORT } from "./local-server
|
|
|
11
11
|
|
|
12
12
|
const SERVICE_NAME = "llm-router";
|
|
13
13
|
const LAUNCH_AGENT_ID = "dev.llm-router";
|
|
14
|
+
const STARTUP_ENV_PASSTHROUGH_KEYS = [
|
|
15
|
+
"NODE_EXTRA_CA_CERTS",
|
|
16
|
+
"SSL_CERT_FILE",
|
|
17
|
+
"SSL_CERT_DIR",
|
|
18
|
+
"HTTP_PROXY",
|
|
19
|
+
"HTTPS_PROXY",
|
|
20
|
+
"ALL_PROXY",
|
|
21
|
+
"NO_PROXY",
|
|
22
|
+
"http_proxy",
|
|
23
|
+
"https_proxy",
|
|
24
|
+
"all_proxy",
|
|
25
|
+
"no_proxy",
|
|
26
|
+
"npm_config_cafile"
|
|
27
|
+
];
|
|
14
28
|
|
|
15
29
|
function resolveDarwinDomain() {
|
|
16
30
|
const uid = process.getuid?.();
|
|
@@ -83,13 +97,48 @@ function isMissingServiceMessage(value) {
|
|
|
83
97
|
|| text.includes("unit llm-router.service could not be found");
|
|
84
98
|
}
|
|
85
99
|
|
|
100
|
+
function escapeXml(value) {
|
|
101
|
+
return String(value)
|
|
102
|
+
.replaceAll("&", "&amp;")
|
|
103
|
+
.replaceAll("<", "&lt;")
|
|
104
|
+
.replaceAll(">", "&gt;")
|
|
105
|
+
.replaceAll('"', "&quot;")
|
|
106
|
+
.replaceAll("'", "&apos;");
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
export function buildStartupEnvironment(env = process.env) {
|
|
110
|
+
const configuredCliPath = String(env?.LLM_ROUTER_CLI_PATH || "").trim();
|
|
111
|
+
const startupEnv = {
|
|
112
|
+
LLM_ROUTER_MANAGED_BY_STARTUP: "1",
|
|
113
|
+
LLM_ROUTER_CLI_PATH: configuredCliPath || String(resolveStartupCliEntryPath({ env }) || "").trim()
|
|
114
|
+
};
|
|
115
|
+
|
|
116
|
+
for (const key of STARTUP_ENV_PASSTHROUGH_KEYS) {
|
|
117
|
+
const value = String(env?.[key] || "").trim();
|
|
118
|
+
if (!value) continue;
|
|
119
|
+
startupEnv[key] = value;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
return startupEnv;
|
|
123
|
+
}
|
|
124
|
+
|
|
86
125
|
function buildLaunchAgentPlist({ nodePath, cliPath, configPath, host, port, watchConfig, watchBinary, requireAuth }) {
|
|
87
126
|
const logDir = path.join(os.homedir(), "Library", "Logs");
|
|
88
127
|
const stdoutPath = path.join(logDir, "llm-router.out.log");
|
|
89
128
|
const stderrPath = path.join(logDir, "llm-router.err.log");
|
|
90
129
|
const args = [nodePath, cliPath, ...makeExecArgs({ configPath, host, port, watchConfig, watchBinary, requireAuth })];
|
|
130
|
+
const environment = {
|
|
131
|
+
...buildStartupEnvironment({
|
|
132
|
+
...process.env,
|
|
133
|
+
LLM_ROUTER_CLI_PATH: cliPath
|
|
134
|
+
}),
|
|
135
|
+
LLM_ROUTER_CLI_PATH: cliPath
|
|
136
|
+
};
|
|
91
137
|
|
|
92
|
-
const xmlArgs = args.map((arg) => ` <string>${arg}</string>`).join("\n");
|
|
138
|
+
const xmlArgs = args.map((arg) => ` <string>${escapeXml(arg)}</string>`).join("\n");
|
|
139
|
+
const xmlEnvironment = Object.entries(environment)
|
|
140
|
+
.map(([key, value]) => ` <key>${escapeXml(key)}</key>\n <string>${escapeXml(value)}</string>`)
|
|
141
|
+
.join("\n");
|
|
93
142
|
|
|
94
143
|
return `<?xml version="1.0" encoding="UTF-8"?>
|
|
95
144
|
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
|
@@ -107,17 +156,14 @@ ${xmlArgs}
|
|
|
107
156
|
<true/>
|
|
108
157
|
<key>EnvironmentVariables</key>
|
|
109
158
|
<dict>
|
|
110
|
-
<key>LLM_ROUTER_MANAGED_BY_STARTUP</key>
|
111
|
-
<string>1</string>
|
|
112
|
-
<key>LLM_ROUTER_CLI_PATH</key>
|
|
113
|
-
<string>${cliPath}</string>
|
|
159
|
+
${xmlEnvironment}
|
|
114
160
|
</dict>
|
|
115
161
|
<key>StandardOutPath</key>
|
|
116
|
-
<string>${stdoutPath}</string>
|
|
162
|
+
<string>${escapeXml(stdoutPath)}</string>
|
|
117
163
|
<key>StandardErrorPath</key>
|
|
118
|
-
<string>${stderrPath}</string>
|
|
164
|
+
<string>${escapeXml(stderrPath)}</string>
|
|
119
165
|
<key>WorkingDirectory</key>
|
|
120
|
-
<string>${process.cwd()}</string>
|
|
166
|
+
<string>${escapeXml(process.cwd())}</string>
|
|
121
167
|
</dict>
|
|
122
168
|
</plist>
|
|
123
169
|
`;
|
|
@@ -126,6 +172,17 @@ ${xmlArgs}
|
|
|
126
172
|
function buildSystemdService({ nodePath, cliPath, configPath, host, port, watchConfig, watchBinary, requireAuth }) {
|
|
127
173
|
const execArgs = makeExecArgs({ configPath, host, port, watchConfig, watchBinary, requireAuth }).map(quoteArg).join(" ");
|
|
128
174
|
const execStart = `${quoteArg(nodePath)} ${quoteArg(cliPath)} ${execArgs}`;
|
|
175
|
+
const environment = {
|
|
176
|
+
...buildStartupEnvironment({
|
|
177
|
+
...process.env,
|
|
178
|
+
LLM_ROUTER_CLI_PATH: cliPath
|
|
179
|
+
}),
|
|
180
|
+
LLM_ROUTER_CLI_PATH: cliPath,
|
|
181
|
+
NODE_ENV: "production"
|
|
182
|
+
};
|
|
183
|
+
const systemdEnvironment = Object.entries(environment)
|
|
184
|
+
.map(([key, value]) => `Environment=${key}=${value}`)
|
|
185
|
+
.join("\n");
|
|
129
186
|
|
|
130
187
|
return `[Unit]
|
|
131
188
|
Description=LLM Router local route
|
|
@@ -136,9 +193,7 @@ Type=simple
|
|
|
136
193
|
ExecStart=${execStart}
|
|
137
194
|
Restart=always
|
|
138
195
|
RestartSec=2
|
|
139
|
-
Environment=NODE_ENV=production
|
140
|
-
Environment=LLM_ROUTER_MANAGED_BY_STARTUP=1
|
|
141
|
-
Environment=LLM_ROUTER_CLI_PATH=${cliPath}
|
|
196
|
+
${systemdEnvironment}
|
|
142
197
|
WorkingDirectory=${process.cwd()}
|
|
143
198
|
|
|
144
199
|
[Install]
|
package/src/runtime/balancer.js
CHANGED
|
@@ -186,6 +186,19 @@ function sortEntriesByOriginalOrder(left, right) {
|
|
|
186
186
|
return left.originalIndex - right.originalIndex;
|
|
187
187
|
}
|
|
188
188
|
|
|
189
|
+
function sortCooldownEntries(left, right) {
|
|
190
|
+
if (left.openUntil !== right.openUntil) {
|
|
191
|
+
return left.openUntil - right.openUntil;
|
|
192
|
+
}
|
|
193
|
+
return sortEntriesByOriginalOrder(left, right);
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
function isCooldownOnlyEntry(entry) {
|
|
197
|
+
if (!entry || entry.eligible) return false;
|
|
198
|
+
const reasons = Array.isArray(entry.skipReasons) ? entry.skipReasons : [];
|
|
199
|
+
return reasons.length > 0 && reasons.every((reason) => reason === "cooldown");
|
|
200
|
+
}
|
|
201
|
+
|
|
189
202
|
async function buildCandidateEntries({
|
|
190
203
|
candidates,
|
|
191
204
|
stateStore,
|
|
@@ -315,6 +328,19 @@ export async function rankRouteCandidates({
|
|
|
315
328
|
const ineligibleEntries = entries
|
|
316
329
|
.filter((entry) => !entry.eligible)
|
|
317
330
|
.sort(sortEntriesByOriginalOrder);
|
|
331
|
+
const fallbackCooldownEntries = eligibleEntries.length === 0
|
|
332
|
+
? ineligibleEntries
|
|
333
|
+
.filter((entry) => isCooldownOnlyEntry(entry))
|
|
334
|
+
.sort(sortCooldownEntries)
|
|
335
|
+
.map((entry) => ({
|
|
336
|
+
...entry,
|
|
337
|
+
eligible: true,
|
|
338
|
+
skipReasons: [...entry.skipReasons, "cooldown-overridden"]
|
|
339
|
+
}))
|
|
340
|
+
: [];
|
|
341
|
+
const skippedIneligibleEntries = fallbackCooldownEntries.length > 0
|
|
342
|
+
? ineligibleEntries.filter((entry) => !isCooldownOnlyEntry(entry))
|
|
343
|
+
: ineligibleEntries;
|
|
318
344
|
const estimatedRequiredTokens = normalizeNonNegativeInteger(
|
|
319
345
|
requestContext?.estimatedRequiredTokens ??
|
|
320
346
|
requestContext?.requiredTokens ??
|
|
@@ -324,10 +350,13 @@ export async function rankRouteCandidates({
|
|
|
324
350
|
const routeCursor = stateStore
|
|
325
351
|
? await stateStore.getRouteCursor(resolvedRouteKey)
|
|
326
352
|
: 0;
|
|
353
|
+
const rankableEntries = fallbackCooldownEntries.length > 0
|
|
354
|
+
? fallbackCooldownEntries
|
|
355
|
+
: eligibleEntries;
|
|
327
356
|
const contextAwareGroups = shouldApplyContextAwareOrdering(route, estimatedRequiredTokens)
|
|
328
|
-
? partitionEligibleEntriesByContextWindow(eligibleEntries, estimatedRequiredTokens)
|
|
357
|
+
? partitionEligibleEntriesByContextWindow(rankableEntries, estimatedRequiredTokens)
|
|
329
358
|
: {
|
|
330
|
-
prioritizedEntries: eligibleEntries,
|
|
359
|
+
prioritizedEntries: rankableEntries,
|
|
331
360
|
deferredEntries: []
|
|
332
361
|
};
|
|
333
362
|
const ranking = rankEligibleEntries(
|
|
@@ -339,7 +368,7 @@ export async function rankRouteCandidates({
|
|
|
339
368
|
const rankedEntries = [
|
|
340
369
|
...ranking.orderedEligible,
|
|
341
370
|
...contextAwareGroups.deferredEntries,
|
|
342
|
-
...ineligibleEntries
|
|
371
|
+
...skippedIneligibleEntries
|
|
343
372
|
];
|
|
344
373
|
|
|
345
374
|
return {
|
|
@@ -351,7 +380,7 @@ export async function rankRouteCandidates({
|
|
|
351
380
|
shouldAdvanceCursor: ranking.shouldAdvanceCursor,
|
|
352
381
|
entries: rankedEntries,
|
|
353
382
|
selectedEntry: ranking.orderedEligible[0] || null,
|
|
354
|
-
skippedEntries: ineligibleEntries,
|
|
383
|
+
skippedEntries: skippedIneligibleEntries,
|
|
355
384
|
rankedCandidates: rankedEntries.map((entry) => entry.candidate)
|
|
356
385
|
};
|
|
357
386
|
}
|