@khanglvm/llm-router 2.3.5 → 2.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -7,6 +7,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [2.3.7] - 2026-04-18
11
+
12
+ ### Fixed
13
+ - Local `llr start` now keeps a fixed-port supervisor in front of the router runtime so CLI and tool traffic can survive backend restarts and upgrades without losing the public router endpoint.
14
+ - `llr update` now upgrades the installed package before asking the live backend to reload, keeping the running router online during the install step and draining in-flight requests before the new version takes over.
15
+ - Requests that arrive during a backend restart window are now deferred and automatically retried through the supervisor instead of failing immediately when the backend socket is briefly unavailable.
16
+
17
+ ## [2.3.6] - 2026-04-18
18
+
19
+ ### Fixed
20
+ - Factory Droid routing now injects every managed alias/provider model as its own router-managed `customModels` entry and writes friendly custom model `displayName` labels for the Droid CLI picker. Selected defaults are now stored as explicit `custom:llm-*` IDs in `model`, `sessionDefaultSettings.model`, `missionOrchestratorModel`, and `missionModelSettings.*`, so Droid resolves the router-managed custom provider instead of falling back to native built-in models.
21
+
10
22
  ## [2.3.5] - 2026-04-17
11
23
 
12
24
  ### Fixed
package/README.md CHANGED
@@ -29,10 +29,17 @@ llr ai-help # agent-oriented setup brief
29
29
  - **Model aliases with routing** — group models into stable alias names with weighted round-robin, quota-aware balancing, and automatic fallback
30
30
  - **Rate limiting** — set request caps per model or across all models over configurable time windows
31
31
  - **Coding tool routing** — one-click routing config for Codex CLI, Claude Code, Factory Droid, and AMP
32
+ - **Seamless local updates** — `llr update` keeps the fixed local router endpoint online, drains in-flight requests, and automatically retries through backend restart windows
32
33
  - **Web search** — built-in web search for AMP and other router-managed tools
33
34
  - **Deployable** — run locally or deploy to Cloudflare Workers
34
35
  - **AI-agent friendly** — full CLI parity with `llr config --operation=...` so agents can configure everything programmatically
35
36
 
37
+ ## Local Runtime Reliability
38
+
39
+ `llr start` keeps a small supervisor bound to the fixed local router port and runs the real router backend behind it on an internal loopback port.
40
+
41
+ That means `llr update` can install a new package version and gracefully swap the backend without breaking active CLI or tool requests. Requests that arrive during the short backend handoff are deferred and retried automatically instead of failing immediately. The Web UI may reconnect during that window, but router-managed API traffic keeps the same public local endpoint.
42
+
36
43
  ## Web UI
37
44
 
38
45
  ### Alias & Fallback
@@ -62,6 +69,7 @@ Route Claude Code through the gateway with per-tier model bindings.
62
69
  ### Factory Droid
63
70
 
64
71
  Route Factory Droid through the gateway via managed custom model entries with reasoning effort control.
72
+ LLM Router injects router-managed `customModels` entries for aliases and provider/model routes, then writes Factory defaults as `custom:llm-*` IDs so Droid selects the custom provider entry instead of a native built-in model with the same name.
65
73
 
66
74
  ### Web Search
67
75
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@khanglvm/llm-router",
3
- "version": "2.3.5",
3
+ "version": "2.3.7",
4
4
  "description": "LLM Router: single gateway endpoint for multi-provider LLMs with unified OpenAI+Anthropic format and seamless fallback",
5
5
  "keywords": [
6
6
  "llm-router",
@@ -3853,7 +3853,8 @@ async function buildCodingToolRoutingSnapshot({
3853
3853
  }));
3854
3854
  const factoryDroid = await readFactoryDroidRoutingState({
3855
3855
  settingsFilePath: readArg(args, ["factory-droid-settings-file", "factoryDroidSettingsFile"], ""),
3856
- endpointUrl
3856
+ endpointUrl,
3857
+ config
3857
3858
  }).catch((error) => ({
3858
3859
  tool: "factory-droid",
3859
3860
  settingsFilePath: resolveFactoryDroidSettingsFilePath({}),
@@ -6635,7 +6636,8 @@ async function doSetFactoryDroidRouting(context) {
6635
6636
 
6636
6637
  const existingState = await readFactoryDroidRoutingState({
6637
6638
  settingsFilePath,
6638
- endpointUrl
6639
+ endpointUrl,
6640
+ config
6639
6641
  });
6640
6642
  const apiKey = String(
6641
6643
  readArg(args, ["master-key", "masterKey", "api-key", "apiKey"], config?.masterKey || "") || ""
@@ -6679,6 +6681,7 @@ async function doSetFactoryDroidRouting(context) {
6679
6681
  endpointUrl,
6680
6682
  apiKey,
6681
6683
  bindings,
6684
+ config,
6682
6685
  captureBackup: true
6683
6686
  });
6684
6687
  return {
@@ -6691,7 +6694,7 @@ async function doSetFactoryDroidRouting(context) {
6691
6694
  ["Settings File", patchResult.settingsFilePath],
6692
6695
  ["Backup File", patchResult.backupFilePath],
6693
6696
  ["Base URL", patchResult.baseUrl],
6694
- ["Provider", patchResult.configuredProvider || "generic-chat-completion-api"],
6697
+ ["Provider", patchResult.configuredProvider || "(not set)"],
6695
6698
  ["Default Model", patchResult.bindings?.defaultModel || "(not set)"],
6696
6699
  ["Mission Orchestrator", patchResult.bindings?.missionOrchestratorModel || "(not set)"],
6697
6700
  ["Mission Worker", patchResult.bindings?.missionWorkerModel || "(not set)"],
package/src/cli-entry.js CHANGED
@@ -117,14 +117,22 @@ async function promptStartupConflictResolution({ port }) {
117
117
  }
118
118
  }
119
119
 
120
- async function runStartFastPath(args) {
120
+ async function runStartFastPath(args, { backendMode = false } = {}) {
121
+ const explicitPort = backendMode
122
+ ? (() => {
123
+ const parsed = Number.parseInt(String(args.port ?? ""), 10);
124
+ return Number.isInteger(parsed) && parsed > 0 ? parsed : FIXED_LOCAL_ROUTER_PORT;
125
+ })()
126
+ : resolveListenPort({ explicitPort: args.port });
121
127
  const result = await runStartCommand({
122
128
  configPath: args.config || args.configPath || getDefaultConfigPath(),
123
129
  host: FIXED_LOCAL_ROUTER_HOST,
124
- port: resolveListenPort({ explicitPort: args.port }),
130
+ port: explicitPort,
125
131
  watchConfig: parseBoolean(args["watch-config"] ?? args.watchConfig, true),
126
132
  watchBinary: parseBoolean(args["watch-binary"] ?? args.watchBinary, true),
127
133
  requireAuth: parseBoolean(args["require-auth"] ?? args.requireAuth, false),
134
+ backendMode,
135
+ startCommand: backendMode ? "start-runtime" : "start",
128
136
  onStartupConflict: (payload) => promptStartupConflictResolution(payload),
129
137
  cliPathForWatch: process.argv[1],
130
138
  onLine: (line) => console.log(line),
@@ -163,6 +171,7 @@ export async function runCli(argv = process.argv.slice(2), isTTY = undefined, ov
163
171
  const parsed = parseSimpleArgs(argv);
164
172
  const first = parsed.positional[0];
165
173
  const firstIsStart = first === "start";
174
+ const firstIsStartRuntime = first === "start-runtime";
166
175
  const firstIsWeb = first === "web";
167
176
  const firstIsConfig = first === "config";
168
177
  const firstIsSetup = first === "setup";
@@ -188,6 +197,12 @@ export async function runCli(argv = process.argv.slice(2), isTTY = undefined, ov
188
197
  return runStartFastPathImpl(parsedStart.args);
189
198
  }
190
199
 
200
+ if (firstIsStartRuntime && !parsed.wantsHelp) {
201
+ const startArgs = argv.slice(1);
202
+ const parsedStart = parseSimpleArgs(startArgs);
203
+ return runStartFastPathImpl(parsedStart.args, { backendMode: true });
204
+ }
205
+
191
206
  if (firstIsWeb && !parsed.wantsHelp) {
192
207
  const webArgs = argv.slice(1);
193
208
  const parsedWeb = parseSimpleArgs(webArgs);