@khanglvm/llm-router 2.3.6 → 2.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -7,6 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [2.3.7] - 2026-04-18
11
+
12
+ ### Fixed
13
+ - Local `llr start` now keeps a fixed-port supervisor in front of the router runtime so CLI and tool traffic can survive backend restarts and upgrades without losing the public router endpoint.
14
+ - `llr update` now upgrades the installed package before asking the live backend to reload, keeping the running router online during the install step and draining in-flight requests before the new version takes over.
15
+ - Requests that arrive during a backend restart window are now deferred and automatically retried through the supervisor instead of failing immediately when the backend socket is briefly unavailable.
16
+
10
17
  ## [2.3.6] - 2026-04-18
11
18
 
12
19
  ### Fixed
package/README.md CHANGED
@@ -29,10 +29,17 @@ llr ai-help # agent-oriented setup brief
29
29
  - **Model aliases with routing** — group models into stable alias names with weighted round-robin, quota-aware balancing, and automatic fallback
30
30
  - **Rate limiting** — set request caps per model or across all models over configurable time windows
31
31
  - **Coding tool routing** — one-click routing config for Codex CLI, Claude Code, Factory Droid, and AMP
32
+ - **Seamless local updates** — `llr update` keeps the fixed local router endpoint online, drains in-flight requests, and automatically retries through backend restart windows
32
33
  - **Web search** — built-in web search for AMP and other router-managed tools
33
34
  - **Deployable** — run locally or deploy to Cloudflare Workers
34
35
  - **AI-agent friendly** — full CLI parity with `llr config --operation=...` so agents can configure everything programmatically
35
36
 
37
+ ## Local Runtime Reliability
38
+
39
+ `llr start` keeps a small supervisor bound to the fixed local router port and runs the real router backend behind it on an internal loopback port.
40
+
41
+ That means `llr update` can install a new package version and gracefully swap the backend without breaking active CLI or tool requests. Requests that arrive during the short backend handoff are deferred and retried automatically instead of failing immediately. The Web UI may reconnect during that window, but router-managed API traffic keeps the same public local endpoint.
42
+
36
43
  ## Web UI
37
44
 
38
45
  ### Alias & Fallback
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@khanglvm/llm-router",
3
- "version": "2.3.6",
3
+ "version": "2.3.7",
4
4
  "description": "LLM Router: single gateway endpoint for multi-provider LLMs with unified OpenAI+Anthropic format and seamless fallback",
5
5
  "keywords": [
6
6
  "llm-router",
@@ -6694,7 +6694,7 @@ async function doSetFactoryDroidRouting(context) {
6694
6694
  ["Settings File", patchResult.settingsFilePath],
6695
6695
  ["Backup File", patchResult.backupFilePath],
6696
6696
  ["Base URL", patchResult.baseUrl],
6697
- ["Provider", patchResult.configuredProvider || "generic-chat-completion-api"],
6697
+ ["Provider", patchResult.configuredProvider || "(not set)"],
6698
6698
  ["Default Model", patchResult.bindings?.defaultModel || "(not set)"],
6699
6699
  ["Mission Orchestrator", patchResult.bindings?.missionOrchestratorModel || "(not set)"],
6700
6700
  ["Mission Worker", patchResult.bindings?.missionWorkerModel || "(not set)"],
package/src/cli-entry.js CHANGED
@@ -117,14 +117,22 @@ async function promptStartupConflictResolution({ port }) {
117
117
  }
118
118
  }
119
119
 
120
- async function runStartFastPath(args) {
120
+ async function runStartFastPath(args, { backendMode = false } = {}) {
121
+ const explicitPort = backendMode
122
+ ? (() => {
123
+ const parsed = Number.parseInt(String(args.port ?? ""), 10);
124
+ return Number.isInteger(parsed) && parsed > 0 ? parsed : FIXED_LOCAL_ROUTER_PORT;
125
+ })()
126
+ : resolveListenPort({ explicitPort: args.port });
121
127
  const result = await runStartCommand({
122
128
  configPath: args.config || args.configPath || getDefaultConfigPath(),
123
129
  host: FIXED_LOCAL_ROUTER_HOST,
124
- port: resolveListenPort({ explicitPort: args.port }),
130
+ port: explicitPort,
125
131
  watchConfig: parseBoolean(args["watch-config"] ?? args.watchConfig, true),
126
132
  watchBinary: parseBoolean(args["watch-binary"] ?? args.watchBinary, true),
127
133
  requireAuth: parseBoolean(args["require-auth"] ?? args.requireAuth, false),
134
+ backendMode,
135
+ startCommand: backendMode ? "start-runtime" : "start",
128
136
  onStartupConflict: (payload) => promptStartupConflictResolution(payload),
129
137
  cliPathForWatch: process.argv[1],
130
138
  onLine: (line) => console.log(line),
@@ -163,6 +171,7 @@ export async function runCli(argv = process.argv.slice(2), isTTY = undefined, ov
163
171
  const parsed = parseSimpleArgs(argv);
164
172
  const first = parsed.positional[0];
165
173
  const firstIsStart = first === "start";
174
+ const firstIsStartRuntime = first === "start-runtime";
166
175
  const firstIsWeb = first === "web";
167
176
  const firstIsConfig = first === "config";
168
177
  const firstIsSetup = first === "setup";
@@ -188,6 +197,12 @@ export async function runCli(argv = process.argv.slice(2), isTTY = undefined, ov
188
197
  return runStartFastPathImpl(parsedStart.args);
189
198
  }
190
199
 
200
+ if (firstIsStartRuntime && !parsed.wantsHelp) {
201
+ const startArgs = argv.slice(1);
202
+ const parsedStart = parseSimpleArgs(startArgs);
203
+ return runStartFastPathImpl(parsedStart.args, { backendMode: true });
204
+ }
205
+
191
206
  if (firstIsWeb && !parsed.wantsHelp) {
192
207
  const webArgs = argv.slice(1);
193
208
  const parsedWeb = parseSimpleArgs(webArgs);
@@ -970,7 +970,38 @@ export async function patchClaudeCodeEffortLevel({
970
970
  }
971
971
 
972
972
  const FACTORY_DROID_ROUTER_MARKER = "_llmRouterManaged";
973
- const FACTORY_DROID_ROUTER_PROVIDER = "generic-chat-completion-api";
973
+ const FACTORY_DROID_OPENAI_PROVIDER = "openai";
974
+ const FACTORY_DROID_ANTHROPIC_PROVIDER = "anthropic";
975
+ const FACTORY_DROID_ROUTER_PROVIDERS = Object.freeze([
976
+ FACTORY_DROID_OPENAI_PROVIDER,
977
+ FACTORY_DROID_ANTHROPIC_PROVIDER
978
+ ]);
979
+
980
+ function dedupeStrings(values = []) {
981
+ const seen = new Set();
982
+ const out = [];
983
+ for (const value of values) {
984
+ const normalized = String(value || "").trim();
985
+ if (!normalized || seen.has(normalized)) continue;
986
+ seen.add(normalized);
987
+ out.push(normalized);
988
+ }
989
+ return out;
990
+ }
991
+
992
+ function normalizeFactoryDroidFormat(value) {
993
+ const normalized = String(value || "").trim().toLowerCase();
994
+ if (normalized === "openai") return "openai";
995
+ if (normalized === "claude" || normalized === "anthropic") return "claude";
996
+ return "";
997
+ }
998
+
999
+ function mapFactoryDroidFormatToProvider(format) {
1000
+ const normalized = normalizeFactoryDroidFormat(format);
1001
+ if (normalized === "claude") return FACTORY_DROID_ANTHROPIC_PROVIDER;
1002
+ if (normalized === "openai") return FACTORY_DROID_OPENAI_PROVIDER;
1003
+ return "";
1004
+ }
974
1005
 
975
1006
  function normalizeFactoryDroidBindings(bindings = {}) {
976
1007
  const source = bindings && typeof bindings === "object" && !Array.isArray(bindings) ? bindings : {};
@@ -984,9 +1015,119 @@ function normalizeFactoryDroidBindings(bindings = {}) {
984
1015
  };
985
1016
  }
986
1017
 
987
- function buildFactoryDroidBaseUrl(endpointUrl) {
1018
+ function buildFactoryDroidBaseUrl(endpointUrl, provider = FACTORY_DROID_OPENAI_PROVIDER) {
988
1019
  const normalized = normalizeHttpUrl(endpointUrl);
989
- return normalized ? `${normalized}/openai/v1` : "";
1020
+ const resolvedProvider = String(provider || "").trim().toLowerCase() || FACTORY_DROID_OPENAI_PROVIDER;
1021
+ if (!normalized) return "";
1022
+ return resolvedProvider === FACTORY_DROID_ANTHROPIC_PROVIDER
1023
+ ? `${normalized}/anthropic`
1024
+ : `${normalized}/openai/v1`;
1025
+ }
1026
+
1027
+ function inferFactoryDroidFormatFromModelId(modelId) {
1028
+ const normalized = String(modelId || "").trim().toLowerCase();
1029
+ if (!normalized) return "";
1030
+ if (/^(?:claude|opus|sonnet|haiku)(?=[-./\s]|$)/i.test(normalized)) return "claude";
1031
+ if (/^gpt(?=[-./\s]|$)/i.test(normalized)) return "openai";
1032
+ return "";
1033
+ }
1034
+
1035
+ function inferFactoryDroidFormatFromProviderId(providerId) {
1036
+ const normalized = String(providerId || "").trim().toLowerCase();
1037
+ if (!normalized) return "";
1038
+ if (normalized === "anthropic") return "claude";
1039
+ if (normalized === "openai") return "openai";
1040
+ return "";
1041
+ }
1042
+
1043
+ function getFactoryDroidProviderModelFormats(provider, model, modelId = "") {
1044
+ const resolvedModelId = String(modelId || model?.id || "").trim();
1045
+ const preferredFormat = normalizeFactoryDroidFormat(provider?.lastProbe?.modelPreferredFormat?.[resolvedModelId]);
1046
+ if (preferredFormat) return [preferredFormat];
1047
+
1048
+ return dedupeStrings([
1049
+ ...(provider?.lastProbe?.modelSupport?.[resolvedModelId] || []),
1050
+ ...(model?.formats || []),
1051
+ model?.format
1052
+ ])
1053
+ .map(normalizeFactoryDroidFormat)
1054
+ .filter(Boolean);
1055
+ }
1056
+
1057
+ function getFactoryDroidProviderFormats(provider) {
1058
+ return dedupeStrings([
1059
+ ...(provider?.formats || []),
1060
+ provider?.format
1061
+ ])
1062
+ .map(normalizeFactoryDroidFormat)
1063
+ .filter(Boolean);
1064
+ }
1065
+
1066
+ function getFactoryDroidAliasTargetRefs(alias) {
1067
+ const refs = [];
1068
+ const push = (entry) => {
1069
+ const ref = String(
1070
+ typeof entry === "string"
1071
+ ? entry
1072
+ : (entry?.ref || entry?.sourceRef || "")
1073
+ ).trim();
1074
+ if (ref) refs.push(ref);
1075
+ };
1076
+
1077
+ for (const entry of Array.isArray(alias?.targets) ? alias.targets : []) push(entry);
1078
+ for (const entry of Array.isArray(alias?.fallbackTargets) ? alias.fallbackTargets : []) push(entry);
1079
+
1080
+ return refs;
1081
+ }
1082
+
1083
+ function resolveFactoryDroidRouteFormat(modelRef, config = {}, seen = new Set()) {
1084
+ const normalizedModelRef = String(modelRef || "").trim();
1085
+ if (!normalizedModelRef || seen.has(normalizedModelRef)) return "";
1086
+
1087
+ if (normalizedModelRef.includes("/")) {
1088
+ const separatorIndex = normalizedModelRef.indexOf("/");
1089
+ const providerId = normalizedModelRef.slice(0, separatorIndex).trim();
1090
+ const modelId = normalizedModelRef.slice(separatorIndex + 1).trim();
1091
+ const provider = (Array.isArray(config?.providers) ? config.providers : [])
1092
+ .find((entry) => String(entry?.id || "").trim() === providerId);
1093
+ const model = Array.isArray(provider?.models)
1094
+ ? provider.models.find((entry) => String(entry?.id || "").trim() === modelId)
1095
+ : null;
1096
+ return getFactoryDroidProviderModelFormats(provider, model, modelId)[0]
1097
+ || inferFactoryDroidFormatFromModelId(modelId)
1098
+ || getFactoryDroidProviderFormats(provider)[0]
1099
+ || inferFactoryDroidFormatFromProviderId(providerId)
1100
+ || "";
1101
+ }
1102
+
1103
+ seen.add(normalizedModelRef);
1104
+ const aliases = config?.modelAliases && typeof config.modelAliases === "object" && !Array.isArray(config.modelAliases)
1105
+ ? config.modelAliases
1106
+ : {};
1107
+ const alias = aliases[normalizedModelRef];
1108
+ if (!alias || typeof alias !== "object" || Array.isArray(alias)) return "";
1109
+
1110
+ for (const targetRef of getFactoryDroidAliasTargetRefs(alias)) {
1111
+ const resolved = resolveFactoryDroidRouteFormat(targetRef, config, new Set(seen));
1112
+ if (resolved) return resolved;
1113
+ }
1114
+
1115
+ return "";
1116
+ }
1117
+
1118
+ function resolveFactoryDroidCustomModelProvider(modelRef, config = {}) {
1119
+ return mapFactoryDroidFormatToProvider(resolveFactoryDroidRouteFormat(modelRef, config))
1120
+ || FACTORY_DROID_OPENAI_PROVIDER;
1121
+ }
1122
+
1123
+ function resolveFactoryDroidProviderDisplayName(modelRef, config = {}) {
1124
+ const normalizedModelRef = String(modelRef || "").trim();
1125
+ if (!normalizedModelRef.includes("/")) return "";
1126
+ const separatorIndex = normalizedModelRef.indexOf("/");
1127
+ const providerId = normalizedModelRef.slice(0, separatorIndex).trim();
1128
+ const provider = (Array.isArray(config?.providers) ? config.providers : [])
1129
+ .find((entry) => String(entry?.id || "").trim() === providerId);
1130
+ return String(provider?.name || providerId || "").trim();
990
1131
  }
991
1132
 
992
1133
  function collectFactoryDroidAvailableModels(config = {}, bindings = {}) {
@@ -1034,7 +1175,10 @@ function buildFactoryDroidAvailableModelDescriptors(config = {}, bindings = {})
1034
1175
  modelRef,
1035
1176
  kind,
1036
1177
  id: buildFactoryDroidRouterModelId(modelRef, { kind }),
1037
- displayName: buildFactoryDroidRouterDisplayName(modelRef, { kind })
1178
+ displayName: buildFactoryDroidRouterDisplayName(modelRef, {
1179
+ kind,
1180
+ providerName: kind === "model" ? resolveFactoryDroidProviderDisplayName(modelRef, config) : ""
1181
+ })
1038
1182
  };
1039
1183
  })
1040
1184
  .filter((entry) => String(entry.id || "").trim() && String(entry.modelRef || "").trim());
@@ -1156,14 +1300,17 @@ function isFactoryDroidRouterManagedEntry(entry, { baseUrl = "" } = {}) {
1156
1300
  const entryId = String(entry.id || "").trim();
1157
1301
  if (isFactoryDroidRouterModelId(entryId)) return true;
1158
1302
 
1159
- const provider = String(entry.provider || "").trim();
1160
- if (provider !== FACTORY_DROID_ROUTER_PROVIDER) return false;
1303
+ const provider = String(entry.provider || "").trim().toLowerCase();
1304
+ if (!FACTORY_DROID_ROUTER_PROVIDERS.includes(provider)) return false;
1161
1305
 
1162
1306
  const entryBaseUrl = String(entry.baseUrl || "").trim();
1163
1307
  if (baseUrl && entryBaseUrl === String(baseUrl || "").trim()) return true;
1164
1308
 
1165
1309
  const apiKey = String(entry.apiKey || "").trim();
1166
- return apiKey.startsWith("gw_") && entryBaseUrl.includes("/openai/v1");
1310
+ return apiKey.startsWith("gw_") && (
1311
+ entryBaseUrl.includes("/openai/v1")
1312
+ || entryBaseUrl.includes("/anthropic")
1313
+ );
1167
1314
  }
1168
1315
 
1169
1316
  function stripRouterManagedCustomModels(customModels, { baseUrl = "" } = {}) {
@@ -1310,28 +1457,30 @@ export async function readFactoryDroidRoutingState({
1310
1457
  } = {}) {
1311
1458
  const resolvedSettingsPath = path.resolve(String(settingsFilePath || resolveFactoryDroidSettingsFilePath({ homeDir })).trim());
1312
1459
  const resolvedBackupPath = path.resolve(String(backupFilePath || resolveCodingToolBackupFilePath(resolvedSettingsPath)).trim());
1313
- const expectedBaseUrl = buildFactoryDroidBaseUrl(endpointUrl);
1314
1460
  const routeLookup = buildFactoryDroidRouteLookup(config);
1315
1461
  const settingsState = await readJsonObjectFile(resolvedSettingsPath, `Factory Droid settings file '${resolvedSettingsPath}'`);
1316
1462
  const backupState = await readJsonObjectFile(resolvedBackupPath, `Backup file '${resolvedBackupPath}'`);
1317
1463
  const customModels = Array.isArray(settingsState.data?.customModels) ? settingsState.data.customModels : [];
1318
- const routerEntry = getRouterManagedCustomModel(customModels);
1319
- const configuredBaseUrl = routerEntry ? String(routerEntry.baseUrl || "").trim() : "";
1320
- const configuredProvider = routerEntry ? String(routerEntry.provider || "").trim() : "";
1321
- const routedViaRouter = Boolean(
1322
- expectedBaseUrl
1323
- && routerEntry
1324
- && configuredBaseUrl === expectedBaseUrl
1325
- );
1326
-
1327
1464
  const resolvedDefaultModelValue = getNestedObjectValue(settingsState.data, ["sessionDefaultSettings", "model"])
1328
1465
  || settingsState.data?.model
1329
- || routerEntry?.id
1330
- || routerEntry?.model
1331
1466
  || "";
1332
1467
  const resolvedMissionOrchestratorValue = settingsState.data?.missionOrchestratorModel || "";
1333
1468
  const resolvedMissionWorkerValue = getNestedObjectValue(settingsState.data, ["missionModelSettings", "workerModel"]) || "";
1334
1469
  const resolvedMissionValidatorValue = getNestedObjectValue(settingsState.data, ["missionModelSettings", "validationWorkerModel"]) || "";
1470
+ const routerEntry = getFactoryDroidCustomModelEntryByValue(customModels, resolvedDefaultModelValue, { preferRouterManaged: true })
1471
+ || getRouterManagedCustomModel(customModels);
1472
+ const configuredBaseUrl = routerEntry ? String(routerEntry.baseUrl || "").trim() : "";
1473
+ const configuredProvider = routerEntry ? String(routerEntry.provider || "").trim() : "";
1474
+ const expectedBaseUrls = new Set(
1475
+ FACTORY_DROID_ROUTER_PROVIDERS
1476
+ .map((provider) => buildFactoryDroidBaseUrl(endpointUrl, provider))
1477
+ .filter(Boolean)
1478
+ );
1479
+ const routedViaRouter = Boolean(
1480
+ configuredBaseUrl
1481
+ && routerEntry
1482
+ && expectedBaseUrls.has(configuredBaseUrl)
1483
+ );
1335
1484
 
1336
1485
  return {
1337
1486
  tool: "factory-droid",
@@ -1371,7 +1520,7 @@ export async function patchFactoryDroidSettingsFile({
1371
1520
  } = {}) {
1372
1521
  const resolvedSettingsPath = path.resolve(String(settingsFilePath || resolveFactoryDroidSettingsFilePath({ homeDir })).trim());
1373
1522
  const resolvedBackupPath = path.resolve(String(backupFilePath || resolveCodingToolBackupFilePath(resolvedSettingsPath)).trim());
1374
- const baseUrl = buildFactoryDroidBaseUrl(endpointUrl);
1523
+ const baseUrl = buildFactoryDroidBaseUrl(endpointUrl, FACTORY_DROID_OPENAI_PROVIDER);
1375
1524
  const normalizedApiKey = String(apiKey || "").trim();
1376
1525
  const normalizedBindings = normalizeFactoryDroidBindings(bindings);
1377
1526
  const routeLookup = buildFactoryDroidRouteLookup(config);
@@ -1411,15 +1560,16 @@ export async function patchFactoryDroidSettingsFile({
1411
1560
  ? availableModels.map((descriptor, index) => {
1412
1561
  const entryIndex = routerEntryStartIndex + index;
1413
1562
  const modelId = buildFactoryDroidCustomModelId(descriptor.modelRef, entryIndex);
1563
+ const provider = resolveFactoryDroidCustomModelProvider(descriptor.modelRef, config);
1414
1564
  return {
1415
1565
  [FACTORY_DROID_ROUTER_MARKER]: true,
1416
1566
  model: descriptor.modelRef,
1417
1567
  id: modelId,
1418
1568
  index: entryIndex,
1419
1569
  displayName: descriptor.displayName,
1420
- baseUrl,
1570
+ baseUrl: buildFactoryDroidBaseUrl(endpointUrl, provider),
1421
1571
  apiKey: normalizedApiKey,
1422
- provider: FACTORY_DROID_ROUTER_PROVIDER
1572
+ provider
1423
1573
  };
1424
1574
  })
1425
1575
  : [{
@@ -1430,7 +1580,7 @@ export async function patchFactoryDroidSettingsFile({
1430
1580
  displayName: buildFactoryDroidRouterDisplayName("llm-router", { kind: "alias" }),
1431
1581
  baseUrl,
1432
1582
  apiKey: normalizedApiKey,
1433
- provider: FACTORY_DROID_ROUTER_PROVIDER
1583
+ provider: FACTORY_DROID_OPENAI_PROVIDER
1434
1584
  }];
1435
1585
 
1436
1586
  customModels.push(...routerEntries);
@@ -1483,12 +1633,16 @@ export async function patchFactoryDroidSettingsFile({
1483
1633
  }
1484
1634
 
1485
1635
  await writeJsonObjectFile(resolvedSettingsPath, nextSettings);
1636
+ const primaryEntry = resolvedBindings.defaultModel
1637
+ ? getFactoryDroidCustomModelEntryByValue(allCustomModels, resolvedBindings.defaultModel, { preferRouterManaged: true })
1638
+ : null;
1639
+ const configuredEntry = primaryEntry || getRouterManagedCustomModel(allCustomModels);
1486
1640
  return {
1487
1641
  settingsFilePath: resolvedSettingsPath,
1488
1642
  backupFilePath: resolvedBackupPath,
1489
1643
  settingsCreated: !settingsState.existed,
1490
- baseUrl,
1491
- configuredProvider: FACTORY_DROID_ROUTER_PROVIDER,
1644
+ baseUrl: String(configuredEntry?.baseUrl || baseUrl).trim(),
1645
+ configuredProvider: String(configuredEntry?.provider || FACTORY_DROID_OPENAI_PROVIDER).trim(),
1492
1646
  bindings: resolvedBindings,
1493
1647
  bindingIds: normalizeFactoryDroidBindings({
1494
1648
  defaultModel: normalizedBindings.defaultModel
@@ -1,5 +1,5 @@
1
1
  /**
2
- * Local config persistence for ~/.llm-router.json.
2
+ * Local config persistence for the default and development config files.
3
3
  */
4
4
 
5
5
  import os from "node:os";
@@ -14,11 +14,16 @@ import {
14
14
  import { sanitizePersistedLocalServerConfig } from "./local-server-settings.js";
15
15
 
16
16
  export const DEFAULT_CONFIG_FILENAME = ".llm-router.json";
17
+ export const DEFAULT_DEV_CONFIG_FILENAME = ".llm-router-dev.json";
17
18
 
18
19
  export function getDefaultConfigPath() {
19
20
  return path.join(os.homedir(), DEFAULT_CONFIG_FILENAME);
20
21
  }
21
22
 
23
+ export function getDefaultDevConfigPath() {
24
+ return path.join(os.homedir(), DEFAULT_DEV_CONFIG_FILENAME);
25
+ }
26
+
22
27
  function normalizePersistedConfig(config, normalizeOptions = undefined) {
23
28
  return sanitizePersistedLocalServerConfig(
24
29
  normalizeRuntimeConfig(config, normalizeOptions)
@@ -6,6 +6,7 @@ import { FIXED_LOCAL_ROUTER_HOST, FIXED_LOCAL_ROUTER_PORT } from "./local-server
6
6
 
7
7
  const DEFAULT_INSTANCE_STATE_FILENAME = ".llm-router.runtime.json";
8
8
  const MAX_START_OUTPUT_CHARS = 4000;
9
+ export const RUNTIME_STATE_PATH_ENV = "LLM_ROUTER_RUNTIME_STATE_PATH";
9
10
 
10
11
  function sleep(ms) {
11
12
  return new Promise((resolve) => setTimeout(resolve, ms));
@@ -80,7 +81,9 @@ function runtimeMatchesStartOptions(runtime, {
80
81
  && normalized.requireAuth === normalizeBoolean(requireAuth, false);
81
82
  }
82
83
 
83
- export function getRuntimeStatePath() {
84
+ export function getRuntimeStatePath({ env = process.env } = {}) {
85
+ const override = String(env?.[RUNTIME_STATE_PATH_ENV] || "").trim();
86
+ if (override) return path.resolve(override);
84
87
  return path.join(os.homedir(), DEFAULT_INSTANCE_STATE_FILENAME);
85
88
  }
86
89
 
@@ -292,8 +292,36 @@ export async function startLocalRouteServer({
292
292
  });
293
293
 
294
294
  const fallbackHost = formatHostForUrl(host, port);
295
+ let shuttingDown = false;
296
+ const socketRequestCounts = new Map();
297
+
298
+ function closeSocketIfIdle(socket) {
299
+ if (!socket || socket.destroyed) return;
300
+ if (Number(socketRequestCounts.get(socket) || 0) > 0) return;
301
+ socket.end();
302
+ }
295
303
 
296
304
  const server = http.createServer(async (req, res) => {
305
+ const socket = req.socket;
306
+ socketRequestCounts.set(socket, Number(socketRequestCounts.get(socket) || 0) + 1);
307
+ let finalized = false;
308
+ const finalizeRequest = () => {
309
+ if (finalized) return;
310
+ finalized = true;
311
+ const remaining = Math.max(0, Number(socketRequestCounts.get(socket) || 0) - 1);
312
+ if (remaining > 0) {
313
+ socketRequestCounts.set(socket, remaining);
314
+ return;
315
+ }
316
+ socketRequestCounts.set(socket, 0);
317
+ if (shuttingDown) {
318
+ closeSocketIfIdle(socket);
319
+ }
320
+ };
321
+
322
+ res.once("finish", finalizeRequest);
323
+ res.once("close", finalizeRequest);
324
+
297
325
  try {
298
326
  const request = nodeRequestToFetchRequest(req, fallbackHost);
299
327
  const response = await fetchHandler(request, {}, undefined);
@@ -308,6 +336,13 @@ export async function startLocalRouteServer({
308
336
  }
309
337
  });
310
338
 
339
+ server.on("connection", (socket) => {
340
+ socketRequestCounts.set(socket, Number(socketRequestCounts.get(socket) || 0));
341
+ socket.on("close", () => {
342
+ socketRequestCounts.delete(socket);
343
+ });
344
+ });
345
+
311
346
  await new Promise((resolve, reject) => {
312
347
  server.once("error", reject);
313
348
  server.listen(port, host, () => {
@@ -318,10 +353,15 @@ export async function startLocalRouteServer({
318
353
 
319
354
  const originalClose = server.close.bind(server);
320
355
  server.close = (callback) => {
356
+ shuttingDown = true;
321
357
  Promise.resolve()
322
358
  .then(() => configStore.close())
323
359
  .then(() => (typeof fetchHandler.close === "function" ? fetchHandler.close() : undefined))
324
360
  .finally(() => {
361
+ server.closeIdleConnections?.();
362
+ for (const socket of socketRequestCounts.keys()) {
363
+ closeSocketIfIdle(socket);
364
+ }
325
365
  originalClose(callback);
326
366
  });
327
367
  return server;