@khanglvm/llm-router 2.3.6 → 2.4.0

This diff compares the contents of two publicly available versions of a package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -7,6 +7,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [2.4.0] - 2026-04-18
11
+
12
+ ### Added
13
+ - Claude Code can now store a router-managed `claudeCode.webSearchProvider` selection, with validation plus Web UI controls for choosing either built-in web search providers or hosted search routes.
14
+ - Native Claude web search and page-fetch tool calls can now be intercepted locally for non-AMP clients, so Claude-compatible traffic can use the router-managed shared web search stack instead of falling back to upstream-native tools.
15
+ - `yarn dev` now exposes a one-click "Sync production config" action that clones the current production config into the dedicated dev config while preserving the dev router's local server settings.
16
+
17
+ ### Fixed
18
+ - `yarn dev` now launches the detached router backend with the runtime start path, so the dedicated dev router port is honored instead of conflicting with the fixed production port.
19
+ - Dev-mode router reclaim now leaves startup-managed production instances alone when it frees a non-fixed dev port.
20
+ - The dev terminal and web console now clearly mark when you are operating in the isolated development sandbox.
21
+
22
+ ## [2.3.7] - 2026-04-18
23
+
24
+ ### Fixed
25
+ - Local `llr start` now keeps a fixed-port supervisor in front of the router runtime so CLI and tool traffic can survive backend restarts and upgrades without losing the public router endpoint.
26
+ - `llr update` now upgrades the installed package before asking the live backend to reload, keeping the running router online during the install step and draining in-flight requests before the new version takes over.
27
+ - Requests that arrive during a backend restart window are now deferred and automatically retried through the supervisor instead of failing immediately when the backend socket is briefly unavailable.
28
+
10
29
  ## [2.3.6] - 2026-04-18
11
30
 
12
31
  ### Fixed
package/README.md CHANGED
@@ -29,10 +29,27 @@ llr ai-help # agent-oriented setup brief
29
29
  - **Model aliases with routing** — group models into stable alias names with weighted round-robin, quota-aware balancing, and automatic fallback
30
30
  - **Rate limiting** — set request caps per model or across all models over configurable time windows
31
31
  - **Coding tool routing** — one-click routing config for Codex CLI, Claude Code, Factory Droid, and AMP
32
+ - **Dev sandbox** — `yarn dev` runs the console against a dedicated dev config and router port, highlights dev mode in both the terminal and the Web UI, and can clone the production config into the sandbox for quick iteration
33
+ - **Claude native web tools** — local handling for Claude web-search and page-fetch requests, with selectable Claude Code web-search providers from the shared Web Search config
34
+ - **Seamless local updates** — `llr update` keeps the fixed local router endpoint online, drains in-flight requests, and automatically retries through backend restart windows
32
35
  - **Web search** — built-in web search for AMP and other router-managed tools
33
36
  - **Deployable** — run locally or deploy to Cloudflare Workers
34
37
  - **AI-agent friendly** — full CLI parity with `llr config --operation=...` so agents can configure everything programmatically
35
38
 
39
+ ## Local Runtime Reliability
40
+
41
+ `llr start` keeps a small supervisor bound to the fixed local router port and runs the real router backend behind it on an internal loopback port.
42
+
43
+ That means `llr update` can install a new package version and gracefully swap the backend without breaking active CLI or tool requests. Requests that arrive during the short backend handoff are deferred and retried automatically instead of failing immediately. The Web UI may reconnect during that window, but router-managed API traffic keeps the same public local endpoint.
44
+
45
+ ## Development Sandbox
46
+
47
+ ```bash
48
+ yarn dev
49
+ ```
50
+
51
+ Development mode uses the dedicated `~/.llm-router-dev.json` config and its own local router port so it can run alongside a startup-managed or manually started production router. The terminal and Web UI both show a dev-mode indicator, and the dev Web UI includes a one-click sync action to copy the current production config into the sandbox without changing the dev router binding.
52
+
36
53
  ## Web UI
37
54
 
38
55
  ### Alias & Fallback
@@ -59,6 +76,8 @@ Route Claude Code through the gateway with per-tier model bindings.
59
76
 
60
77
  ![Claude Code Routing](./assets/screenshots/web-ui-claude-code.png)
61
78
 
79
+ Claude Code can also select a shared Web Search provider or hosted search route from the router config. When Claude-compatible traffic uses native web-search or page-fetch tools, LLM Router can satisfy those calls through the selected shared web-search provider instead of relying on upstream-native web tooling.
80
+
62
81
  ### Factory Droid
63
82
 
64
83
  Route Factory Droid through the gateway via a managed custom model entry with reasoning effort control.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@khanglvm/llm-router",
3
- "version": "2.3.6",
3
+ "version": "2.4.0",
4
4
  "description": "LLM Router: single gateway endpoint for multi-provider LLMs with unified OpenAI+Anthropic format and seamless fallback",
5
5
  "keywords": [
6
6
  "llm-router",
@@ -6694,7 +6694,7 @@ async function doSetFactoryDroidRouting(context) {
6694
6694
  ["Settings File", patchResult.settingsFilePath],
6695
6695
  ["Backup File", patchResult.backupFilePath],
6696
6696
  ["Base URL", patchResult.baseUrl],
6697
- ["Provider", patchResult.configuredProvider || "generic-chat-completion-api"],
6697
+ ["Provider", patchResult.configuredProvider || "(not set)"],
6698
6698
  ["Default Model", patchResult.bindings?.defaultModel || "(not set)"],
6699
6699
  ["Mission Orchestrator", patchResult.bindings?.missionOrchestratorModel || "(not set)"],
6700
6700
  ["Mission Worker", patchResult.bindings?.missionWorkerModel || "(not set)"],
package/src/cli-entry.js CHANGED
@@ -117,14 +117,22 @@ async function promptStartupConflictResolution({ port }) {
117
117
  }
118
118
  }
119
119
 
120
- async function runStartFastPath(args) {
120
+ async function runStartFastPath(args, { backendMode = false } = {}) {
121
+ const explicitPort = backendMode
122
+ ? (() => {
123
+ const parsed = Number.parseInt(String(args.port ?? ""), 10);
124
+ return Number.isInteger(parsed) && parsed > 0 ? parsed : FIXED_LOCAL_ROUTER_PORT;
125
+ })()
126
+ : resolveListenPort({ explicitPort: args.port });
121
127
  const result = await runStartCommand({
122
128
  configPath: args.config || args.configPath || getDefaultConfigPath(),
123
129
  host: FIXED_LOCAL_ROUTER_HOST,
124
- port: resolveListenPort({ explicitPort: args.port }),
130
+ port: explicitPort,
125
131
  watchConfig: parseBoolean(args["watch-config"] ?? args.watchConfig, true),
126
132
  watchBinary: parseBoolean(args["watch-binary"] ?? args.watchBinary, true),
127
133
  requireAuth: parseBoolean(args["require-auth"] ?? args.requireAuth, false),
134
+ backendMode,
135
+ startCommand: backendMode ? "start-runtime" : "start",
128
136
  onStartupConflict: (payload) => promptStartupConflictResolution(payload),
129
137
  cliPathForWatch: process.argv[1],
130
138
  onLine: (line) => console.log(line),
@@ -163,6 +171,7 @@ export async function runCli(argv = process.argv.slice(2), isTTY = undefined, ov
163
171
  const parsed = parseSimpleArgs(argv);
164
172
  const first = parsed.positional[0];
165
173
  const firstIsStart = first === "start";
174
+ const firstIsStartRuntime = first === "start-runtime";
166
175
  const firstIsWeb = first === "web";
167
176
  const firstIsConfig = first === "config";
168
177
  const firstIsSetup = first === "setup";
@@ -188,6 +197,12 @@ export async function runCli(argv = process.argv.slice(2), isTTY = undefined, ov
188
197
  return runStartFastPathImpl(parsedStart.args);
189
198
  }
190
199
 
200
+ if (firstIsStartRuntime && !parsed.wantsHelp) {
201
+ const startArgs = argv.slice(1);
202
+ const parsedStart = parseSimpleArgs(startArgs);
203
+ return runStartFastPathImpl(parsedStart.args, { backendMode: true });
204
+ }
205
+
191
206
  if (firstIsWeb && !parsed.wantsHelp) {
192
207
  const webArgs = argv.slice(1);
193
208
  const parsedWeb = parseSimpleArgs(webArgs);
@@ -970,7 +970,38 @@ export async function patchClaudeCodeEffortLevel({
970
970
  }
971
971
 
972
972
  const FACTORY_DROID_ROUTER_MARKER = "_llmRouterManaged";
973
- const FACTORY_DROID_ROUTER_PROVIDER = "generic-chat-completion-api";
973
+ const FACTORY_DROID_OPENAI_PROVIDER = "openai";
974
+ const FACTORY_DROID_ANTHROPIC_PROVIDER = "anthropic";
975
+ const FACTORY_DROID_ROUTER_PROVIDERS = Object.freeze([
976
+ FACTORY_DROID_OPENAI_PROVIDER,
977
+ FACTORY_DROID_ANTHROPIC_PROVIDER
978
+ ]);
979
+
980
+ function dedupeStrings(values = []) {
981
+ const seen = new Set();
982
+ const out = [];
983
+ for (const value of values) {
984
+ const normalized = String(value || "").trim();
985
+ if (!normalized || seen.has(normalized)) continue;
986
+ seen.add(normalized);
987
+ out.push(normalized);
988
+ }
989
+ return out;
990
+ }
991
+
992
+ function normalizeFactoryDroidFormat(value) {
993
+ const normalized = String(value || "").trim().toLowerCase();
994
+ if (normalized === "openai") return "openai";
995
+ if (normalized === "claude" || normalized === "anthropic") return "claude";
996
+ return "";
997
+ }
998
+
999
+ function mapFactoryDroidFormatToProvider(format) {
1000
+ const normalized = normalizeFactoryDroidFormat(format);
1001
+ if (normalized === "claude") return FACTORY_DROID_ANTHROPIC_PROVIDER;
1002
+ if (normalized === "openai") return FACTORY_DROID_OPENAI_PROVIDER;
1003
+ return "";
1004
+ }
974
1005
 
975
1006
  function normalizeFactoryDroidBindings(bindings = {}) {
976
1007
  const source = bindings && typeof bindings === "object" && !Array.isArray(bindings) ? bindings : {};
@@ -984,9 +1015,119 @@ function normalizeFactoryDroidBindings(bindings = {}) {
984
1015
  };
985
1016
  }
986
1017
 
987
- function buildFactoryDroidBaseUrl(endpointUrl) {
1018
+ function buildFactoryDroidBaseUrl(endpointUrl, provider = FACTORY_DROID_OPENAI_PROVIDER) {
988
1019
  const normalized = normalizeHttpUrl(endpointUrl);
989
- return normalized ? `${normalized}/openai/v1` : "";
1020
+ const resolvedProvider = String(provider || "").trim().toLowerCase() || FACTORY_DROID_OPENAI_PROVIDER;
1021
+ if (!normalized) return "";
1022
+ return resolvedProvider === FACTORY_DROID_ANTHROPIC_PROVIDER
1023
+ ? `${normalized}/anthropic`
1024
+ : `${normalized}/openai/v1`;
1025
+ }
1026
+
1027
+ function inferFactoryDroidFormatFromModelId(modelId) {
1028
+ const normalized = String(modelId || "").trim().toLowerCase();
1029
+ if (!normalized) return "";
1030
+ if (/^(?:claude|opus|sonnet|haiku)(?=[-./\s]|$)/i.test(normalized)) return "claude";
1031
+ if (/^gpt(?=[-./\s]|$)/i.test(normalized)) return "openai";
1032
+ return "";
1033
+ }
1034
+
1035
+ function inferFactoryDroidFormatFromProviderId(providerId) {
1036
+ const normalized = String(providerId || "").trim().toLowerCase();
1037
+ if (!normalized) return "";
1038
+ if (normalized === "anthropic") return "claude";
1039
+ if (normalized === "openai") return "openai";
1040
+ return "";
1041
+ }
1042
+
1043
+ function getFactoryDroidProviderModelFormats(provider, model, modelId = "") {
1044
+ const resolvedModelId = String(modelId || model?.id || "").trim();
1045
+ const preferredFormat = normalizeFactoryDroidFormat(provider?.lastProbe?.modelPreferredFormat?.[resolvedModelId]);
1046
+ if (preferredFormat) return [preferredFormat];
1047
+
1048
+ return dedupeStrings([
1049
+ ...(provider?.lastProbe?.modelSupport?.[resolvedModelId] || []),
1050
+ ...(model?.formats || []),
1051
+ model?.format
1052
+ ])
1053
+ .map(normalizeFactoryDroidFormat)
1054
+ .filter(Boolean);
1055
+ }
1056
+
1057
+ function getFactoryDroidProviderFormats(provider) {
1058
+ return dedupeStrings([
1059
+ ...(provider?.formats || []),
1060
+ provider?.format
1061
+ ])
1062
+ .map(normalizeFactoryDroidFormat)
1063
+ .filter(Boolean);
1064
+ }
1065
+
1066
+ function getFactoryDroidAliasTargetRefs(alias) {
1067
+ const refs = [];
1068
+ const push = (entry) => {
1069
+ const ref = String(
1070
+ typeof entry === "string"
1071
+ ? entry
1072
+ : (entry?.ref || entry?.sourceRef || "")
1073
+ ).trim();
1074
+ if (ref) refs.push(ref);
1075
+ };
1076
+
1077
+ for (const entry of Array.isArray(alias?.targets) ? alias.targets : []) push(entry);
1078
+ for (const entry of Array.isArray(alias?.fallbackTargets) ? alias.fallbackTargets : []) push(entry);
1079
+
1080
+ return refs;
1081
+ }
1082
+
1083
+ function resolveFactoryDroidRouteFormat(modelRef, config = {}, seen = new Set()) {
1084
+ const normalizedModelRef = String(modelRef || "").trim();
1085
+ if (!normalizedModelRef || seen.has(normalizedModelRef)) return "";
1086
+
1087
+ if (normalizedModelRef.includes("/")) {
1088
+ const separatorIndex = normalizedModelRef.indexOf("/");
1089
+ const providerId = normalizedModelRef.slice(0, separatorIndex).trim();
1090
+ const modelId = normalizedModelRef.slice(separatorIndex + 1).trim();
1091
+ const provider = (Array.isArray(config?.providers) ? config.providers : [])
1092
+ .find((entry) => String(entry?.id || "").trim() === providerId);
1093
+ const model = Array.isArray(provider?.models)
1094
+ ? provider.models.find((entry) => String(entry?.id || "").trim() === modelId)
1095
+ : null;
1096
+ return getFactoryDroidProviderModelFormats(provider, model, modelId)[0]
1097
+ || inferFactoryDroidFormatFromModelId(modelId)
1098
+ || getFactoryDroidProviderFormats(provider)[0]
1099
+ || inferFactoryDroidFormatFromProviderId(providerId)
1100
+ || "";
1101
+ }
1102
+
1103
+ seen.add(normalizedModelRef);
1104
+ const aliases = config?.modelAliases && typeof config.modelAliases === "object" && !Array.isArray(config.modelAliases)
1105
+ ? config.modelAliases
1106
+ : {};
1107
+ const alias = aliases[normalizedModelRef];
1108
+ if (!alias || typeof alias !== "object" || Array.isArray(alias)) return "";
1109
+
1110
+ for (const targetRef of getFactoryDroidAliasTargetRefs(alias)) {
1111
+ const resolved = resolveFactoryDroidRouteFormat(targetRef, config, new Set(seen));
1112
+ if (resolved) return resolved;
1113
+ }
1114
+
1115
+ return "";
1116
+ }
1117
+
1118
+ function resolveFactoryDroidCustomModelProvider(modelRef, config = {}) {
1119
+ return mapFactoryDroidFormatToProvider(resolveFactoryDroidRouteFormat(modelRef, config))
1120
+ || FACTORY_DROID_OPENAI_PROVIDER;
1121
+ }
1122
+
1123
+ function resolveFactoryDroidProviderDisplayName(modelRef, config = {}) {
1124
+ const normalizedModelRef = String(modelRef || "").trim();
1125
+ if (!normalizedModelRef.includes("/")) return "";
1126
+ const separatorIndex = normalizedModelRef.indexOf("/");
1127
+ const providerId = normalizedModelRef.slice(0, separatorIndex).trim();
1128
+ const provider = (Array.isArray(config?.providers) ? config.providers : [])
1129
+ .find((entry) => String(entry?.id || "").trim() === providerId);
1130
+ return String(provider?.name || providerId || "").trim();
990
1131
  }
991
1132
 
992
1133
  function collectFactoryDroidAvailableModels(config = {}, bindings = {}) {
@@ -1034,7 +1175,10 @@ function buildFactoryDroidAvailableModelDescriptors(config = {}, bindings = {})
1034
1175
  modelRef,
1035
1176
  kind,
1036
1177
  id: buildFactoryDroidRouterModelId(modelRef, { kind }),
1037
- displayName: buildFactoryDroidRouterDisplayName(modelRef, { kind })
1178
+ displayName: buildFactoryDroidRouterDisplayName(modelRef, {
1179
+ kind,
1180
+ providerName: kind === "model" ? resolveFactoryDroidProviderDisplayName(modelRef, config) : ""
1181
+ })
1038
1182
  };
1039
1183
  })
1040
1184
  .filter((entry) => String(entry.id || "").trim() && String(entry.modelRef || "").trim());
@@ -1156,14 +1300,17 @@ function isFactoryDroidRouterManagedEntry(entry, { baseUrl = "" } = {}) {
1156
1300
  const entryId = String(entry.id || "").trim();
1157
1301
  if (isFactoryDroidRouterModelId(entryId)) return true;
1158
1302
 
1159
- const provider = String(entry.provider || "").trim();
1160
- if (provider !== FACTORY_DROID_ROUTER_PROVIDER) return false;
1303
+ const provider = String(entry.provider || "").trim().toLowerCase();
1304
+ if (!FACTORY_DROID_ROUTER_PROVIDERS.includes(provider)) return false;
1161
1305
 
1162
1306
  const entryBaseUrl = String(entry.baseUrl || "").trim();
1163
1307
  if (baseUrl && entryBaseUrl === String(baseUrl || "").trim()) return true;
1164
1308
 
1165
1309
  const apiKey = String(entry.apiKey || "").trim();
1166
- return apiKey.startsWith("gw_") && entryBaseUrl.includes("/openai/v1");
1310
+ return apiKey.startsWith("gw_") && (
1311
+ entryBaseUrl.includes("/openai/v1")
1312
+ || entryBaseUrl.includes("/anthropic")
1313
+ );
1167
1314
  }
1168
1315
 
1169
1316
  function stripRouterManagedCustomModels(customModels, { baseUrl = "" } = {}) {
@@ -1310,28 +1457,30 @@ export async function readFactoryDroidRoutingState({
1310
1457
  } = {}) {
1311
1458
  const resolvedSettingsPath = path.resolve(String(settingsFilePath || resolveFactoryDroidSettingsFilePath({ homeDir })).trim());
1312
1459
  const resolvedBackupPath = path.resolve(String(backupFilePath || resolveCodingToolBackupFilePath(resolvedSettingsPath)).trim());
1313
- const expectedBaseUrl = buildFactoryDroidBaseUrl(endpointUrl);
1314
1460
  const routeLookup = buildFactoryDroidRouteLookup(config);
1315
1461
  const settingsState = await readJsonObjectFile(resolvedSettingsPath, `Factory Droid settings file '${resolvedSettingsPath}'`);
1316
1462
  const backupState = await readJsonObjectFile(resolvedBackupPath, `Backup file '${resolvedBackupPath}'`);
1317
1463
  const customModels = Array.isArray(settingsState.data?.customModels) ? settingsState.data.customModels : [];
1318
- const routerEntry = getRouterManagedCustomModel(customModels);
1319
- const configuredBaseUrl = routerEntry ? String(routerEntry.baseUrl || "").trim() : "";
1320
- const configuredProvider = routerEntry ? String(routerEntry.provider || "").trim() : "";
1321
- const routedViaRouter = Boolean(
1322
- expectedBaseUrl
1323
- && routerEntry
1324
- && configuredBaseUrl === expectedBaseUrl
1325
- );
1326
-
1327
1464
  const resolvedDefaultModelValue = getNestedObjectValue(settingsState.data, ["sessionDefaultSettings", "model"])
1328
1465
  || settingsState.data?.model
1329
- || routerEntry?.id
1330
- || routerEntry?.model
1331
1466
  || "";
1332
1467
  const resolvedMissionOrchestratorValue = settingsState.data?.missionOrchestratorModel || "";
1333
1468
  const resolvedMissionWorkerValue = getNestedObjectValue(settingsState.data, ["missionModelSettings", "workerModel"]) || "";
1334
1469
  const resolvedMissionValidatorValue = getNestedObjectValue(settingsState.data, ["missionModelSettings", "validationWorkerModel"]) || "";
1470
+ const routerEntry = getFactoryDroidCustomModelEntryByValue(customModels, resolvedDefaultModelValue, { preferRouterManaged: true })
1471
+ || getRouterManagedCustomModel(customModels);
1472
+ const configuredBaseUrl = routerEntry ? String(routerEntry.baseUrl || "").trim() : "";
1473
+ const configuredProvider = routerEntry ? String(routerEntry.provider || "").trim() : "";
1474
+ const expectedBaseUrls = new Set(
1475
+ FACTORY_DROID_ROUTER_PROVIDERS
1476
+ .map((provider) => buildFactoryDroidBaseUrl(endpointUrl, provider))
1477
+ .filter(Boolean)
1478
+ );
1479
+ const routedViaRouter = Boolean(
1480
+ configuredBaseUrl
1481
+ && routerEntry
1482
+ && expectedBaseUrls.has(configuredBaseUrl)
1483
+ );
1335
1484
 
1336
1485
  return {
1337
1486
  tool: "factory-droid",
@@ -1371,7 +1520,7 @@ export async function patchFactoryDroidSettingsFile({
1371
1520
  } = {}) {
1372
1521
  const resolvedSettingsPath = path.resolve(String(settingsFilePath || resolveFactoryDroidSettingsFilePath({ homeDir })).trim());
1373
1522
  const resolvedBackupPath = path.resolve(String(backupFilePath || resolveCodingToolBackupFilePath(resolvedSettingsPath)).trim());
1374
- const baseUrl = buildFactoryDroidBaseUrl(endpointUrl);
1523
+ const baseUrl = buildFactoryDroidBaseUrl(endpointUrl, FACTORY_DROID_OPENAI_PROVIDER);
1375
1524
  const normalizedApiKey = String(apiKey || "").trim();
1376
1525
  const normalizedBindings = normalizeFactoryDroidBindings(bindings);
1377
1526
  const routeLookup = buildFactoryDroidRouteLookup(config);
@@ -1411,15 +1560,16 @@ export async function patchFactoryDroidSettingsFile({
1411
1560
  ? availableModels.map((descriptor, index) => {
1412
1561
  const entryIndex = routerEntryStartIndex + index;
1413
1562
  const modelId = buildFactoryDroidCustomModelId(descriptor.modelRef, entryIndex);
1563
+ const provider = resolveFactoryDroidCustomModelProvider(descriptor.modelRef, config);
1414
1564
  return {
1415
1565
  [FACTORY_DROID_ROUTER_MARKER]: true,
1416
1566
  model: descriptor.modelRef,
1417
1567
  id: modelId,
1418
1568
  index: entryIndex,
1419
1569
  displayName: descriptor.displayName,
1420
- baseUrl,
1570
+ baseUrl: buildFactoryDroidBaseUrl(endpointUrl, provider),
1421
1571
  apiKey: normalizedApiKey,
1422
- provider: FACTORY_DROID_ROUTER_PROVIDER
1572
+ provider
1423
1573
  };
1424
1574
  })
1425
1575
  : [{
@@ -1430,7 +1580,7 @@ export async function patchFactoryDroidSettingsFile({
1430
1580
  displayName: buildFactoryDroidRouterDisplayName("llm-router", { kind: "alias" }),
1431
1581
  baseUrl,
1432
1582
  apiKey: normalizedApiKey,
1433
- provider: FACTORY_DROID_ROUTER_PROVIDER
1583
+ provider: FACTORY_DROID_OPENAI_PROVIDER
1434
1584
  }];
1435
1585
 
1436
1586
  customModels.push(...routerEntries);
@@ -1483,12 +1633,16 @@ export async function patchFactoryDroidSettingsFile({
1483
1633
  }
1484
1634
 
1485
1635
  await writeJsonObjectFile(resolvedSettingsPath, nextSettings);
1636
+ const primaryEntry = resolvedBindings.defaultModel
1637
+ ? getFactoryDroidCustomModelEntryByValue(allCustomModels, resolvedBindings.defaultModel, { preferRouterManaged: true })
1638
+ : null;
1639
+ const configuredEntry = primaryEntry || getRouterManagedCustomModel(allCustomModels);
1486
1640
  return {
1487
1641
  settingsFilePath: resolvedSettingsPath,
1488
1642
  backupFilePath: resolvedBackupPath,
1489
1643
  settingsCreated: !settingsState.existed,
1490
- baseUrl,
1491
- configuredProvider: FACTORY_DROID_ROUTER_PROVIDER,
1644
+ baseUrl: String(configuredEntry?.baseUrl || baseUrl).trim(),
1645
+ configuredProvider: String(configuredEntry?.provider || FACTORY_DROID_OPENAI_PROVIDER).trim(),
1492
1646
  bindings: resolvedBindings,
1493
1647
  bindingIds: normalizeFactoryDroidBindings({
1494
1648
  defaultModel: normalizedBindings.defaultModel
@@ -1,5 +1,5 @@
1
1
  /**
2
- * Local config persistence for ~/.llm-router.json.
2
+ * Local config persistence for the default and development config files.
3
3
  */
4
4
 
5
5
  import os from "node:os";
@@ -14,11 +14,16 @@ import {
14
14
  import { sanitizePersistedLocalServerConfig } from "./local-server-settings.js";
15
15
 
16
16
  export const DEFAULT_CONFIG_FILENAME = ".llm-router.json";
17
+ export const DEFAULT_DEV_CONFIG_FILENAME = ".llm-router-dev.json";
17
18
 
18
19
  export function getDefaultConfigPath() {
19
20
  return path.join(os.homedir(), DEFAULT_CONFIG_FILENAME);
20
21
  }
21
22
 
23
+ export function getDefaultDevConfigPath() {
24
+ return path.join(os.homedir(), DEFAULT_DEV_CONFIG_FILENAME);
25
+ }
26
+
22
27
  function normalizePersistedConfig(config, normalizeOptions = undefined) {
23
28
  return sanitizePersistedLocalServerConfig(
24
29
  normalizeRuntimeConfig(config, normalizeOptions)
@@ -6,6 +6,7 @@ import { FIXED_LOCAL_ROUTER_HOST, FIXED_LOCAL_ROUTER_PORT } from "./local-server
6
6
 
7
7
  const DEFAULT_INSTANCE_STATE_FILENAME = ".llm-router.runtime.json";
8
8
  const MAX_START_OUTPUT_CHARS = 4000;
9
+ export const RUNTIME_STATE_PATH_ENV = "LLM_ROUTER_RUNTIME_STATE_PATH";
9
10
 
10
11
  function sleep(ms) {
11
12
  return new Promise((resolve) => setTimeout(resolve, ms));
@@ -80,7 +81,9 @@ function runtimeMatchesStartOptions(runtime, {
80
81
  && normalized.requireAuth === normalizeBoolean(requireAuth, false);
81
82
  }
82
83
 
83
- export function getRuntimeStatePath() {
84
+ export function getRuntimeStatePath({ env = process.env } = {}) {
85
+ const override = String(env?.[RUNTIME_STATE_PATH_ENV] || "").trim();
86
+ if (override) return path.resolve(override);
84
87
  return path.join(os.homedir(), DEFAULT_INSTANCE_STATE_FILENAME);
85
88
  }
86
89
 
@@ -236,6 +239,7 @@ export async function waitForRuntimeMatch(options = {}, deps = {}) {
236
239
 
237
240
  export function spawnStartProcess({
238
241
  cliPath,
242
+ startCommand = "start-runtime",
239
243
  configPath,
240
244
  host = FIXED_LOCAL_ROUTER_HOST,
241
245
  port = FIXED_LOCAL_ROUTER_PORT,
@@ -253,7 +257,7 @@ export function spawnStartProcess({
253
257
 
254
258
  const args = [
255
259
  finalCliPath,
256
- "start",
260
+ String(startCommand || "start-runtime").trim() || "start-runtime",
257
261
  `--config=${configPath}`,
258
262
  `--host=${host}`,
259
263
  `--port=${port}`,
@@ -287,7 +291,10 @@ export async function startDetachedRouterService(options = {}, deps = {}) {
287
291
 
288
292
  let child;
289
293
  try {
290
- child = spawnStartProcessFn(options, {
294
+ child = spawnStartProcessFn({
295
+ ...options,
296
+ startCommand: String(options?.startCommand || "start-runtime").trim() || "start-runtime"
297
+ }, {
291
298
  detached: true,
292
299
  stdio: ["ignore", "pipe", "pipe"],
293
300
  unref: false,
@@ -292,8 +292,36 @@ export async function startLocalRouteServer({
292
292
  });
293
293
 
294
294
  const fallbackHost = formatHostForUrl(host, port);
295
+ let shuttingDown = false;
296
+ const socketRequestCounts = new Map();
297
+
298
+ function closeSocketIfIdle(socket) {
299
+ if (!socket || socket.destroyed) return;
300
+ if (Number(socketRequestCounts.get(socket) || 0) > 0) return;
301
+ socket.end();
302
+ }
295
303
 
296
304
  const server = http.createServer(async (req, res) => {
305
+ const socket = req.socket;
306
+ socketRequestCounts.set(socket, Number(socketRequestCounts.get(socket) || 0) + 1);
307
+ let finalized = false;
308
+ const finalizeRequest = () => {
309
+ if (finalized) return;
310
+ finalized = true;
311
+ const remaining = Math.max(0, Number(socketRequestCounts.get(socket) || 0) - 1);
312
+ if (remaining > 0) {
313
+ socketRequestCounts.set(socket, remaining);
314
+ return;
315
+ }
316
+ socketRequestCounts.set(socket, 0);
317
+ if (shuttingDown) {
318
+ closeSocketIfIdle(socket);
319
+ }
320
+ };
321
+
322
+ res.once("finish", finalizeRequest);
323
+ res.once("close", finalizeRequest);
324
+
297
325
  try {
298
326
  const request = nodeRequestToFetchRequest(req, fallbackHost);
299
327
  const response = await fetchHandler(request, {}, undefined);
@@ -308,6 +336,13 @@ export async function startLocalRouteServer({
308
336
  }
309
337
  });
310
338
 
339
+ server.on("connection", (socket) => {
340
+ socketRequestCounts.set(socket, Number(socketRequestCounts.get(socket) || 0));
341
+ socket.on("close", () => {
342
+ socketRequestCounts.delete(socket);
343
+ });
344
+ });
345
+
311
346
  await new Promise((resolve, reject) => {
312
347
  server.once("error", reject);
313
348
  server.listen(port, host, () => {
@@ -318,10 +353,15 @@ export async function startLocalRouteServer({
318
353
 
319
354
  const originalClose = server.close.bind(server);
320
355
  server.close = (callback) => {
356
+ shuttingDown = true;
321
357
  Promise.resolve()
322
358
  .then(() => configStore.close())
323
359
  .then(() => (typeof fetchHandler.close === "function" ? fetchHandler.close() : undefined))
324
360
  .finally(() => {
361
+ server.closeIdleConnections?.();
362
+ for (const socket of socketRequestCounts.keys()) {
363
+ closeSocketIfIdle(socket);
364
+ }
325
365
  originalClose(callback);
326
366
  });
327
367
  return server;
@@ -1,5 +1,6 @@
1
1
  import { spawnSync } from "node:child_process";
2
2
  import { clearRuntimeState, getActiveRuntimeState } from "./instance-state.js";
3
+ import { FIXED_LOCAL_ROUTER_PORT } from "./local-server-settings.js";
3
4
  import { startupStatus, stopStartup } from "./startup-manager.js";
4
5
 
5
6
  export function parsePidList(text) {
@@ -119,7 +120,9 @@ export async function stopStartupManagedListener({ port, line, error }, deps = {
119
120
  let shouldStopStartup = false;
120
121
  if (activeRuntimeState?.managedByStartup) {
121
122
  shouldStopStartup = Number(activeRuntimeState.port) === Number(port);
122
- } else if (!activeRuntimeState) {
123
+ }
124
+
125
+ if (!shouldStopStartup && Number(port) === Number(FIXED_LOCAL_ROUTER_PORT)) {
123
126
  try {
124
127
  const status = await startupStatusFn();
125
128
  shouldStopStartup = Boolean(status?.running);