@khanglvm/llm-router 2.0.0-beta.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/CHANGELOG.md +27 -0
  2. package/README.md +163 -426
  3. package/package.json +3 -3
  4. package/src/cli/router-module.js +2773 -2587
  5. package/src/cli-entry.js +32 -103
  6. package/src/node/activity-log.js +119 -0
  7. package/src/node/coding-tool-config.js +85 -11
  8. package/src/node/config-workflows.js +51 -12
  9. package/src/node/instance-state.js +1 -1
  10. package/src/node/litellm-context-catalog.js +184 -0
  11. package/src/node/local-server.js +23 -3
  12. package/src/node/port-reclaim.js +2 -2
  13. package/src/node/start-command.js +22 -22
  14. package/src/node/startup-manager.js +3 -3
  15. package/src/node/web-command.js +1 -1
  16. package/src/node/web-console-assets.js +1 -1
  17. package/src/node/web-console-client.js +34 -29
  18. package/src/node/web-console-server.js +420 -38
  19. package/src/node/web-console-styles.generated.js +1 -1
  20. package/src/node/web-console-ui/buffered-text-input.js +133 -0
  21. package/src/node/web-console-ui/config-editor-utils.js +57 -4
  22. package/src/node/web-console-ui/dropdown-placement.js +153 -0
  23. package/src/node/web-console-ui/select-search-utils.js +6 -0
  24. package/src/node/web-console-ui/transient-integer-input-utils.js +12 -0
  25. package/src/runtime/balancer.js +78 -1
  26. package/src/runtime/codex-request-transformer.js +16 -7
  27. package/src/runtime/config.js +448 -12
  28. package/src/runtime/handler/amp-response.js +5 -3
  29. package/src/runtime/handler/amp-web-search.js +2232 -0
  30. package/src/runtime/handler/fallback.js +30 -2
  31. package/src/runtime/handler/provider-call.js +353 -36
  32. package/src/runtime/handler/provider-translation.js +14 -0
  33. package/src/runtime/handler/request.js +128 -2
  34. package/src/runtime/handler/route-debug.js +36 -0
  35. package/src/runtime/handler.js +210 -20
  36. package/src/runtime/subscription-provider.js +1 -1
  37. package/src/shared/coding-tool-bindings.js +49 -0
  38. package/src/shared/local-router-defaults.js +62 -0
  39. package/src/translator/request/claude-to-openai.js +43 -0
package/src/cli-entry.js CHANGED
@@ -55,53 +55,6 @@ function parseBoolean(value, fallback = true) {
55
55
  return fallback;
56
56
  }
57
57
 
58
- function isBooleanLikeToken(value) {
59
- if (value === undefined || value === null) return false;
60
- return /^(?:1|0|true|false|yes|no|y|n|on|off)$/i.test(String(value).trim());
61
- }
62
-
63
- function readRawFlagValue(argv, flagName) {
64
- const prefix = `--${flagName}`;
65
-
66
- for (let index = 0; index < argv.length; index += 1) {
67
- const token = argv[index];
68
- if (token === prefix) {
69
- const next = argv[index + 1];
70
- if (isBooleanLikeToken(next)) {
71
- return next;
72
- }
73
- return true;
74
- }
75
- if (token.startsWith(`${prefix}=`)) {
76
- return token.slice(prefix.length + 1);
77
- }
78
- }
79
-
80
- return undefined;
81
- }
82
-
83
- function stripFlagFromArgv(argv, flagName) {
84
- const prefix = `--${flagName}`;
85
- const nextArgv = [];
86
-
87
- for (let index = 0; index < argv.length; index += 1) {
88
- const token = argv[index];
89
- if (token === prefix) {
90
- const next = argv[index + 1];
91
- if (isBooleanLikeToken(next)) {
92
- index += 1;
93
- }
94
- continue;
95
- }
96
- if (token.startsWith(`${prefix}=`)) {
97
- continue;
98
- }
99
- nextArgv.push(token);
100
- }
101
-
102
- return nextArgv;
103
- }
104
-
105
58
  function hasExplicitConfigOperation(args = {}) {
106
59
  return Boolean(String(args.operation || args.op || "").trim());
107
60
  }
@@ -142,7 +95,7 @@ async function promptStartupConflictResolution({ port }) {
142
95
 
143
96
  const lines = [
144
97
  "",
145
- `Port ${port} is already used by the llm-router startup service.`,
98
+ `Port ${port} is already used by the LLM Router startup service.`,
146
99
  "Choose an action:",
147
100
  "1. Restart service",
148
101
  "2. Run here instead",
@@ -184,34 +137,6 @@ async function runStartFastPath(args) {
184
137
  return result.exitCode ?? (result.ok ? 0 : 1);
185
138
  }
186
139
 
187
- export function shouldExitTuiOnKeypress(character, key = {}) {
188
- const sequence = typeof key?.sequence === "string" && key.sequence
189
- ? key.sequence
190
- : (typeof character === "string" ? character : "");
191
- return sequence === "" || (key?.ctrl === true && String(key?.name || "").toLowerCase() === "c");
192
- }
193
-
194
- export function installTuiSigintExitHandler({
195
- input = process.stdin,
196
- isTTY = Boolean(process.stdout.isTTY && process.stdin?.isTTY),
197
- exit = (code) => process.exit(code)
198
- } = {}) {
199
- if (!isTTY || !input || typeof input.on !== "function" || typeof input.off !== "function") {
200
- return () => {};
201
- }
202
-
203
- const onKeypress = (character, key) => {
204
- if (shouldExitTuiOnKeypress(character, key)) {
205
- exit(130);
206
- }
207
- };
208
-
209
- input.on("keypress", onKeypress);
210
- return () => {
211
- input.off("keypress", onKeypress);
212
- };
213
- }
214
-
215
140
  async function runSnapCli(argv, isTTY) {
216
141
  const [{ createRegistry, runSingleModuleCli }, { default: routerModule }] = await Promise.all([
217
142
  import("@levu/snap/dist/index.js"),
@@ -219,67 +144,71 @@ async function runSnapCli(argv, isTTY) {
219
144
  ]);
220
145
 
221
146
  const registry = createRegistry([routerModule]);
222
- const disposeSigintHandler = installTuiSigintExitHandler({ isTTY });
223
- try {
224
- return await runSingleModuleCli({
225
- registry,
226
- argv,
227
- moduleId: "router",
228
- defaultActionId: "config",
229
- helpDefaultTarget: "module",
230
- isTTY
231
- });
232
- } finally {
233
- disposeSigintHandler();
234
- }
147
+ return runSingleModuleCli({
148
+ registry,
149
+ argv,
150
+ moduleId: "router",
151
+ defaultActionId: "config",
152
+ helpDefaultTarget: "module",
153
+ isTTY
154
+ });
235
155
  }
236
156
 
237
157
  export async function runCli(argv = process.argv.slice(2), isTTY = undefined, overrides = {}) {
238
158
  const runSnapCliImpl = overrides.runSnapCli || runSnapCli;
239
159
  const runStartFastPathImpl = overrides.runStartFastPath || runStartFastPath;
240
160
  const runWebCommandImpl = overrides.runWebCommand || runWebCommand;
241
- const tuiRequested = parseBoolean(readRawFlagValue(argv, "tui"), false);
242
- const normalizedArgv = stripFlagFromArgv(argv, "tui");
243
- const parsed = parseSimpleArgs(normalizedArgv);
161
+ const errorImpl = overrides.error || ((message) => console.error(message));
162
+ const parsed = parseSimpleArgs(argv);
244
163
  const first = parsed.positional[0];
245
164
  const firstIsStart = first === "start";
246
165
  const firstIsWeb = first === "web";
247
166
  const firstIsConfig = first === "config";
167
+ const firstIsSetup = first === "setup";
248
168
  const explicitConfigOperation = hasExplicitConfigOperation(parsed.args);
249
169
 
170
+ if (Object.hasOwn(parsed.args, "tui")) {
171
+ errorImpl("The TUI flow has been removed. Use `llr` or `llr config` for the web console, or `llr config --operation=...` for direct CLI workflows.");
172
+ return 1;
173
+ }
174
+
250
175
  // Bare invocation opens the browser-based console by default.
251
176
  if (!first && !parsed.wantsHelp) {
252
- if (tuiRequested || explicitConfigOperation) {
253
- return runSnapCliImpl(["config", ...normalizedArgv], isTTY);
177
+ if (explicitConfigOperation) {
178
+ return runSnapCliImpl(["config", ...argv], isTTY);
254
179
  }
255
180
  return runWebFastPath(parsed.args, runWebCommandImpl);
256
181
  }
257
182
 
258
183
  // Fast-path explicit local start without loading Snap to minimize startup overhead.
259
184
  if (firstIsStart && !parsed.wantsHelp) {
260
- const startArgs = normalizedArgv.slice(1);
185
+ const startArgs = argv.slice(1);
261
186
  const parsedStart = parseSimpleArgs(startArgs);
262
187
  return runStartFastPathImpl(parsedStart.args);
263
188
  }
264
189
 
265
190
  if (firstIsWeb && !parsed.wantsHelp) {
266
- const webArgs = normalizedArgv.slice(1);
191
+ const webArgs = argv.slice(1);
267
192
  const parsedWeb = parseSimpleArgs(webArgs);
268
193
  return runWebFastPath(parsedWeb.args, runWebCommandImpl);
269
194
  }
270
195
 
271
196
  if (firstIsConfig && !parsed.wantsHelp && !explicitConfigOperation) {
272
- if (tuiRequested) {
273
- return runSnapCliImpl(normalizedArgv, isTTY);
274
- }
275
-
276
- const configArgs = parseSimpleArgs(normalizedArgv.slice(1));
197
+ const configArgs = parseSimpleArgs(argv.slice(1));
277
198
  return runWebFastPath(configArgs.args, runWebCommandImpl);
278
199
  }
279
200
 
280
- const normalized = [...normalizedArgv];
201
+ if (firstIsSetup && !parsed.wantsHelp) {
202
+ const setupArgs = argv.slice(1);
203
+ const parsedSetup = parseSimpleArgs(setupArgs);
204
+ if (hasExplicitConfigOperation(parsedSetup.args)) {
205
+ return runSnapCliImpl(["config", ...setupArgs], isTTY);
206
+ }
207
+ return runWebFastPath(parsedSetup.args, runWebCommandImpl);
208
+ }
209
+
210
+ const normalized = [...argv];
281
211
  if (normalized[0] === "help") normalized[0] = "--help";
282
- if (normalized[0] === "setup") normalized[0] = "config";
283
212
  return runSnapCliImpl(normalized, isTTY);
284
213
  }
285
214
 
@@ -0,0 +1,119 @@
1
+ import os from "node:os";
2
+ import path from "node:path";
3
+ import { promises as fs } from "node:fs";
4
+
5
+ export const DEFAULT_ACTIVITY_LOG_FILENAME = ".llm-router.activity.jsonl";
6
+ export const ACTIVITY_LOG_CATEGORIES = Object.freeze({
7
+ USAGE: "usage",
8
+ ROUTER: "router"
9
+ });
10
+
11
/**
 * Coerce an arbitrary value into one of the supported activity log levels.
 *
 * The value is stringified, trimmed and lower-cased before matching.
 *
 * @param {unknown} value - Candidate level (any type; falsy values count as empty).
 * @returns {"info"|"success"|"warn"|"error"} The matched level, or "info" when
 *   the value is not one of the four known levels.
 */
function normalizeLevel(value) {
  const candidate = String(value || "").trim().toLowerCase();
  switch (candidate) {
    case "info":
    case "success":
    case "warn":
    case "error":
      return candidate;
    default:
      return "info";
  }
}
16
+
17
/**
 * Stringify and trim a value, substituting a fallback when nothing is left.
 *
 * `null`/`undefined` are replaced by the fallback before stringification;
 * every other value (including `0` and `false`) is stringified as-is.
 *
 * @param {unknown} value - Value to normalize.
 * @param {string} [fallback=""] - Returned (untrimmed) when the trimmed text is empty.
 * @returns {string} The trimmed text, or `fallback` when the text is empty.
 */
function normalizeText(value, fallback = "") {
  const trimmed = String(value ?? fallback).trim();
  if (trimmed) {
    return trimmed;
  }
  return fallback;
}
21
+
22
/**
 * Resolve the activity log category for an entry.
 *
 * An explicit category wins when it is one of the known categories.
 * Otherwise the category is derived: entries whose source is "runtime", or
 * whose kind starts with "request" or "fallback", are usage entries; all
 * others are router entries. All comparisons are trimmed and case-insensitive.
 *
 * @param {unknown} value - Explicit category, if any.
 * @param {unknown} [source=""] - Entry source used for derivation.
 * @param {unknown} [kind=""] - Entry kind used for derivation.
 * @returns {string} One of the `ACTIVITY_LOG_CATEGORIES` values.
 */
function normalizeCategory(value, source = "", kind = "") {
  const toKey = (input) => String(input || "").trim().toLowerCase();
  const { USAGE, ROUTER } = ACTIVITY_LOG_CATEGORIES;

  const explicit = toKey(value);
  if (explicit === USAGE || explicit === ROUTER) {
    return explicit;
  }

  const sourceKey = toKey(source);
  const kindKey = toKey(kind);
  const looksLikeUsage = sourceKey === "runtime"
    || kindKey.startsWith("request")
    || kindKey.startsWith("fallback");
  return looksLikeUsage ? USAGE : ROUTER;
}
35
+
36
/**
 * Work out the absolute path of the activity log file.
 *
 * Resolution order:
 *   1. `explicitPath`, when non-blank, resolved to an absolute path.
 *   2. A hidden file next to the config file, named `.<stem>.activity.jsonl`,
 *      where `<stem>` is the config file name without its last extension and
 *      with runs of characters outside `[A-Za-z0-9._-]` collapsed to `-`.
 *      An empty stem falls back to `llm-router`.
 *   3. `DEFAULT_ACTIVITY_LOG_FILENAME` inside the user's home directory.
 *
 * Leading dots in the stem are kept, so a config named `.foo.json` yields
 * `..foo.activity.jsonl`.
 *
 * @param {string} [configPath=""] - Path of the router config file.
 * @param {string} [explicitPath=""] - Explicit log path override.
 * @returns {string} Path of the activity log file.
 */
export function resolveActivityLogPath(configPath = "", explicitPath = "") {
  const explicitTarget = String(explicitPath || "").trim();
  if (explicitTarget) {
    return path.resolve(explicitTarget);
  }

  const configTarget = String(configPath || "").trim();
  if (!configTarget) {
    return path.join(os.homedir(), DEFAULT_ACTIVITY_LOG_FILENAME);
  }

  const absoluteConfig = path.resolve(configTarget);
  let stem = path.basename(absoluteConfig);
  stem = stem.replace(/\.[^.]+$/, "");
  stem = stem.replace(/[^A-Za-z0-9._-]+/g, "-");
  stem = stem.replace(/-+/g, "-");
  stem = stem.replace(/^-+|-+$/g, "");

  const logName = `.${stem || "llm-router"}.activity.jsonl`;
  return path.join(path.dirname(absoluteConfig), logName);
}
55
+
56
/**
 * Build a normalized activity log entry from loosely typed fields.
 *
 * Missing or blank `id` values are replaced by `<epoch-ms>-<random hex>`;
 * missing or blank `time` values by the current ISO-8601 timestamp. The
 * category is derived from the normalized source and kind when it is not an
 * explicit known category.
 *
 * @param {object} [fields]
 * @param {unknown} [fields.id=""] - Entry identifier.
 * @param {unknown} [fields.time=""] - Entry timestamp text.
 * @param {unknown} [fields.level="info"] - Severity level.
 * @param {unknown} [fields.message=""] - Short message.
 * @param {unknown} [fields.detail=""] - Longer detail text.
 * @param {unknown} [fields.source="web-console"] - Origin of the entry.
 * @param {unknown} [fields.kind=""] - Event kind.
 * @param {unknown} [fields.category=""] - Explicit category.
 * @returns {{id: string, time: string, level: string, message: string,
 *   detail: string, source: string, kind: string, category: string}}
 */
export function createActivityLogEntry({
  id = "",
  time = "",
  level = "info",
  message = "",
  detail = "",
  source = "web-console",
  kind = "",
  category = ""
} = {}) {
  const sourceText = normalizeText(source, "web-console");
  const kindText = normalizeText(kind);

  let entryId = normalizeText(id);
  if (!entryId) {
    const randomSuffix = Math.random().toString(16).slice(2, 10);
    entryId = `${Date.now()}-${randomSuffix}`;
  }

  let entryTime = normalizeText(time);
  if (!entryTime) {
    entryTime = new Date().toISOString();
  }

  // Key order is kept stable because entries are serialized as JSON lines.
  const entry = {};
  entry.id = entryId;
  entry.time = entryTime;
  entry.level = normalizeLevel(level);
  entry.message = normalizeText(message);
  entry.detail = normalizeText(detail);
  entry.source = sourceText;
  entry.kind = kindText;
  entry.category = normalizeCategory(category, sourceText, kindText);
  return entry;
}
82
+
83
/**
 * Normalize an entry and append it to the activity log as one JSON line.
 *
 * The parent directory is created when missing. The file is appended with
 * UTF-8 encoding, and mode 0o600 is requested on the append call.
 *
 * @param {string} filePath - Log file path (resolved via `resolveActivityLogPath`).
 * @param {object} entry - Raw entry fields; see `createActivityLogEntry`.
 * @returns {Promise<object>} The normalized entry that was written.
 */
export async function appendActivityLogEntry(filePath, entry) {
  const destination = resolveActivityLogPath("", filePath);
  const record = createActivityLogEntry(entry);
  const serializedLine = `${JSON.stringify(record)}\n`;
  const writeOptions = { encoding: "utf8", mode: 0o600 };

  await fs.mkdir(path.dirname(destination), { recursive: true });
  await fs.appendFile(destination, serializedLine, writeOptions);
  return record;
}
90
+
91
/**
 * Read the most recent entries from the activity log, newest first.
 *
 * Each non-blank line is parsed as JSON and normalized through
 * `createActivityLogEntry`. Lines that are not valid JSON, or whose JSON
 * value is not a plain object (numbers, strings, booleans, arrays, `null`),
 * are skipped. Normalizing such values would otherwise fabricate an entry
 * with a fresh id and the current time.
 *
 * @param {string} filePath - Log file path (resolved via `resolveActivityLogPath`).
 * @param {object} [options]
 * @param {number} [options.limit=150] - Maximum number of entries to return.
 *   Fractions are truncated, values below 1 are raised to 1, `Infinity`
 *   returns every entry, and a non-numeric value falls back to 150.
 * @returns {Promise<object[]>} Up to `limit` entries, newest first; an empty
 *   array when the log file does not exist.
 * @throws Re-throws any read error other than `ENOENT`.
 */
export async function readActivityLogEntries(filePath, { limit = 150 } = {}) {
  const targetPath = resolveActivityLogPath("", filePath);

  const requestedLimit = Number(limit);
  const maxEntries = Number.isNaN(requestedLimit)
    ? 150
    : Math.max(1, Math.trunc(requestedLimit));

  let raw;
  try {
    raw = await fs.readFile(targetPath, "utf8");
  } catch (error) {
    // A missing log simply means nothing has been recorded yet.
    if (error && typeof error === "object" && error.code === "ENOENT") {
      return [];
    }
    throw error;
  }

  const entries = [];
  for (const rawLine of raw.split("\n")) {
    const line = rawLine.trim();
    if (!line) continue;
    try {
      const parsed = JSON.parse(line);
      const isPlainRecord = parsed !== null
        && typeof parsed === "object"
        && !Array.isArray(parsed);
      if (!isPlainRecord) continue;
      entries.push(createActivityLogEntry(parsed));
    } catch {
      // Best effort: a corrupt or truncated line must not hide the rest of the log.
    }
  }

  return entries.slice(-maxEntries).reverse();
}
115
+
116
/**
 * Delete the activity log file.
 *
 * Removal uses `force: true`, so a log file that does not exist is not an error.
 *
 * @param {string} filePath - Log file path (resolved via `resolveActivityLogPath`).
 * @returns {Promise<void>}
 */
export async function clearActivityLogFile(filePath) {
  const removalOptions = { force: true };
  await fs.rm(resolveActivityLogPath("", filePath), removalOptions);
}
@@ -1,6 +1,14 @@
1
1
  import os from "node:os";
2
2
  import path from "node:path";
3
3
  import { promises as fs } from "node:fs";
4
+ import {
5
+ CODEX_CLI_INHERIT_MODEL_VALUE,
6
+ isCodexCliInheritModelBinding,
7
+ mapClaudeCodeThinkingLevelToTokens,
8
+ mapClaudeCodeThinkingTokensToLevel,
9
+ normalizeClaudeCodeThinkingLevel,
10
+ normalizeCodexCliReasoningEffort
11
+ } from "../shared/coding-tool-bindings.js";
4
12
 
5
13
  const BACKUP_SUFFIX = ".llm_router_backup";
6
14
  const CODEX_PROVIDER_ID = "llm-router";
@@ -12,7 +20,8 @@ const CLAUDE_MANAGED_ENV_KEYS = Object.freeze([
12
20
  "ANTHROPIC_DEFAULT_OPUS_MODEL",
13
21
  "ANTHROPIC_DEFAULT_SONNET_MODEL",
14
22
  "ANTHROPIC_DEFAULT_HAIKU_MODEL",
15
- "CLAUDE_CODE_SUBAGENT_MODEL"
23
+ "CLAUDE_CODE_SUBAGENT_MODEL",
24
+ "MAX_THINKING_TOKENS"
16
25
  ]);
17
26
  const CLAUDE_BACKUP_ENV_KEYS = Object.freeze([
18
27
  ...CLAUDE_MANAGED_ENV_KEYS,
@@ -66,6 +75,13 @@ function normalizeModelBinding(value) {
66
75
  return String(value || "").trim();
67
76
  }
68
77
 
78
/**
 * Tell whether two path-like values point at the same absolute path.
 *
 * Both values are stringified and trimmed, then resolved with `path.resolve`
 * and compared as exact strings.
 *
 * @param {unknown} left - First path.
 * @param {unknown} right - Second path.
 * @returns {boolean} `true` when both are non-blank and resolve to the same
 *   string; `false` when either is blank or they differ.
 */
function areResolvedFilePathsEqual(left, right) {
  const [firstPath, secondPath] = [left, right].map((candidate) => String(candidate || "").trim());
  if (firstPath === "" || secondPath === "") {
    return false;
  }
  return path.resolve(firstPath) === path.resolve(secondPath);
}
84
+
69
85
  function backupHasData(backup) {
70
86
  return Boolean(backup && typeof backup === "object" && !Array.isArray(backup) && Object.keys(backup).length > 0);
71
87
  }
@@ -321,14 +337,19 @@ function normalizeClaudeBindings(bindings = {}) {
321
337
  defaultOpusModel: normalizeModelBinding(source.defaultOpusModel),
322
338
  defaultSonnetModel: normalizeModelBinding(source.defaultSonnetModel),
323
339
  defaultHaikuModel: normalizeModelBinding(source.defaultHaikuModel),
324
- subagentModel: normalizeModelBinding(source.subagentModel)
340
+ subagentModel: normalizeModelBinding(source.subagentModel),
341
+ thinkingLevel: normalizeClaudeCodeThinkingLevel(source.thinkingLevel)
325
342
  };
326
343
  }
327
344
 
328
345
  function normalizeCodexBindings(bindings = {}) {
329
346
  const source = bindings && typeof bindings === "object" && !Array.isArray(bindings) ? bindings : {};
347
+ const defaultModel = normalizeModelBinding(source.defaultModel);
330
348
  return {
331
- defaultModel: normalizeModelBinding(source.defaultModel)
349
+ defaultModel: isCodexCliInheritModelBinding(defaultModel)
350
+ ? CODEX_CLI_INHERIT_MODEL_VALUE
351
+ : defaultModel,
352
+ thinkingLevel: normalizeCodexCliReasoningEffort(source.thinkingLevel)
332
353
  };
333
354
  }
334
355
 
@@ -339,6 +360,7 @@ function captureCodexBackup(document) {
339
360
  version: 1,
340
361
  modelProvider: getTopLevelTomlStringField(document, "model_provider"),
341
362
  model: getTopLevelTomlStringField(document, "model"),
363
+ modelReasoningEffort: getTopLevelTomlStringField(document, "model_reasoning_effort"),
342
364
  modelCatalogJson: getTopLevelTomlStringField(document, "model_catalog_json"),
343
365
  providerSection: {
344
366
  exists: Boolean(providerSection),
@@ -354,6 +376,9 @@ function applyCodexBackup(document, backup = {}) {
354
376
  if (backup?.model?.exists) setTopLevelTomlStringField(document, "model", backup.model.value);
355
377
  else deleteTopLevelTomlField(document, "model");
356
378
 
379
+ if (backup?.modelReasoningEffort?.exists) setTopLevelTomlStringField(document, "model_reasoning_effort", backup.modelReasoningEffort.value);
380
+ else deleteTopLevelTomlField(document, "model_reasoning_effort");
381
+
357
382
  if (backup?.modelCatalogJson?.exists) setTopLevelTomlStringField(document, "model_catalog_json", backup.modelCatalogJson.value);
358
383
  else deleteTopLevelTomlField(document, "model_catalog_json");
359
384
 
@@ -415,7 +440,10 @@ function sanitizeBackup(backup, tool) {
415
440
  }
416
441
 
417
442
  export function resolveCodingToolBackupFilePath(configFilePath = "") {
418
- return `${path.resolve(String(configFilePath || "").trim())}${BACKUP_SUFFIX}`;
443
+ const resolvedPath = path.resolve(String(configFilePath || "").trim());
444
+ const parsed = path.parse(resolvedPath);
445
+ if (!parsed.ext) return `${resolvedPath}${BACKUP_SUFFIX}`;
446
+ return path.join(parsed.dir, `${parsed.name}${BACKUP_SUFFIX}${parsed.ext}`);
419
447
  }
420
448
 
421
449
  export function resolveCodexCliConfigFilePath({
@@ -504,6 +532,8 @@ export async function readCodexCliRoutingState({
504
532
  const document = splitTomlDocument(configState.text);
505
533
  const modelProvider = getTopLevelTomlStringField(document, "model_provider");
506
534
  const model = getTopLevelTomlStringField(document, "model");
535
+ const modelReasoningEffort = getTopLevelTomlStringField(document, "model_reasoning_effort");
536
+ const modelCatalogJson = getTopLevelTomlStringField(document, "model_catalog_json");
507
537
  const providerSection = parseTomlSectionKeyValues(getTomlSection(document, `model_providers.${CODEX_PROVIDER_ID}`));
508
538
  const configuredBaseUrl = String(providerSection.base_url || "").trim();
509
539
  const configuredBearerToken = String(providerSection.experimental_bearer_token || "").trim();
@@ -512,6 +542,12 @@ export async function readCodexCliRoutingState({
512
542
  && modelProvider.value === CODEX_PROVIDER_ID
513
543
  && configuredBaseUrl === expectedBaseUrl
514
544
  );
545
+ const routerCatalogPath = resolveCodexCliModelCatalogFilePath({
546
+ configFilePath: resolvedConfigPath,
547
+ env,
548
+ homeDir
549
+ });
550
+ const usingRouterManagedCatalog = areResolvedFilePathsEqual(modelCatalogJson.value, routerCatalogPath);
515
551
 
516
552
  return {
517
553
  tool: "codex-cli",
@@ -522,8 +558,15 @@ export async function readCodexCliRoutingState({
522
558
  routedViaRouter,
523
559
  configuredBaseUrl,
524
560
  modelProvider: modelProvider.value,
561
+ configuredModel: model.value,
562
+ configuredThinkingLevel: modelReasoningEffort.value,
563
+ configuredModelCatalogJson: modelCatalogJson.value,
564
+ inheritCliModel: routedViaRouter && !usingRouterManagedCatalog,
525
565
  bindings: {
526
- defaultModel: model.value
566
+ defaultModel: routedViaRouter && !usingRouterManagedCatalog
567
+ ? CODEX_CLI_INHERIT_MODEL_VALUE
568
+ : model.value,
569
+ thinkingLevel: modelReasoningEffort.value
527
570
  }
528
571
  };
529
572
  }
@@ -563,6 +606,8 @@ export async function patchCodexCliConfigFile({
563
606
  const document = splitTomlDocument(configState.text);
564
607
  const backupState = await ensureToolBackupFileExists(resolvedBackupPath);
565
608
  const existingBackup = sanitizeBackup(backupState.data, "codex-cli");
609
+ const currentModelCatalogJson = getTopLevelTomlStringField(document, "model_catalog_json");
610
+ const currentlyUsingRouterManagedCatalog = areResolvedFilePathsEqual(currentModelCatalogJson.value, resolvedCatalogPath);
566
611
 
567
612
  if (captureBackup && !backupHasData(existingBackup)) {
568
613
  const backup = configState.existed ? captureCodexBackup(document) : {};
@@ -570,26 +615,50 @@ export async function patchCodexCliConfigFile({
570
615
  }
571
616
 
572
617
  setTopLevelTomlStringField(document, "model_provider", CODEX_PROVIDER_ID);
573
- if (normalizedBindings.defaultModel) setTopLevelTomlStringField(document, "model", normalizedBindings.defaultModel);
618
+ if (isCodexCliInheritModelBinding(normalizedBindings.defaultModel)) {
619
+ if (currentlyUsingRouterManagedCatalog) {
620
+ if (existingBackup?.model?.exists) setTopLevelTomlStringField(document, "model", existingBackup.model.value);
621
+ else deleteTopLevelTomlField(document, "model");
622
+
623
+ if (existingBackup?.modelCatalogJson?.exists) {
624
+ setTopLevelTomlStringField(document, "model_catalog_json", existingBackup.modelCatalogJson.value);
625
+ } else {
626
+ deleteTopLevelTomlField(document, "model_catalog_json");
627
+ }
628
+ }
629
+ } else if (normalizedBindings.defaultModel) {
630
+ setTopLevelTomlStringField(document, "model", normalizedBindings.defaultModel);
631
+ } else {
632
+ deleteTopLevelTomlField(document, "model");
633
+ }
634
+ if (normalizedBindings.thinkingLevel) {
635
+ setTopLevelTomlStringField(document, "model_reasoning_effort", normalizedBindings.thinkingLevel);
636
+ } else {
637
+ deleteTopLevelTomlField(document, "model_reasoning_effort");
638
+ }
574
639
  setTomlSection(document, `model_providers.${CODEX_PROVIDER_ID}`, createCodexProviderSection({
575
640
  baseUrl,
576
641
  apiKey: normalizedApiKey
577
642
  }));
578
- if (normalizedModelCatalog) {
579
- if (normalizedModelCatalog.models.length > 0) {
643
+ if (!isCodexCliInheritModelBinding(normalizedBindings.defaultModel)) {
644
+ if (normalizedModelCatalog?.models?.length > 0) {
580
645
  await writeJsonObjectFile(resolvedCatalogPath, normalizedModelCatalog);
581
646
  setTopLevelTomlStringField(document, "model_catalog_json", resolvedCatalogPath);
582
647
  } else {
583
648
  deleteTopLevelTomlField(document, "model_catalog_json");
584
- await fs.rm(resolvedCatalogPath, { force: true });
585
649
  }
586
650
  }
587
651
 
588
652
  await writeTextFile(resolvedConfigPath, serializeTomlDocument(document));
653
+ const finalModelCatalogJson = getTopLevelTomlStringField(document, "model_catalog_json");
654
+ const usingRouterManagedCatalog = areResolvedFilePathsEqual(finalModelCatalogJson.value, resolvedCatalogPath);
655
+ if (!usingRouterManagedCatalog) {
656
+ await fs.rm(resolvedCatalogPath, { force: true });
657
+ }
589
658
  return {
590
659
  configFilePath: resolvedConfigPath,
591
660
  backupFilePath: resolvedBackupPath,
592
- modelCatalogFilePath: normalizedModelCatalog?.models?.length > 0 ? resolvedCatalogPath : "",
661
+ modelCatalogFilePath: usingRouterManagedCatalog ? resolvedCatalogPath : "",
593
662
  configCreated: !configState.existed,
594
663
  baseUrl,
595
664
  bindings: normalizedBindings
@@ -663,7 +732,8 @@ export async function readClaudeCodeRoutingState({
663
732
  defaultOpusModel: envConfig.ANTHROPIC_DEFAULT_OPUS_MODEL,
664
733
  defaultSonnetModel: envConfig.ANTHROPIC_DEFAULT_SONNET_MODEL,
665
734
  defaultHaikuModel: envConfig.ANTHROPIC_DEFAULT_HAIKU_MODEL,
666
- subagentModel: envConfig.CLAUDE_CODE_SUBAGENT_MODEL
735
+ subagentModel: envConfig.CLAUDE_CODE_SUBAGENT_MODEL,
736
+ thinkingLevel: mapClaudeCodeThinkingTokensToLevel(envConfig.MAX_THINKING_TOKENS)
667
737
  })
668
738
  };
669
739
  }
@@ -728,6 +798,10 @@ export async function patchClaudeCodeSettingsFile({
728
798
  if (normalizedBindings.subagentModel) nextSettings.env.CLAUDE_CODE_SUBAGENT_MODEL = normalizedBindings.subagentModel;
729
799
  else delete nextSettings.env.CLAUDE_CODE_SUBAGENT_MODEL;
730
800
 
801
+ const maxThinkingTokens = mapClaudeCodeThinkingLevelToTokens(normalizedBindings.thinkingLevel);
802
+ if (maxThinkingTokens) nextSettings.env.MAX_THINKING_TOKENS = maxThinkingTokens;
803
+ else delete nextSettings.env.MAX_THINKING_TOKENS;
804
+
731
805
  await writeJsonObjectFile(resolvedSettingsPath, nextSettings);
732
806
  return {
733
807
  settingsFilePath: resolvedSettingsPath,
@@ -12,6 +12,11 @@ function dedupe(values) {
12
12
  return [...new Set((values || []).filter(Boolean).map((value) => String(value).trim()).filter(Boolean))];
13
13
  }
14
14
 
15
/**
 * Parse a value as a positive integer, or return `null`.
 *
 * The value is stringified and trimmed. It must consist of an optional `+`,
 * decimal digits, and an optional fractional part (which is truncated).
 * Anything else is rejected rather than prefix-parsed: `Number.parseInt`
 * alone would read "128k" and "128,000" as 128 and "1e6" as 1, silently
 * storing a wrong size.
 *
 * @param {unknown} value - Number or numeric string.
 * @returns {number|null} The integer when it is finite and greater than zero,
 *   otherwise `null`.
 */
function normalizePositiveInteger(value) {
  const text = String(value ?? "").trim();
  // Reject anything parseInt would only partially consume.
  if (!/^\+?\d+(?:\.\d*)?$/.test(text)) return null;
  const parsed = Number.parseInt(text, 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : null;
}
19
+
15
20
  function normalizeProviderType(value) {
16
21
  const normalized = String(value || "").trim().toLowerCase();
17
22
  return normalized === PROVIDER_TYPE_SUBSCRIPTION ? PROVIDER_TYPE_SUBSCRIPTION : undefined;
@@ -67,22 +72,47 @@ function normalizeModelArray(models) {
67
72
  if (!id) return null;
68
73
  const formats = dedupe(entry.formats || entry.format || []).filter((value) => value === "openai" || value === "claude");
69
74
  const variant = typeof entry.variant === "string" ? entry.variant.trim() : "";
75
+ const contextWindow = normalizePositiveInteger(
76
+ entry.contextWindow
77
+ ?? entry.context_window
78
+ ?? entry.contextLimit
79
+ ?? entry.context_limit
80
+ );
70
81
  return {
71
82
  id,
72
83
  ...(formats.length > 0 ? { formats } : {}),
73
- ...(variant ? { variant } : {})
84
+ ...(variant ? { variant } : {}),
85
+ ...(contextWindow ? { contextWindow } : {})
74
86
  };
75
87
  })
76
88
  .filter(Boolean);
77
89
  }
78
90
 
79
- function buildModelsWithPreferredFormat(modelIds, modelSupport = {}, modelPreferredFormat = {}) {
91
/**
 * Normalize a `{ modelId: contextWindow }` map.
 *
 * Model ids are stringified and trimmed; context windows are passed through
 * `normalizePositiveInteger`. Pairs with a blank id or an invalid context
 * window are dropped.
 *
 * @param {unknown} input - Candidate map; anything that is not a non-array
 *   object yields an empty result.
 * @returns {Record<string, number>} Map of model id to context window.
 */
function normalizeModelContextWindows(input) {
  const isRecord = Boolean(input) && typeof input === "object" && !Array.isArray(input);
  if (!isRecord) {
    return {};
  }

  const validPairs = [];
  for (const [rawModelId, rawContextWindow] of Object.entries(input)) {
    const modelId = String(rawModelId || "").trim();
    const contextWindow = normalizePositiveInteger(rawContextWindow);
    if (modelId && contextWindow) {
      validPairs.push([modelId, contextWindow]);
    }
  }
  // Object.fromEntries defines own properties, so unusual keys cannot hit setters.
  return Object.fromEntries(validPairs);
}
99
+
100
+ function buildModelsWithPreferredFormat(modelIds, modelSupport = {}, modelPreferredFormat = {}, modelContextWindows = {}) {
80
101
  return normalizeModelArray(modelIds.map((id) => {
81
102
  const preferred = modelPreferredFormat[id];
103
+ const contextWindow = normalizePositiveInteger(modelContextWindows[id]);
82
104
  if (preferred) {
83
- return { id, formats: [preferred] };
105
+ return {
106
+ id,
107
+ formats: [preferred],
108
+ ...(contextWindow ? { contextWindow } : {})
109
+ };
84
110
  }
85
- return { id, formats: modelSupport[id] || [] };
111
+ return {
112
+ id,
113
+ formats: modelSupport[id] || [],
114
+ ...(contextWindow ? { contextWindow } : {})
115
+ };
86
116
  }));
87
117
  }
88
118
 
@@ -107,6 +137,10 @@ export function buildProviderFromConfigInput(input) {
107
137
  ).trim() || "default";
108
138
  const baseUrlByFormat = normalizeBaseUrlByFormatInput(input);
109
139
  const explicitModelIds = parseModelListInput(input.models);
140
+ const modelContextWindows = normalizeModelContextWindows(
141
+ input.modelContextWindows
142
+ || input["model-context-windows"]
143
+ );
110
144
  const probeModelSupport = input.probe?.modelSupport && typeof input.probe.modelSupport === "object"
111
145
  ? input.probe.modelSupport
112
146
  : {};
@@ -114,10 +148,10 @@ export function buildProviderFromConfigInput(input) {
114
148
  ? input.probe.modelPreferredFormat
115
149
  : {};
116
150
  const explicitModels = explicitModelIds.length > 0
117
- ? buildModelsWithPreferredFormat(explicitModelIds, probeModelSupport, probeModelPreferredFormat)
151
+ ? buildModelsWithPreferredFormat(explicitModelIds, probeModelSupport, probeModelPreferredFormat, modelContextWindows)
118
152
  : [];
119
153
  const probeModels = input.probe?.models?.length
120
- ? buildModelsWithPreferredFormat(input.probe.models, probeModelSupport, probeModelPreferredFormat)
154
+ ? buildModelsWithPreferredFormat(input.probe.models, probeModelSupport, probeModelPreferredFormat, modelContextWindows)
121
155
  : [];
122
156
  const mergedModels = explicitModels.length > 0 ? explicitModels : probeModels;
123
157
  const endpointFormats = baseUrlByFormat ? Object.keys(baseUrlByFormat) : [];
@@ -185,12 +219,17 @@ function mergeProviderModelsWithExistingFallbacks(existingProvider, incomingProv
185
219
  const mergedModels = (incomingProvider?.models || []).map((model) => {
186
220
  const previous = existingModelById.get(model.id);
187
221
  const hasExplicitFallbacks = Object.prototype.hasOwnProperty.call(model, "fallbackModels");
188
- if (hasExplicitFallbacks || !previous) return model;
189
- if (!Object.prototype.hasOwnProperty.call(previous, "fallbackModels")) return model;
190
- return {
191
- ...model,
192
- fallbackModels: previous.fallbackModels || []
193
- };
222
+ const hasExplicitContextWindow = Object.prototype.hasOwnProperty.call(model, "contextWindow");
223
+ if (!previous) return model;
224
+
225
+ const nextModel = { ...model };
226
+ if (!hasExplicitFallbacks && Object.prototype.hasOwnProperty.call(previous, "fallbackModels")) {
227
+ nextModel.fallbackModels = previous.fallbackModels || [];
228
+ }
229
+ if (!hasExplicitContextWindow && Number.isFinite(Number(previous?.contextWindow))) {
230
+ nextModel.contextWindow = Number(previous.contextWindow);
231
+ }
232
+ return nextModel;
194
233
  });
195
234
 
196
235
  return {
@@ -249,7 +249,7 @@ export function spawnStartProcess({
249
249
  env = process.env
250
250
  } = {}) {
251
251
  const finalCliPath = normalizeCliPath(cliPath || env.LLM_ROUTER_CLI_PATH || process.argv[1] || "");
252
- if (!finalCliPath) throw new Error("Cannot spawn llm-router start: CLI path is unknown.");
252
+ if (!finalCliPath) throw new Error("Cannot spawn LLM Router start: CLI path is unknown.");
253
253
 
254
254
  const args = [
255
255
  finalCliPath,