@khanglvm/llm-router 2.0.5 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [2.2.0] - 2026-03-21
11
+
12
+ ### Added
13
+ - Standalone `set-claude-code-effort-level` CLI operation sets `CLAUDE_CODE_EFFORT_LEVEL` in Claude Code settings and shell profile without requiring a router connection.
14
+ - Web console effort-level dropdown now works independently of routing — no need to connect Claude Code to LLM Router just to change the thinking effort.
15
+
16
+ ### Changed
17
+ - Claude Code live test uses process env vars (`ANTHROPIC_BASE_URL`, `ANTHROPIC_AUTH_TOKEN`, `ANTHROPIC_MODEL`) instead of patching settings.json, keeping the config file untouched during tests.
18
+
10
19
  ## [2.0.5] - 2026-03-15
11
20
 
12
21
  ### Fixed
package/README.md CHANGED
@@ -2,20 +2,18 @@
2
2
 
3
3
  LLM Router is a local and Cloudflare-deployable gateway for routing one client endpoint across multiple LLM providers, models, aliases, fallbacks, and rate limits.
4
4
 
5
- The npm package name stays the same:
5
+ **Current version**: `2.2.0`
6
6
 
7
+ NPM package:
7
8
  ```bash
8
9
  @khanglvm/llm-router
9
10
  ```
10
11
 
11
- The primary CLI command is now:
12
-
12
+ Primary CLI command:
13
13
  ```bash
14
14
  llr
15
15
  ```
16
16
 
17
- `2.0.1` is the current public release. It includes the Web UI, AMP routing, and coding-tool integrations introduced in the 2.x line.
18
-
19
17
  ## Install
20
18
 
21
19
  ```bash
@@ -266,7 +264,18 @@ Run the JavaScript test suite:
266
264
  node --test $(rg --files -g "*.test.js" src)
267
265
  ```
268
266
 
267
+ ## Documentation
268
+
269
+ Comprehensive documentation is available in the `docs/` directory:
270
+
271
+ - **[Project Overview & PDR](./docs/project-overview-pdr.md)** — Feature matrix, target users, success metrics, constraints
272
+ - **[Codebase Summary](./docs/codebase-summary.md)** — Directory structure, module relationships, entry points, test infrastructure
273
+ - **[Code Standards](./docs/code-standards.md)** — Patterns, naming conventions, testing, error handling
274
+ - **[System Architecture](./docs/system-architecture.md)** — Request lifecycle, subsystem boundaries, data flow, deployment models
275
+ - **[Project Roadmap](./docs/project-roadmap.md)** — Current status, planned phases, timeline, success metrics
276
+
269
277
  ## Security and Releases
270
278
 
271
279
  - Security: [`SECURITY.md`](https://github.com/khanglvm/llm-router/blob/master/SECURITY.md)
272
280
  - Release notes: [`CHANGELOG.md`](https://github.com/khanglvm/llm-router/blob/master/CHANGELOG.md)
281
+ - AMP routing: [`docs/amp-routing.md`](./docs/amp-routing.md)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@khanglvm/llm-router",
3
- "version": "2.0.5",
3
+ "version": "2.2.0",
4
4
  "description": "LLM Router: single gateway endpoint for multi-provider LLMs with unified OpenAI+Anthropic format and seamless fallback",
5
5
  "keywords": [
6
6
  "llm-router",
@@ -28,6 +28,7 @@ import {
28
28
  unpatchAmpClientConfigFiles as unpatchAmpClientConfigFilesFile
29
29
  } from "../node/amp-client-config.js";
30
30
  import {
31
+ patchClaudeCodeEffortLevel,
31
32
  patchClaudeCodeSettingsFile,
32
33
  patchCodexCliConfigFile,
33
34
  readClaudeCodeRoutingState,
@@ -73,7 +74,7 @@ import {
73
74
  import {
74
75
  CODEX_CLI_INHERIT_MODEL_VALUE,
75
76
  isCodexCliInheritModelBinding,
76
- normalizeClaudeCodeThinkingLevel,
77
+ normalizeClaudeCodeEffortLevel,
77
78
  normalizeCodexCliReasoningEffort
78
79
  } from "../shared/coding-tool-bindings.js";
79
80
  import { FORMATS } from "../translator/index.js";
@@ -3609,7 +3610,7 @@ function normalizeClaudeBindingState(bindings = {}) {
3609
3610
  defaultSonnetModel: String(source.defaultSonnetModel || "").trim(),
3610
3611
  defaultHaikuModel: String(source.defaultHaikuModel || "").trim(),
3611
3612
  subagentModel: String(source.subagentModel || "").trim(),
3612
- thinkingLevel: normalizeClaudeCodeThinkingLevel(source.thinkingLevel)
3613
+ thinkingLevel: normalizeClaudeCodeEffortLevel(source.thinkingLevel)
3613
3614
  };
3614
3615
  }
3615
3616
 
@@ -6532,6 +6533,32 @@ async function doSetClaudeCodeRouting(context) {
6532
6533
  };
6533
6534
  }
6534
6535
 
6536
/**
 * CLI handler for the `set-claude-code-effort-level` config operation.
 *
 * Reads the optional settings-file path and the requested level from the CLI
 * arguments (each accepted under several aliases), delegates the actual
 * patching to `patchClaudeCodeEffortLevel`, and wraps the outcome in an
 * operation report.
 *
 * NOTE(review): the level is passed through as typed; the patcher normalizes
 * it and treats an empty normalized value as a request to clear. Presumably a
 * mistyped level therefore clears the setting instead of being rejected —
 * confirm this is intended.
 *
 * @param {object} context - Action context; `context.args` holds the parsed
 *   CLI arguments and `context.mode` is echoed back in the result.
 * @returns {Promise<object>} Success result carrying the rendered report.
 */
async function doSetClaudeCodeEffortLevel(context) {
  const cliArgs = context.args || {};
  // Every argument is coerced to a trimmed string; a missing one becomes "".
  const readTrimmedArg = (aliases) => String(readArg(cliArgs, aliases, "") || "").trim();

  const settingsFilePath = readTrimmedArg([
    "claude-code-settings-file",
    "claudeCodeSettingsFile",
    "claude-settings-file",
    "claudeSettingsFile"
  ]);
  const effortLevel = readTrimmedArg(["thinking-level", "thinkingLevel", "effort-level", "effortLevel"]);

  const outcome = await patchClaudeCodeEffortLevel({
    settingsFilePath,
    effortLevel,
    env: process.env
  });

  // An empty applied level means the setting was removed rather than set.
  const reportTitle = outcome.effortLevel
    ? "Claude Code Effort Level Set"
    : "Claude Code Effort Level Cleared";
  const reportRows = [
    ["Settings File", outcome.settingsFilePath],
    ["Effort Level", outcome.effortLevel || "(cleared)"],
    ["Shell Profile Updated", formatYesNo(outcome.shellProfileUpdated)]
  ];

  return {
    ok: true,
    mode: context.mode,
    exitCode: EXIT_SUCCESS,
    data: buildOperationReport(reportTitle, reportRows)
  };
}
6561
+
6535
6562
  async function doDiscoverProviderModels(context) {
6536
6563
  const args = context.args || {};
6537
6564
  let headers;
@@ -8395,6 +8422,8 @@ async function runConfigAction(context) {
8395
8422
  case "set-claude-code-routing":
8396
8423
  case "set-claude-code":
8397
8424
  return doSetClaudeCodeRouting(context);
8425
+ case "set-claude-code-effort-level":
8426
+ return doSetClaudeCodeEffortLevel(context);
8398
8427
  case "discover-provider-models":
8399
8428
  return doDiscoverProviderModels(context);
8400
8429
  case "test-provider":
@@ -9264,8 +9293,9 @@ async function runAiHelpAction(context) {
9264
9293
  "### Claude Code",
9265
9294
  "- required_gate=patch_gate_claude_code=ready",
9266
9295
  `- enable/update route: ${CLI_COMMAND} config --operation=set-claude-code-routing --enabled=true --primary-model=<target_model_or_group>`,
9267
- `- optional bindings: --default-opus-model=<route> --default-sonnet-model=<route> --default-haiku-model=<route> --subagent-model=<route> --thinking-level=low|medium|high|max`,
9296
+ `- optional bindings: --default-opus-model=<route> --default-sonnet-model=<route> --default-haiku-model=<route> --subagent-model=<route> --thinking-level=low|medium|high|max (sets CLAUDE_CODE_EFFORT_LEVEL in shell profile)`,
9268
9297
  `- disable route: ${CLI_COMMAND} config --operation=set-claude-code-routing --enabled=false`,
9298
+ `- standalone effort level (no router needed): ${CLI_COMMAND} config --operation=set-claude-code-effort-level --thinking-level=low|medium|high|max`,
9269
9299
  "",
9270
9300
  "### Codex CLI",
9271
9301
  "- required_gate=patch_gate_codex_cli=ready",
@@ -10535,7 +10565,7 @@ const routerModule = {
10535
10565
  { name: "master-key-length", required: false, description: "Generated master key length (min 24).", example: "--master-key-length=48" },
10536
10566
  { name: "master-key-prefix", required: false, description: "Generated master key prefix.", example: "--master-key-prefix=gw_" },
10537
10567
  { name: "default-model", required: false, description: `For set-codex-cli-routing: managed route binding, or ${CODEX_CLI_INHERIT_MODEL_VALUE} to keep Codex's own model selection.`, example: "--default-model=chat.default" },
10538
- { name: "thinking-level", required: false, description: "For set-codex-cli-routing / set-claude-code-routing: reasoning level.", example: "--thinking-level=medium" },
10568
+ { name: "thinking-level", required: false, description: "For set-codex-cli-routing / set-claude-code-routing / set-claude-code-effort-level: reasoning level.", example: "--thinking-level=medium" },
10539
10569
  { name: "primary-model", required: false, description: "For set-claude-code-routing: primary ANTHROPIC_MODEL route.", example: "--primary-model=chat.default" },
10540
10570
  { name: "default-opus-model", required: false, description: "For set-claude-code-routing: ANTHROPIC_DEFAULT_OPUS_MODEL route.", example: "--default-opus-model=chat.deep" },
10541
10571
  { name: "default-sonnet-model", required: false, description: "For set-claude-code-routing: ANTHROPIC_DEFAULT_SONNET_MODEL route.", example: "--default-sonnet-model=chat.default" },
@@ -10606,6 +10636,7 @@ const routerModule = {
10606
10636
  `${CLI_COMMAND} config --operation=set-amp-config --amp-subagent-mappings="oracle => rc/gpt-5.3-codex, librarian => rc/gpt-5.3-codex, search => rc/gpt-5.3-codex, look-at => rc/gpt-5.3-codex"`,
10607
10637
  `${CLI_COMMAND} config --operation=set-codex-cli-routing --enabled=true --default-model=chat.default`,
10608
10638
  `${CLI_COMMAND} config --operation=set-claude-code-routing --enabled=true --primary-model=chat.default --default-haiku-model=chat.fast`,
10639
+ `${CLI_COMMAND} config --operation=set-claude-code-effort-level --thinking-level=high`,
10609
10640
  `${CLI_COMMAND} config --operation=set-amp-client-routing --enabled=true --amp-client-settings-scope=workspace`,
10610
10641
  `${CLI_COMMAND} config --operation=set-amp-config --patch-amp-client-config=true --amp-client-settings-scope=workspace --amp-client-url=${LOCAL_ROUTER_ORIGIN} --amp-client-api-key=gw_...`,
10611
10642
  `${CLI_COMMAND} config --operation=list-routing`,
@@ -3,10 +3,13 @@ import path from "node:path";
3
3
  import { promises as fs } from "node:fs";
4
4
  import {
5
5
  CODEX_CLI_INHERIT_MODEL_VALUE,
6
+ CLAUDE_CODE_EFFORT_LEVEL_SETTINGS_JSON_VALUE,
6
7
  isCodexCliInheritModelBinding,
7
8
  mapClaudeCodeThinkingLevelToTokens,
8
9
  mapClaudeCodeThinkingTokensToLevel,
9
10
  normalizeClaudeCodeThinkingLevel,
11
+ normalizeClaudeCodeEffortLevel,
12
+ migrateLegacyThinkingTokensToEffortLevel,
10
13
  normalizeCodexCliReasoningEffort
11
14
  } from "../shared/coding-tool-bindings.js";
12
15
 
@@ -21,7 +24,7 @@ const CLAUDE_MANAGED_ENV_KEYS = Object.freeze([
21
24
  "ANTHROPIC_DEFAULT_SONNET_MODEL",
22
25
  "ANTHROPIC_DEFAULT_HAIKU_MODEL",
23
26
  "CLAUDE_CODE_SUBAGENT_MODEL",
24
- "MAX_THINKING_TOKENS"
27
+ "CLAUDE_CODE_EFFORT_LEVEL"
25
28
  ]);
26
29
  const CLAUDE_BACKUP_ENV_KEYS = Object.freeze([
27
30
  ...CLAUDE_MANAGED_ENV_KEYS,
@@ -338,7 +341,7 @@ function normalizeClaudeBindings(bindings = {}) {
338
341
  defaultSonnetModel: normalizeModelBinding(source.defaultSonnetModel),
339
342
  defaultHaikuModel: normalizeModelBinding(source.defaultHaikuModel),
340
343
  subagentModel: normalizeModelBinding(source.subagentModel),
341
- thinkingLevel: normalizeClaudeCodeThinkingLevel(source.thinkingLevel)
344
+ thinkingLevel: normalizeClaudeCodeEffortLevel(source.thinkingLevel)
342
345
  };
343
346
  }
344
347
 
@@ -392,16 +395,20 @@ function applyCodexBackup(document, backup = {}) {
392
395
  function captureClaudeBackup(config) {
393
396
  const env = config?.env && typeof config.env === "object" && !Array.isArray(config.env) ? config.env : {};
394
397
  const backupEnv = {};
395
- for (const key of CLAUDE_BACKUP_ENV_KEYS) {
398
+ for (const key of [...CLAUDE_BACKUP_ENV_KEYS, "MAX_THINKING_TOKENS"]) {
396
399
  if (Object.prototype.hasOwnProperty.call(env, key)) {
397
400
  backupEnv[key] = getBackupValue(env[key]);
398
401
  }
399
402
  }
400
- return {
403
+ const backup = {
401
404
  tool: "claude-code",
402
405
  version: 1,
403
406
  env: backupEnv
404
407
  };
408
+ if (config && typeof config === "object" && config.effortLevel !== undefined) {
409
+ backup.effortLevel = getBackupValue(config.effortLevel);
410
+ }
411
+ return backup;
405
412
  }
406
413
 
407
414
  function applyClaudeBackup(config, backup = {}) {
@@ -412,7 +419,7 @@ function applyClaudeBackup(config, backup = {}) {
412
419
  ? { ...next.env }
413
420
  : {};
414
421
 
415
- for (const key of CLAUDE_BACKUP_ENV_KEYS) {
422
+ for (const key of [...CLAUDE_BACKUP_ENV_KEYS, "MAX_THINKING_TOKENS"]) {
416
423
  if (backup?.env && Object.prototype.hasOwnProperty.call(backup.env, key)) {
417
424
  applyBackupValue(env, key, backup.env[key]);
418
425
  } else {
@@ -420,11 +427,65 @@ function applyClaudeBackup(config, backup = {}) {
420
427
  }
421
428
  }
422
429
 
430
+ if (backup?.effortLevel?.exists) {
431
+ next.effortLevel = backup.effortLevel.value;
432
+ } else {
433
+ delete next.effortLevel;
434
+ }
435
+
423
436
  if (Object.keys(env).length > 0) next.env = env;
424
437
  else delete next.env;
425
438
  return next;
426
439
  }
427
440
 
441
// Markers that fence the llm-router-managed block inside the user's shell
// profile, so the block can be found and replaced or removed on later runs.
const SHELL_EFFORT_MARKER_START = "# >>> llm-router effort-level >>>";
const SHELL_EFFORT_MARKER_END = "# <<< llm-router effort-level <<<";

/**
 * Picks the shell profile file to edit.
 *
 * @param {string} homeDir - Directory that contains the profile file.
 * @returns {string} `<homeDir>/.zshrc` when `$SHELL` ends in `/zsh` or
 *   `/zsh5`, otherwise `<homeDir>/.bashrc`.
 */
function resolveShellProfilePath(homeDir) {
  const shell = String(process.env.SHELL || "").trim();
  const profileName = shell.endsWith("/zsh") || shell.endsWith("/zsh5") ? ".zshrc" : ".bashrc";
  return path.join(homeDir, profileName);
}

/**
 * Writes the profile text and reports failure instead of throwing.
 *
 * Profile edits are best-effort: callers use the boolean to decide whether to
 * fall back to another place for the setting, so a read-only profile or a
 * missing home directory must not abort the surrounding operation.
 *
 * @param {string} profilePath - Absolute path of the profile file.
 * @param {string} text - Full new content of the file.
 * @returns {Promise<boolean>} `true` when the file was written.
 */
async function writeShellProfileBestEffort(profilePath, text) {
  try {
    await fs.writeFile(profilePath, text, "utf8");
    return true;
  } catch {
    return false;
  }
}

/**
 * Sets or clears the managed `CLAUDE_CODE_EFFORT_LEVEL` export in the shell
 * profile.
 *
 * Any existing managed block (everything between the two markers, plus one
 * trailing newline) is stripped first. With a non-empty `effortLevel` a fresh
 * block is appended; with an empty one the profile is only cleaned.
 *
 * @param {string} effortLevel - Level to export, or "" to remove the block.
 *   Callers pass an already-normalized value; it is written verbatim.
 * @param {string} homeDir - Home directory used to locate the profile.
 * @returns {Promise<boolean>} `true` when the profile is in the requested
 *   state; `false` when it could not be read or written, or when clearing was
 *   requested and the profile does not exist.
 */
async function patchShellProfileEffortLevel(effortLevel, homeDir) {
  const profilePath = resolveShellProfilePath(homeDir);
  const markerPattern = new RegExp(
    `${escapeRegex(SHELL_EFFORT_MARKER_START)}[\\s\\S]*?${escapeRegex(SHELL_EFFORT_MARKER_END)}\\n?`,
    "g"
  );

  let text;
  try {
    text = await fs.readFile(profilePath, "utf8");
  } catch (error) {
    const isMissing = error && typeof error === "object" && error.code === "ENOENT";
    // Unreadable profile, or nothing to clear in a profile that does not exist.
    if (!isMissing || !effortLevel) return false;
    // A missing profile is created from scratch when a level is being set.
    text = "";
  }

  const cleaned = text.replace(markerPattern, "");
  if (!effortLevel) {
    // Only touch the file when a managed block was actually removed.
    if (cleaned === text) return true;
    return writeShellProfileBestEffort(profilePath, cleaned);
  }

  const block = [
    SHELL_EFFORT_MARKER_START,
    `export CLAUDE_CODE_EFFORT_LEVEL="${effortLevel}"`,
    SHELL_EFFORT_MARKER_END,
    ""
  ].join("\n");

  // Keep the managed block on its own lines even if the file lacks a final newline.
  const separator = cleaned.length > 0 && !cleaned.endsWith("\n") ? "\n" : "";
  return writeShellProfileBestEffort(profilePath, `${cleaned}${separator}${block}`);
}
488
+
428
489
  async function ensureToolBackupFileExists(backupFilePath) {
429
490
  const backupState = await readJsonObjectFile(backupFilePath, `Backup file '${backupFilePath}'`);
430
491
  if (!backupState.existed) {
@@ -733,7 +794,9 @@ export async function readClaudeCodeRoutingState({
733
794
  defaultSonnetModel: envConfig.ANTHROPIC_DEFAULT_SONNET_MODEL,
734
795
  defaultHaikuModel: envConfig.ANTHROPIC_DEFAULT_HAIKU_MODEL,
735
796
  subagentModel: envConfig.CLAUDE_CODE_SUBAGENT_MODEL,
736
- thinkingLevel: mapClaudeCodeThinkingTokensToLevel(envConfig.MAX_THINKING_TOKENS)
797
+ thinkingLevel: normalizeClaudeCodeEffortLevel(envConfig.CLAUDE_CODE_EFFORT_LEVEL)
798
+ || normalizeClaudeCodeEffortLevel(settingsState.data?.effortLevel)
799
+ || migrateLegacyThinkingTokensToEffortLevel(envConfig.MAX_THINKING_TOKENS)
737
800
  })
738
801
  };
739
802
  }
@@ -798,15 +861,33 @@ export async function patchClaudeCodeSettingsFile({
798
861
  if (normalizedBindings.subagentModel) nextSettings.env.CLAUDE_CODE_SUBAGENT_MODEL = normalizedBindings.subagentModel;
799
862
  else delete nextSettings.env.CLAUDE_CODE_SUBAGENT_MODEL;
800
863
 
801
- const maxThinkingTokens = mapClaudeCodeThinkingLevelToTokens(normalizedBindings.thinkingLevel);
802
- if (maxThinkingTokens) nextSettings.env.MAX_THINKING_TOKENS = maxThinkingTokens;
803
- else delete nextSettings.env.MAX_THINKING_TOKENS;
864
+ delete nextSettings.env.MAX_THINKING_TOKENS;
865
+
866
+ const effortLevel = normalizeClaudeCodeEffortLevel(normalizedBindings.thinkingLevel);
867
+ let shellProfileUpdated = false;
868
+ if (effortLevel) {
869
+ nextSettings.env.CLAUDE_CODE_EFFORT_LEVEL = effortLevel;
870
+ if (effortLevel === CLAUDE_CODE_EFFORT_LEVEL_SETTINGS_JSON_VALUE) {
871
+ nextSettings.effortLevel = effortLevel;
872
+ } else {
873
+ delete nextSettings.effortLevel;
874
+ }
875
+ shellProfileUpdated = await patchShellProfileEffortLevel(effortLevel, homeDir);
876
+ if (!shellProfileUpdated) {
877
+ nextSettings.effortLevel = CLAUDE_CODE_EFFORT_LEVEL_SETTINGS_JSON_VALUE;
878
+ }
879
+ } else {
880
+ delete nextSettings.env.CLAUDE_CODE_EFFORT_LEVEL;
881
+ delete nextSettings.effortLevel;
882
+ shellProfileUpdated = await patchShellProfileEffortLevel("", homeDir);
883
+ }
804
884
 
805
885
  await writeJsonObjectFile(resolvedSettingsPath, nextSettings);
806
886
  return {
807
887
  settingsFilePath: resolvedSettingsPath,
808
888
  backupFilePath: resolvedBackupPath,
809
889
  settingsCreated: !settingsState.existed,
890
+ shellProfileUpdated,
810
891
  baseUrl,
811
892
  bindings: normalizedBindings
812
893
  };
@@ -824,9 +905,11 @@ export async function unpatchClaudeCodeSettingsFile({
824
905
  const backupState = await readJsonObjectFile(resolvedBackupPath, `Backup file '${resolvedBackupPath}'`);
825
906
  const backup = sanitizeBackup(backupState.data, "claude-code");
826
907
  const restoredSettings = applyClaudeBackup(settingsState.data, backup);
908
+ delete restoredSettings.effortLevel;
827
909
 
828
910
  await writeJsonObjectFile(resolvedSettingsPath, restoredSettings);
829
911
  await writeJsonObjectFile(resolvedBackupPath, {});
912
+ await patchShellProfileEffortLevel("", homeDir);
830
913
 
831
914
  return {
832
915
  settingsFilePath: resolvedSettingsPath,
@@ -835,3 +918,48 @@ export async function unpatchClaudeCodeSettingsFile({
835
918
  backupRestored: backupHasData(backup)
836
919
  };
837
920
  }
921
+
922
/**
 * Sets or clears the Claude Code effort level without touching routing.
 *
 * The normalized level is written to `env.CLAUDE_CODE_EFFORT_LEVEL` in the
 * Claude Code settings file and mirrored into the shell profile; an empty
 * normalized level removes it from both places along with the top-level
 * `effortLevel` key.
 *
 * @param {object} [options]
 * @param {string} [options.settingsFilePath=""] - Explicit settings file; when
 *   empty the path comes from `resolveClaudeCodeSettingsFilePath`.
 * @param {string} [options.effortLevel=""] - Requested level; empty clears.
 * @param {object} [options.env=process.env] - Environment used for path resolution.
 * @param {string} [options.homeDir=os.homedir()] - Home directory for path
 *   resolution and shell profile lookup.
 * @returns {Promise<{settingsFilePath: string, effortLevel: string, shellProfileUpdated: boolean}>}
 *   The resolved settings path, the normalized level that was applied, and
 *   whether the shell profile step reported success.
 */
export async function patchClaudeCodeEffortLevel({
  settingsFilePath = "",
  effortLevel = "",
  env = process.env,
  homeDir = os.homedir()
} = {}) {
  const isRecord = (value) => Boolean(value) && typeof value === "object" && !Array.isArray(value);

  const requestedPath = settingsFilePath || resolveClaudeCodeSettingsFilePath({ env, homeDir });
  const resolvedSettingsPath = path.resolve(String(requestedPath).trim());
  const normalizedLevel = normalizeClaudeCodeEffortLevel(effortLevel);

  const settingsState = await readJsonObjectFile(
    resolvedSettingsPath,
    `Claude Code settings file '${resolvedSettingsPath}'`
  );
  // Work on a deep copy so the state returned by the reader is never mutated.
  const nextSettings = isRecord(settingsState.data) ? structuredClone(settingsState.data) : {};
  if (!isRecord(nextSettings.env)) {
    nextSettings.env = {};
  }

  let shellProfileUpdated = false;
  if (!normalizedLevel) {
    delete nextSettings.env.CLAUDE_CODE_EFFORT_LEVEL;
    delete nextSettings.effortLevel;
    shellProfileUpdated = await patchShellProfileEffortLevel("", homeDir);
  } else {
    nextSettings.env.CLAUDE_CODE_EFFORT_LEVEL = normalizedLevel;
    // The top-level key is kept only for the one value designated for settings.json.
    if (normalizedLevel !== CLAUDE_CODE_EFFORT_LEVEL_SETTINGS_JSON_VALUE) {
      delete nextSettings.effortLevel;
    } else {
      nextSettings.effortLevel = normalizedLevel;
    }
    shellProfileUpdated = await patchShellProfileEffortLevel(normalizedLevel, homeDir);
    // NOTE(review): when the shell profile step fails, the fixed settings.json
    // value is stored regardless of the requested level — confirm this is intended.
    if (!shellProfileUpdated) {
      nextSettings.effortLevel = CLAUDE_CODE_EFFORT_LEVEL_SETTINGS_JSON_VALUE;
    }
  }

  // Do not leave an empty `env` object behind in the settings file.
  if (Object.keys(nextSettings.env).length === 0) {
    delete nextSettings.env;
  }
  await writeJsonObjectFile(resolvedSettingsPath, nextSettings);

  return {
    settingsFilePath: resolvedSettingsPath,
    effortLevel: normalizedLevel,
    shellProfileUpdated
  };
}