npm - @khanglvm/llm-router - Versions diffs - 2.0.5 → 2.2.0 - Mend

@khanglvm/llm-router 2.0.5 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/CHANGELOG.md +9 -0
package/README.md +14 -5
package/package.json +1 -1
package/src/cli/router-module.js +35 -4
package/src/node/coding-tool-config.js +137 -9
package/src/node/web-console-client.js +26 -26
package/src/node/web-console-server.js +20 -3
package/src/node/web-console-styles.generated.js +1 -1
package/src/runtime/codex-request-transformer.js +22 -2
package/src/runtime/config.js +5 -1
package/src/runtime/handler/amp-web-search.js +130 -0
package/src/runtime/handler/provider-call.js +15 -3
package/src/runtime/handler/provider-translation.js +7 -2
package/src/runtime/handler/request.js +25 -0
package/src/runtime/handler.js +40 -5
package/src/runtime/thread-affinity.js +41 -0
package/src/shared/coding-tool-bindings.js +5 -0
package/src/translator/response/openai-to-claude.js +6 -1

package/CHANGELOG.md CHANGED

@@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
+## [2.2.0] - 2026-03-21
+### Added
+- Standalone `set-claude-code-effort-level` CLI operation sets `CLAUDE_CODE_EFFORT_LEVEL` in Claude Code settings and shell profile without requiring a router connection.
+- Web console effort level dropdown now works independently of routing — no need to connect Claude Code to LLM Router just to change thinking effort.
+### Changed
+- Claude Code live test uses process env vars (`ANTHROPIC_BASE_URL`, `ANTHROPIC_AUTH_TOKEN`, `ANTHROPIC_MODEL`) instead of patching settings.json, keeping the config file untouched during tests.
 ## [2.0.5] - 2026-03-15
 ### Fixed

package/README.md CHANGED

@@ -2,20 +2,18 @@
 LLM Router is a local and Cloudflare-deployable gateway for routing one client endpoint across multiple LLM providers, models, aliases, fallbacks, and rate limits.
-The npm package name stays the same:
+**Current version**: `2.2.0`
+NPM package:
 ```bash
 @khanglvm/llm-router
 ```
-The primary CLI command is now:
+Primary CLI command:
 ```bash
 llr
 ```
-`2.0.1` is the current public release. It includes the Web UI, AMP routing, and coding-tool integrations introduced in the 2.x line.
 ## Install
 ```bash
@@ -266,7 +264,18 @@ Run the JavaScript test suite:
 node --test $(rg --files -g "*.test.js" src)
 ```
+## Documentation
+Comprehensive documentation is available in the `docs/` directory:
+- **[Project Overview & PDR](./docs/project-overview-pdr.md)** — Feature matrix, target users, success metrics, constraints
+- **[Codebase Summary](./docs/codebase-summary.md)** — Directory structure, module relationships, entry points, test infrastructure
+- **[Code Standards](./docs/code-standards.md)** — Patterns, naming conventions, testing, error handling
+- **[System Architecture](./docs/system-architecture.md)** — Request lifecycle, subsystem boundaries, data flow, deployment models
+- **[Project Roadmap](./docs/project-roadmap.md)** — Current status, planned phases, timeline, success metrics
 ## Security and Releases
 - Security: [`SECURITY.md`](https://github.com/khanglvm/llm-router/blob/master/SECURITY.md)
 - Release notes: [`CHANGELOG.md`](https://github.com/khanglvm/llm-router/blob/master/CHANGELOG.md)
+- AMP routing: [`docs/amp-routing.md`](./docs/amp-routing.md)

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@khanglvm/llm-router",
-  "version": "2.0.5",
+  "version": "2.2.0",
   "description": "LLM Router: single gateway endpoint for multi-provider LLMs with unified OpenAI+Anthropic format and seamless fallback",
   "keywords": [
     "llm-router",

package/src/cli/router-module.js CHANGED

@@ -28,6 +28,7 @@ import {
   unpatchAmpClientConfigFiles as unpatchAmpClientConfigFilesFile
 } from "../node/amp-client-config.js";
 import {
+  patchClaudeCodeEffortLevel,
   patchClaudeCodeSettingsFile,
   patchCodexCliConfigFile,
   readClaudeCodeRoutingState,
@@ -73,7 +74,7 @@ import {
 import {
   CODEX_CLI_INHERIT_MODEL_VALUE,
   isCodexCliInheritModelBinding,
-  normalizeClaudeCodeThinkingLevel,
+  normalizeClaudeCodeEffortLevel,
   normalizeCodexCliReasoningEffort
 } from "../shared/coding-tool-bindings.js";
 import { FORMATS } from "../translator/index.js";
@@ -3609,7 +3610,7 @@ function normalizeClaudeBindingState(bindings = {}) {
     defaultSonnetModel: String(source.defaultSonnetModel || "").trim(),
     defaultHaikuModel: String(source.defaultHaikuModel || "").trim(),
     subagentModel: String(source.subagentModel || "").trim(),
-    thinkingLevel: normalizeClaudeCodeThinkingLevel(source.thinkingLevel)
+    thinkingLevel: normalizeClaudeCodeEffortLevel(source.thinkingLevel)
   };
 }
@@ -6532,6 +6533,32 @@ async function doSetClaudeCodeRouting(context) {
   };
 }
+async function doSetClaudeCodeEffortLevel(context) {
+  const args = context.args || {};
+  const settingsFilePath = String(readArg(args, ["claude-code-settings-file", "claudeCodeSettingsFile", "claude-settings-file", "claudeSettingsFile"], "") || "").trim();
+  const effortLevel = String(readArg(args, ["thinking-level", "thinkingLevel", "effort-level", "effortLevel"], "") || "").trim();
+  const result = await patchClaudeCodeEffortLevel({
+    settingsFilePath,
+    effortLevel,
+    env: process.env
+  });
+  return {
+    ok: true,
+    mode: context.mode,
+    exitCode: EXIT_SUCCESS,
+    data: buildOperationReport(
+      result.effortLevel ? "Claude Code Effort Level Set" : "Claude Code Effort Level Cleared",
+      [
+        ["Settings File", result.settingsFilePath],
+        ["Effort Level", result.effortLevel || "(cleared)"],
+        ["Shell Profile Updated", formatYesNo(result.shellProfileUpdated)]
+      ]
+    )
+  };
+}
 async function doDiscoverProviderModels(context) {
   const args = context.args || {};
   let headers;
@@ -8395,6 +8422,8 @@ async function runConfigAction(context) {
     case "set-claude-code-routing":
     case "set-claude-code":
       return doSetClaudeCodeRouting(context);
+    case "set-claude-code-effort-level":
+      return doSetClaudeCodeEffortLevel(context);
     case "discover-provider-models":
       return doDiscoverProviderModels(context);
     case "test-provider":
@@ -9264,8 +9293,9 @@ async function runAiHelpAction(context) {
     "### Claude Code",
     "- required_gate=patch_gate_claude_code=ready",
     `- enable/update route: ${CLI_COMMAND} config --operation=set-claude-code-routing --enabled=true --primary-model=<target_model_or_group>`,
-    `- optional bindings: --default-opus-model=<route> --default-sonnet-model=<route> --default-haiku-model=<route> --subagent-model=<route> --thinking-level=low|medium|high|max`,
+    `- optional bindings: --default-opus-model=<route> --default-sonnet-model=<route> --default-haiku-model=<route> --subagent-model=<route> --thinking-level=low|medium|high|max (sets CLAUDE_CODE_EFFORT_LEVEL in shell profile)`,
     `- disable route: ${CLI_COMMAND} config --operation=set-claude-code-routing --enabled=false`,
+    `- standalone effort level (no router needed): ${CLI_COMMAND} config --operation=set-claude-code-effort-level --thinking-level=low|medium|high|max`,
     "",
     "### Codex CLI",
     "- required_gate=patch_gate_codex_cli=ready",
@@ -10535,7 +10565,7 @@ const routerModule = {
           { name: "master-key-length", required: false, description: "Generated master key length (min 24).", example: "--master-key-length=48" },
           { name: "master-key-prefix", required: false, description: "Generated master key prefix.", example: "--master-key-prefix=gw_" },
           { name: "default-model", required: false, description: `For set-codex-cli-routing: managed route binding, or ${CODEX_CLI_INHERIT_MODEL_VALUE} to keep Codex's own model selection.`, example: "--default-model=chat.default" },
-          { name: "thinking-level", required: false, description: "For set-codex-cli-routing / set-claude-code-routing: reasoning level.", example: "--thinking-level=medium" },
+          { name: "thinking-level", required: false, description: "For set-codex-cli-routing / set-claude-code-routing / set-claude-code-effort-level: reasoning level.", example: "--thinking-level=medium" },
           { name: "primary-model", required: false, description: "For set-claude-code-routing: primary ANTHROPIC_MODEL route.", example: "--primary-model=chat.default" },
           { name: "default-opus-model", required: false, description: "For set-claude-code-routing: ANTHROPIC_DEFAULT_OPUS_MODEL route.", example: "--default-opus-model=chat.deep" },
           { name: "default-sonnet-model", required: false, description: "For set-claude-code-routing: ANTHROPIC_DEFAULT_SONNET_MODEL route.", example: "--default-sonnet-model=chat.default" },
@@ -10606,6 +10636,7 @@ const routerModule = {
           `${CLI_COMMAND} config --operation=set-amp-config --amp-subagent-mappings="oracle => rc/gpt-5.3-codex, librarian => rc/gpt-5.3-codex, search => rc/gpt-5.3-codex, look-at => rc/gpt-5.3-codex"`,
           `${CLI_COMMAND} config --operation=set-codex-cli-routing --enabled=true --default-model=chat.default`,
           `${CLI_COMMAND} config --operation=set-claude-code-routing --enabled=true --primary-model=chat.default --default-haiku-model=chat.fast`,
+          `${CLI_COMMAND} config --operation=set-claude-code-effort-level --thinking-level=high`,
           `${CLI_COMMAND} config --operation=set-amp-client-routing --enabled=true --amp-client-settings-scope=workspace`,
           `${CLI_COMMAND} config --operation=set-amp-config --patch-amp-client-config=true --amp-client-settings-scope=workspace --amp-client-url=${LOCAL_ROUTER_ORIGIN} --amp-client-api-key=gw_...`,
           `${CLI_COMMAND} config --operation=list-routing`,

package/src/node/coding-tool-config.js CHANGED

@@ -3,10 +3,13 @@ import path from "node:path";
 import { promises as fs } from "node:fs";
 import {
   CODEX_CLI_INHERIT_MODEL_VALUE,
+  CLAUDE_CODE_EFFORT_LEVEL_SETTINGS_JSON_VALUE,
   isCodexCliInheritModelBinding,
   mapClaudeCodeThinkingLevelToTokens,
   mapClaudeCodeThinkingTokensToLevel,
   normalizeClaudeCodeThinkingLevel,
+  normalizeClaudeCodeEffortLevel,
+  migrateLegacyThinkingTokensToEffortLevel,
   normalizeCodexCliReasoningEffort
 } from "../shared/coding-tool-bindings.js";
@@ -21,7 +24,7 @@ const CLAUDE_MANAGED_ENV_KEYS = Object.freeze([
   "ANTHROPIC_DEFAULT_SONNET_MODEL",
   "ANTHROPIC_DEFAULT_HAIKU_MODEL",
   "CLAUDE_CODE_SUBAGENT_MODEL",
-  "MAX_THINKING_TOKENS"
+  "CLAUDE_CODE_EFFORT_LEVEL"
 ]);
 const CLAUDE_BACKUP_ENV_KEYS = Object.freeze([
   ...CLAUDE_MANAGED_ENV_KEYS,
@@ -338,7 +341,7 @@ function normalizeClaudeBindings(bindings = {}) {
     defaultSonnetModel: normalizeModelBinding(source.defaultSonnetModel),
     defaultHaikuModel: normalizeModelBinding(source.defaultHaikuModel),
     subagentModel: normalizeModelBinding(source.subagentModel),
-    thinkingLevel: normalizeClaudeCodeThinkingLevel(source.thinkingLevel)
+    thinkingLevel: normalizeClaudeCodeEffortLevel(source.thinkingLevel)
   };
 }
@@ -392,16 +395,20 @@ function applyCodexBackup(document, backup = {}) {
 function captureClaudeBackup(config) {
   const env = config?.env && typeof config.env === "object" && !Array.isArray(config.env) ? config.env : {};
   const backupEnv = {};
-  for (const key of CLAUDE_BACKUP_ENV_KEYS) {
+  for (const key of [...CLAUDE_BACKUP_ENV_KEYS, "MAX_THINKING_TOKENS"]) {
     if (Object.prototype.hasOwnProperty.call(env, key)) {
       backupEnv[key] = getBackupValue(env[key]);
     }
   }
-  return {
+  const backup = {
     tool: "claude-code",
     version: 1,
     env: backupEnv
   };
+  if (config && typeof config === "object" && config.effortLevel !== undefined) {
+    backup.effortLevel = getBackupValue(config.effortLevel);
+  }
+  return backup;
 }
 function applyClaudeBackup(config, backup = {}) {
@@ -412,7 +419,7 @@ function applyClaudeBackup(config, backup = {}) {
     ? { ...next.env }
     : {};
-  for (const key of CLAUDE_BACKUP_ENV_KEYS) {
+  for (const key of [...CLAUDE_BACKUP_ENV_KEYS, "MAX_THINKING_TOKENS"]) {
     if (backup?.env && Object.prototype.hasOwnProperty.call(backup.env, key)) {
       applyBackupValue(env, key, backup.env[key]);
     } else {
@@ -420,11 +427,65 @@ function applyClaudeBackup(config, backup = {}) {
     }
   }
+  if (backup?.effortLevel?.exists) {
+    next.effortLevel = backup.effortLevel.value;
+  } else {
+    delete next.effortLevel;
+  }
   if (Object.keys(env).length > 0) next.env = env;
   else delete next.env;
   return next;
 }
+const SHELL_EFFORT_MARKER_START = "# >>> llm-router effort-level >>>";
+const SHELL_EFFORT_MARKER_END = "# <<< llm-router effort-level <<<";
+function resolveShellProfilePath(homeDir) {
+  const shell = String(process.env.SHELL || "").trim();
+  const profileName = shell.endsWith("/zsh") || shell.endsWith("/zsh5") ? ".zshrc" : ".bashrc";
+  return path.join(homeDir, profileName);
+}
+async function patchShellProfileEffortLevel(effortLevel, homeDir) {
+  const profilePath = resolveShellProfilePath(homeDir);
+  const markerPattern = new RegExp(
+    `${escapeRegex(SHELL_EFFORT_MARKER_START)}[\\s\\S]*?${escapeRegex(SHELL_EFFORT_MARKER_END)}\\n?`,
+    "g"
+  );
+  let text;
+  try {
+    text = await fs.readFile(profilePath, "utf8");
+  } catch (error) {
+    if (error && typeof error === "object" && error.code === "ENOENT") {
+      if (!effortLevel) return false;
+      text = "";
+    } else {
+      return false;
+    }
+  }
+  const cleaned = text.replace(markerPattern, "");
+  if (!effortLevel) {
+    if (cleaned !== text) {
+      await fs.writeFile(profilePath, cleaned, "utf8");
+    }
+    return true;
+  }
+  const block = [
+    SHELL_EFFORT_MARKER_START,
+    `export CLAUDE_CODE_EFFORT_LEVEL="${effortLevel}"`,
+    SHELL_EFFORT_MARKER_END,
+    ""
+  ].join("\n");
+  const separator = cleaned.length > 0 && !cleaned.endsWith("\n") ? "\n" : "";
+  await fs.writeFile(profilePath, `${cleaned}${separator}${block}`, "utf8");
+  return true;
+}
 async function ensureToolBackupFileExists(backupFilePath) {
   const backupState = await readJsonObjectFile(backupFilePath, `Backup file '${backupFilePath}'`);
   if (!backupState.existed) {
@@ -733,7 +794,9 @@ export async function readClaudeCodeRoutingState({
       defaultSonnetModel: envConfig.ANTHROPIC_DEFAULT_SONNET_MODEL,
       defaultHaikuModel: envConfig.ANTHROPIC_DEFAULT_HAIKU_MODEL,
       subagentModel: envConfig.CLAUDE_CODE_SUBAGENT_MODEL,
-      thinkingLevel: mapClaudeCodeThinkingTokensToLevel(envConfig.MAX_THINKING_TOKENS)
+      thinkingLevel: normalizeClaudeCodeEffortLevel(envConfig.CLAUDE_CODE_EFFORT_LEVEL)
+        || normalizeClaudeCodeEffortLevel(settingsState.data?.effortLevel)
+        || migrateLegacyThinkingTokensToEffortLevel(envConfig.MAX_THINKING_TOKENS)
     })
   };
 }
@@ -798,15 +861,33 @@ export async function patchClaudeCodeSettingsFile({
   if (normalizedBindings.subagentModel) nextSettings.env.CLAUDE_CODE_SUBAGENT_MODEL = normalizedBindings.subagentModel;
   else delete nextSettings.env.CLAUDE_CODE_SUBAGENT_MODEL;
-  const maxThinkingTokens = mapClaudeCodeThinkingLevelToTokens(normalizedBindings.thinkingLevel);
-  if (maxThinkingTokens) nextSettings.env.MAX_THINKING_TOKENS = maxThinkingTokens;
-  else delete nextSettings.env.MAX_THINKING_TOKENS;
+  delete nextSettings.env.MAX_THINKING_TOKENS;
+  const effortLevel = normalizeClaudeCodeEffortLevel(normalizedBindings.thinkingLevel);
+  let shellProfileUpdated = false;
+  if (effortLevel) {
+    nextSettings.env.CLAUDE_CODE_EFFORT_LEVEL = effortLevel;
+    if (effortLevel === CLAUDE_CODE_EFFORT_LEVEL_SETTINGS_JSON_VALUE) {
+      nextSettings.effortLevel = effortLevel;
+    } else {
+      delete nextSettings.effortLevel;
+    }
+    shellProfileUpdated = await patchShellProfileEffortLevel(effortLevel, homeDir);
+    if (!shellProfileUpdated) {
+      nextSettings.effortLevel = CLAUDE_CODE_EFFORT_LEVEL_SETTINGS_JSON_VALUE;
+    }
+  } else {
+    delete nextSettings.env.CLAUDE_CODE_EFFORT_LEVEL;
+    delete nextSettings.effortLevel;
+    shellProfileUpdated = await patchShellProfileEffortLevel("", homeDir);
+  }
   await writeJsonObjectFile(resolvedSettingsPath, nextSettings);
   return {
     settingsFilePath: resolvedSettingsPath,
     backupFilePath: resolvedBackupPath,
     settingsCreated: !settingsState.existed,
+    shellProfileUpdated,
     baseUrl,
     bindings: normalizedBindings
   };
@@ -824,9 +905,11 @@ export async function unpatchClaudeCodeSettingsFile({
   const backupState = await readJsonObjectFile(resolvedBackupPath, `Backup file '${resolvedBackupPath}'`);
   const backup = sanitizeBackup(backupState.data, "claude-code");
   const restoredSettings = applyClaudeBackup(settingsState.data, backup);
+  delete restoredSettings.effortLevel;
   await writeJsonObjectFile(resolvedSettingsPath, restoredSettings);
   await writeJsonObjectFile(resolvedBackupPath, {});
+  await patchShellProfileEffortLevel("", homeDir);
   return {
     settingsFilePath: resolvedSettingsPath,
@@ -835,3 +918,48 @@ export async function unpatchClaudeCodeSettingsFile({
     backupRestored: backupHasData(backup)
   };
 }
+export async function patchClaudeCodeEffortLevel({
+  settingsFilePath = "",
+  effortLevel = "",
+  env = process.env,
+  homeDir = os.homedir()
+} = {}) {
+  const resolvedSettingsPath = path.resolve(String(settingsFilePath || resolveClaudeCodeSettingsFilePath({ env, homeDir })).trim());
+  const normalizedLevel = normalizeClaudeCodeEffortLevel(effortLevel);
+  const settingsState = await readJsonObjectFile(resolvedSettingsPath, `Claude Code settings file '${resolvedSettingsPath}'`);
+  const nextSettings = settingsState.data && typeof settingsState.data === "object" && !Array.isArray(settingsState.data)
+    ? structuredClone(settingsState.data)
+    : {};
+  if (!nextSettings.env || typeof nextSettings.env !== "object" || Array.isArray(nextSettings.env)) {
+    nextSettings.env = {};
+  }
+  let shellProfileUpdated = false;
+  if (normalizedLevel) {
+    nextSettings.env.CLAUDE_CODE_EFFORT_LEVEL = normalizedLevel;
+    if (normalizedLevel === CLAUDE_CODE_EFFORT_LEVEL_SETTINGS_JSON_VALUE) {
+      nextSettings.effortLevel = normalizedLevel;
+    } else {
+      delete nextSettings.effortLevel;
+    }
+    shellProfileUpdated = await patchShellProfileEffortLevel(normalizedLevel, homeDir);
+    if (!shellProfileUpdated) {
+      nextSettings.effortLevel = CLAUDE_CODE_EFFORT_LEVEL_SETTINGS_JSON_VALUE;
+    }
+  } else {
+    delete nextSettings.env.CLAUDE_CODE_EFFORT_LEVEL;
+    delete nextSettings.effortLevel;
+    shellProfileUpdated = await patchShellProfileEffortLevel("", homeDir);
+  }
+  if (Object.keys(nextSettings.env).length === 0) delete nextSettings.env;
+  await writeJsonObjectFile(resolvedSettingsPath, nextSettings);
+  return {
+    settingsFilePath: resolvedSettingsPath,
+    effortLevel: normalizedLevel,
+    shellProfileUpdated
+  };
+}