@khanglvm/llm-router 2.3.2 → 2.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -7,6 +7,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [2.3.5] - 2026-04-17
11
+
12
+ ### Fixed
13
+ - Added model-aware reasoning/effort conversion so routed requests automatically fall back to the safest supported effort level for the actual backend model, including GPT-5 Codex/OpenAI targets and Claude Opus 4.6 vs 4.7 targets behind the same alias.
14
+
15
+ ## [2.3.4] - 2026-04-17
16
+
17
+ ### Fixed
18
+ - Updated the live provider suite to exercise RamCloud with `minimax-m2.7` only and switched the Claude Code live alias from `normal` to `default`, matching the generated router config so real-provider publish checks pass again.
19
+
20
+ ## [2.3.3] - 2026-04-17
21
+
22
+ ### Fixed
23
+ - Prevented repeated failed OpenAI `/v1/chat/completions` tool-routing attempts for Claude Code requests on dual-format Claude routes by respecting model format preferences and suppressing noisy retries after a successful Claude fallback.
24
+
10
25
  ## [2.3.2] - 2026-04-17
11
26
 
12
27
  ### Fixed
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@khanglvm/llm-router",
3
- "version": "2.3.2",
3
+ "version": "2.3.5",
4
4
  "description": "LLM Router: single gateway endpoint for multi-provider LLMs with unified OpenAI+Anthropic format and seamless fallback",
5
5
  "keywords": [
6
6
  "llm-router",
@@ -43,10 +43,87 @@ import {
43
43
  resolveLargeRequestLogThresholdBytes
44
44
  } from "./large-request-log.js";
45
45
 
46
+ const OPENAI_TOOL_ROUTING_SUPPRESSION_TTL_MS = 30 * 60 * 1000;
47
+ const openAIToolRoutingSuppressionUntil = new Map();
48
+
46
49
  function isSubscriptionProvider(provider) {
47
50
  return provider?.type === "subscription";
48
51
  }
49
52
 
53
+ function normalizeFormatList(values) {
54
+ return [...new Set(
55
+ (Array.isArray(values) ? values : [values])
56
+ .map((value) => String(value || "").trim())
57
+ .filter((value) => value === FORMATS.OPENAI || value === FORMATS.CLAUDE)
58
+ )];
59
+ }
60
+
61
+ function resolveCandidateModel(provider, model, modelId) {
62
+ if (model && typeof model === "object" && !Array.isArray(model)) {
63
+ return model;
64
+ }
65
+ const normalizedModelId = String(modelId || "").trim();
66
+ if (!normalizedModelId || !Array.isArray(provider?.models)) return null;
67
+ return provider.models.find((entry) => String(entry?.id || "").trim() === normalizedModelId) || null;
68
+ }
69
+
70
+ function getProviderModelSupportedFormats(provider, model, modelId) {
71
+ const resolvedModel = resolveCandidateModel(provider, model, modelId);
72
+ const configuredFormats = normalizeFormatList(resolvedModel?.formats || resolvedModel?.format);
73
+ const resolvedModelId = String(resolvedModel?.id || modelId || "").trim();
74
+ if (!resolvedModelId) return configuredFormats;
75
+
76
+ const preferredFormat = provider?.lastProbe?.modelPreferredFormat?.[resolvedModelId];
77
+ if (preferredFormat === FORMATS.OPENAI || preferredFormat === FORMATS.CLAUDE) {
78
+ return [preferredFormat];
79
+ }
80
+
81
+ const probedFormats = normalizeFormatList(provider?.lastProbe?.modelSupport?.[resolvedModelId]);
82
+ return probedFormats.length > 0 ? probedFormats : configuredFormats;
83
+ }
84
+
85
+ function getProviderModelPreferredFormat(provider, model, modelId) {
86
+ const resolvedModel = resolveCandidateModel(provider, model, modelId);
87
+ const resolvedModelId = String(resolvedModel?.id || modelId || "").trim();
88
+ if (!resolvedModelId) return "";
89
+ const preferredFormat = String(provider?.lastProbe?.modelPreferredFormat?.[resolvedModelId] || "").trim();
90
+ return preferredFormat === FORMATS.OPENAI || preferredFormat === FORMATS.CLAUDE
91
+ ? preferredFormat
92
+ : "";
93
+ }
94
+
95
+ function buildOpenAIToolRoutingSuppressionKey(candidate) {
96
+ const providerId = String(candidate?.providerId || candidate?.provider?.id || "").trim();
97
+ const modelId = String(candidate?.modelId || candidate?.model?.id || candidate?.backend || "").trim();
98
+ if (!providerId || !modelId) return "";
99
+ return `${providerId}/${modelId}`;
100
+ }
101
+
102
+ function pruneOpenAIToolRoutingSuppressions(now = Date.now()) {
103
+ for (const [key, expiresAt] of openAIToolRoutingSuppressionUntil.entries()) {
104
+ if (!Number.isFinite(expiresAt) || expiresAt <= now) {
105
+ openAIToolRoutingSuppressionUntil.delete(key);
106
+ }
107
+ }
108
+ }
109
+
110
+ function isOpenAIToolRoutingSuppressed(candidate, now = Date.now()) {
111
+ const key = buildOpenAIToolRoutingSuppressionKey(candidate);
112
+ if (!key) return false;
113
+ pruneOpenAIToolRoutingSuppressions(now);
114
+ return Number(openAIToolRoutingSuppressionUntil.get(key)) > now;
115
+ }
116
+
117
+ function suppressOpenAIToolRouting(candidate, now = Date.now()) {
118
+ const key = buildOpenAIToolRoutingSuppressionKey(candidate);
119
+ if (!key) return;
120
+ openAIToolRoutingSuppressionUntil.set(key, now + OPENAI_TOOL_ROUTING_SUPPRESSION_TTL_MS);
121
+ }
122
+
123
+ export function resetOpenAIToolRoutingLearningState() {
124
+ openAIToolRoutingSuppressionUntil.clear();
125
+ }
126
+
50
127
  function queueLargeRequestEvent(onLargeRequestLog, payload) {
51
128
  if (typeof onLargeRequestLog !== "function") return;
52
129
  try {
@@ -313,6 +390,9 @@ function normalizeProviderRequestKind(targetFormat, requestKind) {
313
390
 
314
391
  function shouldPreferOpenAIForClaudeToolCalls({
315
392
  provider,
393
+ model,
394
+ modelId,
395
+ candidate,
316
396
  sourceFormat,
317
397
  targetFormat,
318
398
  requestKind,
@@ -320,6 +400,11 @@ function shouldPreferOpenAIForClaudeToolCalls({
320
400
  } = {}) {
321
401
  if (sourceFormat !== FORMATS.CLAUDE || targetFormat !== FORMATS.CLAUDE) return false;
322
402
  if (!hasToolDefinitions(body)) return false;
403
+ if (candidate && isOpenAIToolRoutingSuppressed(candidate)) return false;
404
+ const preferredFormat = getProviderModelPreferredFormat(provider, model, modelId);
405
+ if (preferredFormat === FORMATS.CLAUDE) return false;
406
+ const modelFormats = getProviderModelSupportedFormats(provider, model, modelId);
407
+ if (modelFormats.length > 0 && !modelFormats.includes(FORMATS.OPENAI)) return false;
323
408
  if (!getProviderFormats(provider).includes(FORMATS.OPENAI)) return false;
324
409
  return Boolean(resolveProviderUrl(provider, FORMATS.OPENAI, normalizeProviderRequestKind(FORMATS.OPENAI, requestKind)));
325
410
  }
@@ -664,6 +749,9 @@ export async function makeProviderCall({
664
749
 
665
750
  const preferOpenAIToolRouting = !isSubscriptionProvider(provider) && shouldPreferOpenAIForClaudeToolCalls({
666
751
  provider,
752
+ model: candidate?.model,
753
+ modelId: candidate?.modelId,
754
+ candidate,
667
755
  sourceFormat,
668
756
  targetFormat,
669
757
  requestKind,
@@ -1064,6 +1152,9 @@ export async function makeProviderCall({
1064
1152
  try {
1065
1153
  const fallbackResponse = await executeHttpProviderRequest(fallbackPlan);
1066
1154
  if (fallbackResponse instanceof Response && fallbackResponse.ok) {
1155
+ if (preferOpenAIToolRouting) {
1156
+ suppressOpenAIToolRouting(candidate);
1157
+ }
1067
1158
  response = fallbackResponse;
1068
1159
  activePlan = fallbackPlan;
1069
1160
  }
@@ -16,6 +16,16 @@ const EFFORT_HEADER_PATTERNS = [
16
16
  /thinking[-_]?effort/i
17
17
  ];
18
18
 
19
+ const ORDERED_EFFORT_LEVELS = Object.freeze([
20
+ "none",
21
+ "minimal",
22
+ "low",
23
+ "medium",
24
+ "high",
25
+ "xhigh",
26
+ "max"
27
+ ]);
28
+
19
29
  function readHeaderValue(headers, name) {
20
30
  if (!headers || !name) return "";
21
31
  if (typeof headers.get === "function") {
@@ -63,7 +73,8 @@ function normalizeEffort(rawValue) {
63
73
  if (compact === "low") return "low";
64
74
  if (["medium", "normal", "standard", "default"].includes(compact)) return "medium";
65
75
  if (compact === "high") return "high";
66
- if (["xhigh", "extra high", "max", "maximum"].includes(compact)) return "xhigh";
76
+ if (["xhigh", "extra high"].includes(compact)) return "xhigh";
77
+ if (["max", "maximum"].includes(compact)) return "max";
67
78
 
68
79
  if (compact.includes("ultra")) return "xhigh";
69
80
  if (compact.includes("think hard") || compact.includes("harder")) return "high";
@@ -71,6 +82,60 @@ function normalizeEffort(rawValue) {
71
82
  return "";
72
83
  }
73
84
 
85
+ function getEffortRank(effort) {
86
+ return ORDERED_EFFORT_LEVELS.indexOf(normalizeEffort(effort));
87
+ }
88
+
89
+ function normalizeModelMatcherValue(value) {
90
+ let text = String(value || "").trim().toLowerCase();
91
+ if (!text) return "";
92
+
93
+ const slashIndex = Math.max(text.lastIndexOf("/"), text.lastIndexOf(":"));
94
+ if (slashIndex >= 0) {
95
+ text = text.slice(slashIndex + 1);
96
+ }
97
+
98
+ return text
99
+ .replace(/[^a-z0-9]+/g, "-")
100
+ .replace(/-+/g, "-")
101
+ .replace(/^-+|-+$/g, "");
102
+ }
103
+
104
+ function matchesModelPattern(targetModel, pattern) {
105
+ const normalizedModel = normalizeModelMatcherValue(targetModel);
106
+ if (!normalizedModel) return false;
107
+ return new RegExp(`(?:^|-)${pattern}(?:-|$)`).test(normalizedModel);
108
+ }
109
+
110
+ function resolveSupportedEffort(requestedEffort, supportedEfforts = []) {
111
+ const normalizedRequested = normalizeEffort(requestedEffort);
112
+ if (!normalizedRequested) return "";
113
+
114
+ const normalizedSupported = [...new Set(
115
+ (Array.isArray(supportedEfforts) ? supportedEfforts : [supportedEfforts])
116
+ .map((effort) => normalizeEffort(effort))
117
+ .filter(Boolean)
118
+ )];
119
+ if (normalizedSupported.length === 0) return normalizedRequested;
120
+ if (normalizedSupported.includes(normalizedRequested)) return normalizedRequested;
121
+
122
+ const requestedRank = getEffortRank(normalizedRequested);
123
+ let bestAtOrBelow = "";
124
+ let bestAtOrBelowRank = -1;
125
+ for (const supported of normalizedSupported) {
126
+ const supportedRank = getEffortRank(supported);
127
+ if (supportedRank <= requestedRank && supportedRank > bestAtOrBelowRank) {
128
+ bestAtOrBelow = supported;
129
+ bestAtOrBelowRank = supportedRank;
130
+ }
131
+ }
132
+ if (bestAtOrBelow) return bestAtOrBelow;
133
+
134
+ return normalizedSupported.reduce((lowest, supported) => (
135
+ getEffortRank(supported) < getEffortRank(lowest) ? supported : lowest
136
+ ), normalizedSupported[0]);
137
+ }
138
+
74
139
  function parseNumber(value) {
75
140
  const parsed = Number(value);
76
141
  if (!Number.isFinite(parsed)) return undefined;
@@ -81,6 +146,7 @@ function extractEffortFromBody(body) {
81
146
  if (!body || typeof body !== "object") return "";
82
147
 
83
148
  const directCandidates = [
149
+ body.output_config?.effort,
84
150
  body.reasoning_effort,
85
151
  body.reasoningEffort,
86
152
  body["reasoning-effort"],
@@ -121,12 +187,15 @@ function inferEffortFromClaudeThinking(body) {
121
187
 
122
188
  if (Number.isFinite(maxTokens) && maxTokens > 0) {
123
189
  const ratio = budgetTokens / maxTokens;
124
- if (ratio >= 0.9) return "max";
190
+ if (ratio >= 0.97) return "max";
191
+ if (ratio >= 0.82) return "xhigh";
125
192
  if (ratio >= 0.65) return "high";
126
193
  if (ratio >= 0.3) return "medium";
127
194
  return "low";
128
195
  }
129
196
 
197
+ if (budgetTokens >= 31999) return "max";
198
+ if (budgetTokens >= 28000) return "xhigh";
130
199
  if (budgetTokens >= 24000) return "high";
131
200
  if (budgetTokens >= 6000) return "medium";
132
201
  return "low";
@@ -153,39 +222,55 @@ function prefersNestedOpenAIReasoning(targetModel) {
153
222
  return model.startsWith("gpt-5");
154
223
  }
155
224
 
156
- function supportsOpenAIXHighEffort(targetModel) {
157
- const model = String(targetModel || "").trim().toLowerCase();
158
- if (!model) return false;
159
- if (model.startsWith("gpt-5.2")) return true;
160
- if (model.startsWith("gpt-5.3-codex")) return true;
161
- return false;
225
+ function resolveOpenAISupportedEfforts(targetModel) {
226
+ if (matchesModelPattern(targetModel, "gpt-5-4-pro")) return ["medium", "high", "xhigh"];
227
+ if (matchesModelPattern(targetModel, "gpt-5-pro")) return ["high"];
228
+ if (matchesModelPattern(targetModel, "gpt-5-4")) return ["none", "low", "medium", "high", "xhigh"];
229
+ if (matchesModelPattern(targetModel, "gpt-5-3-codex")) return ["low", "medium", "high", "xhigh"];
230
+ if (matchesModelPattern(targetModel, "gpt-5-2-codex")) return ["low", "medium", "high", "xhigh"];
231
+ if (matchesModelPattern(targetModel, "gpt-5-2-pro")) return ["medium", "high", "xhigh"];
232
+ if (matchesModelPattern(targetModel, "gpt-5-2")) return ["none", "low", "medium", "high", "xhigh"];
233
+ if (matchesModelPattern(targetModel, "gpt-5-1-codex")) return ["low", "medium", "high"];
234
+ if (matchesModelPattern(targetModel, "gpt-5-1")) return ["none", "low", "medium", "high"];
235
+ if (matchesModelPattern(targetModel, "gpt-5")) return ["minimal", "low", "medium", "high"];
236
+ return ["low", "medium", "high"];
162
237
  }
163
238
 
164
- function supportsOpenAINoneEffort(targetModel) {
165
- const model = String(targetModel || "").trim().toLowerCase();
166
- if (!model) return false;
167
- if (model.startsWith("gpt-5.1") && !model.includes("codex")) return true;
168
- if (model.startsWith("gpt-5.2") && !model.includes("codex") && !model.includes("pro")) return true;
169
- return false;
239
+ function resolveClaudeEffortProfile(targetModel) {
240
+ if (matchesModelPattern(targetModel, "opus-4-7")) {
241
+ return {
242
+ supportsEffortApi: true,
243
+ requiresAdaptiveThinking: true,
244
+ preserveManualBudgetThinking: false,
245
+ supportedEfforts: ["low", "medium", "high", "xhigh", "max"]
246
+ };
247
+ }
248
+ if (matchesModelPattern(targetModel, "opus-4-6") || matchesModelPattern(targetModel, "sonnet-4-6")) {
249
+ return {
250
+ supportsEffortApi: true,
251
+ requiresAdaptiveThinking: true,
252
+ preserveManualBudgetThinking: true,
253
+ supportedEfforts: ["low", "medium", "high", "max"]
254
+ };
255
+ }
256
+ if (matchesModelPattern(targetModel, "opus-4-5")) {
257
+ return {
258
+ supportsEffortApi: false,
259
+ requiresAdaptiveThinking: false,
260
+ preserveManualBudgetThinking: true,
261
+ supportedEfforts: ["low", "medium", "high", "max"]
262
+ };
263
+ }
264
+ return {
265
+ supportsEffortApi: false,
266
+ requiresAdaptiveThinking: false,
267
+ preserveManualBudgetThinking: true,
268
+ supportedEfforts: ["low", "medium", "high"]
269
+ };
170
270
  }
171
271
 
172
272
  function mapEffortToOpenAI(effort, targetModel) {
173
- switch (effort) {
174
- case "none":
175
- return supportsOpenAINoneEffort(targetModel) ? "none" : "low";
176
- case "minimal":
177
- return "low";
178
- case "low":
179
- return "low";
180
- case "medium":
181
- return "medium";
182
- case "high":
183
- return "high";
184
- case "xhigh":
185
- return supportsOpenAIXHighEffort(targetModel) ? "xhigh" : "high";
186
- default:
187
- return "";
188
- }
273
+ return resolveSupportedEffort(effort, resolveOpenAISupportedEfforts(targetModel));
189
274
  }
190
275
 
191
276
  function applyOpenAIEffort(providerBody, effort, targetModel) {
@@ -236,6 +321,7 @@ function toClaudeThinkingBudget(effort, maxTokens) {
236
321
  case "high":
237
322
  return clampBudget(Math.round(safeMaxTokens * 0.75), 1024, maxBudget);
238
323
  case "xhigh":
324
+ return clampBudget(Math.round(safeMaxTokens * 0.9), 1024, maxBudget);
239
325
  case "max":
240
326
  return maxBudget;
241
327
  default:
@@ -243,10 +329,37 @@ function toClaudeThinkingBudget(effort, maxTokens) {
243
329
  }
244
330
  }
245
331
 
246
- function applyClaudeEffort(providerBody, effort, { sourceFormat, originalBody } = {}) {
332
+ function applyClaudeEffort(providerBody, effort, { sourceFormat, originalBody, targetModel } = {}) {
247
333
  const nextBody = { ...(providerBody || {}) };
334
+ const requestedEffort = normalizeEffort(effort);
335
+ const profile = resolveClaudeEffortProfile(targetModel);
336
+ const mappedEffort = resolveSupportedEffort(requestedEffort, profile.supportedEfforts);
337
+
338
+ if (profile.supportsEffortApi && mappedEffort) {
339
+ nextBody.output_config = {
340
+ ...(nextBody.output_config && typeof nextBody.output_config === "object" && !Array.isArray(nextBody.output_config)
341
+ ? nextBody.output_config
342
+ : {}),
343
+ effort: mappedEffort
344
+ };
345
+
346
+ const explicitBudgetTokens = parseNumber(nextBody?.thinking?.budget_tokens);
347
+ const explicitThinkingType = String(nextBody?.thinking?.type || "").trim().toLowerCase();
348
+ if (profile.preserveManualBudgetThinking && Number.isFinite(explicitBudgetTokens)) {
349
+ return nextBody;
350
+ }
351
+
352
+ if (profile.requiresAdaptiveThinking) {
353
+ if (explicitThinkingType === "disabled") {
354
+ nextBody.thinking = { type: "disabled" };
355
+ } else {
356
+ nextBody.thinking = { type: "adaptive" };
357
+ }
358
+ }
359
+ return nextBody;
360
+ }
248
361
 
249
- if (effort === "none" || effort === "minimal") {
362
+ if (requestedEffort === "none" || requestedEffort === "minimal") {
250
363
  delete nextBody.thinking;
251
364
  return nextBody;
252
365
  }
@@ -267,7 +380,7 @@ function applyClaudeEffort(providerBody, effort, { sourceFormat, originalBody }
267
380
  nextBody.max_tokens = maxTokens;
268
381
  }
269
382
 
270
- const budgetTokens = toClaudeThinkingBudget(effort, maxTokens);
383
+ const budgetTokens = toClaudeThinkingBudget(mappedEffort || requestedEffort, maxTokens);
271
384
  if (!Number.isFinite(budgetTokens)) {
272
385
  return nextBody;
273
386
  }
@@ -316,7 +429,8 @@ export function applyReasoningEffortMapping({
316
429
  if (targetFormat === FORMATS.CLAUDE) {
317
430
  return applyClaudeEffort(providerBody, effort, {
318
431
  sourceFormat,
319
- originalBody
432
+ originalBody,
433
+ targetModel
320
434
  });
321
435
  }
322
436
  return providerBody;