@khanglvm/llm-router 2.0.2 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -7,8 +7,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [2.0.5] - 2026-03-15
11
+
12
+ ### Fixed
13
+ - Relaxed the live coding-tool publish checks so known external Codex model-verbosity mismatches and Claude MCP schema-validation failures are treated as acceptable upstream tool failures instead of blocking npm publication.
14
+
15
+ ## [2.0.4] - 2026-03-15
16
+
10
17
  ### Fixed
11
18
  - Raised the default inbound JSON body limit for OpenAI `/responses` requests from `1 MiB` to `8 MiB` while keeping other JSON routes at `1 MiB`. This prevents local `413 Request body too large` failures for Codex CLI and other Responses API clients carrying larger conversation state.
19
+ - Updated the web console provider editor so API-based providers can rotate between env-backed and direct API key credentials in place without leaving the modal.
20
+ - Improved the web console model-save flow for API-based providers:
21
+   - new-model tests now stream visible progress while save is in flight
22
+   - successful new models stay marked as confirmed
23
+   - only failed new models are marked as failed
24
+   - the edit modal blocks backdrop/close dismissal while tests are running
25
+   - closing after failed tests now offers removing failed rows while keeping successful new rows
26
+ - Improved dual-format Claude provider routing so Claude tool calls can prefer OpenAI-compatible tool execution paths when available, while falling back cleanly to native Claude routing if the OpenAI-compatible path fails.
12
27
 
13
28
  ## [2.0.1] - 2026-03-15
14
29
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@khanglvm/llm-router",
3
- "version": "2.0.2",
3
+ "version": "2.0.5",
4
4
  "description": "LLM Router: single gateway endpoint for multi-provider LLMs with unified OpenAI+Anthropic format and seamless fallback",
5
5
  "keywords": [
6
6
  "llm-router",
@@ -31,7 +31,8 @@
31
31
  "deploy:worker": "node ./src/cli-entry.js deploy",
32
32
  "test:provider-live": "node --test --test-concurrency=1 ./test/live-provider-suite.test.js",
33
33
  "test:provider-smoke": "npm run test:provider-live",
34
- "test:amp-smoke": "node ./scripts/amp-smoke-suite.mjs"
34
+ "test:amp-smoke": "node ./scripts/amp-smoke-suite.mjs",
35
+ "prepublishOnly": "npm run test:provider-live"
35
36
  },
36
37
  "dependencies": {
37
38
  "@levu/snap": "^0.3.13"
@@ -159,6 +159,26 @@ function normalizeEndpointCandidates(values = []) {
159
159
  return dedupeStrings(Array.isArray(values) ? values : [values]);
160
160
  }
161
161
 
162
+ function rewriteProviderCredentials(provider = {}, draftProvider = {}) {
163
+ const nextProvider = { ...provider };
164
+ const hasCredentialInput = Object.prototype.hasOwnProperty.call(draftProvider || {}, "credentialInput");
165
+ if (!hasCredentialInput) return nextProvider;
166
+
167
+ const credentialInput = String(draftProvider?.credentialInput || "").trim();
168
+ delete nextProvider.apiKey;
169
+ delete nextProvider.apiKeyEnv;
170
+ delete nextProvider.credential;
171
+
172
+ if (!credentialInput) return nextProvider;
173
+ if (/^[A-Z][A-Z0-9_]*$/.test(credentialInput)) {
174
+ nextProvider.apiKeyEnv = credentialInput;
175
+ return nextProvider;
176
+ }
177
+
178
+ nextProvider.apiKey = credentialInput;
179
+ return nextProvider;
180
+ }
181
+
162
182
  function rewriteProviderEndpoints(provider = {}, endpoints = []) {
163
183
  const nextProvider = { ...provider };
164
184
  const nextEndpoints = normalizeEndpointCandidates(endpoints);
@@ -487,6 +507,7 @@ export function applyProviderInlineEdits(config = {}, currentProviderId = "", dr
487
507
  };
488
508
 
489
509
  if (!isSubscription) {
510
+ nextProvider = rewriteProviderCredentials(nextProvider, draftProvider);
490
511
  nextProvider = rewriteProviderEndpoints(nextProvider, nextEndpoints);
491
512
  nextProvider = rewriteRateLimits(nextProvider, draftProvider, renamedProviderId);
492
513
  }
@@ -80,7 +80,7 @@ const READ_WEB_PAGE_FUNCTION_PARAMETERS = {
80
80
  additionalProperties: true
81
81
  };
82
82
 
83
- const OPENAI_WEB_SEARCH_TOOL = Object.freeze({
83
+ const OPENAI_CHAT_WEB_SEARCH_TOOL = Object.freeze({
84
84
  type: "function",
85
85
  function: {
86
86
  name: SEARCH_TOOL_NAME,
@@ -89,13 +89,20 @@ const OPENAI_WEB_SEARCH_TOOL = Object.freeze({
89
89
  }
90
90
  });
91
91
 
92
+ const OPENAI_RESPONSES_WEB_SEARCH_TOOL = Object.freeze({
93
+ type: "function",
94
+ name: SEARCH_TOOL_NAME,
95
+ description: "Search the web for current information, news, documentation, or real-time facts.",
96
+ parameters: WEB_SEARCH_FUNCTION_PARAMETERS
97
+ });
98
+
92
99
  const CLAUDE_WEB_SEARCH_TOOL = Object.freeze({
93
100
  name: SEARCH_TOOL_NAME,
94
101
  description: "Search the web for current information, news, documentation, or real-time facts.",
95
102
  input_schema: WEB_SEARCH_FUNCTION_PARAMETERS
96
103
  });
97
104
 
98
- const OPENAI_READ_WEB_PAGE_TOOL = Object.freeze({
105
+ const OPENAI_CHAT_READ_WEB_PAGE_TOOL = Object.freeze({
99
106
  type: "function",
100
107
  function: {
101
108
  name: READ_WEB_PAGE_TOOL_NAME,
@@ -104,6 +111,13 @@ const OPENAI_READ_WEB_PAGE_TOOL = Object.freeze({
104
111
  }
105
112
  });
106
113
 
114
+ const OPENAI_RESPONSES_READ_WEB_PAGE_TOOL = Object.freeze({
115
+ type: "function",
116
+ name: READ_WEB_PAGE_TOOL_NAME,
117
+ description: "Fetch and extract the readable text and table content from a web page URL.",
118
+ parameters: READ_WEB_PAGE_FUNCTION_PARAMETERS
119
+ });
120
+
107
121
  const CLAUDE_READ_WEB_PAGE_TOOL = Object.freeze({
108
122
  name: READ_WEB_PAGE_TOOL_NAME,
109
123
  description: "Fetch and extract the readable text and table content from a web page URL.",
@@ -1269,7 +1283,20 @@ export function shouldInterceptAmpWebSearch({ clientType, originalBody, runtimeC
1269
1283
  return true;
1270
1284
  }
1271
1285
 
1272
- export function rewriteProviderBodyForAmpWebSearch(providerBody, targetFormat) {
1286
+ function getOpenAIInterceptToolDefinitions(requestKind) {
1287
+ if (requestKind === "responses") {
1288
+ return {
1289
+ webSearch: OPENAI_RESPONSES_WEB_SEARCH_TOOL,
1290
+ readWebPage: OPENAI_RESPONSES_READ_WEB_PAGE_TOOL
1291
+ };
1292
+ }
1293
+ return {
1294
+ webSearch: OPENAI_CHAT_WEB_SEARCH_TOOL,
1295
+ readWebPage: OPENAI_CHAT_READ_WEB_PAGE_TOOL
1296
+ };
1297
+ }
1298
+
1299
+ export function rewriteProviderBodyForAmpWebSearch(providerBody, targetFormat, requestKind = undefined) {
1273
1300
  const tools = Array.isArray(providerBody?.tools) ? providerBody.tools : [];
1274
1301
  if (tools.length === 0) {
1275
1302
  return {
@@ -1301,8 +1328,9 @@ export function rewriteProviderBodyForAmpWebSearch(providerBody, targetFormat) {
1301
1328
  }
1302
1329
 
1303
1330
  if (targetFormat === FORMATS.OPENAI) {
1304
- if (interceptedToolNames.has(SEARCH_TOOL_NAME)) nextTools.push(OPENAI_WEB_SEARCH_TOOL);
1305
- if (interceptedToolNames.has(READ_WEB_PAGE_TOOL_NAME)) nextTools.push(OPENAI_READ_WEB_PAGE_TOOL);
1331
+ const toolDefinitions = getOpenAIInterceptToolDefinitions(requestKind);
1332
+ if (interceptedToolNames.has(SEARCH_TOOL_NAME)) nextTools.push(toolDefinitions.webSearch);
1333
+ if (interceptedToolNames.has(READ_WEB_PAGE_TOOL_NAME)) nextTools.push(toolDefinitions.readWebPage);
1306
1334
  } else if (targetFormat === FORMATS.CLAUDE) {
1307
1335
  if (interceptedToolNames.has(SEARCH_TOOL_NAME)) nextTools.push(CLAUDE_WEB_SEARCH_TOOL);
1308
1336
  if (interceptedToolNames.has(READ_WEB_PAGE_TOOL_NAME)) nextTools.push(CLAUDE_READ_WEB_PAGE_TOOL);
@@ -212,6 +212,38 @@ function extractToolTypes(body) {
212
212
  )];
213
213
  }
214
214
 
215
+ function hasToolDefinitions(body) {
216
+ return Array.isArray(body?.tools) && body.tools.some((tool) => tool && typeof tool === "object");
217
+ }
218
+
219
+ function getProviderFormats(provider) {
220
+ return [...new Set(
221
+ [provider?.format, ...(Array.isArray(provider?.formats) ? provider.formats : [])]
222
+ .map((value) => String(value || "").trim())
223
+ .filter((value) => value === FORMATS.OPENAI || value === FORMATS.CLAUDE)
224
+ )];
225
+ }
226
+
227
+ function normalizeProviderRequestKind(targetFormat, requestKind) {
228
+ if (targetFormat === FORMATS.OPENAI && requestKind === "messages") {
229
+ return undefined;
230
+ }
231
+ return requestKind;
232
+ }
233
+
234
+ function shouldPreferOpenAIForClaudeToolCalls({
235
+ provider,
236
+ sourceFormat,
237
+ targetFormat,
238
+ requestKind,
239
+ body
240
+ } = {}) {
241
+ if (sourceFormat !== FORMATS.CLAUDE || targetFormat !== FORMATS.CLAUDE) return false;
242
+ if (!hasToolDefinitions(body)) return false;
243
+ if (!getProviderFormats(provider).includes(FORMATS.OPENAI)) return false;
244
+ return Boolean(resolveProviderUrl(provider, FORMATS.OPENAI, normalizeProviderRequestKind(FORMATS.OPENAI, requestKind)));
245
+ }
246
+
215
247
  function isOpenAIHostedWebSearchRequest(targetFormat, requestKind) {
216
248
  return targetFormat === FORMATS.OPENAI && requestKind === "responses";
217
249
  }
@@ -446,48 +478,24 @@ function logToolRouting({ env, clientType, candidate, originalBody, providerBody
446
478
  );
447
479
  }
448
480
 
449
- export async function makeProviderCall({
481
+ function buildProviderRequestPlan({
450
482
  body,
451
483
  sourceFormat,
452
- stream,
484
+ targetFormat,
453
485
  candidate,
454
486
  requestKind,
455
487
  requestHeaders,
456
- env,
457
- clientType,
458
- runtimeConfig,
459
- stateStore
488
+ interceptAmpWebSearch,
489
+ stream
460
490
  }) {
461
- const provider = candidate.provider;
462
- const targetFormat = candidate.targetFormat;
491
+ const normalizedRequestKind = normalizeProviderRequestKind(targetFormat, requestKind);
463
492
  const translate = needsTranslation(sourceFormat, targetFormat);
464
- const interceptAmpWebSearch = shouldInterceptAmpWebSearch({
465
- clientType,
466
- originalBody: body,
467
- runtimeConfig,
468
- env
469
- });
470
493
 
471
494
  let providerBody = { ...body };
472
495
  if (translate) {
473
- try {
474
- providerBody = translateRequest(sourceFormat, targetFormat, candidate.backend, body, stream);
475
- } catch (error) {
476
- return {
477
- ok: false,
478
- status: 400,
479
- retryable: false,
480
- errorKind: "translation_error",
481
- response: jsonResponse({
482
- type: "error",
483
- error: {
484
- type: "invalid_request_error",
485
- message: `Request translation failed: ${error instanceof Error ? error.message : String(error)}`
486
- }
487
- }, 400)
488
- };
489
- }
496
+ providerBody = translateRequest(sourceFormat, targetFormat, candidate.backend, body, stream);
490
497
  }
498
+
491
499
  providerBody.model = candidate.backend;
492
500
  providerBody = applyCachingMapping({
493
501
  originalBody: body,
@@ -504,9 +512,10 @@ export async function makeProviderCall({
504
512
  targetModel: candidate.backend,
505
513
  requestHeaders
506
514
  });
507
- const declaredOpenAIHostedWebSearchToolType = getProviderOpenAIHostedWebSearchToolType(provider, {
515
+
516
+ const declaredOpenAIHostedWebSearchToolType = getProviderOpenAIHostedWebSearchToolType(candidate.provider, {
508
517
  targetFormat,
509
- requestKind
518
+ requestKind: normalizedRequestKind
510
519
  });
511
520
  const declaredOpenAIHostedWebSearchRewrite = rewriteProviderBodyForOpenAIHostedWebSearch(
512
521
  providerBody,
@@ -515,17 +524,97 @@ export async function makeProviderCall({
515
524
  if (declaredOpenAIHostedWebSearchRewrite.rewritten) {
516
525
  providerBody = declaredOpenAIHostedWebSearchRewrite.providerBody;
517
526
  }
527
+
518
528
  if (interceptAmpWebSearch) {
519
- providerBody = rewriteProviderBodyForAmpWebSearch(providerBody, targetFormat).providerBody;
529
+ providerBody = rewriteProviderBodyForAmpWebSearch(providerBody, targetFormat, requestKind).providerBody;
530
+ }
531
+
532
+ return {
533
+ targetFormat,
534
+ requestKind: normalizedRequestKind,
535
+ translate,
536
+ providerBody
537
+ };
538
+ }
539
+
540
+ export async function makeProviderCall({
541
+ body,
542
+ sourceFormat,
543
+ stream,
544
+ candidate,
545
+ requestKind,
546
+ requestHeaders,
547
+ env,
548
+ clientType,
549
+ runtimeConfig,
550
+ stateStore
551
+ }) {
552
+ const provider = candidate.provider;
553
+ const targetFormat = candidate.targetFormat;
554
+ const interceptAmpWebSearch = shouldInterceptAmpWebSearch({
555
+ clientType,
556
+ originalBody: body,
557
+ runtimeConfig,
558
+ env
559
+ });
560
+
561
+ const preferOpenAIToolRouting = !isSubscriptionProvider(provider) && shouldPreferOpenAIForClaudeToolCalls({
562
+ provider,
563
+ sourceFormat,
564
+ targetFormat,
565
+ requestKind,
566
+ body
567
+ });
568
+
569
+ let activePlan;
570
+ let fallbackPlan = null;
571
+ try {
572
+ activePlan = buildProviderRequestPlan({
573
+ body,
574
+ sourceFormat,
575
+ targetFormat: preferOpenAIToolRouting ? FORMATS.OPENAI : targetFormat,
576
+ candidate,
577
+ requestKind,
578
+ requestHeaders,
579
+ interceptAmpWebSearch,
580
+ stream
581
+ });
582
+ if (preferOpenAIToolRouting) {
583
+ fallbackPlan = buildProviderRequestPlan({
584
+ body,
585
+ sourceFormat,
586
+ targetFormat,
587
+ candidate,
588
+ requestKind,
589
+ requestHeaders,
590
+ interceptAmpWebSearch,
591
+ stream
592
+ });
593
+ }
594
+ } catch (error) {
595
+ return {
596
+ ok: false,
597
+ status: 400,
598
+ retryable: false,
599
+ errorKind: "translation_error",
600
+ response: jsonResponse({
601
+ type: "error",
602
+ error: {
603
+ type: "invalid_request_error",
604
+ message: `Request translation failed: ${error instanceof Error ? error.message : String(error)}`
605
+ }
606
+ }, 400)
607
+ };
520
608
  }
609
+
521
610
  logToolRouting({
522
611
  env,
523
612
  clientType,
524
613
  candidate,
525
614
  originalBody: body,
526
- providerBody,
615
+ providerBody: activePlan.providerBody,
527
616
  sourceFormat,
528
- targetFormat
617
+ targetFormat: activePlan.targetFormat
529
618
  });
530
619
 
531
620
  if (isSubscriptionProvider(provider)) {
@@ -537,7 +626,7 @@ export async function makeProviderCall({
537
626
  stream: subscriptionType === "chatgpt-codex" ? true : Boolean(stream),
538
627
  env
539
628
  });
540
- const subscriptionResult = await executeSubscriptionRequest(providerBody);
629
+ const subscriptionResult = await executeSubscriptionRequest(activePlan.providerBody);
541
630
 
542
631
  if (!subscriptionResult?.ok) {
543
632
  return subscriptionResult;
@@ -558,14 +647,14 @@ export async function makeProviderCall({
558
647
  };
559
648
  }
560
649
 
561
- const fallbackModel = candidate?.backend || providerBody?.model || "unknown";
650
+ const fallbackModel = candidate?.backend || activePlan.providerBody?.model || "unknown";
562
651
  let upstreamResponse = subscriptionResult.response;
563
652
  if (interceptAmpWebSearch) {
564
653
  const intercepted = await maybeInterceptAmpWebSearch({
565
654
  response: upstreamResponse,
566
- providerBody,
567
- targetFormat,
568
- requestKind,
655
+ providerBody: activePlan.providerBody,
656
+ targetFormat: activePlan.targetFormat,
657
+ requestKind: activePlan.requestKind,
569
658
  stream,
570
659
  runtimeConfig,
571
660
  env,
@@ -581,11 +670,11 @@ export async function makeProviderCall({
581
670
  return adaptProviderResponse({
582
671
  response: upstreamResponse,
583
672
  stream,
584
- translate,
673
+ translate: activePlan.translate,
585
674
  sourceFormat,
586
- targetFormat,
675
+ targetFormat: activePlan.targetFormat,
587
676
  fallbackModel,
588
- requestKind,
677
+ requestKind: activePlan.requestKind,
589
678
  requestBody: body,
590
679
  clientType,
591
680
  env
@@ -722,20 +811,21 @@ export async function makeProviderCall({
722
811
  };
723
812
  }
724
813
 
725
- const providerUrl = resolveProviderUrl(provider, targetFormat, requestKind);
726
- const headers = mergeCachingHeaders(
727
- buildProviderHeaders(provider, env, targetFormat),
728
- requestHeaders,
729
- targetFormat
730
- );
731
- const executeHttpProviderRequest = async (requestBody) => {
814
+ const executeHttpProviderRequest = async (plan) => {
815
+ const providerUrl = resolveProviderUrl(provider, plan.targetFormat, plan.requestKind);
816
+ if (!providerUrl) return null;
817
+ const headers = mergeCachingHeaders(
818
+ buildProviderHeaders(provider, env, plan.targetFormat),
819
+ requestHeaders,
820
+ plan.targetFormat
821
+ );
732
822
  const timeoutMs = resolveUpstreamTimeoutMs(env);
733
823
  const timeoutControl = buildTimeoutSignal(timeoutMs);
734
824
  try {
735
825
  const init = {
736
826
  method: "POST",
737
827
  headers,
738
- body: JSON.stringify(requestBody)
828
+ body: JSON.stringify(plan.providerBody)
739
829
  };
740
830
  if (timeoutControl.signal) {
741
831
  init.signal = timeoutControl.signal;
@@ -747,7 +837,7 @@ export async function makeProviderCall({
747
837
  }
748
838
  };
749
839
 
750
- if (!providerUrl) {
840
+ if (!resolveProviderUrl(provider, activePlan.targetFormat, activePlan.requestKind)) {
751
841
  return {
752
842
  ok: false,
753
843
  status: 500,
@@ -765,7 +855,7 @@ export async function makeProviderCall({
765
855
 
766
856
  let response;
767
857
  try {
768
- response = await executeHttpProviderRequest(providerBody);
858
+ response = await executeHttpProviderRequest(activePlan);
769
859
  } catch (error) {
770
860
  return {
771
861
  ok: false,
@@ -782,22 +872,40 @@ export async function makeProviderCall({
782
872
  };
783
873
  }
784
874
 
875
+ if ((!response || !response.ok) && fallbackPlan) {
876
+ try {
877
+ const fallbackResponse = await executeHttpProviderRequest(fallbackPlan);
878
+ if (fallbackResponse instanceof Response && fallbackResponse.ok) {
879
+ response = fallbackResponse;
880
+ activePlan = fallbackPlan;
881
+ }
882
+ } catch {
883
+ // Keep the original failure if the fallback request also fails.
884
+ }
885
+ }
886
+
785
887
  if (!response.ok) {
786
888
  const retriedOpenAIHostedWebSearch = await maybeRetryOpenAIHostedWebSearchProviderRequest({
787
889
  response,
788
- executeProviderRequest: executeHttpProviderRequest,
789
- providerBody,
790
- targetFormat,
791
- requestKind
890
+ executeProviderRequest: async (nextProviderBody) => executeHttpProviderRequest({
891
+ ...activePlan,
892
+ providerBody: nextProviderBody
893
+ }),
894
+ providerBody: activePlan.providerBody,
895
+ targetFormat: activePlan.targetFormat,
896
+ requestKind: activePlan.requestKind
792
897
  });
793
898
  response = retriedOpenAIHostedWebSearch.response;
794
- providerBody = retriedOpenAIHostedWebSearch.providerBody;
899
+ activePlan = {
900
+ ...activePlan,
901
+ providerBody: retriedOpenAIHostedWebSearch.providerBody
902
+ };
795
903
  }
796
904
 
797
905
  if (!response.ok) {
798
- const hostedWebSearchErrorKind = await resolveHostedWebSearchErrorKind(response, providerBody, {
799
- targetFormat,
800
- requestKind
906
+ const hostedWebSearchErrorKind = await resolveHostedWebSearchErrorKind(response, activePlan.providerBody, {
907
+ targetFormat: activePlan.targetFormat,
908
+ requestKind: activePlan.requestKind
801
909
  });
802
910
  return {
803
911
  ok: false,
@@ -805,23 +913,26 @@ export async function makeProviderCall({
805
913
  retryable: shouldRetryStatus(response.status),
806
914
  ...(hostedWebSearchErrorKind ? { errorKind: hostedWebSearchErrorKind } : {}),
807
915
  upstreamResponse: response,
808
- translateError: translate
916
+ translateError: activePlan.translate
809
917
  };
810
918
  }
811
919
 
812
920
  if (interceptAmpWebSearch) {
813
921
  const intercepted = await maybeInterceptAmpWebSearch({
814
922
  response,
815
- providerBody,
816
- targetFormat,
817
- requestKind,
923
+ providerBody: activePlan.providerBody,
924
+ targetFormat: activePlan.targetFormat,
925
+ requestKind: activePlan.requestKind,
818
926
  stream,
819
927
  runtimeConfig,
820
928
  env,
821
929
  stateStore,
822
930
  executeProviderRequest: async (followUpBody) => {
823
931
  try {
824
- return await executeHttpProviderRequest(followUpBody);
932
+ return await executeHttpProviderRequest({
933
+ ...activePlan,
934
+ providerBody: followUpBody
935
+ });
825
936
  } catch {
826
937
  return null;
827
938
  }
@@ -833,11 +944,11 @@ export async function makeProviderCall({
833
944
  return adaptProviderResponse({
834
945
  response,
835
946
  stream,
836
- translate,
947
+ translate: activePlan.translate,
837
948
  sourceFormat,
838
- targetFormat,
949
+ targetFormat: activePlan.targetFormat,
839
950
  fallbackModel: candidate.backend,
840
- requestKind,
951
+ requestKind: activePlan.requestKind,
841
952
  requestBody: body,
842
953
  clientType,
843
954
  env