@khanglvm/llm-router 2.0.3 → 2.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -7,8 +7,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [2.0.5] - 2026-03-15
11
+
12
+ ### Fixed
13
+ - Relaxed the live coding-tool publish checks so known external Codex model-verbosity mismatches and Claude MCP schema-validation failures are treated as acceptable upstream tool failures instead of blocking npm publication.
14
+
15
+ ## [2.0.4] - 2026-03-15
16
+
10
17
  ### Fixed
11
18
  - Raised the default inbound JSON body limit for OpenAI `/responses` requests from `1 MiB` to `8 MiB` while keeping other JSON routes at `1 MiB`. This prevents local `413 Request body too large` failures for Codex CLI and other Responses API clients carrying larger conversation state.
19
+ - Updated the web console provider editor so API-based providers can rotate between env-backed and direct API key credentials in place without leaving the modal.
20
+ - Improved the web console model-save flow for API-based providers:
21
+ - new-model tests now stream visible progress while save is in flight
22
+ - successful new models stay marked as confirmed
23
+ - only failed new models are marked as failed
24
+ - the edit modal blocks backdrop/close dismissal while tests are running
25
+ - closing after failed tests now offers removing failed rows while keeping successful new rows
26
+ - Improved dual-format Claude provider routing so Claude tool calls can prefer OpenAI-compatible tool execution paths when available, while falling back cleanly to native Claude routing if the OpenAI-compatible path fails.
12
27
 
13
28
  ## [2.0.1] - 2026-03-15
14
29
 
package/README.md CHANGED
@@ -2,20 +2,18 @@
2
2
 
3
3
  LLM Router is a local and Cloudflare-deployable gateway for routing one client endpoint across multiple LLM providers, models, aliases, fallbacks, and rate limits.
4
4
 
5
- The npm package name stays the same:
5
+ **Current version**: `2.0.6`
6
6
 
7
+ NPM package:
7
8
  ```bash
8
9
  @khanglvm/llm-router
9
10
  ```
10
11
 
11
- The primary CLI command is now:
12
-
12
+ Primary CLI command:
13
13
  ```bash
14
14
  llr
15
15
  ```
16
16
 
17
- `2.0.1` is the current public release. It includes the Web UI, AMP routing, and coding-tool integrations introduced in the 2.x line.
18
-
19
17
  ## Install
20
18
 
21
19
  ```bash
@@ -266,7 +264,18 @@ Run the JavaScript test suite:
266
264
  node --test $(rg --files -g "*.test.js" src)
267
265
  ```
268
266
 
267
+ ## Documentation
268
+
269
+ Comprehensive documentation is available in the `docs/` directory:
270
+
271
+ - **[Project Overview & PDR](./docs/project-overview-pdr.md)** — Feature matrix, target users, success metrics, constraints
272
+ - **[Codebase Summary](./docs/codebase-summary.md)** — Directory structure, module relationships, entry points, test infrastructure
273
+ - **[Code Standards](./docs/code-standards.md)** — Patterns, naming conventions, testing, error handling
274
+ - **[System Architecture](./docs/system-architecture.md)** — Request lifecycle, subsystem boundaries, data flow, deployment models
275
+ - **[Project Roadmap](./docs/project-roadmap.md)** — Current status, planned phases, timeline, success metrics
276
+
269
277
  ## Security and Releases
270
278
 
271
279
  - Security: [`SECURITY.md`](https://github.com/khanglvm/llm-router/blob/master/SECURITY.md)
272
280
  - Release notes: [`CHANGELOG.md`](https://github.com/khanglvm/llm-router/blob/master/CHANGELOG.md)
281
+ - AMP routing: [`docs/amp-routing.md`](./docs/amp-routing.md)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@khanglvm/llm-router",
3
- "version": "2.0.3",
3
+ "version": "2.0.6",
4
4
  "description": "LLM Router: single gateway endpoint for multi-provider LLMs with unified OpenAI+Anthropic format and seamless fallback",
5
5
  "keywords": [
6
6
  "llm-router",
@@ -159,6 +159,26 @@ function normalizeEndpointCandidates(values = []) {
159
159
  return dedupeStrings(Array.isArray(values) ? values : [values]);
160
160
  }
161
161
 
162
/**
 * Return a shallow copy of `provider` whose credential fields reflect the
 * draft's `credentialInput`.
 *
 * - Draft has no own `credentialInput` key: the copy is returned untouched.
 * - Otherwise `apiKey`, `apiKeyEnv` and `credential` are removed from the copy.
 *   A blank input leaves them removed; an input shaped like an upper-case
 *   identifier (`/^[A-Z][A-Z0-9_]*$/`) is stored as `apiKeyEnv`; anything else
 *   is stored as `apiKey`.
 *
 * @param {object} [provider] - Provider config; never mutated.
 * @param {object} [draftProvider] - Edited draft, possibly carrying `credentialInput`.
 * @returns {object} New provider object.
 */
function rewriteProviderCredentials(provider = {}, draftProvider = {}) {
  const updated = { ...provider };
  const draft = draftProvider || {};
  if (!Object.prototype.hasOwnProperty.call(draft, "credentialInput")) {
    return updated;
  }

  const typed = String(draft.credentialInput || "").trim();

  // Any explicit credential input replaces whatever was configured before.
  for (const staleKey of ["apiKey", "apiKeyEnv", "credential"]) {
    delete updated[staleKey];
  }

  if (typed) {
    const looksLikeEnvName = /^[A-Z][A-Z0-9_]*$/.test(typed);
    updated[looksLikeEnvName ? "apiKeyEnv" : "apiKey"] = typed;
  }
  return updated;
}
181
+
162
182
  function rewriteProviderEndpoints(provider = {}, endpoints = []) {
163
183
  const nextProvider = { ...provider };
164
184
  const nextEndpoints = normalizeEndpointCandidates(endpoints);
@@ -487,6 +507,7 @@ export function applyProviderInlineEdits(config = {}, currentProviderId = "", dr
487
507
  };
488
508
 
489
509
  if (!isSubscription) {
510
+ nextProvider = rewriteProviderCredentials(nextProvider, draftProvider);
490
511
  nextProvider = rewriteProviderEndpoints(nextProvider, nextEndpoints);
491
512
  nextProvider = rewriteRateLimits(nextProvider, draftProvider, renamedProviderId);
492
513
  }
@@ -224,10 +224,15 @@ function normalizeInputMessageContent(content, role) {
224
224
  ? part.image_url
225
225
  : part.image_url?.url;
226
226
  if (typeof rawUrl === 'string' && rawUrl.trim()) {
227
- parts.push({
227
+ const imageItem = {
228
228
  type: 'input_image',
229
229
  image_url: rawUrl
230
- });
230
+ };
231
+ const detail = part.image_url?.detail || part.detail;
232
+ if (typeof detail === 'string' && detail.trim()) {
233
+ imageItem.detail = detail.trim();
234
+ }
235
+ parts.push(imageItem);
231
236
  }
232
237
  continue;
233
238
  }
@@ -333,6 +338,21 @@ function normalizeToolChoiceForResponses(toolChoice) {
333
338
  const normalizedType = String(toolChoice.type || '').trim().toLowerCase();
334
339
  if (normalizedType === 'none') return 'none';
335
340
  if (normalizedType === 'required' || normalizedType === 'any' || normalizedType === 'tool') {
341
+ const functionName = String(
342
+ toolChoice.function?.name || toolChoice.name || ''
343
+ ).trim();
344
+ if (functionName) {
345
+ return { type: 'function', name: functionName };
346
+ }
347
+ return 'required';
348
+ }
349
+ if (normalizedType === 'function') {
350
+ const functionName = String(
351
+ toolChoice.function?.name || toolChoice.name || ''
352
+ ).trim();
353
+ if (functionName) {
354
+ return { type: 'function', name: functionName };
355
+ }
336
356
  return 'required';
337
357
  }
338
358
  }
@@ -735,7 +735,11 @@ function normalizeAmpWebSearchConfig(rawWebSearch) {
735
735
  return {
736
736
  strategy: normalizeAmpWebSearchStrategy(rawWebSearch.strategy),
737
737
  count,
738
- providers
738
+ providers,
739
+ interceptInternalSearch: normalizeBooleanValue(
740
+ rawWebSearch.interceptInternalSearch ?? rawWebSearch["intercept-internal-search"],
741
+ false
742
+ )
739
743
  };
740
744
  }
741
745
 
@@ -10,6 +10,7 @@ import {
10
10
  resolveRouteReference
11
11
  } from "../config.js";
12
12
  import { isSubscriptionProvider, makeSubscriptionProviderCall } from "../subscription-provider.js";
13
+ import { jsonResponse } from "./http.js";
13
14
 
14
15
  const SEARCH_TOOL_NAME = "web_search";
15
16
  const READ_WEB_PAGE_TOOL_NAME = "read_web_page";
@@ -2178,6 +2179,135 @@ export async function testHostedWebSearchProviderRoute({
2178
2179
  }, query, runtimeConfig, env);
2179
2180
  }
2180
2181
 
2182
/**
 * Run one query against a single search backend and return its hits as plain
 * `{ title, url, snippet }` records.
 *
 * Recognised `provider.id` values are "brave", "tavily", "exa" and "searxng".
 * The first three need `provider.apiKey`; "searxng" needs `provider.url`.
 * A blank query, a missing provider, a missing credential/URL, an unknown id
 * or a non-OK HTTP response all yield an empty array.
 *
 * Requests go through `runFetchWithTimeout`, which is defined elsewhere in
 * this module. Errors thrown by it, or by parsing the response body as JSON,
 * are not caught here and propagate to the caller.
 *
 * @param {string} query - Search text; trimmed before use.
 * @param {number} count - Maximum number of hits to request and return.
 * @param {object} provider - Backend descriptor (`id`, plus `apiKey` or `url`).
 * @returns {Promise<Array<{title: string, url: string, snippet: string}>>}
 */
async function fetchStructuredSearchResults(query, count, provider) {
  const normalizedQuery = String(query || "").trim();
  if (!normalizedQuery || !provider) return [];

  // Shared tail for every backend: bail on non-OK, then map the first `count` rows.
  const readResults = async (response, pickRows, pickSnippet) => {
    if (!response.ok) return [];
    const payload = await response.json();
    const rows = pickRows(payload);
    if (!Array.isArray(rows)) return [];
    return rows.slice(0, count).map((item) => ({
      title: String(item?.title || ""),
      url: String(item?.url || ""),
      snippet: String(pickSnippet(item) || "")
    }));
  };

  switch (provider.id) {
    case "brave": {
      if (!provider.apiKey) return [];
      const url = `https://api.search.brave.com/res/v1/web/search?q=${encodeURIComponent(normalizedQuery)}&count=${count}&text_decorations=false`;
      const response = await runFetchWithTimeout(url, {
        headers: { Accept: "application/json", "X-Subscription-Token": provider.apiKey }
      });
      return readResults(response, (payload) => payload?.web?.results, (item) => item?.description);
    }

    case "tavily": {
      if (!provider.apiKey) return [];
      const response = await runFetchWithTimeout("https://api.tavily.com/search", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ api_key: provider.apiKey, query: normalizedQuery, max_results: count, search_depth: "basic" })
      });
      return readResults(response, (payload) => payload?.results, (item) => item?.content);
    }

    case "exa": {
      if (!provider.apiKey) return [];
      const response = await runFetchWithTimeout("https://api.exa.ai/search", {
        method: "POST",
        headers: { "Content-Type": "application/json", "x-api-key": provider.apiKey },
        body: JSON.stringify({ query: normalizedQuery, numResults: count, type: "auto", contents: { text: { maxCharacters: 500 } } })
      });
      return readResults(response, (payload) => payload?.results, (item) => item?.text || item?.snippet);
    }

    case "searxng": {
      if (!provider.url) return [];
      // Strip trailing slashes so "http://host/" does not become "http://host//search".
      let baseUrl = String(provider.url);
      while (baseUrl.endsWith("/")) baseUrl = baseUrl.slice(0, -1);
      if (!baseUrl) return [];
      const url = `${baseUrl}/search?q=${encodeURIComponent(normalizedQuery)}&format=json&categories=general&language=auto`;
      const response = await runFetchWithTimeout(url, {
        headers: { Accept: "application/json", "User-Agent": "llm-router" }
      });
      return readResults(response, (payload) => payload?.results, (item) => item?.content);
    }

    default:
      return [];
  }
}
2240
+
2241
/**
 * Run a batch of search queries against the first ready, non-hosted provider
 * that returns at least one hit.
 *
 * Queries are stringified, trimmed, stripped of blanks and capped at 10.
 * `maxResults` is clamped to the range 1..20 (default 5) and truncated to an
 * integer so fractional values never reach a provider request. Providers come
 * from `buildAmpWebSearchSnapshot`; for each candidate all queries run in
 * parallel. If any query throws, that provider is abandoned (with a warning)
 * and the next one is tried.
 *
 * @param {object} args
 * @param {Array<*>} args.queries - Raw query values.
 * @param {number|string} [args.maxResults] - Requested hits per query.
 * @param {object} args.config - Router config passed to the snapshot builder.
 * @param {object} args.env - Environment passed to the snapshot builder.
 * @returns {Promise<{results: Array<object>, provider: string}>} Combined hits
 *   and the id of the provider that produced them; `provider` is "" when
 *   nothing was found.
 */
export async function executeWebSearchQueries({ queries, maxResults, config, env }) {
  const normalizedQueries = (Array.isArray(queries) ? queries : [])
    .map((q) => String(q || "").trim())
    .filter(Boolean)
    .slice(0, 10);
  if (normalizedQueries.length === 0) return { results: [], provider: "" };

  // Clamp first, then truncate: 3.7 -> 3, 0.5 -> 1, NaN/0 -> 5.
  const count = Math.trunc(Math.max(1, Math.min(20, Number(maxResults) || 5)));
  const snapshot = await buildAmpWebSearchSnapshot(config, { env });
  const snapshotProviders = Array.isArray(snapshot?.providers) ? snapshot.providers : [];
  const readyProviders = snapshotProviders.filter((p) => p.ready && !isHostedSearchProvider(p));

  for (const providerStatus of readyProviders) {
    try {
      const batchResults = await Promise.all(
        normalizedQueries.map((query) => fetchStructuredSearchResults(query, count, providerStatus))
      );
      const allResults = batchResults.flat();
      if (allResults.length > 0) {
        return { results: allResults, provider: providerStatus.id };
      }
    } catch (error) {
      // Best effort: note the failure and fall through to the next provider.
      console.warn(`[llm-router] web search provider "${providerStatus.id}" failed: ${error?.message || error}`);
    }
  }

  return { results: [], provider: "" };
}
2266
+
2267
/**
 * Answer a `webSearch2` request locally instead of forwarding it.
 *
 * Returns `null` (meaning "not handled") when:
 * - the URL has no `webSearch2` query parameter,
 * - `interceptInternalSearch` is not enabled in `config.webSearch` /
 *   `config.amp.webSearch`, or no providers are configured there,
 * - the request body is not JSON or lacks a non-empty `params.searchQueries`,
 * - running the searches throws (a warning is logged).
 *
 * Otherwise returns `jsonResponse({ result: { results } })`, where each entry
 * carries `title`, `url`, `snippet` and `content` (a copy of the snippet).
 * The body is read from a clone, so the original request is not consumed.
 *
 * @param {Request} request - Incoming request.
 * @param {URL} url - Parsed request URL.
 * @param {object} config - Router configuration.
 * @param {object} env - Environment forwarded to the search executor.
 * @returns {Promise<*>} The JSON response, or `null` when not handled.
 */
export async function maybeInterceptAmpInternalSearch(request, url, config, env) {
  if (!url.searchParams.has("webSearch2")) return null;

  const settings = config?.webSearch || config?.amp?.webSearch;
  if (!settings?.interceptInternalSearch) return null;

  const configuredProviders = Array.isArray(settings?.providers) ? settings.providers : [];
  if (configuredProviders.length === 0) return null;

  let parsedBody;
  try {
    parsedBody = await request.clone().json();
  } catch {
    return null;
  }

  const params = parsedBody?.params;
  const hasQueries = Boolean(params)
    && Array.isArray(params.searchQueries)
    && params.searchQueries.length > 0;
  if (!hasQueries) return null;

  const toAmpResult = (hit) => {
    const snippet = hit.snippet || "";
    return {
      title: hit.title || "",
      url: hit.url || "",
      snippet,
      content: snippet
    };
  };

  try {
    const outcome = await executeWebSearchQueries({
      queries: params.searchQueries,
      maxResults: Number(params.maxResults) || 5,
      config,
      env
    });
    const mapped = outcome.results.map((hit) => toAmpResult(hit));
    return jsonResponse({ result: { results: mapped } });
  } catch (error) {
    console.warn(`[llm-router] webSearch2 interception failed: ${error?.message || error}`);
    return null;
  }
}
2310
+
2181
2311
  export async function maybeInterceptAmpWebSearch({
2182
2312
  response,
2183
2313
  providerBody,
@@ -212,6 +212,38 @@ function extractToolTypes(body) {
212
212
  )];
213
213
  }
214
214
 
215
/**
 * Report whether `body.tools` is an array containing at least one object entry.
 *
 * @param {object} body - Request body (may be null/undefined).
 * @returns {boolean}
 */
function hasToolDefinitions(body) {
  const tools = body?.tools;
  if (!Array.isArray(tools)) return false;
  for (const tool of tools) {
    if (tool && typeof tool === "object") return true;
  }
  return false;
}
218
+
219
/**
 * Collect the distinct formats a provider declares, reading `provider.format`
 * first and then each entry of `provider.formats`.
 *
 * Values are stringified and trimmed; only `FORMATS.OPENAI` and
 * `FORMATS.CLAUDE` are kept, in first-seen order.
 *
 * @param {object} provider - Provider config (may be null/undefined).
 * @returns {string[]} De-duplicated list of recognised formats.
 */
function getProviderFormats(provider) {
  const primary = provider?.format;
  const extras = Array.isArray(provider?.formats) ? provider.formats : [];
  const names = [primary, ...extras].map((value) => String(value || "").trim());

  const recognised = new Set();
  for (const name of names) {
    if (name === FORMATS.OPENAI || name === FORMATS.CLAUDE) {
      recognised.add(name);
    }
  }
  return [...recognised];
}
226
+
227
/**
 * Drop the "messages" request kind when the target format is OpenAI.
 *
 * @param {string} targetFormat - Format the request will be sent in.
 * @param {string|undefined} requestKind - Kind of the incoming request.
 * @returns {string|undefined} `undefined` for OpenAI + "messages"; otherwise
 *   `requestKind` unchanged.
 */
function normalizeProviderRequestKind(targetFormat, requestKind) {
  const isOpenAIMessages = targetFormat === FORMATS.OPENAI && requestKind === "messages";
  return isOpenAIMessages ? undefined : requestKind;
}
233
+
234
/**
 * Decide whether a Claude-to-Claude request should first be tried over the
 * provider's OpenAI-format endpoint.
 *
 * True only when all of the following hold:
 * source and target format are both Claude, the body defines at least one
 * tool, the provider lists the OpenAI format, and `resolveProviderUrl`
 * yields a URL for the OpenAI format with the normalised request kind.
 *
 * @param {object} [args]
 * @param {object} args.provider - Provider config.
 * @param {string} args.sourceFormat - Format of the incoming request.
 * @param {string} args.targetFormat - Format selected for the candidate.
 * @param {string} [args.requestKind] - Kind of the incoming request.
 * @param {object} args.body - Request body.
 * @returns {boolean}
 */
function shouldPreferOpenAIForClaudeToolCalls({
  provider,
  sourceFormat,
  targetFormat,
  requestKind,
  body
} = {}) {
  const isClaudeToClaude = sourceFormat === FORMATS.CLAUDE && targetFormat === FORMATS.CLAUDE;
  const eligible = isClaudeToClaude
    && hasToolDefinitions(body)
    && getProviderFormats(provider).includes(FORMATS.OPENAI);
  if (!eligible) return false;

  const openAIRequestKind = normalizeProviderRequestKind(FORMATS.OPENAI, requestKind);
  const openAIUrl = resolveProviderUrl(provider, FORMATS.OPENAI, openAIRequestKind);
  return Boolean(openAIUrl);
}
246
+
215
247
  function isOpenAIHostedWebSearchRequest(targetFormat, requestKind) {
216
248
  return targetFormat === FORMATS.OPENAI && requestKind === "responses";
217
249
  }
@@ -446,48 +478,24 @@ function logToolRouting({ env, clientType, candidate, originalBody, providerBody
446
478
  );
447
479
  }
448
480
 
449
- export async function makeProviderCall({
481
+ function buildProviderRequestPlan({
450
482
  body,
451
483
  sourceFormat,
452
- stream,
484
+ targetFormat,
453
485
  candidate,
454
486
  requestKind,
455
487
  requestHeaders,
456
- env,
457
- clientType,
458
- runtimeConfig,
459
- stateStore
488
+ interceptAmpWebSearch,
489
+ stream
460
490
  }) {
461
- const provider = candidate.provider;
462
- const targetFormat = candidate.targetFormat;
491
+ const normalizedRequestKind = normalizeProviderRequestKind(targetFormat, requestKind);
463
492
  const translate = needsTranslation(sourceFormat, targetFormat);
464
- const interceptAmpWebSearch = shouldInterceptAmpWebSearch({
465
- clientType,
466
- originalBody: body,
467
- runtimeConfig,
468
- env
469
- });
470
493
 
471
494
  let providerBody = { ...body };
472
495
  if (translate) {
473
- try {
474
- providerBody = translateRequest(sourceFormat, targetFormat, candidate.backend, body, stream);
475
- } catch (error) {
476
- return {
477
- ok: false,
478
- status: 400,
479
- retryable: false,
480
- errorKind: "translation_error",
481
- response: jsonResponse({
482
- type: "error",
483
- error: {
484
- type: "invalid_request_error",
485
- message: `Request translation failed: ${error instanceof Error ? error.message : String(error)}`
486
- }
487
- }, 400)
488
- };
489
- }
496
+ providerBody = translateRequest(sourceFormat, targetFormat, candidate.backend, body, stream);
490
497
  }
498
+
491
499
  providerBody.model = candidate.backend;
492
500
  providerBody = applyCachingMapping({
493
501
  originalBody: body,
@@ -504,9 +512,10 @@ export async function makeProviderCall({
504
512
  targetModel: candidate.backend,
505
513
  requestHeaders
506
514
  });
507
- const declaredOpenAIHostedWebSearchToolType = getProviderOpenAIHostedWebSearchToolType(provider, {
515
+
516
+ const declaredOpenAIHostedWebSearchToolType = getProviderOpenAIHostedWebSearchToolType(candidate.provider, {
508
517
  targetFormat,
509
- requestKind
518
+ requestKind: normalizedRequestKind
510
519
  });
511
520
  const declaredOpenAIHostedWebSearchRewrite = rewriteProviderBodyForOpenAIHostedWebSearch(
512
521
  providerBody,
@@ -515,21 +524,113 @@ export async function makeProviderCall({
515
524
  if (declaredOpenAIHostedWebSearchRewrite.rewritten) {
516
525
  providerBody = declaredOpenAIHostedWebSearchRewrite.providerBody;
517
526
  }
527
+
518
528
  if (interceptAmpWebSearch) {
519
529
  providerBody = rewriteProviderBodyForAmpWebSearch(providerBody, targetFormat, requestKind).providerBody;
520
530
  }
531
+
532
+ return {
533
+ targetFormat,
534
+ requestKind: normalizedRequestKind,
535
+ translate,
536
+ providerBody
537
+ };
538
+ }
539
+
540
+ export async function makeProviderCall({
541
+ body,
542
+ sourceFormat,
543
+ stream,
544
+ candidate,
545
+ requestKind,
546
+ requestHeaders,
547
+ env,
548
+ clientType,
549
+ runtimeConfig,
550
+ stateStore,
551
+ ampContext
552
+ }) {
553
+ const provider = candidate.provider;
554
+ const targetFormat = candidate.targetFormat;
555
+ const interceptAmpWebSearch = shouldInterceptAmpWebSearch({
556
+ clientType,
557
+ originalBody: body,
558
+ runtimeConfig,
559
+ env
560
+ });
561
+
562
+ const preferOpenAIToolRouting = !isSubscriptionProvider(provider) && shouldPreferOpenAIForClaudeToolCalls({
563
+ provider,
564
+ sourceFormat,
565
+ targetFormat,
566
+ requestKind,
567
+ body
568
+ });
569
+
570
+ let effectiveBody = body;
571
+ if (ampContext?.presets?.reasoningEffort && !body?.reasoning_effort && !body?.reasoning?.effort) {
572
+ effectiveBody = { ...body, reasoning_effort: ampContext.presets.reasoningEffort };
573
+ }
574
+
575
+ let activePlan;
576
+ let fallbackPlan = null;
577
+ try {
578
+ activePlan = buildProviderRequestPlan({
579
+ body: effectiveBody,
580
+ sourceFormat,
581
+ targetFormat: preferOpenAIToolRouting ? FORMATS.OPENAI : targetFormat,
582
+ candidate,
583
+ requestKind,
584
+ requestHeaders,
585
+ interceptAmpWebSearch,
586
+ stream
587
+ });
588
+ if (preferOpenAIToolRouting) {
589
+ fallbackPlan = buildProviderRequestPlan({
590
+ body: effectiveBody,
591
+ sourceFormat,
592
+ targetFormat,
593
+ candidate,
594
+ requestKind,
595
+ requestHeaders,
596
+ interceptAmpWebSearch,
597
+ stream
598
+ });
599
+ }
600
+ } catch (error) {
601
+ return {
602
+ ok: false,
603
+ status: 400,
604
+ retryable: false,
605
+ errorKind: "translation_error",
606
+ response: jsonResponse({
607
+ type: "error",
608
+ error: {
609
+ type: "invalid_request_error",
610
+ message: `Request translation failed: ${error instanceof Error ? error.message : String(error)}`
611
+ }
612
+ }, 400)
613
+ };
614
+ }
615
+
521
616
  logToolRouting({
522
617
  env,
523
618
  clientType,
524
619
  candidate,
525
620
  originalBody: body,
526
- providerBody,
621
+ providerBody: activePlan.providerBody,
527
622
  sourceFormat,
528
- targetFormat
623
+ targetFormat: activePlan.targetFormat
529
624
  });
530
625
 
531
626
  if (isSubscriptionProvider(provider)) {
532
627
  const subscriptionType = String(provider?.subscriptionType || provider?.subscription_type || "").trim().toLowerCase();
628
+ if (subscriptionType === "chatgpt-codex" && ampContext?.threadId) {
629
+ activePlan.providerBody = {
630
+ ...activePlan.providerBody,
631
+ prompt_cache_key: activePlan.providerBody.prompt_cache_key || ampContext.threadId
632
+ };
633
+ }
533
634
  const executeSubscriptionRequest = async (requestBody) => makeSubscriptionProviderCall({
534
635
  provider,
535
636
  body: requestBody,
@@ -537,7 +638,7 @@ export async function makeProviderCall({
537
638
  stream: subscriptionType === "chatgpt-codex" ? true : Boolean(stream),
538
639
  env
539
640
  });
540
- const subscriptionResult = await executeSubscriptionRequest(providerBody);
641
+ const subscriptionResult = await executeSubscriptionRequest(activePlan.providerBody);
541
642
 
542
643
  if (!subscriptionResult?.ok) {
543
644
  return subscriptionResult;
@@ -558,14 +659,14 @@ export async function makeProviderCall({
558
659
  };
559
660
  }
560
661
 
561
- const fallbackModel = candidate?.backend || providerBody?.model || "unknown";
662
+ const fallbackModel = candidate?.backend || activePlan.providerBody?.model || "unknown";
562
663
  let upstreamResponse = subscriptionResult.response;
563
664
  if (interceptAmpWebSearch) {
564
665
  const intercepted = await maybeInterceptAmpWebSearch({
565
666
  response: upstreamResponse,
566
- providerBody,
567
- targetFormat,
568
- requestKind,
667
+ providerBody: activePlan.providerBody,
668
+ targetFormat: activePlan.targetFormat,
669
+ requestKind: activePlan.requestKind,
569
670
  stream,
570
671
  runtimeConfig,
571
672
  env,
@@ -581,11 +682,11 @@ export async function makeProviderCall({
581
682
  return adaptProviderResponse({
582
683
  response: upstreamResponse,
583
684
  stream,
584
- translate,
685
+ translate: activePlan.translate,
585
686
  sourceFormat,
586
- targetFormat,
687
+ targetFormat: activePlan.targetFormat,
587
688
  fallbackModel,
588
- requestKind,
689
+ requestKind: activePlan.requestKind,
589
690
  requestBody: body,
590
691
  clientType,
591
692
  env
@@ -722,20 +823,21 @@ export async function makeProviderCall({
722
823
  };
723
824
  }
724
825
 
725
- const providerUrl = resolveProviderUrl(provider, targetFormat, requestKind);
726
- const headers = mergeCachingHeaders(
727
- buildProviderHeaders(provider, env, targetFormat),
728
- requestHeaders,
729
- targetFormat
730
- );
731
- const executeHttpProviderRequest = async (requestBody) => {
826
+ const executeHttpProviderRequest = async (plan) => {
827
+ const providerUrl = resolveProviderUrl(provider, plan.targetFormat, plan.requestKind);
828
+ if (!providerUrl) return null;
829
+ const headers = mergeCachingHeaders(
830
+ buildProviderHeaders(provider, env, plan.targetFormat),
831
+ requestHeaders,
832
+ plan.targetFormat
833
+ );
732
834
  const timeoutMs = resolveUpstreamTimeoutMs(env);
733
835
  const timeoutControl = buildTimeoutSignal(timeoutMs);
734
836
  try {
735
837
  const init = {
736
838
  method: "POST",
737
839
  headers,
738
- body: JSON.stringify(requestBody)
840
+ body: JSON.stringify(plan.providerBody)
739
841
  };
740
842
  if (timeoutControl.signal) {
741
843
  init.signal = timeoutControl.signal;
@@ -747,7 +849,7 @@ export async function makeProviderCall({
747
849
  }
748
850
  };
749
851
 
750
- if (!providerUrl) {
852
+ if (!resolveProviderUrl(provider, activePlan.targetFormat, activePlan.requestKind)) {
751
853
  return {
752
854
  ok: false,
753
855
  status: 500,
@@ -765,7 +867,7 @@ export async function makeProviderCall({
765
867
 
766
868
  let response;
767
869
  try {
768
- response = await executeHttpProviderRequest(providerBody);
870
+ response = await executeHttpProviderRequest(activePlan);
769
871
  } catch (error) {
770
872
  return {
771
873
  ok: false,
@@ -782,22 +884,40 @@ export async function makeProviderCall({
782
884
  };
783
885
  }
784
886
 
887
+ if ((!response || !response.ok) && fallbackPlan) {
888
+ try {
889
+ const fallbackResponse = await executeHttpProviderRequest(fallbackPlan);
890
+ if (fallbackResponse instanceof Response && fallbackResponse.ok) {
891
+ response = fallbackResponse;
892
+ activePlan = fallbackPlan;
893
+ }
894
+ } catch {
895
+ // Keep the original failure if the fallback request also fails.
896
+ }
897
+ }
898
+
785
899
  if (!response.ok) {
786
900
  const retriedOpenAIHostedWebSearch = await maybeRetryOpenAIHostedWebSearchProviderRequest({
787
901
  response,
788
- executeProviderRequest: executeHttpProviderRequest,
789
- providerBody,
790
- targetFormat,
791
- requestKind
902
+ executeProviderRequest: async (nextProviderBody) => executeHttpProviderRequest({
903
+ ...activePlan,
904
+ providerBody: nextProviderBody
905
+ }),
906
+ providerBody: activePlan.providerBody,
907
+ targetFormat: activePlan.targetFormat,
908
+ requestKind: activePlan.requestKind
792
909
  });
793
910
  response = retriedOpenAIHostedWebSearch.response;
794
- providerBody = retriedOpenAIHostedWebSearch.providerBody;
911
+ activePlan = {
912
+ ...activePlan,
913
+ providerBody: retriedOpenAIHostedWebSearch.providerBody
914
+ };
795
915
  }
796
916
 
797
917
  if (!response.ok) {
798
- const hostedWebSearchErrorKind = await resolveHostedWebSearchErrorKind(response, providerBody, {
799
- targetFormat,
800
- requestKind
918
+ const hostedWebSearchErrorKind = await resolveHostedWebSearchErrorKind(response, activePlan.providerBody, {
919
+ targetFormat: activePlan.targetFormat,
920
+ requestKind: activePlan.requestKind
801
921
  });
802
922
  return {
803
923
  ok: false,
@@ -805,23 +925,26 @@ export async function makeProviderCall({
805
925
  retryable: shouldRetryStatus(response.status),
806
926
  ...(hostedWebSearchErrorKind ? { errorKind: hostedWebSearchErrorKind } : {}),
807
927
  upstreamResponse: response,
808
- translateError: translate
928
+ translateError: activePlan.translate
809
929
  };
810
930
  }
811
931
 
812
932
  if (interceptAmpWebSearch) {
813
933
  const intercepted = await maybeInterceptAmpWebSearch({
814
934
  response,
815
- providerBody,
816
- targetFormat,
817
- requestKind,
935
+ providerBody: activePlan.providerBody,
936
+ targetFormat: activePlan.targetFormat,
937
+ requestKind: activePlan.requestKind,
818
938
  stream,
819
939
  runtimeConfig,
820
940
  env,
821
941
  stateStore,
822
942
  executeProviderRequest: async (followUpBody) => {
823
943
  try {
824
- return await executeHttpProviderRequest(followUpBody);
944
+ return await executeHttpProviderRequest({
945
+ ...activePlan,
946
+ providerBody: followUpBody
947
+ });
825
948
  } catch {
826
949
  return null;
827
950
  }
@@ -833,11 +956,11 @@ export async function makeProviderCall({
833
956
  return adaptProviderResponse({
834
957
  response,
835
958
  stream,
836
- translate,
959
+ translate: activePlan.translate,
837
960
  sourceFormat,
838
- targetFormat,
961
+ targetFormat: activePlan.targetFormat,
839
962
  fallbackModel: candidate.backend,
840
- requestKind,
963
+ requestKind: activePlan.requestKind,
841
964
  requestBody: body,
842
965
  clientType,
843
966
  env
@@ -694,9 +694,10 @@ export function handleClaudeStreamToOpenAIResponses(response, requestBody, fallb
694
694
  const index = Number(payload.index);
695
695
  const blockInfo = payload.content_block || {};
696
696
  state.activeBlocks.set(index, String(blockInfo.type || "").trim());
697
+ // Defer text output item creation until first renderable text delta
698
+ // to avoid emitting empty assistant text scaffolding before tool calls.
697
699
  if (blockInfo.type === "text") {
698
- ensureOpenAIResponsesTextItem(state, controller, encoder);
699
- state.textOpened = true;
700
+ // Intentionally do NOT open text item yet; wait for renderable text in content_block_delta.
700
701
  } else if (blockInfo.type === "thinking" || blockInfo.type === "redacted_thinking") {
701
702
  ensureOpenAIResponsesReasoningItem(state, index, controller, encoder);
702
703
  } else if (blockInfo.type === "tool_use") {
@@ -709,6 +710,10 @@ export function handleClaudeStreamToOpenAIResponses(response, requestBody, fallb
709
710
  const index = Number(payload.index);
710
711
  const delta = payload.delta || {};
711
712
  if (delta.type === "text_delta" && typeof delta.text === "string") {
713
+ const hasRenderableText = /\S/.test(delta.text);
714
+ if (!state.textOpened && !hasRenderableText) {
715
+ return;
716
+ }
712
717
  ensureOpenAIResponsesTextItem(state, controller, encoder);
713
718
  state.textOpened = true;
714
719
  state.textBuffer += delta.text;
@@ -461,3 +461,28 @@ export function isStreamingEnabled(sourceFormat, body) {
461
461
  // Some clients omit `stream` on follow-up/tool turns and expect JSON responses.
462
462
  return body?.stream === true;
463
463
  }
464
+
465
// Per-mode defaults looked up from the lower-cased `x-amp-mode` header.
// An empty string means "no preset for this field".
const AMP_MODE_PRESETS = new Map(
  Object.entries({
    smart: { reasoningEffort: "", toolChoice: "" },
    free: { reasoningEffort: "", toolChoice: "" },
    rush: { reasoningEffort: "low", toolChoice: "" },
    deep: { reasoningEffort: "high", toolChoice: "" },
    large: { reasoningEffort: "", toolChoice: "" },
    bombadil: { reasoningEffort: "", toolChoice: "" }
  })
);

/**
 * Read the `x-amp-*` request headers into a plain context object.
 *
 * Every string field is trimmed; `mode` and `overrideProvider` are also
 * lower-cased. `presets` is the `AMP_MODE_PRESETS` entry for `mode`, or `null`
 * when the mode is unknown. If the request has no usable `headers.get`, all
 * string fields are "" and `presets` is `null`.
 *
 * @param {Request} request - Incoming request (may be null/undefined).
 * @returns {{threadId: string, mode: string, overrideProvider: string,
 *   feature: string, messageId: string, presets: (object|null)}}
 */
export function extractAmpContext(request) {
  const headers = request?.headers;
  const canRead = Boolean(headers) && typeof headers.get === "function";
  const readHeader = (name) => (canRead ? String(headers.get(name) || "").trim() : "");

  const threadId = readHeader("x-amp-thread-id");
  const mode = readHeader("x-amp-mode").toLowerCase();
  const overrideProvider = readHeader("x-amp-override-provider").toLowerCase();
  const feature = readHeader("x-amp-feature");
  const messageId = readHeader("x-amp-message-id");

  return {
    threadId,
    mode,
    overrideProvider,
    feature,
    messageId,
    presets: AMP_MODE_PRESETS.get(mode) || null
  };
}
@@ -24,6 +24,7 @@ import { corsResponse, jsonResponse } from "./handler/http.js";
24
24
  import {
25
25
  detectUserRequestFormat,
26
26
  estimateRequestContextTokens,
27
+ extractAmpContext,
27
28
  inferAmpContextRequirement,
28
29
  isAmpManagementPath,
29
30
  isJsonRequest,
@@ -45,7 +46,7 @@ import {
45
46
  convertAmpGeminiRequestToOpenAI,
46
47
  hasGeminiWebSearchTool
47
48
  } from "./handler/amp-gemini.js";
48
- import { shouldInterceptAmpWebSearch } from "./handler/amp-web-search.js";
49
+ import { shouldInterceptAmpWebSearch, maybeInterceptAmpInternalSearch } from "./handler/amp-web-search.js";
49
50
  import {
50
51
  isRequestFromAllowedIp,
51
52
  resolveAllowedOrigin,
@@ -59,6 +60,7 @@ import {
59
60
  resolveRetryPolicy
60
61
  } from "./handler/fallback.js";
61
62
  import { parseJsonSafely, sleep } from "./handler/utils.js";
63
+ import { createThreadAffinityStore } from "./thread-affinity.js";
62
64
  import {
63
65
  applyCandidateFailureState,
64
66
  applyRuntimeRetryPolicyGuards,
@@ -457,6 +459,9 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
457
459
 
458
460
  const requestedModel = body?.model || "smart";
459
461
  const stream = isStreamingEnabled(sourceFormat, body);
462
+ const ampContext = options.clientType === "amp"
463
+ ? extractAmpContext(request)
464
+ : null;
460
465
 
461
466
  const interceptAmpWebSearch = shouldInterceptAmpWebSearch({
462
467
  clientType: options.clientType,
@@ -481,7 +486,7 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
481
486
 
482
487
  const resolved = resolveRequestModel(config, requestedModel, sourceFormat, {
483
488
  clientType: options.clientType,
484
- providerHint: options.providerHint
489
+ providerHint: ampContext?.overrideProvider || options.providerHint
485
490
  });
486
491
  if (!resolved.primary) {
487
492
  if (options.clientType === "amp" && resolved.allowAmpProxy !== false && isAmpProxyEnabled(config)) {
@@ -604,6 +609,24 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
604
609
  recordRouteSkip(routeDebug, skippedEntry.candidate, skippedEntry.skipReasons);
605
610
  }
606
611
 
612
+ // Thread affinity: reorder candidates to prefer affinity-bound candidate
613
+ if (ampContext?.threadId && options.threadAffinityStore) {
614
+ const affinityCandidateKey = options.threadAffinityStore.getAffinity(ampContext.threadId);
615
+ if (affinityCandidateKey) {
616
+ const affinityIndex = ranking.entries.findIndex(
617
+ (entry) => entry.eligible && entry.candidateKey === affinityCandidateKey
618
+ );
619
+ if (affinityIndex > 0) {
620
+ const [affinityEntry] = ranking.entries.splice(affinityIndex, 1);
621
+ ranking.entries.unshift(affinityEntry);
622
+ ranking.selectedEntry = affinityEntry;
623
+ }
624
+ if (affinityIndex < 0) {
625
+ options.threadAffinityStore.clearAffinity(ampContext.threadId);
626
+ }
627
+ }
628
+ }
629
+
607
630
  if (!ranking.selectedEntry) {
608
631
  return withRouteDebugHeaders(jsonResponse({
609
632
  type: "error",
@@ -652,7 +675,8 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
652
675
  env,
653
676
  clientType: options.clientType,
654
677
  runtimeConfig: config,
655
- stateStore
678
+ stateStore,
679
+ ampContext
656
680
  });
657
681
 
658
682
  if (!quotaConsumed && shouldConsumeQuotaFromResult(result)) {
@@ -694,6 +718,9 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
694
718
  });
695
719
  pendingFallbackContext = null;
696
720
  }
721
+ if (ampContext?.threadId && options.threadAffinityStore) {
722
+ options.threadAffinityStore.setAffinity(ampContext.threadId, entry.candidateKey);
723
+ }
697
724
  return withRouteDebugHeaders(result.response, routeDebug);
698
725
  }
699
726
 
@@ -784,6 +811,7 @@ export function createFetchHandler(options) {
784
811
  }
785
812
 
786
813
  let stateStoreRef = options.stateStore || null;
814
+ const threadAffinityStore = createThreadAffinityStore();
787
815
  let stateStorePromise = null;
788
816
 
789
817
  async function ensureStateStore(env = {}, runtimeFlags = {}) {
@@ -906,6 +934,11 @@ export function createFetchHandler(options) {
906
934
  return respond(jsonResponse({ error: "Forbidden" }, 403));
907
935
  }
908
936
 
937
+ const searchInterceptResult = await maybeInterceptAmpInternalSearch(request, url, config, env);
938
+ if (searchInterceptResult) {
939
+ return respond(searchInterceptResult);
940
+ }
941
+
909
942
  return respond(await proxyAmpUpstreamRequest({ request, config }));
910
943
  }
911
944
 
@@ -1051,7 +1084,8 @@ export function createFetchHandler(options) {
1051
1084
  providerHint: "google",
1052
1085
  requestKind: "chat-completions",
1053
1086
  stateStore,
1054
- runtimeFlags
1087
+ runtimeFlags,
1088
+ threadAffinityStore
1055
1089
  });
1056
1090
 
1057
1091
  if (routeResponse.status >= 400) {
@@ -1095,7 +1129,8 @@ export function createFetchHandler(options) {
1095
1129
  providerHint: route.providerHint,
1096
1130
  requestKind: route.requestKind,
1097
1131
  stateStore,
1098
- runtimeFlags
1132
+ runtimeFlags,
1133
+ threadAffinityStore
1099
1134
  });
1100
1135
  return respond(routeResponse);
1101
1136
  }
@@ -0,0 +1,41 @@
1
const DEFAULT_AFFINITY_TTL_MS = 60 * 60 * 1000; // 1 hour
const MAX_BINDINGS = 10_000;

/**
 * Create an in-memory store that binds a thread id to a candidate key for a
 * limited time.
 *
 * The store is bounded: once it holds more than `maxBindings` entries,
 * expired bindings are pruned first and, if that is not enough, the least
 * recently set bindings are evicted until the cap is respected. Thread ids
 * come from callers, so without a hard cap the map could grow without limit.
 *
 * @param {object} [options]
 * @param {number} [options.ttlMs] - Lifetime of a binding in milliseconds;
 *   falsy values fall back to one hour.
 * @param {number} [options.maxBindings] - Maximum number of bindings kept;
 *   anything other than a positive integer falls back to 10,000.
 * @returns {{
 *   getAffinity: (threadId: string) => (string|null),
 *   setAffinity: (threadId: string, candidateKey: string) => void,
 *   clearAffinity: (threadId: string) => void,
 *   pruneExpired: (now?: number) => void,
 *   _bindings: Map<string, {candidateKey: string, lastSeen: number, expiresAt: number}>
 * }}
 */
export function createThreadAffinityStore(options = {}) {
  const ttlMs = options.ttlMs || DEFAULT_AFFINITY_TTL_MS;
  const maxBindings = Number.isInteger(options.maxBindings) && options.maxBindings > 0
    ? options.maxBindings
    : MAX_BINDINGS;
  const bindings = new Map();

  /** Remove every binding whose expiry time is at or before `now`. */
  function pruneExpired(now = Date.now()) {
    for (const [key, binding] of bindings) {
      if (binding.expiresAt <= now) bindings.delete(key);
    }
  }

  /**
   * Drop the least recently set bindings until the cap is respected.
   * A Map iterates in insertion order and setAffinity re-inserts on every
   * refresh, so the first keys are always the stalest ones.
   */
  function evictOverflow() {
    for (const key of bindings.keys()) {
      if (bindings.size <= maxBindings) break;
      bindings.delete(key);
    }
  }

  /** Return the candidate key bound to `threadId`, or null if absent or expired. */
  function getAffinity(threadId) {
    if (!threadId) return null;
    const binding = bindings.get(threadId);
    if (!binding) return null;
    if (binding.expiresAt <= Date.now()) {
      bindings.delete(threadId);
      return null;
    }
    return binding.candidateKey;
  }

  /** Bind `threadId` to `candidateKey`, restarting the TTL; no-op on empty arguments. */
  function setAffinity(threadId, candidateKey) {
    if (!threadId || !candidateKey) return;
    const now = Date.now();
    // Delete first so a refreshed binding moves to the end of the insertion
    // order and is not mistaken for the oldest entry during eviction.
    bindings.delete(threadId);
    bindings.set(threadId, {
      candidateKey,
      lastSeen: now,
      expiresAt: now + ttlMs
    });
    if (bindings.size > maxBindings) {
      pruneExpired(now);
      evictOverflow();
    }
  }

  /** Forget any binding for `threadId`. */
  function clearAffinity(threadId) {
    if (threadId) bindings.delete(threadId);
  }

  return { getAffinity, setAffinity, clearAffinity, pruneExpired, _bindings: bindings };
}
@@ -52,7 +52,7 @@ export function openaiToClaudeResponse(chunk, state) {
52
52
 
53
53
  // Handle regular content
54
54
  const textDelta = normalizeTextDelta(delta?.content);
55
- if (textDelta) {
55
+ if (textDelta && (state.textBlockStarted || hasRenderableText(textDelta))) {
56
56
  stopThinkingBlock(state, results);
57
57
 
58
58
  if (!state.textBlockStarted) {
@@ -119,6 +119,10 @@ export function openaiToClaudeResponse(chunk, state) {
119
119
  return results.length > 0 ? results : null;
120
120
  }
121
121
 
122
/**
 * Report whether `text` is a string containing at least one
 * non-whitespace character.
 *
 * @param {unknown} text - Value to inspect.
 * @returns {boolean} False for non-strings, empty strings and
 *   whitespace-only strings; true otherwise.
 */
function hasRenderableText(text) {
  if (typeof text !== "string") {
    return false;
  }
  // search() yields -1 when no non-whitespace character exists.
  const firstVisibleIndex = text.search(/\S/);
  return firstVisibleIndex !== -1;
}
125
+
122
126
  function normalizeTextDelta(content) {
123
127
  if (typeof content === "string") return content;
124
128
  if (!Array.isArray(content)) return "";
@@ -191,6 +195,7 @@ function normalizeMessageToolCalls(message) {
191
195
 
192
196
  function emitTextDelta(text, state, results) {
193
197
  if (!text) return;
198
+ if (!state.textBlockStarted && !hasRenderableText(text)) return;
194
199
  stopThinkingBlock(state, results);
195
200
 
196
201
  if (!state.textBlockStarted) {