@khanglvm/llm-router 2.0.0-beta.2 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -7,6 +7,27 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ### Fixed
11
+ - Raised the default inbound JSON body limit for OpenAI `/responses` requests from `1 MiB` to `8 MiB` while keeping other JSON routes at `1 MiB`. This prevents local `413 Request body too large` failures for Codex CLI and other Responses API clients carrying larger conversation state.
12
+
13
+ ## [2.0.1] - 2026-03-15
14
+
15
+ ### Fixed
16
+ - Fixed alias-route failover after transient upstream failures. When every candidate on a route was only in cooldown, the balancer now retries the earliest-recovering candidate instead of returning `No eligible providers remain for route ...`.
17
+
18
+ ## [2.0.0] - 2026-03-15
19
+
20
+ ### Changed
21
+ - Promoted the 2.x operator surface to the official stable `2.0.0` release.
22
+ - Rebranded the user-facing CLI/docs name to `LLM Router` with `llr` as the primary command while keeping the published package scope as `@khanglvm/llm-router`.
23
+ - Updated README and CLI help/examples to use the new branding and command.
24
+ - Expanded the CLI management surface so agents can validate config state, inspect runtime/tool state (`validate`, `snapshot`, `tool-status`), reclaim the fixed local router port, run standalone provider diagnostics, and patch Codex CLI / Claude Code / AMP client routing without depending on the web console.
25
+ - Updated `llr ai-help` and local agent instructions to prefer first-party CLI commands for validation, router recovery, coding-tool routing, and router inspection.
26
+
27
+ ### Removed
28
+ - Removed prerelease release notes from the main public docs surface for the stable `2.0.0` release.
29
+ - Removed the deprecated TUI entry flow from the supported operator surface and from the real-provider live suite coverage.
30
+
10
31
  ## [2.0.0-beta.2] - 2026-03-13
11
32
 
12
33
  ### Changed
package/README.md CHANGED
@@ -14,24 +14,14 @@ The primary CLI command is now:
14
14
  llr
15
15
  ```
16
16
 
17
- ## Beta Notice
18
-
19
- `2.0.0-beta.2` is the current public prerelease. It includes major operator-surface changes around the Web UI, AMP routing, and coding-tool integrations.
17
+ `2.0.1` is the current public release. It includes the Web UI, AMP routing, and coding-tool integrations introduced in the 2.x line.
20
18
 
21
19
  ## Install
22
20
 
23
- Stable:
24
-
25
21
  ```bash
26
22
  npm i -g @khanglvm/llm-router@latest
27
23
  ```
28
24
 
29
- Beta:
30
-
31
- ```bash
32
- npm i -g @khanglvm/llm-router@2.0.0-beta.2
33
- ```
34
-
35
25
  ## Quick Start
36
26
 
37
27
  1. Open the Web UI:
@@ -251,6 +241,11 @@ Local config path:
251
241
 
252
242
  LLM Router also keeps related runtime and token state under the same namespace for backward compatibility with the published package.
253
243
 
244
+ Useful runtime env knobs:
245
+
246
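+ - `LLM_ROUTER_MAX_REQUEST_BODY_BYTES`: caps inbound JSON body size, in bytes, for the local router and worker runtime. When unset, the default is `8 MiB` for `/responses` requests and `1 MiB` for other JSON endpoints; when set, the same value applies to every JSON endpoint and is capped at `20 MiB`.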
247
+ - `LLM_ROUTER_UPSTREAM_TIMEOUT_MS`: overrides the provider request timeout.
248
+
254
249
  ## Development
255
250
 
256
251
  Web UI dev loop:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@khanglvm/llm-router",
3
- "version": "2.0.0-beta.2",
3
+ "version": "2.0.1",
4
4
  "description": "LLM Router: single gateway endpoint for multi-provider LLMs with unified OpenAI+Anthropic format and seamless fallback",
5
5
  "keywords": [
6
6
  "llm-router",
@@ -186,6 +186,19 @@ function sortEntriesByOriginalOrder(left, right) {
186
186
  return left.originalIndex - right.originalIndex;
187
187
  }
188
188
 
189
+ function sortCooldownEntries(left, right) {
190
+ if (left.openUntil !== right.openUntil) {
191
+ return left.openUntil - right.openUntil;
192
+ }
193
+ return sortEntriesByOriginalOrder(left, right);
194
+ }
195
+
196
+ function isCooldownOnlyEntry(entry) {
197
+ if (!entry || entry.eligible) return false;
198
+ const reasons = Array.isArray(entry.skipReasons) ? entry.skipReasons : [];
199
+ return reasons.length > 0 && reasons.every((reason) => reason === "cooldown");
200
+ }
201
+
189
202
  async function buildCandidateEntries({
190
203
  candidates,
191
204
  stateStore,
@@ -315,6 +328,19 @@ export async function rankRouteCandidates({
315
328
  const ineligibleEntries = entries
316
329
  .filter((entry) => !entry.eligible)
317
330
  .sort(sortEntriesByOriginalOrder);
331
+ const fallbackCooldownEntries = eligibleEntries.length === 0
332
+ ? ineligibleEntries
333
+ .filter((entry) => isCooldownOnlyEntry(entry))
334
+ .sort(sortCooldownEntries)
335
+ .map((entry) => ({
336
+ ...entry,
337
+ eligible: true,
338
+ skipReasons: [...entry.skipReasons, "cooldown-overridden"]
339
+ }))
340
+ : [];
341
+ const skippedIneligibleEntries = fallbackCooldownEntries.length > 0
342
+ ? ineligibleEntries.filter((entry) => !isCooldownOnlyEntry(entry))
343
+ : ineligibleEntries;
318
344
  const estimatedRequiredTokens = normalizeNonNegativeInteger(
319
345
  requestContext?.estimatedRequiredTokens ??
320
346
  requestContext?.requiredTokens ??
@@ -324,10 +350,13 @@ export async function rankRouteCandidates({
324
350
  const routeCursor = stateStore
325
351
  ? await stateStore.getRouteCursor(resolvedRouteKey)
326
352
  : 0;
353
+ const rankableEntries = fallbackCooldownEntries.length > 0
354
+ ? fallbackCooldownEntries
355
+ : eligibleEntries;
327
356
  const contextAwareGroups = shouldApplyContextAwareOrdering(route, estimatedRequiredTokens)
328
- ? partitionEligibleEntriesByContextWindow(eligibleEntries, estimatedRequiredTokens)
357
+ ? partitionEligibleEntriesByContextWindow(rankableEntries, estimatedRequiredTokens)
329
358
  : {
330
- prioritizedEntries: eligibleEntries,
359
+ prioritizedEntries: rankableEntries,
331
360
  deferredEntries: []
332
361
  };
333
362
  const ranking = rankEligibleEntries(
@@ -339,7 +368,7 @@ export async function rankRouteCandidates({
339
368
  const rankedEntries = [
340
369
  ...ranking.orderedEligible,
341
370
  ...contextAwareGroups.deferredEntries,
342
- ...ineligibleEntries
371
+ ...skippedIneligibleEntries
343
372
  ];
344
373
 
345
374
  return {
@@ -351,7 +380,7 @@ export async function rankRouteCandidates({
351
380
  shouldAdvanceCursor: ranking.shouldAdvanceCursor,
352
381
  entries: rankedEntries,
353
382
  selectedEntry: ranking.orderedEligible[0] || null,
354
- skippedEntries: ineligibleEntries,
383
+ skippedEntries: skippedIneligibleEntries,
355
384
  rankedCandidates: rankedEntries.map((entry) => entry.candidate)
356
385
  };
357
386
  }
@@ -193,10 +193,15 @@ function normalizeMessageRole(role) {
193
193
  return 'user';
194
194
  }
195
195
 
196
- function normalizeInputMessageContent(content) {
196
+ function getResponsesTextPartTypeForRole(role) {
197
+ return normalizeMessageRole(role) === 'assistant' ? 'output_text' : 'input_text';
198
+ }
199
+
200
+ function normalizeInputMessageContent(content, role) {
201
+ const textPartType = getResponsesTextPartTypeForRole(role);
197
202
  if (typeof content === 'string') {
198
203
  return content
199
- ? [{ type: 'input_text', text: content }]
204
+ ? [{ type: textPartType, text: content }]
200
205
  : [];
201
206
  }
202
207
 
@@ -208,7 +213,7 @@ function normalizeInputMessageContent(content) {
208
213
 
209
214
  if ((part.type === 'text' || part.type === 'input_text' || part.type === 'output_text') && typeof part.text === 'string') {
210
215
  parts.push({
211
- type: 'input_text',
216
+ type: textPartType,
212
217
  text: part.text
213
218
  });
214
219
  continue;
@@ -284,11 +289,12 @@ function convertMessagesToResponseInput(messages) {
284
289
  continue;
285
290
  }
286
291
 
287
- const contentParts = normalizeInputMessageContent(normalizedMessage.content);
292
+ const normalizedRole = normalizeMessageRole(normalizedMessage.role);
293
+ const contentParts = normalizeInputMessageContent(normalizedMessage.content, normalizedRole);
288
294
  if (contentParts.length > 0) {
289
295
  items.push({
290
296
  type: 'message',
291
- role: normalizeMessageRole(normalizedMessage.role),
297
+ role: normalizedRole,
292
298
  content: contentParts
293
299
  });
294
300
  } else {
@@ -296,8 +302,11 @@ function convertMessagesToResponseInput(messages) {
296
302
  if (fallbackText) {
297
303
  items.push({
298
304
  type: 'message',
299
- role: normalizeMessageRole(normalizedMessage.role),
300
- content: [{ type: 'input_text', text: fallbackText }]
305
+ role: normalizedRole,
306
+ content: [{
307
+ type: getResponsesTextPartTypeForRole(normalizedRole),
308
+ text: fallbackText
309
+ }]
301
310
  });
302
311
  }
303
312
  }
@@ -12,10 +12,14 @@ import {
12
12
  import { isSubscriptionProvider, makeSubscriptionProviderCall } from "../subscription-provider.js";
13
13
 
14
14
  const SEARCH_TOOL_NAME = "web_search";
15
+ const READ_WEB_PAGE_TOOL_NAME = "read_web_page";
15
16
  const DEFAULT_SEARCH_COUNT = 5;
16
17
  const MIN_SEARCH_COUNT = 1;
17
18
  const MAX_SEARCH_COUNT = 20;
18
19
  const DEFAULT_TIMEOUT_MS = 15_000;
20
+ const MAX_READ_WEB_PAGE_TEXT_CHARS = 24_000;
21
+ const MAX_READ_WEB_PAGE_TABLES = 8;
22
+ const MAX_READ_WEB_PAGE_TABLE_ROWS = 40;
19
23
  const SEARCH_ROUTE_KEY = "route:amp-web-search";
20
24
  const HOSTED_WEB_SEARCH_TEST_QUERY = "Find the sunrise time in Paris today and cite the source.";
21
25
  const SEARCH_SYSTEM_INSTRUCTION = [
@@ -64,6 +68,18 @@ const WEB_SEARCH_FUNCTION_PARAMETERS = {
64
68
  additionalProperties: false
65
69
  };
66
70
 
71
+ const READ_WEB_PAGE_FUNCTION_PARAMETERS = {
72
+ type: "object",
73
+ properties: {
74
+ url: {
75
+ type: "string",
76
+ description: "The absolute URL of the web page to read."
77
+ }
78
+ },
79
+ required: ["url"],
80
+ additionalProperties: true
81
+ };
82
+
67
83
  const OPENAI_WEB_SEARCH_TOOL = Object.freeze({
68
84
  type: "function",
69
85
  function: {
@@ -79,6 +95,21 @@ const CLAUDE_WEB_SEARCH_TOOL = Object.freeze({
79
95
  input_schema: WEB_SEARCH_FUNCTION_PARAMETERS
80
96
  });
81
97
 
98
+ const OPENAI_READ_WEB_PAGE_TOOL = Object.freeze({
99
+ type: "function",
100
+ function: {
101
+ name: READ_WEB_PAGE_TOOL_NAME,
102
+ description: "Fetch and extract the readable text and table content from a web page URL.",
103
+ parameters: READ_WEB_PAGE_FUNCTION_PARAMETERS
104
+ }
105
+ });
106
+
107
+ const CLAUDE_READ_WEB_PAGE_TOOL = Object.freeze({
108
+ name: READ_WEB_PAGE_TOOL_NAME,
109
+ description: "Fetch and extract the readable text and table content from a web page URL.",
110
+ input_schema: READ_WEB_PAGE_FUNCTION_PARAMETERS
111
+ });
112
+
82
113
  function toInteger(value, fallback, { min = Number.MIN_SAFE_INTEGER, max = Number.MAX_SAFE_INTEGER } = {}) {
83
114
  const parsed = Number.parseInt(String(value ?? ""), 10);
84
115
  if (!Number.isFinite(parsed)) return fallback;
@@ -128,6 +159,119 @@ function stripHtml(text) {
128
159
  .trim();
129
160
  }
130
161
 
162
+ function clampText(value, maxChars = MAX_READ_WEB_PAGE_TEXT_CHARS) {
163
+ const normalized = String(value || "").trim();
164
+ if (!normalized) return "";
165
+ if (normalized.length <= maxChars) return normalized;
166
+ return `${normalized.slice(0, Math.max(0, maxChars - 15)).trimEnd()}\n[truncated]`;
167
+ }
168
+
169
+ function stripHtmlPreservingLines(text) {
170
+ const normalized = String(text || "")
171
+ .replace(/<(br|hr)\s*\/?>/gi, "\n")
172
+ .replace(/<\/(p|div|section|article|main|header|footer|aside|nav|li|tr|h1|h2|h3|h4|h5|h6|ul|ol|table)>/gi, "\n")
173
+ .replace(/<li\b[^>]*>/gi, "- ")
174
+ .replace(/<\/t[dh]>/gi, " | ")
175
+ .replace(/<t[dh]\b[^>]*>/gi, " ")
176
+ .replace(/<[^>]+>/g, " ");
177
+ return normalized
178
+ .replace(/&nbsp;/gi, " ")
179
+ .replace(/&amp;/gi, "&")
180
+ .replace(/&#x27;|&#39;/gi, "'")
181
+ .replace(/&quot;/gi, "\"")
182
+ .replace(/&lt;/gi, "<")
183
+ .replace(/&gt;/gi, ">")
184
+ .split(/\n+/)
185
+ .map((line) => line.replace(/\s+/g, " ").trim())
186
+ .filter(Boolean)
187
+ .join("\n")
188
+ .trim();
189
+ }
190
+
191
+ function extractHtmlTitle(html) {
192
+ return stripHtml((String(html || "").match(/<title[^>]*>([\s\S]*?)<\/title>/i) || [])[1] || "");
193
+ }
194
+
195
+ function extractPreferredHtmlSection(html) {
196
+ const normalized = String(html || "");
197
+ for (const tagName of ["main", "article", "body"]) {
198
+ const match = normalized.match(new RegExp(`<${tagName}\\b[^>]*>([\\s\\S]*?)<\\/${tagName}>`, "i"));
199
+ if (match?.[1]) return match[1];
200
+ }
201
+ return normalized;
202
+ }
203
+
204
+ function removeHtmlNoise(html) {
205
+ return String(html || "")
206
+ .replace(/<script\b[\s\S]*?<\/script>/gi, " ")
207
+ .replace(/<style\b[\s\S]*?<\/style>/gi, " ")
208
+ .replace(/<noscript\b[\s\S]*?<\/noscript>/gi, " ")
209
+ .replace(/<svg\b[\s\S]*?<\/svg>/gi, " ")
210
+ .replace(/<template\b[\s\S]*?<\/template>/gi, " ");
211
+ }
212
+
213
+ function extractHtmlTables(html) {
214
+ const tableBlocks = [...String(html || "").matchAll(/<table\b[\s\S]*?<\/table>/gi)].slice(0, MAX_READ_WEB_PAGE_TABLES);
215
+ const tables = [];
216
+
217
+ for (let index = 0; index < tableBlocks.length; index += 1) {
218
+ const tableHtml = tableBlocks[index]?.[0] || "";
219
+ const caption = stripHtml((tableHtml.match(/<caption\b[^>]*>([\s\S]*?)<\/caption>/i) || [])[1] || "");
220
+ const rowBlocks = [...tableHtml.matchAll(/<tr\b[\s\S]*?<\/tr>/gi)].slice(0, MAX_READ_WEB_PAGE_TABLE_ROWS);
221
+ const rows = rowBlocks.map((rowBlock) => {
222
+ const rowHtml = rowBlock?.[0] || "";
223
+ return [...rowHtml.matchAll(/<t[dh]\b[^>]*>([\s\S]*?)<\/t[dh]>/gi)]
224
+ .map((cellMatch) => stripHtml(cellMatch?.[1] || ""))
225
+ .filter((cell) => cell.length > 0);
226
+ }).filter((row) => row.length > 0);
227
+
228
+ if (rows.length === 0) continue;
229
+ const formattedRows = rows.map((row) => `| ${row.join(" | ")} |`).join("\n");
230
+ tables.push([
231
+ caption ? `Table ${tables.length + 1}: ${caption}` : `Table ${tables.length + 1}:`,
232
+ formattedRows
233
+ ].join("\n"));
234
+ }
235
+
236
+ return tables;
237
+ }
238
+
239
+ function formatReadWebPageHtml(url, html) {
240
+ const cleanHtml = removeHtmlNoise(html);
241
+ const title = extractHtmlTitle(cleanHtml);
242
+ const mainSection = extractPreferredHtmlSection(cleanHtml);
243
+ const tables = extractHtmlTables(mainSection);
244
+ const pageText = clampText(stripHtmlPreservingLines(mainSection));
245
+ const sections = [
246
+ `URL: ${url}`
247
+ ];
248
+
249
+ if (title) sections.push(`Title: ${title}`);
250
+ if (tables.length > 0) sections.push(`Tables:\n${tables.join("\n\n")}`);
251
+ if (pageText) sections.push(`Page text:\n${pageText}`);
252
+
253
+ return sections.join("\n\n").trim();
254
+ }
255
+
256
+ function formatReadWebPageBody(url, bodyText, contentType = "") {
257
+ const sections = [
258
+ `URL: ${url}`
259
+ ];
260
+ const normalizedContentType = String(contentType || "").trim();
261
+ if (normalizedContentType) sections.push(`Content-Type: ${normalizedContentType}`);
262
+ sections.push(`Page text:\n${clampText(bodyText) || "[No readable page text extracted]"}`);
263
+ return sections.join("\n\n").trim();
264
+ }
265
+
266
+ function looksLikeHtml(contentType, bodyText) {
267
+ const normalizedContentType = String(contentType || "").toLowerCase();
268
+ if (normalizedContentType.includes("text/html") || normalizedContentType.includes("application/xhtml+xml")) {
269
+ return true;
270
+ }
271
+ const sample = String(bodyText || "").trim().slice(0, 512).toLowerCase();
272
+ return sample.startsWith("<!doctype html") || sample.startsWith("<html") || sample.includes("<body");
273
+ }
274
+
131
275
  function formatSearchResults(results) {
132
276
  const lines = [];
133
277
  for (let index = 0; index < results.length; index += 1) {
@@ -157,6 +301,34 @@ function hasSearchToolName(name) {
157
301
  return normalized === SEARCH_TOOL_NAME || normalized === "web_search_preview";
158
302
  }
159
303
 
304
+ function hasReadWebPageToolName(name) {
305
+ return String(name || "").trim().toLowerCase() === READ_WEB_PAGE_TOOL_NAME;
306
+ }
307
+
308
+ function hasInterceptableTool(tool) {
309
+ if (!tool || typeof tool !== "object") return false;
310
+ return hasSearchToolType(tool.type)
311
+ || hasSearchToolName(tool.name)
312
+ || hasSearchToolName(tool.function?.name)
313
+ || hasReadWebPageToolName(tool.name)
314
+ || hasReadWebPageToolName(tool.function?.name);
315
+ }
316
+
317
+ function hasInterceptableToolName(name) {
318
+ return hasSearchToolName(name) || hasReadWebPageToolName(name);
319
+ }
320
+
321
+ function getToolName(tool) {
322
+ if (!tool || typeof tool !== "object") return "";
323
+ if (hasReadWebPageToolName(tool.name) || hasReadWebPageToolName(tool.function?.name)) {
324
+ return READ_WEB_PAGE_TOOL_NAME;
325
+ }
326
+ if (hasSearchToolType(tool.type) || hasSearchToolName(tool.name) || hasSearchToolName(tool.function?.name)) {
327
+ return SEARCH_TOOL_NAME;
328
+ }
329
+ return "";
330
+ }
331
+
160
332
  function dedupeStrings(values = []) {
161
333
  return [...new Set(
162
334
  (Array.isArray(values) ? values : [values])
@@ -1076,20 +1248,25 @@ export async function executeAmpWebSearch(query, runtimeConfig = {}, env = {}, o
1076
1248
  }
1077
1249
 
1078
1250
  export function shouldInterceptAmpWebSearch({ clientType, originalBody, runtimeConfig, env }) {
1079
- if (clientType !== "amp") return false;
1080
1251
  const tools = Array.isArray(originalBody?.tools) ? originalBody.tools : [];
1081
- if (!tools.some((tool) => {
1082
- if (!tool || typeof tool !== "object") return false;
1083
- return hasSearchToolType(tool.type) || hasSearchToolName(tool.name) || hasSearchToolName(tool.function?.name);
1084
- })) {
1252
+ const requestedToolNames = dedupeStrings(tools.map((tool) => getToolName(tool)).filter(Boolean));
1253
+ if (requestedToolNames.length === 0) {
1085
1254
  return false;
1086
1255
  }
1087
- return resolveAmpWebSearchConfig(runtimeConfig, env).providers.some((provider) => {
1256
+ const readyProviders = resolveAmpWebSearchConfig(runtimeConfig, env).providers.filter((provider) => {
1088
1257
  if (!isSearchProviderConfigured(provider)) return false;
1089
1258
  if (!isHostedSearchProvider(provider)) return true;
1090
1259
  const resolvedRoute = getResolvedHostedSearchRoute(runtimeConfig, provider);
1091
1260
  return Boolean(resolvedRoute && supportsResolvedHostedSearchRoute(resolvedRoute.provider, resolvedRoute.model));
1092
1261
  });
1262
+ if (readyProviders.length === 0) {
1263
+ return clientType === "amp" && requestedToolNames.includes(READ_WEB_PAGE_TOOL_NAME);
1264
+ }
1265
+ if (clientType === "amp") {
1266
+ if (requestedToolNames.includes(READ_WEB_PAGE_TOOL_NAME)) return true;
1267
+ return true;
1268
+ }
1269
+ return true;
1093
1270
  }
1094
1271
 
1095
1272
  export function rewriteProviderBodyForAmpWebSearch(providerBody, targetFormat) {
@@ -1101,21 +1278,22 @@ export function rewriteProviderBodyForAmpWebSearch(providerBody, targetFormat) {
1101
1278
  };
1102
1279
  }
1103
1280
 
1104
- let hasWebSearch = false;
1281
+ const interceptedToolNames = new Set();
1105
1282
  const nextTools = [];
1106
1283
  for (const tool of tools) {
1107
1284
  if (!tool || typeof tool !== "object") {
1108
1285
  nextTools.push(tool);
1109
1286
  continue;
1110
1287
  }
1111
- if (hasSearchToolType(tool.type) || hasSearchToolName(tool.name) || hasSearchToolName(tool.function?.name)) {
1112
- hasWebSearch = true;
1288
+ const toolName = getToolName(tool);
1289
+ if (toolName) {
1290
+ interceptedToolNames.add(toolName);
1113
1291
  continue;
1114
1292
  }
1115
1293
  nextTools.push(tool);
1116
1294
  }
1117
1295
 
1118
- if (!hasWebSearch) {
1296
+ if (interceptedToolNames.size === 0) {
1119
1297
  return {
1120
1298
  providerBody,
1121
1299
  hasWebSearch: false
@@ -1123,9 +1301,11 @@ export function rewriteProviderBodyForAmpWebSearch(providerBody, targetFormat) {
1123
1301
  }
1124
1302
 
1125
1303
  if (targetFormat === FORMATS.OPENAI) {
1126
- nextTools.push(OPENAI_WEB_SEARCH_TOOL);
1304
+ if (interceptedToolNames.has(SEARCH_TOOL_NAME)) nextTools.push(OPENAI_WEB_SEARCH_TOOL);
1305
+ if (interceptedToolNames.has(READ_WEB_PAGE_TOOL_NAME)) nextTools.push(OPENAI_READ_WEB_PAGE_TOOL);
1127
1306
  } else if (targetFormat === FORMATS.CLAUDE) {
1128
- nextTools.push(CLAUDE_WEB_SEARCH_TOOL);
1307
+ if (interceptedToolNames.has(SEARCH_TOOL_NAME)) nextTools.push(CLAUDE_WEB_SEARCH_TOOL);
1308
+ if (interceptedToolNames.has(READ_WEB_PAGE_TOOL_NAME)) nextTools.push(CLAUDE_READ_WEB_PAGE_TOOL);
1129
1309
  }
1130
1310
 
1131
1311
  return {
@@ -1141,7 +1321,7 @@ function extractOpenAIChatProbe(payload) {
1141
1321
  const choice = Array.isArray(payload?.choices) ? payload.choices[0] : null;
1142
1322
  const message = choice?.message && typeof choice.message === "object" ? choice.message : null;
1143
1323
  const toolCalls = Array.isArray(message?.tool_calls)
1144
- ? message.tool_calls.filter((item) => hasSearchToolName(item?.function?.name))
1324
+ ? message.tool_calls.filter((item) => hasInterceptableToolName(item?.function?.name))
1145
1325
  : [];
1146
1326
 
1147
1327
  return {
@@ -1171,7 +1351,7 @@ function normalizeResponseInput(input) {
1171
1351
 
1172
1352
  function extractOpenAIResponsesProbe(payload) {
1173
1353
  const output = Array.isArray(payload?.output) ? payload.output : [];
1174
- const toolCalls = output.filter((item) => item?.type === "function_call" && hasSearchToolName(item?.name));
1354
+ const toolCalls = output.filter((item) => item?.type === "function_call" && hasInterceptableToolName(item?.name));
1175
1355
  const assistantInputItems = output
1176
1356
  .filter((item) => item && item.type !== "reasoning")
1177
1357
  .map((item) => {
@@ -1204,7 +1384,7 @@ function extractOpenAIResponsesProbe(payload) {
1204
1384
  function extractClaudeProbe(payload) {
1205
1385
  const content = Array.isArray(payload?.content) ? payload.content : [];
1206
1386
  const assistantContent = content.filter((item) => item?.type !== "thinking" && item?.type !== "redacted_thinking");
1207
- const toolCalls = assistantContent.filter((item) => item?.type === "tool_use" && hasSearchToolName(item?.name));
1387
+ const toolCalls = assistantContent.filter((item) => item?.type === "tool_use" && hasInterceptableToolName(item?.name));
1208
1388
 
1209
1389
  return {
1210
1390
  hasWebSearchCalls: toolCalls.length > 0,
@@ -1240,6 +1420,33 @@ function extractQueryFromToolCall(toolCall) {
1240
1420
  return "";
1241
1421
  }
1242
1422
 
1423
+ function extractUrlFromToolCall(toolCall) {
1424
+ if (!toolCall || typeof toolCall !== "object") return "";
1425
+ for (const candidate of [
1426
+ toolCall?.input?.url,
1427
+ toolCall?.input?.uri,
1428
+ toolCall?.input?.href
1429
+ ]) {
1430
+ if (typeof candidate === "string" && candidate.trim()) return candidate.trim();
1431
+ }
1432
+ const parsedArguments = parseJsonSafely(toolCall?.arguments, {});
1433
+ for (const candidate of [parsedArguments?.url, parsedArguments?.uri, parsedArguments?.href]) {
1434
+ if (typeof candidate === "string" && candidate.trim()) return candidate.trim();
1435
+ }
1436
+ const parsedFunctionArguments = parseJsonSafely(toolCall?.function?.arguments, {});
1437
+ for (const candidate of [parsedFunctionArguments?.url, parsedFunctionArguments?.uri, parsedFunctionArguments?.href]) {
1438
+ if (typeof candidate === "string" && candidate.trim()) return candidate.trim();
1439
+ }
1440
+ for (const candidate of [toolCall?.function?.url, toolCall?.function?.uri, toolCall?.function?.href]) {
1441
+ if (typeof candidate === "string" && candidate.trim()) return candidate.trim();
1442
+ }
1443
+ return "";
1444
+ }
1445
+
1446
+ function getToolCallName(toolCall) {
1447
+ return getToolName(toolCall);
1448
+ }
1449
+
1243
1450
  function buildToolResultText(query, searchText) {
1244
1451
  const normalizedQuery = String(query || "").trim();
1245
1452
  const normalizedResults = String(searchText || "").trim() || "[No search results available]";
@@ -1247,6 +1454,96 @@ function buildToolResultText(query, searchText) {
1247
1454
  return `Web search results for "${normalizedQuery}":\n\n${normalizedResults}`;
1248
1455
  }
1249
1456
 
1457
+ function buildReadWebPageResultText(url, pageText) {
1458
+ const normalizedUrl = String(url || "").trim();
1459
+ const normalizedPageText = String(pageText || "").trim() || "[Unable to extract web page content]";
1460
+ if (!normalizedUrl) return normalizedPageText;
1461
+ return `Web page content from "${normalizedUrl}":\n\n${normalizedPageText}`;
1462
+ }
1463
+
1464
+ async function executeAmpReadWebPage(url) {
1465
+ const normalizedUrl = String(url || "").trim();
1466
+ if (!normalizedUrl) {
1467
+ return {
1468
+ text: "[Missing URL for read_web_page]",
1469
+ providerId: READ_WEB_PAGE_TOOL_NAME,
1470
+ backend: READ_WEB_PAGE_TOOL_NAME,
1471
+ tag: READ_WEB_PAGE_TOOL_NAME
1472
+ };
1473
+ }
1474
+
1475
+ let parsedUrl;
1476
+ try {
1477
+ parsedUrl = new URL(normalizedUrl);
1478
+ } catch {
1479
+ return {
1480
+ text: `[Invalid URL for read_web_page: ${normalizedUrl}]`,
1481
+ providerId: READ_WEB_PAGE_TOOL_NAME,
1482
+ backend: READ_WEB_PAGE_TOOL_NAME,
1483
+ tag: READ_WEB_PAGE_TOOL_NAME
1484
+ };
1485
+ }
1486
+
1487
+ if (!["http:", "https:"].includes(parsedUrl.protocol)) {
1488
+ return {
1489
+ text: `[Unsupported URL protocol for read_web_page: ${parsedUrl.protocol}]`,
1490
+ providerId: READ_WEB_PAGE_TOOL_NAME,
1491
+ backend: READ_WEB_PAGE_TOOL_NAME,
1492
+ tag: READ_WEB_PAGE_TOOL_NAME
1493
+ };
1494
+ }
1495
+
1496
+ try {
1497
+ const response = await runFetchWithTimeout(parsedUrl.toString(), {
1498
+ headers: {
1499
+ Accept: "text/html,application/xhtml+xml,text/plain,application/json;q=0.9,*/*;q=0.8",
1500
+ "User-Agent": "llm-router"
1501
+ }
1502
+ });
1503
+ if (!response.ok) {
1504
+ return {
1505
+ text: `[Failed to read web page: ${await readSearchProviderError(response)}]`,
1506
+ providerId: READ_WEB_PAGE_TOOL_NAME,
1507
+ backend: READ_WEB_PAGE_TOOL_NAME,
1508
+ tag: READ_WEB_PAGE_TOOL_NAME
1509
+ };
1510
+ }
1511
+
1512
+ const contentType = String(response.headers.get("content-type") || "").trim();
1513
+ const bodyText = await response.text();
1514
+ const formattedText = looksLikeHtml(contentType, bodyText)
1515
+ ? formatReadWebPageHtml(parsedUrl.toString(), bodyText)
1516
+ : formatReadWebPageBody(parsedUrl.toString(), bodyText, contentType);
1517
+
1518
+ return {
1519
+ text: formattedText,
1520
+ providerId: READ_WEB_PAGE_TOOL_NAME,
1521
+ backend: READ_WEB_PAGE_TOOL_NAME,
1522
+ tag: READ_WEB_PAGE_TOOL_NAME
1523
+ };
1524
+ } catch (error) {
1525
+ return {
1526
+ text: `[Failed to read web page: ${error instanceof Error ? error.message : String(error)}]`,
1527
+ providerId: READ_WEB_PAGE_TOOL_NAME,
1528
+ backend: READ_WEB_PAGE_TOOL_NAME,
1529
+ tag: READ_WEB_PAGE_TOOL_NAME
1530
+ };
1531
+ }
1532
+ }
1533
+
1534
+ async function executeAmpInterceptedToolCall(toolCall, runtimeConfig, env, options = {}) {
1535
+ const toolName = getToolCallName(toolCall);
1536
+ if (toolName === READ_WEB_PAGE_TOOL_NAME) {
1537
+ return executeAmpReadWebPage(extractUrlFromToolCall(toolCall));
1538
+ }
1539
+ return executeAmpWebSearch(
1540
+ extractQueryFromToolCall(toolCall),
1541
+ runtimeConfig,
1542
+ env,
1543
+ options
1544
+ );
1545
+ }
1546
+
1250
1547
  function mergeClaudeSystemInstruction(system, instruction) {
1251
1548
  if (typeof system === "string" && system.trim()) {
1252
1549
  return `${system.trim()}\n\n${instruction}`;
@@ -1270,7 +1567,7 @@ function mergeOpenAIInstructions(originalInstructions, instruction) {
1270
1567
 
1271
1568
  export function buildAmpWebSearchFollowUp(providerBody, probePayload, probe, searchResultsByCall, { targetFormat, requestKind, stream }) {
1272
1569
  const toolCalls = Array.isArray(probe?.toolCalls) ? probe.toolCalls : [];
1273
- const normalizedSearchResults = Array.isArray(searchResultsByCall)
1570
+ const normalizedToolResults = Array.isArray(searchResultsByCall)
1274
1571
  ? searchResultsByCall
1275
1572
  : [];
1276
1573
 
@@ -1278,10 +1575,15 @@ export function buildAmpWebSearchFollowUp(providerBody, probePayload, probe, sea
1278
1575
  const toolResults = toolCalls.map((toolCall, index) => ({
1279
1576
  type: "tool_result",
1280
1577
  tool_use_id: toolCall.id || `tool_${index + 1}`,
1281
- content: buildToolResultText(
1282
- extractQueryFromToolCall(toolCall),
1283
- normalizedSearchResults[index]?.text
1284
- )
1578
+ content: getToolCallName(toolCall) === READ_WEB_PAGE_TOOL_NAME
1579
+ ? buildReadWebPageResultText(
1580
+ extractUrlFromToolCall(toolCall),
1581
+ normalizedToolResults[index]?.text
1582
+ )
1583
+ : buildToolResultText(
1584
+ extractQueryFromToolCall(toolCall),
1585
+ normalizedToolResults[index]?.text
1586
+ )
1285
1587
  }));
1286
1588
  return {
1287
1589
  ...providerBody,
@@ -1305,10 +1607,15 @@ export function buildAmpWebSearchFollowUp(providerBody, probePayload, probe, sea
1305
1607
  const toolOutputs = toolCalls.map((toolCall, index) => ({
1306
1608
  type: "function_call_output",
1307
1609
  call_id: toolCall.call_id || toolCall.id || `call_${index + 1}`,
1308
- output: buildToolResultText(
1309
- extractQueryFromToolCall(toolCall),
1310
- normalizedSearchResults[index]?.text
1311
- )
1610
+ output: getToolCallName(toolCall) === READ_WEB_PAGE_TOOL_NAME
1611
+ ? buildReadWebPageResultText(
1612
+ extractUrlFromToolCall(toolCall),
1613
+ normalizedToolResults[index]?.text
1614
+ )
1615
+ : buildToolResultText(
1616
+ extractQueryFromToolCall(toolCall),
1617
+ normalizedToolResults[index]?.text
1618
+ )
1312
1619
  }));
1313
1620
  return {
1314
1621
  ...providerBody,
@@ -1331,10 +1638,15 @@ export function buildAmpWebSearchFollowUp(providerBody, probePayload, probe, sea
1331
1638
  const toolMessages = toolCalls.map((toolCall, index) => ({
1332
1639
  role: "tool",
1333
1640
  tool_call_id: toolCall.id || `call_${index + 1}`,
1334
- content: buildToolResultText(
1335
- extractQueryFromToolCall(toolCall),
1336
- normalizedSearchResults[index]?.text
1337
- )
1641
+ content: getToolCallName(toolCall) === READ_WEB_PAGE_TOOL_NAME
1642
+ ? buildReadWebPageResultText(
1643
+ extractUrlFromToolCall(toolCall),
1644
+ normalizedToolResults[index]?.text
1645
+ )
1646
+ : buildToolResultText(
1647
+ extractQueryFromToolCall(toolCall),
1648
+ normalizedToolResults[index]?.text
1649
+ )
1338
1650
  }));
1339
1651
  const nextMessages = Array.isArray(providerBody.messages) ? providerBody.messages.slice() : [];
1340
1652
  const hasLeadingSystem = nextMessages[0]?.role === "system";
@@ -1880,8 +2192,8 @@ export async function maybeInterceptAmpWebSearch({
1880
2192
 
1881
2193
  const searchResultsByCall = [];
1882
2194
  for (const toolCall of (probe.toolCalls || [])) {
1883
- searchResultsByCall.push(await executeAmpWebSearch(
1884
- extractQueryFromToolCall(toolCall),
2195
+ searchResultsByCall.push(await executeAmpInterceptedToolCall(
2196
+ toolCall,
1885
2197
  runtimeConfig,
1886
2198
  env,
1887
2199
  {
@@ -889,6 +889,16 @@ export function normalizeClaudePassthroughStream(response) {
889
889
  state.messageStopped = true;
890
890
  }
891
891
 
892
+ function beginNextClaudeMessage() {
893
+ state.messageStarted = false;
894
+ state.messageStopped = false;
895
+ state.terminalDeltaSeen = false;
896
+ state.hasToolUse = false;
897
+ state.stopReason = null;
898
+ state.stopSequence = undefined;
899
+ state.usage = undefined;
900
+ }
901
+
892
902
  function processBlock(block, controller) {
893
903
  if (!block || !block.trim()) return;
894
904
  const parsedBlock = parseSseBlock(block);
@@ -913,6 +923,10 @@ export function normalizeClaudePassthroughStream(response) {
913
923
 
914
924
  const eventType = String(payload?.type || parsedBlock.eventType || "").trim();
915
925
  if (eventType === "message_start") {
926
+ if (state.messageStarted && !state.messageStopped) {
927
+ finalizeClaudeMessage(controller);
928
+ beginNextClaudeMessage();
929
+ }
916
930
  state.messageStarted = true;
917
931
  mergeClaudeUsage(state, payload.message?.usage);
918
932
  enqueueRawBlock(controller, block);
@@ -3,6 +3,7 @@ import { extractAmpGeminiRouteInfo } from "./amp-gemini.js";
3
3
  import { toNonNegativeInteger } from "./utils.js";
4
4
 
5
5
  const DEFAULT_MAX_REQUEST_BODY_BYTES = 1 * 1024 * 1024;
6
+ const DEFAULT_RESPONSES_MAX_REQUEST_BODY_BYTES = 8 * 1024 * 1024;
6
7
  const MIN_MAX_REQUEST_BODY_BYTES = 4 * 1024;
7
8
  const MAX_MAX_REQUEST_BODY_BYTES = 20 * 1024 * 1024;
8
9
  const DEFAULT_UPSTREAM_TIMEOUT_MS = 60_000;
@@ -149,10 +150,14 @@ function resolveAmpProviderRoute(path, method) {
149
150
  return null;
150
151
  }
151
152
 
152
- export function resolveMaxRequestBodyBytes(env = {}) {
153
+ export function resolveMaxRequestBodyBytes(env = {}, options = {}) {
154
+ const requestKind = String(options?.requestKind || "").trim().toLowerCase();
155
+ const fallbackLimit = requestKind === "responses"
156
+ ? DEFAULT_RESPONSES_MAX_REQUEST_BODY_BYTES
157
+ : DEFAULT_MAX_REQUEST_BODY_BYTES;
153
158
  const configured = toNonNegativeInteger(
154
159
  env?.LLM_ROUTER_MAX_REQUEST_BODY_BYTES,
155
- DEFAULT_MAX_REQUEST_BODY_BYTES
160
+ fallbackLimit
156
161
  );
157
162
  return Math.min(
158
163
  MAX_MAX_REQUEST_BODY_BYTES,
@@ -437,7 +437,9 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
437
437
  return jsonResponse({ error: "Unsupported Media Type. Use application/json." }, 415);
438
438
  }
439
439
 
440
- const maxRequestBodyBytes = resolveMaxRequestBodyBytes(env);
440
+ const maxRequestBodyBytes = resolveMaxRequestBodyBytes(env, {
441
+ requestKind: options.requestKind
442
+ });
441
443
  let body;
442
444
  try {
443
445
  body = await parseJsonBodyWithLimit(request, maxRequestBodyBytes);
@@ -965,7 +967,9 @@ export function createFetchHandler(options) {
965
967
 
966
968
  let body;
967
969
  try {
968
- body = await parseJsonBodyWithLimit(request, resolveMaxRequestBodyBytes(env));
970
+ body = await parseJsonBodyWithLimit(request, resolveMaxRequestBodyBytes(env, {
971
+ requestKind: route.requestKind
972
+ }));
969
973
  } catch (error) {
970
974
  if (error && typeof error === "object" && error.code === "REQUEST_BODY_TOO_LARGE") {
971
975
  return respond(jsonResponse({ error: "Request body too large" }, 413));
@@ -4,6 +4,19 @@
4
4
 
5
5
  import { FORMATS } from "../formats.js";
6
6
 
7
+ const WEB_SEARCH_TOOL_NAME = "web_search";
8
+ const WEB_SEARCH_FUNCTION_PARAMETERS = {
9
+ type: "object",
10
+ properties: {
11
+ query: {
12
+ type: "string",
13
+ description: "The search query to run against the web."
14
+ }
15
+ },
16
+ required: ["query"],
17
+ additionalProperties: false
18
+ };
19
+
7
20
  function cloneCacheControl(value) {
8
21
  if (!value || typeof value !== "object" || Array.isArray(value)) return undefined;
9
22
  const type = typeof value.type === "string" ? value.type.trim() : "";
@@ -40,6 +53,33 @@ function convertClaudeSystemToOpenAIContent(system) {
40
53
  return parts;
41
54
  }
42
55
 
56
+ function normalizeWebSearchType(value) {
57
+ return String(value || "").trim().toLowerCase();
58
+ }
59
+
60
+ function isClaudeNativeWebSearchTool(tool) {
61
+ if (!tool || typeof tool !== "object") return false;
62
+ const normalizedType = normalizeWebSearchType(tool.type);
63
+ return normalizedType === "web_search"
64
+ || (normalizedType.startsWith("web_search_") && !normalizedType.startsWith("web_search_preview"));
65
+ }
66
+
67
+ function convertClaudeWebSearchTool(tool) {
68
+ const description = typeof tool?.description === "string" && tool.description.trim()
69
+ ? tool.description.trim()
70
+ : "Search the web for current information, news, documentation, or real-time facts.";
71
+ const cacheControl = cloneCacheControl(tool?.cache_control);
72
+ return {
73
+ type: "function",
74
+ function: {
75
+ name: WEB_SEARCH_TOOL_NAME,
76
+ description,
77
+ parameters: WEB_SEARCH_FUNCTION_PARAMETERS
78
+ },
79
+ ...(cacheControl ? { cache_control: cacheControl } : {})
80
+ };
81
+ }
82
+
43
83
  /**
44
84
  * Convert Claude request to OpenAI format
45
85
  */
@@ -89,6 +129,9 @@ export function claudeToOpenAIRequest(model, body, stream) {
89
129
  // Tools
90
130
  if (body.tools && Array.isArray(body.tools)) {
91
131
  result.tools = body.tools.map(tool => {
132
+ if (isClaudeNativeWebSearchTool(tool)) {
133
+ return convertClaudeWebSearchTool(tool);
134
+ }
92
135
  const cacheControl = cloneCacheControl(tool.cache_control);
93
136
  return {
94
137
  type: "function",