@khanglvm/llm-router 2.3.1 → 2.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/CHANGELOG.md +5 -0
  2. package/README.md +2 -2
  3. package/package.json +1 -1
  4. package/src/cli/router-module.js +32 -5
  5. package/src/node/coding-tool-config.js +138 -25
  6. package/src/node/large-request-log.js +54 -0
  7. package/src/node/litellm-context-catalog.js +13 -1
  8. package/src/node/local-server.js +10 -0
  9. package/src/node/ollama-client.js +195 -0
  10. package/src/node/ollama-hardware.js +94 -0
  11. package/src/node/ollama-install.js +230 -0
  12. package/src/node/provider-probe.js +69 -5
  13. package/src/node/web-console-client.js +36 -36
  14. package/src/node/web-console-server.js +478 -8
  15. package/src/node/web-console-styles.generated.js +1 -1
  16. package/src/node/web-console-ui/amp-utils.js +272 -0
  17. package/src/node/web-console-ui/api-client.js +128 -0
  18. package/src/node/web-console-ui/capability-utils.js +36 -0
  19. package/src/node/web-console-ui/config-editor-utils.js +20 -5
  20. package/src/node/web-console-ui/constants.js +140 -0
  21. package/src/node/web-console-ui/context-window-utils.js +262 -0
  22. package/src/node/web-console-ui/hooks/use-reorder-layout-animation.js +65 -0
  23. package/src/node/web-console-ui/provider-presets.js +211 -0
  24. package/src/node/web-console-ui/quick-start-utils.js +790 -0
  25. package/src/node/web-console-ui/utils.js +353 -0
  26. package/src/node/web-console-ui/web-search-utils.js +460 -0
  27. package/src/runtime/config.js +96 -9
  28. package/src/runtime/handler/fallback.js +71 -0
  29. package/src/runtime/handler/field-filter.js +39 -0
  30. package/src/runtime/handler/large-request-log.js +211 -0
  31. package/src/runtime/handler/provider-call.js +185 -15
  32. package/src/runtime/handler/reasoning-effort.js +11 -1
  33. package/src/runtime/handler/tool-name-sanitizer.js +258 -0
  34. package/src/runtime/handler.js +16 -3
  35. package/src/shared/coding-tool-bindings.js +3 -0
@@ -0,0 +1,258 @@
1
+ /**
2
+ * Sanitizes tool names that contain characters rejected by OpenAI-compatible APIs.
3
+ * Pattern: ^[a-zA-Z0-9_-]+$
4
+ *
5
+ * Applies sanitization to the request body before sending to the provider,
6
+ * and reverse-maps tool names in the response so the client receives original names.
7
+ */
8
+
9
const VALID_TOOL_NAME = /^[a-zA-Z0-9_-]+$/;

/**
 * Replace every character outside [a-zA-Z0-9_-] with "_".
 *
 * @param {unknown} name - Raw tool name; falsy values are treated as "".
 * @returns {string} The sanitized name (same length as the input string).
 */
function sanitizeName(name) {
  return String(name || "").replace(/[^a-zA-Z0-9_-]/g, "_");
}

/**
 * Read the declared name of one tool definition.
 * `function.name` (Chat Completions shape) wins over a top-level `name`.
 *
 * @param {unknown} tool - One entry of body.tools.
 * @returns {string} The declared name, or "" when none is present.
 */
function readToolName(tool) {
  if (!tool || typeof tool !== "object") return "";
  if (typeof tool.function?.name === "string") return tool.function.name;
  return typeof tool.name === "string" ? tool.name : "";
}

/**
 * Scan body.tools for names needing sanitization.
 *
 * Every alias is unique: it never equals a tool name that is already valid
 * (and therefore sent upstream untouched) nor the alias of a different
 * original name. When the plain sanitized form is taken, a numeric suffix
 * ("_2", "_3", ...) is appended. Without this, "a.b" and "a/b" would both be
 * sent as "a_b" and every response would be reverse-mapped to the first one.
 *
 * @param {object|null|undefined} body - Request body.
 * @returns {Map<string, string>|null} Map<sanitizedName, originalName>, or
 *   null if no tool name needs sanitization.
 */
export function buildToolNameMap(body) {
  if (!body || !Array.isArray(body.tools) || body.tools.length === 0) return null;

  const declaredNames = body.tools.map(readToolName).filter(Boolean);
  // Names that are already valid reach the provider as-is, so they are reserved.
  const taken = new Set(declaredNames.filter((name) => VALID_TOOL_NAME.test(name)));
  const alreadyAliased = new Set();
  const map = new Map();

  for (const name of declaredNames) {
    if (VALID_TOOL_NAME.test(name) || alreadyAliased.has(name)) continue;

    const base = sanitizeName(name);
    let alias = base;
    for (let suffix = 2; taken.has(alias); suffix += 1) {
      alias = `${base}_${suffix}`;
    }

    taken.add(alias);
    alreadyAliased.add(name);
    map.set(alias, name);
  }
  return map.size > 0 ? map : null;
}

/**
 * Build the forward lookup Map<originalName, sanitizedName> from a name map.
 *
 * @param {Map<string, string>} nameMap - Map<sanitizedName, originalName>.
 * @returns {Map<string, string>}
 */
function invertNameMap(nameMap) {
  const forward = new Map();
  for (const [sanitized, original] of nameMap) {
    if (!forward.has(original)) forward.set(original, sanitized);
  }
  return forward;
}

/**
 * Resolve the name to send upstream for `name`.
 *
 * Prefers the exact alias registered for this original name. Falls back to
 * the plain sanitized form when that form is a known alias (the behaviour
 * before aliases were made unique), otherwise returns `name` unchanged.
 *
 * @param {unknown} name - Name found in the request body.
 * @param {Map<string, string>} nameMap - Map<sanitizedName, originalName>.
 * @param {Map<string, string>} forwardMap - Map<originalName, sanitizedName>.
 * @returns {unknown} The alias, or `name` itself when no alias applies.
 */
function applySanitizationToName(name, nameMap, forwardMap) {
  if (typeof name !== "string" || !name) return name;
  const alias = forwardMap.get(name);
  if (alias !== undefined) return alias;
  const sanitized = sanitizeName(name);
  return nameMap.has(sanitized) ? sanitized : name;
}

/**
 * Return a shallow-cloned body with every matching tool name replaced.
 * Covers: tools[], tool_choice, input[] (Responses API), messages[].tool_calls
 * (Chat Completions) and messages[].content[] tool_use blocks (Claude).
 * The input body and its nested objects are never mutated.
 *
 * @param {object|null|undefined} body - Request body.
 * @param {Map<string, string>|null} nameMap - Map<sanitizedName, originalName>
 *   as produced by buildToolNameMap.
 * @returns {object|null|undefined} The rewritten clone, or `body` itself when
 *   there is nothing to do.
 */
export function sanitizeBodyToolNames(body, nameMap) {
  if (!nameMap || nameMap.size === 0 || !body) return body;

  const forwardMap = invertNameMap(nameMap);
  const rename = (name) => applySanitizationToName(name, nameMap, forwardMap);
  const needsRename = (name) => typeof name === "string" && !VALID_TOOL_NAME.test(name);
  const isInvalidToolUse = (block) => block?.type === "tool_use" && needsRename(block.name);

  const result = { ...body };

  // 1. Tool definitions
  if (Array.isArray(result.tools)) {
    result.tools = result.tools.map((tool) => {
      if (!tool || typeof tool !== "object") return tool;
      if (tool.function && needsRename(tool.function.name)) {
        return { ...tool, function: { ...tool.function, name: rename(tool.function.name) } };
      }
      if (needsRename(tool.name)) {
        return { ...tool, name: rename(tool.name) };
      }
      return tool;
    });
  }

  // 2. tool_choice
  if (result.tool_choice && typeof result.tool_choice === "object") {
    const choice = result.tool_choice;
    const choiceName = choice.function?.name || choice.name;
    if (needsRename(choiceName)) {
      result.tool_choice = choice.function
        ? { ...choice, function: { ...choice.function, name: rename(choiceName) } }
        : { ...choice, name: rename(choiceName) };
    }
  }

  // 3. Responses API input — function_call items carry name
  if (Array.isArray(result.input)) {
    result.input = result.input.map((item) => (
      item?.type === "function_call" && needsRename(item.name)
        ? { ...item, name: rename(item.name) }
        : item
    ));
  }

  // 4. Conversation history
  if (Array.isArray(result.messages)) {
    result.messages = result.messages.map((msg) => {
      if (!msg || typeof msg !== "object") return msg;
      let next = msg;

      // Chat Completions: assistant tool_calls carry function.name
      if (Array.isArray(msg.tool_calls)) {
        const toolCalls = msg.tool_calls.map((tc) => (
          tc?.function && needsRename(tc.function.name)
            ? { ...tc, function: { ...tc.function, name: rename(tc.function.name) } }
            : tc
        ));
        next = { ...next, tool_calls: toolCalls };
      }

      // Claude: assistant content blocks of type tool_use carry name. The
      // response side already reverse-maps these, so the request must match.
      if (Array.isArray(msg.content) && msg.content.some(isInvalidToolUse)) {
        const content = msg.content.map((block) => (
          isInvalidToolUse(block) ? { ...block, name: rename(block.name) } : block
        ));
        next = { ...next, content };
      }

      return next;
    });
  }

  return result;
}
105
+
106
+ // ---------------------------------------------------------------------------
107
+ // Response reverse-mapping
108
+ // ---------------------------------------------------------------------------
109
+
110
/**
 * Restore original tool names inside one parsed provider payload.
 *
 * Handles Chat Completions (choices[].message|delta.tool_calls), Responses API
 * (output[], streaming `item`, streaming top-level `name`, embedded
 * `response.output[]`) and Claude (content[] and streaming `content_block`).
 * Array members are updated in place; `item` and `content_block` are replaced
 * with renamed copies.
 *
 * @param {unknown} parsed - Decoded JSON payload.
 * @param {Map<string, string>} nameMap - Map<sanitizedName, originalName>.
 * @returns {unknown} The same `parsed` value that was passed in.
 */
function reverseNameInParsed(parsed, nameMap) {
  if (!parsed || typeof parsed !== "object") return parsed;

  // Rename, in place, every entry of the given type whose name is a known alias.
  const restoreInPlace = (entries, expectedType) => {
    for (const entry of entries) {
      if (entry?.type === expectedType && nameMap.has(entry.name)) {
        entry.name = nameMap.get(entry.name);
      }
    }
  };
  const isAliased = (entry, expectedType) => entry?.type === expectedType && nameMap.has(entry.name);
  const renamedCopy = (entry) => ({ ...entry, name: nameMap.get(entry.name) });

  // OpenAI Chat Completions: full message first, streaming delta otherwise.
  if (Array.isArray(parsed.choices)) {
    parsed.choices.forEach((choice) => {
      const calls = choice?.message?.tool_calls || choice?.delta?.tool_calls || [];
      for (const call of calls) {
        const fn = call?.function;
        if (fn?.name && nameMap.has(fn.name)) {
          fn.name = nameMap.get(fn.name);
        }
      }
    });
  }

  // Responses API: complete output list.
  if (Array.isArray(parsed.output)) {
    restoreInPlace(parsed.output, "function_call");
  }

  // Responses API streaming: the event carries a single item.
  if (isAliased(parsed.item, "function_call")) {
    parsed.item = renamedCopy(parsed.item);
  }

  // Responses API streaming: events whose type mentions function_call carry a top-level name.
  if (typeof parsed.name === "string" && nameMap.has(parsed.name) && (parsed.type || "").includes("function_call")) {
    parsed.name = nameMap.get(parsed.name);
  }

  // Responses API streaming: a full response object embedded in the event.
  if (parsed.response && Array.isArray(parsed.response.output)) {
    restoreInPlace(parsed.response.output, "function_call");
  }

  // Claude: tool_use blocks in a complete message.
  if (Array.isArray(parsed.content)) {
    restoreInPlace(parsed.content, "tool_use");
  }

  // Claude streaming: the event carries a single content_block.
  if (isAliased(parsed.content_block, "tool_use")) {
    parsed.content_block = renamedCopy(parsed.content_block);
  }

  return parsed;
}
165
+
166
/**
 * Wrap a Response so sanitized tool names are mapped back to the originals.
 *
 * For streaming: a TransformStream rewrites each SSE event as it passes
 * through. Events may be separated by LF or CRLF blank lines, and the
 * separator that was received is re-emitted unchanged.
 * For non-streaming: the body is buffered, parsed as JSON, rewritten and
 * re-serialized; a body that is not JSON, or that mentions no sanitized name,
 * is passed through as-is.
 *
 * The returned Response copies the status and headers of the original, minus
 * `content-length`, because the rewritten body may differ in byte length.
 * An upstream read failure errors the returned body instead of ending it early.
 *
 * @param {Response} response - Provider response.
 * @param {Map<string, string>|null} nameMap - Map<sanitizedName, originalName>.
 * @param {boolean} isStream - True when the body is an SSE stream.
 * @returns {Response} A wrapping Response, or `response` itself when there is
 *   nothing to rewrite (no map, not a Response, or no body).
 */
export function reverseToolNamesInResponse(response, nameMap, isStream) {
  if (!nameMap || nameMap.size === 0 || !(response instanceof Response)) return response;
  if (!response.body) return response;

  const decoder = new TextDecoder();
  const encoder = new TextEncoder();

  // Sanitized names, used for a cheap substring check before any JSON parsing.
  const sanitizedNames = [...nameMap.keys()];

  // Headers are sent before the body is rewritten, so a forwarded length could
  // be wrong by the time the body is produced. Drop it and let the runtime frame the body.
  const headers = new Headers(response.headers);
  headers.delete("content-length");
  const init = { status: response.status, headers };

  if (isStream) {
    // A blank line ends an SSE event; accept LF and CRLF line endings.
    const eventBoundary = /\r?\n\r?\n/;
    let buffer = "";
    const transformStream = new TransformStream({
      transform(chunk, controller) {
        buffer += decoder.decode(chunk, { stream: true });
        let match;
        while ((match = eventBoundary.exec(buffer)) !== null) {
          const block = buffer.slice(0, match.index);
          const separator = match[0];
          buffer = buffer.slice(match.index + separator.length);
          controller.enqueue(encoder.encode(processBlock(block, sanitizedNames, nameMap) + separator));
        }
      },
      flush(controller) {
        // Flush bytes the decoder was still holding for an incomplete sequence.
        buffer += decoder.decode();
        if (buffer.trim()) {
          controller.enqueue(encoder.encode(processBlock(buffer, sanitizedNames, nameMap)));
        }
      }
    });

    return new Response(response.body.pipeThrough(transformStream), init);
  }

  // Non-streaming: buffer the whole body, parse, reverse, re-serialize.
  const reader = response.body.getReader();
  const readable = new ReadableStream({
    async start(controller) {
      try {
        let text = "";
        for (;;) {
          const { done, value } = await reader.read();
          if (done) break;
          text += decoder.decode(value, { stream: true });
        }
        text += decoder.decode();

        let output = text;
        if (sanitizedNames.some((n) => text.includes(n))) {
          try {
            const parsed = JSON.parse(text);
            reverseNameInParsed(parsed, nameMap);
            output = JSON.stringify(parsed);
          } catch {
            // Deliberate best-effort: a body that cannot be rewritten is forwarded untouched.
            output = text;
          }
        }
        controller.enqueue(encoder.encode(output));
        controller.close();
      } catch (error) {
        // Surface upstream failures to the consumer rather than handing it a
        // truncated body that looks complete.
        controller.error(error);
      }
    },
    cancel(reason) {
      return reader.cancel(reason);
    }
  });

  return new Response(readable, init);
}
239
+
240
/**
 * Rewrite one SSE event block so its JSON `data:` payloads carry the
 * original tool names.
 *
 * The block is returned untouched when it mentions none of the sanitized
 * names. Otherwise each `data:` line is parsed, reverse-mapped and
 * re-serialized; non-data lines, the `[DONE]` sentinel and payloads that fail
 * to parse are kept verbatim.
 *
 * @param {string} block - Text of one SSE event, without its blank-line terminator.
 * @param {string[]} sanitizedNames - Sanitized names used for the substring pre-check.
 * @param {Map<string, string>} nameMap - Map<sanitizedName, originalName>.
 * @returns {string} The rewritten block.
 */
function processBlock(block, sanitizedNames, nameMap) {
  // Cheap pre-check: most events mention no tool at all, so avoid parsing them.
  const mentionsSanitizedName = sanitizedNames.some((candidate) => block.includes(candidate));
  if (!mentionsSanitizedName) return block;

  const rewriteLine = (line) => {
    if (!line.startsWith("data:")) return line;

    const payload = line.slice(5).trimStart();
    if (payload === "[DONE]") return line;

    try {
      const parsed = JSON.parse(payload);
      reverseNameInParsed(parsed, nameMap);
      return `data: ${JSON.stringify(parsed)}`;
    } catch {
      // Not JSON: forward the line exactly as received.
      return line;
    }
  };

  const rewritten = [];
  for (const line of block.split("\n")) {
    rewritten.push(rewriteLine(line));
  }
  return rewritten.join("\n");
}
@@ -20,6 +20,11 @@ import {
20
20
  buildFailureResponse,
21
21
  makeProviderCall
22
22
  } from "./handler/provider-call.js";
23
+ import {
24
+ buildToolNameMap,
25
+ sanitizeBodyToolNames,
26
+ reverseToolNamesInResponse
27
+ } from "./handler/tool-name-sanitizer.js";
23
28
  import { corsResponse, jsonResponse } from "./handler/http.js";
24
29
  import {
25
30
  detectUserRequestFormat,
@@ -637,6 +642,10 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
637
642
  }, 503), routeDebug);
638
643
  }
639
644
 
645
+ // Sanitize tool names that upstream providers would reject (e.g. dots in MCP names).
646
+ const toolNameMap = buildToolNameMap(body);
647
+ const sanitizedBody = toolNameMap ? sanitizeBodyToolNames(body, toolNameMap) : body;
648
+
640
649
  let lastErrorResult = null;
641
650
  let lastErrorMessage = "Unknown error";
642
651
  let routeSelectionCommitted = false;
@@ -666,7 +675,7 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
666
675
  while (attempt < maxAttempts) {
667
676
  attempt += 1;
668
677
  result = await makeProviderCall({
669
- body,
678
+ body: sanitizedBody,
670
679
  sourceFormat,
671
680
  stream,
672
681
  requestKind: options.requestKind,
@@ -677,7 +686,8 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
677
686
  runtimeConfig: config,
678
687
  stateStore,
679
688
  ampContext,
680
- runtimeFlags
689
+ runtimeFlags,
690
+ onLargeRequestLog: options.onLargeRequestLog
681
691
  });
682
692
 
683
693
  if (!quotaConsumed && shouldConsumeQuotaFromResult(result)) {
@@ -722,7 +732,10 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
722
732
  if (ampContext?.threadId && options.threadAffinityStore) {
723
733
  options.threadAffinityStore.setAffinity(ampContext.threadId, entry.candidateKey);
724
734
  }
725
- return withRouteDebugHeaders(result.response, routeDebug);
735
+ const finalResponse = toolNameMap
736
+ ? reverseToolNamesInResponse(result.response, toolNameMap, stream)
737
+ : result.response;
738
+ return withRouteDebugHeaders(finalResponse, routeDebug);
726
739
  }
727
740
 
728
741
  classification = await classifyFailureResult(result, retryPolicy);
@@ -10,12 +10,14 @@ export const CLAUDE_CODE_THINKING_LEVEL_VALUES = Object.freeze([
10
10
  "low",
11
11
  "medium",
12
12
  "high",
13
+ "xhigh",
13
14
  "max"
14
15
  ]);
15
16
  export const CLAUDE_CODE_THINKING_TOKENS_BY_LEVEL = Object.freeze({
16
17
  low: 4096,
17
18
  medium: 12000,
18
19
  high: 24000,
20
+ xhigh: 28000,
19
21
  max: 31999
20
22
  });
21
23
  export const CLAUDE_CODE_EFFORT_LEVEL_VALUES = CLAUDE_CODE_THINKING_LEVEL_VALUES;
@@ -45,6 +47,7 @@ export function mapClaudeCodeThinkingTokensToLevel(value) {
45
47
  const parsed = Number(value);
46
48
  if (!Number.isFinite(parsed) || parsed <= 0) return "";
47
49
  if (parsed >= CLAUDE_CODE_THINKING_TOKENS_BY_LEVEL.max) return "max";
50
+ if (parsed >= CLAUDE_CODE_THINKING_TOKENS_BY_LEVEL.xhigh) return "xhigh";
48
51
  if (parsed >= CLAUDE_CODE_THINKING_TOKENS_BY_LEVEL.high) return "high";
49
52
  if (parsed >= 6000) return "medium";
50
53
  return "low";