@khanglvm/llm-router 2.3.0 → 2.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -0
- package/README.md +2 -2
- package/package.json +1 -1
- package/src/cli/router-module.js +32 -5
- package/src/node/coding-tool-config.js +138 -25
- package/src/node/large-request-log.js +54 -0
- package/src/node/litellm-context-catalog.js +13 -1
- package/src/node/local-server.js +10 -0
- package/src/node/ollama-client.js +195 -0
- package/src/node/ollama-hardware.js +94 -0
- package/src/node/ollama-install.js +230 -0
- package/src/node/provider-probe.js +69 -5
- package/src/node/web-console-client.js +36 -36
- package/src/node/web-console-server.js +478 -8
- package/src/node/web-console-styles.generated.js +1 -1
- package/src/node/web-console-ui/amp-utils.js +272 -0
- package/src/node/web-console-ui/api-client.js +128 -0
- package/src/node/web-console-ui/capability-utils.js +36 -0
- package/src/node/web-console-ui/config-editor-utils.js +20 -5
- package/src/node/web-console-ui/constants.js +140 -0
- package/src/node/web-console-ui/context-window-utils.js +262 -0
- package/src/node/web-console-ui/hooks/use-reorder-layout-animation.js +65 -0
- package/src/node/web-console-ui/provider-presets.js +211 -0
- package/src/node/web-console-ui/quick-start-utils.js +790 -0
- package/src/node/web-console-ui/utils.js +353 -0
- package/src/node/web-console-ui/web-search-utils.js +460 -0
- package/src/runtime/config.js +96 -9
- package/src/runtime/handler/fallback.js +71 -0
- package/src/runtime/handler/field-filter.js +39 -0
- package/src/runtime/handler/large-request-log.js +211 -0
- package/src/runtime/handler/provider-call.js +185 -15
- package/src/runtime/handler/reasoning-effort.js +11 -1
- package/src/runtime/handler/tool-name-sanitizer.js +258 -0
- package/src/runtime/handler.js +16 -3
- package/src/shared/coding-tool-bindings.js +3 -0
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
import { JSON_HEADERS } from "./constants.js";
|
|
2
|
+
import { CODEX_SUBSCRIPTION_MODELS, CLAUDE_CODE_SUBSCRIPTION_MODELS } from "../../runtime/subscription-constants.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Factory for OpenAI-compatible model discovery descriptors.
 *
 * The returned object is frozen and exposes:
 *  - `requiresAuth`: the flag passed in (defaults to `true`);
 *  - `fetchModels({ apiKey, apiKeyEnv })`: POSTs `{ endpoints: [endpoint] }`
 *    (plus whichever credentials are truthy) to the relative route
 *    `/api/config/discover-provider-models` and resolves to the trimmed,
 *    non-empty string ids found in `result.models`.
 *
 * NOTE(review): `fetchJson` is not imported in the visible part of this file —
 * confirm it is in scope wherever this module is loaded.
 *
 * @param {string} endpoint - Provider base URL, sent as the single `endpoints` entry.
 * @param {{ requiresAuth?: boolean }} [options] - Discovery options.
 * @returns {Readonly<{ requiresAuth: boolean, fetchModels: Function }>} Frozen descriptor.
 */
function createOpenAICompatDiscover(endpoint, { requiresAuth = true } = {}) {
  return Object.freeze({
    requiresAuth,
    fetchModels: async ({ apiKey, apiKeyEnv } = {}) => {
      const body = { endpoints: [endpoint] };
      // Credentials are optional; only forward the ones that were provided.
      if (apiKey) body.apiKey = apiKey;
      if (apiKeyEnv) body.apiKeyEnv = apiKeyEnv;
      const res = await fetchJson("/api/config/discover-provider-models", {
        method: "POST",
        headers: JSON_HEADERS,
        body: JSON.stringify(body)
      });
      // A missing response, missing `result`, or non-array `models` all mean
      // "nothing discovered" rather than a TypeError for the caller.
      const models = res?.result?.models;
      if (!Array.isArray(models)) return [];
      return models.map((id) => String(id || "").trim()).filter(Boolean);
    }
  });
}
|
|
21
|
+
|
|
22
|
+
/**
 * Unified provider preset registry — single source of truth for all presets.
 *
 * A frozen array of frozen preset records. API presets (`category: "api"`)
 * come first, followed by subscription presets (`category: "subscription"`).
 * Nested objects and arrays created here are frozen as well.
 */
const PROVIDER_PRESETS = (() => {
  const freeze = Object.freeze;

  /** Frozen rate-limit default: `limit` per one `windowUnit`. */
  const rateLimit = (limit, windowUnit = "minute") => freeze({ limit, windowValue: 1, windowUnit });

  /** Frozen `freeTierRpm` record: API host plus its per-model table (`_default` is the catch-all key). */
  const freeTier = (host, models) => freeze({ host, models: freeze(models) });

  /** Fresh frozen list for each Z.AI preset so the two presets never share an array instance. */
  const zaiCodingModels = () => freeze(["glm-5.1", "glm-5V-turbo", "glm-4.7", "glm-4.7-flash"]);

  // Each endpoint is named once and reused for both `endpoint` and `discover`.
  const GROQ_ENDPOINT = "https://api.groq.com/openai/v1";
  const GEMINI_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/openai";
  const ZAI_GLOBAL_ENDPOINT = "https://api.z.ai/api/coding/paas/v4";
  const ZAI_CHINA_ENDPOINT = "https://open.bigmodel.cn/api/coding/paas/v4";
  const OPENROUTER_ENDPOINT = "https://openrouter.ai/api/v1";

  // ── API presets ──
  const apiPresets = [
    freeze({
      key: "custom",
      category: "api",
      label: "Custom",
      description: "Generic OpenAI-compatible API provider.",
      providerName: "My Provider",
      providerId: "my-provider",
      endpoint: "",
      apiKeyEnv: "",
      // Unlike the other presets, the custom preset keeps one model list per format key.
      defaultModels: freeze({
        openai: freeze(["gpt-4o-mini", "gpt-4.1-mini"]),
        claude: freeze(["claude-3-5-sonnet", "claude-3-5-haiku"])
      }),
      rateLimitDefaults: rateLimit(60)
    }),
    freeze({
      key: "groq",
      category: "api",
      label: "Groq",
      description: "Groq cloud inference with Llama, Qwen, and GPT-OSS models.",
      providerName: "Groq",
      providerId: "groq",
      endpoint: GROQ_ENDPOINT,
      apiKeyEnv: "",
      defaultModels: freeze(["llama-3.3-70b-versatile", "llama-3.1-8b-instant"]),
      rateLimitDefaults: rateLimit(30),
      freeTierRpm: freeTier("api.groq.com", {
        "llama-3.1-8b-instant": 30,
        "llama-3.3-70b-versatile": 30,
        "openai/gpt-oss-20b": 30,
        "openai/gpt-oss-120b": 15,
        "qwen/qwen3-32b": 30,
        "meta-llama/llama-4-scout-17b-16e-instruct": 15,
        "moonshotai/kimi-k2-instruct": 15,
        "_default": 30
      }),
      discover: createOpenAICompatDiscover(GROQ_ENDPOINT)
    }),
    freeze({
      key: "gemini",
      category: "api",
      label: "Google Gemini",
      description: "Google Gemini models via OpenAI-compatible endpoint.",
      providerName: "Google Gemini",
      providerId: "gemini",
      endpoint: GEMINI_ENDPOINT,
      apiKeyEnv: "",
      defaultModels: freeze(["gemini-3-flash-preview", "gemini-3.1-flash-lite-preview"]),
      rateLimitDefaults: rateLimit(10),
      freeTierRpm: freeTier("generativelanguage.googleapis.com", {
        "gemini-3-flash-preview": 15,
        "gemini-3.1-flash-lite-preview": 15,
        "gemini-3.1-pro-preview": 5,
        "gemini-2.5-flash": 15,
        "gemini-2.5-flash-lite": 15,
        "gemini-2.5-pro": 5,
        "_default": 10
      }),
      discover: createOpenAICompatDiscover(GEMINI_ENDPOINT)
    }),
    freeze({
      key: "zai-global",
      category: "api",
      label: "Z.AI Coding (Global)",
      description: "Z.AI coding models via the global coding endpoint.",
      providerName: "Z.AI Coding",
      providerId: "zai-coding",
      endpoint: ZAI_GLOBAL_ENDPOINT,
      apiKeyEnv: "",
      defaultModels: zaiCodingModels(),
      rateLimitDefaults: rateLimit(60),
      discover: createOpenAICompatDiscover(ZAI_GLOBAL_ENDPOINT)
    }),
    freeze({
      key: "zai-china",
      category: "api",
      label: "Z.AI Coding (China)",
      description: "Z.AI coding models via the China mainland coding endpoint.",
      providerName: "Z.AI Coding CN",
      providerId: "zai-coding-cn",
      endpoint: ZAI_CHINA_ENDPOINT,
      apiKeyEnv: "",
      defaultModels: zaiCodingModels(),
      rateLimitDefaults: rateLimit(60),
      discover: createOpenAICompatDiscover(ZAI_CHINA_ENDPOINT)
    }),
    freeze({
      key: "openrouter",
      category: "api",
      label: "OpenRouter",
      description: "300+ models from multiple providers, including free tier models.",
      providerName: "OpenRouter",
      providerId: "openrouter",
      endpoint: OPENROUTER_ENDPOINT,
      apiKeyEnv: "",
      defaultModels: freeze(["qwen/qwen3.6-plus:free", "google/gemma-4-26b-a4b-it"]),
      rateLimitDefaults: rateLimit(200),
      // The only preset here whose discovery is created with requiresAuth: false.
      discover: createOpenAICompatDiscover(OPENROUTER_ENDPOINT, { requiresAuth: false })
    })
  ];

  // ── Subscription (OAuth) presets ──
  const subscriptionPresets = [
    freeze({
      key: "oauth-gpt",
      category: "subscription",
      label: "ChatGPT",
      description: "Use ChatGPT subscription login with GPT models.",
      providerName: "ChatGPT Subscription",
      providerId: "chatgpt-sub",
      subscriptionType: "chatgpt-codex",
      format: "openai",
      defaultModels: CODEX_SUBSCRIPTION_MODELS,
      rateLimitDefaults: rateLimit(999999, "month"),
      warning: "chatgpt-tos"
    }),
    freeze({
      key: "oauth-claude",
      category: "subscription",
      label: "Claude",
      description: "Use Claude Code subscription login with Claude models.",
      providerName: "Claude Subscription",
      providerId: "claude-sub",
      subscriptionType: "claude-code",
      format: "claude",
      defaultModels: CLAUDE_CODE_SUBSCRIPTION_MODELS,
      rateLimitDefaults: rateLimit(999999, "month"),
      warning: "claude-extra-usage"
    })
  ];

  return freeze([...apiPresets, ...subscriptionPresets]);
})();
|
|
156
|
+
|
|
157
|
+
/**
 * Index helpers for the PROVIDER_PRESETS registry.
 * Frozen lookup object mapping each preset's `key` to its preset record.
 */
const PROVIDER_PRESET_BY_KEY = Object.freeze(
  Object.fromEntries(Array.from(PROVIDER_PRESETS, (preset) => [preset.key, preset]))
);
|
|
159
|
+
|
|
160
|
+
/**
 * Look up a preset by its registry `key`, falling back to the "custom" preset.
 *
 * Only the index's own entries are considered, so names inherited from
 * `Object.prototype` (e.g. "constructor", "toString", "__proto__") resolve to
 * the custom preset instead of leaking a non-preset value.
 *
 * @param {string} key - Preset key such as "groq" or "openrouter".
 * @returns {object} The matching preset, or the "custom" preset when there is no match.
 */
function findPresetByKey(key) {
  const isOwnKey = Object.prototype.hasOwnProperty.call(PROVIDER_PRESET_BY_KEY, key);
  return (isOwnKey && PROVIDER_PRESET_BY_KEY[key]) || PROVIDER_PRESET_BY_KEY.custom;
}
|
|
163
|
+
|
|
164
|
+
/**
 * Find the preset whose `freeTierRpm.host` equals the given hostname.
 *
 * Non-string or blank input returns `null`; without this guard an `undefined`
 * hostname would compare equal to the missing `freeTierRpm.host` of presets
 * that have no free-tier table and return one of them by accident.
 * The hostname is trimmed and lower-cased before comparison because host names
 * are case-insensitive and the registry stores them in lower case.
 *
 * @param {string} hostname - Host name to look up, e.g. "api.groq.com".
 * @returns {object|null} The matching preset, or `null` when none matches.
 */
function findPresetByHost(hostname) {
  if (typeof hostname !== "string") return null;
  const wantedHost = hostname.trim().toLowerCase();
  if (!wantedHost) return null;
  return PROVIDER_PRESETS.find((p) => p.freeTierRpm?.host === wantedHost) || null;
}
|
|
167
|
+
|
|
168
|
+
/**
 * Collect every preset whose `category` strictly equals the given value,
 * in registry order.
 *
 * @param {string} category - Category to match, e.g. "api" or "subscription".
 * @returns {object[]} A new array of matching presets (empty when none match).
 */
function getPresetOptionsByCategory(category) {
  const matches = [];
  for (const preset of PROVIDER_PRESETS) {
    if (preset.category === category) {
      matches.push(preset);
    }
  }
  return matches;
}
|
|
171
|
+
|
|
172
|
+
/**
 * Free-tier lookup keyed by hostname: maps each preset's `freeTierRpm.host` to
 * that preset's `freeTierRpm.models` table. Presets missing either field are
 * left out. Per the original note, this is used by
 * detectPresetHostFromEndpoints (defined outside this file).
 */
const PROVIDER_PRESET_FREE_TIER_RPM_BY_HOST = Object.freeze(
  Object.fromEntries(
    PROVIDER_PRESETS.flatMap(({ freeTierRpm }) => {
      const hasFreeTierTable = Boolean(freeTierRpm?.host && freeTierRpm?.models);
      return hasFreeTierTable ? [[freeTierRpm.host, freeTierRpm.models]] : [];
    })
  )
);
|
|
180
|
+
|
|
181
|
+
/**
 * Module-level cache for preset model discovery, keyed by preset `key`.
 * Kept at module scope so it survives React re-renders.
 */
const presetModelCache = new Map();

/** Memoized promise of the one-time background discovery run (null until first call). */
let _presetInitPromise = null;

/**
 * Non-blocking background init: fetches models for presets whose `discover`
 * does not require auth and stores non-empty results in `presetModelCache`.
 *
 * The work is started on the first call only; later calls return the same
 * promise. Failures of individual presets are intentionally ignored.
 *
 * @returns {Promise<object[]>} The memoized `Promise.allSettled` result.
 */
function initPresetModels() {
  if (!_presetInitPromise) {
    const discoverablePresets = PROVIDER_PRESETS.filter(
      (candidate) => candidate.discover && !candidate.discover.requiresAuth
    );

    const warmCacheFor = async (preset) => {
      try {
        const discovered = await preset.discover.fetchModels();
        if (discovered.length) {
          presetModelCache.set(preset.key, discovered);
        }
      } catch {
        /* background — swallow errors */
      }
    };

    _presetInitPromise = Promise.allSettled(
      discoverablePresets.map((preset) => warmCacheFor(preset))
    );
  }
  return _presetInitPromise;
}
|
|
200
|
+
|
|
201
|
+
export {
|
|
202
|
+
createOpenAICompatDiscover,
|
|
203
|
+
PROVIDER_PRESETS,
|
|
204
|
+
PROVIDER_PRESET_BY_KEY,
|
|
205
|
+
findPresetByKey,
|
|
206
|
+
findPresetByHost,
|
|
207
|
+
getPresetOptionsByCategory,
|
|
208
|
+
PROVIDER_PRESET_FREE_TIER_RPM_BY_HOST,
|
|
209
|
+
presetModelCache,
|
|
210
|
+
initPresetModels
|
|
211
|
+
};
|