@khanglvm/llm-router 2.4.0 → 2.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +22 -0
- package/README.md +12 -0
- package/package.json +2 -1
- package/src/node/huggingface-gguf.js +273 -0
- package/src/node/llamacpp-runtime.js +309 -0
- package/src/node/local-model-browser.js +132 -0
- package/src/node/local-model-capacity.js +39 -0
- package/src/node/local-models-service.js +238 -0
- package/src/node/start-command.js +12 -0
- package/src/node/web-console-client.js +27 -27
- package/src/node/web-console-server.js +575 -0
- package/src/node/web-console-styles.generated.js +1 -1
- package/src/node/web-console-ui/api-client.js +94 -0
- package/src/node/web-console-ui/local-models-utils.js +138 -0
- package/src/runtime/config.js +22 -7
- package/src/runtime/handler/provider-translation.js +5 -5
- package/src/runtime/local-models.js +168 -0
- package/src/translator/response/openai-to-claude.js +70 -9
package/src/runtime/config.js
CHANGED
|
@@ -9,6 +9,12 @@ import {
|
|
|
9
9
|
CLAUDE_CODE_SUBSCRIPTION_MODELS
|
|
10
10
|
} from "./subscription-constants.js";
|
|
11
11
|
import { sanitizeRuntimeMetadata } from "../shared/local-router-defaults.js";
|
|
12
|
+
import {
|
|
13
|
+
LOCAL_RUNTIME_PROVIDER_TYPE,
|
|
14
|
+
collectDuplicateLocalVariantModelIds,
|
|
15
|
+
materializeLocalVariantProvider,
|
|
16
|
+
normalizeLocalModelsMetadata
|
|
17
|
+
} from "./local-models.js";
|
|
12
18
|
|
|
13
19
|
export const CONFIG_VERSION = 2;
|
|
14
20
|
export const MIN_SUPPORTED_CONFIG_VERSION = 1;
|
|
@@ -1769,12 +1775,15 @@ export function normalizeRuntimeConfig(rawConfig, options = {}) {
|
|
|
1769
1775
|
const raw = shouldMigrate
|
|
1770
1776
|
? migrateRuntimeConfig(rawInput, { targetVersion })
|
|
1771
1777
|
: rawInput;
|
|
1772
|
-
const
|
|
1773
|
-
|
|
1774
|
-
.
|
|
1775
|
-
|
|
1776
|
-
|
|
1777
|
-
|
|
1778
|
+
const localModels = normalizeLocalModelsMetadata(raw.metadata?.localModels);
|
|
1779
|
+
const providers = sanitizeModelFallbackReferences([
|
|
1780
|
+
...toArray(raw.providers)
|
|
1781
|
+
.map(normalizeProvider)
|
|
1782
|
+
.filter(Boolean)
|
|
1783
|
+
.filter((provider) => provider.enabled !== false)
|
|
1784
|
+
.filter((provider) => provider.type !== LOCAL_RUNTIME_PROVIDER_TYPE),
|
|
1785
|
+
...materializeLocalVariantProvider({ metadata: { localModels } })
|
|
1786
|
+
]);
|
|
1778
1787
|
const modelAliasResult = normalizeModelAliases(raw.modelAliases || raw["model-aliases"]);
|
|
1779
1788
|
const rawDefaultModel = typeof raw.defaultModel === "string"
|
|
1780
1789
|
? raw.defaultModel
|
|
@@ -1816,7 +1825,10 @@ export function normalizeRuntimeConfig(rawConfig, options = {}) {
|
|
|
1816
1825
|
...(webSearch ? { webSearch } : {}),
|
|
1817
1826
|
...(claudeCode && Object.keys(claudeCode).length > 0 ? { claudeCode } : {}),
|
|
1818
1827
|
ollama,
|
|
1819
|
-
metadata: sanitizeRuntimeMetadata(
|
|
1828
|
+
metadata: sanitizeRuntimeMetadata({
|
|
1829
|
+
...(normalizeMetadataObject(raw.metadata) || {}),
|
|
1830
|
+
localModels
|
|
1831
|
+
})
|
|
1820
1832
|
};
|
|
1821
1833
|
Object.defineProperty(normalized, NORMALIZATION_ISSUES_SYMBOL, {
|
|
1822
1834
|
value: {
|
|
@@ -2185,6 +2197,9 @@ export function validateRuntimeConfig(config, { requireMasterKey = false, requir
|
|
|
2185
2197
|
validateProviderRateLimits(config, routingIndex, errors);
|
|
2186
2198
|
validateModelAliases(config, routingIndex, errors);
|
|
2187
2199
|
validateAmpConfig(config, routingIndex, errors);
|
|
2200
|
+
for (const duplicateModelId of collectDuplicateLocalVariantModelIds(config.metadata?.localModels)) {
|
|
2201
|
+
errors.push(`Duplicate local variant model id '${duplicateModelId}'.`);
|
|
2202
|
+
}
|
|
2188
2203
|
|
|
2189
2204
|
if (requireMasterKey && !config.masterKey) {
|
|
2190
2205
|
errors.push("masterKey is required for worker deployment/export.");
|
|
@@ -3,7 +3,10 @@ import {
|
|
|
3
3
|
claudeEventToOpenAIChunks,
|
|
4
4
|
initClaudeToOpenAIState
|
|
5
5
|
} from "../../translator/response/claude-to-openai.js";
|
|
6
|
-
import {
|
|
6
|
+
import {
|
|
7
|
+
finalizeOpenAIToClaudeStream,
|
|
8
|
+
normalizeOpenAIUsageToClaude
|
|
9
|
+
} from "../../translator/response/openai-to-claude.js";
|
|
7
10
|
import { passthroughResponseWithCors, withCorsHeaders } from "./http.js";
|
|
8
11
|
|
|
9
12
|
function normalizeOpenAIContent(content) {
|
|
@@ -120,10 +123,7 @@ export function convertOpenAINonStreamToClaude(result, fallbackModel = "unknown"
|
|
|
120
123
|
content,
|
|
121
124
|
stop_reason: convertOpenAIFinishReason(resolveOpenAINonStreamFinishReason(choice)),
|
|
122
125
|
stop_sequence: null,
|
|
123
|
-
usage:
|
|
124
|
-
input_tokens: result?.usage?.prompt_tokens || 0,
|
|
125
|
-
output_tokens: result?.usage?.completion_tokens || 0
|
|
126
|
-
}
|
|
126
|
+
usage: normalizeOpenAIUsageToClaude(result?.usage)
|
|
127
127
|
};
|
|
128
128
|
}
|
|
129
129
|
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
/** Provider `type` tag carried by the synthetic provider that is built from local model variants. */
export const LOCAL_RUNTIME_PROVIDER_TYPE = "local-runtime";

/** Provider `id` given to the synthetic local-models provider. */
export const LOCAL_RUNTIME_PROVIDER_ID = "local-models";

/** `baseUrl` given to the synthetic provider: a loopback address on port 39391 under `/v1`. */
export const LOCAL_RUNTIME_BASE_URL = "http://127.0.0.1:39391/v1";
|
|
4
|
+
|
|
5
|
+
/**
 * Tells whether a value is a truthy object that is not an array.
 *
 * Note that the check is structural only: class instances such as `Date`
 * or `Map` also pass, because nothing here inspects the prototype.
 *
 * @param {*} value - Candidate to inspect.
 * @returns {boolean} `true` when `value` is a non-null, non-array object.
 */
function isPlainObject(value) {
  if (!value) return false;
  if (Array.isArray(value)) return false;
  return typeof value === "object";
}
|
|
8
|
+
|
|
9
|
+
/**
 * Produces a shallow copy of an object, or a fresh empty object when the
 * input is not a non-array object.
 *
 * @param {*} value - Value to copy.
 * @returns {object} A new object; nested values are shared with the input.
 */
function clonePlainObject(value) {
  if (!isPlainObject(value)) return {};
  return { ...value };
}
|
|
12
|
+
|
|
13
|
+
/**
 * Coerces a value to a trimmed string.
 *
 * @param {*} value - Candidate value.
 * @returns {string} The trimmed text when `value` is a string, otherwise `""`.
 */
function normalizeString(value) {
  if (typeof value !== "string") return "";
  return value.trim();
}
|
|
16
|
+
|
|
17
|
+
/**
 * Converts a value to a positive whole number.
 *
 * The value is coerced with `Number()`, rounded down, and accepted only if
 * the rounded result is still greater than zero. Checking after rounding
 * matters: a fractional input such as `0.5` rounds down to `0`, which is
 * not positive and must be rejected rather than returned.
 *
 * @param {*} value - Candidate value (number or numeric string).
 * @returns {number|undefined} The floored positive integer, or `undefined`
 *   when the value is not finite or does not round down to at least 1.
 */
function normalizePositiveNumber(value) {
  const parsed = Number(value);
  if (!Number.isFinite(parsed)) return undefined;

  const whole = Math.floor(parsed);
  return whole > 0 ? whole : undefined;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Normalizes one entry of the local model library map.
 *
 * All properties of the entry are carried over. The `id` is the trimmed
 * `entry.id` when that is non-empty, otherwise the supplied `key`. The
 * optional text fields `source`, `displayName`, `path` and `availability`
 * are trimmed when present, and removed when they are not non-empty strings.
 *
 * @param {string} key - Map key the entry was stored under; used as the id fallback.
 * @param {*} entry - Raw library entry.
 * @returns {object|null} A new normalized entry, or `null` when `entry` is
 *   not a non-array object.
 */
function normalizeLocalModelLibraryEntry(key, entry) {
  if (!isPlainObject(entry)) return null;

  const resolvedId = normalizeString(entry.id) || key;
  const result = { ...entry, id: resolvedId };

  ["source", "displayName", "path", "availability"].forEach((field) => {
    if (!(field in result)) return;

    const text = normalizeString(result[field]);
    if (text) {
      result[field] = text;
    } else {
      delete result[field];
    }
  });

  return result;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Normalizes one entry of the local model variants map.
 *
 * All properties of the entry are carried over, then:
 * - `key` becomes the trimmed `entry.key`, falling back to the supplied `key`;
 * - `baseModelId`, `id`, `name` and `runtime` become trimmed strings (possibly `""`);
 * - `enabled` and `preload` are `true` only when the raw value is exactly `true`;
 * - `preset` and `availability`, when present, are trimmed or removed if empty;
 * - `contextWindow` and `estimatedBytes` are kept only as positive whole numbers;
 * - `capabilities` is shallow-copied when it is an object and removed otherwise.
 *
 * @param {string} key - Map key the entry was stored under; used as the key fallback.
 * @param {*} entry - Raw variant entry.
 * @returns {object|null} A new normalized variant, or `null` when `entry` is
 *   not a non-array object.
 */
function normalizeLocalModelVariantEntry(key, entry) {
  if (!isPlainObject(entry)) return null;

  const variant = {
    ...entry,
    key: normalizeString(entry.key) || key,
    baseModelId: normalizeString(entry.baseModelId),
    id: normalizeString(entry.id),
    name: normalizeString(entry.name),
    runtime: normalizeString(entry.runtime),
    enabled: entry.enabled === true,
    preload: entry.preload === true
  };

  // Optional text fields: only touched when already present, so they never
  // introduce a new property on the result.
  for (const field of ["preset", "availability"]) {
    if (!(field in variant)) continue;

    const text = normalizeString(variant[field]);
    if (text) {
      variant[field] = text;
    } else {
      delete variant[field];
    }
  }

  // Numeric fields are read from the raw entry and dropped unless valid.
  for (const field of ["contextWindow", "estimatedBytes"]) {
    const amount = normalizePositiveNumber(entry[field]);
    if (amount === undefined) {
      delete variant[field];
    } else {
      variant[field] = amount;
    }
  }

  if (isPlainObject(entry.capabilities)) {
    variant.capabilities = { ...entry.capabilities };
  } else {
    delete variant.capabilities;
  }

  return variant;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Stores `value` on `target` as an own, enumerable, writable, configurable
 * data property.
 *
 * Plain assignment (`target[name] = value`) would invoke the inherited
 * `__proto__` setter when `name` is `"__proto__"`, silently replacing the
 * prototype of the map instead of recording the entry. Defining the property
 * avoids that while behaving like assignment for every other name.
 *
 * @param {object} target - Map object to write to.
 * @param {string} name - Property name.
 * @param {*} value - Value to store.
 */
function setOwnEntry(target, name, value) {
  Object.defineProperty(target, name, {
    value,
    enumerable: true,
    writable: true,
    configurable: true
  });
}

/**
 * Normalizes the `localModels` metadata section into a fixed shape.
 *
 * - `runtime` and `capacity` are shallow copies of the corresponding raw
 *   objects (empty objects when missing or not objects).
 * - `library` is rebuilt from normalized library entries, keyed by each
 *   entry's id (falling back to the raw map key when the id is empty).
 * - `variants` is rebuilt from normalized variant entries, keyed by each
 *   variant's `key`; entries that are not objects or have no key are skipped.
 *
 * @param {*} [raw={}] - Raw `localModels` metadata; non-objects are treated as empty.
 * @returns {{runtime: object, library: object, variants: object, capacity: object}}
 *   A new normalized metadata object.
 */
export function normalizeLocalModelsMetadata(raw = {}) {
  const source = isPlainObject(raw) ? raw : {};
  const runtime = clonePlainObject(source.runtime);
  const capacity = clonePlainObject(source.capacity);
  const library = {};
  const variants = {};

  for (const [key, value] of Object.entries(clonePlainObject(source.library))) {
    const normalizedEntry = normalizeLocalModelLibraryEntry(normalizeString(key), value);
    if (!normalizedEntry) continue;
    setOwnEntry(library, normalizedEntry.id || key, normalizedEntry);
  }

  for (const [key, value] of Object.entries(clonePlainObject(source.variants))) {
    const normalizedEntry = normalizeLocalModelVariantEntry(normalizeString(key), value);
    if (!normalizedEntry?.key) continue;
    setOwnEntry(variants, normalizedEntry.key, normalizedEntry);
  }

  return {
    runtime,
    library,
    variants,
    capacity
  };
}
|
|
108
|
+
|
|
109
|
+
/**
 * Lists the model ids that are used by more than one local variant.
 *
 * The metadata is normalized first; variants with an empty id are ignored.
 * Every variant is considered, whether or not it is enabled, and ids are
 * compared as exact strings.
 *
 * @param {*} localModelsMetadata - Raw or normalized `localModels` metadata.
 * @returns {string[]} Each repeated id once, in the order its first repeat was seen.
 */
export function collectDuplicateLocalVariantModelIds(localModelsMetadata) {
  const { variants } = normalizeLocalModelsMetadata(localModelsMetadata);
  const firstSeen = new Set();
  const repeated = new Set();

  Object.values(variants)
    .map((variant) => normalizeString(variant?.id))
    .filter((modelId) => modelId !== "")
    .forEach((modelId) => {
      const bucket = firstSeen.has(modelId) ? repeated : firstSeen;
      bucket.add(modelId);
    });

  return Array.from(repeated);
}
|
|
123
|
+
|
|
124
|
+
/**
 * Builds the synthetic "Local Models" provider from the local model variants
 * found in `config.metadata.localModels`.
 *
 * Only variants that are enabled (`enabled === true`) and have a non-empty
 * id become models. Each model carries a `metadata` object describing the
 * variant; `availability` falls back from the variant to its base library
 * entry and finally to `"available"`. `name`, `contextWindow` and
 * `capabilities` are copied only when the variant defines them.
 *
 * @param {object} [config={}] - Object whose `metadata.localModels` holds the
 *   raw or normalized local models metadata.
 * @returns {object[]} An empty array when no variant qualifies, otherwise a
 *   one-element array containing the provider definition.
 */
export function materializeLocalVariantProvider(config = {}) {
  const { library, variants } = normalizeLocalModelsMetadata(config?.metadata?.localModels);

  const models = Object.values(variants)
    .filter((variant) => Boolean(variant) && variant.enabled === true && Boolean(variant.id))
    .map((variant) => {
      const baseModel = library[variant.baseModelId] || null;

      const model = {
        id: variant.id,
        enabled: true,
        metadata: {
          localVariantKey: variant.key,
          baseModelId: variant.baseModelId,
          runtime: variant.runtime,
          preload: variant.preload === true,
          availability: variant.availability || baseModel?.availability || "available",
          capacityState: variant.capacityState,
          estimatedBytes: variant.estimatedBytes
        }
      };

      if (variant.name) {
        model.name = variant.name;
      }
      if (variant.contextWindow !== undefined) {
        model.contextWindow = variant.contextWindow;
      }
      if (isPlainObject(variant.capabilities)) {
        model.capabilities = { ...variant.capabilities };
      }

      return model;
    });

  if (models.length === 0) {
    return [];
  }

  const provider = {
    id: LOCAL_RUNTIME_PROVIDER_ID,
    name: "Local Models",
    type: LOCAL_RUNTIME_PROVIDER_TYPE,
    baseUrl: LOCAL_RUNTIME_BASE_URL,
    format: "openai",
    formats: ["openai"],
    apiKey: "local-runtime",
    enabled: true,
    models,
    rateLimits: []
  };

  return [provider];
}
|
|
@@ -4,6 +4,73 @@
|
|
|
4
4
|
|
|
5
5
|
import { FORMATS } from "../formats.js";
|
|
6
6
|
|
|
7
|
+
/** Frozen zeroed `server_tool_use` counters, copied when the source usage has none. */
const DEFAULT_CLAUDE_SERVER_TOOL_USE = Object.freeze({
  web_search_requests: 0,
  web_fetch_requests: 0
});
|
|
11
|
+
|
|
12
|
+
/** Frozen zeroed `cache_creation` counters, copied when the source usage has none. */
const DEFAULT_CLAUDE_CACHE_CREATION = Object.freeze({
  ephemeral_1h_input_tokens: 0,
  ephemeral_5m_input_tokens: 0
});
|
|
16
|
+
|
|
17
|
+
/**
 * Coerces a value to a finite number that is not negative.
 *
 * @param {*} value - Candidate value; coerced with `Number()`.
 * @param {number} [fallback=0] - Returned when the coerced value is not
 *   finite or is below zero.
 * @returns {number} The coerced number (fractions are kept) or `fallback`.
 */
function toNonNegativeNumber(value, fallback = 0) {
  const numeric = Number(value);
  if (!Number.isFinite(numeric)) return fallback;
  if (numeric < 0) return fallback;
  return numeric;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Normalizes a `server_tool_use` usage section.
 *
 * @param {*} value - Raw section; anything that is not a non-array object
 *   yields a copy of the zeroed defaults.
 * @returns {{web_search_requests: number, web_fetch_requests: number}}
 *   A new object whose counters are non-negative numbers (invalid values become 0).
 */
function normalizeClaudeServerToolUse(value) {
  const isRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  if (!isRecord) {
    return { ...DEFAULT_CLAUDE_SERVER_TOOL_USE };
  }

  const { web_search_requests: searches, web_fetch_requests: fetches } = value;
  return {
    web_search_requests: toNonNegativeNumber(searches),
    web_fetch_requests: toNonNegativeNumber(fetches)
  };
}
|
|
32
|
+
|
|
33
|
+
/**
 * Normalizes a `cache_creation` usage section.
 *
 * @param {*} value - Raw section; anything that is not a non-array object
 *   yields a copy of the zeroed defaults.
 * @returns {{ephemeral_1h_input_tokens: number, ephemeral_5m_input_tokens: number}}
 *   A new object whose counters are non-negative numbers (invalid values become 0).
 */
function normalizeClaudeCacheCreation(value) {
  const isRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  if (!isRecord) {
    return { ...DEFAULT_CLAUDE_CACHE_CREATION };
  }

  const { ephemeral_1h_input_tokens: oneHour, ephemeral_5m_input_tokens: fiveMinutes } = value;
  return {
    ephemeral_1h_input_tokens: toNonNegativeNumber(oneHour),
    ephemeral_5m_input_tokens: toNonNegativeNumber(fiveMinutes)
  };
}
|
|
43
|
+
|
|
44
|
+
/**
 * Converts a usage record into the Claude-style usage shape.
 *
 * Token counts are read from `prompt_tokens` / `completion_tokens` first and
 * from `input_tokens` / `output_tokens` when the former are null or missing,
 * so a record already produced by this function can be passed through again.
 * `cache_creation_input_tokens` falls back to the sum of the two normalized
 * `cache_creation` counters. `service_tier` and `speed` are trimmed strings
 * defaulting to `"standard"`. Every numeric field is a non-negative number,
 * with invalid values replaced by 0.
 *
 * @param {*} rawUsage - Raw usage record; anything that is not a non-array
 *   object is treated as empty.
 * @returns {object} A new usage object. The `iterations` array, when the
 *   input provides one, is passed through by reference rather than copied.
 */
export function normalizeOpenAIUsageToClaude(rawUsage) {
  const isRecord = Boolean(rawUsage) && typeof rawUsage === "object" && !Array.isArray(rawUsage);
  const usage = isRecord ? rawUsage : {};

  // Trimmed label, or "standard" when the value is not a non-blank string.
  const labelOrStandard = (candidate) => {
    if (typeof candidate !== "string") return "standard";
    return candidate.trim() || "standard";
  };

  const cacheCreation = normalizeClaudeCacheCreation(usage.cache_creation);
  const ephemeralTotal = cacheCreation.ephemeral_1h_input_tokens
    + cacheCreation.ephemeral_5m_input_tokens;

  return {
    input_tokens: toNonNegativeNumber(usage.prompt_tokens ?? usage.input_tokens),
    cache_creation_input_tokens: toNonNegativeNumber(
      usage.cache_creation_input_tokens ?? ephemeralTotal
    ),
    cache_read_input_tokens: toNonNegativeNumber(usage.cache_read_input_tokens),
    output_tokens: toNonNegativeNumber(usage.completion_tokens ?? usage.output_tokens),
    server_tool_use: normalizeClaudeServerToolUse(usage.server_tool_use),
    service_tier: labelOrStandard(usage.service_tier),
    cache_creation: cacheCreation,
    inference_geo: typeof usage.inference_geo === "string" ? usage.inference_geo : "",
    iterations: Array.isArray(usage.iterations) ? usage.iterations : [],
    speed: labelOrStandard(usage.speed)
  };
}
|
|
73
|
+
|
|
7
74
|
/**
|
|
8
75
|
* Convert OpenAI stream chunk to Claude format
|
|
9
76
|
*/
|
|
@@ -16,13 +83,7 @@ export function openaiToClaudeResponse(chunk, state) {
|
|
|
16
83
|
|
|
17
84
|
// Track usage
|
|
18
85
|
if (chunk.usage && typeof chunk.usage === "object") {
|
|
19
|
-
|
|
20
|
-
const outputTokens = chunk.usage.completion_tokens || 0;
|
|
21
|
-
|
|
22
|
-
state.usage = {
|
|
23
|
-
input_tokens: promptTokens,
|
|
24
|
-
output_tokens: outputTokens
|
|
25
|
-
};
|
|
86
|
+
state.usage = normalizeOpenAIUsageToClaude(chunk.usage);
|
|
26
87
|
}
|
|
27
88
|
|
|
28
89
|
// First chunk - send message_start
|
|
@@ -264,7 +325,7 @@ function ensureMessageStart(state, results, chunk = undefined) {
|
|
|
264
325
|
content: [],
|
|
265
326
|
stop_reason: null,
|
|
266
327
|
stop_sequence: null,
|
|
267
|
-
usage:
|
|
328
|
+
usage: normalizeOpenAIUsageToClaude(state.usage)
|
|
268
329
|
}
|
|
269
330
|
});
|
|
270
331
|
}
|
|
@@ -297,7 +358,7 @@ export function finalizeOpenAIToClaudeStream(state, { force = false } = {}) {
|
|
|
297
358
|
results.push({
|
|
298
359
|
type: "message_delta",
|
|
299
360
|
delta: { stop_reason: convertFinishReason(normalizedFinishReason) },
|
|
300
|
-
usage: state.usage
|
|
361
|
+
usage: normalizeOpenAIUsageToClaude(state.usage)
|
|
301
362
|
});
|
|
302
363
|
state.messageDeltaSent = true;
|
|
303
364
|
}
|