@khanglvm/llm-router 2.4.0 → 2.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,12 @@ import {
9
9
  CLAUDE_CODE_SUBSCRIPTION_MODELS
10
10
  } from "./subscription-constants.js";
11
11
  import { sanitizeRuntimeMetadata } from "../shared/local-router-defaults.js";
12
+ import {
13
+ LOCAL_RUNTIME_PROVIDER_TYPE,
14
+ collectDuplicateLocalVariantModelIds,
15
+ materializeLocalVariantProvider,
16
+ normalizeLocalModelsMetadata
17
+ } from "./local-models.js";
12
18
 
13
19
  export const CONFIG_VERSION = 2;
14
20
  export const MIN_SUPPORTED_CONFIG_VERSION = 1;
@@ -1769,12 +1775,15 @@ export function normalizeRuntimeConfig(rawConfig, options = {}) {
1769
1775
  const raw = shouldMigrate
1770
1776
  ? migrateRuntimeConfig(rawInput, { targetVersion })
1771
1777
  : rawInput;
1772
- const providers = sanitizeModelFallbackReferences(
1773
- toArray(raw.providers)
1774
- .map(normalizeProvider)
1775
- .filter(Boolean)
1776
- .filter((provider) => provider.enabled !== false)
1777
- );
1778
+ const localModels = normalizeLocalModelsMetadata(raw.metadata?.localModels);
1779
+ const providers = sanitizeModelFallbackReferences([
1780
+ ...toArray(raw.providers)
1781
+ .map(normalizeProvider)
1782
+ .filter(Boolean)
1783
+ .filter((provider) => provider.enabled !== false)
1784
+ .filter((provider) => provider.type !== LOCAL_RUNTIME_PROVIDER_TYPE),
1785
+ ...materializeLocalVariantProvider({ metadata: { localModels } })
1786
+ ]);
1778
1787
  const modelAliasResult = normalizeModelAliases(raw.modelAliases || raw["model-aliases"]);
1779
1788
  const rawDefaultModel = typeof raw.defaultModel === "string"
1780
1789
  ? raw.defaultModel
@@ -1816,7 +1825,10 @@ export function normalizeRuntimeConfig(rawConfig, options = {}) {
1816
1825
  ...(webSearch ? { webSearch } : {}),
1817
1826
  ...(claudeCode && Object.keys(claudeCode).length > 0 ? { claudeCode } : {}),
1818
1827
  ollama,
1819
- metadata: sanitizeRuntimeMetadata(raw.metadata)
1828
+ metadata: sanitizeRuntimeMetadata({
1829
+ ...(normalizeMetadataObject(raw.metadata) || {}),
1830
+ localModels
1831
+ })
1820
1832
  };
1821
1833
  Object.defineProperty(normalized, NORMALIZATION_ISSUES_SYMBOL, {
1822
1834
  value: {
@@ -2185,6 +2197,9 @@ export function validateRuntimeConfig(config, { requireMasterKey = false, requir
2185
2197
  validateProviderRateLimits(config, routingIndex, errors);
2186
2198
  validateModelAliases(config, routingIndex, errors);
2187
2199
  validateAmpConfig(config, routingIndex, errors);
2200
+ for (const duplicateModelId of collectDuplicateLocalVariantModelIds(config.metadata?.localModels)) {
2201
+ errors.push(`Duplicate local variant model id '${duplicateModelId}'.`);
2202
+ }
2188
2203
 
2189
2204
  if (requireMasterKey && !config.masterKey) {
2190
2205
  errors.push("masterKey is required for worker deployment/export.");
@@ -3,7 +3,10 @@ import {
3
3
  claudeEventToOpenAIChunks,
4
4
  initClaudeToOpenAIState
5
5
  } from "../../translator/response/claude-to-openai.js";
6
- import { finalizeOpenAIToClaudeStream } from "../../translator/response/openai-to-claude.js";
6
+ import {
7
+ finalizeOpenAIToClaudeStream,
8
+ normalizeOpenAIUsageToClaude
9
+ } from "../../translator/response/openai-to-claude.js";
7
10
  import { passthroughResponseWithCors, withCorsHeaders } from "./http.js";
8
11
 
9
12
  function normalizeOpenAIContent(content) {
@@ -120,10 +123,7 @@ export function convertOpenAINonStreamToClaude(result, fallbackModel = "unknown"
120
123
  content,
121
124
  stop_reason: convertOpenAIFinishReason(resolveOpenAINonStreamFinishReason(choice)),
122
125
  stop_sequence: null,
123
- usage: {
124
- input_tokens: result?.usage?.prompt_tokens || 0,
125
- output_tokens: result?.usage?.completion_tokens || 0
126
- }
126
+ usage: normalizeOpenAIUsageToClaude(result?.usage)
127
127
  };
128
128
  }
129
129
 
@@ -0,0 +1,168 @@
1
// Value used for the `type` field of the provider object built by
// materializeLocalVariantProvider.
+ export const LOCAL_RUNTIME_PROVIDER_TYPE = "local-runtime";
2
// Value used for the `id` field of that same provider object.
+ export const LOCAL_RUNTIME_PROVIDER_ID = "local-models";
3
// Value used for the `baseUrl` field of that same provider object
// (a loopback address; NOTE(review): presumably where the local runtime
// listens — confirm against the runtime launcher).
+ export const LOCAL_RUNTIME_BASE_URL = "http://127.0.0.1:39391/v1";
4
+
5
/**
 * Report whether `value` is a non-null object that is not an array.
 *
 * Only `typeof` and `Array.isArray` are consulted, so any non-array object
 * (including class instances) is accepted.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean} `true` for non-null, non-array objects.
 */
function isPlainObject(value) {
  if (value === null || typeof value !== "object") return false;
  return !Array.isArray(value);
}
8
+
9
/**
 * Return a shallow copy of `value` when it is a non-array object, otherwise
 * a fresh empty object.
 *
 * @param {unknown} value - Candidate object to copy.
 * @returns {object} A new object; nested values are shared with the input.
 */
function clonePlainObject(value) {
  if (!isPlainObject(value)) return {};
  return { ...value };
}
12
+
13
/**
 * Trim a string value; every non-string input collapses to the empty string.
 *
 * @param {unknown} value - Candidate value.
 * @returns {string} The trimmed string, or `""` when `value` is not a string.
 */
function normalizeString(value) {
  if (typeof value !== "string") return "";
  return value.trim();
}
16
+
17
/**
 * Coerce `value` to a positive integer.
 *
 * The value is converted with `Number()` and rounded down. The positivity
 * check is applied AFTER flooring so that fractional inputs in (0, 1), which
 * floor to 0, are rejected instead of being returned as a non-positive 0.
 *
 * @param {unknown} value - Candidate numeric value (number or numeric string).
 * @returns {number|undefined} The floored positive integer, or `undefined`
 *   when the input is not finite or floors to a value <= 0.
 */
function normalizePositiveNumber(value) {
  const floored = Math.floor(Number(value));
  // NaN and +/-Infinity survive Math.floor unchanged, so one finiteness check suffices.
  if (!Number.isFinite(floored) || floored <= 0) return undefined;
  return floored;
}
22
+
23
/**
 * Normalize one entry of the local-model library map.
 *
 * Unknown fields are carried over unchanged. `id` is the trimmed `entry.id`,
 * falling back to `key`. Each of the optional text fields (`source`,
 * `displayName`, `path`, `availability`) is trimmed when present and removed
 * when the trimmed result is empty or the value is not a string.
 *
 * @param {string} key - Map key the entry was stored under.
 * @param {unknown} entry - Raw entry value.
 * @returns {object|null} The normalized copy, or `null` when `entry` is not
 *   a non-array object.
 */
function normalizeLocalModelLibraryEntry(key, entry) {
  if (!isPlainObject(entry)) return null;

  const result = { ...entry, id: normalizeString(entry.id) || key };
  const optionalTextFields = ["source", "displayName", "path", "availability"];

  optionalTextFields
    .filter((field) => field in result)
    .forEach((field) => {
      const text = normalizeString(result[field]);
      if (text) {
        result[field] = text;
      } else {
        delete result[field];
      }
    });

  return result;
}
41
+
42
/**
 * Normalize one entry of the local-model variants map.
 *
 * Unknown fields are carried over unchanged. `key`, `baseModelId`, `id`,
 * `name` and `runtime` are always present as trimmed strings (`key` falls
 * back to the map key); `enabled` and `preload` are `true` only when the raw
 * value is exactly `true`. Optional fields (`preset`, `contextWindow`,
 * `estimatedBytes`, `capabilities`, `availability`) are kept only when they
 * hold a usable value and are removed otherwise.
 *
 * @param {string} key - Map key the variant was stored under.
 * @param {unknown} entry - Raw variant value.
 * @returns {object|null} The normalized copy, or `null` when `entry` is not
 *   a non-array object.
 */
function normalizeLocalModelVariantEntry(key, entry) {
  if (!isPlainObject(entry)) return null;

  const variant = {
    ...entry,
    key: normalizeString(entry.key) || key,
    baseModelId: normalizeString(entry.baseModelId),
    id: normalizeString(entry.id),
    name: normalizeString(entry.name),
    runtime: normalizeString(entry.runtime),
    enabled: entry.enabled === true,
    preload: entry.preload === true
  };

  // Optional text field: trim when present, drop when nothing is left.
  const tidyOptionalText = (field) => {
    if (!(field in variant)) return;
    const text = normalizeString(variant[field]);
    if (text) {
      variant[field] = text;
    } else {
      delete variant[field];
    }
  };

  // Optional numeric field: keep the normalized number or drop the field.
  const tidyPositiveNumber = (field) => {
    const amount = normalizePositiveNumber(entry[field]);
    if (amount === undefined) {
      delete variant[field];
    } else {
      variant[field] = amount;
    }
  };

  tidyOptionalText("preset");
  tidyPositiveNumber("contextWindow");
  tidyPositiveNumber("estimatedBytes");

  if (isPlainObject(entry.capabilities)) {
    // Shallow copy so the result does not alias the caller's object.
    variant.capabilities = { ...entry.capabilities };
  } else {
    delete variant.capabilities;
  }

  tidyOptionalText("availability");

  return variant;
}
81
+
82
/**
 * Normalize the `localModels` metadata section into a fixed shape.
 *
 * `runtime` and `capacity` are shallow-copied. Every `library` entry is
 * normalized and re-keyed by its normalized `id`; every `variants` entry is
 * normalized and re-keyed by its normalized `key`. Entries that are not
 * objects are skipped, as are variants without a usable key.
 *
 * Entries are stored with `Object.defineProperty` rather than plain
 * assignment: the names come from configuration data, and assigning to a
 * name such as `"__proto__"` would invoke the inherited setter, silently
 * dropping the entry and replacing the map's prototype.
 *
 * @param {unknown} [raw={}] - Raw `metadata.localModels` value.
 * @returns {{runtime: object, library: object, variants: object, capacity: object}}
 *   Always a fresh object with all four sections present.
 */
export function normalizeLocalModelsMetadata(raw = {}) {
  const source = isPlainObject(raw) ? raw : {};
  const runtime = clonePlainObject(source.runtime);
  const capacity = clonePlainObject(source.capacity);
  const library = {};
  const variants = {};

  // Define an ordinary own data property, bypassing any inherited setter.
  const storeEntry = (target, name, entry) => {
    Object.defineProperty(target, name, {
      value: entry,
      enumerable: true,
      writable: true,
      configurable: true
    });
  };

  for (const [key, value] of Object.entries(clonePlainObject(source.library))) {
    const normalizedEntry = normalizeLocalModelLibraryEntry(normalizeString(key), value);
    if (!normalizedEntry) continue;
    storeEntry(library, normalizedEntry.id || key, normalizedEntry);
  }

  for (const [key, value] of Object.entries(clonePlainObject(source.variants))) {
    const normalizedEntry = normalizeLocalModelVariantEntry(normalizeString(key), value);
    if (!normalizedEntry?.key) continue;
    storeEntry(variants, normalizedEntry.key, normalizedEntry);
  }

  return {
    runtime,
    library,
    variants,
    capacity
  };
}
108
+
109
/**
 * List variant model ids that are used by more than one variant.
 *
 * The metadata is normalized first; variants with an empty id are ignored.
 * Each duplicated id appears once, in the order its first repeat was found.
 *
 * @param {unknown} localModelsMetadata - Raw or normalized `localModels` metadata.
 * @returns {string[]} Duplicated model ids (empty when there are none).
 */
export function collectDuplicateLocalVariantModelIds(localModelsMetadata) {
  const { variants } = normalizeLocalModelsMetadata(localModelsMetadata);
  const firstSeen = new Set();
  const repeated = new Set();

  Object.values(variants)
    .map((variant) => normalizeString(variant?.id))
    .filter((modelId) => modelId !== "")
    .forEach((modelId) => {
      if (firstSeen.has(modelId)) {
        repeated.add(modelId);
        return;
      }
      firstSeen.add(modelId);
    });

  return Array.from(repeated);
}
123
+
124
/**
 * Build the synthetic provider that exposes enabled local model variants.
 *
 * Reads `config.metadata.localModels`, normalizes it, and turns every variant
 * that is enabled and has a non-empty id into a model entry. Availability is
 * taken from the variant, then from the library entry named by `baseModelId`,
 * and finally defaults to `"available"`.
 *
 * @param {object} [config={}] - Config object carrying `metadata.localModels`.
 * @returns {object[]} A one-element array holding the provider, or an empty
 *   array when no variant qualifies.
 */
export function materializeLocalVariantProvider(config = {}) {
  const { library, variants } = normalizeLocalModelsMetadata(config?.metadata?.localModels);

  const models = Object.values(variants)
    .filter((variant) => variant && variant.enabled === true && variant.id)
    .map((variant) => {
      const libraryEntry = library[variant.baseModelId] || null;

      return {
        id: variant.id,
        enabled: true,
        metadata: {
          localVariantKey: variant.key,
          baseModelId: variant.baseModelId,
          runtime: variant.runtime,
          preload: variant.preload === true,
          availability: variant.availability || libraryEntry?.availability || "available",
          capacityState: variant.capacityState,
          estimatedBytes: variant.estimatedBytes
        },
        // Optional fields are appended only when the variant supplies them.
        ...(variant.name ? { name: variant.name } : {}),
        ...(variant.contextWindow !== undefined ? { contextWindow: variant.contextWindow } : {}),
        ...(isPlainObject(variant.capabilities) ? { capabilities: { ...variant.capabilities } } : {})
      };
    });

  if (models.length === 0) {
    return [];
  }

  const provider = {
    id: LOCAL_RUNTIME_PROVIDER_ID,
    name: "Local Models",
    type: LOCAL_RUNTIME_PROVIDER_TYPE,
    baseUrl: LOCAL_RUNTIME_BASE_URL,
    format: "openai",
    formats: ["openai"],
    apiKey: "local-runtime",
    enabled: true,
    models,
    rateLimits: []
  };

  return [provider];
}
@@ -4,6 +4,73 @@
4
4
 
5
5
  import { FORMATS } from "../formats.js";
6
6
 
7
// Frozen all-zero `server_tool_use` counters; normalizeClaudeServerToolUse
// returns a spread copy of this when its input is not a non-array object.
+ const DEFAULT_CLAUDE_SERVER_TOOL_USE = Object.freeze({
8
+ web_search_requests: 0,
9
+ web_fetch_requests: 0
10
+ });
11
+
12
// Frozen all-zero `cache_creation` counters; normalizeClaudeCacheCreation
// returns a spread copy of this when its input is not a non-array object.
+ const DEFAULT_CLAUDE_CACHE_CREATION = Object.freeze({
13
+ ephemeral_1h_input_tokens: 0,
14
+ ephemeral_5m_input_tokens: 0
15
+ });
16
+
17
/**
 * Coerce `value` with `Number()` and accept it only when finite and >= 0.
 *
 * @param {unknown} value - Candidate numeric value.
 * @param {number} [fallback=0] - Returned when the coerced value is rejected.
 * @returns {number} The coerced number, or `fallback`.
 */
function toNonNegativeNumber(value, fallback = 0) {
  const numeric = Number(value);
  if (!Number.isFinite(numeric) || numeric < 0) return fallback;
  return numeric;
}
21
+
22
/**
 * Produce a `server_tool_use` object with both counters present.
 *
 * @param {unknown} value - Raw `server_tool_use` value.
 * @returns {{web_search_requests: number, web_fetch_requests: number}}
 *   Counters coerced to non-negative numbers; a copy of the zeroed defaults
 *   when `value` is not a non-array object.
 */
function normalizeClaudeServerToolUse(value) {
  const isRecord = value !== null && typeof value === "object" && !Array.isArray(value);
  if (!isRecord) {
    return { ...DEFAULT_CLAUDE_SERVER_TOOL_USE };
  }

  const { web_search_requests: searches, web_fetch_requests: fetches } = value;
  return {
    web_search_requests: toNonNegativeNumber(searches),
    web_fetch_requests: toNonNegativeNumber(fetches)
  };
}
32
+
33
/**
 * Produce a `cache_creation` object with both ephemeral counters present.
 *
 * @param {unknown} value - Raw `cache_creation` value.
 * @returns {{ephemeral_1h_input_tokens: number, ephemeral_5m_input_tokens: number}}
 *   Counters coerced to non-negative numbers; a copy of the zeroed defaults
 *   when `value` is not a non-array object.
 */
function normalizeClaudeCacheCreation(value) {
  const isRecord = value !== null && typeof value === "object" && !Array.isArray(value);
  if (!isRecord) {
    return { ...DEFAULT_CLAUDE_CACHE_CREATION };
  }

  const { ephemeral_1h_input_tokens: oneHour, ephemeral_5m_input_tokens: fiveMinutes } = value;
  return {
    ephemeral_1h_input_tokens: toNonNegativeNumber(oneHour),
    ephemeral_5m_input_tokens: toNonNegativeNumber(fiveMinutes)
  };
}
43
+
44
/**
 * Convert a usage object into the full Claude-style usage shape.
 *
 * Accepts either OpenAI field names (`prompt_tokens`, `completion_tokens`)
 * or Claude field names (`input_tokens`, `output_tokens`); the OpenAI names
 * win when both are present. Every field of the result is always populated:
 * counters default to 0, `service_tier` and `speed` default to "standard",
 * `inference_geo` to "" and `iterations` to an empty array.
 *
 * @param {unknown} rawUsage - Usage object from an upstream response; any
 *   non-object (or array) is treated as empty.
 * @returns {object} Claude-style usage record.
 */
export function normalizeOpenAIUsageToClaude(rawUsage) {
  const isRecord = Boolean(rawUsage) && typeof rawUsage === "object" && !Array.isArray(rawUsage);
  const usage = isRecord ? rawUsage : {};

  // Trimmed text when it is a non-blank string, otherwise "standard".
  const textOrStandard = (candidate) => {
    const trimmed = typeof candidate === "string" ? candidate.trim() : "";
    return trimmed || "standard";
  };

  const cacheCreation = normalizeClaudeCacheCreation(usage.cache_creation);
  // Without an explicit total, derive it from the per-TTL breakdown.
  const cacheCreationTotal = usage.cache_creation_input_tokens
    ?? (cacheCreation.ephemeral_1h_input_tokens + cacheCreation.ephemeral_5m_input_tokens);

  let inferenceGeo = "";
  if (typeof usage.inference_geo === "string") {
    inferenceGeo = usage.inference_geo;
  }

  let iterations = [];
  if (Array.isArray(usage.iterations)) {
    iterations = usage.iterations;
  }

  return {
    input_tokens: toNonNegativeNumber(usage.prompt_tokens ?? usage.input_tokens),
    cache_creation_input_tokens: toNonNegativeNumber(cacheCreationTotal),
    cache_read_input_tokens: toNonNegativeNumber(usage.cache_read_input_tokens),
    output_tokens: toNonNegativeNumber(usage.completion_tokens ?? usage.output_tokens),
    server_tool_use: normalizeClaudeServerToolUse(usage.server_tool_use),
    service_tier: textOrStandard(usage.service_tier),
    cache_creation: cacheCreation,
    inference_geo: inferenceGeo,
    iterations,
    speed: textOrStandard(usage.speed)
  };
}
73
+
7
74
  /**
8
75
  * Convert OpenAI stream chunk to Claude format
9
76
  */
@@ -16,13 +83,7 @@ export function openaiToClaudeResponse(chunk, state) {
16
83
 
17
84
  // Track usage
18
85
  if (chunk.usage && typeof chunk.usage === "object") {
19
- const promptTokens = chunk.usage.prompt_tokens || 0;
20
- const outputTokens = chunk.usage.completion_tokens || 0;
21
-
22
- state.usage = {
23
- input_tokens: promptTokens,
24
- output_tokens: outputTokens
25
- };
86
+ state.usage = normalizeOpenAIUsageToClaude(chunk.usage);
26
87
  }
27
88
 
28
89
  // First chunk - send message_start
@@ -264,7 +325,7 @@ function ensureMessageStart(state, results, chunk = undefined) {
264
325
  content: [],
265
326
  stop_reason: null,
266
327
  stop_sequence: null,
267
- usage: { input_tokens: 0, output_tokens: 0 }
328
+ usage: normalizeOpenAIUsageToClaude(state.usage)
268
329
  }
269
330
  });
270
331
  }
@@ -297,7 +358,7 @@ export function finalizeOpenAIToClaudeStream(state, { force = false } = {}) {
297
358
  results.push({
298
359
  type: "message_delta",
299
360
  delta: { stop_reason: convertFinishReason(normalizedFinishReason) },
300
- usage: state.usage || { input_tokens: 0, output_tokens: 0 }
361
+ usage: normalizeOpenAIUsageToClaude(state.usage)
301
362
  });
302
363
  state.messageDeltaSent = true;
303
364
  }