@khanglvm/llm-router 2.5.1 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/README.md +5 -2
- package/package.json +1 -1
- package/src/node/dev-command.js +114 -0
- package/src/node/huggingface-gguf.js +12 -0
- package/src/node/llamacpp-managed-runtime.js +202 -0
- package/src/node/llamacpp-runtime-profile.js +133 -0
- package/src/node/llamacpp-runtime.js +256 -78
- package/src/node/local-models-service.js +25 -2
- package/src/node/local-server.js +60 -2
- package/src/node/web-console-client.js +20 -20
- package/src/node/web-console-server.js +64 -8
- package/src/node/web-console-styles.generated.js +1 -1
- package/src/node/web-console-ui/local-models-utils.js +33 -0
- package/src/runtime/handler/provider-call.js +36 -18
- package/src/runtime/handler/runtime-policy.js +4 -1
- package/src/runtime/local-models.js +36 -0
|
@@ -135,6 +135,21 @@ function queueLargeRequestEvent(onLargeRequestLog, payload) {
|
|
|
135
135
|
}
|
|
136
136
|
}
|
|
137
137
|
|
|
138
|
+
async function resolveRequestProviderUrl(provider, plan, candidate, runtimeFlags) {
|
|
139
|
+
if (provider?.type === "local-runtime" && typeof runtimeFlags?.resolveLocalRuntimeBaseUrl === "function") {
|
|
140
|
+
const dynamicBaseUrl = await runtimeFlags.resolveLocalRuntimeBaseUrl({
|
|
141
|
+
candidate,
|
|
142
|
+
targetFormat: plan.targetFormat,
|
|
143
|
+
requestKind: plan.requestKind
|
|
144
|
+
});
|
|
145
|
+
if (dynamicBaseUrl) {
|
|
146
|
+
return resolveProviderUrl({ ...provider, baseUrl: dynamicBaseUrl }, plan.targetFormat, plan.requestKind);
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
return resolveProviderUrl(provider, plan.targetFormat, plan.requestKind);
|
|
151
|
+
}
|
|
152
|
+
|
|
138
153
|
function maybeQueueLargeRequestLog({
|
|
139
154
|
env,
|
|
140
155
|
onLargeRequestLog,
|
|
@@ -1074,8 +1089,27 @@ export async function makeProviderCall({
|
|
|
1074
1089
|
}
|
|
1075
1090
|
|
|
1076
1091
|
const executeHttpProviderRequest = async (plan) => {
|
|
1077
|
-
|
|
1078
|
-
|
|
1092
|
+
let providerUrl;
|
|
1093
|
+
try {
|
|
1094
|
+
providerUrl = await resolveRequestProviderUrl(provider, plan, candidate, runtimeFlags);
|
|
1095
|
+
} catch (error) {
|
|
1096
|
+
return jsonResponse({
|
|
1097
|
+
type: "error",
|
|
1098
|
+
error: {
|
|
1099
|
+
type: "api_error",
|
|
1100
|
+
message: error instanceof Error ? error.message : String(error)
|
|
1101
|
+
}
|
|
1102
|
+
}, 503);
|
|
1103
|
+
}
|
|
1104
|
+
if (!providerUrl) {
|
|
1105
|
+
return jsonResponse({
|
|
1106
|
+
type: "error",
|
|
1107
|
+
error: {
|
|
1108
|
+
type: "configuration_error",
|
|
1109
|
+
message: `Provider ${provider.id} has invalid baseUrl.`
|
|
1110
|
+
}
|
|
1111
|
+
}, 500);
|
|
1112
|
+
}
|
|
1079
1113
|
const headers = mergeCachingHeaders(
|
|
1080
1114
|
buildProviderHeaders(provider, env, plan.targetFormat),
|
|
1081
1115
|
requestHeaders,
|
|
@@ -1114,22 +1148,6 @@ export async function makeProviderCall({
|
|
|
1114
1148
|
}
|
|
1115
1149
|
};
|
|
1116
1150
|
|
|
1117
|
-
if (!resolveProviderUrl(provider, activePlan.targetFormat, activePlan.requestKind)) {
|
|
1118
|
-
return {
|
|
1119
|
-
ok: false,
|
|
1120
|
-
status: 500,
|
|
1121
|
-
retryable: false,
|
|
1122
|
-
errorKind: "configuration_error",
|
|
1123
|
-
response: jsonResponse({
|
|
1124
|
-
type: "error",
|
|
1125
|
-
error: {
|
|
1126
|
-
type: "configuration_error",
|
|
1127
|
-
message: `Provider ${provider.id} has invalid baseUrl.`
|
|
1128
|
-
}
|
|
1129
|
-
}, 500)
|
|
1130
|
-
};
|
|
1131
|
-
}
|
|
1132
|
-
|
|
1133
1151
|
let response;
|
|
1134
1152
|
try {
|
|
1135
1153
|
response = await executeHttpProviderRequest(activePlan);
|
|
@@ -27,7 +27,10 @@ export function resolveRuntimeFlags(options = {}, env = {}) {
|
|
|
27
27
|
workerRuntime,
|
|
28
28
|
workerSafeMode,
|
|
29
29
|
allowBestEffortStatefulRouting,
|
|
30
|
-
statefulRoutingEnabled: !workerSafeMode || allowBestEffortStatefulRouting
|
|
30
|
+
statefulRoutingEnabled: !workerSafeMode || allowBestEffortStatefulRouting,
|
|
31
|
+
...(typeof options.resolveLocalRuntimeBaseUrl === "function"
|
|
32
|
+
? { resolveLocalRuntimeBaseUrl: options.resolveLocalRuntimeBaseUrl }
|
|
33
|
+
: {})
|
|
31
34
|
};
|
|
32
35
|
}
|
|
33
36
|
|
|
@@ -20,6 +20,34 @@ function normalizePositiveNumber(value) {
|
|
|
20
20
|
return Math.floor(parsed);
|
|
21
21
|
}
|
|
22
22
|
|
|
23
|
+
function normalizeRuntimeProfile(raw = {}) {
|
|
24
|
+
const source = isPlainObject(raw) ? raw : {};
|
|
25
|
+
const overrides = isPlainObject(source.overrides) ? { ...source.overrides } : {};
|
|
26
|
+
const extraArgs = Array.isArray(source.extraArgs)
|
|
27
|
+
? source.extraArgs.map((value) => normalizeString(value)).filter(Boolean)
|
|
28
|
+
: [];
|
|
29
|
+
|
|
30
|
+
return {
|
|
31
|
+
mode: normalizeString(source.mode) === "custom" ? "custom" : "auto",
|
|
32
|
+
preset: normalizeString(source.preset) || "balanced",
|
|
33
|
+
overrides,
|
|
34
|
+
extraArgs,
|
|
35
|
+
lastKnownGood: isPlainObject(source.lastKnownGood) ? { ...source.lastKnownGood } : null,
|
|
36
|
+
lastFailure: isPlainObject(source.lastFailure) ? { ...source.lastFailure } : null
|
|
37
|
+
};
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
function normalizeRuntimeStatus(raw = {}) {
|
|
41
|
+
const source = isPlainObject(raw) ? raw : {};
|
|
42
|
+
|
|
43
|
+
return {
|
|
44
|
+
activeInstanceId: normalizeString(source.activeInstanceId),
|
|
45
|
+
lastFailure: isPlainObject(source.lastFailure) ? { ...source.lastFailure } : null,
|
|
46
|
+
lastStartedAt: normalizeString(source.lastStartedAt),
|
|
47
|
+
lastHealthyAt: normalizeString(source.lastHealthyAt)
|
|
48
|
+
};
|
|
49
|
+
}
|
|
50
|
+
|
|
23
51
|
function normalizeLocalModelLibraryEntry(key, entry) {
|
|
24
52
|
if (!isPlainObject(entry)) return null;
|
|
25
53
|
|
|
@@ -76,6 +104,14 @@ function normalizeLocalModelVariantEntry(key, entry) {
|
|
|
76
104
|
else delete normalized.availability;
|
|
77
105
|
}
|
|
78
106
|
|
|
107
|
+
if (normalized.runtime === "llamacpp") {
|
|
108
|
+
normalized.runtimeProfile = normalizeRuntimeProfile(entry.runtimeProfile);
|
|
109
|
+
normalized.runtimeStatus = normalizeRuntimeStatus(entry.runtimeStatus);
|
|
110
|
+
} else {
|
|
111
|
+
delete normalized.runtimeProfile;
|
|
112
|
+
delete normalized.runtimeStatus;
|
|
113
|
+
}
|
|
114
|
+
|
|
79
115
|
return normalized;
|
|
80
116
|
}
|
|
81
117
|
|