@yzj01/llm-router 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -0
- package/dist/cli.d.ts +1 -1
- package/dist/cli.js +257 -62
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +2 -2
- package/dist/index.js +257 -62
- package/dist/index.js.map +1 -1
- package/dist/{proxy-CrRX9deF.d.ts → proxy-CpR5cxND.d.ts} +27 -1
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -130,6 +130,20 @@ Unsupported model IDs return HTTP 400. Supported request IDs are currently only
|
|
|
130
130
|
`auto`. Configured aliases remain internal routing outputs and are surfaced via
|
|
131
131
|
response headers, not accepted in request bodies.
|
|
132
132
|
|
|
133
|
+
## Runtime Protections
|
|
134
|
+
|
|
135
|
+
The local proxy applies conservative runtime limits by default. These limits are
|
|
136
|
+
internal defaults rather than public config fields:
|
|
137
|
+
|
|
138
|
+
| Limit | Default | Behavior |
|
|
139
|
+
| --- | ---: | --- |
|
|
140
|
+
| Request body size | 10 MB | Returns `413 Payload Too Large` before forwarding upstream. |
|
|
141
|
+
| Request body read time | 30 s | Returns `408 Request Timeout` if the client does not finish sending the body. |
|
|
142
|
+
| Upstream request time | 300 s | Aborts the upstream request and returns `504 Gateway Timeout`. |
|
|
143
|
+
|
|
144
|
+
If the client disconnects before the upstream call completes, the proxy aborts
|
|
145
|
+
the upstream request to avoid keeping stale work running.
|
|
146
|
+
|
|
133
147
|
## Response Headers
|
|
134
148
|
|
|
135
149
|
The proxy adds routing headers:
|
package/dist/cli.d.ts
CHANGED
package/dist/cli.js
CHANGED
|
@@ -1792,7 +1792,7 @@ function hashHex(value, length) {
|
|
|
1792
1792
|
}
|
|
1793
1793
|
|
|
1794
1794
|
// src/proxy.ts
|
|
1795
|
-
var VERSION = "1.0.0";
|
|
1795
|
+
var VERSION = "1.0.2";
|
|
1796
1796
|
var HOP_BY_HOP = /* @__PURE__ */ new Set([
|
|
1797
1797
|
"connection",
|
|
1798
1798
|
"keep-alive",
|
|
@@ -1806,24 +1806,91 @@ var HOP_BY_HOP = /* @__PURE__ */ new Set([
|
|
|
1806
1806
|
"content-length"
|
|
1807
1807
|
]);
|
|
1808
1808
|
var RETRYABLE_STATUS = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]);
|
|
1809
|
+
var DEFAULT_MAX_BODY_BYTES = 10 * 1024 * 1024;
|
|
1810
|
+
var DEFAULT_BODY_READ_TIMEOUT_MS = 3e4;
|
|
1811
|
+
var DEFAULT_UPSTREAM_REQUEST_TIMEOUT_MS = 3e5;
|
|
1812
|
+
var DEFAULT_RUNTIME_LIMITS = {
|
|
1813
|
+
maxBodyBytes: DEFAULT_MAX_BODY_BYTES,
|
|
1814
|
+
bodyReadTimeoutMs: DEFAULT_BODY_READ_TIMEOUT_MS,
|
|
1815
|
+
upstreamRequestTimeoutMs: DEFAULT_UPSTREAM_REQUEST_TIMEOUT_MS
|
|
1816
|
+
};
|
|
1817
|
+
function resolveRuntimeLimits(input) {
|
|
1818
|
+
return {
|
|
1819
|
+
maxBodyBytes: input?.maxBodyBytes ?? DEFAULT_RUNTIME_LIMITS.maxBodyBytes,
|
|
1820
|
+
bodyReadTimeoutMs: input?.bodyReadTimeoutMs ?? DEFAULT_RUNTIME_LIMITS.bodyReadTimeoutMs,
|
|
1821
|
+
upstreamRequestTimeoutMs: input?.upstreamRequestTimeoutMs ?? DEFAULT_RUNTIME_LIMITS.upstreamRequestTimeoutMs
|
|
1822
|
+
};
|
|
1823
|
+
}
|
|
1809
1824
|
var REQUESTABLE_PUBLIC_MODELS = /* @__PURE__ */ new Set(["auto"]);
|
|
1810
1825
|
var PUBLIC_HEADER_PREFIXES = ["x-xy-router-"];
|
|
1811
|
-
|
|
1826
|
+
var BodyReadError = class extends Error {
|
|
1827
|
+
statusCode;
|
|
1828
|
+
constructor(statusCode, message) {
|
|
1829
|
+
super(message);
|
|
1830
|
+
this.name = "BodyReadError";
|
|
1831
|
+
this.statusCode = statusCode;
|
|
1832
|
+
}
|
|
1833
|
+
};
|
|
1834
|
+
function isBodyReadError(error) {
|
|
1835
|
+
return error instanceof BodyReadError;
|
|
1836
|
+
}
|
|
1837
|
+
function writeBodyReadErrorAndCloseRequest(req, res, error) {
|
|
1838
|
+
res.shouldKeepAlive = false;
|
|
1839
|
+
res.setHeader("connection", "close");
|
|
1840
|
+
res.once("finish", () => {
|
|
1841
|
+
if (!req.destroyed) {
|
|
1842
|
+
req.destroy();
|
|
1843
|
+
}
|
|
1844
|
+
});
|
|
1845
|
+
writeOpenAiError(res, error.statusCode, error.message);
|
|
1846
|
+
}
|
|
1847
|
+
function readBody(req, limits) {
|
|
1812
1848
|
return new Promise((resolve, reject) => {
|
|
1813
1849
|
let body = "";
|
|
1850
|
+
let bodyBytes = 0;
|
|
1851
|
+
let settled = false;
|
|
1814
1852
|
req.setEncoding("utf8");
|
|
1815
|
-
|
|
1853
|
+
const timeout = setTimeout(() => {
|
|
1854
|
+
rejectOnce(new BodyReadError(408, "Request body read timeout"));
|
|
1855
|
+
}, limits.bodyReadTimeoutMs);
|
|
1856
|
+
const cleanup = () => {
|
|
1857
|
+
clearTimeout(timeout);
|
|
1858
|
+
req.off("data", onData);
|
|
1859
|
+
req.off("end", onEnd);
|
|
1860
|
+
req.off("error", onError);
|
|
1861
|
+
};
|
|
1862
|
+
const rejectOnce = (error) => {
|
|
1863
|
+
if (settled) return;
|
|
1864
|
+
settled = true;
|
|
1865
|
+
cleanup();
|
|
1866
|
+
reject(error);
|
|
1867
|
+
};
|
|
1868
|
+
const resolveOnce = (value) => {
|
|
1869
|
+
if (settled) return;
|
|
1870
|
+
settled = true;
|
|
1871
|
+
cleanup();
|
|
1872
|
+
resolve(value);
|
|
1873
|
+
};
|
|
1874
|
+
const onData = (chunk) => {
|
|
1875
|
+
if (settled) return;
|
|
1876
|
+
bodyBytes += Buffer.byteLength(chunk, "utf8");
|
|
1877
|
+
if (bodyBytes > limits.maxBodyBytes) {
|
|
1878
|
+
rejectOnce(new BodyReadError(413, "Payload Too Large"));
|
|
1879
|
+
return;
|
|
1880
|
+
}
|
|
1816
1881
|
body += chunk;
|
|
1817
|
-
}
|
|
1818
|
-
|
|
1819
|
-
|
|
1882
|
+
};
|
|
1883
|
+
const onEnd = () => resolveOnce(body);
|
|
1884
|
+
const onError = (error) => rejectOnce(error);
|
|
1885
|
+
req.on("data", onData);
|
|
1886
|
+
req.on("end", onEnd);
|
|
1887
|
+
req.on("error", onError);
|
|
1820
1888
|
});
|
|
1821
1889
|
}
|
|
1822
1890
|
var OPENCLAW_CLI_TURN_PATTERN = /(?:^|\n)\[[^\]\n]+?\]\s+([\s\S]*?)(?=(?:\n\[[^\]\n]+?\]\s+)|$)/g;
|
|
1823
1891
|
function extractRouteTextFromUserMessage(text) {
|
|
1824
|
-
|
|
1825
|
-
|
|
1826
|
-
return last || text;
|
|
1892
|
+
void OPENCLAW_CLI_TURN_PATTERN;
|
|
1893
|
+
return text;
|
|
1827
1894
|
}
|
|
1828
1895
|
function extractPrompt(messages) {
|
|
1829
1896
|
const parts = [];
|
|
@@ -1931,19 +1998,68 @@ function writeOpenAiError(res, status, message, type = "invalid_request_error",
|
|
|
1931
1998
|
}
|
|
1932
1999
|
});
|
|
1933
2000
|
}
|
|
1934
|
-
|
|
2001
|
+
function anySignal(signals) {
|
|
2002
|
+
const controller = new AbortController();
|
|
2003
|
+
const listeners = [];
|
|
2004
|
+
let cleaned = false;
|
|
2005
|
+
const cleanup = () => {
|
|
2006
|
+
if (cleaned) return;
|
|
2007
|
+
cleaned = true;
|
|
2008
|
+
for (const { signal, listener } of listeners) {
|
|
2009
|
+
signal.removeEventListener("abort", listener);
|
|
2010
|
+
}
|
|
2011
|
+
};
|
|
2012
|
+
const abortFrom = (signal) => {
|
|
2013
|
+
if (!controller.signal.aborted) {
|
|
2014
|
+
controller.abort(signal.reason);
|
|
2015
|
+
}
|
|
2016
|
+
cleanup();
|
|
2017
|
+
};
|
|
2018
|
+
for (const signal of signals) {
|
|
2019
|
+
if (signal.aborted) {
|
|
2020
|
+
abortFrom(signal);
|
|
2021
|
+
return { signal: controller.signal, cleanup };
|
|
2022
|
+
}
|
|
2023
|
+
}
|
|
2024
|
+
for (const signal of signals) {
|
|
2025
|
+
const listener = () => {
|
|
2026
|
+
abortFrom(signal);
|
|
2027
|
+
};
|
|
2028
|
+
listeners.push({ signal, listener });
|
|
2029
|
+
signal.addEventListener("abort", listener, { once: true });
|
|
2030
|
+
}
|
|
2031
|
+
return { signal: controller.signal, cleanup };
|
|
2032
|
+
}
|
|
2033
|
+
async function fetchUpstream(cfg, req, body, actualModel, runtimeLimits, requestSignal) {
|
|
2034
|
+
const timeoutController = new AbortController();
|
|
2035
|
+
const timeout = setTimeout(() => {
|
|
2036
|
+
timeoutController.abort();
|
|
2037
|
+
}, runtimeLimits.upstreamRequestTimeoutMs);
|
|
2038
|
+
const linkedSignal = anySignal([requestSignal, timeoutController.signal]);
|
|
2039
|
+
const cleanup = () => {
|
|
2040
|
+
clearTimeout(timeout);
|
|
2041
|
+
linkedSignal.cleanup();
|
|
2042
|
+
};
|
|
1935
2043
|
try {
|
|
1936
2044
|
const response = await fetch(`${cfg.baseUrl}/chat/completions`, {
|
|
1937
2045
|
method: "POST",
|
|
1938
2046
|
headers: buildUpstreamHeaders(req, cfg),
|
|
1939
|
-
body: JSON.stringify({ ...body, model: actualModel })
|
|
2047
|
+
body: JSON.stringify({ ...body, model: actualModel }),
|
|
2048
|
+
signal: linkedSignal.signal
|
|
1940
2049
|
});
|
|
2050
|
+
clearTimeout(timeout);
|
|
1941
2051
|
if (RETRYABLE_STATUS.has(response.status)) {
|
|
1942
|
-
return { ok: false, reason: "retryable", response };
|
|
2052
|
+
return { ok: false, reason: "retryable", response, cleanup };
|
|
1943
2053
|
}
|
|
1944
|
-
return { ok: true, response };
|
|
2054
|
+
return { ok: true, response, cleanup };
|
|
1945
2055
|
} catch (error) {
|
|
1946
|
-
|
|
2056
|
+
if (timeoutController.signal.aborted) {
|
|
2057
|
+
return { ok: false, reason: "timeout", error, cleanup };
|
|
2058
|
+
}
|
|
2059
|
+
if (requestSignal.aborted) {
|
|
2060
|
+
return { ok: false, reason: "aborted", error, cleanup };
|
|
2061
|
+
}
|
|
2062
|
+
return { ok: false, reason: "network_error", error, cleanup };
|
|
1947
2063
|
}
|
|
1948
2064
|
}
|
|
1949
2065
|
function getMaxOutputTokens(body) {
|
|
@@ -2159,8 +2275,17 @@ function chooseModel(requestedModel, body, headers, sessionStore, cfg, tierEntri
|
|
|
2159
2275
|
sessionAction: "none"
|
|
2160
2276
|
};
|
|
2161
2277
|
}
|
|
2162
|
-
async function proxyChat(req, res, cfg, sessionStore, tierEntries, publicModels, registry2, routerOptions) {
|
|
2163
|
-
|
|
2278
|
+
async function proxyChat(req, res, cfg, runtimeLimits, sessionStore, tierEntries, publicModels, registry2, routerOptions) {
|
|
2279
|
+
let rawBody;
|
|
2280
|
+
try {
|
|
2281
|
+
rawBody = await readBody(req, runtimeLimits);
|
|
2282
|
+
} catch (error) {
|
|
2283
|
+
if (isBodyReadError(error)) {
|
|
2284
|
+
writeBodyReadErrorAndCloseRequest(req, res, error);
|
|
2285
|
+
return;
|
|
2286
|
+
}
|
|
2287
|
+
throw error;
|
|
2288
|
+
}
|
|
2164
2289
|
let body;
|
|
2165
2290
|
try {
|
|
2166
2291
|
body = JSON.parse(rawBody);
|
|
@@ -2205,61 +2330,129 @@ async function proxyChat(req, res, cfg, sessionStore, tierEntries, publicModels,
|
|
|
2205
2330
|
);
|
|
2206
2331
|
return;
|
|
2207
2332
|
}
|
|
2208
|
-
const
|
|
2209
|
-
|
|
2210
|
-
|
|
2211
|
-
|
|
2212
|
-
|
|
2213
|
-
|
|
2333
|
+
const requestController = new AbortController();
|
|
2334
|
+
let responseFinished = false;
|
|
2335
|
+
const abortUpstreamRequest = () => {
|
|
2336
|
+
if (responseFinished || requestController.signal.aborted) return;
|
|
2337
|
+
requestController.abort();
|
|
2338
|
+
};
|
|
2339
|
+
const onRequestAborted = () => {
|
|
2340
|
+
abortUpstreamRequest();
|
|
2341
|
+
};
|
|
2342
|
+
const onResponseClose = () => {
|
|
2343
|
+
abortUpstreamRequest();
|
|
2344
|
+
};
|
|
2345
|
+
const onResponseFinish = () => {
|
|
2346
|
+
responseFinished = true;
|
|
2347
|
+
};
|
|
2348
|
+
const cleanupRequestAbortListeners = () => {
|
|
2349
|
+
req.off("aborted", onRequestAborted);
|
|
2350
|
+
res.off("close", onResponseClose);
|
|
2351
|
+
res.off("finish", onResponseFinish);
|
|
2352
|
+
};
|
|
2353
|
+
req.on("aborted", onRequestAborted);
|
|
2354
|
+
res.on("close", onResponseClose);
|
|
2355
|
+
res.on("finish", onResponseFinish);
|
|
2356
|
+
let attempt;
|
|
2357
|
+
try {
|
|
2358
|
+
attempt = await fetchUpstream(
|
|
2359
|
+
cfg,
|
|
2360
|
+
req,
|
|
2361
|
+
bodyObj,
|
|
2362
|
+
physicalModel.id,
|
|
2363
|
+
runtimeLimits,
|
|
2364
|
+
requestController.signal
|
|
2365
|
+
);
|
|
2366
|
+
const attempts = [
|
|
2367
|
+
attempt.ok ? { model: selected.actualModel, status: "success" } : {
|
|
2368
|
+
model: selected.actualModel,
|
|
2369
|
+
status: "error",
|
|
2370
|
+
error: attempt.reason === "timeout" ? "upstream_timeout" : attempt.reason === "network_error" ? "network_error" : attempt.reason === "aborted" ? "client_aborted" : `upstream_http_${attempt.response.status}`
|
|
2371
|
+
}
|
|
2372
|
+
];
|
|
2373
|
+
const finalTier = selected.tier;
|
|
2374
|
+
let sessionAction = selected.sessionAction;
|
|
2375
|
+
if (!attempt.ok && attempt.reason === "aborted") {
|
|
2376
|
+
emitProxyTrace(
|
|
2377
|
+
cfg,
|
|
2378
|
+
selected,
|
|
2379
|
+
finalTier,
|
|
2380
|
+
attempts,
|
|
2381
|
+
sessionAction,
|
|
2382
|
+
true
|
|
2383
|
+
);
|
|
2384
|
+
return;
|
|
2214
2385
|
}
|
|
2215
|
-
|
|
2216
|
-
|
|
2217
|
-
|
|
2218
|
-
|
|
2219
|
-
|
|
2386
|
+
if (!attempt.ok && attempt.reason === "timeout") {
|
|
2387
|
+
const trace2 = emitProxyTrace(
|
|
2388
|
+
cfg,
|
|
2389
|
+
selected,
|
|
2390
|
+
finalTier,
|
|
2391
|
+
attempts,
|
|
2392
|
+
sessionAction,
|
|
2393
|
+
true
|
|
2394
|
+
);
|
|
2395
|
+
const headers2 = buildPublicHeaders(cfg, selected, finalTier, trace2);
|
|
2396
|
+
writeOpenAiError(
|
|
2397
|
+
res,
|
|
2398
|
+
504,
|
|
2399
|
+
"Upstream request timed out",
|
|
2400
|
+
"invalid_request_error",
|
|
2401
|
+
null,
|
|
2402
|
+
headers2
|
|
2403
|
+
);
|
|
2404
|
+
return;
|
|
2405
|
+
}
|
|
2406
|
+
if (!attempt.ok && attempt.reason === "network_error") {
|
|
2407
|
+
const trace2 = emitProxyTrace(
|
|
2408
|
+
cfg,
|
|
2409
|
+
selected,
|
|
2410
|
+
finalTier,
|
|
2411
|
+
attempts,
|
|
2412
|
+
sessionAction,
|
|
2413
|
+
true
|
|
2414
|
+
);
|
|
2415
|
+
const headers2 = buildPublicHeaders(cfg, selected, finalTier, trace2);
|
|
2416
|
+
writeOpenAiError(
|
|
2417
|
+
res,
|
|
2418
|
+
502,
|
|
2419
|
+
attempt.error instanceof Error ? attempt.error.message : "Upstream request failed",
|
|
2420
|
+
"invalid_request_error",
|
|
2421
|
+
null,
|
|
2422
|
+
headers2
|
|
2423
|
+
);
|
|
2424
|
+
return;
|
|
2425
|
+
}
|
|
2426
|
+
if (attempt.ok && selected.sessionId && !selected.explicit) {
|
|
2427
|
+
sessionStore.setSession(selected.sessionId, {
|
|
2428
|
+
physicalModelId: selected.actualModel,
|
|
2429
|
+
routedPublicModel: selected.routedModel,
|
|
2430
|
+
pinnedTier: finalTier
|
|
2431
|
+
});
|
|
2432
|
+
if (sessionAction === "none") {
|
|
2433
|
+
sessionAction = "set";
|
|
2434
|
+
}
|
|
2435
|
+
}
|
|
2436
|
+
const trace = emitProxyTrace(
|
|
2220
2437
|
cfg,
|
|
2221
2438
|
selected,
|
|
2222
2439
|
finalTier,
|
|
2223
2440
|
attempts,
|
|
2224
2441
|
sessionAction,
|
|
2225
|
-
|
|
2226
|
-
);
|
|
2227
|
-
const headers2 = buildPublicHeaders(cfg, selected, finalTier, trace2);
|
|
2228
|
-
writeOpenAiError(
|
|
2229
|
-
res,
|
|
2230
|
-
502,
|
|
2231
|
-
attempt.error instanceof Error ? attempt.error.message : "Upstream request failed",
|
|
2232
|
-
"invalid_request_error",
|
|
2233
|
-
null,
|
|
2234
|
-
headers2
|
|
2442
|
+
!attempt.ok
|
|
2235
2443
|
);
|
|
2236
|
-
|
|
2237
|
-
|
|
2238
|
-
|
|
2239
|
-
|
|
2240
|
-
|
|
2241
|
-
routedPublicModel: selected.routedModel,
|
|
2242
|
-
pinnedTier: finalTier
|
|
2243
|
-
});
|
|
2244
|
-
if (sessionAction === "none") {
|
|
2245
|
-
sessionAction = "set";
|
|
2444
|
+
const headers = buildPublicHeaders(cfg, selected, finalTier, trace);
|
|
2445
|
+
const responseHeaders = copyResponseHeaders(attempt.response, headers);
|
|
2446
|
+
res.statusCode = attempt.response.status;
|
|
2447
|
+
for (const [k, v] of Object.entries(responseHeaders)) {
|
|
2448
|
+
res.setHeader(k, v);
|
|
2246
2449
|
}
|
|
2450
|
+
await streamResponse(attempt.response, res);
|
|
2451
|
+
responseFinished = true;
|
|
2452
|
+
} finally {
|
|
2453
|
+
cleanupRequestAbortListeners();
|
|
2454
|
+
attempt?.cleanup();
|
|
2247
2455
|
}
|
|
2248
|
-
const trace = emitProxyTrace(
|
|
2249
|
-
cfg,
|
|
2250
|
-
selected,
|
|
2251
|
-
finalTier,
|
|
2252
|
-
attempts,
|
|
2253
|
-
sessionAction,
|
|
2254
|
-
!attempt.ok
|
|
2255
|
-
);
|
|
2256
|
-
const headers = buildPublicHeaders(cfg, selected, finalTier, trace);
|
|
2257
|
-
const responseHeaders = copyResponseHeaders(attempt.response, headers);
|
|
2258
|
-
res.statusCode = attempt.response.status;
|
|
2259
|
-
for (const [k, v] of Object.entries(responseHeaders)) {
|
|
2260
|
-
res.setHeader(k, v);
|
|
2261
|
-
}
|
|
2262
|
-
await streamResponse(attempt.response, res);
|
|
2263
2456
|
}
|
|
2264
2457
|
async function startProxy(options) {
|
|
2265
2458
|
const cfg = resolveConfig({
|
|
@@ -2271,6 +2464,7 @@ async function startProxy(options) {
|
|
|
2271
2464
|
traceLogger: options.traceLogger,
|
|
2272
2465
|
sessionPinning: options.session?.enabled
|
|
2273
2466
|
});
|
|
2467
|
+
const runtimeLimits = resolveRuntimeLimits(options.runtimeLimits);
|
|
2274
2468
|
const sessionStore = new SessionStore(options.session);
|
|
2275
2469
|
const publicModels = options.config.publicModels;
|
|
2276
2470
|
const tierEntries = options.config.routing.tiers;
|
|
@@ -2299,6 +2493,7 @@ async function startProxy(options) {
|
|
|
2299
2493
|
req,
|
|
2300
2494
|
res,
|
|
2301
2495
|
cfg,
|
|
2496
|
+
runtimeLimits,
|
|
2302
2497
|
sessionStore,
|
|
2303
2498
|
tierEntries,
|
|
2304
2499
|
publicModels,
|