@yzj01/llm-router 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -0
- package/dist/cli.d.ts +1 -1
- package/dist/cli.js +255 -59
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +2 -2
- package/dist/index.js +255 -59
- package/dist/index.js.map +1 -1
- package/dist/{proxy-CrRX9deF.d.ts → proxy-Co87mKak.d.ts} +27 -1
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -130,6 +130,20 @@ Unsupported model IDs return HTTP 400. Supported request IDs are currently only
|
|
|
130
130
|
`auto`. Configured aliases remain internal routing outputs and are surfaced via
|
|
131
131
|
response headers, not accepted in request bodies.
|
|
132
132
|
|
|
133
|
+
## Runtime Protections
|
|
134
|
+
|
|
135
|
+
The local proxy applies conservative runtime limits by default. These limits are
|
|
136
|
+
internal defaults rather than public config fields:
|
|
137
|
+
|
|
138
|
+
| Limit | Default | Behavior |
|
|
139
|
+
| --- | ---: | --- |
|
|
140
|
+
| Request body size | 10 MB | Returns `413 Payload Too Large` before forwarding upstream. |
|
|
141
|
+
| Request body read time | 30 s | Returns `408 Request Timeout` if the client does not finish sending the body. |
|
|
142
|
+
| Upstream request time | 300 s | Aborts the upstream request and returns `504 Gateway Timeout`. |
|
|
143
|
+
|
|
144
|
+
If the client disconnects before the upstream call completes, the proxy aborts
|
|
145
|
+
the upstream request to avoid keeping stale work running.
|
|
146
|
+
|
|
133
147
|
## Response Headers
|
|
134
148
|
|
|
135
149
|
The proxy adds routing headers:
|
package/dist/cli.d.ts
CHANGED
package/dist/cli.js
CHANGED
|
@@ -1792,7 +1792,7 @@ function hashHex(value, length) {
|
|
|
1792
1792
|
}
|
|
1793
1793
|
|
|
1794
1794
|
// src/proxy.ts
|
|
1795
|
-
var VERSION = "1.0.0";
|
|
1795
|
+
var VERSION = "1.0.1";
|
|
1796
1796
|
var HOP_BY_HOP = /* @__PURE__ */ new Set([
|
|
1797
1797
|
"connection",
|
|
1798
1798
|
"keep-alive",
|
|
@@ -1806,17 +1806,85 @@ var HOP_BY_HOP = /* @__PURE__ */ new Set([
|
|
|
1806
1806
|
"content-length"
|
|
1807
1807
|
]);
|
|
1808
1808
|
var RETRYABLE_STATUS = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]);
|
|
1809
|
+
var DEFAULT_MAX_BODY_BYTES = 10 * 1024 * 1024;
|
|
1810
|
+
var DEFAULT_BODY_READ_TIMEOUT_MS = 3e4;
|
|
1811
|
+
var DEFAULT_UPSTREAM_REQUEST_TIMEOUT_MS = 3e5;
|
|
1812
|
+
var DEFAULT_RUNTIME_LIMITS = {
|
|
1813
|
+
maxBodyBytes: DEFAULT_MAX_BODY_BYTES,
|
|
1814
|
+
bodyReadTimeoutMs: DEFAULT_BODY_READ_TIMEOUT_MS,
|
|
1815
|
+
upstreamRequestTimeoutMs: DEFAULT_UPSTREAM_REQUEST_TIMEOUT_MS
|
|
1816
|
+
};
|
|
1817
|
+
function resolveRuntimeLimits(input) {
|
|
1818
|
+
return {
|
|
1819
|
+
maxBodyBytes: input?.maxBodyBytes ?? DEFAULT_RUNTIME_LIMITS.maxBodyBytes,
|
|
1820
|
+
bodyReadTimeoutMs: input?.bodyReadTimeoutMs ?? DEFAULT_RUNTIME_LIMITS.bodyReadTimeoutMs,
|
|
1821
|
+
upstreamRequestTimeoutMs: input?.upstreamRequestTimeoutMs ?? DEFAULT_RUNTIME_LIMITS.upstreamRequestTimeoutMs
|
|
1822
|
+
};
|
|
1823
|
+
}
|
|
1809
1824
|
var REQUESTABLE_PUBLIC_MODELS = /* @__PURE__ */ new Set(["auto"]);
|
|
1810
1825
|
var PUBLIC_HEADER_PREFIXES = ["x-xy-router-"];
|
|
1811
|
-
|
|
1826
|
+
var BodyReadError = class extends Error {
|
|
1827
|
+
statusCode;
|
|
1828
|
+
constructor(statusCode, message) {
|
|
1829
|
+
super(message);
|
|
1830
|
+
this.name = "BodyReadError";
|
|
1831
|
+
this.statusCode = statusCode;
|
|
1832
|
+
}
|
|
1833
|
+
};
|
|
1834
|
+
function isBodyReadError(error) {
|
|
1835
|
+
return error instanceof BodyReadError;
|
|
1836
|
+
}
|
|
1837
|
+
function writeBodyReadErrorAndCloseRequest(req, res, error) {
|
|
1838
|
+
res.shouldKeepAlive = false;
|
|
1839
|
+
res.setHeader("connection", "close");
|
|
1840
|
+
res.once("finish", () => {
|
|
1841
|
+
if (!req.destroyed) {
|
|
1842
|
+
req.destroy();
|
|
1843
|
+
}
|
|
1844
|
+
});
|
|
1845
|
+
writeOpenAiError(res, error.statusCode, error.message);
|
|
1846
|
+
}
|
|
1847
|
+
function readBody(req, limits) {
|
|
1812
1848
|
return new Promise((resolve, reject) => {
|
|
1813
1849
|
let body = "";
|
|
1850
|
+
let bodyBytes = 0;
|
|
1851
|
+
let settled = false;
|
|
1814
1852
|
req.setEncoding("utf8");
|
|
1815
|
-
|
|
1853
|
+
const timeout = setTimeout(() => {
|
|
1854
|
+
rejectOnce(new BodyReadError(408, "Request body read timeout"));
|
|
1855
|
+
}, limits.bodyReadTimeoutMs);
|
|
1856
|
+
const cleanup = () => {
|
|
1857
|
+
clearTimeout(timeout);
|
|
1858
|
+
req.off("data", onData);
|
|
1859
|
+
req.off("end", onEnd);
|
|
1860
|
+
req.off("error", onError);
|
|
1861
|
+
};
|
|
1862
|
+
const rejectOnce = (error) => {
|
|
1863
|
+
if (settled) return;
|
|
1864
|
+
settled = true;
|
|
1865
|
+
cleanup();
|
|
1866
|
+
reject(error);
|
|
1867
|
+
};
|
|
1868
|
+
const resolveOnce = (value) => {
|
|
1869
|
+
if (settled) return;
|
|
1870
|
+
settled = true;
|
|
1871
|
+
cleanup();
|
|
1872
|
+
resolve(value);
|
|
1873
|
+
};
|
|
1874
|
+
const onData = (chunk) => {
|
|
1875
|
+
if (settled) return;
|
|
1876
|
+
bodyBytes += Buffer.byteLength(chunk, "utf8");
|
|
1877
|
+
if (bodyBytes > limits.maxBodyBytes) {
|
|
1878
|
+
rejectOnce(new BodyReadError(413, "Payload Too Large"));
|
|
1879
|
+
return;
|
|
1880
|
+
}
|
|
1816
1881
|
body += chunk;
|
|
1817
|
-
}
|
|
1818
|
-
|
|
1819
|
-
|
|
1882
|
+
};
|
|
1883
|
+
const onEnd = () => resolveOnce(body);
|
|
1884
|
+
const onError = (error) => rejectOnce(error);
|
|
1885
|
+
req.on("data", onData);
|
|
1886
|
+
req.on("end", onEnd);
|
|
1887
|
+
req.on("error", onError);
|
|
1820
1888
|
});
|
|
1821
1889
|
}
|
|
1822
1890
|
var OPENCLAW_CLI_TURN_PATTERN = /(?:^|\n)\[[^\]\n]+?\]\s+([\s\S]*?)(?=(?:\n\[[^\]\n]+?\]\s+)|$)/g;
|
|
@@ -1931,19 +1999,68 @@ function writeOpenAiError(res, status, message, type = "invalid_request_error",
|
|
|
1931
1999
|
}
|
|
1932
2000
|
});
|
|
1933
2001
|
}
|
|
1934
|
-
|
|
2002
|
+
function anySignal(signals) {
|
|
2003
|
+
const controller = new AbortController();
|
|
2004
|
+
const listeners = [];
|
|
2005
|
+
let cleaned = false;
|
|
2006
|
+
const cleanup = () => {
|
|
2007
|
+
if (cleaned) return;
|
|
2008
|
+
cleaned = true;
|
|
2009
|
+
for (const { signal, listener } of listeners) {
|
|
2010
|
+
signal.removeEventListener("abort", listener);
|
|
2011
|
+
}
|
|
2012
|
+
};
|
|
2013
|
+
const abortFrom = (signal) => {
|
|
2014
|
+
if (!controller.signal.aborted) {
|
|
2015
|
+
controller.abort(signal.reason);
|
|
2016
|
+
}
|
|
2017
|
+
cleanup();
|
|
2018
|
+
};
|
|
2019
|
+
for (const signal of signals) {
|
|
2020
|
+
if (signal.aborted) {
|
|
2021
|
+
abortFrom(signal);
|
|
2022
|
+
return { signal: controller.signal, cleanup };
|
|
2023
|
+
}
|
|
2024
|
+
}
|
|
2025
|
+
for (const signal of signals) {
|
|
2026
|
+
const listener = () => {
|
|
2027
|
+
abortFrom(signal);
|
|
2028
|
+
};
|
|
2029
|
+
listeners.push({ signal, listener });
|
|
2030
|
+
signal.addEventListener("abort", listener, { once: true });
|
|
2031
|
+
}
|
|
2032
|
+
return { signal: controller.signal, cleanup };
|
|
2033
|
+
}
|
|
2034
|
+
async function fetchUpstream(cfg, req, body, actualModel, runtimeLimits, requestSignal) {
|
|
2035
|
+
const timeoutController = new AbortController();
|
|
2036
|
+
const timeout = setTimeout(() => {
|
|
2037
|
+
timeoutController.abort();
|
|
2038
|
+
}, runtimeLimits.upstreamRequestTimeoutMs);
|
|
2039
|
+
const linkedSignal = anySignal([requestSignal, timeoutController.signal]);
|
|
2040
|
+
const cleanup = () => {
|
|
2041
|
+
clearTimeout(timeout);
|
|
2042
|
+
linkedSignal.cleanup();
|
|
2043
|
+
};
|
|
1935
2044
|
try {
|
|
1936
2045
|
const response = await fetch(`${cfg.baseUrl}/chat/completions`, {
|
|
1937
2046
|
method: "POST",
|
|
1938
2047
|
headers: buildUpstreamHeaders(req, cfg),
|
|
1939
|
-
body: JSON.stringify({ ...body, model: actualModel })
|
|
2048
|
+
body: JSON.stringify({ ...body, model: actualModel }),
|
|
2049
|
+
signal: linkedSignal.signal
|
|
1940
2050
|
});
|
|
2051
|
+
clearTimeout(timeout);
|
|
1941
2052
|
if (RETRYABLE_STATUS.has(response.status)) {
|
|
1942
|
-
return { ok: false, reason: "retryable", response };
|
|
2053
|
+
return { ok: false, reason: "retryable", response, cleanup };
|
|
1943
2054
|
}
|
|
1944
|
-
return { ok: true, response };
|
|
2055
|
+
return { ok: true, response, cleanup };
|
|
1945
2056
|
} catch (error) {
|
|
1946
|
-
|
|
2057
|
+
if (timeoutController.signal.aborted) {
|
|
2058
|
+
return { ok: false, reason: "timeout", error, cleanup };
|
|
2059
|
+
}
|
|
2060
|
+
if (requestSignal.aborted) {
|
|
2061
|
+
return { ok: false, reason: "aborted", error, cleanup };
|
|
2062
|
+
}
|
|
2063
|
+
return { ok: false, reason: "network_error", error, cleanup };
|
|
1947
2064
|
}
|
|
1948
2065
|
}
|
|
1949
2066
|
function getMaxOutputTokens(body) {
|
|
@@ -2159,8 +2276,17 @@ function chooseModel(requestedModel, body, headers, sessionStore, cfg, tierEntri
|
|
|
2159
2276
|
sessionAction: "none"
|
|
2160
2277
|
};
|
|
2161
2278
|
}
|
|
2162
|
-
async function proxyChat(req, res, cfg, sessionStore, tierEntries, publicModels, registry2, routerOptions) {
|
|
2163
|
-
|
|
2279
|
+
async function proxyChat(req, res, cfg, runtimeLimits, sessionStore, tierEntries, publicModels, registry2, routerOptions) {
|
|
2280
|
+
let rawBody;
|
|
2281
|
+
try {
|
|
2282
|
+
rawBody = await readBody(req, runtimeLimits);
|
|
2283
|
+
} catch (error) {
|
|
2284
|
+
if (isBodyReadError(error)) {
|
|
2285
|
+
writeBodyReadErrorAndCloseRequest(req, res, error);
|
|
2286
|
+
return;
|
|
2287
|
+
}
|
|
2288
|
+
throw error;
|
|
2289
|
+
}
|
|
2164
2290
|
let body;
|
|
2165
2291
|
try {
|
|
2166
2292
|
body = JSON.parse(rawBody);
|
|
@@ -2205,61 +2331,129 @@ async function proxyChat(req, res, cfg, sessionStore, tierEntries, publicModels,
|
|
|
2205
2331
|
);
|
|
2206
2332
|
return;
|
|
2207
2333
|
}
|
|
2208
|
-
const
|
|
2209
|
-
|
|
2210
|
-
|
|
2211
|
-
|
|
2212
|
-
|
|
2213
|
-
|
|
2334
|
+
const requestController = new AbortController();
|
|
2335
|
+
let responseFinished = false;
|
|
2336
|
+
const abortUpstreamRequest = () => {
|
|
2337
|
+
if (responseFinished || requestController.signal.aborted) return;
|
|
2338
|
+
requestController.abort();
|
|
2339
|
+
};
|
|
2340
|
+
const onRequestAborted = () => {
|
|
2341
|
+
abortUpstreamRequest();
|
|
2342
|
+
};
|
|
2343
|
+
const onResponseClose = () => {
|
|
2344
|
+
abortUpstreamRequest();
|
|
2345
|
+
};
|
|
2346
|
+
const onResponseFinish = () => {
|
|
2347
|
+
responseFinished = true;
|
|
2348
|
+
};
|
|
2349
|
+
const cleanupRequestAbortListeners = () => {
|
|
2350
|
+
req.off("aborted", onRequestAborted);
|
|
2351
|
+
res.off("close", onResponseClose);
|
|
2352
|
+
res.off("finish", onResponseFinish);
|
|
2353
|
+
};
|
|
2354
|
+
req.on("aborted", onRequestAborted);
|
|
2355
|
+
res.on("close", onResponseClose);
|
|
2356
|
+
res.on("finish", onResponseFinish);
|
|
2357
|
+
let attempt;
|
|
2358
|
+
try {
|
|
2359
|
+
attempt = await fetchUpstream(
|
|
2360
|
+
cfg,
|
|
2361
|
+
req,
|
|
2362
|
+
bodyObj,
|
|
2363
|
+
physicalModel.id,
|
|
2364
|
+
runtimeLimits,
|
|
2365
|
+
requestController.signal
|
|
2366
|
+
);
|
|
2367
|
+
const attempts = [
|
|
2368
|
+
attempt.ok ? { model: selected.actualModel, status: "success" } : {
|
|
2369
|
+
model: selected.actualModel,
|
|
2370
|
+
status: "error",
|
|
2371
|
+
error: attempt.reason === "timeout" ? "upstream_timeout" : attempt.reason === "network_error" ? "network_error" : attempt.reason === "aborted" ? "client_aborted" : `upstream_http_${attempt.response.status}`
|
|
2372
|
+
}
|
|
2373
|
+
];
|
|
2374
|
+
const finalTier = selected.tier;
|
|
2375
|
+
let sessionAction = selected.sessionAction;
|
|
2376
|
+
if (!attempt.ok && attempt.reason === "aborted") {
|
|
2377
|
+
emitProxyTrace(
|
|
2378
|
+
cfg,
|
|
2379
|
+
selected,
|
|
2380
|
+
finalTier,
|
|
2381
|
+
attempts,
|
|
2382
|
+
sessionAction,
|
|
2383
|
+
true
|
|
2384
|
+
);
|
|
2385
|
+
return;
|
|
2214
2386
|
}
|
|
2215
|
-
|
|
2216
|
-
|
|
2217
|
-
|
|
2218
|
-
|
|
2219
|
-
|
|
2387
|
+
if (!attempt.ok && attempt.reason === "timeout") {
|
|
2388
|
+
const trace2 = emitProxyTrace(
|
|
2389
|
+
cfg,
|
|
2390
|
+
selected,
|
|
2391
|
+
finalTier,
|
|
2392
|
+
attempts,
|
|
2393
|
+
sessionAction,
|
|
2394
|
+
true
|
|
2395
|
+
);
|
|
2396
|
+
const headers2 = buildPublicHeaders(cfg, selected, finalTier, trace2);
|
|
2397
|
+
writeOpenAiError(
|
|
2398
|
+
res,
|
|
2399
|
+
504,
|
|
2400
|
+
"Upstream request timed out",
|
|
2401
|
+
"invalid_request_error",
|
|
2402
|
+
null,
|
|
2403
|
+
headers2
|
|
2404
|
+
);
|
|
2405
|
+
return;
|
|
2406
|
+
}
|
|
2407
|
+
if (!attempt.ok && attempt.reason === "network_error") {
|
|
2408
|
+
const trace2 = emitProxyTrace(
|
|
2409
|
+
cfg,
|
|
2410
|
+
selected,
|
|
2411
|
+
finalTier,
|
|
2412
|
+
attempts,
|
|
2413
|
+
sessionAction,
|
|
2414
|
+
true
|
|
2415
|
+
);
|
|
2416
|
+
const headers2 = buildPublicHeaders(cfg, selected, finalTier, trace2);
|
|
2417
|
+
writeOpenAiError(
|
|
2418
|
+
res,
|
|
2419
|
+
502,
|
|
2420
|
+
attempt.error instanceof Error ? attempt.error.message : "Upstream request failed",
|
|
2421
|
+
"invalid_request_error",
|
|
2422
|
+
null,
|
|
2423
|
+
headers2
|
|
2424
|
+
);
|
|
2425
|
+
return;
|
|
2426
|
+
}
|
|
2427
|
+
if (attempt.ok && selected.sessionId && !selected.explicit) {
|
|
2428
|
+
sessionStore.setSession(selected.sessionId, {
|
|
2429
|
+
physicalModelId: selected.actualModel,
|
|
2430
|
+
routedPublicModel: selected.routedModel,
|
|
2431
|
+
pinnedTier: finalTier
|
|
2432
|
+
});
|
|
2433
|
+
if (sessionAction === "none") {
|
|
2434
|
+
sessionAction = "set";
|
|
2435
|
+
}
|
|
2436
|
+
}
|
|
2437
|
+
const trace = emitProxyTrace(
|
|
2220
2438
|
cfg,
|
|
2221
2439
|
selected,
|
|
2222
2440
|
finalTier,
|
|
2223
2441
|
attempts,
|
|
2224
2442
|
sessionAction,
|
|
2225
|
-
|
|
2226
|
-
);
|
|
2227
|
-
const headers2 = buildPublicHeaders(cfg, selected, finalTier, trace2);
|
|
2228
|
-
writeOpenAiError(
|
|
2229
|
-
res,
|
|
2230
|
-
502,
|
|
2231
|
-
attempt.error instanceof Error ? attempt.error.message : "Upstream request failed",
|
|
2232
|
-
"invalid_request_error",
|
|
2233
|
-
null,
|
|
2234
|
-
headers2
|
|
2443
|
+
!attempt.ok
|
|
2235
2444
|
);
|
|
2236
|
-
|
|
2237
|
-
|
|
2238
|
-
|
|
2239
|
-
|
|
2240
|
-
|
|
2241
|
-
routedPublicModel: selected.routedModel,
|
|
2242
|
-
pinnedTier: finalTier
|
|
2243
|
-
});
|
|
2244
|
-
if (sessionAction === "none") {
|
|
2245
|
-
sessionAction = "set";
|
|
2445
|
+
const headers = buildPublicHeaders(cfg, selected, finalTier, trace);
|
|
2446
|
+
const responseHeaders = copyResponseHeaders(attempt.response, headers);
|
|
2447
|
+
res.statusCode = attempt.response.status;
|
|
2448
|
+
for (const [k, v] of Object.entries(responseHeaders)) {
|
|
2449
|
+
res.setHeader(k, v);
|
|
2246
2450
|
}
|
|
2451
|
+
await streamResponse(attempt.response, res);
|
|
2452
|
+
responseFinished = true;
|
|
2453
|
+
} finally {
|
|
2454
|
+
cleanupRequestAbortListeners();
|
|
2455
|
+
attempt?.cleanup();
|
|
2247
2456
|
}
|
|
2248
|
-
const trace = emitProxyTrace(
|
|
2249
|
-
cfg,
|
|
2250
|
-
selected,
|
|
2251
|
-
finalTier,
|
|
2252
|
-
attempts,
|
|
2253
|
-
sessionAction,
|
|
2254
|
-
!attempt.ok
|
|
2255
|
-
);
|
|
2256
|
-
const headers = buildPublicHeaders(cfg, selected, finalTier, trace);
|
|
2257
|
-
const responseHeaders = copyResponseHeaders(attempt.response, headers);
|
|
2258
|
-
res.statusCode = attempt.response.status;
|
|
2259
|
-
for (const [k, v] of Object.entries(responseHeaders)) {
|
|
2260
|
-
res.setHeader(k, v);
|
|
2261
|
-
}
|
|
2262
|
-
await streamResponse(attempt.response, res);
|
|
2263
2457
|
}
|
|
2264
2458
|
async function startProxy(options) {
|
|
2265
2459
|
const cfg = resolveConfig({
|
|
@@ -2271,6 +2465,7 @@ async function startProxy(options) {
|
|
|
2271
2465
|
traceLogger: options.traceLogger,
|
|
2272
2466
|
sessionPinning: options.session?.enabled
|
|
2273
2467
|
});
|
|
2468
|
+
const runtimeLimits = resolveRuntimeLimits(options.runtimeLimits);
|
|
2274
2469
|
const sessionStore = new SessionStore(options.session);
|
|
2275
2470
|
const publicModels = options.config.publicModels;
|
|
2276
2471
|
const tierEntries = options.config.routing.tiers;
|
|
@@ -2299,6 +2494,7 @@ async function startProxy(options) {
|
|
|
2299
2494
|
req,
|
|
2300
2495
|
res,
|
|
2301
2496
|
cfg,
|
|
2497
|
+
runtimeLimits,
|
|
2302
2498
|
sessionStore,
|
|
2303
2499
|
tierEntries,
|
|
2304
2500
|
publicModels,
|