@yzj01/llm-router 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -130,6 +130,20 @@ Unsupported model IDs return HTTP 400. Supported request IDs are currently only
130
130
  `auto`. Configured aliases remain internal routing outputs and are surfaced via
131
131
  response headers, not accepted in request bodies.
132
132
 
133
+ ## Runtime Protections
134
+
135
+ The local proxy applies conservative runtime limits by default. These limits are
136
+ internal defaults rather than public config fields:
137
+
138
+ | Limit | Default | Behavior |
139
+ | --- | ---: | --- |
140
+ | Request body size | 10 MB | Returns `413 Payload Too Large` before forwarding upstream. |
141
+ | Request body read time | 30 s | Returns `408 Request Timeout` if the client does not finish sending the body. |
142
+ | Upstream request time | 300 s | Aborts the upstream request and returns `504 Gateway Timeout`. |
143
+
144
+ If the client disconnects before the upstream call completes, the proxy aborts
145
+ the upstream request to avoid keeping stale work running.
146
+
133
147
  ## Response Headers
134
148
 
135
149
  The proxy adds routing headers:
package/dist/cli.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { P as ProxyOptions, a as ProxyHandle } from './proxy-CrRX9deF.js';
1
+ import { P as ProxyOptions, a as ProxyHandle } from './proxy-Co87mKak.js';
2
2
 
3
3
  type CliRuntime = {
4
4
  log: (msg: string) => void;
package/dist/cli.js CHANGED
@@ -1792,7 +1792,7 @@ function hashHex(value, length) {
1792
1792
  }
1793
1793
 
1794
1794
  // src/proxy.ts
1795
- var VERSION = "1.0.0";
1795
+ var VERSION = "1.0.1";
1796
1796
  var HOP_BY_HOP = /* @__PURE__ */ new Set([
1797
1797
  "connection",
1798
1798
  "keep-alive",
@@ -1806,17 +1806,85 @@ var HOP_BY_HOP = /* @__PURE__ */ new Set([
1806
1806
  "content-length"
1807
1807
  ]);
1808
1808
  var RETRYABLE_STATUS = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]);
1809
+ var DEFAULT_MAX_BODY_BYTES = 10 * 1024 * 1024;
1810
+ var DEFAULT_BODY_READ_TIMEOUT_MS = 3e4;
1811
+ var DEFAULT_UPSTREAM_REQUEST_TIMEOUT_MS = 3e5;
1812
+ var DEFAULT_RUNTIME_LIMITS = {
1813
+ maxBodyBytes: DEFAULT_MAX_BODY_BYTES,
1814
+ bodyReadTimeoutMs: DEFAULT_BODY_READ_TIMEOUT_MS,
1815
+ upstreamRequestTimeoutMs: DEFAULT_UPSTREAM_REQUEST_TIMEOUT_MS
1816
+ };
1817
+ function resolveRuntimeLimits(input) {
1818
+ return {
1819
+ maxBodyBytes: input?.maxBodyBytes ?? DEFAULT_RUNTIME_LIMITS.maxBodyBytes,
1820
+ bodyReadTimeoutMs: input?.bodyReadTimeoutMs ?? DEFAULT_RUNTIME_LIMITS.bodyReadTimeoutMs,
1821
+ upstreamRequestTimeoutMs: input?.upstreamRequestTimeoutMs ?? DEFAULT_RUNTIME_LIMITS.upstreamRequestTimeoutMs
1822
+ };
1823
+ }
1809
1824
  var REQUESTABLE_PUBLIC_MODELS = /* @__PURE__ */ new Set(["auto"]);
1810
1825
  var PUBLIC_HEADER_PREFIXES = ["x-xy-router-"];
1811
- function readBody(req) {
1826
+ var BodyReadError = class extends Error {
1827
+ statusCode;
1828
+ constructor(statusCode, message) {
1829
+ super(message);
1830
+ this.name = "BodyReadError";
1831
+ this.statusCode = statusCode;
1832
+ }
1833
+ };
1834
+ function isBodyReadError(error) {
1835
+ return error instanceof BodyReadError;
1836
+ }
1837
+ function writeBodyReadErrorAndCloseRequest(req, res, error) {
1838
+ res.shouldKeepAlive = false;
1839
+ res.setHeader("connection", "close");
1840
+ res.once("finish", () => {
1841
+ if (!req.destroyed) {
1842
+ req.destroy();
1843
+ }
1844
+ });
1845
+ writeOpenAiError(res, error.statusCode, error.message);
1846
+ }
1847
+ function readBody(req, limits) {
1812
1848
  return new Promise((resolve, reject) => {
1813
1849
  let body = "";
1850
+ let bodyBytes = 0;
1851
+ let settled = false;
1814
1852
  req.setEncoding("utf8");
1815
- req.on("data", (chunk) => {
1853
+ const timeout = setTimeout(() => {
1854
+ rejectOnce(new BodyReadError(408, "Request body read timeout"));
1855
+ }, limits.bodyReadTimeoutMs);
1856
+ const cleanup = () => {
1857
+ clearTimeout(timeout);
1858
+ req.off("data", onData);
1859
+ req.off("end", onEnd);
1860
+ req.off("error", onError);
1861
+ };
1862
+ const rejectOnce = (error) => {
1863
+ if (settled) return;
1864
+ settled = true;
1865
+ cleanup();
1866
+ reject(error);
1867
+ };
1868
+ const resolveOnce = (value) => {
1869
+ if (settled) return;
1870
+ settled = true;
1871
+ cleanup();
1872
+ resolve(value);
1873
+ };
1874
+ const onData = (chunk) => {
1875
+ if (settled) return;
1876
+ bodyBytes += Buffer.byteLength(chunk, "utf8");
1877
+ if (bodyBytes > limits.maxBodyBytes) {
1878
+ rejectOnce(new BodyReadError(413, "Payload Too Large"));
1879
+ return;
1880
+ }
1816
1881
  body += chunk;
1817
- });
1818
- req.on("end", () => resolve(body));
1819
- req.on("error", reject);
1882
+ };
1883
+ const onEnd = () => resolveOnce(body);
1884
+ const onError = (error) => rejectOnce(error);
1885
+ req.on("data", onData);
1886
+ req.on("end", onEnd);
1887
+ req.on("error", onError);
1820
1888
  });
1821
1889
  }
1822
1890
  var OPENCLAW_CLI_TURN_PATTERN = /(?:^|\n)\[[^\]\n]+?\]\s+([\s\S]*?)(?=(?:\n\[[^\]\n]+?\]\s+)|$)/g;
@@ -1931,19 +1999,68 @@ function writeOpenAiError(res, status, message, type = "invalid_request_error",
1931
1999
  }
1932
2000
  });
1933
2001
  }
1934
- async function fetchUpstream(cfg, req, body, actualModel) {
2002
+ function anySignal(signals) {
2003
+ const controller = new AbortController();
2004
+ const listeners = [];
2005
+ let cleaned = false;
2006
+ const cleanup = () => {
2007
+ if (cleaned) return;
2008
+ cleaned = true;
2009
+ for (const { signal, listener } of listeners) {
2010
+ signal.removeEventListener("abort", listener);
2011
+ }
2012
+ };
2013
+ const abortFrom = (signal) => {
2014
+ if (!controller.signal.aborted) {
2015
+ controller.abort(signal.reason);
2016
+ }
2017
+ cleanup();
2018
+ };
2019
+ for (const signal of signals) {
2020
+ if (signal.aborted) {
2021
+ abortFrom(signal);
2022
+ return { signal: controller.signal, cleanup };
2023
+ }
2024
+ }
2025
+ for (const signal of signals) {
2026
+ const listener = () => {
2027
+ abortFrom(signal);
2028
+ };
2029
+ listeners.push({ signal, listener });
2030
+ signal.addEventListener("abort", listener, { once: true });
2031
+ }
2032
+ return { signal: controller.signal, cleanup };
2033
+ }
2034
+ async function fetchUpstream(cfg, req, body, actualModel, runtimeLimits, requestSignal) {
2035
+ const timeoutController = new AbortController();
2036
+ const timeout = setTimeout(() => {
2037
+ timeoutController.abort();
2038
+ }, runtimeLimits.upstreamRequestTimeoutMs);
2039
+ const linkedSignal = anySignal([requestSignal, timeoutController.signal]);
2040
+ const cleanup = () => {
2041
+ clearTimeout(timeout);
2042
+ linkedSignal.cleanup();
2043
+ };
1935
2044
  try {
1936
2045
  const response = await fetch(`${cfg.baseUrl}/chat/completions`, {
1937
2046
  method: "POST",
1938
2047
  headers: buildUpstreamHeaders(req, cfg),
1939
- body: JSON.stringify({ ...body, model: actualModel })
2048
+ body: JSON.stringify({ ...body, model: actualModel }),
2049
+ signal: linkedSignal.signal
1940
2050
  });
2051
+ clearTimeout(timeout);
1941
2052
  if (RETRYABLE_STATUS.has(response.status)) {
1942
- return { ok: false, reason: "retryable", response };
2053
+ return { ok: false, reason: "retryable", response, cleanup };
1943
2054
  }
1944
- return { ok: true, response };
2055
+ return { ok: true, response, cleanup };
1945
2056
  } catch (error) {
1946
- return { ok: false, reason: "network_error", error };
2057
+ if (timeoutController.signal.aborted) {
2058
+ return { ok: false, reason: "timeout", error, cleanup };
2059
+ }
2060
+ if (requestSignal.aborted) {
2061
+ return { ok: false, reason: "aborted", error, cleanup };
2062
+ }
2063
+ return { ok: false, reason: "network_error", error, cleanup };
1947
2064
  }
1948
2065
  }
1949
2066
  function getMaxOutputTokens(body) {
@@ -2159,8 +2276,17 @@ function chooseModel(requestedModel, body, headers, sessionStore, cfg, tierEntri
2159
2276
  sessionAction: "none"
2160
2277
  };
2161
2278
  }
2162
- async function proxyChat(req, res, cfg, sessionStore, tierEntries, publicModels, registry2, routerOptions) {
2163
- const rawBody = await readBody(req);
2279
+ async function proxyChat(req, res, cfg, runtimeLimits, sessionStore, tierEntries, publicModels, registry2, routerOptions) {
2280
+ let rawBody;
2281
+ try {
2282
+ rawBody = await readBody(req, runtimeLimits);
2283
+ } catch (error) {
2284
+ if (isBodyReadError(error)) {
2285
+ writeBodyReadErrorAndCloseRequest(req, res, error);
2286
+ return;
2287
+ }
2288
+ throw error;
2289
+ }
2164
2290
  let body;
2165
2291
  try {
2166
2292
  body = JSON.parse(rawBody);
@@ -2205,61 +2331,129 @@ async function proxyChat(req, res, cfg, sessionStore, tierEntries, publicModels,
2205
2331
  );
2206
2332
  return;
2207
2333
  }
2208
- const attempt = await fetchUpstream(cfg, req, bodyObj, physicalModel.id);
2209
- const attempts = [
2210
- attempt.ok ? { model: selected.actualModel, status: "success" } : {
2211
- model: selected.actualModel,
2212
- status: "error",
2213
- error: attempt.reason === "network_error" ? "network_error" : `upstream_http_${attempt.response.status}`
2334
+ const requestController = new AbortController();
2335
+ let responseFinished = false;
2336
+ const abortUpstreamRequest = () => {
2337
+ if (responseFinished || requestController.signal.aborted) return;
2338
+ requestController.abort();
2339
+ };
2340
+ const onRequestAborted = () => {
2341
+ abortUpstreamRequest();
2342
+ };
2343
+ const onResponseClose = () => {
2344
+ abortUpstreamRequest();
2345
+ };
2346
+ const onResponseFinish = () => {
2347
+ responseFinished = true;
2348
+ };
2349
+ const cleanupRequestAbortListeners = () => {
2350
+ req.off("aborted", onRequestAborted);
2351
+ res.off("close", onResponseClose);
2352
+ res.off("finish", onResponseFinish);
2353
+ };
2354
+ req.on("aborted", onRequestAborted);
2355
+ res.on("close", onResponseClose);
2356
+ res.on("finish", onResponseFinish);
2357
+ let attempt;
2358
+ try {
2359
+ attempt = await fetchUpstream(
2360
+ cfg,
2361
+ req,
2362
+ bodyObj,
2363
+ physicalModel.id,
2364
+ runtimeLimits,
2365
+ requestController.signal
2366
+ );
2367
+ const attempts = [
2368
+ attempt.ok ? { model: selected.actualModel, status: "success" } : {
2369
+ model: selected.actualModel,
2370
+ status: "error",
2371
+ error: attempt.reason === "timeout" ? "upstream_timeout" : attempt.reason === "network_error" ? "network_error" : attempt.reason === "aborted" ? "client_aborted" : `upstream_http_${attempt.response.status}`
2372
+ }
2373
+ ];
2374
+ const finalTier = selected.tier;
2375
+ let sessionAction = selected.sessionAction;
2376
+ if (!attempt.ok && attempt.reason === "aborted") {
2377
+ emitProxyTrace(
2378
+ cfg,
2379
+ selected,
2380
+ finalTier,
2381
+ attempts,
2382
+ sessionAction,
2383
+ true
2384
+ );
2385
+ return;
2214
2386
  }
2215
- ];
2216
- const finalTier = selected.tier;
2217
- let sessionAction = selected.sessionAction;
2218
- if (!attempt.ok && attempt.reason === "network_error") {
2219
- const trace2 = emitProxyTrace(
2387
+ if (!attempt.ok && attempt.reason === "timeout") {
2388
+ const trace2 = emitProxyTrace(
2389
+ cfg,
2390
+ selected,
2391
+ finalTier,
2392
+ attempts,
2393
+ sessionAction,
2394
+ true
2395
+ );
2396
+ const headers2 = buildPublicHeaders(cfg, selected, finalTier, trace2);
2397
+ writeOpenAiError(
2398
+ res,
2399
+ 504,
2400
+ "Upstream request timed out",
2401
+ "invalid_request_error",
2402
+ null,
2403
+ headers2
2404
+ );
2405
+ return;
2406
+ }
2407
+ if (!attempt.ok && attempt.reason === "network_error") {
2408
+ const trace2 = emitProxyTrace(
2409
+ cfg,
2410
+ selected,
2411
+ finalTier,
2412
+ attempts,
2413
+ sessionAction,
2414
+ true
2415
+ );
2416
+ const headers2 = buildPublicHeaders(cfg, selected, finalTier, trace2);
2417
+ writeOpenAiError(
2418
+ res,
2419
+ 502,
2420
+ attempt.error instanceof Error ? attempt.error.message : "Upstream request failed",
2421
+ "invalid_request_error",
2422
+ null,
2423
+ headers2
2424
+ );
2425
+ return;
2426
+ }
2427
+ if (attempt.ok && selected.sessionId && !selected.explicit) {
2428
+ sessionStore.setSession(selected.sessionId, {
2429
+ physicalModelId: selected.actualModel,
2430
+ routedPublicModel: selected.routedModel,
2431
+ pinnedTier: finalTier
2432
+ });
2433
+ if (sessionAction === "none") {
2434
+ sessionAction = "set";
2435
+ }
2436
+ }
2437
+ const trace = emitProxyTrace(
2220
2438
  cfg,
2221
2439
  selected,
2222
2440
  finalTier,
2223
2441
  attempts,
2224
2442
  sessionAction,
2225
- true
2226
- );
2227
- const headers2 = buildPublicHeaders(cfg, selected, finalTier, trace2);
2228
- writeOpenAiError(
2229
- res,
2230
- 502,
2231
- attempt.error instanceof Error ? attempt.error.message : "Upstream request failed",
2232
- "invalid_request_error",
2233
- null,
2234
- headers2
2443
+ !attempt.ok
2235
2444
  );
2236
- return;
2237
- }
2238
- if (attempt.ok && selected.sessionId && !selected.explicit) {
2239
- sessionStore.setSession(selected.sessionId, {
2240
- physicalModelId: selected.actualModel,
2241
- routedPublicModel: selected.routedModel,
2242
- pinnedTier: finalTier
2243
- });
2244
- if (sessionAction === "none") {
2245
- sessionAction = "set";
2445
+ const headers = buildPublicHeaders(cfg, selected, finalTier, trace);
2446
+ const responseHeaders = copyResponseHeaders(attempt.response, headers);
2447
+ res.statusCode = attempt.response.status;
2448
+ for (const [k, v] of Object.entries(responseHeaders)) {
2449
+ res.setHeader(k, v);
2246
2450
  }
2451
+ await streamResponse(attempt.response, res);
2452
+ responseFinished = true;
2453
+ } finally {
2454
+ cleanupRequestAbortListeners();
2455
+ attempt?.cleanup();
2247
2456
  }
2248
- const trace = emitProxyTrace(
2249
- cfg,
2250
- selected,
2251
- finalTier,
2252
- attempts,
2253
- sessionAction,
2254
- !attempt.ok
2255
- );
2256
- const headers = buildPublicHeaders(cfg, selected, finalTier, trace);
2257
- const responseHeaders = copyResponseHeaders(attempt.response, headers);
2258
- res.statusCode = attempt.response.status;
2259
- for (const [k, v] of Object.entries(responseHeaders)) {
2260
- res.setHeader(k, v);
2261
- }
2262
- await streamResponse(attempt.response, res);
2263
2457
  }
2264
2458
  async function startProxy(options) {
2265
2459
  const cfg = resolveConfig({
@@ -2271,6 +2465,7 @@ async function startProxy(options) {
2271
2465
  traceLogger: options.traceLogger,
2272
2466
  sessionPinning: options.session?.enabled
2273
2467
  });
2468
+ const runtimeLimits = resolveRuntimeLimits(options.runtimeLimits);
2274
2469
  const sessionStore = new SessionStore(options.session);
2275
2470
  const publicModels = options.config.publicModels;
2276
2471
  const tierEntries = options.config.routing.tiers;
@@ -2299,6 +2494,7 @@ async function startProxy(options) {
2299
2494
  req,
2300
2495
  res,
2301
2496
  cfg,
2497
+ runtimeLimits,
2302
2498
  sessionStore,
2303
2499
  tierEntries,
2304
2500
  publicModels,