@yzj01/llm-router 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -130,6 +130,20 @@ Unsupported model IDs return HTTP 400. Supported request IDs are currently only
130
130
  `auto`. Configured aliases remain internal routing outputs and are surfaced via
131
131
  response headers, not accepted in request bodies.
132
132
 
133
+ ## Runtime Protections
134
+
135
+ The local proxy applies conservative runtime limits by default. These limits are
136
+ internal defaults rather than public config fields:
137
+
138
+ | Limit | Default | Behavior |
139
+ | --- | ---: | --- |
140
+ | Request body size | 10 MB | Returns `413 Payload Too Large` before forwarding upstream. |
141
+ | Request body read time | 30 s | Returns `408 Request Timeout` if the client does not finish sending the body. |
142
+ | Upstream request time | 300 s | Aborts the upstream request and returns `504 Gateway Timeout`. |
143
+
144
+ If the client disconnects before the upstream call completes, the proxy aborts
145
+ the upstream request to avoid keeping stale work running.
146
+
133
147
  ## Response Headers
134
148
 
135
149
  The proxy adds routing headers:
package/dist/cli.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { P as ProxyOptions, a as ProxyHandle } from './proxy-CrRX9deF.js';
1
+ import { P as ProxyOptions, a as ProxyHandle } from './proxy-CpR5cxND.js';
2
2
 
3
3
  type CliRuntime = {
4
4
  log: (msg: string) => void;
package/dist/cli.js CHANGED
@@ -1792,7 +1792,7 @@ function hashHex(value, length) {
1792
1792
  }
1793
1793
 
1794
1794
  // src/proxy.ts
1795
- var VERSION = "1.0.0";
1795
+ var VERSION = "1.0.2";
1796
1796
  var HOP_BY_HOP = /* @__PURE__ */ new Set([
1797
1797
  "connection",
1798
1798
  "keep-alive",
@@ -1806,24 +1806,91 @@ var HOP_BY_HOP = /* @__PURE__ */ new Set([
1806
1806
  "content-length"
1807
1807
  ]);
1808
1808
  var RETRYABLE_STATUS = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]);
1809
+ var DEFAULT_MAX_BODY_BYTES = 10 * 1024 * 1024;
1810
+ var DEFAULT_BODY_READ_TIMEOUT_MS = 3e4;
1811
+ var DEFAULT_UPSTREAM_REQUEST_TIMEOUT_MS = 3e5;
1812
+ var DEFAULT_RUNTIME_LIMITS = {
1813
+ maxBodyBytes: DEFAULT_MAX_BODY_BYTES,
1814
+ bodyReadTimeoutMs: DEFAULT_BODY_READ_TIMEOUT_MS,
1815
+ upstreamRequestTimeoutMs: DEFAULT_UPSTREAM_REQUEST_TIMEOUT_MS
1816
+ };
1817
+ function resolveRuntimeLimits(input) {
1818
+ return {
1819
+ maxBodyBytes: input?.maxBodyBytes ?? DEFAULT_RUNTIME_LIMITS.maxBodyBytes,
1820
+ bodyReadTimeoutMs: input?.bodyReadTimeoutMs ?? DEFAULT_RUNTIME_LIMITS.bodyReadTimeoutMs,
1821
+ upstreamRequestTimeoutMs: input?.upstreamRequestTimeoutMs ?? DEFAULT_RUNTIME_LIMITS.upstreamRequestTimeoutMs
1822
+ };
1823
+ }
1809
1824
  var REQUESTABLE_PUBLIC_MODELS = /* @__PURE__ */ new Set(["auto"]);
1810
1825
  var PUBLIC_HEADER_PREFIXES = ["x-xy-router-"];
1811
- function readBody(req) {
1826
+ var BodyReadError = class extends Error {
1827
+ statusCode;
1828
+ constructor(statusCode, message) {
1829
+ super(message);
1830
+ this.name = "BodyReadError";
1831
+ this.statusCode = statusCode;
1832
+ }
1833
+ };
1834
+ function isBodyReadError(error) {
1835
+ return error instanceof BodyReadError;
1836
+ }
1837
+ function writeBodyReadErrorAndCloseRequest(req, res, error) {
1838
+ res.shouldKeepAlive = false;
1839
+ res.setHeader("connection", "close");
1840
+ res.once("finish", () => {
1841
+ if (!req.destroyed) {
1842
+ req.destroy();
1843
+ }
1844
+ });
1845
+ writeOpenAiError(res, error.statusCode, error.message);
1846
+ }
1847
+ function readBody(req, limits) {
1812
1848
  return new Promise((resolve, reject) => {
1813
1849
  let body = "";
1850
+ let bodyBytes = 0;
1851
+ let settled = false;
1814
1852
  req.setEncoding("utf8");
1815
- req.on("data", (chunk) => {
1853
+ const timeout = setTimeout(() => {
1854
+ rejectOnce(new BodyReadError(408, "Request body read timeout"));
1855
+ }, limits.bodyReadTimeoutMs);
1856
+ const cleanup = () => {
1857
+ clearTimeout(timeout);
1858
+ req.off("data", onData);
1859
+ req.off("end", onEnd);
1860
+ req.off("error", onError);
1861
+ };
1862
+ const rejectOnce = (error) => {
1863
+ if (settled) return;
1864
+ settled = true;
1865
+ cleanup();
1866
+ reject(error);
1867
+ };
1868
+ const resolveOnce = (value) => {
1869
+ if (settled) return;
1870
+ settled = true;
1871
+ cleanup();
1872
+ resolve(value);
1873
+ };
1874
+ const onData = (chunk) => {
1875
+ if (settled) return;
1876
+ bodyBytes += Buffer.byteLength(chunk, "utf8");
1877
+ if (bodyBytes > limits.maxBodyBytes) {
1878
+ rejectOnce(new BodyReadError(413, "Payload Too Large"));
1879
+ return;
1880
+ }
1816
1881
  body += chunk;
1817
- });
1818
- req.on("end", () => resolve(body));
1819
- req.on("error", reject);
1882
+ };
1883
+ const onEnd = () => resolveOnce(body);
1884
+ const onError = (error) => rejectOnce(error);
1885
+ req.on("data", onData);
1886
+ req.on("end", onEnd);
1887
+ req.on("error", onError);
1820
1888
  });
1821
1889
  }
1822
1890
  var OPENCLAW_CLI_TURN_PATTERN = /(?:^|\n)\[[^\]\n]+?\]\s+([\s\S]*?)(?=(?:\n\[[^\]\n]+?\]\s+)|$)/g;
1823
1891
  function extractRouteTextFromUserMessage(text) {
1824
- const matches = [...text.matchAll(OPENCLAW_CLI_TURN_PATTERN)];
1825
- const last = matches.at(-1)?.[1]?.trim();
1826
- return last || text;
1892
+ void OPENCLAW_CLI_TURN_PATTERN;
1893
+ return text;
1827
1894
  }
1828
1895
  function extractPrompt(messages) {
1829
1896
  const parts = [];
@@ -1931,19 +1998,68 @@ function writeOpenAiError(res, status, message, type = "invalid_request_error",
1931
1998
  }
1932
1999
  });
1933
2000
  }
1934
- async function fetchUpstream(cfg, req, body, actualModel) {
2001
+ function anySignal(signals) {
2002
+ const controller = new AbortController();
2003
+ const listeners = [];
2004
+ let cleaned = false;
2005
+ const cleanup = () => {
2006
+ if (cleaned) return;
2007
+ cleaned = true;
2008
+ for (const { signal, listener } of listeners) {
2009
+ signal.removeEventListener("abort", listener);
2010
+ }
2011
+ };
2012
+ const abortFrom = (signal) => {
2013
+ if (!controller.signal.aborted) {
2014
+ controller.abort(signal.reason);
2015
+ }
2016
+ cleanup();
2017
+ };
2018
+ for (const signal of signals) {
2019
+ if (signal.aborted) {
2020
+ abortFrom(signal);
2021
+ return { signal: controller.signal, cleanup };
2022
+ }
2023
+ }
2024
+ for (const signal of signals) {
2025
+ const listener = () => {
2026
+ abortFrom(signal);
2027
+ };
2028
+ listeners.push({ signal, listener });
2029
+ signal.addEventListener("abort", listener, { once: true });
2030
+ }
2031
+ return { signal: controller.signal, cleanup };
2032
+ }
2033
+ async function fetchUpstream(cfg, req, body, actualModel, runtimeLimits, requestSignal) {
2034
+ const timeoutController = new AbortController();
2035
+ const timeout = setTimeout(() => {
2036
+ timeoutController.abort();
2037
+ }, runtimeLimits.upstreamRequestTimeoutMs);
2038
+ const linkedSignal = anySignal([requestSignal, timeoutController.signal]);
2039
+ const cleanup = () => {
2040
+ clearTimeout(timeout);
2041
+ linkedSignal.cleanup();
2042
+ };
1935
2043
  try {
1936
2044
  const response = await fetch(`${cfg.baseUrl}/chat/completions`, {
1937
2045
  method: "POST",
1938
2046
  headers: buildUpstreamHeaders(req, cfg),
1939
- body: JSON.stringify({ ...body, model: actualModel })
2047
+ body: JSON.stringify({ ...body, model: actualModel }),
2048
+ signal: linkedSignal.signal
1940
2049
  });
2050
+ clearTimeout(timeout);
1941
2051
  if (RETRYABLE_STATUS.has(response.status)) {
1942
- return { ok: false, reason: "retryable", response };
2052
+ return { ok: false, reason: "retryable", response, cleanup };
1943
2053
  }
1944
- return { ok: true, response };
2054
+ return { ok: true, response, cleanup };
1945
2055
  } catch (error) {
1946
- return { ok: false, reason: "network_error", error };
2056
+ if (timeoutController.signal.aborted) {
2057
+ return { ok: false, reason: "timeout", error, cleanup };
2058
+ }
2059
+ if (requestSignal.aborted) {
2060
+ return { ok: false, reason: "aborted", error, cleanup };
2061
+ }
2062
+ return { ok: false, reason: "network_error", error, cleanup };
1947
2063
  }
1948
2064
  }
1949
2065
  function getMaxOutputTokens(body) {
@@ -2159,8 +2275,17 @@ function chooseModel(requestedModel, body, headers, sessionStore, cfg, tierEntri
2159
2275
  sessionAction: "none"
2160
2276
  };
2161
2277
  }
2162
- async function proxyChat(req, res, cfg, sessionStore, tierEntries, publicModels, registry2, routerOptions) {
2163
- const rawBody = await readBody(req);
2278
+ async function proxyChat(req, res, cfg, runtimeLimits, sessionStore, tierEntries, publicModels, registry2, routerOptions) {
2279
+ let rawBody;
2280
+ try {
2281
+ rawBody = await readBody(req, runtimeLimits);
2282
+ } catch (error) {
2283
+ if (isBodyReadError(error)) {
2284
+ writeBodyReadErrorAndCloseRequest(req, res, error);
2285
+ return;
2286
+ }
2287
+ throw error;
2288
+ }
2164
2289
  let body;
2165
2290
  try {
2166
2291
  body = JSON.parse(rawBody);
@@ -2205,61 +2330,129 @@ async function proxyChat(req, res, cfg, sessionStore, tierEntries, publicModels,
2205
2330
  );
2206
2331
  return;
2207
2332
  }
2208
- const attempt = await fetchUpstream(cfg, req, bodyObj, physicalModel.id);
2209
- const attempts = [
2210
- attempt.ok ? { model: selected.actualModel, status: "success" } : {
2211
- model: selected.actualModel,
2212
- status: "error",
2213
- error: attempt.reason === "network_error" ? "network_error" : `upstream_http_${attempt.response.status}`
2333
+ const requestController = new AbortController();
2334
+ let responseFinished = false;
2335
+ const abortUpstreamRequest = () => {
2336
+ if (responseFinished || requestController.signal.aborted) return;
2337
+ requestController.abort();
2338
+ };
2339
+ const onRequestAborted = () => {
2340
+ abortUpstreamRequest();
2341
+ };
2342
+ const onResponseClose = () => {
2343
+ abortUpstreamRequest();
2344
+ };
2345
+ const onResponseFinish = () => {
2346
+ responseFinished = true;
2347
+ };
2348
+ const cleanupRequestAbortListeners = () => {
2349
+ req.off("aborted", onRequestAborted);
2350
+ res.off("close", onResponseClose);
2351
+ res.off("finish", onResponseFinish);
2352
+ };
2353
+ req.on("aborted", onRequestAborted);
2354
+ res.on("close", onResponseClose);
2355
+ res.on("finish", onResponseFinish);
2356
+ let attempt;
2357
+ try {
2358
+ attempt = await fetchUpstream(
2359
+ cfg,
2360
+ req,
2361
+ bodyObj,
2362
+ physicalModel.id,
2363
+ runtimeLimits,
2364
+ requestController.signal
2365
+ );
2366
+ const attempts = [
2367
+ attempt.ok ? { model: selected.actualModel, status: "success" } : {
2368
+ model: selected.actualModel,
2369
+ status: "error",
2370
+ error: attempt.reason === "timeout" ? "upstream_timeout" : attempt.reason === "network_error" ? "network_error" : attempt.reason === "aborted" ? "client_aborted" : `upstream_http_${attempt.response.status}`
2371
+ }
2372
+ ];
2373
+ const finalTier = selected.tier;
2374
+ let sessionAction = selected.sessionAction;
2375
+ if (!attempt.ok && attempt.reason === "aborted") {
2376
+ emitProxyTrace(
2377
+ cfg,
2378
+ selected,
2379
+ finalTier,
2380
+ attempts,
2381
+ sessionAction,
2382
+ true
2383
+ );
2384
+ return;
2214
2385
  }
2215
- ];
2216
- const finalTier = selected.tier;
2217
- let sessionAction = selected.sessionAction;
2218
- if (!attempt.ok && attempt.reason === "network_error") {
2219
- const trace2 = emitProxyTrace(
2386
+ if (!attempt.ok && attempt.reason === "timeout") {
2387
+ const trace2 = emitProxyTrace(
2388
+ cfg,
2389
+ selected,
2390
+ finalTier,
2391
+ attempts,
2392
+ sessionAction,
2393
+ true
2394
+ );
2395
+ const headers2 = buildPublicHeaders(cfg, selected, finalTier, trace2);
2396
+ writeOpenAiError(
2397
+ res,
2398
+ 504,
2399
+ "Upstream request timed out",
2400
+ "invalid_request_error",
2401
+ null,
2402
+ headers2
2403
+ );
2404
+ return;
2405
+ }
2406
+ if (!attempt.ok && attempt.reason === "network_error") {
2407
+ const trace2 = emitProxyTrace(
2408
+ cfg,
2409
+ selected,
2410
+ finalTier,
2411
+ attempts,
2412
+ sessionAction,
2413
+ true
2414
+ );
2415
+ const headers2 = buildPublicHeaders(cfg, selected, finalTier, trace2);
2416
+ writeOpenAiError(
2417
+ res,
2418
+ 502,
2419
+ attempt.error instanceof Error ? attempt.error.message : "Upstream request failed",
2420
+ "invalid_request_error",
2421
+ null,
2422
+ headers2
2423
+ );
2424
+ return;
2425
+ }
2426
+ if (attempt.ok && selected.sessionId && !selected.explicit) {
2427
+ sessionStore.setSession(selected.sessionId, {
2428
+ physicalModelId: selected.actualModel,
2429
+ routedPublicModel: selected.routedModel,
2430
+ pinnedTier: finalTier
2431
+ });
2432
+ if (sessionAction === "none") {
2433
+ sessionAction = "set";
2434
+ }
2435
+ }
2436
+ const trace = emitProxyTrace(
2220
2437
  cfg,
2221
2438
  selected,
2222
2439
  finalTier,
2223
2440
  attempts,
2224
2441
  sessionAction,
2225
- true
2226
- );
2227
- const headers2 = buildPublicHeaders(cfg, selected, finalTier, trace2);
2228
- writeOpenAiError(
2229
- res,
2230
- 502,
2231
- attempt.error instanceof Error ? attempt.error.message : "Upstream request failed",
2232
- "invalid_request_error",
2233
- null,
2234
- headers2
2442
+ !attempt.ok
2235
2443
  );
2236
- return;
2237
- }
2238
- if (attempt.ok && selected.sessionId && !selected.explicit) {
2239
- sessionStore.setSession(selected.sessionId, {
2240
- physicalModelId: selected.actualModel,
2241
- routedPublicModel: selected.routedModel,
2242
- pinnedTier: finalTier
2243
- });
2244
- if (sessionAction === "none") {
2245
- sessionAction = "set";
2444
+ const headers = buildPublicHeaders(cfg, selected, finalTier, trace);
2445
+ const responseHeaders = copyResponseHeaders(attempt.response, headers);
2446
+ res.statusCode = attempt.response.status;
2447
+ for (const [k, v] of Object.entries(responseHeaders)) {
2448
+ res.setHeader(k, v);
2246
2449
  }
2450
+ await streamResponse(attempt.response, res);
2451
+ responseFinished = true;
2452
+ } finally {
2453
+ cleanupRequestAbortListeners();
2454
+ attempt?.cleanup();
2247
2455
  }
2248
- const trace = emitProxyTrace(
2249
- cfg,
2250
- selected,
2251
- finalTier,
2252
- attempts,
2253
- sessionAction,
2254
- !attempt.ok
2255
- );
2256
- const headers = buildPublicHeaders(cfg, selected, finalTier, trace);
2257
- const responseHeaders = copyResponseHeaders(attempt.response, headers);
2258
- res.statusCode = attempt.response.status;
2259
- for (const [k, v] of Object.entries(responseHeaders)) {
2260
- res.setHeader(k, v);
2261
- }
2262
- await streamResponse(attempt.response, res);
2263
2456
  }
2264
2457
  async function startProxy(options) {
2265
2458
  const cfg = resolveConfig({
@@ -2271,6 +2464,7 @@ async function startProxy(options) {
2271
2464
  traceLogger: options.traceLogger,
2272
2465
  sessionPinning: options.session?.enabled
2273
2466
  });
2467
+ const runtimeLimits = resolveRuntimeLimits(options.runtimeLimits);
2274
2468
  const sessionStore = new SessionStore(options.session);
2275
2469
  const publicModels = options.config.publicModels;
2276
2470
  const tierEntries = options.config.routing.tiers;
@@ -2299,6 +2493,7 @@ async function startProxy(options) {
2299
2493
  req,
2300
2494
  res,
2301
2495
  cfg,
2496
+ runtimeLimits,
2302
2497
  sessionStore,
2303
2498
  tierEntries,
2304
2499
  publicModels,