@yzj01/llm-router 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
- import { T as Tier, R as RawConfig, b as PhysicalModel, P as ProxyOptions, a as ProxyHandle, c as TraceMode, d as TraceLogger, C as ConfigSource, e as PublicModelConfig } from './proxy-CrRX9deF.js';
2
- export { D as DEFAULT_SESSION_CONFIG, f as PublicModelMetadata, g as RouteTraceLog, S as SessionConfig, h as SessionEntry, i as SessionStats, j as SessionStore, k as TierEntry, l as TraceAttempt, m as TraceReason, n as TraceSessionAction, o as TraceSummaryInput, p as TraceWriter, V as VERSION, q as buildTraceSummary, r as deriveSessionId, s as emitRouteTrace, t as getPromptPreview, u as hashRequestContent, v as normalizeTraceMode, w as resolveTraceWriter, x as startProxy } from './proxy-CrRX9deF.js';
1
+ import { T as Tier, R as RawConfig, b as PhysicalModel, P as ProxyOptions, a as ProxyHandle, c as TraceMode, d as TraceLogger, C as ConfigSource, e as PublicModelConfig } from './proxy-Co87mKak.js';
2
+ export { D as DEFAULT_SESSION_CONFIG, f as PublicModelMetadata, g as RouteTraceLog, S as SessionConfig, h as SessionEntry, i as SessionStats, j as SessionStore, k as TierEntry, l as TraceAttempt, m as TraceReason, n as TraceSessionAction, o as TraceSummaryInput, p as TraceWriter, V as VERSION, q as buildTraceSummary, r as deriveSessionId, s as emitRouteTrace, t as getPromptPreview, u as hashRequestContent, v as normalizeTraceMode, w as resolveTraceWriter, x as startProxy } from './proxy-Co87mKak.js';
3
3
 
4
4
  /**
5
5
  * 路由层只关心 alias 的成本画像,因此这里的 key 是 public alias,而不是
package/dist/index.js CHANGED
@@ -1643,7 +1643,7 @@ function hashHex(value, length) {
1643
1643
  }
1644
1644
 
1645
1645
  // src/proxy.ts
1646
- var VERSION = "1.0.0";
1646
+ var VERSION = "1.0.1";
1647
1647
  var HOP_BY_HOP = /* @__PURE__ */ new Set([
1648
1648
  "connection",
1649
1649
  "keep-alive",
@@ -1657,17 +1657,85 @@ var HOP_BY_HOP = /* @__PURE__ */ new Set([
1657
1657
  "content-length"
1658
1658
  ]);
1659
1659
  var RETRYABLE_STATUS = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]);
1660
+ var DEFAULT_MAX_BODY_BYTES = 10 * 1024 * 1024;
1661
+ var DEFAULT_BODY_READ_TIMEOUT_MS = 3e4;
1662
+ var DEFAULT_UPSTREAM_REQUEST_TIMEOUT_MS = 3e5;
1663
+ var DEFAULT_RUNTIME_LIMITS = {
1664
+ maxBodyBytes: DEFAULT_MAX_BODY_BYTES,
1665
+ bodyReadTimeoutMs: DEFAULT_BODY_READ_TIMEOUT_MS,
1666
+ upstreamRequestTimeoutMs: DEFAULT_UPSTREAM_REQUEST_TIMEOUT_MS
1667
+ };
1668
+ function resolveRuntimeLimits(input) {
1669
+ return {
1670
+ maxBodyBytes: input?.maxBodyBytes ?? DEFAULT_RUNTIME_LIMITS.maxBodyBytes,
1671
+ bodyReadTimeoutMs: input?.bodyReadTimeoutMs ?? DEFAULT_RUNTIME_LIMITS.bodyReadTimeoutMs,
1672
+ upstreamRequestTimeoutMs: input?.upstreamRequestTimeoutMs ?? DEFAULT_RUNTIME_LIMITS.upstreamRequestTimeoutMs
1673
+ };
1674
+ }
1660
1675
  var REQUESTABLE_PUBLIC_MODELS = /* @__PURE__ */ new Set(["auto"]);
1661
1676
  var PUBLIC_HEADER_PREFIXES = ["x-xy-router-"];
1662
- function readBody(req) {
1677
+ var BodyReadError = class extends Error {
1678
+ statusCode;
1679
+ constructor(statusCode, message) {
1680
+ super(message);
1681
+ this.name = "BodyReadError";
1682
+ this.statusCode = statusCode;
1683
+ }
1684
+ };
1685
+ function isBodyReadError(error) {
1686
+ return error instanceof BodyReadError;
1687
+ }
1688
+ function writeBodyReadErrorAndCloseRequest(req, res, error) {
1689
+ res.shouldKeepAlive = false;
1690
+ res.setHeader("connection", "close");
1691
+ res.once("finish", () => {
1692
+ if (!req.destroyed) {
1693
+ req.destroy();
1694
+ }
1695
+ });
1696
+ writeOpenAiError(res, error.statusCode, error.message);
1697
+ }
1698
+ function readBody(req, limits) {
1663
1699
  return new Promise((resolve, reject) => {
1664
1700
  let body = "";
1701
+ let bodyBytes = 0;
1702
+ let settled = false;
1665
1703
  req.setEncoding("utf8");
1666
- req.on("data", (chunk) => {
1704
+ const timeout = setTimeout(() => {
1705
+ rejectOnce(new BodyReadError(408, "Request body read timeout"));
1706
+ }, limits.bodyReadTimeoutMs);
1707
+ const cleanup = () => {
1708
+ clearTimeout(timeout);
1709
+ req.off("data", onData);
1710
+ req.off("end", onEnd);
1711
+ req.off("error", onError);
1712
+ };
1713
+ const rejectOnce = (error) => {
1714
+ if (settled) return;
1715
+ settled = true;
1716
+ cleanup();
1717
+ reject(error);
1718
+ };
1719
+ const resolveOnce = (value) => {
1720
+ if (settled) return;
1721
+ settled = true;
1722
+ cleanup();
1723
+ resolve(value);
1724
+ };
1725
+ const onData = (chunk) => {
1726
+ if (settled) return;
1727
+ bodyBytes += Buffer.byteLength(chunk, "utf8");
1728
+ if (bodyBytes > limits.maxBodyBytes) {
1729
+ rejectOnce(new BodyReadError(413, "Payload Too Large"));
1730
+ return;
1731
+ }
1667
1732
  body += chunk;
1668
- });
1669
- req.on("end", () => resolve(body));
1670
- req.on("error", reject);
1733
+ };
1734
+ const onEnd = () => resolveOnce(body);
1735
+ const onError = (error) => rejectOnce(error);
1736
+ req.on("data", onData);
1737
+ req.on("end", onEnd);
1738
+ req.on("error", onError);
1671
1739
  });
1672
1740
  }
1673
1741
  var OPENCLAW_CLI_TURN_PATTERN = /(?:^|\n)\[[^\]\n]+?\]\s+([\s\S]*?)(?=(?:\n\[[^\]\n]+?\]\s+)|$)/g;
@@ -1782,19 +1850,68 @@ function writeOpenAiError(res, status, message, type = "invalid_request_error",
1782
1850
  }
1783
1851
  });
1784
1852
  }
1785
- async function fetchUpstream(cfg, req, body, actualModel) {
1853
+ function anySignal(signals) {
1854
+ const controller = new AbortController();
1855
+ const listeners = [];
1856
+ let cleaned = false;
1857
+ const cleanup = () => {
1858
+ if (cleaned) return;
1859
+ cleaned = true;
1860
+ for (const { signal, listener } of listeners) {
1861
+ signal.removeEventListener("abort", listener);
1862
+ }
1863
+ };
1864
+ const abortFrom = (signal) => {
1865
+ if (!controller.signal.aborted) {
1866
+ controller.abort(signal.reason);
1867
+ }
1868
+ cleanup();
1869
+ };
1870
+ for (const signal of signals) {
1871
+ if (signal.aborted) {
1872
+ abortFrom(signal);
1873
+ return { signal: controller.signal, cleanup };
1874
+ }
1875
+ }
1876
+ for (const signal of signals) {
1877
+ const listener = () => {
1878
+ abortFrom(signal);
1879
+ };
1880
+ listeners.push({ signal, listener });
1881
+ signal.addEventListener("abort", listener, { once: true });
1882
+ }
1883
+ return { signal: controller.signal, cleanup };
1884
+ }
1885
+ async function fetchUpstream(cfg, req, body, actualModel, runtimeLimits, requestSignal) {
1886
+ const timeoutController = new AbortController();
1887
+ const timeout = setTimeout(() => {
1888
+ timeoutController.abort();
1889
+ }, runtimeLimits.upstreamRequestTimeoutMs);
1890
+ const linkedSignal = anySignal([requestSignal, timeoutController.signal]);
1891
+ const cleanup = () => {
1892
+ clearTimeout(timeout);
1893
+ linkedSignal.cleanup();
1894
+ };
1786
1895
  try {
1787
1896
  const response = await fetch(`${cfg.baseUrl}/chat/completions`, {
1788
1897
  method: "POST",
1789
1898
  headers: buildUpstreamHeaders(req, cfg),
1790
- body: JSON.stringify({ ...body, model: actualModel })
1899
+ body: JSON.stringify({ ...body, model: actualModel }),
1900
+ signal: linkedSignal.signal
1791
1901
  });
1902
+ clearTimeout(timeout);
1792
1903
  if (RETRYABLE_STATUS.has(response.status)) {
1793
- return { ok: false, reason: "retryable", response };
1904
+ return { ok: false, reason: "retryable", response, cleanup };
1794
1905
  }
1795
- return { ok: true, response };
1906
+ return { ok: true, response, cleanup };
1796
1907
  } catch (error) {
1797
- return { ok: false, reason: "network_error", error };
1908
+ if (timeoutController.signal.aborted) {
1909
+ return { ok: false, reason: "timeout", error, cleanup };
1910
+ }
1911
+ if (requestSignal.aborted) {
1912
+ return { ok: false, reason: "aborted", error, cleanup };
1913
+ }
1914
+ return { ok: false, reason: "network_error", error, cleanup };
1798
1915
  }
1799
1916
  }
1800
1917
  function getMaxOutputTokens(body) {
@@ -2010,8 +2127,17 @@ function chooseModel(requestedModel, body, headers, sessionStore, cfg, tierEntri
2010
2127
  sessionAction: "none"
2011
2128
  };
2012
2129
  }
2013
- async function proxyChat(req, res, cfg, sessionStore, tierEntries, publicModels, registry2, routerOptions) {
2014
- const rawBody = await readBody(req);
2130
+ async function proxyChat(req, res, cfg, runtimeLimits, sessionStore, tierEntries, publicModels, registry2, routerOptions) {
2131
+ let rawBody;
2132
+ try {
2133
+ rawBody = await readBody(req, runtimeLimits);
2134
+ } catch (error) {
2135
+ if (isBodyReadError(error)) {
2136
+ writeBodyReadErrorAndCloseRequest(req, res, error);
2137
+ return;
2138
+ }
2139
+ throw error;
2140
+ }
2015
2141
  let body;
2016
2142
  try {
2017
2143
  body = JSON.parse(rawBody);
@@ -2056,61 +2182,129 @@ async function proxyChat(req, res, cfg, sessionStore, tierEntries, publicModels,
2056
2182
  );
2057
2183
  return;
2058
2184
  }
2059
- const attempt = await fetchUpstream(cfg, req, bodyObj, physicalModel.id);
2060
- const attempts = [
2061
- attempt.ok ? { model: selected.actualModel, status: "success" } : {
2062
- model: selected.actualModel,
2063
- status: "error",
2064
- error: attempt.reason === "network_error" ? "network_error" : `upstream_http_${attempt.response.status}`
2185
+ const requestController = new AbortController();
2186
+ let responseFinished = false;
2187
+ const abortUpstreamRequest = () => {
2188
+ if (responseFinished || requestController.signal.aborted) return;
2189
+ requestController.abort();
2190
+ };
2191
+ const onRequestAborted = () => {
2192
+ abortUpstreamRequest();
2193
+ };
2194
+ const onResponseClose = () => {
2195
+ abortUpstreamRequest();
2196
+ };
2197
+ const onResponseFinish = () => {
2198
+ responseFinished = true;
2199
+ };
2200
+ const cleanupRequestAbortListeners = () => {
2201
+ req.off("aborted", onRequestAborted);
2202
+ res.off("close", onResponseClose);
2203
+ res.off("finish", onResponseFinish);
2204
+ };
2205
+ req.on("aborted", onRequestAborted);
2206
+ res.on("close", onResponseClose);
2207
+ res.on("finish", onResponseFinish);
2208
+ let attempt;
2209
+ try {
2210
+ attempt = await fetchUpstream(
2211
+ cfg,
2212
+ req,
2213
+ bodyObj,
2214
+ physicalModel.id,
2215
+ runtimeLimits,
2216
+ requestController.signal
2217
+ );
2218
+ const attempts = [
2219
+ attempt.ok ? { model: selected.actualModel, status: "success" } : {
2220
+ model: selected.actualModel,
2221
+ status: "error",
2222
+ error: attempt.reason === "timeout" ? "upstream_timeout" : attempt.reason === "network_error" ? "network_error" : attempt.reason === "aborted" ? "client_aborted" : `upstream_http_${attempt.response.status}`
2223
+ }
2224
+ ];
2225
+ const finalTier = selected.tier;
2226
+ let sessionAction = selected.sessionAction;
2227
+ if (!attempt.ok && attempt.reason === "aborted") {
2228
+ emitProxyTrace(
2229
+ cfg,
2230
+ selected,
2231
+ finalTier,
2232
+ attempts,
2233
+ sessionAction,
2234
+ true
2235
+ );
2236
+ return;
2065
2237
  }
2066
- ];
2067
- const finalTier = selected.tier;
2068
- let sessionAction = selected.sessionAction;
2069
- if (!attempt.ok && attempt.reason === "network_error") {
2070
- const trace2 = emitProxyTrace(
2238
+ if (!attempt.ok && attempt.reason === "timeout") {
2239
+ const trace2 = emitProxyTrace(
2240
+ cfg,
2241
+ selected,
2242
+ finalTier,
2243
+ attempts,
2244
+ sessionAction,
2245
+ true
2246
+ );
2247
+ const headers2 = buildPublicHeaders(cfg, selected, finalTier, trace2);
2248
+ writeOpenAiError(
2249
+ res,
2250
+ 504,
2251
+ "Upstream request timed out",
2252
+ "invalid_request_error",
2253
+ null,
2254
+ headers2
2255
+ );
2256
+ return;
2257
+ }
2258
+ if (!attempt.ok && attempt.reason === "network_error") {
2259
+ const trace2 = emitProxyTrace(
2260
+ cfg,
2261
+ selected,
2262
+ finalTier,
2263
+ attempts,
2264
+ sessionAction,
2265
+ true
2266
+ );
2267
+ const headers2 = buildPublicHeaders(cfg, selected, finalTier, trace2);
2268
+ writeOpenAiError(
2269
+ res,
2270
+ 502,
2271
+ attempt.error instanceof Error ? attempt.error.message : "Upstream request failed",
2272
+ "invalid_request_error",
2273
+ null,
2274
+ headers2
2275
+ );
2276
+ return;
2277
+ }
2278
+ if (attempt.ok && selected.sessionId && !selected.explicit) {
2279
+ sessionStore.setSession(selected.sessionId, {
2280
+ physicalModelId: selected.actualModel,
2281
+ routedPublicModel: selected.routedModel,
2282
+ pinnedTier: finalTier
2283
+ });
2284
+ if (sessionAction === "none") {
2285
+ sessionAction = "set";
2286
+ }
2287
+ }
2288
+ const trace = emitProxyTrace(
2071
2289
  cfg,
2072
2290
  selected,
2073
2291
  finalTier,
2074
2292
  attempts,
2075
2293
  sessionAction,
2076
- true
2077
- );
2078
- const headers2 = buildPublicHeaders(cfg, selected, finalTier, trace2);
2079
- writeOpenAiError(
2080
- res,
2081
- 502,
2082
- attempt.error instanceof Error ? attempt.error.message : "Upstream request failed",
2083
- "invalid_request_error",
2084
- null,
2085
- headers2
2294
+ !attempt.ok
2086
2295
  );
2087
- return;
2088
- }
2089
- if (attempt.ok && selected.sessionId && !selected.explicit) {
2090
- sessionStore.setSession(selected.sessionId, {
2091
- physicalModelId: selected.actualModel,
2092
- routedPublicModel: selected.routedModel,
2093
- pinnedTier: finalTier
2094
- });
2095
- if (sessionAction === "none") {
2096
- sessionAction = "set";
2296
+ const headers = buildPublicHeaders(cfg, selected, finalTier, trace);
2297
+ const responseHeaders = copyResponseHeaders(attempt.response, headers);
2298
+ res.statusCode = attempt.response.status;
2299
+ for (const [k, v] of Object.entries(responseHeaders)) {
2300
+ res.setHeader(k, v);
2097
2301
  }
2302
+ await streamResponse(attempt.response, res);
2303
+ responseFinished = true;
2304
+ } finally {
2305
+ cleanupRequestAbortListeners();
2306
+ attempt?.cleanup();
2098
2307
  }
2099
- const trace = emitProxyTrace(
2100
- cfg,
2101
- selected,
2102
- finalTier,
2103
- attempts,
2104
- sessionAction,
2105
- !attempt.ok
2106
- );
2107
- const headers = buildPublicHeaders(cfg, selected, finalTier, trace);
2108
- const responseHeaders = copyResponseHeaders(attempt.response, headers);
2109
- res.statusCode = attempt.response.status;
2110
- for (const [k, v] of Object.entries(responseHeaders)) {
2111
- res.setHeader(k, v);
2112
- }
2113
- await streamResponse(attempt.response, res);
2114
2308
  }
2115
2309
  async function startProxy(options) {
2116
2310
  const cfg = resolveConfig({
@@ -2122,6 +2316,7 @@ async function startProxy(options) {
2122
2316
  traceLogger: options.traceLogger,
2123
2317
  sessionPinning: options.session?.enabled
2124
2318
  });
2319
+ const runtimeLimits = resolveRuntimeLimits(options.runtimeLimits);
2125
2320
  const sessionStore = new SessionStore(options.session);
2126
2321
  const publicModels = options.config.publicModels;
2127
2322
  const tierEntries = options.config.routing.tiers;
@@ -2150,6 +2345,7 @@ async function startProxy(options) {
2150
2345
  req,
2151
2346
  res,
2152
2347
  cfg,
2348
+ runtimeLimits,
2153
2349
  sessionStore,
2154
2350
  tierEntries,
2155
2351
  publicModels,