@yzj01/llm-router 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
- import { T as Tier, R as RawConfig, b as PhysicalModel, P as ProxyOptions, a as ProxyHandle, c as TraceMode, d as TraceLogger, C as ConfigSource, e as PublicModelConfig } from './proxy-CrRX9deF.js';
2
- export { D as DEFAULT_SESSION_CONFIG, f as PublicModelMetadata, g as RouteTraceLog, S as SessionConfig, h as SessionEntry, i as SessionStats, j as SessionStore, k as TierEntry, l as TraceAttempt, m as TraceReason, n as TraceSessionAction, o as TraceSummaryInput, p as TraceWriter, V as VERSION, q as buildTraceSummary, r as deriveSessionId, s as emitRouteTrace, t as getPromptPreview, u as hashRequestContent, v as normalizeTraceMode, w as resolveTraceWriter, x as startProxy } from './proxy-CrRX9deF.js';
1
+ import { T as Tier, R as RawConfig, b as PhysicalModel, P as ProxyOptions, a as ProxyHandle, c as TraceMode, d as TraceLogger, C as ConfigSource, e as PublicModelConfig } from './proxy-CpR5cxND.js';
2
+ export { D as DEFAULT_SESSION_CONFIG, f as PublicModelMetadata, g as RouteTraceLog, S as SessionConfig, h as SessionEntry, i as SessionStats, j as SessionStore, k as TierEntry, l as TraceAttempt, m as TraceReason, n as TraceSessionAction, o as TraceSummaryInput, p as TraceWriter, V as VERSION, q as buildTraceSummary, r as deriveSessionId, s as emitRouteTrace, t as getPromptPreview, u as hashRequestContent, v as normalizeTraceMode, w as resolveTraceWriter, x as startProxy } from './proxy-CpR5cxND.js';
3
3
 
4
4
  /**
5
5
  * 路由层只关心 alias 的成本画像,因此这里的 key 是 public alias,而不是
package/dist/index.js CHANGED
@@ -1643,7 +1643,7 @@ function hashHex(value, length) {
1643
1643
  }
1644
1644
 
1645
1645
  // src/proxy.ts
1646
- var VERSION = "1.0.0";
1646
+ var VERSION = "1.0.2";
1647
1647
  var HOP_BY_HOP = /* @__PURE__ */ new Set([
1648
1648
  "connection",
1649
1649
  "keep-alive",
@@ -1657,24 +1657,91 @@ var HOP_BY_HOP = /* @__PURE__ */ new Set([
1657
1657
  "content-length"
1658
1658
  ]);
1659
1659
  var RETRYABLE_STATUS = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]);
1660
+ var DEFAULT_MAX_BODY_BYTES = 10 * 1024 * 1024;
1661
+ var DEFAULT_BODY_READ_TIMEOUT_MS = 3e4;
1662
+ var DEFAULT_UPSTREAM_REQUEST_TIMEOUT_MS = 3e5;
1663
+ var DEFAULT_RUNTIME_LIMITS = {
1664
+ maxBodyBytes: DEFAULT_MAX_BODY_BYTES,
1665
+ bodyReadTimeoutMs: DEFAULT_BODY_READ_TIMEOUT_MS,
1666
+ upstreamRequestTimeoutMs: DEFAULT_UPSTREAM_REQUEST_TIMEOUT_MS
1667
+ };
1668
+ function resolveRuntimeLimits(input) {
1669
+ return {
1670
+ maxBodyBytes: input?.maxBodyBytes ?? DEFAULT_RUNTIME_LIMITS.maxBodyBytes,
1671
+ bodyReadTimeoutMs: input?.bodyReadTimeoutMs ?? DEFAULT_RUNTIME_LIMITS.bodyReadTimeoutMs,
1672
+ upstreamRequestTimeoutMs: input?.upstreamRequestTimeoutMs ?? DEFAULT_RUNTIME_LIMITS.upstreamRequestTimeoutMs
1673
+ };
1674
+ }
1660
1675
  var REQUESTABLE_PUBLIC_MODELS = /* @__PURE__ */ new Set(["auto"]);
1661
1676
  var PUBLIC_HEADER_PREFIXES = ["x-xy-router-"];
1662
- function readBody(req) {
1677
+ var BodyReadError = class extends Error {
1678
+ statusCode;
1679
+ constructor(statusCode, message) {
1680
+ super(message);
1681
+ this.name = "BodyReadError";
1682
+ this.statusCode = statusCode;
1683
+ }
1684
+ };
1685
+ function isBodyReadError(error) {
1686
+ return error instanceof BodyReadError;
1687
+ }
1688
+ function writeBodyReadErrorAndCloseRequest(req, res, error) {
1689
+ res.shouldKeepAlive = false;
1690
+ res.setHeader("connection", "close");
1691
+ res.once("finish", () => {
1692
+ if (!req.destroyed) {
1693
+ req.destroy();
1694
+ }
1695
+ });
1696
+ writeOpenAiError(res, error.statusCode, error.message);
1697
+ }
1698
+ function readBody(req, limits) {
1663
1699
  return new Promise((resolve, reject) => {
1664
1700
  let body = "";
1701
+ let bodyBytes = 0;
1702
+ let settled = false;
1665
1703
  req.setEncoding("utf8");
1666
- req.on("data", (chunk) => {
1704
+ const timeout = setTimeout(() => {
1705
+ rejectOnce(new BodyReadError(408, "Request body read timeout"));
1706
+ }, limits.bodyReadTimeoutMs);
1707
+ const cleanup = () => {
1708
+ clearTimeout(timeout);
1709
+ req.off("data", onData);
1710
+ req.off("end", onEnd);
1711
+ req.off("error", onError);
1712
+ };
1713
+ const rejectOnce = (error) => {
1714
+ if (settled) return;
1715
+ settled = true;
1716
+ cleanup();
1717
+ reject(error);
1718
+ };
1719
+ const resolveOnce = (value) => {
1720
+ if (settled) return;
1721
+ settled = true;
1722
+ cleanup();
1723
+ resolve(value);
1724
+ };
1725
+ const onData = (chunk) => {
1726
+ if (settled) return;
1727
+ bodyBytes += Buffer.byteLength(chunk, "utf8");
1728
+ if (bodyBytes > limits.maxBodyBytes) {
1729
+ rejectOnce(new BodyReadError(413, "Payload Too Large"));
1730
+ return;
1731
+ }
1667
1732
  body += chunk;
1668
- });
1669
- req.on("end", () => resolve(body));
1670
- req.on("error", reject);
1733
+ };
1734
+ const onEnd = () => resolveOnce(body);
1735
+ const onError = (error) => rejectOnce(error);
1736
+ req.on("data", onData);
1737
+ req.on("end", onEnd);
1738
+ req.on("error", onError);
1671
1739
  });
1672
1740
  }
1673
1741
  var OPENCLAW_CLI_TURN_PATTERN = /(?:^|\n)\[[^\]\n]+?\]\s+([\s\S]*?)(?=(?:\n\[[^\]\n]+?\]\s+)|$)/g;
1674
1742
  function extractRouteTextFromUserMessage(text) {
1675
- const matches = [...text.matchAll(OPENCLAW_CLI_TURN_PATTERN)];
1676
- const last = matches.at(-1)?.[1]?.trim();
1677
- return last || text;
1743
+ void OPENCLAW_CLI_TURN_PATTERN;
1744
+ return text;
1678
1745
  }
1679
1746
  function extractPrompt(messages) {
1680
1747
  const parts = [];
@@ -1782,19 +1849,68 @@ function writeOpenAiError(res, status, message, type = "invalid_request_error",
1782
1849
  }
1783
1850
  });
1784
1851
  }
1785
- async function fetchUpstream(cfg, req, body, actualModel) {
1852
+ function anySignal(signals) {
1853
+ const controller = new AbortController();
1854
+ const listeners = [];
1855
+ let cleaned = false;
1856
+ const cleanup = () => {
1857
+ if (cleaned) return;
1858
+ cleaned = true;
1859
+ for (const { signal, listener } of listeners) {
1860
+ signal.removeEventListener("abort", listener);
1861
+ }
1862
+ };
1863
+ const abortFrom = (signal) => {
1864
+ if (!controller.signal.aborted) {
1865
+ controller.abort(signal.reason);
1866
+ }
1867
+ cleanup();
1868
+ };
1869
+ for (const signal of signals) {
1870
+ if (signal.aborted) {
1871
+ abortFrom(signal);
1872
+ return { signal: controller.signal, cleanup };
1873
+ }
1874
+ }
1875
+ for (const signal of signals) {
1876
+ const listener = () => {
1877
+ abortFrom(signal);
1878
+ };
1879
+ listeners.push({ signal, listener });
1880
+ signal.addEventListener("abort", listener, { once: true });
1881
+ }
1882
+ return { signal: controller.signal, cleanup };
1883
+ }
1884
+ async function fetchUpstream(cfg, req, body, actualModel, runtimeLimits, requestSignal) {
1885
+ const timeoutController = new AbortController();
1886
+ const timeout = setTimeout(() => {
1887
+ timeoutController.abort();
1888
+ }, runtimeLimits.upstreamRequestTimeoutMs);
1889
+ const linkedSignal = anySignal([requestSignal, timeoutController.signal]);
1890
+ const cleanup = () => {
1891
+ clearTimeout(timeout);
1892
+ linkedSignal.cleanup();
1893
+ };
1786
1894
  try {
1787
1895
  const response = await fetch(`${cfg.baseUrl}/chat/completions`, {
1788
1896
  method: "POST",
1789
1897
  headers: buildUpstreamHeaders(req, cfg),
1790
- body: JSON.stringify({ ...body, model: actualModel })
1898
+ body: JSON.stringify({ ...body, model: actualModel }),
1899
+ signal: linkedSignal.signal
1791
1900
  });
1901
+ clearTimeout(timeout);
1792
1902
  if (RETRYABLE_STATUS.has(response.status)) {
1793
- return { ok: false, reason: "retryable", response };
1903
+ return { ok: false, reason: "retryable", response, cleanup };
1794
1904
  }
1795
- return { ok: true, response };
1905
+ return { ok: true, response, cleanup };
1796
1906
  } catch (error) {
1797
- return { ok: false, reason: "network_error", error };
1907
+ if (timeoutController.signal.aborted) {
1908
+ return { ok: false, reason: "timeout", error, cleanup };
1909
+ }
1910
+ if (requestSignal.aborted) {
1911
+ return { ok: false, reason: "aborted", error, cleanup };
1912
+ }
1913
+ return { ok: false, reason: "network_error", error, cleanup };
1798
1914
  }
1799
1915
  }
1800
1916
  function getMaxOutputTokens(body) {
@@ -2010,8 +2126,17 @@ function chooseModel(requestedModel, body, headers, sessionStore, cfg, tierEntri
2010
2126
  sessionAction: "none"
2011
2127
  };
2012
2128
  }
2013
- async function proxyChat(req, res, cfg, sessionStore, tierEntries, publicModels, registry2, routerOptions) {
2014
- const rawBody = await readBody(req);
2129
+ async function proxyChat(req, res, cfg, runtimeLimits, sessionStore, tierEntries, publicModels, registry2, routerOptions) {
2130
+ let rawBody;
2131
+ try {
2132
+ rawBody = await readBody(req, runtimeLimits);
2133
+ } catch (error) {
2134
+ if (isBodyReadError(error)) {
2135
+ writeBodyReadErrorAndCloseRequest(req, res, error);
2136
+ return;
2137
+ }
2138
+ throw error;
2139
+ }
2015
2140
  let body;
2016
2141
  try {
2017
2142
  body = JSON.parse(rawBody);
@@ -2056,61 +2181,129 @@ async function proxyChat(req, res, cfg, sessionStore, tierEntries, publicModels,
2056
2181
  );
2057
2182
  return;
2058
2183
  }
2059
- const attempt = await fetchUpstream(cfg, req, bodyObj, physicalModel.id);
2060
- const attempts = [
2061
- attempt.ok ? { model: selected.actualModel, status: "success" } : {
2062
- model: selected.actualModel,
2063
- status: "error",
2064
- error: attempt.reason === "network_error" ? "network_error" : `upstream_http_${attempt.response.status}`
2184
+ const requestController = new AbortController();
2185
+ let responseFinished = false;
2186
+ const abortUpstreamRequest = () => {
2187
+ if (responseFinished || requestController.signal.aborted) return;
2188
+ requestController.abort();
2189
+ };
2190
+ const onRequestAborted = () => {
2191
+ abortUpstreamRequest();
2192
+ };
2193
+ const onResponseClose = () => {
2194
+ abortUpstreamRequest();
2195
+ };
2196
+ const onResponseFinish = () => {
2197
+ responseFinished = true;
2198
+ };
2199
+ const cleanupRequestAbortListeners = () => {
2200
+ req.off("aborted", onRequestAborted);
2201
+ res.off("close", onResponseClose);
2202
+ res.off("finish", onResponseFinish);
2203
+ };
2204
+ req.on("aborted", onRequestAborted);
2205
+ res.on("close", onResponseClose);
2206
+ res.on("finish", onResponseFinish);
2207
+ let attempt;
2208
+ try {
2209
+ attempt = await fetchUpstream(
2210
+ cfg,
2211
+ req,
2212
+ bodyObj,
2213
+ physicalModel.id,
2214
+ runtimeLimits,
2215
+ requestController.signal
2216
+ );
2217
+ const attempts = [
2218
+ attempt.ok ? { model: selected.actualModel, status: "success" } : {
2219
+ model: selected.actualModel,
2220
+ status: "error",
2221
+ error: attempt.reason === "timeout" ? "upstream_timeout" : attempt.reason === "network_error" ? "network_error" : attempt.reason === "aborted" ? "client_aborted" : `upstream_http_${attempt.response.status}`
2222
+ }
2223
+ ];
2224
+ const finalTier = selected.tier;
2225
+ let sessionAction = selected.sessionAction;
2226
+ if (!attempt.ok && attempt.reason === "aborted") {
2227
+ emitProxyTrace(
2228
+ cfg,
2229
+ selected,
2230
+ finalTier,
2231
+ attempts,
2232
+ sessionAction,
2233
+ true
2234
+ );
2235
+ return;
2065
2236
  }
2066
- ];
2067
- const finalTier = selected.tier;
2068
- let sessionAction = selected.sessionAction;
2069
- if (!attempt.ok && attempt.reason === "network_error") {
2070
- const trace2 = emitProxyTrace(
2237
+ if (!attempt.ok && attempt.reason === "timeout") {
2238
+ const trace2 = emitProxyTrace(
2239
+ cfg,
2240
+ selected,
2241
+ finalTier,
2242
+ attempts,
2243
+ sessionAction,
2244
+ true
2245
+ );
2246
+ const headers2 = buildPublicHeaders(cfg, selected, finalTier, trace2);
2247
+ writeOpenAiError(
2248
+ res,
2249
+ 504,
2250
+ "Upstream request timed out",
2251
+ "invalid_request_error",
2252
+ null,
2253
+ headers2
2254
+ );
2255
+ return;
2256
+ }
2257
+ if (!attempt.ok && attempt.reason === "network_error") {
2258
+ const trace2 = emitProxyTrace(
2259
+ cfg,
2260
+ selected,
2261
+ finalTier,
2262
+ attempts,
2263
+ sessionAction,
2264
+ true
2265
+ );
2266
+ const headers2 = buildPublicHeaders(cfg, selected, finalTier, trace2);
2267
+ writeOpenAiError(
2268
+ res,
2269
+ 502,
2270
+ attempt.error instanceof Error ? attempt.error.message : "Upstream request failed",
2271
+ "invalid_request_error",
2272
+ null,
2273
+ headers2
2274
+ );
2275
+ return;
2276
+ }
2277
+ if (attempt.ok && selected.sessionId && !selected.explicit) {
2278
+ sessionStore.setSession(selected.sessionId, {
2279
+ physicalModelId: selected.actualModel,
2280
+ routedPublicModel: selected.routedModel,
2281
+ pinnedTier: finalTier
2282
+ });
2283
+ if (sessionAction === "none") {
2284
+ sessionAction = "set";
2285
+ }
2286
+ }
2287
+ const trace = emitProxyTrace(
2071
2288
  cfg,
2072
2289
  selected,
2073
2290
  finalTier,
2074
2291
  attempts,
2075
2292
  sessionAction,
2076
- true
2077
- );
2078
- const headers2 = buildPublicHeaders(cfg, selected, finalTier, trace2);
2079
- writeOpenAiError(
2080
- res,
2081
- 502,
2082
- attempt.error instanceof Error ? attempt.error.message : "Upstream request failed",
2083
- "invalid_request_error",
2084
- null,
2085
- headers2
2293
+ !attempt.ok
2086
2294
  );
2087
- return;
2088
- }
2089
- if (attempt.ok && selected.sessionId && !selected.explicit) {
2090
- sessionStore.setSession(selected.sessionId, {
2091
- physicalModelId: selected.actualModel,
2092
- routedPublicModel: selected.routedModel,
2093
- pinnedTier: finalTier
2094
- });
2095
- if (sessionAction === "none") {
2096
- sessionAction = "set";
2295
+ const headers = buildPublicHeaders(cfg, selected, finalTier, trace);
2296
+ const responseHeaders = copyResponseHeaders(attempt.response, headers);
2297
+ res.statusCode = attempt.response.status;
2298
+ for (const [k, v] of Object.entries(responseHeaders)) {
2299
+ res.setHeader(k, v);
2097
2300
  }
2301
+ await streamResponse(attempt.response, res);
2302
+ responseFinished = true;
2303
+ } finally {
2304
+ cleanupRequestAbortListeners();
2305
+ attempt?.cleanup();
2098
2306
  }
2099
- const trace = emitProxyTrace(
2100
- cfg,
2101
- selected,
2102
- finalTier,
2103
- attempts,
2104
- sessionAction,
2105
- !attempt.ok
2106
- );
2107
- const headers = buildPublicHeaders(cfg, selected, finalTier, trace);
2108
- const responseHeaders = copyResponseHeaders(attempt.response, headers);
2109
- res.statusCode = attempt.response.status;
2110
- for (const [k, v] of Object.entries(responseHeaders)) {
2111
- res.setHeader(k, v);
2112
- }
2113
- await streamResponse(attempt.response, res);
2114
2307
  }
2115
2308
  async function startProxy(options) {
2116
2309
  const cfg = resolveConfig({
@@ -2122,6 +2315,7 @@ async function startProxy(options) {
2122
2315
  traceLogger: options.traceLogger,
2123
2316
  sessionPinning: options.session?.enabled
2124
2317
  });
2318
+ const runtimeLimits = resolveRuntimeLimits(options.runtimeLimits);
2125
2319
  const sessionStore = new SessionStore(options.session);
2126
2320
  const publicModels = options.config.publicModels;
2127
2321
  const tierEntries = options.config.routing.tiers;
@@ -2150,6 +2344,7 @@ async function startProxy(options) {
2150
2344
  req,
2151
2345
  res,
2152
2346
  cfg,
2347
+ runtimeLimits,
2153
2348
  sessionStore,
2154
2349
  tierEntries,
2155
2350
  publicModels,