github-router 0.3.36 → 0.3.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -3055,10 +3055,11 @@ async function bridgeCall(endpoint, tool, args, timeoutMs, signal) {
3055
3055
  const id = randomUUID();
3056
3056
  const ws = new WebSocket(`ws://127.0.0.1:${endpoint.port}`, { headers: { authorization: `Bearer ${endpoint.token}` } });
3057
3057
  let settled = false;
3058
+ let timer = void 0;
3058
3059
  const finish = (fn) => {
3059
3060
  if (settled) return;
3060
3061
  settled = true;
3061
- clearTimeout(timer);
3062
+ if (timer !== void 0) clearTimeout(timer);
3062
3063
  if (signal) signal.removeEventListener("abort", onAbort);
3063
3064
  try {
3064
3065
  ws.close();
@@ -3073,7 +3074,7 @@ async function bridgeCall(endpoint, tool, args, timeoutMs, signal) {
3073
3074
  }
3074
3075
  signal.addEventListener("abort", onAbort, { once: true });
3075
3076
  }
3076
- const timer = setTimeout(() => finish(() => reject(/* @__PURE__ */ new Error(`timeout after ${timeoutMs}ms`))), timeoutMs);
3077
+ timer = setTimeout(() => finish(() => reject(/* @__PURE__ */ new Error(`timeout after ${timeoutMs}ms`))), timeoutMs);
3077
3078
  ws.on("open", () => {
3078
3079
  if (settled) {
3079
3080
  try {
@@ -5181,6 +5182,117 @@ async function acquireWorkerSlot(signal) {
5181
5182
  };
5182
5183
  }
5183
5184
 
5185
+ //#endregion
5186
+ //#region src/lib/diagnose-response.ts
5187
/** Maximum number of body characters echoed into the parse-failure log line. */
const PREVIEW_LIMIT = 200;
/**
 * Parse an upstream response body as JSON, logging a diagnostic line when the
 * body cannot be read or is not valid JSON.
 *
 * The body is read exactly once as text and then handed to `JSON.parse`.
 * Reading it once avoids `response.clone()`, which tees the body stream: the
 * branch that is never read would otherwise keep its own copy of every
 * successfully parsed body queued until it is garbage collected.
 *
 * @param response - Fetch `Response` whose body has not been consumed yet.
 * @param routePath - Route label included in the diagnostic log line.
 * @returns The parsed JSON value.
 * @throws The original read error or `SyntaxError`, rethrown unchanged after
 *   logging the status, content-type and a preview of at most
 *   `PREVIEW_LIMIT` body characters.
 */
async function parseJsonOrDiagnose(response, routePath) {
  // Stays undefined when reading the body itself fails, which is how the
  // catch block tells "unreadable" apart from "readable but not JSON".
  let bodyText;
  try {
    bodyText = await response.text();
    return JSON.parse(bodyText);
  } catch (error) {
    const contentType = response.headers.get("content-type") ?? "(none)";
    const readable = bodyText ?? "(unreadable)";
    const preview = readable.length > PREVIEW_LIMIT ? readable.slice(0, PREVIEW_LIMIT) + "...(truncated)" : readable;
    consola.error(`Upstream JSON parse failed at ${routePath}: status=${response.status} content-type="${contentType}" body[0..${PREVIEW_LIMIT}]=${JSON.stringify(preview)}`);
    throw error;
  }
}
5200
+
5201
+ //#endregion
5202
+ //#region src/lib/response-cap.ts
5203
/**
 * Hard byte cap for non-streaming upstream response bodies.
 *
 * Anthropic responses with large tool_use blocks can legitimately reach
 * several MB, but a multi-GB body is either a buggy upstream or a malicious
 * one. Buffering it would OOM the proxy and crash all in-flight requests.
 *
 * Applies to /v1/messages, /v1/chat/completions, and /v1/responses.
 */
const MAX_RESPONSE_BODY_BYTES = 10 * 1024 * 1024;
/**
 * Render a byte count for log and error text, e.g. `10485760` -> `"10 MiB"`.
 * Keeps messages truthful when a caller passes a non-default cap.
 */
function formatResponseCapLabel(bytes) {
  const MIB = 1024 * 1024;
  if (bytes >= MIB) return `${Number((bytes / MIB).toFixed(2))} MiB`;
  if (bytes >= 1024) return `${Number((bytes / 1024).toFixed(2))} KiB`;
  return `${bytes} B`;
}
/**
 * Read a Response body with a hard byte cap, then parse it as JSON.
 *
 * Fast path: when Content-Length is a plain decimal number within the cap
 * AND no Content-Encoding is applied, the body is handed straight to
 * `parseJsonOrDiagnose`. Content-Length describes the encoded (compressed)
 * payload, so for encoded responses it says nothing about the size of the
 * decoded bytes; those always take the counted streaming path.
 *
 * When the cap is hit:
 *   - the reader is cancelled (best effort) so the upstream body stops
 *     being consumed
 *   - a structured Anthropic-format error object is returned together with
 *     status 502; nothing is thrown, so the caller decides how to surface it
 *
 * A read error in the middle of the body is logged and the bytes received so
 * far are still parsed (best effort). If that parse then fails, the read
 * error is attached as `cause` of the thrown parse error.
 *
 * @param response - Fetch `Response` with an unconsumed body.
 * @param routePath - Route label used in log lines and in the error message.
 * @param capBytes - Maximum number of decoded body bytes to buffer.
 * @returns `{ ok: true, value }` on success, or
 *   `{ ok: false, status: 502, errorResponse }` when the cap is exceeded.
 * @throws The JSON parse error (or whatever `parseJsonOrDiagnose` throws)
 *   when the body is not valid JSON.
 */
async function readResponseBodyCapped(response, routePath, capBytes = MAX_RESPONSE_BODY_BYTES) {
  const contentLengthHeader = response.headers.get("content-length")?.trim() ?? "";
  // Only a plain run of digits counts; "12abc" or "-5" must not unlock the fast path.
  const declaredLength = /^\d+$/.test(contentLengthHeader) ? Number.parseInt(contentLengthHeader, 10) : NaN;
  const contentEncoding = (response.headers.get("content-encoding") ?? "").trim().toLowerCase();
  const lengthDescribesBody = contentEncoding === "" || contentEncoding === "identity";
  // NaN <= capBytes is false, so a missing or malformed header falls through.
  if (lengthDescribesBody && declaredLength <= capBytes) return {
    ok: true,
    value: await parseJsonOrDiagnose(response, routePath)
  };
  const reader = response.body?.getReader();
  if (!reader) return {
    ok: true,
    value: await parseJsonOrDiagnose(response, routePath)
  };
  const chunks = [];
  let totalBytes = 0;
  let capped = false;
  let readError;
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      if (!value) continue;
      totalBytes += value.byteLength;
      if (totalBytes > capBytes) {
        capped = true;
        try {
          await reader.cancel("size_cap");
        } catch {
          // Best effort: the body is being dropped regardless.
        }
        break;
      }
      chunks.push(value);
    }
  } catch (err) {
    readError = err;
    if (!capped) consola.warn(`readResponseBodyCapped: read error at ${routePath}:`, err);
  }
  if (capped) {
    const capLabel = formatResponseCapLabel(capBytes);
    consola.warn(`Non-streaming upstream response at ${routePath} exceeded ${capBytes} bytes (${capLabel} cap); dropping body to prevent OOM. Check upstream health.`);
    return {
      ok: false,
      status: 502,
      errorResponse: {
        type: "error",
        error: {
          type: "api_error",
          message: `Upstream response body exceeded the ${capLabel} size cap for non-streaming ${routePath}. The upstream may be misbehaving. Try enabling streaming (stream: true) which handles large responses chunk-by-chunk.`
        }
      }
    };
  }
  // totalBytes equals the sum of the stored chunks here: the only chunk that
  // is counted but not stored is the one that trips the cap.
  const merged = new Uint8Array(totalBytes);
  let offset = 0;
  for (const chunk of chunks) {
    merged.set(chunk, offset);
    offset += chunk.byteLength;
  }
  const text = new TextDecoder().decode(merged);
  try {
    return {
      ok: true,
      value: JSON.parse(text)
    };
  } catch (err) {
    // A truncated read is the likely root cause of the parse failure; keep it
    // reachable from the error that callers actually receive.
    if (readError !== undefined && err instanceof Error && err.cause === undefined) err.cause = readError;
    const preview = text.slice(0, 200);
    const contentType = response.headers.get("content-type") ?? "(none)";
    consola.error(`Upstream JSON parse failed at ${routePath}: status=${response.status} content-type="${contentType}" body[0..200]=${JSON.stringify(preview)}`);
    throw err;
  }
}
5295
+
5184
5296
  //#endregion
5185
5297
  //#region src/services/copilot/create-chat-completions.ts
5186
5298
  const createChatCompletions = async (payload, modelHeaders, callerSignal) => {
@@ -5222,7 +5334,12 @@ const createChatCompletions = async (payload, modelHeaders, callerSignal) => {
5222
5334
  }));
5223
5335
  }
5224
5336
  if (payload.stream) return events(response);
5225
- return await response.json();
5337
+ const cappedResult = await readResponseBodyCapped(response, "/v1/chat/completions", MAX_RESPONSE_BODY_BYTES);
5338
+ if (!cappedResult.ok) throw new HTTPError("Upstream /v1/chat/completions response exceeded 10 MiB size cap", new Response(JSON.stringify(cappedResult.errorResponse), {
5339
+ status: cappedResult.status,
5340
+ headers: { "content-type": "application/json" }
5341
+ }));
5342
+ return cappedResult.value;
5226
5343
  };
5227
5344
 
5228
5345
  //#endregion
@@ -5883,7 +6000,12 @@ const createResponses = async (payload, modelHeaders, callerSignal) => {
5883
6000
  throw new HTTPError("Failed to create responses", response);
5884
6001
  }
5885
6002
  if (payload.stream) return events(response);
5886
- return await response.json();
6003
+ const cappedResult = await readResponseBodyCapped(response, "/v1/responses", MAX_RESPONSE_BODY_BYTES);
6004
+ if (!cappedResult.ok) throw new HTTPError("Upstream /v1/responses response exceeded 10 MiB size cap", new Response(JSON.stringify(cappedResult.errorResponse), {
6005
+ status: cappedResult.status,
6006
+ headers: { "content-type": "application/json" }
6007
+ }));
6008
+ return cappedResult.value;
5887
6009
  };
5888
6010
  function detectVision(input) {
5889
6011
  if (typeof input === "string") return false;
@@ -6847,6 +6969,34 @@ async function readWithInactivityTimeout(reader, timeoutMs) {
6847
6969
  }
6848
6970
  }
6849
6971
  /**
6972
+ * Race an `AsyncIterableIterator.next()` call against an inactivity timeout.
6973
+ *
6974
+ * Follows the same pattern as `readWithInactivityTimeout` (including the
6975
+ * noop catcher to avoid Node 24 unhandled-rejection crashes) but works
6976
+ * with typed iterators that yield parsed objects rather than raw bytes.
6977
+ *
6978
+ * On timeout, throws an `InactivityTimeout` error (same classification as
6979
+ * the byte-reader variant — surfaced to the consumer as `timeout_error` via
6980
+ * `buildOpenAIErrorEvent`).
6981
+ *
6982
+ * @param iterator - An AsyncIterableIterator whose `.next()` we want to race.
6983
+ * @param timeoutMs - Milliseconds before the timeout fires.
6984
+ */
6985
+ async function readIteratorWithTimeout(iterator, timeoutMs) {
6986
+ let timeoutHandle;
6987
+ const timeoutPromise = new Promise((_, reject) => {
6988
+ timeoutHandle = setTimeout(() => {
6989
+ reject(Object.assign(/* @__PURE__ */ new Error("upstream_inactive"), { name: "InactivityTimeout" }));
6990
+ }, timeoutMs);
6991
+ });
6992
+ timeoutPromise.catch(() => {});
6993
+ try {
6994
+ return await Promise.race([iterator.next(), timeoutPromise]);
6995
+ } finally {
6996
+ if (timeoutHandle !== void 0) clearTimeout(timeoutHandle);
6997
+ }
6998
+ }
6999
+ /**
6850
7000
  * Build the SSE wire bytes for an Anthropic-format streaming error event.
6851
7001
  * Per Anthropic streaming spec, errors are sent as:
6852
7002
  * event: error
@@ -7162,7 +7312,7 @@ function sseEvent(type, data) {
7162
7312
  function buildAdvisorStream(opts) {
7163
7313
  const advisorModel = opts.advisorModel ?? ADVISOR_DEFAULT_MODEL;
7164
7314
  const advisorEffort = opts.advisorEffort ?? ADVISOR_DEFAULT_EFFORT;
7165
- const aborter = new AbortController();
7315
+ const aborter = opts.externalAborter ?? new AbortController();
7166
7316
  let conversation = [...opts.initialConversation];
7167
7317
  return new ReadableStream({
7168
7318
  async start(controller) {
@@ -10948,7 +11098,7 @@ function initProxyFromEnv() {
10948
11098
  //#endregion
10949
11099
  //#region package.json
10950
11100
  var name = "github-router";
10951
- var version = "0.3.36";
11101
+ var version = "0.3.37";
10952
11102
 
10953
11103
  //#endregion
10954
11104
  //#region src/lib/approval.ts
@@ -11365,7 +11515,7 @@ async function handleCompletion$1(c) {
11365
11515
  return c.json(response);
11366
11516
  }
11367
11517
  const iterator = response[Symbol.asyncIterator]();
11368
- const firstResult = await iterator.next();
11518
+ const firstResult = await readIteratorWithTimeout(iterator, UPSTREAM_INACTIVITY_TIMEOUT_MS);
11369
11519
  if (firstResult.done) consola.warn(`Upstream /chat/completions returned an empty stream at ${c.req.path}`);
11370
11520
  let pendingFirstChunk = firstResult.done ? void 0 : firstResult.value;
11371
11521
  let upstreamFinished = firstResult.done;
@@ -11405,7 +11555,7 @@ async function handleCompletion$1(c) {
11405
11555
  return;
11406
11556
  }
11407
11557
  try {
11408
- const result = await iterator.next();
11558
+ const result = await readIteratorWithTimeout(iterator, UPSTREAM_INACTIVITY_TIMEOUT_MS);
11409
11559
  if (consumerCancelled) {
11410
11560
  safeClose(controller);
11411
11561
  return;
@@ -11726,22 +11876,6 @@ function sanitizeAnthropicBody(rawBody) {
11726
11876
  return JSON.stringify(parsed);
11727
11877
  }
11728
11878
 
11729
- //#endregion
11730
- //#region src/lib/diagnose-response.ts
11731
- const PREVIEW_LIMIT = 200;
11732
- async function parseJsonOrDiagnose(response, routePath) {
11733
- const cloned = response.clone();
11734
- try {
11735
- return await response.json();
11736
- } catch (error) {
11737
- const contentType = response.headers.get("content-type") ?? "(none)";
11738
- const bodyText = await cloned.text().catch(() => "(unreadable)");
11739
- const preview = bodyText.length > PREVIEW_LIMIT ? bodyText.slice(0, PREVIEW_LIMIT) + "...(truncated)" : bodyText;
11740
- consola.error(`Upstream JSON parse failed at ${routePath}: status=${response.status} content-type="${contentType}" body[0..${PREVIEW_LIMIT}]=${JSON.stringify(preview)}`);
11741
- throw error;
11742
- }
11743
- }
11744
-
11745
11879
  //#endregion
11746
11880
  //#region src/routes/messages/count-tokens-handler.ts
11747
11881
  const isWebSearchTool$1 = (tool) => typeof tool.type === "string" && tool.type.startsWith("web_search") || tool.name === "web_search";
@@ -11912,89 +12046,6 @@ function stripAnthropicOnlyFields$1(body) {
11912
12046
 
11913
12047
  //#endregion
11914
12048
  //#region src/routes/messages/handler.ts
11915
- const NON_STREAMING_BODY_CAP_BYTES = 10 * 1024 * 1024;
11916
- /**
11917
- * Read a Response body with a hard byte cap, then parse as JSON.
11918
- *
11919
- * Falls back to the fast path (response.json()) when Content-Length is
11920
- * present and within the cap, avoiding the streaming-reader overhead for
11921
- * the vast majority of normal responses.
11922
- *
11923
- * When the cap is hit:
11924
- * - the reader is cancelled to release the upstream socket
11925
- * - a structured Anthropic-format error is returned to the caller
11926
- * (the caller wraps it in c.json(), not throws — the client gets a
11927
- * clean 413 error, not an unhandled-rejection crash)
11928
- *
11929
- * Returns `{ ok: true, value }` on success or `{ ok: false, errorResponse }`
11930
- * on cap exceeded.
11931
- */
11932
- async function readResponseBodyCapped(response, routePath, capBytes) {
11933
- const contentLengthHeader = response.headers.get("content-length");
11934
- const contentLength = contentLengthHeader ? parseInt(contentLengthHeader, 10) : NaN;
11935
- if (!isNaN(contentLength) && contentLength <= capBytes) return {
11936
- ok: true,
11937
- value: await parseJsonOrDiagnose(response, routePath)
11938
- };
11939
- const reader = response.body?.getReader();
11940
- if (!reader) return {
11941
- ok: true,
11942
- value: await parseJsonOrDiagnose(response, routePath)
11943
- };
11944
- const chunks = [];
11945
- let totalBytes = 0;
11946
- let capped = false;
11947
- try {
11948
- while (true) {
11949
- const { done, value } = await reader.read();
11950
- if (done) break;
11951
- if (!value) continue;
11952
- totalBytes += value.byteLength;
11953
- if (totalBytes > capBytes) {
11954
- capped = true;
11955
- try {
11956
- await reader.cancel("size_cap");
11957
- } catch {}
11958
- break;
11959
- }
11960
- chunks.push(value);
11961
- }
11962
- } catch (err) {
11963
- if (!capped) consola.warn(`readResponseBodyCapped: read error at ${routePath}:`, err);
11964
- }
11965
- if (capped) {
11966
- consola.warn(`Non-streaming upstream response at ${routePath} exceeded ${capBytes} bytes (10 MiB cap); dropping body to prevent OOM. Check upstream health.`);
11967
- return {
11968
- ok: false,
11969
- status: 502,
11970
- errorResponse: {
11971
- type: "error",
11972
- error: {
11973
- type: "api_error",
11974
- message: "Upstream response body exceeded the 10 MiB size cap for non-streaming /v1/messages. The upstream may be misbehaving. Try enabling streaming (stream: true) which handles large responses chunk-by-chunk."
11975
- }
11976
- }
11977
- };
11978
- }
11979
- const merged = new Uint8Array(totalBytes);
11980
- let offset = 0;
11981
- for (const chunk of chunks) {
11982
- merged.set(chunk, offset);
11983
- offset += chunk.byteLength;
11984
- }
11985
- const text = new TextDecoder().decode(merged);
11986
- try {
11987
- return {
11988
- ok: true,
11989
- value: JSON.parse(text)
11990
- };
11991
- } catch (err) {
11992
- const preview = text.slice(0, 200);
11993
- const contentType = response.headers.get("content-type") ?? "(none)";
11994
- consola.error(`Upstream JSON parse failed at ${routePath}: status=${response.status} content-type="${contentType}" body[0..200]=${JSON.stringify(preview)}`);
11995
- throw err;
11996
- }
11997
- }
11998
12049
  const isWebSearchTool = (tool) => typeof tool.type === "string" && tool.type.startsWith("web_search") || tool.name === "web_search";
11999
12050
  /**
12000
12051
  * Extract whitelisted beta headers from the incoming request to forward
@@ -12134,12 +12185,13 @@ async function handleCompletion(c) {
12134
12185
  const modelId = resolvedModel ?? originalModel;
12135
12186
  if (modelId) logEndpointMismatch(modelId, "/v1/messages");
12136
12187
  const effectiveBetas = applyDefaultBetas(betaHeaders, resolvedModel ?? originalModel);
12188
+ const advisorAborter = advisorEnabled ? new AbortController() : void 0;
12137
12189
  let response;
12138
12190
  try {
12139
12191
  response = await createMessages(resolvedBody, {
12140
12192
  ...selectedModel?.requestHeaders,
12141
12193
  ...effectiveBetas
12142
- });
12194
+ }, advisorAborter?.signal);
12143
12195
  } catch (error) {
12144
12196
  if (error instanceof HTTPError) {
12145
12197
  const errorBody = await error.response.clone().text().catch(() => "");
@@ -12197,7 +12249,8 @@ async function handleCompletion(c) {
12197
12249
  requestHeaders: {
12198
12250
  ...selectedModel?.requestHeaders,
12199
12251
  ...effectiveBetas
12200
- }
12252
+ },
12253
+ externalAborter: advisorAborter
12201
12254
  }), {
12202
12255
  status: response.status,
12203
12256
  headers: streamHeaders
@@ -12208,7 +12261,7 @@ async function handleCompletion(c) {
12208
12261
  headers: streamHeaders
12209
12262
  });
12210
12263
  }
12211
- const cappedResult = await readResponseBodyCapped(response, c.req.path, NON_STREAMING_BODY_CAP_BYTES);
12264
+ const cappedResult = await readResponseBodyCapped(response, c.req.path, MAX_RESPONSE_BODY_BYTES);
12212
12265
  if (!cappedResult.ok) return c.json(cappedResult.errorResponse, cappedResult.status);
12213
12266
  const responseBody = cappedResult.value;
12214
12267
  logRequest({
@@ -12561,7 +12614,7 @@ async function handleResponses(c) {
12561
12614
  let firstChunk;
12562
12615
  let upstreamFinished = false;
12563
12616
  while (true) {
12564
- const r = await iterator.next();
12617
+ const r = await readIteratorWithTimeout(iterator, UPSTREAM_INACTIVITY_TIMEOUT_MS);
12565
12618
  if (r.done) {
12566
12619
  upstreamFinished = true;
12567
12620
  break;
@@ -12613,7 +12666,7 @@ async function handleResponses(c) {
12613
12666
  return;
12614
12667
  }
12615
12668
  try {
12616
- const result = await iterator.next();
12669
+ const result = await readIteratorWithTimeout(iterator, UPSTREAM_INACTIVITY_TIMEOUT_MS);
12617
12670
  if (consumerCancelled) {
12618
12671
  safeClose(controller);
12619
12672
  return;
@@ -12722,11 +12775,14 @@ async function handleResponsesCompact(c) {
12722
12775
  if (!state.copilotToken) throw new Error("Copilot token not found");
12723
12776
  if (state.manualApprove) await awaitApproval();
12724
12777
  const body = await c.req.json();
12725
- const response = await fetch(`${copilotBaseUrl(state)}/responses/compact`, {
12778
+ const compactUrl = `${copilotBaseUrl(state)}/responses/compact`;
12779
+ const doFetch = () => fetch(compactUrl, {
12726
12780
  method: "POST",
12727
12781
  headers: copilotHeaders(state),
12728
- body: JSON.stringify(body)
12782
+ body: JSON.stringify(body),
12783
+ signal: AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS || 3e5)
12729
12784
  });
12785
+ const response = await tryRefreshAndRetry(doFetch, "/responses/compact");
12730
12786
  if (response.ok) {
12731
12787
  logRequest({
12732
12788
  method: "POST",
@@ -12737,6 +12793,7 @@ async function handleResponsesCompact(c) {
12737
12793
  }
12738
12794
  if (response.status === 404) {
12739
12795
  consola.debug("Copilot API does not support /responses/compact, using synthetic compaction");
12796
+ await response.body?.cancel().catch(() => {});
12740
12797
  return await syntheticCompact(c, body, startTime);
12741
12798
  }
12742
12799
  logRequest({