github-router 0.3.34 → 0.3.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -929,31 +929,15 @@ async function autoUpdateClaude(latestVersion) {
929
929
  //#endregion
930
930
  //#region src/lib/port.ts
931
931
  const DEFAULT_PORT = 8787;
932
- /**
933
- * Default model for `github-router claude`. The Anthropic-published dashed
934
- * slug (`claude-opus-4-7`) — NOT the Copilot-internal slug
935
- * (`claude-opus-4.7-1m-internal`) — because Claude Code 2.1.126's `/model`
936
- * UI is backed by a hardcoded registry of Anthropic slugs, and an
937
- * unrecognized slug causes the menu to highlight "Opus 4" with a
938
- * "Newer version available" hint instead of "Opus 4.7 (1M context)".
939
- *
940
- * The proxy's `resolveModel` (`src/lib/utils.ts`) translates this to
941
- * Copilot's `claude-opus-4.7-1m-internal` (enterprise) or
942
- * `claude-opus-4.7` (Pro+/Business/Max) at request time via the
943
- * family-preference + version-match branch — round-trip covered by
944
- * `tests/lib-utils.test.ts:154`.
945
- *
946
- * `DEFAULT_CLAUDE_MODEL_FALLBACKS` covers major.minor regressions only;
947
- * 1M↔200K downgrade is handled inside the resolver, so we don't need
948
- * separate `-1m` entries here.
949
- */
950
- const DEFAULT_CLAUDE_MODEL = "claude-opus-4-7";
951
932
  const DEFAULT_CLAUDE_MODEL_FALLBACKS = ["claude-opus-4-6", "claude-opus-4-5"];
952
933
  /**
953
934
  * Cap-aware default picker for `ANTHROPIC_MODEL` on the implicit-default
954
- * path. Returns `claude-opus-4-7[1m]` when the live Copilot catalog
955
- * contains a `*-opus-4.7-1m*` variant (enterprise tier), else
956
- * `DEFAULT_CLAUDE_MODEL` (the bare slug).
935
+ * path. Returns `claude-opus-${family}[1m]` when the live Copilot catalog
936
+ * contains an `opus-${family}-1m*` variant (enterprise tier), else the
937
+ * bare `claude-opus-${family}` slug. `family` defaults to `"4.7"` so the
938
+ * no-arg call preserves the original behavior; explicit values like
939
+ * `"4.6"` or `"4.8"` are used to honor the `github-router claude
940
+ * -m <version>` family shorthand.
957
941
  *
958
942
  * The `[1m]` literal-bracket suffix is Claude Code's local 1M-context
959
943
  * unlock — cc-backup `src/utils/context.ts:35-40` matches `/\[1m\]/i`
@@ -980,12 +964,21 @@ const DEFAULT_CLAUDE_MODEL_FALLBACKS = ["claude-opus-4-6", "claude-opus-4-5"];
980
964
  * can't tell the difference between "no catalog yet" and "no 1M
981
965
  * variant" — defaulting safe-side preserves the pre-change behavior).
982
966
  */
983
- function pickClaudeDefault() {
984
- if (state.models?.data.some((m) => /opus-4[.-]7-1m(?:$|-)/i.test(m.id)) ?? false) {
985
- consola.info(`Catalog contains opus-4.7-1m variant; defaulting ANTHROPIC_MODEL to "${DEFAULT_CLAUDE_MODEL}[1m]" so Claude Code accounts for 1M context locally. Set CLAUDE_CODE_DISABLE_1M_CONTEXT=1 to opt out (HIPAA), or pass --model ${DEFAULT_CLAUDE_MODEL} to pin 200K.`);
986
- return `${DEFAULT_CLAUDE_MODEL}[1m]`;
967
+ const DEFAULT_OPUS_FAMILY = "4.7";
968
+ function pickClaudeDefault(opusFamily = DEFAULT_OPUS_FAMILY) {
969
+ const dotted = opusFamily.replace(/-/g, ".");
970
+ const bareSlug = `claude-opus-${dotted.replace(/\./g, "-")}`;
971
+ const versionPattern = dotted.replace(/\./g, "[.-]");
972
+ const oneMRegex = new RegExp(`opus-${versionPattern}-1m(?:$|-)`, "i");
973
+ const familyRegex = new RegExp(`opus-${versionPattern}(?:$|[-.])`, "i");
974
+ const models = state.models?.data ?? [];
975
+ const has1m = models.some((m) => oneMRegex.test(m.id));
976
+ if (opusFamily !== DEFAULT_OPUS_FAMILY && state.models && models.length > 0 && !models.some((m) => familyRegex.test(m.id))) consola.warn(`Requested Opus family "${dotted}" not found in Copilot catalog; using "${bareSlug}" anyway (resolveModel may not find a backend for it).`);
977
+ if (has1m) {
978
+ consola.info(`Catalog contains opus-${dotted}-1m variant; defaulting ANTHROPIC_MODEL to "${bareSlug}[1m]" so Claude Code accounts for 1M context locally. Set CLAUDE_CODE_DISABLE_1M_CONTEXT=1 to opt out (HIPAA), or pass --model ${bareSlug} to pin 200K.`);
979
+ return `${bareSlug}[1m]`;
987
980
  }
988
- return DEFAULT_CLAUDE_MODEL;
981
+ return bareSlug;
989
982
  }
990
983
  /**
991
984
  * Default model for `github-router codex`. `gpt-5.5` is the new flagship
@@ -2899,8 +2892,28 @@ function buildInstallRequired(reason, autoInstalled) {
2899
2892
  * model. Side effect: when reason is `extension_not_loaded`, attempts
2900
2893
  * to install the NMH manifest for every detected browser so that the
2901
2894
  * extension can connect immediately on load.
2895
+ *
2896
+ * Single-flight: concurrent calls share one in-flight Promise so that
2897
+ * `installNativeHostForAll` (which writes files and spawns reg.exe on
2898
+ * Windows) is called exactly once per check cycle, regardless of how
2899
+ * many browser_* tool calls arrive concurrently.
2900
+ */
2901
+ let _inFlightReady;
2902
+ /**
2903
+ * @internal — counts how many times _ensureBridgeReadyImpl has started.
2904
+ * Used by regression tests for the single-flight property (Bug #6).
2905
+ * Incremented on every impl start, production included; only tests rely on the value.
2902
2906
  */
2907
+ let __implInvocationsForTests = 0;
2903
2908
  async function ensureBridgeReady() {
2909
+ if (_inFlightReady) return _inFlightReady;
2910
+ _inFlightReady = _ensureBridgeReadyImpl().finally(() => {
2911
+ _inFlightReady = void 0;
2912
+ });
2913
+ return _inFlightReady;
2914
+ }
2915
+ async function _ensureBridgeReadyImpl() {
2916
+ __implInvocationsForTests++;
2904
2917
  const browsers = detectSupportedBrowsers();
2905
2918
  if (browsers.length === 0) return buildInstallRequired("no_supported_browser", []);
2906
2919
  if (!bridgeBundleExists()) return buildInstallRequired("bridge_bundle_missing", []);
@@ -3062,6 +3075,12 @@ async function bridgeCall(endpoint, tool, args, timeoutMs, signal) {
3062
3075
  }
3063
3076
  const timer = setTimeout(() => finish(() => reject(/* @__PURE__ */ new Error(`timeout after ${timeoutMs}ms`))), timeoutMs);
3064
3077
  ws.on("open", () => {
3078
+ if (settled) {
3079
+ try {
3080
+ ws.close();
3081
+ } catch {}
3082
+ return;
3083
+ }
3065
3084
  ws.send(JSON.stringify({
3066
3085
  id,
3067
3086
  tool,
@@ -5889,40 +5908,25 @@ function detectAgentCall(input) {
5889
5908
  const MCP_PROTOCOL_VERSION = "2025-06-18";
5890
5909
  const SERVER_NAME = "github-router-peers";
5891
5910
  const SERVER_VERSION = "1";
5892
- /** Bounded concurrency. Originally capped at 2 (commit 4317a25) as a defensive
5893
- * pre-launch guess against Opus's natural pattern of fanning out to all three
5894
- * critics at once. Raised to 8 (Phase 2D of the peer-MCP plan) so the
5895
- * decomposition pattern Phase 2B teaches Opus — "split a >20 KB artifact
5896
- * into 2-4 batches and call in parallel" — can actually run in parallel
5897
- * without the (3+)th call returning isError "queue full". The persona
5898
- * handlers (`callPersona`) hold no shared mutable state — there's no race
5899
- * the cap is hiding; the upstream Copilot's own rate-limit (surfaced as a
5900
- * per-call 429 → tool isError) is the real backpressure mechanism. 8 covers
5901
- * a 7-fork wave with one slot of headroom and is still a hard upper bound
5902
- * against runaway clients. See docs/research/peer-mcp-investigation.md
5903
- * § "Concurrency cap investigation" for the full justification.
5904
- *
5905
- * The counter itself lives in `src/lib/mcp-inflight.ts` so the
5906
- * worker-agent's nested `peer_review` / `advisor` tools share the
5907
- * same budget — otherwise a worker could fan out unboundedly to
5908
- * peers without showing up in the MCP-side cap. */
5909
- /**
5910
- * Per-request AbortController registry for `notifications/cancelled`
5911
- * (Phase D P1.5). When a client times out a tools/call before the
5912
- * upstream Copilot fetch completes, the JSON-RPC notification:
5913
- * { jsonrpc:"2.0", method:"notifications/cancelled",
5914
- * params:{ requestId: "<id>", reason?: "..." } }
5915
- * arrives. Without handling, the upstream fetch keeps running until
5916
- * natural completion, leaking the inFlightToolsCall slot for tens of
5917
- * minutes. Tracking the AbortController lets us abort the fetch and
5918
- * free the slot immediately.
5919
- *
5920
- * Important: per CLAUDE.md "Bun request-signal quirk", we use OUR own
5921
- * AbortController (NOT c.req.raw.signal which fires after request body
5922
- * is consumed). The signal is threaded into createResponses /
5923
- * createChatCompletions's `callerSignal` parameter.
5924
- */
5925
5911
  const inflightAborts = /* @__PURE__ */ new Map();
5912
+ /**
5913
+ * Idempotent teardown for an in-flight tools/call. Aborts the upstream
5914
+ * fetch, frees the concurrency slot, and removes the registry entry.
5915
+ * Safe to call from both `notifications/cancelled` and the SSE
5916
+ * `ReadableStream.cancel()` callback, in either order, and any number
5917
+ * of times — every call after the first is a no-op.
5918
+ */
5919
+ function cancelInflight(key, reason) {
5920
+ const entry = inflightAborts.get(key);
5921
+ if (!entry) return;
5922
+ inflightAborts.delete(key);
5923
+ try {
5924
+ entry.aborter.abort(new Error(reason));
5925
+ } catch {}
5926
+ try {
5927
+ entry.release();
5928
+ } catch {}
5929
+ }
5926
5930
  const RPC_PARSE_ERROR = -32700;
5927
5931
  const RPC_INVALID_REQUEST = -32600;
5928
5932
  const RPC_METHOD_NOT_FOUND = -32601;
@@ -6395,9 +6399,14 @@ async function handleToolsCall(body) {
6395
6399
  const startedAt = Date.now();
6396
6400
  const abortKey = body.id !== void 0 && body.id !== null ? body.id : void 0;
6397
6401
  let aborter;
6402
+ let inflightEntry;
6398
6403
  if (abortKey !== void 0) {
6399
6404
  aborter = new AbortController();
6400
- inflightAborts.set(abortKey, aborter);
6405
+ inflightEntry = {
6406
+ aborter,
6407
+ release
6408
+ };
6409
+ inflightAborts.set(abortKey, inflightEntry);
6401
6410
  }
6402
6411
  const telemetryName = persona ? persona.agentName : nonPersonaTool.toolNameHttp;
6403
6412
  const telemetryModel = persona ? persona.model : "(non-persona)";
@@ -6428,7 +6437,9 @@ async function handleToolsCall(body) {
6428
6437
  });
6429
6438
  } finally {
6430
6439
  release();
6431
- if (abortKey !== void 0) inflightAborts.delete(abortKey);
6440
+ if (abortKey !== void 0 && inflightEntry !== void 0) {
6441
+ if (inflightAborts.get(abortKey) === inflightEntry) inflightAborts.delete(abortKey);
6442
+ }
6432
6443
  }
6433
6444
  }
6434
6445
  /**
@@ -6443,9 +6454,7 @@ function handleCancelledNotification(body) {
6443
6454
  consola.debug(`[mcp] notifications/cancelled missing or invalid requestId: ${JSON.stringify(requestId)}`);
6444
6455
  return;
6445
6456
  }
6446
- const aborter = inflightAborts.get(requestId);
6447
- if (!aborter) return;
6448
- aborter.abort(/* @__PURE__ */ new Error("client requested cancellation"));
6457
+ cancelInflight(requestId, "client requested cancellation");
6449
6458
  }
6450
6459
  async function handleRpc(_c, body) {
6451
6460
  if (body === null || typeof body !== "object" || Array.isArray(body)) return {
@@ -6644,6 +6653,7 @@ const SSE_HEARTBEAT_INTERVAL_MS = 5e3;
6644
6653
  async function handleToolsCallSSE(body) {
6645
6654
  const encoder = new TextEncoder();
6646
6655
  const callPromise = handleToolsCall(body);
6656
+ let heartbeatHandle;
6647
6657
  const stream = new ReadableStream({
6648
6658
  async start(controller) {
6649
6659
  let closed = false;
@@ -6676,23 +6686,27 @@ async function handleToolsCallSSE(body) {
6676
6686
  }
6677
6687
  });
6678
6688
  safeEnqueue(heartbeatFrame());
6679
- const heartbeatHandle = setInterval(() => safeEnqueue(heartbeatFrame()), SSE_HEARTBEAT_INTERVAL_MS);
6689
+ heartbeatHandle = setInterval(() => safeEnqueue(heartbeatFrame()), SSE_HEARTBEAT_INTERVAL_MS);
6680
6690
  try {
6681
6691
  safeEnqueue(sseFrame(await callPromise));
6682
6692
  } catch (err) {
6683
6693
  consola.error("/mcp SSE upstream error:", err);
6684
6694
  safeEnqueue(sseFrame(rpcError(body.id ?? null, RPC_INTERNAL_ERROR, err instanceof Error ? err.message : String(err))));
6685
6695
  } finally {
6686
- clearInterval(heartbeatHandle);
6696
+ if (heartbeatHandle !== void 0) {
6697
+ clearInterval(heartbeatHandle);
6698
+ heartbeatHandle = void 0;
6699
+ }
6687
6700
  safeClose();
6688
6701
  }
6689
6702
  },
6690
6703
  cancel() {
6691
- const abortKey = body.id !== void 0 && body.id !== null ? body.id : void 0;
6692
- if (abortKey !== void 0) {
6693
- const aborter = inflightAborts.get(abortKey);
6694
- if (aborter) aborter.abort(/* @__PURE__ */ new Error("client disconnected SSE stream"));
6704
+ if (heartbeatHandle !== void 0) {
6705
+ clearInterval(heartbeatHandle);
6706
+ heartbeatHandle = void 0;
6695
6707
  }
6708
+ const abortKey = body.id !== void 0 && body.id !== null ? body.id : void 0;
6709
+ if (abortKey !== void 0) cancelInflight(abortKey, "client disconnected SSE stream");
6696
6710
  }
6697
6711
  });
6698
6712
  return new Response(stream, {
@@ -7054,7 +7068,8 @@ function renderConversationAsText(conversation, maxChars = ADVISOR_MAX_CONVERSAT
7054
7068
  * Anthropic's own ADVISOR ("see the whole task + every tool call +
7055
7069
  * every result").
7056
7070
  */
7057
- async function runAdvisor(conversation, advisorModel, advisorEffort) {
7071
+ async function runAdvisor(conversation, advisorModel, advisorEffort, signal) {
7072
+ if (signal?.aborted) throw new Error("advisor call aborted before dispatch");
7058
7073
  const advisorSystem = "You are an expert advisor reviewing an in-progress Claude Code session. The transcript below is the work-in-progress (turns numbered, with tool calls and results inlined). Read carefully and provide concrete, actionable advice on the next step or course-correction. Be specific — cite the parts of the transcript you're responding to. If the assistant is on the right track, say so explicitly. If they're stuck or off-track, name the specific assumption or step to revisit. Aim for 2-5 paragraphs of substantive guidance.";
7059
7074
  const conversationText = renderConversationAsText(conversation);
7060
7075
  const resolvedAdvisorModel = resolveModel(advisorModel);
@@ -7071,7 +7086,7 @@ async function runAdvisor(conversation, advisorModel, advisorEffort) {
7071
7086
  }],
7072
7087
  stream: false,
7073
7088
  reasoning: { effort: advisorEffort }
7074
- });
7089
+ }, void 0, signal);
7075
7090
  const out = [];
7076
7091
  for (const item of response.output) {
7077
7092
  if (typeof item !== "object" || item === null) continue;
@@ -7098,7 +7113,7 @@ async function runAdvisor(conversation, advisorModel, advisorEffort) {
7098
7113
  content: conversationText
7099
7114
  }],
7100
7115
  stream: false
7101
- }), {})).json();
7116
+ }), {}, signal)).json();
7102
7117
  const text = (Array.isArray(json.content) ? json.content : []).filter((b) => b.type === "text" && typeof b.text === "string").map((b) => b.text).join("\n\n");
7103
7118
  if (!text) throw new Error(`Advisor model ${resolvedAdvisorModel} returned empty response`);
7104
7119
  return text;
@@ -7147,284 +7162,305 @@ function sseEvent(type, data) {
7147
7162
  function buildAdvisorStream(opts) {
7148
7163
  const advisorModel = opts.advisorModel ?? ADVISOR_DEFAULT_MODEL;
7149
7164
  const advisorEffort = opts.advisorEffort ?? ADVISOR_DEFAULT_EFFORT;
7150
- return new ReadableStream({ async start(controller) {
7151
- const conversation = [...opts.initialConversation];
7152
- let messageStartForwarded = false;
7153
- let nextSyntheticIndex = 0;
7154
- let turnsRun = 0;
7155
- const safeEnqueue = (bytes) => {
7156
- try {
7157
- controller.enqueue(bytes);
7158
- return true;
7159
- } catch (err) {
7160
- if (isControllerClosedError(err)) return false;
7161
- throw err;
7162
- }
7163
- };
7164
- const safeEnqueueEvent = (type, data) => safeEnqueue(ENCODER$2.encode(sseEvent(type, data)));
7165
- async function processOneTurn(response) {
7166
- const capturedBlocks = [];
7167
- let advisorToolUse = null;
7168
- const indexToBlock = /* @__PURE__ */ new Map();
7169
- for await (const ev of events(response)) {
7170
- if (!ev.event || !ev.data) continue;
7171
- let payload;
7165
+ const aborter = new AbortController();
7166
+ let conversation = [...opts.initialConversation];
7167
+ return new ReadableStream({
7168
+ async start(controller) {
7169
+ let messageStartForwarded = false;
7170
+ let nextSyntheticIndex = 0;
7171
+ let turnsRun = 0;
7172
+ const safeEnqueue = (bytes) => {
7172
7173
  try {
7173
- payload = JSON.parse(ev.data);
7174
- } catch {
7175
- if (!safeEnqueue(ENCODER$2.encode(`event: ${ev.event}\ndata: ${ev.data}\n\n`))) return {
7176
- capturedBlocks,
7177
- advisorToolUse
7178
- };
7179
- continue;
7174
+ controller.enqueue(bytes);
7175
+ return true;
7176
+ } catch (err) {
7177
+ if (isControllerClosedError(err)) {
7178
+ if (!aborter.signal.aborted) aborter.abort(/* @__PURE__ */ new Error("advisor stream consumer disconnected"));
7179
+ return false;
7180
+ }
7181
+ throw err;
7180
7182
  }
7181
- switch (ev.event) {
7182
- case "message_start":
7183
- if (!messageStartForwarded) {
7184
- if (!safeEnqueueEvent(ev.event, payload)) return {
7185
- capturedBlocks,
7186
- advisorToolUse
7187
- };
7188
- messageStartForwarded = true;
7189
- }
7183
+ };
7184
+ const safeEnqueueEvent = (type, data) => safeEnqueue(ENCODER$2.encode(sseEvent(type, data)));
7185
+ async function processOneTurn(response) {
7186
+ const capturedBlocks = [];
7187
+ let advisorToolUse = null;
7188
+ const indexToBlock = /* @__PURE__ */ new Map();
7189
+ for await (const ev of events(response)) {
7190
+ if (!ev.event || !ev.data) continue;
7191
+ let payload;
7192
+ try {
7193
+ payload = JSON.parse(ev.data);
7194
+ } catch {
7195
+ if (!safeEnqueue(ENCODER$2.encode(`event: ${ev.event}\ndata: ${ev.data}\n\n`))) return {
7196
+ capturedBlocks,
7197
+ advisorToolUse
7198
+ };
7190
7199
  continue;
7191
- case "content_block_start": {
7192
- const block = payload.content_block;
7193
- const upstreamIndex = payload.index;
7194
- if (block && upstreamIndex !== void 0) {
7195
- const myIndex = nextSyntheticIndex++;
7196
- if (block.type === "tool_use" && block.name === ADVISOR_INTERNAL_TOOL_NAME) {
7197
- const id = typeof block.id === "string" ? block.id : `toolu_advisor_${myIndex}`;
7198
- advisorToolUse = {
7199
- index: myIndex,
7200
- id,
7201
- clientId: toClientServerToolUseId(id, myIndex),
7202
- inputJson: ""
7203
- };
7204
- const translated = {
7205
- ...payload,
7206
- index: myIndex,
7207
- content_block: {
7208
- type: "server_tool_use",
7209
- id: advisorToolUse.clientId,
7210
- name: ADVISOR_CLIENT_TOOL_NAME,
7211
- input: {}
7212
- }
7213
- };
7214
- if (!safeEnqueueEvent(ev.event, translated)) return {
7200
+ }
7201
+ switch (ev.event) {
7202
+ case "message_start":
7203
+ if (!messageStartForwarded) {
7204
+ if (!safeEnqueueEvent(ev.event, payload)) return {
7215
7205
  capturedBlocks,
7216
7206
  advisorToolUse
7217
7207
  };
7218
- const captured = {
7219
- block: {
7220
- type: "tool_use",
7208
+ messageStartForwarded = true;
7209
+ }
7210
+ continue;
7211
+ case "content_block_start": {
7212
+ const block = payload.content_block;
7213
+ const upstreamIndex = payload.index;
7214
+ if (block && upstreamIndex !== void 0) {
7215
+ const myIndex = nextSyntheticIndex++;
7216
+ if (block.type === "tool_use" && block.name === ADVISOR_INTERNAL_TOOL_NAME) {
7217
+ const id = typeof block.id === "string" ? block.id : `toolu_advisor_${myIndex}`;
7218
+ advisorToolUse = {
7219
+ index: myIndex,
7221
7220
  id,
7222
- name: ADVISOR_INTERNAL_TOOL_NAME,
7223
- input: {}
7224
- },
7225
- partialJson: "",
7226
- advisorReplay: { id }
7227
- };
7228
- capturedBlocks.push(captured);
7229
- indexToBlock.set(upstreamIndex, captured);
7230
- } else {
7221
+ clientId: toClientServerToolUseId(id, myIndex),
7222
+ inputJson: ""
7223
+ };
7224
+ const translated = {
7225
+ ...payload,
7226
+ index: myIndex,
7227
+ content_block: {
7228
+ type: "server_tool_use",
7229
+ id: advisorToolUse.clientId,
7230
+ name: ADVISOR_CLIENT_TOOL_NAME,
7231
+ input: {}
7232
+ }
7233
+ };
7234
+ if (!safeEnqueueEvent(ev.event, translated)) return {
7235
+ capturedBlocks,
7236
+ advisorToolUse
7237
+ };
7238
+ const captured = {
7239
+ block: {
7240
+ type: "tool_use",
7241
+ id,
7242
+ name: ADVISOR_INTERNAL_TOOL_NAME,
7243
+ input: {}
7244
+ },
7245
+ partialJson: "",
7246
+ advisorReplay: { id }
7247
+ };
7248
+ capturedBlocks.push(captured);
7249
+ indexToBlock.set(upstreamIndex, captured);
7250
+ } else {
7251
+ const reindexed = {
7252
+ ...payload,
7253
+ index: myIndex
7254
+ };
7255
+ if (!safeEnqueueEvent(ev.event, reindexed)) return {
7256
+ capturedBlocks,
7257
+ advisorToolUse
7258
+ };
7259
+ const captured = {
7260
+ block: { ...block },
7261
+ partialJson: ""
7262
+ };
7263
+ capturedBlocks.push(captured);
7264
+ indexToBlock.set(upstreamIndex, captured);
7265
+ }
7266
+ }
7267
+ continue;
7268
+ }
7269
+ case "content_block_delta": {
7270
+ const upstreamIndex = payload.index;
7271
+ const delta = payload.delta;
7272
+ if (upstreamIndex !== void 0) {
7273
+ const captured = upstreamIndex !== void 0 ? indexToBlock.get(upstreamIndex) : void 0;
7231
7274
  const reindexed = {
7232
7275
  ...payload,
7233
- index: myIndex
7276
+ index: captured ? capturedBlocks.indexOf(captured) >= 0 ? nextSyntheticIndex - capturedBlocks.length + capturedBlocks.indexOf(captured) : upstreamIndex : upstreamIndex
7234
7277
  };
7235
7278
  if (!safeEnqueueEvent(ev.event, reindexed)) return {
7236
7279
  capturedBlocks,
7237
7280
  advisorToolUse
7238
7281
  };
7239
- const captured = {
7240
- block: { ...block },
7241
- partialJson: ""
7242
- };
7243
- capturedBlocks.push(captured);
7244
- indexToBlock.set(upstreamIndex, captured);
7245
- }
7282
+ if (captured && delta) {
7283
+ if (delta.type === "text_delta" && typeof delta.text === "string") captured.block.text = (captured.block.text ?? "") + delta.text;
7284
+ else if (delta.type === "thinking_delta" && typeof delta.thinking === "string") captured.block.thinking = (captured.block.thinking ?? "") + delta.thinking;
7285
+ else if (delta.type === "signature_delta" && typeof delta.signature === "string") captured.block.signature = (captured.block.signature ?? "") + delta.signature;
7286
+ else if (delta.type === "input_json_delta" && typeof delta.partial_json === "string") captured.partialJson += delta.partial_json;
7287
+ else if (delta.type === "citations_delta" && delta.citation) {
7288
+ if (!Array.isArray(captured.block.citations)) captured.block.citations = [];
7289
+ captured.block.citations.push(delta.citation);
7290
+ }
7291
+ }
7292
+ } else if (!safeEnqueueEvent(ev.event, payload)) return {
7293
+ capturedBlocks,
7294
+ advisorToolUse
7295
+ };
7296
+ continue;
7246
7297
  }
7247
- continue;
7248
- }
7249
- case "content_block_delta": {
7250
- const upstreamIndex = payload.index;
7251
- const delta = payload.delta;
7252
- if (upstreamIndex !== void 0) {
7298
+ case "content_block_stop": {
7299
+ const upstreamIndex = payload.index;
7253
7300
  const captured = upstreamIndex !== void 0 ? indexToBlock.get(upstreamIndex) : void 0;
7254
7301
  const reindexed = {
7255
7302
  ...payload,
7256
- index: captured ? capturedBlocks.indexOf(captured) >= 0 ? nextSyntheticIndex - capturedBlocks.length + capturedBlocks.indexOf(captured) : upstreamIndex : upstreamIndex
7303
+ index: captured ? nextSyntheticIndex - capturedBlocks.length + capturedBlocks.indexOf(captured) : upstreamIndex ?? 0
7257
7304
  };
7258
7305
  if (!safeEnqueueEvent(ev.event, reindexed)) return {
7259
7306
  capturedBlocks,
7260
7307
  advisorToolUse
7261
7308
  };
7262
- if (captured && delta) {
7263
- if (delta.type === "text_delta" && typeof delta.text === "string") captured.block.text = (captured.block.text ?? "") + delta.text;
7264
- else if (delta.type === "thinking_delta" && typeof delta.thinking === "string") captured.block.thinking = (captured.block.thinking ?? "") + delta.thinking;
7265
- else if (delta.type === "signature_delta" && typeof delta.signature === "string") captured.block.signature = (captured.block.signature ?? "") + delta.signature;
7266
- else if (delta.type === "input_json_delta" && typeof delta.partial_json === "string") captured.partialJson += delta.partial_json;
7267
- else if (delta.type === "citations_delta" && delta.citation) {
7268
- if (!Array.isArray(captured.block.citations)) captured.block.citations = [];
7269
- captured.block.citations.push(delta.citation);
7309
+ if (captured) {
7310
+ if (captured.block.type === "tool_use" && captured.partialJson.length > 0) try {
7311
+ captured.block.input = JSON.parse(captured.partialJson);
7312
+ } catch (err) {
7313
+ consola.warn(`advisor: malformed input_json_delta for tool_use id=${captured.block.id ?? "?"} name=${captured.block.name ?? "?"} partialJson.length=${captured.partialJson.length} parseError=${err instanceof Error ? err.message : String(err)}`);
7314
+ captured.block.input = {};
7270
7315
  }
7316
+ if (captured.block.type === "text" && (typeof captured.block.text !== "string" || captured.block.text.length === 0)) captured.dropFromReplay = true;
7271
7317
  }
7272
- } else if (!safeEnqueueEvent(ev.event, payload)) return {
7273
- capturedBlocks,
7274
- advisorToolUse
7275
- };
7276
- continue;
7277
- }
7278
- case "content_block_stop": {
7279
- const upstreamIndex = payload.index;
7280
- const captured = upstreamIndex !== void 0 ? indexToBlock.get(upstreamIndex) : void 0;
7281
- const reindexed = {
7282
- ...payload,
7283
- index: captured ? nextSyntheticIndex - capturedBlocks.length + capturedBlocks.indexOf(captured) : upstreamIndex ?? 0
7284
- };
7285
- if (!safeEnqueueEvent(ev.event, reindexed)) return {
7286
- capturedBlocks,
7287
- advisorToolUse
7288
- };
7289
- if (captured) {
7290
- if (captured.block.type === "tool_use" && captured.partialJson.length > 0) try {
7291
- captured.block.input = JSON.parse(captured.partialJson);
7292
- } catch (err) {
7293
- consola.warn(`advisor: malformed input_json_delta for tool_use id=${captured.block.id ?? "?"} name=${captured.block.name ?? "?"} partialJson.length=${captured.partialJson.length} parseError=${err instanceof Error ? err.message : String(err)}`);
7294
- captured.block.input = {};
7295
- }
7296
- if (captured.block.type === "text" && (typeof captured.block.text !== "string" || captured.block.text.length === 0)) captured.dropFromReplay = true;
7318
+ continue;
7297
7319
  }
7298
- continue;
7299
- }
7300
- case "message_delta":
7301
- if (!safeEnqueueEvent(ev.event, payload)) return {
7302
- capturedBlocks,
7303
- advisorToolUse
7304
- };
7305
- continue;
7306
- case "message_stop":
7307
- if (advisorToolUse) return {
7308
- capturedBlocks,
7309
- advisorToolUse
7310
- };
7311
- if (!safeEnqueueEvent(ev.event, payload)) return {
7312
- capturedBlocks,
7313
- advisorToolUse
7314
- };
7315
- return {
7320
+ case "message_delta":
7321
+ if (!safeEnqueueEvent(ev.event, payload)) return {
7322
+ capturedBlocks,
7323
+ advisorToolUse
7324
+ };
7325
+ continue;
7326
+ case "message_stop":
7327
+ if (advisorToolUse) return {
7328
+ capturedBlocks,
7329
+ advisorToolUse
7330
+ };
7331
+ if (!safeEnqueueEvent(ev.event, payload)) return {
7332
+ capturedBlocks,
7333
+ advisorToolUse
7334
+ };
7335
+ return {
7336
+ capturedBlocks,
7337
+ advisorToolUse
7338
+ };
7339
+ default: if (!safeEnqueueEvent(ev.event, payload)) return {
7316
7340
  capturedBlocks,
7317
7341
  advisorToolUse
7318
7342
  };
7319
- default: if (!safeEnqueueEvent(ev.event, payload)) return {
7320
- capturedBlocks,
7321
- advisorToolUse
7322
- };
7343
+ }
7323
7344
  }
7345
+ return {
7346
+ capturedBlocks,
7347
+ advisorToolUse
7348
+ };
7324
7349
  }
7325
- return {
7326
- capturedBlocks,
7327
- advisorToolUse
7328
- };
7329
- }
7330
- try {
7331
- let response = opts.firstResponse;
7332
- for (turnsRun = 0; turnsRun < ADVISOR_MAX_TURNS; turnsRun++) {
7333
- const { capturedBlocks, advisorToolUse } = await processOneTurn(response);
7334
- if (!advisorToolUse) return;
7335
- const assistantTurn = {
7336
- role: "assistant",
7337
- content: capturedBlocks.filter((c) => !c.dropFromReplay).map((c) => {
7338
- if (c.advisorReplay) {
7339
- const input = typeof c.block.input === "object" && c.block.input !== null ? c.block.input : {};
7340
- return {
7341
- type: "tool_use",
7342
- id: c.advisorReplay.id,
7343
- name: ADVISOR_INTERNAL_TOOL_NAME,
7344
- input
7345
- };
7350
+ try {
7351
+ let response = opts.firstResponse;
7352
+ for (turnsRun = 0; turnsRun < ADVISOR_MAX_TURNS; turnsRun++) {
7353
+ if (aborter.signal.aborted) return;
7354
+ if (conversation === null) return;
7355
+ const { capturedBlocks, advisorToolUse } = await processOneTurn(response);
7356
+ if (!advisorToolUse) return;
7357
+ if (aborter.signal.aborted) return;
7358
+ if (conversation === null) return;
7359
+ const assistantTurn = {
7360
+ role: "assistant",
7361
+ content: capturedBlocks.filter((c) => !c.dropFromReplay).map((c) => {
7362
+ if (c.advisorReplay) {
7363
+ const input = typeof c.block.input === "object" && c.block.input !== null ? c.block.input : {};
7364
+ return {
7365
+ type: "tool_use",
7366
+ id: c.advisorReplay.id,
7367
+ name: ADVISOR_INTERNAL_TOOL_NAME,
7368
+ input
7369
+ };
7370
+ }
7371
+ return c.block;
7372
+ })
7373
+ };
7374
+ conversation.push(assistantTurn);
7375
+ let advisorText;
7376
+ try {
7377
+ advisorText = await runAdvisor(conversation, advisorModel, advisorEffort, aborter.signal);
7378
+ } catch (err) {
7379
+ if (aborter.signal.aborted) return;
7380
+ const msg = err instanceof Error ? err.message : String(err);
7381
+ consola.warn(`Advisor model call failed: ${msg}`);
7382
+ advisorText = `[Advisor unavailable: ${msg}. Continuing without external review — proceed with caution and consider self-checking against your primary-source evidence.]`;
7383
+ }
7384
+ if (aborter.signal.aborted) return;
7385
+ if (conversation === null) return;
7386
+ const resultIndex = nextSyntheticIndex++;
7387
+ if (!safeEnqueueEvent("content_block_start", {
7388
+ type: "content_block_start",
7389
+ index: resultIndex,
7390
+ content_block: {
7391
+ type: "advisor_tool_result",
7392
+ tool_use_id: advisorToolUse.clientId,
7393
+ content: {
7394
+ type: "advisor_result",
7395
+ text: advisorText
7396
+ }
7346
7397
  }
7347
- return c.block;
7348
- })
7349
- };
7350
- conversation.push(assistantTurn);
7351
- let advisorText;
7352
- try {
7353
- advisorText = await runAdvisor(conversation, advisorModel, advisorEffort);
7354
- } catch (err) {
7355
- const msg = err instanceof Error ? err.message : String(err);
7356
- consola.warn(`Advisor model call failed: ${msg}`);
7357
- advisorText = `[Advisor unavailable: ${msg}. Continuing without external review — proceed with caution and consider self-checking against your primary-source evidence.]`;
7398
+ })) return;
7399
+ if (!safeEnqueueEvent("content_block_stop", {
7400
+ type: "content_block_stop",
7401
+ index: resultIndex
7402
+ })) return;
7403
+ conversation.push({
7404
+ role: "user",
7405
+ content: [{
7406
+ type: "tool_result",
7407
+ tool_use_id: advisorToolUse.id,
7408
+ content: advisorText
7409
+ }]
7410
+ });
7411
+ if (aborter.signal.aborted) return;
7412
+ response = await createMessages(JSON.stringify({
7413
+ ...opts.baseBody,
7414
+ messages: conversation,
7415
+ stream: true
7416
+ }), opts.requestHeaders, aborter.signal);
7358
7417
  }
7359
- const resultIndex = nextSyntheticIndex++;
7360
- if (!safeEnqueueEvent("content_block_start", {
7418
+ if (aborter.signal.aborted) return;
7419
+ const finalIndex = nextSyntheticIndex++;
7420
+ safeEnqueueEvent("content_block_start", {
7361
7421
  type: "content_block_start",
7362
- index: resultIndex,
7422
+ index: finalIndex,
7363
7423
  content_block: {
7364
- type: "advisor_tool_result",
7365
- tool_use_id: advisorToolUse.clientId,
7366
- content: {
7367
- type: "advisor_result",
7368
- text: advisorText
7369
- }
7424
+ type: "text",
7425
+ text: ""
7370
7426
  }
7371
- })) return;
7372
- if (!safeEnqueueEvent("content_block_stop", {
7427
+ });
7428
+ safeEnqueueEvent("content_block_delta", {
7429
+ type: "content_block_delta",
7430
+ index: finalIndex,
7431
+ delta: {
7432
+ type: "text_delta",
7433
+ text: `\n\n[Advisor loop exceeded ${ADVISOR_MAX_TURNS} turns; halting]`
7434
+ }
7435
+ });
7436
+ safeEnqueueEvent("content_block_stop", {
7373
7437
  type: "content_block_stop",
7374
- index: resultIndex
7375
- })) return;
7376
- conversation.push({
7377
- role: "user",
7378
- content: [{
7379
- type: "tool_result",
7380
- tool_use_id: advisorToolUse.id,
7381
- content: advisorText
7382
- }]
7438
+ index: finalIndex
7439
+ });
7440
+ safeEnqueueEvent("message_stop", { type: "message_stop" });
7441
+ } catch (err) {
7442
+ if (aborter.signal.aborted) return;
7443
+ const msg = err instanceof Error ? err.message : String(err);
7444
+ consola.error(`Advisor stream error: ${msg}`);
7445
+ safeEnqueueEvent("error", {
7446
+ type: "error",
7447
+ error: {
7448
+ type: "api_error",
7449
+ message: `advisor loop failed: ${msg}`
7450
+ }
7383
7451
  });
7384
- response = await createMessages(JSON.stringify({
7385
- ...opts.baseBody,
7386
- messages: conversation,
7387
- stream: true
7388
- }), opts.requestHeaders);
7452
+ } finally {
7453
+ conversation = null;
7454
+ try {
7455
+ controller.close();
7456
+ } catch {}
7389
7457
  }
7390
- const finalIndex = nextSyntheticIndex++;
7391
- safeEnqueueEvent("content_block_start", {
7392
- type: "content_block_start",
7393
- index: finalIndex,
7394
- content_block: {
7395
- type: "text",
7396
- text: ""
7397
- }
7398
- });
7399
- safeEnqueueEvent("content_block_delta", {
7400
- type: "content_block_delta",
7401
- index: finalIndex,
7402
- delta: {
7403
- type: "text_delta",
7404
- text: `\n\n[Advisor loop exceeded ${ADVISOR_MAX_TURNS} turns; halting]`
7405
- }
7406
- });
7407
- safeEnqueueEvent("content_block_stop", {
7408
- type: "content_block_stop",
7409
- index: finalIndex
7410
- });
7411
- safeEnqueueEvent("message_stop", { type: "message_stop" });
7412
- } catch (err) {
7413
- const msg = err instanceof Error ? err.message : String(err);
7414
- consola.error(`Advisor stream error: ${msg}`);
7415
- safeEnqueueEvent("error", {
7416
- type: "error",
7417
- error: {
7418
- type: "api_error",
7419
- message: `advisor loop failed: ${msg}`
7420
- }
7421
- });
7422
- } finally {
7423
- try {
7424
- controller.close();
7425
- } catch {}
7458
+ },
7459
+ cancel(reason) {
7460
+ if (!aborter.signal.aborted) aborter.abort(/* @__PURE__ */ new Error(`advisor stream cancelled: ${reason instanceof Error ? reason.message : String(reason ?? "no reason")}`));
7461
+ conversation = null;
7426
7462
  }
7427
- } });
7463
+ });
7428
7464
  }
7429
7465
 
7430
7466
  //#endregion
@@ -7487,22 +7523,24 @@ function mcpHeaders(sid) {
7487
7523
  if (sid) headers["Mcp-Session-Id"] = sid;
7488
7524
  return headers;
7489
7525
  }
7490
/**
 * POST a JSON-RPC payload to the Copilot MCP endpoint (`<base>/mcp`).
 *
 * A 5xx response is retried exactly once after a 500 ms pause; any other
 * response (including a second 5xx) is returned to the caller as-is, so
 * callers are expected to inspect `res.ok` / `res.status` themselves.
 *
 * @param {unknown} body - JSON-serialisable request payload.
 * @param {string} [sid] - MCP session id, forwarded to `mcpHeaders`.
 * @param {boolean} [retry=true] - Whether a 5xx response may be retried once.
 * @param {AbortSignal} [signal] - Optional abort signal handed to `fetch`
 *   for the initial request and for the retry.
 * @returns {Promise<Response>} The last response received from upstream.
 */
async function postMcp(body, sid, retry = true, signal) {
	const url = `${copilotBaseUrl(state)}/mcp`;
	const res = await fetch(url, {
		method: "POST",
		headers: mcpHeaders(sid),
		body: JSON.stringify(body),
		signal
	});
	if (!res.ok && retry && res.status >= 500) {
		// The failed response is discarded, so cancel its body instead of
		// leaving it unread: an unconsumed body can keep the underlying
		// connection busy while we back off and retry. Cancellation is
		// best-effort; a failure here must not mask the retry.
		try {
			await res.body?.cancel();
		} catch {}
		await sleep(500);
		return postMcp(body, sid, false, signal);
	}
	return res;
}
7503
- async function searchWeb(query) {
7540
+ async function searchWeb(query, signal) {
7504
7541
  await throttleSearch();
7505
7542
  consola.info(`Web search (MCP): "${query.slice(0, 80)}"`);
7543
+ if (signal?.aborted) throw new Error("web search aborted before dispatch");
7506
7544
  const callId = Math.floor(Math.random() * 1e9);
7507
7545
  let sid;
7508
7546
  try {
@@ -7518,7 +7556,7 @@ async function searchWeb(query) {
7518
7556
  version: copilotVersion(state)
7519
7557
  }
7520
7558
  }
7521
- });
7559
+ }, void 0, true, signal);
7522
7560
  if (!initRes.ok) {
7523
7561
  consola.error("MCP initialize failed", initRes.status);
7524
7562
  throw new HTTPError("MCP initialize failed", initRes);
@@ -7528,7 +7566,7 @@ async function searchWeb(query) {
7528
7566
  const notifRes = await postMcp({
7529
7567
  jsonrpc: "2.0",
7530
7568
  method: "notifications/initialized"
7531
- }, sid);
7569
+ }, sid, true, signal);
7532
7570
  if (!notifRes.ok && notifRes.status !== 202) {
7533
7571
  consola.error("MCP notifications/initialized failed", notifRes.status);
7534
7572
  throw new HTTPError("MCP notifications/initialized failed", notifRes);
@@ -7541,13 +7579,14 @@ async function searchWeb(query) {
7541
7579
  name: "web_search",
7542
7580
  arguments: { query }
7543
7581
  }
7544
- }, sid);
7582
+ }, sid, true, signal);
7545
7583
  if (!callRes.ok) {
7546
7584
  consola.error("MCP tools/call failed", callRes.status);
7547
7585
  throw new HTTPError("MCP tools/call failed", callRes);
7548
7586
  }
7549
7587
  let rpc;
7550
7588
  for await (const ev of events(callRes)) {
7589
+ if (signal?.aborted) throw new Error("web search aborted during SSE stream");
7551
7590
  if (!ev.data) continue;
7552
7591
  let parsedJson;
7553
7592
  try {
@@ -9887,7 +9926,7 @@ const NON_PERSONA_MCP_TOOLS = Object.freeze([
9887
9926
  description: "The search query string. Natural-language queries work best — the upstream provider rewrites for the search index."
9888
9927
  } }
9889
9928
  },
9890
- async handler(args, _signal) {
9929
+ async handler(args, signal) {
9891
9930
  const query = typeof args.query === "string" ? args.query : "";
9892
9931
  if (!query) return {
9893
9932
  content: [{
@@ -9899,7 +9938,7 @@ const NON_PERSONA_MCP_TOOLS = Object.freeze([
9899
9938
  try {
9900
9939
  return { content: [{
9901
9940
  type: "text",
9902
- text: formatWebSearchResult(await searchWeb(query))
9941
+ text: formatWebSearchResult(await searchWeb(query, signal))
9903
9942
  }] };
9904
9943
  } catch (err) {
9905
9944
  return {
@@ -10759,9 +10798,11 @@ function rotateIfNeeded(filePath) {
10759
10798
  fs$1.renameSync(filePath, filePath + ".1");
10760
10799
  } catch {}
10761
10800
  }
10762
- var FileLogReporter = class {
10801
+ var FileLogReporter = class FileLogReporter {
10763
10802
  filePath;
10764
10803
  seen = /* @__PURE__ */ new Set();
10804
+ bytesSinceCheck = 0;
10805
+ static ROTATE_CHECK_BYTES = MAX_LOG_BYTES / 2;
10765
10806
  constructor(filePath) {
10766
10807
  this.filePath = filePath;
10767
10808
  rotateIfNeeded(filePath);
@@ -10773,6 +10814,11 @@ var FileLogReporter = class {
10773
10814
  if (this.seen.size >= DEDUP_MAX) this.seen.clear();
10774
10815
  this.seen.add(key);
10775
10816
  const line = formatLogLine(logObj);
10817
+ this.bytesSinceCheck += line.length;
10818
+ if (this.bytesSinceCheck >= FileLogReporter.ROTATE_CHECK_BYTES) {
10819
+ rotateIfNeeded(this.filePath);
10820
+ this.bytesSinceCheck = 0;
10821
+ }
10776
10822
  let fd;
10777
10823
  try {
10778
10824
  fd = fs$1.openSync(this.filePath, "a", 384);
@@ -10902,7 +10948,7 @@ function initProxyFromEnv() {
10902
10948
  //#endregion
10903
10949
  //#region package.json
10904
10950
  var name = "github-router";
10905
- var version = "0.3.34";
10951
+ var version = "0.3.36";
10906
10952
 
10907
10953
  //#endregion
10908
10954
  //#region src/lib/approval.ts
@@ -11866,6 +11912,89 @@ function stripAnthropicOnlyFields$1(body) {
11866
11912
 
11867
11913
  //#endregion
11868
11914
  //#region src/routes/messages/handler.ts
11915
+ const NON_STREAMING_BODY_CAP_BYTES = 10 * 1024 * 1024;
11916
/**
 * Read a Response body with a hard byte cap, then parse it as JSON.
 *
 * Fast path: when Content-Length is present, within the cap, and no
 * Content-Encoding is applied, parsing is delegated to
 * `parseJsonOrDiagnose(response, routePath)`, which avoids the streaming
 * reader for the vast majority of normal responses. The fast path is
 * skipped for encoded bodies because Content-Length describes the encoded
 * size and therefore cannot bound the decoded body.
 *
 * When the cap is exceeded (either declared up front by an un-encoded
 * Content-Length, or discovered while streaming):
 *   - the body is cancelled so the rest of it is not downloaded
 *   - a structured Anthropic-format error is RETURNED, not thrown, together
 *     with HTTP status 502, so the caller can pass it straight to
 *     `c.json(errorResponse, status)`
 *
 * A read error in the middle of the stream is logged and the bytes received
 * so far are still handed to the JSON parser (which will normally reject
 * the truncated body).
 *
 * @param {Response} response - Upstream response whose body has not been read.
 * @param {string} routePath - Request path, used only in log messages.
 * @param {number} capBytes - Maximum number of body bytes to buffer.
 * @returns {Promise<{ok: true, value: unknown} | {ok: false, status: 502, errorResponse: object}>}
 *   `{ ok: true, value }` on success, or `{ ok: false, status, errorResponse }`
 *   when the cap is exceeded.
 * @throws {SyntaxError} When the (streamed) body is not valid JSON.
 */
async function readResponseBodyCapped(response, routePath, capBytes) {
	const BYTES_PER_MIB = 1024 * 1024;
	// Human-readable cap for messages; stays "10 MiB" for the default cap
	// instead of being hard-coded independently of `capBytes`.
	const capLabel = capBytes > 0 && capBytes % BYTES_PER_MIB === 0 ? `${capBytes / BYTES_PER_MIB} MiB` : `${capBytes} bytes`;
	const capExceeded = () => {
		consola.warn(`Non-streaming upstream response at ${routePath} exceeded ${capBytes} bytes (${capLabel} cap); dropping body to prevent OOM. Check upstream health.`);
		return {
			ok: false,
			status: 502,
			errorResponse: {
				type: "error",
				error: {
					type: "api_error",
					message: `Upstream response body exceeded the ${capLabel} size cap for non-streaming /v1/messages. The upstream may be misbehaving. Try enabling streaming (stream: true) which handles large responses chunk-by-chunk.`
				}
			}
		};
	};

	const declaredLength = Number.parseInt(response.headers.get("content-length") ?? "", 10);
	const hasDeclaredLength = !Number.isNaN(declaredLength);
	const contentEncoding = (response.headers.get("content-encoding") ?? "").trim().toLowerCase();
	const isEncoded = contentEncoding !== "" && contentEncoding !== "identity";

	if (hasDeclaredLength && !isEncoded) {
		if (declaredLength <= capBytes) return {
			ok: true,
			value: await parseJsonOrDiagnose(response, routePath)
		};
		// The upstream already announced an oversized body: refuse it
		// without buffering up to `capBytes` first. Cancellation is
		// best-effort.
		try {
			await response.body?.cancel("size_cap");
		} catch {}
		return capExceeded();
	}

	const reader = response.body?.getReader();
	// No readable body stream to meter: let the regular parser deal with it.
	if (!reader) return {
		ok: true,
		value: await parseJsonOrDiagnose(response, routePath)
	};

	const chunks = [];
	let bufferedBytes = 0;
	let capped = false;
	try {
		while (true) {
			const { done, value } = await reader.read();
			if (done) break;
			if (!value) continue;
			if (bufferedBytes + value.byteLength > capBytes) {
				capped = true;
				// Best-effort: stop the download; a cancel failure must
				// not replace the cap error.
				try {
					await reader.cancel("size_cap");
				} catch {}
				break;
			}
			chunks.push(value);
			bufferedBytes += value.byteLength;
		}
	} catch (err) {
		consola.warn(`readResponseBodyCapped: read error at ${routePath}:`, err);
	}
	if (capped) return capExceeded();

	// Stitch the buffered chunks together before decoding so multi-byte
	// UTF-8 sequences split across chunk boundaries decode correctly.
	const merged = new Uint8Array(bufferedBytes);
	let offset = 0;
	for (const chunk of chunks) {
		merged.set(chunk, offset);
		offset += chunk.byteLength;
	}
	const text = new TextDecoder().decode(merged);
	try {
		return {
			ok: true,
			value: JSON.parse(text)
		};
	} catch (err) {
		const preview = text.slice(0, 200);
		const contentType = response.headers.get("content-type") ?? "(none)";
		consola.error(`Upstream JSON parse failed at ${routePath}: status=${response.status} content-type="${contentType}" body[0..200]=${JSON.stringify(preview)}`);
		throw err;
	}
}
11869
11998
  const isWebSearchTool = (tool) => typeof tool.type === "string" && tool.type.startsWith("web_search") || tool.name === "web_search";
11870
11999
  /**
11871
12000
  * Extract whitelisted beta headers from the incoming request to forward
@@ -12079,7 +12208,9 @@ async function handleCompletion(c) {
12079
12208
  headers: streamHeaders
12080
12209
  });
12081
12210
  }
12082
- const responseBody = await parseJsonOrDiagnose(response, c.req.path);
12211
+ const cappedResult = await readResponseBodyCapped(response, c.req.path, NON_STREAMING_BODY_CAP_BYTES);
12212
+ if (!cappedResult.ok) return c.json(cappedResult.errorResponse, cappedResult.status);
12213
+ const responseBody = cappedResult.value;
12083
12214
  logRequest({
12084
12215
  method: "POST",
12085
12216
  path: c.req.path,
@@ -13023,7 +13154,7 @@ const claude = defineCommand({
13023
13154
  model: {
13024
13155
  alias: "m",
13025
13156
  type: "string",
13026
- description: "Override the default model for Claude Code"
13157
+ description: "Override the default model for Claude Code. Accepts a full slug (e.g. claude-opus-4-7) or an Opus family shorthand (e.g. 4.7, 4.8, 4.6) which expands to the best variant for that family — adding the [1m] suffix when a 1M-context backend is in the catalog."
13027
13158
  },
13028
13159
  "codex-mcp": {
13029
13160
  type: "boolean",
@@ -13102,7 +13233,8 @@ const claude = defineCommand({
13102
13233
  }
13103
13234
  enableFileLogging();
13104
13235
  const usingDefault = !args.model;
13105
- let chosenSlug = args.model ?? pickClaudeDefault();
13236
+ const opusFamilyShorthand = args.model?.match(/^(\d+\.\d+)$/)?.[1];
13237
+ let chosenSlug = opusFamilyShorthand ? pickClaudeDefault(opusFamilyShorthand) : args.model ?? pickClaudeDefault();
13106
13238
  let resolvedSlug = resolveModel(chosenSlug);
13107
13239
  if (usingDefault && state.models) {
13108
13240
  const inCache = (slug) => state.models?.data.some((m) => m.id === resolveModel(slug)) ?? false;