github-router 0.3.18 → 0.3.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -6,7 +6,8 @@ import os from "node:os";
6
6
  import path from "node:path";
7
7
  import { randomBytes, randomUUID, timingSafeEqual } from "node:crypto";
8
8
  import process$1 from "node:process";
9
- import { execFileSync, spawn } from "node:child_process";
9
+ import { execFile, execFileSync, spawn } from "node:child_process";
10
+ import { promisify } from "node:util";
10
11
  import fs$1 from "node:fs";
11
12
  import { Writable } from "node:stream";
12
13
  import { serve } from "srvx";
@@ -494,9 +495,22 @@ const VSCODE_BETA_PREFIXES = [
494
495
  * to work with the Copilot API.
495
496
  *
496
497
  * Notably absent (Copilot 400s on these — verified live):
497
- * context-1m-, skills-, files-api-, code-execution-, output-128k-.
498
+ * context-1m-, skills-, files-api-, code-execution-, output-128k-,
499
+ * advisor-tool- (see EXPLICITLY_STRIPPED_BETA_PREFIXES below).
498
500
  * 1M context is unlocked by selecting `claude-opus-4.7-1m-internal`
499
501
  * as the model id, not via a beta header.
502
+ *
503
+ * Empirical verification (2026-05-11 against api.enterprise.githubcopilot.com):
504
+ * task-budgets-2026-03-13 → 200 ACCEPTED (cost-ceiling leverage)
505
+ * token-efficient-tools-2026-03-28 → 200 ACCEPTED (per-tool token saving)
506
+ * summarize-connector-text-2026-03-13 → 200 (Anthropic-internal feature flag,
507
+ * won't fire for non-ant users; allowlisted defensively for ant edge case)
508
+ * afk-mode-2026-01-31 → 200 (Anthropic-internal feature flag)
509
+ * cli-internal-2026-02-09 → 200 (USER_TYPE=ant only)
510
+ * oauth-2025-04-20 → 200 (Files-API path; Files-API itself
511
+ * is not supportable via Copilot, but the header alone is harmless)
512
+ * prompt-caching-scope-2026-01-05 → 200 even with body cache_control.scope
513
+ * stripped (already covered by `prompt-caching-` prefix above)
500
514
  */
501
515
  const EXTENDED_BETA_PREFIXES = [
502
516
  ...VSCODE_BETA_PREFIXES,
@@ -513,17 +527,39 @@ const EXTENDED_BETA_PREFIXES = [
513
527
  "mcp-client-",
514
528
  "mcp-servers-",
515
529
  "redact-thinking-",
516
- "web-search-"
530
+ "web-search-",
531
+ "task-budgets-",
532
+ "token-efficient-tools-",
533
+ "summarize-connector-text-",
534
+ "afk-mode-",
535
+ "cli-internal-",
536
+ "oauth-"
517
537
  ];
518
538
  /**
539
+ * Beta prefixes the proxy explicitly STRIPS even from the extended
540
+ * allowlist (and even if a future leverage mode broadens the allowlist
541
+ * further). Defensive layer: today's allowlist-only filter would already
542
+ * drop these because they're not in any allowlist, but keeping an
543
+ * explicit deny-list catches future changes that broaden allow rules
544
+ * (e.g. a hypothetical pattern-based mode that lets `claude-*` through).
545
+ *
546
+ * Empirical (2026-05-11): Copilot returns HTTP 400
547
+ * `unsupported beta header(s): advisor-tool-2026-03-01`
548
+ * on every request that includes `advisor-tool-`. Stripping it is the
549
+ * difference between a working request (no ADVISOR semantics) and a
550
+ * fully-failed request. Document upstream limitation in CLAUDE.md.
551
+ */
552
+ const EXPLICITLY_STRIPPED_BETA_PREFIXES = ["advisor-tool-"];
553
+ /**
519
554
  * Filter an `anthropic-beta` header value, keeping only beta flags
520
- * in the active whitelist. Uses extended prefixes when --extended-betas
521
- * is enabled, VS Code-only prefixes otherwise.
522
- * Returns the filtered comma-separated string, or undefined if nothing remains.
555
+ * in the active whitelist AND not in the explicit-strip list.
556
+ * Uses extended prefixes when --extended-betas is enabled, VS Code-only
557
+ * prefixes otherwise. Returns the filtered comma-separated string,
558
+ * or undefined if nothing remains.
523
559
  */
524
560
  function filterBetaHeader(value) {
525
561
  const prefixes = state.extendedBetas ? EXTENDED_BETA_PREFIXES : VSCODE_BETA_PREFIXES;
526
- return value.split(",").map((v) => v.trim()).filter((v) => v && prefixes.some((prefix) => v.startsWith(prefix))).join(",") || void 0;
562
+ return value.split(",").map((v) => v.trim()).filter((v) => v && prefixes.some((prefix) => v.startsWith(prefix)) && !EXPLICITLY_STRIPPED_BETA_PREFIXES.some((p) => v.startsWith(p))).join(",") || void 0;
527
563
  }
528
564
  /**
529
565
  * Normalize a model ID for fuzzy comparison: lowercase, replace dots with
@@ -541,7 +577,11 @@ function normalizeModelId(id) {
541
577
  * 2. Case-insensitive match
542
578
  * 3. Family preference (opus→1m, codex→highest version)
543
579
  * 4. Normalized match (dots→dashes, letter-digit boundaries)
544
- * 5. Return as-is with a warning
580
+ * 5. Anthropic dated-slug retry: if the input matches `claude-...-YYYYMMDD`,
581
+ * strip the date and re-run the cascade once. Family-guarded so non-claude
582
+ * 8-digit suffixes can't be mis-stripped; runs after Steps 1-4 so explicit
583
+ * version pinning (a dated catalog id matched at Step 1) always wins.
584
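+ * 6. Legacy `claude-*` sonnet/haiku slugs fall back to the highest catalog id of that family; anything else is returned as-is with a warning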
545
585
  */
546
586
  function resolveModel(modelId) {
547
587
  const models = state.models?.data;
@@ -567,6 +607,28 @@ function resolveModel(modelId) {
567
607
  const normalized = normalizeModelId(modelId);
568
608
  const normMatch = models.find((m) => normalizeModelId(m.id) === normalized);
569
609
  if (normMatch) return normMatch.id;
610
+ const dateStripped = modelId.replace(/^(claude-[\w.-]+)-20\d{6}$/i, "$1");
611
+ if (dateStripped !== modelId) {
612
+ const retried = resolveModel(dateStripped);
613
+ if (retried !== dateStripped || models.some((m) => m.id === dateStripped)) {
614
+ consola.info(`Resolved Anthropic dated slug "${modelId}" → "${retried}" (stripped -YYYYMMDD; pass an explicit catalog id to pin a snapshot)`);
615
+ return retried;
616
+ }
617
+ }
618
+ if (lower.startsWith("claude-")) {
619
+ const matchSonnet = /(?:^|-)sonnet(?:-|$)/.test(lower);
620
+ const matchHaiku = /(?:^|-)haiku(?:-|$)/.test(lower);
621
+ if (matchSonnet || matchHaiku) {
622
+ const family = matchSonnet ? "sonnet" : "haiku";
623
+ const familyMembers = models.filter((m) => (/* @__PURE__ */ new RegExp(`(?:^|-)${family}(?:-|$|\\.)`)).test(m.id));
624
+ if (familyMembers.length > 0) {
625
+ familyMembers.sort((a, b) => b.id.localeCompare(a.id, void 0, { numeric: true }));
626
+ const best = familyMembers[0].id;
627
+ consola.info(`Model "${modelId}" not in Copilot catalog; falling back to highest available "${best}" (legacy ${family} slug). Pin a current catalog id to silence.`);
628
+ return best;
629
+ }
630
+ }
631
+ }
570
632
  consola.warn(`Model "${modelId}" not found in Copilot model list. Available: ${models.map((m) => m.id).join(", ")}`);
571
633
  return modelId;
572
634
  }
@@ -823,6 +885,177 @@ const checkUsage = defineCommand({
823
885
  }
824
886
  });
825
887
 
888
+ //#endregion
889
+ //#region src/lib/claude-version-check.ts
890
+ const execFileAsync = promisify(execFile);
891
+ const NPM_PACKAGE = "@anthropic-ai/claude-code";
892
+ const THROTTLE_HOURS = 1;
893
+ const NPM_VIEW_TIMEOUT_MS = 5e3;
894
+ const NPM_INSTALL_TIMEOUT_MS = 12e4;
895
+ /** Path to the throttle cache. Created on demand. */
896
+ function cacheFilePath() {
897
+ return path.join(os.homedir(), ".local", "share", "github-router", "last-update-check");
898
+ }
899
+ /**
900
+ * Read the throttle cache. Returns null on missing/corrupt file —
901
+ * triggers a fresh check.
902
+ */
903
+ async function readCache() {
904
+ try {
905
+ const raw = await fs.readFile(cacheFilePath(), "utf8");
906
+ const parsed = JSON.parse(raw);
907
+ if (typeof parsed.checkedAt !== "string" || parsed.installedVersion !== null && typeof parsed.installedVersion !== "string" || parsed.latestVersion !== null && typeof parsed.latestVersion !== "string") return null;
908
+ return parsed;
909
+ } catch {
910
+ return null;
911
+ }
912
+ }
913
+ async function writeCache(cache) {
914
+ try {
915
+ await fs.mkdir(path.dirname(cacheFilePath()), { recursive: true });
916
+ await fs.writeFile(cacheFilePath(), JSON.stringify(cache), { mode: 384 });
917
+ } catch (err) {
918
+ consola.debug("Failed to write claude version-check cache:", err);
919
+ }
920
+ }
921
+ /** Check whether at least THROTTLE_HOURS have elapsed since the last check (or no valid cache exists). */
922
+ function shouldCheckNow(cache) {
923
+ if (!cache) return true;
924
+ const lastCheck = new Date(cache.checkedAt).getTime();
925
+ if (Number.isNaN(lastCheck)) return true;
926
+ return (Date.now() - lastCheck) / 1e3 / 3600 >= THROTTLE_HOURS;
927
+ }
928
+ /**
929
+ * Read the installed `claude` version. Returns null if claude is not
930
+ * on PATH or the version probe fails (e.g. older versions that don't
931
+ * support `--version` cleanly).
932
+ */
933
+ function getInstalledVersion() {
934
+ try {
935
+ const match = execFileSync("claude", ["--version"], {
936
+ stdio: [
937
+ "ignore",
938
+ "pipe",
939
+ "ignore"
940
+ ],
941
+ timeout: 3e3,
942
+ encoding: "utf8"
943
+ }).match(/^(\d+\.\d+\.\d+)/);
944
+ return match ? match[1] : null;
945
+ } catch {
946
+ return null;
947
+ }
948
+ }
949
+ /**
950
+ * Fetch the latest version of @anthropic-ai/claude-code from the npm
951
+ * registry. Returns null on network failure / npm unavailable.
952
+ */
953
+ async function getLatestVersion() {
954
+ try {
955
+ const { stdout } = await execFileAsync("npm", [
956
+ "view",
957
+ NPM_PACKAGE,
958
+ "version",
959
+ "--silent"
960
+ ], { timeout: NPM_VIEW_TIMEOUT_MS });
961
+ const v = stdout.trim();
962
+ return /^\d+\.\d+\.\d+/.test(v) ? v : null;
963
+ } catch {
964
+ return null;
965
+ }
966
+ }
967
+ /**
968
+ * Compare two semver-shaped strings (only the leading X.Y.Z, no
969
+ * pre-release / metadata handling — sufficient for npm-published
970
+ * stable releases). Returns true if `latest` is strictly higher than
971
+ * `installed`.
972
+ */
973
+ function isNewer(installed, latest) {
974
+ if (!installed || !latest) return false;
975
+ const a = installed.split(".").map((n) => parseInt(n, 10));
976
+ const b = latest.split(".").map((n) => parseInt(n, 10));
977
+ for (let i = 0; i < 3; i++) {
978
+ const av = a[i] ?? 0;
979
+ const bv = b[i] ?? 0;
980
+ if (av < bv) return true;
981
+ if (av > bv) return false;
982
+ }
983
+ return false;
984
+ }
985
+ /**
986
+ * Run a version check (subject to throttle). Side-effect: updates the
987
+ * throttle cache. Returns the comparison result.
988
+ */
989
+ async function checkClaudeVersion(opts = {}) {
990
+ if (opts.noCheck) return {
991
+ installed: false,
992
+ installedVersion: null,
993
+ latestVersion: null,
994
+ needsUpdate: false,
995
+ skipped: true,
996
+ skipReason: "disabled"
997
+ };
998
+ const cache = await readCache();
999
+ if (!opts.force && !shouldCheckNow(cache)) return {
1000
+ installed: cache?.installedVersion !== null,
1001
+ installedVersion: cache?.installedVersion ?? null,
1002
+ latestVersion: cache?.latestVersion ?? null,
1003
+ needsUpdate: isNewer(cache?.installedVersion ?? null, cache?.latestVersion ?? null),
1004
+ skipped: true,
1005
+ skipReason: "throttled"
1006
+ };
1007
+ const installedVersion = getInstalledVersion();
1008
+ if (installedVersion === null) return {
1009
+ installed: false,
1010
+ installedVersion: null,
1011
+ latestVersion: null,
1012
+ needsUpdate: false,
1013
+ skipped: true,
1014
+ skipReason: "no-claude"
1015
+ };
1016
+ const latestVersion = await getLatestVersion();
1017
+ await writeCache({
1018
+ checkedAt: (/* @__PURE__ */ new Date()).toISOString(),
1019
+ installedVersion,
1020
+ latestVersion
1021
+ });
1022
+ if (latestVersion === null) return {
1023
+ installed: true,
1024
+ installedVersion,
1025
+ latestVersion: null,
1026
+ needsUpdate: false,
1027
+ skipped: true,
1028
+ skipReason: "no-npm"
1029
+ };
1030
+ return {
1031
+ installed: true,
1032
+ installedVersion,
1033
+ latestVersion,
1034
+ needsUpdate: isNewer(installedVersion, latestVersion),
1035
+ skipped: false
1036
+ };
1037
+ }
1038
+ /**
1039
+ * Run `npm install -g @anthropic-ai/claude-code@latest` and await its completion.
1040
+ * Throws on failure — the caller decides whether to abort the launch
1041
+ * or continue with the older version.
1042
+ */
1043
+ async function autoUpdateClaude(latestVersion) {
1044
+ consola.info(`Updating ${NPM_PACKAGE} to ${latestVersion} (this may take ~30s)...`);
1045
+ try {
1046
+ await execFileAsync("npm", [
1047
+ "install",
1048
+ "-g",
1049
+ `${NPM_PACKAGE}@latest`,
1050
+ "--silent"
1051
+ ], { timeout: NPM_INSTALL_TIMEOUT_MS });
1052
+ consola.success(`${NPM_PACKAGE} updated to ${latestVersion}`);
1053
+ } catch (err) {
1054
+ const msg = err instanceof Error ? err.message : String(err);
1055
+ throw new Error(`npm install failed: ${msg}`);
1056
+ }
1057
+ }
1058
+
826
1059
  //#endregion
827
1060
  //#region src/lib/port.ts
828
1061
  const DEFAULT_PORT = 8787;
@@ -907,6 +1140,15 @@ const STRIPPED_PARENT_ENV_KEYS = [
907
1140
  "CLAUDE_CODE_USE_VERTEX",
908
1141
  "CLAUDE_CODE_USE_FOUNDRY",
909
1142
  "CLAUDE_CONFIG_DIR",
1143
+ "CLAUDE_BRIDGE_OAUTH_TOKEN",
1144
+ "CLAUDE_BRIDGE_BASE_URL",
1145
+ "CLAUDE_BRIDGE_SESSION_INGRESS_URL",
1146
+ "SESSION_INGRESS_URL",
1147
+ "CLAUDE_CODE_REMOTE",
1148
+ "CLAUDE_CODE_CONTAINER_ID",
1149
+ "CLAUDE_CODE_REMOTE_SESSION_ID",
1150
+ "CLAUDE_CODE_SESSION_ID",
1151
+ "CLAUDE_CODE_ADDITIONAL_PROTECTION",
910
1152
  "OPENAI_API_KEY",
911
1153
  "OPENAI_BASE_URL",
912
1154
  "CODEX_HOME"
@@ -1771,7 +2013,7 @@ function initProxyFromEnv() {
1771
2013
  //#endregion
1772
2014
  //#region package.json
1773
2015
  var name = "github-router";
1774
- var version = "0.3.18";
2016
+ var version = "0.3.20";
1775
2017
 
1776
2018
  //#endregion
1777
2019
  //#region src/lib/approval.ts
@@ -1885,7 +2127,7 @@ function detectCapabilityMismatch(info, model) {
1885
2127
 
1886
2128
  //#endregion
1887
2129
  //#region src/lib/stream-relay.ts
1888
- const ENCODER$2 = new TextEncoder();
2130
+ const ENCODER$3 = new TextEncoder();
1889
2131
  /**
1890
2132
  * Detect the family of "controller has already closed" errors that Bun and
1891
2133
  * the WHATWG streams runtime throw when an enqueue/close call races with
@@ -1975,7 +2217,7 @@ function relayAnthropicStream(body, opts) {
1975
2217
  consola.error(`Upstream stream interrupted at ${opts.routePath}: bytes=${bytesRelayed} errType=${errName} message=${JSON.stringify(errMessage)}`);
1976
2218
  const event = buildAnthropicErrorEvent(errName, errMessage);
1977
2219
  try {
1978
- controller.enqueue(ENCODER$2.encode(event));
2220
+ controller.enqueue(ENCODER$3.encode(event));
1979
2221
  } catch (enqueueError) {
1980
2222
  if (!isControllerClosedError(enqueueError)) consola.warn(`Could not deliver error event to consumer at ${opts.routePath}: ${enqueueError instanceof Error ? enqueueError.message : String(enqueueError)}`);
1981
2223
  }
@@ -2459,7 +2701,7 @@ async function searchWeb(query) {
2459
2701
 
2460
2702
  //#endregion
2461
2703
  //#region src/routes/chat-completions/handler.ts
2462
- const ENCODER$1 = new TextEncoder();
2704
+ const ENCODER$2 = new TextEncoder();
2463
2705
  function formatSSE$1(chunk) {
2464
2706
  const parts = [];
2465
2707
  if (chunk.event) parts.push(`event: ${chunk.event}`);
@@ -2558,7 +2800,7 @@ async function handleCompletion$1(c) {
2558
2800
  const chunk = pendingFirstChunk;
2559
2801
  pendingFirstChunk = void 0;
2560
2802
  if (debugEnabled) consola.debug("Streaming chunk:", JSON.stringify(chunk));
2561
- safeEnqueue(controller, ENCODER$1.encode(formatSSE$1(chunk)));
2803
+ safeEnqueue(controller, ENCODER$2.encode(formatSSE$1(chunk)));
2562
2804
  return;
2563
2805
  }
2564
2806
  try {
@@ -2574,7 +2816,7 @@ async function handleCompletion$1(c) {
2574
2816
  }
2575
2817
  if (result.value === void 0 || result.value === null) return;
2576
2818
  if (debugEnabled) consola.debug("Streaming chunk:", JSON.stringify(result.value));
2577
- safeEnqueue(controller, ENCODER$1.encode(formatSSE$1(result.value)));
2819
+ safeEnqueue(controller, ENCODER$2.encode(formatSSE$1(result.value)));
2578
2820
  } catch (error) {
2579
2821
  upstreamFinished = true;
2580
2822
  if (consumerCancelled) {
@@ -2583,7 +2825,7 @@ async function handleCompletion$1(c) {
2583
2825
  return;
2584
2826
  }
2585
2827
  const { errName, errMessage } = logStreamError(c.req.path, error);
2586
- safeEnqueue(controller, ENCODER$1.encode(buildOpenAIErrorEvent(errName, errMessage)));
2828
+ safeEnqueue(controller, ENCODER$2.encode(buildOpenAIErrorEvent(errName, errMessage)));
2587
2829
  releaseUpstream(error);
2588
2830
  safeClose(controller);
2589
2831
  }
@@ -2708,7 +2950,13 @@ const createResponses = async (payload, modelHeaders, callerSignal) => {
2708
2950
  };
2709
2951
  const response = await tryRefreshAndRetry(doFetch, "/responses");
2710
2952
  if (!response.ok) {
2711
- consola.error("Failed to create responses", response);
2953
+ let bodyText;
2954
+ try {
2955
+ bodyText = await response.clone().text();
2956
+ } catch {
2957
+ bodyText = "(failed to read body)";
2958
+ }
2959
+ consola.error(`Failed to create responses: HTTP ${response.status} ${response.statusText} from ${url} — body: ${bodyText.slice(0, 2e3)}`);
2712
2960
  throw new HTTPError("Failed to create responses", response);
2713
2961
  }
2714
2962
  if (payload.stream) return events(response);
@@ -2772,6 +3020,23 @@ function isEffort(v) {
2772
3020
  * § "Concurrency cap investigation" for the full justification. */
2773
3021
  const MAX_INFLIGHT_TOOLS_CALL = 8;
2774
3022
  let inFlightToolsCall = 0;
3023
+ /**
3024
+ * Per-request AbortController registry for `notifications/cancelled`
3025
+ * (Phase D P1.5). When a client times out a tools/call before the
3026
+ * upstream Copilot fetch completes, the JSON-RPC notification:
3027
+ * { jsonrpc:"2.0", method:"notifications/cancelled",
3028
+ * params:{ requestId: "<id>", reason?: "..." } }
3029
+ * arrives. Without handling, the upstream fetch keeps running until
3030
+ * natural completion, leaking the inFlightToolsCall slot for tens of
3031
+ * minutes. Tracking the AbortController lets us abort the fetch and
3032
+ * free the slot immediately.
3033
+ *
3034
+ * Important: per CLAUDE.md "Bun request-signal quirk", we use OUR own
3035
+ * AbortController (NOT c.req.raw.signal which fires after request body
3036
+ * is consumed). The signal is threaded into createResponses /
3037
+ * createChatCompletions's `callerSignal` parameter.
3038
+ */
3039
+ const inflightAborts = /* @__PURE__ */ new Map();
2775
3040
  const RPC_PARSE_ERROR = -32700;
2776
3041
  const RPC_INVALID_REQUEST = -32600;
2777
3042
  const RPC_METHOD_NOT_FOUND = -32601;
@@ -2908,7 +3173,7 @@ function toolError(message) {
2908
3173
  isError: true
2909
3174
  };
2910
3175
  }
2911
- async function callPersona(persona, prompt, context, effort) {
3176
+ async function callPersona(persona, prompt, context, effort, signal) {
2912
3177
  const resolvedModel = resolveModel(persona.model);
2913
3178
  const userText = buildUserText(prompt, context);
2914
3179
  if (persona.endpoint === "/v1/responses") {
@@ -2924,7 +3189,7 @@ async function callPersona(persona, prompt, context, effort) {
2924
3189
  }],
2925
3190
  stream: false,
2926
3191
  reasoning: { effort }
2927
- }));
3192
+ }, void 0, signal));
2928
3193
  if (!text$1) return toolError(`persona ${persona.agentName}: empty assistant output`);
2929
3194
  return { content: [{
2930
3195
  type: "text",
@@ -2942,7 +3207,7 @@ async function callPersona(persona, prompt, context, effort) {
2942
3207
  }],
2943
3208
  stream: false,
2944
3209
  reasoning_effort: effort
2945
- }));
3210
+ }, void 0, signal));
2946
3211
  if (!text) return toolError(`persona ${persona.agentName}: empty assistant output`);
2947
3212
  return { content: [{
2948
3213
  type: "text",
@@ -2984,8 +3249,14 @@ async function handleToolsCall(body) {
2984
3249
  });
2985
3250
  inFlightToolsCall++;
2986
3251
  const startedAt = Date.now();
3252
+ const abortKey = body.id !== void 0 && body.id !== null ? body.id : void 0;
3253
+ let aborter;
3254
+ if (abortKey !== void 0) {
3255
+ aborter = new AbortController();
3256
+ inflightAborts.set(abortKey, aborter);
3257
+ }
2987
3258
  try {
2988
- const result = await callPersona(persona, prompt, context, effort);
3259
+ const result = await callPersona(persona, prompt, context, effort, aborter?.signal);
2989
3260
  logTelemetry({
2990
3261
  name: persona.agentName,
2991
3262
  model: persona.model,
@@ -3011,8 +3282,25 @@ async function handleToolsCall(body) {
3011
3282
  });
3012
3283
  } finally {
3013
3284
  inFlightToolsCall--;
3285
+ if (abortKey !== void 0) inflightAborts.delete(abortKey);
3014
3286
  }
3015
3287
  }
3288
+ /**
3289
+ * Handle `notifications/cancelled` per JSON-RPC 2.0 + MCP spec.
3290
+ * params.requestId is the id of an in-flight tools/call to abort.
3291
+ * Notifications return no body (handled by isNotification path in
3292
+ * handleRpc); this side-effect frees the in-flight slot.
3293
+ */
3294
+ function handleCancelledNotification(body) {
3295
+ const requestId = (body.params ?? {}).requestId;
3296
+ if (requestId === void 0 || typeof requestId !== "string" && typeof requestId !== "number") {
3297
+ consola.debug(`[mcp] notifications/cancelled missing or invalid requestId: ${JSON.stringify(requestId)}`);
3298
+ return;
3299
+ }
3300
+ const aborter = inflightAborts.get(requestId);
3301
+ if (!aborter) return;
3302
+ aborter.abort(/* @__PURE__ */ new Error("client requested cancellation"));
3303
+ }
3016
3304
  async function handleRpc(_c, body) {
3017
3305
  if (body === null || typeof body !== "object" || Array.isArray(body)) return {
3018
3306
  status: 200,
@@ -3033,7 +3321,11 @@ async function handleRpc(_c, body) {
3033
3321
  status: 200,
3034
3322
  body: rpcResult(body.id, {
3035
3323
  protocolVersion: MCP_PROTOCOL_VERSION,
3036
- capabilities: { tools: { listChanged: false } },
3324
+ capabilities: {
3325
+ tools: { listChanged: false },
3326
+ resources: {},
3327
+ prompts: {}
3328
+ },
3037
3329
  serverInfo: {
3038
3330
  name: SERVER_NAME,
3039
3331
  version: SERVER_VERSION
@@ -3062,6 +3354,61 @@ async function handleRpc(_c, body) {
3062
3354
  status: 200,
3063
3355
  body: await handleToolsCall(body)
3064
3356
  };
3357
+ case "resources/list":
3358
+ if (isNotification) return {
3359
+ status: 202,
3360
+ body: null
3361
+ };
3362
+ return {
3363
+ status: 200,
3364
+ body: rpcResult(body.id, { resources: [] })
3365
+ };
3366
+ case "resources/templates/list":
3367
+ if (isNotification) return {
3368
+ status: 202,
3369
+ body: null
3370
+ };
3371
+ return {
3372
+ status: 200,
3373
+ body: rpcResult(body.id, { resourceTemplates: [] })
3374
+ };
3375
+ case "resources/read": {
3376
+ if (isNotification) return {
3377
+ status: 202,
3378
+ body: null
3379
+ };
3380
+ const uri = body.params?.uri;
3381
+ return {
3382
+ status: 200,
3383
+ body: rpcError(body.id, RPC_INVALID_PARAMS, `resources/read: resource URI not found: ${typeof uri === "string" ? uri : "(missing/invalid uri)"}`)
3384
+ };
3385
+ }
3386
+ case "prompts/list":
3387
+ if (isNotification) return {
3388
+ status: 202,
3389
+ body: null
3390
+ };
3391
+ return {
3392
+ status: 200,
3393
+ body: rpcResult(body.id, { prompts: [] })
3394
+ };
3395
+ case "prompts/get": {
3396
+ if (isNotification) return {
3397
+ status: 202,
3398
+ body: null
3399
+ };
3400
+ const name$1 = body.params?.name;
3401
+ return {
3402
+ status: 200,
3403
+ body: rpcError(body.id, RPC_INVALID_PARAMS, `prompts/get: prompt name not found: ${typeof name$1 === "string" ? name$1 : "(missing/invalid name)"}`)
3404
+ };
3405
+ }
3406
+ case "notifications/cancelled":
3407
+ handleCancelledNotification(body);
3408
+ return {
3409
+ status: 202,
3410
+ body: null
3411
+ };
3065
3412
  case "ping":
3066
3413
  if (isNotification) return {
3067
3414
  status: 202,
@@ -3228,6 +3575,742 @@ async function countTokens(body, extraHeaders) {
3228
3575
  return response;
3229
3576
  }
3230
3577
 
3578
+ //#endregion
3579
+ //#region src/services/advisor/advisor.ts
3580
/** Shared text encoder; the advisor stream uses it to turn SSE frames into bytes. */
const ENCODER$1 = new TextEncoder();
/**
 * Tool name injected into the upstream (Copilot) request. The
 * double-underscore prefix avoids collision with any user MCP server's
 * `advisor` tool.
 */
const ADVISOR_INTERNAL_TOOL_NAME = "__anthropic_advisor";
/**
 * Anthropic-spec tool name placed on the translated `server_tool_use`
 * block sent to the client. cc-backup AdvisorMessage.tsx requires this
 * exact name to render the advisor spinner.
 */
const ADVISOR_CLIENT_TOOL_NAME = "advisor";
/**
 * Hard cap on advisor calls per request, bounding runaway behavior.
 * Matches Phase G's loop bound; ADVISOR is typically called 1-3 times per
 * session per cc-backup ADVISOR_TOOL_INSTRUCTIONS.
 */
const ADVISOR_MAX_TURNS = 16;
/**
 * Default advisor model. Hardcoded to a cross-lab model (gpt-5.5, Copilot's
 * /responses-only flagship) so the review is a true "second set of eyes"
 * rather than the main model reviewing itself.
 */
const ADVISOR_DEFAULT_MODEL = "gpt-5.5";
/** Default reasoning effort for the advisor call (deep-dive review). */
const ADVISOR_DEFAULT_EFFORT = "xhigh";
/**
 * ADVISOR_TOOL_INSTRUCTIONS, verbatim from cc-backup src/utils/advisor.ts.
 * Describes when the model should invoke the advisor. Assembled from its
 * paragraphs (blank-line separated); the bullet list is one paragraph whose
 * lines are newline separated.
 */
const ADVISOR_TOOL_INSTRUCTIONS = [
	"# Advisor Tool",
	"You have access to an `advisor` tool backed by a stronger reviewer model. It takes NO parameters -- when you call it, your entire conversation history is automatically forwarded. The advisor sees the task, every tool call you've made, every result you've seen.",
	"Call advisor BEFORE substantive work -- before writing code, before committing to an interpretation, before building on an assumption. If the task requires orientation first (finding files, reading code, seeing what's there), do that, then call advisor. Orientation is not substantive work. Writing, editing, and declaring an answer are.",
	[
		"Also call advisor:",
		"- When you believe the task is complete. BEFORE this call, make your deliverable durable: write the file, stage the change, save the result. The advisor call takes time; if the session ends during it, a durable result persists and an unwritten one doesn't.",
		"- When stuck -- errors recurring, approach not converging, results that don't fit.",
		"- When considering a change of approach."
	].join("\n"),
	"On tasks longer than a few steps, call advisor at least once before committing to an approach and once before declaring done. On short reactive tasks where the next action is dictated by tool output you just read, you don't need to keep calling -- the advisor adds most of its value on the first call, before the approach crystallizes.",
	"Give the advice serious weight. If you follow a step and it fails empirically, or you have primary-source evidence that contradicts a specific claim (the file says X, the code does Y), adapt. A passing self-test is not evidence the advice is wrong -- it's evidence your test doesn't check what the advice is checking.",
	`If you've already retrieved data pointing one way and the advisor points another: don't silently switch. Surface the conflict in one more advisor call -- "I found X, you suggest Y, which constraint breaks the tie?" The advisor saw your evidence but may have underweighted it; a reconcile call is cheaper than committing to the wrong branch.`
].join("\n\n");
3620
/** Environment variable a user sets to globally disable the advisor tool. */
const ADVISOR_OPT_OUT_ENV = "CLAUDE_CODE_DISABLE_ADVISOR_TOOL";
/**
 * Detect whether the request asked for ADVISOR, i.e. the incoming
 * `anthropic-beta` header lists an entry starting with `advisor-tool-`.
 *
 * The `CLAUDE_CODE_DISABLE_ADVISOR_TOOL` opt-out wins over the header.
 * Explicitly "off" values (empty, `0`, `false`, `no`, `off`; case and
 * surrounding whitespace ignored) do NOT count as an opt-out, so
 * `CLAUDE_CODE_DISABLE_ADVISOR_TOOL=0` keeps the advisor enabled. Any other
 * value disables it.
 *
 * @param rawBetaHeader Raw comma-separated `anthropic-beta` header value,
 *   or a falsy value when the header is absent.
 * @returns true when the advisor was requested and is not opted out.
 */
function isAdvisorRequested(rawBetaHeader) {
	if (!rawBetaHeader) return false;
	const optOut = process.env[ADVISOR_OPT_OUT_ENV];
	if (optOut !== void 0) {
		const normalized = optOut.trim().toLowerCase();
		const meansEnabled = [
			"",
			"0",
			"false",
			"no",
			"off"
		].includes(normalized);
		if (!meansEnabled) return false;
	}
	return rawBetaHeader.split(",").some((entry) => entry.trim().startsWith("advisor-tool-"));
}
3633
/**
 * Inject the `__anthropic_advisor` tool definition into the request body's
 * `tools` array and return the (possibly rewritten) body string.
 *
 * Also strips any tool entry whose `type` starts with `advisor_` — the
 * Anthropic API's native server-side advisor tool (`advisor_20260301` and
 * future variants), which Claude Code adds when
 * `CLAUDE_CODE_ENABLE_EXPERIMENTAL_ADVISOR_TOOL=1`. Copilot 400s on the
 * unknown tool type, so it must be removed before forwarding; the custom
 * tool injected here is what the model actually invokes.
 *
 * Idempotent: when the custom tool is already present and nothing was
 * stripped, the original string is returned untouched.
 *
 * Bodies that are not valid JSON, or that parse to something other than a
 * plain object (`null`, numbers, strings, arrays), are returned unchanged
 * rather than throwing — there is no `tools` array to rewrite.
 *
 * @param rawBody Raw JSON request body.
 * @returns The body string to forward upstream.
 */
function injectAdvisorTool(rawBody) {
	let parsed;
	try {
		parsed = JSON.parse(rawBody);
	} catch {
		return rawBody;
	}
	if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) return rawBody;
	const rawTools = Array.isArray(parsed.tools) ? parsed.tools : [];
	const tools = rawTools.filter((tool) => {
		if (typeof tool !== "object" || tool === null) return true;
		const type = tool.type;
		return typeof type !== "string" || !type.startsWith("advisor_");
	});
	const stripped = tools.length !== rawTools.length;
	const alreadyInjected = tools.some((tool) => tool?.name === ADVISOR_INTERNAL_TOOL_NAME);
	if (alreadyInjected && !stripped) return rawBody;
	parsed.tools = alreadyInjected ? tools : [...tools, {
		name: ADVISOR_INTERNAL_TOOL_NAME,
		description: ADVISOR_TOOL_INSTRUCTIONS,
		input_schema: {
			type: "object",
			properties: {},
			required: []
		}
	}];
	return JSON.stringify(parsed);
}
3679
/**
 * Character budget for the rendered conversation text passed to the
 * advisor model. gpt-5.5 (default advisor) caps prompt input at 272,000
 * tokens. At a conservative ~3 chars/token (mixed prose + code + JSON),
 * 720,000 chars renders to ≈240,000 tokens, leaving ~32,000 tokens of
 * headroom for the system prompt and per-turn framing overhead. Without
 * this cap, long Claude Code sessions produce 400
 * `model_max_prompt_tokens_exceeded` from /v1/responses and the advisor
 * falls back silently.
 */
const ADVISOR_MAX_CONVERSATION_CHARS = 72e4;
/**
 * Render an Anthropic-shape conversation (messages with role/content
 * blocks) as a single human-readable text blob for the advisor model.
 *
 * Each message becomes a `### Turn N — role` section. Text blocks are
 * inlined, `tool_use` / `tool_result` blocks are rendered as bracketed
 * one-liners, and any other block type is rendered as a JSON preview
 * capped at 500 characters.
 *
 * Oldest turns are dropped first when the output would exceed `maxChars`;
 * a `[TRUNCATED: ...]` notice is prepended so the advisor knows the
 * transcript is partial. If even the latest turn alone is over budget,
 * only its tail is kept, leaving 200 characters of the budget for the
 * notice. When `maxChars` is 200 or less the tail is omitted entirely
 * rather than overrunning the budget.
 *
 * @param conversation Array of `{ role, content }` messages. Null or
 *   non-object entries are rendered as an empty turn with role `unknown`.
 * @param maxChars Character budget for the rendered turns.
 * @returns The rendered transcript.
 */
function renderConversationAsText(conversation, maxChars = ADVISOR_MAX_CONVERSATION_CHARS) {
	const turnBlocks = [];
	for (let i = 0; i < conversation.length; i++) {
		const msg = conversation[i];
		const role = msg?.role ?? "unknown";
		const lines = [`### Turn ${i + 1} — ${role}`];
		const content = msg?.content;
		if (typeof content === "string") lines.push(content);
		else if (Array.isArray(content)) for (const part of content) {
			if (typeof part !== "object" || part === null) continue;
			if (part.type === "text" && typeof part.text === "string") lines.push(part.text);
			else if (part.type === "tool_use") lines.push(`[tool_use ${part.name ?? "?"}(${part.id ?? "?"}): ${JSON.stringify(part.input ?? {})}]`);
			else if (part.type === "tool_result") {
				const rendered = typeof part.content === "string" ? part.content : JSON.stringify(part.content);
				lines.push(`[tool_result ${part.tool_use_id ?? "?"}]:\n${rendered}`);
			} else lines.push(`[${part.type}: ${JSON.stringify(part).slice(0, 500)}]`);
		}
		// Trailing empty line so every rendered turn ends with a newline.
		lines.push("");
		turnBlocks.push(lines.join("\n"));
	}
	// Walk newest → oldest, keeping turns while they fit (+1 per turn for the
	// "\n" separator added by the final join).
	let totalChars = 0;
	let firstKeptIdx = turnBlocks.length;
	for (let i = turnBlocks.length - 1; i >= 0; i--) {
		const len = turnBlocks[i].length + 1;
		if (totalChars + len > maxChars) break;
		totalChars += len;
		firstKeptIdx = i;
	}
	if (firstKeptIdx === turnBlocks.length && turnBlocks.length > 0) {
		// Not even the latest turn fits. slice(-0) would return the whole
		// string and a positive argument would slice from the front, so the
		// tail budget is clamped explicitly.
		const tailBudget = Math.max(0, maxChars - 200);
		const latest = turnBlocks[turnBlocks.length - 1];
		const tail = tailBudget > 0 ? latest.slice(-tailBudget) : "";
		return `[TRUNCATED: conversation too long for advisor model context; only the tail of the latest (turn ${turnBlocks.length}) is shown]\n\n` + tail;
	}
	const kept = turnBlocks.slice(firstKeptIdx);
	if (firstKeptIdx > 0) kept.unshift(`[TRUNCATED: ${firstKeptIdx} earlier turn(s) omitted to fit advisor model context budget; ${turnBlocks.length - firstKeptIdx} most-recent turn(s) shown below]\n`);
	return kept.join("\n");
}
3741
/**
 * Run the advisor model over the full conversation and return its text.
 *
 * Routes by model family:
 *   - ids matching gpt-*, o<digit>*, or containing "codex": sent through
 *     createResponses with `reasoning.effort` set. This is the default
 *     path (gpt-5.5 at xhigh effort).
 *   - everything else (e.g. claude-*): sent through createMessages with a
 *     4096-token output cap.
 *
 * The conversation is flattened to text via renderConversationAsText so an
 * advisor model that does not natively understand Anthropic's
 * tool_use/tool_result block shapes still sees a readable transcript.
 *
 * @param conversation Anthropic-shape messages to review.
 * @param advisorModel Model id or alias; passed through resolveModel.
 * @param advisorEffort Reasoning effort for the /responses path.
 * @returns The advisor's reply text.
 * @throws Error when the advisor reply contains no text. A missing or
 *   malformed `output` / `content` array is treated as an empty reply, so
 *   callers get this explicit error instead of an opaque TypeError.
 *   Errors from the underlying request helpers propagate unchanged.
 */
async function runAdvisor(conversation, advisorModel, advisorEffort) {
	const advisorSystem = "You are an expert advisor reviewing an in-progress Claude Code session. The transcript below is the work-in-progress (turns numbered, with tool calls and results inlined). Read carefully and provide concrete, actionable advice on the next step or course-correction. Be specific — cite the parts of the transcript you're responding to. If the assistant is on the right track, say so explicitly. If they're stuck or off-track, name the specific assumption or step to revisit. Aim for 2-5 paragraphs of substantive guidance.";
	const conversationText = renderConversationAsText(conversation);
	const resolvedAdvisorModel = resolveModel(advisorModel);
	if (/^(gpt-|o\d|.*codex)/i.test(resolvedAdvisorModel)) {
		const response = await createResponses({
			model: resolvedAdvisorModel,
			instructions: advisorSystem,
			input: [{
				role: "user",
				content: [{
					type: "input_text",
					text: conversationText
				}]
			}],
			stream: false,
			reasoning: { effort: advisorEffort }
		});
		const outputItems = Array.isArray(response?.output) ? response.output : [];
		const pieces = [];
		for (const item of outputItems) {
			if (typeof item !== "object" || item === null) continue;
			// Only assistant messages carry the reply; other output items
			// (e.g. reasoning) are skipped.
			if (item.type !== "message" || item.role !== "assistant") continue;
			if (!Array.isArray(item.content)) continue;
			for (const part of item.content) {
				if (typeof part !== "object" || part === null) continue;
				if ((part.type === "output_text" || part.type === "text") && typeof part.text === "string") pieces.push(part.text);
			}
		}
		const text$1 = pieces.join("");
		if (!text$1) throw new Error(`Advisor model ${resolvedAdvisorModel} returned empty assistant output`);
		return text$1;
	}
	const upstream = await createMessages(JSON.stringify({
		model: resolvedAdvisorModel,
		max_tokens: 4096,
		system: advisorSystem,
		messages: [{
			role: "user",
			content: conversationText
		}],
		stream: false
	}), {});
	const json = await upstream.json();
	const blocks = Array.isArray(json?.content) ? json.content : [];
	const text = blocks.filter((b) => b?.type === "text" && typeof b.text === "string").map((b) => b.text).join("\n\n");
	if (!text) throw new Error(`Advisor model ${resolvedAdvisorModel} returned empty response`);
	return text;
}
3807
/**
 * Map an upstream `toolu_*` tool-use id to the `srvtoolu_*` id used on the
 * client-facing `server_tool_use` block (and on the matching
 * `advisor_tool_result.tool_use_id`).
 *
 * The Anthropic spec requires `^srvtoolu_[a-zA-Z0-9_]+$`. When what remains
 * after removing a leading `toolu_` is empty or contains any other
 * character (for example a hyphenated id), a synthesized
 * `srvtoolu_advisor_<fallbackIndex>` id is returned instead so a malformed
 * block is never emitted.
 *
 * @param id Upstream tool-use id.
 * @param fallbackIndex Number used to build the synthesized id.
 * @returns A spec-shaped `srvtoolu_*` id.
 */
function toClientServerToolUseId(id, fallbackIndex) {
	const upstreamPrefix = "toolu_";
	const tail = id.startsWith(upstreamPrefix) ? id.slice(upstreamPrefix.length) : id;
	const isSpecCharset = /^[a-zA-Z0-9_]+$/.test(tail);
	return isSpecCharset ? `srvtoolu_${tail}` : `srvtoolu_advisor_${fallbackIndex}`;
}
3824
/**
 * Serialize one server-sent event in the canonical Anthropic framing:
 * an `event:` line, a `data:` line carrying the JSON-encoded payload, and
 * a blank line terminating the frame.
 *
 * @param type Event name written on the `event:` line.
 * @param data Payload; serialized with JSON.stringify.
 * @returns The frame text, ending in a blank line.
 */
function sseEvent(type, data) {
	const eventLine = `event: ${type}`;
	const dataLine = `data: ${JSON.stringify(data)}`;
	// Two trailing empty entries produce the "\n\n" frame terminator.
	return [
		eventLine,
		dataLine,
		"",
		""
	].join("\n");
}
3833
/**
 * The streaming translate-loop. Returns a ReadableStream of encoded SSE
 * bytes suitable for Hono's c.body() / new Response().
 *
 * Each upstream turn is forwarded to the client with content-block indices
 * renumbered into one continuous sequence. When the model calls the
 * injected advisor tool, the `tool_use` block is rewritten to a
 * `server_tool_use{name:"advisor"}` block, the turn's `message_stop` is
 * withheld, the advisor model is run, an `advisor_tool_result` block is
 * emitted, and a continuation request is sent upstream with the tool
 * result appended. Only the first `message_start` is forwarded.
 *
 * The loop stops when a turn ends without an advisor call, when the client
 * goes away (no further advisor or upstream calls are made once that is
 * detected), or after ADVISOR_MAX_TURNS advisor calls — in which case a
 * short "loop exceeded" text block and a `message_stop` are emitted and no
 * further continuation request is issued.
 *
 * @param opts.firstResponse The first Copilot streaming response.
 * @param opts.initialConversation Messages from the incoming request; the
 *   starting context for advisor calls and continuation requests.
 * @param opts.baseBody Parsed initial request body (model, max_tokens,
 *   system, ...) used as the template for continuation requests.
 * @param opts.requestHeaders Extra headers passed to continuation requests.
 * @param opts.advisorModel Advisor model; defaults to ADVISOR_DEFAULT_MODEL.
 * @param opts.advisorEffort Advisor reasoning effort; defaults to
 *   ADVISOR_DEFAULT_EFFORT.
 * @returns The client-facing SSE byte stream.
 */
function buildAdvisorStream(opts) {
	const advisorModel = opts.advisorModel ?? ADVISOR_DEFAULT_MODEL;
	const advisorEffort = opts.advisorEffort ?? ADVISOR_DEFAULT_EFFORT;
	// Set once the consumer cancels or an enqueue hits a closed controller.
	let clientGone = false;
	return new ReadableStream({
		async start(controller) {
			const conversation = [...opts.initialConversation];
			let messageStartForwarded = false;
			let nextSyntheticIndex = 0;
			/** Enqueue bytes; returns false (and records it) when the client is gone. */
			const safeEnqueue = (bytes) => {
				if (clientGone) return false;
				try {
					controller.enqueue(bytes);
					return true;
				} catch (err) {
					if (!isControllerClosedError(err)) throw err;
					clientGone = true;
					return false;
				}
			};
			const safeEnqueueEvent = (type, data) => safeEnqueue(ENCODER$1.encode(sseEvent(type, data)));
			/**
			 * Forward one upstream turn and capture its content blocks for
			 * replay. Returns early whenever an enqueue reports the client gone.
			 */
			async function processOneTurn(response) {
				const capturedBlocks = [];
				const indexToBlock = /* @__PURE__ */ new Map();
				let advisorToolUse = null;
				const snapshot = () => ({
					capturedBlocks,
					advisorToolUse
				});
				for await (const ev of events(response)) {
					if (!ev.event || !ev.data) continue;
					let payload;
					try {
						payload = JSON.parse(ev.data);
					} catch {
						// Non-JSON data: pass the frame through verbatim.
						if (!safeEnqueue(ENCODER$1.encode(`event: ${ev.event}\ndata: ${ev.data}\n\n`))) return snapshot();
						continue;
					}
					switch (ev.event) {
						case "message_start":
							// Continuation turns must not re-open the client message.
							if (!messageStartForwarded) {
								if (!safeEnqueueEvent(ev.event, payload)) return snapshot();
								messageStartForwarded = true;
							}
							continue;
						case "content_block_start": {
							const block = payload.content_block;
							const upstreamIndex = payload.index;
							if (!block || upstreamIndex === void 0) continue;
							const clientIndex = nextSyntheticIndex++;
							let outgoing;
							let captured;
							if (block.type === "tool_use" && block.name === ADVISOR_INTERNAL_TOOL_NAME) {
								const id = typeof block.id === "string" ? block.id : `toolu_advisor_${clientIndex}`;
								advisorToolUse = {
									index: clientIndex,
									id,
									clientId: toClientServerToolUseId(id, clientIndex),
									inputJson: ""
								};
								outgoing = {
									...payload,
									index: clientIndex,
									content_block: {
										type: "server_tool_use",
										id: advisorToolUse.clientId,
										name: ADVISOR_CLIENT_TOOL_NAME,
										input: {}
									}
								};
								// Replayed upstream in its original tool_use shape.
								captured = {
									block: {
										type: "tool_use",
										id,
										name: ADVISOR_INTERNAL_TOOL_NAME,
										input: {}
									},
									partialJson: "",
									advisorReplay: { id },
									clientIndex
								};
							} else {
								outgoing = {
									...payload,
									index: clientIndex
								};
								captured = {
									block: { ...block },
									partialJson: "",
									clientIndex
								};
							}
							if (!safeEnqueueEvent(ev.event, outgoing)) return snapshot();
							capturedBlocks.push(captured);
							indexToBlock.set(upstreamIndex, captured);
							continue;
						}
						case "content_block_delta": {
							const upstreamIndex = payload.index;
							if (upstreamIndex === void 0) {
								if (!safeEnqueueEvent(ev.event, payload)) return snapshot();
								continue;
							}
							const captured = indexToBlock.get(upstreamIndex);
							const reindexed = {
								...payload,
								index: captured ? captured.clientIndex : upstreamIndex
							};
							if (!safeEnqueueEvent(ev.event, reindexed)) return snapshot();
							const delta = payload.delta;
							if (captured && delta) {
								// Accumulate streamed fragments so the block can be replayed whole.
								if (delta.type === "text_delta" && typeof delta.text === "string") captured.block.text = (captured.block.text ?? "") + delta.text;
								else if (delta.type === "thinking_delta" && typeof delta.thinking === "string") captured.block.thinking = (captured.block.thinking ?? "") + delta.thinking;
								else if (delta.type === "signature_delta" && typeof delta.signature === "string") captured.block.signature = (captured.block.signature ?? "") + delta.signature;
								else if (delta.type === "input_json_delta" && typeof delta.partial_json === "string") captured.partialJson += delta.partial_json;
								else if (delta.type === "citations_delta" && delta.citation) {
									if (!Array.isArray(captured.block.citations)) captured.block.citations = [];
									captured.block.citations.push(delta.citation);
								}
							}
							continue;
						}
						case "content_block_stop": {
							const upstreamIndex = payload.index;
							const captured = upstreamIndex !== void 0 ? indexToBlock.get(upstreamIndex) : void 0;
							const reindexed = {
								...payload,
								index: captured ? captured.clientIndex : upstreamIndex ?? 0
							};
							if (!safeEnqueueEvent(ev.event, reindexed)) return snapshot();
							if (captured) {
								if (captured.block.type === "tool_use" && captured.partialJson.length > 0) try {
									captured.block.input = JSON.parse(captured.partialJson);
								} catch (err) {
									consola.warn(`advisor: malformed input_json_delta for tool_use id=${captured.block.id ?? "?"} name=${captured.block.name ?? "?"} partialJson.length=${captured.partialJson.length} parseError=${err instanceof Error ? err.message : String(err)}`);
									captured.block.input = {};
								}
								// Empty text blocks are left out of the replayed assistant turn.
								if (captured.block.type === "text" && (typeof captured.block.text !== "string" || captured.block.text.length === 0)) captured.dropFromReplay = true;
							}
							continue;
						}
						case "message_delta":
							if (!safeEnqueueEvent(ev.event, payload)) return snapshot();
							continue;
						case "message_stop":
							// Withheld when an advisor call is pending: the client
							// message continues with the advisor result.
							if (!advisorToolUse) safeEnqueueEvent(ev.event, payload);
							return snapshot();
						default: if (!safeEnqueueEvent(ev.event, payload)) return snapshot();
					}
				}
				return snapshot();
			}
			try {
				let response = opts.firstResponse;
				for (let turn = 0; turn < ADVISOR_MAX_TURNS; turn++) {
					const { capturedBlocks, advisorToolUse } = await processOneTurn(response);
					// Nobody is listening any more, or the turn ended normally.
					if (clientGone || !advisorToolUse) return;
					conversation.push({
						role: "assistant",
						content: capturedBlocks.filter((c) => !c.dropFromReplay).map((c) => {
							if (!c.advisorReplay) return c.block;
							const input = typeof c.block.input === "object" && c.block.input !== null ? c.block.input : {};
							return {
								type: "tool_use",
								id: c.advisorReplay.id,
								name: ADVISOR_INTERNAL_TOOL_NAME,
								input
							};
						})
					});
					let advisorText;
					try {
						advisorText = await runAdvisor(conversation, advisorModel, advisorEffort);
					} catch (err) {
						// Best-effort: a failed advisor call becomes an explanatory
						// result rather than aborting the whole stream.
						const msg = err instanceof Error ? err.message : String(err);
						consola.warn(`Advisor model call failed: ${msg}`);
						advisorText = `[Advisor unavailable: ${msg}. Continuing without external review — proceed with caution and consider self-checking against your primary-source evidence.]`;
					}
					const resultIndex = nextSyntheticIndex++;
					if (!safeEnqueueEvent("content_block_start", {
						type: "content_block_start",
						index: resultIndex,
						content_block: {
							type: "advisor_tool_result",
							tool_use_id: advisorToolUse.clientId,
							content: {
								type: "advisor_result",
								text: advisorText
							}
						}
					})) return;
					if (!safeEnqueueEvent("content_block_stop", {
						type: "content_block_stop",
						index: resultIndex
					})) return;
					conversation.push({
						role: "user",
						content: [{
							type: "tool_result",
							tool_use_id: advisorToolUse.id,
							content: advisorText
						}]
					});
					// The turn budget is spent: a further upstream response would
					// never be read, so do not request it.
					if (turn + 1 >= ADVISOR_MAX_TURNS) break;
					response = await createMessages(JSON.stringify({
						...opts.baseBody,
						messages: conversation,
						stream: true
					}), opts.requestHeaders);
				}
				const finalIndex = nextSyntheticIndex++;
				safeEnqueueEvent("content_block_start", {
					type: "content_block_start",
					index: finalIndex,
					content_block: {
						type: "text",
						text: ""
					}
				});
				safeEnqueueEvent("content_block_delta", {
					type: "content_block_delta",
					index: finalIndex,
					delta: {
						type: "text_delta",
						text: `\n\n[Advisor loop exceeded ${ADVISOR_MAX_TURNS} turns; halting]`
					}
				});
				safeEnqueueEvent("content_block_stop", {
					type: "content_block_stop",
					index: finalIndex
				});
				safeEnqueueEvent("message_stop", { type: "message_stop" });
			} catch (err) {
				const msg = err instanceof Error ? err.message : String(err);
				consola.error(`Advisor stream error: ${msg}`);
				safeEnqueueEvent("error", {
					type: "error",
					error: {
						type: "api_error",
						message: `advisor loop failed: ${msg}`
					}
				});
			} finally {
				try {
					controller.close();
				} catch {}
			}
		},
		cancel() {
			// Consumer cancelled: stop producing and skip further advisor /
			// upstream calls.
			clientGone = true;
		}
	});
}
4130
+
4131
+ //#endregion
4132
+ //#region src/lib/sanitize-anthropic-body.ts
4133
/**
 * Map a client-facing `srvtoolu_*` id back to the `toolu_*` form used when
 * the block is replayed to Copilot as a plain `tool_use`.
 *
 * Only ids that start with `srvtoolu_` and whose remainder is a non-empty
 * run of `[a-zA-Z0-9_]` are converted; every other input yields the
 * synthesized `toolu_advisor_<fallbackIndex>` id.
 *
 * @param srvId Client-side server-tool-use id.
 * @param fallbackIndex Number used to build the synthesized id.
 * @returns A `toolu_*` id.
 */
function toCopilotToolUseId(srvId, fallbackIndex) {
	const serverPrefix = "srvtoolu_";
	const synthesized = `toolu_advisor_${fallbackIndex}`;
	if (!srvId.startsWith(serverPrefix)) return synthesized;
	const tail = srvId.slice(serverPrefix.length);
	return /^[a-zA-Z0-9_]+$/.test(tail) ? `toolu_${tail}` : synthesized;
}
4146
/**
 * Fast-path detector: returns true if the raw body has any chance of
 * needing sanitization, so the common spec-compliant body avoids a full
 * JSON parse.
 *
 * Looks for an advisor-related content block (`server_tool_use`,
 * `advisor_tool_result`) or a native advisor tool entry — any `"type"`
 * whose value starts with `advisor_`. The type match tolerates whitespace
 * around the colon (pretty-printed JSON) and does not require a numeric
 * suffix, so it covers everything the sanitizer's `advisor_*` tool filter
 * would strip. False positives are harmless: they only cost a parse.
 *
 * @param rawBody Raw JSON request body.
 * @returns true when the body should be parsed and inspected.
 */
function bodyMightNeedSanitize(rawBody) {
	if (rawBody.includes("\"server_tool_use\"")) return true;
	if (rawBody.includes("\"advisor_tool_result\"")) return true;
	return /"type"\s*:\s*"advisor_/.test(rawBody);
}
4158
/**
 * Translate one assistant turn's content array, splitting at advisor
 * pairs into the multi-message structure Copilot accepts.
 *
 * Input shape (Claude Code stores everything in one assistant turn):
 *   [text*, server_tool_use{advisor}, advisor_tool_result, text*, ...]
 *
 * For every advisor `server_tool_use` block the assistant content gathered
 * so far is closed with a plain `tool_use{__anthropic_advisor}` block, and
 * a user message carrying the matching `tool_result` follows. Trailing
 * content becomes a final assistant message. `advisor_tool_result` blocks
 * that do not directly follow an advisor call are dropped.
 *
 * The `tool_result` text comes from the paired `advisor_tool_result`
 * (string content, or the `text` field of an object content). A
 * placeholder is used when the result block is missing, or when it carries
 * no readable text (for example an error-shaped result), so the replayed
 * `tool_result` is never empty.
 *
 * @param originalContent Content blocks of one assistant message.
 * @param syntheticIndexRef `{ value }` counter shared across turns; it is
 *   advanced whenever a fallback id may be synthesized.
 * @returns `{ messages, translated }`. When nothing advisor-related was
 *   found, `messages` is the single original assistant turn and
 *   `translated` is false.
 */
function splitAssistantTurnAtAdvisorPairs(originalContent, syntheticIndexRef) {
	const asRecord = (value) => typeof value === "object" && value !== null ? value : null;
	const messages = [];
	let currentAssistantContent = [];
	let translated = false;
	let i = 0;
	while (i < originalContent.length) {
		const block = originalContent[i];
		const b = asRecord(block);
		if (b && b.type === "server_tool_use" && b.name === ADVISOR_CLIENT_TOOL_NAME) {
			const stuId = typeof b.id === "string" ? b.id : "";
			// Ids already in upstream shape are kept; anything else is
			// converted or synthesized.
			let copilotId;
			if (stuId.startsWith("srvtoolu_")) copilotId = toCopilotToolUseId(stuId, syntheticIndexRef.value++);
			else if (/^toolu_[a-zA-Z0-9_]+$/.test(stuId)) copilotId = stuId;
			else copilotId = `toolu_advisor_${syntheticIndexRef.value++}`;
			currentAssistantContent.push({
				type: "tool_use",
				id: copilotId,
				name: ADVISOR_INTERNAL_TOOL_NAME,
				input: {}
			});
			messages.push({
				role: "assistant",
				content: currentAssistantContent
			});
			translated = true;
			const next = asRecord(originalContent[i + 1]);
			let resultText;
			if (next && next.type === "advisor_tool_result") {
				const payload = next.content;
				let extracted = "";
				if (typeof payload === "string") extracted = payload;
				else if (typeof payload === "object" && payload !== null && typeof payload.text === "string") extracted = payload.text;
				resultText = extracted.length > 0 ? extracted : "[Advisor result had no readable text content.]";
				i += 2;
			} else {
				resultText = "[Advisor result missing in conversation history.]";
				i += 1;
			}
			messages.push({
				role: "user",
				content: [{
					type: "tool_result",
					tool_use_id: copilotId,
					content: resultText
				}]
			});
			currentAssistantContent = [];
			continue;
		}
		if (b && b.type === "advisor_tool_result") {
			// Orphaned result with no preceding advisor call: drop it.
			translated = true;
			i += 1;
			continue;
		}
		currentAssistantContent.push(block);
		i += 1;
	}
	if (currentAssistantContent.length > 0) messages.push({
		role: "assistant",
		content: currentAssistantContent
	});
	if (!translated) return {
		messages: [{
			role: "assistant",
			content: originalContent
		}],
		translated: false
	};
	return {
		messages,
		translated: true
	};
}
4240
/**
 * Rewrite an Anthropic-shape request body so it no longer contains
 * advisor constructs the upstream rejects, returning the body string to
 * forward.
 *
 * Steps, applied only when bodyMightNeedSanitize flags the body and it
 * parses as JSON:
 *   1. Remove `tools` entries whose `type` starts with `advisor_`.
 *   2. For each assistant message whose content array holds an advisor
 *      `server_tool_use` or any `advisor_tool_result` block, replace the
 *      message with the split produced by splitAssistantTurnAtAdvisorPairs.
 *   3. If any message was translated, make sure the
 *      `__anthropic_advisor` tool definition is present in `tools`, since
 *      the rewritten history now references it.
 *
 * The original string is returned untouched when nothing changed or the
 * body is not valid JSON.
 *
 * @param rawBody Raw JSON request body.
 * @returns The sanitized body string, or `rawBody` itself when unchanged.
 */
function sanitizeAnthropicBody(rawBody) {
	if (!bodyMightNeedSanitize(rawBody)) return rawBody;
	let parsed;
	try {
		parsed = JSON.parse(rawBody);
	} catch {
		return rawBody;
	}
	const isRecord = (value) => typeof value === "object" && value !== null;
	const isNativeAdvisorTool = (tool) => isRecord(tool) && typeof tool.type === "string" && tool.type.startsWith("advisor_");
	const isAdvisorBlock = (blk) => isRecord(blk) && (blk.type === "server_tool_use" && blk.name === "advisor" || blk.type === "advisor_tool_result");
	let mutated = false;
	if (Array.isArray(parsed.tools)) {
		const kept = parsed.tools.filter((tool) => !isNativeAdvisorTool(tool));
		if (kept.length !== parsed.tools.length) {
			parsed.tools = kept;
			mutated = true;
		}
	}
	if (Array.isArray(parsed.messages)) {
		const rebuilt = [];
		const syntheticIndexRef = { value: 0 };
		let anyTranslated = false;
		for (const msg of parsed.messages) {
			const needsSplit = isRecord(msg) && msg.role === "assistant" && Array.isArray(msg.content) && msg.content.some(isAdvisorBlock);
			if (!needsSplit) {
				rebuilt.push(msg);
				continue;
			}
			const outcome = splitAssistantTurnAtAdvisorPairs(msg.content, syntheticIndexRef);
			if (!outcome.translated) {
				rebuilt.push(msg);
				continue;
			}
			anyTranslated = true;
			rebuilt.push(...outcome.messages);
		}
		if (anyTranslated) {
			parsed.messages = rebuilt;
			mutated = true;
			// The rewritten history references the internal advisor tool, so
			// its definition must be declared.
			const existingTools = Array.isArray(parsed.tools) ? parsed.tools : [];
			const hasAdvisorTool = existingTools.some((tool) => isRecord(tool) && tool.name === ADVISOR_INTERNAL_TOOL_NAME);
			if (!hasAdvisorTool) parsed.tools = [...existingTools, {
				name: ADVISOR_INTERNAL_TOOL_NAME,
				description: ADVISOR_TOOL_INSTRUCTIONS,
				input_schema: {
					type: "object",
					properties: {},
					required: []
				}
			}];
		}
	}
	return mutated ? JSON.stringify(parsed) : rawBody;
}
4313
+
3231
4314
  //#endregion
3232
4315
  //#region src/lib/diagnose-response.ts
3233
4316
  const PREVIEW_LIMIT = 200;
@@ -3278,7 +4361,18 @@ function stripWebSearchFromBody(rawBody) {
3278
4361
  */
3279
4362
  async function handleCountTokens(c) {
3280
4363
  const startTime = Date.now();
3281
- const { body: finalBody, originalModel, resolvedModel } = resolveModelInBody$1(stripWebSearchFromBody(await c.req.text()));
4364
+ const strippedBody = stripWebSearchFromBody(sanitizeAnthropicBody(await c.req.text()));
4365
+ if (strippedBody.includes("\"mcp_servers\"")) try {
4366
+ const probe = JSON.parse(strippedBody);
4367
+ if (Array.isArray(probe.mcp_servers) && probe.mcp_servers.length > 0) return c.json({
4368
+ type: "error",
4369
+ error: {
4370
+ type: "invalid_request_error",
4371
+ message: "Inline `mcp_servers` body field is not supported by github-router. Configure remote MCP servers as local stdio entries in `~/.claude/mcp.json` instead."
4372
+ }
4373
+ }, 400);
4374
+ } catch {}
4375
+ const { body: finalBody, originalModel, resolvedModel } = resolveModelInBody$1(strippedBody);
3282
4376
  const extraHeaders = {};
3283
4377
  const anthropicBeta = c.req.header("anthropic-beta");
3284
4378
  if (anthropicBeta) {
@@ -3322,6 +4416,7 @@ function resolveModelInBody$1(rawBody) {
3322
4416
  }
3323
4417
  }
3324
4418
  if (rawBody.includes("\"scope\"") && sanitizeCacheControl$1(parsed)) modified = true;
4419
+ if ((rawBody.includes("\"budget\"") || rawBody.includes("\"output_config\"") || rawBody.includes("\"betas\"")) && stripAnthropicOnlyFields$1(parsed)) modified = true;
3325
4420
  const resolvedModel = typeof parsed.model === "string" ? parsed.model : originalModel;
3326
4421
  return {
3327
4422
  body: modified ? JSON.stringify(parsed) : rawBody,
@@ -3348,6 +4443,43 @@ function sanitizeCacheControl$1(body) {
3348
4443
  if (Array.isArray(body.tools)) for (const tool of body.tools) stripScope(tool);
3349
4444
  return stripped;
3350
4445
  }
4446
/**
 * count_tokens variant of the Anthropic-only field stripper.
 *
 * Removes, in place, the top-level request fields that Copilot answers with
 * HTTP 400: `budget`, every `output_config` key except `effort`, and a
 * `betas` array. A one-line warning is logged for each kind of field
 * removed. An `output_config` left empty after stripping is dropped entirely.
 *
 * This is a structural duplicate of handler.ts's stripAnthropicOnlyFields,
 * kept separate because count_tokens runs its own JSON pass over an
 * independent body. Unlike the handler variant it does not append a
 * structured-output instruction to the system prompt. The stripping rules
 * must stay in lock-step with handler.ts (integration tests, Phase F P2.4).
 *
 * @param {Record<string, unknown>} body Parsed request body; mutated in place.
 * @returns {boolean} true when at least one field was removed.
 */
function stripAnthropicOnlyFields$1(body) {
	let changed = false;

	if (typeof body.budget !== "undefined") {
		consola.warn("[count_tokens] Stripping body-level `budget` field (Copilot 400s)");
		delete body.budget;
		changed = true;
	}

	// Only a real object can carry strippable keys; null and primitives are
	// left exactly as the client sent them.
	const outputConfig = body.output_config;
	if (outputConfig !== null && typeof outputConfig === "object") {
		// `effort` is the single key the proxy itself owns and forwards.
		const rejectedKeys = Object.keys(outputConfig).filter((field) => field !== "effort");
		if (rejectedKeys.length > 0) {
			for (const field of rejectedKeys) delete outputConfig[field];
			consola.warn("[count_tokens] Stripping client-set `output_config` Structured-Outputs fields (Copilot 400s on `output_config.*` other than `effort`)");
			if (Object.keys(outputConfig).length === 0) delete body.output_config;
			changed = true;
		}
	}

	if (Array.isArray(body.betas)) {
		consola.warn("[count_tokens] Stripping body-level `betas` array (Copilot 400s; conveyed via header)");
		delete body.betas;
		changed = true;
	}

	return changed;
}
3351
4483
 
3352
4484
  //#endregion
3353
4485
  //#region src/routes/messages/handler.ts
@@ -3458,7 +4590,24 @@ async function handleCompletion(c) {
3458
4590
  if (debugEnabled) consola.debug("Anthropic request body:", rawBody.slice(0, 2e3));
3459
4591
  if (state.manualApprove) await awaitApproval();
3460
4592
  const betaHeaders = extractBetaHeaders(c);
3461
- const { body: resolvedBody, originalModel, resolvedModel, selectedModel } = resolveModelInBody(await processWebSearch(rawBody));
4593
+ const advisorEnabled = isAdvisorRequested(c.req.header("anthropic-beta"));
4594
+ let finalBody = await processWebSearch(rawBody);
4595
+ finalBody = sanitizeAnthropicBody(finalBody);
4596
+ if (advisorEnabled) {
4597
+ finalBody = injectAdvisorTool(finalBody);
4598
+ consola.info("ADVISOR enabled for this request — injecting __anthropic_advisor tool; will translate tool_use → server_tool_use{advisor} on the SSE stream");
4599
+ }
4600
+ if (finalBody.includes("\"mcp_servers\"")) try {
4601
+ const probe = JSON.parse(finalBody);
4602
+ if (Array.isArray(probe.mcp_servers) && probe.mcp_servers.length > 0) return c.json({
4603
+ type: "error",
4604
+ error: {
4605
+ type: "invalid_request_error",
4606
+ message: "Inline `mcp_servers` body field is not supported by github-router (Copilot returns 400 'Extra inputs are not permitted'; the proxy would need a multi-turn tool-loop translation that has unresolved design holes — see Phase G in the plan). Configure your remote MCP servers as local stdio entries in `~/.claude/mcp.json` instead — Claude Code will spawn them locally and the proxy passes their tool calls through transparently. (https://docs.claude.com/en/docs/claude-code/mcp)"
4607
+ }
4608
+ }, 400);
4609
+ } catch {}
4610
+ const { body: resolvedBody, originalModel, resolvedModel, selectedModel } = resolveModelInBody(finalBody);
3462
4611
  const modelId = resolvedModel ?? originalModel;
3463
4612
  if (modelId) logEndpointMismatch(modelId, "/v1/messages");
3464
4613
  const effectiveBetas = applyDefaultBetas(betaHeaders, resolvedModel ?? originalModel);
@@ -3512,6 +4661,25 @@ async function handleCompletion(c) {
3512
4661
  if (requestId) streamHeaders["x-request-id"] = requestId;
3513
4662
  const reqId = response.headers.get("request-id");
3514
4663
  if (reqId) streamHeaders["request-id"] = reqId;
4664
+ if (advisorEnabled && response.body) {
4665
+ let parsedBase = {};
4666
+ try {
4667
+ parsedBase = JSON.parse(resolvedBody);
4668
+ } catch {}
4669
+ const initialConversation = Array.isArray(parsedBase.messages) ? parsedBase.messages : [];
4670
+ return new Response(buildAdvisorStream({
4671
+ firstResponse: response,
4672
+ initialConversation,
4673
+ baseBody: parsedBase,
4674
+ requestHeaders: {
4675
+ ...selectedModel?.requestHeaders,
4676
+ ...effectiveBetas
4677
+ }
4678
+ }), {
4679
+ status: response.status,
4680
+ headers: streamHeaders
4681
+ });
4682
+ }
3515
4683
  return new Response(response.body ? relayAnthropicStream(response.body, { routePath: c.req.path }) : null, {
3516
4684
  status: response.status,
3517
4685
  headers: streamHeaders
@@ -3562,6 +4730,7 @@ function resolveModelInBody(rawBody) {
3562
4730
  const selectedModel = resolvedModel ? state.models?.data.find((m) => m.id === resolvedModel) : void 0;
3563
4731
  if (translateThinking(parsed, selectedModel)) modified = true;
3564
4732
  if (rawBody.includes("\"scope\"") && sanitizeCacheControl(parsed)) modified = true;
4733
+ if ((rawBody.includes("\"budget\"") || rawBody.includes("\"output_config\"") || rawBody.includes("\"betas\"")) && stripAnthropicOnlyFields(parsed)) modified = true;
3565
4734
  return {
3566
4735
  body: modified ? JSON.stringify(parsed) : rawBody,
3567
4736
  originalModel,
@@ -3677,6 +4846,81 @@ function applyDefaultBetas(betaHeaders, modelId) {
3677
4846
  "anthropic-beta": ["interleaved-thinking-2025-05-14", "context-management-2025-06-27"].join(",")
3678
4847
  };
3679
4848
  }
4849
/**
 * Strip top-level body fields that Anthropic's Messages API accepts but
 * Copilot rejects with HTTP 400 "Extra inputs are not permitted". Mutates
 * `body` in place; returns true if anything was stripped.
 *
 * Empirical verification (2026-05-11):
 *   POST /v1/messages?beta=true { ..., budget: {total_tokens: 10000} }  → 400
 *   POST /v1/messages?beta=true { ..., output_config: {schema: {...}} } → 400
 *   POST /v1/messages?beta=true { ..., betas: ["..."] }                 → 400
 *
 * Each strip emits a one-line consola.warn so users running with these
 * features (e.g. `claude --max-budget-usd`, `--json-schema`) understand
 * the request succeeds with the *body field* dropped — semantics may
 * differ from upstream Anthropic. The corresponding `anthropic-beta`
 * header is preserved (Phase A allowlist) so the *intent* still flows
 * to Copilot, even if the per-request enforcement field is gone.
 *
 * Structured-outputs handling: every `output_config` key except `effort`
 * is removed. Before removal the schema and type are captured from either
 * the flat shape (`output_config.schema` / `output_config.type`) or the
 * nested shape (`output_config.format.schema` / `output_config.format.type`,
 * which is how Anthropic's structured-outputs documentation spells the
 * request). When a schema is present, or the type is `json_object`, the
 * requirement is re-expressed as a system-prompt instruction via
 * appendStructuredOutputInstruction.
 *
 * NOT stripped here:
 *   - `mcp_servers` (Phase G translate path — silent strip causes LLM
 *     to hallucinate tools per gemini-critic finding)
 *   - `metadata` (Copilot 200s, ignores harmlessly)
 *
 * @param {Record<string, unknown>} body Parsed request body; mutated in place.
 * @returns {boolean} true when at least one field was removed.
 */
function stripAnthropicOnlyFields(body) {
	let stripped = false;
	if (body.budget !== void 0) {
		consola.warn("Stripping body-level `budget` field (Copilot 400s; the `task-budgets-` beta header is preserved but cost ceiling is not enforced server-side)");
		delete body.budget;
		stripped = true;
	}
	const oc = body.output_config;
	// Only a real object can carry strippable keys; null/primitives pass through untouched.
	if (oc && typeof oc === "object") {
		const PROXY_OWNED_FIELDS = new Set(["effort"]);
		// Capture schema/type BEFORE deleting keys. The flat fields win when both
		// shapes are present so existing callers see no behavior change.
		const format = oc.format && typeof oc.format === "object" ? oc.format : void 0;
		const schema = oc.schema !== void 0 ? oc.schema : format?.schema;
		const ocType = oc.type !== void 0 ? oc.type : format?.type;
		let strippedAny = false;
		for (const key of Object.keys(oc)) if (!PROXY_OWNED_FIELDS.has(key)) {
			delete oc[key];
			strippedAny = true;
		}
		if (strippedAny) {
			const injectInstruction = schema !== void 0 || ocType === "json_object";
			// Only claim the schema is being injected when it actually is.
			if (injectInstruction) consola.warn("Stripping client-set `output_config` Structured-Outputs fields (Copilot 400s on `output_config.*` other than `effort`; injecting schema as system-prompt instruction so the model still produces JSON conforming to the structured-outputs schema, since server-side enforcement is gone)");
			else consola.warn("Stripping client-set `output_config` fields (Copilot 400s on `output_config.*` other than `effort`)");
			if (Object.keys(oc).length === 0) delete body.output_config;
			if (injectInstruction) appendStructuredOutputInstruction(body, schema, ocType);
			stripped = true;
		}
	}
	if (Array.isArray(body.betas)) {
		consola.warn("Stripping body-level `betas` array (Copilot 400s; the betas are conveyed via the `anthropic-beta` header instead)");
		delete body.betas;
		stripped = true;
	}
	return stripped;
}
4904
/**
 * Append a system-prompt instruction telling the model to produce JSON
 * conforming to a Structured Outputs schema. Used after the proxy
 * strips `output_config` to preserve the schema enforcement intent
 * via prompt engineering instead of server-side validation.
 *
 * Updates `body.system`, handling each shape it may have:
 *   - non-empty string → instruction appended after a blank line
 *   - array of blocks  → replaced by a new array with a trailing text block
 *                        (the caller's original array is not mutated)
 *   - anything else, including an empty string → replaced by the bare
 *     instruction, so the prompt never starts with stray blank lines
 *
 * @param {Record<string, unknown>} body Parsed request body; `system` is rewritten.
 * @param {unknown} schema JSON Schema to embed, or undefined when none was supplied.
 * @param {unknown} ocType Requested output type; mentioned only when it is a
 *   string and no schema is given.
 * @returns {void}
 */
function appendStructuredOutputInstruction(body, schema, ocType) {
	let instruction = "IMPORTANT: Your response MUST be a single valid JSON object. Do not wrap it in markdown code fences. Do not include any text before or after the JSON object.";
	if (schema !== void 0) instruction += ` The JSON object MUST conform to this JSON Schema:\n${JSON.stringify(schema)}`;
	else if (typeof ocType === "string") instruction += ` Output type requested: ${ocType}.`;
	if (typeof body.system === "string" && body.system.length > 0) {
		// Separate from the existing prompt with a blank line.
		body.system = `${body.system}\n\n${instruction}`;
	} else if (Array.isArray(body.system)) {
		body.system = [...body.system, {
			type: "text",
			text: instruction
		}];
	} else {
		// Absent, empty, or unexpected shape: the instruction becomes the whole prompt.
		body.system = instruction;
	}
}
3680
4924
 
3681
4925
  //#endregion
3682
4926
  //#region src/routes/messages/route.ts
@@ -4096,6 +5340,13 @@ server.route("/v1/search", searchRoutes);
4096
5340
  server.route("/v1/messages", messageRoutes);
4097
5341
  server.route("/mcp", mcpRoutes);
4098
5342
  server.post("/api/event_logging/batch", (c) => c.body(null, 200));
5343
// Files API requests under /v1/files/* are answered with an explicit
// structured 404 explaining why, instead of being proxied upstream.
server.all("/v1/files/*", (c) => {
	const unsupportedFilesApi = {
		type: "error",
		error: {
			type: "not_found_error",
			message: "Files API is not supported by github-router (Copilot has no equivalent storage backend). Use the Anthropic API directly for file uploads/downloads."
		}
	};
	return c.json(unsupportedFilesApi, 404);
});
4099
5350
  server.notFound((c) => c.json({
4100
5351
  type: "error",
4101
5352
  error: {
@@ -4306,9 +5557,17 @@ function getClaudeCodeEnvVars(serverUrl, model) {
4306
5557
  CLAUDE_CONFIG_DIR: path.join(os.homedir(), ".claude"),
4307
5558
  MCP_TIMEOUT: "600000",
4308
5559
  DISABLE_NON_ESSENTIAL_MODEL_CALLS: "1",
4309
- CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: "1"
5560
+ CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: "1",
5561
+ DISABLE_TELEMETRY: "1"
4310
5562
  };
4311
5563
  if (model) vars.ANTHROPIC_MODEL = model;
5564
+ for (const key of [
5565
+ "CLAUDE_CODE_ENABLE_EXPERIMENTAL_ADVISOR_TOOL",
5566
+ "CLAUDE_CODE_FORK_SUBAGENT",
5567
+ "CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS",
5568
+ "CLAUDE_CODE_ENABLE_FINE_GRAINED_TOOL_STREAMING",
5569
+ "CLAUDE_CODE_ENABLE_TASKS"
5570
+ ]) if (process.env[key] === void 0) vars[key] = "1";
4312
5571
  return vars;
4313
5572
  }
4314
5573
  /**
@@ -4359,6 +5618,21 @@ const claude = defineCommand({
4359
5618
  type: "boolean",
4360
5619
  default: false,
4361
5620
  description: "Pass --strict-mcp-config to claude code so only github-router's MCP servers are loaded (hides user's existing MCP servers)"
5621
+ },
5622
+ stealth: {
5623
+ type: "boolean",
5624
+ default: false,
5625
+ description: "Opt back into VS Code-only beta header filtering. Loses leverage features (task budgets, token-efficient tools, prompt caching, etc.) but minimizes the wire-fingerprint difference from VS Code Copilot Chat. By default the `claude` subcommand enables extended/leverage betas because the spawned Claude Code already identifies itself via UA and other headers — partial stealth doesn't buy much."
5626
+ },
5627
+ "auto-update": {
5628
+ type: "boolean",
5629
+ default: true,
5630
+ description: "Check for and install latest Claude Code on launch (throttled to once per hour via ~/.local/share/github-router/last-update-check). Set to false (--no-auto-update) to keep the current installed version. Falls back gracefully if npm/network unavailable."
5631
+ },
5632
+ "update-check": {
5633
+ type: "boolean",
5634
+ default: true,
5635
+ description: "Check the npm registry for a newer Claude Code version on launch and warn if stale (non-blocking ~500ms cost). Set to false (--no-update-check) to skip the check entirely (useful for offline/CI). Independent from --auto-update: --no-update-check implies no auto-install (nothing to install since we never check)."
4362
5636
  }
4363
5637
  },
4364
5638
  async run({ args }) {
@@ -4367,6 +5641,24 @@ const claude = defineCommand({
4367
5641
  process$1.exit(1);
4368
5642
  }
4369
5643
  const parsed = parseSharedArgs(args);
5644
+ if (args.stealth) {
5645
+ parsed.extendedBetas = false;
5646
+ consola.info("Stealth mode: VS Code-only beta filtering. Leverage features disabled.");
5647
+ } else if (!args["extended-betas"]) parsed.extendedBetas = true;
5648
+ if (args["update-check"] !== false) try {
5649
+ const versionCheck = await checkClaudeVersion({ noCheck: false });
5650
+ if (versionCheck.skipped && versionCheck.skipReason === "no-claude") consola.debug("claude --version probe failed; skipping auto-update.");
5651
+ else if (versionCheck.skipped && versionCheck.skipReason === "no-npm") consola.debug("npm view @anthropic-ai/claude-code failed; skipping auto-update check (likely offline).");
5652
+ else if (versionCheck.needsUpdate && versionCheck.installedVersion && versionCheck.latestVersion) if (args["auto-update"] !== false) try {
5653
+ await autoUpdateClaude(versionCheck.latestVersion);
5654
+ } catch (err) {
5655
+ const msg = err instanceof Error ? err.message : String(err);
5656
+ consola.warn(`Auto-update of Claude Code from ${versionCheck.installedVersion} to ${versionCheck.latestVersion} failed (${msg}); continuing with installed version. Run \`npm install -g @anthropic-ai/claude-code@latest\` manually to retry.`);
5657
+ }
5658
+ else consola.warn(`Claude Code v${versionCheck.installedVersion} is installed; v${versionCheck.latestVersion} is available. Run with --auto-update (the default) to install on launch, or \`npm install -g @anthropic-ai/claude-code@latest\` manually.`);
5659
+ } catch (err) {
5660
+ consola.debug("Claude version check failed:", err);
5661
+ }
4370
5662
  let server$1;
4371
5663
  let serverUrl;
4372
5664
  try {