github-router 0.3.19 → 0.3.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -1,12 +1,13 @@
1
1
  #!/usr/bin/env node
2
2
  import { defineCommand, runMain } from "citty";
3
3
  import consola from "consola";
4
+ import { randomBytes, randomUUID, timingSafeEqual } from "node:crypto";
4
5
  import fs from "node:fs/promises";
5
6
  import os from "node:os";
6
7
  import path from "node:path";
7
- import { randomBytes, randomUUID, timingSafeEqual } from "node:crypto";
8
8
  import process$1 from "node:process";
9
- import { execFileSync, spawn } from "node:child_process";
9
+ import { execFile, execFileSync, spawn } from "node:child_process";
10
+ import { promisify } from "node:util";
10
11
  import fs$1 from "node:fs";
11
12
  import { Writable } from "node:stream";
12
13
  import { serve } from "srvx";
@@ -37,6 +38,9 @@ const PATHS = {
37
38
  },
38
39
  get CLAUDE_RUNTIME_DIR() {
39
40
  return path.join(appDir(), "runtime");
41
+ },
42
+ get CLAUDE_CONFIG_DIR() {
43
+ return path.join(appDir(), "claude-config");
40
44
  }
41
45
  };
42
46
  async function ensurePaths() {
@@ -52,6 +56,318 @@ async function ensurePaths() {
52
56
  consola.debug("Peer-agent .md sweep skipped:", err);
53
57
  });
54
58
  }
59
/**
 * Policy table for top-level entries of the user's `~/.claude/` directory.
 * `ISOLATED` and `SHARED` names are listed explicitly; every other name is
 * treated as `MIRRORED` by `policyFor`.
 */
const CLAUDE_HOME_POLICY = new Map([
	...[
		".credentials.json",
		".credentials.json.lock",
		".oauth_refresh.lock",
		".github-router-managed",
		"statsig",
		"cache",
		"logs",
		"paste-cache"
	].map((entry) => [entry, "ISOLATED"]),
	...[
		"projects",
		"sessions",
		"tasks",
		"todos",
		"transcripts",
		"shell-snapshots",
		"shell_snapshots",
		"plans",
		"file-history",
		"backups"
	].map((entry) => [entry, "SHARED"])
]);
/**
 * Resolve the policy for a top-level entry name.
 *
 * @param {string} name$1 - Entry name directly under `~/.claude/`.
 * @returns {string} `"ISOLATED"`, `"SHARED"`, or `"MIRRORED"` when unlisted.
 */
function policyFor(name$1) {
	const listed = CLAUDE_HOME_POLICY.get(name$1);
	return listed ?? "MIRRORED";
}
/**
 * Every name whose policy is `SHARED`, in table order. Computed once so
 * `ensureClaudeConfigMirror` can iterate it in its post-copy phase.
 */
const SHARED_TOPLEVEL_NAMES = [];
for (const [entryName, kind] of CLAUDE_HOME_POLICY) {
	if (kind === "SHARED") SHARED_TOPLEVEL_NAMES.push(entryName);
}
87
/**
 * Name of the marker file dropped into the router-owned CLAUDE_CONFIG_DIR
 * so that users (and future sweeps) can tell the directory is managed by
 * github-router. Its content is informational only.
 */
const MANAGED_MARKER_FILENAME = ".github-router-managed";
/**
 * Synthetic Console OAuth credential written to the router-owned
 * `CLAUDE_CONFIG_DIR/.credentials.json`, letting spawned Claude Code (and
 * any teammates it spawns) authenticate without a real user `/login`.
 *
 * Field rationale, as recorded by the original author from the `claude`
 * v2.1.140 binary (credentials-save function `guH`):
 *   - `accessToken`: sent as `Authorization: Bearer ...` to the proxy,
 *     which accepts any bearer (per CLAUDE.md "doesn't enforce auth").
 *   - `refreshToken`: only consulted by Claude Code's reactive refresh
 *     path (`nH8`), triggered by an upstream 401. The proxy keeps a
 *     no-401 invariant on the Anthropic-shape boundary, so a synthetic
 *     value is sufficient.
 *   - `expiresAt`: 2099-01-01T00:00:00Z in epoch milliseconds, far enough
 *     out that the proactive refresh check (`R8H(expiresAt)`) is false.
 *   - `scopes`: claude-ai-shaped so `tB(scopes)` is true and `Hq()`
 *     reports the full feature surface rather than "inference only".
 *   - `subscriptionType`: `"max"`, a purely client-side label
 *     (`e7()` / `Zc_()` / `CZ1()`); not validated server-side because
 *     `CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1` suppresses
 *     subscription-validation calls. Chosen as the most permissive tier.
 *
 * Key order is significant: the object is serialized with
 * `JSON.stringify` and compared against the file already on disk.
 */
const SYNTHETIC_CREDENTIAL = {
	claudeAiOauth: {
		accessToken: "github-router-synthetic",
		refreshToken: "github-router-synthetic",
		expiresAt: Date.UTC(2099, 0, 1),
		scopes: ["user:inference", "user:profile"],
		subscriptionType: "max",
		rateLimitTier: null,
		clientId: "github-router"
	}
};
127
/**
 * Snapshot-copy the user's `~/.claude/` into the router-owned
 * CLAUDE_CONFIG_DIR (real files, not symlinks — symlinks don't isolate
 * writes), classifying each top-level entry per `CLAUDE_HOME_POLICY`:
 * ISOLATED entries are skipped, MIRRORED entries are copied, and SHARED
 * entries become directory symlinks back to `~/.claude/<X>` so chat
 * history (in `projects/<cwd-hash>/<session-uuid>.jsonl`) and other
 * durable user state flow between proxy and plain-`claude` sessions.
 * Afterwards it writes the synthetic `.credentials.json` so spawned
 * Claude Code (and teammates that inherit `CLAUDE_CONFIG_DIR`)
 * authenticate, and drops the managed-dir marker file.
 *
 * Idempotent: files are re-copied only when the source `mtime` is newer
 * than the target; SHARED symlinks that already point at the right
 * target are left alone; the credentials file is rewritten only when its
 * content differs. Concurrent-safe: `mkdir({recursive:true})` is
 * idempotent, symlinks are created via temp + rename, and the
 * credentials write uses a uniquely named temp file + atomic rename so
 * Claude Code's `EZ1()` mtime watcher never sees a partial write.
 *
 * The walk uses `lstat` and skips symlinks found in the source tree
 * (symlink-confused-deputy hardening); SHARED symlinks are created on
 * the mirror side only, at predetermined targets inside `~/.claude/`.
 *
 * Call after `ensurePaths()` and before spawning Claude Code
 * (`launchChild`): the mirror must exist before the child reads it.
 *
 * @param {{ realHome?: string }} [opts] - `realHome` overrides
 *   `os.homedir()` as the location of the user's `.claude` directory.
 * @returns {Promise<void>}
 * @throws Propagates failures creating the target dir or writing the
 *   credentials file (other steps are best-effort and only logged).
 */
async function ensureClaudeConfigMirror(opts = {}) {
	const realHome = opts.realHome ?? os.homedir();
	const sourceDir = path.join(realHome, ".claude");
	const targetDir = PATHS.CLAUDE_CONFIG_DIR;
	// 448 === 0o700: the mirror holds a credential, keep it owner-only.
	await fs.mkdir(targetDir, {
		recursive: true,
		mode: 448
	});
	await chmodIfPossible(targetDir, 448);
	let sourceExists = false;
	try {
		sourceExists = (await fs.stat(sourceDir)).isDirectory();
	} catch (err) {
		if (err.code !== "ENOENT") consola.debug(`ensureClaudeConfigMirror: cannot stat ${sourceDir}:`, err);
	}
	if (sourceExists) await mirrorDirRecursive(sourceDir, targetDir, "");
	await fs.mkdir(path.join(targetDir, "agents"), { recursive: true });
	for (const name$1 of SHARED_TOPLEVEL_NAMES) await ensureSharedSymlink(name$1, sourceDir, targetDir).catch((err) => {
		consola.debug(`ensureClaudeConfigMirror: SHARED symlink for ${name$1} skipped:`, err);
	});
	const credentialsPath = path.join(targetDir, ".credentials.json");
	const desiredJson = JSON.stringify(SYNTHETIC_CREDENTIAL, null, 2);
	let needsWrite = true;
	try {
		needsWrite = (await fs.readFile(credentialsPath, "utf8")).trim() !== desiredJson.trim();
	} catch (err) {
		if (err.code !== "ENOENT") consola.debug(`ensureClaudeConfigMirror: cannot read existing credentials:`, err);
	}
	if (needsWrite) {
		// The temp name carries a random suffix in addition to the PID
		// (same scheme as ensureSharedSymlink). With a PID-only name, a
		// stale temp file left by a crashed run whose PID is later reused
		// (routine in containers) made every subsequent "wx" open fail
		// with EEXIST, which was then misread as a concurrent write and
		// the credentials were never written.
		const tempPath = `${credentialsPath}.${process.pid}.${randomBytes(4).toString("hex")}.tmp`;
		try {
			// 384 === 0o600.
			await fs.writeFile(tempPath, desiredJson + "\n", {
				mode: 384,
				flag: "wx"
			});
			await fs.rename(tempPath, credentialsPath);
		} catch (err) {
			// EEXIST now requires a random-suffix collision; the file is
			// not ours in that case, so leave it alone and skip.
			if (err.code === "EEXIST") consola.debug("ensureClaudeConfigMirror: concurrent credentials-write detected, skipping");
			else {
				await fs.unlink(tempPath).catch(() => {});
				throw err;
			}
		}
	}
	await chmodIfPossible(credentialsPath, 384);
	const markerPath = path.join(targetDir, MANAGED_MARKER_FILENAME);
	// `markerExists` doubles as "do not attempt to write the marker".
	let markerExists = false;
	try {
		const markerStat = await fs.lstat(markerPath);
		if (markerStat.isFile()) markerExists = true;
		else {
			consola.warn(`ensureClaudeConfigMirror: ${markerPath} exists but is not a regular file (mode=${markerStat.mode.toString(8)}); refusing to overwrite. Inspect and remove manually if safe.`);
			markerExists = true;
		}
	} catch (err) {
		if (err.code !== "ENOENT") {
			consola.debug(`ensureClaudeConfigMirror: cannot lstat marker:`, err);
			markerExists = true;
		}
	}
	if (!markerExists) {
		const body = `Managed by github-router. Created ${(/* @__PURE__ */ new Date()).toISOString()}. Safe to delete (will be recreated).\n`;
		// "wx" so a concurrent startup that wins the race is not clobbered.
		await fs.writeFile(markerPath, body, {
			mode: 384,
			flag: "wx"
		}).catch((err) => {
			consola.debug(`ensureClaudeConfigMirror: marker write skipped:`, err);
		});
	}
}
229
/**
 * Recursive snapshot-copy helper for `ensureClaudeConfigMirror`. Walks
 * `sourceDir/relPath` and mirrors each entry into `targetDir/relPath`.
 *
 * - At the top level (`relPath === ""`) entries are dispatched on
 *   `policyFor(name)`: `ISOLATED` and `SHARED` names are skipped here
 *   (SHARED ones are handled by `ensureSharedSymlink` afterwards);
 *   everything else is copied.
 * - Symlinks are skipped, never recreated or followed, so the walk
 *   cannot leave `sourceDir`.
 * - Regular files are copied only when the target is missing, is not a
 *   regular file, or has an older mtime than the source.
 * - Entries that are neither directories nor regular files are ignored.
 *
 * Every filesystem failure is logged at debug level and the affected
 * entry (or subtree) is skipped; the function does not reject on
 * per-entry errors.
 *
 * @param {string} sourceDir - Root of the tree being mirrored.
 * @param {string} targetDir - Root of the mirror.
 * @param {string} relPath - Path of the directory to walk, relative to
 *   both roots; `""` for the roots themselves.
 * @returns {Promise<void>}
 */
async function mirrorDirRecursive(sourceDir, targetDir, relPath) {
	const sourcePath = path.join(sourceDir, relPath);
	let entries;
	try {
		entries = await fs.readdir(sourcePath);
	} catch (err) {
		if (err.code === "ENOENT") return;
		consola.debug(`mirrorDirRecursive: cannot readdir ${sourcePath}:`, err);
		return;
	}
	for (const name$1 of entries) {
		if (relPath === "") {
			const policy = policyFor(name$1);
			if (policy === "ISOLATED" || policy === "SHARED") continue;
		}
		const childRel = relPath === "" ? name$1 : path.join(relPath, name$1);
		const childSource = path.join(sourceDir, childRel);
		const childTarget = path.join(targetDir, childRel);
		let stats;
		try {
			// lstat, not stat: symlinks must be detected, not followed.
			stats = await fs.lstat(childSource);
		} catch (err) {
			consola.debug(`mirrorDirRecursive: cannot lstat ${childSource}:`, err);
			continue;
		}
		if (stats.isSymbolicLink()) {
			consola.debug(`mirrorDirRecursive: skipping symlink ${childSource} (security policy)`);
			continue;
		}
		if (stats.isDirectory()) {
			try {
				await fs.mkdir(childTarget, { recursive: true });
			} catch (err) {
				// e.g. the mirror already holds a regular file at this path.
				// Skip the subtree rather than aborting the whole mirror,
				// consistent with the best-effort handling of every other step.
				consola.debug(`mirrorDirRecursive: cannot mkdir ${childTarget}:`, err);
				continue;
			}
			await mirrorDirRecursive(sourceDir, targetDir, childRel);
			continue;
		}
		if (stats.isFile()) {
			let needsCopy = true;
			try {
				const targetStat = await fs.lstat(childTarget);
				if (targetStat.isFile() && targetStat.mtimeMs >= stats.mtimeMs) needsCopy = false;
			} catch (err) {
				if (err.code !== "ENOENT") consola.debug(`mirrorDirRecursive: lstat target ${childTarget}:`, err);
			}
			if (!needsCopy) continue;
			try {
				// COPYFILE_FICLONE: use a copy-on-write reflink when the
				// filesystem supports it, otherwise fall back to a plain copy.
				await fs.copyFile(childSource, childTarget, fs.constants.COPYFILE_FICLONE);
			} catch (err) {
				consola.debug(`mirrorDirRecursive: copy ${childSource} → ${childTarget}:`, err);
			}
		}
	}
}
293
/**
 * Make `<mirrorDir>/<name>` a directory symlink to `<sourceDir>/<name>`
 * (i.e. `~/.local/share/github-router/claude-config/<X>` →
 * `~/.claude/<X>`). Idempotent and concurrent-safe; never rejects on
 * filesystem errors — each failure is logged and the function returns.
 *
 * Steps:
 *   1. Ensure `<sourceDir>/<name>` exists as a directory, so writes made
 *      through the symlink do not fail with ENOENT.
 *   2. Inspect whatever currently sits at `<mirrorDir>/<name>`:
 *        - nothing (ENOENT)           → create the symlink;
 *        - symlink, correct target    → done;
 *        - symlink, other target (or unreadable) → replace it;
 *        - real directory             → `rmdir` it (only succeeds when
 *          empty, so nothing can be lost) and create the symlink; on
 *          failure warn loudly and leave it untouched;
 *        - anything else              → warn loudly and leave it.
 *   3. Create the symlink at a unique side path
 *      (`<mirrorDir>/<name>.tmp.<pid>.<8 hex>`) and `rename` it into
 *      place. POSIX `rename` atomically replaces an existing symlink, so
 *      two concurrent startups cannot race to EEXIST.
 *
 * @param {string} name$1 - Top-level entry name with `SHARED` policy.
 * @param {string} sourceDir - The user's real `.claude` directory.
 * @param {string} mirrorDir - The router-owned config directory.
 * @returns {Promise<void>}
 */
async function ensureSharedSymlink(name$1, sourceDir, mirrorDir) {
	const linkTarget = path.join(sourceDir, name$1);
	const linkPath = path.join(mirrorDir, name$1);
	const tag = `ensureSharedSymlink(${name$1})`;
	try {
		await fs.mkdir(linkTarget, { recursive: true });
	} catch (err) {
		consola.debug(`${tag}: cannot mkdir source ${linkTarget}:`, err);
		return;
	}
	// `null` = slot is free; `undefined` = lstat failed for another reason.
	const occupant = await fs.lstat(linkPath).catch((err) => {
		if (err.code === "ENOENT") return null;
		consola.debug(`${tag}: cannot lstat ${linkPath}:`, err);
		return void 0;
	});
	if (occupant === void 0) return;
	if (occupant !== null) {
		if (occupant.isSymbolicLink()) {
			const pointsAt = await fs.readlink(linkPath).catch((err) => {
				consola.debug(`${tag}: cannot readlink ${linkPath}:`, err);
				return null;
			});
			if (pointsAt === linkTarget) return;
		} else if (occupant.isDirectory()) {
			try {
				await fs.rmdir(linkPath);
			} catch (err) {
				consola.warn(`ensureClaudeConfigMirror: ${linkPath} is a non-empty real directory from an older github-router version; refusing to clobber. If you want chat-history continuity for "${name$1}", move its contents into ${linkTarget}/ then delete ${linkPath}; the mirror will create a symlink on next launch. (rmdir error: ${err.code ?? "unknown"})`);
				return;
			}
		} else {
			consola.warn(`ensureClaudeConfigMirror: ${linkPath} is a regular file at a SHARED symlink slot; refusing to clobber. Inspect and remove manually if safe; the mirror will create a symlink on next launch.`);
			return;
		}
	}
	const suffix = randomBytes(4).toString("hex");
	const tempPath = `${linkPath}.tmp.${process.pid}.${suffix}`;
	try {
		await fs.symlink(linkTarget, tempPath);
	} catch (err) {
		consola.debug(`${tag}: symlink ${tempPath} failed:`, err);
		return;
	}
	try {
		await fs.rename(tempPath, linkPath);
	} catch (err) {
		consola.debug(`${tag}: rename ${tempPath} → ${linkPath} failed:`, err);
		// Do not leave the side-path symlink behind.
		await fs.unlink(tempPath).catch(() => {});
	}
}
55
371
  async function ensureFile(filePath) {
56
372
  try {
57
373
  await fs.access(filePath, fs.constants.W_OK);
@@ -138,12 +454,15 @@ function isPidAlive(pid) {
138
454
  }
139
455
  }
140
456
  /**
141
- * Sweep stale peer-* subagent .md files from `~/.claude/agents/`. Phase
142
- * 2.5 writes one .md per peer agent into the canonical agents directory
143
- * so they appear in Claude Code's Task `subagent_type` enum. Files are
144
- * named `peer-<pid>-<rand>-<agentName>.md` so this sweep can drop
145
- * orphans from crashed prior proxy sessions without touching the user's
146
- * own .md files.
457
+ * Sweep stale peer-* subagent .md files from the router-owned
458
+ * `CLAUDE_CONFIG_DIR/agents/`. Phase 2.5 writes one .md per peer agent
459
+ * into Claude Code's agents directory (now our config dir's `agents/`
460
+ * subdir, since `getClaudeCodeEnvVars` points `CLAUDE_CONFIG_DIR` at
461
+ * `PATHS.CLAUDE_CONFIG_DIR`) so they appear in Claude Code's Task
462
+ * `subagent_type` enum. Files are named `peer-<pid>-<rand>-<agentName>.md`
463
+ * so this sweep can drop orphans from crashed prior proxy sessions
464
+ * without touching the user's own .md files (which were copied into
465
+ * the same dir during `ensureClaudeConfigMirror`).
147
466
  *
148
467
  * Same liveness rule as `sweepStaleRuntimeFiles`: only delete when the
149
468
  * file's embedded PID is no longer alive. Live PIDs keep their files —
@@ -159,7 +478,7 @@ function isPidAlive(pid) {
159
478
  * realistic user filename.
160
479
  */
161
480
  async function sweepStalePeerAgentMdFiles() {
162
- const dir = path.join(os.homedir(), ".claude", "agents");
481
+ const dir = path.join(PATHS.CLAUDE_CONFIG_DIR, "agents");
163
482
  let entries;
164
483
  try {
165
484
  entries = await fs.readdir(dir);
@@ -272,19 +591,20 @@ async function forwardError(c, error) {
272
591
  }
273
592
  }, 400);
274
593
  }
594
+ const responseStatus = error.response.status === 401 ? 503 : error.response.status;
275
595
  if (isAnthropicError(errorJson)) {
276
596
  consola.error("HTTP error:", errorJson);
277
- return c.json(errorJson, error.response.status);
597
+ return c.json(errorJson, responseStatus);
278
598
  }
279
599
  const message = resolveErrorMessage(errorJson, errorText);
280
600
  consola.error("HTTP error:", errorJson ?? errorText);
281
601
  return c.json({
282
602
  type: "error",
283
603
  error: {
284
- type: resolveErrorType(error.response.status),
604
+ type: resolveErrorType(responseStatus),
285
605
  message
286
606
  }
287
- }, error.response.status);
607
+ }, responseStatus);
288
608
  }
289
609
  return c.json({
290
610
  type: "error",
@@ -341,6 +661,12 @@ function isContextOverflow(status, errorJson, errorText) {
341
661
  }
342
662
  /**
343
663
  * Map HTTP status to Anthropic error type.
664
+ *
665
+ * Note: a 401 from upstream is remapped to 503 in `forwardError` BEFORE
666
+ * this function is called (no-401 invariant — see comment there). The
667
+ * 401 → "authentication_error" mapping below is preserved for
668
+ * defensive coverage in case any code path calls `resolveErrorType`
669
+ * directly with an unsanitized status.
344
670
  */
345
671
  function resolveErrorType(status) {
346
672
  if (status === 400) return "invalid_request_error";
@@ -348,6 +674,7 @@ function resolveErrorType(status) {
348
674
  if (status === 403) return "permission_error";
349
675
  if (status === 404) return "not_found_error";
350
676
  if (status === 429) return "rate_limit_error";
677
+ if (status === 503) return "overloaded_error";
351
678
  if (status === 529) return "overloaded_error";
352
679
  return "api_error";
353
680
  }
@@ -494,9 +821,22 @@ const VSCODE_BETA_PREFIXES = [
494
821
  * to work with the Copilot API.
495
822
  *
496
823
  * Notably absent (Copilot 400s on these — verified live):
497
- * context-1m-, skills-, files-api-, code-execution-, output-128k-.
824
+ * context-1m-, skills-, files-api-, code-execution-, output-128k-,
825
+ * advisor-tool- (see EXPLICITLY_STRIPPED_BETA_PREFIXES below).
498
826
  * 1M context is unlocked by selecting `claude-opus-4.7-1m-internal`
499
827
  * as the model id, not via a beta header.
828
+ *
829
+ * Empirical verification (2026-05-11 against api.enterprise.githubcopilot.com):
830
+ * task-budgets-2026-03-13 → 200 ACCEPTED (cost-ceiling leverage)
831
+ * token-efficient-tools-2026-03-28 → 200 ACCEPTED (per-tool token saving)
832
+ * summarize-connector-text-2026-03-13 → 200 (Anthropic-internal feature flag,
833
+ * won't fire for non-ant users; allowlisted defensively for ant edge case)
834
+ * afk-mode-2026-01-31 → 200 (Anthropic-internal feature flag)
835
+ * cli-internal-2026-02-09 → 200 (USER_TYPE=ant only)
836
+ * oauth-2025-04-20 → 200 (Files-API path; Files-API itself
837
+ * is not supportable via Copilot, but the header alone is harmless)
838
+ * prompt-caching-scope-2026-01-05 → 200 even with body cache_control.scope
839
+ * stripped (already covered by `prompt-caching-` prefix above)
500
840
  */
501
841
  const EXTENDED_BETA_PREFIXES = [
502
842
  ...VSCODE_BETA_PREFIXES,
@@ -513,17 +853,39 @@ const EXTENDED_BETA_PREFIXES = [
513
853
  "mcp-client-",
514
854
  "mcp-servers-",
515
855
  "redact-thinking-",
516
- "web-search-"
856
+ "web-search-",
857
+ "task-budgets-",
858
+ "token-efficient-tools-",
859
+ "summarize-connector-text-",
860
+ "afk-mode-",
861
+ "cli-internal-",
862
+ "oauth-"
517
863
  ];
518
864
/**
 * Beta-flag prefixes that are always removed, even if an allowlist would
 * let them through. With today's allowlists this is a second line of
 * defence (none of them contains these prefixes); it exists so that a
 * future, broader allow rule (e.g. a hypothetical pattern-based mode
 * that lets `claude-*` through) cannot reintroduce them.
 *
 * Empirical (2026-05-11): Copilot answers HTTP 400
 *   `unsupported beta header(s): advisor-tool-2026-03-01`
 * to every request carrying `advisor-tool-`. Dropping the flag turns a
 * fully failed request into a working one (without ADVISOR semantics).
 * Document upstream limitation in CLAUDE.md.
 */
const EXPLICITLY_STRIPPED_BETA_PREFIXES = ["advisor-tool-"];
/**
 * Filter an `anthropic-beta` header value down to the flags that match
 * the active allowlist and do not match the explicit-strip list. The
 * allowlist is the extended prefix set when `--extended-betas` is
 * enabled and the VS Code-only set otherwise.
 *
 * @param {string} value - Raw comma-separated header value.
 * @returns {string | undefined} Comma-joined surviving flags (trimmed),
 *   or `undefined` when none survive.
 */
function filterBetaHeader(value) {
	const allowed = state.extendedBetas ? EXTENDED_BETA_PREFIXES : VSCODE_BETA_PREFIXES;
	const kept = [];
	for (const raw of value.split(",")) {
		const flag = raw.trim();
		if (!flag) continue;
		if (!allowed.some((prefix) => flag.startsWith(prefix))) continue;
		if (EXPLICITLY_STRIPPED_BETA_PREFIXES.some((prefix) => flag.startsWith(prefix))) continue;
		kept.push(flag);
	}
	return kept.join(",") || void 0;
}
528
890
  /**
529
891
  * Normalize a model ID for fuzzy comparison: lowercase, replace dots with
@@ -579,6 +941,20 @@ function resolveModel(modelId) {
579
941
  return retried;
580
942
  }
581
943
  }
944
+ if (lower.startsWith("claude-")) {
945
+ const matchSonnet = /(?:^|-)sonnet(?:-|$)/.test(lower);
946
+ const matchHaiku = /(?:^|-)haiku(?:-|$)/.test(lower);
947
+ if (matchSonnet || matchHaiku) {
948
+ const family = matchSonnet ? "sonnet" : "haiku";
949
+ const familyMembers = models.filter((m) => (/* @__PURE__ */ new RegExp(`(?:^|-)${family}(?:-|$|\\.)`)).test(m.id));
950
+ if (familyMembers.length > 0) {
951
+ familyMembers.sort((a, b) => b.id.localeCompare(a.id, void 0, { numeric: true }));
952
+ const best = familyMembers[0].id;
953
+ consola.info(`Model "${modelId}" not in Copilot catalog; falling back to highest available "${best}" (legacy ${family} slug). Pin a current catalog id to silence.`);
954
+ return best;
955
+ }
956
+ }
957
+ }
582
958
  consola.warn(`Model "${modelId}" not found in Copilot model list. Available: ${models.map((m) => m.id).join(", ")}`);
583
959
  return modelId;
584
960
  }
@@ -835,6 +1211,177 @@ const checkUsage = defineCommand({
835
1211
  }
836
1212
  });
837
1213
 
1214
+ //#endregion
1215
+ //#region src/lib/claude-version-check.ts
1216
/** Promise-returning form of `execFile`, used for the npm invocations. */
const execFileAsync = promisify(execFile);
/** npm package whose installed and published versions are compared. */
const NPM_PACKAGE = "@anthropic-ai/claude-code";
/** Minimum number of hours between two version checks. */
const THROTTLE_HOURS = 1;
/** Timeout for `npm view`, in milliseconds (5 s). */
const NPM_VIEW_TIMEOUT_MS = 5 * 1e3;
/** Timeout for `npm install -g`, in milliseconds (2 min). */
const NPM_INSTALL_TIMEOUT_MS = 2 * 60 * 1e3;
1221
/**
 * Location of the throttle cache file:
 * `<home>/.local/share/github-router/last-update-check`. The file and its
 * parent directory are created on demand by `writeCache`.
 *
 * @returns {string} Absolute path of the cache file.
 */
function cacheFilePath() {
	const segments = [".local", "share", "github-router", "last-update-check"];
	return path.join(os.homedir(), ...segments);
}
1225
/**
 * Load the throttle cache from disk.
 *
 * @returns {Promise<object | null>} The parsed cache when `checkedAt` is
 *   a string and `installedVersion` / `latestVersion` are each a string
 *   or `null`; `null` when the file is missing, unreadable, not valid
 *   JSON, or fails that shape check — which makes the caller run a fresh
 *   check.
 */
async function readCache() {
	const isNullableString = (value) => value === null || typeof value === "string";
	try {
		const parsed = JSON.parse(await fs.readFile(cacheFilePath(), "utf8"));
		const wellFormed = typeof parsed.checkedAt === "string" && isNullableString(parsed.installedVersion) && isNullableString(parsed.latestVersion);
		return wellFormed ? parsed : null;
	} catch {
		// Missing file, bad JSON, or a non-object payload all mean "no cache".
		return null;
	}
}
1239
/**
 * Persist the throttle cache as JSON, creating the parent directory when
 * needed. Best-effort: a failure is logged at debug level and swallowed.
 *
 * @param {object} cache - Value to serialize with `JSON.stringify`.
 * @returns {Promise<void>}
 */
async function writeCache(cache) {
	try {
		const parentDir = path.dirname(cacheFilePath());
		await fs.mkdir(parentDir, { recursive: true });
		const destination = cacheFilePath();
		const payload = JSON.stringify(cache);
		// 384 === 0o600 (applies when the file is created).
		await fs.writeFile(destination, payload, { mode: 384 });
	} catch (err) {
		consola.debug("Failed to write claude version-check cache:", err);
	}
}
1247
/**
 * Decide whether a fresh version check is due.
 *
 * A check is due when there is no cache, when `checkedAt` cannot be
 * parsed as a date, when `checkedAt` lies in the future (clock skew or a
 * corrupted cache — otherwise checks would stay suppressed until the
 * clock caught up), or when at least `throttleHours` have elapsed.
 *
 * @param {{ checkedAt: string } | null | undefined} cache - Cache entry
 *   as returned by `readCache`.
 * @param {number} [throttleHours=THROTTLE_HOURS] - Minimum hours between
 *   checks.
 * @returns {boolean} `true` when a check should run now.
 */
function shouldCheckNow(cache, throttleHours = THROTTLE_HOURS) {
	if (!cache) return true;
	const lastCheck = new Date(cache.checkedAt).getTime();
	if (Number.isNaN(lastCheck)) return true;
	const elapsedMs = Date.now() - lastCheck;
	// A timestamp from the future is untrustworthy; treat it as stale.
	if (elapsedMs < 0) return true;
	return elapsedMs / 1e3 / 3600 >= throttleHours;
}
1254
/**
 * Probe the installed `claude` CLI for its version by running
 * `claude --version` (3 s timeout, stderr discarded).
 *
 * @returns {string | null} The leading `X.Y.Z` of the command's output,
 *   or `null` when the command cannot be run, fails, times out, or its
 *   output does not start with a dotted three-part number.
 */
function getInstalledVersion() {
	const probeOptions = {
		stdio: ["ignore", "pipe", "ignore"],
		timeout: 3e3,
		encoding: "utf8"
	};
	try {
		const output = execFileSync("claude", ["--version"], probeOptions);
		const found = output.match(/^(\d+\.\d+\.\d+)/);
		if (!found) return null;
		return found[1];
	} catch {
		return null;
	}
}
1275
/**
 * Ask the npm registry for the latest published version of
 * `@anthropic-ai/claude-code` via `npm view <pkg> version --silent`.
 *
 * @returns {Promise<string | null>} The trimmed stdout when it starts
 *   with `X.Y.Z`; `null` when npm is unavailable, the call fails or times
 *   out, or the output has another shape.
 */
async function getLatestVersion() {
	try {
		const npmArgs = ["view", NPM_PACKAGE, "version", "--silent"];
		const result = await execFileAsync("npm", npmArgs, { timeout: NPM_VIEW_TIMEOUT_MS });
		const candidate = result.stdout.trim();
		if (/^\d+\.\d+\.\d+/.test(candidate)) return candidate;
		return null;
	} catch {
		return null;
	}
}
1293
/**
 * Compare two semver-shaped strings on their leading `X.Y.Z` only (no
 * pre-release / build-metadata ordering — sufficient for npm-published
 * stable releases).
 *
 * Tolerates imperfect input, which can reach this function from the
 * on-disk cache: a leading `v`/`V` is ignored, missing components count
 * as 0, and a component that does not start with digits counts as 0
 * instead of silently dropping out of the comparison.
 *
 * @param {string | null} installed - Currently installed version.
 * @param {string | null} latest - Latest published version.
 * @returns {boolean} `true` only when `latest` is strictly higher than
 *   `installed`; `false` when either argument is empty or `null`.
 */
function isNewer(installed, latest) {
	if (!installed || !latest) return false;
	const toCore = (version) => {
		const parts = String(version).trim().replace(/^v/i, "").split(".");
		return [0, 1, 2].map((index) => {
			// parseInt stops at the first non-digit, so "4-beta" → 4.
			const parsed = Number.parseInt(parts[index] ?? "0", 10);
			return Number.isNaN(parsed) ? 0 : parsed;
		});
	};
	const a = toCore(installed);
	const b = toCore(latest);
	for (let i = 0; i < 3; i++) {
		if (a[i] < b[i]) return true;
		if (a[i] > b[i]) return false;
	}
	return false;
}
1311
/**
 * Compare the installed `claude` version with the latest one on npm,
 * subject to the throttle. Side effect: rewrites the throttle cache
 * whenever the registry is actually queried.
 *
 * Outcomes, in evaluation order:
 *   - `opts.noCheck`                → skipped, reason `"disabled"`;
 *   - throttled (and not `force`)   → skipped, reason `"throttled"`,
 *     answered from the cached versions;
 *   - `claude` version not readable → skipped, reason `"no-claude"`;
 *   - npm lookup returned nothing   → skipped, reason `"no-npm"`;
 *   - otherwise                     → `skipped: false` with a live
 *     `needsUpdate` verdict.
 *
 * @param {{ noCheck?: boolean, force?: boolean }} [opts]
 * @returns {Promise<object>} Result with `installed`, `installedVersion`,
 *   `latestVersion`, `needsUpdate`, `skipped` and, when skipped,
 *   `skipReason`.
 */
async function checkClaudeVersion(opts = {}) {
	// Builds a "skipped" result; `fields` overrides the empty defaults
	// while keeping the property order stable.
	const skippedResult = (skipReason, fields = {}) => ({
		installed: false,
		installedVersion: null,
		latestVersion: null,
		needsUpdate: false,
		...fields,
		skipped: true,
		skipReason
	});
	if (opts.noCheck) return skippedResult("disabled");
	const cache = await readCache();
	const throttled = !opts.force && !shouldCheckNow(cache);
	if (throttled) {
		const cachedInstalled = cache?.installedVersion ?? null;
		const cachedLatest = cache?.latestVersion ?? null;
		return skippedResult("throttled", {
			installed: cache?.installedVersion !== null,
			installedVersion: cachedInstalled,
			latestVersion: cachedLatest,
			needsUpdate: isNewer(cachedInstalled, cachedLatest)
		});
	}
	const installedVersion = getInstalledVersion();
	if (installedVersion === null) return skippedResult("no-claude");
	const latestVersion = await getLatestVersion();
	// Cached even when the lookup failed, so a broken npm is not retried
	// on every launch within the throttle window.
	await writeCache({
		checkedAt: (/* @__PURE__ */ new Date()).toISOString(),
		installedVersion,
		latestVersion
	});
	if (latestVersion === null) return skippedResult("no-npm", {
		installed: true,
		installedVersion
	});
	return {
		installed: true,
		installedVersion,
		latestVersion,
		needsUpdate: isNewer(installedVersion, latestVersion),
		skipped: false
	};
}
1364
/**
 * Run `npm install -g @anthropic-ai/claude-code@latest` and wait for it
 * to finish (asynchronously, bounded by `NPM_INSTALL_TIMEOUT_MS`).
 *
 * @param {string} latestVersion - Version shown in the progress and
 *   success messages; the install itself always targets `@latest`.
 * @returns {Promise<void>}
 * @throws {Error} `npm install failed: <message>` when npm exits
 *   non-zero, cannot be spawned, or times out. The underlying error is
 *   attached as `cause`. The caller decides whether to abort the launch
 *   or continue with the older version.
 */
async function autoUpdateClaude(latestVersion) {
	consola.info(`Updating ${NPM_PACKAGE} to ${latestVersion} (this may take ~30s)...`);
	try {
		await execFileAsync("npm", [
			"install",
			"-g",
			`${NPM_PACKAGE}@latest`,
			"--silent"
		], { timeout: NPM_INSTALL_TIMEOUT_MS });
		consola.success(`${NPM_PACKAGE} updated to ${latestVersion}`);
	} catch (err) {
		const msg = err instanceof Error ? err.message : String(err);
		// Keep the original error (exit code, stderr, stack) reachable.
		throw new Error(`npm install failed: ${msg}`, { cause: err });
	}
}
1384
+
838
1385
  //#endregion
839
1386
  //#region src/lib/port.ts
840
1387
  const DEFAULT_PORT = 8787;
@@ -915,10 +1462,20 @@ const STRIPPED_PARENT_ENV_KEYS = [
915
1462
  "ANTHROPIC_CUSTOM_HEADERS",
916
1463
  "ANTHROPIC_MODEL",
917
1464
  "CLAUDE_CODE_OAUTH_TOKEN",
1465
+ "CLAUDE_CODE_OAUTH_TOKEN_FILE_DESCRIPTOR",
918
1466
  "CLAUDE_CODE_USE_BEDROCK",
919
1467
  "CLAUDE_CODE_USE_VERTEX",
920
1468
  "CLAUDE_CODE_USE_FOUNDRY",
921
1469
  "CLAUDE_CONFIG_DIR",
1470
+ "CLAUDE_BRIDGE_OAUTH_TOKEN",
1471
+ "CLAUDE_BRIDGE_BASE_URL",
1472
+ "CLAUDE_BRIDGE_SESSION_INGRESS_URL",
1473
+ "SESSION_INGRESS_URL",
1474
+ "CLAUDE_CODE_REMOTE",
1475
+ "CLAUDE_CODE_CONTAINER_ID",
1476
+ "CLAUDE_CODE_REMOTE_SESSION_ID",
1477
+ "CLAUDE_CODE_SESSION_ID",
1478
+ "CLAUDE_CODE_ADDITIONAL_PROTECTION",
922
1479
  "OPENAI_API_KEY",
923
1480
  "OPENAI_BASE_URL",
924
1481
  "CODEX_HOME"
@@ -1441,12 +1998,16 @@ function buildPeerAgentDefinitions(opts) {
1441
1998
  * Default location Claude Code reads subagent .md files from at session
1442
1999
  * startup. Files placed here populate the Task `subagent_type` enum.
1443
2000
  *
1444
- * We pin to the user's `~/.claude/agents/` because `getClaudeCodeEnvVars`
1445
- * sets `CLAUDE_CONFIG_DIR=$HOME/.claude` (the Spawned-CLI auth isolation
1446
- * trick) the spawned child reads from this exact path.
2001
+ * We point at the router-owned `PATHS.CLAUDE_CONFIG_DIR/agents/` because
2002
+ * `getClaudeCodeEnvVars` sets `CLAUDE_CONFIG_DIR=PATHS.CLAUDE_CONFIG_DIR`
2003
+ * (the snapshot-mirror substrate fix that gives spawned teammates an
2004
+ * authenticatable on-disk credential). The user's own custom-agent .md
2005
+ * files were copied into this same dir by `ensureClaudeConfigMirror`,
2006
+ * so writing peer-* files here doesn't conflict — and the boot-time
2007
+ * sweep is scoped to peer-* names only via the persona-name allowlist.
1447
2008
  */
1448
2009
function defaultAgentsDir() {
  // Subagent .md files live in the `agents` folder of the router-owned
  // Claude config directory.
  const configRoot = PATHS.CLAUDE_CONFIG_DIR;
  return path.join(configRoot, "agents");
}
1451
2012
  /**
1452
2013
  * YAML frontmatter string-escape — sufficient for our use case where
@@ -1783,7 +2344,7 @@ function initProxyFromEnv() {
1783
2344
  //#endregion
1784
2345
  //#region package.json
1785
2346
  var name = "github-router";
1786
- var version = "0.3.19";
2347
+ var version = "0.3.21";
1787
2348
 
1788
2349
  //#endregion
1789
2350
  //#region src/lib/approval.ts
@@ -1897,7 +2458,7 @@ function detectCapabilityMismatch(info, model) {
1897
2458
 
1898
2459
  //#endregion
1899
2460
  //#region src/lib/stream-relay.ts
1900
- const ENCODER$2 = new TextEncoder();
2461
+ const ENCODER$3 = new TextEncoder();
1901
2462
  /**
1902
2463
  * Detect the family of "controller has already closed" errors that Bun and
1903
2464
  * the WHATWG streams runtime throw when an enqueue/close call races with
@@ -1987,7 +2548,7 @@ function relayAnthropicStream(body, opts) {
1987
2548
  consola.error(`Upstream stream interrupted at ${opts.routePath}: bytes=${bytesRelayed} errType=${errName} message=${JSON.stringify(errMessage)}`);
1988
2549
  const event = buildAnthropicErrorEvent(errName, errMessage);
1989
2550
  try {
1990
- controller.enqueue(ENCODER$2.encode(event));
2551
+ controller.enqueue(ENCODER$3.encode(event));
1991
2552
  } catch (enqueueError) {
1992
2553
  if (!isControllerClosedError(enqueueError)) consola.warn(`Could not deliver error event to consumer at ${opts.routePath}: ${enqueueError instanceof Error ? enqueueError.message : String(enqueueError)}`);
1993
2554
  }
@@ -2471,7 +3032,7 @@ async function searchWeb(query) {
2471
3032
 
2472
3033
  //#endregion
2473
3034
  //#region src/routes/chat-completions/handler.ts
2474
- const ENCODER$1 = new TextEncoder();
3035
+ const ENCODER$2 = new TextEncoder();
2475
3036
  function formatSSE$1(chunk) {
2476
3037
  const parts = [];
2477
3038
  if (chunk.event) parts.push(`event: ${chunk.event}`);
@@ -2570,7 +3131,7 @@ async function handleCompletion$1(c) {
2570
3131
  const chunk = pendingFirstChunk;
2571
3132
  pendingFirstChunk = void 0;
2572
3133
  if (debugEnabled) consola.debug("Streaming chunk:", JSON.stringify(chunk));
2573
- safeEnqueue(controller, ENCODER$1.encode(formatSSE$1(chunk)));
3134
+ safeEnqueue(controller, ENCODER$2.encode(formatSSE$1(chunk)));
2574
3135
  return;
2575
3136
  }
2576
3137
  try {
@@ -2586,7 +3147,7 @@ async function handleCompletion$1(c) {
2586
3147
  }
2587
3148
  if (result.value === void 0 || result.value === null) return;
2588
3149
  if (debugEnabled) consola.debug("Streaming chunk:", JSON.stringify(result.value));
2589
- safeEnqueue(controller, ENCODER$1.encode(formatSSE$1(result.value)));
3150
+ safeEnqueue(controller, ENCODER$2.encode(formatSSE$1(result.value)));
2590
3151
  } catch (error) {
2591
3152
  upstreamFinished = true;
2592
3153
  if (consumerCancelled) {
@@ -2595,7 +3156,7 @@ async function handleCompletion$1(c) {
2595
3156
  return;
2596
3157
  }
2597
3158
  const { errName, errMessage } = logStreamError(c.req.path, error);
2598
- safeEnqueue(controller, ENCODER$1.encode(buildOpenAIErrorEvent(errName, errMessage)));
3159
+ safeEnqueue(controller, ENCODER$2.encode(buildOpenAIErrorEvent(errName, errMessage)));
2599
3160
  releaseUpstream(error);
2600
3161
  safeClose(controller);
2601
3162
  }
@@ -2720,7 +3281,13 @@ const createResponses = async (payload, modelHeaders, callerSignal) => {
2720
3281
  };
2721
3282
  const response = await tryRefreshAndRetry(doFetch, "/responses");
2722
3283
  if (!response.ok) {
2723
- consola.error("Failed to create responses", response);
3284
+ let bodyText;
3285
+ try {
3286
+ bodyText = await response.clone().text();
3287
+ } catch {
3288
+ bodyText = "(failed to read body)";
3289
+ }
3290
+ consola.error(`Failed to create responses: HTTP ${response.status} ${response.statusText} from ${url} — body: ${bodyText.slice(0, 2e3)}`);
2724
3291
  throw new HTTPError("Failed to create responses", response);
2725
3292
  }
2726
3293
  if (payload.stream) return events(response);
@@ -2784,6 +3351,23 @@ function isEffort(v) {
2784
3351
  * § "Concurrency cap investigation" for the full justification. */
2785
3352
  const MAX_INFLIGHT_TOOLS_CALL = 8;
2786
3353
  let inFlightToolsCall = 0;
3354
+ /**
3355
+ * Per-request AbortController registry for `notifications/cancelled`
3356
+ * (Phase D P1.5). When a client times out a tools/call before the
3357
+ * upstream Copilot fetch completes, the JSON-RPC notification:
3358
+ * { jsonrpc:"2.0", method:"notifications/cancelled",
3359
+ * params:{ requestId: "<id>", reason?: "..." } }
3360
+ * arrives. Without handling, the upstream fetch keeps running until
3361
+ * natural completion, leaking the inFlightToolsCall slot for tens of
3362
+ * minutes. Tracking the AbortController lets us abort the fetch and
3363
+ * free the slot immediately.
3364
+ *
3365
+ * Important: per CLAUDE.md "Bun request-signal quirk", we use OUR own
3366
+ * AbortController (NOT c.req.raw.signal which fires after request body
3367
+ * is consumed). The signal is threaded into createResponses /
3368
+ * createChatCompletions's `callerSignal` parameter.
3369
+ */
3370
+ const inflightAborts = /* @__PURE__ */ new Map();
2787
3371
  const RPC_PARSE_ERROR = -32700;
2788
3372
  const RPC_INVALID_REQUEST = -32600;
2789
3373
  const RPC_METHOD_NOT_FOUND = -32601;
@@ -2920,7 +3504,7 @@ function toolError(message) {
2920
3504
  isError: true
2921
3505
  };
2922
3506
  }
2923
- async function callPersona(persona, prompt, context, effort) {
3507
+ async function callPersona(persona, prompt, context, effort, signal) {
2924
3508
  const resolvedModel = resolveModel(persona.model);
2925
3509
  const userText = buildUserText(prompt, context);
2926
3510
  if (persona.endpoint === "/v1/responses") {
@@ -2936,7 +3520,7 @@ async function callPersona(persona, prompt, context, effort) {
2936
3520
  }],
2937
3521
  stream: false,
2938
3522
  reasoning: { effort }
2939
- }));
3523
+ }, void 0, signal));
2940
3524
  if (!text$1) return toolError(`persona ${persona.agentName}: empty assistant output`);
2941
3525
  return { content: [{
2942
3526
  type: "text",
@@ -2954,7 +3538,7 @@ async function callPersona(persona, prompt, context, effort) {
2954
3538
  }],
2955
3539
  stream: false,
2956
3540
  reasoning_effort: effort
2957
- }));
3541
+ }, void 0, signal));
2958
3542
  if (!text) return toolError(`persona ${persona.agentName}: empty assistant output`);
2959
3543
  return { content: [{
2960
3544
  type: "text",
@@ -2996,8 +3580,14 @@ async function handleToolsCall(body) {
2996
3580
  });
2997
3581
  inFlightToolsCall++;
2998
3582
  const startedAt = Date.now();
3583
+ const abortKey = body.id !== void 0 && body.id !== null ? body.id : void 0;
3584
+ let aborter;
3585
+ if (abortKey !== void 0) {
3586
+ aborter = new AbortController();
3587
+ inflightAborts.set(abortKey, aborter);
3588
+ }
2999
3589
  try {
3000
- const result = await callPersona(persona, prompt, context, effort);
3590
+ const result = await callPersona(persona, prompt, context, effort, aborter?.signal);
3001
3591
  logTelemetry({
3002
3592
  name: persona.agentName,
3003
3593
  model: persona.model,
@@ -3023,7 +3613,24 @@ async function handleToolsCall(body) {
3023
3613
  });
3024
3614
  } finally {
3025
3615
  inFlightToolsCall--;
3616
+ if (abortKey !== void 0) inflightAborts.delete(abortKey);
3617
+ }
3618
+ }
3619
/**
 * React to a `notifications/cancelled` message.
 *
 * `params.requestId` names the request the client gave up on. When an
 * AbortController is registered in `inflightAborts` under that id, it is
 * aborted with a "client requested cancellation" error. Ids that are
 * neither strings nor numbers are logged at debug level and ignored;
 * unknown ids are ignored silently.
 *
 * @param {object} body Parsed JSON-RPC notification.
 * @returns {void} Nothing is returned; the effect is the abort itself.
 */
function handleCancelledNotification(body) {
  const { requestId } = body.params ?? {};
  const hasUsableId = typeof requestId === "string" || typeof requestId === "number";
  if (!hasUsableId) {
    consola.debug(`[mcp] notifications/cancelled missing or invalid requestId: ${JSON.stringify(requestId)}`);
    return;
  }
  // Lookup is type-strict: 7 and "7" are different registry keys.
  inflightAborts.get(requestId)?.abort(/* @__PURE__ */ new Error("client requested cancellation"));
}
3028
3635
  async function handleRpc(_c, body) {
3029
3636
  if (body === null || typeof body !== "object" || Array.isArray(body)) return {
@@ -3045,7 +3652,11 @@ async function handleRpc(_c, body) {
3045
3652
  status: 200,
3046
3653
  body: rpcResult(body.id, {
3047
3654
  protocolVersion: MCP_PROTOCOL_VERSION,
3048
- capabilities: { tools: { listChanged: false } },
3655
+ capabilities: {
3656
+ tools: { listChanged: false },
3657
+ resources: {},
3658
+ prompts: {}
3659
+ },
3049
3660
  serverInfo: {
3050
3661
  name: SERVER_NAME,
3051
3662
  version: SERVER_VERSION
@@ -3074,6 +3685,61 @@ async function handleRpc(_c, body) {
3074
3685
  status: 200,
3075
3686
  body: await handleToolsCall(body)
3076
3687
  };
3688
+ case "resources/list":
3689
+ if (isNotification) return {
3690
+ status: 202,
3691
+ body: null
3692
+ };
3693
+ return {
3694
+ status: 200,
3695
+ body: rpcResult(body.id, { resources: [] })
3696
+ };
3697
+ case "resources/templates/list":
3698
+ if (isNotification) return {
3699
+ status: 202,
3700
+ body: null
3701
+ };
3702
+ return {
3703
+ status: 200,
3704
+ body: rpcResult(body.id, { resourceTemplates: [] })
3705
+ };
3706
+ case "resources/read": {
3707
+ if (isNotification) return {
3708
+ status: 202,
3709
+ body: null
3710
+ };
3711
+ const uri = body.params?.uri;
3712
+ return {
3713
+ status: 200,
3714
+ body: rpcError(body.id, RPC_INVALID_PARAMS, `resources/read: resource URI not found: ${typeof uri === "string" ? uri : "(missing/invalid uri)"}`)
3715
+ };
3716
+ }
3717
+ case "prompts/list":
3718
+ if (isNotification) return {
3719
+ status: 202,
3720
+ body: null
3721
+ };
3722
+ return {
3723
+ status: 200,
3724
+ body: rpcResult(body.id, { prompts: [] })
3725
+ };
3726
+ case "prompts/get": {
3727
+ if (isNotification) return {
3728
+ status: 202,
3729
+ body: null
3730
+ };
3731
+ const name$1 = body.params?.name;
3732
+ return {
3733
+ status: 200,
3734
+ body: rpcError(body.id, RPC_INVALID_PARAMS, `prompts/get: prompt name not found: ${typeof name$1 === "string" ? name$1 : "(missing/invalid name)"}`)
3735
+ };
3736
+ }
3737
+ case "notifications/cancelled":
3738
+ handleCancelledNotification(body);
3739
+ return {
3740
+ status: 202,
3741
+ body: null
3742
+ };
3077
3743
  case "ping":
3078
3744
  if (isNotification) return {
3079
3745
  status: 202,
@@ -3240,6 +3906,742 @@ async function countTokens(body, extraHeaders) {
3240
3906
  return response;
3241
3907
  }
3242
3908
 
3909
+ //#endregion
3910
+ //#region src/services/advisor/advisor.ts
3911
+ const ENCODER$1 = new TextEncoder();
3912
+ /** The tool name we inject for Copilot. Double-underscore prefix
3913
+ * avoids collision with any user MCP server's `advisor` tool. */
3914
+ const ADVISOR_INTERNAL_TOOL_NAME = "__anthropic_advisor";
3915
+ /** The Anthropic-spec name used in the translated server_tool_use
3916
+ * block sent to the client. cc-backup AdvisorMessage.tsx requires
3917
+ * this exact name to render the advisor spinner. */
3918
+ const ADVISOR_CLIENT_TOOL_NAME = "advisor";
3919
+ /** Hard cap on advisor calls per request to bound runaway behavior.
3920
+ * Matches Phase G's loop bound; ADVISOR is typically called 1-3
3921
+ * times per session per cc-backup ADVISOR_TOOL_INSTRUCTIONS. */
3922
+ const ADVISOR_MAX_TURNS = 16;
3923
+ /** Default advisor model + reasoning effort. Per gemini-critic + user
3924
+ * direction: hardcode to a cross-lab model (gpt-5.5 — Copilot's
3925
+ * /responses-only flagship) at xhigh effort. The cross-lab choice
3926
+ * gives a true "second set of eyes" instead of the main model
3927
+ * reviewing itself; xhigh effort buys the deep-dive reasoning that
3928
+ * matches Anthropic's own ADVISOR (which uses a stronger reviewer
3929
+ * model — Opus 4.6/Sonnet 4.6 typically). */
3930
+ const ADVISOR_DEFAULT_MODEL = "gpt-5.5";
3931
+ const ADVISOR_DEFAULT_EFFORT = "xhigh";
3932
+ /** ADVISOR_TOOL_INSTRUCTIONS verbatim from cc-backup
3933
+ * src/utils/advisor.ts — describes when the model should invoke
3934
+ * the advisor. Long-form prose; see source for justification. */
3935
+ const ADVISOR_TOOL_INSTRUCTIONS = `# Advisor Tool
3936
+
3937
+ You have access to an \`advisor\` tool backed by a stronger reviewer model. It takes NO parameters -- when you call it, your entire conversation history is automatically forwarded. The advisor sees the task, every tool call you've made, every result you've seen.
3938
+
3939
+ Call advisor BEFORE substantive work -- before writing code, before committing to an interpretation, before building on an assumption. If the task requires orientation first (finding files, reading code, seeing what's there), do that, then call advisor. Orientation is not substantive work. Writing, editing, and declaring an answer are.
3940
+
3941
+ Also call advisor:
3942
+ - When you believe the task is complete. BEFORE this call, make your deliverable durable: write the file, stage the change, save the result. The advisor call takes time; if the session ends during it, a durable result persists and an unwritten one doesn't.
3943
+ - When stuck -- errors recurring, approach not converging, results that don't fit.
3944
+ - When considering a change of approach.
3945
+
3946
+ On tasks longer than a few steps, call advisor at least once before committing to an approach and once before declaring done. On short reactive tasks where the next action is dictated by tool output you just read, you don't need to keep calling -- the advisor adds most of its value on the first call, before the approach crystallizes.
3947
+
3948
+ Give the advice serious weight. If you follow a step and it fails empirically, or you have primary-source evidence that contradicts a specific claim (the file says X, the code does Y), adapt. A passing self-test is not evidence the advice is wrong -- it's evidence your test doesn't check what the advice is checking.
3949
+
3950
+ If you've already retrieved data pointing one way and the advisor points another: don't silently switch. Surface the conflict in one more advisor call -- "I found X, you suggest Y, which constraint breaks the tie?" The advisor saw your evidence but may have underweighted it; a reconcile call is cheaper than committing to the wrong branch.`;
3951
+ const ADVISOR_OPT_OUT_ENV = "CLAUDE_CODE_DISABLE_ADVISOR_TOOL";
3952
/**
 * Decide whether ADVISOR handling should be enabled for a request.
 *
 * True only when the raw `anthropic-beta` header value is non-empty, the
 * opt-out env var named by `ADVISOR_OPT_OUT_ENV` is unset or empty, and at
 * least one comma-separated header entry (after trimming) starts with
 * `advisor-tool-`.
 *
 * @param {string | undefined | null} rawBetaHeader Raw header value.
 * @returns {boolean}
 */
function isAdvisorRequested(rawBetaHeader) {
  const optedOut = Boolean(process.env[ADVISOR_OPT_OUT_ENV]);
  if (!rawBetaHeader || optedOut) return false;
  for (const entry of rawBetaHeader.split(",")) {
    if (entry.trim().startsWith("advisor-tool-")) return true;
  }
  return false;
}
3964
/**
 * Inject the `__anthropic_advisor` tool definition into the request body's
 * `tools` array and return the (possibly re-serialized) body string.
 *
 * Behavior:
 *  - A body that is not valid JSON, or is valid JSON but not a plain object
 *    (`null`, a number, a string, an array), is returned unchanged — there
 *    is no `tools` array to extend.
 *  - Any tool entry whose `type` starts with `advisor_` is removed. Per the
 *    upstream error quoted by the original authors, Copilot rejects that
 *    unknown tool type ("Input tag 'advisor_20260301' found using 'type'
 *    does not match any of the expected tags"), so it must not be forwarded.
 *  - Idempotent: when a tool named `ADVISOR_INTERNAL_TOOL_NAME` is already
 *    present and nothing was stripped, the original string is returned
 *    byte-for-byte. When it is present but something was stripped, the
 *    existing entry is kept and no second copy is added.
 *
 * @param {string} rawBody Serialized request body.
 * @returns {string} Body with the advisor tool available to the model.
 */
function injectAdvisorTool(rawBody) {
  let parsed;
  try {
    parsed = JSON.parse(rawBody);
  } catch {
    return rawBody;
  }
  // `JSON.parse` accepts "null", "5", "[]" etc.; only a plain object can
  // carry a `tools` property that survives re-serialization.
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) return rawBody;

  const rawTools = Array.isArray(parsed.tools) ? parsed.tools : [];
  const tools = rawTools.filter((t) => {
    if (typeof t !== "object" || t === null) return true;
    const type = t.type;
    return typeof type !== "string" || !type.startsWith("advisor_");
  });
  const stripped = tools.length !== rawTools.length;
  const alreadyInjected = tools.some((t) => t?.name === ADVISOR_INTERNAL_TOOL_NAME);
  if (alreadyInjected && !stripped) return rawBody;

  parsed.tools = alreadyInjected ? tools : [...tools, {
    name: ADVISOR_INTERNAL_TOOL_NAME,
    description: ADVISOR_TOOL_INSTRUCTIONS,
    // The advisor takes no parameters.
    input_schema: {
      type: "object",
      properties: {},
      required: []
    }
  }];
  return JSON.stringify(parsed);
}
4010
+ /** Character budget for rendered conversation text passed to the
4011
+ * advisor model. gpt-5.5 (default advisor) caps prompt input at
4012
+ * 272,000 tokens. At a conservative ~3 chars/token (mixed prose +
4013
+ * code + JSON), 720,000 chars renders to ≈240,000 tokens, leaving
4014
+ * ~32,000 tokens of headroom for the system prompt and per-turn
4015
+ * framing overhead. Without this cap, long Claude Code sessions
4016
+ * produce 400 `model_max_prompt_tokens_exceeded` from /v1/responses
4017
+ * and the advisor falls back silently. */
4018
+ const ADVISOR_MAX_CONVERSATION_CHARS = 72e4;
4019
/**
 * Render an Anthropic-shape conversation (array of `{ role, content }`
 * messages) as one human-readable text blob for the advisor model.
 *
 * Each turn becomes a `### Turn N — role` section. String content is
 * emitted verbatim. For array content, `text` parts are emitted verbatim,
 * `tool_use` and `tool_result` parts are inlined in bracketed form, and any
 * other part type is summarized with its JSON truncated to 500 characters.
 * Missing (`null`/`undefined`) turns render as an empty `unknown` turn
 * rather than throwing.
 *
 * Truncation keeps the newest turns: walking backwards, whole turns are
 * kept while they fit in `maxChars`. When older turns are dropped a
 * `[TRUNCATED: N earlier turn(s) omitted ...]` notice is prepended. When
 * even the newest turn does not fit, only its tail is returned, reserving
 * 200 characters of the budget for the notice; with `maxChars <= 200` no
 * tail is emitted at all.
 *
 * @param {Array<object>} conversation Messages to render.
 * @param {number} [maxChars=ADVISOR_MAX_CONVERSATION_CHARS] Character budget
 *   for the rendered turns (the "earlier turns omitted" notice is not
 *   counted against it).
 * @returns {string} Rendered transcript; empty string for an empty conversation.
 */
function renderConversationAsText(conversation, maxChars = ADVISOR_MAX_CONVERSATION_CHARS) {
  const turnBlocks = [];
  for (let i = 0; i < conversation.length; i++) {
    const msg = conversation[i] ?? {};
    const role = msg.role ?? "unknown";
    const block = [`### Turn ${i + 1} — ${role}`];
    const content = msg.content;
    if (typeof content === "string") block.push(content);
    else if (Array.isArray(content)) for (const part of content) {
      if (typeof part !== "object" || part === null) continue;
      const b = part;
      if (b.type === "text" && typeof b.text === "string") block.push(b.text);
      else if (b.type === "tool_use") block.push(`[tool_use ${b.name ?? "?"}(${b.id ?? "?"}): ${JSON.stringify(b.input ?? {})}]`);
      else if (b.type === "tool_result") {
        const c = typeof b.content === "string" ? b.content : JSON.stringify(b.content);
        block.push(`[tool_result ${b.tool_use_id ?? "?"}]:\n${c}`);
      } else block.push(`[${b.type}: ${JSON.stringify(b).slice(0, 500)}]`);
    }
    block.push("");
    turnBlocks.push(block.join("\n"));
  }

  // Walk newest → oldest, keeping whole turns while they fit the budget.
  let totalChars = 0;
  let firstKeptIdx = turnBlocks.length;
  for (let i = turnBlocks.length - 1; i >= 0; i--) {
    const len = turnBlocks[i].length + 1; // +1 for the joining newline
    if (totalChars + len > maxChars) break;
    totalChars += len;
    firstKeptIdx = i;
  }

  if (firstKeptIdx === turnBlocks.length && turnBlocks.length > 0) {
    // Not even the newest turn fits: show only its tail. A non-positive
    // budget must yield no tail — `slice(-0)` / `slice(positive)` would
    // otherwise return (nearly) the whole turn.
    const tailBudget = Math.max(maxChars - 200, 0);
    const tail = tailBudget > 0 ? turnBlocks[turnBlocks.length - 1].slice(-tailBudget) : "";
    return `[TRUNCATED: conversation too long for advisor model context; only the tail of the latest (turn ${turnBlocks.length}) is shown]\n\n` + tail;
  }

  const kept = turnBlocks.slice(firstKeptIdx);
  if (firstKeptIdx > 0) kept.unshift(`[TRUNCATED: ${firstKeptIdx} earlier turn(s) omitted to fit advisor model context budget; ${turnBlocks.length - firstKeptIdx} most-recent turn(s) shown below]\n`);
  return kept.join("\n");
}
4072
/**
 * Ask the advisor model for guidance on an in-progress conversation and
 * return its text answer.
 *
 * The conversation is first flattened to a readable transcript with
 * `renderConversationAsText`, then sent to one of two upstream calls chosen
 * by the *resolved model name*:
 *  - names starting with `gpt-` or `o<digit>`, or containing `codex`
 *    (case-insensitive): `createResponses` with `reasoning.effort` set;
 *    assistant `output_text`/`text` parts are concatenated without a
 *    separator.
 *  - anything else: `createMessages` (non-streaming, `max_tokens: 4096`);
 *    `text` blocks are joined with a blank line. `advisorEffort` is not
 *    used on this path.
 *
 * Malformed upstream payloads (missing `output`/`content`, non-object
 * items) are treated as "no text" so the caller always gets the explicit
 * empty-output error instead of an opaque TypeError.
 *
 * @param {Array<object>} conversation Anthropic-shape messages.
 * @param {string} advisorModel Model id or alias; passed through `resolveModel`.
 * @param {string} advisorEffort Reasoning effort for the `/responses` path.
 * @returns {Promise<string>} Non-empty advisor text.
 * @throws {Error} When the advisor produced no text; errors from the
 *   upstream helpers propagate unchanged.
 */
async function runAdvisor(conversation, advisorModel, advisorEffort) {
  const advisorSystem = "You are an expert advisor reviewing an in-progress Claude Code session. The transcript below is the work-in-progress (turns numbered, with tool calls and results inlined). Read carefully and provide concrete, actionable advice on the next step or course-correction. Be specific — cite the parts of the transcript you're responding to. If the assistant is on the right track, say so explicitly. If they're stuck or off-track, name the specific assumption or step to revisit. Aim for 2-5 paragraphs of substantive guidance.";
  const conversationText = renderConversationAsText(conversation);
  const resolvedAdvisorModel = resolveModel(advisorModel);
  const isObject = (v) => typeof v === "object" && v !== null;

  if (/^(gpt-|o\d|.*codex)/i.test(resolvedAdvisorModel)) {
    const response = await createResponses({
      model: resolvedAdvisorModel,
      instructions: advisorSystem,
      input: [{
        role: "user",
        content: [{
          type: "input_text",
          text: conversationText
        }]
      }],
      stream: false,
      reasoning: { effort: advisorEffort }
    });
    const outputItems = Array.isArray(response?.output) ? response.output : [];
    const pieces = [];
    for (const item of outputItems) {
      // Skip reasoning items, tool calls, etc. — only assistant messages carry the answer.
      if (!isObject(item) || item.type !== "message" || item.role !== "assistant") continue;
      if (!Array.isArray(item.content)) continue;
      for (const part of item.content) {
        if (!isObject(part)) continue;
        if ((part.type === "output_text" || part.type === "text") && typeof part.text === "string") pieces.push(part.text);
      }
    }
    const responsesText = pieces.join("");
    if (!responsesText) throw new Error(`Advisor model ${resolvedAdvisorModel} returned empty assistant output`);
    return responsesText;
  }

  const json = await (await createMessages(JSON.stringify({
    model: resolvedAdvisorModel,
    max_tokens: 4096,
    system: advisorSystem,
    messages: [{
      role: "user",
      content: conversationText
    }],
    stream: false
  }), {})).json();
  const blocks = Array.isArray(json?.content) ? json.content : [];
  const text = blocks.filter((b) => isObject(b) && b.type === "text" && typeof b.text === "string").map((b) => b.text).join("\n\n");
  if (!text) throw new Error(`Advisor model ${resolvedAdvisorModel} returned empty response`);
  return text;
}
4138
/**
 * Map an upstream tool-use id to the `srvtoolu_*` id used in the
 * client-facing `server_tool_use` / `advisor_tool_result` blocks.
 *
 * A leading `toolu_` is dropped and the remainder re-prefixed with
 * `srvtoolu_`, provided the remainder is non-empty and consists solely of
 * ASCII letters, digits and underscores. Otherwise a synthetic id,
 * `srvtoolu_advisor_<fallbackIndex>`, is returned.
 *
 * @param {string} id Upstream tool-use id.
 * @param {number} fallbackIndex Index used to build the synthetic id.
 * @returns {string}
 */
function toClientServerToolUseId(id, fallbackIndex) {
  const upstreamPrefix = "toolu_";
  const candidate = id.startsWith(upstreamPrefix) ? id.substring(upstreamPrefix.length) : id;
  const isSpecCompliant = /^[a-zA-Z0-9_]+$/.test(candidate);
  return isSpecCompliant ? `srvtoolu_${candidate}` : `srvtoolu_advisor_${fallbackIndex}`;
}
4155
/**
 * Serialize one server-sent event frame:
 *
 *   event: <type>
 *   data: <JSON-encoded data>
 *   <blank line>
 *
 * @param {string} type Event name.
 * @param {*} data Payload; encoded with `JSON.stringify`.
 * @returns {string} The frame, terminated by a blank line.
 */
function sseEvent(type, data) {
  const eventLine = `event: ${type}`;
  const dataLine = `data: ${JSON.stringify(data)}`;
  // The two trailing empty entries produce the frame-terminating "\n\n".
  return [eventLine, dataLine, "", ""].join("\n");
}
4164
+ /**
4165
+ * The streaming translate-loop. Returns a ReadableStream<Uint8Array>
4166
+ * suitable to wrap with Hono's c.body() / new Response().
4167
+ *
4168
+ * @param firstResponse The first Copilot streaming response
4169
+ * @param initialConversation The conversation messages from the
4170
+ * incoming request (used as the starting context for advisor calls
4171
+ * and continuation Copilot calls).
4172
+ * @param baseBody Parsed initial request body (model, max_tokens,
4173
+ * system, etc.) — used as the template for continuation Copilot calls.
4174
+ * @param requestHeaders Extra headers (model-specific + filtered
4175
+ * anthropic-beta) for downstream Copilot calls.
4176
+ * @param advisorModel Which model to route advisor calls to. Defaults
4177
+ * to ADVISOR_DEFAULT_MODEL (cross-lab).
4178
+ */
4179
+ function buildAdvisorStream(opts) {
4180
+ const advisorModel = opts.advisorModel ?? ADVISOR_DEFAULT_MODEL;
4181
+ const advisorEffort = opts.advisorEffort ?? ADVISOR_DEFAULT_EFFORT;
4182
+ return new ReadableStream({ async start(controller) {
4183
+ const conversation = [...opts.initialConversation];
4184
+ let messageStartForwarded = false;
4185
+ let nextSyntheticIndex = 0;
4186
+ let turnsRun = 0;
4187
+ const safeEnqueue = (bytes) => {
4188
+ try {
4189
+ controller.enqueue(bytes);
4190
+ return true;
4191
+ } catch (err) {
4192
+ if (isControllerClosedError(err)) return false;
4193
+ throw err;
4194
+ }
4195
+ };
4196
+ const safeEnqueueEvent = (type, data) => safeEnqueue(ENCODER$1.encode(sseEvent(type, data)));
4197
+ async function processOneTurn(response) {
4198
+ const capturedBlocks = [];
4199
+ let advisorToolUse = null;
4200
+ const indexToBlock = /* @__PURE__ */ new Map();
4201
+ for await (const ev of events(response)) {
4202
+ if (!ev.event || !ev.data) continue;
4203
+ let payload;
4204
+ try {
4205
+ payload = JSON.parse(ev.data);
4206
+ } catch {
4207
+ if (!safeEnqueue(ENCODER$1.encode(`event: ${ev.event}\ndata: ${ev.data}\n\n`))) return {
4208
+ capturedBlocks,
4209
+ advisorToolUse
4210
+ };
4211
+ continue;
4212
+ }
4213
+ switch (ev.event) {
4214
+ case "message_start":
4215
+ if (!messageStartForwarded) {
4216
+ if (!safeEnqueueEvent(ev.event, payload)) return {
4217
+ capturedBlocks,
4218
+ advisorToolUse
4219
+ };
4220
+ messageStartForwarded = true;
4221
+ }
4222
+ continue;
4223
+ case "content_block_start": {
4224
+ const block = payload.content_block;
4225
+ const upstreamIndex = payload.index;
4226
+ if (block && upstreamIndex !== void 0) {
4227
+ const myIndex = nextSyntheticIndex++;
4228
+ if (block.type === "tool_use" && block.name === ADVISOR_INTERNAL_TOOL_NAME) {
4229
+ const id = typeof block.id === "string" ? block.id : `toolu_advisor_${myIndex}`;
4230
+ advisorToolUse = {
4231
+ index: myIndex,
4232
+ id,
4233
+ clientId: toClientServerToolUseId(id, myIndex),
4234
+ inputJson: ""
4235
+ };
4236
+ const translated = {
4237
+ ...payload,
4238
+ index: myIndex,
4239
+ content_block: {
4240
+ type: "server_tool_use",
4241
+ id: advisorToolUse.clientId,
4242
+ name: ADVISOR_CLIENT_TOOL_NAME,
4243
+ input: {}
4244
+ }
4245
+ };
4246
+ if (!safeEnqueueEvent(ev.event, translated)) return {
4247
+ capturedBlocks,
4248
+ advisorToolUse
4249
+ };
4250
+ const captured = {
4251
+ block: {
4252
+ type: "tool_use",
4253
+ id,
4254
+ name: ADVISOR_INTERNAL_TOOL_NAME,
4255
+ input: {}
4256
+ },
4257
+ partialJson: "",
4258
+ advisorReplay: { id }
4259
+ };
4260
+ capturedBlocks.push(captured);
4261
+ indexToBlock.set(upstreamIndex, captured);
4262
+ } else {
4263
+ const reindexed = {
4264
+ ...payload,
4265
+ index: myIndex
4266
+ };
4267
+ if (!safeEnqueueEvent(ev.event, reindexed)) return {
4268
+ capturedBlocks,
4269
+ advisorToolUse
4270
+ };
4271
+ const captured = {
4272
+ block: { ...block },
4273
+ partialJson: ""
4274
+ };
4275
+ capturedBlocks.push(captured);
4276
+ indexToBlock.set(upstreamIndex, captured);
4277
+ }
4278
+ }
4279
+ continue;
4280
+ }
4281
+ case "content_block_delta": {
4282
+ const upstreamIndex = payload.index;
4283
+ const delta = payload.delta;
4284
+ if (upstreamIndex !== void 0) {
4285
+ const captured = upstreamIndex !== void 0 ? indexToBlock.get(upstreamIndex) : void 0;
4286
+ const reindexed = {
4287
+ ...payload,
4288
+ index: captured ? capturedBlocks.indexOf(captured) >= 0 ? nextSyntheticIndex - capturedBlocks.length + capturedBlocks.indexOf(captured) : upstreamIndex : upstreamIndex
4289
+ };
4290
+ if (!safeEnqueueEvent(ev.event, reindexed)) return {
4291
+ capturedBlocks,
4292
+ advisorToolUse
4293
+ };
4294
+ if (captured && delta) {
4295
+ if (delta.type === "text_delta" && typeof delta.text === "string") captured.block.text = (captured.block.text ?? "") + delta.text;
4296
+ else if (delta.type === "thinking_delta" && typeof delta.thinking === "string") captured.block.thinking = (captured.block.thinking ?? "") + delta.thinking;
4297
+ else if (delta.type === "signature_delta" && typeof delta.signature === "string") captured.block.signature = (captured.block.signature ?? "") + delta.signature;
4298
+ else if (delta.type === "input_json_delta" && typeof delta.partial_json === "string") captured.partialJson += delta.partial_json;
4299
+ else if (delta.type === "citations_delta" && delta.citation) {
4300
+ if (!Array.isArray(captured.block.citations)) captured.block.citations = [];
4301
+ captured.block.citations.push(delta.citation);
4302
+ }
4303
+ }
4304
+ } else if (!safeEnqueueEvent(ev.event, payload)) return {
4305
+ capturedBlocks,
4306
+ advisorToolUse
4307
+ };
4308
+ continue;
4309
+ }
4310
+ case "content_block_stop": {
4311
+ const upstreamIndex = payload.index;
4312
+ const captured = upstreamIndex !== void 0 ? indexToBlock.get(upstreamIndex) : void 0;
4313
+ const reindexed = {
4314
+ ...payload,
4315
+ index: captured ? nextSyntheticIndex - capturedBlocks.length + capturedBlocks.indexOf(captured) : upstreamIndex ?? 0
4316
+ };
4317
+ if (!safeEnqueueEvent(ev.event, reindexed)) return {
4318
+ capturedBlocks,
4319
+ advisorToolUse
4320
+ };
4321
+ if (captured) {
4322
+ if (captured.block.type === "tool_use" && captured.partialJson.length > 0) try {
4323
+ captured.block.input = JSON.parse(captured.partialJson);
4324
+ } catch (err) {
4325
+ consola.warn(`advisor: malformed input_json_delta for tool_use id=${captured.block.id ?? "?"} name=${captured.block.name ?? "?"} partialJson.length=${captured.partialJson.length} parseError=${err instanceof Error ? err.message : String(err)}`);
4326
+ captured.block.input = {};
4327
+ }
4328
+ if (captured.block.type === "text" && (typeof captured.block.text !== "string" || captured.block.text.length === 0)) captured.dropFromReplay = true;
4329
+ }
4330
+ continue;
4331
+ }
4332
+ case "message_delta":
4333
+ if (!safeEnqueueEvent(ev.event, payload)) return {
4334
+ capturedBlocks,
4335
+ advisorToolUse
4336
+ };
4337
+ continue;
4338
+ case "message_stop":
4339
+ if (advisorToolUse) return {
4340
+ capturedBlocks,
4341
+ advisorToolUse
4342
+ };
4343
+ if (!safeEnqueueEvent(ev.event, payload)) return {
4344
+ capturedBlocks,
4345
+ advisorToolUse
4346
+ };
4347
+ return {
4348
+ capturedBlocks,
4349
+ advisorToolUse
4350
+ };
4351
+ default: if (!safeEnqueueEvent(ev.event, payload)) return {
4352
+ capturedBlocks,
4353
+ advisorToolUse
4354
+ };
4355
+ }
4356
+ }
4357
+ return {
4358
+ capturedBlocks,
4359
+ advisorToolUse
4360
+ };
4361
+ }
4362
+ try {
4363
+ let response = opts.firstResponse;
4364
+ for (turnsRun = 0; turnsRun < ADVISOR_MAX_TURNS; turnsRun++) {
4365
+ const { capturedBlocks, advisorToolUse } = await processOneTurn(response);
4366
+ if (!advisorToolUse) return;
4367
+ const assistantTurn = {
4368
+ role: "assistant",
4369
+ content: capturedBlocks.filter((c) => !c.dropFromReplay).map((c) => {
4370
+ if (c.advisorReplay) {
4371
+ const input = typeof c.block.input === "object" && c.block.input !== null ? c.block.input : {};
4372
+ return {
4373
+ type: "tool_use",
4374
+ id: c.advisorReplay.id,
4375
+ name: ADVISOR_INTERNAL_TOOL_NAME,
4376
+ input
4377
+ };
4378
+ }
4379
+ return c.block;
4380
+ })
4381
+ };
4382
+ conversation.push(assistantTurn);
4383
+ let advisorText;
4384
+ try {
4385
+ advisorText = await runAdvisor(conversation, advisorModel, advisorEffort);
4386
+ } catch (err) {
4387
+ const msg = err instanceof Error ? err.message : String(err);
4388
+ consola.warn(`Advisor model call failed: ${msg}`);
4389
+ advisorText = `[Advisor unavailable: ${msg}. Continuing without external review — proceed with caution and consider self-checking against your primary-source evidence.]`;
4390
+ }
4391
+ const resultIndex = nextSyntheticIndex++;
4392
+ if (!safeEnqueueEvent("content_block_start", {
4393
+ type: "content_block_start",
4394
+ index: resultIndex,
4395
+ content_block: {
4396
+ type: "advisor_tool_result",
4397
+ tool_use_id: advisorToolUse.clientId,
4398
+ content: {
4399
+ type: "advisor_result",
4400
+ text: advisorText
4401
+ }
4402
+ }
4403
+ })) return;
4404
+ if (!safeEnqueueEvent("content_block_stop", {
4405
+ type: "content_block_stop",
4406
+ index: resultIndex
4407
+ })) return;
4408
+ conversation.push({
4409
+ role: "user",
4410
+ content: [{
4411
+ type: "tool_result",
4412
+ tool_use_id: advisorToolUse.id,
4413
+ content: advisorText
4414
+ }]
4415
+ });
4416
+ response = await createMessages(JSON.stringify({
4417
+ ...opts.baseBody,
4418
+ messages: conversation,
4419
+ stream: true
4420
+ }), opts.requestHeaders);
4421
+ }
4422
+ const finalIndex = nextSyntheticIndex++;
4423
+ safeEnqueueEvent("content_block_start", {
4424
+ type: "content_block_start",
4425
+ index: finalIndex,
4426
+ content_block: {
4427
+ type: "text",
4428
+ text: ""
4429
+ }
4430
+ });
4431
+ safeEnqueueEvent("content_block_delta", {
4432
+ type: "content_block_delta",
4433
+ index: finalIndex,
4434
+ delta: {
4435
+ type: "text_delta",
4436
+ text: `\n\n[Advisor loop exceeded ${ADVISOR_MAX_TURNS} turns; halting]`
4437
+ }
4438
+ });
4439
+ safeEnqueueEvent("content_block_stop", {
4440
+ type: "content_block_stop",
4441
+ index: finalIndex
4442
+ });
4443
+ safeEnqueueEvent("message_stop", { type: "message_stop" });
4444
+ } catch (err) {
4445
+ const msg = err instanceof Error ? err.message : String(err);
4446
+ consola.error(`Advisor stream error: ${msg}`);
4447
+ safeEnqueueEvent("error", {
4448
+ type: "error",
4449
+ error: {
4450
+ type: "api_error",
4451
+ message: `advisor loop failed: ${msg}`
4452
+ }
4453
+ });
4454
+ } finally {
4455
+ try {
4456
+ controller.close();
4457
+ } catch {}
4458
+ }
4459
+ } });
4460
+ }
4461
+
4462
+ //#endregion
4463
+ //#region src/lib/sanitize-anthropic-body.ts
/**
 * Map a server-tool id (`srvtoolu_<suffix>`) onto the client-tool id form
 * (`toolu_<suffix>`) used for the replayed `tool_use` block.
 *
 * @param {string} srvId - Id taken from a `server_tool_use` block.
 * @param {number} fallbackIndex - Counter value used to synthesize an id
 *   when `srvId` cannot be converted.
 * @returns {string} `toolu_<suffix>` when `srvId` is `srvtoolu_` followed by
 *   one or more characters from `[a-zA-Z0-9_]`; otherwise
 *   `toolu_advisor_<fallbackIndex>`.
 */
function toCopilotToolUseId(srvId, fallbackIndex) {
	const serverPrefix = "srvtoolu_";
	const synthesize = () => `toolu_advisor_${fallbackIndex}`;
	if (!srvId.startsWith(serverPrefix)) return synthesize();
	// Only a non-empty, purely [a-zA-Z0-9_] suffix is carried over verbatim.
	const tail = srvId.slice(serverPrefix.length);
	return /^[a-zA-Z0-9_]+$/.test(tail) ? `toolu_${tail}` : synthesize();
}
4477
+ /**
4478
+ * Fast-path detector: returns true if the raw body has any chance of
4479
+ * needing sanitization. Avoids a full JSON parse for the common case
4480
+ * where the body is already spec-compliant.
4481
+ *
4482
+ * Looks for either an Anthropic-native advisor typed tool entry, or
4483
+ * any advisor-related block type that would need rewriting/
4484
+ * translating.
4485
+ */
4486
+ function bodyMightNeedSanitize(rawBody) {
4487
+ return rawBody.includes("\"server_tool_use\"") || rawBody.includes("\"advisor_tool_result\"") || /"type":"advisor_\d+"/.test(rawBody);
4488
+ }
4489
+ /**
4490
+ * Translate one assistant turn's content array, splitting at advisor
4491
+ * pairs into the multi-message structure Copilot accepts.
4492
+ *
4493
+ * Input shape (Claude Code stores everything in one assistant turn):
4494
+ * [text*, server_tool_use{advisor}, advisor_tool_result, text*, ...]
4495
+ *
4496
+ * Output: array of {role, content[]} message objects, alternating
4497
+ * assistant→user→assistant for each advisor pair encountered.
4498
+ */
4499
+ function splitAssistantTurnAtAdvisorPairs(originalContent, syntheticIndexRef) {
4500
+ const messages = [];
4501
+ let currentAssistantContent = [];
4502
+ let translated = false;
4503
+ let i = 0;
4504
+ while (i < originalContent.length) {
4505
+ const block = originalContent[i];
4506
+ const b = typeof block === "object" && block !== null ? block : null;
4507
+ if (b && b.type === "server_tool_use" && b.name === ADVISOR_INTERNAL_TOOL_NAME.replace(/^__anthropic_/, "")) {
4508
+ const stuId = typeof b.id === "string" ? b.id : "";
4509
+ const nextBlock = originalContent[i + 1];
4510
+ const next = typeof nextBlock === "object" && nextBlock !== null ? nextBlock : null;
4511
+ const copilotId = stuId.startsWith("srvtoolu_") ? toCopilotToolUseId(stuId, syntheticIndexRef.value++) : stuId.startsWith("toolu_") && /^toolu_[a-zA-Z0-9_]+$/.test(stuId) ? stuId : `toolu_advisor_${syntheticIndexRef.value++}`;
4512
+ currentAssistantContent.push({
4513
+ type: "tool_use",
4514
+ id: copilotId,
4515
+ name: ADVISOR_INTERNAL_TOOL_NAME,
4516
+ input: {}
4517
+ });
4518
+ messages.push({
4519
+ role: "assistant",
4520
+ content: currentAssistantContent
4521
+ });
4522
+ translated = true;
4523
+ let resultText = "";
4524
+ if (next && next.type === "advisor_tool_result") {
4525
+ const c = next.content;
4526
+ if (typeof c === "string") resultText = c;
4527
+ else if (typeof c === "object" && c !== null) {
4528
+ const txt = c.text;
4529
+ if (typeof txt === "string") resultText = txt;
4530
+ }
4531
+ i += 2;
4532
+ } else {
4533
+ resultText = "[Advisor result missing in conversation history.]";
4534
+ i += 1;
4535
+ }
4536
+ messages.push({
4537
+ role: "user",
4538
+ content: [{
4539
+ type: "tool_result",
4540
+ tool_use_id: copilotId,
4541
+ content: resultText
4542
+ }]
4543
+ });
4544
+ currentAssistantContent = [];
4545
+ continue;
4546
+ }
4547
+ if (b && b.type === "advisor_tool_result") {
4548
+ translated = true;
4549
+ i += 1;
4550
+ continue;
4551
+ }
4552
+ currentAssistantContent.push(block);
4553
+ i += 1;
4554
+ }
4555
+ if (currentAssistantContent.length > 0) messages.push({
4556
+ role: "assistant",
4557
+ content: currentAssistantContent
4558
+ });
4559
+ if (!translated) return {
4560
+ messages: [{
4561
+ role: "assistant",
4562
+ content: originalContent
4563
+ }],
4564
+ translated: false
4565
+ };
4566
+ return {
4567
+ messages,
4568
+ translated: true
4569
+ };
4570
+ }
/**
 * Rewrite an Anthropic request body so it no longer carries advisor-specific
 * constructs: typed `advisor_*` tool entries are removed from `tools`, and
 * assistant turns containing advisor blocks are split into plain
 * tool_use / tool_result message pairs.
 *
 * @param {string} rawBody - Request body as received.
 * @returns {string} `rawBody` itself when nothing had to change (including
 *   when it is not valid JSON); otherwise the re-serialized body.
 */
function sanitizeAnthropicBody(rawBody) {
	if (!bodyMightNeedSanitize(rawBody)) return rawBody;
	let doc;
	try {
		doc = JSON.parse(rawBody);
	} catch {
		return rawBody;
	}
	const isRecord = (value) => typeof value === "object" && value !== null;
	let changed = false;
	if (Array.isArray(doc.tools)) {
		const declared = doc.tools;
		const kept = declared.filter((tool) => {
			if (!isRecord(tool)) return true;
			const toolType = tool.type;
			return !(typeof toolType === "string" && toolType.startsWith("advisor_"));
		});
		if (kept.length !== declared.length) {
			doc.tools = kept;
			changed = true;
		}
	}
	if (Array.isArray(doc.messages)) {
		const idCounter = { value: 0 };
		const carriesAdvisorBlock = (entry) => {
			if (!isRecord(entry)) return false;
			const entryType = entry.type;
			const entryName = entry.name;
			return entryType === "server_tool_use" && entryName === "advisor" || entryType === "advisor_tool_result";
		};
		const output = [];
		let splitAny = false;
		for (const message of doc.messages) {
			// Only assistant turns with array content and an advisor block are rewritten.
			const candidate = isRecord(message) && message.role === "assistant" && Array.isArray(message.content) && message.content.some((entry) => carriesAdvisorBlock(entry));
			if (!candidate) {
				output.push(message);
				continue;
			}
			const outcome = splitAssistantTurnAtAdvisorPairs(message.content, idCounter);
			if (!outcome.translated) {
				output.push(message);
				continue;
			}
			splitAny = true;
			for (const piece of outcome.messages) output.push(piece);
		}
		if (splitAny) {
			doc.messages = output;
			changed = true;
			// The rewritten history references the internal advisor tool, so
			// declare it unless a tool with that name is already present.
			const currentTools = Array.isArray(doc.tools) ? doc.tools : [];
			const alreadyDeclared = currentTools.some((tool) => isRecord(tool) && tool.name === ADVISOR_INTERNAL_TOOL_NAME);
			if (!alreadyDeclared) doc.tools = [...currentTools, {
				name: ADVISOR_INTERNAL_TOOL_NAME,
				description: ADVISOR_TOOL_INSTRUCTIONS,
				input_schema: {
					type: "object",
					properties: {},
					required: []
				}
			}];
		}
	}
	return changed ? JSON.stringify(doc) : rawBody;
}
4644
+
3243
4645
  //#endregion
3244
4646
  //#region src/lib/diagnose-response.ts
3245
4647
  const PREVIEW_LIMIT = 200;
@@ -3290,7 +4692,18 @@ function stripWebSearchFromBody(rawBody) {
3290
4692
  */
3291
4693
  async function handleCountTokens(c) {
3292
4694
  const startTime = Date.now();
3293
- const { body: finalBody, originalModel, resolvedModel } = resolveModelInBody$1(stripWebSearchFromBody(await c.req.text()));
4695
+ const strippedBody = stripWebSearchFromBody(sanitizeAnthropicBody(await c.req.text()));
4696
+ if (strippedBody.includes("\"mcp_servers\"")) try {
4697
+ const probe = JSON.parse(strippedBody);
4698
+ if (Array.isArray(probe.mcp_servers) && probe.mcp_servers.length > 0) return c.json({
4699
+ type: "error",
4700
+ error: {
4701
+ type: "invalid_request_error",
4702
+ message: "Inline `mcp_servers` body field is not supported by github-router. Configure remote MCP servers as local stdio entries in `~/.claude/mcp.json` instead."
4703
+ }
4704
+ }, 400);
4705
+ } catch {}
4706
+ const { body: finalBody, originalModel, resolvedModel } = resolveModelInBody$1(strippedBody);
3294
4707
  const extraHeaders = {};
3295
4708
  const anthropicBeta = c.req.header("anthropic-beta");
3296
4709
  if (anthropicBeta) {
@@ -3334,6 +4747,7 @@ function resolveModelInBody$1(rawBody) {
3334
4747
  }
3335
4748
  }
3336
4749
  if (rawBody.includes("\"scope\"") && sanitizeCacheControl$1(parsed)) modified = true;
4750
+ if ((rawBody.includes("\"budget\"") || rawBody.includes("\"output_config\"") || rawBody.includes("\"betas\"")) && stripAnthropicOnlyFields$1(parsed)) modified = true;
3337
4751
  const resolvedModel = typeof parsed.model === "string" ? parsed.model : originalModel;
3338
4752
  return {
3339
4753
  body: modified ? JSON.stringify(parsed) : rawBody,
@@ -3360,6 +4774,43 @@ function sanitizeCacheControl$1(body) {
3360
4774
  if (Array.isArray(body.tools)) for (const tool of body.tools) stripScope(tool);
3361
4775
  return stripped;
3362
4776
  }
/**
 * count_tokens variant of the top-level field stripper: removes `budget`,
 * every `output_config` key other than `effort`, and an array-valued
 * `betas` from the parsed request body, logging a warning for each removal.
 * Kept structurally parallel to the Messages handler's
 * `stripAnthropicOnlyFields`; the two must stay in lock-step.
 *
 * @param {object} body - Parsed request body; mutated in place.
 * @returns {boolean} True when at least one field was removed.
 */
function stripAnthropicOnlyFields$1(body) {
	let changed = false;
	if (body.budget !== undefined) {
		consola.warn("[count_tokens] Stripping body-level `budget` field (Copilot 400s)");
		delete body.budget;
		changed = true;
	}
	const outputConfig = body.output_config;
	if (typeof outputConfig === "object" && outputConfig !== null) {
		// `effort` is the only key the proxy itself relies on; everything else goes.
		const foreignKeys = Object.keys(outputConfig).filter((key) => key !== "effort");
		if (foreignKeys.length > 0) {
			for (const key of foreignKeys) delete outputConfig[key];
			consola.warn("[count_tokens] Stripping client-set `output_config` Structured-Outputs fields (Copilot 400s on `output_config.*` other than `effort`)");
			// Drop the container entirely once nothing is left in it.
			if (Object.keys(outputConfig).length === 0) delete body.output_config;
			changed = true;
		}
	}
	if (Array.isArray(body.betas)) {
		consola.warn("[count_tokens] Stripping body-level `betas` array (Copilot 400s; conveyed via header)");
		delete body.betas;
		changed = true;
	}
	return changed;
}
3363
4814
 
3364
4815
  //#endregion
3365
4816
  //#region src/routes/messages/handler.ts
@@ -3470,7 +4921,24 @@ async function handleCompletion(c) {
3470
4921
  if (debugEnabled) consola.debug("Anthropic request body:", rawBody.slice(0, 2e3));
3471
4922
  if (state.manualApprove) await awaitApproval();
3472
4923
  const betaHeaders = extractBetaHeaders(c);
3473
- const { body: resolvedBody, originalModel, resolvedModel, selectedModel } = resolveModelInBody(await processWebSearch(rawBody));
4924
+ const advisorEnabled = isAdvisorRequested(c.req.header("anthropic-beta"));
4925
+ let finalBody = await processWebSearch(rawBody);
4926
+ finalBody = sanitizeAnthropicBody(finalBody);
4927
+ if (advisorEnabled) {
4928
+ finalBody = injectAdvisorTool(finalBody);
4929
+ consola.info("ADVISOR enabled for this request — injecting __anthropic_advisor tool; will translate tool_use → server_tool_use{advisor} on the SSE stream");
4930
+ }
4931
+ if (finalBody.includes("\"mcp_servers\"")) try {
4932
+ const probe = JSON.parse(finalBody);
4933
+ if (Array.isArray(probe.mcp_servers) && probe.mcp_servers.length > 0) return c.json({
4934
+ type: "error",
4935
+ error: {
4936
+ type: "invalid_request_error",
4937
+ message: "Inline `mcp_servers` body field is not supported by github-router (Copilot returns 400 'Extra inputs are not permitted'; the proxy would need a multi-turn tool-loop translation that has unresolved design holes — see Phase G in the plan). Configure your remote MCP servers as local stdio entries in `~/.claude/mcp.json` instead — Claude Code will spawn them locally and the proxy passes their tool calls through transparently. (https://docs.claude.com/en/docs/claude-code/mcp)"
4938
+ }
4939
+ }, 400);
4940
+ } catch {}
4941
+ const { body: resolvedBody, originalModel, resolvedModel, selectedModel } = resolveModelInBody(finalBody);
3474
4942
  const modelId = resolvedModel ?? originalModel;
3475
4943
  if (modelId) logEndpointMismatch(modelId, "/v1/messages");
3476
4944
  const effectiveBetas = applyDefaultBetas(betaHeaders, resolvedModel ?? originalModel);
@@ -3524,6 +4992,25 @@ async function handleCompletion(c) {
3524
4992
  if (requestId) streamHeaders["x-request-id"] = requestId;
3525
4993
  const reqId = response.headers.get("request-id");
3526
4994
  if (reqId) streamHeaders["request-id"] = reqId;
4995
+ if (advisorEnabled && response.body) {
4996
+ let parsedBase = {};
4997
+ try {
4998
+ parsedBase = JSON.parse(resolvedBody);
4999
+ } catch {}
5000
+ const initialConversation = Array.isArray(parsedBase.messages) ? parsedBase.messages : [];
5001
+ return new Response(buildAdvisorStream({
5002
+ firstResponse: response,
5003
+ initialConversation,
5004
+ baseBody: parsedBase,
5005
+ requestHeaders: {
5006
+ ...selectedModel?.requestHeaders,
5007
+ ...effectiveBetas
5008
+ }
5009
+ }), {
5010
+ status: response.status,
5011
+ headers: streamHeaders
5012
+ });
5013
+ }
3527
5014
  return new Response(response.body ? relayAnthropicStream(response.body, { routePath: c.req.path }) : null, {
3528
5015
  status: response.status,
3529
5016
  headers: streamHeaders
@@ -3574,6 +5061,7 @@ function resolveModelInBody(rawBody) {
3574
5061
  const selectedModel = resolvedModel ? state.models?.data.find((m) => m.id === resolvedModel) : void 0;
3575
5062
  if (translateThinking(parsed, selectedModel)) modified = true;
3576
5063
  if (rawBody.includes("\"scope\"") && sanitizeCacheControl(parsed)) modified = true;
5064
+ if ((rawBody.includes("\"budget\"") || rawBody.includes("\"output_config\"") || rawBody.includes("\"betas\"")) && stripAnthropicOnlyFields(parsed)) modified = true;
3577
5065
  return {
3578
5066
  body: modified ? JSON.stringify(parsed) : rawBody,
3579
5067
  originalModel,
@@ -3689,6 +5177,81 @@ function applyDefaultBetas(betaHeaders, modelId) {
3689
5177
  "anthropic-beta": ["interleaved-thinking-2025-05-14", "context-management-2025-06-27"].join(",")
3690
5178
  };
3691
5179
  }
/**
 * Strip top-level body fields that Anthropic's Messages API accepts but
 * Copilot rejects with HTTP 400 "Extra inputs are not permitted". Mutates
 * `body` in place; returns true if anything was stripped.
 *
 * Empirical verification recorded by the original authors (2026-05-11):
 *   POST /v1/messages?beta=true { ..., budget: {total_tokens: 10000} } → 400
 *   POST /v1/messages?beta=true { ..., output_config: {schema: {...}} } → 400
 *   POST /v1/messages?beta=true { ..., betas: ["..."] } → 400
 *
 * Each strip emits a one-line consola.warn so users running with these
 * features understand the request succeeds with the *body field* dropped.
 *
 * `output_config`: every key other than `effort` is removed. Before the
 * removal, the Structured Outputs schema and type are captured so they can
 * be re-expressed as a system-prompt instruction. They are read from the
 * flat `output_config.schema` / `output_config.type` first and, when those
 * are absent, from the nested `output_config.format.schema` /
 * `output_config.format.type`, so a schema supplied in the nested shape is
 * not lost without the prompt fallback.
 *
 * NOT stripped here: `mcp_servers`, `metadata`.
 *
 * @param {object} body - Parsed request body; mutated in place.
 * @returns {boolean} True when at least one field was removed.
 */
function stripAnthropicOnlyFields(body) {
	let stripped = false;
	if (body.budget !== void 0) {
		consola.warn("Stripping body-level `budget` field (Copilot 400s; the `task-budgets-` beta header is preserved but cost ceiling is not enforced server-side)");
		delete body.budget;
		stripped = true;
	}
	const oc = body.output_config;
	if (typeof oc === "object" && oc !== null) {
		const PROXY_OWNED_FIELDS = new Set(["effort"]);
		// Capture schema/type BEFORE deleting keys; the nested `format`
		// object is itself one of the keys about to be removed.
		const format = typeof oc.format === "object" && oc.format !== null ? oc.format : void 0;
		const schema = oc.schema !== void 0 ? oc.schema : format?.schema;
		const ocType = oc.type !== void 0 ? oc.type : format?.type;
		let strippedAny = false;
		for (const key of Object.keys(oc)) if (!PROXY_OWNED_FIELDS.has(key)) {
			delete oc[key];
			strippedAny = true;
		}
		if (strippedAny) {
			const injectInstruction = schema !== void 0 || ocType === "json_object";
			// Only claim the prompt injection in the log when it actually happens.
			consola.warn(injectInstruction ? "Stripping client-set `output_config` Structured-Outputs fields (Copilot 400s on `output_config.*` other than `effort`; injecting schema as system-prompt instruction so the model still produces JSON conforming to the structured-outputs schema, since server-side enforcement is gone)" : "Stripping client-set `output_config` fields (Copilot 400s on `output_config.*` other than `effort`)");
			if (Object.keys(oc).length === 0) delete body.output_config;
			if (injectInstruction) appendStructuredOutputInstruction(body, schema, ocType);
			stripped = true;
		}
	}
	if (Array.isArray(body.betas)) {
		consola.warn("Stripping body-level `betas` array (Copilot 400s; the betas are conveyed via the `anthropic-beta` header instead)");
		delete body.betas;
		stripped = true;
	}
	return stripped;
}
/**
 * Add a "respond with JSON only" instruction to the request's system prompt.
 *
 * The instruction always demands a single JSON object; it additionally
 * embeds the serialized schema when one is given, or names the requested
 * output type when only a string type is given.
 *
 * `body.system` is updated according to its current shape:
 *   - string  → instruction appended to the string (with a blank-line gap),
 *   - array   → replaced by a new array with one extra text block appended,
 *   - other   → set to the instruction text.
 *
 * @param {object} body - Parsed request body; `body.system` is reassigned.
 * @param {*} schema - JSON Schema to embed, or `undefined`.
 * @param {*} ocType - Requested output type; used only when it is a string
 *   and no schema is given.
 * @returns {void}
 */
function appendStructuredOutputInstruction(body, schema, ocType) {
	const preamble = "\n\nIMPORTANT: Your response MUST be a single valid JSON object. Do not wrap it in markdown code fences. Do not include any text before or after the JSON object.";
	let detail = "";
	if (schema !== undefined) detail = ` The JSON object MUST conform to this JSON Schema:\n${JSON.stringify(schema)}`;
	else if (typeof ocType === "string") detail = ` Output type requested: ${ocType}.`;
	const instruction = preamble + detail;
	const existing = body.system;
	if (typeof existing === "string") {
		body.system = existing + instruction;
		return;
	}
	// Outside an existing string prompt the leading blank lines are not wanted.
	const standalone = instruction.trimStart();
	body.system = Array.isArray(existing) ? [...existing, {
		type: "text",
		text: standalone
	}] : standalone;
}
3692
5255
 
3693
5256
  //#endregion
3694
5257
  //#region src/routes/messages/route.ts
@@ -4108,6 +5671,13 @@ server.route("/v1/search", searchRoutes);
4108
5671
  server.route("/v1/messages", messageRoutes);
4109
5672
  server.route("/mcp", mcpRoutes);
4110
5673
  server.post("/api/event_logging/batch", (c) => c.body(null, 200));
5674
+ server.all("/v1/files/*", (c) => c.json({
5675
+ type: "error",
5676
+ error: {
5677
+ type: "not_found_error",
5678
+ message: "Files API is not supported by github-router (Copilot has no equivalent storage backend). Use the Anthropic API directly for file uploads/downloads."
5679
+ }
5680
+ }, 404));
4111
5681
  server.notFound((c) => c.json({
4112
5682
  type: "error",
4113
5683
  error: {
@@ -4280,47 +5850,57 @@ function parseSharedArgs(args) {
4280
5850
  * (see `src/lib/launch.ts`) BEFORE these overrides are merged in, so we
4281
5851
  * only need to provide the positive values.
4282
5852
  *
4283
- * Auth precedence in Claude Code (https://code.claude.com/docs/en/iam):
5853
+ * Auth precedence in Claude Code (https://code.claude.com/docs/en/iam),
5854
+ * after the github-router substrate fix:
4284
5855
  * 1. Cloud provider (CLAUDE_CODE_USE_BEDROCK / VERTEX / FOUNDRY) — stripped at parent.
4285
- * 2. ANTHROPIC_AUTH_TOKEN — set here to "dummy"; wins over #4–#6.
4286
- * 3. ANTHROPIC_API_KEY stripped at parent, intentionally NOT re-set
4287
- * (Claude Code emits an Auth conflict warning when both AUTH_TOKEN
4288
- * and API_KEY are present, even with dummy values).
4289
- * 4. apiKeyHelper in settings.json beaten by #2.
5856
+ * 2. ANTHROPIC_AUTH_TOKEN — NOT set by the proxy. Stripped at parent
5857
+ * (no env-source auth in the spawned child at all).
5858
+ * 3. ANTHROPIC_API_KEY — stripped at parent.
5859
+ * 4. apiKeyHelper in settings.json — copied into our config dir as
5860
+ * part of the mirror; if the user defined one, it still fires
5861
+ * and may mint an `x-api-key` header. Copilot ignores `x-api-key`,
5862
+ * so behavior is unchanged from before this fix.
4290
5863
  * 5. CLAUDE_CODE_OAUTH_TOKEN — stripped at parent.
4291
- * 6. Subscription OAuth (Keychain / ~/.claude/.credentials.json)
4292
- * INVISIBLE to the spawned child via the CLAUDE_CONFIG_DIR trick
4293
- * below. The credential file is left in place so `claude /logout`
4294
- * still works outside the proxy.
5864
+ * 6. Subscription OAuth (Keychain / `<CLAUDE_CONFIG_DIR>/.credentials.json`) —
5865
+ * the credentials file is OURS (synthetic blob, written by
5866
+ * `ensureClaudeConfigMirror`). Claude Code reads accessToken from
5867
+ * it and sends as `Authorization: Bearer <accessToken>`. The
5868
+ * teammate-spawn allowlist propagates `CLAUDE_CONFIG_DIR` to
5869
+ * children, so spawned teammates find the same synthetic credential
5870
+ * and authenticate (the bug this whole fix addresses).
4295
5871
  *
4296
5872
  * `CLAUDE_CONFIG_DIR` activates Claude Code's per-config-dir keychain
4297
- * isolation. Per binary-grep of Claude Code 2.1.126's `iN()` function:
4298
- *
4299
- * function iN(H = "") {
4300
- * let _ = B6(), // resolved config-dir path
4301
- * K = !process.env.CLAUDE_CONFIG_DIR ? "" : `-${sha256(_).slice(0, 8)}`;
4302
- * return `Claude Code${OAUTH_FILE_SUFFIX}${H}${K}`
4303
- * }
5873
+ * isolation (per binary-grep of v2.1.126's `iN()` function: when set,
5874
+ * the keychain service name becomes `Claude Code-<sha256(path)[0..8]>`,
5875
+ * missing the user's real `Claude Code` entry). Pointing it at our
5876
+ * snapshot-copied `PATHS.CLAUDE_CONFIG_DIR` preserves user customization
5877
+ * (mirrored settings.json, skills, MCP, hooks, CLAUDE.md, custom
5878
+ * agents) while giving teammates a credential they can find on disk.
4304
5879
  *
4305
- * The conditional is on PRESENCE, not value. When CLAUDE_CONFIG_DIR is
4306
- * unset (the user's normal `claude` usage), the keychain service name is
4307
- * "Claude Code" and their `/login` credential is found there. When set
4308
- * (the proxy session), the service name becomes "Claude Code-<hash>"
4309
- * the user's credential is invisible, `iCH()` returns null, and all
4310
- * three auth-conflict warnings fire `false`. The path resolves to the
4311
- * default config-dir, so settings.json/skills/MCP/plugins/hooks/CLAUDE.md
4312
- * still load from `~/.claude` as normal.
5880
+ * No-401 invariant: Claude Code's reactive refresh path (`SZ1` →
5881
+ * `D3(0,true,...)`) fires on any 401 from upstream. The synthetic
5882
+ * refreshToken would fail any real refresh attempt, so the proxy
5883
+ * MUST NOT return 401 on the Anthropic-shape boundary even when
5884
+ * upstream Copilot returns 401. See `src/routes/messages/handler.ts`.
4313
5885
  */
4314
5886
  function getClaudeCodeEnvVars(serverUrl, model) {
4315
5887
  const vars = {
4316
5888
  ANTHROPIC_BASE_URL: serverUrl,
4317
- ANTHROPIC_AUTH_TOKEN: "dummy",
4318
- CLAUDE_CONFIG_DIR: path.join(os.homedir(), ".claude"),
5889
+ CLAUDE_CONFIG_DIR: PATHS.CLAUDE_CONFIG_DIR,
4319
5890
  MCP_TIMEOUT: "600000",
4320
5891
  DISABLE_NON_ESSENTIAL_MODEL_CALLS: "1",
4321
- CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: "1"
5892
+ CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: "1",
5893
+ DISABLE_TELEMETRY: "1"
4322
5894
  };
4323
5895
  if (model) vars.ANTHROPIC_MODEL = model;
5896
+ if (process.env.ANTHROPIC_SMALL_FAST_MODEL === void 0) vars.ANTHROPIC_SMALL_FAST_MODEL = "claude-haiku-4-5";
5897
+ for (const key of [
5898
+ "CLAUDE_CODE_ENABLE_EXPERIMENTAL_ADVISOR_TOOL",
5899
+ "CLAUDE_CODE_FORK_SUBAGENT",
5900
+ "CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS",
5901
+ "CLAUDE_CODE_ENABLE_FINE_GRAINED_TOOL_STREAMING",
5902
+ "CLAUDE_CODE_ENABLE_TASKS"
5903
+ ]) if (process.env[key] === void 0) vars[key] = "1";
4324
5904
  return vars;
4325
5905
  }
4326
5906
  /**
@@ -4371,6 +5951,21 @@ const claude = defineCommand({
4371
5951
  type: "boolean",
4372
5952
  default: false,
4373
5953
  description: "Pass --strict-mcp-config to claude code so only github-router's MCP servers are loaded (hides user's existing MCP servers)"
5954
+ },
5955
+ stealth: {
5956
+ type: "boolean",
5957
+ default: false,
5958
+ description: "Opt back into VS Code-only beta header filtering. Loses leverage features (task budgets, token-efficient tools, prompt caching, etc.) but minimizes the wire-fingerprint difference from VS Code Copilot Chat. By default the `claude` subcommand enables extended/leverage betas because the spawned Claude Code already identifies itself via UA and other headers — partial stealth doesn't buy much."
5959
+ },
5960
+ "auto-update": {
5961
+ type: "boolean",
5962
+ default: true,
5963
+ description: "Check for and install latest Claude Code on launch (throttled to once per hour via ~/.local/share/github-router/last-update-check). Set to false (--no-auto-update) to keep the current installed version. Falls back gracefully if npm/network unavailable."
5964
+ },
5965
+ "update-check": {
5966
+ type: "boolean",
5967
+ default: true,
5968
+ description: "Check the npm registry for a newer Claude Code version on launch and warn if stale (non-blocking ~500ms cost). Set to false (--no-update-check) to skip the check entirely (useful for offline/CI). Independent from --auto-update: --no-update-check implies no auto-install (nothing to install since we never check)."
4374
5969
  }
4375
5970
  },
4376
5971
  async run({ args }) {
@@ -4379,6 +5974,24 @@ const claude = defineCommand({
4379
5974
  process$1.exit(1);
4380
5975
  }
4381
5976
  const parsed = parseSharedArgs(args);
5977
+ if (args.stealth) {
5978
+ parsed.extendedBetas = false;
5979
+ consola.info("Stealth mode: VS Code-only beta filtering. Leverage features disabled.");
5980
+ } else if (!args["extended-betas"]) parsed.extendedBetas = true;
5981
+ if (args["update-check"] !== false) try {
5982
+ const versionCheck = await checkClaudeVersion({ noCheck: false });
5983
+ if (versionCheck.skipped && versionCheck.skipReason === "no-claude") consola.debug("claude --version probe failed; skipping auto-update.");
5984
+ else if (versionCheck.skipped && versionCheck.skipReason === "no-npm") consola.debug("npm view @anthropic-ai/claude-code failed; skipping auto-update check (likely offline).");
5985
+ else if (versionCheck.needsUpdate && versionCheck.installedVersion && versionCheck.latestVersion) if (args["auto-update"] !== false) try {
5986
+ await autoUpdateClaude(versionCheck.latestVersion);
5987
+ } catch (err) {
5988
+ const msg = err instanceof Error ? err.message : String(err);
5989
+ consola.warn(`Auto-update of Claude Code from ${versionCheck.installedVersion} to ${versionCheck.latestVersion} failed (${msg}); continuing with installed version. Run \`npm install -g @anthropic-ai/claude-code@latest\` manually to retry.`);
5990
+ }
5991
+ else consola.warn(`Claude Code v${versionCheck.installedVersion} is installed; v${versionCheck.latestVersion} is available. Run with --auto-update (the default) to install on launch, or \`npm install -g @anthropic-ai/claude-code@latest\` manually.`);
5992
+ } catch (err) {
5993
+ consola.debug("Claude version check failed:", err);
5994
+ }
4382
5995
  let server$1;
4383
5996
  let serverUrl;
4384
5997
  try {
@@ -4393,6 +6006,12 @@ const claude = defineCommand({
4393
6006
  consola.error("Failed to start server:", error instanceof Error ? error.message : error);
4394
6007
  process$1.exit(1);
4395
6008
  }
6009
+ try {
6010
+ await ensureClaudeConfigMirror();
6011
+ } catch (err) {
6012
+ consola.error(`Failed to provision CLAUDE_CONFIG_DIR mirror: ${err instanceof Error ? err.message : String(err)}. Spawned Claude Code would not be able to authenticate.`);
6013
+ process$1.exit(1);
6014
+ }
4396
6015
  enableFileLogging();
4397
6016
  const usingDefault = !args.model;
4398
6017
  let chosenSlug = args.model ?? DEFAULT_CLAUDE_MODEL;