github-router 0.3.17 → 0.3.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -4,17 +4,16 @@ import consola from "consola";
4
4
  import fs from "node:fs/promises";
5
5
  import os from "node:os";
6
6
  import path from "node:path";
7
- import { randomBytes, randomUUID } from "node:crypto";
7
+ import { randomBytes, randomUUID, timingSafeEqual } from "node:crypto";
8
8
  import process$1 from "node:process";
9
+ import { execFileSync, spawn } from "node:child_process";
9
10
  import fs$1 from "node:fs";
10
11
  import { Writable } from "node:stream";
11
- import { execFileSync, spawn } from "node:child_process";
12
12
  import { serve } from "srvx";
13
13
  import { getProxyForUrl } from "proxy-from-env";
14
14
  import { Agent, ProxyAgent, setGlobalDispatcher } from "undici";
15
15
  import { Hono } from "hono";
16
16
  import { cors } from "hono/cors";
17
- import { streamSSE } from "hono/streaming";
18
17
  import { events } from "fetch-event-stream";
19
18
  import { z } from "zod";
20
19
  import clipboard from "clipboardy";
@@ -35,12 +34,23 @@ const PATHS = {
35
34
  },
36
35
  get CODEX_HOME() {
37
36
  return path.join(appDir(), "codex-isolated");
37
+ },
38
+ get CLAUDE_RUNTIME_DIR() {
39
+ return path.join(appDir(), "runtime");
38
40
  }
39
41
  };
40
42
  async function ensurePaths() {
41
43
  await fs.mkdir(PATHS.APP_DIR, { recursive: true });
42
44
  await fs.mkdir(PATHS.CODEX_HOME, { recursive: true });
45
+ await fs.mkdir(PATHS.CLAUDE_RUNTIME_DIR, { recursive: true });
46
+ await chmodIfPossible(PATHS.CLAUDE_RUNTIME_DIR, 448);
43
47
  await ensureFile(PATHS.GITHUB_TOKEN_PATH);
48
+ await sweepStaleRuntimeFiles().catch((err) => {
49
+ consola.debug("Runtime sweep skipped:", err);
50
+ });
51
+ await sweepStalePeerAgentMdFiles().catch((err) => {
52
+ consola.debug("Peer-agent .md sweep skipped:", err);
53
+ });
44
54
  }
45
55
  async function ensureFile(filePath) {
46
56
  try {
@@ -50,6 +60,129 @@ async function ensureFile(filePath) {
50
60
  await fs.chmod(filePath, 384);
51
61
  }
52
62
  }
63
+ async function chmodIfPossible(target, mode) {
64
+ if (process.platform === "win32") return;
65
+ try {
66
+ await fs.chmod(target, mode);
67
+ } catch (err) {
68
+ consola.debug(`chmod ${target} ${mode.toString(8)} failed:`, err);
69
+ }
70
+ }
71
+ /**
72
+ * Write a runtime tempfile securely.
73
+ *
74
+ * - Mode `0o600` so other local users (multi-tenant boxes, shared
75
+ * dev containers) can't read the per-launch nonce or runtime URL.
76
+ * - `flag: "wx"` (O_CREAT | O_EXCL | O_WRONLY) refuses to overwrite
77
+ * an existing path. POSIX open(2) with O_EXCL also rejects
78
+ * pre-placed symlinks, killing the symlink-clobber attack vector.
79
+ * - It is the caller's responsibility to pick a path NOT yet in use.
80
+ * We intentionally do NOT pre-unlink: an `lstat` + `unlink` +
81
+ * `open(O_EXCL)` sequence still has a TOCTOU window where an
82
+ * attacker can drop a symlink between unlink and open. Letting
83
+ * `wx` fail is the safer behavior — surfaces the conflict
84
+ * instead of silently following.
85
+ */
86
+ async function writeRuntimeFileSecure(filePath, content) {
87
+ await fs.writeFile(filePath, content, {
88
+ mode: 384,
89
+ flag: "wx"
90
+ });
91
+ }
92
+ /**
93
+ * Sweep stale runtime tempfiles. Removes files whose embedded PID is no
94
+ * longer a live process. A proxy crash (`kill -9`, OS reboot) leaves
95
+ * orphans that would otherwise accumulate forever — and worse, a stale
96
+ * config pointing at a now-recycled port could route MCP traffic to
97
+ * whatever process bound that port next.
98
+ *
99
+ * Naming convention: `peer-mcp-<pid>[-<hex>].json` and `peer-agents-<pid>[-<hex>].json`.
100
+ * Files not matching either pattern are left alone — this directory
101
+ * is shared with future runtime artifacts.
102
+ *
103
+ * We deliberately do NOT age-prune files whose PID is alive. A
104
+ * legitimately long-running proxy can have a tempfile older than any
105
+ * arbitrary threshold; deleting it out from under the live process
106
+ * breaks the spawned Claude Code child's MCP/agent wiring with no clean
107
+ * recovery. PID-wraparound risk is mitigated by (a) PID reuse on Linux
108
+ * being slow under typical loads, and (b) the file is only consulted by
109
+ * github-router itself — an unrelated process that inherits the PID
110
+ * never reads it.
111
+ */
112
+ async function sweepStaleRuntimeFiles() {
113
+ const dir = PATHS.CLAUDE_RUNTIME_DIR;
114
+ let entries;
115
+ try {
116
+ entries = await fs.readdir(dir);
117
+ } catch (err) {
118
+ if (err.code === "ENOENT") return;
119
+ throw err;
120
+ }
121
+ for (const name$1 of entries) {
122
+ const match = /^peer-(?:mcp|agents)-(\d+)(?:-[0-9a-f]+)?\.json$/.exec(name$1);
123
+ if (!match) continue;
124
+ const pid = Number.parseInt(match[1], 10);
125
+ const filePath = path.join(dir, name$1);
126
+ if (isPidAlive(pid)) continue;
127
+ await fs.unlink(filePath).catch(() => {});
128
+ }
129
+ }
130
+ function isPidAlive(pid) {
131
+ if (!Number.isInteger(pid) || pid <= 0) return false;
132
+ try {
133
+ process.kill(pid, 0);
134
+ return true;
135
+ } catch (err) {
136
+ if (err.code === "EPERM") return true;
137
+ return false;
138
+ }
139
+ }
140
+ /**
141
+ * Sweep stale peer-* subagent .md files from `~/.claude/agents/`. Phase
142
+ * 2.5 writes one .md per peer agent into the canonical agents directory
143
+ * so they appear in Claude Code's Task `subagent_type` enum. Files are
144
+ * named `peer-<pid>-<rand>-<agentName>.md` so this sweep can drop
145
+ * orphans from crashed prior proxy sessions without touching the user's
146
+ * own .md files.
147
+ *
148
+ * Same liveness rule as `sweepStaleRuntimeFiles`: only delete when the
149
+ * file's embedded PID is no longer alive. Live PIDs keep their files —
150
+ * a long-running proxy doesn't lose its agent registrations.
151
+ *
152
+ * Regex tightening (Phase 2.6, codex-critic + gemini-critic 2-lab finding):
153
+ * the original sweep regex `^peer-(\d+)(?:-[0-9a-f]+)?-.+\.md$` was too
154
+ * permissive — a user-authored `peer-12345-meeting-notes.md` matches
155
+ * (`12345` = "PID", `-meeting-notes` = trailing `.+`) and would be
156
+ * silently unlinked when 12345 happens to be a dead PID (overwhelmingly
157
+ * likely). Tightened to require BOTH the 8-hex-char random suffix AND
158
+ * an exact-match persona name suffix, eliminating the risk for any
159
+ * realistic user filename.
160
+ */
161
+ async function sweepStalePeerAgentMdFiles() {
162
+ const dir = path.join(os.homedir(), ".claude", "agents");
163
+ let entries;
164
+ try {
165
+ entries = await fs.readdir(dir);
166
+ } catch (err) {
167
+ if (err.code === "ENOENT") return;
168
+ throw err;
169
+ }
170
+ for (const name$1 of entries) {
171
+ const match = PEER_AGENT_MD_FILENAME.exec(name$1);
172
+ if (!match) continue;
173
+ if (isPidAlive(Number.parseInt(match[1], 10))) continue;
174
+ await fs.unlink(path.join(dir, name$1)).catch(() => {});
175
+ }
176
+ }
177
+ /**
178
+ * Strict regex matching only files this proxy writes:
179
+ * peer-<pid>-<8 hex>-<exact persona/coordinator name>.md
180
+ * The persona-name allowlist is the load-bearing protection against
181
+ * deleting user files. Update this list whenever a new persona is added
182
+ * to `PERSONAS_READ` / `PERSONAS_WRITE` in `peer-mcp-personas.ts` or a
183
+ * new coordinator-style agent is added in `codex-mcp-config.ts`.
184
+ */
185
+ const PEER_AGENT_MD_FILENAME = /^peer-(\d+)-[0-9a-f]{8}-(?:codex-critic|codex-reviewer|gemini-critic|codex-implementer|peer-review-coordinator)\.md$/;
53
186
 
54
187
  //#endregion
55
188
  //#region src/lib/state.ts
@@ -76,14 +209,14 @@ function copilotVersion(state$1) {
76
209
  const API_VERSION = "2026-01-09";
77
210
  const copilotBaseUrl = (state$1) => state$1.copilotApiUrl ?? "https://api.githubcopilot.com";
78
211
  const copilotHeaders = (state$1, vision = false, integrationId = "vscode-chat") => {
79
- const version = copilotVersion(state$1);
212
+ const version$1 = copilotVersion(state$1);
80
213
  const headers = {
81
214
  Authorization: `Bearer ${state$1.copilotToken}`,
82
215
  "content-type": standardHeaders()["content-type"],
83
216
  "copilot-integration-id": integrationId,
84
217
  "editor-version": `vscode/${state$1.vsCodeVersion}`,
85
- "editor-plugin-version": `copilot-chat/${version}`,
86
- "user-agent": `GitHubCopilotChat/${version}`,
218
+ "editor-plugin-version": `copilot-chat/${version$1}`,
219
+ "user-agent": `GitHubCopilotChat/${version$1}`,
87
220
  "openai-intent": "conversation-panel",
88
221
  "x-interaction-type": "conversation-panel",
89
222
  "x-github-api-version": API_VERSION,
@@ -119,7 +252,7 @@ var HTTPError = class extends Error {
119
252
  }
120
253
  };
121
254
  async function forwardError(c, error) {
122
- consola.error("Error occurred:", error);
255
+ consola.error(`Error occurred at ${c.req.path}:`, error);
123
256
  if (error instanceof HTTPError) {
124
257
  const errorText = await error.response.text().catch(() => "");
125
258
  let errorJson;
@@ -408,7 +541,11 @@ function normalizeModelId(id) {
408
541
  * 2. Case-insensitive match
409
542
  * 3. Family preference (opus→1m, codex→highest version)
410
543
  * 4. Normalized match (dots→dashes, letter-digit boundaries)
411
- * 5. Return as-is with a warning
544
+ * 5. Anthropic dated-slug retry: if the input matches `claude-...-YYYYMMDD`,
545
+ * strip the date and re-run the cascade once. Family-guarded so non-claude
546
+ * 8-digit suffixes can't be mis-stripped; runs after Steps 1-4 so explicit
547
+ * version pinning (a dated catalog id matched at Step 1) always wins.
548
+ * 6. Return as-is with a warning
412
549
  */
413
550
  function resolveModel(modelId) {
414
551
  const models = state.models?.data;
@@ -434,6 +571,14 @@ function resolveModel(modelId) {
434
571
  const normalized = normalizeModelId(modelId);
435
572
  const normMatch = models.find((m) => normalizeModelId(m.id) === normalized);
436
573
  if (normMatch) return normMatch.id;
574
+ const dateStripped = modelId.replace(/^(claude-[\w.-]+)-20\d{6}$/i, "$1");
575
+ if (dateStripped !== modelId) {
576
+ const retried = resolveModel(dateStripped);
577
+ if (retried !== dateStripped || models.some((m) => m.id === dateStripped)) {
578
+ consola.info(`Resolved Anthropic dated slug "${modelId}" → "${retried}" (stripped -YYYYMMDD; pass an explicit catalog id to pin a snapshot)`);
579
+ return retried;
580
+ }
581
+ }
437
582
  consola.warn(`Model "${modelId}" not found in Copilot model list. Available: ${models.map((m) => m.id).join(", ")}`);
438
583
  return modelId;
439
584
  }
@@ -473,9 +618,9 @@ const cacheVSCodeVersion = async () => {
473
618
  consola.info(`Using VSCode version: ${response}`);
474
619
  };
475
620
  const cacheCopilotVersion = async () => {
476
- const version = await getCopilotChatVersion();
477
- state.copilotVersion = version;
478
- consola.info(`Using Copilot Chat version: ${version}`);
621
+ const version$1 = await getCopilotChatVersion();
622
+ state.copilotVersion = version$1;
623
+ consola.info(`Using Copilot Chat version: ${version$1}`);
479
624
  };
480
625
 
481
626
  //#endregion
@@ -520,18 +665,62 @@ const setupCopilotToken = async () => {
520
665
  consola.debug("GitHub Copilot Token fetched successfully!");
521
666
  if (state.showToken) consola.info("Copilot token:", token);
522
667
  const refreshInterval = Math.max((refresh_in - 60) * 1e3, 1e3);
523
- setInterval(async () => {
524
- consola.debug("Refreshing Copilot token");
668
+ setInterval(() => {
669
+ refreshCopilotToken("interval");
670
+ }, refreshInterval);
671
+ };
672
+ let inflightRefresh;
673
+ let lastRefreshSuccess = 0;
674
+ let lastRefreshFailure = 0;
675
+ const REFRESH_SUCCESS_COOLDOWN_MS = 3e4;
676
+ const REFRESH_FAILURE_COOLDOWN_MS = 5e3;
677
+ async function refreshCopilotToken(reason) {
678
+ if (inflightRefresh) return inflightRefresh;
679
+ if (reason === "401-retry") {
680
+ const now = Date.now();
681
+ if (now - lastRefreshSuccess < REFRESH_SUCCESS_COOLDOWN_MS) {
682
+ consola.debug(`refreshCopilotToken(${reason}) skipped: prior success within ${REFRESH_SUCCESS_COOLDOWN_MS}ms`);
683
+ return;
684
+ }
685
+ if (now - lastRefreshFailure < REFRESH_FAILURE_COOLDOWN_MS) {
686
+ consola.debug(`refreshCopilotToken(${reason}) skipped: prior failure within ${REFRESH_FAILURE_COOLDOWN_MS}ms`);
687
+ return;
688
+ }
689
+ }
690
+ inflightRefresh = (async () => {
691
+ consola.debug(`Refreshing Copilot token (reason=${reason})`);
525
692
  try {
526
- const { token: token$1 } = await getCopilotToken();
527
- state.copilotToken = token$1;
693
+ const { token } = await getCopilotToken();
694
+ state.copilotToken = token;
695
+ lastRefreshSuccess = Date.now();
528
696
  consola.debug("Copilot token refreshed");
529
- if (state.showToken) consola.info("Refreshed Copilot token:", token$1);
697
+ if (state.showToken) consola.info("Refreshed Copilot token:", token);
530
698
  } catch (error) {
531
- consola.error("Failed to refresh Copilot token:", error);
699
+ lastRefreshFailure = Date.now();
700
+ consola.error(`Failed to refresh Copilot token (reason=${reason}):`, error);
701
+ } finally {
702
+ inflightRefresh = void 0;
532
703
  }
533
- }, refreshInterval);
534
- };
704
+ })();
705
+ return inflightRefresh;
706
+ }
707
+ /**
708
+ * Try `request()`. If it returns a 401, refresh the Copilot token (subject
709
+ * to the single-flight + refresh-storm-protection of `refreshCopilotToken`)
710
+ * and retry once. After one retry, propagate whatever the second attempt
711
+ * returned — the caller's existing 401-handling path is preserved.
712
+ *
713
+ * The `request` callback is responsible for capturing `state.copilotToken`
714
+ * locally before any await; this helper does NOT re-build the request
715
+ * itself, just re-invokes the callback after a refresh.
716
+ */
717
+ async function tryRefreshAndRetry(request, routePath) {
718
+ const first = await request();
719
+ if (first.status !== 401) return first;
720
+ consola.warn(`${routePath}: upstream returned 401, attempting one token refresh + retry`);
721
+ await refreshCopilotToken("401-retry");
722
+ return request();
723
+ }
535
724
  async function setupGitHubToken(options) {
536
725
  try {
537
726
  const githubToken = await readGithubToken();
@@ -627,13 +816,13 @@ const checkUsage = defineCommand({
627
816
  const premiumUsed = premiumTotal - premium.remaining;
628
817
  const premiumPercentUsed = premiumTotal > 0 ? premiumUsed / premiumTotal * 100 : 0;
629
818
  const premiumPercentRemaining = premium.percent_remaining;
630
- function summarizeQuota(name, snap) {
631
- if (!snap) return `${name}: N/A`;
819
+ function summarizeQuota(name$1, snap) {
820
+ if (!snap) return `${name$1}: N/A`;
632
821
  const total = snap.entitlement;
633
822
  const used = total - snap.remaining;
634
823
  const percentUsed = total > 0 ? used / total * 100 : 0;
635
824
  const percentRemaining = snap.percent_remaining;
636
- return `${name}: ${used}/${total} used (${percentUsed.toFixed(1)}% used, ${percentRemaining.toFixed(1)}% remaining)`;
825
+ return `${name$1}: ${used}/${total} used (${percentUsed.toFixed(1)}% used, ${percentRemaining.toFixed(1)}% remaining)`;
637
826
  }
638
827
  const premiumLine = `Premium: ${premiumUsed}/${premiumTotal} used (${premiumPercentUsed.toFixed(1)}% used, ${premiumPercentRemaining.toFixed(1)}% remaining)`;
639
828
  const chatLine = summarizeQuota("Chat", usage.quota_snapshots.chat);
@@ -646,100 +835,6 @@ const checkUsage = defineCommand({
646
835
  }
647
836
  });
648
837
 
649
- //#endregion
650
- //#region src/lib/file-log-reporter.ts
651
- const MAX_LOG_BYTES = 1024 * 1024;
652
- const DEDUP_MAX = 1e3;
653
- const ARG_MAX_LEN = 2048;
654
- const DEDUP_KEY_MAX_LEN = 200;
655
- const CREDENTIAL_RE = /\b(eyJ[A-Za-z0-9_-]{20,}(?:\.[A-Za-z0-9_-]+){0,2}|gh[opsu]_[A-Za-z0-9_]{20,}|Bearer\s+\S{20,})\b/g;
656
- const ALLOWED_TYPES = new Set([
657
- "fatal",
658
- "error",
659
- "warn"
660
- ]);
661
- function sanitize(line) {
662
- return line.replace(CREDENTIAL_RE, "[REDACTED]");
663
- }
664
- function serializeArg(arg) {
665
- if (typeof arg === "string") return arg;
666
- if (arg instanceof Error) {
667
- const parts = [arg.message];
668
- if (arg.stack) parts.push(arg.stack);
669
- return parts.join("\n");
670
- }
671
- return String(arg);
672
- }
673
- function formatLogLine(logObj) {
674
- return sanitize(`${logObj.date.toISOString()} [${(logObj.type ?? "error").toUpperCase()}] ${logObj.args.map((a) => {
675
- const s = serializeArg(a);
676
- return s.length > ARG_MAX_LEN ? s.slice(0, ARG_MAX_LEN) + "…" : s;
677
- }).join(" ").replace(/\r\n|\r|\n/g, "\\n")}\n`);
678
- }
679
- function makeDedupeKey(logObj) {
680
- const firstArg = logObj.args.length > 0 ? serializeArg(logObj.args[0]) : "";
681
- const key = `${logObj.type}:${firstArg}`;
682
- return key.length > DEDUP_KEY_MAX_LEN ? key.slice(0, DEDUP_KEY_MAX_LEN) : key;
683
- }
684
- function rotateIfNeeded(filePath) {
685
- let size;
686
- try {
687
- size = fs$1.statSync(filePath).size;
688
- } catch {
689
- return;
690
- }
691
- if (size <= MAX_LOG_BYTES) return;
692
- try {
693
- fs$1.renameSync(filePath, filePath + ".1");
694
- } catch {}
695
- }
696
- var FileLogReporter = class {
697
- filePath;
698
- seen = /* @__PURE__ */ new Set();
699
- writing = false;
700
- constructor(filePath) {
701
- this.filePath = filePath;
702
- rotateIfNeeded(filePath);
703
- }
704
- log(logObj, _ctx) {
705
- if (!ALLOWED_TYPES.has(logObj.type)) return;
706
- if (this.writing) return;
707
- const key = makeDedupeKey(logObj);
708
- if (this.seen.has(key)) return;
709
- if (this.seen.size >= DEDUP_MAX) this.seen.clear();
710
- this.seen.add(key);
711
- const line = formatLogLine(logObj);
712
- this.writing = true;
713
- try {
714
- const fd = fs$1.openSync(this.filePath, "a", 384);
715
- fs$1.writeSync(fd, line);
716
- fs$1.closeSync(fd);
717
- } catch {} finally {
718
- this.writing = false;
719
- }
720
- }
721
- };
722
- const nullStream = new Writable({ write(_chunk, _encoding, cb) {
723
- cb();
724
- } });
725
- /**
726
- * Switch consola to file-only mode for TUI sessions.
727
- * Removes the terminal reporter and installs a file reporter that
728
- * persists errors and warnings to disk with dedup and credential scrubbing.
729
- *
730
- * Also sinks consola's stdout/stderr streams as belt-and-suspenders:
731
- * even if a terminal reporter is re-added, it cannot write to the terminal.
732
- * Crash handlers that call process.stderr.write() directly are unaffected.
733
- * FileLogReporter uses fs.writeSync() directly and is also unaffected.
734
- */
735
- function enableFileLogging() {
736
- const reporter = new FileLogReporter(PATHS.ERROR_LOG_PATH);
737
- consola.options.throttle = 0;
738
- consola.setReporters([reporter]);
739
- consola.options.stdout = nullStream;
740
- consola.options.stderr = nullStream;
741
- }
742
-
743
838
  //#endregion
744
839
  //#region src/lib/port.ts
745
840
  const DEFAULT_PORT = 8787;
@@ -781,6 +876,18 @@ const PORT_RANGE_MAX = 65535;
781
876
  function generateRandomPort() {
782
877
  return Math.floor(Math.random() * (PORT_RANGE_MAX - PORT_RANGE_MIN + 1)) + PORT_RANGE_MIN;
783
878
  }
879
+ function envInt(key, fallback) {
880
+ const raw = process.env[key];
881
+ if (!raw) return fallback;
882
+ if (!/^[0-9]+$/.test(raw.trim())) {
883
+ consola.warn(`${key}=${JSON.stringify(raw)} is not a non-negative integer; using fallback ${fallback}`);
884
+ return fallback;
885
+ }
886
+ const parsed = Number.parseInt(raw, 10);
887
+ return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
888
+ }
889
+ const UPSTREAM_FETCH_TIMEOUT_MS = envInt("UPSTREAM_FETCH_TIMEOUT_MS", 0);
890
+ const UPSTREAM_INACTIVITY_TIMEOUT_MS = envInt("UPSTREAM_INACTIVITY_TIMEOUT_MS", 3e5);
784
891
 
785
892
  //#endregion
786
893
  //#region src/lib/launch.ts
@@ -827,34 +934,95 @@ function sanitizeParentEnv(parent) {
827
934
  for (const key of STRIPPED_PARENT_ENV_KEYS) delete sanitized[key];
828
935
  return sanitized;
829
936
  }
830
- function commandExists(name) {
937
+ function commandExists(name$1) {
831
938
  try {
832
- execFileSync(process$1.platform === "win32" ? "where.exe" : "which", [name], { stdio: "ignore" });
939
+ execFileSync(process$1.platform === "win32" ? "where.exe" : "which", [name$1], { stdio: "ignore" });
833
940
  return true;
834
941
  } catch {
835
942
  return false;
836
943
  }
837
944
  }
945
+ /**
946
+ * Provider-config flags (`-c model_providers.github_router=...`) that
947
+ * point Codex at our proxy. Extracted from `buildCodexCmd` so the new
948
+ * `codex mcp-server` MCP-config builder can reuse the exact same
949
+ * provider definition — drift between the two paths would silently
950
+ * break the MCP wiring.
951
+ */
952
+ function buildCodexProviderConfigFlags(serverUrl) {
953
+ return [
954
+ "-c",
955
+ `model_providers.github_router={name="github-router",base_url="${serverUrl}/v1",wire_api="responses",env_key="OPENAI_API_KEY"}`,
956
+ "-c",
957
+ "model_provider=github_router"
958
+ ];
959
+ }
960
+ /**
961
+ * Inspect the installed `codex` binary. Used by the codex-MCP wiring
962
+ * in `claude.ts` to gate `--codex-cli`. Codex 0.129.0 introduced the
963
+ * `mcp-server` subcommand; older versions don't expose it, so we
964
+ * downgrade to the HTTP backend with a warning.
965
+ */
966
+ function getCodexVersion() {
967
+ if (!commandExists("codex")) return { ok: false };
968
+ let raw;
969
+ try {
970
+ raw = execFileSync("codex", ["--version"], {
971
+ encoding: "utf8",
972
+ stdio: [
973
+ "ignore",
974
+ "pipe",
975
+ "ignore"
976
+ ]
977
+ }).trim();
978
+ } catch {
979
+ return { ok: false };
980
+ }
981
+ const m = /(\d+)\.(\d+)\.(\d+)/.exec(raw);
982
+ if (!m) return {
983
+ ok: false,
984
+ version: raw
985
+ };
986
+ const major = Number.parseInt(m[1], 10);
987
+ const minor = Number.parseInt(m[2], 10);
988
+ const version$1 = `${m[1]}.${m[2]}.${m[3]}`;
989
+ return {
990
+ ok: major > 0 || major === 0 && minor >= 129,
991
+ version: version$1
992
+ };
993
+ }
994
+ /**
995
+ * Codex 0.129.0 broke two things the launcher had been relying on:
996
+ * (1) `--full-auto` was removed in favor of `--sandbox` + `--ask-for-approval`;
997
+ * passing it now exits the child immediately with
998
+ * `error: unexpected argument '--full-auto' found`.
999
+ * (2) `OPENAI_BASE_URL` is silently ignored — Codex hardcodes
1000
+ * `https://api.openai.com/v1/responses` and 401s out without an
1001
+ * explicit `-c model_providers.<name>.base_url` override.
1002
+ *
1003
+ * `buildCodexCmd` builds the launch argv that works on Codex 0.129+ while
1004
+ * still being compatible with older versions that accept the same flags.
1005
+ */
1006
+ function buildCodexCmd(target) {
1007
+ const cmd = ["codex"];
1008
+ if (target.serverUrl) cmd.push(...buildCodexProviderConfigFlags(target.serverUrl));
1009
+ cmd.push("--sandbox", "workspace-write", "--ask-for-approval", "on-request", "-m", target.model ?? DEFAULT_CODEX_MODEL, ...target.extraArgs);
1010
+ return cmd;
1011
+ }
838
1012
  function buildLaunchCommand(target) {
839
1013
  return {
840
1014
  cmd: target.kind === "claude-code" ? [
841
1015
  "claude",
842
1016
  "--dangerously-skip-permissions",
843
1017
  ...target.extraArgs
844
- ] : [
845
- "codex",
846
- "--full-auto",
847
- "-m",
848
- target.model ?? DEFAULT_CODEX_MODEL,
849
- ...target.extraArgs
850
- ],
1018
+ ] : buildCodexCmd(target),
851
1019
  env: {
852
1020
  ...sanitizeParentEnv(process$1.env),
853
1021
  ...target.envVars
854
1022
  }
855
1023
  };
856
1024
  }
857
- function launchChild(target, server$1) {
1025
+ function launchChild(target, server$1, options = {}) {
858
1026
  const { cmd, env } = buildLaunchCommand(target);
859
1027
  const executable = cmd[0];
860
1028
  if (!commandExists(executable)) {
@@ -879,6 +1047,7 @@ function launchChild(target, server$1) {
879
1047
  consola.error(msg);
880
1048
  process$1.stderr.write(msg + "\n");
881
1049
  server$1.close(true).catch(() => {});
1050
+ if (options.onShutdown) Promise.resolve(options.onShutdown()).catch(() => {});
882
1051
  process$1.exit(1);
883
1052
  }
884
1053
  let cleaned = false;
@@ -893,6 +1062,9 @@ function launchChild(target, server$1) {
893
1062
  try {
894
1063
  await server$1.close(true);
895
1064
  } catch {}
1065
+ if (options.onShutdown) try {
1066
+ await options.onShutdown();
1067
+ } catch {}
896
1068
  clearTimeout(timeout);
897
1069
  }
898
1070
  function exit(code) {
@@ -914,6 +1086,606 @@ function launchChild(target, server$1) {
914
1086
  });
915
1087
  }
916
1088
 
1089
+ //#endregion
1090
+ //#region src/lib/peer-mcp-personas.ts
1091
+ const CRITIC_RUBRIC = `
1092
+ Apply this grading rubric:
1093
+ - Score 1–5 on three axes:
1094
+ A. assumption-soundness (are stated assumptions accurate? are unstated ones load-bearing?)
1095
+ B. failure-mode coverage (which realistic failure modes are unaddressed?)
1096
+ C. alternative-considered (was a meaningfully different approach weighed and rejected with reason?)
1097
+ - If every axis scores ≥ 4, reply with the literal string "no material objection" and stop. Do not invent issues to satisfy this rubric.
1098
+ - Otherwise, the lowest-scoring axis IS your critique. Lead with that single critique; secondary observations may follow as "additional notes".
1099
+
1100
+ Reply format (markdown):
1101
+ ## Verdict
1102
+ <"no material objection" OR a one-sentence summary of the load-bearing critique>
1103
+ ## Scores
1104
+ - assumption-soundness: <n>/5
1105
+ - failure-mode coverage: <n>/5
1106
+ - alternative-considered: <n>/5
1107
+ ## Critique
1108
+ <only when at least one axis < 4 — concrete, specific, actionable>
1109
+ ## Additional notes (optional)
1110
+ <secondary observations; omit if none>
1111
+
1112
+ Self-reminder (read before every reply):
1113
+ Am I still acting as the adversarial critic per the rubric above?
1114
+ If I just produced agreement, restart and apply the grading rubric instead.
1115
+ Sycophancy is the failure mode I exist to fight; manufactured contrarianism is a different failure of the same shape — do neither.
1116
+ `.trim();
1117
+ const COLD_START_CONTRACT = `
1118
+ Cold-start contract for the lead orchestrator (Opus):
1119
+ When delegating to me, paste a self-contained brief. I have no access to your scrollback, CLAUDE.md, or the project tree. Always include:
1120
+ (a) the artifact under review verbatim (code/diff/plan text),
1121
+ (b) the constraints or "done" criteria,
1122
+ (c) any prior decisions I should not relitigate.
1123
+ If your brief lacks (a), I will reply with a one-line request for the artifact instead of speculating.
1124
+ `.trim();
1125
+ const CRITIC_BASE = `You are codex-critic, an adversarial reviewer running on gpt-5.5. Your single job is to overcome the lead orchestrator's blind spots — assumptions it didn't notice it was making, failure modes it didn't enumerate, alternatives it didn't consider.
1126
+
1127
+ You are NOT a helpful assistant. You are NOT a coach. Sycophancy is the failure mode you exist to fight. Manufactured contrarianism is a different failure of the same shape — silence on good work is a valid and welcome answer.
1128
+
1129
+ ${COLD_START_CONTRACT}
1130
+
1131
+ ${CRITIC_RUBRIC}`;
1132
+ const GEMINI_CRITIC_BASE = `You are gemini-critic, an adversarial reviewer running on Gemini 3.1 Pro. You exist to provide a second-lab perspective: your training data, RLHF priors, and attention patterns are systematically different from the lead orchestrator's (Opus, Anthropic) and from codex-critic (gpt-5.5, OpenAI). Use that to surface blind spots both miss.
1133
+
1134
+ Your strengths the lead may want to draw on:
1135
+ - long-context reasoning over large artifacts (the brief may include >50k tokens of context)
1136
+ - math, proofs, and formally-stated invariants
1137
+ - cross-checking conclusions where codex-critic has already weighed in (the lead may forward you both the artifact and codex-critic's verdict)
1138
+
1139
+ You are NOT a helpful assistant. Sycophancy is the failure mode you exist to fight; do not invent issues to look thorough.
1140
+
1141
+ ${COLD_START_CONTRACT}
1142
+
1143
+ ${CRITIC_RUBRIC}`;
1144
+ const REVIEWER_BASE = `You are codex-reviewer, a line-level code reviewer running on gpt-5.3-codex. You are the code-specialist persona — your job is to read concrete code (diffs, single files, function bodies) and surface bugs, edge cases, security issues, and idiom violations.
1145
+
1146
+ You are not a critic-of-architecture. If the brief is a plan or a high-level design, redirect: "this looks like architecture review; consider codex-critic or gemini-critic." Your tool is the magnifying glass, not the wide-angle lens.
1147
+
1148
+ ${COLD_START_CONTRACT}
1149
+
1150
+ Reply format (markdown):
1151
+ ## Summary
1152
+ <one sentence: clean / N findings / blocking issue>
1153
+ ## Findings
1154
+ For each:
1155
+ ### <severity: info | low | medium | high | critical> — <one-line title>
1156
+ - location: <file:line[-line]>
1157
+ - issue: <what's wrong, why it matters in this codebase>
1158
+ - suggested fix: <minimal change OR "needs design discussion">
1159
+ Number the findings if there are more than one. List them in severity-descending order (critical first).
1160
+ If there are zero findings of any severity, reply only with "## Summary\\nClean review — no findings." and stop.
1161
+
1162
+ Self-reminder (read before every reply):
1163
+ Am I citing real code at real line numbers in the brief? If a finding doesn't have a concrete file:line citation, drop it.
1164
+ Did I rank the finding's severity by impact-in-this-codebase, not by general-principle?
1165
+ If everything looks fine, say so cleanly — do not pad with stylistic nitpicks.`;
1166
+ const IMPLEMENTER_BASE = `You are codex-implementer, a focused implementation specialist running on gpt-5.3-codex with workspace-write access. You execute scoped, well-specified coding tasks end-to-end: read the relevant files, make the change, verify it, report back.
1167
+
1168
+ You are not a planner. If the brief is vague or missing acceptance criteria, ask the lead for the missing piece BEFORE editing anything. A wasted edit is worse than a clarifying question.
1169
+
1170
+ ${COLD_START_CONTRACT}
1171
+
1172
+ What "done" looks like for an implementation task:
1173
+ - Exactly the files specified by the brief have been changed (or you reported back why a different scope was needed).
1174
+ - The change is minimal — surrounding cleanup is out of scope unless requested.
1175
+ - You ran the relevant test(s) / typecheck / linter for the touched files and report the results.
1176
+ - The summary you return enumerates each file changed with a one-line description.
1177
+
1178
+ Reply format (markdown):
1179
+ ## Status
1180
+ <complete | needs-clarification | blocked>
1181
+ ## Files changed
1182
+ - path/one.ts: <one-line description>
1183
+ - path/two.ts: <one-line description>
1184
+ ## Verification
1185
+ <commands run + outcomes>
1186
+ ## Notes
1187
+ <anything the lead must know to integrate, e.g. follow-ups intentionally not done>
1188
+
1189
+ Resilience reminder:
1190
+ If your session terminates abnormally before "Status: complete", the lead will retry once. On recovery, ask the lead to confirm what's already been done before re-applying changes — duplicate edits are worse than a slow restart.`;
1191
+ const PERSONAS_READ = Object.freeze([
1192
+ {
1193
+ agentName: "codex-critic",
1194
+ toolNameHttp: "codex_critic",
1195
+ model: "gpt-5.5",
1196
+ endpoint: "/v1/responses",
1197
+ description: "Adversarial second opinion on plans, designs, code, or systems-engineering tradeoffs. Backed by gpt-5.5 (OpenAI) — different model, different training data, different blind spots than Opus. Uses a calibrated 1–5 grading rubric and is allowed to reply 'no material objection' on solid artifacts. **CALL BEFORE: ExitPlanMode for any plan involving >2 files or new architecture; finalizing a major design choice; TeamCreate when the team's task is non-trivial.** **CALL AFTER: any commit touching concurrency, security, or streaming code paths.** If the artifact is large (>20 KB), prefer to break it into 2-4 focused batches and call this tool once per batch IN PARALLEL — each call must complete under the Claude Code MCP per-tool-call ceiling (~150s on v2.1.138 per regression #50289), so monolithic large-artifact calls will time out client-side. Aggregate findings yourself. Always pass: (a) the artifact verbatim, (b) the constraints/'done' criteria, (c) any prior decisions. Optionally pass `effort: 'xhigh'` for explicit deep dives or `effort: 'medium'` for quick sanity checks (default 'high'). The subagent has no access to your scrollback or CLAUDE.md.",
1198
+ baseInstructions: CRITIC_BASE,
1199
+ agentPrompt: "",
1200
+ writeCapable: false,
1201
+ requiresHttp: false
1202
+ },
1203
+ {
1204
+ agentName: "gemini-critic",
1205
+ toolNameHttp: "gemini_critic",
1206
+ model: "gemini-3.1-pro-preview",
1207
+ endpoint: "/v1/chat/completions",
1208
+ description: "Adversarial second opinion from a different lab. Backed by gemini-3.1-pro-preview (Google) — different training data and RLHF priors than Opus AND codex-critic, the strongest blind-spot-buster when the lead wants triangulation across three labs. Use for long-context artifacts (>50k tokens), math/proof-shaped reasoning, or as a tie-breaker after codex-critic has weighed in. **CALL BEFORE: ExitPlanMode for plans where Opus + codex-critic agree (use as triangulation); finalizing irreversible architectural choices.** **CALL AFTER: commits where you want a third-lab cross-check.** If the artifact is large (>100 KB), prefer to break into batches and call in parallel — gemini handles long context well but each per-call MCP wait is still bounded (~150s on v2.1.138). Always pass: (a) the artifact verbatim, (b) the constraints/'done' criteria, (c) any prior decisions. The `effort` parameter is forwarded but may be silently ignored by Copilot's gemini route — gemini-3.x reasoning is largely auto-applied. The subagent has no access to your scrollback or CLAUDE.md.",
1209
+ baseInstructions: GEMINI_CRITIC_BASE,
1210
+ agentPrompt: "",
1211
+ writeCapable: false,
1212
+ requiresHttp: true
1213
+ },
1214
+ {
1215
+ agentName: "codex-reviewer",
1216
+ toolNameHttp: "codex_reviewer",
1217
+ model: "gpt-5.3-codex",
1218
+ endpoint: "/v1/responses",
1219
+ description: "Line-level code review of a specific diff or file. Backed by gpt-5.3-codex (OpenAI) — the code-specialist sibling of gpt-5.5, trained heavily on code-review datasets so it catches different bugs than Opus. Prefer over codex-critic when the artifact is a concrete diff or single file (codex-critic is for plans/designs). **CALL AFTER: any non-trivial commit (>50 lines OR touching critical paths: streaming, auth, concurrency, persistence, security).** **CALL BEFORE: opening a PR or pushing changes a peer would review.** For diffs >20 KB, split by file-group and call once per group in parallel — each per-call wait is bounded (~150s on v2.1.138). Always pass: (a) the diff or file verbatim, (b) the change's intent, (c) test status. Optionally pass `effort: 'xhigh'` when reviewing security-critical code, `effort: 'medium'` for routine reviews (default 'high'). The subagent has no access to your scrollback or CLAUDE.md.",
1220
+ baseInstructions: REVIEWER_BASE,
1221
+ agentPrompt: "",
1222
+ writeCapable: false,
1223
+ requiresHttp: false
1224
+ }
1225
+ ]);
1226
+ const PERSONAS_WRITE = Object.freeze([{
1227
+ agentName: "codex-implementer",
1228
+ toolNameHttp: "codex_implementer",
1229
+ model: "gpt-5.3-codex",
1230
+ endpoint: "/v1/responses",
1231
+ description: "Targeted implementation of a self-contained coding task — actual file edits via Codex's tool-use sandbox. Backed by gpt-5.3-codex with workspace-write access (only registered when --codex-cli is set). Use only when the task has a clear spec and acceptance criteria; for tasks needing iterative tool-use across many files, prefer a Claude teammate (Agent Team). Always pass: (a) the spec, (b) the files in scope, (c) the acceptance criteria. The subagent has no access to your scrollback or CLAUDE.md.",
1232
+ baseInstructions: IMPLEMENTER_BASE,
1233
+ agentPrompt: "",
1234
+ writeCapable: true,
1235
+ requiresHttp: false
1236
+ }]);
1237
/**
 * Assemble the full system-prompt text for one persona subagent.
 *
 * The result is the persona's base instructions followed by a routing
 * section telling the subagent which MCP tool to call and with which
 * arguments. Two routing shapes exist:
 *  - codex-cli (stdio): used when `opts.codexCli` is set and the persona
 *    is not HTTP-only; the subagent calls `mcp__codex-cli__codex` and must
 *    pass model, base-instructions and sandbox settings itself.
 *  - HTTP: the subagent calls `mcp__gh-router-peers__<toolNameHttp>` with
 *    `prompt` (and optionally `context`) only.
 *
 * @param persona persona record (agentName, toolNameHttp, model,
 *   baseInstructions, writeCapable, requiresHttp)
 * @param opts `{ codexCli }` — whether the codex-cli backend is active
 * @returns the prompt as a single newline-joined string
 */
function buildAgentPrompt(persona, opts) {
  const viaCodexCli = opts.codexCli && !persona.requiresHttp;
  const toolPath = viaCodexCli ? "mcp__codex-cli__codex" : `mcp__gh-router-peers__${persona.toolNameHttp}`;

  // Both routing shapes share the first two lines.
  const invocation = [
    `Always invoke the \`${toolPath}\` tool with these arguments:`,
    " - `prompt`: the lead's brief, copied verbatim"
  ];
  if (viaCodexCli) {
    invocation.push(
      ` - \`model\`: "${persona.model}"`,
      " - `base-instructions`: the persona text below (paste verbatim, do not paraphrase)"
    );
    if (persona.writeCapable) {
      invocation.push(" - `sandbox`: \"workspace-write\"", " - `approval-policy`: \"on-request\"");
    } else {
      invocation.push(" - `sandbox`: \"read-only\"");
    }
  } else {
    invocation.push(
      " - `context` (optional): any additional file/diff content the persona needs",
      "Do NOT pass model or instructions — they are server-baked into this tool."
    );
  }

  const sections = [
    `# Subagent: ${persona.agentName}`,
    "",
    persona.baseInstructions,
    "",
    "---",
    "",
    "## Routing instructions for this subagent",
    "",
    invocation.join("\n"),
    "",
    "When the tool returns, surface its output to the lead verbatim. Do not summarize, paraphrase, or add your own commentary on top — the lead integrates the persona's reply directly."
  ];
  return sections.join("\n");
}
1280
/**
 * List the personas to register for a given mode.
 *
 * Read-only personas are always included, except HTTP-only ones when
 * `opts.geminiAvailable` is falsy. Write-capable personas are appended
 * only when `opts.codexCli` is truthy. A fresh array is returned each call.
 */
function personasFor(opts) {
  const readOnly = PERSONAS_READ.filter((persona) => !persona.requiresHttp || opts.geminiAvailable);
  return opts.codexCli ? [...readOnly, ...PERSONAS_WRITE] : readOnly;
}
1290
+
1291
+ //#endregion
1292
+ //#region src/lib/codex-mcp-config.ts
1293
/**
 * Pick the MCP backend that serves the codex personas.
 *
 * - `--codex-cli` not requested            → "http".
 * - requested and `opts.codexInfo.ok`      → "cli".
 * - requested but codex missing / not ok   → warn and fall back to "http",
 *   so a missing optional dependency never breaks the launch.
 *
 * @param opts `{ requested, codexInfo }` where `codexInfo` may be absent
 *   and otherwise carries `ok` and (optionally) `version`
 * @returns "cli" or "http"
 */
function resolveCodexCliBackend(opts) {
  const { requested, codexInfo } = opts;
  if (!requested) return "http";
  if (codexInfo?.ok) return "cli";

  // A reported version means codex was found but rejected; otherwise it was not found at all.
  const detail = codexInfo?.version
    ? `installed version "${codexInfo.version}" is too old (need 0.129+)`
    : "codex CLI not found on PATH";
  consola.warn(`--codex-cli requested but ${detail}; falling back to HTTP-only Codex MCP backend (codex-implementer will not be registered).`);
  return "http";
}
1315
/**
 * Build the object serialized for `claude --mcp-config <path>`.
 *
 * `gh-router-peers` (HTTP, bearer-authenticated with `opts.nonce`) is
 * always registered. When `opts.codexCli` is truthy a second, stdio
 * server `codex-cli` is added which runs `codex mcp-server` with the
 * provider-config flags for `serverUrl` and an environment pointing
 * Codex at `${serverUrl}/v1` and at `opts.codexHome`.
 *
 * @param serverUrl base URL of the local proxy (no trailing slash expected)
 * @param opts `{ nonce, codexCli, codexHome }`
 * @returns `{ mcpServers }`
 */
function buildPeerMcpConfig(serverUrl, opts) {
  const httpPeers = {
    type: "http",
    url: `${serverUrl}/mcp`,
    headers: { Authorization: `Bearer ${opts.nonce}` }
  };
  if (!opts.codexCli) return { mcpServers: { "gh-router-peers": httpPeers } };

  const codexCliServer = {
    command: "codex",
    args: ["mcp-server", ...buildCodexProviderConfigFlags(serverUrl)],
    env: {
      OPENAI_BASE_URL: `${serverUrl}/v1`,
      OPENAI_API_KEY: "dummy",
      CODEX_HOME: opts.codexHome
    }
  };
  return {
    mcpServers: {
      "gh-router-peers": httpPeers,
      "codex-cli": codexCliServer
    }
  };
}
1342
/**
 * Build the `peer-review-coordinator` subagent definition.
 *
 * This is a regular Claude Code subagent (not a peer-MCP tool) whose
 * prompt tells it to fan out to the peer personas and aggregate their
 * findings. The "Use proactively" wording in the description is there so
 * the parent model delegates at plan/commit checkpoints without being
 * asked. Wording that concerns `gemini-critic` is adjusted depending on
 * `opts.geminiAvailable`; no other option is read.
 *
 * @param opts `{ geminiAvailable }`
 * @returns `{ description, prompt }`
 */
function buildCoordinatorAgent(opts) {
  const hasGemini = Boolean(opts.geminiAvailable);
  const peers = ["codex-critic", ...hasGemini ? ["gemini-critic"] : [], "codex-reviewer"];

  // Gemini-dependent fragments spliced into the routing bullets below.
  const planSuffix = hasGemini ? " AND `gemini-critic` in parallel" : "";
  const diffSuffix = hasGemini ? " AND `gemini-critic` (gemini for cross-lab triangulation)" : "";
  const tieBreakSuffix = hasGemini ? "" : " (NOT REGISTERED in this session — gemini-3.x not in catalog; tie-break unavailable)";
  const longContextSuffix = hasGemini ? "" : " (NOT REGISTERED in this session)";

  const description = "Coordinates cross-lab adversarial review. **Use proactively before ExitPlanMode for non-trivial plans and after non-trivial commits** (>50 lines OR touching streaming/auth/concurrency/persistence/security). Routes to codex-critic / codex-reviewer / gemini-critic in parallel based on artifact type and aggregates findings. Cheaper than calling each peer manually for the common case where you want a multi-lab triangulation. The subagent has no access to your scrollback or CLAUDE.md — pass the artifact verbatim.";

  const lines = [
    "# Subagent: peer-review-coordinator",
    "",
    "You orchestrate cross-lab adversarial review for the lead orchestrator (Opus). You have access to these peer-MCP subagents:",
    "",
    peers.map((peer) => `- \`${peer}\``).join("\n"),
    "",
    "## When the lead invokes you",
    "",
    "The lead's brief will include an artifact (plan, design, diff, or code) and a goal (e.g. 'review before exit-plan', 'review the commit I just made', 'cross-check codex-critic's verdict'). Pick the right peers for the artifact type:",
    "",
    "- **Plan / design / architecture choice** → fan out to `codex-critic`" + planSuffix + ". codex-reviewer is the wrong tool for plans (it's a code-specialist, not an architecture critic).",
    "- **Concrete diff or single file** → fan out to `codex-reviewer`" + diffSuffix + ". For very small changes (<20 lines), one `codex-reviewer` call is enough.",
    "- **Tie-breaker after codex-critic has weighed in** → call `gemini-critic`" + tieBreakSuffix + " with the artifact AND codex-critic's verdict for cross-lab cross-check.",
    "- **Long-context artifact (>100 KB)** → prefer `gemini-critic`" + longContextSuffix + ". Otherwise, decompose into 2-4 batches and fan out across `codex-critic` calls in parallel.",
    "",
    "## Decomposition for large artifacts",
    "",
    "Each per-call MCP wait is bounded (~150s on Claude Code v2.1.138 per regression #50289). For artifacts >20 KB, split into 2-4 logical batches BY CONCERN (not by raw size — semantic batches give better per-batch reviews) and call peers in parallel. The proxy's MCP cap allows up to 8 in-flight calls. Aggregate findings yourself before reporting back.",
    "",
    "## Aggregation contract",
    "",
    "When fan-out completes, return a SEVERITY-GROUPED, DEDUPLICATED finding list. Format:",
    "",
    " ## Findings",
    " ### HIGH",
    " 1. <one-line title> — `<file:line>` — sources: codex-critic, gemini-critic (3-lab confirmed if applicable)",
    " - bug: <one sentence>",
    " - mitigation: <one sentence>",
    " ### MEDIUM",
    " ...",
    " ### LOW",
    " ...",
    "",
    "Cite which peer raised each finding. If two or more peers raised the SAME finding (cross-lab confirmation), call it out — those are the highest-confidence bugs.",
    "",
    "## What NOT to do",
    "",
    "- Do not paraphrase or summarize per-peer verdicts BEFORE aggregating; aggregate from the raw verdicts.",
    "- Do not invent severity labels not present in the source verdicts.",
    "- Do not call peers serially (waste of wall-clock); always fan out in parallel.",
    "- Do not consult yourself — you are the coordinator, not a critic.",
    "",
    "Self-reminder (read before every reply):",
    " Did I fan out in parallel to the right peers for this artifact type?",
    " Did I aggregate findings by severity, citing which peer raised each?",
    " If two peers agreed, did I flag the cross-lab confirmation?"
  ];

  return {
    description,
    prompt: lines.join("\n")
  };
}
1415
/**
 * Build the object serialized for `claude --agents <path>`.
 *
 * One entry per persona returned by `personasFor` for this mode, keyed by
 * agent name and carrying the persona's description plus its generated
 * prompt, followed by the `peer-review-coordinator` meta-subagent.
 *
 * @param opts `{ codexCli, geminiAvailable }`
 * @returns map of agent name → `{ description, prompt }`
 */
function buildPeerAgentDefinitions(opts) {
  const { codexCli, geminiAvailable } = opts;
  const entries = personasFor({ codexCli, geminiAvailable }).map((persona) => [
    persona.agentName,
    {
      description: persona.description,
      prompt: buildAgentPrompt(persona, { codexCli })
    }
  ]);
  // The coordinator is always registered, after the personas.
  entries.push(["peer-review-coordinator", buildCoordinatorAgent({ codexCli, geminiAvailable })]);
  return Object.fromEntries(entries);
}
1440
/**
 * Default directory for subagent `.md` files: `<home>/.claude/agents`.
 *
 * The original author's note: this is pinned to the user's home because
 * `getClaudeCodeEnvVars` sets `CLAUDE_CONFIG_DIR=$HOME/.claude`, so the
 * spawned child reads agents from exactly this path.
 */
function defaultAgentsDir() {
  const home = os.homedir();
  return path.join(home, ".claude", "agents");
}
1451
/**
 * Characters that have a dedicated escape inside a YAML double-quoted
 * scalar. Everything else matched by the escaper falls back to `\xNN`.
 */
const YAML_NAMED_ESCAPES = new Map([
  ["\\", "\\\\"],
  ["\"", "\\\""],
  ["\n", "\\n"],
  ["\r", "\\r"],
  ["\t", "\\t"],
  ["\u2028", "\\u2028"],
  ["\u2029", "\\u2029"]
]);
/**
 * Quote a string as a YAML double-quoted scalar for use in frontmatter.
 *
 * Escapes, in a single pass:
 *  - `\` and `"`;
 *  - `\n`, `\r`, `\t`;
 *  - the remaining C0 controls and DEL as `\xNN`;
 *  - the C1 control block U+0080–U+009F as `\xNN` — these are outside
 *    YAML's printable character set, and NEL (U+0085) is a line break for
 *    YAML 1.1 parsers;
 *  - U+2028 / U+2029 as `\u2028` / `\u2029`, which YAML 1.1 parsers also
 *    treat as line breaks.
 *
 * NOT a general-purpose YAML serializer; callers control the inputs.
 *
 * @param {string} s text to quote
 * @returns {string} the quoted scalar, including the surrounding `"`
 */
function escapeYamlString(s) {
  const escaped = s.replace(/[\\"\x00-\x1F\x7F-\x9F\u2028\u2029]/g, (ch) => {
    const named = YAML_NAMED_ESCAPES.get(ch);
    if (named !== undefined) return named;
    // Every remaining match is below U+0100, so two hex digits suffice.
    return `\\x${ch.charCodeAt(0).toString(16).padStart(2, "0")}`;
  });
  return `"${escaped}"`;
}
1468
+ /**
1469
+ * Strict allowlist for subagent names — controls both the YAML
1470
+ * frontmatter `name:` field AND the filename suffix. Defense-in-depth:
1471
+ * even if a future contributor wires in a dynamic agent name from
1472
+ * outside, the validator at the top of `writePeerAgentMdFiles` rejects
1473
+ * anything that wouldn't be a safe bare YAML scalar AND a safe path
1474
+ * component.
1475
+ */
1476
+ const VALID_AGENT_NAME = /^[a-z][a-z0-9-]*$/;
1477
/**
 * Render one subagent `.md` file: YAML frontmatter (`name`, quoted
 * `description`), a blank line, the system prompt, and a trailing newline.
 * `spec.name` is written as a bare scalar, so callers must pass a
 * pre-validated name.
 */
function buildAgentMd(spec) {
  const frontmatter = `---\nname: ${spec.name}\ndescription: ${escapeYamlString(spec.description)}\n---`;
  return `${frontmatter}\n\n${spec.prompt}\n`;
}
1489
/**
 * Write one `peer-<fileSuffix>-<agentName>.md` file per agent into the
 * agents directory (default: `defaultAgentsDir()`), creating the
 * directory if needed.
 *
 * All agent names are validated against `VALID_AGENT_NAME` before any
 * file is touched, because the name is used both as a bare YAML scalar
 * and as a path component. If a write fails part-way, the files already
 * written by this call are removed and the error is rethrown.
 *
 * @param agents map of agent name → `{ description, prompt }`
 * @param opts `{ agentsDir?, fileSuffix }`
 * @returns `{ paths, cleanup }` — `cleanup()` unlinks every written file
 *   and never rejects
 * @throws Error when any agent name fails validation
 */
async function writePeerAgentMdFiles(agents, opts) {
  const invalidName = Object.keys(agents).find((candidate) => !VALID_AGENT_NAME.test(candidate));
  if (invalidName !== undefined) {
    throw new Error(`writePeerAgentMdFiles: invalid agent name ${JSON.stringify(invalidName)} — must match ${VALID_AGENT_NAME.source}`);
  }

  const dir = opts.agentsDir ?? defaultAgentsDir();
  await fs.mkdir(dir, { recursive: true });

  const paths = [];
  // Best-effort removal of everything written so far; individual failures are ignored.
  const removeWritten = () => Promise.allSettled(paths.map((written) => fs.unlink(written)));

  try {
    for (const [agentName, def] of Object.entries(agents)) {
      const filePath = path.join(dir, `peer-${opts.fileSuffix}-${agentName}.md`);
      // Clear any leftover at this path before the secure write.
      await fs.unlink(filePath).catch(() => {});
      const body = buildAgentMd({
        name: agentName,
        description: def.description,
        prompt: def.prompt
      });
      await writeRuntimeFileSecure(filePath, body);
      paths.push(filePath);
    }
  } catch (err) {
    await removeWritten();
    throw err;
  }

  return {
    paths,
    cleanup: async () => {
      await removeWritten();
    }
  };
}
1534
/**
 * Create the per-launch peer-MCP artifacts and return handles to them.
 *
 * Writes, via `writeRuntimeFileSecure`:
 *  - `peer-mcp-<pid>-<rand>.json`    — the `--mcp-config` payload,
 *  - `peer-agents-<pid>-<rand>.json` — the `--agents` payload,
 * into the runtime directory, plus one subagent `.md` file per agent via
 * `writePeerAgentMdFiles`. The `<pid>-<rand>` suffix keeps concurrent
 * calls from colliding and lets a later sweep attribute files to a process.
 *
 * If any step fails, the JSON files written so far are removed before the
 * error is rethrown, so the MCP config (which embeds the bearer nonce) is
 * not left on disk when the caller never receives `cleanup()`.
 *
 * @param serverUrl base URL of the local proxy
 * @param opts `{ codexCli, geminiAvailable, nonce?, runtimeDir?, codexHome?, agentsDir? }`;
 *   a random 32-byte hex nonce is generated when none is supplied
 * @returns `{ mcpConfigPath, agentsPath, agentMdPaths, nonce, personas, cleanup }`;
 *   `cleanup()` unlinks everything this call created and never rejects
 */
async function writePeerMcpRuntimeFiles(serverUrl, opts) {
  const nonce = opts.nonce ?? randomBytes(32).toString("hex");
  const runtimeDir = opts.runtimeDir ?? PATHS.CLAUDE_RUNTIME_DIR;
  const codexHome = opts.codexHome ?? PATHS.CODEX_HOME;
  await fs.mkdir(runtimeDir, { recursive: true });
  // 448 === 0o700: owner-only directory. Best-effort; skipped on Windows.
  if (process.platform !== "win32") await fs.chmod(runtimeDir, 448).catch(() => {});
  const fileSuffix = `${process.pid}-${randomBytes(4).toString("hex")}`;
  const mcpConfigPath = path.join(runtimeDir, `peer-mcp-${fileSuffix}.json`);
  const agentsPath = path.join(runtimeDir, `peer-agents-${fileSuffix}.json`);
  const mcpConfig = buildPeerMcpConfig(serverUrl, {
    codexCli: opts.codexCli,
    geminiAvailable: opts.geminiAvailable,
    nonce,
    codexHome
  });
  const agents = buildPeerAgentDefinitions({
    codexCli: opts.codexCli,
    geminiAvailable: opts.geminiAvailable,
    nonce,
    codexHome
  });
  const unlinkJsonFiles = () => Promise.allSettled([fs.unlink(mcpConfigPath), fs.unlink(agentsPath)]);
  // Clear any leftovers at these paths before the secure writes.
  await unlinkJsonFiles();
  let mdResult;
  try {
    await writeRuntimeFileSecure(mcpConfigPath, JSON.stringify(mcpConfig, null, 2));
    await writeRuntimeFileSecure(agentsPath, JSON.stringify(agents, null, 2));
    mdResult = await writePeerAgentMdFiles(agents, {
      agentsDir: opts.agentsDir,
      fileSuffix
    });
  } catch (err) {
    // Setup failed part-way: the caller gets no cleanup(), so remove what exists now.
    // (writePeerAgentMdFiles already removes its own partial output.)
    await unlinkJsonFiles();
    throw err;
  }
  const personas = personasFor({
    codexCli: opts.codexCli,
    geminiAvailable: opts.geminiAvailable
  });
  const cleanup = async () => {
    await Promise.allSettled([
      fs.unlink(mcpConfigPath),
      fs.unlink(agentsPath),
      mdResult.cleanup()
    ]);
  };
  return {
    mcpConfigPath,
    agentsPath,
    agentMdPaths: mdResult.paths,
    nonce,
    personas,
    cleanup
  };
}
1595
+
1596
+ //#endregion
1597
+ //#region src/lib/file-log-reporter.ts
1598
/** Size above which the log file is rotated to `<path>.1` (checked by `rotateIfNeeded`). */
const MAX_LOG_BYTES = 1024 * 1024;
/** Number of remembered dedupe keys before the reporter's set is reset. */
const DEDUP_MAX = 1e3;
/** Maximum characters kept per serialized log argument. */
const ARG_MAX_LEN = 2048;
/** Maximum length of a dedupe key. */
const DEDUP_KEY_MAX_LEN = 200;
/**
 * Credential-shaped substrings to scrub from log lines:
 *  - JWT-like tokens (`eyJ…` with up to two further dot-separated parts),
 *  - GitHub tokens with the `gho_`/`ghp_`/`ghr_`/`ghs_`/`ghu_` prefixes,
 *  - GitHub fine-grained personal access tokens (`github_pat_…`),
 *  - `Bearer <token>` header values.
 */
const CREDENTIAL_RE = /\b(eyJ[A-Za-z0-9_-]{20,}(?:\.[A-Za-z0-9_-]+){0,2}|gh[oprsu]_[A-Za-z0-9_]{20,}|github_pat_[A-Za-z0-9_]{20,}|Bearer\s+\S{20,})\b/g;
/** Log levels that are persisted to the file; everything else is dropped. */
const ALLOWED_TYPES = new Set([
  "fatal",
  "error",
  "warn"
]);
/**
 * Replace every credential-shaped substring of `line` with `[REDACTED]`.
 *
 * @param {string} line text about to be written to the log
 * @returns {string} the scrubbed text
 */
function sanitize(line) {
  // String.prototype.replace with a /g regex resets lastIndex, so sharing the regex is safe.
  return line.replace(CREDENTIAL_RE, "[REDACTED]");
}
1611
/**
 * Turn one log argument into text.
 *
 * Errors become their message, followed by the stack on a new line when a
 * stack is present; strings pass through; anything else goes through
 * `String()`.
 */
function serializeArg(arg) {
  if (arg instanceof Error) {
    const pieces = arg.stack ? [arg.message, arg.stack] : [arg.message];
    return pieces.join("\n");
  }
  return typeof arg === "string" ? arg : String(arg);
}
1620
/**
 * Render a log object as a single sanitized line:
 * `<ISO timestamp> [<LEVEL>] <args joined by spaces>\n`.
 *
 * Each argument is serialized and truncated to `ARG_MAX_LEN` characters
 * (with a trailing `…`); embedded line breaks are rewritten to a literal
 * `\n` so one log entry always occupies one line. A missing type is
 * reported as `ERROR`.
 */
function formatLogLine(logObj) {
  const timestamp = logObj.date.toISOString();
  const level = (logObj.type ?? "error").toUpperCase();
  const clipped = logObj.args.map((arg) => {
    const text = serializeArg(arg);
    if (text.length <= ARG_MAX_LEN) return text;
    return text.slice(0, ARG_MAX_LEN) + "…";
  });
  const message = clipped.join(" ").replace(/\r\n|\r|\n/g, "\\n");
  return sanitize(`${timestamp} [${level}] ${message}\n`);
}
1626
/**
 * Derive the dedupe key for a log object: `<type>:<first argument>`,
 * capped at `DEDUP_KEY_MAX_LEN` characters. Only the first argument
 * contributes; an empty argument list yields `<type>:`.
 */
function makeDedupeKey(logObj) {
  const head = logObj.args.length === 0 ? "" : serializeArg(logObj.args[0]);
  // slice() is a no-op for keys already within the limit.
  return `${logObj.type}:${head}`.slice(0, DEDUP_KEY_MAX_LEN);
}
1631
/**
 * Rename `filePath` to `filePath + ".1"` when it is larger than
 * `MAX_LOG_BYTES`. Deliberately best-effort: a missing file or a failed
 * rename is ignored so logging setup can never throw.
 */
function rotateIfNeeded(filePath) {
  try {
    const { size } = fs$1.statSync(filePath);
    if (size <= MAX_LOG_BYTES) return;
    fs$1.renameSync(filePath, `${filePath}.1`);
  } catch {}
}
1643
/**
 * Reporter that appends warn/error/fatal log entries to a file.
 *
 * - The file is size-checked (and possibly rotated) once, at construction.
 * - Entries whose dedupe key was already seen are dropped; the key set is
 *   reset once it holds `DEDUP_MAX` keys.
 * - Writes are synchronous and best-effort: any I/O failure is ignored so
 *   that logging never throws into the caller.
 */
var FileLogReporter = class {
  filePath;
  seen = /* @__PURE__ */ new Set();
  constructor(filePath) {
    this.filePath = filePath;
    rotateIfNeeded(filePath);
  }
  log(logObj, _ctx) {
    if (!ALLOWED_TYPES.has(logObj.type)) return;

    const dedupeKey = makeDedupeKey(logObj);
    if (this.seen.has(dedupeKey)) return;
    if (this.seen.size >= DEDUP_MAX) this.seen.clear();
    this.seen.add(dedupeKey);

    const line = formatLogLine(logObj);
    let fd;
    try {
      // 384 === 0o600: the file is created owner-read/write only.
      fd = fs$1.openSync(this.filePath, "a", 384);
    } catch {
      return;
    }
    try {
      fs$1.writeSync(fd, line);
    } catch {}
    try {
      fs$1.closeSync(fd);
    } catch {}
  }
};
1668
/** Writable stream that discards everything written to it. */
const nullStream = new Writable({
  write(_chunk, _encoding, done) {
    done();
  }
});
/**
 * Put consola into file-only mode.
 *
 * Replaces all reporters with a `FileLogReporter` on
 * `PATHS.ERROR_LOG_PATH`, disables consola's throttling, and points
 * consola's stdout/stderr at a discarding stream so that a terminal
 * reporter added later still cannot print. Code that writes to
 * `process.stderr` directly is not affected.
 */
function enableFileLogging() {
  const fileReporter = new FileLogReporter(PATHS.ERROR_LOG_PATH);
  consola.options.throttle = 0;
  consola.setReporters([fileReporter]);
  Object.assign(consola.options, {
    stdout: nullStream,
    stderr: nullStream
  });
}
1688
+
917
1689
  //#endregion
918
1690
  //#region src/lib/model-validation.ts
919
1691
  const ENDPOINT_ALIASES = {
@@ -1008,6 +1780,11 @@ function initProxyFromEnv() {
1008
1780
  }
1009
1781
  }
1010
1782
 
1783
+ //#endregion
1784
+ //#region package.json
1785
+ var name = "github-router";
1786
+ var version = "0.3.19";
1787
+
1011
1788
  //#endregion
1012
1789
  //#region src/lib/approval.ts
1013
1790
  const awaitApproval = async () => {
@@ -1016,8 +1793,27 @@ const awaitApproval = async () => {
1016
1793
 
1017
1794
  //#endregion
1018
1795
  //#region src/lib/rate-limit.ts
1796
/** Longest time (ms) a request may wait for its turn in the rate-limit queue. */
const RATE_LIMIT_QUEUE_TIMEOUT_MS = 5e3;
/** Tail of the serialized rate-limit queue; every request chains onto it. */
let rateLimitChain = Promise.resolve();
/**
 * Serialize rate-limit checks so concurrent requests are admitted one at a
 * time, and bound how long any request may wait for its turn.
 *
 * Resolves once this request's `doCheck` has completed. Rejects with an
 * HTTPError carrying a 429 JSON response when the wait exceeds
 * RATE_LIMIT_QUEUE_TIMEOUT_MS; in that case the ticket is flagged as aborted
 * so the queued `doCheck` can bail out when it observes the flag.
 *
 * @param state$1 Shared proxy state; a no-op when `rateLimitSeconds` is unset.
 * @throws {HTTPError} When the queue wait times out.
 */
async function checkRateLimit(state$1) {
	if (state$1.rateLimitSeconds === void 0) return;
	const ticket = { aborted: false };
	const myTurn = rateLimitChain.then(() => doCheck(state$1, ticket));
	// Keep the chain alive for later callers even when this turn rejects.
	rateLimitChain = myTurn.catch(() => {});
	let timeoutHandle;
	const queueTimeout = new Promise((_resolve, reject) => {
		timeoutHandle = setTimeout(() => {
			ticket.aborted = true;
			reject(new HTTPError("Rate limit queue wait exceeded", Response.json({
				type: "error",
				error: {
					type: "rate_limit_error",
					message: `Rate limit queue exceeded ${RATE_LIMIT_QUEUE_TIMEOUT_MS}ms; try again`
				}
			}, { status: 429 })));
		}, RATE_LIMIT_QUEUE_TIMEOUT_MS);
	});
	try {
		return await Promise.race([myTurn, queueTimeout]);
	} finally {
		// Always drop the timer: otherwise every admitted request would leave a
		// pending timeout behind that later builds a throwaway 429 response.
		clearTimeout(timeoutHandle);
	}
}
1814
+ async function doCheck(state$1, ticket) {
1815
+ if (state$1.rateLimitSeconds === void 0) return;
1816
+ if (ticket.aborted) return;
1021
1817
  const now = Date.now();
1022
1818
  if (!state$1.lastRequestTimestamp) {
1023
1819
  state$1.lastRequestTimestamp = now;
@@ -1036,6 +1832,7 @@ async function checkRateLimit(state$1) {
1036
1832
  const waitTimeMs = waitTimeSeconds * 1e3;
1037
1833
  consola.warn(`Rate limit reached. Waiting ${waitTimeSeconds} seconds before proceeding...`);
1038
1834
  await sleep(waitTimeMs);
1835
+ if (ticket.aborted) return;
1039
1836
  state$1.lastRequestTimestamp = Date.now();
1040
1837
  consola.info("Rate limit wait completed, proceeding with request");
1041
1838
  }
@@ -1098,6 +1895,169 @@ function detectCapabilityMismatch(info, model) {
1098
1895
  return err.includes("token") || err.includes("context") || err.includes("too long") || err.includes("max_tokens") || err.includes("prompt is too long");
1099
1896
  }
1100
1897
 
1898
+ //#endregion
1899
+ //#region src/lib/stream-relay.ts
1900
+ const ENCODER$2 = new TextEncoder();
1901
/**
 * Recognize the "controller/stream has already closed" family of errors that
 * stream runtimes throw when an enqueue/close call races with the consumer
 * cancelling its read. These are not upstream failures: they mean the client
 * has finished reading (or disconnected), so the caller should stop quietly.
 *
 * Matching is a case-insensitive substring test on the error message. For
 * example Bun reports `TypeError: Invalid state: Controller is already closed`;
 * other runtimes use wording such as `The stream is closing` or
 * `This ReadableStream is closed`.
 *
 * @param error Any thrown value.
 * @returns true when `error` is an Error whose message matches a known phrase.
 */
function isControllerClosedError(error) {
	if (!(error instanceof Error)) return false;
	const text = error.message.toLowerCase();
	const closedPhrases = [
		"controller is already closed",
		"controller is already errored",
		"readablestream is closed",
		"readablestream is already closed",
		"stream is closing",
		"stream is already closed",
		"stream is closed"
	];
	return closedPhrases.some((phrase) => text.includes(phrase));
}
1917
/**
 * Wrap an upstream SSE byte stream so that:
 *  - Backpressure is respected (pull-based; one upstream read per pull).
 *  - Mid-stream errors are caught, logged with route/byte-count context, and
 *    converted to a final Anthropic-shape `event: error` SSE event before the
 *    downstream is closed.
 *  - Upstream inactivity (no chunk within `inactivityTimeoutMs`) is reported
 *    the same way instead of hanging forever.
 *  - Consumer cancellation (client disconnects or stops reading early) is
 *    handled silently: it is not logged as an upstream error and no error
 *    event is written after it.
 *
 * A failure on the very first read goes through the same path: an
 * `event: error` SSE event is emitted, then the stream is closed.
 *
 * @param body Upstream ReadableStream of bytes.
 * @param opts `routePath` is used in log messages; `inactivityTimeoutMs`
 *   optionally overrides UPSTREAM_INACTIVITY_TIMEOUT_MS.
 * @returns A ReadableStream that relays the upstream bytes.
 */
function relayAnthropicStream(body, opts) {
	const inactivityMs = opts.inactivityTimeoutMs ?? UPSTREAM_INACTIVITY_TIMEOUT_MS;
	const reader = body.getReader();
	let bytesRelayed = 0;
	let upstreamFinished = false;
	let consumerCancelled = false;
	// close() throws if the controller is already closed/errored; that is fine here.
	const safeClose = (controller) => {
		try {
			controller.close();
		} catch {}
	};
	// Best-effort release of the upstream body; cancellation failures are ignored.
	const releaseUpstream = (reason) => {
		reader.cancel(reason).catch(() => {});
	};
	return new ReadableStream({
		async pull(controller) {
			if (consumerCancelled || upstreamFinished) {
				safeClose(controller);
				return;
			}
			try {
				const result = await readWithInactivityTimeout(reader, inactivityMs);
				if (consumerCancelled) {
					safeClose(controller);
					return;
				}
				if (result.done) {
					if (bytesRelayed === 0) consola.warn(`Upstream returned empty SSE stream at ${opts.routePath}`);
					upstreamFinished = true;
					safeClose(controller);
					return;
				}
				if (result.value) {
					bytesRelayed += result.value.byteLength;
					try {
						controller.enqueue(result.value);
					} catch (enqueueError) {
						if (isControllerClosedError(enqueueError)) {
							consumerCancelled = true;
							// The consumer is gone and pull() may never run again, so
							// release the upstream body now instead of leaving it open.
							releaseUpstream(enqueueError);
							return;
						}
						throw enqueueError;
					}
				}
			} catch (error) {
				upstreamFinished = true;
				if (consumerCancelled) {
					releaseUpstream(error);
					safeClose(controller);
					return;
				}
				const errName = error instanceof Error ? error.name : "Error";
				const errMessage = error instanceof Error ? error.message : String(error);
				consola.error(`Upstream stream interrupted at ${opts.routePath}: bytes=${bytesRelayed} errType=${errName} message=${JSON.stringify(errMessage)}`);
				const event = buildAnthropicErrorEvent(errName, errMessage);
				try {
					controller.enqueue(ENCODER$2.encode(event));
				} catch (enqueueError) {
					if (!isControllerClosedError(enqueueError)) consola.warn(`Could not deliver error event to consumer at ${opts.routePath}: ${enqueueError instanceof Error ? enqueueError.message : String(enqueueError)}`);
				}
				releaseUpstream(error);
				safeClose(controller);
			}
		},
		cancel(reason) {
			consumerCancelled = true;
			upstreamFinished = true;
			releaseUpstream(reason);
		}
	});
}
2005
/**
 * Perform one `reader.read()` and fail if it does not settle within `timeoutMs`.
 *
 * @param reader Object exposing `read()` that returns a promise.
 * @param timeoutMs Maximum time to wait for the read, in milliseconds.
 * @returns Whatever `reader.read()` resolves to.
 * @throws An Error named "InactivityTimeout" (message "upstream_inactive")
 *   when the timeout fires first; otherwise whatever `read()` rejects with.
 */
async function readWithInactivityTimeout(reader, timeoutMs) {
	let timer;
	const deadline = new Promise((_resolve, reject) => {
		timer = setTimeout(() => {
			const timeoutError = /* @__PURE__ */ new Error("upstream_inactive");
			timeoutError.name = "InactivityTimeout";
			reject(timeoutError);
		}, timeoutMs);
	});
	// Ensure the deadline promise always has a rejection handler of its own.
	deadline.catch(() => {});
	try {
		return await Promise.race([reader.read(), deadline]);
	} finally {
		if (timer !== void 0) clearTimeout(timer);
	}
}
2019
/**
 * Serialize an Anthropic-format streaming error as SSE wire text:
 *   event: error
 *   data: {"type":"error","error":{"type":"...","message":"..."}}
 *
 * @param errName Name of the error that interrupted the stream.
 * @param errMessage Message of that error.
 * @returns The SSE event text, terminated by a blank line.
 */
function buildAnthropicErrorEvent(errName, errMessage) {
	const error = {
		type: classifyStreamError(errName),
		message: `Upstream stream interrupted: ${errName}: ${errMessage}`
	};
	const json = JSON.stringify({
		type: "error",
		error
	});
	return `event: error\ndata: ${json}\n\n`;
}
2035
/**
 * Serialize an OpenAI-format streaming error as SSE wire text, followed by the
 * `data: [DONE]` terminator that OpenAI clients expect.
 *
 * @param errName Name of the error that interrupted the stream.
 * @param errMessage Message of that error.
 * @returns Two SSE events: the error payload, then `[DONE]`.
 */
function buildOpenAIErrorEvent(errName, errMessage) {
	const error = {
		type: classifyStreamError(errName),
		message: `Upstream stream interrupted: ${errName}: ${errMessage}`
	};
	const json = JSON.stringify({ error });
	return `data: ${json}\n\ndata: [DONE]\n\n`;
}
2046
/**
 * Map an error name to the error `type` reported to streaming clients.
 *
 * @param errName Error name, e.g. "AbortError".
 * @returns "timeout_error" for abort/inactivity errors, otherwise "api_error".
 */
function classifyStreamError(errName) {
	switch (errName) {
		case "AbortError":
		case "InactivityTimeout":
			return "timeout_error";
		default:
			return "api_error";
	}
}
2051
/**
 * Log an interrupted upstream stream and return the normalized error fields.
 *
 * @param routePath Route being served, included in the log line.
 * @param error Any thrown value; non-Error values are stringified.
 * @returns `{ errName, errMessage }` for building the client-facing event.
 */
function logStreamError(routePath, error) {
	const isRealError = error instanceof Error;
	const errName = isRealError ? error.name : "Error";
	const errMessage = isRealError ? error.message : String(error);
	consola.error(`Upstream stream interrupted at ${routePath}: errType=${errName} message=${JSON.stringify(errMessage)}`);
	return { errName, errMessage };
}
2060
+
1101
2061
  //#endregion
1102
2062
  //#region src/lib/tokenizer.ts
1103
2063
  const ENCODING_MAP = {
@@ -1296,20 +2256,29 @@ const getTokenCount = async (payload, model) => {
1296
2256
 
1297
2257
  //#endregion
1298
2258
  //#region src/services/copilot/create-chat-completions.ts
1299
- const createChatCompletions = async (payload, modelHeaders) => {
2259
+ const createChatCompletions = async (payload, modelHeaders, callerSignal) => {
1300
2260
  if (!state.copilotToken) throw new Error("Copilot token not found");
1301
2261
  const enableVision = payload.messages.some((x) => typeof x.content !== "string" && x.content?.some((x$1) => x$1.type === "image_url"));
1302
2262
  const isAgentCall = payload.messages.some((msg) => ["assistant", "tool"].includes(msg.role));
1303
- const headers = {
1304
- ...copilotHeaders(state, enableVision),
1305
- ...modelHeaders,
1306
- "X-Initiator": isAgentCall ? "agent" : "user"
2263
+ const url = `${copilotBaseUrl(state)}/chat/completions`;
2264
+ const doFetch = () => {
2265
+ const fetchInit = {
2266
+ method: "POST",
2267
+ headers: {
2268
+ ...copilotHeaders(state, enableVision),
2269
+ ...modelHeaders,
2270
+ "X-Initiator": isAgentCall ? "agent" : "user"
2271
+ },
2272
+ body: JSON.stringify(payload)
2273
+ };
2274
+ const signals = [];
2275
+ if (UPSTREAM_FETCH_TIMEOUT_MS > 0) signals.push(AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS));
2276
+ if (callerSignal) signals.push(callerSignal);
2277
+ if (signals.length === 1) fetchInit.signal = signals[0];
2278
+ else if (signals.length > 1) fetchInit.signal = AbortSignal.any(signals);
2279
+ return fetch(url, fetchInit);
1307
2280
  };
1308
- const response = await fetch(`${copilotBaseUrl(state)}/chat/completions`, {
1309
- method: "POST",
1310
- headers,
1311
- body: JSON.stringify(payload)
1312
- });
2281
+ const response = await tryRefreshAndRetry(doFetch, "/chat/completions");
1313
2282
  if (!response.ok) {
1314
2283
  let errorBody = "";
1315
2284
  try {
@@ -1352,23 +2321,28 @@ const InnerSchema = z.object({
1352
2321
  annotations: z.array(z.object({ url_citation: z.object({
1353
2322
  title: z.string(),
1354
2323
  url: z.string()
1355
- }).optional() })).optional()
2324
+ }).optional() })).nullable().optional()
1356
2325
  }),
1357
- bing_searches: z.array(z.unknown()).optional()
2326
+ bing_searches: z.array(z.unknown()).nullable().optional()
1358
2327
  });
1359
2328
  const MAX_SEARCHES_PER_SECOND = 3;
1360
2329
  let searchTimestamps = [];
2330
+ let throttleChain = Promise.resolve();
1361
2331
/**
 * Throttle web searches to MAX_SEARCHES_PER_SECOND within a one-second window.
 *
 * Calls are serialized through `throttleChain`, so concurrent callers evaluate
 * the window one after another instead of all reading the same timestamps.
 *
 * @returns A promise that settles once this caller may proceed.
 */
async function throttleSearch() {
	const takeSlot = async () => {
		const startedAt = Date.now();
		// Forget searches that have left the one-second window.
		searchTimestamps = searchTimestamps.filter((stamp) => startedAt - stamp < 1e3);
		if (searchTimestamps.length >= MAX_SEARCHES_PER_SECOND) {
			const waitMs = 1e3 - (startedAt - searchTimestamps[0]);
			if (waitMs > 0) {
				consola.debug(`Web search rate limited, waiting ${waitMs}ms`);
				await sleep(waitMs);
			}
		}
		searchTimestamps.push(Date.now());
	};
	const myTurn = throttleChain.then(() => takeSlot());
	// A failed turn must not poison the queue for later callers.
	throttleChain = myTurn.catch(() => {});
	return myTurn;
}
1373
2347
  function mcpHeaders(sid) {
1374
2348
  if (!state.githubToken) throw new Error("GitHub token missing — re-run auth flow. Web search uses the GitHub PAT (not the Copilot token); the on-disk token at ~/.local/share/github-router/github_token must be present.");
@@ -1497,6 +2471,14 @@ async function searchWeb(query) {
1497
2471
 
1498
2472
  //#endregion
1499
2473
  //#region src/routes/chat-completions/handler.ts
2474
const ENCODER$1 = new TextEncoder();
/**
 * Serialize one server-sent event into its wire text.
 *
 * Fields are written in the order `event`, `data`, `id`, `retry`. Multi-line
 * data is split on any line-break style and emitted as one `data:` line per
 * line. The event is terminated by a blank line.
 *
 * @param chunk Event with optional `event`, `data`, `id` and `retry` fields.
 * @returns SSE text for the event ("\n\n" when no field is set).
 */
function formatSSE$1(chunk) {
	const parts = [];
	if (chunk.event) parts.push(`event: ${chunk.event}`);
	if (chunk.data !== void 0) for (const line of String(chunk.data).split(/\r\n|\r|\n/)) parts.push(`data: ${line}`);
	if (chunk.id !== void 0) parts.push(`id: ${String(chunk.id)}`);
	// Relay the reconnection delay too; only a non-negative integer is a valid
	// value for the field, so anything else is omitted.
	if (Number.isInteger(chunk.retry) && chunk.retry >= 0) parts.push(`retry: ${chunk.retry}`);
	return parts.join("\n") + "\n\n";
}
1500
2482
  async function handleCompletion$1(c) {
1501
2483
  const startTime = Date.now();
1502
2484
  await checkRateLimit(state);
@@ -1545,16 +2527,91 @@ async function handleCompletion$1(c) {
1545
2527
  inputTokens,
1546
2528
  outputTokens,
1547
2529
  status: 200,
1548
- streaming: isStreaming
1549
- }, selectedModel, startTime);
1550
- if (!isStreaming) {
1551
- if (debugEnabled) consola.debug("Non-streaming response:", JSON.stringify(response));
1552
- return c.json(response);
1553
- }
1554
- return streamSSE(c, async (stream) => {
1555
- for await (const chunk of response) {
1556
- if (debugEnabled) consola.debug("Streaming chunk:", JSON.stringify(chunk));
1557
- await stream.writeSSE(chunk);
2530
+ streaming: isStreaming
2531
+ }, selectedModel, startTime);
2532
+ if (!isStreaming) {
2533
+ if (debugEnabled) consola.debug("Non-streaming response:", JSON.stringify(response));
2534
+ return c.json(response);
2535
+ }
2536
+ const iterator = response[Symbol.asyncIterator]();
2537
+ const firstResult = await iterator.next();
2538
+ if (firstResult.done) consola.warn(`Upstream /chat/completions returned an empty stream at ${c.req.path}`);
2539
+ let pendingFirstChunk = firstResult.done ? void 0 : firstResult.value;
2540
+ let upstreamFinished = firstResult.done;
2541
+ let consumerCancelled = false;
2542
+ const safeClose = (controller) => {
2543
+ try {
2544
+ controller.close();
2545
+ } catch {}
2546
+ };
2547
+ const releaseUpstream = (reason) => {
2548
+ if (typeof iterator.return === "function") iterator.return(reason).catch(() => {});
2549
+ };
2550
+ const safeEnqueue = (controller, bytes) => {
2551
+ try {
2552
+ controller.enqueue(bytes);
2553
+ return true;
2554
+ } catch (e) {
2555
+ if (isControllerClosedError(e)) {
2556
+ consumerCancelled = true;
2557
+ releaseUpstream(e);
2558
+ return false;
2559
+ }
2560
+ throw e;
2561
+ }
2562
+ };
2563
+ return new Response(new ReadableStream({
2564
+ async pull(controller) {
2565
+ if (consumerCancelled || upstreamFinished) {
2566
+ safeClose(controller);
2567
+ return;
2568
+ }
2569
+ if (pendingFirstChunk !== void 0) {
2570
+ const chunk = pendingFirstChunk;
2571
+ pendingFirstChunk = void 0;
2572
+ if (debugEnabled) consola.debug("Streaming chunk:", JSON.stringify(chunk));
2573
+ safeEnqueue(controller, ENCODER$1.encode(formatSSE$1(chunk)));
2574
+ return;
2575
+ }
2576
+ try {
2577
+ const result = await iterator.next();
2578
+ if (consumerCancelled) {
2579
+ safeClose(controller);
2580
+ return;
2581
+ }
2582
+ if (result.done) {
2583
+ upstreamFinished = true;
2584
+ safeClose(controller);
2585
+ return;
2586
+ }
2587
+ if (result.value === void 0 || result.value === null) return;
2588
+ if (debugEnabled) consola.debug("Streaming chunk:", JSON.stringify(result.value));
2589
+ safeEnqueue(controller, ENCODER$1.encode(formatSSE$1(result.value)));
2590
+ } catch (error) {
2591
+ upstreamFinished = true;
2592
+ if (consumerCancelled) {
2593
+ releaseUpstream(error);
2594
+ safeClose(controller);
2595
+ return;
2596
+ }
2597
+ const { errName, errMessage } = logStreamError(c.req.path, error);
2598
+ safeEnqueue(controller, ENCODER$1.encode(buildOpenAIErrorEvent(errName, errMessage)));
2599
+ releaseUpstream(error);
2600
+ safeClose(controller);
2601
+ }
2602
+ },
2603
+ cancel() {
2604
+ consumerCancelled = true;
2605
+ upstreamFinished = true;
2606
+ releaseUpstream();
2607
+ }
2608
+ }), {
2609
+ status: 200,
2610
+ headers: {
2611
+ "content-type": "text/event-stream",
2612
+ "cache-control": "no-cache",
2613
+ "transfer-encoding": "chunked",
2614
+ connection: "keep-alive"
1558
2615
  }
1559
2616
  });
1560
2617
  }
@@ -1637,6 +2694,450 @@ embeddingRoutes.post("/", async (c) => {
1637
2694
  }
1638
2695
  });
1639
2696
 
2697
+ //#endregion
2698
+ //#region src/services/copilot/create-responses.ts
2699
/**
 * POST a payload to the Copilot `/responses` endpoint.
 *
 * The request is issued through `tryRefreshAndRetry`; headers are rebuilt on
 * every attempt. The request is aborted by the upstream fetch timeout (when
 * UPSTREAM_FETCH_TIMEOUT_MS is positive) and/or by `callerSignal`.
 *
 * @param payload Request body; `payload.stream` selects the return shape.
 * @param modelHeaders Extra headers merged over the Copilot defaults.
 * @param callerSignal Optional AbortSignal supplied by the caller.
 * @returns The event iterator from `events(response)` when streaming,
 *   otherwise the parsed JSON body.
 * @throws {Error} When no Copilot token is available.
 * @throws {HTTPError} When the upstream response is not ok.
 */
const createResponses = async (payload, modelHeaders, callerSignal) => {
	if (!state.copilotToken) throw new Error("Copilot token not found");
	const enableVision = detectVision(payload.input);
	const isAgentCall = detectAgentCall(payload.input);
	const url = `${copilotBaseUrl(state)}/responses`;
	// Combine the timeout signal and the caller's signal, whichever exist.
	const pickSignal = () => {
		const signals = [];
		if (UPSTREAM_FETCH_TIMEOUT_MS > 0) signals.push(AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS));
		if (callerSignal) signals.push(callerSignal);
		if (signals.length === 0) return void 0;
		return signals.length === 1 ? signals[0] : AbortSignal.any(signals);
	};
	const doFetch = () => {
		const fetchInit = {
			method: "POST",
			headers: {
				...copilotHeaders(state, enableVision),
				...modelHeaders,
				"X-Initiator": isAgentCall ? "agent" : "user"
			},
			body: JSON.stringify(payload)
		};
		const signal = pickSignal();
		if (signal !== void 0) fetchInit.signal = signal;
		return fetch(url, fetchInit);
	};
	const response = await tryRefreshAndRetry(doFetch, "/responses");
	if (!response.ok) {
		consola.error("Failed to create responses", response);
		throw new HTTPError("Failed to create responses", response);
	}
	return payload.stream ? events(response) : await response.json();
};
2729
/**
 * Report whether a Responses-API `input` contains an image part.
 *
 * Entries that are not objects (null, strings, numbers) and malformed content
 * parts are ignored rather than raising a TypeError, because `input` comes
 * straight from the client request.
 *
 * @param input Either a plain string or an array of input items.
 * @returns true when some item's `content` array holds a part of type
 *   "input_image".
 */
function detectVision(input) {
	if (!Array.isArray(input)) return false;
	return input.some((item) => {
		if (typeof item !== "object" || item === null) return false;
		const { content } = item;
		return Array.isArray(content) && content.some((part) => part?.type === "input_image");
	});
}
2737
/**
 * Report whether a Responses-API `input` looks like an agent turn: it contains
 * an assistant message, a function call, or a function call output.
 *
 * Entries that are not objects (null, strings, numbers) are ignored rather
 * than raising a TypeError, because `input` comes straight from the client
 * request.
 *
 * @param input Either a plain string or an array of input items.
 * @returns true when at least one item marks the request as agent-initiated.
 */
function detectAgentCall(input) {
	if (!Array.isArray(input)) return false;
	return input.some((item) => {
		if (typeof item !== "object" || item === null) return false;
		if (item.role === "assistant") return true;
		return item.type === "function_call" || item.type === "function_call_output";
	});
}
2746
+
2747
+ //#endregion
2748
+ //#region src/routes/mcp/handler.ts
2749
const MCP_PROTOCOL_VERSION = "2025-06-18";
const SERVER_NAME = "github-router-peers";
const SERVER_VERSION = "1";
/**
 * Reasoning effort levels accepted by Copilot's /v1/responses (gpt-5.x) and
 * /v1/chat/completions endpoints. Per the proxy's existing thinking-mode
 * translator (CLAUDE.md "Thinking-mode translation"), Copilot's adaptive-
 * thinking path uses these same buckets:
 *   <2k tokens → low, <8k → medium, <24k → high, else → xhigh.
 */
const EFFORT_LEVELS = ["low", "medium", "high", "xhigh"];
/**
 * Effort used when the caller does not specify one. `high` suits peer reviews:
 * adversarial by design but still cost-conscious. Callers can pass `xhigh`
 * for deep dives or `medium` for quick sanity checks.
 */
const DEFAULT_EFFORT = "high";
/**
 * Check whether a value is one of the supported effort levels.
 *
 * @param v Any value, typically taken from tool-call arguments.
 * @returns true only for a string listed in EFFORT_LEVELS.
 */
function isEffort(v) {
	if (typeof v !== "string") return false;
	return EFFORT_LEVELS.includes(v);
}
2773
/**
 * Bounded concurrency for `tools/call`.
 *
 * Originally capped at 2 (commit 4317a25) as a defensive pre-launch guess
 * against Opus's natural pattern of fanning out to all three critics at once.
 * Raised to 8 (Phase 2D of the peer-MCP plan) so the decomposition pattern
 * Phase 2B teaches Opus — "split a >20 KB artifact into 2-4 batches and call
 * in parallel" — can actually run in parallel without the (3+)th call
 * returning isError "queue full".
 *
 * The persona handlers (`callPersona`) hold no shared mutable state, so the
 * cap is not hiding a race; the upstream Copilot's own rate limit (surfaced as
 * a per-call 429 → tool isError) is the real backpressure mechanism. 8 covers
 * a 7-fork wave with one slot of headroom and is still a hard upper bound
 * against runaway clients. See docs/research/peer-mcp-investigation.md
 * § "Concurrency cap investigation" for the full justification.
 */
const MAX_INFLIGHT_TOOLS_CALL = 8;
/** Number of `tools/call` requests currently being served. */
let inFlightToolsCall = 0;
// JSON-RPC 2.0 error codes used by the /mcp handler.
const RPC_PARSE_ERROR = -32700;
const RPC_INVALID_REQUEST = -32600;
const RPC_METHOD_NOT_FOUND = -32601;
const RPC_INVALID_PARAMS = -32602;
const RPC_INTERNAL_ERROR = -32603;
2792
/**
 * Build a JSON-RPC 2.0 error response envelope.
 *
 * @param id Request id to echo; null/undefined becomes null.
 * @param code JSON-RPC error code.
 * @param message Human-readable error message.
 * @param data Optional extra payload; the `data` key is omitted when undefined.
 * @returns `{ jsonrpc, id, error }`.
 */
function rpcError(id, code, message, data) {
	const error = { code, message };
	if (data !== void 0) error.data = data;
	return {
		jsonrpc: "2.0",
		id: id ?? null,
		error
	};
}
2806
/**
 * Build a JSON-RPC 2.0 success response envelope.
 *
 * @param id Request id to echo; null/undefined becomes null.
 * @param result Method result payload.
 * @returns `{ jsonrpc, id, result }`.
 */
function rpcResult(id, result) {
	const echoedId = id ?? null;
	return { jsonrpc: "2.0", id: echoedId, result };
}
2813
/**
 * Decide whether a `Host` header value names the loopback interface.
 *
 * Accepted hosts, each with an optional `:port` suffix: `127.0.0.1`,
 * `localhost` (compared case-insensitively), and the bracketed IPv6 loopback
 * literal `[::1]`. Everything else, including a missing header, is rejected.
 *
 * @param host Raw Host header value, possibly undefined.
 * @returns true when the host is a loopback name or address.
 */
function isLoopbackHost(host) {
	if (!host) return false;
	const value = host.toLowerCase();
	if (value.startsWith("[")) {
		// IPv6 literals are bracketed, and the port (if any) follows the "]".
		// Splitting on the last ":" would cut inside the address itself.
		const closing = value.indexOf("]");
		if (closing < 0) return false;
		const afterBracket = value.slice(closing + 1);
		if (afterBracket !== "" && !/^:\d+$/.test(afterBracket)) return false;
		return value.slice(1, closing) === "::1";
	}
	const idx = value.lastIndexOf(":");
	const hostname = idx >= 0 ? value.slice(0, idx) : value;
	return hostname === "127.0.0.1" || hostname === "localhost";
}
2819
/**
 * Constant-time comparison of a presented bearer token with the expected one.
 * Random per-launch nonces are not realistically timing-attackable, but the
 * constant-time compare costs nothing.
 *
 * @param provided Token taken from the Authorization header.
 * @param expected Nonce configured for this proxy session.
 * @returns true only when both strings are identical.
 */
function nonceMatches(provided, expected) {
	if (provided.length !== expected.length) return false;
	const [left, right] = [provided, expected].map((text) => Buffer.from(text));
	try {
		// Throws when the byte lengths differ, which counts as a mismatch.
		return timingSafeEqual(left, right);
	} catch {
		return false;
	}
}
2833
/**
 * Authorize a request to the /mcp endpoint.
 *
 * Checks, in order: the Host header must be loopback (403), a peer-MCP nonce
 * must be configured in proxy state (401), and the Authorization header must
 * carry that nonce as a bearer token (401).
 *
 * @param c Request context.
 * @returns `{ ok: true }`, or `{ ok: false, status, reason }` on rejection.
 */
function checkAuth(c) {
	const deny = (status, reason) => ({
		ok: false,
		status,
		reason
	});
	if (!isLoopbackHost(c.req.header("host"))) return deny(403, "non-loopback Host header rejected");
	const expected = state.peerMcpNonce;
	if (!expected) return deny(401, "/mcp not enabled in this proxy session");
	const header = c.req.header("authorization") ?? "";
	const bearer = /^Bearer\s+(.+)$/i.exec(header);
	const authorized = bearer !== null && nonceMatches(bearer[1], expected);
	if (!authorized) return deny(401, "missing or invalid Authorization bearer");
	return { ok: true };
}
2854
/**
 * Report whether the cached model list contains a model whose id matches
 * /^gemini-3\..*pro/i.
 *
 * @returns false when no model list has been loaded into state.
 */
function geminiAvailable() {
	const catalog = state.models?.data;
	if (!catalog) return false;
	const geminiPro = /^gemini-3\..*pro/i;
	return catalog.some((model) => geminiPro.test(model.id));
}
2859
/**
 * List the personas that can currently be offered as tools. A persona flagged
 * `requiresHttp` is only included while `geminiAvailable()` is true.
 *
 * @returns The usable subset of PERSONAS_READ.
 */
function activePersonas() {
	return PERSONAS_READ.filter((persona) => {
		if (!persona.requiresHttp) return true;
		return geminiAvailable();
	});
}
2862
/**
 * Build the `tools/list` payload: one tool descriptor per active persona.
 * Every tool shares the same input schema (`prompt` required; `context` and
 * `effort` optional; no additional properties).
 *
 * @returns Array of `{ name, description, inputSchema }`.
 */
function toolEntries() {
	// A fresh schema object is created for every tool.
	const makeInputSchema = () => ({
		type: "object",
		required: ["prompt"],
		additionalProperties: false,
		properties: {
			prompt: {
				type: "string",
				description: "The lead's brief — the artifact under review plus constraints."
			},
			context: {
				type: "string",
				description: "Optional additional context (extra file content, prior decisions). Concatenated to the brief before sending."
			},
			effort: {
				type: "string",
				enum: [...EFFORT_LEVELS],
				description: `Reasoning depth (low | medium | high | xhigh). Default "${DEFAULT_EFFORT}". Use 'xhigh' for explicit deep dives where you want maximum reasoning. Use 'medium' for quick sanity checks. Note: for non-OpenAI models routed via /v1/chat/completions (gemini-3.x), the upstream may silently ignore this knob.`
			}
		}
	});
	const entries = [];
	for (const persona of activePersonas()) {
		entries.push({
			name: persona.toolNameHttp,
			description: persona.description,
			inputSchema: makeInputSchema()
		});
	}
	return entries;
}
2888
/**
 * Combine the caller's brief with optional extra context into one user message.
 *
 * @param prompt The brief.
 * @param context Optional additional context; ignored when empty/undefined.
 * @returns `prompt` alone, or `prompt` followed by a separator and the context.
 */
function buildUserText(prompt, context) {
	return context ? `${prompt}\n\n---\n\nAdditional context:\n${context}` : prompt;
}
2892
/**
 * Collect the assistant's text from a Responses-API result.
 *
 * Only `output` items of type "message" with role "assistant" are considered,
 * and within them only content parts of type "output_text" or "text" that have
 * a string `text`. Parts are concatenated in order with no separator.
 *
 * A response without an `output` array yields "" instead of throwing, matching
 * how the chat-completions extractor treats a response without choices.
 *
 * @param response Parsed upstream response body.
 * @returns The concatenated assistant text, or "" when there is none.
 */
function extractResponsesText(response) {
	const items = response?.output;
	if (!Array.isArray(items)) return "";
	const out = [];
	for (const item of items) {
		if (typeof item !== "object" || item === null) continue;
		if (item.type !== "message" || item.role !== "assistant") continue;
		if (!Array.isArray(item.content)) continue;
		for (const part of item.content) {
			if (typeof part !== "object" || part === null) continue;
			const isTextPart = part.type === "output_text" || part.type === "text";
			if (isTextPart && typeof part.text === "string") out.push(part.text);
		}
	}
	return out.join("");
}
2908
/**
 * Read the first choice's message content from a chat-completions result.
 *
 * @param response Parsed upstream response body.
 * @returns The content when it is a string, otherwise "".
 */
function extractChatCompletionText(response) {
	const content = response.choices?.[0]?.message?.content;
	if (typeof content !== "string") return "";
	return content;
}
2914
/**
 * Build a tool result that reports a failure to the MCP client.
 *
 * @param message Text shown to the caller.
 * @returns `{ content: [{ type: "text", text }], isError: true }`.
 */
function toolError(message) {
	const textPart = { type: "text", text: message };
	return { content: [textPart], isError: true };
}
2923
/**
 * Run one persona against the upstream model and wrap its answer as a tool
 * result.
 *
 * Personas whose endpoint is "/v1/responses" go through `createResponses`;
 * all others go through `createChatCompletions`. Both calls are
 * non-streaming.
 *
 * @param persona Persona definition (model, endpoint, instructions, names).
 * @param prompt The caller's brief.
 * @param context Optional extra context appended to the brief.
 * @param effort Reasoning effort level forwarded upstream.
 * @returns A text tool result, or a tool error when the model returned no text.
 */
async function callPersona(persona, prompt, context, effort) {
	const resolvedModel = resolveModel(persona.model);
	const userText = buildUserText(prompt, context);
	let answer;
	if (persona.endpoint === "/v1/responses") {
		const response = await createResponses({
			model: resolvedModel,
			instructions: persona.baseInstructions,
			input: [{
				role: "user",
				content: [{
					type: "input_text",
					text: userText
				}]
			}],
			stream: false,
			reasoning: { effort }
		});
		answer = extractResponsesText(response);
	} else {
		const completion = await createChatCompletions({
			model: resolvedModel,
			messages: [{
				role: "system",
				content: persona.baseInstructions
			}, {
				role: "user",
				content: userText
			}],
			stream: false,
			reasoning_effort: effort
		});
		answer = extractChatCompletionText(completion);
	}
	if (!answer) return toolError(`persona ${persona.agentName}: empty assistant output`);
	return { content: [{
		type: "text",
		text: answer
	}] };
}
2964
/**
 * Write one peer-MCP telemetry line to stderr, formatted as space-separated
 * `key=value` pairs after a `[peer-mcp]` tag. The error message, when present,
 * is JSON-quoted.
 *
 * @param t Telemetry record: `name`, `model`, `durationMs`, `result`, and an
 *   optional `errorMessage`.
 */
function logTelemetry(t) {
	const fields = [
		["name", t.name],
		["model", t.model],
		["duration_ms", t.durationMs],
		["result", t.result]
	].map(([key, value]) => `${key}=${value}`);
	if (t.errorMessage) fields.push(`error=${JSON.stringify(t.errorMessage)}`);
	const line = [`[peer-mcp]`, ...fields].join(" ");
	process.stderr.write(line + "\n");
}
2975
/**
 * Handle a JSON-RPC `tools/call` request.
 *
 * Validation failures (bad effort, missing name, unknown tool, missing prompt)
 * are returned as JSON-RPC errors. A full queue and persona failures are
 * returned as successful JSON-RPC results whose tool payload has
 * `isError: true`. Every executed call emits one telemetry line.
 *
 * @param body Parsed JSON-RPC request object.
 * @returns A JSON-RPC response envelope.
 */
async function handleToolsCall(body) {
	const params = body.params ?? {};
	const name$1 = typeof params.name === "string" ? params.name : "";
	const args = params.arguments ?? {};
	const prompt = typeof args.prompt === "string" ? args.prompt : "";
	const context = typeof args.context === "string" ? args.context : void 0;
	const invalidParams = (message) => rpcError(body.id, RPC_INVALID_PARAMS, message);
	const failedTool = (text) => rpcResult(body.id, {
		content: [{
			type: "text",
			text
		}],
		isError: true
	});
	let effort = DEFAULT_EFFORT;
	if (args.effort !== void 0) {
		if (!isEffort(args.effort)) return invalidParams(`tools/call: arguments.effort must be one of ${EFFORT_LEVELS.join("|")}; got ${JSON.stringify(args.effort)}`);
		effort = args.effort;
	}
	if (!name$1) return invalidParams("tools/call missing name");
	const persona = activePersonas().find((candidate) => candidate.toolNameHttp === name$1);
	if (!persona) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
	if (!prompt) return invalidParams(`tools/call: arguments.prompt is required`);
	if (inFlightToolsCall >= MAX_INFLIGHT_TOOLS_CALL) return failedTool(`Peer MCP queue full (${MAX_INFLIGHT_TOOLS_CALL} in-flight). Retry shortly, or wait for the current persona calls to complete.`);
	inFlightToolsCall++;
	const startedAt = Date.now();
	const report = (result, extra = {}) => logTelemetry({
		name: persona.agentName,
		model: persona.model,
		durationMs: Date.now() - startedAt,
		result,
		...extra
	});
	try {
		const result = await callPersona(persona, prompt, context, effort);
		report(result.isError ? "isError" : "ok");
		return rpcResult(body.id, result);
	} catch (err) {
		const message = err instanceof Error ? err.message : String(err);
		report("exception", { errorMessage: message });
		return failedTool(`persona ${persona.agentName} failed: ${message}`);
	} finally {
		// Release the concurrency slot on every exit path.
		inFlightToolsCall--;
	}
}
3028
/**
 * Dispatch one JSON-RPC 2.0 message received on /mcp.
 *
 * A request without an `id` is a notification: it is acknowledged with 202 and
 * no body, whatever its method. `notifications/initialized` is acknowledged
 * the same way even when it carries an id. All other outcomes, including
 * JSON-RPC errors, are returned with HTTP status 200.
 *
 * @param _c Request context (unused).
 * @param body Parsed request body.
 * @returns `{ status, body }`, where `body` is a JSON-RPC envelope or null.
 */
async function handleRpc(_c, body) {
	const reply = (envelope) => ({
		status: 200,
		body: envelope
	});
	const envelopeRequired = "jsonrpc 2.0 envelope required";
	if (body === null || typeof body !== "object" || Array.isArray(body)) return reply(rpcError(null, RPC_INVALID_REQUEST, envelopeRequired));
	if (body.jsonrpc !== "2.0" || typeof body.method !== "string") return reply(rpcError(body.id ?? null, RPC_INVALID_REQUEST, envelopeRequired));
	const isNotification = body.id === void 0;
	if (isNotification || body.method === "notifications/initialized") return {
		status: 202,
		body: null
	};
	switch (body.method) {
		case "initialize": return reply(rpcResult(body.id, {
			protocolVersion: MCP_PROTOCOL_VERSION,
			capabilities: { tools: { listChanged: false } },
			serverInfo: {
				name: SERVER_NAME,
				version: SERVER_VERSION
			}
		}));
		case "tools/list": return reply(rpcResult(body.id, { tools: toolEntries() }));
		case "tools/call": return reply(await handleToolsCall(body));
		case "ping": return reply(rpcResult(body.id, {}));
		default: return reply(rpcError(body.id, RPC_METHOD_NOT_FOUND, `unknown method: ${body.method}`));
	}
}
3097
/**
 * HTTP POST entry point for /mcp: authorize, parse the JSON body, dispatch it,
 * and serialize the outcome.
 *
 * Authorization failures use the status chosen by `checkAuth`. An unparsable
 * body and unexpected handler errors are reported as JSON-RPC errors with HTTP
 * status 200.
 *
 * @param c Request context.
 * @returns The HTTP response.
 */
async function handleMcpPost(c) {
	const auth$1 = checkAuth(c);
	if (!auth$1.ok) return c.json(rpcError(null, RPC_INVALID_REQUEST, auth$1.reason), auth$1.status);
	let body;
	try {
		body = await c.req.json();
	} catch (err) {
		consola.debug("/mcp parse error:", err);
		return c.json(rpcError(null, RPC_PARSE_ERROR, "request body is not valid JSON"), 200);
	}
	try {
		const outcome = await handleRpc(c, body);
		return outcome.body === null ? c.body(null, outcome.status) : c.json(outcome.body, outcome.status);
	} catch (err) {
		consola.error("/mcp handler error:", err);
		// Echo the request id only when the body is a plain object envelope.
		const isEnvelope = typeof body === "object" && body !== null && !Array.isArray(body);
		const echoId = isEnvelope ? body.id ?? null : null;
		const detail = err instanceof Error ? err.message : String(err);
		return c.json(rpcError(echoId, RPC_INTERNAL_ERROR, detail), 200);
	}
}
3117
/**
 * HTTP DELETE entry point for /mcp. After the same authorization check as
 * POST, it answers 200 with an empty body.
 *
 * @param c Request context.
 * @returns The HTTP response.
 */
function handleMcpDelete(c) {
	const auth$1 = checkAuth(c);
	if (auth$1.ok) return c.body(null, 200);
	return c.json(rpcError(null, RPC_INVALID_REQUEST, auth$1.reason), auth$1.status);
}
3122
+
3123
+ //#endregion
3124
+ //#region src/routes/mcp/route.ts
3125
/** Router for the /mcp endpoint: POST carries JSON-RPC, DELETE is acknowledged. */
const mcpRoutes = new Hono();
// Errors escaping the POST handler are turned into a response by forwardError.
const onMcpPost = async (c) => {
	try {
		return await handleMcpPost(c);
	} catch (error) {
		return await forwardError(c, error);
	}
};
// DELETE has no body to report errors in, so any failure becomes a bare 500.
const onMcpDelete = (c) => {
	try {
		return handleMcpDelete(c);
	} catch {
		return c.body(null, 500);
	}
};
mcpRoutes.post("/", onMcpPost);
mcpRoutes.delete("/", onMcpDelete);
3140
+
1640
3141
  //#endregion
1641
3142
  //#region src/services/copilot/create-messages.ts
1642
3143
  /**
@@ -1676,14 +3177,18 @@ function buildHeaders(extraHeaders) {
1676
3177
  */
1677
3178
  async function createMessages(body, extraHeaders) {
1678
3179
  if (!state.copilotToken) throw new Error("Copilot token not found");
1679
- const headers = buildHeaders(extraHeaders);
1680
3180
  const url = `${copilotBaseUrl(state)}/v1/messages?beta=true`;
1681
3181
  consola.debug(`Forwarding to ${url}`);
1682
- const response = await fetch(url, {
1683
- method: "POST",
1684
- headers,
1685
- body
1686
- });
3182
+ const doFetch = () => {
3183
+ const fetchInit = {
3184
+ method: "POST",
3185
+ headers: buildHeaders(extraHeaders),
3186
+ body
3187
+ };
3188
+ if (UPSTREAM_FETCH_TIMEOUT_MS > 0) fetchInit.signal = AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS);
3189
+ return fetch(url, fetchInit);
3190
+ };
3191
+ const response = await tryRefreshAndRetry(doFetch, "/v1/messages");
1687
3192
  if (!response.ok) {
1688
3193
  let errorBody = "";
1689
3194
  try {
@@ -1706,14 +3211,18 @@ async function createMessages(body, extraHeaders) {
1706
3211
  */
1707
3212
  async function countTokens(body, extraHeaders) {
1708
3213
  if (!state.copilotToken) throw new Error("Copilot token not found");
1709
- const headers = buildHeaders(extraHeaders);
1710
3214
  const url = `${copilotBaseUrl(state)}/v1/messages/count_tokens?beta=true`;
1711
3215
  consola.debug(`Forwarding to ${url}`);
1712
- const response = await fetch(url, {
1713
- method: "POST",
1714
- headers,
1715
- body
1716
- });
3216
+ const doFetch = () => {
3217
+ const fetchInit = {
3218
+ method: "POST",
3219
+ headers: buildHeaders(extraHeaders),
3220
+ body
3221
+ };
3222
+ if (UPSTREAM_FETCH_TIMEOUT_MS > 0) fetchInit.signal = AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS);
3223
+ return fetch(url, fetchInit);
3224
+ };
3225
+ const response = await tryRefreshAndRetry(doFetch, "/v1/messages/count_tokens");
1717
3226
  if (!response.ok) {
1718
3227
  let errorBody = "";
1719
3228
  try {
@@ -1731,6 +3240,22 @@ async function countTokens(body, extraHeaders) {
1731
3240
  return response;
1732
3241
  }
1733
3242
 
3243
+ //#endregion
3244
+ //#region src/lib/diagnose-response.ts
3245
/** Maximum number of body characters echoed into the diagnostic log line. */
const PREVIEW_LIMIT = 200;

/**
 * Parses an upstream response body as JSON, logging a diagnostic line when
 * that fails.
 *
 * The body is read exactly once as text and then parsed. Compared with
 * cloning the response up front, this leaves no unread tee'd branch behind on
 * the (common) success path, while the raw text is still available for the
 * diagnostic preview on failure.
 *
 * @param response  Fetch `Response` whose body has not been consumed yet.
 * @param routePath Route label included in the diagnostic log line.
 * @returns The parsed JSON value.
 * @throws Rethrows the original error: a `SyntaxError` when the body is not
 *   valid JSON, or whatever `response.text()` rejected with when the body
 *   could not be read.
 */
async function parseJsonOrDiagnose(response, routePath) {
  // Emits the single diagnostic line; the body preview is capped at PREVIEW_LIMIT.
  const report = (bodyText) => {
    const contentType = response.headers.get("content-type") ?? "(none)";
    const preview = bodyText.length > PREVIEW_LIMIT
      ? `${bodyText.slice(0, PREVIEW_LIMIT)}...(truncated)`
      : bodyText;
    consola.error(`Upstream JSON parse failed at ${routePath}: status=${response.status} content-type="${contentType}" body[0..${PREVIEW_LIMIT}]=${JSON.stringify(preview)}`);
  };

  let bodyText;
  try {
    bodyText = await response.text();
  } catch (error) {
    report("(unreadable)");
    throw error;
  }

  try {
    return JSON.parse(bodyText);
  } catch (error) {
    report(bodyText);
    throw error;
  }
}
3258
+
1734
3259
  //#endregion
1735
3260
  //#region src/routes/messages/count-tokens-handler.ts
1736
3261
  const isWebSearchTool$1 = (tool) => typeof tool.type === "string" && tool.type.startsWith("web_search") || tool.name === "web_search";
@@ -1778,7 +3303,7 @@ async function handleCountTokens(c) {
1778
3303
  ...selectedModel?.requestHeaders,
1779
3304
  ...extraHeaders
1780
3305
  });
1781
- const responseBody = await response.json();
3306
+ const responseBody = await parseJsonOrDiagnose(response, c.req.path);
1782
3307
  logRequest({
1783
3308
  method: "POST",
1784
3309
  path: c.req.path,
@@ -1969,7 +3494,17 @@ async function handleCompletion(c) {
1969
3494
  }
1970
3495
  throw error;
1971
3496
  }
1972
- if ((response.headers.get("content-type") ?? "").includes("text/event-stream")) {
3497
+ const contentType = response.headers.get("content-type") ?? "";
3498
+ const clientAcceptsSSE = (c.req.header("accept") ?? "").includes("text/event-stream");
3499
+ let isStreaming = contentType.includes("text/event-stream");
3500
+ if (!isStreaming && clientAcceptsSSE) {
3501
+ if (contentType === "" || contentType === "application/octet-stream") {
3502
+ consola.warn(`Upstream /v1/messages returned status=${response.status} content-type=${JSON.stringify(contentType)} but client requested streaming; treating response body as SSE`);
3503
+ isStreaming = true;
3504
+ }
3505
+ }
3506
+ if (debugEnabled) consola.debug(`Upstream /v1/messages: status=${response.status} content-type="${contentType}" isStreaming=${isStreaming}`);
3507
+ if (isStreaming) {
1973
3508
  logRequest({
1974
3509
  method: "POST",
1975
3510
  path: c.req.path,
@@ -1982,18 +3517,19 @@ async function handleCompletion(c) {
1982
3517
  const streamHeaders = {
1983
3518
  "content-type": "text/event-stream",
1984
3519
  "cache-control": "no-cache",
3520
+ "transfer-encoding": "chunked",
1985
3521
  connection: "keep-alive"
1986
3522
  };
1987
3523
  const requestId = response.headers.get("x-request-id");
1988
3524
  if (requestId) streamHeaders["x-request-id"] = requestId;
1989
3525
  const reqId = response.headers.get("request-id");
1990
3526
  if (reqId) streamHeaders["request-id"] = reqId;
1991
- return new Response(response.body, {
3527
+ return new Response(response.body ? relayAnthropicStream(response.body, { routePath: c.req.path }) : null, {
1992
3528
  status: response.status,
1993
3529
  headers: streamHeaders
1994
3530
  });
1995
3531
  }
1996
- const responseBody = await response.json();
3532
+ const responseBody = await parseJsonOrDiagnose(response, c.req.path);
1997
3533
  logRequest({
1998
3534
  method: "POST",
1999
3535
  path: c.req.path,
@@ -2200,49 +3736,16 @@ modelRoutes.get("/", async (c) => {
2200
3736
  }
2201
3737
  });
2202
3738
 
2203
- //#endregion
2204
- //#region src/services/copilot/create-responses.ts
2205
- const createResponses = async (payload, modelHeaders) => {
2206
- if (!state.copilotToken) throw new Error("Copilot token not found");
2207
- const enableVision = detectVision(payload.input);
2208
- const isAgentCall = detectAgentCall(payload.input);
2209
- const headers = {
2210
- ...copilotHeaders(state, enableVision),
2211
- ...modelHeaders,
2212
- "X-Initiator": isAgentCall ? "agent" : "user"
2213
- };
2214
- const response = await fetch(`${copilotBaseUrl(state)}/responses`, {
2215
- method: "POST",
2216
- headers,
2217
- body: JSON.stringify(payload)
2218
- });
2219
- if (!response.ok) {
2220
- consola.error("Failed to create responses", response);
2221
- throw new HTTPError("Failed to create responses", response);
2222
- }
2223
- if (payload.stream) return events(response);
2224
- return await response.json();
2225
- };
2226
- function detectVision(input) {
2227
- if (typeof input === "string") return false;
2228
- if (!Array.isArray(input)) return false;
2229
- return input.some((item) => {
2230
- if ("content" in item && Array.isArray(item.content)) return item.content.some((part) => part.type === "input_image");
2231
- return false;
2232
- });
2233
- }
2234
- function detectAgentCall(input) {
2235
- if (typeof input === "string") return false;
2236
- if (!Array.isArray(input)) return false;
2237
- return input.some((item) => {
2238
- if ("role" in item && item.role === "assistant") return true;
2239
- if ("type" in item && (item.type === "function_call" || item.type === "function_call_output")) return true;
2240
- return false;
2241
- });
2242
- }
2243
-
2244
3739
  //#endregion
2245
3740
  //#region src/routes/responses/handler.ts
3741
// Shared encoder used to turn formatted SSE frames into bytes.
const ENCODER = new TextEncoder();

/**
 * Serializes one event into Server-Sent Events wire format.
 *
 * Field order is `event`, then one `data:` line per line of `chunk.data`,
 * then `id`; the frame is terminated by a blank line.
 *
 * - `event` is emitted only when truthy.
 * - `data` is emitted whenever it is not `undefined`; multi-line payloads are
 *   split on CRLF / CR / LF so every line gets its own `data:` prefix.
 * - `id` is emitted only when it is neither `undefined` nor `null`, so a
 *   missing id never shows up on the wire as the literal text "null".
 * - Line breaks inside `event` and `id` are collapsed to a single space: those
 *   fields are single-line, and a raw line break would start a new field line.
 *
 * @param chunk Object with optional `event`, `data` and `id` properties.
 * @returns The frame as a string, always ending in "\n\n".
 */
function formatSSE(chunk) {
  const toSingleLine = (value) => String(value).replace(/[\r\n]+/g, " ");
  const parts = [];
  if (chunk.event) {
    parts.push(`event: ${toSingleLine(chunk.event)}`);
  }
  if (chunk.data !== undefined) {
    for (const line of String(chunk.data).split(/\r\n|\r|\n/)) {
      parts.push(`data: ${line}`);
    }
  }
  if (chunk.id !== undefined && chunk.id !== null) {
    parts.push(`id: ${toSingleLine(chunk.id)}`);
  }
  return `${parts.join("\n")}\n\n`;
}
2246
3749
  async function handleResponses(c) {
2247
3750
  const startTime = Date.now();
2248
3751
  await checkRateLimit(state);
@@ -2283,16 +3786,106 @@ async function handleResponses(c) {
2283
3786
  if (debugEnabled) consola.debug("Non-streaming response:", JSON.stringify(response));
2284
3787
  return c.json(response);
2285
3788
  }
2286
- return streamSSE(c, async (stream) => {
2287
- for await (const chunk of response) {
2288
- if (debugEnabled) consola.debug("Streaming chunk:", JSON.stringify(chunk));
2289
- if (chunk.data === "[DONE]") break;
2290
- if (!chunk.data) continue;
2291
- await stream.writeSSE({
2292
- data: chunk.data,
2293
- event: chunk.event,
2294
- id: chunk.id?.toString()
2295
- });
3789
+ const iterator = response[Symbol.asyncIterator]();
3790
+ let firstChunk;
3791
+ let upstreamFinished = false;
3792
+ while (true) {
3793
+ const r = await iterator.next();
3794
+ if (r.done) {
3795
+ upstreamFinished = true;
3796
+ break;
3797
+ }
3798
+ if (r.value === void 0 || r.value === null) continue;
3799
+ if (r.value.data === "[DONE]") {
3800
+ upstreamFinished = true;
3801
+ break;
3802
+ }
3803
+ if (!r.value.data) continue;
3804
+ firstChunk = r.value;
3805
+ break;
3806
+ }
3807
+ if (firstChunk === void 0) consola.warn(`Upstream /responses returned no payload events at ${c.req.path}`);
3808
+ let pendingFirstChunk = firstChunk;
3809
+ let consumerCancelled = false;
3810
+ const safeClose = (controller) => {
3811
+ try {
3812
+ controller.close();
3813
+ } catch {}
3814
+ };
3815
+ const releaseUpstream = (reason) => {
3816
+ if (typeof iterator.return === "function") iterator.return(reason).catch(() => {});
3817
+ };
3818
+ const safeEnqueue = (controller, bytes) => {
3819
+ try {
3820
+ controller.enqueue(bytes);
3821
+ return true;
3822
+ } catch (e) {
3823
+ if (isControllerClosedError(e)) {
3824
+ consumerCancelled = true;
3825
+ releaseUpstream(e);
3826
+ return false;
3827
+ }
3828
+ throw e;
3829
+ }
3830
+ };
3831
+ return new Response(new ReadableStream({
3832
+ async pull(controller) {
3833
+ if (consumerCancelled || upstreamFinished) {
3834
+ safeClose(controller);
3835
+ return;
3836
+ }
3837
+ if (pendingFirstChunk !== void 0) {
3838
+ const chunk = pendingFirstChunk;
3839
+ pendingFirstChunk = void 0;
3840
+ if (debugEnabled) consola.debug("Streaming chunk:", JSON.stringify(chunk));
3841
+ safeEnqueue(controller, ENCODER.encode(formatSSE(chunk)));
3842
+ return;
3843
+ }
3844
+ try {
3845
+ const result = await iterator.next();
3846
+ if (consumerCancelled) {
3847
+ safeClose(controller);
3848
+ return;
3849
+ }
3850
+ if (result.done) {
3851
+ upstreamFinished = true;
3852
+ safeClose(controller);
3853
+ return;
3854
+ }
3855
+ if (result.value === void 0 || result.value === null) return;
3856
+ if (result.value.data === "[DONE]") {
3857
+ upstreamFinished = true;
3858
+ safeClose(controller);
3859
+ return;
3860
+ }
3861
+ if (!result.value.data) return;
3862
+ if (debugEnabled) consola.debug("Streaming chunk:", JSON.stringify(result.value));
3863
+ safeEnqueue(controller, ENCODER.encode(formatSSE(result.value)));
3864
+ } catch (error) {
3865
+ upstreamFinished = true;
3866
+ if (consumerCancelled) {
3867
+ releaseUpstream(error);
3868
+ safeClose(controller);
3869
+ return;
3870
+ }
3871
+ const { errName, errMessage } = logStreamError(c.req.path, error);
3872
+ safeEnqueue(controller, ENCODER.encode(buildOpenAIErrorEvent(errName, errMessage)));
3873
+ releaseUpstream(error);
3874
+ safeClose(controller);
3875
+ }
3876
+ },
3877
+ cancel() {
3878
+ consumerCancelled = true;
3879
+ upstreamFinished = true;
3880
+ releaseUpstream();
3881
+ }
3882
+ }), {
3883
+ status: 200,
3884
+ headers: {
3885
+ "content-type": "text/event-stream",
3886
+ "cache-control": "no-cache",
3887
+ "transfer-encoding": "chunked",
3888
+ connection: "keep-alive"
2296
3889
  }
2297
3890
  });
2298
3891
  }
@@ -2494,6 +4087,11 @@ usageRoute.get("/", async (c) => {
2494
4087
  const server = new Hono();
2495
4088
  server.use(cors());
2496
4089
  server.get("/", (c) => c.text("Server running"));
4090
+ server.get("/version", (c) => c.json({
4091
+ name,
4092
+ version,
4093
+ gitSha: process.env.GITHUB_SHA ?? "unknown"
4094
+ }));
2497
4095
  server.on("HEAD", ["/"], (c) => c.body(null, 200));
2498
4096
  server.route("/chat/completions", completionRoutes);
2499
4097
  server.route("/responses", responsesRoutes);
@@ -2508,6 +4106,7 @@ server.route("/v1/models", modelRoutes);
2508
4106
  server.route("/v1/embeddings", embeddingRoutes);
2509
4107
  server.route("/v1/search", searchRoutes);
2510
4108
  server.route("/v1/messages", messageRoutes);
4109
+ server.route("/mcp", mcpRoutes);
2511
4110
  server.post("/api/event_logging/batch", (c) => c.body(null, 200));
2512
4111
  server.notFound((c) => c.json({
2513
4112
  type: "error",
@@ -2717,6 +4316,7 @@ function getClaudeCodeEnvVars(serverUrl, model) {
2717
4316
  ANTHROPIC_BASE_URL: serverUrl,
2718
4317
  ANTHROPIC_AUTH_TOKEN: "dummy",
2719
4318
  CLAUDE_CONFIG_DIR: path.join(os.homedir(), ".claude"),
4319
+ MCP_TIMEOUT: "600000",
2720
4320
  DISABLE_NON_ESSENTIAL_MODEL_CALLS: "1",
2721
4321
  CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: "1"
2722
4322
  };
@@ -2756,6 +4356,21 @@ const claude = defineCommand({
2756
4356
  alias: "m",
2757
4357
  type: "string",
2758
4358
  description: "Override the default model for Claude Code"
4359
+ },
4360
+ "codex-mcp": {
4361
+ type: "boolean",
4362
+ default: true,
4363
+ description: "Wire peer-model MCP personas (codex-critic, codex-reviewer, gemini-critic) into the spawned Claude Code session"
4364
+ },
4365
+ "codex-cli": {
4366
+ type: "boolean",
4367
+ default: false,
4368
+ description: "Add a `codex mcp-server` stdio backend so codex-implementer can mutate files. Requires codex CLI 0.129+; gracefully falls back to HTTP-only if absent."
4369
+ },
4370
+ "codex-mcp-only": {
4371
+ type: "boolean",
4372
+ default: false,
4373
+ description: "Pass --strict-mcp-config to claude code so only github-router's MCP servers are loaded (hides user's existing MCP servers)"
2759
4374
  }
2760
4375
  },
2761
4376
  async run({ args }) {
@@ -2800,12 +4415,36 @@ const claude = defineCommand({
2800
4415
  }
2801
4416
  const banner = chosenSlug === resolvedSlug ? chosenSlug : `${chosenSlug} → ${resolvedSlug}`;
2802
4417
  process$1.stderr.write(`Server ready on ${serverUrl}, launching Claude Code (${banner})...\n`);
4418
+ const envVars = getClaudeCodeEnvVars(serverUrl, chosenSlug);
4419
+ const extraArgs = args._ ?? [];
4420
+ let onShutdown;
4421
+ if (args["codex-mcp"] !== false) try {
4422
+ const requestedCli = args["codex-cli"] ?? false;
4423
+ const backend = resolveCodexCliBackend({
4424
+ requested: requestedCli,
4425
+ codexInfo: requestedCli ? getCodexVersion() : null
4426
+ });
4427
+ const geminiAvailable$1 = state.models?.data.some((m) => /^gemini-3\..*pro/i.test(m.id)) ?? false;
4428
+ if (!geminiAvailable$1) consola.info("gemini-3.1-pro-preview not found in your Copilot model catalog; gemini-critic persona will not be registered.");
4429
+ const runtime = await writePeerMcpRuntimeFiles(serverUrl, {
4430
+ codexCli: backend === "cli",
4431
+ geminiAvailable: geminiAvailable$1
4432
+ });
4433
+ state.peerMcpNonce = runtime.nonce;
4434
+ onShutdown = runtime.cleanup;
4435
+ extraArgs.push("--mcp-config", runtime.mcpConfigPath);
4436
+ if (args["codex-mcp-only"] === true) extraArgs.push("--strict-mcp-config");
4437
+ const personaNames = runtime.personas.map((p) => p.agentName).join(", ");
4438
+ process$1.stderr.write(`Peer MCP wired (backend=${backend}, personas=[${personaNames}], subagent .md files=${runtime.agentMdPaths.length}).\n`);
4439
+ } catch (err) {
4440
+ consola.warn(`Peer MCP wiring failed (claude will launch without it): ${err instanceof Error ? err.message : String(err)}`);
4441
+ }
2803
4442
  launchChild({
2804
4443
  kind: "claude-code",
2805
- envVars: getClaudeCodeEnvVars(serverUrl, chosenSlug),
2806
- extraArgs: args._ ?? [],
4444
+ envVars,
4445
+ extraArgs,
2807
4446
  model: chosenSlug
2808
- }, server$1);
4447
+ }, server$1, { onShutdown });
2809
4448
  }
2810
4449
  });
2811
4450
 
@@ -2873,7 +4512,8 @@ const codex = defineCommand({
2873
4512
  kind: "codex",
2874
4513
  envVars: getCodexEnvVars(serverUrl),
2875
4514
  extraArgs: args._ ?? [],
2876
- model: codexModel
4515
+ model: codexModel,
4516
+ serverUrl
2877
4517
  }, server$1);
2878
4518
  }
2879
4519
  });
@@ -2906,9 +4546,9 @@ async function checkTokenExists() {
2906
4546
  }
2907
4547
  }
2908
4548
  async function getDebugInfo() {
2909
- const [version, tokenExists] = await Promise.all([getPackageVersion(), checkTokenExists()]);
4549
+ const [version$1, tokenExists] = await Promise.all([getPackageVersion(), checkTokenExists()]);
2910
4550
  return {
2911
- version,
4551
+ version: version$1,
2912
4552
  runtime: getRuntimeInfo(),
2913
4553
  paths: {
2914
4554
  APP_DIR: PATHS.APP_DIR,