github-router 0.3.22 → 0.3.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -8,6 +8,8 @@ import path from "node:path";
8
8
  import process$1 from "node:process";
9
9
  import { execFile, execFileSync, spawn } from "node:child_process";
10
10
  import { promisify } from "node:util";
11
+ import { events } from "fetch-event-stream";
12
+ import { z } from "zod";
11
13
  import fs$1 from "node:fs";
12
14
  import { Writable } from "node:stream";
13
15
  import { serve } from "srvx";
@@ -15,8 +17,6 @@ import { getProxyForUrl } from "proxy-from-env";
15
17
  import { Agent, ProxyAgent, setGlobalDispatcher } from "undici";
16
18
  import { Hono } from "hono";
17
19
  import { cors } from "hono/cors";
18
- import { events } from "fetch-event-stream";
19
- import { z } from "zod";
20
20
  import clipboard from "clipboardy";
21
21
 
22
22
  //#region src/lib/paths.ts
@@ -65,6 +65,9 @@ const CLAUDE_HOME_POLICY = new Map([
65
65
  ["cache", "ISOLATED"],
66
66
  ["logs", "ISOLATED"],
67
67
  ["paste-cache", "ISOLATED"],
68
+ ["jobs", "ISOLATED"],
69
+ ["daemon", "ISOLATED"],
70
+ ["daemon.log", "ISOLATED"],
68
71
  ["projects", "SHARED"],
69
72
  ["sessions", "SHARED"],
70
73
  ["tasks", "SHARED"],
@@ -1643,8 +1646,206 @@ function launchChild(target, server$1, options = {}) {
1643
1646
  });
1644
1647
  }
1645
1648
 
1649
+ //#endregion
1650
+ //#region src/services/copilot/web-search.ts
1651
/**
 * Shape of a single JSON-RPC 2.0 message read from the MCP response stream.
 * `id`, `result` and `error` are all optional; a result may carry a list of
 * text parts and an `isError` flag.
 */
const RpcSchema = (() => {
	const textPart = z.object({
		type: z.literal("text"),
		text: z.string()
	});
	const toolResult = z.object({
		content: z.array(textPart).optional(),
		isError: z.boolean().optional()
	});
	const rpcError = z.object({
		code: z.number(),
		message: z.string()
	});
	return z.object({
		jsonrpc: z.literal("2.0"),
		id: z.number().optional(),
		result: toolResult.optional(),
		error: rpcError.optional()
	});
})();
/**
 * Shape of the JSON document carried inside the first text part of a
 * `web_search` result: the answer text, optional (nullable) annotations that
 * may each hold a URL citation, and an optional (nullable) `bing_searches`
 * list whose items are not inspected.
 */
const InnerSchema = (() => {
	const urlCitation = z.object({
		title: z.string(),
		url: z.string()
	});
	const annotation = z.object({ url_citation: urlCitation.optional() });
	return z.object({
		text: z.object({
			value: z.string(),
			annotations: z.array(annotation).nullable().optional()
		}),
		bing_searches: z.array(z.unknown()).nullable().optional()
	});
})();
1676
/** Maximum number of searches admitted within any rolling one-second window. */
const MAX_SEARCHES_PER_SECOND = 3;
/** Admission times (ms since epoch) of the most recent searches. */
let searchTimestamps = [];
/** Tail of the promise queue that serialises admission decisions. */
let throttleChain = Promise.resolve();
/**
 * Wait for a free slot in the web-search rate limiter.
 *
 * Callers are queued one behind another on `throttleChain`, so only one
 * admission decision runs at a time. When the window already holds
 * `MAX_SEARCHES_PER_SECOND` entries the caller sleeps until the oldest entry
 * is one second old, then records its own admission time.
 *
 * @returns {Promise<void>} settles once this caller has been admitted.
 */
async function throttleSearch() {
	const admit = async () => {
		const startedAt = Date.now();
		searchTimestamps = searchTimestamps.filter((stamp) => startedAt - stamp < 1e3);
		const windowFull = searchTimestamps.length >= MAX_SEARCHES_PER_SECOND;
		const waitMs = windowFull ? 1e3 - (startedAt - searchTimestamps[0]) : 0;
		if (waitMs > 0) {
			consola.debug(`Web search rate limited, waiting ${waitMs}ms`);
			await sleep(waitMs);
		}
		searchTimestamps.push(Date.now());
	};
	const myTurn = throttleChain.then(admit);
	// The queue tail swallows rejections so one failed turn cannot block later callers;
	// the caller of this turn still observes the rejection through `myTurn`.
	throttleChain = myTurn.catch(() => {});
	return myTurn;
}
1695
/**
 * Build the request headers for a call to the Copilot `/mcp` endpoint.
 *
 * @param {string} [sid] MCP session id; when truthy it is sent as `Mcp-Session-Id`.
 * @returns {Record<string, string>} header map authenticated with `state.githubToken`.
 * @throws {Error} when `state.githubToken` is not set.
 */
function mcpHeaders(sid) {
	const token = state.githubToken;
	if (!token) throw new Error("GitHub token missing — re-run auth flow. Web search uses the GitHub PAT (not the Copilot token); the on-disk token at ~/.local/share/github-router/github_token must be present.");
	return {
		Authorization: `Bearer ${token}`,
		"content-type": "application/json",
		accept: "application/json, text/event-stream",
		"X-MCP-Host": "copilot-cli",
		"X-MCP-Toolsets": "web_search",
		"Mcp-Protocol-Version": "2025-06-18",
		"user-agent": `GitHubCopilotChat/${copilotVersion(state)}`,
		// The session header exists only after `initialize` has returned an id.
		...sid ? { "Mcp-Session-Id": sid } : {}
	};
}
1709
/**
 * POST a JSON-RPC payload to the Copilot `/mcp` endpoint.
 *
 * A 5xx response is retried exactly once after a 500 ms pause. The response
 * that is being thrown away is cancelled first so its unread body does not
 * keep the underlying connection occupied while the retry runs.
 *
 * @param {unknown} body JSON-serialisable request payload.
 * @param {string} [sid] MCP session id forwarded to `mcpHeaders`.
 * @param {boolean} [retry=true] whether a 5xx response may be retried once.
 * @returns {Promise<Response>} the final response; it may still be non-OK.
 */
async function postMcp(body, sid, retry = true) {
	const url = `${copilotBaseUrl(state)}/mcp`;
	const res = await fetch(url, {
		method: "POST",
		headers: mcpHeaders(sid),
		body: JSON.stringify(body)
	});
	if (res.ok || !retry || res.status < 500) return res;
	try {
		await res.body?.cancel();
	} catch {
		// Best effort: failing to release the discarded body must not block the retry.
	}
	await sleep(500);
	return postMcp(body, sid, false);
}
1722
/**
 * Run one web search through the Copilot MCP endpoint.
 *
 * Sequence: wait for a rate-limiter slot, `initialize` a session, send
 * `notifications/initialized`, issue `tools/call` for `web_search`, then read
 * the event stream until the message whose id matches this call arrives. The
 * first text part of that message is parsed as JSON and validated against
 * `InnerSchema`.
 *
 * The `initialize` and `notifications/initialized` responses are only needed
 * for their status and headers, so their bodies are cancelled as soon as
 * those have been checked instead of being left unread.
 *
 * @param {string} query search query; only its first 80 characters are logged.
 * @returns {Promise<{content: string, references: Array<{title: string, url: string}>}>}
 *   answer text plus cited URLs, excluding any URL containing `bing.com/search`.
 * @throws {HTTPError} on any failed step, a missing session id, a missing or
 *   error result, or inner content that is not JSON / does not match the schema.
 */
async function searchWeb(query) {
	await throttleSearch();
	consola.info(`Web search (MCP): "${query.slice(0, 80)}"`);
	const callId = Math.floor(Math.random() * 1e9);
	// Release a response whose body will never be read. Best effort only.
	const releaseBody = async (res) => {
		try {
			await res.body?.cancel();
		} catch {
			// A body that cannot be cancelled is not worth failing the search for.
		}
	};
	let sid;
	try {
		const initRes = await postMcp({
			jsonrpc: "2.0",
			id: 1,
			method: "initialize",
			params: {
				protocolVersion: "2024-11-05",
				capabilities: {},
				clientInfo: {
					name: "GitHubCopilotChat",
					version: copilotVersion(state)
				}
			}
		});
		if (!initRes.ok) {
			consola.error("MCP initialize failed", initRes.status);
			throw new HTTPError("MCP initialize failed", initRes);
		}
		sid = initRes.headers.get("mcp-session-id") ?? void 0;
		if (!sid) throw new HTTPError("MCP initialize: missing Mcp-Session-Id header", initRes);
		await releaseBody(initRes);

		const notifRes = await postMcp({
			jsonrpc: "2.0",
			method: "notifications/initialized"
		}, sid);
		// `ok` already covers 202, so a single check is enough here.
		if (!notifRes.ok) {
			consola.error("MCP notifications/initialized failed", notifRes.status);
			throw new HTTPError("MCP notifications/initialized failed", notifRes);
		}
		await releaseBody(notifRes);

		const callRes = await postMcp({
			jsonrpc: "2.0",
			id: callId,
			method: "tools/call",
			params: {
				name: "web_search",
				arguments: { query }
			}
		}, sid);
		if (!callRes.ok) {
			consola.error("MCP tools/call failed", callRes.status);
			throw new HTTPError("MCP tools/call failed", callRes);
		}

		// Skip empty frames, non-JSON frames and messages for other ids.
		let rpc;
		for await (const ev of events(callRes)) {
			if (!ev.data) continue;
			let parsedJson;
			try {
				parsedJson = JSON.parse(ev.data);
			} catch {
				continue;
			}
			const parsed = RpcSchema.safeParse(parsedJson);
			if (parsed.success && parsed.data.id === callId) {
				rpc = parsed.data;
				break;
			}
		}
		if (!rpc) throw new HTTPError("MCP tools/call: no matching response id in SSE stream", callRes);
		if (rpc.error) throw new HTTPError(`MCP error ${rpc.error.code}: ${rpc.error.message}`, callRes);
		if (rpc.result?.isError) throw new HTTPError("MCP web_search tool error", callRes);
		const text = rpc.result?.content?.[0]?.text;
		if (!text) throw new HTTPError("MCP web_search: empty content", callRes);

		let innerRaw;
		try {
			innerRaw = JSON.parse(text);
		} catch (err) {
			throw new HTTPError(`MCP web_search: inner content not JSON: ${err instanceof Error ? err.message : String(err)}`, callRes);
		}
		const innerParsed = InnerSchema.safeParse(innerRaw);
		if (!innerParsed.success) throw new HTTPError(`MCP web_search: inner content shape changed (${innerParsed.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join("; ")})`, callRes);
		const inner = innerParsed.data;

		const references = [];
		for (const ann of inner.text.annotations ?? []) {
			const cite = ann.url_citation;
			if (!cite) continue;
			if (cite.url.toLowerCase().includes("bing.com/search")) continue;
			references.push({
				title: cite.title,
				url: cite.url
			});
		}
		consola.debug(`Web search returned ${references.length} references`);
		return {
			content: inner.text.value,
			references
		};
	} finally {
		// Fire-and-forget session teardown: it is not awaited and its failures are ignored.
		if (sid) try {
			fetch(`${copilotBaseUrl(state)}/mcp`, {
				method: "DELETE",
				headers: mcpHeaders(sid)
			}).catch(() => {});
		} catch {}
	}
}
1819
+
1646
1820
  //#endregion
1647
1821
  //#region src/lib/peer-mcp-personas.ts
1822
/**
 * Reasoning effort levels accepted by Copilot's /v1/responses (gpt-5.x) and
 * /v1/chat/completions endpoints. Per the proxy's existing thinking-mode
 * translator (CLAUDE.md "Thinking-mode translation"), Copilot's adaptive-
 * thinking path uses these same buckets:
 *   <2k tokens → low, <8k → medium, <24k → high, else → xhigh.
 *
 * Per-persona `allowedEfforts` and `defaultEffort` constrain which subset
 * each persona exposes — enforced in handler.ts:handleToolsCall.
 *
 * **xhigh on long-running personas works via SSE-streamed /mcp responses**
 * (handler.ts:handleToolsCallSSE). Claude Code's MCP HTTP client honors
 * `text/event-stream` responses without applying the ~60s per-tool-call
 * timer that previously broke xhigh on gpt-5.5 (~56s wall) and
 * claude-opus-4-7 (high+ thinking budgets).
 *
 * The tier subset and default differ from persona to persona; the persona
 * records are the source of truth, so the values are not restated here.
 *
 * The list is frozen because it is shared module-wide and backs `isEffort`:
 * an accidental mutation would silently change what counts as a valid effort.
 */
const EFFORT_LEVELS = Object.freeze([
	"low",
	"medium",
	"high",
	"xhigh"
]);
/**
 * Check whether a value is one of the known effort levels.
 *
 * @param {unknown} v candidate value, typically taken from tool-call arguments.
 * @returns {boolean} true only for a string that appears in `EFFORT_LEVELS`.
 */
function isEffort(v) {
	return typeof v === "string" && EFFORT_LEVELS.includes(v);
}
1648
1849
  const CRITIC_RUBRIC = `
1649
1850
  Apply this grading rubric:
1650
1851
  - Score 1–5 on three axes:
@@ -1673,7 +1874,7 @@ Self-reminder (read before every reply):
1673
1874
  `.trim();
1674
1875
  const COLD_START_CONTRACT = `
1675
1876
  Cold-start contract for the lead orchestrator (Opus):
1676
- When delegating to me, paste a self-contained brief. I have no access to your scrollback, CLAUDE.md, or the project tree. Always include:
1877
+ When delegating to me, paste a self-contained brief. I have no access to your scrollback, project memory, or the project tree. Always include:
1677
1878
  (a) the artifact under review verbatim (code/diff/plan text),
1678
1879
  (b) the constraints or "done" criteria,
1679
1880
  (c) any prior decisions I should not relitigate.
@@ -1745,39 +1946,87 @@ Reply format (markdown):
1745
1946
 
1746
1947
  Resilience reminder:
1747
1948
  If your session terminates abnormally before "Status: complete", the lead will retry once. On recovery, ask the lead to confirm what's already been done before re-applying changes — duplicate edits are worse than a slow restart.`;
1949
+ const OPUS_CRITIC_BASE = `You are opus-critic, a fresh-context Anthropic-side adversarial reviewer running on Claude Opus 4.7 — the same model and lab as the lead orchestrator that just delegated to you. You are NOT the lead. You did not see the lead's reasoning trace. You only see the brief.
1950
+
1951
+ Your job is to spot what the lead missed because of cognitive momentum, sunk-cost on a plan, or motivated reasoning toward a particular fix. Your blind-spot diversification is LIMITED compared to codex-critic (gpt-5.5) and gemini-critic (gemini-3.1-pro) — same training, same lab, same RLHF priors. Use that honestly: don't pretend to find a different perspective when the obvious read is "the lead got it right." Silence on good work is a valid and welcome answer.
1952
+
1953
+ Sycophancy is the failure mode you exist to fight. Manufactured contrarianism is a different failure of the same shape — do neither.
1954
+
1955
+ ${COLD_START_CONTRACT}
1956
+
1957
+ ${CRITIC_RUBRIC}`;
1748
1958
/**
 * Read-only peer personas. Each record names the agent, its HTTP tool name,
 * the backing model and endpoint, the instructions it runs with, and the
 * effort tiers it accepts together with the tier used when none is requested.
 * The outer array is frozen; the records themselves are not.
 */
const PERSONAS_READ = Object.freeze([
	// OpenAI critic for plans and designs.
	{
		agentName: "codex-critic",
		toolNameHttp: "codex_critic",
		model: "gpt-5.5",
		endpoint: "/v1/responses",
		description: "Adversarial second opinion on plans, designs, or code tradeoffs. Backed by gpt-5.5 (OpenAI) — different lab than Opus. Pass artifact verbatim.",
		baseInstructions: CRITIC_BASE,
		agentPrompt: "",
		writeCapable: false,
		requiresHttp: false,
		allowedEfforts: [...EFFORT_LEVELS],
		defaultEffort: "xhigh"
	},
	// Google critic; the only record gated on the Gemini catalog and the only one without `xhigh`.
	{
		agentName: "gemini-critic",
		toolNameHttp: "gemini_critic",
		model: "gemini-3.1-pro-preview",
		endpoint: "/v1/chat/completions",
		description: "Adversarial second opinion. Backed by gemini-3.1-pro (Google) — third-lab triangulation, strong on long-context and formal reasoning. Pass artifact verbatim.",
		baseInstructions: GEMINI_CRITIC_BASE,
		agentPrompt: "",
		writeCapable: false,
		requiresHttp: true,
		requiresGeminiCatalog: true,
		allowedEfforts: [
			"low",
			"medium",
			"high"
		],
		defaultEffort: "high"
	},
	// OpenAI code-specialist reviewer for concrete diffs and files.
	{
		agentName: "codex-reviewer",
		toolNameHttp: "codex_reviewer",
		model: "gpt-5.3-codex",
		endpoint: "/v1/responses",
		description: "Line-level review of a concrete diff or single file. Backed by gpt-5.3-codex (OpenAI) — code-specialist, narrow-scope. Pass artifact verbatim.",
		baseInstructions: REVIEWER_BASE,
		agentPrompt: "",
		writeCapable: false,
		requiresHttp: false,
		allowedEfforts: [...EFFORT_LEVELS],
		defaultEffort: "xhigh"
	},
	// Fresh-context Opus critic served through /v1/messages.
	{
		agentName: "opus-critic",
		toolNameHttp: "opus_critic",
		model: "claude-opus-4-7",
		endpoint: "/v1/messages",
		description: "Adversarial second opinion from a fresh-context Opus 4.7 — cheap same-lab sanity check. Pass artifact verbatim.",
		baseInstructions: OPUS_CRITIC_BASE,
		agentPrompt: "",
		writeCapable: false,
		requiresHttp: true,
		allowedEfforts: [...EFFORT_LEVELS],
		defaultEffort: "xhigh"
	}
]);
1783
2032
  const PERSONAS_WRITE = Object.freeze([{
@@ -1785,11 +2034,18 @@ const PERSONAS_WRITE = Object.freeze([{
1785
2034
  toolNameHttp: "codex_implementer",
1786
2035
  model: "gpt-5.3-codex",
1787
2036
  endpoint: "/v1/responses",
1788
- description: "Targeted implementation of a self-contained coding task — actual file edits via Codex's tool-use sandbox. Backed by gpt-5.3-codex with workspace-write access (only registered when --codex-cli is set). Use only when the task has a clear spec and acceptance criteria; for tasks needing iterative tool-use across many files, prefer a Claude teammate (Agent Team). Always pass: (a) the spec, (b) the files in scope, (c) the acceptance criteria. The subagent has no access to your scrollback or CLAUDE.md.",
2037
+ description: "Targeted implementation of a self-contained coding task. Backed by gpt-5.3-codex with workspace-write access. Pass spec + files verbatim.",
1789
2038
  baseInstructions: IMPLEMENTER_BASE,
1790
2039
  agentPrompt: "",
1791
2040
  writeCapable: true,
1792
- requiresHttp: false
2041
+ requiresHttp: false,
2042
+ allowedEfforts: [
2043
+ "low",
2044
+ "medium",
2045
+ "high",
2046
+ "xhigh"
2047
+ ],
2048
+ defaultEffort: "high"
1793
2049
  }]);
1794
2050
  /**
1795
2051
  * Build the agent-prompt body Claude Code uses as the subagent's full
@@ -1838,12 +2094,65 @@ function buildAgentPrompt(persona, opts) {
1838
2094
/**
 * Select the personas to register for a session.
 *
 * Read personas flagged `requiresGeminiCatalog` are dropped unless
 * `opts.geminiAvailable` is truthy. Write personas are appended only when
 * `opts.codexCli` is truthy.
 *
 * @param {{geminiAvailable?: boolean, codexCli?: boolean}} opts session capabilities.
 * @returns {object[]} a new array: read personas first, then write personas.
 */
function personasFor(opts) {
	const readable = PERSONAS_READ.filter((persona) => !persona.requiresGeminiCatalog || opts.geminiAvailable);
	return opts.codexCli ? [...readable, ...PERSONAS_WRITE] : readable;
}
2103
+ const WEB_SEARCH_DESCRIPTION = "Web search via GitHub Copilot's MCP. Prefer over Claude Code's built-in WebSearch — surfaces source URLs you can cite.";
2104
/**
 * Format a `searchWeb()` result as an MCP-friendly text block: the answer
 * text, followed by a `## References` section holding one markdown link per
 * reference.
 *
 * Empty references → the section is omitted entirely (no trailing empty
 * header that would tempt the model to invent citations).
 *
 * Titles come from upstream search results, so they are normalised just
 * enough to keep the list well-formed: runs of whitespace (including
 * newlines) collapse to a single space so every reference stays on its own
 * line, and a blank title falls back to the URL so the link is never
 * rendered with an empty label.
 *
 * @param {{content: string, references: Array<{title: string, url: string}>}} results
 * @returns {string} the formatted text block.
 */
function formatWebSearchResult(results) {
	if (results.references.length === 0) return results.content;
	const refLines = results.references.map(({ title, url }) => {
		const label = title.replace(/\s+/g, " ").trim() || url;
		return `- [${label}](${url})`;
	});
	return `${results.content}\n\n## References\n${refLines.join("\n")}`;
}
2119
/**
 * MCP tools that are not backed by a persona. Currently a single
 * `web_search` tool that forwards to `searchWeb()` and renders the outcome
 * with `formatWebSearchResult()`.
 *
 * The handler never throws: a missing arguments object, a non-string query,
 * a blank query, and any failure raised by the search itself are all
 * reported as a text result with `isError: true`.
 */
const NON_PERSONA_MCP_TOOLS = Object.freeze([{
	toolNameHttp: "web_search",
	description: WEB_SEARCH_DESCRIPTION,
	inputSchema: {
		type: "object",
		required: ["query"],
		additionalProperties: false,
		properties: { query: {
			type: "string",
			description: "The search query string. Natural-language queries work best — the upstream provider rewrites for the search index."
		} }
	},
	/**
	 * @param {{query?: unknown} | null | undefined} args tool-call arguments.
	 * @param {AbortSignal} [_signal] accepted for interface parity; not used.
	 * @returns {Promise<{content: Array<{type: "text", text: string}>, isError?: true}>}
	 */
	async handler(args, _signal) {
		// `args` may be absent when the call carries no arguments object, so it is
		// read defensively; surrounding whitespace is dropped so a blank query is
		// rejected here instead of being sent upstream.
		const query = typeof args?.query === "string" ? args.query.trim() : "";
		if (!query) return {
			content: [{
				type: "text",
				text: "web_search: arguments.query is required (must be a non-empty string)"
			}],
			isError: true
		};
		try {
			const results = await searchWeb(query);
			return { content: [{
				type: "text",
				text: formatWebSearchResult(results)
			}] };
		} catch (err) {
			const reason = err instanceof Error ? err.message : String(err);
			return {
				content: [{
					type: "text",
					text: `web_search failed: ${reason}`
				}],
				isError: true
			};
		}
	}
}]);
1847
2156
 
1848
2157
  //#endregion
1849
2158
  //#region src/lib/codex-mcp-config.ts
@@ -1914,11 +2223,11 @@ function buildPeerMcpConfig(serverUrl, opts) {
1914
2223
  * ExitPlanMode to default-on (env-disable-able).
1915
2224
  */
1916
2225
  function buildCoordinatorAgent(opts) {
1917
- const peers = ["codex-critic"];
2226
+ const peers = ["codex-critic", "opus-critic"];
1918
2227
  if (opts.geminiAvailable) peers.push("gemini-critic");
1919
2228
  peers.push("codex-reviewer");
1920
2229
  return {
1921
- description: "Coordinates cross-lab adversarial review. **Use proactively before ExitPlanMode for non-trivial plans and after non-trivial commits** (>50 lines OR touching streaming/auth/concurrency/persistence/security). Routes to codex-critic / codex-reviewer / gemini-critic in parallel based on artifact type and aggregates findings. Cheaper than calling each peer manually for the common case where you want a multi-lab triangulation. The subagent has no access to your scrollback or CLAUDE.md — pass the artifact verbatim.",
2230
+ description: "Coordinates cross-lab adversarial review across codex-critic, opus-critic, gemini-critic, codex-reviewer. Use proactively before non-trivial plans and after non-trivial commits. Always pass artifacts verbatim peers are fresh-context.",
1922
2231
  prompt: [
1923
2232
  "# Subagent: peer-review-coordinator",
1924
2233
  "",
@@ -1934,10 +2243,11 @@ function buildCoordinatorAgent(opts) {
1934
2243
  "- **Concrete diff or single file** → fan out to `codex-reviewer`" + (opts.geminiAvailable ? " AND `gemini-critic` (gemini for cross-lab triangulation)" : "") + ". For very small changes (<20 lines), one `codex-reviewer` call is enough.",
1935
2244
  "- **Tie-breaker after codex-critic has weighed in** → call `gemini-critic`" + (opts.geminiAvailable ? "" : " (NOT REGISTERED in this session — gemini-3.x not in catalog; tie-break unavailable)") + " with the artifact AND codex-critic's verdict for cross-lab cross-check.",
1936
2245
  "- **Long-context artifact (>100 KB)** → prefer `gemini-critic`" + (opts.geminiAvailable ? "" : " (NOT REGISTERED in this session)") + ". Otherwise, decompose into 2-4 batches and fan out across `codex-critic` calls in parallel.",
2246
+ "- **Fast same-lab sanity check on a moderate artifact (<5 KB)** → prefer `opus-critic` (cheapest, ~22s, only `effort: low|medium` supported). Same lab as the lead — limited blind-spot diversification, but a useful gut-check before committing to a controversial decision. For cross-lab diversification or deep dives on larger artifacts, use codex/gemini at higher effort with decomposition for >5KB.",
1937
2247
  "",
1938
2248
  "## Decomposition for large artifacts",
1939
2249
  "",
1940
- "Each per-call MCP wait is bounded (~150s on Claude Code v2.1.138 per regression #50289). For artifacts >20 KB, split into 2-4 logical batches BY CONCERN (not by raw size — semantic batches give better per-batch reviews) and call peers in parallel. The proxy's MCP cap allows up to 8 in-flight calls. Aggregate findings yourself before reporting back.",
2250
+ "Each per-call MCP wait is bounded (~60s SDK default on Claude Code v2.1.113+ per regressions #50289 / #52137 — empirically reproduced 2026-05-14). The proxy enforces per-persona effort allowlists AND a pre-flight `predictedTooLong` cap (codex_critic@high >8 KB, codex_reviewer@high >12 KB, opus_critic@medium >6 KB) to surface would-be-timeouts as fast actionable errors. For artifacts that exceed the cap, split into 2-4 logical batches BY CONCERN (not by raw size — semantic batches give better per-batch reviews) and call peers in parallel. The proxy's MCP cap allows up to 8 in-flight calls. Aggregate findings yourself before reporting back.",
1941
2251
  "",
1942
2252
  "## Aggregation contract",
1943
2253
  "",
@@ -2344,7 +2654,7 @@ function initProxyFromEnv() {
2344
2654
  //#endregion
2345
2655
  //#region package.json
2346
2656
  var name = "github-router";
2347
- var version = "0.3.22";
2657
+ var version = "0.3.24";
2348
2658
 
2349
2659
  //#endregion
2350
2660
  //#region src/lib/approval.ts
@@ -2903,177 +3213,6 @@ const createChatCompletions = async (payload, modelHeaders, callerSignal) => {
2903
3213
  return await response.json();
2904
3214
  };
2905
3215
 
2906
- //#endregion
2907
- //#region src/services/copilot/web-search.ts
2908
- const RpcSchema = z.object({
2909
- jsonrpc: z.literal("2.0"),
2910
- id: z.number().optional(),
2911
- result: z.object({
2912
- content: z.array(z.object({
2913
- type: z.literal("text"),
2914
- text: z.string()
2915
- })).optional(),
2916
- isError: z.boolean().optional()
2917
- }).optional(),
2918
- error: z.object({
2919
- code: z.number(),
2920
- message: z.string()
2921
- }).optional()
2922
- });
2923
- const InnerSchema = z.object({
2924
- text: z.object({
2925
- value: z.string(),
2926
- annotations: z.array(z.object({ url_citation: z.object({
2927
- title: z.string(),
2928
- url: z.string()
2929
- }).optional() })).nullable().optional()
2930
- }),
2931
- bing_searches: z.array(z.unknown()).nullable().optional()
2932
- });
2933
- const MAX_SEARCHES_PER_SECOND = 3;
2934
- let searchTimestamps = [];
2935
- let throttleChain = Promise.resolve();
2936
- async function throttleSearch() {
2937
- const myTurn = throttleChain.then(async () => {
2938
- const now = Date.now();
2939
- searchTimestamps = searchTimestamps.filter((t) => now - t < 1e3);
2940
- if (searchTimestamps.length >= MAX_SEARCHES_PER_SECOND) {
2941
- const waitMs = 1e3 - (now - searchTimestamps[0]);
2942
- if (waitMs > 0) {
2943
- consola.debug(`Web search rate limited, waiting ${waitMs}ms`);
2944
- await sleep(waitMs);
2945
- }
2946
- }
2947
- searchTimestamps.push(Date.now());
2948
- });
2949
- throttleChain = myTurn.catch(() => {});
2950
- return myTurn;
2951
- }
2952
- function mcpHeaders(sid) {
2953
- if (!state.githubToken) throw new Error("GitHub token missing — re-run auth flow. Web search uses the GitHub PAT (not the Copilot token); the on-disk token at ~/.local/share/github-router/github_token must be present.");
2954
- const headers = {
2955
- Authorization: `Bearer ${state.githubToken}`,
2956
- "content-type": "application/json",
2957
- accept: "application/json, text/event-stream",
2958
- "X-MCP-Host": "copilot-cli",
2959
- "X-MCP-Toolsets": "web_search",
2960
- "Mcp-Protocol-Version": "2025-06-18",
2961
- "user-agent": `GitHubCopilotChat/${copilotVersion(state)}`
2962
- };
2963
- if (sid) headers["Mcp-Session-Id"] = sid;
2964
- return headers;
2965
- }
2966
- async function postMcp(body, sid, retry = true) {
2967
- const url = `${copilotBaseUrl(state)}/mcp`;
2968
- const res = await fetch(url, {
2969
- method: "POST",
2970
- headers: mcpHeaders(sid),
2971
- body: JSON.stringify(body)
2972
- });
2973
- if (!res.ok && retry && res.status >= 500) {
2974
- await sleep(500);
2975
- return postMcp(body, sid, false);
2976
- }
2977
- return res;
2978
- }
2979
- async function searchWeb(query) {
2980
- await throttleSearch();
2981
- consola.info(`Web search (MCP): "${query.slice(0, 80)}"`);
2982
- const callId = Math.floor(Math.random() * 1e9);
2983
- let sid;
2984
- try {
2985
- const initRes = await postMcp({
2986
- jsonrpc: "2.0",
2987
- id: 1,
2988
- method: "initialize",
2989
- params: {
2990
- protocolVersion: "2024-11-05",
2991
- capabilities: {},
2992
- clientInfo: {
2993
- name: "GitHubCopilotChat",
2994
- version: copilotVersion(state)
2995
- }
2996
- }
2997
- });
2998
- if (!initRes.ok) {
2999
- consola.error("MCP initialize failed", initRes.status);
3000
- throw new HTTPError("MCP initialize failed", initRes);
3001
- }
3002
- sid = initRes.headers.get("mcp-session-id") ?? void 0;
3003
- if (!sid) throw new HTTPError("MCP initialize: missing Mcp-Session-Id header", initRes);
3004
- const notifRes = await postMcp({
3005
- jsonrpc: "2.0",
3006
- method: "notifications/initialized"
3007
- }, sid);
3008
- if (!notifRes.ok && notifRes.status !== 202) {
3009
- consola.error("MCP notifications/initialized failed", notifRes.status);
3010
- throw new HTTPError("MCP notifications/initialized failed", notifRes);
3011
- }
3012
- const callRes = await postMcp({
3013
- jsonrpc: "2.0",
3014
- id: callId,
3015
- method: "tools/call",
3016
- params: {
3017
- name: "web_search",
3018
- arguments: { query }
3019
- }
3020
- }, sid);
3021
- if (!callRes.ok) {
3022
- consola.error("MCP tools/call failed", callRes.status);
3023
- throw new HTTPError("MCP tools/call failed", callRes);
3024
- }
3025
- let rpc;
3026
- for await (const ev of events(callRes)) {
3027
- if (!ev.data) continue;
3028
- let parsedJson;
3029
- try {
3030
- parsedJson = JSON.parse(ev.data);
3031
- } catch {
3032
- continue;
3033
- }
3034
- const parsed = RpcSchema.safeParse(parsedJson);
3035
- if (parsed.success && parsed.data.id === callId) {
3036
- rpc = parsed.data;
3037
- break;
3038
- }
3039
- }
3040
- if (!rpc) throw new HTTPError("MCP tools/call: no matching response id in SSE stream", callRes);
3041
- if (rpc.error) throw new HTTPError(`MCP error ${rpc.error.code}: ${rpc.error.message}`, callRes);
3042
- if (rpc.result?.isError) throw new HTTPError("MCP web_search tool error", callRes);
3043
- const text = rpc.result?.content?.[0]?.text;
3044
- if (!text) throw new HTTPError("MCP web_search: empty content", callRes);
3045
- let innerRaw;
3046
- try {
3047
- innerRaw = JSON.parse(text);
3048
- } catch (err) {
3049
- throw new HTTPError(`MCP web_search: inner content not JSON: ${err instanceof Error ? err.message : String(err)}`, callRes);
3050
- }
3051
- const innerParsed = InnerSchema.safeParse(innerRaw);
3052
- if (!innerParsed.success) throw new HTTPError(`MCP web_search: inner content shape changed (${innerParsed.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join("; ")})`, callRes);
3053
- const inner = innerParsed.data;
3054
- const references = [];
3055
- for (const ann of inner.text.annotations ?? []) {
3056
- const cite = ann.url_citation;
3057
- if (cite && !cite.url.toLowerCase().includes("bing.com/search")) references.push({
3058
- title: cite.title,
3059
- url: cite.url
3060
- });
3061
- }
3062
- consola.debug(`Web search returned ${references.length} references`);
3063
- return {
3064
- content: inner.text.value,
3065
- references
3066
- };
3067
- } finally {
3068
- if (sid) try {
3069
- fetch(`${copilotBaseUrl(state)}/mcp`, {
3070
- method: "DELETE",
3071
- headers: mcpHeaders(sid)
3072
- }).catch(() => {});
3073
- } catch {}
3074
- }
3075
- }
3076
-
3077
3216
  //#endregion
3078
3217
  //#region src/routes/chat-completions/handler.ts
3079
3218
  const ENCODER$2 = new TextEncoder();
@@ -3299,6 +3438,125 @@ embeddingRoutes.post("/", async (c) => {
3299
3438
  }
3300
3439
  });
3301
3440
 
3441
+ //#endregion
3442
+ //#region src/services/copilot/create-messages.ts
3443
+ /**
3444
+ * Build headers that match what VS Code Copilot Chat sends to the Copilot API.
3445
+ *
3446
+ * copilotHeaders() provides: Authorization, content-type, copilot-integration-id,
3447
+ * editor-version, editor-plugin-version, user-agent, openai-intent,
3448
+ * x-github-api-version, x-request-id, x-vscode-user-agent-library-version.
3449
+ *
3450
+ * We add the remaining headers VS Code sends for /v1/messages:
3451
+ * - X-Initiator (VS Code sets dynamically; "agent" is safe for CLI use)
3452
+ * - anthropic-version (VS Code's Anthropic SDK sends this)
3453
+ * - X-Interaction-Id (VS Code sends a session-scoped UUID)
3454
+ *
3455
+ * We intentionally omit copilot-vision-request — VS Code only sends it when
3456
+ * images are present, and the native /v1/messages endpoint handles vision
3457
+ * without requiring the header.
3458
+ *
3459
+ * extraHeaders allows callers to forward client-supplied beta headers
3460
+ * (anthropic-beta) so Copilot enables extended features.
3461
+ */
3462
+ function buildHeaders(extraHeaders) {
3463
+ return {
3464
+ ...copilotHeaders(state),
3465
+ accept: "application/json",
3466
+ "openai-intent": "messages-proxy",
3467
+ "x-interaction-type": "conversation-agent",
3468
+ "X-Initiator": "agent",
3469
+ "anthropic-version": "2023-06-01",
3470
+ "X-Interaction-Id": randomUUID(),
3471
+ ...extraHeaders
3472
+ };
3473
+ }
3474
+ /**
3475
+ * Forward an Anthropic Messages API request to Copilot's native /v1/messages endpoint.
3476
+ * Returns the raw Response so callers can handle streaming vs non-streaming.
3477
+ *
3478
+ * `callerSignal` (optional) is composed with the standard
3479
+ * UPSTREAM_FETCH_TIMEOUT_MS via AbortSignal.any so callers (e.g. the
3480
+ * peer-MCP `opus-critic` persona) can cancel the upstream call when
3481
+ * Claude Code's MCP per-tool-call ceiling fires. Mirrors the pattern
3482
+ * in createResponses / createChatCompletions.
3483
+ */
3484
+ async function createMessages(body, extraHeaders, callerSignal) {
3485
+ if (!state.copilotToken) throw new Error("Copilot token not found");
3486
+ const url = `${copilotBaseUrl(state)}/v1/messages?beta=true`;
3487
+ consola.debug(`Forwarding to ${url}`);
3488
+ const doFetch = () => {
3489
+ const fetchInit = {
3490
+ method: "POST",
3491
+ headers: buildHeaders(extraHeaders),
3492
+ body
3493
+ };
3494
+ const signals = [];
3495
+ if (UPSTREAM_FETCH_TIMEOUT_MS > 0) signals.push(AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS));
3496
+ if (callerSignal) signals.push(callerSignal);
3497
+ if (signals.length === 1) fetchInit.signal = signals[0];
3498
+ else if (signals.length > 1) fetchInit.signal = AbortSignal.any(signals);
3499
+ return fetch(url, fetchInit);
3500
+ };
3501
+ const response = await tryRefreshAndRetry(doFetch, "/v1/messages");
3502
+ if (!response.ok) {
3503
+ let errorBody = "";
3504
+ try {
3505
+ errorBody = await response.text();
3506
+ } catch {
3507
+ errorBody = "(could not read error body)";
3508
+ }
3509
+ consola.error(`Copilot /v1/messages error: ${response.status} ${errorBody}`);
3510
+ throw new HTTPError("Copilot messages request failed", new Response(errorBody, {
3511
+ status: response.status,
3512
+ statusText: response.statusText,
3513
+ headers: response.headers
3514
+ }));
3515
+ }
3516
+ return response;
3517
+ }
3518
+ /**
3519
+ * Forward an Anthropic count_tokens request to Copilot's native endpoint.
3520
+ * Returns the raw Response.
3521
+ *
3522
+ * `callerSignal` is composed with UPSTREAM_FETCH_TIMEOUT_MS — same pattern
3523
+ * as createMessages.
3524
+ */
3525
+ async function countTokens(body, extraHeaders, callerSignal) {
3526
+ if (!state.copilotToken) throw new Error("Copilot token not found");
3527
+ const url = `${copilotBaseUrl(state)}/v1/messages/count_tokens?beta=true`;
3528
+ consola.debug(`Forwarding to ${url}`);
3529
+ const doFetch = () => {
3530
+ const fetchInit = {
3531
+ method: "POST",
3532
+ headers: buildHeaders(extraHeaders),
3533
+ body
3534
+ };
3535
+ const signals = [];
3536
+ if (UPSTREAM_FETCH_TIMEOUT_MS > 0) signals.push(AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS));
3537
+ if (callerSignal) signals.push(callerSignal);
3538
+ if (signals.length === 1) fetchInit.signal = signals[0];
3539
+ else if (signals.length > 1) fetchInit.signal = AbortSignal.any(signals);
3540
+ return fetch(url, fetchInit);
3541
+ };
3542
+ const response = await tryRefreshAndRetry(doFetch, "/v1/messages/count_tokens");
3543
+ if (!response.ok) {
3544
+ let errorBody = "";
3545
+ try {
3546
+ errorBody = await response.text();
3547
+ } catch {
3548
+ errorBody = "(could not read error body)";
3549
+ }
3550
+ consola.error(`Copilot count_tokens error: ${response.status} ${errorBody}`);
3551
+ throw new HTTPError("Copilot count_tokens request failed", new Response(errorBody, {
3552
+ status: response.status,
3553
+ statusText: response.statusText,
3554
+ headers: response.headers
3555
+ }));
3556
+ }
3557
+ return response;
3558
+ }
3559
+
3302
3560
  //#endregion
3303
3561
  //#region src/services/copilot/create-responses.ts
3304
3562
  const createResponses = async (payload, modelHeaders, callerSignal) => {
@@ -3360,27 +3618,6 @@ function detectAgentCall(input) {
3360
3618
  const MCP_PROTOCOL_VERSION = "2025-06-18";
3361
3619
  const SERVER_NAME = "github-router-peers";
3362
3620
  const SERVER_VERSION = "1";
3363
- /**
3364
- * Reasoning effort levels accepted by Copilot's /v1/responses (gpt-5.x) and
3365
- * /v1/chat/completions endpoints. Per the proxy's existing thinking-mode
3366
- * translator (CLAUDE.md "Thinking-mode translation"), Copilot's adaptive-
3367
- * thinking path uses these same buckets:
3368
- * <2k tokens → low, <8k → medium, <24k → high, else → xhigh.
3369
- *
3370
- * Default `high` for peer reviews — adversarial-by-design but still cost-
3371
- * conscious. Callers can pass `xhigh` explicitly for deep dives, or `medium`
3372
- * for quick sanity checks.
3373
- */
3374
- const EFFORT_LEVELS = [
3375
- "low",
3376
- "medium",
3377
- "high",
3378
- "xhigh"
3379
- ];
3380
- const DEFAULT_EFFORT = "high";
3381
- function isEffort(v) {
3382
- return typeof v === "string" && EFFORT_LEVELS.includes(v);
3383
- }
3384
3621
  /** Bounded concurrency. Originally capped at 2 (commit 4317a25) as a defensive
3385
3622
  * pre-launch guess against Opus's natural pattern of fanning out to all three
3386
3623
  * critics at once. Raised to 8 (Phase 2D of the peer-MCP plan) so the
@@ -3485,10 +3722,10 @@ function geminiAvailable() {
3485
3722
  return models.some((m) => /^gemini-3\..*pro/i.test(m.id));
3486
3723
  }
3487
3724
  function activePersonas() {
3488
- return PERSONAS_READ.filter((p) => !p.requiresHttp || geminiAvailable());
3725
+ return PERSONAS_READ.filter((p) => !p.requiresGeminiCatalog || geminiAvailable());
3489
3726
  }
3490
3727
  function toolEntries() {
3491
- return activePersonas().map((p) => ({
3728
+ const personaEntries = activePersonas().map((p) => ({
3492
3729
  name: p.toolNameHttp,
3493
3730
  description: p.description,
3494
3731
  inputSchema: {
@@ -3506,12 +3743,18 @@ function toolEntries() {
3506
3743
  },
3507
3744
  effort: {
3508
3745
  type: "string",
3509
- enum: [...EFFORT_LEVELS],
3510
- description: `Reasoning depth (low | medium | high | xhigh). Default "${DEFAULT_EFFORT}". Use 'xhigh' for explicit deep dives where you want maximum reasoning. Use 'medium' for quick sanity checks. Note: for non-OpenAI models routed via /v1/chat/completions (gemini-3.x), the upstream may silently ignore this knob.`
3746
+ enum: [...p.allowedEfforts],
3747
+ description: `Reasoning depth (${p.allowedEfforts.join(" | ")}). Default "${p.defaultEffort}". Higher tiers cost more wall-clock; lower tiers are quicker sanity checks. ` + (p.endpoint === "/v1/chat/completions" ? "Note: for gemini routed via /v1/chat/completions, the upstream may silently ignore this knob." : "")
3511
3748
  }
3512
3749
  }
3513
3750
  }
3514
3751
  }));
3752
+ const nonPersonaEntries = NON_PERSONA_MCP_TOOLS.map((t) => ({
3753
+ name: t.toolNameHttp,
3754
+ description: t.description,
3755
+ inputSchema: t.inputSchema
3756
+ }));
3757
+ return [...personaEntries, ...nonPersonaEntries];
3515
3758
  }
3516
3759
  function buildUserText(prompt, context) {
3517
3760
  if (!context) return prompt;
@@ -3539,6 +3782,11 @@ function extractChatCompletionText(response) {
3539
3782
  const c = choice.message?.content;
3540
3783
  return typeof c === "string" ? c : "";
3541
3784
  }
3785
+ function extractMessagesText(response) {
3786
+ const out = [];
3787
+ for (const block of response.content ?? []) if (block.type === "text" && typeof block.text === "string") out.push(block.text);
3788
+ return out.join("");
3789
+ }
3542
3790
  function toolError(message) {
3543
3791
  return {
3544
3792
  content: [{
@@ -3548,6 +3796,94 @@ function toolError(message) {
3548
3796
  isError: true
3549
3797
  };
3550
3798
  }
3799
+ /**
3800
+ * Empirical pre-flight cap to convert "would-bust-the-60s-MCP-ceiling"
3801
+ * calls into fast actionable errors instead of slot-leaking timeouts.
3802
+ *
3803
+ * Probed live against Copilot 2026-05-14:
3804
+ * gpt-5.5 high on a ~600B prompt = 23.8s → ~76s on 8KB (rough linear)
3805
+ * gpt-5.3-codex high on ~600B = 16.0s → ~64s on 12KB
3806
+ * claude-opus-4-7 medium (thinking=3000) on a trivial prompt = 22.5s
3807
+ * but model self-paces budget → ~50s+ on a real ~6KB review
3808
+ *
3809
+ * Returns `{tooLong: true, capBytes}` when the (persona, effort, briefBytes)
3810
+ * tuple is empirically predicted to bust the 60s ceiling.
3811
+ *
3812
+ * SCOPE: the cap is JSON-PATH ONLY. Callers (handleMcpPost) MUST gate
3813
+ * the call site by `!acceptsEventStream(...)`. The SSE path
3814
+ * (handleToolsCallSSE) keeps the connection open past the 60s ceiling
3815
+ * via heartbeats — size-based pre-flight rejection there would just
3816
+ * lock SSE clients out of their primary advantage. JSON-path clients
3817
+ * (raw curl with `Accept: application/json`, older MCP clients without
3818
+ * SSE awareness) DO still hit the underlying tools/call timer, so the
3819
+ * cap is the only way to surface a fast actionable error there
3820
+ * instead of a slot-leaking timeout.
3821
+ *
3822
+ * INVARIANT: pre-flight MUST fire BEFORE inFlightToolsCall++ — the
3823
+ * slot must not be acquired for a rejected pre-flight. handleMcpPost
3824
+ * runs the check before delegating to handleRpc → handleToolsCall (the
3825
+ * function that increments the counter). Documented in CLAUDE.md.
3826
+ *
3827
+ * gemini_critic has no cap (long-context model + Copilot may auto-pace).
3828
+ */
3829
+ const PRE_FLIGHT_CAPS = [
3830
+ {
3831
+ toolName: "codex_critic",
3832
+ effort: "high",
3833
+ maxBriefBytes: 8 * 1024
3834
+ },
3835
+ {
3836
+ toolName: "codex_reviewer",
3837
+ effort: "high",
3838
+ maxBriefBytes: 12 * 1024
3839
+ },
3840
+ {
3841
+ toolName: "opus_critic",
3842
+ effort: "medium",
3843
+ maxBriefBytes: 6 * 1024
3844
+ }
3845
+ ];
3846
+ function predictedTooLong(persona, effort, briefBytes) {
3847
+ for (const cap of PRE_FLIGHT_CAPS) if (cap.toolName === persona.toolNameHttp && cap.effort === effort && briefBytes > cap.maxBriefBytes) return {
3848
+ tooLong: true,
3849
+ capBytes: cap.maxBriefBytes
3850
+ };
3851
+ return { tooLong: false };
3852
+ }
3853
+ /**
3854
+ * JSON-path pre-flight predictedTooLong gate. Returns a JSON-RPC result
3855
+ * body wrapping a tool-error envelope when the call would bust the 60s
3856
+ * tools/call ceiling on the JSON path; returns undefined when the call
3857
+ * should proceed normally.
3858
+ *
3859
+ * Skips the check (returns undefined) for any shape problem so
3860
+ * handleRpc can return the canonical JSON-RPC error code instead:
3861
+ * - notification (no id) → handleRpc returns 202 + empty body
3862
+ * - missing/unknown name → handleRpc returns -32601
3863
+ * - missing prompt → handleRpc returns -32602
3864
+ * - invalid effort string → handleRpc returns -32602
3865
+ * - effort not in persona.allowedEfforts → handleRpc returns -32602
3866
+ */
3867
+ function jsonPathPreflightCap(body) {
3868
+ if (body.id === void 0) return void 0;
3869
+ const params = body.params ?? {};
3870
+ const name$1 = typeof params.name === "string" ? params.name : "";
3871
+ const args = params.arguments ?? {};
3872
+ const prompt = typeof args.prompt === "string" ? args.prompt : "";
3873
+ const context = typeof args.context === "string" ? args.context : void 0;
3874
+ const rawEffort = args.effort;
3875
+ if (!name$1 || !prompt) return void 0;
3876
+ const persona = activePersonas().find((p) => p.toolNameHttp === name$1);
3877
+ if (!persona) return void 0;
3878
+ if (rawEffort !== void 0 && !isEffort(rawEffort)) return void 0;
3879
+ const effortMaybe = rawEffort;
3880
+ if (effortMaybe !== void 0 && !persona.allowedEfforts.includes(effortMaybe)) return;
3881
+ const effort = effortMaybe ?? persona.defaultEffort;
3882
+ const briefBytes = Buffer.byteLength(buildUserText(prompt, context), "utf8");
3883
+ const verdict = predictedTooLong(persona, effort, briefBytes);
3884
+ if (!verdict.tooLong) return void 0;
3885
+ return rpcResult(body.id, toolError(`pre-flight rejected: ${persona.toolNameHttp} at effort=${effort} on a ${briefBytes}-byte brief is empirically predicted to exceed the JSON tools/call timeout (cap=${verdict.capBytes} bytes for this tier). Either drop to a lower effort tier, split the brief into 2-4 parallel sub-calls per the decomposition guidance, or send Accept: text/event-stream to use the SSE path which bypasses this cap.`));
3886
+ }
3551
3887
  async function callPersona(persona, prompt, context, effort, signal) {
3552
3888
  const resolvedModel = resolveModel(persona.model);
3553
3889
  const userText = buildUserText(prompt, context);
@@ -3571,6 +3907,25 @@ async function callPersona(persona, prompt, context, effort, signal) {
3571
3907
  text: text$1
3572
3908
  }] };
3573
3909
  }
3910
+ if (persona.endpoint === "/v1/messages") {
3911
+ const maxTokens = effort === "low" ? 4096 : effort === "medium" ? 8192 : effort === "high" ? 16384 : 32768;
3912
+ const text$1 = extractMessagesText(await (await createMessages(JSON.stringify({
3913
+ model: resolvedModel,
3914
+ max_tokens: maxTokens,
3915
+ system: persona.baseInstructions,
3916
+ thinking: { type: "adaptive" },
3917
+ output_config: { effort },
3918
+ messages: [{
3919
+ role: "user",
3920
+ content: userText
3921
+ }]
3922
+ }), void 0, signal)).json());
3923
+ if (!text$1) return toolError(`persona ${persona.agentName}: empty assistant output`);
3924
+ return { content: [{
3925
+ type: "text",
3926
+ text: text$1
3927
+ }] };
3928
+ }
3574
3929
  const text = extractChatCompletionText(await createChatCompletions({
3575
3930
  model: resolvedModel,
3576
3931
  messages: [{
@@ -3604,17 +3959,23 @@ async function handleToolsCall(body) {
3604
3959
  const params = body.params ?? {};
3605
3960
  const name$1 = typeof params.name === "string" ? params.name : "";
3606
3961
  const args = params.arguments ?? {};
3607
- const prompt = typeof args.prompt === "string" ? args.prompt : "";
3608
- const context = typeof args.context === "string" ? args.context : void 0;
3609
- let effort = DEFAULT_EFFORT;
3610
- if (args.effort !== void 0) {
3611
- if (!isEffort(args.effort)) return rpcError(body.id, RPC_INVALID_PARAMS, `tools/call: arguments.effort must be one of ${EFFORT_LEVELS.join("|")}; got ${JSON.stringify(args.effort)}`);
3612
- effort = args.effort;
3613
- }
3614
3962
  if (!name$1) return rpcError(body.id, RPC_INVALID_PARAMS, "tools/call missing name");
3615
3963
  const persona = activePersonas().find((p) => p.toolNameHttp === name$1);
3616
- if (!persona) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
3617
- if (!prompt) return rpcError(body.id, RPC_INVALID_PARAMS, `tools/call: arguments.prompt is required`);
3964
+ const nonPersonaTool = persona ? void 0 : NON_PERSONA_MCP_TOOLS.find((t) => t.toolNameHttp === name$1);
3965
+ if (!persona && !nonPersonaTool) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
3966
+ let personaPrompt;
3967
+ let personaContext;
3968
+ let personaEffort;
3969
+ if (persona) {
3970
+ if (args.effort !== void 0 && !isEffort(args.effort)) return rpcError(body.id, RPC_INVALID_PARAMS, `tools/call: arguments.effort must be one of ${EFFORT_LEVELS.join("|")}; got ${JSON.stringify(args.effort)}`);
3971
+ const requestedEffort = args.effort;
3972
+ const prompt = typeof args.prompt === "string" ? args.prompt : "";
3973
+ if (!prompt) return rpcError(body.id, RPC_INVALID_PARAMS, `tools/call: arguments.prompt is required`);
3974
+ personaPrompt = prompt;
3975
+ personaContext = typeof args.context === "string" ? args.context : void 0;
3976
+ if (requestedEffort !== void 0 && !persona.allowedEfforts.includes(requestedEffort)) return rpcError(body.id, RPC_INVALID_PARAMS, `tools/call: persona "${persona.toolNameHttp}" does not accept effort="${requestedEffort}". Allowed: ${persona.allowedEfforts.join("|")}.`);
3977
+ personaEffort = requestedEffort ?? persona.defaultEffort;
3978
+ }
3618
3979
  if (inFlightToolsCall >= MAX_INFLIGHT_TOOLS_CALL) return rpcResult(body.id, {
3619
3980
  content: [{
3620
3981
  type: "text",
@@ -3630,11 +3991,13 @@ async function handleToolsCall(body) {
3630
3991
  aborter = new AbortController();
3631
3992
  inflightAborts.set(abortKey, aborter);
3632
3993
  }
3994
+ const telemetryName = persona ? persona.agentName : nonPersonaTool.toolNameHttp;
3995
+ const telemetryModel = persona ? persona.model : "(non-persona)";
3633
3996
  try {
3634
- const result = await callPersona(persona, prompt, context, effort, aborter?.signal);
3997
+ const result = persona ? await callPersona(persona, personaPrompt, personaContext, personaEffort, aborter?.signal) : await nonPersonaTool.handler(args, aborter?.signal);
3635
3998
  logTelemetry({
3636
- name: persona.agentName,
3637
- model: persona.model,
3999
+ name: telemetryName,
4000
+ model: telemetryModel,
3638
4001
  durationMs: Date.now() - startedAt,
3639
4002
  result: result.isError ? "isError" : "ok"
3640
4003
  });
@@ -3642,8 +4005,8 @@ async function handleToolsCall(body) {
3642
4005
  } catch (err) {
3643
4006
  const message = err instanceof Error ? err.message : String(err);
3644
4007
  logTelemetry({
3645
- name: persona.agentName,
3646
- model: persona.model,
4008
+ name: telemetryName,
4009
+ model: telemetryModel,
3647
4010
  durationMs: Date.now() - startedAt,
3648
4011
  result: "exception",
3649
4012
  errorMessage: message
@@ -3651,7 +4014,7 @@ async function handleToolsCall(body) {
3651
4014
  return rpcResult(body.id, {
3652
4015
  content: [{
3653
4016
  type: "text",
3654
- text: `persona ${persona.agentName} failed: ${message}`
4017
+ text: persona ? `persona ${persona.agentName} failed: ${message}` : `tool ${nonPersonaTool.toolNameHttp} failed: ${message}`
3655
4018
  }],
3656
4019
  isError: true
3657
4020
  });
@@ -3814,6 +4177,11 @@ async function handleMcpPost(c) {
3814
4177
  consola.debug("/mcp parse error:", err);
3815
4178
  return c.json(rpcError(null, RPC_PARSE_ERROR, "request body is not valid JSON"), 200);
3816
4179
  }
4180
+ if (typeof body === "object" && body !== null && !Array.isArray(body) && body.method === "tools/call" && acceptsEventStream(c.req.header("accept"))) return handleToolsCallSSE(body);
4181
+ if (typeof body === "object" && body !== null && !Array.isArray(body) && body.method === "tools/call") {
4182
+ const preflight = jsonPathPreflightCap(body);
4183
+ if (preflight) return c.json(preflight, 200);
4184
+ }
3817
4185
  try {
3818
4186
  const { status, body: respBody } = await handleRpc(c, body);
3819
4187
  if (respBody === null) return c.body(null, status);
@@ -3824,6 +4192,111 @@ async function handleMcpPost(c) {
3824
4192
  return c.json(rpcError(echoId, RPC_INTERNAL_ERROR, err instanceof Error ? err.message : String(err)), 200);
3825
4193
  }
3826
4194
  }
4195
+ /**
4196
+ * Accept-header parsing for MCP Streamable HTTP. Per MCP 2025-06-18
4197
+ * spec, clients send `Accept: application/json, text/event-stream` to
4198
+ * indicate they can consume either response shape. Server picks; for
4199
+ * tools/call we pick SSE because Claude Code's per-tool-call timer
4200
+ * (~60s on v2.1.113+) does not fire on streamed responses.
4201
+ *
4202
+ * Lenient parse: split on commas, strip params (q-values, charset),
4203
+ * trim, lowercase, look for the SSE token. Returns false on undefined
4204
+ * / empty / strict-JSON-only Accept.
4205
+ */
4206
+ function acceptsEventStream(accept) {
4207
+ if (!accept) return false;
4208
+ return accept.toLowerCase().split(",").map((t) => t.split(";")[0].trim()).includes("text/event-stream");
4209
+ }
4210
+ /**
4211
+ * SSE-streamed response for a single tools/call. Delegates the actual
4212
+ * upstream call to `handleToolsCall` (so the per-persona effort gate,
4213
+ * predictedTooLong cap, AbortController registration, telemetry, and
4214
+ * inFlight slot accounting all run identically); wraps the awaited
4215
+ * result in an SSE envelope with periodic heartbeats while the upstream
4216
+ * fetch is in flight.
4217
+ *
4218
+ * SSE event format (per MCP Streamable HTTP):
4219
+ * event: message
4220
+ * data: <json-rpc-2.0 message>\n\n
4221
+ *
4222
+ * - Heartbeats are JSON-RPC `notifications/progress` notifications with
4223
+ * the request id as `progressToken` (per MCP progress-notification spec).
4224
+ * - The final message is the JSON-RPC response envelope returned by
4225
+ * handleToolsCall — same structure as the JSON-path response.
4226
+ * - On consumer cancel (ReadableStream.cancel), the heartbeat interval
4227
+ * is cleared and the inFlight slot's AbortController is signalled
4228
+ * (handleToolsCall observes the abort and returns an error envelope
4229
+ * that we drop unwritten — controller is already closed).
4230
+ *
4231
+ * Per CLAUDE.md "Stream lifecycle" / "The smoking gun" rules: every
4232
+ * controller.enqueue/close is wrapped in a try/catch that swallows the
4233
+ * "Invalid state: Controller is already closed" race without warning.
4234
+ */
4235
+ const SSE_HEARTBEAT_INTERVAL_MS = 5e3;
4236
+ async function handleToolsCallSSE(body) {
4237
+ const encoder = new TextEncoder();
4238
+ const callPromise = handleToolsCall(body);
4239
+ const stream = new ReadableStream({
4240
+ async start(controller) {
4241
+ let closed = false;
4242
+ const safeEnqueue = (chunk) => {
4243
+ if (closed) return;
4244
+ try {
4245
+ controller.enqueue(chunk);
4246
+ } catch (err) {
4247
+ consola.debug("/mcp SSE enqueue after close (expected race):", err);
4248
+ closed = true;
4249
+ }
4250
+ };
4251
+ const safeClose = () => {
4252
+ if (closed) return;
4253
+ closed = true;
4254
+ try {
4255
+ controller.close();
4256
+ } catch (err) {
4257
+ consola.debug("/mcp SSE close after close:", err);
4258
+ }
4259
+ };
4260
+ const sseFrame = (rpcMessage) => encoder.encode(`event: message\ndata: ${JSON.stringify(rpcMessage)}\n\n`);
4261
+ const heartbeatFrame = () => sseFrame({
4262
+ jsonrpc: "2.0",
4263
+ method: "notifications/progress",
4264
+ params: {
4265
+ progressToken: body.id ?? null,
4266
+ progress: 0,
4267
+ message: "in flight"
4268
+ }
4269
+ });
4270
+ safeEnqueue(heartbeatFrame());
4271
+ const heartbeatHandle = setInterval(() => safeEnqueue(heartbeatFrame()), SSE_HEARTBEAT_INTERVAL_MS);
4272
+ try {
4273
+ safeEnqueue(sseFrame(await callPromise));
4274
+ } catch (err) {
4275
+ consola.error("/mcp SSE upstream error:", err);
4276
+ safeEnqueue(sseFrame(rpcError(body.id ?? null, RPC_INTERNAL_ERROR, err instanceof Error ? err.message : String(err))));
4277
+ } finally {
4278
+ clearInterval(heartbeatHandle);
4279
+ safeClose();
4280
+ }
4281
+ },
4282
+ cancel() {
4283
+ const abortKey = body.id !== void 0 && body.id !== null ? body.id : void 0;
4284
+ if (abortKey !== void 0) {
4285
+ const aborter = inflightAborts.get(abortKey);
4286
+ if (aborter) aborter.abort(/* @__PURE__ */ new Error("client disconnected SSE stream"));
4287
+ }
4288
+ }
4289
+ });
4290
+ return new Response(stream, {
4291
+ status: 200,
4292
+ headers: {
4293
+ "Content-Type": "text/event-stream",
4294
+ "Cache-Control": "no-cache, no-transform",
4295
+ "Connection": "keep-alive",
4296
+ "X-Accel-Buffering": "no"
4297
+ }
4298
+ });
4299
+ }
3827
4300
  function handleMcpDelete(c) {
3828
4301
  const auth$1 = checkAuth(c);
3829
4302
  if (!auth$1.ok) return c.json(rpcError(null, RPC_INVALID_REQUEST, auth$1.reason), auth$1.status);
@@ -3848,108 +4321,6 @@ mcpRoutes.delete("/", (c) => {
3848
4321
  }
3849
4322
  });
3850
4323
 
3851
- //#endregion
3852
- //#region src/services/copilot/create-messages.ts
3853
- /**
3854
- * Build headers that match what VS Code Copilot Chat sends to the Copilot API.
3855
- *
3856
- * copilotHeaders() provides: Authorization, content-type, copilot-integration-id,
3857
- * editor-version, editor-plugin-version, user-agent, openai-intent,
3858
- * x-github-api-version, x-request-id, x-vscode-user-agent-library-version.
3859
- *
3860
- * We add the remaining headers VS Code sends for /v1/messages:
3861
- * - X-Initiator (VS Code sets dynamically; "agent" is safe for CLI use)
3862
- * - anthropic-version (VS Code's Anthropic SDK sends this)
3863
- * - X-Interaction-Id (VS Code sends a session-scoped UUID)
3864
- *
3865
- * We intentionally omit copilot-vision-request — VS Code only sends it when
3866
- * images are present, and the native /v1/messages endpoint handles vision
3867
- * without requiring the header.
3868
- *
3869
- * extraHeaders allows callers to forward client-supplied beta headers
3870
- * (anthropic-beta) so Copilot enables extended features.
3871
- */
3872
- function buildHeaders(extraHeaders) {
3873
- return {
3874
- ...copilotHeaders(state),
3875
- accept: "application/json",
3876
- "openai-intent": "messages-proxy",
3877
- "x-interaction-type": "conversation-agent",
3878
- "X-Initiator": "agent",
3879
- "anthropic-version": "2023-06-01",
3880
- "X-Interaction-Id": randomUUID(),
3881
- ...extraHeaders
3882
- };
3883
- }
3884
- /**
3885
- * Forward an Anthropic Messages API request to Copilot's native /v1/messages endpoint.
3886
- * Returns the raw Response so callers can handle streaming vs non-streaming.
3887
- */
3888
- async function createMessages(body, extraHeaders) {
3889
- if (!state.copilotToken) throw new Error("Copilot token not found");
3890
- const url = `${copilotBaseUrl(state)}/v1/messages?beta=true`;
3891
- consola.debug(`Forwarding to ${url}`);
3892
- const doFetch = () => {
3893
- const fetchInit = {
3894
- method: "POST",
3895
- headers: buildHeaders(extraHeaders),
3896
- body
3897
- };
3898
- if (UPSTREAM_FETCH_TIMEOUT_MS > 0) fetchInit.signal = AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS);
3899
- return fetch(url, fetchInit);
3900
- };
3901
- const response = await tryRefreshAndRetry(doFetch, "/v1/messages");
3902
- if (!response.ok) {
3903
- let errorBody = "";
3904
- try {
3905
- errorBody = await response.text();
3906
- } catch {
3907
- errorBody = "(could not read error body)";
3908
- }
3909
- consola.error(`Copilot /v1/messages error: ${response.status} ${errorBody}`);
3910
- throw new HTTPError("Copilot messages request failed", new Response(errorBody, {
3911
- status: response.status,
3912
- statusText: response.statusText,
3913
- headers: response.headers
3914
- }));
3915
- }
3916
- return response;
3917
- }
3918
- /**
3919
- * Forward an Anthropic count_tokens request to Copilot's native endpoint.
3920
- * Returns the raw Response.
3921
- */
3922
- async function countTokens(body, extraHeaders) {
3923
- if (!state.copilotToken) throw new Error("Copilot token not found");
3924
- const url = `${copilotBaseUrl(state)}/v1/messages/count_tokens?beta=true`;
3925
- consola.debug(`Forwarding to ${url}`);
3926
- const doFetch = () => {
3927
- const fetchInit = {
3928
- method: "POST",
3929
- headers: buildHeaders(extraHeaders),
3930
- body
3931
- };
3932
- if (UPSTREAM_FETCH_TIMEOUT_MS > 0) fetchInit.signal = AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS);
3933
- return fetch(url, fetchInit);
3934
- };
3935
- const response = await tryRefreshAndRetry(doFetch, "/v1/messages/count_tokens");
3936
- if (!response.ok) {
3937
- let errorBody = "";
3938
- try {
3939
- errorBody = await response.text();
3940
- } catch {
3941
- errorBody = "(could not read error body)";
3942
- }
3943
- consola.error(`Copilot count_tokens error: ${response.status} ${errorBody}`);
3944
- throw new HTTPError("Copilot count_tokens request failed", new Response(errorBody, {
3945
- status: response.status,
3946
- statusText: response.statusText,
3947
- headers: response.headers
3948
- }));
3949
- }
3950
- return response;
3951
- }
3952
-
3953
4324
  //#endregion
3954
4325
  //#region src/services/advisor/advisor.ts
3955
4326
  const ENCODER$1 = new TextEncoder();
@@ -5003,7 +5374,7 @@ async function handleCompletion(c) {
5003
5374
  type: "error",
5004
5375
  error: {
5005
5376
  type: "invalid_request_error",
5006
- message: "Inline `mcp_servers` body field is not supported by github-router (Copilot returns 400 'Extra inputs are not permitted'; the proxy would need a multi-turn tool-loop translation that has unresolved design holes — see Phase G in the plan). Configure your remote MCP servers as local stdio entries in `~/.claude/mcp.json` instead — Claude Code will spawn them locally and the proxy passes their tool calls through transparently. (https://docs.claude.com/en/docs/claude-code/mcp)"
5377
+ message: "Inline `mcp_servers` body field is not supported by github-router. Configure remote MCP servers as local stdio entries in `~/.claude/mcp.json` instead — Claude Code will spawn them locally and the proxy passes their tool calls through transparently. (https://docs.claude.com/en/docs/claude-code/mcp)"
5007
5378
  }
5008
5379
  }, 400);
5009
5380
  } catch {}
@@ -5971,6 +6342,7 @@ function getClaudeCodeEnvVars(serverUrl, model) {
5971
6342
  ANTHROPIC_BASE_URL: serverUrl,
5972
6343
  CLAUDE_CONFIG_DIR: PATHS.CLAUDE_CONFIG_DIR,
5973
6344
  MCP_TIMEOUT: "600000",
6345
+ MCP_TOOL_TIMEOUT: "600000",
5974
6346
  DISABLE_NON_ESSENTIAL_MODEL_CALLS: "1",
5975
6347
  CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: "1",
5976
6348
  DISABLE_TELEMETRY: "1"