github-router 0.3.22 → 0.3.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -8,6 +8,8 @@ import path from "node:path";
8
8
  import process$1 from "node:process";
9
9
  import { execFile, execFileSync, spawn } from "node:child_process";
10
10
  import { promisify } from "node:util";
11
+ import { events } from "fetch-event-stream";
12
+ import { z } from "zod";
11
13
  import fs$1 from "node:fs";
12
14
  import { Writable } from "node:stream";
13
15
  import { serve } from "srvx";
@@ -15,8 +17,6 @@ import { getProxyForUrl } from "proxy-from-env";
15
17
  import { Agent, ProxyAgent, setGlobalDispatcher } from "undici";
16
18
  import { Hono } from "hono";
17
19
  import { cors } from "hono/cors";
18
- import { events } from "fetch-event-stream";
19
- import { z } from "zod";
20
20
  import clipboard from "clipboardy";
21
21
 
22
22
  //#region src/lib/paths.ts
@@ -1643,8 +1643,206 @@ function launchChild(target, server$1, options = {}) {
1643
1643
  });
1644
1644
  }
1645
1645
 
1646
+ //#endregion
1647
+ //#region src/services/copilot/web-search.ts
1648
+ const RpcSchema = z.object({
1649
+ jsonrpc: z.literal("2.0"),
1650
+ id: z.number().optional(),
1651
+ result: z.object({
1652
+ content: z.array(z.object({
1653
+ type: z.literal("text"),
1654
+ text: z.string()
1655
+ })).optional(),
1656
+ isError: z.boolean().optional()
1657
+ }).optional(),
1658
+ error: z.object({
1659
+ code: z.number(),
1660
+ message: z.string()
1661
+ }).optional()
1662
+ });
1663
+ const InnerSchema = z.object({
1664
+ text: z.object({
1665
+ value: z.string(),
1666
+ annotations: z.array(z.object({ url_citation: z.object({
1667
+ title: z.string(),
1668
+ url: z.string()
1669
+ }).optional() })).nullable().optional()
1670
+ }),
1671
+ bing_searches: z.array(z.unknown()).nullable().optional()
1672
+ });
1673
+ const MAX_SEARCHES_PER_SECOND = 3;
1674
+ let searchTimestamps = [];
1675
+ let throttleChain = Promise.resolve();
1676
+ async function throttleSearch() {
1677
+ const myTurn = throttleChain.then(async () => {
1678
+ const now = Date.now();
1679
+ searchTimestamps = searchTimestamps.filter((t) => now - t < 1e3);
1680
+ if (searchTimestamps.length >= MAX_SEARCHES_PER_SECOND) {
1681
+ const waitMs = 1e3 - (now - searchTimestamps[0]);
1682
+ if (waitMs > 0) {
1683
+ consola.debug(`Web search rate limited, waiting ${waitMs}ms`);
1684
+ await sleep(waitMs);
1685
+ }
1686
+ }
1687
+ searchTimestamps.push(Date.now());
1688
+ });
1689
+ throttleChain = myTurn.catch(() => {});
1690
+ return myTurn;
1691
+ }
1692
+ function mcpHeaders(sid) {
1693
+ if (!state.githubToken) throw new Error("GitHub token missing — re-run auth flow. Web search uses the GitHub PAT (not the Copilot token); the on-disk token at ~/.local/share/github-router/github_token must be present.");
1694
+ const headers = {
1695
+ Authorization: `Bearer ${state.githubToken}`,
1696
+ "content-type": "application/json",
1697
+ accept: "application/json, text/event-stream",
1698
+ "X-MCP-Host": "copilot-cli",
1699
+ "X-MCP-Toolsets": "web_search",
1700
+ "Mcp-Protocol-Version": "2025-06-18",
1701
+ "user-agent": `GitHubCopilotChat/${copilotVersion(state)}`
1702
+ };
1703
+ if (sid) headers["Mcp-Session-Id"] = sid;
1704
+ return headers;
1705
+ }
1706
+ async function postMcp(body, sid, retry = true) {
1707
+ const url = `${copilotBaseUrl(state)}/mcp`;
1708
+ const res = await fetch(url, {
1709
+ method: "POST",
1710
+ headers: mcpHeaders(sid),
1711
+ body: JSON.stringify(body)
1712
+ });
1713
+ if (!res.ok && retry && res.status >= 500) {
1714
+ await sleep(500);
1715
+ return postMcp(body, sid, false);
1716
+ }
1717
+ return res;
1718
+ }
1719
+ async function searchWeb(query) {
1720
+ await throttleSearch();
1721
+ consola.info(`Web search (MCP): "${query.slice(0, 80)}"`);
1722
+ const callId = Math.floor(Math.random() * 1e9);
1723
+ let sid;
1724
+ try {
1725
+ const initRes = await postMcp({
1726
+ jsonrpc: "2.0",
1727
+ id: 1,
1728
+ method: "initialize",
1729
+ params: {
1730
+ protocolVersion: "2024-11-05",
1731
+ capabilities: {},
1732
+ clientInfo: {
1733
+ name: "GitHubCopilotChat",
1734
+ version: copilotVersion(state)
1735
+ }
1736
+ }
1737
+ });
1738
+ if (!initRes.ok) {
1739
+ consola.error("MCP initialize failed", initRes.status);
1740
+ throw new HTTPError("MCP initialize failed", initRes);
1741
+ }
1742
+ sid = initRes.headers.get("mcp-session-id") ?? void 0;
1743
+ if (!sid) throw new HTTPError("MCP initialize: missing Mcp-Session-Id header", initRes);
1744
+ const notifRes = await postMcp({
1745
+ jsonrpc: "2.0",
1746
+ method: "notifications/initialized"
1747
+ }, sid);
1748
+ if (!notifRes.ok && notifRes.status !== 202) {
1749
+ consola.error("MCP notifications/initialized failed", notifRes.status);
1750
+ throw new HTTPError("MCP notifications/initialized failed", notifRes);
1751
+ }
1752
+ const callRes = await postMcp({
1753
+ jsonrpc: "2.0",
1754
+ id: callId,
1755
+ method: "tools/call",
1756
+ params: {
1757
+ name: "web_search",
1758
+ arguments: { query }
1759
+ }
1760
+ }, sid);
1761
+ if (!callRes.ok) {
1762
+ consola.error("MCP tools/call failed", callRes.status);
1763
+ throw new HTTPError("MCP tools/call failed", callRes);
1764
+ }
1765
+ let rpc;
1766
+ for await (const ev of events(callRes)) {
1767
+ if (!ev.data) continue;
1768
+ let parsedJson;
1769
+ try {
1770
+ parsedJson = JSON.parse(ev.data);
1771
+ } catch {
1772
+ continue;
1773
+ }
1774
+ const parsed = RpcSchema.safeParse(parsedJson);
1775
+ if (parsed.success && parsed.data.id === callId) {
1776
+ rpc = parsed.data;
1777
+ break;
1778
+ }
1779
+ }
1780
+ if (!rpc) throw new HTTPError("MCP tools/call: no matching response id in SSE stream", callRes);
1781
+ if (rpc.error) throw new HTTPError(`MCP error ${rpc.error.code}: ${rpc.error.message}`, callRes);
1782
+ if (rpc.result?.isError) throw new HTTPError("MCP web_search tool error", callRes);
1783
+ const text = rpc.result?.content?.[0]?.text;
1784
+ if (!text) throw new HTTPError("MCP web_search: empty content", callRes);
1785
+ let innerRaw;
1786
+ try {
1787
+ innerRaw = JSON.parse(text);
1788
+ } catch (err) {
1789
+ throw new HTTPError(`MCP web_search: inner content not JSON: ${err instanceof Error ? err.message : String(err)}`, callRes);
1790
+ }
1791
+ const innerParsed = InnerSchema.safeParse(innerRaw);
1792
+ if (!innerParsed.success) throw new HTTPError(`MCP web_search: inner content shape changed (${innerParsed.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join("; ")})`, callRes);
1793
+ const inner = innerParsed.data;
1794
+ const references = [];
1795
+ for (const ann of inner.text.annotations ?? []) {
1796
+ const cite = ann.url_citation;
1797
+ if (cite && !cite.url.toLowerCase().includes("bing.com/search")) references.push({
1798
+ title: cite.title,
1799
+ url: cite.url
1800
+ });
1801
+ }
1802
+ consola.debug(`Web search returned ${references.length} references`);
1803
+ return {
1804
+ content: inner.text.value,
1805
+ references
1806
+ };
1807
+ } finally {
1808
+ if (sid) try {
1809
+ fetch(`${copilotBaseUrl(state)}/mcp`, {
1810
+ method: "DELETE",
1811
+ headers: mcpHeaders(sid)
1812
+ }).catch(() => {});
1813
+ } catch {}
1814
+ }
1815
+ }
1816
+
1646
1817
  //#endregion
1647
1818
  //#region src/lib/peer-mcp-personas.ts
1819
+ /**
1820
+ * Reasoning effort levels accepted by Copilot's /v1/responses (gpt-5.x) and
1821
+ * /v1/chat/completions endpoints. Per the proxy's existing thinking-mode
1822
+ * translator (CLAUDE.md "Thinking-mode translation"), Copilot's adaptive-
1823
+ * thinking path uses these same buckets:
1824
+ * <2k tokens → low, <8k → medium, <24k → high, else → xhigh.
1825
+ *
1826
+ * Per-persona `allowedEfforts` and `defaultEffort` constrain which subset
1827
+ * each persona exposes — enforced in handler.ts:handleToolsCall.
1828
+ *
1829
+ * **xhigh on long-running personas works via SSE-streamed /mcp responses**
1830
+ * (handler.ts:handleToolsCallSSE). Claude Code's MCP HTTP client honors
1831
+ * `text/event-stream` responses without applying the ~60s per-tool-call
1832
+ * timer that previously broke xhigh on gpt-5.5 (~56s wall) and
1833
+ * claude-opus-4-7 (high+ thinking budgets). Each persona declares its own
1834
+ * `allowedEfforts` / `defaultEffort` — not every persona exposes `xhigh` or
1835
+ * defaults to `high`; SSE handles the long tail transparently to the user.
1836
+ */
1837
+ const EFFORT_LEVELS = [
1838
+ "low",
1839
+ "medium",
1840
+ "high",
1841
+ "xhigh"
1842
+ ];
1843
+ function isEffort(v) {
1844
+ return typeof v === "string" && EFFORT_LEVELS.includes(v);
1845
+ }
1648
1846
  const CRITIC_RUBRIC = `
1649
1847
  Apply this grading rubric:
1650
1848
  - Score 1–5 on three axes:
@@ -1673,7 +1871,7 @@ Self-reminder (read before every reply):
1673
1871
  `.trim();
1674
1872
  const COLD_START_CONTRACT = `
1675
1873
  Cold-start contract for the lead orchestrator (Opus):
1676
- When delegating to me, paste a self-contained brief. I have no access to your scrollback, CLAUDE.md, or the project tree. Always include:
1874
+ When delegating to me, paste a self-contained brief. I have no access to your scrollback, project memory, or the project tree. Always include:
1677
1875
  (a) the artifact under review verbatim (code/diff/plan text),
1678
1876
  (b) the constraints or "done" criteria,
1679
1877
  (c) any prior decisions I should not relitigate.
@@ -1745,39 +1943,87 @@ Reply format (markdown):
1745
1943
 
1746
1944
  Resilience reminder:
1747
1945
  If your session terminates abnormally before "Status: complete", the lead will retry once. On recovery, ask the lead to confirm what's already been done before re-applying changes — duplicate edits are worse than a slow restart.`;
1946
+ const OPUS_CRITIC_BASE = `You are opus-critic, a fresh-context Anthropic-side adversarial reviewer running on Claude Opus 4.7 — the same model and lab as the lead orchestrator that just delegated to you. You are NOT the lead. You did not see the lead's reasoning trace. You only see the brief.
1947
+
1948
+ Your job is to spot what the lead missed because of cognitive momentum, sunk-cost on a plan, or motivated reasoning toward a particular fix. Your blind-spot diversification is LIMITED compared to codex-critic (gpt-5.5) and gemini-critic (gemini-3.1-pro) — same training, same lab, same RLHF priors. Use that honestly: don't pretend to find a different perspective when the obvious read is "the lead got it right." Silence on good work is a valid and welcome answer.
1949
+
1950
+ Sycophancy is the failure mode you exist to fight. Manufactured contrarianism is a different failure of the same shape — do neither.
1951
+
1952
+ ${COLD_START_CONTRACT}
1953
+
1954
+ ${CRITIC_RUBRIC}`;
1748
1955
  const PERSONAS_READ = Object.freeze([
1749
1956
  {
1750
1957
  agentName: "codex-critic",
1751
1958
  toolNameHttp: "codex_critic",
1752
1959
  model: "gpt-5.5",
1753
1960
  endpoint: "/v1/responses",
1754
- description: "Adversarial second opinion on plans, designs, code, or systems-engineering tradeoffs. Backed by gpt-5.5 (OpenAI) — different model, different training data, different blind spots than Opus. Uses a calibrated 1–5 grading rubric and is allowed to reply 'no material objection' on solid artifacts. **CALL BEFORE: ExitPlanMode for any plan involving >2 files or new architecture; finalizing a major design choice; TeamCreate when the team's task is non-trivial.** **CALL AFTER: any commit touching concurrency, security, or streaming code paths.** If the artifact is large (>20 KB), prefer to break it into 2-4 focused batches and call this tool once per batch IN PARALLEL — each call must complete under the Claude Code MCP per-tool-call ceiling (~150s on v2.1.138 per regression #50289), so monolithic large-artifact calls will time out client-side. Aggregate findings yourself. Always pass: (a) the artifact verbatim, (b) the constraints/'done' criteria, (c) any prior decisions. Optionally pass `effort: 'xhigh'` for explicit deep dives or `effort: 'medium'` for quick sanity checks (default 'high'). The subagent has no access to your scrollback or CLAUDE.md.",
1961
+ description: "Adversarial second opinion on plans, designs, or code tradeoffs. Backed by gpt-5.5 (OpenAI) — different lab than Opus. Pass artifact verbatim.",
1755
1962
  baseInstructions: CRITIC_BASE,
1756
1963
  agentPrompt: "",
1757
1964
  writeCapable: false,
1758
- requiresHttp: false
1965
+ requiresHttp: false,
1966
+ allowedEfforts: [
1967
+ "low",
1968
+ "medium",
1969
+ "high",
1970
+ "xhigh"
1971
+ ],
1972
+ defaultEffort: "xhigh"
1759
1973
  },
1760
1974
  {
1761
1975
  agentName: "gemini-critic",
1762
1976
  toolNameHttp: "gemini_critic",
1763
1977
  model: "gemini-3.1-pro-preview",
1764
1978
  endpoint: "/v1/chat/completions",
1765
- description: "Adversarial second opinion from a different lab. Backed by gemini-3.1-pro-preview (Google) — different training data and RLHF priors than Opus AND codex-critic, the strongest blind-spot-buster when the lead wants triangulation across three labs. Use for long-context artifacts (>50k tokens), math/proof-shaped reasoning, or as a tie-breaker after codex-critic has weighed in. **CALL BEFORE: ExitPlanMode for plans where Opus + codex-critic agree (use as triangulation); finalizing irreversible architectural choices.** **CALL AFTER: commits where you want a third-lab cross-check.** If the artifact is large (>100 KB), prefer to break into batches and call in parallel — gemini handles long context well but each per-call MCP wait is still bounded (~150s on v2.1.138). Always pass: (a) the artifact verbatim, (b) the constraints/'done' criteria, (c) any prior decisions. The `effort` parameter is forwarded but may be silently ignored by Copilot's gemini route — gemini-3.x reasoning is largely auto-applied. The subagent has no access to your scrollback or CLAUDE.md.",
1979
+ description: "Adversarial second opinion. Backed by gemini-3.1-pro (Google) — third-lab triangulation, strong on long-context and formal reasoning. Pass artifact verbatim.",
1766
1980
  baseInstructions: GEMINI_CRITIC_BASE,
1767
1981
  agentPrompt: "",
1768
1982
  writeCapable: false,
1769
- requiresHttp: true
1983
+ requiresHttp: true,
1984
+ requiresGeminiCatalog: true,
1985
+ allowedEfforts: [
1986
+ "low",
1987
+ "medium",
1988
+ "high"
1989
+ ],
1990
+ defaultEffort: "high"
1770
1991
  },
1771
1992
  {
1772
1993
  agentName: "codex-reviewer",
1773
1994
  toolNameHttp: "codex_reviewer",
1774
1995
  model: "gpt-5.3-codex",
1775
1996
  endpoint: "/v1/responses",
1776
- description: "Line-level code review of a specific diff or file. Backed by gpt-5.3-codex (OpenAI) — the code-specialist sibling of gpt-5.5, trained heavily on code-review datasets so it catches different bugs than Opus. Prefer over codex-critic when the artifact is a concrete diff or single file (codex-critic is for plans/designs). **CALL AFTER: any non-trivial commit (>50 lines OR touching critical paths: streaming, auth, concurrency, persistence, security).** **CALL BEFORE: opening a PR or pushing changes a peer would review.** For diffs >20 KB, split by file-group and call once per group in parallel — each per-call wait is bounded (~150s on v2.1.138). Always pass: (a) the diff or file verbatim, (b) the change's intent, (c) test status. Optionally pass `effort: 'xhigh'` when reviewing security-critical code, `effort: 'medium'` for routine reviews (default 'high'). The subagent has no access to your scrollback or CLAUDE.md.",
1997
+ description: "Line-level review of a concrete diff or single file. Backed by gpt-5.3-codex (OpenAI) — code-specialist, narrow-scope. Pass artifact verbatim.",
1777
1998
  baseInstructions: REVIEWER_BASE,
1778
1999
  agentPrompt: "",
1779
2000
  writeCapable: false,
1780
- requiresHttp: false
2001
+ requiresHttp: false,
2002
+ allowedEfforts: [
2003
+ "low",
2004
+ "medium",
2005
+ "high",
2006
+ "xhigh"
2007
+ ],
2008
+ defaultEffort: "xhigh"
2009
+ },
2010
+ {
2011
+ agentName: "opus-critic",
2012
+ toolNameHttp: "opus_critic",
2013
+ model: "claude-opus-4-7",
2014
+ endpoint: "/v1/messages",
2015
+ description: "Adversarial second opinion from a fresh-context Opus 4.7 — cheap same-lab sanity check. Pass artifact verbatim.",
2016
+ baseInstructions: OPUS_CRITIC_BASE,
2017
+ agentPrompt: "",
2018
+ writeCapable: false,
2019
+ requiresHttp: true,
2020
+ allowedEfforts: [
2021
+ "low",
2022
+ "medium",
2023
+ "high",
2024
+ "xhigh"
2025
+ ],
2026
+ defaultEffort: "xhigh"
1781
2027
  }
1782
2028
  ]);
1783
2029
  const PERSONAS_WRITE = Object.freeze([{
@@ -1785,11 +2031,18 @@ const PERSONAS_WRITE = Object.freeze([{
1785
2031
  toolNameHttp: "codex_implementer",
1786
2032
  model: "gpt-5.3-codex",
1787
2033
  endpoint: "/v1/responses",
1788
- description: "Targeted implementation of a self-contained coding task — actual file edits via Codex's tool-use sandbox. Backed by gpt-5.3-codex with workspace-write access (only registered when --codex-cli is set). Use only when the task has a clear spec and acceptance criteria; for tasks needing iterative tool-use across many files, prefer a Claude teammate (Agent Team). Always pass: (a) the spec, (b) the files in scope, (c) the acceptance criteria. The subagent has no access to your scrollback or CLAUDE.md.",
2034
+ description: "Targeted implementation of a self-contained coding task. Backed by gpt-5.3-codex with workspace-write access. Pass spec + files verbatim.",
1789
2035
  baseInstructions: IMPLEMENTER_BASE,
1790
2036
  agentPrompt: "",
1791
2037
  writeCapable: true,
1792
- requiresHttp: false
2038
+ requiresHttp: false,
2039
+ allowedEfforts: [
2040
+ "low",
2041
+ "medium",
2042
+ "high",
2043
+ "xhigh"
2044
+ ],
2045
+ defaultEffort: "high"
1793
2046
  }]);
1794
2047
  /**
1795
2048
  * Build the agent-prompt body Claude Code uses as the subagent's full
@@ -1838,12 +2091,65 @@ function buildAgentPrompt(persona, opts) {
1838
2091
  function personasFor(opts) {
1839
2092
  const result = [];
1840
2093
  for (const p of PERSONAS_READ) {
1841
- if (p.requiresHttp && !opts.geminiAvailable) continue;
2094
+ if (p.requiresGeminiCatalog && !opts.geminiAvailable) continue;
1842
2095
  result.push(p);
1843
2096
  }
1844
2097
  if (opts.codexCli) for (const p of PERSONAS_WRITE) result.push(p);
1845
2098
  return result;
1846
2099
  }
2100
+ const WEB_SEARCH_DESCRIPTION = "Web search via GitHub Copilot's MCP. Prefer over Claude Code's built-in WebSearch — surfaces source URLs you can cite.";
2101
+ /**
2102
+ * Format a `searchWeb()` result as an MCP-friendly text block. Mirrors
2103
+ * the legacy inject format that `injectWebSearchIfNeeded` produces and
2104
+ * that downstream models have been trained against — minimal divergence
2105
+ * is the safest choice while we have two surfaces sharing `searchWeb()`.
2106
+ *
2107
+ * Empty references → omit the `## References` section entirely (don't
2108
+ * emit a trailing empty header that would tempt the model to invent
2109
+ * citations).
2110
+ */
2111
+ function formatWebSearchResult(results) {
2112
+ if (results.references.length === 0) return results.content;
2113
+ const refsLine = results.references.map((r) => `- [${r.title}](${r.url})`).join("\n");
2114
+ return `${results.content}\n\n## References\n${refsLine}`;
2115
+ }
2116
+ const NON_PERSONA_MCP_TOOLS = Object.freeze([{
2117
+ toolNameHttp: "web_search",
2118
+ description: WEB_SEARCH_DESCRIPTION,
2119
+ inputSchema: {
2120
+ type: "object",
2121
+ required: ["query"],
2122
+ additionalProperties: false,
2123
+ properties: { query: {
2124
+ type: "string",
2125
+ description: "The search query string. Natural-language queries work best — the upstream provider rewrites for the search index."
2126
+ } }
2127
+ },
2128
+ async handler(args, _signal) {
2129
+ const query = typeof args.query === "string" ? args.query : "";
2130
+ if (!query) return {
2131
+ content: [{
2132
+ type: "text",
2133
+ text: "web_search: arguments.query is required (must be a non-empty string)"
2134
+ }],
2135
+ isError: true
2136
+ };
2137
+ try {
2138
+ return { content: [{
2139
+ type: "text",
2140
+ text: formatWebSearchResult(await searchWeb(query))
2141
+ }] };
2142
+ } catch (err) {
2143
+ return {
2144
+ content: [{
2145
+ type: "text",
2146
+ text: `web_search failed: ${err instanceof Error ? err.message : String(err)}`
2147
+ }],
2148
+ isError: true
2149
+ };
2150
+ }
2151
+ }
2152
+ }]);
1847
2153
 
1848
2154
  //#endregion
1849
2155
  //#region src/lib/codex-mcp-config.ts
@@ -1914,11 +2220,11 @@ function buildPeerMcpConfig(serverUrl, opts) {
1914
2220
  * ExitPlanMode to default-on (env-disable-able).
1915
2221
  */
1916
2222
  function buildCoordinatorAgent(opts) {
1917
- const peers = ["codex-critic"];
2223
+ const peers = ["codex-critic", "opus-critic"];
1918
2224
  if (opts.geminiAvailable) peers.push("gemini-critic");
1919
2225
  peers.push("codex-reviewer");
1920
2226
  return {
1921
- description: "Coordinates cross-lab adversarial review. **Use proactively before ExitPlanMode for non-trivial plans and after non-trivial commits** (>50 lines OR touching streaming/auth/concurrency/persistence/security). Routes to codex-critic / codex-reviewer / gemini-critic in parallel based on artifact type and aggregates findings. Cheaper than calling each peer manually for the common case where you want a multi-lab triangulation. The subagent has no access to your scrollback or CLAUDE.md — pass the artifact verbatim.",
2227
+ description: "Coordinates cross-lab adversarial review across codex-critic, opus-critic, gemini-critic, codex-reviewer. Use proactively before non-trivial plans and after non-trivial commits. Always pass artifacts verbatim peers are fresh-context.",
1922
2228
  prompt: [
1923
2229
  "# Subagent: peer-review-coordinator",
1924
2230
  "",
@@ -1934,10 +2240,11 @@ function buildCoordinatorAgent(opts) {
1934
2240
  "- **Concrete diff or single file** → fan out to `codex-reviewer`" + (opts.geminiAvailable ? " AND `gemini-critic` (gemini for cross-lab triangulation)" : "") + ". For very small changes (<20 lines), one `codex-reviewer` call is enough.",
1935
2241
  "- **Tie-breaker after codex-critic has weighed in** → call `gemini-critic`" + (opts.geminiAvailable ? "" : " (NOT REGISTERED in this session — gemini-3.x not in catalog; tie-break unavailable)") + " with the artifact AND codex-critic's verdict for cross-lab cross-check.",
1936
2242
  "- **Long-context artifact (>100 KB)** → prefer `gemini-critic`" + (opts.geminiAvailable ? "" : " (NOT REGISTERED in this session)") + ". Otherwise, decompose into 2-4 batches and fan out across `codex-critic` calls in parallel.",
2243
+ "- **Fast same-lab sanity check on a moderate artifact (<5 KB)** → prefer `opus-critic` (cheapest, ~22s, only `effort: low|medium` supported). Same lab as the lead — limited blind-spot diversification, but a useful gut-check before committing to a controversial decision. For cross-lab diversification or deep dives on larger artifacts, use codex/gemini at higher effort with decomposition for >5KB.",
1937
2244
  "",
1938
2245
  "## Decomposition for large artifacts",
1939
2246
  "",
1940
- "Each per-call MCP wait is bounded (~150s on Claude Code v2.1.138 per regression #50289). For artifacts >20 KB, split into 2-4 logical batches BY CONCERN (not by raw size — semantic batches give better per-batch reviews) and call peers in parallel. The proxy's MCP cap allows up to 8 in-flight calls. Aggregate findings yourself before reporting back.",
2247
+ "Each per-call MCP wait is bounded (~60s SDK default on Claude Code v2.1.113+ per regressions #50289 / #52137 — empirically reproduced 2026-05-14). The proxy enforces per-persona effort allowlists AND a pre-flight `predictedTooLong` cap (codex_critic@high >8 KB, codex_reviewer@high >12 KB, opus_critic@medium >6 KB) to surface would-be-timeouts as fast actionable errors. For artifacts that exceed the cap, split into 2-4 logical batches BY CONCERN (not by raw size — semantic batches give better per-batch reviews) and call peers in parallel. The proxy's MCP cap allows up to 8 in-flight calls. Aggregate findings yourself before reporting back.",
1941
2248
  "",
1942
2249
  "## Aggregation contract",
1943
2250
  "",
@@ -2344,7 +2651,7 @@ function initProxyFromEnv() {
2344
2651
  //#endregion
2345
2652
  //#region package.json
2346
2653
  var name = "github-router";
2347
- var version = "0.3.22";
2654
+ var version = "0.3.23";
2348
2655
 
2349
2656
  //#endregion
2350
2657
  //#region src/lib/approval.ts
@@ -2903,177 +3210,6 @@ const createChatCompletions = async (payload, modelHeaders, callerSignal) => {
2903
3210
  return await response.json();
2904
3211
  };
2905
3212
 
2906
- //#endregion
2907
- //#region src/services/copilot/web-search.ts
2908
- const RpcSchema = z.object({
2909
- jsonrpc: z.literal("2.0"),
2910
- id: z.number().optional(),
2911
- result: z.object({
2912
- content: z.array(z.object({
2913
- type: z.literal("text"),
2914
- text: z.string()
2915
- })).optional(),
2916
- isError: z.boolean().optional()
2917
- }).optional(),
2918
- error: z.object({
2919
- code: z.number(),
2920
- message: z.string()
2921
- }).optional()
2922
- });
2923
- const InnerSchema = z.object({
2924
- text: z.object({
2925
- value: z.string(),
2926
- annotations: z.array(z.object({ url_citation: z.object({
2927
- title: z.string(),
2928
- url: z.string()
2929
- }).optional() })).nullable().optional()
2930
- }),
2931
- bing_searches: z.array(z.unknown()).nullable().optional()
2932
- });
2933
- const MAX_SEARCHES_PER_SECOND = 3;
2934
- let searchTimestamps = [];
2935
- let throttleChain = Promise.resolve();
2936
- async function throttleSearch() {
2937
- const myTurn = throttleChain.then(async () => {
2938
- const now = Date.now();
2939
- searchTimestamps = searchTimestamps.filter((t) => now - t < 1e3);
2940
- if (searchTimestamps.length >= MAX_SEARCHES_PER_SECOND) {
2941
- const waitMs = 1e3 - (now - searchTimestamps[0]);
2942
- if (waitMs > 0) {
2943
- consola.debug(`Web search rate limited, waiting ${waitMs}ms`);
2944
- await sleep(waitMs);
2945
- }
2946
- }
2947
- searchTimestamps.push(Date.now());
2948
- });
2949
- throttleChain = myTurn.catch(() => {});
2950
- return myTurn;
2951
- }
2952
- function mcpHeaders(sid) {
2953
- if (!state.githubToken) throw new Error("GitHub token missing — re-run auth flow. Web search uses the GitHub PAT (not the Copilot token); the on-disk token at ~/.local/share/github-router/github_token must be present.");
2954
- const headers = {
2955
- Authorization: `Bearer ${state.githubToken}`,
2956
- "content-type": "application/json",
2957
- accept: "application/json, text/event-stream",
2958
- "X-MCP-Host": "copilot-cli",
2959
- "X-MCP-Toolsets": "web_search",
2960
- "Mcp-Protocol-Version": "2025-06-18",
2961
- "user-agent": `GitHubCopilotChat/${copilotVersion(state)}`
2962
- };
2963
- if (sid) headers["Mcp-Session-Id"] = sid;
2964
- return headers;
2965
- }
2966
- async function postMcp(body, sid, retry = true) {
2967
- const url = `${copilotBaseUrl(state)}/mcp`;
2968
- const res = await fetch(url, {
2969
- method: "POST",
2970
- headers: mcpHeaders(sid),
2971
- body: JSON.stringify(body)
2972
- });
2973
- if (!res.ok && retry && res.status >= 500) {
2974
- await sleep(500);
2975
- return postMcp(body, sid, false);
2976
- }
2977
- return res;
2978
- }
2979
- async function searchWeb(query) {
2980
- await throttleSearch();
2981
- consola.info(`Web search (MCP): "${query.slice(0, 80)}"`);
2982
- const callId = Math.floor(Math.random() * 1e9);
2983
- let sid;
2984
- try {
2985
- const initRes = await postMcp({
2986
- jsonrpc: "2.0",
2987
- id: 1,
2988
- method: "initialize",
2989
- params: {
2990
- protocolVersion: "2024-11-05",
2991
- capabilities: {},
2992
- clientInfo: {
2993
- name: "GitHubCopilotChat",
2994
- version: copilotVersion(state)
2995
- }
2996
- }
2997
- });
2998
- if (!initRes.ok) {
2999
- consola.error("MCP initialize failed", initRes.status);
3000
- throw new HTTPError("MCP initialize failed", initRes);
3001
- }
3002
- sid = initRes.headers.get("mcp-session-id") ?? void 0;
3003
- if (!sid) throw new HTTPError("MCP initialize: missing Mcp-Session-Id header", initRes);
3004
- const notifRes = await postMcp({
3005
- jsonrpc: "2.0",
3006
- method: "notifications/initialized"
3007
- }, sid);
3008
- if (!notifRes.ok && notifRes.status !== 202) {
3009
- consola.error("MCP notifications/initialized failed", notifRes.status);
3010
- throw new HTTPError("MCP notifications/initialized failed", notifRes);
3011
- }
3012
- const callRes = await postMcp({
3013
- jsonrpc: "2.0",
3014
- id: callId,
3015
- method: "tools/call",
3016
- params: {
3017
- name: "web_search",
3018
- arguments: { query }
3019
- }
3020
- }, sid);
3021
- if (!callRes.ok) {
3022
- consola.error("MCP tools/call failed", callRes.status);
3023
- throw new HTTPError("MCP tools/call failed", callRes);
3024
- }
3025
- let rpc;
3026
- for await (const ev of events(callRes)) {
3027
- if (!ev.data) continue;
3028
- let parsedJson;
3029
- try {
3030
- parsedJson = JSON.parse(ev.data);
3031
- } catch {
3032
- continue;
3033
- }
3034
- const parsed = RpcSchema.safeParse(parsedJson);
3035
- if (parsed.success && parsed.data.id === callId) {
3036
- rpc = parsed.data;
3037
- break;
3038
- }
3039
- }
3040
- if (!rpc) throw new HTTPError("MCP tools/call: no matching response id in SSE stream", callRes);
3041
- if (rpc.error) throw new HTTPError(`MCP error ${rpc.error.code}: ${rpc.error.message}`, callRes);
3042
- if (rpc.result?.isError) throw new HTTPError("MCP web_search tool error", callRes);
3043
- const text = rpc.result?.content?.[0]?.text;
3044
- if (!text) throw new HTTPError("MCP web_search: empty content", callRes);
3045
- let innerRaw;
3046
- try {
3047
- innerRaw = JSON.parse(text);
3048
- } catch (err) {
3049
- throw new HTTPError(`MCP web_search: inner content not JSON: ${err instanceof Error ? err.message : String(err)}`, callRes);
3050
- }
3051
- const innerParsed = InnerSchema.safeParse(innerRaw);
3052
- if (!innerParsed.success) throw new HTTPError(`MCP web_search: inner content shape changed (${innerParsed.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join("; ")})`, callRes);
3053
- const inner = innerParsed.data;
3054
- const references = [];
3055
- for (const ann of inner.text.annotations ?? []) {
3056
- const cite = ann.url_citation;
3057
- if (cite && !cite.url.toLowerCase().includes("bing.com/search")) references.push({
3058
- title: cite.title,
3059
- url: cite.url
3060
- });
3061
- }
3062
- consola.debug(`Web search returned ${references.length} references`);
3063
- return {
3064
- content: inner.text.value,
3065
- references
3066
- };
3067
- } finally {
3068
- if (sid) try {
3069
- fetch(`${copilotBaseUrl(state)}/mcp`, {
3070
- method: "DELETE",
3071
- headers: mcpHeaders(sid)
3072
- }).catch(() => {});
3073
- } catch {}
3074
- }
3075
- }
3076
-
3077
3213
  //#endregion
3078
3214
  //#region src/routes/chat-completions/handler.ts
3079
3215
  const ENCODER$2 = new TextEncoder();
@@ -3299,6 +3435,125 @@ embeddingRoutes.post("/", async (c) => {
3299
3435
  }
3300
3436
  });
3301
3437
 
3438
+ //#endregion
3439
+ //#region src/services/copilot/create-messages.ts
3440
/**
 * Assemble the request headers used for Copilot's native /v1/messages
 * endpoints, mirroring what VS Code Copilot Chat sends.
 *
 * Three layers are merged; a later layer wins on an identical key:
 *   1. copilotHeaders(state) — the shared Copilot header set (Authorization,
 *      content-type, copilot-integration-id, editor-version,
 *      editor-plugin-version, user-agent, openai-intent,
 *      x-github-api-version, x-request-id,
 *      x-vscode-user-agent-library-version).
 *   2. The /v1/messages-specific headers VS Code adds on top:
 *      - X-Initiator: VS Code sets it dynamically; "agent" is safe for CLI use.
 *      - anthropic-version: sent by VS Code's Anthropic SDK.
 *      - X-Interaction-Id: VS Code sends a session-scoped UUID; a fresh UUID
 *        is generated on every call here.
 *   3. extraHeaders — caller-supplied headers, e.g. a forwarded
 *      anthropic-beta so Copilot enables extended features.
 *
 * copilot-vision-request is intentionally left out: VS Code only sends it
 * when images are present, and the native /v1/messages endpoint handles
 * vision without it.
 *
 * @param {object} [extraHeaders] Optional headers merged last.
 * @returns {object} Plain header map suitable for fetch().
 */
function buildHeaders(extraHeaders) {
	const baseHeaders = copilotHeaders(state);
	const messagesHeaders = {
		accept: "application/json",
		"openai-intent": "messages-proxy",
		"x-interaction-type": "conversation-agent",
		"X-Initiator": "agent",
		"anthropic-version": "2023-06-01",
		"X-Interaction-Id": randomUUID()
	};
	// Object.assign applies sources left to right, so the override order is
	// base < messages-specific < caller-supplied; a nullish extraHeaders is skipped.
	return Object.assign({}, baseHeaders, messagesHeaders, extraHeaders);
}
3471
/**
 * Proxy an Anthropic Messages API request to Copilot's native
 * /v1/messages endpoint and hand back the raw Response, leaving the
 * streaming vs non-streaming decision to the caller.
 *
 * Cancellation: the standard UPSTREAM_FETCH_TIMEOUT_MS timeout (when > 0)
 * and the optional `callerSignal` are combined with AbortSignal.any, so a
 * caller such as the peer-MCP `opus-critic` persona can cancel the upstream
 * call when Claude Code's MCP per-tool-call ceiling fires. Same pattern as
 * createResponses / createChatCompletions.
 *
 * @param {*} body Request body, passed to fetch() unchanged.
 * @param {object} [extraHeaders] Extra headers forwarded to buildHeaders().
 * @param {AbortSignal} [callerSignal] Optional caller-owned abort signal.
 * @returns {Promise<Response>} The successful upstream response.
 * @throws {Error} When no Copilot token is loaded.
 * @throws {HTTPError} When the upstream status is not ok; carries a rebuilt
 *   Response with the already-read error body.
 */
async function createMessages(body, extraHeaders, callerSignal) {
	if (!state.copilotToken) throw new Error("Copilot token not found");
	const url = `${copilotBaseUrl(state)}/v1/messages?beta=true`;
	consola.debug(`Forwarding to ${url}`);
	// Headers and the timeout signal are built inside the closure, i.e. once per
	// invocation by tryRefreshAndRetry (presumably again after a token refresh).
	const attempt = () => {
		const init = {
			method: "POST",
			headers: buildHeaders(extraHeaders),
			body
		};
		const abortSources = [];
		if (UPSTREAM_FETCH_TIMEOUT_MS > 0) abortSources.push(AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS));
		if (callerSignal) abortSources.push(callerSignal);
		switch (abortSources.length) {
			case 0:
				break;
			case 1:
				init.signal = abortSources[0];
				break;
			default:
				init.signal = AbortSignal.any(abortSources);
		}
		return fetch(url, init);
	};
	const upstream = await tryRefreshAndRetry(attempt, "/v1/messages");
	if (upstream.ok) return upstream;
	// The body can only be consumed once, so read it here and re-wrap it in a
	// fresh Response for whoever handles the HTTPError.
	const errorBody = await upstream.text().catch(() => "(could not read error body)");
	consola.error(`Copilot /v1/messages error: ${upstream.status} ${errorBody}`);
	const replay = new Response(errorBody, {
		status: upstream.status,
		statusText: upstream.statusText,
		headers: upstream.headers
	});
	throw new HTTPError("Copilot messages request failed", replay);
}
3515
/**
 * Proxy an Anthropic count_tokens request to Copilot's native
 * /v1/messages/count_tokens endpoint and return the raw Response.
 *
 * The optional `callerSignal` is combined with the
 * UPSTREAM_FETCH_TIMEOUT_MS timeout exactly as in createMessages.
 *
 * @param {*} body Request body, passed to fetch() unchanged.
 * @param {object} [extraHeaders] Extra headers forwarded to buildHeaders().
 * @param {AbortSignal} [callerSignal] Optional caller-owned abort signal.
 * @returns {Promise<Response>} The successful upstream response.
 * @throws {Error} When no Copilot token is loaded.
 * @throws {HTTPError} When the upstream status is not ok; carries a rebuilt
 *   Response with the already-read error body.
 */
async function countTokens(body, extraHeaders, callerSignal) {
	if (!state.copilotToken) throw new Error("Copilot token not found");
	const url = `${copilotBaseUrl(state)}/v1/messages/count_tokens?beta=true`;
	consola.debug(`Forwarding to ${url}`);
	// Picks the abort signal for one attempt: none, the single source, or a
	// composite of timeout + caller signal.
	const pickSignal = () => {
		const sources = [];
		if (UPSTREAM_FETCH_TIMEOUT_MS > 0) sources.push(AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS));
		if (callerSignal) sources.push(callerSignal);
		if (sources.length === 0) return void 0;
		return sources.length === 1 ? sources[0] : AbortSignal.any(sources);
	};
	const attempt = () => {
		const init = {
			method: "POST",
			headers: buildHeaders(extraHeaders),
			body
		};
		const signal = pickSignal();
		// Only set the key when there is a signal, so the init object keeps the
		// same shape as before when no timeout/caller signal applies.
		if (signal !== void 0) init.signal = signal;
		return fetch(url, init);
	};
	const upstream = await tryRefreshAndRetry(attempt, "/v1/messages/count_tokens");
	if (upstream.ok) return upstream;
	// Read the error body once and replay it inside a fresh Response.
	const errorBody = await upstream.text().catch(() => "(could not read error body)");
	consola.error(`Copilot count_tokens error: ${upstream.status} ${errorBody}`);
	const replay = new Response(errorBody, {
		status: upstream.status,
		statusText: upstream.statusText,
		headers: upstream.headers
	});
	throw new HTTPError("Copilot count_tokens request failed", replay);
}
3556
+
3302
3557
  //#endregion
3303
3558
  //#region src/services/copilot/create-responses.ts
3304
3559
  const createResponses = async (payload, modelHeaders, callerSignal) => {
@@ -3360,27 +3615,6 @@ function detectAgentCall(input) {
3360
3615
  const MCP_PROTOCOL_VERSION = "2025-06-18";
3361
3616
  const SERVER_NAME = "github-router-peers";
3362
3617
  const SERVER_VERSION = "1";
3363
- /**
3364
- * Reasoning effort levels accepted by Copilot's /v1/responses (gpt-5.x) and
3365
- * /v1/chat/completions endpoints. Per the proxy's existing thinking-mode
3366
- * translator (CLAUDE.md "Thinking-mode translation"), Copilot's adaptive-
3367
- * thinking path uses these same buckets:
3368
- * <2k tokens → low, <8k → medium, <24k → high, else → xhigh.
3369
- *
3370
- * Default `high` for peer reviews — adversarial-by-design but still cost-
3371
- * conscious. Callers can pass `xhigh` explicitly for deep dives, or `medium`
3372
- * for quick sanity checks.
3373
- */
3374
- const EFFORT_LEVELS = [
3375
- "low",
3376
- "medium",
3377
- "high",
3378
- "xhigh"
3379
- ];
3380
- const DEFAULT_EFFORT = "high";
3381
- function isEffort(v) {
3382
- return typeof v === "string" && EFFORT_LEVELS.includes(v);
3383
- }
3384
3618
  /** Bounded concurrency. Originally capped at 2 (commit 4317a25) as a defensive
3385
3619
  * pre-launch guess against Opus's natural pattern of fanning out to all three
3386
3620
  * critics at once. Raised to 8 (Phase 2D of the peer-MCP plan) so the
@@ -3485,10 +3719,10 @@ function geminiAvailable() {
3485
3719
  return models.some((m) => /^gemini-3\..*pro/i.test(m.id));
3486
3720
  }
3487
3721
  function activePersonas() {
3488
- return PERSONAS_READ.filter((p) => !p.requiresHttp || geminiAvailable());
3722
+ return PERSONAS_READ.filter((p) => !p.requiresGeminiCatalog || geminiAvailable());
3489
3723
  }
3490
3724
  function toolEntries() {
3491
- return activePersonas().map((p) => ({
3725
+ const personaEntries = activePersonas().map((p) => ({
3492
3726
  name: p.toolNameHttp,
3493
3727
  description: p.description,
3494
3728
  inputSchema: {
@@ -3506,12 +3740,18 @@ function toolEntries() {
3506
3740
  },
3507
3741
  effort: {
3508
3742
  type: "string",
3509
- enum: [...EFFORT_LEVELS],
3510
- description: `Reasoning depth (low | medium | high | xhigh). Default "${DEFAULT_EFFORT}". Use 'xhigh' for explicit deep dives where you want maximum reasoning. Use 'medium' for quick sanity checks. Note: for non-OpenAI models routed via /v1/chat/completions (gemini-3.x), the upstream may silently ignore this knob.`
3743
+ enum: [...p.allowedEfforts],
3744
+ description: `Reasoning depth (${p.allowedEfforts.join(" | ")}). Default "${p.defaultEffort}". Higher tiers cost more wall-clock; lower tiers are quicker sanity checks. ` + (p.endpoint === "/v1/chat/completions" ? "Note: for gemini routed via /v1/chat/completions, the upstream may silently ignore this knob." : "")
3511
3745
  }
3512
3746
  }
3513
3747
  }
3514
3748
  }));
3749
+ const nonPersonaEntries = NON_PERSONA_MCP_TOOLS.map((t) => ({
3750
+ name: t.toolNameHttp,
3751
+ description: t.description,
3752
+ inputSchema: t.inputSchema
3753
+ }));
3754
+ return [...personaEntries, ...nonPersonaEntries];
3515
3755
  }
3516
3756
  function buildUserText(prompt, context) {
3517
3757
  if (!context) return prompt;
@@ -3539,6 +3779,11 @@ function extractChatCompletionText(response) {
3539
3779
  const c = choice.message?.content;
3540
3780
  return typeof c === "string" ? c : "";
3541
3781
  }
3782
/**
 * Concatenate the text of every `text` content block in a Messages API
 * response, in order. Blocks of any other type, and text blocks whose
 * `text` is not a string, are ignored.
 *
 * @param {{content?: Iterable<{type?: string, text?: unknown}>}} response
 * @returns {string} Joined text, or "" when there is no usable text block.
 */
function extractMessagesText(response) {
	// Spread goes through the iterator protocol, matching a for...of walk.
	return [...(response.content ?? [])].reduce((joined, block) => {
		const isTextBlock = block.type === "text" && typeof block.text === "string";
		return isTextBlock ? joined + block.text : joined;
	}, "");
}
3542
3787
  function toolError(message) {
3543
3788
  return {
3544
3789
  content: [{
@@ -3548,6 +3793,94 @@ function toolError(message) {
3548
3793
  isError: true
3549
3794
  };
3550
3795
  }
3796
/**
 * Pre-flight size caps: turn a call that would bust the 60s MCP ceiling
 * into a fast, actionable error instead of a slot-leaking timeout.
 *
 * The thresholds are empirical (probed live against Copilot 2026-05-14):
 *   - gpt-5.5 high: 23.8s on a ~600B prompt → ~76s on 8KB (rough linear)
 *   - gpt-5.3-codex high: 16.0s on ~600B → ~64s on 12KB
 *   - claude-opus-4-7 medium (thinking=3000): 22.5s on a trivial prompt,
 *     but the model self-paces its budget → ~50s+ on a real ~6KB review
 *
 * A row applies only when the tool name AND the effort tier match exactly
 * and the brief is strictly larger than `maxBriefBytes`. gemini_critic has
 * no row (long-context model + Copilot may auto-pace).
 *
 * SCOPE — JSON path only. Callers (handleMcpPost) MUST gate the call site
 * on `!acceptsEventStream(...)`. The SSE path (handleToolsCallSSE) keeps
 * the connection open past the 60s ceiling via heartbeats, so a size-based
 * rejection there would only lock SSE clients out of their main advantage.
 * JSON-path clients (raw curl with `Accept: application/json`, older MCP
 * clients without SSE awareness) still hit the underlying tools/call timer.
 *
 * INVARIANT — the check MUST run BEFORE inFlightToolsCall++ so a rejected
 * call never acquires a slot. handleMcpPost runs it before delegating to
 * handleRpc → handleToolsCall (the function that increments the counter).
 * Documented in CLAUDE.md.
 */
const PRE_FLIGHT_CAPS = [
	{ toolName: "codex_critic", effort: "high", maxBriefBytes: 8 * 1024 },
	{ toolName: "codex_reviewer", effort: "high", maxBriefBytes: 12 * 1024 },
	{ toolName: "opus_critic", effort: "medium", maxBriefBytes: 6 * 1024 }
];
/**
 * Look up whether a (persona, effort, briefBytes) tuple exceeds its cap.
 *
 * @param {{toolNameHttp: string}} persona Persona being called.
 * @param {string} effort Resolved effort tier.
 * @param {number} briefBytes UTF-8 size of the assembled brief.
 * @returns {{tooLong: true, capBytes: number} | {tooLong: false}}
 */
function predictedTooLong(persona, effort, briefBytes) {
	const exceeded = PRE_FLIGHT_CAPS.find((row) => row.toolName === persona.toolNameHttp && row.effort === effort && briefBytes > row.maxBriefBytes);
	if (exceeded === void 0) return { tooLong: false };
	return {
		tooLong: true,
		capBytes: exceeded.maxBriefBytes
	};
}
3850
/**
 * JSON-path gate in front of tools/call: if predictedTooLong says the
 * call would exceed the 60s tools/call ceiling, return a JSON-RPC result
 * whose payload is a tool-error envelope; otherwise return undefined so
 * the call proceeds normally.
 *
 * Any malformed request is deliberately NOT rejected here (undefined is
 * returned) so that handleRpc can produce the canonical JSON-RPC answer:
 *   - notification (no id)                 → handleRpc returns 202 + empty body
 *   - missing/unknown name                 → handleRpc returns -32601
 *   - missing prompt                       → handleRpc returns -32602
 *   - invalid effort string                → handleRpc returns -32602
 *   - effort not in persona.allowedEfforts → handleRpc returns -32602
 *
 * @param {object} body Parsed JSON-RPC tools/call request.
 * @returns {object|undefined} JSON-RPC result envelope, or undefined.
 */
function jsonPathPreflightCap(body) {
	if (body.id === void 0) return void 0;
	const params = body.params ?? {};
	const args = params.arguments ?? {};
	const toolName = typeof params.name === "string" ? params.name : "";
	const prompt = typeof args.prompt === "string" ? args.prompt : "";
	if (toolName === "" || prompt === "") return void 0;
	const persona = activePersonas().find((candidate) => candidate.toolNameHttp === toolName);
	if (!persona) return void 0;
	// Resolve the effort tier: absent → persona default; present → it must be a
	// known effort AND one this persona accepts, else defer to handleRpc.
	const requestedEffort = args.effort;
	let effort;
	if (requestedEffort === void 0) effort = persona.defaultEffort;
	else if (isEffort(requestedEffort) && persona.allowedEfforts.includes(requestedEffort)) effort = requestedEffort;
	else return void 0;
	const context = typeof args.context === "string" ? args.context : void 0;
	// Measure the brief exactly as it will be sent (prompt + optional context).
	const briefBytes = Buffer.byteLength(buildUserText(prompt, context), "utf8");
	const verdict = predictedTooLong(persona, effort, briefBytes);
	if (!verdict.tooLong) return void 0;
	const rejection = `pre-flight rejected: ${persona.toolNameHttp} at effort=${effort} on a ${briefBytes}-byte brief is empirically predicted to exceed the JSON tools/call timeout (cap=${verdict.capBytes} bytes for this tier). Either drop to a lower effort tier, split the brief into 2-4 parallel sub-calls per the decomposition guidance, or send Accept: text/event-stream to use the SSE path which bypasses this cap.`;
	return rpcResult(body.id, toolError(rejection));
}
3551
3884
  async function callPersona(persona, prompt, context, effort, signal) {
3552
3885
  const resolvedModel = resolveModel(persona.model);
3553
3886
  const userText = buildUserText(prompt, context);
@@ -3571,6 +3904,25 @@ async function callPersona(persona, prompt, context, effort, signal) {
3571
3904
  text: text$1
3572
3905
  }] };
3573
3906
  }
3907
+ if (persona.endpoint === "/v1/messages") {
3908
+ const maxTokens = effort === "low" ? 4096 : effort === "medium" ? 8192 : effort === "high" ? 16384 : 32768;
3909
+ const text$1 = extractMessagesText(await (await createMessages(JSON.stringify({
3910
+ model: resolvedModel,
3911
+ max_tokens: maxTokens,
3912
+ system: persona.baseInstructions,
3913
+ thinking: { type: "adaptive" },
3914
+ output_config: { effort },
3915
+ messages: [{
3916
+ role: "user",
3917
+ content: userText
3918
+ }]
3919
+ }), void 0, signal)).json());
3920
+ if (!text$1) return toolError(`persona ${persona.agentName}: empty assistant output`);
3921
+ return { content: [{
3922
+ type: "text",
3923
+ text: text$1
3924
+ }] };
3925
+ }
3574
3926
  const text = extractChatCompletionText(await createChatCompletions({
3575
3927
  model: resolvedModel,
3576
3928
  messages: [{
@@ -3604,17 +3956,23 @@ async function handleToolsCall(body) {
3604
3956
  const params = body.params ?? {};
3605
3957
  const name$1 = typeof params.name === "string" ? params.name : "";
3606
3958
  const args = params.arguments ?? {};
3607
- const prompt = typeof args.prompt === "string" ? args.prompt : "";
3608
- const context = typeof args.context === "string" ? args.context : void 0;
3609
- let effort = DEFAULT_EFFORT;
3610
- if (args.effort !== void 0) {
3611
- if (!isEffort(args.effort)) return rpcError(body.id, RPC_INVALID_PARAMS, `tools/call: arguments.effort must be one of ${EFFORT_LEVELS.join("|")}; got ${JSON.stringify(args.effort)}`);
3612
- effort = args.effort;
3613
- }
3614
3959
  if (!name$1) return rpcError(body.id, RPC_INVALID_PARAMS, "tools/call missing name");
3615
3960
  const persona = activePersonas().find((p) => p.toolNameHttp === name$1);
3616
- if (!persona) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
3617
- if (!prompt) return rpcError(body.id, RPC_INVALID_PARAMS, `tools/call: arguments.prompt is required`);
3961
+ const nonPersonaTool = persona ? void 0 : NON_PERSONA_MCP_TOOLS.find((t) => t.toolNameHttp === name$1);
3962
+ if (!persona && !nonPersonaTool) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
3963
+ let personaPrompt;
3964
+ let personaContext;
3965
+ let personaEffort;
3966
+ if (persona) {
3967
+ if (args.effort !== void 0 && !isEffort(args.effort)) return rpcError(body.id, RPC_INVALID_PARAMS, `tools/call: arguments.effort must be one of ${EFFORT_LEVELS.join("|")}; got ${JSON.stringify(args.effort)}`);
3968
+ const requestedEffort = args.effort;
3969
+ const prompt = typeof args.prompt === "string" ? args.prompt : "";
3970
+ if (!prompt) return rpcError(body.id, RPC_INVALID_PARAMS, `tools/call: arguments.prompt is required`);
3971
+ personaPrompt = prompt;
3972
+ personaContext = typeof args.context === "string" ? args.context : void 0;
3973
+ if (requestedEffort !== void 0 && !persona.allowedEfforts.includes(requestedEffort)) return rpcError(body.id, RPC_INVALID_PARAMS, `tools/call: persona "${persona.toolNameHttp}" does not accept effort="${requestedEffort}". Allowed: ${persona.allowedEfforts.join("|")}.`);
3974
+ personaEffort = requestedEffort ?? persona.defaultEffort;
3975
+ }
3618
3976
  if (inFlightToolsCall >= MAX_INFLIGHT_TOOLS_CALL) return rpcResult(body.id, {
3619
3977
  content: [{
3620
3978
  type: "text",
@@ -3630,11 +3988,13 @@ async function handleToolsCall(body) {
3630
3988
  aborter = new AbortController();
3631
3989
  inflightAborts.set(abortKey, aborter);
3632
3990
  }
3991
+ const telemetryName = persona ? persona.agentName : nonPersonaTool.toolNameHttp;
3992
+ const telemetryModel = persona ? persona.model : "(non-persona)";
3633
3993
  try {
3634
- const result = await callPersona(persona, prompt, context, effort, aborter?.signal);
3994
+ const result = persona ? await callPersona(persona, personaPrompt, personaContext, personaEffort, aborter?.signal) : await nonPersonaTool.handler(args, aborter?.signal);
3635
3995
  logTelemetry({
3636
- name: persona.agentName,
3637
- model: persona.model,
3996
+ name: telemetryName,
3997
+ model: telemetryModel,
3638
3998
  durationMs: Date.now() - startedAt,
3639
3999
  result: result.isError ? "isError" : "ok"
3640
4000
  });
@@ -3642,8 +4002,8 @@ async function handleToolsCall(body) {
3642
4002
  } catch (err) {
3643
4003
  const message = err instanceof Error ? err.message : String(err);
3644
4004
  logTelemetry({
3645
- name: persona.agentName,
3646
- model: persona.model,
4005
+ name: telemetryName,
4006
+ model: telemetryModel,
3647
4007
  durationMs: Date.now() - startedAt,
3648
4008
  result: "exception",
3649
4009
  errorMessage: message
@@ -3651,7 +4011,7 @@ async function handleToolsCall(body) {
3651
4011
  return rpcResult(body.id, {
3652
4012
  content: [{
3653
4013
  type: "text",
3654
- text: `persona ${persona.agentName} failed: ${message}`
4014
+ text: persona ? `persona ${persona.agentName} failed: ${message}` : `tool ${nonPersonaTool.toolNameHttp} failed: ${message}`
3655
4015
  }],
3656
4016
  isError: true
3657
4017
  });
@@ -3814,6 +4174,11 @@ async function handleMcpPost(c) {
3814
4174
  consola.debug("/mcp parse error:", err);
3815
4175
  return c.json(rpcError(null, RPC_PARSE_ERROR, "request body is not valid JSON"), 200);
3816
4176
  }
4177
+ if (typeof body === "object" && body !== null && !Array.isArray(body) && body.method === "tools/call" && acceptsEventStream(c.req.header("accept"))) return handleToolsCallSSE(body);
4178
+ if (typeof body === "object" && body !== null && !Array.isArray(body) && body.method === "tools/call") {
4179
+ const preflight = jsonPathPreflightCap(body);
4180
+ if (preflight) return c.json(preflight, 200);
4181
+ }
3817
4182
  try {
3818
4183
  const { status, body: respBody } = await handleRpc(c, body);
3819
4184
  if (respBody === null) return c.body(null, status);
@@ -3824,6 +4189,111 @@ async function handleMcpPost(c) {
3824
4189
  return c.json(rpcError(echoId, RPC_INTERNAL_ERROR, err instanceof Error ? err.message : String(err)), 200);
3825
4190
  }
3826
4191
  }
4192
/**
 * Decide whether an Accept header allows an SSE response.
 *
 * Per the MCP 2025-06-18 Streamable HTTP spec, clients send
 * `Accept: application/json, text/event-stream` to say they can consume
 * either shape and the server chooses. For tools/call the server chooses
 * SSE, because Claude Code's per-tool-call timer (~60s on v2.1.113+) does
 * not fire on streamed responses.
 *
 * Parsing is lenient: the header is lowercased and split on commas; each
 * entry has its parameters (q-values, charset) dropped and is trimmed
 * before being compared with "text/event-stream". Wildcards such as
 * `*` + `/*` are not treated as a match.
 *
 * @param {string|undefined} accept Raw Accept header value.
 * @returns {boolean} false for undefined / empty / JSON-only headers.
 */
function acceptsEventStream(accept) {
	if (!accept) return false;
	for (const entry of accept.toLowerCase().split(",")) {
		const [mediaType] = entry.split(";");
		if (mediaType.trim() === "text/event-stream") return true;
	}
	return false;
}
4207
/**
 * SSE-streamed response for a single tools/call. The upstream work is
 * delegated to `handleToolsCall`, so whatever validation, AbortController
 * registration, telemetry and inFlight slot accounting it performs apply
 * here exactly as on the JSON path. The awaited result is wrapped in an
 * SSE envelope, with periodic heartbeats while the call is in flight.
 *
 * The JSON-path pre-flight size cap is NOT applied on this path:
 * handleMcpPost returns this response before reaching that gate.
 *
 * SSE event format (per MCP Streamable HTTP):
 *   event: message
 *   data: <json-rpc-2.0 message>\n\n
 *
 * - Heartbeats are JSON-RPC `notifications/progress` notifications. The
 *   progressToken is the client's `params._meta.progressToken` when one was
 *   supplied (string or number), otherwise the request id. `progress` starts
 *   at 0 and increases by one per heartbeat, because the MCP progress spec
 *   requires the value to increase with each notification.
 * - The final message is the JSON-RPC response envelope returned by
 *   handleToolsCall — same structure as the JSON-path response.
 * - On consumer cancel (ReadableStream.cancel) the stream is marked closed,
 *   the heartbeat interval is cleared immediately, and the AbortController
 *   registered in `inflightAborts` under the request id (if any) is
 *   signalled. Whatever handleToolsCall settles with afterwards is dropped
 *   unwritten.
 *
 * Per CLAUDE.md "Stream lifecycle" / "The smoking gun" rules: every
 * controller.enqueue/close is wrapped in a try/catch that swallows the
 * "Invalid state: Controller is already closed" race without warning.
 *
 * @param {object} body Parsed JSON-RPC tools/call request.
 * @returns {Promise<Response>} 200 text/event-stream response.
 */
const SSE_HEARTBEAT_INTERVAL_MS = 5e3;
async function handleToolsCallSSE(body) {
	const encoder = new TextEncoder();
	// Started before the stream exists so slot accounting happens right away;
	// start() runs synchronously in the ReadableStream constructor and awaits it.
	const callPromise = handleToolsCall(body);
	const clientToken = body.params?._meta?.progressToken;
	const hasClientToken = typeof clientToken === "string" || typeof clientToken === "number";
	const progressToken = hasClientToken ? clientToken : (body.id ?? null);
	// Shared between start() and cancel() so a cancel can stop heartbeats at once.
	let closed = false;
	let heartbeatHandle;
	let heartbeatsSent = 0;
	const stopHeartbeat = () => {
		if (heartbeatHandle === void 0) return;
		clearInterval(heartbeatHandle);
		heartbeatHandle = void 0;
	};
	const stream = new ReadableStream({
		async start(controller) {
			const safeEnqueue = (chunk) => {
				if (closed) return;
				try {
					controller.enqueue(chunk);
				} catch (err) {
					consola.debug("/mcp SSE enqueue after close (expected race):", err);
					closed = true;
				}
			};
			const safeClose = () => {
				if (closed) return;
				closed = true;
				try {
					controller.close();
				} catch (err) {
					consola.debug("/mcp SSE close after close:", err);
				}
			};
			const sseFrame = (rpcMessage) => encoder.encode(`event: message\ndata: ${JSON.stringify(rpcMessage)}\n\n`);
			const heartbeatFrame = () => sseFrame({
				jsonrpc: "2.0",
				method: "notifications/progress",
				params: {
					progressToken,
					// 0, 1, 2, … — must increase with each notification.
					progress: heartbeatsSent++,
					message: "in flight"
				}
			});
			// Send one frame immediately so the client sees traffic before the
			// first interval tick.
			safeEnqueue(heartbeatFrame());
			heartbeatHandle = setInterval(() => safeEnqueue(heartbeatFrame()), SSE_HEARTBEAT_INTERVAL_MS);
			try {
				safeEnqueue(sseFrame(await callPromise));
			} catch (err) {
				consola.error("/mcp SSE upstream error:", err);
				safeEnqueue(sseFrame(rpcError(body.id ?? null, RPC_INTERNAL_ERROR, err instanceof Error ? err.message : String(err))));
			} finally {
				stopHeartbeat();
				safeClose();
			}
		},
		cancel() {
			// The consumer is gone: stop writing and stop the timer right away
			// instead of waiting for the upstream call to settle.
			closed = true;
			stopHeartbeat();
			const abortKey = body.id !== void 0 && body.id !== null ? body.id : void 0;
			if (abortKey !== void 0) {
				const aborter = inflightAborts.get(abortKey);
				if (aborter) aborter.abort(/* @__PURE__ */ new Error("client disconnected SSE stream"));
			}
		}
	});
	return new Response(stream, {
		status: 200,
		headers: {
			"Content-Type": "text/event-stream",
			"Cache-Control": "no-cache, no-transform",
			"Connection": "keep-alive",
			"X-Accel-Buffering": "no"
		}
	});
}
3827
4297
  function handleMcpDelete(c) {
3828
4298
  const auth$1 = checkAuth(c);
3829
4299
  if (!auth$1.ok) return c.json(rpcError(null, RPC_INVALID_REQUEST, auth$1.reason), auth$1.status);
@@ -3848,108 +4318,6 @@ mcpRoutes.delete("/", (c) => {
3848
4318
  }
3849
4319
  });
3850
4320
 
3851
- //#endregion
3852
- //#region src/services/copilot/create-messages.ts
3853
- /**
3854
- * Build headers that match what VS Code Copilot Chat sends to the Copilot API.
3855
- *
3856
- * copilotHeaders() provides: Authorization, content-type, copilot-integration-id,
3857
- * editor-version, editor-plugin-version, user-agent, openai-intent,
3858
- * x-github-api-version, x-request-id, x-vscode-user-agent-library-version.
3859
- *
3860
- * We add the remaining headers VS Code sends for /v1/messages:
3861
- * - X-Initiator (VS Code sets dynamically; "agent" is safe for CLI use)
3862
- * - anthropic-version (VS Code's Anthropic SDK sends this)
3863
- * - X-Interaction-Id (VS Code sends a session-scoped UUID)
3864
- *
3865
- * We intentionally omit copilot-vision-request — VS Code only sends it when
3866
- * images are present, and the native /v1/messages endpoint handles vision
3867
- * without requiring the header.
3868
- *
3869
- * extraHeaders allows callers to forward client-supplied beta headers
3870
- * (anthropic-beta) so Copilot enables extended features.
3871
- */
3872
- function buildHeaders(extraHeaders) {
3873
- return {
3874
- ...copilotHeaders(state),
3875
- accept: "application/json",
3876
- "openai-intent": "messages-proxy",
3877
- "x-interaction-type": "conversation-agent",
3878
- "X-Initiator": "agent",
3879
- "anthropic-version": "2023-06-01",
3880
- "X-Interaction-Id": randomUUID(),
3881
- ...extraHeaders
3882
- };
3883
- }
3884
- /**
3885
- * Forward an Anthropic Messages API request to Copilot's native /v1/messages endpoint.
3886
- * Returns the raw Response so callers can handle streaming vs non-streaming.
3887
- */
3888
- async function createMessages(body, extraHeaders) {
3889
- if (!state.copilotToken) throw new Error("Copilot token not found");
3890
- const url = `${copilotBaseUrl(state)}/v1/messages?beta=true`;
3891
- consola.debug(`Forwarding to ${url}`);
3892
- const doFetch = () => {
3893
- const fetchInit = {
3894
- method: "POST",
3895
- headers: buildHeaders(extraHeaders),
3896
- body
3897
- };
3898
- if (UPSTREAM_FETCH_TIMEOUT_MS > 0) fetchInit.signal = AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS);
3899
- return fetch(url, fetchInit);
3900
- };
3901
- const response = await tryRefreshAndRetry(doFetch, "/v1/messages");
3902
- if (!response.ok) {
3903
- let errorBody = "";
3904
- try {
3905
- errorBody = await response.text();
3906
- } catch {
3907
- errorBody = "(could not read error body)";
3908
- }
3909
- consola.error(`Copilot /v1/messages error: ${response.status} ${errorBody}`);
3910
- throw new HTTPError("Copilot messages request failed", new Response(errorBody, {
3911
- status: response.status,
3912
- statusText: response.statusText,
3913
- headers: response.headers
3914
- }));
3915
- }
3916
- return response;
3917
- }
3918
- /**
3919
- * Forward an Anthropic count_tokens request to Copilot's native endpoint.
3920
- * Returns the raw Response.
3921
- */
3922
- async function countTokens(body, extraHeaders) {
3923
- if (!state.copilotToken) throw new Error("Copilot token not found");
3924
- const url = `${copilotBaseUrl(state)}/v1/messages/count_tokens?beta=true`;
3925
- consola.debug(`Forwarding to ${url}`);
3926
- const doFetch = () => {
3927
- const fetchInit = {
3928
- method: "POST",
3929
- headers: buildHeaders(extraHeaders),
3930
- body
3931
- };
3932
- if (UPSTREAM_FETCH_TIMEOUT_MS > 0) fetchInit.signal = AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS);
3933
- return fetch(url, fetchInit);
3934
- };
3935
- const response = await tryRefreshAndRetry(doFetch, "/v1/messages/count_tokens");
3936
- if (!response.ok) {
3937
- let errorBody = "";
3938
- try {
3939
- errorBody = await response.text();
3940
- } catch {
3941
- errorBody = "(could not read error body)";
3942
- }
3943
- consola.error(`Copilot count_tokens error: ${response.status} ${errorBody}`);
3944
- throw new HTTPError("Copilot count_tokens request failed", new Response(errorBody, {
3945
- status: response.status,
3946
- statusText: response.statusText,
3947
- headers: response.headers
3948
- }));
3949
- }
3950
- return response;
3951
- }
3952
-
3953
4321
  //#endregion
3954
4322
  //#region src/services/advisor/advisor.ts
3955
4323
  const ENCODER$1 = new TextEncoder();
@@ -5003,7 +5371,7 @@ async function handleCompletion(c) {
5003
5371
  type: "error",
5004
5372
  error: {
5005
5373
  type: "invalid_request_error",
5006
- message: "Inline `mcp_servers` body field is not supported by github-router (Copilot returns 400 'Extra inputs are not permitted'; the proxy would need a multi-turn tool-loop translation that has unresolved design holes — see Phase G in the plan). Configure your remote MCP servers as local stdio entries in `~/.claude/mcp.json` instead — Claude Code will spawn them locally and the proxy passes their tool calls through transparently. (https://docs.claude.com/en/docs/claude-code/mcp)"
5374
+ message: "Inline `mcp_servers` body field is not supported by github-router. Configure remote MCP servers as local stdio entries in `~/.claude/mcp.json` instead — Claude Code will spawn them locally and the proxy passes their tool calls through transparently. (https://docs.claude.com/en/docs/claude-code/mcp)"
5007
5375
  }
5008
5376
  }, 400);
5009
5377
  } catch {}
@@ -5971,6 +6339,7 @@ function getClaudeCodeEnvVars(serverUrl, model) {
5971
6339
  ANTHROPIC_BASE_URL: serverUrl,
5972
6340
  CLAUDE_CONFIG_DIR: PATHS.CLAUDE_CONFIG_DIR,
5973
6341
  MCP_TIMEOUT: "600000",
6342
+ MCP_TOOL_TIMEOUT: "600000",
5974
6343
  DISABLE_NON_ESSENTIAL_MODEL_CALLS: "1",
5975
6344
  CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: "1",
5976
6345
  DISABLE_TELEMETRY: "1"