github-router 0.3.22 → 0.3.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -0
- package/dist/main.js +699 -330
- package/dist/main.js.map +1 -1
- package/package.json +1 -1
package/dist/main.js
CHANGED
|
@@ -8,6 +8,8 @@ import path from "node:path";
|
|
|
8
8
|
import process$1 from "node:process";
|
|
9
9
|
import { execFile, execFileSync, spawn } from "node:child_process";
|
|
10
10
|
import { promisify } from "node:util";
|
|
11
|
+
import { events } from "fetch-event-stream";
|
|
12
|
+
import { z } from "zod";
|
|
11
13
|
import fs$1 from "node:fs";
|
|
12
14
|
import { Writable } from "node:stream";
|
|
13
15
|
import { serve } from "srvx";
|
|
@@ -15,8 +17,6 @@ import { getProxyForUrl } from "proxy-from-env";
|
|
|
15
17
|
import { Agent, ProxyAgent, setGlobalDispatcher } from "undici";
|
|
16
18
|
import { Hono } from "hono";
|
|
17
19
|
import { cors } from "hono/cors";
|
|
18
|
-
import { events } from "fetch-event-stream";
|
|
19
|
-
import { z } from "zod";
|
|
20
20
|
import clipboard from "clipboardy";
|
|
21
21
|
|
|
22
22
|
//#region src/lib/paths.ts
|
|
@@ -1643,8 +1643,206 @@ function launchChild(target, server$1, options = {}) {
|
|
|
1643
1643
|
});
|
|
1644
1644
|
}
|
|
1645
1645
|
|
|
1646
|
+
//#endregion
|
|
1647
|
+
//#region src/services/copilot/web-search.ts
|
|
1648
|
+
/**
 * Validator for a single JSON-RPC 2.0 message read back from the MCP endpoint.
 *
 * Only `jsonrpc` is mandatory. `id`, `result` and `error` are each optional,
 * and inside `result` both `content` (an array of text parts) and `isError`
 * are optional as well.
 */
const RpcSchema = (() => {
	// One entry of `result.content`; only `type: "text"` parts are accepted.
	const textPart = z.object({
		type: z.literal("text"),
		text: z.string()
	});
	const resultBody = z.object({
		content: z.array(textPart).optional(),
		isError: z.boolean().optional()
	});
	const errorBody = z.object({
		code: z.number(),
		message: z.string()
	});
	return z.object({
		jsonrpc: z.literal("2.0"),
		id: z.number().optional(),
		result: resultBody.optional(),
		error: errorBody.optional()
	});
})();
|
|
1663
|
+
/**
 * Validator for the JSON document carried inside the web_search tool's text
 * part: a `text` object holding the answer (`value`) and optional/nullable
 * `annotations`, plus an optional/nullable `bing_searches` array whose
 * entries are not inspected.
 */
const InnerSchema = (() => {
	const urlCitation = z.object({
		title: z.string(),
		url: z.string()
	});
	// An annotation may or may not carry a `url_citation`.
	const annotation = z.object({ url_citation: urlCitation.optional() });
	const answerText = z.object({
		value: z.string(),
		annotations: z.array(annotation).nullable().optional()
	});
	return z.object({
		text: answerText,
		bing_searches: z.array(z.unknown()).nullable().optional()
	});
})();
|
|
1673
|
+
// Upper bound on searches admitted inside any rolling 1000 ms window.
const MAX_SEARCHES_PER_SECOND = 3;
// Admission times (ms since epoch) of recently admitted searches.
let searchTimestamps = [];
// Tail of the promise chain that makes callers take their turn one by one.
let throttleChain = Promise.resolve();
/**
 * Wait for permission to issue one web search.
 *
 * Callers are queued on `throttleChain`, so admissions happen strictly one
 * after another. Each admission drops timestamps older than one second; if
 * the window is still full it sleeps until the oldest remaining timestamp
 * ages out, then records its own admission time.
 *
 * @returns {Promise<void>} settles once this caller has been admitted.
 */
async function throttleSearch() {
	const admit = async () => {
		const checkedAt = Date.now();
		searchTimestamps = searchTimestamps.filter((stamp) => checkedAt - stamp < 1e3);
		const windowIsFull = searchTimestamps.length >= MAX_SEARCHES_PER_SECOND;
		const delayMs = windowIsFull ? 1e3 - (checkedAt - searchTimestamps[0]) : 0;
		if (delayMs > 0) {
			consola.debug(`Web search rate limited, waiting ${delayMs}ms`);
			await sleep(delayMs);
		}
		searchTimestamps.push(Date.now());
	};
	const myTurn = throttleChain.then(admit);
	// The chain itself must never reject, otherwise later callers would be skipped.
	throttleChain = myTurn.catch(() => {});
	return myTurn;
}
|
|
1692
|
+
/**
 * Build the request headers for a call to the Copilot MCP endpoint.
 *
 * @param {string} [sid] MCP session id; when truthy it is sent as `Mcp-Session-Id`.
 * @returns {Record<string, string>} header map for `fetch`.
 * @throws {Error} when `state.githubToken` is not set.
 */
function mcpHeaders(sid) {
	const token = state.githubToken;
	if (!token) throw new Error("GitHub token missing — re-run auth flow. Web search uses the GitHub PAT (not the Copilot token); the on-disk token at ~/.local/share/github-router/github_token must be present.");
	return {
		Authorization: `Bearer ${token}`,
		"content-type": "application/json",
		accept: "application/json, text/event-stream",
		"X-MCP-Host": "copilot-cli",
		"X-MCP-Toolsets": "web_search",
		"Mcp-Protocol-Version": "2025-06-18",
		"user-agent": `GitHubCopilotChat/${copilotVersion(state)}`,
		// The session header is appended last and only once a session exists.
		...(sid ? { "Mcp-Session-Id": sid } : {})
	};
}
|
|
1706
|
+
/**
 * POST one JSON-RPC message to the Copilot MCP endpoint.
 *
 * A 5xx answer is retried exactly once after a 500 ms pause. Any other
 * response (including 4xx and the second 5xx) is returned to the caller
 * untouched, so callers must check `res.ok` themselves.
 *
 * @param {object} body JSON-RPC payload; serialized with `JSON.stringify`.
 * @param {string} [sid] MCP session id forwarded to `mcpHeaders`.
 * @param {boolean} [retry=true] whether a 5xx may still be retried.
 * @returns {Promise<Response>} the final HTTP response.
 * @throws {Error} when `mcpHeaders` finds no GitHub token, or when `fetch` itself rejects.
 */
async function postMcp(body, sid, retry = true) {
	const url = `${copilotBaseUrl(state)}/mcp`;
	const res = await fetch(url, {
		method: "POST",
		headers: mcpHeaders(sid),
		body: JSON.stringify(body)
	});
	if (res.ok || !retry || res.status < 500) return res;
	// The failed response is being thrown away: cancel its unread body so the
	// underlying connection is released instead of lingering until GC.
	try {
		await res.body?.cancel();
	} catch (err) {
		consola.debug("MCP retry: could not cancel failed response body", err);
	}
	await sleep(500);
	return postMcp(body, sid, false);
}
|
|
1719
|
+
/**
 * Extract the JSON-RPC reply whose `id` equals `callId` from a tools/call
 * HTTP response.
 *
 * The request advertises `accept: application/json, text/event-stream`, so
 * the server may answer with either a single JSON document or an SSE stream.
 * Both are handled; a response without a JSON content type is read as SSE.
 *
 * @param {Response} callRes HTTP response of the tools/call POST.
 * @param {number} callId request id the reply must carry.
 * @returns {Promise<object|undefined>} the validated reply, or undefined when none matched.
 */
async function readToolCallReply(callRes, callId) {
	const matchReply = (candidate) => {
		const parsed = RpcSchema.safeParse(candidate);
		return parsed.success && parsed.data.id === callId ? parsed.data : void 0;
	};
	const contentType = (callRes.headers.get("content-type") ?? "").toLowerCase();
	if (contentType.includes("application/json")) {
		try {
			return matchReply(await callRes.json());
		} catch {
			// Unparseable body: treated like a stream with no usable event.
			return void 0;
		}
	}
	for await (const ev of events(callRes)) {
		if (!ev.data) continue;
		let candidate;
		try {
			candidate = JSON.parse(ev.data);
		} catch {
			// Non-JSON events are skipped.
			continue;
		}
		const reply = matchReply(candidate);
		if (reply) return reply;
	}
	return void 0;
}
/**
 * Run one web search through the Copilot MCP endpoint.
 *
 * Sequence: wait for the rate limiter, `initialize` a session, send
 * `notifications/initialized`, issue `tools/call` for `web_search`, then
 * parse the tool's text part as JSON and collect its URL citations
 * (citations pointing at bing.com/search are dropped). The session is
 * deleted afterwards on a best-effort basis.
 *
 * @param {string} query search text.
 * @returns {Promise<{content: string, references: Array<{title: string, url: string}>}>}
 * @throws {HTTPError} on any non-OK MCP response, a missing session id, a
 *   missing/invalid reply, or an upstream tool error.
 * @throws {Error} when no GitHub token is available (raised by `mcpHeaders`).
 */
async function searchWeb(query) {
	await throttleSearch();
	consola.info(`Web search (MCP): "${query.slice(0, 80)}"`);
	const callId = Math.floor(Math.random() * 1e9);
	let sid;
	try {
		const initRes = await postMcp({
			jsonrpc: "2.0",
			id: 1,
			method: "initialize",
			params: {
				protocolVersion: "2024-11-05",
				capabilities: {},
				clientInfo: {
					name: "GitHubCopilotChat",
					version: copilotVersion(state)
				}
			}
		});
		if (!initRes.ok) {
			consola.error("MCP initialize failed", initRes.status);
			throw new HTTPError("MCP initialize failed", initRes);
		}
		sid = initRes.headers.get("mcp-session-id") ?? void 0;
		if (!sid) throw new HTTPError("MCP initialize: missing Mcp-Session-Id header", initRes);
		const notifRes = await postMcp({
			jsonrpc: "2.0",
			method: "notifications/initialized"
		}, sid);
		if (!notifRes.ok && notifRes.status !== 202) {
			consola.error("MCP notifications/initialized failed", notifRes.status);
			throw new HTTPError("MCP notifications/initialized failed", notifRes);
		}
		const callRes = await postMcp({
			jsonrpc: "2.0",
			id: callId,
			method: "tools/call",
			params: {
				name: "web_search",
				arguments: { query }
			}
		}, sid);
		if (!callRes.ok) {
			consola.error("MCP tools/call failed", callRes.status);
			throw new HTTPError("MCP tools/call failed", callRes);
		}
		const rpc = await readToolCallReply(callRes, callId);
		if (!rpc) throw new HTTPError("MCP tools/call: no matching response id in SSE stream", callRes);
		if (rpc.error) throw new HTTPError(`MCP error ${rpc.error.code}: ${rpc.error.message}`, callRes);
		if (rpc.result?.isError) {
			// Surface whatever explanation the tool returned instead of dropping it.
			const detail = rpc.result.content?.[0]?.text;
			throw new HTTPError(detail ? `MCP web_search tool error: ${detail.slice(0, 500)}` : "MCP web_search tool error", callRes);
		}
		const text = rpc.result?.content?.[0]?.text;
		if (!text) throw new HTTPError("MCP web_search: empty content", callRes);
		let innerRaw;
		try {
			innerRaw = JSON.parse(text);
		} catch (err) {
			throw new HTTPError(`MCP web_search: inner content not JSON: ${err instanceof Error ? err.message : String(err)}`, callRes);
		}
		const innerParsed = InnerSchema.safeParse(innerRaw);
		if (!innerParsed.success) throw new HTTPError(`MCP web_search: inner content shape changed (${innerParsed.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join("; ")})`, callRes);
		const inner = innerParsed.data;
		const references = [];
		for (const ann of inner.text.annotations ?? []) {
			const cite = ann.url_citation;
			if (cite && !cite.url.toLowerCase().includes("bing.com/search")) references.push({
				title: cite.title,
				url: cite.url
			});
		}
		consola.debug(`Web search returned ${references.length} references`);
		return {
			content: inner.text.value,
			references
		};
	} finally {
		// Best-effort session teardown: fire-and-forget, and never let a
		// cleanup failure (sync or async) mask the search outcome.
		if (sid) try {
			fetch(`${copilotBaseUrl(state)}/mcp`, {
				method: "DELETE",
				headers: mcpHeaders(sid)
			}).catch(() => {});
		} catch {}
	}
}
|
|
1816
|
+
|
|
1646
1817
|
//#endregion
|
|
1647
1818
|
//#region src/lib/peer-mcp-personas.ts
|
|
1819
|
+
/**
 * Reasoning-effort tiers, listed from lowest to highest.
 *
 * Per the original note, these are the levels accepted by Copilot's
 * /v1/responses (gpt-5.x) and /v1/chat/completions endpoints, and the
 * proxy's thinking-mode translator (CLAUDE.md "Thinking-mode translation")
 * maps thinking budgets onto the same buckets:
 *   <2k tokens → low, <8k → medium, <24k → high, else → xhigh.
 *
 * Every persona narrows this list through its own `allowedEfforts` and
 * `defaultEffort`; the original note places enforcement in
 * handler.ts:handleToolsCall.
 *
 * **xhigh on long-running personas works via SSE-streamed /mcp responses**
 * (handler.ts:handleToolsCallSSE). The original note states that Claude
 * Code's MCP HTTP client honors `text/event-stream` responses without
 * applying the ~60s per-tool-call timer that previously broke xhigh on
 * gpt-5.5 (~56s wall) and claude-opus-4-7 (high+ thinking budgets).
 *
 * NOTE(review): tiers and defaults differ per persona (for example
 * gemini-critic omits `xhigh` in PERSONAS_READ) — treat the persona tables,
 * not this comment, as the source of truth.
 */
const EFFORT_LEVELS = ["low", "medium", "high", "xhigh"];
/**
 * Type guard for effort values.
 *
 * @param {unknown} v candidate value.
 * @returns {boolean} true only when `v` is a string listed in EFFORT_LEVELS.
 */
function isEffort(v) {
	if (typeof v !== "string") return false;
	return EFFORT_LEVELS.some((level) => level === v);
}
|
|
1648
1846
|
const CRITIC_RUBRIC = `
|
|
1649
1847
|
Apply this grading rubric:
|
|
1650
1848
|
- Score 1–5 on three axes:
|
|
@@ -1673,7 +1871,7 @@ Self-reminder (read before every reply):
|
|
|
1673
1871
|
`.trim();
|
|
1674
1872
|
const COLD_START_CONTRACT = `
|
|
1675
1873
|
Cold-start contract for the lead orchestrator (Opus):
|
|
1676
|
-
When delegating to me, paste a self-contained brief. I have no access to your scrollback,
|
|
1874
|
+
When delegating to me, paste a self-contained brief. I have no access to your scrollback, project memory, or the project tree. Always include:
|
|
1677
1875
|
(a) the artifact under review verbatim (code/diff/plan text),
|
|
1678
1876
|
(b) the constraints or "done" criteria,
|
|
1679
1877
|
(c) any prior decisions I should not relitigate.
|
|
@@ -1745,39 +1943,87 @@ Reply format (markdown):
|
|
|
1745
1943
|
|
|
1746
1944
|
Resilience reminder:
|
|
1747
1945
|
If your session terminates abnormally before "Status: complete", the lead will retry once. On recovery, ask the lead to confirm what's already been done before re-applying changes — duplicate edits are worse than a slow restart.`;
|
|
1946
|
+
// Base instructions for the opus-critic persona: three fixed paragraphs
// followed by the shared cold-start contract and critic rubric, each
// separated by one blank line.
const OPUS_CRITIC_BASE = [
	`You are opus-critic, a fresh-context Anthropic-side adversarial reviewer running on Claude Opus 4.7 — the same model and lab as the lead orchestrator that just delegated to you. You are NOT the lead. You did not see the lead's reasoning trace. You only see the brief.`,
	`Your job is to spot what the lead missed because of cognitive momentum, sunk-cost on a plan, or motivated reasoning toward a particular fix. Your blind-spot diversification is LIMITED compared to codex-critic (gpt-5.5) and gemini-critic (gemini-3.1-pro) — same training, same lab, same RLHF priors. Use that honestly: don't pretend to find a different perspective when the obvious read is "the lead got it right." Silence on good work is a valid and welcome answer.`,
	`Sycophancy is the failure mode you exist to fight. Manufactured contrarianism is a different failure of the same shape — do neither.`,
	COLD_START_CONTRACT,
	CRITIC_RUBRIC
].join("\n\n");
|
|
1748
1955
|
/**
 * Read-only reviewer personas. Each entry names the persona, its HTTP tool
 * name, backing model and upstream endpoint, and declares which effort tiers
 * it accepts (`allowedEfforts`) and which one is used by default.
 *
 * Effort lists are fresh copies derived from EFFORT_LEVELS so no two
 * personas share an array instance.
 *
 * NOTE(review): the coordinator prompt describes opus-critic as supporting
 * only `effort: low|medium`, while this table allows all four tiers with an
 * `xhigh` default — confirm which is intended.
 */
const PERSONAS_READ = Object.freeze([
	// OpenAI-backed adversarial critic.
	{
		agentName: "codex-critic",
		toolNameHttp: "codex_critic",
		model: "gpt-5.5",
		endpoint: "/v1/responses",
		description: "Adversarial second opinion on plans, designs, or code tradeoffs. Backed by gpt-5.5 (OpenAI) — different lab than Opus. Pass artifact verbatim.",
		baseInstructions: CRITIC_BASE,
		agentPrompt: "",
		writeCapable: false,
		requiresHttp: false,
		allowedEfforts: [...EFFORT_LEVELS],
		defaultEffort: "xhigh"
	},
	// Google-backed critic; only offered when the Gemini catalog is available, and capped at "high".
	{
		agentName: "gemini-critic",
		toolNameHttp: "gemini_critic",
		model: "gemini-3.1-pro-preview",
		endpoint: "/v1/chat/completions",
		description: "Adversarial second opinion. Backed by gemini-3.1-pro (Google) — third-lab triangulation, strong on long-context and formal reasoning. Pass artifact verbatim.",
		baseInstructions: GEMINI_CRITIC_BASE,
		agentPrompt: "",
		writeCapable: false,
		requiresHttp: true,
		requiresGeminiCatalog: true,
		allowedEfforts: EFFORT_LEVELS.filter((level) => level !== "xhigh"),
		defaultEffort: "high"
	},
	// Code-specialist line-level reviewer.
	{
		agentName: "codex-reviewer",
		toolNameHttp: "codex_reviewer",
		model: "gpt-5.3-codex",
		endpoint: "/v1/responses",
		description: "Line-level review of a concrete diff or single file. Backed by gpt-5.3-codex (OpenAI) — code-specialist, narrow-scope. Pass artifact verbatim.",
		baseInstructions: REVIEWER_BASE,
		agentPrompt: "",
		writeCapable: false,
		requiresHttp: false,
		allowedEfforts: [...EFFORT_LEVELS],
		defaultEffort: "xhigh"
	},
	// Same-lab fresh-context critic served through /v1/messages.
	{
		agentName: "opus-critic",
		toolNameHttp: "opus_critic",
		model: "claude-opus-4-7",
		endpoint: "/v1/messages",
		description: "Adversarial second opinion from a fresh-context Opus 4.7 — cheap same-lab sanity check. Pass artifact verbatim.",
		baseInstructions: OPUS_CRITIC_BASE,
		agentPrompt: "",
		writeCapable: false,
		requiresHttp: true,
		allowedEfforts: [...EFFORT_LEVELS],
		defaultEffort: "xhigh"
	}
]);
|
|
1783
2029
|
const PERSONAS_WRITE = Object.freeze([{
|
|
@@ -1785,11 +2031,18 @@ const PERSONAS_WRITE = Object.freeze([{
|
|
|
1785
2031
|
toolNameHttp: "codex_implementer",
|
|
1786
2032
|
model: "gpt-5.3-codex",
|
|
1787
2033
|
endpoint: "/v1/responses",
|
|
1788
|
-
description: "Targeted implementation of a self-contained coding task
|
|
2034
|
+
description: "Targeted implementation of a self-contained coding task. Backed by gpt-5.3-codex with workspace-write access. Pass spec + files verbatim.",
|
|
1789
2035
|
baseInstructions: IMPLEMENTER_BASE,
|
|
1790
2036
|
agentPrompt: "",
|
|
1791
2037
|
writeCapable: true,
|
|
1792
|
-
requiresHttp: false
|
|
2038
|
+
requiresHttp: false,
|
|
2039
|
+
allowedEfforts: [
|
|
2040
|
+
"low",
|
|
2041
|
+
"medium",
|
|
2042
|
+
"high",
|
|
2043
|
+
"xhigh"
|
|
2044
|
+
],
|
|
2045
|
+
defaultEffort: "high"
|
|
1793
2046
|
}]);
|
|
1794
2047
|
/**
|
|
1795
2048
|
* Build the agent-prompt body Claude Code uses as the subagent's full
|
|
@@ -1838,12 +2091,65 @@ function buildAgentPrompt(persona, opts) {
|
|
|
1838
2091
|
/**
 * Select the personas to expose for the current session.
 *
 * Read-only personas are always included, except those flagged
 * `requiresGeminiCatalog` when `opts.geminiAvailable` is falsy. Write-capable
 * personas are appended only when `opts.codexCli` is truthy.
 *
 * @param {{geminiAvailable?: boolean, codexCli?: boolean}} opts session capabilities.
 * @returns {Array<object>} a new array of persona definitions.
 */
function personasFor(opts) {
	const readers = PERSONAS_READ.filter((persona) => !persona.requiresGeminiCatalog || opts.geminiAvailable);
	return opts.codexCli ? [...readers, ...PERSONAS_WRITE] : readers;
}
|
|
2100
|
+
const WEB_SEARCH_DESCRIPTION = "Web search via GitHub Copilot's MCP. Prefer over Claude Code's built-in WebSearch — surfaces source URLs you can cite.";
/**
 * Render a `searchWeb()` result as a single text block for an MCP reply.
 *
 * The original note says this mirrors the legacy format produced by
 * `injectWebSearchIfNeeded`, kept deliberately close because both surfaces
 * share `searchWeb()`.
 *
 * When there are no references the content is returned unchanged — no
 * `## References` header is emitted, so the model is not tempted to invent
 * citations. Otherwise the references follow as a markdown bullet list of
 * `[title](url)` links under that header.
 *
 * @param {{content: string, references: Array<{title: string, url: string}>}} results
 * @returns {string} formatted text.
 */
function formatWebSearchResult(results) {
	const { content, references } = results;
	if (references.length === 0) return content;
	const bullets = [];
	for (const { title, url } of references) bullets.push(`- [${title}](${url})`);
	return `${content}\n\n${["## References", ...bullets].join("\n")}`;
}
|
|
2116
|
+
/**
 * MCP tools that are not backed by a persona. Currently a single
 * `web_search` tool whose handler delegates to `searchWeb()` and always
 * answers with an MCP tool result (never throws): failures are reported as
 * a text part with `isError: true`.
 */
const NON_PERSONA_MCP_TOOLS = Object.freeze([{
	toolNameHttp: "web_search",
	description: WEB_SEARCH_DESCRIPTION,
	inputSchema: {
		type: "object",
		required: ["query"],
		additionalProperties: false,
		properties: { query: {
			type: "string",
			description: "The search query string. Natural-language queries work best — the upstream provider rewrites for the search index."
		} }
	},
	/**
	 * @param {{query?: unknown}|null|undefined} args tool arguments from the caller.
	 * @param {AbortSignal} [_signal] accepted for interface parity; not used here.
	 * @returns {Promise<{content: Array<{type: "text", text: string}>, isError?: boolean}>}
	 */
	async handler(args, _signal) {
		// `args` may be absent when the caller sends no arguments object; read it
		// defensively so the validation error below is returned instead of a TypeError.
		const query = typeof args?.query === "string" ? args.query : "";
		// Whitespace-only input is as useless upstream as an empty string.
		if (query.trim() === "") return {
			content: [{
				type: "text",
				text: "web_search: arguments.query is required (must be a non-empty string)"
			}],
			isError: true
		};
		try {
			return { content: [{
				type: "text",
				text: formatWebSearchResult(await searchWeb(query))
			}] };
		} catch (err) {
			return {
				content: [{
					type: "text",
					text: `web_search failed: ${err instanceof Error ? err.message : String(err)}`
				}],
				isError: true
			};
		}
	}
}]);
|
|
1847
2153
|
|
|
1848
2154
|
//#endregion
|
|
1849
2155
|
//#region src/lib/codex-mcp-config.ts
|
|
@@ -1914,11 +2220,11 @@ function buildPeerMcpConfig(serverUrl, opts) {
|
|
|
1914
2220
|
* ExitPlanMode to default-on (env-disable-able).
|
|
1915
2221
|
*/
|
|
1916
2222
|
function buildCoordinatorAgent(opts) {
|
|
1917
|
-
const peers = ["codex-critic"];
|
|
2223
|
+
const peers = ["codex-critic", "opus-critic"];
|
|
1918
2224
|
if (opts.geminiAvailable) peers.push("gemini-critic");
|
|
1919
2225
|
peers.push("codex-reviewer");
|
|
1920
2226
|
return {
|
|
1921
|
-
description: "Coordinates cross-lab adversarial review.
|
|
2227
|
+
description: "Coordinates cross-lab adversarial review across codex-critic, opus-critic, gemini-critic, codex-reviewer. Use proactively before non-trivial plans and after non-trivial commits. Always pass artifacts verbatim — peers are fresh-context.",
|
|
1922
2228
|
prompt: [
|
|
1923
2229
|
"# Subagent: peer-review-coordinator",
|
|
1924
2230
|
"",
|
|
@@ -1934,10 +2240,11 @@ function buildCoordinatorAgent(opts) {
|
|
|
1934
2240
|
"- **Concrete diff or single file** → fan out to `codex-reviewer`" + (opts.geminiAvailable ? " AND `gemini-critic` (gemini for cross-lab triangulation)" : "") + ". For very small changes (<20 lines), one `codex-reviewer` call is enough.",
|
|
1935
2241
|
"- **Tie-breaker after codex-critic has weighed in** → call `gemini-critic`" + (opts.geminiAvailable ? "" : " (NOT REGISTERED in this session — gemini-3.x not in catalog; tie-break unavailable)") + " with the artifact AND codex-critic's verdict for cross-lab cross-check.",
|
|
1936
2242
|
"- **Long-context artifact (>100 KB)** → prefer `gemini-critic`" + (opts.geminiAvailable ? "" : " (NOT REGISTERED in this session)") + ". Otherwise, decompose into 2-4 batches and fan out across `codex-critic` calls in parallel.",
|
|
2243
|
+
"- **Fast same-lab sanity check on a moderate artifact (<5 KB)** → prefer `opus-critic` (cheapest, ~22s, only `effort: low|medium` supported). Same lab as the lead — limited blind-spot diversification, but a useful gut-check before committing to a controversial decision. For cross-lab diversification or deep dives on larger artifacts, use codex/gemini at higher effort with decomposition for >5KB.",
|
|
1937
2244
|
"",
|
|
1938
2245
|
"## Decomposition for large artifacts",
|
|
1939
2246
|
"",
|
|
1940
|
-
"Each per-call MCP wait is bounded (~
|
|
2247
|
+
"Each per-call MCP wait is bounded (~60s SDK default on Claude Code v2.1.113+ per regressions #50289 / #52137 — empirically reproduced 2026-05-14). The proxy enforces per-persona effort allowlists AND a pre-flight `predictedTooLong` cap (codex_critic@high >8 KB, codex_reviewer@high >12 KB, opus_critic@medium >6 KB) to surface would-be-timeouts as fast actionable errors. For artifacts that exceed the cap, split into 2-4 logical batches BY CONCERN (not by raw size — semantic batches give better per-batch reviews) and call peers in parallel. The proxy's MCP cap allows up to 8 in-flight calls. Aggregate findings yourself before reporting back.",
|
|
1941
2248
|
"",
|
|
1942
2249
|
"## Aggregation contract",
|
|
1943
2250
|
"",
|
|
@@ -2344,7 +2651,7 @@ function initProxyFromEnv() {
|
|
|
2344
2651
|
//#endregion
|
|
2345
2652
|
//#region package.json
|
|
2346
2653
|
var name = "github-router";
|
|
2347
|
-
var version = "0.3.
|
|
2654
|
+
var version = "0.3.23";
|
|
2348
2655
|
|
|
2349
2656
|
//#endregion
|
|
2350
2657
|
//#region src/lib/approval.ts
|
|
@@ -2903,177 +3210,6 @@ const createChatCompletions = async (payload, modelHeaders, callerSignal) => {
|
|
|
2903
3210
|
return await response.json();
|
|
2904
3211
|
};
|
|
2905
3212
|
|
|
2906
|
-
//#endregion
|
|
2907
|
-
//#region src/services/copilot/web-search.ts
|
|
2908
|
-
const RpcSchema = z.object({
|
|
2909
|
-
jsonrpc: z.literal("2.0"),
|
|
2910
|
-
id: z.number().optional(),
|
|
2911
|
-
result: z.object({
|
|
2912
|
-
content: z.array(z.object({
|
|
2913
|
-
type: z.literal("text"),
|
|
2914
|
-
text: z.string()
|
|
2915
|
-
})).optional(),
|
|
2916
|
-
isError: z.boolean().optional()
|
|
2917
|
-
}).optional(),
|
|
2918
|
-
error: z.object({
|
|
2919
|
-
code: z.number(),
|
|
2920
|
-
message: z.string()
|
|
2921
|
-
}).optional()
|
|
2922
|
-
});
|
|
2923
|
-
const InnerSchema = z.object({
|
|
2924
|
-
text: z.object({
|
|
2925
|
-
value: z.string(),
|
|
2926
|
-
annotations: z.array(z.object({ url_citation: z.object({
|
|
2927
|
-
title: z.string(),
|
|
2928
|
-
url: z.string()
|
|
2929
|
-
}).optional() })).nullable().optional()
|
|
2930
|
-
}),
|
|
2931
|
-
bing_searches: z.array(z.unknown()).nullable().optional()
|
|
2932
|
-
});
|
|
2933
|
-
const MAX_SEARCHES_PER_SECOND = 3;
|
|
2934
|
-
let searchTimestamps = [];
|
|
2935
|
-
let throttleChain = Promise.resolve();
|
|
2936
|
-
async function throttleSearch() {
|
|
2937
|
-
const myTurn = throttleChain.then(async () => {
|
|
2938
|
-
const now = Date.now();
|
|
2939
|
-
searchTimestamps = searchTimestamps.filter((t) => now - t < 1e3);
|
|
2940
|
-
if (searchTimestamps.length >= MAX_SEARCHES_PER_SECOND) {
|
|
2941
|
-
const waitMs = 1e3 - (now - searchTimestamps[0]);
|
|
2942
|
-
if (waitMs > 0) {
|
|
2943
|
-
consola.debug(`Web search rate limited, waiting ${waitMs}ms`);
|
|
2944
|
-
await sleep(waitMs);
|
|
2945
|
-
}
|
|
2946
|
-
}
|
|
2947
|
-
searchTimestamps.push(Date.now());
|
|
2948
|
-
});
|
|
2949
|
-
throttleChain = myTurn.catch(() => {});
|
|
2950
|
-
return myTurn;
|
|
2951
|
-
}
|
|
2952
|
-
function mcpHeaders(sid) {
|
|
2953
|
-
if (!state.githubToken) throw new Error("GitHub token missing — re-run auth flow. Web search uses the GitHub PAT (not the Copilot token); the on-disk token at ~/.local/share/github-router/github_token must be present.");
|
|
2954
|
-
const headers = {
|
|
2955
|
-
Authorization: `Bearer ${state.githubToken}`,
|
|
2956
|
-
"content-type": "application/json",
|
|
2957
|
-
accept: "application/json, text/event-stream",
|
|
2958
|
-
"X-MCP-Host": "copilot-cli",
|
|
2959
|
-
"X-MCP-Toolsets": "web_search",
|
|
2960
|
-
"Mcp-Protocol-Version": "2025-06-18",
|
|
2961
|
-
"user-agent": `GitHubCopilotChat/${copilotVersion(state)}`
|
|
2962
|
-
};
|
|
2963
|
-
if (sid) headers["Mcp-Session-Id"] = sid;
|
|
2964
|
-
return headers;
|
|
2965
|
-
}
|
|
2966
|
-
async function postMcp(body, sid, retry = true) {
|
|
2967
|
-
const url = `${copilotBaseUrl(state)}/mcp`;
|
|
2968
|
-
const res = await fetch(url, {
|
|
2969
|
-
method: "POST",
|
|
2970
|
-
headers: mcpHeaders(sid),
|
|
2971
|
-
body: JSON.stringify(body)
|
|
2972
|
-
});
|
|
2973
|
-
if (!res.ok && retry && res.status >= 500) {
|
|
2974
|
-
await sleep(500);
|
|
2975
|
-
return postMcp(body, sid, false);
|
|
2976
|
-
}
|
|
2977
|
-
return res;
|
|
2978
|
-
}
|
|
2979
|
-
async function searchWeb(query) {
|
|
2980
|
-
await throttleSearch();
|
|
2981
|
-
consola.info(`Web search (MCP): "${query.slice(0, 80)}"`);
|
|
2982
|
-
const callId = Math.floor(Math.random() * 1e9);
|
|
2983
|
-
let sid;
|
|
2984
|
-
try {
|
|
2985
|
-
const initRes = await postMcp({
|
|
2986
|
-
jsonrpc: "2.0",
|
|
2987
|
-
id: 1,
|
|
2988
|
-
method: "initialize",
|
|
2989
|
-
params: {
|
|
2990
|
-
protocolVersion: "2024-11-05",
|
|
2991
|
-
capabilities: {},
|
|
2992
|
-
clientInfo: {
|
|
2993
|
-
name: "GitHubCopilotChat",
|
|
2994
|
-
version: copilotVersion(state)
|
|
2995
|
-
}
|
|
2996
|
-
}
|
|
2997
|
-
});
|
|
2998
|
-
if (!initRes.ok) {
|
|
2999
|
-
consola.error("MCP initialize failed", initRes.status);
|
|
3000
|
-
throw new HTTPError("MCP initialize failed", initRes);
|
|
3001
|
-
}
|
|
3002
|
-
sid = initRes.headers.get("mcp-session-id") ?? void 0;
|
|
3003
|
-
if (!sid) throw new HTTPError("MCP initialize: missing Mcp-Session-Id header", initRes);
|
|
3004
|
-
const notifRes = await postMcp({
|
|
3005
|
-
jsonrpc: "2.0",
|
|
3006
|
-
method: "notifications/initialized"
|
|
3007
|
-
}, sid);
|
|
3008
|
-
if (!notifRes.ok && notifRes.status !== 202) {
|
|
3009
|
-
consola.error("MCP notifications/initialized failed", notifRes.status);
|
|
3010
|
-
throw new HTTPError("MCP notifications/initialized failed", notifRes);
|
|
3011
|
-
}
|
|
3012
|
-
const callRes = await postMcp({
|
|
3013
|
-
jsonrpc: "2.0",
|
|
3014
|
-
id: callId,
|
|
3015
|
-
method: "tools/call",
|
|
3016
|
-
params: {
|
|
3017
|
-
name: "web_search",
|
|
3018
|
-
arguments: { query }
|
|
3019
|
-
}
|
|
3020
|
-
}, sid);
|
|
3021
|
-
if (!callRes.ok) {
|
|
3022
|
-
consola.error("MCP tools/call failed", callRes.status);
|
|
3023
|
-
throw new HTTPError("MCP tools/call failed", callRes);
|
|
3024
|
-
}
|
|
3025
|
-
let rpc;
|
|
3026
|
-
for await (const ev of events(callRes)) {
|
|
3027
|
-
if (!ev.data) continue;
|
|
3028
|
-
let parsedJson;
|
|
3029
|
-
try {
|
|
3030
|
-
parsedJson = JSON.parse(ev.data);
|
|
3031
|
-
} catch {
|
|
3032
|
-
continue;
|
|
3033
|
-
}
|
|
3034
|
-
const parsed = RpcSchema.safeParse(parsedJson);
|
|
3035
|
-
if (parsed.success && parsed.data.id === callId) {
|
|
3036
|
-
rpc = parsed.data;
|
|
3037
|
-
break;
|
|
3038
|
-
}
|
|
3039
|
-
}
|
|
3040
|
-
if (!rpc) throw new HTTPError("MCP tools/call: no matching response id in SSE stream", callRes);
|
|
3041
|
-
if (rpc.error) throw new HTTPError(`MCP error ${rpc.error.code}: ${rpc.error.message}`, callRes);
|
|
3042
|
-
if (rpc.result?.isError) throw new HTTPError("MCP web_search tool error", callRes);
|
|
3043
|
-
const text = rpc.result?.content?.[0]?.text;
|
|
3044
|
-
if (!text) throw new HTTPError("MCP web_search: empty content", callRes);
|
|
3045
|
-
let innerRaw;
|
|
3046
|
-
try {
|
|
3047
|
-
innerRaw = JSON.parse(text);
|
|
3048
|
-
} catch (err) {
|
|
3049
|
-
throw new HTTPError(`MCP web_search: inner content not JSON: ${err instanceof Error ? err.message : String(err)}`, callRes);
|
|
3050
|
-
}
|
|
3051
|
-
const innerParsed = InnerSchema.safeParse(innerRaw);
|
|
3052
|
-
if (!innerParsed.success) throw new HTTPError(`MCP web_search: inner content shape changed (${innerParsed.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join("; ")})`, callRes);
|
|
3053
|
-
const inner = innerParsed.data;
|
|
3054
|
-
const references = [];
|
|
3055
|
-
for (const ann of inner.text.annotations ?? []) {
|
|
3056
|
-
const cite = ann.url_citation;
|
|
3057
|
-
if (cite && !cite.url.toLowerCase().includes("bing.com/search")) references.push({
|
|
3058
|
-
title: cite.title,
|
|
3059
|
-
url: cite.url
|
|
3060
|
-
});
|
|
3061
|
-
}
|
|
3062
|
-
consola.debug(`Web search returned ${references.length} references`);
|
|
3063
|
-
return {
|
|
3064
|
-
content: inner.text.value,
|
|
3065
|
-
references
|
|
3066
|
-
};
|
|
3067
|
-
} finally {
|
|
3068
|
-
if (sid) try {
|
|
3069
|
-
fetch(`${copilotBaseUrl(state)}/mcp`, {
|
|
3070
|
-
method: "DELETE",
|
|
3071
|
-
headers: mcpHeaders(sid)
|
|
3072
|
-
}).catch(() => {});
|
|
3073
|
-
} catch {}
|
|
3074
|
-
}
|
|
3075
|
-
}
|
|
3076
|
-
|
|
3077
3213
|
//#endregion
|
|
3078
3214
|
//#region src/routes/chat-completions/handler.ts
|
|
3079
3215
|
const ENCODER$2 = new TextEncoder();
|
|
@@ -3299,6 +3435,125 @@ embeddingRoutes.post("/", async (c) => {
|
|
|
3299
3435
|
}
|
|
3300
3436
|
});
|
|
3301
3437
|
|
|
3438
|
+
//#endregion
|
|
3439
|
+
//#region src/services/copilot/create-messages.ts
|
|
3440
|
+
/**
 * Assemble the headers for a request to Copilot's /v1/messages endpoint,
 * intended to match what VS Code Copilot Chat sends.
 *
 * Layering, lowest to highest precedence:
 *   1. `copilotHeaders(state)` — per the original note this supplies
 *      Authorization, content-type, copilot-integration-id, editor-version,
 *      editor-plugin-version, user-agent, openai-intent,
 *      x-github-api-version, x-request-id and
 *      x-vscode-user-agent-library-version.
 *   2. The /v1/messages-specific values set here: accept, openai-intent,
 *      x-interaction-type, plus
 *      - X-Initiator (VS Code sets it dynamically; "agent" is safe for CLI use)
 *      - anthropic-version (sent by VS Code's Anthropic SDK)
 *      - X-Interaction-Id (VS Code sends a session-scoped UUID; a fresh
 *        UUID is generated on every call here)
 *   3. `extraHeaders` — lets callers forward client-supplied beta headers
 *      (anthropic-beta) so Copilot enables extended features.
 *
 * copilot-vision-request is intentionally omitted: per the original note,
 * VS Code only sends it when images are present and the native
 * /v1/messages endpoint handles vision without it.
 *
 * @param {Record<string, string>} [extraHeaders] caller-supplied overrides.
 * @returns {Record<string, string>} merged header map.
 */
function buildHeaders(extraHeaders) {
	const baseHeaders = copilotHeaders(state);
	const messagesHeaders = {
		accept: "application/json",
		"openai-intent": "messages-proxy",
		"x-interaction-type": "conversation-agent",
		"X-Initiator": "agent",
		"anthropic-version": "2023-06-01",
		"X-Interaction-Id": randomUUID()
	};
	return {
		...baseHeaders,
		...messagesHeaders,
		...extraHeaders
	};
}
|
|
3471
|
+
/**
 * Shared transport for Copilot's native Anthropic-compatible endpoints.
 * Both createMessages and countTokens delegate here so the timeout/abort
 * composition and the error handling cannot drift apart.
 *
 * @param {{ path: string, logName: string, errorName: string }} route
 *   `path` is appended to the Copilot base URL (with `?beta=true`) and is also
 *   the label handed to tryRefreshAndRetry; `logName` / `errorName` only shape
 *   the log line and the HTTPError message.
 * @param {string} body Already-serialized request body.
 * @param {Record<string, string>} [extraHeaders] Forwarded to buildHeaders.
 * @param {AbortSignal} [callerSignal] Optional caller cancellation signal.
 * @returns {Promise<Response>} The raw upstream Response when `response.ok`.
 * @throws {Error} When no Copilot token is present in `state`.
 * @throws {HTTPError} When the upstream answers with a non-ok status.
 */
async function forwardToCopilotMessagesApi(route, body, extraHeaders, callerSignal) {
  if (!state.copilotToken) throw new Error("Copilot token not found");
  const url = `${copilotBaseUrl(state)}${route.path}?beta=true`;
  consola.debug(`Forwarding to ${url}`);
  // Rebuilt on every attempt so a retry gets fresh headers and a fresh
  // timeout signal rather than one that has already been ticking.
  const doFetch = () => {
    const fetchInit = {
      method: "POST",
      headers: buildHeaders(extraHeaders),
      body
    };
    const signals = [];
    if (UPSTREAM_FETCH_TIMEOUT_MS > 0) signals.push(AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS));
    if (callerSignal) signals.push(callerSignal);
    // Only compose when there is more than one signal; leave `signal` unset
    // entirely when there is none.
    if (signals.length === 1) fetchInit.signal = signals[0];
    else if (signals.length > 1) fetchInit.signal = AbortSignal.any(signals);
    return fetch(url, fetchInit);
  };
  const response = await tryRefreshAndRetry(doFetch, route.path);
  if (response.ok) return response;
  let errorBody = "";
  try {
    errorBody = await response.text();
  } catch {
    errorBody = "(could not read error body)";
  }
  consola.error(`Copilot ${route.logName} error: ${response.status} ${errorBody}`);
  // The error body is re-materialized as decoded text, so the upstream
  // framing headers no longer describe it. Drop them so anything that relays
  // this Response does not advertise a stale encoding or length.
  const headers = new Headers(response.headers);
  for (const stale of ["content-encoding", "content-length", "transfer-encoding"]) headers.delete(stale);
  throw new HTTPError(`Copilot ${route.errorName} request failed`, new Response(errorBody, {
    status: response.status,
    statusText: response.statusText,
    headers
  }));
}
/**
 * Forward an Anthropic Messages API request to Copilot's native /v1/messages
 * endpoint. Returns the raw Response so callers can handle streaming vs
 * non-streaming themselves.
 *
 * `callerSignal` (optional) is composed with UPSTREAM_FETCH_TIMEOUT_MS via
 * AbortSignal.any so callers (e.g. the peer-MCP `opus-critic` persona) can
 * cancel the upstream call when their own per-tool-call ceiling fires.
 *
 * @param {string} body Serialized Messages API payload.
 * @param {Record<string, string>} [extraHeaders] Extra headers (e.g. anthropic-beta).
 * @param {AbortSignal} [callerSignal] Optional caller cancellation signal.
 * @returns {Promise<Response>} Raw upstream Response on success.
 * @throws {HTTPError} On a non-ok upstream status.
 */
async function createMessages(body, extraHeaders, callerSignal) {
  return forwardToCopilotMessagesApi({
    path: "/v1/messages",
    logName: "/v1/messages",
    errorName: "messages"
  }, body, extraHeaders, callerSignal);
}
/**
 * Forward an Anthropic count_tokens request to Copilot's native endpoint.
 * Returns the raw Response.
 *
 * `callerSignal` is composed with UPSTREAM_FETCH_TIMEOUT_MS exactly as in
 * createMessages.
 *
 * @param {string} body Serialized count_tokens payload.
 * @param {Record<string, string>} [extraHeaders] Extra headers (e.g. anthropic-beta).
 * @param {AbortSignal} [callerSignal] Optional caller cancellation signal.
 * @returns {Promise<Response>} Raw upstream Response on success.
 * @throws {HTTPError} On a non-ok upstream status.
 */
async function countTokens(body, extraHeaders, callerSignal) {
  return forwardToCopilotMessagesApi({
    path: "/v1/messages/count_tokens",
    logName: "count_tokens",
    errorName: "count_tokens"
  }, body, extraHeaders, callerSignal);
}
|
|
3556
|
+
|
|
3302
3557
|
//#endregion
|
|
3303
3558
|
//#region src/services/copilot/create-responses.ts
|
|
3304
3559
|
const createResponses = async (payload, modelHeaders, callerSignal) => {
|
|
@@ -3360,27 +3615,6 @@ function detectAgentCall(input) {
|
|
|
3360
3615
|
const MCP_PROTOCOL_VERSION = "2025-06-18";
|
|
3361
3616
|
const SERVER_NAME = "github-router-peers";
|
|
3362
3617
|
const SERVER_VERSION = "1";
|
|
3363
|
-
/**
|
|
3364
|
-
* Reasoning effort levels accepted by Copilot's /v1/responses (gpt-5.x) and
|
|
3365
|
-
* /v1/chat/completions endpoints. Per the proxy's existing thinking-mode
|
|
3366
|
-
* translator (CLAUDE.md "Thinking-mode translation"), Copilot's adaptive-
|
|
3367
|
-
* thinking path uses these same buckets:
|
|
3368
|
-
* <2k tokens → low, <8k → medium, <24k → high, else → xhigh.
|
|
3369
|
-
*
|
|
3370
|
-
* Default `high` for peer reviews — adversarial-by-design but still cost-
|
|
3371
|
-
* conscious. Callers can pass `xhigh` explicitly for deep dives, or `medium`
|
|
3372
|
-
* for quick sanity checks.
|
|
3373
|
-
*/
|
|
3374
|
-
const EFFORT_LEVELS = [
|
|
3375
|
-
"low",
|
|
3376
|
-
"medium",
|
|
3377
|
-
"high",
|
|
3378
|
-
"xhigh"
|
|
3379
|
-
];
|
|
3380
|
-
const DEFAULT_EFFORT = "high";
|
|
3381
|
-
function isEffort(v) {
|
|
3382
|
-
return typeof v === "string" && EFFORT_LEVELS.includes(v);
|
|
3383
|
-
}
|
|
3384
3618
|
/** Bounded concurrency. Originally capped at 2 (commit 4317a25) as a defensive
|
|
3385
3619
|
* pre-launch guess against Opus's natural pattern of fanning out to all three
|
|
3386
3620
|
* critics at once. Raised to 8 (Phase 2D of the peer-MCP plan) so the
|
|
@@ -3485,10 +3719,10 @@ function geminiAvailable() {
|
|
|
3485
3719
|
return models.some((m) => /^gemini-3\..*pro/i.test(m.id));
|
|
3486
3720
|
}
|
|
3487
3721
|
function activePersonas() {
|
|
3488
|
-
return PERSONAS_READ.filter((p) => !p.
|
|
3722
|
+
return PERSONAS_READ.filter((p) => !p.requiresGeminiCatalog || geminiAvailable());
|
|
3489
3723
|
}
|
|
3490
3724
|
function toolEntries() {
|
|
3491
|
-
|
|
3725
|
+
const personaEntries = activePersonas().map((p) => ({
|
|
3492
3726
|
name: p.toolNameHttp,
|
|
3493
3727
|
description: p.description,
|
|
3494
3728
|
inputSchema: {
|
|
@@ -3506,12 +3740,18 @@ function toolEntries() {
|
|
|
3506
3740
|
},
|
|
3507
3741
|
effort: {
|
|
3508
3742
|
type: "string",
|
|
3509
|
-
enum: [...
|
|
3510
|
-
description: `Reasoning depth (
|
|
3743
|
+
enum: [...p.allowedEfforts],
|
|
3744
|
+
description: `Reasoning depth (${p.allowedEfforts.join(" | ")}). Default "${p.defaultEffort}". Higher tiers cost more wall-clock; lower tiers are quicker sanity checks. ` + (p.endpoint === "/v1/chat/completions" ? "Note: for gemini routed via /v1/chat/completions, the upstream may silently ignore this knob." : "")
|
|
3511
3745
|
}
|
|
3512
3746
|
}
|
|
3513
3747
|
}
|
|
3514
3748
|
}));
|
|
3749
|
+
const nonPersonaEntries = NON_PERSONA_MCP_TOOLS.map((t) => ({
|
|
3750
|
+
name: t.toolNameHttp,
|
|
3751
|
+
description: t.description,
|
|
3752
|
+
inputSchema: t.inputSchema
|
|
3753
|
+
}));
|
|
3754
|
+
return [...personaEntries, ...nonPersonaEntries];
|
|
3515
3755
|
}
|
|
3516
3756
|
function buildUserText(prompt, context) {
|
|
3517
3757
|
if (!context) return prompt;
|
|
@@ -3539,6 +3779,11 @@ function extractChatCompletionText(response) {
|
|
|
3539
3779
|
const c = choice.message?.content;
|
|
3540
3780
|
return typeof c === "string" ? c : "";
|
|
3541
3781
|
}
|
|
3782
|
+
/**
 * Concatenate the text of every `text` content block in an Anthropic
 * Messages-style response body.
 *
 * Tolerant by design: a missing/non-object response, a `content` field that
 * is not an array, or malformed (null / non-text) blocks contribute nothing
 * rather than throwing, so the caller's empty-output check produces the
 * error message instead of an opaque TypeError.
 *
 * @param {{ content?: Array<{ type?: string, text?: unknown }> } | null | undefined} response
 *   Parsed JSON body of a Messages response.
 * @returns {string} All text blocks joined with no separator ("" when none).
 */
function extractMessagesText(response) {
  const blocks = response?.content;
  if (!Array.isArray(blocks)) return "";
  return blocks
    .filter((block) => block?.type === "text" && typeof block.text === "string")
    .map((block) => block.text)
    .join("");
}
|
|
3542
3787
|
function toolError(message) {
|
|
3543
3788
|
return {
|
|
3544
3789
|
content: [{
|
|
@@ -3548,6 +3793,94 @@ function toolError(message) {
|
|
|
3548
3793
|
isError: true
|
|
3549
3794
|
};
|
|
3550
3795
|
}
|
|
3796
|
+
/**
 * Size-based pre-flight limits that turn a call likely to blow through the
 * 60s MCP tools/call ceiling into an immediate, actionable error rather than
 * a timeout that leaks an in-flight slot.
 *
 * Measurements recorded by the original author (live against Copilot,
 * 2026-05-14):
 *   gpt-5.5 high on a ~600B prompt = 23.8s → ~76s on 8KB (rough linear)
 *   gpt-5.3-codex high on ~600B    = 16.0s → ~64s on 12KB
 *   claude-opus-4-7 medium (thinking=3000) on a trivial prompt = 22.5s,
 *     but the model self-paces its budget → ~50s+ on a real ~6KB review
 *
 * predictedTooLong yields `{ tooLong: true, capBytes }` when a table row
 * matches the persona's tool name and the effort exactly and the brief is
 * strictly larger than that row's limit; otherwise `{ tooLong: false }`.
 *
 * SCOPE: JSON path only. The caller (handleMcpPost) must apply this only
 * when the request does not accept an event stream. The SSE path
 * (handleToolsCallSSE) keeps the connection alive past the ceiling with
 * heartbeats, so rejecting by size there would take away its main benefit.
 * JSON-path clients (raw curl with `Accept: application/json`, older MCP
 * clients unaware of SSE) still hit the tools/call timer, so this cap is
 * their only route to a fast error.
 *
 * INVARIANT: the check must run BEFORE inFlightToolsCall is incremented, so
 * a rejected call never holds a slot. handleMcpPost runs it before handing
 * off to handleRpc → handleToolsCall (which increments the counter).
 * Documented in CLAUDE.md.
 *
 * gemini_critic has no row (long-context model; Copilot may auto-pace).
 */
const PRE_FLIGHT_CAPS = [
  { toolName: "codex_critic", effort: "high", maxBriefBytes: 8 * 1024 },
  { toolName: "codex_reviewer", effort: "high", maxBriefBytes: 12 * 1024 },
  { toolName: "opus_critic", effort: "medium", maxBriefBytes: 6 * 1024 }
];
function predictedTooLong(persona, effort, briefBytes) {
  // First row that matches tool + effort AND is exceeded by the brief size.
  const exceeded = PRE_FLIGHT_CAPS.find((row) => {
    const sameTier = row.toolName === persona.toolNameHttp && row.effort === effort;
    return sameTier && briefBytes > row.maxBriefBytes;
  });
  if (exceeded === void 0) return { tooLong: false };
  return {
    tooLong: true,
    capBytes: exceeded.maxBriefBytes
  };
}
|
|
3850
|
+
/**
 * Pre-flight size gate for tools/call on the JSON (non-SSE) path.
 *
 * Produces a JSON-RPC *result* envelope wrapping a tool error when the
 * (persona, effort, brief size) tuple is predicted to exceed the 60s
 * tools/call ceiling; produces undefined when the call should go ahead.
 *
 * Any request-shape problem also yields undefined on purpose, so that
 * handleRpc can answer with its canonical response instead:
 *   - notification (no id)                   → 202 + empty body
 *   - missing/unknown tool name              → -32601
 *   - missing prompt                         → -32602
 *   - effort that is not a valid level       → -32602
 *   - effort outside persona.allowedEfforts  → -32602
 *
 * @param {object} body Parsed JSON-RPC request whose method is tools/call.
 * @returns {object | undefined} Rejection envelope, or undefined to proceed.
 */
function jsonPathPreflightCap(body) {
  // Notifications carry no id and get no result envelope.
  if (body.id === void 0) return void 0;
  const rpcParams = body.params ?? {};
  const toolArgs = rpcParams.arguments ?? {};
  const toolName = typeof rpcParams.name === "string" ? rpcParams.name : "";
  const promptText = typeof toolArgs.prompt === "string" ? toolArgs.prompt : "";
  if (toolName === "" || promptText === "") return void 0;
  const persona = activePersonas().find((candidate) => candidate.toolNameHttp === toolName);
  if (!persona) return void 0;
  const requestedEffort = toolArgs.effort;
  if (requestedEffort !== void 0) {
    // Invalid or disallowed efforts are left for handleRpc to report.
    if (!isEffort(requestedEffort)) return void 0;
    if (!persona.allowedEfforts.includes(requestedEffort)) return void 0;
  }
  const effort = requestedEffort === void 0 ? persona.defaultEffort : requestedEffort;
  const contextText = typeof toolArgs.context === "string" ? toolArgs.context : void 0;
  // Measure the brief as UTF-8 bytes of the combined prompt + context text.
  const briefBytes = Buffer.byteLength(buildUserText(promptText, contextText), "utf8");
  const verdict = predictedTooLong(persona, effort, briefBytes);
  if (verdict.tooLong) {
    return rpcResult(body.id, toolError(`pre-flight rejected: ${persona.toolNameHttp} at effort=${effort} on a ${briefBytes}-byte brief is empirically predicted to exceed the JSON tools/call timeout (cap=${verdict.capBytes} bytes for this tier). Either drop to a lower effort tier, split the brief into 2-4 parallel sub-calls per the decomposition guidance, or send Accept: text/event-stream to use the SSE path which bypasses this cap.`));
  }
  return void 0;
}
|
|
3551
3884
|
async function callPersona(persona, prompt, context, effort, signal) {
|
|
3552
3885
|
const resolvedModel = resolveModel(persona.model);
|
|
3553
3886
|
const userText = buildUserText(prompt, context);
|
|
@@ -3571,6 +3904,25 @@ async function callPersona(persona, prompt, context, effort, signal) {
|
|
|
3571
3904
|
text: text$1
|
|
3572
3905
|
}] };
|
|
3573
3906
|
}
|
|
3907
|
+
if (persona.endpoint === "/v1/messages") {
|
|
3908
|
+
const maxTokens = effort === "low" ? 4096 : effort === "medium" ? 8192 : effort === "high" ? 16384 : 32768;
|
|
3909
|
+
const text$1 = extractMessagesText(await (await createMessages(JSON.stringify({
|
|
3910
|
+
model: resolvedModel,
|
|
3911
|
+
max_tokens: maxTokens,
|
|
3912
|
+
system: persona.baseInstructions,
|
|
3913
|
+
thinking: { type: "adaptive" },
|
|
3914
|
+
output_config: { effort },
|
|
3915
|
+
messages: [{
|
|
3916
|
+
role: "user",
|
|
3917
|
+
content: userText
|
|
3918
|
+
}]
|
|
3919
|
+
}), void 0, signal)).json());
|
|
3920
|
+
if (!text$1) return toolError(`persona ${persona.agentName}: empty assistant output`);
|
|
3921
|
+
return { content: [{
|
|
3922
|
+
type: "text",
|
|
3923
|
+
text: text$1
|
|
3924
|
+
}] };
|
|
3925
|
+
}
|
|
3574
3926
|
const text = extractChatCompletionText(await createChatCompletions({
|
|
3575
3927
|
model: resolvedModel,
|
|
3576
3928
|
messages: [{
|
|
@@ -3604,17 +3956,23 @@ async function handleToolsCall(body) {
|
|
|
3604
3956
|
const params = body.params ?? {};
|
|
3605
3957
|
const name$1 = typeof params.name === "string" ? params.name : "";
|
|
3606
3958
|
const args = params.arguments ?? {};
|
|
3607
|
-
const prompt = typeof args.prompt === "string" ? args.prompt : "";
|
|
3608
|
-
const context = typeof args.context === "string" ? args.context : void 0;
|
|
3609
|
-
let effort = DEFAULT_EFFORT;
|
|
3610
|
-
if (args.effort !== void 0) {
|
|
3611
|
-
if (!isEffort(args.effort)) return rpcError(body.id, RPC_INVALID_PARAMS, `tools/call: arguments.effort must be one of ${EFFORT_LEVELS.join("|")}; got ${JSON.stringify(args.effort)}`);
|
|
3612
|
-
effort = args.effort;
|
|
3613
|
-
}
|
|
3614
3959
|
if (!name$1) return rpcError(body.id, RPC_INVALID_PARAMS, "tools/call missing name");
|
|
3615
3960
|
const persona = activePersonas().find((p) => p.toolNameHttp === name$1);
|
|
3616
|
-
|
|
3617
|
-
if (!
|
|
3961
|
+
const nonPersonaTool = persona ? void 0 : NON_PERSONA_MCP_TOOLS.find((t) => t.toolNameHttp === name$1);
|
|
3962
|
+
if (!persona && !nonPersonaTool) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
|
|
3963
|
+
let personaPrompt;
|
|
3964
|
+
let personaContext;
|
|
3965
|
+
let personaEffort;
|
|
3966
|
+
if (persona) {
|
|
3967
|
+
if (args.effort !== void 0 && !isEffort(args.effort)) return rpcError(body.id, RPC_INVALID_PARAMS, `tools/call: arguments.effort must be one of ${EFFORT_LEVELS.join("|")}; got ${JSON.stringify(args.effort)}`);
|
|
3968
|
+
const requestedEffort = args.effort;
|
|
3969
|
+
const prompt = typeof args.prompt === "string" ? args.prompt : "";
|
|
3970
|
+
if (!prompt) return rpcError(body.id, RPC_INVALID_PARAMS, `tools/call: arguments.prompt is required`);
|
|
3971
|
+
personaPrompt = prompt;
|
|
3972
|
+
personaContext = typeof args.context === "string" ? args.context : void 0;
|
|
3973
|
+
if (requestedEffort !== void 0 && !persona.allowedEfforts.includes(requestedEffort)) return rpcError(body.id, RPC_INVALID_PARAMS, `tools/call: persona "${persona.toolNameHttp}" does not accept effort="${requestedEffort}". Allowed: ${persona.allowedEfforts.join("|")}.`);
|
|
3974
|
+
personaEffort = requestedEffort ?? persona.defaultEffort;
|
|
3975
|
+
}
|
|
3618
3976
|
if (inFlightToolsCall >= MAX_INFLIGHT_TOOLS_CALL) return rpcResult(body.id, {
|
|
3619
3977
|
content: [{
|
|
3620
3978
|
type: "text",
|
|
@@ -3630,11 +3988,13 @@ async function handleToolsCall(body) {
|
|
|
3630
3988
|
aborter = new AbortController();
|
|
3631
3989
|
inflightAborts.set(abortKey, aborter);
|
|
3632
3990
|
}
|
|
3991
|
+
const telemetryName = persona ? persona.agentName : nonPersonaTool.toolNameHttp;
|
|
3992
|
+
const telemetryModel = persona ? persona.model : "(non-persona)";
|
|
3633
3993
|
try {
|
|
3634
|
-
const result = await callPersona(persona,
|
|
3994
|
+
const result = persona ? await callPersona(persona, personaPrompt, personaContext, personaEffort, aborter?.signal) : await nonPersonaTool.handler(args, aborter?.signal);
|
|
3635
3995
|
logTelemetry({
|
|
3636
|
-
name:
|
|
3637
|
-
model:
|
|
3996
|
+
name: telemetryName,
|
|
3997
|
+
model: telemetryModel,
|
|
3638
3998
|
durationMs: Date.now() - startedAt,
|
|
3639
3999
|
result: result.isError ? "isError" : "ok"
|
|
3640
4000
|
});
|
|
@@ -3642,8 +4002,8 @@ async function handleToolsCall(body) {
|
|
|
3642
4002
|
} catch (err) {
|
|
3643
4003
|
const message = err instanceof Error ? err.message : String(err);
|
|
3644
4004
|
logTelemetry({
|
|
3645
|
-
name:
|
|
3646
|
-
model:
|
|
4005
|
+
name: telemetryName,
|
|
4006
|
+
model: telemetryModel,
|
|
3647
4007
|
durationMs: Date.now() - startedAt,
|
|
3648
4008
|
result: "exception",
|
|
3649
4009
|
errorMessage: message
|
|
@@ -3651,7 +4011,7 @@ async function handleToolsCall(body) {
|
|
|
3651
4011
|
return rpcResult(body.id, {
|
|
3652
4012
|
content: [{
|
|
3653
4013
|
type: "text",
|
|
3654
|
-
text: `persona ${persona.agentName} failed: ${message}`
|
|
4014
|
+
text: persona ? `persona ${persona.agentName} failed: ${message}` : `tool ${nonPersonaTool.toolNameHttp} failed: ${message}`
|
|
3655
4015
|
}],
|
|
3656
4016
|
isError: true
|
|
3657
4017
|
});
|
|
@@ -3814,6 +4174,11 @@ async function handleMcpPost(c) {
|
|
|
3814
4174
|
consola.debug("/mcp parse error:", err);
|
|
3815
4175
|
return c.json(rpcError(null, RPC_PARSE_ERROR, "request body is not valid JSON"), 200);
|
|
3816
4176
|
}
|
|
4177
|
+
if (typeof body === "object" && body !== null && !Array.isArray(body) && body.method === "tools/call" && acceptsEventStream(c.req.header("accept"))) return handleToolsCallSSE(body);
|
|
4178
|
+
if (typeof body === "object" && body !== null && !Array.isArray(body) && body.method === "tools/call") {
|
|
4179
|
+
const preflight = jsonPathPreflightCap(body);
|
|
4180
|
+
if (preflight) return c.json(preflight, 200);
|
|
4181
|
+
}
|
|
3817
4182
|
try {
|
|
3818
4183
|
const { status, body: respBody } = await handleRpc(c, body);
|
|
3819
4184
|
if (respBody === null) return c.body(null, status);
|
|
@@ -3824,6 +4189,111 @@ async function handleMcpPost(c) {
|
|
|
3824
4189
|
return c.json(rpcError(echoId, RPC_INTERNAL_ERROR, err instanceof Error ? err.message : String(err)), 200);
|
|
3825
4190
|
}
|
|
3826
4191
|
}
|
|
4192
|
+
/**
 * Accept-header parsing for MCP Streamable HTTP. Per the MCP 2025-06-18
 * spec, clients send `Accept: application/json, text/event-stream` to
 * indicate they can consume either response shape. The server picks; for
 * tools/call we pick SSE because Claude Code's per-tool-call timer
 * (~60s on v2.1.113+) does not fire on streamed responses.
 *
 * Lenient parse: split on commas, separate the media type from its
 * parameters, trim, lowercase, and look for the exact `text/event-stream`
 * token. Wildcards (`*` + `/*`, `text/*`) are intentionally NOT treated as a
 * request for SSE. A media range carrying `q=0` is an explicit "not
 * acceptable" and is skipped; an unparseable q-value is treated leniently
 * as acceptable.
 *
 * @param {string | undefined} accept Raw Accept header value.
 * @returns {boolean} true when the client lists text/event-stream with a
 *   non-zero (or absent) quality value; false on undefined / empty /
 *   JSON-only Accept.
 */
function acceptsEventStream(accept) {
  if (!accept) return false;
  return accept.toLowerCase().split(",").some((range) => {
    const [mediaType, ...params] = range.split(";").map((part) => part.trim());
    if (mediaType !== "text/event-stream") return false;
    for (const param of params) {
      const match = /^q\s*=\s*(.*)$/.exec(param);
      if (!match) continue;
      const quality = Number.parseFloat(match[1]);
      // q=0 means the client refuses this type; NaN stays acceptable.
      return Number.isNaN(quality) || quality > 0;
    }
    return true;
  });
}
|
|
4207
|
+
/**
 * SSE-streamed response for a single tools/call. The upstream work is
 * delegated to `handleToolsCall`, and the awaited JSON-RPC envelope is
 * wrapped in an SSE stream with periodic heartbeats while that call is in
 * flight. The JSON-path pre-flight size cap is not applied here:
 * handleMcpPost routes event-stream requests to this function before that
 * check runs.
 *
 * SSE event format (per MCP Streamable HTTP):
 *   event: message
 *   data: <json-rpc-2.0 message>\n\n
 *
 *   - Heartbeats are JSON-RPC `notifications/progress` notifications. The
 *     progress token is the client's `params._meta.progressToken` when it
 *     supplied one, otherwise the request id (null when there is neither).
 *     `progress` counts heartbeats sent so far, so it increases with every
 *     notification as the MCP progress spec requires.
 *   - The final message is whatever envelope handleToolsCall resolves with;
 *     if it rejects, an RPC_INTERNAL_ERROR envelope is sent instead.
 *   - On consumer cancel (ReadableStream.cancel) the heartbeat interval is
 *     cleared immediately, further writes are suppressed, and the
 *     AbortController registered in `inflightAborts` under the request id
 *     (if any) is aborted.
 *
 * Every controller.enqueue/close is wrapped in try/catch so the
 * "Controller is already closed" race is logged at debug level and
 * otherwise ignored.
 *
 * @param {object} body Parsed JSON-RPC tools/call request.
 * @returns {Promise<Response>} A 200 text/event-stream Response.
 */
const SSE_HEARTBEAT_INTERVAL_MS = 5e3;
async function handleToolsCallSSE(body) {
  const encoder = new TextEncoder();
  const callPromise = handleToolsCall(body);
  // State shared by start() and cancel(): cancel() must be able to stop the
  // heartbeat timer and silence later writes without waiting for upstream.
  let closed = false;
  let heartbeatHandle;
  let heartbeatsSent = 0;
  const stopHeartbeat = () => {
    if (heartbeatHandle === void 0) return;
    clearInterval(heartbeatHandle);
    heartbeatHandle = void 0;
  };
  // Prefer the token the client asked progress to be reported under.
  const progressToken = body.params?._meta?.progressToken ?? body.id ?? null;
  const stream = new ReadableStream({
    async start(controller) {
      const safeEnqueue = (chunk) => {
        if (closed) return;
        try {
          controller.enqueue(chunk);
        } catch (err) {
          consola.debug("/mcp SSE enqueue after close (expected race):", err);
          closed = true;
        }
      };
      const safeClose = () => {
        if (closed) return;
        closed = true;
        try {
          controller.close();
        } catch (err) {
          consola.debug("/mcp SSE close after close:", err);
        }
      };
      const sseFrame = (rpcMessage) => encoder.encode(`event: message\ndata: ${JSON.stringify(rpcMessage)}\n\n`);
      const heartbeatFrame = () => sseFrame({
        jsonrpc: "2.0",
        method: "notifications/progress",
        params: {
          progressToken,
          // First heartbeat reports 0, then 1, 2, ... (strictly increasing).
          progress: heartbeatsSent++,
          message: "in flight"
        }
      });
      // Send one heartbeat right away so the client sees bytes immediately.
      safeEnqueue(heartbeatFrame());
      heartbeatHandle = setInterval(() => safeEnqueue(heartbeatFrame()), SSE_HEARTBEAT_INTERVAL_MS);
      try {
        safeEnqueue(sseFrame(await callPromise));
      } catch (err) {
        consola.error("/mcp SSE upstream error:", err);
        safeEnqueue(sseFrame(rpcError(body.id ?? null, RPC_INTERNAL_ERROR, err instanceof Error ? err.message : String(err))));
      } finally {
        stopHeartbeat();
        safeClose();
      }
    },
    cancel() {
      // The consumer is gone: stop ticking and drop any late frames.
      closed = true;
      stopHeartbeat();
      const abortKey = body.id !== void 0 && body.id !== null ? body.id : void 0;
      if (abortKey !== void 0) {
        const aborter = inflightAborts.get(abortKey);
        if (aborter) aborter.abort(new Error("client disconnected SSE stream"));
      }
    }
  });
  return new Response(stream, {
    status: 200,
    headers: {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache, no-transform",
      "Connection": "keep-alive",
      "X-Accel-Buffering": "no"
    }
  });
}
|
|
3827
4297
|
function handleMcpDelete(c) {
|
|
3828
4298
|
const auth$1 = checkAuth(c);
|
|
3829
4299
|
if (!auth$1.ok) return c.json(rpcError(null, RPC_INVALID_REQUEST, auth$1.reason), auth$1.status);
|
|
@@ -3848,108 +4318,6 @@ mcpRoutes.delete("/", (c) => {
|
|
|
3848
4318
|
}
|
|
3849
4319
|
});
|
|
3850
4320
|
|
|
3851
|
-
//#endregion
|
|
3852
|
-
//#region src/services/copilot/create-messages.ts
|
|
3853
|
-
/**
|
|
3854
|
-
* Build headers that match what VS Code Copilot Chat sends to the Copilot API.
|
|
3855
|
-
*
|
|
3856
|
-
* copilotHeaders() provides: Authorization, content-type, copilot-integration-id,
|
|
3857
|
-
* editor-version, editor-plugin-version, user-agent, openai-intent,
|
|
3858
|
-
* x-github-api-version, x-request-id, x-vscode-user-agent-library-version.
|
|
3859
|
-
*
|
|
3860
|
-
* We add the remaining headers VS Code sends for /v1/messages:
|
|
3861
|
-
* - X-Initiator (VS Code sets dynamically; "agent" is safe for CLI use)
|
|
3862
|
-
* - anthropic-version (VS Code's Anthropic SDK sends this)
|
|
3863
|
-
* - X-Interaction-Id (VS Code sends a session-scoped UUID)
|
|
3864
|
-
*
|
|
3865
|
-
* We intentionally omit copilot-vision-request — VS Code only sends it when
|
|
3866
|
-
* images are present, and the native /v1/messages endpoint handles vision
|
|
3867
|
-
* without requiring the header.
|
|
3868
|
-
*
|
|
3869
|
-
* extraHeaders allows callers to forward client-supplied beta headers
|
|
3870
|
-
* (anthropic-beta) so Copilot enables extended features.
|
|
3871
|
-
*/
|
|
3872
|
-
function buildHeaders(extraHeaders) {
|
|
3873
|
-
return {
|
|
3874
|
-
...copilotHeaders(state),
|
|
3875
|
-
accept: "application/json",
|
|
3876
|
-
"openai-intent": "messages-proxy",
|
|
3877
|
-
"x-interaction-type": "conversation-agent",
|
|
3878
|
-
"X-Initiator": "agent",
|
|
3879
|
-
"anthropic-version": "2023-06-01",
|
|
3880
|
-
"X-Interaction-Id": randomUUID(),
|
|
3881
|
-
...extraHeaders
|
|
3882
|
-
};
|
|
3883
|
-
}
|
|
3884
|
-
/**
|
|
3885
|
-
* Forward an Anthropic Messages API request to Copilot's native /v1/messages endpoint.
|
|
3886
|
-
* Returns the raw Response so callers can handle streaming vs non-streaming.
|
|
3887
|
-
*/
|
|
3888
|
-
async function createMessages(body, extraHeaders) {
|
|
3889
|
-
if (!state.copilotToken) throw new Error("Copilot token not found");
|
|
3890
|
-
const url = `${copilotBaseUrl(state)}/v1/messages?beta=true`;
|
|
3891
|
-
consola.debug(`Forwarding to ${url}`);
|
|
3892
|
-
const doFetch = () => {
|
|
3893
|
-
const fetchInit = {
|
|
3894
|
-
method: "POST",
|
|
3895
|
-
headers: buildHeaders(extraHeaders),
|
|
3896
|
-
body
|
|
3897
|
-
};
|
|
3898
|
-
if (UPSTREAM_FETCH_TIMEOUT_MS > 0) fetchInit.signal = AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS);
|
|
3899
|
-
return fetch(url, fetchInit);
|
|
3900
|
-
};
|
|
3901
|
-
const response = await tryRefreshAndRetry(doFetch, "/v1/messages");
|
|
3902
|
-
if (!response.ok) {
|
|
3903
|
-
let errorBody = "";
|
|
3904
|
-
try {
|
|
3905
|
-
errorBody = await response.text();
|
|
3906
|
-
} catch {
|
|
3907
|
-
errorBody = "(could not read error body)";
|
|
3908
|
-
}
|
|
3909
|
-
consola.error(`Copilot /v1/messages error: ${response.status} ${errorBody}`);
|
|
3910
|
-
throw new HTTPError("Copilot messages request failed", new Response(errorBody, {
|
|
3911
|
-
status: response.status,
|
|
3912
|
-
statusText: response.statusText,
|
|
3913
|
-
headers: response.headers
|
|
3914
|
-
}));
|
|
3915
|
-
}
|
|
3916
|
-
return response;
|
|
3917
|
-
}
|
|
3918
|
-
/**
|
|
3919
|
-
* Forward an Anthropic count_tokens request to Copilot's native endpoint.
|
|
3920
|
-
* Returns the raw Response.
|
|
3921
|
-
*/
|
|
3922
|
-
async function countTokens(body, extraHeaders) {
|
|
3923
|
-
if (!state.copilotToken) throw new Error("Copilot token not found");
|
|
3924
|
-
const url = `${copilotBaseUrl(state)}/v1/messages/count_tokens?beta=true`;
|
|
3925
|
-
consola.debug(`Forwarding to ${url}`);
|
|
3926
|
-
const doFetch = () => {
|
|
3927
|
-
const fetchInit = {
|
|
3928
|
-
method: "POST",
|
|
3929
|
-
headers: buildHeaders(extraHeaders),
|
|
3930
|
-
body
|
|
3931
|
-
};
|
|
3932
|
-
if (UPSTREAM_FETCH_TIMEOUT_MS > 0) fetchInit.signal = AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS);
|
|
3933
|
-
return fetch(url, fetchInit);
|
|
3934
|
-
};
|
|
3935
|
-
const response = await tryRefreshAndRetry(doFetch, "/v1/messages/count_tokens");
|
|
3936
|
-
if (!response.ok) {
|
|
3937
|
-
let errorBody = "";
|
|
3938
|
-
try {
|
|
3939
|
-
errorBody = await response.text();
|
|
3940
|
-
} catch {
|
|
3941
|
-
errorBody = "(could not read error body)";
|
|
3942
|
-
}
|
|
3943
|
-
consola.error(`Copilot count_tokens error: ${response.status} ${errorBody}`);
|
|
3944
|
-
throw new HTTPError("Copilot count_tokens request failed", new Response(errorBody, {
|
|
3945
|
-
status: response.status,
|
|
3946
|
-
statusText: response.statusText,
|
|
3947
|
-
headers: response.headers
|
|
3948
|
-
}));
|
|
3949
|
-
}
|
|
3950
|
-
return response;
|
|
3951
|
-
}
|
|
3952
|
-
|
|
3953
4321
|
//#endregion
|
|
3954
4322
|
//#region src/services/advisor/advisor.ts
|
|
3955
4323
|
const ENCODER$1 = new TextEncoder();
|
|
@@ -5003,7 +5371,7 @@ async function handleCompletion(c) {
|
|
|
5003
5371
|
type: "error",
|
|
5004
5372
|
error: {
|
|
5005
5373
|
type: "invalid_request_error",
|
|
5006
|
-
message: "Inline `mcp_servers` body field is not supported by github-router
|
|
5374
|
+
message: "Inline `mcp_servers` body field is not supported by github-router. Configure remote MCP servers as local stdio entries in `~/.claude/mcp.json` instead — Claude Code will spawn them locally and the proxy passes their tool calls through transparently. (https://docs.claude.com/en/docs/claude-code/mcp)"
|
|
5007
5375
|
}
|
|
5008
5376
|
}, 400);
|
|
5009
5377
|
} catch {}
|
|
@@ -5971,6 +6339,7 @@ function getClaudeCodeEnvVars(serverUrl, model) {
|
|
|
5971
6339
|
ANTHROPIC_BASE_URL: serverUrl,
|
|
5972
6340
|
CLAUDE_CONFIG_DIR: PATHS.CLAUDE_CONFIG_DIR,
|
|
5973
6341
|
MCP_TIMEOUT: "600000",
|
|
6342
|
+
MCP_TOOL_TIMEOUT: "600000",
|
|
5974
6343
|
DISABLE_NON_ESSENTIAL_MODEL_CALLS: "1",
|
|
5975
6344
|
CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: "1",
|
|
5976
6345
|
DISABLE_TELEMETRY: "1"
|