github-router 0.3.22 → 0.3.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -0
- package/dist/main.js +702 -330
- package/dist/main.js.map +1 -1
- package/package.json +1 -1
package/dist/main.js
CHANGED
|
@@ -8,6 +8,8 @@ import path from "node:path";
|
|
|
8
8
|
import process$1 from "node:process";
|
|
9
9
|
import { execFile, execFileSync, spawn } from "node:child_process";
|
|
10
10
|
import { promisify } from "node:util";
|
|
11
|
+
import { events } from "fetch-event-stream";
|
|
12
|
+
import { z } from "zod";
|
|
11
13
|
import fs$1 from "node:fs";
|
|
12
14
|
import { Writable } from "node:stream";
|
|
13
15
|
import { serve } from "srvx";
|
|
@@ -15,8 +17,6 @@ import { getProxyForUrl } from "proxy-from-env";
|
|
|
15
17
|
import { Agent, ProxyAgent, setGlobalDispatcher } from "undici";
|
|
16
18
|
import { Hono } from "hono";
|
|
17
19
|
import { cors } from "hono/cors";
|
|
18
|
-
import { events } from "fetch-event-stream";
|
|
19
|
-
import { z } from "zod";
|
|
20
20
|
import clipboard from "clipboardy";
|
|
21
21
|
|
|
22
22
|
//#region src/lib/paths.ts
|
|
@@ -65,6 +65,9 @@ const CLAUDE_HOME_POLICY = new Map([
|
|
|
65
65
|
["cache", "ISOLATED"],
|
|
66
66
|
["logs", "ISOLATED"],
|
|
67
67
|
["paste-cache", "ISOLATED"],
|
|
68
|
+
["jobs", "ISOLATED"],
|
|
69
|
+
["daemon", "ISOLATED"],
|
|
70
|
+
["daemon.log", "ISOLATED"],
|
|
68
71
|
["projects", "SHARED"],
|
|
69
72
|
["sessions", "SHARED"],
|
|
70
73
|
["tasks", "SHARED"],
|
|
@@ -1643,8 +1646,206 @@ function launchChild(target, server$1, options = {}) {
|
|
|
1643
1646
|
});
|
|
1644
1647
|
}
|
|
1645
1648
|
|
|
1649
|
+
//#endregion
|
|
1650
|
+
//#region src/services/copilot/web-search.ts
|
|
1651
|
+
// JSON-RPC 2.0 envelope as read from the MCP SSE stream. Everything except
// `jsonrpc` is optional, so notifications, results and errors all validate.
const RpcSchema = z.object({
	jsonrpc: z.literal("2.0"),
	id: z.optional(z.number()),
	result: z.optional(z.object({
		content: z.optional(z.array(z.object({
			type: z.literal("text"),
			text: z.string()
		}))),
		isError: z.optional(z.boolean())
	})),
	error: z.optional(z.object({
		code: z.number(),
		message: z.string()
	}))
});
// Shape of the JSON document carried inside the tool result's text part:
// the answer text, optional URL citations, and an opaque list of searches.
const InnerSchema = z.object({
	text: z.object({
		value: z.string(),
		annotations: z.array(z.object({
			url_citation: z.optional(z.object({
				title: z.string(),
				url: z.string()
			}))
		})).nullish()
	}),
	bing_searches: z.array(z.unknown()).nullish()
});
|
|
1676
|
+
const MAX_SEARCHES_PER_SECOND = 3;
// Start times of recent searches; pruned to the last second on every call.
let searchTimestamps = [];
// Tail of the promise chain that serializes callers of throttleSearch().
let throttleChain = Promise.resolve();
/**
 * Wait for a free slot in the web-search rate limiter.
 *
 * Callers are queued one behind another on `throttleChain`. Each turn prunes
 * timestamps older than one second and, if the window is already full, sleeps
 * until the oldest remaining entry is a full second old before recording its
 * own timestamp.
 *
 * @returns {Promise<void>} Settles when this caller's turn has completed.
 */
async function throttleSearch() {
	const reserveSlot = async () => {
		const startedAt = Date.now();
		searchTimestamps = searchTimestamps.filter((stamp) => startedAt - stamp < 1e3);
		const windowIsFull = searchTimestamps.length >= MAX_SEARCHES_PER_SECOND;
		const delay = windowIsFull ? 1e3 - (startedAt - searchTimestamps[0]) : 0;
		if (delay > 0) {
			consola.debug(`Web search rate limited, waiting ${delay}ms`);
			await sleep(delay);
		}
		searchTimestamps.push(Date.now());
	};
	const myTurn = throttleChain.then(reserveSlot);
	// The shared chain swallows rejections so one failed turn cannot block later callers.
	throttleChain = myTurn.catch(() => {});
	return myTurn;
}
|
|
1695
|
+
/**
 * Build the request headers for Copilot's MCP endpoint.
 *
 * @param {string} [sid] - MCP session id; when truthy it is sent as `Mcp-Session-Id`.
 * @returns {Record<string, string>} Header map authenticated with the GitHub token.
 * @throws {Error} When `state.githubToken` is not set.
 */
function mcpHeaders(sid) {
	const token = state.githubToken;
	if (!token) throw new Error("GitHub token missing — re-run auth flow. Web search uses the GitHub PAT (not the Copilot token); the on-disk token at ~/.local/share/github-router/github_token must be present.");
	return {
		Authorization: `Bearer ${token}`,
		"content-type": "application/json",
		accept: "application/json, text/event-stream",
		"X-MCP-Host": "copilot-cli",
		"X-MCP-Toolsets": "web_search",
		"Mcp-Protocol-Version": "2025-06-18",
		"user-agent": `GitHubCopilotChat/${copilotVersion(state)}`,
		// Session header goes last and only once a session exists.
		...(sid ? { "Mcp-Session-Id": sid } : {})
	};
}
|
|
1709
|
+
/**
 * POST one JSON-RPC message to Copilot's `/mcp` endpoint.
 *
 * A 5xx response is retried exactly once after a 500 ms pause. Any other
 * response, including 4xx, is returned to the caller untouched.
 *
 * @param {object} body - JSON-RPC payload; serialized with JSON.stringify.
 * @param {string} [sid] - MCP session id forwarded to mcpHeaders().
 * @param {boolean} [retry=true] - Whether a 5xx response may still be retried.
 * @returns {Promise<Response>} The final fetch Response (may be non-OK).
 * @throws {Error} From mcpHeaders() when no GitHub token is loaded, or from fetch() on network failure.
 */
async function postMcp(body, sid, retry = true) {
	const url = `${copilotBaseUrl(state)}/mcp`;
	const res = await fetch(url, {
		method: "POST",
		headers: mcpHeaders(sid),
		body: JSON.stringify(body)
	});
	// status >= 500 already implies !res.ok, so one comparison is enough.
	if (retry && res.status >= 500) {
		// Release the failed response before retrying: an unread body keeps the
		// underlying connection checked out until it is garbage collected.
		// Best-effort only; a failed cancel must not mask the retry.
		await res.body?.cancel().catch(() => {});
		await sleep(500);
		return postMcp(body, sid, false);
	}
	return res;
}
|
|
1722
|
+
/**
 * Run one web search through Copilot's MCP endpoint.
 *
 * Flow: wait for a throttle slot, `initialize` a session, send
 * `notifications/initialized`, then `tools/call` the `web_search` tool and
 * read the SSE response until the message whose id matches this call. The
 * first text part of the tool result is itself JSON and is validated against
 * InnerSchema. The session is deleted (fire-and-forget) on every exit path.
 *
 * @param {string} query - Search query forwarded to the tool as-is.
 * @returns {Promise<{content: string, references: Array<{title: string, url: string}>}>}
 *   Answer text plus cited URLs; citations pointing at bing.com/search are dropped.
 * @throws {HTTPError} On a non-OK step, a missing session id, a missing or
 *   error RPC response, empty content, or an unexpected inner payload shape.
 * @throws {Error} From mcpHeaders() when no GitHub token is loaded.
 */
async function searchWeb(query) {
	await throttleSearch();
	consola.info(`Web search (MCP): "${query.slice(0, 80)}"`);
	// Random id so the matching response can be picked out of the SSE stream.
	const callId = Math.floor(Math.random() * 1e9);
	let sid;
	try {
		// NOTE(review): initialize advertises protocolVersion "2024-11-05" while
		// mcpHeaders() sends Mcp-Protocol-Version "2025-06-18" — confirm the
		// mismatch is intentional before changing either value.
		const initRes = await postMcp({
			jsonrpc: "2.0",
			id: 1,
			method: "initialize",
			params: {
				protocolVersion: "2024-11-05",
				capabilities: {},
				clientInfo: {
					name: "GitHubCopilotChat",
					version: copilotVersion(state)
				}
			}
		});
		if (!initRes.ok) {
			consola.error("MCP initialize failed", initRes.status);
			throw new HTTPError("MCP initialize failed", initRes);
		}
		sid = initRes.headers.get("mcp-session-id") ?? void 0;
		if (!sid) throw new HTTPError("MCP initialize: missing Mcp-Session-Id header", initRes);
		const notifRes = await postMcp({
			jsonrpc: "2.0",
			method: "notifications/initialized"
		}, sid);
		if (!notifRes.ok && notifRes.status !== 202) {
			consola.error("MCP notifications/initialized failed", notifRes.status);
			throw new HTTPError("MCP notifications/initialized failed", notifRes);
		}
		const callRes = await postMcp({
			jsonrpc: "2.0",
			id: callId,
			method: "tools/call",
			params: {
				name: "web_search",
				arguments: { query }
			}
		}, sid);
		if (!callRes.ok) {
			consola.error("MCP tools/call failed", callRes.status);
			throw new HTTPError("MCP tools/call failed", callRes);
		}
		let rpc;
		for await (const ev of events(callRes)) {
			if (!ev.data) continue;
			let parsedJson;
			try {
				parsedJson = JSON.parse(ev.data);
			} catch {
				// Non-JSON events are skipped; only the matching RPC message matters.
				continue;
			}
			const parsed = RpcSchema.safeParse(parsedJson);
			if (parsed.success && parsed.data.id === callId) {
				rpc = parsed.data;
				break;
			}
		}
		if (!rpc) throw new HTTPError("MCP tools/call: no matching response id in SSE stream", callRes);
		if (rpc.error) throw new HTTPError(`MCP error ${rpc.error.code}: ${rpc.error.message}`, callRes);
		if (rpc.result?.isError) {
			// Surface the tool's own explanation instead of a bare generic message.
			const detail = (rpc.result.content ?? []).map((part) => part.text).join("\n").trim();
			throw new HTTPError(detail ? `MCP web_search tool error: ${detail}` : "MCP web_search tool error", callRes);
		}
		const text = rpc.result?.content?.[0]?.text;
		if (!text) throw new HTTPError("MCP web_search: empty content", callRes);
		let innerRaw;
		try {
			innerRaw = JSON.parse(text);
		} catch (err) {
			throw new HTTPError(`MCP web_search: inner content not JSON: ${err instanceof Error ? err.message : String(err)}`, callRes);
		}
		const innerParsed = InnerSchema.safeParse(innerRaw);
		if (!innerParsed.success) throw new HTTPError(`MCP web_search: inner content shape changed (${innerParsed.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join("; ")})`, callRes);
		const inner = innerParsed.data;
		const references = [];
		for (const ann of inner.text.annotations ?? []) {
			const cite = ann.url_citation;
			// Links back to the search results page are not useful citations.
			if (cite && !cite.url.toLowerCase().includes("bing.com/search")) references.push({
				title: cite.title,
				url: cite.url
			});
		}
		consola.debug(`Web search returned ${references.length} references`);
		return {
			content: inner.text.value,
			references
		};
	} finally {
		// Best-effort session teardown: not awaited, and failures (including a
		// synchronous throw from mcpHeaders) are deliberately ignored so they
		// never replace the search result or the original error.
		if (sid) try {
			fetch(`${copilotBaseUrl(state)}/mcp`, {
				method: "DELETE",
				headers: mcpHeaders(sid)
			}).catch(() => {});
		} catch {}
	}
}
|
|
1819
|
+
|
|
1646
1820
|
//#endregion
|
|
1647
1821
|
//#region src/lib/peer-mcp-personas.ts
|
|
1822
|
+
/**
|
|
1823
|
+
* Reasoning effort levels accepted by Copilot's /v1/responses (gpt-5.x) and
|
|
1824
|
+
* /v1/chat/completions endpoints. Per the proxy's existing thinking-mode
|
|
1825
|
+
* translator (CLAUDE.md "Thinking-mode translation"), Copilot's adaptive-
|
|
1826
|
+
* thinking path uses these same buckets:
|
|
1827
|
+
* <2k tokens → low, <8k → medium, <24k → high, else → xhigh.
|
|
1828
|
+
*
|
|
1829
|
+
* Per-persona `allowedEfforts` and `defaultEffort` constrain which subset
|
|
1830
|
+
* each persona exposes — enforced in handler.ts:handleToolsCall.
|
|
1831
|
+
*
|
|
1832
|
+
* **xhigh on long-running personas works via SSE-streamed /mcp responses**
|
|
1833
|
+
* (handler.ts:handleToolsCallSSE). Claude Code's MCP HTTP client honors
|
|
1834
|
+
* `text/event-stream` responses without applying the ~60s per-tool-call
|
|
1835
|
+
* timer that previously broke xhigh on gpt-5.5 (~56s wall) and
|
|
1836
|
+
 * claude-opus-4-7 (high+ thinking budgets). Every persona except gemini-critic
|
|
1837
|
+
 * (capped at `high`) exposes all four effort tiers; defaults are per-persona (`xhigh` or `high`); SSE handles the long tail
|
|
1838
|
+
* transparently to the user.
|
|
1839
|
+
*/
|
|
1840
|
+
// Frozen so this shared list cannot be mutated at runtime, matching the
// Object.freeze() used for the persona tables in this module.
const EFFORT_LEVELS = Object.freeze([
	"low",
	"medium",
	"high",
	"xhigh"
]);
/**
 * Check whether a value is one of the known effort levels.
 *
 * @param {unknown} v - Candidate value, typically taken from tool-call arguments.
 * @returns {boolean} True only for a string listed in EFFORT_LEVELS.
 */
function isEffort(v) {
	return typeof v === "string" && EFFORT_LEVELS.includes(v);
}
|
|
1648
1849
|
const CRITIC_RUBRIC = `
|
|
1649
1850
|
Apply this grading rubric:
|
|
1650
1851
|
- Score 1–5 on three axes:
|
|
@@ -1673,7 +1874,7 @@ Self-reminder (read before every reply):
|
|
|
1673
1874
|
`.trim();
|
|
1674
1875
|
const COLD_START_CONTRACT = `
|
|
1675
1876
|
Cold-start contract for the lead orchestrator (Opus):
|
|
1676
|
-
When delegating to me, paste a self-contained brief. I have no access to your scrollback,
|
|
1877
|
+
When delegating to me, paste a self-contained brief. I have no access to your scrollback, project memory, or the project tree. Always include:
|
|
1677
1878
|
(a) the artifact under review verbatim (code/diff/plan text),
|
|
1678
1879
|
(b) the constraints or "done" criteria,
|
|
1679
1880
|
(c) any prior decisions I should not relitigate.
|
|
@@ -1745,39 +1946,87 @@ Reply format (markdown):
|
|
|
1745
1946
|
|
|
1746
1947
|
Resilience reminder:
|
|
1747
1948
|
If your session terminates abnormally before "Status: complete", the lead will retry once. On recovery, ask the lead to confirm what's already been done before re-applying changes — duplicate edits are worse than a slow restart.`;
|
|
1949
|
+
// System prompt for the opus-critic persona: three fixed paragraphs followed
// by the shared cold-start contract and grading rubric, separated by blank lines.
const OPUS_CRITIC_BASE = [
	`You are opus-critic, a fresh-context Anthropic-side adversarial reviewer running on Claude Opus 4.7 — the same model and lab as the lead orchestrator that just delegated to you. You are NOT the lead. You did not see the lead's reasoning trace. You only see the brief.`,
	`Your job is to spot what the lead missed because of cognitive momentum, sunk-cost on a plan, or motivated reasoning toward a particular fix. Your blind-spot diversification is LIMITED compared to codex-critic (gpt-5.5) and gemini-critic (gemini-3.1-pro) — same training, same lab, same RLHF priors. Use that honestly: don't pretend to find a different perspective when the obvious read is "the lead got it right." Silence on good work is a valid and welcome answer.`,
	`Sycophancy is the failure mode you exist to fight. Manufactured contrarianism is a different failure of the same shape — do neither.`,
	COLD_START_CONTRACT,
	CRITIC_RUBRIC
].join("\n\n");
|
|
1748
1958
|
// Read-only peer personas. Each entry names the agent, its HTTP tool name,
// the backing model and endpoint, and the effort levels it accepts.
const PERSONAS_READ = Object.freeze((() => {
	// Fresh array per persona so entries never share an allowedEfforts instance.
	const everyEffort = () => ["low", "medium", "high", "xhigh"];
	const codexCritic = {
		agentName: "codex-critic",
		toolNameHttp: "codex_critic",
		model: "gpt-5.5",
		endpoint: "/v1/responses",
		description: "Adversarial second opinion on plans, designs, or code tradeoffs. Backed by gpt-5.5 (OpenAI) — different lab than Opus. Pass artifact verbatim.",
		baseInstructions: CRITIC_BASE,
		agentPrompt: "",
		writeCapable: false,
		requiresHttp: false,
		allowedEfforts: everyEffort(),
		defaultEffort: "xhigh"
	};
	// Only persona gated on the Gemini catalog, and the only one capped at "high".
	const geminiCritic = {
		agentName: "gemini-critic",
		toolNameHttp: "gemini_critic",
		model: "gemini-3.1-pro-preview",
		endpoint: "/v1/chat/completions",
		description: "Adversarial second opinion. Backed by gemini-3.1-pro (Google) — third-lab triangulation, strong on long-context and formal reasoning. Pass artifact verbatim.",
		baseInstructions: GEMINI_CRITIC_BASE,
		agentPrompt: "",
		writeCapable: false,
		requiresHttp: true,
		requiresGeminiCatalog: true,
		allowedEfforts: ["low", "medium", "high"],
		defaultEffort: "high"
	};
	const codexReviewer = {
		agentName: "codex-reviewer",
		toolNameHttp: "codex_reviewer",
		model: "gpt-5.3-codex",
		endpoint: "/v1/responses",
		description: "Line-level review of a concrete diff or single file. Backed by gpt-5.3-codex (OpenAI) — code-specialist, narrow-scope. Pass artifact verbatim.",
		baseInstructions: REVIEWER_BASE,
		agentPrompt: "",
		writeCapable: false,
		requiresHttp: false,
		allowedEfforts: everyEffort(),
		defaultEffort: "xhigh"
	};
	const opusCritic = {
		agentName: "opus-critic",
		toolNameHttp: "opus_critic",
		model: "claude-opus-4-7",
		endpoint: "/v1/messages",
		description: "Adversarial second opinion from a fresh-context Opus 4.7 — cheap same-lab sanity check. Pass artifact verbatim.",
		baseInstructions: OPUS_CRITIC_BASE,
		agentPrompt: "",
		writeCapable: false,
		requiresHttp: true,
		allowedEfforts: everyEffort(),
		defaultEffort: "xhigh"
	};
	return [
		codexCritic,
		geminiCritic,
		codexReviewer,
		opusCritic
	];
})());
|
|
1783
2032
|
const PERSONAS_WRITE = Object.freeze([{
|
|
@@ -1785,11 +2034,18 @@ const PERSONAS_WRITE = Object.freeze([{
|
|
|
1785
2034
|
toolNameHttp: "codex_implementer",
|
|
1786
2035
|
model: "gpt-5.3-codex",
|
|
1787
2036
|
endpoint: "/v1/responses",
|
|
1788
|
-
description: "Targeted implementation of a self-contained coding task
|
|
2037
|
+
description: "Targeted implementation of a self-contained coding task. Backed by gpt-5.3-codex with workspace-write access. Pass spec + files verbatim.",
|
|
1789
2038
|
baseInstructions: IMPLEMENTER_BASE,
|
|
1790
2039
|
agentPrompt: "",
|
|
1791
2040
|
writeCapable: true,
|
|
1792
|
-
requiresHttp: false
|
|
2041
|
+
requiresHttp: false,
|
|
2042
|
+
allowedEfforts: [
|
|
2043
|
+
"low",
|
|
2044
|
+
"medium",
|
|
2045
|
+
"high",
|
|
2046
|
+
"xhigh"
|
|
2047
|
+
],
|
|
2048
|
+
defaultEffort: "high"
|
|
1793
2049
|
}]);
|
|
1794
2050
|
/**
|
|
1795
2051
|
* Build the agent-prompt body Claude Code uses as the subagent's full
|
|
@@ -1838,12 +2094,65 @@ function buildAgentPrompt(persona, opts) {
|
|
|
1838
2094
|
/**
 * Select the personas to register for the current session.
 *
 * @param {{geminiAvailable?: boolean, codexCli?: boolean}} opts - Session capabilities.
 * @returns {Array<object>} A new array: read personas (skipping those that
 *   need the Gemini catalog when it is unavailable), followed by the write
 *   personas when `opts.codexCli` is truthy.
 */
function personasFor(opts) {
	const readable = PERSONAS_READ.filter((persona) => !persona.requiresGeminiCatalog || opts.geminiAvailable);
	return opts.codexCli ? [...readable, ...PERSONAS_WRITE] : readable;
}
|
|
2103
|
+
const WEB_SEARCH_DESCRIPTION = "Web search via GitHub Copilot's MCP. Prefer over Claude Code's built-in WebSearch — surfaces source URLs you can cite.";
|
|
2104
|
+
/**
 * Format a `searchWeb()` result as an MCP-friendly text block. Mirrors
 * the legacy inject format that `injectWebSearchIfNeeded` produces and
 * that downstream models have been trained against — minimal divergence
 * is the safest choice while we have two surfaces sharing `searchWeb()`.
 *
 * Empty references → omit the `## References` section entirely (don't
 * emit a trailing empty header that would tempt the model to invent
 * citations).
 *
 * Reference titles come from arbitrary web pages, so they are flattened to
 * a single line and their square brackets are escaped. Otherwise a title
 * containing a newline or `]` could break the link or inject extra markdown
 * (e.g. a fake heading) into the references list. URLs are emitted unchanged.
 *
 * @param {{content: string, references: Array<{title: string, url: string}>}} results
 * @returns {string} The content, optionally followed by a `## References` list.
 */
function formatWebSearchResult(results) {
	if (results.references.length === 0) return results.content;
	// Collapse all whitespace runs (including newlines) and escape [ ] so the
	// title always stays inside one markdown link label.
	const safeTitle = (title) => String(title).replace(/\s+/g, " ").trim().replace(/[[\]]/g, "\\$&");
	const refsLine = results.references.map((r) => `- [${safeTitle(r.title)}](${r.url})`).join("\n");
	return `${results.content}\n\n## References\n${refsLine}`;
}
|
|
2119
|
+
// MCP tools that are not backed by a persona. Each entry carries its HTTP
// tool name, description, JSON input schema and an async handler that
// returns an MCP tool result ({ content, isError? }).
const NON_PERSONA_MCP_TOOLS = Object.freeze([{
	toolNameHttp: "web_search",
	description: WEB_SEARCH_DESCRIPTION,
	inputSchema: {
		type: "object",
		required: ["query"],
		additionalProperties: false,
		properties: { query: {
			type: "string",
			description: "The search query string. Natural-language queries work best — the upstream provider rewrites for the search index."
		} }
	},
	/**
	 * Run a web search and return it as a single text part.
	 *
	 * @param {{query?: unknown}} args - Tool arguments; `query` must be a non-blank string.
	 * @param {AbortSignal} [_signal] - Accepted for handler-signature parity; not used here.
	 * @returns {Promise<{content: Array<{type: "text", text: string}>, isError?: boolean}>}
	 *   Never rejects: failures are reported as `isError: true` results.
	 */
	async handler(args, _signal) {
		// Trim first so a whitespace-only query is rejected locally instead of
		// being sent upstream; `args?.` tolerates a missing arguments object.
		const query = typeof args?.query === "string" ? args.query.trim() : "";
		if (!query) return {
			content: [{
				type: "text",
				text: "web_search: arguments.query is required (must be a non-empty string)"
			}],
			isError: true
		};
		try {
			return { content: [{
				type: "text",
				text: formatWebSearchResult(await searchWeb(query))
			}] };
		} catch (err) {
			return {
				content: [{
					type: "text",
					text: `web_search failed: ${err instanceof Error ? err.message : String(err)}`
				}],
				isError: true
			};
		}
	}
}]);
|
|
1847
2156
|
|
|
1848
2157
|
//#endregion
|
|
1849
2158
|
//#region src/lib/codex-mcp-config.ts
|
|
@@ -1914,11 +2223,11 @@ function buildPeerMcpConfig(serverUrl, opts) {
|
|
|
1914
2223
|
* ExitPlanMode to default-on (env-disable-able).
|
|
1915
2224
|
*/
|
|
1916
2225
|
function buildCoordinatorAgent(opts) {
|
|
1917
|
-
const peers = ["codex-critic"];
|
|
2226
|
+
const peers = ["codex-critic", "opus-critic"];
|
|
1918
2227
|
if (opts.geminiAvailable) peers.push("gemini-critic");
|
|
1919
2228
|
peers.push("codex-reviewer");
|
|
1920
2229
|
return {
|
|
1921
|
-
description: "Coordinates cross-lab adversarial review.
|
|
2230
|
+
description: "Coordinates cross-lab adversarial review across codex-critic, opus-critic, gemini-critic, codex-reviewer. Use proactively before non-trivial plans and after non-trivial commits. Always pass artifacts verbatim — peers are fresh-context.",
|
|
1922
2231
|
prompt: [
|
|
1923
2232
|
"# Subagent: peer-review-coordinator",
|
|
1924
2233
|
"",
|
|
@@ -1934,10 +2243,11 @@ function buildCoordinatorAgent(opts) {
|
|
|
1934
2243
|
"- **Concrete diff or single file** → fan out to `codex-reviewer`" + (opts.geminiAvailable ? " AND `gemini-critic` (gemini for cross-lab triangulation)" : "") + ". For very small changes (<20 lines), one `codex-reviewer` call is enough.",
|
|
1935
2244
|
"- **Tie-breaker after codex-critic has weighed in** → call `gemini-critic`" + (opts.geminiAvailable ? "" : " (NOT REGISTERED in this session — gemini-3.x not in catalog; tie-break unavailable)") + " with the artifact AND codex-critic's verdict for cross-lab cross-check.",
|
|
1936
2245
|
"- **Long-context artifact (>100 KB)** → prefer `gemini-critic`" + (opts.geminiAvailable ? "" : " (NOT REGISTERED in this session)") + ". Otherwise, decompose into 2-4 batches and fan out across `codex-critic` calls in parallel.",
|
|
2246
|
+
"- **Fast same-lab sanity check on a moderate artifact (<5 KB)** → prefer `opus-critic` (cheapest, ~22s, only `effort: low|medium` supported). Same lab as the lead — limited blind-spot diversification, but a useful gut-check before committing to a controversial decision. For cross-lab diversification or deep dives on larger artifacts, use codex/gemini at higher effort with decomposition for >5KB.",
|
|
1937
2247
|
"",
|
|
1938
2248
|
"## Decomposition for large artifacts",
|
|
1939
2249
|
"",
|
|
1940
|
-
"Each per-call MCP wait is bounded (~
|
|
2250
|
+
"Each per-call MCP wait is bounded (~60s SDK default on Claude Code v2.1.113+ per regressions #50289 / #52137 — empirically reproduced 2026-05-14). The proxy enforces per-persona effort allowlists AND a pre-flight `predictedTooLong` cap (codex_critic@high >8 KB, codex_reviewer@high >12 KB, opus_critic@medium >6 KB) to surface would-be-timeouts as fast actionable errors. For artifacts that exceed the cap, split into 2-4 logical batches BY CONCERN (not by raw size — semantic batches give better per-batch reviews) and call peers in parallel. The proxy's MCP cap allows up to 8 in-flight calls. Aggregate findings yourself before reporting back.",
|
|
1941
2251
|
"",
|
|
1942
2252
|
"## Aggregation contract",
|
|
1943
2253
|
"",
|
|
@@ -2344,7 +2654,7 @@ function initProxyFromEnv() {
|
|
|
2344
2654
|
//#endregion
|
|
2345
2655
|
//#region package.json
|
|
2346
2656
|
var name = "github-router";
|
|
2347
|
-
var version = "0.3.
|
|
2657
|
+
var version = "0.3.24";
|
|
2348
2658
|
|
|
2349
2659
|
//#endregion
|
|
2350
2660
|
//#region src/lib/approval.ts
|
|
@@ -2903,177 +3213,6 @@ const createChatCompletions = async (payload, modelHeaders, callerSignal) => {
|
|
|
2903
3213
|
return await response.json();
|
|
2904
3214
|
};
|
|
2905
3215
|
|
|
2906
|
-
//#endregion
|
|
2907
|
-
//#region src/services/copilot/web-search.ts
|
|
2908
|
-
const RpcSchema = z.object({
|
|
2909
|
-
jsonrpc: z.literal("2.0"),
|
|
2910
|
-
id: z.number().optional(),
|
|
2911
|
-
result: z.object({
|
|
2912
|
-
content: z.array(z.object({
|
|
2913
|
-
type: z.literal("text"),
|
|
2914
|
-
text: z.string()
|
|
2915
|
-
})).optional(),
|
|
2916
|
-
isError: z.boolean().optional()
|
|
2917
|
-
}).optional(),
|
|
2918
|
-
error: z.object({
|
|
2919
|
-
code: z.number(),
|
|
2920
|
-
message: z.string()
|
|
2921
|
-
}).optional()
|
|
2922
|
-
});
|
|
2923
|
-
const InnerSchema = z.object({
|
|
2924
|
-
text: z.object({
|
|
2925
|
-
value: z.string(),
|
|
2926
|
-
annotations: z.array(z.object({ url_citation: z.object({
|
|
2927
|
-
title: z.string(),
|
|
2928
|
-
url: z.string()
|
|
2929
|
-
}).optional() })).nullable().optional()
|
|
2930
|
-
}),
|
|
2931
|
-
bing_searches: z.array(z.unknown()).nullable().optional()
|
|
2932
|
-
});
|
|
2933
|
-
const MAX_SEARCHES_PER_SECOND = 3;
|
|
2934
|
-
let searchTimestamps = [];
|
|
2935
|
-
let throttleChain = Promise.resolve();
|
|
2936
|
-
async function throttleSearch() {
|
|
2937
|
-
const myTurn = throttleChain.then(async () => {
|
|
2938
|
-
const now = Date.now();
|
|
2939
|
-
searchTimestamps = searchTimestamps.filter((t) => now - t < 1e3);
|
|
2940
|
-
if (searchTimestamps.length >= MAX_SEARCHES_PER_SECOND) {
|
|
2941
|
-
const waitMs = 1e3 - (now - searchTimestamps[0]);
|
|
2942
|
-
if (waitMs > 0) {
|
|
2943
|
-
consola.debug(`Web search rate limited, waiting ${waitMs}ms`);
|
|
2944
|
-
await sleep(waitMs);
|
|
2945
|
-
}
|
|
2946
|
-
}
|
|
2947
|
-
searchTimestamps.push(Date.now());
|
|
2948
|
-
});
|
|
2949
|
-
throttleChain = myTurn.catch(() => {});
|
|
2950
|
-
return myTurn;
|
|
2951
|
-
}
|
|
2952
|
-
function mcpHeaders(sid) {
|
|
2953
|
-
if (!state.githubToken) throw new Error("GitHub token missing — re-run auth flow. Web search uses the GitHub PAT (not the Copilot token); the on-disk token at ~/.local/share/github-router/github_token must be present.");
|
|
2954
|
-
const headers = {
|
|
2955
|
-
Authorization: `Bearer ${state.githubToken}`,
|
|
2956
|
-
"content-type": "application/json",
|
|
2957
|
-
accept: "application/json, text/event-stream",
|
|
2958
|
-
"X-MCP-Host": "copilot-cli",
|
|
2959
|
-
"X-MCP-Toolsets": "web_search",
|
|
2960
|
-
"Mcp-Protocol-Version": "2025-06-18",
|
|
2961
|
-
"user-agent": `GitHubCopilotChat/${copilotVersion(state)}`
|
|
2962
|
-
};
|
|
2963
|
-
if (sid) headers["Mcp-Session-Id"] = sid;
|
|
2964
|
-
return headers;
|
|
2965
|
-
}
|
|
2966
|
-
async function postMcp(body, sid, retry = true) {
|
|
2967
|
-
const url = `${copilotBaseUrl(state)}/mcp`;
|
|
2968
|
-
const res = await fetch(url, {
|
|
2969
|
-
method: "POST",
|
|
2970
|
-
headers: mcpHeaders(sid),
|
|
2971
|
-
body: JSON.stringify(body)
|
|
2972
|
-
});
|
|
2973
|
-
if (!res.ok && retry && res.status >= 500) {
|
|
2974
|
-
await sleep(500);
|
|
2975
|
-
return postMcp(body, sid, false);
|
|
2976
|
-
}
|
|
2977
|
-
return res;
|
|
2978
|
-
}
|
|
2979
|
-
async function searchWeb(query) {
|
|
2980
|
-
await throttleSearch();
|
|
2981
|
-
consola.info(`Web search (MCP): "${query.slice(0, 80)}"`);
|
|
2982
|
-
const callId = Math.floor(Math.random() * 1e9);
|
|
2983
|
-
let sid;
|
|
2984
|
-
try {
|
|
2985
|
-
const initRes = await postMcp({
|
|
2986
|
-
jsonrpc: "2.0",
|
|
2987
|
-
id: 1,
|
|
2988
|
-
method: "initialize",
|
|
2989
|
-
params: {
|
|
2990
|
-
protocolVersion: "2024-11-05",
|
|
2991
|
-
capabilities: {},
|
|
2992
|
-
clientInfo: {
|
|
2993
|
-
name: "GitHubCopilotChat",
|
|
2994
|
-
version: copilotVersion(state)
|
|
2995
|
-
}
|
|
2996
|
-
}
|
|
2997
|
-
});
|
|
2998
|
-
if (!initRes.ok) {
|
|
2999
|
-
consola.error("MCP initialize failed", initRes.status);
|
|
3000
|
-
throw new HTTPError("MCP initialize failed", initRes);
|
|
3001
|
-
}
|
|
3002
|
-
sid = initRes.headers.get("mcp-session-id") ?? void 0;
|
|
3003
|
-
if (!sid) throw new HTTPError("MCP initialize: missing Mcp-Session-Id header", initRes);
|
|
3004
|
-
const notifRes = await postMcp({
|
|
3005
|
-
jsonrpc: "2.0",
|
|
3006
|
-
method: "notifications/initialized"
|
|
3007
|
-
}, sid);
|
|
3008
|
-
if (!notifRes.ok && notifRes.status !== 202) {
|
|
3009
|
-
consola.error("MCP notifications/initialized failed", notifRes.status);
|
|
3010
|
-
throw new HTTPError("MCP notifications/initialized failed", notifRes);
|
|
3011
|
-
}
|
|
3012
|
-
const callRes = await postMcp({
|
|
3013
|
-
jsonrpc: "2.0",
|
|
3014
|
-
id: callId,
|
|
3015
|
-
method: "tools/call",
|
|
3016
|
-
params: {
|
|
3017
|
-
name: "web_search",
|
|
3018
|
-
arguments: { query }
|
|
3019
|
-
}
|
|
3020
|
-
}, sid);
|
|
3021
|
-
if (!callRes.ok) {
|
|
3022
|
-
consola.error("MCP tools/call failed", callRes.status);
|
|
3023
|
-
throw new HTTPError("MCP tools/call failed", callRes);
|
|
3024
|
-
}
|
|
3025
|
-
let rpc;
|
|
3026
|
-
for await (const ev of events(callRes)) {
|
|
3027
|
-
if (!ev.data) continue;
|
|
3028
|
-
let parsedJson;
|
|
3029
|
-
try {
|
|
3030
|
-
parsedJson = JSON.parse(ev.data);
|
|
3031
|
-
} catch {
|
|
3032
|
-
continue;
|
|
3033
|
-
}
|
|
3034
|
-
const parsed = RpcSchema.safeParse(parsedJson);
|
|
3035
|
-
if (parsed.success && parsed.data.id === callId) {
|
|
3036
|
-
rpc = parsed.data;
|
|
3037
|
-
break;
|
|
3038
|
-
}
|
|
3039
|
-
}
|
|
3040
|
-
if (!rpc) throw new HTTPError("MCP tools/call: no matching response id in SSE stream", callRes);
|
|
3041
|
-
if (rpc.error) throw new HTTPError(`MCP error ${rpc.error.code}: ${rpc.error.message}`, callRes);
|
|
3042
|
-
if (rpc.result?.isError) throw new HTTPError("MCP web_search tool error", callRes);
|
|
3043
|
-
const text = rpc.result?.content?.[0]?.text;
|
|
3044
|
-
if (!text) throw new HTTPError("MCP web_search: empty content", callRes);
|
|
3045
|
-
let innerRaw;
|
|
3046
|
-
try {
|
|
3047
|
-
innerRaw = JSON.parse(text);
|
|
3048
|
-
} catch (err) {
|
|
3049
|
-
throw new HTTPError(`MCP web_search: inner content not JSON: ${err instanceof Error ? err.message : String(err)}`, callRes);
|
|
3050
|
-
}
|
|
3051
|
-
const innerParsed = InnerSchema.safeParse(innerRaw);
|
|
3052
|
-
if (!innerParsed.success) throw new HTTPError(`MCP web_search: inner content shape changed (${innerParsed.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join("; ")})`, callRes);
|
|
3053
|
-
const inner = innerParsed.data;
|
|
3054
|
-
const references = [];
|
|
3055
|
-
for (const ann of inner.text.annotations ?? []) {
|
|
3056
|
-
const cite = ann.url_citation;
|
|
3057
|
-
if (cite && !cite.url.toLowerCase().includes("bing.com/search")) references.push({
|
|
3058
|
-
title: cite.title,
|
|
3059
|
-
url: cite.url
|
|
3060
|
-
});
|
|
3061
|
-
}
|
|
3062
|
-
consola.debug(`Web search returned ${references.length} references`);
|
|
3063
|
-
return {
|
|
3064
|
-
content: inner.text.value,
|
|
3065
|
-
references
|
|
3066
|
-
};
|
|
3067
|
-
} finally {
|
|
3068
|
-
if (sid) try {
|
|
3069
|
-
fetch(`${copilotBaseUrl(state)}/mcp`, {
|
|
3070
|
-
method: "DELETE",
|
|
3071
|
-
headers: mcpHeaders(sid)
|
|
3072
|
-
}).catch(() => {});
|
|
3073
|
-
} catch {}
|
|
3074
|
-
}
|
|
3075
|
-
}
|
|
3076
|
-
|
|
3077
3216
|
//#endregion
|
|
3078
3217
|
//#region src/routes/chat-completions/handler.ts
|
|
3079
3218
|
const ENCODER$2 = new TextEncoder();
|
|
@@ -3299,6 +3438,125 @@ embeddingRoutes.post("/", async (c) => {
|
|
|
3299
3438
|
}
|
|
3300
3439
|
});
|
|
3301
3440
|
|
|
3441
|
+
//#endregion
|
|
3442
|
+
//#region src/services/copilot/create-messages.ts
|
|
3443
|
+
/**
 * Assemble the request headers for Copilot's native /v1/messages endpoints,
 * matching what VS Code Copilot Chat sends to the Copilot API.
 *
 * The result is built from three layers; on a key collision the later
 * layer wins:
 *
 * 1. Base layer, from copilotHeaders(): Authorization, content-type,
 *    copilot-integration-id, editor-version, editor-plugin-version,
 *    user-agent, openai-intent, x-github-api-version, x-request-id,
 *    x-vscode-user-agent-library-version.
 *
 * 2. Messages layer, the remaining headers VS Code sends for /v1/messages:
 *      - X-Initiator       (VS Code sets it dynamically; "agent" is safe for CLI use)
 *      - anthropic-version (VS Code's Anthropic SDK sends this)
 *      - X-Interaction-Id  (VS Code sends a session-scoped UUID)
 *
 *    copilot-vision-request is intentionally omitted: VS Code only sends it
 *    when images are present, and the native /v1/messages endpoint handles
 *    vision without requiring the header.
 *
 * 3. Caller layer: extraHeaders lets callers forward client-supplied beta
 *    headers (anthropic-beta) so Copilot enables extended features.
 *
 * @param {Record<string, string>} [extraHeaders] Optional caller-supplied headers.
 * @returns {Record<string, string>} The merged header map. A new
 *   X-Interaction-Id UUID is generated on every call.
 */
function buildHeaders(extraHeaders) {
  const baseLayer = copilotHeaders(state);
  const messagesLayer = {
    accept: "application/json",
    "openai-intent": "messages-proxy",
    "x-interaction-type": "conversation-agent",
    "X-Initiator": "agent",
    "anthropic-version": "2023-06-01",
    "X-Interaction-Id": randomUUID()
  };
  // Spread order is the precedence order: base < messages < caller.
  return {
    ...baseLayer,
    ...messagesLayer,
    ...extraHeaders
  };
}
|
|
3474
|
+
/**
 * Forward an Anthropic Messages API request to Copilot's native /v1/messages
 * endpoint and return the raw Response, so the caller decides how to handle
 * streaming vs non-streaming bodies.
 *
 * Cancellation: `callerSignal` (optional) is composed with the standard
 * UPSTREAM_FETCH_TIMEOUT_MS timeout via AbortSignal.any, so callers (e.g. the
 * peer-MCP `opus-critic` persona) can cancel the upstream call when Claude
 * Code's MCP per-tool-call ceiling fires. Mirrors the pattern used by
 * createResponses / createChatCompletions.
 *
 * @param body Request payload, handed to fetch as-is.
 * @param {Record<string, string>} [extraHeaders] Extra headers merged in by buildHeaders.
 * @param {AbortSignal} [callerSignal] Optional caller-owned cancellation signal.
 * @returns {Promise<Response>} The upstream response when its status is ok.
 * @throws {Error} If no Copilot token is present in `state`.
 * @throws {HTTPError} If the upstream response is not ok. The error wraps a
 *   Response rebuilt from the upstream status, statusText and headers plus the
 *   body text that was already read for logging.
 */
async function createMessages(body, extraHeaders, callerSignal) {
  if (!state.copilotToken) throw new Error("Copilot token not found");
  const url = `${copilotBaseUrl(state)}/v1/messages?beta=true`;
  consola.debug(`Forwarding to ${url}`);

  // Each invocation builds its own init (and its own timeout signal), so a
  // repeated attempt by tryRefreshAndRetry never reuses an expired timer.
  const sendRequest = () => {
    const requestInit = {
      method: "POST",
      headers: buildHeaders(extraHeaders),
      body
    };
    const abortSignals = [];
    if (UPSTREAM_FETCH_TIMEOUT_MS > 0) abortSignals.push(AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS));
    if (callerSignal) abortSignals.push(callerSignal);
    const [firstSignal, ...otherSignals] = abortSignals;
    if (otherSignals.length > 0) requestInit.signal = AbortSignal.any(abortSignals);
    else if (firstSignal !== undefined) requestInit.signal = firstSignal;
    return fetch(url, requestInit);
  };

  const response = await tryRefreshAndRetry(sendRequest, "/v1/messages");
  if (response.ok) return response;

  // Best-effort read of the error body; a failed read must not mask the
  // upstream status we are about to report.
  const readErrorBody = async () => {
    try {
      return await response.text();
    } catch {
      return "(could not read error body)";
    }
  };
  const errorBody = await readErrorBody();
  consola.error(`Copilot /v1/messages error: ${response.status} ${errorBody}`);
  const replayed = new Response(errorBody, {
    status: response.status,
    statusText: response.statusText,
    headers: response.headers
  });
  throw new HTTPError("Copilot messages request failed", replayed);
}
|
|
3518
|
+
/**
 * Forward an Anthropic count_tokens request to Copilot's native
 * /v1/messages/count_tokens endpoint and return the raw Response.
 *
 * `callerSignal` is composed with UPSTREAM_FETCH_TIMEOUT_MS, using the same
 * pattern as createMessages: timeout only, caller signal only, or both
 * joined through AbortSignal.any.
 *
 * @param body Request payload, handed to fetch as-is.
 * @param {Record<string, string>} [extraHeaders] Extra headers merged in by buildHeaders.
 * @param {AbortSignal} [callerSignal] Optional caller-owned cancellation signal.
 * @returns {Promise<Response>} The upstream response when its status is ok.
 * @throws {Error} If no Copilot token is present in `state`.
 * @throws {HTTPError} If the upstream response is not ok; wraps a Response
 *   rebuilt from the upstream status, statusText, headers and body text.
 */
async function countTokens(body, extraHeaders, callerSignal) {
  if (!state.copilotToken) throw new Error("Copilot token not found");
  const url = `${copilotBaseUrl(state)}/v1/messages/count_tokens?beta=true`;
  consola.debug(`Forwarding to ${url}`);

  const response = await tryRefreshAndRetry(() => {
    const requestInit = {
      method: "POST",
      headers: buildHeaders(extraHeaders),
      body
    };
    // Collect every active cancellation source, then attach it in the
    // cheapest form: nothing, the lone signal, or a combined signal.
    const abortSignals = [];
    if (UPSTREAM_FETCH_TIMEOUT_MS > 0) abortSignals.push(AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS));
    if (callerSignal) abortSignals.push(callerSignal);
    switch (abortSignals.length) {
      case 0:
        break;
      case 1:
        requestInit.signal = abortSignals[0];
        break;
      default:
        requestInit.signal = AbortSignal.any(abortSignals);
    }
    return fetch(url, requestInit);
  }, "/v1/messages/count_tokens");

  if (response.ok) return response;

  // Best-effort read of the error body for the log line and the replayed
  // Response; a failed read falls back to a placeholder string.
  let errorBody;
  try {
    errorBody = await response.text();
  } catch {
    errorBody = "(could not read error body)";
  }
  consola.error(`Copilot count_tokens error: ${response.status} ${errorBody}`);
  const { status, statusText, headers } = response;
  throw new HTTPError("Copilot count_tokens request failed", new Response(errorBody, {
    status,
    statusText,
    headers
  }));
}
|
|
3559
|
+
|
|
3302
3560
|
//#endregion
|
|
3303
3561
|
//#region src/services/copilot/create-responses.ts
|
|
3304
3562
|
const createResponses = async (payload, modelHeaders, callerSignal) => {
|
|
@@ -3360,27 +3618,6 @@ function detectAgentCall(input) {
|
|
|
3360
3618
|
const MCP_PROTOCOL_VERSION = "2025-06-18";
|
|
3361
3619
|
const SERVER_NAME = "github-router-peers";
|
|
3362
3620
|
const SERVER_VERSION = "1";
|
|
3363
|
-
/**
|
|
3364
|
-
* Reasoning effort levels accepted by Copilot's /v1/responses (gpt-5.x) and
|
|
3365
|
-
* /v1/chat/completions endpoints. Per the proxy's existing thinking-mode
|
|
3366
|
-
* translator (CLAUDE.md "Thinking-mode translation"), Copilot's adaptive-
|
|
3367
|
-
* thinking path uses these same buckets:
|
|
3368
|
-
* <2k tokens → low, <8k → medium, <24k → high, else → xhigh.
|
|
3369
|
-
*
|
|
3370
|
-
* Default `high` for peer reviews — adversarial-by-design but still cost-
|
|
3371
|
-
* conscious. Callers can pass `xhigh` explicitly for deep dives, or `medium`
|
|
3372
|
-
* for quick sanity checks.
|
|
3373
|
-
*/
|
|
3374
|
-
const EFFORT_LEVELS = [
|
|
3375
|
-
"low",
|
|
3376
|
-
"medium",
|
|
3377
|
-
"high",
|
|
3378
|
-
"xhigh"
|
|
3379
|
-
];
|
|
3380
|
-
const DEFAULT_EFFORT = "high";
|
|
3381
|
-
function isEffort(v) {
|
|
3382
|
-
return typeof v === "string" && EFFORT_LEVELS.includes(v);
|
|
3383
|
-
}
|
|
3384
3621
|
/** Bounded concurrency. Originally capped at 2 (commit 4317a25) as a defensive
|
|
3385
3622
|
* pre-launch guess against Opus's natural pattern of fanning out to all three
|
|
3386
3623
|
* critics at once. Raised to 8 (Phase 2D of the peer-MCP plan) so the
|
|
@@ -3485,10 +3722,10 @@ function geminiAvailable() {
|
|
|
3485
3722
|
return models.some((m) => /^gemini-3\..*pro/i.test(m.id));
|
|
3486
3723
|
}
|
|
3487
3724
|
/**
 * List the personas that can currently be offered.
 *
 * A persona flagged `requiresGeminiCatalog` is kept only while
 * geminiAvailable() returns true; every other persona is always kept.
 *
 * @returns {Array} The filtered subset of PERSONAS_READ, in original order.
 */
function activePersonas() {
  const isOffered = (persona) => {
    if (!persona.requiresGeminiCatalog) return true;
    return geminiAvailable();
  };
  return PERSONAS_READ.filter((persona) => isOffered(persona));
}
|
|
3490
3727
|
function toolEntries() {
|
|
3491
|
-
|
|
3728
|
+
const personaEntries = activePersonas().map((p) => ({
|
|
3492
3729
|
name: p.toolNameHttp,
|
|
3493
3730
|
description: p.description,
|
|
3494
3731
|
inputSchema: {
|
|
@@ -3506,12 +3743,18 @@ function toolEntries() {
|
|
|
3506
3743
|
},
|
|
3507
3744
|
effort: {
|
|
3508
3745
|
type: "string",
|
|
3509
|
-
enum: [...
|
|
3510
|
-
description: `Reasoning depth (
|
|
3746
|
+
enum: [...p.allowedEfforts],
|
|
3747
|
+
description: `Reasoning depth (${p.allowedEfforts.join(" | ")}). Default "${p.defaultEffort}". Higher tiers cost more wall-clock; lower tiers are quicker sanity checks. ` + (p.endpoint === "/v1/chat/completions" ? "Note: for gemini routed via /v1/chat/completions, the upstream may silently ignore this knob." : "")
|
|
3511
3748
|
}
|
|
3512
3749
|
}
|
|
3513
3750
|
}
|
|
3514
3751
|
}));
|
|
3752
|
+
const nonPersonaEntries = NON_PERSONA_MCP_TOOLS.map((t) => ({
|
|
3753
|
+
name: t.toolNameHttp,
|
|
3754
|
+
description: t.description,
|
|
3755
|
+
inputSchema: t.inputSchema
|
|
3756
|
+
}));
|
|
3757
|
+
return [...personaEntries, ...nonPersonaEntries];
|
|
3515
3758
|
}
|
|
3516
3759
|
function buildUserText(prompt, context) {
|
|
3517
3760
|
if (!context) return prompt;
|
|
@@ -3539,6 +3782,11 @@ function extractChatCompletionText(response) {
|
|
|
3539
3782
|
const c = choice.message?.content;
|
|
3540
3783
|
return typeof c === "string" ? c : "";
|
|
3541
3784
|
}
|
|
3785
|
+
/**
 * Concatenate the text of every `text` content block in a parsed Anthropic
 * Messages API response.
 *
 * Blocks of any other type, and `text` blocks whose `text` is not a string,
 * are skipped. Malformed input never throws: a null/undefined response, a
 * missing or non-array `content`, and null entries inside `content` all
 * contribute nothing, so the caller sees an empty string and can report
 * "empty assistant output" instead of an opaque TypeError.
 *
 * @param {{content?: Array<{type?: string, text?: unknown}>}|null|undefined} response
 *   Parsed JSON body of a /v1/messages response.
 * @returns {string} The text blocks joined with no separator ("" when none).
 */
function extractMessagesText(response) {
  const blocks = response?.content;
  // The body comes from an upstream JSON parse, so its shape is not guaranteed.
  if (!Array.isArray(blocks)) return "";
  return blocks
    .filter((block) => block?.type === "text" && typeof block.text === "string")
    .map((block) => block.text)
    .join("");
}
|
|
3542
3790
|
function toolError(message) {
|
|
3543
3791
|
return {
|
|
3544
3792
|
content: [{
|
|
@@ -3548,6 +3796,94 @@ function toolError(message) {
|
|
|
3548
3796
|
isError: true
|
|
3549
3797
|
};
|
|
3550
3798
|
}
|
|
3799
|
+
/**
 * Empirical pre-flight caps that turn "would-bust-the-60s-MCP-ceiling" calls
 * into fast, actionable errors instead of slot-leaking timeouts.
 *
 * Probed live against Copilot 2026-05-14:
 *   gpt-5.5 high on a ~600B prompt = 23.8s → ~76s on 8KB (rough linear)
 *   gpt-5.3-codex high on ~600B = 16.0s → ~64s on 12KB
 *   claude-opus-4-7 medium (thinking=3000) on a trivial prompt = 22.5s
 *     but model self-paces budget → ~50s+ on a real ~6KB review
 *
 * SCOPE: the cap is JSON-PATH ONLY. Callers (handleMcpPost) MUST gate the
 * call site by `!acceptsEventStream(...)`. The SSE path (handleToolsCallSSE)
 * keeps the connection open past the 60s ceiling via heartbeats, so a
 * size-based pre-flight rejection there would just lock SSE clients out of
 * their primary advantage. JSON-path clients (raw curl with
 * `Accept: application/json`, older MCP clients without SSE awareness) DO
 * still hit the underlying tools/call timer, so the cap is the only way to
 * surface a fast actionable error there instead of a slot-leaking timeout.
 *
 * INVARIANT: pre-flight MUST fire BEFORE inFlightToolsCall++; the slot must
 * not be acquired for a rejected pre-flight. handleMcpPost runs the check
 * before delegating to handleRpc → handleToolsCall (the function that
 * increments the counter). Documented in CLAUDE.md.
 *
 * gemini_critic has no cap (long-context model + Copilot may auto-pace).
 *
 * Each entry applies to exactly one (toolName, effort) pair; a brief is
 * rejected only when its size is strictly greater than maxBriefBytes.
 */
const PRE_FLIGHT_CAPS = [
  { toolName: "codex_critic", effort: "high", maxBriefBytes: 8 * 1024 },
  { toolName: "codex_reviewer", effort: "high", maxBriefBytes: 12 * 1024 },
  { toolName: "opus_critic", effort: "medium", maxBriefBytes: 6 * 1024 }
];

/**
 * Decide whether a (persona, effort, briefBytes) tuple is empirically
 * predicted to bust the 60s ceiling.
 *
 * @param {{toolNameHttp: string}} persona Persona whose HTTP tool name is matched against the caps.
 * @param {string} effort Effort tier of the call.
 * @param {number} briefBytes Size of the brief in bytes.
 * @returns {{tooLong: true, capBytes: number} | {tooLong: false}}
 *   `capBytes` is the limit of the first cap entry that was exceeded.
 */
function predictedTooLong(persona, effort, briefBytes) {
  const exceeded = PRE_FLIGHT_CAPS.find(
    (cap) => cap.toolName === persona.toolNameHttp && cap.effort === effort && briefBytes > cap.maxBriefBytes
  );
  if (exceeded === undefined) return { tooLong: false };
  return {
    tooLong: true,
    capBytes: exceeded.maxBriefBytes
  };
}
|
|
3853
|
+
/**
 * JSON-path pre-flight gate built on predictedTooLong.
 *
 * Returns a JSON-RPC result body wrapping a tool-error envelope when the
 * call would bust the 60s tools/call ceiling on the JSON path, and
 * `undefined` when the call should proceed normally.
 *
 * Any shape problem also yields `undefined`, so the regular dispatch path
 * can answer with its canonical JSON-RPC error instead:
 *   - notification (no id): left to handleRpc.
 *     NOTE(review): expected to be answered with 202 + empty body; that
 *     code is not visible from here.
 *   - missing name                 → handleToolsCall: RPC_INVALID_PARAMS
 *   - unknown name                 → handleToolsCall: RPC_METHOD_NOT_FOUND
 *   - missing prompt               → handleToolsCall: RPC_INVALID_PARAMS
 *   - invalid effort string        → handleToolsCall: RPC_INVALID_PARAMS
 *   - effort not in persona.allowedEfforts → handleToolsCall: RPC_INVALID_PARAMS
 * Names that do not resolve through activePersonas() (for example
 * non-persona tools) are skipped the same way, so they are never capped.
 *
 * The brief is measured as the UTF-8 byte length of
 * buildUserText(prompt, context), the same text callPersona sends upstream.
 *
 * @param {object} body Parsed JSON-RPC `tools/call` request.
 * @returns {object|undefined} rpcResult envelope carrying the rejection, or
 *   undefined to let the call through.
 */
function jsonPathPreflightCap(body) {
  if (body.id === undefined) return undefined;

  const rpcParams = body.params ?? {};
  const toolName = typeof rpcParams.name === "string" ? rpcParams.name : "";
  const toolArgs = rpcParams.arguments ?? {};
  const prompt = typeof toolArgs.prompt === "string" ? toolArgs.prompt : "";
  const context = typeof toolArgs.context === "string" ? toolArgs.context : undefined;
  const requestedEffort = toolArgs.effort;
  if (toolName === "" || prompt === "") return undefined;

  const persona = activePersonas().find((candidate) => candidate.toolNameHttp === toolName);
  if (persona === undefined) return undefined;

  // An explicit effort must be both a known tier and one this persona
  // allows; otherwise leave the rejection to the normal dispatch path.
  if (requestedEffort !== undefined) {
    if (!isEffort(requestedEffort)) return undefined;
    if (!persona.allowedEfforts.includes(requestedEffort)) return undefined;
  }
  const effort = requestedEffort ?? persona.defaultEffort;

  const briefBytes = Buffer.byteLength(buildUserText(prompt, context), "utf8");
  const verdict = predictedTooLong(persona, effort, briefBytes);
  if (verdict.tooLong) {
    return rpcResult(body.id, toolError(`pre-flight rejected: ${persona.toolNameHttp} at effort=${effort} on a ${briefBytes}-byte brief is empirically predicted to exceed the JSON tools/call timeout (cap=${verdict.capBytes} bytes for this tier). Either drop to a lower effort tier, split the brief into 2-4 parallel sub-calls per the decomposition guidance, or send Accept: text/event-stream to use the SSE path which bypasses this cap.`));
  }
  return undefined;
}
|
|
3551
3887
|
async function callPersona(persona, prompt, context, effort, signal) {
|
|
3552
3888
|
const resolvedModel = resolveModel(persona.model);
|
|
3553
3889
|
const userText = buildUserText(prompt, context);
|
|
@@ -3571,6 +3907,25 @@ async function callPersona(persona, prompt, context, effort, signal) {
|
|
|
3571
3907
|
text: text$1
|
|
3572
3908
|
}] };
|
|
3573
3909
|
}
|
|
3910
|
+
if (persona.endpoint === "/v1/messages") {
|
|
3911
|
+
const maxTokens = effort === "low" ? 4096 : effort === "medium" ? 8192 : effort === "high" ? 16384 : 32768;
|
|
3912
|
+
const text$1 = extractMessagesText(await (await createMessages(JSON.stringify({
|
|
3913
|
+
model: resolvedModel,
|
|
3914
|
+
max_tokens: maxTokens,
|
|
3915
|
+
system: persona.baseInstructions,
|
|
3916
|
+
thinking: { type: "adaptive" },
|
|
3917
|
+
output_config: { effort },
|
|
3918
|
+
messages: [{
|
|
3919
|
+
role: "user",
|
|
3920
|
+
content: userText
|
|
3921
|
+
}]
|
|
3922
|
+
}), void 0, signal)).json());
|
|
3923
|
+
if (!text$1) return toolError(`persona ${persona.agentName}: empty assistant output`);
|
|
3924
|
+
return { content: [{
|
|
3925
|
+
type: "text",
|
|
3926
|
+
text: text$1
|
|
3927
|
+
}] };
|
|
3928
|
+
}
|
|
3574
3929
|
const text = extractChatCompletionText(await createChatCompletions({
|
|
3575
3930
|
model: resolvedModel,
|
|
3576
3931
|
messages: [{
|
|
@@ -3604,17 +3959,23 @@ async function handleToolsCall(body) {
|
|
|
3604
3959
|
const params = body.params ?? {};
|
|
3605
3960
|
const name$1 = typeof params.name === "string" ? params.name : "";
|
|
3606
3961
|
const args = params.arguments ?? {};
|
|
3607
|
-
const prompt = typeof args.prompt === "string" ? args.prompt : "";
|
|
3608
|
-
const context = typeof args.context === "string" ? args.context : void 0;
|
|
3609
|
-
let effort = DEFAULT_EFFORT;
|
|
3610
|
-
if (args.effort !== void 0) {
|
|
3611
|
-
if (!isEffort(args.effort)) return rpcError(body.id, RPC_INVALID_PARAMS, `tools/call: arguments.effort must be one of ${EFFORT_LEVELS.join("|")}; got ${JSON.stringify(args.effort)}`);
|
|
3612
|
-
effort = args.effort;
|
|
3613
|
-
}
|
|
3614
3962
|
if (!name$1) return rpcError(body.id, RPC_INVALID_PARAMS, "tools/call missing name");
|
|
3615
3963
|
const persona = activePersonas().find((p) => p.toolNameHttp === name$1);
|
|
3616
|
-
|
|
3617
|
-
if (!
|
|
3964
|
+
const nonPersonaTool = persona ? void 0 : NON_PERSONA_MCP_TOOLS.find((t) => t.toolNameHttp === name$1);
|
|
3965
|
+
if (!persona && !nonPersonaTool) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
|
|
3966
|
+
let personaPrompt;
|
|
3967
|
+
let personaContext;
|
|
3968
|
+
let personaEffort;
|
|
3969
|
+
if (persona) {
|
|
3970
|
+
if (args.effort !== void 0 && !isEffort(args.effort)) return rpcError(body.id, RPC_INVALID_PARAMS, `tools/call: arguments.effort must be one of ${EFFORT_LEVELS.join("|")}; got ${JSON.stringify(args.effort)}`);
|
|
3971
|
+
const requestedEffort = args.effort;
|
|
3972
|
+
const prompt = typeof args.prompt === "string" ? args.prompt : "";
|
|
3973
|
+
if (!prompt) return rpcError(body.id, RPC_INVALID_PARAMS, `tools/call: arguments.prompt is required`);
|
|
3974
|
+
personaPrompt = prompt;
|
|
3975
|
+
personaContext = typeof args.context === "string" ? args.context : void 0;
|
|
3976
|
+
if (requestedEffort !== void 0 && !persona.allowedEfforts.includes(requestedEffort)) return rpcError(body.id, RPC_INVALID_PARAMS, `tools/call: persona "${persona.toolNameHttp}" does not accept effort="${requestedEffort}". Allowed: ${persona.allowedEfforts.join("|")}.`);
|
|
3977
|
+
personaEffort = requestedEffort ?? persona.defaultEffort;
|
|
3978
|
+
}
|
|
3618
3979
|
if (inFlightToolsCall >= MAX_INFLIGHT_TOOLS_CALL) return rpcResult(body.id, {
|
|
3619
3980
|
content: [{
|
|
3620
3981
|
type: "text",
|
|
@@ -3630,11 +3991,13 @@ async function handleToolsCall(body) {
|
|
|
3630
3991
|
aborter = new AbortController();
|
|
3631
3992
|
inflightAborts.set(abortKey, aborter);
|
|
3632
3993
|
}
|
|
3994
|
+
const telemetryName = persona ? persona.agentName : nonPersonaTool.toolNameHttp;
|
|
3995
|
+
const telemetryModel = persona ? persona.model : "(non-persona)";
|
|
3633
3996
|
try {
|
|
3634
|
-
const result = await callPersona(persona,
|
|
3997
|
+
const result = persona ? await callPersona(persona, personaPrompt, personaContext, personaEffort, aborter?.signal) : await nonPersonaTool.handler(args, aborter?.signal);
|
|
3635
3998
|
logTelemetry({
|
|
3636
|
-
name:
|
|
3637
|
-
model:
|
|
3999
|
+
name: telemetryName,
|
|
4000
|
+
model: telemetryModel,
|
|
3638
4001
|
durationMs: Date.now() - startedAt,
|
|
3639
4002
|
result: result.isError ? "isError" : "ok"
|
|
3640
4003
|
});
|
|
@@ -3642,8 +4005,8 @@ async function handleToolsCall(body) {
|
|
|
3642
4005
|
} catch (err) {
|
|
3643
4006
|
const message = err instanceof Error ? err.message : String(err);
|
|
3644
4007
|
logTelemetry({
|
|
3645
|
-
name:
|
|
3646
|
-
model:
|
|
4008
|
+
name: telemetryName,
|
|
4009
|
+
model: telemetryModel,
|
|
3647
4010
|
durationMs: Date.now() - startedAt,
|
|
3648
4011
|
result: "exception",
|
|
3649
4012
|
errorMessage: message
|
|
@@ -3651,7 +4014,7 @@ async function handleToolsCall(body) {
|
|
|
3651
4014
|
return rpcResult(body.id, {
|
|
3652
4015
|
content: [{
|
|
3653
4016
|
type: "text",
|
|
3654
|
-
text: `persona ${persona.agentName} failed: ${message}`
|
|
4017
|
+
text: persona ? `persona ${persona.agentName} failed: ${message}` : `tool ${nonPersonaTool.toolNameHttp} failed: ${message}`
|
|
3655
4018
|
}],
|
|
3656
4019
|
isError: true
|
|
3657
4020
|
});
|
|
@@ -3814,6 +4177,11 @@ async function handleMcpPost(c) {
|
|
|
3814
4177
|
consola.debug("/mcp parse error:", err);
|
|
3815
4178
|
return c.json(rpcError(null, RPC_PARSE_ERROR, "request body is not valid JSON"), 200);
|
|
3816
4179
|
}
|
|
4180
|
+
if (typeof body === "object" && body !== null && !Array.isArray(body) && body.method === "tools/call" && acceptsEventStream(c.req.header("accept"))) return handleToolsCallSSE(body);
|
|
4181
|
+
if (typeof body === "object" && body !== null && !Array.isArray(body) && body.method === "tools/call") {
|
|
4182
|
+
const preflight = jsonPathPreflightCap(body);
|
|
4183
|
+
if (preflight) return c.json(preflight, 200);
|
|
4184
|
+
}
|
|
3817
4185
|
try {
|
|
3818
4186
|
const { status, body: respBody } = await handleRpc(c, body);
|
|
3819
4187
|
if (respBody === null) return c.body(null, status);
|
|
@@ -3824,6 +4192,111 @@ async function handleMcpPost(c) {
|
|
|
3824
4192
|
return c.json(rpcError(echoId, RPC_INTERNAL_ERROR, err instanceof Error ? err.message : String(err)), 200);
|
|
3825
4193
|
}
|
|
3826
4194
|
}
|
|
4195
|
+
/**
 * Accept-header parsing for MCP Streamable HTTP. Per the MCP 2025-06-18
 * spec, clients send `Accept: application/json, text/event-stream` to
 * indicate they can consume either response shape. The server picks; for
 * tools/call we pick SSE because Claude Code's per-tool-call timer
 * (~60s on v2.1.113+) does not fire on streamed responses.
 *
 * Lenient parse: lowercase, split on commas, separate the media type from
 * its parameters, trim, and look for the SSE token. Parameters such as
 * charset are ignored. The one parameter that matters is the quality value:
 * a weight of zero (`text/event-stream;q=0`) means "not acceptable" in HTTP
 * content negotiation, so such an entry does not count as accepting SSE.
 * A missing or unparseable q-value is treated as acceptable.
 *
 * @param {string|undefined} accept Raw Accept header value.
 * @returns {boolean} True when some entry names text/event-stream with a
 *   non-zero weight; false for undefined / empty / JSON-only Accept.
 */
function acceptsEventStream(accept) {
  if (!accept) return false;
  return accept.toLowerCase().split(",").some((entry) => {
    const [mediaType, ...params] = entry.split(";").map((part) => part.trim());
    if (mediaType !== "text/event-stream") return false;
    // q=0 (also 0.0, 0.000) is an explicit refusal of this media type.
    const refused = params.some((param) => {
      const match = /^q\s*=\s*([0-9.]+)$/.exec(param);
      return match !== null && Number.parseFloat(match[1]) === 0;
    });
    return !refused;
  });
}
|
|
4210
|
+
/**
 * SSE-streamed response for a single tools/call. Delegates the actual
 * upstream call to `handleToolsCall` (so the per-persona effort gate,
 * AbortController registration, telemetry, and inFlight slot accounting all
 * run identically to the JSON path) and wraps the awaited result in an SSE
 * envelope, emitting periodic heartbeats while the upstream fetch is in
 * flight.
 *
 * SSE event format (per MCP Streamable HTTP):
 *   event: message
 *   data: <json-rpc-2.0 message>\n\n
 *
 * - Heartbeats are JSON-RPC `notifications/progress` notifications. They
 *   reference the client's `params._meta.progressToken` when the request
 *   carries one (string or number) and fall back to the request id
 *   otherwise. `progress` starts at 0 and increases by one per heartbeat,
 *   because the MCP progress spec requires increasing values.
 * - The final message is the JSON-RPC response envelope returned by
 *   handleToolsCall, the same structure as the JSON-path response.
 * - On consumer cancel (ReadableStream.cancel) the stream is marked closed,
 *   the heartbeat interval is cleared immediately, and the in-flight call's
 *   AbortController (looked up in inflightAborts by request id) is
 *   signalled. Whatever handleToolsCall eventually returns is dropped
 *   unwritten.
 *
 * Per CLAUDE.md "Stream lifecycle" / "The smoking gun" rules: every
 * controller.enqueue/close is wrapped in a try/catch that swallows the
 * "Invalid state: Controller is already closed" race without warning.
 *
 * @param {object} body Parsed JSON-RPC `tools/call` request.
 * @returns {Promise<Response>} A 200 text/event-stream response.
 */
const SSE_HEARTBEAT_INTERVAL_MS = 5e3;
async function handleToolsCallSSE(body) {
  const encoder = new TextEncoder();
  // Start the upstream call right away; the stream only relays its outcome.
  const callPromise = handleToolsCall(body);

  const requestId = body.id ?? null;
  const clientToken = body.params?._meta?.progressToken;
  const progressToken = typeof clientToken === "string" || typeof clientToken === "number" ? clientToken : requestId;

  // Shared by start() and cancel() so a client disconnect can stop the
  // heartbeat timer immediately instead of waiting for upstream to settle.
  let closed = false;
  let heartbeatHandle;
  let heartbeatsSent = 0;

  const sseFrame = (rpcMessage) => encoder.encode(`event: message\ndata: ${JSON.stringify(rpcMessage)}\n\n`);
  const heartbeatFrame = () => sseFrame({
    jsonrpc: "2.0",
    method: "notifications/progress",
    params: {
      progressToken,
      progress: heartbeatsSent++,
      message: "in flight"
    }
  });

  const stream = new ReadableStream({
    async start(controller) {
      const safeEnqueue = (chunk) => {
        if (closed) return;
        try {
          controller.enqueue(chunk);
        } catch (err) {
          consola.debug("/mcp SSE enqueue after close (expected race):", err);
          closed = true;
        }
      };
      const safeClose = () => {
        if (closed) return;
        closed = true;
        try {
          controller.close();
        } catch (err) {
          consola.debug("/mcp SSE close after close:", err);
        }
      };
      const sendHeartbeat = () => {
        if (closed) return;
        safeEnqueue(heartbeatFrame());
      };

      // One heartbeat up front so the client sees bytes immediately, then
      // one per interval until the upstream call settles.
      sendHeartbeat();
      heartbeatHandle = setInterval(sendHeartbeat, SSE_HEARTBEAT_INTERVAL_MS);
      try {
        safeEnqueue(sseFrame(await callPromise));
      } catch (err) {
        consola.error("/mcp SSE upstream error:", err);
        safeEnqueue(sseFrame(rpcError(requestId, RPC_INTERNAL_ERROR, err instanceof Error ? err.message : String(err))));
      } finally {
        clearInterval(heartbeatHandle);
        safeClose();
      }
    },
    cancel() {
      // Consumer went away: stop writing and stop the timer now.
      closed = true;
      clearInterval(heartbeatHandle);
      if (body.id === undefined || body.id === null) return;
      const aborter = inflightAborts.get(body.id);
      if (aborter) aborter.abort(new Error("client disconnected SSE stream"));
    }
  });

  return new Response(stream, {
    status: 200,
    headers: {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache, no-transform",
      "Connection": "keep-alive",
      "X-Accel-Buffering": "no"
    }
  });
}
|
|
3827
4300
|
function handleMcpDelete(c) {
|
|
3828
4301
|
const auth$1 = checkAuth(c);
|
|
3829
4302
|
if (!auth$1.ok) return c.json(rpcError(null, RPC_INVALID_REQUEST, auth$1.reason), auth$1.status);
|
|
@@ -3848,108 +4321,6 @@ mcpRoutes.delete("/", (c) => {
|
|
|
3848
4321
|
}
|
|
3849
4322
|
});
|
|
3850
4323
|
|
|
3851
|
-
//#endregion
|
|
3852
|
-
//#region src/services/copilot/create-messages.ts
|
|
3853
|
-
/**
|
|
3854
|
-
* Build headers that match what VS Code Copilot Chat sends to the Copilot API.
|
|
3855
|
-
*
|
|
3856
|
-
* copilotHeaders() provides: Authorization, content-type, copilot-integration-id,
|
|
3857
|
-
* editor-version, editor-plugin-version, user-agent, openai-intent,
|
|
3858
|
-
* x-github-api-version, x-request-id, x-vscode-user-agent-library-version.
|
|
3859
|
-
*
|
|
3860
|
-
* We add the remaining headers VS Code sends for /v1/messages:
|
|
3861
|
-
* - X-Initiator (VS Code sets dynamically; "agent" is safe for CLI use)
|
|
3862
|
-
* - anthropic-version (VS Code's Anthropic SDK sends this)
|
|
3863
|
-
* - X-Interaction-Id (VS Code sends a session-scoped UUID)
|
|
3864
|
-
*
|
|
3865
|
-
* We intentionally omit copilot-vision-request — VS Code only sends it when
|
|
3866
|
-
* images are present, and the native /v1/messages endpoint handles vision
|
|
3867
|
-
* without requiring the header.
|
|
3868
|
-
*
|
|
3869
|
-
* extraHeaders allows callers to forward client-supplied beta headers
|
|
3870
|
-
* (anthropic-beta) so Copilot enables extended features.
|
|
3871
|
-
*/
|
|
3872
|
-
function buildHeaders(extraHeaders) {
|
|
3873
|
-
return {
|
|
3874
|
-
...copilotHeaders(state),
|
|
3875
|
-
accept: "application/json",
|
|
3876
|
-
"openai-intent": "messages-proxy",
|
|
3877
|
-
"x-interaction-type": "conversation-agent",
|
|
3878
|
-
"X-Initiator": "agent",
|
|
3879
|
-
"anthropic-version": "2023-06-01",
|
|
3880
|
-
"X-Interaction-Id": randomUUID(),
|
|
3881
|
-
...extraHeaders
|
|
3882
|
-
};
|
|
3883
|
-
}
|
|
3884
|
-
/**
|
|
3885
|
-
* Forward an Anthropic Messages API request to Copilot's native /v1/messages endpoint.
|
|
3886
|
-
* Returns the raw Response so callers can handle streaming vs non-streaming.
|
|
3887
|
-
*/
|
|
3888
|
-
async function createMessages(body, extraHeaders) {
|
|
3889
|
-
if (!state.copilotToken) throw new Error("Copilot token not found");
|
|
3890
|
-
const url = `${copilotBaseUrl(state)}/v1/messages?beta=true`;
|
|
3891
|
-
consola.debug(`Forwarding to ${url}`);
|
|
3892
|
-
const doFetch = () => {
|
|
3893
|
-
const fetchInit = {
|
|
3894
|
-
method: "POST",
|
|
3895
|
-
headers: buildHeaders(extraHeaders),
|
|
3896
|
-
body
|
|
3897
|
-
};
|
|
3898
|
-
if (UPSTREAM_FETCH_TIMEOUT_MS > 0) fetchInit.signal = AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS);
|
|
3899
|
-
return fetch(url, fetchInit);
|
|
3900
|
-
};
|
|
3901
|
-
const response = await tryRefreshAndRetry(doFetch, "/v1/messages");
|
|
3902
|
-
if (!response.ok) {
|
|
3903
|
-
let errorBody = "";
|
|
3904
|
-
try {
|
|
3905
|
-
errorBody = await response.text();
|
|
3906
|
-
} catch {
|
|
3907
|
-
errorBody = "(could not read error body)";
|
|
3908
|
-
}
|
|
3909
|
-
consola.error(`Copilot /v1/messages error: ${response.status} ${errorBody}`);
|
|
3910
|
-
throw new HTTPError("Copilot messages request failed", new Response(errorBody, {
|
|
3911
|
-
status: response.status,
|
|
3912
|
-
statusText: response.statusText,
|
|
3913
|
-
headers: response.headers
|
|
3914
|
-
}));
|
|
3915
|
-
}
|
|
3916
|
-
return response;
|
|
3917
|
-
}
|
|
3918
|
-
/**
|
|
3919
|
-
* Forward an Anthropic count_tokens request to Copilot's native endpoint.
|
|
3920
|
-
* Returns the raw Response.
|
|
3921
|
-
*/
|
|
3922
|
-
async function countTokens(body, extraHeaders) {
|
|
3923
|
-
if (!state.copilotToken) throw new Error("Copilot token not found");
|
|
3924
|
-
const url = `${copilotBaseUrl(state)}/v1/messages/count_tokens?beta=true`;
|
|
3925
|
-
consola.debug(`Forwarding to ${url}`);
|
|
3926
|
-
const doFetch = () => {
|
|
3927
|
-
const fetchInit = {
|
|
3928
|
-
method: "POST",
|
|
3929
|
-
headers: buildHeaders(extraHeaders),
|
|
3930
|
-
body
|
|
3931
|
-
};
|
|
3932
|
-
if (UPSTREAM_FETCH_TIMEOUT_MS > 0) fetchInit.signal = AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS);
|
|
3933
|
-
return fetch(url, fetchInit);
|
|
3934
|
-
};
|
|
3935
|
-
const response = await tryRefreshAndRetry(doFetch, "/v1/messages/count_tokens");
|
|
3936
|
-
if (!response.ok) {
|
|
3937
|
-
let errorBody = "";
|
|
3938
|
-
try {
|
|
3939
|
-
errorBody = await response.text();
|
|
3940
|
-
} catch {
|
|
3941
|
-
errorBody = "(could not read error body)";
|
|
3942
|
-
}
|
|
3943
|
-
consola.error(`Copilot count_tokens error: ${response.status} ${errorBody}`);
|
|
3944
|
-
throw new HTTPError("Copilot count_tokens request failed", new Response(errorBody, {
|
|
3945
|
-
status: response.status,
|
|
3946
|
-
statusText: response.statusText,
|
|
3947
|
-
headers: response.headers
|
|
3948
|
-
}));
|
|
3949
|
-
}
|
|
3950
|
-
return response;
|
|
3951
|
-
}
|
|
3952
|
-
|
|
3953
4324
|
//#endregion
|
|
3954
4325
|
//#region src/services/advisor/advisor.ts
|
|
3955
4326
|
const ENCODER$1 = new TextEncoder();
|
|
@@ -5003,7 +5374,7 @@ async function handleCompletion(c) {
|
|
|
5003
5374
|
type: "error",
|
|
5004
5375
|
error: {
|
|
5005
5376
|
type: "invalid_request_error",
|
|
5006
|
-
message: "Inline `mcp_servers` body field is not supported by github-router
|
|
5377
|
+
message: "Inline `mcp_servers` body field is not supported by github-router. Configure remote MCP servers as local stdio entries in `~/.claude/mcp.json` instead — Claude Code will spawn them locally and the proxy passes their tool calls through transparently. (https://docs.claude.com/en/docs/claude-code/mcp)"
|
|
5007
5378
|
}
|
|
5008
5379
|
}, 400);
|
|
5009
5380
|
} catch {}
|
|
@@ -5971,6 +6342,7 @@ function getClaudeCodeEnvVars(serverUrl, model) {
|
|
|
5971
6342
|
ANTHROPIC_BASE_URL: serverUrl,
|
|
5972
6343
|
CLAUDE_CONFIG_DIR: PATHS.CLAUDE_CONFIG_DIR,
|
|
5973
6344
|
MCP_TIMEOUT: "600000",
|
|
6345
|
+
MCP_TOOL_TIMEOUT: "600000",
|
|
5974
6346
|
DISABLE_NON_ESSENTIAL_MODEL_CALLS: "1",
|
|
5975
6347
|
CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: "1",
|
|
5976
6348
|
DISABLE_TELEMETRY: "1"
|