github-router 0.3.16 → 0.3.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -6
- package/dist/main.js +2429 -436
- package/dist/main.js.map +1 -1
- package/package.json +1 -1
package/dist/main.js
CHANGED
|
@@ -4,18 +4,18 @@ import consola from "consola";
|
|
|
4
4
|
import fs from "node:fs/promises";
|
|
5
5
|
import os from "node:os";
|
|
6
6
|
import path from "node:path";
|
|
7
|
-
import { randomBytes, randomUUID } from "node:crypto";
|
|
7
|
+
import { randomBytes, randomUUID, timingSafeEqual } from "node:crypto";
|
|
8
8
|
import process$1 from "node:process";
|
|
9
|
+
import { execFileSync, spawn } from "node:child_process";
|
|
9
10
|
import fs$1 from "node:fs";
|
|
10
11
|
import { Writable } from "node:stream";
|
|
11
|
-
import { execFileSync, spawn } from "node:child_process";
|
|
12
12
|
import { serve } from "srvx";
|
|
13
13
|
import { getProxyForUrl } from "proxy-from-env";
|
|
14
14
|
import { Agent, ProxyAgent, setGlobalDispatcher } from "undici";
|
|
15
15
|
import { Hono } from "hono";
|
|
16
16
|
import { cors } from "hono/cors";
|
|
17
|
-
import { streamSSE } from "hono/streaming";
|
|
18
17
|
import { events } from "fetch-event-stream";
|
|
18
|
+
import { z } from "zod";
|
|
19
19
|
import clipboard from "clipboardy";
|
|
20
20
|
|
|
21
21
|
//#region src/lib/paths.ts
|
|
@@ -31,11 +31,26 @@ const PATHS = {
|
|
|
31
31
|
},
|
|
32
32
|
get ERROR_LOG_PATH() {
|
|
33
33
|
return path.join(appDir(), "error.log");
|
|
34
|
+
},
|
|
35
|
+
get CODEX_HOME() {
|
|
36
|
+
return path.join(appDir(), "codex-isolated");
|
|
37
|
+
},
|
|
38
|
+
get CLAUDE_RUNTIME_DIR() {
|
|
39
|
+
return path.join(appDir(), "runtime");
|
|
34
40
|
}
|
|
35
41
|
};
|
|
36
42
|
/**
 * Prepare the application's on-disk layout and clear leftovers from
 * earlier runs.
 *
 * Creates the app directory, the isolated Codex home and the Claude
 * runtime directory (in that order), restricts the runtime directory to
 * its owner where the platform allows it, and ensures the GitHub token
 * file via `ensureFile`. Both stale-file sweeps are best-effort: a
 * failure is logged at debug level and never aborts startup.
 */
async function ensurePaths() {
	for (const key of [
		"APP_DIR",
		"CODEX_HOME",
		"CLAUDE_RUNTIME_DIR"
	]) await fs.mkdir(PATHS[key], { recursive: true });
	await chmodIfPossible(PATHS.CLAUDE_RUNTIME_DIR, 0o700);
	await ensureFile(PATHS.GITHUB_TOKEN_PATH);
	try {
		await sweepStaleRuntimeFiles();
	} catch (err) {
		consola.debug("Runtime sweep skipped:", err);
	}
	try {
		await sweepStalePeerAgentMdFiles();
	} catch (err) {
		consola.debug("Peer-agent .md sweep skipped:", err);
	}
}
|
|
40
55
|
async function ensureFile(filePath) {
|
|
41
56
|
try {
|
|
@@ -45,6 +60,129 @@ async function ensureFile(filePath) {
|
|
|
45
60
|
await fs.chmod(filePath, 384);
|
|
46
61
|
}
|
|
47
62
|
}
|
|
63
|
+
/**
 * Best-effort `chmod`.
 *
 * Does nothing on win32. On other platforms a failing `chmod` is logged
 * at debug level (with the mode rendered in octal) instead of being
 * propagated to the caller.
 *
 * @param {string} target - Path whose permission bits should be changed.
 * @param {number} mode - Numeric permission bits.
 * @returns {Promise<void>}
 */
async function chmodIfPossible(target, mode) {
	const onWindows = process.platform === "win32";
	if (!onWindows) await fs.chmod(target, mode).catch((failure) => {
		consola.debug(`chmod ${target} ${mode.toString(8)} failed:`, failure);
	});
}
|
|
71
|
+
/**
 * Create a runtime tempfile without ever clobbering an existing path.
 *
 * - The file is created with mode `0o600`, so other local users on a
 *   shared machine or dev container cannot read the per-launch nonce or
 *   runtime URL stored in it.
 * - The `"wx"` flag (O_CREAT | O_EXCL | O_WRONLY) makes the write fail
 *   when the path already exists. With O_EXCL, POSIX open(2) also
 *   refuses a pre-placed symlink, which closes the symlink-clobber
 *   attack.
 * - Picking a path that is not yet in use is the caller's job. There is
 *   deliberately no pre-unlink step: `lstat` + `unlink` + exclusive open
 *   still leaves a TOCTOU window in which a symlink can be dropped
 *   between the unlink and the open. Letting the exclusive create fail
 *   surfaces the conflict instead of silently following it.
 *
 * @param {string} filePath - Destination path; must not exist yet.
 * @param {string | Uint8Array} content - Data to write.
 * @returns {Promise<void>} Rejects if the path already exists.
 */
async function writeRuntimeFileSecure(filePath, content) {
	const exclusiveOwnerOnly = {
		flag: "wx",
		mode: 0o600
	};
	await fs.writeFile(filePath, content, exclusiveOwnerOnly);
}
|
|
92
|
+
/**
 * Remove runtime tempfiles left behind by proxy processes that are no
 * longer running.
 *
 * A crashed proxy (`kill -9`, OS reboot) leaves orphaned files that
 * would otherwise pile up forever. Worse, a stale config that points at
 * a since-recycled port could send MCP traffic to whatever process
 * bound that port next.
 *
 * Only names of the form `peer-mcp-<pid>.json` or
 * `peer-agents-<pid>.json`, optionally with a `-<hex>` suffix after the
 * PID, are considered. Anything else is left alone because the
 * directory is shared with other runtime artifacts.
 *
 * Files whose PID is still alive are never pruned by age: a
 * long-running proxy can legitimately own an old tempfile, and deleting
 * it would break the spawned Claude Code child's MCP/agent wiring with
 * no clean recovery. The PID-reuse risk is accepted because PID reuse
 * on Linux is slow under typical loads and only github-router itself
 * ever reads these files.
 *
 * A missing runtime directory is treated as "nothing to sweep"; any
 * other `readdir` failure is rethrown. Individual unlink failures are
 * ignored on purpose (best-effort cleanup).
 */
async function sweepStaleRuntimeFiles() {
	const runtimeDir = PATHS.CLAUDE_RUNTIME_DIR;
	const runtimeFilePattern = /^peer-(?:mcp|agents)-(\d+)(?:-[0-9a-f]+)?\.json$/;
	const listing = await fs.readdir(runtimeDir).catch((err) => {
		if (err.code === "ENOENT") return void 0;
		throw err;
	});
	if (listing === void 0) return;
	for (const fileName of listing) {
		const ownerPid = runtimeFilePattern.exec(fileName)?.[1];
		if (ownerPid === void 0) continue;
		if (isPidAlive(Number.parseInt(ownerPid, 10))) continue;
		try {
			await fs.unlink(path.join(runtimeDir, fileName));
		} catch {}
	}
}
|
|
130
|
+
/**
 * Report whether a process with the given PID currently exists.
 *
 * Sends signal 0, which performs the existence/permission check without
 * delivering a signal. An `EPERM` failure still counts as alive: the
 * process exists, we just may not signal it. Any other failure, and any
 * value that is not a positive integer, counts as not alive.
 *
 * @param {number} pid - Candidate process id.
 * @returns {boolean} `true` when the process appears to exist.
 */
function isPidAlive(pid) {
	const plausible = Number.isInteger(pid) && pid > 0;
	if (!plausible) return false;
	try {
		process.kill(pid, 0);
	} catch (err) {
		return err.code === "EPERM";
	}
	return true;
}
|
|
140
|
+
/**
 * Remove orphaned peer-agent `.md` files from `~/.claude/agents/`.
 *
 * Phase 2.5 writes one `.md` per peer agent into the canonical agents
 * directory so the agents show up in Claude Code's Task `subagent_type`
 * enum. Those files are named `peer-<pid>-<rand>-<agentName>.md`, which
 * lets this sweep drop leftovers from crashed proxy sessions without
 * touching the user's own `.md` files.
 *
 * The liveness rule matches `sweepStaleRuntimeFiles`: a file is deleted
 * only when the PID embedded in its name is no longer alive, so a
 * long-running proxy keeps its agent registrations.
 *
 * Matching is delegated to `PEER_AGENT_MD_FILENAME`, which demands both
 * the 8-hex-char random part and an exact persona name. An earlier,
 * looser pattern (`^peer-(\d+)(?:-[0-9a-f]+)?-.+\.md$`) would also have
 * matched a user-authored file such as `peer-12345-meeting-notes.md`
 * and unlinked it whenever 12345 was a dead PID.
 *
 * A missing agents directory is treated as "nothing to sweep"; any
 * other `readdir` failure is rethrown. Individual unlink failures are
 * ignored on purpose (best-effort cleanup).
 */
async function sweepStalePeerAgentMdFiles() {
	const agentsDir = path.join(os.homedir(), ".claude", "agents");
	const listing = await fs.readdir(agentsDir).catch((err) => {
		if (err.code === "ENOENT") return void 0;
		throw err;
	});
	if (listing === void 0) return;
	for (const fileName of listing) {
		const ownerPid = PEER_AGENT_MD_FILENAME.exec(fileName)?.[1];
		if (ownerPid === void 0) continue;
		if (isPidAlive(Number.parseInt(ownerPid, 10))) continue;
		try {
			await fs.unlink(path.join(agentsDir, fileName));
		} catch {}
	}
}
|
|
177
|
+
/**
 * Filename pattern that matches only the agent files this proxy writes:
 *   peer-<pid>-<8 hex>-<exact persona/coordinator name>.md
 * Capture group 1 is the PID.
 *
 * The persona-name allowlist is what protects user files from being
 * deleted. Extend the list below whenever a persona is added to
 * `PERSONAS_READ` / `PERSONAS_WRITE` in `peer-mcp-personas.ts`, or a new
 * coordinator-style agent is added in `codex-mcp-config.ts`.
 */
const PEER_AGENT_MD_FILENAME = new RegExp(`^peer-(\\d+)-[0-9a-f]{8}-(?:${[
	"codex-critic",
	"codex-reviewer",
	"gemini-critic",
	"codex-implementer",
	"peer-review-coordinator"
].join("|")})\\.md$`);
|
|
48
186
|
|
|
49
187
|
//#endregion
|
|
50
188
|
//#region src/lib/state.ts
|
|
@@ -68,17 +206,17 @@ const DEFAULT_COPILOT_VERSION = "0.43.2026033101";
|
|
|
68
206
|
/**
 * Resolve the Copilot Chat version to advertise: the value stored on
 * the state object, or `DEFAULT_COPILOT_VERSION` when that value is
 * null or undefined.
 */
function copilotVersion(state$1) {
	const { copilotVersion: configured } = state$1;
	return configured ?? DEFAULT_COPILOT_VERSION;
}
|
|
71
|
-
const API_VERSION = "
|
|
209
|
+
const API_VERSION = "2026-01-09";
|
|
72
210
|
const copilotBaseUrl = (state$1) => state$1.copilotApiUrl ?? "https://api.githubcopilot.com";
|
|
73
211
|
const copilotHeaders = (state$1, vision = false, integrationId = "vscode-chat") => {
|
|
74
|
-
const version = copilotVersion(state$1);
|
|
212
|
+
const version$1 = copilotVersion(state$1);
|
|
75
213
|
const headers = {
|
|
76
214
|
Authorization: `Bearer ${state$1.copilotToken}`,
|
|
77
215
|
"content-type": standardHeaders()["content-type"],
|
|
78
216
|
"copilot-integration-id": integrationId,
|
|
79
217
|
"editor-version": `vscode/${state$1.vsCodeVersion}`,
|
|
80
|
-
"editor-plugin-version": `copilot-chat/${version}`,
|
|
81
|
-
"user-agent": `GitHubCopilotChat/${version}`,
|
|
218
|
+
"editor-plugin-version": `copilot-chat/${version$1}`,
|
|
219
|
+
"user-agent": `GitHubCopilotChat/${version$1}`,
|
|
82
220
|
"openai-intent": "conversation-panel",
|
|
83
221
|
"x-interaction-type": "conversation-panel",
|
|
84
222
|
"x-github-api-version": API_VERSION,
|
|
@@ -114,7 +252,7 @@ var HTTPError = class extends Error {
|
|
|
114
252
|
}
|
|
115
253
|
};
|
|
116
254
|
async function forwardError(c, error) {
|
|
117
|
-
consola.error(
|
|
255
|
+
consola.error(`Error occurred at ${c.req.path}:`, error);
|
|
118
256
|
if (error instanceof HTTPError) {
|
|
119
257
|
const errorText = await error.response.text().catch(() => "");
|
|
120
258
|
let errorJson;
|
|
@@ -123,6 +261,17 @@ async function forwardError(c, error) {
|
|
|
123
261
|
} catch {
|
|
124
262
|
errorJson = void 0;
|
|
125
263
|
}
|
|
264
|
+
if (isContextOverflow(error.response.status, errorJson, errorText)) {
|
|
265
|
+
const upstream = resolveErrorMessage(errorJson, errorText);
|
|
266
|
+
consola.error("HTTP error (mapped to overflow):", errorJson ?? errorText);
|
|
267
|
+
return c.json({
|
|
268
|
+
type: "error",
|
|
269
|
+
error: {
|
|
270
|
+
type: "invalid_request_error",
|
|
271
|
+
message: `prompt is too long: ${upstream}`
|
|
272
|
+
}
|
|
273
|
+
}, 400);
|
|
274
|
+
}
|
|
126
275
|
if (isAnthropicError(errorJson)) {
|
|
127
276
|
consola.error("HTTP error:", errorJson);
|
|
128
277
|
return c.json(errorJson, error.response.status);
|
|
@@ -167,6 +316,29 @@ function isAnthropicError(json) {
|
|
|
167
316
|
const inner = record.error;
|
|
168
317
|
return typeof inner.type === "string" && typeof inner.message === "string";
|
|
169
318
|
}
|
|
319
|
+
/** Lower-case phrases that mark an upstream error as a context overflow. */
const CONTEXT_OVERFLOW_SUBSTRINGS = [
	"prompt is too long",
	"context_length_exceeded",
	"context length exceeded",
	"input is too long",
	"maximum context length",
	"too many tokens"
];
/**
 * Decide whether an upstream error is a context overflow, so the caller
 * can remap it to the 400 "prompt is too long" shape that triggers
 * Claude Code self-compaction.
 *
 * - 413 always qualifies: it is treated as a hard payload-size signal
 *   whatever the body says.
 * - 400 qualifies only when the raw text or the serialized JSON body
 *   contains one of `CONTEXT_OVERFLOW_SUBSTRINGS` (case-insensitive). An
 *   ordinary 400 such as "model not found" must not be remapped.
 * - Every other status is rejected.
 *
 * @param {number} status - Upstream HTTP status code.
 * @param {unknown} errorJson - Parsed error body, if it parsed.
 * @param {string} errorText - Raw error body text.
 * @returns {boolean}
 */
function isContextOverflow(status, errorJson, errorText) {
	switch (status) {
		case 413: return true;
		case 400: break;
		default: return false;
	}
	const hasJsonBody = typeof errorJson === "object" && errorJson !== null;
	const serializedJson = hasJsonBody ? JSON.stringify(errorJson) : "";
	const haystack = (errorText + " " + serializedJson).toLowerCase();
	for (const marker of CONTEXT_OVERFLOW_SUBSTRINGS) if (haystack.includes(marker)) return true;
	return false;
}
|
|
170
342
|
/**
|
|
171
343
|
* Map HTTP status to Anthropic error type.
|
|
172
344
|
*/
|
|
@@ -182,11 +354,35 @@ function resolveErrorType(status) {
|
|
|
182
354
|
|
|
183
355
|
//#endregion
|
|
184
356
|
//#region src/services/github/get-copilot-token.ts
|
|
357
|
+
/**
 * Hosts the router accepts as the Copilot API base URL.
 *
 * Any other value in `endpoints.api` (for example from a tampered or
 * misconfigured token-exchange response) is rejected. Otherwise a
 * malicious endpoint would receive the long-lived GitHub PAT that is
 * sent to `/mcp` for web search (see
 * `src/services/copilot/web-search.ts`).
 */
const COPILOT_HOST_ALLOWLIST = [
	"api.githubcopilot.com",
	"api.individual.githubcopilot.com",
	"api.business.githubcopilot.com",
	"api.enterprise.githubcopilot.com"
];
/**
 * Check a candidate Copilot API URL against the allowlist.
 *
 * @param {string} rawUrl - URL as received from the token exchange.
 * @returns {boolean} `true` only for a parseable `https:` URL whose
 *   hostname is listed in `COPILOT_HOST_ALLOWLIST`; unparseable input
 *   yields `false`.
 */
function isAllowedCopilotHost(rawUrl) {
	try {
		const { protocol, hostname } = new URL(rawUrl);
		return protocol === "https:" && COPILOT_HOST_ALLOWLIST.includes(hostname);
	} catch {
		return false;
	}
}
|
|
185
380
|
/**
 * Exchange the GitHub credentials for a Copilot token.
 *
 * Throws `HTTPError` when the token endpoint answers with a non-OK
 * status. If the response advertises an API endpoint, it is adopted as
 * `state.copilotApiUrl` only when `isAllowedCopilotHost` accepts it;
 * otherwise a warning is logged and the current setting is left as is.
 *
 * @returns {Promise<object>} The parsed token-exchange response.
 */
const getCopilotToken = async () => {
	const tokenUrl = `${GITHUB_API_BASE_URL}/copilot_internal/v2/token`;
	const response = await fetch(tokenUrl, { headers: githubHeaders(state) });
	if (!response.ok) throw new HTTPError("Failed to get Copilot token", response);
	const data = await response.json();
	if (data.endpoints?.api) {
		if (isAllowedCopilotHost(data.endpoints.api)) {
			state.copilotApiUrl = data.endpoints.api;
		} else {
			const refusal = `Refusing to honor Copilot API endpoint "${data.endpoints.api}" from the token-exchange response — not in allowlist (${COPILOT_HOST_ALLOWLIST.join(", ")}). `;
			const outcome = state.copilotApiUrl ? `Keeping existing override "${state.copilotApiUrl}".` : `Falling back to the default api.githubcopilot.com.`;
			consola.warn(refusal + outcome);
		}
	}
	return data;
};
|
|
192
388
|
|
|
@@ -297,12 +493,14 @@ const VSCODE_BETA_PREFIXES = [
|
|
|
297
493
|
* Enabled via --extended-betas flag. Includes all betas confirmed
|
|
298
494
|
* to work with the Copilot API.
|
|
299
495
|
*
|
|
300
|
-
* Notably absent
|
|
496
|
+
* Notably absent (Copilot 400s on these — verified live):
|
|
497
|
+
* context-1m-, skills-, files-api-, code-execution-, output-128k-.
|
|
498
|
+
* 1M context is unlocked by selecting `claude-opus-4.7-1m-internal`
|
|
499
|
+
* as the model id, not via a beta header.
|
|
301
500
|
*/
|
|
302
501
|
const EXTENDED_BETA_PREFIXES = [
|
|
303
502
|
...VSCODE_BETA_PREFIXES,
|
|
304
503
|
"claude-code-",
|
|
305
|
-
"context-1m-",
|
|
306
504
|
"effort-",
|
|
307
505
|
"prompt-caching-",
|
|
308
506
|
"computer-use-",
|
|
@@ -312,10 +510,8 @@ const EXTENDED_BETA_PREFIXES = [
|
|
|
312
510
|
"compact-",
|
|
313
511
|
"structured-outputs-",
|
|
314
512
|
"fast-mode-",
|
|
315
|
-
"skills-",
|
|
316
513
|
"mcp-client-",
|
|
317
514
|
"mcp-servers-",
|
|
318
|
-
"files-api-",
|
|
319
515
|
"redact-thinking-",
|
|
320
516
|
"web-search-"
|
|
321
517
|
];
|
|
@@ -355,7 +551,10 @@ function resolveModel(modelId) {
|
|
|
355
551
|
const ciMatch = models.find((m) => m.id.toLowerCase() === lower);
|
|
356
552
|
if (ciMatch) return ciMatch.id;
|
|
357
553
|
if (lower.includes("opus")) {
|
|
358
|
-
const
|
|
554
|
+
const oneMs = models.filter((m) => m.id.includes("opus") && /-1m(?:$|-)/.test(m.id));
|
|
555
|
+
const versionMatch = lower.match(/opus-(\d+)[.-](\d+)/);
|
|
556
|
+
const requestedVersion = versionMatch ? `${versionMatch[1]}.${versionMatch[2]}` : void 0;
|
|
557
|
+
const oneM = (requestedVersion ? oneMs.find((m) => m.id.includes(`opus-${requestedVersion}-`)) : void 0) ?? oneMs[0];
|
|
359
558
|
if (oneM) return oneM.id;
|
|
360
559
|
}
|
|
361
560
|
if (lower.includes("codex")) {
|
|
@@ -380,13 +579,19 @@ function resolveCodexModel(modelId) {
|
|
|
380
579
|
const models = state.models?.data;
|
|
381
580
|
if (!models) return resolved;
|
|
382
581
|
if (models.some((m) => m.id === resolved)) return resolved;
|
|
383
|
-
const
|
|
582
|
+
const candidates = models.filter((m) => {
|
|
384
583
|
const endpoints = m.supported_endpoints ?? [];
|
|
385
|
-
|
|
584
|
+
if (m.id.includes("mini") || m.id.includes("nano")) return false;
|
|
585
|
+
return endpoints.length === 0 || endpoints.includes("/responses");
|
|
386
586
|
});
|
|
387
|
-
if (
|
|
388
|
-
|
|
389
|
-
|
|
587
|
+
if (candidates.length > 0) {
|
|
588
|
+
candidates.sort((a, b) => {
|
|
589
|
+
const aCodex = a.id.includes("codex") ? 1 : 0;
|
|
590
|
+
const bCodex = b.id.includes("codex") ? 1 : 0;
|
|
591
|
+
if (aCodex !== bCodex) return bCodex - aCodex;
|
|
592
|
+
return b.id.localeCompare(a.id);
|
|
593
|
+
});
|
|
594
|
+
const best = candidates[0].id;
|
|
390
595
|
consola.warn(`Model "${modelId}" not available, using "${best}" instead`);
|
|
391
596
|
return best;
|
|
392
597
|
}
|
|
@@ -401,9 +606,9 @@ const cacheVSCodeVersion = async () => {
|
|
|
401
606
|
consola.info(`Using VSCode version: ${response}`);
|
|
402
607
|
};
|
|
403
608
|
/**
 * Fetch the Copilot Chat version, store it on the shared state and
 * report it at info level.
 */
const cacheCopilotVersion = async () => {
	const chatVersion = await getCopilotChatVersion();
	state.copilotVersion = chatVersion;
	consola.info(`Using Copilot Chat version: ${chatVersion}`);
};
|
|
408
613
|
|
|
409
614
|
//#endregion
|
|
@@ -448,18 +653,62 @@ const setupCopilotToken = async () => {
|
|
|
448
653
|
consola.debug("GitHub Copilot Token fetched successfully!");
|
|
449
654
|
if (state.showToken) consola.info("Copilot token:", token);
|
|
450
655
|
const refreshInterval = Math.max((refresh_in - 60) * 1e3, 1e3);
|
|
451
|
-
setInterval(
|
|
452
|
-
|
|
656
|
+
setInterval(() => {
|
|
657
|
+
refreshCopilotToken("interval");
|
|
658
|
+
}, refreshInterval);
|
|
659
|
+
};
|
|
660
|
+
/** Promise of the refresh currently running, if any (single-flight). */
let inflightRefresh;
/** `Date.now()` of the most recent successful refresh; 0 before the first. */
let lastRefreshSuccess = 0;
/** `Date.now()` of the most recent failed refresh; 0 before the first. */
let lastRefreshFailure = 0;
const REFRESH_SUCCESS_COOLDOWN_MS = 3e4;
const REFRESH_FAILURE_COOLDOWN_MS = 5e3;
/**
 * Refresh `state.copilotToken`.
 *
 * - Single-flight: while a refresh is running, every caller receives
 *   that same promise.
 * - Storm protection for `reason === "401-retry"` only: the call is
 *   skipped (resolving to `undefined`) when a refresh succeeded within
 *   `REFRESH_SUCCESS_COOLDOWN_MS` or failed within
 *   `REFRESH_FAILURE_COOLDOWN_MS`. Other reasons ignore the cooldowns.
 * - Failures are logged and recorded, never thrown.
 *
 * @param {string} reason - Why the refresh was requested; used in logs
 *   and to decide whether the cooldowns apply.
 * @returns {Promise<void>}
 */
async function refreshCopilotToken(reason) {
	if (inflightRefresh) return inflightRefresh;
	if (reason === "401-retry") {
		const now = Date.now();
		const cooldowns = [["success", lastRefreshSuccess, REFRESH_SUCCESS_COOLDOWN_MS], ["failure", lastRefreshFailure, REFRESH_FAILURE_COOLDOWN_MS]];
		for (const [outcome, timestamp, windowMs] of cooldowns) {
			if (now - timestamp >= windowMs) continue;
			consola.debug(`refreshCopilotToken(${reason}) skipped: prior ${outcome} within ${windowMs}ms`);
			return;
		}
	}
	const performRefresh = async () => {
		consola.debug(`Refreshing Copilot token (reason=${reason})`);
		try {
			const { token } = await getCopilotToken();
			state.copilotToken = token;
			lastRefreshSuccess = Date.now();
			consola.debug("Copilot token refreshed");
			if (state.showToken) consola.info("Refreshed Copilot token:", token);
		} catch (error) {
			lastRefreshFailure = Date.now();
			consola.error(`Failed to refresh Copilot token (reason=${reason}):`, error);
		} finally {
			// Release the single-flight slot whatever the outcome.
			inflightRefresh = void 0;
		}
	};
	inflightRefresh = performRefresh();
	return inflightRefresh;
}
|
|
695
|
+
/**
 * Run `request()`, and on a 401 refresh the Copilot token and run it
 * exactly once more.
 *
 * The refresh goes through `refreshCopilotToken("401-retry")`, so it is
 * subject to that function's single-flight and refresh-storm
 * protection. Whatever the second attempt returns is handed back
 * unchanged, which keeps the caller's own 401 handling intact.
 *
 * The callback must read `state.copilotToken` itself each time it is
 * invoked (capturing it locally before any await): this helper does not
 * rebuild the request, it only re-invokes the callback after a refresh.
 *
 * @param {() => Promise<{ status: number }>} request - Performs the
 *   upstream call.
 * @param {string} routePath - Route name used in the warning log.
 * @returns {Promise<{ status: number }>} The first response unless it
 *   was a 401, otherwise the response of the retry.
 */
async function tryRefreshAndRetry(request, routePath) {
	const initial = await request();
	if (initial.status === 401) {
		consola.warn(`${routePath}: upstream returned 401, attempting one token refresh + retry`);
		await refreshCopilotToken("401-retry");
		return request();
	}
	return initial;
}
|
|
463
712
|
async function setupGitHubToken(options) {
|
|
464
713
|
try {
|
|
465
714
|
const githubToken = await readGithubToken();
|
|
@@ -555,13 +804,13 @@ const checkUsage = defineCommand({
|
|
|
555
804
|
const premiumUsed = premiumTotal - premium.remaining;
|
|
556
805
|
const premiumPercentUsed = premiumTotal > 0 ? premiumUsed / premiumTotal * 100 : 0;
|
|
557
806
|
const premiumPercentRemaining = premium.percent_remaining;
|
|
558
|
-
function summarizeQuota(name, snap) {
|
|
559
|
-
if (!snap) return `${name}: N/A`;
|
|
807
|
+
function summarizeQuota(name$1, snap) {
|
|
808
|
+
if (!snap) return `${name$1}: N/A`;
|
|
560
809
|
const total = snap.entitlement;
|
|
561
810
|
const used = total - snap.remaining;
|
|
562
811
|
const percentUsed = total > 0 ? used / total * 100 : 0;
|
|
563
812
|
const percentRemaining = snap.percent_remaining;
|
|
564
|
-
return `${name}: ${used}/${total} used (${percentUsed.toFixed(1)}% used, ${percentRemaining.toFixed(1)}% remaining)`;
|
|
813
|
+
return `${name$1}: ${used}/${total} used (${percentUsed.toFixed(1)}% used, ${percentRemaining.toFixed(1)}% remaining)`;
|
|
565
814
|
}
|
|
566
815
|
const premiumLine = `Premium: ${premiumUsed}/${premiumTotal} used (${premiumPercentUsed.toFixed(1)}% used, ${premiumPercentRemaining.toFixed(1)}% remaining)`;
|
|
567
816
|
const chatLine = summarizeQuota("Chat", usage.quota_snapshots.chat);
|
|
@@ -575,140 +824,193 @@ const checkUsage = defineCommand({
|
|
|
575
824
|
});
|
|
576
825
|
|
|
577
826
|
//#endregion
|
|
578
|
-
//#region src/lib/
|
|
579
|
-
const
|
|
580
|
-
const DEDUP_MAX = 1e3;
|
|
581
|
-
const ARG_MAX_LEN = 2048;
|
|
582
|
-
const DEDUP_KEY_MAX_LEN = 200;
|
|
583
|
-
const CREDENTIAL_RE = /\b(eyJ[A-Za-z0-9_-]{20,}(?:\.[A-Za-z0-9_-]+){0,2}|gh[opsu]_[A-Za-z0-9_]{20,}|Bearer\s+\S{20,})\b/g;
|
|
584
|
-
const ALLOWED_TYPES = new Set([
|
|
585
|
-
"fatal",
|
|
586
|
-
"error",
|
|
587
|
-
"warn"
|
|
588
|
-
]);
|
|
589
|
-
function sanitize(line) {
|
|
590
|
-
return line.replace(CREDENTIAL_RE, "[REDACTED]");
|
|
591
|
-
}
|
|
592
|
-
function serializeArg(arg) {
|
|
593
|
-
if (typeof arg === "string") return arg;
|
|
594
|
-
if (arg instanceof Error) {
|
|
595
|
-
const parts = [arg.message];
|
|
596
|
-
if (arg.stack) parts.push(arg.stack);
|
|
597
|
-
return parts.join("\n");
|
|
598
|
-
}
|
|
599
|
-
return String(arg);
|
|
600
|
-
}
|
|
601
|
-
function formatLogLine(logObj) {
|
|
602
|
-
return sanitize(`${logObj.date.toISOString()} [${(logObj.type ?? "error").toUpperCase()}] ${logObj.args.map((a) => {
|
|
603
|
-
const s = serializeArg(a);
|
|
604
|
-
return s.length > ARG_MAX_LEN ? s.slice(0, ARG_MAX_LEN) + "…" : s;
|
|
605
|
-
}).join(" ").replace(/\r\n|\r|\n/g, "\\n")}\n`);
|
|
606
|
-
}
|
|
607
|
-
function makeDedupeKey(logObj) {
|
|
608
|
-
const firstArg = logObj.args.length > 0 ? serializeArg(logObj.args[0]) : "";
|
|
609
|
-
const key = `${logObj.type}:${firstArg}`;
|
|
610
|
-
return key.length > DEDUP_KEY_MAX_LEN ? key.slice(0, DEDUP_KEY_MAX_LEN) : key;
|
|
611
|
-
}
|
|
612
|
-
function rotateIfNeeded(filePath) {
|
|
613
|
-
let size;
|
|
614
|
-
try {
|
|
615
|
-
size = fs$1.statSync(filePath).size;
|
|
616
|
-
} catch {
|
|
617
|
-
return;
|
|
618
|
-
}
|
|
619
|
-
if (size <= MAX_LOG_BYTES) return;
|
|
620
|
-
try {
|
|
621
|
-
fs$1.renameSync(filePath, filePath + ".1");
|
|
622
|
-
} catch {}
|
|
623
|
-
}
|
|
624
|
-
var FileLogReporter = class {
|
|
625
|
-
filePath;
|
|
626
|
-
seen = /* @__PURE__ */ new Set();
|
|
627
|
-
writing = false;
|
|
628
|
-
constructor(filePath) {
|
|
629
|
-
this.filePath = filePath;
|
|
630
|
-
rotateIfNeeded(filePath);
|
|
631
|
-
}
|
|
632
|
-
log(logObj, _ctx) {
|
|
633
|
-
if (!ALLOWED_TYPES.has(logObj.type)) return;
|
|
634
|
-
if (this.writing) return;
|
|
635
|
-
const key = makeDedupeKey(logObj);
|
|
636
|
-
if (this.seen.has(key)) return;
|
|
637
|
-
if (this.seen.size >= DEDUP_MAX) this.seen.clear();
|
|
638
|
-
this.seen.add(key);
|
|
639
|
-
const line = formatLogLine(logObj);
|
|
640
|
-
this.writing = true;
|
|
641
|
-
try {
|
|
642
|
-
const fd = fs$1.openSync(this.filePath, "a", 384);
|
|
643
|
-
fs$1.writeSync(fd, line);
|
|
644
|
-
fs$1.closeSync(fd);
|
|
645
|
-
} catch {} finally {
|
|
646
|
-
this.writing = false;
|
|
647
|
-
}
|
|
648
|
-
}
|
|
649
|
-
};
|
|
650
|
-
const nullStream = new Writable({ write(_chunk, _encoding, cb) {
|
|
651
|
-
cb();
|
|
652
|
-
} });
|
|
827
|
+
//#region src/lib/port.ts
|
|
828
|
+
const DEFAULT_PORT = 8787;
/**
 * Default model for `github-router claude`.
 *
 * This is the Anthropic-published dashed slug (`claude-opus-4-7`), not
 * the Copilot-internal slug (`claude-opus-4.7-1m-internal`). Claude Code
 * 2.1.126's `/model` UI is backed by a hardcoded registry of Anthropic
 * slugs, and an unrecognized slug makes the menu highlight "Opus 4"
 * with a "Newer version available" hint instead of
 * "Opus 4.7 (1M context)".
 *
 * At request time the proxy's `resolveModel` (`src/lib/utils.ts`)
 * translates the slug to Copilot's `claude-opus-4.7-1m-internal`
 * (enterprise) or `claude-opus-4.7` (Pro+/Business/Max) through its
 * family-preference + version-match branch; the round trip is covered
 * by `tests/lib-utils.test.ts:154`.
 *
 * `DEFAULT_CLAUDE_MODEL_FALLBACKS` only covers major.minor regressions.
 * The 1M↔200K downgrade is handled inside the resolver, so no separate
 * `-1m` entries are needed here.
 */
const DEFAULT_CLAUDE_MODEL = "claude-opus-4-7";
const DEFAULT_CLAUDE_MODEL_FALLBACKS = ["claude-opus-4-6", "claude-opus-4-5"];
/**
 * Default model for `github-router codex`.
 *
 * `gpt-5.5` is the new flagship `/responses` model. The fallback chain
 * serves older Copilot tiers where 5.5 has not rolled out yet, and
 * `resolveCodexModel` adds a final "best available `/responses` model"
 * safety net beyond this list.
 */
const DEFAULT_CODEX_MODEL = "gpt-5.5";
const DEFAULT_CODEX_MODEL_FALLBACKS = [
	"gpt-5.4",
	"gpt-5.3-codex",
	"gpt-5.2-codex"
];
|
|
675
861
|
const PORT_RANGE_MIN = 11e3;
const PORT_RANGE_MAX = 65535;
/**
 * Pick a random port number in the inclusive range [11000, 65535].
 *
 * Uses `Math.random()`, so the choice is not cryptographically secure.
 *
 * @returns {number} An integer port number.
 */
function generateRandomPort() {
	const portCount = PORT_RANGE_MAX - PORT_RANGE_MIN + 1;
	const offset = Math.floor(Math.random() * portCount);
	return offset + PORT_RANGE_MIN;
}
|
|
867
|
+
/**
 * Read a positive integer from an environment variable.
 *
 * The fallback is returned when the variable is unset or empty, when it
 * parses to zero, or when the parsed value is not finite. A value that
 * is not made up purely of decimal digits (after trimming) also yields
 * the fallback, with a warning.
 *
 * @param {string} key - Environment variable name.
 * @param {number} fallback - Value used when no usable integer is set.
 * @returns {number}
 */
function envInt(key, fallback) {
	const raw = process.env[key];
	if (!raw) return fallback;
	const digitsOnly = /^[0-9]+$/.test(raw.trim());
	if (digitsOnly) {
		const parsed = Number.parseInt(raw, 10);
		const usable = Number.isFinite(parsed) && parsed > 0;
		return usable ? parsed : fallback;
	}
	consola.warn(`${key}=${JSON.stringify(raw)} is not a non-negative integer; using fallback ${fallback}`);
	return fallback;
}
|
|
877
|
+
const UPSTREAM_FETCH_TIMEOUT_MS = envInt("UPSTREAM_FETCH_TIMEOUT_MS", 0);
|
|
878
|
+
const UPSTREAM_INACTIVITY_TIMEOUT_MS = envInt("UPSTREAM_INACTIVITY_TIMEOUT_MS", 3e5);
|
|
681
879
|
|
|
682
880
|
//#endregion
|
|
683
881
|
//#region src/lib/launch.ts
|
|
684
|
-
|
|
882
|
+
/**
 * Auth-related environment keys removed from the parent environment
 * before the child CLI is spawned.
 *
 * The proxy supplies its own values for the ones it cares about
 * (ANTHROPIC_BASE_URL, ANTHROPIC_AUTH_TOKEN, OPENAI_BASE_URL,
 * OPENAI_API_KEY, CODEX_HOME, ANTHROPIC_MODEL). For the rest the child
 * should behave as if the user had no parent-env auth at all.
 *
 * Keys are removed rather than overridden with an empty string because:
 * - Claude Code prints "Auth conflict" warnings whenever both
 *   ANTHROPIC_AUTH_TOKEN and ANTHROPIC_API_KEY are present, whatever
 *   their values (even when both are "dummy"). Dropping API_KEY
 *   silences the warning and keeps a real key inherited from the shell
 *   from leaking via x-api-key.
 * - Cloud-provider toggles (CLAUDE_CODE_USE_*), OAUTH_TOKEN and similar
 *   are simpler to drop than to override: a missing variable is
 *   unambiguously falsy/absent in every code path that reads it.
 */
const STRIPPED_PARENT_ENV_KEYS = [
	"ANTHROPIC_API_KEY",
	"ANTHROPIC_AUTH_TOKEN",
	"ANTHROPIC_BASE_URL",
	"ANTHROPIC_CUSTOM_HEADERS",
	"ANTHROPIC_MODEL",
	"CLAUDE_CODE_OAUTH_TOKEN",
	"CLAUDE_CODE_USE_BEDROCK",
	"CLAUDE_CODE_USE_VERTEX",
	"CLAUDE_CODE_USE_FOUNDRY",
	"CLAUDE_CONFIG_DIR",
	"OPENAI_API_KEY",
	"OPENAI_BASE_URL",
	"CODEX_HOME"
];
/**
 * Return a shallow copy of a parent-process environment with every key
 * in `STRIPPED_PARENT_ENV_KEYS` removed. The input is not modified.
 *
 * The result is meant to be spread into a spawned child's environment
 * before the proxy's explicit overrides, so the proxy stays the only
 * source of auth settings and stale shell exports cannot leak through.
 *
 * @param {Record<string, string | undefined>} parent - Environment to copy.
 * @returns {Record<string, string | undefined>} The sanitized copy.
 */
function sanitizeParentEnv(parent) {
	const sanitized = Object.assign({}, parent);
	STRIPPED_PARENT_ENV_KEYS.forEach((key) => {
		delete sanitized[key];
	});
	return sanitized;
}
|
|
925
|
+
/**
 * Report whether an executable named `name$1` can be located on PATH.
 *
 * The lookup shells out to `where.exe` on Windows and `which` everywhere
 * else. Any failure of that lookup (non-zero exit, or the locator itself
 * being unavailable) is reported as "not found" rather than thrown.
 *
 * @param {string} name$1 - Executable name to look for.
 * @returns {boolean} `true` when the locator exits successfully.
 */
function commandExists(name$1) {
	const locator = process$1.platform === "win32" ? "where.exe" : "which";
	try {
		execFileSync(locator, [name$1], { stdio: "ignore" });
	} catch {
		return false;
	}
	return true;
}
|
|
933
|
+
/**
 * Build the `-c` override flags that register the proxy as a Codex model
 * provider named `github_router` and select it as the active provider.
 *
 * Kept as a standalone helper so that both the interactive launch argv
 * and the `codex mcp-server` MCP config are produced from the same
 * provider definition and cannot drift apart.
 *
 * @param {string} serverUrl - Base URL of the proxy; `/v1` is appended.
 * @returns {string[]} Four argv entries: `-c <provider table> -c <provider selection>`.
 */
function buildCodexProviderConfigFlags(serverUrl) {
	const providerTable = `{name="github-router",base_url="${serverUrl}/v1",wire_api="responses",env_key="OPENAI_API_KEY"}`;
	const defineProvider = `model_providers.github_router=${providerTable}`;
	const selectProvider = "model_provider=github_router";
	return ["-c", defineProvider, "-c", selectProvider];
}
|
|
948
|
+
/**
 * Probe the `codex` binary on PATH and report whether it is new enough
 * to expose the `mcp-server` subcommand (0.129.0 or later).
 *
 * Result shapes:
 *  - `{ ok: false }` — binary not found, or `codex --version` failed.
 *  - `{ ok: false, version: <raw output> }` — no `x.y.z` in the output.
 *  - `{ ok: <boolean>, version: "x.y.z" }` — parsed; `ok` is true for any
 *    major version above 0, or for 0.y.z with y >= 129.
 *
 * @returns {{ ok: boolean, version?: string }}
 */
function getCodexVersion() {
	if (!commandExists("codex")) return { ok: false };
	let raw;
	try {
		const spawnOptions = {
			encoding: "utf8",
			// Only stdout is captured; stdin and stderr are discarded.
			stdio: ["ignore", "pipe", "ignore"]
		};
		raw = execFileSync("codex", ["--version"], spawnOptions).trim();
	} catch {
		return { ok: false };
	}
	const match = raw.match(/(\d+)\.(\d+)\.(\d+)/);
	if (match === null) return { ok: false, version: raw };
	const [, majorText, minorText, patchText] = match;
	const major = Number.parseInt(majorText, 10);
	const minor = Number.parseInt(minorText, 10);
	const meetsMinimum = major > 0 || (major === 0 && minor >= 129);
	return {
		ok: meetsMinimum,
		version: `${majorText}.${minorText}.${patchText}`
	};
}
|
|
982
|
+
/**
 * Assemble the argv used to launch the interactive Codex CLI.
 *
 * Two Codex 0.129.0 changes shape this command line:
 *  1. `--full-auto` no longer exists (the child exits with
 *     `error: unexpected argument '--full-auto' found`), so the
 *     equivalent `--sandbox` + `--ask-for-approval` pair is passed.
 *  2. `OPENAI_BASE_URL` is silently ignored, so the proxy must be
 *     injected through explicit `-c model_providers.<name>...` flags.
 *
 * @param {{ serverUrl?: string, model?: string, extraArgs: string[] }} target
 *   Launch target. Provider flags are emitted only when `serverUrl` is
 *   truthy; `model` falls back to `DEFAULT_CODEX_MODEL` when nullish.
 * @returns {string[]} Full argv, starting with `"codex"`.
 */
function buildCodexCmd(target) {
	const providerFlags = target.serverUrl ? buildCodexProviderConfigFlags(target.serverUrl) : [];
	const model = target.model ?? DEFAULT_CODEX_MODEL;
	return [
		"codex",
		...providerFlags,
		"--sandbox",
		"workspace-write",
		"--ask-for-approval",
		"on-request",
		"-m",
		model,
		...target.extraArgs
	];
}
|
|
692
1000
|
/**
 * Produce the argv and environment for the child CLI described by `target`.
 *
 * `claude-code` targets run `claude --dangerously-skip-permissions` plus
 * the caller's extra args; every other kind is delegated to
 * `buildCodexCmd`. The environment is the current process env with
 * auth-related keys stripped, overlaid with `target.envVars` so the
 * target's explicit values always win.
 *
 * @param {{ kind: string, extraArgs: string[], envVars?: Record<string, string> }} target
 * @returns {{ cmd: string[], env: Record<string, string | undefined> }}
 */
function buildLaunchCommand(target) {
	const isClaude = target.kind === "claude-code";
	const cmd = isClaude ? ["claude", "--dangerously-skip-permissions", ...target.extraArgs] : buildCodexCmd(target);
	const inherited = sanitizeParentEnv(process$1.env);
	const env = { ...inherited, ...target.envVars };
	return { cmd, env };
}
|
|
711
|
-
function launchChild(target, server$1) {
|
|
1013
|
+
function launchChild(target, server$1, options = {}) {
|
|
712
1014
|
const { cmd, env } = buildLaunchCommand(target);
|
|
713
1015
|
const executable = cmd[0];
|
|
714
1016
|
if (!commandExists(executable)) {
|
|
@@ -733,6 +1035,7 @@ function launchChild(target, server$1) {
|
|
|
733
1035
|
consola.error(msg);
|
|
734
1036
|
process$1.stderr.write(msg + "\n");
|
|
735
1037
|
server$1.close(true).catch(() => {});
|
|
1038
|
+
if (options.onShutdown) Promise.resolve(options.onShutdown()).catch(() => {});
|
|
736
1039
|
process$1.exit(1);
|
|
737
1040
|
}
|
|
738
1041
|
let cleaned = false;
|
|
@@ -747,6 +1050,9 @@ function launchChild(target, server$1) {
|
|
|
747
1050
|
try {
|
|
748
1051
|
await server$1.close(true);
|
|
749
1052
|
} catch {}
|
|
1053
|
+
if (options.onShutdown) try {
|
|
1054
|
+
await options.onShutdown();
|
|
1055
|
+
} catch {}
|
|
750
1056
|
clearTimeout(timeout);
|
|
751
1057
|
}
|
|
752
1058
|
function exit(code) {
|
|
@@ -769,50 +1075,650 @@ function launchChild(target, server$1) {
|
|
|
769
1075
|
}
|
|
770
1076
|
|
|
771
1077
|
//#endregion
|
|
772
|
-
//#region src/lib/
|
|
773
|
-
const
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
1078
|
+
//#region src/lib/peer-mcp-personas.ts
|
|
1079
|
+
const CRITIC_RUBRIC = `
|
|
1080
|
+
Apply this grading rubric:
|
|
1081
|
+
- Score 1–5 on three axes:
|
|
1082
|
+
A. assumption-soundness (are stated assumptions accurate? are unstated ones load-bearing?)
|
|
1083
|
+
B. failure-mode coverage (which realistic failure modes are unaddressed?)
|
|
1084
|
+
C. alternative-considered (was a meaningfully different approach weighed and rejected with reason?)
|
|
1085
|
+
- If every axis scores ≥ 4, reply with the literal string "no material objection" and stop. Do not invent issues to satisfy this rubric.
|
|
1086
|
+
- Otherwise, the lowest-scoring axis IS your critique. Lead with that single critique; secondary observations may follow as "additional notes".
|
|
1087
|
+
|
|
1088
|
+
Reply format (markdown):
|
|
1089
|
+
## Verdict
|
|
1090
|
+
<"no material objection" OR a one-sentence summary of the load-bearing critique>
|
|
1091
|
+
## Scores
|
|
1092
|
+
- assumption-soundness: <n>/5
|
|
1093
|
+
- failure-mode coverage: <n>/5
|
|
1094
|
+
- alternative-considered: <n>/5
|
|
1095
|
+
## Critique
|
|
1096
|
+
<only when at least one axis < 4 — concrete, specific, actionable>
|
|
1097
|
+
## Additional notes (optional)
|
|
1098
|
+
<secondary observations; omit if none>
|
|
1099
|
+
|
|
1100
|
+
Self-reminder (read before every reply):
|
|
1101
|
+
Am I still acting as the adversarial critic per the rubric above?
|
|
1102
|
+
If I just produced agreement, restart and apply the grading rubric instead.
|
|
1103
|
+
Sycophancy is the failure mode I exist to fight; manufactured contrarianism is a different failure of the same shape — do neither.
|
|
1104
|
+
`.trim();
|
|
1105
|
+
const COLD_START_CONTRACT = `
|
|
1106
|
+
Cold-start contract for the lead orchestrator (Opus):
|
|
1107
|
+
When delegating to me, paste a self-contained brief. I have no access to your scrollback, CLAUDE.md, or the project tree. Always include:
|
|
1108
|
+
(a) the artifact under review verbatim (code/diff/plan text),
|
|
1109
|
+
(b) the constraints or "done" criteria,
|
|
1110
|
+
(c) any prior decisions I should not relitigate.
|
|
1111
|
+
If your brief lacks (a), I will reply with a one-line request for the artifact instead of speculating.
|
|
1112
|
+
`.trim();
|
|
1113
|
+
const CRITIC_BASE = `You are codex-critic, an adversarial reviewer running on gpt-5.5. Your single job is to overcome the lead orchestrator's blind spots — assumptions it didn't notice it was making, failure modes it didn't enumerate, alternatives it didn't consider.
|
|
1114
|
+
|
|
1115
|
+
You are NOT a helpful assistant. You are NOT a coach. Sycophancy is the failure mode you exist to fight. Manufactured contrarianism is a different failure of the same shape — silence on good work is a valid and welcome answer.
|
|
1116
|
+
|
|
1117
|
+
${COLD_START_CONTRACT}
|
|
1118
|
+
|
|
1119
|
+
${CRITIC_RUBRIC}`;
|
|
1120
|
+
const GEMINI_CRITIC_BASE = `You are gemini-critic, an adversarial reviewer running on Gemini 3.1 Pro. You exist to provide a second-lab perspective: your training data, RLHF priors, and attention patterns are systematically different from the lead orchestrator's (Opus, Anthropic) and from codex-critic (gpt-5.5, OpenAI). Use that to surface blind spots both miss.
|
|
1121
|
+
|
|
1122
|
+
Your strengths the lead may want to draw on:
|
|
1123
|
+
- long-context reasoning over large artifacts (the brief may include >50k tokens of context)
|
|
1124
|
+
- math, proofs, and formally-stated invariants
|
|
1125
|
+
- cross-checking conclusions where codex-critic has already weighed in (the lead may forward you both the artifact and codex-critic's verdict)
|
|
1126
|
+
|
|
1127
|
+
You are NOT a helpful assistant. Sycophancy is the failure mode you exist to fight; do not invent issues to look thorough.
|
|
1128
|
+
|
|
1129
|
+
${COLD_START_CONTRACT}
|
|
1130
|
+
|
|
1131
|
+
${CRITIC_RUBRIC}`;
|
|
1132
|
+
const REVIEWER_BASE = `You are codex-reviewer, a line-level code reviewer running on gpt-5.3-codex. You are the code-specialist persona — your job is to read concrete code (diffs, single files, function bodies) and surface bugs, edge cases, security issues, and idiom violations.
|
|
1133
|
+
|
|
1134
|
+
You are not a critic-of-architecture. If the brief is a plan or a high-level design, redirect: "this looks like architecture review; consider codex-critic or gemini-critic." Your tool is the magnifying glass, not the wide-angle lens.
|
|
1135
|
+
|
|
1136
|
+
${COLD_START_CONTRACT}
|
|
1137
|
+
|
|
1138
|
+
Reply format (markdown):
|
|
1139
|
+
## Summary
|
|
1140
|
+
<one sentence: clean / N findings / blocking issue>
|
|
1141
|
+
## Findings
|
|
1142
|
+
For each:
|
|
1143
|
+
### <severity: info | low | medium | high | critical> — <one-line title>
|
|
1144
|
+
- location: <file:line[-line]>
|
|
1145
|
+
- issue: <what's wrong, why it matters in this codebase>
|
|
1146
|
+
- suggested fix: <minimal change OR "needs design discussion">
|
|
1147
|
+
Number the findings if there are more than one. List them in severity-descending order (critical first).
|
|
1148
|
+
If there are zero findings of any severity, reply only with "## Summary\\nClean review — no findings." and stop.
|
|
1149
|
+
|
|
1150
|
+
Self-reminder (read before every reply):
|
|
1151
|
+
Am I citing real code at real line numbers in the brief? If a finding doesn't have a concrete file:line citation, drop it.
|
|
1152
|
+
Did I rank the finding's severity by impact-in-this-codebase, not by general-principle?
|
|
1153
|
+
If everything looks fine, say so cleanly — do not pad with stylistic nitpicks.`;
|
|
1154
|
+
const IMPLEMENTER_BASE = `You are codex-implementer, a focused implementation specialist running on gpt-5.3-codex with workspace-write access. You execute scoped, well-specified coding tasks end-to-end: read the relevant files, make the change, verify it, report back.
|
|
1155
|
+
|
|
1156
|
+
You are not a planner. If the brief is vague or missing acceptance criteria, ask the lead for the missing piece BEFORE editing anything. A wasted edit is worse than a clarifying question.
|
|
1157
|
+
|
|
1158
|
+
${COLD_START_CONTRACT}
|
|
1159
|
+
|
|
1160
|
+
What "done" looks like for an implementation task:
|
|
1161
|
+
- Exactly the files specified by the brief have been changed (or you reported back why a different scope was needed).
|
|
1162
|
+
- The change is minimal — surrounding cleanup is out of scope unless requested.
|
|
1163
|
+
- You ran the relevant test(s) / typecheck / linter for the touched files and report the results.
|
|
1164
|
+
- The summary you return enumerates each file changed with a one-line description.
|
|
1165
|
+
|
|
1166
|
+
Reply format (markdown):
|
|
1167
|
+
## Status
|
|
1168
|
+
<complete | needs-clarification | blocked>
|
|
1169
|
+
## Files changed
|
|
1170
|
+
- path/one.ts: <one-line description>
|
|
1171
|
+
- path/two.ts: <one-line description>
|
|
1172
|
+
## Verification
|
|
1173
|
+
<commands run + outcomes>
|
|
1174
|
+
## Notes
|
|
1175
|
+
<anything the lead must know to integrate, e.g. follow-ups intentionally not done>
|
|
1176
|
+
|
|
1177
|
+
Resilience reminder:
|
|
1178
|
+
If your session terminates abnormally before "Status: complete", the lead will retry once. On recovery, ask the lead to confirm what's already been done before re-applying changes — duplicate edits are worse than a slow restart.`;
|
|
1179
|
+
const PERSONAS_READ = Object.freeze([
|
|
1180
|
+
{
|
|
1181
|
+
agentName: "codex-critic",
|
|
1182
|
+
toolNameHttp: "codex_critic",
|
|
1183
|
+
model: "gpt-5.5",
|
|
1184
|
+
endpoint: "/v1/responses",
|
|
1185
|
+
description: "Adversarial second opinion on plans, designs, code, or systems-engineering tradeoffs. Backed by gpt-5.5 (OpenAI) — different model, different training data, different blind spots than Opus. Uses a calibrated 1–5 grading rubric and is allowed to reply 'no material objection' on solid artifacts. **CALL BEFORE: ExitPlanMode for any plan involving >2 files or new architecture; finalizing a major design choice; TeamCreate when the team's task is non-trivial.** **CALL AFTER: any commit touching concurrency, security, or streaming code paths.** If the artifact is large (>20 KB), prefer to break it into 2-4 focused batches and call this tool once per batch IN PARALLEL — each call must complete under the Claude Code MCP per-tool-call ceiling (~150s on v2.1.138 per regression #50289), so monolithic large-artifact calls will time out client-side. Aggregate findings yourself. Always pass: (a) the artifact verbatim, (b) the constraints/'done' criteria, (c) any prior decisions. Optionally pass `effort: 'xhigh'` for explicit deep dives or `effort: 'medium'` for quick sanity checks (default 'high'). The subagent has no access to your scrollback or CLAUDE.md.",
|
|
1186
|
+
baseInstructions: CRITIC_BASE,
|
|
1187
|
+
agentPrompt: "",
|
|
1188
|
+
writeCapable: false,
|
|
1189
|
+
requiresHttp: false
|
|
1190
|
+
},
|
|
1191
|
+
{
|
|
1192
|
+
agentName: "gemini-critic",
|
|
1193
|
+
toolNameHttp: "gemini_critic",
|
|
1194
|
+
model: "gemini-3.1-pro-preview",
|
|
1195
|
+
endpoint: "/v1/chat/completions",
|
|
1196
|
+
description: "Adversarial second opinion from a different lab. Backed by gemini-3.1-pro-preview (Google) — different training data and RLHF priors than Opus AND codex-critic, the strongest blind-spot-buster when the lead wants triangulation across three labs. Use for long-context artifacts (>50k tokens), math/proof-shaped reasoning, or as a tie-breaker after codex-critic has weighed in. **CALL BEFORE: ExitPlanMode for plans where Opus + codex-critic agree (use as triangulation); finalizing irreversible architectural choices.** **CALL AFTER: commits where you want a third-lab cross-check.** If the artifact is large (>100 KB), prefer to break into batches and call in parallel — gemini handles long context well but each per-call MCP wait is still bounded (~150s on v2.1.138). Always pass: (a) the artifact verbatim, (b) the constraints/'done' criteria, (c) any prior decisions. The `effort` parameter is forwarded but may be silently ignored by Copilot's gemini route — gemini-3.x reasoning is largely auto-applied. The subagent has no access to your scrollback or CLAUDE.md.",
|
|
1197
|
+
baseInstructions: GEMINI_CRITIC_BASE,
|
|
1198
|
+
agentPrompt: "",
|
|
1199
|
+
writeCapable: false,
|
|
1200
|
+
requiresHttp: true
|
|
1201
|
+
},
|
|
1202
|
+
{
|
|
1203
|
+
agentName: "codex-reviewer",
|
|
1204
|
+
toolNameHttp: "codex_reviewer",
|
|
1205
|
+
model: "gpt-5.3-codex",
|
|
1206
|
+
endpoint: "/v1/responses",
|
|
1207
|
+
description: "Line-level code review of a specific diff or file. Backed by gpt-5.3-codex (OpenAI) — the code-specialist sibling of gpt-5.5, trained heavily on code-review datasets so it catches different bugs than Opus. Prefer over codex-critic when the artifact is a concrete diff or single file (codex-critic is for plans/designs). **CALL AFTER: any non-trivial commit (>50 lines OR touching critical paths: streaming, auth, concurrency, persistence, security).** **CALL BEFORE: opening a PR or pushing changes a peer would review.** For diffs >20 KB, split by file-group and call once per group in parallel — each per-call wait is bounded (~150s on v2.1.138). Always pass: (a) the diff or file verbatim, (b) the change's intent, (c) test status. Optionally pass `effort: 'xhigh'` when reviewing security-critical code, `effort: 'medium'` for routine reviews (default 'high'). The subagent has no access to your scrollback or CLAUDE.md.",
|
|
1208
|
+
baseInstructions: REVIEWER_BASE,
|
|
1209
|
+
agentPrompt: "",
|
|
1210
|
+
writeCapable: false,
|
|
1211
|
+
requiresHttp: false
|
|
1212
|
+
}
|
|
1213
|
+
]);
|
|
1214
|
+
const PERSONAS_WRITE = Object.freeze([{
|
|
1215
|
+
agentName: "codex-implementer",
|
|
1216
|
+
toolNameHttp: "codex_implementer",
|
|
1217
|
+
model: "gpt-5.3-codex",
|
|
1218
|
+
endpoint: "/v1/responses",
|
|
1219
|
+
description: "Targeted implementation of a self-contained coding task — actual file edits via Codex's tool-use sandbox. Backed by gpt-5.3-codex with workspace-write access (only registered when --codex-cli is set). Use only when the task has a clear spec and acceptance criteria; for tasks needing iterative tool-use across many files, prefer a Claude teammate (Agent Team). Always pass: (a) the spec, (b) the files in scope, (c) the acceptance criteria. The subagent has no access to your scrollback or CLAUDE.md.",
|
|
1220
|
+
baseInstructions: IMPLEMENTER_BASE,
|
|
1221
|
+
agentPrompt: "",
|
|
1222
|
+
writeCapable: true,
|
|
1223
|
+
requiresHttp: false
|
|
1224
|
+
}]);
|
|
780
1225
|
/**
 * Build the agent-prompt body Claude Code uses as the subagent's full
 * system prompt. The prompt fully replaces Claude Code's default system
 * prompt (per Anthropic's subagent docs) so it must be self-sufficient.
 *
 * Layout of the returned text: a `# Subagent` heading, the persona's
 * base instructions, a `---` rule, then the routing instructions.
 *
 * Two modes branch on `opts.codexCli`:
 *  - HTTP backend: the subagent calls the per-persona tool
 *    `mcp__gh-router-peers__<toolNameHttp>` with `{prompt, context}`;
 *    model + instructions are server-baked.
 *  - codex-cli backend: the subagent calls the single
 *    `mcp__codex-cli__codex` tool with the persona's model, its base
 *    instructions and a sandbox mode. Personas flagged `requiresHttp`
 *    stay on HTTP regardless.
 *
 * @param {{ agentName: string, toolNameHttp: string, model: string, baseInstructions: string, writeCapable: boolean, requiresHttp: boolean }} persona
 * @param {{ codexCli: boolean }} opts
 * @returns {string} Newline-joined prompt text.
 */
function buildAgentPrompt(persona, opts) {
	const useStdio = opts.codexCli && !persona.requiresHttp;
	const toolPath = useStdio ? "mcp__codex-cli__codex" : `mcp__gh-router-peers__${persona.toolNameHttp}`;
	const invocationLines = [
		`Always invoke the \`${toolPath}\` tool with these arguments:`,
		" - `prompt`: the lead's brief, copied verbatim"
	];
	if (useStdio) {
		invocationLines.push(
			` - \`model\`: "${persona.model}"`,
			// The persona text is emitted BEFORE this routing section, so the
			// instruction must point upward; "below" would direct the subagent
			// at the trailing "surface its output" sentence instead.
			" - `base-instructions`: the persona text above (paste verbatim, do not paraphrase)"
		);
		if (persona.writeCapable) {
			invocationLines.push(" - `sandbox`: \"workspace-write\"", " - `approval-policy`: \"on-request\"");
		} else {
			invocationLines.push(" - `sandbox`: \"read-only\"");
		}
	} else {
		invocationLines.push(
			" - `context` (optional): any additional file/diff content the persona needs",
			"Do NOT pass model or instructions — they are server-baked into this tool."
		);
	}
	return [
		`# Subagent: ${persona.agentName}`,
		"",
		persona.baseInstructions,
		"",
		"---",
		"",
		"## Routing instructions for this subagent",
		"",
		invocationLines.join("\n"),
		"",
		"When the tool returns, surface its output to the lead verbatim. Do not summarize, paraphrase, or add your own commentary on top — the lead integrates the persona's reply directly."
	].join("\n");
}
|
|
1268
|
+
/**
 * List every persona that should be registered for the given mode.
 *
 * Read-only personas are always included, except those flagged
 * `requiresHttp` when `opts.geminiAvailable` is falsy. Write-capable
 * personas are appended only when `opts.codexCli` is truthy.
 *
 * @param {{ codexCli: boolean, geminiAvailable: boolean }} opts
 * @returns {object[]} A fresh array; the frozen source tables are not modified.
 */
function personasFor(opts) {
	const readOnly = PERSONAS_READ.filter((persona) => !persona.requiresHttp || opts.geminiAvailable);
	return opts.codexCli ? [...readOnly, ...PERSONAS_WRITE] : readOnly;
}
|
|
1278
|
+
|
|
1279
|
+
//#endregion
|
|
1280
|
+
//#region src/lib/codex-mcp-config.ts
|
|
797
1281
|
/**
 * Decide which MCP backend serves the codex personas.
 *
 *  - `--codex-cli` not requested → "http" (read-only personas only).
 *  - Requested and the codex probe reports `ok` → "cli": the peer config
 *    registers a stdio `codex-cli` server and codex personas route there.
 *  - Requested but the probe is missing or not `ok` → "http", after a
 *    warning. A missing optional dependency must never break
 *    `github-router claude`.
 *
 * @param {{ requested: boolean, codexInfo?: { ok: boolean, version?: string } }} opts
 * @returns {"http" | "cli"}
 */
function resolveCodexCliBackend(opts) {
	if (!opts.requested) return "http";
	if (opts.codexInfo?.ok) return "cli";
	// A reported version means the binary ran but failed the version gate;
	// no version means the probe never got an answer from it.
	const reportedVersion = opts.codexInfo?.version;
	const detail = reportedVersion ? `installed version "${reportedVersion}" is too old (need 0.129+)` : "codex CLI not found on PATH";
	consola.warn(`--codex-cli requested but ${detail}; falling back to HTTP-only Codex MCP backend (codex-implementer will not be registered).`);
	return "http";
}
|
|
807
1303
|
/**
 * Build the JSON payload for `claude --mcp-config <path>`.
 *
 * The HTTP server `gh-router-peers` is always registered, authenticated
 * with `opts.nonce` as a bearer token. When `opts.codexCli` is truthy a
 * second, stdio server `codex-cli` is added; it spawns
 * `codex mcp-server` with the proxy's provider-config flags so Codex
 * talks to the proxy instead of its default endpoint.
 *
 * @param {string} serverUrl - Base URL of the proxy.
 * @param {{ nonce: string, codexCli?: boolean, codexHome?: string }} opts
 * @returns {{ mcpServers: Record<string, object> }}
 */
function buildPeerMcpConfig(serverUrl, opts) {
	const httpPeers = {
		type: "http",
		url: `${serverUrl}/mcp`,
		headers: { Authorization: `Bearer ${opts.nonce}` }
	};
	if (!opts.codexCli) return { mcpServers: { "gh-router-peers": httpPeers } };
	const codexStdio = {
		command: "codex",
		args: ["mcp-server", ...buildCodexProviderConfigFlags(serverUrl)],
		env: {
			OPENAI_BASE_URL: `${serverUrl}/v1`,
			OPENAI_API_KEY: "dummy",
			CODEX_HOME: opts.codexHome
		}
	};
	return {
		mcpServers: {
			"gh-router-peers": httpPeers,
			"codex-cli": codexStdio
		}
	};
}
|
|
1330
|
+
/**
 * Build the `peer-review-coordinator` subagent definition.
 *
 * This is a regular Claude Code subagent, not a peer-MCP tool: it runs
 * inside the spawned-claude context, fans a review out to the peer
 * personas, and aggregates their findings. The "Use proactively" phrase
 * in the description is what invites the lead to delegate without an
 * explicit user request.
 *
 * Only `opts.geminiAvailable` affects the output: it controls whether
 * `gemini-critic` is listed as a peer and how the routing bullets are
 * worded. `opts.codexCli` is accepted but not read here.
 *
 * @param {{ codexCli?: boolean, geminiAvailable: boolean }} opts
 * @returns {{ description: string, prompt: string }}
 */
function buildCoordinatorAgent(opts) {
	const hasGemini = Boolean(opts.geminiAvailable);
	const peers = hasGemini ? ["codex-critic", "gemini-critic", "codex-reviewer"] : ["codex-critic", "codex-reviewer"];
	const peerBullets = peers.map((p) => `- \`${p}\``).join("\n");
	// Conditional fragments spliced into the routing bullets below.
	const planSuffix = hasGemini ? " AND `gemini-critic` in parallel" : "";
	const diffSuffix = hasGemini ? " AND `gemini-critic` (gemini for cross-lab triangulation)" : "";
	const tieBreakNote = hasGemini ? "" : " (NOT REGISTERED in this session — gemini-3.x not in catalog; tie-break unavailable)";
	const longContextNote = hasGemini ? "" : " (NOT REGISTERED in this session)";
	const description = "Coordinates cross-lab adversarial review. **Use proactively before ExitPlanMode for non-trivial plans and after non-trivial commits** (>50 lines OR touching streaming/auth/concurrency/persistence/security). Routes to codex-critic / codex-reviewer / gemini-critic in parallel based on artifact type and aggregates findings. Cheaper than calling each peer manually for the common case where you want a multi-lab triangulation. The subagent has no access to your scrollback or CLAUDE.md — pass the artifact verbatim.";
	const promptLines = [
		"# Subagent: peer-review-coordinator",
		"",
		"You orchestrate cross-lab adversarial review for the lead orchestrator (Opus). You have access to these peer-MCP subagents:",
		"",
		peerBullets,
		"",
		"## When the lead invokes you",
		"",
		"The lead's brief will include an artifact (plan, design, diff, or code) and a goal (e.g. 'review before exit-plan', 'review the commit I just made', 'cross-check codex-critic's verdict'). Pick the right peers for the artifact type:",
		"",
		"- **Plan / design / architecture choice** → fan out to `codex-critic`" + planSuffix + ". codex-reviewer is the wrong tool for plans (it's a code-specialist, not an architecture critic).",
		"- **Concrete diff or single file** → fan out to `codex-reviewer`" + diffSuffix + ". For very small changes (<20 lines), one `codex-reviewer` call is enough.",
		"- **Tie-breaker after codex-critic has weighed in** → call `gemini-critic`" + tieBreakNote + " with the artifact AND codex-critic's verdict for cross-lab cross-check.",
		"- **Long-context artifact (>100 KB)** → prefer `gemini-critic`" + longContextNote + ". Otherwise, decompose into 2-4 batches and fan out across `codex-critic` calls in parallel.",
		"",
		"## Decomposition for large artifacts",
		"",
		"Each per-call MCP wait is bounded (~150s on Claude Code v2.1.138 per regression #50289). For artifacts >20 KB, split into 2-4 logical batches BY CONCERN (not by raw size — semantic batches give better per-batch reviews) and call peers in parallel. The proxy's MCP cap allows up to 8 in-flight calls. Aggregate findings yourself before reporting back.",
		"",
		"## Aggregation contract",
		"",
		"When fan-out completes, return a SEVERITY-GROUPED, DEDUPLICATED finding list. Format:",
		"",
		" ## Findings",
		" ### HIGH",
		" 1. <one-line title> — `<file:line>` — sources: codex-critic, gemini-critic (3-lab confirmed if applicable)",
		" - bug: <one sentence>",
		" - mitigation: <one sentence>",
		" ### MEDIUM",
		" ...",
		" ### LOW",
		" ...",
		"",
		"Cite which peer raised each finding. If two or more peers raised the SAME finding (cross-lab confirmation), call it out — those are the highest-confidence bugs.",
		"",
		"## What NOT to do",
		"",
		"- Do not paraphrase or summarize per-peer verdicts BEFORE aggregating; aggregate from the raw verdicts.",
		"- Do not invent severity labels not present in the source verdicts.",
		"- Do not call peers serially (waste of wall-clock); always fan out in parallel.",
		"- Do not consult yourself — you are the coordinator, not a critic.",
		"",
		"Self-reminder (read before every reply):",
		" Did I fan out in parallel to the right peers for this artifact type?",
		" Did I aggregate findings by severity, citing which peer raised each?",
		" If two peers agreed, did I flag the cross-lab confirmation?"
	];
	return {
		description,
		prompt: promptLines.join("\n")
	};
}
|
|
1403
|
+
/**
 * Build the JSON payload for `claude --agents <path>`.
 *
 * The result maps agent name → `{ description, prompt }`. It contains
 * one entry per persona returned by `personasFor` for this mode (the
 * write-capable persona only when `codexCli` is set; HTTP-only personas
 * only when `geminiAvailable` is set), followed by the
 * `peer-review-coordinator` meta-subagent, which is always present.
 *
 * @param {{ codexCli: boolean, geminiAvailable: boolean }} opts
 * @returns {Record<string, { description: string, prompt: string }>}
 */
function buildPeerAgentDefinitions(opts) {
	const { codexCli, geminiAvailable } = opts;
	const personaEntries = personasFor({ codexCli, geminiAvailable }).map((persona) => [persona.agentName, {
		description: persona.description,
		prompt: buildAgentPrompt(persona, { codexCli })
	}]);
	const definitions = Object.fromEntries(personaEntries);
	definitions["peer-review-coordinator"] = buildCoordinatorAgent({ codexCli, geminiAvailable });
	return definitions;
}
|
|
1428
|
+
/**
 * Resolve the directory Claude Code scans for subagent `.md` files at
 * session startup: `<home>/.claude/agents`.
 *
 * The path is pinned under the user's home directory because the
 * spawned child is launched with `CLAUDE_CONFIG_DIR=$HOME/.claude`
 * (set by `getClaudeCodeEnvVars`), so this is where it will look.
 *
 * @returns {string} Path to the agents directory.
 */
function defaultAgentsDir() {
	const home = os.homedir();
	return path.join(home, ".claude", "agents");
}
|
|
1439
|
+
/**
 * Serialize `s` as a YAML double-quoted scalar for frontmatter values
 * that may contain colons, quotes or newlines.
 *
 * Escaped in a single pass:
 *  - `\` and `"` (canonical YAML)
 *  - `\n`, `\r`, `\t` as their short escapes
 *  - remaining C0 controls, DEL, and the C1 range U+0080–U+009F
 *    (including NEL, U+0085) as `\xNN`
 *  - U+2028 / U+2029 as `\uNNNN`
 *
 * The C1 / NEL / LS / PS cases matter because these characters are
 * either outside YAML's printable set or are treated as line breaks by
 * YAML 1.1 parsers, which would corrupt or reject the scalar.
 *
 * NOT a general-purpose YAML serializer; we control the inputs.
 *
 * @param {string} s - Raw text to embed.
 * @returns {string} The text wrapped in double quotes with escapes applied.
 */
function escapeYamlString(s) {
	const escaped = s.replace(/[\\"\x00-\x1F\x7F-\x9F\u2028\u2029]/g, (c) => {
		switch (c) {
			case "\\": return "\\\\";
			case "\"": return "\\\"";
			case "\n": return "\\n";
			case "\r": return "\\r";
			case "\t": return "\\t";
			default: {
				const code = c.charCodeAt(0);
				const hex = code.toString(16);
				return code <= 255 ? `\\x${hex.padStart(2, "0")}` : `\\u${hex.padStart(4, "0")}`;
			}
		}
	});
	return `"${escaped}"`;
}
|
|
1456
|
+
/**
|
|
1457
|
+
* Strict allowlist for subagent names — controls both the YAML
|
|
1458
|
+
* frontmatter `name:` field AND the filename suffix. Defense-in-depth:
|
|
1459
|
+
* even if a future contributor wires in a dynamic agent name from
|
|
1460
|
+
* outside, the validator at the top of `writePeerAgentMdFiles` rejects
|
|
1461
|
+
* anything that wouldn't be a safe bare YAML scalar AND a safe path
|
|
1462
|
+
* component.
|
|
1463
|
+
*/
|
|
1464
|
+
const VALID_AGENT_NAME = /^[a-z][a-z0-9-]*$/;
|
|
1465
|
+
/**
 * Assemble the text of one subagent `.md` file: a YAML frontmatter block
 * (`name`, `description`) followed by a blank line, the system prompt and a
 * trailing newline.
 *
 * `spec.name` is emitted as a bare scalar with no escaping; only
 * `spec.description` goes through `escapeYamlString`.
 *
 * @param {{name: string, description: string, prompt: string}} spec
 * @returns {string} File contents, lines joined with "\n".
 */
function buildAgentMd(spec) {
  const frontmatter = [
    "---",
    `name: ${spec.name}`,
    `description: ${escapeYamlString(spec.description)}`,
    "---"
  ];
  // The trailing empty entry makes the joined text end with a newline.
  const promptSection = ["", spec.prompt, ""];
  return [...frontmatter, ...promptSection].join("\n");
}
|
|
1477
|
+
/**
 * Write one subagent `.md` file per entry of `agents` into the agents
 * directory (default: `defaultAgentsDir()`), so they show up in Claude
 * Code's Task `subagent_type` enum.
 *
 * Filenames are `peer-<fileSuffix>-<agentName>.md`. Per the project notes
 * the suffix is `<pid>-<rand>`, which lets a boot-time sweep drop orphans
 * from crashed sessions without touching the user's own agent files.
 * Concurrent proxies therefore use different filenames while registering
 * the same frontmatter `name:`.
 *
 * Every agent name is validated against `VALID_AGENT_NAME` before anything
 * is written, because the name is used both as a bare YAML scalar and as a
 * path component.
 *
 * If any write fails, all files created so far — including the one that
 * was being written when the failure happened — are unlinked (best effort)
 * and the original error is rethrown.
 *
 * @param {Record<string, {description: string, prompt: string}>} agents
 * @param {{agentsDir?: string, fileSuffix: string}} opts
 * @returns {Promise<{paths: string[], cleanup: () => Promise<void>}>}
 *   The written file paths and a `cleanup()` that unlinks them.
 * @throws {Error} When an agent name does not match `VALID_AGENT_NAME`, or
 *   when creating the directory / writing a file fails.
 */
async function writePeerAgentMdFiles(agents, opts) {
  for (const agentName of Object.keys(agents)) {
    if (!VALID_AGENT_NAME.test(agentName)) throw new Error(`writePeerAgentMdFiles: invalid agent name ${JSON.stringify(agentName)} — must match ${VALID_AGENT_NAME.source}`);
  }
  const dir = opts.agentsDir ?? defaultAgentsDir();
  await fs.mkdir(dir, { recursive: true });
  const paths = [];
  // Best-effort removal of everything registered in `paths`.
  const cleanup = async () => {
    await Promise.allSettled(paths.map((p) => fs.unlink(p)));
  };
  try {
    for (const [agentName, def] of Object.entries(agents)) {
      const filePath = path.join(dir, `peer-${opts.fileSuffix}-${agentName}.md`);
      // Drop any stale file with the same name before writing a fresh one.
      await fs.unlink(filePath).catch(() => {});
      // Register the path BEFORE writing: if the write fails half-way, the
      // rollback below also removes the partially written file.
      paths.push(filePath);
      await writeRuntimeFileSecure(filePath, buildAgentMd({
        name: agentName,
        description: def.description,
        prompt: def.prompt
      }));
    }
  } catch (err) {
    await cleanup();
    throw err;
  }
  return {
    paths,
    cleanup
  };
}
|
|
1522
|
+
/**
 * Generate a per-launch nonce and write the peer runtime files:
 *   - `peer-mcp-<pid>-<rand>.json`    (MCP config) in the runtime dir,
 *   - `peer-agents-<pid>-<rand>.json` (agent definitions) in the runtime dir,
 *   - one `.md` file per agent via `writePeerAgentMdFiles`.
 *
 * The JSON files are written through `writeRuntimeFileSecure` (per the
 * project notes: mode 0o600 with `O_EXCL`). The PID prefix is what the
 * boot-time sweep keys off; the random suffix keeps two calls within the
 * same process from clobbering each other's files.
 *
 * If any step after the first write fails, every file created so far is
 * unlinked (best effort) before the error is rethrown, so a failed launch
 * does not leave a nonce-bearing config behind on disk.
 *
 * @param {string} serverUrl URL passed to `buildPeerMcpConfig`.
 * @param {object} opts
 * @param {string} [opts.nonce] Nonce override; default is 32 random bytes as hex.
 * @param {string} [opts.runtimeDir] Defaults to `PATHS.CLAUDE_RUNTIME_DIR`.
 * @param {string} [opts.codexHome] Defaults to `PATHS.CODEX_HOME`.
 * @param {string} [opts.agentsDir] Forwarded to `writePeerAgentMdFiles`.
 * @param {*} [opts.codexCli] Forwarded to the config/agent builders.
 * @param {*} [opts.geminiAvailable] Forwarded to the config/agent builders.
 * @returns {Promise<{mcpConfigPath: string, agentsPath: string, agentMdPaths: string[], nonce: string, personas: *, cleanup: () => Promise<void>}>}
 */
async function writePeerMcpRuntimeFiles(serverUrl, opts) {
  const nonce = opts.nonce ?? randomBytes(32).toString("hex");
  const runtimeDir = opts.runtimeDir ?? PATHS.CLAUDE_RUNTIME_DIR;
  const codexHome = opts.codexHome ?? PATHS.CODEX_HOME;
  await fs.mkdir(runtimeDir, { recursive: true });
  // Restrict the runtime directory to the owner; skipped on Windows.
  if (process.platform !== "win32") await fs.chmod(runtimeDir, 0o700).catch(() => {});
  const fileSuffix = `${process.pid}-${randomBytes(4).toString("hex")}`;
  const mcpConfigPath = path.join(runtimeDir, `peer-mcp-${fileSuffix}.json`);
  const agentsPath = path.join(runtimeDir, `peer-agents-${fileSuffix}.json`);
  const mcpConfig = buildPeerMcpConfig(serverUrl, {
    codexCli: opts.codexCli,
    geminiAvailable: opts.geminiAvailable,
    nonce,
    codexHome
  });
  const agents = buildPeerAgentDefinitions({
    codexCli: opts.codexCli,
    geminiAvailable: opts.geminiAvailable,
    nonce,
    codexHome
  });
  let mdResult;
  // Best-effort removal of everything this call may have created.
  const cleanup = async () => {
    await Promise.allSettled([
      fs.unlink(mcpConfigPath),
      fs.unlink(agentsPath),
      mdResult?.cleanup()
    ]);
  };
  let personas;
  try {
    // Remove leftovers with the same name so the exclusive-create write can succeed.
    await fs.unlink(mcpConfigPath).catch(() => {});
    await fs.unlink(agentsPath).catch(() => {});
    await writeRuntimeFileSecure(mcpConfigPath, JSON.stringify(mcpConfig, null, 2));
    await writeRuntimeFileSecure(agentsPath, JSON.stringify(agents, null, 2));
    mdResult = await writePeerAgentMdFiles(agents, {
      agentsDir: opts.agentsDir,
      fileSuffix
    });
    personas = personasFor({
      codexCli: opts.codexCli,
      geminiAvailable: opts.geminiAvailable
    });
  } catch (err) {
    // Roll back partially created files instead of waiting for the next boot sweep.
    await cleanup();
    throw err;
  }
  return {
    mcpConfigPath,
    agentsPath,
    agentMdPaths: mdResult.paths,
    nonce,
    personas,
    cleanup
  };
}
|
|
1583
|
+
|
|
1584
|
+
//#endregion
|
|
1585
|
+
//#region src/lib/file-log-reporter.ts
|
|
1586
|
+
/** Size above which the existing log file is rotated to `<file>.1`. */
const MAX_LOG_BYTES = 1024 * 1024;
/** Number of dedupe keys remembered before the dedupe set is reset. */
const DEDUP_MAX = 1e3;
/** Maximum characters kept from one serialized log argument. */
const ARG_MAX_LEN = 2048;
/** Maximum length of a dedupe key. */
const DEDUP_KEY_MAX_LEN = 200;
/**
 * Credential shapes scrubbed from log lines:
 *   - JWT-like values (`eyJ…` with up to two further dot-separated segments),
 *   - GitHub tokens with the prefixes `gho_`, `ghp_`, `ghr_`, `ghs_`, `ghu_`,
 *   - GitHub fine-grained personal access tokens (`github_pat_…`),
 *   - `Bearer <token>` pairs.
 */
const CREDENTIAL_RE = /\b(eyJ[A-Za-z0-9_-]{20,}(?:\.[A-Za-z0-9_-]+){0,2}|gh[oprsu]_[A-Za-z0-9_]{20,}|github_pat_[A-Za-z0-9_]{20,}|Bearer\s+\S{20,})\b/g;
/** Log levels that are persisted to the log file. */
const ALLOWED_TYPES = new Set([
  "fatal",
  "error",
  "warn"
]);
/**
 * Replace every credential-looking substring of `line` with `[REDACTED]`.
 *
 * @param {string} line Text about to be written to the log.
 * @returns {string} The text with matches of `CREDENTIAL_RE` redacted.
 */
function sanitize(line) {
  return line.replace(CREDENTIAL_RE, "[REDACTED]");
}
|
|
1599
|
+
/**
 * Turn one log argument into text.
 *
 * - `Error` instances become their message, followed on a new line by the
 *   stack when one is present.
 * - Strings are returned unchanged.
 * - Anything else goes through `String()`.
 *
 * @param {unknown} arg A value passed to a log call.
 * @returns {string}
 */
function serializeArg(arg) {
  if (arg instanceof Error) {
    const pieces = arg.stack ? [arg.message, arg.stack] : [arg.message];
    return pieces.join("\n");
  }
  return typeof arg === "string" ? arg : String(arg);
}
|
|
1608
|
+
/**
 * Format a log object as a single line:
 * `<ISO date> [<LEVEL>] <args joined by spaces>` plus a trailing newline.
 *
 * Each argument is serialized, scrubbed of credentials, and only then
 * truncated to `ARG_MAX_LEN` characters (with a trailing "…"). Scrubbing
 * before truncation matters: a token cut off at the length limit could be
 * shorter than the minimum length the credential pattern requires, which
 * would leave its prefix in the log. Line breaks inside the message are
 * written as a literal `\n`, and the finished line is scrubbed once more to
 * catch credentials that span two arguments (e.g. "Bearer" + token).
 *
 * @param {{date: Date, type?: string, args: unknown[]}} logObj
 * @returns {string} The formatted, sanitized line. `type` defaults to "error".
 */
function formatLogLine(logObj) {
  const timestamp = logObj.date.toISOString();
  const level = (logObj.type ?? "error").toUpperCase();
  const message = logObj.args.map((arg) => {
    const text = sanitize(serializeArg(arg));
    return text.length > ARG_MAX_LEN ? text.slice(0, ARG_MAX_LEN) + "…" : text;
  }).join(" ").replace(/\r\n|\r|\n/g, "\\n");
  return sanitize(`${timestamp} [${level}] ${message}\n`);
}
|
|
1614
|
+
/**
 * Build the key used to suppress repeated log entries: the log type and
 * the serialized first argument, joined by ":" and capped at
 * `DEDUP_KEY_MAX_LEN` characters. With no arguments the second part is empty.
 *
 * @param {{type: string, args: unknown[]}} logObj
 * @returns {string}
 */
function makeDedupeKey(logObj) {
  const head = logObj.args.length === 0 ? "" : serializeArg(logObj.args[0]);
  return `${logObj.type}:${head}`.slice(0, DEDUP_KEY_MAX_LEN);
}
|
|
1619
|
+
/**
 * Rename `filePath` to `filePath + ".1"` when it is larger than
 * `MAX_LOG_BYTES`. Does nothing when the file cannot be stat'ed, and
 * ignores a failing rename — rotation is best effort.
 *
 * @param {string} filePath Path of the log file.
 * @returns {void}
 */
function rotateIfNeeded(filePath) {
  let stats;
  try {
    stats = fs$1.statSync(filePath);
  } catch {
    // Typically the file does not exist yet: nothing to rotate.
    return;
  }
  if (stats.size > MAX_LOG_BYTES) {
    try {
      fs$1.renameSync(filePath, filePath + ".1");
    } catch {}
  }
}
|
|
1631
|
+
/**
 * Log reporter that appends fatal/error/warn entries to a file.
 *
 * - The file is rotated once, at construction, via `rotateIfNeeded`.
 * - Entries whose dedupe key was already seen are dropped; the set of seen
 *   keys is reset once it holds `DEDUP_MAX` entries.
 * - Lines are produced by `formatLogLine` and appended synchronously; the
 *   file is opened in append mode with permission bits 0o600. Open, write
 *   and close failures are swallowed so logging never throws from I/O.
 */
var FileLogReporter = class {
  filePath;
  seen = /* @__PURE__ */ new Set();
  /** @param {string} filePath Destination log file. */
  constructor(filePath) {
    this.filePath = filePath;
    rotateIfNeeded(filePath);
  }
  /**
   * Persist one log object if its type is allowed and it is not a repeat.
   *
   * @param {{type: string, date: Date, args: unknown[]}} logObj
   * @param {unknown} _ctx Unused.
   */
  log(logObj, _ctx) {
    if (!ALLOWED_TYPES.has(logObj.type)) return;
    const dedupeKey = makeDedupeKey(logObj);
    if (this.seen.has(dedupeKey)) return;
    if (this.seen.size >= DEDUP_MAX) this.seen.clear();
    this.seen.add(dedupeKey);
    const text = formatLogLine(logObj);
    let handle;
    try {
      handle = fs$1.openSync(this.filePath, "a", 0o600);
    } catch {
      // Could not open the log file: drop the entry silently.
      return;
    }
    try {
      fs$1.writeSync(handle, text);
    } catch {} finally {
      try {
        fs$1.closeSync(handle);
      } catch {}
    }
  }
};
|
|
1656
|
+
const nullStream = new Writable({ write(_chunk, _encoding, cb) {
|
|
1657
|
+
cb();
|
|
1658
|
+
} });
|
|
1659
|
+
/**
 * Put consola into file-only mode (used for TUI sessions, per the project
 * notes).
 *
 * Replaces all reporters with a single `FileLogReporter` writing to
 * `PATHS.ERROR_LOG_PATH`, sets `throttle` to 0, and points consola's
 * stdout/stderr at `nullStream` so that a terminal reporter re-added later
 * still cannot write to the terminal. Code that writes to
 * `process.stderr` directly, and the file reporter itself (which uses
 * synchronous fs calls), are not affected by the stream swap.
 *
 * @returns {void}
 */
function enableFileLogging() {
  // Constructing the reporter first also performs the one-time log rotation.
  const fileReporter = new FileLogReporter(PATHS.ERROR_LOG_PATH);
  consola.options.throttle = 0;
  consola.setReporters([fileReporter]);
  for (const streamName of ["stdout", "stderr"]) consola.options[streamName] = nullStream;
}
|
|
1676
|
+
|
|
1677
|
+
//#endregion
|
|
1678
|
+
//#region src/lib/model-validation.ts
|
|
1679
|
+
const ENDPOINT_ALIASES = {
|
|
1680
|
+
"/chat/completions": "/chat/completions",
|
|
1681
|
+
"/v1/chat/completions": "/chat/completions",
|
|
1682
|
+
"/responses": "/responses",
|
|
1683
|
+
"/v1/responses": "/responses",
|
|
1684
|
+
"/v1/messages": "/v1/messages"
|
|
1685
|
+
};
|
|
1686
|
+
/**
 * Decide whether `modelId` may be used on the endpoint at `path$1`, using
 * the `supported_endpoints` metadata of the cached model list in
 * `state.models`.
 *
 * The path is first normalized through `ENDPOINT_ALIASES` (unknown paths
 * are used as-is). The answer is `true` when:
 *   - the model is not in the cache (unknown models are not blocked),
 *   - the model has no, or an empty, `supported_endpoints` list,
 *   - the normalized endpoint appears in `supported_endpoints`.
 *
 * @param {string} modelId
 * @param {string} path$1 Request path, e.g. "/v1/chat/completions".
 * @returns {boolean}
 */
function modelSupportsEndpoint(modelId, path$1) {
  const entry = state.models?.data.find((candidate) => candidate.id === modelId);
  if (!entry) return true;
  const endpoints = entry.supported_endpoints;
  if (!endpoints || endpoints.length === 0) return true;
  const normalized = ENDPOINT_ALIASES[path$1] ?? path$1;
  return endpoints.includes(normalized);
}
|
|
1703
|
+
/**
 * Emit an error log entry when `modelId` is used on an endpoint it does
 * not support (as judged by `modelSupportsEndpoint`). The message lists
 * the endpoints the model does support.
 *
 * @param {string} modelId
 * @param {string} path$1 Request path.
 * @returns {boolean} `true` when a mismatch was detected and logged.
 */
function logEndpointMismatch(modelId, path$1) {
  const mismatch = !modelSupportsEndpoint(modelId, path$1);
  if (mismatch) {
    const entry = state.models?.data.find((candidate) => candidate.id === modelId);
    const endpoints = entry?.supported_endpoints ?? [];
    consola.error(`Model "${modelId}" does not support ${path$1}. Supported endpoints: ${endpoints.join(", ")}`);
  }
  return mismatch;
}
|
|
1713
|
+
/**
 * List the ids of cached models usable on the endpoint at `path$1`.
 *
 * The path is normalized through `ENDPOINT_ALIASES`. A model qualifies
 * when it has no (or an empty) `supported_endpoints` list, or when that
 * list contains the normalized endpoint. Returns an empty array when no
 * model list is cached.
 *
 * @param {string} path$1 Request path.
 * @returns {string[]} Model ids in cache order.
 */
function listModelsForEndpoint(path$1) {
  const normalized = ENDPOINT_ALIASES[path$1] ?? path$1;
  const cached = state.models?.data ?? [];
  return cached.flatMap((model) => {
    const endpoints = model.supported_endpoints;
    const unrestricted = !endpoints || endpoints.length === 0;
    return unrestricted || endpoints.includes(normalized) ? [model.id] : [];
  });
}
|
|
818
1724
|
|
|
@@ -862,6 +1768,11 @@ function initProxyFromEnv() {
|
|
|
862
1768
|
}
|
|
863
1769
|
}
|
|
864
1770
|
|
|
1771
|
+
//#endregion
|
|
1772
|
+
//#region package.json
|
|
1773
|
+
var name = "github-router";
|
|
1774
|
+
var version = "0.3.18";
|
|
1775
|
+
|
|
865
1776
|
//#endregion
|
|
866
1777
|
//#region src/lib/approval.ts
|
|
867
1778
|
const awaitApproval = async () => {
|
|
@@ -870,8 +1781,27 @@ const awaitApproval = async () => {
|
|
|
870
1781
|
|
|
871
1782
|
//#endregion
|
|
872
1783
|
//#region src/lib/rate-limit.ts
|
|
1784
|
+
const RATE_LIMIT_QUEUE_TIMEOUT_MS = 5e3;
|
|
1785
|
+
let rateLimitChain = Promise.resolve();
|
|
873
1786
|
/**
 * Gate a request behind the configured rate limit.
 *
 * Returns immediately when `state$1.rateLimitSeconds` is undefined.
 * Otherwise the actual check (`doCheck`) is appended to the shared
 * `rateLimitChain`, so checks run one after another in arrival order. The
 * caller waits for its turn, but for at most `RATE_LIMIT_QUEUE_TIMEOUT_MS`:
 * once that time passes, the ticket is marked aborted (which `doCheck`
 * inspects) and the call rejects with an `HTTPError` wrapping a 429 JSON
 * response.
 *
 * @param {object} state$1 Object carrying `rateLimitSeconds` and the
 *   bookkeeping fields used by `doCheck`.
 * @returns {Promise<void>}
 * @throws {HTTPError} When the queue wait exceeds the timeout.
 */
async function checkRateLimit(state$1) {
  if (state$1.rateLimitSeconds === void 0) return;
  const ticket = { aborted: false };
  const myTurn = rateLimitChain.then(() => doCheck(state$1, ticket));
  // The chain itself must never reject, or later callers would be skipped.
  rateLimitChain = myTurn.catch(() => {});
  const queueTimeout = sleep(RATE_LIMIT_QUEUE_TIMEOUT_MS).then(() => {
    ticket.aborted = true;
    const errorBody = {
      type: "error",
      error: {
        type: "rate_limit_error",
        message: `Rate limit queue exceeded ${RATE_LIMIT_QUEUE_TIMEOUT_MS}ms; try again`
      }
    };
    throw new HTTPError("Rate limit queue wait exceeded", Response.json(errorBody, { status: 429 }));
  });
  return Promise.race([myTurn, queueTimeout]);
}
|
|
1802
|
+
async function doCheck(state$1, ticket) {
|
|
1803
|
+
if (state$1.rateLimitSeconds === void 0) return;
|
|
1804
|
+
if (ticket.aborted) return;
|
|
875
1805
|
const now = Date.now();
|
|
876
1806
|
if (!state$1.lastRequestTimestamp) {
|
|
877
1807
|
state$1.lastRequestTimestamp = now;
|
|
@@ -890,6 +1820,7 @@ async function checkRateLimit(state$1) {
|
|
|
890
1820
|
const waitTimeMs = waitTimeSeconds * 1e3;
|
|
891
1821
|
consola.warn(`Rate limit reached. Waiting ${waitTimeSeconds} seconds before proceeding...`);
|
|
892
1822
|
await sleep(waitTimeMs);
|
|
1823
|
+
if (ticket.aborted) return;
|
|
893
1824
|
state$1.lastRequestTimestamp = Date.now();
|
|
894
1825
|
consola.info("Rate limit wait completed, proceeding with request");
|
|
895
1826
|
}
|
|
@@ -952,6 +1883,169 @@ function detectCapabilityMismatch(info, model) {
|
|
|
952
1883
|
return err.includes("token") || err.includes("context") || err.includes("too long") || err.includes("max_tokens") || err.includes("prompt is too long");
|
|
953
1884
|
}
|
|
954
1885
|
|
|
1886
|
+
//#endregion
|
|
1887
|
+
//#region src/lib/stream-relay.ts
|
|
1888
|
+
const ENCODER$2 = new TextEncoder();
|
|
1889
|
+
/**
 * Recognize "the stream/controller is already closed" errors, which are
 * thrown when an enqueue/close call races with the consumer cancelling its
 * read. They signal that the client is done reading (or has disconnected),
 * not that the upstream failed.
 *
 * The check is a case-insensitive substring match on the error message.
 * Example wordings: `Invalid state: Controller is already closed`,
 * `The stream is closing`, `This ReadableStream is closed`.
 *
 * @param {unknown} error
 * @returns {boolean} `false` for anything that is not an `Error` instance.
 */
function isControllerClosedError(error) {
  if (!(error instanceof Error)) return false;
  const text = error.message.toLowerCase();
  const closedMarkers = [
    "controller is already closed",
    "controller is already errored",
    "readablestream is closed",
    "readablestream is already closed",
    "stream is closing",
    "stream is already closed",
    "stream is closed"
  ];
  return closedMarkers.some((marker) => text.includes(marker));
}
|
|
1905
|
+
/**
 * Wrap an upstream SSE byte stream in a pull-based `ReadableStream`.
 *
 * - Backpressure: the upstream is read only when the downstream asks for
 *   more (one read per `pull()`).
 * - Upstream failures while reading (including the inactivity timeout from
 *   `readWithInactivityTimeout`) are logged, turned into a final Anthropic
 *   `event: error` SSE event, and followed by closing the stream. A failure
 *   on the very first read takes the same path.
 * - An upstream that ends without having produced any bytes is logged as a
 *   warning.
 * - Consumer cancellation is handled silently: nothing is logged and no
 *   error event is written. This covers both the `cancel()` callback and
 *   an `enqueue()` that fails because the controller is already closed. In
 *   the latter case the upstream reader is cancelled here, because neither
 *   `pull()` nor `cancel()` can be relied on to run again for a closed
 *   controller and the upstream connection would otherwise stay open.
 *
 * @param {ReadableStream<Uint8Array>} body Upstream response body.
 * @param {{routePath: string, inactivityTimeoutMs?: number}} opts
 *   `routePath` is used in log messages only; the timeout defaults to
 *   `UPSTREAM_INACTIVITY_TIMEOUT_MS`.
 * @returns {ReadableStream<Uint8Array>} Stream to hand to the client.
 */
function relayAnthropicStream(body, opts) {
  const inactivityMs = opts.inactivityTimeoutMs ?? UPSTREAM_INACTIVITY_TIMEOUT_MS;
  const reader = body.getReader();
  let bytesRelayed = 0;
  let upstreamFinished = false;
  let consumerCancelled = false;
  // close() throws on an already closed/errored controller; that is fine here.
  const safeClose = (controller) => {
    try {
      controller.close();
    } catch {}
  };
  // Fire-and-forget cancellation of the upstream read.
  const cancelUpstream = (reason) => {
    reader.cancel(reason).catch(() => {});
  };
  return new ReadableStream({
    async pull(controller) {
      if (consumerCancelled || upstreamFinished) {
        safeClose(controller);
        return;
      }
      try {
        const result = await readWithInactivityTimeout(reader, inactivityMs);
        // The consumer may have cancelled while the read was pending.
        if (consumerCancelled) {
          safeClose(controller);
          return;
        }
        if (result.done) {
          if (bytesRelayed === 0) consola.warn(`Upstream returned empty SSE stream at ${opts.routePath}`);
          upstreamFinished = true;
          safeClose(controller);
          return;
        }
        if (result.value) {
          bytesRelayed += result.value.byteLength;
          try {
            controller.enqueue(result.value);
          } catch (enqueueError) {
            if (isControllerClosedError(enqueueError)) {
              // The consumer is gone: stop quietly AND release the upstream.
              consumerCancelled = true;
              upstreamFinished = true;
              cancelUpstream(enqueueError);
              return;
            }
            throw enqueueError;
          }
        }
      } catch (error) {
        upstreamFinished = true;
        if (consumerCancelled) {
          cancelUpstream(error);
          safeClose(controller);
          return;
        }
        const errName = error instanceof Error ? error.name : "Error";
        const errMessage = error instanceof Error ? error.message : String(error);
        consola.error(`Upstream stream interrupted at ${opts.routePath}: bytes=${bytesRelayed} errType=${errName} message=${JSON.stringify(errMessage)}`);
        const event = buildAnthropicErrorEvent(errName, errMessage);
        try {
          controller.enqueue(ENCODER$2.encode(event));
        } catch (enqueueError) {
          if (!isControllerClosedError(enqueueError)) consola.warn(`Could not deliver error event to consumer at ${opts.routePath}: ${enqueueError instanceof Error ? enqueueError.message : String(enqueueError)}`);
        }
        cancelUpstream(error);
        safeClose(controller);
      }
    },
    cancel(reason) {
      consumerCancelled = true;
      upstreamFinished = true;
      cancelUpstream(reason);
    }
  });
}
|
|
1993
|
+
/**
 * Perform one `reader.read()` that fails if nothing arrives in time.
 *
 * @param {{read: () => Promise<{done: boolean, value?: unknown}>}} reader
 * @param {number} timeoutMs Maximum time to wait for the read.
 * @returns {Promise<{done: boolean, value?: unknown}>} The read result.
 * @throws {Error} An error named "InactivityTimeout" with message
 *   "upstream_inactive" when the timer fires first; otherwise whatever
 *   `reader.read()` rejects with.
 */
async function readWithInactivityTimeout(reader, timeoutMs) {
  let timer;
  const expiry = new Promise((_resolve, reject) => {
    timer = setTimeout(() => {
      const timeoutError = /* @__PURE__ */ new Error("upstream_inactive");
      timeoutError.name = "InactivityTimeout";
      reject(timeoutError);
    }, timeoutMs);
  });
  // Mark the rejection as handled in case the read settles first.
  expiry.catch(() => {});
  try {
    return await Promise.race([reader.read(), expiry]);
  } finally {
    clearTimeout(timer);
  }
}
|
|
2007
|
+
/**
 * Produce the SSE text of an Anthropic-format streaming error event:
 *
 *   event: error
 *   data: {"type":"error","error":{"type":"...","message":"..."}}
 *
 * The inner `type` comes from `classifyStreamError(errName)`; the message
 * is prefixed with "Upstream stream interrupted: ".
 *
 * @param {string} errName Name of the error that interrupted the stream.
 * @param {string} errMessage Its message.
 * @returns {string} The event, terminated by a blank line.
 */
function buildAnthropicErrorEvent(errName, errMessage) {
  const error = {
    type: classifyStreamError(errName),
    message: `Upstream stream interrupted: ${errName}: ${errMessage}`
  };
  const data = JSON.stringify({ type: "error", error });
  return `event: error\ndata: ${data}\n\n`;
}
|
|
2023
|
+
/**
 * Produce the SSE text of an OpenAI-format streaming error: one
 * `data: {"error":{...}}` event followed by the `data: [DONE]` terminator.
 *
 * The error `type` comes from `classifyStreamError(errName)`; the message
 * is prefixed with "Upstream stream interrupted: ".
 *
 * @param {string} errName Name of the error that interrupted the stream.
 * @param {string} errMessage Its message.
 * @returns {string} Both events, each terminated by a blank line.
 */
function buildOpenAIErrorEvent(errName, errMessage) {
  const error = {
    type: classifyStreamError(errName),
    message: `Upstream stream interrupted: ${errName}: ${errMessage}`
  };
  const data = JSON.stringify({ error });
  return `data: ${data}\n\ndata: [DONE]\n\n`;
}
|
|
2034
|
+
/**
 * Map an error name to the error `type` reported in streamed error events.
 *
 * @param {string} errName
 * @returns {"timeout_error" | "api_error"} "timeout_error" for
 *   "AbortError" and "InactivityTimeout", "api_error" for everything else.
 */
function classifyStreamError(errName) {
  switch (errName) {
    case "AbortError":
    case "InactivityTimeout":
      return "timeout_error";
    default:
      return "api_error";
  }
}
|
|
2039
|
+
/**
 * Log an interrupted upstream stream and return the normalized error
 * details for the caller to reuse.
 *
 * Values that are not `Error` instances are reported with the name
 * "Error" and their `String()` form as message.
 *
 * @param {string} routePath Route being served; used in the log message.
 * @param {unknown} error The thrown value.
 * @returns {{errName: string, errMessage: string}}
 */
function logStreamError(routePath, error) {
  const isError = error instanceof Error;
  const details = {
    errName: isError ? error.name : "Error",
    errMessage: isError ? error.message : String(error)
  };
  consola.error(`Upstream stream interrupted at ${routePath}: errType=${details.errName} message=${JSON.stringify(details.errMessage)}`);
  return details;
}
|
|
2048
|
+
|
|
955
2049
|
//#endregion
|
|
956
2050
|
//#region src/lib/tokenizer.ts
|
|
957
2051
|
const ENCODING_MAP = {
|
|
@@ -1150,20 +2244,29 @@ const getTokenCount = async (payload, model) => {
|
|
|
1150
2244
|
|
|
1151
2245
|
//#endregion
|
|
1152
2246
|
//#region src/services/copilot/create-chat-completions.ts
|
|
1153
|
-
const createChatCompletions = async (payload, modelHeaders) => {
|
|
2247
|
+
const createChatCompletions = async (payload, modelHeaders, callerSignal) => {
|
|
1154
2248
|
if (!state.copilotToken) throw new Error("Copilot token not found");
|
|
1155
2249
|
const enableVision = payload.messages.some((x) => typeof x.content !== "string" && x.content?.some((x$1) => x$1.type === "image_url"));
|
|
1156
2250
|
const isAgentCall = payload.messages.some((msg) => ["assistant", "tool"].includes(msg.role));
|
|
1157
|
-
const
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
2251
|
+
const url = `${copilotBaseUrl(state)}/chat/completions`;
|
|
2252
|
+
const doFetch = () => {
|
|
2253
|
+
const fetchInit = {
|
|
2254
|
+
method: "POST",
|
|
2255
|
+
headers: {
|
|
2256
|
+
...copilotHeaders(state, enableVision),
|
|
2257
|
+
...modelHeaders,
|
|
2258
|
+
"X-Initiator": isAgentCall ? "agent" : "user"
|
|
2259
|
+
},
|
|
2260
|
+
body: JSON.stringify(payload)
|
|
2261
|
+
};
|
|
2262
|
+
const signals = [];
|
|
2263
|
+
if (UPSTREAM_FETCH_TIMEOUT_MS > 0) signals.push(AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS));
|
|
2264
|
+
if (callerSignal) signals.push(callerSignal);
|
|
2265
|
+
if (signals.length === 1) fetchInit.signal = signals[0];
|
|
2266
|
+
else if (signals.length > 1) fetchInit.signal = AbortSignal.any(signals);
|
|
2267
|
+
return fetch(url, fetchInit);
|
|
1161
2268
|
};
|
|
1162
|
-
const response = await
|
|
1163
|
-
method: "POST",
|
|
1164
|
-
headers,
|
|
1165
|
-
body: JSON.stringify(payload)
|
|
1166
|
-
});
|
|
2269
|
+
const response = await tryRefreshAndRetry(doFetch, "/chat/completions");
|
|
1167
2270
|
if (!response.ok) {
|
|
1168
2271
|
let errorBody = "";
|
|
1169
2272
|
try {
|
|
@@ -1185,73 +2288,185 @@ const createChatCompletions = async (payload, modelHeaders) => {
|
|
|
1185
2288
|
|
|
1186
2289
|
//#endregion
|
|
1187
2290
|
//#region src/services/copilot/web-search.ts
|
|
2291
|
+
const RpcSchema = z.object({
|
|
2292
|
+
jsonrpc: z.literal("2.0"),
|
|
2293
|
+
id: z.number().optional(),
|
|
2294
|
+
result: z.object({
|
|
2295
|
+
content: z.array(z.object({
|
|
2296
|
+
type: z.literal("text"),
|
|
2297
|
+
text: z.string()
|
|
2298
|
+
})).optional(),
|
|
2299
|
+
isError: z.boolean().optional()
|
|
2300
|
+
}).optional(),
|
|
2301
|
+
error: z.object({
|
|
2302
|
+
code: z.number(),
|
|
2303
|
+
message: z.string()
|
|
2304
|
+
}).optional()
|
|
2305
|
+
});
|
|
2306
|
+
const InnerSchema = z.object({
|
|
2307
|
+
text: z.object({
|
|
2308
|
+
value: z.string(),
|
|
2309
|
+
annotations: z.array(z.object({ url_citation: z.object({
|
|
2310
|
+
title: z.string(),
|
|
2311
|
+
url: z.string()
|
|
2312
|
+
}).optional() })).nullable().optional()
|
|
2313
|
+
}),
|
|
2314
|
+
bing_searches: z.array(z.unknown()).nullable().optional()
|
|
2315
|
+
});
|
|
1188
2316
|
const MAX_SEARCHES_PER_SECOND = 3;
|
|
1189
2317
|
let searchTimestamps = [];
|
|
2318
|
+
let throttleChain = Promise.resolve();
|
|
1190
2319
|
/**
 * Wait until another web search may be sent without exceeding
 * `MAX_SEARCHES_PER_SECOND`.
 *
 * Calls are serialized through `throttleChain`, so each one sees the
 * timestamps recorded by the calls before it. A call first drops
 * timestamps older than one second; if the window is still full it sleeps
 * until the oldest remaining timestamp is one second old, then records its
 * own timestamp.
 *
 * @returns {Promise<void>} Settles when this caller's turn has completed.
 */
async function throttleSearch() {
  const turn = throttleChain.then(async () => {
    const startedAt = Date.now();
    searchTimestamps = searchTimestamps.filter((stamp) => startedAt - stamp < 1e3);
    const windowFull = searchTimestamps.length >= MAX_SEARCHES_PER_SECOND;
    const waitMs = windowFull ? 1e3 - (startedAt - searchTimestamps[0]) : 0;
    if (waitMs > 0) {
      consola.debug(`Web search rate limited, waiting ${waitMs}ms`);
      await sleep(waitMs);
    }
    searchTimestamps.push(Date.now());
  });
  // Keep the chain alive even if one turn fails.
  throttleChain = turn.catch(() => {});
  return turn;
}
|
|
1202
|
-
function
|
|
1203
|
-
|
|
2335
|
+
/**
 * Build the request headers for a call to the `/mcp` endpoint.
 *
 * Authorization uses `state.githubToken` as a bearer token. When `sid` is
 * given it is sent as `Mcp-Session-Id`.
 *
 * @param {string} [sid] MCP session id from a previous `initialize` call.
 * @returns {Record<string, string>}
 * @throws {Error} When no GitHub token is available in `state`.
 */
function mcpHeaders(sid) {
  if (!state.githubToken) throw new Error("GitHub token missing — re-run auth flow. Web search uses the GitHub PAT (not the Copilot token); the on-disk token at ~/.local/share/github-router/github_token must be present.");
  const sessionHeader = sid ? { "Mcp-Session-Id": sid } : {};
  return {
    Authorization: `Bearer ${state.githubToken}`,
    "content-type": "application/json",
    accept: "application/json, text/event-stream",
    "X-MCP-Host": "copilot-cli",
    "X-MCP-Toolsets": "web_search",
    "Mcp-Protocol-Version": "2025-06-18",
    "user-agent": `GitHubCopilotChat/${copilotVersion(state)}`,
    ...sessionHeader
  };
}
|
|
1205
|
-
async function
|
|
1206
|
-
const
|
|
2349
|
+
/**
 * POST a JSON-RPC message to the `/mcp` endpoint.
 *
 * A 5xx response is retried once after a 500 ms pause. Before retrying,
 * the body of the discarded response is cancelled so the underlying
 * connection is released instead of waiting for garbage collection.
 * Any other response — including a non-OK one — is returned as-is for the
 * caller to inspect. Network-level failures (a rejected `fetch`) are not
 * retried and propagate to the caller.
 *
 * @param {unknown} body JSON-serializable request payload.
 * @param {string} [sid] MCP session id, forwarded to `mcpHeaders`.
 * @param {boolean} [retry=true] Whether a 5xx response may be retried once.
 * @returns {Promise<Response>}
 */
async function postMcp(body, sid, retry = true) {
  const url = `${copilotBaseUrl(state)}/mcp`;
  const res = await fetch(url, {
    method: "POST",
    headers: mcpHeaders(sid),
    body: JSON.stringify(body)
  });
  const shouldRetry = !res.ok && retry && res.status >= 500;
  if (!shouldRetry) return res;
  // This response is being thrown away: cancel its body (best effort).
  await res.body?.cancel().catch(() => {});
  await sleep(500);
  return postMcp(body, sid, false);
}
|
|
1234
2362
|
async function searchWeb(query) {
|
|
1235
|
-
if (!state.copilotToken) throw new Error("Copilot token not found");
|
|
1236
2363
|
await throttleSearch();
|
|
1237
|
-
consola.info(`Web search: "${query.slice(0, 80)}"`);
|
|
1238
|
-
const
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
2364
|
+
consola.info(`Web search (MCP): "${query.slice(0, 80)}"`);
|
|
2365
|
+
const callId = Math.floor(Math.random() * 1e9);
|
|
2366
|
+
let sid;
|
|
2367
|
+
try {
|
|
2368
|
+
const initRes = await postMcp({
|
|
2369
|
+
jsonrpc: "2.0",
|
|
2370
|
+
id: 1,
|
|
2371
|
+
method: "initialize",
|
|
2372
|
+
params: {
|
|
2373
|
+
protocolVersion: "2024-11-05",
|
|
2374
|
+
capabilities: {},
|
|
2375
|
+
clientInfo: {
|
|
2376
|
+
name: "GitHubCopilotChat",
|
|
2377
|
+
version: copilotVersion(state)
|
|
2378
|
+
}
|
|
2379
|
+
}
|
|
1244
2380
|
});
|
|
2381
|
+
if (!initRes.ok) {
|
|
2382
|
+
consola.error("MCP initialize failed", initRes.status);
|
|
2383
|
+
throw new HTTPError("MCP initialize failed", initRes);
|
|
2384
|
+
}
|
|
2385
|
+
sid = initRes.headers.get("mcp-session-id") ?? void 0;
|
|
2386
|
+
if (!sid) throw new HTTPError("MCP initialize: missing Mcp-Session-Id header", initRes);
|
|
2387
|
+
const notifRes = await postMcp({
|
|
2388
|
+
jsonrpc: "2.0",
|
|
2389
|
+
method: "notifications/initialized"
|
|
2390
|
+
}, sid);
|
|
2391
|
+
if (!notifRes.ok && notifRes.status !== 202) {
|
|
2392
|
+
consola.error("MCP notifications/initialized failed", notifRes.status);
|
|
2393
|
+
throw new HTTPError("MCP notifications/initialized failed", notifRes);
|
|
2394
|
+
}
|
|
2395
|
+
const callRes = await postMcp({
|
|
2396
|
+
jsonrpc: "2.0",
|
|
2397
|
+
id: callId,
|
|
2398
|
+
method: "tools/call",
|
|
2399
|
+
params: {
|
|
2400
|
+
name: "web_search",
|
|
2401
|
+
arguments: { query }
|
|
2402
|
+
}
|
|
2403
|
+
}, sid);
|
|
2404
|
+
if (!callRes.ok) {
|
|
2405
|
+
consola.error("MCP tools/call failed", callRes.status);
|
|
2406
|
+
throw new HTTPError("MCP tools/call failed", callRes);
|
|
2407
|
+
}
|
|
2408
|
+
let rpc;
|
|
2409
|
+
for await (const ev of events(callRes)) {
|
|
2410
|
+
if (!ev.data) continue;
|
|
2411
|
+
let parsedJson;
|
|
2412
|
+
try {
|
|
2413
|
+
parsedJson = JSON.parse(ev.data);
|
|
2414
|
+
} catch {
|
|
2415
|
+
continue;
|
|
2416
|
+
}
|
|
2417
|
+
const parsed = RpcSchema.safeParse(parsedJson);
|
|
2418
|
+
if (parsed.success && parsed.data.id === callId) {
|
|
2419
|
+
rpc = parsed.data;
|
|
2420
|
+
break;
|
|
2421
|
+
}
|
|
2422
|
+
}
|
|
2423
|
+
if (!rpc) throw new HTTPError("MCP tools/call: no matching response id in SSE stream", callRes);
|
|
2424
|
+
if (rpc.error) throw new HTTPError(`MCP error ${rpc.error.code}: ${rpc.error.message}`, callRes);
|
|
2425
|
+
if (rpc.result?.isError) throw new HTTPError("MCP web_search tool error", callRes);
|
|
2426
|
+
const text = rpc.result?.content?.[0]?.text;
|
|
2427
|
+
if (!text) throw new HTTPError("MCP web_search: empty content", callRes);
|
|
2428
|
+
let innerRaw;
|
|
2429
|
+
try {
|
|
2430
|
+
innerRaw = JSON.parse(text);
|
|
2431
|
+
} catch (err) {
|
|
2432
|
+
throw new HTTPError(`MCP web_search: inner content not JSON: ${err instanceof Error ? err.message : String(err)}`, callRes);
|
|
2433
|
+
}
|
|
2434
|
+
const innerParsed = InnerSchema.safeParse(innerRaw);
|
|
2435
|
+
if (!innerParsed.success) throw new HTTPError(`MCP web_search: inner content shape changed (${innerParsed.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join("; ")})`, callRes);
|
|
2436
|
+
const inner = innerParsed.data;
|
|
2437
|
+
const references = [];
|
|
2438
|
+
for (const ann of inner.text.annotations ?? []) {
|
|
2439
|
+
const cite = ann.url_citation;
|
|
2440
|
+
if (cite && !cite.url.toLowerCase().includes("bing.com/search")) references.push({
|
|
2441
|
+
title: cite.title,
|
|
2442
|
+
url: cite.url
|
|
2443
|
+
});
|
|
2444
|
+
}
|
|
2445
|
+
consola.debug(`Web search returned ${references.length} references`);
|
|
2446
|
+
return {
|
|
2447
|
+
content: inner.text.value,
|
|
2448
|
+
references
|
|
2449
|
+
};
|
|
2450
|
+
} finally {
|
|
2451
|
+
if (sid) try {
|
|
2452
|
+
fetch(`${copilotBaseUrl(state)}/mcp`, {
|
|
2453
|
+
method: "DELETE",
|
|
2454
|
+
headers: mcpHeaders(sid)
|
|
2455
|
+
}).catch(() => {});
|
|
2456
|
+
} catch {}
|
|
1245
2457
|
}
|
|
1246
|
-
consola.debug(`Web search returned ${references.length} references`);
|
|
1247
|
-
return {
|
|
1248
|
-
content: response.message.content,
|
|
1249
|
-
references
|
|
1250
|
-
};
|
|
1251
2458
|
}
|
|
1252
2459
|
|
|
1253
2460
|
//#endregion
|
|
1254
2461
|
//#region src/routes/chat-completions/handler.ts
|
|
2462
|
+
// Shared UTF-8 encoder; the streaming handler uses it to turn formatted SSE text into bytes before enqueueing.
const ENCODER$1 = new TextEncoder();
|
|
2463
|
+
/**
 * Serialize one server-sent event into its wire format.
 *
 * @param {{ event?: unknown, data?: unknown, id?: unknown }} chunk - Event fields to emit.
 *   A falsy `event` and an `undefined` `data`/`id` are omitted.
 * @returns {string} The `event:` / `data:` / `id:` lines followed by the blank line
 *   that terminates the event.
 */
function formatSSE$1(chunk) {
	// A raw CR/LF inside a single-line field would end that field early and let
	// the remainder be parsed as a separate field, so line breaks are stripped.
	const singleLine = (value) => String(value).replace(/[\r\n]/g, "");
	const parts = [];
	if (chunk.event) parts.push(`event: ${singleLine(chunk.event)}`);
	// Multi-line payloads are emitted as one `data:` line per source line.
	if (chunk.data !== void 0) {
		for (const line of String(chunk.data).split(/\r\n|\r|\n/)) parts.push(`data: ${line}`);
	}
	if (chunk.id !== void 0) parts.push(`id: ${singleLine(chunk.id)}`);
	return `${parts.join("\n")}\n\n`;
}
|
|
1255
2470
|
async function handleCompletion$1(c) {
|
|
1256
2471
|
const startTime = Date.now();
|
|
1257
2472
|
await checkRateLimit(state);
|
|
@@ -1306,89 +2521,608 @@ async function handleCompletion$1(c) {
|
|
|
1306
2521
|
if (debugEnabled) consola.debug("Non-streaming response:", JSON.stringify(response));
|
|
1307
2522
|
return c.json(response);
|
|
1308
2523
|
}
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
|
|
2524
|
+
const iterator = response[Symbol.asyncIterator]();
|
|
2525
|
+
const firstResult = await iterator.next();
|
|
2526
|
+
if (firstResult.done) consola.warn(`Upstream /chat/completions returned an empty stream at ${c.req.path}`);
|
|
2527
|
+
let pendingFirstChunk = firstResult.done ? void 0 : firstResult.value;
|
|
2528
|
+
let upstreamFinished = firstResult.done;
|
|
2529
|
+
let consumerCancelled = false;
|
|
2530
|
+
const safeClose = (controller) => {
|
|
2531
|
+
try {
|
|
2532
|
+
controller.close();
|
|
2533
|
+
} catch {}
|
|
2534
|
+
};
|
|
2535
|
+
const releaseUpstream = (reason) => {
|
|
2536
|
+
if (typeof iterator.return === "function") iterator.return(reason).catch(() => {});
|
|
2537
|
+
};
|
|
2538
|
+
const safeEnqueue = (controller, bytes) => {
|
|
2539
|
+
try {
|
|
2540
|
+
controller.enqueue(bytes);
|
|
2541
|
+
return true;
|
|
2542
|
+
} catch (e) {
|
|
2543
|
+
if (isControllerClosedError(e)) {
|
|
2544
|
+
consumerCancelled = true;
|
|
2545
|
+
releaseUpstream(e);
|
|
2546
|
+
return false;
|
|
2547
|
+
}
|
|
2548
|
+
throw e;
|
|
2549
|
+
}
|
|
2550
|
+
};
|
|
2551
|
+
return new Response(new ReadableStream({
|
|
2552
|
+
async pull(controller) {
|
|
2553
|
+
if (consumerCancelled || upstreamFinished) {
|
|
2554
|
+
safeClose(controller);
|
|
2555
|
+
return;
|
|
2556
|
+
}
|
|
2557
|
+
if (pendingFirstChunk !== void 0) {
|
|
2558
|
+
const chunk = pendingFirstChunk;
|
|
2559
|
+
pendingFirstChunk = void 0;
|
|
2560
|
+
if (debugEnabled) consola.debug("Streaming chunk:", JSON.stringify(chunk));
|
|
2561
|
+
safeEnqueue(controller, ENCODER$1.encode(formatSSE$1(chunk)));
|
|
2562
|
+
return;
|
|
2563
|
+
}
|
|
2564
|
+
try {
|
|
2565
|
+
const result = await iterator.next();
|
|
2566
|
+
if (consumerCancelled) {
|
|
2567
|
+
safeClose(controller);
|
|
2568
|
+
return;
|
|
2569
|
+
}
|
|
2570
|
+
if (result.done) {
|
|
2571
|
+
upstreamFinished = true;
|
|
2572
|
+
safeClose(controller);
|
|
2573
|
+
return;
|
|
2574
|
+
}
|
|
2575
|
+
if (result.value === void 0 || result.value === null) return;
|
|
2576
|
+
if (debugEnabled) consola.debug("Streaming chunk:", JSON.stringify(result.value));
|
|
2577
|
+
safeEnqueue(controller, ENCODER$1.encode(formatSSE$1(result.value)));
|
|
2578
|
+
} catch (error) {
|
|
2579
|
+
upstreamFinished = true;
|
|
2580
|
+
if (consumerCancelled) {
|
|
2581
|
+
releaseUpstream(error);
|
|
2582
|
+
safeClose(controller);
|
|
2583
|
+
return;
|
|
2584
|
+
}
|
|
2585
|
+
const { errName, errMessage } = logStreamError(c.req.path, error);
|
|
2586
|
+
safeEnqueue(controller, ENCODER$1.encode(buildOpenAIErrorEvent(errName, errMessage)));
|
|
2587
|
+
releaseUpstream(error);
|
|
2588
|
+
safeClose(controller);
|
|
2589
|
+
}
|
|
2590
|
+
},
|
|
2591
|
+
cancel() {
|
|
2592
|
+
consumerCancelled = true;
|
|
2593
|
+
upstreamFinished = true;
|
|
2594
|
+
releaseUpstream();
|
|
2595
|
+
}
|
|
2596
|
+
}), {
|
|
2597
|
+
status: 200,
|
|
2598
|
+
headers: {
|
|
2599
|
+
"content-type": "text/event-stream",
|
|
2600
|
+
"cache-control": "no-cache",
|
|
2601
|
+
"transfer-encoding": "chunked",
|
|
2602
|
+
connection: "keep-alive"
|
|
2603
|
+
}
|
|
2604
|
+
});
|
|
2605
|
+
}
|
|
2606
|
+
/**
 * Tell a complete (non-streaming) chat completion apart from a stream.
 *
 * @param {object} response - Value returned by the upstream chat-completions call.
 * @returns {boolean} `true` when `response` has its own `choices` property.
 */
const isNonStreaming$1 = (response) => {
	return Object.hasOwn(response, "choices");
};
|
|
2607
|
+
/**
 * Handle a `web_search` tool declared in a chat-completions payload.
 *
 * When the payload declares such a tool, the latest user query is searched via
 * `searchWeb` and the results are prepended to the system message (or a new
 * system message is inserted). The `web_search` tool is then removed from
 * `payload.tools`, and `payload.tool_choice` is cleared when it no longer
 * refers to a remaining tool. The payload is mutated in place.
 *
 * @param {object} payload - Chat-completions request body (`messages`, optional `tools` / `tool_choice`).
 * @returns {Promise<void>}
 */
async function injectWebSearchIfNeeded$1(payload) {
	const isWebSearchTool = (t) => "type" in t && t.type === "web_search" || t.function?.name === "web_search";
	if (!payload.tools?.some(isWebSearchTool)) return;
	// No search is run when the conversation already contains a tool-role message.
	const query = payload.messages.some((msg) => msg.role === "tool") ? void 0 : extractUserQuery$2(payload.messages);
	if (query) try {
		const results = await searchWeb(query);
		const searchContext = [
			"[Web Search Results]",
			results.content,
			"",
			results.references.map((r) => `- [${r.title}](${r.url})`).join("\n"),
			"[End Web Search Results]"
		].join("\n");
		const systemMsg = payload.messages.find((msg) => msg.role === "system");
		if (systemMsg) systemMsg.content = `${searchContext}\n\n${typeof systemMsg.content === "string" ? systemMsg.content : Array.isArray(systemMsg.content) ? systemMsg.content.filter((p) => p.type === "text").map((p) => "text" in p ? p.text : "").join("\n") : ""}`;
		else payload.messages.unshift({
			role: "system",
			content: searchContext
		});
	} catch (error) {
		// Best effort: a failed search must not fail the completion request.
		consola.warn("Web search failed, continuing without results:", error);
	}
	payload.tools = payload.tools?.filter((t) => !isWebSearchTool(t));
	if (payload.tools?.length === 0) payload.tools = void 0;
	if (!payload.tools) payload.tool_choice = void 0;
	else if (payload.tool_choice && typeof payload.tool_choice === "object" && "type" in payload.tool_choice && payload.tool_choice.type === "function") {
		const toolChoiceName = payload.tool_choice.function?.name;
		// Remaining tools may lack a `function` member, so read the name defensively.
		if (toolChoiceName && !payload.tools.some((tool) => tool.function?.name === toolChoiceName)) payload.tool_choice = void 0;
	}
}
|
|
2636
|
+
/**
 * Find the text of the most recent user message.
 *
 * Messages are scanned from last to first. A user message with string content
 * yields that string; one with array content yields its first `text` part. A
 * user message with no usable text is passed over in favour of earlier ones.
 *
 * @param {Array<object>} messages - Chat messages (`role`, `content`).
 * @returns {string | undefined} The query text, or `undefined` when none is found.
 */
function extractUserQuery$2(messages) {
	let idx = messages.length;
	while (idx-- > 0) {
		const { role, content } = messages[idx];
		if (role !== "user") continue;
		if (typeof content === "string") return content;
		if (!Array.isArray(content)) continue;
		const firstText = content.find((p) => p.type === "text");
		if (firstText && "text" in firstText) return firstText.text;
	}
}
|
|
2648
|
+
|
|
2649
|
+
//#endregion
|
|
2650
|
+
//#region src/routes/chat-completions/route.ts
|
|
2651
|
+
// Router for the chat-completions endpoint: POST / delegates to
// handleCompletion$1, and any thrown error is converted by forwardError.
const completionRoutes = new Hono();
completionRoutes.post("/", async function postChatCompletion(c) {
	try {
		const reply = await handleCompletion$1(c);
		return reply;
	} catch (failure) {
		return await forwardError(c, failure);
	}
});
|
|
2659
|
+
|
|
2660
|
+
//#endregion
|
|
2661
|
+
//#region src/services/copilot/create-embeddings.ts
|
|
2662
|
+
/**
 * POST an embeddings request to the Copilot `/embeddings` endpoint.
 *
 * @param {object} payload - Request body, sent as JSON.
 * @returns {Promise<object>} The parsed JSON response.
 * @throws {Error} When no Copilot token is present in `state`.
 * @throws {HTTPError} When the upstream response is not OK.
 */
const createEmbeddings = async (payload) => {
	if (!state.copilotToken) throw new Error("Copilot token not found");
	const endpoint = `${copilotBaseUrl(state)}/embeddings`;
	const upstream = await fetch(endpoint, {
		method: "POST",
		headers: copilotHeaders(state),
		body: JSON.stringify(payload)
	});
	if (upstream.ok) return await upstream.json();
	throw new HTTPError("Failed to create embeddings", upstream);
};
|
|
2672
|
+
|
|
2673
|
+
//#endregion
|
|
2674
|
+
//#region src/routes/embeddings/route.ts
|
|
2675
|
+
// Router for the embeddings endpoint: POST / forwards the JSON body to
// createEmbeddings and returns its result; errors go through forwardError.
const embeddingRoutes = new Hono();
embeddingRoutes.post("/", async function postEmbeddings(c) {
	try {
		const payload = await c.req.json();
		const embeddings = await createEmbeddings(payload);
		return c.json(embeddings);
	} catch (failure) {
		return await forwardError(c, failure);
	}
});
|
|
2684
|
+
|
|
2685
|
+
//#endregion
|
|
2686
|
+
//#region src/services/copilot/create-responses.ts
|
|
2687
|
+
/**
 * POST a request to the Copilot `/responses` endpoint.
 *
 * @param {object} payload - Request body; `payload.input` drives the vision and
 *   `X-Initiator` headers, `payload.stream` selects the return shape.
 * @param {object} [modelHeaders] - Extra headers spread over the Copilot defaults.
 * @param {AbortSignal} [callerSignal] - Optional signal combined with the upstream timeout.
 * @returns {Promise<object | AsyncIterable<object>>} An SSE event iterator when
 *   `payload.stream` is truthy, otherwise the parsed JSON body.
 * @throws {Error} When no Copilot token is present in `state`.
 * @throws {HTTPError} When the upstream response is not OK.
 */
const createResponses = async (payload, modelHeaders, callerSignal) => {
	if (!state.copilotToken) throw new Error("Copilot token not found");
	const enableVision = detectVision(payload.input);
	const initiator = detectAgentCall(payload.input) ? "agent" : "user";
	const url = `${copilotBaseUrl(state)}/responses`;
	// Combine the optional timeout and caller signals; undefined when neither applies.
	const buildSignal = () => {
		const signals = [];
		if (UPSTREAM_FETCH_TIMEOUT_MS > 0) signals.push(AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS));
		if (callerSignal) signals.push(callerSignal);
		if (signals.length === 0) return void 0;
		return signals.length === 1 ? signals[0] : AbortSignal.any(signals);
	};
	// The request init (headers, body, signal) is rebuilt on every invocation.
	const doFetch = () => {
		const fetchInit = {
			method: "POST",
			headers: {
				...copilotHeaders(state, enableVision),
				...modelHeaders,
				"X-Initiator": initiator
			},
			body: JSON.stringify(payload)
		};
		const signal = buildSignal();
		if (signal !== void 0) fetchInit.signal = signal;
		return fetch(url, fetchInit);
	};
	const response = await tryRefreshAndRetry(doFetch, "/responses");
	if (!response.ok) {
		consola.error("Failed to create responses", response);
		throw new HTTPError("Failed to create responses", response);
	}
	return payload.stream ? events(response) : await response.json();
};
|
|
2717
|
+
/**
 * Report whether a Responses-API `input` contains an image part.
 *
 * @param {unknown} input - A string prompt or an array of input items.
 * @returns {boolean} `true` when any item's `content` array holds a part whose
 *   `type` is `"input_image"`; `false` for strings and non-arrays.
 */
function detectVision(input) {
	if (!Array.isArray(input)) return false;
	return input.some((item) => {
		// The `in` operator throws on null/primitives, so skip non-object entries.
		if (typeof item !== "object" || item === null) return false;
		if (!Array.isArray(item.content)) return false;
		return item.content.some((part) => part?.type === "input_image");
	});
}
|
|
2725
|
+
/**
 * Report whether a Responses-API `input` looks like an agent follow-up turn.
 *
 * @param {unknown} input - A string prompt or an array of input items.
 * @returns {boolean} `true` when any item has role `"assistant"` or type
 *   `"function_call"` / `"function_call_output"`; `false` for strings and non-arrays.
 */
function detectAgentCall(input) {
	if (!Array.isArray(input)) return false;
	return input.some((item) => {
		// The `in` operator throws on null/primitives, so skip non-object entries.
		if (typeof item !== "object" || item === null) return false;
		return item.role === "assistant" || item.type === "function_call" || item.type === "function_call_output";
	});
}
|
|
2734
|
+
|
|
2735
|
+
//#endregion
|
|
2736
|
+
//#region src/routes/mcp/handler.ts
|
|
2737
|
+
// MCP protocol revision returned as `protocolVersion` in the `initialize` result.
const MCP_PROTOCOL_VERSION = "2025-06-18";
|
|
2738
|
+
// Returned as `serverInfo.name` in the `initialize` result.
const SERVER_NAME = "github-router-peers";
|
|
2739
|
+
// Returned as `serverInfo.version` in the `initialize` result.
const SERVER_VERSION = "1";
|
|
2740
|
+
/**
|
|
2741
|
+
* Reasoning effort levels accepted by Copilot's /v1/responses (gpt-5.x) and
|
|
2742
|
+
* /v1/chat/completions endpoints. Per the proxy's existing thinking-mode
|
|
2743
|
+
* translator (CLAUDE.md "Thinking-mode translation"), Copilot's adaptive-
|
|
2744
|
+
* thinking path uses these same buckets:
|
|
2745
|
+
* <2k tokens → low, <8k → medium, <24k → high, else → xhigh.
|
|
2746
|
+
*
|
|
2747
|
+
* Default `high` for peer reviews — adversarial-by-design but still cost-
|
|
2748
|
+
* conscious. Callers can pass `xhigh` explicitly for deep dives, or `medium`
|
|
2749
|
+
* for quick sanity checks.
|
|
2750
|
+
*/
|
|
2751
|
+
const EFFORT_LEVELS = ["low", "medium", "high", "xhigh"];
|
|
2757
|
+
// Effort used by tools/call when `arguments.effort` is omitted.
const DEFAULT_EFFORT = "high";
|
|
2758
|
+
/**
 * Check that a value is one of the accepted effort levels.
 *
 * @param {unknown} v - Candidate value.
 * @returns {boolean} `true` only for a string listed in `EFFORT_LEVELS`.
 */
function isEffort(v) {
	if (typeof v !== "string") return false;
	return EFFORT_LEVELS.includes(v);
}
|
|
2761
|
+
/** Bounded concurrency. Originally capped at 2 (commit 4317a25) as a defensive
|
|
2762
|
+
* pre-launch guess against Opus's natural pattern of fanning out to all three
|
|
2763
|
+
* critics at once. Raised to 8 (Phase 2D of the peer-MCP plan) so the
|
|
2764
|
+
* decomposition pattern Phase 2B teaches Opus — "split a >20 KB artifact
|
|
2765
|
+
* into 2-4 batches and call in parallel" — can actually run in parallel
|
|
2766
|
+
* without the (3+)th call returning isError "queue full". The persona
|
|
2767
|
+
* handlers (`callPersona`) hold no shared mutable state — there's no race
|
|
2768
|
+
* the cap is hiding; the upstream Copilot's own rate-limit (surfaced as a
|
|
2769
|
+
* per-call 429 → tool isError) is the real backpressure mechanism. 8 covers
|
|
2770
|
+
* a 7-fork wave with one slot of headroom and is still a hard upper bound
|
|
2771
|
+
* against runaway clients. See docs/research/peer-mcp-investigation.md
|
|
2772
|
+
* § "Concurrency cap investigation" for the full justification. */
|
|
2773
|
+
const MAX_INFLIGHT_TOOLS_CALL = 8; // compared against inFlightToolsCall in handleToolsCall
|
|
2774
|
+
// Number of persona calls currently running; handleToolsCall increments it
// before calling a persona and decrements it in a `finally`.
let inFlightToolsCall = 0;
|
|
2775
|
+
// JSON-RPC 2.0 "Parse error": returned when the request body is not valid JSON.
const RPC_PARSE_ERROR = -32700;
|
|
2776
|
+
// JSON-RPC 2.0 "Invalid Request": used for malformed envelopes and for auth rejections.
const RPC_INVALID_REQUEST = -32600;
|
|
2777
|
+
// JSON-RPC 2.0 "Method not found": used for unknown methods and unknown tool names.
const RPC_METHOD_NOT_FOUND = -32601;
|
|
2778
|
+
// JSON-RPC 2.0 "Invalid params": used for missing or invalid tools/call arguments.
const RPC_INVALID_PARAMS = -32602;
|
|
2779
|
+
// JSON-RPC 2.0 "Internal error": returned when the RPC handler itself throws.
const RPC_INTERNAL_ERROR = -32603;
|
|
2780
|
+
/**
 * Build a JSON-RPC 2.0 error response.
 *
 * @param {string | number | null | undefined} id - Request id; nullish becomes `null`.
 * @param {number} code - JSON-RPC error code.
 * @param {string} message - Human-readable error message.
 * @param {unknown} [data] - Optional payload; the `data` key is omitted when `undefined`.
 * @returns {{ jsonrpc: "2.0", id: unknown, error: object }} The error envelope.
 */
function rpcError(id, code, message, data) {
	const error = { code, message };
	if (data !== void 0) error.data = data;
	return {
		jsonrpc: "2.0",
		id: id ?? null,
		error
	};
}
|
|
2794
|
+
/**
 * Build a JSON-RPC 2.0 success response.
 *
 * @param {string | number | null | undefined} id - Request id; nullish becomes `null`.
 * @param {unknown} result - Value placed under `result`.
 * @returns {{ jsonrpc: "2.0", id: unknown, result: unknown }} The result envelope.
 */
function rpcResult(id, result) {
	const envelope = {
		jsonrpc: "2.0",
		id: id ?? null
	};
	envelope.result = result;
	return envelope;
}
|
|
2801
|
+
/**
 * Decide whether a `Host` header value names the local machine.
 *
 * Accepts `127.0.0.1`, `localhost` and the bracketed IPv6 loopback `[::1]`,
 * each with an optional `:port` suffix. Matching is case-insensitive.
 *
 * @param {string | undefined} host - Raw `Host` header value.
 * @returns {boolean} `true` for a loopback host, `false` otherwise or when absent.
 */
function isLoopbackHost(host) {
	if (!host) return false;
	const value = host.trim().toLowerCase();
	if (value.startsWith("[")) {
		// IPv6 literals contain colons themselves, so the port cannot be found
		// with lastIndexOf(":"); split on the closing bracket instead.
		const end = value.indexOf("]");
		if (end < 0) return false;
		const rest = value.slice(end + 1);
		if (rest !== "" && !/^:\d+$/.test(rest)) return false;
		return value.slice(1, end) === "::1";
	}
	const idx = value.lastIndexOf(":");
	const hostname = idx >= 0 ? value.slice(0, idx) : value;
	return hostname === "127.0.0.1" || hostname === "localhost";
}
|
|
2807
|
+
/**
|
|
2808
|
+
* Constant-time bearer compare. Random per-launch nonces aren't really
|
|
2809
|
+
* timing-attackable in practice, but this costs nothing.
|
|
2810
|
+
*/
|
|
2811
|
+
function nonceMatches(provided, expected) {
	// Strings of different length can never match; reject before allocating buffers.
	if (provided.length !== expected.length) return false;
	const [left, right] = [provided, expected].map((text) => Buffer.from(text));
	try {
		return timingSafeEqual(left, right);
	} catch {
		// timingSafeEqual throws when the two buffers differ in byte length.
		return false;
	}
}
|
|
2821
|
+
/**
 * Authorize a request to the /mcp endpoint.
 *
 * Checks, in order: the `Host` header is loopback (403 otherwise), a nonce is
 * configured in `state.peerMcpNonce` (401 otherwise), and the `Authorization`
 * header carries a matching bearer token (401 otherwise).
 *
 * @param {object} c - Hono request context.
 * @returns {{ ok: true } | { ok: false, status: number, reason: string }}
 */
function checkAuth(c) {
	const deny = (status, reason) => ({
		ok: false,
		status,
		reason
	});
	if (!isLoopbackHost(c.req.header("host"))) return deny(403, "non-loopback Host header rejected");
	const expected = state.peerMcpNonce;
	if (!expected) return deny(401, "/mcp not enabled in this proxy session");
	const header = c.req.header("authorization") ?? "";
	const match = /^Bearer\s+(.+)$/i.exec(header);
	if (match && nonceMatches(match[1], expected)) return { ok: true };
	return deny(401, "missing or invalid Authorization bearer");
}
|
|
2842
|
+
/**
 * Report whether the cached model list contains a Gemini 3.x "pro" model.
 *
 * @returns {boolean} `true` when any id in `state.models.data` matches
 *   `/^gemini-3\..*pro/i`; `false` when the list is missing.
 */
function geminiAvailable() {
	const catalog = state.models?.data;
	return catalog ? catalog.some((entry) => /^gemini-3\..*pro/i.test(entry.id)) : false;
}
|
|
2847
|
+
/**
 * List the personas currently offered as tools.
 *
 * @returns {Array<object>} Entries of `PERSONAS_READ`; those flagged
 *   `requiresHttp` are kept only while `geminiAvailable()` is true.
 */
function activePersonas() {
	return PERSONAS_READ.filter((persona) => {
		if (!persona.requiresHttp) return true;
		return geminiAvailable();
	});
}
|
|
2850
|
+
/**
 * Build the `tools/list` entries, one per active persona.
 *
 * @returns {Array<{ name: string, description: string, inputSchema: object }>}
 *   Tool descriptors sharing the same prompt/context/effort input schema.
 */
function toolEntries() {
	// A fresh schema object is built for every tool so entries share no references.
	const buildInputSchema = () => ({
		type: "object",
		required: ["prompt"],
		additionalProperties: false,
		properties: {
			prompt: {
				type: "string",
				description: "The lead's brief — the artifact under review plus constraints."
			},
			context: {
				type: "string",
				description: "Optional additional context (extra file content, prior decisions). Concatenated to the brief before sending."
			},
			effort: {
				type: "string",
				enum: [...EFFORT_LEVELS],
				description: `Reasoning depth (low | medium | high | xhigh). Default "${DEFAULT_EFFORT}". Use 'xhigh' for explicit deep dives where you want maximum reasoning. Use 'medium' for quick sanity checks. Note: for non-OpenAI models routed via /v1/chat/completions (gemini-3.x), the upstream may silently ignore this knob.`
			}
		}
	});
	const entries = [];
	for (const persona of activePersonas()) {
		entries.push({
			name: persona.toolNameHttp,
			description: persona.description,
			inputSchema: buildInputSchema()
		});
	}
	return entries;
}
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
2876
|
+
/**
 * Combine the brief with optional extra context into one user message.
 *
 * @param {string} prompt - The brief.
 * @param {string | undefined} context - Extra context; ignored when falsy.
 * @returns {string} `prompt` alone, or `prompt` followed by a separator and the context.
 */
function buildUserText(prompt, context) {
	return context ? `${prompt}\n\n---\n\nAdditional context:\n${context}` : prompt;
}
|
|
2880
|
+
/**
 * Concatenate the assistant text from a Responses-API result.
 *
 * Only `output` items of type `"message"` with role `"assistant"` are read, and
 * within them only `output_text` / `text` parts carrying a string `text`.
 *
 * @param {object} response - Parsed `/responses` result.
 * @returns {string} The joined text, or `""` when `output` is missing, is not
 *   an array, or holds no assistant text.
 */
function extractResponsesText(response) {
	const items = response?.output;
	// A body without an `output` array yields no text instead of a TypeError.
	if (!Array.isArray(items)) return "";
	const out = [];
	for (const item of items) {
		if (typeof item !== "object" || item === null) continue;
		if (item.type !== "message" || item.role !== "assistant") continue;
		if (!Array.isArray(item.content)) continue;
		for (const part of item.content) {
			if (typeof part !== "object" || part === null) continue;
			if ((part.type === "output_text" || part.type === "text") && typeof part.text === "string") out.push(part.text);
		}
	}
	return out.join("");
}
|
|
2896
|
+
/**
 * Read the assistant text from a chat-completions result.
 *
 * @param {object} response - Parsed chat-completions result.
 * @returns {string} `choices[0].message.content` when it is a string, otherwise `""`.
 */
function extractChatCompletionText(response) {
	const content = response.choices?.[0]?.message?.content;
	return typeof content === "string" ? content : "";
}
|
|
2902
|
+
/**
 * Wrap a message as an MCP tool result flagged as an error.
 *
 * @param {string} message - Text to return to the caller.
 * @returns {{ content: Array<{ type: "text", text: string }>, isError: true }}
 */
function toolError(message) {
	const textPart = {
		type: "text",
		text: message
	};
	return {
		content: [textPart],
		isError: true
	};
}
|
|
2911
|
+
/**
 * Run one persona against the upstream model and wrap the reply as a tool result.
 *
 * Personas whose `endpoint` is `"/v1/responses"` go through `createResponses`;
 * all others go through `createChatCompletions`. Both requests are non-streaming.
 *
 * @param {object} persona - Persona definition (`model`, `endpoint`, `baseInstructions`, `agentName`).
 * @param {string} prompt - The brief.
 * @param {string | undefined} context - Optional extra context appended to the brief.
 * @param {string} effort - Reasoning effort forwarded to the upstream request.
 * @returns {Promise<object>} A text tool result, or an error result when the reply is empty.
 */
async function callPersona(persona, prompt, context, effort) {
	const model = resolveModel(persona.model);
	const userText = buildUserText(prompt, context);
	let reply;
	if (persona.endpoint === "/v1/responses") {
		const response = await createResponses({
			model,
			instructions: persona.baseInstructions,
			input: [{
				role: "user",
				content: [{
					type: "input_text",
					text: userText
				}]
			}],
			stream: false,
			reasoning: { effort }
		});
		reply = extractResponsesText(response);
	} else {
		const completion = await createChatCompletions({
			model,
			messages: [{
				role: "system",
				content: persona.baseInstructions
			}, {
				role: "user",
				content: userText
			}],
			stream: false,
			reasoning_effort: effort
		});
		reply = extractChatCompletionText(completion);
	}
	if (!reply) return toolError(`persona ${persona.agentName}: empty assistant output`);
	return { content: [{
		type: "text",
		text: reply
	}] };
}
|
|
2952
|
+
/**
 * Write one space-separated `key=value` telemetry line to stderr.
 *
 * @param {{ name: string, model: string, durationMs: number, result: string, errorMessage?: string }} t
 *   Fields to log; `errorMessage` is appended JSON-quoted only when truthy.
 * @returns {void}
 */
function logTelemetry(t) {
	const { name, model, durationMs, result, errorMessage } = t;
	let line = `[peer-mcp] name=${name} model=${model} duration_ms=${durationMs} result=${result}`;
	if (errorMessage) line += ` error=${JSON.stringify(errorMessage)}`;
	process.stderr.write(`${line}\n`);
}
|
|
2963
|
+
/**
 * Handle a JSON-RPC `tools/call` request.
 *
 * Validation runs in this order: `arguments.effort`, tool name, tool lookup,
 * `arguments.prompt`; each failure returns a JSON-RPC error. A full queue and a
 * failing persona call are reported as tool results with `isError: true`.
 *
 * @param {object} body - JSON-RPC request envelope (`id`, `params`).
 * @returns {Promise<object>} A JSON-RPC result or error envelope.
 */
async function handleToolsCall(body) {
	const params = body.params ?? {};
	const args = params.arguments ?? {};
	const toolName = typeof params.name === "string" ? params.name : "";
	const prompt = typeof args.prompt === "string" ? args.prompt : "";
	const context = typeof args.context === "string" ? args.context : void 0;
	let effort = DEFAULT_EFFORT;
	if (args.effort !== void 0) {
		if (!isEffort(args.effort)) return rpcError(body.id, RPC_INVALID_PARAMS, `tools/call: arguments.effort must be one of ${EFFORT_LEVELS.join("|")}; got ${JSON.stringify(args.effort)}`);
		effort = args.effort;
	}
	if (!toolName) return rpcError(body.id, RPC_INVALID_PARAMS, "tools/call missing name");
	const persona = activePersonas().find((p) => p.toolNameHttp === toolName);
	if (!persona) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${toolName}"`);
	if (!prompt) return rpcError(body.id, RPC_INVALID_PARAMS, `tools/call: arguments.prompt is required`);
	// Over the cap: answer at once with an error result rather than queueing.
	if (inFlightToolsCall >= MAX_INFLIGHT_TOOLS_CALL) return rpcResult(body.id, toolError(`Peer MCP queue full (${MAX_INFLIGHT_TOOLS_CALL} in-flight). Retry shortly, or wait for the current persona calls to complete.`));
	inFlightToolsCall++;
	const startedAt = Date.now();
	try {
		const result = await callPersona(persona, prompt, context, effort);
		logTelemetry({
			name: persona.agentName,
			model: persona.model,
			durationMs: Date.now() - startedAt,
			result: result.isError ? "isError" : "ok"
		});
		return rpcResult(body.id, result);
	} catch (err) {
		const message = err instanceof Error ? err.message : String(err);
		logTelemetry({
			name: persona.agentName,
			model: persona.model,
			durationMs: Date.now() - startedAt,
			result: "exception",
			errorMessage: message
		});
		return rpcResult(body.id, toolError(`persona ${persona.agentName} failed: ${message}`));
	} finally {
		// Release the slot on every exit path.
		inFlightToolsCall--;
	}
}
|
|
3016
|
+
/**
 * Dispatch one parsed JSON-RPC message for the /mcp endpoint.
 *
 * @param {object} _c - Hono request context (unused).
 * @param {unknown} body - Parsed request body.
 * @returns {Promise<{ status: number, body: object | null }>} HTTP status plus
 *   the JSON-RPC envelope to send, or `body: null` with status 202 when nothing
 *   is sent back (notifications and `notifications/initialized`).
 */
async function handleRpc(_c, body) {
	const reply = (envelope) => ({
		status: 200,
		body: envelope
	});
	if (body === null || typeof body !== "object" || Array.isArray(body)) return reply(rpcError(null, RPC_INVALID_REQUEST, "jsonrpc 2.0 envelope required"));
	if (body.jsonrpc !== "2.0" || typeof body.method !== "string") return reply(rpcError(body.id ?? null, RPC_INVALID_REQUEST, "jsonrpc 2.0 envelope required"));
	// A message without an id is a notification; every method acknowledges those
	// with 202, and `notifications/initialized` does so even when an id is present.
	const isNotification = body.id === void 0;
	if (isNotification || body.method === "notifications/initialized") return {
		status: 202,
		body: null
	};
	switch (body.method) {
		case "initialize": return reply(rpcResult(body.id, {
			protocolVersion: MCP_PROTOCOL_VERSION,
			capabilities: { tools: { listChanged: false } },
			serverInfo: {
				name: SERVER_NAME,
				version: SERVER_VERSION
			}
		}));
		case "tools/list": return reply(rpcResult(body.id, { tools: toolEntries() }));
		case "tools/call": return reply(await handleToolsCall(body));
		case "ping": return reply(rpcResult(body.id, {}));
		default: return reply(rpcError(body.id, RPC_METHOD_NOT_FOUND, `unknown method: ${body.method}`));
	}
}
|
|
1346
|
-
function
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
3085
|
+
/**
 * HTTP handler for POST /mcp: authorize, parse the JSON body and dispatch it.
 *
 * Auth failures use the status from `checkAuth`. Parse failures and handler
 * exceptions are returned as JSON-RPC errors with HTTP 200.
 *
 * @param {object} c - Hono request context.
 * @returns {Promise<Response>} The HTTP response.
 */
async function handleMcpPost(c) {
	const gate = checkAuth(c);
	if (!gate.ok) return c.json(rpcError(null, RPC_INVALID_REQUEST, gate.reason), gate.status);
	let body;
	try {
		body = await c.req.json();
	} catch (err) {
		consola.debug("/mcp parse error:", err);
		return c.json(rpcError(null, RPC_PARSE_ERROR, "request body is not valid JSON"), 200);
	}
	try {
		const outcome = await handleRpc(c, body);
		// A null body means there is nothing to send back.
		return outcome.body === null ? c.body(null, outcome.status) : c.json(outcome.body, outcome.status);
	} catch (err) {
		consola.error("/mcp handler error:", err);
		// Echo the request id only when the body was an object envelope.
		const isEnvelope = typeof body === "object" && body !== null && !Array.isArray(body);
		const echoId = isEnvelope ? body.id ?? null : null;
		const detail = err instanceof Error ? err.message : String(err);
		return c.json(rpcError(echoId, RPC_INTERNAL_ERROR, detail), 200);
	}
}
|
|
3105
|
+
/**
 * HTTP handler for DELETE /mcp: authorize, then reply 200 with an empty body.
 *
 * @param {object} c - Hono request context.
 * @returns {Response} The auth error as a JSON-RPC envelope, or an empty 200.
 */
function handleMcpDelete(c) {
	const gate = checkAuth(c);
	if (gate.ok) return c.body(null, 200);
	return c.json(rpcError(null, RPC_INVALID_REQUEST, gate.reason), gate.status);
}
|
|
1358
3110
|
|
|
1359
3111
|
//#endregion
|
|
1360
|
-
//#region src/routes/
|
|
1361
|
-
const
|
|
1362
|
-
|
|
3112
|
+
//#region src/routes/mcp/route.ts
|
|
3113
|
+
// Router for /mcp: POST / delegates to handleMcpPost, and any error that
// escapes it is converted by forwardError.
const mcpRoutes = new Hono();
mcpRoutes.post("/", async function postMcp(c) {
	try {
		const reply = await handleMcpPost(c);
		return reply;
	} catch (failure) {
		return await forwardError(c, failure);
	}
});
|
|
1369
|
-
|
|
1370
|
-
//#endregion
|
|
1371
|
-
//#region src/services/copilot/create-embeddings.ts
|
|
1372
|
-
const createEmbeddings = async (payload) => {
|
|
1373
|
-
if (!state.copilotToken) throw new Error("Copilot token not found");
|
|
1374
|
-
const response = await fetch(`${copilotBaseUrl(state)}/embeddings`, {
|
|
1375
|
-
method: "POST",
|
|
1376
|
-
headers: copilotHeaders(state),
|
|
1377
|
-
body: JSON.stringify(payload)
|
|
1378
|
-
});
|
|
1379
|
-
if (!response.ok) throw new HTTPError("Failed to create embeddings", response);
|
|
1380
|
-
return await response.json();
|
|
1381
|
-
};
|
|
1382
|
-
|
|
1383
|
-
//#endregion
|
|
1384
|
-
//#region src/routes/embeddings/route.ts
|
|
1385
|
-
const embeddingRoutes = new Hono();
|
|
1386
|
-
embeddingRoutes.post("/", async (c) => {
|
|
3121
|
+
// DELETE / delegates to handleMcpDelete. An unexpected failure still answers
// with an empty 500, but is logged first so it does not disappear silently.
mcpRoutes.delete("/", (c) => {
	try {
		return handleMcpDelete(c);
	} catch (err) {
		consola.error("/mcp DELETE handler error:", err);
		return c.body(null, 500);
	}
});
|
|
1394
3128
|
|
|
@@ -1414,7 +3148,7 @@ embeddingRoutes.post("/", async (c) => {
|
|
|
1414
3148
|
* (anthropic-beta) so Copilot enables extended features.
|
|
1415
3149
|
*/
|
|
1416
3150
|
function buildHeaders(extraHeaders) {
|
|
1417
|
-
|
|
3151
|
+
return {
|
|
1418
3152
|
...copilotHeaders(state),
|
|
1419
3153
|
accept: "application/json",
|
|
1420
3154
|
"openai-intent": "messages-proxy",
|
|
@@ -1424,8 +3158,6 @@ function buildHeaders(extraHeaders) {
|
|
|
1424
3158
|
"X-Interaction-Id": randomUUID(),
|
|
1425
3159
|
...extraHeaders
|
|
1426
3160
|
};
|
|
1427
|
-
delete headers["copilot-integration-id"];
|
|
1428
|
-
return headers;
|
|
1429
3161
|
}
|
|
1430
3162
|
/**
|
|
1431
3163
|
* Forward an Anthropic Messages API request to Copilot's native /v1/messages endpoint.
|
|
@@ -1433,14 +3165,18 @@ function buildHeaders(extraHeaders) {
|
|
|
1433
3165
|
*/
|
|
1434
3166
|
async function createMessages(body, extraHeaders) {
|
|
1435
3167
|
if (!state.copilotToken) throw new Error("Copilot token not found");
|
|
1436
|
-
const headers = buildHeaders(extraHeaders);
|
|
1437
3168
|
const url = `${copilotBaseUrl(state)}/v1/messages?beta=true`;
|
|
1438
3169
|
consola.debug(`Forwarding to ${url}`);
|
|
1439
|
-
const
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
|
|
3170
|
+
const doFetch = () => {
|
|
3171
|
+
const fetchInit = {
|
|
3172
|
+
method: "POST",
|
|
3173
|
+
headers: buildHeaders(extraHeaders),
|
|
3174
|
+
body
|
|
3175
|
+
};
|
|
3176
|
+
if (UPSTREAM_FETCH_TIMEOUT_MS > 0) fetchInit.signal = AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS);
|
|
3177
|
+
return fetch(url, fetchInit);
|
|
3178
|
+
};
|
|
3179
|
+
const response = await tryRefreshAndRetry(doFetch, "/v1/messages");
|
|
1444
3180
|
if (!response.ok) {
|
|
1445
3181
|
let errorBody = "";
|
|
1446
3182
|
try {
|
|
@@ -1463,14 +3199,18 @@ async function createMessages(body, extraHeaders) {
|
|
|
1463
3199
|
*/
|
|
1464
3200
|
async function countTokens(body, extraHeaders) {
|
|
1465
3201
|
if (!state.copilotToken) throw new Error("Copilot token not found");
|
|
1466
|
-
const headers = buildHeaders(extraHeaders);
|
|
1467
3202
|
const url = `${copilotBaseUrl(state)}/v1/messages/count_tokens?beta=true`;
|
|
1468
3203
|
consola.debug(`Forwarding to ${url}`);
|
|
1469
|
-
const
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
|
|
1473
|
-
|
|
3204
|
+
const doFetch = () => {
|
|
3205
|
+
const fetchInit = {
|
|
3206
|
+
method: "POST",
|
|
3207
|
+
headers: buildHeaders(extraHeaders),
|
|
3208
|
+
body
|
|
3209
|
+
};
|
|
3210
|
+
if (UPSTREAM_FETCH_TIMEOUT_MS > 0) fetchInit.signal = AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS);
|
|
3211
|
+
return fetch(url, fetchInit);
|
|
3212
|
+
};
|
|
3213
|
+
const response = await tryRefreshAndRetry(doFetch, "/v1/messages/count_tokens");
|
|
1474
3214
|
if (!response.ok) {
|
|
1475
3215
|
let errorBody = "";
|
|
1476
3216
|
try {
|
|
@@ -1488,6 +3228,22 @@ async function countTokens(body, extraHeaders) {
|
|
|
1488
3228
|
return response;
|
|
1489
3229
|
}
|
|
1490
3230
|
|
|
3231
|
+
//#endregion
|
|
3232
|
+
//#region src/lib/diagnose-response.ts
/** Maximum number of body characters echoed into the diagnostic log line. */
const PREVIEW_LIMIT = 200;
/**
 * Parse an upstream response body as JSON, logging a diagnostic line when
 * the body cannot be read or is not valid JSON.
 *
 * The body is read exactly once as text and parsed from that string, so no
 * `Response.clone()` branch is left buffered and unread on the success path.
 *
 * @param {Response} response - Upstream response whose body is still unread.
 * @param {string} routePath - Route path included in the log line.
 * @returns {Promise<unknown>} The parsed JSON value.
 * @throws Rethrows the original read/parse error after logging it.
 */
async function parseJsonOrDiagnose(response, routePath) {
	// Placeholder used in the log line when the body itself could not be read.
	let bodyText = "(unreadable)";
	try {
		bodyText = await response.text();
		return JSON.parse(bodyText);
	} catch (error) {
		const contentType = response.headers.get("content-type") ?? "(none)";
		const preview = bodyText.length > PREVIEW_LIMIT ? `${bodyText.slice(0, PREVIEW_LIMIT)}...(truncated)` : bodyText;
		consola.error(`Upstream JSON parse failed at ${routePath}: status=${response.status} content-type="${contentType}" body[0..${PREVIEW_LIMIT}]=${JSON.stringify(preview)}`);
		throw error;
	}
}
|
|
3246
|
+
|
|
1491
3247
|
//#endregion
|
|
1492
3248
|
//#region src/routes/messages/count-tokens-handler.ts
|
|
1493
3249
|
const isWebSearchTool$1 = (tool) => typeof tool.type === "string" && tool.type.startsWith("web_search") || tool.name === "web_search";
|
|
@@ -1535,7 +3291,7 @@ async function handleCountTokens(c) {
|
|
|
1535
3291
|
...selectedModel?.requestHeaders,
|
|
1536
3292
|
...extraHeaders
|
|
1537
3293
|
});
|
|
1538
|
-
const responseBody = await response.
|
|
3294
|
+
const responseBody = await parseJsonOrDiagnose(response, c.req.path);
|
|
1539
3295
|
logRequest({
|
|
1540
3296
|
method: "POST",
|
|
1541
3297
|
path: c.req.path,
|
|
@@ -1702,9 +3458,8 @@ async function handleCompletion(c) {
|
|
|
1702
3458
|
if (debugEnabled) consola.debug("Anthropic request body:", rawBody.slice(0, 2e3));
|
|
1703
3459
|
if (state.manualApprove) await awaitApproval();
|
|
1704
3460
|
const betaHeaders = extractBetaHeaders(c);
|
|
1705
|
-
const { body: resolvedBody, originalModel, resolvedModel } = resolveModelInBody(await processWebSearch(rawBody));
|
|
3461
|
+
const { body: resolvedBody, originalModel, resolvedModel, selectedModel } = resolveModelInBody(await processWebSearch(rawBody));
|
|
1706
3462
|
const modelId = resolvedModel ?? originalModel;
|
|
1707
|
-
const selectedModel = state.models?.data.find((m) => m.id === modelId);
|
|
1708
3463
|
if (modelId) logEndpointMismatch(modelId, "/v1/messages");
|
|
1709
3464
|
const effectiveBetas = applyDefaultBetas(betaHeaders, resolvedModel ?? originalModel);
|
|
1710
3465
|
let response;
|
|
@@ -1727,7 +3482,17 @@ async function handleCompletion(c) {
|
|
|
1727
3482
|
}
|
|
1728
3483
|
throw error;
|
|
1729
3484
|
}
|
|
1730
|
-
|
|
3485
|
+
const contentType = response.headers.get("content-type") ?? "";
|
|
3486
|
+
const clientAcceptsSSE = (c.req.header("accept") ?? "").includes("text/event-stream");
|
|
3487
|
+
let isStreaming = contentType.includes("text/event-stream");
|
|
3488
|
+
if (!isStreaming && clientAcceptsSSE) {
|
|
3489
|
+
if (contentType === "" || contentType === "application/octet-stream") {
|
|
3490
|
+
consola.warn(`Upstream /v1/messages returned status=${response.status} content-type=${JSON.stringify(contentType)} but client requested streaming; treating response body as SSE`);
|
|
3491
|
+
isStreaming = true;
|
|
3492
|
+
}
|
|
3493
|
+
}
|
|
3494
|
+
if (debugEnabled) consola.debug(`Upstream /v1/messages: status=${response.status} content-type="${contentType}" isStreaming=${isStreaming}`);
|
|
3495
|
+
if (isStreaming) {
|
|
1731
3496
|
logRequest({
|
|
1732
3497
|
method: "POST",
|
|
1733
3498
|
path: c.req.path,
|
|
@@ -1740,18 +3505,19 @@ async function handleCompletion(c) {
|
|
|
1740
3505
|
const streamHeaders = {
|
|
1741
3506
|
"content-type": "text/event-stream",
|
|
1742
3507
|
"cache-control": "no-cache",
|
|
3508
|
+
"transfer-encoding": "chunked",
|
|
1743
3509
|
connection: "keep-alive"
|
|
1744
3510
|
};
|
|
1745
3511
|
const requestId = response.headers.get("x-request-id");
|
|
1746
3512
|
if (requestId) streamHeaders["x-request-id"] = requestId;
|
|
1747
3513
|
const reqId = response.headers.get("request-id");
|
|
1748
3514
|
if (reqId) streamHeaders["request-id"] = reqId;
|
|
1749
|
-
return new Response(response.body, {
|
|
3515
|
+
return new Response(response.body ? relayAnthropicStream(response.body, { routePath: c.req.path }) : null, {
|
|
1750
3516
|
status: response.status,
|
|
1751
3517
|
headers: streamHeaders
|
|
1752
3518
|
});
|
|
1753
3519
|
}
|
|
1754
|
-
const responseBody = await response.
|
|
3520
|
+
const responseBody = await parseJsonOrDiagnose(response, c.req.path);
|
|
1755
3521
|
logRequest({
|
|
1756
3522
|
method: "POST",
|
|
1757
3523
|
path: c.req.path,
|
|
@@ -1770,8 +3536,9 @@ async function handleCompletion(c) {
|
|
|
1770
3536
|
}
|
|
1771
3537
|
/**
|
|
1772
3538
|
* Parse the JSON body, resolve the model name, sanitize cache_control
|
|
1773
|
-
* fields,
|
|
1774
|
-
* and resolved model
|
|
3539
|
+
* fields, translate thinking-mode shape for adaptive-thinking models,
|
|
3540
|
+
* and re-serialize. Returns the body string, original/resolved model
|
|
3541
|
+
* names, and the matching model metadata (if any).
|
|
1775
3542
|
*
|
|
1776
3543
|
* Re-serialization is skipped when no modifications are needed.
|
|
1777
3544
|
*/
|
|
@@ -1791,13 +3558,84 @@ function resolveModelInBody(rawBody) {
|
|
|
1791
3558
|
modified = true;
|
|
1792
3559
|
}
|
|
1793
3560
|
}
|
|
1794
|
-
if (rawBody.includes("\"scope\"") && sanitizeCacheControl(parsed)) modified = true;
|
|
1795
3561
|
const resolvedModel = typeof parsed.model === "string" ? parsed.model : originalModel;
|
|
3562
|
+
const selectedModel = resolvedModel ? state.models?.data.find((m) => m.id === resolvedModel) : void 0;
|
|
3563
|
+
if (translateThinking(parsed, selectedModel)) modified = true;
|
|
3564
|
+
if (rawBody.includes("\"scope\"") && sanitizeCacheControl(parsed)) modified = true;
|
|
1796
3565
|
return {
|
|
1797
3566
|
body: modified ? JSON.stringify(parsed) : rawBody,
|
|
1798
3567
|
originalModel,
|
|
1799
|
-
resolvedModel
|
|
3568
|
+
resolvedModel,
|
|
3569
|
+
selectedModel
|
|
3570
|
+
};
|
|
3571
|
+
}
/** Canonical reasoning-effort tiers, ordered from lowest to highest. */
const EFFORT_ORDER = [
	"low",
	"medium",
	"high",
	"xhigh"
];
/**
 * Bucket a thinking budget into a Copilot reasoning-effort string.
 * `<2000`→low, `<8000`→medium, `<24000`→high, else→xhigh.
 * Defaults missing/non-numeric budgets to 8000 ("high").
 *
 * @param {unknown} budget - Requested `budget_tokens` value.
 * @returns {"low"|"medium"|"high"|"xhigh"} The bucketed effort tier.
 */
function bucketEffort(budget) {
	const n = typeof budget === "number" && Number.isFinite(budget) ? budget : 8e3;
	if (n < 2e3) return "low";
	if (n < 8e3) return "medium";
	if (n < 24e3) return "high";
	return "xhigh";
}
/**
 * Clamp a bucketed effort to the closest value in `supported`. Ties
 * resolve to the lower-tier option (per EFFORT_ORDER).
 *
 * Iterates EFFORT_ORDER (canonical low→xhigh) so the first match on a
 * given distance is always the lower-tier value, regardless of input
 * order in `supported`. Entries of `supported` that are not part of
 * EFFORT_ORDER are ignored; if nothing usable remains, `bucketed` is
 * returned unchanged.
 *
 * @param {string} bucketed - Effort tier produced by `bucketEffort`.
 * @param {string[]} supported - Effort tiers the model accepts.
 * @returns {string} The supported tier nearest to `bucketed`.
 */
function clampEffort(bucketed, supported) {
	if (supported.includes(bucketed)) return bucketed;
	const targetIdx = EFFORT_ORDER.indexOf(bucketed);
	let best;
	let bestDist = Infinity;
	for (const [i, value] of EFFORT_ORDER.entries()) {
		if (!supported.includes(value)) continue;
		const dist = Math.abs(i - targetIdx);
		// Strict `<` keeps the earlier (lower-tier) candidate on ties.
		if (dist < bestDist) {
			bestDist = dist;
			best = value;
		}
	}
	return best ?? bucketed;
}
/**
 * Translate Anthropic-shape `thinking:{type:"enabled", budget_tokens}` to
 * Copilot-shape `thinking:{type:"adaptive"}` + `output_config.effort`
 * when the resolved model declares `adaptive_thinking: true`.
 *
 * Returns true if the body was modified (it is mutated in place). No-op
 * when the model doesn't support adaptive thinking, when thinking is
 * missing/disabled/already adaptive, or when `body` isn't a plain object
 * (null, primitives and arrays are rejected up front instead of throwing).
 * Client-supplied `output_config.effort` always wins over the bucketed value.
 *
 * @param {unknown} body - Parsed request body.
 * @param {object|undefined} model - Model metadata; only
 *   `capabilities.supports.adaptive_thinking` and
 *   `capabilities.supports.reasoning_effort` are read.
 * @returns {boolean} Whether `body` was changed.
 */
function translateThinking(body, model) {
	if (body === null || typeof body !== "object" || Array.isArray(body)) return false;
	const supports = model?.capabilities?.supports;
	if (!supports?.adaptive_thinking) return false;
	const thinking = body.thinking;
	if (!thinking || typeof thinking !== "object") return false;
	if (thinking.type !== "enabled") return false;
	const bucketed = bucketEffort(thinking.budget_tokens);
	const supported = supports.reasoning_effort;
	const effort = Array.isArray(supported) && supported.length > 0 ? clampEffort(bucketed, supported) : bucketed;
	body.thinking = { type: "adaptive" };
	// Only merge into an existing output_config when it is a real object;
	// spreading an array would leak index keys into the request.
	const current = body.output_config;
	const existing = current && typeof current === "object" && !Array.isArray(current) ? current : {};
	body.output_config = {
		...existing,
		effort: existing.effort ?? effort
	};
	return true;
}
|
|
1802
3640
|
/**
|
|
1803
3641
|
* Strip the `scope` field from all `cache_control` objects in the body.
|
|
@@ -1864,21 +3702,18 @@ const modelRoutes = new Hono();
|
|
|
1864
3702
|
modelRoutes.get("/", async (c) => {
|
|
1865
3703
|
try {
|
|
1866
3704
|
if (!state.models) await cacheModels();
|
|
1867
|
-
const models = state.models?.data.map((model) =>
|
|
1868
|
-
|
|
1869
|
-
|
|
1870
|
-
|
|
1871
|
-
|
|
1872
|
-
|
|
1873
|
-
|
|
1874
|
-
|
|
1875
|
-
|
|
1876
|
-
|
|
1877
|
-
|
|
1878
|
-
|
|
1879
|
-
model_picker_enabled: model.model_picker_enabled,
|
|
1880
|
-
policy: model.policy
|
|
1881
|
-
}));
|
|
3705
|
+
const models = state.models?.data.map((model) => {
|
|
3706
|
+
const { requestHeaders,...rest } = model;
|
|
3707
|
+
return {
|
|
3708
|
+
...rest,
|
|
3709
|
+
object: "model",
|
|
3710
|
+
type: model.capabilities?.type ?? "model",
|
|
3711
|
+
created: 0,
|
|
3712
|
+
created_at: (/* @__PURE__ */ new Date(0)).toISOString(),
|
|
3713
|
+
owned_by: model.vendor,
|
|
3714
|
+
display_name: model.name
|
|
3715
|
+
};
|
|
3716
|
+
});
|
|
1882
3717
|
return c.json({
|
|
1883
3718
|
object: "list",
|
|
1884
3719
|
data: models,
|
|
@@ -1889,75 +3724,16 @@ modelRoutes.get("/", async (c) => {
|
|
|
1889
3724
|
}
|
|
1890
3725
|
});
|
|
1891
3726
|
|
|
1892
|
-
//#endregion
|
|
1893
|
-
//#region src/services/copilot/create-responses.ts
|
|
1894
|
-
const createResponses = async (payload, modelHeaders) => {
|
|
1895
|
-
if (!state.copilotToken) throw new Error("Copilot token not found");
|
|
1896
|
-
const enableVision = detectVision(payload.input);
|
|
1897
|
-
const isAgentCall = detectAgentCall(payload.input);
|
|
1898
|
-
const headers = {
|
|
1899
|
-
...copilotHeaders(state, enableVision),
|
|
1900
|
-
...modelHeaders,
|
|
1901
|
-
"X-Initiator": isAgentCall ? "agent" : "user"
|
|
1902
|
-
};
|
|
1903
|
-
const filteredPayload = filterUnsupportedTools(payload);
|
|
1904
|
-
const response = await fetch(`${copilotBaseUrl(state)}/responses`, {
|
|
1905
|
-
method: "POST",
|
|
1906
|
-
headers,
|
|
1907
|
-
body: JSON.stringify(filteredPayload)
|
|
1908
|
-
});
|
|
1909
|
-
if (!response.ok) {
|
|
1910
|
-
consola.error("Failed to create responses", response);
|
|
1911
|
-
throw new HTTPError("Failed to create responses", response);
|
|
1912
|
-
}
|
|
1913
|
-
if (payload.stream) return events(response);
|
|
1914
|
-
return await response.json();
|
|
1915
|
-
};
|
|
1916
|
-
function detectVision(input) {
|
|
1917
|
-
if (typeof input === "string") return false;
|
|
1918
|
-
if (!Array.isArray(input)) return false;
|
|
1919
|
-
return input.some((item) => {
|
|
1920
|
-
if ("content" in item && Array.isArray(item.content)) return item.content.some((part) => part.type === "input_image");
|
|
1921
|
-
return false;
|
|
1922
|
-
});
|
|
1923
|
-
}
|
|
1924
|
-
function detectAgentCall(input) {
|
|
1925
|
-
if (typeof input === "string") return false;
|
|
1926
|
-
if (!Array.isArray(input)) return false;
|
|
1927
|
-
return input.some((item) => {
|
|
1928
|
-
if ("role" in item && item.role === "assistant") return true;
|
|
1929
|
-
if ("type" in item && (item.type === "function_call" || item.type === "function_call_output")) return true;
|
|
1930
|
-
return false;
|
|
1931
|
-
});
|
|
1932
|
-
}
|
|
1933
|
-
function filterUnsupportedTools(payload) {
|
|
1934
|
-
if (!payload.tools || !Array.isArray(payload.tools)) return payload;
|
|
1935
|
-
const supported = payload.tools.filter((tool) => {
|
|
1936
|
-
const isSupported = tool.type === "function";
|
|
1937
|
-
if (!isSupported) consola.debug(`Stripping unsupported tool type: ${tool.type}`);
|
|
1938
|
-
return isSupported;
|
|
1939
|
-
});
|
|
1940
|
-
let toolChoice = payload.tool_choice;
|
|
1941
|
-
if (supported.length === 0) toolChoice = void 0;
|
|
1942
|
-
else if (toolChoice && typeof toolChoice === "object") {
|
|
1943
|
-
const supportedNames = new Set(supported.map((tool) => tool.name).filter(Boolean));
|
|
1944
|
-
const toolChoiceName = getToolChoiceName(toolChoice);
|
|
1945
|
-
if (toolChoiceName && !supportedNames.has(toolChoiceName)) toolChoice = void 0;
|
|
1946
|
-
}
|
|
1947
|
-
return {
|
|
1948
|
-
...payload,
|
|
1949
|
-
tools: supported.length > 0 ? supported : void 0,
|
|
1950
|
-
tool_choice: toolChoice
|
|
1951
|
-
};
|
|
1952
|
-
}
|
|
1953
|
-
function getToolChoiceName(toolChoice) {
|
|
1954
|
-
if (typeof toolChoice !== "object") return void 0;
|
|
1955
|
-
if ("function" in toolChoice && toolChoice.function && typeof toolChoice.function === "object") return toolChoice.function.name;
|
|
1956
|
-
if ("name" in toolChoice) return toolChoice.name;
|
|
1957
|
-
}
|
|
1958
|
-
|
|
1959
3727
|
//#endregion
|
|
1960
3728
|
//#region src/routes/responses/handler.ts
/** Shared encoder used to turn formatted SSE text into bytes. */
const ENCODER = new TextEncoder();
/**
 * Serialize one parsed server-sent event into SSE wire text.
 *
 * Emits an `event:` line when `chunk.event` is truthy, one `data:` line per
 * line of `chunk.data` (split on CRLF, CR or LF) when `data` is defined, and
 * an `id:` line when `chunk.id` is defined. The event is terminated by a
 * blank line.
 *
 * @param {{event?: string, data?: unknown, id?: unknown}} chunk - Parsed event.
 * @returns {string} The event in SSE wire format, ending in "\n\n".
 */
function formatSSE(chunk) {
	const { event, data, id } = chunk;
	const eventLines = event ? [`event: ${event}`] : [];
	const dataLines = data === void 0 ? [] : String(data).split(/\r\n|\r|\n/).map((line) => `data: ${line}`);
	const idLines = id === void 0 ? [] : [`id: ${String(id)}`];
	return [
		...eventLines,
		...dataLines,
		...idLines
	].join("\n") + "\n\n";
}
|
|
1961
3737
|
async function handleResponses(c) {
|
|
1962
3738
|
const startTime = Date.now();
|
|
1963
3739
|
await checkRateLimit(state);
|
|
@@ -1998,16 +3774,106 @@ async function handleResponses(c) {
|
|
|
1998
3774
|
if (debugEnabled) consola.debug("Non-streaming response:", JSON.stringify(response));
|
|
1999
3775
|
return c.json(response);
|
|
2000
3776
|
}
|
|
2001
|
-
|
|
2002
|
-
|
|
2003
|
-
|
|
2004
|
-
|
|
2005
|
-
|
|
2006
|
-
|
|
2007
|
-
|
|
2008
|
-
|
|
2009
|
-
|
|
2010
|
-
|
|
3777
|
+
const iterator = response[Symbol.asyncIterator]();
|
|
3778
|
+
let firstChunk;
|
|
3779
|
+
let upstreamFinished = false;
|
|
3780
|
+
while (true) {
|
|
3781
|
+
const r = await iterator.next();
|
|
3782
|
+
if (r.done) {
|
|
3783
|
+
upstreamFinished = true;
|
|
3784
|
+
break;
|
|
3785
|
+
}
|
|
3786
|
+
if (r.value === void 0 || r.value === null) continue;
|
|
3787
|
+
if (r.value.data === "[DONE]") {
|
|
3788
|
+
upstreamFinished = true;
|
|
3789
|
+
break;
|
|
3790
|
+
}
|
|
3791
|
+
if (!r.value.data) continue;
|
|
3792
|
+
firstChunk = r.value;
|
|
3793
|
+
break;
|
|
3794
|
+
}
|
|
3795
|
+
if (firstChunk === void 0) consola.warn(`Upstream /responses returned no payload events at ${c.req.path}`);
|
|
3796
|
+
let pendingFirstChunk = firstChunk;
|
|
3797
|
+
let consumerCancelled = false;
|
|
3798
|
+
const safeClose = (controller) => {
|
|
3799
|
+
try {
|
|
3800
|
+
controller.close();
|
|
3801
|
+
} catch {}
|
|
3802
|
+
};
|
|
3803
|
+
const releaseUpstream = (reason) => {
|
|
3804
|
+
if (typeof iterator.return === "function") iterator.return(reason).catch(() => {});
|
|
3805
|
+
};
|
|
3806
|
+
const safeEnqueue = (controller, bytes) => {
|
|
3807
|
+
try {
|
|
3808
|
+
controller.enqueue(bytes);
|
|
3809
|
+
return true;
|
|
3810
|
+
} catch (e) {
|
|
3811
|
+
if (isControllerClosedError(e)) {
|
|
3812
|
+
consumerCancelled = true;
|
|
3813
|
+
releaseUpstream(e);
|
|
3814
|
+
return false;
|
|
3815
|
+
}
|
|
3816
|
+
throw e;
|
|
3817
|
+
}
|
|
3818
|
+
};
|
|
3819
|
+
return new Response(new ReadableStream({
|
|
3820
|
+
async pull(controller) {
|
|
3821
|
+
if (consumerCancelled || upstreamFinished) {
|
|
3822
|
+
safeClose(controller);
|
|
3823
|
+
return;
|
|
3824
|
+
}
|
|
3825
|
+
if (pendingFirstChunk !== void 0) {
|
|
3826
|
+
const chunk = pendingFirstChunk;
|
|
3827
|
+
pendingFirstChunk = void 0;
|
|
3828
|
+
if (debugEnabled) consola.debug("Streaming chunk:", JSON.stringify(chunk));
|
|
3829
|
+
safeEnqueue(controller, ENCODER.encode(formatSSE(chunk)));
|
|
3830
|
+
return;
|
|
3831
|
+
}
|
|
3832
|
+
try {
|
|
3833
|
+
const result = await iterator.next();
|
|
3834
|
+
if (consumerCancelled) {
|
|
3835
|
+
safeClose(controller);
|
|
3836
|
+
return;
|
|
3837
|
+
}
|
|
3838
|
+
if (result.done) {
|
|
3839
|
+
upstreamFinished = true;
|
|
3840
|
+
safeClose(controller);
|
|
3841
|
+
return;
|
|
3842
|
+
}
|
|
3843
|
+
if (result.value === void 0 || result.value === null) return;
|
|
3844
|
+
if (result.value.data === "[DONE]") {
|
|
3845
|
+
upstreamFinished = true;
|
|
3846
|
+
safeClose(controller);
|
|
3847
|
+
return;
|
|
3848
|
+
}
|
|
3849
|
+
if (!result.value.data) return;
|
|
3850
|
+
if (debugEnabled) consola.debug("Streaming chunk:", JSON.stringify(result.value));
|
|
3851
|
+
safeEnqueue(controller, ENCODER.encode(formatSSE(result.value)));
|
|
3852
|
+
} catch (error) {
|
|
3853
|
+
upstreamFinished = true;
|
|
3854
|
+
if (consumerCancelled) {
|
|
3855
|
+
releaseUpstream(error);
|
|
3856
|
+
safeClose(controller);
|
|
3857
|
+
return;
|
|
3858
|
+
}
|
|
3859
|
+
const { errName, errMessage } = logStreamError(c.req.path, error);
|
|
3860
|
+
safeEnqueue(controller, ENCODER.encode(buildOpenAIErrorEvent(errName, errMessage)));
|
|
3861
|
+
releaseUpstream(error);
|
|
3862
|
+
safeClose(controller);
|
|
3863
|
+
}
|
|
3864
|
+
},
|
|
3865
|
+
cancel() {
|
|
3866
|
+
consumerCancelled = true;
|
|
3867
|
+
upstreamFinished = true;
|
|
3868
|
+
releaseUpstream();
|
|
3869
|
+
}
|
|
3870
|
+
}), {
|
|
3871
|
+
status: 200,
|
|
3872
|
+
headers: {
|
|
3873
|
+
"content-type": "text/event-stream",
|
|
3874
|
+
"cache-control": "no-cache",
|
|
3875
|
+
"transfer-encoding": "chunked",
|
|
3876
|
+
connection: "keep-alive"
|
|
2011
3877
|
}
|
|
2012
3878
|
});
|
|
2013
3879
|
}
|
|
@@ -2018,8 +3884,7 @@ async function injectWebSearchIfNeeded(payload) {
|
|
|
2018
3884
|
if (payload.input.some((item) => item.type === "function_call_output")) return;
|
|
2019
3885
|
}
|
|
2020
3886
|
const query = extractUserQuery(payload.input);
|
|
2021
|
-
if (
|
|
2022
|
-
try {
|
|
3887
|
+
if (query) try {
|
|
2023
3888
|
const results = await searchWeb(query);
|
|
2024
3889
|
const searchContext = [
|
|
2025
3890
|
"[Web Search Results]",
|
|
@@ -2032,6 +3897,13 @@ async function injectWebSearchIfNeeded(payload) {
|
|
|
2032
3897
|
} catch (error) {
|
|
2033
3898
|
consola.warn("Web search failed, continuing without results:", error);
|
|
2034
3899
|
}
|
|
3900
|
+
payload.tools = payload.tools?.filter((t) => t.type !== "web_search");
|
|
3901
|
+
if (payload.tools && payload.tools.length === 0) payload.tools = void 0;
|
|
3902
|
+
if (!payload.tools) payload.tool_choice = void 0;
|
|
3903
|
+
else if (payload.tool_choice && typeof payload.tool_choice === "object") {
|
|
3904
|
+
const choice = payload.tool_choice;
|
|
3905
|
+
if ((choice.function?.name ?? choice.name) === "web_search") payload.tool_choice = void 0;
|
|
3906
|
+
}
|
|
2035
3907
|
}
|
|
2036
3908
|
function extractUserQuery(input) {
|
|
2037
3909
|
if (typeof input === "string") return input;
|
|
@@ -2203,6 +4075,11 @@ usageRoute.get("/", async (c) => {
|
|
|
2203
4075
|
const server = new Hono();
|
|
2204
4076
|
server.use(cors());
|
|
2205
4077
|
server.get("/", (c) => c.text("Server running"));
|
|
4078
|
+
server.get("/version", (c) => c.json({
|
|
4079
|
+
name,
|
|
4080
|
+
version,
|
|
4081
|
+
gitSha: process.env.GITHUB_SHA ?? "unknown"
|
|
4082
|
+
}));
|
|
2206
4083
|
server.on("HEAD", ["/"], (c) => c.body(null, 200));
|
|
2207
4084
|
server.route("/chat/completions", completionRoutes);
|
|
2208
4085
|
server.route("/responses", responsesRoutes);
|
|
@@ -2217,6 +4094,8 @@ server.route("/v1/models", modelRoutes);
|
|
|
2217
4094
|
server.route("/v1/embeddings", embeddingRoutes);
|
|
2218
4095
|
server.route("/v1/search", searchRoutes);
|
|
2219
4096
|
server.route("/v1/messages", messageRoutes);
|
|
4097
|
+
server.route("/mcp", mcpRoutes);
|
|
4098
|
+
server.post("/api/event_logging/batch", (c) => c.body(null, 200));
|
|
2220
4099
|
server.notFound((c) => c.json({
|
|
2221
4100
|
type: "error",
|
|
2222
4101
|
error: {
|
|
@@ -2382,22 +4261,73 @@ function parseSharedArgs(args) {
|
|
|
2382
4261
|
extendedBetas: args["extended-betas"]
|
|
2383
4262
|
};
|
|
2384
4263
|
}
/**
 * Produce the environment overrides handed to a spawned Claude Code child.
 *
 * Only positive values are supplied here: every key listed in
 * `STRIPPED_PARENT_ENV_KEYS` (see `src/lib/launch.ts`) is removed from the
 * parent environment BEFORE these overrides are merged in.
 *
 * How this interacts with Claude Code's auth precedence
 * (https://code.claude.com/docs/en/iam):
 *   1. Cloud provider (CLAUDE_CODE_USE_BEDROCK / VERTEX / FOUNDRY) — stripped at parent.
 *   2. ANTHROPIC_AUTH_TOKEN — set here to "dummy"; wins over #4–#6.
 *   3. ANTHROPIC_API_KEY — stripped at parent and deliberately NOT re-set,
 *      because Claude Code emits an Auth conflict warning when both
 *      AUTH_TOKEN and API_KEY are present, even with dummy values.
 *   4. apiKeyHelper in settings.json — beaten by #2.
 *   5. CLAUDE_CODE_OAUTH_TOKEN — stripped at parent.
 *   6. Subscription OAuth (Keychain / ~/.claude/.credentials.json) — hidden
 *      from the child by the CLAUDE_CONFIG_DIR trick described below. The
 *      credential file itself stays in place, so `claude /logout` still
 *      works outside the proxy.
 *
 * Setting `CLAUDE_CONFIG_DIR` switches on Claude Code's per-config-dir
 * keychain isolation. Per binary-grep of Claude Code 2.1.126's `iN()`:
 *
 *   function iN(H = "") {
 *     let _ = B6(),  // resolved config-dir path
 *         K = !process.env.CLAUDE_CONFIG_DIR ? "" : `-${sha256(_).slice(0, 8)}`;
 *     return `Claude Code${OAUTH_FILE_SUFFIX}${H}${K}`
 *   }
 *
 * The check is on the variable's PRESENCE, not its value. With it unset (the
 * user's ordinary `claude` usage) the keychain service name is "Claude Code"
 * and the `/login` credential is found there. With it set (the proxy
 * session) the service name becomes "Claude Code-<hash>", so the user's
 * credential is not visible, `iCH()` returns null, and all three
 * auth-conflict checks evaluate to `false`. Because the value still points
 * at the default config dir, settings.json/skills/MCP/plugins/hooks/CLAUDE.md
 * continue to load from `~/.claude`.
 *
 * @param {string} serverUrl - Base URL of the local proxy.
 * @param {string} [model] - Optional model slug exported as ANTHROPIC_MODEL.
 * @returns {Record<string, string>} Environment overrides for the child.
 */
function getClaudeCodeEnvVars(serverUrl, model) {
	const claudeConfigDir = path.join(os.homedir(), ".claude");
	const overrides = {
		ANTHROPIC_BASE_URL: serverUrl,
		ANTHROPIC_AUTH_TOKEN: "dummy",
		CLAUDE_CONFIG_DIR: claudeConfigDir,
		MCP_TIMEOUT: "600000",
		DISABLE_NON_ESSENTIAL_MODEL_CALLS: "1",
		CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: "1"
	};
	if (!model) return overrides;
	return {
		...overrides,
		ANTHROPIC_MODEL: model
	};
}
/**
 * Produce the environment overrides handed to a spawned Codex CLI child.
 *
 * As with `getClaudeCodeEnvVars`, the parent environment has already had
 * `OPENAI_API_KEY` / `OPENAI_BASE_URL` / `CODEX_HOME` removed (see
 * `STRIPPED_PARENT_ENV_KEYS` in `src/lib/launch.ts`) before these values are
 * merged, so a stale shell `OPENAI_API_KEY` cannot leak through. Codex caches
 * a ChatGPT subscription login under `$CODEX_HOME/auth.json`, which can
 * override `OPENAI_API_KEY` per openai/codex#2733; pointing `CODEX_HOME` at
 * an isolated directory masks any cached login.
 *
 * @param {string} serverUrl - Base URL of the local proxy (without `/v1`).
 * @returns {{OPENAI_BASE_URL: string, OPENAI_API_KEY: string, CODEX_HOME: string}}
 *   Environment overrides for the child.
 */
function getCodexEnvVars(serverUrl) {
	const openAiBaseUrl = `${serverUrl}/v1`;
	const isolatedCodexHome = PATHS.CODEX_HOME;
	return {
		OPENAI_BASE_URL: openAiBaseUrl,
		OPENAI_API_KEY: "dummy",
		CODEX_HOME: isolatedCodexHome
	};
}
|
|
2403
4333
|
|
|
@@ -2414,6 +4344,21 @@ const claude = defineCommand({
|
|
|
2414
4344
|
alias: "m",
|
|
2415
4345
|
type: "string",
|
|
2416
4346
|
description: "Override the default model for Claude Code"
|
|
4347
|
+
},
|
|
4348
|
+
"codex-mcp": {
|
|
4349
|
+
type: "boolean",
|
|
4350
|
+
default: true,
|
|
4351
|
+
description: "Wire peer-model MCP personas (codex-critic, codex-reviewer, gemini-critic) into the spawned Claude Code session"
|
|
4352
|
+
},
|
|
4353
|
+
"codex-cli": {
|
|
4354
|
+
type: "boolean",
|
|
4355
|
+
default: false,
|
|
4356
|
+
description: "Add a `codex mcp-server` stdio backend so codex-implementer can mutate files. Requires codex CLI 0.129+; gracefully falls back to HTTP-only if absent."
|
|
4357
|
+
},
|
|
4358
|
+
"codex-mcp-only": {
|
|
4359
|
+
type: "boolean",
|
|
4360
|
+
default: false,
|
|
4361
|
+
description: "Pass --strict-mcp-config to claude code so only github-router's MCP servers are loaded (hides user's existing MCP servers)"
|
|
2417
4362
|
}
|
|
2418
4363
|
},
|
|
2419
4364
|
async run({ args }) {
|
|
@@ -2437,22 +4382,57 @@ const claude = defineCommand({
|
|
|
2437
4382
|
process$1.exit(1);
|
|
2438
4383
|
}
|
|
2439
4384
|
enableFileLogging();
|
|
2440
|
-
|
|
2441
|
-
|
|
2442
|
-
|
|
2443
|
-
|
|
2444
|
-
|
|
2445
|
-
|
|
2446
|
-
|
|
4385
|
+
const usingDefault = !args.model;
|
|
4386
|
+
let chosenSlug = args.model ?? DEFAULT_CLAUDE_MODEL;
|
|
4387
|
+
let resolvedSlug = resolveModel(chosenSlug);
|
|
4388
|
+
if (usingDefault && state.models) {
|
|
4389
|
+
const inCache = (slug) => state.models?.data.some((m) => m.id === resolveModel(slug)) ?? false;
|
|
4390
|
+
if (!inCache(chosenSlug)) {
|
|
4391
|
+
for (const fallback of DEFAULT_CLAUDE_MODEL_FALLBACKS) if (inCache(fallback)) {
|
|
4392
|
+
consola.info(`Default model "${chosenSlug}" not in your Copilot model list; falling back to "${fallback}".`);
|
|
4393
|
+
chosenSlug = fallback;
|
|
4394
|
+
resolvedSlug = resolveModel(fallback);
|
|
4395
|
+
break;
|
|
4396
|
+
}
|
|
2447
4397
|
}
|
|
2448
4398
|
}
|
|
2449
|
-
|
|
4399
|
+
if (resolvedSlug !== chosenSlug) consola.info(`Model "${chosenSlug}" resolved to "${resolvedSlug}"`);
|
|
4400
|
+
if (!state.models?.data.find((m) => m.id === resolvedSlug)) {
|
|
4401
|
+
const available = listModelsForEndpoint("/v1/messages");
|
|
4402
|
+
consola.warn(`Model "${resolvedSlug}" not found. Available claude models: ${available.join(", ")}`);
|
|
4403
|
+
}
|
|
4404
|
+
const banner = chosenSlug === resolvedSlug ? chosenSlug : `${chosenSlug} → ${resolvedSlug}`;
|
|
4405
|
+
process$1.stderr.write(`Server ready on ${serverUrl}, launching Claude Code (${banner})...\n`);
|
|
4406
|
+
const envVars = getClaudeCodeEnvVars(serverUrl, chosenSlug);
|
|
4407
|
+
const extraArgs = args._ ?? [];
|
|
4408
|
+
let onShutdown;
|
|
4409
|
+
if (args["codex-mcp"] !== false) try {
|
|
4410
|
+
const requestedCli = args["codex-cli"] ?? false;
|
|
4411
|
+
const backend = resolveCodexCliBackend({
|
|
4412
|
+
requested: requestedCli,
|
|
4413
|
+
codexInfo: requestedCli ? getCodexVersion() : null
|
|
4414
|
+
});
|
|
4415
|
+
const geminiAvailable$1 = state.models?.data.some((m) => /^gemini-3\..*pro/i.test(m.id)) ?? false;
|
|
4416
|
+
if (!geminiAvailable$1) consola.info("gemini-3.1-pro-preview not found in your Copilot model catalog; gemini-critic persona will not be registered.");
|
|
4417
|
+
const runtime = await writePeerMcpRuntimeFiles(serverUrl, {
|
|
4418
|
+
codexCli: backend === "cli",
|
|
4419
|
+
geminiAvailable: geminiAvailable$1
|
|
4420
|
+
});
|
|
4421
|
+
state.peerMcpNonce = runtime.nonce;
|
|
4422
|
+
onShutdown = runtime.cleanup;
|
|
4423
|
+
extraArgs.push("--mcp-config", runtime.mcpConfigPath);
|
|
4424
|
+
if (args["codex-mcp-only"] === true) extraArgs.push("--strict-mcp-config");
|
|
4425
|
+
const personaNames = runtime.personas.map((p) => p.agentName).join(", ");
|
|
4426
|
+
process$1.stderr.write(`Peer MCP wired (backend=${backend}, personas=[${personaNames}], subagent .md files=${runtime.agentMdPaths.length}).\n`);
|
|
4427
|
+
} catch (err) {
|
|
4428
|
+
consola.warn(`Peer MCP wiring failed (claude will launch without it): ${err instanceof Error ? err.message : String(err)}`);
|
|
4429
|
+
}
|
|
2450
4430
|
launchChild({
|
|
2451
4431
|
kind: "claude-code",
|
|
2452
|
-
envVars
|
|
2453
|
-
extraArgs
|
|
2454
|
-
model:
|
|
2455
|
-
}, server$1);
|
|
4432
|
+
envVars,
|
|
4433
|
+
extraArgs,
|
|
4434
|
+
model: chosenSlug
|
|
4435
|
+
}, server$1, { onShutdown });
|
|
2456
4436
|
}
|
|
2457
4437
|
});
|
|
2458
4438
|
|
|
@@ -2491,10 +4471,22 @@ const codex = defineCommand({
|
|
|
2491
4471
|
consola.error("Failed to start server:", error instanceof Error ? error.message : error);
|
|
2492
4472
|
process$1.exit(1);
|
|
2493
4473
|
}
|
|
4474
|
+
const usingDefault = !args.model;
|
|
2494
4475
|
const requestedModel = args.model ?? DEFAULT_CODEX_MODEL;
|
|
2495
4476
|
enableFileLogging();
|
|
2496
|
-
|
|
4477
|
+
let codexModel = resolveCodexModel(requestedModel);
|
|
2497
4478
|
if (codexModel !== requestedModel) consola.info(`Model "${requestedModel}" resolved to "${codexModel}"`);
|
|
4479
|
+
if (usingDefault && state.models) {
|
|
4480
|
+
const inCache = (id) => state.models?.data.some((m) => m.id === id) ?? false;
|
|
4481
|
+
if (!inCache(codexModel)) for (const fallback of DEFAULT_CODEX_MODEL_FALLBACKS) {
|
|
4482
|
+
const resolved = resolveCodexModel(fallback);
|
|
4483
|
+
if (inCache(resolved)) {
|
|
4484
|
+
consola.info(`Default model "${codexModel}" not in your Copilot model list; falling back to "${resolved}".`);
|
|
4485
|
+
codexModel = resolved;
|
|
4486
|
+
break;
|
|
4487
|
+
}
|
|
4488
|
+
}
|
|
4489
|
+
}
|
|
2498
4490
|
const modelEntry = state.models?.data.find((m) => m.id === codexModel);
|
|
2499
4491
|
if (!modelEntry) {
|
|
2500
4492
|
const available = listModelsForEndpoint("/responses");
|
|
@@ -2508,7 +4500,8 @@ const codex = defineCommand({
|
|
|
2508
4500
|
kind: "codex",
|
|
2509
4501
|
envVars: getCodexEnvVars(serverUrl),
|
|
2510
4502
|
extraArgs: args._ ?? [],
|
|
2511
|
-
model: codexModel
|
|
4503
|
+
model: codexModel,
|
|
4504
|
+
serverUrl
|
|
2512
4505
|
}, server$1);
|
|
2513
4506
|
}
|
|
2514
4507
|
});
|
|
@@ -2541,9 +4534,9 @@ async function checkTokenExists() {
|
|
|
2541
4534
|
}
|
|
2542
4535
|
}
|
|
2543
4536
|
async function getDebugInfo() {
|
|
2544
|
-
const [version, tokenExists] = await Promise.all([getPackageVersion(), checkTokenExists()]);
|
|
4537
|
+
const [version$1, tokenExists] = await Promise.all([getPackageVersion(), checkTokenExists()]);
|
|
2545
4538
|
return {
|
|
2546
|
-
version,
|
|
4539
|
+
version: version$1,
|
|
2547
4540
|
runtime: getRuntimeInfo(),
|
|
2548
4541
|
paths: {
|
|
2549
4542
|
APP_DIR: PATHS.APP_DIR,
|