github-router 0.3.45 → 0.3.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser-bridge/index.js +22 -1
- package/dist/browser-ext/background.js +351 -77
- package/dist/browser-ext/manifest.json +4 -2
- package/dist/main.js +1185 -432
- package/dist/main.js.map +1 -1
- package/package.json +1 -1
package/dist/main.js
CHANGED
|
@@ -8,7 +8,7 @@ import { createHash, randomBytes, randomUUID, timingSafeEqual } from "node:crypt
|
|
|
8
8
|
import fs, { readFile, stat } from "node:fs/promises";
|
|
9
9
|
import os, { homedir, platform } from "node:os";
|
|
10
10
|
import * as path$1 from "node:path";
|
|
11
|
-
import path from "node:path";
|
|
11
|
+
import path, { dirname, join } from "node:path";
|
|
12
12
|
import process$1 from "node:process";
|
|
13
13
|
import { execFile, execFileSync, spawn, spawnSync } from "node:child_process";
|
|
14
14
|
import { promisify } from "node:util";
|
|
@@ -17,13 +17,13 @@ import { createInterface } from "node:readline";
|
|
|
17
17
|
import Parser from "web-tree-sitter";
|
|
18
18
|
import WebSocket from "ws";
|
|
19
19
|
import { fileURLToPath } from "node:url";
|
|
20
|
+
import { events } from "fetch-event-stream";
|
|
20
21
|
import { Type } from "typebox";
|
|
21
22
|
import "partial-json";
|
|
22
23
|
import { Compile } from "typebox/compile";
|
|
23
24
|
import { Value } from "typebox/value";
|
|
24
25
|
import "yaml";
|
|
25
26
|
import "ignore";
|
|
26
|
-
import { events } from "fetch-event-stream";
|
|
27
27
|
import { z } from "zod";
|
|
28
28
|
import { Writable } from "node:stream";
|
|
29
29
|
import { serve } from "srvx";
|
|
@@ -62,14 +62,14 @@ function copilotVersion(state$1) {
|
|
|
62
62
|
const API_VERSION = "2026-01-09";
|
|
63
63
|
const copilotBaseUrl = (state$1) => state$1.copilotApiUrl ?? "https://api.githubcopilot.com";
|
|
64
64
|
const copilotHeaders = (state$1, vision = false, integrationId = "vscode-chat") => {
|
|
65
|
-
const version$
|
|
65
|
+
const version$2 = copilotVersion(state$1);
|
|
66
66
|
const headers = {
|
|
67
67
|
Authorization: `Bearer ${state$1.copilotToken}`,
|
|
68
68
|
"content-type": standardHeaders()["content-type"],
|
|
69
69
|
"copilot-integration-id": integrationId,
|
|
70
70
|
"editor-version": `vscode/${state$1.vsCodeVersion}`,
|
|
71
|
-
"editor-plugin-version": `copilot-chat/${version$
|
|
72
|
-
"user-agent": `GitHubCopilotChat/${version$
|
|
71
|
+
"editor-plugin-version": `copilot-chat/${version$2}`,
|
|
72
|
+
"user-agent": `GitHubCopilotChat/${version$2}`,
|
|
73
73
|
"openai-intent": "conversation-panel",
|
|
74
74
|
"x-interaction-type": "conversation-panel",
|
|
75
75
|
"x-github-api-version": API_VERSION,
|
|
@@ -538,9 +538,9 @@ const cacheVSCodeVersion = async () => {
|
|
|
538
538
|
consola.info(`Using VSCode version: ${response}`);
|
|
539
539
|
};
|
|
540
540
|
/**
 * Resolve the Copilot Chat version via `getCopilotChatVersion()`, store it on
 * the shared `state` object as `copilotVersion`, and log the value that will
 * be used.
 */
const cacheCopilotVersion = async () => {
  const resolvedVersion = await getCopilotChatVersion();
  state.copilotVersion = resolvedVersion;
  consola.info(`Using Copilot Chat version: ${resolvedVersion}`);
};
|
|
545
545
|
|
|
546
546
|
//#endregion
|
|
@@ -1117,10 +1117,10 @@ function getCodexVersion() {
|
|
|
1117
1117
|
};
|
|
1118
1118
|
const major = Number.parseInt(m[1], 10);
|
|
1119
1119
|
const minor = Number.parseInt(m[2], 10);
|
|
1120
|
-
const version$
|
|
1120
|
+
const version$2 = `${m[1]}.${m[2]}.${m[3]}`;
|
|
1121
1121
|
return {
|
|
1122
1122
|
ok: major > 0 || major === 0 && minor >= 129,
|
|
1123
|
-
version: version$
|
|
1123
|
+
version: version$2
|
|
1124
1124
|
};
|
|
1125
1125
|
}
|
|
1126
1126
|
/**
|
|
@@ -2471,6 +2471,33 @@ function round4(x) {
|
|
|
2471
2471
|
return Math.round(x * 1e4) / 1e4;
|
|
2472
2472
|
}
|
|
2473
2473
|
|
|
2474
|
+
//#endregion
|
|
2475
|
+
//#region src/lib/version.ts
|
|
2476
|
+
/**
 * Look up this package's published version from package.json at runtime.
 *
 * The value is read at runtime rather than inlined at build time because
 * release.yml builds BEFORE `npm version patch` bumps the version, so an
 * inlined constant would always carry the pre-bump number. The npm tarball
 * ships package.json next to `dist/`, so walking up from import.meta.url
 * works for both the dev (`src/lib/`) and the bundled (`dist/`) layout.
 *
 * Two candidate locations are probed, in order: two directories up, then one
 * directory up. A candidate is accepted only when it has a string `version`
 * and its `name` is "github-router" or "@animeshkundu/github-router".
 *
 * @returns {string} the matching version, or "unknown" when no candidate can
 *   be read, parsed, or matched. Never throws, so version reporting cannot
 *   prevent the CLI from starting.
 */
function getPackageVersion() {
  const acceptedNames = ["github-router", "@animeshkundu/github-router"];
  let manifestPaths;
  try {
    const moduleDir = dirname(fileURLToPath(import.meta.url));
    manifestPaths = [
      join(moduleDir, "..", "..", "package.json"),
      join(moduleDir, "..", "package.json"),
    ];
  } catch {
    return "unknown";
  }
  for (const manifestPath of manifestPaths) {
    try {
      const manifest = JSON.parse(readFileSync(manifestPath, "utf8"));
      const hasVersion = typeof manifest.version === "string";
      if (hasVersion && acceptedNames.includes(manifest.name)) return manifest.version;
    } catch {
      // Missing or malformed candidate: fall through to the next location.
    }
  }
  return "unknown";
}
|
|
2500
|
+
|
|
2474
2501
|
//#endregion
|
|
2475
2502
|
//#region src/lib/browser-mcp/browser-detect.ts
|
|
2476
2503
|
let cached;
|
|
@@ -2879,16 +2906,94 @@ function loadStableExtensionId() {
|
|
|
2879
2906
|
} catch {}
|
|
2880
2907
|
return "unknown";
|
|
2881
2908
|
}
|
|
2882
|
-
|
|
2909
|
+
/**
 * Read the `version` field of `manifest.json` inside `extensionDir()`.
 *
 * Used to detect that the extension loaded in the browser is stale relative
 * to a freshly-updated package.
 *
 * @returns {string | undefined} the non-empty version string, or `undefined`
 *   when the manifest is missing, unreadable, not valid JSON, or does not
 *   carry a non-empty string version.
 */
function loadExpectedExtensionVersion() {
  try {
    const manifestPath = path.join(extensionDir(), "manifest.json");
    const manifest = JSON.parse(readFileSync(manifestPath, "utf8"));
    const candidate = manifest.version;
    return typeof candidate === "string" && candidate.length > 0 ? candidate : undefined;
  } catch {
    // Any failure (I/O, parse, null manifest) means "no expected version".
    return undefined;
  }
}
|
|
2922
|
+
/**
|
|
2923
|
+
* Source-checkout dev sentinel — see scripts/copy-browser-ext.ts. When
|
|
2924
|
+
* extensionDir() resolves to src/browser-ext/ (dev iteration via
|
|
2925
|
+
* GH_ROUTER_BROWSER_EXT_DIR, or the dist fallback when the package
|
|
2926
|
+
* isn't built), the version is "0.0.0" and the auto-reload check is a
|
|
2927
|
+
* no-op: both sides agree, no mismatch, no reload triggered.
|
|
2928
|
+
*/
|
|
2929
|
+
const DEV_VERSION_SENTINEL = "0.0.0";
|
|
2930
|
+
/**
|
|
2931
|
+
* Track which `(extensionId, expectedVersion)` pairs we've already
|
|
2932
|
+
* tried to auto-reload in this process. Prevents an infinite reload
|
|
2933
|
+
* loop if the on-disk version somehow stays ahead of what the browser
|
|
2934
|
+
* picks up (e.g. Chrome disabled the extension after reload because
|
|
2935
|
+
* a new permission was added — the loaded version stays stale).
|
|
2936
|
+
*/
|
|
2937
|
+
const attemptedReloads = /* @__PURE__ */ new Set();
|
|
2938
|
+
/**
 * Ask the bridge to reload the browser extension by POSTing to `/reload`.
 *
 * Per the bridge contract this triggers a `__reload__` control frame over
 * native messaging, which the extension turns into chrome.runtime.reload().
 * When this resolves the OLD bridge process may still be alive (its WS
 * clients have not dropped yet); the NEW bridge that Chrome spawns on
 * reconnect overwrites the discovery file.
 *
 * @param {number|string} port bridge port on 127.0.0.1
 * @param {string} token bearer token sent in the `authorization` header
 * @param {number} [timeoutMs=1000] abort the request after this many ms
 * @returns {Promise<boolean>} `true` only for a 2xx response; `false` for
 *   any non-ok status, network error, or timeout.
 */
async function postReload(port, token, timeoutMs = 1e3) {
  const aborter = new AbortController();
  const abortTimer = setTimeout(() => aborter.abort(), timeoutMs);
  try {
    const reply = await fetch(`http://127.0.0.1:${port}/reload`, {
      method: "POST",
      headers: { authorization: `Bearer ${token}` },
      signal: aborter.signal,
    });
    return reply.ok;
  } catch {
    return false;
  } finally {
    // Always disarm the timer so it cannot keep the event loop alive.
    clearTimeout(abortTimer);
  }
}
|
|
2961
|
+
/**
 * Wait for a reloaded extension to report the expected version.
 *
 * Each cycle sleeps `intervalMs`, re-reads the bridge discovery file, and
 * probes `/health` (500 ms probe timeout). The discovery file is re-read on
 * every cycle because the bridge process changes across a reload: the old
 * bridge exits after its grace window and the new one writes a fresh
 * port/token/pid.
 *
 * @param {string} expectedVersion version the extension must report
 * @param {number} maxWaitMs overall polling budget in milliseconds
 * @param {number} intervalMs pause before each probe in milliseconds
 * @returns {Promise<object | undefined>} the discovery record of the bridge
 *   whose health check is ok, has a connected extension, and reports
 *   `expectedVersion`; `undefined` when the budget runs out (the caller then
 *   falls back to install_required).
 */
async function pollUntilExtensionVersion(expectedVersion, maxWaitMs, intervalMs) {
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  const stopAt = Date.now() + maxWaitMs;
  while (Date.now() < stopAt) {
    await pause(intervalMs);
    const discovery = readBridgeDiscovery();
    if (discovery) {
      const status = await probeHealth(discovery.port, discovery.token, 500);
      const converged =
        status?.ok &&
        status.extension_connected &&
        status.extension_loaded_version === expectedVersion;
      if (converged) return discovery;
    }
  }
  return undefined;
}
|
|
2979
|
+
/**
 * Build the structured `install_required` payload returned to the tool
 * caller when the bridge or the browser extension is not usable.
 *
 * @param {string} reason machine-readable cause; "no_supported_browser",
 *   "bridge_bundle_missing" and "extension_outdated" (only when
 *   `versionMismatch` is supplied) get dedicated instructions, every other
 *   reason gets the generic "Load unpacked" walkthrough.
 * @param {boolean} autoInstalled echoed back as `auto_installed`
 * @param {{loaded: string, expected: string}} [versionMismatch] loaded vs.
 *   expected extension versions; when present it is also attached to the
 *   payload as `version_mismatch`.
 * @returns {object} payload with `install_required: true`, the reason, the
 *   proxy version, and the manual recovery steps.
 */
function buildInstallRequired(reason, autoInstalled, versionMismatch) {
  let instructions;
  switch (reason) {
    case "no_supported_browser":
      instructions = "No Chrome or Edge installation was detected on this host. Install one and restart the github-router proxy.";
      break;
    case "bridge_bundle_missing":
      instructions = "The bridge bundle is missing. Run `bun run build` from the github-router checkout to produce dist/browser-bridge/index.js, then retry.";
      break;
    default:
      if (reason === "extension_outdated" && versionMismatch) {
        instructions = `Your loaded github-router browser extension is version ${versionMismatch.loaded} but the github-router package shipped version ${versionMismatch.expected}. Auto-reload was attempted and did not converge — Chrome likely disabled the extension because the new manifest declares new permissions. Open chrome://extensions (or edge://extensions), find the github-router extension card, click "Enable" if it's disabled, then click the reload arrow. Retry this tool call afterwards.`;
      } else {
        instructions = "Open chrome://extensions (or edge://extensions), enable Developer Mode, click 'Load unpacked', and select the load_unpacked_dir above. Then retry this tool call. If you just updated the github-router package, an extension already loaded may need to be reloaded — click the reload arrow on its card.";
      }
  }
  const payload = {
    install_required: true,
    reason,
    auto_installed: autoInstalled,
    proxy_version: getPackageVersion(),
    manual_steps: {
      load_unpacked_dir: extensionDir(),
      expected_extension_id: loadStableExtensionId(),
      instructions,
    },
  };
  // Only surface the mismatch details when the caller actually has them.
  if (versionMismatch) payload.version_mismatch = versionMismatch;
  return payload;
}
|
|
2894
2999
|
/**
|
|
@@ -2929,6 +3034,31 @@ async function _ensureBridgeReadyImpl() {
|
|
|
2929
3034
|
const health = await probeHealth(discovery.port, discovery.token);
|
|
2930
3035
|
if (!health || !health.ok) return buildInstallRequired("bridge_not_running", autoInstalled);
|
|
2931
3036
|
if (!health.extension_connected) return buildInstallRequired("extension_not_loaded", autoInstalled);
|
|
3037
|
+
const expectedVersion = loadExpectedExtensionVersion();
|
|
3038
|
+
const loadedVersion = health.extension_loaded_version;
|
|
3039
|
+
if (typeof expectedVersion === "string" && typeof loadedVersion === "string" && expectedVersion !== DEV_VERSION_SENTINEL && loadedVersion !== DEV_VERSION_SENTINEL && expectedVersion !== loadedVersion) {
|
|
3040
|
+
const reloadKey = `${loadStableExtensionId()}::${expectedVersion}`;
|
|
3041
|
+
if (attemptedReloads.has(reloadKey)) return buildInstallRequired("extension_outdated", autoInstalled, {
|
|
3042
|
+
loaded: loadedVersion,
|
|
3043
|
+
expected: expectedVersion
|
|
3044
|
+
});
|
|
3045
|
+
attemptedReloads.add(reloadKey);
|
|
3046
|
+
if (!await postReload(discovery.port, discovery.token)) return buildInstallRequired("extension_outdated", autoInstalled, {
|
|
3047
|
+
loaded: loadedVersion,
|
|
3048
|
+
expected: expectedVersion
|
|
3049
|
+
});
|
|
3050
|
+
const newDiscovery = await pollUntilExtensionVersion(expectedVersion, 3e3, 150);
|
|
3051
|
+
if (!newDiscovery) return buildInstallRequired("extension_outdated", autoInstalled, {
|
|
3052
|
+
loaded: loadedVersion,
|
|
3053
|
+
expected: expectedVersion
|
|
3054
|
+
});
|
|
3055
|
+
return {
|
|
3056
|
+
install_required: false,
|
|
3057
|
+
port: newDiscovery.port,
|
|
3058
|
+
token: newDiscovery.token,
|
|
3059
|
+
pid: newDiscovery.pid
|
|
3060
|
+
};
|
|
3061
|
+
}
|
|
2932
3062
|
return {
|
|
2933
3063
|
install_required: false,
|
|
2934
3064
|
port: discovery.port,
|
|
@@ -3226,89 +3356,698 @@ function logAudit$1(record) {
|
|
|
3226
3356
|
}
|
|
3227
3357
|
|
|
3228
3358
|
//#endregion
|
|
3229
|
-
//#region src/lib/
|
|
3359
|
+
//#region src/lib/mcp-inflight.ts
|
|
3230
3360
|
/**
|
|
3231
|
-
*
|
|
3232
|
-
* `dispatchBrowserTool()` which (1) runs the bridge-layer URL policy
|
|
3233
|
-
* check, (2) runs the install-check pre-flight (returning structured
|
|
3234
|
-
* install_required JSON when the bridge or extension isn't ready),
|
|
3235
|
-
* and (3) opens a WS to the bridge, sends the tool call, awaits the
|
|
3236
|
-
* response with a per-tool timeout.
|
|
3361
|
+
* Shared concurrency cap for MCP `tools/call` dispatches.
|
|
3237
3362
|
*
|
|
3238
|
-
*
|
|
3239
|
-
*
|
|
3240
|
-
*
|
|
3241
|
-
*
|
|
3363
|
+
* Originally lived as a module-private counter inside
|
|
3364
|
+
* `src/routes/mcp/handler.ts`. Extracted because the worker-agent's
|
|
3365
|
+
* `peer_review` and `advisor` tools (which dispatch to peer-model
|
|
3366
|
+
* personas / the advisor responses endpoint from inside a worker
|
|
3367
|
+
* subagent loop) must participate in the same backpressure budget;
|
|
3368
|
+
* otherwise a single worker can fan out unboundedly to peers and
|
|
3369
|
+
* starve the operator's own `tools/list` callers.
|
|
3242
3370
|
*
|
|
3243
|
-
*
|
|
3371
|
+
* The counter is a single process-wide integer — no per-route
|
|
3372
|
+
* partitioning. Persona calls at the MCP boundary (handler.ts),
|
|
3373
|
+
* peer/advisor calls nested inside a worker (tools.ts), and any
|
|
3374
|
+
* future MCP-adjacent dispatcher all increment the same number.
|
|
3375
|
+
*
|
|
3376
|
+
* Cap = `MAX_INFLIGHT_TOOLS_CALL = 8`. Justification lives at the
|
|
3377
|
+
* historical home (`src/routes/mcp/handler.ts` comment block); do not
|
|
3378
|
+
* change the value without re-reading
|
|
3379
|
+
* `docs/research/peer-mcp-investigation.md` § "Concurrency cap
|
|
3380
|
+
* investigation".
|
|
3244
3381
|
*/
|
|
3245
|
-
const
|
|
3246
|
-
|
|
3247
|
-
|
|
3248
|
-
|
|
3249
|
-
|
|
3250
|
-
|
|
3251
|
-
|
|
3252
|
-
|
|
3253
|
-
|
|
3254
|
-
|
|
3255
|
-
|
|
3256
|
-
|
|
3382
|
+
const MAX_INFLIGHT_TOOLS_CALL = 8;
let inFlight$1 = 0;
/**
 * Try to claim one of the `MAX_INFLIGHT_TOOLS_CALL` process-wide dispatch
 * slots.
 *
 * Deliberately synchronous: an awaiting semaphore would let callers queue
 * without bound, whereas an immediate "full" answer lets the MCP client (or
 * the model holding a nested tool call) decide to back off or retry.
 *
 * @returns {(() => void) | null} `null` when every slot is taken; otherwise
 *   a release function the caller must invoke (typically in `finally`). The
 *   release function is idempotent, so a defensive second call cannot
 *   double-decrement the counter.
 */
function acquireInFlightSlot() {
  const hasCapacity = inFlight$1 < MAX_INFLIGHT_TOOLS_CALL;
  if (!hasCapacity) return null;
  inFlight$1 += 1;
  let held = true;
  return () => {
    if (!held) return;
    held = false;
    inFlight$1 -= 1;
  };
}
|
|
3407
|
+
|
|
3408
|
+
//#endregion
|
|
3409
|
+
//#region src/lib/diagnose-response.ts
|
|
3410
|
+
const PREVIEW_LIMIT = 200;
/**
 * Parse an upstream Response body as JSON; when that fails, log a one-line
 * diagnostic (status, content-type, and a preview of at most PREVIEW_LIMIT
 * characters of the body) and rethrow the original error.
 *
 * The body is read exactly once as text and parsed with JSON.parse. Cloning
 * the response up front just to keep a copy for the failure path would tee
 * the body stream and buffer a second copy of every successful response.
 *
 * @param {Response} response upstream response whose body has not been read
 * @param {string} routePath route label used in the diagnostic log line
 * @returns {Promise<unknown>} the parsed JSON value
 * @throws the body-read error, or the SyntaxError raised by JSON.parse
 */
async function parseJsonOrDiagnose(response, routePath) {
  const logFailure = (bodyText) => {
    const contentType = response.headers.get("content-type") ?? "(none)";
    const preview = bodyText.length > PREVIEW_LIMIT ? bodyText.slice(0, PREVIEW_LIMIT) + "...(truncated)" : bodyText;
    consola.error(`Upstream JSON parse failed at ${routePath}: status=${response.status} content-type="${contentType}" body[0..${PREVIEW_LIMIT}]=${JSON.stringify(preview)}`);
  };
  let bodyText;
  try {
    bodyText = await response.text();
  } catch (error) {
    // The body could not be read at all; there is nothing to preview.
    logFailure("(unreadable)");
    throw error;
  }
  try {
    return JSON.parse(bodyText);
  } catch (error) {
    logFailure(bodyText);
    throw error;
  }
}
|
|
3423
|
+
|
|
3424
|
+
//#endregion
|
|
3425
|
+
//#region src/lib/response-cap.ts
|
|
3426
|
+
/**
 * Hard byte cap for non-streaming upstream response bodies.
 *
 * Anthropic responses with large tool_use blocks can legitimately reach
 * several MB, but a multi-GB body is either a buggy upstream or a malicious
 * one. Buffering it would OOM the proxy and crash all in-flight requests.
 *
 * Applies to /v1/messages, /v1/chat/completions, and /v1/responses.
 */
const MAX_RESPONSE_BODY_BYTES = 10 * 1024 * 1024;
/**
 * Read a Response body with a hard byte cap, then parse it as JSON.
 *
 * Fast path: when Content-Length is present, within the cap, AND the body is
 * not content-encoded, the response is handed to parseJsonOrDiagnose
 * directly. Content-Length counts encoded (e.g. gzip) bytes, so for an
 * encoded body it says nothing about the decoded size; such responses always
 * go through the counting reader below.
 *
 * When the cap is hit:
 *   - the reader is cancelled to release the upstream socket
 *   - a structured Anthropic-format error with status 502 is RETURNED, not
 *     thrown, so the caller can turn it into a clean error response
 *
 * A mid-stream read error is logged and the bytes received so far are still
 * parsed (best effort); a truncated body then surfaces as a JSON parse error.
 *
 * @param {Response} response upstream response whose body has not been read
 * @param {string} routePath route label used in log and error messages
 * @param {number} [capBytes=MAX_RESPONSE_BODY_BYTES] maximum decoded bytes
 * @returns {Promise<{ok: true, value: unknown} | {ok: false, status: number, errorResponse: object}>}
 * @throws when the body is not valid JSON
 */
async function readResponseBodyCapped(response, routePath, capBytes = MAX_RESPONSE_BODY_BYTES) {
  const contentLengthHeader = response.headers.get("content-length");
  const contentLength = contentLengthHeader ? Number.parseInt(contentLengthHeader, 10) : Number.NaN;
  const contentEncoding = (response.headers.get("content-encoding") ?? "").trim().toLowerCase();
  // Only an unencoded body's Content-Length bounds what we will buffer.
  const lengthBoundsBody = contentEncoding === "" || contentEncoding === "identity";
  if (lengthBoundsBody && !Number.isNaN(contentLength) && contentLength >= 0 && contentLength <= capBytes) {
    return {
      ok: true,
      value: await parseJsonOrDiagnose(response, routePath),
    };
  }
  const reader = response.body?.getReader();
  if (!reader) {
    return {
      ok: true,
      value: await parseJsonOrDiagnose(response, routePath),
    };
  }
  const chunks = [];
  let totalBytes = 0;
  let capped = false;
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      if (!value) continue;
      totalBytes += value.byteLength;
      if (totalBytes > capBytes) {
        capped = true;
        try {
          await reader.cancel("size_cap");
        } catch {
          // Cancellation is best effort; the body is dropped either way.
        }
        break;
      }
      chunks.push(value);
    }
  } catch (err) {
    if (!capped) consola.warn(`readResponseBodyCapped: read error at ${routePath}:`, err);
  }
  if (capped) {
    // Describe the cap that was actually applied, not just the default.
    const mebibyte = 1024 * 1024;
    const capLabel = capBytes >= mebibyte && capBytes % mebibyte === 0 ? `${capBytes / mebibyte} MiB` : `${capBytes} bytes`;
    consola.warn(`Non-streaming upstream response at ${routePath} exceeded ${capBytes} bytes (${capLabel} cap); dropping body to prevent OOM. Check upstream health.`);
    return {
      ok: false,
      status: 502,
      errorResponse: {
        type: "error",
        error: {
          type: "api_error",
          message: `Upstream response body exceeded the ${capLabel} size cap for non-streaming ${routePath}. The upstream may be misbehaving. Try enabling streaming (stream: true) which handles large responses chunk-by-chunk.`,
        },
      },
    };
  }
  // The rejected chunk was counted but never stored, so size from the chunks.
  const storedBytes = chunks.reduce((sum, chunk) => sum + chunk.byteLength, 0);
  const merged = new Uint8Array(storedBytes);
  let offset = 0;
  for (const chunk of chunks) {
    merged.set(chunk, offset);
    offset += chunk.byteLength;
  }
  const text = new TextDecoder().decode(merged);
  try {
    return {
      ok: true,
      value: JSON.parse(text),
    };
  } catch (err) {
    const preview = text.slice(0, 200);
    const contentType = response.headers.get("content-type") ?? "(none)";
    consola.error(`Upstream JSON parse failed at ${routePath}: status=${response.status} content-type="${contentType}" body[0..200]=${JSON.stringify(preview)}`);
    throw err;
  }
}
|
|
3518
|
+
|
|
3519
|
+
//#endregion
|
|
3520
|
+
//#region src/services/copilot/create-chat-completions.ts
|
|
3521
|
+
/**
 * Send a chat-completions request to the Copilot API.
 *
 * Headers and body are rebuilt on every attempt (the request function is
 * handed to `tryRefreshAndRetry`, which may call it more than once). The
 * abort signal combines the upstream timeout (when
 * `UPSTREAM_FETCH_TIMEOUT_MS > 0`) with the optional caller signal.
 *
 * @param {object} payload chat-completions request body; `payload.messages`
 *   drives the vision flag and the `X-Initiator` header, `payload.stream`
 *   selects the return shape
 * @param {object} [modelHeaders] extra headers layered over the Copilot ones
 * @param {AbortSignal} [callerSignal] caller-side cancellation
 * @returns the event stream from `events(response)` when `payload.stream` is
 *   truthy, otherwise the parsed JSON body
 * @throws {Error} when no Copilot token is available
 * @throws {HTTPError} on a non-ok upstream status, or when the non-streaming
 *   body exceeds the response size cap
 */
const createChatCompletions = async (payload, modelHeaders, callerSignal) => {
  if (!state.copilotToken) throw new Error("Copilot token not found");

  const hasImagePart = (message) =>
    typeof message.content !== "string" && message.content?.some((part) => part.type === "image_url");
  const enableVision = payload.messages.some((message) => hasImagePart(message));
  const isAgentCall = payload.messages.some((message) => ["assistant", "tool"].includes(message.role));
  const endpoint = `${copilotBaseUrl(state)}/chat/completions`;

  const sendRequest = () => {
    const init = {
      method: "POST",
      headers: {
        ...copilotHeaders(state, enableVision),
        ...modelHeaders,
        "X-Initiator": isAgentCall ? "agent" : "user",
      },
      body: JSON.stringify(payload),
    };
    const abortSources = [];
    if (UPSTREAM_FETCH_TIMEOUT_MS > 0) abortSources.push(AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS));
    if (callerSignal) abortSources.push(callerSignal);
    // Leave `signal` unset when there is nothing to abort on.
    if (abortSources.length > 1) {
      init.signal = AbortSignal.any(abortSources);
    } else if (abortSources.length === 1) {
      [init.signal] = abortSources;
    }
    return fetch(endpoint, init);
  };

  const response = await tryRefreshAndRetry(sendRequest, "/chat/completions");

  if (!response.ok) {
    let errorBody = "";
    try {
      errorBody = await response.text();
    } catch {
      errorBody = "(could not read error body)";
    }
    const catalog = state.models;
    const claudeModels =
      catalog == null
        ? "(models not loaded)"
        : catalog.data
            .filter((entry) => entry.id.startsWith("claude"))
            .map((entry) => entry.id)
            .join(", ");
    consola.error(`Copilot rejected model "${payload.model}": ${response.status} ${errorBody} (available Claude models: ${claudeModels})`);
    const replay = new Response(errorBody, {
      status: response.status,
      statusText: response.statusText,
      headers: response.headers,
    });
    throw new HTTPError("Failed to create chat completions", replay);
  }

  if (payload.stream) return events(response);

  const cappedResult = await readResponseBodyCapped(response, "/v1/chat/completions", MAX_RESPONSE_BODY_BYTES);
  if (cappedResult.ok) return cappedResult.value;
  throw new HTTPError(
    "Upstream /v1/chat/completions response exceeded 10 MiB size cap",
    new Response(JSON.stringify(cappedResult.errorResponse), {
      status: cappedResult.status,
      headers: { "content-type": "application/json" },
    })
  );
};
|
|
3567
|
+
|
|
3568
|
+
//#endregion
|
|
3569
|
+
//#region src/lib/browser-mcp/compressor.ts
|
|
3570
|
+
/**
 * Static fallback chain, in preference order: faster + multimodal + cheaper
 * first. All three are expected to support `tool_calls` and image input
 * (the latter is required for Phase D visual fallback).
 */
const COMPRESSOR_FALLBACK_CHAIN = [
  "gemini-3.5-flash",
  "gpt-5.4-mini",
  "claude-haiku-4-5",
];
let selectedBackend;
/**
 * Choose the compressor backend from the live Copilot catalog
 * (`state.models.data`).
 *
 * Walks the fallback chain and takes the first id that is present in the
 * catalog AND advertises `capabilities.supports.tool_calls === true`. A
 * successful choice is cached so every compressor call in the session uses
 * the same backend; a miss is not cached and is re-evaluated next call.
 *
 * @returns {string | undefined} the selected model id, or `undefined` when
 *   the catalog is not loaded or no chain entry qualifies.
 */
function pickBackendFromCatalog() {
  if (selectedBackend) return selectedBackend;
  const catalog = state.models?.data;
  if (!catalog) return undefined;
  const choice = COMPRESSOR_FALLBACK_CHAIN.find((candidateId) => {
    const entry = catalog.find((model) => model.id === candidateId);
    return entry?.capabilities?.supports?.tool_calls === true;
  });
  if (choice === undefined) return undefined;
  selectedBackend = choice;
  consola.info(`[browser-mcp] compressor backend: ${choice}`);
  return choice;
}
|
|
3601
|
+
/**
 * Report whether a compressor backend can be selected right now.
 *
 * Serves as the capability gate for the compound tools: `browser_find`,
 * `browser_act` (intent mode) and `browser_extract` are dropped from
 * `tools/list` and rejected on `tools/call` with -32601 when this is false.
 *
 * @returns {boolean} true iff `pickBackendFromCatalog()` yields a backend.
 */
function compressorAvailable() {
  const backend = pickBackendFromCatalog();
  return backend !== undefined;
}
|
|
3611
|
+
/**
 * Perform one round-trip to the selected compressor backend and return the
 * parsed JSON result.
 *
 * Structured output is forced through tool-calling: the caller's tool schema
 * is sent as the only tool and `tool_choice` pins it, so the model has to
 * answer with a tool call whose `arguments` is a JSON string. For backends
 * that ignore `tool_choice`, free-form `message.content` is accepted as a
 * fallback after stripping a surrounding ``` code fence.
 *
 * An in-flight slot is held for the duration of the call and always released.
 *
 * @param {string} systemPrompt system message content
 * @param {string} userMessage user message content
 * @param {{name: string, description: string, parameters: object}} tool
 *   function-tool definition the model must call
 * @param {AbortSignal} [signal] forwarded to the chat-completions request
 * @returns {Promise<unknown>} parsed tool-call arguments, or parsed content
 * @throws {Error} when no backend is available, the in-flight cap is
 *   saturated, or the backend returns neither tool_calls nor content;
 *   JSON.parse errors propagate for malformed output
 */
async function callCompressor(systemPrompt, userMessage, tool, signal) {
  const model = pickBackendFromCatalog();
  if (!model) {
    throw new Error(`browser-mcp compressor: no backend available in catalog. Checked: ${COMPRESSOR_FALLBACK_CHAIN.join(", ")}`);
  }
  const release = acquireInFlightSlot();
  if (!release) {
    throw new Error("browser-mcp compressor: inflight slot saturated (cap 8); try again shortly");
  }
  try {
    const request = {
      model,
      stream: false,
      messages: [
        { role: "system", content: systemPrompt },
        { role: "user", content: userMessage },
      ],
      tools: [
        {
          type: "function",
          function: {
            name: tool.name,
            description: tool.description,
            parameters: tool.parameters,
          },
        },
      ],
      tool_choice: {
        type: "function",
        function: { name: tool.name },
      },
    };
    const completion = await createChatCompletions(request, undefined, signal);
    const reply = completion.choices?.[0]?.message;

    // Preferred: the forced tool call carries the JSON payload.
    const toolArgs = reply?.tool_calls?.[0]?.function?.arguments;
    if (typeof toolArgs === "string" && toolArgs.length > 0) return JSON.parse(toolArgs);

    // Fallback: free-form content, possibly wrapped in a code fence.
    const content = typeof reply?.content === "string" ? reply.content : "";
    if (content.length === 0) {
      throw new Error("browser-mcp compressor: empty response from backend (no tool_calls and no content)");
    }
    return JSON.parse(stripCodeFence(content));
  } finally {
    release();
  }
}
|
|
3661
|
+
/**
 * Remove one surrounding ``` (or ```json) code fence from a model's
 * free-form reply so the remainder can be handed to JSON.parse.
 *
 * The input is trimmed first; when the whole trimmed text is a single fenced
 * block, the trimmed inner content is returned, otherwise the trimmed text
 * itself. Fence-free input therefore passes through unchanged apart from
 * trimming. Guards against models that wrap JSON in ```json ... ``` even
 * when asked for a JSON object.
 *
 * @param {string} text raw model reply
 * @returns {string} reply without the surrounding fence
 */
function stripCodeFence(text) {
  const trimmed = text.trim();
  const fenceMatch = trimmed.match(/^```(?:json)?\s*\n?([\s\S]*?)\n?```$/);
  return fenceMatch === null ? trimmed : fenceMatch[1].trim();
}
|
|
3674
|
+
/**
 * Resolve a natural-language intent to one element plus the action to run
 * on it. Used by `browser_act` in intent mode.
 *
 * Matching is delegated to `pickMatchingElements` (the same picker
 * `browser_find` uses), so `find` and `act` cannot disagree on an intent.
 * The action verb is then derived by `inferAction` from the matched
 * element's role, the intent text, and whether a value was supplied.
 *
 * @param {{elements: Array<{ref: string, role: string}>}} snapshot page snapshot
 * @param {string} intent natural-language description of the target
 * @param {AbortSignal} [signal] forwarded to the matcher
 * @param {string} [value] value for fill/type/select actions
 * @returns {Promise<{ref: string, action: string, confidence: number, value?: string}>}
 *   `{ref: "", action: "click", confidence: 0}` when nothing matches or the
 *   top match is not in the snapshot (the caller should escalate to visual
 *   fallback or surface the miss); otherwise the top match with confidence
 *   0.8, carrying `value` only for fill, type and select.
 */
async function pickElement(snapshot, intent, signal, value) {
  const miss = () => ({ ref: "", action: "click", confidence: 0 });

  const candidates = await pickMatchingElements(snapshot, intent, signal);
  if (candidates.length === 0) return miss();

  const best = candidates[0];
  const element = snapshot.elements.find((entry) => entry.ref === best.ref);
  if (!element) return miss();

  const action = inferAction(element.role, intent, value);
  const picked = { ref: best.ref, action, confidence: 0.8 };
  const valueActions = ["fill", "type", "select"];
  if (value !== undefined && valueActions.includes(action)) picked.value = value;
  return picked;
}
|
|
3710
|
+
/**
 * Deterministic action picker: given an element role, the intent text
 * and an optional value, decide which primitive action to dispatch.
 *
 * Rules, first match wins:
 *  1. intent mentions "scroll" (or "scroll into view" in any spacing)
 *     -> "scroll_into_view"
 *  2. role is select / combobox -> "select"
 *  3. role is a text-entry control -> "type" when the intent says
 *     "type" AND a value was supplied, otherwise "fill"
 *  4. anything else -> "click"
 *
 * A missing or non-string `role` / `intent` is treated as an empty
 * string instead of throwing, so an element without a role falls
 * through to the default rules.
 *
 * @param {string} role   Element role (compared case-insensitively).
 * @param {string} intent Natural-language intent.
 * @param {*} [value]     Value supplied by the caller, if any.
 * @returns {"scroll_into_view"|"select"|"type"|"fill"|"click"}
 */
function inferAction(role, intent, value) {
	const intentLower = typeof intent === "string" ? intent.toLowerCase() : "";
	const r = typeof role === "string" ? role.toLowerCase() : "";
	if (/\bscroll\b/.test(intentLower) || /scroll[ -]?into[ -]?view/.test(intentLower)) return "scroll_into_view";
	if (r === "select" || r === "combobox") return "select";
	if (r === "textarea" || r === "input" || r === "textbox" || r === "searchbox" || r === "spinbutton") {
		// "type" is only chosen when there is something to type.
		if (/\btype\b/.test(intentLower) && value !== void 0) return "type";
		return "fill";
	}
	return "click";
}
|
|
3728
|
+
/** System prompt for the element-matching compressor call (three paragraphs separated by blank lines). */
const FIND_ELEMENTS_SYSTEM = [
	"You match a natural-language intent to elements from a browser page snapshot.",
	"Snapshot elements look like: {ref: \"e42\", role: \"button\", name: \"Sign in\"}.",
	"Call the find_elements tool with up to 5 best matches ordered by relevance."
].join("\n\n");
|
|
3733
|
+
/**
 * Tool definition passed alongside FIND_ELEMENTS_SYSTEM: the reply is
 * an object with a `matches` array (at most 5 entries), each entry
 * carrying the element `ref` and a free-text `reason`.
 */
const FIND_ELEMENTS_TOOL = {
	name: "find_elements",
	description: "Report ranked element matches for the intent.",
	parameters: {
		type: "object",
		required: ["matches"],
		additionalProperties: false,
		properties: {
			matches: {
				type: "array",
				maxItems: 5,
				// One ranked candidate.
				items: {
					type: "object",
					required: ["ref", "reason"],
					additionalProperties: false,
					properties: {
						ref: { type: "string" },
						reason: { type: "string" }
					}
				}
			}
		}
	}
};
|
|
3755
|
+
/**
 * Ask the compressor for the elements that best match an intent.
 *
 * Only `ref`, `role` and `name` of each snapshot element are sent.
 * The reply is read defensively: anything that is not an object with a
 * `matches` array yields `[]`, only the first 5 entries are looked at,
 * and entries without a non-empty string `ref` are dropped.
 *
 * @param snapshot Page snapshot with an `elements` array.
 * @param intent   Natural-language description of what to find.
 * @param signal   Passed through to `callCompressor`.
 * @returns Array of `{ref, reason}`; `reason` is "" when the reply did
 *          not supply a string.
 */
async function pickMatchingElements(snapshot, intent, signal) {
	const slim = snapshot.elements.map((e) => {
		return {
			ref: e.ref,
			role: e.role,
			name: e.name
		};
	});
	const payload = JSON.stringify({
		intent,
		elements: slim
	});
	const reply = await callCompressor(FIND_ELEMENTS_SYSTEM, payload, FIND_ELEMENTS_TOOL, signal);
	const candidates = reply && typeof reply === "object" ? reply.matches : void 0;
	if (!Array.isArray(candidates)) return [];
	// Cap first, then validate — invalid entries still count toward the 5.
	return candidates.slice(0, 5).filter((m) => {
		return Boolean(m) && typeof m === "object" && typeof m.ref === "string" && m.ref.length > 0;
	}).map((m) => ({
		ref: m.ref,
		reason: typeof m.reason === "string" ? m.reason : ""
	}));
}
|
|
3785
|
+
/** System prompt for the structured-extraction compressor call (three paragraphs separated by blank lines). */
const EXTRACT_SYSTEM = [
	"You extract structured data from a browser page snapshot into a JSON object matching the result schema you've been given.",
	"Use the snapshot's text + element list as your source. Be faithful to what's visible; do not invent values.",
	"Call the extract_result tool with your answer in the result field. The result field's schema is the caller's exact requested shape — fill it completely. If a field cannot be determined from the snapshot, omit it (when optional) or use a sensible empty value (when required)."
].join("\n\n");
|
|
3790
|
+
/**
 * Lightweight sanity check on a caller-supplied JSON Schema.
 *
 * The schema must be a non-null, non-array object AND declare at least
 * one of: a recognized `type`, `properties`, `items`, `$ref`, or a
 * combinator (`oneOf` / `anyOf` / `allOf`). When a `type` key is
 * present it must itself be valid, even if the schema also has shape
 * keywords. This rejects the empty schema `{}` and malformed ones such
 * as `{type: "nonsense"}`.
 *
 * `type` may be a single type name or — as JSON Schema allows — a
 * non-empty array of type names (e.g. `["string", "null"]`); every
 * member of the array must be a recognized name.
 *
 * @param {*} schema Candidate JSON Schema.
 * @returns {string|undefined} An error message when the schema fails
 *          the check, or undefined when it looks plausible.
 */
function validateExtractSchema(schema) {
	if (!schema || typeof schema !== "object" || Array.isArray(schema)) return "schema must be a non-null JSON object";
	const obj = schema;
	const validTypes = new Set([
		"object",
		"array",
		"string",
		"number",
		"integer",
		"boolean",
		"null"
	]);
	const typeList = Array.from(validTypes).join(", ");
	const isTypeName = (t) => typeof t === "string" && validTypes.has(t);
	const declared = obj.type;
	// Array form: non-empty and made only of recognized names.
	const hasValidType = Array.isArray(declared) ? declared.length > 0 && declared.every(isTypeName) : isTypeName(declared);
	const hasShape = "properties" in obj || "items" in obj || "$ref" in obj || "oneOf" in obj || "anyOf" in obj || "allOf" in obj;
	if (!hasValidType && !hasShape) return `schema must declare a recognized type (one of ${typeList}) OR have properties / items / $ref / oneOf / anyOf / allOf`;
	// Shape keywords do not excuse a bogus `type`.
	if ("type" in obj && !hasValidType) return `schema 'type' field must be one of: ${typeList}`;
}
|
|
3820
|
+
/**
|
|
3821
|
+
* Structured extraction. The caller's JSON schema is injected directly
|
|
3822
|
+
* into the extract_result tool's `result` parameter so the model's
|
|
3823
|
+
* tool-call mechanism enforces shape — the model can't satisfy the
|
|
3824
|
+
* call without producing data of the requested shape.
|
|
3825
|
+
*
|
|
3826
|
+
* Schema is pre-validated by `validateExtractSchema` — bad schemas
|
|
3827
|
+
* fail loud with a clear `SchemaValidationError` instead of slipping
|
|
3828
|
+
* through to the upstream (which is permissive enough to accept
|
|
3829
|
+
* garbage and let the model return a useless primitive).
|
|
3830
|
+
*
|
|
3831
|
+
* Post-validation: if the model's `result` ended up as a primitive
|
|
3832
|
+
* (string / number / boolean) when the schema declared object / array,
|
|
3833
|
+
* surface the shape mismatch — the model returned the wrong type and
|
|
3834
|
+
* the caller should know rather than receive a confusing value.
|
|
3835
|
+
*/
|
|
3836
|
+
/** Error type thrown when a caller-supplied extraction schema fails pre-validation. */
var SchemaValidationError = class extends Error {
	// Own property, so `err.name` identifies this error type.
	name = "SchemaValidationError";
	constructor(message) {
		super(message);
	}
};
|
|
3842
|
+
/** Error type thrown when an extraction result does not have the container type the schema declared. */
var ResultShapeError = class extends Error {
	// Own property, so `err.name` identifies this error type.
	name = "ResultShapeError";
	constructor(message) {
		super(message);
	}
};
|
|
3848
|
+
/**
 * Extract structured data from a page snapshot.
 *
 * Steps:
 *  1. Pre-validate `schema` with `validateExtractSchema`; a failure is
 *     thrown as `SchemaValidationError`.
 *  2. Call the compressor with the instruction plus the snapshot's
 *     `text` and `elements`, offering an `extract_result` tool whose
 *     `result` parameter IS the caller's schema.
 *  3. Unwrap `result` from the reply when the reply is an object that
 *     has that key; otherwise use the reply as-is.
 *  4. When the schema's `type` is exactly "object" or "array", check
 *     the unwrapped value has that container type; a mismatch is
 *     thrown as `ResultShapeError`. Other declared types are not
 *     checked here.
 *
 * @param snapshot    Page snapshot (`text`, `elements`).
 * @param schema      Caller's JSON Schema for the result.
 * @param instruction Free-text extraction instruction.
 * @param signal      Passed through to `callCompressor`.
 * @returns The unwrapped extraction result.
 */
async function extractStructured(snapshot, schema, instruction, signal) {
	const problem = validateExtractSchema(schema);
	if (problem) throw new SchemaValidationError(problem);
	const userPayload = JSON.stringify({
		instruction,
		snapshot: {
			text: snapshot.text,
			elements: snapshot.elements
		}
	});
	const extractTool = {
		name: "extract_result",
		description: "Report the extracted object. The result field's schema is the caller's requested shape; fill it completely.",
		parameters: {
			type: "object",
			required: ["result"],
			additionalProperties: false,
			properties: { result: schema }
		}
	};
	const reply = await callCompressor(EXTRACT_SYSTEM, userPayload, extractTool, signal);
	const isWrapped = reply && typeof reply === "object" && "result" in reply;
	const value = isWrapped ? reply.result : reply;
	switch (schema.type) {
		case "object": {
			const isPlainObject = typeof value === "object" && value !== null && !Array.isArray(value);
			if (!isPlainObject) throw new ResultShapeError(`schema declared type "object" but model returned ${describeType(value)}`);
			break;
		}
		case "array":
			if (!Array.isArray(value)) throw new ResultShapeError(`schema declared type "array" but model returned ${describeType(value)}`);
			break;
	}
	return value;
}
|
|
3873
|
+
/**
 * Name a value's type for error messages: like `typeof`, except that
 * `null` is reported as "null" and arrays as "array".
 */
function describeType(v) {
	return v === null ? "null" : Array.isArray(v) ? "array" : typeof v;
}
|
|
3878
|
+
/** System prompt for the screenshot-based coordinate picker (three paragraphs separated by blank lines). */
const PICK_VISUAL_SYSTEM = [
	"You're given a browser screenshot, a natural-language intent, and a list of canvas / svg regions in CSS-pixel coordinates.",
	"Find the pixel coordinates in the screenshot where the intent points. Coordinates are CSS pixels (origin top-left of viewport).",
	"Call the pick_visual tool with the coordinates. If no clear target is visible, call with x=0, y=0, confidence=0."
].join("\n\n");
|
|
3883
|
+
/**
 * Tool definition passed alongside PICK_VISUAL_SYSTEM: the reply
 * carries numeric `x`, `y` and `confidence` plus a free-text `reason`,
 * all required.
 */
const PICK_VISUAL_TOOL = {
	name: "pick_visual",
	description: "Report the pixel coordinates the intent points at.",
	parameters: {
		type: "object",
		required: ["x", "y", "confidence", "reason"],
		additionalProperties: false,
		properties: {
			x: { type: "number" },
			y: { type: "number" },
			confidence: { type: "number" },
			reason: { type: "string" }
		}
	}
};
|
|
3903
|
+
/**
 * Screenshot-based fallback picker: sends the intent, the list of
 * visual surfaces and the screenshot (as a data: URL) to the
 * compressor and normalizes the reply into pixel coordinates.
 *
 * Normalization of the reply:
 *  - non-object reply -> `{x: 0, y: 0, confidence: 0,
 *    reason: "empty backend response"}`
 *  - `x` / `y` are rounded to integers, or 0 when not numbers
 *  - `confidence` is clamped to [0, 1], or 0 when not a number
 *  - `reason` is "" when not a string
 *
 * @param screenshotB64  Base64-encoded screenshot bytes.
 * @param contentType    MIME type used in the data: URL.
 * @param intent         Natural-language description of the target.
 * @param visualSurfaces Surface list forwarded as `visual_surfaces`.
 * @param signal         Passed through to `callCompressor`.
 * @returns `{x, y, confidence, reason}`.
 */
async function pickElementVisual(screenshotB64, contentType, intent, visualSurfaces, signal) {
	const textPart = {
		type: "text",
		text: JSON.stringify({
			intent,
			visual_surfaces: visualSurfaces
		})
	};
	const imagePart = {
		type: "image_url",
		image_url: { url: `data:${contentType};base64,${screenshotB64}` }
	};
	const reply = await callCompressor(PICK_VISUAL_SYSTEM, [textPart, imagePart], PICK_VISUAL_TOOL, signal);
	if (!reply || typeof reply !== "object") return {
		x: 0,
		y: 0,
		confidence: 0,
		reason: "empty backend response"
	};
	const roundedOrZero = (n) => typeof n === "number" ? Math.round(n) : 0;
	const clampedOrZero = (n) => typeof n === "number" ? Math.max(0, Math.min(1, n)) : 0;
	return {
		x: roundedOrZero(reply.x),
		y: roundedOrZero(reply.y),
		confidence: clampedOrZero(reply.confidence),
		reason: typeof reply.reason === "string" ? reply.reason : ""
	};
}
|
|
3936
|
+
|
|
3937
|
+
//#endregion
|
|
3938
|
+
//#region src/lib/browser-mcp/index.ts
|
|
3939
|
+
/**
 * Fetch a page snapshot for the compound tools: dispatches
 * `browser_read_page` in "summary" mode and parses the JSON carried in
 * the first content item's `text`.
 *
 * @param tabId  Tab to snapshot.
 * @param signal Passed through to `dispatchBrowserTool`.
 * @returns The parsed snapshot object.
 * @throws {Error} When the envelope is flagged `isError`, when it has
 *         no string text content, or (from JSON.parse) when the text
 *         is not valid JSON.
 */
async function fetchSnapshot(tabId, signal) {
	const request = {
		tabId,
		mode: "summary"
	};
	const envelope = await dispatchBrowserTool("browser_read_page", request, signal);
	if (envelope.isError) throw new Error("browser_read_page returned an error envelope; bridge / extension not ready");
	const body = envelope.content?.[0]?.text;
	if (typeof body === "string") return JSON.parse(body);
	throw new Error("browser_read_page returned no text content");
}
|
|
3956
|
+
/**
 * Wrap a payload in a tool-result envelope with a single text item.
 * Strings are used verbatim; anything else is pretty-printed JSON
 * (2-space indent). The `isError: true` flag is added only when
 * `isError` is truthy — otherwise the key is absent.
 */
function toolEnvelope(data, isError) {
	const body = typeof data === "string" ? data : JSON.stringify(data, null, 2);
	const envelope = { content: [{
		type: "text",
		text: body
	}] };
	if (isError) envelope.isError = true;
	return envelope;
}
|
|
3969
|
+
/**
|
|
3970
|
+
* Browser-control MCP tools (`browser_*`). All entries route through
|
|
3971
|
+
* `dispatchBrowserTool()` which (1) runs the bridge-layer URL policy
|
|
3972
|
+
* check, (2) runs the install-check pre-flight (returning structured
|
|
3973
|
+
* install_required JSON when the bridge or extension isn't ready),
|
|
3974
|
+
* and (3) opens a WS to the bridge, sends the tool call, awaits the
|
|
3975
|
+
* response with a per-tool timeout.
|
|
3976
|
+
*
|
|
3977
|
+
* Each entry carries `capability: "browser"` so `browserToolsEnabled()`
|
|
3978
|
+
* in `src/routes/mcp/handler.ts` drops them at both list-time and
|
|
3979
|
+
* call-time when the operator hasn't opted in via `--browse` or
|
|
3980
|
+
* `GH_ROUTER_ENABLE_BROWSE=1`.
|
|
3981
|
+
*
|
|
3982
|
+
* v1 surface: 19 tools (Phases 3 + 4a + 4b + humanlike input v2).
|
|
3983
|
+
*/
|
|
3984
|
+
const BROWSER_TOOLS = Object.freeze([
|
|
3985
|
+
{
|
|
3986
|
+
toolNameHttp: "browser_list_tabs",
|
|
3987
|
+
description: "List all open tabs across all browser windows. Returns each tab's id (used by other browser_* tools), URL, title, active flag, and window id.",
|
|
3988
|
+
inputSchema: {
|
|
3989
|
+
type: "object",
|
|
3990
|
+
additionalProperties: false,
|
|
3991
|
+
properties: {}
|
|
3992
|
+
},
|
|
3993
|
+
capability: "browser",
|
|
3994
|
+
async handler(args, signal) {
|
|
3995
|
+
return dispatchBrowserTool("browser_list_tabs", args, signal);
|
|
3996
|
+
}
|
|
3997
|
+
},
|
|
3998
|
+
{
|
|
3999
|
+
toolNameHttp: "browser_open_tab",
|
|
4000
|
+
description: "Open a URL in a new browser tab and wait for the page to finish loading. Returns the new tab's id, final URL after redirects, and HTTP status. Refuses to navigate to browser-internal settings / preferences / extensions / flags pages (returns {blocked: true, reason}); devtools://* is allowed.",
|
|
4001
|
+
inputSchema: {
|
|
4002
|
+
type: "object",
|
|
4003
|
+
required: ["url"],
|
|
4004
|
+
additionalProperties: false,
|
|
4005
|
+
properties: {
|
|
4006
|
+
url: {
|
|
4007
|
+
type: "string",
|
|
4008
|
+
description: "The URL to load. Maximum 8 KB. Settings / preferences / extensions / flags pages are blocked."
|
|
4009
|
+
},
|
|
4010
|
+
reuseActive: {
|
|
4011
|
+
type: "boolean",
|
|
4012
|
+
description: "When true, navigate the currently active tab instead of opening a new one. Default false."
|
|
4013
|
+
}
|
|
4014
|
+
}
|
|
4015
|
+
},
|
|
4016
|
+
capability: "browser",
|
|
4017
|
+
async handler(args, signal) {
|
|
4018
|
+
return dispatchBrowserTool("browser_open_tab", args, signal);
|
|
4019
|
+
}
|
|
4020
|
+
},
|
|
4021
|
+
{
|
|
4022
|
+
toolNameHttp: "browser_close_tab",
|
|
4023
|
+
description: "Close one or more tabs by tab id.",
|
|
4024
|
+
inputSchema: {
|
|
4025
|
+
type: "object",
|
|
4026
|
+
required: ["tabIds"],
|
|
4027
|
+
additionalProperties: false,
|
|
4028
|
+
properties: { tabIds: {
|
|
4029
|
+
type: "array",
|
|
4030
|
+
items: { type: "number" },
|
|
4031
|
+
description: "Array of tab ids to close (from browser_list_tabs)."
|
|
4032
|
+
} }
|
|
4033
|
+
},
|
|
4034
|
+
capability: "browser",
|
|
4035
|
+
async handler(args, signal) {
|
|
4036
|
+
return dispatchBrowserTool("browser_close_tab", args, signal);
|
|
4037
|
+
}
|
|
4038
|
+
},
|
|
4039
|
+
{
|
|
4040
|
+
toolNameHttp: "browser_navigate",
|
|
4041
|
+
description: "Navigate an existing tab: goto a URL, go back, go forward, or reload. Same URL-blocking policy as browser_open_tab.",
|
|
4042
|
+
inputSchema: {
|
|
4043
|
+
type: "object",
|
|
4044
|
+
required: ["tabId", "action"],
|
|
4045
|
+
additionalProperties: false,
|
|
4046
|
+
properties: {
|
|
4047
|
+
tabId: {
|
|
4048
|
+
type: "number",
|
|
4049
|
+
description: "Tab id from browser_list_tabs / browser_open_tab."
|
|
4050
|
+
},
|
|
3312
4051
|
action: {
|
|
3313
4052
|
type: "string",
|
|
3314
4053
|
enum: [
|
|
@@ -3360,85 +4099,26 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
3360
4099
|
},
|
|
3361
4100
|
{
|
|
3362
4101
|
toolNameHttp: "browser_read_page",
|
|
3363
|
-
description: "
|
|
3364
|
-
inputSchema: {
|
|
3365
|
-
type: "object",
|
|
3366
|
-
required: ["tabId"],
|
|
3367
|
-
additionalProperties: false,
|
|
3368
|
-
properties: { tabId: {
|
|
3369
|
-
type: "number",
|
|
3370
|
-
description: "Tab id from browser_list_tabs / browser_open_tab."
|
|
3371
|
-
} }
|
|
3372
|
-
},
|
|
3373
|
-
capability: "browser",
|
|
3374
|
-
async handler(args, signal) {
|
|
3375
|
-
return dispatchBrowserTool("browser_read_page", args, signal);
|
|
3376
|
-
}
|
|
3377
|
-
},
|
|
3378
|
-
{
|
|
3379
|
-
toolNameHttp: "browser_click",
|
|
3380
|
-
description: "Click an element by ref (from a prior browser_read_page) or CSS selector. Returns {ok, navigated} where navigated=true if the URL changed within ~300ms of the click.",
|
|
4102
|
+
description: "Compressed page snapshot for the model: visible text, interactive elements with stable refs, viewport metadata, and (when present) `visualSurfaces` listing canvas / svg regions that need vision. Each element entry carries `bbox: [x, y, w, h]` in CSS viewport pixels (same coord space as browser_mouse / drag / scroll-at-pointer). Refs (e.g. `e42`) are stable for the lifetime of one read_page snapshot and are the preferred input to follow-up actions over brittle CSS selectors. The `viewport` block (`width`, `height`, `devicePixelRatio`, `scrollX`, `scrollY`) lets you map CSS-px bbox to device-px pixels for browser_screenshot. Mode controls what ships back: `summary` (default, ~5-15 KB) returns only viewport-visible elements/text and drops nameless non-interactive nodes; `full` returns up to 200 elements + 256 KiB of innerText (the legacy behavior — use only when you need off-screen content unscrolled). PREFER browser_act / browser_find for intent-driven interaction; read_page is the lower-level snapshot when you need to enumerate.",
|
|
3381
4103
|
inputSchema: {
|
|
3382
4104
|
type: "object",
|
|
3383
4105
|
required: ["tabId"],
|
|
3384
4106
|
additionalProperties: false,
|
|
3385
4107
|
properties: {
|
|
3386
|
-
tabId: {
|
|
3387
|
-
ref: {
|
|
3388
|
-
type: "string",
|
|
3389
|
-
description: "Element ref from browser_read_page (preferred)."
|
|
3390
|
-
},
|
|
3391
|
-
selector: {
|
|
3392
|
-
type: "string",
|
|
3393
|
-
description: "CSS selector (fallback when no ref)."
|
|
3394
|
-
},
|
|
3395
|
-
button: {
|
|
3396
|
-
type: "string",
|
|
3397
|
-
enum: ["left", "right"],
|
|
3398
|
-
description: "Mouse button. Default 'left'."
|
|
3399
|
-
},
|
|
3400
|
-
clickCount: {
|
|
4108
|
+
tabId: {
|
|
3401
4109
|
type: "number",
|
|
3402
|
-
description: "
|
|
3403
|
-
}
|
|
3404
|
-
}
|
|
3405
|
-
},
|
|
3406
|
-
capability: "browser",
|
|
3407
|
-
async handler(args, signal) {
|
|
3408
|
-
return dispatchBrowserTool("browser_click", args, signal);
|
|
3409
|
-
}
|
|
3410
|
-
},
|
|
3411
|
-
{
|
|
3412
|
-
toolNameHttp: "browser_fill",
|
|
3413
|
-
description: "Type into an input / textarea, select from a dropdown, or toggle a checkbox / radio. Dispatches native input and change events so React-style controlled inputs see the value.",
|
|
3414
|
-
inputSchema: {
|
|
3415
|
-
type: "object",
|
|
3416
|
-
required: ["tabId", "value"],
|
|
3417
|
-
additionalProperties: false,
|
|
3418
|
-
properties: {
|
|
3419
|
-
tabId: { type: "number" },
|
|
3420
|
-
ref: {
|
|
3421
|
-
type: "string",
|
|
3422
|
-
description: "Element ref from browser_read_page (preferred)."
|
|
4110
|
+
description: "Tab id from browser_list_tabs / browser_open_tab."
|
|
3423
4111
|
},
|
|
3424
|
-
|
|
4112
|
+
mode: {
|
|
3425
4113
|
type: "string",
|
|
3426
|
-
|
|
3427
|
-
|
|
3428
|
-
value: { description: "The value to set. String for inputs / textareas / select option value. Boolean for checkbox / radio. Max 1 MB." },
|
|
3429
|
-
clearFirst: {
|
|
3430
|
-
type: "boolean",
|
|
3431
|
-
description: "Clear the input before typing (default true). No effect on select / checkbox."
|
|
3432
|
-
},
|
|
3433
|
-
pressEnter: {
|
|
3434
|
-
type: "boolean",
|
|
3435
|
-
description: "After typing, dispatch Enter keydown / keyup and call form.requestSubmit if available. Default false."
|
|
4114
|
+
enum: ["summary", "full"],
|
|
4115
|
+
description: "Snapshot scope. Default 'summary' returns viewport-visible elements + text capped at 20 KiB. 'full' returns up to 200 interactive elements page-wide + 256 KiB of innerText."
|
|
3436
4116
|
}
|
|
3437
4117
|
}
|
|
3438
4118
|
},
|
|
3439
4119
|
capability: "browser",
|
|
3440
4120
|
async handler(args, signal) {
|
|
3441
|
-
return dispatchBrowserTool("
|
|
4121
|
+
return dispatchBrowserTool("browser_read_page", args, signal);
|
|
3442
4122
|
}
|
|
3443
4123
|
},
|
|
3444
4124
|
{
|
|
@@ -3613,48 +4293,6 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
3613
4293
|
return dispatchBrowserTool("browser_download", args, signal);
|
|
3614
4294
|
}
|
|
3615
4295
|
},
|
|
3616
|
-
{
|
|
3617
|
-
toolNameHttp: "browser_console_logs",
|
|
3618
|
-
description: "Drain console messages a tab has emitted since the last call. The first call for a tab attaches chrome.debugger and starts capturing, so very-early-load messages from before the first call are missed; subsequent calls return everything since the previous drain. Buffer is capped at 1000 entries per tab.",
|
|
3619
|
-
inputSchema: {
|
|
3620
|
-
type: "object",
|
|
3621
|
-
required: ["tabId"],
|
|
3622
|
-
additionalProperties: false,
|
|
3623
|
-
properties: {
|
|
3624
|
-
tabId: { type: "number" },
|
|
3625
|
-
level: {
|
|
3626
|
-
type: "string",
|
|
3627
|
-
enum: [
|
|
3628
|
-
"log",
|
|
3629
|
-
"info",
|
|
3630
|
-
"warn",
|
|
3631
|
-
"error",
|
|
3632
|
-
"debug",
|
|
3633
|
-
"all"
|
|
3634
|
-
],
|
|
3635
|
-
description: "Filter by console level. Default 'all'."
|
|
3636
|
-
}
|
|
3637
|
-
}
|
|
3638
|
-
},
|
|
3639
|
-
capability: "browser",
|
|
3640
|
-
async handler(args, signal) {
|
|
3641
|
-
return dispatchBrowserTool("browser_console_logs", args, signal);
|
|
3642
|
-
}
|
|
3643
|
-
},
|
|
3644
|
-
{
|
|
3645
|
-
toolNameHttp: "browser_network_log",
|
|
3646
|
-
description: "Drain network responses a tab has received since the last call. Same lazy-attach + cap-1000 behavior as browser_console_logs. Returns request URL, method, status, mime type, and timestamp per entry.",
|
|
3647
|
-
inputSchema: {
|
|
3648
|
-
type: "object",
|
|
3649
|
-
required: ["tabId"],
|
|
3650
|
-
additionalProperties: false,
|
|
3651
|
-
properties: { tabId: { type: "number" } }
|
|
3652
|
-
},
|
|
3653
|
-
capability: "browser",
|
|
3654
|
-
async handler(args, signal) {
|
|
3655
|
-
return dispatchBrowserTool("browser_network_log", args, signal);
|
|
3656
|
-
}
|
|
3657
|
-
},
|
|
3658
4296
|
{
|
|
3659
4297
|
toolNameHttp: "browser_mouse",
|
|
3660
4298
|
description: "Move / click / hover / press / release the mouse via real CDP input events (Input.dispatchMouseEvent). Use this when you need behavior that synthetic .click() can't trigger: hover-to-reveal menus, canvas / map / image-map clicks, sites that check event.isTrusted, or precise coordinate targeting. Target with ref (from browser_read_page), CSS selector, or (x, y) in CSS viewport pixels — exactly one. action='move' is the hover (single mouseMoved fires :hover and pointerover reliably). action='dblclick' sends two press/release cycles with incrementing clickCount (a real double-click, not one cycle with clickCount=2). By default the target is hit-tested with elementFromPoint and the call fails with `target_obscured` if the topmost element isn't the target or a descendant — pass force:true to bypass when you know an overlay forwards events.",
|
|
@@ -3806,42 +4444,340 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
3806
4444
|
type: "string",
|
|
3807
4445
|
description: "The text to type. Max 4096 chars. Iterates as Unicode code points (surrogate pairs handled correctly)."
|
|
3808
4446
|
},
|
|
3809
|
-
delayMs: {
|
|
3810
|
-
type: "number",
|
|
3811
|
-
description: "Pause between characters. Default 0. Clamped to [0, 50]. Set > 0 when typing into search-as-you-type inputs that debounce."
|
|
4447
|
+
delayMs: {
|
|
4448
|
+
type: "number",
|
|
4449
|
+
description: "Pause between characters. Default 0. Clamped to [0, 50]. Set > 0 when typing into search-as-you-type inputs that debounce."
|
|
4450
|
+
}
|
|
4451
|
+
}
|
|
4452
|
+
},
|
|
4453
|
+
capability: "browser",
|
|
4454
|
+
async handler(args, signal) {
|
|
4455
|
+
return dispatchBrowserTool("browser_type", args, signal);
|
|
4456
|
+
}
|
|
4457
|
+
},
|
|
4458
|
+
{
|
|
4459
|
+
toolNameHttp: "browser_diagnostics",
|
|
4460
|
+
description: "Drain console messages or network responses for a tab, with filtering. Replaces the prior browser_console_logs / browser_network_log primitives. `kind` selects the stream; remaining params filter the result before it ships to the model so the response carries only what the caller asked for instead of a raw 1000-entry array dump. Lazy-attach behavior: first call for a tab attaches chrome.debugger; very-early-load events from before the first call are missed.",
|
|
4461
|
+
inputSchema: {
|
|
4462
|
+
type: "object",
|
|
4463
|
+
required: ["tabId", "kind"],
|
|
4464
|
+
additionalProperties: false,
|
|
4465
|
+
properties: {
|
|
4466
|
+
tabId: { type: "number" },
|
|
4467
|
+
kind: {
|
|
4468
|
+
type: "string",
|
|
4469
|
+
enum: ["console", "network"],
|
|
4470
|
+
description: "Which stream to drain."
|
|
4471
|
+
},
|
|
4472
|
+
level: {
|
|
4473
|
+
type: "string",
|
|
4474
|
+
enum: [
|
|
4475
|
+
"log",
|
|
4476
|
+
"info",
|
|
4477
|
+
"warn",
|
|
4478
|
+
"error",
|
|
4479
|
+
"debug",
|
|
4480
|
+
"all"
|
|
4481
|
+
],
|
|
4482
|
+
description: "Console only. Default 'all'. Ignored when kind=network."
|
|
4483
|
+
},
|
|
4484
|
+
regex: {
|
|
4485
|
+
type: "string",
|
|
4486
|
+
description: "Optional JS-regex string. Console: matches the message body. Network: matches the request URL."
|
|
4487
|
+
},
|
|
4488
|
+
limit: {
|
|
4489
|
+
type: "number",
|
|
4490
|
+
description: "Max entries to return after filtering. Default 100. Hard cap 1000."
|
|
4491
|
+
}
|
|
4492
|
+
}
|
|
4493
|
+
},
|
|
4494
|
+
capability: "browser",
|
|
4495
|
+
async handler(args, signal) {
|
|
4496
|
+
const kind = args.kind === "network" ? "network" : "console";
|
|
4497
|
+
const tool = kind === "network" ? "browser_network_log" : "browser_console_logs";
|
|
4498
|
+
const tabId = typeof args.tabId === "number" ? args.tabId : void 0;
|
|
4499
|
+
const level = typeof args.level === "string" ? args.level : "all";
|
|
4500
|
+
const regexStr = typeof args.regex === "string" ? args.regex : void 0;
|
|
4501
|
+
const limit = typeof args.limit === "number" ? Math.min(1e3, Math.max(1, args.limit)) : 100;
|
|
4502
|
+
const env = await dispatchBrowserTool(tool, {
|
|
4503
|
+
tabId,
|
|
4504
|
+
level
|
|
4505
|
+
}, signal);
|
|
4506
|
+
if (env.isError) return env;
|
|
4507
|
+
const text = env.content?.[0]?.text;
|
|
4508
|
+
if (typeof text !== "string") return env;
|
|
4509
|
+
let entries;
|
|
4510
|
+
try {
|
|
4511
|
+
const parsed = JSON.parse(text);
|
|
4512
|
+
entries = (Array.isArray(parsed) ? parsed : Array.isArray(parsed?.entries) ? parsed.entries : []).filter((e) => typeof e === "object" && e !== null);
|
|
4513
|
+
} catch {
|
|
4514
|
+
return env;
|
|
4515
|
+
}
|
|
4516
|
+
let filtered = entries;
|
|
4517
|
+
if (regexStr) try {
|
|
4518
|
+
const re = new RegExp(regexStr);
|
|
4519
|
+
const field = kind === "network" ? "url" : "text";
|
|
4520
|
+
filtered = filtered.filter((e) => {
|
|
4521
|
+
const v = e[field];
|
|
4522
|
+
return typeof v === "string" && re.test(v);
|
|
4523
|
+
});
|
|
4524
|
+
} catch {
|
|
4525
|
+
return toolEnvelope({ error: `invalid regex: ${regexStr}` }, true);
|
|
4526
|
+
}
|
|
4527
|
+
const out = filtered.slice(0, limit);
|
|
4528
|
+
return toolEnvelope({
|
|
4529
|
+
kind,
|
|
4530
|
+
total: entries.length,
|
|
4531
|
+
returned: out.length,
|
|
4532
|
+
entries: out
|
|
4533
|
+
});
|
|
4534
|
+
}
|
|
4535
|
+
},
|
|
4536
|
+
{
|
|
4537
|
+
toolNameHttp: "browser_find",
|
|
4538
|
+
description: "Find up to 5 elements matching a natural-language intent ('the search box at the top', 'the Submit button at the bottom of the login form'). Returns ranked candidates with stable refs the model can pass to browser_act (ref mode) or browser_mouse. Cheaper than browser_read_page when you know what you're looking for — the inner compressor (Gemini Flash class) filters the snapshot for you instead of sending the full element list to the lead model.",
|
|
4539
|
+
inputSchema: {
|
|
4540
|
+
type: "object",
|
|
4541
|
+
required: ["tabId", "intent"],
|
|
4542
|
+
additionalProperties: false,
|
|
4543
|
+
properties: {
|
|
4544
|
+
tabId: { type: "number" },
|
|
4545
|
+
intent: {
|
|
4546
|
+
type: "string",
|
|
4547
|
+
description: "Natural-language description of what to find."
|
|
4548
|
+
}
|
|
4549
|
+
}
|
|
4550
|
+
},
|
|
4551
|
+
capability: "browser_compound",
|
|
4552
|
+
async handler(args, signal) {
|
|
4553
|
+
const tabId = typeof args.tabId === "number" ? args.tabId : void 0;
|
|
4554
|
+
const intent = typeof args.intent === "string" ? args.intent : "";
|
|
4555
|
+
if (!tabId) return toolEnvelope({ error: "tabId required" }, true);
|
|
4556
|
+
if (!intent) return toolEnvelope({ error: "intent required" }, true);
|
|
4557
|
+
const snapshot = await fetchSnapshot(tabId, signal);
|
|
4558
|
+
const matches = await pickMatchingElements(snapshot, intent, signal);
|
|
4559
|
+
const indexed = new Map(snapshot.elements.map((e) => [e.ref, e]));
|
|
4560
|
+
return toolEnvelope({ matches: matches.map((m) => {
|
|
4561
|
+
const el = indexed.get(m.ref);
|
|
4562
|
+
return el ? {
|
|
4563
|
+
ref: m.ref,
|
|
4564
|
+
role: el.role,
|
|
4565
|
+
name: el.name,
|
|
4566
|
+
bbox: el.bbox,
|
|
4567
|
+
reason: m.reason
|
|
4568
|
+
} : {
|
|
4569
|
+
ref: m.ref,
|
|
4570
|
+
reason: m.reason
|
|
4571
|
+
};
|
|
4572
|
+
}) });
|
|
4573
|
+
}
|
|
4574
|
+
},
|
|
4575
|
+
{
|
|
4576
|
+
toolNameHttp: "browser_act",
|
|
4577
|
+
description: "Preferred for any click / fill / type / scroll-to action against a tab. Two modes: (1) INTENT mode — pass `intent` as natural language ('click the submit button'); the inner compressor (Gemini Flash class) maps it to an element + action. Auto-escalates to visual fallback (screenshot + multimodal model + pixel-coord click) when the intent points into a canvas / svg region the a11y tree can't see. (2) REF mode — pass `ref` (from a prior browser_find or browser_read_page) and optionally `value`; dispatches directly with zero compressor latency. This is the fold-in path for the now-removed browser_click and browser_fill. Returns {ok, action_taken, target_ref, navigated}.",
|
|
4578
|
+
inputSchema: {
|
|
4579
|
+
type: "object",
|
|
4580
|
+
required: ["tabId"],
|
|
4581
|
+
additionalProperties: false,
|
|
4582
|
+
properties: {
|
|
4583
|
+
tabId: { type: "number" },
|
|
4584
|
+
intent: {
|
|
4585
|
+
type: "string",
|
|
4586
|
+
description: "Natural-language description of the action. Triggers INTENT mode. Mutually exclusive with `ref`."
|
|
4587
|
+
},
|
|
4588
|
+
ref: {
|
|
4589
|
+
type: "string",
|
|
4590
|
+
description: "Element ref from browser_find / browser_read_page. Triggers REF mode (no compressor round-trip)."
|
|
4591
|
+
},
|
|
4592
|
+
action: {
|
|
4593
|
+
type: "string",
|
|
4594
|
+
enum: [
|
|
4595
|
+
"click",
|
|
4596
|
+
"fill",
|
|
4597
|
+
"type",
|
|
4598
|
+
"select",
|
|
4599
|
+
"scroll_into_view"
|
|
4600
|
+
],
|
|
4601
|
+
description: "REF mode only. Defaults to 'click'. In INTENT mode, the compressor picks the action."
|
|
4602
|
+
},
|
|
4603
|
+
value: {
|
|
4604
|
+
type: "string",
|
|
4605
|
+
description: "For fill / type / select: the string value to set. In INTENT mode the compressor uses this when an action requires a value."
|
|
3812
4606
|
}
|
|
3813
4607
|
}
|
|
3814
4608
|
},
|
|
3815
4609
|
capability: "browser",
|
|
3816
4610
|
async handler(args, signal) {
|
|
3817
|
-
|
|
4611
|
+
const tabId = typeof args.tabId === "number" ? args.tabId : void 0;
|
|
4612
|
+
if (!tabId) return toolEnvelope({ error: "tabId required" }, true);
|
|
4613
|
+
const refIn = typeof args.ref === "string" ? args.ref : void 0;
|
|
4614
|
+
const intent = typeof args.intent === "string" ? args.intent : void 0;
|
|
4615
|
+
const value = typeof args.value === "string" ? args.value : void 0;
|
|
4616
|
+
if (!refIn && !intent) return toolEnvelope({ error: "either `ref` (REF mode) or `intent` (INTENT mode) is required" }, true);
|
|
4617
|
+
if (refIn) return dispatchActionByRef(tabId, refIn, typeof args.action === "string" ? args.action : "click", value, signal);
|
|
4618
|
+
const snapshot = await fetchSnapshot(tabId, signal);
|
|
4619
|
+
const picked = await pickElement(snapshot, intent, signal, value);
|
|
4620
|
+
if (!picked.ref || picked.confidence < .5) {
|
|
4621
|
+
const surfaces = snapshot.visualSurfaces;
|
|
4622
|
+
if (surfaces && surfaces.length > 0) {
|
|
4623
|
+
const shotEnv = await dispatchBrowserTool("browser_screenshot", {
|
|
4624
|
+
tabId,
|
|
4625
|
+
format: "png"
|
|
4626
|
+
}, signal);
|
|
4627
|
+
if (shotEnv.isError) return toolEnvelope({
|
|
4628
|
+
ok: false,
|
|
4629
|
+
error: "no text match; screenshot for visual fallback failed",
|
|
4630
|
+
picked
|
|
4631
|
+
}, true);
|
|
4632
|
+
const shotText = shotEnv.content?.[0]?.text;
|
|
4633
|
+
let shot = {};
|
|
4634
|
+
try {
|
|
4635
|
+
shot = shotText ? JSON.parse(shotText) : {};
|
|
4636
|
+
} catch {
|
|
4637
|
+
return toolEnvelope({
|
|
4638
|
+
ok: false,
|
|
4639
|
+
error: "no text match; screenshot envelope unparseable"
|
|
4640
|
+
}, true);
|
|
4641
|
+
}
|
|
4642
|
+
if (!shot.contentType || !shot.dataBase64) return toolEnvelope({
|
|
4643
|
+
ok: false,
|
|
4644
|
+
error: "no text match; screenshot envelope missing fields"
|
|
4645
|
+
}, true);
|
|
4646
|
+
const visual = await pickElementVisual(shot.dataBase64, shot.contentType, intent, surfaces, signal);
|
|
4647
|
+
if (visual.confidence < .5) return toolEnvelope({
|
|
4648
|
+
ok: false,
|
|
4649
|
+
error: "no element matched intent (text + visual)",
|
|
4650
|
+
picked,
|
|
4651
|
+
visual
|
|
4652
|
+
}, true);
|
|
4653
|
+
const clickEnv = await dispatchBrowserTool("browser_mouse", {
|
|
4654
|
+
tabId,
|
|
4655
|
+
action: "click",
|
|
4656
|
+
x: visual.x,
|
|
4657
|
+
y: visual.y,
|
|
4658
|
+
force: true
|
|
4659
|
+
}, signal);
|
|
4660
|
+
if (clickEnv.isError) return clickEnv;
|
|
4661
|
+
return toolEnvelope({
|
|
4662
|
+
ok: true,
|
|
4663
|
+
action_taken: "click_visual",
|
|
4664
|
+
x: visual.x,
|
|
4665
|
+
y: visual.y,
|
|
4666
|
+
confidence: visual.confidence,
|
|
4667
|
+
reason: visual.reason
|
|
4668
|
+
});
|
|
4669
|
+
}
|
|
4670
|
+
return toolEnvelope({
|
|
4671
|
+
ok: false,
|
|
4672
|
+
error: "no element matched intent",
|
|
4673
|
+
picked
|
|
4674
|
+
}, true);
|
|
4675
|
+
}
|
|
4676
|
+
return dispatchActionByRef(tabId, picked.ref, picked.action, picked.value ?? value, signal);
|
|
3818
4677
|
}
|
|
3819
4678
|
},
|
|
3820
4679
|
{
|
|
3821
|
-
toolNameHttp: "
|
|
3822
|
-
description: "
|
|
4680
|
+
toolNameHttp: "browser_extract",
|
|
4681
|
+
description: "Structured extraction from the current page into a JSON object matching the provided schema. The inner compressor reads the page snapshot (text + elements) and synthesizes the typed object. Use this instead of browser_read_page + lead-model parsing when you know the shape you want (e.g. a list of {title, author, url} rows from a PR list).",
|
|
3823
4682
|
inputSchema: {
|
|
3824
4683
|
type: "object",
|
|
3825
|
-
required: [
|
|
4684
|
+
required: [
|
|
4685
|
+
"tabId",
|
|
4686
|
+
"schema",
|
|
4687
|
+
"instruction"
|
|
4688
|
+
],
|
|
3826
4689
|
additionalProperties: false,
|
|
3827
4690
|
properties: {
|
|
3828
4691
|
tabId: { type: "number" },
|
|
3829
|
-
|
|
3830
|
-
|
|
3831
|
-
description: "Element ref from browser_read_page (preferred). Exactly one of ref / selector required."
|
|
3832
|
-
},
|
|
3833
|
-
selector: {
|
|
4692
|
+
schema: { description: "JSON schema (or schema-shaped descriptor) for the desired output shape." },
|
|
4693
|
+
instruction: {
|
|
3834
4694
|
type: "string",
|
|
3835
|
-
description: "
|
|
4695
|
+
description: "What to extract, in plain language ('the visible PR list')."
|
|
3836
4696
|
}
|
|
3837
4697
|
}
|
|
3838
4698
|
},
|
|
3839
|
-
capability: "
|
|
4699
|
+
capability: "browser_compound",
|
|
3840
4700
|
async handler(args, signal) {
|
|
3841
|
-
|
|
4701
|
+
const tabId = typeof args.tabId === "number" ? args.tabId : void 0;
|
|
4702
|
+
const instruction = typeof args.instruction === "string" ? args.instruction : "";
|
|
4703
|
+
const schema = args.schema;
|
|
4704
|
+
if (!tabId) return toolEnvelope({ error: "tabId required" }, true);
|
|
4705
|
+
if (!instruction) return toolEnvelope({ error: "instruction required" }, true);
|
|
4706
|
+
if (!schema) return toolEnvelope({ error: "schema required" }, true);
|
|
4707
|
+
const snapshot = await fetchSnapshot(tabId, signal);
|
|
4708
|
+
try {
|
|
4709
|
+
return toolEnvelope(await extractStructured(snapshot, schema, instruction, signal));
|
|
4710
|
+
} catch (err) {
|
|
4711
|
+
if (err instanceof SchemaValidationError) return toolEnvelope({ error: `invalid schema: ${err.message}` }, true);
|
|
4712
|
+
if (err instanceof ResultShapeError) return toolEnvelope({ error: `extraction produced wrong shape: ${err.message}` }, true);
|
|
4713
|
+
throw err;
|
|
4714
|
+
}
|
|
3842
4715
|
}
|
|
3843
4716
|
}
|
|
3844
4717
|
]);
|
|
4718
|
+
/**
 * Dispatch an action against a known element ref via the matching primitive
 * tool. Shared by REF mode and the INTENT-mode text match in `browser_act`.
 *
 * Action -> primitive mapping:
 *   click            -> browser_click
 *   fill / select    -> browser_fill
 *   type             -> browser_click (to focus the element), then browser_type
 *   scroll_into_view -> browser_scroll with target "element"
 *
 * @param tabId  Tab to act on.
 * @param ref    Element ref taken from a page snapshot.
 * @param action One of the actions listed above; anything else yields an
 *               `unknown action` error envelope.
 * @param value  String used by fill / select / type (type falls back to "").
 * @param signal Abort signal forwarded to every primitive dispatch.
 * @returns An MCP envelope: the primitive's own error envelope on failure,
 *          otherwise `{ ok, action_taken, target_ref, navigated }`.
 */
async function dispatchActionByRef(tabId, ref, action, value, signal) {
	let env;
	switch (action) {
		case "click":
			env = await dispatchBrowserTool("browser_click", {
				tabId,
				ref
			}, signal);
			break;
		case "fill":
		case "select":
			env = await dispatchBrowserTool("browser_fill", {
				tabId,
				ref,
				value
			}, signal);
			break;
		case "type": {
			// browser_type carries no ref, so the element is focused with a
			// click first. If that click fails, stop: typing anyway would send
			// the text to whatever currently has focus and report success.
			const focusEnv = await dispatchBrowserTool("browser_click", {
				tabId,
				ref
			}, signal);
			if (focusEnv.isError) return focusEnv;
			env = await dispatchBrowserTool("browser_type", {
				tabId,
				text: value ?? ""
			}, signal);
			break;
		}
		case "scroll_into_view":
			env = await dispatchBrowserTool("browser_scroll", {
				tabId,
				target: "element",
				ref
			}, signal);
			break;
		default: return toolEnvelope({
			ok: false,
			error: `unknown action: ${action}`
		}, true);
	}
	if (env.isError) return env;

	// `navigated` is optional metadata read from the primitive's JSON text
	// payload. The payload may be missing, unparseable, or parse to `null` or
	// a primitive; in all of those cases the field is simply omitted.
	const innerText = env.content?.[0]?.text;
	let parsed = null;
	if (typeof innerText === "string") try {
		parsed = JSON.parse(innerText);
	} catch {
		// Best-effort: a non-JSON payload only means `navigated` is unknown.
	}
	return toolEnvelope({
		ok: true,
		action_taken: action,
		target_ref: ref,
		navigated: typeof parsed?.navigated === "boolean" ? parsed.navigated : void 0
	});
}
|
|
3845
4781
|
|
|
3846
4782
|
//#endregion
|
|
3847
4783
|
//#region src/vendor/pi/ai/api-registry.ts
|
|
@@ -5416,7 +6352,7 @@ const MAX_INFLIGHT_WORKER_CALLS = (() => {
|
|
|
5416
6352
|
if (!Number.isFinite(n) || n <= 0 || !Number.isInteger(n)) return 8;
|
|
5417
6353
|
return n;
|
|
5418
6354
|
})();
|
|
5419
|
-
let inFlight
|
|
6355
|
+
let inFlight = 0;
|
|
5420
6356
|
/**
|
|
5421
6357
|
* Acquire a worker slot.
|
|
5422
6358
|
*
|
|
@@ -5434,176 +6370,16 @@ let inFlight$1 = 0;
|
|
|
5434
6370
|
*/
|
|
5435
6371
|
async function acquireWorkerSlot(signal) {
|
|
5436
6372
|
if (signal?.aborted) return null;
|
|
5437
|
-
if (inFlight
|
|
5438
|
-
inFlight
|
|
6373
|
+
if (inFlight >= MAX_INFLIGHT_WORKER_CALLS) return null;
|
|
6374
|
+
inFlight += 1;
|
|
5439
6375
|
let released = false;
|
|
5440
6376
|
return () => {
|
|
5441
6377
|
if (released) return;
|
|
5442
6378
|
released = true;
|
|
5443
|
-
inFlight
|
|
5444
|
-
};
|
|
5445
|
-
}
|
|
5446
|
-
|
|
5447
|
-
//#endregion
|
|
5448
|
-
//#region src/lib/diagnose-response.ts
|
|
5449
|
-
const PREVIEW_LIMIT = 200;
|
|
5450
|
-
async function parseJsonOrDiagnose(response, routePath) {
|
|
5451
|
-
const cloned = response.clone();
|
|
5452
|
-
try {
|
|
5453
|
-
return await response.json();
|
|
5454
|
-
} catch (error) {
|
|
5455
|
-
const contentType = response.headers.get("content-type") ?? "(none)";
|
|
5456
|
-
const bodyText = await cloned.text().catch(() => "(unreadable)");
|
|
5457
|
-
const preview = bodyText.length > PREVIEW_LIMIT ? bodyText.slice(0, PREVIEW_LIMIT) + "...(truncated)" : bodyText;
|
|
5458
|
-
consola.error(`Upstream JSON parse failed at ${routePath}: status=${response.status} content-type="${contentType}" body[0..${PREVIEW_LIMIT}]=${JSON.stringify(preview)}`);
|
|
5459
|
-
throw error;
|
|
5460
|
-
}
|
|
5461
|
-
}
|
|
5462
|
-
|
|
5463
|
-
//#endregion
|
|
5464
|
-
//#region src/lib/response-cap.ts
|
|
5465
|
-
/**
|
|
5466
|
-
* Hard byte cap for non-streaming upstream response bodies.
|
|
5467
|
-
*
|
|
5468
|
-
* Anthropic responses with large tool_use blocks can legitimately reach
|
|
5469
|
-
* several MB, but a multi-GB body is either a buggy upstream or a malicious
|
|
5470
|
-
* one. Buffering it would OOM the proxy and crash all in-flight requests.
|
|
5471
|
-
*
|
|
5472
|
-
* Applies to /v1/messages, /v1/chat/completions, and /v1/responses.
|
|
5473
|
-
*/
|
|
5474
|
-
const MAX_RESPONSE_BODY_BYTES = 10 * 1024 * 1024;
|
|
5475
|
-
/**
|
|
5476
|
-
* Read a Response body with a hard byte cap, then parse as JSON.
|
|
5477
|
-
*
|
|
5478
|
-
* Falls back to the fast path (response.json()) when Content-Length is
|
|
5479
|
-
* present and within the cap, avoiding the streaming-reader overhead for
|
|
5480
|
-
* the vast majority of normal responses.
|
|
5481
|
-
*
|
|
5482
|
-
* When the cap is hit:
|
|
5483
|
-
* - the reader is cancelled to release the upstream socket
|
|
5484
|
-
* - a structured Anthropic-format error is returned to the caller
|
|
5485
|
-
* (the caller wraps it in c.json(), not throws — the client gets a
|
|
5486
|
-
* clean 413 error, not an unhandled-rejection crash)
|
|
5487
|
-
*
|
|
5488
|
-
* Returns `{ ok: true, value }` on success or `{ ok: false, errorResponse, status }`
|
|
5489
|
-
* on cap exceeded.
|
|
5490
|
-
*/
|
|
5491
|
-
async function readResponseBodyCapped(response, routePath, capBytes = MAX_RESPONSE_BODY_BYTES) {
|
|
5492
|
-
const contentLengthHeader = response.headers.get("content-length");
|
|
5493
|
-
const contentLength = contentLengthHeader ? parseInt(contentLengthHeader, 10) : NaN;
|
|
5494
|
-
if (!isNaN(contentLength) && contentLength <= capBytes) return {
|
|
5495
|
-
ok: true,
|
|
5496
|
-
value: await parseJsonOrDiagnose(response, routePath)
|
|
5497
|
-
};
|
|
5498
|
-
const reader = response.body?.getReader();
|
|
5499
|
-
if (!reader) return {
|
|
5500
|
-
ok: true,
|
|
5501
|
-
value: await parseJsonOrDiagnose(response, routePath)
|
|
6379
|
+
inFlight = Math.max(0, inFlight - 1);
|
|
5502
6380
|
};
|
|
5503
|
-
const chunks = [];
|
|
5504
|
-
let totalBytes = 0;
|
|
5505
|
-
let capped = false;
|
|
5506
|
-
try {
|
|
5507
|
-
while (true) {
|
|
5508
|
-
const { done, value } = await reader.read();
|
|
5509
|
-
if (done) break;
|
|
5510
|
-
if (!value) continue;
|
|
5511
|
-
totalBytes += value.byteLength;
|
|
5512
|
-
if (totalBytes > capBytes) {
|
|
5513
|
-
capped = true;
|
|
5514
|
-
try {
|
|
5515
|
-
await reader.cancel("size_cap");
|
|
5516
|
-
} catch {}
|
|
5517
|
-
break;
|
|
5518
|
-
}
|
|
5519
|
-
chunks.push(value);
|
|
5520
|
-
}
|
|
5521
|
-
} catch (err) {
|
|
5522
|
-
if (!capped) consola.warn(`readResponseBodyCapped: read error at ${routePath}:`, err);
|
|
5523
|
-
}
|
|
5524
|
-
if (capped) {
|
|
5525
|
-
consola.warn(`Non-streaming upstream response at ${routePath} exceeded ${capBytes} bytes (10 MiB cap); dropping body to prevent OOM. Check upstream health.`);
|
|
5526
|
-
return {
|
|
5527
|
-
ok: false,
|
|
5528
|
-
status: 502,
|
|
5529
|
-
errorResponse: {
|
|
5530
|
-
type: "error",
|
|
5531
|
-
error: {
|
|
5532
|
-
type: "api_error",
|
|
5533
|
-
message: `Upstream response body exceeded the 10 MiB size cap for non-streaming ${routePath}. The upstream may be misbehaving. Try enabling streaming (stream: true) which handles large responses chunk-by-chunk.`
|
|
5534
|
-
}
|
|
5535
|
-
}
|
|
5536
|
-
};
|
|
5537
|
-
}
|
|
5538
|
-
const merged = new Uint8Array(totalBytes);
|
|
5539
|
-
let offset = 0;
|
|
5540
|
-
for (const chunk of chunks) {
|
|
5541
|
-
merged.set(chunk, offset);
|
|
5542
|
-
offset += chunk.byteLength;
|
|
5543
|
-
}
|
|
5544
|
-
const text = new TextDecoder().decode(merged);
|
|
5545
|
-
try {
|
|
5546
|
-
return {
|
|
5547
|
-
ok: true,
|
|
5548
|
-
value: JSON.parse(text)
|
|
5549
|
-
};
|
|
5550
|
-
} catch (err) {
|
|
5551
|
-
const preview = text.slice(0, 200);
|
|
5552
|
-
const contentType = response.headers.get("content-type") ?? "(none)";
|
|
5553
|
-
consola.error(`Upstream JSON parse failed at ${routePath}: status=${response.status} content-type="${contentType}" body[0..200]=${JSON.stringify(preview)}`);
|
|
5554
|
-
throw err;
|
|
5555
|
-
}
|
|
5556
6381
|
}
|
|
5557
6382
|
|
|
5558
|
-
//#endregion
|
|
5559
|
-
//#region src/services/copilot/create-chat-completions.ts
|
|
5560
|
-
const createChatCompletions = async (payload, modelHeaders, callerSignal) => {
|
|
5561
|
-
if (!state.copilotToken) throw new Error("Copilot token not found");
|
|
5562
|
-
const enableVision = payload.messages.some((x) => typeof x.content !== "string" && x.content?.some((x$1) => x$1.type === "image_url"));
|
|
5563
|
-
const isAgentCall = payload.messages.some((msg) => ["assistant", "tool"].includes(msg.role));
|
|
5564
|
-
const url = `${copilotBaseUrl(state)}/chat/completions`;
|
|
5565
|
-
const doFetch = () => {
|
|
5566
|
-
const fetchInit = {
|
|
5567
|
-
method: "POST",
|
|
5568
|
-
headers: {
|
|
5569
|
-
...copilotHeaders(state, enableVision),
|
|
5570
|
-
...modelHeaders,
|
|
5571
|
-
"X-Initiator": isAgentCall ? "agent" : "user"
|
|
5572
|
-
},
|
|
5573
|
-
body: JSON.stringify(payload)
|
|
5574
|
-
};
|
|
5575
|
-
const signals = [];
|
|
5576
|
-
if (UPSTREAM_FETCH_TIMEOUT_MS > 0) signals.push(AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS));
|
|
5577
|
-
if (callerSignal) signals.push(callerSignal);
|
|
5578
|
-
if (signals.length === 1) fetchInit.signal = signals[0];
|
|
5579
|
-
else if (signals.length > 1) fetchInit.signal = AbortSignal.any(signals);
|
|
5580
|
-
return fetch(url, fetchInit);
|
|
5581
|
-
};
|
|
5582
|
-
const response = await tryRefreshAndRetry(doFetch, "/chat/completions");
|
|
5583
|
-
if (!response.ok) {
|
|
5584
|
-
let errorBody = "";
|
|
5585
|
-
try {
|
|
5586
|
-
errorBody = await response.text();
|
|
5587
|
-
} catch {
|
|
5588
|
-
errorBody = "(could not read error body)";
|
|
5589
|
-
}
|
|
5590
|
-
const claudeModels = state.models?.data.filter((m) => m.id.startsWith("claude")).map((m) => m.id).join(", ") ?? "(models not loaded)";
|
|
5591
|
-
consola.error(`Copilot rejected model "${payload.model}": ${response.status} ${errorBody} (available Claude models: ${claudeModels})`);
|
|
5592
|
-
throw new HTTPError("Failed to create chat completions", new Response(errorBody, {
|
|
5593
|
-
status: response.status,
|
|
5594
|
-
statusText: response.statusText,
|
|
5595
|
-
headers: response.headers
|
|
5596
|
-
}));
|
|
5597
|
-
}
|
|
5598
|
-
if (payload.stream) return events(response);
|
|
5599
|
-
const cappedResult = await readResponseBodyCapped(response, "/v1/chat/completions", MAX_RESPONSE_BODY_BYTES);
|
|
5600
|
-
if (!cappedResult.ok) throw new HTTPError("Upstream /v1/chat/completions response exceeded 10 MiB size cap", new Response(JSON.stringify(cappedResult.errorResponse), {
|
|
5601
|
-
status: cappedResult.status,
|
|
5602
|
-
headers: { "content-type": "application/json" }
|
|
5603
|
-
}));
|
|
5604
|
-
return cappedResult.value;
|
|
5605
|
-
};
|
|
5606
|
-
|
|
5607
6383
|
//#endregion
|
|
5608
6384
|
//#region src/lib/worker-agent/stream-fn.ts
|
|
5609
6385
|
function createCopilotStreamFn(opts) {
|
|
@@ -6057,56 +6833,6 @@ function isAbortError(err) {
|
|
|
6057
6833
|
return false;
|
|
6058
6834
|
}
|
|
6059
6835
|
|
|
6060
|
-
//#endregion
|
|
6061
|
-
//#region src/lib/mcp-inflight.ts
|
|
6062
|
-
/**
|
|
6063
|
-
* Shared concurrency cap for MCP `tools/call` dispatches.
|
|
6064
|
-
*
|
|
6065
|
-
* Originally lived as a module-private counter inside
|
|
6066
|
-
* `src/routes/mcp/handler.ts`. Extracted because the worker-agent's
|
|
6067
|
-
* `peer_review` and `advisor` tools (which dispatch to peer-model
|
|
6068
|
-
* personas / the advisor responses endpoint from inside a worker
|
|
6069
|
-
* subagent loop) must participate in the same backpressure budget;
|
|
6070
|
-
* otherwise a single worker can fan out unboundedly to peers and
|
|
6071
|
-
* starve the operator's own `tools/list` callers.
|
|
6072
|
-
*
|
|
6073
|
-
* The counter is a single process-wide integer — no per-route
|
|
6074
|
-
* partitioning. Persona calls at the MCP boundary (handler.ts),
|
|
6075
|
-
* peer/advisor calls nested inside a worker (tools.ts), and any
|
|
6076
|
-
* future MCP-adjacent dispatcher all increment the same number.
|
|
6077
|
-
*
|
|
6078
|
-
* Cap = `MAX_INFLIGHT_TOOLS_CALL = 8`. Justification lives at the
|
|
6079
|
-
* historical home (`src/routes/mcp/handler.ts` comment block); do not
|
|
6080
|
-
* change the value without re-reading
|
|
6081
|
-
* `docs/research/peer-mcp-investigation.md` § "Concurrency cap
|
|
6082
|
-
* investigation".
|
|
6083
|
-
*/
|
|
6084
|
-
const MAX_INFLIGHT_TOOLS_CALL = 8;
|
|
6085
|
-
let inFlight = 0;
|
|
6086
|
-
/**
|
|
6087
|
-
* Acquire a slot if one is available. Returns a release function the
|
|
6088
|
-
* caller MUST invoke exactly once (typically from a `finally` block);
|
|
6089
|
-
* returns `null` if the cap is saturated. The release fn is idempotent
|
|
6090
|
-
* — calling it twice is a no-op so callers can release defensively
|
|
6091
|
-
* without worrying about double-decrementing the counter under unusual
|
|
6092
|
-
* unwind paths.
|
|
6093
|
-
*
|
|
6094
|
-
* Synchronous on purpose. Async semaphore acquisition would let callers
|
|
6095
|
-
* queue indefinitely; we want immediate "queue full" feedback so the
|
|
6096
|
-
* MCP client (or the model holding the nested tool call) can choose to
|
|
6097
|
-
* back off or retry.
|
|
6098
|
-
*/
|
|
6099
|
-
function acquireInFlightSlot() {
|
|
6100
|
-
if (inFlight >= MAX_INFLIGHT_TOOLS_CALL) return null;
|
|
6101
|
-
inFlight++;
|
|
6102
|
-
let released = false;
|
|
6103
|
-
return () => {
|
|
6104
|
-
if (released) return;
|
|
6105
|
-
released = true;
|
|
6106
|
-
inFlight--;
|
|
6107
|
-
};
|
|
6108
|
-
}
|
|
6109
|
-
|
|
6110
6836
|
//#endregion
|
|
6111
6837
|
//#region src/lib/tokenizer.ts
|
|
6112
6838
|
const ENCODING_MAP = {
|
|
@@ -6563,6 +7289,26 @@ function workerToolsEnabled() {
|
|
|
6563
7289
|
if (!found) return false;
|
|
6564
7290
|
return found.capabilities?.supports?.tool_calls === true;
|
|
6565
7291
|
}
|
|
7292
|
+
/**
 * Availability gate for tools tagged with the `browser_compound` capability
 * (the compound L2 browser tools that depend on the inner compressor).
 *
 * The result is exactly what `compressorAvailable()` reports. Per the
 * original note, that means at least one model of the compressor fallback
 * chain (`gemini-3.5-flash` → `gpt-5.4-mini` → `claude-haiku-4-5`) is in the
 * live catalog with `tool_calls` support; when none is, the gated tools are
 * dropped from `tools/list` and `tools/call` rejects them with -32601.
 *
 * The `browser` opt-in is deliberately not checked here. Callers combine
 * this gate with `browserToolsEnabled()` themselves.
 *
 * @returns {boolean} Whatever `compressorAvailable()` returns.
 */
function browserCompoundToolsEnabled() {
	const compressorReachable = compressorAvailable();
	return compressorReachable;
}
|
|
6566
7312
|
|
|
6567
7313
|
//#endregion
|
|
6568
7314
|
//#region src/routes/mcp/handler.ts
|
|
@@ -6739,6 +7485,7 @@ function toolEntries() {
|
|
|
6739
7485
|
if (t.capability === "worker") return workerToolsEnabled();
|
|
6740
7486
|
if (t.capability === "stand_in") return standInToolEnabled();
|
|
6741
7487
|
if (t.capability === "browser") return browserToolsEnabled();
|
|
7488
|
+
if (t.capability === "browser_compound") return browserToolsEnabled() && browserCompoundToolsEnabled();
|
|
6742
7489
|
return true;
|
|
6743
7490
|
}).map((t) => ({
|
|
6744
7491
|
name: t.toolNameHttp,
|
|
@@ -7030,6 +7777,7 @@ async function handleToolsCall(body) {
|
|
|
7030
7777
|
if (nonPersonaTool && nonPersonaTool.capability === "worker" && !workerToolsEnabled()) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
|
|
7031
7778
|
if (nonPersonaTool && nonPersonaTool.capability === "stand_in" && !standInToolEnabled()) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
|
|
7032
7779
|
if (nonPersonaTool && nonPersonaTool.capability === "browser" && !browserToolsEnabled()) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
|
|
7780
|
+
if (nonPersonaTool && nonPersonaTool.capability === "browser_compound" && !(browserToolsEnabled() && browserCompoundToolsEnabled())) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
|
|
7033
7781
|
let personaPrompt;
|
|
7034
7782
|
let personaContext;
|
|
7035
7783
|
let personaEffort;
|
|
@@ -10583,6 +11331,7 @@ function buildPeerAwarenessSnippet(opts) {
|
|
|
10583
11331
|
if (opts.workerToolsAvailable) para2Parts.push("`worker_explore` runs a Gemini-backed read-only worker that returns a summary, using its own context rather than yours; concurrent launches share the `MAX_INFLIGHT_TOOLS_CALL=8` cap with operator traffic.", "`worker_implement` is the same worker with edit/write/bash; `worktree: true` runs it in an isolated git worktree and returns the diff.", "Workers themselves have `code_search` in their toolset.");
|
|
10584
11332
|
para2Parts.push("`web_search` surfaces citable sources for docs, errors, and upstream issues.");
|
|
10585
11333
|
if (opts.standInAvailable) para2Parts.push("`stand_in` provides three-lab consensus for decision tiebreak when the user is unavailable.");
|
|
11334
|
+
if (opts.browseAvailable) para2Parts.push("`browser_*` tools (under `mcp__gh-router-peers__browser_*`) drive a real Chrome / Edge browser via a local extension; prefer the L2 compound tools `browser_act(intent | ref, value?)` / `browser_find(intent)` / `browser_extract(schema, instruction)` over the L0/L1 primitives.");
|
|
10586
11335
|
return [
|
|
10587
11336
|
"## Peer review and advisor",
|
|
10588
11337
|
"",
|
|
@@ -12045,7 +12794,7 @@ function initProxyFromEnv() {
|
|
|
12045
12794
|
//#endregion
|
|
12046
12795
|
//#region package.json
|
|
12047
12796
|
var name = "github-router";
|
|
12048
|
-
var version = "0.3.
|
|
12797
|
+
var version$1 = "0.3.52";
|
|
12049
12798
|
|
|
12050
12799
|
//#endregion
|
|
12051
12800
|
//#region src/lib/approval.ts
|
|
@@ -13716,7 +14465,7 @@ server.use(cors());
|
|
|
13716
14465
|
server.get("/", (c) => c.text("Server running"));
|
|
13717
14466
|
server.get("/version", (c) => c.json({
|
|
13718
14467
|
name,
|
|
13719
|
-
version,
|
|
14468
|
+
version: version$1,
|
|
13720
14469
|
gitSha: process.env.GITHUB_SHA ?? "unknown"
|
|
13721
14470
|
}));
|
|
13722
14471
|
server.on("HEAD", ["/"], (c) => c.body(null, 200));
|
|
@@ -14150,7 +14899,8 @@ const claude = defineCommand({
|
|
|
14150
14899
|
codexCli: backend === "cli",
|
|
14151
14900
|
geminiAvailable: geminiAvailable$1,
|
|
14152
14901
|
workerToolsAvailable: workerToolsEnabled(),
|
|
14153
|
-
standInAvailable: standInToolEnabled()
|
|
14902
|
+
standInAvailable: standInToolEnabled(),
|
|
14903
|
+
browseAvailable: state.browseEnabled
|
|
14154
14904
|
});
|
|
14155
14905
|
extraArgs.push("--append-system-prompt", peerSnippet);
|
|
14156
14906
|
try {
|
|
@@ -14247,7 +14997,7 @@ const codex = defineCommand({
|
|
|
14247
14997
|
|
|
14248
14998
|
//#endregion
|
|
14249
14999
|
//#region src/debug.ts
|
|
14250
|
-
async function getPackageVersion() {
|
|
15000
|
+
async function getPackageVersion$1() {
|
|
14251
15001
|
try {
|
|
14252
15002
|
const packageJsonPath = new URL("../package.json", import.meta.url).pathname;
|
|
14253
15003
|
return JSON.parse(await fs.readFile(packageJsonPath)).version;
|
|
@@ -14273,9 +15023,9 @@ async function checkTokenExists() {
|
|
|
14273
15023
|
}
|
|
14274
15024
|
}
|
|
14275
15025
|
async function getDebugInfo() {
|
|
14276
|
-
const [version$
|
|
15026
|
+
const [version$2, tokenExists] = await Promise.all([getPackageVersion$1(), checkTokenExists()]);
|
|
14277
15027
|
return {
|
|
14278
|
-
version: version$
|
|
15028
|
+
version: version$2,
|
|
14279
15029
|
runtime: getRuntimeInfo(),
|
|
14280
15030
|
paths: {
|
|
14281
15031
|
APP_DIR: PATHS.APP_DIR,
|
|
@@ -14597,9 +15347,12 @@ process.on("uncaughtException", (error) => {
|
|
|
14597
15347
|
consola.error("Uncaught exception:", error);
|
|
14598
15348
|
process.exit(1);
|
|
14599
15349
|
});
|
|
15350
|
+
const version = getPackageVersion();
|
|
15351
|
+
if (!process.argv.slice(2).includes("--version")) consola.info(`github-router v${version}`);
|
|
14600
15352
|
await runMain(defineCommand({
|
|
14601
15353
|
meta: {
|
|
14602
15354
|
name: "github-router",
|
|
15355
|
+
version,
|
|
14603
15356
|
description: "A reverse proxy that exposes GitHub Copilot as OpenAI and Anthropic compatible API endpoints."
|
|
14604
15357
|
},
|
|
14605
15358
|
subCommands: {
|