github-router 0.3.45 → 0.3.66
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser-bridge/index.js +38 -1
- package/dist/browser-ext/background.js +708 -77
- package/dist/browser-ext/manifest.json +5 -2
- package/dist/browser-ext/snapshot-cdp.js +438 -0
- package/dist/browser-ext/snapshot.js +101 -0
- package/dist/main.js +2281 -449
- package/dist/main.js.map +1 -1
- package/package.json +18 -18
package/dist/main.js
CHANGED
|
@@ -8,7 +8,7 @@ import { createHash, randomBytes, randomUUID, timingSafeEqual } from "node:crypt
|
|
|
8
8
|
import fs, { readFile, stat } from "node:fs/promises";
|
|
9
9
|
import os, { homedir, platform } from "node:os";
|
|
10
10
|
import * as path$1 from "node:path";
|
|
11
|
-
import path from "node:path";
|
|
11
|
+
import path, { dirname, join } from "node:path";
|
|
12
12
|
import process$1 from "node:process";
|
|
13
13
|
import { execFile, execFileSync, spawn, spawnSync } from "node:child_process";
|
|
14
14
|
import { promisify } from "node:util";
|
|
@@ -17,13 +17,13 @@ import { createInterface } from "node:readline";
|
|
|
17
17
|
import Parser from "web-tree-sitter";
|
|
18
18
|
import WebSocket from "ws";
|
|
19
19
|
import { fileURLToPath } from "node:url";
|
|
20
|
+
import { events } from "fetch-event-stream";
|
|
20
21
|
import { Type } from "typebox";
|
|
21
22
|
import "partial-json";
|
|
22
23
|
import { Compile } from "typebox/compile";
|
|
23
24
|
import { Value } from "typebox/value";
|
|
24
25
|
import "yaml";
|
|
25
26
|
import "ignore";
|
|
26
|
-
import { events } from "fetch-event-stream";
|
|
27
27
|
import { z } from "zod";
|
|
28
28
|
import { Writable } from "node:stream";
|
|
29
29
|
import { serve } from "srvx";
|
|
@@ -45,6 +45,8 @@ const state = {
|
|
|
45
45
|
showToken: false,
|
|
46
46
|
extendedBetas: false,
|
|
47
47
|
browseEnabled: false,
|
|
48
|
+
powerBrowseEnabled: false,
|
|
49
|
+
humanlikeForce: "auto",
|
|
48
50
|
sessionId: randomUUID(),
|
|
49
51
|
machineId: randomBytes(32).toString("hex")
|
|
50
52
|
};
|
|
@@ -62,14 +64,14 @@ function copilotVersion(state$1) {
|
|
|
62
64
|
const API_VERSION = "2026-01-09";
|
|
63
65
|
const copilotBaseUrl = (state$1) => state$1.copilotApiUrl ?? "https://api.githubcopilot.com";
|
|
64
66
|
const copilotHeaders = (state$1, vision = false, integrationId = "vscode-chat") => {
|
|
65
|
-
const version$
|
|
67
|
+
const version$2 = copilotVersion(state$1);
|
|
66
68
|
const headers = {
|
|
67
69
|
Authorization: `Bearer ${state$1.copilotToken}`,
|
|
68
70
|
"content-type": standardHeaders()["content-type"],
|
|
69
71
|
"copilot-integration-id": integrationId,
|
|
70
72
|
"editor-version": `vscode/${state$1.vsCodeVersion}`,
|
|
71
|
-
"editor-plugin-version": `copilot-chat/${version$
|
|
72
|
-
"user-agent": `GitHubCopilotChat/${version$
|
|
73
|
+
"editor-plugin-version": `copilot-chat/${version$2}`,
|
|
74
|
+
"user-agent": `GitHubCopilotChat/${version$2}`,
|
|
73
75
|
"openai-intent": "conversation-panel",
|
|
74
76
|
"x-interaction-type": "conversation-panel",
|
|
75
77
|
"x-github-api-version": API_VERSION,
|
|
@@ -538,9 +540,9 @@ const cacheVSCodeVersion = async () => {
|
|
|
538
540
|
consola.info(`Using VSCode version: ${response}`);
|
|
539
541
|
};
|
|
540
542
|
/**
 * Look up the Copilot Chat version, remember it on the shared `state`
 * object, and log which version is in use.
 */
const cacheCopilotVersion = async () => {
  const copilotChatVersion = await getCopilotChatVersion();
  state.copilotVersion = copilotChatVersion;
  consola.info(`Using Copilot Chat version: ${copilotChatVersion}`);
};
|
|
545
547
|
|
|
546
548
|
//#endregion
|
|
@@ -1117,10 +1119,10 @@ function getCodexVersion() {
|
|
|
1117
1119
|
};
|
|
1118
1120
|
const major = Number.parseInt(m[1], 10);
|
|
1119
1121
|
const minor = Number.parseInt(m[2], 10);
|
|
1120
|
-
const version$
|
|
1122
|
+
const version$2 = `${m[1]}.${m[2]}.${m[3]}`;
|
|
1121
1123
|
return {
|
|
1122
1124
|
ok: major > 0 || major === 0 && minor >= 129,
|
|
1123
|
-
version: version$
|
|
1125
|
+
version: version$2
|
|
1124
1126
|
};
|
|
1125
1127
|
}
|
|
1126
1128
|
/**
|
|
@@ -2471,6 +2473,33 @@ function round4(x) {
|
|
|
2471
2473
|
return Math.round(x * 1e4) / 1e4;
|
|
2472
2474
|
}
|
|
2473
2475
|
|
|
2476
|
+
//#endregion
|
|
2477
|
+
//#region src/lib/version.ts
|
|
2478
|
+
/**
 * Report the published version of this package by reading package.json
 * at runtime rather than relying on a value inlined at build time.
 *
 * Two locations are tried, in order: two directories above this module,
 * then one directory above it. A candidate is accepted only when its
 * `version` is a string and its `name` is "github-router" or
 * "@animeshkundu/github-router".
 *
 * @returns {string} The version string, or "unknown" when no candidate
 *   can be read, parsed and matched. Never throws.
 */
function getPackageVersion() {
  const acceptedNames = ["github-router", "@animeshkundu/github-router"];
  try {
    const moduleDir = dirname(fileURLToPath(import.meta.url));
    const manifestPaths = [["..", ".."], [".."]].map((up) => join(moduleDir, ...up, "package.json"));
    for (const manifestPath of manifestPaths) {
      try {
        const manifest = JSON.parse(readFileSync(manifestPath, "utf8"));
        const hasVersion = typeof manifest.version === "string";
        if (hasVersion && acceptedNames.includes(manifest.name)) return manifest.version;
      } catch {
        // Missing or malformed candidate: try the next location.
      }
    }
  } catch {
    // Could not even resolve this module's directory: fall through.
  }
  return "unknown";
}
|
|
2502
|
+
|
|
2474
2503
|
//#endregion
|
|
2475
2504
|
//#region src/lib/browser-mcp/browser-detect.ts
|
|
2476
2505
|
let cached;
|
|
@@ -2879,16 +2908,94 @@ function loadStableExtensionId() {
|
|
|
2879
2908
|
} catch {}
|
|
2880
2909
|
return "unknown";
|
|
2881
2910
|
}
|
|
2882
|
-
|
|
2911
|
+
/**
 * Read the `version` field of manifest.json inside extensionDir().
 *
 * @returns {string | undefined} The manifest version when it is a
 *   non-empty string; undefined when the file cannot be read or parsed,
 *   or when the field is missing, empty, or not a string.
 */
function loadExpectedExtensionVersion() {
  try {
    const manifestPath = path.join(extensionDir(), "manifest.json");
    const { version } = JSON.parse(readFileSync(manifestPath, "utf8"));
    return typeof version === "string" && version !== "" ? version : void 0;
  } catch {
    // Unreadable or malformed manifest is reported as "no expected version".
    return void 0;
  }
}
|
|
2924
|
+
/**
 * Source-checkout dev sentinel — see scripts/copy-browser-ext.ts. When
 * extensionDir() resolves to src/browser-ext/ (dev iteration via
 * GH_ROUTER_BROWSER_EXT_DIR, or the dist fallback when the package
 * isn't built), the version is "0.0.0" and the auto-reload check is a
 * no-op: both sides agree, no mismatch, no reload triggered.
 *
 * The version check in _ensureBridgeReadyImpl also skips the reload
 * path whenever either the expected or the loaded version equals this
 * value.
 */
const DEV_VERSION_SENTINEL = "0.0.0";
/**
 * Track which `(extensionId, expectedVersion)` pairs we've already
 * tried to auto-reload in this process. Prevents an infinite reload
 * loop if the on-disk version somehow stays ahead of what the browser
 * picks up (e.g. Chrome disabled the extension after reload because
 * a new permission was added — the loaded version stays stale).
 *
 * Entries are strings of the form `${extensionId}::${expectedVersion}`.
 */
const attemptedReloads = /* @__PURE__ */ new Set();
|
|
2940
|
+
/**
 * Ask the local bridge to reload the browser extension by sending
 * `POST /reload` to 127.0.0.1 on the given port with a bearer token.
 *
 * @param {number} port - Bridge HTTP port.
 * @param {string} token - Bearer token sent in the `authorization` header.
 * @param {number} [timeoutMs=1000] - Abort the request after this long.
 * @returns {Promise<boolean>} `response.ok` for a completed request;
 *   false when the request throws or is aborted by the timeout.
 */
async function postReload(port, token, timeoutMs = 1e3) {
  const abort = new AbortController();
  const deadline = setTimeout(() => abort.abort(), timeoutMs);
  try {
    const response = await fetch(`http://127.0.0.1:${port}/reload`, {
      method: "POST",
      headers: { authorization: `Bearer ${token}` },
      signal: abort.signal
    });
    return response.ok;
  } catch {
    return false;
  } finally {
    // Always release the timer so it cannot keep the process alive.
    clearTimeout(deadline);
  }
}
|
|
2963
|
+
/**
 * Poll until the bridge reports the expected extension version.
 *
 * Each cycle sleeps `intervalMs`, re-reads the bridge discovery record,
 * and probes its health endpoint with a 500 ms timeout. The discovery
 * record is re-read every cycle because port, token and pid may change
 * when a new bridge process replaces the old one.
 *
 * @param {string} expectedVersion - Version the loaded extension must report.
 * @param {number} maxWaitMs - Total polling budget.
 * @param {number} intervalMs - Sleep before each probe.
 * @returns {Promise<object | undefined>} The discovery record that
 *   produced a matching health response, or undefined on timeout.
 */
async function pollUntilExtensionVersion(expectedVersion, maxWaitMs, intervalMs) {
  const pause = () => new Promise((wake) => setTimeout(wake, intervalMs));
  const stopAt = Date.now() + maxWaitMs;
  while (Date.now() < stopAt) {
    await pause();
    const disc = readBridgeDiscovery();
    if (disc) {
      const health = await probeHealth(disc.port, disc.token, 500);
      const converged = Boolean(health) && health.ok && health.extension_connected && health.extension_loaded_version === expectedVersion;
      if (converged) return disc;
    }
  }
  return void 0;
}
|
|
2981
|
+
/**
 * Build the structured `install_required` payload returned when the
 * browser bridge or extension is not usable.
 *
 * @param {string} reason - Machine-readable cause; selects the
 *   human-readable instructions.
 * @param {boolean} autoInstalled - Echoed back as `auto_installed`.
 * @param {{loaded: string, expected: string}} [versionMismatch] - When
 *   given, it is attached as `version_mismatch`, and for the
 *   "extension_outdated" reason it is interpolated into the instructions.
 * @returns {object} Payload with `install_required: true`, the reason,
 *   the proxy version, and manual install steps.
 */
function buildInstallRequired(reason, autoInstalled, versionMismatch) {
  let instructions;
  if (reason === "no_supported_browser") {
    instructions = "No Chrome or Edge installation was detected on this host. Install one and restart the github-router proxy.";
  } else if (reason === "bridge_bundle_missing") {
    instructions = "The bridge bundle is missing. Run `bun run build` from the github-router checkout to produce dist/browser-bridge/index.js, then retry.";
  } else if (reason === "extension_outdated" && versionMismatch) {
    instructions = `Your loaded github-router browser extension is version ${versionMismatch.loaded} but the github-router package shipped version ${versionMismatch.expected}. Auto-reload was attempted and did not converge — Chrome likely disabled the extension because the new manifest declares new permissions. Open chrome://extensions (or edge://extensions), find the github-router extension card, click "Enable" if it's disabled, then click the reload arrow. Retry this tool call afterwards.`;
  } else {
    instructions = "Open chrome://extensions (or edge://extensions), enable Developer Mode, click 'Load unpacked', and select the load_unpacked_dir above. Then retry this tool call. If you just updated the github-router package, an extension already loaded may need to be reloaded — click the reload arrow on its card.";
  }
  const payload = {
    install_required: true,
    reason,
    auto_installed: autoInstalled,
    proxy_version: getPackageVersion(),
    manual_steps: {
      load_unpacked_dir: extensionDir(),
      expected_extension_id: loadStableExtensionId(),
      instructions
    }
  };
  // Appended last so the key order matches a payload built with a trailing spread.
  if (versionMismatch) payload.version_mismatch = versionMismatch;
  return payload;
}
|
|
2894
3001
|
/**
|
|
@@ -2929,6 +3036,31 @@ async function _ensureBridgeReadyImpl() {
|
|
|
2929
3036
|
const health = await probeHealth(discovery.port, discovery.token);
|
|
2930
3037
|
if (!health || !health.ok) return buildInstallRequired("bridge_not_running", autoInstalled);
|
|
2931
3038
|
if (!health.extension_connected) return buildInstallRequired("extension_not_loaded", autoInstalled);
|
|
3039
|
+
const expectedVersion = loadExpectedExtensionVersion();
|
|
3040
|
+
const loadedVersion = health.extension_loaded_version;
|
|
3041
|
+
if (typeof expectedVersion === "string" && typeof loadedVersion === "string" && expectedVersion !== DEV_VERSION_SENTINEL && loadedVersion !== DEV_VERSION_SENTINEL && expectedVersion !== loadedVersion) {
|
|
3042
|
+
const reloadKey = `${loadStableExtensionId()}::${expectedVersion}`;
|
|
3043
|
+
if (attemptedReloads.has(reloadKey)) return buildInstallRequired("extension_outdated", autoInstalled, {
|
|
3044
|
+
loaded: loadedVersion,
|
|
3045
|
+
expected: expectedVersion
|
|
3046
|
+
});
|
|
3047
|
+
attemptedReloads.add(reloadKey);
|
|
3048
|
+
if (!await postReload(discovery.port, discovery.token)) return buildInstallRequired("extension_outdated", autoInstalled, {
|
|
3049
|
+
loaded: loadedVersion,
|
|
3050
|
+
expected: expectedVersion
|
|
3051
|
+
});
|
|
3052
|
+
const newDiscovery = await pollUntilExtensionVersion(expectedVersion, 3e3, 150);
|
|
3053
|
+
if (!newDiscovery) return buildInstallRequired("extension_outdated", autoInstalled, {
|
|
3054
|
+
loaded: loadedVersion,
|
|
3055
|
+
expected: expectedVersion
|
|
3056
|
+
});
|
|
3057
|
+
return {
|
|
3058
|
+
install_required: false,
|
|
3059
|
+
port: newDiscovery.port,
|
|
3060
|
+
token: newDiscovery.token,
|
|
3061
|
+
pid: newDiscovery.pid
|
|
3062
|
+
};
|
|
3063
|
+
}
|
|
2932
3064
|
return {
|
|
2933
3065
|
install_required: false,
|
|
2934
3066
|
port: discovery.port,
|
|
@@ -2946,6 +3078,58 @@ function installRequiredToolResult(payload) {
|
|
|
2946
3078
|
};
|
|
2947
3079
|
}
|
|
2948
3080
|
|
|
3081
|
+
//#endregion
|
|
3082
|
+
//#region src/lib/browser-mcp/humanlike.ts
|
|
3083
|
+
/**
 * Draw a delay, in whole milliseconds, from a Beta(2, 5) distribution
 * stretched over [minMs, maxMs].
 *
 * The Beta variate is built from two gamma draws, X ~ Gamma(2) and
 * Y ~ Gamma(5), as X / (X + Y). Beta(2, 5) has its mode at 0.2 of the
 * range, so most delays are short with an occasional long one.
 *
 * @param {number} minMs - Lower bound of the range.
 * @param {number} maxMs - Upper bound of the range.
 * @returns {number} Rounded delay in milliseconds.
 */
function betaDelay(minMs, maxMs) {
  const gammaTwo = gammaSample(2);
  const gammaFive = gammaSample(5);
  const fraction = gammaTwo / (gammaTwo + gammaFive);
  const span = maxMs - minMs;
  return Math.round(minMs + fraction * span);
}
|
|
3098
|
+
/**
 * Draw one Gamma(shape, 1) variate with the Marsaglia–Tsang
 * rejection method.
 *
 * A standard normal draw is pushed through a cubic transform; the
 * result is accepted either by the cheap polynomial "squeeze" test or,
 * failing that, by the exact logarithmic test.
 *
 * @param {number} shape - Gamma shape parameter (callers here pass 2 and 5).
 * @returns {number} A positive gamma-distributed sample.
 */
function gammaSample(shape) {
  const d = shape - 1 / 3;
  const c = 1 / Math.sqrt(9 * d);
  for (;;) {
    // Redraw until the cubic's base is positive.
    let x = normalSample();
    let base = 1 + c * x;
    while (base <= 0) {
      x = normalSample();
      base = 1 + c * x;
    }
    const v = base * base * base;
    const u = Math.random();
    const passesSqueeze = u < 1 - .0331 * x * x * x * x;
    if (passesSqueeze || Math.log(u) < .5 * x * x + d * (1 - v + Math.log(v))) return d * v;
  }
}
|
|
3113
|
+
/**
 * Draw one standard-normal variate using the Box–Muller transform.
 *
 * Both uniform inputs are redrawn while they are exactly 0; for the
 * first one this keeps Math.log() finite.
 *
 * @returns {number} A sample from N(0, 1).
 */
function normalSample() {
  const drawNonZero = () => {
    let r;
    do {
      r = Math.random();
    } while (r === 0);
    return r;
  };
  const radiusSource = drawNonZero();
  const angleSource = drawNonZero();
  return Math.sqrt(-2 * Math.log(radiusSource)) * Math.cos(2 * Math.PI * angleSource);
}
|
|
3119
|
+
/**
 * Delay to leave between paced browser actions.
 *
 * Returns a Beta(2, 5)-shaped random delay between 800 ms and 4600 ms
 * (see betaDelay), so most gaps are short with an occasional long pause.
 *
 * The caller is expected to subtract time already spent since the
 * previous action so the perceived delay is not doubled.
 *
 * @returns {number} Target gap in milliseconds.
 */
function interActionDelay() {
  const shortestGapMs = 800;
  const longestGapMs = 4600;
  return betaDelay(shortestGapMs, longestGapMs);
}
|
|
3132
|
+
|
|
2949
3133
|
//#endregion
|
|
2950
3134
|
//#region src/lib/browser-mcp/policy.ts
|
|
2951
3135
|
const BLOCKED_URL_RE = /^(chrome|edge|brave|opera|vivaldi):\/\/(settings|preferences|extensions|policy|management|password|flags|flag-descriptions)/i;
|
|
@@ -2980,6 +3164,78 @@ function preflightUrlPolicy(toolName, args) {
|
|
|
2980
3164
|
|
|
2981
3165
|
//#endregion
|
|
2982
3166
|
//#region src/lib/browser-mcp/dispatch.ts
|
|
3167
|
+
/**
 * Tools whose dispatch counts as a mutating user action for pacing
 * purposes. Read-only tools (list_tabs, screenshot, read_page,
 * diagnostics, navigate-without-form-submit) skip the inter-action
 * delay because they don't look like a human clicking around.
 */
const PACED_TOOLS = new Set([
  "browser_click",
  "browser_fill",
  "browser_type",
  "browser_keyboard",
  "browser_scroll",
  "browser_mouse",
  "browser_drag"
]);
// Date.now() timestamp written by maybeInjectHumanlikeDelay after each paced
// dispatch; 0 until the first one.
let lastDispatchAt = 0;
// Cached set of tab ids taken from the bridge /health `humanlike_tabs` list,
// plus when it was fetched. Replaced wholesale by isHumanlikeAutoOn.
let humanlikeAutoCache = {
  fetchedAt: 0,
  tabs: /* @__PURE__ */ new Set()
};
// isHumanlikeAutoOn re-probes /health once the cache is older than this (5 s).
const HUMANLIKE_PROBE_INTERVAL_MS = 5e3;
|
|
3188
|
+
/**
 * Decide whether human-like pacing is switched on for a tab in "auto" mode.
 *
 * When the cached tab set is older than HUMANLIKE_PROBE_INTERVAL_MS the
 * bridge's /health endpoint is queried and the numeric `tabId`s in its
 * `humanlike_tabs` list replace the cache. A failed or non-OK probe
 * leaves the previous cache untouched.
 *
 * @param {number | undefined} tabId - Tab to check; non-numbers yield false.
 * @param {AbortSignal} [signal] - Forwarded to the health fetch.
 * @returns {Promise<boolean>} True when the tab is in the cached set.
 *   False when pacing is forced "off" or the bridge reports
 *   install_required.
 */
async function isHumanlikeAutoOn(tabId, signal) {
  if (state.humanlikeForce === "off" || typeof tabId !== "number") return false;
  const startedAt = Date.now();
  const cacheIsStale = startedAt - humanlikeAutoCache.fetchedAt > HUMANLIKE_PROBE_INTERVAL_MS;
  if (cacheIsStale) {
    try {
      const bridge = await ensureBridgeReady();
      if (bridge.install_required) return false;
      const response = await fetch(`http://127.0.0.1:${bridge.port}/health`, {
        headers: { authorization: `Bearer ${bridge.token}` },
        signal
      });
      if (response.ok) {
        const payload = await response.json();
        const pacedTabIds = /* @__PURE__ */ new Set();
        for (const entry of payload.humanlike_tabs ?? []) {
          if (typeof entry.tabId === "number") pacedTabIds.add(entry.tabId);
        }
        humanlikeAutoCache = {
          fetchedAt: startedAt,
          tabs: pacedTabIds
        };
      }
    } catch {
      // Best-effort probe: on any failure keep answering from the old cache.
    }
  }
  return humanlikeAutoCache.tabs.has(tabId);
}
|
|
3211
|
+
/**
 * Sleep before dispatching a paced tool so consecutive actions are
 * spaced like a human's.
 *
 * Does nothing for tools outside PACED_TOOLS, or when pacing is off
 * (forced "off", or "auto" with the tab not flagged). Otherwise waits
 * for whatever part of a freshly drawn inter-action delay has not
 * already elapsed since the previous paced dispatch, then records the
 * new dispatch time.
 *
 * @param {string} tool - Tool name being dispatched.
 * @param {AbortSignal} [signal] - Aborts the wait.
 * @param {number} [tabId] - Tab used for the "auto" mode lookup.
 * @returns {Promise<void>}
 */
async function maybeInjectHumanlikeDelay(tool, signal, tabId) {
  if (!PACED_TOOLS.has(tool)) return;
  const mode = state.humanlikeForce;
  const pacingOn = mode === "on" || (mode === "auto" && await isHumanlikeAutoOn(tabId, signal));
  if (!pacingOn) return;
  const targetGap = interActionDelay();
  const elapsed = Date.now() - lastDispatchAt;
  const remaining = Math.max(0, targetGap - elapsed);
  if (remaining > 0) await sleepAbortable(remaining, signal);
  lastDispatchAt = Date.now();
}
|
|
3222
|
+
/**
 * Promise-based sleep that can be cut short by an AbortSignal.
 *
 * @param {number} ms - How long to sleep.
 * @param {AbortSignal} [signal] - Optional cancellation signal.
 * @returns {Promise<void>} Resolves after `ms`; rejects with
 *   `Error("aborted")` if the signal is already aborted or aborts
 *   while sleeping.
 */
function sleepAbortable(ms, signal) {
  return new Promise((resolve, reject) => {
    if (signal?.aborted) {
      reject(/* @__PURE__ */ new Error("aborted"));
      return;
    }
    let timer;
    const cancel = () => {
      clearTimeout(timer);
      reject(/* @__PURE__ */ new Error("aborted"));
    };
    timer = setTimeout(() => {
      // Detach the listener so a long-lived signal does not accumulate handlers.
      if (signal) signal.removeEventListener("abort", cancel);
      resolve();
    }, ms);
    if (signal) signal.addEventListener("abort", cancel, { once: true });
  });
}
|
|
2983
3239
|
const PER_TOOL_TIMEOUTS = {
|
|
2984
3240
|
browser_list_tabs: {
|
|
2985
3241
|
defaultMs: 5e3,
|
|
@@ -3145,6 +3401,7 @@ async function dispatchBrowserTool(tool, args, signal, opts = {}) {
|
|
|
3145
3401
|
};
|
|
3146
3402
|
const ready = await ensureBridgeReady();
|
|
3147
3403
|
if (ready.install_required) return installRequiredToolResult(ready);
|
|
3404
|
+
await maybeInjectHumanlikeDelay(tool, signal, typeof args.tabId === "number" ? args.tabId : void 0);
|
|
3148
3405
|
const { defaultMs, maxMs } = pickTimeout(tool);
|
|
3149
3406
|
const callerTimeout = typeof opts.timeoutMs === "number" && opts.timeoutMs > 0 ? Math.min(opts.timeoutMs, maxMs) : defaultMs;
|
|
3150
3407
|
try {
|
|
@@ -3226,219 +3483,1563 @@ function logAudit$1(record) {
|
|
|
3226
3483
|
}
|
|
3227
3484
|
|
|
3228
3485
|
//#endregion
|
|
3229
|
-
//#region src/lib/browser-mcp/
|
|
3486
|
+
//#region src/lib/browser-mcp/matcher.ts
|
|
3230
3487
|
/**
 * Resolve a parsed intent to a concrete element action by walking the
 * matcher cascade in LAYERS. Synchronous, no I/O.
 *
 * A layer wins when, after tie-breaking, its best candidate meets the
 * layer's score floor and either stands alone or leads the runner-up
 * by at least 0.15. Otherwise the next layer is tried.
 *
 * When no layer wins, the result has `source: "escalate"`, an empty
 * `ref`, and a `candidates` shortlist (at most 8, de-duplicated by ref
 * and ranked) for the caller's fallback path.
 *
 * @param {object} snapshot - Page snapshot handed to each layer.
 * @param {object} parsed - Parsed intent (`verb`, `valueFromIntent`, ...).
 * @param {*} [value] - Explicit value; falls back to `parsed.valueFromIntent`.
 * @returns {object} A resolved action or an escalation record.
 */
function deterministicResolve(snapshot, parsed, value) {
  const MIN_LEAD = .15;
  const effectiveValue = value ?? parsed.valueFromIntent;
  const seen = [];
  for (const layer of LAYERS) {
    const hits = layer.run(snapshot, parsed, effectiveValue);
    if (hits.length === 0) continue;
    seen.push(...hits);
    const [best, second] = applyTieBreakers(hits, parsed);
    if (!best) continue;
    const clearsFloor = best.score >= layer.floor;
    const leadsRunnerUp = !second || best.score - second.score >= MIN_LEAD;
    if (!clearsFloor || !leadsRunnerUp) continue;
    const action = inferActionLocal(best.el.role, parsed, effectiveValue);
    const resolved = { ref: best.el.ref, action };
    if (needsValue(action) && effectiveValue !== void 0) resolved.value = effectiveValue;
    resolved.confidence = best.score;
    resolved.source = layer.name;
    resolved.reason = best.reason;
    return resolved;
  }
  const shortlist = dedupeAndRank(seen).slice(0, 8);
  const escalation = { ref: "", action: parsed.verb ?? "click" };
  if (effectiveValue !== void 0) escalation.value = effectiveValue;
  escalation.confidence = 0;
  escalation.source = "escalate";
  escalation.reason = shortlist.length === 0 ? "no candidates from any cascade layer" : `${shortlist.length} ambiguous candidates`;
  escalation.candidates = shortlist.map(({ el, score, layer }) => ({
    ref: el.ref,
    score,
    layer
  }));
  return escalation;
}
|
|
3534
|
+
/**
 * Filter out unusable candidates, re-weight the rest, and rank them.
 *
 * Dropped: hidden elements, elements whose bbox has a third or fourth
 * entry below 4, and — for click/fill/type/select — disabled elements.
 * Survivors get `score * weight(candidate, verb)` and are returned
 * best-first as new objects.
 *
 * @param {Array<object>} cands - Candidates from one cascade layer.
 * @param {object} parsed - Parsed intent; `verb` defaults to "click".
 * @returns {Array<object>} Re-scored candidates, highest score first.
 */
function applyTieBreakers(cands, parsed) {
  const verb = parsed.verb ?? "click";
  const skipDisabled = ["click", "fill", "type", "select"].includes(verb);
  const usable = [];
  for (const cand of cands) {
    const { hidden, bbox, disabled } = cand.el;
    if (hidden) continue;
    if (bbox && (bbox[2] < 4 || bbox[3] < 4)) continue;
    if (skipDisabled && disabled) continue;
    usable.push(cand);
  }
  const rescored = usable.map((cand) => ({
    ...cand,
    score: cand.score * weight(cand, verb)
  }));
  return rescored.sort((left, right) => right.score - left.score);
}
|
|
3547
|
+
/**
 * Multiplicative tie-break weight for a candidate, capped at 1.
 *
 * Penalties: 0.92 when a bbox is present and its first two entries are
 * not both >= 0; 0.95 for elements inside an iframe; and, for the
 * "click" verb only, a role factor (link/a 0.98, menuitem 0.96,
 * generic/div/span 0.9, everything else 1).
 *
 * @param {object} c - Candidate with an `el` describing the element.
 * @param {string} verb - Action verb being resolved.
 * @returns {number} Weight in (0, 1].
 */
function weight(c, verb) {
  let factor = 1;
  const box = c.el.bbox;
  const hasNonNegativeOrigin = !box || (box[0] >= 0 && box[1] >= 0);
  if (!hasNonNegativeOrigin) factor *= .92;
  if (c.el.isInIframe) factor *= .95;
  if (verb === "click") {
    switch ((c.el.role || "").toLowerCase()) {
      case "link":
      case "a":
        factor *= .98;
        break;
      case "menuitem":
        factor *= .96;
        break;
      case "generic":
      case "div":
      case "span":
        factor *= .9;
        break;
      default:
        // Buttons and every other role keep the full weight.
        break;
    }
  }
  return Math.min(1, factor);
}
|
|
3563
|
+
/**
 * Collapse candidates that point at the same element ref, keeping the
 * highest-scoring one (the earliest wins on a tie), and return them
 * best-first.
 *
 * @param {Array<object>} cands - Candidates, possibly from several layers.
 * @returns {Array<object>} One candidate per ref, highest score first.
 */
function dedupeAndRank(cands) {
  const bestByRef = cands.reduce((acc, cand) => {
    const prior = acc.get(cand.el.ref);
    if (!prior || prior.score < cand.score) acc.set(cand.el.ref, cand);
    return acc;
  }, /* @__PURE__ */ new Map());
  return Array.from(bestByRef.values()).sort((left, right) => right.score - left.score);
}
|
|
3571
|
+
/**
 * Choose the action to perform on a matched element.
 *
 * Precedence: an explicit "scroll_into_view" verb or the word "scroll"
 * in the raw target wins first; select/combobox roles always yield
 * "select"; text-entry roles honour an explicit type/fill verb and
 * otherwise fill when a value is available, else click; every other
 * role uses the parsed verb, defaulting to "click".
 *
 * @param {string | undefined} role - Role of the matched element.
 * @param {object} parsed - Parsed intent (`verb`, `rawTarget`).
 * @param {*} [value] - Value to enter, if any.
 * @returns {string} The action name.
 */
function inferActionLocal(role, parsed, value) {
  const { verb } = parsed;
  if (verb === "scroll_into_view" || /\bscroll\b/.test(parsed.rawTarget.toLowerCase())) return "scroll_into_view";
  const normalizedRole = (role || "").toLowerCase();
  if (["select", "combobox"].includes(normalizedRole)) return "select";
  const textEntryRoles = ["textarea", "input", "textbox", "searchbox", "spinbutton"];
  if (textEntryRoles.includes(normalizedRole)) {
    if (verb === "type" || verb === "fill") return verb;
    return value === void 0 ? "click" : "fill";
  }
  return verb ?? "click";
}
|
|
3584
|
+
/**
 * Whether an action carries a value (fill, type, select).
 *
 * @param {string} action - Action name.
 * @returns {boolean}
 */
function needsValue(action) {
  return ["fill", "type", "select"].includes(action);
}
|
|
3587
|
+
/**
 * Trimmed accessible name of a snapshot element; "" when it has none.
 *
 * @param {{name?: string | null}} el - Snapshot element.
 * @returns {string}
 */
function nameOf(el) {
  const rawName = el.name ?? "";
  return rawName.trim();
}
|
|
3590
|
+
/**
 * Lower-cased form of nameOf(el), for case-insensitive comparisons.
 *
 * @param {object} el - Snapshot element.
 * @returns {string}
 */
function nameLowerOf(el) {
  const label = nameOf(el);
  return label.toLowerCase();
}
|
|
3593
|
+
/**
 * Whether a role (compared case-insensitively) is one that is
 * normally activated by clicking.
 *
 * @param {string} role - Element role.
 * @returns {boolean}
 */
function isClickableRole(role) {
  const clickable = [
    "button",
    "link",
    "a",
    "menuitem",
    "tab",
    "checkbox",
    "radio",
    "switch",
    "option",
    "treeitem"
  ];
  return clickable.includes(role.toLowerCase());
}
|
|
3597
|
+
/**
 * Whether a role (compared case-insensitively) is a form input that
 * can take a value or selection.
 *
 * @param {string} role - Element role.
 * @returns {boolean}
 */
function isInputRole(role) {
  const inputs = [
    "textbox",
    "input",
    "textarea",
    "searchbox",
    "spinbutton",
    "combobox",
    "select",
    "checkbox",
    "radio"
  ];
  return inputs.includes(role.toLowerCase());
}
|
|
3601
|
+
/**
 * Whether an element role can sensibly receive the given verb.
 *
 * No verb or "click" accepts clickable and input roles; fill, type and
 * select accept input roles only; any other verb accepts every role.
 *
 * @param {string} role - Element role.
 * @param {string | undefined} verb - Parsed action verb.
 * @returns {boolean}
 */
function verbCompatible(role, verb) {
  const isClickLike = !verb || verb === "click";
  if (isClickLike) return isClickableRole(role) || isInputRole(role);
  const needsInput = verb === "fill" || verb === "type" || verb === "select";
  return needsInput ? isInputRole(role) : true;
}
|
|
3606
|
+
/**
 * Case-insensitive test for `needle` occurring in `haystack` on word
 * boundaries. The needle is regex-escaped, so it is matched literally.
 *
 * @param {string} haystack - Text to search.
 * @param {string} needle - Literal text to look for.
 * @returns {boolean} False when either argument is empty.
 */
function wholeWordContains(haystack, needle) {
  if (!haystack || !needle) return false;
  const escapedNeedle = needle.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const pattern = new RegExp(`\\b${escapedNeedle}\\b`, "i");
  return pattern.test(haystack);
}
|
|
3610
|
+
/**
 * Cascade layer L0: exact, case-insensitive match of the element's
 * accessible name against the quoted name (or, failing that, the
 * normalized target). Only verb-compatible roles are considered, and
 * each hit scores 1.
 */
const L0 = {
  name: "L0",
  floor: .95,
  run: (snapshot, parsed) => {
    const wanted = parsed.quotedName ?? parsed.normTarget;
    if (!wanted) return [];
    const exactMatches = [];
    for (const el of snapshot.elements) {
      if (!verbCompatible(el.role, parsed.verb)) continue;
      const label = nameLowerOf(el);
      const isExact = Boolean(label) && label === wanted.toLowerCase();
      if (isExact) {
        exactMatches.push({
          el,
          score: 1,
          layer: "L0",
          reason: `L0 exact name "${el.name}"`
        });
      }
    }
    return exactMatches;
  }
};
|
|
3631
|
+
/**
 * Cascade layer L1: match a form input by its label.
 *
 * Runs only when there is no verb or the verb is fill/type/select, and
 * only considers input roles. The lower-cased name must equal the
 * target (field hint, else normalized target), optionally decorated as
 * a required field: a trailing " *" or " (required)", or a prefix made
 * up solely of whitespace, `*`, `(`, `)`, `:`, `_`, `-` and the word
 * "required" (e.g. "* email", "(required) email"). Each hit scores 0.95.
 */
const L1 = {
  name: "L1",
  floor: .9,
  run: (snapshot, parsed) => {
    if (parsed.verb && parsed.verb !== "fill" && parsed.verb !== "type" && parsed.verb !== "select") return [];
    const target = parsed.fieldHint ?? parsed.normTarget;
    if (!target) return [];
    const tgt = target.toLowerCase();
    // The WHOLE prefix must be decoration. The previous pattern was an
    // unanchored character class, so any label whose prefix merely began
    // with one of the letters in "required" (e.g. "recovery email",
    // "user email") was wrongly accepted as a label match.
    const decorationOnly = /^(?:required|[\s*():_-])+$/;
    const out = [];
    for (const el of snapshot.elements) {
      if (!isInputRole(el.role)) continue;
      const nm = nameLowerOf(el);
      const isLabelMatch = nm === tgt || nm === `${tgt} *` || nm === `${tgt} (required)` || (nm.endsWith(tgt) && decorationOnly.test(nm.slice(0, nm.length - tgt.length)));
      if (isLabelMatch) {
        out.push({
          el,
          score: .95,
          layer: "L1",
          reason: `L1 label "${el.name}"`
        });
      }
    }
    return out;
  }
};
|
|
3653
|
+
/**
 * Cascade layer L2: match a form input by its placeholder text.
 *
 * Considers input roles with a non-empty placeholder. An exact
 * case-insensitive match with the target (field hint, else normalized
 * target) scores 0.85; a whole-word containment scores 0.75.
 */
const L2 = {
  name: "L2",
  floor: .7,
  run: (snapshot, parsed) => {
    const hint = parsed.fieldHint ?? parsed.normTarget;
    if (!hint) return [];
    const wanted = hint.toLowerCase();
    const matches = [];
    for (const el of snapshot.elements) {
      if (!isInputRole(el.role)) continue;
      const placeholder = (el.placeholder ?? "").toLowerCase();
      if (!placeholder) continue;
      const isExact = placeholder === wanted;
      if (!isExact && !wholeWordContains(placeholder, wanted)) continue;
      matches.push({
        el,
        score: isExact ? .85 : .75,
        layer: "L2",
        reason: isExact ? `L2 placeholder exact "${el.placeholder}"` : `L2 placeholder contains "${wanted}"`
      });
    }
    return matches;
  }
};
|
|
3681
|
+
/**
 * Cascade layer L3: fuzzy accessible-name match.
 *
 * For verb-compatible elements whose trimmed name contains the
 * normalized target as whole words, scores 0.72 when the target covers
 * at least 80% of the name's length, otherwise 0.68.
 */
const L3 = {
  name: "L3",
  floor: .65,
  run: (snapshot, parsed) => {
    const wanted = parsed.normTarget;
    if (!wanted) return [];
    const matches = [];
    for (const el of snapshot.elements) {
      const label = verbCompatible(el.role, parsed.verb) ? nameOf(el) : "";
      if (!label || !wholeWordContains(label, wanted)) continue;
      const coverage = wanted.length / label.length;
      matches.push({
        el,
        score: coverage >= .8 ? .72 : .68,
        layer: "L3",
        reason: `L3 fuzzy name "${label}"`
      });
    }
    return matches;
  }
};
|
|
3704
|
+
/**
 * Cascade layer L4: match clickable elements by their `value` text.
 *
 * The lower-cased, trimmed value is compared with the lower-cased
 * normalized target: exact equality scores 0.65, whole-word
 * containment scores 0.6.
 */
const L4 = {
  name: "L4",
  floor: .6,
  run: (snapshot, parsed) => {
    const wanted = parsed.normTarget;
    if (!wanted) return [];
    const matches = [];
    for (const el of snapshot.elements) {
      if (!isClickableRole(el.role)) continue;
      const text = (el.value ?? "").toLowerCase().trim();
      if (!text) continue;
      const wantedLower = wanted.toLowerCase();
      const isExact = text === wantedLower;
      if (!isExact && !wholeWordContains(text, wantedLower)) continue;
      matches.push({
        el,
        score: isExact ? .65 : .6,
        layer: "L4",
        reason: isExact ? `L4 text exact "${el.value}"` : `L4 text contains "${wantedLower}"`
      });
    }
    return matches;
  }
};
|
|
3732
|
+
/**
 * Matcher layer "L5": treats an identifier-shaped target (a letter followed
 * by at least two letters, digits, `_` or `-`) as a possible attribute value.
 *
 * The target and each attribute are compared with separators removed and
 * case folded. Attributes are probed in a fixed order and only the first
 * matching attribute per element produces a candidate:
 * testid (0.9), id (0.88), name_attr (0.86), aria_label (0.86).
 */
const L5 = {
	name: "L5",
	floor: .85,
	run: (snapshot, parsed) => {
		const target = parsed.normTarget;
		if (!target || !/^[a-z][a-z0-9_-]{2,}$/i.test(target)) return [];
		const wanted = target.toLowerCase().replace(/[-_]/g, "");
		// Probe order matters: an earlier attribute wins over a later one.
		const probes = [
			{ key: "testid", label: "testid", score: .9 },
			{ key: "id", label: "id", score: .88 },
			{ key: "name_attr", label: "name", score: .86 },
			{ key: "aria_label", label: "aria-label", score: .86 }
		];
		const hits = [];
		for (const el of snapshot.elements) {
			const attrs = el.attrs;
			if (!attrs) continue;
			const hit = probes.find(({ key }) => attrs[key] && stripSep(attrs[key]).toLowerCase() === wanted);
			if (!hit) continue;
			hits.push({
				el,
				score: hit.score,
				layer: "L5",
				reason: `L5 ${hit.label}="${attrs[hit.key]}"`
			});
		}
		return hits;
	}
};
|
|
3781
|
+
/**
 * Remove every hyphen, underscore and whitespace character from `s`.
 *
 * @param {string} s
 * @returns {string} `s` with all separator characters dropped.
 */
function stripSep(s) {
	// Splitting on separator runs and re-joining drops exactly those characters.
	const pieces = s.split(/[-_\s]+/);
	return pieces.join("");
}
|
|
3784
|
+
/**
 * Ordered list of matcher layers. L0–L5 are defined as standalone constants;
 * L6 (ordinal pick) and L7 (field-type heuristics) are defined inline here.
 * Every layer exposes `name`, a `floor` score and a `run(snapshot, parsed)`
 * function returning candidate objects `{ el, score, layer, reason }`.
 */
const LAYERS = [
	L0,
	L1,
	L2,
	L3,
	L4,
	L5,
	{
		name: "L6",
		floor: .75,
		// Ordinal layer: resolves "first/second/.../last <kind>" to one element.
		run: (snapshot, parsed) => {
			if (!parsed.ordinal) return [];
			const { n, kind } = parsed.ordinal;
			// Keep elements whose role equals the kind (or its plural form), or
			// whose tag equals the kind. With no kind, every element is kept.
			const candidates = snapshot.elements.filter((el) => {
				if (!kind) return true;
				const role = el.role.toLowerCase();
				return role === kind || role === `${kind}s` || (el.tag ?? "").toLowerCase() === kind;
			});
			if (candidates.length < Math.abs(n)) return [];
			// Sort by 24-unit buckets of bbox[1], then by bbox[0] within a bucket.
			// NOTE(review): presumably bbox is [x, y, ...] so this is reading order — confirm.
			const sorted = [...candidates].sort((a, b) => {
				const ay = Math.floor(a.bbox[1] / 24);
				const by = Math.floor(b.bbox[1] / 24);
				if (ay !== by) return ay - by;
				return a.bbox[0] - b.bbox[0];
			});
			// n === -1 means "last"; any other n is treated as a 1-based position.
			const idx = n === -1 ? sorted.length - 1 : n - 1;
			if (idx < 0 || idx >= sorted.length) return [];
			return [{
				el: sorted[idx],
				score: .8,
				layer: "L6",
				reason: `L6 ordinal pick #${n} of ${sorted.length} ${kind ?? "elements"}`
			}];
		}
	},
	{
		name: "L7",
		floor: .5,
		// Heuristic layer: maps a small set of well-known hints (email, password,
		// search, phone, submit/sign-in, username) onto input types, roles and
		// name/placeholder keywords. Every hit scores .55.
		run: (snapshot, parsed) => {
			// The field hint takes priority; the normalized target is the fallback.
			const hint = parsed.fieldHint ?? parsed.normTarget;
			if (!hint) return [];
			const h = hint.toLowerCase();
			const out = [];
			const inputRolePred = (el) => isInputRole(el.role);
			if (h === "email") {
				// `&&` binds tighter than `||`: inputType alone is sufficient,
				// otherwise an input-role element must mention "email".
				for (const el of snapshot.elements) if (el.inputType === "email" || inputRolePred(el) && (wholeWordContains(el.placeholder ?? "", "email") || wholeWordContains(el.name ?? "", "email"))) out.push({
					el,
					score: .55,
					layer: "L7",
					reason: "L7 email heuristic"
				});
			} else if (h === "password") {
				for (const el of snapshot.elements) if (el.inputType === "password" || inputRolePred(el) && wholeWordContains(el.name ?? "", "password")) out.push({
					el,
					score: .55,
					layer: "L7",
					reason: "L7 password heuristic"
				});
			} else if (h === "search") {
				for (const el of snapshot.elements) if (el.role === "searchbox" || el.inputType === "search" || inputRolePred(el) && wholeWordContains(el.name ?? "", "search")) out.push({
					el,
					score: .55,
					layer: "L7",
					reason: "L7 search heuristic"
				});
			} else if (h === "phone" || h === "tel") {
				for (const el of snapshot.elements) if (el.inputType === "tel" || inputRolePred(el) && wholeWordContains(el.name ?? "", "phone")) out.push({
					el,
					score: .55,
					layer: "L7",
					reason: "L7 phone heuristic"
				});
			} else if (h === "submit" || h === "sign in" || h === "signin" || h === "log in" || h === "login") {
				// Only buttons whose entire name is one of these action words match.
				const sumRe = /^(submit|send|continue|next|save|sign[\s-]?in|sign[\s-]?up|log[\s-]?in)$/i;
				for (const el of snapshot.elements) if (el.role === "button" && sumRe.test(el.name ?? "")) out.push({
					el,
					score: .55,
					layer: "L7",
					reason: "L7 submit heuristic"
				});
			} else if (h === "username" || h === "user") {
				for (const el of snapshot.elements) if (inputRolePred(el) && (wholeWordContains(el.name ?? "", "user") || wholeWordContains(el.name ?? "", "login") || wholeWordContains(el.name ?? "", "account"))) out.push({
					el,
					score: .55,
					layer: "L7",
					reason: "L7 username heuristic"
				});
			}
			return out;
		}
	}
];
|
|
3876
|
+
|
|
3877
|
+
//#endregion
|
|
3878
|
+
//#region src/lib/browser-mcp/parse-intent.ts
|
|
3879
|
+
// Leading action verb (captured) followed by whitespace.
const VERB_RE = /^\s*(click|press|tap|fill|enter|type|select|choose|scroll(?:[ -]?into[ -]?view)?|toggle|check|uncheck|open|focus|hover)\s+/i;
// Trailing "with X" / "to X" / "= X" clause; X is captured as the value.
const VALUE_RE = /\s+(?:with|to|=)\s+(.+?)\s*$/i;
// First quoted run (single, double or backtick quotes).
const QUOTED_RE = /["'`]([^"'`]+)["'`]/;
// One to four consecutive capitalized words, used when nothing is quoted.
const TITLE_CASE_RE = /\b([A-Z][\w]*(?:\s+[A-Z\d][\w]*){0,3})\b/;
// Ordinal word -> 1-based position; "last" is encoded as -1.
const ORDINAL_WORDS = {
	first: 1,
	second: 2,
	third: 3,
	fourth: 4,
	fifth: 5,
	sixth: 6,
	seventh: 7,
	eighth: 8,
	ninth: 9,
	tenth: 10,
	last: -1
};
const ORDINAL_WORD_RE = /\b(first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|last)\s+(\w+)/i;
const ORDINAL_NUM_RE = /\b(\d+)(?:st|nd|rd|th)?\s+(\w+)/i;
// Element-kind nouns that may trail a target ("email field", "Save button").
const FIELD_HINT_KINDS = [
	"field",
	"input",
	"textbox",
	"box",
	"search",
	"dropdown",
	"select",
	"menu",
	"button",
	"link",
	"tab",
	"checkbox",
	"radio",
	"switch"
];
const FIELD_HINT_RE = new RegExp(`\\b(\\w+)\\s+(?:${FIELD_HINT_KINDS.join("|")})\\b`, "i");
const ARTICLES_RE = /\b(the|a|an|this|that)\b/gi;
// A trailing kind noun, preceded by whitespace, at the very end of the target.
const FIELD_HINT_TAIL_RE = new RegExp(`\\s+(?:${FIELD_HINT_KINDS.join("|")})$`, "i");
// A leading ordinal token, either followed by more text or standing alone.
const LEADING_ORDINAL_RE = /^(?:\d+(?:st|nd|rd|th)?|first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|last)(?:\s+|$)/i;
/**
 * Parse a natural-language intent into structured parts.
 *
 * Always returns an object with `rawTarget` (the intent with the leading verb
 * and trailing value clause removed) and `normTarget` (lower-cased, articles
 * removed, whitespace collapsed, one trailing kind noun and a leading ordinal
 * stripped). `verb`, `quotedName`, `fieldHint`, `ordinal` and
 * `valueFromIntent` are set only when the matching pattern is found.
 *
 * An intent that consists solely of an ordinal and a kind noun
 * ("the first button") yields an empty `normTarget`: the ordinal is carried
 * in `ordinal` and must not be searched for as an element name.
 *
 * @param {unknown} intent Free-form instruction; coerced with `String()`.
 * @returns {{rawTarget: string, normTarget: string, verb?: string, quotedName?: string, fieldHint?: string, ordinal?: {n: number, kind: string}, valueFromIntent?: string}}
 */
function parseIntent(intent) {
	let work = String(intent ?? "").trim();
	let verb;
	const verbMatch = VERB_RE.exec(work);
	if (verbMatch) {
		verb = mapVerb(verbMatch[1]);
		work = work.slice(verbMatch[0].length);
	}
	let valueFromIntent;
	const valueMatch = VALUE_RE.exec(work);
	if (valueMatch) {
		valueFromIntent = valueMatch[1].trim();
		work = work.slice(0, valueMatch.index).trim();
	}
	// A quoted name wins; a Title Case run is the fallback.
	let quotedName;
	const quotedMatch = QUOTED_RE.exec(work);
	if (quotedMatch) quotedName = quotedMatch[1].trim();
	else {
		const titleMatch = TITLE_CASE_RE.exec(work);
		if (titleMatch) quotedName = titleMatch[1].trim();
	}
	// Word ordinals ("third") are tried before numeric ones ("3rd").
	let ordinal;
	const ordWordMatch = ORDINAL_WORD_RE.exec(work);
	if (ordWordMatch) {
		const n = ORDINAL_WORDS[ordWordMatch[1].toLowerCase()];
		if (typeof n === "number") ordinal = {
			n,
			kind: ordWordMatch[2].toLowerCase()
		};
	} else {
		const ordNumMatch = ORDINAL_NUM_RE.exec(work);
		if (ordNumMatch) ordinal = {
			n: Number.parseInt(ordNumMatch[1], 10),
			kind: ordNumMatch[2].toLowerCase()
		};
	}
	let fieldHint;
	const fieldMatch = FIELD_HINT_RE.exec(work);
	if (fieldMatch) fieldHint = fieldMatch[1].toLowerCase();
	const rawTarget = work.trim();
	let normTarget = rawTarget.toLowerCase().replace(ARTICLES_RE, "").replace(/\s+/g, " ").trim();
	// Only the final word can be a trailing kind, so one anchored replace suffices.
	normTarget = normTarget.replace(FIELD_HINT_TAIL_RE, "").trim();
	// The ordinal may be the only word left once the kind is stripped, so the
	// pattern accepts end-of-string as well as trailing whitespace.
	if (ordinal) normTarget = normTarget.replace(LEADING_ORDINAL_RE, "").trim();
	const out = {
		rawTarget,
		normTarget
	};
	if (verb) out.verb = verb;
	if (quotedName) out.quotedName = quotedName;
	if (fieldHint) out.fieldHint = fieldHint;
	if (ordinal) out.ordinal = ordinal;
	if (valueFromIntent !== void 0) out.valueFromIntent = valueFromIntent;
	return out;
}
|
|
3986
|
+
/**
 * Map a verb captured from an intent onto the action it stands for.
 *
 * Matching is case-insensitive. Every "scroll ... into ... view" spelling is
 * recognized, with a space, a hyphen or nothing at each of the two joints
 * independently, so mixed forms such as "scroll into-view" are not dropped.
 *
 * @param {string} raw Verb text as written in the intent.
 * @returns {"click"|"fill"|"type"|"select"|"scroll_into_view"|undefined}
 *   `undefined` for "hover", "focus" and any unrecognized verb.
 */
function mapVerb(raw) {
	const v = raw.toLowerCase();
	if (v === "click" || v === "press" || v === "tap" || v === "toggle" || v === "check" || v === "uncheck" || v === "open") return "click";
	if (v === "fill" || v === "enter") return "fill";
	if (v === "type") return "type";
	if (v === "select" || v === "choose") return "select";
	if (/^scroll(?:[ -]?into[ -]?view)?$/.test(v)) return "scroll_into_view";
	// "hover" and "focus" are recognized verbs with no action mapping.
	return void 0;
}
|
|
3995
|
+
|
|
3996
|
+
//#endregion
|
|
3997
|
+
//#region src/lib/mcp-inflight.ts
|
|
3998
|
+
/**
 * Process-wide concurrency cap for MCP `tools/call` dispatches.
 *
 * One shared integer counts every dispatch currently in progress; there is
 * no per-route partitioning, so every caller of `acquireInFlightSlot` draws
 * from the same budget of `MAX_INFLIGHT_TOOLS_CALL` slots.
 */
const MAX_INFLIGHT_TOOLS_CALL = 8;
let inFlight$1 = 0;
/**
 * Try to reserve one slot, synchronously and without queueing.
 *
 * @returns {(() => void) | null} A release callback when a slot was free, or
 *   `null` when all slots are taken. The callback gives the slot back on its
 *   first invocation and does nothing on later ones, so releasing
 *   defensively (e.g. from `finally`) cannot decrement the counter twice.
 */
function acquireInFlightSlot() {
	const hasCapacity = inFlight$1 < MAX_INFLIGHT_TOOLS_CALL;
	if (!hasCapacity) return null;
	inFlight$1 += 1;
	let holding = true;
	return () => {
		if (holding) {
			holding = false;
			inFlight$1 -= 1;
		}
	};
}
|
|
4045
|
+
|
|
4046
|
+
//#endregion
|
|
4047
|
+
//#region src/lib/diagnose-response.ts
|
|
4048
|
+
// Maximum number of body characters echoed into the diagnostic log line.
const PREVIEW_LIMIT = 200;
/**
 * Parse a Response body as JSON, logging a diagnostic line when that fails.
 *
 * The body is read exactly once as text and then parsed. Compared with
 * cloning the response up front, this keeps the success path from holding a
 * second, never-consumed copy of the body, while the failure path still has
 * the full text available for the preview.
 *
 * @param {Response} response Upstream response whose body has not been read.
 * @param {string} routePath Route label included in the log line.
 * @returns {Promise<any>} The parsed JSON value.
 * @throws Re-throws the original read or parse error after logging it.
 */
async function parseJsonOrDiagnose(response, routePath) {
	const report = (bodyText) => {
		const contentType = response.headers.get("content-type") ?? "(none)";
		const preview = bodyText.length > PREVIEW_LIMIT ? bodyText.slice(0, PREVIEW_LIMIT) + "...(truncated)" : bodyText;
		consola.error(`Upstream JSON parse failed at ${routePath}: status=${response.status} content-type="${contentType}" body[0..${PREVIEW_LIMIT}]=${JSON.stringify(preview)}`);
	};
	let bodyText;
	try {
		bodyText = await response.text();
	} catch (error) {
		// The body could not be read at all; still leave a trace before failing.
		report("(unreadable)");
		throw error;
	}
	try {
		return JSON.parse(bodyText);
	} catch (error) {
		report(bodyText);
		throw error;
	}
}
|
|
4061
|
+
|
|
4062
|
+
//#endregion
|
|
4063
|
+
//#region src/lib/response-cap.ts
|
|
4064
|
+
/**
 * Default hard byte cap for non-streaming upstream response bodies (10 MiB).
 *
 * Buffering an arbitrarily large upstream body could exhaust the proxy's
 * memory, so non-streaming reads are bounded.
 */
const MAX_RESPONSE_BODY_BYTES = 10 * 1024 * 1024;
/**
 * Read a Response body with a hard byte cap, then parse it as JSON.
 *
 * Fast path: when a Content-Length header is present and within the cap, or
 * when the response exposes no readable body stream, parsing is delegated to
 * `parseJsonOrDiagnose`. Otherwise the body is read chunk by chunk and the
 * reader is cancelled as soon as the running total exceeds the cap.
 *
 * @param {Response} response Upstream response whose body has not been read.
 * @param {string} routePath Route label used in log and error messages.
 * @param {number} [capBytes=MAX_RESPONSE_BODY_BYTES] Maximum accepted body size in bytes.
 * @returns {Promise<{ok: true, value: any} | {ok: false, status: number, errorResponse: object}>}
 *   `{ ok: true, value }` on success. When the cap is exceeded,
 *   `{ ok: false, status: 502, errorResponse }` where `errorResponse` is an
 *   `{ type: "error", error: { type: "api_error", message } }` object.
 * @throws Re-throws the JSON parse error after logging a body preview.
 */
async function readResponseBodyCapped(response, routePath, capBytes = MAX_RESPONSE_BODY_BYTES) {
	const contentLengthHeader = response.headers.get("content-length");
	const contentLength = contentLengthHeader ? Number.parseInt(contentLengthHeader, 10) : NaN;
	if (!Number.isNaN(contentLength) && contentLength <= capBytes) return {
		ok: true,
		value: await parseJsonOrDiagnose(response, routePath)
	};
	const reader = response.body?.getReader();
	if (!reader) return {
		ok: true,
		value: await parseJsonOrDiagnose(response, routePath)
	};
	// Describe the cap actually in force rather than assuming the default.
	const MIB = 1024 * 1024;
	const capLabel = capBytes % MIB === 0 ? `${capBytes / MIB} MiB` : `${capBytes} bytes`;
	const chunks = [];
	let totalBytes = 0;
	let capped = false;
	try {
		while (true) {
			const { done, value } = await reader.read();
			if (done) break;
			if (!value) continue;
			totalBytes += value.byteLength;
			if (totalBytes > capBytes) {
				capped = true;
				try {
					// Best effort: a failed cancel must not mask the cap result.
					await reader.cancel("size_cap");
				} catch {}
				break;
			}
			chunks.push(value);
		}
	} catch (err) {
		// A mid-stream read error is logged; whatever was received is still
		// handed to JSON.parse below, which reports the truncated body.
		if (!capped) consola.warn(`readResponseBodyCapped: read error at ${routePath}:`, err);
	}
	if (capped) {
		consola.warn(`Non-streaming upstream response at ${routePath} exceeded ${capBytes} bytes (${capLabel} cap); dropping body to prevent OOM. Check upstream health.`);
		return {
			ok: false,
			status: 502,
			errorResponse: {
				type: "error",
				error: {
					type: "api_error",
					message: `Upstream response body exceeded the ${capLabel} size cap for non-streaming ${routePath}. The upstream may be misbehaving. Try enabling streaming (stream: true) which handles large responses chunk-by-chunk.`
				}
			}
		};
	}
	// Concatenate the accepted chunks into one buffer before decoding.
	const merged = new Uint8Array(totalBytes);
	let offset = 0;
	for (const chunk of chunks) {
		merged.set(chunk, offset);
		offset += chunk.byteLength;
	}
	const text = new TextDecoder().decode(merged);
	try {
		return {
			ok: true,
			value: JSON.parse(text)
		};
	} catch (err) {
		const preview = text.slice(0, 200);
		const contentType = response.headers.get("content-type") ?? "(none)";
		consola.error(`Upstream JSON parse failed at ${routePath}: status=${response.status} content-type="${contentType}" body[0..200]=${JSON.stringify(preview)}`);
		throw err;
	}
}
|
|
4156
|
+
|
|
4157
|
+
//#endregion
|
|
4158
|
+
//#region src/services/copilot/create-chat-completions.ts
|
|
4159
|
+
/**
 * Send a chat-completions payload to `${copilotBaseUrl(state)}/chat/completions`.
 *
 * @param {object} payload Request body; `payload.messages` is inspected to
 *   decide the vision flag and the "X-Initiator" header.
 * @param {object} [modelHeaders] Extra headers merged over the Copilot defaults.
 * @param {AbortSignal} [callerSignal] Optional caller-side abort signal,
 *   combined with the upstream timeout signal when both exist.
 * @returns The event stream from `events(response)` when `payload.stream` is
 *   truthy, otherwise the size-capped, parsed JSON body.
 * @throws {Error} When no Copilot token is present in `state`.
 * @throws {HTTPError} On a non-ok upstream status or an over-cap body.
 */
const createChatCompletions = async (payload, modelHeaders, callerSignal) => {
	if (!state.copilotToken) throw new Error("Copilot token not found");
	const carriesImage = (message) => typeof message.content !== "string" && message.content?.some((part) => part.type === "image_url");
	const enableVision = payload.messages.some(carriesImage);
	// Any assistant/tool message marks the request as agent-initiated.
	const agentRoles = ["assistant", "tool"];
	const isAgentCall = payload.messages.some((message) => agentRoles.includes(message.role));
	const url = `${copilotBaseUrl(state)}/chat/completions`;
	// Rebuilt on every attempt so a retry picks up the current headers.
	const doFetch = () => {
		const headers = {
			...copilotHeaders(state, enableVision),
			...modelHeaders,
			"X-Initiator": isAgentCall ? "agent" : "user"
		};
		const fetchInit = {
			method: "POST",
			headers,
			body: JSON.stringify(payload)
		};
		const abortSignals = [];
		if (UPSTREAM_FETCH_TIMEOUT_MS > 0) abortSignals.push(AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS));
		if (callerSignal) abortSignals.push(callerSignal);
		switch (abortSignals.length) {
			case 0:
				break;
			case 1:
				fetchInit.signal = abortSignals[0];
				break;
			default:
				fetchInit.signal = AbortSignal.any(abortSignals);
		}
		return fetch(url, fetchInit);
	};
	const response = await tryRefreshAndRetry(doFetch, "/chat/completions");
	if (response.ok) {
		if (payload.stream) return events(response);
		const cappedResult = await readResponseBodyCapped(response, "/v1/chat/completions", MAX_RESPONSE_BODY_BYTES);
		if (cappedResult.ok) return cappedResult.value;
		throw new HTTPError("Upstream /v1/chat/completions response exceeded 10 MiB size cap", new Response(JSON.stringify(cappedResult.errorResponse), {
			status: cappedResult.status,
			headers: { "content-type": "application/json" }
		}));
	}
	// Failure path: capture the body for the log and for the re-wrapped response.
	let errorBody = "";
	try {
		errorBody = await response.text();
	} catch {
		errorBody = "(could not read error body)";
	}
	const claudeModels = state.models?.data.filter((m) => m.id.startsWith("claude")).map((m) => m.id).join(", ") ?? "(models not loaded)";
	consola.error(`Copilot rejected model "${payload.model}": ${response.status} ${errorBody} (available Claude models: ${claudeModels})`);
	throw new HTTPError("Failed to create chat completions", new Response(errorBody, {
		status: response.status,
		statusText: response.statusText,
		headers: response.headers
	}));
};
|
|
4205
|
+
|
|
4206
|
+
//#endregion
|
|
4207
|
+
//#region src/lib/browser-mcp/compressor.ts
|
|
4208
|
+
/**
 * Compressor backend candidates in order of preference; the first entry that
 * qualifies is used.
 */
const COMPRESSOR_FALLBACK_CHAIN = [
	"gemini-3.5-flash",
	"gpt-5.4-mini",
	"claude-haiku-4-5"
];
// Memoized winner; stays unset until a selection succeeds.
let selectedBackend;
/**
 * Choose the compressor backend from the model catalog in `state.models.data`.
 *
 * Walks `COMPRESSOR_FALLBACK_CHAIN` in order and picks the first id that is
 * present in the catalog with `capabilities.supports.tool_calls === true`.
 * A successful pick is cached and returned by every later call; misses are
 * not cached, so a catalog that loads later is picked up.
 *
 * @returns {string | undefined} The chosen model id, or `undefined` when the
 *   catalog is missing or no candidate qualifies.
 */
function pickBackendFromCatalog() {
	if (selectedBackend) return selectedBackend;
	const catalog = state.models?.data;
	if (!catalog) return void 0;
	const winner = COMPRESSOR_FALLBACK_CHAIN.find((id) => {
		const entry = catalog.find((m) => m.id === id);
		return entry?.capabilities?.supports?.tool_calls === true;
	});
	if (winner === void 0) return void 0;
	selectedBackend = winner;
	consola.info(`[browser-mcp] compressor backend: ${winner}`);
	return winner;
}
|
|
4239
|
+
/**
 * Report whether a compressor backend can currently be selected.
 *
 * @returns {boolean} `true` exactly when `pickBackendFromCatalog()` yields a
 *   value other than `undefined`.
 */
function compressorAvailable() {
	const backend = pickBackendFromCatalog();
	return backend !== void 0;
}
|
|
4249
|
+
/**
 * Perform one non-streaming round-trip to the selected compressor backend
 * and return the parsed JSON it produced.
 *
 * The request offers exactly one function tool and pins `tool_choice` to it.
 * The reply is read from the first tool call's `arguments` string when that
 * is non-empty; otherwise from `message.content`, after `stripCodeFence`
 * removes a wrapping code fence. An in-flight slot is held for the duration
 * of the call and always released.
 *
 * @param {string} systemPrompt Content of the system message.
 * @param {string} userMessage Content of the user message.
 * @param {{name: string, description: string, parameters: object}} tool Tool schema.
 * @param {AbortSignal} [signal] Forwarded to `createChatCompletions`.
 * @returns {Promise<any>} Parsed JSON from the tool call or the text reply.
 * @throws {Error} When no backend is available, no slot is free, or the
 *   reply carries neither tool-call arguments nor text content.
 */
async function callCompressor(systemPrompt, userMessage, tool, signal) {
	const model = pickBackendFromCatalog();
	if (!model) throw new Error(`browser-mcp compressor: no backend available in catalog. Checked: ${COMPRESSOR_FALLBACK_CHAIN.join(", ")}`);
	const release = acquireInFlightSlot();
	if (!release) throw new Error("browser-mcp compressor: inflight slot saturated (cap 8); try again shortly");
	try {
		const request = {
			model,
			stream: false,
			messages: [{
				role: "system",
				content: systemPrompt
			}, {
				role: "user",
				content: userMessage
			}],
			tools: [{
				type: "function",
				function: {
					name: tool.name,
					description: tool.description,
					parameters: tool.parameters
				}
			}],
			tool_choice: {
				type: "function",
				function: { name: tool.name }
			}
		};
		const completion = await createChatCompletions(request, void 0, signal);
		const reply = completion.choices?.[0]?.message;
		const toolArgs = reply?.tool_calls?.[0]?.function?.arguments;
		if (typeof toolArgs === "string" && toolArgs.length > 0) return JSON.parse(toolArgs);
		// Fallback for backends that answered in free-form text instead.
		const text = typeof reply?.content === "string" ? reply.content : "";
		if (text.length === 0) throw new Error("browser-mcp compressor: empty response from backend (no tool_calls and no content)");
		return JSON.parse(stripCodeFence(text));
	} finally {
		release();
	}
}
|
|
4299
|
+
/**
 * Thin pass-through to `callCompressor`, taking the same four arguments and
 * resolving to the same parsed value.
 *
 * It exists as a separate function so the underlying `callCompressor`
 * signature can change without altering this entry point.
 */
async function callCompressorPublic(systemPrompt, userMessage, tool, signal) {
	const parsed = await callCompressor(systemPrompt, userMessage, tool, signal);
	return parsed;
}
|
|
4314
|
+
/**
 * Unwrap a reply that is entirely enclosed in one triple-backtick fence,
 * optionally tagged `json`, so the remainder can go to `JSON.parse`.
 *
 * Text that is not fully fenced is returned trimmed and otherwise untouched,
 * so the function is a no-op on fence-free input.
 *
 * @param {string} text Free-form model reply.
 * @returns {string} The trimmed fence contents, or the trimmed input.
 */
function stripCodeFence(text) {
	const FENCE = "```";
	const trimmed = text.trim();
	// An opening and a closing fence need at least six characters between them.
	const fullyFenced = trimmed.length >= FENCE.length * 2 && trimmed.startsWith(FENCE) && trimmed.endsWith(FENCE);
	if (!fullyFenced) return trimmed;
	let inner = trimmed.slice(FENCE.length, -FENCE.length);
	// Only a lowercase "json" tag directly after the opening fence is dropped.
	if (inner.startsWith("json")) inner = inner.slice(4);
	return inner.trim();
}
|
|
4327
|
+
/**
 * Resolve a natural-language intent to one element and the action to run.
 *
 * Step 1 asks `deterministicResolve` with the parsed intent. Its answer is
 * returned as-is (ref, action, confidence and, when defined, value) unless
 * it reports `source: "escalate"` or an empty ref.
 * Step 2, on escalation, asks `pickMatchingElements`, passing along the
 * cascade's candidates, takes the first match, and derives the action with
 * `inferAction`. This path always reports confidence 0.8 and attaches
 * `value` only for fill/type/select actions.
 *
 * @returns {Promise<{ref: string, action: string, confidence: number, value?: any}>}
 *   `{ ref: "", action: "click", confidence: 0 }` when nothing matches or the
 *   matched ref is not present in the snapshot.
 */
async function pickElement(snapshot, intent, signal, value) {
	const resolved = deterministicResolve(snapshot, parseIntent(intent), value);
	const resolvedLocally = resolved.source !== "escalate" && resolved.ref !== "";
	if (resolvedLocally) return {
		ref: resolved.ref,
		action: resolved.action,
		confidence: resolved.confidence,
		...resolved.value !== void 0 ? { value: resolved.value } : {}
	};
	const noMatch = () => ({
		ref: "",
		action: "click",
		confidence: 0
	});
	const ranked = await pickMatchingElements(snapshot, intent, signal, resolved.candidates);
	if (ranked.length === 0) return noMatch();
	const [best] = ranked;
	const element = snapshot.elements.find((candidate) => candidate.ref === best.ref);
	if (!element) return noMatch();
	const action = inferAction(element.role, intent, value);
	// A value is only meaningful for actions that write into the element.
	const takesValue = action === "fill" || action === "type" || action === "select";
	return {
		ref: best.ref,
		action,
		confidence: .8,
		...value !== void 0 && takesValue ? { value } : {}
	};
}
|
|
4383
|
+
/**
 * Decide which primitive action to dispatch for an element.
 *
 * Rules, in priority order:
 *  1. an intent mentioning "scroll" -> "scroll_into_view"
 *  2. role select/combobox -> "select"
 *  3. text-entry roles -> "type" when the intent says "type" and a value
 *     was supplied, otherwise "fill"
 *  4. anything else -> "click"
 *
 * @param {string} role Element role; compared case-insensitively.
 * @param {string} intent Original intent text; compared case-insensitively.
 * @param {*} [value] Value supplied with the intent, if any.
 * @returns {"scroll_into_view"|"select"|"type"|"fill"|"click"}
 */
function inferAction(role, intent, value) {
	const phrase = intent.toLowerCase();
	const kind = role.toLowerCase();
	const mentionsScroll = /\bscroll\b/.test(phrase) || /scroll[ -]?into[ -]?view/.test(phrase);
	if (mentionsScroll) return "scroll_into_view";
	switch (kind) {
		case "select":
		case "combobox":
			return "select";
		case "textarea":
		case "input":
		case "textbox":
		case "searchbox":
		case "spinbutton":
			return /\btype\b/.test(phrase) && value !== void 0 ? "type" : "fill";
		default:
			return "click";
	}
}
|
|
4401
|
+
// System prompt for the element-matching call. Paragraphs are separated
// by one blank line.
const FIND_ELEMENTS_SYSTEM = [
	"You match a natural-language intent to elements from a browser page snapshot.",
	`Snapshot elements look like: {ref: "e42", role: "button", name: "Sign in"}.`,
	"Call the find_elements tool with up to 5 best matches ordered by relevance."
].join("\n\n");

// Tool definition passed alongside FIND_ELEMENTS_SYSTEM: the answer is
// a ranked `matches` array (at most 5) of {ref, reason} objects.
const FIND_ELEMENTS_TOOL = {
	name: "find_elements",
	description: "Report ranked element matches for the intent.",
	parameters: {
		type: "object",
		required: ["matches"],
		additionalProperties: false,
		properties: {
			matches: {
				type: "array",
				maxItems: 5,
				items: {
					type: "object",
					required: ["ref", "reason"],
					additionalProperties: false,
					properties: {
						ref: { type: "string" },
						reason: { type: "string" }
					}
				}
			}
		}
	}
};
|
|
4428
|
+
/**
 * Produce at most 5 ranked `{ref, reason}` matches for an intent.
 * Returns an empty array when nothing usable comes back.
 *
 * Flow:
 *   - No `shortlist` supplied: run the deterministic cascade first.
 *     A confident hit whose ref exists in the snapshot is returned as
 *     a single synthesized match and the model call is skipped;
 *     otherwise the cascade's candidates become the shortlist.
 *   - A non-empty shortlist restricts the elements sent to the model
 *     to those refs; an empty / missing one sends every element.
 *   - The model reply is sanitized: non-object entries and entries
 *     without a non-empty string `ref` are dropped, and a non-string
 *     `reason` becomes "".
 *
 * @param {{elements: Array<{ref: string, role: string, name: string}>}} snapshot
 * @param {string} intent
 * @param {AbortSignal} [signal] Forwarded to the compressor call.
 * @param {Array<{ref: string}>} [shortlist] Candidate refs from a prior cascade run.
 * @returns {Promise<Array<{ref: string, reason: string}>>}
 */
async function pickMatchingElements(snapshot, intent, signal, shortlist) {
	let candidates = shortlist;
	if (!candidates) {
		const resolved = deterministicResolve(snapshot, parseIntent(intent));
		const confident = resolved.source !== "escalate" && resolved.ref !== "";
		if (confident && snapshot.elements.some((el) => el.ref === resolved.ref)) {
			return [{
				ref: resolved.ref,
				reason: `deterministic ${resolved.source}: ${resolved.reason}`
			}];
		}
		candidates = resolved.candidates;
	}

	// Narrow the payload to shortlisted refs when there are any.
	const allowedRefs = candidates && candidates.length > 0 ? new Set(candidates.map((c) => c.ref)) : undefined;
	const pool = allowedRefs ? snapshot.elements.filter((el) => allowedRefs.has(el.ref)) : snapshot.elements;
	const payload = JSON.stringify({
		intent,
		elements: pool.map((el) => ({
			ref: el.ref,
			role: el.role,
			name: el.name
		}))
	});

	const reply = await callCompressor(FIND_ELEMENTS_SYSTEM, payload, FIND_ELEMENTS_TOOL, signal);
	if (!reply || typeof reply !== "object") return [];
	const { matches } = reply;
	if (!Array.isArray(matches)) return [];

	// Cap first, then sanitize — invalid entries among the first five are not back-filled.
	return matches
		.slice(0, 5)
		.filter((m) => m && typeof m === "object" && typeof m.ref === "string" && m.ref.length > 0)
		.map((m) => ({
			ref: m.ref,
			reason: typeof m.reason === "string" ? m.reason : ""
		}));
}
|
|
4477
|
+
// System prompt for structured extraction. Paragraphs are separated by
// one blank line.
const EXTRACT_SYSTEM = [
	"You extract structured data from a browser page snapshot into a JSON object matching the result schema you've been given.",
	"Use the snapshot's text + element list as your source. Be faithful to what's visible; do not invent values.",
	"Call the extract_result tool with your answer in the result field. The result field's schema is the caller's exact requested shape — fill it completely. If a field cannot be determined from the snapshot, omit it (when optional) or use a sensible empty value (when required)."
].join("\n\n");
|
|
4482
|
+
/**
 * Lightweight sanity check on a caller-supplied JSON Schema.
 *
 * The schema must be a non-null, non-array object AND declare at
 * least one of: a recognized `type`, `properties`, `items`, `$ref`,
 * or a combinator (`oneOf` / `anyOf` / `allOf`). This rejects the
 * empty `{}` schema and malformed ones such as `{type: "nonsense"}`.
 *
 * `type` may be a single type name or — as JSON Schema permits — a
 * non-empty array of type names (e.g. `["string", "null"]`). Every
 * entry of the array form must be a recognized name.
 *
 * @param {unknown} schema Candidate JSON Schema.
 * @returns {string|undefined} An error message when the schema fails
 *   the check, or undefined when it looks plausible.
 */
function validateExtractSchema(schema) {
	if (!schema || typeof schema !== "object" || Array.isArray(schema)) return "schema must be a non-null JSON object";

	const validTypes = [
		"object",
		"array",
		"string",
		"number",
		"integer",
		"boolean",
		"null"
	];
	const isTypeName = (candidate) => typeof candidate === "string" && validTypes.includes(candidate);

	// Accept both the string form and the array-of-strings form of `type`.
	const declared = schema.type;
	const hasValidType = Array.isArray(declared) ? declared.length > 0 && declared.every(isTypeName) : isTypeName(declared);

	const shapeKeywords = ["properties", "items", "$ref", "oneOf", "anyOf", "allOf"];
	const hasShape = shapeKeywords.some((keyword) => keyword in schema);

	const typeList = validTypes.join(", ");
	if (!hasValidType && !hasShape) return `schema must declare a recognized type (one of ${typeList}) OR have properties / items / $ref / oneOf / anyOf / allOf`;
	// A `type` that is present must be valid even when shape keywords exist.
	if ("type" in schema && !hasValidType) return `schema 'type' field must be one of: ${typeList}`;
	return undefined;
}
|
|
4512
|
+
/**
|
|
4513
|
+
* Structured extraction. The caller's JSON schema is injected directly
|
|
4514
|
+
* into the extract_result tool's `result` parameter so the model's
|
|
4515
|
+
* tool-call mechanism enforces shape — the model can't satisfy the
|
|
4516
|
+
* call without producing data of the requested shape.
|
|
4517
|
+
*
|
|
4518
|
+
* Schema is pre-validated by `validateExtractSchema` — bad schemas
|
|
4519
|
+
* fail loud with a clear `SchemaValidationError` instead of slipping
|
|
4520
|
+
* through to the upstream (which is permissive enough to accept
|
|
4521
|
+
* garbage and let the model return a useless primitive).
|
|
4522
|
+
*
|
|
4523
|
+
* Post-validation: if the model's `result` ended up as a primitive
|
|
4524
|
+
* (string / number / boolean) when the schema declared object / array,
|
|
4525
|
+
* surface the shape mismatch — the model returned the wrong type and
|
|
4526
|
+
* the caller should know rather than receive a confusing value.
|
|
4527
|
+
*/
|
|
4528
|
+
/**
 * Error raised by `extractStructured` when the caller-supplied schema
 * is rejected by `validateExtractSchema`; the message is the
 * validator's explanation.
 */
var SchemaValidationError = class extends Error {
	/** @param {string} reason Human-readable description of the schema problem. */
	constructor(reason) {
		super(reason);
		// Distinguishes this error from a plain Error by name.
		this.name = "SchemaValidationError";
	}
};
|
|
4534
|
+
/**
 * Error raised by `extractStructured` when the schema declares type
 * "object" or "array" but the extracted result is a different kind
 * of value.
 */
var ResultShapeError = class extends Error {
	/** @param {string} reason Human-readable description of the shape mismatch. */
	constructor(reason) {
		super(reason);
		// Distinguishes this error from a plain Error by name.
		this.name = "ResultShapeError";
	}
};
|
|
4540
|
+
/**
 * Extract structured data from a snapshot using the caller's schema.
 *
 * The schema is validated up front, then embedded as the `result`
 * parameter of the `extract_result` tool. The `result` field of the
 * reply is returned when present, otherwise the raw reply. When the
 * schema declares type "object" or "array", the result's kind is
 * checked against it.
 *
 * @param {{text: string, elements: Array}} snapshot Only `text` and `elements` are sent.
 * @param {object} schema Caller-supplied JSON Schema for the result.
 * @param {string} instruction What to extract.
 * @param {AbortSignal} [signal] Forwarded to the compressor call.
 * @returns {Promise<*>} The extracted value.
 * @throws {SchemaValidationError} When `validateExtractSchema` rejects the schema.
 * @throws {ResultShapeError} When an object / array schema gets a mismatched result.
 */
async function extractStructured(snapshot, schema, instruction, signal) {
	const problem = validateExtractSchema(schema);
	if (problem) throw new SchemaValidationError(problem);

	const request = JSON.stringify({
		instruction,
		snapshot: {
			text: snapshot.text,
			elements: snapshot.elements
		}
	});
	const extractTool = {
		name: "extract_result",
		description: "Report the extracted object. The result field's schema is the caller's requested shape; fill it completely.",
		parameters: {
			type: "object",
			required: ["result"],
			additionalProperties: false,
			properties: { result: schema }
		}
	};
	const reply = await callCompressor(EXTRACT_SYSTEM, request, extractTool, signal);

	// Unwrap `{result: ...}` when the reply has that envelope; otherwise pass it through.
	let extracted = reply;
	if (reply && typeof reply === "object" && "result" in reply) extracted = reply.result;

	const resultIsArray = Array.isArray(extracted);
	const resultIsPlainObject = typeof extracted === "object" && extracted !== null && !resultIsArray;
	switch (schema.type) {
		case "object":
			if (!resultIsPlainObject) throw new ResultShapeError(`schema declared type "object" but model returned ${describeType(extracted)}`);
			break;
		case "array":
			if (!resultIsArray) throw new ResultShapeError(`schema declared type "array" but model returned ${describeType(extracted)}`);
			break;
		default:
			break;
	}
	return extracted;
}
|
|
4565
|
+
/**
 * Name a value's kind for error messages: like `typeof`, except that
 * `null` reports "null" and arrays report "array".
 *
 * @param {*} v
 * @returns {string}
 */
function describeType(v) {
	return v === null ? "null" : Array.isArray(v) ? "array" : typeof v;
}
|
|
4570
|
+
// System prompt for the screenshot-based picker. Paragraphs are
// separated by one blank line.
const PICK_VISUAL_SYSTEM = [
	"You're given a browser screenshot, a natural-language intent, and a list of canvas / svg regions in CSS-pixel coordinates.",
	"Find the pixel coordinates in the screenshot where the intent points. Coordinates are CSS pixels (origin top-left of viewport).",
	"Call the pick_visual tool with the coordinates. If no clear target is visible, call with x=0, y=0, confidence=0."
].join("\n\n");

// Tool definition passed alongside PICK_VISUAL_SYSTEM: a single point
// plus a confidence and a free-text reason, all required.
const PICK_VISUAL_TOOL = {
	name: "pick_visual",
	description: "Report the pixel coordinates the intent points at.",
	parameters: {
		type: "object",
		required: ["x", "y", "confidence", "reason"],
		additionalProperties: false,
		properties: {
			x: { type: "number" },
			y: { type: "number" },
			confidence: { type: "number" },
			reason: { type: "string" }
		}
	}
};
|
|
4595
|
+
/**
 * Screenshot-based fallback picker. Sends the intent and the
 * visual-surface list as a text part and the screenshot as a data-URL
 * image part, then normalizes the reply into a point.
 *
 * Normalization: `x` / `y` are rounded to integers (0 when not a
 * number), `confidence` is clamped to [0, 1] (0 when not a number),
 * and `reason` defaults to "". A missing or non-object reply yields
 * the zero point with reason "empty backend response".
 *
 * @param {string} screenshotB64 Base64-encoded image bytes.
 * @param {string} contentType   MIME type used in the data URL.
 * @param {string} intent        Natural-language intent.
 * @param {Array} visualSurfaces Surfaces list, sent as `visual_surfaces`.
 * @param {AbortSignal} [signal] Forwarded to the compressor call.
 * @returns {Promise<{x: number, y: number, confidence: number, reason: string}>}
 */
async function pickElementVisual(screenshotB64, contentType, intent, visualSurfaces, signal) {
	const textPart = {
		type: "text",
		text: JSON.stringify({
			intent,
			visual_surfaces: visualSurfaces
		})
	};
	const imagePart = {
		type: "image_url",
		image_url: { url: `data:${contentType};base64,${screenshotB64}` }
	};
	const reply = await callCompressor(PICK_VISUAL_SYSTEM, [textPart, imagePart], PICK_VISUAL_TOOL, signal);

	if (!reply || typeof reply !== "object") {
		return {
			x: 0,
			y: 0,
			confidence: 0,
			reason: "empty backend response"
		};
	}

	const toPixel = (n) => typeof n === "number" ? Math.round(n) : 0;
	const { x, y, confidence, reason } = reply;
	return {
		x: toPixel(x),
		y: toPixel(y),
		confidence: typeof confidence === "number" ? Math.max(0, Math.min(1, confidence)) : 0,
		reason: typeof reason === "string" ? reason : ""
	};
}
|
|
4628
|
+
|
|
4629
|
+
//#endregion
|
|
4630
|
+
//#region src/lib/browser-mcp/decompose.ts
|
|
4631
|
+
// Login template, case-insensitive: "log in" / "login" / "log-in", an
// optional "to <site> ", then "with <user> / <password>". Group 1 is
// the user (no whitespace or "/"); group 2 is the rest of the line up
// to trailing whitespace.
const LOGIN_RE = /^log[ -]?in (?:to .+? )?with\s+([^\s/]+)\s*\/\s*(.+?)\s*$/i;
|
|
4632
|
+
// Search-and-open template, case-insensitive: "search [for] <query>
// and click [the] first result". Group 1 is the query.
const SEARCH_CLICK_RE = /^search\s+(?:for\s+)?(.+?)\s+and\s+click\s+(?:the\s+)?first\s+result\s*$/i;
|
|
4633
|
+
// Step separators for compound intents, case-insensitive: " and then ",
// " then ", ";" and ", and ", together with surrounding whitespace.
// No capture groups, so String.prototype.split yields only the parts.
const CONJUNCTION_SPLIT_RE = /\s*(?:\s+and\s+then\s+|\s+then\s+|\s*;\s*|\s*,\s+and\s+)\s*/i;
|
|
4634
|
+
/**
 * Break a natural-language intent into atomic steps.
 *
 * Templates are tried in order:
 *   1. "login"        — LOGIN_RE: fill user, fill password, press the
 *                       sign-in button. The `value` argument is unused.
 *   2. "search_click" — SEARCH_CLICK_RE: fill the search input, submit,
 *                       open the first result. `value` is unused.
 *   3. "conjunction"  — CONJUNCTION_SPLIT_RE yields two or more
 *                       non-empty parts; `value` goes to the first.
 *   4. "fallback"     — a single step holding the trimmed intent and
 *                       `value` if given. Used for empty intents too.
 *
 * @param {*} intent  Intent text; null / undefined is treated as "".
 * @param {*} [value] Optional value accompanying the intent.
 * @returns {{steps: Array<{intent: string, value?: *}>, template: string, successSummary?: string}}
 */
function decompose(intent, value) {
	const text = String(intent ?? "").trim();

	// Attach the caller's value to a step only when one was supplied.
	const withValue = (step) => value === undefined ? step : { ...step, value };
	const singleStep = () => ({
		steps: [withValue({ intent: text })],
		template: "fallback"
	});

	if (text === "") return singleStep();

	const login = LOGIN_RE.exec(text);
	if (login) {
		const [, user, pass] = login;
		return {
			steps: [
				{ intent: "the email or username input", value: user.trim() },
				{ intent: "the password input", value: pass.trim() },
				{ intent: "the Sign in or Log in button" }
			],
			template: "login",
			successSummary: "logged in"
		};
	}

	const search = SEARCH_CLICK_RE.exec(text);
	if (search) {
		const query = search[1].trim();
		return {
			steps: [
				{ intent: "the search input", value: query },
				{ intent: "the search button or submit" },
				{ intent: "the first search result" }
			],
			template: "search_click",
			successSummary: `searched for "${query}" and opened first result`
		};
	}

	if (CONJUNCTION_SPLIT_RE.test(text)) {
		const pieces = text.split(CONJUNCTION_SPLIT_RE).map((piece) => piece.trim()).filter((piece) => piece.length > 0);
		if (pieces.length >= 2) {
			const [first, ...rest] = pieces;
			return {
				steps: [withValue({ intent: first }), ...rest.map((piece) => ({ intent: piece }))],
				template: "conjunction"
			};
		}
	}

	return singleStep();
}
|
|
4707
|
+
|
|
4708
|
+
//#endregion
|
|
4709
|
+
//#region src/lib/browser-mcp/observe.ts
|
|
4710
|
+
// System prompt for the page-description call. Paragraphs are
// separated by one blank line.
const OBSERVE_SYSTEM = [
	`You describe a web page for an AI assistant that cannot see the DOM.`,
	`Write 2-4 sentences focused on user-actionable elements (forms, buttons, links) and the page's purpose. If 'intent' is provided, focus the description on the region most relevant to that intent.`,
	`DO NOT mention DOM refs, selectors, bbox coordinates, or any internal identifiers. Plain prose only. Treat the reader as someone who will issue commands like "click the Sign In button" — describe what's there in terms they can act on.`,
	`Call the describe_page tool with your description.`
].join("\n\n");

// Tool definition passed alongside OBSERVE_SYSTEM: one required
// `description` string.
const OBSERVE_TOOL = {
	name: "describe_page",
	description: "Report the natural-language description of the page.",
	parameters: {
		type: "object",
		required: ["description"],
		additionalProperties: false,
		properties: {
			description: {
				type: "string",
				description: "2-4 sentence prose description of the visible page state."
			}
		}
	}
};
|
|
4730
|
+
/**
 * Ask the compressor for a prose description of the current page.
 *
 * The request carries the intent, URL, title, the first 4000
 * characters of page text, up to 80 named elements reduced to
 * {role, name}, and whether any visual surfaces were reported.
 *
 * @param {object} snapshot Page snapshot; `elements` is required, while
 *   `url`, `title`, `text` and `visualSurfaces` are optional.
 * @param {string} [intent] Optional focus for the description.
 * @param {AbortSignal} [signal] Forwarded to the compressor call.
 * @returns {Promise<{description: string, hasVisualSurfaces: boolean, url?: string, title?: string}>}
 *   `description` falls back to a fixed sentence when the reply has no
 *   string description; `url` / `title` appear only when truthy.
 */
async function observePage(snapshot, intent, signal) {
	const { url, title, text, visualSurfaces } = snapshot;
	const hasVisualSurfaces = Boolean(visualSurfaces && visualSurfaces.length > 0);

	// Nameless elements are dropped before the 80-element cap is applied.
	const actionable = snapshot.elements
		.filter((el) => el.name && el.name.length > 0)
		.slice(0, 80)
		.map(({ role, name }) => ({ role, name }));

	const request = JSON.stringify({
		intent: intent ?? "",
		url: url ?? "",
		title: title ?? "",
		visible_text: (text ?? "").slice(0, 4e3),
		actionable_elements: actionable,
		has_visual_surfaces: hasVisualSurfaces
	});
	const reply = await callCompressorPublic(OBSERVE_SYSTEM, request, OBSERVE_TOOL, signal);

	const described = reply && typeof reply === "object" && typeof reply.description === "string";
	return {
		description: described ? reply.description : "Page contents could not be described.",
		hasVisualSurfaces,
		...(url ? { url } : {}),
		...(title ? { title } : {})
	};
}
|
|
4755
|
+
|
|
4756
|
+
//#endregion
|
|
4757
|
+
//#region src/lib/browser-mcp/planner.ts
|
|
4758
|
+
// System prompt for the compound-intent replanner. Paragraphs are
// separated by one blank line.
const PLANNER_SYSTEM = [
	`You are a browser-automation replanner. A user issued a high-level intent that was decomposed into atomic steps. Several steps ran successfully, then one failed. You see the page state AFTER the failure and decide what to do next.`,
	`Your job: produce a revised list of atomic steps that will accomplish the original intent given the current page. If you cannot — the page has changed in a way that makes the intent impossible (login form vanished, navigation moved elsewhere, captcha appeared) — return an empty list and explain why in reasoning.`,
	`Each replanned step is a free-form natural-language intent ("the email input", "the Sign In button at the bottom of the form") plus an optional value for fill/type/select actions. Be SPECIFIC about element location ("at the bottom of the form", "in the top navigation") so the deterministic matcher cascade can resolve it without ambiguity. Do NOT reference element refs.`,
	`Cost rule: you get ONE call per compound failure. Make every step count.`,
	`Call the replan_compound tool with your answer.`
].join("\n\n");

// Tool definition passed alongside PLANNER_SYSTEM: up to 8 revised
// steps ({intent, value?}) plus a short reasoning string.
const PLANNER_TOOL = {
	name: "replan_compound",
	description: "Report the revised atomic steps to complete the original compound intent.",
	parameters: {
		type: "object",
		required: ["steps", "reasoning"],
		additionalProperties: false,
		properties: {
			steps: {
				type: "array",
				maxItems: 8,
				items: {
					type: "object",
					required: ["intent"],
					additionalProperties: false,
					properties: {
						intent: { type: "string" },
						value: { type: "string" }
					}
				}
			},
			reasoning: {
				type: "string",
				description: "1-2 sentence explanation of the replanning decision."
			}
		}
	}
};
|
|
4795
|
+
/**
 * Ask the planner for a revised step list after a compound intent
 * failed part-way through.
 *
 * The request carries the original intent / value, the completed
 * steps, the failed step and its failure reason, and the current
 * page: URL, title, the first 3000 characters of text, and up to 80
 * elements reduced to `role` plus whichever of `name` /
 * `placeholder` / `value` are truthy.
 *
 * The reply is sanitized: at most 8 steps are read, entries without
 * a non-empty string `intent` are dropped, and `value` is kept only
 * when it is a string.
 *
 * @param {object} input `snapshot`, `originalIntent`, `originalValue`,
 *   `completedSteps`, `failedStep` and `failureReason`.
 * @param {AbortSignal} [signal] Forwarded to the compressor call.
 * @returns {Promise<{steps: Array<{intent: string, value?: string}>, reasoning: string}>}
 *   `steps` is empty when the reply is unusable or the planner gives up.
 */
async function planCompoundReplan(input, signal) {
	const { snapshot } = input;

	const actionable = snapshot.elements.slice(0, 80).map((el) => ({
		role: el.role,
		...(el.name ? { name: el.name } : {}),
		...(el.placeholder ? { placeholder: el.placeholder } : {}),
		...(el.value ? { value: el.value } : {})
	}));
	// Steps are echoed back as {intent} plus `value` only when defined.
	const echoStep = (step) => step.value === undefined ? { intent: step.intent } : { intent: step.intent, value: step.value };

	const request = JSON.stringify({
		original_intent: input.originalIntent,
		original_value: input.originalValue,
		completed_steps: input.completedSteps.map(echoStep),
		failed_step: echoStep(input.failedStep),
		failure_reason: input.failureReason,
		page_now: {
			url: snapshot.url ?? "",
			title: snapshot.title ?? "",
			visible_text: (snapshot.text ?? "").slice(0, 3e3),
			actionable_elements: actionable
		}
	});
	const reply = await callCompressorPublic(PLANNER_SYSTEM, request, PLANNER_TOOL, signal);

	if (!reply || typeof reply !== "object") {
		return {
			steps: [],
			reasoning: "planner returned empty response"
		};
	}
	const reasoning = typeof reply.reasoning === "string" ? reply.reasoning : "";
	if (!Array.isArray(reply.steps)) return { steps: [], reasoning };

	const steps = reply.steps
		.slice(0, 8)
		.filter((s) => s && typeof s === "object" && typeof s.intent === "string" && s.intent.length > 0)
		.map((s) => typeof s.value === "string" ? { intent: s.intent, value: s.value } : { intent: s.intent });
	return { steps, reasoning };
}
|
|
4857
|
+
|
|
4858
|
+
//#endregion
|
|
4859
|
+
//#region src/lib/browser-mcp/index.ts
|
|
4860
|
+
/**
 * Fetch a page snapshot for the compound tools: dispatches
 * `browser_read_page` in "summary" mode and parses the JSON held in
 * the first content entry's `text`.
 *
 * @param {number} tabId Tab to read.
 * @param {AbortSignal} [signal] Forwarded to the dispatcher.
 * @returns {Promise<*>} The parsed snapshot.
 * @throws {Error} When the envelope is flagged `isError` or carries no
 *   string text. A JSON parse failure propagates as thrown by JSON.parse.
 */
async function fetchSnapshot(tabId, signal) {
	const request = {
		tabId,
		mode: "summary"
	};
	const envelope = await dispatchBrowserTool("browser_read_page", request, signal);
	if (envelope.isError) throw new Error("browser_read_page returned an error envelope; bridge / extension not ready");

	const payload = envelope.content?.[0]?.text;
	if (typeof payload === "string") return JSON.parse(payload);
	throw new Error("browser_read_page returned no text content");
}
|
|
4877
|
+
/**
 * Wrap a payload in a single-entry text envelope.
 *
 * Strings are used verbatim; any other value is serialized as JSON
 * with 2-space indentation. `isError: true` is added only when
 * `isError` is truthy — success envelopes carry no `isError` key.
 *
 * @param {*} data Payload to wrap.
 * @param {boolean} [isError] Whether to flag the envelope as an error.
 * @returns {{content: Array<{type: "text", text: string}>, isError?: true}}
 */
function toolEnvelope(data, isError) {
	const text = typeof data === "string" ? data : JSON.stringify(data, null, 2);
	const envelope = { content: [{ type: "text", text }] };
	if (isError) envelope.isError = true;
	return envelope;
}
|
|
4890
|
+
/**
|
|
4891
|
+
* Browser-control MCP tools (`browser_*`). All entries route through
|
|
4892
|
+
* `dispatchBrowserTool()` which (1) runs the bridge-layer URL policy
|
|
4893
|
+
* check, (2) runs the install-check pre-flight (returning structured
|
|
4894
|
+
* install_required JSON when the bridge or extension isn't ready),
|
|
4895
|
+
* and (3) opens a WS to the bridge, sends the tool call, awaits the
|
|
4896
|
+
* response with a per-tool timeout.
|
|
4897
|
+
*
|
|
4898
|
+
* Each entry carries `capability: "browser"` so `browserToolsEnabled()`
|
|
4899
|
+
* in `src/routes/mcp/handler.ts` drops them at both list-time and
|
|
4900
|
+
* call-time when the operator hasn't opted in via `--browse` or
|
|
4901
|
+
* `GH_ROUTER_ENABLE_BROWSE=1`.
|
|
4902
|
+
*
|
|
4903
|
+
* v1 surface: 19 tools (Phases 3 + 4a + 4b + humanlike input v2).
|
|
4904
|
+
*/
|
|
4905
|
+
const BROWSER_TOOLS = Object.freeze([
|
|
4906
|
+
{
|
|
4907
|
+
toolNameHttp: "browser_list_tabs",
|
|
4908
|
+
description: "List all open tabs across all browser windows. Returns each tab's id (used by other browser_* tools), URL, title, active flag, and window id.",
|
|
4909
|
+
inputSchema: {
|
|
4910
|
+
type: "object",
|
|
4911
|
+
additionalProperties: false,
|
|
4912
|
+
properties: {}
|
|
4913
|
+
},
|
|
4914
|
+
capability: "browser_power",
|
|
4915
|
+
async handler(args, signal) {
|
|
4916
|
+
return dispatchBrowserTool("browser_list_tabs", args, signal);
|
|
4917
|
+
}
|
|
4918
|
+
},
|
|
4919
|
+
{
|
|
4920
|
+
toolNameHttp: "browser_open_tab",
|
|
4921
|
+
description: "Open a URL in a new browser tab and wait for the page to finish loading. Returns the new tab's id, final URL after redirects, and HTTP status. Refuses to navigate to browser-internal settings / preferences / extensions / flags pages (returns {blocked: true, reason}); devtools://* is allowed.",
|
|
4922
|
+
inputSchema: {
|
|
4923
|
+
type: "object",
|
|
4924
|
+
required: ["url"],
|
|
4925
|
+
additionalProperties: false,
|
|
4926
|
+
properties: {
|
|
4927
|
+
url: {
|
|
3350
4928
|
type: "string",
|
|
3351
|
-
|
|
3352
|
-
|
|
4929
|
+
description: "The URL to load. Maximum 8 KB. Settings / preferences / extensions / flags pages are blocked."
|
|
4930
|
+
},
|
|
4931
|
+
reuseActive: {
|
|
4932
|
+
type: "boolean",
|
|
4933
|
+
description: "When true, navigate the currently active tab instead of opening a new one. Default false."
|
|
3353
4934
|
}
|
|
3354
4935
|
}
|
|
3355
4936
|
},
|
|
3356
4937
|
capability: "browser",
|
|
3357
4938
|
async handler(args, signal) {
|
|
3358
|
-
return dispatchBrowserTool("
|
|
4939
|
+
return dispatchBrowserTool("browser_open_tab", args, signal);
|
|
3359
4940
|
}
|
|
3360
4941
|
},
|
|
3361
4942
|
{
|
|
3362
|
-
toolNameHttp: "
|
|
3363
|
-
description: "
|
|
4943
|
+
toolNameHttp: "browser_close_tab",
|
|
4944
|
+
description: "Close one or more tabs by tab id.",
|
|
3364
4945
|
inputSchema: {
|
|
3365
4946
|
type: "object",
|
|
3366
|
-
required: ["
|
|
4947
|
+
required: ["tabIds"],
|
|
3367
4948
|
additionalProperties: false,
|
|
3368
|
-
properties: {
|
|
3369
|
-
type: "
|
|
3370
|
-
|
|
4949
|
+
properties: { tabIds: {
|
|
4950
|
+
type: "array",
|
|
4951
|
+
items: { type: "number" },
|
|
4952
|
+
description: "Array of tab ids to close (from browser_list_tabs)."
|
|
3371
4953
|
} }
|
|
3372
4954
|
},
|
|
3373
|
-
capability: "
|
|
4955
|
+
capability: "browser_power",
|
|
3374
4956
|
async handler(args, signal) {
|
|
3375
|
-
return dispatchBrowserTool("
|
|
4957
|
+
return dispatchBrowserTool("browser_close_tab", args, signal);
|
|
3376
4958
|
}
|
|
3377
4959
|
},
|
|
3378
4960
|
{
|
|
3379
|
-
toolNameHttp: "
|
|
3380
|
-
description: "
|
|
4961
|
+
toolNameHttp: "browser_navigate",
|
|
4962
|
+
description: "Navigate an existing tab: goto a URL, go back, go forward, or reload. Same URL-blocking policy as browser_open_tab.",
|
|
3381
4963
|
inputSchema: {
|
|
3382
4964
|
type: "object",
|
|
3383
|
-
required: ["tabId"],
|
|
4965
|
+
required: ["tabId", "action"],
|
|
3384
4966
|
additionalProperties: false,
|
|
3385
4967
|
properties: {
|
|
3386
|
-
tabId: {
|
|
3387
|
-
|
|
3388
|
-
|
|
3389
|
-
description: "Element ref from browser_read_page (preferred)."
|
|
4968
|
+
tabId: {
|
|
4969
|
+
type: "number",
|
|
4970
|
+
description: "Tab id from browser_list_tabs / browser_open_tab."
|
|
3390
4971
|
},
|
|
3391
|
-
|
|
4972
|
+
action: {
|
|
3392
4973
|
type: "string",
|
|
3393
|
-
|
|
4974
|
+
enum: [
|
|
4975
|
+
"goto",
|
|
4976
|
+
"back",
|
|
4977
|
+
"forward",
|
|
4978
|
+
"reload"
|
|
4979
|
+
],
|
|
4980
|
+
description: "The navigation action."
|
|
3394
4981
|
},
|
|
3395
|
-
|
|
4982
|
+
url: {
|
|
3396
4983
|
type: "string",
|
|
3397
|
-
|
|
3398
|
-
description: "Mouse button. Default 'left'."
|
|
4984
|
+
description: "Required when action=goto. Max 8 KB."
|
|
3399
4985
|
},
|
|
3400
|
-
|
|
3401
|
-
type: "
|
|
3402
|
-
description: "
|
|
4986
|
+
hard: {
|
|
4987
|
+
type: "boolean",
|
|
4988
|
+
description: "Reload only: bypass cache (Ctrl+Shift+R behavior). Default false."
|
|
3403
4989
|
}
|
|
3404
4990
|
}
|
|
3405
4991
|
},
|
|
3406
4992
|
capability: "browser",
|
|
3407
4993
|
async handler(args, signal) {
|
|
3408
|
-
return dispatchBrowserTool("
|
|
4994
|
+
return dispatchBrowserTool("browser_navigate", args, signal);
|
|
3409
4995
|
}
|
|
3410
4996
|
},
|
|
3411
4997
|
{
|
|
3412
|
-
toolNameHttp: "
|
|
3413
|
-
description: "
|
|
4998
|
+
toolNameHttp: "browser_screenshot",
|
|
4999
|
+
description: "Capture a PNG screenshot of the visible area of a tab. Returns base64-encoded image bytes plus contentType. The tab must be active in its window; this tool auto-activates if needed.",
|
|
3414
5000
|
inputSchema: {
|
|
3415
5001
|
type: "object",
|
|
3416
|
-
required: ["tabId"
|
|
5002
|
+
required: ["tabId"],
|
|
3417
5003
|
additionalProperties: false,
|
|
3418
5004
|
properties: {
|
|
3419
|
-
tabId: {
|
|
3420
|
-
|
|
3421
|
-
|
|
3422
|
-
description: "Element ref from browser_read_page (preferred)."
|
|
5005
|
+
tabId: {
|
|
5006
|
+
type: "number",
|
|
5007
|
+
description: "Tab id from browser_list_tabs / browser_open_tab."
|
|
3423
5008
|
},
|
|
3424
|
-
|
|
5009
|
+
format: {
|
|
3425
5010
|
type: "string",
|
|
3426
|
-
|
|
3427
|
-
|
|
3428
|
-
value: { description: "The value to set. String for inputs / textareas / select option value. Boolean for checkbox / radio. Max 1 MB." },
|
|
3429
|
-
clearFirst: {
|
|
3430
|
-
type: "boolean",
|
|
3431
|
-
description: "Clear the input before typing (default true). No effect on select / checkbox."
|
|
3432
|
-
},
|
|
3433
|
-
pressEnter: {
|
|
3434
|
-
type: "boolean",
|
|
3435
|
-
description: "After typing, dispatch Enter keydown / keyup and call form.requestSubmit if available. Default false."
|
|
5011
|
+
enum: ["png", "jpeg"],
|
|
5012
|
+
description: "Image format. Default 'png'."
|
|
3436
5013
|
}
|
|
3437
5014
|
}
|
|
3438
5015
|
},
|
|
3439
5016
|
capability: "browser",
|
|
3440
5017
|
async handler(args, signal) {
|
|
3441
|
-
return dispatchBrowserTool("
|
|
5018
|
+
return dispatchBrowserTool("browser_screenshot", args, signal);
|
|
5019
|
+
}
|
|
5020
|
+
},
|
|
5021
|
+
{
|
|
5022
|
+
toolNameHttp: "browser_read_page",
|
|
5023
|
+
description: "Compressed page snapshot for the model: visible text, interactive elements with stable refs, viewport metadata, and (when present) `visualSurfaces` listing canvas / svg regions that need vision. Each element entry carries `bbox: [x, y, w, h]` in CSS viewport pixels (same coord space as browser_mouse / drag / scroll-at-pointer). Refs (e.g. `e42`) are stable for the lifetime of one read_page snapshot and are the preferred input to follow-up actions over brittle CSS selectors. The `viewport` block (`width`, `height`, `devicePixelRatio`, `scrollX`, `scrollY`) lets you map CSS-px bbox to device-px pixels for browser_screenshot. Mode controls what ships back: `summary` (default, ~5-15 KB) returns only viewport-visible elements/text and drops nameless non-interactive nodes; `full` returns up to 200 elements + 256 KiB of innerText (the legacy behavior — use only when you need off-screen content unscrolled). PREFER browser_act / browser_find for intent-driven interaction; read_page is the lower-level snapshot when you need to enumerate.",
|
|
5024
|
+
inputSchema: {
|
|
5025
|
+
type: "object",
|
|
5026
|
+
required: ["tabId"],
|
|
5027
|
+
additionalProperties: false,
|
|
5028
|
+
properties: {
|
|
5029
|
+
tabId: {
|
|
5030
|
+
type: "number",
|
|
5031
|
+
description: "Tab id from browser_list_tabs / browser_open_tab."
|
|
5032
|
+
},
|
|
5033
|
+
mode: {
|
|
5034
|
+
type: "string",
|
|
5035
|
+
enum: ["summary", "full"],
|
|
5036
|
+
description: "Snapshot scope. Default 'summary' returns viewport-visible elements + text capped at 20 KiB. 'full' returns up to 200 interactive elements page-wide + 256 KiB of innerText."
|
|
5037
|
+
}
|
|
5038
|
+
}
|
|
5039
|
+
},
|
|
5040
|
+
capability: "browser_power",
|
|
5041
|
+
async handler(args, signal) {
|
|
5042
|
+
return dispatchBrowserTool("browser_read_page", args, signal);
|
|
3442
5043
|
}
|
|
3443
5044
|
},
|
|
3444
5045
|
{
|
|
@@ -3495,7 +5096,7 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
3495
5096
|
}
|
|
3496
5097
|
}
|
|
3497
5098
|
},
|
|
3498
|
-
capability: "
|
|
5099
|
+
capability: "browser_power",
|
|
3499
5100
|
async handler(args, signal) {
|
|
3500
5101
|
return dispatchBrowserTool("browser_scroll", args, signal);
|
|
3501
5102
|
}
|
|
@@ -3515,7 +5116,7 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
3515
5116
|
}
|
|
3516
5117
|
}
|
|
3517
5118
|
},
|
|
3518
|
-
capability: "
|
|
5119
|
+
capability: "browser_power",
|
|
3519
5120
|
async handler(args, signal) {
|
|
3520
5121
|
return dispatchBrowserTool("browser_keyboard", args, signal);
|
|
3521
5122
|
}
|
|
@@ -3552,7 +5153,7 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
3552
5153
|
}
|
|
3553
5154
|
}
|
|
3554
5155
|
},
|
|
3555
|
-
capability: "
|
|
5156
|
+
capability: "browser_power",
|
|
3556
5157
|
async handler(args, signal) {
|
|
3557
5158
|
return dispatchBrowserTool("browser_wait", args, signal);
|
|
3558
5159
|
}
|
|
@@ -3576,7 +5177,7 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
3576
5177
|
}
|
|
3577
5178
|
}
|
|
3578
5179
|
},
|
|
3579
|
-
capability: "
|
|
5180
|
+
capability: "browser_power",
|
|
3580
5181
|
async handler(args, signal) {
|
|
3581
5182
|
return dispatchBrowserTool("browser_eval_js", args, signal);
|
|
3582
5183
|
}
|
|
@@ -3608,53 +5209,11 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
3608
5209
|
}
|
|
3609
5210
|
}
|
|
3610
5211
|
},
|
|
3611
|
-
capability: "
|
|
5212
|
+
capability: "browser_power",
|
|
3612
5213
|
async handler(args, signal) {
|
|
3613
5214
|
return dispatchBrowserTool("browser_download", args, signal);
|
|
3614
5215
|
}
|
|
3615
5216
|
},
|
|
3616
|
-
{
|
|
3617
|
-
toolNameHttp: "browser_console_logs",
|
|
3618
|
-
description: "Drain console messages a tab has emitted since the last call. The first call for a tab attaches chrome.debugger and starts capturing, so very-early-load messages from before the first call are missed; subsequent calls return everything since the previous drain. Buffer is capped at 1000 entries per tab.",
|
|
3619
|
-
inputSchema: {
|
|
3620
|
-
type: "object",
|
|
3621
|
-
required: ["tabId"],
|
|
3622
|
-
additionalProperties: false,
|
|
3623
|
-
properties: {
|
|
3624
|
-
tabId: { type: "number" },
|
|
3625
|
-
level: {
|
|
3626
|
-
type: "string",
|
|
3627
|
-
enum: [
|
|
3628
|
-
"log",
|
|
3629
|
-
"info",
|
|
3630
|
-
"warn",
|
|
3631
|
-
"error",
|
|
3632
|
-
"debug",
|
|
3633
|
-
"all"
|
|
3634
|
-
],
|
|
3635
|
-
description: "Filter by console level. Default 'all'."
|
|
3636
|
-
}
|
|
3637
|
-
}
|
|
3638
|
-
},
|
|
3639
|
-
capability: "browser",
|
|
3640
|
-
async handler(args, signal) {
|
|
3641
|
-
return dispatchBrowserTool("browser_console_logs", args, signal);
|
|
3642
|
-
}
|
|
3643
|
-
},
|
|
3644
|
-
{
|
|
3645
|
-
toolNameHttp: "browser_network_log",
|
|
3646
|
-
description: "Drain network responses a tab has received since the last call. Same lazy-attach + cap-1000 behavior as browser_console_logs. Returns request URL, method, status, mime type, and timestamp per entry.",
|
|
3647
|
-
inputSchema: {
|
|
3648
|
-
type: "object",
|
|
3649
|
-
required: ["tabId"],
|
|
3650
|
-
additionalProperties: false,
|
|
3651
|
-
properties: { tabId: { type: "number" } }
|
|
3652
|
-
},
|
|
3653
|
-
capability: "browser",
|
|
3654
|
-
async handler(args, signal) {
|
|
3655
|
-
return dispatchBrowserTool("browser_network_log", args, signal);
|
|
3656
|
-
}
|
|
3657
|
-
},
|
|
3658
5217
|
{
|
|
3659
5218
|
toolNameHttp: "browser_mouse",
|
|
3660
5219
|
description: "Move / click / hover / press / release the mouse via real CDP input events (Input.dispatchMouseEvent). Use this when you need behavior that synthetic .click() can't trigger: hover-to-reveal menus, canvas / map / image-map clicks, sites that check event.isTrusted, or precise coordinate targeting. Target with ref (from browser_read_page), CSS selector, or (x, y) in CSS viewport pixels — exactly one. action='move' is the hover (single mouseMoved fires :hover and pointerover reliably). action='dblclick' sends two press/release cycles with incrementing clickCount (a real double-click, not one cycle with clickCount=2). By default the target is hit-tested with elementFromPoint and the call fails with `target_obscured` if the topmost element isn't the target or a descendant — pass force:true to bypass when you know an overlay forwards events.",
|
|
@@ -3714,7 +5273,7 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
3714
5273
|
}
|
|
3715
5274
|
}
|
|
3716
5275
|
},
|
|
3717
|
-
capability: "
|
|
5276
|
+
capability: "browser_power",
|
|
3718
5277
|
async handler(args, signal) {
|
|
3719
5278
|
return dispatchBrowserTool("browser_mouse", args, signal);
|
|
3720
5279
|
}
|
|
@@ -3788,7 +5347,7 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
3788
5347
|
}
|
|
3789
5348
|
}
|
|
3790
5349
|
},
|
|
3791
|
-
capability: "
|
|
5350
|
+
capability: "browser_power",
|
|
3792
5351
|
async handler(args, signal) {
|
|
3793
5352
|
return dispatchBrowserTool("browser_drag", args, signal);
|
|
3794
5353
|
}
|
|
@@ -3812,36 +5371,448 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
3812
5371
|
}
|
|
3813
5372
|
}
|
|
3814
5373
|
},
|
|
3815
|
-
capability: "
|
|
5374
|
+
capability: "browser_power",
|
|
3816
5375
|
async handler(args, signal) {
|
|
3817
5376
|
return dispatchBrowserTool("browser_type", args, signal);
|
|
3818
5377
|
}
|
|
3819
5378
|
},
|
|
3820
5379
|
{
|
|
3821
|
-
toolNameHttp: "
|
|
3822
|
-
description: "
|
|
5380
|
+
toolNameHttp: "browser_diagnostics",
|
|
5381
|
+
description: "Drain console messages or network responses for a tab, with filtering. Replaces the prior browser_console_logs / browser_network_log primitives. `kind` selects the stream; remaining params filter the result before it ships to the model so the response carries only what the caller asked for instead of a raw 1000-entry array dump. Lazy-attach behavior: first call for a tab attaches chrome.debugger; very-early-load events from before the first call are missed.",
|
|
5382
|
+
inputSchema: {
|
|
5383
|
+
type: "object",
|
|
5384
|
+
required: ["tabId", "kind"],
|
|
5385
|
+
additionalProperties: false,
|
|
5386
|
+
properties: {
|
|
5387
|
+
tabId: { type: "number" },
|
|
5388
|
+
kind: {
|
|
5389
|
+
type: "string",
|
|
5390
|
+
enum: ["console", "network"],
|
|
5391
|
+
description: "Which stream to drain."
|
|
5392
|
+
},
|
|
5393
|
+
level: {
|
|
5394
|
+
type: "string",
|
|
5395
|
+
enum: [
|
|
5396
|
+
"log",
|
|
5397
|
+
"info",
|
|
5398
|
+
"warn",
|
|
5399
|
+
"error",
|
|
5400
|
+
"debug",
|
|
5401
|
+
"all"
|
|
5402
|
+
],
|
|
5403
|
+
description: "Console only. Default 'all'. Ignored when kind=network."
|
|
5404
|
+
},
|
|
5405
|
+
regex: {
|
|
5406
|
+
type: "string",
|
|
5407
|
+
description: "Optional JS-regex string. Console: matches the message body. Network: matches the request URL."
|
|
5408
|
+
},
|
|
5409
|
+
limit: {
|
|
5410
|
+
type: "number",
|
|
5411
|
+
description: "Max entries to return after filtering. Default 100. Hard cap 1000."
|
|
5412
|
+
}
|
|
5413
|
+
}
|
|
5414
|
+
},
|
|
5415
|
+
capability: "browser_power",
|
|
5416
|
+
async handler(args, signal) {
|
|
5417
|
+
const kind = args.kind === "network" ? "network" : "console";
|
|
5418
|
+
const tool = kind === "network" ? "browser_network_log" : "browser_console_logs";
|
|
5419
|
+
const tabId = typeof args.tabId === "number" ? args.tabId : void 0;
|
|
5420
|
+
const level = typeof args.level === "string" ? args.level : "all";
|
|
5421
|
+
const regexStr = typeof args.regex === "string" ? args.regex : void 0;
|
|
5422
|
+
const limit = typeof args.limit === "number" ? Math.min(1e3, Math.max(1, args.limit)) : 100;
|
|
5423
|
+
const env = await dispatchBrowserTool(tool, {
|
|
5424
|
+
tabId,
|
|
5425
|
+
level
|
|
5426
|
+
}, signal);
|
|
5427
|
+
if (env.isError) return env;
|
|
5428
|
+
const text = env.content?.[0]?.text;
|
|
5429
|
+
if (typeof text !== "string") return env;
|
|
5430
|
+
let entries;
|
|
5431
|
+
try {
|
|
5432
|
+
const parsed = JSON.parse(text);
|
|
5433
|
+
entries = (Array.isArray(parsed) ? parsed : Array.isArray(parsed?.entries) ? parsed.entries : []).filter((e) => typeof e === "object" && e !== null);
|
|
5434
|
+
} catch {
|
|
5435
|
+
return env;
|
|
5436
|
+
}
|
|
5437
|
+
let filtered = entries;
|
|
5438
|
+
if (regexStr) try {
|
|
5439
|
+
const re = new RegExp(regexStr);
|
|
5440
|
+
const field = kind === "network" ? "url" : "text";
|
|
5441
|
+
filtered = filtered.filter((e) => {
|
|
5442
|
+
const v = e[field];
|
|
5443
|
+
return typeof v === "string" && re.test(v);
|
|
5444
|
+
});
|
|
5445
|
+
} catch {
|
|
5446
|
+
return toolEnvelope({ error: `invalid regex: ${regexStr}` }, true);
|
|
5447
|
+
}
|
|
5448
|
+
const out = filtered.slice(0, limit);
|
|
5449
|
+
return toolEnvelope({
|
|
5450
|
+
kind,
|
|
5451
|
+
total: entries.length,
|
|
5452
|
+
returned: out.length,
|
|
5453
|
+
entries: out
|
|
5454
|
+
});
|
|
5455
|
+
}
|
|
5456
|
+
},
|
|
5457
|
+
{
|
|
5458
|
+
toolNameHttp: "browser_find",
|
|
5459
|
+
description: "Find up to 5 elements matching a natural-language intent ('the search box at the top', 'the Submit button at the bottom of the login form'). Returns ranked candidates with stable refs the model can pass to browser_act (ref mode) or browser_mouse. Cheaper than browser_read_page when you know what you're looking for — the inner compressor (Gemini Flash class) filters the snapshot for you instead of sending the full element list to the lead model.",
|
|
5460
|
+
inputSchema: {
|
|
5461
|
+
type: "object",
|
|
5462
|
+
required: ["tabId", "intent"],
|
|
5463
|
+
additionalProperties: false,
|
|
5464
|
+
properties: {
|
|
5465
|
+
tabId: { type: "number" },
|
|
5466
|
+
intent: {
|
|
5467
|
+
type: "string",
|
|
5468
|
+
description: "Natural-language description of what to find."
|
|
5469
|
+
}
|
|
5470
|
+
}
|
|
5471
|
+
},
|
|
5472
|
+
capability: "browser_power",
|
|
5473
|
+
async handler(args, signal) {
|
|
5474
|
+
const tabId = typeof args.tabId === "number" ? args.tabId : void 0;
|
|
5475
|
+
const intent = typeof args.intent === "string" ? args.intent : "";
|
|
5476
|
+
if (!tabId) return toolEnvelope({ error: "tabId required" }, true);
|
|
5477
|
+
if (!intent) return toolEnvelope({ error: "intent required" }, true);
|
|
5478
|
+
const snapshot = await fetchSnapshot(tabId, signal);
|
|
5479
|
+
const matches = await pickMatchingElements(snapshot, intent, signal);
|
|
5480
|
+
const indexed = new Map(snapshot.elements.map((e) => [e.ref, e]));
|
|
5481
|
+
return toolEnvelope({ matches: matches.map((m) => {
|
|
5482
|
+
const el = indexed.get(m.ref);
|
|
5483
|
+
return el ? {
|
|
5484
|
+
ref: m.ref,
|
|
5485
|
+
role: el.role,
|
|
5486
|
+
name: el.name,
|
|
5487
|
+
bbox: el.bbox,
|
|
5488
|
+
reason: m.reason
|
|
5489
|
+
} : {
|
|
5490
|
+
ref: m.ref,
|
|
5491
|
+
reason: m.reason
|
|
5492
|
+
};
|
|
5493
|
+
}) });
|
|
5494
|
+
}
|
|
5495
|
+
},
|
|
5496
|
+
{
|
|
5497
|
+
toolNameHttp: "browser_act",
|
|
5498
|
+
description: "Preferred for any click / fill / type / scroll-to action against a tab. Two modes: (1) INTENT mode — pass `intent` as natural language ('click the submit button'); the inner compressor (Gemini Flash class) maps it to an element + action. Auto-escalates to visual fallback (screenshot + multimodal model + pixel-coord click) when the intent points into a canvas / svg region the a11y tree can't see. (2) REF mode — pass `ref` (from a prior browser_find or browser_read_page) and optionally `value`; dispatches directly with zero compressor latency. This is the fold-in path for the now-removed browser_click and browser_fill. Returns {ok, action_taken, target_ref, navigated}.",
|
|
3823
5499
|
inputSchema: {
|
|
3824
5500
|
type: "object",
|
|
3825
5501
|
required: ["tabId"],
|
|
3826
5502
|
additionalProperties: false,
|
|
3827
5503
|
properties: {
|
|
3828
5504
|
tabId: { type: "number" },
|
|
5505
|
+
intent: {
|
|
5506
|
+
type: "string",
|
|
5507
|
+
description: "Natural-language description of the action. Triggers INTENT mode. Mutually exclusive with `ref`."
|
|
5508
|
+
},
|
|
3829
5509
|
ref: {
|
|
3830
5510
|
type: "string",
|
|
3831
|
-
description: "Element ref from browser_read_page
|
|
5511
|
+
description: "Element ref from browser_find / browser_read_page. Triggers REF mode (no compressor round-trip)."
|
|
3832
5512
|
},
|
|
3833
|
-
|
|
5513
|
+
action: {
|
|
5514
|
+
type: "string",
|
|
5515
|
+
enum: [
|
|
5516
|
+
"click",
|
|
5517
|
+
"fill",
|
|
5518
|
+
"type",
|
|
5519
|
+
"select",
|
|
5520
|
+
"scroll_into_view"
|
|
5521
|
+
],
|
|
5522
|
+
description: "REF mode only. Defaults to 'click'. In INTENT mode, the compressor picks the action."
|
|
5523
|
+
},
|
|
5524
|
+
value: {
|
|
5525
|
+
type: "string",
|
|
5526
|
+
description: "For fill / type / select: the string value to set. In INTENT mode the compressor uses this when an action requires a value."
|
|
5527
|
+
}
|
|
5528
|
+
}
|
|
5529
|
+
},
|
|
5530
|
+
capability: "browser",
|
|
5531
|
+
/**
 * Entry point for `browser_act`.
 *
 * REF mode: when `args.ref` is a non-empty string the action (default
 * "click") is dispatched directly through `dispatchActionByRef`; `intent`
 * is ignored in that case even if it was also supplied.
 *
 * INTENT mode: the intent is passed through `decompose`. A single-step
 * result is executed as-is via `runAtomicIntentStep`. A multi-step result
 * is executed sequentially; on the first failing step the planner
 * (`planCompoundReplan`) is asked once for replacement steps, and the
 * handler returns right after the replan succeeds, fails, or throws —
 * the remaining original steps are not resumed.
 *
 * Always resolves to a tool envelope; failures carry `isError = true`.
 */
async handler(args, signal) {
	const tabId = typeof args.tabId === "number" ? args.tabId : void 0;
	// Truthiness check: rejects a missing / non-numeric tabId, and also 0 and NaN.
	if (!tabId) return toolEnvelope({ error: "tabId required" }, true);
	const refIn = typeof args.ref === "string" ? args.ref : void 0;
	const intent = typeof args.intent === "string" ? args.intent : void 0;
	const value = typeof args.value === "string" ? args.value : void 0;
	if (!refIn && !intent) return toolEnvelope({ error: "either `ref` (REF mode) or `intent` (INTENT mode) is required" }, true);
	// REF mode wins whenever a ref is present.
	if (refIn) return dispatchActionByRef(tabId, refIn, typeof args.action === "string" ? args.action : "click", value, signal);
	const decomposed = decompose(intent, value);
	// Single step: return the atomic step's envelope unchanged.
	if (decomposed.steps.length === 1) return runAtomicIntentStep(tabId, decomposed.steps[0].intent, decomposed.steps[0].value, signal);
	const summaries = [];
	let navigated = false;
	// Steps that already succeeded; handed to the planner as context on failure.
	const completedSteps = [];
	for (let i = 0; i < decomposed.steps.length; i++) {
		const step = decomposed.steps[i];
		const env = await runAtomicIntentStep(tabId, step.intent, step.value, signal);
		const stepText = env.content?.[0]?.text;
		// Best-effort parse: an unparseable payload leaves stepResult as {}.
		let stepResult = {};
		if (typeof stepText === "string") try {
			stepResult = JSON.parse(stepText);
		} catch {}
		// Failure path. Everything up to the matching `catch` — including the
		// snapshot fetch and the replanned steps themselves — is reported as
		// "planner errored" if it throws.
		if (env.isError || stepResult.ok === false) try {
			const failureReason = String(stepResult.error ?? "unknown");
			const replan = await planCompoundReplan({
				originalIntent: intent,
				originalValue: value,
				completedSteps,
				failedStep: step,
				failureReason,
				snapshot: await fetchSnapshot(tabId, signal)
			}, signal);
			// An empty plan means the planner declined to recover.
			if (replan.steps.length === 0) return toolEnvelope({
				ok: false,
				summary: `compound step ${i + 1}/${decomposed.steps.length} failed and planner declined: ${replan.reasoning || failureReason}`,
				template: decomposed.template,
				steps_completed: i,
				failed_step: step.intent,
				planner_reasoning: replan.reasoning
			}, true);
			const replanSummaries = [];
			for (let j = 0; j < replan.steps.length; j++) {
				const rstep = replan.steps[j];
				const renv = await runAtomicIntentStep(tabId, rstep.intent, rstep.value, signal);
				const rtext = renv.content?.[0]?.text;
				let rresult = {};
				if (typeof rtext === "string") try {
					rresult = JSON.parse(rtext);
				} catch {}
				// No second replan: a failing replacement step ends the call.
				if (renv.isError || rresult.ok === false) return toolEnvelope({
					ok: false,
					summary: `compound failed at original step ${i + 1}, planner replan also failed at step ${j + 1}/${replan.steps.length}: ${String(rresult.error ?? "unknown")}`,
					template: decomposed.template,
					steps_completed: i,
					failed_step: rstep.intent,
					planner_reasoning: replan.reasoning
				}, true);
				if (typeof rresult.action_taken === "string") replanSummaries.push(`${rresult.action_taken} (${rstep.intent})`);
				if (rresult.navigated === true) navigated = true;
			}
			// Recovery succeeded: count = original steps done before the failure
			// plus every replacement step.
			return toolEnvelope({
				ok: true,
				summary: `compound recovered via planner (${replan.reasoning}): ${replanSummaries.join(" → ")}`,
				template: decomposed.template,
				steps_completed: i + replan.steps.length,
				navigated,
				planner_used: true,
				planner_reasoning: replan.reasoning
			});
		} catch (replanErr) {
			return toolEnvelope({
				ok: false,
				summary: `compound step ${i + 1}/${decomposed.steps.length} failed; planner errored: ${replanErr instanceof Error ? replanErr.message : String(replanErr)}`,
				template: decomposed.template,
				steps_completed: i,
				failed_step: step.intent
			}, true);
		}
		// Success path for this step.
		if (typeof stepResult.action_taken === "string") summaries.push(`${stepResult.action_taken} (${step.intent})`);
		if (stepResult.navigated === true) navigated = true;
		completedSteps.push(step);
	}
	// Every original step succeeded; prefer the decomposer's own summary when it has one.
	return toolEnvelope({
		ok: true,
		summary: decomposed.successSummary ?? summaries.join(" → "),
		template: decomposed.template,
		steps_completed: decomposed.steps.length,
		navigated
	});
}
|
|
5620
|
+
},
|
|
5621
|
+
{
|
|
5622
|
+
toolNameHttp: "browser_observe",
|
|
5623
|
+
description: "Get a natural-language description of the current page's user-actionable state — what forms, buttons, links, and content sections are visible — in 2-4 sentences. Optional `intent` focuses the description on a region ('describe the login form', 'what's in the comments section'). Use this BEFORE browser_act when you don't know what's on the page, or AFTER navigation to confirm the page loaded. Cheaper than screenshots when text is enough. Does not include canvas/SVG content — those surface as a `hasVisualSurfaces` flag; switch to browser_screenshot for visuals.",
|
|
5624
|
+
inputSchema: {
|
|
5625
|
+
type: "object",
|
|
5626
|
+
required: ["tabId"],
|
|
5627
|
+
additionalProperties: false,
|
|
5628
|
+
properties: {
|
|
5629
|
+
tabId: { type: "number" },
|
|
5630
|
+
intent: {
|
|
5631
|
+
type: "string",
|
|
5632
|
+
description: "Optional natural-language focus ('describe the form', 'what's in the sidebar')."
|
|
5633
|
+
}
|
|
5634
|
+
}
|
|
5635
|
+
},
|
|
5636
|
+
capability: "browser_compound",
|
|
5637
|
+
async handler(args, signal) {
|
|
5638
|
+
const tabId = typeof args.tabId === "number" ? args.tabId : void 0;
|
|
5639
|
+
const intent = typeof args.intent === "string" ? args.intent : void 0;
|
|
5640
|
+
if (!tabId) return toolEnvelope({ error: "tabId required" }, true);
|
|
5641
|
+
return toolEnvelope(await observePage(await fetchSnapshot(tabId, signal), intent, signal));
|
|
5642
|
+
}
|
|
5643
|
+
},
|
|
5644
|
+
{
|
|
5645
|
+
toolNameHttp: "browser_extract",
|
|
5646
|
+
description: "Structured extraction from the current page into a JSON object matching the provided schema. The inner compressor reads the page snapshot (text + elements) and synthesizes the typed object. Use this instead of browser_read_page + lead-model parsing when you know the shape you want (e.g. a list of {title, author, url} rows from a PR list).",
|
|
5647
|
+
inputSchema: {
|
|
5648
|
+
type: "object",
|
|
5649
|
+
required: [
|
|
5650
|
+
"tabId",
|
|
5651
|
+
"schema",
|
|
5652
|
+
"instruction"
|
|
5653
|
+
],
|
|
5654
|
+
additionalProperties: false,
|
|
5655
|
+
properties: {
|
|
5656
|
+
tabId: { type: "number" },
|
|
5657
|
+
schema: { description: "JSON schema (or schema-shaped descriptor) for the desired output shape." },
|
|
5658
|
+
instruction: {
|
|
3834
5659
|
type: "string",
|
|
3835
|
-
description: "
|
|
5660
|
+
description: "What to extract, in plain language ('the visible PR list')."
|
|
3836
5661
|
}
|
|
3837
5662
|
}
|
|
3838
|
-
},
|
|
3839
|
-
capability: "
|
|
3840
|
-
async handler(args, signal) {
|
|
3841
|
-
|
|
5663
|
+
},
|
|
5664
|
+
capability: "browser_compound",
|
|
5665
|
+
async handler(args, signal) {
|
|
5666
|
+
const tabId = typeof args.tabId === "number" ? args.tabId : void 0;
|
|
5667
|
+
const instruction = typeof args.instruction === "string" ? args.instruction : "";
|
|
5668
|
+
const schema = args.schema;
|
|
5669
|
+
if (!tabId) return toolEnvelope({ error: "tabId required" }, true);
|
|
5670
|
+
if (!instruction) return toolEnvelope({ error: "instruction required" }, true);
|
|
5671
|
+
if (!schema) return toolEnvelope({ error: "schema required" }, true);
|
|
5672
|
+
const snapshot = await fetchSnapshot(tabId, signal);
|
|
5673
|
+
try {
|
|
5674
|
+
return toolEnvelope(await extractStructured(snapshot, schema, instruction, signal));
|
|
5675
|
+
} catch (err) {
|
|
5676
|
+
if (err instanceof SchemaValidationError) return toolEnvelope({ error: `invalid schema: ${err.message}` }, true);
|
|
5677
|
+
if (err instanceof ResultShapeError) return toolEnvelope({ error: `extraction produced wrong shape: ${err.message}` }, true);
|
|
5678
|
+
throw err;
|
|
5679
|
+
}
|
|
5680
|
+
}
|
|
5681
|
+
}
|
|
5682
|
+
]);
|
|
5683
|
+
/**
 * Execute one atomic intent against a tab.
 *
 * Flow:
 *  1. Fetch a snapshot and ask `pickElement` for a target.
 *  2. If it produced a ref and its confidence is not below 0.5, dispatch
 *     the picked action through `dispatchActionByRef` (the picked value
 *     wins over the caller's `value`).
 *  3. Otherwise, when the snapshot lists visual surfaces, take a PNG
 *     screenshot, ask `pickElementVisual` for coordinates and click them
 *     with `browser_mouse` (force: true).
 *  4. Otherwise report "no element matched intent".
 *
 * Shared by the single-step and compound paths of `browser_act`.
 * Resolves to a tool envelope in every branch.
 */
async function runAtomicIntentStep(tabId, intent, value, signal) {
	const snapshot = await fetchSnapshot(tabId, signal);
	const picked = await pickElement(snapshot, intent, signal, value);

	// Text / a11y match is usable: dispatch straight away.
	const textMatchUsable = picked.ref && !(picked.confidence < .5);
	if (textMatchUsable) {
		return dispatchActionByRef(tabId, picked.ref, picked.action, picked.value ?? value, signal);
	}

	// No usable match and nothing visual to fall back to.
	const surfaces = snapshot.visualSurfaces;
	if (!surfaces || !(surfaces.length > 0)) {
		return toolEnvelope({
			ok: false,
			error: "no element matched intent",
			picked
		}, true);
	}

	// Visual fallback: screenshot -> visual picker -> coordinate click.
	const screenshotEnv = await dispatchBrowserTool("browser_screenshot", {
		tabId,
		format: "png"
	}, signal);
	if (screenshotEnv.isError) {
		return toolEnvelope({
			ok: false,
			error: "no text match; screenshot for visual fallback failed",
			picked
		}, true);
	}

	const screenshotText = screenshotEnv.content?.[0]?.text;
	let screenshot = {};
	try {
		screenshot = screenshotText ? JSON.parse(screenshotText) : {};
	} catch {
		return toolEnvelope({
			ok: false,
			error: "no text match; screenshot envelope unparseable"
		}, true);
	}
	if (!screenshot.contentType || !screenshot.dataBase64) {
		return toolEnvelope({
			ok: false,
			error: "no text match; screenshot envelope missing fields"
		}, true);
	}

	const visual = await pickElementVisual(screenshot.dataBase64, screenshot.contentType, intent, surfaces, signal);
	if (visual.confidence < .5) {
		return toolEnvelope({
			ok: false,
			error: "no element matched intent (text + visual)",
			picked,
			visual
		}, true);
	}

	const { x, y } = visual;
	const clickEnv = await dispatchBrowserTool("browser_mouse", {
		tabId,
		action: "click",
		x,
		y,
		force: true
	}, signal);
	if (clickEnv.isError) {
		return clickEnv;
	}
	return toolEnvelope({
		ok: true,
		action_taken: "click_visual",
		x,
		y,
		confidence: visual.confidence,
		reason: visual.reason
	});
}
|
|
5753
|
+
/**
 * Run `action` against the element identified by `ref` using the matching
 * low-level primitive. Used by REF mode and by the text-match branch of
 * INTENT mode in `browser_act`.
 *
 * Action mapping:
 *  - "click"            -> browser_click { tabId, ref }
 *  - "fill" / "select"  -> browser_fill  { tabId, ref, value }
 *  - "type"             -> browser_click to focus, then browser_type
 *                          { tabId, text: value ?? "" }
 *  - "scroll_into_view" -> browser_scroll { tabId, target: "element", ref }
 *
 * @param tabId  Target tab id.
 * @param ref    Element ref from a snapshot.
 * @param action One of the actions listed above.
 * @param value  Value for fill / select / type; may be undefined.
 * @param signal Abort signal forwarded to every primitive call.
 * @returns A tool envelope: the primitive's own envelope when it reports
 *          an error, an "unknown action" error envelope for unsupported
 *          actions, otherwise `{ ok, action_taken, target_ref, navigated }`
 *          where `navigated` is copied from the primitive's JSON payload
 *          only when it is a boolean.
 */
async function dispatchActionByRef(tabId, ref, action, value, signal) {
	let env;
	switch (action) {
		case "click":
			env = await dispatchBrowserTool("browser_click", {
				tabId,
				ref
			}, signal);
			break;
		case "fill":
		case "select":
			env = await dispatchBrowserTool("browser_fill", {
				tabId,
				ref,
				value
			}, signal);
			break;
		case "type": {
			// browser_type takes no ref, so the click is what aims the keystrokes.
			// If it fails, stop: typing would land in whatever has focus now.
			const focusEnv = await dispatchBrowserTool("browser_click", {
				tabId,
				ref
			}, signal);
			if (focusEnv.isError) return focusEnv;
			env = await dispatchBrowserTool("browser_type", {
				tabId,
				text: value ?? ""
			}, signal);
			break;
		}
		case "scroll_into_view":
			env = await dispatchBrowserTool("browser_scroll", {
				tabId,
				target: "element",
				ref
			}, signal);
			break;
		default: return toolEnvelope({
			ok: false,
			error: `unknown action: ${action}`
		}, true);
	}
	if (env.isError) return env;
	// Best-effort parse of the primitive's payload; only `navigated` is used.
	const innerText = env.content?.[0]?.text;
	let parsed = {};
	if (typeof innerText === "string") try {
		parsed = JSON.parse(innerText);
	} catch {}
	return toolEnvelope({
		ok: true,
		action_taken: action,
		target_ref: ref,
		navigated: typeof parsed.navigated === "boolean" ? parsed.navigated : void 0
	});
}
|
|
3845
5816
|
|
|
3846
5817
|
//#endregion
|
|
3847
5818
|
//#region src/vendor/pi/ai/api-registry.ts
|
|
@@ -5416,7 +7387,7 @@ const MAX_INFLIGHT_WORKER_CALLS = (() => {
|
|
|
5416
7387
|
if (!Number.isFinite(n) || n <= 0 || !Number.isInteger(n)) return 8;
|
|
5417
7388
|
return n;
|
|
5418
7389
|
})();
|
|
5419
|
-
let inFlight
|
|
7390
|
+
let inFlight = 0;
|
|
5420
7391
|
/**
|
|
5421
7392
|
* Acquire a worker slot.
|
|
5422
7393
|
*
|
|
@@ -5434,176 +7405,16 @@ let inFlight$1 = 0;
|
|
|
5434
7405
|
*/
|
|
5435
7406
|
async function acquireWorkerSlot(signal) {
|
|
5436
7407
|
if (signal?.aborted) return null;
|
|
5437
|
-
if (inFlight
|
|
5438
|
-
inFlight
|
|
7408
|
+
if (inFlight >= MAX_INFLIGHT_WORKER_CALLS) return null;
|
|
7409
|
+
inFlight += 1;
|
|
5439
7410
|
let released = false;
|
|
5440
7411
|
return () => {
|
|
5441
7412
|
if (released) return;
|
|
5442
7413
|
released = true;
|
|
5443
|
-
inFlight
|
|
5444
|
-
};
|
|
5445
|
-
}
|
|
5446
|
-
|
|
5447
|
-
//#endregion
|
|
5448
|
-
//#region src/lib/diagnose-response.ts
|
|
5449
|
-
const PREVIEW_LIMIT = 200;
|
|
5450
|
-
async function parseJsonOrDiagnose(response, routePath) {
|
|
5451
|
-
const cloned = response.clone();
|
|
5452
|
-
try {
|
|
5453
|
-
return await response.json();
|
|
5454
|
-
} catch (error) {
|
|
5455
|
-
const contentType = response.headers.get("content-type") ?? "(none)";
|
|
5456
|
-
const bodyText = await cloned.text().catch(() => "(unreadable)");
|
|
5457
|
-
const preview = bodyText.length > PREVIEW_LIMIT ? bodyText.slice(0, PREVIEW_LIMIT) + "...(truncated)" : bodyText;
|
|
5458
|
-
consola.error(`Upstream JSON parse failed at ${routePath}: status=${response.status} content-type="${contentType}" body[0..${PREVIEW_LIMIT}]=${JSON.stringify(preview)}`);
|
|
5459
|
-
throw error;
|
|
5460
|
-
}
|
|
5461
|
-
}
|
|
5462
|
-
|
|
5463
|
-
//#endregion
|
|
5464
|
-
//#region src/lib/response-cap.ts
|
|
5465
|
-
/**
|
|
5466
|
-
* Hard byte cap for non-streaming upstream response bodies.
|
|
5467
|
-
*
|
|
5468
|
-
* Anthropic responses with large tool_use blocks can legitimately reach
|
|
5469
|
-
* several MB, but a multi-GB body is either a buggy upstream or a malicious
|
|
5470
|
-
* one. Buffering it would OOM the proxy and crash all in-flight requests.
|
|
5471
|
-
*
|
|
5472
|
-
* Applies to /v1/messages, /v1/chat/completions, and /v1/responses.
|
|
5473
|
-
*/
|
|
5474
|
-
const MAX_RESPONSE_BODY_BYTES = 10 * 1024 * 1024;
|
|
5475
|
-
/**
|
|
5476
|
-
* Read a Response body with a hard byte cap, then parse as JSON.
|
|
5477
|
-
*
|
|
5478
|
-
* Falls back to the fast path (response.json()) when Content-Length is
|
|
5479
|
-
* present and within the cap, avoiding the streaming-reader overhead for
|
|
5480
|
-
* the vast majority of normal responses.
|
|
5481
|
-
*
|
|
5482
|
-
* When the cap is hit:
|
|
5483
|
-
* - the reader is cancelled to release the upstream socket
|
|
5484
|
-
* - a structured Anthropic-format error is returned to the caller
|
|
5485
|
-
* (the caller wraps it in c.json(), not throws — the client gets a
|
|
5486
|
-
* clean 413 error, not an unhandled-rejection crash)
|
|
5487
|
-
*
|
|
5488
|
-
* Returns `{ ok: true, value }` on success or `{ ok: false, errorResponse, status }`
|
|
5489
|
-
* on cap exceeded.
|
|
5490
|
-
*/
|
|
5491
|
-
async function readResponseBodyCapped(response, routePath, capBytes = MAX_RESPONSE_BODY_BYTES) {
|
|
5492
|
-
const contentLengthHeader = response.headers.get("content-length");
|
|
5493
|
-
const contentLength = contentLengthHeader ? parseInt(contentLengthHeader, 10) : NaN;
|
|
5494
|
-
if (!isNaN(contentLength) && contentLength <= capBytes) return {
|
|
5495
|
-
ok: true,
|
|
5496
|
-
value: await parseJsonOrDiagnose(response, routePath)
|
|
5497
|
-
};
|
|
5498
|
-
const reader = response.body?.getReader();
|
|
5499
|
-
if (!reader) return {
|
|
5500
|
-
ok: true,
|
|
5501
|
-
value: await parseJsonOrDiagnose(response, routePath)
|
|
7414
|
+
inFlight = Math.max(0, inFlight - 1);
|
|
5502
7415
|
};
|
|
5503
|
-
const chunks = [];
|
|
5504
|
-
let totalBytes = 0;
|
|
5505
|
-
let capped = false;
|
|
5506
|
-
try {
|
|
5507
|
-
while (true) {
|
|
5508
|
-
const { done, value } = await reader.read();
|
|
5509
|
-
if (done) break;
|
|
5510
|
-
if (!value) continue;
|
|
5511
|
-
totalBytes += value.byteLength;
|
|
5512
|
-
if (totalBytes > capBytes) {
|
|
5513
|
-
capped = true;
|
|
5514
|
-
try {
|
|
5515
|
-
await reader.cancel("size_cap");
|
|
5516
|
-
} catch {}
|
|
5517
|
-
break;
|
|
5518
|
-
}
|
|
5519
|
-
chunks.push(value);
|
|
5520
|
-
}
|
|
5521
|
-
} catch (err) {
|
|
5522
|
-
if (!capped) consola.warn(`readResponseBodyCapped: read error at ${routePath}:`, err);
|
|
5523
|
-
}
|
|
5524
|
-
if (capped) {
|
|
5525
|
-
consola.warn(`Non-streaming upstream response at ${routePath} exceeded ${capBytes} bytes (10 MiB cap); dropping body to prevent OOM. Check upstream health.`);
|
|
5526
|
-
return {
|
|
5527
|
-
ok: false,
|
|
5528
|
-
status: 502,
|
|
5529
|
-
errorResponse: {
|
|
5530
|
-
type: "error",
|
|
5531
|
-
error: {
|
|
5532
|
-
type: "api_error",
|
|
5533
|
-
message: `Upstream response body exceeded the 10 MiB size cap for non-streaming ${routePath}. The upstream may be misbehaving. Try enabling streaming (stream: true) which handles large responses chunk-by-chunk.`
|
|
5534
|
-
}
|
|
5535
|
-
}
|
|
5536
|
-
};
|
|
5537
|
-
}
|
|
5538
|
-
const merged = new Uint8Array(totalBytes);
|
|
5539
|
-
let offset = 0;
|
|
5540
|
-
for (const chunk of chunks) {
|
|
5541
|
-
merged.set(chunk, offset);
|
|
5542
|
-
offset += chunk.byteLength;
|
|
5543
|
-
}
|
|
5544
|
-
const text = new TextDecoder().decode(merged);
|
|
5545
|
-
try {
|
|
5546
|
-
return {
|
|
5547
|
-
ok: true,
|
|
5548
|
-
value: JSON.parse(text)
|
|
5549
|
-
};
|
|
5550
|
-
} catch (err) {
|
|
5551
|
-
const preview = text.slice(0, 200);
|
|
5552
|
-
const contentType = response.headers.get("content-type") ?? "(none)";
|
|
5553
|
-
consola.error(`Upstream JSON parse failed at ${routePath}: status=${response.status} content-type="${contentType}" body[0..200]=${JSON.stringify(preview)}`);
|
|
5554
|
-
throw err;
|
|
5555
|
-
}
|
|
5556
7416
|
}
|
|
5557
7417
|
|
|
5558
|
-
//#endregion
|
|
5559
|
-
//#region src/services/copilot/create-chat-completions.ts
|
|
5560
|
-
const createChatCompletions = async (payload, modelHeaders, callerSignal) => {
|
|
5561
|
-
if (!state.copilotToken) throw new Error("Copilot token not found");
|
|
5562
|
-
const enableVision = payload.messages.some((x) => typeof x.content !== "string" && x.content?.some((x$1) => x$1.type === "image_url"));
|
|
5563
|
-
const isAgentCall = payload.messages.some((msg) => ["assistant", "tool"].includes(msg.role));
|
|
5564
|
-
const url = `${copilotBaseUrl(state)}/chat/completions`;
|
|
5565
|
-
const doFetch = () => {
|
|
5566
|
-
const fetchInit = {
|
|
5567
|
-
method: "POST",
|
|
5568
|
-
headers: {
|
|
5569
|
-
...copilotHeaders(state, enableVision),
|
|
5570
|
-
...modelHeaders,
|
|
5571
|
-
"X-Initiator": isAgentCall ? "agent" : "user"
|
|
5572
|
-
},
|
|
5573
|
-
body: JSON.stringify(payload)
|
|
5574
|
-
};
|
|
5575
|
-
const signals = [];
|
|
5576
|
-
if (UPSTREAM_FETCH_TIMEOUT_MS > 0) signals.push(AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS));
|
|
5577
|
-
if (callerSignal) signals.push(callerSignal);
|
|
5578
|
-
if (signals.length === 1) fetchInit.signal = signals[0];
|
|
5579
|
-
else if (signals.length > 1) fetchInit.signal = AbortSignal.any(signals);
|
|
5580
|
-
return fetch(url, fetchInit);
|
|
5581
|
-
};
|
|
5582
|
-
const response = await tryRefreshAndRetry(doFetch, "/chat/completions");
|
|
5583
|
-
if (!response.ok) {
|
|
5584
|
-
let errorBody = "";
|
|
5585
|
-
try {
|
|
5586
|
-
errorBody = await response.text();
|
|
5587
|
-
} catch {
|
|
5588
|
-
errorBody = "(could not read error body)";
|
|
5589
|
-
}
|
|
5590
|
-
const claudeModels = state.models?.data.filter((m) => m.id.startsWith("claude")).map((m) => m.id).join(", ") ?? "(models not loaded)";
|
|
5591
|
-
consola.error(`Copilot rejected model "${payload.model}": ${response.status} ${errorBody} (available Claude models: ${claudeModels})`);
|
|
5592
|
-
throw new HTTPError("Failed to create chat completions", new Response(errorBody, {
|
|
5593
|
-
status: response.status,
|
|
5594
|
-
statusText: response.statusText,
|
|
5595
|
-
headers: response.headers
|
|
5596
|
-
}));
|
|
5597
|
-
}
|
|
5598
|
-
if (payload.stream) return events(response);
|
|
5599
|
-
const cappedResult = await readResponseBodyCapped(response, "/v1/chat/completions", MAX_RESPONSE_BODY_BYTES);
|
|
5600
|
-
if (!cappedResult.ok) throw new HTTPError("Upstream /v1/chat/completions response exceeded 10 MiB size cap", new Response(JSON.stringify(cappedResult.errorResponse), {
|
|
5601
|
-
status: cappedResult.status,
|
|
5602
|
-
headers: { "content-type": "application/json" }
|
|
5603
|
-
}));
|
|
5604
|
-
return cappedResult.value;
|
|
5605
|
-
};
|
|
5606
|
-
|
|
5607
7418
|
//#endregion
|
|
5608
7419
|
//#region src/lib/worker-agent/stream-fn.ts
|
|
5609
7420
|
function createCopilotStreamFn(opts) {
|
|
@@ -6057,56 +7868,6 @@ function isAbortError(err) {
|
|
|
6057
7868
|
return false;
|
|
6058
7869
|
}
|
|
6059
7870
|
|
|
6060
|
-
//#endregion
|
|
6061
|
-
//#region src/lib/mcp-inflight.ts
|
|
6062
|
-
/**
|
|
6063
|
-
* Shared concurrency cap for MCP `tools/call` dispatches.
|
|
6064
|
-
*
|
|
6065
|
-
* Originally lived as a module-private counter inside
|
|
6066
|
-
* `src/routes/mcp/handler.ts`. Extracted because the worker-agent's
|
|
6067
|
-
* `peer_review` and `advisor` tools (which dispatch to peer-model
|
|
6068
|
-
* personas / the advisor responses endpoint from inside a worker
|
|
6069
|
-
* subagent loop) must participate in the same backpressure budget;
|
|
6070
|
-
* otherwise a single worker can fan out unboundedly to peers and
|
|
6071
|
-
* starve the operator's own `tools/list` callers.
|
|
6072
|
-
*
|
|
6073
|
-
* The counter is a single process-wide integer — no per-route
|
|
6074
|
-
* partitioning. Persona calls at the MCP boundary (handler.ts),
|
|
6075
|
-
* peer/advisor calls nested inside a worker (tools.ts), and any
|
|
6076
|
-
* future MCP-adjacent dispatcher all increment the same number.
|
|
6077
|
-
*
|
|
6078
|
-
* Cap = `MAX_INFLIGHT_TOOLS_CALL = 8`. Justification lives at the
|
|
6079
|
-
* historical home (`src/routes/mcp/handler.ts` comment block); do not
|
|
6080
|
-
* change the value without re-reading
|
|
6081
|
-
* `docs/research/peer-mcp-investigation.md` § "Concurrency cap
|
|
6082
|
-
* investigation".
|
|
6083
|
-
*/
|
|
6084
|
-
const MAX_INFLIGHT_TOOLS_CALL = 8;
|
|
6085
|
-
let inFlight = 0;
|
|
6086
|
-
/**
|
|
6087
|
-
* Acquire a slot if one is available. Returns a release function the
|
|
6088
|
-
* caller MUST invoke exactly once (typically from a `finally` block);
|
|
6089
|
-
* returns `null` if the cap is saturated. The release fn is idempotent
|
|
6090
|
-
* — calling it twice is a no-op so callers can release defensively
|
|
6091
|
-
* without worrying about double-decrementing the counter under unusual
|
|
6092
|
-
* unwind paths.
|
|
6093
|
-
*
|
|
6094
|
-
* Synchronous on purpose. Async semaphore acquisition would let callers
|
|
6095
|
-
* queue indefinitely; we want immediate "queue full" feedback so the
|
|
6096
|
-
* MCP client (or the model holding the nested tool call) can choose to
|
|
6097
|
-
* back off or retry.
|
|
6098
|
-
*/
|
|
6099
|
-
function acquireInFlightSlot() {
|
|
6100
|
-
if (inFlight >= MAX_INFLIGHT_TOOLS_CALL) return null;
|
|
6101
|
-
inFlight++;
|
|
6102
|
-
let released = false;
|
|
6103
|
-
return () => {
|
|
6104
|
-
if (released) return;
|
|
6105
|
-
released = true;
|
|
6106
|
-
inFlight--;
|
|
6107
|
-
};
|
|
6108
|
-
}
|
|
6109
|
-
|
|
6110
7871
|
//#endregion
|
|
6111
7872
|
//#region src/lib/tokenizer.ts
|
|
6112
7873
|
const ENCODING_MAP = {
|
|
@@ -6563,6 +8324,47 @@ function workerToolsEnabled() {
|
|
|
6563
8324
|
if (!found) return false;
|
|
6564
8325
|
return found.capabilities?.supports?.tool_calls === true;
|
|
6565
8326
|
}
|
|
8327
|
+
/**
|
|
8328
|
+
* Gate for the compound L2 browser tools (`browser_find`, `browser_act`
|
|
8329
|
+
* in intent mode, `browser_extract`).
|
|
8330
|
+
*
|
|
8331
|
+
* Returns true iff `compressorAvailable()` — i.e. at least one model in
|
|
8332
|
+
* the compressor fallback chain (`gemini-3.5-flash` → `gpt-5.4-mini` →
|
|
8333
|
+
* `claude-haiku-4-5`) is present in the live catalog with `tool_calls`
|
|
8334
|
+
* support. When none are reachable the compound tools are dropped from
|
|
8335
|
+
* `tools/list` AND fail `tools/call` with -32601.
|
|
8336
|
+
*
|
|
8337
|
+
* Note: this gate does NOT additionally re-check the `browser` opt-in.
|
|
8338
|
+
* The `handler.ts` filter chain runs `browser` and `browser_compound`
|
|
8339
|
+
* via separate `capability` tags; the compound tools' entries also
|
|
8340
|
+
* apply at the route level via the existing `--browse` enablement
|
|
8341
|
+
* because they live under the browser MCP surface that the route
|
|
8342
|
+
* only mounts when `state.browseEnabled`.
|
|
8343
|
+
*/
|
|
8344
|
+
function browserCompoundToolsEnabled() {
|
|
8345
|
+
return compressorAvailable();
|
|
8346
|
+
}
|
|
8347
|
+
/**
|
|
8348
|
+
* Gate for the L0/L1 power browser tools (`browser_read_page`,
|
|
8349
|
+
* `browser_mouse`, `browser_drag`, `browser_type`, `browser_keyboard`,
|
|
8350
|
+
* `browser_scroll`, `browser_eval_js`, `browser_diagnostics`,
|
|
8351
|
+
* `browser_find`, `browser_close_tab`, `browser_list_tabs`,
|
|
8352
|
+
* `browser_wait`, `browser_download`).
|
|
8353
|
+
*
|
|
8354
|
+
* Returns true iff `state.powerBrowseEnabled` (set by `--power-browse`
|
|
8355
|
+
* or `GH_ROUTER_ENABLE_POWER_BROWSE=1`). When off, the default
|
|
8356
|
+
* `--browse` surface exposes only the 6 lead-model tools (`act`,
|
|
8357
|
+
* `observe`, `extract`, `navigate`, `screenshot`, `open_tab`) that
|
|
8358
|
+
* hide DOM details behind intent. Power mode adds the raw primitives
|
|
8359
|
+
* for users who want direct coord/keystroke control.
|
|
8360
|
+
*
|
|
8361
|
+
* `handler.ts` filter chain ANDs this with `browserToolsEnabled()`
|
|
8362
|
+
* (defense-in-depth — power without basic is meaningless and the
|
|
8363
|
+
* setup path already forces basic on when power is on).
|
|
8364
|
+
*/
|
|
8365
|
+
function browserPowerToolsEnabled() {
|
|
8366
|
+
return state.powerBrowseEnabled === true;
|
|
8367
|
+
}
|
|
6566
8368
|
|
|
6567
8369
|
//#endregion
|
|
6568
8370
|
//#region src/routes/mcp/handler.ts
|
|
@@ -6739,6 +8541,8 @@ function toolEntries() {
|
|
|
6739
8541
|
if (t.capability === "worker") return workerToolsEnabled();
|
|
6740
8542
|
if (t.capability === "stand_in") return standInToolEnabled();
|
|
6741
8543
|
if (t.capability === "browser") return browserToolsEnabled();
|
|
8544
|
+
if (t.capability === "browser_compound") return browserToolsEnabled() && browserCompoundToolsEnabled();
|
|
8545
|
+
if (t.capability === "browser_power") return browserToolsEnabled() && browserPowerToolsEnabled();
|
|
6742
8546
|
return true;
|
|
6743
8547
|
}).map((t) => ({
|
|
6744
8548
|
name: t.toolNameHttp,
|
|
@@ -7030,6 +8834,8 @@ async function handleToolsCall(body) {
|
|
|
7030
8834
|
if (nonPersonaTool && nonPersonaTool.capability === "worker" && !workerToolsEnabled()) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
|
|
7031
8835
|
if (nonPersonaTool && nonPersonaTool.capability === "stand_in" && !standInToolEnabled()) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
|
|
7032
8836
|
if (nonPersonaTool && nonPersonaTool.capability === "browser" && !browserToolsEnabled()) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
|
|
8837
|
+
if (nonPersonaTool && nonPersonaTool.capability === "browser_compound" && !(browserToolsEnabled() && browserCompoundToolsEnabled())) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
|
|
8838
|
+
if (nonPersonaTool && nonPersonaTool.capability === "browser_power" && !(browserToolsEnabled() && browserPowerToolsEnabled())) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
|
|
7033
8839
|
let personaPrompt;
|
|
7034
8840
|
let personaContext;
|
|
7035
8841
|
let personaEffort;
|
|
@@ -10583,6 +12389,10 @@ function buildPeerAwarenessSnippet(opts) {
|
|
|
10583
12389
|
if (opts.workerToolsAvailable) para2Parts.push("`worker_explore` runs a Gemini-backed read-only worker that returns a summary, using its own context rather than yours; concurrent launches share the `MAX_INFLIGHT_TOOLS_CALL=8` cap with operator traffic.", "`worker_implement` is the same worker with edit/write/bash; `worktree: true` runs it in an isolated git worktree and returns the diff.", "Workers themselves have `code_search` in their toolset.");
|
|
10584
12390
|
para2Parts.push("`web_search` surfaces citable sources for docs, errors, and upstream issues.");
|
|
10585
12391
|
if (opts.standInAvailable) para2Parts.push("`stand_in` provides three-lab consensus for decision tiebreak when the user is unavailable.");
|
|
12392
|
+
if (opts.browseAvailable) {
|
|
12393
|
+
const powerNote = opts.powerBrowseAvailable ? " Power mode is on: the L0/L1 primitives (`browser_mouse`, `browser_drag`, `browser_type`, `browser_keyboard`, `browser_scroll`, `browser_eval_js`, `browser_read_page`, `browser_diagnostics`, `browser_find`) are also available for direct DOM / coordinate control." : "";
|
|
12394
|
+
para2Parts.push(`\`browser_*\` tools (under \`mcp__gh-router-peers__browser_*\`) drive a real Chrome / Edge browser via a local extension. Lead surface: \`browser_act(intent, value?)\` for any click / fill / type / scroll-to (an inner fast model resolves intent), \`browser_observe(intent?)\` for a 2-4 sentence natural-language page description, \`browser_extract(schema, instruction)\` for typed extraction, \`browser_navigate\` / \`browser_open_tab\` / \`browser_screenshot\` for state and visuals. The lead model never sees raw DOM: refs, bboxes, and role/name dumps stay internal.${powerNote}`);
|
|
12395
|
+
}
|
|
10586
12396
|
return [
|
|
10587
12397
|
"## Peer review and advisor",
|
|
10588
12398
|
"",
|
|
@@ -12045,7 +13855,7 @@ function initProxyFromEnv() {
|
|
|
12045
13855
|
//#endregion
|
|
12046
13856
|
//#region package.json
|
|
12047
13857
|
var name = "github-router";
|
|
12048
|
-
var version = "0.3.45";
|
|
13858
|
+
var version$1 = "0.3.66";
|
|
12049
13859
|
|
|
12050
13860
|
//#endregion
|
|
12051
13861
|
//#region src/lib/approval.ts
|
|
@@ -13716,7 +15526,7 @@ server.use(cors());
|
|
|
13716
15526
|
server.get("/", (c) => c.text("Server running"));
|
|
13717
15527
|
server.get("/version", (c) => c.json({
|
|
13718
15528
|
name,
|
|
13719
|
-
version,
|
|
15529
|
+
version: version$1,
|
|
13720
15530
|
gitSha: process.env.GITHUB_SHA ?? "unknown"
|
|
13721
15531
|
}));
|
|
13722
15532
|
server.on("HEAD", ["/"], (c) => c.body(null, 200));
|
|
@@ -13767,6 +15577,11 @@ async function setupAndServe(options) {
|
|
|
13767
15577
|
state.showToken = options.showToken;
|
|
13768
15578
|
state.extendedBetas = options.extendedBetas;
|
|
13769
15579
|
state.browseEnabled = options.browseEnabled || process.env.GH_ROUTER_ENABLE_BROWSE === "1";
|
|
15580
|
+
state.powerBrowseEnabled = options.powerBrowseEnabled || process.env.GH_ROUTER_ENABLE_POWER_BROWSE === "1";
|
|
15581
|
+
if (state.powerBrowseEnabled) state.browseEnabled = true;
|
|
15582
|
+
if (process.env.GH_ROUTER_BROWSER_NO_HUMANLIKE === "1") state.humanlikeForce = "off";
|
|
15583
|
+
else if (options.humanlikeEnabled || process.env.GH_ROUTER_HUMANLIKE === "1") state.humanlikeForce = "on";
|
|
15584
|
+
else state.humanlikeForce = "auto";
|
|
13770
15585
|
if (process.env.COPILOT_API_URL) state.copilotApiUrl = process.env.COPILOT_API_URL;
|
|
13771
15586
|
await ensurePaths();
|
|
13772
15587
|
await cacheVSCodeVersion();
|
|
@@ -13874,6 +15689,16 @@ const sharedServerArgs = {
|
|
|
13874
15689
|
type: "boolean",
|
|
13875
15690
|
default: false,
|
|
13876
15691
|
description: "Enable the browser-control MCP tools (browser_open_tab, browser_screenshot, browser_click, etc.) on /mcp. Requires Chrome or Edge installed; the bundled extension must be loaded on first tool call (the proxy returns install_required with Web Store URLs + a Load Unpacked fallback path). Off by default; can also be enabled with GH_ROUTER_ENABLE_BROWSE=1."
|
|
15692
|
+
},
|
|
15693
|
+
"power-browse": {
|
|
15694
|
+
type: "boolean",
|
|
15695
|
+
default: false,
|
|
15696
|
+
description: "Expose the full ~18-tool browser MCP surface (raw read_page, mouse / drag / scroll / keyboard / type primitives, eval_js, diagnostics, find, locate). Default --browse exposes only the 6 lead-model tools (act, observe, extract, navigate, screenshot, open_tab) that hide DOM details behind intent. Implies --browse. Off by default; can also be enabled with GH_ROUTER_ENABLE_POWER_BROWSE=1."
|
|
15697
|
+
},
|
|
15698
|
+
humanlike: {
|
|
15699
|
+
type: "boolean",
|
|
15700
|
+
default: false,
|
|
15701
|
+
description: "Force humanlike pacing on ALL browser tool dispatches: Beta-distributed inter-action delays (800-4600 ms), Bezier mouse trajectories with overshoot-and-correct, per-keystroke jitter with word-end pauses, scroll chunking. Use for known anti-bot sites (Cloudflare, Datadome). Off by default (auto mode); GH_ROUTER_HUMANLIKE=1 is the env equivalent. GH_ROUTER_BROWSER_NO_HUMANLIKE=1 hard-disables (wins over --humanlike, for tests)."
|
|
13877
15702
|
}
|
|
13878
15703
|
};
|
|
13879
15704
|
const allowedAccountTypes = new Set([
|
|
@@ -13911,7 +15736,9 @@ function parseSharedArgs(args) {
|
|
|
13911
15736
|
showToken: args["show-token"],
|
|
13912
15737
|
proxyEnv: args["proxy-env"],
|
|
13913
15738
|
extendedBetas: args["extended-betas"],
|
|
13914
|
-
browseEnabled: args.browse
|
|
15739
|
+
browseEnabled: args.browse,
|
|
15740
|
+
powerBrowseEnabled: args["power-browse"],
|
|
15741
|
+
humanlikeEnabled: args.humanlike
|
|
13915
15742
|
};
|
|
13916
15743
|
}
|
|
13917
15744
|
/**
|
|
@@ -14150,7 +15977,9 @@ const claude = defineCommand({
|
|
|
14150
15977
|
codexCli: backend === "cli",
|
|
14151
15978
|
geminiAvailable: geminiAvailable$1,
|
|
14152
15979
|
workerToolsAvailable: workerToolsEnabled(),
|
|
14153
|
-
standInAvailable: standInToolEnabled()
|
|
15980
|
+
standInAvailable: standInToolEnabled(),
|
|
15981
|
+
browseAvailable: state.browseEnabled,
|
|
15982
|
+
powerBrowseAvailable: state.powerBrowseEnabled
|
|
14154
15983
|
});
|
|
14155
15984
|
extraArgs.push("--append-system-prompt", peerSnippet);
|
|
14156
15985
|
try {
|
|
@@ -14247,7 +16076,7 @@ const codex = defineCommand({
|
|
|
14247
16076
|
|
|
14248
16077
|
//#endregion
|
|
14249
16078
|
//#region src/debug.ts
|
|
14250
|
-
async function getPackageVersion() {
|
|
16079
|
+
async function getPackageVersion$1() {
|
|
14251
16080
|
try {
|
|
14252
16081
|
const packageJsonPath = new URL("../package.json", import.meta.url).pathname;
|
|
14253
16082
|
return JSON.parse(await fs.readFile(packageJsonPath)).version;
|
|
@@ -14273,9 +16102,9 @@ async function checkTokenExists() {
|
|
|
14273
16102
|
}
|
|
14274
16103
|
}
|
|
14275
16104
|
async function getDebugInfo() {
|
|
14276
|
-
const [version$1, tokenExists] = await Promise.all([getPackageVersion(), checkTokenExists()]);
|
|
16105
|
+
const [version$2, tokenExists] = await Promise.all([getPackageVersion$1(), checkTokenExists()]);
|
|
14277
16106
|
return {
|
|
14278
|
-
version: version$1,
|
|
16107
|
+
version: version$2,
|
|
14279
16108
|
runtime: getRuntimeInfo(),
|
|
14280
16109
|
paths: {
|
|
14281
16110
|
APP_DIR: PATHS.APP_DIR,
|
|
@@ -14597,9 +16426,12 @@ process.on("uncaughtException", (error) => {
|
|
|
14597
16426
|
consola.error("Uncaught exception:", error);
|
|
14598
16427
|
process.exit(1);
|
|
14599
16428
|
});
|
|
16429
|
+
const version = getPackageVersion();
|
|
16430
|
+
if (!process.argv.slice(2).includes("--version")) consola.info(`github-router v${version}`);
|
|
14600
16431
|
await runMain(defineCommand({
|
|
14601
16432
|
meta: {
|
|
14602
16433
|
name: "github-router",
|
|
16434
|
+
version,
|
|
14603
16435
|
description: "A reverse proxy that exposes GitHub Copilot as OpenAI and Anthropic compatible API endpoints."
|
|
14604
16436
|
},
|
|
14605
16437
|
subCommands: {
|