github-router 0.3.52 → 0.3.68
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -5
- package/dist/browser-bridge/index.js +17 -1
- package/dist/browser-ext/background.js +358 -1
- package/dist/browser-ext/manifest.json +2 -1
- package/dist/browser-ext/snapshot-cdp.js +438 -0
- package/dist/browser-ext/snapshot.js +101 -0
- package/dist/main.js +1210 -99
- package/dist/main.js.map +1 -1
- package/package.json +18 -18
package/dist/main.js
CHANGED
|
@@ -45,6 +45,8 @@ const state = {
|
|
|
45
45
|
showToken: false,
|
|
46
46
|
extendedBetas: false,
|
|
47
47
|
browseEnabled: false,
|
|
48
|
+
powerBrowseEnabled: false,
|
|
49
|
+
humanlikeForce: "auto",
|
|
48
50
|
sessionId: randomUUID(),
|
|
49
51
|
machineId: randomBytes(32).toString("hex")
|
|
50
52
|
};
|
|
@@ -929,15 +931,34 @@ async function autoUpdateClaude(latestVersion) {
|
|
|
929
931
|
//#endregion
|
|
930
932
|
//#region src/lib/port.ts
|
|
931
933
|
const DEFAULT_PORT = 8787;
|
|
932
|
-
const DEFAULT_CLAUDE_MODEL_FALLBACKS = [
|
|
934
|
+
/** Fallback Claude model slugs, listed from the newest Opus version to the oldest. */
const DEFAULT_CLAUDE_MODEL_FALLBACKS = ["claude-opus-4-7", "claude-opus-4-6", "claude-opus-4-5"];
|
|
933
939
|
/**
|
|
934
940
|
* Cap-aware default picker for `ANTHROPIC_MODEL` on the implicit-default
|
|
935
941
|
* path. Returns `claude-opus-${family}[1m]` when the live Copilot catalog
|
|
936
|
-
*
|
|
937
|
-
*
|
|
938
|
-
*
|
|
939
|
-
*
|
|
940
|
-
*
|
|
942
|
+
* shows the family is 1M-capable, else the bare `claude-opus-${family}`
|
|
943
|
+
* slug. `family` defaults to `"4.8"` so the no-arg call selects the
|
|
944
|
+
* current default; explicit values like `"4.7"` or `"4.6"` are used to
|
|
945
|
+
* honor the `github-router claude -m <version>` family shorthand.
|
|
946
|
+
*
|
|
947
|
+
* **Dual-signal 1M detection**. The Opus families have evolved different
|
|
948
|
+
* shapes in Copilot's catalog over time:
|
|
949
|
+
* 1. **Sibling-slug signal** — `opus-${family}-1m` (or `opus-${family}-1m-internal`)
|
|
950
|
+
* exists as a separate catalog entry distinct from the base slug.
|
|
951
|
+
* This is how 4.6 and 4.7 ship (`claude-opus-4.6-1m`,
|
|
952
|
+
* `claude-opus-4.7-1m-internal`). Matched by the version-anchored
|
|
953
|
+
* regex below.
|
|
954
|
+
* 2. **Base-slug capability signal** — the catalog entry whose id IS
|
|
955
|
+
* the base `opus-${family}` slug advertises
|
|
956
|
+
* `capabilities.limits.max_context_window_tokens >= 1_000_000`. This
|
|
957
|
+
* is how 4.8 ships — there is no `-1m` sibling; the single
|
|
958
|
+
* `claude-opus-4.8` id is the 1M variant.
|
|
959
|
+
* Either signal flips on the `[1m]` decoration. Both signals together
|
|
960
|
+
* also flip it on (no double-counting). The breadcrumb log names which
|
|
961
|
+
* signal fired so users can spot catalog shape changes.
|
|
941
962
|
*
|
|
942
963
|
* The `[1m]` literal-bracket suffix is Claude Code's local 1M-context
|
|
943
964
|
* unlock — cc-backup `src/utils/context.ts:35-40` matches `/\[1m\]/i`
|
|
@@ -947,14 +968,14 @@ const DEFAULT_CLAUDE_MODEL_FALLBACKS = ["claude-opus-4-6", "claude-opus-4-5"];
|
|
|
947
968
|
* proxy routes the underlying request.
|
|
948
969
|
*
|
|
949
970
|
* Cap-awareness matters because on non-enterprise Copilot tiers there
|
|
950
|
-
* is no
|
|
971
|
+
* is no 1M opus backend; sending `[1m]` there would either 400 at
|
|
951
972
|
* Copilot or (with `resolveModel`'s graceful-degrade) silently
|
|
952
973
|
* downgrade upstream while Claude Code still over-accounts context.
|
|
953
974
|
* This helper detects the catalog state at launch and only opts in
|
|
954
975
|
* when the backend can actually serve 1M.
|
|
955
976
|
*
|
|
956
977
|
* Sonnet/Haiku families are intentionally NOT given `[1m]` defaults
|
|
957
|
-
* because Copilot has no
|
|
978
|
+
* because Copilot has no 1M backend for them (and Anthropic-side
|
|
958
979
|
* `modelSupports1M` doesn't list haiku at all). See
|
|
959
980
|
* `src/lib/server-setup.ts:getClaudeCodeEnvVars` for the
|
|
960
981
|
* `ANTHROPIC_DEFAULT_{SONNET,HAIKU,OPUS}_MODEL` tier defaults.
|
|
@@ -964,18 +985,25 @@ const DEFAULT_CLAUDE_MODEL_FALLBACKS = ["claude-opus-4-6", "claude-opus-4-5"];
|
|
|
964
985
|
* can't tell the difference between "no catalog yet" and "no 1M
|
|
965
986
|
* variant" — defaulting safe-side preserves the pre-change behavior).
|
|
966
987
|
*/
|
|
967
|
-
const DEFAULT_OPUS_FAMILY = "4.
|
|
988
|
+
// Opus family used by `pickClaudeDefault` when the caller passes none (dotted form).
const DEFAULT_OPUS_FAMILY = "4.8";
// Context-window size, in tokens, at or above which a base slug counts as 1M-capable.
const ONE_M_TOKENS = 1000000;
|
|
968
990
|
function pickClaudeDefault(opusFamily = DEFAULT_OPUS_FAMILY) {
|
|
969
991
|
const dotted = opusFamily.replace(/-/g, ".");
|
|
970
992
|
const bareSlug = `claude-opus-${dotted.replace(/\./g, "-")}`;
|
|
971
993
|
const versionPattern = dotted.replace(/\./g, "[.-]");
|
|
972
994
|
const oneMRegex = new RegExp(`opus-${versionPattern}-1m(?:$|-)`, "i");
|
|
995
|
+
const baseSlugRegex = new RegExp(`^claude-opus-${versionPattern}$`, "i");
|
|
973
996
|
const familyRegex = new RegExp(`opus-${versionPattern}(?:$|[-.])`, "i");
|
|
974
997
|
const models$1 = state.models?.data ?? [];
|
|
975
|
-
const
|
|
998
|
+
const siblingOneM = models$1.some((m) => oneMRegex.test(m.id));
|
|
999
|
+
const baseSlugMaxContext = models$1.reduce((max, m) => baseSlugRegex.test(m.id) ? Math.max(max, m.capabilities?.limits?.max_context_window_tokens ?? 0) : max, 0);
|
|
1000
|
+
const baseSlugOneM = baseSlugMaxContext >= ONE_M_TOKENS;
|
|
1001
|
+
const has1m = siblingOneM || baseSlugOneM;
|
|
976
1002
|
if (opusFamily !== DEFAULT_OPUS_FAMILY && state.models && models$1.length > 0 && !models$1.some((m) => familyRegex.test(m.id))) consola.warn(`Requested Opus family "${dotted}" not found in Copilot catalog; using "${bareSlug}" anyway (resolveModel may not find a backend for it).`);
|
|
977
1003
|
if (has1m) {
|
|
978
|
-
|
|
1004
|
+
const signal = siblingOneM ? baseSlugOneM ? "sibling-slug + base-slug 1M capability" : `sibling slug opus-${dotted}-1m` : `base slug ${bareSlug} (max_context_window_tokens=${baseSlugMaxContext})`;
|
|
1005
|
+
const pinHint = siblingOneM ? ` Pass --model ${bareSlug} to pin 200K.` : ` (No separate 200K variant of ${dotted} exists in the catalog — the bare slug IS the 1M backend.)`;
|
|
1006
|
+
consola.info(`Catalog signals opus-${dotted} is 1M-capable (${signal}); defaulting ANTHROPIC_MODEL to "${bareSlug}[1m]" so Claude Code accounts for 1M context locally. Set CLAUDE_CODE_DISABLE_1M_CONTEXT=1 to opt out (HIPAA).${pinHint}`);
|
|
979
1007
|
return `${bareSlug}[1m]`;
|
|
980
1008
|
}
|
|
981
1009
|
return bareSlug;
|
|
@@ -3076,6 +3104,58 @@ function installRequiredToolResult(payload) {
|
|
|
3076
3104
|
};
|
|
3077
3105
|
}
|
|
3078
3106
|
|
|
3107
|
+
//#endregion
|
|
3108
|
+
//#region src/lib/browser-mcp/humanlike.ts
|
|
3109
|
+
/**
 * Sample a delay from a Beta(2, 5) distribution scaled onto the range
 * spanned by `minMs` and `maxMs`.
 *
 * Beta(2, 5) has its mode at 0.2 of the range, so most samples land near
 * the low end with an occasional long pause. A uniform draw is
 * deliberately NOT used because evenly spread delays are easy to flag
 * with behavioral analysis.
 *
 * Implementation: Beta(a, b) = X / (X + Y) with X ~ Gamma(a) and
 * Y ~ Gamma(b), both drawn through `gammaSample`.
 *
 * @param {number} minMs - One end of the delay range, in milliseconds.
 * @param {number} maxMs - The other end of the delay range. The bounds
 *   are ordered internally so the skew always leans toward the smaller
 *   value, even if the arguments arrive swapped.
 * @returns {number} Delay in milliseconds, rounded to an integer.
 */
function betaDelay(minMs, maxMs) {
	const low = Math.min(minMs, maxMs);
	const high = Math.max(minMs, maxMs);
	const x = gammaSample(2);
	const fraction = x / (x + gammaSample(5));
	return Math.round(low + fraction * (high - low));
}
/**
 * Draw one sample from Gamma(shape, scale = 1) using the Marsaglia–Tsang
 * squeeze method.
 *
 * The core method is only valid for shape >= 1. Smaller shapes are
 * handled with the standard boost Gamma(a) = Gamma(a + 1) * U^(1/a);
 * without it, shape < 1/3 makes `d` negative, `c` NaN, and the
 * rejection loop can never accept a sample.
 *
 * @param {number} shape - Shape parameter; must be finite and > 0.
 * @returns {number} A positive gamma-distributed sample.
 * @throws {RangeError} If `shape` is not a finite positive number.
 */
function gammaSample(shape) {
	if (!Number.isFinite(shape) || shape <= 0) throw new RangeError(`gammaSample: shape must be a finite positive number, got ${shape}`);
	// 1 - Math.random() lies in (0, 1], which keeps the power away from 0^x.
	if (shape < 1) return gammaSample(shape + 1) * Math.pow(1 - Math.random(), 1 / shape);
	const d = shape - 1 / 3;
	const c = 1 / Math.sqrt(9 * d);
	while (true) {
		let x, v;
		do {
			x = normalSample();
			v = 1 + c * x;
		} while (v <= 0);
		v = v * v * v;
		const u = Math.random();
		// Cheap squeeze test first; the exact log test only runs when it fails.
		if (u < 1 - .0331 * x * x * x * x) return d * v;
		if (Math.log(u) < .5 * x * x + d * (1 - v + Math.log(v))) return d * v;
	}
}
/**
 * Draw one standard-normal sample with the Box–Muller transform.
 * Both uniforms are re-drawn while they are exactly 0 so `Math.log`
 * never sees 0.
 *
 * @returns {number} A sample from N(0, 1).
 */
function normalSample() {
	let u = 0, v = 0;
	while (u === 0) u = Math.random();
	while (v === 0) v = Math.random();
	return Math.sqrt(-2 * Math.log(u)) * Math.cos(2 * Math.PI * v);
}
/**
 * Inter-action delay used when paced mode is on: a Beta(2, 5)-shaped
 * random delay in [800, 4600] ms. The 800 ms floor keeps the minimum
 * from ever being "too fast", and the long right tail stands in for
 * occasional long pauses.
 *
 * The caller is expected to subtract time already spent since the
 * previous action so the user-perceived delay isn't doubled.
 *
 * @returns {number} Target delay in milliseconds.
 */
function interActionDelay() {
	return betaDelay(800, 4600);
}
|
|
3158
|
+
|
|
3079
3159
|
//#endregion
|
|
3080
3160
|
//#region src/lib/browser-mcp/policy.ts
|
|
3081
3161
|
const BLOCKED_URL_RE = /^(chrome|edge|brave|opera|vivaldi):\/\/(settings|preferences|extensions|policy|management|password|flags|flag-descriptions)/i;
|
|
@@ -3110,6 +3190,78 @@ function preflightUrlPolicy(toolName, args) {
|
|
|
3110
3190
|
|
|
3111
3191
|
//#endregion
|
|
3112
3192
|
//#region src/lib/browser-mcp/dispatch.ts
|
|
3193
|
+
/**
 * Names of the tools whose dispatch counts as a mutating user action
 * for pacing. Tools that are not listed (list_tabs, screenshot,
 * read_page, diagnostics, plain navigation, ...) skip the inter-action
 * delay because they don't look like a human clicking around.
 */
const PACED_TOOLS = new Set(["browser_click", "browser_fill", "browser_type", "browser_keyboard", "browser_scroll", "browser_mouse", "browser_drag"]);
// Wall-clock time (ms) recorded after the latest paced dispatch; 0 until the first one.
let lastDispatchAt = 0;
// Tab ids reported by the last successful bridge /health probe, plus when that probe ran.
let humanlikeAutoCache = { fetchedAt: 0, tabs: new Set() };
// Minimum age (ms) of the cache before the bridge is probed again.
const HUMANLIKE_PROBE_INTERVAL_MS = 5000;
|
|
3214
|
+
/**
 * Report whether the bridge lists `tabId` among its humanlike tabs.
 *
 * The bridge's `/health` endpoint is probed at most once per
 * `HUMANLIKE_PROBE_INTERVAL_MS` after a successful probe; in between,
 * the cached tab set answers the question. A failed probe is
 * best-effort: it leaves the previous cache in place (so the next call
 * probes again) and is logged at debug level instead of being dropped
 * silently.
 *
 * @param {unknown} tabId - Tab to check; anything but a number yields `false`.
 * @param {AbortSignal} [signal] - Forwarded to the `/health` fetch.
 * @returns {Promise<boolean>} `true` when the cached tab set contains `tabId`.
 */
async function isHumanlikeAutoOn(tabId, signal) {
	if (state.humanlikeForce === "off") return false;
	if (typeof tabId !== "number") return false;
	const now = Date.now();
	const cacheIsStale = now - humanlikeAutoCache.fetchedAt > HUMANLIKE_PROBE_INTERVAL_MS;
	if (cacheIsStale) {
		try {
			const ready = await ensureBridgeReady();
			if (ready.install_required) return false;
			const res = await fetch(`http://127.0.0.1:${ready.port}/health`, {
				headers: { authorization: `Bearer ${ready.token}` },
				signal
			});
			if (res.ok) {
				const body = await res.json();
				const tabs = new Set();
				for (const t of body.humanlike_tabs ?? []) if (typeof t.tabId === "number") tabs.add(t.tabId);
				humanlikeAutoCache = {
					fetchedAt: now,
					tabs
				};
			}
		} catch (err) {
			// Pacing must never break a dispatch, so fall back to the cached answer.
			consola.debug("humanlike auto probe failed; using cached tab set", err);
		}
	}
	return humanlikeAutoCache.tabs.has(tabId);
}
|
|
3237
|
+
/**
 * Pause before a paced tool runs so that consecutive mutating actions
 * are spaced by a randomized, human-looking interval.
 *
 * Does nothing for tools outside `PACED_TOOLS`, or when pacing is off
 * (forced "on" always paces; "auto" asks `isHumanlikeAutoOn`). Time
 * already elapsed since the previous paced dispatch is credited
 * against the sampled delay.
 *
 * @param {string} tool - Tool name being dispatched.
 * @param {AbortSignal} [signal] - Aborts the wait.
 * @param {number} [tabId] - Tab the action targets, used in "auto" mode.
 * @returns {Promise<void>}
 */
async function maybeInjectHumanlikeDelay(tool, signal, tabId) {
	if (!PACED_TOOLS.has(tool)) return;
	const mode = state.humanlikeForce;
	const paced = mode === "on" || mode === "auto" && await isHumanlikeAutoOn(tabId, signal);
	if (!paced) return;
	const target = interActionDelay();
	const remaining = target - (Date.now() - lastDispatchAt);
	if (remaining > 0) await sleepAbortable(remaining, signal);
	lastDispatchAt = Date.now();
}
|
|
3248
|
+
/**
 * Resolve after `ms` milliseconds, or reject early if `signal` aborts.
 *
 * The rejection is always an `Error` with message "aborted"; the
 * signal's own abort reason is attached as `cause` so it is not lost.
 *
 * @param {number} ms - Time to wait, in milliseconds.
 * @param {AbortSignal} [signal] - Optional signal that cancels the wait.
 * @returns {Promise<void>}
 */
function sleepAbortable(ms, signal) {
	return new Promise((resolve, reject) => {
		const abortError = () => new Error("aborted", { cause: signal?.reason });
		if (signal?.aborted) {
			reject(abortError());
			return;
		}
		let timer;
		// Declared before the timer so neither callback refers to a later binding.
		const onAbort = () => {
			clearTimeout(timer);
			reject(abortError());
		};
		timer = setTimeout(() => {
			signal?.removeEventListener("abort", onAbort);
			resolve();
		}, ms);
		signal?.addEventListener("abort", onAbort, { once: true });
	});
}
|
|
3113
3265
|
const PER_TOOL_TIMEOUTS = {
|
|
3114
3266
|
browser_list_tabs: {
|
|
3115
3267
|
defaultMs: 5e3,
|
|
@@ -3275,6 +3427,7 @@ async function dispatchBrowserTool(tool, args, signal, opts = {}) {
|
|
|
3275
3427
|
};
|
|
3276
3428
|
const ready = await ensureBridgeReady();
|
|
3277
3429
|
if (ready.install_required) return installRequiredToolResult(ready);
|
|
3430
|
+
await maybeInjectHumanlikeDelay(tool, signal, typeof args.tabId === "number" ? args.tabId : void 0);
|
|
3278
3431
|
const { defaultMs, maxMs } = pickTimeout(tool);
|
|
3279
3432
|
const callerTimeout = typeof opts.timeoutMs === "number" && opts.timeoutMs > 0 ? Math.min(opts.timeoutMs, maxMs) : defaultMs;
|
|
3280
3433
|
try {
|
|
@@ -3355,6 +3508,517 @@ function logAudit$1(record) {
|
|
|
3355
3508
|
})();
|
|
3356
3509
|
}
|
|
3357
3510
|
|
|
3511
|
+
//#endregion
|
|
3512
|
+
//#region src/lib/browser-mcp/matcher.ts
|
|
3513
|
+
/**
 * Turn a parsed intent into a concrete action without any I/O.
 *
 * Each cascade layer in `LAYERS` is tried in order. A layer wins when,
 * after tie-breaking, its best candidate meets the layer's score floor
 * and leads the runner-up by at least 0.15 (or has no runner-up).
 *
 * When no layer wins, the result has `source: "escalate"`, an empty
 * `ref`, and a `candidates` shortlist (at most 8, deduplicated by ref)
 * that the caller can hand to a fallback resolver instead of the full
 * snapshot.
 *
 * @param snapshot - Page snapshot whose `elements` the layers scan.
 * @param parsed - Parsed intent (verb, targets, optional value).
 * @param [value] - Explicit value; falls back to `parsed.valueFromIntent`.
 * @returns Resolution with `ref`, `action`, `confidence`, `source`,
 *   `reason`, and, where applicable, `value` / `candidates`.
 */
function deterministicResolve(snapshot, parsed, value) {
	const effectiveValue = value ?? parsed.valueFromIntent;
	const seen = [];
	for (const layer of LAYERS) {
		const hits = layer.run(snapshot, parsed, effectiveValue);
		if (hits.length === 0) continue;
		seen.push(...hits);
		const [best, second] = applyTieBreakers(hits, parsed);
		if (!best) continue;
		const meetsFloor = best.score >= layer.floor;
		const leadsClearly = !second || best.score - second.score >= .15;
		if (!meetsFloor || !leadsClearly) continue;
		const action = inferActionLocal(best.el.role, parsed, effectiveValue);
		const resolved = { ref: best.el.ref, action };
		if (needsValue(action) && effectiveValue !== void 0) resolved.value = effectiveValue;
		resolved.confidence = best.score;
		resolved.source = layer.name;
		resolved.reason = best.reason;
		return resolved;
	}
	const shortlist = dedupeAndRank(seen).slice(0, 8);
	const escalation = { ref: "", action: parsed.verb ?? "click" };
	if (effectiveValue !== void 0) escalation.value = effectiveValue;
	escalation.confidence = 0;
	escalation.source = "escalate";
	escalation.reason = shortlist.length === 0 ? "no candidates from any cascade layer" : `${shortlist.length} ambiguous candidates`;
	escalation.candidates = shortlist.map(({ el, score, layer }) => ({ ref: el.ref, score, layer }));
	return escalation;
}
|
|
3560
|
+
/**
 * Filter out candidates that cannot be acted on and re-rank the rest.
 *
 * Dropped: hidden elements, elements whose bbox width or height is
 * under 4, and (for click / fill / type / select) disabled elements.
 * Survivors get their score multiplied by `weight` and are returned
 * best-first as fresh objects.
 */
function applyTieBreakers(cands, parsed) {
	const verb = parsed.verb ?? "click";
	const skipDisabled = verb === "click" || verb === "fill" || verb === "type" || verb === "select";
	const survivors = [];
	for (const cand of cands) {
		const { hidden, bbox, disabled } = cand.el;
		if (hidden) continue;
		if (bbox && (bbox[2] < 4 || bbox[3] < 4)) continue;
		if (skipDisabled && disabled) continue;
		survivors.push({
			...cand,
			score: cand.score * weight(cand, verb)
		});
	}
	return survivors.sort((x, y) => y.score - x.score);
}
|
|
3573
|
+
/**
 * Score multiplier (never above 1) that nudges ties toward the more
 * plausible element: a bbox whose x/y is not both >= 0 costs 8%, being
 * inside an iframe costs 5%, and for clicks the role decides the rest
 * (button 1, link/a .98, menuitem .96, generic/div/span .9).
 */
function weight(c, verb) {
	const { bbox, isInIframe, role } = c.el;
	let factor = 1;
	if (bbox && !(bbox[0] >= 0 && bbox[1] >= 0)) factor *= .92;
	if (isInIframe) factor *= .95;
	if (verb === "click") switch ((role || "").toLowerCase()) {
		case "link":
		case "a":
			factor *= .98;
			break;
		case "menuitem":
			factor *= .96;
			break;
		case "generic":
		case "div":
		case "span":
			factor *= .9;
			break;
		default: break;
	}
	return Math.min(1, factor);
}
|
|
3589
|
+
/**
 * Collapse candidates that point at the same element ref, keeping the
 * highest-scoring entry per ref (the first one on ties), and return
 * them ordered best-first.
 */
function dedupeAndRank(cands) {
	const bestByRef = new Map();
	cands.forEach((cand) => {
		const key = cand.el.ref;
		const kept = bestByRef.get(key);
		if (!kept || kept.score < cand.score) bestByRef.set(key, cand);
	});
	return Array.from(bestByRef.values()).sort((x, y) => y.score - x.score);
}
|
|
3597
|
+
/**
 * Choose the action to perform on the matched element.
 *
 * Scrolling wins outright (explicit verb, or the word "scroll" in the
 * raw target). Select-like roles always map to "select". Text-entry
 * roles honor an explicit "type"/"fill" verb and otherwise fill when a
 * value exists, click when it does not. Everything else uses the
 * parsed verb, defaulting to "click".
 */
function inferActionLocal(role, parsed, value) {
	const { verb } = parsed;
	if (verb === "scroll_into_view") return "scroll_into_view";
	if (/\bscroll\b/.test(parsed.rawTarget.toLowerCase())) return "scroll_into_view";
	switch ((role || "").toLowerCase()) {
		case "select":
		case "combobox": return "select";
		case "textarea":
		case "input":
		case "textbox":
		case "searchbox":
		case "spinbutton":
			if (verb === "type" || verb === "fill") return verb;
			return value !== void 0 ? "fill" : "click";
		default: return verb ?? "click";
	}
}
|
|
3610
|
+
/** True for the actions that carry a value: "fill", "type" and "select". */
function needsValue(action) {
	switch (action) {
		case "fill":
		case "type":
		case "select": return true;
		default: return false;
	}
}
|
|
3613
|
+
/** Element's name with surrounding whitespace removed ("" when the name is null/undefined). */
function nameOf(el) {
	const raw = el.name ?? "";
	return raw.trim();
}
/** Lower-cased form of `nameOf(el)`, for case-insensitive comparisons. */
function nameLowerOf(el) {
	const trimmed = nameOf(el);
	return trimmed.toLowerCase();
}
|
|
3619
|
+
// Roles treated as click targets by the matcher (compared lower-cased).
const CLICKABLE_ROLE_SET = new Set(["button", "link", "a", "menuitem", "tab", "checkbox", "radio", "switch", "option", "treeitem"]);
// Roles treated as value-carrying inputs by the matcher (compared lower-cased).
const INPUT_ROLE_SET = new Set(["textbox", "input", "textarea", "searchbox", "spinbutton", "combobox", "select", "checkbox", "radio"]);
/**
 * Whether `role` names a clickable control. Case-insensitive; a missing
 * or non-string role is simply not clickable rather than an error,
 * matching the `role || ""` guards used elsewhere in the matcher.
 *
 * @param {string} [role]
 * @returns {boolean}
 */
function isClickableRole(role) {
	return typeof role === "string" && CLICKABLE_ROLE_SET.has(role.toLowerCase());
}
/**
 * Whether `role` names an input-like control. Case-insensitive; a
 * missing or non-string role yields `false`.
 *
 * @param {string} [role]
 * @returns {boolean}
 */
function isInputRole(role) {
	return typeof role === "string" && INPUT_ROLE_SET.has(role.toLowerCase());
}
/**
 * Whether an element with `role` is a sensible target for `verb`.
 * Click (or no verb) accepts clickable and input roles; fill / type /
 * select accept input roles only; any other verb accepts every role.
 *
 * @param {string} [role]
 * @param {string} [verb]
 * @returns {boolean}
 */
function verbCompatible(role, verb) {
	if (!verb || verb === "click") return isClickableRole(role) || isInputRole(role);
	if (verb === "fill" || verb === "type" || verb === "select") return isInputRole(role);
	return true;
}
|
|
3632
|
+
/**
 * Case-insensitive test for `needle` appearing in `haystack` as a whole
 * token, i.e. not glued to a word character on either side.
 *
 * Lookarounds are used instead of `\b` because `\b` only exists next to
 * a word character: a needle that starts or ends with punctuation
 * (e.g. "($)") could never match as a standalone token with `\b`.
 *
 * @param {string} haystack - Text to search; empty/nullish yields `false`.
 * @param {string} needle - Literal text to find; regex metacharacters are escaped.
 * @returns {boolean}
 */
function wholeWordContains(haystack, needle) {
	if (!haystack || !needle) return false;
	const literal = needle.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
	return new RegExp(`(?<!\\w)${literal}(?!\\w)`, "i").test(haystack);
}
|
|
3636
|
+
/**
 * Layer 0: exact, case-insensitive match between the element's
 * accessible name and the quoted name (or, failing that, the
 * normalized target). Only verb-compatible elements are considered.
 */
const L0 = {
	name: "L0",
	floor: .95,
	run: (snapshot, parsed) => {
		const wanted = parsed.quotedName ?? parsed.normTarget;
		if (!wanted) return [];
		const wantedLower = wanted.toLowerCase();
		const hits = [];
		for (const el of snapshot.elements) {
			if (!verbCompatible(el.role, parsed.verb)) continue;
			const label = nameLowerOf(el);
			if (label && label === wantedLower) hits.push({
				el,
				score: 1,
				layer: "L0",
				reason: `L0 exact name "${el.name}"`
			});
		}
		return hits;
	}
};
|
|
3657
|
+
/**
 * Layer 1: match an input by its label.
 *
 * Runs only when the verb is absent or is fill / type / select. An
 * input matches when its lower-cased name equals the target, equals
 * the target followed by " *" or " (required)", or ends with the
 * target and everything before it is pure "required" decoration
 * (whitespace, `*`, parentheses, `:`, `_`, `-`, or the word
 * "required").
 *
 * The decoration check is anchored at both ends. An unanchored
 * character class would accept any prefix that merely starts with one
 * of the letters of "required", so "recovery email" or "user email"
 * would be treated as the "email" field.
 */
const L1 = {
	name: "L1",
	floor: .9,
	run: (snapshot, parsed) => {
		if (parsed.verb && parsed.verb !== "fill" && parsed.verb !== "type" && parsed.verb !== "select") return [];
		const target = parsed.fieldHint ?? parsed.normTarget;
		if (!target) return [];
		const tgt = target.toLowerCase();
		const decorationOnly = /^(?:required|[\s*():_-])+$/;
		const out = [];
		for (const el of snapshot.elements) {
			if (!isInputRole(el.role)) continue;
			const nm = nameLowerOf(el);
			const plainOrSuffixed = nm === tgt || nm === `${tgt} *` || nm === `${tgt} (required)`;
			const decoratedPrefix = nm.endsWith(tgt) && decorationOnly.test(nm.slice(0, nm.length - tgt.length));
			if (plainOrSuffixed || decoratedPrefix) out.push({
				el,
				score: .95,
				layer: "L1",
				reason: `L1 label "${el.name}"`
			});
		}
		return out;
	}
};
|
|
3679
|
+
/**
 * Layer 2: match an input by its placeholder text. An exact
 * (case-insensitive) placeholder scores .85; a placeholder that merely
 * contains the target as a whole word scores .75.
 */
const L2 = {
	name: "L2",
	floor: .7,
	run: (snapshot, parsed) => {
		const wanted = parsed.fieldHint ?? parsed.normTarget;
		if (!wanted) return [];
		const needle = wanted.toLowerCase();
		const hits = [];
		for (const el of snapshot.elements) {
			if (!isInputRole(el.role)) continue;
			const placeholder = (el.placeholder ?? "").toLowerCase();
			if (!placeholder) continue;
			const exact = placeholder === needle;
			if (!exact && !wholeWordContains(placeholder, needle)) continue;
			hits.push(exact ? {
				el,
				score: .85,
				layer: "L2",
				reason: `L2 placeholder exact "${el.placeholder}"`
			} : {
				el,
				score: .75,
				layer: "L2",
				reason: `L2 placeholder contains "${needle}"`
			});
		}
		return hits;
	}
};
|
|
3707
|
+
/**
 * Layer 3: fuzzy name match. A verb-compatible element qualifies when
 * its name contains the normalized target as a whole word; the score
 * is .72 when the target covers at least 80% of the name's length and
 * .68 otherwise.
 */
const L3 = {
	name: "L3",
	floor: .65,
	run: (snapshot, parsed) => {
		const wanted = parsed.normTarget;
		if (!wanted) return [];
		const hits = [];
		for (const el of snapshot.elements) {
			if (!verbCompatible(el.role, parsed.verb)) continue;
			const label = nameOf(el);
			if (!label || !wholeWordContains(label, wanted)) continue;
			const coverage = wanted.length / label.length;
			hits.push({
				el,
				score: coverage >= .8 ? .72 : .68,
				layer: "L3",
				reason: `L3 fuzzy name "${label}"`
			});
		}
		return hits;
	}
};
|
|
3730
|
+
/**
 * Layer 4: match a clickable element by its `value` text. An exact
 * (case-insensitive, trimmed) match scores .65; a whole-word
 * containment match scores .6.
 */
const L4 = {
	name: "L4",
	floor: .6,
	run: (snapshot, parsed) => {
		const wanted = parsed.normTarget;
		if (!wanted) return [];
		const needle = wanted.toLowerCase();
		const hits = [];
		for (const el of snapshot.elements) {
			if (!isClickableRole(el.role)) continue;
			const text = (el.value ?? "").toLowerCase().trim();
			if (!text) continue;
			const exact = text === needle;
			if (!exact && !wholeWordContains(text, needle)) continue;
			hits.push(exact ? {
				el,
				score: .65,
				layer: "L4",
				reason: `L4 text exact "${el.value}"`
			} : {
				el,
				score: .6,
				layer: "L4",
				reason: `L4 text contains "${needle}"`
			});
		}
		return hits;
	}
};
|
|
3758
|
+
/**
 * Layer 5: identifier-style targets (a letter followed by at least two
 * letters, digits, `_` or `-`) are compared against element
 * attributes with separators stripped and case ignored. The first
 * matching attribute wins, in priority order: testid (.9), id (.88),
 * name (.86), aria-label (.86).
 */
const L5 = {
	name: "L5",
	floor: .85,
	run: (snapshot, parsed) => {
		const target = parsed.normTarget;
		if (!target) return [];
		if (!/^[a-z][a-z0-9_-]{2,}$/i.test(target)) return [];
		const norm = target.toLowerCase().replace(/[-_]/g, "");
		// [attribute key, score, label used in the reason string], highest priority first.
		const probes = [
			["testid", .9, "testid"],
			["id", .88, "id"],
			["name_attr", .86, "name"],
			["aria_label", .86, "aria-label"]
		];
		const hits = [];
		for (const el of snapshot.elements) {
			const attrs = el.attrs;
			if (!attrs) continue;
			const probe = probes.find(([key]) => attrs[key] && stripSep(attrs[key]).toLowerCase() === norm);
			if (!probe) continue;
			const [key, score, label] = probe;
			hits.push({
				el,
				score,
				layer: "L5",
				reason: `L5 ${label}="${attrs[key]}"`
			});
		}
		return hits;
	}
};
|
|
3807
|
+
/** Remove every hyphen, underscore and whitespace character from `s`. */
function stripSep(s) {
	return s.split(/[-_\s]/).join("");
}
|
|
3810
|
+
/**
 * Cascade layers in the order `deterministicResolve` tries them:
 * L0–L5 are defined above; L6 (ordinal pick) and L7 (common-field
 * heuristics) are defined inline here.
 */
const LAYERS = [
	L0,
	L1,
	L2,
	L3,
	L4,
	L5,
	{
		// L6: "first/second/…/last <kind>" — pick by reading order.
		name: "L6",
		floor: .75,
		run: (snapshot, parsed) => {
			if (!parsed.ordinal) return [];
			const { n, kind } = parsed.ordinal;
			const candidates = snapshot.elements.filter((el) => {
				if (!kind) return true;
				// A missing role is tolerated, like everywhere else in the matcher.
				const role = (el.role || "").toLowerCase();
				return role === kind || role === `${kind}s` || (el.tag ?? "").toLowerCase() === kind;
			});
			if (candidates.length < Math.abs(n)) return [];
			// Reading order: 24px-tall row buckets top to bottom, then left
			// to right. bbox is optional on elements, so entries without one
			// are ordered after every positioned element instead of crashing
			// the comparator.
			const sorted = [...candidates].sort((a, b) => {
				if (!a.bbox || !b.bbox) return a.bbox ? -1 : b.bbox ? 1 : 0;
				const rowDiff = Math.floor(a.bbox[1] / 24) - Math.floor(b.bbox[1] / 24);
				if (rowDiff !== 0) return rowDiff;
				return a.bbox[0] - b.bbox[0];
			});
			const idx = n === -1 ? sorted.length - 1 : n - 1;
			if (idx < 0 || idx >= sorted.length) return [];
			return [{
				el: sorted[idx],
				score: .8,
				layer: "L6",
				reason: `L6 ordinal pick #${n} of ${sorted.length} ${kind ?? "elements"}`
			}];
		}
	},
	{
		// L7: last-resort heuristics for a handful of very common targets.
		name: "L7",
		floor: .5,
		run: (snapshot, parsed) => {
			const hint = parsed.fieldHint ?? parsed.normTarget;
			if (!hint) return [];
			const h = hint.toLowerCase();
			// Roles are compared lower-cased, consistent with the other layers.
			const roleOf = (el) => (el.role || "").toLowerCase();
			const isInput = (el) => isInputRole(el.role);
			const named = (el, word) => wholeWordContains(el.name ?? "", word);
			let matches;
			let reason;
			if (h === "email") {
				matches = (el) => el.inputType === "email" || isInput(el) && (wholeWordContains(el.placeholder ?? "", "email") || named(el, "email"));
				reason = "L7 email heuristic";
			} else if (h === "password") {
				matches = (el) => el.inputType === "password" || isInput(el) && named(el, "password");
				reason = "L7 password heuristic";
			} else if (h === "search") {
				matches = (el) => roleOf(el) === "searchbox" || el.inputType === "search" || isInput(el) && named(el, "search");
				reason = "L7 search heuristic";
			} else if (h === "phone" || h === "tel") {
				matches = (el) => el.inputType === "tel" || isInput(el) && named(el, "phone");
				reason = "L7 phone heuristic";
			} else if (h === "submit" || h === "sign in" || h === "signin" || h === "log in" || h === "login") {
				const sumRe = /^(submit|send|continue|next|save|sign[\s-]?in|sign[\s-]?up|log[\s-]?in)$/i;
				matches = (el) => roleOf(el) === "button" && sumRe.test(el.name ?? "");
				reason = "L7 submit heuristic";
			} else if (h === "username" || h === "user") {
				matches = (el) => isInput(el) && (named(el, "user") || named(el, "login") || named(el, "account"));
				reason = "L7 username heuristic";
			} else return [];
			const out = [];
			for (const el of snapshot.elements) if (matches(el)) out.push({
				el,
				score: .55,
				layer: "L7",
				reason
			});
			return out;
		}
	}
];
|
|
3902
|
+
|
|
3903
|
+
//#endregion
|
|
3904
|
+
//#region src/lib/browser-mcp/parse-intent.ts
|
|
3905
|
+
// Leading action word of an intent ("click …", "scroll into view …"); group 1 is the verb.
const VERB_RE = /^\s*(click|press|tap|fill|enter|type|select|choose|scroll(?:[ -]?into[ -]?view)?|toggle|check|uncheck|open|focus|hover)\s+/i;
// Trailing "with X" / "to X" / "= X" clause; group 1 is the value.
const VALUE_RE = /\s+(?:with|to|=)\s+(.+?)\s*$/i;
// First span wrapped in double quotes, single quotes or backticks; group 1 is the content.
const QUOTED_RE = /["'`]([^"'`]+)["'`]/;
// Run of one to four capitalized words, used as a name when nothing is quoted.
const TITLE_CASE_RE = /\b([A-Z][\w]*(?:\s+[A-Z\d][\w]*){0,3})\b/;
// Ordinal word -> 1-based position; "last" is encoded as -1.
const ORDINAL_WORDS = {
	first: 1, second: 2, third: 3, fourth: 4, fifth: 5,
	sixth: 6, seventh: 7, eighth: 8, ninth: 9, tenth: 10,
	last: -1
};
// "<ordinal word> <kind>", e.g. "second link"; groups are the word and the kind.
const ORDINAL_WORD_RE = /\b(first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|last)\s+(\w+)/i;
// "<number>[st|nd|rd|th] <kind>", e.g. "3rd button"; groups are the digits and the kind.
const ORDINAL_NUM_RE = /\b(\d+)(?:st|nd|rd|th)?\s+(\w+)/i;
// Nouns that describe what kind of control the intent refers to.
const FIELD_HINT_KINDS = ["field", "input", "textbox", "box", "search", "dropdown", "select", "menu", "button", "link", "tab", "checkbox", "radio", "switch"];
// "<word> <kind>", e.g. "email field"; group 1 is the word before the kind.
const FIELD_HINT_RE = new RegExp(`\\b(\\w+)\\s+(?:${FIELD_HINT_KINDS.join("|")})\\b`, "i");
// Articles and demonstratives removed when normalizing a target (global: use with replace only).
const ARTICLES_RE = /\b(the|a|an|this|that)\b/gi;
|
|
3942
|
+
/**
|
|
3943
|
+
* Parse a natural-language intent into structured parts.
|
|
3944
|
+
*
|
|
3945
|
+
* Returns a fully-formed `ParsedIntent` even for unparseable inputs
|
|
3946
|
+
* (rawTarget = the trimmed intent, normTarget = its lowercased
|
|
3947
|
+
* normalization, every other field undefined). The matcher cascade
|
|
3948
|
+
* handles "I don't know what to do" by falling through layer-by-
|
|
3949
|
+
* layer until L7 or escalate; an unparseable intent simply has
|
|
3950
|
+
* less signal for the layers to key on.
|
|
3951
|
+
*/
|
|
3952
|
+
function parseIntent(intent) {
|
|
3953
|
+
let work = String(intent ?? "").trim();
|
|
3954
|
+
let verb;
|
|
3955
|
+
const verbMatch = VERB_RE.exec(work);
|
|
3956
|
+
if (verbMatch) {
|
|
3957
|
+
verb = mapVerb(verbMatch[1]);
|
|
3958
|
+
work = work.slice(verbMatch[0].length);
|
|
3959
|
+
}
|
|
3960
|
+
let valueFromIntent;
|
|
3961
|
+
const valueMatch = VALUE_RE.exec(work);
|
|
3962
|
+
if (valueMatch) {
|
|
3963
|
+
valueFromIntent = valueMatch[1].trim();
|
|
3964
|
+
work = work.slice(0, valueMatch.index).trim();
|
|
3965
|
+
}
|
|
3966
|
+
let quotedName;
|
|
3967
|
+
const quotedMatch = QUOTED_RE.exec(work);
|
|
3968
|
+
if (quotedMatch) quotedName = quotedMatch[1].trim();
|
|
3969
|
+
else {
|
|
3970
|
+
const titleMatch = TITLE_CASE_RE.exec(work);
|
|
3971
|
+
if (titleMatch) quotedName = titleMatch[1].trim();
|
|
3972
|
+
}
|
|
3973
|
+
let ordinal;
|
|
3974
|
+
const ordWordMatch = ORDINAL_WORD_RE.exec(work);
|
|
3975
|
+
if (ordWordMatch) {
|
|
3976
|
+
const n = ORDINAL_WORDS[ordWordMatch[1].toLowerCase()];
|
|
3977
|
+
if (typeof n === "number") ordinal = {
|
|
3978
|
+
n,
|
|
3979
|
+
kind: ordWordMatch[2].toLowerCase()
|
|
3980
|
+
};
|
|
3981
|
+
} else {
|
|
3982
|
+
const ordNumMatch = ORDINAL_NUM_RE.exec(work);
|
|
3983
|
+
if (ordNumMatch) ordinal = {
|
|
3984
|
+
n: Number.parseInt(ordNumMatch[1], 10),
|
|
3985
|
+
kind: ordNumMatch[2].toLowerCase()
|
|
3986
|
+
};
|
|
3987
|
+
}
|
|
3988
|
+
let fieldHint;
|
|
3989
|
+
const fieldMatch = FIELD_HINT_RE.exec(work);
|
|
3990
|
+
if (fieldMatch) fieldHint = fieldMatch[1].toLowerCase();
|
|
3991
|
+
const rawTarget = work.trim();
|
|
3992
|
+
let normTarget = rawTarget.toLowerCase().replace(ARTICLES_RE, "").replace(/\s+/g, " ").trim();
|
|
3993
|
+
for (const kind of FIELD_HINT_KINDS) {
|
|
3994
|
+
const tail = new RegExp(`\\s+${kind}$`, "i");
|
|
3995
|
+
if (tail.test(normTarget)) {
|
|
3996
|
+
normTarget = normTarget.replace(tail, "").trim();
|
|
3997
|
+
break;
|
|
3998
|
+
}
|
|
3999
|
+
}
|
|
4000
|
+
if (ordinal) normTarget = normTarget.replace(/^(\d+(?:st|nd|rd|th)?|first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|last)\s+/i, "").trim();
|
|
4001
|
+
const out = {
|
|
4002
|
+
rawTarget,
|
|
4003
|
+
normTarget
|
|
4004
|
+
};
|
|
4005
|
+
if (verb) out.verb = verb;
|
|
4006
|
+
if (quotedName) out.quotedName = quotedName;
|
|
4007
|
+
if (fieldHint) out.fieldHint = fieldHint;
|
|
4008
|
+
if (ordinal) out.ordinal = ordinal;
|
|
4009
|
+
if (valueFromIntent !== void 0) out.valueFromIntent = valueFromIntent;
|
|
4010
|
+
return out;
|
|
4011
|
+
}
|
|
4012
|
+
function mapVerb(raw) {
|
|
4013
|
+
const v = raw.toLowerCase();
|
|
4014
|
+
if (v === "click" || v === "press" || v === "tap" || v === "toggle" || v === "check" || v === "uncheck" || v === "open") return "click";
|
|
4015
|
+
if (v === "fill" || v === "enter") return "fill";
|
|
4016
|
+
if (v === "type") return "type";
|
|
4017
|
+
if (v === "select" || v === "choose") return "select";
|
|
4018
|
+
if (v === "scroll" || v === "scrollintoview" || v === "scroll into view" || v === "scroll-into-view") return "scroll_into_view";
|
|
4019
|
+
if (v === "hover" || v === "focus") return void 0;
|
|
4020
|
+
}
|
|
4021
|
+
|
|
3358
4022
|
//#endregion
|
|
3359
4023
|
//#region src/lib/mcp-inflight.ts
|
|
3360
4024
|
/**
|
|
@@ -3659,6 +4323,21 @@ async function callCompressor(systemPrompt, userMessage, tool, signal) {
|
|
|
3659
4323
|
}
|
|
3660
4324
|
}
|
|
3661
4325
|
/**
* Stable entry point onto `callCompressor` for sibling modules that want
* the same forced-tool-calling pipeline. Deliberately a wrapper instead of
* a direct re-export, so the inner function's signature can evolve without
* touching callers of this one.
*
* @param systemPrompt - System prompt forwarded unchanged.
* @param userMessage - User message forwarded unchanged.
* @param tool - Tool definition forwarded unchanged.
* @param signal - Abort signal forwarded unchanged.
* @returns Whatever `callCompressor` resolves to; rejections propagate.
*/
async function callCompressorPublic(systemPrompt, userMessage, tool, signal) {
	const reply = await callCompressor(systemPrompt, userMessage, tool, signal);
	return reply;
}
|
|
4340
|
+
/**
|
|
3662
4341
|
* Strip a single leading / trailing ``` (or ```json) code fence from a
|
|
3663
4342
|
* model's free-form text reply so JSON.parse works. Idempotent on
|
|
3664
4343
|
* fence-free input. Defensive against the failure mode caught in PR #55
|
|
@@ -3680,12 +4359,32 @@ function stripCodeFence(text) {
|
|
|
3680
4359
|
* whether the intent supplied a value. Single source of truth for
|
|
3681
4360
|
* element matching.
|
|
3682
4361
|
*
|
|
4362
|
+
* Phase 2 short-circuits the common case through the deterministic
|
|
4363
|
+
* matcher cascade in `./matcher.ts` — pure-sync, no LLM round-trip,
|
|
4364
|
+
* <5ms on a 200-element snapshot. Only when the cascade returns
|
|
4365
|
+
* `source: "escalate"` (0 candidates or >1 ambiguous candidates) do
|
|
4366
|
+
* we fall through to the existing fast-model `pickMatchingElements`
|
|
4367
|
+
* path. When we DO escalate, we pass the cascade's pre-filtered
|
|
4368
|
+
* top-K shortlist along so the fast model sees ~8 candidates instead
|
|
4369
|
+
* of the full 200-element snapshot — 3-5× token-cost reduction even
|
|
4370
|
+
* on misses.
|
|
4371
|
+
*
|
|
3683
4372
|
* Returns ref="" + confidence=0 when no element matches — caller
|
|
3684
4373
|
* should escalate to visual fallback (when `visualSurfaces` is
|
|
3685
4374
|
* present) or surface the miss to the lead model.
|
|
3686
4375
|
*/
|
|
3687
4376
|
async function pickElement(snapshot, intent, signal, value) {
|
|
3688
|
-
const
|
|
4377
|
+
const det = deterministicResolve(snapshot, parseIntent(intent), value);
|
|
4378
|
+
if (det.source !== "escalate" && det.ref !== "") {
|
|
4379
|
+
const out$1 = {
|
|
4380
|
+
ref: det.ref,
|
|
4381
|
+
action: det.action,
|
|
4382
|
+
confidence: det.confidence
|
|
4383
|
+
};
|
|
4384
|
+
if (det.value !== void 0) out$1.value = det.value;
|
|
4385
|
+
return out$1;
|
|
4386
|
+
}
|
|
4387
|
+
const matches = await pickMatchingElements(snapshot, intent, signal, det.candidates);
|
|
3689
4388
|
if (matches.length === 0) return {
|
|
3690
4389
|
ref: "",
|
|
3691
4390
|
action: "click",
|
|
@@ -3756,9 +4455,28 @@ const FIND_ELEMENTS_TOOL = {
|
|
|
3756
4455
|
* Return up to 5 candidate matches for an intent. Used by
|
|
3757
4456
|
* `browser_find` — the lead model gets a small ranked list rather than
|
|
3758
4457
|
* a full element dump. Empty array when nothing matches.
|
|
3759
|
-
|
|
3760
|
-
|
|
3761
|
-
|
|
4458
|
+
*
|
|
4459
|
+
* Phase 2 short-circuits via the deterministic matcher cascade when
|
|
4460
|
+
* possible. When the cascade finds a single confident match, we
|
|
4461
|
+
* synthesize a one-item `FindMatch[]` and skip the fast-model
|
|
4462
|
+
* round-trip. When the cascade's `candidates` shortlist is passed in
|
|
4463
|
+
* by `pickElement` (escalation path), we trim the snapshot to just
|
|
4464
|
+
* those refs before sending to the fast model — keeps tokens down on
|
|
4465
|
+
* misses too.
|
|
4466
|
+
*/
|
|
4467
|
+
async function pickMatchingElements(snapshot, intent, signal, shortlist) {
|
|
4468
|
+
if (!shortlist) {
|
|
4469
|
+
const det = deterministicResolve(snapshot, parseIntent(intent));
|
|
4470
|
+
if (det.source !== "escalate" && det.ref !== "") {
|
|
4471
|
+
if (snapshot.elements.find((e) => e.ref === det.ref)) return [{
|
|
4472
|
+
ref: det.ref,
|
|
4473
|
+
reason: `deterministic ${det.source}: ${det.reason}`
|
|
4474
|
+
}];
|
|
4475
|
+
}
|
|
4476
|
+
shortlist = det.candidates;
|
|
4477
|
+
}
|
|
4478
|
+
const refSet = shortlist && shortlist.length > 0 ? new Set(shortlist.map((s) => s.ref)) : void 0;
|
|
4479
|
+
const trimmed = (refSet ? snapshot.elements.filter((e) => refSet.has(e.ref)) : snapshot.elements).map((e) => ({
|
|
3762
4480
|
ref: e.ref,
|
|
3763
4481
|
role: e.role,
|
|
3764
4482
|
name: e.name
|
|
@@ -3934,6 +4652,235 @@ async function pickElementVisual(screenshotB64, contentType, intent, visualSurfa
|
|
|
3934
4652
|
};
|
|
3935
4653
|
}
|
|
3936
4654
|
|
|
4655
|
+
//#endregion
|
|
4656
|
+
//#region src/lib/browser-mcp/decompose.ts
|
|
4657
|
+
const LOGIN_RE = /^log[ -]?in (?:to .+? )?with\s+([^\s/]+)\s*\/\s*(.+?)\s*$/i;
const SEARCH_CLICK_RE = /^search\s+(?:for\s+)?(.+?)\s+and\s+click\s+(?:the\s+)?first\s+result\s*$/i;
// Every alternative already consumes its own surrounding whitespace, so no
// outer `\s*` wrapper is used: stacking whitespace quantifiers made the
// regex backtrack cubically on long runs of spaces without changing what
// it matches.
const CONJUNCTION_SPLIT_RE = /\s+and\s+then\s+|\s+then\s+|\s*;\s*|\s*,\s+and\s+/i;
/**
* Decompose a natural-language intent into atomic steps.
*
* Templates are tried in order:
*  1. `login` — "log in [to X] with USER / PASS" becomes fill-user,
*     fill-password, click-submit.
*  2. `search_click` — "search [for] Q and click [the] first result".
*  3. `conjunction` — split on "and then", "then", ";" or ", and"; the
*     caller-supplied `value` is attached to the first part only.
*  4. `fallback` — a single step carrying the trimmed intent and `value`.
*
* Note that the `login` and `search_click` templates take their values
* from the intent text; the `value` argument is not used by them.
*
* @param {unknown} intent - Free-form instruction; `null`/`undefined` are
*   treated as the empty string.
* @param {string} [value] - Optional value for fill/type/select actions.
* @returns {{steps: Array<{intent: string, value?: string}>, template: string,
*   successSummary?: string}}
*/
function decompose(intent, value) {
	const raw = String(intent ?? "").trim();
	const valuePart = value !== void 0 ? { value } : {};
	if (!raw) return {
		steps: [{
			intent: "",
			...valuePart
		}],
		template: "fallback"
	};
	const loginMatch = LOGIN_RE.exec(raw);
	if (loginMatch) {
		const user = loginMatch[1].trim();
		const pass = loginMatch[2].trim();
		return {
			steps: [
				{
					intent: "the email or username input",
					value: user
				},
				{
					intent: "the password input",
					value: pass
				},
				{ intent: "the Sign in or Log in button" }
			],
			template: "login",
			successSummary: "logged in"
		};
	}
	const searchMatch = SEARCH_CLICK_RE.exec(raw);
	if (searchMatch) {
		const query = searchMatch[1].trim();
		return {
			steps: [
				{
					intent: "the search input",
					value: query
				},
				{ intent: "the search button or submit" },
				{ intent: "the first search result" }
			],
			template: "search_click",
			successSummary: `searched for "${query}" and opened first result`
		};
	}
	// A string without any separator splits into exactly one part, so the
	// length check alone decides whether this is a conjunction.
	const parts = raw.split(CONJUNCTION_SPLIT_RE).map((p) => p.trim()).filter(Boolean);
	if (parts.length >= 2) return {
		steps: parts.map((p, i) => i === 0 && value !== void 0 ? {
			intent: p,
			value
		} : { intent: p }),
		template: "conjunction"
	};
	return {
		steps: [{
			intent: raw,
			...valuePart
		}],
		template: "fallback"
	};
}
|
|
4733
|
+
|
|
4734
|
+
//#endregion
|
|
4735
|
+
//#region src/lib/browser-mcp/observe.ts
|
|
4736
|
+
const OBSERVE_SYSTEM = `You describe a web page for an AI assistant that cannot see the DOM.

Write 2-4 sentences focused on user-actionable elements (forms, buttons, links) and the page's purpose. If 'intent' is provided, focus the description on the region most relevant to that intent.

DO NOT mention DOM refs, selectors, bbox coordinates, or any internal identifiers. Plain prose only. Treat the reader as someone who will issue commands like "click the Sign In button" — describe what's there in terms they can act on.

Call the describe_page tool with your description.`;
const OBSERVE_TOOL = {
	name: "describe_page",
	description: "Report the natural-language description of the page.",
	parameters: {
		type: "object",
		required: ["description"],
		additionalProperties: false,
		properties: { description: {
			type: "string",
			description: "2-4 sentence prose description of the visible page state."
		} }
	}
};
/**
* Produce a natural-language description of the current page state.
*
* Sends the model a reduced view of the snapshot: url, title, the first
* 4000 characters of text, and role + name of the first 80 named elements.
* Refs are not included in that payload or in the returned object.
*
* @param snapshot - Page snapshot; reads `elements`, `url`, `title`, `text`
*   and `visualSurfaces`.
* @param {string} [intent] - Optional focus for the description.
* @param signal - Abort signal forwarded to the model call.
* @returns {Promise<{description: string, hasVisualSurfaces: boolean, url?: string, title?: string}>}
*   `description` falls back to "Page contents could not be described."
*   when the model reply has no usable (non-blank string) description.
*/
async function observePage(snapshot, intent, signal) {
	const hasVisualSurfaces = Boolean(snapshot.visualSurfaces && snapshot.visualSurfaces.length > 0);
	const trimmedElements = snapshot.elements.filter((e) => e.name && e.name.length > 0).slice(0, 80).map((e) => ({
		role: e.role,
		name: e.name
	}));
	const raw = await callCompressorPublic(OBSERVE_SYSTEM, JSON.stringify({
		intent: intent ?? "",
		url: snapshot.url ?? "",
		title: snapshot.title ?? "",
		visible_text: (snapshot.text ?? "").slice(0, 4e3),
		actionable_elements: trimmedElements,
		has_visual_surfaces: hasVisualSurfaces
	}), OBSERVE_TOOL, signal);
	// A blank description is as useless to the caller as a missing one, so it
	// takes the same explicit fallback instead of being passed through.
	const described = raw && typeof raw === "object" && typeof raw.description === "string" && raw.description.trim() !== "";
	const out = {
		description: described ? raw.description : "Page contents could not be described.",
		hasVisualSurfaces
	};
	if (snapshot.url) out.url = snapshot.url;
	if (snapshot.title) out.title = snapshot.title;
	return out;
}
|
|
4781
|
+
|
|
4782
|
+
//#endregion
|
|
4783
|
+
//#region src/lib/browser-mcp/planner.ts
|
|
4784
|
+
const PLANNER_SYSTEM = `You are a browser-automation replanner. A user issued a high-level intent that was decomposed into atomic steps. Several steps ran successfully, then one failed. You see the page state AFTER the failure and decide what to do next.

Your job: produce a revised list of atomic steps that will accomplish the original intent given the current page. If you cannot — the page has changed in a way that makes the intent impossible (login form vanished, navigation moved elsewhere, captcha appeared) — return an empty list and explain why in reasoning.

Each replanned step is a free-form natural-language intent ("the email input", "the Sign In button at the bottom of the form") plus an optional value for fill/type/select actions. Be SPECIFIC about element location ("at the bottom of the form", "in the top navigation") so the deterministic matcher cascade can resolve it without ambiguity. Do NOT reference element refs.

Cost rule: you get ONE call per compound failure. Make every step count.

Call the replan_compound tool with your answer.`;
const PLANNER_TOOL = {
	name: "replan_compound",
	description: "Report the revised atomic steps to complete the original compound intent.",
	parameters: {
		type: "object",
		required: ["steps", "reasoning"],
		additionalProperties: false,
		properties: {
			steps: {
				type: "array",
				maxItems: 8,
				items: {
					type: "object",
					required: ["intent"],
					additionalProperties: false,
					properties: {
						intent: { type: "string" },
						value: { type: "string" }
					}
				}
			},
			reasoning: {
				type: "string",
				description: "1-2 sentence explanation of the replanning decision."
			}
		}
	}
};
/**
* Run the fast-model planner on a failed compound and return the revised
* step list (possibly empty when the planner gives up or replies with
* something unusable).
*
* The snapshot is reduced before sending: at most 80 elements, each with
* only `role` plus `name` / `placeholder` / `value` when truthy, and the
* first 3000 characters of page text.
*
* The reply is validated defensively: only the first 8 entries of `steps`
* are considered, entries without a non-blank string `intent` are dropped,
* and `value` is kept only when it is a string.
*
* @param input - Reads `snapshot`, `originalIntent`, `originalValue`,
*   `completedSteps`, `failedStep` and `failureReason`.
* @param signal - Abort signal forwarded to the model call.
* @returns {Promise<{steps: Array<{intent: string, value?: string}>, reasoning: string}>}
*/
async function planCompoundReplan(input, signal) {
	const trimmed = input.snapshot.elements.slice(0, 80).map((e) => {
		const out = { role: e.role };
		if (e.name) out.name = e.name;
		if (e.placeholder) out.placeholder = e.placeholder;
		if (e.value) out.value = e.value;
		return out;
	});
	const raw = await callCompressorPublic(PLANNER_SYSTEM, JSON.stringify({
		original_intent: input.originalIntent,
		original_value: input.originalValue,
		completed_steps: input.completedSteps.map((s) => ({
			intent: s.intent,
			...s.value !== void 0 ? { value: s.value } : {}
		})),
		failed_step: {
			intent: input.failedStep.intent,
			...input.failedStep.value !== void 0 ? { value: input.failedStep.value } : {}
		},
		failure_reason: input.failureReason,
		page_now: {
			url: input.snapshot.url ?? "",
			title: input.snapshot.title ?? "",
			visible_text: (input.snapshot.text ?? "").slice(0, 3e3),
			actionable_elements: trimmed
		}
	}), PLANNER_TOOL, signal);
	if (!raw || typeof raw !== "object") return {
		steps: [],
		reasoning: "planner returned empty response"
	};
	const obj = raw;
	const reasoning = typeof obj.reasoning === "string" ? obj.reasoning : "";
	if (!Array.isArray(obj.steps)) return {
		steps: [],
		reasoning
	};
	const steps = [];
	for (const s of obj.steps.slice(0, 8)) {
		if (!s || typeof s !== "object") continue;
		const { intent, value } = s;
		// A whitespace-only intent names no target; executing it as a step
		// could only fail or act on an arbitrary element, so skip it.
		if (typeof intent !== "string" || intent.trim().length === 0) continue;
		const step = { intent };
		if (typeof value === "string") step.value = value;
		steps.push(step);
	}
	return {
		steps,
		reasoning
	};
}
|
|
4883
|
+
|
|
3937
4884
|
//#endregion
|
|
3938
4885
|
//#region src/lib/browser-mcp/index.ts
|
|
3939
4886
|
/**
|
|
@@ -3990,7 +4937,7 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
3990
4937
|
additionalProperties: false,
|
|
3991
4938
|
properties: {}
|
|
3992
4939
|
},
|
|
3993
|
-
capability: "
|
|
4940
|
+
capability: "browser_power",
|
|
3994
4941
|
async handler(args, signal) {
|
|
3995
4942
|
return dispatchBrowserTool("browser_list_tabs", args, signal);
|
|
3996
4943
|
}
|
|
@@ -4031,7 +4978,7 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
4031
4978
|
description: "Array of tab ids to close (from browser_list_tabs)."
|
|
4032
4979
|
} }
|
|
4033
4980
|
},
|
|
4034
|
-
capability: "
|
|
4981
|
+
capability: "browser_power",
|
|
4035
4982
|
async handler(args, signal) {
|
|
4036
4983
|
return dispatchBrowserTool("browser_close_tab", args, signal);
|
|
4037
4984
|
}
|
|
@@ -4116,7 +5063,7 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
4116
5063
|
}
|
|
4117
5064
|
}
|
|
4118
5065
|
},
|
|
4119
|
-
capability: "
|
|
5066
|
+
capability: "browser_power",
|
|
4120
5067
|
async handler(args, signal) {
|
|
4121
5068
|
return dispatchBrowserTool("browser_read_page", args, signal);
|
|
4122
5069
|
}
|
|
@@ -4175,7 +5122,7 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
4175
5122
|
}
|
|
4176
5123
|
}
|
|
4177
5124
|
},
|
|
4178
|
-
capability: "
|
|
5125
|
+
capability: "browser_power",
|
|
4179
5126
|
async handler(args, signal) {
|
|
4180
5127
|
return dispatchBrowserTool("browser_scroll", args, signal);
|
|
4181
5128
|
}
|
|
@@ -4195,7 +5142,7 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
4195
5142
|
}
|
|
4196
5143
|
}
|
|
4197
5144
|
},
|
|
4198
|
-
capability: "
|
|
5145
|
+
capability: "browser_power",
|
|
4199
5146
|
async handler(args, signal) {
|
|
4200
5147
|
return dispatchBrowserTool("browser_keyboard", args, signal);
|
|
4201
5148
|
}
|
|
@@ -4232,7 +5179,7 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
4232
5179
|
}
|
|
4233
5180
|
}
|
|
4234
5181
|
},
|
|
4235
|
-
capability: "
|
|
5182
|
+
capability: "browser_power",
|
|
4236
5183
|
async handler(args, signal) {
|
|
4237
5184
|
return dispatchBrowserTool("browser_wait", args, signal);
|
|
4238
5185
|
}
|
|
@@ -4256,7 +5203,7 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
4256
5203
|
}
|
|
4257
5204
|
}
|
|
4258
5205
|
},
|
|
4259
|
-
capability: "
|
|
5206
|
+
capability: "browser_power",
|
|
4260
5207
|
async handler(args, signal) {
|
|
4261
5208
|
return dispatchBrowserTool("browser_eval_js", args, signal);
|
|
4262
5209
|
}
|
|
@@ -4288,7 +5235,7 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
4288
5235
|
}
|
|
4289
5236
|
}
|
|
4290
5237
|
},
|
|
4291
|
-
capability: "
|
|
5238
|
+
capability: "browser_power",
|
|
4292
5239
|
async handler(args, signal) {
|
|
4293
5240
|
return dispatchBrowserTool("browser_download", args, signal);
|
|
4294
5241
|
}
|
|
@@ -4352,7 +5299,7 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
4352
5299
|
}
|
|
4353
5300
|
}
|
|
4354
5301
|
},
|
|
4355
|
-
capability: "
|
|
5302
|
+
capability: "browser_power",
|
|
4356
5303
|
async handler(args, signal) {
|
|
4357
5304
|
return dispatchBrowserTool("browser_mouse", args, signal);
|
|
4358
5305
|
}
|
|
@@ -4426,7 +5373,7 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
4426
5373
|
}
|
|
4427
5374
|
}
|
|
4428
5375
|
},
|
|
4429
|
-
capability: "
|
|
5376
|
+
capability: "browser_power",
|
|
4430
5377
|
async handler(args, signal) {
|
|
4431
5378
|
return dispatchBrowserTool("browser_drag", args, signal);
|
|
4432
5379
|
}
|
|
@@ -4450,7 +5397,7 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
4450
5397
|
}
|
|
4451
5398
|
}
|
|
4452
5399
|
},
|
|
4453
|
-
capability: "
|
|
5400
|
+
capability: "browser_power",
|
|
4454
5401
|
async handler(args, signal) {
|
|
4455
5402
|
return dispatchBrowserTool("browser_type", args, signal);
|
|
4456
5403
|
}
|
|
@@ -4491,7 +5438,7 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
4491
5438
|
}
|
|
4492
5439
|
}
|
|
4493
5440
|
},
|
|
4494
|
-
capability: "
|
|
5441
|
+
capability: "browser_power",
|
|
4495
5442
|
async handler(args, signal) {
|
|
4496
5443
|
const kind = args.kind === "network" ? "network" : "console";
|
|
4497
5444
|
const tool = kind === "network" ? "browser_network_log" : "browser_console_logs";
|
|
@@ -4548,7 +5495,7 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
4548
5495
|
}
|
|
4549
5496
|
}
|
|
4550
5497
|
},
|
|
4551
|
-
capability: "
|
|
5498
|
+
capability: "browser_power",
|
|
4552
5499
|
async handler(args, signal) {
|
|
4553
5500
|
const tabId = typeof args.tabId === "number" ? args.tabId : void 0;
|
|
4554
5501
|
const intent = typeof args.intent === "string" ? args.intent : "";
|
|
@@ -4615,65 +5562,109 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
4615
5562
|
const value = typeof args.value === "string" ? args.value : void 0;
|
|
4616
5563
|
if (!refIn && !intent) return toolEnvelope({ error: "either `ref` (REF mode) or `intent` (INTENT mode) is required" }, true);
|
|
4617
5564
|
if (refIn) return dispatchActionByRef(tabId, refIn, typeof args.action === "string" ? args.action : "click", value, signal);
|
|
4618
|
-
const
|
|
4619
|
-
|
|
4620
|
-
|
|
4621
|
-
|
|
4622
|
-
|
|
4623
|
-
|
|
4624
|
-
|
|
4625
|
-
|
|
5565
|
+
const decomposed = decompose(intent, value);
|
|
5566
|
+
if (decomposed.steps.length === 1) return runAtomicIntentStep(tabId, decomposed.steps[0].intent, decomposed.steps[0].value, signal);
|
|
5567
|
+
const summaries = [];
|
|
5568
|
+
let navigated = false;
|
|
5569
|
+
const completedSteps = [];
|
|
5570
|
+
for (let i = 0; i < decomposed.steps.length; i++) {
|
|
5571
|
+
const step = decomposed.steps[i];
|
|
5572
|
+
const env = await runAtomicIntentStep(tabId, step.intent, step.value, signal);
|
|
5573
|
+
const stepText = env.content?.[0]?.text;
|
|
5574
|
+
let stepResult = {};
|
|
5575
|
+
if (typeof stepText === "string") try {
|
|
5576
|
+
stepResult = JSON.parse(stepText);
|
|
5577
|
+
} catch {}
|
|
5578
|
+
if (env.isError || stepResult.ok === false) try {
|
|
5579
|
+
const failureReason = String(stepResult.error ?? "unknown");
|
|
5580
|
+
const replan = await planCompoundReplan({
|
|
5581
|
+
originalIntent: intent,
|
|
5582
|
+
originalValue: value,
|
|
5583
|
+
completedSteps,
|
|
5584
|
+
failedStep: step,
|
|
5585
|
+
failureReason,
|
|
5586
|
+
snapshot: await fetchSnapshot(tabId, signal)
|
|
4626
5587
|
}, signal);
|
|
4627
|
-
if (
|
|
5588
|
+
if (replan.steps.length === 0) return toolEnvelope({
|
|
4628
5589
|
ok: false,
|
|
4629
|
-
|
|
4630
|
-
|
|
5590
|
+
summary: `compound step ${i + 1}/${decomposed.steps.length} failed and planner declined: ${replan.reasoning || failureReason}`,
|
|
5591
|
+
template: decomposed.template,
|
|
5592
|
+
steps_completed: i,
|
|
5593
|
+
failed_step: step.intent,
|
|
5594
|
+
planner_reasoning: replan.reasoning
|
|
4631
5595
|
}, true);
|
|
4632
|
-
const
|
|
4633
|
-
let
|
|
4634
|
-
|
|
4635
|
-
|
|
4636
|
-
|
|
4637
|
-
|
|
5596
|
+
const replanSummaries = [];
|
|
5597
|
+
for (let j = 0; j < replan.steps.length; j++) {
|
|
5598
|
+
const rstep = replan.steps[j];
|
|
5599
|
+
const renv = await runAtomicIntentStep(tabId, rstep.intent, rstep.value, signal);
|
|
5600
|
+
const rtext = renv.content?.[0]?.text;
|
|
5601
|
+
let rresult = {};
|
|
5602
|
+
if (typeof rtext === "string") try {
|
|
5603
|
+
rresult = JSON.parse(rtext);
|
|
5604
|
+
} catch {}
|
|
5605
|
+
if (renv.isError || rresult.ok === false) return toolEnvelope({
|
|
4638
5606
|
ok: false,
|
|
4639
|
-
|
|
5607
|
+
summary: `compound failed at original step ${i + 1}, planner replan also failed at step ${j + 1}/${replan.steps.length}: ${String(rresult.error ?? "unknown")}`,
|
|
5608
|
+
template: decomposed.template,
|
|
5609
|
+
steps_completed: i,
|
|
5610
|
+
failed_step: rstep.intent,
|
|
5611
|
+
planner_reasoning: replan.reasoning
|
|
4640
5612
|
}, true);
|
|
5613
|
+
if (typeof rresult.action_taken === "string") replanSummaries.push(`${rresult.action_taken} (${rstep.intent})`);
|
|
5614
|
+
if (rresult.navigated === true) navigated = true;
|
|
4641
5615
|
}
|
|
4642
|
-
if (!shot.contentType || !shot.dataBase64) return toolEnvelope({
|
|
4643
|
-
ok: false,
|
|
4644
|
-
error: "no text match; screenshot envelope missing fields"
|
|
4645
|
-
}, true);
|
|
4646
|
-
const visual = await pickElementVisual(shot.dataBase64, shot.contentType, intent, surfaces, signal);
|
|
4647
|
-
if (visual.confidence < .5) return toolEnvelope({
|
|
4648
|
-
ok: false,
|
|
4649
|
-
error: "no element matched intent (text + visual)",
|
|
4650
|
-
picked,
|
|
4651
|
-
visual
|
|
4652
|
-
}, true);
|
|
4653
|
-
const clickEnv = await dispatchBrowserTool("browser_mouse", {
|
|
4654
|
-
tabId,
|
|
4655
|
-
action: "click",
|
|
4656
|
-
x: visual.x,
|
|
4657
|
-
y: visual.y,
|
|
4658
|
-
force: true
|
|
4659
|
-
}, signal);
|
|
4660
|
-
if (clickEnv.isError) return clickEnv;
|
|
4661
5616
|
return toolEnvelope({
|
|
4662
5617
|
ok: true,
|
|
4663
|
-
|
|
4664
|
-
|
|
4665
|
-
|
|
4666
|
-
|
|
4667
|
-
|
|
5618
|
+
summary: `compound recovered via planner (${replan.reasoning}): ${replanSummaries.join(" → ")}`,
|
|
5619
|
+
template: decomposed.template,
|
|
5620
|
+
steps_completed: i + replan.steps.length,
|
|
5621
|
+
navigated,
|
|
5622
|
+
planner_used: true,
|
|
5623
|
+
planner_reasoning: replan.reasoning
|
|
4668
5624
|
});
|
|
5625
|
+
} catch (replanErr) {
|
|
5626
|
+
return toolEnvelope({
|
|
5627
|
+
ok: false,
|
|
5628
|
+
summary: `compound step ${i + 1}/${decomposed.steps.length} failed; planner errored: ${replanErr instanceof Error ? replanErr.message : String(replanErr)}`,
|
|
5629
|
+
template: decomposed.template,
|
|
5630
|
+
steps_completed: i,
|
|
5631
|
+
failed_step: step.intent
|
|
5632
|
+
}, true);
|
|
4669
5633
|
}
|
|
4670
|
-
|
|
4671
|
-
|
|
4672
|
-
|
|
4673
|
-
picked
|
|
4674
|
-
}, true);
|
|
5634
|
+
if (typeof stepResult.action_taken === "string") summaries.push(`${stepResult.action_taken} (${step.intent})`);
|
|
5635
|
+
if (stepResult.navigated === true) navigated = true;
|
|
5636
|
+
completedSteps.push(step);
|
|
4675
5637
|
}
|
|
4676
|
-
return
|
|
5638
|
+
return toolEnvelope({
|
|
5639
|
+
ok: true,
|
|
5640
|
+
summary: decomposed.successSummary ?? summaries.join(" → "),
|
|
5641
|
+
template: decomposed.template,
|
|
5642
|
+
steps_completed: decomposed.steps.length,
|
|
5643
|
+
navigated
|
|
5644
|
+
});
|
|
5645
|
+
}
|
|
5646
|
+
},
|
|
5647
|
+
{
|
|
5648
|
+
toolNameHttp: "browser_observe",
|
|
5649
|
+
description: "Get a natural-language description of the current page's user-actionable state — what forms, buttons, links, and content sections are visible — in 2-4 sentences. Optional `intent` focuses the description on a region ('describe the login form', 'what's in the comments section'). Use this BEFORE browser_act when you don't know what's on the page, or AFTER navigation to confirm the page loaded. Cheaper than screenshots when text is enough. Does not include canvas/SVG content — those surface as a `hasVisualSurfaces` flag; switch to browser_screenshot for visuals.",
|
|
5650
|
+
inputSchema: {
|
|
5651
|
+
type: "object",
|
|
5652
|
+
required: ["tabId"],
|
|
5653
|
+
additionalProperties: false,
|
|
5654
|
+
properties: {
|
|
5655
|
+
tabId: { type: "number" },
|
|
5656
|
+
intent: {
|
|
5657
|
+
type: "string",
|
|
5658
|
+
description: "Optional natural-language focus ('describe the form', 'what's in the sidebar')."
|
|
5659
|
+
}
|
|
5660
|
+
}
|
|
5661
|
+
},
|
|
5662
|
+
capability: "browser_compound",
|
|
5663
|
+
async handler(args, signal) {
|
|
5664
|
+
const tabId = typeof args.tabId === "number" ? args.tabId : void 0;
|
|
5665
|
+
const intent = typeof args.intent === "string" ? args.intent : void 0;
|
|
5666
|
+
if (!tabId) return toolEnvelope({ error: "tabId required" }, true);
|
|
5667
|
+
return toolEnvelope(await observePage(await fetchSnapshot(tabId, signal), intent, signal));
|
|
4677
5668
|
}
|
|
4678
5669
|
},
|
|
4679
5670
|
{
|
|
@@ -4716,6 +5707,76 @@ const BROWSER_TOOLS = Object.freeze([
|
|
|
4716
5707
|
}
|
|
4717
5708
|
]);
|
|
4718
5709
|
/**
* Execute one atomic intent against a tab and return the MCP envelope.
*
* Flow: take a fresh snapshot, ask `pickElement` for a target, and when
* that yields a ref (and the confidence is not below 0.5) dispatch the
* resolved action by ref. Otherwise, if the snapshot lists visual
* surfaces, take a PNG screenshot, ask `pickElementVisual` for
* coordinates and issue a forced mouse click there; with no visual
* surfaces the miss is reported as an error envelope.
*
* Lives outside the `browser_act` handler so the compound-intent loop can
* reuse it per step.
*
* @param tabId - Target tab id.
* @param intent - Natural-language description of the element/action.
* @param value - Optional value; used when the picker does not supply one.
* @param signal - Abort signal forwarded to every downstream call.
*/
async function runAtomicIntentStep(tabId, intent, value, signal) {
	const snapshot = await fetchSnapshot(tabId, signal);
	const picked = await pickElement(snapshot, intent, signal, value);
	const needsFallback = !picked.ref || picked.confidence < .5;
	if (!needsFallback) return dispatchActionByRef(tabId, picked.ref, picked.action, picked.value ?? value, signal);
	const surfaces = snapshot.visualSurfaces;
	const hasSurfaces = surfaces && surfaces.length > 0;
	if (!hasSurfaces) return toolEnvelope({
		ok: false,
		error: "no element matched intent",
		picked
	}, true);
	// Text matching failed but the page has visual surfaces: go by pixels.
	const screenshotEnv = await dispatchBrowserTool("browser_screenshot", {
		tabId,
		format: "png"
	}, signal);
	if (screenshotEnv.isError) return toolEnvelope({
		ok: false,
		error: "no text match; screenshot for visual fallback failed",
		picked
	}, true);
	const screenshotText = screenshotEnv.content?.[0]?.text;
	let shot = {};
	try {
		shot = screenshotText ? JSON.parse(screenshotText) : {};
	} catch {
		return toolEnvelope({
			ok: false,
			error: "no text match; screenshot envelope unparseable"
		}, true);
	}
	if (!shot.contentType || !shot.dataBase64) return toolEnvelope({
		ok: false,
		error: "no text match; screenshot envelope missing fields"
	}, true);
	const visual = await pickElementVisual(shot.dataBase64, shot.contentType, intent, surfaces, signal);
	if (visual.confidence < .5) return toolEnvelope({
		ok: false,
		error: "no element matched intent (text + visual)",
		picked,
		visual
	}, true);
	const { x, y } = visual;
	const mouseEnv = await dispatchBrowserTool("browser_mouse", {
		tabId,
		action: "click",
		x,
		y,
		force: true
	}, signal);
	if (mouseEnv.isError) return mouseEnv;
	return toolEnvelope({
		ok: true,
		action_taken: "click_visual",
		x,
		y,
		confidence: visual.confidence,
		reason: visual.reason
	});
}
|
|
5779
|
+
/**
|
|
4719
5780
|
* Dispatch an action against a known ref via the appropriate primitive.
|
|
4720
5781
|
* Shared between REF mode and INTENT-mode-text-match in `browser_act`.
|
|
4721
5782
|
* Returns an MCP envelope (text content + optional isError).
|
|
@@ -7309,6 +8370,27 @@ function workerToolsEnabled() {
|
|
|
7309
8370
|
function browserCompoundToolsEnabled() {
	// The compound browser tools are gated solely on the compressor
	// being available.
	const compressorReady = compressorAvailable();
	return compressorReady;
}
|
|
8373
|
+
/**
 * Gate for the L0/L1 "power" browser primitives: `browser_read_page`,
 * `browser_mouse`, `browser_drag`, `browser_type`, `browser_keyboard`,
 * `browser_scroll`, `browser_eval_js`, `browser_diagnostics`,
 * `browser_find`, `browser_close_tab`, `browser_list_tabs`,
 * `browser_wait` and `browser_download`.
 *
 * The gate is open only when `state.powerBrowseEnabled` is exactly
 * `true`; that flag is set by `--power-browse` or
 * `GH_ROUTER_ENABLE_POWER_BROWSE=1`. With the gate closed, the default
 * `--browse` surface offers just the 6 lead-model tools (`act`,
 * `observe`, `extract`, `navigate`, `screenshot`, `open_tab`), which
 * hide DOM details behind intent. Power mode layers the raw
 * coordinate / keystroke primitives on top for users who want them.
 *
 * The `handler.ts` filter chain ANDs this gate with
 * `browserToolsEnabled()` as defense-in-depth: power without basic
 * browsing is meaningless, and the setup path already forces basic on
 * whenever power is on.
 *
 * @returns `true` when the power primitives should be exposed.
 */
function browserPowerToolsEnabled() {
	const { powerBrowseEnabled } = state;
	return powerBrowseEnabled === true;
}
|
|
7312
8394
|
|
|
7313
8395
|
//#endregion
|
|
7314
8396
|
//#region src/routes/mcp/handler.ts
|
|
@@ -7438,17 +8520,21 @@ function browserToolsEnabled() {
|
|
|
7438
8520
|
return hasSupportedBrowserInstalled();
|
|
7439
8521
|
}
|
|
7440
8522
|
/**
 * Matches the 1M-context Opus 4.6 catalog slug (`claude-opus-4.6-1m`,
 * `max_prompt_tokens` 936K). opus_critic prefers that variant so it can
 * take large artifacts in one shot (the whole point of pairing it with
 * gpt-5.5 as the big-window peers), and falls back to the 200K
 * `claude-opus-4-6` when the catalog carries no 1M 4.6 slug.
 *
 * The pattern is pinned to version 4.6 and demands that `-1m` be
 * followed by end-of-string or another `-` (it is deliberately not a
 * permissive `.*1m`). It therefore does not match
 * `claude-opus-4.7-1m-internal` (stand_in's pinned 4.7 row),
 * `claude-opus-4.6-1max` (hypothetical), or `claude-opus-4.8`
 * (1M-without-sibling). Both dotted (`opus-4.6-1m`) and dashed
 * (`opus-4-6-1m`) catalog separators are accepted.
 */
const OPUS_1M_RE = /opus-4[.-]6-1m(?:$|-)/i;
/**
 * Picks the model id opus_critic should run on: the first catalog entry
 * whose id matches `OPUS_1M_RE`, otherwise the plain `claude-opus-4-6`.
 */
function resolveOpusCriticModel() {
	const catalog = state.models?.data;
	const bigWindow = catalog?.find((entry) => OPUS_1M_RE.test(entry.id));
	if (bigWindow) return bigWindow.id;
	return "claude-opus-4-6";
}
|
|
7453
8539
|
function activePersonas() {
|
|
7454
8540
|
return PERSONAS_READ.filter((p) => !p.requiresGeminiCatalog || geminiAvailable()).map((p) => p.toolNameHttp === "opus_critic" ? {
|
|
@@ -7486,6 +8572,7 @@ function toolEntries() {
|
|
|
7486
8572
|
if (t.capability === "stand_in") return standInToolEnabled();
|
|
7487
8573
|
if (t.capability === "browser") return browserToolsEnabled();
|
|
7488
8574
|
if (t.capability === "browser_compound") return browserToolsEnabled() && browserCompoundToolsEnabled();
|
|
8575
|
+
if (t.capability === "browser_power") return browserToolsEnabled() && browserPowerToolsEnabled();
|
|
7489
8576
|
return true;
|
|
7490
8577
|
}).map((t) => ({
|
|
7491
8578
|
name: t.toolNameHttp,
|
|
@@ -7543,6 +8630,8 @@ function toolError(message) {
|
|
|
7543
8630
|
* gpt-5.3-codex high on ~600B = 16.0s → ~64s on 12KB
|
|
7544
8631
|
* claude-opus-4-7 medium (thinking=3000) on a trivial prompt = 22.5s
|
|
7545
8632
|
* but model self-paces budget → ~50s+ on a real ~6KB review
|
|
8633
|
+
* (still applicable to stand_in's 4.7 row; opus_critic now runs on
|
|
8634
|
+
* 4.6 with similar empirical shape)
|
|
7546
8635
|
*
|
|
7547
8636
|
* Returns `{tooLong: true, capBytes}` when the (persona, effort, briefBytes)
|
|
7548
8637
|
* tuple is empirically predicted to bust the 60s ceiling.
|
|
@@ -7778,6 +8867,7 @@ async function handleToolsCall(body) {
|
|
|
7778
8867
|
if (nonPersonaTool && nonPersonaTool.capability === "stand_in" && !standInToolEnabled()) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
|
|
7779
8868
|
if (nonPersonaTool && nonPersonaTool.capability === "browser" && !browserToolsEnabled()) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
|
|
7780
8869
|
if (nonPersonaTool && nonPersonaTool.capability === "browser_compound" && !(browserToolsEnabled() && browserCompoundToolsEnabled())) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
|
|
8870
|
+
if (nonPersonaTool && nonPersonaTool.capability === "browser_power" && !(browserToolsEnabled() && browserPowerToolsEnabled())) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
|
|
7781
8871
|
let personaPrompt;
|
|
7782
8872
|
let personaContext;
|
|
7783
8873
|
let personaEffort;
|
|
@@ -11024,10 +12114,11 @@ function round2(n) {
|
|
|
11024
12114
|
* **xhigh on long-running personas works via SSE-streamed /mcp responses**
|
|
11025
12115
|
* (handler.ts:handleToolsCallSSE). Claude Code's MCP HTTP client honors
|
|
11026
12116
|
* `text/event-stream` responses without applying the ~60s per-tool-call
|
|
11027
|
-
* timer that previously broke xhigh on gpt-5.5 (~56s wall) and
|
|
11028
|
-
*
|
|
11029
|
-
*
|
|
11030
|
-
*
|
|
12117
|
+
* timer that previously broke xhigh on gpt-5.5 (~56s wall) and on
|
|
12118
|
+
* Anthropic Opus families (high+ thinking budgets). opus-critic itself
|
|
12119
|
+
* now runs on claude-opus-4-6 which doesn't advertise xhigh, so the
|
|
12120
|
+
* SSE long-tail concern there is moot; the SSE machinery still applies
|
|
12121
|
+
* to the other personas that do expose xhigh.
|
|
11031
12122
|
*/
|
|
11032
12123
|
const EFFORT_LEVELS = [
|
|
11033
12124
|
"low",
|
|
@@ -11205,9 +12296,9 @@ const PERSONAS_READ = Object.freeze([
|
|
|
11205
12296
|
{
|
|
11206
12297
|
agentName: "opus-critic",
|
|
11207
12298
|
toolNameHttp: "opus_critic",
|
|
11208
|
-
model: "claude-opus-4-
|
|
12299
|
+
model: "claude-opus-4-6",
|
|
11209
12300
|
endpoint: "/v1/messages",
|
|
11210
|
-
description: "Adversarial second opinion from a fresh-context Opus 4.
|
|
12301
|
+
description: "Adversarial second opinion from a fresh-context Opus 4.6 — same lab as the lead, limited blind-spot diversity vs cross-lab critics. On enterprise catalogs that carry Opus-4.6-1M it runs with a ≈936K-token input window; otherwise ≈168K. Pinned one minor behind the default Opus so the panel spans more of the version curve. Catches confabulation. Pass artifact verbatim.",
|
|
11211
12302
|
baseInstructions: OPUS_CRITIC_BASE,
|
|
11212
12303
|
agentPrompt: "",
|
|
11213
12304
|
writeCapable: false,
|
|
@@ -11215,10 +12306,9 @@ const PERSONAS_READ = Object.freeze([
|
|
|
11215
12306
|
allowedEfforts: [
|
|
11216
12307
|
"low",
|
|
11217
12308
|
"medium",
|
|
11218
|
-
"high"
|
|
11219
|
-
"xhigh"
|
|
12309
|
+
"high"
|
|
11220
12310
|
],
|
|
11221
|
-
defaultEffort: "
|
|
12311
|
+
defaultEffort: "high"
|
|
11222
12312
|
}
|
|
11223
12313
|
]);
|
|
11224
12314
|
const PERSONAS_WRITE = Object.freeze([{
|
|
@@ -11331,7 +12421,10 @@ function buildPeerAwarenessSnippet(opts) {
|
|
|
11331
12421
|
if (opts.workerToolsAvailable) para2Parts.push("`worker_explore` runs a Gemini-backed read-only worker that returns a summary, using its own context rather than yours; concurrent launches share the `MAX_INFLIGHT_TOOLS_CALL=8` cap with operator traffic.", "`worker_implement` is the same worker with edit/write/bash; `worktree: true` runs it in an isolated git worktree and returns the diff.", "Workers themselves have `code_search` in their toolset.");
|
|
11332
12422
|
para2Parts.push("`web_search` surfaces citable sources for docs, errors, and upstream issues.");
|
|
11333
12423
|
if (opts.standInAvailable) para2Parts.push("`stand_in` provides three-lab consensus for decision tiebreak when the user is unavailable.");
|
|
11334
|
-
if (opts.browseAvailable)
|
|
12424
|
+
if (opts.browseAvailable) {
|
|
12425
|
+
const powerNote = opts.powerBrowseAvailable ? " Power mode is on: the L0/L1 primitives (`browser_mouse`, `browser_drag`, `browser_type`, `browser_keyboard`, `browser_scroll`, `browser_eval_js`, `browser_read_page`, `browser_diagnostics`, `browser_find`) are also available for direct DOM / coordinate control." : "";
|
|
12426
|
+
para2Parts.push(`\`browser_*\` tools (under \`mcp__gh-router-peers__browser_*\`) drive a real Chrome / Edge browser via a local extension. Lead surface: \`browser_act(intent, value?)\` for any click / fill / type / scroll-to (an inner fast model resolves intent), \`browser_observe(intent?)\` for a 2-4 sentence natural-language page description, \`browser_extract(schema, instruction)\` for typed extraction, \`browser_navigate\` / \`browser_open_tab\` / \`browser_screenshot\` for state and visuals. The lead model never sees raw DOM: refs, bboxes, and role/name dumps stay internal.${powerNote}`);
|
|
12427
|
+
}
|
|
11335
12428
|
return [
|
|
11336
12429
|
"## Peer review and advisor",
|
|
11337
12430
|
"",
|
|
@@ -12794,7 +13887,7 @@ function initProxyFromEnv() {
|
|
|
12794
13887
|
//#endregion
|
|
12795
13888
|
//#region package.json
|
|
12796
13889
|
var name = "github-router";
|
|
12797
|
-
var version$1 = "0.3.
|
|
13890
|
+
var version$1 = "0.3.68";
|
|
12798
13891
|
|
|
12799
13892
|
//#endregion
|
|
12800
13893
|
//#region src/lib/approval.ts
|
|
@@ -14516,6 +15609,11 @@ async function setupAndServe(options) {
|
|
|
14516
15609
|
state.showToken = options.showToken;
|
|
14517
15610
|
state.extendedBetas = options.extendedBetas;
|
|
14518
15611
|
state.browseEnabled = options.browseEnabled || process.env.GH_ROUTER_ENABLE_BROWSE === "1";
|
|
15612
|
+
state.powerBrowseEnabled = options.powerBrowseEnabled || process.env.GH_ROUTER_ENABLE_POWER_BROWSE === "1";
|
|
15613
|
+
if (state.powerBrowseEnabled) state.browseEnabled = true;
|
|
15614
|
+
if (process.env.GH_ROUTER_BROWSER_NO_HUMANLIKE === "1") state.humanlikeForce = "off";
|
|
15615
|
+
else if (options.humanlikeEnabled || process.env.GH_ROUTER_HUMANLIKE === "1") state.humanlikeForce = "on";
|
|
15616
|
+
else state.humanlikeForce = "auto";
|
|
14519
15617
|
if (process.env.COPILOT_API_URL) state.copilotApiUrl = process.env.COPILOT_API_URL;
|
|
14520
15618
|
await ensurePaths();
|
|
14521
15619
|
await cacheVSCodeVersion();
|
|
@@ -14623,6 +15721,16 @@ const sharedServerArgs = {
|
|
|
14623
15721
|
type: "boolean",
|
|
14624
15722
|
default: false,
|
|
14625
15723
|
description: "Enable the browser-control MCP tools (browser_open_tab, browser_screenshot, browser_click, etc.) on /mcp. Requires Chrome or Edge installed; the bundled extension must be loaded on first tool call (the proxy returns install_required with Web Store URLs + a Load Unpacked fallback path). Off by default; can also be enabled with GH_ROUTER_ENABLE_BROWSE=1."
|
|
15724
|
+
},
|
|
15725
|
+
"power-browse": {
|
|
15726
|
+
type: "boolean",
|
|
15727
|
+
default: false,
|
|
15728
|
+
description: "Expose the full ~18-tool browser MCP surface (raw read_page, mouse / drag / scroll / keyboard / type primitives, eval_js, diagnostics, find, locate). Default --browse exposes only the 6 lead-model tools (act, observe, extract, navigate, screenshot, open_tab) that hide DOM details behind intent. Implies --browse. Off by default; can also be enabled with GH_ROUTER_ENABLE_POWER_BROWSE=1."
|
|
15729
|
+
},
|
|
15730
|
+
humanlike: {
|
|
15731
|
+
type: "boolean",
|
|
15732
|
+
default: false,
|
|
15733
|
+
description: "Force humanlike pacing on ALL browser tool dispatches: Beta-distributed inter-action delays (800-4600 ms), Bezier mouse trajectories with overshoot-and-correct, per-keystroke jitter with word-end pauses, scroll chunking. Use for known anti-bot sites (Cloudflare, Datadome). Off by default (auto mode); GH_ROUTER_HUMANLIKE=1 is the env equivalent. GH_ROUTER_BROWSER_NO_HUMANLIKE=1 hard-disables (wins over --humanlike, for tests)."
|
|
14626
15734
|
}
|
|
14627
15735
|
};
|
|
14628
15736
|
const allowedAccountTypes = new Set([
|
|
@@ -14660,7 +15768,9 @@ function parseSharedArgs(args) {
|
|
|
14660
15768
|
showToken: args["show-token"],
|
|
14661
15769
|
proxyEnv: args["proxy-env"],
|
|
14662
15770
|
extendedBetas: args["extended-betas"],
|
|
14663
|
-
browseEnabled: args.browse
|
|
15771
|
+
browseEnabled: args.browse,
|
|
15772
|
+
powerBrowseEnabled: args["power-browse"],
|
|
15773
|
+
humanlikeEnabled: args.humanlike
|
|
14664
15774
|
};
|
|
14665
15775
|
}
|
|
14666
15776
|
/**
|
|
@@ -14714,10 +15824,10 @@ function getClaudeCodeEnvVars(serverUrl, model) {
|
|
|
14714
15824
|
DISABLE_TELEMETRY: "1"
|
|
14715
15825
|
};
|
|
14716
15826
|
if (model) vars.ANTHROPIC_MODEL = model;
|
|
14717
|
-
if (process.env.ANTHROPIC_SMALL_FAST_MODEL === void 0) vars.ANTHROPIC_SMALL_FAST_MODEL = "claude-
|
|
15827
|
+
if (process.env.ANTHROPIC_SMALL_FAST_MODEL === void 0) vars.ANTHROPIC_SMALL_FAST_MODEL = "claude-sonnet-4-6";
|
|
14718
15828
|
if (process.env.ANTHROPIC_DEFAULT_SONNET_MODEL === void 0) vars.ANTHROPIC_DEFAULT_SONNET_MODEL = "claude-sonnet-4-6";
|
|
14719
15829
|
if (process.env.ANTHROPIC_DEFAULT_HAIKU_MODEL === void 0) vars.ANTHROPIC_DEFAULT_HAIKU_MODEL = "claude-haiku-4-5";
|
|
14720
|
-
if (process.env.ANTHROPIC_DEFAULT_OPUS_MODEL === void 0) vars.ANTHROPIC_DEFAULT_OPUS_MODEL = "claude-opus-4-
|
|
15830
|
+
if (process.env.ANTHROPIC_DEFAULT_OPUS_MODEL === void 0) vars.ANTHROPIC_DEFAULT_OPUS_MODEL = "claude-opus-4-8";
|
|
14721
15831
|
if (process.env.CLAUDE_CODE_PLAN_V2_AGENT_COUNT === void 0) vars.CLAUDE_CODE_PLAN_V2_AGENT_COUNT = "7";
|
|
14722
15832
|
for (const key of [
|
|
14723
15833
|
"CLAUDE_CODE_ENABLE_EXPERIMENTAL_ADVISOR_TOOL",
|
|
@@ -14900,7 +16010,8 @@ const claude = defineCommand({
|
|
|
14900
16010
|
geminiAvailable: geminiAvailable$1,
|
|
14901
16011
|
workerToolsAvailable: workerToolsEnabled(),
|
|
14902
16012
|
standInAvailable: standInToolEnabled(),
|
|
14903
|
-
browseAvailable: state.browseEnabled
|
|
16013
|
+
browseAvailable: state.browseEnabled,
|
|
16014
|
+
powerBrowseAvailable: state.powerBrowseEnabled
|
|
14904
16015
|
});
|
|
14905
16016
|
extraArgs.push("--append-system-prompt", peerSnippet);
|
|
14906
16017
|
try {
|