npm - github-router - Versions diffs - 0.3.52 → 0.3.66 - Mend

github-router 0.3.52 → 0.3.66

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/dist/browser-bridge/index.js +17 -1
package/dist/browser-ext/background.js +358 -1
package/dist/browser-ext/manifest.json +2 -1
package/dist/browser-ext/snapshot-cdp.js +438 -0
package/dist/browser-ext/snapshot.js +101 -0
package/dist/main.js +1148 -69
package/dist/main.js.map +1 -1
package/package.json +18 -18

package/dist/main.js CHANGED Viewed

@@ -45,6 +45,8 @@ const state = {
 	showToken: false,
 	extendedBetas: false,
 	browseEnabled: false,
+	powerBrowseEnabled: false,
+	humanlikeForce: "auto",
 	sessionId: randomUUID(),
 	machineId: randomBytes(32).toString("hex")
 };
@@ -3076,6 +3078,58 @@ function installRequiredToolResult(payload) {
 	};
 }
+//#endregion
+//#region src/lib/browser-mcp/humanlike.ts
+/**
+* Sample from a Beta(2, 5) distribution scaled to [minMs, maxMs].
+* The Beta(2, 5) shape has its mode near 0.2 of the range — humans
+* follow most actions quickly, with an occasional long pause. We do
+* NOT use uniform random because that would produce robotically-
+* even spacing detectable by behavioral analysis.
+*
+* Implementation: two gamma-distributed samples via the Marsaglia /
+* Tsang squeeze method (Box-Muller-style sufficiency for shape ≥ 2).
+*/
+function betaDelay(minMs, maxMs) {
+	const a = gammaSample(2);
+	const beta = a / (a + gammaSample(5));
+	return Math.round(minMs + beta * (maxMs - minMs));
+}
+function gammaSample(shape) {
+	const d = shape - 1 / 3;
+	const c = 1 / Math.sqrt(9 * d);
+	while (true) {
+		let x, v;
+		do {
+			x = normalSample();
+			v = 1 + c * x;
+		} while (v <= 0);
+		v = v * v * v;
+		const u = Math.random();
+		if (u < 1 - .0331 * x * x * x * x) return d * v;
+		if (Math.log(u) < .5 * x * x + d * (1 - v + Math.log(v))) return d * v;
+	}
+}
+function normalSample() {
+	let u = 0, v = 0;
+	while (u === 0) u = Math.random();
+	while (v === 0) v = Math.random();
+	return Math.sqrt(-2 * Math.log(u)) * Math.cos(2 * Math.PI * v);
+}
+/**
+* Inter-action delay when paced mode is on. Returns a Beta(2, 5)-shaped
+* randomized delay in [800, 4600] ms; the 800 ms floor keeps the
+* minimum from ever being "too fast."
+* NOTE(review): this comment previously cited "a base of 600 ms", which
+* does not match the 800 ms lower bound passed below — confirm which
+* value is intended.
+* Stated rationale: humans take 800-2800 ms between UI actions on
+* average, with a tail of long pauses.
+*
+* This function does not account for elapsed time itself. The caller
+* is expected to subtract the time already burned in the compound
+* pipeline (snapshot fetch + matcher cascade) so the user-perceived
+* delay isn't doubled.
+*
+* @returns {number} Target delay in whole milliseconds.
+*/
+function interActionDelay() {
+	return betaDelay(800, 4600);
+}
 //#endregion
 //#region src/lib/browser-mcp/policy.ts
 const BLOCKED_URL_RE = /^(chrome|edge|brave|opera|vivaldi):\/\/(settings|preferences|extensions|policy|management|password|flags|flag-descriptions)/i;
@@ -3110,6 +3164,78 @@ function preflightUrlPolicy(toolName, args) {
 //#endregion
 //#region src/lib/browser-mcp/dispatch.ts
+/**
+* Tools whose dispatch counts as a mutating user action for pacing
+* purposes. Read-only tools (list_tabs, screenshot, read_page,
+* diagnostics, navigate-without-form-submit) skip the inter-action
+* delay because they don't look like a human clicking around.
+*
+* Membership in this set is the first gate checked by
+* maybeInjectHumanlikeDelay; tools not listed here are never delayed.
+*/
+const PACED_TOOLS = new Set([
+	"browser_click",
+	"browser_fill",
+	"browser_type",
+	"browser_keyboard",
+	"browser_scroll",
+	"browser_mouse",
+	"browser_drag"
+]);
+let lastDispatchAt = 0; // Date.now() of the last paced dispatch; 0 means none yet, so the first paced call never waits.
+let humanlikeAutoCache = { // Last successful /health probe result, replaced wholesale on refresh.
+	fetchedAt: 0, // Date.now() captured when the probe that filled the cache started.
+	tabs: /* @__PURE__ */ new Set() // Numeric tab ids the bridge reported in `humanlike_tabs`.
+};
+const HUMANLIKE_PROBE_INTERVAL_MS = 5e3; // Cache age (ms) above which isHumanlikeAutoOn re-probes the bridge.
+async function isHumanlikeAutoOn(tabId, signal) {
+	if (state.humanlikeForce === "off") return false;
+	if (typeof tabId !== "number") return false;
+	const now = Date.now();
+	if (now - humanlikeAutoCache.fetchedAt > HUMANLIKE_PROBE_INTERVAL_MS) try {
+		const ready = await ensureBridgeReady();
+		if (ready.install_required) return false;
+		const res = await fetch(`http://127.0.0.1:${ready.port}/health`, {
+			headers: { authorization: `Bearer ${ready.token}` },
+			signal
+		});
+		if (res.ok) {
+			const body = await res.json();
+			const tabs = /* @__PURE__ */ new Set();
+			for (const t of body.humanlike_tabs ?? []) if (typeof t.tabId === "number") tabs.add(t.tabId);
+			humanlikeAutoCache = {
+				fetchedAt: now,
+				tabs
+			};
+		}
+	} catch {}
+	return humanlikeAutoCache.tabs.has(tabId);
+}
+async function maybeInjectHumanlikeDelay(tool, signal, tabId) {
+	if (!PACED_TOOLS.has(tool)) return;
+	let on = state.humanlikeForce === "on";
+	if (!on && state.humanlikeForce === "auto") on = await isHumanlikeAutoOn(tabId, signal);
+	if (!on) return;
+	const target = interActionDelay();
+	const sinceLast = Date.now() - lastDispatchAt;
+	const wait = Math.max(0, target - sinceLast);
+	if (wait > 0) await sleepAbortable(wait, signal);
+	lastDispatchAt = Date.now();
+}
+function sleepAbortable(ms, signal) {
+	return new Promise((resolve, reject) => {
+		if (signal?.aborted) {
+			reject(/* @__PURE__ */ new Error("aborted"));
+			return;
+		}
+		const timer = setTimeout(() => {
+			if (signal) signal.removeEventListener("abort", onAbort);
+			resolve();
+		}, ms);
+		const onAbort = () => {
+			clearTimeout(timer);
+			reject(/* @__PURE__ */ new Error("aborted"));
+		};
+		if (signal) signal.addEventListener("abort", onAbort, { once: true });
+	});
+}
 const PER_TOOL_TIMEOUTS = {
 	browser_list_tabs: {
 		defaultMs: 5e3,
@@ -3275,6 +3401,7 @@ async function dispatchBrowserTool(tool, args, signal, opts = {}) {
 	};
 	const ready = await ensureBridgeReady();
 	if (ready.install_required) return installRequiredToolResult(ready);
+	await maybeInjectHumanlikeDelay(tool, signal, typeof args.tabId === "number" ? args.tabId : void 0);
 	const { defaultMs, maxMs } = pickTimeout(tool);
 	const callerTimeout = typeof opts.timeoutMs === "number" && opts.timeoutMs > 0 ? Math.min(opts.timeoutMs, maxMs) : defaultMs;
 	try {
@@ -3355,6 +3482,517 @@ function logAudit$1(record) {
 	})();
 }
+//#endregion
+//#region src/lib/browser-mcp/matcher.ts
+/**
+* Resolve an intent to an action. Synchronous, no I/O, <5ms expected
+* on a 200-element snapshot.
+*
+* Returns `{source: "escalate"}` when no layer produced a single
+* confident candidate. Caller is expected to invoke the fast-model
+* fallback path with the returned `candidates` shortlist (smaller
+* than the full snapshot, so fast-model token cost drops 3-5×).
+*/
+function deterministicResolve(snapshot, parsed, value) {
+	const v = value ?? parsed.valueFromIntent;
+	const allCandidates = [];
+	for (const layer of LAYERS) {
+		const found = layer.run(snapshot, parsed, v);
+		if (found.length === 0) continue;
+		allCandidates.push(...found);
+		const winners = applyTieBreakers(found, parsed);
+		const top = winners[0];
+		if (!top) continue;
+		const runnerUp = winners[1];
+		if (top.score >= layer.floor && (!runnerUp || top.score - runnerUp.score >= .15)) {
+			const action = inferActionLocal(top.el.role, parsed, v);
+			return {
+				ref: top.el.ref,
+				action,
+				...needsValue(action) && v !== void 0 ? { value: v } : {},
+				confidence: top.score,
+				source: layer.name,
+				reason: top.reason
+			};
+		}
+	}
+	const shortlist = dedupeAndRank(allCandidates).slice(0, 8);
+	return {
+		ref: "",
+		action: parsed.verb ?? "click",
+		...v !== void 0 ? { value: v } : {},
+		confidence: 0,
+		source: "escalate",
+		reason: shortlist.length === 0 ? "no candidates from any cascade layer" : `${shortlist.length} ambiguous candidates`,
+		candidates: shortlist.map((c) => ({
+			ref: c.el.ref,
+			score: c.score,
+			layer: c.layer
+		}))
+	};
+}
+function applyTieBreakers(cands, parsed) {
+	const verb = parsed.verb ?? "click";
+	const dropDisabled = verb === "click" || verb === "fill" || verb === "type" || verb === "select";
+	return cands.filter((c) => {
+		if (c.el.hidden) return false;
+		if (c.el.bbox && (c.el.bbox[2] < 4 || c.el.bbox[3] < 4)) return false;
+		if (dropDisabled && c.el.disabled) return false;
+		return true;
+	}).map((c) => ({
+		...c,
+		score: c.score * weight(c, verb)
+	})).sort((a, b) => b.score - a.score);
+}
+function weight(c, verb) {
+	let w = 1;
+	const bbox = c.el.bbox;
+	if (bbox) {
+		if (!(bbox[0] >= 0 && bbox[1] >= 0)) w *= .92;
+	}
+	if (c.el.isInIframe) w *= .95;
+	if (verb === "click") {
+		const r = (c.el.role || "").toLowerCase();
+		if (r === "button") w *= 1;
+		else if (r === "link" || r === "a") w *= .98;
+		else if (r === "menuitem") w *= .96;
+		else if (r === "generic" || r === "div" || r === "span") w *= .9;
+	}
+	return Math.min(1, w);
+}
+function dedupeAndRank(cands) {
+	const byRef = /* @__PURE__ */ new Map();
+	for (const c of cands) {
+		const existing = byRef.get(c.el.ref);
+		if (!existing || existing.score < c.score) byRef.set(c.el.ref, c);
+	}
+	return [...byRef.values()].sort((a, b) => b.score - a.score);
+}
+function inferActionLocal(role, parsed, value) {
+	if (parsed.verb === "scroll_into_view") return "scroll_into_view";
+	const intentLower = parsed.rawTarget.toLowerCase();
+	if (/\bscroll\b/.test(intentLower)) return "scroll_into_view";
+	const r = (role || "").toLowerCase();
+	if (r === "select" || r === "combobox") return "select";
+	if (r === "textarea" || r === "input" || r === "textbox" || r === "searchbox" || r === "spinbutton") {
+		if (parsed.verb === "type") return "type";
+		if (parsed.verb === "fill") return "fill";
+		return value !== void 0 ? "fill" : "click";
+	}
+	return parsed.verb ?? "click";
+}
+function needsValue(action) {
+	return action === "fill" || action === "type" || action === "select";
+}
+function nameOf(el) {
+	return (el.name ?? "").trim();
+}
+function nameLowerOf(el) {
+	return nameOf(el).toLowerCase();
+}
+function isClickableRole(role) {
+	const r = role.toLowerCase();
+	return r === "button" || r === "link" || r === "a" || r === "menuitem" || r === "tab" || r === "checkbox" || r === "radio" || r === "switch" || r === "option" || r === "treeitem";
+}
+function isInputRole(role) {
+	const r = role.toLowerCase();
+	return r === "textbox" || r === "input" || r === "textarea" || r === "searchbox" || r === "spinbutton" || r === "combobox" || r === "select" || r === "checkbox" || r === "radio";
+}
+function verbCompatible(role, verb) {
+	if (!verb || verb === "click") return isClickableRole(role) || isInputRole(role);
+	if (verb === "fill" || verb === "type" || verb === "select") return isInputRole(role);
+	return true;
+}
+function wholeWordContains(haystack, needle) {
+	if (!haystack || !needle) return false;
+	return new RegExp(`\\b${needle.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}\\b`, "i").test(haystack);
+}
+const L0 = {
+	name: "L0",
+	floor: .95,
+	run: (snapshot, parsed) => {
+		const target = parsed.quotedName ?? parsed.normTarget;
+		if (!target) return [];
+		const out = [];
+		for (const el of snapshot.elements) {
+			if (!verbCompatible(el.role, parsed.verb)) continue;
+			const nm = nameLowerOf(el);
+			if (!nm) continue;
+			if (nm === target.toLowerCase()) out.push({
+				el,
+				score: 1,
+				layer: "L0",
+				reason: `L0 exact name "${el.name}"`
+			});
+		}
+		return out;
+	}
+};
+const L1 = {
+	name: "L1",
+	floor: .9,
+	run: (snapshot, parsed) => {
+		if (parsed.verb && parsed.verb !== "fill" && parsed.verb !== "type" && parsed.verb !== "select") return [];
+		const target = parsed.fieldHint ?? parsed.normTarget;
+		if (!target) return [];
+		const tgt = target.toLowerCase();
+		const out = [];
+		for (const el of snapshot.elements) {
+			if (!isInputRole(el.role)) continue;
+			const nm = nameLowerOf(el);
+			if (nm === tgt || nm === `${tgt} *` || nm === `${tgt} (required)` || nm.endsWith(tgt) && /^[\s*()required:_-]+/.test(nm.slice(0, nm.length - tgt.length))) out.push({
+				el,
+				score: .95,
+				layer: "L1",
+				reason: `L1 label "${el.name}"`
+			});
+		}
+		return out;
+	}
+};
+const L2 = {
+	name: "L2",
+	floor: .7,
+	run: (snapshot, parsed) => {
+		const target = parsed.fieldHint ?? parsed.normTarget;
+		if (!target) return [];
+		const tgt = target.toLowerCase();
+		const out = [];
+		for (const el of snapshot.elements) {
+			if (!isInputRole(el.role)) continue;
+			const ph = (el.placeholder ?? "").toLowerCase();
+			if (!ph) continue;
+			if (ph === tgt) out.push({
+				el,
+				score: .85,
+				layer: "L2",
+				reason: `L2 placeholder exact "${el.placeholder}"`
+			});
+			else if (wholeWordContains(ph, tgt)) out.push({
+				el,
+				score: .75,
+				layer: "L2",
+				reason: `L2 placeholder contains "${tgt}"`
+			});
+		}
+		return out;
+	}
+};
+const L3 = {
+	name: "L3",
+	floor: .65,
+	run: (snapshot, parsed) => {
+		const target = parsed.normTarget;
+		if (!target) return [];
+		const out = [];
+		for (const el of snapshot.elements) {
+			if (!verbCompatible(el.role, parsed.verb)) continue;
+			const nm = nameOf(el);
+			if (!nm) continue;
+			if (!wholeWordContains(nm, target)) continue;
+			const score = target.length / nm.length >= .8 ? .72 : .68;
+			out.push({
+				el,
+				score,
+				layer: "L3",
+				reason: `L3 fuzzy name "${nm}"`
+			});
+		}
+		return out;
+	}
+};
+const L4 = {
+	name: "L4",
+	floor: .6,
+	run: (snapshot, parsed) => {
+		const target = parsed.normTarget;
+		if (!target) return [];
+		const out = [];
+		for (const el of snapshot.elements) {
+			if (!isClickableRole(el.role)) continue;
+			const text = (el.value ?? "").toLowerCase().trim();
+			if (!text) continue;
+			const tgt = target.toLowerCase();
+			if (text === tgt) out.push({
+				el,
+				score: .65,
+				layer: "L4",
+				reason: `L4 text exact "${el.value}"`
+			});
+			else if (wholeWordContains(text, tgt)) out.push({
+				el,
+				score: .6,
+				layer: "L4",
+				reason: `L4 text contains "${tgt}"`
+			});
+		}
+		return out;
+	}
+};
+const L5 = {
+	name: "L5",
+	floor: .85,
+	run: (snapshot, parsed) => {
+		const target = parsed.normTarget;
+		if (!target) return [];
+		if (!/^[a-z][a-z0-9_-]{2,}$/i.test(target)) return [];
+		const norm = target.toLowerCase().replace(/[-_]/g, "");
+		const out = [];
+		for (const el of snapshot.elements) {
+			const attrs = el.attrs;
+			if (!attrs) continue;
+			if (attrs.testid && stripSep(attrs.testid).toLowerCase() === norm) {
+				out.push({
+					el,
+					score: .9,
+					layer: "L5",
+					reason: `L5 testid="${attrs.testid}"`
+				});
+				continue;
+			}
+			if (attrs.id && stripSep(attrs.id).toLowerCase() === norm) {
+				out.push({
+					el,
+					score: .88,
+					layer: "L5",
+					reason: `L5 id="${attrs.id}"`
+				});
+				continue;
+			}
+			if (attrs.name_attr && stripSep(attrs.name_attr).toLowerCase() === norm) {
+				out.push({
+					el,
+					score: .86,
+					layer: "L5",
+					reason: `L5 name="${attrs.name_attr}"`
+				});
+				continue;
+			}
+			if (attrs.aria_label && stripSep(attrs.aria_label).toLowerCase() === norm) out.push({
+				el,
+				score: .86,
+				layer: "L5",
+				reason: `L5 aria-label="${attrs.aria_label}"`
+			});
+		}
+		return out;
+	}
+};
+function stripSep(s) {
+	return s.replace(/[-_\s]/g, "");
+}
+const LAYERS = [
+	L0,
+	L1,
+	L2,
+	L3,
+	L4,
+	L5,
+	{
+		name: "L6",
+		floor: .75,
+		run: (snapshot, parsed) => {
+			if (!parsed.ordinal) return [];
+			const { n, kind } = parsed.ordinal;
+			const candidates = snapshot.elements.filter((el) => {
+				if (!kind) return true;
+				const role = el.role.toLowerCase();
+				return role === kind || role === `${kind}s` || (el.tag ?? "").toLowerCase() === kind;
+			});
+			if (candidates.length < Math.abs(n)) return [];
+			const sorted = [...candidates].sort((a, b) => {
+				const ay = Math.floor(a.bbox[1] / 24);
+				const by = Math.floor(b.bbox[1] / 24);
+				if (ay !== by) return ay - by;
+				return a.bbox[0] - b.bbox[0];
+			});
+			const idx = n === -1 ? sorted.length - 1 : n - 1;
+			if (idx < 0 || idx >= sorted.length) return [];
+			return [{
+				el: sorted[idx],
+				score: .8,
+				layer: "L6",
+				reason: `L6 ordinal pick #${n} of ${sorted.length} ${kind ?? "elements"}`
+			}];
+		}
+	},
+	{
+		name: "L7",
+		floor: .5,
+		run: (snapshot, parsed) => {
+			const hint = parsed.fieldHint ?? parsed.normTarget;
+			if (!hint) return [];
+			const h = hint.toLowerCase();
+			const out = [];
+			const inputRolePred = (el) => isInputRole(el.role);
+			if (h === "email") {
+				for (const el of snapshot.elements) if (el.inputType === "email" || inputRolePred(el) && (wholeWordContains(el.placeholder ?? "", "email") || wholeWordContains(el.name ?? "", "email"))) out.push({
+					el,
+					score: .55,
+					layer: "L7",
+					reason: "L7 email heuristic"
+				});
+			} else if (h === "password") {
+				for (const el of snapshot.elements) if (el.inputType === "password" || inputRolePred(el) && wholeWordContains(el.name ?? "", "password")) out.push({
+					el,
+					score: .55,
+					layer: "L7",
+					reason: "L7 password heuristic"
+				});
+			} else if (h === "search") {
+				for (const el of snapshot.elements) if (el.role === "searchbox" || el.inputType === "search" || inputRolePred(el) && wholeWordContains(el.name ?? "", "search")) out.push({
+					el,
+					score: .55,
+					layer: "L7",
+					reason: "L7 search heuristic"
+				});
+			} else if (h === "phone" || h === "tel") {
+				for (const el of snapshot.elements) if (el.inputType === "tel" || inputRolePred(el) && wholeWordContains(el.name ?? "", "phone")) out.push({
+					el,
+					score: .55,
+					layer: "L7",
+					reason: "L7 phone heuristic"
+				});
+			} else if (h === "submit" || h === "sign in" || h === "signin" || h === "log in" || h === "login") {
+				const sumRe = /^(submit|send|continue|next|save|sign[\s-]?in|sign[\s-]?up|log[\s-]?in)$/i;
+				for (const el of snapshot.elements) if (el.role === "button" && sumRe.test(el.name ?? "")) out.push({
+					el,
+					score: .55,
+					layer: "L7",
+					reason: "L7 submit heuristic"
+				});
+			} else if (h === "username" || h === "user") {
+				for (const el of snapshot.elements) if (inputRolePred(el) && (wholeWordContains(el.name ?? "", "user") || wholeWordContains(el.name ?? "", "login") || wholeWordContains(el.name ?? "", "account"))) out.push({
+					el,
+					score: .55,
+					layer: "L7",
+					reason: "L7 username heuristic"
+				});
+			}
+			return out;
+		}
+	}
+];
+//#endregion
+//#region src/lib/browser-mcp/parse-intent.ts
+const VERB_RE = /^\s*(click|press|tap|fill|enter|type|select|choose|scroll(?:[ -]?into[ -]?view)?|toggle|check|uncheck|open|focus|hover)\s+/i; // Leading verb; group 1 is the raw verb token, which must be followed by whitespace.
+const VALUE_RE = /\s+(?:with|to|=)\s+(.+?)\s*$/i; // Value clause after "with" / "to" / "="; group 1 is everything after the keyword up to end of string.
+const QUOTED_RE = /["'`]([^"'`]+)["'`]/; // First quoted run; the opening and closing quote characters need not be the same kind.
+const TITLE_CASE_RE = /\b([A-Z][\w]*(?:\s+[A-Z\d][\w]*){0,3})\b/; // Run of 1-4 words, the first capitalized and the rest starting with a capital or digit.
+const ORDINAL_WORDS = { // Ordinal word → 1-based position; "last" is encoded as -1.
+	first: 1,
+	second: 2,
+	third: 3,
+	fourth: 4,
+	fifth: 5,
+	sixth: 6,
+	seventh: 7,
+	eighth: 8,
+	ninth: 9,
+	tenth: 10,
+	last: -1
+};
+const ORDINAL_WORD_RE = /\b(first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|last)\s+(\w+)/i; // Group 1: ordinal word, group 2: the word that follows it.
+const ORDINAL_NUM_RE = /\b(\d+)(?:st|nd|rd|th)?\s+(\w+)/i; // Group 1: digits (suffix optional), group 2: the word that follows. NOTE(review): also matches a bare number followed by a word.
+const FIELD_HINT_KINDS = [ // Element-kind nouns recognized after a hint word; also stripped from the end of normTarget by parseIntent.
+	"field",
+	"input",
+	"textbox",
+	"box",
+	"search",
+	"dropdown",
+	"select",
+	"menu",
+	"button",
+	"link",
+	"tab",
+	"checkbox",
+	"radio",
+	"switch"
+];
+const FIELD_HINT_RE = new RegExp(`\\b(\\w+)\\s+(?:${FIELD_HINT_KINDS.join("|")})\\b`, "i"); // Group 1: the single word immediately before a kind noun.
+const ARTICLES_RE = /\b(the|a|an|this|that)\b/gi; // Global flag: used only with String.prototype.replace in this section.
+/**
+* Parse a natural-language intent into structured parts.
+*
+* Returns a fully-formed `ParsedIntent` even for unparseable inputs
+* (rawTarget = the trimmed intent, normTarget = its lowercased
+* normalization, every other field undefined). The matcher cascade
+* handles "I don't know what to do" by falling through layer-by-
+* layer until L7 or escalate; an unparseable intent simply has
+* less signal for the layers to key on.
+*
+* Extraction order (each step works on what the previous one left):
+*   1. leading verb (VERB_RE), removed from the working string;
+*   2. trailing value clause (VALUE_RE), removed from the working string;
+*   3. quotedName — first quoted run, else first title-case run;
+*   4. ordinal — ordinal word first, numeric ordinal otherwise;
+*   5. fieldHint — the word before a FIELD_HINT_KINDS noun;
+*   6. normTarget — lowercased, articles removed, whitespace collapsed,
+*      one trailing kind noun stripped, leading ordinal stripped.
+*
+* Optional fields are only set on the result when they were found.
+*
+* @param intent - Free-form intent; null/undefined is treated as "".
+* @returns Object with `rawTarget`, `normTarget` and, when detected,
+*   `verb`, `quotedName`, `fieldHint`, `ordinal`, `valueFromIntent`.
+*/
+function parseIntent(intent) {
+	let work = String(intent ?? "").trim();
+	let verb;
+	const verbMatch = VERB_RE.exec(work);
+	if (verbMatch) {
+		verb = mapVerb(verbMatch[1]); // May be undefined: the verb text is still removed from `work` below.
+		work = work.slice(verbMatch[0].length);
+	}
+	let valueFromIntent;
+	const valueMatch = VALUE_RE.exec(work);
+	if (valueMatch) {
+		valueFromIntent = valueMatch[1].trim(); // NOTE(review): surrounding quote characters are not stripped from the value — confirm intended.
+		work = work.slice(0, valueMatch.index).trim();
+	}
+	let quotedName;
+	const quotedMatch = QUOTED_RE.exec(work);
+	if (quotedMatch) quotedName = quotedMatch[1].trim();
+	else {
+		const titleMatch = TITLE_CASE_RE.exec(work); // Fallback: a capitalized phrase is treated like a quoted name.
+		if (titleMatch) quotedName = titleMatch[1].trim();
+	}
+	let ordinal;
+	const ordWordMatch = ORDINAL_WORD_RE.exec(work);
+	if (ordWordMatch) {
+		const n = ORDINAL_WORDS[ordWordMatch[1].toLowerCase()];
+		if (typeof n === "number") ordinal = {
+			n,
+			kind: ordWordMatch[2].toLowerCase()
+		};
+	} else {
+		const ordNumMatch = ORDINAL_NUM_RE.exec(work);
+		if (ordNumMatch) ordinal = {
+			n: Number.parseInt(ordNumMatch[1], 10),
+			kind: ordNumMatch[2].toLowerCase()
+		};
+	}
+	let fieldHint;
+	const fieldMatch = FIELD_HINT_RE.exec(work);
+	if (fieldMatch) fieldHint = fieldMatch[1].toLowerCase();
+	const rawTarget = work.trim();
+	let normTarget = rawTarget.toLowerCase().replace(ARTICLES_RE, "").replace(/\s+/g, " ").trim();
+	for (const kind of FIELD_HINT_KINDS) {
+		const tail = new RegExp(`\\s+${kind}$`, "i");
+		if (tail.test(normTarget)) {
+			normTarget = normTarget.replace(tail, "").trim();
+			break; // Only the first matching kind noun is stripped.
+		}
+	}
+	if (ordinal) normTarget = normTarget.replace(/^(\d+(?:st|nd|rd|th)?|first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|last)\s+/i, "").trim();
+	const out = {
+		rawTarget,
+		normTarget
+	};
+	if (verb) out.verb = verb;
+	if (quotedName) out.quotedName = quotedName;
+	if (fieldHint) out.fieldHint = fieldHint;
+	if (ordinal) out.ordinal = ordinal;
+	if (valueFromIntent !== void 0) out.valueFromIntent = valueFromIntent;
+	return out;
+}
+function mapVerb(raw) {
+	const v = raw.toLowerCase();
+	if (v === "click" || v === "press" || v === "tap" || v === "toggle" || v === "check" || v === "uncheck" || v === "open") return "click";
+	if (v === "fill" || v === "enter") return "fill";
+	if (v === "type") return "type";
+	if (v === "select" || v === "choose") return "select";
+	if (v === "scroll" || v === "scrollintoview" || v === "scroll into view" || v === "scroll-into-view") return "scroll_into_view";
+	if (v === "hover" || v === "focus") return void 0;
+}
 //#endregion
 //#region src/lib/mcp-inflight.ts
 /**
@@ -3659,6 +4297,21 @@ async function callCompressor(systemPrompt, userMessage, tool, signal) {
 	}
 }
 /**
+* Public wrapper around `callCompressor` for sibling modules that need
+* the same forced-tool-calling pipeline (slot acquisition, fallback-
+* chain backend, code-fence stripping). Used by `observe.ts` to drive
+* the natural-language describer through the same backend the matcher
+* cascade escalates to, and by `decompose-planner.ts` for the
+* fast-model compound-step replanner.
+*
+* Kept as a thin wrapper rather than re-exporting `callCompressor`
+* directly so the underlying function can change signature without
+* breaking the public surface. All four arguments are forwarded
+* unchanged and the wrapped call's result is returned as-is.
+*/
+async function callCompressorPublic(systemPrompt, userMessage, tool, signal) {
+	return callCompressor(systemPrompt, userMessage, tool, signal);
+}
+/**
 * Strip a single leading / trailing ``` (or ```json) code fence from a
 * model's free-form text reply so JSON.parse works. Idempotent on
 * fence-free input. Defensive against the failure mode caught in PR #55
@@ -3680,12 +4333,32 @@ function stripCodeFence(text) {
 * whether the intent supplied a value. Single source of truth for
 * element matching.
 *
+* Phase 2 short-circuits the common case through the deterministic
+* matcher cascade in `./matcher.ts` — pure-sync, no LLM round-trip,
+* <5ms on a 200-element snapshot. Only when the cascade returns
+* `source: "escalate"` (0 candidates or >1 ambiguous candidates) do
+* we fall through to the existing fast-model `pickMatchingElements`
+* path. When we DO escalate, we pass the cascade's pre-filtered
+* top-K shortlist along so the fast model sees ~8 candidates instead
+* of the full 200-element snapshot — 3-5× token-cost reduction even
+* on misses.
+*
 * Returns ref="" + confidence=0 when no element matches — caller
 * should escalate to visual fallback (when `visualSurfaces` is
 * present) or surface the miss to the lead model.
 */
 async function pickElement(snapshot, intent, signal, value) {
-	const matches = await pickMatchingElements(snapshot, intent, signal);
+	const det = deterministicResolve(snapshot, parseIntent(intent), value);
+	if (det.source !== "escalate" && det.ref !== "") {
+		const out$1 = {
+			ref: det.ref,
+			action: det.action,
+			confidence: det.confidence
+		};
+		if (det.value !== void 0) out$1.value = det.value;
+		return out$1;
+	}
+	const matches = await pickMatchingElements(snapshot, intent, signal, det.candidates);
 	if (matches.length === 0) return {
 		ref: "",
 		action: "click",
@@ -3756,9 +4429,28 @@ const FIND_ELEMENTS_TOOL = {
 * Return up to 5 candidate matches for an intent. Used by
 * `browser_find` — the lead model gets a small ranked list rather than
 * a full element dump. Empty array when nothing matches.
-*/
-async function pickMatchingElements(snapshot, intent, signal) {
-	const trimmed = snapshot.elements.map((e) => ({
+*
+* Phase 2 short-circuits via the deterministic matcher cascade when
+* possible. When the cascade finds a single confident match, we
+* synthesize a one-item `FindMatch[]` and skip the fast-model
+* round-trip. When the cascade's `candidates` shortlist is passed in
+* by `pickElement` (escalation path), we trim the snapshot to just
+* those refs before sending to the fast model — keeps tokens down on
+* misses too.
+*/
+async function pickMatchingElements(snapshot, intent, signal, shortlist) {
+	if (!shortlist) {
+		const det = deterministicResolve(snapshot, parseIntent(intent));
+		if (det.source !== "escalate" && det.ref !== "") {
+			if (snapshot.elements.find((e) => e.ref === det.ref)) return [{
+				ref: det.ref,
+				reason: `deterministic ${det.source}: ${det.reason}`
+			}];
+		}
+		shortlist = det.candidates;
+	}
+	const refSet = shortlist && shortlist.length > 0 ? new Set(shortlist.map((s) => s.ref)) : void 0;
+	const trimmed = (refSet ? snapshot.elements.filter((e) => refSet.has(e.ref)) : snapshot.elements).map((e) => ({
 		ref: e.ref,
 		role: e.role,
 		name: e.name
@@ -3934,6 +4626,235 @@ async function pickElementVisual(screenshotB64, contentType, intent, visualSurfa
 	};
 }
+//#endregion
+//#region src/lib/browser-mcp/decompose.ts
+const LOGIN_RE = /^log[ -]?in (?:to .+? )?with\s+([^\s/]+)\s*\/\s*(.+?)\s*$/i; // "log in [to X] with USER / PASS": group 1 = user (no whitespace or "/"), group 2 = password.
+const SEARCH_CLICK_RE = /^search\s+(?:for\s+)?(.+?)\s+and\s+click\s+(?:the\s+)?first\s+result\s*$/i; // "search [for] QUERY and click [the] first result": group 1 = query.
+const CONJUNCTION_SPLIT_RE = /\s*(?:\s+and\s+then\s+|\s+then\s+|\s*;\s*|\s*,\s+and\s+)\s*/i; // Step separators: "and then", "then", ";", ", and". A bare "and" is not a separator.
+/**
+* Decompose a natural-language intent into atomic steps.
+*
+* The fallback path returns a single-step `[{intent: rawIntent}]` —
+* `browser_act` behaves identically to today's single-step dispatch
+* when no template matches.
+*/
+function decompose(intent, value) {
+	const raw = String(intent ?? "").trim();
+	if (!raw) return {
+		steps: [{
+			intent: "",
+			...value !== void 0 ? { value } : {}
+		}],
+		template: "fallback"
+	};
+	const loginMatch = LOGIN_RE.exec(raw);
+	if (loginMatch) {
+		const user = loginMatch[1].trim();
+		const pass = loginMatch[2].trim();
+		return {
+			steps: [
+				{
+					intent: "the email or username input",
+					value: user
+				},
+				{
+					intent: "the password input",
+					value: pass
+				},
+				{ intent: "the Sign in or Log in button" }
+			],
+			template: "login",
+			successSummary: "logged in"
+		};
+	}
+	const searchMatch = SEARCH_CLICK_RE.exec(raw);
+	if (searchMatch) {
+		const query = searchMatch[1].trim();
+		return {
+			steps: [
+				{
+					intent: "the search input",
+					value: query
+				},
+				{ intent: "the search button or submit" },
+				{ intent: "the first search result" }
+			],
+			template: "search_click",
+			successSummary: `searched for "${query}" and opened first result`
+		};
+	}
+	if (CONJUNCTION_SPLIT_RE.test(raw)) {
+		const parts = raw.split(CONJUNCTION_SPLIT_RE).map((p) => p.trim()).filter(Boolean);
+		if (parts.length >= 2) return {
+			steps: parts.map((p, i) => {
+				if (i === 0 && value !== void 0) return {
+					intent: p,
+					value
+				};
+				return { intent: p };
+			}),
+			template: "conjunction"
+		};
+	}
+	return {
+		steps: [{
+			intent: raw,
+			...value !== void 0 ? { value } : {}
+		}],
+		template: "fallback"
+	};
+}
+//#endregion
+//#region src/lib/browser-mcp/observe.ts
+const OBSERVE_SYSTEM = `You describe a web page for an AI assistant that cannot see the DOM.
+Write 2-4 sentences focused on user-actionable elements (forms, buttons, links) and the page's purpose. If 'intent' is provided, focus the description on the region most relevant to that intent.
+DO NOT mention DOM refs, selectors, bbox coordinates, or any internal identifiers. Plain prose only. Treat the reader as someone who will issue commands like "click the Sign In button" — describe what's there in terms they can act on.
+Call the describe_page tool with your description.`; // System prompt passed by observePage to callCompressorPublic.
+const OBSERVE_TOOL = { // Forced-tool schema for the describer: a single required string field, `description`.
+	name: "describe_page",
+	description: "Report the natural-language description of the page.",
+	parameters: {
+		type: "object",
+		required: ["description"],
+		additionalProperties: false,
+		properties: { description: {
+			type: "string",
+			description: "2-4 sentence prose description of the visible page state."
+		} }
+	}
+};
+/**
+* Produce a natural-language description of the current page state.
+* The lead model never sees the underlying snapshot.
+*/
+async function observePage(snapshot, intent, signal) {
+	const trimmedElements = snapshot.elements.filter((e) => e.name && e.name.length > 0).slice(0, 80).map((e) => ({
+		role: e.role,
+		name: e.name
+	}));
+	const raw = await callCompressorPublic(OBSERVE_SYSTEM, JSON.stringify({
+		intent: intent ?? "",
+		url: snapshot.url ?? "",
+		title: snapshot.title ?? "",
+		visible_text: (snapshot.text ?? "").slice(0, 4e3),
+		actionable_elements: trimmedElements,
+		has_visual_surfaces: Boolean(snapshot.visualSurfaces && snapshot.visualSurfaces.length > 0)
+	}), OBSERVE_TOOL, signal);
+	const out = {
+		description: raw && typeof raw === "object" && typeof raw.description === "string" ? raw.description : "Page contents could not be described.",
+		hasVisualSurfaces: Boolean(snapshot.visualSurfaces && snapshot.visualSurfaces.length > 0)
+	};
+	if (snapshot.url) out.url = snapshot.url;
+	if (snapshot.title) out.title = snapshot.title;
+	return out;
+}
+//#endregion
+//#region src/lib/browser-mcp/planner.ts
+const PLANNER_SYSTEM = `You are a browser-automation replanner. A user issued a high-level intent that was decomposed into atomic steps. Several steps ran successfully, then one failed. You see the page state AFTER the failure and decide what to do next.
+Your job: produce a revised list of atomic steps that will accomplish the original intent given the current page. If you cannot — the page has changed in a way that makes the intent impossible (login form vanished, navigation moved elsewhere, captcha appeared) — return an empty list and explain why in reasoning.
+Each replanned step is a free-form natural-language intent ("the email input", "the Sign In button at the bottom of the form") plus an optional value for fill/type/select actions. Be SPECIFIC about element location ("at the bottom of the form", "in the top navigation") so the deterministic matcher cascade can resolve it without ambiguity. Do NOT reference element refs.
+Cost rule: you get ONE call per compound failure. Make every step count.
+Call the replan_compound tool with your answer.`; // System prompt passed by planCompoundReplan to callCompressorPublic.
+const PLANNER_TOOL = { // Forced-tool schema for the replanner: required `steps` (at most 8) and `reasoning`.
+	name: "replan_compound",
+	description: "Report the revised atomic steps to complete the original compound intent.",
+	parameters: {
+		type: "object",
+		required: ["steps", "reasoning"],
+		additionalProperties: false,
+		properties: {
+			steps: {
+				type: "array",
+				maxItems: 8, // Mirrored by the slice(0, 8) applied to the reply in planCompoundReplan.
+				items: {
+					type: "object",
+					required: ["intent"],
+					additionalProperties: false,
+					properties: {
+						intent: { type: "string" },
+						value: { type: "string" }
+					}
+				}
+			},
+			reasoning: {
+				type: "string",
+				description: "1-2 sentence explanation of the replanning decision."
+			}
+		}
+	}
+};
+/**
+* Run the fast-model planner on a failed compound. Returns the
+* revised step list (may be empty if the planner gives up).
+*
+* The snapshot is trimmed before sending to keep the round-trip
+* small: only element role + name + brief value/placeholder if
+* present. Bbox / state flags / frame ids would just inflate tokens
+* without helping the natural-language replanner.
+*/
+async function planCompoundReplan(input, signal) {
+	const trimmed = input.snapshot.elements.slice(0, 80).map((e) => {
+		const out = { role: e.role };
+		if (e.name) out.name = e.name;
+		if (e.placeholder) out.placeholder = e.placeholder;
+		if (e.value) out.value = e.value;
+		return out;
+	});
+	const raw = await callCompressorPublic(PLANNER_SYSTEM, JSON.stringify({
+		original_intent: input.originalIntent,
+		original_value: input.originalValue,
+		completed_steps: input.completedSteps.map((s) => ({
+			intent: s.intent,
+			...s.value !== void 0 ? { value: s.value } : {}
+		})),
+		failed_step: {
+			intent: input.failedStep.intent,
+			...input.failedStep.value !== void 0 ? { value: input.failedStep.value } : {}
+		},
+		failure_reason: input.failureReason,
+		page_now: {
+			url: input.snapshot.url ?? "",
+			title: input.snapshot.title ?? "",
+			visible_text: (input.snapshot.text ?? "").slice(0, 3e3),
+			actionable_elements: trimmed
+		}
+	}), PLANNER_TOOL, signal);
+	if (!raw || typeof raw !== "object") return {
+		steps: [],
+		reasoning: "planner returned empty response"
+	};
+	const obj = raw;
+	const reasoning = typeof obj.reasoning === "string" ? obj.reasoning : "";
+	if (!Array.isArray(obj.steps)) return {
+		steps: [],
+		reasoning
+	};
+	const steps = [];
+	for (const s of obj.steps.slice(0, 8)) {
+		if (!s || typeof s !== "object") continue;
+		const intent = s.intent;
+		const value = s.value;
+		if (typeof intent === "string" && intent.length > 0) {
+			const step = { intent };
+			if (typeof value === "string") step.value = value;
+			steps.push(step);
+		}
+	}
+	return {
+		steps,
+		reasoning
+	};
+}
 //#endregion
 //#region src/lib/browser-mcp/index.ts
 /**
@@ -3990,7 +4911,7 @@ const BROWSER_TOOLS = Object.freeze([
 			additionalProperties: false,
 			properties: {}
 		},
-		capability: "browser",
+		capability: "browser_power",
 		async handler(args, signal) {
 			return dispatchBrowserTool("browser_list_tabs", args, signal);
 		}
@@ -4031,7 +4952,7 @@ const BROWSER_TOOLS = Object.freeze([
 				description: "Array of tab ids to close (from browser_list_tabs)."
 			} }
 		},
-		capability: "browser",
+		capability: "browser_power",
 		async handler(args, signal) {
 			return dispatchBrowserTool("browser_close_tab", args, signal);
 		}
@@ -4116,7 +5037,7 @@ const BROWSER_TOOLS = Object.freeze([
 				}
 			}
 		},
-		capability: "browser",
+		capability: "browser_power",
 		async handler(args, signal) {
 			return dispatchBrowserTool("browser_read_page", args, signal);
 		}
@@ -4175,7 +5096,7 @@ const BROWSER_TOOLS = Object.freeze([
 				}
 			}
 		},
-		capability: "browser",
+		capability: "browser_power",
 		async handler(args, signal) {
 			return dispatchBrowserTool("browser_scroll", args, signal);
 		}
@@ -4195,7 +5116,7 @@ const BROWSER_TOOLS = Object.freeze([
 				}
 			}
 		},
-		capability: "browser",
+		capability: "browser_power",
 		async handler(args, signal) {
 			return dispatchBrowserTool("browser_keyboard", args, signal);
 		}
@@ -4232,7 +5153,7 @@ const BROWSER_TOOLS = Object.freeze([
 				}
 			}
 		},
-		capability: "browser",
+		capability: "browser_power",
 		async handler(args, signal) {
 			return dispatchBrowserTool("browser_wait", args, signal);
 		}
@@ -4256,7 +5177,7 @@ const BROWSER_TOOLS = Object.freeze([
 				}
 			}
 		},
-		capability: "browser",
+		capability: "browser_power",
 		async handler(args, signal) {
 			return dispatchBrowserTool("browser_eval_js", args, signal);
 		}
@@ -4288,7 +5209,7 @@ const BROWSER_TOOLS = Object.freeze([
 				}
 			}
 		},
-		capability: "browser",
+		capability: "browser_power",
 		async handler(args, signal) {
 			return dispatchBrowserTool("browser_download", args, signal);
 		}
@@ -4352,7 +5273,7 @@ const BROWSER_TOOLS = Object.freeze([
 				}
 			}
 		},
-		capability: "browser",
+		capability: "browser_power",
 		async handler(args, signal) {
 			return dispatchBrowserTool("browser_mouse", args, signal);
 		}
@@ -4426,7 +5347,7 @@ const BROWSER_TOOLS = Object.freeze([
 				}
 			}
 		},
-		capability: "browser",
+		capability: "browser_power",
 		async handler(args, signal) {
 			return dispatchBrowserTool("browser_drag", args, signal);
 		}
@@ -4450,7 +5371,7 @@ const BROWSER_TOOLS = Object.freeze([
 				}
 			}
 		},
-		capability: "browser",
+		capability: "browser_power",
 		async handler(args, signal) {
 			return dispatchBrowserTool("browser_type", args, signal);
 		}
@@ -4491,7 +5412,7 @@ const BROWSER_TOOLS = Object.freeze([
 				}
 			}
 		},
-		capability: "browser",
+		capability: "browser_power",
 		async handler(args, signal) {
 			const kind = args.kind === "network" ? "network" : "console";
 			const tool = kind === "network" ? "browser_network_log" : "browser_console_logs";
@@ -4548,7 +5469,7 @@ const BROWSER_TOOLS = Object.freeze([
 				}
 			}
 		},
-		capability: "browser_compound",
+		capability: "browser_power",
 		async handler(args, signal) {
 			const tabId = typeof args.tabId === "number" ? args.tabId : void 0;
 			const intent = typeof args.intent === "string" ? args.intent : "";
@@ -4615,65 +5536,109 @@ const BROWSER_TOOLS = Object.freeze([
 			const value = typeof args.value === "string" ? args.value : void 0;
 			if (!refIn && !intent) return toolEnvelope({ error: "either `ref` (REF mode) or `intent` (INTENT mode) is required" }, true);
 			if (refIn) return dispatchActionByRef(tabId, refIn, typeof args.action === "string" ? args.action : "click", value, signal);
-			const snapshot = await fetchSnapshot(tabId, signal);
-			const picked = await pickElement(snapshot, intent, signal, value);
-			if (!picked.ref || picked.confidence < .5) {
-				const surfaces = snapshot.visualSurfaces;
-				if (surfaces && surfaces.length > 0) {
-					const shotEnv = await dispatchBrowserTool("browser_screenshot", {
-						tabId,
-						format: "png"
+			const decomposed = decompose(intent, value);
+			if (decomposed.steps.length === 1) return runAtomicIntentStep(tabId, decomposed.steps[0].intent, decomposed.steps[0].value, signal);
+			const summaries = [];
+			let navigated = false;
+			const completedSteps = [];
+			for (let i = 0; i < decomposed.steps.length; i++) {
+				const step = decomposed.steps[i];
+				const env = await runAtomicIntentStep(tabId, step.intent, step.value, signal);
+				const stepText = env.content?.[0]?.text;
+				let stepResult = {};
+				if (typeof stepText === "string") try {
+					stepResult = JSON.parse(stepText);
+				} catch {}
+				if (env.isError || stepResult.ok === false) try {
+					const failureReason = String(stepResult.error ?? "unknown");
+					const replan = await planCompoundReplan({
+						originalIntent: intent,
+						originalValue: value,
+						completedSteps,
+						failedStep: step,
+						failureReason,
+						snapshot: await fetchSnapshot(tabId, signal)
 					}, signal);
-					if (shotEnv.isError) return toolEnvelope({
+					if (replan.steps.length === 0) return toolEnvelope({
 						ok: false,
-						error: "no text match; screenshot for visual fallback failed",
-						picked
+						summary: `compound step ${i + 1}/${decomposed.steps.length} failed and planner declined: ${replan.reasoning || failureReason}`,
+						template: decomposed.template,
+						steps_completed: i,
+						failed_step: step.intent,
+						planner_reasoning: replan.reasoning
 					}, true);
-					const shotText = shotEnv.content?.[0]?.text;
-					let shot = {};
-					try {
-						shot = shotText ? JSON.parse(shotText) : {};
-					} catch {
-						return toolEnvelope({
+					const replanSummaries = [];
+					for (let j = 0; j < replan.steps.length; j++) {
+						const rstep = replan.steps[j];
+						const renv = await runAtomicIntentStep(tabId, rstep.intent, rstep.value, signal);
+						const rtext = renv.content?.[0]?.text;
+						let rresult = {};
+						if (typeof rtext === "string") try {
+							rresult = JSON.parse(rtext);
+						} catch {}
+						if (renv.isError || rresult.ok === false) return toolEnvelope({
 							ok: false,
-							error: "no text match; screenshot envelope unparseable"
+							summary: `compound failed at original step ${i + 1}, planner replan also failed at step ${j + 1}/${replan.steps.length}: ${String(rresult.error ?? "unknown")}`,
+							template: decomposed.template,
+							steps_completed: i,
+							failed_step: rstep.intent,
+							planner_reasoning: replan.reasoning
 						}, true);
+						if (typeof rresult.action_taken === "string") replanSummaries.push(`${rresult.action_taken} (${rstep.intent})`);
+						if (rresult.navigated === true) navigated = true;
 					}
-					if (!shot.contentType || !shot.dataBase64) return toolEnvelope({
-						ok: false,
-						error: "no text match; screenshot envelope missing fields"
-					}, true);
-					const visual = await pickElementVisual(shot.dataBase64, shot.contentType, intent, surfaces, signal);
-					if (visual.confidence < .5) return toolEnvelope({
-						ok: false,
-						error: "no element matched intent (text + visual)",
-						picked,
-						visual
-					}, true);
-					const clickEnv = await dispatchBrowserTool("browser_mouse", {
-						tabId,
-						action: "click",
-						x: visual.x,
-						y: visual.y,
-						force: true
-					}, signal);
-					if (clickEnv.isError) return clickEnv;
 					return toolEnvelope({
 						ok: true,
-						action_taken: "click_visual",
-						x: visual.x,
-						y: visual.y,
-						confidence: visual.confidence,
-						reason: visual.reason
+						summary: `compound recovered via planner (${replan.reasoning}): ${replanSummaries.join(" → ")}`,
+						template: decomposed.template,
+						steps_completed: i + replan.steps.length,
+						navigated,
+						planner_used: true,
+						planner_reasoning: replan.reasoning
 					});
+				} catch (replanErr) {
+					return toolEnvelope({
+						ok: false,
+						summary: `compound step ${i + 1}/${decomposed.steps.length} failed; planner errored: ${replanErr instanceof Error ? replanErr.message : String(replanErr)}`,
+						template: decomposed.template,
+						steps_completed: i,
+						failed_step: step.intent
+					}, true);
+				}
+				if (typeof stepResult.action_taken === "string") summaries.push(`${stepResult.action_taken} (${step.intent})`);
+				if (stepResult.navigated === true) navigated = true;
+				completedSteps.push(step);
+			}
+			return toolEnvelope({
+				ok: true,
+				summary: decomposed.successSummary ?? summaries.join(" → "),
+				template: decomposed.template,
+				steps_completed: decomposed.steps.length,
+				navigated
+			});
+		}
+	},
+	{
+		toolNameHttp: "browser_observe",
+		description: "Get a natural-language description of the current page's user-actionable state — what forms, buttons, links, and content sections are visible — in 2-4 sentences. Optional `intent` focuses the description on a region ('describe the login form', 'what's in the comments section'). Use this BEFORE browser_act when you don't know what's on the page, or AFTER navigation to confirm the page loaded. Cheaper than screenshots when text is enough. Does not include canvas/SVG content — those surface as a `hasVisualSurfaces` flag; switch to browser_screenshot for visuals.",
+		inputSchema: {
+			type: "object",
+			required: ["tabId"],
+			additionalProperties: false,
+			properties: {
+				tabId: { type: "number" },
+				intent: {
+					type: "string",
+					description: "Optional natural-language focus ('describe the form', 'what's in the sidebar')."
 				}
-				return toolEnvelope({
-					ok: false,
-					error: "no element matched intent",
-					picked
-				}, true);
 			}
-			return dispatchActionByRef(tabId, picked.ref, picked.action, picked.value ?? value, signal);
+		},
+		capability: "browser_compound",
+		async handler(args, signal) {
+			const { tabId: rawTabId, intent: rawIntent } = args;
+			const tabId = typeof rawTabId === "number" ? rawTabId : void 0;
+			const focus = typeof rawIntent === "string" ? rawIntent : void 0;
+			// Falsy check: rejects a missing / non-numeric tabId as well as 0 and NaN.
+			if (!tabId) return toolEnvelope({ error: "tabId required" }, true);
+			// Snapshot first, then describe it (optionally focused by `intent`).
+			const snapshot = await fetchSnapshot(tabId, signal);
+			const description = await observePage(snapshot, focus, signal);
+			return toolEnvelope(description);
+		}
 	},
 	{
@@ -4716,6 +5681,76 @@ const BROWSER_TOOLS = Object.freeze([
 	}
 ]);
 /**
+* Run a single atomic intent step: fetch snapshot, run matcher
+* cascade (via pickElement), visual fallback on no-match, dispatch
+* the resolved action. Returns the standard MCP envelope.
+*
+* Pulled out of `browser_act`'s handler so the compound-intent loop
+* (decompose path) can call it per-step without duplicating the
+* snapshot + visual-fallback logic.
+*/
+async function runAtomicIntentStep(tabId, intent, value, signal) {
+	const snapshot = await fetchSnapshot(tabId, signal);
+	const picked = await pickElement(snapshot, intent, signal, value);
+	// Text match with a ref and a confidence that is not below 0.5:
+	// dispatch the resolved action right away.
+	const textMatched = Boolean(picked.ref) && !(picked.confidence < .5);
+	if (textMatched) return dispatchActionByRef(tabId, picked.ref, picked.action, picked.value ?? value, signal);
+	// No usable text match. The visual fallback only applies when the
+	// snapshot reports at least one visual surface.
+	const surfaces = snapshot.visualSurfaces;
+	const hasSurfaces = surfaces && surfaces.length > 0;
+	if (!hasSurfaces) return toolEnvelope({
+		ok: false,
+		error: "no element matched intent",
+		picked
+	}, true);
+	const screenshotEnv = await dispatchBrowserTool("browser_screenshot", {
+		tabId,
+		format: "png"
+	}, signal);
+	if (screenshotEnv.isError) return toolEnvelope({
+		ok: false,
+		error: "no text match; screenshot for visual fallback failed",
+		picked
+	}, true);
+	// The screenshot payload is JSON carried in the first content item's text.
+	const payloadText = screenshotEnv.content?.[0]?.text;
+	let shot;
+	try {
+		shot = payloadText ? JSON.parse(payloadText) : {};
+	} catch {
+		return toolEnvelope({
+			ok: false,
+			error: "no text match; screenshot envelope unparseable"
+		}, true);
+	}
+	if (!(shot.contentType && shot.dataBase64)) return toolEnvelope({
+		ok: false,
+		error: "no text match; screenshot envelope missing fields"
+	}, true);
+	const visual = await pickElementVisual(shot.dataBase64, shot.contentType, intent, surfaces, signal);
+	if (visual.confidence < .5) return toolEnvelope({
+		ok: false,
+		error: "no element matched intent (text + visual)",
+		picked,
+		visual
+	}, true);
+	// Click at the coordinates the visual picker returned.
+	const clickArgs = {
+		tabId,
+		action: "click",
+		x: visual.x,
+		y: visual.y,
+		force: true
+	};
+	const clickEnv = await dispatchBrowserTool("browser_mouse", clickArgs, signal);
+	// A failed click envelope is passed through unchanged.
+	if (clickEnv.isError) return clickEnv;
+	return toolEnvelope({
+		ok: true,
+		action_taken: "click_visual",
+		x: visual.x,
+		y: visual.y,
+		confidence: visual.confidence,
+		reason: visual.reason
+	});
+}
+/**
 * Dispatch an action against a known ref via the appropriate primitive.
 * Shared between REF mode and INTENT-mode-text-match in `browser_act`.
 * Returns an MCP envelope (text content + optional isError).
@@ -7309,6 +8344,27 @@ function workerToolsEnabled() {
 function browserCompoundToolsEnabled() {
 	return compressorAvailable();
 }
+/**
+* Gate for the L0/L1 power browser tools (`browser_read_page`,
+* `browser_mouse`, `browser_drag`, `browser_type`, `browser_keyboard`,
+* `browser_scroll`, `browser_eval_js`, `browser_diagnostics`,
+* `browser_find`, `browser_close_tab`, `browser_list_tabs`,
+* `browser_wait`, `browser_download`).
+*
+* Returns true iff `state.powerBrowseEnabled` (set by `--power-browse`
+* or `GH_ROUTER_ENABLE_POWER_BROWSE=1`). When off, the default
+* `--browse` surface exposes only the 6 lead-model tools (`act`,
+* `observe`, `extract`, `navigate`, `screenshot`, `open_tab`) that
+* hide DOM details behind intent. Power mode adds the raw primitives
+* for users who want direct coord/keystroke control.
+*
+* `handler.ts` filter chain ANDs this with `browserToolsEnabled()`
+* (defense-in-depth — power without basic is meaningless and the
+* setup path already forces basic on when power is on).
+*/
+function browserPowerToolsEnabled() {
+	// Strict comparison: only a literal `true` enables the power surface.
+	const { powerBrowseEnabled } = state;
+	return powerBrowseEnabled === true;
+}
 //#endregion
 //#region src/routes/mcp/handler.ts
@@ -7486,6 +8542,7 @@ function toolEntries() {
 		if (t.capability === "stand_in") return standInToolEnabled();
 		if (t.capability === "browser") return browserToolsEnabled();
 		if (t.capability === "browser_compound") return browserToolsEnabled() && browserCompoundToolsEnabled();
+		if (t.capability === "browser_power") return browserToolsEnabled() && browserPowerToolsEnabled();
 		return true;
 	}).map((t) => ({
 		name: t.toolNameHttp,
@@ -7778,6 +8835,7 @@ async function handleToolsCall(body) {
 	if (nonPersonaTool && nonPersonaTool.capability === "stand_in" && !standInToolEnabled()) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
 	if (nonPersonaTool && nonPersonaTool.capability === "browser" && !browserToolsEnabled()) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
 	if (nonPersonaTool && nonPersonaTool.capability === "browser_compound" && !(browserToolsEnabled() && browserCompoundToolsEnabled())) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
+	if (nonPersonaTool && nonPersonaTool.capability === "browser_power" && !(browserToolsEnabled() && browserPowerToolsEnabled())) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
 	let personaPrompt;
 	let personaContext;
 	let personaEffort;
@@ -11331,7 +12389,10 @@ function buildPeerAwarenessSnippet(opts) {
 	if (opts.workerToolsAvailable) para2Parts.push("`worker_explore` runs a Gemini-backed read-only worker that returns a summary, using its own context rather than yours; concurrent launches share the `MAX_INFLIGHT_TOOLS_CALL=8` cap with operator traffic.", "`worker_implement` is the same worker with edit/write/bash; `worktree: true` runs it in an isolated git worktree and returns the diff.", "Workers themselves have `code_search` in their toolset.");
 	para2Parts.push("`web_search` surfaces citable sources for docs, errors, and upstream issues.");
 	if (opts.standInAvailable) para2Parts.push("`stand_in` provides three-lab consensus for decision tiebreak when the user is unavailable.");
-	if (opts.browseAvailable) para2Parts.push("`browser_*` tools (under `mcp__gh-router-peers__browser_*`) drive a real Chrome / Edge browser via a local extension; prefer the L2 compound tools `browser_act(intent | ref, value?)` / `browser_find(intent)` / `browser_extract(schema, instruction)` over the L0/L1 primitives.");
+	if (opts.browseAvailable) {
+		const powerNote = opts.powerBrowseAvailable ? " Power mode is on: the L0/L1 primitives (`browser_mouse`, `browser_drag`, `browser_type`, `browser_keyboard`, `browser_scroll`, `browser_eval_js`, `browser_read_page`, `browser_diagnostics`, `browser_find`) are also available for direct DOM / coordinate control." : "";
+		para2Parts.push(`\`browser_*\` tools (under \`mcp__gh-router-peers__browser_*\`) drive a real Chrome / Edge browser via a local extension. Lead surface: \`browser_act(intent, value?)\` for any click / fill / type / scroll-to (an inner fast model resolves intent), \`browser_observe(intent?)\` for a 2-4 sentence natural-language page description, \`browser_extract(schema, instruction)\` for typed extraction, \`browser_navigate\` / \`browser_open_tab\` / \`browser_screenshot\` for state and visuals. The lead model never sees raw DOM: refs, bboxes, and role/name dumps stay internal.${powerNote}`);
+	}
 	return [
 		"## Peer review and advisor",
 		"",
@@ -12794,7 +13855,7 @@ function initProxyFromEnv() {
 //#endregion
 //#region package.json
 var name = "github-router";
-var version$1 = "0.3.52";
+var version$1 = "0.3.66";
 //#endregion
 //#region src/lib/approval.ts
@@ -14516,6 +15577,11 @@ async function setupAndServe(options) {
 	state.showToken = options.showToken;
 	state.extendedBetas = options.extendedBetas;
 	state.browseEnabled = options.browseEnabled || process.env.GH_ROUTER_ENABLE_BROWSE === "1";
+	state.powerBrowseEnabled = options.powerBrowseEnabled || process.env.GH_ROUTER_ENABLE_POWER_BROWSE === "1";
+	if (state.powerBrowseEnabled) state.browseEnabled = true;
+	if (process.env.GH_ROUTER_BROWSER_NO_HUMANLIKE === "1") state.humanlikeForce = "off";
+	else if (options.humanlikeEnabled || process.env.GH_ROUTER_HUMANLIKE === "1") state.humanlikeForce = "on";
+	else state.humanlikeForce = "auto";
 	if (process.env.COPILOT_API_URL) state.copilotApiUrl = process.env.COPILOT_API_URL;
 	await ensurePaths();
 	await cacheVSCodeVersion();
@@ -14623,6 +15689,16 @@ const sharedServerArgs = {
 		type: "boolean",
 		default: false,
 		description: "Enable the browser-control MCP tools (browser_open_tab, browser_screenshot, browser_click, etc.) on /mcp. Requires Chrome or Edge installed; the bundled extension must be loaded on first tool call (the proxy returns install_required with Web Store URLs + a Load Unpacked fallback path). Off by default; can also be enabled with GH_ROUTER_ENABLE_BROWSE=1."
+	},
+	"power-browse": {
+		type: "boolean",
+		default: false,
+		description: "Expose the full ~18-tool browser MCP surface (raw read_page, mouse / drag / scroll / keyboard / type primitives, eval_js, diagnostics, find, locate). Default --browse exposes only the 6 lead-model tools (act, observe, extract, navigate, screenshot, open_tab) that hide DOM details behind intent. Implies --browse. Off by default; can also be enabled with GH_ROUTER_ENABLE_POWER_BROWSE=1."
+	},
+	humanlike: {
+		type: "boolean",
+		default: false,
+		description: "Force humanlike pacing on ALL browser tool dispatches: Beta-distributed inter-action delays (800-4600 ms), Bezier mouse trajectories with overshoot-and-correct, per-keystroke jitter with word-end pauses, scroll chunking. Use for known anti-bot sites (Cloudflare, Datadome). Off by default (auto mode); GH_ROUTER_HUMANLIKE=1 is the env equivalent. GH_ROUTER_BROWSER_NO_HUMANLIKE=1 hard-disables (wins over --humanlike, for tests)."
 	}
 };
 const allowedAccountTypes = new Set([
@@ -14660,7 +15736,9 @@ function parseSharedArgs(args) {
 		showToken: args["show-token"],
 		proxyEnv: args["proxy-env"],
 		extendedBetas: args["extended-betas"],
-		browseEnabled: args.browse
+		browseEnabled: args.browse,
+		powerBrowseEnabled: args["power-browse"],
+		humanlikeEnabled: args.humanlike
 	};
 }
 /**
@@ -14900,7 +15978,8 @@ const claude = defineCommand({
 				geminiAvailable: geminiAvailable$1,
 				workerToolsAvailable: workerToolsEnabled(),
 				standInAvailable: standInToolEnabled(),
-				browseAvailable: state.browseEnabled
+				browseAvailable: state.browseEnabled,
+				powerBrowseAvailable: state.powerBrowseEnabled
 			});
 			extraArgs.push("--append-system-prompt", peerSnippet);
 			try {