npm - pi-chrome - Versions diffs - 0.6.1 → 0.8.0 - Mend

pi-chrome 0.6.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/extensions/chrome-profile-bridge/browser-extension/manifest.json +2 -2
package/extensions/chrome-profile-bridge/browser-extension/service_worker.js +685 -121
package/extensions/chrome-profile-bridge/index.ts +329 -30
package/package.json +29 -29

package/extensions/chrome-profile-bridge/index.ts CHANGED Viewed

@@ -46,7 +46,7 @@ type BridgeResult = {
 	error?: string;
 };
-const PI_CHROME_VERSION = "0.6.1";
+const PI_CHROME_VERSION = "0.8.0";
 const DEFAULT_HOST = process.env.PI_CHROME_BRIDGE_HOST ?? "127.0.0.1";
 const DEFAULT_PORT = Number(process.env.PI_CHROME_BRIDGE_PORT ?? "17318");
 const DEFAULT_TIMEOUT_MS = 30_000;
@@ -83,7 +83,30 @@ function workspaceCwd(ctx: ExtensionContext): string {
 function browserExtensionPath(): string {
 	return join(extensionRoot(), "browser-extension");
-}
+}
+function hostnameOf(url: string | undefined): string { // Hostname component of `url`; "" when url is missing/empty or cannot be parsed.
+	if (!url) return ""; // undefined and "" both short-circuit here
+	try { return new URL(url).hostname; } catch { return ""; } // new URL() throws on invalid input; that is reported as "no hostname" rather than an error
+}
+// Builds a one-line, "; "-joined summary of the warning-worthy fields of a click/type/fill result so the agent doesn't have to
+// guess whether the action actually changed the page. Returns undefined for non-object results or when no field is noteworthy.
+function summarizeActionResult(result: unknown): string | undefined {
+	if (!result || typeof result !== "object") return undefined; // null, undefined and primitives carry no fields to report
+	const r = result as Record<string, unknown>;
+	const parts: string[] = [];
+	if (r.pageMutated === false) parts.push("pageMutated=false"); // strict comparison: an absent field is not reported
+	if (r.defaultPrevented === true) parts.push("defaultPrevented=true");
+	if (r.elementVisible === false) parts.push("element NOT visible");
+	if (r.occludedBy) {
+		const o = r.occludedBy as { tag?: string; id?: string }; // NOTE(review): shape is asserted, not validated — confirm the extension always sends an object here
+		parts.push(`occluded by <${o.tag ?? "?"}${o.id ? "#" + o.id : ""}>`); // renders like "occluded by <div#modal>"; "?" stands in for a missing tag
+	}
+	if (r.valueMatches === false) parts.push("input value did not stick");
+	if (r.autoplayHint) parts.push("autoplay-gated affordance — synthetic click may not start media");
+	return parts.length ? parts.join("; ") : undefined; // undefined (not "") lets callers omit the summary suffix entirely
+}
 function readRequestBody(request: IncomingMessage): Promise<string> {
 	return new Promise((resolveBody, rejectBody) => {
@@ -374,29 +397,44 @@ export default function (pi: ExtensionAPI): void {
 	pi.on("before_agent_start", (event) => {
 		const primer = `
 <chrome-profile-bridge>
-Chrome control is available through the chrome_* tools via a companion Chrome extension installed in the user's normal Chrome profile.
-This is not CDP: it can use the user's existing Chrome windows and authenticated sessions after the user loads the companion browser extension.
-If chrome_* tools time out, ask the user to run /chrome-onboard, then load the bundled browser-extension folder in chrome://extensions. Prefer chrome_snapshot before clicking/typing. Avoid destructive actions unless explicitly requested. By default chrome_* tools focus Chrome and activate the target tab so the user can watch the agent work. The user can switch to silent/background mode for the whole session via /chrome-background; you can also pass background=true on a single tool call when the user explicitly wants the action to be silent (for example, scraping while they keep working in another app).
+Chrome control is available through the chrome_* tools via a companion Chrome extension installed in the user's normal Chrome profile. Tools target the existing signed-in profile, no CDP, no throwaway profile.
+Capability model (important):
+- All input is **synthetic DOM events** (\`isTrusted=false\`). Synthetic events drive React/Vue/Angular state fine, but they do NOT satisfy Chrome's user-activation gates: audio/video autoplay, clipboard write, file pickers, fullscreen, and Web Push prompts will NOT open from a chrome_click.
+- \`chrome_evaluate\` runs in MAIN world via the Function constructor. It works on pages with strict CSP (\`script-src 'self'\` without \`'unsafe-eval'\`), and surfaces thrown exceptions.
+- Tool results include \`pageMutated\`, \`defaultPrevented\`, \`elementVisible\`, \`occludedBy\`, and (for type/fill) \`valueMatches\`. If \`pageMutated\` is false after a click that should have changed something, the click likely didn't take effect — do NOT just retry; check the action result and snapshot for the cause.
+Usage rules:
+1. \`chrome_snapshot\` before clicking/typing; pass \`uid\` over \`selector\`.
+2. \`includeSnapshot=true\` on click/type/fill to verify in one round trip.
+3. If \`chrome_evaluate\` returns null when you expected a value, the expression evaluated to null/undefined in the page; surface the value via \`JSON.stringify\` to confirm.
+4. \`chrome_navigate\` supports an optional \`initScript\` that runs at document_start in MAIN world for the next navigation (good for seeding localStorage or stubbing Date.now).
+5. By default chrome_* tools focus Chrome so the user can watch; pass \`background=true\` or run /chrome-background to silence the whole session.
+6. If you hit an autoplay/clipboard/file-picker gate, tell the user; this bridge cannot satisfy it.
+7. Run /chrome-doctor when in doubt about connectivity or capabilities.
 </chrome-profile-bridge>`;
 		return { systemPrompt: event.systemPrompt + primer };
 	});
 	pi.registerCommand("chrome-doctor", {
 		description:
-			"Check Chrome bridge connectivity and diagnose setup. Reports the local bridge, companion Chrome extension status (ID + version), and a one-line fix for common failures (extension not loaded, stale service worker, version drift).",
+			"Check Chrome bridge connectivity and capability tier. Probes the local bridge, the companion Chrome extension, MAIN-world evaluation, and CDP availability, and prints one-line fixes for common failures.",
 		handler: async (_args, ctx) => {
 			ctx.ui.notify("Performing Chrome bridge health check", "info");
 			const lines: string[] = [`pi-chrome v${PI_CHROME_VERSION}`];
 			const status = bridge.status();
 			lines.push(`• Local bridge: mode=${status.mode}, url=${status.url}`);
+			let extensionAlive = false;
 			try {
+				const started = Date.now();
 				const version = (await bridge.send("tab.version", {}, 35_000)) as {
 					extensionId?: string;
 					extensionVersion?: string;
+					bridgeUrl?: string;
 				};
-				if (version.extensionId)
-					lines.push(`✓ Companion Chrome extension responding (ID: ${version.extensionId}, ext v${version.extensionVersion ?? "unknown"})`);
-				else lines.push("✓ Companion Chrome extension responding (no extension ID reported)");
+				const latencyMs = Date.now() - started;
+				extensionAlive = true;
+				lines.push(`✓ Companion Chrome extension responding (ID: ${version.extensionId ?? "?"}, ext v${version.extensionVersion ?? "?"}, latency ${latencyMs}ms)`);
 				if (version.extensionVersion && version.extensionVersion !== PI_CHROME_VERSION) {
 					lines.push(
 						`⚠ Extension version (${version.extensionVersion}) differs from pi-chrome (${PI_CHROME_VERSION}). Reload "Pi Existing Chrome Profile Bridge" in chrome://extensions to pick up the latest service worker.`,
@@ -411,6 +449,43 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
 					lines.push("  Fix: run /chrome-onboard, then load the bundled browser-extension folder in chrome://extensions and keep that Chrome window open.");
 				}
 			}
+			if (extensionAlive) {
+				// MAIN-world evaluate probe.
+				try {
+					const value = await bridge.send("page.evaluate", { expression: "1+1", awaitPromise: true, foreground: false }, 10_000);
+					if (value === 2) lines.push(`✓ chrome_evaluate("1+1") = 2`);
+					else lines.push(`⚠ chrome_evaluate("1+1") returned ${JSON.stringify(value)} (expected 2). The current tab may have a restrictive CSP or be a chrome:// URL.`);
+				} catch (error) {
+					lines.push(`✗ chrome_evaluate failed: ${(error as Error).message}`);
+				}
+				// Capability probe via MAIN-world helper.
+				try {
+					const probe = (await bridge.send("page.probe", { foreground: false }, 10_000)) as Record<string, unknown>;
+					if (probe && probe.arithmetic === 2) lines.push(`✓ MAIN-world helper injection works (location=${hostnameOf(String(probe.location))})`);
+					if (probe && probe.webdriver) lines.push(`⚠ navigator.webdriver=true on current tab — site fingerprinting may flag automation.`);
+				} catch (error) {
+					lines.push(`⚠ page.probe failed: ${(error as Error).message}`);
+				}
+			}
+			// CDP availability hint.
+			try {
+				const controller = new AbortController();
+				const timer = setTimeout(() => controller.abort(), 250);
+				const response = await fetch("http://127.0.0.1:9222/json/version", { signal: controller.signal }).catch(() => undefined);
+				clearTimeout(timer);
+				if (response && response.ok) {
+					const info = (await response.json().catch(() => ({}))) as { Browser?: string };
+					lines.push(`✓ CDP endpoint reachable at 127.0.0.1:9222 (${info.Browser ?? "unknown"}). Trusted input via CDP is not yet wired into pi-chrome — reserved for a future release.`);
+				} else {
+					lines.push(`• CDP not available (no listener on 127.0.0.1:9222). Synthetic input only; autoplay/clipboard/file-picker gates cannot be satisfied. Future pi-chrome versions will use CDP for trusted input when this port is enabled.`);
+				}
+			} catch {
+				lines.push(`• CDP probe inconclusive.`);
+			}
 			ctx.ui.notify(lines.join("\n"), "info");
 		},
 	});
@@ -529,13 +604,16 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
 		name: "chrome_snapshot",
 		label: "Chrome Snapshot",
 		description:
-			"Inspect a page in the user's existing Chrome profile: title, URL, visible body text, viewport, and clickable/focusable elements with CSS selectors. Brings Chrome to the foreground by default so the user can watch; pass background=true to inspect silently.",
+			"Inspect a page in the user's existing Chrome profile: title, URL, visible body text, viewport, and clickable/focusable elements with stable uids plus CSS selectors. Brings Chrome to the foreground by default so the user can watch; pass background=true to inspect silently.",
 		promptSnippet: "Inspect the current Chrome page and get CSS selectors for browser automation.",
 		parameters: Type.Object({
 			targetId: Type.Optional(Type.String()),
 			urlIncludes: Type.Optional(Type.String()),
 			titleIncludes: Type.Optional(Type.String()),
 			maxElements: Type.Optional(Type.Number({ default: MAX_ELEMENTS })),
+			containingText: Type.Optional(Type.String({ description: "Only return elements whose label/text contains this string (case-insensitive). Useful when the page has many controls." })),
+			roleFilter: Type.Optional(Type.String({ description: "Only return elements matching this ARIA role or tag name (case-insensitive). e.g. 'button', 'link', 'textbox'." })),
+			nearUid: Type.Optional(Type.String({ description: "Sort elements by proximity to this snapshot uid. Useful for finding controls near a known anchor." })),
 			background: Type.Optional(
 				Type.Boolean({ description: "If true, run silently in the background without focusing Chrome. Default false (Chrome focuses + tab activates so the user can watch)." }),
 			),
@@ -565,6 +643,7 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
 			titleIncludes: Type.Optional(Type.String()),
 			waitUntilLoad: Type.Optional(Type.Boolean({ default: true })),
 			timeoutMs: Type.Optional(Type.Number({ default: 15_000 })),
+			initScript: Type.Optional(Type.String({ description: "Optional JavaScript source to run in MAIN world at document_start of the next navigation. Useful for seeding localStorage, stubbing Date.now(), or defining navigator.webdriver=undefined. Requires the companion extension's webNavigation permission." })),
 			background: Type.Optional(
 				Type.Boolean({ description: "If true, navigate silently without focusing Chrome. Default false." }),
 			),
@@ -572,8 +651,8 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
 			port: Type.Optional(Type.Number()),
 		}),
 		async execute(_id, params): Promise<ToolTextResult> {
-			const result = await bridge.send("page.navigate", withBackground(params), params.timeoutMs ?? 15_000);
-			return { content: [{ type: "text", text: `Navigated to ${params.url}` }], details: { result: result as Json } };
+			const result = await bridge.send("page.navigate", withBackground(params), (params.timeoutMs ?? 15_000) + 2_000);
+			return { content: [{ type: "text", text: `Navigated to ${params.url}${params.initScript ? " (with initScript)" : ""}` }], details: { result: result as Json } };
 		},
 	});
@@ -586,7 +665,6 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
 		parameters: Type.Object({
 			expression: Type.String(),
 			awaitPromise: Type.Optional(Type.Boolean({ default: true })),
-			returnByValue: Type.Optional(Type.Boolean({ default: true })),
 			targetId: Type.Optional(Type.String()),
 			urlIncludes: Type.Optional(Type.String()),
 			titleIncludes: Type.Optional(Type.String()),
@@ -598,7 +676,12 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
 		}),
 		async execute(_id, params): Promise<ToolTextResult> {
 			const value = await bridge.send("page.evaluate", withBackground(params), DEFAULT_TIMEOUT_MS);
-			return { content: [{ type: "text", text: truncateText(typeof value === "string" ? value : safeJson(value)) }], details: { value: value as Json } };
+			const text = value === undefined
+				? "undefined"
+				: typeof value === "string"
+					? value
+					: safeJson(value) ?? "undefined";
+			return { content: [{ type: "text", text: truncateText(text) }], details: { value: value as Json } };
 		},
 	});
@@ -606,12 +689,15 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
 		name: "chrome_click",
 		label: "Chrome Click",
 		description:
-			"Click a CSS selector or viewport coordinate in an existing Chrome tab through the companion extension. The click is dispatched as a synthetic DOM event; by default Chrome is focused so the user can watch, pass background=true to click silently.",
-		promptSnippet: "Click page elements in Chrome by selector or viewport coordinate.",
+			"Click a snapshot uid, CSS selector, or viewport coordinate in an existing Chrome tab through the companion extension. The click is dispatched as a synthetic DOM event; by default Chrome is focused so the user can watch, pass background=true to click silently. Pass includeSnapshot=true to return a fresh snapshot after the click.",
+		promptSnippet: "Click page elements in Chrome by snapshot uid, selector, or viewport coordinate.",
 		parameters: Type.Object({
-			selector: Type.Optional(Type.String({ description: "CSS selector to click. Prefer selectors from chrome_snapshot." })),
-			x: Type.Optional(Type.Number({ description: "Viewport x coordinate if selector is omitted." })),
-			y: Type.Optional(Type.Number({ description: "Viewport y coordinate if selector is omitted." })),
+			uid: Type.Optional(Type.String({ description: "Stable element uid from chrome_snapshot. Prefer uid over selector after taking a snapshot." })),
+			selector: Type.Optional(Type.String({ description: "CSS selector to click. Prefer uid from chrome_snapshot when available." })),
+			x: Type.Optional(Type.Number({ description: "Viewport x coordinate if uid/selector is omitted." })),
+			y: Type.Optional(Type.Number({ description: "Viewport y coordinate if uid/selector is omitted." })),
+			includeSnapshot: Type.Optional(Type.Boolean({ description: "If true, include a fresh chrome_snapshot result after the click." })),
+			maxElements: Type.Optional(Type.Number({ default: MAX_ELEMENTS, description: "Max elements in the included snapshot." })),
 			targetId: Type.Optional(Type.String()),
 			urlIncludes: Type.Optional(Type.String()),
 			titleIncludes: Type.Optional(Type.String()),
@@ -622,8 +708,12 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
 			port: Type.Optional(Type.Number()),
 		}),
 		async execute(_id, params): Promise<ToolTextResult> {
-			const result = await bridge.send("page.click", withBackground(params), DEFAULT_TIMEOUT_MS);
-			return { content: [{ type: "text", text: `Clicked ${params.selector ?? `${params.x},${params.y}`}` }], details: { result: result as Json } };
+			const raw = await bridge.send("page.click", withBackground(params), DEFAULT_TIMEOUT_MS);
+			const result = (params.includeSnapshot ? (raw as { result: unknown }).result : raw) as Json;
+			const summary = summarizeActionResult(result);
+			const target = params.uid ?? params.selector ?? `${params.x},${params.y}`;
+			const text = summary ? `Clicked ${target} — ${summary}` : `Clicked ${target}`;
+			return { content: [{ type: "text", text }], details: { result: raw as Json } };
 		},
 	});
@@ -631,11 +721,14 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
 		name: "chrome_type",
 		label: "Chrome Type",
 		description:
-			"Focus an optional CSS selector, then type text into an existing Chrome tab through the companion extension. By default focuses Chrome and activates the tab so the user can watch; pass background=true to type silently.",
-		promptSnippet: "Type text into Chrome, optionally focusing a selector first.",
+			"Focus an optional snapshot uid or CSS selector, then type text into an existing Chrome tab through the companion extension. By default focuses Chrome and activates the tab so the user can watch; pass background=true to type silently. Pass includeSnapshot=true to return a fresh snapshot after typing.",
+		promptSnippet: "Type text into Chrome, optionally focusing a snapshot uid or selector first.",
 		parameters: Type.Object({
 			text: Type.String(),
+			uid: Type.Optional(Type.String({ description: "Stable element uid from chrome_snapshot." })),
 			selector: Type.Optional(Type.String({ description: "CSS selector to focus before typing." })),
+			includeSnapshot: Type.Optional(Type.Boolean({ description: "If true, include a fresh chrome_snapshot result after typing." })),
+			maxElements: Type.Optional(Type.Number({ default: MAX_ELEMENTS, description: "Max elements in the included snapshot." })),
 			pressEnter: Type.Optional(Type.Boolean()),
 			targetId: Type.Optional(Type.String()),
 			urlIncludes: Type.Optional(Type.String()),
@@ -647,8 +740,44 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
 			port: Type.Optional(Type.Number()),
 		}),
 		async execute(_id, params): Promise<ToolTextResult> {
-			const result = await bridge.send("page.type", withBackground(params), DEFAULT_TIMEOUT_MS);
-			return { content: [{ type: "text", text: `Typed ${params.text.length} character(s)${params.selector ? ` into ${params.selector}` : ""}.` }], details: { result: result as Json } };
+			const raw = await bridge.send("page.type", withBackground(params), DEFAULT_TIMEOUT_MS);
+			const result = (params.includeSnapshot ? (raw as { result: unknown }).result : raw) as Json;
+			const summary = summarizeActionResult(result);
+			const into = params.uid || params.selector ? ` into ${params.uid ?? params.selector}` : "";
+			const base = `Typed ${params.text.length} character(s)${into}.`;
+			return { content: [{ type: "text", text: summary ? `${base} (${summary})` : base }], details: { result: raw as Json } };
+		},
+	});
+	pi.registerTool({ // Registers the chrome_fill tool: forwards a "page.fill" command to the bridge and reports a text summary.
+		name: "chrome_fill",
+		label: "Chrome Fill",
+		description:
+			"Set the full value of a text input, textarea, or contenteditable element using framework-aware native value setters and input/change events. Accepts a snapshot uid or CSS selector. Pass includeSnapshot=true to verify after filling.",
+		promptSnippet: "Fill a Chrome form field by snapshot uid or selector, optionally returning a fresh snapshot.",
+		parameters: Type.Object({
+			text: Type.String(), // the only required parameter
+			uid: Type.Optional(Type.String({ description: "Stable element uid from chrome_snapshot." })),
+			selector: Type.Optional(Type.String({ description: "CSS selector to fill if uid is omitted." })),
+			submit: Type.Optional(Type.Boolean({ description: "If true, press Enter after filling." })),
+			includeSnapshot: Type.Optional(Type.Boolean({ description: "If true, include a fresh chrome_snapshot result after filling." })),
+			maxElements: Type.Optional(Type.Number({ default: MAX_ELEMENTS, description: "Max elements in the included snapshot." })),
+			targetId: Type.Optional(Type.String()), // presumably tab-targeting filters (with urlIncludes/titleIncludes); resolved on the extension side — TODO confirm
+			urlIncludes: Type.Optional(Type.String()),
+			titleIncludes: Type.Optional(Type.String()),
+			background: Type.Optional(
+				Type.Boolean({ description: "If true, fill silently without focusing Chrome. Default false." }),
+			),
+			host: Type.Optional(Type.String()),
+			port: Type.Optional(Type.Number()),
+		}),
+		async execute(_id, params): Promise<ToolTextResult> {
+			const raw = await bridge.send("page.fill", withBackground(params), DEFAULT_TIMEOUT_MS); // all params are passed through withBackground unchanged by this block
+			const result = (params.includeSnapshot ? (raw as { result: unknown }).result : raw) as Json; // with includeSnapshot the action result is read from raw.result; otherwise raw is the action result
+			const summary = summarizeActionResult(result);
+			const into = params.uid || params.selector ? ` into ${params.uid ?? params.selector}` : ""; // parses as (uid || selector) ? … : ""; uid wins when both are given
+			const base = `Filled ${params.text.length} character(s)${into}.`;
+			return { content: [{ type: "text", text: summary ? `${base} (${summary})` : base }], details: { result: raw as Json } }; // details keeps the unwrapped bridge reply (raw), not just the action result
 		},
 	});
@@ -656,10 +785,12 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
 		name: "chrome_key",
 		label: "Chrome Key",
 		description:
-			"Send a keyboard key to an existing Chrome tab (Enter, Escape, Tab, Backspace, Delete, ArrowUp/Down/Left/Right, or one character). By default focuses Chrome and activates the tab so the user can watch; pass background=true to send the key silently.",
+			"Send a keyboard key to an existing Chrome tab (Enter, Escape, Tab, Backspace, Delete, ArrowUp/Down/Left/Right, or one character). By default focuses Chrome and activates the tab so the user can watch; pass background=true to send the key silently. Pass includeSnapshot=true to verify after the keypress.",
 		promptSnippet: "Press keys in Chrome through the companion extension.",
 		parameters: Type.Object({
 			key: Type.String(),
+			includeSnapshot: Type.Optional(Type.Boolean({ description: "If true, include a fresh chrome_snapshot result after the keypress." })),
+			maxElements: Type.Optional(Type.Number({ default: MAX_ELEMENTS, description: "Max elements in the included snapshot." })),
 			targetId: Type.Optional(Type.String()),
 			urlIncludes: Type.Optional(Type.String()),
 			titleIncludes: Type.Optional(Type.String()),
@@ -670,8 +801,11 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
 			port: Type.Optional(Type.Number()),
 		}),
 		async execute(_id, params): Promise<ToolTextResult> {
-			const result = await bridge.send("page.key", withBackground(params), DEFAULT_TIMEOUT_MS);
-			return { content: [{ type: "text", text: `Pressed ${params.key}.` }], details: { result: result as Json } };
+			const raw = await bridge.send("page.key", withBackground(params), DEFAULT_TIMEOUT_MS);
+			const result = (params.includeSnapshot ? (raw as { result: unknown }).result : raw) as Json;
+			const summary = summarizeActionResult(result);
+			const base = `Pressed ${params.key}.`;
+			return { content: [{ type: "text", text: summary ? `${base} (${summary})` : base }], details: { result: raw as Json } };
 		},
 	});
@@ -697,6 +831,69 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
 		},
 	});
+	pi.registerTool({ // Registers chrome_list_console_messages: sends "page.console.list" to the bridge and returns the reply as truncated JSON text.
+		name: "chrome_list_console_messages",
+		label: "Chrome Console Messages",
+		description:
+			"List console messages captured in the page by the companion extension. Capture starts after any chrome_snapshot, chrome_evaluate, chrome_list_console_messages, or chrome_list_network_requests call installs page instrumentation.",
+		promptSnippet: "List captured console messages from the active Chrome page.",
+		parameters: Type.Object({ // every parameter is optional
+			clear: Type.Optional(Type.Boolean({ description: "Clear the captured console log after reading." })),
+			targetId: Type.Optional(Type.String()), // presumably tab-targeting filters (with urlIncludes/titleIncludes); resolved on the extension side — TODO confirm
+			urlIncludes: Type.Optional(Type.String()),
+			titleIncludes: Type.Optional(Type.String()),
+			background: Type.Optional(Type.Boolean({ description: "If true, run silently without focusing Chrome. Default false." })),
+			host: Type.Optional(Type.String()),
+			port: Type.Optional(Type.Number()),
+		}),
+		async execute(_id, params): Promise<ToolTextResult> {
+			const result = await bridge.send("page.console.list", withBackground(params), DEFAULT_TIMEOUT_MS);
+			return { content: [{ type: "text", text: truncateText(safeJson(result)) }], details: { result: result as Json } }; // text is serialized then truncated; details carries the bridge reply itself
+		},
+	});
+	pi.registerTool({ // Registers chrome_list_network_requests: sends "page.network.list" to the bridge and returns the reply as truncated JSON text.
+		name: "chrome_list_network_requests",
+		label: "Chrome Network Requests",
+		description:
+			"List fetch/XMLHttpRequest activity captured in the page by the companion extension. Capture starts after instrumentation is installed by snapshot/evaluate/network/console tools; browser document/static asset requests are not captured. Use includePreservedRequests=true to keep requests from earlier same-tab navigations that were captured before navigation.",
+		promptSnippet: "List captured XHR/fetch requests from the active Chrome page before doing DOM-heavy debugging.",
+		parameters: Type.Object({ // every parameter is optional
+			includePreservedRequests: Type.Optional(Type.Boolean({ description: "Include captured requests from earlier locations in the same tab/session." })),
+			clear: Type.Optional(Type.Boolean({ description: "Clear the captured request log after reading." })),
+			targetId: Type.Optional(Type.String()), // presumably tab-targeting filters (with urlIncludes/titleIncludes); resolved on the extension side — TODO confirm
+			urlIncludes: Type.Optional(Type.String()),
+			titleIncludes: Type.Optional(Type.String()),
+			background: Type.Optional(Type.Boolean({ description: "If true, run silently without focusing Chrome. Default false." })),
+			host: Type.Optional(Type.String()),
+			port: Type.Optional(Type.Number()),
+		}),
+		async execute(_id, params): Promise<ToolTextResult> {
+			const result = await bridge.send("page.network.list", withBackground(params), DEFAULT_TIMEOUT_MS);
+			return { content: [{ type: "text", text: truncateText(safeJson(result)) }], details: { result: result as Json } }; // text is serialized then truncated; details carries the bridge reply itself
+		},
+	});
+	pi.registerTool({ // Registers chrome_get_network_request: sends "page.network.get" to the bridge and returns the reply as truncated JSON text.
+		name: "chrome_get_network_request",
+		label: "Chrome Network Request",
+		description: "Retrieve one captured fetch/XMLHttpRequest entry, including response body when available, by requestId from chrome_list_network_requests.",
+		promptSnippet: "Fetch captured request details and response body by requestId.",
+		parameters: Type.Object({
+			requestId: Type.String({ description: "Request id returned by chrome_list_network_requests." }), // the only required parameter
+			targetId: Type.Optional(Type.String()), // presumably tab-targeting filters (with urlIncludes/titleIncludes); resolved on the extension side — TODO confirm
+			urlIncludes: Type.Optional(Type.String()),
+			titleIncludes: Type.Optional(Type.String()),
+			background: Type.Optional(Type.Boolean({ description: "If true, run silently without focusing Chrome. Default false." })),
+			host: Type.Optional(Type.String()),
+			port: Type.Optional(Type.Number()),
+		}),
+		async execute(_id, params): Promise<ToolTextResult> {
+			const result = await bridge.send("page.network.get", withBackground(params), DEFAULT_TIMEOUT_MS);
+			return { content: [{ type: "text", text: truncateText(safeJson(result)) }], details: { result: result as Json } }; // text may be cut by truncateText; details carries the bridge reply itself
+		},
+	});
 	pi.registerTool({
 		name: "chrome_screenshot",
 		label: "Chrome Screenshot",
@@ -722,11 +919,113 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
 			const cwd = workspaceCwd(ctx);
 			const defaultPath = join(cwd, ".pi", "chrome-screenshots", `${new Date().toISOString().replace(/[:.]/g, "-")}.${format}`);
 			const outputPath = params.path ? resolve(cwd, params.path) : defaultPath;
-			const result = (await bridge.send("page.screenshot", withBackground(params), DEFAULT_TIMEOUT_MS)) as { dataUrl: string; tab?: unknown };
-			const base64 = result.dataUrl.replace(/^data:image\/(?:png|jpeg);base64,/, "");
+			const result = (await bridge.send("page.screenshot", withBackground(params), params.fullPage ? 120_000 : DEFAULT_TIMEOUT_MS)) as {
+				dataUrl?: string;
+				tab?: unknown;
+				fullPage?: boolean;
+				dimensions?: { width: number; height: number; viewportHeight: number; dpr: number };
+				tiles?: Array<{ y: number; dataUrl: string }>;
+			};
 			await mkdir(dirname(outputPath), { recursive: true });
+			if (result.fullPage && result.tiles && result.dimensions) {
+				// Stitch via PNG if format is png; otherwise we fall back to writing tile files and a
+				// manifest. We avoid pulling in an image library by writing each tile next to the main
+				// path with a -tileN suffix and a stitched.json manifest.
+				const { width, height, viewportHeight, dpr } = result.dimensions;
+				const manifest: Array<{ path: string; y: number }> = [];
+				for (let i = 0; i < result.tiles.length; i++) {
+					const tile = result.tiles[i];
+					const tilePath = outputPath.replace(/(\.[^.]+)$/, `-tile${i}$1`);
+					const base64 = tile.dataUrl.replace(/^data:image\/(?:png|jpeg);base64,/, "");
+					await writeFile(tilePath, Buffer.from(base64, "base64"));
+					manifest.push({ path: tilePath, y: tile.y });
+				}
+				await writeFile(outputPath + ".json", JSON.stringify({ width, height, viewportHeight, dpr, tiles: manifest }, null, 2));
+				return {
+					content: [{ type: "text", text: `Saved ${result.tiles.length} full-page tile(s) for ${width}×${height}px page. Manifest: ${outputPath}.json` }],
+					details: { manifest: outputPath + ".json", tiles: manifest, dimensions: result.dimensions, tab: result.tab } as unknown as Record<string, unknown>,
+				};
+			}
+			if (!result.dataUrl) throw new Error("Screenshot returned no dataUrl");
+			const base64 = result.dataUrl.replace(/^data:image\/(?:png|jpeg);base64,/, "");
 			await writeFile(outputPath, Buffer.from(base64, "base64"));
 			return { content: [{ type: "text", text: `Saved Chrome screenshot to ${outputPath}` }], details: { path: outputPath, format, tab: result.tab } };
 		},
 	});
+	pi.registerTool({ // Registers chrome_hover: sends "page.hover" to the bridge and reports which target was hovered.
+		name: "chrome_hover",
+		label: "Chrome Hover",
+		description: "Hover over an element (synthetic pointerover/mouseover/pointermove) by uid, selector, or x/y. Triggers CSS :hover state and any JS hover handlers; isTrusted is false.",
+		promptSnippet: "Hover a Chrome element to trigger :hover / mouseover handlers.",
+		parameters: Type.Object({ // every parameter is optional; the schema does not enforce that a target (uid, selector, or x/y) is supplied
+			uid: Type.Optional(Type.String()),
+			selector: Type.Optional(Type.String()),
+			x: Type.Optional(Type.Number()),
+			y: Type.Optional(Type.Number()),
+			targetId: Type.Optional(Type.String()), // presumably tab-targeting filters (with urlIncludes/titleIncludes); resolved on the extension side — TODO confirm
+			urlIncludes: Type.Optional(Type.String()),
+			titleIncludes: Type.Optional(Type.String()),
+			background: Type.Optional(Type.Boolean()),
+		}),
+		async execute(_id, params): Promise<ToolTextResult> {
+			const result = await bridge.send("page.hover", withBackground(params), DEFAULT_TIMEOUT_MS);
+			return { content: [{ type: "text", text: `Hovered ${params.uid ?? params.selector ?? `${params.x},${params.y}`}` }], details: { result: result as Json } }; // label precedence: uid, then selector, then "x,y"
+		},
+	});
+	pi.registerTool({ // Registers chrome_drag: sends "page.drag" to the bridge and reports the resolved start/end targets.
+		name: "chrome_drag",
+		label: "Chrome Drag",
+		description: "Synthetic pointer drag from one uid/selector/point to another. Dispatches pointerdown → multi-step pointermove → pointerup. Note: HTML5 DataTransfer is NOT synthesized, so native HTML5 drag-and-drop targets may not respond.",
+		promptSnippet: "Drag a Chrome element from one point to another.",
+		parameters: Type.Object({ // every parameter is optional; each endpoint may be given as uid, selector, or x/y
+			fromUid: Type.Optional(Type.String()),
+			fromSelector: Type.Optional(Type.String()),
+			fromX: Type.Optional(Type.Number()),
+			fromY: Type.Optional(Type.Number()),
+			toUid: Type.Optional(Type.String()),
+			toSelector: Type.Optional(Type.String()),
+			toX: Type.Optional(Type.Number()),
+			toY: Type.Optional(Type.Number()),
+			steps: Type.Optional(Type.Number({ default: 12 })),
+			targetId: Type.Optional(Type.String()),
+			urlIncludes: Type.Optional(Type.String()),
+			titleIncludes: Type.Optional(Type.String()),
+			background: Type.Optional(Type.Boolean()),
+		}),
+		async execute(_id, params): Promise<ToolTextResult> {
+			// Label each endpoint the way chrome_click/chrome_hover label their target: uid, else selector,
+			// else "x,y". Without the coordinate fallback a point-to-point drag reported
+			// "Dragged from undefined to undefined".
+			const from = params.fromUid ?? params.fromSelector ?? `${params.fromX},${params.fromY}`;
+			const to = params.toUid ?? params.toSelector ?? `${params.toX},${params.toY}`;
+			const result = await bridge.send("page.drag", withBackground(params), DEFAULT_TIMEOUT_MS);
+			return { content: [{ type: "text", text: `Dragged from ${from} to ${to}` }], details: { result: result as Json } };
+		},
+	});
+	pi.registerTool({ // Registers chrome_upload_file: reads local files, base64-encodes them, and sends them to the bridge as a "page.upload" command.
+		name: "chrome_upload_file",
+		label: "Chrome Upload File",
+		description: "Programmatically set the files of an <input type=file> element from local file paths. Uses DataTransfer to populate input.files and dispatches input+change events. Does NOT open the native file picker; works with React/Vue/Angular controlled inputs.",
+		promptSnippet: "Attach local files to a Chrome <input type=file> without opening the native file picker.",
+		parameters: Type.Object({
+			uid: Type.Optional(Type.String()),
+			selector: Type.Optional(Type.String()),
+			paths: Type.Array(Type.String(), { description: "Local absolute file paths to upload." }), // the only required parameter; relative entries are also accepted because execute resolves them against the workspace cwd
+			targetId: Type.Optional(Type.String()),
+			urlIncludes: Type.Optional(Type.String()),
+			titleIncludes: Type.Optional(Type.String()),
+			background: Type.Optional(Type.Boolean()),
+		}),
+		async execute(_id, params, _signal, _onUpdate, ctx): Promise<ToolTextResult> {
+			const { readFile } = await import("node:fs/promises"); // loaded lazily, only when the tool actually runs
+			const { basename } = await import("node:path");
+			const cwd = workspaceCwd(ctx);
+			const files: Array<{ name: string; type: string; base64: string }> = [];
+			for (const p of params.paths) { // files are read one after another, in the order given
+				const abs = resolve(cwd, p);
+				const buf = await readFile(abs); // whole file is held in memory; a read failure rejects the tool call
+				files.push({ name: basename(abs), type: "application/octet-stream", base64: buf.toString("base64") }); // MIME type is always the generic octet-stream, regardless of extension
+			}
+			const result = await bridge.send("page.upload", withBackground({ ...params, files }), DEFAULT_TIMEOUT_MS); // NOTE(review): file contents travel inline in one bridge message — confirm size/timeout limits for large files
+			return { content: [{ type: "text", text: `Uploaded ${files.length} file(s) to ${params.uid ?? params.selector}` }], details: { result: result as Json } }; // label reads "undefined" when neither uid nor selector was supplied
+		},
+	});
 }

package/package.json CHANGED Viewed

@@ -1,31 +1,31 @@
 {
-  "name": "pi-chrome",
-  "version": "0.6.1",
-  "description": "Drive your existing logged-in Chrome from Pi \u2014 no re-login, no throwaway profile, watch the agent work in real time (or toggle quiet background mode).",
-  "keywords": [
-    "pi-package",
-    "pi-extension",
-    "chrome",
-    "browser",
-    "automation",
-    "authenticated-session",
-    "real-profile",
-    "web-debugging"
-  ],
-  "license": "MIT",
-  "type": "commonjs",
-  "files": [
-    "extensions",
-    "README.md"
-  ],
-  "pi": {
-    "extensions": [
-      "./extensions/chrome-profile-bridge/index.ts"
-    ]
-  },
-  "peerDependencies": {
-    "@earendil-works/pi-ai": "*",
-    "@earendil-works/pi-coding-agent": "*",
-    "typebox": "*"
-  }
+	"name": "pi-chrome",
+	"version": "0.8.0",
+	"description": "Drive your existing logged-in Chrome from Pi — no re-login, no throwaway profile, watch the agent work in real time (or toggle quiet background mode).",
+	"keywords": [
+		"pi-package",
+		"pi-extension",
+		"chrome",
+		"browser",
+		"automation",
+		"authenticated-session",
+		"real-profile",
+		"web-debugging"
+	],
+	"license": "MIT",
+	"type": "commonjs",
+	"files": [
+		"extensions",
+		"README.md"
+	],
+	"pi": {
+		"extensions": [
+			"./extensions/chrome-profile-bridge/index.ts"
+		]
+	},
+	"peerDependencies": {
+		"@earendil-works/pi-ai": "*",
+		"@earendil-works/pi-coding-agent": "*",
+		"typebox": "*"
+	}
 }