pi-chrome 0.7.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  import type { ExtensionAPI, ExtensionContext } from "@earendil-works/pi-coding-agent";
2
2
  import { StringEnum } from "@earendil-works/pi-ai";
3
3
  import { Type } from "typebox";
4
- import { existsSync, statSync } from "node:fs";
4
+ import { existsSync, readFileSync, statSync } from "node:fs";
5
5
  import { mkdir, writeFile } from "node:fs/promises";
6
6
  import { createServer, type IncomingMessage, type Server, type ServerResponse } from "node:http";
7
7
  import { dirname, join, resolve } from "node:path";
@@ -46,7 +46,16 @@ type BridgeResult = {
46
46
  error?: string;
47
47
  };
48
48
 
49
- const PI_CHROME_VERSION = "0.7.0";
49
+ const PI_CHROME_PKG_PATH = resolve(__dirname, "..", "..", "package.json");
50
/**
 * Reads the pi-chrome version from the package.json located at PI_CHROME_PKG_PATH.
 *
 * Best-effort by design: an unreadable file, malformed JSON, or a `version` field that is
 * missing, empty, or not a string all yield the "0.0.0-dev" fallback instead of throwing.
 *
 * @returns The package version string, or "0.0.0-dev" when it cannot be determined.
 */
function readPiChromeVersion(): string {
  try {
    // JSON.parse output is untrusted input: narrow it at runtime rather than asserting a shape,
    // so a non-string `version` can never leak out of a function typed as returning string.
    const pkg: unknown = JSON.parse(readFileSync(PI_CHROME_PKG_PATH, "utf8"));
    if (pkg !== null && typeof pkg === "object") {
      const version = (pkg as { version?: unknown }).version;
      if (typeof version === "string" && version.length > 0) return version;
    }
  } catch {
    // Deliberately swallowed: a missing or malformed package.json falls through to the fallback.
  }
  return "0.0.0-dev";
}
57
+ const PI_CHROME_VERSION = readPiChromeVersion();
58
+ const PI_CHROME_GLOBAL_KEY = "__piChromeProfileBridgeLoaded__";
50
59
  const DEFAULT_HOST = process.env.PI_CHROME_BRIDGE_HOST ?? "127.0.0.1";
51
60
  const DEFAULT_PORT = Number(process.env.PI_CHROME_BRIDGE_PORT ?? "17318");
52
61
  const DEFAULT_TIMEOUT_MS = 30_000;
@@ -83,7 +92,30 @@ function workspaceCwd(ctx: ExtensionContext): string {
83
92
 
84
93
  function browserExtensionPath(): string {
85
94
  return join(extensionRoot(), "browser-extension");
86
- }
95
+ }
96
+
97
/**
 * Extracts the hostname from a URL string.
 *
 * @param url Absolute URL to parse; may be undefined or empty.
 * @returns The hostname, or "" when `url` is falsy or cannot be parsed by `URL`.
 */
function hostnameOf(url: string | undefined): string {
  let host = "";
  if (url) {
    try {
      host = new URL(url).hostname;
    } catch {
      // Not a parseable absolute URL: keep the empty-string result.
    }
  }
  return host;
}
101
+
102
/**
 * Describes the significant fields of a click/type/fill result so the agent doesn't have to
 * guess whether the action actually changed the page.
 *
 * Only noteworthy signals are reported: `pageMutated === false`, `defaultPrevented === true`,
 * `elementVisible === false`, a truthy `occludedBy`, `valueMatches === false`, and a truthy
 * `autoplayHint`.
 *
 * @param result Raw action result; anything that is not a non-null object is ignored.
 * @returns The notes joined with "; ", or undefined when there is nothing to report.
 */
function summarizeActionResult(result: unknown): string | undefined {
  if (!result || typeof result !== "object") return undefined;
  const r = result as Record<string, unknown>;
  const parts: string[] = [];
  if (r.pageMutated === false) parts.push("pageMutated=false");
  if (r.defaultPrevented === true) parts.push("defaultPrevented=true");
  if (r.elementVisible === false) parts.push("element NOT visible");
  if (r.occludedBy) {
    // The payload is unvalidated: only interpolate tag/id when they really are strings, so a
    // malformed value can never render as "[object Object]" in the summary.
    const occluder: Record<string, unknown> =
      typeof r.occludedBy === "object" ? (r.occludedBy as Record<string, unknown>) : {};
    const tag = typeof occluder.tag === "string" ? occluder.tag : "?";
    const id = typeof occluder.id === "string" && occluder.id ? `#${occluder.id}` : "";
    parts.push(`occluded by <${tag}${id}>`);
  }
  if (r.valueMatches === false) parts.push("input value did not stick");
  if (r.autoplayHint) parts.push("autoplay-gated affordance — synthetic click may not start media");
  return parts.length ? parts.join("; ") : undefined;
}
87
119
 
88
120
  function readRequestBody(request: IncomingMessage): Promise<string> {
89
121
  return new Promise((resolveBody, rejectBody) => {
@@ -94,13 +126,14 @@ function readRequestBody(request: IncomingMessage): Promise<string> {
94
126
  });
95
127
  }
96
128
 
97
/**
 * Writes a JSON response with the bridge's standard CORS and no-store headers.
 *
 * @param response Node HTTP response to write to.
 * @param status HTTP status code.
 * @param body Value serialized with JSON.stringify as the response payload.
 * @param extraHeaders Optional headers merged last, so they override the defaults on key clash.
 */
function sendJson(response: ServerResponse, status: number, body: unknown, extraHeaders?: Record<string, string>): void {
  const headers: Record<string, string> = {
    "content-type": "application/json; charset=utf-8",
    "access-control-allow-origin": "*",
    "access-control-allow-methods": "GET,POST,OPTIONS",
    "access-control-allow-headers": "content-type",
    "cache-control": "no-store",
  };
  // Object.assign skips an undefined source, matching the `?? {}` spread fallback.
  Object.assign(headers, extraHeaders);
  // Head is written before the body is serialized, as callers have always observed.
  response.writeHead(status, headers);
  response.end(JSON.stringify(body));
}
@@ -292,7 +325,16 @@ class ChromeProfileBridge {
292
325
  if (command) this.queue.unshift(command);
293
326
  return;
294
327
  }
295
- sendJson(response, 200, command ? { type: "command", command } : { type: "none" });
328
+ // Re-read version on every /next so bumping package.json takes effect without pi restart.
329
+ const currentVersion = readPiChromeVersion();
330
+ sendJson(
331
+ response,
332
+ 200,
333
+ command
334
+ ? { type: "command", command, expectedExtensionVersion: currentVersion }
335
+ : { type: "none", expectedExtensionVersion: currentVersion },
336
+ { "x-pi-chrome-version": currentVersion },
337
+ );
296
338
  return;
297
339
  }
298
340
  if (request.method === "POST" && url.pathname === "/result") {
@@ -338,6 +380,18 @@ const imageFormatValues = ["png", "jpeg"] as const;
338
380
  const waitForValues = ["selector", "expression"] as const;
339
381
 
340
382
  export default function (pi: ExtensionAPI): void {
383
+ const globalState = globalThis as typeof globalThis & {
384
+ [PI_CHROME_GLOBAL_KEY]?: { version: string; root: string };
385
+ };
386
+ const alreadyLoaded = globalState[PI_CHROME_GLOBAL_KEY];
387
+ if (alreadyLoaded) {
388
+ console.warn(
389
+ `pi-chrome already loaded from ${alreadyLoaded.root} (v${alreadyLoaded.version}); skipping duplicate from ${extensionRoot()}.`,
390
+ );
391
+ return;
392
+ }
393
+ globalState[PI_CHROME_GLOBAL_KEY] = { version: PI_CHROME_VERSION, root: extensionRoot() };
394
+
341
395
  const bridge = new ChromeProfileBridge(DEFAULT_HOST, DEFAULT_PORT);
342
396
  let backgroundDefault = false;
343
397
 
@@ -374,21 +428,35 @@ export default function (pi: ExtensionAPI): void {
374
428
  pi.on("before_agent_start", (event) => {
375
429
  const primer = `
376
430
  <chrome-profile-bridge>
377
- Chrome control is available through the chrome_* tools via a companion Chrome extension installed in the user's normal Chrome profile.
378
- This is not CDP: it can use the user's existing Chrome windows and authenticated sessions after the user loads the companion browser extension.
379
- If chrome_* tools time out, ask the user to run /chrome-onboard, then load the bundled browser-extension folder in chrome://extensions. Prefer chrome_snapshot before clicking/typing; use stable element uids from snapshots with chrome_click/chrome_type when available. For form work, use includeSnapshot=true on actions to verify in one round trip. Avoid destructive actions unless explicitly requested. By default chrome_* tools focus Chrome and activate the target tab so the user can watch the agent work. The user can switch to silent/background mode for the whole session via /chrome-background; you can also pass background=true on a single tool call when the user explicitly wants the action to be silent (for example, scraping while they keep working in another app).
431
+ Chrome control is available through the chrome_* tools via a companion Chrome extension installed in the user's normal Chrome profile. Tools target the existing signed-in profile, no CDP, no throwaway profile.
432
+
433
+ Capability model (important):
434
+ - All input is **synthetic DOM events** (\`isTrusted=false\`). Synthetic events drive React/Vue/Angular state fine, but they do NOT satisfy Chrome's user-activation gates: audio/video autoplay, clipboard write, file pickers, fullscreen, and Web Push prompts will NOT open from a chrome_click.
435
+ - \`chrome_evaluate\` runs in MAIN world via the Function constructor. It works on pages with strict CSP (\`script-src 'self'\` without \`'unsafe-eval'\`), and surfaces thrown exceptions.
436
+ - Tool results include \`pageMutated\`, \`defaultPrevented\`, \`elementVisible\`, \`occludedBy\`, and (for type/fill) \`valueMatches\`. If \`pageMutated\` is false after a click that should have changed something, the click likely didn't take effect — do NOT just retry; check the action result and snapshot for the cause.
437
+
438
+ Usage rules:
439
+ 1. \`chrome_snapshot\` before clicking/typing; pass \`uid\` over \`selector\`.
440
+ 2. \`includeSnapshot=true\` on click/type/fill to verify in one round trip.
441
+ 3. If \`chrome_evaluate\` returns null when you expected a value, the expression evaluated to null/undefined in the page; surface the value via \`JSON.stringify\` to confirm.
442
+ 4. \`chrome_navigate\` supports an optional \`initScript\` that runs at document_start in MAIN world for the next navigation (good for seeding localStorage or stubbing Date.now).
443
+ 5. By default chrome_* tools focus Chrome so the user can watch; pass \`background=true\` or run /chrome-background to silence the whole session.
444
+ 6. If you hit an autoplay/clipboard/file-picker gate, tell the user; this bridge cannot satisfy it.
445
+ 7. Run /chrome-doctor when in doubt about connectivity or capabilities.
380
446
  </chrome-profile-bridge>`;
381
447
  return { systemPrompt: event.systemPrompt + primer };
382
448
  });
383
449
 
384
450
  pi.registerCommand("chrome-doctor", {
385
451
  description:
386
- "Check Chrome bridge connectivity and diagnose setup. Reports the local bridge, companion Chrome extension status (ID + version), and a one-line fix for common failures (extension not loaded, stale service worker, version drift).",
452
+ "Check Chrome bridge connectivity and capability tier. Probes the local bridge, the companion Chrome extension, MAIN-world evaluation, and CDP availability, and prints one-line fixes for common failures.",
387
453
  handler: async (_args, ctx) => {
388
454
  ctx.ui.notify("Performing Chrome bridge health check", "info");
389
455
  const lines: string[] = [`pi-chrome v${PI_CHROME_VERSION}`];
390
456
  const status = bridge.status();
391
457
  lines.push(`• Local bridge: mode=${status.mode}, url=${status.url}`);
458
+ let extensionAlive = false;
459
+ let versionMismatch = false;
392
460
  try {
393
461
  const started = Date.now();
394
462
  const version = (await bridge.send("tab.version", {}, 35_000)) as {
@@ -397,14 +465,17 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
397
465
  bridgeUrl?: string;
398
466
  };
399
467
  const latencyMs = Date.now() - started;
400
- if (version.extensionId)
401
- lines.push(`✓ Companion Chrome extension responding (ID: ${version.extensionId}, ext v${version.extensionVersion ?? "unknown"}, latency ${latencyMs}ms)`);
402
- else lines.push(`✓ Companion Chrome extension responding (no extension ID reported, latency ${latencyMs}ms)`);
403
- if (version.bridgeUrl) lines.push(`• Extension polling: ${version.bridgeUrl}`);
468
+ extensionAlive = true;
404
469
  if (version.extensionVersion && version.extensionVersion !== PI_CHROME_VERSION) {
470
+ versionMismatch = true;
405
471
  lines.push(
406
- `⚠ Extension version (${version.extensionVersion}) differs from pi-chrome (${PI_CHROME_VERSION}). Reload "Pi Existing Chrome Profile Bridge" in chrome://extensions to pick up the latest service worker.`,
472
+ `✗ EXTENSION VERSION MISMATCH: companion extension is v${version.extensionVersion}, but pi-chrome is v${PI_CHROME_VERSION}.`,
473
+ ` All chrome_* tools will run with the OLD extension code until this is fixed.`,
474
+ ` Fix: open chrome://extensions and click reload on "Pi Existing Chrome Profile Bridge".`,
475
+ ` (Future version drifts will self-heal: the extension now polls pi-chrome's expected version and reloads itself.)`,
407
476
  );
477
+ } else {
478
+ lines.push(`✓ Companion Chrome extension responding (ID: ${version.extensionId ?? "?"}, ext v${version.extensionVersion ?? "?"}, latency ${latencyMs}ms)`);
408
479
  }
409
480
  } catch (error) {
410
481
  const message = (error as Error).message;
@@ -415,6 +486,45 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
415
486
  lines.push(" Fix: run /chrome-onboard, then load the bundled browser-extension folder in chrome://extensions and keep that Chrome window open.");
416
487
  }
417
488
  }
489
+
490
+ if (extensionAlive && !versionMismatch) {
491
+ // MAIN-world evaluate probe.
492
+ try {
493
+ const value = await bridge.send("page.evaluate", { expression: "1+1", awaitPromise: true, foreground: false }, 10_000);
494
+ if (value === 2) lines.push(`✓ chrome_evaluate("1+1") = 2`);
495
+ else lines.push(`⚠ chrome_evaluate("1+1") returned ${JSON.stringify(value)} (expected 2). The current tab may have a restrictive CSP or be a chrome:// URL.`);
496
+ } catch (error) {
497
+ lines.push(`✗ chrome_evaluate failed: ${(error as Error).message}`);
498
+ }
499
+
500
+ // Capability probe via MAIN-world helper.
501
+ try {
502
+ const probe = (await bridge.send("page.probe", { foreground: false }, 10_000)) as Record<string, unknown>;
503
+ if (probe && probe.arithmetic === 2) lines.push(`✓ MAIN-world helper injection works (location=${hostnameOf(String(probe.location))})`);
504
+ if (probe && probe.webdriver) lines.push(`⚠ navigator.webdriver=true on current tab — site fingerprinting may flag automation.`);
505
+ } catch (error) {
506
+ lines.push(`⚠ page.probe failed: ${(error as Error).message}`);
507
+ }
508
+ } else if (versionMismatch) {
509
+ lines.push(`… Skipped MAIN-world capability checks because the loaded extension is stale.`);
510
+ }
511
+
512
+ // CDP availability hint.
513
+ try {
514
+ const controller = new AbortController();
515
+ const timer = setTimeout(() => controller.abort(), 250);
516
+ const response = await fetch("http://127.0.0.1:9222/json/version", { signal: controller.signal }).catch(() => undefined);
517
+ clearTimeout(timer);
518
+ if (response && response.ok) {
519
+ const info = (await response.json().catch(() => ({}))) as { Browser?: string };
520
+ lines.push(`✓ CDP endpoint reachable at 127.0.0.1:9222 (${info.Browser ?? "unknown"}). Trusted input via CDP is not yet wired into pi-chrome — reserved for a future release.`);
521
+ } else {
522
+ lines.push(`• CDP not available (no listener on 127.0.0.1:9222). Synthetic input only; autoplay/clipboard/file-picker gates cannot be satisfied. Future pi-chrome versions will use CDP for trusted input when this port is enabled.`);
523
+ }
524
+ } catch {
525
+ lines.push(`• CDP probe inconclusive.`);
526
+ }
527
+
418
528
  ctx.ui.notify(lines.join("\n"), "info");
419
529
  },
420
530
  });
@@ -540,6 +650,9 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
540
650
  urlIncludes: Type.Optional(Type.String()),
541
651
  titleIncludes: Type.Optional(Type.String()),
542
652
  maxElements: Type.Optional(Type.Number({ default: MAX_ELEMENTS })),
653
+ containingText: Type.Optional(Type.String({ description: "Only return elements whose label/text contains this string (case-insensitive). Useful when the page has many controls." })),
654
+ roleFilter: Type.Optional(Type.String({ description: "Only return elements matching this ARIA role or tag name (case-insensitive). e.g. 'button', 'link', 'textbox'." })),
655
+ nearUid: Type.Optional(Type.String({ description: "Sort elements by proximity to this snapshot uid. Useful for finding controls near a known anchor." })),
543
656
  background: Type.Optional(
544
657
  Type.Boolean({ description: "If true, run silently in the background without focusing Chrome. Default false (Chrome focuses + tab activates so the user can watch)." }),
545
658
  ),
@@ -569,6 +682,7 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
569
682
  titleIncludes: Type.Optional(Type.String()),
570
683
  waitUntilLoad: Type.Optional(Type.Boolean({ default: true })),
571
684
  timeoutMs: Type.Optional(Type.Number({ default: 15_000 })),
685
+ initScript: Type.Optional(Type.String({ description: "Optional JavaScript source to run in MAIN world at document_start of the next navigation. Useful for seeding localStorage, stubbing Date.now(), or defining navigator.webdriver=undefined. Requires the companion extension's webNavigation permission." })),
572
686
  background: Type.Optional(
573
687
  Type.Boolean({ description: "If true, navigate silently without focusing Chrome. Default false." }),
574
688
  ),
@@ -576,8 +690,8 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
576
690
  port: Type.Optional(Type.Number()),
577
691
  }),
578
692
  async execute(_id, params): Promise<ToolTextResult> {
579
- const result = await bridge.send("page.navigate", withBackground(params), params.timeoutMs ?? 15_000);
580
- return { content: [{ type: "text", text: `Navigated to ${params.url}` }], details: { result: result as Json } };
693
+ const result = await bridge.send("page.navigate", withBackground(params), (params.timeoutMs ?? 15_000) + 2_000);
694
+ return { content: [{ type: "text", text: `Navigated to ${params.url}${params.initScript ? " (with initScript)" : ""}` }], details: { result: result as Json } };
581
695
  },
582
696
  });
583
697
 
@@ -590,7 +704,6 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
590
704
  parameters: Type.Object({
591
705
  expression: Type.String(),
592
706
  awaitPromise: Type.Optional(Type.Boolean({ default: true })),
593
- returnByValue: Type.Optional(Type.Boolean({ default: true })),
594
707
  targetId: Type.Optional(Type.String()),
595
708
  urlIncludes: Type.Optional(Type.String()),
596
709
  titleIncludes: Type.Optional(Type.String()),
@@ -602,7 +715,12 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
602
715
  }),
603
716
  async execute(_id, params): Promise<ToolTextResult> {
604
717
  const value = await bridge.send("page.evaluate", withBackground(params), DEFAULT_TIMEOUT_MS);
605
- return { content: [{ type: "text", text: truncateText(typeof value === "string" ? value : safeJson(value)) }], details: { value: value as Json } };
718
+ const text = value === undefined
719
+ ? "undefined"
720
+ : typeof value === "string"
721
+ ? value
722
+ : safeJson(value) ?? "undefined";
723
+ return { content: [{ type: "text", text: truncateText(text) }], details: { value: value as Json } };
606
724
  },
607
725
  });
608
726
 
@@ -629,8 +747,12 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
629
747
  port: Type.Optional(Type.Number()),
630
748
  }),
631
749
  async execute(_id, params): Promise<ToolTextResult> {
632
- const result = await bridge.send("page.click", withBackground(params), DEFAULT_TIMEOUT_MS);
633
- return { content: [{ type: "text", text: `Clicked ${params.uid ?? params.selector ?? `${params.x},${params.y}`}` }], details: { result: result as Json } };
750
+ const raw = await bridge.send("page.click", withBackground(params), DEFAULT_TIMEOUT_MS);
751
+ const result = (params.includeSnapshot ? (raw as { result: unknown }).result : raw) as Json;
752
+ const summary = summarizeActionResult(result);
753
+ const target = params.uid ?? params.selector ?? `${params.x},${params.y}`;
754
+ const text = summary ? `Clicked ${target} — ${summary}` : `Clicked ${target}`;
755
+ return { content: [{ type: "text", text }], details: { result: raw as Json } };
634
756
  },
635
757
  });
636
758
 
@@ -657,8 +779,12 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
657
779
  port: Type.Optional(Type.Number()),
658
780
  }),
659
781
  async execute(_id, params): Promise<ToolTextResult> {
660
- const result = await bridge.send("page.type", withBackground(params), DEFAULT_TIMEOUT_MS);
661
- return { content: [{ type: "text", text: `Typed ${params.text.length} character(s)${params.uid || params.selector ? ` into ${params.uid ?? params.selector}` : ""}.` }], details: { result: result as Json } };
782
+ const raw = await bridge.send("page.type", withBackground(params), DEFAULT_TIMEOUT_MS);
783
+ const result = (params.includeSnapshot ? (raw as { result: unknown }).result : raw) as Json;
784
+ const summary = summarizeActionResult(result);
785
+ const into = params.uid || params.selector ? ` into ${params.uid ?? params.selector}` : "";
786
+ const base = `Typed ${params.text.length} character(s)${into}.`;
787
+ return { content: [{ type: "text", text: summary ? `${base} (${summary})` : base }], details: { result: raw as Json } };
662
788
  },
663
789
  });
664
790
 
@@ -685,8 +811,12 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
685
811
  port: Type.Optional(Type.Number()),
686
812
  }),
687
813
  async execute(_id, params): Promise<ToolTextResult> {
688
- const result = await bridge.send("page.fill", withBackground(params), DEFAULT_TIMEOUT_MS);
689
- return { content: [{ type: "text", text: `Filled ${params.text.length} character(s)${params.uid || params.selector ? ` into ${params.uid ?? params.selector}` : ""}.` }], details: { result: result as Json } };
814
+ const raw = await bridge.send("page.fill", withBackground(params), DEFAULT_TIMEOUT_MS);
815
+ const result = (params.includeSnapshot ? (raw as { result: unknown }).result : raw) as Json;
816
+ const summary = summarizeActionResult(result);
817
+ const into = params.uid || params.selector ? ` into ${params.uid ?? params.selector}` : "";
818
+ const base = `Filled ${params.text.length} character(s)${into}.`;
819
+ return { content: [{ type: "text", text: summary ? `${base} (${summary})` : base }], details: { result: raw as Json } };
690
820
  },
691
821
  });
692
822
 
@@ -710,8 +840,11 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
710
840
  port: Type.Optional(Type.Number()),
711
841
  }),
712
842
  async execute(_id, params): Promise<ToolTextResult> {
713
- const result = await bridge.send("page.key", withBackground(params), DEFAULT_TIMEOUT_MS);
714
- return { content: [{ type: "text", text: `Pressed ${params.key}.` }], details: { result: result as Json } };
843
+ const raw = await bridge.send("page.key", withBackground(params), DEFAULT_TIMEOUT_MS);
844
+ const result = (params.includeSnapshot ? (raw as { result: unknown }).result : raw) as Json;
845
+ const summary = summarizeActionResult(result);
846
+ const base = `Pressed ${params.key}.`;
847
+ return { content: [{ type: "text", text: summary ? `${base} (${summary})` : base }], details: { result: raw as Json } };
715
848
  },
716
849
  });
717
850
 
@@ -825,11 +958,135 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
825
958
  const cwd = workspaceCwd(ctx);
826
959
  const defaultPath = join(cwd, ".pi", "chrome-screenshots", `${new Date().toISOString().replace(/[:.]/g, "-")}.${format}`);
827
960
  const outputPath = params.path ? resolve(cwd, params.path) : defaultPath;
828
- const result = (await bridge.send("page.screenshot", withBackground(params), DEFAULT_TIMEOUT_MS)) as { dataUrl: string; tab?: unknown };
829
- const base64 = result.dataUrl.replace(/^data:image\/(?:png|jpeg);base64,/, "");
961
+ const result = (await bridge.send("page.screenshot", withBackground(params), params.fullPage ? 120_000 : DEFAULT_TIMEOUT_MS)) as {
962
+ dataUrl?: string;
963
+ tab?: unknown;
964
+ fullPage?: boolean;
965
+ dimensions?: { width: number; height: number; viewportHeight: number; dpr: number };
966
+ tiles?: Array<{ y: number; dataUrl: string }>;
967
+ };
830
968
  await mkdir(dirname(outputPath), { recursive: true });
969
+ if (result.fullPage && result.tiles && result.dimensions) {
970
+ // Stitch via PNG if format is png; otherwise we fall back to writing tile files and a
971
+ // manifest. We avoid pulling in an image library by writing each tile next to the main
972
+ // path with a -tileN suffix and a stitched.json manifest.
973
+ const { width, height, viewportHeight, dpr } = result.dimensions;
974
+ const manifest: Array<{ path: string; y: number }> = [];
975
+ for (let i = 0; i < result.tiles.length; i++) {
976
+ const tile = result.tiles[i];
977
+ const tilePath = outputPath.replace(/(\.[^.]+)$/, `-tile${i}$1`);
978
+ const base64 = tile.dataUrl.replace(/^data:image\/(?:png|jpeg);base64,/, "");
979
+ await writeFile(tilePath, Buffer.from(base64, "base64"));
980
+ manifest.push({ path: tilePath, y: tile.y });
981
+ }
982
+ await writeFile(outputPath + ".json", JSON.stringify({ width, height, viewportHeight, dpr, tiles: manifest }, null, 2));
983
+ return {
984
+ content: [{ type: "text", text: `Saved ${result.tiles.length} full-page tile(s) for ${width}×${height}px page. Manifest: ${outputPath}.json` }],
985
+ details: { manifest: outputPath + ".json", tiles: manifest, dimensions: result.dimensions, tab: result.tab } as unknown as Record<string, unknown>,
986
+ };
987
+ }
988
+ if (!result.dataUrl) throw new Error("Screenshot returned no dataUrl");
989
+ const base64 = result.dataUrl.replace(/^data:image\/(?:png|jpeg);base64,/, "");
831
990
  await writeFile(outputPath, Buffer.from(base64, "base64"));
832
991
  return { content: [{ type: "text", text: `Saved Chrome screenshot to ${outputPath}` }], details: { path: outputPath, format, tab: result.tab } };
833
992
  },
834
993
  });
994
+
995
+ pi.registerTool({
996
+ name: "chrome_hover",
997
+ label: "Chrome Hover",
998
+ description: "Hover over an element (synthetic pointerover/mouseover/pointermove) by uid, selector, or x/y. Triggers CSS :hover state and any JS hover handlers; isTrusted is false.",
999
+ promptSnippet: "Hover a Chrome element to trigger :hover / mouseover handlers.",
1000
+ parameters: Type.Object({
1001
+ uid: Type.Optional(Type.String()),
1002
+ selector: Type.Optional(Type.String()),
1003
+ x: Type.Optional(Type.Number()),
1004
+ y: Type.Optional(Type.Number()),
1005
+ targetId: Type.Optional(Type.String()),
1006
+ urlIncludes: Type.Optional(Type.String()),
1007
+ titleIncludes: Type.Optional(Type.String()),
1008
+ background: Type.Optional(Type.Boolean()),
1009
+ }),
1010
+ async execute(_id, params): Promise<ToolTextResult> {
1011
+ const result = await bridge.send("page.hover", withBackground(params), DEFAULT_TIMEOUT_MS);
1012
+ return { content: [{ type: "text", text: `Hovered ${params.uid ?? params.selector ?? `${params.x},${params.y}`}` }], details: { result: result as Json } };
1013
+ },
1014
+ });
1015
+
1016
+ pi.registerTool({
1017
+ name: "chrome_drag",
1018
+ label: "Chrome Drag",
1019
+ description: "Synthetic drag from one uid/selector/point to another. Dispatches pointerdown → humanised pointermove path → dragstart/drag/dragenter/dragover/dragleave/drop/dragend with a shared HTML5 DataTransfer, then pointerup. isTrusted=false.",
1020
+ promptSnippet: "Drag a Chrome element from one point to another.",
1021
+ parameters: Type.Object({
1022
+ fromUid: Type.Optional(Type.String()),
1023
+ fromSelector: Type.Optional(Type.String()),
1024
+ fromX: Type.Optional(Type.Number()),
1025
+ fromY: Type.Optional(Type.Number()),
1026
+ toUid: Type.Optional(Type.String()),
1027
+ toSelector: Type.Optional(Type.String()),
1028
+ toX: Type.Optional(Type.Number()),
1029
+ toY: Type.Optional(Type.Number()),
1030
+ steps: Type.Optional(Type.Number({ default: 12 })),
1031
+ targetId: Type.Optional(Type.String()),
1032
+ urlIncludes: Type.Optional(Type.String()),
1033
+ titleIncludes: Type.Optional(Type.String()),
1034
+ background: Type.Optional(Type.Boolean()),
1035
+ }),
1036
/**
 * Sends a "page.drag" command through the bridge and reports which endpoints were used.
 *
 * Each endpoint is described by its uid, else its selector, else its x,y point, so a
 * coordinate-only drag is reported with its coordinates instead of "undefined".
 */
async execute(_id, params): Promise<ToolTextResult> {
  const result = await bridge.send("page.drag", withBackground(params), DEFAULT_TIMEOUT_MS);
  // Same fallback chain chrome_click and chrome_hover use for their target label.
  const from = params.fromUid ?? params.fromSelector ?? `${params.fromX},${params.fromY}`;
  const to = params.toUid ?? params.toSelector ?? `${params.toX},${params.toY}`;
  return { content: [{ type: "text", text: `Dragged from ${from} to ${to}` }], details: { result: result as Json } };
},
1040
+ });
1041
+
1042
+ pi.registerTool({
1043
+ name: "chrome_scroll",
1044
+ label: "Chrome Scroll",
1045
+ description: "Scroll the page or a specific scrollable element by dispatching real wheel events with momentum-shaped deltas, then applying the scroll. Positive deltaY scrolls down. Pass uid/selector to scroll within a container, otherwise the document scrolls.",
1046
+ promptSnippet: "Scroll a Chrome page or container via wheel events (not raw scrollTop).",
1047
+ parameters: Type.Object({
1048
+ uid: Type.Optional(Type.String()),
1049
+ selector: Type.Optional(Type.String()),
1050
+ deltaY: Type.Optional(Type.Number({ description: "Pixels to scroll vertically. Positive = down." })),
1051
+ deltaX: Type.Optional(Type.Number({ description: "Pixels to scroll horizontally. Positive = right." })),
1052
+ steps: Type.Optional(Type.Number({ description: "Number of wheel events to dispatch. Defaults to ceil(|deltaY|/100)." })),
1053
+ targetId: Type.Optional(Type.String()),
1054
+ urlIncludes: Type.Optional(Type.String()),
1055
+ titleIncludes: Type.Optional(Type.String()),
1056
+ background: Type.Optional(Type.Boolean()),
1057
+ }),
1058
+ async execute(_id, params): Promise<ToolTextResult> {
1059
+ const result = await bridge.send("page.scroll", withBackground(params), DEFAULT_TIMEOUT_MS);
1060
+ return { content: [{ type: "text", text: `Scrolled dy=${params.deltaY ?? 0} dx=${params.deltaX ?? 0}` }], details: { result: result as Json } };
1061
+ },
1062
+ });
1063
+
1064
+ pi.registerTool({
1065
+ name: "chrome_upload_file",
1066
+ label: "Chrome Upload File",
1067
+ description: "Programmatically set the files of an <input type=file> element from local file paths. Uses DataTransfer to populate input.files and dispatches input+change events. Does NOT open the native file picker; works with React/Vue/Angular controlled inputs.",
1068
+ promptSnippet: "Attach local files to a Chrome <input type=file> without opening the native file picker.",
1069
+ parameters: Type.Object({
1070
+ uid: Type.Optional(Type.String()),
1071
+ selector: Type.Optional(Type.String()),
1072
+ paths: Type.Array(Type.String(), { description: "Local absolute file paths to upload." }),
1073
+ targetId: Type.Optional(Type.String()),
1074
+ urlIncludes: Type.Optional(Type.String()),
1075
+ titleIncludes: Type.Optional(Type.String()),
1076
+ background: Type.Optional(Type.Boolean()),
1077
+ }),
1078
+ async execute(_id, params, _signal, _onUpdate, ctx): Promise<ToolTextResult> {
1079
+ const { readFile } = await import("node:fs/promises");
1080
+ const { basename } = await import("node:path");
1081
+ const cwd = workspaceCwd(ctx);
1082
+ const files: Array<{ name: string; type: string; base64: string }> = [];
1083
+ for (const p of params.paths) {
1084
+ const abs = resolve(cwd, p);
1085
+ const buf = await readFile(abs);
1086
+ files.push({ name: basename(abs), type: "application/octet-stream", base64: buf.toString("base64") });
1087
+ }
1088
+ const result = await bridge.send("page.upload", withBackground({ ...params, files }), DEFAULT_TIMEOUT_MS);
1089
+ return { content: [{ type: "text", text: `Uploaded ${files.length} file(s) to ${params.uid ?? params.selector}` }], details: { result: result as Json } };
1090
+ },
1091
+ });
835
1092
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-chrome",
3
- "version": "0.7.0",
3
+ "version": "0.9.1",
4
4
  "description": "Drive your existing logged-in Chrome from Pi — no re-login, no throwaway profile, watch the agent work in real time (or toggle quiet background mode).",
5
5
  "keywords": [
6
6
  "pi-package",