pi-chrome 0.6.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -46,7 +46,7 @@ type BridgeResult = {
46
46
  error?: string;
47
47
  };
48
48
 
49
- const PI_CHROME_VERSION = "0.6.1";
49
+ const PI_CHROME_VERSION = "0.8.0";
50
50
  const DEFAULT_HOST = process.env.PI_CHROME_BRIDGE_HOST ?? "127.0.0.1";
51
51
  const DEFAULT_PORT = Number(process.env.PI_CHROME_BRIDGE_PORT ?? "17318");
52
52
  const DEFAULT_TIMEOUT_MS = 30_000;
@@ -83,7 +83,30 @@ function workspaceCwd(ctx: ExtensionContext): string {
83
83
 
84
84
  function browserExtensionPath(): string {
85
85
  return join(extensionRoot(), "browser-extension");
86
- }
86
+ }
87
+
88
+ function hostnameOf(url: string | undefined): string {
89
+ if (!url) return "";
90
+ try { return new URL(url).hostname; } catch { return ""; }
91
+ }
92
+
93
+ // Summarizes the problem-indicating fields of an action (click/type/fill/key) result so the agent doesn't have to
94
+ // guess whether the action actually changed the page.
95
+ function summarizeActionResult(result: unknown): string | undefined {
96
+ if (!result || typeof result !== "object") return undefined;
97
+ const r = result as Record<string, unknown>;
98
+ const parts: string[] = [];
99
+ if (r.pageMutated === false) parts.push("pageMutated=false");
100
+ if (r.defaultPrevented === true) parts.push("defaultPrevented=true");
101
+ if (r.elementVisible === false) parts.push("element NOT visible");
102
+ if (r.occludedBy) {
103
+ const o = r.occludedBy as { tag?: string; id?: string };
104
+ parts.push(`occluded by <${o.tag ?? "?"}${o.id ? "#" + o.id : ""}>`);
105
+ }
106
+ if (r.valueMatches === false) parts.push("input value did not stick");
107
+ if (r.autoplayHint) parts.push("autoplay-gated affordance — synthetic click may not start media");
108
+ return parts.length ? parts.join("; ") : undefined;
109
+ }
87
110
 
88
111
  function readRequestBody(request: IncomingMessage): Promise<string> {
89
112
  return new Promise((resolveBody, rejectBody) => {
@@ -374,29 +397,44 @@ export default function (pi: ExtensionAPI): void {
374
397
  pi.on("before_agent_start", (event) => {
375
398
  const primer = `
376
399
  <chrome-profile-bridge>
377
- Chrome control is available through the chrome_* tools via a companion Chrome extension installed in the user's normal Chrome profile.
378
- This is not CDP: it can use the user's existing Chrome windows and authenticated sessions after the user loads the companion browser extension.
379
- If chrome_* tools time out, ask the user to run /chrome-onboard, then load the bundled browser-extension folder in chrome://extensions. Prefer chrome_snapshot before clicking/typing. Avoid destructive actions unless explicitly requested. By default chrome_* tools focus Chrome and activate the target tab so the user can watch the agent work. The user can switch to silent/background mode for the whole session via /chrome-background; you can also pass background=true on a single tool call when the user explicitly wants the action to be silent (for example, scraping while they keep working in another app).
400
+ Chrome control is available through the chrome_* tools via a companion Chrome extension installed in the user's normal Chrome profile. Tools target the existing signed-in profile, no CDP, no throwaway profile.
401
+
402
+ Capability model (important):
403
+ - All input is **synthetic DOM events** (\`isTrusted=false\`). Synthetic events drive React/Vue/Angular state fine, but they do NOT satisfy Chrome's user-activation gates: audio/video autoplay, clipboard write, file pickers, fullscreen, and Web Push prompts will NOT open from a chrome_click.
404
+ - \`chrome_evaluate\` runs in MAIN world via the Function constructor. It works on pages with strict CSP (\`script-src 'self'\` without \`'unsafe-eval'\`), and surfaces thrown exceptions.
405
+ - Tool results include \`pageMutated\`, \`defaultPrevented\`, \`elementVisible\`, \`occludedBy\`, and (for type/fill) \`valueMatches\`. If \`pageMutated\` is false after a click that should have changed something, the click likely didn't take effect — do NOT just retry; check the action result and snapshot for the cause.
406
+
407
+ Usage rules:
408
+ 1. \`chrome_snapshot\` before clicking/typing; pass \`uid\` over \`selector\`.
409
+ 2. \`includeSnapshot=true\` on click/type/fill to verify in one round trip.
410
+ 3. If \`chrome_evaluate\` returns null when you expected a value, the expression evaluated to null/undefined in the page; surface the value via \`JSON.stringify\` to confirm.
411
+ 4. \`chrome_navigate\` supports an optional \`initScript\` that runs at document_start in MAIN world for the next navigation (good for seeding localStorage or stubbing Date.now).
412
+ 5. By default chrome_* tools focus Chrome so the user can watch; pass \`background=true\` or run /chrome-background to silence the whole session.
413
+ 6. If you hit an autoplay/clipboard/file-picker gate, tell the user; this bridge cannot satisfy it.
414
+ 7. Run /chrome-doctor when in doubt about connectivity or capabilities.
380
415
  </chrome-profile-bridge>`;
381
416
  return { systemPrompt: event.systemPrompt + primer };
382
417
  });
383
418
 
384
419
  pi.registerCommand("chrome-doctor", {
385
420
  description:
386
- "Check Chrome bridge connectivity and diagnose setup. Reports the local bridge, companion Chrome extension status (ID + version), and a one-line fix for common failures (extension not loaded, stale service worker, version drift).",
421
+ "Check Chrome bridge connectivity and capability tier. Probes the local bridge, the companion Chrome extension, MAIN-world evaluation, and CDP availability, and prints one-line fixes for common failures.",
387
422
  handler: async (_args, ctx) => {
388
423
  ctx.ui.notify("Performing Chrome bridge health check", "info");
389
424
  const lines: string[] = [`pi-chrome v${PI_CHROME_VERSION}`];
390
425
  const status = bridge.status();
391
426
  lines.push(`• Local bridge: mode=${status.mode}, url=${status.url}`);
427
+ let extensionAlive = false;
392
428
  try {
429
+ const started = Date.now();
393
430
  const version = (await bridge.send("tab.version", {}, 35_000)) as {
394
431
  extensionId?: string;
395
432
  extensionVersion?: string;
433
+ bridgeUrl?: string;
396
434
  };
397
- if (version.extensionId)
398
- lines.push(`✓ Companion Chrome extension responding (ID: ${version.extensionId}, ext v${version.extensionVersion ?? "unknown"})`);
399
- else lines.push("✓ Companion Chrome extension responding (no extension ID reported)");
435
+ const latencyMs = Date.now() - started;
436
+ extensionAlive = true;
437
+ lines.push(`✓ Companion Chrome extension responding (ID: ${version.extensionId ?? "?"}, ext v${version.extensionVersion ?? "?"}, latency ${latencyMs}ms)`);
400
438
  if (version.extensionVersion && version.extensionVersion !== PI_CHROME_VERSION) {
401
439
  lines.push(
402
440
  `⚠ Extension version (${version.extensionVersion}) differs from pi-chrome (${PI_CHROME_VERSION}). Reload "Pi Existing Chrome Profile Bridge" in chrome://extensions to pick up the latest service worker.`,
@@ -411,6 +449,43 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
411
449
  lines.push(" Fix: run /chrome-onboard, then load the bundled browser-extension folder in chrome://extensions and keep that Chrome window open.");
412
450
  }
413
451
  }
452
+
453
+ if (extensionAlive) {
454
+ // MAIN-world evaluate probe.
455
+ try {
456
+ const value = await bridge.send("page.evaluate", { expression: "1+1", awaitPromise: true, foreground: false }, 10_000);
457
+ if (value === 2) lines.push(`✓ chrome_evaluate("1+1") = 2`);
458
+ else lines.push(`⚠ chrome_evaluate("1+1") returned ${JSON.stringify(value)} (expected 2). The current tab may have a restrictive CSP or be a chrome:// URL.`);
459
+ } catch (error) {
460
+ lines.push(`✗ chrome_evaluate failed: ${(error as Error).message}`);
461
+ }
462
+
463
+ // Capability probe via MAIN-world helper.
464
+ try {
465
+ const probe = (await bridge.send("page.probe", { foreground: false }, 10_000)) as Record<string, unknown>;
466
+ if (probe && probe.arithmetic === 2) lines.push(`✓ MAIN-world helper injection works (location=${hostnameOf(String(probe.location))})`);
467
+ if (probe && probe.webdriver) lines.push(`⚠ navigator.webdriver=true on current tab — site fingerprinting may flag automation.`);
468
+ } catch (error) {
469
+ lines.push(`⚠ page.probe failed: ${(error as Error).message}`);
470
+ }
471
+ }
472
+
473
+ // CDP availability hint.
474
+ try {
475
+ const controller = new AbortController();
476
+ const timer = setTimeout(() => controller.abort(), 250);
477
+ const response = await fetch("http://127.0.0.1:9222/json/version", { signal: controller.signal }).catch(() => undefined);
478
+ clearTimeout(timer);
479
+ if (response && response.ok) {
480
+ const info = (await response.json().catch(() => ({}))) as { Browser?: string };
481
+ lines.push(`✓ CDP endpoint reachable at 127.0.0.1:9222 (${info.Browser ?? "unknown"}). Trusted input via CDP is not yet wired into pi-chrome — reserved for a future release.`);
482
+ } else {
483
+ lines.push(`• CDP not available (no listener on 127.0.0.1:9222). Synthetic input only; autoplay/clipboard/file-picker gates cannot be satisfied. Future pi-chrome versions will use CDP for trusted input when this port is enabled.`);
484
+ }
485
+ } catch {
486
+ lines.push(`• CDP probe inconclusive.`);
487
+ }
488
+
414
489
  ctx.ui.notify(lines.join("\n"), "info");
415
490
  },
416
491
  });
@@ -529,13 +604,16 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
529
604
  name: "chrome_snapshot",
530
605
  label: "Chrome Snapshot",
531
606
  description:
532
- "Inspect a page in the user's existing Chrome profile: title, URL, visible body text, viewport, and clickable/focusable elements with CSS selectors. Brings Chrome to the foreground by default so the user can watch; pass background=true to inspect silently.",
607
+ "Inspect a page in the user's existing Chrome profile: title, URL, visible body text, viewport, and clickable/focusable elements with stable uids plus CSS selectors. Brings Chrome to the foreground by default so the user can watch; pass background=true to inspect silently.",
533
608
  promptSnippet: "Inspect the current Chrome page and get CSS selectors for browser automation.",
534
609
  parameters: Type.Object({
535
610
  targetId: Type.Optional(Type.String()),
536
611
  urlIncludes: Type.Optional(Type.String()),
537
612
  titleIncludes: Type.Optional(Type.String()),
538
613
  maxElements: Type.Optional(Type.Number({ default: MAX_ELEMENTS })),
614
+ containingText: Type.Optional(Type.String({ description: "Only return elements whose label/text contains this string (case-insensitive). Useful when the page has many controls." })),
615
+ roleFilter: Type.Optional(Type.String({ description: "Only return elements matching this ARIA role or tag name (case-insensitive). e.g. 'button', 'link', 'textbox'." })),
616
+ nearUid: Type.Optional(Type.String({ description: "Sort elements by proximity to this snapshot uid. Useful for finding controls near a known anchor." })),
539
617
  background: Type.Optional(
540
618
  Type.Boolean({ description: "If true, run silently in the background without focusing Chrome. Default false (Chrome focuses + tab activates so the user can watch)." }),
541
619
  ),
@@ -565,6 +643,7 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
565
643
  titleIncludes: Type.Optional(Type.String()),
566
644
  waitUntilLoad: Type.Optional(Type.Boolean({ default: true })),
567
645
  timeoutMs: Type.Optional(Type.Number({ default: 15_000 })),
646
+ initScript: Type.Optional(Type.String({ description: "Optional JavaScript source to run in MAIN world at document_start of the next navigation. Useful for seeding localStorage, stubbing Date.now(), or defining navigator.webdriver=undefined. Requires the companion extension's webNavigation permission." })),
568
647
  background: Type.Optional(
569
648
  Type.Boolean({ description: "If true, navigate silently without focusing Chrome. Default false." }),
570
649
  ),
@@ -572,8 +651,8 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
572
651
  port: Type.Optional(Type.Number()),
573
652
  }),
574
653
  async execute(_id, params): Promise<ToolTextResult> {
575
- const result = await bridge.send("page.navigate", withBackground(params), params.timeoutMs ?? 15_000);
576
- return { content: [{ type: "text", text: `Navigated to ${params.url}` }], details: { result: result as Json } };
654
+ const result = await bridge.send("page.navigate", withBackground(params), (params.timeoutMs ?? 15_000) + 2_000);
655
+ return { content: [{ type: "text", text: `Navigated to ${params.url}${params.initScript ? " (with initScript)" : ""}` }], details: { result: result as Json } };
577
656
  },
578
657
  });
579
658
 
@@ -586,7 +665,6 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
586
665
  parameters: Type.Object({
587
666
  expression: Type.String(),
588
667
  awaitPromise: Type.Optional(Type.Boolean({ default: true })),
589
- returnByValue: Type.Optional(Type.Boolean({ default: true })),
590
668
  targetId: Type.Optional(Type.String()),
591
669
  urlIncludes: Type.Optional(Type.String()),
592
670
  titleIncludes: Type.Optional(Type.String()),
@@ -598,7 +676,12 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
598
676
  }),
599
677
  async execute(_id, params): Promise<ToolTextResult> {
600
678
  const value = await bridge.send("page.evaluate", withBackground(params), DEFAULT_TIMEOUT_MS);
601
- return { content: [{ type: "text", text: truncateText(typeof value === "string" ? value : safeJson(value)) }], details: { value: value as Json } };
679
+ const text = value === undefined
680
+ ? "undefined"
681
+ : typeof value === "string"
682
+ ? value
683
+ : safeJson(value) ?? "undefined";
684
+ return { content: [{ type: "text", text: truncateText(text) }], details: { value: value as Json } };
602
685
  },
603
686
  });
604
687
 
@@ -606,12 +689,15 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
606
689
  name: "chrome_click",
607
690
  label: "Chrome Click",
608
691
  description:
609
- "Click a CSS selector or viewport coordinate in an existing Chrome tab through the companion extension. The click is dispatched as a synthetic DOM event; by default Chrome is focused so the user can watch, pass background=true to click silently.",
610
- promptSnippet: "Click page elements in Chrome by selector or viewport coordinate.",
692
+ "Click a snapshot uid, CSS selector, or viewport coordinate in an existing Chrome tab through the companion extension. The click is dispatched as a synthetic DOM event; by default Chrome is focused so the user can watch, pass background=true to click silently. Pass includeSnapshot=true to return a fresh snapshot after the click.",
693
+ promptSnippet: "Click page elements in Chrome by snapshot uid, selector, or viewport coordinate.",
611
694
  parameters: Type.Object({
612
- selector: Type.Optional(Type.String({ description: "CSS selector to click. Prefer selectors from chrome_snapshot." })),
613
- x: Type.Optional(Type.Number({ description: "Viewport x coordinate if selector is omitted." })),
614
- y: Type.Optional(Type.Number({ description: "Viewport y coordinate if selector is omitted." })),
695
+ uid: Type.Optional(Type.String({ description: "Stable element uid from chrome_snapshot. Prefer uid over selector after taking a snapshot." })),
696
+ selector: Type.Optional(Type.String({ description: "CSS selector to click. Prefer uid from chrome_snapshot when available." })),
697
+ x: Type.Optional(Type.Number({ description: "Viewport x coordinate if uid/selector is omitted." })),
698
+ y: Type.Optional(Type.Number({ description: "Viewport y coordinate if uid/selector is omitted." })),
699
+ includeSnapshot: Type.Optional(Type.Boolean({ description: "If true, include a fresh chrome_snapshot result after the click." })),
700
+ maxElements: Type.Optional(Type.Number({ default: MAX_ELEMENTS, description: "Max elements in the included snapshot." })),
615
701
  targetId: Type.Optional(Type.String()),
616
702
  urlIncludes: Type.Optional(Type.String()),
617
703
  titleIncludes: Type.Optional(Type.String()),
@@ -622,8 +708,12 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
622
708
  port: Type.Optional(Type.Number()),
623
709
  }),
624
710
  async execute(_id, params): Promise<ToolTextResult> {
625
- const result = await bridge.send("page.click", withBackground(params), DEFAULT_TIMEOUT_MS);
626
- return { content: [{ type: "text", text: `Clicked ${params.selector ?? `${params.x},${params.y}`}` }], details: { result: result as Json } };
711
+ const raw = await bridge.send("page.click", withBackground(params), DEFAULT_TIMEOUT_MS);
712
+ const result = (params.includeSnapshot ? (raw as { result: unknown }).result : raw) as Json;
713
+ const summary = summarizeActionResult(result);
714
+ const target = params.uid ?? params.selector ?? `${params.x},${params.y}`;
715
+ const text = summary ? `Clicked ${target} — ${summary}` : `Clicked ${target}`;
716
+ return { content: [{ type: "text", text }], details: { result: raw as Json } };
627
717
  },
628
718
  });
629
719
 
@@ -631,11 +721,14 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
631
721
  name: "chrome_type",
632
722
  label: "Chrome Type",
633
723
  description:
634
- "Focus an optional CSS selector, then type text into an existing Chrome tab through the companion extension. By default focuses Chrome and activates the tab so the user can watch; pass background=true to type silently.",
635
- promptSnippet: "Type text into Chrome, optionally focusing a selector first.",
724
+ "Focus an optional snapshot uid or CSS selector, then type text into an existing Chrome tab through the companion extension. By default focuses Chrome and activates the tab so the user can watch; pass background=true to type silently. Pass includeSnapshot=true to return a fresh snapshot after typing.",
725
+ promptSnippet: "Type text into Chrome, optionally focusing a snapshot uid or selector first.",
636
726
  parameters: Type.Object({
637
727
  text: Type.String(),
728
+ uid: Type.Optional(Type.String({ description: "Stable element uid from chrome_snapshot." })),
638
729
  selector: Type.Optional(Type.String({ description: "CSS selector to focus before typing." })),
730
+ includeSnapshot: Type.Optional(Type.Boolean({ description: "If true, include a fresh chrome_snapshot result after typing." })),
731
+ maxElements: Type.Optional(Type.Number({ default: MAX_ELEMENTS, description: "Max elements in the included snapshot." })),
639
732
  pressEnter: Type.Optional(Type.Boolean()),
640
733
  targetId: Type.Optional(Type.String()),
641
734
  urlIncludes: Type.Optional(Type.String()),
@@ -647,8 +740,44 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
647
740
  port: Type.Optional(Type.Number()),
648
741
  }),
649
742
  async execute(_id, params): Promise<ToolTextResult> {
650
- const result = await bridge.send("page.type", withBackground(params), DEFAULT_TIMEOUT_MS);
651
- return { content: [{ type: "text", text: `Typed ${params.text.length} character(s)${params.selector ? ` into ${params.selector}` : ""}.` }], details: { result: result as Json } };
743
+ const raw = await bridge.send("page.type", withBackground(params), DEFAULT_TIMEOUT_MS);
744
+ const result = (params.includeSnapshot ? (raw as { result: unknown }).result : raw) as Json;
745
+ const summary = summarizeActionResult(result);
746
+ const into = params.uid || params.selector ? ` into ${params.uid ?? params.selector}` : "";
747
+ const base = `Typed ${params.text.length} character(s)${into}.`;
748
+ return { content: [{ type: "text", text: summary ? `${base} (${summary})` : base }], details: { result: raw as Json } };
749
+ },
750
+ });
751
+
752
+ pi.registerTool({
753
+ name: "chrome_fill",
754
+ label: "Chrome Fill",
755
+ description:
756
+ "Set the full value of a text input, textarea, or contenteditable element using framework-aware native value setters and input/change events. Accepts a snapshot uid or CSS selector. Pass includeSnapshot=true to verify after filling.",
757
+ promptSnippet: "Fill a Chrome form field by snapshot uid or selector, optionally returning a fresh snapshot.",
758
+ parameters: Type.Object({
759
+ text: Type.String(),
760
+ uid: Type.Optional(Type.String({ description: "Stable element uid from chrome_snapshot." })),
761
+ selector: Type.Optional(Type.String({ description: "CSS selector to fill if uid is omitted." })),
762
+ submit: Type.Optional(Type.Boolean({ description: "If true, press Enter after filling." })),
763
+ includeSnapshot: Type.Optional(Type.Boolean({ description: "If true, include a fresh chrome_snapshot result after filling." })),
764
+ maxElements: Type.Optional(Type.Number({ default: MAX_ELEMENTS, description: "Max elements in the included snapshot." })),
765
+ targetId: Type.Optional(Type.String()),
766
+ urlIncludes: Type.Optional(Type.String()),
767
+ titleIncludes: Type.Optional(Type.String()),
768
+ background: Type.Optional(
769
+ Type.Boolean({ description: "If true, fill silently without focusing Chrome. Default false." }),
770
+ ),
771
+ host: Type.Optional(Type.String()),
772
+ port: Type.Optional(Type.Number()),
773
+ }),
774
+ async execute(_id, params): Promise<ToolTextResult> {
775
+ const raw = await bridge.send("page.fill", withBackground(params), DEFAULT_TIMEOUT_MS);
776
+ const result = (params.includeSnapshot ? (raw as { result: unknown }).result : raw) as Json;
777
+ const summary = summarizeActionResult(result);
778
+ const into = params.uid || params.selector ? ` into ${params.uid ?? params.selector}` : "";
779
+ const base = `Filled ${params.text.length} character(s)${into}.`;
780
+ return { content: [{ type: "text", text: summary ? `${base} (${summary})` : base }], details: { result: raw as Json } };
652
781
  },
653
782
  });
654
783
 
@@ -656,10 +785,12 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
656
785
  name: "chrome_key",
657
786
  label: "Chrome Key",
658
787
  description:
659
- "Send a keyboard key to an existing Chrome tab (Enter, Escape, Tab, Backspace, Delete, ArrowUp/Down/Left/Right, or one character). By default focuses Chrome and activates the tab so the user can watch; pass background=true to send the key silently.",
788
+ "Send a keyboard key to an existing Chrome tab (Enter, Escape, Tab, Backspace, Delete, ArrowUp/Down/Left/Right, or one character). By default focuses Chrome and activates the tab so the user can watch; pass background=true to send the key silently. Pass includeSnapshot=true to verify after the keypress.",
660
789
  promptSnippet: "Press keys in Chrome through the companion extension.",
661
790
  parameters: Type.Object({
662
791
  key: Type.String(),
792
+ includeSnapshot: Type.Optional(Type.Boolean({ description: "If true, include a fresh chrome_snapshot result after the keypress." })),
793
+ maxElements: Type.Optional(Type.Number({ default: MAX_ELEMENTS, description: "Max elements in the included snapshot." })),
663
794
  targetId: Type.Optional(Type.String()),
664
795
  urlIncludes: Type.Optional(Type.String()),
665
796
  titleIncludes: Type.Optional(Type.String()),
@@ -670,8 +801,11 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
670
801
  port: Type.Optional(Type.Number()),
671
802
  }),
672
803
  async execute(_id, params): Promise<ToolTextResult> {
673
- const result = await bridge.send("page.key", withBackground(params), DEFAULT_TIMEOUT_MS);
674
- return { content: [{ type: "text", text: `Pressed ${params.key}.` }], details: { result: result as Json } };
804
+ const raw = await bridge.send("page.key", withBackground(params), DEFAULT_TIMEOUT_MS);
805
+ const result = (params.includeSnapshot ? (raw as { result: unknown }).result : raw) as Json;
806
+ const summary = summarizeActionResult(result);
807
+ const base = `Pressed ${params.key}.`;
808
+ return { content: [{ type: "text", text: summary ? `${base} (${summary})` : base }], details: { result: raw as Json } };
675
809
  },
676
810
  });
677
811
 
@@ -697,6 +831,69 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
697
831
  },
698
832
  });
699
833
 
834
+ pi.registerTool({
835
+ name: "chrome_list_console_messages",
836
+ label: "Chrome Console Messages",
837
+ description:
838
+ "List console messages captured in the page by the companion extension. Capture starts after any chrome_snapshot, chrome_evaluate, chrome_list_console_messages, or chrome_list_network_requests call installs page instrumentation.",
839
+ promptSnippet: "List captured console messages from the active Chrome page.",
840
+ parameters: Type.Object({
841
+ clear: Type.Optional(Type.Boolean({ description: "Clear the captured console log after reading." })),
842
+ targetId: Type.Optional(Type.String()),
843
+ urlIncludes: Type.Optional(Type.String()),
844
+ titleIncludes: Type.Optional(Type.String()),
845
+ background: Type.Optional(Type.Boolean({ description: "If true, run silently without focusing Chrome. Default false." })),
846
+ host: Type.Optional(Type.String()),
847
+ port: Type.Optional(Type.Number()),
848
+ }),
849
+ async execute(_id, params): Promise<ToolTextResult> {
850
+ const result = await bridge.send("page.console.list", withBackground(params), DEFAULT_TIMEOUT_MS);
851
+ return { content: [{ type: "text", text: truncateText(safeJson(result)) }], details: { result: result as Json } };
852
+ },
853
+ });
854
+
855
+ pi.registerTool({
856
+ name: "chrome_list_network_requests",
857
+ label: "Chrome Network Requests",
858
+ description:
859
+ "List fetch/XMLHttpRequest activity captured in the page by the companion extension. Capture starts after instrumentation is installed by snapshot/evaluate/network/console tools; browser document/static asset requests are not captured. Use includePreservedRequests=true to keep requests from earlier same-tab navigations that were captured before navigation.",
860
+ promptSnippet: "List captured XHR/fetch requests from the active Chrome page before doing DOM-heavy debugging.",
861
+ parameters: Type.Object({
862
+ includePreservedRequests: Type.Optional(Type.Boolean({ description: "Include captured requests from earlier locations in the same tab/session." })),
863
+ clear: Type.Optional(Type.Boolean({ description: "Clear the captured request log after reading." })),
864
+ targetId: Type.Optional(Type.String()),
865
+ urlIncludes: Type.Optional(Type.String()),
866
+ titleIncludes: Type.Optional(Type.String()),
867
+ background: Type.Optional(Type.Boolean({ description: "If true, run silently without focusing Chrome. Default false." })),
868
+ host: Type.Optional(Type.String()),
869
+ port: Type.Optional(Type.Number()),
870
+ }),
871
+ async execute(_id, params): Promise<ToolTextResult> {
872
+ const result = await bridge.send("page.network.list", withBackground(params), DEFAULT_TIMEOUT_MS);
873
+ return { content: [{ type: "text", text: truncateText(safeJson(result)) }], details: { result: result as Json } };
874
+ },
875
+ });
876
+
877
+ pi.registerTool({
878
+ name: "chrome_get_network_request",
879
+ label: "Chrome Network Request",
880
+ description: "Retrieve one captured fetch/XMLHttpRequest entry, including response body when available, by requestId from chrome_list_network_requests.",
881
+ promptSnippet: "Fetch captured request details and response body by requestId.",
882
+ parameters: Type.Object({
883
+ requestId: Type.String({ description: "Request id returned by chrome_list_network_requests." }),
884
+ targetId: Type.Optional(Type.String()),
885
+ urlIncludes: Type.Optional(Type.String()),
886
+ titleIncludes: Type.Optional(Type.String()),
887
+ background: Type.Optional(Type.Boolean({ description: "If true, run silently without focusing Chrome. Default false." })),
888
+ host: Type.Optional(Type.String()),
889
+ port: Type.Optional(Type.Number()),
890
+ }),
891
+ async execute(_id, params): Promise<ToolTextResult> {
892
+ const result = await bridge.send("page.network.get", withBackground(params), DEFAULT_TIMEOUT_MS);
893
+ return { content: [{ type: "text", text: truncateText(safeJson(result)) }], details: { result: result as Json } };
894
+ },
895
+ });
896
+
700
897
  pi.registerTool({
701
898
  name: "chrome_screenshot",
702
899
  label: "Chrome Screenshot",
@@ -722,11 +919,113 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
722
919
  const cwd = workspaceCwd(ctx);
723
920
  const defaultPath = join(cwd, ".pi", "chrome-screenshots", `${new Date().toISOString().replace(/[:.]/g, "-")}.${format}`);
724
921
  const outputPath = params.path ? resolve(cwd, params.path) : defaultPath;
725
- const result = (await bridge.send("page.screenshot", withBackground(params), DEFAULT_TIMEOUT_MS)) as { dataUrl: string; tab?: unknown };
726
- const base64 = result.dataUrl.replace(/^data:image\/(?:png|jpeg);base64,/, "");
922
+ const result = (await bridge.send("page.screenshot", withBackground(params), params.fullPage ? 120_000 : DEFAULT_TIMEOUT_MS)) as {
923
+ dataUrl?: string;
924
+ tab?: unknown;
925
+ fullPage?: boolean;
926
+ dimensions?: { width: number; height: number; viewportHeight: number; dpr: number };
927
+ tiles?: Array<{ y: number; dataUrl: string }>;
928
+ };
727
929
  await mkdir(dirname(outputPath), { recursive: true });
930
if (result.fullPage && result.tiles && result.dimensions) {
  // Full-page captures come back as a list of tiles. We avoid pulling in an image library,
  // so nothing is stitched here: each tile is written next to the requested path with a
  // -tileN suffix, and a JSON manifest (<outputPath>.json) records the tile paths, their
  // y offsets and the page dimensions.
  const { width, height, viewportHeight, dpr } = result.dimensions;
  const manifestPath = `${outputPath}.json`;

  // Split the extension off the final path segment only. A plain
  // `replace(/(\.[^.]+)$/, ...)` is a no-op when the path has no extension (every tile
  // would then overwrite the same file) and can match a dot in a parent directory name.
  const extMatch = /\.[^./\\]+$/.exec(outputPath);
  const stem = extMatch ? outputPath.slice(0, extMatch.index) : outputPath;
  const ext = extMatch ? extMatch[0] : "";

  const manifest: Array<{ path: string; y: number }> = [];
  for (const [i, tile] of result.tiles.entries()) {
    const tilePath = `${stem}-tile${i}${ext}`;
    // Strip the data-URL header so only the base64 payload is decoded.
    const base64 = tile.dataUrl.replace(/^data:image\/(?:png|jpeg);base64,/, "");
    await writeFile(tilePath, Buffer.from(base64, "base64"));
    manifest.push({ path: tilePath, y: tile.y });
  }
  await writeFile(manifestPath, JSON.stringify({ width, height, viewportHeight, dpr, tiles: manifest }, null, 2));
  return {
    content: [{ type: "text", text: `Saved ${result.tiles.length} full-page tile(s) for ${width}×${height}px page. Manifest: ${manifestPath}` }],
    details: { manifest: manifestPath, tiles: manifest, dimensions: result.dimensions, tab: result.tab } as unknown as Record<string, unknown>,
  };
}
949
+ if (!result.dataUrl) throw new Error("Screenshot returned no dataUrl");
950
+ const base64 = result.dataUrl.replace(/^data:image\/(?:png|jpeg);base64,/, "");
728
951
  await writeFile(outputPath, Buffer.from(base64, "base64"));
729
952
  return { content: [{ type: "text", text: `Saved Chrome screenshot to ${outputPath}` }], details: { path: outputPath, format, tab: result.tab } };
730
953
  },
731
954
  });
955
+
956
// Tool registration: synthetic hover over an element addressed by uid, CSS selector,
// or raw x/y coordinates.
pi.registerTool({
  name: "chrome_hover",
  label: "Chrome Hover",
  description: "Hover over an element (synthetic pointerover/mouseover/pointermove) by uid, selector, or x/y. Triggers CSS :hover state and any JS hover handlers; isTrusted is false.",
  promptSnippet: "Hover a Chrome element to trigger :hover / mouseover handlers.",
  parameters: Type.Object({
    // Hover target: any one of uid, selector, or an x/y point.
    uid: Type.Optional(Type.String()),
    selector: Type.Optional(Type.String()),
    x: Type.Optional(Type.Number()),
    y: Type.Optional(Type.Number()),
    // Tab-targeting options.
    targetId: Type.Optional(Type.String()),
    urlIncludes: Type.Optional(Type.String()),
    titleIncludes: Type.Optional(Type.String()),
    background: Type.Optional(Type.Boolean()),
  }),
  /**
   * Sends a "page.hover" command over the bridge. The text reply names the target by
   * the first locator present (uid, then selector, then "x,y"); the raw bridge reply
   * is returned under `details.result`.
   */
  async execute(_id, params): Promise<ToolTextResult> {
    const outcome = await bridge.send("page.hover", withBackground(params), DEFAULT_TIMEOUT_MS);
    const target = params.uid ?? params.selector ?? `${params.x},${params.y}`;
    return {
      content: [{ type: "text", text: `Hovered ${target}` }],
      details: { result: outcome as Json },
    };
  },
});
976
+
977
// Tool registration: synthetic pointer drag between two endpoints, each addressed by
// uid, CSS selector, or raw x/y coordinates.
pi.registerTool({
  name: "chrome_drag",
  label: "Chrome Drag",
  description: "Synthetic pointer drag from one uid/selector/point to another. Dispatches pointerdown → multi-step pointermove → pointerup. Note: HTML5 DataTransfer is NOT synthesized, so native HTML5 drag-and-drop targets may not respond.",
  promptSnippet: "Drag a Chrome element from one point to another.",
  parameters: Type.Object({
    // Drag start: uid, selector, or x/y point.
    fromUid: Type.Optional(Type.String()),
    fromSelector: Type.Optional(Type.String()),
    fromX: Type.Optional(Type.Number()),
    fromY: Type.Optional(Type.Number()),
    // Drag end: uid, selector, or x/y point.
    toUid: Type.Optional(Type.String()),
    toSelector: Type.Optional(Type.String()),
    toX: Type.Optional(Type.Number()),
    toY: Type.Optional(Type.Number()),
    steps: Type.Optional(Type.Number({ default: 12 })),
    // Tab-targeting options.
    targetId: Type.Optional(Type.String()),
    urlIncludes: Type.Optional(Type.String()),
    titleIncludes: Type.Optional(Type.String()),
    background: Type.Optional(Type.Boolean()),
  }),
  /**
   * Sends a "page.drag" command over the bridge and reports which endpoints were used.
   * The raw bridge reply is returned under `details.result`.
   */
  async execute(_id, params): Promise<ToolTextResult> {
    const result = await bridge.send("page.drag", withBackground(params), DEFAULT_TIMEOUT_MS);
    // Name each endpoint by the first locator supplied. Coordinates are included as a
    // fallback so a point-to-point drag no longer reports "undefined" (chrome_hover
    // already falls back to x,y the same way).
    const describeEndpoint = (uid?: string, selector?: string, x?: number, y?: number): string =>
      uid ?? selector ?? (x !== undefined && y !== undefined ? `${x},${y}` : "(unspecified)");
    const from = describeEndpoint(params.fromUid, params.fromSelector, params.fromX, params.fromY);
    const to = describeEndpoint(params.toUid, params.toSelector, params.toX, params.toY);
    return {
      content: [{ type: "text", text: `Dragged from ${from} to ${to}` }],
      details: { result: result as Json },
    };
  },
});
1002
+
1003
// Tool registration: populate an <input type=file> from local files without opening the
// native picker.
pi.registerTool({
  name: "chrome_upload_file",
  label: "Chrome Upload File",
  description: "Programmatically set the files of an <input type=file> element from local file paths. Uses DataTransfer to populate input.files and dispatches input+change events. Does NOT open the native file picker; works with React/Vue/Angular controlled inputs.",
  promptSnippet: "Attach local files to a Chrome <input type=file> without opening the native file picker.",
  parameters: Type.Object({
    uid: Type.Optional(Type.String()),
    selector: Type.Optional(Type.String()),
    paths: Type.Array(Type.String(), { description: "Local absolute file paths to upload." }),
    // Tab-targeting options.
    targetId: Type.Optional(Type.String()),
    urlIncludes: Type.Optional(Type.String()),
    titleIncludes: Type.Optional(Type.String()),
    background: Type.Optional(Type.Boolean()),
  }),
  /**
   * Reads every requested file from disk, base64-encodes it, and sends the batch to the
   * bridge as a "page.upload" command. Rejects if any file cannot be read.
   */
  async execute(_id, params, _signal, _onUpdate, ctx): Promise<ToolTextResult> {
    const { readFile } = await import("node:fs/promises");
    const { basename, extname } = await import("node:path");
    const cwd = workspaceCwd(ctx);

    // Common extension → MIME type table. Previously every file was sent as
    // application/octet-stream; the `type` field presumably becomes File.type in the page
    // (TODO confirm against the browser extension), so uploads that validate the type
    // (e.g. image-only inputs) would reject the file.
    const mimeByExtension: Record<string, string> = {
      ".png": "image/png",
      ".jpg": "image/jpeg",
      ".jpeg": "image/jpeg",
      ".gif": "image/gif",
      ".webp": "image/webp",
      ".svg": "image/svg+xml",
      ".pdf": "application/pdf",
      ".txt": "text/plain",
      ".csv": "text/csv",
      ".json": "application/json",
      ".html": "text/html",
      ".zip": "application/zip",
      ".mp3": "audio/mpeg",
      ".mp4": "video/mp4",
    };

    // The reads are independent, so run them in parallel; Promise.all keeps input order.
    // resolve() leaves absolute paths untouched and anchors relative ones at `cwd`.
    const files: Array<{ name: string; type: string; base64: string }> = await Promise.all(
      params.paths.map(async (p) => {
        const abs = resolve(cwd, p);
        const buf = await readFile(abs);
        const type = mimeByExtension[extname(abs).toLowerCase()] ?? "application/octet-stream";
        return { name: basename(abs), type, base64: buf.toString("base64") };
      }),
    );

    const result = await bridge.send("page.upload", withBackground({ ...params, files }), DEFAULT_TIMEOUT_MS);
    return {
      content: [{ type: "text", text: `Uploaded ${files.length} file(s) to ${params.uid ?? params.selector}` }],
      details: { result: result as Json },
    };
  },
});
732
1031
  }
package/package.json CHANGED
@@ -1,31 +1,31 @@
1
1
  {
2
- "name": "pi-chrome",
3
- "version": "0.6.1",
4
- "description": "Drive your existing logged-in Chrome from Pi \u2014 no re-login, no throwaway profile, watch the agent work in real time (or toggle quiet background mode).",
5
- "keywords": [
6
- "pi-package",
7
- "pi-extension",
8
- "chrome",
9
- "browser",
10
- "automation",
11
- "authenticated-session",
12
- "real-profile",
13
- "web-debugging"
14
- ],
15
- "license": "MIT",
16
- "type": "commonjs",
17
- "files": [
18
- "extensions",
19
- "README.md"
20
- ],
21
- "pi": {
22
- "extensions": [
23
- "./extensions/chrome-profile-bridge/index.ts"
24
- ]
25
- },
26
- "peerDependencies": {
27
- "@earendil-works/pi-ai": "*",
28
- "@earendil-works/pi-coding-agent": "*",
29
- "typebox": "*"
30
- }
2
+ "name": "pi-chrome",
3
+ "version": "0.8.0",
4
+ "description": "Drive your existing logged-in Chrome from Pi — no re-login, no throwaway profile, watch the agent work in real time (or toggle quiet background mode).",
5
+ "keywords": [
6
+ "pi-package",
7
+ "pi-extension",
8
+ "chrome",
9
+ "browser",
10
+ "automation",
11
+ "authenticated-session",
12
+ "real-profile",
13
+ "web-debugging"
14
+ ],
15
+ "license": "MIT",
16
+ "type": "commonjs",
17
+ "files": [
18
+ "extensions",
19
+ "README.md"
20
+ ],
21
+ "pi": {
22
+ "extensions": [
23
+ "./extensions/chrome-profile-bridge/index.ts"
24
+ ]
25
+ },
26
+ "peerDependencies": {
27
+ "@earendil-works/pi-ai": "*",
28
+ "@earendil-works/pi-coding-agent": "*",
29
+ "typebox": "*"
30
+ }
31
31
  }