pi-chrome 0.10.1 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -69,7 +69,7 @@ pi-chrome v<version>
69
69
 
70
70
  By default, `chrome_*` clicks and keystrokes are **synthetic** DOM events (`event.isTrusted === false`). They drive React/Vue/Angular state correctly but **do not** satisfy Chrome's user-activation gates: clipboard write, fullscreen, file picker, and autoplay all need a real user gesture.
71
71
 
72
- pi-chrome can optionally route input through `chrome.debugger` (CDP `Input.dispatchMouseEvent` / `Input.dispatchKeyEvent`) so each event arrives as `isTrusted=true`, satisfies user-activation, and bypasses site bot-detection that filters synthetic events. The tradeoff: Chrome pins a yellow *"Pi Existing Chrome Profile Bridge started debugging this browser"* banner to the top of any debugged tab.
72
+ pi-chrome can optionally route input through `chrome.debugger` (CDP `Input.dispatchMouseEvent` / `Input.dispatchKeyEvent`) so each event arrives as `isTrusted=true`, satisfies user-activation, and bypasses site bot-detection that filters synthetic events. The tradeoff: Chrome pins a yellow *"Pi Chrome Connector started debugging this browser"* banner to the top of any debugged tab.
73
73
 
74
74
  Usage:
75
75
 
@@ -1,14 +1,14 @@
1
1
  {
2
2
  "manifest_version": 3,
3
- "name": "Pi Existing Chrome Profile Bridge",
4
- "version": "0.10.1",
5
- "description": "Lets Pi control tabs in this existing Chrome profile via a local bridge at 127.0.0.1.",
3
+ "name": "Pi Chrome Connector",
4
+ "version": "0.11.0",
5
+ "description": "Lets Pi control tabs in Chrome via a local connector at 127.0.0.1.",
6
6
  "permissions": ["tabs", "scripting", "storage", "activeTab", "alarms", "webNavigation", "debugger"],
7
7
  "host_permissions": ["<all_urls>", "http://127.0.0.1:17318/*"],
8
8
  "background": {
9
9
  "service_worker": "service_worker.js"
10
10
  },
11
11
  "action": {
12
- "default_title": "Pi Chrome Bridge"
12
+ "default_title": "Pi Chrome Connector"
13
13
  }
14
14
  }
@@ -1,12 +1,12 @@
1
1
  const BRIDGE_URL = "http://127.0.0.1:17318";
2
- const CLIENT_NAME = `Pi Chrome Bridge ${chrome.runtime.id}`;
2
+ const CLIENT_NAME = `Pi Chrome Connector ${chrome.runtime.id}`;
3
3
  const POLL_ERROR_BACKOFF_MS = 2000;
4
4
  let polling = false;
5
5
 
6
6
  // =================== Trusted-input (CDP) layer ===================
7
7
  // Tracks which tabs we have attached chrome.debugger to, plus session-level mode.
8
8
  const attachedTabs = new Map(); // tabId -> { detachAt: number, pointer: {x,y} }
9
- let TRUSTED_MODE = "off"; // "off" | "on" | "auto"
9
+ let TRUSTED_MODE = "auto"; // "off" | "on" | "auto" (default: smart retry only)
10
10
  const TRUSTED_IDLE_DETACH_MS = 15_000;
11
11
  const CDP_VERSION = "1.3";
12
12
 
@@ -35,6 +35,31 @@ function trustedStatus() {
35
35
  };
36
36
  }
37
37
 
38
+ // Auto-upgrade: if the synthetic result carries suggestTrusted=true, the mode is not "off"
39
+ // (i.e. "auto", the default, or "on"), the caller didn't pass trusted=false, and chrome.debugger
40
+ // is available, retry once via trustedFn. On success returns the trusted result plus a summary of the synthetic attempt; on failure returns the synthetic result plus autoRetryError.
41
+ async function maybeUpgradeToTrusted(kind, params, syntheticResult, trustedFn) {
42
+ if (!syntheticResult || !syntheticResult.suggestTrusted) return syntheticResult;
43
+ if (params && params.trusted === false) return syntheticResult;
44
+ if (TRUSTED_MODE === "off") return syntheticResult;
45
+ if (!chrome.debugger) return syntheticResult;
46
+ try {
47
+ const trustedResult = await trustedFn();
48
+ return {
49
+ ...trustedResult,
50
+ autoRetried: true,
51
+ autoRetryReason: syntheticResult.suggestReason || `${kind} produced no mutation`,
52
+ syntheticAttempt: { pageMutated: syntheticResult.pageMutated, suggestReason: syntheticResult.suggestReason },
53
+ };
54
+ } catch (error) {
55
+ return {
56
+ ...syntheticResult,
57
+ autoRetryAttempted: true,
58
+ autoRetryError: error?.message || String(error),
59
+ };
60
+ }
61
+ }
62
+
38
63
  async function attachDebugger(tabId) {
39
64
  if (!chrome.debugger) throw new Error("chrome.debugger API unavailable; reload the extension to grant the new permission");
40
65
  if (attachedTabs.has(tabId)) {
@@ -485,9 +510,11 @@ async function dispatch(action, params) {
485
510
  ]);
486
511
  case "page.evaluate":
487
512
  return evaluateInTab(params);
488
- case "page.click":
513
+ case "page.click": {
489
514
  if (await wantsTrusted(params)) return trustedClick(params);
490
- return executeActionInTab(params, clickPage, [params.selector ?? null, params.uid ?? null, params.x ?? null, params.y ?? null]);
515
+ const synth = await executeActionInTab(params, clickPage, [params.selector ?? null, params.uid ?? null, params.x ?? null, params.y ?? null]);
516
+ return await maybeUpgradeToTrusted("click", params, synth, () => trustedClick(params));
517
+ }
491
518
  case "page.hover":
492
519
  if (await wantsTrusted(params)) return trustedHover(params);
493
520
  return executeActionInTab(params, hoverPage, [params.selector ?? null, params.uid ?? null, params.x ?? null, params.y ?? null]);
@@ -496,9 +523,11 @@ async function dispatch(action, params) {
496
523
  return executeActionInTab(params, dragPage, [params.fromUid ?? null, params.fromSelector ?? null, params.fromX ?? null, params.fromY ?? null, params.toUid ?? null, params.toSelector ?? null, params.toX ?? null, params.toY ?? null, params.steps ?? 12]);
497
524
  case "page.upload":
498
525
  return executeActionInTab(params, uploadFiles, [params.selector ?? null, params.uid ?? null, params.files || []]);
499
- case "page.type":
526
+ case "page.type": {
500
527
  if (await wantsTrusted(params)) return trustedType(params);
501
- return executeActionInTab(params, typeIntoPage, [params.selector ?? null, params.uid ?? null, params.text || "", Boolean(params.pressEnter)]);
528
+ const synth = await executeActionInTab(params, typeIntoPage, [params.selector ?? null, params.uid ?? null, params.text || "", Boolean(params.pressEnter)]);
529
+ return await maybeUpgradeToTrusted("type", params, synth, () => trustedType(params));
530
+ }
502
531
  case "page.fill":
503
532
  if (await wantsTrusted(params)) return trustedFill(params);
504
533
  return executeActionInTab(params, fillPage, [params.selector ?? null, params.uid ?? null, params.text || "", params.submit === true]);
@@ -1262,23 +1291,46 @@ async function clickPage(selector, uid, x, y) {
1262
1291
  // Heuristic: if the clicked thing looks like a media play affordance and the page has paused
1263
1292
  // audio/video, the synthetic click may not unlock autoplay. Surface a warning.
1264
1293
  let autoplayHint;
1265
- const label = (point.element.getAttribute("aria-label") || point.element.textContent || "").toLowerCase();
1266
- if (/^(play|start|begin|next|continue|unmute)/.test(label.trim())) {
1294
+ const labelRaw = (point.element.getAttribute("aria-label") || point.element.textContent || "").trim();
1295
+ const label = labelRaw.toLowerCase();
1296
+ if (/^(play|start|begin|next|continue|unmute)/.test(label)) {
1267
1297
  const idleMedia = Array.from(document.querySelectorAll("audio,video")).some((m) => m.paused);
1268
1298
  if (idleMedia) autoplayHint = "This element looks like a media affordance and the page has paused media. Synthetic clicks do not satisfy user-activation gates; audio/video may not start.";
1269
1299
  }
1300
+ const pageMutated = pageHash() !== before;
1301
+ // Smart-auto retry hint: only set when the synthetic click produced no observable change AND
1302
+ // either the element looks gated (media affordance or gated-looking label) or one of the most
1303
+ // recent console entries reports a user-activation rejection. The dispatcher uses this to decide whether to retry with trusted mode.
1304
+ let suggestTrusted = false;
1305
+ let suggestReason;
1306
+ if (!pageMutated) {
1307
+ if (autoplayHint) { suggestTrusted = true; suggestReason = "play/media affordance + idle media"; }
1308
+ else if (/copy(\s|$)|paste|share|download|fullscreen|sign in with|continue with|allow|enable/i.test(label)) {
1309
+ suggestTrusted = true; suggestReason = `label '${labelRaw.slice(0, 40)}' looks gated`;
1310
+ } else {
1311
+ // Inspect recent console errors for activation-gate rejections.
1312
+ const recent = (state.console || []).slice(-8);
1313
+ const hit = recent.find((e) => /NotAllowedError|Document is not focused|requires transient activation|gesture is required/.test(
1314
+ (e.args || []).map((a) => typeof a === "string" ? a : (a && a.message) || JSON.stringify(a)).join(" ")
1315
+ ));
1316
+ if (hit) { suggestTrusted = true; suggestReason = "recent console error indicates user-activation gate"; }
1317
+ }
1318
+ }
1270
1319
  return {
1271
1320
  x: point.x,
1272
1321
  y: point.y,
1273
1322
  selector,
1274
1323
  uid,
1275
1324
  tag: point.element.tagName,
1325
+ label: labelRaw.slice(0, 80) || undefined,
1276
1326
  isTrusted: false,
1277
1327
  defaultPrevented,
1278
1328
  elementVisible: visible,
1279
1329
  occludedBy: occluded || undefined,
1280
- pageMutated: pageHash() !== before,
1330
+ pageMutated,
1281
1331
  autoplayHint,
1332
+ suggestTrusted: suggestTrusted || undefined,
1333
+ suggestReason,
1282
1334
  };
1283
1335
  }
1284
1336
 
@@ -1526,15 +1578,27 @@ async function typeIntoPage(selector, uid, text, pressEnter) {
1526
1578
  const before = pageHash();
1527
1579
  let element = elementBySelectorOrUid(selector, uid) || document.activeElement;
1528
1580
  if (!element) throw new Error(selector || uid ? `No element for ${selector || uid}` : "No active element");
1581
+ const initialValue = "value" in element ? element.value : (element.isContentEditable ? element.textContent : null);
1529
1582
  element.focus();
1530
1583
  if (!(element.isContentEditable || "value" in element)) throw new Error("Focused element is not text-editable");
1531
1584
  for (const ch of Array.from(text)) await typeCharacter(element, ch);
1532
1585
  if (pressEnter) pressKeyInPage("Enter");
1586
+ const finalValue = "value" in element ? element.value : element.textContent;
1587
+ const valueMatches = "value" in element ? element.value.includes(text) : (element.textContent || "").includes(text);
1588
+ const pageMutated = pageHash() !== before;
1589
+ // Smart-auto retry hint when typing didn't land at all (e.g., editor blocks synthetic input).
1590
+ let suggestTrusted = false, suggestReason;
1591
+ if (text.length > 0 && initialValue === finalValue) {
1592
+ suggestTrusted = true;
1593
+ suggestReason = "value did not change — editor likely rejects synthetic input";
1594
+ }
1533
1595
  return {
1534
1596
  selector, uid, length: text.length, pressEnter,
1535
1597
  isTrusted: false,
1536
- valueMatches: "value" in element ? element.value.includes(text) : undefined,
1537
- pageMutated: pageHash() !== before,
1598
+ valueMatches,
1599
+ pageMutated,
1600
+ suggestTrusted: suggestTrusted || undefined,
1601
+ suggestReason,
1538
1602
  };
1539
1603
  }
1540
1604
 
@@ -471,7 +471,7 @@ Usage rules:
471
471
  lines.push(
472
472
  `✗ EXTENSION VERSION MISMATCH: companion extension is v${version.extensionVersion}, but pi-chrome is v${PI_CHROME_VERSION}.`,
473
473
  ` All chrome_* tools will run with the OLD extension code until this is fixed.`,
474
- ` Fix: open chrome://extensions and click reload on "Pi Existing Chrome Profile Bridge".`,
474
+ ` Fix: open chrome://extensions and click reload on "Pi Chrome Connector".`,
475
475
  ` (Future version drifts will self-heal: the extension now polls pi-chrome's expected version and reloads itself.)`,
476
476
  );
477
477
  } else {
@@ -518,7 +518,14 @@ Usage rules:
518
518
  permissionGranted?: boolean;
519
519
  };
520
520
  if (status.permissionGranted) {
521
- lines.push(`✓ Trusted-input mode available via chrome.debugger (current: ${status.mode ?? "off"}${status.attachedTabs && status.attachedTabs.length ? `; attached to tab ${status.attachedTabs.join(",")}` : ""}). Pass trusted=true on chrome_click/type/etc, or run /chrome-trusted on, to satisfy isTrusted + user-activation gates.`);
521
+ const attached = status.attachedTabs && status.attachedTabs.length ? `; attached to tab ${status.attachedTabs.join(",")}` : "";
522
+ const note =
523
+ status.mode === "auto"
524
+ ? " — smart-retry enabled: synthetic input runs first; if a click/type produced no page change AND the target looks gated, the call is automatically re-run with trusted CDP (yellow debugger banner appears only for that retry)."
525
+ : status.mode === "on"
526
+ ? " — every chrome_* call goes through CDP; the yellow debugger banner is visible while attached."
527
+ : " — synthetic events only; pass trusted=true on chrome_click/type/etc, or switch to auto/on with /chrome-trusted, when isTrusted or user-activation gates matter.";
528
+ lines.push(`✓ Trusted-input mode available via chrome.debugger (current: ${status.mode ?? "off"}${attached}).${note}`);
522
529
  } else {
523
530
  lines.push(`⚠ chrome.debugger API unavailable. The extension is missing the "debugger" permission — reload the extension in chrome://extensions and accept the new permission prompt.`);
524
531
  }
@@ -546,27 +553,83 @@ Usage rules:
546
553
  return matches.length > 0 ? matches : null;
547
554
  },
548
555
  handler: async (args, ctx) => {
549
- const arg = (args || "").trim().toLowerCase();
550
- if (arg === "status" || arg === "") {
551
- try {
552
- const status = (await bridge.send("trusted.status", {}, 5_000)) as { mode: string; attachedTabs: number[]; permissionGranted: boolean };
553
- const attached = status.attachedTabs?.length ? ` (attached to tab ${status.attachedTabs.join(",")})` : "";
554
- const perm = status.permissionGranted ? "" : " chrome.debugger API unavailable; reload the extension and accept the new permission.";
555
- ctx.ui.notify(`Trusted-input mode: ${status.mode}${attached}${perm}`, "info");
556
- } catch (error) {
557
- ctx.ui.notify(`Failed to read trusted mode: ${(error as Error).message}`, "warning");
556
+ const rawArg = (args || "").trim().toLowerCase();
557
+
558
+ // Resolve current status once for both branches (interactive picker + direct args).
559
+ let status: { mode: string; attachedTabs: number[]; permissionGranted: boolean } | undefined;
560
+ try {
561
+ status = (await bridge.send("trusted.status", {}, 5_000)) as typeof status;
562
+ } catch (error) {
563
+ ctx.ui.notify(`Failed to read trusted mode: ${(error as Error).message}`, "warning");
564
+ return;
565
+ }
566
+ if (!status) return;
567
+
568
+ if (!status.permissionGranted) {
569
+ ctx.ui.notify(
570
+ "chrome.debugger API unavailable — the extension is missing the 'debugger' permission. Open chrome://extensions, reload 'Pi Chrome Connector', and accept the new permission prompt.",
571
+ "warning",
572
+ );
573
+ return;
574
+ }
575
+
576
+ const attached = status.attachedTabs?.length ? ` — currently attached to tab ${status.attachedTabs.join(",")}` : "";
577
+ const current = status.mode;
578
+
579
+ let target = rawArg;
580
+ if (target === "status") {
581
+ ctx.ui.notify(`Trusted-input mode: ${current}${attached}`, "info");
582
+ return;
583
+ }
584
+ if (!target) {
585
+ // Interactive picker. Show current mode + tradeoffs in each label.
586
+ const options = [
587
+ `auto${current === "auto" ? " (current)" : ""} — default; synthetic first, retry with CDP only when a call looks gated`,
588
+ `off${current === "off" ? " (current)" : ""} — synthetic DOM events only; never auto-retry`,
589
+ `on${current === "on" ? " (current)" : ""} — every chrome_* call goes through CDP (yellow debugger banner permanently visible)`,
590
+ `status — print current mode and any attached tabs\u2026`,
591
+ ];
592
+ const picked = await ctx.ui.select(
593
+ `Trusted-input mode (current: ${current}${attached})`,
594
+ options,
595
+ );
596
+ if (!picked) return; // cancelled
597
+ if (picked.startsWith("on")) target = "on";
598
+ else if (picked.startsWith("off")) target = "off";
599
+ else if (picked.startsWith("auto")) target = "auto";
600
+ else if (picked.startsWith("status")) {
601
+ ctx.ui.notify(`Trusted-input mode: ${current}${attached}`, "info");
602
+ return;
558
603
  }
604
+ }
605
+
606
+ if (!["on", "off", "auto"].includes(target)) {
607
+ ctx.ui.notify(`Unknown argument '${rawArg}'. Use: on | off | auto | status, or run /chrome-trusted with no args for a picker.`, "warning");
559
608
  return;
560
609
  }
561
- if (!["on", "off", "auto"].includes(arg)) {
562
- ctx.ui.notify(`Unknown argument '${arg}'. Use: on | off | auto | status`, "warning");
610
+
611
+ if (target === current) {
612
+ ctx.ui.notify(`Trusted-input mode already ${current}.`, "info");
563
613
  return;
564
614
  }
615
+
616
+ // Extra confirmation only when switching from "off" to "on" (warn about the banner); "auto" -> "on" is not confirmed.
617
+ if (target === "on" && current === "off") {
618
+ const ok = await ctx.ui.confirm(
619
+ "Turn on trusted-input mode?",
620
+ "All chrome_* tools will dispatch through chrome.debugger (CDP). Events will arrive as isTrusted=true and satisfy user-activation gates (clipboard, fullscreen, autoplay, file picker).\n\nChrome will pin a yellow 'Pi Chrome Connector started debugging this browser' banner to the top of any debugged tab while attached. Clicking 'Cancel' on that banner detaches the debugger.",
621
+ );
622
+ if (!ok) {
623
+ ctx.ui.notify("Trusted-input mode unchanged.", "info");
624
+ return;
625
+ }
626
+ }
627
+
565
628
  try {
566
- const result = (await bridge.send("trusted.mode", { mode: arg }, 5_000)) as { mode: string };
629
+ const result = (await bridge.send("trusted.mode", { mode: target }, 5_000)) as { mode: string };
567
630
  if (result.mode === "on") {
568
631
  ctx.ui.notify(
569
- "Trusted-input mode ON. All chrome_* tools now dispatch through chrome.debugger (CDP). Chrome will show a yellow 'started debugging this browser' banner. Events arrive as isTrusted=true and satisfy user-activation gates.",
632
+ "Trusted-input mode ON. chrome_* tools now dispatch through chrome.debugger. The yellow debugger banner will appear when Chrome is next driven.",
570
633
  "info",
571
634
  );
572
635
  } else if (result.mode === "off") {
@@ -779,7 +842,7 @@ Usage rules:
779
842
  name: "chrome_click",
780
843
  label: "Chrome Click",
781
844
  description:
782
- "Click a snapshot uid, CSS selector, or viewport coordinate in an existing Chrome tab through the companion extension. Defaults to synthetic DOM events (isTrusted=false). Pass trusted=true (or run /chrome-trusted on) to route through chrome.debugger so events arrive as browser-trusted and satisfy user-activation gates Chrome shows a yellow 'started debugging' banner while attached. Pass includeSnapshot=true to return a fresh snapshot after the click.",
845
+ "Click a snapshot uid, CSS selector, or viewport coordinate. Default 'auto' mode runs synthetic DOM events first and silently retries with trusted CDP only when the click looks gated (no page change + affordance label matches play/copy/share/sign-in/etc, or a recent NotAllowedError). The yellow 'started debugging' banner appears only when the retry actually happens. Pass trusted=true to force CDP for this call (banner appears immediately). Pass trusted=false to skip retry. Pass includeSnapshot=true to return a fresh snapshot after the click.",
783
846
  promptSnippet: "Click page elements in Chrome by snapshot uid, selector, or viewport coordinate.",
784
847
  parameters: Type.Object({
785
848
  uid: Type.Optional(Type.String({ description: "Stable element uid from chrome_snapshot. Prefer uid over selector after taking a snapshot." })),
@@ -812,7 +875,7 @@ Usage rules:
812
875
  name: "chrome_type",
813
876
  label: "Chrome Type",
814
877
  description:
815
- "Focus an optional snapshot uid or CSS selector, then type text into an existing Chrome tab. Defaults to synthetic per-character keydown/beforeinput/input/keyup sequence. Pass trusted=true (or run /chrome-trusted on) to route through chrome.debugger so each keystroke is browser-trusted (isTrusted=true). Pass includeSnapshot=true to return a fresh snapshot after typing.",
878
+ "Focus an optional snapshot uid or CSS selector, then type text. Default 'auto' mode runs synthetic per-character keydown/beforeinput/input/keyup first; if the input value doesn't change at all (editor rejected synthetic input) the call is silently retried through chrome.debugger so each keystroke is browser-trusted (isTrusted=true). Pass trusted=true to force CDP for this call. Pass trusted=false to skip retry. Pass includeSnapshot=true to return a fresh snapshot after typing.",
816
879
  promptSnippet: "Type text into Chrome, optionally focusing a snapshot uid or selector first.",
817
880
  parameters: Type.Object({
818
881
  text: Type.String(),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-chrome",
3
- "version": "0.10.1",
3
+ "version": "0.11.0",
4
4
  "description": "Drive your existing logged-in Chrome from Pi — no re-login, no throwaway profile, watch the agent work in real time (or toggle quiet background mode).",
5
5
  "keywords": [
6
6
  "pi-package",