pi-chrome 0.15.1 → 0.15.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,10 @@
2
2
 
3
3
  All notable user-facing changes to `pi-chrome`.
4
4
 
5
+ ## 0.15.2 — 2026-05-13
6
+
7
+ - **Recipe prompts rewritten in user-language.** Earlier recipes leaked tool names into the `You:` prompts ("Use `chrome_tab list` to find my GitHub notifications tab…"), implying users need to know the tool catalog before they can ask anything. Prompts now read as natural intent; the agent trace below each one still shows the `chrome_*` primitives the agent picked. Affects the 30-second try-this block, all 3 hero recipes (PR triage / Linear standup / Bug repro), and 3 of the 6 collapsed recipes (auth-only data pull, network forensics, file upload).
8
+
5
9
  ## 0.15.1 — 2026-05-13
6
10
 
7
11
  - **Architecture diagram now renders on pi.dev.** Replaced Unicode box-drawing characters (`┌─┐│└┘┬▼`) with plain ASCII (`+ - | v`). Pi.dev's monospace font was dropping the horizontal `─` glyphs, leaving the diagram as floating vertical bars. ASCII renders everywhere.
package/README.md CHANGED
@@ -52,10 +52,9 @@ pi-chrome v<version>
52
52
  ## Try this in 30 seconds after install
53
53
 
54
54
  ```text
55
- Use chrome_tab list to find my GitHub notifications tab.
56
- chrome_snapshot it, then write a 5-bullet triage:
57
- which PRs need my review today, sorted by staleness.
58
- Do not click anything yet.
55
+ Look at my GitHub notifications tab and triage which PRs
56
+ need my review today, sorted by staleness.
57
+ Don't click anything yet — just read and summarize.
59
58
  ```
60
59
 
61
60
  You'll watch the agent jump to your GitHub tab and read the page — using **your** session, **your** filters, **your** orgs.
@@ -69,7 +68,7 @@ Each recipe assumes the relevant tab is already open in the Chrome you control.
69
68
  **PR triage**
70
69
 
71
70
  ```text
72
- You: "Use chrome_tab list to find my GitHub notifications tab, then summarize PRs needing my review today, sorted by staleness."
71
+ You: "Look at my GitHub notifications tab and summarize the PRs needing my review today, sorted by staleness."
73
72
  Agent: chrome_tab(list) → chrome_snapshot(uid:el-notifications) → chrome_evaluate(...)
74
73
  ✓ 7 PRs waiting on you. 2 stale >3d (storage-rewrite, billing-v2).
75
74
  1 just turned CI-green (api-keys-prune). Full sorted list below.
@@ -79,10 +78,10 @@ You: [pastes the list straight into Linear]
79
78
  **Linear standup**
80
79
 
81
80
  ```text
82
- You: "Open my Linear current cycle in the active tab and write a 5-bullet standup."
81
+ You: "Open my Linear current cycle and write a 5-bullet standup from it."
83
82
  Agent: chrome_tab(activate, urlIncludes:"linear.app") → chrome_snapshot(uid:el-cycle) → chrome_evaluate(...)
84
83
  ✓ 5 in-progress, 2 blocked. Standup draft:
85
- • Shipped: bridge auto-recover. • In flight: trusted-mode retry path.
84
+ • Shipped: bridge auto-recover. • In flight: input reliability path.
86
85
  • Blocked: vendor portal CSP (waiting on infra).
87
86
  • Next: benchmark v2. • Risk: none today.
88
87
  You: [drops it into #standup]
@@ -91,7 +90,7 @@ You: [drops it into #standup]
91
90
  **Bug repro with evidence**
92
91
 
93
92
  ```text
94
- You: "Repro the checkout 500 on staging, save a screenshot at each step under ./repro/."
93
+ You: "Reproduce the checkout 500 on staging. Save a screenshot at each step under ./repro/."
95
94
  Agent: chrome_navigate(staging) → chrome_click(uid:el-add-to-cart) → chrome_screenshot(./repro/01-cart.png)
96
95
  → chrome_click(uid:el-checkout) → chrome_list_network_requests() → chrome_screenshot(./repro/02-500.png)
97
96
  ✓ POST /api/checkout → 500. Response body saved → ./repro/checkout-500.json
@@ -112,13 +111,13 @@ You: [files the ticket with the folder attached]
112
111
  > Snapshot `localhost:3000` and the staging URL of the same page; tell me what's visually different.
113
112
 
114
113
  **Auth-only data pull**
115
- > Open my analytics dashboard tab and `chrome_evaluate` to extract today's KPIs from page state.
114
+ > Open my analytics dashboard tab and pull today's KPIs from the page.
116
115
 
117
116
  **Network forensics**
118
- > Reproduce the checkout bug, then use `chrome_list_network_requests` to find the failing call and dump its response body.
117
+ > Reproduce the checkout bug, find the failing API call, and dump its response body.
119
118
 
120
119
  **File upload through React**
121
- > Open the photo uploader, `chrome_upload_file` with `./fixtures/sample.png`, confirm preview rendered.
120
+ > Open the photo uploader, upload `./fixtures/sample.png`, confirm the preview renders.
122
121
 
123
122
  </details>
124
123
 
@@ -136,13 +135,13 @@ You: [files the ticket with the folder attached]
136
135
  | Re-login required | **Never** | Every run | Sometimes | Every run |
137
136
  | **Multiple agents drive the same Chrome at once** | ✅ shared bridge | ❌ port collisions | ❌ | ❌ |
138
137
  | Watch agent work, live | ✅ default; toggle quiet | ❌ headless or new window | ⚠️ debugger banner always | ❌ new window |
139
- | Real browser-trusted clicks | ✅ opt-in (`chrome clicks on`) | ✅ | ✅ | ✅ |
138
+ | Real browser input | ✅ always for input tools | ✅ | ✅ | ✅ |
140
139
  | Network/console capture | ✅ built-in | ✅ | ✅ | ⚠️ via extensions |
141
140
  | **Honest result envelopes¹** | ✅ | ⚠️ | ❌ | ❌ |
142
141
  | Self-graded by built-in benchmark² | ✅ 38 primitives + 4 long-horizon | n/a | n/a | n/a |
143
142
 
144
143
  ¹ Every action returns `pageMutated`, `defaultPrevented`, `elementVisible`, `occludedBy`, and `valueMatches` so the agent knows when a click didn't take effect — instead of looping blindly.
145
- ² [`test-suite/`](./test-suite) is mode-aware: a synthetic-events tool is *expected* to fail clipboard. If you build a competing tool, send a PR with your scores. We benchmark in public.
144
+ ² [`test-suite/`](./test-suite) grades browser-control primitives across input fidelity, activation gates, DOM complexity, and agent safety. If you build a competing tool, send a PR with your scores. We benchmark in public.
146
145
 
147
146
  ---
148
147
 
@@ -174,29 +173,17 @@ This is why agents using pi-chrome don't get stuck in retry loops on broken site
174
173
  | **Inspect** | `chrome_snapshot` (uids + selectors + text + viewport), `chrome_screenshot`, `chrome_evaluate` |
175
174
  | **Navigate** | `chrome_navigate` (with optional `initScript` at `document_start`), `chrome_wait_for` |
176
175
  | **Interact** | `chrome_click`, `chrome_type`, `chrome_fill`, `chrome_key`, `chrome_hover` |
177
- | **Gesture** | `chrome_drag` (HTML5 DataTransfer), `chrome_scroll` (wheel + momentum), `chrome_tap` (touch) |
178
- | **Files** | `chrome_upload_file` (no native picker; works with React/Vue/Angular file inputs) |
176
+ | **Gesture** | `chrome_drag` (Chrome pointer drag), `chrome_scroll` (wheel + momentum), `chrome_tap` (touch) |
177
+ | **Files** | `chrome_upload_file` (Chrome file-input control; no native picker) |
179
178
  | **Observe** | `chrome_list_console_messages`, `chrome_list_network_requests`, `chrome_get_network_request` (with response body) |
180
179
 
181
- Each tool is documented inline in Pi — agents see the parameters and the gotchas (synthetic vs. trusted, autoplay gates, file picker limits) without trial-and-error.
180
+ Each tool is documented inline in Pi — agents see the parameters and gotchas (Chrome input, CSP limits, file upload behavior) without trial-and-error.
182
181
 
183
182
  ---
184
183
 
185
- ## Click & input modes
184
+ ## Click & input behavior
186
185
 
187
- `pi-chrome` can drive Chrome two ways:
188
-
189
- - **Quiet** — synthetic DOM events. Fast, no UI banners. Drives React/Vue/Angular state. Won't satisfy autoplay, clipboard, file picker, fullscreen, or user-activation gates.
190
- - **Trusted** — `chrome.debugger` / CDP under the hood. Indistinguishable from a person clicking. Shows Chrome's *"Pi Chrome Connector started debugging this browser"* banner while active.
191
-
192
- ```text
193
- /chrome clicks auto # default: quiet, upgrade to trusted only when needed
194
- /chrome clicks off # always quiet, never banner
195
- /chrome clicks on # always trusted, banner stays up
196
- /chrome clicks status
197
- ```
198
-
199
- Per-call `trusted: true / false` on any input tool wins over the global mode.
186
+ `pi-chrome` drives interactive controls through Chrome's real input layer: clicks, typing, fill, keys, hover, drag, scroll, and touch. Under the hood it uses `chrome.debugger` / CDP, so input satisfies normal user-activation gates. Chrome may show the *"Pi Chrome Connector started debugging this browser"* banner while attached.
200
187
 
201
188
  ### Background / watch modes
202
189
 
@@ -214,7 +201,7 @@ Per-call `background: true` wins over the session toggle.
214
201
 
215
202
  - `/chrome doctor` — single command: connectivity, extension version, bridge owner, version drift, MAIN-world helper injection, `chrome_evaluate("1+1") === 2`, fingerprint flags.
216
203
  - `/chrome onboard` — guided first-time setup.
217
- - `/chrome quiet status`, `/chrome clicks status` — current modes.
204
+ - `/chrome quiet status` — current watch/background setting.
218
205
 
219
206
  If the loaded Chrome extension is older than `pi-chrome` on disk, `/chrome doctor` tells you to reload it from `chrome://extensions`.
220
207
 
@@ -242,7 +229,7 @@ Multiple Pi sessions (planner / worker / audit) can all drive the same Chrome at
242
229
 
243
230
  [`test-suite/`](./test-suite) is a benchmark for **any** browser-control agent (not just pi-chrome). It includes **38 primitive challenges** plus **4 hermetic BrowserGym-style long-horizon tasks**.
244
231
 
245
- Scoring is **expected-outcome-by-mode**, not raw PASS count: each challenge has an expected verdict per mode (`synthetic`, `trusted`, `manual`) and a tool grades itself by whether its actual outcome matches the expected one. This avoids false equivalence between modes — a synthetic-events tool isn't supposed to satisfy a clipboard user-activation gate; matching that expectation is the pass.
232
+ Scoring tracks expected outcomes per challenge rather than raw PASS count, so tools are judged against their declared browser-control capability.
246
233
 
247
234
  Each challenge exposes `window.__verdict` / `window.__reason` / `window.__events` and a manifest entry with expected results per mode.
248
235
 
@@ -251,7 +238,7 @@ cd test-suite && python3 -m http.server 8765
251
238
  # open http://127.0.0.1:8765/ in the Chrome window pi-chrome controls
252
239
  ```
253
240
 
254
- Categories: `trusted-input`, `pointer-humanization`, `keyboard`, `activation-gates`, `scroll`, `drag-drop`, `clipboard`, `native-controls`, `frameworks`, `editing`, `dom-complexity`, `frames`, `files`, `observability`, `fingerprint`, `agent-safety`.
241
+ Categories: `real-input`, `pointer-humanization`, `keyboard`, `activation-gates`, `scroll`, `drag-drop`, `clipboard`, `native-controls`, `frameworks`, `editing`, `dom-complexity`, `frames`, `files`, `observability`, `fingerprint`, `agent-safety`.
255
242
 
256
243
  If you build a competing tool, please open a PR with your scores. We benchmark in public.
257
244
 
@@ -36,21 +36,21 @@ We benchmark in public — see [`../test-suite/`](../test-suite). Where exact sc
36
36
 
37
37
  ## Axis 1 — drivers (where pi-chrome lives)
38
38
 
39
- | Tool | Transport | Profile | Trusted events | Banner when controlling | Default detectable as bot |
39
+ | Tool | Transport | Profile | Browser input | Banner when controlling | Default detectable as bot |
40
40
  | --------------------------------- | ------------------------------------------ | ---------------------------------- | -------------------- | ----------------------------------- | ------------------------- |
41
41
  | Playwright | CDP (own driver) | throwaway by default | always | always ("controlled by test software") | yes (webdriver flag, automation flags) |
42
42
  | Puppeteer | CDP | throwaway by default | always | always | yes |
43
43
  | Selenium | WebDriver / BiDi | throwaway | partial (BiDi improves) | always | most detectable |
44
44
  | puppeteer-stealth / playwright-extra | CDP + patches | throwaway | always | always | medium (patches flags) |
45
45
  | Raw CDP | direct devtools protocol | either (needs `--remote-debugging-port`) | always | always | yes |
46
- | **pi-chrome** | **Chrome extension bridge → local loopback** | **your real Chrome profile, signed-in cookies, extensions, history** | **opt-in** (`/chrome clicks on` or `trusted: true`) | **only when trusted mode is active** | **synthetic mode bypasses common detection signals**¹ |
46
+ | **pi-chrome** | **Chrome extension bridge → local loopback** | **your real Chrome profile, signed-in cookies, extensions, history** | **always for input tools** | **while Chrome input is attached** | **low (real profile + Chrome input)¹** |
47
47
 
48
- ¹ pi-chrome synthetic mode dispatches DOM events with `isTrusted=false` most sites don't check; some anti-bot defenses do. The [`test-suite/`](../test-suite) grades both modes against common detection signals. Trusted mode uses `chrome.debugger` and shows Chrome's banner like every other CDP-based tool.
48
+ ¹ pi-chrome uses `chrome.debugger` for browser input and shows Chrome's banner like other CDP-based tools. The [`test-suite/`](../test-suite) grades browser-control behavior against common detection signals.
49
49
 
50
50
  ### What makes pi-chrome different on this axis
51
51
 
52
52
  1. **Profile attach, not driver launch.** Every other driver fights cookie persistence, login walls, MFA, and extension state. pi-chrome inherits all of it because it *is* your Chrome.
53
- 2. **Synthetic-first, trusted-on-demand.** Two-tier event model agents pick the right tradeoff per call. Competitors are all-trusted (CDP) and always show the banner. pi-chrome avoids it by default; you opt in when a site needs it (autoplay gate, clipboard, file picker).
53
+ 2. **Chrome input against your real profile.** Interactive tools use CDP input for reliability while still controlling the Chrome profile you already use.
54
54
  3. **Extension bridge transport.** No `--remote-debugging-port`, no throwaway Chromium. Survives Chrome auto-updates. Works alongside your normal Chrome usage.
55
55
  4. **Honest result envelopes.** Every action returns `pageMutated`, `defaultPrevented`, `elementVisible`, `occludedBy`, `valueMatches`. Competitors return `void` or generic acks; agents loop blindly on broken clicks.
56
56
  5. **Multi-session shared bridge.** Planner + worker + audit Pi sessions all drive the same Chrome concurrently.
@@ -73,7 +73,7 @@ These wrap a driver with an LLM loop. They are **higher-level than pi-chrome** a
73
73
  | **OpenAI Operator** | proprietary | OpenAI's own VLM + browser; ChatGPT-integrated. | closed, hosted |
74
74
  | **Project Mariner** (Google) | proprietary Chrome integration | Google's own VLM Chrome experiment. | closed |
75
75
  | **Surfer 2 / Surfer-H** (H Company) | proprietary | Hosted proprietary agent stack. | closed, hosted |
76
- | **Anthropic Computer Use** | OS-level screenshots + mouse/keyboard | Broader than browser; trusted events at OS level. | closed (API) |
76
+ | **Anthropic Computer Use** | OS-level screenshots + mouse/keyboard | Broader than browser; OS-level events. | closed (API) |
77
77
 
78
78
  **Why pi-chrome is not on this list:** it's intentionally **not an agent**. There's no LLM loop, no `.act("click the blue button")`. Pi handles the loop; pi-chrome provides the primitives. This means:
79
79
 
@@ -134,7 +134,7 @@ If your threat model excludes extensions with broad permissions, neither approac
134
134
 
135
135
  ## Public benchmarks worth knowing (for axis 2 / axis 3 comparison)
136
136
 
137
- Pi-chrome itself ships a benchmark suite ([`../test-suite/`](../test-suite)) of **38 primitive challenges** plus **4 hermetic BrowserGym-style long-horizon tasks** covering trusted-input, pointer humanization, keyboard fidelity, drag/drop, Shadow DOM, file uploads, network observability, fingerprint leaks, and agent-safety honeypots. Scoring is **expected-outcome-by-mode** (not raw PASS count): each challenge has expected verdicts per mode (`synthetic` / `trusted` / `manual`) and a tool grades itself by whether its actual outcome matches expectations. That's **driver-level** grading.
137
+ Pi-chrome itself ships a benchmark suite ([`../test-suite/`](../test-suite)) of **38 primitive challenges** plus **4 hermetic BrowserGym-style long-horizon tasks** covering real input, pointer humanization, keyboard fidelity, drag/drop, Shadow DOM, file uploads, network observability, fingerprint leaks, and agent-safety honeypots. Scoring tracks expected outcomes per challenge instead of raw PASS count. That's **driver-level** grading.
138
138
 
139
139
  For **agent-level** comparison (axis 2), the public benchmarks worth citing:
140
140
 
package/docs/EXAMPLES.md CHANGED
@@ -129,14 +129,14 @@ component re-rendered with the new value.
129
129
 
130
130
  ```text
131
131
  chrome_upload_file paths=[./fixtures/avatar.png] selector="input[type=file]"
132
- # No native file picker opens. Works with React/Vue/Angular controlled inputs.
132
+ # Uses Chrome file-input control. No native file picker opens.
133
133
  ```
134
134
 
135
135
  ### Drag-to-reorder lists
136
136
 
137
137
  ```text
138
138
  chrome_drag fromUid=row-3 toUid=row-1
139
- # Fires real HTML5 dragstart/dragover/drop with a shared DataTransfer.
139
+ # Uses Chrome pointer drag through its input layer.
140
140
  ```
141
141
 
142
142
  ## Multi-session patterns
@@ -153,14 +153,14 @@ chrome_drag fromUid=row-3 toUid=row-1
153
153
 
154
154
  A third Pi session can run `chrome_snapshot` periodically in `background: true` mode and post summaries via `pi-qq` — handy for long-running flows.
155
155
 
156
- ## When to prefer trusted clicks
156
+ ## Chrome input
157
157
 
158
- Pass `trusted: true` on `chrome_click` (or run `/chrome clicks on`) when:
158
+ Interactive tools use Chrome's real input layer by default: clicks, typing, fill, keys, hover, drag, scroll, and touch. This is reliable for:
159
159
 
160
- - the click should open a file picker
161
- - the click should write to the clipboard or read it
162
- - the click should start an audio/video play
163
- - the click should request fullscreen / push permission
164
- - the page is wrapped in a strict user-activation guard (some paywalls / login flows)
160
+ - sign-in flows
161
+ - guarded buttons
162
+ - audio/video controls
163
+ - fullscreen / permission prompts
164
+ - pages with strict CSP or user-activation checks
165
165
 
166
- Everything else is faster and quieter without it.
166
+ Chrome may show its debugger banner while pi-chrome is attached.
package/docs/FAQ.md CHANGED
@@ -14,15 +14,13 @@ By default no — extensions need explicit "Allow in incognito" permission. Togg
14
14
 
15
15
  ## Will sites detect that I'm automating?
16
16
 
17
- For **synthetic** (quiet) input: yes, technically. `event.isTrusted` is `false`. Most sites don't check; some anti-bot defenses do.
17
+ Interactive controls use Chrome's real input layer via CDP: pointer paths are humanized, key cadence has variance, and normal user-activation gates are satisfied. Some detectors check whether `chrome.debugger` is attached, and Chrome shows its "Chrome is being debugged" banner while it is.
18
18
 
19
- For **trusted** (CDP) input: events are `isTrusted=true`, pointer paths are humanized, key cadence has variance. Most fingerprint-based detectors don't fire. Some specifically check for the `chrome.debugger` API attached and will show the "Chrome is being debugged" banner. That banner is the visible cost of trusted mode.
20
-
21
- The [`test-suite/`](../test-suite) grades both modes against common detection signals.
19
+ The [`test-suite/`](../test-suite) grades browser-control behavior against common detection signals.
22
20
 
23
21
  ## Why do I see a banner saying "Pi Chrome Connector started debugging this browser"?
24
22
 
25
- That's Chrome's built-in warning when an extension uses `chrome.debugger`. pi-chrome uses it only in trusted-input mode. If you don't want to see it, run `/chrome clicks off` and accept that some sign-in flows / file pickers / clipboard ops won't work.
23
+ That's Chrome's built-in warning when an extension uses `chrome.debugger`. pi-chrome uses Chrome's input layer for interactive controls, so the banner appears while attached.
26
24
 
27
25
  ## Can a malicious page escape and access my other tabs?
28
26
 
@@ -38,7 +36,7 @@ Web Store extensions cannot communicate with a local process bridge controlled b
38
36
 
39
37
  ## What happens when I update pi-chrome?
40
38
 
41
- `/chrome doctor` will warn you if the loaded extension is older than the installed `pi-chrome`. Reload it from `chrome://extensions` to pick up the new version. Trusted-input mode in particular requires re-approving the `debugger` permission once.
39
+ `/chrome doctor` will warn you if the loaded extension is older than the installed `pi-chrome`. Reload it from `chrome://extensions` to pick up the new version. Updates that add Chrome permissions may require re-approval once.
42
40
 
43
41
  ## What's the install footprint?
44
42
 
@@ -58,17 +56,17 @@ Yes. The handler compiles with `new Function(...)` in the MAIN world, which work
58
56
  Either:
59
57
  - The element was occluded (look for `occludedBy: <selector>` in the envelope).
60
58
  - The click handler called `event.preventDefault()` and the page intentionally ignored it.
61
- - The site rejects synthetic events. Try `trusted: true` or `/chrome clicks on`.
59
+ - The target changed after your snapshot; take a fresh snapshot or screenshot.
62
60
 
63
61
  The result envelope tells you which one. **Don't blind-retry.**
64
62
 
65
63
  ## Why does `chrome_type` return `valueMatches=false`?
66
64
 
67
- The editor rejected the synthetic input. Common culprits: contenteditable rich-text editors, native date pickers, masked-input libraries. Try `chrome_fill` (uses framework-aware native setters) or `trusted: true`.
65
+ The field rejected or transformed the typed value. Common culprits: contenteditable rich-text editors, native date pickers, and masked-input libraries. Try `chrome_fill`, then verify with `includeSnapshot=true`.
68
66
 
69
67
  ## How do I attach a file to a React file input?
70
68
 
71
- `chrome_upload_file` — populates `input.files` via a real `DataTransfer` and fires `input` + `change` events. It does **not** open the native file picker (no synthetic event can; that's a user-activation gate). Works with React/Vue/Angular controlled inputs.
69
+ `chrome_upload_file` — uses Chrome DevTools file-input control and fires `input` + `change` events. It does **not** open the native file picker. Works with React/Vue/Angular controlled inputs.
72
70
 
73
71
  ## Can it record videos?
74
72
 
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "manifest_version": 3,
3
3
  "name": "Pi Chrome Connector",
4
- "version": "0.15.1",
4
+ "version": "0.15.3",
5
5
  "description": "Lets Pi control tabs in Chrome via a local connector at 127.0.0.1.",
6
6
  "permissions": [
7
7
  "tabs",
@@ -3,64 +3,23 @@ const CLIENT_NAME = `Pi Chrome Connector ${chrome.runtime.id}`;
3
3
  const POLL_ERROR_BACKOFF_MS = 2000;
4
4
  let polling = false;
5
5
 
6
- // =================== Trusted-input (CDP) layer ===================
7
- // Tracks which tabs we have attached chrome.debugger to, plus session-level mode.
6
+ // =================== Chrome input (CDP) layer ===================
7
+ // Tracks which tabs we have attached chrome.debugger to.
8
8
  const attachedTabs = new Map(); // tabId -> { detachAt: number, pointer: {x,y} }
9
- let TRUSTED_MODE = "auto"; // "off" | "on" | "auto" (default: smart retry only)
10
- const TRUSTED_IDLE_DETACH_MS = 15_000;
9
+ const INPUT_IDLE_DETACH_MS = 15_000;
11
10
  const CDP_VERSION = "1.3";
12
11
 
13
12
  function sleep(ms) { return new Promise((r) => setTimeout(r, ms)); }
14
13
  function rng(min, max) { return min + Math.random() * (max - min); }
15
14
 
16
- async function wantsTrusted(params) {
17
- if (params && params.trusted === false) return false;
18
- if (params && params.trusted === true) return true;
19
- return TRUSTED_MODE === "on";
20
- }
21
-
22
- function setTrustedMode(mode) {
23
- const next = String(mode || "").toLowerCase();
24
- if (!["off", "on", "auto"].includes(next)) throw new Error(`bad trusted mode: ${next}`);
25
- TRUSTED_MODE = next;
26
- if (next === "off") void detachAll();
27
- return { mode: TRUSTED_MODE };
28
- }
29
-
30
- function trustedStatus() {
15
+ function inputStatus() {
31
16
  return {
32
- mode: TRUSTED_MODE,
33
17
  attachedTabs: Array.from(attachedTabs.keys()),
34
18
  permissionGranted: typeof chrome !== "undefined" && !!chrome.debugger,
35
19
  };
36
20
  }
37
21
 
38
- // Auto-upgrade: if synthetic result carries suggestTrusted=true, the bridge mode is "auto"
39
- // (default) or "on", and the caller didn't explicitly opt out, retry once with trusted CDP
40
- // path. Surfaces both results so callers can see what happened.
41
- async function maybeUpgradeToTrusted(kind, params, syntheticResult, trustedFn) {
42
- if (!syntheticResult || !syntheticResult.suggestTrusted) return syntheticResult;
43
- if (params && params.trusted === false) return syntheticResult;
44
- if (TRUSTED_MODE === "off") return syntheticResult;
45
- if (!chrome.debugger) return syntheticResult;
46
- try {
47
- const trustedResult = await trustedFn();
48
- return {
49
- ...trustedResult,
50
- autoRetried: true,
51
- autoRetryReason: syntheticResult.suggestReason || `${kind} produced no mutation`,
52
- syntheticAttempt: { pageMutated: syntheticResult.pageMutated, suggestReason: syntheticResult.suggestReason },
53
- };
54
- } catch (error) {
55
- return {
56
- ...syntheticResult,
57
- autoRetryAttempted: true,
58
- autoRetryError: error?.message || String(error),
59
- };
60
- }
61
- }
62
-
63
- // Last few attach failures, kept for /chrome doctor + trusted.debug diagnostics.
22
+ // Last few attach failures, kept for diagnostics.
64
23
  const attachDebugLog = [];
65
24
  function recordAttachEvent(entry) {
66
25
  attachDebugLog.push({ ...entry, t: Date.now() });
@@ -71,7 +30,7 @@ async function attachDebugger(tabId) {
71
30
  if (!chrome.debugger) throw new Error("chrome.debugger API unavailable; reload the extension to grant the new permission");
72
31
  if (attachedTabs.has(tabId)) {
73
32
  const entry = attachedTabs.get(tabId);
74
- entry.detachAt = Date.now() + TRUSTED_IDLE_DETACH_MS;
33
+ entry.detachAt = Date.now() + INPUT_IDLE_DETACH_MS;
75
34
  return entry;
76
35
  }
77
36
  // Before each attach, force-detach any stale CDP target this extension owns on the tab.
@@ -123,19 +82,18 @@ async function attachDebugger(tabId) {
123
82
  }
124
83
  recordAttachEvent({ kind: "attached", tabId });
125
84
  // Seed pointer in a plausible "just left the address bar" location.
126
- const entry = { detachAt: Date.now() + TRUSTED_IDLE_DETACH_MS, pointer: { x: 120 + Math.random() * 200, y: 80 + Math.random() * 120 } };
85
+ const entry = { detachAt: Date.now() + INPUT_IDLE_DETACH_MS, pointer: { x: 120 + Math.random() * 200, y: 80 + Math.random() * 120 } };
127
86
  attachedTabs.set(tabId, entry);
128
87
  return entry;
129
88
  }
130
89
 
131
- async function trustedDebug(params) {
90
+ async function inputDebug(params) {
132
91
  const tab = params?.targetId ? await chrome.tabs.get(Number(params.targetId)).catch(() => null) : null;
133
92
  let targets = [];
134
93
  try { targets = await new Promise((resolve) => chrome.debugger.getTargets((t) => resolve(t || []))); } catch {}
135
94
  return {
136
95
  extensionVersion: chrome.runtime.getManifest().version,
137
96
  extensionId: chrome.runtime.id,
138
- trustedMode: TRUSTED_MODE,
139
97
  attachedTabs: Array.from(attachedTabs.keys()),
140
98
  requestedTab: tab ? { id: tab.id, url: tab.url, status: tab.status, title: tab.title } : null,
141
99
  cdpTargets: targets,
@@ -158,7 +116,7 @@ if (chrome.debugger && chrome.debugger.onDetach) {
158
116
  chrome.debugger.onDetach.addListener(({ tabId }, reason) => {
159
117
  if (tabId !== undefined) attachedTabs.delete(tabId);
160
118
  if (reason === "canceled_by_user") {
161
- console.warn(`[pi-chrome] debugger canceled by user on tab ${tabId}; trusted mode will reattach on next call`);
119
+ console.warn(`[pi-chrome] debugger canceled by user on tab ${tabId}; Chrome input will reattach on next call`);
162
120
  }
163
121
  });
164
122
  }
@@ -166,7 +124,7 @@ if (chrome.debugger && chrome.debugger.onDetach) {
166
124
  setInterval(() => {
167
125
  const now = Date.now();
168
126
  for (const [tabId, entry] of attachedTabs) {
169
- if (entry.detachAt && entry.detachAt < now && TRUSTED_MODE !== "on") {
127
+ if (entry.detachAt && entry.detachAt < now) {
170
128
  void detachDebugger(tabId);
171
129
  }
172
130
  }
@@ -245,7 +203,7 @@ async function cdp(tabId, method, params) {
245
203
  const id = extractForeignExtId(after) || extractForeignExtId(before) || "unknown";
246
204
  throw new Error(
247
205
  `Another Chrome extension (${id}) has an input overlay on this page (e.g. a password manager / autofill popup). \n` +
248
- `pi-chrome tried to dismiss it with Escape but it reappeared. Disable that extension on this page, focus the field via Tab instead of clicking, or run /chrome quiet off so the agent uses synthetic input here.`,
206
+ `pi-chrome tried to dismiss it with Escape but it reappeared. Disable that extension on this page, close its popup, or focus the field via Tab instead of clicking.`,
249
207
  );
250
208
  }
251
209
  throw retryErr;
@@ -254,7 +212,7 @@ async function cdp(tabId, method, params) {
254
212
  if (!isStale) throw error;
255
213
  attachedTabs.delete(tabId);
256
214
  await chrome.debugger.attach({ tabId }, CDP_VERSION).catch(() => undefined);
257
- attachedTabs.set(tabId, { detachAt: Date.now() + TRUSTED_IDLE_DETACH_MS, pointer: { x: 120 + Math.random() * 200, y: 80 + Math.random() * 120 } });
215
+ attachedTabs.set(tabId, { detachAt: Date.now() + INPUT_IDLE_DETACH_MS, pointer: { x: 120 + Math.random() * 200, y: 80 + Math.random() * 120 } });
258
216
  return cdpRaw(tabId, method, params);
259
217
  }
260
218
  }
@@ -280,7 +238,7 @@ async function resolveTargetInTab(tabId, params) {
280
238
  args: [params.selector ?? null, params.uid ?? null, params.x ?? null, params.y ?? null],
281
239
  });
282
240
  const v = results?.[0]?.result;
283
- if (!v || !v.found) throw new Error("Could not resolve target element for trusted action");
241
+ if (!v || !v.found) throw new Error("Could not resolve target element for Chrome input");
284
242
  return v;
285
243
  }
286
244
 
@@ -378,7 +336,7 @@ async function cdpTypeChar(tabId, ch) {
378
336
  await sleep(rng(35, 130));
379
337
  }
380
338
 
381
- async function trustedClick(params) {
339
+ async function chromeInputClick(params) {
382
340
  const tab = await getTabByParams(params);
383
341
  if (params.foreground) await bringToFront(tab);
384
342
  await attachDebugger(tab.id);
@@ -390,7 +348,7 @@ async function trustedClick(params) {
390
348
  await cdp(tab.id, "Input.dispatchMouseEvent", { type: "mouseReleased", x: point.x, y: point.y, button: "left", buttons: 0, clickCount: 1, pointerType: "mouse" });
391
349
  // Reset :focus-visible if the click landed on a focusable element. CDP-driven pointer
392
350
  // focus can leave :focus-visible=true in Chromium, which trips heuristics that expect
393
- // pointer focus to suppress the focus ring (synthetic clicks naturally land on false).
351
+ // pointer focus to suppress the focus ring.
394
352
  if (params.selector || params.uid) {
395
353
  await chrome.scripting.executeScript({
396
354
  target: { tabId: tab.id, frameIds: [0] },
@@ -407,10 +365,10 @@ async function trustedClick(params) {
407
365
  args: [params.selector ?? null, params.uid ?? null],
408
366
  }).catch(() => undefined);
409
367
  }
410
- return { trusted: true, x: point.x, y: point.y, tag: resolved.tag };
368
+ return { input: "chrome", x: point.x, y: point.y, tag: resolved.tag };
411
369
  }
412
370
 
413
- async function trustedHover(params) {
371
+ async function chromeInputHover(params) {
414
372
  const tab = await getTabByParams(params);
415
373
  if (params.foreground) await bringToFront(tab);
416
374
  await attachDebugger(tab.id);
@@ -418,15 +376,15 @@ async function trustedHover(params) {
418
376
  const point = resolved.rect ? pickInsideRect(resolved.rect) : { x: resolved.x, y: resolved.y };
419
377
  await cdpMoveTo(tab.id, point.x, point.y);
420
378
  await sleep(rng(80, 220));
421
- return { trusted: true, x: point.x, y: point.y, tag: resolved.tag };
379
+ return { input: "chrome", x: point.x, y: point.y, tag: resolved.tag };
422
380
  }
423
381
 
424
- async function trustedKey(params) {
382
+ async function chromeInputKey(params) {
425
383
  const tab = await getTabByParams(params);
426
384
  if (params.foreground) await bringToFront(tab);
427
385
  await attachDebugger(tab.id);
428
386
  const key = String(params.key || "");
429
- if (!key) throw new Error("trusted.key: missing key");
387
+ if (!key) throw new Error("chrome.key: missing key");
430
388
  const mods = params.modifiers || {};
431
389
  const modBits = cdpModifiersFor(mods);
432
390
  // Press modifiers in standard order, then key, then release in reverse.
@@ -456,10 +414,10 @@ async function trustedKey(params) {
456
414
  await sleep(rng(5, 18));
457
415
  await cdp(tab.id, "Input.dispatchKeyEvent", { type: "keyUp", key: m.key, code: m.code, windowsVirtualKeyCode: m.vk, modifiers: 0 });
458
416
  }
459
- return { trusted: true, key: info.key, modifiers: mods };
417
+ return { input: "chrome", key: info.key, modifiers: mods };
460
418
  }
461
419
 
462
- async function trustedType(params) {
420
+ async function chromeInputType(params) {
463
421
  const tab = await getTabByParams(params);
464
422
  if (params.foreground) await bringToFront(tab);
465
423
  await attachDebugger(tab.id);
@@ -477,16 +435,16 @@ async function trustedType(params) {
477
435
  for (const ch of Array.from(text)) await cdpTypeChar(tab.id, ch);
478
436
  if (params.pressEnter) {
479
437
  await cdpTypeChar(tab.id, "\r").catch(() => undefined);
480
- await trustedKey({ ...params, key: "Enter" });
438
+ await chromeInputKey({ ...params, key: "Enter" });
481
439
  }
482
- return { trusted: true, length: text.length };
440
+ return { input: "chrome", length: text.length };
483
441
  }
484
442
 
485
- async function trustedFill(params) {
443
+ async function chromeInputFill(params) {
486
444
  const tab = await getTabByParams(params);
487
445
  if (params.foreground) await bringToFront(tab);
488
446
  await attachDebugger(tab.id);
489
- if (!(params.selector || params.uid)) throw new Error("trusted.fill: selector or uid required");
447
+ if (!(params.selector || params.uid)) throw new Error("chrome.fill: selector or uid required");
490
448
  const resolved = await resolveTargetInTab(tab.id, params);
491
449
  const point = resolved.rect ? pickInsideRect(resolved.rect) : { x: resolved.x, y: resolved.y };
492
450
  await cdpMoveTo(tab.id, point.x, point.y);
@@ -503,11 +461,11 @@ async function trustedFill(params) {
503
461
  await sleep(rng(20, 60));
504
462
  const text = String(params.text || "");
505
463
  for (const ch of Array.from(text)) await cdpTypeChar(tab.id, ch);
506
- if (params.submit) await trustedKey({ ...params, key: "Enter" });
507
- return { trusted: true, length: text.length };
464
+ if (params.submit) await chromeInputKey({ ...params, key: "Enter" });
465
+ return { input: "chrome", length: text.length };
508
466
  }
509
467
 
510
- async function trustedScroll(params) {
468
+ async function chromeInputScroll(params) {
511
469
  const tab = await getTabByParams(params);
512
470
  if (params.foreground) await bringToFront(tab);
513
471
  await attachDebugger(tab.id);
@@ -554,26 +512,26 @@ async function trustedScroll(params) {
554
512
  // Sleep one+ frame so IntersectionObserver / rAF samples can run between events.
555
513
  await sleep(rng(22, 48));
556
514
  }
557
- return { trusted: true, deltaX: totalX, deltaY: totalY, steps: n };
515
+ return { input: "chrome", deltaX: totalX, deltaY: totalY, steps: n };
558
516
  }
559
517
 
560
- async function trustedTap(params) {
518
+ async function chromeInputTap(params) {
561
519
  const tab = await getTabByParams(params);
562
520
  if (params.foreground) await bringToFront(tab);
563
521
  await attachDebugger(tab.id);
564
522
  const resolved = (params.selector || params.uid || (typeof params.x === "number" && typeof params.y === "number"))
565
523
  ? await resolveTargetInTab(tab.id, params)
566
524
  : null;
567
- if (!resolved || !resolved.found) throw new Error("trusted.tap: target not found");
525
+ if (!resolved || !resolved.found) throw new Error("chrome.tap: target not found");
568
526
  const point = resolved.rect ? pickInsideRect(resolved.rect) : { x: resolved.x, y: resolved.y };
569
527
  const tp = { x: point.x, y: point.y, radiusX: 8, radiusY: 8, rotationAngle: 0, force: 0.5, id: 1 };
570
528
  await cdp(tab.id, "Input.dispatchTouchEvent", { type: "touchStart", touchPoints: [tp] });
571
529
  await sleep(rng(40, 110));
572
530
  await cdp(tab.id, "Input.dispatchTouchEvent", { type: "touchEnd", touchPoints: [] });
573
- return { trusted: true, x: point.x, y: point.y, tag: resolved.tag };
531
+ return { input: "chrome", x: point.x, y: point.y, tag: resolved.tag };
574
532
  }
575
533
 
576
- async function trustedDrag(params) {
534
+ async function chromeInputDrag(params) {
577
535
  const tab = await getTabByParams(params);
578
536
  if (params.foreground) await bringToFront(tab);
579
537
  await attachDebugger(tab.id);
@@ -595,7 +553,40 @@ async function trustedDrag(params) {
595
553
  await sleep(rng(10, 26));
596
554
  }
597
555
  await cdp(tab.id, "Input.dispatchMouseEvent", { type: "mouseReleased", x: tp.x, y: tp.y, button: "left", buttons: 0, clickCount: 1, pointerType: "mouse" });
598
- return { trusted: true, from: fp, to: tp, steps };
556
+ return { input: "chrome", from: fp, to: tp, steps };
557
+ }
558
+
559
+ async function chromeInputUpload(params) {
560
+ const tab = await getTabByParams(params);
561
+ if (params.foreground) await bringToFront(tab);
562
+ await attachDebugger(tab.id);
563
+ if (!(params.selector || params.uid)) throw new Error("chrome.upload: selector or uid required");
564
+ const paths = Array.isArray(params.paths) ? params.paths.map(String) : [];
565
+ if (!paths.length) throw new Error("chrome.upload: no file paths provided");
566
+ const expression = `(() => {
567
+ const selector = ${JSON.stringify(params.selector ?? null)};
568
+ const uid = ${JSON.stringify(params.uid ?? null)};
569
+ const state = window.__PI_CHROME_STATE__;
570
+ const el = uid && state && state.elements ? state.elements[uid] : (selector ? document.querySelector(selector) : null);
571
+ if (!el || el.tagName !== "INPUT" || el.type !== "file") throw new Error("Target must be <input type=file>");
572
+ el.scrollIntoView({ block: "center", inline: "center", behavior: "instant" });
573
+ return el;
574
+ })()`;
575
+ const evaluated = await cdp(tab.id, "Runtime.evaluate", { expression, objectGroup: "pi-chrome-upload", includeCommandLineAPI: false, returnByValue: false });
576
+ if (evaluated.exceptionDetails) throw new Error(evaluated.exceptionDetails.text || "Could not resolve file input");
577
+ const objectId = evaluated.result?.objectId;
578
+ if (!objectId) throw new Error("Could not resolve file input object");
579
+ await cdp(tab.id, "DOM.enable", {}).catch(() => undefined);
580
+ const requested = await cdp(tab.id, "DOM.requestNode", { objectId });
581
+ if (!requested.nodeId) throw new Error("Could not resolve file input node");
582
+ await cdp(tab.id, "DOM.setFileInputFiles", { nodeId: requested.nodeId, files: paths });
583
+ await cdp(tab.id, "Runtime.callFunctionOn", {
584
+ objectId,
585
+ functionDeclaration: `function() { this.dispatchEvent(new Event("input", { bubbles: true })); this.dispatchEvent(new Event("change", { bubbles: true })); return this.files ? this.files.length : 0; }`,
586
+ returnByValue: true,
587
+ }).catch(() => undefined);
588
+ await cdp(tab.id, "Runtime.releaseObject", { objectId }).catch(() => undefined);
589
+ return { input: "chrome", uploaded: paths.map((path) => ({ path })) };
599
590
  }
600
591
  // ===============================================================
601
592
 
@@ -720,41 +711,28 @@ async function dispatch(action, params) {
720
711
  ]);
721
712
  case "page.evaluate":
722
713
  return evaluateInTab(params);
723
- case "page.click": {
724
- if (await wantsTrusted(params)) return trustedClick(params);
725
- const synth = await executeActionInTab(params, clickPage, [params.selector ?? null, params.uid ?? null, params.x ?? null, params.y ?? null]);
726
- return await maybeUpgradeToTrusted("click", params, synth, () => trustedClick(params));
727
- }
714
+ case "page.click":
715
+ return chromeInputClick(params);
728
716
  case "page.hover":
729
- if (await wantsTrusted(params)) return trustedHover(params);
730
- return executeActionInTab(params, hoverPage, [params.selector ?? null, params.uid ?? null, params.x ?? null, params.y ?? null]);
717
+ return chromeInputHover(params);
731
718
  case "page.drag":
732
- if (await wantsTrusted(params)) return trustedDrag(params);
733
- return executeActionInTab(params, dragPage, [params.fromUid ?? null, params.fromSelector ?? null, params.fromX ?? null, params.fromY ?? null, params.toUid ?? null, params.toSelector ?? null, params.toX ?? null, params.toY ?? null, params.steps ?? 12]);
719
+ return chromeInputDrag(params);
734
720
  case "page.upload":
735
- return executeActionInTab(params, uploadFiles, [params.selector ?? null, params.uid ?? null, params.files || []]);
736
- case "page.type": {
737
- if (await wantsTrusted(params)) return trustedType(params);
738
- const synth = await executeActionInTab(params, typeIntoPage, [params.selector ?? null, params.uid ?? null, params.text || "", Boolean(params.pressEnter)]);
739
- return await maybeUpgradeToTrusted("type", params, synth, () => trustedType(params));
740
- }
721
+ return chromeInputUpload(params);
722
+ case "page.type":
723
+ return chromeInputType(params);
741
724
  case "page.fill":
742
- if (await wantsTrusted(params)) return trustedFill(params);
743
- return executeActionInTab(params, fillPage, [params.selector ?? null, params.uid ?? null, params.text || "", params.submit === true]);
725
+ return chromeInputFill(params);
744
726
  case "page.key":
745
- if (await wantsTrusted(params)) return trustedKey(params);
746
- return executeActionInTab(params, pressKeyInPage, [params.key]);
727
+ return chromeInputKey(params);
747
728
  case "page.scroll":
748
- if (await wantsTrusted(params)) return trustedScroll(params);
749
- return executeActionInTab(params, scrollPage, [params.selector ?? null, params.uid ?? null, params.deltaY ?? 0, params.deltaX ?? 0, params.steps ?? null]);
729
+ return chromeInputScroll(params);
750
730
  case "page.tap":
751
- return trustedTap(params);
752
- case "trusted.mode":
753
- return setTrustedMode(params.mode);
754
- case "trusted.status":
755
- return trustedStatus();
756
- case "trusted.debug":
757
- return trustedDebug(params);
731
+ return chromeInputTap(params);
732
+ case "input.status":
733
+ return inputStatus();
734
+ case "input.debug":
735
+ return inputDebug(params);
758
736
  case "page.console.list":
759
737
  return executeInTab(params, listConsoleMessages, [params.clear === true]);
760
738
  case "page.network.list":
@@ -1503,31 +1481,31 @@ async function clickPage(selector, uid, x, y) {
1503
1481
  defaultPrevented = dispatchPointerLikeEvent(point.element, "click", point.x, point.y, prevX, prevY) || defaultPrevented;
1504
1482
  state.pointer = { x: point.x, y: point.y, t: performance.now() };
1505
1483
  // Heuristic: if the clicked thing looks like a media play affordance and the page has paused
1506
- // audio/video, the synthetic click may not unlock autoplay. Surface a warning.
1484
+ // audio/video, the DOM-event click may not unlock autoplay. Surface a warning.
1507
1485
  let autoplayHint;
1508
1486
  const labelRaw = (point.element.getAttribute("aria-label") || point.element.textContent || "").trim();
1509
1487
  const label = labelRaw.toLowerCase();
1510
1488
  if (/^(play|start|begin|next|continue|unmute)/.test(label)) {
1511
1489
  const idleMedia = Array.from(document.querySelectorAll("audio,video")).some((m) => m.paused);
1512
- if (idleMedia) autoplayHint = "This element looks like a media affordance and the page has paused media. Synthetic clicks do not satisfy user-activation gates; audio/video may not start.";
1490
+ if (idleMedia) autoplayHint = "This element looks like a media affordance and the page has paused media. DOM-event clicks do not satisfy user-activation gates; audio/video may not start.";
1513
1491
  }
1514
1492
  const pageMutated = pageHash() !== before;
1515
- // Smart-auto retry hint: only set when synthetic produced no observable change AND the
1493
+ // Smart-auto retry hint: only set when DOM-event path produced no observable change AND the
1516
1494
  // element looks gated, OR the page just emitted a user-activation rejection. The dispatcher
1517
- // uses this to decide whether to retry with trusted mode.
1518
- let suggestTrusted = false;
1495
+ // uses this to decide whether to retry with Chrome input.
1496
+ let suggestChromeInput = false;
1519
1497
  let suggestReason;
1520
1498
  if (!pageMutated) {
1521
- if (autoplayHint) { suggestTrusted = true; suggestReason = "play/media affordance + idle media"; }
1499
+ if (autoplayHint) { suggestChromeInput = true; suggestReason = "play/media affordance + idle media"; }
1522
1500
  else if (/copy(\s|$)|paste|share|download|fullscreen|sign in with|continue with|allow|enable/i.test(label)) {
1523
- suggestTrusted = true; suggestReason = `label '${labelRaw.slice(0, 40)}' looks gated`;
1501
+ suggestChromeInput = true; suggestReason = `label '${labelRaw.slice(0, 40)}' looks gated`;
1524
1502
  } else {
1525
1503
  // Inspect recent console errors for activation-gate rejections.
1526
1504
  const recent = (state.console || []).slice(-8);
1527
1505
  const hit = recent.find((e) => /NotAllowedError|Document is not focused|requires transient activation|gesture is required/.test(
1528
1506
  (e.args || []).map((a) => typeof a === "string" ? a : (a && a.message) || JSON.stringify(a)).join(" ")
1529
1507
  ));
1530
- if (hit) { suggestTrusted = true; suggestReason = "recent console error indicates user-activation gate"; }
1508
+ if (hit) { suggestChromeInput = true; suggestReason = "recent console error indicates user-activation gate"; }
1531
1509
  }
1532
1510
  }
1533
1511
  return {
@@ -1537,13 +1515,13 @@ async function clickPage(selector, uid, x, y) {
1537
1515
  uid,
1538
1516
  tag: point.element.tagName,
1539
1517
  label: labelRaw.slice(0, 80) || undefined,
1540
- isTrusted: false,
1518
+ input: "dom",
1541
1519
  defaultPrevented,
1542
1520
  elementVisible: visible,
1543
1521
  occludedBy: occluded || undefined,
1544
1522
  pageMutated,
1545
1523
  autoplayHint,
1546
- suggestTrusted: suggestTrusted || undefined,
1524
+ suggestChromeInput: suggestChromeInput || undefined,
1547
1525
  suggestReason,
1548
1526
  };
1549
1527
  }
@@ -1561,7 +1539,7 @@ async function hoverPage(selector, uid, x, y) {
1561
1539
  }
1562
1540
  // Small dwell so hover-intent handlers fire.
1563
1541
  await sleepPage(rand(80, 220));
1564
- return { x: point.x, y: point.y, selector, uid, tag: point.element.tagName, defaultPrevented, isTrusted: false };
1542
+ return { x: point.x, y: point.y, selector, uid, tag: point.element.tagName, defaultPrevented, input: "dom" };
1565
1543
  }
1566
1544
 
1567
1545
  async function dragPage(fromUid, fromSelector, fromX, fromY, toUid, toSelector, toX, toY, steps) {
@@ -1627,7 +1605,7 @@ async function dragPage(fromUid, fromSelector, fromX, fromY, toUid, toSelector,
1627
1605
  to: { x: to.x, y: to.y },
1628
1606
  steps: n,
1629
1607
  pageMutated: pageHash() !== before,
1630
- note: "Synthetic drag with HTML5 DragEvent + shared DataTransfer. isTrusted is still false.",
1608
+ note: "DOM-event drag with HTML5 DragEvent + shared DataTransfer.",
1631
1609
  };
1632
1610
  }
1633
1611
 
@@ -1677,7 +1655,7 @@ async function scrollPage(selector, uid, deltaY, deltaX, steps) {
1677
1655
  deltaX: movedX, deltaY: movedY, steps: n,
1678
1656
  scrollTop: target.scrollTop, scrollLeft: target.scrollLeft,
1679
1657
  pageMutated: pageHash() !== before,
1680
- isTrusted: false,
1658
+ input: "dom",
1681
1659
  };
1682
1660
  }
1683
1661
 
@@ -1800,18 +1778,18 @@ async function typeIntoPage(selector, uid, text, pressEnter) {
1800
1778
  const finalValue = "value" in element ? element.value : element.textContent;
1801
1779
  const valueMatches = "value" in element ? element.value.includes(text) : (element.textContent || "").includes(text);
1802
1780
  const pageMutated = pageHash() !== before;
1803
- // Smart-auto retry hint when typing didn't land at all (e.g., editor blocks synthetic input).
1804
- let suggestTrusted = false, suggestReason;
1781
+ // Smart-auto retry hint when typing didn't land at all (e.g., editor blocks DOM-event input).
1782
+ let suggestChromeInput = false, suggestReason;
1805
1783
  if (text.length > 0 && initialValue === finalValue) {
1806
- suggestTrusted = true;
1807
- suggestReason = "value did not change — editor likely rejects synthetic input";
1784
+ suggestChromeInput = true;
1785
+ suggestReason = "value did not change — editor likely rejects DOM-event input";
1808
1786
  }
1809
1787
  return {
1810
1788
  selector, uid, length: text.length, pressEnter,
1811
- isTrusted: false,
1789
+ input: "dom",
1812
1790
  valueMatches,
1813
1791
  pageMutated,
1814
- suggestTrusted: suggestTrusted || undefined,
1792
+ suggestChromeInput: suggestChromeInput || undefined,
1815
1793
  suggestReason,
1816
1794
  };
1817
1795
  }
@@ -1836,7 +1814,7 @@ function fillPage(selector, uid, text, submit) {
1836
1814
  if (submit) pressKeyInPage("Enter");
1837
1815
  return {
1838
1816
  selector, uid, length: String(text).length, submit,
1839
- isTrusted: false,
1817
+ input: "dom",
1840
1818
  valueMatches: "value" in element ? element.value === String(text) : undefined,
1841
1819
  pageMutated: pageHash() !== before,
1842
1820
  };
@@ -1890,7 +1868,7 @@ async function pressKeyInPage(key) {
1890
1868
  }
1891
1869
  return {
1892
1870
  key: normalized,
1893
- isTrusted: false,
1871
+ input: "dom",
1894
1872
  defaultPrevented: down.defaultPrevented || up.defaultPrevented,
1895
1873
  pageMutated: pageHash() !== before,
1896
1874
  };
@@ -114,7 +114,7 @@ function summarizeActionResult(result: unknown): string | undefined {
114
114
  parts.push(`occluded by <${o.tag ?? "?"}${o.id ? "#" + o.id : ""}>`);
115
115
  }
116
116
  if (r.valueMatches === false) parts.push("input value did not stick");
117
- if (r.autoplayHint) parts.push("autoplay-gated affordance — synthetic click may not start media");
117
+ if (r.autoplayHint) parts.push("autoplay-gated affordance");
118
118
  return parts.length ? parts.join("; ") : undefined;
119
119
  }
120
120
 
@@ -159,7 +159,7 @@ class ChromeProfileBridge {
159
159
 
160
160
  get connected(): boolean {
161
161
  // MV3 service workers can pause between polls/alarms. Treat a recent poll as
162
- // connected without sending a synthetic command; real chrome_* tool calls are
162
+ // connected without sending a probe command; real chrome_* tool calls are
163
163
  // the authoritative end-to-end health check.
164
164
  return this.lastSeenAt !== undefined && Date.now() - this.lastSeenAt < 5 * 60_000;
165
165
  }
@@ -436,10 +436,10 @@ export default function (pi: ExtensionAPI): void {
436
436
  Chrome control is available through the chrome_* tools via a companion Chrome extension installed in the user's normal Chrome profile. Tools target the existing signed-in profile, no CDP, no throwaway profile.
437
437
 
438
438
  Capability model (important):
439
- - Default input path is **synthetic DOM events** (\`isTrusted=false\`). Synthetic events drive React/Vue/Angular state fine, but they do NOT satisfy Chrome's user-activation gates: audio/video autoplay, clipboard write, file pickers, fullscreen, and Web Push prompts will NOT open from a synthetic chrome_click.
440
- - **Trusted escape hatch**: chrome_click / chrome_type / chrome_key / chrome_fill / chrome_hover / chrome_drag / chrome_scroll all accept \`trusted: true\`, which dispatches through chrome.debugger / CDP. Trusted events are browser-trusted (\`isTrusted=true\`) and **bypass page CSP entirely** because they're injected at the input layer, not via JS. Default mode is \`auto\`: synthetic first, silent CDP retry only when the click looks gated. If a synthetic click/type produced no \`pageMutated\` or you got a CSP/eval error from chrome_evaluate, escalate to \`trusted: true\` yourself — don't ask the user.
441
- - \`chrome_evaluate\` and \`chrome_snapshot\` run in MAIN world via the **Function constructor**, which requires \`'unsafe-eval'\` in the page CSP. Pages with strict CSP (e.g. github.com, many bank/SaaS apps) will throw \`EvalError: ... 'unsafe-eval' is not an allowed source of script\` and chrome_snapshot will return empty. On those pages, drive the page with \`chrome_screenshot\` (extension API, not gated by CSP) + \`chrome_click\`/\`chrome_type\`/\`chrome_key\` with \`trusted: true\` and viewport coordinates. \`chrome_navigate\`, \`chrome_screenshot\`, \`chrome_tab\`, and trusted input all keep working under any CSP.
442
- - Tool results include \`pageMutated\`, \`defaultPrevented\`, \`elementVisible\`, \`occludedBy\`, and (for type/fill) \`valueMatches\`. If \`pageMutated\` is false after a click that should have changed something, the click likely didn't take effect — do NOT just retry the same way; either escalate to \`trusted: true\` or check the snapshot for occlusion.
439
+ - Interactive controls (click/type/fill/key/hover/drag/scroll/tap) use Chrome's real input layer via chrome.debugger / CDP. Events satisfy normal user-activation gates.
440
+ - Input bypasses page CSP because it is injected at browser input layer, not page JavaScript. Chrome may show the “Pi Chrome Connector started debugging this browser” banner while attached.
441
+ - \`chrome_evaluate\` and \`chrome_snapshot\` run in MAIN world via the **Function constructor**, which requires \`'unsafe-eval'\` in the page CSP. Pages with strict CSP (e.g. github.com, many bank/SaaS apps) will throw \`EvalError: ... 'unsafe-eval' is not an allowed source of script\` and chrome_snapshot will return empty. On those pages, drive the page with \`chrome_screenshot\` + viewport-coordinate \`chrome_click\`/\`chrome_type\`/\`chrome_key\`. \`chrome_navigate\`, \`chrome_screenshot\`, \`chrome_tab\`, and Chrome input all keep working under any CSP.
442
+ - Tool results include \`pageMutated\`, \`defaultPrevented\`, \`elementVisible\`, \`occludedBy\`, and (for type/fill) \`valueMatches\`. If an action result indicates no page change or occlusion, inspect current page state instead of repeating blindly.
443
443
 
444
444
  Usage rules:
445
445
  1. \`chrome_snapshot\` before clicking/typing; pass \`uid\` over \`selector\`.
@@ -447,7 +447,7 @@ Usage rules:
447
447
  3. If \`chrome_evaluate\` returns null when you expected a value, the expression evaluated to null/undefined in the page; surface the value via \`JSON.stringify\` to confirm.
448
448
  4. \`chrome_navigate\` supports an optional \`initScript\` that runs at document_start in MAIN world for the next navigation (good for seeding localStorage or stubbing Date.now).
449
449
  5. By default chrome_* tools focus Chrome so the user can watch; pass \`background=true\` or run /chrome quiet to silence the whole session.
450
- 6. If you hit an autoplay/clipboard/file-picker gate, tell the user; this bridge cannot satisfy it. (Generic clicks/typing/CSP gates are fine — escalate to \`trusted: true\`.)
450
+ 6. If you hit a native file-picker or privileged browser prompt gate, tell the user; generic clicks/typing/CSP gates are handled by Chrome input.
451
451
  7. Run /chrome doctor when in doubt about connectivity or capabilities.
452
452
  </chrome-profile-bridge>`;
453
453
  return { systemPrompt: event.systemPrompt + primer };
@@ -514,102 +514,9 @@ Usage rules:
514
514
  lines.push(`… Skipped the remaining checks until you reload the Chrome extension.`);
515
515
  }
516
516
 
517
- // Real-input mode probe (plain English for the user).
518
- if (extensionAlive && !versionMismatch) {
519
- try {
520
- const status = (await bridge.send("trusted.status", {}, 5_000)) as {
521
- mode?: string;
522
- attachedTabs?: number[];
523
- permissionGranted?: boolean;
524
- };
525
- if (status.permissionGranted) {
526
- const banner = status.attachedTabs && status.attachedTabs.length ? ` (‘Pi Chrome Connector started debugging this browser’ banner up on ${status.attachedTabs.length} tab(s))` : "";
527
- const note =
528
- status.mode === "auto"
529
- ? " Clicks/keys are quiet by default; if a site rejects a quiet click, pi-chrome retries it once with a real-looking click. The Chrome banner shows only when that retry happens."
530
- : status.mode === "on"
531
- ? " Every click and keystroke uses a real-looking event. The Chrome banner stays up on every tab pi-chrome touches."
532
- : " All clicks are quiet, no banner. Some sites (sign-ins, copy buttons, file pickers, paywalls) may silently ignore them. Run /chrome clicks if a site isn’t responding.";
533
- const label = status.mode === "auto" ? "auto (smart upgrade)" : status.mode === "on" ? "on (always real-looking)" : "off (always quiet)";
534
- lines.push(`✓ Click mode: ${label}${banner}.${note}`);
535
- } else {
536
- lines.push(`⚠ Can't send real-looking clicks yet — the companion extension is missing a permission. Open chrome://extensions, click reload on 'Pi Chrome Connector', and accept the new permission prompt.`);
537
- }
538
- } catch (error) {
539
- lines.push(`⚠ Couldn't check click mode: ${(error as Error).message}`);
540
- }
541
- }
542
-
543
517
  ctx.ui.notify(lines.join("\n"), "info");
544
518
  };
545
519
 
546
- // Click realism handler. With no args, cycles auto → on → off → auto. Explicit args jump
547
- // directly. 'status' prints the current mode without changing it.
548
- const CLICKS_CYCLE = ["auto", "on", "off"] as const;
549
- const CLICKS_DESC: Record<string, string> = {
550
- auto: "Quiet by default; pi-chrome retries once with a real-looking click if a site rejects the quiet one. The Chrome banner appears only when that retry happens.",
551
- off: "All clicks are quiet, no banner. Some sites (sign-ins, copy buttons, file pickers, paywalls) may silently ignore these clicks.",
552
- on: "Every click and keystroke looks real to websites. Chrome shows a 'Pi Chrome Connector started debugging this browser' banner on every tab pi-chrome touches.",
553
- };
554
- const CLICKS_LABEL: Record<string, string> = {
555
- auto: "auto (smart upgrade)",
556
- off: "off (always quiet)",
557
- on: "on (always real-looking)",
558
- };
559
-
560
- const trustedHandler = async (ctx: ExtensionContext, args: string) => {
561
- const rawArg = (args || "").trim().toLowerCase();
562
-
563
- let status: { mode: string; attachedTabs: number[]; permissionGranted: boolean } | undefined;
564
- try {
565
- status = (await bridge.send("trusted.status", {}, 5_000)) as typeof status;
566
- } catch (error) {
567
- ctx.ui.notify(`Couldn't check current click mode: ${(error as Error).message}`, "warning");
568
- return;
569
- }
570
- if (!status) return;
571
-
572
- if (!status.permissionGranted) {
573
- ctx.ui.notify(
574
- "pi-chrome can't drive real-looking clicks yet — the companion extension is missing a permission. Open chrome://extensions, click reload on 'Pi Chrome Connector', and accept the new permission prompt that appears.",
575
- "warning",
576
- );
577
- return;
578
- }
579
-
580
- const current = status.mode;
581
- const attached = status.attachedTabs?.length ? ` (banner up on ${status.attachedTabs.length} tab(s))` : "";
582
-
583
- if (rawArg === "status") {
584
- ctx.ui.notify(`Click mode is ${CLICKS_LABEL[current] ?? current}${attached}. ${CLICKS_DESC[current] ?? ""}`, "info");
585
- return;
586
- }
587
-
588
- // No argument = cycle to the next mode.
589
- let target = rawArg;
590
- if (!target) {
591
- const idx = CLICKS_CYCLE.indexOf(current as typeof CLICKS_CYCLE[number]);
592
- target = CLICKS_CYCLE[(idx + 1 + CLICKS_CYCLE.length) % CLICKS_CYCLE.length];
593
- }
594
-
595
- if (!["on", "off", "auto"].includes(target)) {
596
- ctx.ui.notify(`Unknown click mode '${rawArg}'. Pick one of: auto | off | on | status.`, "warning");
597
- return;
598
- }
599
-
600
- if (target === current) {
601
- ctx.ui.notify(`Click mode is already ${CLICKS_LABEL[current] ?? current}.`, "info");
602
- return;
603
- }
604
-
605
- try {
606
- await bridge.send("trusted.mode", { mode: target }, 5_000);
607
- ctx.ui.notify(`Click mode → ${CLICKS_LABEL[target] ?? target}. ${CLICKS_DESC[target] ?? ""}`, "info");
608
- } catch (error) {
609
- ctx.ui.notify(`Couldn't switch click mode: ${(error as Error).message}`, "warning");
610
- }
611
- };
612
-
613
520
  // Quiet (Chrome focus) handler. No args = toggle. Explicit on/off/status.
614
521
  const QUIET_DESC: Record<string, string> = {
615
522
  on: "pi-chrome works in the background; Chrome won't pop up or steal focus.",
@@ -672,11 +579,6 @@ Usage rules:
672
579
  } catch {
673
580
  parts.push(`✗ Chrome not responding`);
674
581
  }
675
- try {
676
- const t = (await bridge.send("trusted.status", {}, 3_000)) as { mode?: string; attachedTabs?: number[] };
677
- const banner = t.attachedTabs?.length ? `, banner on ${t.attachedTabs.length} tab(s)` : "";
678
- parts.push(`clicks: ${t.mode ?? "?"}${banner}`);
679
- } catch {}
680
582
  parts.push(`quiet: ${backgroundDefault ? "on" : "off"}`);
681
583
  return parts.join(" · ");
682
584
  };
@@ -689,24 +591,6 @@ Usage rules:
689
591
  // the last value also saves; Esc / 'q' closes. The description below changes with the
690
592
  // current value so users always see what the active setting means.
691
593
  const openSettingsDialog = async (ctx: ExtensionContext): Promise<void> => {
692
- // Read current click mode (might fail if extension permission missing).
693
- let clicksMode: string = "auto";
694
- let permissionGranted = false;
695
- try {
696
- const t = (await bridge.send("trusted.status", {}, 5_000)) as { mode?: string; permissionGranted?: boolean };
697
- clicksMode = t.mode ?? "auto";
698
- permissionGranted = !!t.permissionGranted;
699
- } catch {}
700
-
701
- const clicksItem: SettingItem = {
702
- id: "clicks",
703
- label: "Click realism",
704
- currentValue: clicksMode,
705
- values: ["auto", "on", "off"],
706
- description: permissionGranted
707
- ? (CLICKS_DESC[clicksMode] ?? "")
708
- : "Real-looking clicks unavailable: reload the Chrome extension in chrome://extensions and accept the new permission prompt.",
709
- };
710
594
  const quietItem: SettingItem = {
711
595
  id: "quiet",
712
596
  label: "Quiet mode",
@@ -714,7 +598,7 @@ Usage rules:
714
598
  values: ["on", "off"],
715
599
  description: QUIET_DESC[backgroundDefault ? "on" : "off"] ?? "",
716
600
  };
717
- const items: SettingItem[] = [clicksItem, quietItem];
601
+ const items: SettingItem[] = [quietItem];
718
602
 
719
603
  await ctx.ui.custom<void>((_tui, theme, _kb, done) => {
720
604
  const container = new Container();
@@ -727,21 +611,7 @@ Usage rules:
727
611
  Math.min(items.length + 2, 8),
728
612
  getSettingsListTheme(),
729
613
  (id, newValue) => {
730
- if (id === "clicks") {
731
- if (!permissionGranted) {
732
- ctx.ui.notify("Click mode locked: reload the Chrome extension first.", "warning");
733
- // Revert by snapping back to the previous value.
734
- list.updateValue("clicks", clicksItem.currentValue);
735
- return;
736
- }
737
- // Mutate description so the help text matches the new value.
738
- clicksItem.currentValue = newValue;
739
- clicksItem.description = CLICKS_DESC[newValue] ?? "";
740
- list.invalidate();
741
- void bridge.send("trusted.mode", { mode: newValue }, 5_000).catch((err) => {
742
- ctx.ui.notify(`Couldn't switch click mode: ${(err as Error).message}`, "warning");
743
- });
744
- } else if (id === "quiet") {
614
+ if (id === "quiet") {
745
615
  backgroundDefault = newValue === "on";
746
616
  quietItem.currentValue = newValue;
747
617
  quietItem.description = QUIET_DESC[newValue] ?? "";
@@ -762,7 +632,7 @@ Usage rules:
762
632
 
763
633
  pi.registerCommand("chrome", {
764
634
  description:
765
- "All pi-chrome controls in one place.\n /chrome status — one-line snapshot of connection + current modes.\n /chrome doctor — full health check.\n /chrome onboard — install the Chrome companion extension.\n /chrome clicks [auto|off|on|status] — how realistic should pi-chrome's clicks be.\n /chrome quiet [on|off|status|toggle] — whether Chrome pops to the front when pi-chrome acts.\nRun with no arguments for an interactive picker that shows current state.",
635
+ "All pi-chrome controls in one place.\n /chrome status — one-line snapshot of connection + quiet mode.\n /chrome doctor — full health check.\n /chrome onboard — install the Chrome companion extension.\n /chrome quiet [on|off|status|toggle] — whether Chrome pops to the front when pi-chrome acts.\nRun with no arguments for an interactive picker that shows current state.",
766
636
  getArgumentCompletions: (prefix) => {
767
637
  const raw = prefix;
768
638
  const trimmedRight = raw.replace(/\s+$/, "");
@@ -779,19 +649,11 @@ Usage rules:
779
649
  let candidates: Item[] = [];
780
650
  if (path.length === 0) {
781
651
  candidates = [
782
- { fullValue: "status", label: "status", description: "One-line summary: connection + click mode + quiet mode." },
652
+ { fullValue: "status", label: "status", description: "One-line summary: connection + quiet mode." },
783
653
  { fullValue: "doctor", label: "doctor", description: "Full health check. Tells you if Chrome is connected and what's wrong if it isn't." },
784
654
  { fullValue: "onboard", label: "onboard", description: "Install the Chrome companion extension (first-time setup)." },
785
- { fullValue: "clicks", label: "clicks", description: "How realistic should pi-chrome's clicks be? auto / off / on." },
786
655
  { fullValue: "quiet", label: "quiet", description: "Should Chrome pop to the front when pi-chrome acts, or work silently?" },
787
656
  ];
788
- } else if (path[0] === "clicks" && path.length === 1) {
789
- candidates = [
790
- { fullValue: "clicks auto", label: "auto", description: "Default. Quiet clicks; upgrade to real-looking ones only when a site rejects them." },
791
- { fullValue: "clicks off", label: "off", description: "Always quiet. No banner. Some sites won't accept the clicks." },
792
- { fullValue: "clicks on", label: "on", description: "Always real-looking. Chrome shows a banner. Best for stubborn sites." },
793
- { fullValue: "clicks status", label: "status", description: "Show the current click mode." },
794
- ];
795
657
  } else if (path[0] === "quiet" && path.length === 1) {
796
658
  candidates = [
797
659
  { fullValue: "quiet on", label: "on", description: "Work silently. Chrome stays in the background. Your editor keeps focus." },
@@ -817,22 +679,18 @@ Usage rules:
817
679
  case "status": return statusHandler(ctx);
818
680
  case "doctor": return doctorHandler(ctx);
819
681
  case "onboard": return onboardHandler(ctx);
820
- case "clicks":
821
- case "trusted": // legacy alias
822
- return trustedHandler(ctx, subArgs);
823
682
  case "quiet":
824
683
  case "background": // legacy alias
825
684
  return backgroundHandler(ctx, subArgs);
826
685
  case "settings": {
827
- // Legacy nested form: /chrome settings background ... or /chrome settings trusted ...
686
+ // Legacy nested form: /chrome settings background ...
828
687
  const [setting, ...settingArgs] = rest;
829
688
  if (setting === "background") return backgroundHandler(ctx, settingArgs.join(" "));
830
- if (setting === "trusted") return trustedHandler(ctx, settingArgs.join(" "));
831
- ctx.ui.notify(`'/chrome settings' was removed. Use /chrome clicks or /chrome quiet directly.`, "warning");
689
+ ctx.ui.notify(`'/chrome settings' was removed. Use /chrome quiet directly.`, "warning");
832
690
  return;
833
691
  }
834
692
  default:
835
- ctx.ui.notify(`Unknown subcommand '${head}'. Try: /chrome status | doctor | onboard | clicks | quiet.`, "warning");
693
+ ctx.ui.notify(`Unknown subcommand '${head}'. Try: /chrome status | doctor | onboard | quiet.`, "warning");
836
694
  }
837
695
  },
838
696
  });
@@ -986,7 +844,7 @@ Usage rules:
986
844
  name: "chrome_click",
987
845
  label: "Chrome Click",
988
846
  description:
989
- "Click a snapshot uid, CSS selector, or viewport coordinate. Default 'auto' mode runs synthetic DOM events first and silently retries with trusted CDP only when the click looks gated (no page change + affordance label matches play/copy/share/sign-in/etc, or a recent NotAllowedError). The 'started debugging' banner appears only when the retry actually happens. Pass trusted=true to force CDP for this call (banner appears immediately). Pass trusted=false to skip retry. Pass includeSnapshot=true to return a fresh snapshot after the click.",
847
+ "Click a snapshot uid, CSS selector, or viewport coordinate using Chrome's real input layer. Pass includeSnapshot=true to return a fresh snapshot after the click.",
990
848
  promptSnippet: "Click page elements in Chrome by snapshot uid, selector, or viewport coordinate.",
991
849
  parameters: Type.Object({
992
850
  uid: Type.Optional(Type.String({ description: "Stable element uid from chrome_snapshot. Prefer uid over selector after taking a snapshot." })),
@@ -1001,7 +859,6 @@ Usage rules:
1001
859
  background: Type.Optional(
1002
860
  Type.Boolean({ description: "If true, click silently without focusing Chrome. Default false." }),
1003
861
  ),
1004
- trusted: Type.Optional(Type.Boolean({ description: "If true, dispatch through chrome.debugger / CDP so the event is browser-trusted (isTrusted=true, user-activation satisfied). Triggers Chrome's 'started debugging this browser' banner." })),
1005
862
  host: Type.Optional(Type.String()),
1006
863
  port: Type.Optional(Type.Number()),
1007
864
  }),
@@ -1019,7 +876,7 @@ Usage rules:
1019
876
  name: "chrome_type",
1020
877
  label: "Chrome Type",
1021
878
  description:
1022
- "Focus an optional snapshot uid or CSS selector, then type text. Default 'auto' mode runs synthetic per-character keydown/beforeinput/input/keyup first; if the input value doesn't change at all (editor rejected synthetic input) the call is silently retried through chrome.debugger so each keystroke is browser-trusted (isTrusted=true). Pass trusted=true to force CDP for this call. Pass trusted=false to skip retry. Pass includeSnapshot=true to return a fresh snapshot after typing.",
879
+ "Focus an optional snapshot uid or CSS selector, then type text using Chrome's real keyboard input. Pass includeSnapshot=true to return a fresh snapshot after typing.",
1023
880
  promptSnippet: "Type text into Chrome, optionally focusing a snapshot uid or selector first.",
1024
881
  parameters: Type.Object({
1025
882
  text: Type.String(),
@@ -1034,7 +891,6 @@ Usage rules:
1034
891
  background: Type.Optional(
1035
892
  Type.Boolean({ description: "If true, type silently without focusing Chrome. Default false." }),
1036
893
  ),
1037
- trusted: Type.Optional(Type.Boolean({ description: "If true, dispatch through chrome.debugger / CDP so each keystroke is browser-trusted. Triggers Chrome's debugger banner." })),
1038
894
  host: Type.Optional(Type.String()),
1039
895
  port: Type.Optional(Type.Number()),
1040
896
  }),
@@ -1052,7 +908,7 @@ Usage rules:
1052
908
  name: "chrome_fill",
1053
909
  label: "Chrome Fill",
1054
910
  description:
1055
- "Set the full value of a text input, textarea, or contenteditable element using framework-aware native value setters and input/change events. Accepts a snapshot uid or CSS selector. Pass includeSnapshot=true to verify after filling.",
911
+ "Set the full value of a text input, textarea, or contenteditable element using Chrome click/select/delete/type input. Accepts a snapshot uid or CSS selector. Pass includeSnapshot=true to verify after filling.",
1056
912
  promptSnippet: "Fill a Chrome form field by snapshot uid or selector, optionally returning a fresh snapshot.",
1057
913
  parameters: Type.Object({
1058
914
  text: Type.String(),
@@ -1067,7 +923,6 @@ Usage rules:
1067
923
  background: Type.Optional(
1068
924
  Type.Boolean({ description: "If true, fill silently without focusing Chrome. Default false." }),
1069
925
  ),
1070
- trusted: Type.Optional(Type.Boolean({ description: "If true, dispatch through chrome.debugger / CDP for browser-trusted input. Triggers Chrome's debugger banner." })),
1071
926
  host: Type.Optional(Type.String()),
1072
927
  port: Type.Optional(Type.Number()),
1073
928
  }),
@@ -1094,7 +949,7 @@ Usage rules:
1094
949
  ctrlKey: Type.Optional(Type.Boolean()),
1095
950
  altKey: Type.Optional(Type.Boolean()),
1096
951
  metaKey: Type.Optional(Type.Boolean()),
1097
- }, { description: "Modifier keys to hold while pressing the key (chord). Only honoured for trusted-mode presses; synthetic path ignores." })),
952
+ }, { description: "Modifier keys to hold while pressing the key (chord)." })),
1098
953
  includeSnapshot: Type.Optional(Type.Boolean({ description: "If true, include a fresh chrome_snapshot result after the keypress." })),
1099
954
  maxElements: Type.Optional(Type.Number({ default: MAX_ELEMENTS, description: "Max elements in the included snapshot." })),
1100
955
  targetId: Type.Optional(Type.String()),
@@ -1103,7 +958,6 @@ Usage rules:
1103
958
  background: Type.Optional(
1104
959
  Type.Boolean({ description: "If true, send the key silently without focusing Chrome. Default false." }),
1105
960
  ),
1106
- trusted: Type.Optional(Type.Boolean({ description: "If true, dispatch through chrome.debugger / CDP so the keystroke is browser-trusted." })),
1107
961
  host: Type.Optional(Type.String()),
1108
962
  port: Type.Optional(Type.Number()),
1109
963
  }),
@@ -1263,7 +1117,7 @@ Usage rules:
1263
1117
  pi.registerTool({
1264
1118
  name: "chrome_hover",
1265
1119
  label: "Chrome Hover",
1266
- description: "Hover over an element (synthetic pointerover/mouseover/pointermove) by uid, selector, or x/y. Triggers CSS :hover state and any JS hover handlers; isTrusted is false.",
1120
+ description: "Hover over an element by uid, selector, or x/y using Chrome pointer movement.",
1267
1121
  promptSnippet: "Hover a Chrome element to trigger :hover / mouseover handlers.",
1268
1122
  parameters: Type.Object({
1269
1123
  uid: Type.Optional(Type.String()),
@@ -1274,7 +1128,6 @@ Usage rules:
1274
1128
  urlIncludes: Type.Optional(Type.String()),
1275
1129
  titleIncludes: Type.Optional(Type.String()),
1276
1130
  background: Type.Optional(Type.Boolean()),
1277
- trusted: Type.Optional(Type.Boolean({ description: "If true, dispatch through chrome.debugger / CDP for browser-trusted hover." })),
1278
1131
  }),
1279
1132
  async execute(_id, params): Promise<ToolTextResult> {
1280
1133
  const result = await bridge.send("page.hover", withBackground(params), DEFAULT_TIMEOUT_MS);
@@ -1285,7 +1138,7 @@ Usage rules:
1285
1138
  pi.registerTool({
1286
1139
  name: "chrome_drag",
1287
1140
  label: "Chrome Drag",
1288
- description: "Synthetic drag from one uid/selector/point to another. Dispatches pointerdown humanised pointermove path → dragstart/drag/dragenter/dragover/dragleave/drop/dragend with a shared HTML5 DataTransfer, then pointerup. isTrusted=false.",
1141
+ description: "Drag from one uid/selector/point to another using Chrome pointer input.",
1289
1142
  promptSnippet: "Drag a Chrome element from one point to another.",
1290
1143
  parameters: Type.Object({
1291
1144
  fromUid: Type.Optional(Type.String()),
@@ -1301,7 +1154,6 @@ Usage rules:
1301
1154
  urlIncludes: Type.Optional(Type.String()),
1302
1155
  titleIncludes: Type.Optional(Type.String()),
1303
1156
  background: Type.Optional(Type.Boolean()),
1304
- trusted: Type.Optional(Type.Boolean({ description: "If true, dispatch through chrome.debugger / CDP so the drag is browser-trusted (real HTML5 dragstart/drop with native DataTransfer)." })),
1305
1157
  }),
1306
1158
  async execute(_id, params): Promise<ToolTextResult> {
1307
1159
  const result = await bridge.send("page.drag", withBackground(params), DEFAULT_TIMEOUT_MS);
@@ -1313,7 +1165,7 @@ Usage rules:
1313
1165
  name: "chrome_tap",
1314
1166
  label: "Chrome Tap (Touch)",
1315
1167
  description:
1316
- "Dispatch a real browser-trusted touchstart/touchend tap via chrome.debugger (CDP Input.dispatchTouchEvent). Use for sites that gate on TouchEvent rather than MouseEvent (mobile-first PWAs, swipe carousels). Always uses the trusted CDP path — the 'started debugging' banner appears.",
1168
+ "Dispatch a real touchstart/touchend tap through Chrome's input layer. Use for sites that gate on TouchEvent rather than MouseEvent (mobile-first PWAs, swipe carousels). Chrome may show its debugging banner while attached.",
1317
1169
  promptSnippet: "Tap (real touch) a Chrome element by snapshot uid, selector, or coordinate.",
1318
1170
  parameters: Type.Object({
1319
1171
  uid: Type.Optional(Type.String()),
@@ -1347,7 +1199,6 @@ Usage rules:
1347
1199
  urlIncludes: Type.Optional(Type.String()),
1348
1200
  titleIncludes: Type.Optional(Type.String()),
1349
1201
  background: Type.Optional(Type.Boolean()),
1350
- trusted: Type.Optional(Type.Boolean({ description: "If true, dispatch wheel events through chrome.debugger / CDP for browser-trusted scrolling." })),
1351
1202
  }),
1352
1203
  async execute(_id, params): Promise<ToolTextResult> {
1353
1204
  const result = await bridge.send("page.scroll", withBackground(params), DEFAULT_TIMEOUT_MS);
@@ -1358,7 +1209,7 @@ Usage rules:
1358
1209
  pi.registerTool({
1359
1210
  name: "chrome_upload_file",
1360
1211
  label: "Chrome Upload File",
1361
- description: "Programmatically set the files of an <input type=file> element from local file paths. Uses DataTransfer to populate input.files and dispatches input+change events. Does NOT open the native file picker; works with React/Vue/Angular controlled inputs.",
1212
+ description: "Attach local files to an <input type=file> element using Chrome DevTools file-input control. Does NOT open the native file picker; works with React/Vue/Angular controlled inputs.",
1362
1213
  promptSnippet: "Attach local files to a Chrome <input type=file> without opening the native file picker.",
1363
1214
  parameters: Type.Object({
1364
1215
  uid: Type.Optional(Type.String()),
@@ -1370,17 +1221,10 @@ Usage rules:
1370
1221
  background: Type.Optional(Type.Boolean()),
1371
1222
  }),
1372
1223
  async execute(_id, params, _signal, _onUpdate, ctx): Promise<ToolTextResult> {
1373
- const { readFile } = await import("node:fs/promises");
1374
- const { basename } = await import("node:path");
1375
1224
  const cwd = workspaceCwd(ctx);
1376
- const files: Array<{ name: string; type: string; base64: string }> = [];
1377
- for (const p of params.paths) {
1378
- const abs = resolve(cwd, p);
1379
- const buf = await readFile(abs);
1380
- files.push({ name: basename(abs), type: "application/octet-stream", base64: buf.toString("base64") });
1381
- }
1382
- const result = await bridge.send("page.upload", withBackground({ ...params, files }), DEFAULT_TIMEOUT_MS);
1383
- return { content: [{ type: "text", text: `Uploaded ${files.length} file(s) to ${params.uid ?? params.selector}` }], details: { result: result as Json } };
1225
+ const paths = params.paths.map((p) => resolve(cwd, p));
1226
+ const result = await bridge.send("page.upload", withBackground({ ...params, paths }), DEFAULT_TIMEOUT_MS);
1227
+ return { content: [{ type: "text", text: `Uploaded ${paths.length} file(s) to ${params.uid ?? params.selector}` }], details: { result: result as Json } };
1384
1228
  },
1385
1229
  });
1386
1230
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-chrome",
3
- "version": "0.15.1",
3
+ "version": "0.15.3",
4
4
  "scripts": {
5
5
  "version": "node scripts/sync-manifest-version.js",
6
6
  "prepublishOnly": "node scripts/sync-manifest-version.js"
@@ -36,7 +36,10 @@
36
36
  "stagehand-alternative"
37
37
  ],
38
38
  "license": "MIT",
39
- "author": { "name": "tianrendong", "company": "Earendil Inc." },
39
+ "author": {
40
+ "name": "tianrendong",
41
+ "company": "Earendil Inc."
42
+ },
40
43
  "homepage": "https://github.com/tianrendong/pi-chrome#readme",
41
44
  "repository": {
42
45
  "type": "git",