npm - playwriter - Versions diffs - 0.0.63 → 0.0.89 - Mend

playwriter 0.0.63 → 0.0.89

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (223) hide show

package/dist/a11y-client.js +18 -8
package/dist/aria-snapshot.d.ts +41 -3
package/dist/aria-snapshot.d.ts.map +1 -1
package/dist/aria-snapshot.js +134 -55
package/dist/aria-snapshot.js.map +1 -1
package/dist/aria-snapshot.test.js +5 -2
package/dist/aria-snapshot.test.js.map +1 -1
package/dist/aria-snapshot.unit.test.js +83 -41
package/dist/aria-snapshot.unit.test.js.map +1 -1
package/dist/assets/cursors/screen-studio/pointer-macos-tahoe-data-url.d.ts +5 -0
package/dist/assets/cursors/screen-studio/pointer-macos-tahoe-data-url.d.ts.map +1 -0
package/dist/assets/cursors/screen-studio/pointer-macos-tahoe-data-url.js +5 -0
package/dist/assets/cursors/screen-studio/pointer-macos-tahoe-data-url.js.map +1 -0
package/dist/bippy.js +1 -1
package/dist/cdp-log.d.ts +1 -1
package/dist/cdp-log.d.ts.map +1 -1
package/dist/cdp-log.js +1 -1
package/dist/cdp-log.js.map +1 -1
package/dist/cdp-relay.d.ts.map +1 -1
package/dist/cdp-relay.js +492 -298
package/dist/cdp-relay.js.map +1 -1
package/dist/cdp-session.d.ts.map +1 -1
package/dist/cdp-session.js.map +1 -1
package/dist/cdp-types.d.ts.map +1 -1
package/dist/cdp-types.js +7 -7
package/dist/cdp-types.js.map +1 -1
package/dist/clean-html.d.ts.map +1 -1
package/dist/clean-html.js +4 -5
package/dist/clean-html.js.map +1 -1
package/dist/cli.js +45 -27
package/dist/cli.js.map +1 -1
package/dist/create-logger.d.ts.map +1 -1
package/dist/create-logger.js +3 -1
package/dist/create-logger.js.map +1 -1
package/dist/debugger-examples-types.d.ts.map +1 -1
package/dist/debugger.d.ts.map +1 -1
package/dist/debugger.js +1 -3
package/dist/debugger.js.map +1 -1
package/dist/diff-utils.d.ts.map +1 -1
package/dist/diff-utils.js +1 -4
package/dist/diff-utils.js.map +1 -1
package/dist/editor-api.md +12 -2
package/dist/editor-examples.d.ts +1 -1
package/dist/editor-examples.d.ts.map +1 -1
package/dist/editor-examples.js +1 -1
package/dist/editor-examples.js.map +1 -1
package/dist/editor.d.ts +1 -1
package/dist/editor.d.ts.map +1 -1
package/dist/editor.js +1 -1
package/dist/editor.js.map +1 -1
package/dist/executor.d.ts +26 -3
package/dist/executor.d.ts.map +1 -1
package/dist/executor.js +297 -64
package/dist/executor.js.map +1 -1
package/dist/executor.unit.test.js +38 -1
package/dist/executor.unit.test.js.map +1 -1
package/dist/extension-connection.test.js +139 -36
package/dist/extension-connection.test.js.map +1 -1
package/dist/ffmpeg.d.ts +148 -0
package/dist/ffmpeg.d.ts.map +1 -0
package/dist/ffmpeg.js +523 -0
package/dist/ffmpeg.js.map +1 -0
package/dist/ghost-browser.d.ts.map +1 -1
package/dist/ghost-browser.js.map +1 -1
package/dist/ghost-cursor-client.js +287 -0
package/dist/ghost-cursor.d.ts +27 -0
package/dist/ghost-cursor.d.ts.map +1 -0
package/dist/ghost-cursor.js +63 -0
package/dist/ghost-cursor.js.map +1 -0
package/dist/htmlrewrite.d.ts.map +1 -1
package/dist/htmlrewrite.js +17 -55
package/dist/htmlrewrite.js.map +1 -1
package/dist/htmlrewrite.test.js.map +1 -1
package/dist/kill-port.d.ts.map +1 -1
package/dist/kill-port.js +1 -3
package/dist/kill-port.js.map +1 -1
package/dist/locator-selector.test.d.ts +2 -0
package/dist/locator-selector.test.d.ts.map +1 -0
package/dist/locator-selector.test.js +96 -0
package/dist/locator-selector.test.js.map +1 -0
package/dist/mcp-client.js.map +1 -1
package/dist/mcp.d.ts.map +1 -1
package/dist/mcp.js +8 -3
package/dist/mcp.js.map +1 -1
package/dist/on-mouse-action.test.d.ts +2 -0
package/dist/on-mouse-action.test.d.ts.map +1 -0
package/dist/on-mouse-action.test.js +155 -0
package/dist/on-mouse-action.test.js.map +1 -0
package/dist/page-markdown.js +4 -4
package/dist/page-markdown.js.map +1 -1
package/dist/prompt.md +450 -377
package/dist/protocol.d.ts +4 -0
package/dist/protocol.d.ts.map +1 -1
package/dist/readability.js +16 -2
package/dist/recording-ghost-cursor.d.ts +41 -0
package/dist/recording-ghost-cursor.d.ts.map +1 -0
package/dist/recording-ghost-cursor.js +79 -0
package/dist/recording-ghost-cursor.js.map +1 -0
package/dist/recording-relay.d.ts.map +1 -1
package/dist/recording-relay.js +8 -8
package/dist/recording-relay.js.map +1 -1
package/dist/relay-client.d.ts +17 -4
package/dist/relay-client.d.ts.map +1 -1
package/dist/relay-client.js +45 -11
package/dist/relay-client.js.map +1 -1
package/dist/relay-core.test.d.ts.map +1 -1
package/dist/relay-core.test.js +515 -26
package/dist/relay-core.test.js.map +1 -1
package/dist/relay-navigation.test.d.ts.map +1 -1
package/dist/relay-navigation.test.js +169 -31
package/dist/relay-navigation.test.js.map +1 -1
package/dist/relay-session.test.d.ts.map +1 -1
package/dist/relay-session.test.js +113 -65
package/dist/relay-session.test.js.map +1 -1
package/dist/relay-state.d.ts +158 -0
package/dist/relay-state.d.ts.map +1 -0
package/dist/relay-state.js +306 -0
package/dist/relay-state.js.map +1 -0
package/dist/relay-state.test.d.ts +2 -0
package/dist/relay-state.test.d.ts.map +1 -0
package/dist/relay-state.test.js +472 -0
package/dist/relay-state.test.js.map +1 -0
package/dist/scoped-fs.d.ts.map +1 -1
package/dist/scoped-fs.js.map +1 -1
package/dist/screen-recording.d.ts +66 -4
package/dist/screen-recording.d.ts.map +1 -1
package/dist/screen-recording.js +150 -13
package/dist/screen-recording.js.map +1 -1
package/dist/screen-recording.test.d.ts +2 -0
package/dist/screen-recording.test.d.ts.map +1 -0
package/dist/screen-recording.test.js +102 -0
package/dist/screen-recording.test.js.map +1 -0
package/dist/selector-generator.js +1 -1
package/dist/snapshot-tools.test.js +71 -28
package/dist/snapshot-tools.test.js.map +1 -1
package/dist/start-relay-server.d.ts +1 -1
package/dist/start-relay-server.d.ts.map +1 -1
package/dist/start-relay-server.js +1 -1
package/dist/start-relay-server.js.map +1 -1
package/dist/styles-api.md +8 -1
package/dist/styles-examples.d.ts +1 -1
package/dist/styles-examples.d.ts.map +1 -1
package/dist/styles-examples.js +1 -1
package/dist/styles-examples.js.map +1 -1
package/dist/styles.d.ts.map +1 -1
package/dist/styles.js +1 -3
package/dist/styles.js.map +1 -1
package/dist/test-declarations.d.ts.map +1 -1
package/dist/test-utils.d.ts +1 -1
package/dist/test-utils.d.ts.map +1 -1
package/dist/test-utils.js +7 -5
package/dist/test-utils.js.map +1 -1
package/dist/utils.d.ts.map +1 -1
package/dist/utils.js.map +1 -1
package/dist/wait-for-page-load.d.ts.map +1 -1
package/dist/wait-for-page-load.js +1 -1
package/dist/wait-for-page-load.js.map +1 -1
package/package.json +4 -3
package/src/a11y-client.ts +5 -4
package/src/aria-snapshot.test.ts +5 -2
package/src/aria-snapshot.ts +306 -117
package/src/aria-snapshot.unit.test.ts +199 -141
package/src/aria-snapshots/github-interactive.txt +2 -0
package/src/aria-snapshots/github-raw.txt +5 -1
package/src/aria-snapshots/hackernews-interactive.txt +238 -241
package/src/aria-snapshots/hackernews-raw.txt +265 -269
package/src/assets/aria-labels-example.png +0 -0
package/src/assets/aria-labels-github.png +0 -0
package/src/assets/aria-labels-hacker-news.png +0 -0
package/src/assets/aria-labels-old-reddit.png +0 -0
package/src/assets/cursors/screen-studio/pointer-macos-tahoe-data-url.ts +5 -0
package/src/assets/cursors/screen-studio/pointer-macos-tahoe.svg +18 -0
package/src/cdp-log.ts +4 -1
package/src/cdp-relay.ts +1059 -737
package/src/cdp-session.ts +12 -3
package/src/cdp-types.ts +51 -51
package/src/clean-html.ts +4 -5
package/src/cli.ts +82 -55
package/src/create-logger.ts +5 -3
package/src/debugger-examples-types.ts +4 -1
package/src/debugger.ts +1 -5
package/src/diff-utils.ts +2 -5
package/src/editor-examples.ts +11 -1
package/src/editor.ts +10 -2
package/src/executor.ts +374 -73
package/src/executor.unit.test.ts +48 -1
package/src/extension-connection.test.ts +612 -488
package/src/ffmpeg.ts +769 -0
package/src/ghost-browser.ts +4 -6
package/src/ghost-cursor-client.ts +369 -0
package/src/ghost-cursor.ts +110 -0
package/src/htmlrewrite.test.ts +6 -2
package/src/htmlrewrite.ts +348 -386
package/src/kill-port.ts +1 -3
package/src/locator-selector.test.ts +115 -0
package/src/mcp-client.ts +1 -1
package/src/mcp.ts +21 -15
package/src/on-mouse-action.test.ts +196 -0
package/src/page-markdown.ts +7 -7
package/src/protocol.ts +73 -57
package/src/recording-ghost-cursor.ts +113 -0
package/src/recording-relay.ts +20 -12
package/src/relay-client.ts +85 -18
package/src/relay-core.test.ts +1117 -578
package/src/relay-navigation.test.ts +648 -483
package/src/relay-session.test.ts +984 -929
package/src/relay-state.test.ts +570 -0
package/src/relay-state.ts +497 -0
package/src/resource.md +21 -49
package/src/scoped-fs.ts +9 -3
package/src/screen-recording.test.ts +111 -0
package/src/screen-recording.ts +256 -31
package/src/skill.md +476 -396
package/src/snapshot-tools.test.ts +580 -528
package/src/snapshots/shadcn-ui-accessibility-full.md +8 -8
package/src/snapshots/shadcn-ui-accessibility-interactive.md +8 -8
package/src/start-relay-server.ts +14 -11
package/src/styles-examples.ts +8 -1
package/src/styles.ts +20 -21
package/src/test-declarations.ts +6 -6
package/src/test-utils.ts +104 -91
package/src/utils.ts +2 -1
package/src/wait-for-page-load.ts +6 -1

package/src/skill.md CHANGED Viewed

@@ -14,6 +14,7 @@ If using npx or bunx always use @latest for the first session command. so we are
 ### Session management
 Each session runs in an **isolated sandbox** with its own `state` object. Use sessions to:
 - Keep state separate between different tasks or agents
 - Persist data (pages, variables) across multiple execute calls
 - Avoid interference when multiple agents use playwriter simultaneously
@@ -51,49 +52,55 @@ playwriter -s <sessionId> -e "<code>"
 The `-s` flag specifies a session ID (required). Get one with `playwriter session new`. Use the same session to persist state across commands.
-Default timeout is 10 seconds. you can increase the timeout with `--timeout <ms>`
 **Examples:**
 ```bash
 # Navigate to a page
-playwriter -s 1 -e "state.page = await context.newPage(); await state.page.goto('https://example.com')"
+playwriter -s 1 -e 'state.page = await context.newPage(); await state.page.goto("https://example.com")'
 # Click a button
-playwriter -s 1 -e "await page.click('button')"
+playwriter -s 1 -e 'await state.page.click("button")'
 # Get page title
-playwriter -s 1 -e "await page.title()"
+playwriter -s 1 -e 'await state.page.title()'
 # Take a screenshot
-playwriter -s 1 -e "await page.screenshot({ path: 'screenshot.png', scale: 'css' })"
+playwriter -s 1 -e 'await state.page.screenshot({ path: "screenshot.png", scale: "css" })'
 # Get accessibility snapshot
-playwriter -s 1 -e "await accessibilitySnapshot({ page })"
+playwriter -s 1 -e 'await snapshot({ page: state.page })'
 # Get accessibility snapshot for a specific iframe
-const frame = await page.locator('iframe').contentFrame()
-await accessibilitySnapshot({ frame })
+playwriter -s 1 -e 'const frame = await state.page.locator("iframe").contentFrame(); await snapshot({ frame })'
 ```
+**Why single quotes?** Always wrap `-e` code in single quotes (`'...'`) to prevent bash from interpreting `$`, backticks, and other special characters inside your JS code. Use double quotes or backtick template literals for strings inside the JS code.
 **Multiline code:**
 ```bash
-# Using $'...' syntax for multiline code
-playwriter -s 1 -e $'
-const title = await page.title();
-const url = page.url();
-console.log({ title, url });
-'
-# Or use heredoc
+# Preferred: use heredoc with quoted delimiter (disables all bash expansion)
 playwriter -s 1 -e "$(cat <<'EOF'
-const links = await page.$$eval('a', els => els.map(e => e.href));
+const links = await state.page.$$eval('a', els => els.map(e => e.href));
 console.log('Found', links.length, 'links');
+const price = text.match(/\$[\d.]+/);
 EOF
 )"
+# Alternative: $'...' syntax (but beware: \n and \t become special, and
+# single quotes inside must be escaped as \')
+playwriter -s 1 -e $'
+const title = await state.page.title();
+const url = state.page.url();
+console.log({ title, url });
+'
 ```
+**Quoting rules summary:**
+- **Single quotes** (`'...'`): best for one-liners. No bash expansion at all. But you cannot include a literal single quote inside — use double quotes for JS strings instead.
+- **Heredoc** (`<<'EOF'`): best for multiline code. The quoted `'EOF'` delimiter disables all bash expansion. Any character works inside, including `$`, backticks, and single quotes.
+- **`$'...'`**: allows `\'` escaping but `\n`, `\t`, `\\` become special — conflicts with JS regex patterns.
 ### Debugging playwriter issues
 If some internal critical error happens you can read the relay server logs to understand the issue. The log file is located in the user home directory:
@@ -119,350 +126,323 @@ If you find a bug, you can create a gh issue using `gh issue create -R remorses/
 Control user's Chrome browser via playwright code snippets. Prefer single-line code with semicolons between statements. Use playwriter immediately without waiting for user actions; only if you get "extension is not connected" or "no browser tabs have Playwriter enabled" should you ask the user to click the playwriter extension icon on the target tab.
+**When to use playwriter instead of webfetch/curl:** If a website is JS-heavy (SPAs like Instagram, Twitter, Facebook, etc.), has cookie consent modals, login walls, lazy-loaded content, carousels, or infinite scroll — **always use playwriter**. Simple fetch/webfetch will return an empty HTML shell with no content. Do NOT waste time trying curl, webfetch, or parsing raw HTML from JS-rendered sites. Go straight to playwriter: navigate with a real browser, dismiss modals, then extract what you need via `page.evaluate()` or network interception.
 **If Chrome is not running**, the extension can't connect. Start Chrome from the command line before retrying:
 ```bash
 # macOS
-open -a "Google Chrome"
+open -a "Google Chrome" --args --profile-directory=Default
 # Linux
-google-chrome &
+google-chrome --profile-directory=Default &
 # Windows (cmd)
-start chrome.exe
+start chrome.exe --profile-directory=Default
 # Windows (PowerShell)
-Start-Process chrome.exe
+Start-Process chrome.exe -ArgumentList '--profile-directory=Default'
 ```
 To also enable automatic tab capture for screen recording (no manual extension click needed), add the `--allowlisted-extension-id` and `--auto-accept-this-tab-capture` flags:
 ```bash
 # macOS
-open -a "Google Chrome" --args --allowlisted-extension-id=jfeammnjpkecdekppnclgkkffahnhfhe --auto-accept-this-tab-capture
+open -a "Google Chrome" --args --profile-directory=Default --allowlisted-extension-id=jfeammnjpkecdekppnclgkkffahnhfhe --auto-accept-this-tab-capture
 # Linux
-google-chrome --allowlisted-extension-id=jfeammnjpkecdekppnclgkkffahnhfhe --auto-accept-this-tab-capture &
+google-chrome --profile-directory=Default --allowlisted-extension-id=jfeammnjpkecdekppnclgkkffahnhfhe --auto-accept-this-tab-capture &
 # Windows
-start chrome.exe --allowlisted-extension-id=jfeammnjpkecdekppnclgkkffahnhfhe --auto-accept-this-tab-capture
+start chrome.exe --profile-directory=Default --allowlisted-extension-id=jfeammnjpkecdekppnclgkkffahnhfhe --auto-accept-this-tab-capture
 ```
 You can collaborate with the user - they can help with captchas, difficult elements, or reproducing bugs.
 ## context variables
-- `state` - object persisted between calls **within your session**. Each session has its own isolated state. Use to store pages, data, listeners (e.g., `state.myPage = await context.newPage()`)
+- `state` - object persisted between calls **within your session**. Each session has its own isolated state. Use to store pages, data, listeners (e.g., `state.page = await context.newPage()`)
 - `page` - a default page (may be shared with other agents). Prefer creating your own page and storing it in `state` (see "working with pages")
 - `context` - browser context, access all pages via `context.pages()`
-- `require` - load Node.js modules like fs
+- `require` - load Node.js modules (e.g., `const fs = require('node:fs')`). ESM `import` is not available in the sandbox
 - Node.js globals: `setTimeout`, `setInterval`, `fetch`, `URL`, `Buffer`, `crypto`, etc.
 **Important:** `state` is **session-isolated** but pages are **shared** across all sessions. See "working with pages" for how to avoid interference.
 ## rules
-- **Create your own page**: see "working with pages" — always create and store your own page in `state`, never use the default `page` for automation
+- **Initialize state.page first**: see "working with pages" — at the start of a task, assign `state.page` (reuse `about:blank` or create one) and use `state.page` for all automation steps.
 - **Multiple calls**: use multiple execute calls for complex logic - helps understand intermediate state and isolate which action failed
 - **Never close**: never call `browser.close()` or `context.close()`. Only close pages you created or if user asks
 - **No bringToFront**: never call unless user asks - it's disruptive and unnecessary, you can interact with background pages
 - **Check state after actions**: always verify page state after clicking/submitting (see next section)
-- **Clean up listeners**: call `page.removeAllListeners()` at end of message to prevent leaks
-- **CDP sessions**: use `getCDPSession({ page })` not `page.context().newCDPSession()` - NEVER use `newCDPSession()` method, it doesn't work through playwriter relay
-- **Wait for load**: use `page.waitForLoadState('domcontentloaded')` not `page.waitForEvent('load')` - waitForEvent times out if already loaded
-- **Avoid timeouts**: prefer proper waits over `page.waitForTimeout()` - there are better ways to wait for elements
+- **Clean up listeners**: call `state.page.removeAllListeners()` at end of message to prevent leaks
+- **CDP sessions**: use `getCDPSession({ page: state.page })` not `state.page.context().newCDPSession()` - NEVER use `newCDPSession()` method, it doesn't work through playwriter relay
+- **Wait for load**: use `state.page.waitForLoadState('domcontentloaded')` not `state.page.waitForEvent('load')` - waitForEvent times out if already loaded
+- **Minimize timeouts**: prefer proper waits (`waitForSelector`, `waitForPageLoad`) over `state.page.waitForTimeout()`. Short timeouts (1-2s) are acceptable for non-deterministic events like popups, animations, or tab opens where no specific selector is available
+- **Snapshot before screenshot**: always use `snapshot()` first to understand page state (text-based, fast, cheap). Only use `screenshot` when you specifically need visual/spatial information. Never take a screenshot just to check if a page loaded or to read text content — snapshot gives you that instantly without burning image tokens
+- **Snapshot replaces page.evaluate() for inspection**: do NOT write `page.evaluate()` calls to manually query class names, bounding boxes, child counts, or visibility flags. `snapshot()` already shows every interactive element with its text, role, and a ready-to-use locator. If you catch yourself writing `document.querySelector` or `getBoundingClientRect` inside evaluate — stop and use `snapshot()` instead. Reserve `page.evaluate()` for actions that modify page state (e.g., `localStorage.clear()`, scroll manipulation) or extract non-DOM data (e.g., `window.__CONFIG__`)
 ## interaction feedback loop
-Every browser interaction should follow a **observe → act → observe** loop. After every action, you must check its result before proceeding. Never chain multiple actions blindly — the page may not have responded as expected.
-**Core loop:**
+Every browser interaction must follow **observe → act → observe**. Never chain multiple actions blindly.
-1. **Open page** — get or create your page and navigate to the target URL
-2. **Observe** — take an accessibility snapshot to understand the current state
-3. **Update priors** — read the snapshot, identify the element to interact with
+1. **Open page** — get or create your page, navigate to URL
+2. **Observe** — print `state.page.url()` + `snapshot()`. Always print URL — pages can redirect unexpectedly.
+3. **Check** — if page isn't ready (loading, wrong URL, content missing), wait and observe again
 4. **Act** — perform one action (click, type, submit)
-5. **Observe again** — take another snapshot to verify the action's effect
-6. **Repeat** — continue from step 3 until the task is complete
-```
-┌─────────────────────────────────────────────┐
-│            open page + goto URL             │
-└──────────────────┬──────────────────────────┘
-                   ▼
-          ┌────────────────┐
-          │    observe      │◄─────────────────┐
-          │  (snapshot)     │                   │
-          └───────┬────────┘                   │
-                  ▼                            │
-          ┌────────────────┐                   │
-          │  update priors  │                   │
-          │  (read result)  │                   │
-          └───────┬────────┘                   │
-                  ▼                            │
-          ┌────────────────┐                   │
-          │      act        │                   │
-          │  (click/type)   │──────────────────┘
-          └────────────────┘
-```
-**Example: opening a Framer plugin via the command palette**
-Each step is a separate execute call. Notice how every action is followed by a snapshot to verify what happened:
+5. **Observe again** — print URL + snapshot to verify the action's effect
+6. **Repeat** from step 3 until task is complete
 ```js
-// 1. Open page and observe
-state.myPage = context.pages().find(p => p.url() === 'about:blank') ?? await context.newPage();
-await state.myPage.goto('https://framer.com/projects/my-project', { waitUntil: 'domcontentloaded' });
-await accessibilitySnapshot({ page: state.myPage }).then(console.log)
+// Each step should be a separate execute call:
+// Step 1: navigate + observe
+state.page = context.pages().find((p) => p.url() === 'about:blank') ?? (await context.newPage())
+await state.page.goto('https://example.com', { waitUntil: 'domcontentloaded' })
+console.log('URL:', state.page.url())
+await snapshot({ page: state.page }).then(console.log)
 ```
 ```js
-// 2. Act: open command palette → observe result
-await state.myPage.keyboard.press('Meta+k');
-await accessibilitySnapshot({ page: state.myPage, search: /dialog|Search/ }).then(console.log)
+// Step 2: act + observe
+await state.page.locator('button:has-text("Submit")').click()
+console.log('URL:', state.page.url())
+await snapshot({ page: state.page }).then(console.log)
 ```
-```js
-// 3. Act: type search query → observe result
-await state.myPage.keyboard.type('MCP');
-await accessibilitySnapshot({ page: state.myPage, search: /MCP/ }).then(console.log)
-```
+If nothing changed after an action, try `waitForPageLoad({ page: state.page, timeout: 3000 })` and observe again; if the page still has not changed, you may have clicked the wrong element.
-```js
-// 4. Act: press Enter → observe plugin loaded
-await state.myPage.keyboard.press('Enter');
-await state.myPage.waitForTimeout(1000);
-const frame = state.myPage.frames().find(f => f.url().includes('plugins.framercdn.com'));
-await accessibilitySnapshot({ page: state.myPage, frame: frame || undefined }).then(console.log)
-```
+**Deeper observation** — when snapshots aren't enough to understand what happened, combine multiple channels:
-**Other ways to observe action results:**
+```js
+// Check console for errors after an action
+const errors = await getLatestLogs({ page: state.page, search: /error|fail/i, count: 20 })
-Snapshots are the primary feedback mechanism, but some actions have side effects that are better observed through other channels:
+// Combine snapshot + logs for full picture
+const snap = await snapshot({ page: state.page, search: /dialog|error|message/ })
+const logs = await getLatestLogs({ page: state.page, search: /error/i, count: 10 })
+console.log('UI:', snap)
+console.log('Logs:', logs)
+```
-- **Console logs** — check for errors or app state after an action:
-  ```js
-  await getLatestLogs({ page, search: /error|fail/i, count: 20 })
-  ```
-- **Network requests** — verify API calls were made after a form submit or button click:
-  ```js
-  page.on('response', async res => { if (res.url().includes('/api/')) { console.log(res.status(), res.url()); } });
-  ```
-- **URL changes** — confirm navigation happened:
-  ```js
-  console.log(page.url())
-  ```
-- **Screenshots** — only when you need to verify visual layout (CSS, spatial positioning, colors). Snapshots are always preferred for content verification.
+Use `getLatestLogs()` for console errors, `state.page.url()` for navigation, screenshots only for visual layout issues.
 ## common mistakes to avoid
 **1. Not verifying actions succeeded**
 Always check page state after important actions (form submissions, uploads, typing). Your mental model can diverge from actual browser state:
 ```js
-await page.keyboard.type('my text');
-await accessibilitySnapshot({ page, search: /my text/ })
+await state.page.keyboard.type('my text')
+await snapshot({ page: state.page, search: /my text/ })
 // If verifying visual layout specifically, use screenshotWithAccessibilityLabels instead
 ```
 **2. Assuming paste/upload worked**
 Clipboard paste (`Meta+v`) can silently fail. For file uploads, prefer file input:
 ```js
 // Reliable: use file input
-const fileInput = page.locator('input[type="file"]').first();
-await fileInput.setInputFiles('/path/to/image.png');
+const fileInput = state.page.locator('input[type="file"]').first()
+await fileInput.setInputFiles('/path/to/image.png')
 // Unreliable: clipboard paste may silently fail (for example, if the target textarea is not focused first)
-await page.keyboard.press('Meta+v');  // always verify with screenshot!
+await state.page.keyboard.press('Meta+v') // always verify with screenshot!
 ```
 **3. Using stale locators from old snapshots**
-Locators (especially ones with `>> nth=`) can change when the page updates. Always get a fresh snapshot before clicking:
-```js
-// BAD: using ref from minutes ago
-await page.locator('[id="old-id"]').click();  // element may have changed
+Locators (especially ones with `>> nth=`) can change when the page updates. Always get a fresh snapshot before clicking, then immediately use locators from that output:
-// GOOD: get fresh snapshot, then immediately use locators from it
-await accessibilitySnapshot({ page, showDiffSinceLastCall: true })
+```js
+await snapshot({ page: state.page, showDiffSinceLastCall: true })
 // Now use the NEW locators from this output
 ```
 **4. Wrong assumptions about current page/element**
 Before destructive actions (delete, submit), verify you're targeting the right thing:
 ```js
 // Before deleting, verify it's the right item
-await page.screenshotWithAccessibilityLabels({ page });
+await screenshotWithAccessibilityLabels({ page: state.page })
 // READ the screenshot to confirm, THEN proceed with delete
 ```
 **5. Text concatenation without line breaks**
-`keyboard.type()` doesn't insert newlines from `\n` in strings. Use `keyboard.press('Enter')`:
-```js
-// BAD: newlines in string don't create line breaks
-await page.keyboard.type('Line 1\nLine 2');  // becomes "Line 1Line 2"
+`keyboard.type()` doesn't insert newlines from `\n` in strings. Use `keyboard.press('Enter')` between lines:
-// GOOD: use Enter key for line breaks
-await page.keyboard.type('Line 1');
-await page.keyboard.press('Enter');
-await page.keyboard.type('Line 2');
+```js
+await state.page.keyboard.type('Line 1')
+await state.page.keyboard.press('Enter')
+await state.page.keyboard.type('Line 2')
 ```
-**6. Quote escaping in $'...' syntax**
-When using `$'...'` for multiline code, nested quotes break parsing. Use different quote styles or escape them:
-```bash
-# BAD: nested double quotes break $'...'
-playwriter -s 1 -e $'await page.locator("[id=\"_r_a_\"]").click()'
+**6. Quote escaping in bash**
+Bash parses `$`, backticks, and `\` inside double-quoted strings. This silently corrupts JS code. Always use single quotes or heredoc:
-# GOOD: use single quotes inside, or template strings
-playwriter -s 1 -e $'await page.locator(\'[id="_r_a_"]\').click()'
+```bash
+# single quotes — bash passes everything through literally
+playwriter -s 1 -e 'await state.page.locator(`[id="_r_a_"]`).click()'
-# GOOD: use heredoc for complex quoting
+# heredoc for complex code with mixed quotes
 playwriter -s 1 -e "$(cat <<'EOF'
-await page.locator('[id="_r_a_"]').click()
+await state.page.locator('[id="_r_a_"]').click()
+const match = html.match(/\$[\d.]+/g)
 EOF
 )"
 ```
 **7. Using screenshots when snapshots suffice**
-Screenshots + image analysis is expensive and slow. Only use screenshots for visual/CSS issues:
-```js
-// BAD: screenshot to check if text appeared (wastes tokens on image analysis)
-await page.screenshot({ path: 'check.png', scale: 'css' });
-// GOOD: snapshot is text — fast, cheap, searchable
-await accessibilitySnapshot({ page, search: /expected text/i })
+Taking screenshots and analyzing the images is expensive and slow. Only use screenshots for visual/CSS issues; use snapshot for text checks:
-// GOOD: evaluate DOM directly for content checks
-const text = await page.evaluate(() => document.querySelector('.message')?.textContent);
+```js
+await snapshot({ page: state.page, search: /expected text/i })
 ```
 **8. Assuming page content loaded**
 Even after `goto()`, dynamic content may not be ready:
 ```js
-await page.goto('https://example.com');
+await state.page.goto('https://example.com')
 // Content may still be loading via JavaScript!
-await page.waitForSelector('article', { timeout: 10000 });
+await state.page.waitForSelector('article', { timeout: 10000 })
 // Or use waitForPageLoad utility
-await waitForPageLoad({ page, timeout: 5000 });
+await waitForPageLoad({ page: state.page, timeout: 5000 })
 ```
-**9. Login buttons that open popups**
-Playwriter extension cannot control popup windows. If a login button opens a popup (common with OAuth/SSO), use cmd+click to open in a new tab instead:
+**9. Not using playwriter for JS-rendered sites**
+Do NOT waste context trying webfetch, curl, or Playwright CLI screenshots on SPAs (Instagram, Twitter, etc.). These return empty HTML shells. Use playwriter directly:
 ```js
-// BAD: popup window is not controllable by playwriter
-await page.click('button:has-text("Login with Google")');
+state.page = context.pages().find((p) => p.url() === 'about:blank') ?? (await context.newPage())
+await state.page.goto('https://www.instagram.com/p/ABC123/', { waitUntil: 'domcontentloaded' })
+await waitForPageLoad({ page: state.page, timeout: 8000 })
+await snapshot({ page: state.page, search: /cookie|consent|accept/i }).then(console.log)
+```
-// GOOD: cmd+click opens in new tab that playwriter can control
-await page.locator('button:has-text("Login with Google")').click({ modifiers: ['Meta'] });
-await page.waitForTimeout(2000);
+**10. Login buttons that open popups**
+Playwriter cannot control popup windows. Use cmd+click to open in a new tab instead:
+```js
+await state.page.locator('button:has-text("Login with Google")').click({ modifiers: ['Meta'] })
+await state.page.waitForTimeout(2000)
 // Verify new tab opened - last page should be the login page
-const pages = context.pages();
-const loginPage = pages[pages.length - 1];
-if (loginPage.url() === page.url()) {
-  throw new Error('Cmd+click did not open new tab - login may have opened as popup');
+const pages = context.pages()
+const loginPage = pages[pages.length - 1]
+if (loginPage.url() === state.page.url()) {
+  throw new Error('Cmd+click did not open new tab - login may have opened as popup')
 }
 // Complete login flow in loginPage, cookies are shared with original page
-await loginPage.locator('[data-email]').first().click();
-await loginPage.waitForURL('**/callback**');
+await loginPage.locator('[data-email]').first().click()
+await loginPage.waitForURL('**/callback**')
 // Original page should now be authenticated
 ```
-## checking page state
-After any action (click, submit, navigate), verify what happened. **Always prefer accessibility snapshots over screenshots** — snapshots are text (cheap, fast, searchable), screenshots require image analysis (expensive, slow).
+**11. Click times out or does nothing — snapshot to find the blocker**
+When a click times out, a **modal or overlay** is likely intercepting pointer events. Do not retry with different selectors or `{ force: true }` — snapshot to find the blocker:
 ```js
-// Default: use snapshot with optional filtering
-page.url() + '\n' + await accessibilitySnapshot({ page })
+// click timed out → don't retry blindly, find what's blocking
+await snapshot({ page: state.page, search: /dialog|modal/i })
+// Found modal → interact with it properly (don't just close via X, it may reappear)
+await state.page.getByRole('radio', { name: 'Nope, Vanilla' }).click()
+```
+**12. Never use `dispatchEvent` or `{ force: true }` to bypass blockers**
+`dispatchEvent(new MouseEvent(...))`, `{ force: true }`, and `element.click()` inside `page.evaluate()` bypass Playwright checks but **do not trigger React/Vue/Svelte handlers** — state won't update. Use snapshot to find the real interactive element:
-// Filter for specific content when snapshot is large
-await accessibilitySnapshot({ page, search: /dialog|button|error/i })
+```js
+await state.page.getByRole('radio', { name: 'Node.js' }).click()
 ```
-Only use `screenshotWithAccessibilityLabels({ page })` for **visual layout issues** (CSS bugs, spatial positioning, colors). For verifying text content, button states, or form values, snapshots are always sufficient.
+**13. Over-investigating instead of just interacting**
+When something doesn't respond to a click, do NOT start inspecting CDP event listeners, React fibers, canvas pixel data, or writing `page.evaluate()` to read class names and bounding boxes. This wastes massive context. Instead:
-If nothing changed, try `await waitForPageLoad({ page, timeout: 3000 })` or you may have clicked the wrong element.
+1. Take a `snapshot()` — it shows every interactive element and what to click
+2. Try a different interaction pattern if `click()` didn't work:
+   - **Drawing/annotation tools, canvas paint** → `mouse.down`, move with steps, `mouse.up` (see drag section)
+   - **Keyboard-activated modes** → press the shortcut key (snapshot shows tooltip text like "Draw mode D")
+   - **Sliders, timeline scrubbers** → drag pattern
+   - **Collapsed/toggled toolbars** → click the toggle first, wait, then interact
+3. Take another `snapshot()` to see what changed
+4. Only investigate DOM internals if correct interaction patterns produce zero response after 2–3 attempts
 ## accessibility snapshots
 ```js
-await accessibilitySnapshot({ page, search?, showDiffSinceLastCall? })
+await snapshot({ page: state.page, search?, showDiffSinceLastCall? })
 ```
 - `search` - string/regex to filter results (returns first 10 matching lines)
-- `showDiffSinceLastCall` - returns diff since last snapshot (default: `true`). Pass `false` to get full snapshot.
+- `showDiffSinceLastCall` - returns diff since last snapshot (default: `true`, but `false` when `search` is provided). Pass `false` to get full snapshot.
-Snapshots return full content on first call, then diffs on subsequent calls. If nothing changed, returns "No changes since last snapshot" message. Use `showDiffSinceLastCall: false` to always get full content.
+Snapshots return full content on first call, then diffs on subsequent calls. Diff is only returned when shorter than full content. If nothing changed, returns "No changes since last snapshot" message. Use `showDiffSinceLastCall: false` to always get full content. When `search` is provided, diffing is disabled by default so the search filters the full content — pass `showDiffSinceLastCall: true` explicitly to combine both. This diffing behavior also applies to `getCleanHTML` and `getPageMarkdown`.
 Example output:
 ```md
 - banner:
-    - link "Home" [id="nav-home"]
-    - navigation:
-        - link "Docs" [data-testid="docs-link"]
-        - link "Blog" role=link[name="Blog"]
+  - link "Home" [id="nav-home"]
+  - navigation:
+    - link "Docs" [data-testid="docs-link"]
+    - link "Blog" role=link[name="Blog"]
 ```
-Each interactive line ends with a Playwright locator you can pass to `page.locator()`.
+Each interactive line ends with a Playwright locator you can pass to `state.page.locator()`.
 If multiple elements share the same locator, a `>> nth=N` suffix is added (0-based)
 to make it unique.
-If a screenshot shows ref labels like `e3`, resolve them using the last snapshot:
+**Use snapshot locators directly — never invent selectors.** The snapshot output IS the selector. Do not guess CSS selectors or `getByText` when the snapshot already gives you the exact match:
 ```js
-const snapshot = await accessibilitySnapshot({ page })
-const locator = refToLocator({ ref: 'e3' })
-await page.locator(locator!).click()
+// Snapshot shows: role=radio[name="Nope, Vanilla"]  →  use it directly
+await state.page.getByRole('radio', { name: 'Nope, Vanilla' }).click()
+// Snapshot shows: role=link[name="SIGN IN"]  →  or pass raw string to locator()
+await state.page.locator('role=link[name="SIGN IN"]').click()
 ```
+**Beware CSS text-transform**: snapshots show visual text (`heading "NODE.JS"`) but DOM may be `"Node.js"`. Use case-insensitive regex: `getByRole('heading', { name: /node\.js/i })`.
+If a screenshot shows ref labels like `e3`, resolve them using the last snapshot:
 ```js
-await page.locator('[id="nav-home"]').click()
-await page.locator('[data-testid="docs-link"]').click()
-await page.locator('role=link[name="Blog"]').click()
+const snap = await snapshot({ page: state.page })
+const locator = refToLocator({ ref: 'e3' })
+await state.page.locator(locator!).click()
 ```
 Search for specific elements:
 ```js
-const snapshot = await accessibilitySnapshot({ page, search: /button|submit/i })
+const snap = await snapshot({ page: state.page, search: /button|submit/i })
 ```
-**Filtering large snapshots in JS** — when the built-in `search` isn't enough (e.g., you need multiple patterns or custom logic), filter the snapshot string directly:
+**Scoping snapshots to a specific element** — pass a `locator` instead of `page` to snapshot only a subtree. This dramatically reduces output size when you only care about one section of the page (e.g., the main content area, ignoring the sidebar/header/footer):
 ```js
-const snap = await accessibilitySnapshot({ page, showDiffSinceLastCall: false });
-const relevant = snap.split('\n').filter(l =>
-  l.includes('dialog') || l.includes('error') || l.includes('button')
-).join('\n');
-console.log(relevant);
-```
+// Full page snapshot: ~150 lines (sidebar, nav, header, footer, everything)
+await snapshot({ page: state.page })
-This is much cheaper than taking a screenshot — use it as your primary debugging tool for verifying text content, checking if elements exist, or confirming state changes.
+// Scoped to main: ~20 lines (just the content you care about)
+await snapshot({ locator: state.page.locator('main') })
-## choosing between snapshot methods
+// Scope to a specific form, dialog, or section
+await snapshot({ locator: state.page.locator('[role="dialog"]') })
+await snapshot({ locator: state.page.locator('form#checkout') })
+```
-Both `accessibilitySnapshot` and `screenshotWithAccessibilityLabels` use the same ref system, so you can combine them effectively.
+Use this whenever the full page snapshot is dominated by navigation or layout elements you don't need. It saves significant tokens and makes the output much easier to parse.
-**Use `accessibilitySnapshot` when:**
-- Page has simple, semantic structure (articles, forms, lists)
-- You need to search for specific text or patterns
-- Token usage matters (text is smaller than images)
-- You need to process the output programmatically
+**Filtering large snapshots in JS** — when `search` isn't enough, filter the string directly: `snap.split('\n').filter(l => l.includes('dialog') || l.includes('error')).join('\n')`
-**Use `screenshotWithAccessibilityLabels` when:**
-- Page has complex visual layout (grids, galleries, dashboards, maps)
-- Spatial position matters (e.g., "first image", "top-left button")
-- DOM order doesn't match visual order
-- You need to understand the visual hierarchy
+## choosing between snapshot methods
-**Combining both:** Use screenshot first to understand layout and identify target elements visually, then use `accessibilitySnapshot({ search: /pattern/ })` for efficient searching in subsequent calls.
+Use `snapshot` for text-heavy pages (forms, articles) — fast, cheap, searchable. Use `screenshotWithAccessibilityLabels` for complex visual layouts (grids, galleries, dashboards) where spatial position matters. Both share the same ref system and can be combined.
 ## selector best practices
-**For unknown websites**: use `accessibilitySnapshot()` - it shows what's actually interactive with stable locators.
+**For unknown websites**: use `snapshot()` - it shows what's actually interactive with stable locators.
 **For development** (when you have source code access), prefer stable selectors in this order:
@@ -476,16 +456,16 @@ Both `accessibilitySnapshot` and `screenshotWithAccessibilityLabels` use the sam
 Combine locators for precision:
 ```js
-page.locator('tr').filter({ hasText: 'John' }).locator('button').click()
-page.locator('button').nth(2).click()
+state.page.locator('tr').filter({ hasText: 'John' }).locator('button').click()
+state.page.locator('button').nth(2).click()
 ```
 If a locator matches multiple elements, Playwright throws "strict mode violation". Use `.first()`, `.last()`, or `.nth(n)`:
 ```js
-await page.locator('button').first().click()  // first match
-await page.locator('.item').last().click()    // last match
-await page.locator('li').nth(3).click()       // 4th item (0-indexed)
+await state.page.locator('button').first().click() // first match
+await state.page.locator('.item').last().click() // last match
+await state.page.locator('li').nth(3).click() // 4th item (0-indexed)
 ```
 ## working with pages
@@ -494,15 +474,15 @@ await page.locator('li').nth(3).click()       // 4th item (0-indexed)
 **Get or create your page (first call):**
-On your very first execute call, reuse an existing empty tab or create a new one, and navigate it **in the same execute call**. Store it in `state` and use `state.myPage` for all subsequent operations instead of the default `page` variable:
+On your very first execute call, reuse an existing empty tab or create a new one, and navigate it **in the same execute call**. Store it in `state` and use `state.page` for all subsequent operations instead of the default `page` variable:
 ```js
 // Reuse an empty about:blank tab if available, otherwise create a new one.
 // IMPORTANT: always navigate immediately in the same call to avoid another
 // agent grabbing the same about:blank tab between execute calls.
-state.myPage = context.pages().find(p => p.url() === 'about:blank') ?? await context.newPage();
-await state.myPage.goto('https://example.com');
-// Use state.myPage for ALL subsequent operations
+state.page = context.pages().find((p) => p.url() === 'about:blank') ?? (await context.newPage())
+await state.page.goto('https://example.com')
+// Use state.page for ALL subsequent operations
 ```
 **Handle page closures gracefully:**
@@ -510,10 +490,10 @@ await state.myPage.goto('https://example.com');
 The user may close your page by accident (e.g., closing a tab in Chrome). Always check before using it and recreate if needed:
 ```js
-if (!state.myPage || state.myPage.isClosed()) {
-  state.myPage = context.pages().find(p => p.url() === 'about:blank') ?? await context.newPage();
+if (!state.page || state.page.isClosed()) {
+  state.page = context.pages().find((p) => p.url() === 'about:blank') ?? (await context.newPage())
 }
-await state.myPage.goto('https://example.com');
+await state.page.goto('https://example.com')
 ```
 **Use an existing page only when the user asks:**
@@ -521,16 +501,16 @@ await state.myPage.goto('https://example.com');
 Only use a page from `context.pages()` if the user explicitly asks you to control a specific tab they already opened (e.g., they're logged into an app). Find it by URL pattern and store it in state:
 ```js
-const pages = context.pages().filter(x => x.url().includes('myapp.com'));
-if (pages.length === 0) throw new Error('No myapp.com page found. Ask user to enable playwriter on it.');
-if (pages.length > 1) throw new Error(`Found ${pages.length} matching pages, expected 1`);
-state.targetPage = pages[0];
+const pages = context.pages().filter((x) => x.url().includes('myapp.com'))
+if (pages.length === 0) throw new Error('No myapp.com page found. Ask user to enable playwriter on it.')
+if (pages.length > 1) throw new Error(`Found ${pages.length} matching pages, expected 1`)
+state.targetPage = pages[0]
 ```
 **List all available pages:**
 ```js
-context.pages().map(p => p.url())
+context.pages().map((p) => p.url())
 ```
 ## navigation
@@ -538,42 +518,49 @@ context.pages().map(p => p.url())
 **Use `domcontentloaded`** for `page.goto()`:
 ```js
-await page.goto('https://example.com', { waitUntil: 'domcontentloaded' });
-await waitForPageLoad({ page, timeout: 5000 });
+await state.page.goto('https://example.com', { waitUntil: 'domcontentloaded' })
+await waitForPageLoad({ page: state.page, timeout: 5000 })
 ```
 ## common patterns
-**Authenticated fetches** - to access protected resources, fetch from within page context (includes session cookies automatically):
+**Authenticated fetches** - fetch from within page context to include session cookies automatically:
 ```js
-// BAD: curl/external requests don't have session cookies
-// curl -H "Cookie: ..." often fails due to missing cookies or CSRF
+const data = await state.page.evaluate(async (url) => {
+  const resp = await fetch(url)
+  return await resp.text()
+}, 'https://example.com/protected/resource')
+```
+**Read page cookies via CDP** - use `Network.getCookies` on the page CDP session:
-// GOOD: fetch inside page.evaluate uses browser's full session
-const data = await page.evaluate(async (url) => {
-  const resp = await fetch(url);
-  return await resp.text();
-}, 'https://example.com/protected/resource');
+```js
+const cdp = await getCDPSession({ page: state.page })
+const { cookies } = await cdp.send('Network.getCookies', { urls: [state.page.url()] })
+console.log(cookies)
 ```
+You MUST use `Network.getCookies` for page-scoped cookies in extension mode. `Storage.getCookies` is a root-session command and will fail in playwriter.
 **Downloading large data** - console output truncates large strings. Trigger a browser download instead:
 ```js
 // Fetch protected data and trigger download to user's Downloads folder
-await page.evaluate(async (url) => {
-  const resp = await fetch(url);
-  const data = await resp.text();
-  const blob = new Blob([data], { type: 'application/octet-stream' });
-  const a = document.createElement('a');
-  a.href = URL.createObjectURL(blob);
-  a.download = 'data.json';
-  a.click();
-}, 'https://example.com/protected/large-file');
+await state.page.evaluate(async (url) => {
+  const resp = await fetch(url)
+  const data = await resp.text()
+  const blob = new Blob([data], { type: 'application/octet-stream' })
+  const a = document.createElement('a')
+  a.href = URL.createObjectURL(blob)
+  a.download = 'data.json'
+  a.click()
+}, 'https://example.com/protected/large-file')
 // File saves to ~/Downloads - read it from there
 ```
 **Avoid permission-gated browser APIs** - some APIs require user permission prompts or special browser flags. These often fail silently or hang. Examples to avoid:
 - `navigator.clipboard.writeText()` - requires permission
 - Multiple concurrent downloads - browser may block
 - `window.showSaveFilePicker()` - requires user gesture
@@ -581,42 +568,76 @@ await page.evaluate(async (url) => {
 Instead, use simpler alternatives (single download via `a.click()`, store data in `state`, etc).
-**Links that open new tabs** - use cmd+click to open in a controllable new tab:
+**Downloads** - capture and save:
 ```js
-// For links with target=_blank or buttons that open popups
-await page.locator('a[target=_blank]').click({ modifiers: ['Meta'] });
-await page.waitForTimeout(1000);
-// New tab is last in context.pages()
-const pages = context.pages();
-const newTab = pages[pages.length - 1];
-console.log('New tab URL:', newTab.url());
+const [download] = await Promise.all([state.page.waitForEvent('download'), state.page.click('button.download')])
+await download.saveAs(`/tmp/${download.suggestedFilename()}`)
 ```
-Note: `page.waitForEvent('popup')` is unreliable - playwriter cannot control popup windows opened via `window.open`. Use cmd+click instead.
-**Downloads** - capture and save:
+**iFrames** - two approaches depending on what you need:
 ```js
-const [download] = await Promise.all([page.waitForEvent('download'), page.click('button.download')]);
-await download.saveAs(`/tmp/${download.suggestedFilename()}`);
+// frameLocator: for chaining locator operations (click, fill, etc.)
+const frame = state.page.frameLocator('#my-iframe')
+await frame.locator('button').click()
+// contentFrame: returns a Frame object, needed for snapshot({ frame })
+const frame2 = await state.page.locator('iframe').contentFrame()
+await snapshot({ frame: frame2 })
 ```
-**iFrames** - use frameLocator:
+**Dialogs** - handle alerts/confirms/prompts:
 ```js
-const frame = page.frameLocator('#my-iframe');
-await frame.locator('button').click();
+state.page.on('dialog', async (dialog) => {
+  console.log(dialog.message())
+  await dialog.accept()
+})
+await state.page.click('button.trigger-alert')
 ```
-**Dialogs** - handle alerts/confirms/prompts:
+**Handling page obstacles (cookie modals, login walls, age gates)** - most major websites show blocking overlays. Always check for these with `snapshot()` right after navigation and dismiss them before doing anything else:
 ```js
-page.on('dialog', async dialog => { console.log(dialog.message()); await dialog.accept(); });
-await page.click('button.trigger-alert');
+// After navigating, check for common obstacles
+await waitForPageLoad({ page: state.page, timeout: 5000 })
+const snap = await snapshot({
+  page: state.page,
+  search: /cookie|consent|accept|reject|decline|allow|age|verify|login|sign.in/i,
+})
+console.log(snap)
+// Look for dismiss/accept/decline buttons in the snapshot, then click them:
+// await state.page.locator('button:has-text("Accept")').click();
+// await state.page.locator('button:has-text("Decline optional")').click();
+// Then re-snapshot to confirm the modal is gone before proceeding
 ```
+If the page requires login and the user is already logged into Chrome, their session cookies are available — just navigate and the page should load authenticated. If not, ask the user for help or use their existing logged-in tab via `context.pages()`.
+**Extracting and downloading media (images, videos)** - use `page.evaluate()` to extract URLs from the rendered DOM, then download via Node.js in the sandbox. This is far more reliable than parsing raw HTML:
+```js
+// Extract all image URLs from rendered DOM
+const images = await state.page.evaluate(() =>
+  Array.from(document.querySelectorAll('img[src]')).map((img) => ({
+    src: img.src,
+    alt: img.alt,
+    width: img.naturalWidth,
+  })),
+)
+console.log(JSON.stringify(images, null, 2))
+// Download a specific image to disk
+const fs = require('node:fs')
+const resp = await fetch(images[0].src)
+const buf = Buffer.from(await resp.arrayBuffer())
+fs.writeFileSync('./downloaded-image.jpg', buf)
+console.log('Saved', buf.length, 'bytes')
+```
+For carousels or lazy-loaded galleries, you may need to click navigation arrows or scroll first, then re-extract. Use network interception (see "network interception" section) to capture high-resolution CDN URLs that may differ from the `img.src` thumbnails.
 ## utility functions
 **getLatestLogs** - retrieve captured browser console logs (up to 5000 per page, cleared on navigation):
@@ -625,51 +646,41 @@ await page.click('button.trigger-alert');
 await getLatestLogs({ page?, count?, search? })
 // Examples:
 const errors = await getLatestLogs({ search: /error/i, count: 50 })
-const pageLogs = await getLatestLogs({ page })
+const pageLogs = await getLatestLogs({ page: state.page })
 ```
-For custom log collection across runs, store in state: `state.logs = []; page.on('console', m => state.logs.push(m.text()))`
+For custom log collection across runs, store in state: `state.logs = []; state.page.on('console', m => state.logs.push(m.text()))`
 **getCleanHTML** - get cleaned HTML from a locator or page, with search and diffing:
 ```js
 await getCleanHTML({ locator, search?, showDiffSinceLastCall?, includeStyles? })
 // Examples:
-const html = await getCleanHTML({ locator: page.locator('body') })
-const html = await getCleanHTML({ locator: page, search: /button/i })
-const fullHtml = await getCleanHTML({ locator: page, showDiffSinceLastCall: false })  // disable diff
+const html = await getCleanHTML({ locator: state.page.locator('body') })
+const html = await getCleanHTML({ locator: state.page, search: /button/i })
+const fullHtml = await getCleanHTML({ locator: state.page, showDiffSinceLastCall: false })  // disable diff
 ```
 **Parameters:**
 - `locator` - Playwright Locator or Page to get HTML from
 - `search` - string/regex to filter results (returns first 10 matching lines with 5 lines context)
-- `showDiffSinceLastCall` - returns diff since last call (default: `true`). Pass `false` to get full HTML.
+- `showDiffSinceLastCall` - returns diff since last call (default: `true`, but `false` when `search` is provided). Pass `false` to get full HTML.
 - `includeStyles` - keep style and class attributes (default: false)
-**HTML processing:**
-The function cleans HTML for compact, readable output:
-- **Removes tags**: script, style, link, meta, noscript, svg, head
-- **Unwraps nested wrappers**: Empty divs/spans with no attributes that only wrap a single child are collapsed (e.g., `<div><div><div><p>text</p></div></div></div>` → `<div><p>text</p></div>`)
-- **Removes empty elements**: Elements with no attributes and no content are removed
-- **Truncates long values**: Attribute values >200 chars and text content >500 chars are truncated
-**Attributes kept (summary):**
-- Common semantic and ARIA attributes (e.g., `href`, `name`, `type`, `aria-*`)
-- All `data-*` test attributes
-- Frequently used test IDs and special attributes (e.g., `testid`, `qa`, `e2e`, `vimium-label`)
-Snapshots return full content on first call, then diffs on subsequent calls. Diff is only returned when shorter than full content.
+Cleans HTML automatically: removes script/style/svg/head tags, unwraps empty wrappers, removes empty elements, truncates long values. Keeps semantic attributes (`href`, `name`, `type`, `aria-*`, `data-*`).
 **getPageMarkdown** - extract main page content as plain text using Mozilla Readability (same algorithm as Firefox Reader View). Strips navigation, ads, sidebars, and other clutter. Returns formatted text with title, author, and content:
 ```js
-await getPageMarkdown({ page, search?, showDiffSinceLastCall? })
+await getPageMarkdown({ page: state.page, search?, showDiffSinceLastCall? })
 // Examples:
-const content = await getPageMarkdown({ page, showDiffSinceLastCall: false })  // full article
-const matches = await getPageMarkdown({ page, search: /API/i })  // search within content
+const content = await getPageMarkdown({ page: state.page, showDiffSinceLastCall: false })  // full article
+const matches = await getPageMarkdown({ page: state.page, search: /API/i })  // search within content
 ```
 **Output format:**
 ```
 # Article Title
@@ -681,130 +692,145 @@ The main article content as plain text, with paragraphs preserved...
 ```
 **Parameters:**
 - `page` - Playwright Page to extract content from
 - `search` - string/regex to filter content (returns first 10 matching lines with 5 lines context)
-- `showDiffSinceLastCall` - returns diff since last call (default: `true`). Pass `false` to get full content.
-Snapshots return full content on first call, then diffs on subsequent calls. Diff is only returned when shorter than full content.
-**Use cases:**
-- Extract article text for LLM processing without HTML noise
-- Get readable content from news sites, blogs, documentation
-- Compare content changes after interactions
+- `showDiffSinceLastCall` - returns diff since last call (default: `true`, but `false` when `search` is provided). Pass `false` to get full content.
 **waitForPageLoad** - smart load detection that ignores analytics/ads:
 ```js
-await waitForPageLoad({ page, timeout?, pollInterval?, minWait? })
+await waitForPageLoad({ page: state.page, timeout?, pollInterval?, minWait? })
 // Returns: { success, readyState, pendingRequests, waitTimeMs, timedOut }
 ```
 **getCDPSession** - send raw CDP commands:
 ```js
-const cdp = await getCDPSession({ page });
-const metrics = await cdp.send('Page.getLayoutMetrics');
+const cdp = await getCDPSession({ page: state.page })
+const metrics = await cdp.send('Page.getLayoutMetrics')
 ```
 **getLocatorStringForElement** - get stable Playwright selector from an element:
 ```js
-const selector = await getLocatorStringForElement(page.locator('[id="submit-btn"]'));
+const selector = await getLocatorStringForElement(state.page.locator('[id="submit-btn"]'))
 // => "getByRole('button', { name: 'Save' })"
 ```
 **getReactSource** - get React component source location (dev mode only):
 ```js
-const source = await getReactSource({ locator: page.locator('[data-testid="submit-btn"]') });
+const source = await getReactSource({ locator: state.page.locator('[data-testid="submit-btn"]') })
 // => { fileName, lineNumber, columnNumber, componentName }
 ```
 **getStylesForLocator** - inspect CSS styles applied to an element, like browser DevTools "Styles" panel. Useful for debugging styling issues, finding where a CSS property is defined (file:line), and checking inherited styles. Returns selector, source location, and declarations for each matching rule. ALWAYS fetch `https://playwriter.dev/resources/styles-api.md` first with curl or webfetch tool.
 ```js
-const styles = await getStylesForLocator({ locator: page.locator('.btn'), cdp: await getCDPSession({ page }) });
-console.log(formatStylesAsText(styles));
+const styles = await getStylesForLocator({
+  locator: state.page.locator('.btn'),
+  cdp: await getCDPSession({ page: state.page }),
+})
+console.log(formatStylesAsText(styles))
 ```
 **createDebugger** - set breakpoints, step through code, inspect variables at runtime. Useful for debugging issues that only reproduce in browser, understanding code flow, and inspecting state at specific points. Can pause on exceptions, evaluate expressions in scope, and blackbox framework code. ALWAYS fetch `https://playwriter.dev/resources/debugger-api.md` first.
 ```js
-const cdp = await getCDPSession({ page }); const dbg = createDebugger({ cdp }); await dbg.enable();
-const scripts = await dbg.listScripts({ search: 'app' });
-await dbg.setBreakpoint({ file: scripts[0].url, line: 42 });
+const cdp = await getCDPSession({ page: state.page })
+const dbg = createDebugger({ cdp })
+await dbg.enable()
+const scripts = await dbg.listScripts({ search: 'app' })
+await dbg.setBreakpoint({ file: scripts[0].url, line: 42 })
 // when paused: dbg.inspectLocalVariables(), dbg.stepOver(), dbg.resume()
 ```
 **createEditor** - view and live-edit page scripts and CSS at runtime. Edits are in-memory (persist until reload). Useful for testing quick fixes, searching page scripts with grep, and toggling debug flags. ALWAYS read `https://playwriter.dev/resources/editor-api.md` first.
 ```js
-const cdp = await getCDPSession({ page }); const editor = createEditor({ cdp }); await editor.enable();
-const matches = await editor.grep({ regex: /console\.log/ });
-await editor.edit({ url: matches[0].url, oldString: 'DEBUG = false', newString: 'DEBUG = true' });
+const cdp = await getCDPSession({ page: state.page })
+const editor = createEditor({ cdp })
+await editor.enable()
+const matches = await editor.grep({ regex: /console\.log/ })
+await editor.edit({ url: matches[0].url, oldString: 'DEBUG = false', newString: 'DEBUG = true' })
 ```
 **screenshotWithAccessibilityLabels** - take a screenshot with Vimium-style visual labels overlaid on interactive elements. Shows labels, captures screenshot, then removes labels. The image and accessibility snapshot are automatically included in the response. Can be called multiple times to capture multiple screenshots. Use a timeout of **20 seconds** for complex pages.
-Prefer this for pages with grids, image galleries, maps, or complex visual layouts where spatial position matters. For simple text-heavy pages, `accessibilitySnapshot` with search is faster and uses fewer tokens.
+Prefer this for pages with grids, image galleries, maps, or complex visual layouts where spatial position matters. For simple text-heavy pages, `snapshot` with search is faster and uses fewer tokens.
 ```js
-await screenshotWithAccessibilityLabels({ page });
+await screenshotWithAccessibilityLabels({ page: state.page })
 // Image and accessibility snapshot are automatically included in response
 // Use refs from snapshot to interact with elements
-await page.locator('[id="submit-btn"]').click();
+await state.page.locator('[id="submit-btn"]').click()
 // Can take multiple screenshots in one execution
-await screenshotWithAccessibilityLabels({ page });
-await page.click('button');
-await screenshotWithAccessibilityLabels({ page });
+await screenshotWithAccessibilityLabels({ page: state.page })
+await state.page.click('button')
+await screenshotWithAccessibilityLabels({ page: state.page })
 // Both images are included in the response
 ```
 Labels are color-coded: yellow=links, orange=buttons, coral=inputs, pink=checkboxes, peach=sliders, salmon=menus, amber=tabs.
-**startRecording / stopRecording** - record the page as a video at native FPS (30-60fps). Uses `chrome.tabCapture` in the extension context, so **recording survives page navigation**. Video is saved as mp4.
+**resizeImage** - shrink an image in-place so it consumes fewer tokens when read back into context. `await resizeImage({ input: './screenshot.png' })`. Also accepts `width`, `height`, `maxDimension`, `quality`, `output`.
+**recording.start / recording.stop** - record the page as a video at native FPS (30-60fps). Uses `chrome.tabCapture`, so **recording survives page navigation**. Auto-overlays a ghost cursor that follows mouse actions. Requires the user to have clicked the Playwriter extension icon on the tab. Auto-resizes the viewport to 16:9 (override with `aspectRatio: null`). Auto-stops after 15 min (override with `maxDurationMs`).
-**Note**: Recording requires the user to have clicked the Playwriter extension icon on the tab. This grants `activeTab` permission needed for `chrome.tabCapture`. Recording works on tabs where the icon was clicked - if you need to record a new tab, ask the user to click the icon on it first.
+For demos, use interaction methods (`locator.click()`, `page.mouse.move()`) instead of `goto()` to show realistic cursor motion.
 ```js
-// Start recording - outputPath must be specified upfront
-await startRecording({
-  page,
+await recording.start({
+  page: state.page,
   outputPath: './recording.mp4',
-  frameRate: 30,        // default: 30
-  audio: false,         // default: false (tab audio)
-  videoBitsPerSecond: 2500000  // 2.5 Mbps
-});
+  frameRate: 30, // default
+  audio: false, // default (tab audio)
+  videoBitsPerSecond: 2500000,
+  aspectRatio: { width: 16, height: 9 }, // default, set null to skip
+  maxDurationMs: 15 * 60 * 1000, // default, set 0 to disable
+})
+// Recording survives navigation
+await state.page.click('a')
+await state.page.waitForLoadState('domcontentloaded')
-// Navigate around - recording continues!
-await page.click('a');
-await page.waitForLoadState('domcontentloaded');
-await page.goBack();
+// Stop — save full result including executionTimestamps for createDemoVideo
+state.recordingResult = await recording.stop({ page: state.page })
-// Stop and get result
-const { path, duration, size } = await stopRecording({ page });
-console.log(`Saved ${size} bytes, duration: ${duration}ms`);
+// Other: recording.isRecording({ page }), recording.cancel({ page })
 ```
-Additional recording utilities:
-```js
-// Check if recording is active
-const { isRecording, startedAt } = await isRecording({ page });
+**ghostCursor.show / ghostCursor.hide** - show/hide cursor overlay for screenshots and demos:
-// Cancel recording without saving
-await cancelRecording({ page });
+```js
+await ghostCursor.show({ page: state.page, style: 'minimal' }) // 'minimal', 'dot', 'screenstudio'
+await ghostCursor.hide({ page: state.page })
 ```
-**Key difference from getDisplayMedia**: This approach uses `chrome.tabCapture` which runs in the extension context, not the page. The recording persists across navigations because the extension holds the `MediaRecorder`, not the page's JavaScript context.
+**createDemoVideo** - speeds up idle sections (time between execute() calls) while keeping interactions at normal speed. Requires `ffmpeg`/`ffprobe`. Timestamps are tracked automatically during recording and returned by `recording.stop()`. **Timeout**: processing can take 60–120+ seconds, so always pass `--timeout 120000` or higher.
+```js
+// After recording.stop(), save full result to state (executionTimestamps powers idle detection)
+state.recordingResult = await recording.stop({ page: state.page })
+// In a SEPARATE execute call with --timeout 120000:
+const demoPath = await createDemoVideo({
+  recordingPath: state.recordingResult.path,
+  durationMs: state.recordingResult.duration,
+  executionTimestamps: state.recordingResult.executionTimestamps,
+  speed: 6, // default 6x for idle sections
+})
+```
 ## pinned elements
 Users can right-click → "Copy Playwriter Element Reference" to store elements in `globalThis.playwriterPinnedElem1` (increments for each pin). The reference is copied to clipboard:
 ```js
-const el = await page.evaluateHandle(() => globalThis.playwriterPinnedElem1);
-await el.click();
+const el = await state.page.evaluateHandle(() => globalThis.playwriterPinnedElem1)
+await el.click()
 ```
 ## taking screenshots
@@ -812,24 +838,28 @@ await el.click();
 Always use `scale: 'css'` to avoid 2-4x larger images on high-DPI displays:
 ```js
-await page.screenshot({ path: 'shot.png', scale: 'css' });
+await state.page.screenshot({ path: 'shot.png', scale: 'css' })
 ```
-If you want to read back the image file into context make sure to resize it first, scaling down the image to make sure max size is 1500px. for example with `sips --resampleHeightWidthMax 1500 input.png --out output.png` on macOS.
+If you want to read back the image file into context, resize it first so it consumes fewer tokens:
+```js
+await resizeImage({ input: './shot.png' })
+```
 ## page.evaluate
 Code inside `page.evaluate()` runs in the browser - use plain JavaScript only, no TypeScript syntax. Return values and log them outside the callback (`console.log` inside evaluate runs in the browser, so its output is not visible):
 ```js
-const title = await page.evaluate(() => document.title);
-console.log('Title:', title);
+const title = await state.page.evaluate(() => document.title)
+console.log('Title:', title)
-const info = await page.evaluate(() => ({
-    url: location.href,
-    buttons: document.querySelectorAll('button').length,
-}));
-console.log(info);
+const info = await state.page.evaluate(() => ({
+  url: location.href,
+  buttons: document.querySelectorAll('button').length,
+}))
+console.log(info)
 ```
 ## loading files
@@ -837,7 +867,9 @@ console.log(info);
 Fill inputs with file content:
 ```js
-const fs = require('node:fs'); const content = fs.readFileSync('./data.txt', 'utf-8'); await page.locator('textarea').fill(content);
+const fs = require('node:fs')
+const content = fs.readFileSync('./data.txt', 'utf-8')
+await state.page.locator('textarea').fill(content)
 ```
 ## network interception
@@ -845,103 +877,151 @@ const fs = require('node:fs'); const content = fs.readFileSync('./data.txt', 'ut
 For scraping or reverse-engineering APIs, intercept network requests instead of scrolling the DOM. Store them in `state` to analyze across calls:
 ```js
-state.requests = []; state.responses = [];
-page.on('request', req => { if (req.url().includes('/api/')) state.requests.push({ url: req.url(), method: req.method(), headers: req.headers() }); });
-page.on('response', async res => { if (res.url().includes('/api/')) { try { state.responses.push({ url: res.url(), status: res.status(), body: await res.json() }); } catch {} } });
+state.requests = []
+state.responses = []
+state.page.on('request', (req) => {
+  if (req.url().includes('/api/')) state.requests.push({ url: req.url(), method: req.method(), headers: req.headers() })
+})
+state.page.on('response', async (res) => {
+  if (res.url().includes('/api/')) {
+    try {
+      state.responses.push({ url: res.url(), status: res.status(), body: await res.json() })
+    } catch {}
+  }
+})
 ```
 Then trigger actions (scroll, click, navigate) and analyze captured data:
 ```js
-console.log('Captured', state.responses.length, 'API calls');
-state.responses.forEach(r => console.log(r.status, r.url.slice(0, 80)));
+console.log('Captured', state.responses.length, 'API calls')
+state.responses.forEach((r) => console.log(r.status, r.url.slice(0, 80)))
 ```
 Inspect a specific response to understand schema:
 ```js
-const resp = state.responses.find(r => r.url.includes('users'));
-console.log(JSON.stringify(resp.body, null, 2).slice(0, 2000));
+const resp = state.responses.find((r) => r.url.includes('users'))
+console.log(JSON.stringify(resp.body, null, 2).slice(0, 2000))
 ```
 Replay API directly (useful for pagination):
 ```js
-const { url, headers } = state.requests.find(r => r.url.includes('feed'));
-const data = await page.evaluate(async ({ url, headers }) => { const res = await fetch(url, { headers }); return res.json(); }, { url, headers });
-console.log(data);
+const { url, headers } = state.requests.find((r) => r.url.includes('feed'))
+const data = await state.page.evaluate(
+  async ({ url, headers }) => {
+    const res = await fetch(url, { headers })
+    return res.json()
+  },
+  { url, headers },
+)
+console.log(data)
 ```
-Clean up listeners when done: `page.removeAllListeners('request'); page.removeAllListeners('response');`
+Clean up listeners when done: `state.page.removeAllListeners('request'); state.page.removeAllListeners('response');`
-## debugging web apps
+## computer use (low-level mouse/keyboard)
-When debugging why a web app isn't working (e.g., content not rendering, API errors, state issues), use these techniques **before** resorting to screenshots:
-**1. Console logs** — use `getLatestLogs` to check for errors:
+### clicking
 ```js
-const errors = await getLatestLogs({ page, search: /error|fail/i, count: 20 });
-const appLogs = await getLatestLogs({ page, search: /myComponent|state/i });
+// Preferred: by locator (stable, auto-waits, no coordinates needed)
+await state.page.locator('button[name="Submit"]').click()
+await state.page.locator('text=Login').click({ button: 'right' })
+await state.page.locator('text=Login').dblclick()
+await state.page
+  .locator('a')
+  .first()
+  .click({ modifiers: ['Meta'] }) // cmd+click opens new tab
+// By coordinates (when locators aren't available, e.g. canvas, maps, custom widgets)
+await state.page.mouse.click(450, 320) // left click
+await state.page.mouse.click(450, 320, { button: 'right' }) // right click
+await state.page.mouse.dblclick(450, 320) // double click
+await state.page.mouse.click(450, 320, { clickCount: 3 }) // triple click
+await state.page.mouse.click(450, 320, { modifiers: ['Shift'] }) // shift+click
 ```
-**2. DOM inspection via evaluate** — check content directly without screenshots:
+### hover
 ```js
-const info = await page.evaluate(() => {
-  const msgs = document.querySelectorAll('.message');
-  return Array.from(msgs).map(m => ({
-    text: m.textContent?.slice(0, 200),
-    visible: m.offsetHeight > 0,
-  }));
-});
-console.log(JSON.stringify(info, null, 2));
+await state.page.locator('.tooltip-trigger').hover() // by locator (preferred)
+await state.page.mouse.move(450, 320) // by coordinates
 ```
-**3. Combine snapshot + logs for full picture:**
+### scroll
 ```js
-await page.keyboard.press('Enter');
-await page.waitForTimeout(2000);
+// By locator (preferred)
+await state.page.locator('#footer').scrollIntoViewIfNeeded()
-const snap = await accessibilitySnapshot({ page, search: /dialog|error|message/ });
-const logs = await getLatestLogs({ page, search: /error/i, count: 10 });
-console.log('UI:', snap);
-console.log('Logs:', logs);
+// By pixel (for canvas, maps, infinite scroll)
+await state.page.mouse.wheel(0, 300) // scroll down 300px
+await state.page.mouse.wheel(0, -300) // scroll up
+await state.page.mouse.wheel(300, 0) // scroll right
+await state.page.mouse.wheel(-300, 0) // scroll left
+// Scroll at a specific position
+await state.page.mouse.move(450, 320)
+await state.page.mouse.wheel(0, 500)
+// Scroll inside a container
+await state.page.locator('.scrollable-list').evaluate((el) => {
+  el.scrollTop += 500
+})
 ```
-## capabilities
+### drag
-Examples of what playwriter can do:
-- Monitor console logs while user reproduces a bug
-- Intercept network requests to reverse-engineer APIs and build SDKs
-- Scrape data by replaying paginated API calls instead of scrolling DOM
-- Get accessibility snapshot to find elements, then automate interactions
-- Use visual screenshots to understand complex layouts like image grids, dashboards, or maps
-- Debug issues by collecting logs and controlling the page simultaneously
-- Handle popups, downloads, iframes, and dialog boxes
-- Record videos of browser sessions that survive page navigation
+```js
+// By locator (preferred)
+await state.page.locator('#item').dragTo(state.page.locator('#target'))
+// By coordinates (for canvas, sliders, custom drag targets)
+await state.page.mouse.move(100, 200)
+await state.page.mouse.down()
+await state.page.mouse.move(400, 500, { steps: 10 }) // steps for smooth drag
+await state.page.mouse.up()
+```
-## Ghost Browser integration
+**Freehand drawing, annotation widgets, and canvas tools** use this same `mouse.down → move → up` pattern. If a widget expects a drawn stroke (paint tools, annotation overlays, range sliders, timeline scrubbers), always use held-mouse motion — not `mouse.click()`:
-Playwriter supports [Ghost Browser](https://ghostbrowser.com/) for multi-identity automation. When running in Ghost Browser, the `chrome` object exposes APIs to control identities, proxies, and sessions - useful for managing multiple accounts, rotating proxies, or isolated cookie sessions.
+```js
+// Draw a stroke across a canvas or annotation layer
+await state.page.mouse.move(startX, startY)
+await state.page.mouse.down()
+await state.page.mouse.move(endX, endY, { steps: 15 }) // steps = smoother stroke
+await state.page.mouse.up()
+await state.page.waitForTimeout(500) // let the widget process the stroke
+```
+### key hold / release / repeat
 ```js
-// List identities and open tabs in different ones
-const identities = await chrome.projects.getIdentitiesList();
-await chrome.ghostPublicAPI.openTab({ url: 'https://reddit.com', identity: identities[0].id });
+// Hold modifier while pressing another key
+await state.page.keyboard.down('Shift')
+await state.page.keyboard.press('ArrowDown')
+await state.page.keyboard.up('Shift')
-// Assign proxies per tab or identity
-const proxies = await chrome.ghostProxies.getList();
-await chrome.ghostProxies.setTabProxy(tabId, proxies[0].id);
+// Repeat a key
+for (let i = 0; i < 5; i++) await state.page.keyboard.press('ArrowDown')
 ```
-For complete API reference with all methods, types, and examples, read:
-`extension/src/ghost-browser-api.d.ts`
+### resize viewport
-Note: Only works in Ghost Browser. In regular Chrome, calls fail with "not available".
+```js
+await state.page.setViewportSize({ width: 1280, height: 720 })
+```
-## debugging playwriter issues
+### region screenshot (zoom equivalent)
+```js
+await state.page.screenshot({ path: 'region.png', scale: 'css', clip: { x: 100, y: 200, width: 400, height: 300 } })
+```
+Prefer locator-based actions over coordinates — locators are stable across scroll/resize, auto-wait for elements, and don't require screenshot round-trips that burn ~800 image tokens per cycle.
+## Ghost Browser integration
-if some internal critical error happens you can read your own relay ws logs to understand the issue, it will show logs from extension, mcp and ws server together. then you can create a gh issue using `gh issue create -R remorses/playwriter --title title --body body`. ask for user confirmation before doing this.
+When running in [Ghost Browser](https://ghostbrowser.com/), the `chrome` object exposes APIs for multi-identity automation (identities, proxies, sessions). See `extension/src/ghost-browser-api.d.ts` for full API reference. Only works in Ghost Browser — calls fail in regular Chrome.