playwriter 0.0.63 → 0.0.89

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223) hide show
  1. package/dist/a11y-client.js +18 -8
  2. package/dist/aria-snapshot.d.ts +41 -3
  3. package/dist/aria-snapshot.d.ts.map +1 -1
  4. package/dist/aria-snapshot.js +134 -55
  5. package/dist/aria-snapshot.js.map +1 -1
  6. package/dist/aria-snapshot.test.js +5 -2
  7. package/dist/aria-snapshot.test.js.map +1 -1
  8. package/dist/aria-snapshot.unit.test.js +83 -41
  9. package/dist/aria-snapshot.unit.test.js.map +1 -1
  10. package/dist/assets/cursors/screen-studio/pointer-macos-tahoe-data-url.d.ts +5 -0
  11. package/dist/assets/cursors/screen-studio/pointer-macos-tahoe-data-url.d.ts.map +1 -0
  12. package/dist/assets/cursors/screen-studio/pointer-macos-tahoe-data-url.js +5 -0
  13. package/dist/assets/cursors/screen-studio/pointer-macos-tahoe-data-url.js.map +1 -0
  14. package/dist/bippy.js +1 -1
  15. package/dist/cdp-log.d.ts +1 -1
  16. package/dist/cdp-log.d.ts.map +1 -1
  17. package/dist/cdp-log.js +1 -1
  18. package/dist/cdp-log.js.map +1 -1
  19. package/dist/cdp-relay.d.ts.map +1 -1
  20. package/dist/cdp-relay.js +492 -298
  21. package/dist/cdp-relay.js.map +1 -1
  22. package/dist/cdp-session.d.ts.map +1 -1
  23. package/dist/cdp-session.js.map +1 -1
  24. package/dist/cdp-types.d.ts.map +1 -1
  25. package/dist/cdp-types.js +7 -7
  26. package/dist/cdp-types.js.map +1 -1
  27. package/dist/clean-html.d.ts.map +1 -1
  28. package/dist/clean-html.js +4 -5
  29. package/dist/clean-html.js.map +1 -1
  30. package/dist/cli.js +45 -27
  31. package/dist/cli.js.map +1 -1
  32. package/dist/create-logger.d.ts.map +1 -1
  33. package/dist/create-logger.js +3 -1
  34. package/dist/create-logger.js.map +1 -1
  35. package/dist/debugger-examples-types.d.ts.map +1 -1
  36. package/dist/debugger.d.ts.map +1 -1
  37. package/dist/debugger.js +1 -3
  38. package/dist/debugger.js.map +1 -1
  39. package/dist/diff-utils.d.ts.map +1 -1
  40. package/dist/diff-utils.js +1 -4
  41. package/dist/diff-utils.js.map +1 -1
  42. package/dist/editor-api.md +12 -2
  43. package/dist/editor-examples.d.ts +1 -1
  44. package/dist/editor-examples.d.ts.map +1 -1
  45. package/dist/editor-examples.js +1 -1
  46. package/dist/editor-examples.js.map +1 -1
  47. package/dist/editor.d.ts +1 -1
  48. package/dist/editor.d.ts.map +1 -1
  49. package/dist/editor.js +1 -1
  50. package/dist/editor.js.map +1 -1
  51. package/dist/executor.d.ts +26 -3
  52. package/dist/executor.d.ts.map +1 -1
  53. package/dist/executor.js +297 -64
  54. package/dist/executor.js.map +1 -1
  55. package/dist/executor.unit.test.js +38 -1
  56. package/dist/executor.unit.test.js.map +1 -1
  57. package/dist/extension-connection.test.js +139 -36
  58. package/dist/extension-connection.test.js.map +1 -1
  59. package/dist/ffmpeg.d.ts +148 -0
  60. package/dist/ffmpeg.d.ts.map +1 -0
  61. package/dist/ffmpeg.js +523 -0
  62. package/dist/ffmpeg.js.map +1 -0
  63. package/dist/ghost-browser.d.ts.map +1 -1
  64. package/dist/ghost-browser.js.map +1 -1
  65. package/dist/ghost-cursor-client.js +287 -0
  66. package/dist/ghost-cursor.d.ts +27 -0
  67. package/dist/ghost-cursor.d.ts.map +1 -0
  68. package/dist/ghost-cursor.js +63 -0
  69. package/dist/ghost-cursor.js.map +1 -0
  70. package/dist/htmlrewrite.d.ts.map +1 -1
  71. package/dist/htmlrewrite.js +17 -55
  72. package/dist/htmlrewrite.js.map +1 -1
  73. package/dist/htmlrewrite.test.js.map +1 -1
  74. package/dist/kill-port.d.ts.map +1 -1
  75. package/dist/kill-port.js +1 -3
  76. package/dist/kill-port.js.map +1 -1
  77. package/dist/locator-selector.test.d.ts +2 -0
  78. package/dist/locator-selector.test.d.ts.map +1 -0
  79. package/dist/locator-selector.test.js +96 -0
  80. package/dist/locator-selector.test.js.map +1 -0
  81. package/dist/mcp-client.js.map +1 -1
  82. package/dist/mcp.d.ts.map +1 -1
  83. package/dist/mcp.js +8 -3
  84. package/dist/mcp.js.map +1 -1
  85. package/dist/on-mouse-action.test.d.ts +2 -0
  86. package/dist/on-mouse-action.test.d.ts.map +1 -0
  87. package/dist/on-mouse-action.test.js +155 -0
  88. package/dist/on-mouse-action.test.js.map +1 -0
  89. package/dist/page-markdown.js +4 -4
  90. package/dist/page-markdown.js.map +1 -1
  91. package/dist/prompt.md +450 -377
  92. package/dist/protocol.d.ts +4 -0
  93. package/dist/protocol.d.ts.map +1 -1
  94. package/dist/readability.js +16 -2
  95. package/dist/recording-ghost-cursor.d.ts +41 -0
  96. package/dist/recording-ghost-cursor.d.ts.map +1 -0
  97. package/dist/recording-ghost-cursor.js +79 -0
  98. package/dist/recording-ghost-cursor.js.map +1 -0
  99. package/dist/recording-relay.d.ts.map +1 -1
  100. package/dist/recording-relay.js +8 -8
  101. package/dist/recording-relay.js.map +1 -1
  102. package/dist/relay-client.d.ts +17 -4
  103. package/dist/relay-client.d.ts.map +1 -1
  104. package/dist/relay-client.js +45 -11
  105. package/dist/relay-client.js.map +1 -1
  106. package/dist/relay-core.test.d.ts.map +1 -1
  107. package/dist/relay-core.test.js +515 -26
  108. package/dist/relay-core.test.js.map +1 -1
  109. package/dist/relay-navigation.test.d.ts.map +1 -1
  110. package/dist/relay-navigation.test.js +169 -31
  111. package/dist/relay-navigation.test.js.map +1 -1
  112. package/dist/relay-session.test.d.ts.map +1 -1
  113. package/dist/relay-session.test.js +113 -65
  114. package/dist/relay-session.test.js.map +1 -1
  115. package/dist/relay-state.d.ts +158 -0
  116. package/dist/relay-state.d.ts.map +1 -0
  117. package/dist/relay-state.js +306 -0
  118. package/dist/relay-state.js.map +1 -0
  119. package/dist/relay-state.test.d.ts +2 -0
  120. package/dist/relay-state.test.d.ts.map +1 -0
  121. package/dist/relay-state.test.js +472 -0
  122. package/dist/relay-state.test.js.map +1 -0
  123. package/dist/scoped-fs.d.ts.map +1 -1
  124. package/dist/scoped-fs.js.map +1 -1
  125. package/dist/screen-recording.d.ts +66 -4
  126. package/dist/screen-recording.d.ts.map +1 -1
  127. package/dist/screen-recording.js +150 -13
  128. package/dist/screen-recording.js.map +1 -1
  129. package/dist/screen-recording.test.d.ts +2 -0
  130. package/dist/screen-recording.test.d.ts.map +1 -0
  131. package/dist/screen-recording.test.js +102 -0
  132. package/dist/screen-recording.test.js.map +1 -0
  133. package/dist/selector-generator.js +1 -1
  134. package/dist/snapshot-tools.test.js +71 -28
  135. package/dist/snapshot-tools.test.js.map +1 -1
  136. package/dist/start-relay-server.d.ts +1 -1
  137. package/dist/start-relay-server.d.ts.map +1 -1
  138. package/dist/start-relay-server.js +1 -1
  139. package/dist/start-relay-server.js.map +1 -1
  140. package/dist/styles-api.md +8 -1
  141. package/dist/styles-examples.d.ts +1 -1
  142. package/dist/styles-examples.d.ts.map +1 -1
  143. package/dist/styles-examples.js +1 -1
  144. package/dist/styles-examples.js.map +1 -1
  145. package/dist/styles.d.ts.map +1 -1
  146. package/dist/styles.js +1 -3
  147. package/dist/styles.js.map +1 -1
  148. package/dist/test-declarations.d.ts.map +1 -1
  149. package/dist/test-utils.d.ts +1 -1
  150. package/dist/test-utils.d.ts.map +1 -1
  151. package/dist/test-utils.js +7 -5
  152. package/dist/test-utils.js.map +1 -1
  153. package/dist/utils.d.ts.map +1 -1
  154. package/dist/utils.js.map +1 -1
  155. package/dist/wait-for-page-load.d.ts.map +1 -1
  156. package/dist/wait-for-page-load.js +1 -1
  157. package/dist/wait-for-page-load.js.map +1 -1
  158. package/package.json +4 -3
  159. package/src/a11y-client.ts +5 -4
  160. package/src/aria-snapshot.test.ts +5 -2
  161. package/src/aria-snapshot.ts +306 -117
  162. package/src/aria-snapshot.unit.test.ts +199 -141
  163. package/src/aria-snapshots/github-interactive.txt +2 -0
  164. package/src/aria-snapshots/github-raw.txt +5 -1
  165. package/src/aria-snapshots/hackernews-interactive.txt +238 -241
  166. package/src/aria-snapshots/hackernews-raw.txt +265 -269
  167. package/src/assets/aria-labels-example.png +0 -0
  168. package/src/assets/aria-labels-github.png +0 -0
  169. package/src/assets/aria-labels-hacker-news.png +0 -0
  170. package/src/assets/aria-labels-old-reddit.png +0 -0
  171. package/src/assets/cursors/screen-studio/pointer-macos-tahoe-data-url.ts +5 -0
  172. package/src/assets/cursors/screen-studio/pointer-macos-tahoe.svg +18 -0
  173. package/src/cdp-log.ts +4 -1
  174. package/src/cdp-relay.ts +1059 -737
  175. package/src/cdp-session.ts +12 -3
  176. package/src/cdp-types.ts +51 -51
  177. package/src/clean-html.ts +4 -5
  178. package/src/cli.ts +82 -55
  179. package/src/create-logger.ts +5 -3
  180. package/src/debugger-examples-types.ts +4 -1
  181. package/src/debugger.ts +1 -5
  182. package/src/diff-utils.ts +2 -5
  183. package/src/editor-examples.ts +11 -1
  184. package/src/editor.ts +10 -2
  185. package/src/executor.ts +374 -73
  186. package/src/executor.unit.test.ts +48 -1
  187. package/src/extension-connection.test.ts +612 -488
  188. package/src/ffmpeg.ts +769 -0
  189. package/src/ghost-browser.ts +4 -6
  190. package/src/ghost-cursor-client.ts +369 -0
  191. package/src/ghost-cursor.ts +110 -0
  192. package/src/htmlrewrite.test.ts +6 -2
  193. package/src/htmlrewrite.ts +348 -386
  194. package/src/kill-port.ts +1 -3
  195. package/src/locator-selector.test.ts +115 -0
  196. package/src/mcp-client.ts +1 -1
  197. package/src/mcp.ts +21 -15
  198. package/src/on-mouse-action.test.ts +196 -0
  199. package/src/page-markdown.ts +7 -7
  200. package/src/protocol.ts +73 -57
  201. package/src/recording-ghost-cursor.ts +113 -0
  202. package/src/recording-relay.ts +20 -12
  203. package/src/relay-client.ts +85 -18
  204. package/src/relay-core.test.ts +1117 -578
  205. package/src/relay-navigation.test.ts +648 -483
  206. package/src/relay-session.test.ts +984 -929
  207. package/src/relay-state.test.ts +570 -0
  208. package/src/relay-state.ts +497 -0
  209. package/src/resource.md +21 -49
  210. package/src/scoped-fs.ts +9 -3
  211. package/src/screen-recording.test.ts +111 -0
  212. package/src/screen-recording.ts +256 -31
  213. package/src/skill.md +476 -396
  214. package/src/snapshot-tools.test.ts +580 -528
  215. package/src/snapshots/shadcn-ui-accessibility-full.md +8 -8
  216. package/src/snapshots/shadcn-ui-accessibility-interactive.md +8 -8
  217. package/src/start-relay-server.ts +14 -11
  218. package/src/styles-examples.ts +8 -1
  219. package/src/styles.ts +20 -21
  220. package/src/test-declarations.ts +6 -6
  221. package/src/test-utils.ts +104 -91
  222. package/src/utils.ts +2 -1
  223. package/src/wait-for-page-load.ts +6 -1
package/dist/prompt.md CHANGED
@@ -2,350 +2,323 @@
2
2
 
3
3
  Control user's Chrome browser via playwright code snippets. Prefer single-line code with semicolons between statements. Use playwriter immediately without waiting for user actions; only if you get "extension is not connected" or "no browser tabs have Playwriter enabled" should you ask the user to click the playwriter extension icon on the target tab.
4
4
 
5
+ **When to use playwriter instead of webfetch/curl:** If a website is JS-heavy (SPAs like Instagram, Twitter, Facebook, etc.), has cookie consent modals, login walls, lazy-loaded content, carousels, or infinite scroll — **always use playwriter**. Simple fetch/webfetch will return an empty HTML shell with no content. Do NOT waste time trying curl, webfetch, or parsing raw HTML from JS-rendered sites. Go straight to playwriter: navigate with a real browser, dismiss modals, then extract what you need via `page.evaluate()` or network interception.
6
+
5
7
  **If Chrome is not running**, the extension can't connect. Start Chrome from the command line before retrying:
6
8
 
7
9
  ```bash
8
10
  # macOS
9
- open -a "Google Chrome"
11
+ open -a "Google Chrome" --args --profile-directory=Default
10
12
 
11
13
  # Linux
12
- google-chrome &
14
+ google-chrome --profile-directory=Default &
13
15
 
14
16
  # Windows (cmd)
15
- start chrome.exe
17
+ start chrome.exe --profile-directory=Default
16
18
 
17
19
  # Windows (PowerShell)
18
- Start-Process chrome.exe
20
+ Start-Process chrome.exe -ArgumentList '--profile-directory=Default'
19
21
  ```
20
22
 
21
23
  To also enable automatic tab capture for screen recording (no manual extension click needed), add the `--allowlisted-extension-id` and `--auto-accept-this-tab-capture` flags:
22
24
 
23
25
  ```bash
24
26
  # macOS
25
- open -a "Google Chrome" --args --allowlisted-extension-id=jfeammnjpkecdekppnclgkkffahnhfhe --auto-accept-this-tab-capture
27
+ open -a "Google Chrome" --args --profile-directory=Default --allowlisted-extension-id=jfeammnjpkecdekppnclgkkffahnhfhe --auto-accept-this-tab-capture
26
28
 
27
29
  # Linux
28
- google-chrome --allowlisted-extension-id=jfeammnjpkecdekppnclgkkffahnhfhe --auto-accept-this-tab-capture &
30
+ google-chrome --profile-directory=Default --allowlisted-extension-id=jfeammnjpkecdekppnclgkkffahnhfhe --auto-accept-this-tab-capture &
29
31
 
30
32
  # Windows
31
- start chrome.exe --allowlisted-extension-id=jfeammnjpkecdekppnclgkkffahnhfhe --auto-accept-this-tab-capture
33
+ start chrome.exe --profile-directory=Default --allowlisted-extension-id=jfeammnjpkecdekppnclgkkffahnhfhe --auto-accept-this-tab-capture
32
34
  ```
33
35
 
34
36
  You can collaborate with the user - they can help with captchas, difficult elements, or reproducing bugs.
35
37
 
36
38
  ## context variables
37
39
 
38
- - `state` - object persisted between calls **within your session**. Each session has its own isolated state. Use to store pages, data, listeners (e.g., `state.myPage = await context.newPage()`)
40
+ - `state` - object persisted between calls **within your session**. Each session has its own isolated state. Use to store pages, data, listeners (e.g., `state.page = await context.newPage()`)
39
41
  - `page` - a default page (may be shared with other agents). Prefer creating your own page and storing it in `state` (see "working with pages")
40
42
  - `context` - browser context, access all pages via `context.pages()`
41
- - `require` - load Node.js modules like fs
43
+ - `require` - load Node.js modules (e.g., `const fs = require('node:fs')`). ESM `import` is not available in the sandbox
42
44
  - Node.js globals: `setTimeout`, `setInterval`, `fetch`, `URL`, `Buffer`, `crypto`, etc.
43
45
 
44
46
  **Important:** `state` is **session-isolated** but pages are **shared** across all sessions. See "working with pages" for how to avoid interference.
45
47
 
46
48
  ## rules
47
49
 
48
- - **Create your own page**: see "working with pages" — always create and store your own page in `state`, never use the default `page` for automation
50
+ - **Initialize state.page first**: see "working with pages" — at the start of a task, assign `state.page` (reuse `about:blank` or create one) and use `state.page` for all automation steps.
49
51
  - **Multiple calls**: use multiple execute calls for complex logic - helps understand intermediate state and isolate which action failed
50
52
  - **Never close**: never call `browser.close()` or `context.close()`. Only close pages you created or if user asks
51
53
  - **No bringToFront**: never call unless user asks - it's disruptive and unnecessary, you can interact with background pages
52
54
  - **Check state after actions**: always verify page state after clicking/submitting (see next section)
53
- - **Clean up listeners**: call `page.removeAllListeners()` at end of message to prevent leaks
54
- - **CDP sessions**: use `getCDPSession({ page })` not `page.context().newCDPSession()` - NEVER use `newCDPSession()` method, it doesn't work through playwriter relay
55
- - **Wait for load**: use `page.waitForLoadState('domcontentloaded')` not `page.waitForEvent('load')` - waitForEvent times out if already loaded
56
- - **Avoid timeouts**: prefer proper waits over `page.waitForTimeout()` - there are better ways to wait for elements
55
+ - **Clean up listeners**: call `state.page.removeAllListeners()` at end of message to prevent leaks
56
+ - **CDP sessions**: use `getCDPSession({ page: state.page })` not `state.page.context().newCDPSession()` - NEVER use `newCDPSession()` method, it doesn't work through playwriter relay
57
+ - **Wait for load**: use `state.page.waitForLoadState('domcontentloaded')` not `state.page.waitForEvent('load')` - waitForEvent times out if already loaded
58
+ - **Minimize timeouts**: prefer proper waits (`waitForSelector`, `waitForPageLoad`) over `state.page.waitForTimeout()`. Short timeouts (1-2s) are acceptable for non-deterministic events like popups, animations, or tab opens where no specific selector is available
59
+ - **Snapshot before screenshot**: always use `snapshot()` first to understand page state (text-based, fast, cheap). Only use `screenshot` when you specifically need visual/spatial information. Never take a screenshot just to check if a page loaded or to read text content — snapshot gives you that instantly without burning image tokens
60
+ - **Snapshot replaces page.evaluate() for inspection**: do NOT write `page.evaluate()` calls to manually query class names, bounding boxes, child counts, or visibility flags. `snapshot()` already shows every interactive element with its text, role, and a ready-to-use locator. If you catch yourself writing `document.querySelector` or `getBoundingClientRect` inside evaluate — stop and use `snapshot()` instead. Reserve `page.evaluate()` for actions that modify page state (e.g., `localStorage.clear()`, scroll manipulation) or extract non-DOM data (e.g., `window.__CONFIG__`)
57
61
 
58
62
  ## interaction feedback loop
59
63
 
60
- Every browser interaction should follow an **observe → act → observe** loop. After every action, you must check its result before proceeding. Never chain multiple actions blindly — the page may not have responded as expected.
61
-
62
- **Core loop:**
64
+ Every browser interaction must follow **observe → act → observe**. Never chain multiple actions blindly.
63
65
 
64
- 1. **Open page** — get or create your page and navigate to the target URL
65
- 2. **Observe** — take an accessibility snapshot to understand the current state
66
- 3. **Update priors** — read the snapshot, identify the element to interact with
66
+ 1. **Open page** — get or create your page, navigate to URL
67
+ 2. **Observe** — print `state.page.url()` + `snapshot()`. Always print the URL — pages can redirect unexpectedly.
68
+ 3. **Check** — if page isn't ready (loading, wrong URL, content missing), wait and observe again
67
69
  4. **Act** — perform one action (click, type, submit)
68
- 5. **Observe again** — take another snapshot to verify the action's effect
69
- 6. **Repeat** — continue from step 3 until the task is complete
70
-
71
- ```
72
- ┌─────────────────────────────────────────────┐
73
- │ open page + goto URL │
74
- └──────────────────┬──────────────────────────┘
75
-
76
- ┌────────────────┐
77
- │ observe │◄─────────────────┐
78
- │ (snapshot) │ │
79
- └───────┬────────┘ │
80
- ▼ │
81
- ┌────────────────┐ │
82
- │ update priors │ │
83
- │ (read result) │ │
84
- └───────┬────────┘ │
85
- ▼ │
86
- ┌────────────────┐ │
87
- │ act │ │
88
- │ (click/type) │──────────────────┘
89
- └────────────────┘
90
- ```
91
-
92
- **Example: opening a Framer plugin via the command palette**
93
-
94
- Each step is a separate execute call. Notice how every action is followed by a snapshot to verify what happened:
70
+ 5. **Observe again** — print URL + snapshot to verify the action's effect
71
+ 6. **Repeat** from step 3 until task is complete
95
72
 
96
73
  ```js
97
- // 1. Open page and observe
98
- state.myPage = context.pages().find(p => p.url() === 'about:blank') ?? await context.newPage();
99
- await state.myPage.goto('https://framer.com/projects/my-project', { waitUntil: 'domcontentloaded' });
100
- await accessibilitySnapshot({ page: state.myPage }).then(console.log)
74
+ // Each step should be a separate execute call:
75
+ // Step 1: navigate + observe
76
+ state.page = context.pages().find((p) => p.url() === 'about:blank') ?? (await context.newPage())
77
+ await state.page.goto('https://example.com', { waitUntil: 'domcontentloaded' })
78
+ console.log('URL:', state.page.url())
79
+ await snapshot({ page: state.page }).then(console.log)
101
80
  ```
102
81
 
103
82
  ```js
104
- // 2. Act: open command palette → observe result
105
- await state.myPage.keyboard.press('Meta+k');
106
- await accessibilitySnapshot({ page: state.myPage, search: /dialog|Search/ }).then(console.log)
83
+ // Step 2: act + observe
84
+ await state.page.locator('button:has-text("Submit")').click()
85
+ console.log('URL:', state.page.url())
86
+ await snapshot({ page: state.page }).then(console.log)
107
87
  ```
108
88
 
109
- ```js
110
- // 3. Act: type search query → observe result
111
- await state.myPage.keyboard.type('MCP');
112
- await accessibilitySnapshot({ page: state.myPage, search: /MCP/ }).then(console.log)
113
- ```
89
+ If nothing changed after an action, try `waitForPageLoad({ page: state.page, timeout: 3000 })` or you may have clicked the wrong element.
114
90
 
115
- ```js
116
- // 4. Act: press Enter → observe plugin loaded
117
- await state.myPage.keyboard.press('Enter');
118
- await state.myPage.waitForTimeout(1000);
119
- const frame = state.myPage.frames().find(f => f.url().includes('plugins.framercdn.com'));
120
- await accessibilitySnapshot({ page: state.myPage, frame: frame || undefined }).then(console.log)
121
- ```
91
+ **Deeper observation** — when snapshots aren't enough to understand what happened, combine multiple channels:
122
92
 
123
- **Other ways to observe action results:**
93
+ ```js
94
+ // Check console for errors after an action
95
+ const errors = await getLatestLogs({ page: state.page, search: /error|fail/i, count: 20 })
124
96
 
125
- Snapshots are the primary feedback mechanism, but some actions have side effects that are better observed through other channels:
97
+ // Combine snapshot + logs for full picture
98
+ const snap = await snapshot({ page: state.page, search: /dialog|error|message/ })
99
+ const logs = await getLatestLogs({ page: state.page, search: /error/i, count: 10 })
100
+ console.log('UI:', snap)
101
+ console.log('Logs:', logs)
102
+ ```
126
103
 
127
- - **Console logs** — check for errors or app state after an action:
128
- ```js
129
- await getLatestLogs({ page, search: /error|fail/i, count: 20 })
130
- ```
131
- - **Network requests** — verify API calls were made after a form submit or button click:
132
- ```js
133
- page.on('response', async res => { if (res.url().includes('/api/')) { console.log(res.status(), res.url()); } });
134
- ```
135
- - **URL changes** — confirm navigation happened:
136
- ```js
137
- console.log(page.url())
138
- ```
139
- - **Screenshots** — only when you need to verify visual layout (CSS, spatial positioning, colors). Snapshots are always preferred for content verification.
104
+ Use `getLatestLogs()` for console errors, `state.page.url()` for navigation, screenshots only for visual layout issues.
140
105
 
141
106
  ## common mistakes to avoid
142
107
 
143
108
  **1. Not verifying actions succeeded**
144
109
  Always check page state after important actions (form submissions, uploads, typing). Your mental model can diverge from actual browser state:
110
+
145
111
  ```js
146
- await page.keyboard.type('my text');
147
- await accessibilitySnapshot({ page, search: /my text/ })
112
+ await state.page.keyboard.type('my text')
113
+ await snapshot({ page: state.page, search: /my text/ })
148
114
  // If verifying visual layout specifically, use screenshotWithAccessibilityLabels instead
149
115
  ```
150
116
 
151
117
  **2. Assuming paste/upload worked**
152
118
  Clipboard paste (`Meta+v`) can silently fail. For file uploads, prefer file input:
119
+
153
120
  ```js
154
121
  // Reliable: use file input
155
- const fileInput = page.locator('input[type="file"]').first();
156
- await fileInput.setInputFiles('/path/to/image.png');
122
+ const fileInput = state.page.locator('input[type="file"]').first()
123
+ await fileInput.setInputFiles('/path/to/image.png')
157
124
 
158
125
  // Unreliable: clipboard paste may silently fail, need to focus textarea first for example
159
- await page.keyboard.press('Meta+v'); // always verify with screenshot!
126
+ await state.page.keyboard.press('Meta+v') // always verify with screenshot!
160
127
  ```
161
128
 
162
129
  **3. Using stale locators from old snapshots**
163
- Locators (especially ones with `>> nth=`) can change when the page updates. Always get a fresh snapshot before clicking:
164
- ```js
165
- // BAD: using ref from minutes ago
166
- await page.locator('[id="old-id"]').click(); // element may have changed
130
+ Locators (especially ones with `>> nth=`) can change when the page updates. Always get a fresh snapshot before clicking, then immediately use locators from that output:
167
131
 
168
- // GOOD: get fresh snapshot, then immediately use locators from it
169
- await accessibilitySnapshot({ page, showDiffSinceLastCall: true })
132
+ ```js
133
+ await snapshot({ page: state.page, showDiffSinceLastCall: true })
170
134
  // Now use the NEW locators from this output
171
135
  ```
172
136
 
173
137
  **4. Wrong assumptions about current page/element**
174
138
  Before destructive actions (delete, submit), verify you're targeting the right thing:
139
+
175
140
  ```js
176
141
  // Before deleting, verify it's the right item
177
- await page.screenshotWithAccessibilityLabels({ page });
142
+ await screenshotWithAccessibilityLabels({ page: state.page })
178
143
  // READ the screenshot to confirm, THEN proceed with delete
179
144
  ```
180
145
 
181
146
  **5. Text concatenation without line breaks**
182
- `keyboard.type()` doesn't insert newlines from `\n` in strings. Use `keyboard.press('Enter')`:
183
- ```js
184
- // BAD: newlines in string don't create line breaks
185
- await page.keyboard.type('Line 1\nLine 2'); // becomes "Line 1Line 2"
147
+ `keyboard.type()` doesn't insert newlines from `\n` in strings. Use `keyboard.press('Enter')` between lines:
186
148
 
187
- // GOOD: use Enter key for line breaks
188
- await page.keyboard.type('Line 1');
189
- await page.keyboard.press('Enter');
190
- await page.keyboard.type('Line 2');
149
+ ```js
150
+ await state.page.keyboard.type('Line 1')
151
+ await state.page.keyboard.press('Enter')
152
+ await state.page.keyboard.type('Line 2')
191
153
  ```
192
154
 
193
- **6. Quote escaping in $'...' syntax**
194
- When using `$'...'` for multiline code, nested quotes break parsing. Use different quote styles or escape them:
195
- ```bash
196
- # BAD: nested double quotes break $'...'
197
- playwriter -s 1 -e $'await page.locator("[id=\"_r_a_\"]").click()'
155
+ **6. Quote escaping in bash**
156
+ Bash parses `$`, backticks, and `\` inside double-quoted strings. This silently corrupts JS code. Always use single quotes or heredoc:
198
157
 
199
- # GOOD: use single quotes inside, or template strings
200
- playwriter -s 1 -e $'await page.locator(\'[id="_r_a_"]\').click()'
158
+ ```bash
159
+ # single quotes — bash passes everything through literally
160
+ playwriter -s 1 -e 'await state.page.locator(`[id="_r_a_"]`).click()'
201
161
 
202
- # GOOD: use heredoc for complex quoting
162
+ # heredoc for complex code with mixed quotes
203
163
  playwriter -s 1 -e "$(cat <<'EOF'
204
- await page.locator('[id="_r_a_"]').click()
164
+ await state.page.locator('[id="_r_a_"]').click()
165
+ const match = html.match(/\$[\d.]+/g)
205
166
  EOF
206
167
  )"
207
168
  ```
208
169
 
209
170
  **7. Using screenshots when snapshots suffice**
210
- Screenshots + image analysis is expensive and slow. Only use screenshots for visual/CSS issues:
211
- ```js
212
- // BAD: screenshot to check if text appeared (wastes tokens on image analysis)
213
- await page.screenshot({ path: 'check.png', scale: 'css' });
214
-
215
- // GOOD: snapshot is text — fast, cheap, searchable
216
- await accessibilitySnapshot({ page, search: /expected text/i })
171
+ Screenshots + image analysis is expensive and slow. Only use screenshots for visual/CSS issues. Use snapshot for text checks:
217
172
 
218
- // GOOD: evaluate DOM directly for content checks
219
- const text = await page.evaluate(() => document.querySelector('.message')?.textContent);
173
+ ```js
174
+ await snapshot({ page: state.page, search: /expected text/i })
220
175
  ```
221
176
 
222
177
  **8. Assuming page content loaded**
223
178
  Even after `goto()`, dynamic content may not be ready:
179
+
224
180
  ```js
225
- await page.goto('https://example.com');
181
+ await state.page.goto('https://example.com')
226
182
  // Content may still be loading via JavaScript!
227
- await page.waitForSelector('article', { timeout: 10000 });
183
+ await state.page.waitForSelector('article', { timeout: 10000 })
228
184
  // Or use waitForPageLoad utility
229
- await waitForPageLoad({ page, timeout: 5000 });
185
+ await waitForPageLoad({ page: state.page, timeout: 5000 })
230
186
  ```
231
187
 
232
- **9. Login buttons that open popups**
233
- Playwriter extension cannot control popup windows. If a login button opens a popup (common with OAuth/SSO), use cmd+click to open in a new tab instead:
188
+ **9. Not using playwriter for JS-rendered sites**
189
+ Do NOT waste context trying webfetch, curl, or Playwright CLI screenshots on SPAs (Instagram, Twitter, etc.). These return empty HTML shells. Use playwriter directly:
190
+
234
191
  ```js
235
- // BAD: popup window is not controllable by playwriter
236
- await page.click('button:has-text("Login with Google")');
192
+ state.page = context.pages().find((p) => p.url() === 'about:blank') ?? (await context.newPage())
193
+ await state.page.goto('https://www.instagram.com/p/ABC123/', { waitUntil: 'domcontentloaded' })
194
+ await waitForPageLoad({ page: state.page, timeout: 8000 })
195
+ await snapshot({ page: state.page, search: /cookie|consent|accept/i }).then(console.log)
196
+ ```
237
197
 
238
- // GOOD: cmd+click opens in new tab that playwriter can control
239
- await page.locator('button:has-text("Login with Google")').click({ modifiers: ['Meta'] });
240
- await page.waitForTimeout(2000);
198
+ **10. Login buttons that open popups**
199
+ Playwriter cannot control popup windows. Use cmd+click to open in a new tab instead:
200
+
201
+ ```js
202
+ await state.page.locator('button:has-text("Login with Google")').click({ modifiers: ['Meta'] })
203
+ await state.page.waitForTimeout(2000)
241
204
 
242
205
  // Verify new tab opened - last page should be the login page
243
- const pages = context.pages();
244
- const loginPage = pages[pages.length - 1];
245
- if (loginPage.url() === page.url()) {
246
- throw new Error('Cmd+click did not open new tab - login may have opened as popup');
206
+ const pages = context.pages()
207
+ const loginPage = pages[pages.length - 1]
208
+ if (loginPage.url() === state.page.url()) {
209
+ throw new Error('Cmd+click did not open new tab - login may have opened as popup')
247
210
  }
248
211
 
249
212
  // Complete login flow in loginPage, cookies are shared with original page
250
- await loginPage.locator('[data-email]').first().click();
251
- await loginPage.waitForURL('**/callback**');
213
+ await loginPage.locator('[data-email]').first().click()
214
+ await loginPage.waitForURL('**/callback**')
252
215
  // Original page should now be authenticated
253
216
  ```
254
217
 
255
- ## checking page state
256
-
257
- After any action (click, submit, navigate), verify what happened. **Always prefer accessibility snapshots over screenshots** — snapshots are text (cheap, fast, searchable), screenshots require image analysis (expensive, slow).
218
+ **11. Click times out or does nothing — snapshot to find the blocker**
219
+ When a click times out, a **modal or overlay** is likely intercepting pointer events. Do not retry with different selectors or `{ force: true }` — snapshot to find the blocker:
258
220
 
259
221
  ```js
260
- // Default: use snapshot with optional filtering
261
- page.url() + '\n' + await accessibilitySnapshot({ page })
222
+ // click timed out — don't retry blindly, find what's blocking
223
+ await snapshot({ page: state.page, search: /dialog|modal/i })
224
+ // Found modal → interact with it properly (don't just close via X, it may reappear)
225
+ await state.page.getByRole('radio', { name: 'Nope, Vanilla' }).click()
226
+ ```
227
+
228
+ **12. Never use `dispatchEvent` or `{ force: true }` to bypass blockers**
229
+ `dispatchEvent(new MouseEvent(...))`, `{ force: true }`, and `element.click()` inside `page.evaluate()` bypass Playwright checks but **do not trigger React/Vue/Svelte handlers** — state won't update. Use snapshot to find the real interactive element:
262
230
 
263
- // Filter for specific content when snapshot is large
264
- await accessibilitySnapshot({ page, search: /dialog|button|error/i })
231
+ ```js
232
+ await state.page.getByRole('radio', { name: 'Node.js' }).click()
265
233
  ```
266
234
 
267
- Only use `screenshotWithAccessibilityLabels({ page })` for **visual layout issues** (CSS bugs, spatial positioning, colors). For verifying text content, button states, or form values, snapshots are always sufficient.
235
+ **13. Over-investigating instead of just interacting**
236
+ When something doesn't respond to a click, do NOT start inspecting CDP event listeners, React fibers, canvas pixel data, or writing `page.evaluate()` to read class names and bounding boxes. This wastes massive context. Instead:
268
237
 
269
- If nothing changed, try `await waitForPageLoad({ page, timeout: 3000 })` or you may have clicked the wrong element.
238
+ 1. Take a `snapshot()` — it shows every interactive element and what to click
239
+ 2. Try a different interaction pattern if `click()` didn't work:
240
+ - **Drawing/annotation tools, canvas paint** → `mouse.down`, move with steps, `mouse.up` (see drag section)
241
+ - **Keyboard-activated modes** → press the shortcut key (snapshot shows tooltip text like "Draw mode D")
242
+ - **Sliders, timeline scrubbers** → drag pattern
243
+ - **Collapsed/toggled toolbars** → click the toggle first, wait, then interact
244
+ 3. Take another `snapshot()` to see what changed
245
+ 4. Only investigate DOM internals if correct interaction patterns produce zero response after 2–3 attempts
270
246
 
271
247
  ## accessibility snapshots
272
248
 
273
249
  ```js
274
- await accessibilitySnapshot({ page, search?, showDiffSinceLastCall? })
250
+ await snapshot({ page: state.page, search?, showDiffSinceLastCall? })
275
251
  ```
276
252
 
277
253
  - `search` - string/regex to filter results (returns first 10 matching lines)
278
- - `showDiffSinceLastCall` - returns diff since last snapshot (default: `true`). Pass `false` to get full snapshot.
254
+ - `showDiffSinceLastCall` - returns diff since last snapshot (default: `true`, but `false` when `search` is provided). Pass `false` to get full snapshot.
279
255
 
280
- Snapshots return full content on first call, then diffs on subsequent calls. If nothing changed, returns "No changes since last snapshot" message. Use `showDiffSinceLastCall: false` to always get full content.
256
+ Snapshots return full content on first call, then diffs on subsequent calls. Diff is only returned when shorter than full content. If nothing changed, returns "No changes since last snapshot" message. Use `showDiffSinceLastCall: false` to always get full content. When `search` is provided, diffing is disabled by default so the search filters the full content — pass `showDiffSinceLastCall: true` explicitly to combine both. This diffing behavior also applies to `getCleanHTML` and `getPageMarkdown`.
281
257
 
282
258
  Example output:
283
259
 
284
260
  ```md
285
261
  - banner:
286
- - link "Home" [id="nav-home"]
287
- - navigation:
288
- - link "Docs" [data-testid="docs-link"]
289
- - link "Blog" role=link[name="Blog"]
262
+ - link "Home" [id="nav-home"]
263
+ - navigation:
264
+ - link "Docs" [data-testid="docs-link"]
265
+ - link "Blog" role=link[name="Blog"]
290
266
  ```
291
267
 
292
- Each interactive line ends with a Playwright locator you can pass to `page.locator()`.
268
+ Each interactive line ends with a Playwright locator you can pass to `state.page.locator()`.
293
269
  If multiple elements share the same locator, a `>> nth=N` suffix is added (0-based)
294
270
  to make it unique.
295
271
 
296
- If a screenshot shows ref labels like `e3`, resolve them using the last snapshot:
272
+ **Use snapshot locators directly — never invent selectors.** The snapshot output IS the selector. Do not guess CSS selectors or `getByText` when the snapshot already gives you the exact match:
297
273
 
298
274
  ```js
299
- const snapshot = await accessibilitySnapshot({ page })
300
- const locator = refToLocator({ ref: 'e3' })
301
- await page.locator(locator!).click()
275
+ // Snapshot shows: role=radio[name="Nope, Vanilla"] → use it directly
276
+ await state.page.getByRole('radio', { name: 'Nope, Vanilla' }).click()
277
+ // Snapshot shows: role=link[name="SIGN IN"] → or pass raw string to locator()
278
+ await state.page.locator('role=link[name="SIGN IN"]').click()
302
279
  ```
303
280
 
281
+ **Beware CSS text-transform**: snapshots show visual text (`heading "NODE.JS"`) but DOM may be `"Node.js"`. Use case-insensitive regex: `getByRole('heading', { name: /node\.js/i })`.
282
+
283
+ If a screenshot shows ref labels like `e3`, resolve them using the last snapshot:
284
+
304
285
  ```js
305
- await page.locator('[id="nav-home"]').click()
306
- await page.locator('[data-testid="docs-link"]').click()
307
- await page.locator('role=link[name="Blog"]').click()
286
+ const snap = await snapshot({ page: state.page })
287
+ const locator = refToLocator({ ref: 'e3' })
288
+ await state.page.locator(locator!).click()
308
289
  ```
309
290
 
310
291
  Search for specific elements:
311
292
 
312
293
  ```js
313
- const snapshot = await accessibilitySnapshot({ page, search: /button|submit/i })
294
+ const snap = await snapshot({ page: state.page, search: /button|submit/i })
314
295
  ```
315
296
 
316
- **Filtering large snapshots in JS** — when the built-in `search` isn't enough (e.g., you need multiple patterns or custom logic), filter the snapshot string directly:
297
+ **Scoping snapshots to a specific element** — pass a `locator` instead of `page` to snapshot only a subtree. This dramatically reduces output size when you only care about one section of the page (e.g., the main content area, ignoring the sidebar/header/footer):
317
298
 
318
299
  ```js
319
- const snap = await accessibilitySnapshot({ page, showDiffSinceLastCall: false });
320
- const relevant = snap.split('\n').filter(l =>
321
- l.includes('dialog') || l.includes('error') || l.includes('button')
322
- ).join('\n');
323
- console.log(relevant);
324
- ```
300
+ // Full page snapshot: ~150 lines (sidebar, nav, header, footer, everything)
301
+ await snapshot({ page: state.page })
325
302
 
326
- This is much cheaper than taking a screenshot — use it as your primary debugging tool for verifying text content, checking if elements exist, or confirming state changes.
303
+ // Scoped to main: ~20 lines (just the content you care about)
304
+ await snapshot({ locator: state.page.locator('main') })
327
305
 
328
- ## choosing between snapshot methods
306
+ // Scope to a specific form, dialog, or section
307
+ await snapshot({ locator: state.page.locator('[role="dialog"]') })
308
+ await snapshot({ locator: state.page.locator('form#checkout') })
309
+ ```
329
310
 
330
- Both `accessibilitySnapshot` and `screenshotWithAccessibilityLabels` use the same ref system, so you can combine them effectively.
311
+ Use this whenever the full page snapshot is dominated by navigation or layout elements you don't need. It saves significant tokens and makes the output much easier to parse.
331
312
 
332
- **Use `accessibilitySnapshot` when:**
333
- - Page has simple, semantic structure (articles, forms, lists)
334
- - You need to search for specific text or patterns
335
- - Token usage matters (text is smaller than images)
336
- - You need to process the output programmatically
313
+ **Filtering large snapshots in JS** — when `search` isn't enough, filter the string directly: `snap.split('\n').filter(l => l.includes('dialog') || l.includes('error')).join('\n')`
337
314
 
338
- **Use `screenshotWithAccessibilityLabels` when:**
339
- - Page has complex visual layout (grids, galleries, dashboards, maps)
340
- - Spatial position matters (e.g., "first image", "top-left button")
341
- - DOM order doesn't match visual order
342
- - You need to understand the visual hierarchy
315
+ ## choosing between snapshot methods
343
316
 
344
- **Combining both:** Use screenshot first to understand layout and identify target elements visually, then use `accessibilitySnapshot({ search: /pattern/ })` for efficient searching in subsequent calls.
317
+ Use `snapshot` for text-heavy pages (forms, articles) — it is fast, cheap, and searchable. Use `screenshotWithAccessibilityLabels` for complex visual layouts (grids, galleries, dashboards) where spatial position matters. Both share the same ref system and can be combined.
345
318
 
346
319
  ## selector best practices
347
320
 
348
- **For unknown websites**: use `accessibilitySnapshot()` - it shows what's actually interactive with stable locators.
321
+ **For unknown websites**: use `snapshot()` - it shows what's actually interactive with stable locators.
349
322
 
350
323
  **For development** (when you have source code access), prefer stable selectors in this order:
351
324
 
@@ -359,16 +332,16 @@ Both `accessibilitySnapshot` and `screenshotWithAccessibilityLabels` use the sam
359
332
  Combine locators for precision:
360
333
 
361
334
  ```js
362
- page.locator('tr').filter({ hasText: 'John' }).locator('button').click()
363
- page.locator('button').nth(2).click()
335
+ state.page.locator('tr').filter({ hasText: 'John' }).locator('button').click()
336
+ state.page.locator('button').nth(2).click()
364
337
  ```
365
338
 
366
339
  If a locator matches multiple elements, Playwright throws "strict mode violation". Use `.first()`, `.last()`, or `.nth(n)`:
367
340
 
368
341
  ```js
369
- await page.locator('button').first().click() // first match
370
- await page.locator('.item').last().click() // last match
371
- await page.locator('li').nth(3).click() // 4th item (0-indexed)
342
+ await state.page.locator('button').first().click() // first match
343
+ await state.page.locator('.item').last().click() // last match
344
+ await state.page.locator('li').nth(3).click() // 4th item (0-indexed)
372
345
  ```
373
346
 
374
347
  ## working with pages
@@ -377,15 +350,15 @@ await page.locator('li').nth(3).click() // 4th item (0-indexed)
377
350
 
378
351
  **Get or create your page (first call):**
379
352
 
380
- On your very first execute call, reuse an existing empty tab or create a new one, and navigate it **in the same execute call**. Store it in `state` and use `state.myPage` for all subsequent operations instead of the default `page` variable:
353
+ On your very first execute call, reuse an existing empty tab or create a new one, and navigate it **in the same execute call**. Store it in `state` and use `state.page` for all subsequent operations instead of the default `page` variable:
381
354
 
382
355
  ```js
383
356
  // Reuse an empty about:blank tab if available, otherwise create a new one.
384
357
  // IMPORTANT: always navigate immediately in the same call to avoid another
385
358
  // agent grabbing the same about:blank tab between execute calls.
386
- state.myPage = context.pages().find(p => p.url() === 'about:blank') ?? await context.newPage();
387
- await state.myPage.goto('https://example.com');
388
- // Use state.myPage for ALL subsequent operations
359
+ state.page = context.pages().find((p) => p.url() === 'about:blank') ?? (await context.newPage())
360
+ await state.page.goto('https://example.com')
361
+ // Use state.page for ALL subsequent operations
389
362
  ```
390
363
 
391
364
  **Handle page closures gracefully:**
@@ -393,10 +366,10 @@ await state.myPage.goto('https://example.com');
393
366
  The user may close your page by accident (e.g., closing a tab in Chrome). Always check before using it and recreate if needed:
394
367
 
395
368
  ```js
396
- if (!state.myPage || state.myPage.isClosed()) {
397
- state.myPage = context.pages().find(p => p.url() === 'about:blank') ?? await context.newPage();
369
+ if (!state.page || state.page.isClosed()) {
370
+ state.page = context.pages().find((p) => p.url() === 'about:blank') ?? (await context.newPage())
398
371
  }
399
- await state.myPage.goto('https://example.com');
372
+ await state.page.goto('https://example.com')
400
373
  ```
401
374
 
402
375
  **Use an existing page only when the user asks:**
@@ -404,16 +377,16 @@ await state.myPage.goto('https://example.com');
404
377
  Only use a page from `context.pages()` if the user explicitly asks you to control a specific tab they already opened (e.g., they're logged into an app). Find it by URL pattern and store it in state:
405
378
 
406
379
  ```js
407
- const pages = context.pages().filter(x => x.url().includes('myapp.com'));
408
- if (pages.length === 0) throw new Error('No myapp.com page found. Ask user to enable playwriter on it.');
409
- if (pages.length > 1) throw new Error(`Found ${pages.length} matching pages, expected 1`);
410
- state.targetPage = pages[0];
380
+ const pages = context.pages().filter((x) => x.url().includes('myapp.com'))
381
+ if (pages.length === 0) throw new Error('No myapp.com page found. Ask user to enable playwriter on it.')
382
+ if (pages.length > 1) throw new Error(`Found ${pages.length} matching pages, expected 1`)
383
+ state.targetPage = pages[0]
411
384
  ```
412
385
 
413
386
  **List all available pages:**
414
387
 
415
388
  ```js
416
- context.pages().map(p => p.url())
389
+ context.pages().map((p) => p.url())
417
390
  ```
418
391
 
419
392
  ## navigation
@@ -421,42 +394,49 @@ context.pages().map(p => p.url())
421
394
  **Use `domcontentloaded`** for `page.goto()`:
422
395
 
423
396
  ```js
424
- await page.goto('https://example.com', { waitUntil: 'domcontentloaded' });
425
- await waitForPageLoad({ page, timeout: 5000 });
397
+ await state.page.goto('https://example.com', { waitUntil: 'domcontentloaded' })
398
+ await waitForPageLoad({ page: state.page, timeout: 5000 })
426
399
  ```
427
400
 
428
401
  ## common patterns
429
402
 
430
- **Authenticated fetches** - to access protected resources, fetch from within page context (includes session cookies automatically):
403
+ **Authenticated fetches** - fetch from within page context to include session cookies automatically:
431
404
 
432
405
  ```js
433
- // BAD: curl/external requests don't have session cookies
434
- // curl -H "Cookie: ..." often fails due to missing cookies or CSRF
406
+ const data = await state.page.evaluate(async (url) => {
407
+ const resp = await fetch(url)
408
+ return await resp.text()
409
+ }, 'https://example.com/protected/resource')
410
+ ```
411
+
412
+ **Read page cookies via CDP** - use `Network.getCookies` on the page CDP session:
435
413
 
436
- // GOOD: fetch inside page.evaluate uses browser's full session
437
- const data = await page.evaluate(async (url) => {
438
- const resp = await fetch(url);
439
- return await resp.text();
440
- }, 'https://example.com/protected/resource');
414
+ ```js
415
+ const cdp = await getCDPSession({ page: state.page })
416
+ const { cookies } = await cdp.send('Network.getCookies', { urls: [state.page.url()] })
417
+ console.log(cookies)
441
418
  ```
442
419
 
420
+ You MUST use `Network.getCookies` on the page CDP session to read page-scoped cookies in extension mode. `Storage.getCookies` is a root-session command and will fail in playwriter.
421
+
443
422
  **Downloading large data** - console output truncates large strings. Trigger a browser download instead:
444
423
 
445
424
  ```js
446
425
  // Fetch protected data and trigger download to user's Downloads folder
447
- await page.evaluate(async (url) => {
448
- const resp = await fetch(url);
449
- const data = await resp.text();
450
- const blob = new Blob([data], { type: 'application/octet-stream' });
451
- const a = document.createElement('a');
452
- a.href = URL.createObjectURL(blob);
453
- a.download = 'data.json';
454
- a.click();
455
- }, 'https://example.com/protected/large-file');
426
+ await state.page.evaluate(async (url) => {
427
+ const resp = await fetch(url)
428
+ const data = await resp.text()
429
+ const blob = new Blob([data], { type: 'application/octet-stream' })
430
+ const a = document.createElement('a')
431
+ a.href = URL.createObjectURL(blob)
432
+ a.download = 'data.json'
433
+ a.click()
434
+ }, 'https://example.com/protected/large-file')
456
435
  // File saves to ~/Downloads - read it from there
457
436
  ```
458
437
 
459
438
  **Avoid permission-gated browser APIs** - some APIs require user permission prompts or special browser flags. These often fail silently or hang. Examples to avoid:
439
+
460
440
  - `navigator.clipboard.writeText()` - requires permission
461
441
  - Multiple concurrent downloads - browser may block
462
442
  - `window.showSaveFilePicker()` - requires user gesture
@@ -464,42 +444,76 @@ await page.evaluate(async (url) => {
464
444
 
465
445
  Instead, use simpler alternatives (single download via `a.click()`, store data in `state`, etc).
466
446
 
467
- **Links that open new tabs** - use cmd+click to open in a controllable new tab:
447
+ **Downloads** - capture and save:
468
448
 
469
449
  ```js
470
- // For links with target=_blank or buttons that open popups
471
- await page.locator('a[target=_blank]').click({ modifiers: ['Meta'] });
472
- await page.waitForTimeout(1000);
473
-
474
- // New tab is last in context.pages()
475
- const pages = context.pages();
476
- const newTab = pages[pages.length - 1];
477
- console.log('New tab URL:', newTab.url());
450
+ const [download] = await Promise.all([state.page.waitForEvent('download'), state.page.click('button.download')])
451
+ await download.saveAs(`/tmp/${download.suggestedFilename()}`)
478
452
  ```
479
453
 
480
- Note: `page.waitForEvent('popup')` is unreliable - playwriter cannot control popup windows opened via `window.open`. Use cmd+click instead.
454
+ **iFrames** - two approaches depending on what you need:
481
455
 
482
- **Downloads** - capture and save:
456
+ ```js
457
+ // frameLocator: for chaining locator operations (click, fill, etc.)
458
+ const frame = state.page.frameLocator('#my-iframe')
459
+ await frame.locator('button').click()
460
+
461
+ // contentFrame: returns a Frame object, needed for snapshot({ frame })
462
+ const frame2 = await state.page.locator('iframe').contentFrame()
463
+ await snapshot({ frame: frame2 })
464
+ ```
465
+
466
+ **Dialogs** - handle alerts/confirms/prompts:
483
467
 
484
468
  ```js
485
- const [download] = await Promise.all([page.waitForEvent('download'), page.click('button.download')]);
486
- await download.saveAs(`/tmp/${download.suggestedFilename()}`);
469
+ state.page.on('dialog', async (dialog) => {
470
+ console.log(dialog.message())
471
+ await dialog.accept()
472
+ })
473
+ await state.page.click('button.trigger-alert')
487
474
  ```
488
475
 
489
- **iFrames** - use frameLocator:
476
+ **Handling page obstacles (cookie modals, login walls, age gates)** - most major websites show blocking overlays. Always check for these with `snapshot()` right after navigation and dismiss them before doing anything else:
490
477
 
491
478
  ```js
492
- const frame = page.frameLocator('#my-iframe');
493
- await frame.locator('button').click();
479
+ // After navigating, check for common obstacles
480
+ await waitForPageLoad({ page: state.page, timeout: 5000 })
481
+ const snap = await snapshot({
482
+ page: state.page,
483
+ search: /cookie|consent|accept|reject|decline|allow|age|verify|login|sign.in/i,
484
+ })
485
+ console.log(snap)
486
+ // Look for dismiss/accept/decline buttons in the snapshot, then click them:
487
+ // await state.page.locator('button:has-text("Accept")').click();
488
+ // await state.page.locator('button:has-text("Decline optional")').click();
489
+ // Then re-snapshot to confirm the modal is gone before proceeding
494
490
  ```
495
491
 
496
- **Dialogs** - handle alerts/confirms/prompts:
492
+ If the page requires login and the user is already logged into Chrome, their session cookies are available — just navigate and the page should load authenticated. If not, ask the user for help or use their existing logged-in tab via `context.pages()`.
493
+
494
+ **Extracting and downloading media (images, videos)** - use `page.evaluate()` to extract URLs from the rendered DOM, then download via Node.js in the sandbox. This is far more reliable than parsing raw HTML:
497
495
 
498
496
  ```js
499
- page.on('dialog', async dialog => { console.log(dialog.message()); await dialog.accept(); });
500
- await page.click('button.trigger-alert');
497
+ // Extract all image URLs from rendered DOM
498
+ const images = await state.page.evaluate(() =>
499
+ Array.from(document.querySelectorAll('img[src]')).map((img) => ({
500
+ src: img.src,
501
+ alt: img.alt,
502
+ width: img.naturalWidth,
503
+ })),
504
+ )
505
+ console.log(JSON.stringify(images, null, 2))
506
+
507
+ // Download a specific image to disk
508
+ const fs = require('node:fs')
509
+ const resp = await fetch(images[0].src)
510
+ const buf = Buffer.from(await resp.arrayBuffer())
511
+ fs.writeFileSync('./downloaded-image.jpg', buf)
512
+ console.log('Saved', buf.length, 'bytes')
501
513
  ```
502
514
 
515
+ For carousels or lazy-loaded galleries, you may need to click navigation arrows or scroll first, then re-extract. Use network interception (see "network interception" section) to capture high-resolution CDN URLs that may differ from the `img.src` thumbnails.
516
+
503
517
  ## utility functions
504
518
 
505
519
  **getLatestLogs** - retrieve captured browser console logs (up to 5000 per page, cleared on navigation):
@@ -508,51 +522,41 @@ await page.click('button.trigger-alert');
508
522
  await getLatestLogs({ page?, count?, search? })
509
523
  // Examples:
510
524
  const errors = await getLatestLogs({ search: /error/i, count: 50 })
511
- const pageLogs = await getLatestLogs({ page })
525
+ const pageLogs = await getLatestLogs({ page: state.page })
512
526
  ```
513
527
 
514
- For custom log collection across runs, store in state: `state.logs = []; page.on('console', m => state.logs.push(m.text()))`
528
+ For custom log collection across runs, store in state: `state.logs = []; state.page.on('console', m => state.logs.push(m.text()))`
515
529
 
516
530
  **getCleanHTML** - get cleaned HTML from a locator or page, with search and diffing:
517
531
 
518
532
  ```js
519
533
  await getCleanHTML({ locator, search?, showDiffSinceLastCall?, includeStyles? })
520
534
  // Examples:
521
- const html = await getCleanHTML({ locator: page.locator('body') })
522
- const html = await getCleanHTML({ locator: page, search: /button/i })
523
- const fullHtml = await getCleanHTML({ locator: page, showDiffSinceLastCall: false }) // disable diff
535
+ const html = await getCleanHTML({ locator: state.page.locator('body') })
536
+ const html = await getCleanHTML({ locator: state.page, search: /button/i })
537
+ const fullHtml = await getCleanHTML({ locator: state.page, showDiffSinceLastCall: false }) // disable diff
524
538
  ```
525
539
 
526
540
  **Parameters:**
541
+
527
542
  - `locator` - Playwright Locator or Page to get HTML from
528
543
  - `search` - string/regex to filter results (returns first 10 matching lines with 5 lines context)
529
- - `showDiffSinceLastCall` - returns diff since last call (default: `true`). Pass `false` to get full HTML.
544
+ - `showDiffSinceLastCall` - returns diff since last call (default: `true`, but `false` when `search` is provided). Pass `false` to get full HTML.
530
545
  - `includeStyles` - keep style and class attributes (default: false)
531
546
 
532
- **HTML processing:**
533
- The function cleans HTML for compact, readable output:
534
- - **Removes tags**: script, style, link, meta, noscript, svg, head
535
- - **Unwraps nested wrappers**: Empty divs/spans with no attributes that only wrap a single child are collapsed (e.g., `<div><div><div><p>text</p></div></div></div>` → `<div><p>text</p></div>`)
536
- - **Removes empty elements**: Elements with no attributes and no content are removed
537
- - **Truncates long values**: Attribute values >200 chars and text content >500 chars are truncated
538
-
539
- **Attributes kept (summary):**
540
- - Common semantic and ARIA attributes (e.g., `href`, `name`, `type`, `aria-*`)
541
- - All `data-*` test attributes
542
- - Frequently used test IDs and special attributes (e.g., `testid`, `qa`, `e2e`, `vimium-label`)
543
-
544
- Snapshots return full content on first call, then diffs on subsequent calls. Diff is only returned when shorter than full content.
547
+ Cleans HTML automatically: removes script/style/svg/head tags, unwraps empty wrappers, removes empty elements, truncates long values. Keeps semantic attributes (`href`, `name`, `type`, `aria-*`, `data-*`).
545
548
 
546
549
  **getPageMarkdown** - extract main page content as plain text using Mozilla Readability (same algorithm as Firefox Reader View). Strips navigation, ads, sidebars, and other clutter. Returns formatted text with title, author, and content:
547
550
 
548
551
  ```js
549
- await getPageMarkdown({ page, search?, showDiffSinceLastCall? })
552
+ await getPageMarkdown({ page: state.page, search?, showDiffSinceLastCall? })
550
553
  // Examples:
551
- const content = await getPageMarkdown({ page, showDiffSinceLastCall: false }) // full article
552
- const matches = await getPageMarkdown({ page, search: /API/i }) // search within content
554
+ const content = await getPageMarkdown({ page: state.page, showDiffSinceLastCall: false }) // full article
555
+ const matches = await getPageMarkdown({ page: state.page, search: /API/i }) // search within content
553
556
  ```
554
557
 
555
558
  **Output format:**
559
+
556
560
  ```
557
561
  # Article Title
558
562
 
@@ -564,130 +568,145 @@ The main article content as plain text, with paragraphs preserved...
564
568
  ```
565
569
 
566
570
  **Parameters:**
571
+
567
572
  - `page` - Playwright Page to extract content from
568
573
  - `search` - string/regex to filter content (returns first 10 matching lines with 5 lines context)
569
- - `showDiffSinceLastCall` - returns diff since last call (default: `true`). Pass `false` to get full content.
570
-
571
- Snapshots return full content on first call, then diffs on subsequent calls. Diff is only returned when shorter than full content.
572
-
573
- **Use cases:**
574
- - Extract article text for LLM processing without HTML noise
575
- - Get readable content from news sites, blogs, documentation
576
- - Compare content changes after interactions
574
+ - `showDiffSinceLastCall` - returns diff since last call (default: `true`, but `false` when `search` is provided). Pass `false` to get full content.
577
575
 
578
576
  **waitForPageLoad** - smart load detection that ignores analytics/ads:
579
577
 
580
578
  ```js
581
- await waitForPageLoad({ page, timeout?, pollInterval?, minWait? })
579
+ await waitForPageLoad({ page: state.page, timeout?, pollInterval?, minWait? })
582
580
  // Returns: { success, readyState, pendingRequests, waitTimeMs, timedOut }
583
581
  ```
584
582
 
585
583
  **getCDPSession** - send raw CDP commands:
586
584
 
587
585
  ```js
588
- const cdp = await getCDPSession({ page });
589
- const metrics = await cdp.send('Page.getLayoutMetrics');
586
+ const cdp = await getCDPSession({ page: state.page })
587
+ const metrics = await cdp.send('Page.getLayoutMetrics')
590
588
  ```
591
589
 
592
590
  **getLocatorStringForElement** - get stable Playwright selector from an element:
593
591
 
594
592
  ```js
595
- const selector = await getLocatorStringForElement(page.locator('[id="submit-btn"]'));
593
+ const selector = await getLocatorStringForElement(state.page.locator('[id="submit-btn"]'))
596
594
  // => "getByRole('button', { name: 'Save' })"
597
595
  ```
598
596
 
599
597
  **getReactSource** - get React component source location (dev mode only):
600
598
 
601
599
  ```js
602
- const source = await getReactSource({ locator: page.locator('[data-testid="submit-btn"]') });
600
+ const source = await getReactSource({ locator: state.page.locator('[data-testid="submit-btn"]') })
603
601
  // => { fileName, lineNumber, columnNumber, componentName }
604
602
  ```
605
603
 
606
604
  **getStylesForLocator** - inspect CSS styles applied to an element, like browser DevTools "Styles" panel. Useful for debugging styling issues, finding where a CSS property is defined (file:line), and checking inherited styles. Returns selector, source location, and declarations for each matching rule. ALWAYS fetch `https://playwriter.dev/resources/styles-api.md` first with curl or webfetch tool.
607
605
 
608
606
  ```js
609
- const styles = await getStylesForLocator({ locator: page.locator('.btn'), cdp: await getCDPSession({ page }) });
610
- console.log(formatStylesAsText(styles));
607
+ const styles = await getStylesForLocator({
608
+ locator: state.page.locator('.btn'),
609
+ cdp: await getCDPSession({ page: state.page }),
610
+ })
611
+ console.log(formatStylesAsText(styles))
611
612
  ```
612
613
 
613
614
  **createDebugger** - set breakpoints, step through code, inspect variables at runtime. Useful for debugging issues that only reproduce in browser, understanding code flow, and inspecting state at specific points. Can pause on exceptions, evaluate expressions in scope, and blackbox framework code. ALWAYS fetch `https://playwriter.dev/resources/debugger-api.md` first.
614
615
 
615
616
  ```js
616
- const cdp = await getCDPSession({ page }); const dbg = createDebugger({ cdp }); await dbg.enable();
617
- const scripts = await dbg.listScripts({ search: 'app' });
618
- await dbg.setBreakpoint({ file: scripts[0].url, line: 42 });
617
+ const cdp = await getCDPSession({ page: state.page })
618
+ const dbg = createDebugger({ cdp })
619
+ await dbg.enable()
620
+ const scripts = await dbg.listScripts({ search: 'app' })
621
+ await dbg.setBreakpoint({ file: scripts[0].url, line: 42 })
619
622
  // when paused: dbg.inspectLocalVariables(), dbg.stepOver(), dbg.resume()
620
623
  ```
621
624
 
622
625
  **createEditor** - view and live-edit page scripts and CSS at runtime. Edits are in-memory (persist until reload). Useful for testing quick fixes, searching page scripts with grep, and toggling debug flags. ALWAYS read `https://playwriter.dev/resources/editor-api.md` first.
623
626
 
624
627
  ```js
625
- const cdp = await getCDPSession({ page }); const editor = createEditor({ cdp }); await editor.enable();
626
- const matches = await editor.grep({ regex: /console\.log/ });
627
- await editor.edit({ url: matches[0].url, oldString: 'DEBUG = false', newString: 'DEBUG = true' });
628
+ const cdp = await getCDPSession({ page: state.page })
629
+ const editor = createEditor({ cdp })
630
+ await editor.enable()
631
+ const matches = await editor.grep({ regex: /console\.log/ })
632
+ await editor.edit({ url: matches[0].url, oldString: 'DEBUG = false', newString: 'DEBUG = true' })
628
633
  ```
629
634
 
630
635
  **screenshotWithAccessibilityLabels** - take a screenshot with Vimium-style visual labels overlaid on interactive elements. Shows labels, captures screenshot, then removes labels. The image and accessibility snapshot are automatically included in the response. Can be called multiple times to capture multiple screenshots. Use a timeout of **20 seconds** for complex pages.
631
636
 
632
- Prefer this for pages with grids, image galleries, maps, or complex visual layouts where spatial position matters. For simple text-heavy pages, `accessibilitySnapshot` with search is faster and uses fewer tokens.
637
+ Prefer this for pages with grids, image galleries, maps, or complex visual layouts where spatial position matters. For simple text-heavy pages, `snapshot` with search is faster and uses fewer tokens.
633
638
 
634
639
  ```js
635
- await screenshotWithAccessibilityLabels({ page });
640
+ await screenshotWithAccessibilityLabels({ page: state.page })
636
641
  // Image and accessibility snapshot are automatically included in response
637
642
  // Use refs from snapshot to interact with elements
638
- await page.locator('[id="submit-btn"]').click();
643
+ await state.page.locator('[id="submit-btn"]').click()
639
644
 
640
645
  // Can take multiple screenshots in one execution
641
- await screenshotWithAccessibilityLabels({ page });
642
- await page.click('button');
643
- await screenshotWithAccessibilityLabels({ page });
646
+ await screenshotWithAccessibilityLabels({ page: state.page })
647
+ await state.page.click('button')
648
+ await screenshotWithAccessibilityLabels({ page: state.page })
644
649
  // Both images are included in the response
645
650
  ```
646
651
 
647
652
  Labels are color-coded: yellow=links, orange=buttons, coral=inputs, pink=checkboxes, peach=sliders, salmon=menus, amber=tabs.
648
653
 
649
- **startRecording / stopRecording** - record the page as a video at native FPS (30-60fps). Uses `chrome.tabCapture` in the extension context, so **recording survives page navigation**. Video is saved as mp4.
654
+ **resizeImage** - shrink an image in-place so it consumes fewer tokens when read back into context. `await resizeImage({ input: './screenshot.png' })`. Also accepts `width`, `height`, `maxDimension`, `quality`, `output`.
655
+
656
+ **recording.start / recording.stop** - record the page as a video at native FPS (30-60fps). Uses `chrome.tabCapture` so **recording survives page navigation**. Auto-overlays a ghost cursor that follows mouse actions. Requires user to have clicked the Playwriter extension icon on the tab. Auto-resizes viewport to 16:9 (override with `aspectRatio: null`). Auto-stops after 15 min (override with `maxDurationMs`).
650
657
 
651
- **Note**: Recording requires the user to have clicked the Playwriter extension icon on the tab. This grants `activeTab` permission needed for `chrome.tabCapture`. Recording works on tabs where the icon was clicked - if you need to record a new tab, ask the user to click the icon on it first.
658
+ For demos, use interaction methods (`locator.click()`, `page.mouse.move()`) instead of `goto()` to show realistic cursor motion.
652
659
 
653
660
  ```js
654
- // Start recording - outputPath must be specified upfront
655
- await startRecording({
656
- page,
661
+ await recording.start({
662
+ page: state.page,
657
663
  outputPath: './recording.mp4',
658
- frameRate: 30, // default: 30
659
- audio: false, // default: false (tab audio)
660
- videoBitsPerSecond: 2500000 // 2.5 Mbps
661
- });
664
+ frameRate: 30, // default
665
+ audio: false, // default (tab audio)
666
+ videoBitsPerSecond: 2500000,
667
+ aspectRatio: { width: 16, height: 9 }, // default, set null to skip
668
+ maxDurationMs: 15 * 60 * 1000, // default, set 0 to disable
669
+ })
670
+
671
+ // Recording survives navigation
672
+ await state.page.click('a')
673
+ await state.page.waitForLoadState('domcontentloaded')
662
674
 
663
- // Navigate around - recording continues!
664
- await page.click('a');
665
- await page.waitForLoadState('domcontentloaded');
666
- await page.goBack();
675
+ // Stop — save full result, including executionTimestamps, for createDemoVideo
676
+ state.recordingResult = await recording.stop({ page: state.page })
667
677
 
668
- // Stop and get result
669
- const { path, duration, size } = await stopRecording({ page });
670
- console.log(`Saved ${size} bytes, duration: ${duration}ms`);
678
+ // Other: recording.isRecording({ page }), recording.cancel({ page })
671
679
  ```
672
680
 
673
- Additional recording utilities:
674
- ```js
675
- // Check if recording is active
676
- const { isRecording, startedAt } = await isRecording({ page });
681
+ **ghostCursor.show / ghostCursor.hide** - show/hide cursor overlay for screenshots and demos:
677
682
 
678
- // Cancel recording without saving
679
- await cancelRecording({ page });
683
+ ```js
684
+ await ghostCursor.show({ page: state.page, style: 'minimal' }) // 'minimal', 'dot', 'screenstudio'
685
+ await ghostCursor.hide({ page: state.page })
680
686
  ```
681
687
 
682
- **Key difference from getDisplayMedia**: This approach uses `chrome.tabCapture` which runs in the extension context, not the page. The recording persists across navigations because the extension holds the `MediaRecorder`, not the page's JavaScript context.
688
+ **createDemoVideo** - speeds up idle sections (time between execute() calls) while keeping interactions at normal speed. Requires `ffmpeg`/`ffprobe`. Timestamps are tracked automatically during recording and returned by `recording.stop()`. **Timeout**: can take 60–120+ seconds, always pass `--timeout 120000` or higher.
689
+
690
+ ```js
691
+ // After recording.stop(), save full result to state (executionTimestamps powers idle detection)
692
+ state.recordingResult = await recording.stop({ page: state.page })
693
+
694
+ // In a SEPARATE execute call with --timeout 120000:
695
+ const demoPath = await createDemoVideo({
696
+ recordingPath: state.recordingResult.path,
697
+ durationMs: state.recordingResult.duration,
698
+ executionTimestamps: state.recordingResult.executionTimestamps,
699
+ speed: 6, // default 6x for idle sections
700
+ })
701
+ ```
683
702
 
684
703
  ## pinned elements
685
704
 
686
705
  Users can right-click → "Copy Playwriter Element Reference" to store elements in `globalThis.playwriterPinnedElem1` (increments for each pin). The reference is copied to clipboard:
687
706
 
688
707
  ```js
689
- const el = await page.evaluateHandle(() => globalThis.playwriterPinnedElem1);
690
- await el.click();
708
+ const el = await state.page.evaluateHandle(() => globalThis.playwriterPinnedElem1)
709
+ await el.click()
691
710
  ```
692
711
 
693
712
  ## taking screenshots
@@ -695,24 +714,28 @@ await el.click();
695
714
  Always use `scale: 'css'` to avoid 2-4x larger images on high-DPI displays:
696
715
 
697
716
  ```js
698
- await page.screenshot({ path: 'shot.png', scale: 'css' });
717
+ await state.page.screenshot({ path: 'shot.png', scale: 'css' })
699
718
  ```
700
719
 
701
- If you want to read back the image file into context make sure to resize it first, scaling down the image to make sure max size is 1500px. for example with `sips --resampleHeightWidthMax 1500 input.png --out output.png` on macOS.
720
+ If you want to read back the image file into context, resize it first so it consumes fewer tokens:
721
+
722
+ ```js
723
+ await resizeImage({ input: './shot.png' })
724
+ ```
702
725
 
703
726
  ## page.evaluate
704
727
 
705
728
  Code inside `page.evaluate()` runs in the browser - use plain JavaScript only, no TypeScript syntax. Return values and log outside (console.log inside evaluate runs in browser, not visible):
706
729
 
707
730
  ```js
708
- const title = await page.evaluate(() => document.title);
709
- console.log('Title:', title);
731
+ const title = await state.page.evaluate(() => document.title)
732
+ console.log('Title:', title)
710
733
 
711
- const info = await page.evaluate(() => ({
712
- url: location.href,
713
- buttons: document.querySelectorAll('button').length,
714
- }));
715
- console.log(info);
734
+ const info = await state.page.evaluate(() => ({
735
+ url: location.href,
736
+ buttons: document.querySelectorAll('button').length,
737
+ }))
738
+ console.log(info)
716
739
  ```
717
740
 
718
741
  ## loading files
@@ -720,7 +743,9 @@ console.log(info);
720
743
  Fill inputs with file content:
721
744
 
722
745
  ```js
723
- const fs = require('node:fs'); const content = fs.readFileSync('./data.txt', 'utf-8'); await page.locator('textarea').fill(content);
746
+ const fs = require('node:fs')
747
+ const content = fs.readFileSync('./data.txt', 'utf-8')
748
+ await state.page.locator('textarea').fill(content)
724
749
  ```
725
750
 
726
751
  ## network interception
@@ -728,103 +753,151 @@ const fs = require('node:fs'); const content = fs.readFileSync('./data.txt', 'ut
728
753
  For scraping or reverse-engineering APIs, intercept network requests instead of scrolling DOM. Store in `state` to analyze across calls:
729
754
 
730
755
  ```js
731
- state.requests = []; state.responses = [];
732
- page.on('request', req => { if (req.url().includes('/api/')) state.requests.push({ url: req.url(), method: req.method(), headers: req.headers() }); });
733
- page.on('response', async res => { if (res.url().includes('/api/')) { try { state.responses.push({ url: res.url(), status: res.status(), body: await res.json() }); } catch {} } });
756
+ state.requests = []
757
+ state.responses = []
758
+ state.page.on('request', (req) => {
759
+ if (req.url().includes('/api/')) state.requests.push({ url: req.url(), method: req.method(), headers: req.headers() })
760
+ })
761
+ state.page.on('response', async (res) => {
762
+ if (res.url().includes('/api/')) {
763
+ try {
764
+ state.responses.push({ url: res.url(), status: res.status(), body: await res.json() })
765
+ } catch {}
766
+ }
767
+ })
734
768
  ```
735
769
 
736
770
  Then trigger actions (scroll, click, navigate) and analyze captured data:
737
771
 
738
772
  ```js
739
- console.log('Captured', state.responses.length, 'API calls');
740
- state.responses.forEach(r => console.log(r.status, r.url.slice(0, 80)));
773
+ console.log('Captured', state.responses.length, 'API calls')
774
+ state.responses.forEach((r) => console.log(r.status, r.url.slice(0, 80)))
741
775
  ```
742
776
 
743
777
  Inspect a specific response to understand schema:
744
778
 
745
779
  ```js
746
- const resp = state.responses.find(r => r.url.includes('users'));
747
- console.log(JSON.stringify(resp.body, null, 2).slice(0, 2000));
780
+ const resp = state.responses.find((r) => r.url.includes('users'))
781
+ console.log(JSON.stringify(resp.body, null, 2).slice(0, 2000))
748
782
  ```
749
783
 
750
784
  Replay API directly (useful for pagination):
751
785
 
752
786
  ```js
753
- const { url, headers } = state.requests.find(r => r.url.includes('feed'));
754
- const data = await page.evaluate(async ({ url, headers }) => { const res = await fetch(url, { headers }); return res.json(); }, { url, headers });
755
- console.log(data);
787
+ const { url, headers } = state.requests.find((r) => r.url.includes('feed'))
788
+ const data = await state.page.evaluate(
789
+ async ({ url, headers }) => {
790
+ const res = await fetch(url, { headers })
791
+ return res.json()
792
+ },
793
+ { url, headers },
794
+ )
795
+ console.log(data)
756
796
  ```
757
797
 
758
- Clean up listeners when done: `page.removeAllListeners('request'); page.removeAllListeners('response');`
798
+ Clean up listeners when done: `state.page.removeAllListeners('request'); state.page.removeAllListeners('response');`
759
799
 
760
- ## debugging web apps
800
+ ## computer use (low-level mouse/keyboard)
761
801
 
762
- When debugging why a web app isn't working (e.g., content not rendering, API errors, state issues), use these techniques **before** resorting to screenshots:
763
-
764
- **1. Console logs** — use `getLatestLogs` to check for errors:
802
+ ### clicking
765
803
 
766
804
  ```js
767
- const errors = await getLatestLogs({ page, search: /error|fail/i, count: 20 });
768
- const appLogs = await getLatestLogs({ page, search: /myComponent|state/i });
805
+ // Preferred: by locator (stable, auto-waits, no coordinates needed)
806
+ await state.page.locator('button[name="Submit"]').click()
807
+ await state.page.locator('text=Login').click({ button: 'right' })
808
+ await state.page.locator('text=Login').dblclick()
809
+ await state.page
810
+ .locator('a')
811
+ .first()
812
+ .click({ modifiers: ['Meta'] }) // cmd+click opens new tab
813
+
814
+ // By coordinates (when locators aren't available, e.g. canvas, maps, custom widgets)
815
+ await state.page.mouse.click(450, 320) // left click
816
+ await state.page.mouse.click(450, 320, { button: 'right' }) // right click
817
+ await state.page.mouse.dblclick(450, 320) // double click
818
+ await state.page.mouse.click(450, 320, { clickCount: 3 }) // triple click
819
+ await state.page.mouse.click(450, 320, { modifiers: ['Shift'] }) // shift+click
769
820
  ```
770
821
 
771
- **2. DOM inspection via evaluate** — check content directly without screenshots:
822
+ ### hover
772
823
 
773
824
  ```js
774
- const info = await page.evaluate(() => {
775
- const msgs = document.querySelectorAll('.message');
776
- return Array.from(msgs).map(m => ({
777
- text: m.textContent?.slice(0, 200),
778
- visible: m.offsetHeight > 0,
779
- }));
780
- });
781
- console.log(JSON.stringify(info, null, 2));
825
+ await state.page.locator('.tooltip-trigger').hover() // by locator (preferred)
826
+ await state.page.mouse.move(450, 320) // by coordinates
782
827
  ```
783
828
 
784
- **3. Combine snapshot + logs for full picture:**
829
+ ### scroll
785
830
 
786
831
  ```js
787
- await page.keyboard.press('Enter');
788
- await page.waitForTimeout(2000);
832
+ // By locator (preferred)
833
+ await state.page.locator('#footer').scrollIntoViewIfNeeded()
834
+
835
+ // By pixel (for canvas, maps, infinite scroll)
836
+ await state.page.mouse.wheel(0, 300) // scroll down 300px
837
+ await state.page.mouse.wheel(0, -300) // scroll up
838
+ await state.page.mouse.wheel(300, 0) // scroll right
839
+ await state.page.mouse.wheel(-300, 0) // scroll left
789
840
 
790
- const snap = await accessibilitySnapshot({ page, search: /dialog|error|message/ });
791
- const logs = await getLatestLogs({ page, search: /error/i, count: 10 });
792
- console.log('UI:', snap);
793
- console.log('Logs:', logs);
841
+ // Scroll at a specific position
842
+ await state.page.mouse.move(450, 320)
843
+ await state.page.mouse.wheel(0, 500)
844
+
845
+ // Scroll inside a container
846
+ await state.page.locator('.scrollable-list').evaluate((el) => {
847
+ el.scrollTop += 500
848
+ })
794
849
  ```
795
850
 
796
- ## capabilities
851
+ ### drag
852
+
853
+ ```js
854
+ // By locator (preferred)
855
+ await state.page.locator('#item').dragTo(state.page.locator('#target'))
797
856
 
798
- Examples of what playwriter can do:
799
- - Monitor console logs while user reproduces a bug
800
- - Intercept network requests to reverse-engineer APIs and build SDKs
801
- - Scrape data by replaying paginated API calls instead of scrolling DOM
802
- - Get accessibility snapshot to find elements, then automate interactions
803
- - Use visual screenshots to understand complex layouts like image grids, dashboards, or maps
804
- - Debug issues by collecting logs and controlling the page simultaneously
805
- - Handle popups, downloads, iframes, and dialog boxes
806
- - Record videos of browser sessions that survive page navigation
857
+ // By coordinates (for canvas, sliders, custom drag targets)
858
+ await state.page.mouse.move(100, 200)
859
+ await state.page.mouse.down()
860
+ await state.page.mouse.move(400, 500, { steps: 10 }) // steps for smooth drag
861
+ await state.page.mouse.up()
862
+ ```
807
863
 
864
+ **Freehand drawing, annotation widgets, and canvas tools** use this same `mouse.down → move → up` pattern. If a widget expects a drawn stroke (paint tools, annotation overlays, range sliders, timeline scrubbers), always use held-mouse motion — not `mouse.click()`:
808
865
 
809
- ## Ghost Browser integration
866
+ ```js
867
+ // Draw a stroke across a canvas or annotation layer
868
+ await state.page.mouse.move(startX, startY)
869
+ await state.page.mouse.down()
870
+ await state.page.mouse.move(endX, endY, { steps: 15 }) // steps = smoother stroke
871
+ await state.page.mouse.up()
872
+ await state.page.waitForTimeout(500) // let the widget process the stroke
873
+ ```
810
874
 
811
- Playwriter supports [Ghost Browser](https://ghostbrowser.com/) for multi-identity automation. When running in Ghost Browser, the `chrome` object exposes APIs to control identities, proxies, and sessions - useful for managing multiple accounts, rotating proxies, or isolated cookie sessions.
875
+ ### key hold / release / repeat
812
876
 
813
877
  ```js
814
- // List identities and open tabs in different ones
815
- const identities = await chrome.projects.getIdentitiesList();
816
- await chrome.ghostPublicAPI.openTab({ url: 'https://reddit.com', identity: identities[0].id });
878
+ // Hold modifier while pressing another key
879
+ await state.page.keyboard.down('Shift')
880
+ await state.page.keyboard.press('ArrowDown')
881
+ await state.page.keyboard.up('Shift')
882
+
883
+ // Repeat a key
884
+ for (let i = 0; i < 5; i++) await state.page.keyboard.press('ArrowDown')
885
+ ```
817
886
 
818
- // Assign proxies per tab or identity
819
- const proxies = await chrome.ghostProxies.getList();
820
- await chrome.ghostProxies.setTabProxy(tabId, proxies[0].id);
887
+ ### resize viewport
888
+
889
+ ```js
890
+ await state.page.setViewportSize({ width: 1280, height: 720 })
821
891
  ```
822
892
 
823
- For complete API reference with all methods, types, and examples, read:
824
- `extension/src/ghost-browser-api.d.ts`
893
+ ### region screenshot (zoom equivalent)
894
+
895
+ ```js
896
+ await state.page.screenshot({ path: 'region.png', scale: 'css', clip: { x: 100, y: 200, width: 400, height: 300 } })
897
+ ```
825
898
 
826
- Note: Only works in Ghost Browser. In regular Chrome, calls fail with "not available".
899
+ Prefer locator-based actions over coordinates — locators are stable across scroll/resize, auto-wait for elements, and don't require screenshot round-trips that burn ~800 image tokens per cycle.
827
900
 
828
- ## debugging playwriter issues
901
+ ## Ghost Browser integration
829
902
 
830
- if some internal critical error happens you can read your own relay ws logs to understand the issue, it will show logs from extension, mcp and ws server together. then you can create a gh issue using `gh issue create -R remorses/playwriter --title title --body body`. ask for user confirmation before doing this.
903
+ When running in [Ghost Browser](https://ghostbrowser.com/), the `chrome` object exposes APIs for multi-identity automation (identities, proxies, sessions). See `extension/src/ghost-browser-api.d.ts` for full API reference. Only works in Ghost Browser — calls fail in regular Chrome.