playwriter 0.0.39 → 0.0.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/cdp-relay.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import { Hono } from 'hono'
2
2
  import { serve } from '@hono/node-server'
3
+ import { getConnInfo } from '@hono/node-server/conninfo'
3
4
  import { createNodeWebSocket } from '@hono/node-ws'
4
5
  import type { WSContext } from 'hono/ws'
5
6
  import type { Protocol } from './cdp-types.js'
@@ -395,23 +396,23 @@ export async function startPlayWriterCDPRelayServer({ port = 19988, host = '127.
395
396
  'elnnakgjclnapgflmidlpobefkdmapdm', // Dev extension (loaded unpacked)
396
397
  ]
397
398
 
398
- function isAllowedOrigin(origin: string | undefined): boolean {
399
- if (!origin) {
400
- return true // Node.js clients don't send Origin
401
- }
402
- if (origin.startsWith('chrome-extension://')) {
403
- const extensionId = origin.replace('chrome-extension://', '')
404
- return ALLOWED_EXTENSION_IDS.includes(extensionId)
405
- }
406
- return false // Reject browser origins (http://, https://, etc.)
407
- }
408
-
409
399
  app.get('/cdp/:clientId?', (c, next) => {
410
400
  const origin = c.req.header('origin')
411
- if (!isAllowedOrigin(origin)) {
412
- logger?.log(chalk.red(`Rejecting /cdp WebSocket from origin: ${origin}`))
413
- return c.text('Forbidden', 403)
401
+
402
+ // Validate Origin header if present (Node.js clients don't send it)
403
+ if (origin) {
404
+ if (origin.startsWith('chrome-extension://')) {
405
+ const extensionId = origin.replace('chrome-extension://', '')
406
+ if (!ALLOWED_EXTENSION_IDS.includes(extensionId)) {
407
+ logger?.log(chalk.red(`Rejecting /cdp WebSocket from unknown extension: ${extensionId}`))
408
+ return c.text('Forbidden', 403)
409
+ }
410
+ } else {
411
+ logger?.log(chalk.red(`Rejecting /cdp WebSocket from origin: ${origin}`))
412
+ return c.text('Forbidden', 403)
413
+ }
414
414
  }
415
+
415
416
  if (token) {
416
417
  const url = new URL(c.req.url, 'http://localhost')
417
418
  const providedToken = url.searchParams.get('token')
@@ -574,11 +575,33 @@ export async function startPlayWriterCDPRelayServer({ port = 19988, host = '127.
574
575
  }))
575
576
 
576
577
  app.get('/extension', (c, next) => {
578
+ // 1. Host Validation: The extension endpoint must ONLY be accessed from localhost.
579
+ // This prevents attackers on the network from hijacking the browser session
580
+ // even if the server is exposed via 0.0.0.0.
581
+ const info = getConnInfo(c)
582
+ const remoteAddress = info.remote.address
583
+ const isLocalhost = remoteAddress === '127.0.0.1' || remoteAddress === '::1'
584
+
585
+ if (!isLocalhost) {
586
+ logger?.log(chalk.red(`Rejecting /extension WebSocket from remote IP: ${remoteAddress}`))
587
+ return c.text('Forbidden - Extension must be local', 403)
588
+ }
589
+
590
+ // 2. Origin Validation: Prevent browser-based attacks (CSRF).
591
+ // Browsers cannot spoof the Origin header, so this ensures the connection
592
+ // is coming from our specific Chrome Extension, not a malicious website.
577
593
  const origin = c.req.header('origin')
578
- if (!isAllowedOrigin(origin)) {
579
- logger?.log(chalk.red(`Rejecting /extension WebSocket from origin: ${origin}`))
594
+ if (!origin || !origin.startsWith('chrome-extension://')) {
595
+ logger?.log(chalk.red(`Rejecting /extension WebSocket: origin must be chrome-extension://, got: ${origin || 'none'}`))
580
596
  return c.text('Forbidden', 403)
581
597
  }
598
+
599
+ const extensionId = origin.replace('chrome-extension://', '')
600
+ if (!ALLOWED_EXTENSION_IDS.includes(extensionId)) {
601
+ logger?.log(chalk.red(`Rejecting /extension WebSocket from unknown extension: ${extensionId}`))
602
+ return c.text('Forbidden', 403)
603
+ }
604
+
582
605
  return next()
583
606
  }, upgradeWebSocket(() => {
584
607
  return {
package/src/mcp.test.ts CHANGED
@@ -2307,6 +2307,135 @@ describe('MCP Server Tests', () => {
2307
2307
  console.log(`Screenshots saved to: ${assetsDir}`)
2308
2308
  }, 120000)
2309
2309
 
2310
+ it('should take screenshot with accessibility labels via MCP execute tool', async () => {
2311
+ const browserContext = getBrowserContext()
2312
+ const serviceWorker = await getExtensionServiceWorker(browserContext)
2313
+
2314
+ const page = await browserContext.newPage()
2315
+ await page.setContent(`
2316
+ <html>
2317
+ <head>
2318
+ <style>
2319
+ body {
2320
+ margin: 0;
2321
+ background: #e8f4f8;
2322
+ position: relative;
2323
+ min-height: 100vh;
2324
+ }
2325
+ .controls {
2326
+ padding: 20px;
2327
+ position: relative;
2328
+ z-index: 10;
2329
+ }
2330
+ .grid-marker {
2331
+ position: absolute;
2332
+ background: rgba(255, 100, 100, 0.3);
2333
+ border: 1px solid #ff6464;
2334
+ font-size: 10px;
2335
+ color: #333;
2336
+ display: flex;
2337
+ align-items: center;
2338
+ justify-content: center;
2339
+ }
2340
+ .h-marker {
2341
+ left: 0;
2342
+ width: 100%;
2343
+ height: 20px;
2344
+ }
2345
+ .v-marker {
2346
+ top: 0;
2347
+ height: 100%;
2348
+ width: 20px;
2349
+ }
2350
+ </style>
2351
+ </head>
2352
+ <body>
2353
+ <div class="controls">
2354
+ <button id="submit-btn">Submit Form</button>
2355
+ <a href="/about">About Us</a>
2356
+ <input type="text" placeholder="Enter your name" />
2357
+ </div>
2358
+ <!-- Horizontal markers every 200px -->
2359
+ <div class="grid-marker h-marker" style="top: 200px;">200px</div>
2360
+ <div class="grid-marker h-marker" style="top: 400px;">400px</div>
2361
+ <div class="grid-marker h-marker" style="top: 600px;">600px</div>
2362
+ <!-- Vertical markers every 200px -->
2363
+ <div class="grid-marker v-marker" style="left: 200px;">200</div>
2364
+ <div class="grid-marker v-marker" style="left: 400px;">400</div>
2365
+ <div class="grid-marker v-marker" style="left: 600px;">600</div>
2366
+ <div class="grid-marker v-marker" style="left: 800px;">800</div>
2367
+ <div class="grid-marker v-marker" style="left: 1000px;">1000</div>
2368
+ <div class="grid-marker v-marker" style="left: 1200px;">1200</div>
2369
+ </body>
2370
+ </html>
2371
+ `)
2372
+ await page.bringToFront()
2373
+
2374
+ await serviceWorker.evaluate(async () => {
2375
+ await globalThis.toggleExtensionForActiveTab()
2376
+ })
2377
+ await new Promise(r => setTimeout(r, 400))
2378
+
2379
+ // Take screenshot with accessibility labels via MCP
2380
+ const result = await client.callTool({
2381
+ name: 'execute',
2382
+ arguments: {
2383
+ code: js`
2384
+ let testPage;
2385
+ for (const p of context.pages()) {
2386
+ const html = await p.content();
2387
+ if (html.includes('submit-btn')) { testPage = p; break; }
2388
+ }
2389
+ if (!testPage) throw new Error('Test page not found');
2390
+ await screenshotWithAccessibilityLabels({ page: testPage });
2391
+ `,
2392
+ timeout: 15000,
2393
+ },
2394
+ })
2395
+
2396
+ expect(result.isError).toBeFalsy()
2397
+
2398
+ // Verify response has both text and image content
2399
+ const content = result.content as any[]
2400
+ expect(content.length).toBe(2)
2401
+
2402
+ // Check text content
2403
+ const textContent = content.find(c => c.type === 'text')
2404
+ expect(textContent).toBeDefined()
2405
+ expect(textContent.text).toContain('Screenshot saved to:')
2406
+ expect(textContent.text).toContain('.jpg')
2407
+ expect(textContent.text).toContain('Labels shown:')
2408
+ expect(textContent.text).toContain('Accessibility snapshot:')
2409
+ expect(textContent.text).toContain('Submit Form')
2410
+
2411
+ // Check image content
2412
+ const imageContent = content.find(c => c.type === 'image')
2413
+ expect(imageContent).toBeDefined()
2414
+ expect(imageContent.mimeType).toBe('image/jpeg')
2415
+ expect(imageContent.data).toBeDefined()
2416
+ expect(imageContent.data.length).toBeGreaterThan(100) // base64 data should be substantial
2417
+
2418
+ // Verify the image is valid JPEG by checking base64
2419
+ const buffer = Buffer.from(imageContent.data, 'base64')
2420
+ const dimensions = imageSize(buffer)
2421
+
2422
+ // Get actual viewport size from page
2423
+ const viewport = await page.evaluate(() => ({
2424
+ innerWidth: window.innerWidth,
2425
+ innerHeight: window.innerHeight,
2426
+ outerWidth: window.outerWidth,
2427
+ outerHeight: window.outerHeight,
2428
+ }))
2429
+ console.log('Screenshot dimensions:', dimensions.width, 'x', dimensions.height)
2430
+ console.log('Window viewport:', viewport)
2431
+
2432
+ expect(dimensions.type).toBe('jpg')
2433
+ expect(dimensions.width).toBeGreaterThan(0)
2434
+ expect(dimensions.height).toBeGreaterThan(0)
2435
+
2436
+ await page.close()
2437
+ }, 60000)
2438
+
2310
2439
  })
2311
2440
 
2312
2441
 
@@ -2648,6 +2777,11 @@ describe('CDP Session Tests', () => {
2648
2777
  const browserContext = getBrowserContext()
2649
2778
  const serviceWorker = await getExtensionServiceWorker(browserContext)
2650
2779
 
2780
+ // Clear any existing connected tabs from previous tests
2781
+ await serviceWorker.evaluate(async () => {
2782
+ await globalThis.disconnectEverything()
2783
+ })
2784
+
2651
2785
  const page = await browserContext.newPage()
2652
2786
  await page.goto('https://example.com/')
2653
2787
  await page.bringToFront()
@@ -2692,6 +2826,11 @@ describe('CDP Session Tests', () => {
2692
2826
  const browserContext = getBrowserContext()
2693
2827
  const serviceWorker = await getExtensionServiceWorker(browserContext)
2694
2828
 
2829
+ // Clear any existing connected tabs from previous tests
2830
+ await serviceWorker.evaluate(async () => {
2831
+ await globalThis.disconnectEverything()
2832
+ })
2833
+
2695
2834
  const page1 = await browserContext.newPage()
2696
2835
  await page1.goto('https://example.com/')
2697
2836
  await page1.bringToFront()
@@ -3267,6 +3406,12 @@ describe('Auto-enable Tests', () => {
3267
3406
  const browserContext = getBrowserContext()
3268
3407
  const serviceWorker = await getExtensionServiceWorker(browserContext)
3269
3408
 
3409
+ // Ensure clean state - disconnect any tabs from previous tests or setup
3410
+ await serviceWorker.evaluate(async () => {
3411
+ await globalThis.disconnectEverything()
3412
+ })
3413
+ await new Promise(r => setTimeout(r, 100))
3414
+
3270
3415
  // Verify no tabs are connected
3271
3416
  const tabCountBefore = await serviceWorker.evaluate(() => {
3272
3417
  const state = globalThis.getExtensionState()
package/src/mcp.ts CHANGED
@@ -21,7 +21,7 @@ import { Editor } from './editor.js'
21
21
  import { getStylesForLocator, formatStylesAsText, type StylesResult } from './styles.js'
22
22
  import { getReactSource, type ReactSourceLocation } from './react-source.js'
23
23
  import { ScopedFS } from './scoped-fs.js'
24
- import { showAriaRefLabels, hideAriaRefLabels } from './aria-snapshot.js'
24
+ import { screenshotWithAccessibilityLabels, type ScreenshotResult } from './aria-snapshot.js'
25
25
  const __filename = fileURLToPath(import.meta.url)
26
26
  const __dirname = path.dirname(__filename)
27
27
 
@@ -86,8 +86,7 @@ interface VMContext {
86
86
  getStylesForLocator: (options: { locator: any }) => Promise<StylesResult>
87
87
  formatStylesAsText: (styles: StylesResult) => string
88
88
  getReactSource: (options: { locator: any }) => Promise<ReactSourceLocation | null>
89
- showAriaRefLabels: (options: { page: Page; interactiveOnly?: boolean }) => Promise<{ snapshot: string; labelCount: number }>
90
- hideAriaRefLabels: (options: { page: Page }) => Promise<void>
89
+ screenshotWithAccessibilityLabels: (options: { page: Page; interactiveOnly?: boolean }) => Promise<void>
91
90
  require: NodeRequire
92
91
  import: (specifier: string) => Promise<any>
93
92
  }
@@ -864,6 +863,13 @@ server.tool(
864
863
  return getReactSource({ locator: options.locator, cdp })
865
864
  }
866
865
 
866
+ // Collector for screenshots taken during this execution
867
+ const screenshotCollector: ScreenshotResult[] = []
868
+
869
+ const screenshotWithAccessibilityLabelsFn = async (options: { page: Page; interactiveOnly?: boolean }) => {
870
+ return screenshotWithAccessibilityLabels({ ...options, collector: screenshotCollector })
871
+ }
872
+
867
873
  let vmContextObj: VMContextWithGlobals = {
868
874
  page,
869
875
  context,
@@ -880,8 +886,7 @@ server.tool(
880
886
  getStylesForLocator: getStylesForLocatorFn,
881
887
  formatStylesAsText,
882
888
  getReactSource: getReactSourceFn,
883
- showAriaRefLabels,
884
- hideAriaRefLabels,
889
+ screenshotWithAccessibilityLabels: screenshotWithAccessibilityLabelsFn,
885
890
  resetPlaywright: async () => {
886
891
  const { page: newPage, context: newContext } = await resetConnection()
887
892
 
@@ -901,8 +906,7 @@ server.tool(
901
906
  getStylesForLocator: getStylesForLocatorFn,
902
907
  formatStylesAsText,
903
908
  getReactSource: getReactSourceFn,
904
- showAriaRefLabels,
905
- hideAriaRefLabels,
909
+ screenshotWithAccessibilityLabels: screenshotWithAccessibilityLabelsFn,
906
910
  resetPlaywright: vmContextObj.resetPlaywright,
907
911
  require: sandboxedRequire,
908
912
  // TODO --experimental-vm-modules is needed to make import work in vm
@@ -943,6 +947,13 @@ server.tool(
943
947
  responseText += 'Code executed successfully (no output)'
944
948
  }
945
949
 
950
+ // Add screenshot info to response text
951
+ for (const screenshot of screenshotCollector) {
952
+ responseText += `\nScreenshot saved to: ${screenshot.path}\n`
953
+ responseText += `Labels shown: ${screenshot.labelCount}\n\n`
954
+ responseText += `Accessibility snapshot:\n${screenshot.snapshot}\n`
955
+ }
956
+
946
957
  const MAX_LENGTH = 6000
947
958
  let finalText = responseText.trim()
948
959
  if (finalText.length > MAX_LENGTH) {
@@ -951,14 +962,24 @@ server.tool(
951
962
  `\n\n[Truncated to ${MAX_LENGTH} characters. Better manage your logs or paginate them to read the full logs]`
952
963
  }
953
964
 
954
- return {
955
- content: [
956
- {
957
- type: 'text',
958
- text: finalText,
959
- },
960
- ],
965
+ // Build content array with text and any collected screenshots
966
+ const content: Array<{ type: 'text'; text: string } | { type: 'image'; data: string; mimeType: string }> = [
967
+ {
968
+ type: 'text',
969
+ text: finalText,
970
+ },
971
+ ]
972
+
973
+ // Add all collected screenshots as images
974
+ for (const screenshot of screenshotCollector) {
975
+ content.push({
976
+ type: 'image',
977
+ data: screenshot.base64,
978
+ mimeType: screenshot.mimeType,
979
+ })
961
980
  }
981
+
982
+ return { content }
962
983
  } catch (error: any) {
963
984
  const errorStack = error.stack || error.message
964
985
  const isTimeoutError = error instanceof CodeExecutionTimeoutError || error.name === 'TimeoutError'
package/src/prompt.md CHANGED
@@ -31,6 +31,8 @@ After any action (click, submit, navigate), verify what happened:
31
31
  console.log('url:', page.url()); console.log(await accessibilitySnapshot({ page }).then(x => x.split('\n').slice(0, 30).join('\n')));
32
32
  ```
33
33
 
34
+ For visually complex pages (grids, galleries, dashboards), use `screenshotWithAccessibilityLabels({ page })` instead to understand spatial layout.
35
+
34
36
  If nothing changed, try `await page.waitForLoadState('networkidle', {timeout: 3000})` or you may have clicked the wrong element.
35
37
 
36
38
  ## accessibility snapshots
@@ -66,6 +68,24 @@ Search for specific elements:
66
68
  const snapshot = await accessibilitySnapshot({ page, search: /button|submit/i })
67
69
  ```
68
70
 
71
+ ## choosing between snapshot methods
72
+
73
+ Both `accessibilitySnapshot` and `screenshotWithAccessibilityLabels` use the same `aria-ref` system, so you can combine them effectively.
74
+
75
+ **Use `accessibilitySnapshot` when:**
76
+ - Page has simple, semantic structure (articles, forms, lists)
77
+ - You need to search for specific text or patterns
78
+ - Token usage matters (text is smaller than images)
79
+ - You need to process the output programmatically
80
+
81
+ **Use `screenshotWithAccessibilityLabels` when:**
82
+ - Page has complex visual layout (grids, galleries, dashboards, maps)
83
+ - Spatial position matters (e.g., "first image", "top-left button")
84
+ - DOM order doesn't match visual order
85
+ - You need to understand the visual hierarchy
86
+
87
+ **Combining both:** Take a screenshot first to understand the layout and identify target elements visually, then use `accessibilitySnapshot({ page, search: /pattern/ })` for efficient searching in subsequent calls.
88
+
69
89
  ## selector best practices
70
90
 
71
91
  **For unknown websites**: use `accessibilitySnapshot()` with `aria-ref` - it shows what's actually interactive.
@@ -206,24 +226,25 @@ const matches = await editor.grep({ regex: /console\.log/ });
206
226
  await editor.edit({ url: matches[0].url, oldString: 'DEBUG = false', newString: 'DEBUG = true' });
207
227
  ```
208
228
 
209
- **showAriaRefLabels** - overlay Vimium-style visual labels on interactive elements. Useful for taking screenshots where you can see element references. Labels auto-hide after 30 seconds. Call again if page HTML changes or scrolls to get fresh labels. Use a timeout of 10 seconds at least.
229
+ **screenshotWithAccessibilityLabels** - take a screenshot with Vimium-style visual labels overlaid on interactive elements. It shows the labels, captures the screenshot, then removes the labels. The image and accessibility snapshot are automatically included in the response. It can be called multiple times to capture multiple screenshots. Use a timeout of **20 seconds** for complex pages.
230
+
231
+ Prefer this for pages with grids, image galleries, maps, or complex visual layouts where spatial position matters. For simple text-heavy pages, `accessibilitySnapshot` with search is faster and uses fewer tokens.
210
232
 
211
233
  ```js
212
- const { snapshot, labelCount } = await showAriaRefLabels({ page });
213
- console.log(`Showing ${labelCount} labels`);
214
- await page.screenshot({ path: '/tmp/labeled-page.png' });
215
- // Use aria-ref from snapshot to interact
234
+ await screenshotWithAccessibilityLabels({ page });
235
+ // Image and accessibility snapshot are automatically included in response
236
+ // Use aria-ref from snapshot to interact with elements
216
237
  await page.locator('aria-ref=e5').click();
238
+
239
+ // Can take multiple screenshots in one execution
240
+ await screenshotWithAccessibilityLabels({ page });
241
+ await page.click('button');
242
+ await screenshotWithAccessibilityLabels({ page });
243
+ // Both images are included in the response
217
244
  ```
218
245
 
219
246
  Labels are color-coded: yellow=links, orange=buttons, coral=inputs, pink=checkboxes, peach=sliders, salmon=menus, amber=tabs.
220
247
 
221
- **hideAriaRefLabels** - manually remove labels before the 30-second auto-hide:
222
-
223
- ```js
224
- await hideAriaRefLabels({ page });
225
- ```
226
-
227
248
  ## pinned elements
228
249
 
229
250
  Users can right-click → "Copy Playwriter Element Reference" to store elements in `globalThis.playwriterPinnedElem1` (increments for each pin). The reference is copied to clipboard:
@@ -297,5 +318,6 @@ Examples of what playwriter can do:
297
318
  - Intercept network requests to reverse-engineer APIs and build SDKs
298
319
  - Scrape data by replaying paginated API calls instead of scrolling DOM
299
320
  - Get accessibility snapshot to find elements, then automate interactions
321
+ - Use visual screenshots to understand complex layouts like image grids, dashboards, or maps
300
322
  - Debug issues by collecting logs and controlling the page simultaneously
301
323
  - Handle popups, downloads, iframes, and dialog boxes