playwriter 0.0.40 → 0.0.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/cdp-relay.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import { Hono } from 'hono'
2
2
  import { serve } from '@hono/node-server'
3
+ import { getConnInfo } from '@hono/node-server/conninfo'
3
4
  import { createNodeWebSocket } from '@hono/node-ws'
4
5
  import type { WSContext } from 'hono/ws'
5
6
  import type { Protocol } from './cdp-types.js'
@@ -395,23 +396,23 @@ export async function startPlayWriterCDPRelayServer({ port = 19988, host = '127.
395
396
  'elnnakgjclnapgflmidlpobefkdmapdm', // Dev extension (loaded unpacked)
396
397
  ]
397
398
 
398
- function isAllowedOrigin(origin: string | undefined): boolean {
399
- if (!origin) {
400
- return true // Node.js clients don't send Origin
401
- }
402
- if (origin.startsWith('chrome-extension://')) {
403
- const extensionId = origin.replace('chrome-extension://', '')
404
- return ALLOWED_EXTENSION_IDS.includes(extensionId)
405
- }
406
- return false // Reject browser origins (http://, https://, etc.)
407
- }
408
-
409
399
  app.get('/cdp/:clientId?', (c, next) => {
410
400
  const origin = c.req.header('origin')
411
- if (!isAllowedOrigin(origin)) {
412
- logger?.log(chalk.red(`Rejecting /cdp WebSocket from origin: ${origin}`))
413
- return c.text('Forbidden', 403)
401
+
402
+ // Validate Origin header if present (Node.js clients don't send it)
403
+ if (origin) {
404
+ if (origin.startsWith('chrome-extension://')) {
405
+ const extensionId = origin.replace('chrome-extension://', '')
406
+ if (!ALLOWED_EXTENSION_IDS.includes(extensionId)) {
407
+ logger?.log(chalk.red(`Rejecting /cdp WebSocket from unknown extension: ${extensionId}`))
408
+ return c.text('Forbidden', 403)
409
+ }
410
+ } else {
411
+ logger?.log(chalk.red(`Rejecting /cdp WebSocket from origin: ${origin}`))
412
+ return c.text('Forbidden', 403)
413
+ }
414
414
  }
415
+
415
416
  if (token) {
416
417
  const url = new URL(c.req.url, 'http://localhost')
417
418
  const providedToken = url.searchParams.get('token')
@@ -574,11 +575,33 @@ export async function startPlayWriterCDPRelayServer({ port = 19988, host = '127.
574
575
  }))
575
576
 
576
577
  app.get('/extension', (c, next) => {
578
+ // 1. Host Validation: The extension endpoint must ONLY be accessed from localhost.
579
+ // This prevents attackers on the network from hijacking the browser session
580
+ // even if the server is exposed via 0.0.0.0.
581
+ const info = getConnInfo(c)
582
+ const remoteAddress = info.remote.address
583
+ const isLocalhost = remoteAddress === '127.0.0.1' || remoteAddress === '::1'
584
+
585
+ if (!isLocalhost) {
586
+ logger?.log(chalk.red(`Rejecting /extension WebSocket from remote IP: ${remoteAddress}`))
587
+ return c.text('Forbidden - Extension must be local', 403)
588
+ }
589
+
590
+ // 2. Origin Validation: Prevent browser-based attacks (CSRF).
591
+ // Browsers cannot spoof the Origin header, so this ensures the connection
592
+ // is coming from our specific Chrome Extension, not a malicious website.
577
593
  const origin = c.req.header('origin')
578
- if (!isAllowedOrigin(origin)) {
579
- logger?.log(chalk.red(`Rejecting /extension WebSocket from origin: ${origin}`))
594
+ if (!origin || !origin.startsWith('chrome-extension://')) {
595
+ logger?.log(chalk.red(`Rejecting /extension WebSocket: origin must be chrome-extension://, got: ${origin || 'none'}`))
580
596
  return c.text('Forbidden', 403)
581
597
  }
598
+
599
+ const extensionId = origin.replace('chrome-extension://', '')
600
+ if (!ALLOWED_EXTENSION_IDS.includes(extensionId)) {
601
+ logger?.log(chalk.red(`Rejecting /extension WebSocket from unknown extension: ${extensionId}`))
602
+ return c.text('Forbidden', 403)
603
+ }
604
+
582
605
  return next()
583
606
  }, upgradeWebSocket(() => {
584
607
  return {
package/src/mcp.test.ts CHANGED
@@ -2314,10 +2314,58 @@ describe('MCP Server Tests', () => {
2314
2314
  const page = await browserContext.newPage()
2315
2315
  await page.setContent(`
2316
2316
  <html>
2317
+ <head>
2318
+ <style>
2319
+ body {
2320
+ margin: 0;
2321
+ background: #e8f4f8;
2322
+ position: relative;
2323
+ min-height: 100vh;
2324
+ }
2325
+ .controls {
2326
+ padding: 20px;
2327
+ position: relative;
2328
+ z-index: 10;
2329
+ }
2330
+ .grid-marker {
2331
+ position: absolute;
2332
+ background: rgba(255, 100, 100, 0.3);
2333
+ border: 1px solid #ff6464;
2334
+ font-size: 10px;
2335
+ color: #333;
2336
+ display: flex;
2337
+ align-items: center;
2338
+ justify-content: center;
2339
+ }
2340
+ .h-marker {
2341
+ left: 0;
2342
+ width: 100%;
2343
+ height: 20px;
2344
+ }
2345
+ .v-marker {
2346
+ top: 0;
2347
+ height: 100%;
2348
+ width: 20px;
2349
+ }
2350
+ </style>
2351
+ </head>
2317
2352
  <body>
2318
- <button id="submit-btn">Submit Form</button>
2319
- <a href="/about">About Us</a>
2320
- <input type="text" placeholder="Enter your name" />
2353
+ <div class="controls">
2354
+ <button id="submit-btn">Submit Form</button>
2355
+ <a href="/about">About Us</a>
2356
+ <input type="text" placeholder="Enter your name" />
2357
+ </div>
2358
+ <!-- Horizontal markers every 200px -->
2359
+ <div class="grid-marker h-marker" style="top: 200px;">200px</div>
2360
+ <div class="grid-marker h-marker" style="top: 400px;">400px</div>
2361
+ <div class="grid-marker h-marker" style="top: 600px;">600px</div>
2362
+ <!-- Vertical markers every 200px -->
2363
+ <div class="grid-marker v-marker" style="left: 200px;">200</div>
2364
+ <div class="grid-marker v-marker" style="left: 400px;">400</div>
2365
+ <div class="grid-marker v-marker" style="left: 600px;">600</div>
2366
+ <div class="grid-marker v-marker" style="left: 800px;">800</div>
2367
+ <div class="grid-marker v-marker" style="left: 1000px;">1000</div>
2368
+ <div class="grid-marker v-marker" style="left: 1200px;">1200</div>
2321
2369
  </body>
2322
2370
  </html>
2323
2371
  `)
@@ -2370,6 +2418,17 @@ describe('MCP Server Tests', () => {
2370
2418
  // Verify the image is valid JPEG by checking base64
2371
2419
  const buffer = Buffer.from(imageContent.data, 'base64')
2372
2420
  const dimensions = imageSize(buffer)
2421
+
2422
+ // Get actual viewport size from page
2423
+ const viewport = await page.evaluate(() => ({
2424
+ innerWidth: window.innerWidth,
2425
+ innerHeight: window.innerHeight,
2426
+ outerWidth: window.outerWidth,
2427
+ outerHeight: window.outerHeight,
2428
+ }))
2429
+ console.log('Screenshot dimensions:', dimensions.width, 'x', dimensions.height)
2430
+ console.log('Window viewport:', viewport)
2431
+
2373
2432
  expect(dimensions.type).toBe('jpg')
2374
2433
  expect(dimensions.width).toBeGreaterThan(0)
2375
2434
  expect(dimensions.height).toBeGreaterThan(0)
package/src/prompt.md CHANGED
@@ -31,6 +31,8 @@ After any action (click, submit, navigate), verify what happened:
31
31
  console.log('url:', page.url()); console.log(await accessibilitySnapshot({ page }).then(x => x.split('\n').slice(0, 30).join('\n')));
32
32
  ```
33
33
 
34
+ For visually complex pages (grids, galleries, dashboards), use `screenshotWithAccessibilityLabels({ page })` instead to understand spatial layout.
35
+
34
36
  If nothing changed, try `await page.waitForLoadState('networkidle', {timeout: 3000})` or you may have clicked the wrong element.
35
37
 
36
38
  ## accessibility snapshots
@@ -66,6 +68,24 @@ Search for specific elements:
66
68
  const snapshot = await accessibilitySnapshot({ page, search: /button|submit/i })
67
69
  ```
68
70
 
71
+ ## choosing between snapshot methods
72
+
73
+ Both `accessibilitySnapshot` and `screenshotWithAccessibilityLabels` use the same `aria-ref` system, so you can combine them effectively.
74
+
75
+ **Use `accessibilitySnapshot` when:**
76
+ - Page has simple, semantic structure (articles, forms, lists)
77
+ - You need to search for specific text or patterns
78
+ - Token usage matters (text is smaller than images)
79
+ - You need to process the output programmatically
80
+
81
+ **Use `screenshotWithAccessibilityLabels` when:**
82
+ - Page has complex visual layout (grids, galleries, dashboards, maps)
83
+ - Spatial position matters (e.g., "first image", "top-left button")
84
+ - DOM order doesn't match visual order
85
+ - You need to understand the visual hierarchy
86
+
87
+ **Combining both:** Take a screenshot first to understand the layout and identify target elements visually, then use `accessibilitySnapshot({ page, search: /pattern/ })` for efficient searching in subsequent calls.
88
+
69
89
  ## selector best practices
70
90
 
71
91
  **For unknown websites**: use `accessibilitySnapshot()` with `aria-ref` - it shows what's actually interactive.
@@ -206,7 +226,9 @@ const matches = await editor.grep({ regex: /console\.log/ });
206
226
  await editor.edit({ url: matches[0].url, oldString: 'DEBUG = false', newString: 'DEBUG = true' });
207
227
  ```
208
228
 
209
- **screenshotWithAccessibilityLabels** - take a screenshot with Vimium-style visual labels overlaid on interactive elements. Shows labels, captures screenshot, then removes labels. The image and accessibility snapshot are automatically included in the response. Can be called multiple times to capture multiple screenshots. Use a timeout of 10 seconds at least.
229
+ **screenshotWithAccessibilityLabels** - take a screenshot with Vimium-style visual labels overlaid on interactive elements. Shows labels, captures screenshot, then removes labels. The image and accessibility snapshot are automatically included in the response. Can be called multiple times to capture multiple screenshots. Use a timeout of **20 seconds** for complex pages.
230
+
231
+ Prefer this for pages with grids, image galleries, maps, or complex visual layouts where spatial position matters. For simple text-heavy pages, `accessibilitySnapshot` with search is faster and uses fewer tokens.
210
232
 
211
233
  ```js
212
234
  await screenshotWithAccessibilityLabels({ page });
@@ -296,5 +318,6 @@ Examples of what playwriter can do:
296
318
  - Intercept network requests to reverse-engineer APIs and build SDKs
297
319
  - Scrape data by replaying paginated API calls instead of scrolling DOM
298
320
  - Get accessibility snapshot to find elements, then automate interactions
321
+ - Use visual screenshots to understand complex layouts like image grids, dashboards, or maps
299
322
  - Debug issues by collecting logs and controlling the page simultaneously
300
323
  - Handle popups, downloads, iframes, and dialog boxes