npm - playwriter - Versions diffs - 0.0.39 → 0.0.41 - Mend

playwriter 0.0.39 → 0.0.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

package/dist/aria-snapshot.d.ts +27 -0
package/dist/aria-snapshot.d.ts.map +1 -1
package/dist/aria-snapshot.js +111 -4
package/dist/aria-snapshot.js.map +1 -1
package/dist/cdp-relay.d.ts.map +1 -1
package/dist/cdp-relay.js +34 -15
package/dist/cdp-relay.js.map +1 -1
package/dist/mcp.d.ts.map +1 -1
package/dist/mcp.js +30 -13
package/dist/mcp.js.map +1 -1
package/dist/mcp.test.js +130 -0
package/dist/mcp.test.js.map +1 -1
package/package.json +1 -1
package/src/aria-snapshot.ts +139 -4
package/src/assets/aria-labels-github.png +0 -0
package/src/assets/aria-labels-google-snapshot.txt +1 -1
package/src/assets/aria-labels-google.png +0 -0
package/src/assets/aria-labels-hacker-news-snapshot.txt +826 -814
package/src/assets/aria-labels-hacker-news.png +0 -0
package/src/cdp-relay.ts +39 -16
package/src/mcp.test.ts +145 -0
package/src/mcp.ts +35 -14
package/src/prompt.md +33 -11

package/src/assets/aria-labels-hacker-news.png CHANGED Viewed

Binary file

package/src/cdp-relay.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 import { Hono } from 'hono'
 import { serve } from '@hono/node-server'
+import { getConnInfo } from '@hono/node-server/conninfo'
 import { createNodeWebSocket } from '@hono/node-ws'
 import type { WSContext } from 'hono/ws'
 import type { Protocol } from './cdp-types.js'
@@ -395,23 +396,23 @@ export async function startPlayWriterCDPRelayServer({ port = 19988, host = '127.
     'elnnakgjclnapgflmidlpobefkdmapdm', // Dev extension (loaded unpacked)
   ]
-  function isAllowedOrigin(origin: string | undefined): boolean {
-    if (!origin) {
-      return true // Node.js clients don't send Origin
-    }
-    if (origin.startsWith('chrome-extension://')) {
-      const extensionId = origin.replace('chrome-extension://', '')
-      return ALLOWED_EXTENSION_IDS.includes(extensionId)
-    }
-    return false // Reject browser origins (http://, https://, etc.)
-  }
   app.get('/cdp/:clientId?', (c, next) => {
     const origin = c.req.header('origin')
-    if (!isAllowedOrigin(origin)) {
-      logger?.log(chalk.red(`Rejecting /cdp WebSocket from origin: ${origin}`))
-      return c.text('Forbidden', 403)
+    // Validate Origin header if present (Node.js clients don't send it)
+    if (origin) {
+      if (origin.startsWith('chrome-extension://')) {
+        const extensionId = origin.replace('chrome-extension://', '')
+        if (!ALLOWED_EXTENSION_IDS.includes(extensionId)) {
+          logger?.log(chalk.red(`Rejecting /cdp WebSocket from unknown extension: ${extensionId}`))
+          return c.text('Forbidden', 403)
+        }
+      } else {
+        logger?.log(chalk.red(`Rejecting /cdp WebSocket from origin: ${origin}`))
+        return c.text('Forbidden', 403)
+      }
     }
     if (token) {
       const url = new URL(c.req.url, 'http://localhost')
       const providedToken = url.searchParams.get('token')
@@ -574,11 +575,33 @@ export async function startPlayWriterCDPRelayServer({ port = 19988, host = '127.
   }))
   app.get('/extension', (c, next) => {
+    // 1. Host Validation: The extension endpoint must ONLY be accessed from localhost.
+    // This prevents attackers on the network from hijacking the browser session
+    // even if the server is exposed via 0.0.0.0.
+    const info = getConnInfo(c)
+    const remoteAddress = info.remote.address
+    const isLocalhost = remoteAddress === '127.0.0.1' || remoteAddress === '::1'
+    if (!isLocalhost) {
+      logger?.log(chalk.red(`Rejecting /extension WebSocket from remote IP: ${remoteAddress}`))
+      return c.text('Forbidden - Extension must be local', 403)
+    }
+    // 2. Origin Validation: Prevent browser-based attacks (CSRF).
+    // Browsers cannot spoof the Origin header, so this ensures the connection
+    // is coming from our specific Chrome Extension, not a malicious website.
     const origin = c.req.header('origin')
-    if (!isAllowedOrigin(origin)) {
-      logger?.log(chalk.red(`Rejecting /extension WebSocket from origin: ${origin}`))
+    if (!origin || !origin.startsWith('chrome-extension://')) {
+      logger?.log(chalk.red(`Rejecting /extension WebSocket: origin must be chrome-extension://, got: ${origin || 'none'}`))
       return c.text('Forbidden', 403)
     }
+    const extensionId = origin.replace('chrome-extension://', '')
+    if (!ALLOWED_EXTENSION_IDS.includes(extensionId)) {
+      logger?.log(chalk.red(`Rejecting /extension WebSocket from unknown extension: ${extensionId}`))
+      return c.text('Forbidden', 403)
+    }
     return next()
   }, upgradeWebSocket(() => {
     return {

package/src/mcp.test.ts CHANGED Viewed

@@ -2307,6 +2307,135 @@ describe('MCP Server Tests', () => {
         console.log(`Screenshots saved to: ${assetsDir}`)
     }, 120000)
+    it('should take screenshot with accessibility labels via MCP execute tool', async () => {
+        const browserContext = getBrowserContext()
+        const serviceWorker = await getExtensionServiceWorker(browserContext)
+        const page = await browserContext.newPage()
+        await page.setContent(`
+            <html>
+                <head>
+                    <style>
+                        body {
+                            margin: 0;
+                            background: #e8f4f8;
+                            position: relative;
+                            min-height: 100vh;
+                        }
+                        .controls {
+                            padding: 20px;
+                            position: relative;
+                            z-index: 10;
+                        }
+                        .grid-marker {
+                            position: absolute;
+                            background: rgba(255, 100, 100, 0.3);
+                            border: 1px solid #ff6464;
+                            font-size: 10px;
+                            color: #333;
+                            display: flex;
+                            align-items: center;
+                            justify-content: center;
+                        }
+                        .h-marker {
+                            left: 0;
+                            width: 100%;
+                            height: 20px;
+                        }
+                        .v-marker {
+                            top: 0;
+                            height: 100%;
+                            width: 20px;
+                        }
+                    </style>
+                </head>
+                <body>
+                    <div class="controls">
+                        <button id="submit-btn">Submit Form</button>
+                        <a href="/about">About Us</a>
+                        <input type="text" placeholder="Enter your name" />
+                    </div>
+                    <!-- Horizontal markers every 200px -->
+                    <div class="grid-marker h-marker" style="top: 200px;">200px</div>
+                    <div class="grid-marker h-marker" style="top: 400px;">400px</div>
+                    <div class="grid-marker h-marker" style="top: 600px;">600px</div>
+                    <!-- Vertical markers every 200px -->
+                    <div class="grid-marker v-marker" style="left: 200px;">200</div>
+                    <div class="grid-marker v-marker" style="left: 400px;">400</div>
+                    <div class="grid-marker v-marker" style="left: 600px;">600</div>
+                    <div class="grid-marker v-marker" style="left: 800px;">800</div>
+                    <div class="grid-marker v-marker" style="left: 1000px;">1000</div>
+                    <div class="grid-marker v-marker" style="left: 1200px;">1200</div>
+                </body>
+            </html>
+        `)
+        await page.bringToFront()
+        await serviceWorker.evaluate(async () => {
+            await globalThis.toggleExtensionForActiveTab()
+        })
+        await new Promise(r => setTimeout(r, 400))
+        // Take screenshot with accessibility labels via MCP
+        const result = await client.callTool({
+            name: 'execute',
+            arguments: {
+                code: js`
+                    let testPage;
+                    for (const p of context.pages()) {
+                        const html = await p.content();
+                        if (html.includes('submit-btn')) { testPage = p; break; }
+                    }
+                    if (!testPage) throw new Error('Test page not found');
+                    await screenshotWithAccessibilityLabels({ page: testPage });
+                `,
+                timeout: 15000,
+            },
+        })
+        expect(result.isError).toBeFalsy()
+        // Verify response has both text and image content
+        const content = result.content as any[]
+        expect(content.length).toBe(2)
+        // Check text content
+        const textContent = content.find(c => c.type === 'text')
+        expect(textContent).toBeDefined()
+        expect(textContent.text).toContain('Screenshot saved to:')
+        expect(textContent.text).toContain('.jpg')
+        expect(textContent.text).toContain('Labels shown:')
+        expect(textContent.text).toContain('Accessibility snapshot:')
+        expect(textContent.text).toContain('Submit Form')
+        // Check image content
+        const imageContent = content.find(c => c.type === 'image')
+        expect(imageContent).toBeDefined()
+        expect(imageContent.mimeType).toBe('image/jpeg')
+        expect(imageContent.data).toBeDefined()
+        expect(imageContent.data.length).toBeGreaterThan(100) // base64 data should be substantial
+        // Verify the image is valid JPEG by checking base64
+        const buffer = Buffer.from(imageContent.data, 'base64')
+        const dimensions = imageSize(buffer)
+        // Get actual viewport size from page
+        const viewport = await page.evaluate(() => ({
+            innerWidth: window.innerWidth,
+            innerHeight: window.innerHeight,
+            outerWidth: window.outerWidth,
+            outerHeight: window.outerHeight,
+        }))
+        console.log('Screenshot dimensions:', dimensions.width, 'x', dimensions.height)
+        console.log('Window viewport:', viewport)
+        expect(dimensions.type).toBe('jpg')
+        expect(dimensions.width).toBeGreaterThan(0)
+        expect(dimensions.height).toBeGreaterThan(0)
+        await page.close()
+    }, 60000)
 })
@@ -2648,6 +2777,11 @@ describe('CDP Session Tests', () => {
         const browserContext = getBrowserContext()
         const serviceWorker = await getExtensionServiceWorker(browserContext)
+        // Clear any existing connected tabs from previous tests
+        await serviceWorker.evaluate(async () => {
+            await globalThis.disconnectEverything()
+        })
         const page = await browserContext.newPage()
         await page.goto('https://example.com/')
         await page.bringToFront()
@@ -2692,6 +2826,11 @@ describe('CDP Session Tests', () => {
         const browserContext = getBrowserContext()
         const serviceWorker = await getExtensionServiceWorker(browserContext)
+        // Clear any existing connected tabs from previous tests
+        await serviceWorker.evaluate(async () => {
+            await globalThis.disconnectEverything()
+        })
         const page1 = await browserContext.newPage()
         await page1.goto('https://example.com/')
         await page1.bringToFront()
@@ -3267,6 +3406,12 @@ describe('Auto-enable Tests', () => {
         const browserContext = getBrowserContext()
         const serviceWorker = await getExtensionServiceWorker(browserContext)
+        // Ensure clean state - disconnect any tabs from previous tests or setup
+        await serviceWorker.evaluate(async () => {
+            await globalThis.disconnectEverything()
+        })
+        await new Promise(r => setTimeout(r, 100))
         // Verify no tabs are connected
         const tabCountBefore = await serviceWorker.evaluate(() => {
             const state = globalThis.getExtensionState()

package/src/mcp.ts CHANGED Viewed

@@ -21,7 +21,7 @@ import { Editor } from './editor.js'
 import { getStylesForLocator, formatStylesAsText, type StylesResult } from './styles.js'
 import { getReactSource, type ReactSourceLocation } from './react-source.js'
 import { ScopedFS } from './scoped-fs.js'
-import { showAriaRefLabels, hideAriaRefLabels } from './aria-snapshot.js'
+import { screenshotWithAccessibilityLabels, type ScreenshotResult } from './aria-snapshot.js'
 const __filename = fileURLToPath(import.meta.url)
 const __dirname = path.dirname(__filename)
@@ -86,8 +86,7 @@ interface VMContext {
   getStylesForLocator: (options: { locator: any }) => Promise<StylesResult>
   formatStylesAsText: (styles: StylesResult) => string
   getReactSource: (options: { locator: any }) => Promise<ReactSourceLocation | null>
-  showAriaRefLabels: (options: { page: Page; interactiveOnly?: boolean }) => Promise<{ snapshot: string; labelCount: number }>
-  hideAriaRefLabels: (options: { page: Page }) => Promise<void>
+  screenshotWithAccessibilityLabels: (options: { page: Page; interactiveOnly?: boolean }) => Promise<void>
   require: NodeRequire
   import: (specifier: string) => Promise<any>
 }
@@ -864,6 +863,13 @@ server.tool(
         return getReactSource({ locator: options.locator, cdp })
       }
+      // Collector for screenshots taken during this execution
+      const screenshotCollector: ScreenshotResult[] = []
+      const screenshotWithAccessibilityLabelsFn = async (options: { page: Page; interactiveOnly?: boolean }) => {
+        return screenshotWithAccessibilityLabels({ ...options, collector: screenshotCollector })
+      }
       let vmContextObj: VMContextWithGlobals = {
         page,
         context,
@@ -880,8 +886,7 @@ server.tool(
         getStylesForLocator: getStylesForLocatorFn,
         formatStylesAsText,
         getReactSource: getReactSourceFn,
-        showAriaRefLabels,
-        hideAriaRefLabels,
+        screenshotWithAccessibilityLabels: screenshotWithAccessibilityLabelsFn,
         resetPlaywright: async () => {
           const { page: newPage, context: newContext } = await resetConnection()
@@ -901,8 +906,7 @@ server.tool(
             getStylesForLocator: getStylesForLocatorFn,
             formatStylesAsText,
             getReactSource: getReactSourceFn,
-            showAriaRefLabels,
-            hideAriaRefLabels,
+            screenshotWithAccessibilityLabels: screenshotWithAccessibilityLabelsFn,
             resetPlaywright: vmContextObj.resetPlaywright,
             require: sandboxedRequire,
             // TODO --experimental-vm-modules is needed to make import work in vm
@@ -943,6 +947,13 @@ server.tool(
         responseText += 'Code executed successfully (no output)'
       }
+      // Add screenshot info to response text
+      for (const screenshot of screenshotCollector) {
+        responseText += `\nScreenshot saved to: ${screenshot.path}\n`
+        responseText += `Labels shown: ${screenshot.labelCount}\n\n`
+        responseText += `Accessibility snapshot:\n${screenshot.snapshot}\n`
+      }
       const MAX_LENGTH = 6000
       let finalText = responseText.trim()
       if (finalText.length > MAX_LENGTH) {
@@ -951,14 +962,24 @@ server.tool(
           `\n\n[Truncated to ${MAX_LENGTH} characters. Better manage your logs or paginate them to read the full logs]`
       }
-      return {
-        content: [
-          {
-            type: 'text',
-            text: finalText,
-          },
-        ],
+      // Build content array with text and any collected screenshots
+      const content: Array<{ type: 'text'; text: string } | { type: 'image'; data: string; mimeType: string }> = [
+        {
+          type: 'text',
+          text: finalText,
+        },
+      ]
+      // Add all collected screenshots as images
+      for (const screenshot of screenshotCollector) {
+        content.push({
+          type: 'image',
+          data: screenshot.base64,
+          mimeType: screenshot.mimeType,
+        })
       }
+      return { content }
     } catch (error: any) {
       const errorStack = error.stack || error.message
       const isTimeoutError = error instanceof CodeExecutionTimeoutError || error.name === 'TimeoutError'

package/src/prompt.md CHANGED Viewed

@@ -31,6 +31,8 @@ After any action (click, submit, navigate), verify what happened:
 console.log('url:', page.url()); console.log(await accessibilitySnapshot({ page }).then(x => x.split('\n').slice(0, 30).join('\n')));
 ```
+For visually complex pages (grids, galleries, dashboards), use `screenshotWithAccessibilityLabels({ page })` instead to understand spatial layout.
 If nothing changed, try `await page.waitForLoadState('networkidle', {timeout: 3000})` or you may have clicked the wrong element.
 ## accessibility snapshots
@@ -66,6 +68,24 @@ Search for specific elements:
 const snapshot = await accessibilitySnapshot({ page, search: /button|submit/i })
 ```
+## choosing between snapshot methods
+Both `accessibilitySnapshot` and `screenshotWithAccessibilityLabels` use the same `aria-ref` system, so you can combine them effectively.
+**Use `accessibilitySnapshot` when:**
+- Page has simple, semantic structure (articles, forms, lists)
+- You need to search for specific text or patterns
+- Token usage matters (text is smaller than images)
+- You need to process the output programmatically
+**Use `screenshotWithAccessibilityLabels` when:**
+- Page has complex visual layout (grids, galleries, dashboards, maps)
+- Spatial position matters (e.g., "first image", "top-left button")
+- DOM order doesn't match visual order
+- You need to understand the visual hierarchy
+**Combining both:** Use screenshot first to understand layout and identify target elements visually, then use `accessibilitySnapshot({ search: /pattern/ })` for efficient searching in subsequent calls.
 ## selector best practices
 **For unknown websites**: use `accessibilitySnapshot()` with `aria-ref` - it shows what's actually interactive.
@@ -206,24 +226,25 @@ const matches = await editor.grep({ regex: /console\.log/ });
 await editor.edit({ url: matches[0].url, oldString: 'DEBUG = false', newString: 'DEBUG = true' });
 ```
-**showAriaRefLabels** - overlay Vimium-style visual labels on interactive elements. Useful for taking screenshots where you can see element references. Labels auto-hide after 30 seconds. Call again if page HTML changes or scrolls to get fresh labels. Use a timeout of 10 seconds at least.
+**screenshotWithAccessibilityLabels** - take a screenshot with Vimium-style visual labels overlaid on interactive elements. Shows labels, captures screenshot, then removes labels. The image and accessibility snapshot are automatically included in the response. Can be called multiple times to capture multiple screenshots. Use a timeout of **20 seconds** for complex pages.
+Prefer this for pages with grids, image galleries, maps, or complex visual layouts where spatial position matters. For simple text-heavy pages, `accessibilitySnapshot` with search is faster and uses fewer tokens.
 ```js
-const { snapshot, labelCount } = await showAriaRefLabels({ page });
-console.log(`Showing ${labelCount} labels`);
-await page.screenshot({ path: '/tmp/labeled-page.png' });
-// Use aria-ref from snapshot to interact
+await screenshotWithAccessibilityLabels({ page });
+// Image and accessibility snapshot are automatically included in response
+// Use aria-ref from snapshot to interact with elements
 await page.locator('aria-ref=e5').click();
+// Can take multiple screenshots in one execution
+await screenshotWithAccessibilityLabels({ page });
+await page.click('button');
+await screenshotWithAccessibilityLabels({ page });
+// Both images are included in the response
 ```
 Labels are color-coded: yellow=links, orange=buttons, coral=inputs, pink=checkboxes, peach=sliders, salmon=menus, amber=tabs.
-**hideAriaRefLabels** - manually remove labels before the 30-second auto-hide:
-```js
-await hideAriaRefLabels({ page });
-```
 ## pinned elements
 Users can right-click → "Copy Playwriter Element Reference" to store elements in `globalThis.playwriterPinnedElem1` (increments for each pin). The reference is copied to clipboard:
@@ -297,5 +318,6 @@ Examples of what playwriter can do:
 - Intercept network requests to reverse-engineer APIs and build SDKs
 - Scrape data by replaying paginated API calls instead of scrolling DOM
 - Get accessibility snapshot to find elements, then automate interactions
+- Use visual screenshots to understand complex layouts like image grids, dashboards, or maps
 - Debug issues by collecting logs and controlling the page simultaneously
 - Handle popups, downloads, iframes, and dialog boxes