npm - agent-browser - Versions diffs - 0.18.0 → 0.19.0 - Mend

agent-browser 0.18.0 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

package/README.md +131 -57
package/bin/agent-browser-darwin-arm64 +0 -0
package/bin/agent-browser-darwin-x64 +0 -0
package/bin/agent-browser-linux-arm64 +0 -0
package/bin/agent-browser-linux-x64 +0 -0
package/bin/agent-browser-win32-x64.exe +0 -0
package/dist/actions.js +45 -44
package/dist/actions.js.map +1 -1
package/dist/browser.d.ts +38 -1
package/dist/browser.d.ts.map +1 -1
package/dist/browser.js +152 -0
package/dist/browser.js.map +1 -1
package/dist/daemon.d.ts.map +1 -1
package/dist/daemon.js +0 -2
package/dist/daemon.js.map +1 -1
package/dist/index.d.ts +7 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +5 -0
package/dist/index.js.map +1 -0
package/dist/protocol.d.ts.map +1 -1
package/dist/protocol.js +3 -1
package/dist/protocol.js.map +1 -1
package/dist/types.d.ts +3 -1
package/dist/types.d.ts.map +1 -1
package/package.json +11 -2
package/skills/agent-browser/SKILL.md +28 -14

package/README.md CHANGED Viewed

@@ -115,6 +115,8 @@ agent-browser drag <src> <tgt>        # Drag and drop
 agent-browser upload <sel> <files>    # Upload files
 agent-browser screenshot [path]       # Take screenshot (--full for full page, saves to a temporary directory if no path)
 agent-browser screenshot --annotate   # Annotated screenshot with numbered element labels
+agent-browser screenshot --screenshot-dir ./shots    # Save to custom directory
+agent-browser screenshot --screenshot-format jpeg --screenshot-quality 80
 agent-browser pdf <path>              # Save as PDF
 agent-browser snapshot                # Accessibility tree with refs (best for AI)
 agent-browser eval <js>               # Run JavaScript (-b for base64, --stdin for piped input)
@@ -165,6 +167,7 @@ agent-browser find nth <n> <sel> <action> [value]     # Nth match
 **Options:** `--name <name>` (filter role by accessible name), `--exact` (require exact text match)
 **Examples:**
 ```bash
 agent-browser find role button click --name "Submit"
 agent-browser find text "Sign In" click
@@ -178,14 +181,27 @@ agent-browser find nth 2 "a" text
 ```bash
 agent-browser wait <selector>         # Wait for element to be visible
 agent-browser wait <ms>               # Wait for time (milliseconds)
-agent-browser wait --text "Welcome"   # Wait for text to appear
+agent-browser wait --text "Welcome"   # Wait for text to appear (substring match)
 agent-browser wait --url "**/dash"    # Wait for URL pattern
 agent-browser wait --load networkidle # Wait for load state
 agent-browser wait --fn "window.ready === true"  # Wait for JS condition
+# Wait for text/element to disappear
+agent-browser wait --fn "!document.body.innerText.includes('Loading...')"
+agent-browser wait "#spinner" --state hidden
 ```
 **Load states:** `load`, `domcontentloaded`, `networkidle`
+### Clipboard
+```bash
+agent-browser clipboard read                      # Read text from clipboard
+agent-browser clipboard write "Hello, World!"     # Write text to clipboard
+agent-browser clipboard copy                      # Copy current selection (Ctrl+C; Cmd+C on macOS)
+agent-browser clipboard paste                     # Paste from clipboard (Ctrl+V; Cmd+V on macOS)
+```
 ### Mouse Control
 ```bash
@@ -375,6 +391,7 @@ agent-browser session
 ```
 Each session has its own:
 - Browser instance
 - Cookies and storage
 - Navigation history
@@ -396,6 +413,7 @@ AGENT_BROWSER_PROFILE=~/.myapp-profile agent-browser open myapp.com
 ```
 The profile directory stores:
 - Cookies and localStorage
 - IndexedDB data
 - Service workers
@@ -432,10 +450,10 @@ export AGENT_BROWSER_ENCRYPTION_KEY=<64-char-hex-key>
 agent-browser --session-name secure open example.com
 ```
-| Variable | Description |
-|----------|-------------|
-| `AGENT_BROWSER_SESSION_NAME` | Auto-save/load state persistence name |
-| `AGENT_BROWSER_ENCRYPTION_KEY` | 64-char hex key for AES-256-GCM encryption |
+| Variable                          | Description                                        |
+| --------------------------------- | -------------------------------------------------- |
+| `AGENT_BROWSER_SESSION_NAME`      | Auto-save/load state persistence name              |
+| `AGENT_BROWSER_ENCRYPTION_KEY`    | 64-char hex key for AES-256-GCM encryption         |
 | `AGENT_BROWSER_STATE_EXPIRE_DAYS` | Auto-delete states older than N days (default: 30) |
 ## Security
@@ -449,14 +467,14 @@ agent-browser includes security features for safe AI agent deployments. All feat
 - **Action Confirmation** -- Require explicit approval for sensitive action categories: `--confirm-actions eval,download`
 - **Output Length Limits** -- Prevent context flooding: `--max-output 50000`
-| Variable | Description |
-|----------|-------------|
-| `AGENT_BROWSER_CONTENT_BOUNDARIES` | Wrap page output in boundary markers |
-| `AGENT_BROWSER_MAX_OUTPUT` | Max characters for page output |
-| `AGENT_BROWSER_ALLOWED_DOMAINS` | Comma-separated allowed domain patterns |
-| `AGENT_BROWSER_ACTION_POLICY` | Path to action policy JSON file |
-| `AGENT_BROWSER_CONFIRM_ACTIONS` | Action categories requiring confirmation |
-| `AGENT_BROWSER_CONFIRM_INTERACTIVE` | Enable interactive confirmation prompts |
+| Variable                            | Description                              |
+| ----------------------------------- | ---------------------------------------- |
+| `AGENT_BROWSER_CONTENT_BOUNDARIES`  | Wrap page output in boundary markers     |
+| `AGENT_BROWSER_MAX_OUTPUT`          | Max characters for page output           |
+| `AGENT_BROWSER_ALLOWED_DOMAINS`     | Comma-separated allowed domain patterns  |
+| `AGENT_BROWSER_ACTION_POLICY`       | Path to action policy JSON file          |
+| `AGENT_BROWSER_CONFIRM_ACTIONS`     | Action categories requiring confirmation |
+| `AGENT_BROWSER_CONFIRM_INTERACTIVE` | Enable interactive confirmation prompts  |
 See [Security documentation](https://agent-browser.dev/security) for details.
@@ -474,13 +492,13 @@ agent-browser snapshot -s "#main"         # Scope to CSS selector
 agent-browser snapshot -i -c -d 5         # Combine options
 ```
-| Option | Description |
-|--------|-------------|
-| `-i, --interactive` | Only show interactive elements (buttons, links, inputs) |
-| `-C, --cursor` | Include cursor-interactive elements (cursor:pointer, onclick, tabindex) |
-| `-c, --compact` | Remove empty structural elements |
-| `-d, --depth <n>` | Limit tree depth |
-| `-s, --selector <sel>` | Scope to CSS selector |
+| Option                 | Description                                                             |
+| ---------------------- | ----------------------------------------------------------------------- |
+| `-i, --interactive`    | Only show interactive elements (buttons, links, inputs)                 |
+| `-C, --cursor`         | Include cursor-interactive elements (cursor:pointer, onclick, tabindex) |
+| `-c, --compact`        | Remove empty structural elements                                        |
+| `-d, --depth <n>`      | Limit tree depth                                                        |
+| `-s, --selector <sel>` | Scope to CSS selector                                                   |
 The `-C` flag is useful for modern web apps that use custom clickable elements (divs, spans) instead of standard buttons/links.
@@ -529,6 +547,9 @@ This is useful for multimodal AI models that can reason about visual layout, unl
 | `--json` | JSON output (for agents) |
 | `--full, -f` | Full page screenshot |
 | `--annotate` | Annotated screenshot with numbered element labels (or `AGENT_BROWSER_ANNOTATE` env) |
+| `--screenshot-dir <path>` | Default screenshot output directory (or `AGENT_BROWSER_SCREENSHOT_DIR` env) |
+| `--screenshot-quality <n>` | JPEG quality 0-100 (or `AGENT_BROWSER_SCREENSHOT_QUALITY` env) |
+| `--screenshot-format <fmt>` | Screenshot format: `png`, `jpeg` (or `AGENT_BROWSER_SCREENSHOT_FORMAT` env) |
 | `--headed` | Show browser window (not headless) (or `AGENT_BROWSER_HEADED` env) |
 | `--cdp <port\|url>` | Connect via Chrome DevTools Protocol (port or WebSocket URL) |
 | `--auto-connect` | Auto-discover and connect to running Chrome (or `AGENT_BROWSER_AUTO_CONNECT` env) |
@@ -596,8 +617,8 @@ export AGENT_BROWSER_DEFAULT_TIMEOUT=45000
 > **Note:** Setting this above 30000 (30s) may cause EAGAIN errors on slow operations because the CLI's read timeout will expire before Playwright responds. The CLI retries transient errors automatically, but response times will increase.
-| Variable | Description |
-|----------|-------------|
+| Variable                        | Description                                       |
+| ------------------------------- | ------------------------------------------------- |
 | `AGENT_BROWSER_DEFAULT_TIMEOUT` | Default Playwright timeout in ms (default: 25000) |
 ## Selectors
@@ -623,6 +644,7 @@ agent-browser hover @e4                   # Hover the link
 ```
 **Why use refs?**
 - **Deterministic**: Ref points to exact element from snapshot
 - **Fast**: No DOM re-query needed
 - **AI-friendly**: Snapshot + ref workflow is optimal for LLMs
@@ -723,6 +745,7 @@ agent-browser open other-site.com
 ```
 This is useful for:
 - **Skipping login flows** - Authenticate via headers instead of UI
 - **Switching users** - Start new sessions with different auth tokens
 - **API testing** - Access protected endpoints directly
@@ -744,6 +767,7 @@ agent-browser set headers '{"X-Custom-Header": "value"}'
 ## Custom Browser Executable
 Use a custom browser executable instead of the bundled Chromium. This is useful for:
 - **Serverless deployment**: Use lightweight Chromium builds like `@sparticuz/chromium` (~50MB vs ~684MB)
 - **System browsers**: Use an existing Chrome/Chromium installation
 - **Custom builds**: Use modified browser builds
@@ -804,6 +828,7 @@ agent-browser screenshot report.png
 ```
 The `--allow-file-access` flag adds Chromium flags (`--allow-file-access-from-files`, `--allow-file-access`) that allow `file://` URLs to:
 - Load and render local files
 - Access other local files via JavaScript (XHR, fetch)
 - Load local resources (images, scripts, stylesheets)
@@ -831,10 +856,12 @@ agent-browser --cdp "wss://your-browser-service.com/cdp?token=..." snapshot
 ```
 The `--cdp` flag accepts either:
 - A port number (e.g., `9222`) for local connections via `http://localhost:{port}`
 - A full WebSocket URL (e.g., `wss://...` or `ws://...`) for remote browser services
 This enables control of:
 - Electron apps
 - Chrome/Chromium instances with remote debugging
 - WebView2 applications
@@ -854,10 +881,12 @@ AGENT_BROWSER_AUTO_CONNECT=1 agent-browser snapshot
 ```
 Auto-connect discovers Chrome by:
 1. Reading Chrome's `DevToolsActivePort` file from the default user data directory
 2. Falling back to probing common debugging ports (9222, 9229)
 This is useful when:
 - Chrome 144+ has remote debugging enabled via `chrome://inspect/#remote-debugging` (which uses a dynamic port)
 - You want a zero-configuration connection to your existing browser
 - You don't want to track which port Chrome is using
@@ -881,6 +910,7 @@ This starts a WebSocket server on the specified port that streams the browser vi
 Connect to `ws://localhost:9223` to receive frames and send input:
 **Receive frames:**
 ```json
 {
   "type": "frame",
@@ -897,6 +927,7 @@ Connect to `ws://localhost:9223` to receive frames and send input:
 ```
 **Send mouse events:**
 ```json
 {
   "type": "input_mouse",
@@ -909,6 +940,7 @@ Connect to `ws://localhost:9223` to receive frames and send input:
 ```
 **Send keyboard events:**
 ```json
 {
   "type": "input_keyboard",
@@ -919,6 +951,7 @@ Connect to `ws://localhost:9223` to receive frames and send input:
 ```
 **Send touch events:**
 ```json
 {
   "type": "input_touch",
@@ -939,16 +972,19 @@ await browser.launch({ headless: true });
 await browser.navigate('https://example.com');
 // Start screencast
-await browser.startScreencast((frame) => {
-  // frame.data is base64-encoded image
-  // frame.metadata contains viewport info
-  console.log('Frame received:', frame.metadata.deviceWidth, 'x', frame.metadata.deviceHeight);
-}, {
-  format: 'jpeg',
-  quality: 80,
-  maxWidth: 1280,
-  maxHeight: 720,
-});
+await browser.startScreencast(
+  (frame) => {
+    // frame.data is base64-encoded image
+    // frame.metadata contains viewport info
+    console.log('Frame received:', frame.metadata.deviceWidth, 'x', frame.metadata.deviceHeight);
+  },
+  {
+    format: 'jpeg',
+    quality: 80,
+    maxWidth: 1280,
+    maxHeight: 720,
+  }
+);
 // Inject mouse events
 await browser.injectMouseEvent({
@@ -1000,18 +1036,18 @@ agent-browser open example.com
 Or add to your config file (`agent-browser.json`):
 ```json
-{"native": true}
+{ "native": true }
 ```
 ### What's Different
-| | Default (Node.js) | Native (`--native`) |
-|---|---|---|
-| **Runtime** | Node.js + Playwright | Pure Rust binary |
-| **Protocol** | Playwright protocol | Direct CDP / WebDriver |
-| **Install size** | Larger (Node.js + npm deps) | Smaller (single binary) |
-| **Browser support** | Chromium, Firefox, WebKit | Chromium, Safari (via WebDriver) |
-| **Stability** | Stable | Experimental |
+|                     | Default (Node.js)           | Native (`--native`)              |
+| ------------------- | --------------------------- | -------------------------------- |
+| **Runtime**         | Node.js + Playwright        | Pure Rust binary                 |
+| **Protocol**        | Playwright protocol         | Direct CDP / WebDriver           |
+| **Install size**    | Larger (Node.js + npm deps) | Smaller (single binary)          |
+| **Browser support** | Chromium, Firefox, WebKit   | Chromium, Safari (via WebDriver) |
+| **Stability**       | Stable                      | Experimental                     |
 ### Known Limitations
@@ -1021,13 +1057,13 @@ Or add to your config file (`agent-browser.json`):
 ## Platforms
-| Platform | Binary | Fallback |
-|----------|--------|----------|
-| macOS ARM64 | Native Rust | Node.js |
-| macOS x64 | Native Rust | Node.js |
-| Linux ARM64 | Native Rust | Node.js |
-| Linux x64 | Native Rust | Node.js |
-| Windows x64 | Native Rust | Node.js |
+| Platform    | Binary      | Fallback |
+| ----------- | ----------- | -------- |
+| macOS ARM64 | Native Rust | Node.js  |
+| macOS x64   | Native Rust | Node.js  |
+| Linux ARM64 | Native Rust | Node.js  |
+| Linux x64   | Native Rust | Node.js  |
+| Windows x64 | Native Rust | Node.js  |
 ## Usage with AI Agents
@@ -1071,6 +1107,7 @@ For more consistent results, add to your project or global instructions file:
 Use `agent-browser` for web automation. Run `agent-browser --help` for all commands.
 Core workflow:
 1. `agent-browser open <url>` - Navigate to page
 2. `agent-browser snapshot -i` - Get interactive elements with refs (@e1, @e2)
 3. `agent-browser click @e1` / `fill @e2 "text"` - Interact using refs
@@ -1122,11 +1159,11 @@ export AGENT_BROWSER_IOS_DEVICE="iPhone 16 Pro"
 agent-browser open https://example.com
 ```
-| Variable | Description |
-|----------|-------------|
-| `AGENT_BROWSER_PROVIDER` | Set to `ios` to enable iOS mode |
+| Variable                   | Description                                     |
+| -------------------------- | ----------------------------------------------- |
+| `AGENT_BROWSER_PROVIDER`   | Set to `ios` to enable iOS mode                 |
 | `AGENT_BROWSER_IOS_DEVICE` | Device name (e.g., "iPhone 16 Pro", "iPad Pro") |
-| `AGENT_BROWSER_IOS_UDID` | Device UDID (alternative to device name) |
+| `AGENT_BROWSER_IOS_UDID`   | Device UDID (alternative to device name)        |
 **Supported devices:** All iOS Simulators available in Xcode (iPhones, iPads), plus real iOS devices.
@@ -1137,6 +1174,7 @@ agent-browser open https://example.com
 Appium also supports real iOS devices connected via USB. This requires additional one-time setup:
 **1. Get your device UDID:**
 ```bash
 xcrun xctrace list devices
 # or
@@ -1144,6 +1182,7 @@ system_profiler SPUSBDataType | grep -A 5 "iPhone\|iPad"
 ```
 **2. Sign WebDriverAgent (one-time):**
 ```bash
 # Open the WebDriverAgent Xcode project
 cd ~/.appium/node_modules/appium-xcuitest-driver/node_modules/appium-webdriveragent
@@ -1151,12 +1190,14 @@ open WebDriverAgent.xcodeproj
 ```
 In Xcode:
 - Select the `WebDriverAgentRunner` target
 - Go to Signing & Capabilities
 - Select your Team (requires Apple Developer account, free tier works)
 - Let Xcode manage signing automatically
 **3. Use with agent-browser:**
 ```bash
 # Connect device via USB, then:
 agent-browser -p ios --device "<DEVICE_UDID>" open https://example.com
@@ -1166,11 +1207,44 @@ agent-browser -p ios --device "John's iPhone" open https://example.com
 ```
 **Real device notes:**
 - First run installs WebDriverAgent to the device (may require Trust prompt)
 - Device must be unlocked and connected via USB
 - Slightly slower initial connection than simulator
 - Tests against real Safari performance and behavior
+### Browserless
+[Browserless](https://browserless.io) provides cloud browser infrastructure with a Sessions API. Use it when running agent-browser in environments where a local browser isn't available.
+To enable Browserless, use the `-p` flag:
+```bash
+export BROWSERLESS_API_KEY="your-api-token"
+agent-browser -p browserless open https://example.com
+```
+Or use environment variables for CI/scripts:
+```bash
+export AGENT_BROWSER_PROVIDER=browserless
+export BROWSERLESS_API_KEY="your-api-token"
+agent-browser open https://example.com
+```
+Optional configuration via environment variables:
+| Variable                   | Description                                      | Default                                 |
+| -------------------------- | ------------------------------------------------ | --------------------------------------- |
+| `BROWSERLESS_API_URL`      | Base API URL (for custom regions or self-hosted) | `https://production-sfo.browserless.io` |
+| `BROWSERLESS_BROWSER_TYPE` | Type of browser to use (`chromium` or `chrome`)  | `chromium`                              |
+| `BROWSERLESS_TTL`          | Session TTL in milliseconds                      | `300000`                                |
+| `BROWSERLESS_STEALTH`      | Enable stealth mode (`true`/`false`)             | `true`                                  |
+When enabled, agent-browser connects to a Browserless cloud session instead of launching a local browser. All commands work identically.
+Get your API token from the [Browserless Dashboard](https://browserless.io).
 ### Browserbase
 [Browserbase](https://browserbase.com) provides remote browser infrastructure to make deployment of agentic browsing agents easy. Use it when running the agent-browser CLI in an environment where a local browser isn't feasible.
@@ -1238,12 +1312,12 @@ agent-browser open https://example.com
 Optional configuration via environment variables:
-| Variable | Description | Default |
-|----------|-------------|---------|
-| `KERNEL_HEADLESS` | Run browser in headless mode (`true`/`false`) | `false` |
-| `KERNEL_STEALTH` | Enable stealth mode to avoid bot detection (`true`/`false`) | `true` |
-| `KERNEL_TIMEOUT_SECONDS` | Session timeout in seconds | `300` |
-| `KERNEL_PROFILE_NAME` | Browser profile name for persistent cookies/logins (created if it doesn't exist) | (none) |
+| Variable                 | Description                                                                      | Default |
+| ------------------------ | -------------------------------------------------------------------------------- | ------- |
+| `KERNEL_HEADLESS`        | Run browser in headless mode (`true`/`false`)                                    | `false` |
+| `KERNEL_STEALTH`         | Enable stealth mode to avoid bot detection (`true`/`false`)                      | `true`  |
+| `KERNEL_TIMEOUT_SECONDS` | Session timeout in seconds                                                       | `300`   |
+| `KERNEL_PROFILE_NAME`    | Browser profile name for persistent cookies/logins (created if it doesn't exist) | (none)  |
 When enabled, agent-browser connects to a Kernel cloud session instead of launching a local browser. All commands work identically.

package/bin/agent-browser-darwin-arm64 CHANGED Viewed

Binary file

package/bin/agent-browser-darwin-x64 CHANGED Viewed

Binary file

package/bin/agent-browser-linux-arm64 CHANGED Viewed

Binary file

package/bin/agent-browser-linux-x64 CHANGED Viewed

Binary file

package/bin/agent-browser-win32-x64.exe CHANGED Viewed

Binary file

package/dist/actions.js CHANGED Viewed

@@ -411,19 +411,11 @@ async function handleLaunch(command, browser) {
     return successResponse(command.id, { launched: true });
 }
 async function handleNavigate(command, browser) {
-    browser.checkDomainAllowed(command.url);
-    const page = browser.getPage();
-    // If headers are provided, set up scoped headers for this origin
-    if (command.headers && Object.keys(command.headers).length > 0) {
-        await browser.setScopedHeaders(command.url, command.headers);
-    }
-    await page.goto(command.url, {
-        waitUntil: command.waitUntil ?? 'load',
-    });
-    return successResponse(command.id, {
-        url: page.url(),
-        title: await page.title(),
+    const result = await browser.navigate(command.url, {
+        headers: command.headers,
+        waitUntil: command.waitUntil,
     });
+    return successResponse(command.id, result);
 }
 async function handleClick(command, browser) {
     // Support both refs (@e1) and regular selectors
@@ -513,7 +505,7 @@ async function handleScreenshot(command, browser) {
             const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
             const random = Math.random().toString(36).substring(2, 8);
             const filename = `screenshot-${timestamp}-${random}.${ext}`;
-            const screenshotDir = path.join(getAppDir(), 'tmp', 'screenshots');
+            const screenshotDir = command.screenshotDir ?? path.join(getAppDir(), 'tmp', 'screenshots');
             mkdirSync(screenshotDir, { recursive: true });
             savePath = path.join(screenshotDir, filename);
         }
@@ -696,7 +688,10 @@ async function handleEvaluate(command, browser) {
 }
 async function handleWait(command, browser) {
     const page = browser.getPage();
-    if (command.selector) {
+    if (command.text) {
+        await page.waitForFunction(`(document.body.innerText || '').includes(${JSON.stringify(command.text)})`, { timeout: command.timeout });
+    }
+    else if (command.selector) {
         await page.waitForSelector(command.selector, {
             state: command.state ?? 'visible',
             timeout: command.timeout,
@@ -706,7 +701,6 @@ async function handleWait(command, browser) {
         await page.waitForTimeout(command.timeout);
     }
     else {
-        // Default: wait for load state
         await page.waitForLoadState('load');
     }
     return successResponse(command.id, { waited: true });
@@ -772,7 +766,8 @@ async function handleContent(command, browser) {
     const page = browser.getPage();
     let html;
     if (command.selector) {
-        html = await page.locator(command.selector).innerHTML();
+        const locator = browser.getLocator(command.selector);
+        html = await locator.innerHTML();
     }
     else {
         html = await page.content();
@@ -1240,13 +1235,13 @@ async function handleIsChecked(command, browser) {
     return successResponse(command.id, { checked });
 }
 async function handleCount(command, browser) {
-    const page = browser.getPage();
-    const count = await page.locator(command.selector).count();
+    const locator = browser.getLocator(command.selector);
+    const count = await locator.count();
     return successResponse(command.id, { count });
 }
 async function handleBoundingBox(command, browser) {
-    const page = browser.getPage();
-    const box = await page.locator(command.selector).boundingBox();
+    const locator = browser.getLocator(command.selector);
+    const box = await locator.boundingBox();
     return successResponse(command.id, { box });
 }
 async function handleStyles(command, browser) {
@@ -1365,8 +1360,6 @@ async function handleStateLoad(command, browser) {
         return errorResponse(command.id, `State file not found: ${command.path}`);
     }
     await browser.launch({
-        id: command.id,
-        action: 'launch',
         headless: true,
         autoStateFilePath: command.path,
     });
@@ -1534,7 +1527,7 @@ async function handleKeyboard(command, browser) {
 async function handleWheel(command, browser) {
     const page = browser.getPage();
     if (command.selector) {
-        const element = page.locator(command.selector);
+        const element = browser.getLocator(command.selector);
         await element.hover();
     }
     await page.mouse.wheel(command.deltaX ?? 0, command.deltaY ?? 0);
@@ -1549,41 +1542,50 @@ async function handleClipboard(command, browser) {
     const page = browser.getPage();
     switch (command.operation) {
         case 'copy':
-            await page.keyboard.press('Control+c');
+            await page.keyboard.press('ControlOrMeta+c');
             return successResponse(command.id, { copied: true });
         case 'paste':
-            await page.keyboard.press('Control+v');
+            await page.keyboard.press('ControlOrMeta+v');
             return successResponse(command.id, { pasted: true });
-        case 'read':
+        case 'read': {
             const text = await page.evaluate('navigator.clipboard.readText()');
             return successResponse(command.id, { text });
+        }
+        case 'write': {
+            if (!command.text) {
+                return errorResponse(command.id, "Missing 'text' parameter for clipboard write");
+            }
+            await page.evaluate(`navigator.clipboard.writeText(${JSON.stringify(command.text)})`);
+            return successResponse(command.id, { written: command.text });
+        }
         default:
             return errorResponse(command.id, 'Unknown clipboard operation');
     }
 }
 async function handleHighlight(command, browser) {
-    const page = browser.getPage();
-    await page.locator(command.selector).highlight();
+    const locator = browser.getLocator(command.selector);
+    await locator.highlight();
     return successResponse(command.id, { highlighted: true });
 }
 async function handleClear(command, browser) {
-    const page = browser.getPage();
-    await page.locator(command.selector).clear();
+    const locator = browser.getLocator(command.selector);
+    await locator.clear();
     return successResponse(command.id, { cleared: true });
 }
 async function handleSelectAll(command, browser) {
-    const page = browser.getPage();
-    await page.locator(command.selector).selectText();
+    const locator = browser.getLocator(command.selector);
+    await locator.selectText();
     return successResponse(command.id, { selected: true });
 }
 async function handleInnerText(command, browser) {
-    const page = browser.getPage();
-    const text = await page.locator(command.selector).innerText();
+    const locator = browser.getLocator(command.selector);
+    const text = await locator.innerText();
     return successResponse(command.id, { text });
 }
 async function handleInnerHtml(command, browser) {
     const page = browser.getPage();
-    const html = await page.locator(command.selector).innerHTML();
+    const locator = browser.getLocator(command.selector);
+    const html = await locator.innerHTML();
     return successResponse(command.id, { html, origin: page.url() });
 }
 async function handleInputValue(command, browser) {
@@ -1593,13 +1595,13 @@ async function handleInputValue(command, browser) {
     return successResponse(command.id, { value, origin: page.url() });
 }
 async function handleSetValue(command, browser) {
-    const page = browser.getPage();
-    await page.locator(command.selector).fill(command.value);
+    const locator = browser.getLocator(command.selector);
+    await locator.fill(command.value);
     return successResponse(command.id, { set: true });
 }
 async function handleDispatch(command, browser) {
-    const page = browser.getPage();
-    await page.locator(command.selector).dispatchEvent(command.event, command.eventInit);
+    const locator = browser.getLocator(command.selector);
+    await locator.dispatchEvent(command.event, command.eventInit);
     return successResponse(command.id, { dispatched: command.event });
 }
 async function handleEvalHandle(command, browser) {
@@ -1705,8 +1707,7 @@ async function handleGetByTestId(command, browser) {
     }
 }
 async function handleNth(command, browser) {
-    const page = browser.getPage();
-    const base = page.locator(command.selector);
+    const base = browser.getLocator(command.selector);
     const locator = command.index === -1 ? base.last() : base.nth(command.index);
     switch (command.subaction) {
         case 'click':
@@ -1816,8 +1817,8 @@ async function handleInsertText(command, browser) {
     return successResponse(command.id, { inserted: true });
 }
 async function handleMultiSelect(command, browser) {
-    const page = browser.getPage();
-    const selected = await page.locator(command.selector).selectOption(command.values);
+    const locator = browser.getLocator(command.selector);
+    const selected = await locator.selectOption(command.values);
     return successResponse(command.id, { selected });
 }
 async function handleWaitForDownload(command, browser) {
@@ -1965,7 +1966,7 @@ async function handleDiffScreenshot(command, browser) {
     const page = browser.getPage();
     let screenshotBuffer;
     if (command.selector) {
-        const locator = browser.getLocatorFromRef(command.selector) || page.locator(command.selector);
+        const locator = browser.getLocator(command.selector);
         screenshotBuffer = await locator.screenshot({ type: 'png' });
     }
     else {