npm - agent-browser - Versions diffs - 0.1.2 → 0.2.0 - Mend

agent-browser 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

package/AGENTS.md ADDED Viewed

@@ -0,0 +1,26 @@
+# AGENTS.md
+Instructions for AI coding agents working with this codebase.
+<!-- opensrc:start -->
+## Source Code Reference
+Source code for dependencies is available in `opensrc/` for deeper understanding of implementation details.
+See `opensrc/sources.json` for the list of available packages and their versions.
+Use this source code when you need to understand how a package works internally, not just its types/interface.
+### Fetching Additional Source Code
+To fetch source code for a package or repository you need to understand, run:
+```bash
+npx opensrc <package>           # npm package (e.g., npx opensrc zod)
+npx opensrc pypi:<package>      # Python package (e.g., npx opensrc pypi:requests)
+npx opensrc crates:<package>    # Rust crate (e.g., npx opensrc crates:serde)
+npx opensrc <owner>/<repo>      # GitHub repo (e.g., npx opensrc vercel/ai)
+```
+<!-- opensrc:end -->

package/README.md CHANGED Viewed

@@ -14,13 +14,22 @@ pnpm build
 ```bash
 agent-browser open example.com
-agent-browser click "#submit"
-agent-browser fill "#email" "test@example.com"
-agent-browser get text "h1"
+agent-browser snapshot                    # Get accessibility tree with refs
+agent-browser click @e2                   # Click by ref from snapshot
+agent-browser fill @e3 "test@example.com" # Fill by ref
+agent-browser get text @e1                # Get text by ref
 agent-browser screenshot page.png
 agent-browser close
 ```
+### Traditional Selectors (also supported)
+```bash
+agent-browser click "#submit"
+agent-browser fill "#email" "test@example.com"
+agent-browser find role button click --name "Submit"
+```
 ## Commands
 ### Core Commands
@@ -48,7 +57,7 @@ agent-browser upload <sel> <files>    # Upload files
 agent-browser download [path]         # Wait for download
 agent-browser screenshot [path]       # Take screenshot (--full for full page)
 agent-browser pdf <path>              # Save as PDF
-agent-browser snapshot                # Accessibility tree (best for AI)
+agent-browser snapshot                # Accessibility tree with refs (best for AI)
 agent-browser eval <js>               # Run JavaScript
 agent-browser close                   # Close browser
 ```
@@ -244,19 +253,49 @@ agent-browser session list
 ## Selectors
+### Refs (Recommended for AI)
+Refs provide deterministic element selection from snapshots:
+```bash
+# 1. Get snapshot with refs
+agent-browser snapshot
+# Output:
+# - heading "Example Domain" [ref=e1] [level=1]
+# - button "Submit" [ref=e2]
+# - textbox "Email" [ref=e3]
+# - link "Learn more" [ref=e4]
+# 2. Use refs to interact
+agent-browser click @e2                   # Click the button
+agent-browser fill @e3 "test@example.com" # Fill the textbox
+agent-browser get text @e1                # Get heading text
+agent-browser hover @e4                   # Hover the link
+```
+**Why use refs?**
+- **Deterministic**: A ref points to the exact element from the snapshot
+- **Fast**: No DOM re-query is needed
+- **AI-friendly**: The snapshot + ref workflow is optimal for LLMs
+### CSS Selectors
 ```bash
-# CSS
 agent-browser click "#id"
 agent-browser click ".class"
 agent-browser click "div > button"
+```
-# Text
-agent-browser click "text=Submit"
+### Text & XPath
-# XPath
+```bash
+agent-browser click "text=Submit"
 agent-browser click "xpath=//button"
+```
+### Semantic Locators
-# Semantic (recommended)
+```bash
 agent-browser find role button click --name "Submit"
 agent-browser find label "Email" fill "test@test.com"
 ```
@@ -267,8 +306,26 @@ Use `--json` for machine-readable output:
 ```bash
 agent-browser snapshot --json
-agent-browser get text "h1" --json
-agent-browser is visible ".modal" --json
+# Returns: {"success":true,"data":{"snapshot":"...","refs":{"e1":{"role":"heading","name":"Title"},...}}}
+agent-browser get text @e1 --json
+agent-browser is visible @e2 --json
+```
+### Optimal AI Workflow
+```bash
+# 1. Navigate and get snapshot
+agent-browser open example.com
+agent-browser snapshot --json    # AI parses tree and refs
+# 2. AI identifies target refs from snapshot
+# 3. Execute actions using refs
+agent-browser click @e2
+agent-browser fill @e3 "input text"
+# 4. Get new snapshot if page changed
+agent-browser snapshot --json
 ```
 ## License

package/benchmark/benchmark.ts ADDED Viewed

@@ -0,0 +1,521 @@
+#!/usr/bin/env npx tsx
+/**
+ * Benchmark: agent-browser vs playwright-mcp
+ *
+ * Measures:
+ * - Speed: cold start, navigation, click, snapshot operations
+ * - Context usage: output/response size in bytes and estimated tokens
+ */
+import { spawn, execSync, ChildProcess } from 'child_process';
+import * as readline from 'readline';
+// ============================================================================
+// Configuration
+// ============================================================================
+// Page that every navigation step in both benchmarks opens.
+const TEST_URL = 'https://example.com';
+// Number of passes through the warm-operation loop for each tool.
+const ITERATIONS = 3;
+/** One timed measurement of a single operation performed by one tool. */
+interface BenchmarkResult {
+  /** Operation key used to group rows in the summary (e.g. 'snapshot', 'click'). */
+  operation: string;
+  /** Tool that produced the measurement: 'agent-browser' or 'playwright-mcp'. */
+  tool: string;
+  /** Elapsed wall-clock time in milliseconds (a performance.now() delta). */
+  timeMs: number;
+  /** Size of the captured output; the benchmarks record the output string's `.length`. */
+  outputBytes: number;
+  /** Token estimate for the captured output, produced by estimateTokens(). */
+  estimatedTokens: number;
+}
+// Shared accumulator: both benchmark functions append here and printResults() reads it.
+const results: BenchmarkResult[] = [];
+/**
+ * Rough token estimate for a piece of text, using the rule of thumb of about
+ * four characters per token for English text. Partial tokens round up.
+ */
+function estimateTokens(text: string): number {
+  const CHARS_PER_TOKEN = 4;
+  const charCount = text.length;
+  return Math.ceil(charCount / CHARS_PER_TOKEN);
+}
+/**
+ * Format a byte count as a short human-readable string.
+ *
+ * Values below 1024 are shown in B, values below 1024 * 1024 in KB, and
+ * everything else in MB. Whole byte counts print without decimals; fractional
+ * ones (the summary passes in per-operation averages) are rounded to one
+ * decimal instead of being printed with full floating-point precision.
+ *
+ * @param bytes - Size in bytes; may be fractional.
+ * @returns A string such as `512 B`, `1.5 KB` or `2.0 MB`.
+ */
+function formatBytes(bytes: number): string {
+  const KIB = 1024;
+  const MIB = KIB * KIB;
+  if (bytes < KIB) {
+    // Averages arrive as floats; avoid output like "341.3333333333333 B".
+    return Number.isInteger(bytes) ? `${bytes} B` : `${bytes.toFixed(1)} B`;
+  }
+  if (bytes < MIB) return `${(bytes / KIB).toFixed(1)} KB`;
+  return `${(bytes / MIB).toFixed(1)} MB`;
+}
+// ============================================================================
+// Agent-Browser Benchmarks
+// ============================================================================
+/**
+ * Run one agent-browser CLI command and time it.
+ *
+ * Spawns `node ./dist/index.js --session benchmark --json <args>` in the current
+ * working directory and collects stdout and stderr until the process closes.
+ *
+ * @param args - CLI arguments appended after the fixed session/json flags.
+ * @returns Elapsed time in milliseconds (measured from just before the spawn
+ *   until the 'close' event) and the trimmed stdout text.
+ * @throws Error when the process cannot be spawned, or when it exits with a
+ *   non-zero code and stdout does not contain `"success"`.
+ */
+async function runAgentBrowser(args: string[]): Promise<{ timeMs: number; output: string }> {
+  const start = performance.now();
+  return new Promise((resolve, reject) => {
+    const proc = spawn('node', ['./dist/index.js', '--session', 'benchmark', '--json', ...args], {
+      cwd: process.cwd(),
+      env: { ...process.env },
+    });
+    let output = '';
+    let stderr = '';
+    // Decode at the stream level so a multi-byte UTF-8 character that is split
+    // across two chunks is reassembled instead of being decoded chunk by chunk.
+    proc.stdout.setEncoding('utf8');
+    proc.stderr.setEncoding('utf8');
+    proc.stdout.on('data', (chunk: string) => { output += chunk; });
+    proc.stderr.on('data', (chunk: string) => { stderr += chunk; });
+    proc.on('close', (code) => {
+      const timeMs = performance.now() - start;
+      // A non-zero exit is tolerated as long as a result containing "success" was printed.
+      if (code !== 0 && !output.includes('"success"')) {
+        reject(new Error(`agent-browser failed: ${stderr || output}`));
+      } else {
+        resolve({ timeMs, output: output.trim() });
+      }
+    });
+    proc.on('error', reject);
+  });
+}
+/**
+ * Run the agent-browser benchmark sequence and append one result per timed step
+ * to the shared `results` array.
+ *
+ * Sequence: best-effort close of any existing session, a cold-start navigation,
+ * ITERATIONS rounds of (navigate, snapshot, get title, get url, click), then a
+ * screenshot and a timed close.
+ *
+ * If a step fails, the error still propagates to the caller, but a best-effort
+ * close is issued first so the benchmark session is not left open.
+ */
+async function benchmarkAgentBrowser(): Promise<void> {
+  // Runs one CLI command, records it under `operation`, and echoes the timing.
+  const measure = async (
+    label: string,
+    operation: string,
+    args: string[],
+    showSize = true,
+  ): Promise<void> => {
+    console.log(`  ⏱️  ${label}...`);
+    const run = await runAgentBrowser(args);
+    results.push({
+      operation,
+      tool: 'agent-browser',
+      timeMs: run.timeMs,
+      // String length (UTF-16 code units); this equals the byte count for ASCII output.
+      outputBytes: run.output.length,
+      estimatedTokens: estimateTokens(run.output),
+    });
+    const timing = `${run.timeMs.toFixed(0)}ms`;
+    console.log(`     ${showSize ? `${timing}, ${formatBytes(run.output.length)}` : timing}`);
+  };
+  // Best-effort close; a failure here is expected when no session exists.
+  const closeQuietly = async (): Promise<void> => {
+    try {
+      await runAgentBrowser(['close']);
+    } catch {
+      // Ignore - session might not exist
+    }
+  };
+  console.log('\n📦 Benchmarking agent-browser...\n');
+  // Clean up any existing session
+  await closeQuietly();
+  // Wait a bit for cleanup
+  await new Promise(r => setTimeout(r, 500));
+  let sessionClosed = false;
+  try {
+    // Cold start (includes daemon startup + browser launch + navigation)
+    await measure('Cold start (navigate)', 'cold_start_navigate', ['open', TEST_URL]);
+    // Warm operations
+    for (let i = 0; i < ITERATIONS; i++) {
+      const iter = i + 1;
+      await measure(`Navigate (warm, iter ${iter})`, 'navigate_warm', ['open', TEST_URL]);
+      await measure(`Snapshot (iter ${iter})`, 'snapshot', ['snapshot']);
+      await measure(`Get title (iter ${iter})`, 'get_title', ['get', 'title']);
+      await measure(`Get URL (iter ${iter})`, 'get_url', ['get', 'url']);
+      // Click (on a link that exists on example.com)
+      await measure(`Click link (iter ${iter})`, 'click', ['click', 'a']);
+      // Navigate back for next iteration (not timed)
+      await runAgentBrowser(['open', TEST_URL]);
+    }
+    await measure('Screenshot', 'screenshot', ['screenshot']);
+    // The close step prints only its duration, not an output size.
+    await measure('Close', 'close', ['close'], false);
+    sessionClosed = true;
+  } finally {
+    // Reached with sessionClosed === false only when a step above threw.
+    if (!sessionClosed) await closeQuietly();
+  }
+}
+// ============================================================================
+// Playwright-MCP Benchmarks
+// ============================================================================
+/**
+ * Minimal JSON-RPC client for a playwright-mcp server driven over stdio.
+ *
+ * The constructor spawns the server as a child process. Requests are written to
+ * its stdin as newline-delimited JSON; responses are read line by line from its
+ * stdout and matched to the waiting caller by `id`.
+ */
+class MCPClient {
+  private proc: ChildProcess;
+  private rl: readline.Interface;
+  /** Requests still waiting for a response, keyed by JSON-RPC id. */
+  private responseBuffer: Map<number, { resolve: (v: unknown) => void; reject: (e: Error) => void }> = new Map();
+  private requestId = 0;
+  constructor() {
+    this.proc = spawn('node', ['./opensrc/repos/github.com/microsoft/playwright-mcp/cli.js', '--headless'], {
+      cwd: process.cwd(),
+      stdio: ['pipe', 'pipe', 'pipe'],
+    });
+    // Without these listeners a failed spawn is an unhandled 'error' event, and a
+    // server that dies early leaves callers waiting for the full request timeout.
+    this.proc.on('error', (err) => this.failPending(err));
+    this.proc.on('close', (code, signal) => {
+      this.failPending(new Error(`playwright-mcp exited (code ${code}, signal ${signal})`));
+    });
+    this.proc.stdin?.on('error', (err) => this.failPending(err));
+    this.rl = readline.createInterface({ input: this.proc.stdout! });
+    this.rl.on('line', (line) => {
+      let msg: unknown;
+      try {
+        msg = JSON.parse(line);
+      } catch {
+        // Non-JSON output, ignore
+        return;
+      }
+      if (typeof msg !== 'object' || msg === null) return;
+      const id = (msg as { id?: unknown }).id;
+      // Messages without a numeric id (e.g. notifications) have no waiting caller.
+      if (typeof id !== 'number') return;
+      const handler = this.responseBuffer.get(id);
+      if (handler) {
+        this.responseBuffer.delete(id);
+        handler.resolve(msg);
+      }
+    });
+    // Debug output is not needed in benchmarks; keep the stream flowing and discard it.
+    this.proc.stderr?.resume();
+  }
+  /**
+   * Perform the `initialize` request followed by the `notifications/initialized`
+   * notification.
+   *
+   * @returns Elapsed time in milliseconds and the JSON-stringified initialize response.
+   */
+  async initialize(): Promise<{ timeMs: number; output: string }> {
+    const start = performance.now();
+    // Send initialize request
+    const initResult = await this.sendRequest('initialize', {
+      protocolVersion: '2024-11-05',
+      capabilities: {},
+      clientInfo: { name: 'benchmark', version: '1.0.0' },
+    });
+    // Send initialized notification
+    this.sendNotification('notifications/initialized', {});
+    const timeMs = performance.now() - start;
+    const output = JSON.stringify(initResult);
+    return { timeMs, output };
+  }
+  /**
+   * Invoke a tool through a `tools/call` request.
+   *
+   * @param name - Tool name.
+   * @param args - Tool arguments, sent as `arguments`.
+   * @returns Elapsed time in milliseconds and the JSON-stringified response message.
+   */
+  async callTool(name: string, args: Record<string, unknown>): Promise<{ timeMs: number; output: string }> {
+    const start = performance.now();
+    const result = await this.sendRequest('tools/call', { name, arguments: args });
+    const timeMs = performance.now() - start;
+    const output = JSON.stringify(result);
+    return { timeMs, output };
+  }
+  /**
+   * Write one request and wait for the response line carrying the same id.
+   * Rejects if no response arrives within 30 seconds, or if the server process
+   * fails or the client is closed while the request is pending.
+   */
+  private sendRequest(method: string, params: Record<string, unknown>): Promise<unknown> {
+    const id = ++this.requestId;
+    const request = { jsonrpc: '2.0', id, method, params };
+    return new Promise((resolve, reject) => {
+      // Timeout after 30s
+      const timer = setTimeout(() => {
+        if (this.responseBuffer.delete(id)) {
+          reject(new Error(`Request timeout: ${method}`));
+        }
+      }, 30000);
+      // Clear the timer on settle; a pending timer would otherwise keep the
+      // process alive for up to 30s after the last request has finished.
+      this.responseBuffer.set(id, {
+        resolve: (value) => {
+          clearTimeout(timer);
+          resolve(value);
+        },
+        reject: (error) => {
+          clearTimeout(timer);
+          reject(error);
+        },
+      });
+      this.proc.stdin!.write(JSON.stringify(request) + '\n');
+    });
+  }
+  /** Write a notification (a message without an id); no response is awaited. */
+  private sendNotification(method: string, params: Record<string, unknown>): void {
+    const notification = { jsonrpc: '2.0', method, params };
+    this.proc.stdin!.write(JSON.stringify(notification) + '\n');
+  }
+  /** Reject every request that is still waiting and forget it. */
+  private failPending(error: Error): void {
+    const pending = [...this.responseBuffer.values()];
+    this.responseBuffer.clear();
+    for (const handler of pending) handler.reject(error);
+  }
+  /** Kill the server process, stop reading its output and fail any pending requests. */
+  async close(): Promise<void> {
+    this.proc.kill();
+    this.rl.close();
+    this.failPending(new Error('MCP client closed'));
+  }
+}
+/**
+ * Run the playwright-mcp benchmark sequence and append one result per timed
+ * step to the shared `results` array.
+ *
+ * Sequence: cold start (server spawn + initialize + first navigation),
+ * ITERATIONS rounds of (navigate, snapshot, click), then a screenshot and a
+ * timed browser close. A failure is logged and not rethrown, so results
+ * collected up to that point are kept. The client is always closed.
+ */
+async function benchmarkPlaywrightMCP(): Promise<void> {
+  // Appends one playwright-mcp measurement and echoes it to the console.
+  const record = (operation: string, timeMs: number, output: string, showSize = true): void => {
+    results.push({
+      operation,
+      tool: 'playwright-mcp',
+      timeMs,
+      outputBytes: output.length,
+      estimatedTokens: estimateTokens(output),
+    });
+    const timing = `${timeMs.toFixed(0)}ms`;
+    console.log(`     ${showSize ? `${timing}, ${formatBytes(output.length)}` : timing}`);
+  };
+  // Looks for the ref of the first link listed in a snapshot response.
+  // NOTE(review): assumes snapshot lines look like `- link "…" [ref=e6]` - confirm
+  // against the playwright-mcp version in use. The output is JSON-stringified, so
+  // line breaks inside it appear as the two characters backslash + n.
+  const findLinkRef = (snapshotOutput: string): string | undefined => {
+    for (const line of snapshotOutput.split('\\n')) {
+      const match = /- link\b.*\[ref=([A-Za-z0-9_-]+)\]/.exec(line);
+      if (match) return match[1];
+    }
+    return undefined;
+  };
+  console.log('\n📦 Benchmarking playwright-mcp...\n');
+  let client: MCPClient | null = null;
+  try {
+    // Cold start (includes server startup + initialization)
+    console.log('  ⏱️  Cold start (initialize + navigate)...');
+    const coldStartBegin = performance.now();
+    client = new MCPClient();
+    const init = await client.initialize();
+    // Navigate
+    const nav = await client.callTool('browser_navigate', { url: TEST_URL });
+    const coldStartTime = performance.now() - coldStartBegin;
+    record('cold_start_navigate', coldStartTime, init.output + nav.output);
+    // Warm operations
+    for (let i = 0; i < ITERATIONS; i++) {
+      const iter = i + 1;
+      // Navigate (warm)
+      console.log(`  ⏱️  Navigate (warm, iter ${iter})...`);
+      const navWarm = await client.callTool('browser_navigate', { url: TEST_URL });
+      record('navigate_warm', navWarm.timeMs, navWarm.output);
+      // Snapshot
+      console.log(`  ⏱️  Snapshot (iter ${iter})...`);
+      const snapshot = await client.callTool('browser_snapshot', {});
+      record('snapshot', snapshot.timeMs, snapshot.output);
+      // Note: playwright-mcp doesn't have separate get_title/get_url tools
+      // Title and URL are included in snapshot, so we'll skip those
+      // Click
+      console.log(`  ⏱️  Click link (iter ${iter})...`);
+      // playwright-mcp uses a ref from the snapshot. Take the link's ref from the
+      // snapshot just captured; fall back to the old placeholder 'a' if none is found.
+      const click = await client.callTool('browser_click', {
+        element: 'More information link',
+        ref: findLinkRef(snapshot.output) ?? 'a',
+      });
+      record('click', click.timeMs, click.output);
+      // Navigate back (not timed)
+      await client.callTool('browser_navigate', { url: TEST_URL });
+    }
+    // Screenshot
+    console.log('  ⏱️  Screenshot...');
+    const screenshot = await client.callTool('browser_take_screenshot', {});
+    record('screenshot', screenshot.timeMs, screenshot.output);
+    // Close (prints only its duration, not an output size)
+    console.log('  ⏱️  Close...');
+    const closeResult = await client.callTool('browser_close', {});
+    record('close', closeResult.timeMs, closeResult.output, false);
+  } catch (error) {
+    console.error('  ❌ playwright-mcp benchmark failed:', error);
+  } finally {
+    if (client) {
+      await client.close();
+    }
+  }
+}
+// ============================================================================
+// Results Summary
+// ============================================================================
+/**
+ * Print the benchmark summary from the shared `results` array: a per-operation
+ * speed table, a per-operation output-size table, and the combined output size
+ * of each tool with a one-line comparison.
+ *
+ * Per-operation figures are averages over all recorded runs of that operation;
+ * a tool with no runs for an operation is shown as N/A.
+ */
+function printResults(): void {
+  type Row = (typeof results)[number];
+  // Rows recorded for `tool`, optionally restricted to one operation.
+  const rowsFor = (tool: string, op?: string): Row[] =>
+    results.filter(r => r.tool === tool && (op === undefined || r.operation === op));
+  // Sum of `pick(r)` over `rows`.
+  const total = (rows: Row[], pick: (r: Row) => number): number =>
+    rows.reduce((sum, r) => sum + pick(r), 0);
+  // Mean of `pick(r)` over `rows`, or null when there are no rows.
+  const mean = (rows: Row[], pick: (r: Row) => number): number | null =>
+    rows.length > 0 ? total(rows, pick) / rows.length : null;
+  const timeCell = (avgMs: number | null, width: number): string =>
+    (avgMs !== null ? `${avgMs.toFixed(0)}ms` : 'N/A').padEnd(width);
+  const sizeCell = (rows: Row[]): string => {
+    const avgBytes = mean(rows, r => r.outputBytes);
+    const avgTokens = mean(rows, r => r.estimatedTokens);
+    if (avgBytes === null || avgTokens === null) return 'N/A'.padEnd(18);
+    // Round the average so the cell never shows a long floating-point byte count.
+    return `${formatBytes(Math.round(avgBytes))} (~${Math.round(avgTokens)} tok)`.padEnd(18);
+  };
+  console.log('\n' + '='.repeat(80));
+  console.log('📊 BENCHMARK RESULTS');
+  console.log('='.repeat(80));
+  // Group by operation
+  const operations = [...new Set(results.map(r => r.operation))];
+  console.log('\n📈 Speed Comparison (average across iterations):\n');
+  console.log('| Operation           | agent-browser | playwright-mcp | Difference |');
+  console.log('|---------------------|---------------|----------------|------------|');
+  for (const op of operations) {
+    const agentAvg = mean(rowsFor('agent-browser', op), r => r.timeMs);
+    const mcpAvg = mean(rowsFor('playwright-mcp', op), r => r.timeMs);
+    // The difference describes agent-browser relative to playwright-mcp.
+    let diff = '';
+    if (agentAvg !== null && mcpAvg !== null) {
+      if (agentAvg === mcpAvg) {
+        diff = 'same';
+      } else if (mcpAvg === 0) {
+        // A ratio against zero is undefined; do not print "Infinity% slower".
+        diff = 'N/A';
+      } else {
+        const ratio = agentAvg / mcpAvg;
+        if (ratio < 1) {
+          diff = `${((1 - ratio) * 100).toFixed(0)}% faster`;
+        } else if (ratio > 1) {
+          diff = `${((ratio - 1) * 100).toFixed(0)}% slower`;
+        } else {
+          diff = 'same';
+        }
+      }
+    }
+    console.log(`| ${op.padEnd(19)} | ${timeCell(agentAvg, 13)} | ${timeCell(mcpAvg, 14)} | ${diff.padEnd(10)} |`);
+  }
+  console.log('\n📦 Context Usage (output size for AI consumption):\n');
+  console.log('| Operation           | agent-browser      | playwright-mcp     |');
+  console.log('|---------------------|--------------------|--------------------|');
+  for (const op of operations) {
+    const agentStr = sizeCell(rowsFor('agent-browser', op));
+    const mcpStr = sizeCell(rowsFor('playwright-mcp', op));
+    console.log(`| ${op.padEnd(19)} | ${agentStr} | ${mcpStr} |`);
+  }
+  // Total context usage
+  const agentRows = rowsFor('agent-browser');
+  const mcpRows = rowsFor('playwright-mcp');
+  const agentTotalBytes = total(agentRows, r => r.outputBytes);
+  const agentTotalTokens = total(agentRows, r => r.estimatedTokens);
+  const mcpTotalBytes = total(mcpRows, r => r.outputBytes);
+  const mcpTotalTokens = total(mcpRows, r => r.estimatedTokens);
+  console.log('\n📊 Total Context Usage (all operations combined):');
+  console.log(`   agent-browser:  ${formatBytes(agentTotalBytes)} (~${agentTotalTokens} tokens)`);
+  console.log(`   playwright-mcp: ${formatBytes(mcpTotalBytes)} (~${mcpTotalTokens} tokens)`);
+  // Only compare when both tools produced output; this also avoids dividing by zero.
+  if (agentTotalBytes > 0 && mcpTotalBytes > 0) {
+    const ratio = agentTotalBytes / mcpTotalBytes;
+    if (ratio < 1) {
+      console.log(`   → agent-browser uses ${((1 - ratio) * 100).toFixed(0)}% less context`);
+    } else {
+      console.log(`   → playwright-mcp uses ${((1 - 1/ratio) * 100).toFixed(0)}% less context`);
+    }
+  }
+  console.log('\n' + '='.repeat(80));
+}
+// ============================================================================
+// Main
+// ============================================================================
+/**
+ * Entry point: print the banner, build the project, run the agent-browser and
+ * playwright-mcp benchmarks one after the other, then print the summary.
+ * Any failure is logged and the process exits with status 1.
+ */
+async function main(): Promise<void> {
+  const divider = '='.repeat(80);
+  const banner = [
+    '🚀 Browser Automation Benchmark',
+    `   Testing against: ${TEST_URL}`,
+    `   Iterations: ${ITERATIONS}`,
+    divider,
+  ];
+  for (const line of banner) {
+    console.log(line);
+  }
+  try {
+    // The CLI under test is run from ./dist, so build before measuring.
+    console.log('\n🔨 Building agent-browser...');
+    execSync('pnpm build', { stdio: 'inherit', cwd: process.cwd() });
+    await benchmarkAgentBrowser();
+    await benchmarkPlaywrightMCP();
+    printResults();
+  } catch (error) {
+    console.error('\n❌ Benchmark failed:', error);
+    process.exit(1);
+  }
+}
+main();