agent-browser 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,322 @@
1
+ #!/usr/bin/env npx tsx
2
+ /**
3
+ * Comprehensive Benchmark: agent-browser vs playwright-mcp
4
+ *
5
+ * Tests realistic AI agent workflows on real websites.
6
+ */
7
+
8
+ import { execSync, spawn } from 'child_process';
9
+ import * as path from 'path';
10
+
11
/** One timed operation performed by one tool while running a workflow. */
interface Result {
  /** Tool that executed the step ('agent-browser' or 'playwright-mcp'). */
  tool: string;
  /** Name of the workflow the step belongs to. */
  workflow: string;
  /** Name of the step inside that workflow. */
  operation: string;
  /** Wall-clock duration of the step, in milliseconds. */
  timeMs: number;
  /** Size of the output the tool produced for the step. */
  outputBytes: number;
}
18
+
19
// Shared accumulator: every runner appends its per-step measurements here,
// and the reporting code reads them back.
const results: Array<Result> = [];
20
+
21
/**
 * Renders a duration for display: whole milliseconds below one second,
 * seconds with two decimals otherwise.
 *
 * The unit is chosen by magnitude so that negative values (used for the
 * "Diff" column when one tool is faster) are scaled the same way as
 * positive ones, e.g. -5000 becomes "-5.00s" rather than "-5000ms".
 *
 * @param ms Duration in milliseconds; may be negative.
 * @returns The formatted duration, e.g. "250ms" or "1.50s".
 */
function formatTime(ms: number): string {
  if (Math.abs(ms) < 1000) return `${ms.toFixed(0)}ms`;
  return `${(ms / 1000).toFixed(2)}s`;
}
25
+
26
/**
 * Renders a byte count for display using binary units.
 *
 * Values below 1 KiB are printed as plain bytes, values below 1 MiB as KB
 * with one decimal (unchanged from the previous behavior), and larger values
 * as MB with one decimal so that summed totals stay readable.
 *
 * @param bytes Number of bytes.
 * @returns The formatted size, e.g. "512B", "1.5KB" or "5.0MB".
 */
function formatBytes(bytes: number): string {
  const KIB = 1024;
  const MIB = KIB * KIB;
  if (bytes < KIB) return `${bytes}B`;
  if (bytes < MIB) return `${(bytes / KIB).toFixed(1)}KB`;
  return `${(bytes / MIB).toFixed(1)}MB`;
}
30
+
31
+ // ============================================================================
32
+ // Agent-Browser Runner
33
+ // ============================================================================
34
+
35
/**
 * Runs one agent-browser CLI command synchronously and measures how long it takes.
 *
 * Failures never throw: when the command exits non-zero or times out, the
 * captured stdout (or, if that is empty, the error message) is returned as
 * the output instead.
 *
 * @param args CLI arguments, one array element per argument.
 * @param session Value exported to the CLI as AGENT_BROWSER_SESSION.
 * @returns Elapsed wall-clock time in milliseconds and the captured output.
 */
function runAB(args: string[], session: string = 'bench'): { ms: number; output: string } {
  // execSync runs the command line through a shell. Arguments such as
  // `a[href="/wiki/Machine_learning"]` or `document.querySelectorAll(".athing").length`
  // contain quotes, brackets, parentheses and `?`, which an unquoted command
  // line would have the shell strip, glob-expand or reject. Single-quote each
  // argument (POSIX sh rules; the script already assumes a POSIX `./bin/...` path).
  const quotedArgs = args.map((arg) => `'${arg.replace(/'/g, "'\\''")}'`).join(' ');

  const start = performance.now();
  let output = '';
  try {
    output = execSync(`./bin/agent-browser ${quotedArgs}`, {
      stdio: 'pipe',
      timeout: 30000,
      env: { ...process.env, AGENT_BROWSER_SESSION: session },
    }).toString();
  } catch (e: unknown) {
    // execSync errors carry the child's stdout; fall back to the message.
    const failure = (typeof e === 'object' && e !== null ? e : {}) as {
      stdout?: { toString(): string };
      message?: string;
    };
    output = failure.stdout?.toString() || failure.message || '';
  }
  return { ms: performance.now() - start, output };
}
49
+
50
+ // ============================================================================
51
+ // Playwright-MCP Runner
52
+ // ============================================================================
53
+
54
/** Minimal handle on a running playwright-mcp server. */
interface MCPClient {
  /**
   * Invokes one MCP tool and resolves with the elapsed time in milliseconds
   * and the serialized response.
   */
  call: (tool: string, args: Record<string, unknown>) => Promise<{ ms: number; output: string }>;
  /** Shuts the server process down. */
  close: () => void;
}
58
+
59
/**
 * Starts playwright-mcp as a child process, performs the MCP `initialize`
 * handshake over newline-delimited JSON-RPC 2.0 on its stdio, and returns a
 * client for timed tool calls.
 *
 * Every request is rejected with `Error('Timeout')` if no response arrives
 * within 30 seconds. Requests are also rejected as soon as the child fails to
 * spawn, its stdin errors, or it exits, instead of waiting out the timeout.
 *
 * @returns A client whose `call` resolves with the elapsed milliseconds and
 *   the JSON-serialized response, and whose `close` kills the child process.
 * @throws If the `initialize` request fails; the child is killed first so it
 *   does not outlive the failed setup.
 */
async function createMCPClient(): Promise<MCPClient> {
  const REQUEST_TIMEOUT_MS = 30000;

  interface PendingRequest {
    resolve: (value: unknown) => void;
    reject: (reason: Error) => void;
    timer: ReturnType<typeof setTimeout>;
  }

  const mcpPath = path.join(process.cwd(), 'opensrc/repos/github.com/microsoft/playwright-mcp/cli.js');
  const proc = spawn('node', [mcpPath, '--headless'], { stdio: ['pipe', 'pipe', 'pipe'] });

  let buffer = '';
  let requestId = 0;
  const pending = new Map<number, PendingRequest>();

  // Removes a request from the pending table and cancels its timeout so a
  // settled request never leaves a live timer behind.
  const take = (id: number): PendingRequest | undefined => {
    const entry = pending.get(id);
    if (entry === undefined) return undefined;
    clearTimeout(entry.timer);
    pending.delete(id);
    return entry;
  };

  // Rejects every outstanding request with the same reason.
  const failAll = (reason: Error): void => {
    for (const id of [...pending.keys()]) {
      take(id)?.reject(reason);
    }
  };

  proc.on('error', failAll);
  // 'close' (not 'exit') so responses still buffered on stdout are delivered first.
  proc.on('close', (code, signal) => {
    failAll(new Error(`playwright-mcp exited (code=${code}, signal=${signal})`));
  });
  // Writing to a dead child surfaces as an error on stdin; without a listener
  // it would crash the benchmark as an unhandled 'error' event.
  proc.stdin!.on('error', failAll);
  // stderr is piped but unused; drain it so the child cannot block on a full pipe.
  proc.stderr!.resume();

  // Decode as a stream so a multi-byte character split across chunks survives.
  proc.stdout!.setEncoding('utf8');
  proc.stdout!.on('data', (chunk: string) => {
    buffer += chunk;
    const lines = buffer.split('\n');
    // The last element is an incomplete line (or ''); keep it for the next chunk.
    buffer = lines.pop() ?? '';
    for (const line of lines) {
      if (!line.trim()) continue;
      let msg: { id?: unknown } | null;
      try {
        msg = JSON.parse(line);
      } catch {
        // Not JSON (e.g. stray log output on stdout): ignore the line.
        continue;
      }
      if (typeof msg?.id === 'number') {
        take(msg.id)?.resolve(msg);
      }
    }
  });

  const send = (method: string, params: Record<string, unknown>): Promise<unknown> => {
    const id = ++requestId;
    return new Promise<unknown>((resolve, reject) => {
      const timer = setTimeout(() => {
        take(id)?.reject(new Error('Timeout'));
      }, REQUEST_TIMEOUT_MS);
      pending.set(id, { resolve, reject, timer });
      proc.stdin!.write(JSON.stringify({ jsonrpc: '2.0', id, method, params }) + '\n');
    });
  };

  // Initialize
  try {
    await send('initialize', {
      protocolVersion: '2024-11-05',
      capabilities: {},
      clientInfo: { name: 'benchmark', version: '1.0.0' },
    });
  } catch (err) {
    // The caller never receives a client in this case, so clean up here.
    proc.kill();
    throw err;
  }
  proc.stdin!.write(JSON.stringify({ jsonrpc: '2.0', method: 'notifications/initialized', params: {} }) + '\n');

  return {
    call: async (tool: string, args: Record<string, unknown>) => {
      const start = performance.now();
      const result = await send('tools/call', { name: tool, arguments: args });
      return { ms: performance.now() - start, output: JSON.stringify(result) };
    },
    close: () => proc.kill(),
  };
}
114
+
115
+ // ============================================================================
116
+ // Workflows
117
+ // ============================================================================
118
+
119
/** One benchmark step, expressed once per tool. */
interface WorkflowStep {
  /** Label used to match up and report the step's results. */
  name: string;
  /** Argument list handed to the agent-browser CLI. */
  ab: string[];
  /** Tool name and arguments sent to playwright-mcp. */
  mcp: { tool: string; args: Record<string, unknown> };
}

/** A named sequence of steps that both tools execute in order. */
interface Workflow {
  name: string;
  description: string;
  steps: Array<WorkflowStep>;
}
128
+
129
/** Builds a step whose playwright-mcp side is a plain `browser_snapshot` call. */
const snapshotBackedStep = (name: string, ab: string[]) => ({
  name,
  ab,
  mcp: { tool: 'browser_snapshot', args: {} },
});

/** Builds a step that takes an interactive (`-i`) snapshot with agent-browser. */
const interactiveSnapshotStep = (name: string) => snapshotBackedStep(name, ['snapshot', '-i']);

/** Builds the 'Navigate' step that opens `url` with both tools. */
const navigateStep = (url: string) => ({
  name: 'Navigate',
  ab: ['open', url],
  mcp: { tool: 'browser_navigate', args: { url } },
});

// The workflows both tools are measured on. Steps that have no direct
// playwright-mcp counterpart here (get title/url, eval) are paired with a
// `browser_snapshot` call.
const workflows: Workflow[] = [
  {
    name: 'Wikipedia Research',
    description: 'Navigate Wikipedia, read content, follow links',
    steps: [
      navigateStep('https://en.wikipedia.org/wiki/Artificial_intelligence'),
      interactiveSnapshotStep('Snapshot'),
      snapshotBackedStep('Get title', ['get', 'title']),
      interactiveSnapshotStep('Snapshot 2'),
      {
        name: 'Click link',
        ab: ['click', 'a[href="/wiki/Machine_learning"]'],
        // NOTE(review): 'internal link' looks like a placeholder rather than an
        // element reference taken from a snapshot — confirm what playwright-mcp
        // expects for `ref`.
        mcp: { tool: 'browser_click', args: { element: 'Machine learning', ref: 'internal link' } },
      },
      interactiveSnapshotStep('Snapshot 3'),
    ],
  },
  {
    name: 'GitHub Browse',
    description: 'Browse a GitHub repository',
    steps: [
      navigateStep('https://github.com/anthropics/anthropic-cookbook'),
      interactiveSnapshotStep('Snapshot'),
      snapshotBackedStep('Get URL', ['get', 'url']),
      snapshotBackedStep('Eval (stars)', ['eval', 'document.querySelector("#repo-stars-counter-star")?.textContent']),
      interactiveSnapshotStep('Snapshot 2'),
    ],
  },
  {
    name: 'Hacker News',
    description: 'Browse Hacker News front page',
    steps: [
      navigateStep('https://news.ycombinator.com'),
      interactiveSnapshotStep('Snapshot'),
      snapshotBackedStep('Eval (count)', ['eval', 'document.querySelectorAll(".athing").length']),
      interactiveSnapshotStep('Snapshot 2'),
      snapshotBackedStep('Get title', ['get', 'title']),
    ],
  },
];
165
+
166
+ // ============================================================================
167
+ // Run Benchmarks
168
+ // ============================================================================
169
+
170
/**
 * Runs every step of a workflow through the agent-browser CLI and appends one
 * result per step to `results`.
 *
 * The session is closed before the first step, so a leftover session from an
 * earlier run is not reused, and again after the last step.
 *
 * @param workflow Workflow whose `ab` commands are executed in order.
 * @param session Session name passed to each CLI invocation.
 */
async function runAgentBrowser(workflow: Workflow, session: string): Promise<void> {
  // Cleanup. runAB reports failures through its output instead of throwing,
  // so no try/catch is needed around it.
  runAB(['close'], session);
  await sleep(100);

  for (const step of workflow.steps) {
    const r = runAB(step.ab, session);
    results.push({
      tool: 'agent-browser',
      workflow: workflow.name,
      operation: step.name,
      timeMs: r.ms,
      // Count encoded bytes, not UTF-16 code units: page text is often non-ASCII.
      outputBytes: Buffer.byteLength(r.output, 'utf8'),
    });
  }

  runAB(['close'], session);
}
188
+
189
/**
 * Runs every step of a workflow through a fresh playwright-mcp server and
 * appends one result per step to `results`.
 *
 * Any error (server start-up, a timed-out call, ...) is logged and ends the
 * workflow early; results recorded before the error are kept. The server
 * process is always shut down.
 *
 * @param workflow Workflow whose `mcp` tool calls are executed in order.
 */
async function runPlaywrightMCP(workflow: Workflow): Promise<void> {
  let client: MCPClient | null = null;
  try {
    client = await createMCPClient();

    for (const step of workflow.steps) {
      const r = await client.call(step.mcp.tool, step.mcp.args);
      results.push({
        tool: 'playwright-mcp',
        workflow: workflow.name,
        operation: step.name,
        timeMs: r.ms,
        // Count encoded bytes, not UTF-16 code units: page text is often non-ASCII.
        outputBytes: Buffer.byteLength(r.output, 'utf8'),
      });
    }

    await client.call('browser_close', {});
  } catch (e) {
    console.log(` ⚠️ MCP error: ${e}`);
  } finally {
    client?.close();
  }
}
212
+
213
+ // ============================================================================
214
+ // Reporting
215
+ // ============================================================================
216
+
217
/**
 * Prints the benchmark report: one timing table per workflow, followed by
 * overall totals, the per-operation average and an estimate of context usage
 * (output bytes / 4 as a rough token count).
 *
 * A step with no recorded result for a tool is shown as 0ms. When a tool
 * recorded no operations at all (for example because it failed to start),
 * its average is reported as unavailable and no winner is declared, rather
 * than printing NaN or a misleading comparison against a total of zero.
 */
function printResults(): void {
  const rowsFor = (tool: string): Result[] => results.filter((r) => r.tool === tool);
  const sumOf = (rows: Result[], pick: (r: Result) => number): number =>
    rows.reduce((acc, r) => acc + pick(r), 0);
  const signed = (deltaMs: number): string =>
    deltaMs > 0 ? `+${formatTime(deltaMs)}` : formatTime(deltaMs);
  // Header and data rows share one formatter so the columns always line up.
  const tableRow = (label: string, ab: string, mcp: string, diff: string): string =>
    `│ ${label.padEnd(18)} │ ${ab.padEnd(13)} │ ${mcp.padEnd(14)} │ ${diff.padEnd(9)} │`;
  const perOp = (totalMs: number, ops: number): string =>
    ops > 0 ? `${(totalMs / ops).toFixed(0)}ms avg/op` : 'no operations recorded';

  console.log('\n' + '═'.repeat(80));
  console.log('📊 DETAILED RESULTS');
  console.log('═'.repeat(80));

  for (const workflow of workflows) {
    console.log(`\n📋 ${workflow.name}`);
    console.log('─'.repeat(70));
    console.log(tableRow('Operation', 'agent-browser', 'playwright-mcp', 'Diff'));
    console.log('├────────────────────┼───────────────┼────────────────┼───────────┤');

    let abTotal = 0;
    let mcpTotal = 0;

    for (const step of workflow.steps) {
      const timeFor = (tool: string): number =>
        results.find((r) => r.tool === tool && r.workflow === workflow.name && r.operation === step.name)
          ?.timeMs ?? 0;

      const abTime = timeFor('agent-browser');
      const mcpTime = timeFor('playwright-mcp');
      abTotal += abTime;
      mcpTotal += mcpTime;

      console.log(tableRow(step.name, formatTime(abTime), formatTime(mcpTime), signed(mcpTime - abTime)));
    }

    console.log('├────────────────────┼───────────────┼────────────────┼───────────┤');
    console.log(tableRow('TOTAL', formatTime(abTotal), formatTime(mcpTotal), signed(mcpTotal - abTotal)));
    console.log('└────────────────────┴───────────────┴────────────────┴───────────┘');
  }

  // Summary
  const abRows = rowsFor('agent-browser');
  const mcpRows = rowsFor('playwright-mcp');
  const abTotalAll = sumOf(abRows, (r) => r.timeMs);
  const mcpTotalAll = sumOf(mcpRows, (r) => r.timeMs);
  const abOps = abRows.length;
  const mcpOps = mcpRows.length;

  console.log('\n' + '═'.repeat(80));
  console.log('📈 SUMMARY');
  console.log('═'.repeat(80));
  console.log(`\n Workflows tested: ${workflows.length}`);
  console.log(` Total operations: ${abOps} (agent-browser), ${mcpOps} (playwright-mcp)`);
  console.log(`\n agent-browser total: ${formatTime(abTotalAll)} (${perOp(abTotalAll, abOps)})`);
  console.log(` playwright-mcp total: ${formatTime(mcpTotalAll)} (${perOp(mcpTotalAll, mcpOps)})`);

  if (abOps === 0 || mcpOps === 0) {
    // A total of zero from a tool that never ran would otherwise "win".
    console.log('\n ⚠️ Comparison skipped: at least one tool recorded no operations');
  } else if (abTotalAll < mcpTotalAll) {
    console.log(`\n ✅ agent-browser is ${((mcpTotalAll - abTotalAll) / 1000).toFixed(2)}s faster overall`);
  } else {
    console.log(`\n ⏱️ playwright-mcp is ${((abTotalAll - mcpTotalAll) / 1000).toFixed(2)}s faster overall`);
  }

  // Context usage
  const abBytes = sumOf(abRows, (r) => r.outputBytes);
  const mcpBytes = sumOf(mcpRows, (r) => r.outputBytes);

  console.log(`\n Context usage:`);
  console.log(` agent-browser: ${formatBytes(abBytes)} (~${Math.ceil(abBytes / 4)} tokens)`);
  console.log(` playwright-mcp: ${formatBytes(mcpBytes)} (~${Math.ceil(mcpBytes / 4)} tokens)`);

  console.log('\n' + '═'.repeat(80));
}
282
+
283
/**
 * Resolves after the given delay.
 *
 * @param ms Delay in milliseconds.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(() => resolve(), ms);
  });
}
286
+
287
+ // ============================================================================
288
+ // Main
289
+ // ============================================================================
290
+
291
/**
 * Entry point: lists the workflows, builds the project with `pnpm build`,
 * runs each workflow first with agent-browser and then with playwright-mcp,
 * and finally prints the report.
 */
async function main(): Promise<void> {
  const banner = '═'.repeat(80);
  const divider = '─'.repeat(80);

  // Total time recorded so far for one tool within one workflow.
  const elapsedFor = (tool: string, workflowName: string): number => {
    let total = 0;
    for (const r of results) {
      if (r.tool === tool && r.workflow === workflowName) total += r.timeMs;
    }
    return total;
  };

  console.log(banner);
  console.log('🚀 COMPREHENSIVE BENCHMARK: agent-browser vs playwright-mcp');
  console.log(banner);
  console.log('\nWorkflows:');
  workflows.forEach((w) => {
    console.log(` • ${w.name}: ${w.description} (${w.steps.length} steps)`);
  });

  console.log('\n🔨 Building...');
  execSync('pnpm build', { cwd: process.cwd(), stdio: 'inherit' });

  for (const workflow of workflows) {
    console.log(`\n${divider}`);
    console.log(`📋 Running: ${workflow.name}`);
    console.log(divider);

    // Session name derived from the workflow name, e.g. "ab-hacker-news".
    const session = `ab-${workflow.name.toLowerCase().replace(/\s+/g, '-')}`;

    console.log('\n agent-browser:');
    await runAgentBrowser(workflow, session);
    console.log(` ✓ Completed in ${formatTime(elapsedFor('agent-browser', workflow.name))}`);

    console.log('\n playwright-mcp:');
    await runPlaywrightMCP(workflow);
    console.log(` ✓ Completed in ${formatTime(elapsedFor('playwright-mcp', workflow.name))}`);
  }

  printResults();
}
321
+
322
// Run the benchmark. On failure, log the error and mark the process as failed
// so callers (shell scripts, CI) do not see exit code 0 for a broken run.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
package/bin/agent-browser CHANGED
Binary file
@@ -1 +1 @@
1
- {"version":3,"file":"actions.d.ts","sourceRoot":"","sources":["../src/actions.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AACnD,OAAO,KAAK,EACV,OAAO,EACP,QAAQ,EAoGT,MAAM,YAAY,CAAC;AAQpB;;GAEG;AACH,wBAAsB,cAAc,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,cAAc,GAAG,OAAO,CAAC,QAAQ,CAAC,CAiPjG"}
1
+ {"version":3,"file":"actions.d.ts","sourceRoot":"","sources":["../src/actions.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AACnD,OAAO,KAAK,EACV,OAAO,EACP,QAAQ,EAoGT,MAAM,YAAY,CAAC;AASpB;;GAEG;AACH,wBAAsB,cAAc,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,cAAc,GAAG,OAAO,CAAC,QAAQ,CAAC,CAiPjG"}
package/dist/actions.js CHANGED
@@ -260,8 +260,9 @@ async function handleNavigate(command, browser) {
260
260
  });
261
261
  }
262
262
  async function handleClick(command, browser) {
263
- const page = browser.getPage();
264
- await page.click(command.selector, {
263
+ // Support both refs (@e1) and regular selectors
264
+ const locator = browser.getLocator(command.selector);
265
+ await locator.click({
265
266
  button: command.button,
266
267
  clickCount: command.clickCount,
267
268
  delay: command.delay,
@@ -269,11 +270,11 @@ async function handleClick(command, browser) {
269
270
  return successResponse(command.id, { clicked: true });
270
271
  }
271
272
  async function handleType(command, browser) {
272
- const page = browser.getPage();
273
+ const locator = browser.getLocator(command.selector);
273
274
  if (command.clear) {
274
- await page.fill(command.selector, '');
275
+ await locator.fill('');
275
276
  }
276
- await page.type(command.selector, command.text, {
277
+ await locator.pressSequentially(command.text, {
277
278
  delay: command.delay,
278
279
  });
279
280
  return successResponse(command.id, { typed: true });
@@ -311,11 +312,21 @@ async function handleScreenshot(command, browser) {
311
312
  }
312
313
  }
313
314
  async function handleSnapshot(command, browser) {
314
- const page = browser.getPage();
315
- // Use ariaSnapshot which returns a string representation of the accessibility tree
316
- const snapshot = await page.locator(':root').ariaSnapshot();
315
+ // Use enhanced snapshot with refs and optional filtering
316
+ const { tree, refs } = await browser.getSnapshot({
317
+ interactive: command.interactive,
318
+ maxDepth: command.maxDepth,
319
+ compact: command.compact,
320
+ selector: command.selector,
321
+ });
322
+ // Simplify refs for output (just role and name)
323
+ const simpleRefs = {};
324
+ for (const [ref, data] of Object.entries(refs)) {
325
+ simpleRefs[ref] = { role: data.role, name: data.name };
326
+ }
317
327
  return successResponse(command.id, {
318
- snapshot: snapshot ?? 'Empty page',
328
+ snapshot: tree || 'Empty page',
329
+ refs: Object.keys(simpleRefs).length > 0 ? simpleRefs : undefined,
319
330
  });
320
331
  }
321
332
  async function handleEvaluate(command, browser) {
@@ -378,14 +389,14 @@ async function handleScroll(command, browser) {
378
389
  return successResponse(command.id, { scrolled: true });
379
390
  }
380
391
  async function handleSelect(command, browser) {
381
- const page = browser.getPage();
392
+ const locator = browser.getLocator(command.selector);
382
393
  const values = Array.isArray(command.values) ? command.values : [command.values];
383
- await page.selectOption(command.selector, values);
394
+ await locator.selectOption(values);
384
395
  return successResponse(command.id, { selected: values });
385
396
  }
386
397
  async function handleHover(command, browser) {
387
- const page = browser.getPage();
388
- await page.hover(command.selector);
398
+ const locator = browser.getLocator(command.selector);
399
+ await locator.hover();
389
400
  return successResponse(command.id, { hovered: true });
390
401
  }
391
402
  async function handleContent(command, browser) {
@@ -432,34 +443,34 @@ async function handleWindowNew(command, browser) {
432
443
  }
433
444
  // New handlers for enhanced Playwright parity
434
445
  async function handleFill(command, browser) {
435
- const frame = browser.getFrame();
436
- await frame.fill(command.selector, command.value);
446
+ const locator = browser.getLocator(command.selector);
447
+ await locator.fill(command.value);
437
448
  return successResponse(command.id, { filled: true });
438
449
  }
439
450
  async function handleCheck(command, browser) {
440
- const frame = browser.getFrame();
441
- await frame.check(command.selector);
451
+ const locator = browser.getLocator(command.selector);
452
+ await locator.check();
442
453
  return successResponse(command.id, { checked: true });
443
454
  }
444
455
  async function handleUncheck(command, browser) {
445
- const frame = browser.getFrame();
446
- await frame.uncheck(command.selector);
456
+ const locator = browser.getLocator(command.selector);
457
+ await locator.uncheck();
447
458
  return successResponse(command.id, { unchecked: true });
448
459
  }
449
460
  async function handleUpload(command, browser) {
450
- const frame = browser.getFrame();
461
+ const locator = browser.getLocator(command.selector);
451
462
  const files = Array.isArray(command.files) ? command.files : [command.files];
452
- await frame.setInputFiles(command.selector, files);
463
+ await locator.setInputFiles(files);
453
464
  return successResponse(command.id, { uploaded: files });
454
465
  }
455
466
  async function handleDoubleClick(command, browser) {
456
- const frame = browser.getFrame();
457
- await frame.dblclick(command.selector);
467
+ const locator = browser.getLocator(command.selector);
468
+ await locator.dblclick();
458
469
  return successResponse(command.id, { clicked: true });
459
470
  }
460
471
  async function handleFocus(command, browser) {
461
- const frame = browser.getFrame();
462
- await frame.focus(command.selector);
472
+ const locator = browser.getLocator(command.selector);
473
+ await locator.focus();
463
474
  return successResponse(command.id, { focused: true });
464
475
  }
465
476
  async function handleDrag(command, browser) {
@@ -702,28 +713,28 @@ async function handleTitle(command, browser) {
702
713
  return successResponse(command.id, { title });
703
714
  }
704
715
  async function handleGetAttribute(command, browser) {
705
- const page = browser.getPage();
706
- const value = await page.getAttribute(command.selector, command.attribute);
716
+ const locator = browser.getLocator(command.selector);
717
+ const value = await locator.getAttribute(command.attribute);
707
718
  return successResponse(command.id, { attribute: command.attribute, value });
708
719
  }
709
720
  async function handleGetText(command, browser) {
710
- const page = browser.getPage();
711
- const text = await page.textContent(command.selector);
721
+ const locator = browser.getLocator(command.selector);
722
+ const text = await locator.textContent();
712
723
  return successResponse(command.id, { text });
713
724
  }
714
725
  async function handleIsVisible(command, browser) {
715
- const page = browser.getPage();
716
- const visible = await page.isVisible(command.selector);
726
+ const locator = browser.getLocator(command.selector);
727
+ const visible = await locator.isVisible();
717
728
  return successResponse(command.id, { visible });
718
729
  }
719
730
  async function handleIsEnabled(command, browser) {
720
- const page = browser.getPage();
721
- const enabled = await page.isEnabled(command.selector);
731
+ const locator = browser.getLocator(command.selector);
732
+ const enabled = await locator.isEnabled();
722
733
  return successResponse(command.id, { enabled });
723
734
  }
724
735
  async function handleIsChecked(command, browser) {
725
- const page = browser.getPage();
726
- const checked = await page.isChecked(command.selector);
736
+ const locator = browser.getLocator(command.selector);
737
+ const checked = await locator.isChecked();
727
738
  return successResponse(command.id, { checked });
728
739
  }
729
740
  async function handleCount(command, browser) {