@ulpi/browse 0.7.3 → 0.7.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -22
- package/package.json +1 -1
- package/src/snapshot.ts +65 -12
package/README.md
CHANGED
|
@@ -10,26 +10,33 @@ Ten actions and you've burned **146K tokens — 73% of a 200K context window**
|
|
|
10
10
|
|
|
11
11
|
**Same 10 actions: ~11K tokens. 6% of context. 13x less than @playwright/mcp.**
|
|
12
12
|
|
|
13
|
-
## Benchmarks
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
|
22
|
-
|
|
23
|
-
|
|
|
24
|
-
|
|
|
25
|
-
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
13
|
+
## Benchmarks
|
|
14
|
+
|
|
15
|
+
### vs Agent Browser & Browser-Use (Token Cost)
|
|
16
|
+
|
|
17
|
+
Tested on 3 sites across multi-step browsing flows — navigate, snapshot, scroll, search, extract text:
|
|
18
|
+
|
|
19
|
+
**browse is 2.4-2.8x cheaper on tokens, 1.3-2.6x faster, and uses 7% of context vs 17-20%.**
|
|
20
|
+
|
|
21
|
+
| Tool | Total Tokens | Total Time | Context Used (200K) |
|
|
22
|
+
|------|-------------:|-----------:|--------------------:|
|
|
23
|
+
| **browse** | **14,134** | **28.5s** | **7.1%** |
|
|
24
|
+
| agent-browser | 39,414 | 36.2s | 19.7% |
|
|
25
|
+
| browser-use | 34,281 | 72.7s | 17.1% |
|
|
26
|
+
|
|
27
|
+
**Per site:**
|
|
28
|
+
|
|
29
|
+
| Site | browse tokens | agent-browser tokens | browser-use tokens | browse time | agent-browser time | browser-use time |
|
|
30
|
+
|------|-------:|-------------:|------------:|------:|------:|------:|
|
|
31
|
+
| amazon.com | 7,531 | 11,596 | 20,508 | 10.1s | 12.9s | 21.9s |
|
|
32
|
+
| bbc.com | 4,032 | 24,861 | 8,827 | 9.8s | 13.5s | 29.9s |
|
|
33
|
+
| booking.com | 2,571 | 2,957 | 4,946 | 8.6s | 9.8s | 20.9s |
|
|
34
|
+
|
|
35
|
+
browse uses **2.4x fewer tokens** than browser-use and **2.8x fewer** than agent-browser — and completes **2.6x faster** than browser-use across the same workflows.
|
|
36
|
+
|
|
37
|
+
### vs @playwright/mcp (Architecture)
|
|
38
|
+
|
|
39
|
+
@playwright/mcp dumps the full accessibility snapshot on every action (navigate, click, type). browse returns ~15 tokens per action — the agent requests a snapshot only when it needs one:
|
|
33
40
|
|
|
34
41
|
| | @playwright/mcp | @ulpi/browse |
|
|
35
42
|
|---|---:|---:|
|
|
@@ -241,8 +248,10 @@ browse click @e52
|
|
|
241
248
|
|
|
242
249
|
### Snapshot & Refs
|
|
243
250
|
```
|
|
244
|
-
snapshot [-i] [-c] [-C] [-d N] [-s sel]
|
|
245
|
-
-i Interactive elements only (
|
|
251
|
+
snapshot [-i] [-f] [-V] [-c] [-C] [-d N] [-s sel]
|
|
252
|
+
-i Interactive elements only — terse flat list (minimal tokens)
|
|
253
|
+
-f Full — indented tree with props and children (use with -i)
|
|
254
|
+
-V Viewport — only elements visible in current viewport
|
|
246
255
|
-c Compact — remove empty structural nodes
|
|
247
256
|
-C Cursor-interactive — detect hidden clickable elements
|
|
248
257
|
-d N Limit tree depth
|
|
@@ -362,6 +371,19 @@ Inspired by and originally derived from the `/browse` skill in [gstack](https://
|
|
|
362
371
|
|
|
363
372
|
## Changelog
|
|
364
373
|
|
|
374
|
+
### v0.7.0 — Token Optimization
|
|
375
|
+
|
|
376
|
+
- `snapshot -i` now outputs a terse flat list by default (no indentation, no props, names truncated to 30 chars)
|
|
377
|
+
- `-f` flag for full indented ARIA tree with props/children (the old `-i` behavior)
|
|
378
|
+
- `-V` flag for viewport-only snapshot — filters to elements visible in the current viewport (BBC: 189 → 28 elements, ~85% reduction)
|
|
379
|
+
- `browse version` / `--version` / `-V` — print CLI version
|
|
380
|
+
- 2.4-2.8x fewer tokens than browser-use and agent-browser across real-world benchmarks
|
|
381
|
+
|
|
382
|
+
### v0.4.0 — Video Recording
|
|
383
|
+
|
|
384
|
+
- `video start [dir]` | `video stop` | `video status` — compositor-level WebM recording
|
|
385
|
+
- Works with local and remote (CDP) browsers
|
|
386
|
+
|
|
365
387
|
### v0.3.0 — Headed Mode, Clipboard, DevTools
|
|
366
388
|
|
|
367
389
|
- `--headed` flag — run browser in visible mode for debugging and demos
|
package/package.json
CHANGED
package/src/snapshot.ts
CHANGED
|
@@ -446,27 +446,80 @@ export async function handleSnapshot(
|
|
|
446
446
|
output.push(outputLine);
|
|
447
447
|
}
|
|
448
448
|
|
|
449
|
-
// Viewport filter: remove elements
|
|
449
|
+
// Viewport filter: remove elements outside the visible viewport
|
|
450
|
+
// Uses a single page.evaluate() for speed — checking 189 locators individually is slow
|
|
450
451
|
if (opts.viewport) {
|
|
451
452
|
const vp = page.viewportSize();
|
|
452
453
|
if (vp) {
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
454
|
+
// Build a list of {ref, role, name} to check in the DOM
|
|
455
|
+
const checks = Array.from(refMap.keys()).map(ref => {
|
|
456
|
+
const line = output.find(l => l.includes(`@${ref} `));
|
|
457
|
+
const roleMatch = line?.match(/\[(\w+)\]/);
|
|
458
|
+
const nameMatch = line?.match(/"([^"]*)"/);
|
|
459
|
+
return { ref, role: roleMatch?.[1] || '', name: nameMatch?.[1] || '' };
|
|
460
|
+
});
|
|
461
|
+
|
|
462
|
+
const visibleRefs = await evalCtx.evaluate(
|
|
463
|
+
({ checks, vpHeight }) => {
|
|
464
|
+
const ROLE_TO_SELECTOR: Record<string, string> = {
|
|
465
|
+
link: 'a,[role="link"]',
|
|
466
|
+
button: 'button,[role="button"],input[type="button"],input[type="submit"]',
|
|
467
|
+
textbox: 'input:not([type="checkbox"]):not([type="radio"]):not([type="submit"]):not([type="button"]):not([type="hidden"]),textarea,[role="textbox"]',
|
|
468
|
+
checkbox: 'input[type="checkbox"],[role="checkbox"]',
|
|
469
|
+
radio: 'input[type="radio"],[role="radio"]',
|
|
470
|
+
combobox: 'select,[role="combobox"]',
|
|
471
|
+
searchbox: 'input[type="search"],[role="searchbox"]',
|
|
472
|
+
tab: '[role="tab"]',
|
|
473
|
+
switch: '[role="switch"]',
|
|
474
|
+
slider: 'input[type="range"],[role="slider"]',
|
|
475
|
+
menuitem: '[role="menuitem"]',
|
|
476
|
+
option: 'option,[role="option"]',
|
|
477
|
+
};
|
|
478
|
+
|
|
479
|
+
const visible = new Set<string>();
|
|
480
|
+
// Track which elements we've already matched per role+name
|
|
481
|
+
const roleCounts = new Map<string, number>();
|
|
482
|
+
|
|
483
|
+
for (const { ref, role, name } of checks) {
|
|
484
|
+
const selector = ROLE_TO_SELECTOR[role] || `[role="${role}"]`;
|
|
485
|
+
const all = document.querySelectorAll(selector);
|
|
486
|
+
const key = `${role}:${name}`;
|
|
487
|
+
const skip = roleCounts.get(key) || 0;
|
|
488
|
+
|
|
489
|
+
let matched = 0;
|
|
490
|
+
for (let i = 0; i < all.length; i++) {
|
|
491
|
+
const el = all[i] as HTMLElement;
|
|
492
|
+
// Match by accessible name (textContent or aria-label)
|
|
493
|
+
const accName = (el.getAttribute('aria-label') || el.textContent || '').trim();
|
|
494
|
+
// For terse mode, name may be truncated — check startsWith
|
|
495
|
+
const nameMatches = !name || accName === name ||
|
|
496
|
+
(name.endsWith('...') && accName.startsWith(name.slice(0, -3)));
|
|
497
|
+
if (!nameMatches) continue;
|
|
498
|
+
|
|
499
|
+
if (matched < skip) { matched++; continue; }
|
|
500
|
+
|
|
501
|
+
const rect = el.getBoundingClientRect();
|
|
502
|
+
if (rect.y + rect.height > 0 && rect.y < vpHeight) {
|
|
503
|
+
visible.add(ref);
|
|
504
|
+
}
|
|
505
|
+
matched++;
|
|
506
|
+
break;
|
|
460
507
|
}
|
|
461
|
-
|
|
462
|
-
toRemove.add(ref);
|
|
508
|
+
roleCounts.set(key, skip + 1);
|
|
463
509
|
}
|
|
464
|
-
|
|
510
|
+
return [...visible];
|
|
511
|
+
},
|
|
512
|
+
{ checks, vpHeight: vp.height }
|
|
465
513
|
);
|
|
514
|
+
|
|
515
|
+
const visibleSet = new Set(visibleRefs);
|
|
516
|
+
const toRemove = new Set<string>();
|
|
517
|
+
for (const ref of refMap.keys()) {
|
|
518
|
+
if (!visibleSet.has(ref)) toRemove.add(ref);
|
|
519
|
+
}
|
|
466
520
|
for (const ref of toRemove) {
|
|
467
521
|
refMap.delete(ref);
|
|
468
522
|
}
|
|
469
|
-
// Remove output lines for filtered refs
|
|
470
523
|
for (let i = output.length - 1; i >= 0; i--) {
|
|
471
524
|
const match = output[i].match(/@(e\d+)/);
|
|
472
525
|
if (match && toRemove.has(match[1])) {
|