@ulpi/browse 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/BENCHMARKS.md +222 -0
- package/LICENSE +21 -0
- package/README.md +324 -0
- package/bin/browse.ts +2 -0
- package/package.json +54 -0
- package/skill/SKILL.md +301 -0
- package/src/browser-manager.ts +687 -0
- package/src/buffers.ts +81 -0
- package/src/bun.d.ts +47 -0
- package/src/cli.ts +442 -0
- package/src/commands/meta.ts +358 -0
- package/src/commands/read.ts +304 -0
- package/src/commands/write.ts +259 -0
- package/src/constants.ts +12 -0
- package/src/diff.d.ts +12 -0
- package/src/install-skill.ts +98 -0
- package/src/server.ts +325 -0
- package/src/session-manager.ts +121 -0
- package/src/snapshot.ts +497 -0
- package/src/types.ts +12 -0
package/BENCHMARKS.md
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
# Benchmarks: @ulpi/browse vs @playwright/mcp
|
|
2
|
+
|
|
3
|
+
Measured 2026-03-15. Same machine, same Chromium, same pages.
|
|
4
|
+
|
|
5
|
+
## What Gets Dumped Into the AI Context
|
|
6
|
+
|
|
7
|
+
**@playwright/mcp**: Every `browser_navigate`, `browser_click`, or `browser_type` returns the **full accessibility snapshot** — automatically, whether you need it or not.
|
|
8
|
+
|
|
9
|
+
**@ulpi/browse**: `goto` returns a one-liner (`"Navigated to ... (200)"`). The agent **chooses** what to request: `text`, `snapshot -i`, `links`, `forms`, etc.
|
|
10
|
+
|
|
11
|
+
## Per-Page Token Cost
|
|
12
|
+
|
|
13
|
+
| Site | Page | @playwright/mcp navigate | browse snapshot -i | Ratio |
|
|
14
|
+
|------|------|-------------------------:|-------------------:|------:|
|
|
15
|
+
| mumzworld.com | Homepage | ~51,151 | ~15,072 | **3x** |
|
|
16
|
+
| mumzworld.com | Search | ~13,860 | ~3,614 | **4x** |
|
|
17
|
+
| mumzworld.com | PDP | ~10,071 | ~3,084 | **3x** |
|
|
18
|
+
| amazon.com | Homepage | ~10,431 | ~2,150 | **5x** |
|
|
19
|
+
| amazon.com | Search | ~19,458 | ~3,644 | **5x** |
|
|
20
|
+
| ebay.com | Homepage | ~4,641 | ~1,557 | **3x** |
|
|
21
|
+
| ebay.com | Search | ~35,929 | ~7,088 | **5x** |
|
|
22
|
+
| ebay.com | PDP | ~1,294 | ~678 | **2x** |
|
|
23
|
+
| nike.com | Homepage | ~2,495 | ~816 | **3x** |
|
|
24
|
+
| nike.com | Search | ~7,998 | ~2,678 | **3x** |
|
|
25
|
+
| nike.com | PDP | ~3,034 | ~989 | **3x** |
|
|
26
|
+
| **TOTAL** | **11 pages** | **~160,362** | **~41,370** | **4x** |
|
|
27
|
+
|
|
28
|
+
`browse goto` alone costs ~10-25 tokens per navigation (one-liner confirmation). The agent requests a snapshot only when it needs to see the page.
|
|
29
|
+
|
|
30
|
+
## 10-Step Agent Session
|
|
31
|
+
|
|
32
|
+
A typical flow: navigate, snapshot, click, snapshot, fill, click, snapshot, check result.
|
|
33
|
+
|
|
34
|
+
| | @playwright/mcp | @ulpi/browse |
|
|
35
|
+
|---|---:|---:|
|
|
36
|
+
| Tokens per navigate/click/type | ~14,578 (auto-dumped) | ~15 (one-liner) |
|
|
37
|
+
| 10 actions total | ~145,780 | ~11,388 (3 snapshots + 7 actions) |
|
|
38
|
+
| Context consumed (200K window) | 73% | 6% |
|
|
39
|
+
|
|
40
|
+
## Raw Data
|
|
41
|
+
|
|
42
|
+
### mumzworld.com
|
|
43
|
+
|
|
44
|
+
#### Homepage
|
|
45
|
+
|
|
46
|
+
| Approach | Size | ~Tokens | Notes |
|
|
47
|
+
|----------|-----:|--------:|-------|
|
|
48
|
+
| @playwright/mcp navigate | 199.8 KB | ~51,151 | Full snapshot auto-dumped |
|
|
49
|
+
| Playwright page.content() | 3.50 MB | ~917,905 | Raw HTML |
|
|
50
|
+
| browse goto | 44 B | ~11 | One-liner |
|
|
51
|
+
| browse text | 19.4 KB | ~4,971 | Clean visible text |
|
|
52
|
+
| browse snapshot | 159.5 KB | ~40,844 | Full tree + @refs |
|
|
53
|
+
| **browse snapshot -i** | **58.9 KB** | **~15,072** | **Interactive + @refs** |
|
|
54
|
+
| browse links | 62.2 KB | ~15,913 | Text → URL |
|
|
55
|
+
| browse forms | 213 B | ~53 | Structured JSON |
|
|
56
|
+
|
|
57
|
+
#### Search
|
|
58
|
+
|
|
59
|
+
| Approach | Size | ~Tokens | Notes |
|
|
60
|
+
|----------|-----:|--------:|-------|
|
|
61
|
+
| @playwright/mcp navigate | 54.1 KB | ~13,860 | Full snapshot auto-dumped |
|
|
62
|
+
| Playwright page.content() | 1.08 MB | ~283,764 | Raw HTML |
|
|
63
|
+
| browse goto | 66 B | ~17 | One-liner |
|
|
64
|
+
| browse text | 6.6 KB | ~1,687 | Clean visible text |
|
|
65
|
+
| browse snapshot | 49.2 KB | ~12,585 | Full tree + @refs |
|
|
66
|
+
| **browse snapshot -i** | **14.1 KB** | **~3,614** | **Interactive + @refs** |
|
|
67
|
+
| browse links | 14.0 KB | ~3,587 | Text → URL |
|
|
68
|
+
| browse forms | 305 B | ~76 | Structured JSON |
|
|
69
|
+
|
|
70
|
+
#### PDP
|
|
71
|
+
|
|
72
|
+
| Approach | Size | ~Tokens | Notes |
|
|
73
|
+
|----------|-----:|--------:|-------|
|
|
74
|
+
| @playwright/mcp navigate | 39.3 KB | ~10,071 | Full snapshot auto-dumped |
|
|
75
|
+
| Playwright page.content() | 1.48 MB | ~387,614 | Raw HTML |
|
|
76
|
+
| browse goto | 101 B | ~25 | One-liner |
|
|
77
|
+
| browse text | 6.9 KB | ~1,767 | Clean visible text |
|
|
78
|
+
| browse snapshot | 33.2 KB | ~8,508 | Full tree + @refs |
|
|
79
|
+
| **browse snapshot -i** | **12.0 KB** | **~3,084** | **Interactive + @refs** |
|
|
80
|
+
| browse links | 12.5 KB | ~3,203 | Text → URL |
|
|
81
|
+
| browse forms | 545 B | ~136 | Structured JSON |
|
|
82
|
+
|
|
83
|
+
### amazon.com
|
|
84
|
+
|
|
85
|
+
#### Homepage
|
|
86
|
+
|
|
87
|
+
| Approach | Size | ~Tokens | Notes |
|
|
88
|
+
|----------|-----:|--------:|-------|
|
|
89
|
+
| @playwright/mcp navigate | 40.7 KB | ~10,431 | Full snapshot auto-dumped |
|
|
90
|
+
| Playwright page.content() | 584.2 KB | ~149,544 | Raw HTML |
|
|
91
|
+
| browse goto | 41 B | ~10 | One-liner |
|
|
92
|
+
| browse text | 4.7 KB | ~1,192 | Clean visible text |
|
|
93
|
+
| browse snapshot | 19.6 KB | ~5,008 | Full tree + @refs |
|
|
94
|
+
| **browse snapshot -i** | **8.4 KB** | **~2,150** | **Interactive + @refs** |
|
|
95
|
+
| browse links | 38.5 KB | ~9,853 | Text → URL |
|
|
96
|
+
| browse forms | 4.2 KB | ~1,075 | Structured JSON |
|
|
97
|
+
|
|
98
|
+
#### Search
|
|
99
|
+
|
|
100
|
+
| Approach | Size | ~Tokens | Notes |
|
|
101
|
+
|----------|-----:|--------:|-------|
|
|
102
|
+
| @playwright/mcp navigate | 76.0 KB | ~19,458 | Full snapshot auto-dumped |
|
|
103
|
+
| Playwright page.content() | 673.5 KB | ~172,417 | Raw HTML |
|
|
104
|
+
| browse goto | 59 B | ~15 | One-liner |
|
|
105
|
+
| browse text | 8.1 KB | ~2,069 | Clean visible text |
|
|
106
|
+
| browse snapshot | 29.1 KB | ~7,446 | Full tree + @refs |
|
|
107
|
+
| **browse snapshot -i** | **14.2 KB** | **~3,644** | **Interactive + @refs** |
|
|
108
|
+
| browse links | 49.7 KB | ~12,712 | Text → URL |
|
|
109
|
+
| browse forms | 5.4 KB | ~1,377 | Structured JSON |
|
|
110
|
+
|
|
111
|
+
### ebay.com
|
|
112
|
+
|
|
113
|
+
#### Homepage
|
|
114
|
+
|
|
115
|
+
| Approach | Size | ~Tokens | Notes |
|
|
116
|
+
|----------|-----:|--------:|-------|
|
|
117
|
+
| @playwright/mcp navigate | 18.1 KB | ~4,641 | Full snapshot auto-dumped |
|
|
118
|
+
| Playwright page.content() | 1.70 MB | ~445,637 | Raw HTML |
|
|
119
|
+
| browse goto | 39 B | ~10 | One-liner |
|
|
120
|
+
| browse text | 4.9 KB | ~1,245 | Clean visible text |
|
|
121
|
+
| browse snapshot | 10.6 KB | ~2,715 | Full tree + @refs |
|
|
122
|
+
| **browse snapshot -i** | **6.1 KB** | **~1,557** | **Interactive + @refs** |
|
|
123
|
+
| browse links | 29.4 KB | ~7,533 | Text → URL |
|
|
124
|
+
| browse forms | 3.9 KB | ~1,006 | Structured JSON |
|
|
125
|
+
|
|
126
|
+
#### Search
|
|
127
|
+
|
|
128
|
+
| Approach | Size | ~Tokens | Notes |
|
|
129
|
+
|----------|-----:|--------:|-------|
|
|
130
|
+
| @playwright/mcp navigate | 140.3 KB | ~35,929 | Full snapshot auto-dumped |
|
|
131
|
+
| Playwright page.content() | 1.26 MB | ~331,247 | Raw HTML |
|
|
132
|
+
| browse goto | 69 B | ~17 | One-liner |
|
|
133
|
+
| browse text | 17.7 KB | ~4,526 | Clean visible text |
|
|
134
|
+
| browse snapshot | 57.6 KB | ~14,750 | Full tree + @refs |
|
|
135
|
+
| **browse snapshot -i** | **27.7 KB** | **~7,088** | **Interactive + @refs** |
|
|
136
|
+
| browse links | 61.9 KB | ~15,851 | Text → URL |
|
|
137
|
+
| browse forms | 4.4 KB | ~1,124 | Structured JSON |
|
|
138
|
+
|
|
139
|
+
#### PDP
|
|
140
|
+
|
|
141
|
+
| Approach | Size | ~Tokens | Notes |
|
|
142
|
+
|----------|-----:|--------:|-------|
|
|
143
|
+
| @playwright/mcp navigate | 5.1 KB | ~1,294 | Full snapshot auto-dumped |
|
|
144
|
+
| Playwright page.content() | 1.07 MB | ~279,725 | Raw HTML |
|
|
145
|
+
| browse goto | 56 B | ~14 | One-liner |
|
|
146
|
+
| browse text | 1.2 KB | ~315 | Clean visible text |
|
|
147
|
+
| browse snapshot | 3.5 KB | ~889 | Full tree + @refs |
|
|
148
|
+
| **browse snapshot -i** | **2.6 KB** | **~678** | **Interactive + @refs** |
|
|
149
|
+
| browse links | 7.6 KB | ~1,934 | Text → URL |
|
|
150
|
+
| browse forms | 3.9 KB | ~1,006 | Structured JSON |
|
|
151
|
+
|
|
152
|
+
### nike.com
|
|
153
|
+
|
|
154
|
+
#### Homepage
|
|
155
|
+
|
|
156
|
+
| Approach | Size | ~Tokens | Notes |
|
|
157
|
+
|----------|-----:|--------:|-------|
|
|
158
|
+
| @playwright/mcp navigate | 9.7 KB | ~2,495 | Full snapshot auto-dumped |
|
|
159
|
+
| Playwright page.content() | 700.4 KB | ~179,315 | Raw HTML |
|
|
160
|
+
| browse goto | 39 B | ~10 | One-liner |
|
|
161
|
+
| browse text | 2.4 KB | ~607 | Clean visible text |
|
|
162
|
+
| browse snapshot | 5.1 KB | ~1,315 | Full tree + @refs |
|
|
163
|
+
| **browse snapshot -i** | **3.2 KB** | **~816** | **Interactive + @refs** |
|
|
164
|
+
| browse links | 30.2 KB | ~7,744 | Text → URL |
|
|
165
|
+
| browse forms | 1.3 KB | ~341 | Structured JSON |
|
|
166
|
+
|
|
167
|
+
#### Search
|
|
168
|
+
|
|
169
|
+
| Approach | Size | ~Tokens | Notes |
|
|
170
|
+
|----------|-----:|--------:|-------|
|
|
171
|
+
| @playwright/mcp navigate | 31.2 KB | ~7,998 | Full snapshot auto-dumped |
|
|
172
|
+
| Playwright page.content() | 1.08 MB | ~282,582 | Raw HTML |
|
|
173
|
+
| browse goto | 57 B | ~14 | One-liner |
|
|
174
|
+
| browse text | 5.9 KB | ~1,502 | Clean visible text |
|
|
175
|
+
| browse snapshot | 16.2 KB | ~4,152 | Full tree + @refs |
|
|
176
|
+
| **browse snapshot -i** | **10.5 KB** | **~2,678** | **Interactive + @refs** |
|
|
177
|
+
| browse links | 26.6 KB | ~6,798 | Text → URL |
|
|
178
|
+
| browse forms | 291 B | ~73 | Structured JSON |
|
|
179
|
+
|
|
180
|
+
#### PDP
|
|
181
|
+
|
|
182
|
+
| Approach | Size | ~Tokens | Notes |
|
|
183
|
+
|----------|-----:|--------:|-------|
|
|
184
|
+
| @playwright/mcp navigate | 11.9 KB | ~3,034 | Full snapshot auto-dumped |
|
|
185
|
+
| Playwright page.content() | 972.4 KB | ~248,945 | Raw HTML |
|
|
186
|
+
| browse goto | 81 B | ~20 | One-liner |
|
|
187
|
+
| browse text | 5.1 KB | ~1,313 | Clean visible text |
|
|
188
|
+
| browse snapshot | 9.0 KB | ~2,314 | Full tree + @refs |
|
|
189
|
+
| **browse snapshot -i** | **3.9 KB** | **~989** | **Interactive + @refs** |
|
|
190
|
+
| browse links | 24.2 KB | ~6,205 | Text → URL |
|
|
191
|
+
| browse forms | 283 B | ~71 | Structured JSON |
|
|
192
|
+
|
|
193
|
+
## Architectural Differences
|
|
194
|
+
|
|
195
|
+
| | @playwright/mcp | @ulpi/browse |
|
|
196
|
+
|---|---|---|
|
|
197
|
+
| **Navigate response** | Full snapshot (~5-50K tokens) | One-liner (~15 tokens) |
|
|
198
|
+
| **Click/type response** | Full snapshot (~5-50K tokens) | One-liner (~15 tokens) |
|
|
199
|
+
| **Agent controls output** | No — always gets full dump | Yes — requests what it needs |
|
|
200
|
+
| **Interactive-only filter** | No | `snapshot -i` |
|
|
201
|
+
| **Cursor-interactive detection** | No | `snapshot -C` (cursor:pointer, onclick, tabindex, data-action) |
|
|
202
|
+
| **Clean text extraction** | No | `text` command |
|
|
203
|
+
| **Form discovery** | No | `forms` command |
|
|
204
|
+
| **Link extraction** | No | `links` command |
|
|
205
|
+
| **Network/console logs** | console_messages only | `network` + `console` |
|
|
206
|
+
| **Performance timing** | No | `perf` command |
|
|
207
|
+
| **Cookies/storage** | No | `cookies` + `storage` |
|
|
208
|
+
| **Snapshot diff** | No | `snapshot-diff` |
|
|
209
|
+
| **Page text diff** | No | `diff <url1> <url2>` |
|
|
210
|
+
| **Responsive screenshots** | No | `responsive` (3 viewports) |
|
|
211
|
+
| **Persistent daemon** | No — new browser per session | Yes — ~100ms per command |
|
|
212
|
+
| **Crash recovery** | No | Auto-restart with safe retry |
|
|
213
|
+
| **Total commands** | ~15 tools | 40+ commands |
|
|
214
|
+
|
|
215
|
+
## Methodology
|
|
216
|
+
|
|
217
|
+
- Token estimates: ~4 chars per token (standard approximation)
|
|
218
|
+
- @playwright/mcp: simulated by calling `ariaSnapshot()` after navigation (identical to what the MCP server does internally)
|
|
219
|
+
- All pages loaded with `waitUntil: domcontentloaded` + 2.5s settle
|
|
220
|
+
- Pages returning < 200 bytes excluded (bot detection)
|
|
221
|
+
- Measured 2026-03-15, Playwright 1.58.2, Chromium headless
|
|
222
|
+
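- Rerun: `bun run benchmark` from a checkout of the repository (the `benchmark.ts` script is not included in the published package)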
|
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Ciprian Hacman
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
# @ulpi/browse
|
|
2
|
+
|
|
3
|
+
**The headless browser CLI built for AI agents — not humans.**
|
|
4
|
+
|
|
5
|
+
When AI agents browse the web, the bottleneck isn't Chromium — it's **what gets dumped into the context window**. [`@playwright/mcp`](https://github.com/microsoft/playwright-mcp) sends the full accessibility snapshot on every navigate, click, and keystroke. On a real e-commerce page, that's **~14,600 tokens per action** on average — automatically, whether the agent needs it or not.
|
|
6
|
+
|
|
7
|
+
Ten actions and you've burned **146K tokens — 73% of a 200K context window** — just on browser output. That leaves almost nothing for the agent to actually think.
|
|
8
|
+
|
|
9
|
+
`@ulpi/browse` flips this. Navigation returns ~11 tokens. Clicks return ~15 tokens. The agent requests a page snapshot **only when it needs one** — and can filter to interactive elements only, cutting the snapshot by another 1.3-3.5x in the benchmarks below.
|
|
10
|
+
|
|
11
|
+
**Same 10 actions: ~11K tokens. 6% of context. 13x less than @playwright/mcp.**
|
|
12
|
+
|
|
13
|
+
## Benchmarks (Measured)
|
|
14
|
+
|
|
15
|
+
Tested on 4 e-commerce sites (mumzworld, amazon, ebay, nike) across homepage, search results, and product detail pages — 11 pages in total, since no product detail page was measured for amazon.com ([raw data](BENCHMARKS.md)):
|
|
16
|
+
|
|
17
|
+
| Site | Page | @playwright/mcp navigate | browse snapshot -i | Reduction |
|
|
18
|
+
|------|------|-------------------------:|-------------------:|----------:|
|
|
19
|
+
| mumzworld.com | Homepage | ~51,151 | ~15,072 | **3x** |
|
|
20
|
+
| mumzworld.com | Search | ~13,860 | ~3,614 | **4x** |
|
|
21
|
+
| mumzworld.com | PDP | ~10,071 | ~3,084 | **3x** |
|
|
22
|
+
| amazon.com | Homepage | ~10,431 | ~2,150 | **5x** |
|
|
23
|
+
| amazon.com | Search | ~19,458 | ~3,644 | **5x** |
|
|
24
|
+
| ebay.com | Homepage | ~4,641 | ~1,557 | **3x** |
|
|
25
|
+
| ebay.com | Search | ~35,929 | ~7,088 | **5x** |
|
|
26
|
+
| ebay.com | PDP | ~1,294 | ~678 | **2x** |
|
|
27
|
+
| nike.com | Homepage | ~2,495 | ~816 | **3x** |
|
|
28
|
+
| nike.com | Search | ~7,998 | ~2,678 | **3x** |
|
|
29
|
+
| nike.com | PDP | ~3,034 | ~989 | **3x** |
|
|
30
|
+
| **TOTAL** | **11 pages** | **~160,362** | **~41,370** | **4x** |
|
|
31
|
+
|
|
32
|
+
And that's the per-snapshot comparison. The real gap is architectural — @playwright/mcp dumps a snapshot on every action (navigate, click, type). `browse` only returns ~15 tokens per action:
|
|
33
|
+
|
|
34
|
+
| | @playwright/mcp | @ulpi/browse |
|
|
35
|
+
|---|---:|---:|
|
|
36
|
+
| Tokens on `navigate` | ~14,578 (auto-dumped) | **~11** (one-liner) |
|
|
37
|
+
| Tokens on `click` | ~14,578 (auto-dumped) | **~15** (one-liner) |
|
|
38
|
+
| 10-action session | ~145,780 | **~11,388** |
|
|
39
|
+
| Context consumed (200K) | **73%** | **6%** |
|
|
40
|
+
|
|
41
|
+
The agent decides when to see the page. Most actions don't need a snapshot.
|
|
42
|
+
|
|
43
|
+
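Rerun: `bun run benchmark` (from a checkout of the repository — the benchmark script is not included in the published package)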
|
|
44
|
+
|
|
45
|
+
## Why It's Faster
|
|
46
|
+
|
|
47
|
+
### 1. You Control What Enters the Context
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
@playwright/mcp browser_navigate → 51,150 tokens (full snapshot, every time)
|
|
51
|
+
|
|
52
|
+
browse goto → 11 tokens ("Navigated to https://... (200)")
|
|
53
|
+
browse text → 4,970 tokens (clean visible text, when you need it)
|
|
54
|
+
browse snapshot -i → 15,072 tokens (interactive elements + refs, when you need it)
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
You pick the right view for the task. Reading prices? Use `text`. Need to click something? Use `snapshot -i`. Just navigating? `goto` is enough.
|
|
58
|
+
|
|
59
|
+
### 2. Ref-Based Interaction — No Selector Construction
|
|
60
|
+
|
|
61
|
+
After `snapshot`, every element gets a ref (`@e1`, `@e2`, ...) backed by a Playwright Locator. The agent doesn't waste tokens constructing CSS selectors:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
$ browse snapshot -i
|
|
65
|
+
@e1 [button] "Help 24/7"
|
|
66
|
+
@e2 [link] "Mumzworld"
|
|
67
|
+
@e3 [searchbox]
|
|
68
|
+
@e4 [link] "Sign In"
|
|
69
|
+
@e5 [link] "Cart"
|
|
70
|
+
|
|
71
|
+
$ browse fill @e3 "strollers"
|
|
72
|
+
Filled @e3
|
|
73
|
+
|
|
74
|
+
$ browse press Enter
|
|
75
|
+
Pressed Enter
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### 3. Cursor-Interactive Detection — What ARIA Misses
|
|
79
|
+
|
|
80
|
+
Modern SPAs use `<div onclick>`, `cursor: pointer`, `tabindex`, and `data-action` for interactivity. These are **invisible** to accessibility trees — both @playwright/mcp and raw `ariaSnapshot()` miss them.
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
$ browse snapshot -i -C
|
|
84
|
+
@e1 [button] "Submit"
|
|
85
|
+
@e2 [textbox] "Email"
|
|
86
|
+
|
|
87
|
+
[cursor-interactive]
|
|
88
|
+
@e3 [div.card] "Add to cart" (cursor:pointer)
|
|
89
|
+
@e4 [span.close] "Close dialog" (onclick)
|
|
90
|
+
@e5 [div.menu] "Open Menu" (data-action)
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
Every detected element gets a ref. `browse click @e3` just works.
|
|
94
|
+
|
|
95
|
+
### 4. 40+ Purpose-Built Commands vs Generic Tools
|
|
96
|
+
|
|
97
|
+
@playwright/mcp has ~15 tools. For anything beyond navigate/click/type, you write JavaScript via `browser_evaluate`. `browse` has purpose-built commands that return structured, minimal output:
|
|
98
|
+
|
|
99
|
+
| Need | @playwright/mcp | browse |
|
|
100
|
+
|------|----------------|--------|
|
|
101
|
+
| Page text | `browser_evaluate` + custom JS | `text` |
|
|
102
|
+
| Form fields | `browser_evaluate` + custom JS | `forms` → structured JSON |
|
|
103
|
+
| All links | `browser_evaluate` + custom JS | `links` → `Text → URL` |
|
|
104
|
+
| Network log | Not available | `network` |
|
|
105
|
+
| Cookies | Not available | `cookies` |
|
|
106
|
+
| Performance | Not available | `perf` |
|
|
107
|
+
| Page diff | Not available | `diff <url1> <url2>` |
|
|
108
|
+
| Snapshot diff | Not available | `snapshot-diff` |
|
|
109
|
+
| Responsive screenshots | Not available | `responsive` |
|
|
110
|
+
| Device emulation | Not available | `emulate iphone` |
|
|
111
|
+
|
|
112
|
+
### 5. Persistent Daemon — 100ms Commands
|
|
113
|
+
|
|
114
|
+
```
|
|
115
|
+
First command: ~2s (server + Chromium startup, once)
|
|
116
|
+
Every command after: ~100-200ms (HTTP to localhost)
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
@playwright/mcp starts a new browser per MCP session. `browse` keeps the server running across commands with auto-shutdown after 30 min idle. Crash recovery is built in — the CLI detects a dead server and restarts transparently.
|
|
120
|
+
|
|
121
|
+
### 6. Multi-Agent Sessions — Parallel Browsing on One Chromium
|
|
122
|
+
|
|
123
|
+
Run multiple AI agents in parallel, each with its own isolated browser session, sharing a single Chromium process. Each session gets its own tabs, refs, cookies, localStorage, and console/network buffers — zero cross-talk.
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
# Agent A researches strollers on mumzworld
|
|
127
|
+
browse --session agent-a goto https://www.mumzworld.com
|
|
128
|
+
browse --session agent-a snapshot -i
|
|
129
|
+
browse --session agent-a fill @e3 "strollers"
|
|
130
|
+
browse --session agent-a press Enter
|
|
131
|
+
|
|
132
|
+
# Agent B checks competitor pricing on amazon — simultaneously
|
|
133
|
+
browse --session agent-b goto https://www.amazon.com
|
|
134
|
+
browse --session agent-b snapshot -i
|
|
135
|
+
browse --session agent-b fill @e6 "baby stroller"
|
|
136
|
+
browse --session agent-b press Enter
|
|
137
|
+
|
|
138
|
+
# Or set once via env var
|
|
139
|
+
export BROWSE_SESSION=agent-a
|
|
140
|
+
browse text # runs in agent-a's session
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
Under the hood, each session is a separate Playwright `BrowserContext` on the shared Chromium — same isolation model as browser profiles (separate cookies, storage, cache). One process, no extra memory for multiple Chromium instances.
|
|
144
|
+
|
|
145
|
+
```
|
|
146
|
+
browse --session <id> <command>
|
|
147
|
+
│
|
|
148
|
+
Persistent server (one Chromium process)
|
|
149
|
+
│
|
|
150
|
+
SessionManager
|
|
151
|
+
├── "default" → BrowserContext → tabs, refs, cookies, buffers
|
|
152
|
+
├── "agent-a" → BrowserContext → tabs, refs, cookies, buffers
|
|
153
|
+
└── "agent-b" → BrowserContext → tabs, refs, cookies, buffers
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
**Session management:**
|
|
157
|
+
```bash
|
|
158
|
+
browse sessions # list active sessions with tab counts
|
|
159
|
+
browse session-close agent-a # close a session (frees its tabs/context)
|
|
160
|
+
browse status # shows total session count
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
Sessions auto-close after the idle timeout (default 30 min). The server shuts down when all sessions are idle. Without `--session`, everything runs in a `"default"` session — fully backward compatible.
|
|
164
|
+
|
|
165
|
+
For full process isolation (separate Chromium instances), use `BROWSE_PORT` to run independent servers.
|
|
166
|
+
|
|
167
|
+
## Install
|
|
168
|
+
|
|
169
|
+
```bash
|
|
170
|
+
bun install -g @ulpi/browse
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
Requires [Bun](https://bun.sh). Chromium is installed automatically via Playwright.
|
|
174
|
+
|
|
175
|
+
### Claude Code Skill (optional)
|
|
176
|
+
|
|
177
|
+
Install the browse skill into your project so Claude Code uses it automatically:
|
|
178
|
+
|
|
179
|
+
```bash
|
|
180
|
+
browse install-skill
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
This copies the skill definition to `.claude/skills/browse/SKILL.md` and adds all browse commands to `.claude/settings.json` permissions — no more approval prompts.
|
|
184
|
+
|
|
185
|
+
## Real-World Example: E-Commerce Flow
|
|
186
|
+
|
|
187
|
+
Agent browses mumzworld.com — search, find a product, add to cart, checkout:
|
|
188
|
+
|
|
189
|
+
```bash
|
|
190
|
+
browse goto https://www.mumzworld.com
|
|
191
|
+
browse snapshot -i # find searchbox → @e3
|
|
192
|
+
browse fill @e3 "strollers"
|
|
193
|
+
browse press Enter
|
|
194
|
+
|
|
195
|
+
browse text # scan prices in results
|
|
196
|
+
browse goto "https://www.mumzworld.com/en/doona-infant-car-seat..."
|
|
197
|
+
|
|
198
|
+
browse snapshot -i # find Add to Cart → @e54
|
|
199
|
+
browse click @e54
|
|
200
|
+
|
|
201
|
+
browse snapshot -i -s "[role=dialog]" # scope to cart modal
|
|
202
|
+
browse click @e3 # "View Cart"
|
|
203
|
+
|
|
204
|
+
browse snapshot -i # find Checkout → @e52
|
|
205
|
+
browse click @e52
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
**12 steps. ~24K tokens total.** With @playwright/mcp: **~240K tokens** for the same flow (every action dumps a full snapshot).
|
|
209
|
+
|
|
210
|
+
## Command Reference
|
|
211
|
+
|
|
212
|
+
### Navigation
|
|
213
|
+
`goto <url>` | `back` | `forward` | `reload` | `url`
|
|
214
|
+
|
|
215
|
+
### Content Extraction
|
|
216
|
+
`text` | `html [sel]` | `links` | `forms` | `accessibility`
|
|
217
|
+
|
|
218
|
+
### Interaction
|
|
219
|
+
`click <sel>` | `fill <sel> <val>` | `select <sel> <val>` | `hover <sel>` | `type <text>` | `press <key>` | `scroll [sel]` | `wait <sel>` | `viewport <WxH>`
|
|
220
|
+
|
|
221
|
+
### Snapshot & Refs
|
|
222
|
+
```
|
|
223
|
+
snapshot [-i] [-c] [-C] [-d N] [-s sel]
|
|
224
|
+
-i Interactive elements only (buttons, links, inputs)
|
|
225
|
+
-c Compact — remove empty structural nodes
|
|
226
|
+
-C Cursor-interactive — detect hidden clickable elements
|
|
227
|
+
-d N Limit tree depth
|
|
228
|
+
-s Scope to CSS selector
|
|
229
|
+
```
|
|
230
|
+
After snapshot, use `@e1`, `@e2`... as selectors in any command.
|
|
231
|
+
|
|
232
|
+
### Snapshot Diff
|
|
233
|
+
`snapshot-diff` — compare current page against last snapshot.
|
|
234
|
+
|
|
235
|
+
### Device Emulation
|
|
236
|
+
`emulate <device>` | `emulate reset` | `devices [filter]`
|
|
237
|
+
|
|
238
|
+
100+ devices: iPhone 12-17, Pixel 5-7, iPad, Galaxy, and all Playwright built-ins.
|
|
239
|
+
|
|
240
|
+
### Inspection
|
|
241
|
+
`js <expr>` | `eval <file>` | `css <sel> <prop>` | `attrs <sel>` | `state <sel>` | `console [--clear]` | `network [--clear]` | `cookies` | `storage [set <k> <v>]` | `perf`
|
|
242
|
+
|
|
243
|
+
### Visual
|
|
244
|
+
`screenshot [path]` | `screenshot --annotate` | `pdf [path]` | `responsive [prefix]`
|
|
245
|
+
|
|
246
|
+
### Compare
|
|
247
|
+
`diff <url1> <url2>` — text diff between two pages.
|
|
248
|
+
|
|
249
|
+
### Multi-Step
|
|
250
|
+
```bash
|
|
251
|
+
echo '[["goto","https://example.com"],["text"]]' | browse chain
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
### Tabs
|
|
255
|
+
`tabs` | `tab <id>` | `newtab [url]` | `closetab [id]`
|
|
256
|
+
|
|
257
|
+
### Sessions
|
|
258
|
+
`sessions` | `session-close <id>`
|
|
259
|
+
|
|
260
|
+
### Server Control
|
|
261
|
+
`status` | `cookie <n>=<v>` | `header <n>:<v>` | `useragent <str>` | `stop` | `restart`
|
|
262
|
+
|
|
263
|
+
## Architecture
|
|
264
|
+
|
|
265
|
+
```
|
|
266
|
+
browse [--session <id>] <command>
|
|
267
|
+
│
|
|
268
|
+
▼
|
|
269
|
+
CLI (thin HTTP client)
|
|
270
|
+
X-Browse-Session: <id>
|
|
271
|
+
│
|
|
272
|
+
▼
|
|
273
|
+
Persistent server (localhost, auto-started)
|
|
274
|
+
│
|
|
275
|
+
SessionManager
|
|
276
|
+
├── Session "default" → BrowserContext + tabs + refs + buffers
|
|
277
|
+
├── Session "agent-a" → BrowserContext + tabs + refs + buffers
|
|
278
|
+
└── Session "agent-b" → BrowserContext + tabs + refs + buffers
|
|
279
|
+
│
|
|
280
|
+
▼
|
|
281
|
+
Chromium (Playwright, headless, shared)
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
## Environment Variables
|
|
285
|
+
|
|
286
|
+
| Variable | Default | Description |
|
|
287
|
+
|----------|---------|-------------|
|
|
288
|
+
| `BROWSE_PORT` | auto 9400-10400 | Fixed server port |
|
|
289
|
+
| `BROWSE_SESSION` | (none) | Default session ID for all commands |
|
|
290
|
+
| `BROWSE_IDLE_TIMEOUT` | 1800000 (30m) | Idle shutdown in ms |
|
|
291
|
+
| `BROWSE_LOCAL_DIR` | `.browse/` or `/tmp` | State/log directory |
|
|
292
|
+
|
|
293
|
+
## Acknowledgments
|
|
294
|
+
|
|
295
|
+
Inspired by and originally derived from the `/browse` skill in [gstack](https://github.com/garrytan/gstack) by Garry Tan. The core architecture — persistent Chromium daemon, thin CLI client, ref-based element selection via ARIA snapshots — comes from gstack.
|
|
296
|
+
|
|
297
|
+
### Added beyond gstack
|
|
298
|
+
|
|
299
|
+
**New commands:**
|
|
300
|
+
- `emulate` / `devices` — device emulation with 100+ devices (iPhone, Pixel, iPad, custom descriptors)
|
|
301
|
+
- `snapshot -C` — cursor-interactive detection (cursor:pointer, onclick, tabindex, data-action)
|
|
302
|
+
- `snapshot-diff` — before/after comparison with ref-number stripping
|
|
303
|
+
- `dialog` / `dialog-accept` / `dialog-dismiss` — dialog handling with prompt value support
|
|
304
|
+
- `state` — element state inspection (visible, enabled, checked, focused, bounding box)
|
|
305
|
+
- `upload` — file upload to input elements
|
|
306
|
+
- `sessions` / `session-close` — multi-agent session multiplexing
|
|
307
|
+
- `screenshot --annotate` — numbered badge overlay with legend
|
|
308
|
+
|
|
309
|
+
**Architectural improvements:**
|
|
310
|
+
- Session multiplexing — multiple agents share one Chromium via isolated BrowserContexts
|
|
311
|
+
- Per-tab ref scoping — refs belong to the tab that created them, cross-tab usage throws clear error
|
|
312
|
+
- Per-tab snapshot baselines — `snapshot-diff` compares the correct baseline after tab switches
|
|
313
|
+
- Safe retry classification — read commands auto-retry after crash, write commands don't (prevents double form submissions)
|
|
314
|
+
- Concurrency-safe server spawning — file lock with stale detection prevents race conditions
|
|
315
|
+
- Network correlation via WeakMap — accurate request/response pairing even with duplicate URLs
|
|
316
|
+
- Content-Length based sizing — avoids reading response bodies into memory
|
|
317
|
+
- TreeWalker text extraction — `text` command never triggers MutationObservers
|
|
318
|
+
- Tab creation rollback — failed `newTab(url)` closes the page instead of leaving orphan tabs
|
|
319
|
+
- Context recreation with rollback — `emulate`/`useragent` preserve cookies and all tab URLs, rollback on failure
|
|
320
|
+
- Crash callback — server flushes buffers and cleans state file before exit
|
|
321
|
+
|
|
322
|
+
## License
|
|
323
|
+
|
|
324
|
+
MIT
|
package/bin/browse.ts
ADDED
package/package.json
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@ulpi/browse",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"repository": {
|
|
5
|
+
"type": "git",
|
|
6
|
+
"url": "https://github.com/ulpi-io/browse"
|
|
7
|
+
},
|
|
8
|
+
"dependencies": {
|
|
9
|
+
"diff": "^7.0.0",
|
|
10
|
+
"playwright": "^1.58.2"
|
|
11
|
+
},
|
|
12
|
+
"bin": {
|
|
13
|
+
"browse": "bin/browse.ts"
|
|
14
|
+
},
|
|
15
|
+
"description": "Fast headless browser CLI — persistent Chromium daemon via Playwright.",
|
|
16
|
+
"engines": {
|
|
17
|
+
"bun": ">=1.0.0"
|
|
18
|
+
},
|
|
19
|
+
"files": [
|
|
20
|
+
"bin/",
|
|
21
|
+
"src/",
|
|
22
|
+
"skill/",
|
|
23
|
+
"LICENSE",
|
|
24
|
+
"README.md",
|
|
25
|
+
"BENCHMARKS.md"
|
|
26
|
+
],
|
|
27
|
+
"keywords": [
|
|
28
|
+
"browser",
|
|
29
|
+
"automation",
|
|
30
|
+
"playwright",
|
|
31
|
+
"headless",
|
|
32
|
+
"cli",
|
|
33
|
+
"ai-agent",
|
|
34
|
+
"claude"
|
|
35
|
+
],
|
|
36
|
+
"license": "MIT",
|
|
37
|
+
"publishConfig": {
|
|
38
|
+
"access": "public"
|
|
39
|
+
},
|
|
40
|
+
"type": "module",
|
|
41
|
+
"devDependencies": {
|
|
42
|
+
"@types/node": "^25.5.0",
|
|
43
|
+
"typescript": "^5.9.3"
|
|
44
|
+
},
|
|
45
|
+
"scripts": {
|
|
46
|
+
"build": "bun build --compile src/cli.ts --outfile dist/browse",
|
|
47
|
+
"dev": "bun run src/cli.ts",
|
|
48
|
+
"server": "bun run src/server.ts",
|
|
49
|
+
"test": "bun test",
|
|
50
|
+
"start": "bun run src/server.ts",
|
|
51
|
+
"postinstall": "bunx playwright install chromium",
|
|
52
|
+
"benchmark": "bun run benchmark.ts"
|
|
53
|
+
}
|
|
54
|
+
}
|