@ulpi/browse 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -92,7 +92,7 @@ $ browse snapshot -i -C
92
92
 
93
93
  Every detected element gets a ref. `browse click @e3` just works.
94
94
 
95
- ### 4. 58+ Purpose-Built Commands vs Generic Tools
95
+ ### 4. 75 Purpose-Built Commands vs Generic Tools
96
96
 
97
97
  @playwright/mcp has ~15 tools. For anything beyond navigate/click/type, you write JavaScript via `browser_evaluate`. `browse` has purpose-built commands that return structured, minimal output:
98
98
 
@@ -116,6 +116,9 @@ Every detected element gets a ref. `browse click @e3` just works.
116
116
  | State persistence | Not available | `state save\|load` |
117
117
  | Credential vault | Not available | `auth save\|login\|list` |
118
118
  | HAR recording | Not available | `har start\|stop` |
119
+ | Clipboard access | Not available | `clipboard [write <text>]` |
120
+ | Element finding | Not available | `find role\|text\|label\|placeholder\|testid` |
121
+ | DevTools inspect | Not available | `inspect` |
119
122
  | Domain restriction | Not available | `--allowed-domains` |
120
123
  | Prompt injection defense | Not available | `--content-boundaries` |
121
124
  | JSON output mode | Not available | `--json` |
@@ -255,13 +258,17 @@ After snapshot, use `@e1`, `@e2`... as selectors in any command.
255
258
  100+ devices: iPhone 12-17, Pixel 5-7, iPad, Galaxy, and all Playwright built-ins.
256
259
 
257
260
  ### Inspection
258
- `js <expr>` | `eval <file>` | `css <sel> <prop>` | `attrs <sel>` | `element-state <sel>` | `value <sel>` | `count <sel>` | `console [--clear]` | `network [--clear]` | `cookies` | `storage [set <k> <v>]` | `perf`
261
+ `js <expr>` | `eval <file>` | `css <sel> <prop>` | `attrs <sel>` | `element-state <sel>` | `value <sel>` | `count <sel>` | `clipboard [write <text>]` | `console [--clear]` | `network [--clear]` | `cookies` | `storage [set <k> <v>]` | `perf`
259
262
 
260
263
  ### Visual
261
264
  `screenshot [path]` | `screenshot --annotate` | `pdf [path]` | `responsive [prefix]`
262
265
 
263
266
  ### Compare
264
267
  `diff <url1> <url2>` — text diff between two pages.
268
+ `screenshot-diff <baseline> [current]` — pixel-level visual regression testing.
269
+
270
+ ### Find
271
+ `find role|text|label|placeholder|testid <query> [name]` — semantic element locators.
265
272
 
266
273
  ### Multi-Step
267
274
  ```bash
@@ -281,13 +288,16 @@ echo '[["goto","https://example.com"],["text"]]' | browse chain
281
288
  `route <pattern> block` | `route <pattern> fulfill <status> [body]` | `route clear` | `offline [on|off]`
282
289
 
283
290
  ### State & Auth
284
- `state save [name]` | `state load [name]` | `auth save <name> <url> <user> <pass>` | `auth login <name>` | `auth list` | `auth delete <name>`
291
+ `state save [name]` | `state load [name]` | `state list` | `state show [name]` | `auth save <name> <url> <user> <pass>` | `auth login <name>` | `auth list` | `auth delete <name>`
285
292
 
286
293
  ### Recording
287
294
  `har start` | `har stop [path]`
288
295
 
296
+ ### Debug
297
+ `inspect` — open DevTools debugger (requires `BROWSE_DEBUG_PORT`).
298
+
289
299
  ### Server Control
290
- `status` | `cookie <n>=<v>` | `header <n>:<v>` | `useragent <str>` | `stop` | `restart`
300
+ `status` | `instances` | `cookie <n>=<v>` | `header <n>:<v>` | `useragent <str>` | `stop` | `restart`
291
301
 
292
302
  ## Architecture
293
303
 
@@ -310,11 +320,22 @@ browse [--session <id>] <command>
310
320
  Chromium (Playwright, headless, shared)
311
321
  ```
312
322
 
323
+ ## CLI Options
324
+
325
+ | Flag | Description |
326
+ |------|-------------|
327
+ | `--session <id>` | Named session (isolates tabs, refs, cookies) |
328
+ | `--json` | Wrap output as `{success, data, command}` |
329
+ | `--content-boundaries` | Wrap page content in nonce-delimited markers |
330
+ | `--allowed-domains <d,d>` | Block navigation/resources outside allowlist |
331
+ | `--headed` | Run browser in headed (visible) mode |
332
+
313
333
  ## Environment Variables
314
334
 
315
335
  | Variable | Default | Description |
316
336
  |----------|---------|-------------|
317
337
  | `BROWSE_PORT` | auto 9400-10400 | Fixed server port |
338
+ | `BROWSE_PORT_START` | 9400 | Start of port scan range |
318
339
  | `BROWSE_SESSION` | (none) | Default session ID for all commands |
319
340
  | `BROWSE_INSTANCE` | auto (PPID) | Instance ID for multi-Claude isolation |
320
341
  | `BROWSE_IDLE_TIMEOUT` | 1800000 (30m) | Idle shutdown in ms |
@@ -323,33 +344,35 @@ browse [--session <id>] <command>
323
344
  | `BROWSE_JSON` | (none) | Set to `1` for JSON output mode |
324
345
  | `BROWSE_CONTENT_BOUNDARIES` | (none) | Set to `1` for nonce-delimited output |
325
346
  | `BROWSE_ALLOWED_DOMAINS` | (none) | Comma-separated domain allowlist |
347
+ | `BROWSE_HEADED` | (none) | Set to `1` for headed (visible) browser mode |
326
348
  | `BROWSE_PROXY` | (none) | Proxy server URL |
327
349
  | `BROWSE_PROXY_BYPASS` | (none) | Proxy bypass list |
328
350
  | `BROWSE_CDP_URL` | (none) | Connect to remote Chrome via CDP |
351
+ | `BROWSE_SERVER_SCRIPT` | auto-detected | Override path to server.ts |
352
+ | `BROWSE_DEBUG_PORT` | (none) | Port for DevTools debugging (inspect command) |
353
+ | `BROWSE_POLICY` | browse-policy.json | Path to action policy file |
354
+ | `BROWSE_CONFIRM_ACTIONS` | (none) | Comma-separated commands requiring confirmation |
355
+ | `BROWSE_ENCRYPTION_KEY` | auto-generated | 64-char hex AES key for credential vault |
356
+ | `BROWSE_AUTH_PASSWORD` | (none) | Password for auth save (alt to `--password-stdin`) |
329
357
 
330
358
  ## Acknowledgments
331
359
 
332
360
  Inspired by and originally derived from the `/browse` skill in [gstack](https://github.com/garrytan/gstack) by Garry Tan. The core architecture — persistent Chromium daemon, thin CLI client, ref-based element selection via ARIA snapshots — comes from gstack.
333
361
 
334
- ### Added beyond gstack
362
+ ## Changelog
335
363
 
336
- **v0.1.0 — Foundation:**
337
- - `emulate` / `devices` — device emulation (100+ devices)
338
- - `snapshot -C` — cursor-interactive detection
339
- - `snapshot-diff` — before/after comparison with ref-number stripping
340
- - `dialog` / `dialog-accept` / `dialog-dismiss` — dialog handling
341
- - `upload` — file upload
342
- - `screenshot --annotate` — numbered badge overlay with legend
343
- - Session multiplexing — multiple agents share one Chromium
344
- - Safe retry classification — read vs write commands
345
- - TreeWalker text extraction — no MutationObserver triggers
364
+ ### v0.3.0 — Headed Mode, Clipboard, DevTools
346
365
 
347
- **v0.2.0 — Security, Interactions, DX:**
348
- - `--json` — structured output mode for agent frameworks
349
- - `--content-boundaries` — CSPRNG nonce wrapping for prompt injection defense
350
- - `--allowed-domains` — domain allowlist (HTTP + WebSocket/EventSource/sendBeacon)
351
- - `browse-policy.json` — action policy gate (allow/deny/confirm per command)
352
- - `auth save/login/list/delete` — AES-256-GCM encrypted credential vault
366
+ - `--headed` flag — run browser in visible mode for debugging and demos
367
+ - `clipboard [write <text>]` — read and write clipboard contents
368
+ - `inspect` command — open DevTools debugger via `BROWSE_DEBUG_PORT`
369
+ - `screenshot --annotate` — pixel-annotated PNG with numbered badges
370
+ - `instances` command — list all running browse servers
371
+ - `BROWSE_DEBUG_PORT` env var for DevTools debugging
372
+
373
+ ### v0.2.0 — Security, Interactions, DX
374
+
375
+ **Commands:**
353
376
  - `dblclick`, `focus`, `check`, `uncheck`, `drag`, `keydown`, `keyup` — interaction commands
354
377
  - `frame <sel>` / `frame main` — iframe targeting
355
378
  - `value <sel>`, `count <sel>` — element inspection
@@ -363,15 +386,41 @@ Inspired by and originally derived from the `/browse` skill in [gstack](https://
363
386
  - `har start/stop` — HAR recording and export
364
387
  - `screenshot-diff` — pixel-level visual regression testing
365
388
  - `find role/text/label/placeholder/testid` — semantic element locators
366
- - Auto-instance servers via PPID — multi-Claude isolation
367
- - Per-session output folders (`.browse/sessions/{id}/`)
389
+
390
+ **Security:**
391
+ - `--allowed-domains` — domain allowlist (HTTP + WebSocket/EventSource/sendBeacon)
392
+ - `browse-policy.json` — action policy gate (allow/deny/confirm per command)
393
+ - `auth save/login/list/delete` — AES-256-GCM encrypted credential vault
394
+ - `--content-boundaries` — CSPRNG nonce wrapping for prompt injection defense
395
+
396
+ **DX:**
397
+ - `--json` — structured output mode for agent frameworks
368
398
  - `browse.json` config file support
369
399
  - AI-friendly error messages — Playwright errors rewritten to actionable hints
400
+ - Per-session output folders (`.browse/sessions/{id}/`)
401
+
402
+ **Infrastructure:**
403
+ - Auto-instance servers via PPID — multi-Claude isolation
370
404
  - CDP remote connection (`BROWSE_CDP_URL`)
371
405
  - Proxy support (`BROWSE_PROXY`)
372
406
  - Compiled binary self-spawn mode
373
407
  - Orphaned server cleanup
374
408
 
409
+ ### v0.1.0 — Foundation
410
+
411
+ **Commands:**
412
+ - `emulate` / `devices` — device emulation (100+ devices)
413
+ - `snapshot -C` — cursor-interactive detection
414
+ - `snapshot-diff` — before/after comparison with ref-number stripping
415
+ - `dialog` / `dialog-accept` / `dialog-dismiss` — dialog handling
416
+ - `upload` — file upload
417
+ - `screenshot --annotate` — numbered badge overlay with legend
418
+
419
+ **Infrastructure:**
420
+ - Session multiplexing — multiple agents share one Chromium
421
+ - Safe retry classification — read vs write commands
422
+ - TreeWalker text extraction — no MutationObserver triggers
423
+
375
424
  ## License
376
425
 
377
426
  MIT
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ulpi/browse",
3
- "version": "0.2.4",
3
+ "version": "0.3.0",
4
4
  "repository": {
5
5
  "type": "git",
6
6
  "url": "https://github.com/ulpi-io/browse"
package/skill/SKILL.md CHANGED
@@ -78,7 +78,10 @@ If the file is missing or does not contain browse permission rules in `permissio
78
78
  "Bash(browse route:*)", "Bash(browse offline:*)",
79
79
  "Bash(browse status:*)", "Bash(browse stop:*)", "Bash(browse restart:*)",
80
80
  "Bash(browse cookie:*)", "Bash(browse header:*)",
81
- "Bash(browse useragent:*)"
81
+ "Bash(browse useragent:*)",
82
+ "Bash(browse clipboard:*)", "Bash(browse screenshot-diff:*)",
83
+ "Bash(browse find:*)", "Bash(browse inspect:*)",
84
+ "Bash(browse instances:*)", "Bash(browse --headed:*)"
82
85
  ```
83
86
 
84
87
  ## IMPORTANT
@@ -201,6 +204,25 @@ browse emulate reset
201
204
  # Parallel sessions
202
205
  browse --session agent-a goto https://site1.com
203
206
  browse --session agent-b goto https://site2.com
207
+
208
+ # Clipboard
209
+ browse clipboard
210
+ browse clipboard write "copied text"
211
+
212
+ # Find elements semantically
213
+ browse find role button
214
+ browse find text "Submit"
215
+ browse find testid "login-btn"
216
+
217
+ # Screenshot diff (visual regression)
218
+ browse screenshot-diff baseline.png current.png
219
+
220
+ # Headed mode (visible browser)
221
+ browse --headed goto https://example.com
222
+
223
+ # State list / show
224
+ browse state list
225
+ browse state show mysite
204
226
  ```
205
227
 
206
228
  ## Command Reference
@@ -297,6 +319,8 @@ browse cookies Dump all cookies as JSON
297
319
  browse storage [set <k> <v>] View/set localStorage
298
320
  browse perf Page load performance timings
299
321
  browse devices [filter] List available device names
322
+ browse clipboard Read system clipboard text
323
+ browse clipboard write <text> Write text to system clipboard
300
324
  ```
301
325
 
302
326
  ### Visual
@@ -313,9 +337,19 @@ browse frame <selector> Target an iframe (subsequent commands run inside
313
337
  browse frame main Return to main page
314
338
  ```
315
339
 
340
+ ### Find (semantic element locators)
341
+ ```
342
+ browse find role <query> Find elements by ARIA role
343
+ browse find text <query> Find elements by text content
344
+ browse find label <query> Find elements by label
345
+ browse find placeholder <query> Find elements by placeholder
346
+ browse find testid <query> Find elements by test ID
347
+ ```
348
+
316
349
  ### Compare
317
350
  ```
318
- browse diff <url1> <url2> Text diff between two pages
351
+ browse diff <url1> <url2> Text diff between two pages
352
+ browse screenshot-diff <base> [curr] Pixel-diff two PNG screenshots
319
353
  ```
320
354
 
321
355
  ### Multi-step (chain)
@@ -342,6 +376,8 @@ browse session-close <id> Close a session
342
376
  ```
343
377
  browse state save [name] Save cookies + localStorage (all origins)
344
378
  browse state load [name] Restore saved state
379
+ browse state list List saved states
380
+ browse state show [name] Show contents of saved state
345
381
  ```
346
382
 
347
383
  ### Auth vault
@@ -361,8 +397,10 @@ browse har stop [path] Stop and save HAR file
361
397
  ### Server management
362
398
  ```
363
399
  browse status Server health, uptime, session count
400
+ browse instances List all running browse servers (instance, PID, port, status)
364
401
  browse stop Shutdown server
365
402
  browse restart Kill + restart server
403
+ browse inspect Open DevTools (requires BROWSE_DEBUG_PORT)
366
404
  ```
367
405
 
368
406
  ## CLI Flags
@@ -373,6 +411,7 @@ browse restart Kill + restart server
373
411
  | `--json` | Wrap output as `{success, data, command}` |
374
412
  | `--content-boundaries` | Wrap page content in nonce-delimited markers (prompt injection defense) |
375
413
  | `--allowed-domains <d,d>` | Block navigation/resources outside allowlist |
414
+ | `--headed` | Run browser in headed (visible) mode |
376
415
 
377
416
  ## Speed Rules
378
417
 
@@ -420,17 +459,24 @@ browse restart Kill + restart server
420
459
  | Secure browsing | `--allowed-domains example.com goto https://example.com` |
421
460
  | Scroll through results | `scroll down` → `text` → `scroll down` → `text` |
422
461
  | Drag and drop | `drag @e1 @e2` |
462
+ | Read/write clipboard | `clipboard` / `clipboard write "text"` |
463
+ | Find by accessibility | `find role button` / `find text "Submit"` |
464
+ | Visual regression | `screenshot-diff baseline.png` |
465
+ | Debug with DevTools | `inspect` (set BROWSE_DEBUG_PORT first) |
466
+ | See the browser | `browse --headed goto <url>` |
423
467
 
424
468
  ## Architecture
425
469
 
426
470
  - Persistent Chromium daemon on localhost (port 9400-10400)
427
471
  - Bearer token auth per session
428
- - Auto-instance: each parent process (Claude Code) gets its own server
472
+ - One server per project directory — `--session` handles agent isolation
429
473
  - Session multiplexing: multiple agents share one Chromium via isolated BrowserContexts
474
+ - For separate servers: set `BROWSE_INSTANCE` env var (e.g., fault isolation between teams)
475
+ - `browse instances` — discover all running servers (PID, port, status, session count)
430
476
  - Project-local state: `.browse/` directory at project root (auto-created, self-gitignored)
431
477
  - `sessions/{id}/` — per-session screenshots, logs, PDFs
432
478
  - `states/{name}.json` — saved browser state (cookies + localStorage)
433
- - `browse-server-{instance}.json` — server PID, port, auth token
479
+ - `browse-server.json` — server PID, port, auth token
434
480
  - Auto-shutdown when all sessions idle past 30 min
435
481
  - Chromium crash → server exits → auto-restarts on next command
436
482
  - AI-friendly error messages: Playwright errors rewritten to actionable hints
@@ -187,6 +187,10 @@ export class BrowserManager {
187
187
  return this.buffers;
188
188
  }
189
189
 
190
+ getBrowser(): Browser | null {
191
+ return this.browser;
192
+ }
193
+
190
194
  getContext(): BrowserContext | null {
191
195
  return this.context;
192
196
  }
@@ -199,11 +203,9 @@ export class BrowserManager {
199
203
  this.browser = await chromium.launch({ headless: true });
200
204
  this.ownsBrowser = true;
201
205
 
202
- // Chromium crash → flush what we can, then exit
206
+ // Chromium crash → notify caller (server uses this to exit; tests ignore it)
203
207
  this.browser.on('disconnected', () => {
204
- console.error('[browse] FATAL: Chromium process crashed or was killed. Server exiting.');
205
208
  if (onCrash) onCrash();
206
- process.exit(1);
207
209
  });
208
210
 
209
211
  this.context = await this.browser.newContext({
package/src/cli.ts CHANGED
@@ -19,14 +19,13 @@ const cliFlags = {
19
19
  json: false,
20
20
  contentBoundaries: false,
21
21
  allowedDomains: '' as string,
22
+ headed: false,
22
23
  };
23
24
 
24
25
  const BROWSE_PORT = parseInt(process.env.BROWSE_PORT || '0', 10);
25
- // Instance isolation: each parent process (e.g., Claude Code) gets its own server.
26
- // BROWSE_PORT takes precedence (explicit), then BROWSE_INSTANCE (env override), then PPID (auto).
27
- // In compiled mode ($bunfs), PPID is unstable (shell forks per invocation) — skip it.
28
- const IS_COMPILED = import.meta.dir.includes('$bunfs');
29
- const BROWSE_INSTANCE = process.env.BROWSE_INSTANCE || (BROWSE_PORT || IS_COMPILED ? '' : String(process.ppid));
26
+ // One server per project directory by default. Sessions handle agent isolation.
27
+ // For multiple servers on the same project: set BROWSE_INSTANCE or BROWSE_PORT.
28
+ const BROWSE_INSTANCE = process.env.BROWSE_INSTANCE || '';
30
29
  const INSTANCE_SUFFIX = BROWSE_PORT ? `-${BROWSE_PORT}` : (BROWSE_INSTANCE ? `-${BROWSE_INSTANCE}` : '');
31
30
 
32
31
  /**
@@ -124,6 +123,50 @@ function isProcessAlive(pid: number): boolean {
124
123
  }
125
124
  }
126
125
 
126
+ async function listInstances(): Promise<void> {
127
+ try {
128
+ const files = fs.readdirSync(LOCAL_DIR).filter(
129
+ f => f.startsWith('browse-server') && f.endsWith('.json') && !f.endsWith('.lock')
130
+ );
131
+ if (files.length === 0) { console.log('(no running instances)'); return; }
132
+
133
+ let found = false;
134
+ for (const file of files) {
135
+ try {
136
+ const data = JSON.parse(fs.readFileSync(path.join(LOCAL_DIR, file), 'utf-8'));
137
+ if (!data.pid || !data.port) continue;
138
+
139
+ const alive = isProcessAlive(data.pid);
140
+ let status = 'dead';
141
+ let sessions = 0;
142
+ if (alive) {
143
+ try {
144
+ const resp = await fetch(`http://127.0.0.1:${data.port}/health`, { signal: AbortSignal.timeout(1000) });
145
+ if (resp.ok) {
146
+ const health = await resp.json() as any;
147
+ status = health.status === 'healthy' ? 'healthy' : 'unhealthy';
148
+ sessions = health.sessions || 0;
149
+ }
150
+ } catch { status = 'unreachable'; }
151
+ }
152
+
153
+ // Derive instance name from filename
154
+ const match = file.match(/^browse-server-?(.*)\.json$/);
155
+ const instance = match?.[1] || 'default';
156
+
157
+ console.log(` ${instance.padEnd(15)} PID ${String(data.pid).padEnd(8)} port ${data.port} ${status}${sessions ? ` ${sessions} session(s)` : ''}`);
158
+ found = true;
159
+
160
+ // Clean up dead entries
161
+ if (!alive) {
162
+ try { fs.unlinkSync(path.join(LOCAL_DIR, file)); } catch {}
163
+ }
164
+ } catch {}
165
+ }
166
+ if (!found) console.log('(no running instances)');
167
+ } catch { console.log('(no running instances)'); }
168
+ }
169
+
127
170
  function isBrowseProcess(pid: number): boolean {
128
171
  try {
129
172
  const { execSync } = require('child_process');
@@ -209,7 +252,7 @@ async function startServer(): Promise<ServerState> {
209
252
  : ['bun', 'run', SERVER_SCRIPT];
210
253
  const proc = Bun.spawn(spawnCmd, {
211
254
  stdio: ['ignore', 'pipe', 'pipe'],
212
- env: { ...process.env, __BROWSE_SERVER_MODE: '1', BROWSE_LOCAL_DIR: LOCAL_DIR, BROWSE_INSTANCE },
255
+ env: { ...process.env, __BROWSE_SERVER_MODE: '1', BROWSE_LOCAL_DIR: LOCAL_DIR, BROWSE_INSTANCE, ...(cliFlags.headed ? { BROWSE_HEADED: '1' } : {}) },
213
256
  });
214
257
 
215
258
  // Don't hold the CLI open
@@ -291,9 +334,10 @@ async function ensureServer(): Promise<ServerState> {
291
334
  }
292
335
 
293
336
  /**
294
- * Clean up orphaned browse servers:
295
- * 1. Remove state files with dead PIDs
296
- * 2. Kill live servers from other instances (old PPID-suffixed state files)
337
+ * Clean up orphaned browse server state files.
338
+ * Removes any browse-server*.json whose PID is dead.
339
+ * Kills live orphans (legacy PPID-suffixed files from pre-v0.2.4) if they're browse processes.
340
+ * Preserves intentional BROWSE_PORT instances (suffix matches port inside the file).
297
341
  */
298
342
  function cleanOrphanedServers(): void {
299
343
  try {
@@ -301,27 +345,20 @@ function cleanOrphanedServers(): void {
301
345
  for (const file of files) {
302
346
  if (!file.startsWith('browse-server') || !file.endsWith('.json') || file.endsWith('.lock')) continue;
303
347
  const filePath = path.join(LOCAL_DIR, file);
304
- if (filePath === STATE_FILE) continue; // Don't touch our own state file
305
- // Only clean files with PID-based suffixes. Skip port-based and non-numeric.
306
- // Port-based files have a port number from a BROWSE_PORT env var.
307
- // PID-based files have a process ID (typically >10000, never <1000).
308
- // To distinguish: read the state file and check if the suffix matches the PID inside.
309
- const suffixMatch = file.match(/browse-server-(\d+)\.json$/);
310
- if (!suffixMatch) continue;
311
- const suffix = parseInt(suffixMatch[1], 10);
348
+ if (filePath === STATE_FILE) continue;
312
349
  try {
313
350
  const data = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
314
- if (!data.pid) continue;
315
- // Port-based file: suffix matches the port inside (intentional BROWSE_PORT instance)
316
- if (data.port === suffix) continue;
317
- // PID-based file: suffix was a PPID from the spawning CLI
318
- if (isProcessAlive(data.pid) && isBrowseProcess(data.pid)) {
351
+ if (!data.pid) { fs.unlinkSync(filePath); continue; }
352
+ // Preserve intentional BROWSE_PORT instances (suffix = port number)
353
+ const suffixMatch = file.match(/browse-server-(\d+)\.json$/);
354
+ if (suffixMatch && data.port === parseInt(suffixMatch[1], 10) && isProcessAlive(data.pid)) continue;
355
+ // Dead process → remove state file
356
+ if (!isProcessAlive(data.pid)) { fs.unlinkSync(filePath); continue; }
357
+ // Live orphan (legacy PPID file) → kill if it's a browse process
358
+ if (isBrowseProcess(data.pid)) {
319
359
  try { process.kill(data.pid, 'SIGTERM'); } catch {}
320
360
  }
321
- if (!isProcessAlive(data.pid)) {
322
- fs.unlinkSync(filePath);
323
- }
324
- } catch {}
361
+ } catch { try { fs.unlinkSync(filePath); } catch {} }
325
362
  }
326
363
  } catch {}
327
364
  }
@@ -523,12 +560,27 @@ export async function main() {
523
560
  }
524
561
  allowedDomains = allowedDomains || process.env.BROWSE_ALLOWED_DOMAINS || (config.allowedDomains ? config.allowedDomains.join(',') : undefined);
525
562
 
563
+ // Extract --headed flag (only before command)
564
+ let headed = false;
565
+ const headedIdx = args.indexOf('--headed');
566
+ if (headedIdx !== -1 && headedIdx < findCommandIndex(args)) {
567
+ headed = true;
568
+ args.splice(headedIdx, 1);
569
+ }
570
+ headed = headed || process.env.BROWSE_HEADED === '1';
571
+
526
572
  // Set global flags for sendCommand()
527
573
  cliFlags.json = jsonMode;
528
574
  cliFlags.contentBoundaries = contentBoundaries;
529
575
  cliFlags.allowedDomains = allowedDomains || '';
576
+ cliFlags.headed = headed;
530
577
 
531
578
  // ─── Local commands (no server needed) ─────────────────────
579
+ if (args[0] === 'instances') {
580
+ await listInstances();
581
+ return;
582
+ }
583
+
532
584
  if (args[0] === 'install-skill') {
533
585
  const { installSkill } = await import('./install-skill');
534
586
  installSkill(args[1]);
@@ -552,7 +604,7 @@ Device: emulate <device> | emulate reset | devices [filter]
552
604
  Inspection: js <expr> | eval <file> | css <sel> <prop> | attrs <sel>
553
605
  element-state <sel> | console [--clear] | network [--clear]
554
606
  cookies | storage [set <k> <v>] | perf
555
- value <sel> | count <sel>
607
+ value <sel> | count <sel> | clipboard [write <text>]
556
608
  Visual: screenshot [path] | pdf [path] | responsive [prefix]
557
609
  Snapshot: snapshot [-i] [-c] [-C] [-d N] [-s sel]
558
610
  Find: find role|text|label|placeholder|testid <query> [name]
@@ -566,7 +618,8 @@ Sessions: sessions | session-close <id>
566
618
  Auth: auth save <name> <url> <user> <pass|--password-stdin>
567
619
  auth login <name> | auth list | auth delete <name>
568
620
  State: state save|load|list|show [name]
569
- Server: status | cookie <n>=<v> | header <n>:<v>
621
+ Debug: inspect (requires BROWSE_DEBUG_PORT)
622
+ Server: status | instances | cookie <n>=<v> | header <n>:<v>
570
623
  useragent <str> | stop | restart
571
624
  Setup: install-skill [path]
572
625
 
@@ -575,6 +628,7 @@ Options:
575
628
  --json Wrap output as {success, data, command}
576
629
  --content-boundaries Wrap page content in nonce-delimited markers
577
630
  --allowed-domains <d,d> Block navigation/resources outside allowlist
631
+ --headed Run browser in headed (visible) mode
578
632
 
579
633
  Snapshot flags:
580
634
  -i Interactive elements only (buttons, links, inputs)
@@ -346,7 +346,7 @@ export async function handleMetaCommand(
346
346
  const { PolicyChecker } = await import('../policy');
347
347
 
348
348
  const WRITE_SET = new Set(['goto','back','forward','reload','click','dblclick','fill','select','hover','focus','check','uncheck','type','press','scroll','wait','viewport','cookie','header','useragent','upload','dialog-accept','dialog-dismiss','emulate','drag','keydown','keyup','highlight','download','route','offline']);
349
- const READ_SET = new Set(['text','html','links','forms','accessibility','js','eval','css','attrs','element-state','dialog','console','network','cookies','storage','perf','devices','value','count']);
349
+ const READ_SET = new Set(['text','html','links','forms','accessibility','js','eval','css','attrs','element-state','dialog','console','network','cookies','storage','perf','devices','value','count','clipboard']);
350
350
 
351
351
  const sessionBuffers = currentSession?.buffers;
352
352
  const policy = new PolicyChecker();
@@ -516,10 +516,8 @@ export async function handleMetaCommand(
516
516
  const diffPath = extIdx > 0
517
517
  ? baseline.slice(0, extIdx) + '-diff' + baseline.slice(extIdx)
518
518
  : baseline + '-diff.png';
519
- if (!result.passed) {
520
- // Write current screenshot as the "what changed" artifact
521
- // (true pixel-diff image generation requires re-rendering differences)
522
- fs.writeFileSync(diffPath, currentBuffer);
519
+ if (!result.passed && result.diffImage) {
520
+ fs.writeFileSync(diffPath, result.diffImage);
523
521
  }
524
522
 
525
523
  return [
@@ -528,7 +526,7 @@ export async function handleMetaCommand(
528
526
  `Mismatch: ${result.mismatchPct.toFixed(3)}%`,
529
527
  `Threshold: ${thresholdPct}%`,
530
528
  `Result: ${result.passed ? 'PASS' : 'FAIL'}`,
531
- ...(!result.passed ? [`Current saved: ${diffPath}`] : []),
529
+ ...(!result.passed ? [`Diff saved: ${diffPath}`] : []),
532
530
  ].join('\n');
533
531
  }
534
532
 
@@ -684,6 +682,33 @@ export async function handleMetaCommand(
684
682
  return `Switched to frame: ${selector}`;
685
683
  }
686
684
 
685
+ // ─── DevTools Inspect ──────────────────────────────
686
+ case 'inspect': {
687
+ const debugPort = parseInt(process.env.BROWSE_DEBUG_PORT || '0', 10);
688
+ if (!debugPort) {
689
+ throw new Error(
690
+ 'DevTools inspect requires BROWSE_DEBUG_PORT to be set.\n' +
691
+ 'Restart with: BROWSE_DEBUG_PORT=9222 browse restart\n' +
692
+ 'Then run: browse inspect'
693
+ );
694
+ }
695
+ try {
696
+ const resp = await fetch(`http://127.0.0.1:${debugPort}/json`, { signal: AbortSignal.timeout(2000) });
697
+ const pages = await resp.json() as any[];
698
+ const currentUrl = bm.getCurrentUrl();
699
+ const target = pages.find((p: any) => p.url === currentUrl) || pages[0];
700
+ if (!target) throw new Error('No debuggable pages found');
701
+ return [
702
+ `DevTools URL: ${target.devtoolsFrontendUrl}`,
703
+ `Page: ${target.title} (${target.url})`,
704
+ `WebSocket: ${target.webSocketDebuggerUrl}`,
705
+ ].join('\n');
706
+ } catch (err: any) {
707
+ if (err.message.includes('BROWSE_DEBUG_PORT')) throw err;
708
+ throw new Error(`Cannot reach Chrome debug port at ${debugPort}: ${err.message}`);
709
+ }
710
+ }
711
+
687
712
  default:
688
713
  throw new Error(`Unknown meta command: ${command}`);
689
714
  }
@@ -314,6 +314,23 @@ export async function handleReadCommand(
314
314
  return String(count);
315
315
  }
316
316
 
317
+ case 'clipboard': {
318
+ if (args[0] === 'write') {
319
+ const text = args.slice(1).join(' ');
320
+ if (!text) throw new Error('Usage: browse clipboard write <text>');
321
+ await page.context().grantPermissions(['clipboard-read', 'clipboard-write']);
322
+ await evalCtx.evaluate((t) => navigator.clipboard.writeText(t), text);
323
+ return `Clipboard set: ${text.slice(0, 50)}${text.length > 50 ? '...' : ''}`;
324
+ }
325
+ await page.context().grantPermissions(['clipboard-read', 'clipboard-write']);
326
+ try {
327
+ const text = await evalCtx.evaluate(() => navigator.clipboard.readText());
328
+ return text || '(empty clipboard)';
329
+ } catch {
330
+ return '(clipboard not available)';
331
+ }
332
+ }
333
+
317
334
  case 'devices': {
318
335
  const filter = args.join(' ').toLowerCase();
319
336
  const all = listDevices();
@@ -1,10 +1,11 @@
1
1
  /**
2
- * Self-contained PNG decoder + pixel comparator.
3
- * No external deps — uses only zlib.inflateSync (Node/Bun built-in).
2
+ * Self-contained PNG decoder, encoder + pixel comparator.
3
+ * No external deps — uses only zlib (Node/Bun built-in).
4
4
  * Works in both dev mode (bun run) and compiled binary ($bunfs).
5
5
  *
6
- * Supports: 8-bit RGB (color type 2) and RGBA (color type 6).
6
+ * Decoder supports: 8-bit RGB (color type 2) and RGBA (color type 6).
7
7
  * Handles all 5 PNG scanline filter types (None/Sub/Up/Average/Paeth).
8
+ * Encoder outputs: 8-bit RGBA (color type 6), filter None, zlib-compressed.
8
9
  */
9
10
 
10
11
  import * as zlib from 'zlib';
@@ -22,6 +23,7 @@ export interface CompareResult {
22
23
  diffPixels: number;
23
24
  mismatchPct: number;
24
25
  passed: boolean;
26
+ diffImage?: Buffer;
25
27
  }
26
28
 
27
29
  export function decodePNG(buf: Buffer): DecodedImage {
@@ -96,6 +98,111 @@ export function decodePNG(buf: Buffer): DecodedImage {
96
98
  return { width, height, data: pixels };
97
99
  }
98
100
 
101
/**
 * Serialize RGBA pixel data as a PNG file.
 *
 * The output is the PNG signature followed by three chunks: IHDR, a single
 * IDAT, and IEND. Every scanline is stored with filter type 0 (None); size
 * reduction is left entirely to zlib deflate.
 *
 * @param img - Image dimensions plus pixel bytes, read as 4 bytes per pixel, row by row.
 * @returns A buffer holding the encoded PNG.
 */
export function encodePNG(img: DecodedImage): Buffer {
  const { width, height, data } = img;

  // One chunk = big-endian payload length, 4-char type, payload, CRC32.
  const makeChunk = (tag: string, payload: Buffer): Buffer => {
    const out = Buffer.alloc(4 + 4 + payload.length + 4);
    const crcAt = 8 + payload.length;
    out.writeUInt32BE(payload.length, 0);
    out.write(tag, 4, 4, 'ascii');
    payload.copy(out, 8);
    // The checksum spans the type field and the payload, not the length field.
    out.writeUInt32BE(zlib.crc32(out.subarray(4, crcAt)) >>> 0, crcAt);
    return out;
  };

  // IHDR payload: width(4) height(4) bitDepth colorType compression filter interlace
  const header = Buffer.alloc(13);
  header.writeUInt32BE(width, 0);
  header.writeUInt32BE(height, 4);
  header[8] = 8; // 8 bits per channel
  header[9] = 6; // truecolor with alpha (RGBA)
  header[10] = 0; // deflate
  header[11] = 0; // standard filter method
  header[12] = 0; // not interlaced

  // Scanline stream: each row is a filter byte (0 = None) followed by its raw RGBA bytes.
  const rowBytes = width * 4;
  const lineBytes = rowBytes + 1;
  const scanlines = Buffer.alloc(height * lineBytes);
  let src = 0;
  let dst = 0;
  for (let row = 0; row < height; row++) {
    scanlines[dst] = 0;
    data.copy(scanlines, dst + 1, src, src + rowBytes);
    src += rowBytes;
    dst += lineBytes;
  }

  const parts: Buffer[] = [
    Buffer.from(PNG_MAGIC),
    makeChunk('IHDR', header),
    makeChunk('IDAT', zlib.deflateSync(scanlines)),
    makeChunk('IEND', Buffer.alloc(0)), // IEND carries no payload
  ];
  return Buffer.concat(parts);
}
151
+
152
/**
 * Render a PNG that visualizes where two images differ.
 *
 * The canvas is as large as the larger of the two inputs on each axis. Each
 * output pixel is painted by one of three rules:
 * - outside the bounds of either image: opaque pure red (255, 0, 0, 255)
 * - RGB distance above the threshold: (255, g/3, b/3, 255), colors from `curr`
 * - otherwise: `curr` dimmed to (r/3, g/3, b/3) with alpha 128
 *
 * Only the R, G and B channels of the inputs are compared.
 *
 * @param base - Baseline image.
 * @param curr - Current image; supplies the colors for overlapping pixels.
 * @param colorThreshold - A pixel counts as different when its squared RGB
 *   distance exceeds `colorThreshold * colorThreshold * 3`.
 * @returns The diff image encoded as PNG.
 */
export function generateDiffImage(base: DecodedImage, curr: DecodedImage, colorThreshold: number): Buffer {
  const outW = Math.max(base.width, curr.width);
  const outH = Math.max(base.height, curr.height);
  const out = Buffer.alloc(outW * outH * 4);
  // With a threshold of 0 this limit is 0, so any non-zero distance is a difference.
  const limitSq = colorThreshold * colorThreshold * 3;
  const dim = (channel: number): number => (channel / 3) | 0;

  let o = 0; // write offset into `out`, advanced 4 bytes per pixel
  for (let row = 0; row < outH; row++) {
    const rowInBase = row < base.height;
    const rowInCurr = row < curr.height;

    for (let col = 0; col < outW; col++, o += 4) {
      const overlaps = rowInBase && rowInCurr && col < base.width && col < curr.width;

      if (!overlaps) {
        // Covered by at most one image: flag as a size mismatch.
        out[o] = 255;
        out[o + 1] = 0;
        out[o + 2] = 0;
        out[o + 3] = 255;
      } else {
        const b = (row * base.width + col) * 4;
        const c = (row * curr.width + col) * 4;
        const deltaR = base.data[b] - curr.data[c];
        const deltaG = base.data[b + 1] - curr.data[c + 1];
        const deltaB = base.data[b + 2] - curr.data[c + 2];
        const distSq = deltaR * deltaR + deltaG * deltaG + deltaB * deltaB;

        if (distSq > limitSq) {
          // Changed pixel: full red, remaining channels dimmed from the current image.
          out[o] = 255;
          out[o + 1] = dim(curr.data[c + 1]);
          out[o + 2] = dim(curr.data[c + 2]);
          out[o + 3] = 255;
        } else {
          // Unchanged pixel: dimmed, half-transparent copy of the current image.
          out[o] = dim(curr.data[c]);
          out[o + 1] = dim(curr.data[c + 1]);
          out[o + 2] = dim(curr.data[c + 2]);
          out[o + 3] = 128;
        }
      }
    }
  }

  return encodePNG({ width: outW, height: outH, data: out });
}
205
+
99
206
  export function compareScreenshots(
100
207
  baselineBuf: Buffer,
101
208
  currentBuf: Buffer,
@@ -129,10 +236,12 @@ export function compareScreenshots(
129
236
  }
130
237
 
131
238
  const mismatchPct = totalPixels > 0 ? (diffPixels / totalPixels) * 100 : 0;
132
- return {
133
- totalPixels,
134
- diffPixels,
135
- mismatchPct,
136
- passed: mismatchPct <= thresholdPct,
137
- };
239
+ const passed = mismatchPct <= thresholdPct;
240
+ const result: CompareResult = { totalPixels, diffPixels, mismatchPct, passed };
241
+
242
+ if (!passed) {
243
+ result.diffImage = generateDiffImage(base, curr, colorThreshold);
244
+ }
245
+
246
+ return result;
138
247
  }
package/src/server.ts CHANGED
@@ -26,6 +26,7 @@ export { type LogEntry, type NetworkEntry };
26
26
 
27
27
  // ─── Auth (inline) ─────────────────────────────────────────────
28
28
  const AUTH_TOKEN = crypto.randomUUID();
29
+ const DEBUG_PORT = parseInt(process.env.BROWSE_DEBUG_PORT || '0', 10);
29
30
  const BROWSE_PORT = parseInt(process.env.BROWSE_PORT || '0', 10); // 0 = auto-scan
30
31
  const BROWSE_INSTANCE = process.env.BROWSE_INSTANCE || '';
31
32
  const INSTANCE_SUFFIX = BROWSE_PORT ? `-${BROWSE_PORT}` : (BROWSE_INSTANCE ? `-${BROWSE_INSTANCE}` : '');
@@ -107,7 +108,7 @@ const READ_COMMANDS = new Set([
107
108
  'text', 'html', 'links', 'forms', 'accessibility',
108
109
  'js', 'eval', 'css', 'attrs', 'element-state', 'dialog',
109
110
  'console', 'network', 'cookies', 'storage', 'perf', 'devices',
110
- 'value', 'count',
111
+ 'value', 'count', 'clipboard',
111
112
  ]);
112
113
 
113
114
  const WRITE_COMMANDS = new Set([
@@ -128,7 +129,7 @@ const META_COMMANDS = new Set([
128
129
  'url', 'snapshot', 'snapshot-diff', 'screenshot-diff',
129
130
  'sessions', 'session-close',
130
131
  'frame', 'state', 'find',
131
- 'auth', 'har',
132
+ 'auth', 'har', 'inspect',
132
133
  ]);
133
134
 
134
135
  // Probe if a port is free using net.createServer (not Bun.serve which fatally crashes on EADDRINUSE)
@@ -373,7 +374,10 @@ async function start() {
373
374
  console.log(`[browse] Connected to remote Chrome via CDP: ${cdpUrl}`);
374
375
  } else {
375
376
  // Launch local Chromium
376
- const launchOptions: Record<string, any> = { headless: true };
377
+ const launchOptions: Record<string, any> = { headless: process.env.BROWSE_HEADED !== '1' };
378
+ if (DEBUG_PORT > 0) {
379
+ launchOptions.args = [`--remote-debugging-port=${DEBUG_PORT}`];
380
+ }
377
381
  const proxyServer = process.env.BROWSE_PROXY;
378
382
  if (proxyServer) {
379
383
  launchOptions.proxy = { server: proxyServer };
@@ -383,17 +387,11 @@ async function start() {
383
387
  }
384
388
  browser = await chromium.launch(launchOptions);
385
389
 
386
- // Chromium crash → flush, cleanup, exit (only for owned browser)
390
+ // Chromium crash → clean shutdown (only for owned browser)
387
391
  browser.on('disconnected', () => {
388
- console.error('[browse] FATAL: Chromium process crashed or was killed. Server exiting.');
389
- if (sessionManager) flushAllBuffers(sessionManager, true);
390
- try {
391
- const currentState = JSON.parse(fs.readFileSync(STATE_FILE, 'utf-8'));
392
- if (currentState.pid === process.pid || currentState.token === AUTH_TOKEN) {
393
- fs.unlinkSync(STATE_FILE);
394
- }
395
- } catch {}
396
- process.exit(1);
392
+ if (isShuttingDown) return;
393
+ console.error('[browse] Chromium disconnected. Shutting down.');
394
+ shutdown();
397
395
  });
398
396
  }
399
397
 
@@ -445,13 +443,16 @@ async function start() {
445
443
  });
446
444
 
447
445
  // Write state file
448
- const state = {
446
+ const state: Record<string, any> = {
449
447
  pid: process.pid,
450
448
  port,
451
449
  token: AUTH_TOKEN,
452
450
  startedAt: new Date().toISOString(),
453
451
  serverPath: path.resolve(import.meta.dir, 'server.ts'),
454
452
  };
453
+ if (DEBUG_PORT > 0) {
454
+ state.debugPort = DEBUG_PORT;
455
+ }
455
456
  fs.writeFileSync(STATE_FILE, JSON.stringify(state, null, 2), { mode: 0o600 });
456
457
 
457
458
  console.log(`[browse] Server running on http://127.0.0.1:${port} (PID: ${process.pid})`);