@ulpi/browse 0.2.5 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -92,7 +92,7 @@ $ browse snapshot -i -C
92
92
 
93
93
  Every detected element gets a ref. `browse click @e3` just works.
94
94
 
95
- ### 4. 58+ Purpose-Built Commands vs Generic Tools
95
+ ### 4. 75 Purpose-Built Commands vs Generic Tools
96
96
 
97
97
  @playwright/mcp has ~15 tools. For anything beyond navigate/click/type, you write JavaScript via `browser_evaluate`. `browse` has purpose-built commands that return structured, minimal output:
98
98
 
@@ -116,6 +116,9 @@ Every detected element gets a ref. `browse click @e3` just works.
116
116
  | State persistence | Not available | `state save\|load` |
117
117
  | Credential vault | Not available | `auth save\|login\|list` |
118
118
  | HAR recording | Not available | `har start\|stop` |
119
+ | Clipboard access | Not available | `clipboard [write <text>]` |
120
+ | Element finding | Not available | `find role\|text\|label\|placeholder\|testid` |
121
+ | DevTools inspect | Not available | `inspect` |
119
122
  | Domain restriction | Not available | `--allowed-domains` |
120
123
  | Prompt injection defense | Not available | `--content-boundaries` |
121
124
  | JSON output mode | Not available | `--json` |
@@ -255,13 +258,17 @@ After snapshot, use `@e1`, `@e2`... as selectors in any command.
255
258
  100+ devices: iPhone 12-17, Pixel 5-7, iPad, Galaxy, and all Playwright built-ins.
256
259
 
257
260
  ### Inspection
258
- `js <expr>` | `eval <file>` | `css <sel> <prop>` | `attrs <sel>` | `element-state <sel>` | `value <sel>` | `count <sel>` | `console [--clear]` | `network [--clear]` | `cookies` | `storage [set <k> <v>]` | `perf`
261
+ `js <expr>` | `eval <file>` | `css <sel> <prop>` | `attrs <sel>` | `element-state <sel>` | `value <sel>` | `count <sel>` | `clipboard [write <text>]` | `console [--clear]` | `network [--clear]` | `cookies` | `storage [set <k> <v>]` | `perf`
259
262
 
260
263
  ### Visual
261
264
  `screenshot [path]` | `screenshot --annotate` | `pdf [path]` | `responsive [prefix]`
262
265
 
263
266
  ### Compare
264
267
  `diff <url1> <url2>` — text diff between two pages.
268
+ `screenshot-diff <baseline> [current]` — pixel-level visual regression testing.
269
+
270
+ ### Find
271
+ `find role|text|label|placeholder|testid <query> [name]` — semantic element locators.
265
272
 
266
273
  ### Multi-Step
267
274
  ```bash
@@ -281,13 +288,16 @@ echo '[["goto","https://example.com"],["text"]]' | browse chain
281
288
  `route <pattern> block` | `route <pattern> fulfill <status> [body]` | `route clear` | `offline [on|off]`
282
289
 
283
290
  ### State & Auth
284
- `state save [name]` | `state load [name]` | `auth save <name> <url> <user> <pass>` | `auth login <name>` | `auth list` | `auth delete <name>`
291
+ `state save [name]` | `state load [name]` | `state list` | `state show [name]` | `auth save <name> <url> <user> <pass>` | `auth login <name>` | `auth list` | `auth delete <name>`
285
292
 
286
293
  ### Recording
287
294
  `har start` | `har stop [path]`
288
295
 
296
+ ### Debug
297
+ `inspect` — open DevTools debugger (requires `BROWSE_DEBUG_PORT`).
298
+
289
299
  ### Server Control
290
- `status` | `cookie <n>=<v>` | `header <n>:<v>` | `useragent <str>` | `stop` | `restart`
300
+ `status` | `instances` | `cookie <n>=<v>` | `header <n>:<v>` | `useragent <str>` | `stop` | `restart`
291
301
 
292
302
  ## Architecture
293
303
 
@@ -310,11 +320,22 @@ browse [--session <id>] <command>
310
320
  Chromium (Playwright, headless, shared)
311
321
  ```
312
322
 
323
+ ## CLI Options
324
+
325
+ | Flag | Description |
326
+ |------|-------------|
327
+ | `--session <id>` | Named session (isolates tabs, refs, cookies) |
328
+ | `--json` | Wrap output as `{success, data, command}` |
329
+ | `--content-boundaries` | Wrap page content in nonce-delimited markers |
330
+ | `--allowed-domains <d,d>` | Block navigation/resources outside allowlist |
331
+ | `--headed` | Run browser in headed (visible) mode |
332
+
313
333
  ## Environment Variables
314
334
 
315
335
  | Variable | Default | Description |
316
336
  |----------|---------|-------------|
317
337
  | `BROWSE_PORT` | auto 9400-10400 | Fixed server port |
338
+ | `BROWSE_PORT_START` | 9400 | Start of port scan range |
318
339
  | `BROWSE_SESSION` | (none) | Default session ID for all commands |
319
340
  | `BROWSE_INSTANCE` | auto (PPID) | Instance ID for multi-Claude isolation |
320
341
  | `BROWSE_IDLE_TIMEOUT` | 1800000 (30m) | Idle shutdown in ms |
@@ -323,33 +344,35 @@ browse [--session <id>] <command>
323
344
  | `BROWSE_JSON` | (none) | Set to `1` for JSON output mode |
324
345
  | `BROWSE_CONTENT_BOUNDARIES` | (none) | Set to `1` for nonce-delimited output |
325
346
  | `BROWSE_ALLOWED_DOMAINS` | (none) | Comma-separated domain allowlist |
347
+ | `BROWSE_HEADED` | (none) | Set to `1` for headed (visible) browser mode |
326
348
  | `BROWSE_PROXY` | (none) | Proxy server URL |
327
349
  | `BROWSE_PROXY_BYPASS` | (none) | Proxy bypass list |
328
350
  | `BROWSE_CDP_URL` | (none) | Connect to remote Chrome via CDP |
351
+ | `BROWSE_SERVER_SCRIPT` | auto-detected | Override path to server.ts |
352
+ | `BROWSE_DEBUG_PORT` | (none) | Port for DevTools debugging (inspect command) |
353
+ | `BROWSE_POLICY` | browse-policy.json | Path to action policy file |
354
+ | `BROWSE_CONFIRM_ACTIONS` | (none) | Comma-separated commands requiring confirmation |
355
+ | `BROWSE_ENCRYPTION_KEY` | auto-generated | 64-char hex AES key for credential vault |
356
+ | `BROWSE_AUTH_PASSWORD` | (none) | Password for auth save (alt to `--password-stdin`) |
329
357
 
330
358
  ## Acknowledgments
331
359
 
332
360
  Inspired by and originally derived from the `/browse` skill in [gstack](https://github.com/garrytan/gstack) by Garry Tan. The core architecture — persistent Chromium daemon, thin CLI client, ref-based element selection via ARIA snapshots — comes from gstack.
333
361
 
334
- ### Added beyond gstack
362
+ ## Changelog
335
363
 
336
- **v0.1.0 — Foundation:**
337
- - `emulate` / `devices` — device emulation (100+ devices)
338
- - `snapshot -C` — cursor-interactive detection
339
- - `snapshot-diff` — before/after comparison with ref-number stripping
340
- - `dialog` / `dialog-accept` / `dialog-dismiss` — dialog handling
341
- - `upload` — file upload
342
- - `screenshot --annotate` — numbered badge overlay with legend
343
- - Session multiplexing — multiple agents share one Chromium
344
- - Safe retry classification — read vs write commands
345
- - TreeWalker text extraction — no MutationObserver triggers
364
+ ### v0.3.0 — Headed Mode, Clipboard, DevTools
346
365
 
347
- **v0.2.0 — Security, Interactions, DX:**
348
- - `--json` — structured output mode for agent frameworks
349
- - `--content-boundaries` — CSPRNG nonce wrapping for prompt injection defense
350
- - `--allowed-domains` — domain allowlist (HTTP + WebSocket/EventSource/sendBeacon)
351
- - `browse-policy.json` — action policy gate (allow/deny/confirm per command)
352
- - `auth save/login/list/delete` — AES-256-GCM encrypted credential vault
366
+ - `--headed` flag — run browser in visible mode for debugging and demos
367
+ - `clipboard [write <text>]` — read and write clipboard contents
368
+ - `inspect` command — open DevTools debugger via `BROWSE_DEBUG_PORT`
369
+ - `screenshot --annotate` — pixel-annotated PNG with numbered badges
370
+ - `instances` command — list all running browse servers
371
+ - `BROWSE_DEBUG_PORT` env var for DevTools debugging
372
+
373
+ ### v0.2.0 — Security, Interactions, DX
374
+
375
+ **Commands:**
353
376
  - `dblclick`, `focus`, `check`, `uncheck`, `drag`, `keydown`, `keyup` — interaction commands
354
377
  - `frame <sel>` / `frame main` — iframe targeting
355
378
  - `value <sel>`, `count <sel>` — element inspection
@@ -363,15 +386,41 @@ Inspired by and originally derived from the `/browse` skill in [gstack](https://
363
386
  - `har start/stop` — HAR recording and export
364
387
  - `screenshot-diff` — pixel-level visual regression testing
365
388
  - `find role/text/label/placeholder/testid` — semantic element locators
366
- - Auto-instance servers via PPID — multi-Claude isolation
367
- - Per-session output folders (`.browse/sessions/{id}/`)
389
+
390
+ **Security:**
391
+ - `--allowed-domains` — domain allowlist (HTTP + WebSocket/EventSource/sendBeacon)
392
+ - `browse-policy.json` — action policy gate (allow/deny/confirm per command)
393
+ - `auth save/login/list/delete` — AES-256-GCM encrypted credential vault
394
+ - `--content-boundaries` — CSPRNG nonce wrapping for prompt injection defense
395
+
396
+ **DX:**
397
+ - `--json` — structured output mode for agent frameworks
368
398
  - `browse.json` config file support
369
399
  - AI-friendly error messages — Playwright errors rewritten to actionable hints
400
+ - Per-session output folders (`.browse/sessions/{id}/`)
401
+
402
+ **Infrastructure:**
403
+ - Auto-instance servers via PPID — multi-Claude isolation
370
404
  - CDP remote connection (`BROWSE_CDP_URL`)
371
405
  - Proxy support (`BROWSE_PROXY`)
372
406
  - Compiled binary self-spawn mode
373
407
  - Orphaned server cleanup
374
408
 
409
+ ### v0.1.0 — Foundation
410
+
411
+ **Commands:**
412
+ - `emulate` / `devices` — device emulation (100+ devices)
413
+ - `snapshot -C` — cursor-interactive detection
414
+ - `snapshot-diff` — before/after comparison with ref-number stripping
415
+ - `dialog` / `dialog-accept` / `dialog-dismiss` — dialog handling
416
+ - `upload` — file upload
417
+ - `screenshot --annotate` — numbered badge overlay with legend
418
+
419
+ **Infrastructure:**
420
+ - Session multiplexing — multiple agents share one Chromium
421
+ - Safe retry classification — read vs write commands
422
+ - TreeWalker text extraction — no MutationObserver triggers
423
+
375
424
  ## License
376
425
 
377
426
  MIT
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ulpi/browse",
3
- "version": "0.2.5",
3
+ "version": "0.3.0",
4
4
  "repository": {
5
5
  "type": "git",
6
6
  "url": "https://github.com/ulpi-io/browse"
package/skill/SKILL.md CHANGED
@@ -78,7 +78,10 @@ If the file is missing or does not contain browse permission rules in `permissio
78
78
  "Bash(browse route:*)", "Bash(browse offline:*)",
79
79
  "Bash(browse status:*)", "Bash(browse stop:*)", "Bash(browse restart:*)",
80
80
  "Bash(browse cookie:*)", "Bash(browse header:*)",
81
- "Bash(browse useragent:*)"
81
+ "Bash(browse useragent:*)",
82
+ "Bash(browse clipboard:*)", "Bash(browse screenshot-diff:*)",
83
+ "Bash(browse find:*)", "Bash(browse inspect:*)",
84
+ "Bash(browse instances:*)", "Bash(browse --headed:*)"
82
85
  ```
83
86
 
84
87
  ## IMPORTANT
@@ -201,6 +204,25 @@ browse emulate reset
201
204
  # Parallel sessions
202
205
  browse --session agent-a goto https://site1.com
203
206
  browse --session agent-b goto https://site2.com
207
+
208
+ # Clipboard
209
+ browse clipboard
210
+ browse clipboard write "copied text"
211
+
212
+ # Find elements semantically
213
+ browse find role button
214
+ browse find text "Submit"
215
+ browse find testid "login-btn"
216
+
217
+ # Screenshot diff (visual regression)
218
+ browse screenshot-diff baseline.png current.png
219
+
220
+ # Headed mode (visible browser)
221
+ browse --headed goto https://example.com
222
+
223
+ # State list / show
224
+ browse state list
225
+ browse state show mysite
204
226
  ```
205
227
 
206
228
  ## Command Reference
@@ -297,6 +319,8 @@ browse cookies Dump all cookies as JSON
297
319
  browse storage [set <k> <v>] View/set localStorage
298
320
  browse perf Page load performance timings
299
321
  browse devices [filter] List available device names
322
+ browse clipboard Read system clipboard text
323
+ browse clipboard write <text> Write text to system clipboard
300
324
  ```
301
325
 
302
326
  ### Visual
@@ -313,9 +337,19 @@ browse frame <selector> Target an iframe (subsequent commands run inside
313
337
  browse frame main Return to main page
314
338
  ```
315
339
 
340
+ ### Find (semantic element locators)
341
+ ```
342
+ browse find role <query> Find elements by ARIA role
343
+ browse find text <query> Find elements by text content
344
+ browse find label <query> Find elements by label
345
+ browse find placeholder <query> Find elements by placeholder
346
+ browse find testid <query> Find elements by test ID
347
+ ```
348
+
316
349
  ### Compare
317
350
  ```
318
- browse diff <url1> <url2> Text diff between two pages
351
+ browse diff <url1> <url2> Text diff between two pages
352
+ browse screenshot-diff <base> [curr] Pixel-diff two PNG screenshots
319
353
  ```
320
354
 
321
355
  ### Multi-step (chain)
@@ -342,6 +376,8 @@ browse session-close <id> Close a session
342
376
  ```
343
377
  browse state save [name] Save cookies + localStorage (all origins)
344
378
  browse state load [name] Restore saved state
379
+ browse state list List saved states
380
+ browse state show [name] Show contents of saved state
345
381
  ```
346
382
 
347
383
  ### Auth vault
@@ -364,6 +400,7 @@ browse status Server health, uptime, session count
364
400
  browse instances List all running browse servers (instance, PID, port, status)
365
401
  browse stop Shutdown server
366
402
  browse restart Kill + restart server
403
+ browse inspect Open DevTools (requires BROWSE_DEBUG_PORT)
367
404
  ```
368
405
 
369
406
  ## CLI Flags
@@ -374,6 +411,7 @@ browse restart Kill + restart server
374
411
  | `--json` | Wrap output as `{success, data, command}` |
375
412
  | `--content-boundaries` | Wrap page content in nonce-delimited markers (prompt injection defense) |
376
413
  | `--allowed-domains <d,d>` | Block navigation/resources outside allowlist |
414
+ | `--headed` | Run browser in headed (visible) mode |
377
415
 
378
416
  ## Speed Rules
379
417
 
@@ -421,6 +459,11 @@ browse restart Kill + restart server
421
459
  | Secure browsing | `--allowed-domains example.com goto https://example.com` |
422
460
  | Scroll through results | `scroll down` → `text` → `scroll down` → `text` |
423
461
  | Drag and drop | `drag @e1 @e2` |
462
+ | Read/write clipboard | `clipboard` / `clipboard write "text"` |
463
+ | Find by accessibility | `find role button` / `find text "Submit"` |
464
+ | Visual regression | `screenshot-diff baseline.png` |
465
+ | Debug with DevTools | `inspect` (set BROWSE_DEBUG_PORT first) |
466
+ | See the browser | `browse --headed goto <url>` |
424
467
 
425
468
  ## Architecture
426
469
 
@@ -187,6 +187,10 @@ export class BrowserManager {
187
187
  return this.buffers;
188
188
  }
189
189
 
190
+ getBrowser(): Browser | null {
191
+ return this.browser;
192
+ }
193
+
190
194
  getContext(): BrowserContext | null {
191
195
  return this.context;
192
196
  }
@@ -199,11 +203,9 @@ export class BrowserManager {
199
203
  this.browser = await chromium.launch({ headless: true });
200
204
  this.ownsBrowser = true;
201
205
 
202
- // Chromium crash → flush what we can, then exit
206
+ // Chromium crash → notify caller (server uses this to exit; tests ignore it)
203
207
  this.browser.on('disconnected', () => {
204
- console.error('[browse] FATAL: Chromium process crashed or was killed. Server exiting.');
205
208
  if (onCrash) onCrash();
206
- process.exit(1);
207
209
  });
208
210
 
209
211
  this.context = await this.browser.newContext({
package/src/cli.ts CHANGED
@@ -19,6 +19,7 @@ const cliFlags = {
19
19
  json: false,
20
20
  contentBoundaries: false,
21
21
  allowedDomains: '' as string,
22
+ headed: false,
22
23
  };
23
24
 
24
25
  const BROWSE_PORT = parseInt(process.env.BROWSE_PORT || '0', 10);
@@ -251,7 +252,7 @@ async function startServer(): Promise<ServerState> {
251
252
  : ['bun', 'run', SERVER_SCRIPT];
252
253
  const proc = Bun.spawn(spawnCmd, {
253
254
  stdio: ['ignore', 'pipe', 'pipe'],
254
- env: { ...process.env, __BROWSE_SERVER_MODE: '1', BROWSE_LOCAL_DIR: LOCAL_DIR, BROWSE_INSTANCE },
255
+ env: { ...process.env, __BROWSE_SERVER_MODE: '1', BROWSE_LOCAL_DIR: LOCAL_DIR, BROWSE_INSTANCE, ...(cliFlags.headed ? { BROWSE_HEADED: '1' } : {}) },
255
256
  });
256
257
 
257
258
  // Don't hold the CLI open
@@ -559,10 +560,20 @@ export async function main() {
559
560
  }
560
561
  allowedDomains = allowedDomains || process.env.BROWSE_ALLOWED_DOMAINS || (config.allowedDomains ? config.allowedDomains.join(',') : undefined);
561
562
 
563
+ // Extract --headed flag (only before command)
564
+ let headed = false;
565
+ const headedIdx = args.indexOf('--headed');
566
+ if (headedIdx !== -1 && headedIdx < findCommandIndex(args)) {
567
+ headed = true;
568
+ args.splice(headedIdx, 1);
569
+ }
570
+ headed = headed || process.env.BROWSE_HEADED === '1';
571
+
562
572
  // Set global flags for sendCommand()
563
573
  cliFlags.json = jsonMode;
564
574
  cliFlags.contentBoundaries = contentBoundaries;
565
575
  cliFlags.allowedDomains = allowedDomains || '';
576
+ cliFlags.headed = headed;
566
577
 
567
578
  // ─── Local commands (no server needed) ─────────────────────
568
579
  if (args[0] === 'instances') {
@@ -593,7 +604,7 @@ Device: emulate <device> | emulate reset | devices [filter]
593
604
  Inspection: js <expr> | eval <file> | css <sel> <prop> | attrs <sel>
594
605
  element-state <sel> | console [--clear] | network [--clear]
595
606
  cookies | storage [set <k> <v>] | perf
596
- value <sel> | count <sel>
607
+ value <sel> | count <sel> | clipboard [write <text>]
597
608
  Visual: screenshot [path] | pdf [path] | responsive [prefix]
598
609
  Snapshot: snapshot [-i] [-c] [-C] [-d N] [-s sel]
599
610
  Find: find role|text|label|placeholder|testid <query> [name]
@@ -607,6 +618,7 @@ Sessions: sessions | session-close <id>
607
618
  Auth: auth save <name> <url> <user> <pass|--password-stdin>
608
619
  auth login <name> | auth list | auth delete <name>
609
620
  State: state save|load|list|show [name]
621
+ Debug: inspect (requires BROWSE_DEBUG_PORT)
610
622
  Server: status | instances | cookie <n>=<v> | header <n>:<v>
611
623
  useragent <str> | stop | restart
612
624
  Setup: install-skill [path]
@@ -616,6 +628,7 @@ Options:
616
628
  --json Wrap output as {success, data, command}
617
629
  --content-boundaries Wrap page content in nonce-delimited markers
618
630
  --allowed-domains <d,d> Block navigation/resources outside allowlist
631
+ --headed Run browser in headed (visible) mode
619
632
 
620
633
  Snapshot flags:
621
634
  -i Interactive elements only (buttons, links, inputs)
@@ -346,7 +346,7 @@ export async function handleMetaCommand(
346
346
  const { PolicyChecker } = await import('../policy');
347
347
 
348
348
  const WRITE_SET = new Set(['goto','back','forward','reload','click','dblclick','fill','select','hover','focus','check','uncheck','type','press','scroll','wait','viewport','cookie','header','useragent','upload','dialog-accept','dialog-dismiss','emulate','drag','keydown','keyup','highlight','download','route','offline']);
349
- const READ_SET = new Set(['text','html','links','forms','accessibility','js','eval','css','attrs','element-state','dialog','console','network','cookies','storage','perf','devices','value','count']);
349
+ const READ_SET = new Set(['text','html','links','forms','accessibility','js','eval','css','attrs','element-state','dialog','console','network','cookies','storage','perf','devices','value','count','clipboard']);
350
350
 
351
351
  const sessionBuffers = currentSession?.buffers;
352
352
  const policy = new PolicyChecker();
@@ -516,10 +516,8 @@ export async function handleMetaCommand(
516
516
  const diffPath = extIdx > 0
517
517
  ? baseline.slice(0, extIdx) + '-diff' + baseline.slice(extIdx)
518
518
  : baseline + '-diff.png';
519
- if (!result.passed) {
520
- // Write current screenshot as the "what changed" artifact
521
- // (true pixel-diff image generation requires re-rendering differences)
522
- fs.writeFileSync(diffPath, currentBuffer);
519
+ if (!result.passed && result.diffImage) {
520
+ fs.writeFileSync(diffPath, result.diffImage);
523
521
  }
524
522
 
525
523
  return [
@@ -528,7 +526,7 @@ export async function handleMetaCommand(
528
526
  `Mismatch: ${result.mismatchPct.toFixed(3)}%`,
529
527
  `Threshold: ${thresholdPct}%`,
530
528
  `Result: ${result.passed ? 'PASS' : 'FAIL'}`,
531
- ...(!result.passed ? [`Current saved: ${diffPath}`] : []),
529
+ ...(!result.passed ? [`Diff saved: ${diffPath}`] : []),
532
530
  ].join('\n');
533
531
  }
534
532
 
@@ -684,6 +682,33 @@ export async function handleMetaCommand(
684
682
  return `Switched to frame: ${selector}`;
685
683
  }
686
684
 
685
+ // ─── DevTools Inspect ──────────────────────────────
686
+ case 'inspect': {
687
+ const debugPort = parseInt(process.env.BROWSE_DEBUG_PORT || '0', 10);
688
+ if (!debugPort) {
689
+ throw new Error(
690
+ 'DevTools inspect requires BROWSE_DEBUG_PORT to be set.\n' +
691
+ 'Restart with: BROWSE_DEBUG_PORT=9222 browse restart\n' +
692
+ 'Then run: browse inspect'
693
+ );
694
+ }
695
+ try {
696
+ const resp = await fetch(`http://127.0.0.1:${debugPort}/json`, { signal: AbortSignal.timeout(2000) });
697
+ const pages = await resp.json() as any[];
698
+ const currentUrl = bm.getCurrentUrl();
699
+ const target = pages.find((p: any) => p.url === currentUrl) || pages[0];
700
+ if (!target) throw new Error('No debuggable pages found');
701
+ return [
702
+ `DevTools URL: ${target.devtoolsFrontendUrl}`,
703
+ `Page: ${target.title} (${target.url})`,
704
+ `WebSocket: ${target.webSocketDebuggerUrl}`,
705
+ ].join('\n');
706
+ } catch (err: any) {
707
+ if (err.message.includes('BROWSE_DEBUG_PORT')) throw err;
708
+ throw new Error(`Cannot reach Chrome debug port at ${debugPort}: ${err.message}`);
709
+ }
710
+ }
711
+
687
712
  default:
688
713
  throw new Error(`Unknown meta command: ${command}`);
689
714
  }
@@ -314,6 +314,23 @@ export async function handleReadCommand(
314
314
  return String(count);
315
315
  }
316
316
 
317
+ case 'clipboard': {
318
+ if (args[0] === 'write') {
319
+ const text = args.slice(1).join(' ');
320
+ if (!text) throw new Error('Usage: browse clipboard write <text>');
321
+ await page.context().grantPermissions(['clipboard-read', 'clipboard-write']);
322
+ await evalCtx.evaluate((t) => navigator.clipboard.writeText(t), text);
323
+ return `Clipboard set: ${text.slice(0, 50)}${text.length > 50 ? '...' : ''}`;
324
+ }
325
+ await page.context().grantPermissions(['clipboard-read', 'clipboard-write']);
326
+ try {
327
+ const text = await evalCtx.evaluate(() => navigator.clipboard.readText());
328
+ return text || '(empty clipboard)';
329
+ } catch {
330
+ return '(clipboard not available)';
331
+ }
332
+ }
333
+
317
334
  case 'devices': {
318
335
  const filter = args.join(' ').toLowerCase();
319
336
  const all = listDevices();
@@ -1,10 +1,11 @@
1
1
  /**
2
- * Self-contained PNG decoder + pixel comparator.
3
- * No external deps — uses only zlib.inflateSync (Node/Bun built-in).
2
+ * Self-contained PNG decoder, encoder + pixel comparator.
3
+ * No external deps — uses only zlib (Node/Bun built-in).
4
4
  * Works in both dev mode (bun run) and compiled binary ($bunfs).
5
5
  *
6
- * Supports: 8-bit RGB (color type 2) and RGBA (color type 6).
6
+ * Decoder supports: 8-bit RGB (color type 2) and RGBA (color type 6).
7
7
  * Handles all 5 PNG scanline filter types (None/Sub/Up/Average/Paeth).
8
+ * Encoder outputs: 8-bit RGBA (color type 6), filter None, zlib-compressed.
8
9
  */
9
10
 
10
11
  import * as zlib from 'zlib';
@@ -22,6 +23,7 @@ export interface CompareResult {
22
23
  diffPixels: number;
23
24
  mismatchPct: number;
24
25
  passed: boolean;
26
+ diffImage?: Buffer;
25
27
  }
26
28
 
27
29
  export function decodePNG(buf: Buffer): DecodedImage {
@@ -96,6 +98,111 @@ export function decodePNG(buf: Buffer): DecodedImage {
96
98
  return { width, height, data: pixels };
97
99
  }
98
100
 
101
+ /**
102
+ * Encode a DecodedImage (RGBA pixels) into a PNG buffer.
103
+ * Uses filter type None (0) for simplicity — zlib handles compression.
104
+ */
105
+ export function encodePNG(img: DecodedImage): Buffer {
106
+ // Helper: write a PNG chunk (length + type + data + CRC32)
107
+ function writeChunk(type: string, data: Buffer): Buffer {
108
+ const chunk = Buffer.alloc(12 + data.length);
109
+ chunk.writeUInt32BE(data.length, 0);
110
+ chunk.write(type, 4, 4, 'ascii');
111
+ data.copy(chunk, 8);
112
+ // CRC32 covers type + data
113
+ const crcData = chunk.slice(4, 8 + data.length);
114
+ chunk.writeUInt32BE(zlib.crc32(crcData) >>> 0, 8 + data.length);
115
+ return chunk;
116
+ }
117
+
118
+ // PNG signature
119
+ const signature = Buffer.from(PNG_MAGIC);
120
+
121
+ // IHDR: width(4) + height(4) + bitDepth(1) + colorType(1) + compression(1) + filter(1) + interlace(1)
122
+ const ihdr = Buffer.alloc(13);
123
+ ihdr.writeUInt32BE(img.width, 0);
124
+ ihdr.writeUInt32BE(img.height, 4);
125
+ ihdr[8] = 8; // bit depth
126
+ ihdr[9] = 6; // color type: RGBA
127
+ ihdr[10] = 0; // compression method
128
+ ihdr[11] = 0; // filter method
129
+ ihdr[12] = 0; // no interlace
130
+
131
+ // IDAT: for each scanline, prepend filter byte 0 (None), then raw RGBA pixels
132
+ const rawStride = img.width * 4;
133
+ const rawData = Buffer.alloc(img.height * (1 + rawStride));
134
+ for (let y = 0; y < img.height; y++) {
135
+ const outOff = y * (1 + rawStride);
136
+ rawData[outOff] = 0; // filter type: None
137
+ img.data.copy(rawData, outOff + 1, y * rawStride, (y + 1) * rawStride);
138
+ }
139
+ const compressed = zlib.deflateSync(rawData);
140
+
141
+ // IEND: empty chunk
142
+ const iend = Buffer.alloc(0);
143
+
144
+ return Buffer.concat([
145
+ signature,
146
+ writeChunk('IHDR', ihdr),
147
+ writeChunk('IDAT', compressed),
148
+ writeChunk('IEND', iend),
149
+ ]);
150
+ }
151
+
152
+ /**
153
+ * Generate a visual diff image highlighting pixel differences.
154
+ * - Pixels only in one image (size mismatch): bright red (255,0,0,255)
155
+ * - Pixels differing beyond threshold: red-tinted (255, g/3, b/3, 255)
156
+ * - Pixels matching: dimmed (r/3, g/3, b/3, 128)
157
+ */
158
+ export function generateDiffImage(base: DecodedImage, curr: DecodedImage, colorThreshold: number): Buffer {
159
+ const w = Math.max(base.width, curr.width);
160
+ const h = Math.max(base.height, curr.height);
161
+ const diffData = Buffer.alloc(w * h * 4);
162
+ const colorThreshSq = colorThreshold * colorThreshold * 3;
163
+
164
+ for (let y = 0; y < h; y++) {
165
+ for (let x = 0; x < w; x++) {
166
+ const di = (y * w + x) * 4;
167
+ const inBase = x < base.width && y < base.height;
168
+ const inCurr = x < curr.width && y < curr.height;
169
+
170
+ if (!inBase || !inCurr) {
171
+ // Size mismatch — bright red
172
+ diffData[di] = 255;
173
+ diffData[di + 1] = 0;
174
+ diffData[di + 2] = 0;
175
+ diffData[di + 3] = 255;
176
+ continue;
177
+ }
178
+
179
+ const bi = (y * base.width + x) * 4;
180
+ const ci = (y * curr.width + x) * 4;
181
+ const dr = base.data[bi] - curr.data[ci];
182
+ const dg = base.data[bi + 1] - curr.data[ci + 1];
183
+ const db = base.data[bi + 2] - curr.data[ci + 2];
184
+ const distSq = dr * dr + dg * dg + db * db;
185
+ const isDiff = colorThreshold === 0 ? distSq > 0 : distSq > colorThreshSq;
186
+
187
+ if (isDiff) {
188
+ // Different — red-tinted using current image colors
189
+ diffData[di] = 255;
190
+ diffData[di + 1] = (curr.data[ci + 1] / 3) | 0;
191
+ diffData[di + 2] = (curr.data[ci + 2] / 3) | 0;
192
+ diffData[di + 3] = 255;
193
+ } else {
194
+ // Matching — dimmed
195
+ diffData[di] = (curr.data[ci] / 3) | 0;
196
+ diffData[di + 1] = (curr.data[ci + 1] / 3) | 0;
197
+ diffData[di + 2] = (curr.data[ci + 2] / 3) | 0;
198
+ diffData[di + 3] = 128;
199
+ }
200
+ }
201
+ }
202
+
203
+ return encodePNG({ width: w, height: h, data: diffData });
204
+ }
205
+
99
206
  export function compareScreenshots(
100
207
  baselineBuf: Buffer,
101
208
  currentBuf: Buffer,
@@ -129,10 +236,12 @@ export function compareScreenshots(
129
236
  }
130
237
 
131
238
  const mismatchPct = totalPixels > 0 ? (diffPixels / totalPixels) * 100 : 0;
132
- return {
133
- totalPixels,
134
- diffPixels,
135
- mismatchPct,
136
- passed: mismatchPct <= thresholdPct,
137
- };
239
+ const passed = mismatchPct <= thresholdPct;
240
+ const result: CompareResult = { totalPixels, diffPixels, mismatchPct, passed };
241
+
242
+ if (!passed) {
243
+ result.diffImage = generateDiffImage(base, curr, colorThreshold);
244
+ }
245
+
246
+ return result;
138
247
  }
package/src/server.ts CHANGED
@@ -26,6 +26,7 @@ export { type LogEntry, type NetworkEntry };
26
26
 
27
27
  // ─── Auth (inline) ─────────────────────────────────────────────
28
28
  const AUTH_TOKEN = crypto.randomUUID();
29
+ const DEBUG_PORT = parseInt(process.env.BROWSE_DEBUG_PORT || '0', 10);
29
30
  const BROWSE_PORT = parseInt(process.env.BROWSE_PORT || '0', 10); // 0 = auto-scan
30
31
  const BROWSE_INSTANCE = process.env.BROWSE_INSTANCE || '';
31
32
  const INSTANCE_SUFFIX = BROWSE_PORT ? `-${BROWSE_PORT}` : (BROWSE_INSTANCE ? `-${BROWSE_INSTANCE}` : '');
@@ -107,7 +108,7 @@ const READ_COMMANDS = new Set([
107
108
  'text', 'html', 'links', 'forms', 'accessibility',
108
109
  'js', 'eval', 'css', 'attrs', 'element-state', 'dialog',
109
110
  'console', 'network', 'cookies', 'storage', 'perf', 'devices',
110
- 'value', 'count',
111
+ 'value', 'count', 'clipboard',
111
112
  ]);
112
113
 
113
114
  const WRITE_COMMANDS = new Set([
@@ -128,7 +129,7 @@ const META_COMMANDS = new Set([
128
129
  'url', 'snapshot', 'snapshot-diff', 'screenshot-diff',
129
130
  'sessions', 'session-close',
130
131
  'frame', 'state', 'find',
131
- 'auth', 'har',
132
+ 'auth', 'har', 'inspect',
132
133
  ]);
133
134
 
134
135
  // Probe if a port is free using net.createServer (not Bun.serve which fatally crashes on EADDRINUSE)
@@ -373,7 +374,10 @@ async function start() {
373
374
  console.log(`[browse] Connected to remote Chrome via CDP: ${cdpUrl}`);
374
375
  } else {
375
376
  // Launch local Chromium
376
- const launchOptions: Record<string, any> = { headless: true };
377
+ const launchOptions: Record<string, any> = { headless: process.env.BROWSE_HEADED !== '1' };
378
+ if (DEBUG_PORT > 0) {
379
+ launchOptions.args = [`--remote-debugging-port=${DEBUG_PORT}`];
380
+ }
377
381
  const proxyServer = process.env.BROWSE_PROXY;
378
382
  if (proxyServer) {
379
383
  launchOptions.proxy = { server: proxyServer };
@@ -383,17 +387,11 @@ async function start() {
383
387
  }
384
388
  browser = await chromium.launch(launchOptions);
385
389
 
386
- // Chromium crash → flush, cleanup, exit (only for owned browser)
390
+ // Chromium crash → clean shutdown (only for owned browser)
387
391
  browser.on('disconnected', () => {
388
- console.error('[browse] FATAL: Chromium process crashed or was killed. Server exiting.');
389
- if (sessionManager) flushAllBuffers(sessionManager, true);
390
- try {
391
- const currentState = JSON.parse(fs.readFileSync(STATE_FILE, 'utf-8'));
392
- if (currentState.pid === process.pid || currentState.token === AUTH_TOKEN) {
393
- fs.unlinkSync(STATE_FILE);
394
- }
395
- } catch {}
396
- process.exit(1);
392
+ if (isShuttingDown) return;
393
+ console.error('[browse] Chromium disconnected. Shutting down.');
394
+ shutdown();
397
395
  });
398
396
  }
399
397
 
@@ -445,13 +443,16 @@ async function start() {
445
443
  });
446
444
 
447
445
  // Write state file
448
- const state = {
446
+ const state: Record<string, any> = {
449
447
  pid: process.pid,
450
448
  port,
451
449
  token: AUTH_TOKEN,
452
450
  startedAt: new Date().toISOString(),
453
451
  serverPath: path.resolve(import.meta.dir, 'server.ts'),
454
452
  };
453
+ if (DEBUG_PORT > 0) {
454
+ state.debugPort = DEBUG_PORT;
455
+ }
455
456
  fs.writeFileSync(STATE_FILE, JSON.stringify(state, null, 2), { mode: 0o600 });
456
457
 
457
458
  console.log(`[browse] Server running on http://127.0.0.1:${port} (PID: ${process.pid})`);