barebrowse 0.3.3 → 0.4.0

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,7 @@ allowed-tools: Bash(barebrowse:*)
6
6
 
7
7
  # barebrowse CLI — Browser Automation for Agents
8
8
 
9
- Browse any URL using the user's real browser with real cookies. Returns pruned ARIA snapshots (40-90% smaller than raw) with `[ref=N]` markers for interaction. Handles cookie consent, login sessions, and bot detection automatically.
9
+ Browse any URL using the user's real browser with real cookies. Returns pruned ARIA snapshots (40-90% smaller than raw) with `[ref=N]` markers for interaction. Handles cookie consent, login sessions, JS dialogs, and bot detection automatically.
10
10
 
11
11
  ## Quick Start
12
12
 
@@ -36,15 +36,21 @@ All output files go to `.barebrowse/` in the current directory. Read them with t
36
36
  - `--browser=firefox|chromium` — Cookie source
37
37
  - `--prune-mode=act|read` — Default pruning mode
38
38
  - `--timeout=N` — Navigation timeout in ms
39
+ - `--proxy=URL` — HTTP/SOCKS proxy server
40
+ - `--viewport=WxH` — Viewport size (e.g. 1280x720)
41
+ - `--storage-state=FILE` — Load cookies/localStorage from JSON file
39
42
 
40
43
  ### Navigation
41
44
 
42
45
  | Command | Output |
43
46
  |---------|--------|
44
47
  | `barebrowse goto <url>` | Navigates, waits for load, dismisses consent. Prints "ok". |
48
+ | `barebrowse back` | Go back in browser history. |
49
+ | `barebrowse forward` | Go forward in browser history. |
45
50
  | `barebrowse snapshot` | ARIA snapshot → `.barebrowse/page-<timestamp>.yml` |
46
51
  | `barebrowse snapshot --mode=read` | Read mode: keeps all text (for content extraction) |
47
52
  | `barebrowse screenshot` | Screenshot → `.barebrowse/screenshot-<timestamp>.png` |
53
+ | `barebrowse pdf [--landscape]` | PDF export → `.barebrowse/page-<timestamp>.pdf` |
48
54
 
49
55
  ### Interaction
50
56
 
@@ -57,6 +63,15 @@ All output files go to `.barebrowse/` in the current directory. Read them with t
57
63
  | `barebrowse scroll <deltaY>` | Scroll page (positive=down, negative=up) |
58
64
  | `barebrowse hover <ref>` | Hover over element (triggers tooltips) |
59
65
  | `barebrowse select <ref> <value>` | Select dropdown option |
66
+ | `barebrowse drag <fromRef> <toRef>` | Drag element to another element |
67
+ | `barebrowse upload <ref> <files..>` | Upload file(s) to a file input element |
68
+
69
+ ### Tabs
70
+
71
+ | Command | Description |
72
+ |---------|-------------|
73
+ | `barebrowse tabs` | List open tabs (index, url, title) |
74
+ | `barebrowse tab <index>` | Switch to tab by index |
60
75
 
61
76
  ### Debugging
62
77
 
@@ -64,9 +79,17 @@ All output files go to `.barebrowse/` in the current directory. Read them with t
64
79
  |---------|--------|
65
80
  | `barebrowse eval <expression>` | Evaluate JS in page, print result |
66
81
  | `barebrowse wait-idle` | Wait for network idle (no requests for 500ms) |
82
+ | `barebrowse wait-for [opts]` | Wait for content to appear on page |
67
83
  | `barebrowse console-logs` | Console logs → `.barebrowse/console-<timestamp>.json` |
68
84
  | `barebrowse network-log` | Network log → `.barebrowse/network-<timestamp>.json` |
69
85
  | `barebrowse network-log --failed` | Only failed/4xx/5xx requests |
86
+ | `barebrowse dialog-log` | JS dialog log → `.barebrowse/dialogs-<timestamp>.json` |
87
+ | `barebrowse save-state` | Cookies + localStorage → `.barebrowse/state-<timestamp>.json` |
88
+
89
+ **wait-for flags:**
90
+ - `--text=STRING` — Wait for text to appear in page body
91
+ - `--selector=CSS` — Wait for CSS selector to match
92
+ - `--timeout=N` — Max wait time in ms (default: 30000)
70
93
 
71
94
  ## Snapshot Format
72
95
 
@@ -80,7 +103,7 @@ The snapshot is a YAML-like ARIA tree. Each line is one node:
80
103
  - link "More information..." [ref=8]
81
104
  ```
82
105
 
83
- - `[ref=N]` — Use this number with click, type, fill, hover, select
106
+ - `[ref=N]` — Use this number with click, type, fill, hover, select, drag, upload
84
107
  - Refs change on every snapshot — always take a fresh snapshot before interacting
85
108
  - **act mode** (default): interactive elements + labels — for clicking, typing, navigating
86
109
  - **read mode**: all text content — for reading articles, extracting data
@@ -90,7 +113,7 @@ The snapshot is a YAML-like ARIA tree. Each line is one node:
90
113
  1. `barebrowse open <url>` — start session
91
114
  2. `barebrowse snapshot` — observe page (read the .yml file)
92
115
  3. Decide action based on snapshot content
93
- 4. `barebrowse click/type/fill/press/scroll <ref>` — act
116
+ 4. `barebrowse click/type/fill/drag/upload <ref>` (or `press <key>` / `scroll <deltaY>`) — act
94
117
  5. `barebrowse snapshot` — observe result (refs are now different!)
95
118
  6. Repeat 3-5 until goal achieved
96
119
  7. `barebrowse close` — clean up
@@ -100,6 +123,11 @@ The snapshot is a YAML-like ARIA tree. Each line is one node:
100
123
  - **Always snapshot before interacting** — refs are ephemeral and change every time
101
124
  - **Use `fill` instead of `type`** when replacing existing text in input fields
102
125
  - **Use `--mode=read`** for snapshot when you need to extract article content or data
126
+ - **Use `back`/`forward`** to navigate browser history instead of re-entering URLs
127
+ - **Use `upload`** for file inputs — pass absolute paths to the files
128
+ - **Use `wait-for`** when content loads asynchronously — more reliable than `wait-idle`
129
+ - **Check `dialog-log`** if JS alerts/confirms were auto-dismissed during your session
130
+ - **Use `save-state`** to persist cookies/localStorage for later sessions via `--storage-state`
103
131
  - **Check `console-logs`** when page behavior seems wrong — JS errors show up there
104
132
  - **Check `network-log --failed`** to debug missing content or broken API calls
105
133
  - **Use `eval`** as an escape hatch when ARIA tree doesn't show what you need
package/CHANGELOG.md CHANGED
@@ -1,5 +1,45 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.4.0
4
+
5
+ 10 new features inspired by Playwright MCP. All validated manually against live sites.
6
+
7
+ ### New commands
8
+ - `back` / `forward` — Browser history navigation via `Page.getNavigationHistory`
9
+ - `drag <fromRef> <toRef>` — Drag-and-drop between elements (Kanban boards, sliders)
10
+ - `upload <ref> <files..>` — File upload via `DOM.setFileInputFiles`
11
+ - `pdf [--landscape]` — PDF export via `Page.printToPDF`
12
+ - `tabs` / `tab <index>` — List and switch between browser tabs
13
+ - `wait-for --text=X --selector=Y` — Poll for content to appear on page
14
+ - `save-state` — Export cookies + localStorage to JSON
15
+ - `dialog-log` — View auto-dismissed JS dialog history
16
+
17
+ ### New open flags
18
+ - `--proxy=URL` — HTTP/SOCKS proxy server (pass-through to Chromium launch args)
19
+ - `--viewport=WxH` — Set viewport dimensions via `Emulation.setDeviceMetricsOverride`
20
+ - `--storage-state=FILE` — Load cookies/localStorage from previously saved JSON
21
+
22
+ ### Built-in behavior
23
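+ - JS dialog auto-handling — alert/confirm/prompt are closed automatically via `Page.handleJavaScriptDialog` (accepted, with prompts answered using their default text; `beforeunload` dialogs are declined), and each dialog is logged to `dialogLog`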
24
+
25
+ ### Library API additions (connect())
26
+ - `goBack()`, `goForward()`, `drag(fromRef, toRef)`, `upload(ref, files)`
27
+ - `pdf(opts)`, `tabs()`, `switchTab(index)`, `waitFor({ text, selector, timeout })`
28
+ - `saveState(filePath)`, `dialogLog` array
29
+ - New connect opts: `proxy`, `viewport`, `storageState`
30
+
31
+ ### MCP server
32
+ - 5 new tools: `back`, `forward`, `drag`, `upload`, `pdf` (12 total, was 7)
33
+
34
+ ### bareagent adapter
35
+ - 4 new tools: `back`, `forward`, `drag`, `upload` (13 total, was 9)
36
+
37
+ ### Docs
38
+ - SKILL.md updated with all new commands and flags
39
+ - README: new actions table, dialog handling in obstacle course
40
+ - barebrowse.context.md: full connect() API table updated
41
+ - docs/00-context/system-state.md: actions + obstacle tables updated
42
+
3
43
  ## 0.3.3
4
44
 
5
45
  - Simplified skill install paths: Claude Code (`.claude/` project, `~/.claude/` global), other agents (`.barebrowse/commands/` project, `~/.config/barebrowse/commands/` global)
package/README.md CHANGED
@@ -123,6 +123,7 @@ This is the obstacle course your agent doesn't have to think about:
123
123
  | **SPA navigation** (YouTube, GitHub) | SPA-aware wait: frameNavigated + loadEventFired | Both |
124
124
  | **Bot detection** (Google, Reddit) | Stealth patches (headless) + headed fallback with real cookies | Both |
125
125
  | **navigator.webdriver leak** | Patched before page scripts run: webdriver, plugins, languages, chrome object | Headless |
126
+ | **JS dialogs** (alert/confirm/prompt) | Auto-dismiss via CDP, logged for inspection | Both |
126
127
  | **Profile locking** | Unique temp dir per headless instance | Headless |
127
128
  | **ARIA noise** | 9-step pruning pipeline (ported from mcprune): wrapper collapse, noise removal, landmark promotion | Both |
128
129
 
@@ -146,6 +147,7 @@ Everything the agent can do through barebrowse:
146
147
  | Action | What it does |
147
148
  |--------|-------------|
148
149
  | **Navigate** | Load a URL, wait for page load, auto-dismiss consent |
150
+ | **Back / Forward** | Browser history navigation |
149
151
  | **Snapshot** | Pruned ARIA tree with `[ref=N]` markers (40-90% token reduction) |
150
152
  | **Click** | Scroll into view + mouse click at element center |
151
153
  | **Type** | Focus + insert text, with option to clear existing content first |
@@ -153,9 +155,16 @@ Everything the agent can do through barebrowse:
153
155
  | **Scroll** | Mouse wheel up or down |
154
156
  | **Hover** | Move mouse to element center (triggers tooltips, hover states) |
155
157
  | **Select** | Set dropdown value (native select or custom dropdown) |
158
+ | **Drag** | Drag one element to another (Kanban boards, sliders) |
159
+ | **Upload** | Set files on a file input element |
156
160
  | **Screenshot** | Page capture as base64 PNG/JPEG/WebP |
161
+ | **PDF** | Export page as PDF |
162
+ | **Tabs** | List open tabs, switch between them |
163
+ | **Wait for content** | Poll for text or CSS selector to appear on page |
157
164
  | **Wait for navigation** | SPA-aware: works for full page loads and pushState |
158
165
  | **Wait for network idle** | Resolve when no pending requests for 500ms |
166
+ | **Dialog handling** | Auto-dismiss JS alert/confirm/prompt dialogs |
167
+ | **Save state** | Export cookies + localStorage to JSON |
159
168
  | **Inject cookies** | Extract from Firefox/Chromium and inject via CDP |
160
169
  | **Raw CDP** | Escape hatch for any Chrome DevTools Protocol command |
161
170
 
@@ -56,6 +56,8 @@ const snapshot = await browse('https://example.com', {
56
56
  | Method | Args | Returns | Notes |
57
57
  |---|---|---|---|
58
58
  | `goto(url, timeout?)` | url: string, timeout: number (default 30000) | void | Navigate + wait for load + dismiss consent |
59
+ | `goBack()` | -- | void | Navigate back in browser history |
60
+ | `goForward()` | -- | void | Navigate forward in browser history |
59
61
  | `snapshot(pruneOpts?)` | false or { mode: 'act'\|'read' } | string | ARIA tree with `[ref=N]` markers. Pass `false` for raw. |
60
62
  | `click(ref)` | ref: string | void | Scroll into view + mouse press+release at center |
61
63
  | `type(ref, text, opts?)` | ref: string, text: string, opts: { clear?, keyEvents? } | void | Focus + insert text. `clear: true` replaces existing. |
@@ -63,13 +65,26 @@ const snapshot = await browse('https://example.com', {
63
65
  | `scroll(deltaY)` | deltaY: number | void | Mouse wheel. Positive = down, negative = up. |
64
66
  | `hover(ref)` | ref: string | void | Move mouse to element center |
65
67
  | `select(ref, value)` | ref: string, value: string | void | Set `<select>` value or click custom dropdown option |
68
+ | `drag(fromRef, toRef)` | fromRef: string, toRef: string | void | Drag from one element to another |
69
+ | `upload(ref, files)` | ref: string, files: string[] | void | Set files on a file input (absolute paths) |
66
70
  | `screenshot(opts?)` | { format?: 'png'\|'jpeg'\|'webp', quality?: number } | string (base64) | Page screenshot |
71
+ | `pdf(opts?)` | { landscape?: boolean } | string (base64) | Export page as PDF |
72
+ | `tabs()` | -- | Array<{index, url, title, targetId}> | List open browser tabs |
73
+ | `switchTab(index)` | index: number | void | Switch to tab by index |
74
+ | `waitFor(opts)` | { text?: string, selector?: string, timeout?: number } | void | Poll for content to appear on page |
67
75
  | `waitForNavigation(timeout?)` | timeout: number (default 30000) | void | Wait for page load or frame navigation |
68
76
  | `waitForNetworkIdle(opts?)` | { timeout?: number, idle?: number } | void | Wait until no pending requests for `idle` ms (default 500) |
77
+ | `saveState(filePath)` | filePath: string | void | Export cookies + localStorage to JSON file |
69
78
  | `injectCookies(url, opts?)` | url: string, { browser?: string } | void | Extract cookies from user's browser and inject via CDP |
79
+ | `dialogLog` | -- | Array<{type, message, timestamp}> | Auto-dismissed JS dialog history |
70
80
  | `cdp` | -- | object | Raw CDP session for escape hatch: `page.cdp.send(method, params)` |
71
81
  | `close()` | -- | void | Close page, disconnect CDP, kill browser (if headless) |
72
82
 
83
+ **connect() options** (in addition to mode/port/consent):
84
+ - `proxy: 'http://...'` — HTTP/SOCKS proxy for browser
85
+ - `viewport: '1280x720'` — Set viewport dimensions
86
+ - `storageState: 'file.json'` — Load cookies/localStorage from saved state
87
+
73
88
  ## Snapshot format
74
89
 
75
90
  The snapshot is a YAML-like ARIA tree. Each line is one node:
package/cli.js CHANGED
@@ -2,35 +2,7 @@
2
2
  /**
3
3
  * cli.js -- barebrowse CLI entry point.
4
4
  *
5
- * Session commands:
6
- * barebrowse open [url] [flags] Open browser session (daemon)
7
- * barebrowse close Close session + kill daemon
8
- * barebrowse status Check if session is running
9
- *
10
- * Navigation:
11
- * barebrowse goto <url> Navigate to URL
12
- * barebrowse snapshot [--mode] Get pruned ARIA snapshot → file
13
- * barebrowse screenshot [--format] Take screenshot → file
14
- *
15
- * Interaction:
16
- * barebrowse click <ref> Click element by ref
17
- * barebrowse type <ref> <text> Type text into element
18
- * barebrowse fill <ref> <text> Clear + type (replace content)
19
- * barebrowse press <key> Press special key
20
- * barebrowse scroll <deltaY> Scroll page
21
- * barebrowse hover <ref> Hover over element
22
- * barebrowse select <ref> <value> Select dropdown value
23
- *
24
- * Self-sufficiency:
25
- * barebrowse eval <expression> Evaluate JS in page
26
- * barebrowse wait-idle [--timeout] Wait for network idle
27
- * barebrowse console-logs Dump console logs → file
28
- * barebrowse network-log Dump network log → file
29
- *
30
- * Legacy / tools:
31
- * barebrowse browse <url> [mode] One-shot browse (stdout)
32
- * barebrowse mcp Start MCP server (stdio)
33
- * barebrowse install [--skill] Auto-configure MCP or install skill
5
+ * See `barebrowse` (no args) for full command reference.
34
6
  */
35
7
 
36
8
  import { existsSync, readFileSync, writeFileSync, mkdirSync, copyFileSync } from 'node:fs';
@@ -84,6 +56,26 @@ if (args.includes('--daemon-internal')) {
84
56
  await cmdProxy('console-logs', { level: parseFlag('--level'), clear: hasFlag('--clear') });
85
57
  } else if (cmd === 'network-log') {
86
58
  await cmdProxy('network-log', { failed: hasFlag('--failed') });
59
+ } else if (cmd === 'back') {
60
+ await cmdProxy('back');
61
+ } else if (cmd === 'forward') {
62
+ await cmdProxy('forward');
63
+ } else if (cmd === 'drag' && args[1] && args[2]) {
64
+ await cmdProxy('drag', { fromRef: args[1], toRef: args[2] });
65
+ } else if (cmd === 'upload' && args[1] && args[2]) {
66
+ await cmdProxy('upload', { ref: args[1], files: args.slice(2).filter(a => !a.startsWith('--')).map(f => resolve(f)) });
67
+ } else if (cmd === 'pdf') {
68
+ await cmdProxy('pdf', { landscape: hasFlag('--landscape') });
69
+ } else if (cmd === 'tabs') {
70
+ await cmdProxy('tabs');
71
+ } else if (cmd === 'tab' && args[1]) {
72
+ await cmdProxy('tab', { index: Number(args[1]) });
73
+ } else if (cmd === 'wait-for') {
74
+ await cmdProxy('wait-for', { text: parseFlag('--text'), selector: parseFlag('--selector'), timeout: parseFlag('--timeout') });
75
+ } else if (cmd === 'save-state') {
76
+ await cmdProxy('save-state');
77
+ } else if (cmd === 'dialog-log') {
78
+ await cmdProxy('dialog-log');
87
79
  } else {
88
80
  printUsage();
89
81
  }
@@ -111,6 +103,9 @@ async function cmdOpen() {
111
103
  timeout: parseFlag('--timeout'),
112
104
  pruneMode: parseFlag('--prune-mode') || 'act',
113
105
  consent: !hasFlag('--no-consent'),
106
+ proxy: parseFlag('--proxy'),
107
+ viewport: parseFlag('--viewport'),
108
+ storageState: parseFlag('--storage-state'),
114
109
  };
115
110
 
116
111
  try {
@@ -208,6 +203,9 @@ async function runDaemonInternal() {
208
203
  timeout: parseFlag('--timeout'),
209
204
  pruneMode: parseFlag('--prune-mode') || 'act',
210
205
  consent: !hasFlag('--no-consent'),
206
+ proxy: parseFlag('--proxy'),
207
+ viewport: parseFlag('--viewport'),
208
+ storageState: parseFlag('--storage-state'),
211
209
  };
212
210
  const outputDir = parseFlag('--output-dir') || resolve('.barebrowse');
213
211
  const url = parseFlag('--url');
@@ -360,11 +358,17 @@ Session:
360
358
  --timeout=N Navigation timeout in ms
361
359
  --prune-mode=act|read Default pruning mode
362
360
  --no-consent Skip consent dismissal
361
+ --proxy=URL HTTP/SOCKS proxy server
362
+ --viewport=WxH Viewport size (e.g. 1280x720)
363
+ --storage-state=FILE Load cookies/localStorage from JSON file
363
364
 
364
365
  Navigation:
365
366
  barebrowse goto <url> Navigate to URL
367
+ barebrowse back Go back in history
368
+ barebrowse forward Go forward in history
366
369
  barebrowse snapshot [--mode=M] ARIA snapshot -> .barebrowse/page-*.yml
367
370
  barebrowse screenshot [--format] Screenshot -> .barebrowse/screenshot-*.png
371
+ barebrowse pdf [--landscape] PDF export -> .barebrowse/page-*.pdf
368
372
 
369
373
  Interaction:
370
374
  barebrowse click <ref> Click element
@@ -374,12 +378,24 @@ Interaction:
374
378
  barebrowse scroll <deltaY> Scroll (positive=down)
375
379
  barebrowse hover <ref> Hover element
376
380
  barebrowse select <ref> <value> Select dropdown value
381
+ barebrowse drag <from> <to> Drag element to another
382
+ barebrowse upload <ref> <files..> Upload files to file input
383
+
384
+ Tabs:
385
+ barebrowse tabs List open tabs
386
+ barebrowse tab <index> Switch to tab by index
377
387
 
378
388
  Debugging:
379
389
  barebrowse eval <expression> Run JS in page context
380
390
  barebrowse wait-idle [--timeout] Wait for network idle
391
+ barebrowse wait-for [opts] Wait for text/selector to appear
392
+ --text=STRING Wait for text in page body
393
+ --selector=CSS Wait for CSS selector to match
394
+ --timeout=N Max wait time in ms (default: 30000)
381
395
  barebrowse console-logs Console logs -> .barebrowse/console-*.json
382
396
  barebrowse network-log Network log -> .barebrowse/network-*.json
397
+ barebrowse dialog-log JS dialog log -> .barebrowse/dialogs-*.json
398
+ barebrowse save-state Cookies + localStorage -> .barebrowse/state-*.json
383
399
 
384
400
  One-shot:
385
401
  barebrowse browse <url> [mode] Browse + print snapshot to stdout
@@ -48,6 +48,14 @@ Every action returns a **pruned ARIA snapshot** -- the agent's view of the page
48
48
  | Screenshot | `page.screenshot(opts)` | `Page.captureScreenshot`, returns base64 string | Done |
49
49
  | Wait for nav | `page.waitForNavigation()` | Promise.race of loadEventFired + frameNavigated (SPA-aware) | Done |
50
50
  | Wait for idle | `page.waitForNetworkIdle(opts)` | Resolve when no pending requests for N ms (default 500) | Done |
51
+ | Wait for content | `page.waitFor({ text, selector })` | Poll for text or CSS selector to appear on page | Done |
52
+ | Back / Forward | `page.goBack()` / `page.goForward()` | Browser history navigation via `Page.getNavigationHistory` | Done |
53
+ | Drag | `page.drag(fromRef, toRef)` | Mouse down on source, move to target, release | Done |
54
+ | Upload | `page.upload(ref, files)` | Set files on file input via `DOM.setFileInputFiles` | Done |
55
+ | PDF | `page.pdf(opts)` | Export page as PDF via `Page.printToPDF` | Done |
56
+ | Tabs | `page.tabs()` / `page.switchTab(index)` | List and switch between browser tabs | Done |
57
+ | Dialog handling | Auto | JS alert/confirm/prompt auto-dismissed, logged to `page.dialogLog` | Done |
58
+ | Save state | `page.saveState(filePath)` | Export cookies + localStorage to JSON for later `--storage-state` | Done |
51
59
  | Inject cookies | `page.injectCookies(url, opts)` | Extract cookies from Firefox/Chromium, inject via CDP | Done |
52
60
  | Raw CDP | `page.cdp.send(method, params)` | Escape hatch for any CDP command | Done |
53
61
  | Close | `page.close()` | Close page target, disconnect CDP, kill browser (if headless) | Done |
@@ -70,6 +78,7 @@ Every action returns a **pruned ARIA snapshot** -- the agent's view of the page
70
78
  | **SPA navigation** (YouTube, GitHub) | `waitForNavigation()` uses frameNavigated + loadEventFired race | Both |
71
79
  | **Bot detection** (Google, Reddit) | Stealth patches (headless) + headed mode with real cookies | Both |
72
80
  | **`navigator.webdriver`** | Stealth patches: webdriver, plugins, languages, chrome object | Headless |
81
+ | **JS dialogs** (alert/confirm/prompt) | Auto-dismiss via `Page.handleJavaScriptDialog`, logged to `dialogLog` | Both |
73
82
  | **Profile locking** | Unique temp dir per headless instance (`/tmp/barebrowse-<pid>-<ts>`) | Headless |
74
83
  | **ARIA noise** | 9-step pruning: wrapper collapse, noise removal, landmark promotion | Both |
75
84
 
package/mcp-server.js CHANGED
@@ -3,7 +3,7 @@
3
3
  * mcp-server.js — MCP server for barebrowse.
4
4
  *
5
5
  * Raw JSON-RPC 2.0 over stdio. No SDK dependency.
6
- * 7 tools: browse (one-shot), goto, snapshot, click, type, press, scroll.
6
+ * 12 tools: browse, goto, snapshot, click, type, press, scroll, back, forward, drag, upload, pdf.
7
7
  *
8
8
  * Session tools share a singleton page, lazy-created on first use.
9
9
  * Action tools return 'ok' — agent calls snapshot explicitly to observe.
@@ -93,6 +93,50 @@ const TOOLS = [
93
93
  required: ['deltaY'],
94
94
  },
95
95
  },
96
+ {
97
+ name: 'back',
98
+ description: 'Go back in browser history. Returns ok.',
99
+ inputSchema: { type: 'object', properties: {} },
100
+ },
101
+ {
102
+ name: 'forward',
103
+ description: 'Go forward in browser history. Returns ok.',
104
+ inputSchema: { type: 'object', properties: {} },
105
+ },
106
+ {
107
+ name: 'drag',
108
+ description: 'Drag one element to another by refs from the snapshot. Returns ok.',
109
+ inputSchema: {
110
+ type: 'object',
111
+ properties: {
112
+ fromRef: { type: 'string', description: 'Source element ref' },
113
+ toRef: { type: 'string', description: 'Target element ref' },
114
+ },
115
+ required: ['fromRef', 'toRef'],
116
+ },
117
+ },
118
+ {
119
+ name: 'upload',
120
+ description: 'Upload files to a file input element by ref. Returns ok.',
121
+ inputSchema: {
122
+ type: 'object',
123
+ properties: {
124
+ ref: { type: 'string', description: 'File input element ref' },
125
+ files: { type: 'array', items: { type: 'string' }, description: 'Absolute file paths' },
126
+ },
127
+ required: ['ref', 'files'],
128
+ },
129
+ },
130
+ {
131
+ name: 'pdf',
132
+ description: 'Export current page as PDF. Returns base64-encoded PDF data.',
133
+ inputSchema: {
134
+ type: 'object',
135
+ properties: {
136
+ landscape: { type: 'boolean', description: 'Landscape orientation (default: false)' },
137
+ },
138
+ },
139
+ },
96
140
  ];
97
141
 
98
142
  async function handleToolCall(name, args) {
@@ -129,6 +173,30 @@ async function handleToolCall(name, args) {
129
173
  await page.scroll(args.deltaY);
130
174
  return 'ok';
131
175
  }
176
+ case 'back': {
177
+ const page = await getPage();
178
+ await page.goBack();
179
+ return 'ok';
180
+ }
181
+ case 'forward': {
182
+ const page = await getPage();
183
+ await page.goForward();
184
+ return 'ok';
185
+ }
186
+ case 'drag': {
187
+ const page = await getPage();
188
+ await page.drag(args.fromRef, args.toRef);
189
+ return 'ok';
190
+ }
191
+ case 'upload': {
192
+ const page = await getPage();
193
+ await page.upload(args.ref, args.files);
194
+ return 'ok';
195
+ }
196
+ case 'pdf': {
197
+ const page = await getPage();
198
+ return await page.pdf({ landscape: args.landscape });
199
+ }
132
200
  default:
133
201
  throw new Error(`Unknown tool: ${name}`);
134
202
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "barebrowse",
3
- "version": "0.3.3",
3
+ "version": "0.4.0",
4
4
  "description": "Authenticated web browsing for autonomous agents via CDP. URL in, pruned ARIA snapshot out.",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
package/src/bareagent.js CHANGED
@@ -133,6 +133,44 @@ export function createBrowseTools(opts = {}) {
133
133
  },
134
134
  execute: async ({ ref, value }) => actionAndSnapshot((page) => page.select(ref, value)),
135
135
  },
136
+ {
137
+ name: 'back',
138
+ description: 'Go back in browser history. Returns the updated snapshot.',
139
+ parameters: { type: 'object', properties: {} },
140
+ execute: async () => actionAndSnapshot((page) => page.goBack()),
141
+ },
142
+ {
143
+ name: 'forward',
144
+ description: 'Go forward in browser history. Returns the updated snapshot.',
145
+ parameters: { type: 'object', properties: {} },
146
+ execute: async () => actionAndSnapshot((page) => page.goForward()),
147
+ },
148
+ {
149
+ name: 'drag',
150
+ description: 'Drag one element to another by their refs. Returns the updated snapshot.',
151
+ parameters: {
152
+ type: 'object',
153
+ properties: {
154
+ fromRef: { type: 'string', description: 'Source element ref' },
155
+ toRef: { type: 'string', description: 'Target element ref' },
156
+ },
157
+ required: ['fromRef', 'toRef'],
158
+ },
159
+ execute: async ({ fromRef, toRef }) => actionAndSnapshot((page) => page.drag(fromRef, toRef)),
160
+ },
161
+ {
162
+ name: 'upload',
163
+ description: 'Upload files to a file input element. Returns the updated snapshot.',
164
+ parameters: {
165
+ type: 'object',
166
+ properties: {
167
+ ref: { type: 'string', description: 'File input element ref' },
168
+ files: { type: 'array', items: { type: 'string' }, description: 'Absolute file paths' },
169
+ },
170
+ required: ['ref', 'files'],
171
+ },
172
+ execute: async ({ ref, files }) => actionAndSnapshot((page) => page.upload(ref, files)),
173
+ },
136
174
  {
137
175
  name: 'screenshot',
138
176
  description: 'Take a screenshot of the current page. Returns base64-encoded image.',
package/src/chromium.js CHANGED
@@ -84,6 +84,10 @@ export async function launch(opts = {}) {
84
84
  '--disable-features=MediaRouter',
85
85
  ];
86
86
 
87
+ if (opts.proxy) {
88
+ args.push(`--proxy-server=${opts.proxy}`);
89
+ }
90
+
87
91
  if (opts.userDataDir) {
88
92
  args.push(`--user-data-dir=${opts.userDataDir}`);
89
93
  } else {
package/src/daemon.js CHANGED
@@ -36,6 +36,9 @@ export async function startDaemon(opts, outputDir, initialUrl) {
36
36
  if (opts.timeout) args.push('--timeout', String(opts.timeout));
37
37
  if (opts.pruneMode) args.push('--prune-mode', opts.pruneMode);
38
38
  if (opts.consent === false) args.push('--no-consent');
39
+ if (opts.proxy) args.push('--proxy', opts.proxy);
40
+ if (opts.viewport) args.push('--viewport', opts.viewport);
41
+ if (opts.storageState) args.push('--storage-state', opts.storageState);
39
42
 
40
43
  const child = spawn(process.execPath, args, {
41
44
  detached: true,
@@ -71,6 +74,9 @@ export async function runDaemon(opts, outputDir, initialUrl) {
71
74
  mode: opts.mode || 'headless',
72
75
  port: opts.port ? Number(opts.port) : undefined,
73
76
  consent: opts.consent,
77
+ proxy: opts.proxy,
78
+ viewport: opts.viewport,
79
+ storageState: opts.storageState,
74
80
  });
75
81
 
76
82
  // Console log capture
@@ -192,6 +198,63 @@ export async function runDaemon(opts, outputDir, initialUrl) {
192
198
  return { ok: true };
193
199
  },
194
200
 
201
+ async back() {
202
+ await page.goBack();
203
+ return { ok: true };
204
+ },
205
+
206
+ async forward() {
207
+ await page.goForward();
208
+ return { ok: true };
209
+ },
210
+
211
+ async drag({ fromRef, toRef }) {
212
+ await page.drag(String(fromRef), String(toRef));
213
+ return { ok: true };
214
+ },
215
+
216
+ async upload({ ref, files }) {
217
+ await page.upload(String(ref), files);
218
+ return { ok: true };
219
+ },
220
+
221
+ async pdf({ landscape }) {
222
+ const data = await page.pdf({ landscape });
223
+ const ts = new Date().toISOString().replace(/[:.]/g, '-');
224
+ const file = join(absDir, `page-${ts}.pdf`);
225
+ writeFileSync(file, Buffer.from(data, 'base64'));
226
+ return { ok: true, file };
227
+ },
228
+
229
+ async tabs() {
230
+ const list = await page.tabs();
231
+ return { ok: true, value: list };
232
+ },
233
+
234
+ async tab({ index }) {
235
+ await page.switchTab(Number(index));
236
+ return { ok: true };
237
+ },
238
+
239
+ async 'wait-for'({ text, selector, timeout }) {
240
+ await page.waitFor({ text, selector, timeout });
241
+ return { ok: true };
242
+ },
243
+
244
+ async 'save-state'() {
245
+ const ts = new Date().toISOString().replace(/[:.]/g, '-');
246
+ const file = join(absDir, `state-${ts}.json`);
247
+ await page.saveState(file);
248
+ return { ok: true, file };
249
+ },
250
+
251
+ async 'dialog-log'() {
252
+ const ts = new Date().toISOString().replace(/[:.]/g, '-');
253
+ const file = join(absDir, `dialogs-${ts}.json`);
254
+ writeFileSync(file, JSON.stringify(page.dialogLog, null, 2));
255
+ return { ok: true, file, count: page.dialogLog.length };
256
+ },
257
+
195
258
  async eval({ expression }) {
196
259
  const result = await page.cdp.send('Runtime.evaluate', {
197
260
  expression,
package/src/index.js CHANGED
@@ -13,7 +13,7 @@ import { createCDP } from './cdp.js';
13
13
  import { formatTree } from './aria.js';
14
14
  import { authenticate } from './auth.js';
15
15
  import { prune as pruneTree } from './prune.js';
16
- import { click as cdpClick, type as cdpType, scroll as cdpScroll, press as cdpPress, hover as cdpHover, select as cdpSelect } from './interact.js';
16
+ import { click as cdpClick, type as cdpType, scroll as cdpScroll, press as cdpPress, hover as cdpHover, select as cdpSelect, drag as cdpDrag, upload as cdpUpload } from './interact.js';
17
17
  import { dismissConsent } from './consent.js';
18
18
  import { applyStealth } from './stealth.js';
19
19
 
@@ -45,12 +45,12 @@ export async function browse(url, opts = {}) {
45
45
  cdp = await createCDP(wsUrl);
46
46
  } else {
47
47
  // headless or hybrid (start headless)
48
- browser = await launch();
48
+ browser = await launch({ proxy: opts.proxy });
49
49
  cdp = await createCDP(browser.wsUrl);
50
50
  }
51
51
 
52
52
  // Step 2: Create a new page target and attach
53
- let page = await createPage(cdp, mode !== 'headed');
53
+ let page = await createPage(cdp, mode !== 'headed', { viewport: opts.viewport });
54
54
 
55
55
  // Step 2.5: Suppress permission prompts
56
56
  await suppressPermissions(cdp);
@@ -84,7 +84,7 @@ export async function browse(url, opts = {}) {
84
84
  const port = opts.port || 9222;
85
85
  const wsUrl = await getDebugUrl(port);
86
86
  cdp = await createCDP(wsUrl);
87
- page = await createPage(cdp, false);
87
+ page = await createPage(cdp, false, { viewport: opts.viewport });
88
88
  await suppressPermissions(cdp);
89
89
  if (opts.cookies !== false) {
90
90
  try { await authenticate(page.session, url, { browser: opts.browser }); } catch {}
@@ -131,16 +131,41 @@ export async function connect(opts = {}) {
131
131
  const wsUrl = await getDebugUrl(port);
132
132
  cdp = await createCDP(wsUrl);
133
133
  } else {
134
- browser = await launch();
134
+ browser = await launch({ proxy: opts.proxy });
135
135
  cdp = await createCDP(browser.wsUrl);
136
136
  }
137
137
 
138
- const page = await createPage(cdp, mode !== 'headed');
138
+ const page = await createPage(cdp, mode !== 'headed', { viewport: opts.viewport });
139
139
  let refMap = new Map();
140
140
 
141
141
  // Suppress permission prompts for all modes
142
142
  await suppressPermissions(cdp);
143
143
 
144
+ // Load storage state from file. Only cookies are restored here; any localStorage entries in the file are not applied.
145
+ if (opts.storageState) {
146
+ try {
147
+ const { readFileSync } = await import('node:fs');
148
+ const state = JSON.parse(readFileSync(opts.storageState, 'utf8'));
149
+ if (state.cookies?.length) {
150
+ await page.session.send('Network.setCookies', { cookies: state.cookies });
151
+ }
152
+ } catch { /* file not found or invalid — continue without */ }
153
+ }
154
+
155
+ // Auto-dismiss JS dialogs (alert, confirm, prompt)
156
+ const dialogLog = [];
157
+ page.session.on('Page.javascriptDialogOpening', async (params) => {
158
+ dialogLog.push({
159
+ type: params.type,
160
+ message: params.message,
161
+ timestamp: new Date().toISOString(),
162
+ });
163
+ await page.session.send('Page.handleJavaScriptDialog', {
164
+ accept: params.type !== 'beforeunload',
165
+ promptText: params.defaultPrompt || '',
166
+ });
167
+ });
168
+
144
169
  return {
145
170
  async goto(url, timeout = 30000) {
146
171
  await navigate(page, url, timeout);
@@ -149,6 +174,20 @@ export async function connect(opts = {}) {
149
174
  }
150
175
  },
151
176
 
177
+ async goBack() {
178
+ const { currentIndex, entries } = await page.session.send('Page.getNavigationHistory');
179
+ if (currentIndex <= 0) throw new Error('No previous page in history');
180
+ await page.session.send('Page.navigateToHistoryEntry', { entryId: entries[currentIndex - 1].id });
181
+ await new Promise((r) => setTimeout(r, 500));
182
+ },
183
+
184
+ async goForward() {
185
+ const { currentIndex, entries } = await page.session.send('Page.getNavigationHistory');
186
+ if (currentIndex >= entries.length - 1) throw new Error('No next page in history');
187
+ await page.session.send('Page.navigateToHistoryEntry', { entryId: entries[currentIndex + 1].id });
188
+ await new Promise((r) => setTimeout(r, 500));
189
+ },
190
+
152
191
  async injectCookies(url, cookieOpts) {
153
192
  await authenticate(page.session, url, { browser: cookieOpts?.browser });
154
193
  },
@@ -193,6 +232,80 @@ export async function connect(opts = {}) {
193
232
  await cdpSelect(page.session, backendNodeId, value);
194
233
  },
195
234
 
235
+ async drag(fromRef, toRef) {
236
+ const fromId = refMap.get(fromRef);
237
+ const toId = refMap.get(toRef);
238
+ if (!fromId) throw new Error(`No element found for ref "${fromRef}"`);
239
+ if (!toId) throw new Error(`No element found for ref "${toRef}"`);
240
+ await cdpDrag(page.session, fromId, toId);
241
+ },
242
+
243
+ async upload(ref, files) {
244
+ const backendNodeId = refMap.get(ref);
245
+ if (!backendNodeId) throw new Error(`No element found for ref "${ref}"`);
246
+ await cdpUpload(page.session, backendNodeId, files);
247
+ },
248
+
249
+ async pdf(pdfOpts = {}) {
250
+ const { data } = await page.session.send('Page.printToPDF', {
251
+ landscape: pdfOpts.landscape || false,
252
+ printBackground: true,
253
+ });
254
+ return data; // base64
255
+ },
256
+
257
+ async tabs() {
258
+ const { targetInfos } = await cdp.send('Target.getTargets');
259
+ return targetInfos
260
+ .filter((t) => t.type === 'page')
261
+ .map((t, i) => ({ index: i, url: t.url, title: t.title, targetId: t.targetId }));
262
+ },
263
+
264
+ async switchTab(index) {
265
+ const { targetInfos } = await cdp.send('Target.getTargets');
266
+ const pages = targetInfos.filter((t) => t.type === 'page');
267
+ if (index < 0 || index >= pages.length) throw new Error(`Tab index ${index} out of range (0-${pages.length - 1})`);
268
+ await cdp.send('Target.activateTarget', { targetId: pages[index].targetId });
269
+ },
270
+
271
+ async waitFor(waitOpts = {}) {
272
+ const timeout = waitOpts.timeout || 30000;
273
+ const interval = 200;
274
+ const deadline = Date.now() + timeout;
275
+
276
+ while (Date.now() < deadline) {
277
+ if (waitOpts.text) {
278
+ const { result } = await page.session.send('Runtime.evaluate', {
279
+ expression: 'document.body?.innerText || ""',
280
+ returnByValue: true,
281
+ });
282
+ if (result.value && result.value.includes(waitOpts.text)) return;
283
+ }
284
+ if (waitOpts.selector) {
285
+ const { result } = await page.session.send('Runtime.evaluate', {
286
+ expression: `!!document.querySelector(${JSON.stringify(waitOpts.selector)})`,
287
+ returnByValue: true,
288
+ });
289
+ if (result.value) return;
290
+ }
291
+ await new Promise((r) => setTimeout(r, interval));
292
+ }
293
+ throw new Error(`waitFor timed out after ${timeout}ms`);
294
+ },
295
+
296
+ async saveState(filePath) {
297
+ const { cookies } = await page.session.send('Network.getAllCookies');
298
+ const { result } = await page.session.send('Runtime.evaluate', {
299
+ expression: 'JSON.stringify(Object.fromEntries(Object.entries(localStorage)))',
300
+ returnByValue: true,
301
+ });
302
+ const state = { cookies, localStorage: JSON.parse(result.value || '{}') };
303
+ const { writeFileSync } = await import('node:fs');
304
+ writeFileSync(filePath, JSON.stringify(state, null, 2));
305
+ },
306
+
307
+ dialogLog,
308
+
196
309
  async screenshot(screenshotOpts = {}) {
197
310
  const format = screenshotOpts.format || 'png';
198
311
  const params = { format };
@@ -260,7 +373,7 @@ async function suppressPermissions(cdp) {
260
373
  * @param {object} cdp - CDP client
261
374
  * @param {boolean} [stealth=false] - Apply stealth patches (headless only)
262
375
  */
263
- async function createPage(cdp, stealth = false) {
376
+ async function createPage(cdp, stealth = false, pageOpts = {}) {
264
377
  const { targetId } = await cdp.send('Target.createTarget', { url: 'about:blank' });
265
378
  const { sessionId } = await cdp.send('Target.attachToTarget', {
266
379
  targetId,
@@ -279,6 +392,16 @@ async function createPage(cdp, stealth = false) {
279
392
  await applyStealth(session);
280
393
  }
281
394
 
395
+ // Set viewport size if specified (e.g. "1280x720")
396
+ if (pageOpts.viewport) {
397
+ const [w, h] = pageOpts.viewport.split('x').map(Number);
398
+ if (w && h) {
399
+ await session.send('Emulation.setDeviceMetricsOverride', {
400
+ width: w, height: h, deviceScaleFactor: 1, mobile: false,
401
+ });
402
+ }
403
+ }
404
+
282
405
  return { session, targetId, sessionId };
283
406
  }
284
407
 
package/src/interact.js CHANGED
@@ -206,3 +206,39 @@ export async function select(session, backendNodeId, value) {
206
206
  returnByValue: true,
207
207
  });
208
208
  }
209
+
210
/**
 * Drag one element onto another using synthetic mouse events.
 * Presses the left button at the source center, moves through the midpoint
 * to the target center, then releases there.
 *
 * @param {object} session - Session-scoped CDP handle
 * @param {number} fromNodeId - Source element backendDOMNodeId
 * @param {number} toNodeId - Target element backendDOMNodeId
 * @returns {Promise<void>}
 */
export async function drag(session, fromNodeId, toNodeId) {
  // getCenter is defined elsewhere in this file.
  // NOTE(review): both centers are resolved before the press; if getCenter scrolls
  // the page, the source coordinates could be stale by the time they are used — confirm.
  const from = await getCenter(session, fromNodeId);
  const to = await getCenter(session, toNodeId);

  // Moves made while the button is held must report it, otherwise the page sees
  // plain hover moves (event.buttons === 0) and drag handlers may not engage.
  const held = { button: 'left', buttons: 1 };

  await session.send('Input.dispatchMouseEvent', {
    type: 'mousePressed', x: from.x, y: from.y, button: 'left', clickCount: 1,
  });
  // Intermediate move for drag recognition
  const midX = (from.x + to.x) / 2;
  const midY = (from.y + to.y) / 2;
  await session.send('Input.dispatchMouseEvent', {
    type: 'mouseMoved', x: midX, y: midY, ...held,
  });
  await session.send('Input.dispatchMouseEvent', {
    type: 'mouseMoved', x: to.x, y: to.y, ...held,
  });
  await session.send('Input.dispatchMouseEvent', {
    type: 'mouseReleased', x: to.x, y: to.y, button: 'left', clickCount: 1,
  });
}
234
+
235
/**
 * Set the files of a file input element through `DOM.setFileInputFiles`.
 *
 * @param {object} session - Session-scoped CDP handle
 * @param {number} backendNodeId - Backend DOM node ID of the file input
 * @param {string[]} files - Absolute paths to files to upload
 * @returns {Promise<void>}
 */
export async function upload(session, backendNodeId, files) {
  const params = { files, backendNodeId };
  await session.send('DOM.setFileInputFiles', params);
}