@aquintanar/browser39 1.6.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +312 -0
  2. package/index.js +3 -1
  3. package/package.json +7 -6
package/README.md ADDED
@@ -0,0 +1,312 @@
1
+ <p align="center">
2
+ <img src="https://raw.githubusercontent.com/alejandroqh/browser39/main/docs/logo.png" alt="browser39" width="500">
3
+ </p>
4
+
5
+ # browser39
6
+
7
+ A headless open source web browser for AI agents. Converts pages to token-optimized Markdown locally. Single binary, no external browser, no fees.
8
+
9
+ browser39 fetches web pages and converts them to token-optimized Markdown that LLMs can actually consume. It runs JavaScript, manages cookies and sessions, queries the DOM, and fills forms. All processing happens locally; no data is sent to third-party services.
10
+
11
+ **Works with:** [Claude Desktop & Claude Code](docs/install-claude.md) | [OpenClaw](docs/install-openclaw.md) | [Any agent via CLI](docs/install-cli.md)
12
+
13
+ ## Comparison
14
+
15
+ | | browser39 | Playwright / Puppeteer | Raw HTTP (requests, ureq) |
16
+ |--|-----------|----------------------|---------------------------|
17
+ | External browser | None (single binary) | Requires Chrome/Chromium | None |
18
+ | Binary size | ~52MB | ~280MB with browser | N/A (library) |
19
+ | Platforms | macOS, Linux, Windows | macOS, Linux, Windows | Any |
20
+ | JavaScript | Yes (V8 via deno_core) | Yes (full V8) | No |
21
+ | HTML to Markdown | Built-in, token-optimized | No (raw HTML or screenshots) | DIY |
22
+ | Token preselection | Content sections, agent picks what to read | No | No |
23
+ | Cookies & sessions | Automatic, persisted, encrypted | Manual | Manual |
24
+ | DOM queries | CSS selectors + full JS DOM API | Full DOM API | No |
25
+ | Forms | fill + submit | Full interaction | Manual POST |
26
+ | Auth & secrets | Profiles, redaction, opaque handles | Manual | Manual |
27
+ | Transports | MCP (stdio + HTTP), JSONL, CLI | Library API | Library API |
28
+
29
+ ### Token savings in practice
30
+
31
+ Real test: extracting the "Optical communications" section from [Artemis II on Wikipedia](https://en.wikipedia.org/wiki/Artemis_II) (full page: ~14,600 tokens).
32
+
33
+ | | Raw HTTP | WebFetch (Claude Code built-in) | [Mistral Web Search](https://docs.mistral.ai/agents/tools/built-in/websearch) | browser39 |
34
+ |--|----------|--------------------------------|-------------------|-----------|
35
+ | **How it works** | Fetch full page, truncate to ~1,000 tokens | Send full page (~14,600 tokens) to intermediate model with extraction prompt | Cloud API: search + page processing by Mistral model | Fetch → content selectors list → targeted section fetch |
36
+ | **Tokens consumed** | ~1,000 (truncated) | ~14,600 (processed by intermediate model) | Cloud processed, not disclosed | **196** |
37
+ | **Found the section?** | No. Section is at token ~6,320, truncated away | Yes, but returns a lossy summary | Depends on search ranking | Yes. Exact original content |
38
+ | **Content quality** | Nav menus, infobox, article intro | Paraphrased, no links, no references | Summary with citations | Lossless markdown with links and citations |
39
+ | **Session state** | None | None | None | Cookies, history, follow-up queries free |
40
+ | **Data processing** | Local | Processed remotely | Processed remotely | Local |
41
+ | **Cost per call** | Free | Bundled | [$30 / 1,000 calls](https://mistral.ai/pricing#api) | Free |
42
+ | **Retries needed** | Pagination to find it | None, but no control over output | May not find specific section | None. Agent sees structure first |
43
+
44
+ browser39 returns the exact section in **196 tokens** at zero cost. The raw approach misses it entirely, WebFetch burns **75x more tokens** through an intermediate model, and cloud tools like Mistral's charge $0.03 per call.
45
+
46
+ ## Install
47
+
48
+ ```bash
49
+ npm install @aquintanar/browser39
50
+ ```
51
+
52
+ Or via Cargo:
53
+
54
+ ```bash
55
+ cargo install browser39
56
+ ```
57
+
58
+ Pre-built binaries are available on the [releases page](https://github.com/alejandroqh/browser39/releases).
59
+
60
+ ### Auto-install prompts
61
+
62
+ Copy and paste into your agent to install browser39 automatically:
63
+
64
+ **Claude Code**
65
+
66
+ > Install browser39 as an MCP server. Download the binary for this system from https://github.com/alejandroqh/browser39/releases/latest/download/ — assets are named browser39-{os}-{arch} (macos-arm64, macos-x64, linux-arm64, linux-x64, windows-x64.exe). Save to ~/.local/bin/browser39, make it executable, and add it to MCP settings with command "browser39" and args ["mcp"].
67
+
68
+ **OpenClaw**
69
+
70
+ > Install the browser39 plugin: openclaw plugins install browser39 --marketplace https://github.com/alejandroqh/openclaw-marketplace --dangerously-force-unsafe-install && openclaw gateway restart
71
+
72
+ ### Auto-update prompts
73
+
74
+ Copy and paste into your agent to update browser39 to the latest version:
75
+
76
+ **Claude Code**
77
+
78
+ > Update browser39 to the latest version. Download the latest binary for this system from https://github.com/alejandroqh/browser39/releases/latest/download/ — assets are named browser39-{os}-{arch} (macos-arm64, macos-x64, linux-arm64, linux-x64, windows-x64.exe). Replace the existing binary at ~/.local/bin/browser39 and make it executable. Then restart the MCP server.
79
+
80
+ **OpenClaw**
81
+
82
+ > Update the browser39 plugin: openclaw plugins update browser39 && openclaw gateway restart
83
+
84
+ ## Quick Start
85
+
86
+ ### Claude Desktop / Claude Code (MCP)
87
+
88
+ Add to your MCP settings:
89
+
90
+ ```json
91
+ {
92
+ "mcpServers": {
93
+ "browser39": {
94
+ "command": "browser39",
95
+ "args": ["mcp"]
96
+ }
97
+ }
98
+ }
99
+ ```
100
+
101
+ 29 tools available instantly: `browser39_fetch`, `browser39_click`, `browser39_links`, `browser39_dom_query`, `browser39_fill`, `browser39_submit`, `browser39_search`, cookies, storage, history, config management, and more.
102
+
103
+ See [docs/install-claude.md](docs/install-claude.md) for the full guide.
104
+
105
+ ### OpenClaw
106
+
107
+ ```bash
108
+ openclaw plugins install browser39 --marketplace https://github.com/alejandroqh/openclaw-marketplace --dangerously-force-unsafe-install
109
+ openclaw gateway restart
110
+ ```
111
+
112
+ See [docs/install-openclaw.md](docs/install-openclaw.md) for bundle vs native plugin setup.
113
+
114
+ ### CLI: one-shot fetch
115
+
116
+ ```bash
117
+ browser39 fetch https://example.com
118
+ ```
119
+
120
+ ```
121
+ # Example Domain
122
+ This domain is for use in documentation examples without needing permission.
123
+
124
+ [Learn more](https://iana.org/domains/example)
125
+ ```
126
+
127
+ ### CLI: agent integration (watch mode)
128
+
129
+ Long-running subprocess that any language can talk to via JSONL files:
130
+
131
+ ```bash
132
+ touch commands.jsonl
133
+ browser39 watch commands.jsonl --output results.jsonl
134
+ ```
135
+
136
+ ```bash
137
+ # From your agent (Python, Node, Rust, shell, anything):
138
+ echo '{"id":"1","action":"fetch","v":1,"seq":1,"url":"https://example.com"}' >> commands.jsonl
139
+ ```
140
+
141
+ Drop-in `web_search` and `visit_website` tool examples: **[Python](examples/browser39_tools.py)** | **[TypeScript](examples/browser39_tools.ts)** | **[Rust](examples/browser39_tools.rs)**
142
+
143
+ See [docs/install-cli.md](docs/install-cli.md) for the full integration guide.
144
+
145
+ ## Features
146
+
147
+ ### Token optimization
148
+
149
+ browser39 minimizes token usage when feeding web content to LLMs:
150
+
151
+ - **Content preselection**: on first fetch, returns available content sections with token estimates instead of dumping the full page. The agent picks the relevant section and re-fetches with a targeted `selector`.
152
+ - **Heading auto-expand**: `selector: "#Astronauts"` returns the full section until the next same-level heading, not just the heading text.
153
+ - **HTML to Markdown**: strips scripts, styles, and non-content elements.
154
+ - **Compact link references** (JSON mode): `[text][N]` instead of inline URLs, with full URLs in the `links` array.
155
+ - **Same-origin URL shortening**: links on the same domain show path-only.
156
+ - **Link deduplication**: same-URL links (image + headline cards) emitted once.
157
+
158
+ ### JavaScript execution
159
+
160
+ V8 (via deno_core) runs JavaScript against a full DOM environment:
161
+
162
+ - **Traversal**: `parentElement`, `children`, `firstChild`, `lastChild`, `nextSibling`, `previousSibling`, `closest()`, `matches()`, `contains()`
163
+ - **Lookup**: `getElementById`, `getElementsByClassName`, `getElementsByTagName`, `getElementsByName`
164
+ - **Mutation**: `createElement`, `createTextNode`, `appendChild`, `removeChild`, `insertBefore`, `setAttribute`, `removeAttribute`, `textContent`/`innerHTML` setters
165
+ - **Events**: `addEventListener`, `removeEventListener`, `dispatchEvent`, `new Event`/`CustomEvent`/`MouseEvent`/`KeyboardEvent`/`InputEvent`
166
+ - **Web APIs**: `localStorage`, `document.cookie`, `console.log` (captured), `setTimeout`, `atob`/`btoa`, `getComputedStyle`, `MutationObserver`
167
+ - **Forms**: `element.value` get/set, `element.click()`, `form.submit()`
168
+
169
+ ```json
170
+ {"action": "dom_query", "script": "document.querySelectorAll('a').length"}
171
+ {"action": "dom_query", "script": "document.getElementById('content').closest('section').textContent"}
172
+ {"action": "dom_query", "script": "document.querySelector('h1').setAttribute('class', 'modified')"}
173
+ ```
174
+
175
+ ### Session persistence
176
+
177
+ Cookies, localStorage, and browsing history are persisted to disk by default (`~/.local/share/browser39/session.enc`, AES-256-GCM encrypted). An agent can log in once and stay authenticated across restarts.
178
+
179
+ Disable with `--no-persist` or config:
180
+
181
+ ```toml
182
+ [session]
183
+ persistence = "memory"
184
+ ```
185
+
186
+ ### Forms
187
+
188
+ Fill fields by CSS selector and submit. browser39 handles `enctype`, builds the HTTP request, and returns the response page:
189
+
190
+ ```json
191
+ {"action": "fill", "fields": [{"selector": "#user", "value": "agent"}, {"selector": "#pass", "value": "secret", "sensitive": true}]}
192
+ {"action": "submit", "selector": "form#login"}
193
+ ```
194
+
195
+ ### Security
196
+
197
+ Auth profiles keep credentials out of the LLM conversation. The agent references a profile name and never sees the token:
198
+
199
+ ```toml
200
+ [auth.github]
201
+ header = "Authorization"
202
+ value_env = "GITHUB_TOKEN"
203
+ value_prefix = "Bearer "
204
+ domains = ["api.github.com"]
205
+ ```
206
+
207
+ ```json
208
+ {"action": "fetch", "url": "https://api.github.com/repos", "auth_profile": "github"}
209
+ ```
210
+
211
+ ### Config management via MCP
212
+
213
+ Agents can manage browser39's configuration directly through MCP tools — change the search engine, store credentials, manage auth profiles, cookies, storage, and headers. Sensitive values are stored securely on disk but **never returned** via MCP; `config_show` masks them with `••••••`.
214
+
215
+ ```
216
+ > browser39_config_set key="search.engine" value="https://www.google.com/search?q={}"
217
+ Set search.engine = https://www.google.com/search?q={}
218
+
219
+ > browser39_config_auth_set name="github" header="Authorization" value="Bearer ghp_..." domains=["api.github.com"]
220
+ Auth profile 'github' saved
221
+
222
+ > browser39_config_show section="auth"
223
+ {"auth": {"github": {"header": "Authorization", "value": "••••••", ...}}}
224
+ ```
225
+
226
+ 10 config tools: `config_show`, `config_set`, `config_auth_set/delete`, `config_cookie_set/delete`, `config_storage_set/delete`, `config_header_set/delete`.
227
+
228
+ ### All transports
229
+
230
+ | Transport | Command | Use case |
231
+ |-----------|---------|----------|
232
+ | MCP (stdio) | `browser39 mcp` | Claude Desktop, Claude Code, local MCP clients |
233
+ | MCP (HTTP) | `browser39 mcp --transport sse --port 8039` | Remote agents, cloud deployments |
234
+ | JSONL watch | `browser39 watch commands.jsonl` | Any language, long-running agent IPC |
235
+ | JSONL batch | `browser39 batch commands.jsonl` | One-shot scripted operations |
236
+ | CLI fetch | `browser39 fetch <url>` | Quick page retrieval, shell scripts |
237
+
238
+ ## Configuration
239
+
240
+ ```bash
241
+ browser39 --config path/to/config.toml fetch https://example.com
242
+ ```
243
+
244
+ Precedence: `--config` flag > `BROWSER39_CONFIG` env > `~/.config/browser39/config.toml`
245
+
246
+ See [docs/config.md](docs/config.md) for the full reference.
247
+
248
+ ## Plugin Integration
249
+
250
+ ### Claude Code
251
+
252
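+ Install the binary with Cargo (pre-built binaries are also available on the [GitHub releases page](https://github.com/alejandroqh/browser39/releases)):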
253
+
254
+ ```bash
255
+ cargo install browser39
256
+ ```
257
+
258
+ Then add it as an MCP server:
259
+
260
+ ```bash
261
+ claude mcp add browser39 browser39 -- mcp
262
+ ```
263
+
264
+ Or manually in `.mcp.json`:
265
+
266
+ ```json
267
+ {
268
+ "mcpServers": {
269
+ "browser39": {
270
+ "command": "browser39",
271
+ "args": ["mcp"]
272
+ }
273
+ }
274
+ }
275
+ ```
276
+
277
+ ### Claude Bundle (`.claude-plugin/`)
278
+
279
+ This bundle makes browser39 installable via `openclaw plugins install` as a Claude bundle. It maps the MCP server config from `.mcp.json`.
280
+
281
+ ```bash
282
+ openclaw plugins install browser39 --marketplace https://github.com/alejandroqh/openclaw-marketplace --dangerously-force-unsafe-install
283
+ ```
284
+
285
+ ### OpenClaw Native Plugin (`openclaw-plugin/`)
286
+
287
+ Full OpenClaw native plugin with typed tool proxies, config schema, and automatic MCP lifecycle management. Configurable options:
288
+
289
+ | Option | Description |
290
+ |---|---|
291
+ | `binaryPath` | Path to the browser39 binary (default: `browser39` in PATH) |
292
+ | `configPath` | Path to browser39 config.toml file |
293
+
294
+ ## Documentation
295
+
296
+ | Doc | Description |
297
+ |-----|-------------|
298
+ | [install-claude.md](docs/install-claude.md) | Claude Desktop and Claude Code setup |
299
+ | [install-openclaw.md](docs/install-openclaw.md) | OpenClaw bundle and native plugin |
300
+ | [install-cli.md](docs/install-cli.md) | CLI integration guide with Rust, Python, and TypeScript examples |
301
+ | [jsonl-protocol.md](docs/jsonl-protocol.md) | Full JSONL protocol specification |
302
+ | [config.md](docs/config.md) | Configuration reference |
303
+
304
+ ## Development
305
+
306
+ ```bash
307
+ cargo build # Build
308
+ cargo run # Run
309
+ cargo test # Run all tests
310
+ cargo clippy # Lint
311
+ cargo fmt # Format
312
+ ```
package/index.js CHANGED
@@ -6,6 +6,7 @@ const PLATFORMS = {
6
6
  "darwin-x64": "@aquintanar/browser39-darwin-x64",
7
7
  "linux-x64": "@aquintanar/browser39-linux-x64",
8
8
  "linux-arm64": "@aquintanar/browser39-linux-arm64",
9
+ "win32-x64": "@aquintanar/browser39-win32-x64",
9
10
  };
10
11
 
11
12
  function executablePath() {
@@ -17,9 +18,10 @@ function executablePath() {
17
18
  );
18
19
  }
19
20
 
21
+ const binName = os.platform() === "win32" ? "browser39.exe" : "browser39";
20
22
  try {
21
23
  const binDir = path.dirname(require.resolve(`${pkg}/package.json`));
22
- return path.join(binDir, "bin", "browser39");
24
+ return path.join(binDir, "bin", binName);
23
25
  } catch {
24
26
  throw new Error(
25
27
  `browser39: platform package "${pkg}" not installed. Run: npm install ${pkg}`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aquintanar/browser39",
3
- "version": "1.6.0",
3
+ "version": "1.6.1",
4
4
  "description": "A headless web browser for AI agents. Converts pages to token-optimized Markdown. Single binary, no external browser.",
5
5
  "license": "Apache-2.0",
6
6
  "repository": {
@@ -14,11 +14,12 @@
14
14
  "bin": {
15
15
  "browser39": "bin.js"
16
16
  },
17
- "files": ["index.js", "index.d.ts", "bin.js"],
17
+ "files": ["index.js", "index.d.ts", "bin.js", "README.md"],
18
18
  "optionalDependencies": {
19
- "@aquintanar/browser39-darwin-arm64": "1.6.0",
20
- "@aquintanar/browser39-darwin-x64": "1.6.0",
21
- "@aquintanar/browser39-linux-x64": "1.6.0",
22
- "@aquintanar/browser39-linux-arm64": "1.6.0"
19
+ "@aquintanar/browser39-darwin-arm64": "1.6.1",
20
+ "@aquintanar/browser39-darwin-x64": "1.6.1",
21
+ "@aquintanar/browser39-linux-x64": "1.6.1",
22
+ "@aquintanar/browser39-linux-arm64": "1.6.1",
23
+ "@aquintanar/browser39-win32-x64": "1.6.1"
23
24
  }
24
25
  }