barebrowse 0.10.1 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +100 -0
- package/README.md +13 -0
- package/barebrowse.context.md +14 -2
- package/cli.js +8 -0
- package/package.json +38 -6
- package/src/auth.js +35 -10
- package/src/bareagent.js +16 -5
- package/src/cdp.js +4 -2
- package/src/chromium.js +19 -6
- package/src/daemon.js +54 -12
- package/src/index.js +50 -2
- package/src/network-idle.js +4 -1
- package/src/prune.js +1 -1
- package/src/session-client.js +6 -2
- package/src/url-guard.js +138 -0
- package/src/wearehere.d.ts +6 -0
- package/types/aria.d.ts +17 -0
- package/types/auth.d.ts +35 -0
- package/types/bareagent.d.ts +25 -0
- package/types/blocklist.d.ts +21 -0
- package/types/cdp.d.ts +16 -0
- package/types/chromium.d.ts +58 -0
- package/types/consent.d.ts +9 -0
- package/types/daemon.d.ts +10 -0
- package/types/index.d.ts +138 -0
- package/types/interact.d.ts +79 -0
- package/types/network-idle.d.ts +19 -0
- package/types/prune.d.ts +13 -0
- package/types/session-client.d.ts +19 -0
- package/types/stealth.d.ts +14 -0
- package/types/url-guard.d.ts +26 -0
- package/commands/barebrowse/SKILL.md +0 -133
- package/commands/barebrowse.md +0 -132
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,105 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [Unreleased]
|
|
4
|
+
|
|
5
|
+
## [0.12.0] - 2026-05-29
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
- **Shipped TypeScript types, generated from JSDoc.** The package now ships
|
|
9
|
+
`.d.ts` declarations so adopters get autocomplete and type errors out of the
|
|
10
|
+
box — no `@types/barebrowse`. The `.js` we author is still the `.js` that
|
|
11
|
+
ships; there is **no build step for runtime code**. Types are generated by
|
|
12
|
+
`tsc` (`checkJs` + `strictNullChecks`), emitted to a git-ignored `types/`, and
|
|
13
|
+
built into the tarball at publish via `prepublishOnly`. Because they are
|
|
14
|
+
generated-and-never-committed, the JSDoc, the `.d.ts`, and CI cannot drift.
|
|
15
|
+
`exports` now carries a `types` condition on every subpath.
|
|
16
|
+
- **`ci.yml` (push/PR gate):** `npm ci → typecheck → build:types → test`. A
|
|
17
|
+
JSDoc/code mismatch is now a type error that blocks merge. No lint step — `tsc`
|
|
18
|
+
covers the bug class that matters for a vanilla-ESM lib.
|
|
19
|
+
- Dev-only tooling: `typescript` + `@types/node` (devDependencies; never
|
|
20
|
+
shipped), `tsconfig.json`, and `typecheck` / `build:types` / `prepublishOnly`
|
|
21
|
+
scripts.
|
|
22
|
+
|
|
23
|
+
### Changed
|
|
24
|
+
- **`publish.yml` is now manual-only (`workflow_dispatch`) — npm OIDC trusted publishing with provenance, idempotent, and verifies the registry end-state.**
|
|
25
|
+
- **Packaging now uses a `files` allowlist** (`src/`, generated `types/`,
|
|
26
|
+
`cli.js`, `mcp-server.js`, and the doc set) instead of the old `.npmignore`
|
|
27
|
+
denylist, which was removed. Repo-only files (`test/`, `docs/`, `CLAUDE.md`)
|
|
28
|
+
are excluded from the tarball.
|
|
29
|
+
|
|
30
|
+
### Fixed
|
|
31
|
+
- **`auth.js`: cookie databases are now opened with `readOnly: true`.** The
|
|
32
|
+
previous `readonly` (lowercase) key is silently ignored by `node:sqlite`;
|
|
33
|
+
surfaced by the new `tsc` typecheck. Read-only was already enforced via the
|
|
34
|
+
`?immutable=1` connection URI, so observable behavior is unchanged — this
|
|
35
|
+
honors the intended option. Added minimal, behavior-preserving null/type
|
|
36
|
+
guards in a few spots (`server.address()`, SQLite row values) flagged by
|
|
37
|
+
`strictNullChecks`.
|
|
38
|
+
|
|
39
|
+
## 0.11.0
|
|
40
|
+
|
|
41
|
+
### Security hardening — audit findings fixed, safe-by-default
|
|
42
|
+
|
|
43
|
+
A full security audit of the library + CLI daemon + MCP server. Eight
|
|
44
|
+
findings were reproduced with live PoCs, fixed, and locked in with 14 new
|
|
45
|
+
regression tests (143 → 157 passing). Two new opt-in controls; two new
|
|
46
|
+
defaults that change behavior (see **Breaking** below).
|
|
47
|
+
|
|
48
|
+
- **Daemon authentication (was: unauthenticated `eval` over loopback).**
|
|
49
|
+
The CLI daemon's HTTP server bound to `127.0.0.1` but had no auth — and
|
|
50
|
+
loopback is shared across local users, so any local process could POST
|
|
51
|
+
`/command` (including `eval` = arbitrary JS in the authenticated browser).
|
|
52
|
+
Now every daemon mints a 32-byte random token at startup, written into
|
|
53
|
+
`session.json` (mode `0600`) and required on `/command` via the
|
|
54
|
+
`x-barebrowse-token` header (constant-time compare). `session-client.js`
|
|
55
|
+
reads and sends it transparently — no caller change. `GET /status` stays
|
|
56
|
+
open as a liveness ping returning only `{ ok, pid }`.
|
|
57
|
+
- **Artifact permissions.** The session dir is now created `0700` and all
|
|
58
|
+
daemon artifacts (`session.json`, snapshots, screenshots, PDFs, console /
|
|
59
|
+
network / dialog logs) plus `page.saveState()` output are written `0600`.
|
|
60
|
+
`saveState` holds cookies + localStorage (session tokens), so this stops a
|
|
61
|
+
multi-user host from reading another user's credentials off disk.
|
|
62
|
+
- **Navigation scheme guard (new module `src/url-guard.js`).** `goto()` /
|
|
63
|
+
`browse()` now reject local-resource and browser-internal schemes
|
|
64
|
+
(`file:`, `view-source:`, `chrome:`, `chrome-extension:`, `filesystem:`,
|
|
65
|
+
`devtools:`, …) by default — closing a confirmed local-file-read /
|
|
66
|
+
directory-listing vector for a prompt-injected agent. `http`/`https`/
|
|
67
|
+
`data`/`blob`/`about` stay allowed (`data:` is opaque-origin and the
|
|
68
|
+
test-fixture mechanism — not a read/SSRF vector). Override with
|
|
69
|
+
`{ allowLocalUrls: true }`.
|
|
70
|
+
- **SSRF guard (opt-in `blockPrivateNetwork`).** When set, `goto()`/
|
|
71
|
+
`browse()` refuse loopback / RFC-1918 / link-local / cloud-metadata
|
|
72
|
+
(`169.254.169.254`) / `*.internal` hosts. Off by default so localhost
|
|
73
|
+
dev-server browsing keeps working. Exposed as `--block-private-network`.
|
|
74
|
+
- **Upload sandbox (opt-in `uploadDir`).** The audit confirmed `upload()` would
|
|
75
|
+
attach any absolute path to a file input (exfil vector under prompt
|
|
76
|
+
injection). When `uploadDir` is set, every path must resolve (symlinks
|
|
77
|
+
included, via `realpath`) inside it. Default unrestricted — nothing breaks
|
|
78
|
+
unless you opt in. Exposed as `--upload-dir=DIR`. Both new opts pass
|
|
79
|
+
through `connect()` → MCP / bareagent / CLI daemon uniformly.
|
|
80
|
+
- **Cookie injection scoped precisely (was: over-broad substring match).**
|
|
81
|
+
`authenticate()` matched `host_key LIKE '%domain%'`, so browsing
|
|
82
|
+
`apple.com` injected cookies for `apple.com.evil.org` / `notapple.com`,
|
|
83
|
+
and `mybank.co.uk` (→ `co.uk`) pulled every `*.co.uk` cookie. The LIKE
|
|
84
|
+
query is now only a coarse pre-filter; a precise RFC-6265
|
|
85
|
+
`cookieDomainMatch()` decides what actually gets injected (parent-domain
|
|
86
|
+
cookies like `.google.com` still apply to `mail.google.com`).
|
|
87
|
+
- **Hardening:** browser discovery uses `execFileSync('which', [name])`
|
|
88
|
+
(no shell) instead of an interpolated `execSync` string; the cleanup
|
|
89
|
+
busy-wait uses `Atomics.wait` instead of spawning a `sleep` subprocess. Added
|
|
90
|
+
`.gitignore` (was missing — `.barebrowse/` state/snapshots could be
|
|
91
|
+
accidentally committed). Pinned `wearehere` to exact `1.0.0`.
|
|
92
|
+
- **Tests:** 157 total (14 new) — `test/unit/url-guard.test.js` (19
|
|
93
|
+
assertions over scheme/private-host policy), `cookieDomainMatch` cases in
|
|
94
|
+
`test/unit/auth.test.js`, daemon token + `0600` perms in
|
|
95
|
+
`test/integration/cli.test.js`.
|
|
96
|
+
|
|
97
|
+
**Breaking:** (1) `file:`/`chrome:`/etc. navigation now throws by default —
|
|
98
|
+
pass `allowLocalUrls: true` to restore. (2) The CLI daemon now requires the
|
|
99
|
+
token; this is transparent via the bundled `session-client`, but any
|
|
100
|
+
third-party client hitting the daemon's HTTP API directly must send
|
|
101
|
+
`x-barebrowse-token` from `session.json`.
|
|
102
|
+
|
|
3
103
|
## 0.10.1
|
|
4
104
|
|
|
5
105
|
### Blocklist long-tail additions + legacy-Chrome warn + switchTab attach-mode test
|
package/README.md
CHANGED
|
@@ -35,6 +35,8 @@ npm install barebrowse
|
|
|
35
35
|
|
|
36
36
|
Requires Node.js >= 22 and any installed Chromium-based browser.
|
|
37
37
|
|
|
38
|
+
Ships with TypeScript types (generated from JSDoc) — autocomplete and type-checking work out of the box, no `@types/barebrowse` needed. The library is vanilla JS with no build step.
|
|
39
|
+
|
|
38
40
|
## Three ways to use it
|
|
39
41
|
|
|
40
42
|
### 1. CLI session -- for coding agents and quick testing
|
|
@@ -134,6 +136,17 @@ No clone profile, no fresh cookies — the agent sees what you see.
|
|
|
134
136
|
|
|
135
137
|
Cookie consent walls (29 languages, with real mouse click fallback for stubborn CMPs), login walls (cookie extraction from your browsers), bot detection (ARIA node count heuristic + stealth patches + automatic headed fallback — snapshot shows `[BOT CHALLENGE DETECTED]` warning when blocked), permission prompts, SPA navigation, JS dialogs, off-screen elements, pre-filled inputs, ARIA noise, and profile locking. The agent doesn't think about any of it.
|
|
136
138
|
|
|
139
|
+
## Safe by default (v0.11.0)
|
|
140
|
+
|
|
141
|
+
barebrowse hands an autonomous — and therefore prompt-injectable — agent an *authenticated* browser, so the defaults are calibrated for that threat:
|
|
142
|
+
|
|
143
|
+
- **Local-resource schemes blocked.** `file:`, `view-source:`, `chrome:`, etc. are rejected by default (a confirmed local-file-read vector); `http`/`https`/`data`/`blob`/`about` stay allowed. Override with `allowLocalUrls: true`.
|
|
144
|
+
- **Cookie injection scoped** to a precise RFC-6265 domain match — browsing one site can't pull look-alike or unrelated cookies into the session.
|
|
145
|
+
- **CLI daemon authenticated** with a per-session token (loopback alone isn't an authorization boundary); snapshots and saved state are written owner-only (`0600`).
|
|
146
|
+
- **Opt-in hardening** for stricter deployments: `blockPrivateNetwork` (SSRF guard for loopback/RFC-1918/cloud-metadata) and `uploadDir` (confine `upload()` to one directory). Both available on the library, MCP, bareagent, and CLI (`--block-private-network`, `--upload-dir`).
|
|
147
|
+
|
|
148
|
+
See `barebrowse.context.md` and the PRD's "Security Model & Safe Defaults" for the full rationale.
|
|
149
|
+
|
|
137
150
|
## What the agent sees
|
|
138
151
|
|
|
139
152
|
Raw ARIA output from a page is noisy -- decorative wrappers, hidden elements, structural junk. The pruning pipeline (ported from [mcprune](https://github.com/hamr0/mcprune)) strips it down to what matters.
|
package/barebrowse.context.md
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# barebrowse -- Integration Guide
|
|
2
2
|
|
|
3
3
|
> For AI assistants and developers wiring barebrowse into a project.
|
|
4
|
-
> v0.
|
|
4
|
+
> v0.12.0 | Node.js >= 22 | 0 required deps | Apache-2.0
|
|
5
5
|
|
|
6
6
|
## What this is
|
|
7
7
|
|
|
@@ -13,6 +13,11 @@ No Playwright. No bundled browser. No build step. Vanilla JS, ES modules.
|
|
|
13
13
|
npm install barebrowse
|
|
14
14
|
```
|
|
15
15
|
|
|
16
|
+
**TypeScript:** ships with `.d.ts` types generated from the source JSDoc, so
|
|
17
|
+
autocomplete and type-checking work out of the box — no `@types/barebrowse`
|
|
18
|
+
needed. The library itself is vanilla JS with no build step; the types are a
|
|
19
|
+
publish-time artifact.
|
|
20
|
+
|
|
16
21
|
Three integration paths:
|
|
17
22
|
1. **Library:** `import { browse, connect } from 'barebrowse'` -- one-shot or interactive session
|
|
18
23
|
2. **MCP server:** `barebrowse mcp` -- JSON-RPC over stdio for Claude Desktop, Cursor, etc.
|
|
@@ -95,6 +100,9 @@ const snapshot = await browse('https://example.com', {
|
|
|
95
100
|
- `downloadPath: '/abs/dir'` — Where downloads land. Default: per-session `mkdtemp` under `/tmp/barebrowse-dl-*` that gets removed on `close()`. Caller-supplied paths are not cleaned up — caller owns the lifecycle.
|
|
96
101
|
- `blockAds: true|false` — CDP-level URL blocking of 128 common ad/tracker patterns (Google ads/analytics, FB/Amazon/MS/Adobe ad+analytics, Segment/Amplitude/Mixpanel/Heap/PostHog, Hotjar/FullStory/LogRocket, Criteo/Taboola/Outbrain, the consumer-pixel cluster, AppNexus/Rubicon/PubMatic supply, marketing automation; v0.10.1 added AppsFlyer/Branch/Adjust, Cloudflare Web Analytics, Matomo Cloud). Default `true` for launched browsers, `false` in attach mode (would affect any tab in the user's running browser). Explicit `true` in attach mode is honored and follows the session across `switchTab()` (regression-tested). Shrinks ARIA snapshots and speeds page loads. On legacy Chromium lacking `Network.setBlockedURLs` a one-time `console.warn` surfaces the fallback.
|
|
97
102
|
- `blockUrls: ['*://foo.com/*', ...]` — Extra glob patterns (CDP `Network.setBlockedURLs` format) to block in addition to the default. Merged with the default unless `blockAds: false`.
|
|
103
|
+
- `allowLocalUrls: true|false` — (v0.11.0) Default `false`: navigation to local-resource / browser-internal schemes (`file:`, `view-source:`, `chrome:`, `filesystem:`, `devtools:`, …) is **blocked** to stop a prompt-injected agent reading local files. `http`/`https`/`data`/`blob`/`about` are always allowed. Set `true` to permit local schemes.
|
|
104
|
+
- `blockPrivateNetwork: true|false` — (v0.11.0) Default `false`. When `true`, `goto()`/`browse()` refuse loopback / RFC-1918 / link-local / cloud-metadata (`169.254.169.254`) / `*.internal` hosts (SSRF guard). Off by default so localhost dev-server browsing works. Hostname-based — does not catch DNS names that resolve to private IPs.
|
|
105
|
+
- `uploadDir: '/abs/dir'` — (v0.11.0) Default unset (no restriction). When set, `upload()` rejects any file that does not resolve (symlinks included, via `realpath`) inside this directory — sandboxes the agent's file-upload capability.
|
|
98
106
|
|
|
99
107
|
## Snapshot format
|
|
100
108
|
|
|
@@ -226,7 +234,7 @@ barebrowse save-state # → .barebrowse/state-<timestamp>.json
|
|
|
226
234
|
barebrowse close # Kill daemon + browser
|
|
227
235
|
```
|
|
228
236
|
|
|
229
|
-
**Open flags:** `--mode=headless|headed|hybrid`, `--port=N` (attach to running browser), `--proxy=URL`, `--viewport=WxH`, `--storage-state=FILE`, `--download-path=DIR` (v0.9.0), `--no-cookies`, `--browser=firefox|chromium`, `--timeout=N`
|
|
237
|
+
**Open flags:** `--mode=headless|headed|hybrid`, `--port=N` (attach to running browser), `--proxy=URL`, `--viewport=WxH`, `--storage-state=FILE`, `--download-path=DIR` (v0.9.0), `--no-cookies`, `--browser=firefox|chromium`, `--timeout=N`, `--block-private-network` (SSRF guard, v0.11.0), `--upload-dir=DIR` (upload sandbox, v0.11.0)
|
|
230
238
|
|
|
231
239
|
Session lifecycle: `open` spawns a background daemon holding a `connect()` session. Subsequent commands POST to the daemon over HTTP (localhost). `close` shuts everything down. JS dialogs (alert/confirm/prompt) are auto-dismissed and logged.
|
|
232
240
|
|
|
@@ -355,6 +363,10 @@ Useful for agent threshold decisions: "skip sites above score 40", "warn if term
|
|
|
355
363
|
|
|
356
364
|
14. **`eval` MCP tool is opt-in.** Set `BAREBROWSE_MCP_EVAL=1` to register it. Default off because `Runtime.evaluate` in an authenticated session can read cookies/localStorage, post on the user's behalf, hit any same-origin endpoint. CLI/connect()/daemon all keep `eval` because the developer is the caller; MCP gates it because the agent acts with less judgment.
|
|
357
365
|
|
|
366
|
+
15. **The CLI daemon requires a per-session token (v0.11.0).** `open` mints a 32-byte random token, writes it into `.barebrowse/session.json` (mode `0600`) and requires it on `POST /command` via the `x-barebrowse-token` header (loopback is shared across local users, so binding to `127.0.0.1` alone isn't an authorization boundary). The bundled `session-client` sends it automatically — no change for CLI users. A third-party client hitting the daemon HTTP API directly must read the token from `session.json` and send it. `GET /status` stays open (liveness only). The session dir is `0700`; snapshots, `saveState`, and logs are written `0600`.
|
|
367
|
+
|
|
368
|
+
16. **Navigation is scheme-guarded by default (v0.11.0).** Navigating to `file:`/`chrome:`/etc. throws unless `allowLocalUrls: true`; `blockPrivateNetwork` and `uploadDir` add opt-in SSRF and upload-sandbox controls. These controls are exposed on the library, MCP/bareagent (via `connect` opts), and the CLI (`--block-private-network`, `--upload-dir=DIR`); on the CLI the scheme guard and the daemon token are always on.
|
|
369
|
+
|
|
358
370
|
## Constraints
|
|
359
371
|
|
|
360
372
|
- **Node >= 22** -- built-in WebSocket, built-in SQLite
|
package/cli.js
CHANGED
|
@@ -119,6 +119,8 @@ async function cmdOpen() {
|
|
|
119
119
|
downloadPath: parseFlag('--download-path'),
|
|
120
120
|
blockAds: hasFlag('--no-block-ads') ? false : undefined,
|
|
121
121
|
blockUrls: parseFlagAll('--block-urls'),
|
|
122
|
+
blockPrivateNetwork: hasFlag('--block-private-network') || undefined,
|
|
123
|
+
uploadDir: parseFlag('--upload-dir') ? resolve(parseFlag('--upload-dir')) : undefined,
|
|
122
124
|
};
|
|
123
125
|
|
|
124
126
|
try {
|
|
@@ -222,6 +224,8 @@ async function runDaemonInternal() {
|
|
|
222
224
|
downloadPath: parseFlag('--download-path'),
|
|
223
225
|
blockAds: hasFlag('--no-block-ads') ? false : undefined,
|
|
224
226
|
blockUrls: parseFlagAll('--block-urls'),
|
|
227
|
+
blockPrivateNetwork: hasFlag('--block-private-network') || undefined,
|
|
228
|
+
uploadDir: parseFlag('--upload-dir'),
|
|
225
229
|
};
|
|
226
230
|
const outputDir = parseFlag('--output-dir') || resolve('.barebrowse');
|
|
227
231
|
const url = parseFlag('--url');
|
|
@@ -489,6 +493,10 @@ Session:
|
|
|
489
493
|
Default: enabled in owned-browser modes, disabled in attach mode.
|
|
490
494
|
--block-urls=PATTERN Extra URL glob to block (repeatable, e.g. --block-urls='*://*.foo.com/*').
|
|
491
495
|
Use the =VALUE form when the pattern could be mistaken for a flag.
|
|
496
|
+
--block-private-network SSRF guard: refuse to navigate to loopback / RFC-1918 / link-local /
|
|
497
|
+
cloud-metadata hosts. Off by default so localhost browsing works.
|
|
498
|
+
--upload-dir=DIR Sandbox uploads: reject files outside DIR (symlinks resolved).
|
|
499
|
+
Default: no restriction. (file:/chrome: schemes are always blocked.)
|
|
492
500
|
|
|
493
501
|
Navigation:
|
|
494
502
|
barebrowse goto <url> Navigate to URL
|
package/package.json
CHANGED
|
@@ -1,21 +1,49 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "barebrowse",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.12.0",
|
|
4
4
|
"description": "Authenticated web browsing for autonomous agents via CDP. URL in, pruned ARIA snapshot out.",
|
|
5
|
+
"repository": {
|
|
6
|
+
"type": "git",
|
|
7
|
+
"url": "git+https://github.com/hamr0/barebrowse.git"
|
|
8
|
+
},
|
|
9
|
+
"homepage": "https://github.com/hamr0/barebrowse#readme",
|
|
10
|
+
"bugs": "https://github.com/hamr0/barebrowse/issues",
|
|
5
11
|
"type": "module",
|
|
6
12
|
"main": "src/index.js",
|
|
13
|
+
"types": "./types/index.d.ts",
|
|
7
14
|
"exports": {
|
|
8
|
-
".":
|
|
9
|
-
|
|
15
|
+
".": {
|
|
16
|
+
"types": "./types/index.d.ts",
|
|
17
|
+
"default": "./src/index.js"
|
|
18
|
+
},
|
|
19
|
+
"./bareagent": {
|
|
20
|
+
"types": "./types/bareagent.d.ts",
|
|
21
|
+
"default": "./src/bareagent.js"
|
|
22
|
+
}
|
|
10
23
|
},
|
|
11
24
|
"bin": {
|
|
12
25
|
"barebrowse": "./cli.js"
|
|
13
26
|
},
|
|
27
|
+
"files": [
|
|
28
|
+
"src/",
|
|
29
|
+
"types/",
|
|
30
|
+
"cli.js",
|
|
31
|
+
"mcp-server.js",
|
|
32
|
+
"barebrowse.context.md",
|
|
33
|
+
"README.md",
|
|
34
|
+
"CHANGELOG.md",
|
|
35
|
+
"NOTICE"
|
|
36
|
+
],
|
|
14
37
|
"engines": {
|
|
15
38
|
"node": ">=22"
|
|
16
39
|
},
|
|
17
40
|
"scripts": {
|
|
18
|
-
"test": "node --test test/unit/*.test.js test/integration/*.test.js"
|
|
41
|
+
"test": "node --test test/unit/*.test.js test/integration/*.test.js",
|
|
42
|
+
"test:unit": "node --test test/unit/*.test.js",
|
|
43
|
+
"test:integration": "node --test test/integration/*.test.js",
|
|
44
|
+
"typecheck": "tsc --noEmit",
|
|
45
|
+
"build:types": "tsc",
|
|
46
|
+
"prepublishOnly": "npm run build:types"
|
|
19
47
|
},
|
|
20
48
|
"keywords": [
|
|
21
49
|
"browser",
|
|
@@ -29,7 +57,11 @@
|
|
|
29
57
|
"headless"
|
|
30
58
|
],
|
|
31
59
|
"optionalDependencies": {
|
|
32
|
-
"wearehere": "
|
|
60
|
+
"wearehere": "1.0.0"
|
|
33
61
|
},
|
|
34
|
-
"license": "Apache-2.0"
|
|
62
|
+
"license": "Apache-2.0",
|
|
63
|
+
"devDependencies": {
|
|
64
|
+
"@types/node": "^25.9.1",
|
|
65
|
+
"typescript": "^6.0.3"
|
|
66
|
+
}
|
|
35
67
|
}
|
package/src/auth.js
CHANGED
|
@@ -126,7 +126,7 @@ function extractChromiumCookies(dbPath, domain) {
|
|
|
126
126
|
const aesKey = deriveKey(password);
|
|
127
127
|
|
|
128
128
|
// immutable=1 bypasses WAL lock on live databases
|
|
129
|
-
const db = new DatabaseSync(`file://${dbPath}?immutable=1`, {
|
|
129
|
+
const db = new DatabaseSync(`file://${dbPath}?immutable=1`, { readOnly: true });
|
|
130
130
|
|
|
131
131
|
let sql = `SELECT host_key, name, value, encrypted_value, path,
|
|
132
132
|
CAST(expires_utc AS TEXT) AS expires_utc, is_secure, is_httponly, samesite
|
|
@@ -144,7 +144,8 @@ function extractChromiumCookies(dbPath, domain) {
|
|
|
144
144
|
const SAMESITE = { 0: 'None', 1: 'Lax', 2: 'Strict' };
|
|
145
145
|
|
|
146
146
|
return rows.map((row) => {
|
|
147
|
-
const
|
|
147
|
+
const rawEnc = row.encrypted_value;
|
|
148
|
+
const enc = rawEnc instanceof Uint8Array ? Buffer.from(rawEnc) : Buffer.alloc(0);
|
|
148
149
|
let value;
|
|
149
150
|
try {
|
|
150
151
|
value = enc.length > 0 ? decryptCookie(enc, aesKey) : row.value;
|
|
@@ -154,7 +155,9 @@ function extractChromiumCookies(dbPath, domain) {
|
|
|
154
155
|
|
|
155
156
|
// Chrome timestamp: microseconds since 1601-01-01
|
|
156
157
|
const CHROME_EPOCH = 11644473600000000n;
|
|
157
|
-
const expiresUtc = row.expires_utc
|
|
158
|
+
const expiresUtc = typeof row.expires_utc === 'string' || typeof row.expires_utc === 'number'
|
|
159
|
+
? BigInt(row.expires_utc)
|
|
160
|
+
: 0n;
|
|
158
161
|
const expires = expiresUtc > 0n
|
|
159
162
|
? Number((expiresUtc - CHROME_EPOCH) / 1000000n)
|
|
160
163
|
: -1;
|
|
@@ -179,7 +182,7 @@ function extractChromiumCookies(dbPath, domain) {
|
|
|
179
182
|
* @returns {Array<object>} Cookies in CDP Network.setCookie format
|
|
180
183
|
*/
|
|
181
184
|
function extractFirefoxCookies(dbPath, domain) {
|
|
182
|
-
const db = new DatabaseSync(`file://${dbPath}?immutable=1`, {
|
|
185
|
+
const db = new DatabaseSync(`file://${dbPath}?immutable=1`, { readOnly: true });
|
|
183
186
|
|
|
184
187
|
let sql = `SELECT host, name, value, path, expiry, isSecure, isHttpOnly, sameSite
|
|
185
188
|
FROM moz_cookies`;
|
|
@@ -268,6 +271,22 @@ export async function injectCookies(session, cookies) {
|
|
|
268
271
|
}
|
|
269
272
|
}
|
|
270
273
|
|
|
274
|
+
/**
|
|
275
|
+
* RFC 6265 domain-match: does `host` belong to a cookie declared for
|
|
276
|
+
* `cookieDomain`? Leading dot on the cookie domain is ignored (host-only
|
|
277
|
+
* vs domain cookies are matched the same here, intentionally — we want
|
|
278
|
+
* parent-domain cookies like .google.com to apply to mail.google.com).
|
|
279
|
+
* @param {string} host - target hostname (e.g. 'mail.google.com')
|
|
280
|
+
* @param {string} cookieDomain - cookie's host_key (e.g. '.google.com')
|
|
281
|
+
* @returns {boolean}
|
|
282
|
+
*/
|
|
283
|
+
export function cookieDomainMatch(host, cookieDomain) {
|
|
284
|
+
const h = String(host).toLowerCase();
|
|
285
|
+
const d = String(cookieDomain).toLowerCase().replace(/^\./, '');
|
|
286
|
+
if (!d) return false;
|
|
287
|
+
return h === d || h.endsWith('.' + d);
|
|
288
|
+
}
|
|
289
|
+
|
|
271
290
|
/**
|
|
272
291
|
* Extract cookies for a URL and inject them into a CDP session.
|
|
273
292
|
* Convenience function combining extractCookies + injectCookies.
|
|
@@ -276,12 +295,18 @@ export async function injectCookies(session, cookies) {
|
|
|
276
295
|
* @param {object} [opts] - Options passed to extractCookies
|
|
277
296
|
*/
|
|
278
297
|
export async function authenticate(session, url, opts = {}) {
|
|
279
|
-
|
|
280
|
-
//
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
298
|
+
const fullHost = new URL(url).hostname.toLowerCase();
|
|
299
|
+
// Coarse SQL pre-filter: strip to a registrable-ish domain so the LIKE query
|
|
300
|
+
// returns a superset (incl. parent-domain cookies). slice(-2) is a cheap
|
|
301
|
+
// heuristic — it over-selects for multi-part eTLDs (co.uk) and as a substring
|
|
302
|
+
// match, so the precise RFC-6265 domain-match below is what actually decides
|
|
303
|
+
// which cookies get injected. Without it, browsing apple.com would inject
|
|
304
|
+
// cookies for apple.com.evil.org and every *.co.uk site (verified).
|
|
305
|
+
const noWww = fullHost.replace(/^www\./, '');
|
|
306
|
+
const parts = noWww.split('.');
|
|
307
|
+
const coarseDomain = parts.length > 2 ? parts.slice(-2).join('.') : noWww;
|
|
308
|
+
const candidates = extractCookies({ ...opts, domain: coarseDomain });
|
|
309
|
+
const cookies = candidates.filter((c) => cookieDomainMatch(fullHost, c.domain));
|
|
285
310
|
if (cookies.length > 0) {
|
|
286
311
|
await injectCookies(session, cookies);
|
|
287
312
|
}
|
package/src/bareagent.js
CHANGED
|
@@ -11,6 +11,8 @@
|
|
|
11
11
|
* 300ms settle delay after actions for DOM updates.
|
|
12
12
|
*/
|
|
13
13
|
|
|
14
|
+
/// <reference path="./wearehere.d.ts" />
|
|
15
|
+
|
|
14
16
|
import { browse, connect } from './index.js';
|
|
15
17
|
|
|
16
18
|
// Optional: privacy assessment via wearehere
|
|
@@ -22,6 +24,14 @@ try {
|
|
|
22
24
|
const SETTLE_MS = 300;
|
|
23
25
|
const settle = () => new Promise((r) => setTimeout(r, SETTLE_MS));
|
|
24
26
|
|
|
27
|
+
/**
|
|
28
|
+
* @typedef {object} BrowseTool
|
|
29
|
+
* @property {string} name
|
|
30
|
+
* @property {string} description
|
|
31
|
+
* @property {object} parameters - JSON-schema-shaped parameter spec
|
|
32
|
+
* @property {(args?: any) => Promise<any>} execute
|
|
33
|
+
*/
|
|
34
|
+
|
|
25
35
|
/**
|
|
26
36
|
* Create bareagent-compatible browse tools.
|
|
27
37
|
* @param {object} [opts] - Options passed to connect() for session tools
|
|
@@ -42,6 +52,7 @@ export function createBrowseTools(opts = {}) {
|
|
|
42
52
|
return await page.snapshot();
|
|
43
53
|
}
|
|
44
54
|
|
|
55
|
+
/** @type {BrowseTool[]} */
|
|
45
56
|
const tools = [
|
|
46
57
|
{
|
|
47
58
|
name: 'browse',
|
|
@@ -77,7 +88,7 @@ export function createBrowseTools(opts = {}) {
|
|
|
77
88
|
pruneMode: { type: 'string', enum: ['act', 'read'], description: '"act" (default) for interactive elements only; "read" for paragraphs and long text (articles/docs).' },
|
|
78
89
|
},
|
|
79
90
|
},
|
|
80
|
-
execute: async ({ pruneMode } = {}) => {
|
|
91
|
+
execute: async (/** @type {{ pruneMode?: string }} */ { pruneMode } = {}) => {
|
|
81
92
|
const page = await getPage();
|
|
82
93
|
return await page.snapshot(pruneMode ? { mode: pruneMode } : undefined);
|
|
83
94
|
},
|
|
@@ -231,7 +242,7 @@ export function createBrowseTools(opts = {}) {
|
|
|
231
242
|
landscape: { type: 'boolean', description: 'Landscape orientation (default: false)' },
|
|
232
243
|
},
|
|
233
244
|
},
|
|
234
|
-
execute: async ({ landscape } = {}) => {
|
|
245
|
+
execute: async (/** @type {{ landscape?: boolean }} */ { landscape } = {}) => {
|
|
235
246
|
const page = await getPage();
|
|
236
247
|
return await page.pdf({ landscape });
|
|
237
248
|
},
|
|
@@ -245,7 +256,7 @@ export function createBrowseTools(opts = {}) {
|
|
|
245
256
|
format: { type: 'string', enum: ['png', 'jpeg', 'webp'], description: 'Image format (default: png)' },
|
|
246
257
|
},
|
|
247
258
|
},
|
|
248
|
-
execute: async ({ format } = {}) => {
|
|
259
|
+
execute: async (/** @type {{ format?: string }} */ { format } = {}) => {
|
|
249
260
|
const page = await getPage();
|
|
250
261
|
return await page.screenshot({ format });
|
|
251
262
|
},
|
|
@@ -259,7 +270,7 @@ export function createBrowseTools(opts = {}) {
|
|
|
259
270
|
ignoreCache: { type: 'boolean', description: 'Bypass HTTP cache (hard reload). Default: false.' },
|
|
260
271
|
},
|
|
261
272
|
},
|
|
262
|
-
execute: async ({ ignoreCache } = {}) => actionAndSnapshot((page) => page.reload({ ignoreCache })),
|
|
273
|
+
execute: async (/** @type {{ ignoreCache?: boolean }} */ { ignoreCache } = {}) => actionAndSnapshot((page) => page.reload({ ignoreCache })),
|
|
263
274
|
},
|
|
264
275
|
{
|
|
265
276
|
name: 'wait_for',
|
|
@@ -272,7 +283,7 @@ export function createBrowseTools(opts = {}) {
|
|
|
272
283
|
timeout: { type: 'number', description: 'Timeout in ms (default: 30000)' },
|
|
273
284
|
},
|
|
274
285
|
},
|
|
275
|
-
execute: async ({ text, selector, timeout } = {}) => actionAndSnapshot((page) => page.waitFor({ text, selector, timeout })),
|
|
286
|
+
execute: async (/** @type {{ text?: string, selector?: string, timeout?: number }} */ { text, selector, timeout } = {}) => actionAndSnapshot((page) => page.waitFor({ text, selector, timeout })),
|
|
276
287
|
},
|
|
277
288
|
{
|
|
278
289
|
name: 'downloads',
|
package/src/cdp.js
CHANGED
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
/**
|
|
13
13
|
* Create a CDP client connected to the given WebSocket URL.
|
|
14
14
|
* @param {string} wsUrl - WebSocket URL (ws://127.0.0.1:PORT/devtools/...)
|
|
15
|
-
* @returns {Promise<
|
|
15
|
+
* @returns {Promise<object>} CDP client ({ send, on, once, session, close })
|
|
16
16
|
*/
|
|
17
17
|
export async function createCDP(wsUrl) {
|
|
18
18
|
const ws = new WebSocket(wsUrl);
|
|
@@ -20,7 +20,8 @@ export async function createCDP(wsUrl) {
|
|
|
20
20
|
const pending = new Map(); // id → { resolve, reject }
|
|
21
21
|
const listeners = new Map(); // "method" or "sessionId:method" → Set<callback>
|
|
22
22
|
|
|
23
|
-
|
|
23
|
+
/** @type {Promise<void>} */
|
|
24
|
+
const connected = new Promise((resolve, reject) => {
|
|
24
25
|
const timeout = setTimeout(() => reject(new Error('CDP connection timeout (5s)')), 5000);
|
|
25
26
|
ws.onopen = () => { clearTimeout(timeout); resolve(); };
|
|
26
27
|
ws.onerror = (e) => {
|
|
@@ -28,6 +29,7 @@ export async function createCDP(wsUrl) {
|
|
|
28
29
|
reject(new Error(`CDP WebSocket connection failed: ${e.message || 'unknown error'}`));
|
|
29
30
|
};
|
|
30
31
|
});
|
|
32
|
+
await connected;
|
|
31
33
|
|
|
32
34
|
ws.onmessage = (event) => {
|
|
33
35
|
const msg = JSON.parse(typeof event.data === 'string' ? event.data : event.data.toString());
|
package/src/chromium.js
CHANGED
|
@@ -5,9 +5,14 @@
|
|
|
5
5
|
* Modes: headless (launch new, no UI), headed (launch new, visible window).
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
import {
|
|
8
|
+
import { execFileSync, spawn } from 'node:child_process';
|
|
9
9
|
import { existsSync, rmSync } from 'node:fs';
|
|
10
10
|
|
|
11
|
+
/** Block the current thread for `ms` without spawning a process. */
|
|
12
|
+
function sleepSync(ms) {
|
|
13
|
+
Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);
|
|
14
|
+
}
|
|
15
|
+
|
|
11
16
|
// Track launched browsers so we can clean them up if the parent crashes.
|
|
12
17
|
// Registered exit handlers (one-time) iterate this set on shutdown.
|
|
13
18
|
const activeBrowsers = new Set();
|
|
@@ -29,7 +34,7 @@ function reapAllSync() {
|
|
|
29
34
|
for (const b of toReap) {
|
|
30
35
|
for (let i = 0; i < 20; i++) {
|
|
31
36
|
try { process.kill(b.process.pid, 0); } catch { break; }
|
|
32
|
-
|
|
37
|
+
sleepSync(50);
|
|
33
38
|
}
|
|
34
39
|
if (b.ownedProfileDir) {
|
|
35
40
|
try { rmSync(b.ownedProfileDir, { recursive: true, force: true }); } catch {}
|
|
@@ -84,8 +89,11 @@ export function findBrowser() {
|
|
|
84
89
|
if (existsSync(candidate)) return candidate;
|
|
85
90
|
continue;
|
|
86
91
|
}
|
|
87
|
-
// Relative name —
|
|
88
|
-
const path =
|
|
92
|
+
// Relative name — resolve via `which` (execFile: no shell, no injection)
|
|
93
|
+
const path = execFileSync('which', [candidate], {
|
|
94
|
+
encoding: 'utf8',
|
|
95
|
+
stdio: ['ignore', 'pipe', 'ignore'],
|
|
96
|
+
}).trim();
|
|
89
97
|
if (path) return path;
|
|
90
98
|
} catch {
|
|
91
99
|
// Not found, try next
|
|
@@ -104,7 +112,8 @@ export function findBrowser() {
|
|
|
104
112
|
* @param {number} [opts.port=0] - CDP port (0 = random available port)
|
|
105
113
|
* @param {string} [opts.userDataDir] - Browser profile directory
|
|
106
114
|
* @param {boolean} [opts.headed=false] - Launch in headed mode (with visible window)
|
|
107
|
-
* @
|
|
115
|
+
* @param {string} [opts.proxy] - Proxy server (e.g. 'http://host:port')
|
|
116
|
+
* @returns {Promise<{wsUrl: string, process: import('node:child_process').ChildProcess, port: number}>}
|
|
108
117
|
*/
|
|
109
118
|
export async function launch(opts = {}) {
|
|
110
119
|
const binary = opts.binary || findBrowser();
|
|
@@ -227,6 +236,7 @@ export async function cleanupBrowser(browser) {
|
|
|
227
236
|
if (!browser) return;
|
|
228
237
|
activeBrowsers.delete(browser);
|
|
229
238
|
if (browser.process && !browser.process.killed && browser.process.exitCode === null) {
|
|
239
|
+
/** @type {Promise<void>} */
|
|
230
240
|
const exited = new Promise((resolve) => {
|
|
231
241
|
const timer = setTimeout(resolve, 2000);
|
|
232
242
|
browser.process.once('exit', () => { clearTimeout(timer); resolve(); });
|
|
@@ -274,7 +284,10 @@ export async function cleanupBrowser(browser) {
|
|
|
274
284
|
export async function getDebugUrl(port) {
|
|
275
285
|
const res = await fetch(`http://127.0.0.1:${port}/json/version`);
|
|
276
286
|
if (!res.ok) throw new Error(`Cannot reach browser debug port at ${port}: ${res.status}`);
|
|
277
|
-
const data = await res.json();
|
|
287
|
+
const data = /** @type {{ webSocketDebuggerUrl?: string }} */ (await res.json());
|
|
288
|
+
if (!data.webSocketDebuggerUrl) {
|
|
289
|
+
throw new Error(`Browser debug port at ${port} returned no webSocketDebuggerUrl`);
|
|
290
|
+
}
|
|
278
291
|
return data.webSocketDebuggerUrl;
|
|
279
292
|
}
|
|
280
293
|
|