npm - barebrowse - Versions diffs - 0.4.0 → 0.4.2 - Mend

barebrowse 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/.idea/barebrowse.iml +10 -0
package/.idea/inspectionProfiles/profiles_settings.xml +6 -0
package/.idea/misc.xml +4 -0
package/.idea/modules.xml +8 -0
package/.idea/vcs.xml +6 -0
package/CHANGELOG.md +34 -0
package/README.md +7 -7
package/barebrowse.context.md +17 -9
package/cli.js +2 -2
package/commands/barebrowse.md +134 -0
package/docs/00-context/system-state.md +20 -14
package/docs/03-logs/validation-log.md +146 -0
package/docs/04-process/testing.md +21 -0
package/docs/README.md +2 -1
package/docs/skill-template.md +3 -3
package/mcp-server.js +5 -4
package/package.json +1 -1
package/src/auth.js +17 -7
package/src/index.js +45 -15
/package/{.claude/skills → commands}/barebrowse/SKILL.md +0 -0

package/.idea/barebrowse.iml ADDED Viewed

@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
+    </content>
+    <orderEntry type="jdk" jdkName="Python 3.14" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

package/.idea/inspectionProfiles/profiles_settings.xml ADDED Viewed

@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

package/.idea/misc.xml ADDED Viewed

@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.14" project-jdk-type="Python SDK" />
+</project>

package/.idea/modules.xml ADDED Viewed

@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/barebrowse.iml" filepath="$PROJECT_DIR$/.idea/barebrowse.iml" />
+    </modules>
+  </component>
+</project>

package/.idea/vcs.xml ADDED Viewed

@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,39 @@
 # Changelog
+## 0.4.2
+Authenticated browsing improvements. MCP sessions now auto-inject cookies and fall back to headed mode when bot-detected.
+### MCP server
+- Session uses `mode: 'hybrid'` — headless by default, automatic headed fallback on challenge pages
+- `goto` tool now injects cookies from user's browsers before navigation (Chromium + Firefox merged)
+- Tool descriptions updated with trigger words for better agent tool selection
+### Cookie extraction (`src/auth.js`)
+- `extractCookies()` auto mode merges all browsers (Chromium + Firefox, last-write-wins by `name@domain`)
+- `authenticate()` strips subdomains (`mail.google.com` → `google.com`) so parent-domain cookies are included
+### Challenge detection (`src/index.js`)
+- `isChallengePage()` detects Reddit block pages ("prove your humanity", "file a ticket")
+- `connect()` hybrid fallback triggers on `goto()` when challenge detected
+### Skill files
+- New: `commands/barebrowse.md` — CLI command reference for non-Claude agents (same as SKILL.md)
+- Moved: `SKILL.md` from `.claude/skills/barebrowse/` to `commands/barebrowse/SKILL.md`
+- `install --skill` reads from new `commands/` path
+### Docs
+- README: MCP tool count 7→12, bareagent tools 9→13, skill install paths updated
+- barebrowse.context.md: v0.4.2, hybrid for connect(), MCP cookie injection
+- docs/00-context/system-state.md: bareagent 13 tools, CLI 27 commands, file map updated, published to npm
+- docs/03-logs/validation-log.md: full MCP validation results (Gmail, YouTube, LinkedIn, Reddit, Amazon, GitHub)
+## 0.4.1
+- Docs: testing guide updated with v0.4.0 manual validation table
+- Docs: barebrowse.context.md — CLI examples expanded, open flags listed, MCP tool count 7→12
+- Docs: validation-log.md — full manual test results for all 10 new features
 ## 0.4.0
 10 new features inspired by Playwright MCP. All validated manually against live sites.

package/README.md CHANGED Viewed

@@ -45,9 +45,9 @@ Outputs go to `.barebrowse/` as files -- agents read them with their file tools,
 **Teach your agent the commands** by installing the skill file (a markdown reference the agent reads as context). The CLI tool itself still needs `npm install barebrowse` -- the skill just teaches the agent how to use it.
-**Claude Code:** `.claude/skills/barebrowse/` (project) or `~/.claude/skills/barebrowse/` (global, via `barebrowse install --skill`).
+**Claude Code:** Copy `commands/barebrowse/SKILL.md` to `.claude/skills/barebrowse/SKILL.md` (project) or run `barebrowse install --skill` (global).
-**Other agents:** `.barebrowse/commands/` (project) or `~/.config/barebrowse/commands/` (global). Copy [SKILL.md](.claude/skills/barebrowse/SKILL.md) there.
+**Other agents:** Copy `commands/barebrowse.md` to your agent's command/skill directory.
 For writing your own skill files for other CLI tools: [docs/skill-template.md](docs/skill-template.md).
@@ -87,11 +87,11 @@ Or manually add to your config (`claude_desktop_config.json`, `.cursor/mcp.json`
 }
 ```
-7 tools: `browse`, `goto`, `snapshot`, `click`, `type`, `press`, `scroll`.
+12 tools: `browse`, `goto`, `snapshot`, `click`, `type`, `press`, `scroll`, `back`, `forward`, `drag`, `upload`, `pdf`. Session runs in hybrid mode with automatic cookie injection.
 ### 3. Library -- for agentic automation
-Import barebrowse in your agent code. One-shot reads, interactive sessions, full observe-think-act loops. Works with any LLM orchestration library. Ships with a ready-made adapter for [bareagent](https://www.npmjs.com/package/bare-agent) (9 tools, auto-snapshot after every action).
+Import barebrowse in your agent code. One-shot reads, interactive sessions, full observe-think-act loops. Works with any LLM orchestration library. Ships with a ready-made adapter for [bareagent](https://www.npmjs.com/package/bare-agent) (13 tools, auto-snapshot after every action).
 For code examples, API reference, and wiring instructions, see **[barebrowse.context.md](barebrowse.context.md)** -- the full integration guide.
@@ -115,13 +115,13 @@ This is the obstacle course your agent doesn't have to think about:
 | **Consent behind iframe overlay** | JS click via DOM.resolveNode bypasses z-index/overlay issues | Both |
 | **Permission prompts** (location, camera, mic) | Launch flags + CDP Browser.setPermission auto-deny | Both |
 | **Media autoplay blocked** | Autoplay policy flag on launch | Both |
-| **Login walls** | Cookie extraction from Firefox/Chromium, injected via CDP | Both |
+| **Login walls** | Cookie extraction from all browsers (Firefox + Chromium merged), injected via CDP | Both |
 | **Pre-filled form inputs** | Select-all + delete before typing | Both |
 | **Off-screen elements** | Scrolled into view before every click | Both |
 | **Form submission** | Enter key triggers onsubmit | Both |
 | **Tab between fields** | Tab key moves focus correctly | Both |
 | **SPA navigation** (YouTube, GitHub) | SPA-aware wait: frameNavigated + loadEventFired | Both |
-| **Bot detection** (Google, Reddit) | Stealth patches (headless) + headed fallback with real cookies | Both |
+| **Bot detection** (Google, Reddit) | Stealth patches (headless) + automatic headed fallback with real cookies | Hybrid |
 | **navigator.webdriver leak** | Patched before page scripts run: webdriver, plugins, languages, chrome object | Headless |
 | **JS dialogs** (alert/confirm/prompt) | Auto-dismiss via CDP, logged for inspection | Both |
 | **Profile locking** | Unique temp dir per headless instance | Headless |
@@ -148,7 +148,7 @@ Everything the agent can do through barebrowse:
 |--------|-------------|
 | **Navigate** | Load a URL, wait for page load, auto-dismiss consent |
 | **Back / Forward** | Browser history navigation |
-| **Snapshot** | Pruned ARIA tree with `[ref=N]` markers (40-90% token reduction) |
+| **Snapshot** | Pruned ARIA tree with `[ref=N]` markers. Two modes: `act` (buttons, links, inputs) and `read` (full text). 40-90% token reduction. |
 | **Click** | Scroll into view + mouse click at element center |
 | **Type** | Focus + insert text, with option to clear existing content first |
 | **Press** | Special keys: Enter, Tab, Escape, Backspace, Delete, arrows, Space |

package/barebrowse.context.md CHANGED Viewed

@@ -1,7 +1,7 @@
 # barebrowse -- Integration Guide
 > For AI assistants and developers wiring barebrowse into a project.
-> v0.3.0 | Node.js >= 22 | 0 required deps | MIT
+> v0.4.2 | Node.js >= 22 | 0 required deps | MIT
 ## What this is
@@ -51,7 +51,7 @@ const snapshot = await browse('https://example.com', {
 ## connect() API
-`connect(opts)` returns a page handle for interactive sessions. Same opts as `browse()` for mode/port.
+`connect(opts)` returns a page handle for interactive sessions. Same opts as `browse()` for mode/port. Supports `hybrid` mode — starts headless, falls back to headed on bot detection (same as `browse()`).
 | Method | Args | Returns | Notes |
 |---|---|---|---|
@@ -185,7 +185,7 @@ try {
 ```
 `createBrowseTools(opts)` returns:
-- `tools` -- array of bareagent-compatible tool objects (browse, goto, snapshot, click, type, press, scroll, select, screenshot)
+- `tools` -- array of bareagent-compatible tool objects (browse, goto, snapshot, click, type, press, scroll, select, back, forward, drag, upload, screenshot)
 - `close()` -- cleanup function, call when done
 Action tools (click, type, press, scroll, goto) auto-return a fresh snapshot so the LLM always sees the result. 300ms settle delay after actions for DOM updates.
@@ -199,14 +199,20 @@ barebrowse open https://example.com    # Start daemon + navigate
 barebrowse snapshot                    # → .barebrowse/page-<timestamp>.yml
 barebrowse click 8                     # Click element ref=8
 barebrowse type 12 hello world         # Type into element ref=12
-barebrowse screenshot                  # → .barebrowse/screenshot-<timestamp>.png
-barebrowse console-logs                # → .barebrowse/console-<timestamp>.json
+barebrowse back                        # Go back in history
+barebrowse upload 7 /path/to/file.pdf  # Upload file to file input
+barebrowse pdf                         # → .barebrowse/page-<timestamp>.pdf
+barebrowse wait-for --text="Success"   # Wait for content to appear
+barebrowse tabs                        # List open tabs
+barebrowse save-state                  # → .barebrowse/state-<timestamp>.json
 barebrowse close                       # Kill daemon + browser
 ```
-Session lifecycle: `open` spawns a background daemon holding a `connect()` session. Subsequent commands POST to the daemon over HTTP (localhost). `close` shuts everything down.
+**Open flags:** `--mode=headless|headed|hybrid`, `--proxy=URL`, `--viewport=WxH`, `--storage-state=FILE`, `--no-cookies`, `--browser=firefox|chromium`, `--timeout=N`
-Full command reference: `.claude/skills/barebrowse/SKILL.md`
+Session lifecycle: `open` spawns a background daemon holding a `connect()` session. Subsequent commands POST to the daemon over HTTP (localhost). `close` shuts everything down. JS dialogs (alert/confirm/prompt) are auto-dismissed and logged.
+Full command reference: `commands/barebrowse/SKILL.md` (Claude Code) or `commands/barebrowse.md` (other agents)
 ## MCP wrapper
@@ -228,11 +234,13 @@ barebrowse ships an MCP server for direct use with Claude Desktop, Cursor, or an
 }
 ```
-7 tools exposed: `browse` (one-shot), `goto`, `snapshot`, `click`, `type`, `press`, `scroll`.
+12 tools exposed: `browse` (one-shot), `goto`, `snapshot`, `click`, `type`, `press`, `scroll`, `back`, `forward`, `drag`, `upload`, `pdf`.
 Action tools return `'ok'` -- the agent calls `snapshot` explicitly to observe. This avoids double-token output since MCP tool calls are cheap to chain.
-Session tools (goto, snapshot, click, type, press, scroll) share a singleton page, lazy-created on first use.
+Session runs in hybrid mode (headless with automatic headed fallback on bot detection). `goto` injects cookies from the user's browser before navigation for authenticated access.
+Session tools share a singleton page, lazy-created on first use.
 ## Architecture

package/cli.js CHANGED Viewed

@@ -286,7 +286,7 @@ function install() {
 function installSkill() {
   const thisDir = fileURLToPath(new URL('.', import.meta.url));
-  const src = join(thisDir, '.claude', 'skills', 'barebrowse', 'SKILL.md');
+  const src = join(thisDir, 'commands', 'barebrowse', 'SKILL.md');
   if (!existsSync(src)) {
     console.error('SKILL.md not found in package. Reinstall barebrowse.');
@@ -408,6 +408,6 @@ MCP:
 As a library:
   import { browse, connect } from 'barebrowse';
-More: see README.md or barebrowse.context.md
+More: see README.md or commands/barebrowse.md
 `);
 }

package/commands/barebrowse.md ADDED Viewed

@@ -0,0 +1,134 @@
+---
+name: barebrowse
+description: Browser automation using the user's real browser with real cookies. Handles consent walls, login sessions, and bot detection automatically.
+allowed-tools: Bash(barebrowse:*)
+---
+# barebrowse CLI — Browser Automation for Agents
+Browse any URL using the user's real browser with real cookies. Returns pruned ARIA snapshots (40-90% smaller than raw) with `[ref=N]` markers for interaction. Handles cookie consent, login sessions, JS dialogs, and bot detection automatically.
+## Quick Start
+```bash
+barebrowse open https://example.com    # Start session + navigate
+barebrowse snapshot                    # Get ARIA snapshot → .barebrowse/page-*.yml
+barebrowse click 8                     # Click element with ref=8
+barebrowse snapshot                    # See result
+barebrowse close                       # End session
+```
+All output files go to `.barebrowse/` in the current directory. Read them with the Read tool when needed.
+## Commands
+### Session Lifecycle
+| Command | Description |
+|---------|-------------|
+| `barebrowse open [url] [flags]` | Start browser session. Optionally navigate to URL. |
+| `barebrowse close` | Close session and kill browser. |
+| `barebrowse status` | Check if session is running. |
+**Open flags:**
+- `--mode=headless|headed|hybrid` — Browser mode (default: headless)
+- `--no-cookies` — Skip cookie injection
+- `--browser=firefox|chromium` — Cookie source
+- `--prune-mode=act|read` — Default pruning mode
+- `--timeout=N` — Navigation timeout in ms
+- `--proxy=URL` — HTTP/SOCKS proxy server
+- `--viewport=WxH` — Viewport size (e.g. 1280x720)
+- `--storage-state=FILE` — Load cookies/localStorage from JSON file
+### Navigation
+| Command | Output |
+|---------|--------|
+| `barebrowse goto <url>` | Navigates, waits for load, dismisses consent. Prints "ok". |
+| `barebrowse back` | Go back in browser history. |
+| `barebrowse forward` | Go forward in browser history. |
+| `barebrowse snapshot` | ARIA snapshot → `.barebrowse/page-<timestamp>.yml` |
+| `barebrowse snapshot --mode=read` | Read mode: keeps all text (for content extraction) |
+| `barebrowse screenshot` | Screenshot → `.barebrowse/screenshot-<timestamp>.png` |
+| `barebrowse pdf [--landscape]` | PDF export → `.barebrowse/page-<timestamp>.pdf` |
+### Interaction
+| Command | Description |
+|---------|-------------|
+| `barebrowse click <ref>` | Click element (scrolls into view first) |
+| `barebrowse type <ref> <text>` | Type text into element |
+| `barebrowse fill <ref> <text>` | Clear existing content + type new text |
+| `barebrowse press <key>` | Press key: Enter, Tab, Escape, Backspace, Delete, arrows, Space |
+| `barebrowse scroll <deltaY>` | Scroll page (positive=down, negative=up) |
+| `barebrowse hover <ref>` | Hover over element (triggers tooltips) |
+| `barebrowse select <ref> <value>` | Select dropdown option |
+| `barebrowse drag <fromRef> <toRef>` | Drag element to another element |
+| `barebrowse upload <ref> <files..>` | Upload file(s) to a file input element |
+### Tabs
+| Command | Description |
+|---------|-------------|
+| `barebrowse tabs` | List open tabs (index, url, title) |
+| `barebrowse tab <index>` | Switch to tab by index |
+### Debugging
+| Command | Output |
+|---------|--------|
+| `barebrowse eval <expression>` | Evaluate JS in page, print result |
+| `barebrowse wait-idle` | Wait for network idle (no requests for 500ms) |
+| `barebrowse wait-for [opts]` | Wait for content to appear on page |
+| `barebrowse console-logs` | Console logs → `.barebrowse/console-<timestamp>.json` |
+| `barebrowse network-log` | Network log → `.barebrowse/network-<timestamp>.json` |
+| `barebrowse network-log --failed` | Only failed/4xx/5xx requests |
+| `barebrowse dialog-log` | JS dialog log → `.barebrowse/dialogs-<timestamp>.json` |
+| `barebrowse save-state` | Cookies + localStorage → `.barebrowse/state-<timestamp>.json` |
+**wait-for flags:**
+- `--text=STRING` — Wait for text to appear in page body
+- `--selector=CSS` — Wait for CSS selector to match
+- `--timeout=N` — Max wait time in ms (default: 30000)
+## Snapshot Format
+The snapshot is a YAML-like ARIA tree. Each line is one node:
+```
+- WebArea "Example Domain" [ref=1]
+  - heading "Example Domain" [level=1] [ref=3]
+  - paragraph [ref=5]
+    - StaticText "This domain is for use in illustrative examples." [ref=6]
+  - link "More information..." [ref=8]
+```
+- `[ref=N]` — Use this number with click, type, fill, hover, select, drag, upload
+- Refs change on every snapshot — always take a fresh snapshot before interacting
+- **act mode** (default): interactive elements + labels — for clicking, typing, navigating
+- **read mode**: all text content — for reading articles, extracting data
+## Workflow Pattern
+1. `barebrowse open <url>` — start session
+2. `barebrowse snapshot` — observe page (read the .yml file)
+3. Decide action based on snapshot content
+4. `barebrowse click/type/fill/press/scroll/drag/upload <ref>` — act
+5. `barebrowse snapshot` — observe result (refs are now different!)
+6. Repeat 3-5 until goal achieved
+7. `barebrowse close` — clean up
+## Tips
+- **Always snapshot before interacting** — refs are ephemeral and change every time
+- **Use `fill` instead of `type`** when replacing existing text in input fields
+- **Use `--mode=read`** for snapshot when you need to extract article content or data
+- **Use `back`/`forward`** to navigate browser history instead of re-entering URLs
+- **Use `upload`** for file inputs — pass absolute paths to the files
+- **Use `wait-for`** when content loads asynchronously — more reliable than `wait-idle`
+- **Check `dialog-log`** if JS alerts/confirms were auto-dismissed during your session
+- **Use `save-state`** to persist cookies/localStorage for later sessions via `--storage-state`
+- **Check `console-logs`** when page behavior seems wrong — JS errors show up there
+- **Check `network-log --failed`** to debug missing content or broken API calls
+- **Use `eval`** as an escape hatch when ARIA tree doesn't show what you need
+- **One session per project** — `.barebrowse/` is project-scoped
+- For bot-detected sites, use `--mode=headed` (requires browser with `--remote-debugging-port=9222`)

package/docs/00-context/system-state.md CHANGED Viewed

@@ -70,7 +70,7 @@ Every action returns a **pruned ARIA snapshot** -- the agent's view of the page
 | **Consent behind iframe overlay** | JS `.click()` via `DOM.resolveNode` bypasses z-index/overlay issues | Both |
 | **Permission prompts** (location, notifications, camera, mic) | Launch flags + CDP `Browser.setPermission` auto-deny | Both |
 | **Media autoplay blocked** | `--autoplay-policy=no-user-gesture-required` | Both |
-| **Login walls** | Firefox cookie extraction, CDP injection (user's real sessions) | Both |
+| **Login walls** | All-browser cookie merge (Firefox + Chromium), CDP injection (user's real sessions) | Both |
 | **Pre-filled form inputs** | `type({ clear: true })` selects all + deletes before typing | Both |
 | **Off-screen elements** | `DOM.scrollIntoViewIfNeeded` before every click | Both |
 | **Form submission** | `press('Enter')` with proper `text: '\r'` triggers onsubmit | Both |
@@ -86,8 +86,6 @@ Every action returns a **pruned ARIA snapshot** -- the agent's view of the page
 | Obstacle | What's Needed | Difficulty |
 |----------|--------------|------------|
-| File upload | `Input.setFiles` via CDP | Low |
-| Drag and drop | `Input.dispatchDragEvent` sequence | Medium |
 | Infinite scroll | Scroll + wait for new content strategy | Medium |
 | CAPTCHAs | Cannot solve -- headed mode lets user solve manually | N/A |
 | Cross-origin iframes | Frame tree traversal via CDP | Medium |
@@ -251,9 +249,10 @@ Chrome permission prompts (location, notifications, camera, mic, etc.) are suppr
 - No user prompt ever appears -- agents browse without interruption
 ### Cross-browser cookie injection -- done
-Firefox cookies (user's default browser) extracted from SQLite -> injected into headless or headed Chromium via CDP `Network.setCookie`. No need to use Chromium as daily browser.
-- `browse()`: auto-injects cookies before navigation (opt-out with `{ cookies: false }`)
+Auto mode merges cookies from all detected browsers (Chromium + Firefox, last-write-wins by name+domain). No need to use Chromium as daily browser.
+- `browse()`: auto-injects merged cookies before navigation (opt-out with `{ cookies: false }`)
 - `connect()`: manual injection via `page.injectCookies(url, { browser: 'firefox' })`
+- MCP `goto`: auto-injects cookies before every navigation
 - Proven: YouTube login session transferred from Firefox -> headed Chromium -> video playback
 ### Stealth patches -- done
@@ -292,7 +291,7 @@ const result = await loop.run(messages, tools);
 await close();
 ```
-9 tools: browse, goto, snapshot, click, type, press, scroll, select, screenshot.
+13 tools: browse, goto, snapshot, click, type, press, scroll, select, back, forward, drag, upload, screenshot.
 Action tools auto-return snapshot (300ms settle delay). The LLM always sees the result.
 ### MCP server
@@ -310,8 +309,9 @@ Raw JSON-RPC 2.0 over stdio. Zero SDK dependencies. `npm install barebrowse` the
 }
 ```
-7 tools: browse (one-shot), goto, snapshot, click, type, press, scroll.
+12 tools: browse (one-shot), goto, snapshot, click, type, press, scroll, back, forward, drag, upload, pdf.
 Action tools return `'ok'` -- agent calls `snapshot` explicitly (MCP tool calls are cheap to chain).
+Session runs in hybrid mode (headless + automatic headed fallback on bot detection). `goto` injects cookies from the user's browser before navigation.
 Session tools share a singleton page, lazy-created on first use.
 ### CLI session -- for coding agents + human devs
@@ -328,11 +328,11 @@ barebrowse close                       # Kill daemon + browser
 Architecture: `open` spawns a detached child process running an HTTP server on a random localhost port. Session state stored in `.barebrowse/session.json`. Subsequent commands POST to the daemon. `close` sends shutdown, daemon calls `page.close()` + `process.exit(0)`.
-Full commands: open, close, status, goto, snapshot, screenshot, click, type, fill, press, scroll, hover, select, eval, wait-idle, console-logs, network-log.
+Full commands: open, close, status, goto, back, forward, snapshot, screenshot, pdf, click, type, fill, press, scroll, hover, select, drag, upload, tabs, tab, eval, wait-idle, wait-for, console-logs, network-log, dialog-log, save-state.
 Self-sufficiency features (console/network capture, eval) let agents debug without guessing -- they see JS errors and failed requests directly.
-SKILL.md (`.claude/skills/barebrowse/SKILL.md`) teaches Claude Code the CLI commands. Install with `barebrowse install --skill`.
+SKILL.md (`commands/barebrowse/SKILL.md`) teaches Claude Code the CLI commands. Install with `barebrowse install --skill`.
 ---
@@ -353,7 +353,7 @@ barebrowse = the eyes + hands  (browse, read, interact with the web)
 - **Linux first.** Tested on Fedora/KDE. macOS/Windows cookie extraction paths exist in auth.js but untested.
 - **Node >= 22.** Built-in WebSocket, built-in SQLite.
 - **Not a server.** Library that agents import. Wrap as MCP (included) or HTTP if needed.
-- **Not cross-platform tested.** Local development only, not published to npm.
+- **Not cross-platform tested.** Tested on Linux only. Published to npm as `barebrowse`.
 ---
@@ -381,14 +381,20 @@ barebrowse/
 │   ├── headed-demo.js # Interactive demo: Wikipedia → DuckDuckGo
 │   └── yt-demo.js     # YouTube demo: Firefox cookies → search → play video
 ├── docs/
-│   ├── prd.md         # Decisions + rationale (reference)
-│   ├── poc-plan.md    # Original POC phases + DoD
-│   ├── blueprint.md   # This file
-│   └── testing.md     # Test guide: pyramid, all 54 tests, CI strategy
+│   ├── README.md             # Documentation navigation guide
+│   ├── 00-context/           # vision, assumptions, system-state (this file)
+│   ├── 01-product/           # prd.md
+│   ├── 03-logs/              # decisions, implementation, bugs, validation, insights
+│   ├── 04-process/           # dev-workflow, definition-of-done, testing (64 tests)
+│   └── archive/              # poc-plan.md
 ├── mcp-server.js      # MCP server (JSON-RPC 2.0 over stdio)
 ├── cli.js             # CLI entry: session commands, MCP, browse, install
 ├── .mcp.json          # MCP server config for Claude Desktop / Cursor
 ├── barebrowse.context.md  # LLM-consumable integration guide
+├── commands/
+│   ├── barebrowse.md         # CLI command reference (any agent)
+│   └── barebrowse/
+│       └── SKILL.md          # CLI command reference (Claude Code skill)
 ├── package.json
 ├── README.md
 └── CLAUDE.md

package/docs/03-logs/validation-log.md CHANGED Viewed

@@ -120,4 +120,150 @@ Full end-to-end validation of every CLI command against real websites.
 ---
+## New features manual validation (v0.4.0)
+All tested against live sites via CLI session from `/tmp`.
+### Navigation: back/forward
+| Command | Result |
+|---------|--------|
+| `open https://example.com` | Session started |
+| `goto https://wikipedia.org` | "ok" |
+| `back` | "ok" — returned to example.com |
+| `forward` | "ok" — returned to wikipedia.org |
+### File upload
+| Command | Result |
+|---------|--------|
+| `goto 'data:text/html,<input type="file" id="f"><script>...</script>'` | "ok" |
+| `snapshot` | `button "Choose File" [ref=7]` |
+| `upload 7 /tmp/test-upload.txt` | "ok" |
+| `eval 'document.title'` | `"uploaded"` — onchange fired, confirmed working |
+### PDF export
+| Command | Result |
+|---------|--------|
+| (on wikipedia.org) `pdf` | `.barebrowse/page-*.pdf` — 200,716 bytes |
+### Tabs
+| Command | Result |
+|---------|--------|
+| `tabs` | `[{"index":0,"url":"https://www.wikipedia.org/","title":"Wikipedia",...}, {"index":1,"url":"about:blank",...}]` |
+### Wait-for
+| Command | Result |
+|---------|--------|
+| `wait-for --text=Wikipedia` | "ok" — found text immediately |
+| `wait-for --selector=body` | "ok" — found selector immediately |
+### JS dialog auto-dismiss
+| Command | Result |
+|---------|--------|
+| `eval 'alert("hello from dialog"); "done"'` | `"done"` — alert auto-dismissed, eval continued |
+| `dialog-log` | `.barebrowse/dialogs-*.json (1 entries)` — dialog logged with type, message, timestamp |
+### Save state
+| Command | Result |
+|---------|--------|
+| `save-state` | `.barebrowse/state-*.json` — 2,836 bytes (cookies + localStorage) |
+### Viewport flag
+| Command | Result |
+|---------|--------|
+| `open https://example.com --viewport=800x600` | Session started |
+| `eval 'window.innerWidth + "x" + window.innerHeight'` | `"800x600"` — confirmed |
+### Drag (wired, needs drag-and-drop UI for visual test)
+Wired through interact.js → index.js → daemon.js → cli.js. Mouse event sequence: mousePressed at source → mouseMoved to midpoint → mouseMoved to target → mouseReleased at target. Requires a drag-and-drop UI to validate visually.
+### Proxy flag
+Wired through cli.js → daemon.js → chromium.js → `--proxy-server` Chromium launch arg. Requires a proxy server to validate.
+### Storage-state flag
+Wired through cli.js → daemon.js → connect() → `Network.setCookies` on startup. Loads from JSON file produced by `save-state`.
+---
+## MCP server validation (v0.4.1)
+All 12 MCP tools tested live via Claude Code MCP integration. Stats line (`# X chars → Y chars (N% pruned)`) confirmed on every snapshot.
+### Tools tested successfully (10/12)
+| Tool | Test | Result |
+|------|------|--------|
+| `browse` | One-shot HN | `51,397 → 26,983 (48% pruned)` — stats line present |
+| `goto` | DDG, Wikipedia, data: URLs | All navigated successfully |
+| `snapshot` | Multiple pages | Stats line on every snapshot, pruning working |
+| `click` | Wikipedia "About Wikipedia" link | Navigated to target page |
+| `type` | DDG search box `barebrowse npm` | Text entered correctly |
+| `press` | Enter to submit DDG search | Search submitted (CAPTCHA returned — expected headless) |
+| `scroll` | 500px down on Wikipedia:About | Scrolled successfully |
+| `back` | After Wikipedia:About → CDP page | Returned to previous page |
+| `forward` | After back | Returned to Wikipedia:About |
+| `pdf` | Wikipedia:About | 380K base64 PDF generated |
+### Tools tested with known limitations (2/12)
+| Tool | Test | Result |
+|------|------|--------|
+| `upload` | data: page with file input | `ok` returned, file set via DOM.setFileInputFiles. onchange fires but result text pruned in act mode (non-interactive content). Works in integration tests. |
+| `drag` | data: page with draggable divs | Mouse events dispatched but HTML5 drag/drop dataTransfer not populated via CDP synthetic events. Known CDP limitation (same as Playwright). |
+### Observations
+- DDG returned CAPTCHA in headless ("Select all squares containing a duck") — expected, hybrid mode handles this
+- Stats line format: `# 42,367 chars → 5,453 chars (87% pruned)` — present on all pruned snapshots
+- Token reduction ranges observed: 37% (Wikipedia) to 88% (example.com)
+---
+## MCP cookies + hybrid fallback validation (v0.4.2)
+Three changes tested: all-browser cookie merge in auth.js, hybrid mode for connect(), cookie injection + hybrid in MCP goto.
+### Cookie injection — login-walled sites via MCP goto
+| Site | Logged In? | Details |
+|------|-----------|---------|
+| **Gmail** | Yes | Full inbox visible: Compose, labels, 4 emails. Required domain-stripping fix (`mail.google.com` → `google.com`) to capture parent-domain cookies (SID, HSID, etc.). 47 cookies merged from Firefox + Chromium. |
+| **YouTube** | Yes | Personalized feed: tabs for Linux, AI, Electrical Engineering. Recommendations include Claude Code videos, KDE Plasma. Account buttons visible. |
+| **LinkedIn** | Yes | Full feed as Amr Hassan: Home, My Network, Jobs, Messaging, Notifications. Posts visible. Stealth patches + cookies bypassed LinkedIn's aggressive bot detection. |
+| **Amazon.nl** | No (expected) | Not logged in but consent dismissed, search + product pages worked. Cookie injection had no effect (no Amazon session in Firefox). |
+| **GitHub** | No | Shows generic homepage with "Sign in". No GitHub session cookies in Firefox. |
+### Bot detection — hybrid fallback
+| Site | Headless Result | Hybrid Fallback | Final Result |
+|------|----------------|-----------------|--------------|
+| **Google Search** | Full results, no CAPTCHA | Not triggered (stealth sufficient) | Pass — logged in as Amr Hassan |
+| **Reddit** | "Prove your humanity" + reCAPTCHA | Triggered → connected to headed Chromium on 9222 | Pass — full feed with posts, logged in |
+| **LinkedIn** | Loaded fine with stealth + cookies | Not triggered | Pass |
+### Bug fixes discovered during validation
+1. **Domain stripping in authenticate()**: `mail.google.com` extracted only 9 cookies (subdomain-specific). Fix: strip to registrable domain (`google.com`) → 47 cookies including all auth cookies (SID, HSID, SSID, APISID, SAPISID).
+2. **Reddit challenge detection**: Block page shows "Prove your humanity" and "File a ticket" — neither matched existing challenge phrases. Added both to `isChallengePage()`.
+### connect() hybrid mode
+Tested `connect({ mode: 'hybrid' })` with Reddit: headless detected challenge → killed browser → connected to headed Chromium → Reddit loaded with full content. Same code path as MCP session.
+### All-browser cookie merge
+`extractCookies({ domain: 'google.com' })` in auto mode: Chromium cookies merged first, then Firefox cookies (last-write-wins by `name@domain`). 47 cookies total for google.com. Previous behavior: stopped at first browser found (Chromium only, missed Firefox session).
+---
 *Add new validation entries when testing against new sites or features.*

package/docs/04-process/testing.md CHANGED Viewed

@@ -152,6 +152,27 @@ Tests real interactions: clicking, typing, scrolling, form submission, and navig
 ---
+## Manual validation (v0.4.0 features)
+Features added in v0.4.0 are manually validated but not yet in the automated test suite. See `docs/03-logs/validation-log.md` for full results.
+| Feature | Validation method | Result |
+|---------|-------------------|--------|
+| `back` / `forward` | example.com → wikipedia → back → forward | ok |
+| `upload <ref> <files..>` | data: URL with file input, verified onchange fired | ok |
+| `pdf` | Wikipedia export, 200KB PDF | ok |
+| `tabs` | Listed 2 tabs with urls/titles | ok |
+| `wait-for --text` | Found "Wikipedia" text | ok |
+| `wait-for --selector` | Found `body` selector | ok |
+| `dialog-log` | alert() auto-dismissed, 1 entry logged | ok |
+| `save-state` | 2.8KB cookies + localStorage JSON | ok |
+| `--viewport=WxH` | 800x600, confirmed via innerWidth/innerHeight | ok |
+| `drag` | Wired through all layers | needs drag UI to visually test |
+| `--proxy` | Wired to Chromium launch arg | needs proxy to test |
+| `--storage-state` | Wired to Network.setCookies | loads from save-state output |
+---
 ## Writing new tests
 Follow the existing pattern:

package/docs/README.md CHANGED Viewed

@@ -50,6 +50,7 @@
 |------|---------|
 | `README.md` | Public-facing project overview |
 | `barebrowse.context.md` | LLM-consumable integration guide (full API, gotchas, wiring) |
-| `.claude/skills/barebrowse/SKILL.md` | CLI command reference + Claude Code skill definition |
+| `commands/barebrowse.md` | CLI command reference for any agent (same as SKILL.md without frontmatter) |
+| `commands/barebrowse/SKILL.md` | CLI command reference for Claude Code (copy to `.claude/skills/`) |
 | `CHANGELOG.md` | Detailed version-by-version changelog |
 | `CLAUDE.md` | AI agent instructions for this project |

package/docs/skill-template.md CHANGED Viewed

@@ -92,15 +92,15 @@ Both require the underlying tool to be installed. Choose based on your agent's c
 ## Example: barebrowse
-barebrowse ships its skill at `.claude/skills/barebrowse/SKILL.md`:
+barebrowse ships its skill at `commands/barebrowse/SKILL.md`:
 ```bash
 # Claude Code — project
-cp node_modules/barebrowse/.claude/skills/barebrowse/SKILL.md .claude/skills/barebrowse/SKILL.md
+cp node_modules/barebrowse/commands/barebrowse/SKILL.md .claude/skills/barebrowse/SKILL.md
 # Claude Code — global
 barebrowse install --skill
 # Other agents — project or global
-cp node_modules/barebrowse/.claude/skills/barebrowse/SKILL.md .barebrowse/commands/SKILL.md
+cp node_modules/barebrowse/commands/barebrowse/SKILL.md .barebrowse/commands/SKILL.md
 ```

package/mcp-server.js CHANGED Viewed

@@ -14,14 +14,14 @@ import { browse, connect } from './src/index.js';
 let _page = null;
 async function getPage() {
-  if (!_page) _page = await connect();
+  if (!_page) _page = await connect({ mode: 'hybrid' });
   return _page;
 }
 const TOOLS = [
   {
     name: 'browse',
-    description: 'One-shot: navigate to a URL and return a pruned ARIA snapshot. Stateless — does not use the session page.',
+    description: 'Browse a URL in a real browser. Use instead of web fetch when the page needs JavaScript, login cookies, consent dismissal, or bot detection. Returns a pruned ARIA snapshot with [ref=N] markers for interaction. Stateless — does not use the session page.',
     inputSchema: {
       type: 'object',
       properties: {
@@ -33,7 +33,7 @@ const TOOLS = [
   },
   {
     name: 'goto',
-    description: 'Navigate the session page to a URL. Returns ok — call snapshot to observe.',
+    description: 'Navigate the session page to a URL. Injects cookies from the user\'s browser for authenticated access. Returns ok — call snapshot to observe.',
     inputSchema: {
       type: 'object',
       properties: {
@@ -146,6 +146,7 @@ async function handleToolCall(name, args) {
     case 'goto': {
       const page = await getPage();
+      try { await page.injectCookies(args.url); } catch {}
       await page.goto(args.url);
       return 'ok';
     }
@@ -217,7 +218,7 @@ async function handleMessage(msg) {
     return jsonrpcResponse(id, {
       protocolVersion: '2024-11-05',
       capabilities: { tools: {} },
-      serverInfo: { name: 'barebrowse', version: '0.2.2' },
+      serverInfo: { name: 'barebrowse', version: '0.4.2' },
     });
   }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "barebrowse",
-  "version": "0.4.0",
+  "version": "0.4.2",
   "description": "Authenticated web browsing for autonomous agents via CDP. URL in, pruned ARIA snapshot out.",
   "type": "module",
   "main": "src/index.js",

package/src/auth.js CHANGED Viewed

@@ -232,14 +232,20 @@ export function extractCookies(opts = {}) {
     return extractChromiumCookies(path, domain);
   }
-  // Auto-detect: try Chromium browsers first, then Firefox
+  // Auto: try all browsers, merge (last-write-wins by name+domain)
+  const all = new Map();
   const chromium = findChromiumCookieDb();
-  if (chromium) return extractChromiumCookies(chromium.path, domain);
+  if (chromium) {
+    for (const c of extractChromiumCookies(chromium.path, domain))
+      all.set(`${c.name}@${c.domain}`, c);
+  }
   const firefox = findFirefoxCookieDb();
-  if (firefox) return extractFirefoxCookies(firefox, domain);
-  throw new Error('No browser cookie database found');
+  if (firefox) {
+    for (const c of extractFirefoxCookies(firefox, domain))
+      all.set(`${c.name}@${c.domain}`, c);
+  }
+  if (all.size === 0) throw new Error('No browser cookie database found');
+  return [...all.values()];
 }
 /**
@@ -270,7 +276,11 @@ export async function injectCookies(session, cookies) {
  * @param {object} [opts] - Options passed to extractCookies
  */
 export async function authenticate(session, url, opts = {}) {
-  const domain = new URL(url).hostname.replace(/^www\./, '');
+  // Strip to registrable domain so mail.google.com → google.com
+  // This ensures parent-domain cookies (.google.com) are included
+  const hostname = new URL(url).hostname.replace(/^www\./, '');
+  const parts = hostname.split('.');
+  const domain = parts.length > 2 ? parts.slice(-2).join('.') : hostname;
   const cookies = extractCookies({ ...opts, domain });
   if (cookies.length > 0) {
     await injectCookies(session, cookies);

package/src/index.js CHANGED Viewed

@@ -95,13 +95,16 @@ export async function browse(url, opts = {}) {
     }
     // Step 6: Prune for agent consumption
+    const raw = formatTree(tree);
     let snapshot;
     if (opts.prune !== false) {
       const pruned = pruneTree(tree, { mode: opts.pruneMode || 'act' });
       snapshot = formatTree(pruned);
     } else {
-      snapshot = formatTree(tree);
+      snapshot = raw;
     }
+    const stats = `# ${raw.length.toLocaleString()} chars → ${snapshot.length.toLocaleString()} chars (${Math.round((1 - snapshot.length / raw.length) * 100)}% pruned)`;
+    snapshot = stats + '\n' + snapshot;
     // Step 7: Clean up
     await cdp.send('Target.closeTarget', { targetId: page.targetId });
@@ -117,7 +120,7 @@ export async function browse(url, opts = {}) {
  * Connect to a browser for a long-lived interactive session.
  *
  * @param {object} [opts]
- * @param {'headless'|'headed'} [opts.mode='headless'] - Browser mode
+ * @param {'headless'|'headed'|'hybrid'} [opts.mode='headless'] - Browser mode
  * @param {number} [opts.port=9222] - CDP port for headed mode
  * @returns {Promise<object>} Page handle with goto, snapshot, close
  */
@@ -135,7 +138,7 @@ export async function connect(opts = {}) {
     cdp = await createCDP(browser.wsUrl);
   }
-  const page = await createPage(cdp, mode !== 'headed', { viewport: opts.viewport });
+  let page = await createPage(cdp, mode !== 'headed', { viewport: opts.viewport });
   let refMap = new Map();
   // Suppress permission prompts for all modes
@@ -154,17 +157,20 @@ export async function connect(opts = {}) {
   // Auto-dismiss JS dialogs (alert, confirm, prompt)
   const dialogLog = [];
-  page.session.on('Page.javascriptDialogOpening', async (params) => {
-    dialogLog.push({
-      type: params.type,
-      message: params.message,
-      timestamp: new Date().toISOString(),
-    });
-    await page.session.send('Page.handleJavaScriptDialog', {
-      accept: params.type !== 'beforeunload',
-      promptText: params.defaultPrompt || '',
+  function setupDialogHandler(session) {
+    session.on('Page.javascriptDialogOpening', async (params) => {
+      dialogLog.push({
+        type: params.type,
+        message: params.message,
+        timestamp: new Date().toISOString(),
+      });
+      await session.send('Page.handleJavaScriptDialog', {
+        accept: params.type !== 'beforeunload',
+        promptText: params.defaultPrompt || '',
+      });
     });
-  });
+  }
+  setupDialogHandler(page.session);
   return {
     async goto(url, timeout = 30000) {
@@ -172,6 +178,25 @@ export async function connect(opts = {}) {
       if (opts.consent !== false) {
         await dismissConsent(page.session);
       }
+      // Hybrid fallback: if bot-blocked, retry with headed browser
+      if (mode === 'hybrid') {
+        const { tree } = await ariaTree(page);
+        if (isChallengePage(tree)) {
+          await cdp.send('Target.closeTarget', { targetId: page.targetId });
+          cdp.close();
+          if (browser) { browser.process.kill(); browser = null; }
+          const port = opts.port || 9222;
+          const wsUrl = await getDebugUrl(port);
+          cdp = await createCDP(wsUrl);
+          page = await createPage(cdp, false, { viewport: opts.viewport });
+          setupDialogHandler(page.session);
+          await suppressPermissions(cdp);
+          await navigate(page, url, timeout);
+          if (opts.consent !== false) await dismissConsent(page.session);
+        }
+      }
     },
     async goBack() {
@@ -195,9 +220,12 @@ export async function connect(opts = {}) {
     async snapshot(pruneOpts) {
       const result = await ariaTree(page);
       refMap = result.refMap;
-      if (pruneOpts === false) return formatTree(result.tree);
+      const raw = formatTree(result.tree);
+      if (pruneOpts === false) return raw;
       const pruned = pruneTree(result.tree, { mode: pruneOpts?.mode || 'act' });
-      return formatTree(pruned);
+      const out = formatTree(pruned);
+      const stats = `# ${raw.length.toLocaleString()} chars → ${out.length.toLocaleString()} chars (${Math.round((1 - out.length / raw.length) * 100)}% pruned)`;
+      return stats + '\n' + out;
     },
     async click(ref) {
@@ -541,7 +569,9 @@ function isChallengePage(tree) {
     'checking your browser',
     'please wait',
     'verify you are human',
+    'prove your humanity',
     'attention required',
+    'file a ticket',
   ];
   const lower = text.toLowerCase();
   return challengePhrases.some((p) => lower.includes(p));

/package/{.claude/skills → commands}/barebrowse/SKILL.md RENAMED Viewed

File without changes