eyebrowse 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eyebrowse-0.1.0/.env.example +26 -0
- eyebrowse-0.1.0/.gitattributes +12 -0
- eyebrowse-0.1.0/.gitignore +45 -0
- eyebrowse-0.1.0/CLAUDE.md +156 -0
- eyebrowse-0.1.0/LICENSE +21 -0
- eyebrowse-0.1.0/PKG-INFO +287 -0
- eyebrowse-0.1.0/README.md +253 -0
- eyebrowse-0.1.0/docs/TOOLS.md +132 -0
- eyebrowse-0.1.0/docs/demo.gif +0 -0
- eyebrowse-0.1.0/docs/demo.mp4 +0 -0
- eyebrowse-0.1.0/examples/direct_usage.py +76 -0
- eyebrowse-0.1.0/examples/make_demo.py +246 -0
- eyebrowse-0.1.0/examples/record_demo.py +39 -0
- eyebrowse-0.1.0/eyebrowse/__init__.py +34 -0
- eyebrowse-0.1.0/eyebrowse/api.py +293 -0
- eyebrowse-0.1.0/eyebrowse/captcha/__init__.py +43 -0
- eyebrowse-0.1.0/eyebrowse/captcha/base.py +107 -0
- eyebrowse-0.1.0/eyebrowse/captcha/capmonster.py +11 -0
- eyebrowse-0.1.0/eyebrowse/captcha/capsolver.py +14 -0
- eyebrowse-0.1.0/eyebrowse/captcha/inject.py +60 -0
- eyebrowse-0.1.0/eyebrowse/captcha/nextcaptcha.py +11 -0
- eyebrowse-0.1.0/eyebrowse/captcha/twocaptcha.py +11 -0
- eyebrowse-0.1.0/eyebrowse/config.py +69 -0
- eyebrowse-0.1.0/eyebrowse/engine/__init__.py +7 -0
- eyebrowse-0.1.0/eyebrowse/engine/camoufox_engine.py +166 -0
- eyebrowse-0.1.0/eyebrowse/engine/session.py +900 -0
- eyebrowse-0.1.0/eyebrowse/extract.py +38 -0
- eyebrowse-0.1.0/eyebrowse/identity.py +62 -0
- eyebrowse-0.1.0/eyebrowse/mcp/__init__.py +5 -0
- eyebrowse-0.1.0/eyebrowse/mcp/server.py +46 -0
- eyebrowse-0.1.0/eyebrowse/mcp/state.py +22 -0
- eyebrowse-0.1.0/eyebrowse/mcp/tools/__init__.py +40 -0
- eyebrowse-0.1.0/eyebrowse/mcp/tools/captcha.py +50 -0
- eyebrowse-0.1.0/eyebrowse/mcp/tools/cookies.py +56 -0
- eyebrowse-0.1.0/eyebrowse/mcp/tools/devtools.py +77 -0
- eyebrowse-0.1.0/eyebrowse/mcp/tools/emulate.py +46 -0
- eyebrowse-0.1.0/eyebrowse/mcp/tools/extract.py +18 -0
- eyebrowse-0.1.0/eyebrowse/mcp/tools/identity.py +50 -0
- eyebrowse-0.1.0/eyebrowse/mcp/tools/interact.py +194 -0
- eyebrowse-0.1.0/eyebrowse/mcp/tools/mouse.py +52 -0
- eyebrowse-0.1.0/eyebrowse/mcp/tools/navigate.py +72 -0
- eyebrowse-0.1.0/eyebrowse/mcp/tools/netcontrol.py +42 -0
- eyebrowse-0.1.0/eyebrowse/mcp/tools/network.py +44 -0
- eyebrowse-0.1.0/eyebrowse/mcp/tools/observe.py +76 -0
- eyebrowse-0.1.0/eyebrowse/mcp/tools/sessions.py +67 -0
- eyebrowse-0.1.0/eyebrowse/mcp/tools/state_tools.py +39 -0
- eyebrowse-0.1.0/eyebrowse/mcp/tools/verify.py +30 -0
- eyebrowse-0.1.0/eyebrowse/mcp/tools/webstorage.py +70 -0
- eyebrowse-0.1.0/eyebrowse/proxy.py +78 -0
- eyebrowse-0.1.0/eyebrowse/snapshot.py +138 -0
- eyebrowse-0.1.0/pyproject.toml +64 -0
- eyebrowse-0.1.0/setup.ps1 +86 -0
- eyebrowse-0.1.0/uv.lock +1853 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# ──────────────────────────────────────────────────────────────────────────
|
|
2
|
+
# EyeBrowse secrets & config. Copy to `.env` (which is gitignored) and fill in.
|
|
3
|
+
# Loaded by pydantic-settings. Vars without the EYEBROWSE_ prefix (the captcha
|
|
4
|
+
# keys) are read by name to match each provider's conventional env var.
|
|
5
|
+
# ──────────────────────────────────────────────────────────────────────────
|
|
6
|
+
|
|
7
|
+
# ── Proxy (residential / ISP recommended; datacenter IPs trigger trust penalties) ──
|
|
8
|
+
EYEBROWSE_PROXY_SERVER=
|
|
9
|
+
EYEBROWSE_PROXY_USERNAME=
|
|
10
|
+
EYEBROWSE_PROXY_PASSWORD=
|
|
11
|
+
|
|
12
|
+
# ── Captcha solver API keys (API-mode only; no browser extension) ──
|
|
13
|
+
# Fill in the provider(s) you actually use.
|
|
14
|
+
CAPSOLVER_API_KEY=
|
|
15
|
+
TWOCAPTCHA_API_KEY=
|
|
16
|
+
CAPMONSTER_API_KEY=
|
|
17
|
+
NEXTCAPTCHA_API_KEY=
|
|
18
|
+
# Default provider when browser_solve_captcha is called without one:
|
|
19
|
+
# capsolver | twocaptcha | capmonster | nextcaptcha
|
|
20
|
+
EYEBROWSE_CAPTCHA_PROVIDER=capsolver
|
|
21
|
+
|
|
22
|
+
# ── Stealth defaults (engine already defaults these; uncomment to override) ──
|
|
23
|
+
# EYEBROWSE_HEADLESS=false
|
|
24
|
+
# EYEBROWSE_HUMANIZE=true
|
|
25
|
+
# EYEBROWSE_GEOIP=true
|
|
26
|
+
# EYEBROWSE_BLOCK_WEBRTC=true
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Normalize line endings. Text files are stored LF in the repo; checked out
|
|
2
|
+
# per-platform. Windows shell scripts MUST be CRLF on checkout — Windows
|
|
3
|
+
# PowerShell 5.1 fails to parse here-strings (and other constructs) when a
|
|
4
|
+
# .ps1 has LF-only endings.
|
|
5
|
+
* text=auto
|
|
6
|
+
|
|
7
|
+
*.ps1 text eol=crlf
|
|
8
|
+
*.bat text eol=crlf
|
|
9
|
+
*.cmd text eol=crlf
|
|
10
|
+
|
|
11
|
+
# Keep shell scripts LF so they run on Linux/macOS.
|
|
12
|
+
*.sh text eol=lf
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# ── Python ──
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
.eggs/
|
|
6
|
+
dist/
|
|
7
|
+
build/
|
|
8
|
+
|
|
9
|
+
# ── Virtualenv / uv ──
|
|
10
|
+
.venv/
|
|
11
|
+
venv/
|
|
12
|
+
|
|
13
|
+
# ── Secrets / env (keep the example) ──
|
|
14
|
+
.env
|
|
15
|
+
*.env
|
|
16
|
+
!.env.example
|
|
17
|
+
# Never commit private keys / certs
|
|
18
|
+
*.pem
|
|
19
|
+
*.key
|
|
20
|
+
*.pfx
|
|
21
|
+
*.p12
|
|
22
|
+
|
|
23
|
+
# ── Reference clones (read-only, NOT vendored) ──
|
|
24
|
+
reference/
|
|
25
|
+
|
|
26
|
+
# ── Runtime data: browser profiles, HAR captures, recordings, traces, extraction ──
|
|
27
|
+
# (Generated artifacts default to data/. Curated demo media for the README lives in
|
|
28
|
+
# docs/ and is intentionally NOT ignored.)
|
|
29
|
+
profiles/
|
|
30
|
+
user_data/
|
|
31
|
+
data/
|
|
32
|
+
*.har
|
|
33
|
+
|
|
34
|
+
# ── Caches ──
|
|
35
|
+
.cache/
|
|
36
|
+
.pytest_cache/
|
|
37
|
+
.ruff_cache/
|
|
38
|
+
.mypy_cache/
|
|
39
|
+
.dmypy.json
|
|
40
|
+
|
|
41
|
+
# ── OS / editor ──
|
|
42
|
+
.DS_Store
|
|
43
|
+
Thumbs.db
|
|
44
|
+
.idea/
|
|
45
|
+
.vscode/
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# CLAUDE.md — EyeBrowse build notes
|
|
2
|
+
|
|
3
|
+
Guidance for working in this repo. EyeBrowse is **Phase 1: a stealthy, LLM-drivable
|
|
4
|
+
browser-control engine** (Camoufox) consumed two ways from one codebase: as a Python
|
|
5
|
+
library (`from eyebrowse import EyeBrowse`) and over MCP (FastMCP, stdio). The engine
|
|
6
|
+
holds no workflow logic — consumers decide *what* to do; it provides *what's possible*.
|
|
7
|
+
|
|
8
|
+
## Layout
|
|
9
|
+
|
|
10
|
+
```
|
|
11
|
+
eyebrowse/
|
|
12
|
+
api.py EyeBrowse façade — the single public entry point
|
|
13
|
+
config.py pydantic-settings (proxy, captcha keys, stealth defaults)
|
|
14
|
+
snapshot.py aria_snapshot(mode="ai") + aria-ref= locator resolution
|
|
15
|
+
proxy.py ProxyConfig + ProxyProvider (pluggable rotation)
|
|
16
|
+
identity.py Identity + random_identity() (fingerprint + profile)
|
|
17
|
+
extract.py Crawl4AI raw: feed → markdown (no LLM; lazy, optional `extract` extra)
|
|
18
|
+
engine/
|
|
19
|
+
camoufox_engine.py launch primitive (sequential-launch semaphore, HAR rule)
|
|
20
|
+
session.py Session (verbs) + SessionRegistry (current-session)
|
|
21
|
+
captcha/ base (Anti-Captcha-style polling) + 4 providers + inject.py
|
|
22
|
+
mcp/
|
|
23
|
+
server.py FastMCP entrypoint (lifespan holds one EyeBrowse), main()
|
|
24
|
+
state.py process-wide engine handle
|
|
25
|
+
tools/ 16 tool-group modules (1:1 over the façade) = 78 tools
|
|
26
|
+
examples/direct_usage.py library usage proof (no MCP)
|
|
27
|
+
docs/TOOLS.md full per-tool reference (generated from the live server)
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Run
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
uv sync # core engine
|
|
34
|
+
uv sync --extra extract # + Crawl4AI (heavier; only needed for extract())
|
|
35
|
+
uv run python -m camoufox fetch # pinned Firefox binary + GeoIP db
|
|
36
|
+
uv run python examples/direct_usage.py # verify the library path
|
|
37
|
+
uv run eyebrowse-mcp # run the MCP server (stdio)
|
|
38
|
+
claude mcp add eyebrowse -- uv run eyebrowse-mcp # register with Claude Code
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Pinned versions (lockstep matters)
|
|
42
|
+
|
|
43
|
+
* `camoufox==0.4.11` → Firefox **v135.0.1-beta.24** (safely below the v146 Brotli bug
|
|
44
|
+
that renders an empty DOM on `content-encoding: br`). Re-run `python -m camoufox fetch`
|
|
45
|
+
after any camoufox bump.
|
|
46
|
+
* `playwright==1.60.0` (pulled by camoufox), `mcp==1.27.2`. Python 3.12 (pinned `<3.13`).
|
|
47
|
+
|
|
48
|
+
## Verified API facts (current installs — supersede the original research report)
|
|
49
|
+
|
|
50
|
+
* **Snapshot/refs:** `page.accessibility` and `Page._snapshot_for_ai` are **gone** in
|
|
51
|
+
Playwright 1.60. Use **`await page.aria_snapshot(mode="ai")`** → YAML ARIA tree with
|
|
52
|
+
`[ref=eN]` handles, and resolve with **`page.locator("aria-ref=eN")`**. This is the
|
|
53
|
+
whole LLM-drivable interaction model (see `snapshot.py`). No DOM injection needed.
|
|
54
|
+
* **Screenshot:** return Playwright's raw PNG bytes via `Image(data=bytes, format="png")`
|
|
55
|
+
— never `PIL.tobytes()` (raw pixels → corrupt image).
|
|
56
|
+
* **Camoufox:** `AsyncCamoufox(**launch_options)` accepts pass-through kwargs
|
|
57
|
+
(`headless`, `humanize`, `geoip`, `block_webrtc`, `proxy`, `os`, `screen`,
|
|
58
|
+
`persistent_context`, `user_data_dir`, `disable_coop`). Yields a **Browser** when
|
|
59
|
+
ephemeral, a **BrowserContext** when `persistent_context=True`.
|
|
60
|
+
|
|
61
|
+
## Key constraints (encoded in code — don't "simplify" away)
|
|
62
|
+
|
|
63
|
+
* **Sequential context creation.** Parallel `new_context()` deadlocks Camoufox's Juggler
|
|
64
|
+
pipe → launch is serialized behind a semaphore in `CamoufoxEngine` (interactions after
|
|
65
|
+
are parallel-safe).
|
|
66
|
+
* **HAR ⇒ ephemeral.** `launch_persistent_context` strips `record_har_*`. HAR is only set
|
|
67
|
+
on `browser.new_context(record_har_*)`, and Playwright only flushes it on context close,
|
|
68
|
+
so `export_har()` **closes the session**. To capture HAR with a logged-in (persistent) profile, route traffic through an upstream mitmproxy instead.
|
|
69
|
+
* **Captcha = API-mode only.** No browser extension (Chromium-oriented, unstable in
|
|
70
|
+
Firefox, raises bot-score). Solvers fetch a token over HTTP; `captcha/inject.py` writes
|
|
71
|
+
it into the response field and overrides `turnstile/grecaptcha.getResponse`.
|
|
72
|
+
* **Stealth defaults:** `geoip=True`, `humanize=True`, `block_webrtc=True`. For identity
|
|
73
|
+
rotation, rotate IP + geo (geoip) + fingerprint (fresh profile) together.
|
|
74
|
+
* **Windows:** runs headful/headless natively (no Xvfb). The `1x1x24` Xvfb patch is a
|
|
75
|
+
Linux/Docker concern, deferred.
|
|
76
|
+
|
|
77
|
+
## MCP tool surface — 78 tools (grouped in `mcp/tools/`)
|
|
78
|
+
|
|
79
|
+
- **sessions**: `browser_new_session` / `browser_close_session` / `browser_list_sessions`
|
|
80
|
+
- **navigate**: `browser_navigate` / `browser_navigate_back` / `browser_navigate_forward` / `browser_reload` / `browser_tabs` / `browser_switch_to_popup`
|
|
81
|
+
- **observe**: `browser_snapshot` / `browser_snapshot_frame` / `browser_screenshot` / `browser_resize` / `browser_console_messages` / `browser_wait_for_download`
|
|
82
|
+
- **interact**: `browser_click` / `browser_type` / `browser_keyboard_type` / `browser_fill_form` / `browser_hover` / `browser_select_option` / `browser_press_key` / `browser_drag` / `browser_file_upload` / `browser_handle_dialog` / `browser_wait_for` / `browser_evaluate` / `browser_scroll` / `browser_scroll_to_bottom`
|
|
83
|
+
- **mouse (coordinate)**: `browser_mouse_move` / `browser_mouse_click` / `browser_mouse_down` / `browser_mouse_up` / `browser_mouse_wheel` / `browser_mouse_drag`
|
|
84
|
+
- **network**: `browser_network_requests` / `browser_network_request` / `browser_ws_messages`
|
|
85
|
+
- **netcontrol**: `browser_block_urls` / `browser_unblock_urls` / `browser_set_offline` / `browser_mock_url`
|
|
86
|
+
- **cookies**: `browser_cookie_list` / `browser_cookie_get` / `browser_cookie_set` / `browser_cookie_delete` / `browser_cookie_clear`
|
|
87
|
+
- **localStorage**: `browser_localstorage_list` / `_get` / `_set` / `_remove` / `_clear`
|
|
88
|
+
- **sessionStorage**: `browser_sessionstorage_list` / `_get` / `_set` / `_remove` / `_clear`
|
|
89
|
+
- **state**: `browser_storage_state` / `browser_har_export`
|
|
90
|
+
- **identity / proxy**: `browser_new_identity` / `browser_set_proxy`
|
|
91
|
+
- **verify**: `browser_verify_element_visible` / `browser_verify_element_hidden` / `browser_verify_text_visible` / `browser_verify_value`
|
|
92
|
+
- **devtools**: `browser_highlight` / `browser_clear_highlights` / `browser_generate_locator` / `browser_start_tracing` / `browser_stop_tracing` / `browser_start_recording` / `browser_stop_recording` (→ animated GIF)
|
|
93
|
+
- **emulate**: `browser_set_geolocation` / `browser_set_extra_headers` / `browser_grant_permissions`
|
|
94
|
+
- **captcha**: `browser_solve_captcha` / `browser_totp_generate` · **extract**: `browser_extract`
|
|
95
|
+
|
|
96
|
+
## Camoufox behavior notes (verified — affect implementation choices)
|
|
97
|
+
|
|
98
|
+
* **`page.evaluate` runs in an ISOLATED world.** It can read/write the DOM, but cannot see
|
|
99
|
+
`window.*` globals set by the page's own scripts (`window.__appState` → returns null).
|
|
100
|
+
Use DOM, network inspection, or HAR to read app state — not page-global JS vars.
|
|
101
|
+
* **Cross-origin iframes:** `aria_snapshot(mode="ai")` assigns frame-prefixed refs like
|
|
102
|
+
`f1e36`. **`fN` is `"f"+frame.seq`** (a creation-order counter), **NOT** an index into
|
|
103
|
+
`page.frames` (which is DFS order) — they diverge on dynamic/nested pages, so never do
|
|
104
|
+
`page.frames[N]`. `ref_locator` hands the whole ref to Playwright's `aria-ref=` engine,
|
|
105
|
+
which jumps to the frame by seq via `_jumpToAriaRefFrameIfNeeded`
|
|
106
|
+
(`frameManager.frames().find(f => f.seq === N)`). For a real `Frame` (evaluate /
|
|
107
|
+
snapshot_frame) we resolve an element via aria-ref then read `owner_frame()` /
|
|
108
|
+
`content_frame()` — see `snapshot.py:frame_for_ref`. `browser_evaluate(frame_ref=...)`
|
|
109
|
+
runs JS **inside** the frame, so it works on CROSS-ORIGIN frames (the frame's own
|
|
110
|
+
context is reachable; only top-frame `contentDocument` access is blocked). If the page
|
|
111
|
+
snapshot collapses a frame to empty (usually a load-timing issue), `wait_for(network_idle)`
|
|
112
|
+
then re-snapshot, or use `browser_snapshot_frame('f1')`. Nested frames (`f1f2eM`) are
|
|
113
|
+
NOT supported — Playwright's own ref regex is single-level `^f\d+e\d+$`.
|
|
114
|
+
* **Shadow DOM:** `aria_snapshot` traverses open shadow roots automatically via the
|
|
115
|
+
accessibility tree. Shadow-hosted elements get plain `eN` refs; `aria-ref=eN` resolves
|
|
116
|
+
through shadow boundaries natively — no extra handling needed.
|
|
117
|
+
* **Rich text editors (TipTap, Quill, ProseMirror):** `fill()` is a no-op on
|
|
118
|
+
`contenteditable` divs. Pattern: `browser_mouse_click(x, y)` to focus the editor →
|
|
119
|
+
`browser_keyboard_type(text)` to insert. `browser_type` with a ref also works if the
|
|
120
|
+
snapshot returns a meaningful ref for the editor element.
|
|
121
|
+
* **History navigation (reload / back / forward) isn't reported to Playwright** by Firefox's
|
|
122
|
+
Juggler, so `Session` drives them via `goto` + an internal navigate() URL history (see
|
|
123
|
+
`session.py`). Caveat: back/forward only traverse navigate()-driven history, not
|
|
124
|
+
click-driven navigations.
|
|
125
|
+
* **Coordinate mouse works** (`page.mouse.*` dispatches real events); element/ref clicks also
|
|
126
|
+
work. Both are supported.
|
|
127
|
+
* **Viewport** defaults to maximize-to-the-randomized-spoofed-screen (large screenshots, full
|
|
128
|
+
per-session fingerprint entropy, `inner <= screen`). Pin a fixed size via
|
|
129
|
+
`EYEBROWSE_VIEWPORT_WIDTH/HEIGHT` only if you need predictable dimensions.
|
|
130
|
+
* **HAR export closes the session** (Playwright flushes the buffer on context close only).
|
|
131
|
+
Non-destructive checkpoint pattern: `browser_storage_state` → `browser_har_export` →
|
|
132
|
+
`browser_new_session(storage_state=...)`.
|
|
133
|
+
* **Not available on Firefox/Camoufox** (Chromium-only / non-functional): `page.pdf()`,
|
|
134
|
+
media emulation (`prefers-color-scheme` via `setEmulatedMedia`), and **native video**
|
|
135
|
+
(`record_video_dir` yields a `page.video` path but Camoufox never writes the file) —
|
|
136
|
+
intentionally omitted rather than shipped broken. For demos/README, use
|
|
137
|
+
`browser_start_recording`/`browser_stop_recording` instead: it captures viewport frames and
|
|
138
|
+
encodes them with **ffmpeg** into a smooth real-time **MP4/WebM/GIF** (format from the path
|
|
139
|
+
extension; `also_gif=True` emits a sibling GIF). ffmpeg resolves from the bundled
|
|
140
|
+
`imageio-ffmpeg` (preferred — modern, has `palettegen`/`libx264`) or a system ffmpeg on PATH;
|
|
141
|
+
falls back to a Pillow GIF only if neither is present. See `examples/record_demo.py`.
|
|
142
|
+
Caveat: real capture rate is capped by `page.screenshot()` speed (~10-20 fps); the output is
|
|
143
|
+
encoded at the requested fps (default 30) in real time. A true-fps OS-capture mode (ffmpeg
|
|
144
|
+
`gdigrab`, headful) is a possible future addition.
|
|
145
|
+
|
|
146
|
+
## Secrets
|
|
147
|
+
|
|
148
|
+
`.env` (gitignored; see `.env.example`). Proxy + stealth settings use the `EYEBROWSE_`
|
|
149
|
+
prefix; captcha keys use provider-conventional names (`CAPSOLVER_API_KEY`, …). Never
|
|
150
|
+
hardcode. Extraction is markdown-only and uses **no LLM** — so no LLM provider keys are
|
|
151
|
+
ever read (the consuming agent does any structuring).
|
|
152
|
+
|
|
153
|
+
## Conventions
|
|
154
|
+
|
|
155
|
+
Library-first: add capability to the façade/engine, then expose it as a thin MCP tool
|
|
156
|
+
(no logic in the tool). Keep `Session` the home of per-session verbs. Async throughout.
|
eyebrowse-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 EyeBrowse contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
eyebrowse-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: eyebrowse
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: EyeBrowse — a stealthy, LLM-drivable browser-control engine (Camoufox), consumed as a library and over MCP.
|
|
5
|
+
Project-URL: Homepage, https://github.com/Evil-Bane/eyebrowse
|
|
6
|
+
Project-URL: Repository, https://github.com/Evil-Bane/eyebrowse
|
|
7
|
+
Project-URL: Issues, https://github.com/Evil-Bane/eyebrowse/issues
|
|
8
|
+
Author-email: Evil-Bane <72240505+Evil-Bane@users.noreply.github.com>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: agent,anti-detection,browser-automation,camoufox,firefox,llm,mcp,model-context-protocol,playwright,stealth,web-scraping
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Internet :: WWW/HTTP :: Browsers
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Classifier: Topic :: Software Development :: Testing
|
|
21
|
+
Requires-Python: <3.13,>=3.12
|
|
22
|
+
Requires-Dist: camoufox[geoip]==0.4.11
|
|
23
|
+
Requires-Dist: httpx>=0.27
|
|
24
|
+
Requires-Dist: imageio-ffmpeg>=0.5
|
|
25
|
+
Requires-Dist: mcp>=1.2.0
|
|
26
|
+
Requires-Dist: pillow>=10.0
|
|
27
|
+
Requires-Dist: playwright<2,>=1.40
|
|
28
|
+
Requires-Dist: pydantic-settings>=2.2
|
|
29
|
+
Requires-Dist: pydantic>=2.6
|
|
30
|
+
Requires-Dist: pyotp>=2.9
|
|
31
|
+
Provides-Extra: extract
|
|
32
|
+
Requires-Dist: crawl4ai>=0.4.0; extra == 'extract'
|
|
33
|
+
Description-Content-Type: text/markdown
|
|
34
|
+
|
|
35
|
+
<div align="center">
|
|
36
|
+
|
|
37
|
+
# 🦊 EyeBrowse
|
|
38
|
+
|
|
39
|
+
### A stealthy, LLM-drivable browser engine — one codebase, two faces.
|
|
40
|
+
|
|
41
|
+
A **Python library** *and* an **MCP server** for driving a real, hard-to-detect browser
|
|
42
|
+
— Camoufox, a Firefox fork with C++-level fingerprint spoofing — so legitimate automation
|
|
43
|
+
isn't false-flagged or IP-banned by Cloudflare, DataDome, Akamai, or PerimeterX.
|
|
44
|
+
|
|
45
|
+
[PyPI](https://pypi.org/project/eyebrowse/)
|
|
46
|
+
[Python](https://www.python.org/)
|
|
47
|
+
[License: MIT](LICENSE)
|
|
48
|
+
[MCP tools](docs/TOOLS.md)
|
|
49
|
+
[Camoufox](https://github.com/daijro/camoufox)
|
|
50
|
+
|
|
51
|
+

|
|
52
|
+
|
|
53
|
+
<sub>▶ Higher-quality MP4: <a href="https://github.com/Evil-Bane/eyebrowse/blob/master/docs/demo.mp4">docs/demo.mp4</a> — real EyeBrowse captures, composed into an ad by <code>examples/make_demo.py</code></sub>
|
|
54
|
+
|
|
55
|
+
</div>
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## Why EyeBrowse?
|
|
60
|
+
|
|
61
|
+
- 🥷 **Stealth by default** — engine-level fingerprint spoofing (`geoip` + `humanize` + `block_webrtc` enabled out of the box); `navigator.webdriver` masked; viewport auto-sized to the spoofed screen. No `puppeteer-extra` band-aids — the anti-detection is *in the browser*.
|
|
62
|
+
- 🤖 **Built for LLMs** — pages are read as an **ARIA tree with `[ref=…]` handles**; the model acts by ref (`click`/`type`/`hover`), not by brittle CSS or raw pixels. Cross-origin iframes, shadow DOM, popups — handled.
|
|
63
|
+
- 🧰 **Library *and* MCP from one codebase** — a clean Python API (`EyeBrowse` + `Session`), mirrored 1:1 by a thin **MCP server** (**78 `browser_*` tools**) for Claude Code and any MCP client.
|
|
64
|
+
- 🪟 **Never boxed in** — the curated high-level API doesn't hide Playwright: reach `session.page` / `.context` / `.browser` for anything it doesn't wrap.
|
|
65
|
+
- 🔋 **Batteries included** — multi-session, proxy + identity rotation, API-mode captcha solvers, full HAR capture, screen recording (MP4/GIF), and clean-markdown extraction.
|
|
66
|
+
|
|
67
|
+
> **Scope.** This repo is **Phase 1: the browser engine.** It holds **no workflow logic** —
|
|
68
|
+
> consumers decide *what* to do; the engine provides *what's possible*. (A Phase-2 agent is a
|
|
69
|
+
> separate effort.)
|
|
70
|
+
|
|
71
|
+
## Contents
|
|
72
|
+
|
|
73
|
+
[Quickstart](#quickstart) · [Install](#install) · [Features](#features) · [Library](#use-as-a-library) · [MCP](#use-over-mcp) · [Proxy & identity](#proxy--identity-optional) · [Extraction](#extraction) · [Recording](#recording) · [How it works](#how-it-works) · [Caveats](#camoufox-caveats) · [Tools](docs/TOOLS.md) · [License](#license)
|
|
74
|
+
|
|
75
|
+
## Quickstart
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
pip install eyebrowse
|
|
79
|
+
python -m camoufox fetch # one-time: download the stealth Firefox + GeoIP db
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
import asyncio
|
|
84
|
+
from eyebrowse import EyeBrowse
|
|
85
|
+
|
|
86
|
+
async def main():
|
|
87
|
+
eb = EyeBrowse() # stealth defaults: geoip · humanize · block_webrtc
|
|
88
|
+
async with eb.session() as s:
|
|
89
|
+
await s.navigate("https://example.com")
|
|
90
|
+
print(await s.snapshot()) # ARIA tree with [ref=...] handles
|
|
91
|
+
await s.click("e6") # act on a ref from the snapshot
|
|
92
|
+
await eb.aclose()
|
|
93
|
+
|
|
94
|
+
asyncio.run(main())
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
…or wire it into **Claude Code** (or any MCP client) — see [Use over MCP](#use-over-mcp).
|
|
98
|
+
|
|
99
|
+
## Install
|
|
100
|
+
|
|
101
|
+
**From PyPI**
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
pip install eyebrowse # or: uv pip install eyebrowse
|
|
105
|
+
python -m camoufox fetch # one-time browser fetch (like Playwright's `playwright install`)
|
|
106
|
+
pip install "eyebrowse[extract]" # optional: + Crawl4AI markdown extraction (heavier)
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
**From source (development)**
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
git clone https://github.com/Evil-Bane/eyebrowse && cd eyebrowse
|
|
113
|
+
uv sync # core engine (add --extra extract for Crawl4AI)
|
|
114
|
+
uv run python -m camoufox fetch
|
|
115
|
+
cp .env.example .env # only if you use a proxy / captcha keys
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
Python 3.12 (pinned `<3.13`). Pinned engine: `camoufox==0.4.11` (Firefox v135), `playwright 1.60`, `mcp 1.27`.
|
|
119
|
+
|
|
120
|
+
## Features
|
|
121
|
+
|
|
122
|
+
| | |
|
|
123
|
+
|---|---|
|
|
124
|
+
| 🥷 **Stealth** | Camoufox engine-level fingerprint spoofing; `geoip` + `humanize` + `block_webrtc` by default; `webdriver` masked; viewport matched to the spoofed screen. |
|
|
125
|
+
| 🤖 **LLM interaction** | `aria_snapshot(mode="ai")` → ARIA tree + `[ref]` handles; click / type / hover / select / drag / file-upload / dialogs / keyboard; coordinate mouse too. |
|
|
126
|
+
| 🪟 **Frames & DOM** | cross-origin iframe routing by ref, shadow-DOM piercing, popup/new-tab switching, `evaluate` inside any frame. |
|
|
127
|
+
| 🗂 **Multi-session** | independent stealth sessions, each with its own context / identity / proxy. |
|
|
128
|
+
| 🌐 **Network** | inspect requests/responses (incl. XHR/fetch bodies & WebSocket frames), block URLs, mock responses, go offline, full **HAR** export. |
|
|
129
|
+
| 💾 **State** | cookies, localStorage & sessionStorage (CRUD), `storage_state` save/reload. |
|
|
130
|
+
| 🪪 **Identity rotation** | fresh fingerprint (OS/screen) + isolated profile + paired proxy; pluggable residential `ProxyProvider`. |
|
|
131
|
+
| 🧩 **Captcha** | pluggable **API-mode** solvers (CapSolver / 2Captcha / CapMonster / NextCaptcha) + TOTP — no browser extension. |
|
|
132
|
+
| 📄 **Extraction** | Crawl4AI `raw:` feed → clean, token-efficient **markdown** (no LLM, no API keys). |
|
|
133
|
+
| 🎥 **Capture** | screenshots, Playwright tracing, and **screen recording → MP4 / WebM / GIF** (ffmpeg). |
|
|
134
|
+
| ✅ **Verify & debug** | assertions, element highlighting, locator generation, geolocation/header emulation. |
|
|
135
|
+
|
|
136
|
+
Full per-tool reference: **[docs/TOOLS.md](docs/TOOLS.md)** (78 tools across 17 groups).
|
|
137
|
+
|
|
138
|
+
## Use as a library
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
import asyncio
|
|
142
|
+
from eyebrowse import EyeBrowse
|
|
143
|
+
|
|
144
|
+
async def main():
|
|
145
|
+
eb = EyeBrowse() # stealth defaults
|
|
146
|
+
try:
|
|
147
|
+
async with eb.session() as s: # a stealth session (auto-closed)
|
|
148
|
+
await s.navigate("https://example.com")
|
|
149
|
+
print(await s.snapshot()) # ARIA tree with [ref=...] handles
|
|
150
|
+
await s.click("e6") # act on a ref
|
|
151
|
+
await s.type("e8", "hello", submit=True)
|
|
152
|
+
png = await s.screenshot(full_page=True)
|
|
153
|
+
title = await s.page.title() # full Playwright power when you need it
|
|
154
|
+
finally:
|
|
155
|
+
await eb.aclose()
|
|
156
|
+
|
|
157
|
+
asyncio.run(main())
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
Run the included proof: `uv run python examples/direct_usage.py`.
|
|
161
|
+
|
|
162
|
+
## Use over MCP
|
|
163
|
+
|
|
164
|
+
EyeBrowse ships an MCP server (`eyebrowse-mcp`, FastMCP over stdio). Add it to any MCP client.
|
|
165
|
+
|
|
166
|
+
**Claude Code (CLI):**
|
|
167
|
+
|
|
168
|
+
```bash
|
|
169
|
+
claude mcp add eyebrowse -- eyebrowse-mcp
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
**Any MCP client (JSON config):**
|
|
173
|
+
|
|
174
|
+
```json
|
|
175
|
+
{
|
|
176
|
+
"mcpServers": {
|
|
177
|
+
"eyebrowse": {
|
|
178
|
+
"command": "eyebrowse-mcp"
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
Then drive the loop: `browser_navigate(url)` → read the snapshot → act by ref
|
|
185
|
+
(`browser_click` / `browser_type` / …). A default session is auto-created, so most tools just
|
|
186
|
+
work. Full list: **[docs/TOOLS.md](docs/TOOLS.md)**.
|
|
187
|
+
|
|
188
|
+
## Proxy & identity (optional)
|
|
189
|
+
|
|
190
|
+
Runs **proxyless by default** (`geoip` still aligns locale/timezone to your real IP). Add a proxy
|
|
191
|
+
only when you want one:
|
|
192
|
+
|
|
193
|
+
```python
|
|
194
|
+
await eb.new_session(proxy="http://user:pass@residential.example:8080")
|
|
195
|
+
await eb.rotate_identity(proxy="socks5://host:1080") # fresh fingerprint + paired IP
|
|
196
|
+
await eb.new_session(no_proxy=True) # force proxyless
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
Set a default once via `EYEBROWSE_PROXY_*` in `.env`, `eb.set_static_proxy(...)`, or a custom
|
|
200
|
+
`ProxyProvider` for rotation. Over MCP: `browser_new_session(proxy_url=…)` /
|
|
201
|
+
`browser_new_identity(proxy_url=…)` / `browser_set_proxy(…)`.
|
|
202
|
+
|
|
203
|
+
## Extraction
|
|
204
|
+
|
|
205
|
+
`eb.extract()` (or `browser_extract`) hands the rendered HTML to Crawl4AI's `raw:` feed and
|
|
206
|
+
returns clean, pruned **markdown** — **no LLM is called and no LLM keys are ever read**; the
|
|
207
|
+
consuming agent does any structuring.
|
|
208
|
+
|
|
209
|
+
```python
|
|
210
|
+
md = await eb.extract() # markdown string
|
|
211
|
+
res = await eb.extract(output_path="data/page.md") # → {"path": ..., "chars": ...}
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
## Recording
|
|
215
|
+
|
|
216
|
+
Camoufox can't write native browser video, so EyeBrowse rolls its own recorder: it captures
|
|
217
|
+
viewport frames and encodes them with **ffmpeg** into a smooth, real-time **MP4/WebM** (and a
|
|
218
|
+
palette-optimized **GIF** that autoplays inline on GitHub). ffmpeg ships bundled
|
|
219
|
+
(`imageio-ffmpeg`), so it works out of the box.
|
|
220
|
+
|
|
221
|
+
```python
|
|
222
|
+
await s.start_recording(fps=30)
|
|
223
|
+
# ... drive the browser ...
|
|
224
|
+
await s.stop_recording("demo.mp4", extra_paths=["demo.gif"])
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
The demo at the top was produced this way — see **`examples/make_demo.py`**.
|
|
228
|
+
|
|
229
|
+
## How it works
|
|
230
|
+
|
|
231
|
+
```
|
|
232
|
+
CONSUMERS ENGINE (library: eyebrowse/)
|
|
233
|
+
Claude Code ──MCP──▶ mcp/ ──▶ EyeBrowse façade (public API)
|
|
234
|
+
your code ─ import ──────────▶ ├─ Camoufox engine (stealth context, HAR)
|
|
235
|
+
any MCP client ├─ proxy / identity rotation (pluggable)
|
|
236
|
+
├─ captcha solvers (pluggable, API-mode)
|
|
237
|
+
└─ Crawl4AI (raw: feed) → clean markdown
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
The façade (`EyeBrowse` + `Session`) is the product; the MCP adapter is a thin 1:1 wrapper over
|
|
241
|
+
it. The high-level API is curated and LLM-friendly — *not* a reimplementation of all of Playwright
|
|
242
|
+
— and the raw `page` / `context` / `browser` objects are always one attribute away.
|
|
243
|
+
|
|
244
|
+
## Camoufox caveats
|
|
245
|
+
|
|
246
|
+
These shape the API — worth knowing:
|
|
247
|
+
|
|
248
|
+
- **`evaluate` runs in an isolated world** — it sees the DOM but **not** page-script `window.*`
|
|
249
|
+
globals. Read app state via DOM / network / HAR.
|
|
250
|
+
- **reload / back / forward** aren't reported by Firefox's Juggler, so they're driven via `goto` +
|
|
251
|
+
an internal navigate() history (they traverse navigate()-driven history, not click-driven).
|
|
252
|
+
- **Not available on Firefox/Camoufox** (intentionally omitted, not broken): `page.pdf()`, media
|
|
253
|
+
emulation, and native Playwright video — use the **ffmpeg recorder** instead.
|
|
254
|
+
|
|
255
|
+
## Project layout
|
|
256
|
+
|
|
257
|
+
```
|
|
258
|
+
eyebrowse/
|
|
259
|
+
api.py EyeBrowse façade — the single public entry point
|
|
260
|
+
config.py settings / secrets (pydantic-settings)
|
|
261
|
+
snapshot.py aria_snapshot(mode="ai") + aria-ref= resolution
|
|
262
|
+
proxy.py ProxyConfig + pluggable ProxyProvider
|
|
263
|
+
identity.py Identity + random_identity()
|
|
264
|
+
extract.py Crawl4AI raw: feed → markdown (lazy, optional dep)
|
|
265
|
+
engine/ camoufox_engine.py (launch) + session.py (verbs + registry)
|
|
266
|
+
captcha/ solver ABC + 4 providers + DOM detect/inject
|
|
267
|
+
mcp/ FastMCP server + state + tools/ (17 groups, 78 tools)
|
|
268
|
+
examples/direct_usage.py library proof (no MCP)
|
|
269
|
+
examples/make_demo.py the screen-recording demo above
|
|
270
|
+
docs/TOOLS.md full tool reference
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
Build notes, version-pin rationale, and verified Camoufox behavior live in **[CLAUDE.md](CLAUDE.md)**.
|
|
274
|
+
|
|
275
|
+
## Roadmap
|
|
276
|
+
|
|
277
|
+
- ✅ **Phase 1 — the engine** (this repo): stealth Camoufox engine, 78-tool MCP surface, library API.
|
|
278
|
+
- 🔭 **Phase 2 — the agent**: an LLM agent + durable orchestrator that drives this engine to run web tasks at scale (separate project).
|
|
279
|
+
|
|
280
|
+
## Authorized use
|
|
281
|
+
|
|
282
|
+
EyeBrowse is for **personal automation** and **authorized security research / bug-bounty** work.
|
|
283
|
+
Use it only against properties you own or are explicitly permitted to test.
|
|
284
|
+
|
|
285
|
+
## License
|
|
286
|
+
|
|
287
|
+
[MIT](LICENSE) © Evil-Bane
|