human-browser 4.3.4 → 4.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +128 -76
- package/SKILL.md +46 -0
- package/package.json +1 -1
- package/scripts/browser-human.js +171 -14
package/README.md
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
# Human Browser — Cloud Stealth Browser for AI Agents
|
|
2
2
|
|
|
3
|
-
> **No Mac Mini. No local
|
|
3
|
+
> **No Mac Mini. No local install. Your agent runs it anywhere.**
|
|
4
4
|
> Residential IPs from 10+ countries. Bypasses Cloudflare, DataDome, PerimeterX.
|
|
5
5
|
>
|
|
6
|
-
> ๐ **Product page:** https://humanbrowser.cloud
|
|
6
|
+
> 🌐 **Product page:** https://humanbrowser.cloud
|
|
7
|
+
> 🤖 **A2A endpoint:** `https://agent.humanbrowser.cloud/a2a`
|
|
7
8
|
> 💬 **Support:** https://t.me/virixlabs
|
|
8
9
|
|
|
9
10
|
---
|
|
@@ -15,115 +16,166 @@ Regular Playwright on a data-center server gets blocked **immediately** by:
|
|
|
15
16
|
- DataDome (fingerprint analysis)
|
|
16
17
|
- PerimeterX (behavioral analysis)
|
|
17
18
|
- Instagram, LinkedIn, TikTok (residential IP requirement)
|
|
19
|
+
- Google sign-in, Akamai BMP (TLS / WebRTC fingerprint matching)
|
|
18
20
|
|
|
19
|
-
|
|
20
|
-
1. **Residential IP** — real ISP address from the target country (not a data center)
|
|
21
|
-
2. **Real device fingerprint** — iPhone 15 Pro or Windows Chrome, complete with canvas, WebGL, fonts
|
|
22
|
-
3. **Human-like behavior** — Bezier mouse curves, 60–220ms typing, natural scroll with jitter
|
|
23
|
-
4. **Full anti-detection** — `webdriver=false`, no automation flags, correct timezone & geolocation
|
|
21
|
+
Local stealth libraries (patchright, undetected-chromedriver, playwright-stealth) close some of these gaps in JS, but leak others — most notably **WebRTC ICE candidates** that surface the server's real datacenter IP regardless of your proxy.
|
|
24
22
|
|
|
25
|
-
|
|
23
|
+
Human Browser's **cloud build** runs a custom forked Chromium with **C++-source-level fingerprint patches** (TLS ja3/ja4 matched to real Chrome, GPU vendor/renderer spoofing, WebRTC IP replacement, canvas/WebGL/audio noise) that you cannot get from any npm package. Plus residential proxies. Plus a live browser viewer. **Drive it via the Agent2Agent protocol — no install, no version pinning, no Linux build of Chromium for you to maintain.**
|
|
26
24
|
|
|
27
|
-
|
|
25
|
+
---
|
|
28
26
|
|
|
29
|
-
|
|
27
|
+
## Agent2Agent (A2A) — recommended path
|
|
30
28
|
|
|
31
|
-
|
|
32
|
-
|
|
29
|
+
**Endpoint:** `https://agent.humanbrowser.cloud/a2a`
|
|
30
|
+
**Auth:** `Authorization: Bearer hb_live_<your-token>`
|
|
31
|
+
**Agent card:** `https://agent.humanbrowser.cloud/.well-known/agent.json`
|
|
33
32
|
|
|
34
|
-
|
|
35
|
-
const { browser, page, humanType, humanClick, humanScroll, sleep } = await launchHuman();
|
|
36
|
-
// Output: ๐ Human Browser trial activated! (~100MB Romania residential IP)
|
|
33
|
+
Works with anything that speaks A2A — LangGraph, CrewAI, Google ADK, OpenAI Agents SDK, Claude/Anthropic agents, any hand-rolled JSON-RPC client.
|
|
37
34
|
|
|
38
|
-
|
|
39
|
-
const { page } = await launchHuman({ country: 'us' }); // US residential IP
|
|
40
|
-
const { page } = await launchHuman({ country: 'gb' }); // UK residential IP
|
|
35
|
+
### Submit + poll (recommended)
|
|
41
36
|
|
|
42
|
-
|
|
43
|
-
|
|
37
|
+
```bash
|
|
38
|
+
# 1. Submit a goal → get back task_id and viewerUrl
|
|
39
|
+
curl -sS https://agent.humanbrowser.cloud/a2a \
|
|
40
|
+
-H "Authorization: Bearer hb_live_<your-token>" \
|
|
41
|
+
-H "Content-Type: application/json" \
|
|
42
|
+
-d '{
|
|
43
|
+
"jsonrpc": "2.0",
|
|
44
|
+
"id": 1,
|
|
45
|
+
"method": "message/send",
|
|
46
|
+
"params": {
|
|
47
|
+
"message": {
|
|
48
|
+
"role": "user",
|
|
49
|
+
"metadata": { "profile": "main", "model": "anthropic/claude-sonnet-4-6" },
|
|
50
|
+
"parts": [{ "kind": "text", "text": "Log into example.com and report the dashboard total" }]
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
}'
|
|
54
|
+
# → { "result": {
|
|
55
|
+
# "id": "t_abc...",
|
|
56
|
+
# "metadata": { "viewerUrl": "https://humanbrowser.cloud/a/s_xyz?k=...", "cost": {...} }
|
|
57
|
+
# } }
|
|
58
|
+
|
|
59
|
+
# 2. Poll task.metadata while running — every 5–10 s.
|
|
60
|
+
# metadata is enriched per step: step_count, current_url, last_thinking,
|
|
61
|
+
# last_eval, last_action, cost.
|
|
62
|
+
curl -sS https://agent.humanbrowser.cloud/a2a \
|
|
63
|
+
-H "Authorization: Bearer hb_live_<your-token>" \
|
|
64
|
+
-H "Content-Type: application/json" \
|
|
65
|
+
-d '{"jsonrpc":"2.0","id":2,"method":"tasks/get","params":{"id":"t_abc..."}}'
|
|
66
|
+
|
|
67
|
+
# 3. When task.state ∈ {completed, failed, canceled} → done.
|
|
68
|
+
# Final outcome is in task.metadata.outcome:
|
|
69
|
+
# { success, result, step_count, duration_ms, cost, files }
|
|
70
|
+
# Full artifact in task.artifacts[0].
|
|
71
|
+
```
|
|
44
72
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
73
|
+
### Or: webhook callback (no polling)
|
|
74
|
+
|
|
75
|
+
Pass `callback_url` in `message.metadata` and we POST the final `Task` envelope to that URL when the task hits a terminal state. Signed with `X-HB-Signature: sha256=<HMAC>` when the server is configured with a webhook secret. Retries 3× on 5xx / network errors with 2 / 8 / 30 s backoff.
|
|
76
|
+
|
|
77
|
+
```json
|
|
78
|
+
{
|
|
79
|
+
"message": {
|
|
80
|
+
"role": "user",
|
|
81
|
+
"metadata": {
|
|
82
|
+
"profile": "main",
|
|
83
|
+
"callback_url": "https://your-agent.host/hb-callback"
|
|
84
|
+
},
|
|
85
|
+
"parts": [{ "kind": "text", "text": "..." }]
|
|
86
|
+
}
|
|
87
|
+
}
|
|
50
88
|
```
|
|
51
89
|
|
|
52
|
-
|
|
90
|
+
### Or: SSE streaming
|
|
91
|
+
|
|
92
|
+
`method: "message/stream"` returns Server-Sent Events; each step pushes a `status-update` event with the latest `task.metadata`. Use this if your client can hold a long-lived connection.
|
|
93
|
+
|
|
94
|
+
### Live viewer
|
|
95
|
+
|
|
96
|
+
Every spawned session ships a `viewerUrl` in `task.metadata`. Open it to:
|
|
97
|
+
- Watch the browser live
|
|
98
|
+
- See the agent timeline (👤 you · 🤖 caller-agent like dzeny · 🧠 hb-agent — color-coded)
|
|
99
|
+
- Inject manual goals into a running session via the input box
|
|
100
|
+
|
|
101
|
+
### Important: don't fire-and-await
|
|
102
|
+
|
|
103
|
+
`message/send` resolves only on terminal state, and tasks routinely run 5–30 min. Don't hold an HTTP connection that long — NAT / load-balancer / proxy timeouts will sever it and your client loses context **while our server keeps running and billing**. Use polling or webhooks instead.
|
|
53
104
|
|
|
54
105
|
---
|
|
55
106
|
|
|
56
|
-
##
|
|
107
|
+
## Sensitive data handling
|
|
57
108
|
|
|
58
|
-
|
|
59
|
-
npm install playwright
|
|
60
|
-
npx playwright install chromium --with-deps
|
|
109
|
+
Pass logins / passwords / API keys via the A2A `DataPart` with `metadata.sensitive=true` — they're injected as task input and are **stripped from logs, never written to artifacts, never echoed back in streams**.
|
|
61
110
|
|
|
62
|
-
|
|
63
|
-
|
|
111
|
+
```json
|
|
112
|
+
{
|
|
113
|
+
"message": {
|
|
114
|
+
"role": "user",
|
|
115
|
+
"parts": [
|
|
116
|
+
{ "kind": "text", "text": "Log in and download the latest report" },
|
|
117
|
+
{ "kind": "data", "data": { "email": "...", "password": "..." }, "metadata": { "sensitive": true } }
|
|
118
|
+
]
|
|
119
|
+
}
|
|
120
|
+
}
|
|
64
121
|
```
|
|
65
122
|
|
|
66
123
|
---
|
|
67
124
|
|
|
68
|
-
##
|
|
125
|
+
## Profile persistence + per-token defaults
|
|
69
126
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
| 🇯🇵 Japan | `jp` | Japanese e-commerce, Line |
|
|
78
|
-
| 🇫🇷 France | `fr` | EU services, luxury brands |
|
|
79
|
-
| 🇨🇦 Canada | `ca` | North American services |
|
|
80
|
-
| 🇸🇬 Singapore | `sg` | APAC/SEA e-commerce |
|
|
81
|
-
| 🇦🇺 Australia | `au` | Oceania content |
|
|
127
|
+
Each session inherits a named **profile** that persists cookies / localStorage / IndexedDB / Service Worker storage between spawns. Pass it in `message.metadata.profile`:
|
|
128
|
+
|
|
129
|
+
```json
|
|
130
|
+
{ "message": { "metadata": { "profile": "polymarket-main" }, "role": "user", "parts": [...] } }
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
If you don't pass one, the server uses the token's `default_profile` (configurable per token via admin API) and falls back to `"default"`. Same profile across reconnects → same cookies, same fingerprint, same WebRTC IP. Up to 5 concurrent sessions per token by default.
|
|
82
134
|
|
|
83
135
|
---
|
|
84
136
|
|
|
85
|
-
##
|
|
137
|
+
## Library mode (advanced — for self-hosting)
|
|
86
138
|
|
|
87
|
-
|
|
88
|
-
Buy directly at **humanbrowser.cloud** — we handle everything, from $13.99/mo.
|
|
89
|
-
Supports crypto (USDT/ETH/BTC/SOL) and card. AI agents can auto-purchase.
|
|
139
|
+
If you want to skip the cloud entirely and drive your own Chromium with a residential proxy you supply yourself, the `human-browser` npm package exposes `launchHuman()` — a drop-in Playwright launcher with our humanizer helpers, geo-fingerprint plumbing, and built-in 2captcha integration.
|
|
90
140
|
|
|
91
|
-
|
|
141
|
+
**Note:** the library does NOT include our forked Chromium with C++ stealth patches — that binary is part of the cloud build only. Library mode is patchright-stealth-plus-helpers; expect lower pass-rate on Cloudflare BM / DataDome / Google sign-in / WebRTC-aware bot scoring than the cloud.
|
|
92
142
|
|
|
93
|
-
|
|
94
|
-
|
|
143
|
+
```js
|
|
144
|
+
const { launchHuman } = require('human-browser');
|
|
95
145
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
| **[Decodo](https://decodo.com)** (ex-Smartproxy) | ⭐⭐⭐⭐⭐ | ~$2.5/GB | Cloudflare, DataDome, all-round. No KYC. |
|
|
99
|
-
| **[Bright Data](https://get.brightdata.com/4ihj1kk8jt0v)** | ⭐⭐⭐⭐⭐ | ~$8.4/GB | Enterprise-grade, 72M+ IPs, 195 countries |
|
|
100
|
-
| **[IPRoyal](https://iproyal.com)** | ⭐⭐⭐⭐ | ~$1.75/GB | High volume, budget, ethically sourced |
|
|
101
|
-
| **[NodeMaven](https://nodemaven.com)** | ⭐⭐⭐⭐ | ~$3.5/GB | High success rate, pay-per-GB, no minimums |
|
|
102
|
-
| **[Oxylabs](https://oxylabs.io)** | ⭐⭐⭐⭐⭐ | ~$8/GB | Business-grade, dedicated support |
|
|
146
|
+
// Zero config — auto-fetches trial credentials from humanbrowser.cloud
|
|
147
|
+
const { browser, page, humanType, humanClick, humanScroll, sleep } = await launchHuman();
|
|
103
148
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
PROXY_PORT=22225
|
|
107
|
-
PROXY_USER=your-username
|
|
108
|
-
PROXY_PASS=your-password
|
|
149
|
+
const { page } = await launchHuman({ country: 'us' }); // US residential IP
|
|
150
|
+
const { page } = await launchHuman({ mobile: false }); // Desktop Chrome fingerprint
|
|
109
151
|
```
|
|
110
152
|
|
|
153
|
+
```bash
|
|
154
|
+
npm install human-browser playwright
|
|
155
|
+
npx playwright install chromium --with-deps
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
For proxy-provider env vars (BYO Decodo / IPRoyal / Bright Data / NodeMaven / Oxylabs), see the [proxy setup notes](./references/brightdata-setup.md) and the env section of `scripts/browser-human.js`.
|
|
159
|
+
|
|
111
160
|
---
|
|
112
161
|
|
|
113
|
-
##
|
|
162
|
+
## Supported countries
|
|
114
163
|
|
|
115
|
-
|
|
|
116
|
-
|
|
117
|
-
|
|
|
118
|
-
|
|
|
119
|
-
|
|
|
120
|
-
|
|
|
121
|
-
|
|
|
122
|
-
|
|
|
123
|
-
|
|
|
124
|
-
|
|
|
164
|
+
| Country | Code | Best for |
|
|
165
|
+
|---------|------|----------|
|
|
166
|
+
| 🇷🇴 Romania | `ro` | Polymarket, Instagram, Binance, Cloudflare |
|
|
167
|
+
| 🇺🇸 United States | `us` | Netflix, DoorDash, US banks, Amazon |
|
|
168
|
+
| 🇬🇧 United Kingdom | `gb` | Polymarket, Binance, BBC iPlayer |
|
|
169
|
+
| 🇩🇪 Germany | `de` | EU services, German e-commerce |
|
|
170
|
+
| 🇳🇱 Netherlands | `nl` | Crypto, Polymarket, Web3 |
|
|
171
|
+
| 🇯🇵 Japan | `jp` | Japanese e-commerce, Line |
|
|
172
|
+
| 🇫🇷 France | `fr` | EU services, luxury brands |
|
|
173
|
+
| 🇨🇦 Canada | `ca` | North American services |
|
|
174
|
+
| 🇸🇬 Singapore | `sg` | APAC/SEA e-commerce |
|
|
175
|
+
| 🇦🇺 Australia | `au` | Oceania content |
|
|
125
176
|
|
|
126
177
|
---
|
|
127
178
|
|
|
128
|
-
โ **
|
|
179
|
+
→ **Buy a plan + pricing:** https://humanbrowser.cloud
|
|
129
180
|
→ **Support & questions:** https://t.me/virixlabs
|
|
181
|
+
→ **Full spec for hand-rolled A2A clients:** [SKILL.md](./SKILL.md)
|
package/SKILL.md
CHANGED
|
@@ -954,4 +954,50 @@ curl -sX POST https://agent.humanbrowser.cloud/a2a \
|
|
|
954
954
|
|
|
955
955
|
For live streaming, swap `message/send` → `message/stream` and read the response as `text/event-stream`. Each frame is a JSON-RPC notification carrying a `Task`, `TaskStatusUpdateEvent` or `TaskArtifactUpdateEvent` — exactly what `runOnCloud()` parses internally.
|
|
956
956
|
|
|
957
|
+
#### Webhook callback (v77+)
|
|
958
|
+
|
|
959
|
+
If you'd rather not poll, pass `callback_url` in `message.metadata` and we POST the final task envelope to that URL when the task hits a terminal state:
|
|
960
|
+
|
|
961
|
+
```bash
|
|
962
|
+
curl -sX POST https://agent.humanbrowser.cloud/a2a \
|
|
963
|
+
-H "Authorization: Bearer $HUMANBROWSER_API_TOKEN" \
|
|
964
|
+
-H "Content-Type: application/json" \
|
|
965
|
+
-d '{
|
|
966
|
+
"jsonrpc":"2.0","id":1,"method":"message/send",
|
|
967
|
+
"params":{"message":{
|
|
968
|
+
"role":"user",
|
|
969
|
+
"metadata":{"callback_url":"https://your-host/hb-callback"},
|
|
970
|
+
"parts":[{"kind":"text","text":"..."}]
|
|
971
|
+
}}
|
|
972
|
+
}'
|
|
973
|
+
```
|
|
974
|
+
|
|
975
|
+
The POST carries the full `Task` JSON (status, history, artifacts, metadata) plus `kind: "task.final"` and a `deliveredAt` timestamp. Headers: `Content-Type: application/json`, `X-HB-Task-Id`, `X-HB-Task-State`, and `X-HB-Signature: sha256=<HMAC>` when the server is configured with `A2A_WEBHOOK_SECRET`. Retries 3× on 5xx / network error with 2 / 8 / 30 s backoff. HTTPS only; max URL length 1000 chars.
|
|
976
|
+
|
|
977
|
+
#### Per-step metadata (v77+)
|
|
978
|
+
|
|
979
|
+
`task.metadata` is now enriched on every step the agent takes, so polling `tasks/get` gives a rich progress snapshot without parsing `task.history`:
|
|
980
|
+
|
|
981
|
+
| Field | When updated | Example |
|
|
982
|
+
|---|---|---|
|
|
983
|
+
| `step_count` | each step | `12` |
|
|
984
|
+
| `current_url` | each step | `"https://featured.com/experts/questions"` |
|
|
985
|
+
| `last_thinking` | each step | first ~2 KB of the agent's reasoning |
|
|
986
|
+
| `last_next_goal` | each step | the planner's next-step intent |
|
|
987
|
+
| `last_eval` | each step | the agent's own verdict on the last action |
|
|
988
|
+
| `last_action` | each action | `{ "name": "click", "at": "2026-05-11T..." }` |
|
|
989
|
+
| `cost` | each LLM call | `{ tokens_in, tokens_out, usd, model }` |
|
|
990
|
+
| `outcome` | terminal `done` | `{ success, result, step_count, duration_ms, cost, files }` |
|
|
991
|
+
| `viewerUrl` | initial | `https://humanbrowser.cloud/a/s_xyz?k=...` |
|
|
992
|
+
|
|
993
|
+
A polling client thus renders a faithful "what's it doing right now" panel:
|
|
994
|
+
|
|
995
|
+
```
|
|
996
|
+
step 12/50 ยท https://featured.com/experts/questions
|
|
997
|
+
last action: click on "Page 2"
|
|
998
|
+
last eval: Successfully navigated to page 2 of 7
|
|
999
|
+
cost: $0.58
|
|
1000
|
+
viewer: https://humanbrowser.cloud/a/s_xyz?k=...
|
|
1001
|
+
```
|
|
1002
|
+
|
|
957
1003
|
> **Note on multi-turn**: the A2A spec describes an `input-required` state for tasks that need follow-up input. The current cloud build runs every task to terminal in one shot — multi-turn resumption is reserved in the protocol but not yet wired up server-side. Use `tasks/cancel` and submit a fresh task if you need to redirect.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "human-browser",
|
|
3
|
-
"version": "4.
|
|
3
|
+
"version": "4.5.0",
|
|
4
4
|
"description": "Stealth browser for AI agents. Bypasses Cloudflare, DataDome, PerimeterX. Residential IPs from 10+ countries. iPhone 15 Pro fingerprint. Drop-in Playwright replacement โ launchHuman() just works.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"browser-automation",
|
package/scripts/browser-human.js
CHANGED
|
@@ -25,20 +25,60 @@
|
|
|
25
25
|
// โโโ PLAYWRIGHT RESOLVER โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
26
26
|
// Works in any context: clawhub install, workspace, Clawster containers
|
|
27
27
|
|
|
28
|
+
// Try patchright first (drop-in Playwright fork with CDP-leak patches:
|
|
29
|
+
// no Runtime.enable, no HeadlessChrome UA, navigator.webdriver removed at
|
|
30
|
+
// source level). Fall back to vanilla playwright if patchright is not
|
|
31
|
+
// installed — keeps the npm package usable without the optional dep.
|
|
28
32
|
function _requirePlaywright() {
|
|
33
|
+
const fs = require('fs');
|
|
29
34
|
const tries = [
|
|
30
|
-
() => require('
|
|
31
|
-
() => require(`${__dirname}/../node_modules/
|
|
32
|
-
() => require(`${__dirname}/../../node_modules/
|
|
33
|
-
() => require(`${
|
|
34
|
-
() => require('
|
|
35
|
+
['patchright', () => require('patchright')],
|
|
36
|
+
['patchright (server/node_modules)', () => require(`${__dirname}/../node_modules/patchright`)],
|
|
37
|
+
['patchright (workspace)', () => require(`${__dirname}/../../node_modules/patchright`)],
|
|
38
|
+
['patchright (server/prototype/node_modules)', () => require(`${__dirname}/../prototype/node_modules/patchright`)],
|
|
39
|
+
['patchright (/app/prototype)', () => require('/app/prototype/node_modules/patchright')],
|
|
40
|
+
['playwright', () => require('playwright')],
|
|
41
|
+
['playwright (server/node_modules)', () => require(`${__dirname}/../node_modules/playwright`)],
|
|
42
|
+
['playwright (workspace)', () => require(`${__dirname}/../../node_modules/playwright`)],
|
|
43
|
+
['playwright (HOME workspace)', () => require(`${process.env.HOME || '/root'}/.openclaw/workspace/node_modules/playwright`)],
|
|
44
|
+
['playwright (./node_modules)', () => require('./node_modules/playwright')],
|
|
35
45
|
];
|
|
36
|
-
|
|
37
|
-
|
|
46
|
+
// Verify the chromium executable is actually present before accepting a
|
|
47
|
+
// module — patchright pins to a different chromium revision than playwright,
|
|
48
|
+
// and v48 production was broken because patchright was loadable but its
|
|
49
|
+
// chromium-1217 binary wasn't installed in the image.
|
|
50
|
+
function _executableExists(mod) {
|
|
51
|
+
try {
|
|
52
|
+
const exe = mod.chromium.executablePath();
|
|
53
|
+
return exe && fs.existsSync(exe);
|
|
54
|
+
} catch (_) {
|
|
55
|
+
return false;
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
let lastValid = null;
|
|
59
|
+
let lastValidLabel = null;
|
|
60
|
+
for (const [label, fn] of tries) {
|
|
61
|
+
try {
|
|
62
|
+
const mod = fn();
|
|
63
|
+
if (_executableExists(mod)) {
|
|
64
|
+
try { console.log(`[human-browser] launcher using ${label}`); } catch (_) {}
|
|
65
|
+
return mod;
|
|
66
|
+
}
|
|
67
|
+
// Module loadable but executable missing — keep looking for a usable backend.
|
|
68
|
+
if (!lastValid) { lastValid = mod; lastValidLabel = label; }
|
|
69
|
+
try { console.warn(`[human-browser] ${label} loaded but chromium binary missing โ trying next`); } catch (_) {}
|
|
70
|
+
} catch (_) {}
|
|
71
|
+
}
|
|
72
|
+
// No backend has its chromium installed. Return the first loadable module
|
|
73
|
+
// anyway; the eventual launch() will throw a clear "Executable doesn't exist"
|
|
74
|
+
// message that surfaces in session-server logs.
|
|
75
|
+
if (lastValid) {
|
|
76
|
+
try { console.warn(`[human-browser] falling back to ${lastValidLabel} (chromium binary missing โ launch will fail)`); } catch (_) {}
|
|
77
|
+
return lastValid;
|
|
38
78
|
}
|
|
39
79
|
throw new Error(
|
|
40
|
-
'[human-browser] playwright
|
|
41
|
-
'Run: npm install playwright && npx playwright install chromium'
|
|
80
|
+
'[human-browser] neither patchright nor playwright found.\n' +
|
|
81
|
+
'Run: npm install patchright playwright && npx playwright install chromium && npx patchright install chromium'
|
|
42
82
|
);
|
|
43
83
|
}
|
|
44
84
|
|
|
@@ -111,9 +151,16 @@ function buildDevice(mobile, country = 'ro') {
|
|
|
111
151
|
|
|
112
152
|
const PROXY_PRESETS = {
|
|
113
153
|
decodo: {
|
|
114
|
-
//
|
|
115
|
-
|
|
116
|
-
|
|
154
|
+
// Canonical Decodo entrypoint: gate.decodo.com:7000 (rotating endpoint)
|
|
155
|
+
// with sticky session + country encoded in username (`user-` prefix +
|
|
156
|
+
// `-country-XX-session-Y-sessionduration-30`). Verified: country IS
|
|
157
|
+
// enforced this way; the port-range form (gate.decodo.com:10001..49999)
|
|
158
|
+
// pins sticky IP but IGNORES country in username — that's why us-zone was
|
|
159
|
+
// leaking UK Sky Broadband IPs. The country-subdomain form
|
|
160
|
+
// (us.decodo.com:port) is the same legacy soft-routing path.
|
|
161
|
+
serverTemplate: (country, port) => `http://gate.decodo.com:7000`,
|
|
162
|
+
usernameTemplate: (user, country, port) =>
|
|
163
|
+
`user-${user}-country-${country}-session-${port}-sessionduration-30`,
|
|
117
164
|
defaultUser: null,
|
|
118
165
|
defaultPass: null,
|
|
119
166
|
defaultCountry: 'ro',
|
|
@@ -490,9 +537,35 @@ async function launchHuman(opts = {}) {
|
|
|
490
537
|
'--disable-setuid-sandbox',
|
|
491
538
|
'--ignore-certificate-errors',
|
|
492
539
|
'--disable-blink-features=AutomationControlled',
|
|
493
|
-
'
|
|
540
|
+
// QUIC over UDP can't traverse an HTTP CONNECT proxy; without this
|
|
541
|
+
// Chromium spends 30s+ retrying QUIC against google.com before TCP
|
|
542
|
+
// fallback, blowing the bubus NavigateToUrlEvent budget.
|
|
543
|
+
'--disable-quic',
|
|
544
|
+
// Kill startup chatter that races Playwright's CDP-based proxy auth
|
|
545
|
+
// interceptor (Chromium asks for Proxy-Authorization only after a 407;
|
|
546
|
+
// dozens of concurrent startup fetches all hit 407 simultaneously and
|
|
547
|
+
// some land on a tunnel the proxy already dropped — "duplicate response"
|
|
548
|
+
// CDP warnings + 60s+ NavigateToUrlEvent timeouts).
|
|
549
|
+
'--disable-features=IsolateOrigins,site-per-process,UseDnsHttpsSvcb,UseDnsHttpsSvcbAlpn,AsyncDns,OptimizationHints,OptimizationGuideModelDownloading,OptimizationTargetPrediction,InterestFeedContentSuggestions,Translate,MediaRouter',
|
|
550
|
+
'--disable-background-networking',
|
|
551
|
+
'--disable-component-update',
|
|
552
|
+
'--disable-sync',
|
|
553
|
+
'--disable-domain-reliability',
|
|
554
|
+
'--no-default-browser-check',
|
|
555
|
+
'--no-first-run',
|
|
556
|
+
'--no-pings',
|
|
557
|
+
// Belt-and-braces: tell Chrome to never bypass the proxy locally.
|
|
558
|
+
'--proxy-bypass-list=<-loopback>',
|
|
494
559
|
];
|
|
495
|
-
if (cdpPort)
|
|
560
|
+
if (cdpPort) {
|
|
561
|
+
launchArgs.push(`--remote-debugging-port=${cdpPort}`);
|
|
562
|
+
// Chrome 111+ already blocks page-origin DevTools WS by default, but make
|
|
563
|
+
// the policy explicit and future-proof: only allow connections from
|
|
564
|
+
// server-side WS clients (no Origin header) — anything claiming a real
|
|
565
|
+
// page origin is rejected. Setting an unreachable HTTPS sentinel keeps
|
|
566
|
+
// the spec-required allowlist non-empty without granting any real origin.
|
|
567
|
+
launchArgs.push('--remote-allow-origins=https://disabled.invalid');
|
|
568
|
+
}
|
|
496
569
|
|
|
497
570
|
const effectiveHeadless = headed ? false : headless;
|
|
498
571
|
|
|
@@ -550,6 +623,84 @@ async function launchHuman(opts = {}) {
|
|
|
550
623
|
}
|
|
551
624
|
}, { mobile, locale: meta.locale });
|
|
552
625
|
|
|
626
|
+
// Bot-friendly URL rewrites: top-level navigations only, applied via
|
|
627
|
+
// ctx.route(). Reddit's www/new front-end is heavily Cloudflare-bot-blocked
|
|
628
|
+
// for fresh residential IPs; old.reddit.com is on the same auth/cookie space
|
|
629
|
+
// but ships a much lighter (and far less protected) HTML page. Net effect:
|
|
630
|
+
// higher pass-through rate without changing any user-visible profile state.
|
|
631
|
+
await ctx.route(/^https?:\/\/(www\.|new\.|m\.)?reddit\.com\//i, (route) => {
|
|
632
|
+
try {
|
|
633
|
+
const orig = route.request().url();
|
|
634
|
+
// Only rewrite navigation/document loads — leave XHR/static asset paths
|
|
635
|
+
// alone so login flows and OAuth don't break.
|
|
636
|
+
if (route.request().resourceType() !== 'document') return route.continue();
|
|
637
|
+
const rewritten = orig.replace(
|
|
638
|
+
/^(https?:\/\/)(?:www\.|new\.|m\.)?reddit\.com\//i,
|
|
639
|
+
'$1old.reddit.com/'
|
|
640
|
+
);
|
|
641
|
+
if (rewritten === orig) return route.continue();
|
|
642
|
+
console.warn(`[human-browser] rewrite reddit ${orig} -> ${rewritten}`);
|
|
643
|
+
return route.continue({ url: rewritten });
|
|
644
|
+
} catch (_) { try { route.continue(); } catch (_) {} }
|
|
645
|
+
});
|
|
646
|
+
|
|
647
|
+
// Anti-bot response logger. Catches main-frame responses with status that
|
|
648
|
+
// indicates a block/challenge (403/429/451/503) or known CF/Akamai/PerimeterX
|
|
649
|
+
// signatures, and emits a single-line log so the operator can see at a glance
|
|
650
|
+
// when sites are pushing back. Body sniffing is best-effort and bounded
|
|
651
|
+
// (first 4KB) to avoid memory issues on huge pages.
|
|
652
|
+
ctx.on('response', async (resp) => {
|
|
653
|
+
try {
|
|
654
|
+
const req = resp.request();
|
|
655
|
+
// Only main document loads — not images/fonts/scripts.
|
|
656
|
+
if (req.resourceType() !== 'document') return;
|
|
657
|
+
const url = req.url();
|
|
658
|
+
const status = resp.status();
|
|
659
|
+
let host = '';
|
|
660
|
+
try { host = new URL(url).host; } catch (_) {}
|
|
661
|
+
|
|
662
|
+
// Status-based ban signals.
|
|
663
|
+
const banStatus = status === 403 || status === 429 || status === 451 ||
|
|
664
|
+
(status === 503 && host !== '127.0.0.1');
|
|
665
|
+
|
|
666
|
+
// Header signatures (Cloudflare's challenge / hCaptcha / Akamai BMP).
|
|
667
|
+
let banReason = null;
|
|
668
|
+
const hdrs = resp.headers();
|
|
669
|
+
const cfChlg = hdrs['cf-mitigated'] || hdrs['cf-chl-bypass'] || '';
|
|
670
|
+
const server = (hdrs['server'] || '').toLowerCase();
|
|
671
|
+
const xrh = (hdrs['x-robots-tag'] || '').toLowerCase();
|
|
672
|
+
if (cfChlg) banReason = `cf-mitigated:${cfChlg}`;
|
|
673
|
+
else if (server.includes('akamaighost') && status >= 400) banReason = 'akamai-block';
|
|
674
|
+
else if (banStatus) banReason = `status-${status}`;
|
|
675
|
+
|
|
676
|
+
if (!banReason) return;
|
|
677
|
+
|
|
678
|
+
// Best-effort body sniff for inline reason. Kept tiny.
|
|
679
|
+
let bodyHint = '';
|
|
680
|
+
try {
|
|
681
|
+
const buf = await resp.body();
|
|
682
|
+
const txt = buf.slice(0, 4096).toString('utf8').replace(/\s+/g, ' ');
|
|
683
|
+
// Pull a few telltale phrases.
|
|
684
|
+
const matchers = [
|
|
685
|
+
/just a moment/i, /attention required/i, /access denied/i,
|
|
686
|
+
/blocked.{0,40}network security/i, /verifying you are human/i,
|
|
687
|
+
/your request has been blocked/i, /unusual traffic/i,
|
|
688
|
+
/captcha/i, /perimeterx/i, /datadome/i, /forbidden/i,
|
|
689
|
+
];
|
|
690
|
+
for (const re of matchers) {
|
|
691
|
+
const m = txt.match(re);
|
|
692
|
+
if (m) { bodyHint = m[0].slice(0, 80); break; }
|
|
693
|
+
}
|
|
694
|
+
} catch (_) {}
|
|
695
|
+
|
|
696
|
+
console.warn(
|
|
697
|
+
`[human-browser] BLOCKED host=${host} status=${status} reason=${banReason}` +
|
|
698
|
+
(bodyHint ? ` hint="${bodyHint}"` : '') +
|
|
699
|
+
` url=${url.slice(0, 200)}`
|
|
700
|
+
);
|
|
701
|
+
} catch (_) { /* listener must never throw */ }
|
|
702
|
+
});
|
|
703
|
+
|
|
553
704
|
// Persistent context launches with a default page; reuse it instead of
|
|
554
705
|
// opening a second tab (ephemeral context starts with no pages).
|
|
555
706
|
const existing = ctx.pages();
|
|
@@ -573,6 +724,12 @@ async function launchHuman(opts = {}) {
|
|
|
573
724
|
return {
|
|
574
725
|
browser, ctx, page,
|
|
575
726
|
cdpHttpUrl, cdpWsUrl,
|
|
727
|
+
proxy, // resolved {server, username, password} or null โ needed so callers
|
|
728
|
+
// (session-server.js → browser-use-runner.py) can hand the same creds
|
|
729
|
+
// to browser-use's root-CDP proxy auth handler. Without this, browser-use
|
|
730
|
+
// creates its own targets via raw CDP and Chromium returns 407 because
|
|
731
|
+
// patchright's Fetch.authRequired interceptor is bound per-CRPage, not
|
|
732
|
+
// browser-wide.
|
|
576
733
|
humanClick, humanMouseMove, humanType, humanScroll, humanRead, sleep, rand,
|
|
577
734
|
};
|
|
578
735
|
}
|