barebrowse 0.5.8 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +57 -0
- package/README.md +3 -3
- package/barebrowse.context.md +10 -12
- package/mcp-server.js +43 -67
- package/package.json +1 -1
- package/src/chromium.js +5 -4
- package/src/index.js +51 -33
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,62 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.6.1
|
|
4
|
+
|
|
5
|
+
Headed fallback is now a per-navigation escape hatch, not a permanent mode switch, and it degrades gracefully when headed mode is unavailable.
|
|
6
|
+
|
|
7
|
+
### Switch-back to headless (`src/index.js`)
|
|
8
|
+
- `connect().goto()` in hybrid mode: if currently headed from a previous fallback, kills the headed browser and launches fresh headless before navigating
|
|
9
|
+
- New `currentlyHeaded` runtime state variable tracks actual browser mode (vs `mode` which is user config)
|
|
10
|
+
- `createPage()` stealth decision uses runtime mode (`!currentlyHeaded`) instead of config mode (`mode !== 'headed'`)
|
|
11
|
+
- `createTab()` also uses `currentlyHeaded` for correct stealth application
|
|
12
|
+
|
|
13
|
+
### Graceful degradation (`src/index.js`)
|
|
14
|
+
- `connect().goto()` hybrid fallback wrapped in try/catch — if `launch({ headed: true })` fails (no `$DISPLAY`, no Wayland, CI/Docker), keeps the headless result with `botBlocked: true` and `[BOT CHALLENGE DETECTED]` warning
|
|
15
|
+
- `browse()` hybrid fallback also wrapped in try/catch — same graceful degradation for one-shot browsing
|
|
16
|
+
- No crash on headless-only environments
|
|
17
|
+
|
|
18
|
+
### Flow after changes
|
|
19
|
+
```
|
|
20
|
+
goto(url) in hybrid mode:
|
|
21
|
+
1. If currently headed → kill headed, launch headless, reset currentlyHeaded
|
|
22
|
+
2. Navigate to url
|
|
23
|
+
3. Check bot-blocked
|
|
24
|
+
4. If bot-blocked → TRY launch headed (set currentlyHeaded=true)
|
|
25
|
+
→ CATCH: headed unavailable, keep headless result
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### Docs
|
|
29
|
+
- Updated hybrid mode descriptions in barebrowse.context.md, system-state.md, prd.md
|
|
30
|
+
|
|
31
|
+
### Tests
|
|
32
|
+
- All existing tests pass (tests use headless mode, unaffected by hybrid logic)
|
|
33
|
+
|
|
34
|
+
## 0.6.0
|
|
35
|
+
|
|
36
|
+
Self-launching headed fallback. Headed and hybrid modes no longer require a manually-launched browser on port 9222 — barebrowse auto-launches a visible Chromium window via `launch({ headed: true })`.
|
|
37
|
+
|
|
38
|
+
### Headed mode auto-launch (`src/chromium.js`)
|
|
39
|
+
- `launch()` accepts `headed` option — skips `--headless=new` and `--hide-scrollbars` flags
|
|
40
|
+
- Same temp profile, same random port, same CDP parsing, same process return
|
|
41
|
+
|
|
42
|
+
### Hybrid fallback fix (`src/index.js`)
|
|
43
|
+
- All 4 `getDebugUrl(port)` call sites replaced with `launch({ headed: true, proxy })` + `createCDP(browser.wsUrl)`
|
|
44
|
+
- `browse()` headed branch, `browse()` hybrid fallback, `connect()` headed branch, `connect().goto()` hybrid fallback
|
|
45
|
+
- `getDebugUrl` import removed from index.js (still exported from chromium.js for external use)
|
|
46
|
+
- Hybrid mode now actually works — previously it tried to connect to port 9222 which nobody ran
|
|
47
|
+
|
|
48
|
+
### Assess handler simplified (`mcp-server.js`)
|
|
49
|
+
- Removed dual-path `runAssess(headed)` function (~60 lines of broken headed fallback)
|
|
50
|
+
- Assess now uses the session's hybrid mode: if tab is bot-blocked, triggers headed fallback via main page `goto()`, then retries in a new tab
|
|
51
|
+
- One flow, no separate `connect({ mode: 'headed' })` call
|
|
52
|
+
|
|
53
|
+
### Docs
|
|
54
|
+
- Removed all "launch browser with --remote-debugging-port=9222" instructions
|
|
55
|
+
- Updated headed/hybrid mode descriptions across barebrowse.context.md, README.md, system-state.md, prd.md
|
|
56
|
+
|
|
57
|
+
### Tests
|
|
58
|
+
- 71/71 passing — no test changes needed (all tests use headless mode)
|
|
59
|
+
|
|
3
60
|
## 0.5.8
|
|
4
61
|
|
|
5
62
|
Bot challenge detection for all browsing, not just assess.
|
package/README.md
CHANGED
|
@@ -100,12 +100,12 @@ For code examples, API reference, and wiring instructions, see **[barebrowse.con
|
|
|
100
100
|
| Mode | What happens | Best for |
|
|
101
101
|
|------|-------------|----------|
|
|
102
102
|
| **Headless** (default) | Launches a fresh Chromium, no UI | Fast automation, scraping, reading pages |
|
|
103
|
-
| **Headed** |
|
|
104
|
-
| **Hybrid** | Tries headless first,
|
|
103
|
+
| **Headed** | Auto-launches a visible Chromium window | Bot-detected sites, visual debugging, CAPTCHAs |
|
|
104
|
+
| **Hybrid** | Tries headless first, auto-launches headed if blocked | General-purpose agent browsing |
|
|
105
105
|
|
|
106
106
|
## What it handles automatically
|
|
107
107
|
|
|
108
|
-
Cookie consent walls (29 languages, with real mouse click fallback for stubborn CMPs), login walls (cookie extraction from your browsers), bot detection (stealth patches + automatic headed fallback
|
|
108
|
+
Cookie consent walls (29 languages, with real mouse click fallback for stubborn CMPs), login walls (cookie extraction from your browsers), bot detection (ARIA node count heuristic + stealth patches + automatic headed fallback — snapshot shows `[BOT CHALLENGE DETECTED]` warning when blocked), permission prompts, SPA navigation, JS dialogs, off-screen elements, pre-filled inputs, ARIA noise, and profile locking. The agent doesn't think about any of it.
|
|
109
109
|
|
|
110
110
|
## What the agent sees
|
|
111
111
|
|
package/barebrowse.context.md
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# barebrowse -- Integration Guide
|
|
2
2
|
|
|
3
3
|
> For AI assistants and developers wiring barebrowse into a project.
|
|
4
|
-
> v0.
|
|
4
|
+
> v0.6.1 | Node.js >= 22 | 0 required deps | MIT
|
|
5
5
|
|
|
6
6
|
## What this is
|
|
7
7
|
|
|
@@ -23,10 +23,8 @@ Three integration paths:
|
|
|
23
23
|
| Mode | What it does | When to use |
|
|
24
24
|
|---|---|---|
|
|
25
25
|
| `headless` (default) | Launches a fresh Chromium, no UI | Scraping, reading, fast automation |
|
|
26
|
-
| `headed` |
|
|
27
|
-
| `hybrid` | Tries headless first,
|
|
28
|
-
|
|
29
|
-
Headed mode requires the browser to be launched with `--remote-debugging-port=9222`.
|
|
26
|
+
| `headed` | Auto-launches a visible Chromium window | Bot-detected sites, debugging, visual tasks |
|
|
27
|
+
| `hybrid` | Tries headless first, headed fallback per-navigation (switches back to headless next time) | General-purpose agent browsing |
|
|
30
28
|
|
|
31
29
|
## Minimal usage: one-shot browse
|
|
32
30
|
|
|
@@ -45,13 +43,12 @@ const snapshot = await browse('https://example.com', {
|
|
|
45
43
|
pruneMode: 'act', // 'act' (interactive elements) | 'read' (all content)
|
|
46
44
|
consent: true, // auto-dismiss cookie consent dialogs
|
|
47
45
|
timeout: 30000, // navigation timeout in ms
|
|
48
|
-
port: 9222, // CDP port for headed/hybrid mode
|
|
49
46
|
});
|
|
50
47
|
```
|
|
51
48
|
|
|
52
49
|
## connect() API
|
|
53
50
|
|
|
54
|
-
`connect(opts)` returns a page handle for interactive sessions. Same opts as `browse()` for mode
|
|
51
|
+
`connect(opts)` returns a page handle for interactive sessions. Same opts as `browse()` for mode. Supports `hybrid` mode — starts headless, auto-launches headed on bot detection (same as `browse()`).
|
|
55
52
|
|
|
56
53
|
| Method | Args | Returns | Notes |
|
|
57
54
|
|---|---|---|---|
|
|
@@ -76,9 +73,10 @@ const snapshot = await browse('https://example.com', {
|
|
|
76
73
|
| `waitForNetworkIdle(opts?)` | { timeout?: number, idle?: number } | void | Wait until no pending requests for `idle` ms (default 500) |
|
|
77
74
|
| `saveState(filePath)` | filePath: string | void | Export cookies + localStorage to JSON file |
|
|
78
75
|
| `injectCookies(url, opts?)` | url: string, { browser?: string } | void | Extract cookies from user's browser and inject via CDP |
|
|
76
|
+
| `botBlocked` | -- | boolean | True if last `goto()` hit a bot challenge (ARIA node count <50). Resets on each navigation. |
|
|
79
77
|
| `dialogLog` | -- | Array<{type, message, timestamp}> | Auto-dismissed JS dialog history |
|
|
80
78
|
| `cdp` | -- | object | Raw CDP session for escape hatch: `page.cdp.send(method, params)` |
|
|
81
|
-
| `createTab()` | -- | tab handle | New tab in same browser. Returns `{ goto, injectCookies, waitForNetworkIdle, cdp, close }`. Tab close doesn't affect session. |
|
|
79
|
+
| `createTab()` | -- | tab handle | New tab in same browser. Returns `{ goto, botBlocked, injectCookies, waitForNetworkIdle, cdp, close }`. Tab close doesn't affect session. |
|
|
82
80
|
| `close()` | -- | void | Close page, disconnect CDP, kill browser (if headless) |
|
|
83
81
|
|
|
84
82
|
**connect() options** (in addition to mode/consent):
|
|
@@ -154,7 +152,7 @@ barebrowse can inject cookies from the user's real browser sessions, bypassing l
|
|
|
154
152
|
| Off-screen elements | `DOM.scrollIntoViewIfNeeded` before every click | Both |
|
|
155
153
|
| Form submission | `press('Enter')` triggers onsubmit | Both |
|
|
156
154
|
| SPA navigation | `waitForNavigation()` uses loadEventFired + frameNavigated | Both |
|
|
157
|
-
| Bot detection |
|
|
155
|
+
| Bot detection | ARIA node count (<50 = bot-blocked) + text heuristics. `botBlocked` flag set after every `goto()`. Hybrid fallback switches to headed. Snapshot shows `[BOT CHALLENGE DETECTED]` warning. | Hybrid |
|
|
158
156
|
| `navigator.webdriver` | Stealth patches in headless (webdriver, plugins, chrome obj) | Headless |
|
|
159
157
|
| Profile locking | Unique temp dir per headless instance | Headless |
|
|
160
158
|
| Shared memory crash (Linux) | `--disable-dev-shm-usage` flag prevents `/dev/shm` exhaustion | Headless |
|
|
@@ -311,13 +309,13 @@ Useful for agent threshold decisions: "skip sites above score 40", "warn if term
|
|
|
311
309
|
|
|
312
310
|
3. **Pruning modes matter.** `act` mode (default) keeps interactive elements + visible labels. `read` mode keeps all text content. Use `read` for content extraction, `act` for form filling and navigation.
|
|
313
311
|
|
|
314
|
-
4. **Headed mode
|
|
312
|
+
4. **Headed mode auto-launches Chromium.** No need to start a browser manually — barebrowse launches a headed Chromium instance with CDP enabled automatically.
|
|
315
313
|
|
|
316
314
|
5. **Cookie extraction needs unlocked profile.** Chromium cookies are AES-encrypted with a keyring key. If Chromium is running, the profile may be locked. Firefox cookies are plaintext and always accessible.
|
|
317
315
|
|
|
318
|
-
6. **Hybrid mode
|
|
316
|
+
6. **Hybrid mode is per-navigation.** If headless is bot-blocked, hybrid kills headless and launches headed for that URL. On the next `goto()`, it switches back to headless automatically. If headed can't launch (no display — CI, Docker), it degrades gracefully with the headless result and a `[BOT CHALLENGE DETECTED]` warning.
|
|
319
317
|
|
|
320
|
-
7. **One page per connect().** Each `connect()` call creates one page.
|
|
318
|
+
7. **One page per connect(), but tabs are supported.** Each `connect()` call creates one page. Use `createTab()` for additional tabs in the same browser.
|
|
321
319
|
|
|
322
320
|
8. **Consent dismiss is best-effort.** It handles 16+ tested sites across 29 languages but novel consent implementations may need manual handling. Disable with `{ consent: false }`.
|
|
323
321
|
|
package/mcp-server.js
CHANGED
|
@@ -54,26 +54,18 @@ async function getPage() {
|
|
|
54
54
|
}
|
|
55
55
|
}
|
|
56
56
|
|
|
57
|
-
// Concurrency limiter
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
57
|
+
// Concurrency limiter — one assess at a time.
|
|
58
|
+
// Headless tabs are fast, but headed fallback runs in the session's single auto-launched headed browser.
|
|
59
|
+
// Running multiple headed navigations simultaneously hangs the browser.
|
|
60
|
+
let _assessLock = Promise.resolve();
|
|
61
61
|
|
|
62
62
|
function acquireAssessSlot() {
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
return new Promise((resolve) => _assessQueue.push(resolve));
|
|
63
|
+
let release;
|
|
64
|
+
const prev = _assessLock;
|
|
65
|
+
_assessLock = new Promise((r) => { release = r; });
|
|
66
|
+
return prev.then(() => release);
|
|
68
67
|
}
|
|
69
68
|
|
|
70
|
-
function releaseAssessSlot() {
|
|
71
|
-
if (_assessQueue.length > 0) {
|
|
72
|
-
_assessQueue.shift()();
|
|
73
|
-
} else {
|
|
74
|
-
_assessRunning--;
|
|
75
|
-
}
|
|
76
|
-
}
|
|
77
69
|
|
|
78
70
|
const TOOLS = [
|
|
79
71
|
{
|
|
@@ -292,66 +284,50 @@ async function handleToolCall(name, args) {
|
|
|
292
284
|
}
|
|
293
285
|
case 'assess': {
|
|
294
286
|
if (!assessFn) throw new Error('wearehere is not installed. Run: npm install wearehere');
|
|
295
|
-
await acquireAssessSlot();
|
|
287
|
+
const releaseSlot = await acquireAssessSlot();
|
|
296
288
|
try {
|
|
297
|
-
const
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
tab = await connect({ mode: 'headed' });
|
|
301
|
-
} else {
|
|
302
|
-
const page = await getPage();
|
|
303
|
-
tab = await page.createTab();
|
|
304
|
-
}
|
|
305
|
-
let timer;
|
|
306
|
-
try {
|
|
307
|
-
const result = await Promise.race([
|
|
308
|
-
(async () => {
|
|
309
|
-
await tab.injectCookies(args.url).catch(() => {});
|
|
310
|
-
return await assessFn(tab, args.url, { timeout: args.timeout, settle: args.settle });
|
|
311
|
-
})(),
|
|
312
|
-
new Promise((_, reject) => {
|
|
313
|
-
timer = setTimeout(() => {
|
|
314
|
-
tab.close().catch(() => {});
|
|
315
|
-
reject(new Error('assess timeout'));
|
|
316
|
-
}, 30000);
|
|
317
|
-
}),
|
|
318
|
-
]);
|
|
319
|
-
clearTimeout(timer);
|
|
320
|
-
const wasBotBlocked = tab.botBlocked;
|
|
321
|
-
await tab.close().catch(() => {});
|
|
322
|
-
return { result, botBlocked: wasBotBlocked };
|
|
323
|
-
} catch (err) {
|
|
324
|
-
clearTimeout(timer);
|
|
325
|
-
await tab.close().catch(() => {});
|
|
326
|
-
throw err;
|
|
327
|
-
}
|
|
328
|
-
};
|
|
329
|
-
|
|
330
|
-
// Try headless first
|
|
289
|
+
const page = await getPage();
|
|
290
|
+
const tab = await page.createTab();
|
|
291
|
+
let timer;
|
|
331
292
|
try {
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
293
|
+
await tab.injectCookies(args.url).catch(() => {});
|
|
294
|
+
const result = await Promise.race([
|
|
295
|
+
assessFn(tab, args.url, { timeout: args.timeout, settle: args.settle }),
|
|
296
|
+
new Promise((_, rej) => { timer = setTimeout(() => rej(new Error('assess timeout')), 30000); }),
|
|
297
|
+
]);
|
|
298
|
+
clearTimeout(timer);
|
|
299
|
+
if (tab.botBlocked) {
|
|
300
|
+
// Bot-blocked — trigger hybrid fallback via main page, retry in new tab
|
|
301
|
+
await tab.close().catch(() => {});
|
|
302
|
+
await page.goto(args.url);
|
|
303
|
+
const tab2 = await page.createTab();
|
|
304
|
+
let timer2;
|
|
335
305
|
try {
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
306
|
+
await tab2.injectCookies(args.url).catch(() => {});
|
|
307
|
+
const r2 = await Promise.race([
|
|
308
|
+
assessFn(tab2, args.url, { timeout: args.timeout, settle: args.settle }),
|
|
309
|
+
new Promise((_, rej) => { timer2 = setTimeout(() => rej(new Error('assess timeout')), 30000); }),
|
|
310
|
+
]);
|
|
311
|
+
clearTimeout(timer2);
|
|
312
|
+
if (tab2.botBlocked) r2._warning = 'Bot-blocked in both modes. Score may be unreliable.';
|
|
313
|
+
await tab2.close().catch(() => {});
|
|
314
|
+
return JSON.stringify(r2, null, 2);
|
|
315
|
+
} catch (err2) {
|
|
316
|
+
clearTimeout(timer2);
|
|
317
|
+
await tab2.close().catch(() => {});
|
|
318
|
+
throw err2;
|
|
340
319
|
}
|
|
341
320
|
}
|
|
321
|
+
await tab.close().catch(() => {});
|
|
342
322
|
return JSON.stringify(result, null, 2);
|
|
343
323
|
} catch (err) {
|
|
324
|
+
clearTimeout(timer);
|
|
325
|
+
await tab.close().catch(() => {});
|
|
344
326
|
if (isCdpDead(err)) _page = null;
|
|
345
|
-
|
|
346
|
-
try {
|
|
347
|
-
const headed = await runAssess(true);
|
|
348
|
-
return JSON.stringify(headed.result, null, 2);
|
|
349
|
-
} catch (retryErr) {
|
|
350
|
-
throw retryErr;
|
|
351
|
-
}
|
|
327
|
+
throw err;
|
|
352
328
|
}
|
|
353
329
|
} finally {
|
|
354
|
-
|
|
330
|
+
releaseSlot();
|
|
355
331
|
}
|
|
356
332
|
}
|
|
357
333
|
default:
|
|
@@ -374,7 +350,7 @@ async function handleMessage(msg) {
|
|
|
374
350
|
return jsonrpcResponse(id, {
|
|
375
351
|
protocolVersion: '2024-11-05',
|
|
376
352
|
capabilities: { tools: {} },
|
|
377
|
-
serverInfo: { name: 'barebrowse', version: '0.
|
|
353
|
+
serverInfo: { name: 'barebrowse', version: '0.6.0' },
|
|
378
354
|
});
|
|
379
355
|
}
|
|
380
356
|
|
package/package.json
CHANGED
package/src/chromium.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* chromium.js — Find, launch, and connect to Chromium-based browsers.
|
|
3
3
|
*
|
|
4
4
|
* Supports: Chrome, Chromium, Brave, Edge, Vivaldi, Arc, Opera.
|
|
5
|
-
* Modes: headless (launch new), headed (
|
|
5
|
+
* Modes: headless (launch new, no UI), headed (launch new, visible window).
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
import { execSync, spawn } from 'node:child_process';
|
|
@@ -55,11 +55,12 @@ export function findBrowser() {
|
|
|
55
55
|
}
|
|
56
56
|
|
|
57
57
|
/**
|
|
58
|
-
* Launch a
|
|
58
|
+
* Launch a Chromium instance with CDP enabled.
|
|
59
59
|
* @param {object} [opts]
|
|
60
60
|
* @param {string} [opts.binary] - Path to browser binary (auto-detected if omitted)
|
|
61
61
|
* @param {number} [opts.port=0] - CDP port (0 = random available port)
|
|
62
62
|
* @param {string} [opts.userDataDir] - Browser profile directory
|
|
63
|
+
* @param {boolean} [opts.headed=false] - Launch in headed mode (with visible window)
|
|
63
64
|
* @returns {Promise<{wsUrl: string, process: ChildProcess, port: number}>}
|
|
64
65
|
*/
|
|
65
66
|
export async function launch(opts = {}) {
|
|
@@ -67,7 +68,6 @@ export async function launch(opts = {}) {
|
|
|
67
68
|
const port = opts.port || 0;
|
|
68
69
|
|
|
69
70
|
const args = [
|
|
70
|
-
'--headless=new',
|
|
71
71
|
`--remote-debugging-port=${port}`,
|
|
72
72
|
'--no-first-run',
|
|
73
73
|
'--no-default-browser-check',
|
|
@@ -75,7 +75,8 @@ export async function launch(opts = {}) {
|
|
|
75
75
|
'--disable-sync',
|
|
76
76
|
'--disable-translate',
|
|
77
77
|
'--mute-audio',
|
|
78
|
-
|
|
78
|
+
// Headless-only flags
|
|
79
|
+
...(!opts.headed ? ['--headless=new', '--hide-scrollbars'] : []),
|
|
79
80
|
// Suppress permission prompts (location, notifications, camera, mic, etc.)
|
|
80
81
|
'--disable-notifications',
|
|
81
82
|
'--autoplay-policy=no-user-gesture-required',
|
package/src/index.js
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
* const snapshot = await browse('https://example.com');
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
|
-
import { launch
|
|
11
|
+
import { launch } from './chromium.js';
|
|
12
12
|
import { createCDP } from './cdp.js';
|
|
13
13
|
import { formatTree } from './aria.js';
|
|
14
14
|
import { authenticate } from './auth.js';
|
|
@@ -27,7 +27,6 @@ import { applyStealth } from './stealth.js';
|
|
|
27
27
|
* @param {boolean} [opts.cookies=true] - Inject user's cookies (Phase 2)
|
|
28
28
|
* @param {boolean} [opts.prune=true] - Apply ARIA pruning (Phase 2)
|
|
29
29
|
* @param {number} [opts.timeout=30000] - Navigation timeout in ms
|
|
30
|
-
* @param {number} [opts.port] - CDP port for headed mode
|
|
31
30
|
* @returns {Promise<string>} ARIA snapshot text
|
|
32
31
|
*/
|
|
33
32
|
export async function browse(url, opts = {}) {
|
|
@@ -40,9 +39,8 @@ export async function browse(url, opts = {}) {
|
|
|
40
39
|
try {
|
|
41
40
|
// Step 1: Get a CDP connection
|
|
42
41
|
if (mode === 'headed') {
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
cdp = await createCDP(wsUrl);
|
|
42
|
+
browser = await launch({ headed: true, proxy: opts.proxy });
|
|
43
|
+
cdp = await createCDP(browser.wsUrl);
|
|
46
44
|
} else {
|
|
47
45
|
// headless or hybrid (start headless)
|
|
48
46
|
browser = await launch({ proxy: opts.proxy });
|
|
@@ -81,17 +79,20 @@ export async function browse(url, opts = {}) {
|
|
|
81
79
|
cdp.close();
|
|
82
80
|
if (browser) { browser.process.kill(); browser = null; }
|
|
83
81
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
82
|
+
try {
|
|
83
|
+
browser = await launch({ headed: true, proxy: opts.proxy });
|
|
84
|
+
cdp = await createCDP(browser.wsUrl);
|
|
85
|
+
page = await createPage(cdp, false, { viewport: opts.viewport });
|
|
86
|
+
await suppressPermissions(cdp);
|
|
87
|
+
if (opts.cookies !== false) {
|
|
88
|
+
try { await authenticate(page.session, url, { browser: opts.browser }); } catch {}
|
|
89
|
+
}
|
|
90
|
+
await navigate(page, url, timeout);
|
|
91
|
+
if (opts.consent !== false) await dismissConsent(page.session);
|
|
92
|
+
({ tree } = await ariaTree(page));
|
|
93
|
+
} catch {
|
|
94
|
+
// Headed launch failed (no display?) — return headless result as-is
|
|
91
95
|
}
|
|
92
|
-
await navigate(page, url, timeout);
|
|
93
|
-
if (opts.consent !== false) await dismissConsent(page.session);
|
|
94
|
-
({ tree } = await ariaTree(page));
|
|
95
96
|
}
|
|
96
97
|
|
|
97
98
|
// Step 6: Prune for agent consumption
|
|
@@ -121,7 +122,6 @@ export async function browse(url, opts = {}) {
|
|
|
121
122
|
*
|
|
122
123
|
* @param {object} [opts]
|
|
123
124
|
* @param {'headless'|'headed'|'hybrid'} [opts.mode='headless'] - Browser mode
|
|
124
|
-
* @param {number} [opts.port=9222] - CDP port for headed mode
|
|
125
125
|
* @returns {Promise<object>} Page handle with goto, snapshot, close
|
|
126
126
|
*/
|
|
127
127
|
export async function connect(opts = {}) {
|
|
@@ -130,15 +130,15 @@ export async function connect(opts = {}) {
|
|
|
130
130
|
let cdp;
|
|
131
131
|
|
|
132
132
|
if (mode === 'headed') {
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
cdp = await createCDP(wsUrl);
|
|
133
|
+
browser = await launch({ headed: true, proxy: opts.proxy });
|
|
134
|
+
cdp = await createCDP(browser.wsUrl);
|
|
136
135
|
} else {
|
|
137
136
|
browser = await launch({ proxy: opts.proxy });
|
|
138
137
|
cdp = await createCDP(browser.wsUrl);
|
|
139
138
|
}
|
|
140
139
|
|
|
141
|
-
let
|
|
140
|
+
let currentlyHeaded = (mode === 'headed');
|
|
141
|
+
let page = await createPage(cdp, !currentlyHeaded, { viewport: opts.viewport });
|
|
142
142
|
let refMap = new Map();
|
|
143
143
|
let botBlocked = false;
|
|
144
144
|
|
|
@@ -175,6 +175,20 @@ export async function connect(opts = {}) {
|
|
|
175
175
|
|
|
176
176
|
return {
|
|
177
177
|
async goto(url, timeout = 30000) {
|
|
178
|
+
// Switch back to headless if we fell back to headed previously
|
|
179
|
+
if (currentlyHeaded && mode === 'hybrid') {
|
|
180
|
+
await cdp.send('Target.closeTarget', { targetId: page.targetId });
|
|
181
|
+
cdp.close();
|
|
182
|
+
if (browser) { browser.process.kill(); browser = null; }
|
|
183
|
+
|
|
184
|
+
browser = await launch({ proxy: opts.proxy });
|
|
185
|
+
cdp = await createCDP(browser.wsUrl);
|
|
186
|
+
page = await createPage(cdp, true, { viewport: opts.viewport });
|
|
187
|
+
setupDialogHandler(page.session);
|
|
188
|
+
await suppressPermissions(cdp);
|
|
189
|
+
currentlyHeaded = false;
|
|
190
|
+
}
|
|
191
|
+
|
|
178
192
|
await navigate(page, url, timeout);
|
|
179
193
|
if (opts.consent !== false) {
|
|
180
194
|
await dismissConsent(page.session);
|
|
@@ -190,18 +204,22 @@ export async function connect(opts = {}) {
|
|
|
190
204
|
cdp.close();
|
|
191
205
|
if (browser) { browser.process.kill(); browser = null; }
|
|
192
206
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
207
|
+
try {
|
|
208
|
+
browser = await launch({ headed: true, proxy: opts.proxy });
|
|
209
|
+
cdp = await createCDP(browser.wsUrl);
|
|
210
|
+
page = await createPage(cdp, false, { viewport: opts.viewport });
|
|
211
|
+
setupDialogHandler(page.session);
|
|
212
|
+
await suppressPermissions(cdp);
|
|
213
|
+
await navigate(page, url, timeout);
|
|
214
|
+
if (opts.consent !== false) await dismissConsent(page.session);
|
|
215
|
+
|
|
216
|
+
// Re-check after headed fallback
|
|
217
|
+
const after = await ariaTree(page);
|
|
218
|
+
botBlocked = isChallengePage(after.tree, after.nodeCount);
|
|
219
|
+
currentlyHeaded = true;
|
|
220
|
+
} catch {
|
|
221
|
+
// Headed launch failed (no display?) — keep headless result, botBlocked stays true
|
|
222
|
+
}
|
|
205
223
|
}
|
|
206
224
|
},
|
|
207
225
|
|
|
@@ -375,7 +393,7 @@ export async function connect(opts = {}) {
|
|
|
375
393
|
cdp: page.session,
|
|
376
394
|
|
|
377
395
|
async createTab() {
|
|
378
|
-
const tab = await createPage(cdp,
|
|
396
|
+
const tab = await createPage(cdp, !currentlyHeaded, { viewport: opts.viewport });
|
|
379
397
|
await suppressPermissions(cdp);
|
|
380
398
|
let tabBotBlocked = false;
|
|
381
399
|
return {
|