barebrowse 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.mcp.json +8 -0
- package/CHANGELOG.md +100 -0
- package/CLAUDE.md +22 -0
- package/README.md +123 -43
- package/barebrowse.context.md +261 -0
- package/cli.js +156 -0
- package/docs/blueprint.md +361 -0
- package/docs/testing.md +202 -0
- package/mcp-server.js +216 -0
- package/package.json +22 -9
- package/src/aria.js +69 -0
- package/src/auth.js +279 -0
- package/src/bareagent.js +161 -0
- package/src/cdp.js +148 -0
- package/src/chromium.js +148 -0
- package/src/consent.js +210 -0
- package/src/index.js +186 -10
- package/src/interact.js +208 -0
- package/src/prune.js +472 -0
- package/src/stealth.js +51 -0
package/src/chromium.js
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* chromium.js — Find, launch, and connect to Chromium-based browsers.
|
|
3
|
+
*
|
|
4
|
+
* Supports: Chrome, Chromium, Brave, Edge, Vivaldi, Arc, Opera.
|
|
5
|
+
* Modes: headless (launch new), headed (connect to running).
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { execSync, spawn } from 'node:child_process';
|
|
9
|
+
import { existsSync } from 'node:fs';
|
|
10
|
+
|
|
11
|
+
// Common Chromium binary paths by platform (Linux focus for POC)
// Order is significant: findBrowser() walks this list front to back and
// returns the first entry that resolves. Bare names are looked up with
// `which`; entries starting with '/' are checked directly on disk.
const CANDIDATES = [
  // Linux — command names expected to be on PATH
  'chromium-browser',
  'chromium',
  'google-chrome-stable',
  'google-chrome',
  'brave-browser-stable',
  'brave-browser',
  'microsoft-edge-stable',
  'microsoft-edge',
  'vivaldi-stable',
  'vivaldi',
  // macOS (future) — absolute paths inside the application bundles
  '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
  '/Applications/Brave Browser.app/Contents/MacOS/Brave Browser',
  '/Applications/Chromium.app/Contents/MacOS/Chromium',
  '/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge',
];
|
|
30
|
+
|
|
31
|
+
/**
 * Locate a Chromium-family browser executable.
 *
 * Walks CANDIDATES in order and returns the first one that resolves:
 * absolute paths must exist on disk, bare command names must be found
 * by `which`.
 *
 * @returns {string} Path to the binary
 * @throws {Error} If none of the candidates resolves
 */
export function findBrowser() {
  // Resolve one candidate to a usable path, or '' when it is unavailable.
  const locate = (name) => {
    if (name.startsWith('/')) {
      return existsSync(name) ? name : '';
    }
    try {
      return execSync(`which ${name} 2>/dev/null`, { encoding: 'utf8' }).trim();
    } catch {
      // `which` exits non-zero when the command is not on PATH.
      return '';
    }
  };

  for (const name of CANDIDATES) {
    const resolved = locate(name);
    if (resolved) return resolved;
  }

  const hint = [
    'No Chromium-based browser found. Install Chrome, Chromium, Brave, or Edge.',
    'On Fedora: sudo dnf install chromium',
  ];
  throw new Error(hint.join('\n'));
}
|
|
56
|
+
|
|
57
|
+
/**
 * Launch a headless Chromium instance with CDP enabled.
 *
 * Spawns the browser, then waits (up to 10s) for it to announce its DevTools
 * WebSocket endpoint on stderr. If the browser fails to spawn, exits before
 * announcing the endpoint, or the wait times out, the returned promise rejects;
 * on timeout the child process is killed so it is not left running.
 *
 * @param {object} [opts]
 * @param {string} [opts.binary] - Path to browser binary (auto-detected if omitted)
 * @param {number} [opts.port=0] - CDP port (0 = random available port)
 * @param {string} [opts.userDataDir] - Browser profile directory
 * @returns {Promise<{wsUrl: string, process: ChildProcess, port: number}>}
 * @throws {Error} If the browser cannot be launched or does not report a CDP URL in time
 */
export async function launch(opts = {}) {
  const binary = opts.binary || findBrowser();
  const port = opts.port || 0;

  const args = [
    '--headless=new',
    `--remote-debugging-port=${port}`,
    '--no-first-run',
    '--no-default-browser-check',
    '--disable-background-networking',
    '--disable-sync',
    '--disable-translate',
    '--mute-audio',
    '--hide-scrollbars',
    // Suppress permission prompts (location, notifications, camera, mic, etc.)
    '--disable-notifications',
    '--autoplay-policy=no-user-gesture-required',
    '--use-fake-device-for-media-stream',
    '--use-fake-ui-for-media-stream',
    '--disable-features=MediaRouter',
  ];

  if (opts.userDataDir) {
    args.push(`--user-data-dir=${opts.userDataDir}`);
  } else {
    // Use a unique temp profile so we don't lock the user's profile
    // or conflict with parallel instances
    args.push(`--user-data-dir=/tmp/barebrowse-${process.pid}-${Date.now()}`);
  }

  // about:blank as initial page
  args.push('about:blank');

  const child = spawn(binary, args, {
    stdio: ['ignore', 'pipe', 'pipe'],
  });

  // stdout is never parsed here; keep it flowing so a full pipe buffer
  // cannot stall the browser.
  child.stdout?.resume();

  // Parse the WebSocket URL from stderr
  // Chrome prints: "DevTools listening on ws://127.0.0.1:PORT/devtools/browser/UUID"
  const wsUrl = await new Promise((resolve, reject) => {
    let stderr = '';
    let settled = false;

    // Resolve or reject exactly once, then stop buffering stderr while
    // still draining it for the lifetime of the process.
    const settle = (fn, value) => {
      if (settled) return;
      settled = true;
      clearTimeout(timeout);
      child.stderr.off('data', onStderr);
      child.stderr.resume();
      fn(value);
    };

    const onStderr = (chunk) => {
      stderr += chunk.toString();
      // Require trailing whitespace so a URL split across two chunks is not
      // accepted in truncated form.
      const match = stderr.match(/ws:\/\/\S+(?=\s)/);
      if (match) settle(resolve, match[0]);
    };

    const timeout = setTimeout(() => {
      // Do not leave an orphaned browser behind when startup stalls.
      child.kill();
      settle(reject, new Error(`Browser failed to start within 10s. stderr: ${stderr}`));
    }, 10000);

    child.stderr.on('data', onStderr);

    // The 'error' listener stays attached after settling so a late error
    // event never becomes an unhandled 'error'.
    child.on('error', (err) => {
      settle(reject, new Error(`Failed to launch browser: ${err.message}`));
    });

    child.on('exit', (code) => {
      settle(reject, new Error(`Browser exited with code ${code}. stderr: ${stderr}`));
    });
  });

  // Extract port from wsUrl
  const actualPort = Number.parseInt(new URL(wsUrl).port, 10);

  return { wsUrl, process: child, port: actualPort };
}
|
|
137
|
+
|
|
138
|
+
/**
 * Get the CDP WebSocket URL for a browser already running with --remote-debugging-port.
 *
 * Queries the browser's `/json/version` HTTP endpoint on 127.0.0.1 and
 * returns its `webSocketDebuggerUrl` field.
 *
 * @param {number} port - The debug port
 * @returns {Promise<string>} WebSocket URL
 * @throws {Error} If the port is unreachable, answers with a non-2xx status,
 *   or the response carries no `webSocketDebuggerUrl`
 */
export async function getDebugUrl(port) {
  let res;
  try {
    res = await fetch(`http://127.0.0.1:${port}/json/version`);
  } catch (err) {
    // A refused connection otherwise surfaces as a bare "fetch failed".
    throw new Error(`Cannot reach browser debug port at ${port}: ${err.message}`, { cause: err });
  }
  if (!res.ok) throw new Error(`Cannot reach browser debug port at ${port}: ${res.status}`);

  const data = await res.json();
  if (!data?.webSocketDebuggerUrl) {
    // Fail here rather than handing `undefined` to the WebSocket client.
    throw new Error(`Browser at port ${port} did not report a webSocketDebuggerUrl`);
  }
  return data.webSocketDebuggerUrl;
}
|
package/src/consent.js
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* consent.js — Auto-dismiss cookie consent dialogs via ARIA tree inspection.
|
|
3
|
+
*
|
|
4
|
+
* Scans the page ARIA tree for consent dialogs and clicks the "accept" button.
|
|
5
|
+
* Works across languages by matching common accept/agree patterns.
|
|
6
|
+
* Runs once after page load — no polling, no mutation observers.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
// Button text patterns that mean "accept all" / "I agree" across common languages.
// Order matters: more specific patterns first to avoid false positives.
// The final three entries are the bare single-word fallbacks; the page-wide
// scan drops them with slice(0, -3), so they must remain the last three.
const ACCEPT_PATTERNS = [
  // English
  /\baccept\s*all\b/i,
  /\ballow\s*all\b/i,
  /\bagree\s*to\s*all\b/i,
  /\byes,?\s*i\s*agree\b/i,
  /\bi\s*agree\b/i,
  /\baccept\s*cookies?\b/i,
  /\ballow\s*cookies?\b/i,
  /\bgot\s*it\b/i,
  // Dutch
  /\balles\s*accepteren\b/i,
  /\balles\s*toestaan\b/i,
  /\bakkoord\b/i,
  // German
  /\balle\s*akzeptieren\b/i,
  /\ballem\s*zustimmen\b/i,
  /\balle\s*cookies?\s*akzeptieren\b/i,
  // French
  /\btout\s*accepter\b/i,
  /\baccepter\s*tout\b/i,
  // Accept both the ASCII apostrophe and the typographic one (U+2019).
  /\bj['\u2019]accepte\b/i,
  // Spanish
  /\baceptar\s*todo\b/i,
  /\baceptar\s*todas?\b/i,
  // Italian
  /\baccetta\s*tutto\b/i,
  /\baccetto\b/i,
  // Portuguese
  /\baceitar\s*tudo\b/i,
  // Generic single-word fallbacks (only matched inside dialogs)
  /^accept$/i,
  /^agree$/i,
  /^ok$/i,
];
|
|
46
|
+
|
|
47
|
+
// Roles that indicate a consent dialog container.
// Compared against each AX node's role.value when scanning the tree.
const DIALOG_ROLES = new Set(['dialog', 'alertdialog']);

// Text patterns in dialog names/headings that confirm it's about consent.
// Tested against the dialog's own accessible name first, then against the
// names of heading / StaticText / generic descendants. All are unanchored
// and case-insensitive, so a match anywhere in the text counts.
const CONSENT_DIALOG_HINTS = [
  /cookie/i,
  /consent/i,
  /privacy/i,
  /voordat\s*je\s*verdergaat/i,   // Dutch: "Before you continue"
  /before\s*you\s*continue/i,
  /bevor\s*du\s*fortf/i,          // German: "Before you continue"
  /avant\s*de\s*continuer/i,      // French: "Before you continue"
];
|
|
60
|
+
|
|
61
|
+
/**
 * Activate a DOM node by calling its `.click()` method in the page instead of
 * dispatching mouse events. This sidesteps iframe overlays and z-index
 * stacking that can intercept coordinate-based clicks.
 *
 * @param {object} session - Session-scoped CDP handle
 * @param {number} backendNodeId - Backend DOM node id of the element to click
 * @returns {Promise<void>}
 */
async function jsClick(session, backendNodeId) {
  // Turn the backend node id into a remote object we can call a function on.
  const resolved = await session.send('DOM.resolveNode', { backendNodeId });
  const callParams = {
    objectId: resolved.object.objectId,
    functionDeclaration: 'function() { this.click(); }',
  };
  await session.send('Runtime.callFunctionOn', callParams);
}
|
|
72
|
+
|
|
73
|
+
/**
 * Attempt to dismiss a cookie consent prompt on the current page.
 *
 * Fetches the full accessibility tree, picks out dialog/alertdialog nodes
 * whose name or descendant text looks consent-related, and clicks the first
 * accept button found inside one of them. When no such dialog exists, or none
 * of them yields a clickable accept button, falls back to a page-wide scan
 * for an accept button.
 *
 * @param {object} session - Session-scoped CDP handle
 * @returns {Promise<boolean>} true if a consent dialog was dismissed
 */
export async function dismissConsent(session) {
  await session.send('Accessibility.enable');
  const { nodes } = await session.send('Accessibility.getFullAXTree');

  // Lookup tables: nodeId → node, and nodeId → parentId (roots have no entry).
  const nodeMap = new Map(nodes.map((entry) => [entry.nodeId, entry]));
  const parentMap = new Map(
    nodes.filter((entry) => entry.parentId).map((entry) => [entry.nodeId, entry.parentId]),
  );

  // A dialog counts as a consent dialog if its own name hints at consent,
  // or failing that, if any of its text descendants do.
  const looksLikeConsentDialog = (entry) => {
    if (!DIALOG_ROLES.has(entry.role?.value)) return false;
    const label = entry.name?.value || '';
    if (CONSENT_DIALOG_HINTS.some((hint) => hint.test(label))) return true;
    return hasConsentContent(entry.nodeId, nodes, nodeMap, parentMap);
  };

  const consentDialogs = new Set(
    nodes.filter(looksLikeConsentDialog).map((entry) => entry.nodeId),
  );

  // Try each consent dialog in turn; the first successful click wins.
  for (const dialogId of consentDialogs) {
    const target = findAcceptButton(dialogId, nodes, nodeMap, parentMap);
    if (!target?.backendDOMNodeId) continue;
    try {
      await jsClick(session, target.backendDOMNodeId);
      // Give the page a moment to tear the dialog down.
      await new Promise((wake) => setTimeout(wake, 1000));
      return true;
    } catch {
      // Click failed — try next dialog
    }
  }

  // Either there was no consent dialog at all (some sites use banners), or
  // the accept button lives outside the dialog (e.g. BBC's SourcePoint).
  return tryGlobalConsentButton(nodes, session);
}
|
|
136
|
+
|
|
137
|
+
/**
 * Report whether any heading, static-text or generic node beneath the given
 * dialog has a name matching one of the consent hints.
 *
 * @param {string} dialogId - AX node id of the dialog
 * @param {object[]} nodes - Flat list of AX nodes
 * @param {Map} nodeMap - nodeId → node lookup (not consulted here)
 * @param {Map} parentMap - nodeId → parentId lookup
 * @returns {boolean}
 */
function hasConsentContent(dialogId, nodes, nodeMap, parentMap) {
  const textRoles = ['heading', 'StaticText', 'generic'];
  return nodes.some((candidate) => {
    if (!textRoles.includes(candidate.role?.value)) return false;
    if (!isDescendantOf(candidate.nodeId, dialogId, parentMap)) return false;
    const label = candidate.name?.value || '';
    return CONSENT_DIALOG_HINTS.some((hint) => hint.test(label));
  });
}
|
|
153
|
+
|
|
154
|
+
/**
 * Pick the best "accept" button within a dialog's subtree.
 *
 * Patterns are tried in ACCEPT_PATTERNS order, so a button matching an
 * earlier (more specific) pattern beats one matching a later pattern even
 * if it appears later in the tree.
 *
 * @param {string} dialogId - AX node id of the dialog
 * @param {object[]} nodes - Flat list of AX nodes
 * @param {Map} nodeMap - nodeId → node lookup (not consulted here)
 * @param {Map} parentMap - nodeId → parentId lookup
 * @returns {object|null} The matching button node, or null if none matches
 */
function findAcceptButton(dialogId, nodes, nodeMap, parentMap) {
  const labelOf = (entry) => entry.name?.value || '';

  // Named buttons that sit somewhere under the dialog, in tree order.
  const dialogButtons = nodes.filter(
    (entry) =>
      entry.role?.value === 'button' &&
      labelOf(entry) !== '' &&
      isDescendantOf(entry.nodeId, dialogId, parentMap),
  );

  for (const pattern of ACCEPT_PATTERNS) {
    const hit = dialogButtons.find((entry) => pattern.test(labelOf(entry)));
    if (hit) return hit;
  }
  return null;
}
|
|
169
|
+
|
|
170
|
+
/**
 * Fallback: look for consent buttons anywhere on the page.
 * Only matches strong patterns (not single-word fallbacks) to avoid false positives.
 *
 * The first button whose name matches is clicked; if that click fails the
 * function gives up and reports false rather than trying further buttons.
 *
 * @param {object[]} nodes - Flat list of AX nodes
 * @param {object} session - Session-scoped CDP handle
 * @returns {Promise<boolean>} true if a button was clicked successfully
 */
async function tryGlobalConsentButton(nodes, session) {
  // Drop the trailing ^accept$, ^agree$, ^ok$ entries: outside a dialog those
  // bare words are too likely to belong to unrelated buttons.
  const safePatterns = ACCEPT_PATTERNS.slice(0, -3);

  for (const pattern of safePatterns) {
    for (const node of nodes) {
      if (node.role?.value !== 'button') continue;
      const name = node.name?.value || '';
      if (!name || !pattern.test(name) || !node.backendDOMNodeId) continue;

      try {
        await jsClick(session, node.backendDOMNodeId);
      } catch {
        return false;
      }
      // Give the page a moment to remove the banner.
      await new Promise((r) => setTimeout(r, 1000));
      return true;
    }
  }

  return false;
}
|
|
199
|
+
|
|
200
|
+
/**
 * Walk up the parent chain from nodeId and report whether ancestorId is
 * encountered. A node is not considered its own descendant.
 *
 * @param {string} nodeId - Node to start from
 * @param {string} ancestorId - Node to look for among the ancestors
 * @param {Map} parentMap - nodeId → parentId lookup
 * @returns {boolean}
 */
function isDescendantOf(nodeId, ancestorId, parentMap) {
  for (let cursor = parentMap.get(nodeId); cursor; cursor = parentMap.get(cursor)) {
    if (cursor === ancestorId) return true;
  }
  return false;
}
|
package/src/index.js
CHANGED
|
@@ -13,7 +13,9 @@ import { createCDP } from './cdp.js';
|
|
|
13
13
|
import { formatTree } from './aria.js';
|
|
14
14
|
import { authenticate } from './auth.js';
|
|
15
15
|
import { prune as pruneTree } from './prune.js';
|
|
16
|
-
import { click as cdpClick, type as cdpType, scroll as cdpScroll, press as cdpPress } from './interact.js';
|
|
16
|
+
import { click as cdpClick, type as cdpType, scroll as cdpScroll, press as cdpPress, hover as cdpHover, select as cdpSelect } from './interact.js';
|
|
17
|
+
import { dismissConsent } from './consent.js';
|
|
18
|
+
import { applyStealth } from './stealth.js';
|
|
17
19
|
|
|
18
20
|
/**
|
|
19
21
|
* Browse a URL and return an ARIA snapshot.
|
|
@@ -42,13 +44,16 @@ export async function browse(url, opts = {}) {
|
|
|
42
44
|
const wsUrl = await getDebugUrl(port);
|
|
43
45
|
cdp = await createCDP(wsUrl);
|
|
44
46
|
} else {
|
|
45
|
-
// headless
|
|
47
|
+
// headless or hybrid (start headless)
|
|
46
48
|
browser = await launch();
|
|
47
49
|
cdp = await createCDP(browser.wsUrl);
|
|
48
50
|
}
|
|
49
51
|
|
|
50
52
|
// Step 2: Create a new page target and attach
|
|
51
|
-
|
|
53
|
+
let page = await createPage(cdp, mode !== 'headed');
|
|
54
|
+
|
|
55
|
+
// Step 2.5: Suppress permission prompts
|
|
56
|
+
await suppressPermissions(cdp);
|
|
52
57
|
|
|
53
58
|
// Step 3: Cookie injection — extract from user's browser, inject via CDP
|
|
54
59
|
if (opts.cookies !== false) {
|
|
@@ -62,8 +67,32 @@ export async function browse(url, opts = {}) {
|
|
|
62
67
|
// Step 4: Navigate and wait for load
|
|
63
68
|
await navigate(page, url, timeout);
|
|
64
69
|
|
|
70
|
+
// Step 4.5: Auto-dismiss cookie consent dialogs
|
|
71
|
+
if (opts.consent !== false) {
|
|
72
|
+
await dismissConsent(page.session);
|
|
73
|
+
}
|
|
74
|
+
|
|
65
75
|
// Step 5: Get ARIA tree
|
|
66
|
-
|
|
76
|
+
let { tree } = await ariaTree(page);
|
|
77
|
+
|
|
78
|
+
// Step 5.5: Hybrid fallback — if headless was bot-blocked, retry headed
|
|
79
|
+
if (mode === 'hybrid' && isChallengePage(tree)) {
|
|
80
|
+
await cdp.send('Target.closeTarget', { targetId: page.targetId });
|
|
81
|
+
cdp.close();
|
|
82
|
+
if (browser) { browser.process.kill(); browser = null; }
|
|
83
|
+
|
|
84
|
+
const port = opts.port || 9222;
|
|
85
|
+
const wsUrl = await getDebugUrl(port);
|
|
86
|
+
cdp = await createCDP(wsUrl);
|
|
87
|
+
page = await createPage(cdp, false);
|
|
88
|
+
await suppressPermissions(cdp);
|
|
89
|
+
if (opts.cookies !== false) {
|
|
90
|
+
try { await authenticate(page.session, url, { browser: opts.browser }); } catch {}
|
|
91
|
+
}
|
|
92
|
+
await navigate(page, url, timeout);
|
|
93
|
+
if (opts.consent !== false) await dismissConsent(page.session);
|
|
94
|
+
({ tree } = await ariaTree(page));
|
|
95
|
+
}
|
|
67
96
|
|
|
68
97
|
// Step 6: Prune for agent consumption
|
|
69
98
|
let snapshot;
|
|
@@ -106,12 +135,22 @@ export async function connect(opts = {}) {
|
|
|
106
135
|
cdp = await createCDP(browser.wsUrl);
|
|
107
136
|
}
|
|
108
137
|
|
|
109
|
-
const page = await createPage(cdp);
|
|
138
|
+
const page = await createPage(cdp, mode !== 'headed');
|
|
110
139
|
let refMap = new Map();
|
|
111
140
|
|
|
141
|
+
// Suppress permission prompts for all modes
|
|
142
|
+
await suppressPermissions(cdp);
|
|
143
|
+
|
|
112
144
|
return {
|
|
113
145
|
async goto(url, timeout = 30000) {
|
|
114
146
|
await navigate(page, url, timeout);
|
|
147
|
+
if (opts.consent !== false) {
|
|
148
|
+
await dismissConsent(page.session);
|
|
149
|
+
}
|
|
150
|
+
},
|
|
151
|
+
|
|
152
|
+
async injectCookies(url, cookieOpts) {
|
|
153
|
+
await authenticate(page.session, url, { browser: cookieOpts?.browser });
|
|
115
154
|
},
|
|
116
155
|
|
|
117
156
|
async snapshot(pruneOpts) {
|
|
@@ -128,10 +167,10 @@ export async function connect(opts = {}) {
|
|
|
128
167
|
await cdpClick(page.session, backendNodeId);
|
|
129
168
|
},
|
|
130
169
|
|
|
131
|
-
async type(ref, text,
|
|
170
|
+
async type(ref, text, typeOpts) {
|
|
132
171
|
const backendNodeId = refMap.get(ref);
|
|
133
172
|
if (!backendNodeId) throw new Error(`No element found for ref "${ref}"`);
|
|
134
|
-
await cdpType(page.session, backendNodeId, text,
|
|
173
|
+
await cdpType(page.session, backendNodeId, text, typeOpts);
|
|
135
174
|
},
|
|
136
175
|
|
|
137
176
|
async scroll(deltaY) {
|
|
@@ -142,8 +181,42 @@ export async function connect(opts = {}) {
|
|
|
142
181
|
await cdpPress(page.session, key);
|
|
143
182
|
},
|
|
144
183
|
|
|
145
|
-
|
|
146
|
-
|
|
184
|
+
async hover(ref) {
|
|
185
|
+
const backendNodeId = refMap.get(ref);
|
|
186
|
+
if (!backendNodeId) throw new Error(`No element found for ref "${ref}"`);
|
|
187
|
+
await cdpHover(page.session, backendNodeId);
|
|
188
|
+
},
|
|
189
|
+
|
|
190
|
+
async select(ref, value) {
|
|
191
|
+
const backendNodeId = refMap.get(ref);
|
|
192
|
+
if (!backendNodeId) throw new Error(`No element found for ref "${ref}"`);
|
|
193
|
+
await cdpSelect(page.session, backendNodeId, value);
|
|
194
|
+
},
|
|
195
|
+
|
|
196
|
+
async screenshot(screenshotOpts = {}) {
|
|
197
|
+
const format = screenshotOpts.format || 'png';
|
|
198
|
+
const params = { format };
|
|
199
|
+
if (format === 'jpeg' || format === 'webp') {
|
|
200
|
+
params.quality = screenshotOpts.quality || 80;
|
|
201
|
+
}
|
|
202
|
+
const { data } = await page.session.send('Page.captureScreenshot', params);
|
|
203
|
+
return data;
|
|
204
|
+
},
|
|
205
|
+
|
|
206
|
+
async waitForNavigation(timeout = 30000) {
|
|
207
|
+
// Wait for loadEventFired (full page load). If it doesn't fire within
|
|
208
|
+
// timeout, fall back to frameNavigated (SPA pushState/replaceState).
|
|
209
|
+
try {
|
|
210
|
+
await page.session.once('Page.loadEventFired', timeout);
|
|
211
|
+
} catch {
|
|
212
|
+
// Timeout — likely SPA nav with no load event. frameNavigated may
|
|
213
|
+
// have already fired. Give a settle delay for DOM updates.
|
|
214
|
+
await new Promise((r) => setTimeout(r, 500));
|
|
215
|
+
}
|
|
216
|
+
},
|
|
217
|
+
|
|
218
|
+
waitForNetworkIdle(idleOpts = {}) {
|
|
219
|
+
return waitForNetworkIdle(page.session, idleOpts);
|
|
147
220
|
},
|
|
148
221
|
|
|
149
222
|
/** Raw CDP session for escape hatch */
|
|
@@ -159,10 +232,35 @@ export async function connect(opts = {}) {
|
|
|
159
232
|
|
|
160
233
|
// --- Internal helpers ---
|
|
161
234
|
|
|
235
|
+
// Permission names that are switched to "denied" so the browser never shows
// a prompt for them.
const DENY_PERMISSIONS = [
  'geolocation', 'notifications', 'midi', 'midiSysex',
  'durableStorage', 'audioCapture', 'videoCapture',
  'backgroundSync', 'sensors', 'idleDetection',
];

/**
 * Deny every permission in DENY_PERMISSIONS via CDP Browser.setPermission,
 * one request at a time. A failure for one permission (for example a name the
 * running Chrome version does not know) is ignored and the rest still run.
 *
 * @param {object} cdp - CDP client
 * @returns {Promise<void>}
 */
async function suppressPermissions(cdp) {
  for (let i = 0; i < DENY_PERMISSIONS.length; i += 1) {
    const request = {
      permission: { name: DENY_PERMISSIONS[i] },
      setting: 'denied',
    };
    try {
      await cdp.send('Browser.setPermission', request);
    } catch {
      // Permission type not supported in this Chrome version — skip
    }
  }
}
|
|
257
|
+
|
|
162
258
|
/**
|
|
163
259
|
* Create a new page target and return a session-scoped handle.
|
|
260
|
+
* @param {object} cdp - CDP client
|
|
261
|
+
* @param {boolean} [stealth=false] - Apply stealth patches (headless only)
|
|
164
262
|
*/
|
|
165
|
-
async function createPage(cdp) {
|
|
263
|
+
async function createPage(cdp, stealth = false) {
|
|
166
264
|
const { targetId } = await cdp.send('Target.createTarget', { url: 'about:blank' });
|
|
167
265
|
const { sessionId } = await cdp.send('Target.attachToTarget', {
|
|
168
266
|
targetId,
|
|
@@ -176,6 +274,11 @@ async function createPage(cdp) {
|
|
|
176
274
|
await session.send('Network.enable');
|
|
177
275
|
await session.send('DOM.enable');
|
|
178
276
|
|
|
277
|
+
// Apply stealth patches before any navigation (headless only)
|
|
278
|
+
if (stealth) {
|
|
279
|
+
await applyStealth(session);
|
|
280
|
+
}
|
|
281
|
+
|
|
179
282
|
return { session, targetId, sessionId };
|
|
180
283
|
}
|
|
181
284
|
|
|
@@ -256,3 +359,76 @@ function extractProps(props) {
|
|
|
256
359
|
for (const p of props) result[p.name] = p.value?.value;
|
|
257
360
|
return result;
|
|
258
361
|
}
|
|
362
|
+
|
|
363
|
+
/**
 * Wait until no network requests are pending for `idle` ms.
 *
 * In-flight requests are tracked by CDP `requestId` rather than by counting
 * events: a redirected request emits `Network.requestWillBeSent` again with
 * the same id but finishes only once, so a plain counter would never return
 * to zero. Events delivered without a `requestId` fall back to a counter.
 *
 * @param {object} session - Session-scoped CDP handle
 * @param {object} [opts]
 * @param {number} [opts.timeout=30000] - Max wait time
 * @param {number} [opts.idle=500] - Idle threshold in ms
 * @returns {Promise<void>} Resolves once the network has been idle for `idle` ms;
 *   rejects with an Error if `timeout` elapses first
 */
function waitForNetworkIdle(session, opts = {}) {
  const timeout = opts.timeout || 30000;
  const idle = opts.idle || 500;

  return new Promise((resolve, reject) => {
    const inflight = new Set();
    let anonymous = 0; // requests reported without a requestId
    let idleTimer = null;
    let deadlineTimer = null;
    const unsubs = [];

    // Stop both timers and detach every listener; safe to call repeatedly.
    const cleanup = () => {
      clearTimeout(idleTimer);
      clearTimeout(deadlineTimer);
      for (const unsub of unsubs) unsub();
    };

    // (Re)start the idle countdown, but only while nothing is in flight.
    const armIdleTimer = () => {
      clearTimeout(idleTimer);
      if (inflight.size + anonymous === 0) {
        idleTimer = setTimeout(() => {
          cleanup();
          resolve();
        }, idle);
      }
    };

    const onStart = (params) => {
      const id = params?.requestId;
      if (id === undefined) anonymous += 1;
      else inflight.add(id);
      clearTimeout(idleTimer);
    };

    const onEnd = (params) => {
      const id = params?.requestId;
      // Requests that began before we subscribed are simply unknown here;
      // neither branch can drive the pending count below zero.
      if (id === undefined) anonymous = Math.max(0, anonymous - 1);
      else inflight.delete(id);
      armIdleTimer();
    };

    unsubs.push(session.on('Network.requestWillBeSent', onStart));
    unsubs.push(session.on('Network.loadingFinished', onEnd));
    unsubs.push(session.on('Network.loadingFailed', onEnd));

    deadlineTimer = setTimeout(() => {
      cleanup();
      reject(new Error(`waitForNetworkIdle timed out after ${timeout}ms`));
    }, timeout);

    // Start check immediately (might already be idle)
    armIdleTimer();
  });
}
|
|
407
|
+
|
|
408
|
+
/**
 * Decide whether a page looks like a bot-challenge interstitial
 * (Cloudflare and similar).
 *
 * A missing tree counts as a challenge. Otherwise the names of all nodes in
 * the tree are flattened into one string and searched, case-insensitively,
 * for any known challenge phrase. Tree size is not taken into account.
 *
 * @param {object} tree - Root of the ARIA tree
 * @returns {boolean} true if the page appears to be a challenge page
 */
function isChallengePage(tree) {
  if (!tree) return true;

  const haystack = flattenTreeText(tree).toLowerCase();
  const markers = [
    'just a moment',
    'checking if the site connection is secure',
    'checking your browser',
    'please wait',
    'verify you are human',
    'attention required',
  ];

  for (const marker of markers) {
    if (haystack.includes(marker)) return true;
  }
  return false;
}
|
|
426
|
+
|
|
427
|
+
/**
 * Concatenate the `name` of a node and of all its descendants, depth-first,
 * with a single space before each child's contribution.
 *
 * @param {object} node - Tree node with optional `name` and `children`
 * @returns {string} Flattened text ('' for a missing node)
 */
function flattenTreeText(node) {
  if (!node) return '';
  const kids = node.children || [];
  return kids.reduce(
    (soFar, kid) => soFar + ' ' + flattenTreeText(kid),
    node.name || '',
  );
}
|