barebrowse 0.5.5 → 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/README.md +2 -2
- package/barebrowse.context.md +3 -3
- package/mcp-server.js +31 -8
- package/package.json +1 -1
- package/src/consent.js +44 -4
- package/src/index.js +3 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,23 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.5.6
|
|
4
|
+
|
|
5
|
+
Assess now works on bot-blocking EU sites. Headed fallback + consent fix.
|
|
6
|
+
|
|
7
|
+
### Assess headed fallback (`mcp-server.js`)
|
|
8
|
+
- Assess tries headless first; if the result looks bot-blocked (overall score ≤5 with the network, trackers, and profiling category scores all zero), it retries with a separate `connect({ mode: 'headed' })` session
|
|
9
|
+
- Previously all assess scans ran headless-only — sites like Lufthansa, Coolblue, Rabobank returned score 5 (empty page behind bot wall)
|
|
10
|
+
- Now: Lufthansa 50/high, Coolblue 55/high, Rabobank 75/critical
|
|
11
|
+
|
|
12
|
+
### Consent dismissal improvements (`src/consent.js`)
|
|
13
|
+
- Tab `goto()` now runs `dismissConsent()` (was missing — consent walls blocked all trackers from loading, making assess see a clean page)
|
|
14
|
+
- Added `/\baccepteren\b/i` Dutch pattern (Rabobank uses bare "ACCEPTEREN" without "alles")
|
|
15
|
+
- realClick fallback: if jsClick doesn't dismiss the CMP (the accept button is still present in the ARIA tree one second after the synthetic click), retries with a real `Input.dispatchMouseEvent` mouse click
|
|
16
|
+
- Both dialog-scoped and global consent paths now have the jsClick→realClick fallback
|
|
17
|
+
|
|
18
|
+
### createTab consent (`src/index.js`)
|
|
19
|
+
- `createTab().goto()` now dismisses consent after navigation (same as main page `goto()`)
|
|
20
|
+
|
|
3
21
|
## 0.5.5
|
|
4
22
|
|
|
5
23
|
Fix assess tab leak and Linux shared memory crash.
|
package/README.md
CHANGED
|
@@ -105,7 +105,7 @@ For code examples, API reference, and wiring instructions, see **[barebrowse.con
|
|
|
105
105
|
|
|
106
106
|
## What it handles automatically
|
|
107
107
|
|
|
108
|
-
Cookie consent walls (29 languages), login walls (cookie extraction from your browsers), bot detection (stealth patches + automatic headed fallback on challenge pages, error pages, and near-empty responses), permission prompts, SPA navigation, JS dialogs, off-screen elements, pre-filled inputs, ARIA noise, and profile locking. The agent doesn't think about any of it.
|
|
108
|
+
Cookie consent walls (29 languages, with real mouse click fallback for stubborn CMPs), login walls (cookie extraction from your browsers), bot detection (stealth patches + automatic headed fallback on challenge pages, error pages, and near-empty responses), permission prompts, SPA navigation, JS dialogs, off-screen elements, pre-filled inputs, ARIA noise, and profile locking. The agent doesn't think about any of it.
|
|
109
109
|
|
|
110
110
|
## What the agent sees
|
|
111
111
|
|
|
@@ -139,7 +139,7 @@ Everything the agent can do through barebrowse:
|
|
|
139
139
|
| **Upload** | Set files on a file input element |
|
|
140
140
|
| **Screenshot** | Page capture as base64 PNG/JPEG/WebP |
|
|
141
141
|
| **PDF** | Export page as PDF |
|
|
142
|
-
| **Assess** | Privacy scan: score (0-100), risk level, 10-category breakdown.
|
|
142
|
+
| **Assess** | Privacy scan: score (0-100), risk level, 10-category breakdown. Tries headless first, falls back to headed if bot-blocked. Consent auto-dismissed before scan. Max 3 concurrent, 30s timeout, tabs cleaned up. Requires `npm install wearehere`. |
|
|
143
143
|
| **Tabs** | List open tabs, switch between them |
|
|
144
144
|
| **Wait for content** | Poll for text or CSS selector to appear on page |
|
|
145
145
|
| **Wait for navigation** | SPA-aware: works for full page loads and pushState |
|
package/barebrowse.context.md
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# barebrowse -- Integration Guide
|
|
2
2
|
|
|
3
3
|
> For AI assistants and developers wiring barebrowse into a project.
|
|
4
|
-
> v0.5.5 | Node.js >= 22 | 0 required deps | MIT
|
|
4
|
+
> v0.5.6 | Node.js >= 22 | 0 required deps | MIT
|
|
5
5
|
|
|
6
6
|
## What this is
|
|
7
7
|
|
|
@@ -146,7 +146,7 @@ barebrowse can inject cookies from the user's real browser sessions, bypassing l
|
|
|
146
146
|
| Obstacle | How | Mode |
|
|
147
147
|
|---|---|---|
|
|
148
148
|
| Cookie consent | ARIA scan + jsClick accept button, 29 languages | Both |
|
|
149
|
-
| Consent behind iframes | JS `.click()` via DOM.resolveNode bypasses overlays | Both |
|
|
149
|
+
| Consent behind iframes | JS `.click()` via DOM.resolveNode bypasses overlays, real mouse click fallback for CMPs that ignore synthetic clicks | Both |
|
|
150
150
|
| Permission prompts | Launch flags + CDP Browser.setPermission auto-deny | Both |
|
|
151
151
|
| Media autoplay blocked | `--autoplay-policy=no-user-gesture-required` | Both |
|
|
152
152
|
| Login walls | Cookie extraction from Firefox/Chromium + CDP injection | Both |
|
|
@@ -243,7 +243,7 @@ Action tools return `'ok'` -- the agent calls `snapshot` explicitly to observe.
|
|
|
243
243
|
|
|
244
244
|
Session runs in hybrid mode (headless with automatic headed fallback on bot detection). `goto` injects cookies from the user's browser before navigation for authenticated access.
|
|
245
245
|
|
|
246
|
-
Session tools share a singleton page, lazy-created on first use. Assess
|
|
246
|
+
Session tools share a singleton page, lazy-created on first use. Assess tries headless first; if bot-blocked (overall score ≤5 with the network, trackers, and profiling category scores all zero), it retries with a separate headed session. Consent is dismissed in each assess tab, and tabs are closed after every scan. Max 3 concurrent, with CDP crash recovery.
|
|
247
247
|
|
|
248
248
|
## Architecture
|
|
249
249
|
|
package/mcp-server.js
CHANGED
|
@@ -294,9 +294,14 @@ async function handleToolCall(name, args) {
|
|
|
294
294
|
if (!assessFn) throw new Error('wearehere is not installed. Run: npm install wearehere');
|
|
295
295
|
await acquireAssessSlot();
|
|
296
296
|
try {
|
|
297
|
-
const runAssess = async () => {
|
|
298
|
-
|
|
299
|
-
|
|
297
|
+
const runAssess = async (headed) => {
|
|
298
|
+
let tab;
|
|
299
|
+
if (headed) {
|
|
300
|
+
tab = await connect({ mode: 'headed' });
|
|
301
|
+
} else {
|
|
302
|
+
const page = await getPage();
|
|
303
|
+
tab = await page.createTab();
|
|
304
|
+
}
|
|
300
305
|
let timer;
|
|
301
306
|
try {
|
|
302
307
|
const result = await Promise.race([
|
|
@@ -320,15 +325,33 @@ async function handleToolCall(name, args) {
|
|
|
320
325
|
throw err;
|
|
321
326
|
}
|
|
322
327
|
};
|
|
328
|
+
|
|
329
|
+
// Try headless first
|
|
323
330
|
try {
|
|
324
|
-
|
|
331
|
+
const result = await runAssess(false);
|
|
332
|
+
// Check if result looks bot-blocked (overall score ≤5 with network, trackers, and profiling scores all zero)
|
|
333
|
+
try {
|
|
334
|
+
const parsed = JSON.parse(result);
|
|
335
|
+
const { network, trackers, profiling } = parsed.categories || {};
|
|
336
|
+
const allZero = (network?.score || 0) === 0
|
|
337
|
+
&& (trackers?.score || 0) === 0
|
|
338
|
+
&& (profiling?.score || 0) === 0;
|
|
339
|
+
if (allZero && (parsed.score || 0) <= 5) {
|
|
340
|
+
// Likely bot-blocked — retry headed
|
|
341
|
+
try {
|
|
342
|
+
return await runAssess(true);
|
|
343
|
+
} catch {
|
|
344
|
+
return result; // headed failed, return headless result
|
|
345
|
+
}
|
|
346
|
+
}
|
|
347
|
+
} catch {}
|
|
348
|
+
return result;
|
|
325
349
|
} catch (err) {
|
|
326
350
|
if (isCdpDead(err)) _page = null;
|
|
327
|
-
|
|
351
|
+
// Headless crashed — try headed
|
|
328
352
|
try {
|
|
329
|
-
return await runAssess();
|
|
353
|
+
return await runAssess(true);
|
|
330
354
|
} catch (retryErr) {
|
|
331
|
-
if (isCdpDead(retryErr)) _page = null;
|
|
332
355
|
throw retryErr;
|
|
333
356
|
}
|
|
334
357
|
}
|
|
@@ -356,7 +379,7 @@ async function handleMessage(msg) {
|
|
|
356
379
|
return jsonrpcResponse(id, {
|
|
357
380
|
protocolVersion: '2024-11-05',
|
|
358
381
|
capabilities: { tools: {} },
|
|
359
|
-
serverInfo: { name: 'barebrowse', version: '0.5.5' },
|
|
382
|
+
serverInfo: { name: 'barebrowse', version: '0.5.6' },
|
|
360
383
|
});
|
|
361
384
|
}
|
|
362
385
|
|
package/package.json
CHANGED
package/src/consent.js
CHANGED
|
@@ -21,6 +21,7 @@ const ACCEPT_PATTERNS = [
|
|
|
21
21
|
// Dutch
|
|
22
22
|
/\balles\s*accepteren\b/i,
|
|
23
23
|
/\balles\s*toestaan\b/i,
|
|
24
|
+
/\baccepteren\b/i,
|
|
24
25
|
/\bakkoord\b/i,
|
|
25
26
|
// German
|
|
26
27
|
/\balle\s*akzeptieren\b/i,
|
|
@@ -162,6 +163,20 @@ async function jsClick(session, backendNodeId) {
|
|
|
162
163
|
});
|
|
163
164
|
}
|
|
164
165
|
|
|
166
|
+
/**
|
|
167
|
+
* Click a node via real mouse events (scrollIntoView → getBoxModel → mousePressed/Released).
|
|
168
|
+
* Some CMPs ignore synthetic .click() and only respond to real Input events.
|
|
169
|
+
*/
|
|
170
|
+
async function realClick(session, backendNodeId) {
|
|
171
|
+
await session.send('DOM.scrollIntoViewIfNeeded', { backendNodeId });
|
|
172
|
+
const { model } = await session.send('DOM.getBoxModel', { backendNodeId });
|
|
173
|
+
const [x1, y1, x2, y2, x3, y3, x4, y4] = model.content;
|
|
174
|
+
const cx = (x1 + x3) / 2;
|
|
175
|
+
const cy = (y1 + y3) / 2;
|
|
176
|
+
await session.send('Input.dispatchMouseEvent', { type: 'mousePressed', x: cx, y: cy, button: 'left', clickCount: 1 });
|
|
177
|
+
await session.send('Input.dispatchMouseEvent', { type: 'mouseReleased', x: cx, y: cy, button: 'left', clickCount: 1 });
|
|
178
|
+
}
|
|
179
|
+
|
|
165
180
|
/**
|
|
166
181
|
* Try to dismiss a cookie consent dialog on the current page.
|
|
167
182
|
* Inspects the ARIA tree for dialog elements with consent-related content,
|
|
@@ -212,8 +227,19 @@ export async function dismissConsent(session) {
|
|
|
212
227
|
const button = findAcceptButton(dialogId, nodes, nodeMap, parentMap);
|
|
213
228
|
if (button?.backendDOMNodeId) {
|
|
214
229
|
try {
|
|
230
|
+
// Try jsClick first (bypasses iframe overlays)
|
|
215
231
|
await jsClick(session, button.backendDOMNodeId);
|
|
216
232
|
await new Promise((r) => setTimeout(r, 1000));
|
|
233
|
+
// Check if consent actually dismissed — some CMPs ignore synthetic clicks
|
|
234
|
+
const { nodes: nodesAfter } = await session.send('Accessibility.getFullAXTree');
|
|
235
|
+
const stillThere = nodesAfter.some((n) =>
|
|
236
|
+
n.role?.value === 'button' && n.name?.value === button.name?.value
|
|
237
|
+
);
|
|
238
|
+
if (stillThere) {
|
|
239
|
+
// Retry with real mouse event
|
|
240
|
+
await realClick(session, button.backendDOMNodeId);
|
|
241
|
+
await new Promise((r) => setTimeout(r, 1000));
|
|
242
|
+
}
|
|
217
243
|
return true;
|
|
218
244
|
} catch {
|
|
219
245
|
// Click failed — try next dialog
|
|
@@ -278,10 +304,24 @@ function tryGlobalConsentButton(nodes, session) {
|
|
|
278
304
|
if (node.role?.value !== 'button') continue;
|
|
279
305
|
const name = node.name?.value || '';
|
|
280
306
|
if (name && pattern.test(name) && node.backendDOMNodeId) {
|
|
281
|
-
return
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
307
|
+
return (async () => {
|
|
308
|
+
try {
|
|
309
|
+
await jsClick(session, node.backendDOMNodeId);
|
|
310
|
+
await new Promise((r) => setTimeout(r, 1000));
|
|
311
|
+
// Check if button still exists — retry with real click if so
|
|
312
|
+
const { nodes: nodesAfter } = await session.send('Accessibility.getFullAXTree');
|
|
313
|
+
const stillThere = nodesAfter.some((n) =>
|
|
314
|
+
n.role?.value === 'button' && n.name?.value === name
|
|
315
|
+
);
|
|
316
|
+
if (stillThere) {
|
|
317
|
+
await realClick(session, node.backendDOMNodeId);
|
|
318
|
+
await new Promise((r) => setTimeout(r, 1000));
|
|
319
|
+
}
|
|
320
|
+
return true;
|
|
321
|
+
} catch {
|
|
322
|
+
return false;
|
|
323
|
+
}
|
|
324
|
+
})();
|
|
285
325
|
}
|
|
286
326
|
}
|
|
287
327
|
}
|
package/src/index.js
CHANGED
|
@@ -371,6 +371,9 @@ export async function connect(opts = {}) {
|
|
|
371
371
|
return {
|
|
372
372
|
async goto(url, timeout = 30000) {
|
|
373
373
|
await navigate(tab, url, timeout);
|
|
374
|
+
if (opts.consent !== false) {
|
|
375
|
+
await dismissConsent(tab.session);
|
|
376
|
+
}
|
|
374
377
|
},
|
|
375
378
|
async injectCookies(url, cookieOpts) {
|
|
376
379
|
await authenticate(tab.session, url, { browser: cookieOpts?.browser });
|