barebrowse 0.5.6 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.5.7
4
+
5
+ MCP server crash resilience + process hardening.
6
+
7
+ ### Process hardening (`mcp-server.js`)
8
+ - Added `unhandledRejection` and `uncaughtException` handlers — browser OOM/crash no longer kills the MCP server process
9
+ - Previously: heavy sites like zalando.de crashed the browser via OOM, the CDP WebSocket close rejected pending promises, unhandled rejections crashed Node
10
+ - Now: session resets and next request gets a fresh browser. Server stays alive.
11
+
12
+ ### Validated at scale
13
+ - Scanned 149 sites across NL/US/EU with zero server crashes
14
+ - Only 1 genuine timeout (rtv.nl) — all former crashers (zalando.de, otto.de, bijenkorf.nl, jumbo.com, klm.nl) now return results
15
+ - Full results: `wearehere-scan-results.md`
16
+
3
17
  ## 0.5.6
4
18
 
5
19
  Assess now works on bot-blocking EU sites. Headed fallback + consent fix.
@@ -1,7 +1,7 @@
1
1
  # barebrowse -- Integration Guide
2
2
 
3
3
  > For AI assistants and developers wiring barebrowse into a project.
4
- > v0.5.6 | Node.js >= 22 | 0 required deps | MIT
4
+ > v0.5.7 | Node.js >= 22 | 0 required deps | MIT
5
5
 
6
6
  ## What this is
7
7
 
@@ -243,7 +243,7 @@ Action tools return `'ok'` -- the agent calls `snapshot` explicitly to observe.
243
243
 
244
244
  Session runs in hybrid mode (headless with automatic headed fallback on bot detection). `goto` injects cookies from the user's browser before navigation for authenticated access.
245
245
 
246
- Session tools share a singleton page, lazy-created on first use. Assess tries headless first; if bot-blocked (score ≤5 with all zeros), retries with a separate headed session. Tabs dismissed for consent and closed after every scan. Max 3 concurrent, with CDP crash recovery.
246
+ Session tools share a singleton page, lazy-created on first use. Assess tries headless first; if bot-blocked (score ≤5 with all zeros), it retries with a separate headed session. Consent dialogs are dismissed and tabs are closed after every scan. Max 3 concurrent. Browser OOM/crash auto-recovers (session resets, server stays alive).
247
247
 
248
248
  ## Architecture
249
249
 
package/mcp-server.js CHANGED
@@ -379,7 +379,7 @@ async function handleMessage(msg) {
379
379
  return jsonrpcResponse(id, {
380
380
  protocolVersion: '2024-11-05',
381
381
  capabilities: { tools: {} },
382
- serverInfo: { name: 'barebrowse', version: '0.5.6' },
382
+ serverInfo: { name: 'barebrowse', version: '0.5.7' },
383
383
  });
384
384
  }
385
385
 
@@ -443,6 +443,15 @@ process.stdin.on('data', (chunk) => {
443
443
  }
444
444
  });
445
445
 
446
// Prevent unhandled rejections and uncaught exceptions from crashing the server.
// Browser OOM/crash rejects all pending CDP promises — some may not be awaited.
// Both events get the same treatment: forget the cached page so it is
// lazily re-created on next use, and record the failure so it is not lost.
for (const event of ['unhandledRejection', 'uncaughtException']) {
  process.on(event, (err) => {
    // Reset first, so the session is dropped even if logging itself fails.
    _page = null;
    // console.error writes to stderr, so the diagnostic does not mix with
    // anything the server writes to stdout.
    console.error(`[barebrowse] ${event}: ${err?.stack ?? err}`);
  });
}
454
+
446
455
  // Clean up on exit
447
456
  process.on('SIGINT', async () => {
448
457
  if (_page) await _page.close().catch(() => {});
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "barebrowse",
3
- "version": "0.5.6",
3
+ "version": "0.5.7",
4
4
  "description": "Authenticated web browsing for autonomous agents via CDP. URL in, pruned ARIA snapshot out.",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
package/rescan.mjs ADDED
@@ -0,0 +1,107 @@
1
// Load barebrowse relative to this script (package.json "main" is
// src/index.js) rather than through an absolute path that only exists on
// one developer's machine.
import { connect } from './src/index.js';
import { assess } from 'wearehere';
3
+
4
// Bare hostnames to re-scan, grouped by the reason each one is on the list.
// The groups are flattened into one ordered array; scanSite() prepends
// 'https://www.' to every entry.
const SITES = [
  // NL timeouts
  ['rtv.nl', 'bijenkorf.nl', 'jumbo.com', 'klm.nl'],
  // EU timeouts
  ['zalando.de', 'otto.de', 'allegro.pl'],
  // Suspected bot-blocked zeros (NL)
  [
    'coolblue.nl',
    'rabobank.nl',
    'telegraaf.nl',
    'wehkamp.nl',
    'ing.nl',
    'kvk.nl',
    'thuisbezorgd.nl',
    'transavia.com',
    'schiphol.nl',
  ],
  // Suspected bot-blocked zeros (EU)
  [
    'lufthansa.com',
    'fnac.com',
    'svt.se',
    'revolut.com',
    'leboncoin.fr',
    'subito.it',
    'idealista.com',
  ],
  // Suspected bot-blocked zeros (US)
  [
    'washingtonpost.com',
    'usatoday.com',
    'etsy.com',
    'costco.com',
    'homedepot.com',
    'chatgpt.com',
    'doordash.com',
    'yelp.com',
    'reuters.com',
    'snapchat.com',
    'cnn.com',
  ],
].flat();
20
+
21
/**
 * Scan one site and print exactly one pipe-delimited result line to stdout.
 *
 * Flow:
 *  1. Assess the site in a tab of a `hybrid` session.
 *  2. If that result looks bot-blocked (network, trackers and profiling all
 *     score zero and the overall score is <= 5), or the assessment hit the
 *     hard deadline, retry once in a separate `headed` session.
 *
 * Output lines:
 *  - `<url>|<score>|<risk>|HEADLESS|<summary>`
 *  - `<url>|<score>|<risk>|HEADED|<summary>`
 *  - `<url>|—|timeout|BOTH|Failed both modes`
 *  - `<url>|—|error|—|<message>`
 *
 * Never rejects for scan failures; they are reported on stdout instead.
 *
 * @param {string} url - Bare hostname without scheme or `www.` (e.g. `example.com`).
 * @returns {Promise<void>}
 */
async function scanSite(url) {
  const full = 'https://www.' + url;
  const assessOptions = { timeout: 30000, settle: 3000 };
  // Hard deadline on top of assess()'s own timeout option.
  const hardTimeoutMs = 35000;

  // Race assess() against the hard deadline. The timer is always cleared so
  // a finished scan does not leave a pending 35 s timer holding the process open.
  const assessWithDeadline = async (target) => {
    let timer;
    const deadline = new Promise((_, reject) => {
      timer = setTimeout(() => reject(new Error('timeout')), hardTimeoutMs);
    });
    try {
      return await Promise.race([assess(target, full, assessOptions), deadline]);
    } finally {
      clearTimeout(timer);
    }
  };

  // One attempt in a dedicated headed session, which is always closed afterwards.
  const scanHeaded = async () => {
    const headed = await connect({ mode: 'headed' });
    try {
      await headed.injectCookies(full).catch(() => {});
      return await assessWithDeadline(headed);
    } finally {
      await headed.close().catch(() => {});
    }
  };

  const report = (result, mode) => {
    console.log(url + '|' + result.score + '|' + result.risk + '|' + mode + '|' + summarize(result));
  };
  const reportError = (e) => {
    console.log(url + '|—|error|—|' + e.message);
  };
  const reportBothFailed = () => {
    console.log(url + '|—|timeout|BOTH|Failed both modes');
  };
  const isZero = (category) => (category?.score || 0) === 0;

  // Step 1: hybrid attempt. Tab and page are each closed exactly once, on
  // every path, including when createTab() itself fails.
  let headless;
  let assessTimedOut = false;
  let page;
  try {
    page = await connect({ mode: 'hybrid' });
    const tab = await page.createTab();
    try {
      await tab.injectCookies(full).catch(() => {});
      headless = await assessWithDeadline(tab);
    } catch (e) {
      // Only a timeout qualifies for the headed fallback; anything else is reported.
      if (e.message !== 'timeout') throw e;
      assessTimedOut = true;
    } finally {
      await tab.close().catch(() => {});
    }
  } catch (e) {
    reportError(e);
    return;
  } finally {
    await page?.close().catch(() => {});
  }

  // Step 2a: the hybrid attempt timed out, so try headed; any failure means both modes failed.
  if (assessTimedOut) {
    try {
      report(await scanHeaded(), 'HEADED');
    } catch {
      reportBothFailed();
    }
    return;
  }

  // Step 2b: the hybrid attempt returned a result; retry headed only if it looks bot-blocked.
  try {
    const { network, trackers, profiling } = headless.categories;
    const allZero = isZero(network) && isZero(trackers) && isZero(profiling);
    if (!(allZero && headless.score <= 5)) {
      report(headless, 'HEADLESS');
      return;
    }
    report(await scanHeaded(), 'HEADED');
  } catch (e) {
    // The headed fallback is attempted at most once; a timeout there counts as both modes failing.
    if (e.message === 'timeout') {
      reportBothFailed();
    } else {
      reportError(e);
    }
  }
}
88
+
89
/**
 * Condense an assessment result into the free-text findings column.
 *
 * Categories are visited in a fixed order; each one whose `score` is greater
 * than zero contributes one fragment. Fragments are joined with '; ', and a
 * result with no scoring category yields 'Clean'.
 *
 * @param {{categories: Object}} r - Assessment result; reads only `r.categories`.
 * @returns {string} Findings summary.
 */
function summarize(r) {
  const categories = r.categories;
  // Most categories contribute their own summary; pressure and terms get a label.
  const useSummary = (entry) => entry.summary;
  const renderers = [
    ['cookies', useSummary],
    ['network', useSummary],
    ['trackers', useSummary],
    ['profiling', useSummary],
    ['selling_data', useSummary],
    ['pressure', (entry) => 'pressure:' + entry.score],
    ['watching', useSummary],
    ['stored_data', useSummary],
    ['terms', (entry) => 'terms:' + entry.summary],
  ];

  const fragments = [];
  for (const [key, render] of renderers) {
    const entry = categories[key];
    if (entry?.score > 0) {
      fragments.push(render(entry));
    }
  }

  return fragments.length === 0 ? 'Clean' : fragments.join('; ');
}
104
+
105
// Scan strictly one site at a time: each scan is awaited before the next
// one starts, so result lines are printed in SITES order.
for (const hostname of SITES) {
  await scanSite(hostname);
}