barebrowse 0.10.0 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +43 -0
- package/barebrowse.context.md +3 -3
- package/package.json +1 -1
- package/src/blocklist.js +12 -0
- package/src/index.js +21 -4
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,48 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.10.1
|
|
4
|
+
|
|
5
|
+
### Blocklist long-tail additions + legacy-Chrome warn + switchTab attach-mode test
|
|
6
|
+
|
|
7
|
+
Carry-forward items from the v0.10.0 backlog. All additive, no behavior
|
|
8
|
+
change on supported Chrome.
|
|
9
|
+
|
|
10
|
+
- **8 new patterns in `src/blocklist.js`** (120 → 128, still in the
|
|
11
|
+
curated 80–200 band):
|
|
12
|
+
- Mobile-measurement-on-web cluster (increasingly served from web
|
|
13
|
+
pages, not just SDKs): `*.appsflyer.com`, `*.branch.io`,
|
|
14
|
+
`*.adjust.com`.
|
|
15
|
+
- Privacy-friendly analytics that still tracks from an agent POV:
|
|
16
|
+
`static.cloudflareinsights.com` (Cloudflare Web Analytics),
|
|
17
|
+
`*.matomo.cloud` (Matomo Cloud's hosted tier).
|
|
18
|
+
- Broader Outbrain coverage: `amplify.outbrain.com`,
|
|
19
|
+
`log.outbrain.com` (in addition to the existing
|
|
20
|
+
`widgets.outbrain.com` and `*.outbrain.com/utils/*`).
|
|
21
|
+
- Broader PostHog: `*.posthog.com/static/array.js*` (the snippet
|
|
22
|
+
loader, in addition to the existing `/e/` and `/decide/` endpoints).
|
|
23
|
+
- **One-time `console.warn` when `Network.setBlockedURLs` rejects.**
|
|
24
|
+
Legacy Chromium builds lacking the method previously failed silently
|
|
25
|
+
inside `applyBlocklist`; now a single warn per process surfaces the
|
|
26
|
+
reason so callers don't wonder why blocking isn't engaging. Stays
|
|
27
|
+
silent on supported Chrome (success path), stays silent when
|
|
28
|
+
`blockAds: false` opts out entirely. Module-scoped flag —
|
|
29
|
+
intentionally not per-session, since the failure mode is the
|
|
30
|
+
browser, not the session.
|
|
31
|
+
- **`switchTab()` + `blockAds:true` attach-mode integration test.**
|
|
32
|
+
The v0.10.0 JSDoc claimed blocklist follows `switchTab()` in attach
|
|
33
|
+
mode but had no automated guard. New test in
|
|
34
|
+
`test/integration/blocklist.test.js` launches a real browser, opens
|
|
35
|
+
a second tab via raw CDP (bypassing barebrowse so the tab simulates
|
|
36
|
+
one the user already had open), attaches with explicit
|
|
37
|
+
`blockAds: true` + `blockUrls: [pattern]`, switches into that tab,
|
|
38
|
+
and asserts the tracker server gets zero hits and the tracker script
|
|
39
|
+
never executed. Locks in the post-switch `applyBlocklist` call site
|
|
40
|
+
that was added in v0.10.0.
|
|
41
|
+
- **Tests:** 143 total (5 new). 4 new unit tests in
|
|
42
|
+
`test/unit/blocklist.test.js` (long-tail coverage drift guard +
|
|
43
|
+
3-subtest warn-once suite covering rejection, success path, and
|
|
44
|
+
opted-out paths); 1 new integration test as above.
|
|
45
|
+
|
|
3
46
|
## 0.10.0
|
|
4
47
|
|
|
5
48
|
### Ad/tracker URL blocking + canvas-noise stealth + Chromium pgid reap fix
|
package/barebrowse.context.md
CHANGED
|
@@ -45,7 +45,7 @@ const snapshot = await browse('https://example.com', {
|
|
|
45
45
|
prune: true, // apply ARIA pruning (47-95% token reduction)
|
|
46
46
|
pruneMode: 'act', // 'act' (interactive elements) | 'read' (all content)
|
|
47
47
|
consent: true, // auto-dismiss cookie consent dialogs
|
|
48
|
-
blockAds: true, // block
|
|
48
|
+
blockAds: true, // block 128 ad/tracker URL patterns (default on for owned browsers)
|
|
49
49
|
blockUrls: [], // extra URL globs to block (merged with the default)
|
|
50
50
|
timeout: 30000, // navigation timeout in ms
|
|
51
51
|
});
|
|
@@ -93,7 +93,7 @@ const snapshot = await browse('https://example.com', {
|
|
|
93
93
|
- `viewport: '1280x720'` — Set viewport dimensions
|
|
94
94
|
- `storageState: 'file.json'` — Load cookies/localStorage from saved state
|
|
95
95
|
- `downloadPath: '/abs/dir'` — Where downloads land. Default: per-session `mkdtemp` under `/tmp/barebrowse-dl-*` that gets removed on `close()`. Caller-supplied paths are not cleaned up — caller owns the lifecycle.
|
|
96
|
-
- `blockAds: true|false` — CDP-level URL blocking of
|
|
96
|
+
- `blockAds: true|false` — CDP-level URL blocking of 128 common ad/tracker patterns (Google ads/analytics, FB/Amazon/MS/Adobe ad+analytics, Segment/Amplitude/Mixpanel/Heap/PostHog, Hotjar/FullStory/LogRocket, Criteo/Taboola/Outbrain, the consumer-pixel cluster, AppNexus/Rubicon/PubMatic supply, marketing automation; v0.10.1 added AppsFlyer/Branch/Adjust, Cloudflare Web Analytics, Matomo Cloud). Default `true` for launched browsers, `false` in attach mode (would affect any tab in the user's running browser). Explicit `true` in attach mode is honored and follows the session across `switchTab()` (regression-tested). Shrinks ARIA snapshots and speeds page loads. On legacy Chromium lacking `Network.setBlockedURLs`, a one-time `console.warn` reports that ad/tracker blocking is disabled; the session continues without it.
|
|
97
97
|
- `blockUrls: ['*://foo.com/*', ...]` — Extra glob patterns (CDP `Network.setBlockedURLs` format) to block in addition to the default. Merged with the default unless `blockAds: false`.
|
|
98
98
|
|
|
99
99
|
## Snapshot format
|
|
@@ -166,7 +166,7 @@ barebrowse can inject cookies from the user's real browser sessions, bypassing l
|
|
|
166
166
|
| SPA navigation | `waitForNavigation()` uses loadEventFired + frameNavigated | Both |
|
|
167
167
|
| Bot detection | v0.9.0 (H9): Cloudflare-strong phrases ("Just a moment", "Attention Required", "verify you are human") fire alone; generic phrases ("access denied", "unknown error") only fire on near-empty pages — no more false-positive headed-launches on legitimate 4xx/5xx pages. `botBlocked` flag set after every `goto()`. Hybrid fallback switches to headed. Snapshot shows `[BOT CHALLENGE DETECTED]` warning. | Hybrid |
|
|
168
168
|
| Stealth (headless tells) | v0.9.0 (H4): `Network.setUserAgentOverride` strips "HeadlessChrome" from UA in HTTP headers AND `navigator.userAgent`; JS patches for webdriver, plugins, languages, full `chrome.runtime` enum shape, `Notification` constructor + `permission: 'default'`, `hardwareConcurrency: 8`, `deviceMemory: 8`, WebGL `UNMASKED_VENDOR_WEBGL`/`UNMASKED_RENDERER_WEBGL` spoofed to Intel. v0.10.0: canvas fingerprint noise — `toDataURL`/`getImageData` XOR a per-session `crypto.getRandomValues`-seeded mask into ~1 byte per 64-byte stride (stable within a session, different across sessions; bitmap is restored after encoding so legitimate canvas use is unaffected). | Headless |
|
|
169
|
-
| Ad / tracker URL blocking | v0.10.0: CDP `Network.setBlockedURLs` with
|
|
169
|
+
| Ad / tracker URL blocking | v0.10.0: CDP `Network.setBlockedURLs` with a curated pattern list, 128 entries as of v0.10.1 (Google/FB/Amazon/MS/Adobe ad+analytics, the major SaaS analytics + session-replay stacks, content-rec, supply-side ad networks, marketing automation). v0.10.1 added 8 long-tail patterns: AppsFlyer/Branch/Adjust, Cloudflare Web Analytics, Matomo Cloud, broader Outbrain (`amplify`/`log`) and PostHog (`/static/array.js`). Default on for launched browsers, off in attach mode. `opts.blockUrls` extends; `opts.blockAds: false` disables. Shrinks ARIA snapshots and speeds loads. v0.10.1: regression-tested across `switchTab()` in attach mode; one-time `console.warn` if Chromium lacks the CDP method. | Launched |
|
|
170
170
|
| iframe / OOPIF content (Stripe, reCAPTCHA, embedded forms) | v0.9.0 (H2): `Target.setAutoAttach({flatten:true})` registers a CDP session per iframe; `ariaTree()` walks `Page.getFrameTree`, fetches each frame's AX tree on the right session, splices children under iframe placeholders via `DOM.getFrameOwner`. Refs route via `{session, backendNodeId}` so clicks dispatch in the iframe's Input domain. `--site-per-process` launch flag forces every iframe — including same-origin — into OOPIF so coords work. | Both |
|
|
171
171
|
| Downloads | v0.9.0 (H7): `Browser.setDownloadBehavior({behavior:'allowAndName', downloadPath, eventsEnabled:true})` + listeners populate `page.downloads`. Files land at `savedPath` (under `--download-path` if supplied, else per-session `/tmp/barebrowse-dl-*`). | Headless + Headed (skipped in attach mode) |
|
|
172
172
|
| Profile locking | Unique temp dir per headless instance | Headless |
|
package/package.json
CHANGED
package/src/blocklist.js
CHANGED
|
@@ -99,6 +99,8 @@ export const DEFAULT_BLOCKLIST = [
|
|
|
99
99
|
'*://trc.taboola.com/*',
|
|
100
100
|
'*://widgets.outbrain.com/*',
|
|
101
101
|
'*://*.outbrain.com/utils/*',
|
|
102
|
+
'*://amplify.outbrain.com/*',
|
|
103
|
+
'*://log.outbrain.com/*',
|
|
102
104
|
|
|
103
105
|
// --- Tealium / Marketo / Pardot / Salesforce marketing ---
|
|
104
106
|
'*://tags.tiqcdn.com/*',
|
|
@@ -152,6 +154,7 @@ export const DEFAULT_BLOCKLIST = [
|
|
|
152
154
|
'*://heapanalytics.com/h*',
|
|
153
155
|
'*://*.posthog.com/e/*',
|
|
154
156
|
'*://*.posthog.com/decide/*',
|
|
157
|
+
'*://*.posthog.com/static/array.js*',
|
|
155
158
|
|
|
156
159
|
// --- Marketing automation ---
|
|
157
160
|
'*://track.hubspot.com/*',
|
|
@@ -170,6 +173,15 @@ export const DEFAULT_BLOCKLIST = [
|
|
|
170
173
|
'*://sessions.bugsnag.com/*',
|
|
171
174
|
'*://notify.bugsnag.com/*',
|
|
172
175
|
|
|
176
|
+
// --- Mobile-measurement (increasingly served on web too) ---
|
|
177
|
+
'*://*.appsflyer.com/*',
|
|
178
|
+
'*://*.branch.io/*',
|
|
179
|
+
'*://*.adjust.com/*',
|
|
180
|
+
|
|
181
|
+
// --- Privacy-friendly analytics (still trackers from an agent POV) ---
|
|
182
|
+
'*://static.cloudflareinsights.com/*',
|
|
183
|
+
'*://*.matomo.cloud/*',
|
|
184
|
+
|
|
173
185
|
// --- Misc widely-deployed ad networks ---
|
|
174
186
|
'*://*.adnxs.com/*', // AppNexus / Xandr
|
|
175
187
|
'*://*.rubiconproject.com/*',
|
package/src/index.js
CHANGED
|
@@ -758,12 +758,21 @@ async function attachToExistingTarget(cdp, targetId, pageOpts = {}) {
|
|
|
758
758
|
return { session, targetId, sessionId, framesByFrameId };
|
|
759
759
|
}
|
|
760
760
|
|
|
761
|
+
// One-time warn flag for Network.setBlockedURLs reject. Module-scoped so the
|
|
762
|
+
// warn fires once per process across every session — legacy Chrome will keep
|
|
763
|
+
// rejecting and we don't want to spam.
|
|
764
|
+
let blocklistWarned = false;
|
|
765
|
+
|
|
761
766
|
/**
|
|
762
767
|
* Apply Network.setBlockedURLs for ad/tracker blocking on a session.
|
|
763
768
|
* Default list is on; pass blockAds:false to skip, blockUrls:[] to extend.
|
|
764
|
-
*
|
|
769
|
+
* On failure (legacy Chrome lacking the method) warns once and continues —
|
|
770
|
+
* blocking is an enhancement, not a hard requirement.
|
|
771
|
+
*
|
|
772
|
+
* Exported for unit testing of the warn-once behavior; not part of the public
|
|
773
|
+
* API surface.
|
|
765
774
|
*/
|
|
766
|
-
async function applyBlocklist(session, pageOpts) {
|
|
775
|
+
export async function applyBlocklist(session, pageOpts) {
|
|
767
776
|
if (pageOpts.blockAds === false && !pageOpts.blockUrls) return;
|
|
768
777
|
const patterns = pageOpts.blockAds === false
|
|
769
778
|
? (pageOpts.blockUrls || [])
|
|
@@ -771,11 +780,19 @@ async function applyBlocklist(session, pageOpts) {
|
|
|
771
780
|
if (!patterns.length) return;
|
|
772
781
|
try {
|
|
773
782
|
await session.send('Network.setBlockedURLs', { urls: patterns });
|
|
774
|
-
} catch {
|
|
775
|
-
|
|
783
|
+
} catch (err) {
|
|
784
|
+
if (!blocklistWarned) {
|
|
785
|
+
blocklistWarned = true;
|
|
786
|
+
console.warn(`barebrowse: Network.setBlockedURLs unsupported — ad/tracker blocking disabled (${err.message})`);
|
|
787
|
+
}
|
|
776
788
|
}
|
|
777
789
|
}
|
|
778
790
|
|
|
791
|
+
/** Test-only: reset the warn-once flag. Not part of the public API. */
|
|
792
|
+
export function _resetBlocklistWarning() {
|
|
793
|
+
blocklistWarned = false;
|
|
794
|
+
}
|
|
795
|
+
|
|
779
796
|
/**
|
|
780
797
|
* Navigate to a URL and wait for the page to load.
|
|
781
798
|
*/
|