@browserless/goto 10.9.16 → 10.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,14 +1,18 @@
1
1
  <div align="center">
2
- <br>
3
2
  <img style="width: 500px; margin:3rem 0 1.5rem;" src="https://github.com/microlinkhq/browserless/raw/master/static/logo-banner.png#gh-light-mode-only" alt="browserless">
4
3
  <img style="width: 500px; margin:3rem 0 1.5rem;" src="https://github.com/microlinkhq/browserless/raw/master/static/logo-banner-light.png#gh-dark-mode-only" alt="browserless">
5
- <br>
6
- <br>
7
- <p align="center"><strong>@browserless/goto</strong>: Go to a page aborting unnecessary requests.</p>
8
- <p align="center">See <a href="https://browserless.js.org/#%2F%3Fid=gotopage-options" target='_blank' rel='noopener noreferrer'>goto</a> section our <a href="https://browserless.js.org" target='_blank' rel='noopener noreferrer'>website</a> for more information.</p>
9
- <br>
4
+ <br><br>
5
+ <a href="https://microlink.io"><img src="https://img.shields.io/badge/powered_by-microlink.io-blue?style=flat-square&color=%23EA407B" alt="Powered by microlink.io"></a>
6
+ <img src="https://img.shields.io/github/tag/microlinkhq/browserless.svg?style=flat-square" alt="Last version">
7
+ <a href="https://coveralls.io/github/microlinkhq/browserless"><img src="https://img.shields.io/coveralls/microlinkhq/browserless.svg?style=flat-square" alt="Coverage Status"></a>
8
+ <a href="https://www.npmjs.org/package/browserless"><img src="https://img.shields.io/npm/dm/browserless.svg?style=flat-square" alt="NPM Status"></a>
9
+ <br><br>
10
10
  </div>
11
11
 
12
+ > @browserless/goto: Go to a page aborting unnecessary requests.
13
+
14
+ See the [Go To section](https://browserless.js.org/#/?id=gotopage-options) on our website for more details.
15
+
12
16
  ## Install
13
17
 
14
18
  Using npm:
@@ -17,6 +21,192 @@ Using npm:
17
21
  npm install @browserless/goto --save
18
22
  ```
19
23
 
24
+ ## About
25
+
26
+ This package provides **advanced page navigation** with built-in ad blocking, smart waiting strategies, and extensive customization options. It's the core navigation engine that powers all browserless page loading operations.
27
+
28
+ ### What this package does
29
+
30
+ The `@browserless/goto` package allows you to:
31
+
32
+ - **Navigate to pages** with optimized loading and smart waiting strategies
33
+ - **Block ads and trackers** using a precompiled Ghostery ad-blocker engine
34
+ - **Inject scripts, modules, and styles** into pages
35
+ - **Emulate devices** with viewport, user agent, and media features
36
+ - **Intercept and abort requests** by resource type
37
+ - **Handle cookies and authentication** seamlessly
38
+
39
+ ### Usage
40
+
41
+ ```js
42
+ const createGoto = require('@browserless/goto')
43
+ const puppeteer = require('puppeteer')
44
+
45
+ const goto = createGoto({
46
+ timeout: 30000,
47
+ defaultDevice: 'Macbook Pro 13'
48
+ })
49
+
50
+ const browser = await puppeteer.launch()
51
+ const page = await browser.newPage()
52
+
53
+ const { response, device, error } = await goto(page, {
54
+ url: 'https://example.com',
55
+ adblock: true,
56
+ waitUntil: 'auto'
57
+ })
58
+ ```
59
+
60
+ ### Options
61
+
62
+ | Option | Type | Default | Description |
63
+ |--------|------|---------|-------------|
64
+ | `url` | `string` | — | Target URL to navigate to |
65
+ | `html` | `string` | — | HTML content to render (instead of URL) |
66
+ | `adblock` | `boolean` | `true` | Enable the built-in ad blocker and cookie-consent opt-out |
67
+ | `waitUntil` | `string\|string[]` | `'auto'` | Navigation wait condition |
68
+ | `timeout` | `number` | `30000` | Navigation timeout in ms |
69
+ | `device` | `string` | `'Macbook Pro 13'` | Device to emulate |
70
+ | `headers` | `object` | `{}` | Extra HTTP headers |
71
+ | `javascript` | `boolean` | `true` | Enable/disable JavaScript |
72
+ | `animations` | `boolean` | `false` | Enable CSS animations |
73
+ | `colorScheme` | `string` | — | `'light'` or `'dark'` preference |
74
+ | `mediaType` | `string` | — | CSS media type (`'screen'`, `'print'`) |
75
+ | `timezone` | `string` | — | Timezone to emulate |
76
+ | `authenticate` | `object` | — | HTTP authentication credentials |
77
+ | `scripts` | `string\|string[]` | — | Scripts to inject |
78
+ | `modules` | `string\|string[]` | — | ES modules to inject |
79
+ | `styles` | `string\|string[]` | — | Stylesheets to inject |
80
+ | `click` | `string\|string[]` | — | CSS selectors to click |
81
+ | `scroll` | `string` | — | CSS selector to scroll into view |
82
+ | `abortTypes` | `string[]` | `[]` | Resource types to abort |
83
+ | `waitForSelector` | `string` | — | Wait for selector to appear |
84
+ | `waitForFunction` | `string` | — | Wait for function to return truthy |
85
+ | `waitForTimeout` | `number` | — | Wait for specified milliseconds |
86
+ | `onPageRequest` | `function` | — | Request interception handler |
87
+
88
+ ### Smart waiting with `waitUntil: 'auto'`
89
+
90
+ The default `'auto'` mode intelligently waits for page readiness:
91
+
92
+ ```js
93
+ // Auto mode combines 'load' with 'networkidle2' smartly
94
+ await goto(page, { url: 'https://example.com', waitUntil: 'auto' })
95
+
96
+ // Standard Puppeteer wait conditions also supported
97
+ await goto(page, { url: 'https://example.com', waitUntil: 'networkidle0' })
98
+ await goto(page, { url: 'https://example.com', waitUntil: ['load', 'domcontentloaded'] })
99
+ ```
100
+
101
+ ### Built-in Ad Blocker
102
+
103
+ The package includes a precompiled [Ghostery ad-blocker](https://github.com/ghostery/adblocker) engine that blocks ads and trackers automatically:
104
+
105
+ ```js
106
+ // Enabled by default
107
+ await goto(page, { url: 'https://example.com', adblock: true })
108
+
109
+ // Disable for specific pages
110
+ await goto(page, { url: 'https://example.com', adblock: false })
111
+
112
+ // The adblocker can be disabled mid-session
113
+ await page.disableAdblock()
114
+ ```
115
+
116
+ When `adblock: true`, cookie consent banners are also handled automatically by opting out of all cookies. This is powered by [duckduckgo/autoconsent](https://github.com/duckduckgo/autoconsent).
117
+
118
+ ### Script and style injection
119
+
120
+ Inject external resources or inline code:
121
+
122
+ ```js
123
+ await goto(page, {
124
+ url: 'https://example.com',
125
+ // External URLs
126
+ scripts: ['https://cdn.example.com/library.js'],
127
+ // ES modules
128
+ modules: ['https://cdn.example.com/module.mjs'],
129
+ // CSS (URLs, paths, or inline)
130
+ styles: [
131
+ 'https://cdn.example.com/styles.css',
132
+ 'body { background: red; }'
133
+ ]
134
+ })
135
+ ```
136
+
137
+ ### Request interception
138
+
139
+ Abort specific resource types to speed up navigation:
140
+
141
+ ```js
142
+ await goto(page, {
143
+ url: 'https://example.com',
144
+ abortTypes: ['image', 'stylesheet', 'font', 'media'],
145
+ onPageRequest: (request, page) => {
146
+ console.log('Request:', request.url())
147
+ }
148
+ })
149
+ ```
150
+
151
+ ### Device emulation
152
+
153
+ ```js
154
+ // Use preset device
155
+ await goto(page, { url: 'https://example.com', device: 'iPhone 13' })
156
+
157
+ // Custom viewport
158
+ await goto(page, {
159
+ url: 'https://example.com',
160
+ viewport: { width: 1920, height: 1080 }
161
+ })
162
+
163
+ // Custom headers
164
+ await goto(page, {
165
+ url: 'https://example.com',
166
+ headers: {
167
+ 'user-agent': 'custom-agent',
168
+ 'cookie': 'session=abc123'
169
+ }
170
+ })
171
+ ```
172
+
173
+ ### Return value
174
+
175
+ The `goto` function returns:
176
+
177
+ ```js
178
+ const { response, device, error } = await goto(page, { url })
179
+
180
+ // response: Puppeteer Response object (or undefined if navigation failed)
181
+ // device: { userAgent, viewport } used for the request
182
+ // error: Error object if navigation failed
183
+ ```
184
+
185
+ ### How it fits in the monorepo
186
+
187
+ This is the **core navigation engine** used by the entire browserless ecosystem:
188
+
189
+ | Consumer | Purpose |
190
+ |----------|---------|
191
+ | `browserless` (core) | Powers `.goto()`, `.html()`, `.text()`, `.pdf()`, `.screenshot()` |
192
+ | `@browserless/screenshot` | Navigation before capturing screenshots |
193
+ | `@browserless/pdf` | Navigation before generating PDFs |
194
+ | `@browserless/function` | Navigation for sandboxed function execution |
195
+ | `@browserless/lighthouse` | Navigation for Lighthouse audits |
196
+
197
+ ### Dependencies
198
+
199
+ | Package | Purpose |
200
+ |---------|---------|
201
+ | `@browserless/devices` | Device descriptor lookups and emulation |
202
+ | `@ghostery/adblocker-puppeteer` | Ad and tracker blocking |
203
+ | `debug-logfmt` | Structured debug logging |
204
+ | `got` | HTTP client used by the postinstall script |
205
+ | `is-url-http` | Detect if value is URL for injection |
206
+ | `p-reflect` / `p-timeout` | Promise utilities for timeouts |
207
+ | `shallow-equal` | Viewport comparison optimization |
208
+ | `tough-cookie` | Cookie string parsing |
209
+
20
210
  ## License
21
211
 
22
212
  **@browserless/goto** © [Microlink](https://microlink.io), released under the [MIT](https://github.com/microlinkhq/browserless/blob/master/LICENSE.md) License.<br>
package/package.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "@browserless/goto",
3
- "description": "Go to a page aborting unnecessary requests",
3
+ "description": "Navigate to web pages with built-in ad blocking, device emulation, and optimized loading for faster automation.",
4
4
  "homepage": "https://browserless.js.org/#/?id=gotopage-options",
5
- "version": "10.9.16",
5
+ "version": "10.10.0",
6
6
  "main": "src/index.js",
7
7
  "author": {
8
8
  "email": "hello@microlink.io",
@@ -18,18 +18,20 @@
18
18
  "url": "https://github.com/microlinkhq/browserless/issues"
19
19
  },
20
20
  "keywords": [
21
- "browser",
22
21
  "browserless",
23
- "chrome",
24
- "chromeless",
25
- "core",
26
22
  "goto",
23
+ "navigation",
27
24
  "headless",
28
- "page",
29
- "puppeteer"
25
+ "chrome",
26
+ "puppeteer",
27
+ "adblock",
28
+ "automation",
29
+ "device-emulation",
30
+ "performance"
30
31
  ],
31
32
  "dependencies": {
32
- "@browserless/devices": "^10.7.13",
33
+ "@browserless/devices": "^10.9.18",
34
+ "@duckduckgo/autoconsent": "~14.53.0",
33
35
  "@ghostery/adblocker-puppeteer": "~2.13.4",
34
36
  "debug-logfmt": "~1.4.7",
35
37
  "got": "~11.8.6",
@@ -40,7 +42,7 @@
40
42
  "tough-cookie": "~6.0.0"
41
43
  },
42
44
  "devDependencies": {
43
- "@browserless/test": "^10.9.16",
45
+ "@browserless/test": "^10.9.18",
44
46
  "ava": "5",
45
47
  "p-wait-for": "3"
46
48
  },
@@ -64,5 +66,5 @@
64
66
  "timeout": "2m",
65
67
  "workerThreads": false
66
68
  },
67
- "gitHead": "b40c9ffbb5d346ff1f09857ca12fa1c876b92849"
69
+ "gitHead": "9b80e677418f2defb804d39043680fe65e3e277b"
68
70
  }
package/src/adblock.js ADDED
@@ -0,0 +1,109 @@
1
+ 'use strict'
2
+
3
+ const { PuppeteerBlocker } = require('@ghostery/adblocker-puppeteer')
4
+ const path = require('path')
5
+ const fs = require('fs')
6
+
7
+ const debug = require('debug-logfmt')('browserless:goto:adblock')
8
+
9
+ const engine = PuppeteerBlocker.deserialize(
10
+ new Uint8Array(fs.readFileSync(path.resolve(__dirname, './engine.bin')))
11
+ )
12
+
13
+ engine.on('request-blocked', ({ url }) => debug('block', url))
14
+ engine.on('request-redirected', ({ url }) => debug('redirect', url))
15
+
16
+ /**
17
+ * autoconsent.playwright.js is the only browser-injectable IIFE bundle in the package.
18
+ * It is not in the package's "exports" map, so pin @duckduckgo/autoconsent with ~ to
19
+ * avoid breakage from internal restructuring on minor/patch bumps.
20
+ */
21
+ const autoconsentPlaywrightScript = fs.readFileSync(
22
+ path.resolve(
23
+ path.dirname(require.resolve('@duckduckgo/autoconsent')),
24
+ 'autoconsent.playwright.js'
25
+ ),
26
+ 'utf8'
27
+ )
28
+
29
+ /* Configuration passed to autoconsent's `initResp` message.
30
+ See https://github.com/duckduckgo/autoconsent/blob/main/api.md */
31
+ const autoconsentConfig = Object.freeze({
32
+ /* activate consent rule matching */
33
+ enabled: true,
34
+ /* automatically reject (opt-out) all cookies */
35
+ autoAction: 'optOut',
36
+ /* hide banners early via CSS before detection finishes */
37
+ enablePrehide: true,
38
+ /* apply CSS-only rules that hide popups lacking a reject button */
39
+ enableCosmeticRules: true,
40
+ /* skip bundled ABP/uBO cosmetic filter list (saves bundle size) */
41
+ enableFilterList: false,
42
+ /* how many times to retry CMP detection (~50 ms apart) */
43
+ detectRetries: 20,
44
+ logs: {
45
+ /* CMP detection / opt-out lifecycle events */
46
+ lifecycle: false,
47
+ /* individual rule step execution */
48
+ rulesteps: false,
49
+ /* eval snippet calls */
50
+ evals: false,
51
+ /* rule errors */
52
+ errors: false,
53
+ /* background ↔ content-script messages */
54
+ messages: false
55
+ }
56
+ })
57
+
58
+ const sendMessage = (page, message) =>
59
+ page
60
+ .evaluate(msg => {
61
+ if (window.autoconsentReceiveMessage) {
62
+ return window.autoconsentReceiveMessage(msg)
63
+ }
64
+ }, message)
65
+ .catch(() => {})
66
+
67
+ const setupAutoConsent = async page => {
68
+ if (page._autoconsentSetup) return
69
+
70
+ await page.exposeFunction('autoconsentSendMessage', async message => {
71
+ if (!message || typeof message !== 'object') return
72
+
73
+ if (message.type === 'init') {
74
+ return sendMessage(page, { type: 'initResp', config: autoconsentConfig })
75
+ }
76
+
77
+ if (message.type === 'eval') {
78
+ return sendMessage(page, { type: 'evalResp', id: message.id, result: false })
79
+ }
80
+ })
81
+
82
+ await page.evaluateOnNewDocument(autoconsentPlaywrightScript)
83
+ page._autoconsentSetup = true
84
+ }
85
+
86
+ const runAutoConsent = page => page.evaluate(autoconsentPlaywrightScript)
87
+
88
+ const enableBlockingInPage = (page, run, actionTimeout) => {
89
+ page.disableAdblock = () =>
90
+ engine
91
+ .disableBlockingInPage(page, { keepRequestInterception: true })
92
+ .then(() => debug('disabled'))
93
+ .catch(() => {})
94
+
95
+ return [
96
+ run({
97
+ fn: setupAutoConsent(page),
98
+ timeout: actionTimeout,
99
+ debug: 'autoconsent:setup'
100
+ }),
101
+ run({
102
+ fn: engine.enableBlockingInPage(page),
103
+ timeout: actionTimeout,
104
+ debug: 'adblock'
105
+ })
106
+ ]
107
+ }
108
+
109
+ module.exports = { enableBlockingInPage, runAutoConsent }
package/src/index.js CHANGED
@@ -1,6 +1,5 @@
1
1
  'use strict'
2
2
 
3
- const { PuppeteerBlocker } = require('@ghostery/adblocker-puppeteer')
4
3
  const { shallowEqualObjects } = require('shallow-equal')
5
4
  const { setTimeout } = require('node:timers/promises')
6
5
  const createDevices = require('@browserless/devices')
@@ -8,25 +7,17 @@ const toughCookie = require('tough-cookie')
8
7
  const pReflect = require('p-reflect')
9
8
  const pTimeout = require('p-timeout')
10
9
  const isUrl = require('is-url-http')
11
- const path = require('path')
12
- const fs = require('fs')
13
10
 
14
11
  const { DEFAULT_INTERCEPT_RESOLUTION_PRIORITY } = require('puppeteer')
15
12
 
13
+ const adblock = require('./adblock')
14
+
16
15
  const debug = require('debug-logfmt')('browserless:goto')
17
16
  debug.continue = require('debug-logfmt')('browserless:goto:continue')
18
17
  debug.abort = require('debug-logfmt')('browserless:goto:abort')
19
- debug.adblock = require('debug-logfmt')('browserless:goto:adblock')
20
18
 
21
19
  const truncate = (str, n = 80) => (str.length > n ? str.substr(0, n - 1) + '…' : str)
22
20
 
23
- const engine = PuppeteerBlocker.deserialize(
24
- new Uint8Array(fs.readFileSync(path.resolve(__dirname, './engine.bin')))
25
- )
26
-
27
- engine.on('request-blocked', ({ url }) => debug.adblock('block', url))
28
- engine.on('request-redirected', ({ url }) => debug.adblock('redirect', url))
29
-
30
21
  const isEmpty = val => val == null || !(Object.keys(val) || val).length
31
22
 
32
23
  const castArray = value => [].concat(value).filter(Boolean)
@@ -181,7 +172,7 @@ module.exports = ({ defaultDevice = 'Macbook Pro 13', timeout: globalTimeout, ..
181
172
  page,
182
173
  {
183
174
  abortTypes = [],
184
- adblock = true,
175
+ adblock: withAdblock = true,
185
176
  animations = false,
186
177
  authenticate,
187
178
  click,
@@ -268,33 +259,8 @@ module.exports = ({ defaultDevice = 'Macbook Pro 13', timeout: globalTimeout, ..
268
259
  })
269
260
  }
270
261
 
271
- if (adblock) {
272
- let adblockContext
273
-
274
- page.disableAdblock = () => {
275
- // TODO: drop this when https://github.com/ghostery/adblocker/pull/5161 is merged
276
-
277
- engine.contexts.delete(page)
278
-
279
- if (adblockContext.blocker.config.loadNetworkFilters) {
280
- adblockContext.page.off('request', adblockContext.onRequest)
281
- }
282
-
283
- if (adblockContext.blocker.config.loadCosmeticFilters) {
284
- adblockContext.page.off('frameattached', adblockContext.onFrameNavigated)
285
- adblockContext.page.off('domcontentloaded', adblockContext.onDomContentLoaded)
286
- }
287
-
288
- debug.adblock('disabled')
289
- }
290
-
291
- prePromises.push(
292
- run({
293
- fn: engine.enableBlockingInPage(page).then(context => (adblockContext = context)),
294
- timeout: actionTimeout,
295
- debug: 'adblock'
296
- })
297
- )
262
+ if (withAdblock) {
263
+ prePromises.push(...adblock.enableBlockingInPage(page, run, actionTimeout))
298
264
  }
299
265
 
300
266
  if (javascript === false) {
@@ -398,6 +364,14 @@ module.exports = ({ defaultDevice = 'Macbook Pro 13', timeout: globalTimeout, ..
398
364
  debug: { fn: html ? 'html' : 'url', waitUntil }
399
365
  })
400
366
 
367
+ if (withAdblock) {
368
+ await run({
369
+ fn: adblock.runAutoConsent(page),
370
+ timeout: actionTimeout,
371
+ debug: 'autoconsent:run'
372
+ })
373
+ }
374
+
401
375
  for (const [key, value] of Object.entries({
402
376
  waitForSelector,
403
377
  waitForFunction