@browserless/goto 10.9.18 → 10.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -113,6 +113,8 @@ await goto(page, { url: 'https://example.com', adblock: false })
113
113
  page.disableAdblock()
114
114
  ```
115
115
 
116
+ Cookie consent handling (opt-out) is included as part of `adblock: true` and is powered by [duckduckgo/autoconsent](https://github.com/duckduckgo/autoconsent).
117
+
116
118
  ### Script and style injection
117
119
 
118
120
  Inject external resources or inline code:
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "@browserless/goto",
3
3
  "description": "Navigate to web pages with built-in ad blocking, device emulation, and optimized loading for faster automation.",
4
4
  "homepage": "https://browserless.js.org/#/?id=gotopage-options",
5
- "version": "10.9.18",
5
+ "version": "10.10.0",
6
6
  "main": "src/index.js",
7
7
  "author": {
8
8
  "email": "hello@microlink.io",
@@ -31,6 +31,7 @@
31
31
  ],
32
32
  "dependencies": {
33
33
  "@browserless/devices": "^10.9.18",
34
+ "@duckduckgo/autoconsent": "~14.53.0",
34
35
  "@ghostery/adblocker-puppeteer": "~2.13.4",
35
36
  "debug-logfmt": "~1.4.7",
36
37
  "got": "~11.8.6",
@@ -65,5 +66,5 @@
65
66
  "timeout": "2m",
66
67
  "workerThreads": false
67
68
  },
68
- "gitHead": "f5e8cd0788e4bad3b3ad9b007943754f96653817"
69
+ "gitHead": "9b80e677418f2defb804d39043680fe65e3e277b"
69
70
  }
package/src/adblock.js ADDED
@@ -0,0 +1,109 @@
1
+ 'use strict'
2
+
3
+ const { PuppeteerBlocker } = require('@ghostery/adblocker-puppeteer')
4
+ const path = require('path')
5
+ const fs = require('fs')
6
+
7
+ const debug = require('debug-logfmt')('browserless:goto:adblock')
8
+
9
+ const engine = PuppeteerBlocker.deserialize(
10
+ new Uint8Array(fs.readFileSync(path.resolve(__dirname, './engine.bin')))
11
+ )
12
+
13
+ engine.on('request-blocked', ({ url }) => debug('block', url))
14
+ engine.on('request-redirected', ({ url }) => debug('redirect', url))
15
+
16
+ /**
17
+ * autoconsent.playwright.js is the only browser-injectable IIFE bundle in the package.
18
+ * It is not in the package's "exports" map, so pin @duckduckgo/autoconsent with ~ to
19
+ * avoid breakage from internal restructuring on minor bumps (~ still permits patch releases).
20
+ */
21
+ const autoconsentPlaywrightScript = fs.readFileSync(
22
+ path.resolve(
23
+ path.dirname(require.resolve('@duckduckgo/autoconsent')),
24
+ 'autoconsent.playwright.js'
25
+ ),
26
+ 'utf8'
27
+ )
28
+
29
+ /* Configuration passed to autoconsent's `initResp` message.
30
+ See https://github.com/duckduckgo/autoconsent/blob/main/api.md */
31
+ const autoconsentConfig = Object.freeze({
32
+ /* activate consent rule matching */
33
+ enabled: true,
34
+ /* automatically reject (opt-out) all cookies */
35
+ autoAction: 'optOut',
36
+ /* hide banners early via CSS before detection finishes */
37
+ enablePrehide: true,
38
+ /* apply CSS-only rules that hide popups lacking a reject button */
39
+ enableCosmeticRules: true,
40
+ /* skip bundled ABP/uBO cosmetic filter list (saves bundle size) */
41
+ enableFilterList: false,
42
+ /* how many times to retry CMP detection (~50 ms apart) */
43
+ detectRetries: 20,
44
+ logs: {
45
+ /* CMP detection / opt-out lifecycle events */
46
+ lifecycle: false,
47
+ /* individual rule step execution */
48
+ rulesteps: false,
49
+ /* eval snippet calls */
50
+ evals: false,
51
+ /* rule errors */
52
+ errors: false,
53
+ /* background ↔ content-script messages */
54
+ messages: false
55
+ }
56
+ })
57
+
58
+ const sendMessage = (page, message) =>
59
+ page
60
+ .evaluate(msg => {
61
+ if (window.autoconsentReceiveMessage) {
62
+ return window.autoconsentReceiveMessage(msg)
63
+ }
64
+ }, message)
65
+ .catch(() => {})
66
+
67
+ const setupAutoConsent = async page => {
68
+ if (page._autoconsentSetup) return
69
+
70
+ await page.exposeFunction('autoconsentSendMessage', async message => {
71
+ if (!message || typeof message !== 'object') return
72
+
73
+ if (message.type === 'init') {
74
+ return sendMessage(page, { type: 'initResp', config: autoconsentConfig })
75
+ }
76
+
77
+ if (message.type === 'eval') {
78
+ return sendMessage(page, { type: 'evalResp', id: message.id, result: false })
79
+ }
80
+ })
81
+
82
+ await page.evaluateOnNewDocument(autoconsentPlaywrightScript)
83
+ page._autoconsentSetup = true
84
+ }
85
+
86
+ const runAutoConsent = page => page.evaluate(autoconsentPlaywrightScript)
87
+
88
+ const enableBlockingInPage = (page, run, actionTimeout) => {
89
+ page.disableAdblock = () =>
90
+ engine
91
+ .disableBlockingInPage(page, { keepRequestInterception: true })
92
+ .then(() => debug('disabled'))
93
+ .catch(() => {})
94
+
95
+ return [
96
+ run({
97
+ fn: setupAutoConsent(page),
98
+ timeout: actionTimeout,
99
+ debug: 'autoconsent:setup'
100
+ }),
101
+ run({
102
+ fn: engine.enableBlockingInPage(page),
103
+ timeout: actionTimeout,
104
+ debug: 'adblock'
105
+ })
106
+ ]
107
+ }
108
+
109
+ module.exports = { enableBlockingInPage, runAutoConsent }
package/src/index.js CHANGED
@@ -1,6 +1,5 @@
1
1
  'use strict'
2
2
 
3
- const { PuppeteerBlocker } = require('@ghostery/adblocker-puppeteer')
4
3
  const { shallowEqualObjects } = require('shallow-equal')
5
4
  const { setTimeout } = require('node:timers/promises')
6
5
  const createDevices = require('@browserless/devices')
@@ -8,25 +7,17 @@ const toughCookie = require('tough-cookie')
8
7
  const pReflect = require('p-reflect')
9
8
  const pTimeout = require('p-timeout')
10
9
  const isUrl = require('is-url-http')
11
- const path = require('path')
12
- const fs = require('fs')
13
10
 
14
11
  const { DEFAULT_INTERCEPT_RESOLUTION_PRIORITY } = require('puppeteer')
15
12
 
13
+ const adblock = require('./adblock')
14
+
16
15
  const debug = require('debug-logfmt')('browserless:goto')
17
16
  debug.continue = require('debug-logfmt')('browserless:goto:continue')
18
17
  debug.abort = require('debug-logfmt')('browserless:goto:abort')
19
- debug.adblock = require('debug-logfmt')('browserless:goto:adblock')
20
18
 
21
19
  const truncate = (str, n = 80) => (str.length > n ? str.substr(0, n - 1) + '…' : str)
22
20
 
23
- const engine = PuppeteerBlocker.deserialize(
24
- new Uint8Array(fs.readFileSync(path.resolve(__dirname, './engine.bin')))
25
- )
26
-
27
- engine.on('request-blocked', ({ url }) => debug.adblock('block', url))
28
- engine.on('request-redirected', ({ url }) => debug.adblock('redirect', url))
29
-
30
21
  const isEmpty = val => val == null || !(Object.keys(val) || val).length
31
22
 
32
23
  const castArray = value => [].concat(value).filter(Boolean)
@@ -181,7 +172,7 @@ module.exports = ({ defaultDevice = 'Macbook Pro 13', timeout: globalTimeout, ..
181
172
  page,
182
173
  {
183
174
  abortTypes = [],
184
- adblock = true,
175
+ adblock: withAdblock = true,
185
176
  animations = false,
186
177
  authenticate,
187
178
  click,
@@ -268,33 +259,8 @@ module.exports = ({ defaultDevice = 'Macbook Pro 13', timeout: globalTimeout, ..
268
259
  })
269
260
  }
270
261
 
271
- if (adblock) {
272
- let adblockContext
273
-
274
- page.disableAdblock = () => {
275
- // TODO: drop this when https://github.com/ghostery/adblocker/pull/5161 is merged
276
-
277
- engine.contexts.delete(page)
278
-
279
- if (adblockContext.blocker.config.loadNetworkFilters) {
280
- adblockContext.page.off('request', adblockContext.onRequest)
281
- }
282
-
283
- if (adblockContext.blocker.config.loadCosmeticFilters) {
284
- adblockContext.page.off('frameattached', adblockContext.onFrameNavigated)
285
- adblockContext.page.off('domcontentloaded', adblockContext.onDomContentLoaded)
286
- }
287
-
288
- debug.adblock('disabled')
289
- }
290
-
291
- prePromises.push(
292
- run({
293
- fn: engine.enableBlockingInPage(page).then(context => (adblockContext = context)),
294
- timeout: actionTimeout,
295
- debug: 'adblock'
296
- })
297
- )
262
+ if (withAdblock) {
263
+ prePromises.push(...adblock.enableBlockingInPage(page, run, actionTimeout))
298
264
  }
299
265
 
300
266
  if (javascript === false) {
@@ -398,6 +364,14 @@ module.exports = ({ defaultDevice = 'Macbook Pro 13', timeout: globalTimeout, ..
398
364
  debug: { fn: html ? 'html' : 'url', waitUntil }
399
365
  })
400
366
 
367
+ if (withAdblock) {
368
+ await run({
369
+ fn: adblock.runAutoConsent(page),
370
+ timeout: actionTimeout,
371
+ debug: 'autoconsent:run'
372
+ })
373
+ }
374
+
401
375
  for (const [key, value] of Object.entries({
402
376
  waitForSelector,
403
377
  waitForFunction