@browserless/goto 10.9.18 → 10.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -113,6 +113,8 @@ await goto(page, { url: 'https://example.com', adblock: false })
113
113
  page.disableAdblock()
114
114
  ```
115
115
 
116
+ Cookie consent handling (opt-out) is included as part of `adblock: true` and powered by [duckduckgo/autoconsent](https://github.com/duckduckgo/autoconsent).
117
+
116
118
  ### Script and style injection
117
119
 
118
120
  Inject external resources or inline code:
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "@browserless/goto",
3
3
  "description": "Navigate to web pages with built-in ad blocking, device emulation, and optimized loading for faster automation.",
4
4
  "homepage": "https://browserless.js.org/#/?id=gotopage-options",
5
- "version": "10.9.18",
5
+ "version": "10.10.1",
6
6
  "main": "src/index.js",
7
7
  "author": {
8
8
  "email": "hello@microlink.io",
@@ -31,6 +31,7 @@
31
31
  ],
32
32
  "dependencies": {
33
33
  "@browserless/devices": "^10.9.18",
34
+ "@duckduckgo/autoconsent": "~14.53.0",
34
35
  "@ghostery/adblocker-puppeteer": "~2.13.4",
35
36
  "debug-logfmt": "~1.4.7",
36
37
  "got": "~11.8.6",
@@ -65,5 +66,5 @@
65
66
  "timeout": "2m",
66
67
  "workerThreads": false
67
68
  },
68
- "gitHead": "f5e8cd0788e4bad3b3ad9b007943754f96653817"
69
+ "gitHead": "32f6e72bcb489a83ac9659520a3961aeb97c47b7"
69
70
  }
package/src/adblock.js ADDED
@@ -0,0 +1,126 @@
1
+ 'use strict'
2
+
3
+ const { PuppeteerBlocker } = require('@ghostery/adblocker-puppeteer')
4
+ const path = require('path')
5
+ const fs = require('fs')
6
+
7
+ const debug = require('debug-logfmt')('browserless:goto:adblock')
8
+
9
let enginePromise

/**
 * Lazily load the serialized adblock engine shipped next to this file.
 *
 * `engine.bin` is read and deserialized on first use, and the resulting promise
 * is memoized so concurrent and later callers share one engine instance.
 * When loading fails the memoized promise is discarded, so the next call retries
 * instead of replaying the same rejection for the rest of the process lifetime.
 *
 * @returns {Promise<object>} resolves with the deserialized `PuppeteerBlocker` engine.
 */
const getEngine = () => {
  if (enginePromise) return enginePromise

  enginePromise = fs.promises
    .readFile(path.resolve(__dirname, './engine.bin'))
    .then(buffer => {
      const engine = PuppeteerBlocker.deserialize(new Uint8Array(buffer))
      engine.on('request-blocked', ({ url }) => debug('block', url))
      engine.on('request-redirected', ({ url }) => debug('redirect', url))
      return engine
    })
    .catch(error => {
      /* never cache a failure: let the next caller try again */
      enginePromise = undefined
      throw error
    })

  return enginePromise
}
23
+
24
/**
 * autoconsent.playwright.js is the only browser-injectable IIFE bundle in the package.
 * It is not in the package's "exports" map, so pin @duckduckgo/autoconsent with ~ to
 * avoid breakage from internal restructuring on minor/patch bumps.
 */
let autoconsentPlaywrightScriptPromise

/**
 * Lazily read the autoconsent browser bundle as a UTF-8 string.
 *
 * The file is located next to the resolved entry point of `@duckduckgo/autoconsent`.
 * The read is memoized; a failed read clears the memoized promise so that a later
 * call can retry instead of receiving the same cached rejection.
 *
 * @returns {Promise<string>} resolves with the source of `autoconsent.playwright.js`.
 */
const getAutoconsentPlaywrightScript = () => {
  if (autoconsentPlaywrightScriptPromise) return autoconsentPlaywrightScriptPromise

  const scriptPath = path.resolve(
    path.dirname(require.resolve('@duckduckgo/autoconsent')),
    'autoconsent.playwright.js'
  )

  autoconsentPlaywrightScriptPromise = fs.promises.readFile(scriptPath, 'utf8').catch(error => {
    /* never cache a failure: let the next caller try again */
    autoconsentPlaywrightScriptPromise = undefined
    throw error
  })

  return autoconsentPlaywrightScriptPromise
}
44
+
45
/* Configuration passed to autoconsent's `initResp` message.
   See https://github.com/duckduckgo/autoconsent/blob/main/api.md
   The object and its nested `logs` section are both frozen so this shared
   module-level constant cannot be mutated by accident. */
const autoconsentConfig = Object.freeze({
  /* activate consent rule matching */
  enabled: true,
  /* automatically reject (opt-out) all cookies */
  autoAction: 'optOut',
  /* hide banners early via CSS before detection finishes */
  enablePrehide: true,
  /* apply CSS-only rules that hide popups lacking a reject button */
  enableCosmeticRules: true,
  /* skip bundled ABP/uBO cosmetic filter list (saves bundle size) */
  enableFilterList: false,
  /* how many times to retry CMP detection (~50 ms apart) */
  detectRetries: 20,
  logs: Object.freeze({
    /* CMP detection / opt-out lifecycle events */
    lifecycle: false,
    /* individual rule step execution */
    rulesteps: false,
    /* eval snippet calls */
    evals: false,
    /* rule errors */
    errors: false,
    /* background ↔ content-script messages */
    messages: false
  })
})
73
+
74
/**
 * Deliver a message to the autoconsent script running inside `page`.
 *
 * The message is handed to `window.autoconsentReceiveMessage` when that function
 * exists in the page; otherwise nothing happens. Delivery is best-effort: a failed
 * `page.evaluate` never rejects, but the failure is reported through `debug`
 * instead of being dropped silently.
 *
 * @param {object} page - page object exposing `evaluate`.
 * @param {object} message - message forwarded to the page.
 * @returns {Promise<*>} the receiver's return value, or `undefined` when the
 *   receiver is missing or the evaluation failed.
 */
const sendMessage = (page, message) =>
  page
    .evaluate(msg => {
      if (window.autoconsentReceiveMessage) {
        return window.autoconsentReceiveMessage(msg)
      }
    }, message)
    .catch(error => {
      debug('autoconsent:message:error', error instanceof Error ? error.message : String(error))
    })
82
+
83
/**
 * Install autoconsent on `page` once: expose the `autoconsentSendMessage` bridge
 * and register the autoconsent bundle to run on every new document.
 *
 * `page._autoconsentSetup` holds the in-flight (then completed) setup promise.
 * Marking the page before the first `await` means overlapping calls share a single
 * setup instead of each calling `exposeFunction`, which Puppeteer rejects for a
 * name that is already registered. A failed setup clears the marker so a later
 * call may try again.
 *
 * @param {object} page - page exposing `exposeFunction` and `evaluateOnNewDocument`.
 * @returns {Promise<void>} settles when setup has finished for this page.
 */
const setupAutoConsent = async page => {
  if (page._autoconsentSetup) {
    await page._autoconsentSetup
    return
  }

  page._autoconsentSetup = (async () => {
    const autoconsentPlaywrightScript = await getAutoconsentPlaywrightScript()

    await page.exposeFunction('autoconsentSendMessage', async message => {
      if (!message || typeof message !== 'object') return

      /* the content script asks for its configuration */
      if (message.type === 'init') {
        return sendMessage(page, { type: 'initResp', config: autoconsentConfig })
      }

      /* eval snippets are not executed here: always answer with `result: false` */
      if (message.type === 'eval') {
        return sendMessage(page, { type: 'evalResp', id: message.id, result: false })
      }
    })

    await page.evaluateOnNewDocument(autoconsentPlaywrightScript)
  })()

  try {
    await page._autoconsentSetup
  } catch (error) {
    page._autoconsentSetup = undefined
    throw error
  }
}
102
+
103
/**
 * Evaluate the autoconsent bundle in the current document of `page`.
 *
 * @param {object} page - page exposing `evaluate`.
 * @returns {Promise<*>} whatever `page.evaluate` resolves with for the bundle.
 */
const runAutoConsent = async page => {
  const script = await getAutoconsentPlaywrightScript()
  return page.evaluate(script)
}
104
+
105
/**
 * Turn on ad blocking and cookie-consent handling for `page`.
 *
 * Attaches `page.disableAdblock()`, which asks the engine to stop blocking in this
 * page while keeping request interception enabled. That helper never rejects; a
 * failure is reported through `debug` rather than silently discarded.
 *
 * @param {object} page - page to enable blocking on.
 * @param {Function} run - task runner called with `{ fn, timeout, debug }`.
 * @param {number} actionTimeout - timeout forwarded to each task.
 * @returns {Array} the two values returned by `run`: autoconsent setup first,
 *   adblock engine activation second.
 */
const enableBlockingInPage = (page, run, actionTimeout) => {
  page.disableAdblock = () =>
    getEngine()
      .then(engine => engine.disableBlockingInPage(page, { keepRequestInterception: true }))
      .then(() => debug('disabled'))
      .catch(error => {
        debug('disable:error', error instanceof Error ? error.message : String(error))
      })

  return [
    run({
      fn: setupAutoConsent(page),
      timeout: actionTimeout,
      debug: 'autoconsent:setup'
    }),
    run({
      fn: getEngine().then(engine => engine.enableBlockingInPage(page)),
      timeout: actionTimeout,
      debug: 'adblock'
    })
  ]
}
125
+
126
+ module.exports = { enableBlockingInPage, runAutoConsent }
package/src/engine.bin CHANGED
Binary file
package/src/index.js CHANGED
@@ -1,6 +1,5 @@
1
1
  'use strict'
2
2
 
3
- const { PuppeteerBlocker } = require('@ghostery/adblocker-puppeteer')
4
3
  const { shallowEqualObjects } = require('shallow-equal')
5
4
  const { setTimeout } = require('node:timers/promises')
6
5
  const createDevices = require('@browserless/devices')
@@ -8,25 +7,17 @@ const toughCookie = require('tough-cookie')
8
7
  const pReflect = require('p-reflect')
9
8
  const pTimeout = require('p-timeout')
10
9
  const isUrl = require('is-url-http')
11
- const path = require('path')
12
- const fs = require('fs')
13
10
 
14
11
  const { DEFAULT_INTERCEPT_RESOLUTION_PRIORITY } = require('puppeteer')
15
12
 
13
+ const adblock = require('./adblock')
14
+
16
15
  const debug = require('debug-logfmt')('browserless:goto')
17
16
  debug.continue = require('debug-logfmt')('browserless:goto:continue')
18
17
  debug.abort = require('debug-logfmt')('browserless:goto:abort')
19
- debug.adblock = require('debug-logfmt')('browserless:goto:adblock')
20
18
 
21
19
  const truncate = (str, n = 80) => (str.length > n ? str.substr(0, n - 1) + '…' : str)
22
20
 
23
- const engine = PuppeteerBlocker.deserialize(
24
- new Uint8Array(fs.readFileSync(path.resolve(__dirname, './engine.bin')))
25
- )
26
-
27
- engine.on('request-blocked', ({ url }) => debug.adblock('block', url))
28
- engine.on('request-redirected', ({ url }) => debug.adblock('redirect', url))
29
-
30
21
  const isEmpty = val => val == null || !(Object.keys(val) || val).length
31
22
 
32
23
  const castArray = value => [].concat(value).filter(Boolean)
@@ -39,15 +30,13 @@ const run = async ({ fn, timeout, debug: props }) => {
39
30
  return result
40
31
  }
41
32
 
42
- const parseCookies = (url, str) =>
43
- str.split(';').reduce((acc, cookieStr) => {
44
- const jar = new toughCookie.CookieJar(undefined, { rejectPublicSuffixes: false })
45
- jar.setCookieSync(cookieStr.trim(), url)
46
- const parsedCookie = jar.serializeSync().cookies[0]
33
+ const parseCookies = (url, str) => {
34
+ const jar = new toughCookie.CookieJar(undefined, { rejectPublicSuffixes: false })
47
35
 
48
- // Use this instead of the above when the following issue is fixed:
49
- // https://github.com/salesforce/tough-cookie/issues/149
50
- // const ret = toughCookie.parse(cookie).serializeSync();
36
+ return str.split(';').reduce((acc, cookieStr) => {
37
+ const cookie = jar.setCookieSync(cookieStr.trim(), url)
38
+ if (!cookie) return acc
39
+ const parsedCookie = cookie.toJSON()
51
40
 
52
41
  parsedCookie.name = parsedCookie.key
53
42
  delete parsedCookie.key
@@ -59,6 +48,7 @@ const parseCookies = (url, str) =>
59
48
  acc.push(parsedCookie)
60
49
  return acc
61
50
  }, [])
51
+ }
62
52
 
63
53
  const getMediaFeatures = ({ animations, colorScheme }) => {
64
54
  const prefers = []
@@ -181,7 +171,7 @@ module.exports = ({ defaultDevice = 'Macbook Pro 13', timeout: globalTimeout, ..
181
171
  page,
182
172
  {
183
173
  abortTypes = [],
184
- adblock = true,
174
+ adblock: withAdblock = true,
185
175
  animations = false,
186
176
  authenticate,
187
177
  click,
@@ -268,33 +258,8 @@ module.exports = ({ defaultDevice = 'Macbook Pro 13', timeout: globalTimeout, ..
268
258
  })
269
259
  }
270
260
 
271
- if (adblock) {
272
- let adblockContext
273
-
274
- page.disableAdblock = () => {
275
- // TODO: drop this when https://github.com/ghostery/adblocker/pull/5161 is merged
276
-
277
- engine.contexts.delete(page)
278
-
279
- if (adblockContext.blocker.config.loadNetworkFilters) {
280
- adblockContext.page.off('request', adblockContext.onRequest)
281
- }
282
-
283
- if (adblockContext.blocker.config.loadCosmeticFilters) {
284
- adblockContext.page.off('frameattached', adblockContext.onFrameNavigated)
285
- adblockContext.page.off('domcontentloaded', adblockContext.onDomContentLoaded)
286
- }
287
-
288
- debug.adblock('disabled')
289
- }
290
-
291
- prePromises.push(
292
- run({
293
- fn: engine.enableBlockingInPage(page).then(context => (adblockContext = context)),
294
- timeout: actionTimeout,
295
- debug: 'adblock'
296
- })
297
- )
261
+ if (withAdblock) {
262
+ prePromises.push(...adblock.enableBlockingInPage(page, run, actionTimeout))
298
263
  }
299
264
 
300
265
  if (javascript === false) {
@@ -398,6 +363,14 @@ module.exports = ({ defaultDevice = 'Macbook Pro 13', timeout: globalTimeout, ..
398
363
  debug: { fn: html ? 'html' : 'url', waitUntil }
399
364
  })
400
365
 
366
+ if (withAdblock) {
367
+ await run({
368
+ fn: adblock.runAutoConsent(page),
369
+ timeout: actionTimeout,
370
+ debug: 'autoconsent:run'
371
+ })
372
+ }
373
+
401
374
  for (const [key, value] of Object.entries({
402
375
  waitForSelector,
403
376
  waitForFunction