@browserless/goto 10.11.4 → 10.12.7

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "@browserless/goto",
3
3
  "description": "Navigate to web pages with built-in ad blocking, device emulation, and optimized loading for faster automation.",
4
4
  "homepage": "https://browserless.js.org/#/?id=gotopage-options",
5
- "version": "10.11.4",
5
+ "version": "10.12.7",
6
6
  "main": "src/index.js",
7
7
  "author": {
8
8
  "email": "hello@microlink.io",
@@ -31,18 +31,18 @@
31
31
  ],
32
32
  "dependencies": {
33
33
  "@browserless/devices": "^10.11.3",
34
- "@duckduckgo/autoconsent": "~14.56.0",
34
+ "@duckduckgo/autoconsent": "~14.67.0",
35
35
  "@ghostery/adblocker-puppeteer": "~2.14.1",
36
- "debug-logfmt": "~1.4.8",
36
+ "debug-logfmt": "~1.4.10",
37
37
  "got": "~11.8.6",
38
38
  "is-url-http": "~2.3.13",
39
39
  "p-reflect": "~2.1.0",
40
40
  "p-timeout": "~4.1.0",
41
41
  "shallow-equal": "~3.1.0",
42
- "tough-cookie": "~6.0.0"
42
+ "tough-cookie": "~6.0.1"
43
43
  },
44
44
  "devDependencies": {
45
- "@browserless/test": "^10.11.4",
45
+ "@browserless/test": "^10.12.6",
46
46
  "ava": "5",
47
47
  "p-wait-for": "3"
48
48
  },
@@ -66,5 +66,5 @@
66
66
  "timeout": "2m",
67
67
  "workerThreads": false
68
68
  },
69
- "gitHead": "9ff2a0f0dad20d64a5c47a04a989ee77d1ee5ae8"
69
+ "gitHead": "398978a3237780fa44ee70e9ffeb30759cab50a9"
70
70
  }
package/src/adblock.js CHANGED
@@ -1,60 +1,67 @@
1
1
  'use strict'
2
2
 
3
3
  const { PuppeteerBlocker } = require('@ghostery/adblocker-puppeteer')
4
+ const { randomUUID } = require('crypto')
5
+ const pTimeout = require('p-timeout')
4
6
  const fs = require('fs/promises')
5
7
  const path = require('path')
6
8
 
7
9
  const debug = require('debug-logfmt')('browserless:goto:adblock')
8
10
 
9
- let enginePromise
11
+ const lazy = fn => {
12
+ let p
13
+ return () => (p ??= fn())
14
+ }
10
15
 
11
- const getEngine = () => {
12
- if (enginePromise) return enginePromise
16
+ const autoconsentDir = path.dirname(require.resolve('@duckduckgo/autoconsent'))
13
17
 
14
- enginePromise = fs.readFile(path.resolve(__dirname, './engine.bin')).then(buffer => {
18
+ const getEngine = lazy(() =>
19
+ fs.readFile(path.resolve(__dirname, './engine.bin')).then(buffer => {
15
20
  const engine = PuppeteerBlocker.deserialize(new Uint8Array(buffer))
16
21
  engine.on('request-blocked', ({ url }) => debug('block', url))
17
22
  engine.on('request-redirected', ({ url }) => debug('redirect', url))
18
23
  return engine
19
24
  })
20
-
21
- return enginePromise
22
- }
25
+ )
23
26
 
24
27
  /**
25
28
  * autoconsent.playwright.js is the only browser-injectable IIFE bundle in the package.
26
29
  * It is not in the package's "exports" map, so pin @duckduckgo/autoconsent with ~ to
27
30
  * avoid breakage from internal restructuring on minor bumps (~ still allows patch bumps).
28
31
  */
29
- let autoconsentPlaywrightScriptPromise
30
-
31
- const getAutoconsentPlaywrightScript = () => {
32
- if (autoconsentPlaywrightScriptPromise) return autoconsentPlaywrightScriptPromise
32
+ const getAutoconsentPlaywrightScript = lazy(() =>
33
+ fs.readFile(path.resolve(autoconsentDir, 'autoconsent.playwright.js'), 'utf8')
34
+ )
33
35
 
34
- autoconsentPlaywrightScriptPromise = fs.readFile(
35
- path.resolve(
36
- path.dirname(require.resolve('@duckduckgo/autoconsent')),
37
- 'autoconsent.playwright.js'
38
- ),
39
- 'utf8'
40
- )
41
-
42
- return autoconsentPlaywrightScriptPromise
43
- }
36
+ const getAutoconsentRules = lazy(() =>
37
+ fs.readFile(path.resolve(autoconsentDir, '../rules/compact-rules.json'), 'utf8').then(JSON.parse)
38
+ )
44
39
 
45
40
  /* Configuration passed to autoconsent's `initResp` message.
46
- See https://github.com/duckduckgo/autoconsent/blob/main/api.md */
41
+ See https://github.com/duckduckgo/autoconsent/blob/main/docs/api.md */
47
42
  const autoconsentConfig = Object.freeze({
48
43
  /* activate consent rule matching */
49
44
  enabled: true,
50
45
  /* automatically reject (opt-out) all cookies */
51
46
  autoAction: 'optOut',
47
+ /* skip these CMPs even if detected */
48
+ disabledCmps: [],
52
49
  /* hide banners early via CSS before detection finishes */
53
50
  enablePrehide: true,
54
51
  /* apply CSS-only rules that hide popups lacking a reject button */
55
52
  enableCosmeticRules: true,
53
+ /* enable rules auto-generated from common CMP patterns */
54
+ enableGeneratedRules: true,
56
55
  /* skip bundled ABP/uBO cosmetic filter list (saves bundle size) */
57
56
  enableFilterList: false,
57
+ /* detect CMPs using heuristics when no specific rule matches */
58
+ enableHeuristicDetection: true,
59
+ /* fall back to heuristic click when no specific rule matches */
60
+ enableHeuristicAction: true,
61
+ /* run in the page's main world (false = isolated world) */
62
+ isMainWorld: false,
63
+ /* max ms to keep prehide CSS applied before removing it */
64
+ prehideTimeout: 2000,
58
65
  /* how many times to retry CMP detection (~50 ms apart) */
59
66
  detectRetries: 20,
60
67
  logs: {
@@ -62,12 +69,16 @@ const autoconsentConfig = Object.freeze({
62
69
  lifecycle: false,
63
70
  /* individual rule step execution */
64
71
  rulesteps: false,
72
+ /* CMP detection step details */
73
+ detectionsteps: false,
65
74
  /* eval snippet calls */
66
75
  evals: false,
67
76
  /* rule errors */
68
77
  errors: false,
69
78
  /* background ↔ content-script messages */
70
- messages: false
79
+ messages: false,
80
+ /* wait/delay step timing */
81
+ waits: false
71
82
  }
72
83
  })
73
84
 
@@ -80,29 +91,63 @@ const sendMessage = (page, message) =>
80
91
  }, message)
81
92
  .catch(() => {})
82
93
 
83
- const setupAutoConsent = async page => {
94
+ const setupAutoConsent = async (page, timeout) => {
84
95
  if (page._autoconsentSetup) return
85
96
  const autoconsentPlaywrightScript = await getAutoconsentPlaywrightScript()
97
+ const nonce = randomUUID()
86
98
 
87
99
  await page.exposeFunction('autoconsentSendMessage', async message => {
88
100
  if (!message || typeof message !== 'object') return
101
+ if (message.__nonce !== nonce) return
89
102
 
90
- if (message.type === 'init') {
91
- return sendMessage(page, { type: 'initResp', config: autoconsentConfig })
92
- }
103
+ switch (message.type) {
104
+ case 'init': {
105
+ page._autoconsentInitDone = true
106
+ const rules = await getAutoconsentRules()
107
+ return sendMessage(page, { type: 'initResp', config: autoconsentConfig, rules })
108
+ }
109
+
110
+ case 'eval': {
111
+ let result = false
112
+ try {
113
+ result = await pTimeout(page.evaluate(message.code), timeout)
114
+ } catch {}
115
+ return sendMessage(page, { type: 'evalResp', id: message.id, result })
116
+ }
117
+
118
+ case 'cmpDetected':
119
+ case 'popupFound':
120
+ case 'autoconsentDone':
121
+ debug(message.type, { cmp: message.cmp })
122
+ break
93
123
 
94
- if (message.type === 'eval') {
95
- return sendMessage(page, { type: 'evalResp', id: message.id, result: false })
124
+ case 'optOutResult':
125
+ debug(message.type, { result: message.result })
126
+ break
127
+
128
+ case 'autoconsentError':
129
+ debug(message.type, { details: message.details })
130
+ break
96
131
  }
97
132
  })
98
133
 
99
- await page.evaluateOnNewDocument(autoconsentPlaywrightScript)
134
+ /* Single injection: wrap the binding in the top frame so every outgoing
135
+ message carries the nonce, then run the autoconsent script. Child frames
136
+ keep the raw CDP binding which lacks the nonce, so their messages are
137
+ silently rejected. */
138
+ const nonceGuard = `(function(n){if(window.self!==window.top)return;var raw=window.autoconsentSendMessage;if(raw)window.autoconsentSendMessage=function(msg){return raw(Object.assign({},msg,{__nonce:n}))}})(${JSON.stringify(nonce)});`
139
+ await page.evaluateOnNewDocument(nonceGuard + autoconsentPlaywrightScript)
100
140
  page._autoconsentSetup = true
101
141
  }
102
142
 
103
- const runAutoConsent = async page => page.evaluate(await getAutoconsentPlaywrightScript())
143
+ const runAutoConsent = async page => {
144
+ if (page._autoconsentInitDone) return
145
+ return page.evaluate(await getAutoconsentPlaywrightScript())
146
+ }
147
+
148
+ const enableBlockingInPage = (page, run, timeout) => {
149
+ getAutoconsentRules().catch(() => {})
104
150
 
105
- const enableBlockingInPage = (page, run, actionTimeout) => {
106
151
  page.disableAdblock = () =>
107
152
  getEngine()
108
153
  .then(engine => engine.disableBlockingInPage(page, { keepRequestInterception: true }))
@@ -111,13 +156,13 @@ const enableBlockingInPage = (page, run, actionTimeout) => {
111
156
 
112
157
  return [
113
158
  run({
114
- fn: setupAutoConsent(page),
115
- timeout: actionTimeout,
159
+ fn: setupAutoConsent(page, timeout),
160
+ timeout,
116
161
  debug: 'autoconsent:setup'
117
162
  }),
118
163
  run({
119
164
  fn: getEngine().then(engine => engine.enableBlockingInPage(page)),
120
- timeout: actionTimeout,
165
+ timeout,
121
166
  debug: 'adblock'
122
167
  })
123
168
  ]
package/src/engine.bin CHANGED
Binary file