@browserless/goto 10.12.6 → 10.12.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "@browserless/goto",
3
3
  "description": "Navigate to web pages with built-in ad blocking, device emulation, and optimized loading for faster automation.",
4
4
  "homepage": "https://browserless.js.org/#/?id=gotopage-options",
5
- "version": "10.12.6",
5
+ "version": "10.12.7",
6
6
  "main": "src/index.js",
7
7
  "author": {
8
8
  "email": "hello@microlink.io",
@@ -66,5 +66,5 @@
66
66
  "timeout": "2m",
67
67
  "workerThreads": false
68
68
  },
69
- "gitHead": "5d4aaa106be3e196f5bfb1b52fc3d712ba9d512c"
69
+ "gitHead": "398978a3237780fa44ee70e9ffeb30759cab50a9"
70
70
  }
package/src/adblock.js CHANGED
@@ -8,59 +8,60 @@ const path = require('path')
8
8
 
9
9
  const debug = require('debug-logfmt')('browserless:goto:adblock')
10
10
 
11
- let enginePromise
11
+ const lazy = fn => {
12
+ let p
13
+ return () => (p ??= fn())
14
+ }
12
15
 
13
- const getEngine = () => {
14
- if (enginePromise) return enginePromise
16
+ const autoconsentDir = path.dirname(require.resolve('@duckduckgo/autoconsent'))
15
17
 
16
- enginePromise = fs.readFile(path.resolve(__dirname, './engine.bin')).then(buffer => {
18
+ const getEngine = lazy(() =>
19
+ fs.readFile(path.resolve(__dirname, './engine.bin')).then(buffer => {
17
20
  const engine = PuppeteerBlocker.deserialize(new Uint8Array(buffer))
18
21
  engine.on('request-blocked', ({ url }) => debug('block', url))
19
22
  engine.on('request-redirected', ({ url }) => debug('redirect', url))
20
23
  return engine
21
24
  })
22
-
23
- return enginePromise
24
- }
25
+ )
25
26
 
26
27
  /**
27
28
  * autoconsent.playwright.js is the only browser-injectable IIFE bundle in the package.
28
29
  * It is not in the package's "exports" map, so pin @duckduckgo/autoconsent with ~ to
29
30
  * avoid breakage from internal restructuring on minor/patch bumps.
30
31
  */
31
- let autoconsentPlaywrightScriptPromise
32
-
33
- const getAutoconsentPlaywrightScript = () => {
34
- if (autoconsentPlaywrightScriptPromise) return autoconsentPlaywrightScriptPromise
35
-
36
- autoconsentPlaywrightScriptPromise = fs.readFile(
37
- path.resolve(
38
- path.dirname(require.resolve('@duckduckgo/autoconsent')),
39
- 'autoconsent.playwright.js'
40
- ),
41
- 'utf8'
42
- )
32
+ const getAutoconsentPlaywrightScript = lazy(() =>
33
+ fs.readFile(path.resolve(autoconsentDir, 'autoconsent.playwright.js'), 'utf8')
34
+ )
43
35
 
44
- return autoconsentPlaywrightScriptPromise
45
- }
36
+ const getAutoconsentRules = lazy(() =>
37
+ fs.readFile(path.resolve(autoconsentDir, '../rules/compact-rules.json'), 'utf8').then(JSON.parse)
38
+ )
46
39
 
47
40
  /* Configuration passed to autoconsent's `initResp` message.
48
- See https://github.com/duckduckgo/autoconsent/blob/main/api.md */
41
+ See https://github.com/duckduckgo/autoconsent/blob/main/docs/api.md */
49
42
  const autoconsentConfig = Object.freeze({
50
43
  /* activate consent rule matching */
51
44
  enabled: true,
52
45
  /* automatically reject (opt-out) all cookies */
53
46
  autoAction: 'optOut',
47
+ /* skip these CMPs even if detected */
48
+ disabledCmps: [],
54
49
  /* hide banners early via CSS before detection finishes */
55
50
  enablePrehide: true,
56
51
  /* apply CSS-only rules that hide popups lacking a reject button */
57
52
  enableCosmeticRules: true,
58
53
  /* enable rules auto-generated from common CMP patterns */
59
54
  enableGeneratedRules: true,
60
- /* fall back to heuristic click when no specific rule matches */
61
- enableHeuristicAction: true,
62
55
  /* skip bundled ABP/uBO cosmetic filter list (saves bundle size) */
63
56
  enableFilterList: false,
57
+ /* detect CMPs using heuristics when no specific rule matches */
58
+ enableHeuristicDetection: true,
59
+ /* fall back to heuristic click when no specific rule matches */
60
+ enableHeuristicAction: true,
61
+ /* run in the page's main world (false = isolated world) */
62
+ isMainWorld: false,
63
+ /* max ms to keep prehide CSS applied before removing it */
64
+ prehideTimeout: 2000,
64
65
  /* how many times to retry CMP detection (~50 ms apart) */
65
66
  detectRetries: 20,
66
67
  logs: {
@@ -68,12 +69,16 @@ const autoconsentConfig = Object.freeze({
68
69
  lifecycle: false,
69
70
  /* individual rule step execution */
70
71
  rulesteps: false,
72
+ /* CMP detection step details */
73
+ detectionsteps: false,
71
74
  /* eval snippet calls */
72
75
  evals: false,
73
76
  /* rule errors */
74
77
  errors: false,
75
78
  /* background ↔ content-script messages */
76
- messages: false
79
+ messages: false,
80
+ /* wait/delay step timing */
81
+ waits: false
77
82
  }
78
83
  })
79
84
 
@@ -95,35 +100,54 @@ const setupAutoConsent = async (page, timeout) => {
95
100
  if (!message || typeof message !== 'object') return
96
101
  if (message.__nonce !== nonce) return
97
102
 
98
- if (message.type === 'init') {
99
- return sendMessage(page, { type: 'initResp', config: autoconsentConfig })
100
- }
103
+ switch (message.type) {
104
+ case 'init': {
105
+ page._autoconsentInitDone = true
106
+ const rules = await getAutoconsentRules()
107
+ return sendMessage(page, { type: 'initResp', config: autoconsentConfig, rules })
108
+ }
109
+
110
+ case 'eval': {
111
+ let result = false
112
+ try {
113
+ result = await pTimeout(page.evaluate(message.code), timeout)
114
+ } catch {}
115
+ return sendMessage(page, { type: 'evalResp', id: message.id, result })
116
+ }
117
+
118
+ case 'cmpDetected':
119
+ case 'popupFound':
120
+ case 'autoconsentDone':
121
+ debug(message.type, { cmp: message.cmp })
122
+ break
101
123
 
102
- if (message.type === 'eval') {
103
- let result = false
104
- try {
105
- result = await pTimeout(page.evaluate(message.code), timeout)
106
- } catch {}
107
- return sendMessage(page, { type: 'evalResp', id: message.id, result })
124
+ case 'optOutResult':
125
+ debug(message.type, { result: message.result })
126
+ break
127
+
128
+ case 'autoconsentError':
129
+ debug(message.type, { details: message.details })
130
+ break
108
131
  }
109
132
  })
110
133
 
111
- /* Wrap the binding in the top frame so every outgoing message carries the
112
- nonce. Child frames (including cross-origin iframes) keep the raw CDP
113
- binding which lacks the nonce, so their messages are silently rejected. */
114
- await page.evaluateOnNewDocument(n => {
115
- if (window.self !== window.top) return
116
- const raw = window.autoconsentSendMessage
117
- if (raw) window.autoconsentSendMessage = msg => raw({ ...msg, __nonce: n })
118
- }, nonce)
119
-
120
- await page.evaluateOnNewDocument(autoconsentPlaywrightScript)
134
+ /* Single injection: wrap the binding in the top frame so every outgoing
135
+ message carries the nonce, then run the autoconsent script. Child frames
136
+ keep the raw CDP binding which lacks the nonce, so their messages are
137
+ silently rejected. */
138
+ const nonceGuard = `(function(n){if(window.self!==window.top)return;var raw=window.autoconsentSendMessage;if(raw)window.autoconsentSendMessage=function(msg){return raw(Object.assign({},msg,{__nonce:n}))}})(${JSON.stringify(nonce)});`
139
+ await page.evaluateOnNewDocument(nonceGuard + autoconsentPlaywrightScript)
121
140
  page._autoconsentSetup = true
122
141
  }
123
142
 
124
- const runAutoConsent = async page => page.evaluate(await getAutoconsentPlaywrightScript())
143
+ const runAutoConsent = async page => {
144
+ if (page._autoconsentInitDone) return
145
+ return page.evaluate(await getAutoconsentPlaywrightScript())
146
+ }
125
147
 
126
148
  const enableBlockingInPage = (page, run, timeout) => {
149
+ getAutoconsentRules().catch(() => {})
150
+
127
151
  page.disableAdblock = () =>
128
152
  getEngine()
129
153
  .then(engine => engine.disableBlockingInPage(page, { keepRequestInterception: true }))
package/src/engine.bin CHANGED
Binary file