@browserless/goto 10.12.6 → 10.12.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/adblock.js +69 -45
- package/src/engine.bin +0 -0
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "@browserless/goto",
|
|
3
3
|
"description": "Navigate to web pages with built-in ad blocking, device emulation, and optimized loading for faster automation.",
|
|
4
4
|
"homepage": "https://browserless.js.org/#/?id=gotopage-options",
|
|
5
|
-
"version": "10.12.6",
|
|
5
|
+
"version": "10.12.7",
|
|
6
6
|
"main": "src/index.js",
|
|
7
7
|
"author": {
|
|
8
8
|
"email": "hello@microlink.io",
|
|
@@ -66,5 +66,5 @@
|
|
|
66
66
|
"timeout": "2m",
|
|
67
67
|
"workerThreads": false
|
|
68
68
|
},
|
|
69
|
-
"gitHead": "
|
|
69
|
+
"gitHead": "398978a3237780fa44ee70e9ffeb30759cab50a9"
|
|
70
70
|
}
|
package/src/adblock.js
CHANGED
|
@@ -8,59 +8,60 @@ const path = require('path')
|
|
|
8
8
|
|
|
9
9
|
const debug = require('debug-logfmt')('browserless:goto:adblock')
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
const lazy = fn => {
|
|
12
|
+
let p
|
|
13
|
+
return () => (p ??= fn())
|
|
14
|
+
}
|
|
12
15
|
|
|
13
|
-
const
|
|
14
|
-
if (enginePromise) return enginePromise
|
|
16
|
+
const autoconsentDir = path.dirname(require.resolve('@duckduckgo/autoconsent'))
|
|
15
17
|
|
|
16
|
-
|
|
18
|
+
const getEngine = lazy(() =>
|
|
19
|
+
fs.readFile(path.resolve(__dirname, './engine.bin')).then(buffer => {
|
|
17
20
|
const engine = PuppeteerBlocker.deserialize(new Uint8Array(buffer))
|
|
18
21
|
engine.on('request-blocked', ({ url }) => debug('block', url))
|
|
19
22
|
engine.on('request-redirected', ({ url }) => debug('redirect', url))
|
|
20
23
|
return engine
|
|
21
24
|
})
|
|
22
|
-
|
|
23
|
-
return enginePromise
|
|
24
|
-
}
|
|
25
|
+
)
|
|
25
26
|
|
|
26
27
|
/**
|
|
27
28
|
* autoconsent.playwright.js is the only browser-injectable IIFE bundle in the package.
|
|
28
29
|
* It is not in the package's "exports" map, so pin @duckduckgo/autoconsent with ~ to
|
|
29
30
|
* avoid breakage from internal restructuring on minor/patch bumps.
|
|
30
31
|
*/
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
if (autoconsentPlaywrightScriptPromise) return autoconsentPlaywrightScriptPromise
|
|
35
|
-
|
|
36
|
-
autoconsentPlaywrightScriptPromise = fs.readFile(
|
|
37
|
-
path.resolve(
|
|
38
|
-
path.dirname(require.resolve('@duckduckgo/autoconsent')),
|
|
39
|
-
'autoconsent.playwright.js'
|
|
40
|
-
),
|
|
41
|
-
'utf8'
|
|
42
|
-
)
|
|
32
|
+
const getAutoconsentPlaywrightScript = lazy(() =>
|
|
33
|
+
fs.readFile(path.resolve(autoconsentDir, 'autoconsent.playwright.js'), 'utf8')
|
|
34
|
+
)
|
|
43
35
|
|
|
44
|
-
|
|
45
|
-
|
|
36
|
+
const getAutoconsentRules = lazy(() =>
|
|
37
|
+
fs.readFile(path.resolve(autoconsentDir, '../rules/compact-rules.json'), 'utf8').then(JSON.parse)
|
|
38
|
+
)
|
|
46
39
|
|
|
47
40
|
/* Configuration passed to autoconsent's `initResp` message.
|
|
48
|
-
See https://github.com/duckduckgo/autoconsent/blob/main/api.md */
|
|
41
|
+
See https://github.com/duckduckgo/autoconsent/blob/main/docs/api.md */
|
|
49
42
|
const autoconsentConfig = Object.freeze({
|
|
50
43
|
/* activate consent rule matching */
|
|
51
44
|
enabled: true,
|
|
52
45
|
/* automatically reject (opt-out) all cookies */
|
|
53
46
|
autoAction: 'optOut',
|
|
47
|
+
/* skip these CMPs even if detected */
|
|
48
|
+
disabledCmps: [],
|
|
54
49
|
/* hide banners early via CSS before detection finishes */
|
|
55
50
|
enablePrehide: true,
|
|
56
51
|
/* apply CSS-only rules that hide popups lacking a reject button */
|
|
57
52
|
enableCosmeticRules: true,
|
|
58
53
|
/* enable rules auto-generated from common CMP patterns */
|
|
59
54
|
enableGeneratedRules: true,
|
|
60
|
-
/* fall back to heuristic click when no specific rule matches */
|
|
61
|
-
enableHeuristicAction: true,
|
|
62
55
|
/* skip bundled ABP/uBO cosmetic filter list (saves bundle size) */
|
|
63
56
|
enableFilterList: false,
|
|
57
|
+
/* detect CMPs using heuristics when no specific rule matches */
|
|
58
|
+
enableHeuristicDetection: true,
|
|
59
|
+
/* fall back to heuristic click when no specific rule matches */
|
|
60
|
+
enableHeuristicAction: true,
|
|
61
|
+
/* run in the page's main world (false = isolated world) */
|
|
62
|
+
isMainWorld: false,
|
|
63
|
+
/* max ms to keep prehide CSS applied before removing it */
|
|
64
|
+
prehideTimeout: 2000,
|
|
64
65
|
/* how many times to retry CMP detection (~50 ms apart) */
|
|
65
66
|
detectRetries: 20,
|
|
66
67
|
logs: {
|
|
@@ -68,12 +69,16 @@ const autoconsentConfig = Object.freeze({
|
|
|
68
69
|
lifecycle: false,
|
|
69
70
|
/* individual rule step execution */
|
|
70
71
|
rulesteps: false,
|
|
72
|
+
/* CMP detection step details */
|
|
73
|
+
detectionsteps: false,
|
|
71
74
|
/* eval snippet calls */
|
|
72
75
|
evals: false,
|
|
73
76
|
/* rule errors */
|
|
74
77
|
errors: false,
|
|
75
78
|
/* background ↔ content-script messages */
|
|
76
|
-
messages: false
|
|
79
|
+
messages: false,
|
|
80
|
+
/* wait/delay step timing */
|
|
81
|
+
waits: false
|
|
77
82
|
}
|
|
78
83
|
})
|
|
79
84
|
|
|
@@ -95,35 +100,54 @@ const setupAutoConsent = async (page, timeout) => {
|
|
|
95
100
|
if (!message || typeof message !== 'object') return
|
|
96
101
|
if (message.__nonce !== nonce) return
|
|
97
102
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
103
|
+
switch (message.type) {
|
|
104
|
+
case 'init': {
|
|
105
|
+
page._autoconsentInitDone = true
|
|
106
|
+
const rules = await getAutoconsentRules()
|
|
107
|
+
return sendMessage(page, { type: 'initResp', config: autoconsentConfig, rules })
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
case 'eval': {
|
|
111
|
+
let result = false
|
|
112
|
+
try {
|
|
113
|
+
result = await pTimeout(page.evaluate(message.code), timeout)
|
|
114
|
+
} catch {}
|
|
115
|
+
return sendMessage(page, { type: 'evalResp', id: message.id, result })
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
case 'cmpDetected':
|
|
119
|
+
case 'popupFound':
|
|
120
|
+
case 'autoconsentDone':
|
|
121
|
+
debug(message.type, { cmp: message.cmp })
|
|
122
|
+
break
|
|
101
123
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
124
|
+
case 'optOutResult':
|
|
125
|
+
debug(message.type, { result: message.result })
|
|
126
|
+
break
|
|
127
|
+
|
|
128
|
+
case 'autoconsentError':
|
|
129
|
+
debug(message.type, { details: message.details })
|
|
130
|
+
break
|
|
108
131
|
}
|
|
109
132
|
})
|
|
110
133
|
|
|
111
|
-
/*
|
|
112
|
-
|
|
113
|
-
binding which lacks the nonce, so their messages are
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
if (raw) window.autoconsentSendMessage = msg => raw({ ...msg, __nonce: n })
|
|
118
|
-
}, nonce)
|
|
119
|
-
|
|
120
|
-
await page.evaluateOnNewDocument(autoconsentPlaywrightScript)
|
|
134
|
+
/* Single injection: wrap the binding in the top frame so every outgoing
|
|
135
|
+
message carries the nonce, then run the autoconsent script. Child frames
|
|
136
|
+
keep the raw CDP binding which lacks the nonce, so their messages are
|
|
137
|
+
silently rejected. */
|
|
138
|
+
const nonceGuard = `(function(n){if(window.self!==window.top)return;var raw=window.autoconsentSendMessage;if(raw)window.autoconsentSendMessage=function(msg){return raw(Object.assign({},msg,{__nonce:n}))}})(${JSON.stringify(nonce)});`
|
|
139
|
+
await page.evaluateOnNewDocument(nonceGuard + autoconsentPlaywrightScript)
|
|
121
140
|
page._autoconsentSetup = true
|
|
122
141
|
}
|
|
123
142
|
|
|
124
|
-
const runAutoConsent = async page =>
|
|
143
|
+
const runAutoConsent = async page => {
|
|
144
|
+
if (page._autoconsentInitDone) return
|
|
145
|
+
return page.evaluate(await getAutoconsentPlaywrightScript())
|
|
146
|
+
}
|
|
125
147
|
|
|
126
148
|
const enableBlockingInPage = (page, run, timeout) => {
|
|
149
|
+
getAutoconsentRules().catch(() => {})
|
|
150
|
+
|
|
127
151
|
page.disableAdblock = () =>
|
|
128
152
|
getEngine()
|
|
129
153
|
.then(engine => engine.disableBlockingInPage(page, { keepRequestInterception: true }))
|
package/src/engine.bin
CHANGED
|
Binary file
|