@browserless/goto 10.11.4 → 10.12.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -6
- package/src/adblock.js +80 -35
- package/src/engine.bin +0 -0
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "@browserless/goto",
|
|
3
3
|
"description": "Navigate to web pages with built-in ad blocking, device emulation, and optimized loading for faster automation.",
|
|
4
4
|
"homepage": "https://browserless.js.org/#/?id=gotopage-options",
|
|
5
|
-
"version": "10.11.4",
|
|
5
|
+
"version": "10.12.7",
|
|
6
6
|
"main": "src/index.js",
|
|
7
7
|
"author": {
|
|
8
8
|
"email": "hello@microlink.io",
|
|
@@ -31,18 +31,18 @@
|
|
|
31
31
|
],
|
|
32
32
|
"dependencies": {
|
|
33
33
|
"@browserless/devices": "^10.11.3",
|
|
34
|
-
"@duckduckgo/autoconsent": "~14.
|
|
34
|
+
"@duckduckgo/autoconsent": "~14.67.0",
|
|
35
35
|
"@ghostery/adblocker-puppeteer": "~2.14.1",
|
|
36
|
-
"debug-logfmt": "~1.4.
|
|
36
|
+
"debug-logfmt": "~1.4.10",
|
|
37
37
|
"got": "~11.8.6",
|
|
38
38
|
"is-url-http": "~2.3.13",
|
|
39
39
|
"p-reflect": "~2.1.0",
|
|
40
40
|
"p-timeout": "~4.1.0",
|
|
41
41
|
"shallow-equal": "~3.1.0",
|
|
42
|
-
"tough-cookie": "~6.0.
|
|
42
|
+
"tough-cookie": "~6.0.1"
|
|
43
43
|
},
|
|
44
44
|
"devDependencies": {
|
|
45
|
-
"@browserless/test": "^10.
|
|
45
|
+
"@browserless/test": "^10.12.6",
|
|
46
46
|
"ava": "5",
|
|
47
47
|
"p-wait-for": "3"
|
|
48
48
|
},
|
|
@@ -66,5 +66,5 @@
|
|
|
66
66
|
"timeout": "2m",
|
|
67
67
|
"workerThreads": false
|
|
68
68
|
},
|
|
69
|
-
"gitHead": "
|
|
69
|
+
"gitHead": "398978a3237780fa44ee70e9ffeb30759cab50a9"
|
|
70
70
|
}
|
package/src/adblock.js
CHANGED
|
@@ -1,60 +1,67 @@
|
|
|
1
1
|
'use strict'
|
|
2
2
|
|
|
3
3
|
const { PuppeteerBlocker } = require('@ghostery/adblocker-puppeteer')
|
|
4
|
+
const { randomUUID } = require('crypto')
|
|
5
|
+
const pTimeout = require('p-timeout')
|
|
4
6
|
const fs = require('fs/promises')
|
|
5
7
|
const path = require('path')
|
|
6
8
|
|
|
7
9
|
const debug = require('debug-logfmt')('browserless:goto:adblock')
|
|
8
10
|
|
|
9
|
-
|
|
11
|
+
const lazy = fn => {
|
|
12
|
+
let p
|
|
13
|
+
return () => (p ??= fn())
|
|
14
|
+
}
|
|
10
15
|
|
|
11
|
-
const
|
|
12
|
-
if (enginePromise) return enginePromise
|
|
16
|
+
const autoconsentDir = path.dirname(require.resolve('@duckduckgo/autoconsent'))
|
|
13
17
|
|
|
14
|
-
|
|
18
|
+
const getEngine = lazy(() =>
|
|
19
|
+
fs.readFile(path.resolve(__dirname, './engine.bin')).then(buffer => {
|
|
15
20
|
const engine = PuppeteerBlocker.deserialize(new Uint8Array(buffer))
|
|
16
21
|
engine.on('request-blocked', ({ url }) => debug('block', url))
|
|
17
22
|
engine.on('request-redirected', ({ url }) => debug('redirect', url))
|
|
18
23
|
return engine
|
|
19
24
|
})
|
|
20
|
-
|
|
21
|
-
return enginePromise
|
|
22
|
-
}
|
|
25
|
+
)
|
|
23
26
|
|
|
24
27
|
/**
|
|
25
28
|
* autoconsent.playwright.js is the only browser-injectable IIFE bundle in the package.
|
|
26
29
|
* It is not in the package's "exports" map, so pin @duckduckgo/autoconsent with ~ to
|
|
27
30
|
* avoid breakage from internal restructuring on minor/patch bumps.
|
|
28
31
|
*/
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
if (autoconsentPlaywrightScriptPromise) return autoconsentPlaywrightScriptPromise
|
|
32
|
+
const getAutoconsentPlaywrightScript = lazy(() =>
|
|
33
|
+
fs.readFile(path.resolve(autoconsentDir, 'autoconsent.playwright.js'), 'utf8')
|
|
34
|
+
)
|
|
33
35
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
'autoconsent.playwright.js'
|
|
38
|
-
),
|
|
39
|
-
'utf8'
|
|
40
|
-
)
|
|
41
|
-
|
|
42
|
-
return autoconsentPlaywrightScriptPromise
|
|
43
|
-
}
|
|
36
|
+
const getAutoconsentRules = lazy(() =>
|
|
37
|
+
fs.readFile(path.resolve(autoconsentDir, '../rules/compact-rules.json'), 'utf8').then(JSON.parse)
|
|
38
|
+
)
|
|
44
39
|
|
|
45
40
|
/* Configuration passed to autoconsent's `initResp` message.
|
|
46
|
-
See https://github.com/duckduckgo/autoconsent/blob/main/api.md */
|
|
41
|
+
See https://github.com/duckduckgo/autoconsent/blob/main/docs/api.md */
|
|
47
42
|
const autoconsentConfig = Object.freeze({
|
|
48
43
|
/* activate consent rule matching */
|
|
49
44
|
enabled: true,
|
|
50
45
|
/* automatically reject (opt-out) all cookies */
|
|
51
46
|
autoAction: 'optOut',
|
|
47
|
+
/* skip these CMPs even if detected */
|
|
48
|
+
disabledCmps: [],
|
|
52
49
|
/* hide banners early via CSS before detection finishes */
|
|
53
50
|
enablePrehide: true,
|
|
54
51
|
/* apply CSS-only rules that hide popups lacking a reject button */
|
|
55
52
|
enableCosmeticRules: true,
|
|
53
|
+
/* enable rules auto-generated from common CMP patterns */
|
|
54
|
+
enableGeneratedRules: true,
|
|
56
55
|
/* skip bundled ABP/uBO cosmetic filter list (saves bundle size) */
|
|
57
56
|
enableFilterList: false,
|
|
57
|
+
/* detect CMPs using heuristics when no specific rule matches */
|
|
58
|
+
enableHeuristicDetection: true,
|
|
59
|
+
/* fall back to heuristic click when no specific rule matches */
|
|
60
|
+
enableHeuristicAction: true,
|
|
61
|
+
/* run in the page's main world (false = isolated world) */
|
|
62
|
+
isMainWorld: false,
|
|
63
|
+
/* max ms to keep prehide CSS applied before removing it */
|
|
64
|
+
prehideTimeout: 2000,
|
|
58
65
|
/* how many times to retry CMP detection (~50 ms apart) */
|
|
59
66
|
detectRetries: 20,
|
|
60
67
|
logs: {
|
|
@@ -62,12 +69,16 @@ const autoconsentConfig = Object.freeze({
|
|
|
62
69
|
lifecycle: false,
|
|
63
70
|
/* individual rule step execution */
|
|
64
71
|
rulesteps: false,
|
|
72
|
+
/* CMP detection step details */
|
|
73
|
+
detectionsteps: false,
|
|
65
74
|
/* eval snippet calls */
|
|
66
75
|
evals: false,
|
|
67
76
|
/* rule errors */
|
|
68
77
|
errors: false,
|
|
69
78
|
/* background ↔ content-script messages */
|
|
70
|
-
messages: false
|
|
79
|
+
messages: false,
|
|
80
|
+
/* wait/delay step timing */
|
|
81
|
+
waits: false
|
|
71
82
|
}
|
|
72
83
|
})
|
|
73
84
|
|
|
@@ -80,29 +91,63 @@ const sendMessage = (page, message) =>
|
|
|
80
91
|
}, message)
|
|
81
92
|
.catch(() => {})
|
|
82
93
|
|
|
83
|
-
const setupAutoConsent = async page => {
|
|
94
|
+
const setupAutoConsent = async (page, timeout) => {
|
|
84
95
|
if (page._autoconsentSetup) return
|
|
85
96
|
const autoconsentPlaywrightScript = await getAutoconsentPlaywrightScript()
|
|
97
|
+
const nonce = randomUUID()
|
|
86
98
|
|
|
87
99
|
await page.exposeFunction('autoconsentSendMessage', async message => {
|
|
88
100
|
if (!message || typeof message !== 'object') return
|
|
101
|
+
if (message.__nonce !== nonce) return
|
|
89
102
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
103
|
+
switch (message.type) {
|
|
104
|
+
case 'init': {
|
|
105
|
+
page._autoconsentInitDone = true
|
|
106
|
+
const rules = await getAutoconsentRules()
|
|
107
|
+
return sendMessage(page, { type: 'initResp', config: autoconsentConfig, rules })
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
case 'eval': {
|
|
111
|
+
let result = false
|
|
112
|
+
try {
|
|
113
|
+
result = await pTimeout(page.evaluate(message.code), timeout)
|
|
114
|
+
} catch {}
|
|
115
|
+
return sendMessage(page, { type: 'evalResp', id: message.id, result })
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
case 'cmpDetected':
|
|
119
|
+
case 'popupFound':
|
|
120
|
+
case 'autoconsentDone':
|
|
121
|
+
debug(message.type, { cmp: message.cmp })
|
|
122
|
+
break
|
|
93
123
|
|
|
94
|
-
|
|
95
|
-
|
|
124
|
+
case 'optOutResult':
|
|
125
|
+
debug(message.type, { result: message.result })
|
|
126
|
+
break
|
|
127
|
+
|
|
128
|
+
case 'autoconsentError':
|
|
129
|
+
debug(message.type, { details: message.details })
|
|
130
|
+
break
|
|
96
131
|
}
|
|
97
132
|
})
|
|
98
133
|
|
|
99
|
-
|
|
134
|
+
/* Single injection: wrap the binding in the top frame so every outgoing
|
|
135
|
+
message carries the nonce, then run the autoconsent script. Child frames
|
|
136
|
+
keep the raw CDP binding which lacks the nonce, so their messages are
|
|
137
|
+
silently rejected. */
|
|
138
|
+
const nonceGuard = `(function(n){if(window.self!==window.top)return;var raw=window.autoconsentSendMessage;if(raw)window.autoconsentSendMessage=function(msg){return raw(Object.assign({},msg,{__nonce:n}))}})(${JSON.stringify(nonce)});`
|
|
139
|
+
await page.evaluateOnNewDocument(nonceGuard + autoconsentPlaywrightScript)
|
|
100
140
|
page._autoconsentSetup = true
|
|
101
141
|
}
|
|
102
142
|
|
|
103
|
-
const runAutoConsent = async page =>
|
|
143
|
+
const runAutoConsent = async page => {
|
|
144
|
+
if (page._autoconsentInitDone) return
|
|
145
|
+
return page.evaluate(await getAutoconsentPlaywrightScript())
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
const enableBlockingInPage = (page, run, timeout) => {
|
|
149
|
+
getAutoconsentRules().catch(() => {})
|
|
104
150
|
|
|
105
|
-
const enableBlockingInPage = (page, run, actionTimeout) => {
|
|
106
151
|
page.disableAdblock = () =>
|
|
107
152
|
getEngine()
|
|
108
153
|
.then(engine => engine.disableBlockingInPage(page, { keepRequestInterception: true }))
|
|
@@ -111,13 +156,13 @@ const enableBlockingInPage = (page, run, actionTimeout) => {
|
|
|
111
156
|
|
|
112
157
|
return [
|
|
113
158
|
run({
|
|
114
|
-
fn: setupAutoConsent(page),
|
|
115
|
-
timeout
|
|
159
|
+
fn: setupAutoConsent(page, timeout),
|
|
160
|
+
timeout,
|
|
116
161
|
debug: 'autoconsent:setup'
|
|
117
162
|
}),
|
|
118
163
|
run({
|
|
119
164
|
fn: getEngine().then(engine => engine.enableBlockingInPage(page)),
|
|
120
|
-
timeout
|
|
165
|
+
timeout,
|
|
121
166
|
debug: 'adblock'
|
|
122
167
|
})
|
|
123
168
|
]
|
package/src/engine.bin
CHANGED
|
Binary file
|