@browserless/goto 10.9.18 → 10.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/package.json +3 -2
- package/src/adblock.js +126 -0
- package/src/engine.bin +0 -0
- package/src/index.js +20 -47
package/README.md
CHANGED
|
@@ -113,6 +113,8 @@ await goto(page, { url: 'https://example.com', adblock: false })
|
|
|
113
113
|
page.disableAdblock()
|
|
114
114
|
```
|
|
115
115
|
|
|
116
|
+
Cookie consent handling (opt-out) is included as part of `adblock: true` and powered by [duckduckgo/autoconsent](https://github.com/duckduckgo/autoconsent).
|
|
117
|
+
|
|
116
118
|
### Script and style injection
|
|
117
119
|
|
|
118
120
|
Inject external resources or inline code:
|
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "@browserless/goto",
|
|
3
3
|
"description": "Navigate to web pages with built-in ad blocking, device emulation, and optimized loading for faster automation.",
|
|
4
4
|
"homepage": "https://browserless.js.org/#/?id=gotopage-options",
|
|
5
|
-
"version": "10.9.18",
|
|
5
|
+
"version": "10.10.1",
|
|
6
6
|
"main": "src/index.js",
|
|
7
7
|
"author": {
|
|
8
8
|
"email": "hello@microlink.io",
|
|
@@ -31,6 +31,7 @@
|
|
|
31
31
|
],
|
|
32
32
|
"dependencies": {
|
|
33
33
|
"@browserless/devices": "^10.9.18",
|
|
34
|
+
"@duckduckgo/autoconsent": "~14.53.0",
|
|
34
35
|
"@ghostery/adblocker-puppeteer": "~2.13.4",
|
|
35
36
|
"debug-logfmt": "~1.4.7",
|
|
36
37
|
"got": "~11.8.6",
|
|
@@ -65,5 +66,5 @@
|
|
|
65
66
|
"timeout": "2m",
|
|
66
67
|
"workerThreads": false
|
|
67
68
|
},
|
|
68
|
-
"gitHead": "
|
|
69
|
+
"gitHead": "32f6e72bcb489a83ac9659520a3961aeb97c47b7"
|
|
69
70
|
}
|
package/src/adblock.js
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
'use strict'
|
|
2
|
+
|
|
3
|
+
const { PuppeteerBlocker } = require('@ghostery/adblocker-puppeteer')
|
|
4
|
+
const path = require('path')
|
|
5
|
+
const fs = require('fs')
|
|
6
|
+
|
|
7
|
+
const debug = require('debug-logfmt')('browserless:goto:adblock')
|
|
8
|
+
|
|
9
|
+
let enginePromise
|
|
10
|
+
|
|
11
|
+
const getEngine = () => {
|
|
12
|
+
if (enginePromise) return enginePromise
|
|
13
|
+
|
|
14
|
+
enginePromise = fs.promises.readFile(path.resolve(__dirname, './engine.bin')).then(buffer => {
|
|
15
|
+
const engine = PuppeteerBlocker.deserialize(new Uint8Array(buffer))
|
|
16
|
+
engine.on('request-blocked', ({ url }) => debug('block', url))
|
|
17
|
+
engine.on('request-redirected', ({ url }) => debug('redirect', url))
|
|
18
|
+
return engine
|
|
19
|
+
})
|
|
20
|
+
|
|
21
|
+
return enginePromise
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* autoconsent.playwright.js is the only browser-injectable IIFE bundle in the package.
|
|
26
|
+
* It is not in the package's "exports" map, so pin @duckduckgo/autoconsent with ~ to
|
|
27
|
+
* avoid breakage from internal restructuring on minor/patch bumps.
|
|
28
|
+
*/
|
|
29
|
+
let autoconsentPlaywrightScriptPromise
|
|
30
|
+
|
|
31
|
+
const getAutoconsentPlaywrightScript = () => {
|
|
32
|
+
if (autoconsentPlaywrightScriptPromise) return autoconsentPlaywrightScriptPromise
|
|
33
|
+
|
|
34
|
+
autoconsentPlaywrightScriptPromise = fs.promises.readFile(
|
|
35
|
+
path.resolve(
|
|
36
|
+
path.dirname(require.resolve('@duckduckgo/autoconsent')),
|
|
37
|
+
'autoconsent.playwright.js'
|
|
38
|
+
),
|
|
39
|
+
'utf8'
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
return autoconsentPlaywrightScriptPromise
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
/* Configuration passed to autoconsent's `initResp` message.
|
|
46
|
+
See https://github.com/duckduckgo/autoconsent/blob/main/api.md */
|
|
47
|
+
const autoconsentConfig = Object.freeze({
|
|
48
|
+
/* activate consent rule matching */
|
|
49
|
+
enabled: true,
|
|
50
|
+
/* automatically reject (opt-out) all cookies */
|
|
51
|
+
autoAction: 'optOut',
|
|
52
|
+
/* hide banners early via CSS before detection finishes */
|
|
53
|
+
enablePrehide: true,
|
|
54
|
+
/* apply CSS-only rules that hide popups lacking a reject button */
|
|
55
|
+
enableCosmeticRules: true,
|
|
56
|
+
/* skip bundled ABP/uBO cosmetic filter list (saves bundle size) */
|
|
57
|
+
enableFilterList: false,
|
|
58
|
+
/* how many times to retry CMP detection (~50 ms apart) */
|
|
59
|
+
detectRetries: 20,
|
|
60
|
+
logs: {
|
|
61
|
+
/* CMP detection / opt-out lifecycle events */
|
|
62
|
+
lifecycle: false,
|
|
63
|
+
/* individual rule step execution */
|
|
64
|
+
rulesteps: false,
|
|
65
|
+
/* eval snippet calls */
|
|
66
|
+
evals: false,
|
|
67
|
+
/* rule errors */
|
|
68
|
+
errors: false,
|
|
69
|
+
/* background ↔ content-script messages */
|
|
70
|
+
messages: false
|
|
71
|
+
}
|
|
72
|
+
})
|
|
73
|
+
|
|
74
|
+
const sendMessage = (page, message) =>
|
|
75
|
+
page
|
|
76
|
+
.evaluate(msg => {
|
|
77
|
+
if (window.autoconsentReceiveMessage) {
|
|
78
|
+
return window.autoconsentReceiveMessage(msg)
|
|
79
|
+
}
|
|
80
|
+
}, message)
|
|
81
|
+
.catch(() => {})
|
|
82
|
+
|
|
83
|
+
const setupAutoConsent = async page => {
|
|
84
|
+
if (page._autoconsentSetup) return
|
|
85
|
+
const autoconsentPlaywrightScript = await getAutoconsentPlaywrightScript()
|
|
86
|
+
|
|
87
|
+
await page.exposeFunction('autoconsentSendMessage', async message => {
|
|
88
|
+
if (!message || typeof message !== 'object') return
|
|
89
|
+
|
|
90
|
+
if (message.type === 'init') {
|
|
91
|
+
return sendMessage(page, { type: 'initResp', config: autoconsentConfig })
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
if (message.type === 'eval') {
|
|
95
|
+
return sendMessage(page, { type: 'evalResp', id: message.id, result: false })
|
|
96
|
+
}
|
|
97
|
+
})
|
|
98
|
+
|
|
99
|
+
await page.evaluateOnNewDocument(autoconsentPlaywrightScript)
|
|
100
|
+
page._autoconsentSetup = true
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
const runAutoConsent = async page => page.evaluate(await getAutoconsentPlaywrightScript())
|
|
104
|
+
|
|
105
|
+
const enableBlockingInPage = (page, run, actionTimeout) => {
|
|
106
|
+
page.disableAdblock = () =>
|
|
107
|
+
getEngine()
|
|
108
|
+
.then(engine => engine.disableBlockingInPage(page, { keepRequestInterception: true }))
|
|
109
|
+
.then(() => debug('disabled'))
|
|
110
|
+
.catch(() => {})
|
|
111
|
+
|
|
112
|
+
return [
|
|
113
|
+
run({
|
|
114
|
+
fn: setupAutoConsent(page),
|
|
115
|
+
timeout: actionTimeout,
|
|
116
|
+
debug: 'autoconsent:setup'
|
|
117
|
+
}),
|
|
118
|
+
run({
|
|
119
|
+
fn: getEngine().then(engine => engine.enableBlockingInPage(page)),
|
|
120
|
+
timeout: actionTimeout,
|
|
121
|
+
debug: 'adblock'
|
|
122
|
+
})
|
|
123
|
+
]
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
module.exports = { enableBlockingInPage, runAutoConsent }
|
package/src/engine.bin
CHANGED
|
Binary file
|
package/src/index.js
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
'use strict'
|
|
2
2
|
|
|
3
|
-
const { PuppeteerBlocker } = require('@ghostery/adblocker-puppeteer')
|
|
4
3
|
const { shallowEqualObjects } = require('shallow-equal')
|
|
5
4
|
const { setTimeout } = require('node:timers/promises')
|
|
6
5
|
const createDevices = require('@browserless/devices')
|
|
@@ -8,25 +7,17 @@ const toughCookie = require('tough-cookie')
|
|
|
8
7
|
const pReflect = require('p-reflect')
|
|
9
8
|
const pTimeout = require('p-timeout')
|
|
10
9
|
const isUrl = require('is-url-http')
|
|
11
|
-
const path = require('path')
|
|
12
|
-
const fs = require('fs')
|
|
13
10
|
|
|
14
11
|
const { DEFAULT_INTERCEPT_RESOLUTION_PRIORITY } = require('puppeteer')
|
|
15
12
|
|
|
13
|
+
const adblock = require('./adblock')
|
|
14
|
+
|
|
16
15
|
const debug = require('debug-logfmt')('browserless:goto')
|
|
17
16
|
debug.continue = require('debug-logfmt')('browserless:goto:continue')
|
|
18
17
|
debug.abort = require('debug-logfmt')('browserless:goto:abort')
|
|
19
|
-
debug.adblock = require('debug-logfmt')('browserless:goto:adblock')
|
|
20
18
|
|
|
21
19
|
const truncate = (str, n = 80) => (str.length > n ? str.substr(0, n - 1) + '…' : str)
|
|
22
20
|
|
|
23
|
-
const engine = PuppeteerBlocker.deserialize(
|
|
24
|
-
new Uint8Array(fs.readFileSync(path.resolve(__dirname, './engine.bin')))
|
|
25
|
-
)
|
|
26
|
-
|
|
27
|
-
engine.on('request-blocked', ({ url }) => debug.adblock('block', url))
|
|
28
|
-
engine.on('request-redirected', ({ url }) => debug.adblock('redirect', url))
|
|
29
|
-
|
|
30
21
|
const isEmpty = val => val == null || !(Object.keys(val) || val).length
|
|
31
22
|
|
|
32
23
|
const castArray = value => [].concat(value).filter(Boolean)
|
|
@@ -39,15 +30,13 @@ const run = async ({ fn, timeout, debug: props }) => {
|
|
|
39
30
|
return result
|
|
40
31
|
}
|
|
41
32
|
|
|
42
|
-
const parseCookies = (url, str) =>
|
|
43
|
-
str.split(';').reduce((acc, cookieStr) => {
|
|
44
|
-
const jar = new toughCookie.CookieJar(undefined, { rejectPublicSuffixes: false })
|
|
45
|
-
jar.setCookieSync(cookieStr.trim(), url)
|
|
46
|
-
const parsedCookie = jar.serializeSync().cookies[0]
|
|
33
|
+
const parseCookies = (url, str) => {
|
|
34
|
+
const jar = new toughCookie.CookieJar(undefined, { rejectPublicSuffixes: false })
|
|
47
35
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
36
|
+
return str.split(';').reduce((acc, cookieStr) => {
|
|
37
|
+
const cookie = jar.setCookieSync(cookieStr.trim(), url)
|
|
38
|
+
if (!cookie) return acc
|
|
39
|
+
const parsedCookie = cookie.toJSON()
|
|
51
40
|
|
|
52
41
|
parsedCookie.name = parsedCookie.key
|
|
53
42
|
delete parsedCookie.key
|
|
@@ -59,6 +48,7 @@ const parseCookies = (url, str) =>
|
|
|
59
48
|
acc.push(parsedCookie)
|
|
60
49
|
return acc
|
|
61
50
|
}, [])
|
|
51
|
+
}
|
|
62
52
|
|
|
63
53
|
const getMediaFeatures = ({ animations, colorScheme }) => {
|
|
64
54
|
const prefers = []
|
|
@@ -181,7 +171,7 @@ module.exports = ({ defaultDevice = 'Macbook Pro 13', timeout: globalTimeout, ..
|
|
|
181
171
|
page,
|
|
182
172
|
{
|
|
183
173
|
abortTypes = [],
|
|
184
|
-
adblock = true,
|
|
174
|
+
adblock: withAdblock = true,
|
|
185
175
|
animations = false,
|
|
186
176
|
authenticate,
|
|
187
177
|
click,
|
|
@@ -268,33 +258,8 @@ module.exports = ({ defaultDevice = 'Macbook Pro 13', timeout: globalTimeout, ..
|
|
|
268
258
|
})
|
|
269
259
|
}
|
|
270
260
|
|
|
271
|
-
if (adblock) {
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
page.disableAdblock = () => {
|
|
275
|
-
// TODO: drop this when https://github.com/ghostery/adblocker/pull/5161 is merged
|
|
276
|
-
|
|
277
|
-
engine.contexts.delete(page)
|
|
278
|
-
|
|
279
|
-
if (adblockContext.blocker.config.loadNetworkFilters) {
|
|
280
|
-
adblockContext.page.off('request', adblockContext.onRequest)
|
|
281
|
-
}
|
|
282
|
-
|
|
283
|
-
if (adblockContext.blocker.config.loadCosmeticFilters) {
|
|
284
|
-
adblockContext.page.off('frameattached', adblockContext.onFrameNavigated)
|
|
285
|
-
adblockContext.page.off('domcontentloaded', adblockContext.onDomContentLoaded)
|
|
286
|
-
}
|
|
287
|
-
|
|
288
|
-
debug.adblock('disabled')
|
|
289
|
-
}
|
|
290
|
-
|
|
291
|
-
prePromises.push(
|
|
292
|
-
run({
|
|
293
|
-
fn: engine.enableBlockingInPage(page).then(context => (adblockContext = context)),
|
|
294
|
-
timeout: actionTimeout,
|
|
295
|
-
debug: 'adblock'
|
|
296
|
-
})
|
|
297
|
-
)
|
|
261
|
+
if (withAdblock) {
|
|
262
|
+
prePromises.push(...adblock.enableBlockingInPage(page, run, actionTimeout))
|
|
298
263
|
}
|
|
299
264
|
|
|
300
265
|
if (javascript === false) {
|
|
@@ -398,6 +363,14 @@ module.exports = ({ defaultDevice = 'Macbook Pro 13', timeout: globalTimeout, ..
|
|
|
398
363
|
debug: { fn: html ? 'html' : 'url', waitUntil }
|
|
399
364
|
})
|
|
400
365
|
|
|
366
|
+
if (withAdblock) {
|
|
367
|
+
await run({
|
|
368
|
+
fn: adblock.runAutoConsent(page),
|
|
369
|
+
timeout: actionTimeout,
|
|
370
|
+
debug: 'autoconsent:run'
|
|
371
|
+
})
|
|
372
|
+
}
|
|
373
|
+
|
|
401
374
|
for (const [key, value] of Object.entries({
|
|
402
375
|
waitForSelector,
|
|
403
376
|
waitForFunction
|