@browserless/goto 10.9.18 → 10.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/package.json +3 -2
- package/src/adblock.js +109 -0
- package/src/index.js +13 -39
package/README.md
CHANGED
|
@@ -113,6 +113,8 @@ await goto(page, { url: 'https://example.com', adblock: false })
|
|
|
113
113
|
page.disableAdblock()
|
|
114
114
|
```
|
|
115
115
|
|
|
116
|
+
Cookie consent handling (opt-out) is included as part of `adblock: true` and powered by [duckduckgo/autoconsent](https://github.com/duckduckgo/autoconsent).
|
|
117
|
+
|
|
116
118
|
### Script and style injection
|
|
117
119
|
|
|
118
120
|
Inject external resources or inline code:
|
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "@browserless/goto",
|
|
3
3
|
"description": "Navigate to web pages with built-in ad blocking, device emulation, and optimized loading for faster automation.",
|
|
4
4
|
"homepage": "https://browserless.js.org/#/?id=gotopage-options",
|
|
5
|
-
"version": "10.9.18",
|
|
5
|
+
"version": "10.10.0",
|
|
6
6
|
"main": "src/index.js",
|
|
7
7
|
"author": {
|
|
8
8
|
"email": "hello@microlink.io",
|
|
@@ -31,6 +31,7 @@
|
|
|
31
31
|
],
|
|
32
32
|
"dependencies": {
|
|
33
33
|
"@browserless/devices": "^10.9.18",
|
|
34
|
+
"@duckduckgo/autoconsent": "~14.53.0",
|
|
34
35
|
"@ghostery/adblocker-puppeteer": "~2.13.4",
|
|
35
36
|
"debug-logfmt": "~1.4.7",
|
|
36
37
|
"got": "~11.8.6",
|
|
@@ -65,5 +66,5 @@
|
|
|
65
66
|
"timeout": "2m",
|
|
66
67
|
"workerThreads": false
|
|
67
68
|
},
|
|
68
|
-
"gitHead": "
|
|
69
|
+
"gitHead": "9b80e677418f2defb804d39043680fe65e3e277b"
|
|
69
70
|
}
|
package/src/adblock.js
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
'use strict'
|
|
2
|
+
|
|
3
|
+
const { PuppeteerBlocker } = require('@ghostery/adblocker-puppeteer')
|
|
4
|
+
const path = require('path')
|
|
5
|
+
const fs = require('fs')
|
|
6
|
+
|
|
7
|
+
const debug = require('debug-logfmt')('browserless:goto:adblock')
|
|
8
|
+
|
|
9
|
+
const engine = PuppeteerBlocker.deserialize(
|
|
10
|
+
new Uint8Array(fs.readFileSync(path.resolve(__dirname, './engine.bin')))
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
engine.on('request-blocked', ({ url }) => debug('block', url))
|
|
14
|
+
engine.on('request-redirected', ({ url }) => debug('redirect', url))
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* autoconsent.playwright.js is the only browser-injectable IIFE bundle in the package.
|
|
18
|
+
* It is not in the package's "exports" map, so pin @duckduckgo/autoconsent with ~ to
|
|
19
|
+
* avoid breakage from internal restructuring on minor/patch bumps.
|
|
20
|
+
*/
|
|
21
|
+
const autoconsentPlaywrightScript = fs.readFileSync(
|
|
22
|
+
path.resolve(
|
|
23
|
+
path.dirname(require.resolve('@duckduckgo/autoconsent')),
|
|
24
|
+
'autoconsent.playwright.js'
|
|
25
|
+
),
|
|
26
|
+
'utf8'
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
/* Configuration passed to autoconsent's `initResp` message.
|
|
30
|
+
See https://github.com/duckduckgo/autoconsent/blob/main/api.md */
|
|
31
|
+
const autoconsentConfig = Object.freeze({
|
|
32
|
+
/* activate consent rule matching */
|
|
33
|
+
enabled: true,
|
|
34
|
+
/* automatically reject (opt-out) all cookies */
|
|
35
|
+
autoAction: 'optOut',
|
|
36
|
+
/* hide banners early via CSS before detection finishes */
|
|
37
|
+
enablePrehide: true,
|
|
38
|
+
/* apply CSS-only rules that hide popups lacking a reject button */
|
|
39
|
+
enableCosmeticRules: true,
|
|
40
|
+
/* skip bundled ABP/uBO cosmetic filter list (saves bundle size) */
|
|
41
|
+
enableFilterList: false,
|
|
42
|
+
/* how many times to retry CMP detection (~50 ms apart) */
|
|
43
|
+
detectRetries: 20,
|
|
44
|
+
logs: {
|
|
45
|
+
/* CMP detection / opt-out lifecycle events */
|
|
46
|
+
lifecycle: false,
|
|
47
|
+
/* individual rule step execution */
|
|
48
|
+
rulesteps: false,
|
|
49
|
+
/* eval snippet calls */
|
|
50
|
+
evals: false,
|
|
51
|
+
/* rule errors */
|
|
52
|
+
errors: false,
|
|
53
|
+
/* background ↔ content-script messages */
|
|
54
|
+
messages: false
|
|
55
|
+
}
|
|
56
|
+
})
|
|
57
|
+
|
|
58
|
+
const sendMessage = (page, message) =>
|
|
59
|
+
page
|
|
60
|
+
.evaluate(msg => {
|
|
61
|
+
if (window.autoconsentReceiveMessage) {
|
|
62
|
+
return window.autoconsentReceiveMessage(msg)
|
|
63
|
+
}
|
|
64
|
+
}, message)
|
|
65
|
+
.catch(() => {})
|
|
66
|
+
|
|
67
|
+
const setupAutoConsent = async page => {
|
|
68
|
+
if (page._autoconsentSetup) return
|
|
69
|
+
|
|
70
|
+
await page.exposeFunction('autoconsentSendMessage', async message => {
|
|
71
|
+
if (!message || typeof message !== 'object') return
|
|
72
|
+
|
|
73
|
+
if (message.type === 'init') {
|
|
74
|
+
return sendMessage(page, { type: 'initResp', config: autoconsentConfig })
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
if (message.type === 'eval') {
|
|
78
|
+
return sendMessage(page, { type: 'evalResp', id: message.id, result: false })
|
|
79
|
+
}
|
|
80
|
+
})
|
|
81
|
+
|
|
82
|
+
await page.evaluateOnNewDocument(autoconsentPlaywrightScript)
|
|
83
|
+
page._autoconsentSetup = true
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
const runAutoConsent = page => page.evaluate(autoconsentPlaywrightScript)
|
|
87
|
+
|
|
88
|
+
const enableBlockingInPage = (page, run, actionTimeout) => {
|
|
89
|
+
page.disableAdblock = () =>
|
|
90
|
+
engine
|
|
91
|
+
.disableBlockingInPage(page, { keepRequestInterception: true })
|
|
92
|
+
.then(() => debug('disabled'))
|
|
93
|
+
.catch(() => {})
|
|
94
|
+
|
|
95
|
+
return [
|
|
96
|
+
run({
|
|
97
|
+
fn: setupAutoConsent(page),
|
|
98
|
+
timeout: actionTimeout,
|
|
99
|
+
debug: 'autoconsent:setup'
|
|
100
|
+
}),
|
|
101
|
+
run({
|
|
102
|
+
fn: engine.enableBlockingInPage(page),
|
|
103
|
+
timeout: actionTimeout,
|
|
104
|
+
debug: 'adblock'
|
|
105
|
+
})
|
|
106
|
+
]
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
module.exports = { enableBlockingInPage, runAutoConsent }
|
package/src/index.js
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
'use strict'
|
|
2
2
|
|
|
3
|
-
const { PuppeteerBlocker } = require('@ghostery/adblocker-puppeteer')
|
|
4
3
|
const { shallowEqualObjects } = require('shallow-equal')
|
|
5
4
|
const { setTimeout } = require('node:timers/promises')
|
|
6
5
|
const createDevices = require('@browserless/devices')
|
|
@@ -8,25 +7,17 @@ const toughCookie = require('tough-cookie')
|
|
|
8
7
|
const pReflect = require('p-reflect')
|
|
9
8
|
const pTimeout = require('p-timeout')
|
|
10
9
|
const isUrl = require('is-url-http')
|
|
11
|
-
const path = require('path')
|
|
12
|
-
const fs = require('fs')
|
|
13
10
|
|
|
14
11
|
const { DEFAULT_INTERCEPT_RESOLUTION_PRIORITY } = require('puppeteer')
|
|
15
12
|
|
|
13
|
+
const adblock = require('./adblock')
|
|
14
|
+
|
|
16
15
|
const debug = require('debug-logfmt')('browserless:goto')
|
|
17
16
|
debug.continue = require('debug-logfmt')('browserless:goto:continue')
|
|
18
17
|
debug.abort = require('debug-logfmt')('browserless:goto:abort')
|
|
19
|
-
debug.adblock = require('debug-logfmt')('browserless:goto:adblock')
|
|
20
18
|
|
|
21
19
|
const truncate = (str, n = 80) => (str.length > n ? str.substr(0, n - 1) + '…' : str)
|
|
22
20
|
|
|
23
|
-
const engine = PuppeteerBlocker.deserialize(
|
|
24
|
-
new Uint8Array(fs.readFileSync(path.resolve(__dirname, './engine.bin')))
|
|
25
|
-
)
|
|
26
|
-
|
|
27
|
-
engine.on('request-blocked', ({ url }) => debug.adblock('block', url))
|
|
28
|
-
engine.on('request-redirected', ({ url }) => debug.adblock('redirect', url))
|
|
29
|
-
|
|
30
21
|
const isEmpty = val => val == null || !(Object.keys(val) || val).length
|
|
31
22
|
|
|
32
23
|
const castArray = value => [].concat(value).filter(Boolean)
|
|
@@ -181,7 +172,7 @@ module.exports = ({ defaultDevice = 'Macbook Pro 13', timeout: globalTimeout, ..
|
|
|
181
172
|
page,
|
|
182
173
|
{
|
|
183
174
|
abortTypes = [],
|
|
184
|
-
adblock = true,
|
|
175
|
+
adblock: withAdblock = true,
|
|
185
176
|
animations = false,
|
|
186
177
|
authenticate,
|
|
187
178
|
click,
|
|
@@ -268,33 +259,8 @@ module.exports = ({ defaultDevice = 'Macbook Pro 13', timeout: globalTimeout, ..
|
|
|
268
259
|
})
|
|
269
260
|
}
|
|
270
261
|
|
|
271
|
-
if (adblock) {
|
|
272
|
-
let adblockContext
|
273
|
-
|
|
274
|
-
page.disableAdblock = () => {
|
|
275
|
-
// TODO: drop this when https://github.com/ghostery/adblocker/pull/5161 is merged
|
|
276
|
-
|
|
277
|
-
engine.contexts.delete(page)
|
|
278
|
-
|
|
279
|
-
if (adblockContext.blocker.config.loadNetworkFilters) {
|
|
280
|
-
adblockContext.page.off('request', adblockContext.onRequest)
|
|
281
|
-
}
|
|
282
|
-
|
|
283
|
-
if (adblockContext.blocker.config.loadCosmeticFilters) {
|
|
284
|
-
adblockContext.page.off('frameattached', adblockContext.onFrameNavigated)
|
|
285
|
-
adblockContext.page.off('domcontentloaded', adblockContext.onDomContentLoaded)
|
|
286
|
-
}
|
|
287
|
-
|
|
288
|
-
debug.adblock('disabled')
|
|
289
|
-
}
|
|
290
|
-
|
|
291
|
-
prePromises.push(
|
|
292
|
-
run({
|
|
293
|
-
fn: engine.enableBlockingInPage(page).then(context => (adblockContext = context)),
|
|
294
|
-
timeout: actionTimeout,
|
|
295
|
-
debug: 'adblock'
|
|
296
|
-
})
|
|
297
|
-
)
|
|
262
|
+
if (withAdblock) {
|
|
263
|
+
prePromises.push(...adblock.enableBlockingInPage(page, run, actionTimeout))
|
|
298
264
|
}
|
|
299
265
|
|
|
300
266
|
if (javascript === false) {
|
|
@@ -398,6 +364,14 @@ module.exports = ({ defaultDevice = 'Macbook Pro 13', timeout: globalTimeout, ..
|
|
|
398
364
|
debug: { fn: html ? 'html' : 'url', waitUntil }
|
|
399
365
|
})
|
|
400
366
|
|
|
367
|
+
if (withAdblock) {
|
|
368
|
+
await run({
|
|
369
|
+
fn: adblock.runAutoConsent(page),
|
|
370
|
+
timeout: actionTimeout,
|
|
371
|
+
debug: 'autoconsent:run'
|
|
372
|
+
})
|
|
373
|
+
}
|
|
374
|
+
|
|
401
375
|
for (const [key, value] of Object.entries({
|
|
402
376
|
waitForSelector,
|
|
403
377
|
waitForFunction
|