@browserless/goto 10.10.1 → 10.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/adblock.js +3 -3
- package/src/index.js +224 -95
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "@browserless/goto",
|
|
3
3
|
"description": "Navigate to web pages with built-in ad blocking, device emulation, and optimized loading for faster automation.",
|
|
4
4
|
"homepage": "https://browserless.js.org/#/?id=gotopage-options",
|
|
5
|
-
"version": "10.10.1",
|
|
5
|
+
"version": "10.11.0",
|
|
6
6
|
"main": "src/index.js",
|
|
7
7
|
"author": {
|
|
8
8
|
"email": "hello@microlink.io",
|
|
@@ -66,5 +66,5 @@
|
|
|
66
66
|
"timeout": "2m",
|
|
67
67
|
"workerThreads": false
|
|
68
68
|
},
|
|
69
|
-
"gitHead": "
|
|
69
|
+
"gitHead": "e6bd03bae91b08baaa6f1c0c0c9743f6c7de3d29"
|
|
70
70
|
}
|
package/src/adblock.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
'use strict'
|
|
2
2
|
|
|
3
3
|
const { PuppeteerBlocker } = require('@ghostery/adblocker-puppeteer')
|
|
4
|
+
const fs = require('fs/promises')
|
|
4
5
|
const path = require('path')
|
|
5
|
-
const fs = require('fs')
|
|
6
6
|
|
|
7
7
|
const debug = require('debug-logfmt')('browserless:goto:adblock')
|
|
8
8
|
|
|
@@ -11,7 +11,7 @@ let enginePromise
|
|
|
11
11
|
const getEngine = () => {
|
|
12
12
|
if (enginePromise) return enginePromise
|
|
13
13
|
|
|
14
|
-
enginePromise = fs.promises.readFile(path.resolve(__dirname, './engine.bin')).then(buffer => {
|
|
14
|
+
enginePromise = fs.readFile(path.resolve(__dirname, './engine.bin')).then(buffer => {
|
|
15
15
|
const engine = PuppeteerBlocker.deserialize(new Uint8Array(buffer))
|
|
16
16
|
engine.on('request-blocked', ({ url }) => debug('block', url))
|
|
17
17
|
engine.on('request-redirected', ({ url }) => debug('redirect', url))
|
|
@@ -31,7 +31,7 @@ let autoconsentPlaywrightScriptPromise
|
|
|
31
31
|
const getAutoconsentPlaywrightScript = () => {
|
|
32
32
|
if (autoconsentPlaywrightScriptPromise) return autoconsentPlaywrightScriptPromise
|
|
33
33
|
|
|
34
|
-
autoconsentPlaywrightScriptPromise = fs.promises.readFile(
|
|
34
|
+
autoconsentPlaywrightScriptPromise = fs.readFile(
|
|
35
35
|
path.resolve(
|
|
36
36
|
path.dirname(require.resolve('@duckduckgo/autoconsent')),
|
|
37
37
|
'autoconsent.playwright.js'
|
package/src/index.js
CHANGED
|
@@ -22,15 +22,15 @@ const isEmpty = val => val == null || !(Object.keys(val) || val).length
|
|
|
22
22
|
|
|
23
23
|
const castArray = value => [].concat(value).filter(Boolean)
|
|
24
24
|
|
|
25
|
-
const
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
return
|
|
25
|
+
const getDefaultPath = pathname => {
|
|
26
|
+
if (!pathname || pathname[0] !== '/') return '/'
|
|
27
|
+
if (pathname === '/') return '/'
|
|
28
|
+
|
|
29
|
+
const rightSlash = pathname.lastIndexOf('/')
|
|
30
|
+
return rightSlash === 0 ? '/' : pathname.slice(0, rightSlash)
|
|
31
31
|
}
|
|
32
32
|
|
|
33
|
-
const parseCookies = (url, str) => {
|
|
33
|
+
const parseCookiesWithJar = (url, str) => {
|
|
34
34
|
const jar = new toughCookie.CookieJar(undefined, { rejectPublicSuffixes: false })
|
|
35
35
|
|
|
36
36
|
return str.split(';').reduce((acc, cookieStr) => {
|
|
@@ -50,6 +50,81 @@ const parseCookies = (url, str) => {
|
|
|
50
50
|
}, [])
|
|
51
51
|
}
|
|
52
52
|
|
|
53
|
+
const run = async ({ fn, timeout, debug: props }) => {
|
|
54
|
+
const duration = debug.duration()
|
|
55
|
+
const result = await pReflect(timeout ? pTimeout(fn, timeout) : fn)
|
|
56
|
+
const errorProps = result.isRejected ? { error: result.reason.message || result.reason } : {}
|
|
57
|
+
duration(props, errorProps)
|
|
58
|
+
return result
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
const stopLoadingOnTimeout = (page, timeout) => {
|
|
62
|
+
let timeoutId
|
|
63
|
+
|
|
64
|
+
return {
|
|
65
|
+
promise: new Promise(resolve => {
|
|
66
|
+
timeoutId = globalThis.setTimeout(() => {
|
|
67
|
+
pReflect(page._client().send('Page.stopLoading')).then(resolve)
|
|
68
|
+
}, timeout)
|
|
69
|
+
|
|
70
|
+
if (typeof timeoutId.unref === 'function') timeoutId.unref()
|
|
71
|
+
}),
|
|
72
|
+
clear: () => {
|
|
73
|
+
if (timeoutId) clearTimeout(timeoutId)
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
const parseCookies = (url, str) => {
|
|
79
|
+
let parsedURL
|
|
80
|
+
|
|
81
|
+
try {
|
|
82
|
+
parsedURL = new URL(url)
|
|
83
|
+
} catch {
|
|
84
|
+
return parseCookiesWithJar(url, str)
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
const domain = parsedURL.hostname
|
|
88
|
+
|
|
89
|
+
if (!domain) {
|
|
90
|
+
return parseCookiesWithJar(url, str)
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
const path = getDefaultPath(parsedURL.pathname)
|
|
94
|
+
const chunks = str.split(';')
|
|
95
|
+
const cookies = new Array(chunks.length)
|
|
96
|
+
let index = 0
|
|
97
|
+
|
|
98
|
+
for (const chunk of chunks) {
|
|
99
|
+
const cookieStr = chunk.trim()
|
|
100
|
+
|
|
101
|
+
if (cookieStr.length === 0) {
|
|
102
|
+
return parseCookiesWithJar(url, str)
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
const separatorIndex = cookieStr.indexOf('=')
|
|
106
|
+
|
|
107
|
+
if (separatorIndex === -1) {
|
|
108
|
+
return parseCookiesWithJar(url, str)
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
const name = cookieStr.slice(0, separatorIndex).trim()
|
|
112
|
+
|
|
113
|
+
if (name.length === 0) {
|
|
114
|
+
return parseCookiesWithJar(url, str)
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
cookies[index++] = {
|
|
118
|
+
name,
|
|
119
|
+
value: cookieStr.slice(separatorIndex + 1).trim(),
|
|
120
|
+
domain,
|
|
121
|
+
path
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
return cookies
|
|
126
|
+
}
|
|
127
|
+
|
|
53
128
|
const getMediaFeatures = ({ animations, colorScheme }) => {
|
|
54
129
|
const prefers = []
|
|
55
130
|
if (animations === false) prefers.push({ name: 'prefers-reduced-motion', value: 'reduce' })
|
|
@@ -225,37 +300,51 @@ module.exports = ({ defaultDevice = 'Macbook Pro 13', timeout: globalTimeout, ..
|
|
|
225
300
|
)
|
|
226
301
|
}
|
|
227
302
|
|
|
228
|
-
const
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
})
|
|
303
|
+
const abortTypesSet = abortTypes.length > 0 ? new Set(abortTypes) : null
|
|
304
|
+
|
|
305
|
+
const requestHandlers = []
|
|
306
|
+
let abortTypesHandler
|
|
307
|
+
let disableInterceptionForAbortTypes = false
|
|
234
308
|
|
|
235
309
|
if (onPageRequest) {
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
)
|
|
310
|
+
const onPageRequestHandler = req => onPageRequest(req, page)
|
|
311
|
+
page.on('request', onPageRequestHandler)
|
|
312
|
+
requestHandlers.push(onPageRequestHandler)
|
|
239
313
|
}
|
|
240
314
|
|
|
241
315
|
if (abortTypes.length > 0) {
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
316
|
+
abortTypesHandler = req => {
|
|
317
|
+
if (req.isInterceptResolutionHandled()) return
|
|
318
|
+
const resourceType = req.resourceType()
|
|
319
|
+
const url = truncate(req.url())
|
|
320
|
+
|
|
321
|
+
if (!abortTypesSet.has(resourceType)) {
|
|
322
|
+
debug.continue({ url, resourceType })
|
|
323
|
+
return req.continue(req.continueRequestOverrides(), DEFAULT_INTERCEPT_RESOLUTION_PRIORITY)
|
|
324
|
+
}
|
|
325
|
+
debug.abort({ url, resourceType })
|
|
326
|
+
return req.abort('blockedbyclient', DEFAULT_INTERCEPT_RESOLUTION_PRIORITY)
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
page.on('request', abortTypesHandler)
|
|
330
|
+
requestHandlers.push(abortTypesHandler)
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
if (requestHandlers.length > 0) {
|
|
334
|
+
prePromises.push(
|
|
335
|
+
run({
|
|
336
|
+
fn: page.setRequestInterception(true),
|
|
337
|
+
debug: 'enableInterception'
|
|
338
|
+
}).then(result => {
|
|
339
|
+
// If interception setup fails, remove handlers to avoid keeping dead listeners.
|
|
340
|
+
if (result.isRejected) {
|
|
341
|
+
requestHandlers.forEach(handler => page.off('request', handler))
|
|
342
|
+
} else if (abortTypesHandler && !withAdblock && !onPageRequest) {
|
|
343
|
+
disableInterceptionForAbortTypes = true
|
|
254
344
|
}
|
|
255
|
-
|
|
256
|
-
return req.abort('blockedbyclient', DEFAULT_INTERCEPT_RESOLUTION_PRIORITY)
|
|
345
|
+
return result
|
|
257
346
|
})
|
|
258
|
-
|
|
347
|
+
)
|
|
259
348
|
}
|
|
260
349
|
|
|
261
350
|
if (withAdblock) {
|
|
@@ -272,7 +361,11 @@ module.exports = ({ defaultDevice = 'Macbook Pro 13', timeout: globalTimeout, ..
|
|
|
272
361
|
)
|
|
273
362
|
}
|
|
274
363
|
|
|
275
|
-
const device = getDevice({
|
|
364
|
+
const device = getDevice({
|
|
365
|
+
headers,
|
|
366
|
+
device: args.device ?? defaultDevice,
|
|
367
|
+
viewport: args.viewport
|
|
368
|
+
})
|
|
276
369
|
|
|
277
370
|
if (device.userAgent && !headers['user-agent']) {
|
|
278
371
|
headers['user-agent'] = device.userAgent
|
|
@@ -291,10 +384,11 @@ module.exports = ({ defaultDevice = 'Macbook Pro 13', timeout: globalTimeout, ..
|
|
|
291
384
|
const headersKeys = Object.keys(headers)
|
|
292
385
|
|
|
293
386
|
if (headersKeys.length > 0) {
|
|
294
|
-
const
|
|
387
|
+
const cookie = headers.cookie
|
|
388
|
+
const userAgent = headers['user-agent']
|
|
295
389
|
|
|
296
|
-
if (
|
|
297
|
-
const cookies = parseCookies(url,
|
|
390
|
+
if (cookie) {
|
|
391
|
+
const cookies = parseCookies(url, cookie)
|
|
298
392
|
prePromises.push(
|
|
299
393
|
run({
|
|
300
394
|
fn: page.setCookie(...cookies),
|
|
@@ -304,25 +398,41 @@ module.exports = ({ defaultDevice = 'Macbook Pro 13', timeout: globalTimeout, ..
|
|
|
304
398
|
)
|
|
305
399
|
}
|
|
306
400
|
|
|
307
|
-
|
|
308
|
-
const extraHTTPHeadersKeys = Object.keys(extraHTTPHeaders)
|
|
309
|
-
|
|
310
|
-
if (headers['user-agent']) {
|
|
401
|
+
if (userAgent) {
|
|
311
402
|
prePromises.push(
|
|
312
403
|
run({
|
|
313
|
-
fn: page.setUserAgent(
|
|
404
|
+
fn: page.setUserAgent(userAgent),
|
|
314
405
|
timeout: actionTimeout,
|
|
315
|
-
debug: { 'user-agent':
|
|
406
|
+
debug: { 'user-agent': userAgent }
|
|
316
407
|
})
|
|
317
408
|
)
|
|
318
409
|
}
|
|
319
410
|
|
|
320
|
-
if (
|
|
411
|
+
if (cookie) {
|
|
412
|
+
const extraHTTPHeaders = {}
|
|
413
|
+
const extraHTTPHeadersKeys = []
|
|
414
|
+
|
|
415
|
+
for (const key of headersKeys) {
|
|
416
|
+
if (key === 'cookie') continue
|
|
417
|
+
extraHTTPHeaders[key] = headers[key]
|
|
418
|
+
extraHTTPHeadersKeys.push(key)
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
if (extraHTTPHeadersKeys.length > 0) {
|
|
422
|
+
prePromises.push(
|
|
423
|
+
run({
|
|
424
|
+
fn: page.setExtraHTTPHeaders(extraHTTPHeaders),
|
|
425
|
+
timeout: actionTimeout,
|
|
426
|
+
debug: { headers: extraHTTPHeadersKeys }
|
|
427
|
+
})
|
|
428
|
+
)
|
|
429
|
+
}
|
|
430
|
+
} else if (!(userAgent && headersKeys.length === 1)) {
|
|
321
431
|
prePromises.push(
|
|
322
432
|
run({
|
|
323
|
-
fn: page.setExtraHTTPHeaders(
|
|
433
|
+
fn: page.setExtraHTTPHeaders(headers),
|
|
324
434
|
timeout: actionTimeout,
|
|
325
|
-
debug: { headers:
|
|
435
|
+
debug: { headers: headersKeys }
|
|
326
436
|
})
|
|
327
437
|
)
|
|
328
438
|
}
|
|
@@ -350,72 +460,91 @@ module.exports = ({ defaultDevice = 'Macbook Pro 13', timeout: globalTimeout, ..
|
|
|
350
460
|
)
|
|
351
461
|
}
|
|
352
462
|
|
|
353
|
-
|
|
463
|
+
try {
|
|
464
|
+
await Promise.all(prePromises)
|
|
354
465
|
|
|
355
|
-
|
|
356
|
-
|
|
466
|
+
let clearStopLoadingTimer = () => {}
|
|
467
|
+
const navigationPromise = html
|
|
357
468
|
? page.setContent(html, { waitUntil, ...args })
|
|
358
|
-
:
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
fn: adblock.runAutoConsent(page),
|
|
369
|
-
timeout: actionTimeout,
|
|
370
|
-
debug: 'autoconsent:run'
|
|
469
|
+
: (() => {
|
|
470
|
+
const { promise, clear } = stopLoadingOnTimeout(page, gotoTimeout)
|
|
471
|
+
clearStopLoadingTimer = clear
|
|
472
|
+
return Promise.race([page.goto(url, { waitUntil, ...args }), promise])
|
|
473
|
+
})()
|
|
474
|
+
|
|
475
|
+
const { value: response, reason: error } = await run({
|
|
476
|
+
fn: navigationPromise,
|
|
477
|
+
timeout: gotoTimeout,
|
|
478
|
+
debug: { fn: html ? 'html' : 'url', waitUntil }
|
|
371
479
|
})
|
|
372
|
-
|
|
480
|
+
clearStopLoadingTimer()
|
|
373
481
|
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
482
|
+
if (withAdblock) {
|
|
483
|
+
await run({
|
|
484
|
+
fn: adblock.runAutoConsent(page),
|
|
485
|
+
timeout: actionTimeout,
|
|
486
|
+
debug: 'autoconsent:run'
|
|
487
|
+
})
|
|
380
488
|
}
|
|
381
|
-
}
|
|
382
489
|
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
animations,
|
|
391
|
-
modules,
|
|
392
|
-
scripts,
|
|
393
|
-
styles
|
|
394
|
-
})
|
|
490
|
+
if (waitForSelector) {
|
|
491
|
+
await run({
|
|
492
|
+
fn: page.waitForSelector(waitForSelector),
|
|
493
|
+
timeout: gotoTimeout,
|
|
494
|
+
debug: { waitForSelector }
|
|
495
|
+
})
|
|
496
|
+
}
|
|
395
497
|
|
|
396
|
-
|
|
397
|
-
for (const selector of castArray(click)) {
|
|
498
|
+
if (waitForFunction) {
|
|
398
499
|
await run({
|
|
399
|
-
fn: page.
|
|
400
|
-
timeout:
|
|
401
|
-
debug: {
|
|
500
|
+
fn: page.waitForFunction(waitForFunction),
|
|
501
|
+
timeout: gotoTimeout,
|
|
502
|
+
debug: { waitForFunction }
|
|
402
503
|
})
|
|
403
504
|
}
|
|
404
|
-
}
|
|
405
505
|
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
506
|
+
if (waitForTimeout) {
|
|
507
|
+
await setTimeout(waitForTimeout)
|
|
508
|
+
}
|
|
509
|
+
|
|
510
|
+
await inject(page, {
|
|
409
511
|
timeout: actionTimeout,
|
|
410
|
-
|
|
512
|
+
mediaType,
|
|
513
|
+
animations,
|
|
514
|
+
modules,
|
|
515
|
+
scripts,
|
|
516
|
+
styles
|
|
411
517
|
})
|
|
412
|
-
}
|
|
413
518
|
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
519
|
+
if (click) {
|
|
520
|
+
for (const selector of castArray(click)) {
|
|
521
|
+
await run({
|
|
522
|
+
fn: page.click(selector),
|
|
523
|
+
timeout: actionTimeout,
|
|
524
|
+
debug: { click: selector }
|
|
525
|
+
})
|
|
526
|
+
}
|
|
527
|
+
}
|
|
528
|
+
|
|
529
|
+
if (scroll) {
|
|
530
|
+
await run({
|
|
531
|
+
fn: page.$eval(scroll, el => el.scrollIntoView()),
|
|
532
|
+
timeout: actionTimeout,
|
|
533
|
+
debug: { scroll }
|
|
534
|
+
})
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
if (isWaitUntilAuto) {
|
|
538
|
+
await waitUntilAuto(page, { response, timeout: actionTimeout * 2 })
|
|
539
|
+
}
|
|
417
540
|
|
|
418
|
-
|
|
541
|
+
return { response, device, error }
|
|
542
|
+
} finally {
|
|
543
|
+
if (abortTypesHandler) page.off('request', abortTypesHandler)
|
|
544
|
+
if (disableInterceptionForAbortTypes) {
|
|
545
|
+
await pReflect(page.setRequestInterception(false))
|
|
546
|
+
}
|
|
547
|
+
}
|
|
419
548
|
}
|
|
420
549
|
|
|
421
550
|
goto.getDevice = getDevice
|