@browserless/goto 10.10.1 → 10.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "@browserless/goto",
3
3
  "description": "Navigate to web pages with built-in ad blocking, device emulation, and optimized loading for faster automation.",
4
4
  "homepage": "https://browserless.js.org/#/?id=gotopage-options",
5
- "version": "10.10.1",
5
+ "version": "10.11.0",
6
6
  "main": "src/index.js",
7
7
  "author": {
8
8
  "email": "hello@microlink.io",
@@ -66,5 +66,5 @@
66
66
  "timeout": "2m",
67
67
  "workerThreads": false
68
68
  },
69
- "gitHead": "32f6e72bcb489a83ac9659520a3961aeb97c47b7"
69
+ "gitHead": "e6bd03bae91b08baaa6f1c0c0c9743f6c7de3d29"
70
70
  }
package/src/adblock.js CHANGED
@@ -1,8 +1,8 @@
1
1
  'use strict'
2
2
 
3
3
  const { PuppeteerBlocker } = require('@ghostery/adblocker-puppeteer')
4
+ const fs = require('fs/promises')
4
5
  const path = require('path')
5
- const fs = require('fs')
6
6
 
7
7
  const debug = require('debug-logfmt')('browserless:goto:adblock')
8
8
 
@@ -11,7 +11,7 @@ let enginePromise
11
11
  const getEngine = () => {
12
12
  if (enginePromise) return enginePromise
13
13
 
14
- enginePromise = fs.promises.readFile(path.resolve(__dirname, './engine.bin')).then(buffer => {
14
+ enginePromise = fs.readFile(path.resolve(__dirname, './engine.bin')).then(buffer => {
15
15
  const engine = PuppeteerBlocker.deserialize(new Uint8Array(buffer))
16
16
  engine.on('request-blocked', ({ url }) => debug('block', url))
17
17
  engine.on('request-redirected', ({ url }) => debug('redirect', url))
@@ -31,7 +31,7 @@ let autoconsentPlaywrightScriptPromise
31
31
  const getAutoconsentPlaywrightScript = () => {
32
32
  if (autoconsentPlaywrightScriptPromise) return autoconsentPlaywrightScriptPromise
33
33
 
34
- autoconsentPlaywrightScriptPromise = fs.promises.readFile(
34
+ autoconsentPlaywrightScriptPromise = fs.readFile(
35
35
  path.resolve(
36
36
  path.dirname(require.resolve('@duckduckgo/autoconsent')),
37
37
  'autoconsent.playwright.js'
package/src/index.js CHANGED
@@ -22,15 +22,15 @@ const isEmpty = val => val == null || !(Object.keys(val) || val).length
22
22
 
23
23
  const castArray = value => [].concat(value).filter(Boolean)
24
24
 
25
- const run = async ({ fn, timeout, debug: props }) => {
26
- const duration = debug.duration()
27
- const result = await pReflect(timeout ? pTimeout(fn, timeout) : fn)
28
- const errorProps = result.isRejected ? { error: result.reason.message || result.reason } : {}
29
- duration(props, errorProps)
30
- return result
25
+ const getDefaultPath = pathname => {
26
+ if (!pathname || pathname[0] !== '/') return '/'
27
+ if (pathname === '/') return '/'
28
+
29
+ const rightSlash = pathname.lastIndexOf('/')
30
+ return rightSlash === 0 ? '/' : pathname.slice(0, rightSlash)
31
31
  }
32
32
 
33
- const parseCookies = (url, str) => {
33
+ const parseCookiesWithJar = (url, str) => {
34
34
  const jar = new toughCookie.CookieJar(undefined, { rejectPublicSuffixes: false })
35
35
 
36
36
  return str.split(';').reduce((acc, cookieStr) => {
@@ -50,6 +50,81 @@ const parseCookies = (url, str) => {
50
50
  }, [])
51
51
  }
52
52
 
53
+ const run = async ({ fn, timeout, debug: props }) => {
54
+ const duration = debug.duration()
55
+ const result = await pReflect(timeout ? pTimeout(fn, timeout) : fn)
56
+ const errorProps = result.isRejected ? { error: result.reason.message || result.reason } : {}
57
+ duration(props, errorProps)
58
+ return result
59
+ }
60
+
61
+ const stopLoadingOnTimeout = (page, timeout) => {
62
+ let timeoutId
63
+
64
+ return {
65
+ promise: new Promise(resolve => {
66
+ timeoutId = globalThis.setTimeout(() => {
67
+ pReflect(page._client().send('Page.stopLoading')).then(resolve)
68
+ }, timeout)
69
+
70
+ if (typeof timeoutId.unref === 'function') timeoutId.unref()
71
+ }),
72
+ clear: () => {
73
+ if (timeoutId) clearTimeout(timeoutId)
74
+ }
75
+ }
76
+ }
77
+
78
+ const parseCookies = (url, str) => {
79
+ let parsedURL
80
+
81
+ try {
82
+ parsedURL = new URL(url)
83
+ } catch {
84
+ return parseCookiesWithJar(url, str)
85
+ }
86
+
87
+ const domain = parsedURL.hostname
88
+
89
+ if (!domain) {
90
+ return parseCookiesWithJar(url, str)
91
+ }
92
+
93
+ const path = getDefaultPath(parsedURL.pathname)
94
+ const chunks = str.split(';')
95
+ const cookies = new Array(chunks.length)
96
+ let index = 0
97
+
98
+ for (const chunk of chunks) {
99
+ const cookieStr = chunk.trim()
100
+
101
+ if (cookieStr.length === 0) {
102
+ return parseCookiesWithJar(url, str)
103
+ }
104
+
105
+ const separatorIndex = cookieStr.indexOf('=')
106
+
107
+ if (separatorIndex === -1) {
108
+ return parseCookiesWithJar(url, str)
109
+ }
110
+
111
+ const name = cookieStr.slice(0, separatorIndex).trim()
112
+
113
+ if (name.length === 0) {
114
+ return parseCookiesWithJar(url, str)
115
+ }
116
+
117
+ cookies[index++] = {
118
+ name,
119
+ value: cookieStr.slice(separatorIndex + 1).trim(),
120
+ domain,
121
+ path
122
+ }
123
+ }
124
+
125
+ return cookies
126
+ }
127
+
53
128
  const getMediaFeatures = ({ animations, colorScheme }) => {
54
129
  const prefers = []
55
130
  if (animations === false) prefers.push({ name: 'prefers-reduced-motion', value: 'reduce' })
@@ -225,37 +300,51 @@ module.exports = ({ defaultDevice = 'Macbook Pro 13', timeout: globalTimeout, ..
225
300
  )
226
301
  }
227
302
 
228
- const enableInterception =
229
- (onPageRequest || abortTypes.length > 0) &&
230
- run({
231
- fn: page.setRequestInterception(true),
232
- debug: 'enableInterception'
233
- })
303
+ const abortTypesSet = abortTypes.length > 0 ? new Set(abortTypes) : null
304
+
305
+ const requestHandlers = []
306
+ let abortTypesHandler
307
+ let disableInterceptionForAbortTypes = false
234
308
 
235
309
  if (onPageRequest) {
236
- Promise.resolve(enableInterception).then(() =>
237
- page.on('request', req => onPageRequest(req, page))
238
- )
310
+ const onPageRequestHandler = req => onPageRequest(req, page)
311
+ page.on('request', onPageRequestHandler)
312
+ requestHandlers.push(onPageRequestHandler)
239
313
  }
240
314
 
241
315
  if (abortTypes.length > 0) {
242
- Promise.resolve(enableInterception).then(() => {
243
- page.on('request', req => {
244
- if (req.isInterceptResolutionHandled()) return
245
- const resourceType = req.resourceType()
246
- const url = truncate(req.url())
247
-
248
- if (!abortTypes.includes(resourceType)) {
249
- debug.continue({ url, resourceType })
250
- return req.continue(
251
- req.continueRequestOverrides(),
252
- DEFAULT_INTERCEPT_RESOLUTION_PRIORITY
253
- )
316
+ abortTypesHandler = req => {
317
+ if (req.isInterceptResolutionHandled()) return
318
+ const resourceType = req.resourceType()
319
+ const url = truncate(req.url())
320
+
321
+ if (!abortTypesSet.has(resourceType)) {
322
+ debug.continue({ url, resourceType })
323
+ return req.continue(req.continueRequestOverrides(), DEFAULT_INTERCEPT_RESOLUTION_PRIORITY)
324
+ }
325
+ debug.abort({ url, resourceType })
326
+ return req.abort('blockedbyclient', DEFAULT_INTERCEPT_RESOLUTION_PRIORITY)
327
+ }
328
+
329
+ page.on('request', abortTypesHandler)
330
+ requestHandlers.push(abortTypesHandler)
331
+ }
332
+
333
+ if (requestHandlers.length > 0) {
334
+ prePromises.push(
335
+ run({
336
+ fn: page.setRequestInterception(true),
337
+ debug: 'enableInterception'
338
+ }).then(result => {
339
+ // If interception setup fails, remove handlers to avoid keeping dead listeners.
340
+ if (result.isRejected) {
341
+ requestHandlers.forEach(handler => page.off('request', handler))
342
+ } else if (abortTypesHandler && !withAdblock && !onPageRequest) {
343
+ disableInterceptionForAbortTypes = true
254
344
  }
255
- debug.abort({ url, resourceType })
256
- return req.abort('blockedbyclient', DEFAULT_INTERCEPT_RESOLUTION_PRIORITY)
345
+ return result
257
346
  })
258
- })
347
+ )
259
348
  }
260
349
 
261
350
  if (withAdblock) {
@@ -272,7 +361,11 @@ module.exports = ({ defaultDevice = 'Macbook Pro 13', timeout: globalTimeout, ..
272
361
  )
273
362
  }
274
363
 
275
- const device = getDevice({ headers, ...args, device: args.device ?? defaultDevice })
364
+ const device = getDevice({
365
+ headers,
366
+ device: args.device ?? defaultDevice,
367
+ viewport: args.viewport
368
+ })
276
369
 
277
370
  if (device.userAgent && !headers['user-agent']) {
278
371
  headers['user-agent'] = device.userAgent
@@ -291,10 +384,11 @@ module.exports = ({ defaultDevice = 'Macbook Pro 13', timeout: globalTimeout, ..
291
384
  const headersKeys = Object.keys(headers)
292
385
 
293
386
  if (headersKeys.length > 0) {
294
- const { cookie, ...headersWithoutCookie } = headers
387
+ const cookie = headers.cookie
388
+ const userAgent = headers['user-agent']
295
389
 
296
- if (headers.cookie) {
297
- const cookies = parseCookies(url, headers.cookie)
390
+ if (cookie) {
391
+ const cookies = parseCookies(url, cookie)
298
392
  prePromises.push(
299
393
  run({
300
394
  fn: page.setCookie(...cookies),
@@ -304,25 +398,41 @@ module.exports = ({ defaultDevice = 'Macbook Pro 13', timeout: globalTimeout, ..
304
398
  )
305
399
  }
306
400
 
307
- const extraHTTPHeaders = headers.cookie ? headersWithoutCookie : headers
308
- const extraHTTPHeadersKeys = Object.keys(extraHTTPHeaders)
309
-
310
- if (headers['user-agent']) {
401
+ if (userAgent) {
311
402
  prePromises.push(
312
403
  run({
313
- fn: page.setUserAgent(headers['user-agent']),
404
+ fn: page.setUserAgent(userAgent),
314
405
  timeout: actionTimeout,
315
- debug: { 'user-agent': headers['user-agent'] }
406
+ debug: { 'user-agent': userAgent }
316
407
  })
317
408
  )
318
409
  }
319
410
 
320
- if (extraHTTPHeadersKeys.length > 0) {
411
+ if (cookie) {
412
+ const extraHTTPHeaders = {}
413
+ const extraHTTPHeadersKeys = []
414
+
415
+ for (const key of headersKeys) {
416
+ if (key === 'cookie') continue
417
+ extraHTTPHeaders[key] = headers[key]
418
+ extraHTTPHeadersKeys.push(key)
419
+ }
420
+
421
+ if (extraHTTPHeadersKeys.length > 0) {
422
+ prePromises.push(
423
+ run({
424
+ fn: page.setExtraHTTPHeaders(extraHTTPHeaders),
425
+ timeout: actionTimeout,
426
+ debug: { headers: extraHTTPHeadersKeys }
427
+ })
428
+ )
429
+ }
430
+ } else if (!(userAgent && headersKeys.length === 1)) {
321
431
  prePromises.push(
322
432
  run({
323
- fn: page.setExtraHTTPHeaders(extraHTTPHeaders),
433
+ fn: page.setExtraHTTPHeaders(headers),
324
434
  timeout: actionTimeout,
325
- debug: { headers: extraHTTPHeadersKeys }
435
+ debug: { headers: headersKeys }
326
436
  })
327
437
  )
328
438
  }
@@ -350,72 +460,91 @@ module.exports = ({ defaultDevice = 'Macbook Pro 13', timeout: globalTimeout, ..
350
460
  )
351
461
  }
352
462
 
353
- await Promise.all(prePromises)
463
+ try {
464
+ await Promise.all(prePromises)
354
465
 
355
- const { value: response, reason: error } = await run({
356
- fn: html
466
+ let clearStopLoadingTimer = () => {}
467
+ const navigationPromise = html
357
468
  ? page.setContent(html, { waitUntil, ...args })
358
- : Promise.race([
359
- page.goto(url, { waitUntil, ...args }),
360
- setTimeout(gotoTimeout).then(() => page._client().send('Page.stopLoading'))
361
- ]),
362
- timeout: gotoTimeout,
363
- debug: { fn: html ? 'html' : 'url', waitUntil }
364
- })
365
-
366
- if (withAdblock) {
367
- await run({
368
- fn: adblock.runAutoConsent(page),
369
- timeout: actionTimeout,
370
- debug: 'autoconsent:run'
469
+ : (() => {
470
+ const { promise, clear } = stopLoadingOnTimeout(page, gotoTimeout)
471
+ clearStopLoadingTimer = clear
472
+ return Promise.race([page.goto(url, { waitUntil, ...args }), promise])
473
+ })()
474
+
475
+ const { value: response, reason: error } = await run({
476
+ fn: navigationPromise,
477
+ timeout: gotoTimeout,
478
+ debug: { fn: html ? 'html' : 'url', waitUntil }
371
479
  })
372
- }
480
+ clearStopLoadingTimer()
373
481
 
374
- for (const [key, value] of Object.entries({
375
- waitForSelector,
376
- waitForFunction
377
- })) {
378
- if (value) {
379
- await run({ fn: page[key](value), timeout: gotoTimeout, debug: { [key]: value } })
482
+ if (withAdblock) {
483
+ await run({
484
+ fn: adblock.runAutoConsent(page),
485
+ timeout: actionTimeout,
486
+ debug: 'autoconsent:run'
487
+ })
380
488
  }
381
- }
382
489
 
383
- if (waitForTimeout) {
384
- await setTimeout(waitForTimeout)
385
- }
386
-
387
- await inject(page, {
388
- timeout: actionTimeout,
389
- mediaType,
390
- animations,
391
- modules,
392
- scripts,
393
- styles
394
- })
490
+ if (waitForSelector) {
491
+ await run({
492
+ fn: page.waitForSelector(waitForSelector),
493
+ timeout: gotoTimeout,
494
+ debug: { waitForSelector }
495
+ })
496
+ }
395
497
 
396
- if (click) {
397
- for (const selector of castArray(click)) {
498
+ if (waitForFunction) {
398
499
  await run({
399
- fn: page.click(selector),
400
- timeout: actionTimeout,
401
- debug: { click: selector }
500
+ fn: page.waitForFunction(waitForFunction),
501
+ timeout: gotoTimeout,
502
+ debug: { waitForFunction }
402
503
  })
403
504
  }
404
- }
405
505
 
406
- if (scroll) {
407
- await run({
408
- fn: page.$eval(scroll, el => el.scrollIntoView()),
506
+ if (waitForTimeout) {
507
+ await setTimeout(waitForTimeout)
508
+ }
509
+
510
+ await inject(page, {
409
511
  timeout: actionTimeout,
410
- debug: { scroll }
512
+ mediaType,
513
+ animations,
514
+ modules,
515
+ scripts,
516
+ styles
411
517
  })
412
- }
413
518
 
414
- if (isWaitUntilAuto) {
415
- await waitUntilAuto(page, { response, timeout: actionTimeout * 2 })
416
- }
519
+ if (click) {
520
+ for (const selector of castArray(click)) {
521
+ await run({
522
+ fn: page.click(selector),
523
+ timeout: actionTimeout,
524
+ debug: { click: selector }
525
+ })
526
+ }
527
+ }
528
+
529
+ if (scroll) {
530
+ await run({
531
+ fn: page.$eval(scroll, el => el.scrollIntoView()),
532
+ timeout: actionTimeout,
533
+ debug: { scroll }
534
+ })
535
+ }
536
+
537
+ if (isWaitUntilAuto) {
538
+ await waitUntilAuto(page, { response, timeout: actionTimeout * 2 })
539
+ }
417
540
 
418
- return { response, device, error }
541
+ return { response, device, error }
542
+ } finally {
543
+ if (abortTypesHandler) page.off('request', abortTypesHandler)
544
+ if (disableInterceptionForAbortTypes) {
545
+ await pReflect(page.setRequestInterception(false))
546
+ }
547
+ }
419
548
  }
420
549
 
421
550
  goto.getDevice = getDevice