html-get 2.23.0 → 2.24.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/index.js +27 -2
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "html-get",
3
3
  "description": "Get the HTML from any website, fine-tuned for correction & speed",
4
4
  "homepage": "https://nicedoc.com/microlinkhq/html-get",
5
- "version": "2.23.0",
5
+ "version": "2.24.1",
6
6
  "types": "index.d.ts",
7
7
  "main": "src/index.js",
8
8
  "bin": {
package/src/index.js CHANGED
@@ -192,6 +192,11 @@ const prerender = PCancelable.fn(
192
192
 
193
193
  const modes = { fetch, prerender }
194
194
 
195
+ const hasShadowDOM = $ =>
196
+ $('*')
197
+ .toArray()
198
+ .some(el => el.tagName?.includes('-'))
199
+
195
200
  const isFetchMode = url => {
196
201
  const parsedUrl = parseUrl(url)
197
202
  return autoDomains.some(conditions =>
@@ -309,11 +314,31 @@ module.exports = PCancelable.fn(
309
314
 
310
315
  onCancel(() => promise.cancel())
311
316
 
312
- const { mode, html, $, ...payload } = await promise
317
+ let { mode, html, $, ...payload } = await promise
318
+
319
+ let shadowDOM = hasShadowDOM($)
320
+
321
+ if (mode === 'fetch' && getBrowserless && shadowDOM) {
322
+ debug('shadow DOM detected, retrying with prerender', { url: targetUrl })
323
+ const prerenderPromise = getContent(targetUrl, 'prerender', {
324
+ getBrowserless,
325
+ getTemporalFile,
326
+ gotOpts,
327
+ headers,
328
+ mutool,
329
+ puppeteerOpts,
330
+ rewriteUrls,
331
+ rewriteHtml,
332
+ toEncode
333
+ })
334
+ onCancel(() => prerenderPromise.cancel())
335
+ ;({ mode, html, $, ...payload } = await prerenderPromise)
336
+ shadowDOM = hasShadowDOM($)
337
+ }
313
338
 
314
339
  return Object.assign(payload, {
315
340
  ...serializeHtml($),
316
- stats: { mode, timing: duration() }
341
+ stats: { mode, timing: duration(), shadowDOM }
317
342
  })
318
343
  }
319
344
  )