html-get 2.15.1 → 2.16.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "html-get",
3
3
  "description": "Get the HTML from any website, fine-tuned for correction & speed",
4
4
  "homepage": "https://nicedoc.com/microlinkhq/html-get",
5
- "version": "2.15.1",
5
+ "version": "2.16.1",
6
6
  "main": "src/index.js",
7
7
  "bin": {
8
8
  "html-get": "bin/index.js"
@@ -35,7 +35,7 @@
35
35
  ],
36
36
  "dependencies": {
37
37
  "@kikobeats/time-span": "~1.0.3",
38
- "@metascraper/helpers": "~5.43.4",
38
+ "@metascraper/helpers": "~5.45.0",
39
39
  "cheerio": "~1.0.0-rc.12",
40
40
  "css-url-regex": "~4.0.0",
41
41
  "debug-logfmt": "~1.2.2",
@@ -44,11 +44,11 @@
44
44
  "html-encode": "~2.1.6",
45
45
  "html-urls": "~2.4.55",
46
46
  "is-html-content": "~1.0.0",
47
+ "localhost-url-regex": "~1.0.11",
47
48
  "lodash": "~4.17.21",
48
49
  "mri": "~1.2.0",
49
50
  "p-cancelable": "~2.1.0",
50
51
  "p-retry": "~4.6.0",
51
- "replace-string": "~3.1.0",
52
52
  "tinyspawn": "~1.2.6",
53
53
  "top-sites": "~1.1.205"
54
54
  },
@@ -67,7 +67,7 @@
67
67
  "nano-staged": "latest",
68
68
  "npm-check-updates": "latest",
69
69
  "pretty": "latest",
70
- "puppeteer": "latest",
70
+ "puppeteer": "21",
71
71
  "regex-iso-date": "latest",
72
72
  "simple-git-hooks": "latest",
73
73
  "standard": "latest",
@@ -1 +1 @@
1
- [[["domainWithoutSuffix","google"]],[["domainWithoutSuffix","youtube"]],[["domainWithoutSuffix","microsoft"]],[["domainWithoutSuffix","apple"]],[["domainWithoutSuffix","wikipedia"]],[["domainWithoutSuffix","wordpress"]],[["domainWithoutSuffix","blogspot"]],[["domainWithoutSuffix","github"]],[["domainWithoutSuffix","vimeo"]],[["domainWithoutSuffix","theguardian"]],[["domainWithoutSuffix","imdb"]],[["domainWithoutSuffix","bbc"]],[["domainWithoutSuffix","slideshare"]],[["domainWithoutSuffix","nytimes"]],[["domainWithoutSuffix","spotify"]],[["domainWithoutSuffix","twitter"]],[["domainWithoutSuffix","soundcloud"]],[["domainWithoutSuffix","telegraph"]],[["domainWithoutSuffix","pinterest"]],[["domainWithoutSuffix","huffingtonpost"]],[["domainWithoutSuffix","yelp"]],[["domainWithoutSuffix","techcrunch"]],[["domainWithoutSuffix","zoom"]],[["domainWithoutSuffix","stackoverflow"]],[["domain","abc.net.au"]],[["domainWithoutSuffix","eventbrite"]],[["domainWithoutSuffix","engadget"]],[["domainWithoutSuffix","theverge"]],[["domainWithoutSuffix","substack"]],[["domainWithoutSuffix","giphy"]],[["domainWithoutSuffix","imgur"]],[["domainWithoutSuffix","csdn"]],[["domainWithoutSuffix","deviantart"]],[["domainWithoutSuffix","digg"]],[["domainWithoutSuffix","dribbble"]],[["domainWithoutSuffix","etsy"]],[["domainWithoutSuffix","flickr"]],[["domainWithoutSuffix","ghost"]],[["domainWithoutSuffix","gitlab"]],[["domainWithoutSuffix","meetup"]],[["domainWithoutSuffix","producthunt"]],[["domainWithoutSuffix","sourceforge"]],[["domainWithoutSuffix","tumblr"]],[["domainWithoutSuffix","ycombinator"]]]
1
+ [[["domainWithoutSuffix","google"]],[["domainWithoutSuffix","youtube"]],[["domainWithoutSuffix","microsoft"]],[["domainWithoutSuffix","apple"]],[["domainWithoutSuffix","wikipedia"]],[["domainWithoutSuffix","wordpress"]],[["domainWithoutSuffix","blogspot"]],[["domainWithoutSuffix","vimeo"]],[["domainWithoutSuffix","github"]],[["domainWithoutSuffix","bbc"]],[["domainWithoutSuffix","nytimes"]],[["domainWithoutSuffix","theguardian"]],[["domainWithoutSuffix","slideshare"]],[["domainWithoutSuffix","imdb"]],[["domainWithoutSuffix","telegraph"]],[["domainWithoutSuffix","pinterest"]],[["domainWithoutSuffix","spotify"]],[["domainWithoutSuffix","twitter"]],[["domainWithoutSuffix","soundcloud"]],[["domainWithoutSuffix","huffingtonpost"]],[["domainWithoutSuffix","techcrunch"]],[["domainWithoutSuffix","zoom"]],[["domainWithoutSuffix","eventbrite"]],[["domainWithoutSuffix","engadget"]],[["domainWithoutSuffix","stackoverflow"]],[["domain","abc.net.au"]],[["domainWithoutSuffix","yelp"]],[["domainWithoutSuffix","theverge"]],[["domainWithoutSuffix","digg"]],[["domainWithoutSuffix","csdn"]],[["domainWithoutSuffix","deviantart"]],[["domainWithoutSuffix","dribbble"]],[["domainWithoutSuffix","etsy"]],[["domainWithoutSuffix","flickr"]],[["domainWithoutSuffix","ghost"]],[["domainWithoutSuffix","giphy"]],[["domainWithoutSuffix","gitlab"]],[["domainWithoutSuffix","imgur"]],[["domainWithoutSuffix","meetup"]],[["domainWithoutSuffix","producthunt"]],[["domainWithoutSuffix","sourceforge"]],[["domainWithoutSuffix","substack"]],[["domainWithoutSuffix","tumblr"]],[["domainWithoutSuffix","ycombinator"]]]
package/src/html.js CHANGED
@@ -1,8 +1,8 @@
1
1
  'use strict'
2
2
 
3
3
  const { get, split, nth, castArray, forEach } = require('lodash')
4
+ const localhostUrl = require('localhost-url-regex')
4
5
  const { TAGS: URL_TAGS } = require('html-urls')
5
- const replaceString = require('replace-string')
6
6
  const isHTML = require('is-html-content')
7
7
  const cssUrl = require('css-url-regex')
8
8
  const execall = require('execall')
@@ -95,7 +95,9 @@ const rewriteHtmlUrls = ({ $, url }) => {
95
95
  const el = $(this)
96
96
  const attr = el.attr(urlAttr)
97
97
 
98
- if (typeof attr === 'string' && !attr.startsWith('http')) {
98
+ if (localhostUrl().test(attr)) {
99
+ el.remove()
100
+ } else if (typeof attr === 'string' && !attr.startsWith('http')) {
99
101
  try {
100
102
  const newAttr = new URL(attr, url).toString()
101
103
  el.attr(urlAttr, newAttr)
@@ -117,7 +119,7 @@ const rewriteCssUrls = ({ html, url }) => {
117
119
  if (cssUrl.startsWith('/')) {
118
120
  try {
119
121
  const absoluteUrl = new URL(cssUrl, url).toString()
120
- html = replaceString(html, `url(${cssUrl})`, `url(${absoluteUrl})`)
122
+ html = html.replaceAll(`url(${cssUrl})`, `url(${absoluteUrl})`)
121
123
  } catch (_) {}
122
124
  }
123
125
  })