html-get 2.18.4 → 2.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +10 -7
- package/src/auto-domains.json +1 -1
- package/src/html.js +11 -10
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "html-get",
|
|
3
3
|
"description": "Get the HTML from any website, fine-tuned for correction & speed",
|
|
4
4
|
"homepage": "https://nicedoc.com/microlinkhq/html-get",
|
|
5
|
-
"version": "2.18.4",
|
|
5
|
+
"version": "2.19.0",
|
|
6
6
|
"main": "src/index.js",
|
|
7
7
|
"bin": {
|
|
8
8
|
"html-get": "bin/index.js"
|
|
@@ -36,22 +36,22 @@
|
|
|
36
36
|
],
|
|
37
37
|
"dependencies": {
|
|
38
38
|
"@kikobeats/time-span": "~1.0.5",
|
|
39
|
-
"@metascraper/helpers": "~5.
|
|
39
|
+
"@metascraper/helpers": "~5.46.1",
|
|
40
40
|
"cheerio": "~1.0.0",
|
|
41
41
|
"css-url-regex": "~4.0.0",
|
|
42
|
-
"debug-logfmt": "~1.2.
|
|
42
|
+
"debug-logfmt": "~1.2.3",
|
|
43
43
|
"execall": "~2.0.0",
|
|
44
44
|
"got": "~11.8.6",
|
|
45
45
|
"html-encode": "~2.1.7",
|
|
46
|
-
"html-urls": "~2.4.
|
|
46
|
+
"html-urls": "~2.4.62",
|
|
47
47
|
"is-html-content": "~1.0.0",
|
|
48
|
-
"
|
|
48
|
+
"is-local-address": "~2.2.0",
|
|
49
49
|
"lodash": "~4.17.21",
|
|
50
50
|
"mri": "~1.2.0",
|
|
51
51
|
"p-cancelable": "~2.1.0",
|
|
52
52
|
"p-retry": "~4.6.0",
|
|
53
|
-
"tinyspawn": "~1.3.
|
|
54
|
-
"top-sites": "~1.1.
|
|
53
|
+
"tinyspawn": "~1.3.3",
|
|
54
|
+
"top-sites": "~1.1.220"
|
|
55
55
|
},
|
|
56
56
|
"devDependencies": {
|
|
57
57
|
"@commitlint/cli": "latest",
|
|
@@ -125,6 +125,9 @@
|
|
|
125
125
|
"finepack"
|
|
126
126
|
]
|
|
127
127
|
},
|
|
128
|
+
"pnpm": {
|
|
129
|
+
"neverBuiltDependencies": []
|
|
130
|
+
},
|
|
128
131
|
"simple-git-hooks": {
|
|
129
132
|
"commit-msg": "npx commitlint --edit",
|
|
130
133
|
"pre-commit": "npx nano-staged"
|
package/src/auto-domains.json
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
[[["domainWithoutSuffix","youtube"]],[["domainWithoutSuffix","google"]],[["domainWithoutSuffix","apple"]],[["domainWithoutSuffix","wordpress"]],[["domainWithoutSuffix","microsoft"]],[["domainWithoutSuffix","wikipedia"]],[["domainWithoutSuffix","vimeo"]],[["domainWithoutSuffix","
|
|
1
|
+
[[["domainWithoutSuffix","youtube"]],[["domainWithoutSuffix","google"]],[["domainWithoutSuffix","apple"]],[["domainWithoutSuffix","wordpress"]],[["domainWithoutSuffix","microsoft"]],[["domainWithoutSuffix","wikipedia"]],[["domainWithoutSuffix","vimeo"]],[["domainWithoutSuffix","github"]],[["domainWithoutSuffix","blogspot"]],[["domainWithoutSuffix","twitter"]],[["domainWithoutSuffix","bbc"]],[["domainWithoutSuffix","nytimes"]],[["domainWithoutSuffix","imdb"]],[["domainWithoutSuffix","theguardian"]],[["domain","x.com"]],[["domainWithoutSuffix","slideshare"]],[["domainWithoutSuffix","huffingtonpost"]],[["domainWithoutSuffix","instagram"]],[["domainWithoutSuffix","pinterest"]],[["domainWithoutSuffix","telegraph"]],[["domainWithoutSuffix","spotify"]],[["domainWithoutSuffix","eventbrite"]],[["domainWithoutSuffix","yelp"]],[["domainWithoutSuffix","arxiv"]],[["domain","abc.net.au"]],[["domainWithoutSuffix","zoom"]],[["domainWithoutSuffix","techcrunch"]],[["domainWithoutSuffix","soundcloud"]],[["domainWithoutSuffix","engadget"]],[["domainWithoutSuffix","theverge"]],[["domainWithoutSuffix","dribbble"]],[["domainWithoutSuffix","digg"]],[["domainWithoutSuffix","csdn"]],[["domainWithoutSuffix","deviantart"]],[["domainWithoutSuffix","etsy"]],[["domainWithoutSuffix","flickr"]],[["domainWithoutSuffix","ghost"]],[["domainWithoutSuffix","giphy"]],[["domainWithoutSuffix","gitlab"]],[["domainWithoutSuffix","imgur"]],[["domainWithoutSuffix","meetup"]],[["domainWithoutSuffix","producthunt"]],[["domainWithoutSuffix","reddit"]],[["domainWithoutSuffix","sourceforge"]],[["domainWithoutSuffix","stackoverflow"]],[["domainWithoutSuffix","substack"]],[["domainWithoutSuffix","tumblr"]],[["domainWithoutSuffix","ycombinator"]]]
|
package/src/html.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
const { get, split, nth, castArray, forEach } = require('lodash')
|
|
4
4
|
const debug = require('debug-logfmt')('html-get:rewrite')
|
|
5
|
-
const
|
|
5
|
+
const isLocalAddress = require('is-local-address')
|
|
6
6
|
const { TAGS: URL_TAGS } = require('html-urls')
|
|
7
7
|
const isHTML = require('is-html-content')
|
|
8
8
|
const cssUrl = require('css-url-regex')
|
|
@@ -118,15 +118,16 @@ const rewriteHtmlUrls = ({ $, url }) => {
|
|
|
118
118
|
$(tagName.join(',')).each(function () {
|
|
119
119
|
const el = $(this)
|
|
120
120
|
const attr = el.attr(urlAttr)
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
121
|
+
if (typeof attr !== 'string') return
|
|
122
|
+
try {
|
|
123
|
+
const urlObj = new URL(attr, url)
|
|
124
|
+
if (!urlObj.protocol.startsWith('http')) return
|
|
125
|
+
if (isLocalAddress(urlObj.hostname)) {
|
|
126
|
+
el.remove()
|
|
127
|
+
} else {
|
|
128
|
+
el.attr(urlAttr, urlObj.toString())
|
|
129
|
+
}
|
|
130
|
+
} catch (_) {}
|
|
130
131
|
})
|
|
131
132
|
})
|
|
132
133
|
}
|