html-get 2.9.4 → 2.9.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +8 -8
- package/src/auto-domains.json +14 -14
- package/src/html.js +3 -3
- package/src/index.js +8 -8
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "html-get",
|
|
3
3
|
"description": "Get the HTML from any website, using prerendering when is necessary.",
|
|
4
4
|
"homepage": "https://nicedoc.com/microlinkhq/html-get",
|
|
5
|
-
"version": "2.9.4",
|
|
5
|
+
"version": "2.9.8",
|
|
6
6
|
"main": "src/index.js",
|
|
7
7
|
"bin": {
|
|
8
8
|
"html-get": "bin/index.js"
|
|
@@ -29,7 +29,7 @@
|
|
|
29
29
|
"request"
|
|
30
30
|
],
|
|
31
31
|
"dependencies": {
|
|
32
|
-
"@metascraper/helpers": "~5.
|
|
32
|
+
"@metascraper/helpers": "~5.25.0",
|
|
33
33
|
"cheerio": "~1.0.0-rc.10",
|
|
34
34
|
"css-url-regex": "~4.0.0",
|
|
35
35
|
"debug-logfmt": "~1.0.4",
|
|
@@ -57,7 +57,7 @@
|
|
|
57
57
|
"conventional-github-releaser": "latest",
|
|
58
58
|
"finepack": "latest",
|
|
59
59
|
"git-authors-cli": "latest",
|
|
60
|
-
"lint-staged": "latest",
|
|
60
|
+
"nano-staged": "latest",
|
|
61
61
|
"npm-check-updates": "latest",
|
|
62
62
|
"nyc": "latest",
|
|
63
63
|
"prettier-standard": "latest",
|
|
@@ -105,19 +105,19 @@
|
|
|
105
105
|
"@commitlint/config-conventional"
|
|
106
106
|
]
|
|
107
107
|
},
|
|
108
|
-
"lint-staged": {
|
|
109
|
-
"package.json": [
|
|
110
|
-
"finepack --sort-ignore-object-at ava"
|
|
111
|
-
],
|
|
108
|
+
"nano-staged": {
|
|
112
109
|
"*.js": [
|
|
113
110
|
"prettier-standard"
|
|
114
111
|
],
|
|
115
112
|
"*.md": [
|
|
116
113
|
"standard-markdown"
|
|
114
|
+
],
|
|
115
|
+
"package.json": [
|
|
116
|
+
"finepack --sort-ignore-object-at ava"
|
|
117
117
|
]
|
|
118
118
|
},
|
|
119
119
|
"simple-git-hooks": {
|
|
120
120
|
"commit-msg": "npx commitlint --edit",
|
|
121
|
-
"pre-commit": "npx lint-staged"
|
|
121
|
+
"pre-commit": "npx nano-staged"
|
|
122
122
|
}
|
|
123
123
|
}
|
package/src/auto-domains.json
CHANGED
|
@@ -1,37 +1,37 @@
|
|
|
1
1
|
[
|
|
2
2
|
"google",
|
|
3
|
-
"apple",
|
|
4
3
|
"youtube",
|
|
4
|
+
"apple",
|
|
5
5
|
"microsoft",
|
|
6
|
-
"wordpress",
|
|
7
6
|
"wikipedia",
|
|
7
|
+
"wordpress",
|
|
8
|
+
"blogspot",
|
|
8
9
|
"vimeo",
|
|
9
10
|
"github",
|
|
10
|
-
"
|
|
11
|
-
"bbc",
|
|
12
|
-
"theguardian",
|
|
11
|
+
"nytimes",
|
|
13
12
|
"imdb",
|
|
13
|
+
"theguardian",
|
|
14
|
+
"bbc",
|
|
14
15
|
"slideshare",
|
|
15
|
-
"nytimes",
|
|
16
|
-
"soundcloud",
|
|
17
16
|
"huffingtonpost",
|
|
18
17
|
"telegraph",
|
|
19
18
|
"pinterest",
|
|
19
|
+
"soundcloud",
|
|
20
|
+
"eventbrite",
|
|
21
|
+
"engadget",
|
|
22
|
+
"spotify",
|
|
20
23
|
"yelp",
|
|
21
|
-
"stackoverflow",
|
|
22
24
|
"zoom",
|
|
23
25
|
"techcrunch",
|
|
24
|
-
"engadget",
|
|
25
|
-
"eventbrite",
|
|
26
|
-
"spotify",
|
|
27
26
|
"theverge",
|
|
28
|
-
"
|
|
27
|
+
"etsy",
|
|
28
|
+
"imgur",
|
|
29
|
+
"stackoverflow",
|
|
29
30
|
"csdn",
|
|
30
31
|
"digg",
|
|
31
|
-
"etsy",
|
|
32
32
|
"flickr",
|
|
33
33
|
"ghost",
|
|
34
|
-
"
|
|
34
|
+
"giphy",
|
|
35
35
|
"meetup",
|
|
36
36
|
"producthunt",
|
|
37
37
|
"reddit",
|
package/src/html.js
CHANGED
|
@@ -97,7 +97,7 @@ const rewriteCssUrls = ({ html, url }) => {
|
|
|
97
97
|
if (cssUrl.startsWith('/')) {
|
|
98
98
|
try {
|
|
99
99
|
const absoluteUrl = new URL(cssUrl, url).toString()
|
|
100
|
-
html = replaceString(html, cssUrl, absoluteUrl)
|
|
100
|
+
html = replaceString(html, `url(${cssUrl})`, `url(${absoluteUrl})`)
|
|
101
101
|
} catch (_) {}
|
|
102
102
|
}
|
|
103
103
|
})
|
|
@@ -141,7 +141,7 @@ module.exports = ({
|
|
|
141
141
|
|
|
142
142
|
const $ = cheerio.load(content)
|
|
143
143
|
|
|
144
|
-
if (rewriteUrls) rewriteHtmlUrls({ $, url
|
|
144
|
+
if (rewriteUrls) rewriteHtmlUrls({ $, url })
|
|
145
145
|
|
|
146
146
|
addHead({ $, url, headers })
|
|
147
147
|
|
|
@@ -164,7 +164,7 @@ module.exports = ({
|
|
|
164
164
|
if (scripts) injectScripts({ $, scripts, type: 'text/javascript' })
|
|
165
165
|
if (modules) injectScripts({ $, modules, type: 'module' })
|
|
166
166
|
|
|
167
|
-
return rewriteCssUrls({ html: $.html(), url })
|
|
167
|
+
return rewriteUrls ? rewriteCssUrls({ html: $.html(), url }) : $.html()
|
|
168
168
|
}
|
|
169
169
|
|
|
170
170
|
module.exports.isHTML = isHTML
|
package/src/index.js
CHANGED
|
@@ -20,9 +20,9 @@ const fetch = (
|
|
|
20
20
|
) =>
|
|
21
21
|
new PCancelable(async (resolve, reject, onCancel) => {
|
|
22
22
|
const req = got(url, {
|
|
23
|
-
responseType: 'buffer',
|
|
24
23
|
timeout: reflect ? timeout / 2 : timeout,
|
|
25
|
-
...opts
|
|
24
|
+
...opts,
|
|
25
|
+
responseType: 'buffer'
|
|
26
26
|
})
|
|
27
27
|
|
|
28
28
|
onCancel.shouldReject = false
|
|
@@ -46,12 +46,12 @@ const fetch = (
|
|
|
46
46
|
return reflect
|
|
47
47
|
? resolve({ isRejected: true, error })
|
|
48
48
|
: resolve({
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
49
|
+
url,
|
|
50
|
+
html: '',
|
|
51
|
+
mode: 'fetch',
|
|
52
|
+
headers: error.response ? error.response.headers : {},
|
|
53
|
+
statusCode: error.response ? error.response.statusCode : undefined
|
|
54
|
+
})
|
|
55
55
|
}
|
|
56
56
|
})
|
|
57
57
|
|