html-get 2.11.3 → 2.11.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -4
- package/src/auto-domains.json +30 -30
- package/src/index.js +32 -21
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "html-get",
|
|
3
3
|
"description": "Get the HTML from any website, using prerendering when is necessary.",
|
|
4
4
|
"homepage": "https://nicedoc.com/microlinkhq/html-get",
|
|
5
|
-
"version": "2.11.3",
|
|
5
|
+
"version": "2.11.5",
|
|
6
6
|
"main": "src/index.js",
|
|
7
7
|
"bin": {
|
|
8
8
|
"html-get": "bin/index.js"
|
|
@@ -50,6 +50,7 @@
|
|
|
50
50
|
"devDependencies": {
|
|
51
51
|
"@commitlint/cli": "latest",
|
|
52
52
|
"@commitlint/config-conventional": "latest",
|
|
53
|
+
"@ksmithut/prettier-standard": "latest",
|
|
53
54
|
"ava": "latest",
|
|
54
55
|
"browserless": "latest",
|
|
55
56
|
"c8": "latest",
|
|
@@ -59,7 +60,6 @@
|
|
|
59
60
|
"git-authors-cli": "latest",
|
|
60
61
|
"nano-staged": "latest",
|
|
61
62
|
"npm-check-updates": "latest",
|
|
62
|
-
"prettier-standard": "latest",
|
|
63
63
|
"pretty": "latest",
|
|
64
64
|
"puppeteer": "latest",
|
|
65
65
|
"regex-iso-date": "latest",
|
|
@@ -98,7 +98,8 @@
|
|
|
98
98
|
"test/**/*.js",
|
|
99
99
|
"!test/util.js"
|
|
100
100
|
],
|
|
101
|
-
"timeout": "2m"
|
|
101
|
+
"timeout": "2m",
|
|
102
|
+
"workerThreads": false
|
|
102
103
|
},
|
|
103
104
|
"commitlint": {
|
|
104
105
|
"extends": [
|
|
@@ -107,7 +108,8 @@
|
|
|
107
108
|
},
|
|
108
109
|
"nano-staged": {
|
|
109
110
|
"*.js": [
|
|
110
|
-
"prettier-standard"
|
|
111
|
+
"prettier-standard",
|
|
112
|
+
"standard --fix"
|
|
111
113
|
],
|
|
112
114
|
"*.md": [
|
|
113
115
|
"standard-markdown"
|
package/src/auto-domains.json
CHANGED
|
@@ -14,25 +14,25 @@
|
|
|
14
14
|
[
|
|
15
15
|
[
|
|
16
16
|
"domainWithoutSuffix",
|
|
17
|
-
"
|
|
17
|
+
"apple"
|
|
18
18
|
]
|
|
19
19
|
],
|
|
20
20
|
[
|
|
21
21
|
[
|
|
22
22
|
"domainWithoutSuffix",
|
|
23
|
-
"
|
|
23
|
+
"microsoft"
|
|
24
24
|
]
|
|
25
25
|
],
|
|
26
26
|
[
|
|
27
27
|
[
|
|
28
28
|
"domainWithoutSuffix",
|
|
29
|
-
"
|
|
29
|
+
"wordpress"
|
|
30
30
|
]
|
|
31
31
|
],
|
|
32
32
|
[
|
|
33
33
|
[
|
|
34
34
|
"domainWithoutSuffix",
|
|
35
|
-
"
|
|
35
|
+
"wikipedia"
|
|
36
36
|
]
|
|
37
37
|
],
|
|
38
38
|
[
|
|
@@ -53,12 +53,6 @@
|
|
|
53
53
|
"github"
|
|
54
54
|
]
|
|
55
55
|
],
|
|
56
|
-
[
|
|
57
|
-
[
|
|
58
|
-
"domainWithoutSuffix",
|
|
59
|
-
"imdb"
|
|
60
|
-
]
|
|
61
|
-
],
|
|
62
56
|
[
|
|
63
57
|
[
|
|
64
58
|
"domainWithoutSuffix",
|
|
@@ -71,12 +65,6 @@
|
|
|
71
65
|
"slideshare"
|
|
72
66
|
]
|
|
73
67
|
],
|
|
74
|
-
[
|
|
75
|
-
[
|
|
76
|
-
"domainWithoutSuffix",
|
|
77
|
-
"theguardian"
|
|
78
|
-
]
|
|
79
|
-
],
|
|
80
68
|
[
|
|
81
69
|
[
|
|
82
70
|
"domainWithoutSuffix",
|
|
@@ -86,19 +74,19 @@
|
|
|
86
74
|
[
|
|
87
75
|
[
|
|
88
76
|
"domainWithoutSuffix",
|
|
89
|
-
"
|
|
77
|
+
"theguardian"
|
|
90
78
|
]
|
|
91
79
|
],
|
|
92
80
|
[
|
|
93
81
|
[
|
|
94
82
|
"domainWithoutSuffix",
|
|
95
|
-
"
|
|
83
|
+
"imdb"
|
|
96
84
|
]
|
|
97
85
|
],
|
|
98
86
|
[
|
|
99
87
|
[
|
|
100
88
|
"domainWithoutSuffix",
|
|
101
|
-
"
|
|
89
|
+
"pinterest"
|
|
102
90
|
]
|
|
103
91
|
],
|
|
104
92
|
[
|
|
@@ -110,25 +98,25 @@
|
|
|
110
98
|
[
|
|
111
99
|
[
|
|
112
100
|
"domainWithoutSuffix",
|
|
113
|
-
"
|
|
101
|
+
"spotify"
|
|
114
102
|
]
|
|
115
103
|
],
|
|
116
104
|
[
|
|
117
105
|
[
|
|
118
106
|
"domainWithoutSuffix",
|
|
119
|
-
"
|
|
107
|
+
"soundcloud"
|
|
120
108
|
]
|
|
121
109
|
],
|
|
122
110
|
[
|
|
123
111
|
[
|
|
124
112
|
"domainWithoutSuffix",
|
|
125
|
-
"
|
|
113
|
+
"huffingtonpost"
|
|
126
114
|
]
|
|
127
115
|
],
|
|
128
116
|
[
|
|
129
117
|
[
|
|
130
118
|
"domainWithoutSuffix",
|
|
131
|
-
"
|
|
119
|
+
"engadget"
|
|
132
120
|
]
|
|
133
121
|
],
|
|
134
122
|
[
|
|
@@ -140,7 +128,7 @@
|
|
|
140
128
|
[
|
|
141
129
|
[
|
|
142
130
|
"domainWithoutSuffix",
|
|
143
|
-
"
|
|
131
|
+
"yelp"
|
|
144
132
|
]
|
|
145
133
|
],
|
|
146
134
|
[
|
|
@@ -152,25 +140,25 @@
|
|
|
152
140
|
[
|
|
153
141
|
[
|
|
154
142
|
"domainWithoutSuffix",
|
|
155
|
-
"
|
|
143
|
+
"zoom"
|
|
156
144
|
]
|
|
157
145
|
],
|
|
158
146
|
[
|
|
159
147
|
[
|
|
160
148
|
"domainWithoutSuffix",
|
|
161
|
-
"
|
|
149
|
+
"techcrunch"
|
|
162
150
|
]
|
|
163
151
|
],
|
|
164
152
|
[
|
|
165
153
|
[
|
|
166
154
|
"domainWithoutSuffix",
|
|
167
|
-
"
|
|
155
|
+
"theverge"
|
|
168
156
|
]
|
|
169
157
|
],
|
|
170
158
|
[
|
|
171
159
|
[
|
|
172
160
|
"domainWithoutSuffix",
|
|
173
|
-
"
|
|
161
|
+
"giphy"
|
|
174
162
|
]
|
|
175
163
|
],
|
|
176
164
|
[
|
|
@@ -191,6 +179,12 @@
|
|
|
191
179
|
"deviantart"
|
|
192
180
|
]
|
|
193
181
|
],
|
|
182
|
+
[
|
|
183
|
+
[
|
|
184
|
+
"domainWithoutSuffix",
|
|
185
|
+
"digg"
|
|
186
|
+
]
|
|
187
|
+
],
|
|
194
188
|
[
|
|
195
189
|
[
|
|
196
190
|
"domainWithoutSuffix",
|
|
@@ -200,13 +194,13 @@
|
|
|
200
194
|
[
|
|
201
195
|
[
|
|
202
196
|
"domainWithoutSuffix",
|
|
203
|
-
"
|
|
197
|
+
"flickr"
|
|
204
198
|
]
|
|
205
199
|
],
|
|
206
200
|
[
|
|
207
201
|
[
|
|
208
202
|
"domainWithoutSuffix",
|
|
209
|
-
"
|
|
203
|
+
"ghost"
|
|
210
204
|
]
|
|
211
205
|
],
|
|
212
206
|
[
|
|
@@ -239,6 +233,12 @@
|
|
|
239
233
|
"sourceforge"
|
|
240
234
|
]
|
|
241
235
|
],
|
|
236
|
+
[
|
|
237
|
+
[
|
|
238
|
+
"domainWithoutSuffix",
|
|
239
|
+
"stackoverflow"
|
|
240
|
+
]
|
|
241
|
+
],
|
|
242
242
|
[
|
|
243
243
|
[
|
|
244
244
|
"domainWithoutSuffix",
|
package/src/index.js
CHANGED
|
@@ -12,6 +12,7 @@ const autoDomains = require('./auto-domains')
|
|
|
12
12
|
const addHtml = require('./html')
|
|
13
13
|
|
|
14
14
|
const REQ_TIMEOUT = 8000
|
|
15
|
+
const ABORT_TYPES = ['image', 'stylesheet', 'font']
|
|
15
16
|
|
|
16
17
|
const fetch = PCancelable.fn(
|
|
17
18
|
async (
|
|
@@ -68,7 +69,7 @@ const prerender = PCancelable.fn(
|
|
|
68
69
|
headers,
|
|
69
70
|
gotOpts,
|
|
70
71
|
timeout = REQ_TIMEOUT,
|
|
71
|
-
abortTypes =
|
|
72
|
+
abortTypes = ABORT_TYPES,
|
|
72
73
|
...opts
|
|
73
74
|
},
|
|
74
75
|
onCancel
|
|
@@ -151,24 +152,32 @@ const determinateMode = (url, { prerender }) => {
|
|
|
151
152
|
return isFetchMode(url) ? 'fetch' : 'prerender'
|
|
152
153
|
}
|
|
153
154
|
|
|
154
|
-
const getContent =
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
155
|
+
const getContent = PCancelable.fn(
|
|
156
|
+
(
|
|
157
|
+
url,
|
|
158
|
+
mode,
|
|
159
|
+
{ getBrowserless, gotOpts, headers, puppeteerOpts, rewriteUrls, toEncode },
|
|
160
|
+
onCancel
|
|
161
|
+
) => {
|
|
162
|
+
const isFetchMode = mode === 'fetch'
|
|
163
|
+
const fetchOpts = isFetchMode
|
|
164
|
+
? { headers, toEncode, ...gotOpts }
|
|
165
|
+
: { headers, toEncode, getBrowserless, gotOpts, ...puppeteerOpts }
|
|
166
|
+
|
|
167
|
+
const promise = modes[mode](url, fetchOpts)
|
|
168
|
+
onCancel(() => promise.cancel())
|
|
169
|
+
|
|
170
|
+
return promise.then(content => {
|
|
171
|
+
const html = addHtml({
|
|
172
|
+
...content,
|
|
173
|
+
...(isFetchMode ? puppeteerOpts : undefined),
|
|
174
|
+
rewriteUrls
|
|
175
|
+
})
|
|
176
|
+
|
|
177
|
+
return { ...content, html }
|
|
178
|
+
})
|
|
179
|
+
}
|
|
180
|
+
)
|
|
172
181
|
|
|
173
182
|
module.exports = PCancelable.fn(
|
|
174
183
|
async (
|
|
@@ -205,13 +214,15 @@ module.exports = PCancelable.fn(
|
|
|
205
214
|
toEncode
|
|
206
215
|
})
|
|
207
216
|
|
|
208
|
-
onCancel(() => promise.
|
|
217
|
+
onCancel(() => promise.cancel())
|
|
209
218
|
|
|
210
219
|
const { mode, ...payload } = await promise
|
|
211
220
|
|
|
212
|
-
return
|
|
221
|
+
return Object.assign(payload, { stats: { mode, timing: time.rounded() } })
|
|
213
222
|
}
|
|
214
223
|
)
|
|
215
224
|
|
|
216
225
|
module.exports.REQ_TIMEOUT = REQ_TIMEOUT
|
|
226
|
+
module.exports.ABORT_TYPES = ABORT_TYPES
|
|
217
227
|
module.exports.isFetchMode = isFetchMode
|
|
228
|
+
module.exports.getContent = getContent
|