html-get 2.21.17 → 2.21.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/auto-domains.json +37 -37
- package/src/util.js +2 -1
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "html-get",
|
|
3
3
|
"description": "Get the HTML from any website, fine-tuned for correction & speed",
|
|
4
4
|
"homepage": "https://nicedoc.com/microlinkhq/html-get",
|
|
5
|
-
"version": "2.21.17",
|
|
5
|
+
"version": "2.21.19",
|
|
6
6
|
"main": "src/index.js",
|
|
7
7
|
"bin": {
|
|
8
8
|
"html-get": "bin/index.js"
|
|
@@ -42,7 +42,7 @@
|
|
|
42
42
|
"dependencies": {
|
|
43
43
|
"@kikobeats/time-span": "~1.0.5",
|
|
44
44
|
"@metascraper/helpers": "~5.49.1",
|
|
45
|
-
"cheerio": "~1.
|
|
45
|
+
"cheerio": "~1.2.0",
|
|
46
46
|
"content-type": "~1.0.5",
|
|
47
47
|
"css-url-regex": "~4.0.0",
|
|
48
48
|
"debug-logfmt": "~1.4.0",
|
package/src/auto-domains.json
CHANGED
|
@@ -2,19 +2,19 @@
|
|
|
2
2
|
[
|
|
3
3
|
[
|
|
4
4
|
"domainWithoutSuffix",
|
|
5
|
-
"
|
|
5
|
+
"google"
|
|
6
6
|
]
|
|
7
7
|
],
|
|
8
8
|
[
|
|
9
9
|
[
|
|
10
10
|
"domainWithoutSuffix",
|
|
11
|
-
"
|
|
11
|
+
"youtube"
|
|
12
12
|
]
|
|
13
13
|
],
|
|
14
14
|
[
|
|
15
15
|
[
|
|
16
16
|
"domainWithoutSuffix",
|
|
17
|
-
"
|
|
17
|
+
"microsoft"
|
|
18
18
|
]
|
|
19
19
|
],
|
|
20
20
|
[
|
|
@@ -26,19 +26,19 @@
|
|
|
26
26
|
[
|
|
27
27
|
[
|
|
28
28
|
"domainWithoutSuffix",
|
|
29
|
-
"
|
|
29
|
+
"wordpress"
|
|
30
30
|
]
|
|
31
31
|
],
|
|
32
32
|
[
|
|
33
33
|
[
|
|
34
34
|
"domainWithoutSuffix",
|
|
35
|
-
"
|
|
35
|
+
"wikipedia"
|
|
36
36
|
]
|
|
37
37
|
],
|
|
38
38
|
[
|
|
39
39
|
[
|
|
40
40
|
"domainWithoutSuffix",
|
|
41
|
-
"
|
|
41
|
+
"blogspot"
|
|
42
42
|
]
|
|
43
43
|
],
|
|
44
44
|
[
|
|
@@ -56,13 +56,13 @@
|
|
|
56
56
|
[
|
|
57
57
|
[
|
|
58
58
|
"domainWithoutSuffix",
|
|
59
|
-
"
|
|
59
|
+
"nytimes"
|
|
60
60
|
]
|
|
61
61
|
],
|
|
62
62
|
[
|
|
63
63
|
[
|
|
64
64
|
"domainWithoutSuffix",
|
|
65
|
-
"
|
|
65
|
+
"bbc"
|
|
66
66
|
]
|
|
67
67
|
],
|
|
68
68
|
[
|
|
@@ -80,7 +80,7 @@
|
|
|
80
80
|
[
|
|
81
81
|
[
|
|
82
82
|
"domainWithoutSuffix",
|
|
83
|
-
"
|
|
83
|
+
"theguardian"
|
|
84
84
|
]
|
|
85
85
|
],
|
|
86
86
|
[
|
|
@@ -92,13 +92,13 @@
|
|
|
92
92
|
[
|
|
93
93
|
[
|
|
94
94
|
"domainWithoutSuffix",
|
|
95
|
-
"
|
|
95
|
+
"huffingtonpost"
|
|
96
96
|
]
|
|
97
97
|
],
|
|
98
98
|
[
|
|
99
99
|
[
|
|
100
100
|
"domainWithoutSuffix",
|
|
101
|
-
"
|
|
101
|
+
"pinterest"
|
|
102
102
|
]
|
|
103
103
|
],
|
|
104
104
|
[
|
|
@@ -110,13 +110,13 @@
|
|
|
110
110
|
[
|
|
111
111
|
[
|
|
112
112
|
"domainWithoutSuffix",
|
|
113
|
-
"
|
|
113
|
+
"instagram"
|
|
114
114
|
]
|
|
115
115
|
],
|
|
116
116
|
[
|
|
117
117
|
[
|
|
118
118
|
"domainWithoutSuffix",
|
|
119
|
-
"
|
|
119
|
+
"soundcloud"
|
|
120
120
|
]
|
|
121
121
|
],
|
|
122
122
|
[
|
|
@@ -133,134 +133,134 @@
|
|
|
133
133
|
],
|
|
134
134
|
[
|
|
135
135
|
[
|
|
136
|
-
"
|
|
137
|
-
"
|
|
136
|
+
"domainWithoutSuffix",
|
|
137
|
+
"engadget"
|
|
138
138
|
]
|
|
139
139
|
],
|
|
140
140
|
[
|
|
141
141
|
[
|
|
142
|
-
"
|
|
143
|
-
"
|
|
142
|
+
"domain",
|
|
143
|
+
"abc.net.au"
|
|
144
144
|
]
|
|
145
145
|
],
|
|
146
146
|
[
|
|
147
147
|
[
|
|
148
148
|
"domainWithoutSuffix",
|
|
149
|
-
"
|
|
149
|
+
"eventbrite"
|
|
150
150
|
]
|
|
151
151
|
],
|
|
152
152
|
[
|
|
153
153
|
[
|
|
154
154
|
"domainWithoutSuffix",
|
|
155
|
-
"
|
|
155
|
+
"yelp"
|
|
156
156
|
]
|
|
157
157
|
],
|
|
158
158
|
[
|
|
159
159
|
[
|
|
160
160
|
"domainWithoutSuffix",
|
|
161
|
-
"
|
|
161
|
+
"theverge"
|
|
162
162
|
]
|
|
163
163
|
],
|
|
164
164
|
[
|
|
165
165
|
[
|
|
166
166
|
"domainWithoutSuffix",
|
|
167
|
-
"
|
|
167
|
+
"arxiv"
|
|
168
168
|
]
|
|
169
169
|
],
|
|
170
170
|
[
|
|
171
171
|
[
|
|
172
172
|
"domainWithoutSuffix",
|
|
173
|
-
"
|
|
173
|
+
"imgur"
|
|
174
174
|
]
|
|
175
175
|
],
|
|
176
176
|
[
|
|
177
177
|
[
|
|
178
178
|
"domainWithoutSuffix",
|
|
179
|
-
"
|
|
179
|
+
"reddit"
|
|
180
180
|
]
|
|
181
181
|
],
|
|
182
182
|
[
|
|
183
183
|
[
|
|
184
184
|
"domainWithoutSuffix",
|
|
185
|
-
"
|
|
185
|
+
"stackoverflow"
|
|
186
186
|
]
|
|
187
187
|
],
|
|
188
188
|
[
|
|
189
189
|
[
|
|
190
190
|
"domainWithoutSuffix",
|
|
191
|
-
"
|
|
191
|
+
"flickr"
|
|
192
192
|
]
|
|
193
193
|
],
|
|
194
194
|
[
|
|
195
195
|
[
|
|
196
196
|
"domainWithoutSuffix",
|
|
197
|
-
"
|
|
197
|
+
"sourceforge"
|
|
198
198
|
]
|
|
199
199
|
],
|
|
200
200
|
[
|
|
201
201
|
[
|
|
202
202
|
"domainWithoutSuffix",
|
|
203
|
-
"
|
|
203
|
+
"dribbble"
|
|
204
204
|
]
|
|
205
205
|
],
|
|
206
206
|
[
|
|
207
207
|
[
|
|
208
208
|
"domainWithoutSuffix",
|
|
209
|
-
"
|
|
209
|
+
"csdn"
|
|
210
210
|
]
|
|
211
211
|
],
|
|
212
212
|
[
|
|
213
213
|
[
|
|
214
214
|
"domainWithoutSuffix",
|
|
215
|
-
"
|
|
215
|
+
"deviantart"
|
|
216
216
|
]
|
|
217
217
|
],
|
|
218
218
|
[
|
|
219
219
|
[
|
|
220
220
|
"domainWithoutSuffix",
|
|
221
|
-
"
|
|
221
|
+
"digg"
|
|
222
222
|
]
|
|
223
223
|
],
|
|
224
224
|
[
|
|
225
225
|
[
|
|
226
226
|
"domainWithoutSuffix",
|
|
227
|
-
"
|
|
227
|
+
"etsy"
|
|
228
228
|
]
|
|
229
229
|
],
|
|
230
230
|
[
|
|
231
231
|
[
|
|
232
232
|
"domainWithoutSuffix",
|
|
233
|
-
"
|
|
233
|
+
"ghost"
|
|
234
234
|
]
|
|
235
235
|
],
|
|
236
236
|
[
|
|
237
237
|
[
|
|
238
238
|
"domainWithoutSuffix",
|
|
239
|
-
"
|
|
239
|
+
"giphy"
|
|
240
240
|
]
|
|
241
241
|
],
|
|
242
242
|
[
|
|
243
243
|
[
|
|
244
244
|
"domainWithoutSuffix",
|
|
245
|
-
"
|
|
245
|
+
"gitlab"
|
|
246
246
|
]
|
|
247
247
|
],
|
|
248
248
|
[
|
|
249
249
|
[
|
|
250
250
|
"domainWithoutSuffix",
|
|
251
|
-
"
|
|
251
|
+
"meetup"
|
|
252
252
|
]
|
|
253
253
|
],
|
|
254
254
|
[
|
|
255
255
|
[
|
|
256
256
|
"domainWithoutSuffix",
|
|
257
|
-
"
|
|
257
|
+
"producthunt"
|
|
258
258
|
]
|
|
259
259
|
],
|
|
260
260
|
[
|
|
261
261
|
[
|
|
262
262
|
"domainWithoutSuffix",
|
|
263
|
-
"
|
|
263
|
+
"substack"
|
|
264
264
|
]
|
|
265
265
|
],
|
|
266
266
|
[
|
package/src/util.js
CHANGED
|
@@ -2,12 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
const NullProtoObj = require('null-prototype-object')
|
|
4
4
|
const { parse } = require('content-type')
|
|
5
|
+
const { EOL } = require('node:os')
|
|
5
6
|
|
|
6
7
|
const CACHE = new NullProtoObj()
|
|
7
8
|
|
|
8
9
|
const parseContentType = contentType =>
|
|
9
10
|
typeof contentType === 'string'
|
|
10
|
-
? parse(contentType)
|
|
11
|
+
? parse(contentType.split(EOL)[0])
|
|
11
12
|
: { type: undefined, parameters: {} }
|
|
12
13
|
|
|
13
14
|
const contentType = headers => {
|