@data-fair/processing-web-scraper 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +8 -1
- package/package.json +2 -2
package/index.js
CHANGED
|
@@ -210,8 +210,9 @@ exports.run = async ({ pluginConfig, processingConfig, processingId, dir, tmpDir
|
|
|
210
210
|
if (page.etag) headers['if-none-match'] = page.etag
|
|
211
211
|
let response
|
|
212
212
|
try {
|
|
213
|
-
response = await axios.get(page.url, { headers })
|
|
213
|
+
response = await axios.get(page.url, { headers, maxRedirects: 0 })
|
|
214
214
|
} catch (err) {
|
|
215
|
+
// content did not change
|
|
215
216
|
if (err.status === 304) {
|
|
216
217
|
await log.debug(`page was not modified since last exploration ${page.url}`)
|
|
217
218
|
sentIds.add(page._id)
|
|
@@ -220,6 +221,12 @@ exports.run = async ({ pluginConfig, processingConfig, processingId, dir, tmpDir
|
|
|
220
221
|
}
|
|
221
222
|
continue
|
|
222
223
|
}
|
|
224
|
+
// follow a redirect
|
|
225
|
+
if (err.status === 301) {
|
|
226
|
+
await log.debug(`page redirected ${page.url} -> ${err.headers.location}`)
|
|
227
|
+
pages.push({ url: new URL(err.headers.location, page.url).href, source: 'redirect ' + page.url })
|
|
228
|
+
continue
|
|
229
|
+
}
|
|
223
230
|
await log.warning(`failed to fetch page ${page.url} - ${err.status || err.message}`)
|
|
224
231
|
if (page.source) await log.warning(`this broken URL comes from ${page.source}`)
|
|
225
232
|
continue
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@data-fair/processing-web-scraper",
|
|
3
|
-
"version": "0.2.0",
|
|
3
|
+
"version": "0.2.1",
|
|
4
4
|
"description": "A small Web scraper that publishes its data into data-fair datasets.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"scripts": {
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
},
|
|
23
23
|
"homepage": "https://github.com/data-fair/processing-web-scraper#readme",
|
|
24
24
|
"devDependencies": {
|
|
25
|
-
"@data-fair/processings-test-utils": "^0.5.
|
|
25
|
+
"@data-fair/processings-test-utils": "^0.5.1",
|
|
26
26
|
"config": "^3.3.6",
|
|
27
27
|
"eslint": "^7.18.0",
|
|
28
28
|
"express": "^4.18.2",
|