@data-fair/processing-web-scraper 0.2.0 → 0.2.1

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/index.js +8 -1
  2. package/package.json +2 -2
package/index.js CHANGED
@@ -210,8 +210,9 @@ exports.run = async ({ pluginConfig, processingConfig, processingId, dir, tmpDir
210
210
  if (page.etag) headers['if-none-match'] = page.etag
211
211
  let response
212
212
  try {
213
- response = await axios.get(page.url, { headers })
213
+ response = await axios.get(page.url, { headers, maxRedirects: 0 })
214
214
  } catch (err) {
215
+ // content did not change
215
216
  if (err.status === 304) {
216
217
  await log.debug(`page was not modified since last exploration ${page.url}`)
217
218
  sentIds.add(page._id)
@@ -220,6 +221,12 @@ exports.run = async ({ pluginConfig, processingConfig, processingId, dir, tmpDir
220
221
  }
221
222
  continue
222
223
  }
224
+ // follow a redirect
225
+ if (err.status === 301) {
226
+ await log.debug(`page redirected ${page.url} -> ${err.headers.location}`)
227
+ pages.push({ url: new URL(err.headers.location, page.url).href, source: 'redirect ' + page.url })
228
+ continue
229
+ }
223
230
  await log.warning(`failed to fetch page ${page.url} - ${err.status || err.message}`)
224
231
  if (page.source) await log.warning(`this broken URL comes from ${page.source}`)
225
232
  continue
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@data-fair/processing-web-scraper",
3
- "version": "0.2.0",
3
+ "version": "0.2.1",
4
4
  "description": "A small Web scraper that publishes its data into data-fair datasets.",
5
5
  "main": "index.js",
6
6
  "scripts": {
@@ -22,7 +22,7 @@
22
22
  },
23
23
  "homepage": "https://github.com/data-fair/processing-web-scraper#readme",
24
24
  "devDependencies": {
25
- "@data-fair/processings-test-utils": "^0.5.0",
25
+ "@data-fair/processings-test-utils": "^0.5.1",
26
26
  "config": "^3.3.6",
27
27
  "eslint": "^7.18.0",
28
28
  "express": "^4.18.2",