unprint 0.18.3 → 0.18.5

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/app.js +32 -4
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unprint",
3
- "version": "0.18.3",
3
+ "version": "0.18.5",
4
4
  "description": "Simplify common web scraping tasks while staying in control of the data.",
5
5
  "main": "src/app.js",
6
6
  "scripts": {},
package/src/app.js CHANGED
@@ -1154,7 +1154,7 @@ async function getBrowserInstance(scope, options, useProxy = false) {
1154
1154
  });
1155
1155
 
1156
1156
  const contextLauncher = browserLauncher.then((browser) => browser.newContext({
1157
- userAgent: 'unprint',
1157
+ userAgent: options.browserUserAgent || options.userAgent,
1158
1158
  ...options.context,
1159
1159
  ...(useProxy && {
1160
1160
  proxy: {
@@ -1206,11 +1206,25 @@ function getAgent(options, url) {
1206
1206
  ) {
1207
1207
  return new undici.ProxyAgent(`http://${options.proxy.host}:${options.proxy.port}/`, {
1208
1208
  bodyTimeout: options.timeout,
1209
+ interceptors: { // only applies to fetch
1210
+ Agent: [
1211
+ undici.interceptors.redirect({
1212
+ maxRedirection: options.followRedirects ? options.maxRedirects : 0,
1213
+ }),
1214
+ ],
1215
+ },
1209
1216
  });
1210
1217
  }
1211
1218
 
1212
1219
  return new undici.Agent({
1213
1220
  bodyTimeout: options.timeout,
1221
+ interceptors: { // only applies to fetch
1222
+ Agent: [
1223
+ undici.interceptors.redirect({
1224
+ maxRedirection: options.followRedirects ? options.maxRedirects : 0,
1225
+ }),
1226
+ ],
1227
+ },
1214
1228
  });
1215
1229
  }
1216
1230
 
@@ -1379,11 +1393,13 @@ function curateRequestBody(body) {
1379
1393
  return { body };
1380
1394
  }
1381
1395
 
1382
- async function request(url, body, customOptions = {}, method = 'GET') {
1396
+ async function request(url, body, customOptions = {}, method = 'GET', redirects = 0) {
1383
1397
  const options = merge.all([{
1384
1398
  interface: 'fetch', // fetch or request
1385
1399
  timeout: 10000,
1386
1400
  extract: true,
1401
+ followRedirects: true,
1402
+ maxRedirects: 3,
1387
1403
  url,
1388
1404
  }, globalOptions, customOptions]);
1389
1405
 
@@ -1425,12 +1441,24 @@ async function request(url, body, customOptions = {}, method = 'GET') {
1425
1441
  async text() { return error.cause?.cause?.message || 'Request aborted'; },
1426
1442
  }));
1427
1443
 
1444
+ const status = res.statusCode || res.status;
1445
+
1446
+ // fetch handles redirects internally, configured by agent/dispatcher, don't follow again
1447
+ if (options.interface === 'request'
1448
+ && [301, 302, 303, 307, 308].includes(status)
1449
+ && res.headers.location
1450
+ && options.followRedirects
1451
+ && redirects < options.maxRedirects
1452
+ ) {
1453
+ const newUrl = new URL(res.headers.location, url).toString();
1454
+
1455
+ return request(newUrl, body, options, method, redirects + 1);
1456
+ }
1457
+
1428
1458
  const data = options.interface === 'fetch'
1429
1459
  ? await res.text()
1430
1460
  : await res.body.text();
1431
1461
 
1432
- const status = res.statusCode || res.status;
1433
-
1434
1462
  if (!(status >= 200 && status < 300)) {
1435
1463
  handleError(new Error(`HTTP response from ${url} not OK (${status} ${res.statusText}): ${data}`), 'HTTP_NOT_OK');
1436
1464