unprint 0.18.31 → 0.18.33

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
package/README.md CHANGED
@@ -234,6 +234,7 @@ Options
234
234
  * `browserUserAgent`: The default user agent header for browser-like requests (`get` interface `fetch` and `browserRequest`)
235
235
  * `apiUserAgent`: The default user agent header for raw requests (`get` interface `request`)
236
236
  * `useBrowser`: Forward the call to `unprint.browser()` (see below), only for GET-requests
237
+ * `fullCookies`: Return cookie metadata in `res.cookies`, instead of just values
237
238
 
238
239
  Use Playwright with Chromium (experimental)
239
240
  * `unprint.browser(url, [options])`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unprint",
3
- "version": "0.18.31",
3
+ "version": "0.18.33",
4
4
  "description": "Simplify common web scraping tasks while staying in control of the data.",
5
5
  "main": "src/app.js",
6
6
  "scripts": {},
@@ -30,6 +30,7 @@
30
30
  "moment-timezone": "^0.5.34",
31
31
  "object-hash": "^3.0.0",
32
32
  "patchright": "^1.56.1",
33
+ "set-cookie-parser": "^3.0.1",
33
34
  "srcset": "^4.0.0",
34
35
  "tunnel": "^0.0.6",
35
36
  "undici": "^7.18.2"
package/src/app.js CHANGED
@@ -6,6 +6,7 @@ const EventEmitter = require('events');
6
6
  const undici = require('undici');
7
7
  const qs = require('node:querystring');
8
8
  const cookie = require('cookie');
9
+ const { parseSetCookie } = require('set-cookie-parser');
9
10
  const Bottleneck = require('bottleneck');
10
11
  const moment = require('moment-timezone');
11
12
  const merge = require('deepmerge');
@@ -1122,15 +1123,21 @@ function curateHeaders(headers, options) {
1122
1123
  return headers;
1123
1124
  }
1124
1125
 
1125
- function curateCookies(headers) {
1126
- if (headers) {
1127
- const setCookie = typeof headers.get === 'function'
1128
- ? headers.get('set-cookie')
1129
- : headers['set-cookie'];
1126
+ function curateCookies(res, options) {
1127
+ if (res) {
1128
+ const setCookie = typeof res.headers.get === 'function'
1129
+ ? res.headers.get('set-cookie')
1130
+ : res.headers['set-cookie'];
1130
1131
 
1131
1132
  if (setCookie) {
1132
1133
  try {
1133
- return cookie.parseCookie(setCookie);
1134
+ const cookies = parseSetCookie(res, { map: true, decode: false });
1135
+
1136
+ if (options.fullCookies) {
1137
+ return cookies;
1138
+ }
1139
+
1140
+ return Object.fromEntries(Object.entries(cookies).map(([_key, value]) => [value.name, value.value]));
1134
1141
  } catch (_error) {
1135
1142
  // invalid cookie
1136
1143
  }
@@ -1148,7 +1155,7 @@ function curateResponse(res, data, options, { url, control, customOptions }) {
1148
1155
  status: res.statusCode || res.status,
1149
1156
  statusText: res.statusText,
1150
1157
  headers: res.headers,
1151
- cookies: curateCookies(res.headers),
1158
+ cookies: curateCookies(res, customOptions),
1152
1159
  response: res,
1153
1160
  res,
1154
1161
  control,
@@ -1401,15 +1408,15 @@ async function browserRequest(url, customOptions = {}) {
1401
1408
 
1402
1409
  await closeBrowser(client, options);
1403
1410
 
1404
- return {
1411
+ return curateResponse({
1405
1412
  ok: false,
1406
1413
  status,
1407
1414
  statusText,
1408
1415
  headers,
1409
- cookies: curateCookies(headers),
1416
+ cookies: curateCookies(res, customOptions),
1410
1417
  response: res,
1411
1418
  res,
1412
- };
1419
+ }, data, options, { url, customOptions });
1413
1420
  }
1414
1421
 
1415
1422
  events.emit('requestSuccess', feedbackBase);
@@ -1437,7 +1444,7 @@ async function browserRequest(url, customOptions = {}) {
1437
1444
  status,
1438
1445
  statusText,
1439
1446
  headers,
1440
- cookies: curateCookies(headers),
1447
+ cookies: curateCookies(res, customOptions),
1441
1448
  response: res,
1442
1449
  res,
1443
1450
  };
@@ -1565,6 +1572,10 @@ async function request(url, body, customOptions = {}, method = 'GET', redirects
1565
1572
  return request(newUrl, body, options, method, redirects + 1);
1566
1573
  }
1567
1574
 
1575
+ const data = options.interface === 'fetch'
1576
+ ? await res.text()
1577
+ : await res.body.text();
1578
+
1568
1579
  if (!(status >= 200 && status < 300)) {
1569
1580
  handleError(new Error(`HTTP response from ${url} not OK (${status} ${res.statusText})`), 'HTTP_NOT_OK');
1570
1581
 
@@ -1574,21 +1585,17 @@ async function request(url, body, customOptions = {}, method = 'GET', redirects
1574
1585
  statusText: res.statusText,
1575
1586
  });
1576
1587
 
1577
- return {
1588
+ return curateResponse({
1578
1589
  ok: false,
1579
1590
  status,
1580
1591
  statusText: res.statusText,
1581
1592
  headers: res.headers,
1582
- cookies: curateCookies(res.headers),
1593
+ cookies: curateCookies(res, customOptions),
1583
1594
  response: res,
1584
1595
  res,
1585
- };
1596
+ }, data, options, { url, customOptions });
1586
1597
  }
1587
1598
 
1588
- const data = options.interface === 'fetch'
1589
- ? await res.text()
1590
- : await res.body.text();
1591
-
1592
1599
  events.emit('requestSuccess', {
1593
1600
  ...feedbackBase,
1594
1601
  status,
package/tests/init.js CHANGED
@@ -51,10 +51,13 @@ async function initTest() {
51
51
  useProxy: false,
52
52
  });
53
53
 
54
+ const setCookiesRes = await unprint.get(`http://127.0.0.1:${port}/cookies`);
55
+
54
56
  console.log('JSON RES', jsonRes);
55
57
  console.log('ERROR RES', errorRes);
56
58
  console.log('COOKIES RES', cookiesRes);
57
59
  console.log('PROXY RES', proxyRes.data);
60
+ console.log('SET COOKIES RES', setCookiesRes.cookies);
58
61
 
59
62
  console.log('title', res.context.query.content('//*[contains(text(), "Test")]'));
60
63
  console.log('date', res.context.query.date('#date', 'DD-MM-YYYY HH:mm'));
@@ -123,6 +126,13 @@ async function initServer() {
123
126
  res.send(data);
124
127
  });
125
128
 
129
+ app.get('/cookies', (req, res) => {
130
+ res.cookie('hello', 'world', { httpOnly: true });
131
+ res.cookie('foo', 'bar');
132
+
133
+ res.send(data);
134
+ });
135
+
126
136
  app.get('/error/:code', (req, res) => {
127
137
  res.status(Number(req.params.code)).send();
128
138
  });