unprint 0.18.31 → 0.18.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/package.json +2 -1
- package/src/app.js +17 -10
- package/tests/init.js +10 -0
package/README.md
CHANGED
|
@@ -234,6 +234,7 @@ Options
|
|
|
234
234
|
* `browserUserAgent`: The default user agent header for browser-like requests (`get` interface `fetch` and `browserRequest`)
|
|
235
235
|
* `apiUserAgent`: The default user agent header for raw requests (`get` interface `request`)
|
|
236
236
|
* `useBrowser`: Forward the call to `unprint.browser()` (see below), only for GET-requests
|
|
237
|
+
* `fullCookies`: Return cookie metadata in `res.cookies`, instead of just values
|
|
237
238
|
|
|
238
239
|
Use Playwright with Chromium (experimental)
|
|
239
240
|
* `unprint.browser(url, [options])`
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "unprint",
|
|
3
|
-
"version": "0.18.31",
|
|
3
|
+
"version": "0.18.32",
|
|
4
4
|
"description": "Simplify common web scraping tasks while staying in control of the data.",
|
|
5
5
|
"main": "src/app.js",
|
|
6
6
|
"scripts": {},
|
|
@@ -30,6 +30,7 @@
|
|
|
30
30
|
"moment-timezone": "^0.5.34",
|
|
31
31
|
"object-hash": "^3.0.0",
|
|
32
32
|
"patchright": "^1.56.1",
|
|
33
|
+
"set-cookie-parser": "^3.0.1",
|
|
33
34
|
"srcset": "^4.0.0",
|
|
34
35
|
"tunnel": "^0.0.6",
|
|
35
36
|
"undici": "^7.18.2"
|
package/src/app.js
CHANGED
|
@@ -6,6 +6,7 @@ const EventEmitter = require('events');
|
|
|
6
6
|
const undici = require('undici');
|
|
7
7
|
const qs = require('node:querystring');
|
|
8
8
|
const cookie = require('cookie');
|
|
9
|
+
const { parseSetCookie } = require('set-cookie-parser');
|
|
9
10
|
const Bottleneck = require('bottleneck');
|
|
10
11
|
const moment = require('moment-timezone');
|
|
11
12
|
const merge = require('deepmerge');
|
|
@@ -1122,15 +1123,21 @@ function curateHeaders(headers, options) {
|
|
|
1122
1123
|
return headers;
|
|
1123
1124
|
}
|
|
1124
1125
|
|
|
1125
|
-
function curateCookies(headers) {
|
|
1126
|
-
if (headers) {
|
|
1127
|
-
const setCookie = typeof headers.get === 'function'
|
|
1128
|
-
? headers.get('set-cookie')
|
|
1129
|
-
: headers['set-cookie'];
|
|
1126
|
+
function curateCookies(res, options) {
|
|
1127
|
+
if (res) {
|
|
1128
|
+
const setCookie = typeof res.headers.get === 'function'
|
|
1129
|
+
? res.headers.get('set-cookie')
|
|
1130
|
+
: res.headers['set-cookie'];
|
|
1130
1131
|
|
|
1131
1132
|
if (setCookie) {
|
|
1132
1133
|
try {
|
|
1133
|
-
|
|
1134
|
+
const cookies = parseSetCookie(res, { map: true, decode: false });
|
|
1135
|
+
|
|
1136
|
+
if (options.fullCookies) {
|
|
1137
|
+
return cookies;
|
|
1138
|
+
}
|
|
1139
|
+
|
|
1140
|
+
return Object.fromEntries(Object.entries(cookies).map(([_key, value]) => [value.name, value.value]));
|
|
1134
1141
|
} catch (_error) {
|
|
1135
1142
|
// invalid cookie
|
|
1136
1143
|
}
|
|
@@ -1148,7 +1155,7 @@ function curateResponse(res, data, options, { url, control, customOptions }) {
|
|
|
1148
1155
|
status: res.statusCode || res.status,
|
|
1149
1156
|
statusText: res.statusText,
|
|
1150
1157
|
headers: res.headers,
|
|
1151
|
-
cookies: curateCookies(res
|
|
1158
|
+
cookies: curateCookies(res, customOptions),
|
|
1152
1159
|
response: res,
|
|
1153
1160
|
res,
|
|
1154
1161
|
control,
|
|
@@ -1406,7 +1413,7 @@ async function browserRequest(url, customOptions = {}) {
|
|
|
1406
1413
|
status,
|
|
1407
1414
|
statusText,
|
|
1408
1415
|
headers,
|
|
1409
|
-
cookies: curateCookies(
|
|
1416
|
+
cookies: curateCookies(res, customOptions),
|
|
1410
1417
|
response: res,
|
|
1411
1418
|
res,
|
|
1412
1419
|
};
|
|
@@ -1437,7 +1444,7 @@ async function browserRequest(url, customOptions = {}) {
|
|
|
1437
1444
|
status,
|
|
1438
1445
|
statusText,
|
|
1439
1446
|
headers,
|
|
1440
|
-
cookies: curateCookies(
|
|
1447
|
+
cookies: curateCookies(res, customOptions),
|
|
1441
1448
|
response: res,
|
|
1442
1449
|
res,
|
|
1443
1450
|
};
|
|
@@ -1579,7 +1586,7 @@ async function request(url, body, customOptions = {}, method = 'GET', redirects
|
|
|
1579
1586
|
status,
|
|
1580
1587
|
statusText: res.statusText,
|
|
1581
1588
|
headers: res.headers,
|
|
1582
|
-
cookies: curateCookies(res
|
|
1589
|
+
cookies: curateCookies(res, customOptions),
|
|
1583
1590
|
response: res,
|
|
1584
1591
|
res,
|
|
1585
1592
|
};
|
package/tests/init.js
CHANGED
|
@@ -51,10 +51,13 @@ async function initTest() {
|
|
|
51
51
|
useProxy: false,
|
|
52
52
|
});
|
|
53
53
|
|
|
54
|
+
const setCookiesRes = await unprint.get(`http://127.0.0.1:${port}/cookies`);
|
|
55
|
+
|
|
54
56
|
console.log('JSON RES', jsonRes);
|
|
55
57
|
console.log('ERROR RES', errorRes);
|
|
56
58
|
console.log('COOKIES RES', cookiesRes);
|
|
57
59
|
console.log('PROXY RES', proxyRes.data);
|
|
60
|
+
console.log('SET COOKIES RES', setCookiesRes.cookies);
|
|
58
61
|
|
|
59
62
|
console.log('title', res.context.query.content('//*[contains(text(), "Test")]'));
|
|
60
63
|
console.log('date', res.context.query.date('#date', 'DD-MM-YYYY HH:mm'));
|
|
@@ -123,6 +126,13 @@ async function initServer() {
|
|
|
123
126
|
res.send(data);
|
|
124
127
|
});
|
|
125
128
|
|
|
129
|
+
app.get('/cookies', (req, res) => {
|
|
130
|
+
res.cookie('hello', 'world', { httpOnly: true });
|
|
131
|
+
res.cookie('foo', 'bar');
|
|
132
|
+
|
|
133
|
+
res.send(data);
|
|
134
|
+
});
|
|
135
|
+
|
|
126
136
|
app.get('/error/:code', (req, res) => {
|
|
127
137
|
res.status(Number(req.params.code)).send();
|
|
128
138
|
});
|