unprint 0.18.31 → 0.18.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/package.json +2 -1
- package/src/app.js +25 -18
- package/tests/init.js +10 -0
package/README.md
CHANGED
|
@@ -234,6 +234,7 @@ Options
|
|
|
234
234
|
* `browserUserAgent`: The default user agent header for browser-like requests (`get` interface `fetch` and `browserRequest`)
|
|
235
235
|
* `apiUserAgent`: The default user agent header for raw requests (`get` interface `request`)
|
|
236
236
|
* `useBrowser`: Forward the call to `unprint.browser()` (see below), only for GET-requests
|
|
237
|
+
* `fullCookies`: Return cookie metadata in `res.cookies`, instead of just values
|
|
237
238
|
|
|
238
239
|
Use Playwright with Chromium (experimental)
|
|
239
240
|
* `unprint.browser(url, [options])`
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "unprint",
|
|
3
|
-
"version": "0.18.31",
|
|
3
|
+
"version": "0.18.33",
|
|
4
4
|
"description": "Simplify common web scraping tasks while staying in control of the data.",
|
|
5
5
|
"main": "src/app.js",
|
|
6
6
|
"scripts": {},
|
|
@@ -30,6 +30,7 @@
|
|
|
30
30
|
"moment-timezone": "^0.5.34",
|
|
31
31
|
"object-hash": "^3.0.0",
|
|
32
32
|
"patchright": "^1.56.1",
|
|
33
|
+
"set-cookie-parser": "^3.0.1",
|
|
33
34
|
"srcset": "^4.0.0",
|
|
34
35
|
"tunnel": "^0.0.6",
|
|
35
36
|
"undici": "^7.18.2"
|
package/src/app.js
CHANGED
|
@@ -6,6 +6,7 @@ const EventEmitter = require('events');
|
|
|
6
6
|
const undici = require('undici');
|
|
7
7
|
const qs = require('node:querystring');
|
|
8
8
|
const cookie = require('cookie');
|
|
9
|
+
const { parseSetCookie } = require('set-cookie-parser');
|
|
9
10
|
const Bottleneck = require('bottleneck');
|
|
10
11
|
const moment = require('moment-timezone');
|
|
11
12
|
const merge = require('deepmerge');
|
|
@@ -1122,15 +1123,21 @@ function curateHeaders(headers, options) {
|
|
|
1122
1123
|
return headers;
|
|
1123
1124
|
}
|
|
1124
1125
|
|
|
1125
|
-
function curateCookies(headers) {
|
|
1126
|
-
if (headers) {
|
|
1127
|
-
const setCookie = typeof headers.get === 'function'
|
|
1128
|
-
? headers.get('set-cookie')
|
|
1129
|
-
: headers['set-cookie'];
|
|
1126
|
+
function curateCookies(res, options) {
|
|
1127
|
+
if (res) {
|
|
1128
|
+
const setCookie = typeof res.headers.get === 'function'
|
|
1129
|
+
? res.headers.get('set-cookie')
|
|
1130
|
+
: res.headers['set-cookie'];
|
|
1130
1131
|
|
|
1131
1132
|
if (setCookie) {
|
|
1132
1133
|
try {
|
|
1133
|
-
|
|
1134
|
+
const cookies = parseSetCookie(res, { map: true, decode: false });
|
|
1135
|
+
|
|
1136
|
+
if (options.fullCookies) {
|
|
1137
|
+
return cookies;
|
|
1138
|
+
}
|
|
1139
|
+
|
|
1140
|
+
return Object.fromEntries(Object.entries(cookies).map(([_key, value]) => [value.name, value.value]));
|
|
1134
1141
|
} catch (_error) {
|
|
1135
1142
|
// invalid cookie
|
|
1136
1143
|
}
|
|
@@ -1148,7 +1155,7 @@ function curateResponse(res, data, options, { url, control, customOptions }) {
|
|
|
1148
1155
|
status: res.statusCode || res.status,
|
|
1149
1156
|
statusText: res.statusText,
|
|
1150
1157
|
headers: res.headers,
|
|
1151
|
-
cookies: curateCookies(res.headers),
|
|
1158
|
+
cookies: curateCookies(res, customOptions),
|
|
1152
1159
|
response: res,
|
|
1153
1160
|
res,
|
|
1154
1161
|
control,
|
|
@@ -1401,15 +1408,15 @@ async function browserRequest(url, customOptions = {}) {
|
|
|
1401
1408
|
|
|
1402
1409
|
await closeBrowser(client, options);
|
|
1403
1410
|
|
|
1404
|
-
return {
|
|
1411
|
+
return curateResponse({
|
|
1405
1412
|
ok: false,
|
|
1406
1413
|
status,
|
|
1407
1414
|
statusText,
|
|
1408
1415
|
headers,
|
|
1409
|
-
cookies: curateCookies(headers),
|
|
1416
|
+
cookies: curateCookies(res, customOptions),
|
|
1410
1417
|
response: res,
|
|
1411
1418
|
res,
|
|
1412
|
-
};
|
|
1419
|
+
}, data, options, { url, customOptions });
|
|
1413
1420
|
}
|
|
1414
1421
|
|
|
1415
1422
|
events.emit('requestSuccess', feedbackBase);
|
|
@@ -1437,7 +1444,7 @@ async function browserRequest(url, customOptions = {}) {
|
|
|
1437
1444
|
status,
|
|
1438
1445
|
statusText,
|
|
1439
1446
|
headers,
|
|
1440
|
-
cookies: curateCookies(headers),
|
|
1447
|
+
cookies: curateCookies(res, customOptions),
|
|
1441
1448
|
response: res,
|
|
1442
1449
|
res,
|
|
1443
1450
|
};
|
|
@@ -1565,6 +1572,10 @@ async function request(url, body, customOptions = {}, method = 'GET', redirects
|
|
|
1565
1572
|
return request(newUrl, body, options, method, redirects + 1);
|
|
1566
1573
|
}
|
|
1567
1574
|
|
|
1575
|
+
const data = options.interface === 'fetch'
|
|
1576
|
+
? await res.text()
|
|
1577
|
+
: await res.body.text();
|
|
1578
|
+
|
|
1568
1579
|
if (!(status >= 200 && status < 300)) {
|
|
1569
1580
|
handleError(new Error(`HTTP response from ${url} not OK (${status} ${res.statusText})`), 'HTTP_NOT_OK');
|
|
1570
1581
|
|
|
@@ -1574,21 +1585,17 @@ async function request(url, body, customOptions = {}, method = 'GET', redirects
|
|
|
1574
1585
|
statusText: res.statusText,
|
|
1575
1586
|
});
|
|
1576
1587
|
|
|
1577
|
-
return {
|
|
1588
|
+
return curateResponse({
|
|
1578
1589
|
ok: false,
|
|
1579
1590
|
status,
|
|
1580
1591
|
statusText: res.statusText,
|
|
1581
1592
|
headers: res.headers,
|
|
1582
|
-
cookies: curateCookies(res.headers),
|
|
1593
|
+
cookies: curateCookies(res, customOptions),
|
|
1583
1594
|
response: res,
|
|
1584
1595
|
res,
|
|
1585
|
-
};
|
|
1596
|
+
}, data, options, { url, customOptions });
|
|
1586
1597
|
}
|
|
1587
1598
|
|
|
1588
|
-
const data = options.interface === 'fetch'
|
|
1589
|
-
? await res.text()
|
|
1590
|
-
: await res.body.text();
|
|
1591
|
-
|
|
1592
1599
|
events.emit('requestSuccess', {
|
|
1593
1600
|
...feedbackBase,
|
|
1594
1601
|
status,
|
package/tests/init.js
CHANGED
|
@@ -51,10 +51,13 @@ async function initTest() {
|
|
|
51
51
|
useProxy: false,
|
|
52
52
|
});
|
|
53
53
|
|
|
54
|
+
const setCookiesRes = await unprint.get(`http://127.0.0.1:${port}/cookies`);
|
|
55
|
+
|
|
54
56
|
console.log('JSON RES', jsonRes);
|
|
55
57
|
console.log('ERROR RES', errorRes);
|
|
56
58
|
console.log('COOKIES RES', cookiesRes);
|
|
57
59
|
console.log('PROXY RES', proxyRes.data);
|
|
60
|
+
console.log('SET COOKIES RES', setCookiesRes.cookies);
|
|
58
61
|
|
|
59
62
|
console.log('title', res.context.query.content('//*[contains(text(), "Test")]'));
|
|
60
63
|
console.log('date', res.context.query.date('#date', 'DD-MM-YYYY HH:mm'));
|
|
@@ -123,6 +126,13 @@ async function initServer() {
|
|
|
123
126
|
res.send(data);
|
|
124
127
|
});
|
|
125
128
|
|
|
129
|
+
app.get('/cookies', (req, res) => {
|
|
130
|
+
res.cookie('hello', 'world', { httpOnly: true });
|
|
131
|
+
res.cookie('foo', 'bar');
|
|
132
|
+
|
|
133
|
+
res.send(data);
|
|
134
|
+
});
|
|
135
|
+
|
|
126
136
|
app.get('/error/:code', (req, res) => {
|
|
127
137
|
res.status(Number(req.params.code)).send();
|
|
128
138
|
});
|