unprint 0.18.1 → 0.18.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -1
- package/package.json +1 -1
- package/src/app.js +21 -13
- package/tests/init.js +10 -2
package/README.md
CHANGED
|
@@ -211,9 +211,13 @@ Extracts the CSS `url()` background from a style attribute. Alias for `query.sty
|
|
|
211
211
|
Options
|
|
212
212
|
* `select`: Pre-query and initialize a specific element on the page.
|
|
213
213
|
* `selectAll`: Pre-query and initialize multiple specific elements on the page.
|
|
214
|
+
* `interface`: Which undici interface to use: `fetch` (browser-like, default) or `request` (raw).
|
|
215
|
+
* `userAgent`: The default user agent header
|
|
216
|
+
* `browserUserAgent`: The default user agent header for browser-like requests (`get` interface `fetch` and `browserRequest`)
|
|
217
|
+
* `apiUserAgent`: The default user agent header for raw requests (`get` interface `request`)
|
|
214
218
|
|
|
215
219
|
Use Playwright with Chromium (experimental)
|
|
216
|
-
* `unprint.browserRequest(url, [options])`
|
|
220
|
+
* `unprint.browserRequest(url, [options])` or `unprint.browser(url, [options])`
|
|
217
221
|
* `unprint.closeAllBrowsers()`: Close reused browser instances.
|
|
218
222
|
|
|
219
223
|
Additional options
|
package/package.json
CHANGED
package/src/app.js
CHANGED
|
@@ -16,6 +16,7 @@ const settings = {
|
|
|
16
16
|
throwErrors: false,
|
|
17
17
|
logErrors: true,
|
|
18
18
|
requestTimeout: 30000,
|
|
19
|
+
userAgent: 'unprint',
|
|
19
20
|
limits: {
|
|
20
21
|
default: {
|
|
21
22
|
interval: 10,
|
|
@@ -1056,9 +1057,11 @@ function getCookie(options) {
|
|
|
1056
1057
|
return headerCookieData;
|
|
1057
1058
|
}
|
|
1058
1059
|
|
|
1059
|
-
function
|
|
1060
|
+
function curateHeaders(headers, options) {
|
|
1060
1061
|
if (headers && options.defaultHeaders !== false) {
|
|
1061
|
-
return Object.fromEntries(Object.entries(headers)
|
|
1062
|
+
return Object.fromEntries(Object.entries(headers)
|
|
1063
|
+
.map(([key, value]) => [key.toLowerCase(), value])
|
|
1064
|
+
.filter(([_key, value]) => value !== null));
|
|
1062
1065
|
}
|
|
1063
1066
|
|
|
1064
1067
|
return headers;
|
|
@@ -1246,8 +1249,9 @@ async function browserRequest(url, customOptions = {}) {
|
|
|
1246
1249
|
const headers = route.request().headers();
|
|
1247
1250
|
|
|
1248
1251
|
route.continue({
|
|
1249
|
-
headers:
|
|
1252
|
+
headers: curateHeaders({
|
|
1250
1253
|
...headers,
|
|
1254
|
+
'user-agent': options.browserUserAgent || options.userAgent,
|
|
1251
1255
|
...options.headers,
|
|
1252
1256
|
cookie: getCookie(options),
|
|
1253
1257
|
}, options),
|
|
@@ -1377,6 +1381,7 @@ function curateRequestBody(body) {
|
|
|
1377
1381
|
|
|
1378
1382
|
async function request(url, body, customOptions = {}, method = 'GET') {
|
|
1379
1383
|
const options = merge.all([{
|
|
1384
|
+
interface: 'fetch', // fetch or request
|
|
1380
1385
|
timeout: 10000,
|
|
1381
1386
|
extract: true,
|
|
1382
1387
|
url,
|
|
@@ -1401,13 +1406,14 @@ async function request(url, body, customOptions = {}, method = 'GET') {
|
|
|
1401
1406
|
const curatedBody = curateRequestBody(body);
|
|
1402
1407
|
const curatedCookie = getCookie(options);
|
|
1403
1408
|
|
|
1404
|
-
const headers =
|
|
1409
|
+
const headers = curateHeaders({
|
|
1405
1410
|
...curatedBody.headers,
|
|
1411
|
+
'user-agent': (options.interface === 'fetch' ? options.browserUserAgent : options.apiUserAgent) || options.userAgent,
|
|
1406
1412
|
...options.headers,
|
|
1407
1413
|
cookie: curatedCookie,
|
|
1408
1414
|
}, options);
|
|
1409
1415
|
|
|
1410
|
-
const res = await limiter.schedule(async () => undici.
|
|
1416
|
+
const res = await limiter.schedule(async () => undici[options.interface](url, {
|
|
1411
1417
|
dispatcher: agent,
|
|
1412
1418
|
method,
|
|
1413
1419
|
body: curatedBody.body,
|
|
@@ -1419,20 +1425,24 @@ async function request(url, body, customOptions = {}, method = 'GET') {
|
|
|
1419
1425
|
async text() { return error.cause?.cause?.message || 'Request aborted'; },
|
|
1420
1426
|
}));
|
|
1421
1427
|
|
|
1422
|
-
|
|
1423
|
-
|
|
1428
|
+
const data = options.interface === 'fetch'
|
|
1429
|
+
? await res.text()
|
|
1430
|
+
: await res.body.text();
|
|
1424
1431
|
|
|
1425
|
-
|
|
1432
|
+
const status = res.statusCode || res.status;
|
|
1433
|
+
|
|
1434
|
+
if (!(status >= 200 && status < 300)) {
|
|
1435
|
+
handleError(new Error(`HTTP response from ${url} not OK (${status} ${res.statusText}): ${data}`), 'HTTP_NOT_OK');
|
|
1426
1436
|
|
|
1427
1437
|
events.emit('requestError', {
|
|
1428
1438
|
...feedbackBase,
|
|
1429
|
-
status
|
|
1439
|
+
status,
|
|
1430
1440
|
statusText: res.statusText,
|
|
1431
1441
|
});
|
|
1432
1442
|
|
|
1433
1443
|
return {
|
|
1434
1444
|
ok: false,
|
|
1435
|
-
status
|
|
1445
|
+
status,
|
|
1436
1446
|
statusText: res.statusText,
|
|
1437
1447
|
headers: res.headers,
|
|
1438
1448
|
response: res,
|
|
@@ -1442,12 +1452,10 @@ async function request(url, body, customOptions = {}, method = 'GET') {
|
|
|
1442
1452
|
|
|
1443
1453
|
events.emit('requestSuccess', {
|
|
1444
1454
|
...feedbackBase,
|
|
1445
|
-
status
|
|
1455
|
+
status,
|
|
1446
1456
|
statusText: res.statusText,
|
|
1447
1457
|
});
|
|
1448
1458
|
|
|
1449
|
-
const data = await res.text();
|
|
1450
|
-
|
|
1451
1459
|
return curateResponse(res, data, options, { url, customOptions });
|
|
1452
1460
|
}
|
|
1453
1461
|
|
package/tests/init.js
CHANGED
|
@@ -11,7 +11,9 @@ const port = process.env.PORT || 3101;
|
|
|
11
11
|
|
|
12
12
|
async function initTest() {
|
|
13
13
|
unprint.options({
|
|
14
|
-
|
|
14
|
+
userAgent: 'unprint',
|
|
15
|
+
apiUserAgent: 'unprint',
|
|
16
|
+
browserUserAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36',
|
|
15
17
|
limits: {
|
|
16
18
|
default: {
|
|
17
19
|
concurrency: 1,
|
|
@@ -30,7 +32,7 @@ async function initTest() {
|
|
|
30
32
|
unprint.on('requestSuccess', (successData) => console.log('success', successData));
|
|
31
33
|
// unprint.on('query', (queryData) => console.log('query', queryData));
|
|
32
34
|
|
|
33
|
-
const res = await unprint.get(`http://127.0.0.1:${port}/html`, { select: 'body' });
|
|
35
|
+
const res = await unprint.get(`http://127.0.0.1:${port}/html`, { select: 'body', interface: 'request' });
|
|
34
36
|
|
|
35
37
|
const jsonRes = await unprint.get(`http://127.0.0.1:${port}/json`);
|
|
36
38
|
const errorRes = await unprint.get(`http://127.0.0.1:${port}/error/404`);
|
|
@@ -44,9 +46,15 @@ async function initTest() {
|
|
|
44
46
|
},
|
|
45
47
|
});
|
|
46
48
|
|
|
49
|
+
const proxyRes = await unprint.get('https://api.ipify.org?format=json', {
|
|
50
|
+
interface: 'request',
|
|
51
|
+
useProxy: true,
|
|
52
|
+
});
|
|
53
|
+
|
|
47
54
|
console.log('JSON RES', jsonRes);
|
|
48
55
|
console.log('ERROR RES', errorRes);
|
|
49
56
|
console.log('COOKIES RES', cookiesRes);
|
|
57
|
+
console.log('PROXY RES', proxyRes.data);
|
|
50
58
|
|
|
51
59
|
console.log('title', res.context.query.content('//*[contains(text(), "Test")]'));
|
|
52
60
|
console.log('date', res.context.query.date('#date', 'DD-MM-YYYY HH:mm'));
|