unprint 0.18.1 → 0.18.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -211,9 +211,10 @@ Extracts the CSS `url()` background from a style attribute. Alias for `query.sty
211
211
  Options
212
212
  * `select`: Pre-query and initialize a specific element on the page.
213
213
  * `selectAll`: Pre-query and initialize multiple specific elements on the page.
214
+ * `interface`: Select which undici API performs the request: `fetch` (browser-like, default) or `request` (raw).
214
215
 
215
216
  Use Playwright with Chromium (experimental)
216
- * `unprint.browserRequest(url, [options])`
217
+ * `unprint.browserRequest(url, [options])` or `unprint.browser(url, [options])`
217
218
  * `unprint.closeAllBrowsers()`: Close reused browser instances.
218
219
 
219
220
  Additional options
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unprint",
3
- "version": "0.18.1",
3
+ "version": "0.18.2",
4
4
  "description": "Simplify common web scraping tasks while staying in control of the data.",
5
5
  "main": "src/app.js",
6
6
  "scripts": {},
package/src/app.js CHANGED
@@ -1377,6 +1377,7 @@ function curateRequestBody(body) {
1377
1377
 
1378
1378
  async function request(url, body, customOptions = {}, method = 'GET') {
1379
1379
  const options = merge.all([{
1380
+ interface: 'fetch', // fetch or request
1380
1381
  timeout: 10000,
1381
1382
  extract: true,
1382
1383
  url,
@@ -1407,7 +1408,7 @@ async function request(url, body, customOptions = {}, method = 'GET') {
1407
1408
  cookie: curatedCookie,
1408
1409
  }, options);
1409
1410
 
1410
- const res = await limiter.schedule(async () => undici.fetch(url, {
1411
+ const res = await limiter.schedule(async () => undici[options.interface](url, {
1411
1412
  dispatcher: agent,
1412
1413
  method,
1413
1414
  body: curatedBody.body,
@@ -1419,20 +1420,24 @@ async function request(url, body, customOptions = {}, method = 'GET') {
1419
1420
  async text() { return error.cause?.cause?.message || 'Request aborted'; },
1420
1421
  }));
1421
1422
 
1422
- if (!(res.status >= 200 && res.status < 300)) {
1423
- const data = await res.text();
1423
+ const data = options.interface === 'fetch'
1424
+ ? await res.text()
1425
+ : await res.body.text();
1424
1426
 
1425
- handleError(new Error(`HTTP response from ${url} not OK (${res.status} ${res.statusText}): ${data}`), 'HTTP_NOT_OK');
1427
+ const status = res.statusCode || res.status;
1428
+
1429
+ if (!(status >= 200 && status < 300)) {
1430
+ handleError(new Error(`HTTP response from ${url} not OK (${status} ${res.statusText}): ${data}`), 'HTTP_NOT_OK');
1426
1431
 
1427
1432
  events.emit('requestError', {
1428
1433
  ...feedbackBase,
1429
- status: res.status,
1434
+ status,
1430
1435
  statusText: res.statusText,
1431
1436
  });
1432
1437
 
1433
1438
  return {
1434
1439
  ok: false,
1435
- status: res.status,
1440
+ status,
1436
1441
  statusText: res.statusText,
1437
1442
  headers: res.headers,
1438
1443
  response: res,
@@ -1442,12 +1447,10 @@ async function request(url, body, customOptions = {}, method = 'GET') {
1442
1447
 
1443
1448
  events.emit('requestSuccess', {
1444
1449
  ...feedbackBase,
1445
- status: res.status,
1450
+ status,
1446
1451
  statusText: res.statusText,
1447
1452
  });
1448
1453
 
1449
- const data = await res.text();
1450
-
1451
1454
  return curateResponse(res, data, options, { url, customOptions });
1452
1455
  }
1453
1456
 
package/tests/init.js CHANGED
@@ -30,7 +30,7 @@ async function initTest() {
30
30
  unprint.on('requestSuccess', (successData) => console.log('success', successData));
31
31
  // unprint.on('query', (queryData) => console.log('query', queryData));
32
32
 
33
- const res = await unprint.get(`http://127.0.0.1:${port}/html`, { select: 'body' });
33
+ const res = await unprint.get(`http://127.0.0.1:${port}/html`, { select: 'body', interface: 'request' });
34
34
 
35
35
  const jsonRes = await unprint.get(`http://127.0.0.1:${port}/json`);
36
36
  const errorRes = await unprint.get(`http://127.0.0.1:${port}/error/404`);
@@ -44,10 +44,17 @@ async function initTest() {
44
44
  },
45
45
  });
46
46
 
47
+ const proxyRes = await unprint.get('https://api.ipify.org?format=json', {
48
+ interface: 'request',
49
+ useProxy: true,
50
+ });
51
+
47
52
  console.log('JSON RES', jsonRes);
48
53
  console.log('ERROR RES', errorRes);
49
54
  console.log('COOKIES RES', cookiesRes);
50
55
 
56
+ console.log('PROXY RES', proxyRes.data);
57
+
51
58
  console.log('title', res.context.query.content('//*[contains(text(), "Test")]'));
52
59
  console.log('date', res.context.query.date('#date', 'DD-MM-YYYY HH:mm'));
53
60
  console.log('date xpath', res.context.query.date('//div[contains(text(), "Today:")]', 'MMM DD, YYYY'));