unprint 0.17.7 → 0.17.9

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unprint",
3
- "version": "0.17.7",
3
+ "version": "0.17.9",
4
4
  "description": "Simplify common web scraping tasks while staying in control of the data.",
5
5
  "main": "src/app.js",
6
6
  "scripts": {},
package/src/app.js CHANGED
@@ -191,12 +191,18 @@ function queryContents(context, selector, customOptions) {
191
191
  const options = {
192
192
  ...context.options,
193
193
  trim: true,
194
+ filter: true,
194
195
  ...customOptions,
195
196
  };
196
197
 
197
198
  const targets = queryElements(context, selector, options);
199
+ const extractedContents = targets.map((target) => extractContent(target, options));
200
+
201
+ if (options.filter) {
202
+ return extractedContents.filter(Boolean);
203
+ }
198
204
 
199
- return targets.map((target) => extractContent(target, options)).filter(Boolean);
205
+ return extractedContents;
200
206
  }
201
207
 
202
208
  function queryAttribute(context, selector, attribute, customOptions) {
@@ -277,15 +283,24 @@ function queryNumber(context, selector, customOptions) {
277
283
  }
278
284
 
279
285
  function queryNumbers(context, selector, customOptions) {
286
+ const options = {
287
+ filter: true,
288
+ ...customOptions,
289
+ };
290
+
280
291
  const numberStrings = queryContents(context, selector, customOptions);
281
292
 
282
293
  if (!numberStrings) {
283
294
  return null;
284
295
  }
285
296
 
286
- return numberStrings
287
- .map((numberString) => extractNumber(numberString, customOptions))
288
- .filter(Boolean);
297
+ const extractedNumbers = numberStrings.map((numberString) => extractNumber(numberString, customOptions));
298
+
299
+ if (options.filter) {
300
+ return extractedNumbers.filter(Boolean);
301
+ }
302
+
303
+ return extractedNumbers;
289
304
  }
290
305
 
291
306
  function queryHtml(context, selector, customOptions) {
@@ -1149,6 +1164,14 @@ function curateResponse(res, options, { url, control, customOptions }) {
1149
1164
  };
1150
1165
  }
1151
1166
 
1167
+ async function closeBrowser(client, options) {
1168
+ if (options.client === null // this browser is single-use
1169
+ || (client.retired && client.active === 0)) { // this browser is retired to minimize garbage build-up
1170
+ // this browser won't be reused
1171
+ await client.browser.close();
1172
+ }
1173
+ }
1174
+
1152
1175
  async function browserRequest(url, customOptions = {}) {
1153
1176
  const options = merge.all([{
1154
1177
  timeout: 1000,
@@ -1182,7 +1205,15 @@ async function browserRequest(url, customOptions = {}) {
1182
1205
 
1183
1206
  const res = await page.goto(url, {
1184
1207
  ...options.page,
1185
- });
1208
+ }).catch((error) => error);
1209
+
1210
+ if (res instanceof Error) {
1211
+ return {
1212
+ ok: false,
1213
+ status: null,
1214
+ statusText: res.name,
1215
+ };
1216
+ }
1186
1217
 
1187
1218
  const status = res.status();
1188
1219
  const statusText = res.statusText();
@@ -1199,6 +1230,8 @@ async function browserRequest(url, customOptions = {}) {
1199
1230
 
1200
1231
  client.active -= 1;
1201
1232
 
1233
+ await closeBrowser(client, options);
1234
+
1202
1235
  return {
1203
1236
  ok: false,
1204
1237
  status,
@@ -1221,6 +1254,8 @@ async function browserRequest(url, customOptions = {}) {
1221
1254
  } catch (error) {
1222
1255
  client.active -= 1;
1223
1256
 
1257
+ await closeBrowser(client, options);
1258
+
1224
1259
  return {
1225
1260
  ok: false,
1226
1261
  controlError: error.message,
@@ -1247,11 +1282,7 @@ async function browserRequest(url, customOptions = {}) {
1247
1282
 
1248
1283
  client.active -= 1;
1249
1284
 
1250
- if (options.client === null // this browser is single-use
1251
- || (client.retired && client.active === 0)) { // this browser is retired to minimize garbage build-up
1252
- // this browser won't be reused
1253
- await client.browser.close();
1254
- }
1285
+ await closeBrowser(client, options);
1255
1286
 
1256
1287
  return curateResponse({
1257
1288
  data,
package/tests/browser.js CHANGED
@@ -16,6 +16,8 @@ unprint.options({ // or unprint.options();
16
16
 
17
17
  async function initTest() {
18
18
  // concurrency
19
+ /*
20
+ console.log('TEST CONCURRENCY');
19
21
  await Promise.all(Array.from({ length: 20 }).map(async () => {
20
22
  // await unprint.browser(`https://tools-httpstatus.pickup-services.com/${Math.random() < 0.2 ? '404' : '200'}?sleep=${Math.round(Math.random() * 500)}`, {
21
23
  await unprint.browser(`https://tools-httpstatus.pickup-services.com/200?sleep=${Math.round(Math.random() * 5000)}`, {
@@ -26,11 +28,24 @@ async function initTest() {
26
28
  },
27
29
  });
28
30
  }));
31
+ */
29
32
 
30
33
  // console.log('Requests done, waiting...');
31
34
 
32
35
  // await new Promise((resolve) => { setTimeout(() => resolve(), 60 * 60 * 1000); });
36
+ // timeout
37
+ console.log('TEST TIMEOUT');
38
+ await unprint.browser('https://tools-httpstatus.pickup-services.com/200?sleep=30000', {
39
+ // client: null,
40
+ browser: {
41
+ headless: true,
42
+ },
43
+ page: {
44
+ timeout: 5000,
45
+ },
46
+ });
33
47
 
48
+ /*
34
49
  await Promise.all([
35
50
  unprint.browser('https://tools-httpstatus.pickup-services.com/200?sleep=5000', {
36
51
  browser: {
@@ -56,6 +71,7 @@ async function initTest() {
56
71
  }),
57
72
  ]);
58
73
 
74
+ console.log('TEST SCRAPE');
59
75
  const res = await unprint.browser('https://www.scrapingcourse.com/', {
60
76
  browser: {
61
77
  headless: false,
@@ -69,6 +85,8 @@ async function initTest() {
69
85
 
70
86
  console.log('CARD TITLES', cards);
71
87
  console.log('CONTROL OUT', res.control);
88
+ */
89
+ console.log('CLOSING ALL BROWSERS');
72
90
 
73
91
  await unprint.closeAllBrowsers();
74
92
  }