unprint 0.17.7 → 0.17.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/app.js +41 -10
- package/tests/browser.js +18 -0
package/package.json
CHANGED
package/src/app.js
CHANGED
|
@@ -191,12 +191,18 @@ function queryContents(context, selector, customOptions) {
|
|
|
191
191
|
const options = {
|
|
192
192
|
...context.options,
|
|
193
193
|
trim: true,
|
|
194
|
+
filter: true,
|
|
194
195
|
...customOptions,
|
|
195
196
|
};
|
|
196
197
|
|
|
197
198
|
const targets = queryElements(context, selector, options);
|
|
199
|
+
const extractedContents = targets.map((target) => extractContent(target, options));
|
|
200
|
+
|
|
201
|
+
if (options.filter) {
|
|
202
|
+
return extractedContents.filter(Boolean);
|
|
203
|
+
}
|
|
198
204
|
|
|
199
|
-
return targets.map((target) => extractContent(target, options));
|
|
205
|
+
return extractedContents;
|
|
200
206
|
}
|
|
201
207
|
|
|
202
208
|
function queryAttribute(context, selector, attribute, customOptions) {
|
|
@@ -277,15 +283,24 @@ function queryNumber(context, selector, customOptions) {
|
|
|
277
283
|
}
|
|
278
284
|
|
|
279
285
|
function queryNumbers(context, selector, customOptions) {
|
|
286
|
+
const options = {
|
|
287
|
+
filter: true,
|
|
288
|
+
...customOptions,
|
|
289
|
+
};
|
|
290
|
+
|
|
280
291
|
const numberStrings = queryContents(context, selector, customOptions);
|
|
281
292
|
|
|
282
293
|
if (!numberStrings) {
|
|
283
294
|
return null;
|
|
284
295
|
}
|
|
285
296
|
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
297
|
+
const extractedNumbers = numberStrings.map((numberString) => extractNumber(numberString, customOptions));
|
|
298
|
+
|
|
299
|
+
if (options.filter) {
|
|
300
|
+
return extractedNumbers.filter(Boolean);
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
return extractedNumbers;
|
|
289
304
|
}
|
|
290
305
|
|
|
291
306
|
function queryHtml(context, selector, customOptions) {
|
|
@@ -1149,6 +1164,14 @@ function curateResponse(res, options, { url, control, customOptions }) {
|
|
|
1149
1164
|
};
|
|
1150
1165
|
}
|
|
1151
1166
|
|
|
1167
|
+
async function closeBrowser(client, options) {
|
|
1168
|
+
if (options.client === null // this browser is single-use
|
|
1169
|
+
|| (client.retired && client.active === 0)) { // this browser is retired to minimize garbage build-up
|
|
1170
|
+
// this browser won't be reused
|
|
1171
|
+
await client.browser.close();
|
|
1172
|
+
}
|
|
1173
|
+
}
|
|
1174
|
+
|
|
1152
1175
|
async function browserRequest(url, customOptions = {}) {
|
|
1153
1176
|
const options = merge.all([{
|
|
1154
1177
|
timeout: 1000,
|
|
@@ -1182,7 +1205,15 @@ async function browserRequest(url, customOptions = {}) {
|
|
|
1182
1205
|
|
|
1183
1206
|
const res = await page.goto(url, {
|
|
1184
1207
|
...options.page,
|
|
1185
|
-
});
|
|
1208
|
+
}).catch((error) => error);
|
|
1209
|
+
|
|
1210
|
+
if (res instanceof Error) {
|
|
1211
|
+
return {
|
|
1212
|
+
ok: false,
|
|
1213
|
+
status: null,
|
|
1214
|
+
statusText: res.name,
|
|
1215
|
+
};
|
|
1216
|
+
}
|
|
1186
1217
|
|
|
1187
1218
|
const status = res.status();
|
|
1188
1219
|
const statusText = res.statusText();
|
|
@@ -1199,6 +1230,8 @@ async function browserRequest(url, customOptions = {}) {
|
|
|
1199
1230
|
|
|
1200
1231
|
client.active -= 1;
|
|
1201
1232
|
|
|
1233
|
+
await closeBrowser(client, options);
|
|
1234
|
+
|
|
1202
1235
|
return {
|
|
1203
1236
|
ok: false,
|
|
1204
1237
|
status,
|
|
@@ -1221,6 +1254,8 @@ async function browserRequest(url, customOptions = {}) {
|
|
|
1221
1254
|
} catch (error) {
|
|
1222
1255
|
client.active -= 1;
|
|
1223
1256
|
|
|
1257
|
+
await closeBrowser(client, options);
|
|
1258
|
+
|
|
1224
1259
|
return {
|
|
1225
1260
|
ok: false,
|
|
1226
1261
|
controlError: error.message,
|
|
@@ -1247,11 +1282,7 @@ async function browserRequest(url, customOptions = {}) {
|
|
|
1247
1282
|
|
|
1248
1283
|
client.active -= 1;
|
|
1249
1284
|
|
|
1250
|
-
if (options.client === null // this browser is single-use
|
|
1251
|
-
|| (client.retired && client.active === 0)) { // this browser is retired to minimize garbage build-up
|
|
1252
|
-
// this browser won't be reused
|
|
1253
|
-
await client.browser.close();
|
|
1254
|
-
}
|
|
1285
|
+
await closeBrowser(client, options);
|
|
1255
1286
|
|
|
1256
1287
|
return curateResponse({
|
|
1257
1288
|
data,
|
package/tests/browser.js
CHANGED
|
@@ -16,6 +16,8 @@ unprint.options({ // or unprint.options();
|
|
|
16
16
|
|
|
17
17
|
async function initTest() {
|
|
18
18
|
// concurrency
|
|
19
|
+
/*
|
|
20
|
+
console.log('TEST CONCURRENCY');
|
|
19
21
|
await Promise.all(Array.from({ length: 20 }).map(async () => {
|
|
20
22
|
// await unprint.browser(`https://tools-httpstatus.pickup-services.com/${Math.random() < 0.2 ? '404' : '200'}?sleep=${Math.round(Math.random() * 500)}`, {
|
|
21
23
|
await unprint.browser(`https://tools-httpstatus.pickup-services.com/200?sleep=${Math.round(Math.random() * 5000)}`, {
|
|
@@ -26,11 +28,24 @@ async function initTest() {
|
|
|
26
28
|
},
|
|
27
29
|
});
|
|
28
30
|
}));
|
|
31
|
+
*/
|
|
29
32
|
|
|
30
33
|
// console.log('Requests done, waiting...');
|
|
31
34
|
|
|
32
35
|
// await new Promise((resolve) => { setTimeout(() => resolve(), 60 * 60 * 1000); });
|
|
36
|
+
// timeout
|
|
37
|
+
console.log('TEST TIMEOUT');
|
|
38
|
+
await unprint.browser('https://tools-httpstatus.pickup-services.com/200?sleep=30000', {
|
|
39
|
+
// client: null,
|
|
40
|
+
browser: {
|
|
41
|
+
headless: true,
|
|
42
|
+
},
|
|
43
|
+
page: {
|
|
44
|
+
timeout: 5000,
|
|
45
|
+
},
|
|
46
|
+
});
|
|
33
47
|
|
|
48
|
+
/*
|
|
34
49
|
await Promise.all([
|
|
35
50
|
unprint.browser('https://tools-httpstatus.pickup-services.com/200?sleep=5000', {
|
|
36
51
|
browser: {
|
|
@@ -56,6 +71,7 @@ async function initTest() {
|
|
|
56
71
|
}),
|
|
57
72
|
]);
|
|
58
73
|
|
|
74
|
+
console.log('TEST SCRAPE');
|
|
59
75
|
const res = await unprint.browser('https://www.scrapingcourse.com/', {
|
|
60
76
|
browser: {
|
|
61
77
|
headless: false,
|
|
@@ -69,6 +85,8 @@ async function initTest() {
|
|
|
69
85
|
|
|
70
86
|
console.log('CARD TITLES', cards);
|
|
71
87
|
console.log('CONTROL OUT', res.control);
|
|
88
|
+
*/
|
|
89
|
+
console.log('CLOSING ALL BROWSERS');
|
|
72
90
|
|
|
73
91
|
await unprint.closeAllBrowsers();
|
|
74
92
|
}
|