unprint 0.17.6 → 0.17.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/package.json +1 -1
- package/src/app.js +53 -14
- package/tests/browser.js +12 -15
package/README.md
CHANGED
|
@@ -218,7 +218,8 @@ Use Playwright with Chromium (experimental)
|
|
|
218
218
|
|
|
219
219
|
Additional options
|
|
220
220
|
* `control`: Async function to interface with Playwright page passed as argument
|
|
221
|
-
* `
|
|
221
|
+
* `client`: Browser instance to (re)use, set to `null` to force a new scope for every request, default `main`.
|
|
222
|
+
* `clientRetirement`: Number of requests until a browser gets restarted for resource clean-up, default `20`.
|
|
222
223
|
* `browser`: Options object passed to Playwright's `launch`.
|
|
223
224
|
* `browser.headless`: Headless mode, set to `false` to launch visible browser, default `true`.
|
|
224
225
|
* `context`: Options object passed to Playwright's `newContext`.
|
package/package.json
CHANGED
package/src/app.js
CHANGED
|
@@ -191,12 +191,18 @@ function queryContents(context, selector, customOptions) {
|
|
|
191
191
|
const options = {
|
|
192
192
|
...context.options,
|
|
193
193
|
trim: true,
|
|
194
|
+
filter: true,
|
|
194
195
|
...customOptions,
|
|
195
196
|
};
|
|
196
197
|
|
|
197
198
|
const targets = queryElements(context, selector, options);
|
|
199
|
+
const extractedContents = targets.map((target) => extractContent(target, options));
|
|
200
|
+
|
|
201
|
+
if (options.filter) {
|
|
202
|
+
return extractedContents.filter(Boolean);
|
|
203
|
+
}
|
|
198
204
|
|
|
199
|
-
return
|
|
205
|
+
return extractedContents;
|
|
200
206
|
}
|
|
201
207
|
|
|
202
208
|
function queryAttribute(context, selector, attribute, customOptions) {
|
|
@@ -277,15 +283,24 @@ function queryNumber(context, selector, customOptions) {
|
|
|
277
283
|
}
|
|
278
284
|
|
|
279
285
|
function queryNumbers(context, selector, customOptions) {
|
|
286
|
+
const options = {
|
|
287
|
+
filter: true,
|
|
288
|
+
...customOptions,
|
|
289
|
+
};
|
|
290
|
+
|
|
280
291
|
const numberStrings = queryContents(context, selector, customOptions);
|
|
281
292
|
|
|
282
293
|
if (!numberStrings) {
|
|
283
294
|
return null;
|
|
284
295
|
}
|
|
285
296
|
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
297
|
+
const extractedNumbers = numberStrings.map((numberString) => extractNumber(numberString, customOptions));
|
|
298
|
+
|
|
299
|
+
if (options.filter) {
|
|
300
|
+
return extractedNumbers.filter(Boolean);
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
return extractedNumbers;
|
|
289
304
|
}
|
|
290
305
|
|
|
291
306
|
function queryHtml(context, selector, customOptions) {
|
|
@@ -1060,6 +1075,13 @@ async function getBrowserInstance(scope, options, useProxy = false) {
|
|
|
1060
1075
|
if (clients.has(scopeKey)) {
|
|
1061
1076
|
const client = clients.get(scopeKey);
|
|
1062
1077
|
|
|
1078
|
+
client.uses += 1;
|
|
1079
|
+
|
|
1080
|
+
if (client.uses >= (options.clientRetirement || 20)) {
|
|
1081
|
+
client.retired = true;
|
|
1082
|
+
clients.delete(scopeKey);
|
|
1083
|
+
}
|
|
1084
|
+
|
|
1063
1085
|
await client.launchers;
|
|
1064
1086
|
|
|
1065
1087
|
return client;
|
|
@@ -1082,7 +1104,14 @@ async function getBrowserInstance(scope, options, useProxy = false) {
|
|
|
1082
1104
|
}));
|
|
1083
1105
|
|
|
1084
1106
|
const launchers = Promise.all([browserLauncher, contextLauncher]);
|
|
1085
|
-
|
|
1107
|
+
|
|
1108
|
+
const client = {
|
|
1109
|
+
key: scopeKey,
|
|
1110
|
+
launchers,
|
|
1111
|
+
active: 0,
|
|
1112
|
+
uses: 1,
|
|
1113
|
+
retired: false,
|
|
1114
|
+
};
|
|
1086
1115
|
|
|
1087
1116
|
if (scope) {
|
|
1088
1117
|
clients.set(scopeKey, client);
|
|
@@ -1139,7 +1168,7 @@ async function browserRequest(url, customOptions = {}) {
|
|
|
1139
1168
|
const options = merge.all([{
|
|
1140
1169
|
timeout: 1000,
|
|
1141
1170
|
extract: true,
|
|
1142
|
-
|
|
1171
|
+
client: 'main',
|
|
1143
1172
|
limiter: 'browser',
|
|
1144
1173
|
url,
|
|
1145
1174
|
}, globalOptions, customOptions]);
|
|
@@ -1160,8 +1189,11 @@ async function browserRequest(url, customOptions = {}) {
|
|
|
1160
1189
|
events.emit('requestInit', feedbackBase);
|
|
1161
1190
|
|
|
1162
1191
|
return limiter.schedule(async () => {
|
|
1163
|
-
const
|
|
1164
|
-
|
|
1192
|
+
const client = await getBrowserInstance(options.client, options, useProxy);
|
|
1193
|
+
|
|
1194
|
+
client.active += 1;
|
|
1195
|
+
|
|
1196
|
+
const page = await client.context.newPage();
|
|
1165
1197
|
|
|
1166
1198
|
const res = await page.goto(url, {
|
|
1167
1199
|
...options.page,
|
|
@@ -1180,6 +1212,8 @@ async function browserRequest(url, customOptions = {}) {
|
|
|
1180
1212
|
statusText,
|
|
1181
1213
|
});
|
|
1182
1214
|
|
|
1215
|
+
client.active -= 1;
|
|
1216
|
+
|
|
1183
1217
|
return {
|
|
1184
1218
|
ok: false,
|
|
1185
1219
|
status,
|
|
@@ -1198,8 +1232,10 @@ async function browserRequest(url, customOptions = {}) {
|
|
|
1198
1232
|
|
|
1199
1233
|
if (customOptions.control) {
|
|
1200
1234
|
try {
|
|
1201
|
-
control = await customOptions.control(page,
|
|
1235
|
+
control = await customOptions.control(page, client);
|
|
1202
1236
|
} catch (error) {
|
|
1237
|
+
client.active -= 1;
|
|
1238
|
+
|
|
1203
1239
|
return {
|
|
1204
1240
|
ok: false,
|
|
1205
1241
|
controlError: error.message,
|
|
@@ -1218,17 +1254,20 @@ async function browserRequest(url, customOptions = {}) {
|
|
|
1218
1254
|
|
|
1219
1255
|
await page.close();
|
|
1220
1256
|
|
|
1221
|
-
if (options.scope === null) {
|
|
1222
|
-
// this browser won't be reused
|
|
1223
|
-
await browser.close();
|
|
1224
|
-
}
|
|
1225
|
-
|
|
1226
1257
|
events.emit('requestSuccess', {
|
|
1227
1258
|
...feedbackBase,
|
|
1228
1259
|
status,
|
|
1229
1260
|
statusText,
|
|
1230
1261
|
});
|
|
1231
1262
|
|
|
1263
|
+
client.active -= 1;
|
|
1264
|
+
|
|
1265
|
+
if (options.client === null // this browser is single-use
|
|
1266
|
+
|| (client.retired && client.active === 0)) { // this browser is retired to minimize garbage build-up
|
|
1267
|
+
// this browser won't be reused
|
|
1268
|
+
await client.browser.close();
|
|
1269
|
+
}
|
|
1270
|
+
|
|
1232
1271
|
return curateResponse({
|
|
1233
1272
|
data,
|
|
1234
1273
|
status,
|
package/tests/browser.js
CHANGED
|
@@ -16,23 +16,20 @@ unprint.options({ // or unprint.options();
|
|
|
16
16
|
|
|
17
17
|
async function initTest() {
|
|
18
18
|
// concurrency
|
|
19
|
-
await Promise.all(
|
|
20
|
-
unprint.browser(
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
}),
|
|
25
|
-
unprint.browser('https://tools-httpstatus.pickup-services.com/200?sleep=500', {
|
|
26
|
-
browser: {
|
|
27
|
-
headless: false,
|
|
28
|
-
},
|
|
29
|
-
}),
|
|
30
|
-
unprint.browser('https://tools-httpstatus.pickup-services.com/200?sleep=500', {
|
|
19
|
+
await Promise.all(Array.from({ length: 20 }).map(async () => {
|
|
20
|
+
// await unprint.browser(`https://tools-httpstatus.pickup-services.com/${Math.random() < 0.2 ? '404' : '200'}?sleep=${Math.round(Math.random() * 500)}`, {
|
|
21
|
+
await unprint.browser(`https://tools-httpstatus.pickup-services.com/200?sleep=${Math.round(Math.random() * 5000)}`, {
|
|
22
|
+
// client: null,
|
|
23
|
+
interval: 100,
|
|
31
24
|
browser: {
|
|
32
|
-
headless:
|
|
25
|
+
headless: true,
|
|
33
26
|
},
|
|
34
|
-
})
|
|
35
|
-
|
|
27
|
+
});
|
|
28
|
+
}));
|
|
29
|
+
|
|
30
|
+
// console.log('Requests done, waiting...');
|
|
31
|
+
|
|
32
|
+
// await new Promise((resolve) => { setTimeout(() => resolve(), 60 * 60 * 1000); });
|
|
36
33
|
|
|
37
34
|
await Promise.all([
|
|
38
35
|
unprint.browser('https://tools-httpstatus.pickup-services.com/200?sleep=5000', {
|