unprint 0.17.6 → 0.17.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -218,7 +218,8 @@ Use Playwright with Chromium (experimental)
218
218
 
219
219
  Additional options
220
220
  * `control`: Async function to interface with Playwright page passed as argument
221
- * `scope`: Browser instance to (re)use, set to `null` to force new scope every request, default `main`.
221
+ * `client`: Name of the browser instance scope to (re)use; set to `null` to launch a new single-use browser for every request, default `main`.
222
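+ * `clientRetirement`: Number of requests a browser instance serves before it is retired for resource clean-up; a fresh browser is launched for subsequent requests and the retired one is closed once its in-flight requests finish, default `20`.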
222
223
  * `browser`: Options object passed to Playwright's `launch`.
223
224
  * `browser.headless`: Headless mode, set to `false` to launch visible browser, default `true`.
224
225
  * `context`: Options object passed to Playwright's `newContext`.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unprint",
3
- "version": "0.17.6",
3
+ "version": "0.17.7",
4
4
  "description": "Simplify common web scraping tasks while staying in control of the data.",
5
5
  "main": "src/app.js",
6
6
  "scripts": {},
package/src/app.js CHANGED
@@ -1060,6 +1060,13 @@ async function getBrowserInstance(scope, options, useProxy = false) {
1060
1060
  if (clients.has(scopeKey)) {
1061
1061
  const client = clients.get(scopeKey);
1062
1062
 
1063
+ client.uses += 1;
1064
+
1065
+ if (client.uses >= (options.clientRetirement || 20)) {
1066
+ client.retired = true;
1067
+ clients.delete(scopeKey);
1068
+ }
1069
+
1063
1070
  await client.launchers;
1064
1071
 
1065
1072
  return client;
@@ -1082,7 +1089,14 @@ async function getBrowserInstance(scope, options, useProxy = false) {
1082
1089
  }));
1083
1090
 
1084
1091
  const launchers = Promise.all([browserLauncher, contextLauncher]);
1085
- const client = { launchers };
1092
+
1093
+ const client = {
1094
+ key: scopeKey,
1095
+ launchers,
1096
+ active: 0,
1097
+ uses: 1,
1098
+ retired: false,
1099
+ };
1086
1100
 
1087
1101
  if (scope) {
1088
1102
  clients.set(scopeKey, client);
@@ -1139,7 +1153,7 @@ async function browserRequest(url, customOptions = {}) {
1139
1153
  const options = merge.all([{
1140
1154
  timeout: 1000,
1141
1155
  extract: true,
1142
- scope: 'main',
1156
+ client: 'main',
1143
1157
  limiter: 'browser',
1144
1158
  url,
1145
1159
  }, globalOptions, customOptions]);
@@ -1160,8 +1174,11 @@ async function browserRequest(url, customOptions = {}) {
1160
1174
  events.emit('requestInit', feedbackBase);
1161
1175
 
1162
1176
  return limiter.schedule(async () => {
1163
- const { context, browser } = await getBrowserInstance(options.scope, options, useProxy);
1164
- const page = await context.newPage();
1177
+ const client = await getBrowserInstance(options.client, options, useProxy);
1178
+
1179
+ client.active += 1;
1180
+
1181
+ const page = await client.context.newPage();
1165
1182
 
1166
1183
  const res = await page.goto(url, {
1167
1184
  ...options.page,
@@ -1180,6 +1197,8 @@ async function browserRequest(url, customOptions = {}) {
1180
1197
  statusText,
1181
1198
  });
1182
1199
 
1200
+ client.active -= 1;
1201
+
1183
1202
  return {
1184
1203
  ok: false,
1185
1204
  status,
@@ -1198,8 +1217,10 @@ async function browserRequest(url, customOptions = {}) {
1198
1217
 
1199
1218
  if (customOptions.control) {
1200
1219
  try {
1201
- control = await customOptions.control(page, { context, browser });
1220
+ control = await customOptions.control(page, client);
1202
1221
  } catch (error) {
1222
+ client.active -= 1;
1223
+
1203
1224
  return {
1204
1225
  ok: false,
1205
1226
  controlError: error.message,
@@ -1218,17 +1239,20 @@ async function browserRequest(url, customOptions = {}) {
1218
1239
 
1219
1240
  await page.close();
1220
1241
 
1221
- if (options.scope === null) {
1222
- // this browser won't be reused
1223
- await browser.close();
1224
- }
1225
-
1226
1242
  events.emit('requestSuccess', {
1227
1243
  ...feedbackBase,
1228
1244
  status,
1229
1245
  statusText,
1230
1246
  });
1231
1247
 
1248
+ client.active -= 1;
1249
+
1250
+ if (options.client === null // this browser is single-use
1251
+ || (client.retired && client.active === 0)) { // this browser is retired to minimize garbage build-up
1252
+ // this browser won't be reused
1253
+ await client.browser.close();
1254
+ }
1255
+
1232
1256
  return curateResponse({
1233
1257
  data,
1234
1258
  status,
package/tests/browser.js CHANGED
@@ -16,23 +16,20 @@ unprint.options({ // or unprint.options();
16
16
 
17
17
  async function initTest() {
18
18
  // concurrency
19
- await Promise.all([
20
- unprint.browser('https://tools-httpstatus.pickup-services.com/200?sleep=500', {
21
- browser: {
22
- headless: false,
23
- },
24
- }),
25
- unprint.browser('https://tools-httpstatus.pickup-services.com/200?sleep=500', {
26
- browser: {
27
- headless: false,
28
- },
29
- }),
30
- unprint.browser('https://tools-httpstatus.pickup-services.com/200?sleep=500', {
19
+ await Promise.all(Array.from({ length: 20 }).map(async () => {
20
+ // await unprint.browser(`https://tools-httpstatus.pickup-services.com/${Math.random() < 0.2 ? '404' : '200'}?sleep=${Math.round(Math.random() * 500)}`, {
21
+ await unprint.browser(`https://tools-httpstatus.pickup-services.com/200?sleep=${Math.round(Math.random() * 5000)}`, {
22
+ // client: null,
23
+ interval: 100,
31
24
  browser: {
32
- headless: false,
25
+ headless: true,
33
26
  },
34
- }),
35
- ]);
27
+ });
28
+ }));
29
+
30
+ // console.log('Requests done, waiting...');
31
+
32
+ // await new Promise((resolve) => { setTimeout(() => resolve(), 60 * 60 * 1000); });
36
33
 
37
34
  await Promise.all([
38
35
  unprint.browser('https://tools-httpstatus.pickup-services.com/200?sleep=5000', {