unprint 0.19.2 → 0.19.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unprint",
3
- "version": "0.19.2",
3
+ "version": "0.19.3",
4
4
  "description": "Simplify common web scraping tasks while staying in control of the data.",
5
5
  "main": "src/app.js",
6
6
  "repository": {
package/src/app.js CHANGED
@@ -1222,11 +1222,13 @@ const clients = new Map();
1222
1222
  /* eslint-enable no-param-reassign */
1223
1223
  async function getBrowserInstance(scope, options, useProxy = false) {
1224
1224
  const scopeKey = `${scope}_${useProxy ? 'proxy' : 'direct'}_${options.browser ? hashObject(options.browser) : 'default'}_${options.context ? hashObject(options.context) : 'default'}`;
1225
+ const now = new Date();
1225
1226
 
1226
1227
  if (clients.has(scopeKey)) {
1227
1228
  const client = clients.get(scopeKey);
1228
1229
 
1229
1230
  client.uses += 1;
1231
+ client.lastUsedAt = now;
1230
1232
 
1231
1233
  if (client.uses >= (options.clientRetirement || 20)) {
1232
1234
  client.retired = true;
@@ -1262,6 +1264,8 @@ async function getBrowserInstance(scope, options, useProxy = false) {
1262
1264
  active: 0,
1263
1265
  uses: 1,
1264
1266
  retired: false,
1267
+ createdAt: now,
1268
+ lastUsedAt: now,
1265
1269
  };
1266
1270
 
1267
1271
  if (scope) {
@@ -1274,12 +1278,18 @@ async function getBrowserInstance(scope, options, useProxy = false) {
1274
1278
  return client;
1275
1279
  }
1276
1280
 
1281
/**
 * Expose the registry of browser clients.
 *
 * Returns the module's own `clients` Map itself, not a copy, so anything the
 * caller adds or deletes is visible to every other user of the registry.
 *
 * @returns {Map} The `clients` Map.
 */
function getAllBrowsers() {
  return clients;
}
1284
+
1277
1285
  async function closeAllBrowsers() {
1278
1286
  const closingClients = Array.from(clients.values());
1279
1287
 
1280
1288
  await Promise.all(closingClients.map(async (client) => {
1281
1289
  await client.context.close();
1282
1290
  await client.browser.close();
1291
+
1292
+ clients.delete(client.key);
1283
1293
  }));
1284
1294
 
1285
1295
  events.emit('browserClose', {
@@ -1290,20 +1300,26 @@ async function closeAllBrowsers() {
1290
1300
  });
1291
1301
  }
1292
1302
 
1293
/**
 * Close a browser client that will not be reused, and report whether it was closed.
 *
 * A client is closed when the caller marks it single-use (`options.client === null`),
 * or when it is retired and its `active` count is 0. Otherwise it is left running.
 *
 * @param {object} client - Client entry; `key`, `context`, `browser`, `active` and `retired` are read.
 * @param {object} [options] - Request options; only `client` is read. `null`/`undefined` is tolerated.
 * @returns {Promise<boolean>} `true` if the browser was closed, `false` if it was left running.
 * @throws Rethrows any error raised while closing the context or the browser; no
 *   `browserClose` event is emitted in that case.
 */
async function closeBrowser(client, options = {}) {
  const isSingleUse = options?.client === null; // this browser is single-use
  const isDrained = client.retired && client.active === 0; // this browser is retired to minimize garbage build-up

  if (!isSingleUse && !isDrained) {
    return false;
  }

  // Unregister before the asynchronous teardown so a lookup that happens while we are
  // awaiting cannot return a client that is already closing. Only drop the entry if it
  // still points at this client, so a different client stored under the same key is kept.
  if (clients.get(client.key) === client) {
    clients.delete(client.key);
  }

  try {
    // this browser won't be reused, browser close DOES NOT automatically close context https://github.com/microsoft/playwright/issues/15163
    await client.context.close();
  } finally {
    // always attempt to close the browser, even if closing the context failed
    await client.browser.close();
  }

  events.emit('browserClose', {
    keys: [client.key],
    active: client.active,
    retired: !!client.retired,
    clients: clients.size,
  });

  return true;
}
1308
1324
 
1309
1325
  function getAgent(options, url) {
@@ -1729,7 +1745,9 @@ const unprint = {
1729
1745
  request,
1730
1746
  browserRequest,
1731
1747
  browser: browserRequest,
1748
+ closeBrowser,
1732
1749
  closeAllBrowsers,
1750
+ getAllBrowsers,
1733
1751
  initialize: init,
1734
1752
  initializeAll: initAll,
1735
1753
  init,
package/src/server.js CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  const crypto = require('crypto');
4
4
  const express = require('express');
5
+ const timers = require('timers/promises');
5
6
 
6
7
  require('dotenv').config();
7
8
 
@@ -79,6 +80,26 @@ async function handleRequest(req, res, unprint, method) {
79
80
  });
80
81
  }
81
82
 
83
/**
 * Periodically close browsers that have not been used for a while.
 *
 * Waits `interval` milliseconds, runs one pass over `unprint.getAllBrowsers()`,
 * logs the result, then schedules the next pass. Each call resolves after its own
 * pass; the follow-up pass runs detached.
 *
 * @param {object} unprint - Object providing `getAllBrowsers()` and `closeBrowser(client, options)`.
 * @param {object} [settings]
 * @param {number} [settings.interval=10000] - Milliseconds to wait before each pass.
 * @param {number} [settings.expiry=300000] - Milliseconds since `lastUsedAt` after which a browser is closed (5 minutes).
 * @returns {Promise<void>}
 */
async function monitorBrowsers(unprint, { interval = 10_000, expiry = 300_000 } = {}) {
  await timers.setTimeout(interval);

  try {
    const now = Date.now();
    const candidates = Array.from(unprint.getAllBrowsers().values());

    // allSettled: one browser failing to close must not abort the others or stop the monitor
    const outcomes = await Promise.allSettled(candidates.map(async (client) => {
      const isExpired = now - client.lastUsedAt > expiry;

      // `{ client: null }` forces the close, so skip browsers that still report active work
      if (!isExpired || client.active > 0) {
        return false;
      }

      return unprint.closeBrowser(client, { client: null });
    }));

    const closedClients = outcomes.filter((outcome) => outcome.status === 'fulfilled' && outcome.value).length;
    const failures = outcomes.filter((outcome) => outcome.status === 'rejected');

    logger.info(`Closed ${closedClients}/${outcomes.length} browsers`);

    if (failures.length > 0) {
      const reasons = failures.map((outcome) => outcome.reason?.message ?? outcome.reason).join(', ');

      logger.info(`Failed to close ${failures.length} browsers: ${reasons}`);
    }
  } catch (error) {
    // this function is started without a rejection handler, so a failed pass is logged rather than thrown
    logger.info(`Browser monitor pass failed: ${error?.message ?? error}`);
  } finally {
    // detached on purpose: the next pass starts with its own wait and handles its own errors
    monitorBrowsers(unprint, { interval, expiry });
  }
}
102
+
82
103
  async function initServer(address, unprint) {
83
104
  const app = express();
84
105
  const addressComponents = typeof address === 'boolean' ? [] : String(address).split(':');
@@ -88,7 +109,7 @@ async function initServer(address, unprint) {
88
109
 
89
110
  app.use(express.json());
90
111
 
91
- app.use(async (req, res, next) => {
112
+ app.use(async (req, _res, next) => {
92
113
  if (process.env.UNPRINT_KEY) {
93
114
  if (process.env.UNPRINT_KEY.length !== req.headers['unprint-key']?.length
94
115
  || !crypto.timingSafeEqual(Buffer.from(process.env.UNPRINT_KEY, 'utf16le'), Buffer.from(req.headers['unprint-key'], 'utf16le'))) {
@@ -134,6 +155,8 @@ async function initServer(address, unprint) {
134
155
 
135
156
  logger.info(`Started unprint server on ${host}:${port}`);
136
157
  });
158
+
159
+ monitorBrowsers(unprint);
137
160
  }
138
161
 
139
162
  module.exports = initServer;
package/tests/remote.js CHANGED
@@ -13,9 +13,10 @@ unprint.configure({
13
13
 
14
14
  async function init() {
15
15
  unprint.on('requestInit', (event) => console.log('INIT', event));
16
+ unprint.on('browserClose', (event) => console.log('CLOSE', event));
16
17
 
17
18
  const res = await unprint.browser('https://www.google.com', {
18
- useRemote: false,
19
+ useRemote: true,
19
20
  async control(page) {
20
21
  const form = await page.locator('form');
21
22