unprint 0.17.2 → 0.17.4

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
package/README.md CHANGED
@@ -18,6 +18,10 @@ unprint.options({
18
18
  concurrency: 10,
19
19
  interval: 10, // ms
20
20
  },
21
+ browser: {
22
+ concurrency: 5,
23
+ interval: 20,
24
+ },
21
25
  [hostname]: {
22
26
  enable: true, // enabled by default
23
27
  concurrency: 1,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unprint",
3
- "version": "0.17.2",
3
+ "version": "0.17.4",
4
4
  "description": "Simplify common web scraping tasks while staying in control of the data.",
5
5
  "main": "src/app.js",
6
6
  "scripts": {},
package/src/app.js CHANGED
@@ -21,6 +21,10 @@ const settings = {
21
21
  interval: 10,
22
22
  concurrency: 10,
23
23
  },
24
+ browser: {
25
+ interval: 20,
26
+ concurrency: 5,
27
+ },
24
28
  },
25
29
  };
26
30
 
@@ -988,7 +992,7 @@ function getLimiterValue(prop, options, hostname) {
988
992
  return options.limits[hostname][prop];
989
993
  }
990
994
 
991
- return options.limits.default[prop];
995
+ return options.limits[options?.limiter || 'default'][prop];
992
996
  }
993
997
 
994
998
  function getLimiter(url, options) {
@@ -1048,25 +1052,34 @@ const clients = new Map();
1048
1052
  /* eslint-enable no-param-reassign */
1049
1053
  async function getBrowserInstance(scope, options) {
1050
1054
  if (clients.has(scope)) {
1051
- return clients.get(scope);
1055
+ const client = clients.get(scope);
1056
+
1057
+ await client.launchers;
1058
+
1059
+ return client;
1052
1060
  }
1053
1061
 
1054
- const browser = await chromium.launch({
1062
+ // if we await here, we create a race condition, and a second call to getBrowserInstance would launch another browser that will overwrite the first one
1063
+ const browserLauncher = chromium.launch({
1055
1064
  headless: true,
1056
1065
  ...options.browser,
1057
1066
  });
1058
1067
 
1059
- const context = await browser.newContext({
1068
+ const contextLauncher = browserLauncher.then((browser) => browser.newContext({
1060
1069
  userAgent: 'unprint',
1061
1070
  ...options.context,
1062
- });
1071
+ }));
1063
1072
 
1064
- const client = { context, browser };
1073
+ const launchers = Promise.all([browserLauncher, contextLauncher]);
1074
+ const client = { launchers };
1065
1075
 
1066
1076
  if (scope) {
1067
1077
  clients.set(scope, client);
1068
1078
  }
1069
1079
 
1080
+ client.browser = await browserLauncher;
1081
+ client.context = await contextLauncher;
1082
+
1070
1083
  return client;
1071
1084
  }
1072
1085
 
@@ -1116,6 +1129,7 @@ async function browserRequest(url, customOptions = {}) {
1116
1129
  timeout: 1000,
1117
1130
  extract: true,
1118
1131
  scope: 'main',
1132
+ limiter: 'browser',
1119
1133
  url,
1120
1134
  }, globalOptions, customOptions]);
1121
1135
 
package/tests/browser.js CHANGED
@@ -3,6 +3,25 @@
3
3
  const unprint = require('../src/app');
4
4
 
5
5
  async function initTest() {
6
+ // concurrency
7
+ await Promise.all([
8
+ unprint.browser('https://tools-httpstatus.pickup-services.com/200?sleep=500', {
9
+ browser: {
10
+ headless: false,
11
+ },
12
+ }),
13
+ unprint.browser('https://tools-httpstatus.pickup-services.com/200?sleep=500', {
14
+ browser: {
15
+ headless: false,
16
+ },
17
+ }),
18
+ unprint.browser('https://tools-httpstatus.pickup-services.com/200?sleep=500', {
19
+ browser: {
20
+ headless: false,
21
+ },
22
+ }),
23
+ ]);
24
+
6
25
  await Promise.all([
7
26
  unprint.browser('https://tools-httpstatus.pickup-services.com/200?sleep=5000', {
8
27
  browser: {