unprint 0.16.4-beta → 0.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -205,15 +205,24 @@ Extracts the CSS `url()` background from a style attribute. Alias for `query.sty
205
205
  * `unprint.request(url, body, [options], [method])`
206
206
 
207
207
  Options
208
- * `select`: Pre-query and initialize a specific element on the page
209
- * `selectAll`: Pre-query and initialize multiple specific element on the page
208
+ * `select`: Pre-query and initialize a specific element on the page.
209
+ * `selectAll`: Pre-query and initialize multiple specific elements on the page.
210
210
 
211
211
  Use Playwright with Chromium (experimental)
212
212
  * `unprint.browserRequest(url, [options])`
213
- * `unprint.closeAllBrowsers()`
213
+ * `unprint.closeAllBrowsers()`: Close reused browser instances.
214
214
 
215
215
  Additional options
216
- * `browser`: Options object passed to Playwright
216
+ * `control`: Async function to interface with the Playwright page, which is passed as its argument.
217
+ * `scope`: Browser instance to (re)use, set to `null` to force new scope every request, default `main`.
218
+ * `browser`: Options object passed to Playwright's `launch`; requires a new scope, as it is only applied when a new browser instance is launched for that scope.
219
+ * `browser.headless`: Headless mode, set to `false` to launch visible browser, default `true`.
220
+ * `context`: Options object passed to Playwright's `newContext`; requires a new scope, as it is only applied when a new browser instance is launched for that scope.
221
+ * `page`: Options object passed to Playwright's `goto`.
222
+
223
+ This requires you to install the Chromium executable:
224
+ * `sudo npx patchright install-deps`
225
+ * `npx patchright install`
217
226
 
218
227
  Returns
219
228
  ```javascript
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unprint",
3
- "version": "0.16.4-beta",
3
+ "version": "0.17.1",
4
4
  "description": "Simplify common web scraping tasks while staying in control of the data.",
5
5
  "main": "src/app.js",
6
6
  "scripts": {},
package/src/app.js CHANGED
@@ -1046,22 +1046,26 @@ function setProxy(instance, options, url) {
1046
1046
  const clients = new Map();
1047
1047
 
1048
1048
  /* eslint-enable no-param-reassign */
1049
- async function getBrowserInstance(scope) {
1049
+ async function getBrowserInstance(scope, options) {
1050
1050
  if (clients.has(scope)) {
1051
1051
  return clients.get(scope);
1052
1052
  }
1053
1053
 
1054
1054
  const browser = await chromium.launch({
1055
- headless: false,
1055
+ headless: true,
1056
+ ...options.browser,
1056
1057
  });
1057
1058
 
1058
1059
  const context = await browser.newContext({
1059
1060
  userAgent: 'unprint',
1061
+ ...options.context,
1060
1062
  });
1061
1063
 
1062
1064
  const client = { context, browser };
1063
1065
 
1064
- clients.set(scope, client);
1066
+ if (scope) {
1067
+ clients.set(scope, client);
1068
+ }
1065
1069
 
1066
1070
  return client;
1067
1071
  }
@@ -1126,11 +1130,11 @@ async function browserRequest(url, customOptions = {}) {
1126
1130
  };
1127
1131
 
1128
1132
  return limiter.schedule(async () => {
1129
- const { context, browser } = await getBrowserInstance(options.scope);
1133
+ const { context, browser } = await getBrowserInstance(options.scope, options);
1130
1134
  const page = await context.newPage();
1131
1135
 
1132
1136
  const res = await page.goto(url, {
1133
- ...options.browser,
1137
+ ...options.page,
1134
1138
  });
1135
1139
 
1136
1140
  const status = res.status();
@@ -1169,7 +1173,11 @@ async function browserRequest(url, customOptions = {}) {
1169
1173
  const data = await page.content();
1170
1174
 
1171
1175
  await page.close();
1172
- // await browser.close();
1176
+
1177
+ if (options.scope === null) {
1178
+ // this browser won't be reused
1179
+ await browser.close();
1180
+ }
1173
1181
 
1174
1182
  return curateResponse({
1175
1183
  data,
package/tests/browser.js CHANGED
@@ -5,7 +5,10 @@ const unprint = require('../src/app');
5
5
  async function initTest() {
6
6
  await Promise.all([
7
7
  unprint.browser('https://tools-httpstatus.pickup-services.com/200?sleep=5000', {
8
- headless: false,
8
+ browser: {
9
+ headless: false,
10
+ },
11
+ scope: null,
9
12
  async control(_page) {
10
13
  //
11
14
  },
@@ -16,7 +19,10 @@ async function initTest() {
16
19
  }, 1000);
17
20
  }).then(async () => {
18
21
  await unprint.browser('https://tools-httpstatus.pickup-services.com/200?sleep=2000', {
19
- headless: false,
22
+ browser: {
23
+ headless: false,
24
+ },
25
+ scope: null,
20
26
  async control(_page) {
21
27
  //
22
28
  },