unprint 0.17.0 → 0.17.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -210,10 +210,11 @@ Options
210
210
 
211
211
  Use Playwright with Chromium (experimental)
212
212
  * `unprint.browserRequest(url, [options])`
213
- * `unprint.closeAllBrowsers()`
213
+ * `unprint.closeAllBrowsers()`: Close reused browser instances.
214
214
 
215
215
  Additional options
216
- * `scope`: Browser instance to (re)use, default `main`.
216
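+ * `control`: Async function called with the Playwright page as its first argument (and `{ context, browser }` as its second) once the page has loaded; its return value is exposed as `control` in the result.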
217
+ * `scope`: Browser instance to (re)use, default `main`. Set to `null` to launch a new browser for every request; that browser is not kept for reuse and is closed once the request completes.
217
218
  * `browser`: Options object passed to Playwright's `launch`, requires new scope.
218
219
  * `browser.headless`: Headless mode, set to `false` to launch visible browser, default `true`.
219
220
  * `context`: Options object passed to Playwright's `newContext`, requires new scope.
@@ -238,6 +239,7 @@ Returns
238
239
  ok, // (boolean) status code >= 200 and < 300
239
240
  response, // (object) the original axios response object, alias 'res'
240
241
  res, // (object) alias for 'response'
242
+ control, // (any) return value of the browser `control` function
241
243
  }
242
244
  ```
243
245
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unprint",
3
- "version": "0.17.0",
3
+ "version": "0.17.2",
4
4
  "description": "Simplify common web scraping tasks while staying in control of the data.",
5
5
  "main": "src/app.js",
6
6
  "scripts": {},
package/src/app.js CHANGED
@@ -1063,7 +1063,9 @@ async function getBrowserInstance(scope, options) {
1063
1063
 
1064
1064
  const client = { context, browser };
1065
1065
 
1066
- clients.set(scope, client);
1066
+ if (scope) {
1067
+ clients.set(scope, client);
1068
+ }
1067
1069
 
1068
1070
  return client;
1069
1071
  }
@@ -1072,7 +1074,7 @@ async function closeAllBrowsers() {
1072
1074
  await Promise.all(Array.from(clients.values()).map(async (client) => client.browser.close()));
1073
1075
  }
1074
1076
 
1075
- function curateResponse(res, options, { url, customOptions }) {
1077
+ function curateResponse(res, options, { url, control, customOptions }) {
1076
1078
  const base = {
1077
1079
  ok: true,
1078
1080
  status: res.status,
@@ -1080,6 +1082,7 @@ function curateResponse(res, options, { url, customOptions }) {
1080
1082
  headers: res.headers,
1081
1083
  response: res,
1082
1084
  res,
1085
+ control,
1083
1086
  };
1084
1087
 
1085
1088
  if (['application/json', 'application/javascript'].some((type) => res.headers['content-type']?.includes(type)) && typeof res.data === 'object') {
@@ -1162,8 +1165,10 @@ async function browserRequest(url, customOptions = {}) {
1162
1165
 
1163
1166
  await page.waitForLoadState();
1164
1167
 
1168
+ let control = null;
1169
+
1165
1170
  if (customOptions.control) {
1166
- await customOptions.control(page, { context, browser });
1171
+ control = await customOptions.control(page, { context, browser });
1167
1172
  }
1168
1173
 
1169
1174
  events.emit('controlSuccess', feedbackBase);
@@ -1171,14 +1176,22 @@ async function browserRequest(url, customOptions = {}) {
1171
1176
  const data = await page.content();
1172
1177
 
1173
1178
  await page.close();
1174
- // await browser.close();
1179
+
1180
+ if (options.scope === null) {
1181
+ // this browser won't be reused
1182
+ await browser.close();
1183
+ }
1175
1184
 
1176
1185
  return curateResponse({
1177
1186
  data,
1178
1187
  status,
1179
1188
  statusText,
1180
1189
  headers,
1181
- }, options, { url, customOptions });
1190
+ }, options, {
1191
+ url,
1192
+ customOptions,
1193
+ control,
1194
+ });
1182
1195
  });
1183
1196
  }
1184
1197
 
package/tests/browser.js CHANGED
@@ -5,7 +5,9 @@ const unprint = require('../src/app');
5
5
  async function initTest() {
6
6
  await Promise.all([
7
7
  unprint.browser('https://tools-httpstatus.pickup-services.com/200?sleep=5000', {
8
- headless: false,
8
+ browser: {
9
+ headless: false,
10
+ },
9
11
  async control(_page) {
10
12
  //
11
13
  },
@@ -16,7 +18,9 @@ async function initTest() {
16
18
  }, 1000);
17
19
  }).then(async () => {
18
20
  await unprint.browser('https://tools-httpstatus.pickup-services.com/200?sleep=2000', {
19
- headless: false,
21
+ browser: {
22
+ headless: false,
23
+ },
20
24
  async control(_page) {
21
25
  //
22
26
  },
@@ -28,13 +32,14 @@ async function initTest() {
28
32
  // await unprint.browser('https://www.scrapingcourse.com/', {
29
33
  headless: false,
30
34
  async control(_page) {
31
- //
35
+ return 'test';
32
36
  },
33
37
  });
34
38
 
35
39
  const cards = res.context.query.contents('h2');
36
40
 
37
41
  console.log('CARD TITLES', cards);
42
+ console.log('CONTROL OUT', res.control);
38
43
 
39
44
  await unprint.closeAllBrowsers();
40
45
  }