unprint 0.18.28 → 0.18.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +17 -1
  2. package/package.json +1 -1
  3. package/src/app.js +10 -0
package/README.md CHANGED
@@ -233,6 +233,7 @@ Options
233
233
  * `userAgent`: The default user agent header
234
234
  * `browserUserAgent`: The default user agent header for browser-like requests (`get` interface `fetch` and `browserRequest`)
235
235
  * `apiUserAgent`: The default user agent header for raw requests (`get` interface `request`)
236
+ * `useBrowser`: Forward the call to `unprint.browser()` (see below); only applies to GET requests
236
237
 
237
238
  Use Playwright with Chromium (experimental)
238
239
  * `unprint.browser(url, [options])`
@@ -270,6 +271,17 @@ Returns
270
271
  }
271
272
  ```
272
273
 
274
+ ### Helpers
275
+ * `initialize(source, [selector], [options])` (`init`): Initialize element or HTML as unprint context
276
+ * `initializeAll(source, [selector], [options])` (`initAll`): Initialize element or HTML as multiple contexts
277
+ * `extractDate(string, [format], [options])`: Parse date with moment and some curation
278
+ * `extractDateAgo(string, [options])`: Extract relative date (e.g. 4 months ago)
279
+ * `extractDuration(timestamp, [matchRegex])`: Parse duration (e.g. 04:11:05) to seconds
280
+ * `extractTimestamp(string)`: Parse timestamp (e.g. 4H11M5S) to seconds
281
+ * `extractNumber(string, [options])`: Parse string as number
282
+ * `extractSourceSet(string, [options])`: Parse source set to object
283
+ * `formatDate(date, format, inputFormat)`: Format date with moment
284
+
273
285
  ### Proxy
274
286
  ```javascript
275
287
  unprint.options({ // or unprint.options();
@@ -299,7 +311,11 @@ Usage:
299
311
  * `unprint.off('trigger', callbackFn)`
300
312
 
301
313
  Triggers:
314
+ * `query`: A query method was used
302
315
  * `requestInit`: An HTTP request is about to be made
303
316
  * `requestSuccess`: The HTTP request completed with an OK status code
304
317
  * `requestError`: The HTTP request completed with an error status code
305
- * `query`: A query method was used
318
+ * `browserOpen`: A browser window was launched or used
319
+ * `browserClose`: A browser window was closed
320
+ * `controlSuccess`: A browser call control method succeeded
321
+ * `controlError`: A browser call control method failed
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unprint",
3
- "version": "0.18.28",
3
+ "version": "0.18.30",
4
4
  "description": "Simplify common web scraping tasks while staying in control of the data.",
5
5
  "main": "src/app.js",
6
6
  "scripts": {},
package/src/app.js CHANGED
@@ -1426,6 +1426,11 @@ async function browserRequest(url, customOptions = {}) {
1426
1426
 
1427
1427
  await closeBrowser(client, options);
1428
1428
 
1429
+ events.emit('controlError', {
1430
+ ...feedbackBase,
1431
+ error,
1432
+ });
1433
+
1429
1434
  return {
1430
1435
  ok: false,
1431
1436
  controlError: error.message,
@@ -1503,6 +1508,10 @@ async function request(url, body, customOptions = {}, method = 'GET', redirects
1503
1508
  url,
1504
1509
  }, globalOptions, customOptions]);
1505
1510
 
1511
+ if (options.useBrowser) {
1512
+ return browserRequest(url, options);
1513
+ }
1514
+
1506
1515
  const { limiter, interval, concurrency } = getLimiter(url, options);
1507
1516
 
1508
1517
  const agent = getAgent(options, url);
@@ -1624,6 +1633,7 @@ module.exports = {
1624
1633
  extractDateAgo,
1625
1634
  extractDuration,
1626
1635
  extractNumber,
1636
+ extractSourceSet,
1627
1637
  extractTimestamp,
1628
1638
  formatDate,
1629
1639
  dateConstants: {