unprint 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.editorconfig CHANGED
File without changes
package/.eslintrc CHANGED
File without changes
package/README.md CHANGED
@@ -7,6 +7,15 @@ unprint is a web scraping utility built around JSDOM, providing convenience meth
7
7
  ## Usage
8
8
  `const unprint = require('unprint');`
9
9
 
10
+ ### Global options
11
+ ```
12
+ unprint.options({
13
+ headers: {
14
+ 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'
15
+ },
16
+ })
17
+ ```
18
+
10
19
  ### Querying
11
20
  For optimal flexibility, unprint query methods can be used with or without initialization. If you already have access to DOM elements through another library or an unprint instance, you can query them by using the uninitialized `query` methods exposed directly by the library, passing the element as the first argument, like so:
12
21
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unprint",
3
- "version": "0.3.0",
3
+ "version": "0.4.1",
4
4
  "description": "Simplify common web scraping tasks while staying in control of the data.",
5
5
  "main": "src/app.js",
6
6
  "scripts": {
@@ -24,6 +24,7 @@
24
24
  "dependencies": {
25
25
  "axios": "^0.27.2",
26
26
  "bottleneck": "^2.19.5",
27
+ "deepmerge": "^4.2.2",
27
28
  "eslint": "^8.17.0",
28
29
  "eslint-config-airbnb": "^19.0.4",
29
30
  "eslint-config-airbnb-base": "^15.0.0",
package/src/app.js CHANGED
@@ -3,6 +3,7 @@
3
3
  const { JSDOM, VirtualConsole } = require('jsdom');
4
4
  const axios = require('axios').default;
5
5
  const moment = require('moment-timezone');
6
+ const merge = require('deepmerge');
6
7
 
7
8
  const settings = {
8
9
  throwErrors: false,
@@ -32,6 +33,12 @@ const defaultOptions = {
32
33
  trim: true,
33
34
  };
34
35
 
36
+ let globalOptions = {};
37
+
38
+ function configure(newOptions) {
39
+ globalOptions = newOptions;
40
+ }
41
+
35
42
  function trim(string) {
36
43
  if (typeof string === 'string') {
37
44
  return string.trim().replace(/\s+/g, ' ');
@@ -288,6 +295,30 @@ function queryImages(context, selector = 'img', customOptions) {
288
295
  return imageUrls.map((imageUrl) => prefixUrl(imageUrl, options.origin, options));
289
296
  }
290
297
 
298
+ function queryVideo(context, selector = 'source', customOptions) {
299
+ const options = {
300
+ ...context.options,
301
+ attribute: 'src',
302
+ ...customOptions,
303
+ };
304
+
305
+ const videoUrl = queryContent(context, selector, options);
306
+
307
+ return prefixUrl(videoUrl, options.origin, options);
308
+ }
309
+
310
+ function queryVideos(context, selector = 'source', customOptions) {
311
+ const options = {
312
+ ...context.options,
313
+ attribute: 'src',
314
+ ...customOptions,
315
+ };
316
+
317
+ const videoUrls = queryContents(context, selector, options);
318
+
319
+ return videoUrls.map((videoUrl) => prefixUrl(videoUrl, options.origin, options));
320
+ }
321
+
291
322
  function extractJson(element) {
292
323
  if (!element) {
293
324
  return null;
@@ -385,6 +416,8 @@ const queryFns = {
385
416
  date: queryDate,
386
417
  dates: queryDates,
387
418
  url: queryUrl,
419
+ video: queryVideo,
420
+ videos: queryVideos,
388
421
  };
389
422
 
390
423
  function isDomObject(element) {
@@ -485,12 +518,11 @@ function initAll(context, selector, options) {
485
518
  }
486
519
 
487
520
  async function request(url, body, customOptions = {}, method = 'GET') {
488
- const options = {
521
+ const options = merge.all([{
489
522
  timeout: 1000,
490
523
  extract: true,
491
524
  url,
492
- ...customOptions,
493
- };
525
+ }, globalOptions, customOptions]);
494
526
 
495
527
  const res = await axios({
496
528
  url,
@@ -551,6 +583,7 @@ async function post(url, body, options) {
551
583
  }
552
584
 
553
585
  module.exports = {
586
+ configure,
554
587
  get,
555
588
  post,
556
589
  request,
@@ -559,5 +592,6 @@ module.exports = {
559
592
  init,
560
593
  initAll,
561
594
  extractDate,
595
+ options: configure,
562
596
  query: initQueryFns(queryFns),
563
597
  };
package/tests/data.json CHANGED
File without changes
package/tests/index.html CHANGED
File without changes
package/tests/init.js CHANGED
@@ -10,6 +10,8 @@ const data = require('./data.json');
10
10
  const port = process.env.PORT || 3101;
11
11
 
12
12
  async function initTest() {
13
+ unprint.options({ headers: { 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36' } });
14
+
13
15
  const res = await unprint.get(`http://127.0.0.1:${port}/html`, { select: 'body' });
14
16
  // const jsonRes = await unprint.get(`http://127.0.0.1:${port}/json`);
15
17
  // const errorRes = await unprint.get(`http://127.0.0.1:${port}/error/404`);