unprint 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.editorconfig CHANGED
File without changes
package/.eslintrc CHANGED
File without changes
package/README.md CHANGED
@@ -7,6 +7,15 @@ unprint is a web scraping utility built around JSDOM, providing convenience meth
7
7
  ## Usage
8
8
  `const unprint = require('unprint');`
9
9
 
10
+ ### Global options
11
+ ```
12
+ unprint.options({
13
+ headers: {
14
+ 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'
15
+ },
16
+ })
17
+ ```
18
+
10
19
  ### Querying
11
20
  For optimal flexibility, unprint query methods can be used with or without initialization. If you already have access to DOM elements through another library or an unprint instance, you can query them using the uninitialized `query` methods provided directly by the library, passing the element as the first argument, like so:
12
21
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unprint",
3
- "version": "0.3.0",
3
+ "version": "0.4.0",
4
4
  "description": "Simplify common web scraping tasks while staying in control of the data.",
5
5
  "main": "src/app.js",
6
6
  "scripts": {
@@ -24,6 +24,7 @@
24
24
  "dependencies": {
25
25
  "axios": "^0.27.2",
26
26
  "bottleneck": "^2.19.5",
27
+ "deepmerge": "^4.2.2",
27
28
  "eslint": "^8.17.0",
28
29
  "eslint-config-airbnb": "^19.0.4",
29
30
  "eslint-config-airbnb-base": "^15.0.0",
package/src/app.js CHANGED
@@ -3,6 +3,7 @@
3
3
  const { JSDOM, VirtualConsole } = require('jsdom');
4
4
  const axios = require('axios').default;
5
5
  const moment = require('moment-timezone');
6
+ const merge = require('deepmerge');
6
7
 
7
8
  const settings = {
8
9
  throwErrors: false,
@@ -32,6 +33,12 @@ const defaultOptions = {
32
33
  trim: true,
33
34
  };
34
35
 
36
+ let globalOptions = {};
37
+
38
+ function configure(newOptions) {
39
+ globalOptions = newOptions;
40
+ }
41
+
35
42
  function trim(string) {
36
43
  if (typeof string === 'string') {
37
44
  return string.trim().replace(/\s+/g, ' ');
@@ -288,6 +295,30 @@ function queryImages(context, selector = 'img', customOptions) {
288
295
  return imageUrls.map((imageUrl) => prefixUrl(imageUrl, options.origin, options));
289
296
  }
290
297
 
298
+ function queryVideo(context, selector = 'source', customOptions) {
299
+ const options = {
300
+ ...context.options,
301
+ attribute: 'src',
302
+ ...customOptions,
303
+ };
304
+
305
+ const videoUrl = queryContent(context, selector, options);
306
+
307
+ return prefixUrl(videoUrl, options.origin, options);
308
+ }
309
+
310
+ function queryVideos(context, selector = 'source', customOptions) {
311
+ const options = {
312
+ ...context.options,
313
+ attribute: 'src',
314
+ ...customOptions,
315
+ };
316
+
317
+ const videoUrls = queryContents(context, selector, options);
318
+
319
+ return videoUrls.map((videoUrl) => prefixUrl(videoUrl, options.origin, options));
320
+ }
321
+
291
322
  function extractJson(element) {
292
323
  if (!element) {
293
324
  return null;
@@ -385,6 +416,8 @@ const queryFns = {
385
416
  date: queryDate,
386
417
  dates: queryDates,
387
418
  url: queryUrl,
419
+ video: queryVideo,
420
+ videos: queryVideos,
388
421
  };
389
422
 
390
423
  function isDomObject(element) {
@@ -485,12 +518,13 @@ function initAll(context, selector, options) {
485
518
  }
486
519
 
487
520
  async function request(url, body, customOptions = {}, method = 'GET') {
488
- const options = {
521
+ const options = merge.all([{
489
522
  timeout: 1000,
490
523
  extract: true,
491
524
  url,
492
- ...customOptions,
493
- };
525
+ }, globalOptions, customOptions]);
526
+
527
+ console.log('options', options, globalOptions);
494
528
 
495
529
  const res = await axios({
496
530
  url,
@@ -551,6 +585,7 @@ async function post(url, body, options) {
551
585
  }
552
586
 
553
587
  module.exports = {
588
+ configure,
554
589
  get,
555
590
  post,
556
591
  request,
@@ -559,5 +594,6 @@ module.exports = {
559
594
  init,
560
595
  initAll,
561
596
  extractDate,
597
+ options: configure,
562
598
  query: initQueryFns(queryFns),
563
599
  };
package/tests/data.json CHANGED
File without changes
package/tests/index.html CHANGED
File without changes
package/tests/init.js CHANGED
@@ -10,6 +10,8 @@ const data = require('./data.json');
10
10
  const port = process.env.PORT || 3101;
11
11
 
12
12
  async function initTest() {
13
+ unprint.options({ headers: { 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36' } });
14
+
13
15
  const res = await unprint.get(`http://127.0.0.1:${port}/html`, { select: 'body' });
14
16
  // const jsonRes = await unprint.get(`http://127.0.0.1:${port}/json`);
15
17
  // const errorRes = await unprint.get(`http://127.0.0.1:${port}/error/404`);