unprint 0.14.2 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -181,6 +181,19 @@ Options
181
181
 
182
182
  Returns the CSS style attributes of an element as an object.
183
183
 
184
+ #### Query style URL
185
+ `query.styleUrl([selector], [styleAttribute], [options])`
186
+
187
+ Extracts the CSS `url()` link from a style attribute, such as a background.
188
+
189
+ Arguments
190
+ * `styleAttribute`: the CSS style attribute to extract the URL from
191
+
192
+ #### Query style background
193
+ `query.background([selector], [options])`
194
+
195
+ Extracts the CSS `url()` background from a style attribute. Alias for `query.styleUrl([selector], 'background-image', [options])`.
196
+
184
197
  ### HTTP request
185
198
  * `unprint.get(url, [options])`
186
199
  * `unprint.post(url, body, [options])`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unprint",
3
- "version": "0.14.2",
3
+ "version": "0.15.0",
4
4
  "description": "Simplify common web scraping tasks while staying in control of the data.",
5
5
  "main": "src/app.js",
6
6
  "scripts": {
package/src/app.js CHANGED
@@ -523,6 +523,8 @@ function queryStyle(context, selector, customOptions) {
523
523
  removeStyleFunctionSpaces(element, options);
524
524
  }
525
525
 
526
+ console.log('OPTIONS', options.styleAttribute);
527
+
526
528
  if (element.style) {
527
529
  return options.styleAttribute
528
530
  ? element.style.getPropertyValue(options.styleAttribute)
@@ -554,6 +556,45 @@ function queryStyles(context, selector, customOptions) {
554
556
  return elStyles.filter(Boolean);
555
557
  }
556
558
 
559
/**
 * Extracts the target of the first CSS `url()` function found in a style value.
 *
 * @param {object} context - query context, handed to queryStyle unchanged
 * @param {string} [selector] - selector handed to queryStyle unchanged
 * @param {string} [styleAttribute] - CSS property to read, e.g. 'background-image'
 * @param {object} [customOptions] - extra options for queryStyle; spread after
 *   styleAttribute, so a styleAttribute key in here takes precedence
 * @returns {string|null} the URL without surrounding quotes or padding, or null
 *   when there is no style value or it contains no url()
 */
function queryStyleUrl(context, selector, styleAttribute, customOptions) {
	const options = {
		styleAttribute,
		...customOptions,
	};

	const style = queryStyle(context, selector, options);

	// NOTE(review): queryStyle presumably returns a non-string when no styleAttribute is given — confirm
	if (!style || typeof style !== 'string') {
		return null;
	}

	// Group 1 remembers the opening quote (if any) so the same quote must close the URL.
	// The lazy group 2 stops at the first closing parenthesis, so neither a trailing quote
	// nor a second url() in the same value leaks into the result.
	const url = style.match(/url\(\s*(['"]?)(.*?)\1\s*\)/)?.[2];

	return url ?? null;
}
577
+
578
/**
 * Extracts the target of the first CSS `url()` function from each style value
 * returned by queryStyles.
 *
 * @param {object} context - query context, handed to queryStyles unchanged
 * @param {string} [selector] - selector handed to queryStyles unchanged
 * @param {string} [styleAttribute] - CSS property to read, e.g. 'background-image'
 * @param {object} [customOptions] - extra options for queryStyles; spread after
 *   styleAttribute, so a styleAttribute key in here takes precedence
 * @returns {string[]} URLs without surrounding quotes or padding; style values
 *   without a url() are left out
 */
function queryStyleUrls(context, selector, styleAttribute, customOptions) {
	const options = {
		styleAttribute,
		...customOptions,
	};

	const styles = queryStyles(context, selector, options);

	return styles
		// NOTE(review): queryStyles presumably yields non-strings when no styleAttribute is given — confirm
		.filter((style) => typeof style === 'string')
		// Group 1 remembers the opening quote so the same quote must close the URL; the lazy
		// group 2 stops at the first closing parenthesis instead of running to the last one.
		.map((style) => style.match(/url\(\s*(['"]?)(.*?)\1\s*\)/)?.[2])
		.filter(Boolean);
}
589
+
590
/**
 * Shorthand for queryStyleUrl with the style attribute fixed to 'background-image'.
 *
 * @param {object} context - query context, forwarded as-is
 * @param {string} [selector] - selector, forwarded as-is
 * @param {object} [customOptions] - options, forwarded as-is
 * @returns whatever queryStyleUrl returns for the 'background-image' attribute
 */
function queryBackground(context, selector, customOptions) {
	const styleAttribute = 'background-image';

	return queryStyleUrl(context, selector, styleAttribute, customOptions);
}
593
+
594
/**
 * Shorthand for queryStyleUrls with the style attribute fixed to 'background-image'.
 *
 * @param {object} context - query context, forwarded as-is
 * @param {string} [selector] - selector, forwarded as-is
 * @param {object} [customOptions] - options, forwarded as-is
 * @returns whatever queryStyleUrls returns for the 'background-image' attribute
 */
function queryBackgrounds(context, selector, customOptions) {
	const styleAttribute = 'background-image';

	return queryStyleUrls(context, selector, styleAttribute, customOptions);
}
597
+
557
598
  function queryVideo(context, selector = 'video source', customOptions) {
558
599
  const options = {
559
600
  ...context.options,
@@ -738,6 +779,8 @@ const queryFns = {
738
779
  attributes: queryAttributes,
739
780
  attr: queryAttribute,
740
781
  attrs: queryAttributes,
782
+ background: queryBackground,
783
+ backgrounds: queryBackgrounds,
741
784
  dataset: queryDataset,
742
785
  datasets: queryDatasets,
743
786
  data: queryDataset,
@@ -754,6 +797,8 @@ const queryFns = {
754
797
  jsons: queryJsons,
755
798
  style: queryStyle,
756
799
  styles: queryStyles,
800
+ styleUrl: queryStyleUrl,
801
+ styleUrls: queryStyleUrls,
757
802
  number: queryNumber,
758
803
  num: queryNumber,
759
804
  numbers: queryNumbers,
@@ -950,11 +995,13 @@ function setProxy(instance, options, url) {
950
995
  instance.defaults.httpAgent = proxyAgent;
951
996
  instance.defaults.httpsAgent = proxyAgent;
952
997
 
953
- return;
998
+ return true;
954
999
  }
955
1000
 
956
1001
  instance.defaults.httpAgent = options.httpsAgent || new http.Agent({ ...options.agent });
957
1002
  instance.defaults.httpsAgent = options.httpsAgent || new https.Agent({ ...options.agent });
1003
+
1004
+ return false;
958
1005
  }
959
1006
  /* eslint-enable no-param-reassign */
960
1007
 
@@ -967,16 +1014,6 @@ async function request(url, body, customOptions = {}, method = 'GET') {
967
1014
 
968
1015
  const { limiter, interval, concurrency } = getLimiter(url, options);
969
1016
 
970
- const feedbackBase = {
971
- url,
972
- method,
973
- interval,
974
- concurrency,
975
- options,
976
- };
977
-
978
- events.emit('requestInit', feedbackBase);
979
-
980
1017
  const instance = axios.create({
981
1018
  data: body,
982
1019
  validateStatus: null,
@@ -987,7 +1024,18 @@ async function request(url, body, customOptions = {}, method = 'GET') {
987
1024
  // httpAgent: options.httpAgent || new http.Agent({ ...options.agent }),
988
1025
  });
989
1026
 
990
- setProxy(instance, options, url);
1027
+ const isProxied = setProxy(instance, options, url);
1028
+
1029
+ const feedbackBase = {
1030
+ url,
1031
+ method,
1032
+ interval,
1033
+ concurrency,
1034
+ isProxied,
1035
+ options,
1036
+ };
1037
+
1038
+ events.emit('requestInit', feedbackBase);
991
1039
 
992
1040
  const res = await limiter.schedule(async () => instance.get(url));
993
1041
 
package/tests/index.html CHANGED
@@ -45,6 +45,7 @@
45
45
  <!-- deliberate space in url( ) to test JSDOM's quirky handling of this -->
46
46
  <div class="style" style="background-image: url( https://i.imgur.com/eDQmLys.jpg ); color: red;"></div>
47
47
  <div class="background" style="background: url(https://i.imgur.com/xFHbuDL.jpeg) 0 0/contain no-repeat; color: green;"></div>
48
+ <div class="background" style="background: url('https://i.redd.it/e91oo4ueyeb71.jpg') 0 0/contain no-repeat; color: green;"></div>
48
49
  <div class="style" style="margin: 1rem; color: blue;"></div>
49
50
 
50
51
  <video id="video" poster="https://i.imgur.com/eDQmLys.jpg"><source src="https://i.imgur.com/eDQmLys.mp4"></video>
package/tests/init.js CHANGED
@@ -28,7 +28,7 @@ async function initTest() {
28
28
  unprint.on('requestInit', (initData) => console.log('init', initData));
29
29
  unprint.on('requestError', (errorData) => console.error('error', errorData));
30
30
  unprint.on('requestSuccess', (successData) => console.log('success', successData));
31
- unprint.on('query', (queryData) => console.log('query', queryData));
31
+ // unprint.on('query', (queryData) => console.log('query', queryData));
32
32
 
33
33
  const res = await unprint.get(`http://127.0.0.1:${port}/html`, { select: 'body' });
34
34
  // const jsonRes = await unprint.get(`http://127.0.0.1:${port}/json`);
@@ -60,6 +60,10 @@ async function initTest() {
60
60
  console.log('style background', res.context.query.style('.background'));
61
61
  console.log('styles', res.context.query.styles('.style'));
62
62
  console.log('styles color', res.context.query.styles('.style', { styleAttribute: 'color' }));
63
+ console.log('style url', res.context.query.styleUrl('.background', 'background-image'));
64
+ console.log('style urls', res.context.query.styleUrls('.background', 'background-image'));
65
+ console.log('background', res.context.query.background('.background'));
66
+ console.log('backgrounds', res.context.query.backgrounds('.background'));
63
67
  console.log('path', res.context.query.url('#path'));
64
68
  console.log('relative path', res.context.query.url('#relativePath'));
65
69
  console.log('exists', res.context.query.exists('#title'));