unprint 0.16.1 → 0.16.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -52,11 +52,16 @@ query.element('h1#title'); // HTMLHeadingElement
52
52
  **From here on, the query methods will be described in their initialized form.** The API for the *uninitialized* methods is identical, except for the element passed as the first argument
53
53
 
54
54
  #### Selector
55
- The selector can be a CSS selector, an XPath selector starting with `/`, or an array of either or both acting as fallbacks. If the selector is falsy, the input element will be used.
55
+ The selector can be a CSS selector, an XPath selector starting with `/` or `(`, or an array of either or both acting as fallbacks. If the selector is falsy, the input element will be used.
56
+
57
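+ * XPath Caveat: `//` and `(//` at the *start* of the selector are converted to `.//` and `(.//` for more intuitive relative selection, but only the start is rewritten: any later path in the same expression that itself begins with `//` (e.g. after a `|` union) will still be absolute.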
56
58
 
57
59
  #### Querying multiple elements
58
60
  Most methods can be used in plural, returning an array of results, i.e. `query.elements()`, `query.dates()`.
59
61
 
62
+ Options
63
+ * `filterDuplicates`: When an array of selectors results in the same element being selected multiple times, ensure each element is only returned once, default `true`.
64
+
60
65
  #### Query an element
61
66
  * `query.element([selector], [options])`
62
67
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unprint",
3
- "version": "0.16.1",
3
+ "version": "0.16.3",
4
4
  "description": "Simplify common web scraping tasks while staying in control of the data.",
5
5
  "main": "src/app.js",
6
6
  "scripts": {
package/src/app.js CHANGED
@@ -74,9 +74,9 @@ function getElements(context, selector, firstOnly = false) {
74
74
  return context.element;
75
75
  }
76
76
 
77
- if (/^\//.test(selector)) {
78
- // XPath selector
79
- const iterator = globalWindow.document.evaluate(`.${selector}`, context.element, null, globalWindow.XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
77
+ if (selector.startsWith('/') || selector.startsWith('(')) {
78
+ // XPath selector, . prefix ensures selector is relative to current node, won't work for deeper selections
79
+ const iterator = globalWindow.document.evaluate(selector.replace(/^\//, './').replace(/^\(\//, '(./'), context.element, null, globalWindow.XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
80
80
 
81
81
  if (firstOnly) {
82
82
  return iterator.iterateNext();
@@ -102,14 +102,20 @@ function queryElement(context, selectors, _customOptions) {
102
102
  return target || null;
103
103
  }
104
104
 
105
- function queryElements(context, selectors, _customOptions) {
105
+ function queryElements(context, selectors, customOptions = {}) {
106
106
  if (!selectors) {
107
107
  return context.element;
108
108
  }
109
109
 
110
- const targets = [].concat(selectors).reduce((acc, selector) => acc || getElements(context, selector, false), null);
110
+ const options = customOptions;
111
+ const targets = [].concat(selectors).reduce((acc, selector) => acc.concat(getElements(context, selector, false)), []).filter(Boolean);
111
112
 
112
- return targets || [];
113
+ if (options.filterDuplicates === false) {
114
+ return targets || [];
115
+ }
116
+
117
+ // findIndex always finds first index, if current index is not the first index, it's a dupe
118
+ return targets.filter((target, index, array) => index === array.findIndex((dupe) => target === dupe));
113
119
  }
114
120
 
115
121
  function queryExistence(context, selector, customOptions) {
package/tests/init.js CHANGED
@@ -37,6 +37,7 @@ async function initTest() {
37
37
  console.log('title', res.context.query.content('//*[contains(text(), "Test")]'));
38
38
  console.log('date', res.context.query.date('#date', 'DD-MM-YYYY HH:mm'));
39
39
  console.log('date xpath', res.context.query.date('//div[contains(text(), "Today:")]', 'MMM DD, YYYY'));
40
+ console.log('date grouped xpath', res.context.query.date('(//div[contains(text(), "Today:")])', 'MMM DD, YYYY'));
40
41
  console.log('duration', res.context.query.duration('#duration'));
41
42
  console.log('timestamp', res.context.query.duration('#timestamp'));
42
43
  console.log('timestring', res.context.query.duration('#timestring'));
@@ -46,6 +47,7 @@ async function initTest() {
46
47
  console.log('number indexed', res.context.query.number('.number', { match: /(\d+)/, matchIndex: 1 }));
47
48
  console.log('data', res.context.query.json('#json'));
48
49
  console.log('items', res.context.query.contents('.item'));
50
+ console.log('items css xpath array', res.context.query.contents(['.item', '//li[contains(@class, "number")]']));
49
51
  console.log('link', res.context.query.url('#link'));
50
52
  console.log('links', res.context.query.urls('.link'));
51
53
  console.log('text', res.context.query.text('.text'));