unprint 0.6.2 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -71,6 +71,14 @@ Return the number of elements that match the selector.
71
71
 
72
72
  Return the text contents of an element (`.textContent`).
73
73
 
74
+ #### Query a number
75
+ `query.number([selector], [options])`
76
+
77
+ Options
78
+ * `match`: The regular expression used to extract a number from the text; defaults to `/\d+(\.\d*)?/`, which matches unsigned integers and decimals.
79
+
80
+ Return the contents of the element or attribute as a Number primitive.
81
+
74
82
  #### Query the HTML
75
83
  `query.content([selector], [options])`
76
84
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unprint",
3
- "version": "0.6.2",
3
+ "version": "0.7.1",
4
4
  "description": "Simplify common web scraping tasks while staying in control of the data.",
5
5
  "main": "src/app.js",
6
6
  "scripts": {
package/src/app.js CHANGED
@@ -62,7 +62,7 @@ function getElements(context, selector, firstOnly = false) {
62
62
  return context.element;
63
63
  }
64
64
 
65
- if (/^\/\//.test(selector)) {
65
+ if (/^\//.test(selector)) {
66
66
  // XPath selector
67
67
  const iterator = globalWindow.document.evaluate(selector, context.element, null, globalWindow.XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
68
68
 
@@ -177,6 +177,48 @@ function queryAttributes(context, selector, attribute, customOptions) {
177
177
  });
178
178
  }
179
179
 
180
// Default pattern for pulling a number out of text: one or more digits,
// optionally followed by a dot and further digits (unsigned only).
const defaultNumberRegexp = /\d+(\.\d*)?/;

/**
 * Convert a string to a Number, optionally extracting the numeric part first.
 *
 * @param {string|null|undefined} numberString - Text that should contain a number.
 * @param {RegExp|string|null} [match] - Pattern used to extract the numeric
 *   substring; when falsy the whole string is converted.
 * @returns {number|null} The parsed number, or `null` when the input is empty,
 *   the pattern does not match, or the selected text is not numeric.
 */
function matchNumberString(numberString, match) {
  if (!numberString) {
    return null;
  }

  const candidate = match ? numberString.match(match)?.[0] : numberString;

  // No match, or an empty/whitespace-only match: Number('') would silently
  // become 0, which is not a number that was actually found in the text.
  if (candidate == null || String(candidate).trim() === '') {
    return null;
  }

  const value = Number(candidate);

  // Report "no number" consistently as null rather than leaking NaN to callers.
  return Number.isNaN(value) ? null : value;
}
193
+
194
/**
 * Query a single element's content and convert it to a number.
 *
 * @param {object} context - Query context handed through to `queryContent`.
 * @param {string} [selector] - Selector handed through to `queryContent`.
 * @param {object} [customOptions] - Options handed through to `queryContent`;
 *   its `match` property overrides the default number pattern.
 * @returns {number|null} Whatever `matchNumberString` yields for the queried content.
 */
function queryNumber(context, selector, customOptions) {
  // Fetch the content first, then resolve the effective pattern.
  const content = queryContent(context, selector, customOptions);
  const { match } = { match: defaultNumberRegexp, ...customOptions };

  return matchNumberString(content, match);
}
204
+
205
/**
 * Query the content of all matching elements and convert each to a number.
 *
 * @param {object} context - Query context handed through to `queryContents`.
 * @param {string} [selector] - Selector handed through to `queryContents`.
 * @param {object} [customOptions] - Options handed through to `queryContents`;
 *   its `match` property overrides the default number pattern.
 * @returns {number[]|null} The numbers that could be parsed, or `null` when
 *   `queryContents` returns a falsy value.
 */
function queryNumbers(context, selector, customOptions) {
  const numberStrings = queryContents(context, selector, customOptions);

  if (!numberStrings) {
    return null;
  }

  const options = {
    match: defaultNumberRegexp,
    ...customOptions,
  };

  return numberStrings
    .map((numberString) => matchNumberString(numberString, options.match))
    // Drop only unparseable entries (null / NaN); a plain truthiness filter
    // would also discard a legitimately parsed 0.
    .filter((number) => typeof number === 'number' && !Number.isNaN(number));
}
221
+
180
222
  function queryHtml(context, selector, customOptions) {
181
223
  const target = queryElement(context, selector, customOptions);
182
224
 
@@ -436,7 +478,7 @@ function queryDates(context, selector, format, customOptions) {
436
478
  }
437
479
 
438
480
  function extractDuration(durationString, match) {
439
- const durationMatch = durationString.match(match || /(\d+:)?\d+:\d+/);
481
+ const durationMatch = durationString?.match(match || /(\d+:)?\d+:\d+/);
440
482
 
441
483
  if (durationMatch) {
442
484
  const segments = ['00'].concat(durationMatch[0].split(/[:hm]/)).slice(-3);
@@ -448,7 +490,7 @@ function extractDuration(durationString, match) {
448
490
  }
449
491
 
450
492
  function extractTimestamp(durationString) {
451
- const timestampMatch = durationString.match(/(\d+H)?\s*(\d+M)?\s*\d+S?/i);
493
+ const timestampMatch = durationString?.match(/(\d+H)?\s*(\d+M)?\s*\d+S?/i);
452
494
 
453
495
  if (timestampMatch) {
454
496
  const hours = timestampMatch[0].match(/(\d+)H/i)?.[1] || 0;
@@ -500,6 +542,10 @@ const queryFns = {
500
542
  imgs: queryImages,
501
543
  json: queryJson,
502
544
  jsons: queryJsons,
545
+ number: queryNumber,
546
+ num: queryNumber,
547
+ numbers: queryNumbers,
548
+ nums: queryNumbers,
503
549
  date: queryDate,
504
550
  dates: queryDates,
505
551
  duration: queryDuration,
package/tests/index.html CHANGED
@@ -14,16 +14,22 @@
14
14
  <li class="item">Item 3</li>
15
15
  </ul>
16
16
 
17
+ <ul id="numbers">
18
+ <li class="number">123</li>
19
+ <li class="number">234.56</li>
20
+ <li class="number">789.0</li>
21
+ </ul>
22
+
17
23
  <a id="link" href="http://localhost:3101/html">Get HTML</a>
18
24
  <a id="path" href="/json">Get data</a>
19
25
  <a id="relativePath" href="./json">Get data</a>
20
26
 
21
27
  <div id="date">Date: 22-07-2022 02:00</div>
22
28
  <div id="date2">Date: 13-05-2022 18:00</div>
29
+ <div>Today: Nov 14, 2022</div>
23
30
 
24
31
  <div id="duration">01:15:33</div>
25
32
  <div id="timestamp">PT1H34M18S</div>
26
- <div id="timestamp">PT34M18S</div>
27
33
 
28
34
  <img class="image" src="https://i.redd.it/vn9h981hlx281.png">
29
35
  <img class="image" src="https://i.redd.it/1s22dsrqy0181.jpg">
package/tests/init.js CHANGED
@@ -18,8 +18,11 @@ async function initTest() {
18
18
 
19
19
  console.log('title', res.context.query.content('//*[contains(text(), "Test")]'));
20
20
  console.log('date', res.context.query.date('#date', 'DD-MM-YYYY HH:mm'));
21
+ console.log('date xpath', res.context.query.date('//div[contains(text(), "Today:")]', 'MMM DD, YYYY'));
21
22
  console.log('duration', res.context.query.duration('#duration'));
22
23
  console.log('timestamp', res.context.query.duration('#timestamp'));
24
+ console.log('number', res.context.query.number('.number'));
25
+ console.log('numbers', res.context.query.numbers('.number'));
23
26
  console.log('data', res.context.query.json('#json'));
24
27
  console.log('items', res.context.query.contents('.item'));
25
28
  console.log('link', res.context.query.url('#link'));