unprint 0.5.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -122,6 +122,14 @@ Options
122
122
 
123
123
  Returns a Date object.
124
124
 
125
+ #### Query a duration
126
+ `query.duration(selector, [options])` or `query.dur`
127
+
128
+ Options
129
+ * `match` (RegExp): The text to extract before attempting to parse it as a duration. The default expression will attempt to extract `(hh:)mm:ss` and `PT##H##M##S`.
130
+
131
+ Returns the duration in seconds as a number.
132
+
125
133
  #### Query JSON
126
134
  `query.json([selector], [options])`
127
135
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unprint",
3
- "version": "0.5.0",
3
+ "version": "0.6.1",
4
4
  "description": "Simplify common web scraping tasks while staying in control of the data.",
5
5
  "main": "src/app.js",
6
6
  "scripts": {
package/src/app.js CHANGED
@@ -435,6 +435,49 @@ function queryDates(context, selector, format, customOptions) {
435
435
  }));
436
436
  }
437
437
 
438
/**
 * Extract a clock-style duration from a string and convert it to seconds.
 *
 * @param {string} durationString - Text that contains the duration.
 * @param {RegExp} [match] - Expression selecting the duration text; defaults
 *   to a pattern matching `(hh:)mm:ss`.
 * @returns {number|null} The duration in seconds, or null when the input is
 *   not a string or the expression does not match.
 */
function extractDuration(durationString, match) {
  // Missing or non-string input has no duration to extract; return null
  // like any other miss instead of throwing on `.match`.
  if (typeof durationString !== 'string') {
    return null;
  }

  const durationMatch = durationString.match(match || /(\d+:)?\d+:\d+/);

  if (durationMatch) {
    // Split on ':' as well as 'h'/'m' separators, left-pad with '00' and keep
    // at most the last three segments before handing the text to moment.
    const segments = ['00'].concat(durationMatch[0].split(/[:hm]/)).slice(-3);

    return moment.duration(segments.join(':')).asSeconds();
  }

  return null;
}
449
+
450
/**
 * Extract an `##H##M##S`-style timestamp (e.g. `PT1H34M18S`) from a string
 * and convert it to seconds.
 *
 * Every component is optional, but at least one number must be present.
 * A number without a unit is read as seconds. Matching is case-insensitive
 * and tolerates whitespace between components.
 *
 * @param {string} durationString - Text that contains the timestamp.
 * @returns {number|null} The duration in seconds, or null when the text
 *   contains no digits.
 */
function extractTimestamp(durationString) {
  // The lookahead forces the match to start at a digit, so the all-optional
  // groups can never produce an empty match. Hours and minutes require their
  // unit letter; the final group also accepts a bare number as seconds.
  const timestampMatch = durationString.match(/(?=\d)(?:(\d+)H\s*)?(?:(\d+)M\s*)?(?:(\d+)S?)?/i);

  if (!timestampMatch) {
    return null;
  }

  const [, hours, minutes, seconds] = timestampMatch;

  // Components that were not present are undefined and count as zero.
  return (Number(hours ?? 0) * 3600) + (Number(minutes ?? 0) * 60) + Number(seconds ?? 0);
}
463
+
464
/**
 * Query the content selected by `selector` and parse it as a duration.
 *
 * When `options.match` is given, only `extractDuration` is tried with that
 * expression. Otherwise `extractDuration` is tried with its default
 * expression first, then `extractTimestamp`.
 *
 * @param {*} context - Query context, passed through to `queryContent`.
 * @param {*} selector - Selector, passed through to `queryContent`.
 * @param {Object} [customOptions] - Options; also passed to `queryContent`.
 * @param {RegExp} [customOptions.match] - Expression selecting the duration text.
 * @returns {number|null} The duration in seconds, or null when no content
 *   was found or it could not be parsed.
 */
function queryDuration(context, selector, customOptions) {
  const options = { ...customOptions };
  const durationString = queryContent(context, selector, customOptions);

  if (!durationString) {
    return null;
  }

  if (options.match) {
    return extractDuration(durationString, options.match);
  }

  // Use `??` rather than `||`: a parsed duration of 0 seconds is a valid
  // result and must not be treated as a miss.
  return extractDuration(durationString)
    ?? extractTimestamp(durationString)
    ?? null;
}
480
+
438
481
  const queryFns = {
439
482
  element: queryElement,
440
483
  elements: queryElements,
@@ -459,6 +502,8 @@ const queryFns = {
459
502
  jsons: queryJsons,
460
503
  date: queryDate,
461
504
  dates: queryDates,
505
+ duration: queryDuration,
506
+ dur: queryDuration,
462
507
  sourceSet: querySourceSet,
463
508
  srcSet: querySourceSet,
464
509
  url: queryUrl,
@@ -638,6 +683,7 @@ module.exports = {
638
683
  init,
639
684
  initAll,
640
685
  extractDate,
686
+ extractDuration,
641
687
  options: configure,
642
688
  query: initQueryFns(queryFns),
643
689
  };
package/tests/index.html CHANGED
@@ -21,6 +21,10 @@
21
21
  <div id="date">Date: 22-07-2022 02:00</div>
22
22
  <div id="date2">Date: 13-05-2022 18:00</div>
23
23
 
24
+ <div id="duration">01:15:33</div>
25
+ <div id="timestamp">PT1H34M18S</div>
26
+ <div id="timestamp2">PT34M18S</div>
27
+
24
28
  <img class="image" src="https://i.redd.it/vn9h981hlx281.png">
25
29
  <img class="image" src="https://i.redd.it/1s22dsrqy0181.jpg">
26
30
  <img class="image" src="https://i.redd.it/e91oo4ueyeb71.jpg">
package/tests/init.js CHANGED
@@ -18,6 +18,8 @@ async function initTest() {
18
18
 
19
19
  console.log('title', res.context.query.content('//*[contains(text(), "Test")]'));
20
20
  console.log('date', res.context.query.date('#date', 'DD-MM-YYYY HH:mm'));
21
+ console.log('duration', res.context.query.duration('#duration'));
22
+ console.log('timestamp', res.context.query.duration('#timestamp'));
21
23
  console.log('data', res.context.query.json('#json'));
22
24
  console.log('items', res.context.query.contents('.item'));
23
25
  console.log('link', res.context.query.url('#link'));