unprint 0.4.3 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +75 -3
- package/package.json +1 -1
- package/src/app.js +91 -0
- package/tests/index.html +5 -0
- package/tests/init.js +3 -0
package/README.md
CHANGED
|
@@ -43,11 +43,73 @@ query.element('h1#title'); // HTMLHeadingElement
|
|
|
43
43
|
#### Selector
|
|
44
44
|
The selector can be a CSS selector, an XPath selector starting with `//`, or an array of either or both acting as fallbacks. If the selector is falsy, the input element will be used.
|
|
45
45
|
|
|
46
|
+
#### Querying multiple elements
|
|
47
|
+
Most methods can be used in plural, returning an array of results, e.g. `query.elements()`, `query.dates()`.
|
|
48
|
+
|
|
46
49
|
#### Query an element
|
|
47
50
|
* `query.element([selector], [options])`
|
|
48
51
|
|
|
49
52
|
Returns the element node directly.
|
|
50
53
|
|
|
54
|
+
#### Query an attribute
|
|
55
|
+
`query.attribute(selector, attribute, [options])` or `query.attr()`
|
|
56
|
+
|
|
57
|
+
Return the contents of an attribute. Alias for `query.element([selector], { attribute: [attribute] })`.
|
|
58
|
+
|
|
59
|
+
#### Query existence
|
|
60
|
+
`query.exists(selector, [options])`
|
|
61
|
+
|
|
62
|
+
Return the presence of an element as a boolean.
|
|
63
|
+
|
|
64
|
+
#### Query count
|
|
65
|
+
`query.count(selector, [options])`
|
|
66
|
+
|
|
67
|
+
Return the number of elements that match the selector.
|
|
68
|
+
|
|
69
|
+
#### Query the content
|
|
70
|
+
`query.content([selector], [options])`
|
|
71
|
+
|
|
72
|
+
Return the text contents of an element (`.textContent`).
|
|
73
|
+
|
|
74
|
+
#### Query the HTML
|
|
75
|
+
`query.html([selector], [options])`
|
|
76
|
+
|
|
77
|
+
Return the HTML contents of an element (`.innerHTML`).
|
|
78
|
+
|
|
79
|
+
#### Query a URL
|
|
80
|
+
`query.url([selector], [options])`
|
|
81
|
+
|
|
82
|
+
Options
|
|
83
|
+
* `origin`: The hostname to prefix when it is not included in the URL (`/path`).
|
|
84
|
+
* `protocol`: The protocol to use when it is not included in the URL (`//www.example.com`, default `http`).
|
|
85
|
+
|
|
86
|
+
Returns the `href` from an anchor element (or any other specified target) as a string.
|
|
87
|
+
|
|
88
|
+
#### Query an image
|
|
89
|
+
`query.image([selector], [options])` or `query.img()`
|
|
90
|
+
|
|
91
|
+
Options:
|
|
92
|
+
* All options supported by `query.url()`.
|
|
93
|
+
|
|
94
|
+
Returns the `src` from an image element (or any other specified target) as a string.
|
|
95
|
+
|
|
96
|
+
#### Query a source set
|
|
97
|
+
`query.sourceSet([selector], [attribute], [options])` or `query.srcSet()`
|
|
98
|
+
|
|
99
|
+
Options:
|
|
100
|
+
* `includeDescriptor`: Produce an array of `{ descriptor, url }` instead of URL strings.
|
|
101
|
+
* All options supported by `query.url()`.
|
|
102
|
+
|
|
103
|
+
Returns an array of media URLs from the `srcset` of a media element as strings, sorted by their descriptor from large to small.
|
|
104
|
+
|
|
105
|
+
#### Query a video
|
|
106
|
+
`query.video([selector], [options])`
|
|
107
|
+
|
|
108
|
+
Options:
|
|
109
|
+
* All options supported by `query.url()`.
|
|
110
|
+
|
|
111
|
+
Returns the `src` from a video source element (or any other specified target) as a string.
|
|
112
|
+
|
|
51
113
|
#### Query a date
|
|
52
114
|
`query.date(selector, format, [options])`
|
|
53
115
|
|
|
@@ -55,13 +117,23 @@ Arguments
|
|
|
55
117
|
* `format` (string, array): The input format as a string or array of strings described by the [Moment.js docs](https://momentjs.com/docs/#/displaying/format/).
|
|
56
118
|
|
|
57
119
|
Options
|
|
58
|
-
* `match (RegExp): The text to extract before attempting to parse it as a date. The default expression will attempt to extract any of 01-01-1970, 1970-01-01, 01/01/1970 or January 1, 1970 with optional 00:00[:00] time.
|
|
120
|
+
* `match` (RegExp): The text to extract before attempting to parse it as a date. The default expression will attempt to extract any of 01-01-1970, 1970-01-01, 01/01/1970 or January 1, 1970 with optional 00:00[:00] time.
|
|
59
121
|
* `timezone` (string): The name of the input timezone, defaults to 'UTC'.
|
|
60
122
|
|
|
61
123
|
Returns a Date object.
|
|
62
124
|
|
|
63
|
-
####
|
|
64
|
-
|
|
125
|
+
#### Query a duration
|
|
126
|
+
`query.duration(selector, [options])` or `query.dur()`
|
|
127
|
+
|
|
128
|
+
Options
|
|
129
|
+
* `match` (RegExp): The text to extract before attempting to parse it as a duration. The default expression will attempt to extract `(hh:)mm:ss` and `PT##H##M##S`.
|
|
130
|
+
|
|
131
|
+
Returns the duration in seconds as a number.
|
|
132
|
+
|
|
133
|
+
#### Query JSON
|
|
134
|
+
`query.json([selector], [options])`
|
|
135
|
+
|
|
136
|
+
Returns the parsed JSON content of an element as an object.
|
|
65
137
|
|
|
66
138
|
### HTTP request
|
|
67
139
|
* `unprint.get(url, [options])`
|
package/package.json
CHANGED
package/src/app.js
CHANGED
|
@@ -295,6 +295,50 @@ function queryImages(context, selector = 'img', customOptions) {
|
|
|
295
295
|
return imageUrls.map((imageUrl) => prefixUrl(imageUrl, options.origin, options));
|
|
296
296
|
}
|
|
297
297
|
|
|
298
|
+
/**
 * Query the candidate URLs listed in an element's source set attribute.
 *
 * The attribute value is read with `queryAttribute`, split into candidates on
 * commas, and every candidate URL is passed through `prefixUrl`. Candidates are
 * sorted by the numeric part of their descriptor from large to small;
 * candidates without a descriptor are labelled `'fallback'` and sorted last.
 *
 * @param {Object} context - Query context forwarded to `queryAttribute`; its `options` act as defaults.
 * @param {string|string[]} selector - Selector forwarded to `queryAttribute`.
 * @param {string} [attr='srcset'] - Name of the attribute holding the source set.
 * @param {Object} [customOptions={}] - Per-call options.
 * @param {boolean} [customOptions.includeDescriptor] - Return `{ descriptor, url }` objects instead of URL strings.
 * @param {string} [customOptions.origin] - Origin handed to `prefixUrl`.
 * @returns {Array<string>|Array<{descriptor: string, url: string}>|null}
 *   Sorted candidates, or `null` when the attribute is missing or empty.
 */
function querySourceSet(context, selector, attr = 'srcset', customOptions = {}) {
  const srcset = queryAttribute(context, selector, attr, customOptions);

  if (!srcset) {
    return null;
  }

  // Same precedence as the sibling query functions: per-call options override context options.
  const options = {
    ...context.options,
    ...customOptions,
  };

  const sources = srcset
    .trim() // leading whitespace would otherwise yield an empty URL for the first candidate
    .split(/\s*,\s*/)
    .map((source) => {
      // Candidates may separate URL and descriptor with any amount of whitespace.
      const [link, descriptor] = source.trim().split(/\s+/);

      if (!link) {
        return null;
      }

      // parseFloat keeps fractional density descriptors such as `1.5x` distinct.
      const size = Number.parseFloat(descriptor);

      return {
        descriptor: descriptor || 'fallback',
        // Pass the whole options object, matching how queryImages calls prefixUrl.
        url: prefixUrl(link, options.origin, options),
        // Descriptor-less (fallback) candidates always sort last.
        size: Number.isNaN(size) ? Number.NEGATIVE_INFINITY : size,
      };
    })
    .filter(Boolean)
    .sort((sourceA, sourceB) => {
      if (sourceA.size === sourceB.size) {
        return 0;
      }

      return sourceA.size > sourceB.size ? -1 : 1;
    });

  if (options.includeDescriptor) {
    return sources.map(({ descriptor, url }) => ({ descriptor, url }));
  }

  return sources.map(({ url }) => url);
}
|
|
341
|
+
|
|
298
342
|
function queryVideo(context, selector = 'source', customOptions) {
|
|
299
343
|
const options = {
|
|
300
344
|
...context.options,
|
|
@@ -391,6 +435,48 @@ function queryDates(context, selector, format, customOptions) {
|
|
|
391
435
|
}));
|
|
392
436
|
}
|
|
393
437
|
|
|
438
|
+
/**
 * Extract a clock-style duration from a string and convert it to seconds.
 *
 * @param {string} durationString - Text to search for a duration.
 * @param {RegExp} [match] - Custom expression; defaults to `(hh:)mm:ss`.
 * @returns {number|null} Seconds as reported by `moment.duration(...).asSeconds()`,
 *   or `null` when the expression does not match.
 */
function extractDuration(durationString, match) {
  const pattern = match || /(\d+:)?\d+:\d+/;
  const found = durationString.match(pattern);

  if (!found) {
    return null;
  }

  // Prepend an hour segment so `mm:ss` becomes `00:mm:ss`, then keep the last three segments.
  const parts = ['00', ...found[0].split(/[:hm]/)].slice(-3);
  const clock = parts.join(':');

  return moment.duration(clock).asSeconds();
}
|
|
449
|
+
/**
 * Extract an `##H##M##S` style timestamp (e.g. `PT1H34M18S`) and convert it to seconds.
 *
 * Every component is optional, so `PT1H` and `PT34M` are read as hours and
 * minutes. A trailing number without a unit is read as seconds. Matching is
 * case-insensitive and starts at the first digit in the string.
 *
 * @param {string} durationString - Text to search for a timestamp.
 * @returns {number|null} Duration in seconds, or `null` when the string contains no digits.
 */
function extractTimestamp(durationString) {
  // The lookahead anchors the match on a digit; without it the all-optional
  // pattern would match the empty string at index 0.
  const timestampMatch = durationString.match(/(?=\d)(?:(\d+)H)?\s*(?:(\d+)M)?\s*(?:(\d+)S?)?/i);

  if (!timestampMatch) {
    return null;
  }

  // Unmatched groups are undefined, so the defaults make absent components count as zero.
  const [, hours = 0, minutes = 0, seconds = 0] = timestampMatch;

  return (Number(hours) * 3600) + (Number(minutes) * 60) + Number(seconds);
}
|
|
462
|
+
|
|
463
|
+
/**
 * Query an element's text content and parse it as a duration in seconds.
 *
 * With a custom `match` expression only `extractDuration` is used. Otherwise
 * the clock format is tried first and the `##H##M##S` timestamp format second.
 *
 * @param {Object} context - Query context forwarded to `queryContent`.
 * @param {string|string[]} selector - Selector forwarded to `queryContent`.
 * @param {Object} [customOptions] - Options forwarded to `queryContent`.
 * @param {RegExp} [customOptions.match] - Expression used to extract the duration text.
 * @returns {number|null} Duration in seconds, or `null` when there is no content or nothing parses.
 */
function queryDuration(context, selector, customOptions) {
  const durationString = queryContent(context, selector, customOptions);

  if (!durationString) {
    return null;
  }

  const match = customOptions?.match;

  if (match) {
    return extractDuration(durationString, match);
  }

  // Use `??` rather than `||` so a valid zero-second duration is returned as 0
  // instead of being discarded as falsy and turned into null.
  return extractDuration(durationString) ?? extractTimestamp(durationString);
}
|
|
479
|
+
|
|
394
480
|
const queryFns = {
|
|
395
481
|
element: queryElement,
|
|
396
482
|
elements: queryElements,
|
|
@@ -415,6 +501,10 @@ const queryFns = {
|
|
|
415
501
|
jsons: queryJsons,
|
|
416
502
|
date: queryDate,
|
|
417
503
|
dates: queryDates,
|
|
504
|
+
duration: queryDuration,
|
|
505
|
+
dur: queryDuration,
|
|
506
|
+
sourceSet: querySourceSet,
|
|
507
|
+
srcSet: querySourceSet,
|
|
418
508
|
url: queryUrl,
|
|
419
509
|
video: queryVideo,
|
|
420
510
|
videos: queryVideos,
|
|
@@ -592,6 +682,7 @@ module.exports = {
|
|
|
592
682
|
init,
|
|
593
683
|
initAll,
|
|
594
684
|
extractDate,
|
|
685
|
+
extractDuration,
|
|
595
686
|
options: configure,
|
|
596
687
|
query: initQueryFns(queryFns),
|
|
597
688
|
};
|
package/tests/index.html
CHANGED
|
@@ -21,10 +21,15 @@
|
|
|
21
21
|
<div id="date">Date: 22-07-2022 02:00</div>
|
|
22
22
|
<div id="date2">Date: 13-05-2022 18:00</div>
|
|
23
23
|
|
|
24
|
+
<div id="duration">01:15:33</div>
|
|
25
|
+
<div id="timestamp">PT1H34M18S</div>
|
|
26
|
+
|
|
24
27
|
<img class="image" src="https://i.redd.it/vn9h981hlx281.png">
|
|
25
28
|
<img class="image" src="https://i.redd.it/1s22dsrqy0181.jpg">
|
|
26
29
|
<img class="image" src="https://i.redd.it/e91oo4ueyeb71.jpg">
|
|
27
30
|
|
|
31
|
+
<img class="srcset" srcset="https://i.redd.it/e91oo4ueyeb71.jpg 240w, https://i.redd.it/vn9h981hlx281.png 480w, https://i.redd.it/e91oo4ueyeb71.jpg 640w">
|
|
32
|
+
|
|
28
33
|
<video id="video"><source src="https://i.imgur.com/eDQmLys.mp4"></video>
|
|
29
34
|
|
|
30
35
|
<script id="json" type="application/js">{"foo": "bar", "lorem": "ipsum", "hello": "world"}</script>
|
package/tests/init.js
CHANGED
|
@@ -18,11 +18,14 @@ async function initTest() {
|
|
|
18
18
|
|
|
19
19
|
console.log('title', res.context.query.content('//*[contains(text(), "Test")]'));
|
|
20
20
|
console.log('date', res.context.query.date('#date', 'DD-MM-YYYY HH:mm'));
|
|
21
|
+
console.log('duration', res.context.query.duration('#duration'));
|
|
22
|
+
console.log('timestamp', res.context.query.duration('#timestamp'));
|
|
21
23
|
console.log('data', res.context.query.json('#json'));
|
|
22
24
|
console.log('items', res.context.query.contents('.item'));
|
|
23
25
|
console.log('link', res.context.query.url('#link'));
|
|
24
26
|
console.log('image', res.context.query.img('.image'));
|
|
25
27
|
console.log('images', res.context.query.imgs('.image'));
|
|
28
|
+
console.log('srcset', res.context.query.sourceSet('.srcset'));
|
|
26
29
|
console.log('path', res.context.query.url('#path'));
|
|
27
30
|
console.log('relative path', res.context.query.url('#relativePath'));
|
|
28
31
|
console.log('exists', res.context.query.exists('#title'));
|