npm - unprint - Versions diffs - 0.4.3 → 0.5.0 - Mend

unprint 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/README.md CHANGED Viewed

@@ -43,11 +43,73 @@ query.element('h1#title'); // HTMLHeadingElement
 #### Selector
 The selector can be a CSS selector, an XPath selector starting with `//`, or an array of either or both acting as fallbacks. If the selector is falsy, the input element will be used.
+#### Querying multiple elements
+Most methods can be used in plural, returning an array of results, e.g. `query.elements()`, `query.dates()`.
 #### Query an element
 * `query.element([selector], [options])`
 Returns the element node directly.
+#### Query an attribute
+`query.attribute(selector, attribute, [options])` or `query.attr()`
+Return the contents of an attribute. Alias for `query.element([selector], { attribute: [attribute] })`.
+#### Query existence
+`query.exists(selector, [options])`
+Return the presence of an element as a boolean.
+#### Query count
+`query.count(selector, [options])`
+Return the number of elements that match the selector.
+#### Query the content
+`query.content([selector], [options])`
+Return the text contents of an element (`.textContent`).
+#### Query the HTML
+`query.html([selector], [options])`
+Return the HTML contents of an element (`.innerHTML`).
+#### Query a URL
+`query.url([selector], [options])`
+Options
+* `origin`: The hostname to prefix when it is not included in the URL (`/path`).
+* `protocol`: The protocol to use when it is not included in the URL (`//www.example.com`, default `http`).
+Returns the `href` from an anchor element (or any other specified target) as a string.
+#### Query an image
+`query.image([selector], [options])` or `query.img()`
+Options:
+* All options supported by `query.url()`.
+Returns the `src` from an image element (or any other specified target) as a string.
+#### Query a source set
+`query.sourceSet([selector], [options])` or `query.srcSet()`
+Options:
+* `includeDescriptor`: Produce an array of `{ descriptor, url }` instead of URL strings.
+* All options supported by `query.url()`.
+Returns an array of media URLs from the `srcset` of a media element as strings, sorted by their descriptor from large to small.
+#### Query a video
+`query.video([selector], [options])`
+Options:
+* All options supported by `query.url()`.
+Returns the `src` from a video source element (or any other specified target) as a string.
 #### Query a date
 `query.date(selector, format, [options])`
@@ -55,13 +117,15 @@ Arguments
 * `format` (string, array): The input format as a string or array of strings described by the [Moment.js docs](https://momentjs.com/docs/#/displaying/format/).
 Options
-* `match (RegExp): The text to extract before attempting to parse it as a date. The default expression will attempt to extract any of 01-01-1970, 1970-01-01, 01/01/1970 or January 1, 1970 with optional 00:00[:00] time.
+* `match` (RegExp): The text to extract before attempting to parse it as a date. The default expression will attempt to extract any of 01-01-1970, 1970-01-01, 01/01/1970 or January 1, 1970 with optional 00:00[:00] time.
 * `timezone` (string): The name of the input timezone, defaults to 'UTC'.
 Returns a Date object.
-#### Querying multiple elements
-Most methods can be used in plural, returning an array of results, i.e. `query.elements()`, `query.dates()`.
+#### Query JSON
+`query.json([selector], [options])`
+Returns the parsed JSON content of an element as an object.
 ### HTTP request
 * `unprint.get(url, [options])`

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "unprint",
-  "version": "0.4.3",
+  "version": "0.5.0",
   "description": "Simplify common web scraping tasks while staying in control of the data.",
   "main": "src/app.js",
   "scripts": {

package/src/app.js CHANGED Viewed

@@ -295,6 +295,50 @@ function queryImages(context, selector = 'img', customOptions) {
 	return imageUrls.map((imageUrl) => prefixUrl(imageUrl, options.origin, options));
 }
+function querySourceSet(context, selector, attr = 'srcset', customOptions = {}) {
+	const srcset = queryAttribute(context, selector, attr, customOptions);
+	if (!srcset) {
+		return null;
+	}
+	const sources = srcset
+		.split(/\s*,\s*/)
+		.map((source) => {
+			const [link, descriptor] = source.split(' ');
+			if (link) {
+				return {
+					descriptor: descriptor || 'fallback',
+					url: prefixUrl(link, customOptions.origin, customOptions.protocol),
+				};
+			}
+			return null;
+		})
+		.filter(Boolean)
+		.sort((sourceA, sourceB) => {
+			if (sourceB.descriptor === 'fallback' || parseInt(sourceA.descriptor, 10) > parseInt(sourceB.descriptor, 10)) {
+				return -1;
+			}
+			if (parseInt(sourceA.descriptor, 10) < parseInt(sourceB.descriptor, 10)) {
+				return 1;
+			}
+			return 0;
+		});
+	if (customOptions.includeDescriptor) {
+		return sources.map((source) => ({
+			descriptor: source.descriptor,
+			url: prefixUrl(source.url),
+		}));
+	}
+	return sources.map((source) => prefixUrl(source.url));
+}
 function queryVideo(context, selector = 'source', customOptions) {
 	const options = {
 		...context.options,
@@ -415,6 +459,8 @@ const queryFns = {
 	jsons: queryJsons,
 	date: queryDate,
 	dates: queryDates,
+	sourceSet: querySourceSet,
+	srcSet: querySourceSet,
 	url: queryUrl,
 	video: queryVideo,
 	videos: queryVideos,

package/tests/index.html CHANGED Viewed

@@ -25,6 +25,8 @@
 		<img class="image" src="https://i.redd.it/1s22dsrqy0181.jpg">
 		<img class="image" src="https://i.redd.it/e91oo4ueyeb71.jpg">
+		<img class="srcset" srcset="https://i.redd.it/e91oo4ueyeb71.jpg 240w, https://i.redd.it/vn9h981hlx281.png 480w, https://i.redd.it/e91oo4ueyeb71.jpg 640w">
 		<video id="video"><source src="https://i.imgur.com/eDQmLys.mp4"></video>
 		<script id="json" type="application/js">{"foo": "bar", "lorem": "ipsum", "hello": "world"}</script>

package/tests/init.js CHANGED Viewed

@@ -23,6 +23,7 @@ async function initTest() {
 	console.log('link', res.context.query.url('#link'));
 	console.log('image', res.context.query.img('.image'));
 	console.log('images', res.context.query.imgs('.image'));
+	console.log('srcset', res.context.query.sourceSet('.srcset'));
 	console.log('path', res.context.query.url('#path'));
 	console.log('relative path', res.context.query.url('#relativePath'));
 	console.log('exists', res.context.query.exists('#title'));