unprint 0.9.0 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -41,7 +41,7 @@ query.element('h1#title'); // HTMLHeadingElement
41
41
  **From here on, the query methods will be described in their initialized form.** The API for the *uninitialized* methods is identical, except for the element passed as the first argument.
42
42
 
43
43
  #### Selector
44
- The selector can be a CSS selector, an XPath selector starting with `//`, or an array of either or both acting as fallbacks. If the selector is falsy, the input element will be used.
44
+ The selector can be a CSS selector, an XPath selector starting with `/`, or an array of either or both acting as fallbacks. If the selector is falsy, the input element will be used.
45
45
 
46
46
  #### Querying multiple elements
47
47
  Most methods can be used in plural, returning an array of results, i.e. `query.elements()`, `query.dates()`.
@@ -75,7 +75,8 @@ Return the text contents of an element (`.textContent`).
75
75
  `query.number([selector], [options])`
76
76
 
77
77
  Options
78
- * `match`: The regular expression to use to extract a number from text, default `/\d+(\.\d*)?/` for decimal numbers.
78
+ * `match`: The regular expression to use to extract a number from text, default `/\d+(\.\d+)?/` for decimal numbers.
79
+ * `matchIndex`: The index of the match result, useful for expressions containing groups or a global flag, default `0`.
79
80
 
80
81
  Return the contents of the element or attribute as a Number primitive.
81
82
 
@@ -131,6 +132,7 @@ Arguments
131
132
 
132
133
  Options
133
134
  * `match` (RegExp): The text to extract before attempting to parse it as a date. The default expression will attempt to extract any of 01-01-1970, 1970-01-01, 01/01/1970 or January 1, 1970 with optional 00:00[:00] time.
135
+ * `matchIndex`: The index of the match result, useful for expressions containing groups or a global flag, default `0`.
134
136
  * `timezone` (string): The name of the input timezone, defaults to 'UTC'.
135
137
 
136
138
  Returns a Date object.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unprint",
3
- "version": "0.9.0",
3
+ "version": "0.9.2",
4
4
  "description": "Simplify common web scraping tasks while staying in control of the data.",
5
5
  "main": "src/app.js",
6
6
  "scripts": {
package/src/app.js CHANGED
@@ -189,11 +189,11 @@ function queryDatasets(context, selector, dataAttribute, customOptions) {
189
189
  return targets.map((target) => target.dataset[dataAttribute]);
190
190
  }
191
191
 
192
- const defaultNumberRegexp = /\d+(\.\d*)?/;
192
+ const defaultNumberRegexp = /\d+(\.\d+)?/;
193
193
 
194
- function matchNumberString(numberString, match) {
195
- if (numberString && match) {
196
- return Number(numberString.match(match)?.[0]);
194
+ function matchNumberString(numberString, options) {
195
+ if (numberString && options.match) {
196
+ return Number(numberString.match(options.match)?.[options.matchIndex]);
197
197
  }
198
198
 
199
199
  if (numberString) {
@@ -208,10 +208,11 @@ function queryNumber(context, selector, customOptions) {
208
208
 
209
209
  const options = {
210
210
  match: defaultNumberRegexp,
211
+ matchIndex: 0,
211
212
  ...customOptions,
212
213
  };
213
214
 
214
- return matchNumberString(numberString, options.match);
215
+ return matchNumberString(numberString, options);
215
216
  }
216
217
 
217
218
  function queryNumbers(context, selector, customOptions) {
@@ -219,6 +220,7 @@ function queryNumbers(context, selector, customOptions) {
219
220
 
220
221
  const options = {
221
222
  match: defaultNumberRegexp,
223
+ matchIndex: 0,
222
224
  ...customOptions,
223
225
  };
224
226
 
@@ -227,7 +229,7 @@ function queryNumbers(context, selector, customOptions) {
227
229
  }
228
230
 
229
231
  return numberStrings
230
- .map((numberString) => matchNumberString(numberString, options.match))
232
+ .map((numberString) => matchNumberString(numberString, options))
231
233
  .filter(Boolean);
232
234
  }
233
235
 
@@ -450,28 +452,28 @@ function queryPosters(context, selector = 'video', customOptions) {
450
452
  return posterUrls.map((posterUrl) => prefixUrl(posterUrl, options.origin, options));
451
453
  }
452
454
 
453
- function extractJson(element) {
454
- if (!element) {
455
+ function extractJson(dataString) {
456
+ if (!dataString) {
455
457
  return null;
456
458
  }
457
459
 
458
460
  try {
459
- return JSON.parse(element.innerHTML);
461
+ return JSON.parse(dataString);
460
462
  } catch (error) {
461
463
  return null;
462
464
  }
463
465
  }
464
466
 
465
467
  function queryJson(context, selector, customOptions) {
466
- const target = queryElement(context, selector, customOptions);
468
+ const dataString = queryContent(context, selector, customOptions);
467
469
 
468
- return extractJson(target);
470
+ return extractJson(dataString);
469
471
  }
470
472
 
471
473
  function queryJsons(context, selector, customOptions) {
472
- const targets = queryElements(context, selector, customOptions);
474
+ const dataStrings = queryContents(context, selector, customOptions);
473
475
 
474
- return targets.map((target) => extractJson(target)).filter(Boolean);
476
+ return dataStrings.map((dataString) => extractJson(dataString)).filter(Boolean);
475
477
  }
476
478
 
477
479
  function extractDate(dateString, format, customOptions) {
@@ -485,6 +487,7 @@ function extractDate(dateString, format, customOptions) {
485
487
 
486
488
  const options = {
487
489
  match: /((\d{1,4}[/-]\d{1,2}[/-]\d{1,4})|(\w+\s+\d{1,2},?\s+\d{4}))(\s+\d{1,2}:\d{2}(:\d{2})?)?/g, // matches any of 01-01-1970, 1970-01-01 and January 1, 1970 with optional 00:00[:00] time
490
+ matchIndex: 0,
488
491
  timezone: 'UTC',
489
492
  ...customOptions,
490
493
  };
@@ -494,7 +497,7 @@ function extractDate(dateString, format, customOptions) {
494
497
  : trim(dateString);
495
498
 
496
499
  if (dateStamp) {
497
- const dateValue = moment.tz(options.match ? dateStamp[0] : dateStamp, format, options.timezone);
500
+ const dateValue = moment.tz(options.match ? dateStamp[options.matchIndex] : dateStamp, format, options.timezone);
498
501
 
499
502
  if (dateValue.isValid()) {
500
503
  return dateValue.toDate();
package/tests/init.js CHANGED
@@ -23,6 +23,7 @@ async function initTest() {
23
23
  console.log('timestamp', res.context.query.duration('#timestamp'));
24
24
  console.log('number', res.context.query.number('.number'));
25
25
  console.log('numbers', res.context.query.numbers('.number'));
26
+ console.log('number indexed', res.context.query.number('.number', { match: /(\d+)/, matchIndex: 1 }));
26
27
  console.log('data', res.context.query.json('#json'));
27
28
  console.log('items', res.context.query.contents('.item'));
28
29
  console.log('link', res.context.query.url('#link'));