unprint 0.18.6 → 0.18.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -168,6 +168,22 @@ Options
168
168
 
169
169
  Returns a Date object.
170
170
 
171
+ #### Query a relative date
172
+ `query.dateAgo(selector, [options])`
173
+
174
+ Parses relative timestamps such as '8 weeks ago' and '1 year ago'.
175
+
176
+ Options
177
+ * `match` (RegExp): The pattern used to extract the relative time from the text before it is parsed as a period. Expects two capture groups, in this order: the amount (e.g. `8`) and the unit (e.g. `weeks`).
178
+
179
+ Returns
180
+ ```
181
+ {
182
+ date: new Date('2026-01-01'), // Date object
183
+ precision: 'day', // year, month, week, day
184
+ }
185
+ ```
186
+
171
187
  #### Query a duration
172
188
  `query.duration(selector, format, [options])` or `query.dur`
173
189
 
@@ -217,7 +233,7 @@ Options
217
233
  * `apiUserAgent`: The default user agent header for raw requests (`get` interface `request`)
218
234
 
219
235
  Use Playwright with Chromium (experimental)
220
- * `unprint.browserRequest(url, [options])` or `unprint.browser(url, [options])`
236
+ * `unprint.browser(url, [options])`
221
237
  * `unprint.closeAllBrowsers()`: Close reused browser instances.
222
238
 
223
239
  Additional options
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unprint",
3
- "version": "0.18.6",
3
+ "version": "0.18.8",
4
4
  "description": "Simplify common web scraping tasks while staying in control of the data.",
5
5
  "main": "src/app.js",
6
6
  "scripts": {},
package/src/app.js CHANGED
@@ -775,6 +775,50 @@ function queryDates(context, selector, format, customOptions) {
775
775
  }));
776
776
  }
777
777
 
778
// Supported units, singular or plural. Anchored and case-insensitive so that
// 'Weeks' is accepted while words that merely contain a unit ('birthdays',
// 'midweek') are not treated as one.
const periodRegex = /^(?:years?|months?|weeks?|days?)$/i;

/**
 * Parse a relative timestamp such as '8 weeks ago' or '1 year ago'.
 *
 * @param {string} dateString - Text containing the relative timestamp.
 * @param {Object} [customOptions] - Overrides for the defaults below.
 * @param {RegExp} [customOptions.match=/(\d+)\s*(\w+)/] - Pattern applied to
 *   `dateString`; capture group 1 is read as the amount and capture group 2
 *   as the unit.
 * @returns {{ date: Date, precision: string }|null} The current UTC time minus
 *   the parsed amount, together with the singular lowercase unit ('year',
 *   'month', 'week' or 'day'); null when the text is empty, the pattern does
 *   not match, the unit is not supported or the amount is not a number.
 */
function extractDateAgo(dateString, customOptions) {
	if (!dateString) {
		return null;
	}

	const options = {
		match: /(\d+)\s*(\w+)/,
		...customOptions,
	};

	const timeMatch = dateString.match(options.match);

	if (!timeMatch) {
		return null;
	}

	const [rawAmount, rawUnit] = timeMatch.slice(1);

	// A custom pattern may capture fewer than two groups or an arbitrary word;
	// only proceed when the second group is exactly one of the supported units.
	if (!periodRegex.test(rawUnit)) {
		return null;
	}

	const amount = Number(rawAmount);

	// A custom pattern may capture a non-numeric amount; never subtract NaN.
	if (!Number.isFinite(amount)) {
		return null;
	}

	const unit = rawUnit.toLowerCase();

	return {
		date: moment.utc().subtract(amount, unit).toDate(),
		precision: unit.replace(/s$/, ''),
	};
}
807
+
808
/**
 * Query a single element's content and parse it as a relative timestamp.
 *
 * The same options object is handed to both `queryContent` and
 * `extractDateAgo`.
 *
 * @param {*} context - Query context forwarded to `queryContent`.
 * @param {string} selector - Selector forwarded to `queryContent`.
 * @param {Object} [customOptions] - Options forwarded to both calls.
 * @returns {*} Whatever `extractDateAgo` returns for the queried content.
 */
function queryDateAgo(context, selector, customOptions) {
	return extractDateAgo(
		queryContent(context, selector, customOptions),
		customOptions,
	);
}
813
+
814
/**
 * Query the content of every matching element and parse each one as a
 * relative timestamp, dropping entries that could not be parsed.
 *
 * The same options object is handed to both `queryContents` and
 * `extractDateAgo`.
 *
 * @param {*} context - Query context forwarded to `queryContents`.
 * @param {string} selector - Selector forwarded to `queryContents`.
 * @param {Object} [customOptions] - Options forwarded to both calls.
 * @returns {Array} The truthy `extractDateAgo` results, in query order.
 */
function queryDatesAgo(context, selector, customOptions) {
	return queryContents(context, selector, customOptions).reduce((parsedDates, rawText) => {
		const parsed = extractDateAgo(rawText, customOptions);

		// Falsy results (unparseable entries) are left out of the list.
		if (parsed) {
			parsedDates.push(parsed);
		}

		return parsedDates;
	}, []);
}
821
+
778
822
  function formatDate(dateValue, format, inputFormat) {
779
823
  if (inputFormat) {
780
824
  return moment(dateValue, inputFormat).format(format);
@@ -866,6 +910,9 @@ const queryFns = {
866
910
  posters: queryPosters,
867
911
  date: queryDate,
868
912
  dates: queryDates,
913
+ dateAgo: queryDateAgo,
914
+ datesAgo: queryDatesAgo,
915
+ dateAgos: queryDatesAgo,
869
916
  duration: queryDuration,
870
917
  dur: queryDuration,
871
918
  sourceSet: querySourceSet,
@@ -1434,6 +1481,7 @@ async function request(url, body, customOptions = {}, method = 'GET', redirects
1434
1481
  method,
1435
1482
  body: curatedBody.body,
1436
1483
  headers,
1484
+ redirect: options.followRedirects ? 'follow' : 'manual',
1437
1485
  signal: options.abortSignal,
1438
1486
  })).catch((error) => ({ // tends to happen when proxy can't reach host
1439
1487
  status: 500,
@@ -1519,6 +1567,7 @@ module.exports = {
1519
1567
  init,
1520
1568
  initAll,
1521
1569
  extractDate,
1570
+ extractDateAgo,
1522
1571
  extractDuration,
1523
1572
  extractNumber,
1524
1573
  extractTimestamp,
package/tests/index.html CHANGED
@@ -31,6 +31,11 @@
31
31
  <div id="date2">Date: 13-05-2022 18:00</div>
32
32
  <div>Today: Nov 14, 2022</div>
33
33
 
34
+ <div class="date-ago">3 weeks ago</div>
35
+ <div class="date-ago">2 days ago</div>
36
+ <div class="date-ago">1 year ago</div>
37
+ <div class="date-ago">November 10th</div>
38
+
34
39
  <div id="duration">01:15:33</div>
35
40
  <div id="timestamp">PT1H34M18S</div>
36
41
  <div id="timestring">1 hour 40 minutes 5 seconds</div>
package/tests/init.js CHANGED
@@ -60,6 +60,8 @@ async function initTest() {
60
60
  console.log('date', res.context.query.date('#date', 'DD-MM-YYYY HH:mm'));
61
61
  console.log('date xpath', res.context.query.date('//div[contains(text(), "Today:")]', 'MMM DD, YYYY'));
62
62
  console.log('date grouped xpath', res.context.query.date('(//div[contains(text(), "Today:")])', 'MMM DD, YYYY'));
63
+ console.log('date ago', res.context.query.dateAgo('.date-ago'));
64
+ console.log('dates ago', res.context.query.dateAgos('.date-ago'));
63
65
  console.log('duration', res.context.query.duration('#duration'));
64
66
  console.log('timestamp', res.context.query.duration('#timestamp'));
65
67
  console.log('timestring', res.context.query.duration('#timestring'));