@jocmp/mercury-parser 3.0.8 → 3.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/mercury.js CHANGED
@@ -36,10 +36,7 @@ var customParseFormat = require('dayjs/plugin/customParseFormat');
36
36
  var wuzzy = require('wuzzy');
37
37
  var difflib = require('difflib');
38
38
 
39
- function _interopDefaultLegacy (e) { return e && typeof e === 'object' && 'default' in e ? e : { 'default': e }; }
40
-
41
- function _interopNamespace(e) {
42
- if (e && e.__esModule) return e;
39
+ function _interopNamespaceDefault(e) {
43
40
  var n = Object.create(null);
44
41
  if (e) {
45
42
  Object.keys(e).forEach(function (k) {
@@ -52,45 +49,11 @@ function _interopNamespace(e) {
52
49
  }
53
50
  });
54
51
  }
55
- n["default"] = e;
52
+ n.default = e;
56
53
  return Object.freeze(n);
57
54
  }
58
55
 
59
- var _Object$keys__default = /*#__PURE__*/_interopDefaultLegacy(_Object$keys);
60
- var _Object$getOwnPropertySymbols__default = /*#__PURE__*/_interopDefaultLegacy(_Object$getOwnPropertySymbols);
61
- var _Object$getOwnPropertyDescriptor__default = /*#__PURE__*/_interopDefaultLegacy(_Object$getOwnPropertyDescriptor);
62
- var _Object$getOwnPropertyDescriptors__default = /*#__PURE__*/_interopDefaultLegacy(_Object$getOwnPropertyDescriptors);
63
- var _Object$defineProperties__default = /*#__PURE__*/_interopDefaultLegacy(_Object$defineProperties);
64
- var _Object$defineProperty__default = /*#__PURE__*/_interopDefaultLegacy(_Object$defineProperty);
65
- var _defineProperty__default = /*#__PURE__*/_interopDefaultLegacy(_defineProperty);
66
- var _objectWithoutProperties__default = /*#__PURE__*/_interopDefaultLegacy(_objectWithoutProperties);
67
- var _asyncToGenerator__default = /*#__PURE__*/_interopDefaultLegacy(_asyncToGenerator);
68
- var _regeneratorRuntime__default = /*#__PURE__*/_interopDefaultLegacy(_regeneratorRuntime);
69
- var URL__default = /*#__PURE__*/_interopDefaultLegacy(URL$1);
70
- var TurndownService__default = /*#__PURE__*/_interopDefaultLegacy(TurndownService);
71
- var cheerio__namespace = /*#__PURE__*/_interopNamespace(cheerio);
72
- var iconv__default = /*#__PURE__*/_interopDefaultLegacy(iconv);
73
- var _parseInt__default = /*#__PURE__*/_interopDefaultLegacy(_parseInt);
74
- var _slicedToArray__default = /*#__PURE__*/_interopDefaultLegacy(_slicedToArray);
75
- var _Promise__default = /*#__PURE__*/_interopDefaultLegacy(_Promise);
76
- var request__default = /*#__PURE__*/_interopDefaultLegacy(request);
77
- var _Reflect$ownKeys__default = /*#__PURE__*/_interopDefaultLegacy(_Reflect$ownKeys);
78
- var _toConsumableArray__default = /*#__PURE__*/_interopDefaultLegacy(_toConsumableArray);
79
- var _parseFloat__default = /*#__PURE__*/_interopDefaultLegacy(_parseFloat);
80
- var _Set__default = /*#__PURE__*/_interopDefaultLegacy(_Set);
81
- var _Array$from__default = /*#__PURE__*/_interopDefaultLegacy(_Array$from);
82
- var _Symbol__default = /*#__PURE__*/_interopDefaultLegacy(_Symbol);
83
- var _Symbol$iterator__default = /*#__PURE__*/_interopDefaultLegacy(_Symbol$iterator);
84
- var _Array$isArray__default = /*#__PURE__*/_interopDefaultLegacy(_Array$isArray);
85
- var _Object$assign__default = /*#__PURE__*/_interopDefaultLegacy(_Object$assign);
86
- var stringDirection__default = /*#__PURE__*/_interopDefaultLegacy(stringDirection);
87
- var _Number$isNaN__default = /*#__PURE__*/_interopDefaultLegacy(_Number$isNaN);
88
- var dayjs__default = /*#__PURE__*/_interopDefaultLegacy(dayjs);
89
- var utc__default = /*#__PURE__*/_interopDefaultLegacy(utc);
90
- var timezonePlugin__default = /*#__PURE__*/_interopDefaultLegacy(timezonePlugin);
91
- var customParseFormat__default = /*#__PURE__*/_interopDefaultLegacy(customParseFormat);
92
- var wuzzy__default = /*#__PURE__*/_interopDefaultLegacy(wuzzy);
93
- var difflib__default = /*#__PURE__*/_interopDefaultLegacy(difflib);
56
+ var cheerio__namespace = /*#__PURE__*/_interopNamespaceDefault(cheerio);
94
57
 
95
58
  var NORMALIZE_RE = /\s{2,}(?![^<>]*<\/(pre|code|textarea)>)/g;
96
59
  function normalizeSpaces(text) {
@@ -138,7 +101,7 @@ var DEFAULT_ENCODING = 'utf-8';
138
101
  function pageNumFromUrl(url) {
139
102
  var matches = url.match(PAGE_IN_HREF_RE);
140
103
  if (!matches) return null;
141
- var pageNum = _parseInt__default["default"](matches[6], 10);
104
+ var pageNum = _parseInt(matches[6], 10);
142
105
 
143
106
  // Return pageNum < 100, otherwise
144
107
  // return null
@@ -176,7 +139,7 @@ function isGoodSegment(segment, index, firstSegmentHasLetters) {
176
139
  // pagination data exists in it. Useful for comparing to other links
177
140
  // that might have pagination data within them.
178
141
  function articleBaseUrl(url, parsed) {
179
- var parsedUrl = parsed || URL__default["default"].parse(url);
142
+ var parsedUrl = parsed || URL$1.parse(url);
180
143
  var protocol = parsedUrl.protocol,
181
144
  host = parsedUrl.host,
182
145
  path = parsedUrl.path;
@@ -187,7 +150,7 @@ function articleBaseUrl(url, parsed) {
187
150
  // Split off and save anything that looks like a file type.
188
151
  if (segment.includes('.')) {
189
152
  var _segment$split = segment.split('.'),
190
- _segment$split2 = _slicedToArray__default["default"](_segment$split, 2),
153
+ _segment$split2 = _slicedToArray(_segment$split, 2),
191
154
  possibleSegment = _segment$split2[0],
192
155
  fileExt = _segment$split2[1];
193
156
  if (IS_ALPHA_RE.test(fileExt)) {
@@ -237,10 +200,10 @@ function getEncoding(str) {
237
200
  var encoding = DEFAULT_ENCODING;
238
201
  var matches = ENCODING_RE.exec(str);
239
202
  if (matches !== null) {
240
- var _matches = _slicedToArray__default["default"](matches, 2);
203
+ var _matches = _slicedToArray(matches, 2);
241
204
  str = _matches[1];
242
205
  }
243
- if (iconv__default["default"].encodingExists(str)) {
206
+ if (iconv.encodingExists(str)) {
244
207
  encoding = str;
245
208
  }
246
209
  return encoding;
@@ -266,11 +229,11 @@ var BAD_CONTENT_TYPES_RE = new RegExp("^(".concat(BAD_CONTENT_TYPES.join('|'), "
266
229
  // for us to attempt parsing. Defaults to 5 MB.
267
230
  var MAX_CONTENT_LENGTH = 5242880;
268
231
 
269
- function ownKeys$h(e, r) { var t = _Object$keys__default["default"](e); if (_Object$getOwnPropertySymbols__default["default"]) { var o = _Object$getOwnPropertySymbols__default["default"](e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor__default["default"](e, r).enumerable; })), t.push.apply(t, o); } return t; }
270
- function _objectSpread$h(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$h(Object(t), !0).forEach(function (r) { _defineProperty__default["default"](e, r, t[r]); }) : _Object$getOwnPropertyDescriptors__default["default"] ? _Object$defineProperties__default["default"](e, _Object$getOwnPropertyDescriptors__default["default"](t)) : ownKeys$h(Object(t)).forEach(function (r) { _Object$defineProperty__default["default"](e, r, _Object$getOwnPropertyDescriptor__default["default"](t, r)); }); } return e; }
232
+ function ownKeys$h(e, r) { var t = _Object$keys(e); if (_Object$getOwnPropertySymbols) { var o = _Object$getOwnPropertySymbols(e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor(e, r).enumerable; })), t.push.apply(t, o); } return t; }
233
+ function _objectSpread$h(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$h(Object(t), true).forEach(function (r) { _defineProperty(e, r, t[r]); }) : _Object$getOwnPropertyDescriptors ? _Object$defineProperties(e, _Object$getOwnPropertyDescriptors(t)) : ownKeys$h(Object(t)).forEach(function (r) { _Object$defineProperty(e, r, _Object$getOwnPropertyDescriptor(t, r)); }); } return e; }
271
234
  function get(options) {
272
- return new _Promise__default["default"](function (resolve, reject) {
273
- request__default["default"](options, function (err, response, body) {
235
+ return new _Promise(function (resolve, reject) {
236
+ request(options, function (err, response, body) {
274
237
  if (err) {
275
238
  reject(err);
276
239
  } else {
@@ -329,7 +292,7 @@ function fetchResource(_x, _x2) {
329
292
  return _fetchResource.apply(this, arguments);
330
293
  }
331
294
  function _fetchResource() {
332
- _fetchResource = _asyncToGenerator__default["default"](/*#__PURE__*/_regeneratorRuntime__default["default"].mark(function _callee(url, parsedUrl) {
295
+ _fetchResource = _asyncToGenerator(/*#__PURE__*/_regeneratorRuntime.mark(function _callee(url, parsedUrl) {
333
296
  var headers,
334
297
  options,
335
298
  _yield$get,
@@ -337,11 +300,11 @@ function _fetchResource() {
337
300
  body,
338
301
  _args = arguments,
339
302
  _t;
340
- return _regeneratorRuntime__default["default"].wrap(function (_context) {
303
+ return _regeneratorRuntime.wrap(function (_context) {
341
304
  while (1) switch (_context.prev = _context.next) {
342
305
  case 0:
343
306
  headers = _args.length > 2 && _args[2] !== undefined ? _args[2] : {};
344
- parsedUrl = parsedUrl || URL__default["default"].parse(encodeURI(url));
307
+ parsedUrl = parsedUrl || URL$1.parse(encodeURI(url));
345
308
  options = _objectSpread$h({
346
309
  url: parsedUrl.href,
347
310
  headers: _objectSpread$h(_objectSpread$h({}, REQUEST_HEADERS), headers),
@@ -603,7 +566,7 @@ function getAttrs(node) {
603
566
  var attribs = node.attribs,
604
567
  attributes = node.attributes;
605
568
  if (!attribs && attributes) {
606
- var attrs = _Reflect$ownKeys__default["default"](attributes).reduce(function (acc, index) {
569
+ var attrs = _Reflect$ownKeys(attributes).reduce(function (acc, index) {
607
570
  var attr = attributes[index];
608
571
 
609
572
  // In browser, Reflect.ownKeys includes non-numeric keys like 'length', 'item', etc.
@@ -623,7 +586,7 @@ function convertNodeTo($node, $) {
623
586
  return $;
624
587
  }
625
588
  var attrs = getAttrs(node) || {};
626
- var attribString = _Reflect$ownKeys__default["default"](attrs).map(function (key) {
589
+ var attribString = _Reflect$ownKeys(attrs).map(function (key) {
627
590
  return "".concat(key, "=").concat(attrs[key]);
628
591
  }).join(' ');
629
592
  var html;
@@ -682,8 +645,8 @@ function convertToParagraphs($) {
682
645
  }
683
646
 
684
647
  function cleanForHeight($img, $) {
685
- var height = _parseInt__default["default"]($img.attr('height'), 10);
686
- var width = _parseInt__default["default"]($img.attr('width'), 10) || 20;
648
+ var height = _parseInt($img.attr('height'), 10);
649
+ var width = _parseInt($img.attr('width'), 10) || 20;
687
650
 
688
651
  // Remove images that explicitly have very small heights or
689
652
  // widths, because they are most likely shims or icons,
@@ -722,10 +685,10 @@ function markToKeep(article, $, url) {
722
685
  tags = KEEP_SELECTORS;
723
686
  }
724
687
  if (url) {
725
- var _URL$parse = URL__default["default"].parse(url),
688
+ var _URL$parse = URL$1.parse(url),
726
689
  protocol = _URL$parse.protocol,
727
690
  hostname = _URL$parse.hostname;
728
- tags = [].concat(_toConsumableArray__default["default"](tags), ["iframe[src^=\"".concat(protocol, "//").concat(hostname, "\"]")]);
691
+ tags = [].concat(_toConsumableArray(tags), ["iframe[src^=\"".concat(protocol, "//").concat(hostname, "\"]")]);
729
692
  }
730
693
  $(tags.join(','), article).addClass(KEEP_CLASS);
731
694
  return $;
@@ -767,21 +730,21 @@ function setAttrs(node, attrs) {
767
730
  while (node.attributes.length > 0) {
768
731
  node.removeAttribute(node.attributes[0].name);
769
732
  }
770
- _Reflect$ownKeys__default["default"](attrs).forEach(function (key) {
733
+ _Reflect$ownKeys(attrs).forEach(function (key) {
771
734
  node.setAttribute(key, attrs[key]);
772
735
  });
773
736
  }
774
737
  return node;
775
738
  }
776
739
 
777
- function ownKeys$g(e, r) { var t = _Object$keys__default["default"](e); if (_Object$getOwnPropertySymbols__default["default"]) { var o = _Object$getOwnPropertySymbols__default["default"](e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor__default["default"](e, r).enumerable; })), t.push.apply(t, o); } return t; }
778
- function _objectSpread$g(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$g(Object(t), !0).forEach(function (r) { _defineProperty__default["default"](e, r, t[r]); }) : _Object$getOwnPropertyDescriptors__default["default"] ? _Object$defineProperties__default["default"](e, _Object$getOwnPropertyDescriptors__default["default"](t)) : ownKeys$g(Object(t)).forEach(function (r) { _Object$defineProperty__default["default"](e, r, _Object$getOwnPropertyDescriptor__default["default"](t, r)); }); } return e; }
740
+ function ownKeys$g(e, r) { var t = _Object$keys(e); if (_Object$getOwnPropertySymbols) { var o = _Object$getOwnPropertySymbols(e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor(e, r).enumerable; })), t.push.apply(t, o); } return t; }
741
+ function _objectSpread$g(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$g(Object(t), true).forEach(function (r) { _defineProperty(e, r, t[r]); }) : _Object$getOwnPropertyDescriptors ? _Object$defineProperties(e, _Object$getOwnPropertyDescriptors(t)) : ownKeys$g(Object(t)).forEach(function (r) { _Object$defineProperty(e, r, _Object$getOwnPropertyDescriptor(t, r)); }); } return e; }
779
742
  function removeAllButWhitelist($article, $) {
780
743
  $article.find('*').each(function (index, node) {
781
744
  var attrs = getAttrs(node);
782
- setAttrs(node, _Reflect$ownKeys__default["default"](attrs).reduce(function (acc, attr) {
745
+ setAttrs(node, _Reflect$ownKeys(attrs).reduce(function (acc, attr) {
783
746
  if (WHITELIST_ATTRS_RE.test(attr)) {
784
- return _objectSpread$g(_objectSpread$g({}, acc), {}, _defineProperty__default["default"]({}, attr, attrs[attr]));
747
+ return _objectSpread$g(_objectSpread$g({}, acc), {}, _defineProperty({}, attr, attrs[attr]));
785
748
  }
786
749
  return acc;
787
750
  }, {}));
@@ -812,7 +775,7 @@ function removeEmpty($article, $) {
812
775
  // the node's score attribute
813
776
  // returns null if no score set
814
777
  function getScore($node) {
815
- return _parseFloat__default["default"]($node.attr('score')) || null;
778
+ return _parseFloat($node.attr('score')) || null;
816
779
  }
817
780
 
818
781
  function setScore($node, $, score) {
@@ -878,6 +841,7 @@ function scoreParagraph(node) {
878
841
 
879
842
  // // CONTENT FETCHING CONSTANTS ////
880
843
 
844
+
881
845
  // A list of tags that should be ignored when trying to find the top candidate
882
846
  // for a document.
883
847
  var NON_TOP_CANDIDATE_TAGS = ['br', 'b', 'i', 'label', 'hr', 'area', 'base', 'basefont', 'input', 'img', 'link', 'meta'];
@@ -996,7 +960,7 @@ function getWeight(node) {
996
960
  return score;
997
961
  }
998
962
 
999
- // eslint-disable-next-line import/no-cycle
963
+ // eslint-disable-next-line import-x/no-cycle
1000
964
  function addScore($node, $, amount) {
1001
965
  try {
1002
966
  var score = getOrInitScore($node, $) + amount;
@@ -1007,7 +971,7 @@ function addScore($node, $, amount) {
1007
971
  return $node;
1008
972
  }
1009
973
 
1010
- // eslint-disable-next-line import/no-cycle
974
+ // eslint-disable-next-line import-x/no-cycle
1011
975
 
1012
976
  // Adds 1/4 of a child's score to its parent
1013
977
  function addToParent(node, $, score) {
@@ -1204,7 +1168,7 @@ function absolutize($, rootUrl, attr) {
1204
1168
  var attrs = getAttrs(node);
1205
1169
  var url = attrs[attr];
1206
1170
  if (!url) return;
1207
- var absoluteUrl = URL__default["default"].resolve(baseUrl || rootUrl, url);
1171
+ var absoluteUrl = URL$1.resolve(baseUrl || rootUrl, url);
1208
1172
  setAttr(node, attr, absoluteUrl);
1209
1173
  });
1210
1174
  }
@@ -1222,10 +1186,10 @@ function absolutizeSet($, rootUrl, $content) {
1222
1186
  // a candidate URL cannot start or end with a comma
1223
1187
  // descriptors are separated from the URLs by unescaped whitespace
1224
1188
  var parts = candidate.trim().replace(/,$/, '').split(/\s+/);
1225
- parts[0] = URL__default["default"].resolve(rootUrl, parts[0]);
1189
+ parts[0] = URL$1.resolve(rootUrl, parts[0]);
1226
1190
  return parts.join(' ');
1227
1191
  });
1228
- var absoluteUrlSet = _toConsumableArray__default["default"](new _Set__default["default"](absoluteCandidates)).join(', ');
1192
+ var absoluteUrlSet = _toConsumableArray(new _Set(absoluteCandidates)).join(', ');
1229
1193
  setAttr(node, 'srcset', absoluteUrlSet);
1230
1194
  }
1231
1195
  });
@@ -1246,8 +1210,8 @@ function stripTags(text, $) {
1246
1210
  return cleanText === '' ? text : cleanText;
1247
1211
  }
1248
1212
 
1249
- function _createForOfIteratorHelper$4(r, e) { var t = "undefined" != typeof _Symbol__default["default"] && r[_Symbol$iterator__default["default"]] || r["@@iterator"]; if (!t) { if (_Array$isArray__default["default"](r) || (t = _unsupportedIterableToArray$4(r)) || e && r && "number" == typeof r.length) { t && (r = t); var _n = 0, F = function F() {}; return { s: F, n: function n() { return _n >= r.length ? { done: !0 } : { done: !1, value: r[_n++] }; }, e: function e(r) { throw r; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var o, a = !0, u = !1; return { s: function s() { t = t.call(r); }, n: function n() { var r = t.next(); return a = r.done, r; }, e: function e(r) { u = !0, o = r; }, f: function f() { try { a || null == t["return"] || t["return"](); } finally { if (u) throw o; } } }; }
1250
- function _unsupportedIterableToArray$4(r, a) { if (r) { if ("string" == typeof r) return _arrayLikeToArray$4(r, a); var t = {}.toString.call(r).slice(8, -1); return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$from__default["default"](r) : "Arguments" === t || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t) ? _arrayLikeToArray$4(r, a) : void 0; } }
1213
+ function _createForOfIteratorHelper$4(r, e) { var t = "undefined" != typeof _Symbol && r[_Symbol$iterator] || r["@@iterator"]; if (!t) { if (_Array$isArray(r) || (t = _unsupportedIterableToArray$4(r)) || e) { t && (r = t); var _n = 0, F = function F() {}; return { s: F, n: function n() { return _n >= r.length ? { done: true } : { done: false, value: r[_n++] }; }, e: function e(r) { throw r; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var o, a = true, u = false; return { s: function s() { t = t.call(r); }, n: function n() { var r = t.next(); return a = r.done, r; }, e: function e(r) { u = true, o = r; }, f: function f() { try { a || null == t["return"] || t["return"](); } finally { if (u) throw o; } } }; }
1214
+ function _unsupportedIterableToArray$4(r, a) { if (r) { if ("string" == typeof r) return _arrayLikeToArray$4(r, a); var t = {}.toString.call(r).slice(8, -1); return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$from(r) : "Arguments" === t || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t) ? _arrayLikeToArray$4(r, a) : void 0; } }
1251
1215
  function _arrayLikeToArray$4(r, a) { (null == a || a > r.length) && (a = r.length); for (var e = 0, n = Array(a); e < a; e++) n[e] = r[e]; return n; }
1252
1216
 
1253
1217
  // Given a node type to search for, and a list of meta tag names to
@@ -1257,8 +1221,6 @@ function extractFromMeta($, metaNames, cachedNames) {
1257
1221
  var foundNames = metaNames.filter(function (name) {
1258
1222
  return cachedNames.indexOf(name) !== -1;
1259
1223
  });
1260
-
1261
- // eslint-disable-next-line no-restricted-syntax
1262
1224
  var _iterator = _createForOfIteratorHelper$4(foundNames),
1263
1225
  _step;
1264
1226
  try {
@@ -1288,7 +1250,7 @@ function extractFromMeta($, metaNames, cachedNames) {
1288
1250
  if (cleanTags) {
1289
1251
  metaValue = stripTags(values[0], $);
1290
1252
  } else {
1291
- var _values = _slicedToArray__default["default"](values, 1);
1253
+ var _values = _slicedToArray(values, 1);
1292
1254
  metaValue = _values[0];
1293
1255
  }
1294
1256
  return {
@@ -1323,8 +1285,8 @@ function withinComment($node) {
1323
1285
  return commentParent !== undefined;
1324
1286
  }
1325
1287
 
1326
- function _createForOfIteratorHelper$3(r, e) { var t = "undefined" != typeof _Symbol__default["default"] && r[_Symbol$iterator__default["default"]] || r["@@iterator"]; if (!t) { if (_Array$isArray__default["default"](r) || (t = _unsupportedIterableToArray$3(r)) || e && r && "number" == typeof r.length) { t && (r = t); var _n = 0, F = function F() {}; return { s: F, n: function n() { return _n >= r.length ? { done: !0 } : { done: !1, value: r[_n++] }; }, e: function e(r) { throw r; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var o, a = !0, u = !1; return { s: function s() { t = t.call(r); }, n: function n() { var r = t.next(); return a = r.done, r; }, e: function e(r) { u = !0, o = r; }, f: function f() { try { a || null == t["return"] || t["return"](); } finally { if (u) throw o; } } }; }
1327
- function _unsupportedIterableToArray$3(r, a) { if (r) { if ("string" == typeof r) return _arrayLikeToArray$3(r, a); var t = {}.toString.call(r).slice(8, -1); return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$from__default["default"](r) : "Arguments" === t || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t) ? _arrayLikeToArray$3(r, a) : void 0; } }
1288
+ function _createForOfIteratorHelper$3(r, e) { var t = "undefined" != typeof _Symbol && r[_Symbol$iterator] || r["@@iterator"]; if (!t) { if (_Array$isArray(r) || (t = _unsupportedIterableToArray$3(r)) || e) { t && (r = t); var _n = 0, F = function F() {}; return { s: F, n: function n() { return _n >= r.length ? { done: true } : { done: false, value: r[_n++] }; }, e: function e(r) { throw r; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var o, a = true, u = false; return { s: function s() { t = t.call(r); }, n: function n() { var r = t.next(); return a = r.done, r; }, e: function e(r) { u = true, o = r; }, f: function f() { try { a || null == t["return"] || t["return"](); } finally { if (u) throw o; } } }; }
1289
+ function _unsupportedIterableToArray$3(r, a) { if (r) { if ("string" == typeof r) return _arrayLikeToArray$3(r, a); var t = {}.toString.call(r).slice(8, -1); return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$from(r) : "Arguments" === t || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t) ? _arrayLikeToArray$3(r, a) : void 0; } }
1328
1290
  function _arrayLikeToArray$3(r, a) { (null == a || a > r.length) && (a = r.length); for (var e = 0, n = Array(a); e < a; e++) n[e] = r[e]; return n; }
1329
1291
  function isGoodNode($node, maxChildren) {
1330
1292
  // If it has a number of children, it's more likely a container
@@ -1345,7 +1307,6 @@ function isGoodNode($node, maxChildren) {
1345
1307
  function extractFromSelectors($, selectors) {
1346
1308
  var maxChildren = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 1;
1347
1309
  var textOnly = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : true;
1348
- // eslint-disable-next-line no-restricted-syntax
1349
1310
  var _iterator = _createForOfIteratorHelper$3(selectors),
1350
1311
  _step;
1351
1312
  try {
@@ -1414,7 +1375,7 @@ function convertLazyLoadedImages($) {
1414
1375
  };
1415
1376
  $('img').each(function (_, img) {
1416
1377
  var attrs = getAttrs(img);
1417
- _Reflect$ownKeys__default["default"](attrs).forEach(function (attr) {
1378
+ _Reflect$ownKeys(attrs).forEach(function (attr) {
1418
1379
  var value = attrs[attr];
1419
1380
  if (attr !== 'srcset' && IS_LINK.test(value) && IS_SRCSET.test(value)) {
1420
1381
  $(img).attr('srcset', value);
@@ -1456,9 +1417,9 @@ var Resource = {
1456
1417
  create: function create(url, preparedResponse, parsedUrl) {
1457
1418
  var _arguments = arguments,
1458
1419
  _this = this;
1459
- return _asyncToGenerator__default["default"](/*#__PURE__*/_regeneratorRuntime__default["default"].mark(function _callee() {
1420
+ return _asyncToGenerator(/*#__PURE__*/_regeneratorRuntime.mark(function _callee() {
1460
1421
  var headers, result, validResponse;
1461
- return _regeneratorRuntime__default["default"].wrap(function (_context) {
1422
+ return _regeneratorRuntime.wrap(function (_context) {
1462
1423
  while (1) switch (_context.prev = _context.next) {
1463
1424
  case 0:
1464
1425
  headers = _arguments.length > 3 && _arguments[3] !== undefined ? _arguments[3] : {};
@@ -1538,7 +1499,7 @@ var Resource = {
1538
1499
  }
1539
1500
  var encoding = getEncoding(contentType);
1540
1501
  // UTF-8 is handled natively by Node.js, skip iconv-lite
1541
- var decodedContent = encoding === 'utf-8' ? content.toString('utf-8') : iconv__default["default"].decode(content, encoding);
1502
+ var decodedContent = encoding === 'utf-8' ? content.toString('utf-8') : iconv.decode(content, encoding);
1542
1503
  var $ = cheerio__namespace.load(decodedContent);
1543
1504
  // after first cheerio.load, check to see if encoding matches
1544
1505
  var contentTypeSelector = isBrowser ? 'meta[http-equiv=content-type]' : 'meta[http-equiv=content-type i]';
@@ -1547,7 +1508,7 @@ var Resource = {
1547
1508
 
1548
1509
  // if encodings in the header/body dont match, use the one in the body
1549
1510
  if (metaContentType && properEncoding !== encoding) {
1550
- decodedContent = properEncoding === 'utf-8' ? content.toString('utf-8') : iconv__default["default"].decode(content, properEncoding);
1511
+ decodedContent = properEncoding === 'utf-8' ? content.toString('utf-8') : iconv.decode(content, properEncoding);
1551
1512
  $ = cheerio__namespace.load(decodedContent);
1552
1513
  }
1553
1514
  return $;
@@ -1557,8 +1518,8 @@ var Resource = {
1557
1518
  function range() {
1558
1519
  var start = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 1;
1559
1520
  var end = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 1;
1560
- return /*#__PURE__*/_regeneratorRuntime__default["default"].mark(function _callee() {
1561
- return _regeneratorRuntime__default["default"].wrap(function (_context) {
1521
+ return /*#__PURE__*/_regeneratorRuntime.mark(function _callee() {
1522
+ return _regeneratorRuntime.wrap(function (_context) {
1562
1523
  while (1) switch (_context.prev = _context.next) {
1563
1524
  case 0:
1564
1525
  if (!(start <= end)) {
@@ -1592,7 +1553,7 @@ var merge = function merge(extractor, domains) {
1592
1553
  }, {});
1593
1554
  };
1594
1555
  function mergeSupportedDomains(extractor) {
1595
- return extractor.supportedDomains ? merge(extractor, [extractor.domain].concat(_toConsumableArray__default["default"](extractor.supportedDomains))) : merge(extractor, [extractor.domain]);
1556
+ return extractor.supportedDomains ? merge(extractor, [extractor.domain].concat(_toConsumableArray(extractor.supportedDomains))) : merge(extractor, [extractor.domain]);
1596
1557
  }
1597
1558
 
1598
1559
  var apiExtractors = {};
@@ -1603,7 +1564,7 @@ function addExtractor(extractor) {
1603
1564
  message: 'Unable to add custom extractor. Invalid parameters.'
1604
1565
  };
1605
1566
  }
1606
- _Object$assign__default["default"](apiExtractors, mergeSupportedDomains(extractor));
1567
+ _Object$assign(apiExtractors, mergeSupportedDomains(extractor));
1607
1568
  return apiExtractors;
1608
1569
  }
1609
1570
 
@@ -2313,7 +2274,7 @@ var MediumExtractor = {
2313
2274
  var $parent = $node.parents('figure');
2314
2275
  if (ytRe.test(thumb)) {
2315
2276
  var _thumb$match = thumb.match(ytRe),
2316
- _thumb$match2 = _slicedToArray__default["default"](_thumb$match, 2);
2277
+ _thumb$match2 = _slicedToArray(_thumb$match, 2);
2317
2278
  _thumb$match2[0];
2318
2279
  var youtubeId = _thumb$match2[1]; // eslint-disable-line
2319
2280
  $node.attr('src', "https://www.youtube.com/embed/".concat(youtubeId));
@@ -2336,7 +2297,7 @@ var MediumExtractor = {
2336
2297
  // Remove any smaller images that did not get caught by the generic image
2337
2298
  // cleaner (author photo 48px, leading sentence images 79px, etc.).
2338
2299
  img: function img($node) {
2339
- var width = _parseInt__default["default"]($node.attr('width'), 10);
2300
+ var width = _parseInt($node.attr('width'), 10);
2340
2301
  if (width < 100) $node.remove();
2341
2302
  }
2342
2303
  },
@@ -3231,7 +3192,7 @@ var WwwMsnbcComExtractor = {
3231
3192
  // before it's consumable content? E.g., unusual lazy loaded images
3232
3193
  transforms: {
3233
3194
  '.pane-node-body': function paneNodeBody($node, $) {
3234
- var _WwwMsnbcComExtractor = _slicedToArray__default["default"](WwwMsnbcComExtractor.lead_image_url.selectors[0], 2),
3195
+ var _WwwMsnbcComExtractor = _slicedToArray(WwwMsnbcComExtractor.lead_image_url.selectors[0], 2),
3235
3196
  selector = _WwwMsnbcComExtractor[0],
3236
3197
  attr = _WwwMsnbcComExtractor[1];
3237
3198
  var src = $(selector).attr(attr);
@@ -5380,7 +5341,7 @@ var WiredJpExtractor = {
5380
5341
  'img[data-original]': function imgDataOriginal($node) {
5381
5342
  var dataOriginal = $node.attr('data-original');
5382
5343
  var src = $node.attr('src');
5383
- var url = URL__default["default"].resolve(src, dataOriginal);
5344
+ var url = URL$1.resolve(src, dataOriginal);
5384
5345
  $node.attr('src', url);
5385
5346
  }
5386
5347
  },
@@ -5684,8 +5645,6 @@ var PastebinComExtractor = {
5684
5645
  }
5685
5646
  };
5686
5647
 
5687
- /* eslint-disable no-nested-ternary */
5688
- /* eslint-disable no-unused-expressions */
5689
5648
  var WwwAbendblattDeExtractor = {
5690
5649
  domain: 'www.abendblatt.de',
5691
5650
  title: {
@@ -6304,14 +6263,14 @@ var WwwSePlExtractor = {
6304
6263
  }
6305
6264
  };
6306
6265
 
6307
- function ownKeys$f(e, r) { var t = _Object$keys__default["default"](e); if (_Object$getOwnPropertySymbols__default["default"]) { var o = _Object$getOwnPropertySymbols__default["default"](e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor__default["default"](e, r).enumerable; })), t.push.apply(t, o); } return t; }
6308
- function _objectSpread$f(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$f(Object(t), !0).forEach(function (r) { _defineProperty__default["default"](e, r, t[r]); }) : _Object$getOwnPropertyDescriptors__default["default"] ? _Object$defineProperties__default["default"](e, _Object$getOwnPropertyDescriptors__default["default"](t)) : ownKeys$f(Object(t)).forEach(function (r) { _Object$defineProperty__default["default"](e, r, _Object$getOwnPropertyDescriptor__default["default"](t, r)); }); } return e; }
6266
+ function ownKeys$f(e, r) { var t = _Object$keys(e); if (_Object$getOwnPropertySymbols) { var o = _Object$getOwnPropertySymbols(e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor(e, r).enumerable; })), t.push.apply(t, o); } return t; }
6267
+ function _objectSpread$f(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$f(Object(t), true).forEach(function (r) { _defineProperty(e, r, t[r]); }) : _Object$getOwnPropertyDescriptors ? _Object$defineProperties(e, _Object$getOwnPropertyDescriptors(t)) : ownKeys$f(Object(t)).forEach(function (r) { _Object$defineProperty(e, r, _Object$getOwnPropertyDescriptor(t, r)); }); } return e; }
6309
6268
  var SportSePlExtractor = _objectSpread$f(_objectSpread$f({}, WwwSePlExtractor), {}, {
6310
6269
  domain: 'sport.se.pl'
6311
6270
  });
6312
6271
 
6313
- function ownKeys$e(e, r) { var t = _Object$keys__default["default"](e); if (_Object$getOwnPropertySymbols__default["default"]) { var o = _Object$getOwnPropertySymbols__default["default"](e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor__default["default"](e, r).enumerable; })), t.push.apply(t, o); } return t; }
6314
- function _objectSpread$e(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$e(Object(t), !0).forEach(function (r) { _defineProperty__default["default"](e, r, t[r]); }) : _Object$getOwnPropertyDescriptors__default["default"] ? _Object$defineProperties__default["default"](e, _Object$getOwnPropertyDescriptors__default["default"](t)) : ownKeys$e(Object(t)).forEach(function (r) { _Object$defineProperty__default["default"](e, r, _Object$getOwnPropertyDescriptor__default["default"](t, r)); }); } return e; }
6272
+ function ownKeys$e(e, r) { var t = _Object$keys(e); if (_Object$getOwnPropertySymbols) { var o = _Object$getOwnPropertySymbols(e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor(e, r).enumerable; })), t.push.apply(t, o); } return t; }
6273
+ function _objectSpread$e(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$e(Object(t), true).forEach(function (r) { _defineProperty(e, r, t[r]); }) : _Object$getOwnPropertyDescriptors ? _Object$defineProperties(e, _Object$getOwnPropertyDescriptors(t)) : ownKeys$e(Object(t)).forEach(function (r) { _Object$defineProperty(e, r, _Object$getOwnPropertyDescriptor(t, r)); }); } return e; }
6315
6274
  var PolitykaSePlExtractor = _objectSpread$e(_objectSpread$e({}, WwwSePlExtractor), {}, {
6316
6275
  domain: 'polityka.se.pl'
6317
6276
  });
@@ -6344,20 +6303,20 @@ var SuperserialeSePlExtractor = {
6344
6303
  }
6345
6304
  };
6346
6305
 
6347
- function ownKeys$d(e, r) { var t = _Object$keys__default["default"](e); if (_Object$getOwnPropertySymbols__default["default"]) { var o = _Object$getOwnPropertySymbols__default["default"](e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor__default["default"](e, r).enumerable; })), t.push.apply(t, o); } return t; }
6348
- function _objectSpread$d(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$d(Object(t), !0).forEach(function (r) { _defineProperty__default["default"](e, r, t[r]); }) : _Object$getOwnPropertyDescriptors__default["default"] ? _Object$defineProperties__default["default"](e, _Object$getOwnPropertyDescriptors__default["default"](t)) : ownKeys$d(Object(t)).forEach(function (r) { _Object$defineProperty__default["default"](e, r, _Object$getOwnPropertyDescriptor__default["default"](t, r)); }); } return e; }
6306
+ function ownKeys$d(e, r) { var t = _Object$keys(e); if (_Object$getOwnPropertySymbols) { var o = _Object$getOwnPropertySymbols(e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor(e, r).enumerable; })), t.push.apply(t, o); } return t; }
6307
+ function _objectSpread$d(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$d(Object(t), true).forEach(function (r) { _defineProperty(e, r, t[r]); }) : _Object$getOwnPropertyDescriptors ? _Object$defineProperties(e, _Object$getOwnPropertyDescriptors(t)) : ownKeys$d(Object(t)).forEach(function (r) { _Object$defineProperty(e, r, _Object$getOwnPropertyDescriptor(t, r)); }); } return e; }
6349
6308
  var SzczecinSePlExtractor = _objectSpread$d(_objectSpread$d({}, WwwSePlExtractor), {}, {
6350
6309
  domain: 'szczecin.se.pl'
6351
6310
  });
6352
6311
 
6353
- function ownKeys$c(e, r) { var t = _Object$keys__default["default"](e); if (_Object$getOwnPropertySymbols__default["default"]) { var o = _Object$getOwnPropertySymbols__default["default"](e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor__default["default"](e, r).enumerable; })), t.push.apply(t, o); } return t; }
6354
- function _objectSpread$c(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$c(Object(t), !0).forEach(function (r) { _defineProperty__default["default"](e, r, t[r]); }) : _Object$getOwnPropertyDescriptors__default["default"] ? _Object$defineProperties__default["default"](e, _Object$getOwnPropertyDescriptors__default["default"](t)) : ownKeys$c(Object(t)).forEach(function (r) { _Object$defineProperty__default["default"](e, r, _Object$getOwnPropertyDescriptor__default["default"](t, r)); }); } return e; }
6312
+ function ownKeys$c(e, r) { var t = _Object$keys(e); if (_Object$getOwnPropertySymbols) { var o = _Object$getOwnPropertySymbols(e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor(e, r).enumerable; })), t.push.apply(t, o); } return t; }
6313
+ function _objectSpread$c(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$c(Object(t), true).forEach(function (r) { _defineProperty(e, r, t[r]); }) : _Object$getOwnPropertyDescriptors ? _Object$defineProperties(e, _Object$getOwnPropertyDescriptors(t)) : ownKeys$c(Object(t)).forEach(function (r) { _Object$defineProperty(e, r, _Object$getOwnPropertyDescriptor(t, r)); }); } return e; }
6355
6314
  var SuperbizSePlExtractor = _objectSpread$c(_objectSpread$c({}, WwwSePlExtractor), {}, {
6356
6315
  domain: 'superbiz.se.pl'
6357
6316
  });
6358
6317
 
6359
- function ownKeys$b(e, r) { var t = _Object$keys__default["default"](e); if (_Object$getOwnPropertySymbols__default["default"]) { var o = _Object$getOwnPropertySymbols__default["default"](e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor__default["default"](e, r).enumerable; })), t.push.apply(t, o); } return t; }
6360
- function _objectSpread$b(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$b(Object(t), !0).forEach(function (r) { _defineProperty__default["default"](e, r, t[r]); }) : _Object$getOwnPropertyDescriptors__default["default"] ? _Object$defineProperties__default["default"](e, _Object$getOwnPropertyDescriptors__default["default"](t)) : ownKeys$b(Object(t)).forEach(function (r) { _Object$defineProperty__default["default"](e, r, _Object$getOwnPropertyDescriptor__default["default"](t, r)); }); } return e; }
6318
+ function ownKeys$b(e, r) { var t = _Object$keys(e); if (_Object$getOwnPropertySymbols) { var o = _Object$getOwnPropertySymbols(e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor(e, r).enumerable; })), t.push.apply(t, o); } return t; }
6319
+ function _objectSpread$b(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$b(Object(t), true).forEach(function (r) { _defineProperty(e, r, t[r]); }) : _Object$getOwnPropertyDescriptors ? _Object$defineProperties(e, _Object$getOwnPropertyDescriptors(t)) : ownKeys$b(Object(t)).forEach(function (r) { _Object$defineProperty(e, r, _Object$getOwnPropertyDescriptor(t, r)); }); } return e; }
6361
6320
  var PortalobronnySePlExtractor = _objectSpread$b(_objectSpread$b({}, WwwSePlExtractor), {}, {
6362
6321
  domain: 'portalobronny.se.pl'
6363
6322
  });
@@ -6384,26 +6343,26 @@ var PolskisamorzadSePlExtractor = {
6384
6343
  }
6385
6344
  };
6386
6345
 
6387
- function ownKeys$a(e, r) { var t = _Object$keys__default["default"](e); if (_Object$getOwnPropertySymbols__default["default"]) { var o = _Object$getOwnPropertySymbols__default["default"](e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor__default["default"](e, r).enumerable; })), t.push.apply(t, o); } return t; }
6388
- function _objectSpread$a(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$a(Object(t), !0).forEach(function (r) { _defineProperty__default["default"](e, r, t[r]); }) : _Object$getOwnPropertyDescriptors__default["default"] ? _Object$defineProperties__default["default"](e, _Object$getOwnPropertyDescriptors__default["default"](t)) : ownKeys$a(Object(t)).forEach(function (r) { _Object$defineProperty__default["default"](e, r, _Object$getOwnPropertyDescriptor__default["default"](t, r)); }); } return e; }
6346
+ function ownKeys$a(e, r) { var t = _Object$keys(e); if (_Object$getOwnPropertySymbols) { var o = _Object$getOwnPropertySymbols(e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor(e, r).enumerable; })), t.push.apply(t, o); } return t; }
6347
+ function _objectSpread$a(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$a(Object(t), true).forEach(function (r) { _defineProperty(e, r, t[r]); }) : _Object$getOwnPropertyDescriptors ? _Object$defineProperties(e, _Object$getOwnPropertyDescriptors(t)) : ownKeys$a(Object(t)).forEach(function (r) { _Object$defineProperty(e, r, _Object$getOwnPropertyDescriptor(t, r)); }); } return e; }
6389
6348
  var LodzSePlExtractor = _objectSpread$a(_objectSpread$a({}, WwwSePlExtractor), {}, {
6390
6349
  domain: 'lodz.se.pl'
6391
6350
  });
6392
6351
 
6393
- function ownKeys$9(e, r) { var t = _Object$keys__default["default"](e); if (_Object$getOwnPropertySymbols__default["default"]) { var o = _Object$getOwnPropertySymbols__default["default"](e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor__default["default"](e, r).enumerable; })), t.push.apply(t, o); } return t; }
6394
- function _objectSpread$9(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$9(Object(t), !0).forEach(function (r) { _defineProperty__default["default"](e, r, t[r]); }) : _Object$getOwnPropertyDescriptors__default["default"] ? _Object$defineProperties__default["default"](e, _Object$getOwnPropertyDescriptors__default["default"](t)) : ownKeys$9(Object(t)).forEach(function (r) { _Object$defineProperty__default["default"](e, r, _Object$getOwnPropertyDescriptor__default["default"](t, r)); }); } return e; }
6352
+ function ownKeys$9(e, r) { var t = _Object$keys(e); if (_Object$getOwnPropertySymbols) { var o = _Object$getOwnPropertySymbols(e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor(e, r).enumerable; })), t.push.apply(t, o); } return t; }
6353
+ function _objectSpread$9(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$9(Object(t), true).forEach(function (r) { _defineProperty(e, r, t[r]); }) : _Object$getOwnPropertyDescriptors ? _Object$defineProperties(e, _Object$getOwnPropertyDescriptors(t)) : ownKeys$9(Object(t)).forEach(function (r) { _Object$defineProperty(e, r, _Object$getOwnPropertyDescriptor(t, r)); }); } return e; }
6395
6354
  var WroclawSePlExtractor = _objectSpread$9(_objectSpread$9({}, WwwSePlExtractor), {}, {
6396
6355
  domain: 'wroclaw.se.pl'
6397
6356
  });
6398
6357
 
6399
- function ownKeys$8(e, r) { var t = _Object$keys__default["default"](e); if (_Object$getOwnPropertySymbols__default["default"]) { var o = _Object$getOwnPropertySymbols__default["default"](e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor__default["default"](e, r).enumerable; })), t.push.apply(t, o); } return t; }
6400
- function _objectSpread$8(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$8(Object(t), !0).forEach(function (r) { _defineProperty__default["default"](e, r, t[r]); }) : _Object$getOwnPropertyDescriptors__default["default"] ? _Object$defineProperties__default["default"](e, _Object$getOwnPropertyDescriptors__default["default"](t)) : ownKeys$8(Object(t)).forEach(function (r) { _Object$defineProperty__default["default"](e, r, _Object$getOwnPropertyDescriptor__default["default"](t, r)); }); } return e; }
6358
+ function ownKeys$8(e, r) { var t = _Object$keys(e); if (_Object$getOwnPropertySymbols) { var o = _Object$getOwnPropertySymbols(e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor(e, r).enumerable; })), t.push.apply(t, o); } return t; }
6359
+ function _objectSpread$8(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$8(Object(t), true).forEach(function (r) { _defineProperty(e, r, t[r]); }) : _Object$getOwnPropertyDescriptors ? _Object$defineProperties(e, _Object$getOwnPropertyDescriptors(t)) : ownKeys$8(Object(t)).forEach(function (r) { _Object$defineProperty(e, r, _Object$getOwnPropertyDescriptor(t, r)); }); } return e; }
6401
6360
  var LublinSePlExtractor = _objectSpread$8(_objectSpread$8({}, WwwSePlExtractor), {}, {
6402
6361
  domain: 'lublin.se.pl'
6403
6362
  });
6404
6363
 
6405
- function ownKeys$7(e, r) { var t = _Object$keys__default["default"](e); if (_Object$getOwnPropertySymbols__default["default"]) { var o = _Object$getOwnPropertySymbols__default["default"](e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor__default["default"](e, r).enumerable; })), t.push.apply(t, o); } return t; }
6406
- function _objectSpread$7(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$7(Object(t), !0).forEach(function (r) { _defineProperty__default["default"](e, r, t[r]); }) : _Object$getOwnPropertyDescriptors__default["default"] ? _Object$defineProperties__default["default"](e, _Object$getOwnPropertyDescriptors__default["default"](t)) : ownKeys$7(Object(t)).forEach(function (r) { _Object$defineProperty__default["default"](e, r, _Object$getOwnPropertyDescriptor__default["default"](t, r)); }); } return e; }
6364
+ function ownKeys$7(e, r) { var t = _Object$keys(e); if (_Object$getOwnPropertySymbols) { var o = _Object$getOwnPropertySymbols(e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor(e, r).enumerable; })), t.push.apply(t, o); } return t; }
6365
+ function _objectSpread$7(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$7(Object(t), true).forEach(function (r) { _defineProperty(e, r, t[r]); }) : _Object$getOwnPropertyDescriptors ? _Object$defineProperties(e, _Object$getOwnPropertyDescriptors(t)) : ownKeys$7(Object(t)).forEach(function (r) { _Object$defineProperty(e, r, _Object$getOwnPropertyDescriptor(t, r)); }); } return e; }
6407
6366
  var BialystokSePlExtractor = _objectSpread$7(_objectSpread$7({}, WwwSePlExtractor), {}, {
6408
6367
  domain: 'bialystok.se.pl'
6409
6368
  });
@@ -6659,7 +6618,7 @@ var WwwPolygonComExtractor = {
6659
6618
  img: function img($node) {
6660
6619
  var srcset = $node.attr('srcset');
6661
6620
  var _split = (srcset || '').split(','),
6662
- _split2 = _slicedToArray__default["default"](_split, 1),
6621
+ _split2 = _slicedToArray(_split, 1),
6663
6622
  src = _split2[0];
6664
6623
  if (src) {
6665
6624
  $node.parent().replaceWith("<figure><img srcset=\"".concat(srcset, "\" src=\"").concat(src, "\"/></figure>"));
@@ -6699,7 +6658,7 @@ var WwwThevergeComExtractor = {
6699
6658
  img: function img($node) {
6700
6659
  var srcset = $node.attr('srcset');
6701
6660
  var _split = (srcset || '').split(','),
6702
- _split2 = _slicedToArray__default["default"](_split, 1),
6661
+ _split2 = _slicedToArray(_split, 1),
6703
6662
  src = _split2[0];
6704
6663
  if (src) {
6705
6664
  $node.parent().replaceWith("<figure><img srcset=\"".concat(srcset, "\" src=\"").concat(src, "\"/></figure>"));
@@ -7233,8 +7192,8 @@ var WwwEuronewsComExtractor = {
7233
7192
  }
7234
7193
  };
7235
7194
 
7236
- function ownKeys$6(e, r) { var t = _Object$keys__default["default"](e); if (_Object$getOwnPropertySymbols__default["default"]) { var o = _Object$getOwnPropertySymbols__default["default"](e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor__default["default"](e, r).enumerable; })), t.push.apply(t, o); } return t; }
7237
- function _objectSpread$6(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$6(Object(t), !0).forEach(function (r) { _defineProperty__default["default"](e, r, t[r]); }) : _Object$getOwnPropertyDescriptors__default["default"] ? _Object$defineProperties__default["default"](e, _Object$getOwnPropertyDescriptors__default["default"](t)) : ownKeys$6(Object(t)).forEach(function (r) { _Object$defineProperty__default["default"](e, r, _Object$getOwnPropertyDescriptor__default["default"](t, r)); }); } return e; }
7195
+ function ownKeys$6(e, r) { var t = _Object$keys(e); if (_Object$getOwnPropertySymbols) { var o = _Object$getOwnPropertySymbols(e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor(e, r).enumerable; })), t.push.apply(t, o); } return t; }
7196
+ function _objectSpread$6(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$6(Object(t), true).forEach(function (r) { _defineProperty(e, r, t[r]); }) : _Object$getOwnPropertyDescriptors ? _Object$defineProperties(e, _Object$getOwnPropertyDescriptors(t)) : ownKeys$6(Object(t)).forEach(function (r) { _Object$defineProperty(e, r, _Object$getOwnPropertyDescriptor(t, r)); }); } return e; }
7238
7197
  var GrEuronewsComExtractor = _objectSpread$6(_objectSpread$6({}, WwwEuronewsComExtractor), {}, {
7239
7198
  domain: 'gr.euronews.com'
7240
7199
  });
@@ -7264,211 +7223,426 @@ var WwwIlfattoquotidianoItExtractor = {
7264
7223
  }
7265
7224
  };
7266
7225
 
7226
+ var ActualidadRtComExtractor = {
7227
+ domain: 'actualidad.rt.com',
7228
+ title: {
7229
+ selectors: [['meta[name="og:title"]', 'value']]
7230
+ },
7231
+ author: {
7232
+ selectors: [['meta[name="article:author"]', 'value']]
7233
+ },
7234
+ date_published: {
7235
+ selectors: [['meta[name="mediator_published_time"]', 'value']]
7236
+ },
7237
+ dek: {
7238
+ selectors: [['meta[name="og:description"]', 'value']]
7239
+ },
7240
+ lead_image_url: {
7241
+ selectors: [['meta[name="og:image"]', 'value']]
7242
+ },
7243
+ content: {
7244
+ selectors: ['.ArticleView-text'],
7245
+ transforms: {},
7246
+ // RT wraps each <img> in a <picture> whose <source> elements carry a
7247
+ // base64 placeholder srcset; browsers honor that over the real <img src>,
7248
+ // so drop the sources and let the <img> (real URL) render.
7249
+ clean: ['.ReadMore-root', 'source']
7250
+ }
7251
+ };
7252
+
7253
+ var WwwTweaktownComExtractor = {
7254
+ domain: 'www.tweaktown.com',
7255
+ title: {
7256
+ selectors: [['meta[name="og:title"]', 'value']]
7257
+ },
7258
+ author: {
7259
+ selectors: ['.info-bar-div2 a[rel="author"]']
7260
+ },
7261
+ date_published: {
7262
+ selectors: [['meta[name="article:published_time"]', 'value']]
7263
+ },
7264
+ dek: {
7265
+ selectors: [['meta[name="og:description"]', 'value']]
7266
+ },
7267
+ lead_image_url: {
7268
+ selectors: [['meta[name="og:image"]', 'value']]
7269
+ },
7270
+ content: {
7271
+ selectors: ['#article-body'],
7272
+ transforms: {},
7273
+ clean: []
7274
+ }
7275
+ };
7276
+
7277
+ var WwwFrandroidComExtractor = {
7278
+ domain: 'www.frandroid.com',
7279
+ title: {
7280
+ selectors: [['meta[name="og:title"]', 'value']]
7281
+ },
7282
+ author: {
7283
+ selectors: [['meta[name="parsely-author"]', 'value']]
7284
+ },
7285
+ date_published: {
7286
+ selectors: [['meta[name="article:published_time"]', 'value']]
7287
+ },
7288
+ dek: {
7289
+ selectors: [['meta[name="og:description"]', 'value']]
7290
+ },
7291
+ lead_image_url: {
7292
+ selectors: [['meta[name="og:image"]', 'value']]
7293
+ },
7294
+ content: {
7295
+ selectors: ['section.article-content'],
7296
+ transforms: {
7297
+ h2: function h2(node) {
7298
+ return node.attr('class', 'mercury-parser-keep');
7299
+ },
7300
+ h3: function h3(node) {
7301
+ return node.attr('class', 'mercury-parser-keep');
7302
+ }
7303
+ },
7304
+ clean: ['.index-menu-wrapper', '.is-gastric-kingfisher', '.newsletter-form', '.share', '.article-footer', '.js-feed-posts', '.optidigital-adslot', '[id^="optidigital-adslot"]']
7305
+ }
7306
+ };
7307
+
7308
+ var WwwMotorsportComExtractor = {
7309
+ domain: 'www.motorsport.com',
7310
+ title: {
7311
+ selectors: [['meta[name="og:title"]', 'value']]
7312
+ },
7313
+ author: {
7314
+ selectors: ['.msnt-author-toolbar a[href*="/info/about-us/"]']
7315
+ },
7316
+ date_published: {
7317
+ selectors: [['meta[name="datePublished"]', 'value']]
7318
+ },
7319
+ dek: {
7320
+ selectors: ['h2.text-article-description']
7321
+ },
7322
+ lead_image_url: {
7323
+ selectors: [['meta[name="og:image"]', 'value']]
7324
+ },
7325
+ content: {
7326
+ selectors: ['.ms-article-content'],
7327
+ transforms: {
7328
+ h2: function h2(node) {
7329
+ return node.attr('class', 'mercury-parser-keep');
7330
+ }
7331
+ },
7332
+ clean: ['msnt-survey-promo', '.article-fullwidth-gallery_item ~ .article-fullwidth-gallery_item', '.ms-inarticle-widgets', '.relatedContent', '.ms-apb', '.ms-ap-native', '.outstream_partner']
7333
+ }
7334
+ };
7335
+
7336
+ var SubstackComExtractor = {
7337
+ domain: 'substack.com',
7338
+ title: {
7339
+ selectors: [['meta[name="og:title"]', 'value']]
7340
+ },
7341
+ author: {
7342
+ selectors: [['meta[name="author"]', 'value']]
7343
+ },
7344
+ date_published: {
7345
+ selectors: [['meta[name="article:published_time"]', 'value']]
7346
+ },
7347
+ dek: {
7348
+ selectors: [['meta[name="og:description"]', 'value']]
7349
+ },
7350
+ lead_image_url: {
7351
+ selectors: [['meta[name="og:image"]', 'value']]
7352
+ },
7353
+ content: {
7354
+ selectors: ['.available-content'],
7355
+ transforms: {
7356
+ 'div.captioned-image-container': 'figure',
7357
+ 'div.image-link': function divImageLink($node) {
7358
+ $node.replaceWith($node.find('img'));
7359
+ }
7360
+ },
7361
+ clean: ['.subscribe-widget', '.subscription-widget-wrap', '.subscription-widget-wrap-editor', '.button-wrapper', '.poll-embed', '.share-dialog']
7362
+ }
7363
+ };
7364
+
7365
+ var WwwDwComExtractor = {
7366
+ domain: 'www.dw.com',
7367
+ title: {
7368
+ selectors: [['meta[name="og:title"]', 'value']]
7369
+ },
7370
+ author: {
7371
+ selectors: ['.author-name .author-link']
7372
+ },
7373
+ date_published: {
7374
+ selectors: [['meta[name="date"]', 'value']]
7375
+ },
7376
+ dek: {
7377
+ selectors: [['meta[name="og:description"]', 'value']]
7378
+ },
7379
+ lead_image_url: {
7380
+ selectors: [['meta[name="og:image"]', 'value']]
7381
+ },
7382
+ content: {
7383
+ selectors: ['[data-tracking-name="rich-text"]'],
7384
+ transforms: {
7385
+ // DW inline images are responsive: the real template lives in data-url
7386
+ // with a literal ${formatId} size token that JS would replace, leaving a
7387
+ // broken src in the raw HTML. Resolve it to a standard content size.
7388
+ img: function img(node) {
7389
+ var template = node.attr('data-url') || node.attr('src') || '';
7390
+ if (template.includes('${formatId}')) {
7391
+ node.attr('src', template.replace('${formatId}', '6'));
7392
+ }
7393
+ }
7394
+ },
7395
+ // Embedded tweets are non-functional fallback markup without JS.
7396
+ clean: ['blockquote.tweet.embed']
7397
+ }
7398
+ };
7399
+
7400
+ var WwwAnimenewsnetworkComExtractor = {
7401
+ domain: 'www.animenewsnetwork.com',
7402
+ title: {
7403
+ selectors: [['meta[name="og:title"]', 'value']]
7404
+ },
7405
+ author: null,
7406
+ date_published: {
7407
+ selectors: [['small time', 'datetime']]
7408
+ },
7409
+ dek: {
7410
+ selectors: [['meta[name="description"]', 'value']]
7411
+ },
7412
+ lead_image_url: {
7413
+ selectors: [['meta[name="og:image"]', 'value']]
7414
+ },
7415
+ content: {
7416
+ selectors: ['.KonaBody'],
7417
+ transforms: {
7418
+ // Images are lazy-loaded: real URL in data-src, a spacer.gif in src.
7419
+ // Promote data-src so the images survive cleaning and render.
7420
+ img: function img(node) {
7421
+ var dataSrc = node.attr('data-src');
7422
+ if (dataSrc) {
7423
+ var src = dataSrc.startsWith('/') ? "https://www.animenewsnetwork.com".concat(dataSrc) : dataSrc;
7424
+ node.attr('src', src);
7425
+ node.removeAttr('data-src');
7426
+ }
7427
+ }
7428
+ },
7429
+ // .intro duplicates the dek; instaread-player is an audio widget.
7430
+ clean: ['.intro', 'instaread-player']
7431
+ }
7432
+ };
7433
+
7267
7434
  var CustomExtractors = /*#__PURE__*/Object.freeze({
7268
7435
  __proto__: null,
7436
+ AbcnewsGoComExtractor: AbcnewsGoComExtractor,
7437
+ ActualidadRtComExtractor: ActualidadRtComExtractor,
7438
+ ApartmentTherapyExtractor: ApartmentTherapyExtractor,
7439
+ ArstechnicaComExtractor: ArstechnicaComExtractor,
7269
7440
  BalloonJuiceComExtractor: BalloonJuiceComExtractor,
7441
+ BialystokSePlExtractor: BialystokSePlExtractor,
7442
+ BiorxivOrgExtractor: BiorxivOrgExtractor,
7443
+ BlisterreviewComExtractor: BlisterreviewComExtractor,
7270
7444
  BloggerExtractor: BloggerExtractor,
7271
- NYMagExtractor: NYMagExtractor,
7272
- WikipediaExtractor: WikipediaExtractor,
7273
- TwitterExtractor: TwitterExtractor,
7274
- NYTimesExtractor: NYTimesExtractor,
7275
- TheAtlanticExtractor: TheAtlanticExtractor,
7276
- NewYorkerExtractor: NewYorkerExtractor,
7277
- WiredExtractor: WiredExtractor,
7278
- MSNExtractor: MSNExtractor,
7279
- YahooExtractor: YahooExtractor,
7445
+ BookwalkerJpExtractor: BookwalkerJpExtractor,
7446
+ BroadwayWorldExtractor: BroadwayWorldExtractor,
7447
+ BskyAppExtractor: BskyAppExtractor,
7448
+ BuzzapJpExtractor: BuzzapJpExtractor,
7280
7449
  BuzzfeedExtractor: BuzzfeedExtractor,
7281
- WikiaExtractor: WikiaExtractor,
7282
- LittleThingsExtractor: LittleThingsExtractor,
7283
- PoliticoExtractor: PoliticoExtractor,
7450
+ ChicagoyimbyComExtractor: ChicagoyimbyComExtractor,
7451
+ ClinicaltrialsGovExtractor: ClinicaltrialsGovExtractor,
7452
+ DeadlineComExtractor: DeadlineComExtractor,
7284
7453
  DeadspinExtractor: DeadspinExtractor,
7285
- BroadwayWorldExtractor: BroadwayWorldExtractor,
7286
- ApartmentTherapyExtractor: ApartmentTherapyExtractor,
7287
- MediumExtractor: MediumExtractor,
7288
- WwwTmzComExtractor: WwwTmzComExtractor,
7289
- WwwWashingtonpostComExtractor: WwwWashingtonpostComExtractor,
7290
- WwwHuffingtonpostComExtractor: WwwHuffingtonpostComExtractor,
7291
- NewrepublicComExtractor: NewrepublicComExtractor,
7292
- MoneyCnnComExtractor: MoneyCnnComExtractor,
7293
- WwwCnnComExtractor: WwwCnnComExtractor,
7294
- WwwAolComExtractor: WwwAolComExtractor,
7295
- WwwYoutubeComExtractor: WwwYoutubeComExtractor,
7296
- WwwTheguardianComExtractor: WwwTheguardianComExtractor,
7297
- WwwSbnationComExtractor: WwwSbnationComExtractor,
7298
- WwwBloombergComExtractor: WwwBloombergComExtractor,
7299
- WwwBustleComExtractor: WwwBustleComExtractor,
7300
- WwwNprOrgExtractor: WwwNprOrgExtractor,
7301
- WwwRecodeNetExtractor: WwwRecodeNetExtractor,
7302
- QzComExtractor: QzComExtractor,
7303
- WwwDmagazineComExtractor: WwwDmagazineComExtractor,
7304
- WwwReutersComExtractor: WwwReutersComExtractor,
7305
- MashableComExtractor: MashableComExtractor,
7306
- WwwChicagotribuneComExtractor: WwwChicagotribuneComExtractor,
7307
- WwwVoxComExtractor: WwwVoxComExtractor,
7308
- NewsNationalgeographicComExtractor: NewsNationalgeographicComExtractor,
7309
- WwwNationalgeographicComExtractor: WwwNationalgeographicComExtractor,
7310
- WwwLatimesComExtractor: WwwLatimesComExtractor,
7311
- PagesixComExtractor: PagesixComExtractor,
7312
- ThefederalistpapersOrgExtractor: ThefederalistpapersOrgExtractor,
7313
- WwwCbssportsComExtractor: WwwCbssportsComExtractor,
7314
- WwwMsnbcComExtractor: WwwMsnbcComExtractor,
7315
- WwwThepoliticalinsiderComExtractor: WwwThepoliticalinsiderComExtractor,
7316
- WwwMentalflossComExtractor: WwwMentalflossComExtractor,
7317
- AbcnewsGoComExtractor: AbcnewsGoComExtractor,
7318
- WwwNydailynewsComExtractor: WwwNydailynewsComExtractor,
7319
- WwwCnbcComExtractor: WwwCnbcComExtractor,
7320
- WwwPopsugarComExtractor: WwwPopsugarComExtractor,
7321
- ObserverComExtractor: ObserverComExtractor,
7322
- PeopleComExtractor: PeopleComExtractor,
7323
- WwwUsmagazineComExtractor: WwwUsmagazineComExtractor,
7324
- WwwRollingstoneComExtractor: WwwRollingstoneComExtractor,
7325
- twofortysevensportsComExtractor: twofortysevensportsComExtractor,
7326
- UproxxComExtractor: UproxxComExtractor,
7327
- WwwEonlineComExtractor: WwwEonlineComExtractor,
7328
- WwwMiamiheraldComExtractor: WwwMiamiheraldComExtractor,
7329
- WwwRefinery29ComExtractor: WwwRefinery29ComExtractor,
7330
- WwwMacrumorsComExtractor: WwwMacrumorsComExtractor,
7331
- WwwAndroidcentralComExtractor: WwwAndroidcentralComExtractor,
7332
- WwwSiComExtractor: WwwSiComExtractor,
7333
- WwwRawstoryComExtractor: WwwRawstoryComExtractor,
7334
- WwwCnetComExtractor: WwwCnetComExtractor,
7335
- WwwTodayComExtractor: WwwTodayComExtractor,
7336
- WwwAlComExtractor: WwwAlComExtractor,
7337
- WwwThepennyhoarderComExtractor: WwwThepennyhoarderComExtractor,
7338
- WwwWesternjournalismComExtractor: WwwWesternjournalismComExtractor,
7339
- WwwAmericanowComExtractor: WwwAmericanowComExtractor,
7340
- ScienceflyComExtractor: ScienceflyComExtractor,
7341
- HellogigglesComExtractor: HellogigglesComExtractor,
7342
- ThoughtcatalogComExtractor: ThoughtcatalogComExtractor,
7343
- WwwInquisitrComExtractor: WwwInquisitrComExtractor,
7344
- WwwNbcnewsComExtractor: WwwNbcnewsComExtractor,
7454
+ EconomictimesIndiatimesComExtractor: EconomictimesIndiatimesComExtractor,
7455
+ EpaperZeitDeExtractor: EpaperZeitDeExtractor,
7456
+ FactorioComExtractor: FactorioComExtractor,
7345
7457
  FortuneComExtractor: FortuneComExtractor,
7346
- WwwLinkedinComExtractor: WwwLinkedinComExtractor,
7347
- ObamawhitehouseArchivesGovExtractor: ObamawhitehouseArchivesGovExtractor,
7348
- WwwOpposingviewsComExtractor: WwwOpposingviewsComExtractor,
7349
- WwwProspectmagazineCoUkExtractor: WwwProspectmagazineCoUkExtractor,
7350
7458
  ForwardComExtractor: ForwardComExtractor,
7351
- WwwQdailyComExtractor: WwwQdailyComExtractor,
7459
+ GeniusComExtractor: GeniusComExtractor,
7460
+ GetnewsJpExtractor: GetnewsJpExtractor,
7461
+ GithubComExtractor: GithubComExtractor,
7462
+ GonintendoComExtractor: GonintendoComExtractor,
7352
7463
  GothamistComExtractor: GothamistComExtractor,
7353
- WwwFoolComExtractor: WwwFoolComExtractor,
7354
- WwwSlateComExtractor: WwwSlateComExtractor,
7464
+ GrEuronewsComExtractor: GrEuronewsComExtractor,
7465
+ HellogigglesComExtractor: HellogigglesComExtractor,
7355
7466
  IciRadioCanadaCaExtractor: IciRadioCanadaCaExtractor,
7356
- WwwFortinetComExtractor: WwwFortinetComExtractor,
7357
- WwwFastcompanyComExtractor: WwwFastcompanyComExtractor,
7358
- BlisterreviewComExtractor: BlisterreviewComExtractor,
7467
+ JapanCnetComExtractor: JapanCnetComExtractor,
7468
+ JapanZdnetComExtractor: JapanZdnetComExtractor,
7469
+ JvndbJvnJpExtractor: JvndbJvnJpExtractor,
7470
+ LittleThingsExtractor: LittleThingsExtractor,
7471
+ LodzSePlExtractor: LodzSePlExtractor,
7472
+ LublinSePlExtractor: LublinSePlExtractor,
7473
+ MSNExtractor: MSNExtractor,
7474
+ MaTtiasBeExtractor: MaTtiasBeExtractor,
7475
+ MashableComExtractor: MashableComExtractor,
7476
+ MediumExtractor: MediumExtractor,
7477
+ MobilesyrupComExtractor: MobilesyrupComExtractor,
7478
+ MoneyCnnComExtractor: MoneyCnnComExtractor,
7479
+ NYMagExtractor: NYMagExtractor,
7480
+ NYTimesExtractor: NYTimesExtractor,
7481
+ NewYorkerExtractor: NewYorkerExtractor,
7482
+ NewrepublicComExtractor: NewrepublicComExtractor,
7359
7483
  NewsMynaviJpExtractor: NewsMynaviJpExtractor,
7360
- ClinicaltrialsGovExtractor: ClinicaltrialsGovExtractor,
7361
- GithubComExtractor: GithubComExtractor,
7362
- WwwRedditComExtractor: WwwRedditComExtractor,
7484
+ NewsNationalgeographicComExtractor: NewsNationalgeographicComExtractor,
7485
+ NewsPtsOrgTwExtractor: NewsPtsOrgTwExtractor,
7486
+ Nineto5googleComExtractor: Nineto5googleComExtractor,
7487
+ Nineto5linuxComExtractor: Nineto5linuxComExtractor,
7488
+ Nineto5macComExtractor: Nineto5macComExtractor,
7489
+ ObamawhitehouseArchivesGovExtractor: ObamawhitehouseArchivesGovExtractor,
7490
+ ObserverComExtractor: ObserverComExtractor,
7491
+ OrfAtExtractor: OrfAtExtractor,
7363
7492
  OtrsComExtractor: OtrsComExtractor,
7364
- WwwOssnewsJpExtractor: WwwOssnewsJpExtractor,
7365
- BuzzapJpExtractor: BuzzapJpExtractor,
7366
- WwwAsahiComExtractor: WwwAsahiComExtractor,
7367
- WwwSanwaCoJpExtractor: WwwSanwaCoJpExtractor,
7368
- WwwElecomCoJpExtractor: WwwElecomCoJpExtractor,
7369
- ScanNetsecurityNeJpExtractor: ScanNetsecurityNeJpExtractor,
7370
- JvndbJvnJpExtractor: JvndbJvnJpExtractor,
7371
- GeniusComExtractor: GeniusComExtractor,
7372
- WwwJnsaOrgExtractor: WwwJnsaOrgExtractor,
7493
+ PagesixComExtractor: PagesixComExtractor,
7494
+ PastebinComExtractor: PastebinComExtractor,
7495
+ PeopleComExtractor: PeopleComExtractor,
7373
7496
  PhpspotOrgExtractor: PhpspotOrgExtractor,
7374
- WwwInfoqComExtractor: WwwInfoqComExtractor,
7375
- WwwMoongiftJpExtractor: WwwMoongiftJpExtractor,
7376
- WwwItmediaCoJpExtractor: WwwItmediaCoJpExtractor,
7377
- WwwPublickey1JpExtractor: WwwPublickey1JpExtractor,
7378
- TakagihiromitsuJpExtractor: TakagihiromitsuJpExtractor,
7379
- BookwalkerJpExtractor: BookwalkerJpExtractor,
7380
- WwwYomiuriCoJpExtractor: WwwYomiuriCoJpExtractor,
7381
- JapanCnetComExtractor: JapanCnetComExtractor,
7382
- DeadlineComExtractor: DeadlineComExtractor,
7383
- WwwGizmodoJpExtractor: WwwGizmodoJpExtractor,
7384
- GetnewsJpExtractor: GetnewsJpExtractor,
7385
- WwwLifehackerJpExtractor: WwwLifehackerJpExtractor,
7497
+ PitchforkComExtractor: PitchforkComExtractor,
7498
+ PoliticoExtractor: PoliticoExtractor,
7499
+ PolitykaSePlExtractor: PolitykaSePlExtractor,
7500
+ PolskisamorzadSePlExtractor: PolskisamorzadSePlExtractor,
7501
+ PortalobronnySePlExtractor: PortalobronnySePlExtractor,
7502
+ QzComExtractor: QzComExtractor,
7503
+ ScanNetsecurityNeJpExtractor: ScanNetsecurityNeJpExtractor,
7504
+ ScienceflyComExtractor: ScienceflyComExtractor,
7386
7505
  SectIijAdJpExtractor: SectIijAdJpExtractor,
7387
- WwwOreillyCoJpExtractor: WwwOreillyCoJpExtractor,
7388
- WwwIpaGoJpExtractor: WwwIpaGoJpExtractor,
7389
- WeeklyAsciiJpExtractor: WeeklyAsciiJpExtractor,
7506
+ SgNewsYahooComExtractor: SgNewsYahooComExtractor,
7507
+ SpektrumExtractor: SpektrumExtractor,
7508
+ SportSePlExtractor: SportSePlExtractor,
7509
+ SubstackComExtractor: SubstackComExtractor,
7510
+ SuperbizSePlExtractor: SuperbizSePlExtractor,
7511
+ SuperserialeSePlExtractor: SuperserialeSePlExtractor,
7512
+ SzczecinSePlExtractor: SzczecinSePlExtractor,
7513
+ TakagihiromitsuJpExtractor: TakagihiromitsuJpExtractor,
7514
+ TarnkappeInfoExtractor: TarnkappeInfoExtractor,
7515
+ TechcrunchComExtractor: TechcrunchComExtractor,
7390
7516
  TechlogIijAdJpExtractor: TechlogIijAdJpExtractor,
7391
- WiredJpExtractor: WiredJpExtractor,
7392
- JapanZdnetComExtractor: JapanZdnetComExtractor,
7393
- WwwRbbtodayComExtractor: WwwRbbtodayComExtractor,
7394
- WwwLemondeFrExtractor: WwwLemondeFrExtractor,
7395
- WwwPhoronixComExtractor: WwwPhoronixComExtractor,
7396
- PitchforkComExtractor: PitchforkComExtractor,
7397
- BiorxivOrgExtractor: BiorxivOrgExtractor,
7398
- EpaperZeitDeExtractor: EpaperZeitDeExtractor,
7399
- WwwLadbibleComExtractor: WwwLadbibleComExtractor,
7517
+ TerminaltroveComExtractor: TerminaltroveComExtractor,
7518
+ TheAtlanticExtractor: TheAtlanticExtractor,
7519
+ ThefederalistpapersOrgExtractor: ThefederalistpapersOrgExtractor,
7520
+ ThoughtcatalogComExtractor: ThoughtcatalogComExtractor,
7400
7521
  TimesofindiaIndiatimesComExtractor: TimesofindiaIndiatimesComExtractor,
7401
- MaTtiasBeExtractor: MaTtiasBeExtractor,
7402
- PastebinComExtractor: PastebinComExtractor,
7403
- WwwAbendblattDeExtractor: WwwAbendblattDeExtractor,
7404
- WwwGrueneDeExtractor: WwwGrueneDeExtractor,
7405
- ArstechnicaComExtractor: ArstechnicaComExtractor,
7406
- WwwNdtvComExtractor: WwwNdtvComExtractor,
7407
- SpektrumExtractor: SpektrumExtractor,
7408
- WwwInvestmentexecutiveComExtractor: WwwInvestmentexecutiveComExtractor,
7409
- WwwCbcCaExtractor: WwwCbcCaExtractor,
7410
- WwwVersantsComExtractor: WwwVersantsComExtractor,
7522
+ TldrTechExtractor: TldrTechExtractor,
7523
+ TwitterExtractor: TwitterExtractor,
7524
+ UproxxComExtractor: UproxxComExtractor,
7525
+ WccftechComExtractor: WccftechComExtractor,
7526
+ WeeklyAsciiJpExtractor: WeeklyAsciiJpExtractor,
7527
+ WikiaExtractor: WikiaExtractor,
7528
+ WikipediaExtractor: WikipediaExtractor,
7529
+ WiredExtractor: WiredExtractor,
7530
+ WiredJpExtractor: WiredJpExtractor,
7531
+ WroclawSePlExtractor: WroclawSePlExtractor,
7411
7532
  Www1pezeshkComExtractor: Www1pezeshkComExtractor,
7533
+ WwwAbendblattDeExtractor: WwwAbendblattDeExtractor,
7534
+ WwwAlComExtractor: WwwAlComExtractor,
7535
+ WwwAmericanowComExtractor: WwwAmericanowComExtractor,
7412
7536
  WwwAndroidauthorityComExtractor: WwwAndroidauthorityComExtractor,
7413
- TechcrunchComExtractor: TechcrunchComExtractor,
7414
- WwwHardwarezoneComSgExtractor: WwwHardwarezoneComSgExtractor,
7415
- WwwSpiegelDeExtractor: WwwSpiegelDeExtractor,
7416
- MobilesyrupComExtractor: MobilesyrupComExtractor,
7537
+ WwwAndroidcentralComExtractor: WwwAndroidcentralComExtractor,
7538
+ WwwAnimenewsnetworkComExtractor: WwwAnimenewsnetworkComExtractor,
7539
+ WwwAolComExtractor: WwwAolComExtractor,
7540
+ WwwAsahiComExtractor: WwwAsahiComExtractor,
7541
+ WwwBlickDeExtractor: WwwBlickDeExtractor,
7542
+ WwwBloombergComExtractor: WwwBloombergComExtractor,
7543
+ WwwBustleComExtractor: WwwBustleComExtractor,
7544
+ WwwCbcCaExtractor: WwwCbcCaExtractor,
7545
+ WwwCbssportsComExtractor: WwwCbssportsComExtractor,
7417
7546
  WwwChannelnewsasiaComExtractor: WwwChannelnewsasiaComExtractor,
7418
- WccftechComExtractor: WccftechComExtractor,
7547
+ WwwChicagotribuneComExtractor: WwwChicagotribuneComExtractor,
7548
+ WwwCnbcComExtractor: WwwCnbcComExtractor,
7549
+ WwwCnetComExtractor: WwwCnetComExtractor,
7550
+ WwwCnnComExtractor: WwwCnnComExtractor,
7551
+ WwwDmagazineComExtractor: WwwDmagazineComExtractor,
7552
+ WwwDwComExtractor: WwwDwComExtractor,
7553
+ WwwElecomCoJpExtractor: WwwElecomCoJpExtractor,
7554
+ WwwEngadgetComExtractor: WwwEngadgetComExtractor,
7555
+ WwwEonlineComExtractor: WwwEonlineComExtractor,
7556
+ WwwEuronewsComExtractor: WwwEuronewsComExtractor,
7557
+ WwwFastcompanyComExtractor: WwwFastcompanyComExtractor,
7558
+ WwwFlatpanelshdComExtractor: WwwFlatpanelshdComExtractor,
7559
+ WwwFoolComExtractor: WwwFoolComExtractor,
7560
+ WwwFortinetComExtractor: WwwFortinetComExtractor,
7561
+ WwwFrandroidComExtractor: WwwFrandroidComExtractor,
7562
+ WwwFuturaSciencesComExtractor: WwwFuturaSciencesComExtractor,
7563
+ WwwGizmodoJpExtractor: WwwGizmodoJpExtractor,
7564
+ WwwGrueneDeExtractor: WwwGrueneDeExtractor,
7565
+ WwwHardwarezoneComSgExtractor: WwwHardwarezoneComSgExtractor,
7419
7566
  WwwHeiseDeExtractor: WwwHeiseDeExtractor,
7420
- TldrTechExtractor: TldrTechExtractor,
7421
- BskyAppExtractor: BskyAppExtractor,
7422
- WwwNtvDeExtractor: WwwNtvDeExtractor,
7423
- SportSePlExtractor: SportSePlExtractor,
7424
- WwwSePlExtractor: WwwSePlExtractor,
7425
- PolitykaSePlExtractor: PolitykaSePlExtractor,
7426
- SuperserialeSePlExtractor: SuperserialeSePlExtractor,
7427
- SzczecinSePlExtractor: SzczecinSePlExtractor,
7428
- SuperbizSePlExtractor: SuperbizSePlExtractor,
7429
- PortalobronnySePlExtractor: PortalobronnySePlExtractor,
7430
- PolskisamorzadSePlExtractor: PolskisamorzadSePlExtractor,
7431
- LodzSePlExtractor: LodzSePlExtractor,
7432
- WroclawSePlExtractor: WroclawSePlExtractor,
7433
- LublinSePlExtractor: LublinSePlExtractor,
7434
- BialystokSePlExtractor: BialystokSePlExtractor,
7567
+ WwwHuffingtonpostComExtractor: WwwHuffingtonpostComExtractor,
7568
+ WwwIlfattoquotidianoItExtractor: WwwIlfattoquotidianoItExtractor,
7569
+ WwwInfoqComExtractor: WwwInfoqComExtractor,
7570
+ WwwInquisitrComExtractor: WwwInquisitrComExtractor,
7571
+ WwwInvestmentexecutiveComExtractor: WwwInvestmentexecutiveComExtractor,
7572
+ WwwIpaGoJpExtractor: WwwIpaGoJpExtractor,
7573
+ WwwItmediaCoJpExtractor: WwwItmediaCoJpExtractor,
7574
+ WwwJalopnikComExtractor: WwwJalopnikComExtractor,
7575
+ WwwJnsaOrgExtractor: WwwJnsaOrgExtractor,
7576
+ WwwLadbibleComExtractor: WwwLadbibleComExtractor,
7577
+ WwwLatimesComExtractor: WwwLatimesComExtractor,
7435
7578
  WwwLebensmittelwarnungDeExtractor: WwwLebensmittelwarnungDeExtractor,
7579
+ WwwLemondeFrExtractor: WwwLemondeFrExtractor,
7580
+ WwwLifehackerJpExtractor: WwwLifehackerJpExtractor,
7581
+ WwwLinkedinComExtractor: WwwLinkedinComExtractor,
7582
+ WwwMacrumorsComExtractor: WwwMacrumorsComExtractor,
7583
+ WwwMentalflossComExtractor: WwwMentalflossComExtractor,
7584
+ WwwMiamiheraldComExtractor: WwwMiamiheraldComExtractor,
7585
+ WwwMoongiftJpExtractor: WwwMoongiftJpExtractor,
7586
+ WwwMotorsportComExtractor: WwwMotorsportComExtractor,
7587
+ WwwMsnbcComExtractor: WwwMsnbcComExtractor,
7588
+ WwwNationalgeographicComExtractor: WwwNationalgeographicComExtractor,
7589
+ WwwNbcnewsComExtractor: WwwNbcnewsComExtractor,
7590
+ WwwNdtvComExtractor: WwwNdtvComExtractor,
7591
+ WwwNotebookcheckNetExtractor: WwwNotebookcheckNetExtractor,
7592
+ WwwNprOrgExtractor: WwwNprOrgExtractor,
7593
+ WwwNtvDeExtractor: WwwNtvDeExtractor,
7594
+ WwwNumeramaComExtractor: WwwNumeramaComExtractor,
7595
+ WwwNydailynewsComExtractor: WwwNydailynewsComExtractor,
7596
+ WwwOpposingviewsComExtractor: WwwOpposingviewsComExtractor,
7597
+ WwwOreillyCoJpExtractor: WwwOreillyCoJpExtractor,
7598
+ WwwOssnewsJpExtractor: WwwOssnewsJpExtractor,
7599
+ WwwPhoronixComExtractor: WwwPhoronixComExtractor,
7600
+ WwwPolygonComExtractor: WwwPolygonComExtractor,
7601
+ WwwPopsugarComExtractor: WwwPopsugarComExtractor,
7602
+ WwwProspectmagazineCoUkExtractor: WwwProspectmagazineCoUkExtractor,
7603
+ WwwPublickey1JpExtractor: WwwPublickey1JpExtractor,
7436
7604
  WwwQbitaiComExtractor: WwwQbitaiComExtractor,
7437
- EconomictimesIndiatimesComExtractor: EconomictimesIndiatimesComExtractor,
7438
- FactorioComExtractor: FactorioComExtractor,
7605
+ WwwQdailyComExtractor: WwwQdailyComExtractor,
7606
+ WwwRawstoryComExtractor: WwwRawstoryComExtractor,
7607
+ WwwRbbtodayComExtractor: WwwRbbtodayComExtractor,
7608
+ WwwRecodeNetExtractor: WwwRecodeNetExtractor,
7609
+ WwwRedditComExtractor: WwwRedditComExtractor,
7610
+ WwwRefinery29ComExtractor: WwwRefinery29ComExtractor,
7611
+ WwwReutersComExtractor: WwwReutersComExtractor,
7612
+ WwwRollingstoneComExtractor: WwwRollingstoneComExtractor,
7613
+ WwwSanwaCoJpExtractor: WwwSanwaCoJpExtractor,
7614
+ WwwSbnationComExtractor: WwwSbnationComExtractor,
7615
+ WwwSePlExtractor: WwwSePlExtractor,
7616
+ WwwSiComExtractor: WwwSiComExtractor,
7617
+ WwwSlateComExtractor: WwwSlateComExtractor,
7618
+ WwwSpiegelDeExtractor: WwwSpiegelDeExtractor,
7439
7619
  WwwTagesschauDeExtractor: WwwTagesschauDeExtractor,
7440
- Nineto5googleComExtractor: Nineto5googleComExtractor,
7441
- WwwEngadgetComExtractor: WwwEngadgetComExtractor,
7442
- TarnkappeInfoExtractor: TarnkappeInfoExtractor,
7443
- WwwVortezNetExtractor: WwwVortezNetExtractor,
7444
- WwwPolygonComExtractor: WwwPolygonComExtractor,
7445
- WwwThevergeComExtractor: WwwThevergeComExtractor,
7446
7620
  WwwTechpowerupComExtractor: WwwTechpowerupComExtractor,
7447
- WwwFlatpanelshdComExtractor: WwwFlatpanelshdComExtractor,
7448
- Nineto5macComExtractor: Nineto5macComExtractor,
7449
- WwwNotebookcheckNetExtractor: WwwNotebookcheckNetExtractor,
7450
- WwwFuturaSciencesComExtractor: WwwFuturaSciencesComExtractor,
7451
- SgNewsYahooComExtractor: SgNewsYahooComExtractor,
7452
- GonintendoComExtractor: GonintendoComExtractor,
7453
- OrfAtExtractor: OrfAtExtractor,
7454
- WwwVideogameschronicleComExtractor: WwwVideogameschronicleComExtractor,
7455
- WwwNumeramaComExtractor: WwwNumeramaComExtractor,
7456
- TerminaltroveComExtractor: TerminaltroveComExtractor,
7457
- NewsPtsOrgTwExtractor: NewsPtsOrgTwExtractor,
7458
7621
  WwwThedriveComExtractor: WwwThedriveComExtractor,
7459
- ChicagoyimbyComExtractor: ChicagoyimbyComExtractor,
7460
- WwwJalopnikComExtractor: WwwJalopnikComExtractor,
7461
- Nineto5linuxComExtractor: Nineto5linuxComExtractor,
7622
+ WwwTheguardianComExtractor: WwwTheguardianComExtractor,
7623
+ WwwThepennyhoarderComExtractor: WwwThepennyhoarderComExtractor,
7624
+ WwwThepoliticalinsiderComExtractor: WwwThepoliticalinsiderComExtractor,
7625
+ WwwThevergeComExtractor: WwwThevergeComExtractor,
7626
+ WwwTmzComExtractor: WwwTmzComExtractor,
7627
+ WwwTodayComExtractor: WwwTodayComExtractor,
7462
7628
  WwwTransfermarktDeExtractor: WwwTransfermarktDeExtractor,
7463
- WwwBlickDeExtractor: WwwBlickDeExtractor,
7464
- WwwEuronewsComExtractor: WwwEuronewsComExtractor,
7465
- GrEuronewsComExtractor: GrEuronewsComExtractor,
7466
- WwwIlfattoquotidianoItExtractor: WwwIlfattoquotidianoItExtractor
7629
+ WwwTweaktownComExtractor: WwwTweaktownComExtractor,
7630
+ WwwUsmagazineComExtractor: WwwUsmagazineComExtractor,
7631
+ WwwVersantsComExtractor: WwwVersantsComExtractor,
7632
+ WwwVideogameschronicleComExtractor: WwwVideogameschronicleComExtractor,
7633
+ WwwVortezNetExtractor: WwwVortezNetExtractor,
7634
+ WwwVoxComExtractor: WwwVoxComExtractor,
7635
+ WwwWashingtonpostComExtractor: WwwWashingtonpostComExtractor,
7636
+ WwwWesternjournalismComExtractor: WwwWesternjournalismComExtractor,
7637
+ WwwYomiuriCoJpExtractor: WwwYomiuriCoJpExtractor,
7638
+ WwwYoutubeComExtractor: WwwYoutubeComExtractor,
7639
+ YahooExtractor: YahooExtractor,
7640
+ twofortysevensportsComExtractor: twofortysevensportsComExtractor
7467
7641
  });
7468
7642
 
7469
- function ownKeys$5(e, r) { var t = _Object$keys__default["default"](e); if (_Object$getOwnPropertySymbols__default["default"]) { var o = _Object$getOwnPropertySymbols__default["default"](e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor__default["default"](e, r).enumerable; })), t.push.apply(t, o); } return t; }
7470
- function _objectSpread$5(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$5(Object(t), !0).forEach(function (r) { _defineProperty__default["default"](e, r, t[r]); }) : _Object$getOwnPropertyDescriptors__default["default"] ? _Object$defineProperties__default["default"](e, _Object$getOwnPropertyDescriptors__default["default"](t)) : ownKeys$5(Object(t)).forEach(function (r) { _Object$defineProperty__default["default"](e, r, _Object$getOwnPropertyDescriptor__default["default"](t, r)); }); } return e; }
7471
- var Extractors = _Object$keys__default["default"](CustomExtractors).reduce(function (acc, key) {
7643
+ function ownKeys$5(e, r) { var t = _Object$keys(e); if (_Object$getOwnPropertySymbols) { var o = _Object$getOwnPropertySymbols(e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor(e, r).enumerable; })), t.push.apply(t, o); } return t; }
7644
+ function _objectSpread$5(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$5(Object(t), true).forEach(function (r) { _defineProperty(e, r, t[r]); }) : _Object$getOwnPropertyDescriptors ? _Object$defineProperties(e, _Object$getOwnPropertyDescriptors(t)) : ownKeys$5(Object(t)).forEach(function (r) { _Object$defineProperty(e, r, _Object$getOwnPropertyDescriptor(t, r)); }); } return e; }
7645
+ var Extractors = _Object$keys(CustomExtractors).reduce(function (acc, key) {
7472
7646
  var extractor = CustomExtractors[key];
7473
7647
  return _objectSpread$5(_objectSpread$5({}, acc), mergeSupportedDomains(extractor));
7474
7648
  }, {});
@@ -7538,9 +7712,9 @@ function cleanDek(dek, _ref) {
7538
7712
  return normalizeSpaces(dekText.trim());
7539
7713
  }
7540
7714
 
7541
- dayjs__default["default"].extend(utc__default["default"]);
7542
- dayjs__default["default"].extend(timezonePlugin__default["default"]);
7543
- dayjs__default["default"].extend(customParseFormat__default["default"]);
7715
+ dayjs.extend(utc);
7716
+ dayjs.extend(timezonePlugin);
7717
+ dayjs.extend(customParseFormat);
7544
7718
  var TIMEZONE_ABBR_RE = /\b(EST|EDT|CST|CDT|MST|MDT|PST|PDT|ET|CT|MT|PT|GMT|UTC)\b/gi;
7545
7719
  // Check if string contains timezone offset info (e.g., +0000, GMT+0000, Z)
7546
7720
  var HAS_TIMEZONE_RE = /([+-]\d{2}:?\d{2}|Z|\bGMT[+-]\d+|\bUTC\b)/i;
@@ -7560,53 +7734,53 @@ function cleanDateString(dateString) {
7560
7734
  }
7561
7735
  function createDate(dateString, timezone, format) {
7562
7736
  if (TIME_WITH_OFFSET_RE.test(dateString)) {
7563
- return dayjs__default["default"](new Date(dateString));
7737
+ return dayjs(new Date(dateString));
7564
7738
  }
7565
7739
  if (TIME_AGO_STRING.test(dateString)) {
7566
7740
  var fragments = TIME_AGO_STRING.exec(dateString);
7567
- return dayjs__default["default"]().subtract(fragments[1], fragments[2]);
7741
+ return dayjs().subtract(fragments[1], fragments[2]);
7568
7742
  }
7569
7743
  if (TIME_NOW_STRING.test(dateString)) {
7570
- return dayjs__default["default"]();
7744
+ return dayjs();
7571
7745
  }
7572
7746
  var stringHasTimezone = hasTimezoneInfo(dateString);
7573
7747
  var cleanedDateString = stripTimezoneAbbr(dateString);
7574
7748
  if (stringHasTimezone) {
7575
7749
  var _nativeDate = new Date(dateString);
7576
- if (!_Number$isNaN__default["default"](_nativeDate.getTime())) {
7577
- return dayjs__default["default"](_nativeDate);
7750
+ if (!_Number$isNaN(_nativeDate.getTime())) {
7751
+ return dayjs(_nativeDate);
7578
7752
  }
7579
7753
  }
7580
7754
  if (timezone && !stringHasTimezone) {
7581
7755
  if (format) {
7582
7756
  var cleanedFormat = stripTimezoneFromFormat(format);
7583
7757
  try {
7584
- var _parsed = dayjs__default["default"].tz(cleanedDateString, cleanedFormat, timezone);
7758
+ var _parsed = dayjs.tz(cleanedDateString, cleanedFormat, timezone);
7585
7759
  if (_parsed.isValid()) return _parsed;
7586
7760
  } catch (_unused) {
7587
7761
  // Fall through
7588
7762
  }
7589
7763
  }
7590
7764
  var _nativeDate2 = new Date(cleanedDateString);
7591
- if (!_Number$isNaN__default["default"](_nativeDate2.getTime())) {
7592
- return dayjs__default["default"](_nativeDate2).tz(timezone, true);
7765
+ if (!_Number$isNaN(_nativeDate2.getTime())) {
7766
+ return dayjs(_nativeDate2).tz(timezone, true);
7593
7767
  }
7594
- var parsed = dayjs__default["default"](cleanedDateString);
7768
+ var parsed = dayjs(cleanedDateString);
7595
7769
  if (parsed.isValid()) {
7596
7770
  return parsed.tz(timezone, true);
7597
7771
  }
7598
- return dayjs__default["default"](null);
7772
+ return dayjs(null);
7599
7773
  }
7600
7774
  if (format) {
7601
7775
  var _cleanedFormat = stripTimezoneFromFormat(format);
7602
- var _parsed2 = dayjs__default["default"](cleanedDateString, _cleanedFormat);
7776
+ var _parsed2 = dayjs(cleanedDateString, _cleanedFormat);
7603
7777
  if (_parsed2.isValid()) return _parsed2;
7604
7778
  }
7605
7779
  var nativeDate = new Date(cleanedDateString);
7606
- if (!_Number$isNaN__default["default"](nativeDate.getTime())) {
7607
- return dayjs__default["default"](nativeDate);
7780
+ if (!_Number$isNaN(nativeDate.getTime())) {
7781
+ return dayjs(nativeDate);
7608
7782
  }
7609
- return dayjs__default["default"](cleanedDateString);
7783
+ return dayjs(cleanedDateString);
7610
7784
  }
7611
7785
 
7612
7786
  // Take a date published string, and hopefully return a date out of
@@ -7617,10 +7791,10 @@ function cleanDatePublished(dateString) {
7617
7791
  format = _ref.format;
7618
7792
  // If string is in milliseconds or seconds, convert to int and return
7619
7793
  if (MS_DATE_STRING.test(dateString)) {
7620
- return new Date(_parseInt__default["default"](dateString, 10)).toISOString();
7794
+ return new Date(_parseInt(dateString, 10)).toISOString();
7621
7795
  }
7622
7796
  if (SEC_DATE_STRING.test(dateString)) {
7623
- return new Date(_parseInt__default["default"](dateString, 10) * 1000).toISOString();
7797
+ return new Date(_parseInt(dateString, 10) * 1000).toISOString();
7624
7798
  }
7625
7799
  var date = createDate(dateString, timezone, format);
7626
7800
  if (!date.isValid()) {
@@ -7695,13 +7869,13 @@ function extractBreadcrumbTitle(splitTitle, text) {
7695
7869
  acc[titleText] = acc[titleText] ? acc[titleText] + 1 : 1;
7696
7870
  return acc;
7697
7871
  }, {});
7698
- var _Reflect$ownKeys$redu = _Reflect$ownKeys__default["default"](termCounts).reduce(function (acc, key) {
7872
+ var _Reflect$ownKeys$redu = _Reflect$ownKeys(termCounts).reduce(function (acc, key) {
7699
7873
  if (acc[1] < termCounts[key]) {
7700
7874
  return [key, termCounts[key]];
7701
7875
  }
7702
7876
  return acc;
7703
7877
  }, [0, 0]),
7704
- _Reflect$ownKeys$redu2 = _slicedToArray__default["default"](_Reflect$ownKeys$redu, 2),
7878
+ _Reflect$ownKeys$redu2 = _slicedToArray(_Reflect$ownKeys$redu, 2),
7705
7879
  maxTerm = _Reflect$ownKeys$redu2[0],
7706
7880
  termCount = _Reflect$ownKeys$redu2[1];
7707
7881
 
@@ -7730,16 +7904,16 @@ function cleanDomainFromTitle(splitTitle, url) {
7730
7904
  //
7731
7905
  // Strip out the big TLDs - it just makes the matching a bit more
7732
7906
  // accurate. Not the end of the world if it doesn't strip right.
7733
- var _URL$parse = URL__default["default"].parse(url),
7907
+ var _URL$parse = URL$1.parse(url),
7734
7908
  host = _URL$parse.host;
7735
7909
  var nakedDomain = host.replace(DOMAIN_ENDINGS_RE, '');
7736
7910
  var startSlug = splitTitle[0].toLowerCase().replace(' ', '');
7737
- var startSlugRatio = wuzzy__default["default"].levenshtein(startSlug, nakedDomain);
7911
+ var startSlugRatio = wuzzy.levenshtein(startSlug, nakedDomain);
7738
7912
  if (startSlugRatio > 0.4 && startSlug.length > 5) {
7739
7913
  return splitTitle.slice(2).join('');
7740
7914
  }
7741
7915
  var endSlug = splitTitle.slice(-1)[0].toLowerCase().replace(' ', '');
7742
- var endSlugRatio = wuzzy__default["default"].levenshtein(endSlug, nakedDomain);
7916
+ var endSlugRatio = wuzzy.levenshtein(endSlug, nakedDomain);
7743
7917
  if (endSlugRatio > 0.4 && endSlug.length >= 5) {
7744
7918
  return splitTitle.slice(0, -2).join('');
7745
7919
  }
@@ -7839,7 +8013,7 @@ function scoreContent($) {
7839
8013
  // First, look for special hNews based selectors and give them a big
7840
8014
  // boost, if they exist
7841
8015
  HNEWS_CONTENT_SELECTORS.forEach(function (_ref) {
7842
- var _ref2 = _slicedToArray__default["default"](_ref, 2),
8016
+ var _ref2 = _slicedToArray(_ref, 2),
7843
8017
  parentSelector = _ref2[0],
7844
8018
  childSelector = _ref2[1];
7845
8019
  $("".concat(parentSelector, " ").concat(childSelector)).each(function (index, node) {
@@ -7971,11 +8145,11 @@ function extractBestNode($, opts) {
7971
8145
  return $topCandidate;
7972
8146
  }
7973
8147
 
7974
- function _createForOfIteratorHelper$2(r, e) { var t = "undefined" != typeof _Symbol__default["default"] && r[_Symbol$iterator__default["default"]] || r["@@iterator"]; if (!t) { if (_Array$isArray__default["default"](r) || (t = _unsupportedIterableToArray$2(r)) || e && r && "number" == typeof r.length) { t && (r = t); var _n = 0, F = function F() {}; return { s: F, n: function n() { return _n >= r.length ? { done: !0 } : { done: !1, value: r[_n++] }; }, e: function e(r) { throw r; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var o, a = !0, u = !1; return { s: function s() { t = t.call(r); }, n: function n() { var r = t.next(); return a = r.done, r; }, e: function e(r) { u = !0, o = r; }, f: function f() { try { a || null == t["return"] || t["return"](); } finally { if (u) throw o; } } }; }
7975
- function _unsupportedIterableToArray$2(r, a) { if (r) { if ("string" == typeof r) return _arrayLikeToArray$2(r, a); var t = {}.toString.call(r).slice(8, -1); return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$from__default["default"](r) : "Arguments" === t || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t) ? _arrayLikeToArray$2(r, a) : void 0; } }
8148
+ function _createForOfIteratorHelper$2(r, e) { var t = "undefined" != typeof _Symbol && r[_Symbol$iterator] || r["@@iterator"]; if (!t) { if (_Array$isArray(r) || (t = _unsupportedIterableToArray$2(r)) || e) { t && (r = t); var _n = 0, F = function F() {}; return { s: F, n: function n() { return _n >= r.length ? { done: true } : { done: false, value: r[_n++] }; }, e: function e(r) { throw r; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var o, a = true, u = false; return { s: function s() { t = t.call(r); }, n: function n() { var r = t.next(); return a = r.done, r; }, e: function e(r) { u = true, o = r; }, f: function f() { try { a || null == t["return"] || t["return"](); } finally { if (u) throw o; } } }; }
8149
+ function _unsupportedIterableToArray$2(r, a) { if (r) { if ("string" == typeof r) return _arrayLikeToArray$2(r, a); var t = {}.toString.call(r).slice(8, -1); return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$from(r) : "Arguments" === t || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t) ? _arrayLikeToArray$2(r, a) : void 0; } }
7976
8150
  function _arrayLikeToArray$2(r, a) { (null == a || a > r.length) && (a = r.length); for (var e = 0, n = Array(a); e < a; e++) n[e] = r[e]; return n; }
7977
- function ownKeys$4(e, r) { var t = _Object$keys__default["default"](e); if (_Object$getOwnPropertySymbols__default["default"]) { var o = _Object$getOwnPropertySymbols__default["default"](e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor__default["default"](e, r).enumerable; })), t.push.apply(t, o); } return t; }
7978
- function _objectSpread$4(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$4(Object(t), !0).forEach(function (r) { _defineProperty__default["default"](e, r, t[r]); }) : _Object$getOwnPropertyDescriptors__default["default"] ? _Object$defineProperties__default["default"](e, _Object$getOwnPropertyDescriptors__default["default"](t)) : ownKeys$4(Object(t)).forEach(function (r) { _Object$defineProperty__default["default"](e, r, _Object$getOwnPropertyDescriptor__default["default"](t, r)); }); } return e; }
8151
+ function ownKeys$4(e, r) { var t = _Object$keys(e); if (_Object$getOwnPropertySymbols) { var o = _Object$getOwnPropertySymbols(e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor(e, r).enumerable; })), t.push.apply(t, o); } return t; }
8152
+ function _objectSpread$4(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$4(Object(t), true).forEach(function (r) { _defineProperty(e, r, t[r]); }) : _Object$getOwnPropertyDescriptors ? _Object$defineProperties(e, _Object$getOwnPropertyDescriptors(t)) : ownKeys$4(Object(t)).forEach(function (r) { _Object$defineProperty(e, r, _Object$getOwnPropertyDescriptor(t, r)); }); } return e; }
7979
8153
  var GenericContentExtractor = {
7980
8154
  defaultOpts: {
7981
8155
  stripUnlikelyCandidates: true,
@@ -8018,8 +8192,7 @@ var GenericContentExtractor = {
8018
8192
 
8019
8193
  // We didn't succeed on first pass, one by one disable our
8020
8194
  // extraction opts and try again.
8021
- // eslint-disable-next-line no-restricted-syntax
8022
- var _iterator = _createForOfIteratorHelper$2(_Reflect$ownKeys__default["default"](opts).filter(function (k) {
8195
+ var _iterator = _createForOfIteratorHelper$2(_Reflect$ownKeys(opts).filter(function (k) {
8023
8196
  return opts[k] === true;
8024
8197
  })),
8025
8198
  _step;
@@ -8147,8 +8320,8 @@ var AUTHOR_SELECTORS = ['.entry .entry-author', '.author.vcard .fn', '.author .v
8147
8320
  var bylineRe = /^[\n\s]*By/i;
8148
8321
  var BYLINE_SELECTORS_RE = [['#byline', bylineRe], ['.byline', bylineRe]];
8149
8322
 
8150
- function _createForOfIteratorHelper$1(r, e) { var t = "undefined" != typeof _Symbol__default["default"] && r[_Symbol$iterator__default["default"]] || r["@@iterator"]; if (!t) { if (_Array$isArray__default["default"](r) || (t = _unsupportedIterableToArray$1(r)) || e && r && "number" == typeof r.length) { t && (r = t); var _n = 0, F = function F() {}; return { s: F, n: function n() { return _n >= r.length ? { done: !0 } : { done: !1, value: r[_n++] }; }, e: function e(r) { throw r; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var o, a = !0, u = !1; return { s: function s() { t = t.call(r); }, n: function n() { var r = t.next(); return a = r.done, r; }, e: function e(r) { u = !0, o = r; }, f: function f() { try { a || null == t["return"] || t["return"](); } finally { if (u) throw o; } } }; }
8151
- function _unsupportedIterableToArray$1(r, a) { if (r) { if ("string" == typeof r) return _arrayLikeToArray$1(r, a); var t = {}.toString.call(r).slice(8, -1); return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$from__default["default"](r) : "Arguments" === t || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t) ? _arrayLikeToArray$1(r, a) : void 0; } }
8323
+ function _createForOfIteratorHelper$1(r, e) { var t = "undefined" != typeof _Symbol && r[_Symbol$iterator] || r["@@iterator"]; if (!t) { if (_Array$isArray(r) || (t = _unsupportedIterableToArray$1(r)) || e) { t && (r = t); var _n = 0, F = function F() {}; return { s: F, n: function n() { return _n >= r.length ? { done: true } : { done: false, value: r[_n++] }; }, e: function e(r) { throw r; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var o, a = true, u = false; return { s: function s() { t = t.call(r); }, n: function n() { var r = t.next(); return a = r.done, r; }, e: function e(r) { u = true, o = r; }, f: function f() { try { a || null == t["return"] || t["return"](); } finally { if (u) throw o; } } }; }
8324
+ function _unsupportedIterableToArray$1(r, a) { if (r) { if ("string" == typeof r) return _arrayLikeToArray$1(r, a); var t = {}.toString.call(r).slice(8, -1); return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$from(r) : "Arguments" === t || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t) ? _arrayLikeToArray$1(r, a) : void 0; } }
8152
8325
  function _arrayLikeToArray$1(r, a) { (null == a || a > r.length) && (a = r.length); for (var e = 0, n = Array(a); e < a; e++) n[e] = r[e]; return n; }
8153
8326
  var GenericAuthorExtractor = {
8154
8327
  extract: function extract(_ref) {
@@ -8171,12 +8344,11 @@ var GenericAuthorExtractor = {
8171
8344
 
8172
8345
  // Last, use our looser regular-expression based selectors for
8173
8346
  // potential authors.
8174
- // eslint-disable-next-line no-restricted-syntax
8175
8347
  var _iterator = _createForOfIteratorHelper$1(BYLINE_SELECTORS_RE),
8176
8348
  _step;
8177
8349
  try {
8178
8350
  for (_iterator.s(); !(_step = _iterator.n()).done;) {
8179
- var _step$value = _slicedToArray__default["default"](_step.value, 2),
8351
+ var _step$value = _slicedToArray(_step.value, 2),
8180
8352
  selector = _step$value[0],
8181
8353
  regex = _step$value[1];
8182
8354
  var node = $(selector);
@@ -8333,8 +8505,8 @@ function scoreBySibling($img) {
8333
8505
  }
8334
8506
  function scoreByDimensions($img) {
8335
8507
  var score = 0;
8336
- var width = _parseFloat__default["default"]($img.attr('width'));
8337
- var height = _parseFloat__default["default"]($img.attr('height'));
8508
+ var width = _parseFloat($img.attr('width'));
8509
+ var height = _parseFloat($img.attr('height'));
8338
8510
  var src = $img.attr('src');
8339
8511
 
8340
8512
  // Penalty for skinny images
@@ -8361,8 +8533,8 @@ function scoreByPosition($imgs, index) {
8361
8533
  return $imgs.length / 2 - index;
8362
8534
  }
8363
8535
 
8364
- function _createForOfIteratorHelper(r, e) { var t = "undefined" != typeof _Symbol__default["default"] && r[_Symbol$iterator__default["default"]] || r["@@iterator"]; if (!t) { if (_Array$isArray__default["default"](r) || (t = _unsupportedIterableToArray(r)) || e && r && "number" == typeof r.length) { t && (r = t); var _n = 0, F = function F() {}; return { s: F, n: function n() { return _n >= r.length ? { done: !0 } : { done: !1, value: r[_n++] }; }, e: function e(r) { throw r; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var o, a = !0, u = !1; return { s: function s() { t = t.call(r); }, n: function n() { var r = t.next(); return a = r.done, r; }, e: function e(r) { u = !0, o = r; }, f: function f() { try { a || null == t["return"] || t["return"](); } finally { if (u) throw o; } } }; }
8365
- function _unsupportedIterableToArray(r, a) { if (r) { if ("string" == typeof r) return _arrayLikeToArray(r, a); var t = {}.toString.call(r).slice(8, -1); return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$from__default["default"](r) : "Arguments" === t || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t) ? _arrayLikeToArray(r, a) : void 0; } }
8536
+ function _createForOfIteratorHelper(r, e) { var t = "undefined" != typeof _Symbol && r[_Symbol$iterator] || r["@@iterator"]; if (!t) { if (_Array$isArray(r) || (t = _unsupportedIterableToArray(r)) || e) { t && (r = t); var _n = 0, F = function F() {}; return { s: F, n: function n() { return _n >= r.length ? { done: true } : { done: false, value: r[_n++] }; }, e: function e(r) { throw r; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var o, a = true, u = false; return { s: function s() { t = t.call(r); }, n: function n() { var r = t.next(); return a = r.done, r; }, e: function e(r) { u = true, o = r; }, f: function f() { try { a || null == t["return"] || t["return"](); } finally { if (u) throw o; } } }; }
8537
+ function _unsupportedIterableToArray(r, a) { if (r) { if ("string" == typeof r) return _arrayLikeToArray(r, a); var t = {}.toString.call(r).slice(8, -1); return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$from(r) : "Arguments" === t || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t) ? _arrayLikeToArray(r, a) : void 0; } }
8366
8538
  function _arrayLikeToArray(r, a) { (null == a || a > r.length) && (a = r.length); for (var e = 0, n = Array(a); e < a; e++) n[e] = r[e]; return n; }
8367
8539
 
8368
8540
  // Given a resource, try to find the lead image URL from within
@@ -8412,10 +8584,10 @@ var GenericLeadImageUrlExtractor = {
8412
8584
  score += scoreByPosition(imgs, index);
8413
8585
  imgScores[src] = score;
8414
8586
  });
8415
- var _Reflect$ownKeys$redu = _Reflect$ownKeys__default["default"](imgScores).reduce(function (acc, key) {
8587
+ var _Reflect$ownKeys$redu = _Reflect$ownKeys(imgScores).reduce(function (acc, key) {
8416
8588
  return imgScores[key] > acc[1] ? [key, imgScores[key]] : acc;
8417
8589
  }, [null, 0]),
8418
- _Reflect$ownKeys$redu2 = _slicedToArray__default["default"](_Reflect$ownKeys$redu, 2),
8590
+ _Reflect$ownKeys$redu2 = _slicedToArray(_Reflect$ownKeys$redu, 2),
8419
8591
  topUrl = _Reflect$ownKeys$redu2[0],
8420
8592
  topScore = _Reflect$ownKeys$redu2[1];
8421
8593
  if (topScore > 0) {
@@ -8425,7 +8597,6 @@ var GenericLeadImageUrlExtractor = {
8425
8597
 
8426
8598
  // If nothing else worked, check to see if there are any really
8427
8599
  // probable nodes in the doc, like <link rel="image_src" />.
8428
- // eslint-disable-next-line no-restricted-syntax
8429
8600
  var _iterator = _createForOfIteratorHelper(LEAD_IMAGE_URL_SELECTORS),
8430
8601
  _step;
8431
8602
  try {
@@ -8464,7 +8635,7 @@ function scoreSimilarity(score, articleUrl, href) {
8464
8635
  // sliding scale, subtract points from this link based on
8465
8636
  // similarity.
8466
8637
  if (score > 0) {
8467
- var similarity = new difflib__default["default"].SequenceMatcher(null, articleUrl, href).ratio();
8638
+ var similarity = new difflib.SequenceMatcher(null, articleUrl, href).ratio();
8468
8639
  // Subtract .1 from diff_percent when calculating modifier,
8469
8640
  // which means that if it's less than 10% different, we give a
8470
8641
  // bonus instead. Ex:
@@ -8485,7 +8656,7 @@ function scoreLinkText(linkText, pageNum) {
8485
8656
  // get scored, and sorted properly by score.
8486
8657
  var score = 0;
8487
8658
  if (IS_DIGIT_RE.test(linkText.trim())) {
8488
- var linkTextAsNum = _parseInt__default["default"](linkText, 10);
8659
+ var linkTextAsNum = _parseInt(linkText, 10);
8489
8660
  // If it's the first page, we already got it on the first call.
8490
8661
  // Give it a negative score. Otherwise, up to page 10, give a
8491
8662
  // small bonus.
@@ -8554,7 +8725,7 @@ function scoreByParents($link) {
8554
8725
  var positiveMatch = false;
8555
8726
  var negativeMatch = false;
8556
8727
  var score = 0;
8557
- _Array$from__default["default"](range(0, 4)).forEach(function () {
8728
+ _Array$from(range(0, 4)).forEach(function () {
8558
8729
  if ($parent.length === 0) {
8559
8730
  return;
8560
8731
  }
@@ -8604,7 +8775,7 @@ function shouldScore(href, articleUrl, baseUrl, parsedUrl, linkText, previousUrl
8604
8775
  return false;
8605
8776
  }
8606
8777
  var hostname = parsedUrl.hostname;
8607
- var _URL$parse = URL__default["default"].parse(href),
8778
+ var _URL$parse = URL$1.parse(href),
8608
8779
  linkHost = _URL$parse.hostname;
8609
8780
 
8610
8781
  // Domain mismatch.
@@ -8679,7 +8850,7 @@ function scoreLinks(_ref) {
8679
8850
  $ = _ref.$,
8680
8851
  _ref$previousUrls = _ref.previousUrls,
8681
8852
  previousUrls = _ref$previousUrls === void 0 ? [] : _ref$previousUrls;
8682
- parsedUrl = parsedUrl || URL__default["default"].parse(articleUrl);
8853
+ parsedUrl = parsedUrl || URL$1.parse(articleUrl);
8683
8854
  var baseRegex = makeBaseRegex(baseUrl);
8684
8855
  var isWp = isWordpress($);
8685
8856
 
@@ -8730,7 +8901,7 @@ function scoreLinks(_ref) {
8730
8901
  possiblePage.score = score;
8731
8902
  return possiblePages;
8732
8903
  }, {});
8733
- return _Reflect$ownKeys__default["default"](scoredPages).length === 0 ? null : scoredPages;
8904
+ return _Reflect$ownKeys(scoredPages).length === 0 ? null : scoredPages;
8734
8905
  }
8735
8906
 
8736
8907
  // Looks for and returns next page url
@@ -8742,7 +8913,7 @@ var GenericNextPageUrlExtractor = {
8742
8913
  parsedUrl = _ref.parsedUrl,
8743
8914
  _ref$previousUrls = _ref.previousUrls,
8744
8915
  previousUrls = _ref$previousUrls === void 0 ? [] : _ref$previousUrls;
8745
- parsedUrl = parsedUrl || URL__default["default"].parse(url);
8916
+ parsedUrl = parsedUrl || URL$1.parse(url);
8746
8917
  var articleUrl = removeAnchor(url);
8747
8918
  var baseUrl = articleBaseUrl(url, parsedUrl);
8748
8919
  var links = $('a[href]').toArray();
@@ -8760,7 +8931,7 @@ var GenericNextPageUrlExtractor = {
8760
8931
 
8761
8932
  // now that we've scored all possible pages,
8762
8933
  // find the biggest one.
8763
- var topPage = _Reflect$ownKeys__default["default"](scoredLinks).reduce(function (acc, link) {
8934
+ var topPage = _Reflect$ownKeys(scoredLinks).reduce(function (acc, link) {
8764
8935
  var scoredLink = scoredLinks[link];
8765
8936
  return scoredLink.score > acc.score ? scoredLink : acc;
8766
8937
  }, {
@@ -8779,7 +8950,7 @@ var GenericNextPageUrlExtractor = {
8779
8950
  var CANONICAL_META_SELECTORS = ['og:url'];
8780
8951
 
8781
8952
  function parseDomain(url) {
8782
- var parsedUrl = URL__default["default"].parse(url);
8953
+ var parsedUrl = URL$1.parse(url);
8783
8954
  var hostname = parsedUrl.hostname;
8784
8955
  return hostname;
8785
8956
  }
@@ -8850,7 +9021,7 @@ var ellipsize$1 = (function (str, max, opts) {
8850
9021
  if (typeof str !== 'string' || str.length === 0) return '';
8851
9022
  if (max === 0) return '';
8852
9023
  opts = opts || {};
8853
- _Object$keys__default["default"](defaults).forEach(function (key) {
9024
+ _Object$keys(defaults).forEach(function (key) {
8854
9025
  if (opts[key] === null || typeof opts[key] === 'undefined') {
8855
9026
  opts[key] = defaults[key];
8856
9027
  }
@@ -8906,8 +9077,8 @@ var GenericWordCountExtractor = {
8906
9077
  }
8907
9078
  };
8908
9079
 
8909
- function ownKeys$3(e, r) { var t = _Object$keys__default["default"](e); if (_Object$getOwnPropertySymbols__default["default"]) { var o = _Object$getOwnPropertySymbols__default["default"](e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor__default["default"](e, r).enumerable; })), t.push.apply(t, o); } return t; }
8910
- function _objectSpread$3(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$3(Object(t), !0).forEach(function (r) { _defineProperty__default["default"](e, r, t[r]); }) : _Object$getOwnPropertyDescriptors__default["default"] ? _Object$defineProperties__default["default"](e, _Object$getOwnPropertyDescriptors__default["default"](t)) : ownKeys$3(Object(t)).forEach(function (r) { _Object$defineProperty__default["default"](e, r, _Object$getOwnPropertyDescriptor__default["default"](t, r)); }); } return e; }
9080
+ function ownKeys$3(e, r) { var t = _Object$keys(e); if (_Object$getOwnPropertySymbols) { var o = _Object$getOwnPropertySymbols(e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor(e, r).enumerable; })), t.push.apply(t, o); } return t; }
9081
+ function _objectSpread$3(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$3(Object(t), true).forEach(function (r) { _defineProperty(e, r, t[r]); }) : _Object$getOwnPropertyDescriptors ? _Object$defineProperties(e, _Object$getOwnPropertyDescriptors(t)) : ownKeys$3(Object(t)).forEach(function (r) { _Object$defineProperty(e, r, _Object$getOwnPropertyDescriptor(t, r)); }); } return e; }
8911
9082
  var GenericExtractor = {
8912
9083
  // This extractor is the default for all domains
8913
9084
  domain: '*',
@@ -8923,7 +9094,7 @@ var GenericExtractor = {
8923
9094
  word_count: GenericWordCountExtractor.extract,
8924
9095
  direction: function direction(_ref) {
8925
9096
  var title = _ref.title;
8926
- return stringDirection__default["default"].getDirection(title);
9097
+ return stringDirection.getDirection(title);
8927
9098
  },
8928
9099
  extract: function extract(options) {
8929
9100
  var html = options.html,
@@ -8979,22 +9150,22 @@ var Detectors = {
8979
9150
  'meta[name="generator"][value="blogger"]': BloggerExtractor
8980
9151
  };
8981
9152
  function detectByHtml($) {
8982
- var selector = _Reflect$ownKeys__default["default"](Detectors).find(function (s) {
9153
+ var selector = _Reflect$ownKeys(Detectors).find(function (s) {
8983
9154
  return $(s).length > 0;
8984
9155
  });
8985
9156
  return Detectors[selector];
8986
9157
  }
8987
9158
 
8988
9159
  function getExtractor(url, parsedUrl, $) {
8989
- parsedUrl = parsedUrl || URL__default["default"].parse(url);
9160
+ parsedUrl = parsedUrl || URL$1.parse(url);
8990
9161
  var _parsedUrl = parsedUrl,
8991
9162
  hostname = _parsedUrl.hostname;
8992
9163
  var baseDomain = hostname.split('.').slice(-2).join('.');
8993
9164
  return apiExtractors[hostname] || apiExtractors[baseDomain] || Extractors[hostname] || Extractors[baseDomain] || detectByHtml($) || GenericExtractor;
8994
9165
  }
8995
9166
 
8996
- function ownKeys$2(e, r) { var t = _Object$keys__default["default"](e); if (_Object$getOwnPropertySymbols__default["default"]) { var o = _Object$getOwnPropertySymbols__default["default"](e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor__default["default"](e, r).enumerable; })), t.push.apply(t, o); } return t; }
8997
- function _objectSpread$2(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$2(Object(t), !0).forEach(function (r) { _defineProperty__default["default"](e, r, t[r]); }) : _Object$getOwnPropertyDescriptors__default["default"] ? _Object$defineProperties__default["default"](e, _Object$getOwnPropertyDescriptors__default["default"](t)) : ownKeys$2(Object(t)).forEach(function (r) { _Object$defineProperty__default["default"](e, r, _Object$getOwnPropertyDescriptor__default["default"](t, r)); }); } return e; }
9167
+ function ownKeys$2(e, r) { var t = _Object$keys(e); if (_Object$getOwnPropertySymbols) { var o = _Object$getOwnPropertySymbols(e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor(e, r).enumerable; })), t.push.apply(t, o); } return t; }
9168
+ function _objectSpread$2(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$2(Object(t), true).forEach(function (r) { _defineProperty(e, r, t[r]); }) : _Object$getOwnPropertyDescriptors ? _Object$defineProperties(e, _Object$getOwnPropertyDescriptors(t)) : ownKeys$2(Object(t)).forEach(function (r) { _Object$defineProperty(e, r, _Object$getOwnPropertyDescriptor(t, r)); }); } return e; }
8998
9169
 
8999
9170
  // Remove elements by an array of selectors
9000
9171
  function cleanBySelectors($content, $, _ref) {
@@ -9008,7 +9179,7 @@ function cleanBySelectors($content, $, _ref) {
9008
9179
  function transformElements($content, $, _ref2) {
9009
9180
  var transforms = _ref2.transforms;
9010
9181
  if (!transforms) return $content;
9011
- _Reflect$ownKeys__default["default"](transforms).forEach(function (key) {
9182
+ _Reflect$ownKeys(transforms).forEach(function (key) {
9012
9183
  var $matches = $(key, $content);
9013
9184
  var value = transforms[key];
9014
9185
 
@@ -9032,13 +9203,13 @@ function transformElements($content, $, _ref2) {
9032
9203
  }
9033
9204
  function findMatchingSelector($, selectors, extractHtml, allowMultiple) {
9034
9205
  return selectors.find(function (selector) {
9035
- if (_Array$isArray__default["default"](selector)) {
9206
+ if (_Array$isArray(selector)) {
9036
9207
  if (extractHtml) {
9037
9208
  return selector.reduce(function (acc, s) {
9038
9209
  return acc && $(s).length > 0;
9039
9210
  }, true);
9040
9211
  }
9041
- var _selector = _slicedToArray__default["default"](selector, 2),
9212
+ var _selector = _slicedToArray(selector, 2),
9042
9213
  s = _selector[0],
9043
9214
  attr = _selector[1];
9044
9215
  return (allowMultiple || !allowMultiple && $(s).length === 1) && $(s).attr(attr) && $(s).attr(attr).trim() !== '';
@@ -9080,7 +9251,7 @@ function select(opts) {
9080
9251
  // multi-match selection, which allows the parser to choose several
9081
9252
  // selectors to include in the result. Note that all selectors in the
9082
9253
  // array must match in order for this selector to trigger
9083
- if (_Array$isArray__default["default"](matchingSelector)) {
9254
+ if (_Array$isArray(matchingSelector)) {
9084
9255
  $content = $(matchingSelector.join(','));
9085
9256
  var $wrapper = $('<div></div>');
9086
9257
  $content.each(function (_, element) {
@@ -9114,8 +9285,8 @@ function select(opts) {
9114
9285
  var result;
9115
9286
  // if selector is an array (e.g., ['img', 'src']),
9116
9287
  // extract the attr
9117
- if (_Array$isArray__default["default"](matchingSelector)) {
9118
- var _matchingSelector = _slicedToArray__default["default"](matchingSelector, 3),
9288
+ if (_Array$isArray(matchingSelector)) {
9289
+ var _matchingSelector = _slicedToArray(matchingSelector, 3),
9119
9290
  selector = _matchingSelector[0],
9120
9291
  attr = _matchingSelector[1],
9121
9292
  transform = _matchingSelector[2];
@@ -9132,7 +9303,7 @@ function select(opts) {
9132
9303
  return $(el).text().trim();
9133
9304
  });
9134
9305
  }
9135
- result = _Array$isArray__default["default"](result.toArray()) && allowMultiple ? result.toArray() : result[0];
9306
+ result = _Array$isArray(result.toArray()) && allowMultiple ? result.toArray() : result[0];
9136
9307
  // Allow custom extractor to skip default cleaner
9137
9308
  // for this type; defaults to true
9138
9309
  if (defaultCleaner && Cleaners[type]) {
@@ -9142,7 +9313,7 @@ function select(opts) {
9142
9313
  }
9143
9314
  function selectExtendedTypes(extend, opts) {
9144
9315
  var results = {};
9145
- _Reflect$ownKeys__default["default"](extend).forEach(function (t) {
9316
+ _Reflect$ownKeys(extend).forEach(function (t) {
9146
9317
  if (!results[t]) {
9147
9318
  results[t] = select(_objectSpread$2(_objectSpread$2({}, opts), {}, {
9148
9319
  type: t,
@@ -9260,15 +9431,15 @@ var RootExtractor = {
9260
9431
  }
9261
9432
  };
9262
9433
 
9263
- function ownKeys$1(e, r) { var t = _Object$keys__default["default"](e); if (_Object$getOwnPropertySymbols__default["default"]) { var o = _Object$getOwnPropertySymbols__default["default"](e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor__default["default"](e, r).enumerable; })), t.push.apply(t, o); } return t; }
9264
- function _objectSpread$1(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$1(Object(t), !0).forEach(function (r) { _defineProperty__default["default"](e, r, t[r]); }) : _Object$getOwnPropertyDescriptors__default["default"] ? _Object$defineProperties__default["default"](e, _Object$getOwnPropertyDescriptors__default["default"](t)) : ownKeys$1(Object(t)).forEach(function (r) { _Object$defineProperty__default["default"](e, r, _Object$getOwnPropertyDescriptor__default["default"](t, r)); }); } return e; }
9434
+ function ownKeys$1(e, r) { var t = _Object$keys(e); if (_Object$getOwnPropertySymbols) { var o = _Object$getOwnPropertySymbols(e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor(e, r).enumerable; })), t.push.apply(t, o); } return t; }
9435
+ function _objectSpread$1(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys$1(Object(t), true).forEach(function (r) { _defineProperty(e, r, t[r]); }) : _Object$getOwnPropertyDescriptors ? _Object$defineProperties(e, _Object$getOwnPropertyDescriptors(t)) : ownKeys$1(Object(t)).forEach(function (r) { _Object$defineProperty(e, r, _Object$getOwnPropertyDescriptor(t, r)); }); } return e; }
9265
9436
  function collectAllPages(_x) {
9266
9437
  return _collectAllPages.apply(this, arguments);
9267
9438
  }
9268
9439
  function _collectAllPages() {
9269
- _collectAllPages = _asyncToGenerator__default["default"](/*#__PURE__*/_regeneratorRuntime__default["default"].mark(function _callee(_ref) {
9440
+ _collectAllPages = _asyncToGenerator(/*#__PURE__*/_regeneratorRuntime.mark(function _callee(_ref) {
9270
9441
  var next_page_url, html, $, metaCache, result, Extractor, title, url, pages, previousUrls, extractorOpts, nextPageResult, word_count;
9271
- return _regeneratorRuntime__default["default"].wrap(function (_context) {
9442
+ return _regeneratorRuntime.wrap(function (_context) {
9272
9443
  while (1) switch (_context.prev = _context.next) {
9273
9444
  case 0:
9274
9445
  next_page_url = _ref.next_page_url, html = _ref.html, $ = _ref.$, metaCache = _ref.metaCache, result = _ref.result, Extractor = _ref.Extractor, title = _ref.title, url = _ref.url;
@@ -9282,7 +9453,6 @@ function _collectAllPages() {
9282
9453
  break;
9283
9454
  }
9284
9455
  pages += 1;
9285
- // eslint-disable-next-line no-await-in-loop
9286
9456
  _context.next = 2;
9287
9457
  return Resource.create(next_page_url);
9288
9458
  case 2:
@@ -9323,17 +9493,17 @@ function _collectAllPages() {
9323
9493
  }
9324
9494
 
9325
9495
  var _excluded = ["html"];
9326
- function ownKeys(e, r) { var t = _Object$keys__default["default"](e); if (_Object$getOwnPropertySymbols__default["default"]) { var o = _Object$getOwnPropertySymbols__default["default"](e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor__default["default"](e, r).enumerable; })), t.push.apply(t, o); } return t; }
9327
- function _objectSpread(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys(Object(t), !0).forEach(function (r) { _defineProperty__default["default"](e, r, t[r]); }) : _Object$getOwnPropertyDescriptors__default["default"] ? _Object$defineProperties__default["default"](e, _Object$getOwnPropertyDescriptors__default["default"](t)) : ownKeys(Object(t)).forEach(function (r) { _Object$defineProperty__default["default"](e, r, _Object$getOwnPropertyDescriptor__default["default"](t, r)); }); } return e; }
9496
+ function ownKeys(e, r) { var t = _Object$keys(e); if (_Object$getOwnPropertySymbols) { var o = _Object$getOwnPropertySymbols(e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor(e, r).enumerable; })), t.push.apply(t, o); } return t; }
9497
+ function _objectSpread(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys(Object(t), true).forEach(function (r) { _defineProperty(e, r, t[r]); }) : _Object$getOwnPropertyDescriptors ? _Object$defineProperties(e, _Object$getOwnPropertyDescriptors(t)) : ownKeys(Object(t)).forEach(function (r) { _Object$defineProperty(e, r, _Object$getOwnPropertyDescriptor(t, r)); }); } return e; }
9328
9498
  var Parser = {
9329
9499
  parse: function parse(url) {
9330
9500
  var _arguments = arguments;
9331
- return _asyncToGenerator__default["default"](/*#__PURE__*/_regeneratorRuntime__default["default"].mark(function _callee() {
9501
+ return _asyncToGenerator(/*#__PURE__*/_regeneratorRuntime.mark(function _callee() {
9332
9502
  var _ref, html, opts, _opts$fetchAllPages, fetchAllPages, _opts$fallback, fallback, _opts$contentType, contentType, _opts$headers, headers, extend, customExtractor, parsedUrl, $, Extractor, metaCache, extendedTypes, result, _result, title, next_page_url, turndownService;
9333
- return _regeneratorRuntime__default["default"].wrap(function (_context) {
9503
+ return _regeneratorRuntime.wrap(function (_context) {
9334
9504
  while (1) switch (_context.prev = _context.next) {
9335
9505
  case 0:
9336
- _ref = _arguments.length > 1 && _arguments[1] !== undefined ? _arguments[1] : {}, html = _ref.html, opts = _objectWithoutProperties__default["default"](_ref, _excluded);
9506
+ _ref = _arguments.length > 1 && _arguments[1] !== undefined ? _arguments[1] : {}, html = _ref.html, opts = _objectWithoutProperties(_ref, _excluded);
9337
9507
  _opts$fetchAllPages = opts.fetchAllPages, fetchAllPages = _opts$fetchAllPages === void 0 ? true : _opts$fetchAllPages, _opts$fallback = opts.fallback, fallback = _opts$fallback === void 0 ? true : _opts$fallback, _opts$contentType = opts.contentType, contentType = _opts$contentType === void 0 ? 'html' : _opts$contentType, _opts$headers = opts.headers, headers = _opts$headers === void 0 ? {} : _opts$headers, extend = opts.extend, customExtractor = opts.customExtractor; // if no url was passed and this is the browser version,
9338
9508
  // set url to window.location.href and load the html
9339
9509
  // from the current page
@@ -9341,7 +9511,7 @@ var Parser = {
9341
9511
  url = window.location.href; // eslint-disable-line no-undef
9342
9512
  html = html || document.documentElement.outerHTML; // eslint-disable-line no-undef
9343
9513
  }
9344
- parsedUrl = URL__default["default"].parse(url);
9514
+ parsedUrl = URL$1.parse(url);
9345
9515
  if (validateUrl(parsedUrl)) {
9346
9516
  _context.next = 1;
9347
9517
  break;
@@ -9421,7 +9591,7 @@ var Parser = {
9421
9591
  });
9422
9592
  case 6:
9423
9593
  if (contentType === 'markdown') {
9424
- turndownService = new TurndownService__default["default"]();
9594
+ turndownService = new TurndownService();
9425
9595
  result.content = turndownService.turndown(result.content);
9426
9596
  } else if (contentType === 'text') {
9427
9597
  result.content = $.text($(result.content));